73 #ifndef TVM_RUNTIME_DISCO_SESSION_H_
74 #define TVM_RUNTIME_DISCO_SESSION_H_
76 #include <tvm/ffi/function.h>
112 return "kGetGlobalFunc";
114 return "kCallPacked";
116 return "kSyncWorker";
118 return "kCopyFromWorker0";
120 return "kCopyToWorker0";
122 return "kDebugGetFromRemote";
124 return "kDebugSetRegister";
126 TVM_FFI_THROW(ValueError) <<
"Unknown DiscoAction: " <<
static_cast<int>(action);
152 inline void DebugCopyFrom(
int worker_id, ffi::AnyView source);
173 class DRef :
public ObjectRef {
175 explicit DRef(ObjectPtr<DRefObj> data) : ObjectRef(data) { TVM_FFI_ICHECK(data !=
nullptr); }
201 template <
typename... Args>
255 TVM_DLL
virtual void DebugSetRegister(int64_t reg_id, ffi::AnyView value,
int worker_id) = 0;
294 ffi::String process_pool_creator, ffi::String entrypoint);
307 virtual void Send(
const ffi::PackedArgs& args) = 0;
309 virtual ffi::PackedArgs
Recv() = 0;
311 virtual void Reply(
const ffi::PackedArgs& args) = 0;
351 template <
typename... Args>
353 constexpr
int offset = 3;
354 constexpr
int kNumArgs = offset +
sizeof...(Args);
355 ffi::AnyView packed_args[kNumArgs];
356 ffi::PackedArgs::Fill(packed_args,
360 std::forward<Args>(args)...);
361 return this->
CallWithPacked(ffi::PackedArgs(packed_args, kNumArgs));
An object that exists on all workers.
Definition: session.h:137
TVM_FFI_DECLARE_OBJECT_INFO_STATIC("runtime.disco.DRef", DRefObj, Object)
~DRefObj()
Definition: session.h:337
static constexpr const bool _type_mutable
Definition: session.h:156
static constexpr const uint32_t _type_index
Definition: session.h:154
ffi::Any DebugGetFromRemote(int worker_id)
Get the value of a DRef from a remote worker.
Definition: session.h:343
int64_t reg_id
The id of the register.
Definition: session.h:160
void DebugCopyFrom(int worker_id, ffi::AnyView source)
Copy from the Tensor provided to a remote worker.
Definition: session.h:347
static constexpr const bool _type_final
Definition: session.h:155
ObjectRef session
Back-pointer to the host controller session.
Definition: session.h:162
Managed reference to DRefObj.
Definition: session.h:173
DRef(ObjectPtr< DRefObj > data)
Definition: session.h:175
TVM_FFI_DEFINE_OBJECT_REF_METHODS_NOTNULLABLE(DRef, ObjectRef, DRefObj)
A bi-directional channel for controller-worker communication. This channel is primarily used to transf...
Definition: session.h:303
virtual void Reply(const ffi::PackedArgs &args)=0
Reply a packed sequence to the sender.
virtual ffi::PackedArgs Recv()=0
Receive a packed sequence from worker.
virtual ~DiscoChannel()=default
virtual void Send(const ffi::PackedArgs &args)=0
Send a packed sequence to the receiver.
virtual ffi::PackedArgs RecvReply()=0
Receive a reply from the worker.
A Disco interactive session. It allows users to interact with the Disco command queue with various ff...
Definition: session.h:183
virtual void CopyToWorker0(const Tensor &host_array, const DRef &remote_array)=0
Copy the controller-side Tensor to worker-0.
virtual DRef GetGlobalFunc(const std::string &name)=0
Get a global function on workers.
friend struct SessionObj::FFI
Definition: session.h:257
virtual void Shutdown()=0
Signal all the workers to shutdown.
virtual DRef CallWithPacked(const ffi::PackedArgs &args)=0
Call packed function on each worker using a packed sequence. The calling convention: The first elemen...
virtual void DeallocReg(int reg_id)=0
Deallocate a register id, kill it on all workers, and append it to free_regs_.
virtual ffi::Any DebugGetFromRemote(int64_t reg_id, int worker_id)=0
Get the value of a register from a remote worker.
virtual void SyncWorker(int worker_id)=0
Synchronize the controller with a worker, and it will wait until the worker finishes executing this instru...
static constexpr const bool _type_mutable
Definition: session.h:261
virtual int64_t GetNumWorkers()=0
Get the number of workers in the session.
virtual void CopyFromWorker0(const Tensor &host_array, const DRef &remote_array)=0
Copy a Tensor from worker-0 to the controller-side Tensor.
virtual ~SessionObj()=default
virtual void InitCCL(ffi::String ccl, IntTuple device_ids)=0
Initialize the data plane between workers.
TVM_FFI_DECLARE_OBJECT_INFO("runtime.disco.Session", SessionObj, Object)
DRef TVM_ALWAYS_INLINE CallPacked(const DRef &func, Args &&... args)
Call a ffi::Function on workers providing variadic arguments.
virtual void DebugSetRegister(int64_t reg_id, ffi::AnyView value, int worker_id)=0
Set the value of a register on a remote worker.
Managed reference to SessionObj.
Definition: session.h:273
static Session ProcessSession(int num_workers, int num_groups, ffi::String process_pool_creator, ffi::String entrypoint)
Create a session backed by pipe-based multiprocessing.
static Session ThreadedSession(int num_workers, int num_groups)
Create a session backed by a thread pool of workers.
TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(Session, ObjectRef, SessionObj)
Managed Tensor. The array is backed by reference counted blocks.
Definition: tensor.h:54
A special communication channel between the controller and worker-0, assuming they are always collocated i...
Definition: session.h:320
std::mutex queue_mutex_
The mutex that guards host_arrays
Definition: session.h:328
std::queue< Tensor > host_arrays
The host-side arrays to be passed to worker-0 for special uses, for example, copy-to-worker0 and copy-fr...
Definition: session.h:326
Defines tuple of integers.
std::string DiscoAction2String(DiscoAction action)
Converts the enum class DiscoAction to string.
Definition: session.h:105
DiscoAction
All possible kinds of Disco commands.
Definition: session.h:92
@ kRuntimeDiscoDRef
runtime::DRef for disco distributed runtime
Definition: object.h:65
ffi::Shape IntTuple
Definition: int_tuple.h:33
An object that builds and maintains block scope and StmtSref mapping for Dependence analysis.
Definition: analyzer.h:37
A managed object in the TVM runtime.
A device-independent managed Tensor abstraction.