tvm
device_api.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
24 #ifndef TVM_RUNTIME_DEVICE_API_H_
25 #define TVM_RUNTIME_DEVICE_API_H_
26 
27 #include <tvm/ffi/any.h>
28 #include <tvm/ffi/error.h>
29 #include <tvm/ffi/optional.h>
30 #include <tvm/ffi/string.h>
31 #include <tvm/runtime/base.h>
32 
33 #include <string>
/*!
 * \brief Opaque handle to a device-specific stream.
 *  The handle may be NULL, which indicates the default stream.
 */
38 typedef void* TVMStreamHandle;
39 
40 namespace tvm {
41 
42 // alias DLDevice
43 using Device = DLDevice;
44 
45 namespace runtime {
46 
70 #ifdef __cplusplus
71 typedef enum : int32_t {
72 #else
73 typedef enum {
74 #endif
75  // To help avoid accidental conflicts between `DLDeviceType`
76  // and this enumeration, start numbering the new enumerators
77  // a little higher than (currently) seems necessary.
78  TVMDeviceExtType_End = 36, // sentinel value
80 
/*!
 * \brief The query type passed into GetAttr.
 *
 * NOTE(review): only kExist, kWarpSize and kGcnArch are visible in this
 * listing; the symbol index of this file names further enumerators
 * (e.g. kMaxThreadsPerBlock, kDeviceName) on the listing lines that are
 * not shown here. Confirm against the full header before relying on values.
 */
84 enum DeviceAttrKind : int {
85  kExist = 0,
87  kWarpSize = 2,
95  kGcnArch = 10,
102 };
103 
// Allocation alignment constants. When the build defines TVM_KALLOC_ALIGNMENT,
// both constants take that value; otherwise both fall back to 64.
104 #ifdef TVM_KALLOC_ALIGNMENT
/*! \brief Number of bytes each allocation must align to. */
106 constexpr int kAllocAlignment = TVM_KALLOC_ALIGNMENT;
107 
/*! \brief Number of bytes each allocation must align to in temporary allocation. */
109 constexpr int kTempAllocaAlignment = TVM_KALLOC_ALIGNMENT;
110 #else
/*! \brief Number of bytes each allocation must align to. */
112 constexpr int kAllocAlignment = 64;
113 
/*! \brief Number of bytes each allocation must align to in temporary allocation. */
115 constexpr int kTempAllocaAlignment = 64;
116 #endif // TVM_KALLOC_ALIGNMENT
117 
/*! \brief Maximum size that can be allocated on stack. */
119 constexpr int kMaxStackAlloca = 1024;
120 
/*!
 * \brief Number of bytes each allocation must align to by default in the
 *  workspace buffer.
 *  NOTE(review): the brief in the symbol index is truncated; confirm the full
 *  wording against the original header.
 */
123 constexpr int kDefaultWorkspaceAlignment = 1;
124 
130  public:
/*! \brief Virtual destructor; the body is empty. */
132  virtual ~DeviceAPI() {}
/*!
 * \brief Set the environment device id to device.
 * \param dev The device to be set.
 */
137  virtual void SetDevice(Device dev) = 0;
/*!
 * \brief Get attribute of specified device.
 * \param dev The device to query.
 * \param kind The attribute being queried.
 * \param rv Pointer through which the result is presumably reported, since
 *  the function itself returns void -- confirm against implementations.
 */
145  virtual void GetAttr(Device dev, DeviceAttrKind kind, ffi::Any* rv) = 0;
146 
/*!
 * \brief Get the physical memory size required.
 * \param arr The tensor object.
 * \param mem_scope Optional memory scope; defaults to std::nullopt.
 * \return The required size as a size_t.
 */
153  virtual size_t GetDataSize(const DLTensor& arr,
154  ffi::Optional<ffi::String> mem_scope = std::nullopt);
155 
/*!
 * \brief Query the device for specified properties.
 *  The default implementation has an empty body: it does nothing and leaves
 *  *rv untouched.
 * \param dev The device to query.
 * \param property The name of the property being queried.
 * \param rv Pointer through which an overriding implementation can report
 *  the result.
 */
162  virtual void GetTargetProperty(Device dev, const std::string& property, ffi::Any* rv) {}
163 
/*!
 * \brief Allocate a data space on device.
 * \param dev The device to allocate on.
 * \param nbytes The requested size (the name suggests bytes -- confirm).
 * \param alignment The requested alignment.
 * \param type_hint Data type hint for the allocation.
 * \return Pointer to the allocated space.
 */
173  virtual void* AllocDataSpace(Device dev, size_t nbytes, size_t alignment,
174  DLDataType type_hint) = 0;
/*!
 * \brief Allocate a data space on device with memory scope support.
 * \param dev The device to allocate on.
 * \param ndim The number of dimensions of the allocated tensor.
 * \param shape The shape array; presumably holds ndim entries -- confirm.
 * \param dtype The element type.
 * \param mem_scope Optional memory scope; defaults to std::nullopt.
 * \return Pointer to the allocated space.
 */
184  virtual void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype,
185  ffi::Optional<ffi::String> mem_scope = std::nullopt);
/*!
 * \brief Free a data space on device.
 * \param dev The device the space belongs to.
 * \param ptr The data space to free.
 */
191  virtual void FreeDataSpace(Device dev, void* ptr) = 0;
/*!
 * \brief Copy data from one place to another.
 * \param from The source tensor.
 * \param to The destination tensor.
 * \param stream Stream handle passed along with the copy; a NULL handle
 *  indicates the default stream.
 */
202  virtual void CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream);
209 
/*!
 * \brief Free a stream of execution.
 * \param dev The device the stream belongs to.
 * \param stream The stream to free.
 */
216  virtual void FreeStream(Device dev, TVMStreamHandle stream);
217 
/*!
 * \brief Synchronize the stream.
 * \param dev The device the stream belongs to.
 * \param stream The stream to synchronize.
 */
223  virtual void StreamSync(Device dev, TVMStreamHandle stream) = 0;
/*!
 * \brief Set the stream.
 * \param dev The device the stream is set for.
 * \param stream The stream to set.
 */
229  virtual void SetStream(Device dev, TVMStreamHandle stream);
/*!
 * \brief Synchronize 2 streams of execution.
 * \param dev The device the streams belong to.
 * \param event_src The source stream.
 * \param event_dst The destination stream.
 *  NOTE(review): which stream waits on which is not visible in this listing;
 *  confirm against the implementation.
 */
248  virtual void SyncStreamFromTo(Device dev, TVMStreamHandle event_src, TVMStreamHandle event_dst);
/*!
 * \brief Allocate temporary workspace for backend execution.
 * \param dev The device to allocate on.
 * \param nbytes The requested size (the name suggests bytes -- confirm).
 * \param type_hint Data type hint; defaults to a value-initialized DLDataType.
 * \return Pointer to the allocated workspace.
 */
265  virtual void* AllocWorkspace(Device dev, size_t nbytes, DLDataType type_hint = {});
/*!
 * \brief Free temporary workspace in backend execution.
 * \param dev The device the workspace belongs to.
 * \param ptr The workspace pointer to free.
 */
272  virtual void FreeWorkspace(Device dev, void* ptr);
273 
/*!
 * \brief Get device API based on device.
 * \param dev The device.
 * \param allow_missing Defaults to false. NOTE(review): presumably controls
 *  whether a missing API is tolerated instead of being an error -- confirm
 *  against the implementation.
 * \return Pointer to the DeviceAPI for dev.
 */
280  static DeviceAPI* Get(Device dev, bool allow_missing = false);
281 
/*!
 * \brief Whether a certain device type requires the device to be set before
 *  launching the kernel function.
 * \param device_type The device type.
 * \return true for every device type other than kDLCPU, false for kDLCPU.
 */
287  static bool NeedSetDevice(int device_type) { return device_type != kDLCPU; }
288 
/*!
 * \brief Whether pointer arithmetics on a device owned pointer may be
 *  performed on the host.
 * \return The default implementation returns false.
 */
292  virtual bool SupportsDevicePointerArithmeticsOnHost() { return false; }
293 
294  protected:
/*!
 * \brief Copy data from one place to another (raw-pointer overload, declared
 *  in the protected section).
 * \param from The source pointer.
 * \param from_offset Offset applied to the source.
 * \param to The destination pointer.
 * \param to_offset Offset applied to the destination.
 * \param num_bytes The amount of data to copy.
 * \param dev_from The source device.
 * \param dev_to The destination device.
 * \param type_hint Data type hint for the copied data.
 * \param stream Stream handle passed along with the copy.
 */
308  virtual void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset,
309  size_t num_bytes, Device dev_from, Device dev_to,
310  DLDataType type_hint, TVMStreamHandle stream);
311 };
312 
/*!
 * \brief The name of DLDeviceType.
 * \param type The device type, as an int.
 * \return A string literal naming the device type (for example "cpu" for
 *  kDLCPU and "cuda" for kDLCUDA).
 * \note Any value not listed in the switch reaches the default case, which
 *  raises InternalError through TVM_FFI_THROW with the message
 *  "unknown type = <type>".
 */
318 inline const char* DLDeviceType2Str(int type) {
319  switch (type) {
320  case kDLCPU:
321  return "cpu";
322  case kDLCUDA:
323  return "cuda";
324  case kDLCUDAHost:
325  return "cuda_host";
326  case kDLCUDAManaged:
327  return "cuda_managed";
328  case kDLOpenCL:
329  return "opencl";
330  case kDLVulkan:
331  return "vulkan";
332  case kDLMetal:
333  return "metal";
334  case kDLVPI:
335  return "vpi";
336  case kDLROCM:
337  return "rocm";
338  case kDLROCMHost:
339  return "rocm_host";
340  case kDLExtDev:
341  return "ext_dev";
342  case kDLOneAPI:
343  return "oneapi";
344  case kDLWebGPU:
345  return "webgpu";
346  case kDLHexagon:
347  return "hexagon";
348  case kDLTrn:
349  return "trn";
350  default:
351  TVM_FFI_THROW(InternalError) << "unknown type = " << type;
352  }
// Every case above returns and the default case raises, so this statement is
// presumably never reached; it looks like it is kept so that no path falls
// off the end of a non-void function -- confirm.
353  throw;
354 }
355 
/*!
 * \brief Threshold separating local device types from RPC session devices.
 *  IsRPCSessionDevice treats a device as RPC-owned when
 *  device_type / kRPCSessMask > 0, i.e. when device_type >= kRPCSessMask.
 */
357 constexpr int kRPCSessMask = 128;
// The mask must not be smaller than the TVMDeviceExtType_End sentinel.
358 static_assert(kRPCSessMask >= TVMDeviceExtType_End);
359 
/*!
 * \brief Return true if a Device is owned by an RPC session.
 * \param dev The device to test.
 * \return true when dev.device_type / kRPCSessMask is greater than zero.
 */
363 inline bool IsRPCSessionDevice(Device dev) { return (dev.device_type / kRPCSessMask) > 0; }
364 
/*!
 * \brief Return the RPCSessTable index of the RPC Session that owns this device.
 * \param dev A device for which IsRPCSessionDevice(dev) holds; otherwise the
 *  TVM_FFI_ICHECK below fails with "GetRPCSessionIndex: dev has no RPC session".
 * \return dev.device_type / kRPCSessMask - 1.
 */
369 inline int GetRPCSessionIndex(Device dev) {
370  TVM_FFI_ICHECK(IsRPCSessionDevice(dev)) << "GetRPCSessionIndex: dev has no RPC session";
// The quotient is at least 1 for an RPC device, so subtracting 1 gives an
// index that starts at 0.
371  return dev.device_type / kRPCSessMask - 1;
372 }
373 
382  dev.device_type = static_cast<DLDeviceType>(dev.device_type % kRPCSessMask);
383  return dev;
384 }
385 
386 inline std::ostream& operator<<(std::ostream& os, DLDevice dev) { // NOLINT(*)
388  os << "remote[" << tvm::runtime::GetRPCSessionIndex(dev) << "]-";
390  }
391  os << tvm::runtime::DLDeviceType2Str(static_cast<int>(dev.device_type)) << ":" << dev.device_id;
392  return os;
393 }
394 
/*!
 * \brief Add a RPC session mask to a Device.
 * \param dev A device that is not yet owned by an RPC session; the
 *  TVM_FFI_ICHECK below fails if IsRPCSessionDevice(dev) already holds.
 * \param session_table_index The session index to encode into the device type.
 * \return A copy of dev whose device_type is OR-ed with
 *  kRPCSessMask * (session_table_index + 1).
 */
402 inline Device AddRPCSessionMask(Device dev, int session_table_index) {
403  TVM_FFI_ICHECK(!IsRPCSessionDevice(dev))
404  << "AddRPCSessionMask: dev already non-zero RPCSessionIndex: " << dev;
// The index is stored as (index + 1) multiples of kRPCSessMask, so that
// GetRPCSessionIndex can recover it as device_type / kRPCSessMask - 1.
405  dev.device_type =
406  static_cast<DLDeviceType>(dev.device_type | (kRPCSessMask * (session_table_index + 1)));
407  return dev;
408 }
409 
/*!
 * \brief Check if runtime module is enabled for target.
 * \param target The target name.
 * \return Whether the runtime is enabled for target.
 */
415 TVM_RUNTIME_DLL bool RuntimeEnabled(const ffi::String& target);
416 
/*! \brief Names of symbols, held as string constants. */
418 namespace symbol {
/*! \brief Symbol name "__tvm_global_barrier_state". */
419 constexpr const char* tvm_global_barrier_state = "__tvm_global_barrier_state";
/*! \brief Global function to set device. */
421 constexpr const char* tvm_set_device = "__tvm_set_device";
422 } // namespace symbol
423 
424 } // namespace runtime
425 } // namespace tvm
426 
427 #endif // TVM_RUNTIME_DEVICE_API_H_
TVM Runtime Device API, abstracts the device specific interface for memory management.
Definition: device_api.h:129
static bool NeedSetDevice(int device_type)
Whether a certain device type requires the device to be set before launching the kernel function.
Definition: device_api.h:287
virtual void GetTargetProperty(Device dev, const std::string &property, ffi::Any *rv)
Query the device for specified properties.
Definition: device_api.h:162
static DeviceAPI * Get(Device dev, bool allow_missing=false)
Get device API based on device.
virtual size_t GetDataSize(const DLTensor &arr, ffi::Optional< ffi::String > mem_scope=std::nullopt)
Get the physical memory size required.
virtual void CopyDataFromTo(DLTensor *from, DLTensor *to, TVMStreamHandle stream)
copy data from one place to another
virtual TVMStreamHandle CreateStream(Device dev)
Create a new stream of execution.
virtual void SyncStreamFromTo(Device dev, TVMStreamHandle event_src, TVMStreamHandle event_dst)
Synchronize 2 streams of execution.
virtual void FreeWorkspace(Device dev, void *ptr)
Free temporary workspace in backend execution.
virtual bool SupportsDevicePointerArithmeticsOnHost()
Whether pointer arithmetics on a device owned pointer may be performed on the host.
Definition: device_api.h:292
virtual void SetDevice(Device dev)=0
Set the environment device id to device.
virtual void FreeStream(Device dev, TVMStreamHandle stream)
Free a stream of execution.
virtual TVMStreamHandle GetCurrentStream(Device dev)
Get the current stream.
virtual void GetAttr(Device dev, DeviceAttrKind kind, ffi::Any *rv)=0
Get attribute of specified device.
virtual void * AllocWorkspace(Device dev, size_t nbytes, DLDataType type_hint={})
Allocate temporary workspace for backend execution.
virtual void StreamSync(Device dev, TVMStreamHandle stream)=0
Synchronize the stream.
virtual void FreeDataSpace(Device dev, void *ptr)=0
Free a data space on device.
virtual void CopyDataFromTo(const void *from, size_t from_offset, void *to, size_t to_offset, size_t num_bytes, Device dev_from, Device dev_to, DLDataType type_hint, TVMStreamHandle stream)
copy data from one place to another
virtual void SetStream(Device dev, TVMStreamHandle stream)
Set the stream.
virtual void * AllocDataSpace(Device dev, size_t nbytes, size_t alignment, DLDataType type_hint)=0
Allocate a data space on device.
virtual void * AllocDataSpace(Device dev, int ndim, const int64_t *shape, DLDataType dtype, ffi::Optional< ffi::String > mem_scope=std::nullopt)
Allocate a data space on device with memory scope support.
virtual ~DeviceAPI()
virtual destructor
Definition: device_api.h:132
void * TVMStreamHandle
A device-specific stream handle; it can be NULL, which indicates the default stream.
Definition: device_api.h:38
constexpr const char * tvm_set_device
global function to set device
Definition: device_api.h:421
constexpr const char * tvm_global_barrier_state
Definition: device_api.h:419
const char * DLDeviceType2Str(int type)
The name of DLDeviceType.
Definition: device_api.h:318
TVMDeviceExtType
Extension device types in TVM.
Definition: device_api.h:73
@ TVMDeviceExtType_End
Definition: device_api.h:78
constexpr int kMaxStackAlloca
Maximum size that can be allocated on stack.
Definition: device_api.h:119
DeviceAttrKind
the query type into GetAttr
Definition: device_api.h:84
@ kDeviceName
Definition: device_api.h:90
@ kDriverVersion
Definition: device_api.h:97
@ kMaxThreadsPerBlock
Definition: device_api.h:86
@ kMultiProcessorCount
Definition: device_api.h:92
@ kMaxThreadDimensions
Definition: device_api.h:93
@ kApiVersion
Definition: device_api.h:96
@ kImagePitchAlignment
Definition: device_api.h:101
@ kMaxClockRate
Definition: device_api.h:91
@ kWarpSize
Definition: device_api.h:87
@ kTotalGlobalMemory
Definition: device_api.h:99
@ kAvailableGlobalMemory
Definition: device_api.h:100
@ kMaxRegistersPerBlock
Definition: device_api.h:94
@ kComputeVersion
Definition: device_api.h:89
@ kGcnArch
Definition: device_api.h:95
@ kMaxSharedMemoryPerBlock
Definition: device_api.h:88
@ kExist
Definition: device_api.h:85
@ kL2CacheSizeBytes
Definition: device_api.h:98
constexpr int kRPCSessMask
A device type greater than or equal to this value is an RPC device.
Definition: device_api.h:357
constexpr int kDefaultWorkspaceAlignment
Number of bytes each allocation must align to by default in the workspace buffer to service intermedi...
Definition: device_api.h:123
std::ostream & operator<<(std::ostream &os, const DataType &dtype)
Definition: data_type.h:452
TVM_RUNTIME_DLL bool RuntimeEnabled(const ffi::String &target)
Check if runtime module is enabled for target.
int GetRPCSessionIndex(Device dev)
Return the RPCSessTable index of the RPC Session that owns this device.
Definition: device_api.h:369
constexpr int kTempAllocaAlignment
Number of bytes each allocation must align to in temporary allocation.
Definition: device_api.h:115
bool IsRPCSessionDevice(Device dev)
Return true if a Device is owned by an RPC session.
Definition: device_api.h:363
Device AddRPCSessionMask(Device dev, int session_table_index)
Add a RPC session mask to a Device. RPC clients typically do this when decoding a Device received fro...
Definition: device_api.h:402
constexpr int kAllocAlignment
Number of bytes each allocation must align to.
Definition: device_api.h:112
Device RemoveRPCSessionMask(Device dev)
Remove the RPC session mask from a Device. RPC clients typically do this when encoding a Device for t...
Definition: device_api.h:381
constexpr const char * device_type
The device type.
Definition: stmt.h:1011
Tensor shape(const Tensor &src, DataType dtype, const std::string name="T_shape", const std::string tag=kInjective)
Get the shape of input tensor.
Definition: transform.h:1981
An object that builds and maintains block scope and StmtSref mapping for Dependence analysis.
Definition: analyzer.h:37
DLDevice Device
Definition: device_api.h:43
#define TVM_RUNTIME_DLL
Definition: base.h:88