tvm
ndarray.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
24 #ifndef TVM_RUNTIME_NDARRAY_H_
25 #define TVM_RUNTIME_NDARRAY_H_
26 
31 #include <tvm/runtime/data_type.h>
32 #include <tvm/runtime/object.h>
33 #include <tvm/runtime/serializer.h>
34 
35 #include <atomic>
36 #include <functional>
37 #include <utility>
38 #include <vector>
39 
40 namespace tvm {
41 
42 // alias DLDevice
43 using Device = DLDevice;
44 
45 namespace runtime {
46 
51 class NDArray : public ObjectRef {
52  public:
54  class ContainerBase;
56  class Container;
60  NDArray() {}
65  explicit NDArray(ObjectPtr<Object> data) : ObjectRef(data) {}
66 
68  inline void reset();
73  inline int use_count() const;
75  inline const DLTensor* operator->() const;
77  inline bool IsContiguous() const;
84  inline void CopyFrom(const DLTensor* other);
85  inline void CopyFrom(const NDArray& other);
93  TVM_DLL void CopyFromBytes(const void* data, size_t nbytes);
100  inline void CopyTo(DLTensor* other) const;
101  inline void CopyTo(const NDArray& other) const;
109  TVM_DLL void CopyToBytes(void* data, size_t nbytes) const;
117  TVM_DLL NDArray CopyTo(const Device& dev, Optional<String> mem_scope = NullOpt) const;
123  inline bool Load(dmlc::Stream* stream);
128  inline void Save(dmlc::Stream* stream) const;
129 
150  TVM_DLL NDArray CreateView(ShapeTuple shape, DLDataType dtype, uint64_t relative_byte_offset = 0);
151 
157  TVM_DLL DLManagedTensor* ToDLPack() const;
166  TVM_DLL static NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev,
167  Optional<String> mem_scope = NullOpt);
178  TVM_DLL static NDArray FromExternalDLTensor(const DLTensor& dl_tensor);
186  TVM_DLL static NDArray NewFromDLTensor(DLTensor* dl_tensor, const Device& dev);
198  TVM_DLL static NDArray FromDLPack(DLManagedTensor* tensor);
205  TVM_DLL static void CopyFromTo(const DLTensor* from, DLTensor* to,
206  TVMStreamHandle stream = nullptr);
207 
208  TVM_DLL ShapeTuple Shape() const;
209  TVM_DLL runtime::DataType DataType() const;
220  TVM_DLL static bool AbilityOfZeroCopyForDLTensor(DLTensor* tensor, const Device& dev);
221  // internal namespace
222  struct Internal;
223 
224  private:
225  TVM_DLL static bool IsAligned(const DLTensor& tensor);
226 
227  protected:
228  friend class TVMPODValue_;
229  template <typename Derived>
230  friend class TVMPODValue_CRTP_;
231  friend class TVMRetValue;
232  friend class TVMArgsSetter;
237  inline Container* get_mutable() const;
238  // Helper functions for FFI handling.
248  inline static ObjectPtr<Object> FFIDataFromHandle(TVMArrayHandle handle);
253  inline static void FFIDecRef(TVMArrayHandle handle);
259  inline static TVMArrayHandle FFIGetHandle(const ObjectRef& nd);
260 };
261 
267 inline bool SaveDLTensor(dmlc::Stream* strm, const DLTensor* tensor);
268 
277  public:
284  DLTensor dl_tensor;
285 
292  void* manager_ctx{nullptr};
293 
294  protected:
300 };
301 
307  public:
310  // Initialize the type index.
312  dl_tensor.data = nullptr;
313  dl_tensor.ndim = 0;
314  dl_tensor.shape = nullptr;
315  dl_tensor.strides = nullptr;
316  dl_tensor.byte_offset = 0;
317  }
318 
319  Container(void* data, ShapeTuple shape, DLDataType dtype, Device dev) {
320  // Initialize the type index.
322  dl_tensor.data = data;
323  shape_ = std::move(shape);
324  dl_tensor.ndim = static_cast<int>(shape_.size());
325  dl_tensor.shape = const_cast<ShapeTuple::index_type*>(shape_.data());
326  dl_tensor.dtype = dtype;
327  dl_tensor.strides = nullptr;
328  dl_tensor.byte_offset = 0;
329  dl_tensor.device = dev;
330  }
335  void SetDeleter(FDeleter deleter) { deleter_ = deleter; }
336 
337  // Expose DecRef and IncRef as public function
338  // NOTE: they are only for developer purposes only.
339  using Object::DecRef;
340  using Object::IncRef;
341 
342  // Information for object protocol.
343  static constexpr const uint32_t _type_index = TypeIndex::kRuntimeNDArray;
344  static constexpr const uint32_t _type_child_slots = 0;
345  static constexpr const uint32_t _type_child_slots_can_overflow = true;
346  static constexpr const char* _type_key = "runtime.NDArray";
348 
349  protected:
350  friend class RPCWrappedFunc;
351  friend class NDArray;
352 };
353 
354 // implementations of inline functions
361 inline size_t GetDataSize(const DLTensor& arr) {
362  size_t size = 1;
363  for (tvm_index_t i = 0; i < arr.ndim; ++i) {
364  size *= static_cast<size_t>(arr.shape[i]);
365  }
366  size *= (arr.dtype.bits * arr.dtype.lanes + 7) / 8;
367  return size;
368 }
369 
375 static inline bool IsContiguous(const DLTensor& arr) {
376  if (arr.strides == nullptr) return true;
377  int64_t expected_stride = 1;
378  for (int32_t i = arr.ndim; i != 0; --i) {
379  int32_t k = i - 1;
380  if (arr.shape[k] == 1) {
381  // Skip stride check if shape[k] is 1, where the dimension is contiguous
382  // regardless of the value of stride.
383  //
384  // For example, PyTorch will normalize stride to 1 if shape is 1 when exporting
385  // to DLPack.
386  // More context: https://github.com/pytorch/pytorch/pull/83158
387  continue;
388  }
389  if (arr.strides[k] != expected_stride) return false;
390  expected_stride *= arr.shape[k];
391  }
392  return true;
393 }
394 
395 inline bool NDArray::IsContiguous() const {
396  return ::tvm::runtime::IsContiguous(get_mutable()->dl_tensor);
397 }
398 
399 inline void NDArray::CopyFrom(const DLTensor* other) {
400  ICHECK(data_ != nullptr);
401  CopyFromTo(other, &(get_mutable()->dl_tensor));
402 }
403 
404 inline void NDArray::CopyFrom(const NDArray& other) {
405  ICHECK(data_ != nullptr);
406  ICHECK(other.data_ != nullptr);
407  CopyFromTo(&(other.get_mutable()->dl_tensor), &(get_mutable()->dl_tensor));
408 }
409 
410 inline void NDArray::CopyTo(DLTensor* other) const {
411  ICHECK(data_ != nullptr);
412  CopyFromTo(&(get_mutable()->dl_tensor), other);
413 }
414 
415 inline void NDArray::CopyTo(const NDArray& other) const {
416  ICHECK(data_ != nullptr);
417  ICHECK(other.data_ != nullptr);
418  CopyFromTo(&(get_mutable()->dl_tensor), &(other.get_mutable()->dl_tensor));
419 }
420 
421 inline int NDArray::use_count() const { return data_.use_count(); }
422 
423 inline const DLTensor* NDArray::operator->() const { return &(get_mutable()->dl_tensor); }
424 
426  return static_cast<NDArray::Container*>(data_.get());
427 }
428 
430  return GetObjectPtr<Object>(
431  static_cast<NDArray::Container*>(reinterpret_cast<NDArray::ContainerBase*>(handle)));
432 }
433 
435  // NOTE: it is necessary to cast to container then to base
436  // so that the FFI handle uses the ContainerBase address.
437  auto ptr = reinterpret_cast<TVMArrayHandle>(static_cast<NDArray::ContainerBase*>(
438  static_cast<NDArray::Container*>(const_cast<Object*>(nd.get()))));
439  return ptr;
440 }
441 
442 inline void NDArray::FFIDecRef(TVMArrayHandle handle) {
443  static_cast<NDArray::Container*>(reinterpret_cast<NDArray::ContainerBase*>(handle))->DecRef();
444 }
445 
447  return static_cast<NDArray::Container*>(reinterpret_cast<NDArray::ContainerBase*>(handle));
448 }
449 
451 constexpr uint64_t kTVMNDArrayMagic = 0xDD5E40F096B4A13F;
452 
453 inline bool SaveDLTensor(dmlc::Stream* strm, const DLTensor* tensor) {
454  uint64_t header = kTVMNDArrayMagic, reserved = 0;
455  strm->Write(header);
456  strm->Write(reserved);
457  // Always save data as CPU context
458  //
459  // Parameters that get serialized should be in CPU by default.
460  // So even the array's context is GPU, it will be stored as CPU array.
461  // This is used to prevent case when another user loads the parameters
462  // back on machine that do not have GPU or related context.
463  //
464  // We can always do array.CopyTo(target_dev) to get a corresponding
465  // array in the target context.
466  Device cpu_dev;
467  cpu_dev.device_type = kDLCPU;
468  cpu_dev.device_id = 0;
469  strm->Write(cpu_dev);
470  strm->Write(tensor->ndim);
471  strm->Write(tensor->dtype);
472  int ndim = tensor->ndim;
473  strm->WriteArray(tensor->shape, ndim);
474  int type_bytes = (tensor->dtype.bits + 7) / 8;
475  int64_t num_elems = 1;
476  for (int i = 0; i < ndim; ++i) {
477  num_elems *= tensor->shape[i];
478  }
479  int64_t data_byte_size = type_bytes * num_elems;
480  strm->Write(data_byte_size);
481 
482  if (DMLC_IO_NO_ENDIAN_SWAP && tensor->device.device_type == kDLCPU &&
483  tensor->strides == nullptr && tensor->byte_offset == 0) {
484  // quick path
485  strm->Write(tensor->data, data_byte_size);
486  } else {
487  std::vector<uint8_t> bytes(data_byte_size);
488  ICHECK_EQ(
489  TVMArrayCopyToBytes(const_cast<DLTensor*>(tensor), dmlc::BeginPtr(bytes), data_byte_size),
490  0)
491  << TVMGetLastError();
492  if (!DMLC_IO_NO_ENDIAN_SWAP) {
493  dmlc::ByteSwap(dmlc::BeginPtr(bytes), type_bytes, num_elems);
494  }
495  strm->Write(dmlc::BeginPtr(bytes), data_byte_size);
496  }
497  return true;
498 }
499 
500 inline void NDArray::Save(dmlc::Stream* strm) const { SaveDLTensor(strm, operator->()); }
501 
502 inline bool NDArray::Load(dmlc::Stream* strm) {
503  uint64_t header, reserved;
504  ICHECK(strm->Read(&header)) << "Invalid DLTensor file format";
505  ICHECK(strm->Read(&reserved)) << "Invalid DLTensor file format";
506  ICHECK(header == kTVMNDArrayMagic) << "Invalid DLTensor file format";
507  Device dev;
508  int ndim;
509  DLDataType dtype;
510  ICHECK(strm->Read(&dev)) << "Invalid DLTensor file format";
511  ICHECK(strm->Read(&ndim)) << "Invalid DLTensor file format";
512  ICHECK(strm->Read(&dtype)) << "Invalid DLTensor file format";
513  ICHECK_EQ(dev.device_type, kDLCPU) << "Invalid DLTensor device: can only save as CPU tensor";
514  std::vector<int64_t> shape(ndim);
515  if (ndim != 0) {
516  ICHECK(strm->ReadArray(&shape[0], ndim)) << "Invalid DLTensor file format";
517  }
518  NDArray ret = NDArray::Empty(ShapeTuple(shape), dtype, dev);
519  int64_t num_elems = 1;
520  int elem_bytes = (ret->dtype.bits + 7) / 8;
521  for (int i = 0; i < ret->ndim; ++i) {
522  num_elems *= ret->shape[i];
523  }
524  int64_t data_byte_size;
525  ICHECK(strm->Read(&data_byte_size)) << "Invalid DLTensor file format";
526  ICHECK(data_byte_size == num_elems * elem_bytes) << "Invalid DLTensor file format";
527  auto read_ret = strm->Read(ret->data, data_byte_size);
528  // Only check non-empty data
529  if (ndim > 0 && shape[0] != 0) {
530  ICHECK(read_ret) << "Invalid DLTensor file format";
531  }
532  if (!DMLC_IO_NO_ENDIAN_SWAP) {
533  dmlc::ByteSwap(ret->data, elem_bytes, num_elems);
534  }
535  *this = ret;
536  return true;
537 }
538 
546  if (device.device_type == DLDeviceType::kDLCUDA) {
547  return Device{DLDeviceType::kDLCUDAHost, 0};
548  } else if (device.device_type == DLDeviceType::kDLROCM) {
549  return Device{DLDeviceType::kDLROCMHost, 0};
550  } else {
551  // Fallback to CPU.
552  return Device{DLDeviceType::kDLCPU, 0};
553  }
554 }
555 
556 } // namespace runtime
557 } // namespace tvm
558 
559 namespace std {
560 template <>
561 struct hash<tvm::Device> {
562  std::size_t operator()(const tvm::Device& dev) const {
563  return ((dev.device_id << 8) | dev.device_type);
564  }
565 };
566 
567 template <>
568 struct equal_to<tvm::Device> {
569  bool operator()(const tvm::Device& lhs, const tvm::Device& rhs) const {
570  return (lhs.device_type == rhs.device_type && lhs.device_id == rhs.device_id);
571  }
572 };
573 } // namespace std
574 
575 #endif // TVM_RUNTIME_NDARRAY_H_
DLTensor * TVMArrayHandle
the array handle
Definition: c_runtime_api.h:204
const char * TVMGetLastError(void)
Return the string message of the last error. All functions in this file return 0 on success and a nonzero value when an error occurred...
void * TVMStreamHandle
The stream that is specific to device can be NULL, which indicates the default one.
Definition: c_runtime_api.h:238
int64_t tvm_index_t
type of array index.
Definition: c_runtime_api.h:89
int TVMArrayCopyToBytes(TVMArrayHandle handle, void *data, size_t nbytes)
Copy array data to CPU byte array.
DataType dtype() const
Definition: expr.h:129
Runtime primitive data type.
Definition: data_type.h:43
int bits() const
Definition: data_type.h:94
The container base structure contains all the fields except for the Object header.
Definition: ndarray.h:276
DLTensor dl_tensor
The corresponding dl_tensor field.
Definition: ndarray.h:284
void * manager_ctx
additional context, reserved for recycling
Definition: ndarray.h:292
ShapeTuple shape_
The shape container, can be used for shape data.
Definition: ndarray.h:299
Object container class that backs NDArray.
Definition: ndarray.h:306
static constexpr const uint32_t _type_index
Definition: ndarray.h:343
Container()
default constructor
Definition: ndarray.h:309
static constexpr const uint32_t _type_child_slots_can_overflow
Definition: ndarray.h:345
void SetDeleter(FDeleter deleter)
Set the deleter field.
Definition: ndarray.h:335
Container(void *data, ShapeTuple shape, DLDataType dtype, Device dev)
Definition: ndarray.h:319
static constexpr const char * _type_key
Definition: ndarray.h:346
static constexpr const uint32_t _type_child_slots
Definition: ndarray.h:344
friend class RPCWrappedFunc
Definition: ndarray.h:350
void DecRef()
developer function, decrease reference counter.
Definition: object.h:846
TVM_DECLARE_BASE_OBJECT_INFO(NDArray::Container, Object)
Managed NDArray. The array is backed by reference counted blocks.
Definition: ndarray.h:51
static TVMArrayHandle FFIGetHandle(const ObjectRef &nd)
Get FFI Array handle from ndarray.
Definition: ndarray.h:434
void CopyFrom(const DLTensor *other)
Copy data content from another array.
Definition: ndarray.h:399
bool IsContiguous() const
Definition: ndarray.h:395
NDArray CreateView(ShapeTuple shape, DLDataType dtype, uint64_t relative_byte_offset=0)
Create a NDArray that shares the data memory with the current one.
static NDArray FromExternalDLTensor(const DLTensor &dl_tensor)
Create a NDArray backed by an external DLTensor without memory copying.
NDArray CopyTo(const Device &dev, Optional< String > mem_scope=NullOpt) const
Copy the data to another device.
NDArray()
default constructor
Definition: ndarray.h:60
static NDArray Empty(ShapeTuple shape, DLDataType dtype, Device dev, Optional< String > mem_scope=NullOpt)
Create an empty NDArray.
static NDArray NewFromDLTensor(DLTensor *dl_tensor, const Device &dev)
Create new NDArray, data is copied from DLTensor.
void CopyTo(DLTensor *other) const
Copy data content into another array.
Definition: ndarray.h:410
runtime::DataType DataType() const
int use_count() const
Definition: ndarray.h:421
DLManagedTensor * ToDLPack() const
Create a reference view of NDArray that represents as DLManagedTensor.
static ObjectPtr< Object > FFIDataFromHandle(TVMArrayHandle handle)
Construct NDArray's Data field from array handle in FFI.
Definition: ndarray.h:429
void CopyToBytes(void *data, size_t nbytes) const
Copy data content into another array.
static NDArray FromDLPack(DLManagedTensor *tensor)
Create a NDArray backed by a dlpack tensor.
ShapeTuple Shape() const
const DLTensor * operator->() const
Definition: ndarray.h:423
Container * get_mutable() const
Get mutable internal container pointer.
Definition: ndarray.h:425
bool Load(dmlc::Stream *stream)
Load NDArray from stream.
Definition: ndarray.h:502
static void FFIDecRef(TVMArrayHandle handle)
DecRef resource managed by an FFI array handle.
Definition: ndarray.h:442
static bool AbilityOfZeroCopyForDLTensor(DLTensor *tensor, const Device &dev)
Check conditions for construction NDArray over DLTensor without copying. There are three conditions t...
static void CopyFromTo(const DLTensor *from, DLTensor *to, TVMStreamHandle stream=nullptr)
Function to copy data from one array to another.
void Save(dmlc::Stream *stream) const
Save NDArray to stream.
Definition: ndarray.h:500
void reset()
reset the content of NDArray to be nullptr
void CopyFromBytes(const void *data, size_t nbytes)
Copy data content from a byte buffer.
NDArray(ObjectPtr< Object > data)
constructor.
Definition: ndarray.h:65
A custom smart pointer for Object.
Definition: object.h:362
Base class of all object reference.
Definition: object.h:519
const Object * get() const
Definition: object.h:554
ObjectPtr< Object > data_
Internal pointer that backs the reference.
Definition: object.h:605
base class of all object containers.
Definition: object.h:171
uint32_t type_index_
Type index(tag) that indicates the type of the object.
Definition: object.h:265
void DecRef()
developer function, decrease reference counter.
Definition: object.h:846
void(* FDeleter)(Object *self)
Object deleter.
Definition: object.h:177
void IncRef()
developer function, increases reference counter.
Definition: object.h:844
static uint32_t RuntimeTypeIndex()
Definition: object.h:229
FDeleter deleter_
deleter of this object to enable customized allocation. If the deleter is nullptr,...
Definition: object.h:273
Optional container that represents a nullable variant of T.
Definition: optional.h:51
Reference to shape tuple objects.
Definition: shape_tuple.h:85
const index_type * data() const
Return the data pointer.
Definition: shape_tuple.h:125
size_t size() const
Return the size of the shape tuple.
Definition: shape_tuple.h:132
ShapeTupleObj::index_type index_type
The type of shape index element.
Definition: shape_tuple.h:88
Definition: packed_func.h:1824
A utility class that adds methods useful for each POD type.
Definition: packed_func.h:738
Internal base class to handle conversion to POD values.
Definition: packed_func.h:615
Return value container. Unlike TVMArgValue, which only holds a reference and does not delete the underlying container...
Definition: packed_func.h:946
size_t GetDataSize(const DLTensor &arr)
return the size of data the DLTensor hold, in term of number of bytes
Definition: ndarray.h:361
Device GetPreferredHostDevice(Device device)
Get the preferred host device from the input device.
Definition: ndarray.h:545
bool SaveDLTensor(dmlc::Stream *strm, const DLTensor *tensor)
Save a DLTensor to stream.
Definition: ndarray.h:453
constexpr uint64_t kTVMNDArrayMagic
Magic number for NDArray file.
Definition: ndarray.h:451
Object * TVMArrayHandleToObjectHandle(TVMArrayHandle handle)
Definition: ndarray.h:446
Tensor shape(const Tensor &src, DataType dtype, const std::string name="T_shape", const std::string tag=kInjective)
Get the shape of input tensor.
Definition: transform.h:1913
runtime implementation for LibTorch/TorchScript.
Definition: analyzer.h:36
PrimExpr ret(PrimExpr value, Span span=Span())
Return the value.
DLDevice Device
Definition: ndarray.h:43
constexpr runtime::NullOptType NullOpt
Definition: optional.h:169
A managed object in the TVM runtime.
Runtime Optional container types.
Serializer extension to support TVM data types Include this file to enable serialization of DLDataTyp...
Runtime ShapeTuple container types.
Runtime String container types.
@ kRuntimeNDArray
runtime::NDArray.
Definition: object.h:64