conv2d.h
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

#ifndef TVM_RUNTIME_HEXAGON_OPS_CONV2D_H_
#define TVM_RUNTIME_HEXAGON_OPS_CONV2D_H_

#include <tvm/runtime/device_api.h>

#include <cassert>
#include <cstdint>
#include <initializer_list>

namespace tvm {
namespace runtime {
namespace hexagon {

static constexpr auto hexagon_device = DLDevice{static_cast<DLDeviceType>(kDLHexagon), 0};

// Standalone DLTensor: "standalone" means that this object owns the shape
// array it points to, unlike a plain DLTensor, which only borrows it.
template <size_t NDIM>
class SDLTensor : public DLTensor {
 public:
  SDLTensor(void* data_ptr, DLDataType data_type, void* data_space, const int64_t* data_dims)
      : SDLTensor(data_ptr, data_type, data_space) {
    for (size_t i = 0; i < NDIM; ++i) dims[i] = data_dims[i];
  }

  SDLTensor(void* data_ptr, DLDataType data_type, void* data_space,
            std::initializer_list<int64_t> data_dims)
      : SDLTensor(data_ptr, data_type, data_space, data_dims.begin()) {}

  void* GetDataSpace() const { return data_space; }

 private:
  SDLTensor(void* data_ptr, DLDataType data_type, void* data_space) : data_space(data_space) {
    data = data_ptr;
    device = hexagon_device;
    ndim = NDIM;
    dtype = data_type;
    shape = dims;
    strides = nullptr;
    byte_offset = 0;
  }

  void* data_space = nullptr;
  int64_t dims[NDIM];
};
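
// A minimal usage sketch (hypothetical values; block_table and backing_mem
// are assumed to come from a prior DeviceAPI::AllocDataSpace call). The shape
// list is copied into the tensor's own dims array, so the initializer_list
// does not need to outlive the object:
//
//   DLDataType fp16{kDLFloat, 16, 1};
//   SDLTensor<4> blocks(block_table, fp16, backing_mem, {1, 4, 4, 2});
//   // blocks.ndim == 4, blocks.shape[3] == 2, blocks.strides == nullptr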

inline void* to_ptr(uintptr_t v) { return reinterpret_cast<void*>(v); }

inline uintptr_t to_uint(void* ptr) { return reinterpret_cast<uintptr_t>(ptr); }

constexpr int xyc_to_sm_16b(int y, int x, int c) {
  // Map y,x,c coordinates within a block to the offset (in 16-bit elements)
  // from the beginning of the block in spatial-major layout.
  // 10-bit spatial mask: yyyxcccccx
  assert(y >= 0 && x >= 0 && c >= 0);
  return y << 7 | (x & 2) << 5 | c << 1 | (x & 1);
}
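
// Worked example (a sanity sketch derived from the mask above, not from the
// original source): y=1, x=2, c=3 maps to
//   1 << 7 | (2 & 2) << 5 | 3 << 1 | (2 & 1) = 128 + 64 + 6 + 0 = 198.
// Note how the low bit of x lands in bit 0, interleaved below the channel bits.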

constexpr int hwio_to_sm_16b(int width, int y, int x, int i, int o) {
  // Map y,x,i,o coordinates within a chunk (assuming the origin at the
  // top-left spatial corner) to the offset (in 16-bit elements) from the
  // beginning of the chunk in spatial-major layout.
  // Spatial mask: p..piiiioooooi, where p..p are position bits.
  assert(width >= 1);
  assert(y >= 0 && x >= 0 && i >= 0 && o >= 0);
  int p = y * width + (width - 1 - x);
  return p << 10 | (i & 0x1e) << 5 | o << 1 | (i & 1);
}
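
// Worked example (a sanity sketch derived from the mask above): width=3, y=0,
// x=0, i=5, o=2 gives p = 0*3 + (3-1-0) = 2, so the offset is
//   2 << 10 | (5 & 0x1e) << 5 | 2 << 1 | (5 & 1) = 2048 + 128 + 4 + 1 = 2181.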

// Round v up to the nearest multiple of p2; p2 must be a power of two.
inline constexpr int round_up(int v, int p2) { return (v + p2 - 1) & -p2; }
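
// e.g. round_up(5, 4) == (5 + 3) & -4 == 8, and round_up(8, 4) == 8 (the
// example values are illustrative, not from the original source).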

// Returns the block address at the given index.
// Assumptions:
// - The data type of the tensor is fp16.
// - There is only one batch, hence n == 0.
inline uintptr_t nhwc_at(const DLTensor& a, int n, int y, int x, int c) {
  if (y < 0 || y >= a.shape[1]) return uintptr_t(0);
  auto p = static_cast<uintptr_t*>(a.data);
  assert(n == 0);
  return p[y * a.shape[2] * a.shape[3] + x * a.shape[3] + c];
}
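
// Here a.data is read as a table of block addresses laid out row-major over
// [height][width][channel-blocks]. For example (hypothetical blocked shape
// {1, 2, 2, 2}), the block at y=1, x=0, c=1 sits at table index
// 1*2*2 + 0*2 + 1 = 5.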

// Returns the address of the chunk stored at the given index.
// Assumptions:
// - The data type of the tensor is fp16.
inline uintptr_t hwio_at(const DLTensor& f, int y, int x, int i, int o) {
  auto p = static_cast<uintptr_t*>(f.data);
  return p[y * f.shape[1] * f.shape[2] * f.shape[3] + x * f.shape[2] * f.shape[3] +
           i * f.shape[3] + o];
}
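
// As with nhwc_at, f.data is read as a table of chunk addresses in row-major
// HWIO order. For example (hypothetical chunked shape {2, 2, 2, 2}), the
// chunk at y=1, x=0, i=1, o=0 sits at table index 1*8 + 0*4 + 1*2 + 0 = 10.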

// Convert ("blockize") the flat input data into the blocked, non-contiguous
// layout used by the Hexagon conv2d implementation.
void blockize_hwc_16b(void* out, void* inp_flat, int height, int width, int depth);

// Convert blocked data back from the non-contiguous layout to a flat layout.
void deblockize_hwc_16b(void* out_flat, void* inp, int height, int width, int depth);

// Convert the layout of weights from flat to "chunked".
void chunkify_hwio_16b(void** out_ptr, int out_ptr_size, void* out, void* inp, int height,
                       int width, int idepth, int odepth);

SDLTensor<4> prepare_nhwc(tvm::runtime::DeviceAPI* device_api, const DLTensor* nhwc_flat,
                          bool copy_data);

int calculate_num_weight_chunks(int64_t* shape_hwio);

SDLTensor<4> prepare_hwio(tvm::runtime::DeviceAPI* device_api, const DLTensor* hwio_flat,
                          int num_chunks, void** ptr_table);

template <size_t N>
void release(tvm::runtime::DeviceAPI* device_api, const SDLTensor<N>& tensor) {
  if (auto* data_space = tensor.GetDataSpace()) {
    device_api->FreeDataSpace(hexagon_device, data_space);
  }
}
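
// A minimal lifecycle sketch (api and act_flat are hypothetical; only
// prepare_nhwc and release are declared in this header). prepare_* allocates
// device storage that release() later returns to the same DeviceAPI:
//
//   tvm::runtime::DeviceAPI* api = /* Hexagon DeviceAPI instance */;
//   SDLTensor<4> act = prepare_nhwc(api, act_flat, /*copy_data=*/true);
//   // ... run the convolution ...
//   release(api, act);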

}  // namespace hexagon
}  // namespace runtime
}  // namespace tvm

#endif  // TVM_RUNTIME_HEXAGON_OPS_CONV2D_H_