|
void * | to_ptr (uintptr_t v) |
|
uintptr_t | to_uint (void *ptr) |
|
constexpr int | xyc_to_sm_16b (int y, int x, int c) |
|
constexpr int | hwio_to_sm_16b (int width, int y, int x, int i, int o) |
|
constexpr int | round_up (int v, int p2) |
|
uintptr_t | nhwc_at (const DLTensor &a, int n, int y, int x, int c) |
|
uintptr_t | hwio_at (const DLTensor &f, int y, int x, int i, int o) |
|
void | blockize_hwc_16b (void *out, void *inp_flat, int height, int width, int depth) |
| Function to "blockize" the flat input data. The term "blockize" is used to indicate that the data is stored in non-contiguous blocks. More...
|
|
void | deblockize_hwc_16b (void *out_flat, void *inp, int height, int width, int depth) |
| Convert back from the non-contiguous layout to a flat layout. More...
|
|
void | chunkify_hwio_16b (void **out_ptr, int out_ptr_size, void *out, void *inp, int height, int width, int idepth, int odepth) |
| Convert the layout of weights from flat to "chunked". The term chunked is explained below: More...
|
|
SDLTensor< 4 > | prepare_nhwc (tvm::runtime::DeviceAPI *device_api, const DLTensor *nhwc_flat, bool copy_data) |
|
int | calculate_num_weight_chunks (int64_t *shape_hwio) |
|
SDLTensor< 4 > | prepare_hwio (tvm::runtime::DeviceAPI *device_api, const DLTensor *hwio_flat, int num_chunks, void **ptr_table) |
|
template<size_t N> |
void | release (tvm::runtime::DeviceAPI *device_api, const SDLTensor< N > &tensor) |
|
void tvm::runtime::hexagon::blockize_hwc_16b |
( |
void * |
out, |
|
|
void * |
inp_flat, |
|
|
int |
height, |
|
|
int |
width, |
|
|
int |
depth |
|
) |
| |
Function to "blockize" the flat input data. The term "blockize" is used to indicate that the data is stored in non-contiguous blocks.
The input is mapped into the layout shown below (notation similar to the index map used for transform layout):
lambda n, h, w, c: n, h//8, w//4, c//32, AXIS_SEPARATOR, h%8, (w%4)//2, c%32, w%2
where AXIS_SEPARATOR represents a split in the physical layout
- Parameters
-
out | Pre-allocated output memory pointer |
inp_flat | Flat input data pointer |
height | |
width | |
depth | |
void tvm::runtime::hexagon::chunkify_hwio_16b |
( |
void ** |
out_ptr, |
|
|
int |
out_ptr_size, |
|
|
void * |
out, |
|
|
void * |
inp, |
|
|
int |
height, |
|
|
int |
width, |
|
|
int |
idepth, |
|
|
int |
odepth |
|
) |
| |
Convert the layout of weights from flat to "chunked". The term chunked is explained below:
Weights are packed into the layout shown below (notation similar to an index map). Since the weight layout cannot be represented exactly in index map notation, the base split is given below, followed by a few gotchas:
lambda h, w, i, o: h//8, w//4, o//32, i//32, h%8, w%4, (i%32)//2, o%32, i%2
The gotchas are:
- (w%4) is actually stored in right-to-left order, i.e. 3,2,1,0 instead of 0,1,2,3
- The h%8 and (w%4) dimensions are not padded up, leading to chunks of different sizes (hence the name "chunked" instead of "packed")
- The thinnest chunk of width is stored first. For example, if a kernel is 5x5, the first chunk along the width has size 1 (representing index 0) and the next one has size 4, representing indices (1,2,3,4)
- Parameters
-
out_ptr | Base pointer table to be filled with the list of pointers to the first addresses of the "chunked" weights |
out_ptr_size | The number of chunks |
out | Pointer to pre-allocated output memory |
inp | Pointer to flat input data |
height | |
width | |
idepth | |
odepth | |