tvm
Classes | Functions
tvm::runtime::hexagon Namespace Reference

Classes

class  SDLTensor
 

Functions

void * to_ptr (uintptr_t v)
 
uintptr_t to_uint (void *ptr)
 
constexpr int xyc_to_sm_16b (int y, int x, int c)
 
constexpr int hwio_to_sm_16b (int width, int y, int x, int i, int o)
 
constexpr int round_up (int v, int p2)
 
uintptr_t nhwc_at (const DLTensor &a, int n, int y, int x, int c)
 
uintptr_t hwio_at (const DLTensor &f, int y, int x, int i, int o)
 
void blockize_hwc_16b (void *out, void *inp_flat, int height, int width, int depth)
 Function to "blockize" the flat input data. The term "blockize" is used to indicate that the data is stored in non-contiguous blocks. More...
 
void deblockize_hwc_16b (void *out_flat, void *inp, int height, int width, int depth)
 Convert back from non-contiguous layout to a flat layout. More...
 
void chunkify_hwio_16b (void **out_ptr, int out_ptr_size, void *out, void *inp, int height, int width, int idepth, int odepth)
 Convert the layout of weights from flat to "chunked". The term chunked is explained below: More...
 
SDLTensor< 4 > prepare_nhwc (tvm::runtime::DeviceAPI *device_api, const DLTensor *nhwc_flat, bool copy_data)
 
int calculate_num_weight_chunks (int64_t *shape_hwio)
 
SDLTensor< 4 > prepare_hwio (tvm::runtime::DeviceAPI *device_api, const DLTensor *hwio_flat, int num_chunks, void **ptr_table)
 
template<size_t N>
void release (tvm::runtime::DeviceAPI *device_api, const SDLTensor< N > &tensor)
 

Function Documentation

◆ blockize_hwc_16b()

void tvm::runtime::hexagon::blockize_hwc_16b ( void *  out,
void *  inp_flat,
int  height,
int  width,
int  depth 
)

Function to "blockize" the flat input data. The term "blockize" is used to indicate that the data is stored in non-contiguous blocks.

The input is mapped into the below mentioned layout (notation similar to index map used for transform layout):

lambda n, h, w, c: n, h//8, w//4, c//32, AXIS_SEPARATOR, h%8, (w%4)//2, c%32, w%2

where AXIS_SEPARATOR represents a split in the physical layout.

Parameters
out: Pre-allocated output memory pointer
inp_flat: Flat input data pointer
height
width
depth

◆ calculate_num_weight_chunks()

int tvm::runtime::hexagon::calculate_num_weight_chunks ( int64_t *  shape_hwio)

◆ chunkify_hwio_16b()

void tvm::runtime::hexagon::chunkify_hwio_16b ( void **  out_ptr,
int  out_ptr_size,
void *  out,
void *  inp,
int  height,
int  width,
int  idepth,
int  odepth 
)

Convert the layout of weights from flat to "chunked". The term chunked is explained below:

Weights are packed into the layout shown below (notation similar to an index map). Since the weight layout cannot be represented exactly in index map notation, the base split is given below, followed by a few gotchas:

lambda h, w, i, o: h//8, w//4, o//32, i//32, h%8, w%4, (i%32)//2, o%32, i%2

The gotchas are:

  • (w%4) is actually stored in the right to left order, as in 3,2,1,0 instead of 0,1,2,3
  • The h%8 and (w%4) dimensions are not padded up, leading to chunks of different sizes (thereby the name "chunked" instead of packed)
  • The thinnest chunk of width is stored first. For example, if a kernel is 5x5, the first chunk along the width has size 1 (representing index 0) and the next one has size 4, representing indices (1,2,3,4)
Parameters
out_ptr: Base pointer table to be filled with the list of pointers to the first addresses of the "chunked" weights
out_ptr_size: The number of chunks
out: Pointer to pre-allocated output memory
inp: Pointer to flat input data
height
width
idepth
odepth

◆ deblockize_hwc_16b()

void tvm::runtime::hexagon::deblockize_hwc_16b ( void *  out_flat,
void *  inp,
int  height,
int  width,
int  depth 
)

Convert back from non-contiguous layout to a flat layout.

Parameters
out_flat: Pre-allocated output memory pointer
inp: Blockized input data pointer
height
width
depth

◆ hwio_at()

uintptr_t tvm::runtime::hexagon::hwio_at ( const DLTensor &  f,
int  y,
int  x,
int  i,
int  o 
)
inline

◆ hwio_to_sm_16b()

constexpr int tvm::runtime::hexagon::hwio_to_sm_16b ( int  width,
int  y,
int  x,
int  i,
int  o 
)

◆ nhwc_at()

uintptr_t tvm::runtime::hexagon::nhwc_at ( const DLTensor &  a,
int  n,
int  y,
int  x,
int  c 
)
inline

◆ prepare_hwio()

SDLTensor<4> tvm::runtime::hexagon::prepare_hwio ( tvm::runtime::DeviceAPI *  device_api,
const DLTensor *  hwio_flat,
int  num_chunks,
void **  ptr_table 
)

◆ prepare_nhwc()

SDLTensor<4> tvm::runtime::hexagon::prepare_nhwc ( tvm::runtime::DeviceAPI *  device_api,
const DLTensor *  nhwc_flat,
bool  copy_data 
)

◆ release()

template<size_t N>
void tvm::runtime::hexagon::release ( tvm::runtime::DeviceAPI *  device_api,
const SDLTensor< N > &  tensor 
)

◆ round_up()

constexpr int tvm::runtime::hexagon::round_up ( int  v,
int  p2 
)
inline

◆ to_ptr()

void* tvm::runtime::hexagon::to_ptr ( uintptr_t  v)
inline

◆ to_uint()

uintptr_t tvm::runtime::hexagon::to_uint ( void *  ptr)
inline

◆ xyc_to_sm_16b()

constexpr int tvm::runtime::hexagon::xyc_to_sm_16b ( int  y,
int  x,
int  c 
)