Classes
class	SDLTensor

Functions
void *	to_ptr (uintptr_t v)

uintptr_t	to_uint (void *ptr)

constexpr int	xyc_to_sm_16b (int y, int x, int c)

constexpr int	hwio_to_sm_16b (int width, int y, int x, int i, int o)

constexpr int	round_up (int v, int p2)

uintptr_t	nhwc_at (const DLTensor &a, int n, int y, int x, int c)

uintptr_t	hwio_at (const DLTensor &f, int y, int x, int i, int o)

void	blockize_hwc_16b (void *out, void *inp_flat, int height, int width, int depth)
	Function to "blockize" the flat input data. The term "blockize" is used to indicate that the data is stored in non-contiguous blocks. More...

void	deblockize_hwc_16b (void *out_flat, void *inp, int height, int width, int depth)
	Convert back from non-contiguous layout to a flat layout. More...

void	chunkify_hwio_16b (void **out_ptr, int out_ptr_size, void *out, void *inp, int height, int width, int idepth, int odepth)
	Convert the layout of weights from flat to "chunked". The term chunked is explained below: More...

SDLTensor< 4 >	prepare_nhwc (tvm::runtime::DeviceAPI *device_api, const DLTensor *nhwc_flat, bool copy_data)

int	calculate_num_weight_chunks (int64_t *shape_hwio)

SDLTensor< 4 >	prepare_hwio (tvm::runtime::DeviceAPI *device_api, const DLTensor *hwio_flat, int num_chunks, void **ptr_table)

template<size_t N>
void	release (tvm::runtime::DeviceAPI *device_api, const SDLTensor< N > &tensor)

Function Documentation

◆ blockize_hwc_16b()

void tvm::runtime::hexagon::blockize_hwc_16b	(	void *	out,
		void *	inp_flat,
		int	height,
		int	width,
		int	depth
	)

Function to "blockize" the flat input data. The term "blockize" is used to indicate that the data is stored in non-contiguous blocks.

The input is mapped into the below mentioned layout (notation similar to index map used for transform layout):

lambda n, h, w, c: n, h//8, w//4, c//32, AXIS_SEPARATOR, h%8, (w%4)//2, c%32, w%2

where AXIS_SEPARATOR represents a split in the physical layout.

Parameters

out	Pre-allocated output memory pointer
inp_flat	Flat input data pointer
height
width
depth

◆ calculate_num_weight_chunks()

int tvm::runtime::hexagon::calculate_num_weight_chunks ( int64_t * shape_hwio )

◆ chunkify_hwio_16b()

void tvm::runtime::hexagon::chunkify_hwio_16b	(	void **	out_ptr,
		int	out_ptr_size,
		void *	out,
		void *	inp,
		int	height,
		int	width,
		int	idepth,
		int	odepth
	)

Convert the layout of weights from flat to "chunked". The term chunked is explained below:

Weights are packed into the layout given below (notation similar to an index map). Since the weight layout cannot be represented exactly in index map notation, the base split is given below, along with a few gotchas:

lambda h, w, i, o: h//8, w//4, o//32, i//32, h%8, w%4, (i%32)//2, o%32, i%2

The gotchas are:

(w%4) is actually stored in the right to left order, as in 3,2,1,0 instead of 0,1,2,3
The h%8 and (w%4) dimensions are not padded up, leading to chunks of different sizes (hence the name "chunked" instead of "packed")
The thinnest chunk of width is stored first. For example, if a kernel is 5x5, the first chunk along the width has size 1 (representing index 0) and the next one has size 4, representing indices (1,2,3,4)

Parameters

out_ptr	Base pointer table to be filled with the list of pointers to the first addresses of the "chunked" weights
out_ptr_size	The number of chunks
out	Pointer to pre-allocated output memory
inp	Pointer to flat input data
height
width
idepth
odepth

◆ deblockize_hwc_16b()

void tvm::runtime::hexagon::deblockize_hwc_16b	(	void *	out_flat,
		void *	inp,
		int	height,
		int	width,
		int	depth
	)

Convert back from non-contiguous layout to a flat layout.

Parameters

out_flat	Pre-allocated output memory pointer
inp	Blockized input data pointer
height
width
depth

◆ hwio_at()

uintptr_t tvm::runtime::hexagon::hwio_at	(	const DLTensor &	f,
		int	y,
		int	x,
		int	i,
		int	o
	)

inline

◆ hwio_to_sm_16b()

constexpr int tvm::runtime::hexagon::hwio_to_sm_16b	(	int	width,
		int	y,
		int	x,
		int	i,
		int	o
	)

◆ nhwc_at()

uintptr_t tvm::runtime::hexagon::nhwc_at	(	const DLTensor &	a,
		int	n,
		int	y,
		int	x,
		int	c
	)

inline

◆ prepare_hwio()

SDLTensor<4> tvm::runtime::hexagon::prepare_hwio	(	tvm::runtime::DeviceAPI *	device_api,
		const DLTensor *	hwio_flat,
		int	num_chunks,
		void **	ptr_table
	)

◆ prepare_nhwc()

SDLTensor<4> tvm::runtime::hexagon::prepare_nhwc	(	tvm::runtime::DeviceAPI *	device_api,
		const DLTensor *	nhwc_flat,
		bool	copy_data
	)

◆ release()

template<size_t N>

void tvm::runtime::hexagon::release	(	tvm::runtime::DeviceAPI *	device_api,
		const SDLTensor< N > &	tensor
	)

◆ round_up()

constexpr int tvm::runtime::hexagon::round_up	(	int	v,
		int	p2
	)

inline

◆ to_ptr()

void* tvm::runtime::hexagon::to_ptr ( uintptr_t v )

inline

◆ to_uint()

uintptr_t tvm::runtime::hexagon::to_uint ( void * ptr )

inline

◆ xyc_to_sm_16b()

constexpr int tvm::runtime::hexagon::xyc_to_sm_16b	(	int	y,
		int	x,
		int	c
	)

Classes

Functions

Function Documentation

◆ blockize_hwc_16b()

◆ calculate_num_weight_chunks()

◆ chunkify_hwio_16b()

◆ deblockize_hwc_16b()

◆ hwio_at()

◆ hwio_to_sm_16b()

◆ nhwc_at()

◆ prepare_hwio()

◆ prepare_nhwc()

◆ release()

◆ round_up()

◆ to_ptr()

◆ to_uint()

◆ xyc_to_sm_16b()