tvm
|
PrimFunc specific attribute names. More...
Functions | |
bool | IsPragmaKey (const std::string &attr_key) |
Check if attr_key is a pragma key extension. More... | |
Variables | |
constexpr const char * | kKernelLaunchParams = "tir.kernel_launch_params" |
List of thread IterVar that a DeviceLaunch function corresponds to. More... | |
constexpr const char * | kNoAlias = "tir.noalias" |
Whether to set noalias rule on the function arguments. More... | |
constexpr const char * | kIsEntryFunc = "tir.is_entry_func" |
Mark the function as the entry function of the final generated runtime module. More... | |
constexpr const char * | kIsGlobalFunc = "tir.is_global_func" |
Mark the function as the global function called from the host. More... | |
constexpr const char * | kIsHostFunc = "tir.is_host_func" |
Mark the function as run on the host, mutually exclusive with kTarget. More... | |
constexpr const char * | thread_extent = "thread_extent" |
Mark launching extent of thread, used by device API. More... | |
constexpr const char * | virtual_thread = "virtual_thread" |
Mark launching of a virtual thread. More... | |
constexpr const char * | coproc_scope = "coproc_scope" |
Mark that a region is processed by a co-processor. More... | |
constexpr const char * | coproc_uop_scope = "coproc_uop_scope" |
Mark that a region creates coprocessor micro ops; it can be reused if the corresponding variable is independent. More... | |
constexpr const char * | volatile_scope = "volatile_scope" |
Mark the scope as volatile access for certain handle. More... | |
constexpr const char * | extern_scope = "extern_scope" |
Mark the scope as generated by an extern primitive. Such a scope can contain an arbitrary IR program, and we need to be careful when making certain assumptions about the structure of the program. More... | |
constexpr const char * | compute_scope = "compute_scope" |
Mark the scope as the point where computation starts to happen. This can hint some code generators to create a new function for compute. More... | |
constexpr const char * | storage_alignment = "storage_alignment" |
Mark storage alignment requirement of buffers. More... | |
constexpr const char * | realize_scope = "realize_scope" |
Mark storage scope of realization. More... | |
constexpr const char * | device_id = "device_id" |
The allocation device for global malloc in host. More... | |
constexpr const char * | device_type = "device_type" |
The device type. More... | |
constexpr const char * | loop_scope = "loop_scope" |
Mark of loop scope. More... | |
constexpr const char * | reduce_scope = "reduce_scope" |
Mark of reduce scope. More... | |
constexpr const char * | pragma_auto_unroll_max_step = "pragma_auto_unroll_max_step" |
Pragma: auto-unroll, max_step. More... | |
constexpr const char * | pragma_unroll_explicit = "pragma_unroll_explicit" |
Pragma: unroll explicit. More... | |
constexpr const char * | pragma_scope_prefix = "pragma_" |
Mark region is guarded by the pragma extension. More... | |
constexpr const char * | pragma_import_c = "pragma_import_c" |
Import C source or file into the final code gen module. More... | |
constexpr const char * | pragma_import_llvm = "pragma_import_llvm" |
Import llvm source or file into the final code gen module. More... | |
constexpr const char * | pragma_tensor_core = "pragma_tensor_core" |
Try to modify the AST to support Tensor Core. More... | |
constexpr const char * | prefetch_scope = "prefetch_scope" |
Mark of prefetch scope, value=offset, run prefetch of Tensor on the current loop scope. More... | |
constexpr const char * | layout_transforms = "layout_transforms" |
Marks the layout transforms to be used for a tensor. More... | |
constexpr const char * | axis_separators = "axis_separators" |
Marks the physical axis separators. More... | |
constexpr const char * | double_buffer_scope = "double_buffer_scope" |
Marks production of double buffer data. More... | |
constexpr const char * | double_buffer_write = "double_buffer_write" |
Marks region used by double buffer write. More... | |
constexpr const char * | rolling_buffer_scope = "rolling_buffer_scope" |
Mark realization for rolling buffer optimization. More... | |
constexpr const char * | scan_update_scope = "scan_update_scope" |
Mark of scan update scope. More... | |
constexpr const char * | scan_init_scope = "scan_init_scope" |
Mark of scan init scope. More... | |
constexpr const char * | buffer_dim_align = "buffer_dim_align" |
Mark alignment of buffer dimension stmt.node is Tensor stmt.value is tvm_tuple(dim, align, offset) This gives hint to require stride of dim to be k * align + offset. More... | |
constexpr const char * | buffer_bound = "buffer_bound" |
Mark stores/loads with their bounds. More... | |
constexpr const char * | buffer_bind_scope = "buffer_bind_scope" |
Bind the buffer specification to the region of the op When this scope occurs, the stmt.node is a Array<NodeRef> = [buffer, tensor] stmt.value is a tvm_tuple(min0, extent0, min1, extent1, ...). The scope represents that we need to bind the storage region of tensor to buffer. This will affect replacement of some variables inside the scope that corresponds to field of buffer to be the actual expressions of tensor during storage flattening phase. More... | |
constexpr const char * | channel_read_scope = "channel_read_scope" |
channel read scope More... | |
constexpr const char * | channel_read_advance = "channel_read_advance" |
Advance step of channel after end of scope. More... | |
constexpr const char * | channel_write_scope = "channel_write_scope" |
channel write scope More... | |
constexpr const char * | channel_write_advance = "channel_write_advance" |
Advance step of channel after end of scope. More... | |
constexpr const char * | pipeline_stage_scope = "pipeline_stage_scope" |
pipeline stage scope, implies always execution More... | |
constexpr const char * | pipeline_exec_scope = "pipeline_exec_scope" |
pipeline execution scope, implies the scope can be pipelined. More... | |
constexpr const char * | device_scope = "device_scope" |
Mark that it is in the device scope. More... | |
constexpr const char * | async_scope = "async_scope" |
Mark that the attached statement runs asynchronously. More... | |
constexpr const char * | async_commit_queue_scope = "async_commit_queue_scope" |
Annotations for invoking and synchronizing asynchronous operations. More... | |
constexpr const char * | async_wait_queue_scope = "async_wait_queue_scope" |
constexpr const char * | async_wait_inflight_count = "async_wait_inflight_count" |
constexpr const char * | fragment_shape = "fragment_shape" |
Mark the shape of a TensorCore fragment. More... | |
constexpr const char * | fragment_layout = "fragment_layout" |
Mark the layout of a TensorCore fragment. More... | |
constexpr const char * | hand_threaded = "hand_threaded" |
Mark that the kernel is hand threaded and doesn't need syncs inserted. More... | |
constexpr const char * | script_parsing_detect_access = "tir.script_parsing_detect_access" |
Mark whether the script-completer need to fill in missing access region during script parsing. More... | |
constexpr const char * | pragma_loop_partition_hint = "pragma_loop_partition_hint" |
Mark that the loop should be partitioned. More... | |
constexpr const char * | software_pipeline_stage = "software_pipeline_stage" |
Mark the stage of a statement in the software pipeline. More... | |
constexpr const char * | software_pipeline_order = "software_pipeline_order" |
Mark the order of a statement in the software pipeline. More... | |
constexpr const char * | software_pipeline_async_stages = "software_pipeline_async_stages" |
List stages in the software pipeline that should run asynchronously. More... | |
constexpr const char * | layout_free_buffers = "layout_free_buffers" |
Mark the buffers which are accessed as constants and whose layout can be transformed. More... | |
constexpr const char * | manifest_shared_memory_local_stage = "tir.manifest_shared_memory_local_stage" |
Mark the local stage for the shared memory access should be added. More... | |
constexpr const char * | meta_schedule_tiling_structure = "meta_schedule.tiling_structure" |
Mark the tiling structure of blocks that are applied by rule Multi-Level-Tiling. More... | |
constexpr const char * | meta_schedule_cooperative_fetch = "meta_schedule.cooperative_fetch" |
Mark that the loop should be further split and bound to environment threads to enable cooperative fetching. More... | |
constexpr const char * | meta_schedule_thread_extent_low_inclusive |
The allowed range of thread extent in thread bindings. More... | |
constexpr const char * | meta_schedule_thread_extent_high_inclusive |
The allowed range of thread extent in thread bindings. More... | |
constexpr const char * | meta_schedule_random_compute_producer |
Mark the block whose producer needs to be applied by rule Random-Compute-Location. More... | |
constexpr const char * | meta_schedule_parallel = "meta_schedule.parallel" |
Mark auto-parallel setting on the block. More... | |
constexpr const char * | meta_schedule_vectorize = "meta_schedule.vectorize" |
Mark auto-vectorize setting on the block. More... | |
constexpr const char * | meta_schedule_unroll_explicit = "meta_schedule.unroll_explicit" |
Mark auto-unroll setting on the block. More... | |
constexpr const char * | meta_schedule_unroll_implicit = "meta_schedule.unroll_implicit" |
Mark auto-unroll setting on the block. More... | |
constexpr const char * | meta_schedule_auto_tensorize = "meta_schedule.auto_tensorize" |
Mark that a block should be further rewritten using tensorization. More... | |
constexpr const char * | meta_schedule_layout_rewrite_preproc = "meta_schedule.layout_rewrite_preproc" |
Mark that a block is a preprocessor block for layout rewrite. More... | |
constexpr const char * | meta_schedule_auto_tensorize_init = "meta_schedule.auto_tensorize_init" |
Mark that the init statement of a block should be further rewritten using tensorization. More... | |
constexpr const char * | require_block_var_bound_predicate = "require_bound_predicate" |
Mark that the block needs to add a predicate for block var bounds during lowering. More... | |
constexpr const char * | meta_schedule_tensor_core_enabled = "meta_schedule.tensor_core_enabled" |
Mark that tensor core is enabled in the PrimExpr. More... | |
constexpr const char * | meta_schedule_cache_type = "meta_schedule.cache_type" |
Mark a block as generated by cache_read or cache_write block. 0 means cache_read; 1 means cache_write. More... | |
constexpr const int | meta_schedule_cache_type_read = 0 |
constexpr const int | meta_schedule_cache_type_write = 1 |
constexpr const char * | auto_copy = "auto_copy" |
Mark auto copy for memhammer. More... | |
constexpr const char * | local_stage = "local_stage" |
Mark local stage constraint on data copy. More... | |
constexpr const char * | vector_bytes = "vector_bytes" |
Mark vectorization length constraint on block. More... | |
constexpr const char * | warp_execution = "warp_execution" |
Mark that a block is executed by a warp. This implies the extent of threadIdx.x is warp size. More... | |
constexpr const char * | meta_schedule_inline_rule = "meta_schedule.inline_rule" |
Mark that a block is disallowed in auto inline. More... | |
PrimFunc specific attribute names.
namespace of possible attributes in AttrStmt.attr_key
|
inline |
Check if attr_key is a pragma key extension.
attr_key | The attr key to be compared |
constexpr const char* tvm::tir::attr::async_commit_queue_scope = "async_commit_queue_scope" |
Annotations for invoking and synchronizing asynchronous operations.
Synchronization is done in terms of "queue": It is an abstract entity associated with each asynchronous unit, and it tracks invocations and completions of asynchronous operations in the FIFO order.
Similarly to PTX instructions commit_group and wait_group, these annotations express synchronization by "counting":
async_commit_queue(i): Group one or more invocations of async operations in the given scope, and "commit" (or push) them to the queue i. A group of operations committed together is awaited as one chunk. Groups committed to the same queue complete in the FIFO order.
async_wait_queue(i, N): Block until only N most recent committed groups are still in-flight at the queue i. N does not have to be a constant, but some backends may require a constant count.
constexpr const char* tvm::tir::attr::async_scope = "async_scope" |
Mark that the attached statement runs asynchronously.
constexpr const char* tvm::tir::attr::async_wait_inflight_count = "async_wait_inflight_count" |
constexpr const char* tvm::tir::attr::async_wait_queue_scope = "async_wait_queue_scope" |
constexpr const char* tvm::tir::attr::auto_copy = "auto_copy" |
Mark auto copy for memhammer.
constexpr const char* tvm::tir::attr::axis_separators = "axis_separators" |
Marks the physical axis separators.
Only applies to a DataProducer, as it should be made part of the Buffer definition in a PrimFunc. See BufferNode::axis_separators
for more details.
constexpr const char* tvm::tir::attr::buffer_bind_scope = "buffer_bind_scope" |
Bind the buffer specification to the region of the op When this scope occurs, the stmt.node is a Array<NodeRef> = [buffer, tensor] stmt.value is a tvm_tuple(min0, extent0, min1, extent1, ...). The scope represents that we need to bind the storage region of tensor to buffer. This will affect replacement of some variables inside the scope that corresponds to field of buffer to be the actual expressions of tensor during storage flattening phase.
constexpr const char* tvm::tir::attr::buffer_bound = "buffer_bound" |
Mark stores/loads with their bounds.
constexpr const char* tvm::tir::attr::buffer_dim_align = "buffer_dim_align" |
Mark alignment of buffer dimension stmt.node is Tensor stmt.value is tvm_tuple(dim, align, offset) This gives hint to require stride of dim to be k * align + offset.
constexpr const char* tvm::tir::attr::channel_read_advance = "channel_read_advance" |
Advance step of channel after end of scope.
constexpr const char* tvm::tir::attr::channel_read_scope = "channel_read_scope" |
channel read scope
constexpr const char* tvm::tir::attr::channel_write_advance = "channel_write_advance" |
Advance step of channel after end of scope.
constexpr const char* tvm::tir::attr::channel_write_scope = "channel_write_scope" |
channel write scope
constexpr const char* tvm::tir::attr::compute_scope = "compute_scope" |
Mark the scope as the point where computation starts to happen. This can hint some code generators to create a new function for compute.
constexpr const char* tvm::tir::attr::coproc_scope = "coproc_scope" |
Mark that a region is processed by a co-processor.
constexpr const char* tvm::tir::attr::coproc_uop_scope = "coproc_uop_scope" |
Mark that a region creates coprocessor micro ops; it can be reused if the corresponding variable is independent.
constexpr const char* tvm::tir::attr::device_id = "device_id" |
The allocation device for global malloc in host.
constexpr const char* tvm::tir::attr::device_scope = "device_scope" |
Mark that it is in the device scope.
constexpr const char* tvm::tir::attr::device_type = "device_type" |
The device type.
constexpr const char* tvm::tir::attr::double_buffer_scope = "double_buffer_scope" |
Marks production of double buffer data.
constexpr const char* tvm::tir::attr::double_buffer_write = "double_buffer_write" |
Marks region used by double buffer write.
constexpr const char* tvm::tir::attr::extern_scope = "extern_scope" |
Mark the scope as generated by an extern primitive. Such a scope can contain an arbitrary IR program, and we need to be careful when making certain assumptions about the structure of the program.
constexpr const char* tvm::tir::attr::fragment_layout = "fragment_layout" |
Mark the layout of a TensorCore fragment.
constexpr const char* tvm::tir::attr::fragment_shape = "fragment_shape" |
Mark the shape of a TensorCore fragment.
constexpr const char* tvm::tir::attr::hand_threaded = "hand_threaded" |
Mark that the kernel is hand threaded and doesn't need syncs inserted.
constexpr const char* tvm::tir::attr::kIsEntryFunc = "tir.is_entry_func" |
constexpr const char* tvm::tir::attr::kIsGlobalFunc = "tir.is_global_func" |
constexpr const char* tvm::tir::attr::kIsHostFunc = "tir.is_host_func" |
constexpr const char* tvm::tir::attr::kKernelLaunchParams = "tir.kernel_launch_params" |
List of thread IterVar that a DeviceLaunch function corresponds to.
Type: Array<String>
We call a device kernel launch function f using the following convention:
Call(f, [arg1, arg2, ..., arg_n, work_size_1, work_size_2, ... work_size_m, dyn_shmem_size])
Here n = len(arg), m = len(work_size) = len(launch_params)-1.
The list of kernel launch params indicates which additional parameters will be provided to the PackedFunc by the calling scope.
"threadIdx.x", "threadIdx.y", "threadIdx.z"
The extent of the thread count in x/y/z, to be used when launching the compute kernel on the device. For example, the gridDimX/Y/Z parameters passed to cuLaunchKernel when launching a CUDA kernel, or the groupCountX/Y/Z parameters passed to vkCmdDispatch when dispatching a compute pipeline to Vulkan.
"blockIdx.x", "blockIdx.y", "blockIdx.z"
The extent of the block iterators, to be used when launching the compute kernel on the device. For example, the blockDimX/Y/Z parameters passed to cuLaunchKernel when launching a CUDA kernel. For runtimes that do not require the block to be provided externally, this parameter is ignored. For example, the spv::ExecutionModeLocalSize for SPIR-V shaders on Vulkan, where this parameter is defined in the shader.
tvm::runtime::launch_param::kUseDynamicSharedMemoryTag
The size of the shared memory that may be allocated internally by the kernel. For example, exposed as the CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES attribute in cuda.
Defined as "tir.use_dyn_shared_memory".
constexpr const char* tvm::tir::attr::kNoAlias = "tir.noalias" |
constexpr const char* tvm::tir::attr::layout_free_buffers = "layout_free_buffers" |
Mark the buffers which are accessed as constants and whose layout can be transformed.
constexpr const char* tvm::tir::attr::layout_transforms = "layout_transforms" |
Marks the layout transforms to be used for a tensor.
Only applies to a DataProducer, as it should be made part of the PrimFunc attributes for TIR.
constexpr const char* tvm::tir::attr::local_stage = "local_stage" |
Mark local stage constraint on data copy.
constexpr const char* tvm::tir::attr::loop_scope = "loop_scope" |
Mark of loop scope.
constexpr const char* tvm::tir::attr::manifest_shared_memory_local_stage = "tir.manifest_shared_memory_local_stage" |
Mark the local stage for the shared memory access should be added.
constexpr const char* tvm::tir::attr::meta_schedule_auto_tensorize = "meta_schedule.auto_tensorize" |
Mark that a block should be further rewritten using tensorization.
constexpr const char* tvm::tir::attr::meta_schedule_auto_tensorize_init = "meta_schedule.auto_tensorize_init" |
Mark that the init statement of a block should be further rewritten using tensorization.
constexpr const char* tvm::tir::attr::meta_schedule_cache_type = "meta_schedule.cache_type" |
Mark a block as generated by cache_read or cache_write block. 0 means cache_read; 1 means cache_write.
constexpr const int tvm::tir::attr::meta_schedule_cache_type_read = 0 |
constexpr const int tvm::tir::attr::meta_schedule_cache_type_write = 1 |
constexpr const char* tvm::tir::attr::meta_schedule_cooperative_fetch = "meta_schedule.cooperative_fetch" |
Mark that the loop should be further split and bound to environment threads to enable cooperative fetching.
constexpr const char* tvm::tir::attr::meta_schedule_inline_rule = "meta_schedule.inline_rule" |
Mark that a block is disallowed in auto inline.
constexpr const char* tvm::tir::attr::meta_schedule_layout_rewrite_preproc = "meta_schedule.layout_rewrite_preproc" |
Mark that a block is a preprocessor block for layout rewrite.
constexpr const char* tvm::tir::attr::meta_schedule_parallel = "meta_schedule.parallel" |
Mark auto-parallel setting on the block.
constexpr const char* tvm::tir::attr::meta_schedule_random_compute_producer |
Mark the block whose producer needs to be applied by rule Random-Compute-Location.
constexpr const char* tvm::tir::attr::meta_schedule_tensor_core_enabled = "meta_schedule.tensor_core_enabled" |
Mark that tensor core is enabled in the PrimExpr.
constexpr const char* tvm::tir::attr::meta_schedule_thread_extent_high_inclusive |
The allowed range of thread extent in thread bindings.
constexpr const char* tvm::tir::attr::meta_schedule_thread_extent_low_inclusive |
The allowed range of thread extent in thread bindings.
constexpr const char* tvm::tir::attr::meta_schedule_tiling_structure = "meta_schedule.tiling_structure" |
Mark the tiling structure of blocks that are applied by rule Multi-Level-Tiling.
constexpr const char* tvm::tir::attr::meta_schedule_unroll_explicit = "meta_schedule.unroll_explicit" |
Mark auto-unroll setting on the block.
constexpr const char* tvm::tir::attr::meta_schedule_unroll_implicit = "meta_schedule.unroll_implicit" |
Mark auto-unroll setting on the block.
constexpr const char* tvm::tir::attr::meta_schedule_vectorize = "meta_schedule.vectorize" |
Mark auto-vectorize setting on the block.
constexpr const char* tvm::tir::attr::pipeline_exec_scope = "pipeline_exec_scope" |
pipeline execution scope, implies the scope can be pipelined.
constexpr const char* tvm::tir::attr::pipeline_stage_scope = "pipeline_stage_scope" |
pipeline stage scope, implies always execution
constexpr const char* tvm::tir::attr::pragma_auto_unroll_max_step = "pragma_auto_unroll_max_step" |
Pragma: auto-unroll, max_step.
constexpr const char* tvm::tir::attr::pragma_import_c = "pragma_import_c" |
Import C source or file into the final code gen module.
constexpr const char* tvm::tir::attr::pragma_import_llvm = "pragma_import_llvm" |
Import llvm source or file into the final code gen module.
constexpr const char* tvm::tir::attr::pragma_loop_partition_hint = "pragma_loop_partition_hint" |
Mark that the loop should be partitioned.
constexpr const char* tvm::tir::attr::pragma_scope_prefix = "pragma_" |
Mark region is guarded by the pragma extension.
constexpr const char* tvm::tir::attr::pragma_tensor_core = "pragma_tensor_core" |
Try to modify the AST to support Tensor Core.
constexpr const char* tvm::tir::attr::pragma_unroll_explicit = "pragma_unroll_explicit" |
Pragma: unroll explicit.
constexpr const char* tvm::tir::attr::prefetch_scope = "prefetch_scope" |
Mark of prefetch scope, value=offset, run prefetch of Tensor on the current loop scope.
constexpr const char* tvm::tir::attr::realize_scope = "realize_scope" |
Mark storage scope of realization.
constexpr const char* tvm::tir::attr::reduce_scope = "reduce_scope" |
Mark of reduce scope.
constexpr const char* tvm::tir::attr::require_block_var_bound_predicate = "require_bound_predicate" |
Mark that the block needs to add a predicate for block var bounds during lowering.
constexpr const char* tvm::tir::attr::rolling_buffer_scope = "rolling_buffer_scope" |
Mark realization for rolling buffer optimization.
constexpr const char* tvm::tir::attr::scan_init_scope = "scan_init_scope" |
Mark of scan init scope.
constexpr const char* tvm::tir::attr::scan_update_scope = "scan_update_scope" |
Mark of scan update scope.
constexpr const char* tvm::tir::attr::script_parsing_detect_access = "tir.script_parsing_detect_access" |
Mark whether the script-completer need to fill in missing access region during script parsing.
constexpr const char* tvm::tir::attr::software_pipeline_async_stages = "software_pipeline_async_stages" |
List stages in the software pipeline that should run asynchronously.
constexpr const char* tvm::tir::attr::software_pipeline_order = "software_pipeline_order" |
Mark the order of a statement in the software pipeline.
constexpr const char* tvm::tir::attr::software_pipeline_stage = "software_pipeline_stage" |
Mark the stage of a statement in the software pipeline.
constexpr const char* tvm::tir::attr::storage_alignment = "storage_alignment" |
Mark storage alignment requirement of buffers.
constexpr const char* tvm::tir::attr::thread_extent = "thread_extent" |
Mark launching extent of thread, used by device API.
constexpr const char* tvm::tir::attr::vector_bytes = "vector_bytes" |
Mark vectorization length constraint on block.
constexpr const char* tvm::tir::attr::virtual_thread = "virtual_thread" |
Mark launching of a virtual thread.
constexpr const char* tvm::tir::attr::volatile_scope = "volatile_scope" |
Mark the scope as volatile access for certain handle.
constexpr const char* tvm::tir::attr::warp_execution = "warp_execution" |
Mark that a block is executed by a warp. This implies the extent of threadIdx.x is warp size.