33 #ifndef TVM_TIR_BUILTIN_H_ 34 #define TVM_TIR_BUILTIN_H_ 47 TVM_DLL
const Op&
ret();
156 TVM_DLL
const Op&
fma();
745 #endif // TVM_TIR_BUILTIN_H_ Definition: builtin.h:735
Definition: builtin.h:730
Definition: builtin.h:726
const Op & tvm_storage_sync()
See pseudo code.
const Op & tvm_stack_alloca()
See pseudo code.
const Op & tvm_thread_allreduce()
See pseudo code.
const Op & tvm_call_cpacked()
See pseudo code.
const Op & tvm_warp_shuffle()
See pseudo code.
const Op & texture2d_store()
Store to texture 2d memory.
runtime implementation for LibTorch/TorchScript.
Definition: analyzer.h:36
const Op & call_llvm_intrin()
Call an LLVM intrinsic with a given intrinsic id and signature from the types of args in the runtime ...
const Op & mma_store()
tvm intrinsic for storing the result of PTX MMA into a destination pointer. For example, if each thread in a warp of size 32 has 4 elements from the result of m16xn8xk16 MMA in its registers, this intrinsic can be used to store the result in a 16x8 region in shared or global memory.
Definition: builtin.h:736
const Op & fma()
Fused multiply add.
const Op & call_llvm_pure_intrin()
Call an LLVM pure intrinsic with a given intrinsic id and signature from the types of args in the run...
const Op & atomic_add()
atomic add instruction, corresponding e.g. to atomicAdd in CUDA
const Op & tvm_global_barrier_kinit()
Initialize the global barrier. Call this at the beginning of a kernel that needs a global barrier.
const Op & tvm_call_cpacked_lowered()
Lowered version of call c-packed, the space of value and type codes are explicitly allocated...
const Op & vectorcombine()
Concat two vectors.
const Op & tvm_warp_shuffle_up()
const Op & tvm_call_trace_packed()
See pseudo code.
const Op & tvm_store_matrix_sync()
tvm intrinsic for tensor core store operators.
Definition: builtin.h:729
const Op & call_spirv_pure_glsl450()
Call an SPIRV pure GLSL450 intrinsic.
const Op & tvm_thread_context()
See pseudo code. Mark the content as thread local context, can get optimized by only call the call onc...
const Op & address_of()
Returns the address of an element in the buffer (see pseudocode below).
const Op & bitwise_not()
Bitwise not operator.
Definition: builtin.h:734
const Op & q_multiply_shift()
Execute a multiplication between two Q-numbers x and y followed by a right shift s. The default roundi...
Primitive operators(builtin intrinsics) and registry for them.
const Op & vectorlow()
Get the low-level half of the vector.
const Op & tvm_mma_sync()
tvm intrinsic for tensor core mma_sync operators.
Definition: builtin.h:732
const Op & reinterpret()
Reinterpret the value using the target type.
const Op & ptx_ldmatrix()
tvm intrinsic for ptx load matrix from shared memory.
const Op & call_pure_extern()
Call a pure extern C function with given name and signature from the types of args in the runtime en...
const Op & texture2d_load()
Load from texture 2d memory.
const Op & popcount()
Popcount.
Definition: builtin.h:733
const Op & tvm_call_packed()
See pseudo code.
Definition: builtin.h:737
const Op & likely()
Marks a condition as likely to happen.
const Op & tvm_struct_get()
See pseudo code.
Definition: builtin.h:739
const Op & tvm_stack_make_array()
Allocate an NDArray (DLTensor) on stack, return the handle.
const Op & prefetch()
Prefetch a cacheline.
const Op & tvm_throw_last_error()
See pseudo code.
const Op & tvm_load_matrix_sync()
tvm intrinsic for tensor core load operators.
const Op & tvm_access_ptr()
Get head access address with memory access pattern info.
const Op & ptx_mma()
tvm intrinsic for ptx tensor core mma instructions.
const Op & mem_copy()
Copy 1d memory from source to destination. Same semantics as memcpy(destination, source, size). Allows for device-specific implementations, e.g. direct memory access (DMA).
const Op & tvm_stack_make_shape()
Allocate a shape tuple on stack, return the handle.
const Op & if_then_else()
Same as select, used for unsafe memory access.
Definition: builtin.h:740
Managed reference class to OpNode.
Definition: op.h:165
const Op & ptx_cp_async()
tvm intrinsics for ptx async copy from global to shared memory
Definition: builtin.h:728
const Op & ptx_mma_sp()
tvm intrinsic for sparse tensor core ptx instructions.
const Op & tvm_warp_activemask()
const Op & call_extern()
Call an extern C function with given name and signature from the types of args in the runtime environ...
const Op & shift_right()
Right shift.
const Op & tvm_struct_set()
See pseudo code.
const Op & tvm_tuple()
tvm_tuple is not an actual function and cannot codegen. It is used to represent tuple structure in va...
const Op & tvm_bmma_sync()
tvm intrinsic for tensor core bmma_sync operators.
const Op & lookup_param()
See pseudo code Type lookup_param(String param_name) { return __tvm_param__param_name; }...
const Op & bitwise_xor()
Bitwise xor operator.
const Op & tvm_static_handle()
Create a function local static handle that initializes to nullptr. Can be used to cache function loca...
const Op & tvm_check_return()
Checks the return value of another call is correct or returns a given value.
TVMStructFieldKind
The kind of structure field info used in intrinsic.
Definition: builtin.h:724
const Op & bitwise_and()
Bitwise and operator.
const Op & isnan()
Check if value is nan.
const Op & ptx_commit_group()
tvm intrinsics for ptx async copy commit and wait.
const Op & tvm_warp_shuffle_down()
const Op & bitwise_or()
Bitwise or operator.
const Op & tvm_fill_fragment()
tvm intrinsic for tensor core fill_fragment operators.
const Op & ret()
Return value.
const Op & vectorhigh()
Get the high level half of the vector.
const Op & tvm_call_packed_lowered()
Lowered version of call packed, the space of value and type codes are explicitly allocated.
const Op & large_uint_imm()
See pseudo code.
const Op & mma_fill()
tvm intrinsic for zero-initializing an MMA accumulation register. For example, if each thread in a war...
const Op & tvm_context_id()
Return a unique context id, used for hint of workspace separation. Different context id guarantees no...
Definition: builtin.h:727
const Op & shift_left()
Left shift.
const Op & nd_mem_alloc_with_scope()
Create an Nd memory allocation with storage scope.
Definition: builtin.h:731
const Op & tvm_call_trace_packed_lowered()
Lowered version of trace intrinsic, the space of value and type codes are explicitly allocated...
const Op & isnullptr()
See pseudo code.
const Op & ptx_wait_group()