api/doxygen/tir_2builtin_8h_source.html

 /*

  * Licensed to the Apache Software Foundation (ASF) under one

  * or more contributor license agreements.  See the NOTICE file

  * distributed with this work for additional information

  * regarding copyright ownership.  The ASF licenses this file

  * to you under the Apache License, Version 2.0 (the

  * "License"); you may not use this file except in compliance

  * with the License.  You may obtain a copy of the License at

  *

  *   http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing,

  * software distributed under the License is distributed on an

  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

  * KIND, either express or implied.  See the License for the

  * specific language governing permissions and limitations

  * under the License.

  */


 #ifndef TVM_TIR_BUILTIN_H_

 #define TVM_TIR_BUILTIN_H_


 #include <tvm/ir/op.h>

 #include <tvm/tir/expr.h>


 namespace tvm {

 namespace tir {


 namespace builtin {

 /*! \brief Return value. */
 TVM_DLL const Op& ret();

 /*! \brief Reinterpret the value using the target type. */
 TVM_DLL const Op& reinterpret();


 /*! \brief Marks a condition as likely to happen. */
 TVM_DLL const Op& likely();


 /*! \brief Bitwise and operator. */
 TVM_DLL const Op& bitwise_and();


 /*! \brief Bitwise or operator. */
 TVM_DLL const Op& bitwise_or();


 /*! \brief Bitwise xor operator. */
 TVM_DLL const Op& bitwise_xor();


 /*! \brief Bitwise not operator. */
 TVM_DLL const Op& bitwise_not();


 /*! \brief Left shift. */
 TVM_DLL const Op& shift_left();


 /*! \brief Right shift. */
 TVM_DLL const Op& shift_right();


 TVM_DLL const Op& large_uint_imm();


 /*! \brief Execute a multiplication between two Q-numbers x and y followed by a right shift s. */
 TVM_DLL const Op& q_multiply_shift();


 /*! \brief Returns the address of an element in the buffer. */
 TVM_DLL const Op& address_of();


 /*! \brief Same as select, used for unsafe memory access. */
 TVM_DLL const Op& if_then_else();


 TVM_DLL const Op& isnullptr();


 /*! \brief Check if value is nan. */
 TVM_DLL const Op& isnan();


 /*! \brief Popcount. */
 TVM_DLL const Op& popcount();


 /*! \brief Fused multiply add. */
 TVM_DLL const Op& fma();


 /*! \brief Call an extern C function with given name and signature from the types of args. */
 TVM_DLL const Op& call_extern();


 /*! \brief Call a pure extern C function with given name and signature from the types of args. */
 TVM_DLL const Op& call_pure_extern();


 /*! \brief Call an LLVM intrinsic with a given intrinsic id and signature from the types of args. */
 TVM_DLL const Op& call_llvm_intrin();


 /*! \brief Call an LLVM pure intrinsic with a given intrinsic id and signature from the types of args. */
 TVM_DLL const Op& call_llvm_pure_intrin();


 /*! \brief Call an SPIRV pure GLSL450 intrinsic. */
 TVM_DLL const Op& call_spirv_pure_glsl450();


 // TODO(tvm-team) revisit the builtins below

 // some of them can simply become ops with special codegen attr.

 /*! \brief Prefetch a cacheline. */
 TVM_DLL const Op& prefetch();


 /*! \brief Get head access address with memory access pattern info. */
 TVM_DLL const Op& tvm_access_ptr();


 /*! \brief Create a function local static handle that initializes to nullptr. */
 TVM_DLL const Op& tvm_static_handle();


 /*! \brief Return a unique context id, used for hint of workspace separation. */
 TVM_DLL const Op& tvm_context_id();


 /*! \brief tvm_tuple is not an actual function and cannot codegen. */
 TVM_DLL const Op& tvm_tuple();


 TVM_DLL const Op& tvm_struct_get();


 TVM_DLL const Op& tvm_struct_set();


 /*!
  * \brief Parameter lookup; pseudo code:
  *  Type lookup_param(String param_name) { return __tvm_param__param_name; }
  */
 TVM_DLL const Op& lookup_param();


 TVM_DLL const Op& tvm_throw_last_error();


 TVM_DLL const Op& tvm_stack_alloca();


 /*! \brief Allocate a shape tuple on stack, return the handle. */
 TVM_DLL const Op& tvm_stack_make_shape();


 /*! \brief Allocate a NDArray(DLTensor) on stack, return the handle. */
 TVM_DLL const Op& tvm_stack_make_array();


 TVM_DLL const Op& tvm_call_packed();


 TVM_DLL const Op& tvm_call_cpacked();


 TVM_DLL const Op& tvm_call_trace_packed();


 /*! \brief Checks the return value of another call is correct or returns a given value. */
 TVM_DLL const Op& tvm_check_return();


 /*! \brief Mark the content as thread local context. */
 TVM_DLL const Op& tvm_thread_context();


 /*!
  * \brief Mark a condition to be thread invariant.
  *  This means the condition must be the same for all threads.
  */
 TVM_DLL const Op& tvm_thread_invariant();


 /*!
  * \brief Lowered version of call packed, the space of value and
  *  type codes are explicitly allocated.
  */
 TVM_DLL const Op& tvm_call_packed_lowered();


 /*!
  * \brief Lowered version of call c-packed, the space of value and
  *  type codes are explicitly allocated.
  */
 TVM_DLL const Op& tvm_call_cpacked_lowered();


 /*!
  * \brief Lowered version of trace intrinsic, the space of value and
  *  type codes are explicitly allocated.
  */
 TVM_DLL const Op& tvm_call_trace_packed_lowered();


 TVM_DLL const Op& tvm_storage_sync();


 TVM_DLL const Op& tvm_warp_shuffle();

 TVM_DLL const Op& tvm_warp_shuffle_up();

 TVM_DLL const Op& tvm_warp_shuffle_down();

 TVM_DLL const Op& tvm_warp_activemask();


 /*!
  * \brief Initialize the global barrier.
  *  Call this at beginning of kernel that need global barrier.
  */
 TVM_DLL const Op& tvm_global_barrier_kinit();


 TVM_DLL const Op& tvm_thread_allreduce();


 // TODO(tvm-team) TensorCore specific intrinsics should be directly registered under

 //                cuda. namespace and used through op.

 /*! \brief tvm intrinsic for tensor core load operators. */
 TVM_DLL const Op& tvm_load_matrix_sync();


 /*! \brief tvm intrinsic for tensor core mma_sync operators. */
 TVM_DLL const Op& tvm_mma_sync();


 /*! \brief tvm intrinsic for tensor core bmma_sync operators. */
 TVM_DLL const Op& tvm_bmma_sync();


 /*! \brief tvm intrinsic for tensor core fill_fragment operators. */
 TVM_DLL const Op& tvm_fill_fragment();


 /*! \brief tvm intrinsic for tensor core store operators. */
 TVM_DLL const Op& tvm_store_matrix_sync();


 /*! \brief tvm intrinsic for ptx tensor core mma instructions. */
 TVM_DLL const Op& ptx_mma();


 /*! \brief tvm intrinsic for ptx predicate load with 32-bit data type. */
 TVM_DLL const Op& ptx_ldg32();


 /*! \brief tvm intrinsic for sparse tensor core ptx instructions. */
 TVM_DLL const Op& ptx_mma_sp();


 /*! \brief tvm intrinsic for ptx load matrix from shared memory. */
 TVM_DLL const Op& ptx_ldmatrix();


 /*! \brief tvm intrinsics for ptx async copy from global to shared memory using cp.async */
 TVM_DLL const Op& ptx_cp_async();


 /*! \brief tvm intrinsics for ptx async copy from global to shared memory using cp.async.bulk */
 TVM_DLL const Op& ptx_cp_async_bulk();


 /*! \brief tvm intrinsics for ptx async copy commit and wait. */
 TVM_DLL const Op& ptx_commit_group();

 TVM_DLL const Op& ptx_wait_group();


 /*! \brief tvm intrinsics for ptx async copy barrier using cp.async.mbarrier.arrive */
 TVM_DLL const Op& ptx_cp_async_barrier();


 /*! \brief tvm intrinsics for ptx barrier initialization of thread count using mbarrier.init */
 TVM_DLL const Op& ptx_init_barrier_thread_count();


 /*! \brief tvm intrinsics for ptx barrier arrival using mbarrier.arrive */
 TVM_DLL const Op& ptx_arrive_barrier();


 /*! \brief tvm intrinsic for ptx barrier arrival with expect tx using mbarrier.arrive.expect_tx */
 TVM_DLL const Op& ptx_arrive_barrier_expect_tx();


 /*! \brief tvm intrinsics for ptx barrier wait using mbarrier.try_wait */
 TVM_DLL const Op& ptx_wait_barrier();


 /*! \brief tvm intrinsics to create N barriers */
 TVM_DLL const Op& create_barriers();


 /*! \brief tvm intrinsic for storing the result of PTX MMA into a destination pointer. */
 TVM_DLL const Op& mma_store();


 /*! \brief tvm intrinsic for zero-initializing an MMA accumulation register. */
 TVM_DLL const Op& mma_fill();


 // Metal SimdGroup matrix intrinsics


 /*! \brief tvm intrinsic for initializing a simdgroup with a given value. */
 TVM_DLL const Op& make_filled_simdgroup_matrix();


 /*! \brief tvm intrinsic for loading data from device memory or threadgroup memory to simdgroup. */
 TVM_DLL const Op& simdgroup_load();


 /*! \brief tvm intrinsic for storing data from simdgroup to device memory or threadgroup memory. */
 TVM_DLL const Op& simdgroup_store();


 /*! \brief tvm intrinsic for multiply and accumulate two matrices in simdgroup */
 TVM_DLL const Op& simdgroup_multiply_accumulate();


 // TODO(tvm-team) replace the usage of the vector operations by Shuffle.

 /*! \brief Get the high level half of the vector. */
 TVM_DLL const Op& vectorhigh();


 /*! \brief Get the low-level half of the vector. */
 TVM_DLL const Op& vectorlow();


 /*! \brief Concat two vectors. */
 TVM_DLL const Op& vectorcombine();


 /*! \brief Dot product of two int8x4 vectors and add an optional accumulator. */
 TVM_DLL const Op& dp4a();


 /*! \brief atomic add instruction, corresponding e.g. to atomicAdd in CUDA */
 TVM_DLL const Op& atomic_add();

 /*! \brief Create an Nd memory allocation with storage scope. */
 TVM_DLL const Op& nd_mem_alloc_with_scope();


 /*! \brief Store to texture 2d memory. */
 TVM_DLL const Op& texture2d_store();


 /*! \brief Load from texture 2d memory. */
 TVM_DLL const Op& texture2d_load();


 /*! \brief Initiate a non-blocking DMA copy from source to destination. */
 TVM_DLL const Op& dma_copy();


 /*! \brief Wait until the number of DMA groups in flight is less than or equal to some maximum. */
 TVM_DLL const Op& dma_wait();


 /*! \brief Start a group of DMA copies. */
 TVM_DLL const Op& dma_start_group();


 /*! \brief End a group of DMA copies. */
 TVM_DLL const Op& dma_end_group();


 /*! \brief Provide a true statement that can be used for simplifications. */
 TVM_DLL const Op& assume();


 /*! \brief Returns an initialized but arbitrary value. */
 TVM_DLL const Op& undef();


 /*! \brief Profiling intrinsic. */
 TVM_DLL const Op& start_profile_intrinsic();


 /*! \brief Profiling intrinsic. */
 TVM_DLL const Op& end_profile_intrinsic();


 /*! \brief Get an item from any list and return it. */
 TVM_DLL const Op& anylist_getitem();


 /*! \brief Reset and clear an item in any list. */
 TVM_DLL const Op& anylist_resetitem();


 /*! \brief Set an item into any list by running packed function call. */
 TVM_DLL const Op& anylist_setitem_call_packed();


 /*! \brief Same as anylist_setitem_call_packed but use C calling convention. */
 TVM_DLL const Op& anylist_setitem_call_cpacked();


 /*! \brief Get the target's vscale value. It will be lowered to llvm.vscale intrinsic. */
 TVM_DLL const Op& vscale();


 /*! \brief Calculate a predicate mask given an upper bound (limit) and a current value (base). */
 TVM_DLL const Op& get_active_lane_mask();


 /*! \brief The kind of structure field info used in intrinsic. */
 enum TVMStructFieldKind : int {
   // Array fields; numeric values are spelled out and match declaration order.
   kArrAddr = 0,  // array head address
   kArrData = 1,
   kArrShape = 2,
   kArrStrides = 3,
   kArrNDim = 4,
   kArrTypeCode = 5,
   kArrTypeBits = 6,
   kArrTypeLanes = 7,
   kArrByteOffset = 8,
   kArrDeviceId = 9,
   kArrDeviceType = 10,
   kArrKindBound_ = 11,  // follows the last kArr* field
   // TVMValue field
   kTVMValueContent = 12,
   kTVMValueKindBound_ = 13  // follows the last TVMValue field
 };

 }  // namespace builtin

 }  // namespace tir

 }  // namespace tvm

 #endif  // TVM_TIR_BUILTIN_H_

tvm::Op
Managed reference class to OpNode.
Definition: op.h:165

op.h
Primitive operators(builtin intrinsics) and registry for them.

tvm::tir::builtin::tvm_call_packed_lowered
const Op & tvm_call_packed_lowered()
Lowered version of call packed, the space of value and type codes are explicitly allocated.

tvm::tir::builtin::tvm_thread_invariant
const Op & tvm_thread_invariant()
Mark a condition to be thread invariant. This means the condition must be the same for all threads.

tvm::tir::builtin::bitwise_not
const Op & bitwise_not()
Bitwise not operator.

tvm::tir::builtin::assume
const Op & assume()
Provide a true statement that can be used for simplifications.

tvm::tir::builtin::q_multiply_shift
const Op & q_multiply_shift()
Execute a multiplication between two Q-numbers x and y followed by a right shift s. The default roundi...

tvm::tir::builtin::tvm_mma_sync
const Op & tvm_mma_sync()
tvm intrinsic for tensor core mma_sync operators.

tvm::tir::builtin::bitwise_xor
const Op & bitwise_xor()
Bitwise xor operator.

tvm::tir::builtin::bitwise_and
const Op & bitwise_and()
Bitwise and operator.

tvm::tir::builtin::dma_wait
const Op & dma_wait()
Wait until the number of DMA groups in flight is less than or equal to some maximum.

tvm::tir::builtin::tvm_stack_alloca
const Op & tvm_stack_alloca()
See pseudo code.

tvm::tir::builtin::simdgroup_store
const Op & simdgroup_store()
tvm intrinsic for storing data from simdgroup to device memory or threadgroup memory.

tvm::tir::builtin::fma
const Op & fma()
Fused multiply add.

tvm::tir::builtin::tvm_call_cpacked
const Op & tvm_call_cpacked()
See pseudo code.

tvm::tir::builtin::ptx_commit_group
const Op & ptx_commit_group()
tvm intrinsics for ptx async copy commit and wait.

tvm::tir::builtin::popcount
const Op & popcount()
Popcount.

tvm::tir::builtin::tvm_context_id
const Op & tvm_context_id()
Return a unique context id, used for hint of workspace separation. Different context id guarantees no...

tvm::tir::builtin::likely
const Op & likely()
Marks a condition as likely to happen.

tvm::tir::builtin::tvm_struct_get
const Op & tvm_struct_get()
See pseudo code.

tvm::tir::builtin::tvm_check_return
const Op & tvm_check_return()
Checks the return value of another call is correct or returns a given value.

tvm::tir::builtin::shift_left
const Op & shift_left()
Left shift.

tvm::tir::builtin::tvm_stack_make_array
const Op & tvm_stack_make_array()
Allocate a NDArray(DLTensor) on stack, return the handle.

tvm::tir::builtin::ptx_cp_async_barrier
const Op & ptx_cp_async_barrier()
tvm intrinsics for ptx async copy barrier using cp.async.mbarrier.arrive

tvm::tir::builtin::tvm_call_packed
const Op & tvm_call_packed()
See pseudo code.

tvm::tir::builtin::create_barriers
const Op & create_barriers()
tvm intrinsics to create N barriers

tvm::tir::builtin::mma_fill
const Op & mma_fill()
tvm intrinsic for zero-initializing an MMA accumulation register. For example, if each thread in a wa...

tvm::tir::builtin::vectorcombine
const Op & vectorcombine()
Concat two vectors.

tvm::tir::builtin::tvm_call_cpacked_lowered
const Op & tvm_call_cpacked_lowered()
Lowered version of call c-packed, the space of value and type codes are explicitly allocated.

tvm::tir::builtin::large_uint_imm
const Op & large_uint_imm()
See pseudo code.

tvm::tir::builtin::ptx_mma_sp
const Op & ptx_mma_sp()
tvm intrinsic for sparse tensor core ptx instructions.

tvm::tir::builtin::dma_end_group
const Op & dma_end_group()
End a group of DMA copies.

tvm::tir::builtin::anylist_setitem_call_packed
const Op & anylist_setitem_call_packed()
Set an item into any list by running packed function call.

tvm::tir::builtin::tvm_access_ptr
const Op & tvm_access_ptr()
Get head access address with memory access pattern info.

tvm::tir::builtin::vectorhigh
const Op & vectorhigh()
Get the high level half of the vector.

tvm::tir::builtin::prefetch
const Op & prefetch()
Prefetch a cacheline.

tvm::tir::builtin::ptx_cp_async_bulk
const Op & ptx_cp_async_bulk()
tvm intrinsics for ptx async copy from global to shared memory using cp.async.bulk

tvm::tir::builtin::bitwise_or
const Op & bitwise_or()
Bitwise or operator.

tvm::tir::builtin::dp4a
const Op & dp4a()
Dot product of two int8x4 vectors and add an optional accumulator.

tvm::tir::builtin::tvm_fill_fragment
const Op & tvm_fill_fragment()
tvm intrinsic for tensor core fill_fragment operators.

tvm::tir::builtin::simdgroup_multiply_accumulate
const Op & simdgroup_multiply_accumulate()
tvm intrinsic for multiply and accumulate two matrices in simdgroup

tvm::tir::builtin::call_extern
const Op & call_extern()
Call an extern C function with given name and signature from the types of args in the runtime environ...

tvm::tir::builtin::tvm_static_handle
const Op & tvm_static_handle()
Create a function local static handle that initializes to nullptr. Can be used to cache function loca...

tvm::tir::builtin::tvm_thread_context
const Op & tvm_thread_context()
See pseudo code. Mark the content as thread local context, can get optimized by only call the call onc...

tvm::tir::builtin::end_profile_intrinsic
const Op & end_profile_intrinsic()
Profiling intrinsic.

tvm::tir::builtin::tvm_struct_set
const Op & tvm_struct_set()
See pseudo code.

tvm::tir::builtin::texture2d_store
const Op & texture2d_store()
Store to texture 2d memory.

tvm::tir::builtin::isnan
const Op & isnan()
Check if value is nan.

tvm::tir::builtin::address_of
const Op & address_of()
Returns the address of an element in the buffer (see pseudocode below).

tvm::tir::builtin::simdgroup_load
const Op & simdgroup_load()
tvm intrinsic for loading data from device memory or threadgroup memory to simdgroup.

tvm::tir::builtin::mma_store
const Op & mma_store()
tvm intrinsic for storing the result of PTX MMA into a destination pointer. For example,...

tvm::tir::builtin::undef
const Op & undef()
Returns an initialized but arbitrary value.

tvm::tir::builtin::ptx_ldg32
const Op & ptx_ldg32()
tvm intrinsic for ptx predicate load with 32-bit data type.

tvm::tir::builtin::reinterpret
const Op & reinterpret()
Reinterpret the value using the target type.

tvm::tir::builtin::ptx_cp_async
const Op & ptx_cp_async()
tvm intrinsics for ptx async copy from global to shared memory using cp.async

tvm::tir::builtin::vectorlow
const Op & vectorlow()
Get the low-level half of the vector.

tvm::tir::builtin::call_llvm_intrin
const Op & call_llvm_intrin()
Call an LLVM intrinsic with a given intrinsic id and signature from the types of args in the runtime ...

tvm::tir::builtin::ptx_wait_barrier
const Op & ptx_wait_barrier()
tvm intrinsics for ptx barrier wait using mbarrier.try_wait

tvm::tir::builtin::tvm_bmma_sync
const Op & tvm_bmma_sync()
tvm intrinsic for tensor core bmma_sync operators.

tvm::tir::builtin::call_llvm_pure_intrin
const Op & call_llvm_pure_intrin()
Call an LLVM pure intrinsic with a given intrinsic id and signature from the types of args in the run...

tvm::tir::builtin::anylist_setitem_call_cpacked
const Op & anylist_setitem_call_cpacked()
Same as anylist_setitem_call_packed but use C calling convention.

tvm::tir::builtin::tvm_storage_sync
const Op & tvm_storage_sync()
See pseudo code.

tvm::tir::builtin::tvm_throw_last_error
const Op & tvm_throw_last_error()
See pseudo code.

tvm::tir::builtin::tvm_load_matrix_sync
const Op & tvm_load_matrix_sync()
tvm intrinsic for tensor core load operators.

tvm::tir::builtin::nd_mem_alloc_with_scope
const Op & nd_mem_alloc_with_scope()
Create an Nd memory allocation with storage scope.

tvm::tir::builtin::tvm_thread_allreduce
const Op & tvm_thread_allreduce()
See pseudo code.

tvm::tir::builtin::isnullptr
const Op & isnullptr()
See pseudo code.

tvm::tir::builtin::start_profile_intrinsic
const Op & start_profile_intrinsic()
Profiling intrinsic.

tvm::tir::builtin::tvm_call_trace_packed_lowered
const Op & tvm_call_trace_packed_lowered()
Lowered version of trace intrinsic, the space of value and type codes are explicitly allocated....

tvm::tir::builtin::anylist_resetitem
const Op & anylist_resetitem()
Reset and clear an item in any list.

tvm::tir::builtin::vscale
const Op & vscale()
Get the target's vscale value. It will be lowered to llvm.vscale intrinsic (https://llvm....

tvm::tir::builtin::ptx_arrive_barrier_expect_tx
const Op & ptx_arrive_barrier_expect_tx()
tvm intrinsic for ptx barrier arrival with expect tx using mbarrier.arrive.expect_tx

tvm::tir::builtin::tvm_tuple
const Op & tvm_tuple()
tvm_tuple is not an actual function and cannot codegen. It is used to represent tuple structure in va...

tvm::tir::builtin::atomic_add
const Op & atomic_add()
atomic add instruction, corresponding e.g. to atomicAdd in CUDA

tvm::tir::builtin::anylist_getitem
const Op & anylist_getitem()
Get an item from any list and return it.

tvm::tir::builtin::tvm_stack_make_shape
const Op & tvm_stack_make_shape()
Allocate a shape tuple on stack, return the handle.

tvm::tir::builtin::ptx_arrive_barrier
const Op & ptx_arrive_barrier()
tvm intrinsics for ptx barrier arrival using mbarrier.arrive

tvm::tir::builtin::call_spirv_pure_glsl450
const Op & call_spirv_pure_glsl450()
Call an SPIRV pure GLSL450 intrinsic.

tvm::tir::builtin::tvm_call_trace_packed
const Op & tvm_call_trace_packed()
See pseudo code.

tvm::tir::builtin::tvm_global_barrier_kinit
const Op & tvm_global_barrier_kinit()
Initialize the global barrier. Call this at beginning of kernel that need global barrier.

tvm::tir::builtin::tvm_warp_shuffle
const Op & tvm_warp_shuffle()
See pseudo code.

tvm::tir::builtin::ptx_init_barrier_thread_count
const Op & ptx_init_barrier_thread_count()
tvm intrinsics for ptx barrier initialization of thread count using mbarrier.init

tvm::tir::builtin::dma_start_group
const Op & dma_start_group()
Start a group of DMA copies.

tvm::tir::builtin::get_active_lane_mask
const Op & get_active_lane_mask()
Calculate a predicate mask given an upper bound (limit) and a current value (base).

tvm::tir::builtin::TVMStructFieldKind
TVMStructFieldKind
The kind of structure field info used in intrinsic.
Definition: builtin.h:974

tvm::tir::builtin::kArrAddr
@ kArrAddr
Definition: builtin.h:976

tvm::tir::builtin::kArrTypeLanes
@ kArrTypeLanes
Definition: builtin.h:983

tvm::tir::builtin::kArrTypeBits
@ kArrTypeBits
Definition: builtin.h:982

tvm::tir::builtin::kArrKindBound_
@ kArrKindBound_
Definition: builtin.h:987

tvm::tir::builtin::kArrShape
@ kArrShape
Definition: builtin.h:978

tvm::tir::builtin::kArrTypeCode
@ kArrTypeCode
Definition: builtin.h:981

tvm::tir::builtin::kTVMValueContent
@ kTVMValueContent
Definition: builtin.h:989

tvm::tir::builtin::kArrData
@ kArrData
Definition: builtin.h:977

tvm::tir::builtin::kArrDeviceId
@ kArrDeviceId
Definition: builtin.h:985

tvm::tir::builtin::kArrStrides
@ kArrStrides
Definition: builtin.h:979

tvm::tir::builtin::kTVMValueKindBound_
@ kTVMValueKindBound_
Definition: builtin.h:990

tvm::tir::builtin::kArrDeviceType
@ kArrDeviceType
Definition: builtin.h:986

tvm::tir::builtin::kArrNDim
@ kArrNDim
Definition: builtin.h:980

tvm::tir::builtin::kArrByteOffset
@ kArrByteOffset
Definition: builtin.h:984

tvm::tir::builtin::ptx_mma
const Op & ptx_mma()
tvm intrinsic for ptx tensor core mma instructions.

tvm::tir::builtin::tvm_warp_shuffle_up
const Op & tvm_warp_shuffle_up()

tvm::tir::builtin::call_pure_extern
const Op & call_pure_extern()
Call a pure extern C function with given name and signature from the types of args in the runtime en...

tvm::tir::builtin::texture2d_load
const Op & texture2d_load()
Load from texture 2d memory.

tvm::tir::builtin::if_then_else
const Op & if_then_else()
Same as select, used for unsafe memory access.

tvm::tir::builtin::ret
const Op & ret()
Return value.

tvm::tir::builtin::ptx_ldmatrix
const Op & ptx_ldmatrix()
tvm intrinsic for ptx load matrix from shared memory.

tvm::tir::builtin::ptx_wait_group
const Op & ptx_wait_group()

tvm::tir::builtin::shift_right
const Op & shift_right()
Right shift.

tvm::tir::builtin::lookup_param
const Op & lookup_param()
See pseudo code Type lookup_param(String param_name) { return __tvm_param__param_name; }.

tvm::tir::builtin::tvm_warp_shuffle_down
const Op & tvm_warp_shuffle_down()

tvm::tir::builtin::make_filled_simdgroup_matrix
const Op & make_filled_simdgroup_matrix()
tvm intrinsic for initializing a simdgroup with a given value.

tvm::tir::builtin::tvm_warp_activemask
const Op & tvm_warp_activemask()

tvm::tir::builtin::tvm_store_matrix_sync
const Op & tvm_store_matrix_sync()
tvm intrinsic for tensor core store operators.

tvm::tir::builtin::dma_copy
const Op & dma_copy()
Initiate a non-blocking DMA copy from source to destination.

tvm
runtime implementation for LibTorch/TorchScript.
Definition: analyzer.h:36

expr.h
TIR expressions.