tvm
|
The user-facing schedule class. More...
#include <schedule.h>
Public Member Functions | |
virtual | ~ScheduleNode ()=default |
TVM_DECLARE_FINAL_OBJECT_INFO (ScheduleNode, runtime::Object) | |
virtual IRModule | mod () const |
Get the IRModule associated with this schedule. More... | |
virtual ScheduleState | state () const =0 |
virtual Optional< Trace > | trace () const =0 |
virtual Optional< GlobalVar > | func_working_on () const =0 |
virtual void | WorkOn (const String &func_name)=0 |
Instruct the schedule to work on a function in the IRModule. More... | |
virtual Schedule | Copy ()=0 |
Returns a copy of the schedule, including both its state and its symbol table, guaranteeing that 1) SRef tree is completely reconstructed; 2) The IRModule being scheduled is not modified; 3) All the random variables are valid in the copy, pointing to the corresponding sref reconstructed. More... | |
virtual void | Seed (support::LinearCongruentialEngine::TRandState seed)=0 |
Seed the randomness. More... | |
virtual support::LinearCongruentialEngine::TRandState | ForkSeed ()=0 |
Fork the random state. More... | |
virtual Block | Get (const BlockRV &block_rv) const =0 |
Get the block corresponding to the specific BlockRV. More... | |
virtual For | Get (const LoopRV &loop_rv) const =0 |
Get the for loop corresponding to the specific LoopRV. More... | |
virtual PrimExpr | Get (const ExprRV &expr_rv) const =0 |
Get the expr corresponding to the specific random variable. More... | |
virtual StmtSRef | GetSRef (const BlockRV &block_rv) const =0 |
Get the block sref corresponding to the specific BlockRV. More... | |
virtual StmtSRef | GetSRef (const LoopRV &loop_rv) const =0 |
Get the loop sref corresponding to the specific LoopRV. More... | |
virtual bool | HasBlock (const BlockRV &block_rv) const =0 |
Check the existence of a specific BlockRV. More... | |
virtual StmtSRef | GetSRef (const StmtNode *stmt) const |
Get the block/loop sref corresponding to the specific statement. More... | |
StmtSRef | GetSRef (const Stmt &stmt) const |
Get the block/loop sref corresponding to the specific statement. More... | |
virtual void | RemoveRV (const BlockRV &block_rv)=0 |
Remove a block random variable from the symbol table. More... | |
virtual void | RemoveRV (const LoopRV &loop_rv)=0 |
Remove a loop random variable from the symbol table. More... | |
virtual void | RemoveRV (const ExprRV &expr_rv)=0 |
Remove an integer random variable from the symbol table. More... | |
virtual ExprRV | SampleCategorical (const Array< runtime::Int > &candidates, const Array< runtime::Float > &probs, Optional< runtime::Int > decision=NullOpt)=0 |
Sample an integer given the probability distribution. More... | |
virtual Array< ExprRV > | SamplePerfectTile (const LoopRV &loop_rv, int n, int max_innermost_factor, Optional< Array< Integer >> decision=NullOpt)=0 |
Sample the factors to perfect tile a specific loop. More... | |
virtual Array< ExprRV > | SamplePartitionedTile (const LoopRV &loop_rv, int n, int partition_pos, int innerpart_factor, Optional< Array< Integer >> decision=NullOpt)=0 |
Sample the factors to a partitioned tile for a specific loop. More... | |
virtual LoopRV | SampleComputeLocation (const BlockRV &block_rv, Optional< Integer > decision=NullOpt)=0 |
Sample a compute-at location of the given block. More... | |
virtual BlockRV | GetBlock (const String &name, const Optional< String > &func_name=NullOpt)=0 |
Retrieve a block in a specific function with its name. More... | |
virtual Array< LoopRV > | GetLoops (const BlockRV &block_rv)=0 |
Get the parent loops of the block in its scope, from outer to inner. More... | |
virtual Array< BlockRV > | GetChildBlocks (const BlockRV &block_rv)=0 |
Get the leaf blocks of a specific scope. More... | |
virtual Array< BlockRV > | GetChildBlocks (const LoopRV &loop_rv)=0 |
Get the leaf blocks under a specific loop. More... | |
virtual Array< BlockRV > | GetProducers (const BlockRV &block_rv)=0 |
Get the producer of a specific block, under the same block scope. More... | |
virtual Array< BlockRV > | GetConsumers (const BlockRV &block_rv)=0 |
Get the consumers of a specific block, under the same block scope. More... | |
virtual Array< BlockRV > | GetOutputBlocks (const BlockRV &scope_block_rv)=0 |
Get the list of output blocks within the given scope. An output block is a block which has at least one buffer being written to, but is not allocated within the PrimFunc. More... | |
virtual LoopRV | Merge (const Array< LoopRV > &loop_rvs)=0 |
Merge a list of loops into one. The loops under their LCA requires: 1) Under the same scope 2) Can't have annotations or thread bindings 3) Start with 0 and have same extent and same nesting depth 4) From target loop to their LCA, the inner loop must be the only child of the outer loop. More... | |
virtual LoopRV | Fuse (const Array< LoopRV > &loop_rvs, bool preserve_unit_iters=true)=0 |
Fuse a list of consecutive loops into one. It requires: 1) The loops can't have annotations or thread bindings. 2) The (i+1)-th loop must be the only child of the i-th loop. 3) All loops must start with 0. 4) The domain of a loop to be fused cannot depend on another loop to be fused. More... | |
virtual Array< LoopRV > | Split (const LoopRV &loop_rv, const Array< Optional< ExprRV >> &factors, bool preserve_unit_iters=true, bool disable_predication=false)=0 |
Split a loop into a list of consecutive loops. It requires: 1) The loop can't have annotation or thread binding. 2) The loop must start with 0. More... | |
virtual Array< LoopRV > | LoopPartition (const LoopRV &loop_rv, const Array< Optional< ExprRV >> &factors, bool preserve_unit_iters=true)=0 |
Partition a loop into a sequence of multiple loops. It requires: 1) The loop can't have annotation or thread binding. More... | |
virtual void | Reorder (const Array< LoopRV > &ordered_loop_rvs)=0 |
Reorder a list of loops. It doesn't require the loops to be consecutive. It requires: 1) The loops are in the same chain. That means: the loops can be ordered to [l_1, l_2, ... , l_n] where l_i is an ancestor of l_{i+1} and there are only single-branch loops between l_1 and l_n (which also indicates they are under the same scope). 2) After reordering, the domain of an outer loop cannot depend on any of the inner loops. 3) For every block under the loop nests, its block binding must be affine, and the block variables must be either data parallel or reduction. 4) No duplicated loops are allowed in the arguments. More... | |
virtual void | ReorderBlockIterVar (const BlockRV &block_rv, const Array< Integer > new_order)=0 |
Reorder the itervars inside a block. More... | |
virtual LoopRV | AddUnitLoop (const BlockRV &block_rv)=0 |
Create a new unit loop on top of the specific block. More... | |
virtual LoopRV | AddUnitLoop (const LoopRV &loop_rv)=0 |
Create a new unit loop on top of the specific loop. More... | |
virtual void | Parallel (const LoopRV &loop_rv)=0 |
Parallelize the input loop. It requires: 1) The scope block that the loop is in should have stage-pipeline property 2) All the blocks under the loop are complete blocks or reduction blocks, and have affine bindings 3) For each block under the loop, the loop can only be contained in data-parallel block iters' bindings. More... | |
virtual void | Vectorize (const LoopRV &loop_rv)=0 |
Vectorize the input loop. It requires: 1) The scope block that the loop is in should have stage-pipeline property 2) All the blocks under the loop are complete blocks or reduction blocks, and have affine bindings 3) For each block under the loop, the loop can only be contained in data-parallel block iters' bindings. More... | |
virtual void | Bind (const LoopRV &loop_rv, const String &thread_axis)=0 |
Bind the input loop to the given thread axis. It requires: 1) The scope block that the loop is in should have stage-pipeline property 2) All the blocks under the loop are complete blocks or reduction blocks, and have affine bindings 3) For each block under the loop, if the thread axis starts with "threadIdx", the loop can only be contained in data-parallel block iter and reduction block iters' bindings. Otherwise the loop can only be contained in data-parallel block iters' bindings. More... | |
virtual void | Unroll (const LoopRV &loop_rv)=0 |
Unroll the input loop. It requires nothing. More... | |
virtual BlockRV | CacheRead (const BlockRV &block_rv, int read_buffer_index, const String &storage_scope, const Array< BlockRV > consumer_blocks={})=0 |
Create a block that reads a buffer region into a read cache. It requires: 1) There is at most one block that writes the buffer in the scope. 2) The scope block has the stage-pipeline property. More... | |
virtual BlockRV | CacheWrite (const BlockRV &block_rv, int write_buffer_index, const String &storage_scope, const Array< BlockRV > consumer_blocks={})=0 |
Create a block that writes a buffer region into a write cache. It requires: 1) There is only one block that writes the target buffer. 2) The scope block has the stage-pipeline property. More... | |
virtual BlockRV | ReindexCacheRead (const BlockRV &block_rv, int read_buffer_index, const String &storage_scope, const IndexMap &index_map)=0 |
Create a block that reads a buffer region into a read cache. It requires: 1) There is at most one block that writes the buffer in the scope. 2) The scope block has the stage-pipeline property. Compared to cache read, the indices used to access the allocated cache buffer are customized by the user. More... | |
virtual BlockRV | ReindexCacheWrite (const BlockRV &block_rv, int write_buffer_index, const String &storage_scope, const IndexMap &index_map)=0 |
Create a block that writes a buffer region into a write cache. It requires: 1) There is only one block that writes the target buffer. 2) The scope block has the stage-pipeline property. Compared to cache write, the indices used to access the allocated cache buffer are customized by the user. More... | |
virtual Array< BlockRV > | CacheInplace (const BlockRV &block_rv, int read_buffer_index, const String &storage_scope)=0 |
Create 2 blocks that read&write a buffer region into a read/write cache. It requires the target block both read & write the target buffer. More... | |
virtual Array< BlockRV > | CacheIndex (const BlockRV &block_rv, const String &storage_scope, int cse_thresh)=0 |
Create a block to cache precomputed index for later use. If there is no index computation, keep unchanged. More... | |
virtual BlockRV | ReIndex (const BlockRV &block_rv, int buffer_index, BufferIndexType buffer_index_type)=0 |
Create a block that read/write a buffer region into a read/write cache with reindexing. The layout of the cache will be the same as by the iterators of the block that reads/writes the buffer. It requires: 1) There is only one block who reads/writes the target buffer 2) There is only one buffer load/store of this buffer in the block. More... | |
virtual BlockRV | ReadAt (const LoopRV &loop_rv, const BlockRV &block_rv, int read_buffer_index, const String &storage_scope)=0 |
virtual BlockRV | WriteAt (const LoopRV &loop_rv, const BlockRV &block_rv, int write_buffer_index, const String &storage_scope)=0 |
virtual void | ComputeAt (const BlockRV &block_rv, const LoopRV &loop_rv, bool preserve_unit_loops, int index=-1)=0 |
Move a producer block under the specific loop, and regenerate the loops induced by the block so that the buffer region produced by the producer block could cover those regions consumed by its consumer blocks under the given loop. It requires: 1) block and loop are under the same scope, loop is not the ancestor of block 2) The scope block has stage-pipeline property 3) The subtree of the scope block, where the given block is in, satisfies the compact dataflow condition. i.e. all the blocks in the scope block's subtree must be either complete block or reduction block 4) The block is not an output block with regard to the scope block, i.e. the buffers written by the block are allocated under the scope block 5) All the consumers of the block are under the given loop. More... | |
virtual void | ReverseComputeAt (const BlockRV &block_rv, const LoopRV &loop_rv, bool preserve_unit_loops, int index=-1)=0 |
Move a consumer block under the specific loop, and regenerate the loops induced by the block so that the buffer region consumed by the consumer block could cover those regions produced by its producer blocks under the given loop. It requires: 1) block and loop are under the same scope, loop is not the ancestor of block 2) The scope block has stage-pipeline property 3) The subtree of the scope block, where the given block is in, satisfies the compact dataflow condition. i.e. all the blocks in the scope block's subtree must be either complete block or reduction block 4) All the producers of the block are under the given loop. More... | |
virtual void | ComputeInline (const BlockRV &block)=0 |
Inline a block into its consumer(s). It requires: 1) The block is a complete non-root block, which only produces one buffer 2) The block must not be the only leaf in the scope. 3) The body of the block must be a BufferStore statement in the form of, A[i, j, k, ...] = ... where the indices of the LHS are all distinct atomic variables, and no variables other than those indexing variables are allowed in the statement. More... | |
virtual void | ReverseComputeInline (const BlockRV &block)=0 |
Inline a block into its only producer. It requires: 1) The block is a complete non-root block, which only produces and consumes one buffer 2) The block must not be the only leaf in the scope. 3) The only producer of the block is a read-after-write producer and a complete non-root block 4) The body of the block must be a BufferStore statement in the form of, B[f(i, j, k, ...)] = g(i, j, k, A[i, j, k, ...] ...) where the indices of each BufferLoad on the RHS are all distinct atomic variables, and no variables other than those indexing variables are allowed in the statement. More... | |
virtual BlockRV | DecomposeReduction (const BlockRV &block_rv, const LoopRV &loop_rv)=0 |
Decompose a reduction block into two separate blocks. a) The init block, which is translated from the init statement of the reduction block; b) The update block, which is the original block without init statement. More... | |
virtual BlockRV | RFactor (const LoopRV &loop_rv, int factor_axis)=0 |
Factorize an associative reduction block by the specified loop. More... | |
virtual void | StorageAlign (const BlockRV &block_rv, int buffer_index, int axis, int factor, int offset)=0 |
Set alignment requirement for specific dimension such that stride[axis] == k * factor + offset for some k. This is useful to set memory layout for more friendly memory access pattern. For example, we can set alignment to be factor=2, offset=1 to avoid bank conflict for thread access on higher dimension in GPU shared memory. More... | |
virtual void | SetScope (const BlockRV &block_rv, int buffer_index, const String &storage_scope)=0 |
Set the storage scope of a buffer, where the buffer is specified by a block and a write-index. More... | |
virtual void | UnsafeSetDType (const BlockRV &block_rv, int buffer_index, const String &dtype)=0 |
Set the data type of a buffer, where the buffer is specified by a block and a write-index. More... | |
virtual BlockRV | Blockize (const LoopRV &loop_rv, bool preserve_unit_iters=true)=0 |
Convert the subtree rooted at a specific loop into a block. More... | |
virtual BlockRV | Blockize (const Array< BlockRV > &blocks, bool preserve_unit_iters=true)=0 |
Convert specified blocks into a nested block. More... | |
virtual void | Tensorize (const LoopRV &loop_rv, const String &intrin, bool preserve_unit_iters=true)=0 |
Tensorize the computation enclosed by loop with the tensor intrin. More... | |
virtual void | Tensorize (const BlockRV &block_rv, const String &intrin, bool preserve_unit_iters=true)=0 |
Tensorize the computation enclosed by block with the tensor intrin. More... | |
virtual void | Annotate (const LoopRV &loop_rv, const String &ann_key, const ObjectRef &ann_val)=0 |
Annotate a loop with a key value pair. More... | |
virtual void | Annotate (const BlockRV &block_rv, const String &ann_key, const ObjectRef &ann_val)=0 |
Annotate a block with a key value pair. More... | |
virtual void | Unannotate (const LoopRV &loop_rv, const String &ann_key)=0 |
Unannotate a loop's annotation with key ann_key. More... | |
virtual void | Unannotate (const BlockRV &block_rv, const String &ann_key)=0 |
Unannotate a block's annotation with key ann_key. More... | |
virtual void | TransformLayout (const BlockRV &block_rv, int buffer_index, BufferIndexType buffer_index_type, const IndexMap &index_map, const Optional< IndexMap > &pad_value=NullOpt, bool assume_injective_transform=false)=0 |
Apply a transformation represented by IndexMap to buffer. More... | |
virtual void | TransformBlockLayout (const BlockRV &block_rv, const IndexMap &index_map)=0 |
Apply a transformation represented by IndexMap to block. More... | |
virtual void | SetAxisSeparator (const BlockRV &block_rv, int buffer_index, BufferIndexType buffer_index_type, const Array< IntImm > &axis_separators)=0 |
Set the axis separator of a buffer, where the buffer is specified by a block and a read or write index. More... | |
virtual BlockRV | DecomposePadding (const BlockRV &block_rv, const LoopRV &loop_rv)=0 |
Decompose a padding block into a block filling const pad values and a block writing in-bound values. More... | |
virtual void | PadEinsum (const BlockRV &block_rv, const Array< Integer > &padding)=0 |
Pad the computation of Einsum. More... | |
virtual void | RollingBuffer (const BlockRV &block_rv, int write_buffer_index)=0 |
Compute the target buffer via rolling buffering. More... | |
virtual void | AnnotateBufferAccess (const BlockRV &block_rv, int buffer_index, BufferIndexType buffer_index_type, const IndexMap &index_map)=0 |
Annotate the buffer access of a block. More... | |
virtual void | EnterPostproc ()=0 |
A no-op that marks the start of postprocessing phase of scheduling. More... | |
virtual void | UnsafeHideBufferAccess (const BlockRV &block_rv, const String &buf_type, const Array< IntImm > &buf_index_array)=0 |
Hide some buffer access in the given block. More... | |
Public Member Functions inherited from tvm::runtime::Object | |
uint32_t | type_index () const |
std::string | GetTypeKey () const |
size_t | GetTypeKeyHash () const |
template<typename TargetType > | |
bool | IsInstance () const |
bool | unique () const |
Object () | |
Object (const Object &other) | |
Object (Object &&other) | |
Object & | operator= (const Object &other) |
Object & | operator= (Object &&other) |
Static Public Attributes | |
static constexpr const char * | _type_key = "tir.Schedule" |
Static Public Attributes inherited from tvm::runtime::Object | |
static constexpr const char * | _type_key = "runtime.Object" |
static constexpr bool | _type_final = false |
static constexpr uint32_t | _type_child_slots = 0 |
static constexpr bool | _type_child_slots_can_overflow = true |
static constexpr bool | _type_has_method_visit_attrs = true |
static constexpr bool | _type_has_method_sequal_reduce = false |
static constexpr bool | _type_has_method_shash_reduce = false |
static constexpr uint32_t | _type_index = TypeIndex::kDynamic |
Friends | |
class | Schedule |
Additional Inherited Members | |
Public Types inherited from tvm::runtime::Object | |
typedef void(* | FDeleter) (Object *self) |
Object deleter. More... | |
using | RefCounterType = std::atomic< int32_t > |
Static Public Member Functions inherited from tvm::runtime::Object | |
static std::string | TypeIndex2Key (uint32_t tindex) |
Get the type key of the corresponding index from runtime. More... | |
static size_t | TypeIndex2KeyHash (uint32_t tindex) |
Get the type key hash of the corresponding index from runtime. More... | |
static uint32_t | TypeKey2Index (const std::string &key) |
Get the type index of the corresponding key from runtime. More... | |
static uint32_t | _GetOrAllocRuntimeTypeIndex () |
static uint32_t | RuntimeTypeIndex () |
Protected Member Functions inherited from tvm::runtime::Object | |
void | IncRef () |
developer function, increases reference counter. More... | |
void | DecRef () |
developer function, decrease reference counter. More... | |
Static Protected Member Functions inherited from tvm::runtime::Object | |
static uint32_t | GetOrAllocRuntimeTypeIndex (const std::string &key, uint32_t static_tindex, uint32_t parent_tindex, uint32_t type_child_slots, bool type_child_slots_can_overflow) |
Get the type index using type key. More... | |
Protected Attributes inherited from tvm::runtime::Object | |
uint32_t | type_index_ {0} |
Type index(tag) that indicates the type of the object. More... | |
RefCounterType | ref_counter_ {0} |
The internal reference counter. More... | |
FDeleter | deleter_ = nullptr |
deleter of this object to enable customized allocation. If the deleter is nullptr, no deletion will be performed. The creator of the object must always set the deleter field properly. More... | |
The user-facing schedule class.
|
virtual default |
Create a new unit loop on top of the specific block.
block_rv | The block above which the new loop is created |
Create a new unit loop on top of the specific loop.
loop_rv | The loop above which the new loop is created |
|
pure virtual |
Annotate a block with a key value pair.
block_rv | The block to be annotated |
ann_key | The annotation key |
ann_val | The annotation value, a string or an ExprRV |
|
pure virtual |
Annotate a loop with a key value pair.
loop_rv | The loop to be annotated |
ann_key | The annotation key |
ann_val | The annotation value, a string or an ExprRV |
|
pure virtual |
Annotate the buffer access of a block.
block_rv | The block to be annotated |
buffer_index | The index of the buffer in block's read or write region |
buffer_index_type | The type of the buffer index, kRead or kWrite. |
index_map | The index map that defines the new read or write region |
|
pure virtual |
Bind the input loop to the given thread axis. It requires: 1) The scope block that the loop is in should have stage-pipeline property 2) All the blocks under the loop are complete blocks or reduction blocks, and have affine bindings 3) For each block under the loop, if the thread axis starts with "threadIdx", the loop can only be contained in data-parallel block iter and reduction block iters' bindings. Otherwise the loop can only be contained in data-parallel block iters' bindings.
loop_rv | The loop to be bound to the thread axis |
thread_axis | The thread axis to be bound to the loop |
|
pure virtual |
Convert specified blocks into a nested block.
blocks | The specified blocks used to construct the new block |
preserve_unit_iters | Whether or not to preserve unit iterators in block bindings |
|
pure virtual |
Convert the subtree rooted at a specific loop into a block.
loop_rv | the root of the subtree |
preserve_unit_iters | Whether or not to preserve unit iterators in block bindings |
|
pure virtual |
Create a block to cache precomputed index for later use. If there is no index computation, keep unchanged.
block_rv | The target block |
storage_scope | The storage scope of cached block |
cse_thresh | The repeat threshold that determines a common sub expr |
|
pure virtual |
Create 2 blocks that read&write a buffer region into a read/write cache. It requires the target block both read & write the target buffer.
block_rv | The target block that operates on the target buffer. |
read_buffer_index | The index of the buffer in block's read region. |
storage_scope | The target storage scope |
|
pure virtual |
Create a block that reads a buffer region into a read cache. It requires: 1) There is at most one block that writes the buffer in the scope. 2) The scope block has the stage-pipeline property.
block_rv | The consumer block of the target buffer. |
read_buffer_index | The index of the buffer in block's read region. |
storage_scope | The target storage scope. |
consumer_blocks | An optional list of consumers of the cache to rewrite. |
|
pure virtual |
Create a block that writes a buffer region into a write cache. It requires: 1) There is only one block that writes the target buffer. 2) The scope block has the stage-pipeline property.
block_rv | The producer of the buffer |
write_buffer_index | The index of the buffer in block's write region |
storage_scope | The target storage scope |
consumer_blocks | An optional list of consumers to read from cache directly. |
|
pure virtual |
Move a producer block under the specific loop, and regenerate the loops induced by the block so that the buffer region produced by the producer block could cover those regions consumed by its consumer blocks under the given loop. It requires: 1) block and loop are under the same scope, loop is not the ancestor of block 2) The scope block has stage-pipeline property 3) The subtree of the scope block, where the given block is in, satisfies the compact dataflow condition. i.e. all the blocks in the scope block's subtree must be either complete block or reduction block 4) The block is not an output block with regard to the scope block, i.e. the buffers written by the block are allocated under the scope block 5) All the consumers of the block are under the given loop.
block_rv | The block to be moved |
loop_rv | The loop under which the block is to be moved |
preserve_unit_loops | Whether to keep the trivial loops whose extents are 1 |
index | The block index of the loop body subtree blocks:
|
|
pure virtual |
Inline a block into its consumer(s). It requires: 1) The block is a complete non-root block, which only produces one buffer 2) The block must not be the only leaf in the scope. 3) The body of the block must be a BufferStore statement in the form of, A[i, j, k, ...] = ... where the indices of the LHS are all distinct atomic variables, and no variables other than those indexing variables are allowed in the statement.
block | The block to be inlined to its consumer(s) |
|
pure virtual |
Returns a copy of the schedule, including both its state and its symbol table, guaranteeing that 1) SRef tree is completely reconstructed; 2) The IRModule being scheduled is not modified; 3) All the random variables are valid in the copy, pointing to the corresponding sref reconstructed.
|
pure virtual |
Decompose a padding block into a block filling const pad values and a block writing in-bound values.
block_rv | The block that matches the padding pattern. |
loop_rv | The loop before which the const filling block is inserted. |
|
pure virtual |
Decompose a reduction block into two separate blocks. a) The init block, which is translated from the init statement of the reduction block; b) The update block, which is the original block without init statement.
The init block is inserted right before the given loop.
The schedule primitive requires: 1) The input block is a reduction block. 2) The input loop is the ancestor of the block. 3) The input loop is not lower than all the loops related to reduce block var.
block_rv | The reduction block to be decomposed |
loop_rv | The loop before which the init block is inserted. |
|
pure virtual |
A no-op that marks the start of postprocessing phase of scheduling.
|
pure virtual |
Fork the random state.
|
pure virtual |
Fuse a list of consecutive loops into one. It requires: 1) The loops can't have annotations or thread bindings. 2) The (i+1)-th loop must be the only child of the i-th loop. 3) All loops must start with 0. 4) The domain of a loop to be fused cannot depend on another loop to be fused.
loop_rvs | The loops to be fused |
preserve_unit_iters | Whether or not to preserve unit iterators in block bindings |
Get the expr corresponding to the specific random variable.
expr_rv | The random variable to be looked up |
|
pure virtual |
Retrieve a block in a specific function with its name.
By default, if func_name is not specified, the schedule will search for the block in the function that is currently being "worked on". To switch the function to be worked on, use WorkOn before calling this method.
name | The name of the block to be retrieved |
func_name | The name of the function |
|
pure virtual |
Get the leaf blocks of a specific scope.
block_rv | The block where the scope is rooted |
|
pure virtual |
Get the leaf blocks under a specific loop.
loop_rv | The loop under which collecting is conducted |
|
pure virtual |
Get the consumers of a specific block, under the same block scope.
block_rv | The block to be queried |
Get the parent loops of the block in its scope, from outer to inner.
block_rv | The query block |
|
pure virtual |
Get the list of output blocks within the given scope. An output block is a block which has at least one buffer being written to, but is not allocated within the PrimFunc.
scope_block_rv | The scope block from which output blocks are collected |
|
pure virtual |
Get the producer of a specific block, under the same block scope.
block_rv | The block in the query |
Get the block/loop sref corresponding to the specific statement.
stmt | The statement to be looked up |
Get the block/loop sref corresponding to the specific statement.
stmt | The statement to be looked up |
|
pure virtual |
|
pure virtual |
Partition a loop into a sequence of multiple loops. It requires: 1) The loop can't have annotation or thread binding.
loop_rv | The loop to be partitioned |
factors | The positive integers, and at most one of which is NullOpt , which means that factor is inferred. |
preserve_unit_iters | Whether or not to preserve unit iterators in block bindings |
Merge a list of loops into one. The loops under their LCA requires: 1) Under the same scope 2) Can't have annotations or thread bindings 3) Start with 0 and have same extent and same nesting depth 4) From target loop to their LCA, the inner loop must be the only child of the outer loop.
loop_rvs | The loops to be merged |
|
inline virtual |
Get the IRModule associated with this schedule.
|
pure virtual |
Pad the computation of Einsum.
block_rv | The block that matches the Einsum pattern. |
padding | The padding for each block iter. |
This schedule primitive identifies the Einsum pattern in the block body, and finds its producer blocks. It then pads the computation of the Einsum pattern and its producer blocks. The output buffer and the producer buffer are resized according to the padding size. It requires the output buffer and the producer buffer to be allocated inside the PrimFunc.
The padding is a list of non-negative integers, each element corresponds to the padding for each block iter in the order of block iters. The block and its producer blocks should have trivial bindings, i.e. each block iter is bound to a single loop variable. After padding, the block iter extent and the corresponding outer loop is extended by the padding size.
The sizes of the producer buffers are inferred from the padding size of the Einsum computation. The producer buffers are padded by the initial value of the corresponding reduction.
|
pure virtual |
Parallelize the input loop. It requires: 1) The scope block that the loop is in should have stage-pipeline property 2) All the blocks under the loop are complete blocks or reduction blocks, and have affine bindings 3) For each block under the loop, the loop can only be contained in data-parallel block iters' bindings.
loop_rv | The loop to be parallelized |
|
pure virtual |
|
pure virtual |
Create a block that read/write a buffer region into a read/write cache with reindexing. The layout of the cache will be the same as by the iterators of the block that reads/writes the buffer. It requires: 1) There is only one block who reads/writes the target buffer 2) There is only one buffer load/store of this buffer in the block.
block_rv | The block that operates on the target buffer. |
buffer_index | The index of the buffer in block's read or write region. |
buffer_index_type | The type of the buffer index, kRead or kWrite. |
|
pure virtual |
Create a block that reads a buffer region into a read cache. It requires: 1) There is at most one block that writes the buffer in the scope. 2) The scope block has stage-pipeline property. Compared to cache read, the indices to access the allocated cache buffer are customized by the user.
block_rv | The consumer block of the target buffer. |
read_buffer_index | The index of the buffer in block's read region. |
storage_scope | The target storage scope. |
index_map | User defined indices to access allocated cache buffer, maps from block iter vars. |
|
pure virtual |
Create a block that writes a buffer region into a write cache. It requires: 1) There is only one block that writes the target buffer. 2) The scope block has stage-pipeline property. Compared to cache write, the indices to access the allocated cache buffer are customized by the user.
block_rv | The producer of the buffer |
write_buffer_index | The index of the buffer in block's write region |
storage_scope | The target storage scope |
index_map | User defined indices to access allocated cache buffer, maps from block iter vars. |
|
pure virtual |
Remove a block random variable from the symbol table.
block_rv | The random variable to be removed |
|
pure virtual |
Remove an integer random variable from the symbol table.
expr_rv | The random variable to be removed |
|
pure virtual |
Remove a loop random variable from the symbol table.
loop_rv | The random variable to be removed |
|
pure virtual |
Reorder a list of loops. It doesn't require the loops to be consecutive. It requires: 1) The loops are in the same chain. That means: the loops can be ordered to [l_1, l_2, ... , l_n] where l_i is an ancestor of l_{i+1} and there are only single-branch loops between l_1 and l_n (which also indicates they are under the same scope). 2) After reordering, the domain of an outer loop cannot depend on any of the inner loops. 3) For every block under the loop nests, its block binding must be affine, and the block variables must be either data parallel or reduction. 4) No duplicated loops are allowed in the arguments.
ordered_loop_rvs | The loops in the new order |
|
pure virtual |
Reorder the itervars inside a block.
block_rv | The block to be transformed. |
new_order | The new itervar order. |
|
pure virtual |
Move a consumer block under the specific loop, and regenerate the loops induced by the block so that the buffer region consumed by the consumer block could cover those regions produced by its producer blocks under the given loop. It requires: 1) block and loop are under the same scope, and loop is not the ancestor of block 2) The scope block has stage-pipeline property 3) The subtree of the scope block, where the given block is in, satisfies the compact dataflow condition, i.e. all the blocks in the scope block's subtree must be either complete block or reduction block 4) All the producers of the block are under the given loop.
block_rv | The block to be moved |
loop_rv | The loop where the block to be moved under |
preserve_unit_loops | Whether to keep the trivial loops whose extents are 1 |
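index | The block index of the loop body subtree blocks: -1 means inserted into the last possible insertion point; -2 means inserted into the first possible insertion point; otherwise, index is a nonnegative number that indicates the insertion point.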
|
|
pure virtual |
Inline a block into its only producer. It requires: 1) The block is a complete non-root block, which only produces and consumes one buffer 2) The block must not be the only leaf in the scope. 3) The only producer of the block is a read-after-write producer and a complete non-root block 4) The body of the block must be a BufferStore statement in the form of B[f(i, j, k, ...)] = g(i, j, k, A[i, j, k, ...] ...), where the indices of each BufferLoad on the RHS are all distinct atomic variables, and no variables other than those indexing variables are allowed in the statement.
block | The block to be inlined to its producer |
|
pure virtual |
Factorize an associative reduction block by the specified loop.
An associative reduction cannot be parallelized directly, because it leads to potential race condition during accumulation. Alternatively, the reduction could be factorized on a loop with the following steps:
Step 1: evenly slice the reduction into n separate chunks, where n is the loop extent;
Step 2: compute the chunks separately and write the results into n intermediate buffers;
Step 3: accumulate the n separate buffers into the result buffer.
Note that the Step 2 above introduces opportunities for parallelization. RFactor is a schedule primitive that implements the transformation described above.
loop_rv | The loop outside block we want to do rfactor |
factor_axis | The position where the new dimension is placed in the new introduced rfactor buffer. Suppose the original reduction block writes to buffer B with ndim(B) dimensions, then factor_axis should be in range [-ndim(B) - 1, ndim(B)] , and the negative index will be normalized to a non-negative one |
|
pure virtual |
Compute the target buffer via rolling buffering.
This primitive selects the outermost rollable axis with a positive bound overlap that appears in the block's ancestor loops as the rolling axis, folds and circularizes the buffer along the rolling dimension, and appends a block predicate to avoid recomputing overlapping elements. It requires: 1) The buffer to be an intermediate buffer defined via alloc_buffer. 2) The LCA of the producer and consumer of the buffer is a for loop; typically, the producer and consumer of the buffer are cascaded through compute_at. 3) The access region of the buffer has at least one dimension that contains a positive bound overlap.
block_rv | The producer block of the buffer. |
write_buffer_index | The index of the buffer in block's write region. |
|
pure virtual |
Sample an integer given the probability distribution.
candidates | The candidates |
probs | The probability distribution of the candidates |
decision | The sampling decision |
|
pure virtual |
Sample a compute-at location of the given block.
block_rv | The block whose compute-at location is to be sampled |
decision | The sampling decision |
|
pure virtual |
Sample the factors to a partitioned tile for a specific loop.
The sampled tile size will be partitioned into two parts. The second part has a guarantee that the product of its extents has a factor of innerpart_factor. The first part is loops at [0, partition_pos); the second part is loops at [partition_pos, n), and we will have innerpart_factor | (l[partition_pos].extent * ... * l[n-1].extent), i.e. innerpart_factor divides that product.
loop_rv | The loop to be tiled |
n | The number of tiles to be sampled |
partition_pos | The position to partition tiles to two parts |
innerpart_factor | The factor of the second part |
decision | The sampling decision |
Returns a list of length n, the random partitioned tile sizes sampled
|
pure virtual |
Sample the factors to perfect tile a specific loop.
loop_rv | The loop to be tiled |
n | The number of tiles to be sampled |
max_innermost_factor | The maximum tile size allowed to be sampled in the innermost loop |
decision | The sampling decision |
Returns a list of length n, the random perfect tile sizes sampled
|
pure virtual |
Seed the randomness.
seed | The new random seed, -1 if use device random, otherwise non-negative |
|
pure virtual |
Set the axis separator of a buffer, where the buffer is specified by a block and a read or write index.
block_rv | The block that accesses the target buffer. |
buffer_index | The index of the buffer in block's read or write region. |
buffer_index_type | The type of the buffer index, kRead or kWrite. |
axis_separators | The axis separator of the buffer |
|
pure virtual |
Set the storage scope of a buffer, where the buffer is specified by a block and a write-index.
block_rv | The producer block of the buffer |
buffer_index | The index of the buffer in block's write region |
storage_scope | The storage scope to be set |
|
pure virtual |
Split a loop into a list of consecutive loops. It requires: 1) The loop can't have annotation or thread binding. 2) The loop must start with 0.
loop_rv | The loop to be split |
factors | The positive tiling factors, and at most one of which is NullOpt , which means that factor is inferred. |
preserve_unit_iters | Whether or not to preserve unit iterators in block bindings. |
disable_predication | If enabled, don't create a predicate for guarding the loop. This can be useful when splitting with scalable factors that the schedule writer knows are divisible by the loop bound. Warning: enabling this feature may result in incorrect code generation if not used carefully. |
|
pure virtual |
|
pure virtual |
Set alignment requirement for specific dimension such that stride[axis] == k * factor + offset for some k. This is useful to set memory layout for more friendly memory access pattern. For example, we can set alignment to be factor=2, offset=1 to avoid bank conflict for thread access on higher dimension in GPU shared memory.
block_rv | The producer block of the buffer |
buffer_index | The index of the buffer in block's write region |
axis | The dimension to be specified for alignment |
factor | The factor multiple of alignment |
offset | The required offset factor |
|
pure virtual |
Tensorize the computation enclosed by loop with the tensor intrin.
block_rv | The block to be tensorized |
intrin | Name of the tensor intrinsic |
preserve_unit_iters | Whether or not to preserve unit iterators in block bindings |
|
pure virtual |
Tensorize the computation enclosed by loop with the tensor intrin.
loop_rv | The loop to be tensorized |
intrin | Name of the tensor intrinsic |
preserve_unit_iters | Whether or not to preserve unit iterators in block bindings |
|
pure virtual |
Apply a transformation represented by IndexMap to block.
The block iters and the block body are transformed by the given index_map. Outer loops corresponding to each new block iter are regenerated. The index_map is required to be bijective affine since we need its inverse mapping.
block_rv | The block to be transformed |
index_map | The transformation to apply. |
|
pure virtual |
Apply a transformation represented by IndexMap to buffer.
The indices and the access region to the target buffer are transformed by the given index_map. The index_map is used to infer the new shape of the buffer. Buffer must be either a function parameter, or allocated in a block (it cannot be a buffer subregion created via 'match_buffer').
block_rv | The block that accesses the target buffer. |
buffer_index | The index of the buffer in block's read or write region. |
buffer_index_type | The type of the buffer index, kRead or kWrite. |
index_map | The transformation to apply. |
pad_value | The value to write into padding introduced by the transformation. If the schedule contains a producer block for the specified buffer, the pad value will be written as part of the producer block if possible, or after the producer block otherwise. Otherwise, if the buffer is an input, will insert an annotation block to state that the padding contains the known value. |
Note: If applied to an input buffer, the calling scope is responsible for ensuring that the pad_value is present. Algebraic simplifications, branch elimination, and other optimizations may assume that this precondition is met, and may result in incorrect results being returned.
assume_injective_transform | If set to true, the schedule primitive will assume the index_map is injective and skip checking overlapping of the mapped indices. This can be useful for complicated index_map that the analysis does not cover. It is the callers' responsibility to ensure the index map is injective, otherwise, the correctness of the schedule is not guaranteed. |
tvm::tir::ScheduleNode::TVM_DECLARE_FINAL_OBJECT_INFO | ( | ScheduleNode | , |
runtime::Object | |||
) |
|
pure virtual |
Unannotate a block's annotation with key ann_key.
block_rv | The block to be unannotated |
ann_key | The annotation key |
|
pure virtual |
Unannotate a loop's annotation with key ann_key.
loop_rv | The loop to be unannotated |
ann_key | The annotation key |
|
pure virtual |
Unroll the input loop. It requires nothing.
loop_rv | The loop to be unrolled |
|
pure virtual |
Hide some buffer access in the given block.
block_rv | The block where we hide buffer access. |
buf_type | The buffer type: read/write |
buf_index_array | The array of buffer indices we hide access. |
|
pure virtual |
Set the data type of a buffer, where the buffer is specified by a block and a write-index.
block_rv | The producer block of the buffer |
buffer_index | the index of the buffer in block's write region |
dtype | The data type to be set |
|
pure virtual |
Vectorize the input loop. It requires: 1) The scope block that the loop is in should have stage-pipeline property 2) All the blocks under the loop are complete blocks or reduction blocks, and have affine bindings 3) For each block under the loop, the loop can only be contained in data-parallel block iters' bindings.
loop_rv | The loop to be vectorized |
|
pure virtual |
Instruct the schedule to work on a function in the IRModule.
By default, the schedule works on the function with the name "main", or the only function in the IRModule if there is only one. If there are multiple functions in the IRModule, and none of their names are "main", users will have to call this method to explicitly specify which function to work on.
This sugar function will guide the GetBlock method if its func_name is not specified.
func_name | The name of the function to be working on |
|
pure virtual |
|
friend |
|
staticconstexpr |