tvm
schedule_rule.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef TVM_META_SCHEDULE_SCHEDULE_RULE_H_
21 #define TVM_META_SCHEDULE_SCHEDULE_RULE_H_
22 
23 #include <tvm/ffi/container/array.h>
24 #include <tvm/ffi/container/map.h>
25 #include <tvm/ffi/function.h>
26 #include <tvm/ffi/optional.h>
27 #include <tvm/ffi/reflection/registry.h>
28 #include <tvm/ffi/string.h>
29 #include <tvm/ir/expr.h>
30 #include <tvm/runtime/object.h>
32 
33 namespace tvm {
34 namespace meta_schedule {
35 
36 class TuneContext;
37 class ScheduleRule;
38 
/*!
 * \brief Rules to modify a block in a schedule.
 *
 * Abstract node type: InitializeWithTuneContext, Apply and Clone are pure
 * virtual and must be supplied by concrete rules. The managed reference to
 * this node is ScheduleRule.
 */
40 class ScheduleRuleNode : public runtime::Object {
41  public:
  /*! \brief Virtual destructor. */
43  virtual ~ScheduleRuleNode() = default;
44 
  /*! \brief Reflection hook; the body is intentionally empty for this node. */
45  static void RegisterReflection() {
46  // No fields to register
47  }
48 
  /*!
   * \brief Initialize the rule with the tuning context.
   * \param context The tuning context handed to the rule.
   */
54  virtual void InitializeWithTuneContext(const TuneContext& context) = 0;
55 
  /*!
   * \brief Apply a schedule rule to the specific block in the given schedule.
   * \param sch The schedule to operate on.
   * \param block The block within `sch` that the rule is applied to.
   * \return An array of schedules produced by applying the rule.
   */
62  virtual ffi::Array<tir::Schedule> Apply(const tir::Schedule& sch, const tir::BlockRV& block) = 0;
63 
  /*!
   * \brief Deep clone the schedule rule.
   * \return The cloned rule.
   */
68  virtual ScheduleRule Clone() const = 0;
69 
  // NOTE(review): presumably tells the FFI object system that instances may be
  // mutated through their references -- confirm against the tvm-ffi docs.
70  static constexpr const bool _type_mutable = true;
  // Declares the FFI object info with type key "meta_schedule.ScheduleRule";
  // the last macro argument (Object) is presumably the parent type -- confirm.
71  TVM_FFI_DECLARE_OBJECT_INFO("meta_schedule.ScheduleRule", ScheduleRuleNode, Object);
72 };
73 
78 class ScheduleRule : public runtime::ObjectRef {
79  public:
84  using FInitializeWithTuneContext = ffi::TypedFunction<void(const TuneContext&)>;
91  using FApply =
92  ffi::TypedFunction<ffi::Array<tir::Schedule>(const tir::Schedule&, const tir::BlockRV&)>;
97  using FAsString = ffi::TypedFunction<ffi::String()>;
102  using FClone = ffi::TypedFunction<ScheduleRule()>;
108  TVM_DLL static ScheduleRule ApplyCustomRule();
110  TVM_DLL static bool IsApplyCustomRule(const ScheduleRule& rule);
122  TVM_DLL static ScheduleRule AutoInline(bool into_producer, //
123  bool into_consumer, //
124  bool inline_const_tensor, //
125  bool disallow_if_then_else, //
126  bool require_injective, //
127  bool require_ordered, //
128  ffi::Optional<ffi::Array<ffi::String>> disallow_op);
129 
138 
159  ffi::String structure, //
160  ffi::Optional<ffi::Array<ffi::String>> tile_binds, //
161  ffi::Optional<Integer> max_innermost_factor, //
162  ffi::Optional<ffi::Array<Integer>> vector_load_lens, //
163  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_read, //
164  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_write,
165  ffi::Optional<ffi::Function> filter_fn = std::nullopt);
166 
185  ffi::String intrin_name, ffi::String structure,
186  ffi::Optional<ffi::Array<ffi::String>> tile_binds,
187  ffi::Optional<Integer> max_innermost_factor,
188  ffi::Optional<ffi::Array<Integer>> vector_load_lens,
189  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_read,
190  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_write);
191 
213  ffi::Array<ffi::Map<ffi::String, ffi::String>> intrin_groups, ffi::String structure,
214  ffi::Optional<ffi::Array<ffi::String>> tile_binds,
215  ffi::Optional<Integer> max_innermost_factor,
216  ffi::Optional<ffi::Array<Integer>> vector_load_lens,
217  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_read,
218  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_write, bool use_software_pipeline);
219 
232  ffi::String structure, Integer vector_length_in_bits,
233  ffi::Optional<Integer> max_innermost_factor,
234  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_read,
235  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_write);
236 
245  TVM_DLL static ScheduleRule AddRFactor(int max_jobs_per_core, //
246  ffi::Optional<Integer> max_innermost_factor);
253  TVM_DLL static ScheduleRule CrossThreadReduction(ffi::Array<Integer> thread_extents);
272  TVM_DLL static ScheduleRule ParallelizeVectorizeUnroll(int max_jobs_per_core, //
273  int max_vectorize_extent, //
274  ffi::Array<Integer> unroll_max_steps, //
275  bool unroll_explicit);
284  TVM_DLL static ScheduleRule AutoBind(int max_threadblocks, ffi::Array<Integer> thread_extents,
285  int max_threads_per_block = -1);
294  TVM_DLL static ScheduleRule PyScheduleRule(
295  FInitializeWithTuneContext f_initialize_with_tune_context, //
296  FApply f_apply, //
297  FClone f_clone, //
298  FAsString f_as_string);
299 
301  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultLLVM();
303  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultX86(const ffi::String& type);
305  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultCUDA();
307  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultCUDATensorCore();
309  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultHexagon();
311  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultARM(const ffi::String& type);
313  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultRISCV(int vlen);
314 
316 };
317 
320  public:
325 
334 
335  static void RegisterReflection() {
336  // `f_initialize_with_tune_context` is not registered
337  // `f_apply` is not registered
338  // `f_as_string` is not registered
339  // `f_clone` is not registered
340  }
341 
342  void InitializeWithTuneContext(const TuneContext& context) final;
343  ffi::Array<tir::Schedule> Apply(const tir::Schedule& sch, const tir::BlockRV& block) final;
344  ScheduleRule Clone() const final;
345  TVM_FFI_DECLARE_OBJECT_INFO_FINAL("meta_schedule.PyScheduleRule", PyScheduleRuleNode,
347 };
348 
349 } // namespace meta_schedule
350 } // namespace tvm
351 
352 #endif // TVM_META_SCHEDULE_SCHEDULE_RULE_H_
Container of constant int that adds more constructors.
Definition: expr.h:600
The schedule rule with customized methods on the python-side.
Definition: schedule_rule.h:319
FInitializeWithTuneContext f_initialize_with_tune_context
The packed function to the InitializeWithTuneContext function.
Definition: schedule_rule.h:327
ScheduleRule::FAsString FAsString
Definition: schedule_rule.h:324
void InitializeWithTuneContext(const TuneContext &context) final
Initialize the schedule rule with tuning context.
ScheduleRule::FInitializeWithTuneContext FInitializeWithTuneContext
Definition: schedule_rule.h:321
static void RegisterReflection()
Definition: schedule_rule.h:335
ScheduleRule::FClone FClone
Definition: schedule_rule.h:323
TVM_FFI_DECLARE_OBJECT_INFO_FINAL("meta_schedule.PyScheduleRule", PyScheduleRuleNode, ScheduleRuleNode)
FApply f_apply
The packed function to the Apply function.
Definition: schedule_rule.h:329
ScheduleRule::FApply FApply
Definition: schedule_rule.h:322
FAsString f_as_string
The packed function to the AsString function.
Definition: schedule_rule.h:331
FClone f_clone
The packed function to the Clone function.
Definition: schedule_rule.h:333
ScheduleRule Clone() const final
Deep clone the schedule rule.
ffi::Array< tir::Schedule > Apply(const tir::Schedule &sch, const tir::BlockRV &block) final
Apply a schedule rule to the specific block in the given schedule.
Rules to modify a block in a schedule.
Definition: schedule_rule.h:40
virtual void InitializeWithTuneContext(const TuneContext &context)=0
Initialize the schedule rule with tuning context.
static void RegisterReflection()
Definition: schedule_rule.h:45
virtual ~ScheduleRuleNode()=default
Virtual destructor.
virtual ScheduleRule Clone() const =0
Deep clone the schedule rule.
virtual ffi::Array< tir::Schedule > Apply(const tir::Schedule &sch, const tir::BlockRV &block)=0
Apply a schedule rule to the specific block in the given schedule.
static constexpr const bool _type_mutable
Definition: schedule_rule.h:70
TVM_FFI_DECLARE_OBJECT_INFO("meta_schedule.ScheduleRule", ScheduleRuleNode, Object)
Managed reference to ScheduleRuleNode.
Definition: schedule_rule.h:78
static ScheduleRule AddRFactor(int max_jobs_per_core, ffi::Optional< Integer > max_innermost_factor)
Create a rule: add-rfactor to some blocks if needed.
static ffi::Array< ScheduleRule, void > DefaultLLVM()
Create default schedule rules for LLVM.
ffi::TypedFunction< ffi::Array< tir::Schedule >(const tir::Schedule &, const tir::BlockRV &)> FApply
The function type of Apply method.
Definition: schedule_rule.h:92
static ScheduleRule RandomComputeLocation()
A rule that randomly selects a compute-at location for a free block.
TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(ScheduleRule, ObjectRef, ScheduleRuleNode)
static ScheduleRule MultiLevelTilingWideVector(ffi::String structure, Integer vector_length_in_bits, ffi::Optional< Integer > max_innermost_factor, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_read, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_write)
Extension of MultiLevelTiling for backends with wide vectors. The loop over the innermost spatial axi...
static ScheduleRule AutoBind(int max_threadblocks, ffi::Array< Integer > thread_extents, int max_threads_per_block=-1)
Auto bind loops around the block to BlockIdx and ThreadIdx.
static ffi::Array< ScheduleRule, void > DefaultCUDA()
Create default schedule rules for CUDA.
static ScheduleRule ApplyCustomRule()
Create a rule that applies customized rules registered using block attribute schedule_rule....
ffi::TypedFunction< void(const TuneContext &)> FInitializeWithTuneContext
The function type of InitializeWithTuneContext method.
Definition: schedule_rule.h:84
static ScheduleRule InlineConstantScalars()
Inline blocks that produce a constant scalar. Such blocks get in the way of ReverseComputeInline duri...
static ScheduleRule AutoInline(bool into_producer, bool into_consumer, bool inline_const_tensor, bool disallow_if_then_else, bool require_injective, bool require_ordered, ffi::Optional< ffi::Array< ffi::String >> disallow_op)
Create an auto-inline rule that inlines spatial blocks if it satisfies some conditions.
static ffi::Array< ScheduleRule, void > DefaultRISCV(int vlen)
Create default schedule rules for RISCV CPU (RVV)
ffi::TypedFunction< ScheduleRule()> FClone
The function type of Clone method.
Definition: schedule_rule.h:102
static ScheduleRule CrossThreadReduction(ffi::Array< Integer > thread_extents)
Create a schedule rule which applies cross-thread reduction to some reduction blocks correspondingly ...
static ffi::Array< ScheduleRule, void > DefaultARM(const ffi::String &type)
Create default schedule rules for ARM CPU (NEON and DOTPROD)
ffi::TypedFunction< ffi::String()> FAsString
The function type of AsString method, which gets the schedule rule as a string with its name.
Definition: schedule_rule.h:97
static ScheduleRule MultiLevelTiling(ffi::String structure, ffi::Optional< ffi::Array< ffi::String >> tile_binds, ffi::Optional< Integer > max_innermost_factor, ffi::Optional< ffi::Array< Integer >> vector_load_lens, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_read, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_write, ffi::Optional< ffi::Function > filter_fn=std::nullopt)
Create a mega rule: multi-level tiling with data reuse.
static ffi::Array< ScheduleRule, void > DefaultHexagon()
Create default schedule rules for Hexagon.
static ScheduleRule MultiLevelTilingWithIntrin(ffi::String intrin_name, ffi::String structure, ffi::Optional< ffi::Array< ffi::String >> tile_binds, ffi::Optional< Integer > max_innermost_factor, ffi::Optional< ffi::Array< Integer >> vector_load_lens, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_read, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_write)
Extension of MultiLevelTiling for auto-tensorization with a single intrinsic.
static bool IsApplyCustomRule(const ScheduleRule &rule)
Check if the rule is ApplyCustomRule
static ffi::Array< ScheduleRule, void > DefaultCUDATensorCore()
Create default schedule rules for CUDA with TensorCore.
static ScheduleRule PyScheduleRule(FInitializeWithTuneContext f_initialize_with_tune_context, FApply f_apply, FClone f_clone, FAsString f_as_string)
Create a schedule rule with customized methods on the python-side.
static ffi::Array< ScheduleRule, void > DefaultX86(const ffi::String &type)
Create default schedule rules for x86 (AVX512 and VNNI)
static ScheduleRule ParallelizeVectorizeUnroll(int max_jobs_per_core, int max_vectorize_extent, ffi::Array< Integer > unroll_max_steps, bool unroll_explicit)
Mark parallelize, vectorize and unroll to the root block. The mark will be applied to each block in a...
static ScheduleRule MultiLevelTilingTensorCore(ffi::Array< ffi::Map< ffi::String, ffi::String >> intrin_groups, ffi::String structure, ffi::Optional< ffi::Array< ffi::String >> tile_binds, ffi::Optional< Integer > max_innermost_factor, ffi::Optional< ffi::Array< Integer >> vector_load_lens, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_read, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_write, bool use_software_pipeline)
Extension of MultiLevelTiling for auto-tensorization with multiple groups of candidate tensor core in...
Managed reference to TuneContextNode.
Definition: tune_context.h:98
Managed reference to BlockRVNode.
Definition: schedule.h:63
Managed reference to ScheduleNode.
Definition: schedule.h:885
Base expr nodes in TVM.
Performance counters for profiling via the PAPI library.
Definition: analyzer.h:37
A managed object in the TVM runtime.