tvm
schedule_rule.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
20 #ifndef TVM_META_SCHEDULE_SCHEDULE_RULE_H_
21 #define TVM_META_SCHEDULE_SCHEDULE_RULE_H_
22 
23 #include <tvm/ffi/container/array.h>
24 #include <tvm/ffi/container/map.h>
25 #include <tvm/ffi/function.h>
26 #include <tvm/ffi/optional.h>
27 #include <tvm/ffi/reflection/registry.h>
28 #include <tvm/ffi/string.h>
29 #include <tvm/ir/expr.h>
30 #include <tvm/runtime/object.h>
32 
33 namespace tvm {
34 namespace meta_schedule {
35 
36 class TuneContext;
37 class ScheduleRule;
38 
/*!
 * \brief Rules to modify a block in a schedule.
 * \note Abstract interface: InitializeWithTuneContext, Apply and Clone are pure virtual.
 */
40 class ScheduleRuleNode : public runtime::Object {
41  public:
    /*! \brief Virtual destructor. */
43  virtual ~ScheduleRuleNode() = default;
44 
    /*!
     * \brief Reflection hook: instantiates refl::ObjectDef<ScheduleRuleNode>.
     * \note No fields are attached to the definition in this body.
     */
45  static void RegisterReflection() {
46  namespace refl = tvm::ffi::reflection;
47  refl::ObjectDef<ScheduleRuleNode>();
48  }
49 
    /*!
     * \brief Initialize the rule with the tuning context.
     * \param context The tuning context handed to the rule.
     */
55  virtual void InitializeWithTuneContext(const TuneContext& context) = 0;
56 
    /*!
     * \brief Apply a schedule rule to the specific block in the given schedule.
     * \param sch The given schedule.
     * \param block The block in `sch` that the rule is applied to.
     * \return An array of schedules.
     *         NOTE(review): presumably the schedules produced by applying the rule; confirm
     *         against the concrete implementations.
     */
63  virtual ffi::Array<s_tir::Schedule> Apply(const s_tir::Schedule& sch,
64  const s_tir::SBlockRV& block) = 0;
65 
    /*!
     * \brief Deep clone the schedule rule.
     * \return The cloned schedule rule.
     */
70  virtual ScheduleRule Clone() const = 0;
71 
    // Compile-time flag, set to true for this node type.
    // NOTE(review): presumably read by the TVM FFI object system to permit mutation through
    // references; confirm against the FFI documentation.
72  static constexpr const bool _type_mutable = true;
    // Declares the object type info: type key "meta_schedule.ScheduleRule", parent type Object.
73  TVM_FFI_DECLARE_OBJECT_INFO("meta_schedule.ScheduleRule", ScheduleRuleNode, Object);
74 };
75 
/*!
 * \brief Managed reference to ScheduleRuleNode.
 * \note This listing is incomplete: the opening lines of several factory declarations are
 *       absent. Each gap is marked below, using the signatures shown in this page's member index.
 */
80 class ScheduleRule : public runtime::ObjectRef {
81  public:
    /*! \brief The function type of InitializeWithTuneContext method. */
86  using FInitializeWithTuneContext = ffi::TypedFunction<void(const TuneContext&)>;
    /*! \brief The function type of Apply method. */
93  using FApply = ffi::TypedFunction<ffi::Array<s_tir::Schedule>(const s_tir::Schedule&,
94  const s_tir::SBlockRV&)>;
    /*! \brief Function type that gets the schedule rule as string with name (no arguments). */
99  using FAsString = ffi::TypedFunction<ffi::String()>;
    /*! \brief The function type of Clone method. */
104  using FClone = ffi::TypedFunction<ScheduleRule()>;
    /*! \brief Create a rule that applies customized rules registered using block attribute `schedule_rule`. */
110  TVM_DLL static ScheduleRule ApplyCustomRule();
    /*! \brief Check if the rule is ApplyCustomRule. */
112  TVM_DLL static bool IsApplyCustomRule(const ScheduleRule& rule);
    /*! \brief Create an auto-inline rule that inlines spatial blocks if they satisfy some conditions. */
124  TVM_DLL static ScheduleRule AutoInline(bool into_producer, //
125  bool into_consumer, //
126  bool inline_const_tensor, //
127  bool disallow_if_then_else, //
128  bool require_injective, //
129  bool require_ordered, //
130  ffi::Optional<ffi::Array<ffi::String>> disallow_op);
131 
    // NOTE: the member index of this page also documents `static ScheduleRule
    // InlineConstantScalars()` and `static ScheduleRule RandomComputeLocation()`; their
    // declarations do not appear anywhere in this listing.
140 
    /*!
     * \brief Create a mega rule: multi-level tiling with data reuse.
     * \note The opening line of this declaration (`static ScheduleRule MultiLevelTiling(` per the
     *       member index) is absent from this listing; only the parameter list follows.
     */
161  ffi::String structure, //
162  ffi::Optional<ffi::Array<ffi::String>> tile_binds, //
163  ffi::Optional<Integer> max_innermost_factor, //
164  ffi::Optional<ffi::Array<Integer>> vector_load_lens, //
165  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_read, //
166  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_write,
167  ffi::Optional<ffi::Function> filter_fn = std::nullopt);
168 
    /*!
     * \brief Extension of MultiLevelTiling for auto-tensorization with a single intrinsic.
     * \note The opening line (`static ScheduleRule MultiLevelTilingWithIntrin(` per the member
     *       index) is absent from this listing.
     */
187  ffi::String intrin_name, ffi::String structure,
188  ffi::Optional<ffi::Array<ffi::String>> tile_binds,
189  ffi::Optional<Integer> max_innermost_factor,
190  ffi::Optional<ffi::Array<Integer>> vector_load_lens,
191  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_read,
192  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_write);
193 
    /*!
     * \brief Extension of MultiLevelTiling for auto-tensorization with multiple groups of
     *        candidate tensor core intrinsics (wording truncated in the member index).
     * \note The opening line (`static ScheduleRule MultiLevelTilingTensorCore(` per the member
     *       index) is absent from this listing.
     */
215  ffi::Array<ffi::Map<ffi::String, ffi::String>> intrin_groups, ffi::String structure,
216  ffi::Optional<ffi::Array<ffi::String>> tile_binds,
217  ffi::Optional<Integer> max_innermost_factor,
218  ffi::Optional<ffi::Array<Integer>> vector_load_lens,
219  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_read,
220  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_write, bool use_software_pipeline);
221 
    /*!
     * \brief Extension of MultiLevelTiling for backends with wide vectors.
     * \note The opening line (`static ScheduleRule MultiLevelTilingWideVector(` per the member
     *       index) is absent from this listing.
     */
234  ffi::String structure, Integer vector_length_in_bits,
235  ffi::Optional<Integer> max_innermost_factor,
236  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_read,
237  ffi::Optional<ffi::Map<ffi::String, ffi::Any>> reuse_write);
238 
    /*! \brief Create a rule: add-rfactor to some blocks if needed. */
247  TVM_DLL static ScheduleRule AddRFactor(int max_jobs_per_core, //
248  ffi::Optional<Integer> max_innermost_factor);
    /*! \brief Create a schedule rule which applies cross-thread reduction to some reduction blocks. */
255  TVM_DLL static ScheduleRule CrossThreadReduction(ffi::Array<Integer> thread_extents);
    /*! \brief Mark parallelize, vectorize and unroll to the root block. */
274  TVM_DLL static ScheduleRule ParallelizeVectorizeUnroll(int max_jobs_per_core, //
275  int max_vectorize_extent, //
276  ffi::Array<Integer> unroll_max_steps, //
277  bool unroll_explicit);
    /*!
     * \brief Auto bind loops around the block to BlockIdx and ThreadIdx.
     * \note `max_threads_per_block` defaults to -1.
     *       NOTE(review): the meaning of -1 is not shown here; confirm in the implementation.
     */
286  TVM_DLL static ScheduleRule AutoBind(int max_threadblocks, ffi::Array<Integer> thread_extents,
287  int max_threads_per_block = -1);
    /*!
     * \brief Create a schedule rule with customized methods on the python-side.
     * \param f_initialize_with_tune_context The packed function to the InitializeWithTuneContext function.
     * \param f_apply The packed function to the Apply function.
     * \param f_clone The packed function to the Clone function.
     * \param f_as_string The packed function to the AsString function.
     */
296  TVM_DLL static ScheduleRule PyScheduleRule(
297  FInitializeWithTuneContext f_initialize_with_tune_context, //
298  FApply f_apply, //
299  FClone f_clone, //
300  FAsString f_as_string);
301 
    /*! \brief Create default schedule rules for LLVM. */
303  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultLLVM();
    /*! \brief Create default schedule rules for x86 (AVX512 and VNNI). */
305  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultX86(const ffi::String& type);
    /*! \brief Create default schedule rules for CUDA. */
307  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultCUDA();
    /*! \brief Create default schedule rules for CUDA with TensorCore. */
309  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultCUDATensorCore();
    /*! \brief Create default schedule rules for Hexagon. */
311  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultHexagon();
    /*! \brief Create default schedule rules for ARM CPU (NEON and DOTPROD). */
313  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultARM(const ffi::String& type);
    /*! \brief Create default schedule rules for RISCV CPU (RVV). */
315  TVM_DLL static ffi::Array<ScheduleRule, void> DefaultRISCV(int vlen);
316 
    // NOTE: the member index lists
    // TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(ScheduleRule, ObjectRef, ScheduleRuleNode)
    // as a member of this class; that line is absent from this listing.
318 };
319 
// NOTE: the class header is absent from this listing. Per this page's member index the class is
// PyScheduleRuleNode ("The schedule rule with customized methods on the python-side."), and the
// TVM_FFI_DECLARE_OBJECT_INFO_FINAL entry there names ScheduleRuleNode as its parent type.
322  public:
    // NOTE: the member index places the aliases FInitializeWithTuneContext, FApply, FClone and
    // FAsString (each aliasing the ScheduleRule:: type of the same name) here; they are absent
    // from this listing.
327 
    // NOTE: the member index places four fields here, also absent from this listing:
    //   f_initialize_with_tune_context - the packed function to the InitializeWithTuneContext function
    //   f_apply                        - the packed function to the Apply function
    //   f_as_string                    - the packed function to the AsString function
    //   f_clone                        - the packed function to the Clone function
336 
    /*!
     * \brief Reflection hook: instantiates refl::ObjectDef<PyScheduleRuleNode>.
     * \note None of the four function fields is registered, as the comments in the body state.
     */
337  static void RegisterReflection() {
338  // `f_initialize_with_tune_context` is not registered
339  // `f_apply` is not registered
340  // `f_as_string` is not registered
341  // `f_clone` is not registered
342  namespace refl = tvm::ffi::reflection;
343  refl::ObjectDef<PyScheduleRuleNode>();
344  }
345 
    // The three overrides below are declared `final`; their definitions are not in this header.
    /*! \brief Initialize the rule with the tuning context. */
346  void InitializeWithTuneContext(const TuneContext& context) final;
    /*! \brief Apply a schedule rule to the specific block in the given schedule. */
347  ffi::Array<s_tir::Schedule> Apply(const s_tir::Schedule& sch, const s_tir::SBlockRV& block) final;
    /*! \brief Deep clone the schedule rule. */
348  ScheduleRule Clone() const final;
    // Declares the final object type info with type key "meta_schedule.PyScheduleRule".
    // NOTE: the macro's closing argument line is absent from this listing; the member index
    // shows the full call ending in `ScheduleRuleNode)`.
349  TVM_FFI_DECLARE_OBJECT_INFO_FINAL("meta_schedule.PyScheduleRule", PyScheduleRuleNode,
351 };
352 
353 } // namespace meta_schedule
354 } // namespace tvm
355 
356 #endif // TVM_META_SCHEDULE_SCHEDULE_RULE_H_
Container of constant int that adds more constructors.
Definition: expr.h:599
The schedule rule with customized methods on the python-side.
Definition: schedule_rule.h:321
FInitializeWithTuneContext f_initialize_with_tune_context
The packed function to the InitializeWithTuneContext function.
Definition: schedule_rule.h:329
ScheduleRule::FAsString FAsString
Definition: schedule_rule.h:326
ffi::Array< s_tir::Schedule > Apply(const s_tir::Schedule &sch, const s_tir::SBlockRV &block) final
Apply a schedule rule to the specific block in the given schedule.
void InitializeWithTuneContext(const TuneContext &context) final
Initialize the schedule rule with the tuning context.
ScheduleRule::FInitializeWithTuneContext FInitializeWithTuneContext
Definition: schedule_rule.h:323
static void RegisterReflection()
Definition: schedule_rule.h:337
ScheduleRule::FClone FClone
Definition: schedule_rule.h:325
TVM_FFI_DECLARE_OBJECT_INFO_FINAL("meta_schedule.PyScheduleRule", PyScheduleRuleNode, ScheduleRuleNode)
FApply f_apply
The packed function to the Apply function.
Definition: schedule_rule.h:331
ScheduleRule::FApply FApply
Definition: schedule_rule.h:324
FAsString f_as_string
The packed function to the AsString function.
Definition: schedule_rule.h:333
FClone f_clone
The packed function to the Clone function.
Definition: schedule_rule.h:335
ScheduleRule Clone() const final
Deep clone the schedule rule.
Rules to modify a block in a schedule.
Definition: schedule_rule.h:40
virtual void InitializeWithTuneContext(const TuneContext &context)=0
Initialize the schedule rule with the tuning context.
static void RegisterReflection()
Definition: schedule_rule.h:45
virtual ~ScheduleRuleNode()=default
Virtual destructor.
virtual ScheduleRule Clone() const =0
Deep clone the schedule rule.
static constexpr const bool _type_mutable
Definition: schedule_rule.h:72
TVM_FFI_DECLARE_OBJECT_INFO("meta_schedule.ScheduleRule", ScheduleRuleNode, Object)
virtual ffi::Array< s_tir::Schedule > Apply(const s_tir::Schedule &sch, const s_tir::SBlockRV &block)=0
Apply a schedule rule to the specific block in the given schedule.
Managed reference to ScheduleRuleNode.
Definition: schedule_rule.h:80
static ScheduleRule AddRFactor(int max_jobs_per_core, ffi::Optional< Integer > max_innermost_factor)
Create a rule: add-rfactor to some blocks if needed.
static ffi::Array< ScheduleRule, void > DefaultLLVM()
Create default schedule rules for LLVM.
static ScheduleRule RandomComputeLocation()
A rule that randomly selects a compute-at location for a free block.
TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(ScheduleRule, ObjectRef, ScheduleRuleNode)
static ScheduleRule MultiLevelTilingWideVector(ffi::String structure, Integer vector_length_in_bits, ffi::Optional< Integer > max_innermost_factor, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_read, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_write)
Extension of MultiLevelTiling for backends with wide vectors. The loop over the innermost spatial axi...
static ScheduleRule AutoBind(int max_threadblocks, ffi::Array< Integer > thread_extents, int max_threads_per_block=-1)
Auto bind loops around the block to BlockIdx and ThreadIdx.
static ffi::Array< ScheduleRule, void > DefaultCUDA()
Create default schedule rules for CUDA.
static ScheduleRule ApplyCustomRule()
Create a rule that applies customized rules registered using block attribute schedule_rule....
ffi::TypedFunction< void(const TuneContext &)> FInitializeWithTuneContext
The function type of InitializeWithTuneContext method.
Definition: schedule_rule.h:86
static ScheduleRule InlineConstantScalars()
Inline blocks that produce a constant scalar. Such blocks get in the way of ReverseComputeInline duri...
static ScheduleRule AutoInline(bool into_producer, bool into_consumer, bool inline_const_tensor, bool disallow_if_then_else, bool require_injective, bool require_ordered, ffi::Optional< ffi::Array< ffi::String >> disallow_op)
Create an auto-inline rule that inlines spatial blocks if they satisfy some conditions.
ffi::TypedFunction< ffi::Array< s_tir::Schedule >(const s_tir::Schedule &, const s_tir::SBlockRV &)> FApply
The function type of Apply method.
Definition: schedule_rule.h:94
static ffi::Array< ScheduleRule, void > DefaultRISCV(int vlen)
Create default schedule rules for RISCV CPU (RVV)
ffi::TypedFunction< ScheduleRule()> FClone
The function type of Clone method.
Definition: schedule_rule.h:104
static ScheduleRule CrossThreadReduction(ffi::Array< Integer > thread_extents)
Create a schedule rule which applies cross-thread reduction to some reduction blocks correspondingly ...
static ffi::Array< ScheduleRule, void > DefaultARM(const ffi::String &type)
Create default schedule rules for ARM CPU (NEON and DOTPROD)
ffi::TypedFunction< ffi::String()> FAsString
Get the schedule rule as string with name.
Definition: schedule_rule.h:99
static ScheduleRule MultiLevelTiling(ffi::String structure, ffi::Optional< ffi::Array< ffi::String >> tile_binds, ffi::Optional< Integer > max_innermost_factor, ffi::Optional< ffi::Array< Integer >> vector_load_lens, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_read, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_write, ffi::Optional< ffi::Function > filter_fn=std::nullopt)
Create a mega rule: multi-level tiling with data reuse.
static ffi::Array< ScheduleRule, void > DefaultHexagon()
Create default schedule rules for Hexagon.
static ScheduleRule MultiLevelTilingWithIntrin(ffi::String intrin_name, ffi::String structure, ffi::Optional< ffi::Array< ffi::String >> tile_binds, ffi::Optional< Integer > max_innermost_factor, ffi::Optional< ffi::Array< Integer >> vector_load_lens, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_read, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_write)
Extension of MultiLevelTiling for auto-tensorization with a single intrinsic.
static bool IsApplyCustomRule(const ScheduleRule &rule)
Check if the rule is ApplyCustomRule
static ffi::Array< ScheduleRule, void > DefaultCUDATensorCore()
Create default schedule rules for CUDA with TensorCore.
static ScheduleRule PyScheduleRule(FInitializeWithTuneContext f_initialize_with_tune_context, FApply f_apply, FClone f_clone, FAsString f_as_string)
Create a schedule rule with customized methods on the python-side.
static ffi::Array< ScheduleRule, void > DefaultX86(const ffi::String &type)
Create default schedule rules for x86 (AVX512 and VNNI)
static ScheduleRule ParallelizeVectorizeUnroll(int max_jobs_per_core, int max_vectorize_extent, ffi::Array< Integer > unroll_max_steps, bool unroll_explicit)
Mark parallelize, vectorize and unroll to the root block. The mark will be applied to each block in a...
static ScheduleRule MultiLevelTilingTensorCore(ffi::Array< ffi::Map< ffi::String, ffi::String >> intrin_groups, ffi::String structure, ffi::Optional< ffi::Array< ffi::String >> tile_binds, ffi::Optional< Integer > max_innermost_factor, ffi::Optional< ffi::Array< Integer >> vector_load_lens, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_read, ffi::Optional< ffi::Map< ffi::String, ffi::Any >> reuse_write, bool use_software_pipeline)
Extension of MultiLevelTiling for auto-tensorization with multiple groups of candidate tensor core in...
Managed reference to TuneContextNode.
Definition: tune_context.h:98
Managed reference to SBlockRVNode.
Definition: schedule.h:65
Managed reference to ScheduleNode.
Definition: schedule.h:897
Base expr nodes in TVM.
Definition: repr_printer.h:91
Performance counters for profiling via the PAPI library.
Definition: analyzer.h:37
A managed object in the TVM runtime.