24 #ifndef TVM_RUNTIME_PROFILING_H_
25 #define TVM_RUNTIME_PROFILING_H_
27 #include <tvm/ffi/container/array.h>
28 #include <tvm/ffi/container/map.h>
29 #include <tvm/ffi/function.h>
38 #include <unordered_map>
78 static constexpr
const char*
_type_key =
"runtime.TimerNode";
88 class Timer :
public ObjectRef {
159 namespace profiling {
170 static constexpr
const char*
_type_key =
"runtime.profiling.DeviceWrapper";
225 String
AsTable(
bool sort =
true,
bool aggregate =
true,
bool compute_col_sums =
true)
const;
260 static constexpr
const char*
_type_key =
"runtime.profiling.Report";
271 explicit Report(Array<Map<String, ffi::Any>> calls,
272 Map<String, Map<String, ffi::Any>> device_metrics,
273 Map<String, ffi::Any> configuration);
307 virtual void Init(Array<DeviceWrapper> devs) = 0;
320 virtual Map<String, ffi::Any>
Stop(ffi::ObjectRef obj) = 0;
324 static constexpr
const char*
_type_key =
"runtime.profiling.MetricCollector";
384 explicit Profiler(std::vector<Device> devs, std::vector<MetricCollector> metric_collectors,
385 std::unordered_map<String, ffi::Any> configuration = {});
407 std::unordered_map<std::string, ffi::Any> extra_metrics = {});
412 void StopCall(std::unordered_map<std::string, ffi::Any> extra_metrics = {});
425 std::vector<Device> devs_;
426 bool is_running_{
false};
427 std::vector<CallFrame> calls_;
428 std::stack<CallFrame> in_flight_;
429 std::vector<MetricCollector> collectors_;
430 std::unordered_map<String, ffi::Any> configuration_;
444 static constexpr
const char*
_type_key =
"runtime.profiling.Duration";
459 static constexpr
const char*
_type_key =
"runtime.profiling.Percent";
474 static constexpr
const char*
_type_key =
"runtime.profiling.Count";
489 static constexpr
const char*
_type_key =
"runtime.profiling.Ratio";
543 int device_id,
int warmup_iters, Array<MetricCollector> collectors);
593 int min_repeat_ms,
int limit_zero_time_iterations,
594 int cooldown_interval_ms,
int repeats_to_cooldown,
595 int cache_flush_bytes = 0,
ffi::Function f_preproc =
nullptr);
Managed NDArray. The array is backed by reference counted blocks.
Definition: ndarray.h:53
Base class for all implementations.
Definition: profiling.h:52
virtual int64_t SyncAndGetElapsedNanos()=0
Synchronize timer state and return elapsed time between Start and Stop.
static constexpr const char * _type_key
Definition: profiling.h:78
virtual void Stop()=0
Stop the timer.
virtual void Start()=0
Start the timer.
TVM_DECLARE_BASE_OBJECT_INFO(TimerNode, Object)
virtual ~TimerNode()
Definition: profiling.h:76
Timer for a specific device.
Definition: profiling.h:88
TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Timer, ObjectRef, TimerNode)
static Timer Start(Device dev)
Get a device specific timer.
Definition: profiling.h:464
static constexpr const char * _type_key
Definition: profiling.h:474
TVM_DECLARE_FINAL_OBJECT_INFO(CountNode, Object)
CountNode(int64_t a)
Definition: profiling.h:472
int64_t value
Definition: profiling.h:467
Wrapper for Device.
Definition: profiling.h:175
TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(DeviceWrapper, ObjectRef, DeviceWrapperNode)
DeviceWrapper(Device dev)
Definition: profiling.h:177
Definition: profiling.h:434
double microseconds
Definition: profiling.h:437
DurationNode(double a)
Definition: profiling.h:442
static constexpr const char * _type_key
Definition: profiling.h:444
TVM_DECLARE_FINAL_OBJECT_INFO(DurationNode, Object)
Interface for user defined profiling metric collection.
Definition: profiling.h:301
static constexpr const char * _type_key
Definition: profiling.h:324
virtual ObjectRef Start(Device dev)=0
Start collecting metrics for a function call.
virtual void Init(Array< DeviceWrapper > devs)=0
Initialization call. Called before profiling has started. Any expensive precomputation should happen ...
TVM_DECLARE_BASE_OBJECT_INFO(MetricCollectorNode, Object)
virtual Map< String, ffi::Any > Stop(ffi::ObjectRef obj)=0
Stop collecting metrics.
virtual ~MetricCollectorNode()
Definition: profiling.h:322
Wrapper for MetricCollectorNode.
Definition: profiling.h:329
TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(MetricCollector, ObjectRef, MetricCollectorNode)
Definition: profiling.h:449
PercentNode(double a)
Definition: profiling.h:457
static constexpr const char * _type_key
Definition: profiling.h:459
double percent
Definition: profiling.h:452
TVM_DECLARE_FINAL_OBJECT_INFO(PercentNode, Object)
Definition: profiling.h:369
bool IsRunning() const
Check if the profiler is currently running.
Definition: profiling.h:422
void StartCall(String name, Device dev, std::unordered_map< std::string, ffi::Any > extra_metrics={})
Start a function call.
void Stop()
Stop the profiler.
void StopCall(std::unordered_map< std::string, ffi::Any > extra_metrics={})
Stop the last StartCall.
Profiler(std::vector< Device > devs, std::vector< MetricCollector > metric_collectors, std::unordered_map< String, ffi::Any > configuration={})
profiling::Report Report()
A report of total runtime between Start and Stop as well as individual statistics for each StartCall-...
void Start()
Start the profiler.
Definition: profiling.h:479
TVM_DECLARE_FINAL_OBJECT_INFO(RatioNode, Object)
RatioNode(double a)
Definition: profiling.h:487
double ratio
Definition: profiling.h:482
static constexpr const char * _type_key
Definition: profiling.h:489
Data collected from a profiling run. Includes per-call metrics and per-device metrics.
Definition: profiling.h:183
static constexpr const char * _type_key
Definition: profiling.h:260
String AsTable(bool sort=true, bool aggregate=true, bool compute_col_sums=true) const
Create a human readable table of profiling metrics.
String AsJSON() const
Convert this report to JSON.
Array< Map< String, ffi::Any > > calls
A list of function calls and the metrics recorded for that call.
Definition: profiling.h:192
Map< String, ffi::Any > configuration
Definition: profiling.h:205
String AsCSV() const
Output calls in CSV format.
TVM_DECLARE_FINAL_OBJECT_INFO(ReportNode, Object)
Map< String, Map< String, ffi::Any > > device_metrics
Metrics collected for the entire run of the model on a per-device basis.
Definition: profiling.h:200
Definition: profiling.h:264
Report(Array< Map< String, ffi::Any >> calls, Map< String, Map< String, ffi::Any >> device_metrics, Map< String, ffi::Any > configuration)
TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(Report, ObjectRef, ReportNode)
static Report FromJSON(String json)
Abstract device memory management API.
String ShapeString(const std::vector< NDArray > &shapes)
String representation of an array of NDArray shapes.
ffi::Function WrapTimeEvaluator(ffi::Function f, Device dev, int number, int repeat, int min_repeat_ms, int limit_zero_time_iterations, int cooldown_interval_ms, int repeats_to_cooldown, int cache_flush_bytes=0, ffi::Function f_preproc=nullptr)
Wrap a timer function to measure the time cost of a given packed function.
ffi::Function ProfileFunction(ffi::Module mod, std::string func_name, int device_type, int device_id, int warmup_iters, Array< MetricCollector > collectors)
Collect performance information of a function execution. Usually used with a compiled PrimFunc (via t...
Timer DefaultTimer(Device dev)
Default timer if one does not exist for the device.
constexpr const char * device_id
The allocation device for global malloc in host.
Definition: stmt.h:1090
constexpr const char * device_type
The device type.
Definition: stmt.h:1092
tvm::PrimExpr mod(const tvm::PrimExpr &a, const tvm::PrimExpr &b)
Definition: broadcast.h:306
Tensor shape(const Tensor &src, DataType dtype, const std::string name="T_shape", const std::string tag=kInjective)
Get the shape of input tensor.
Definition: transform.h:1945
Tensor repeat(const Tensor &x, int repeats, int axis, std::string name="T_repeat", std::string tag=kBroadcast)
Creates an operation to repeat elements of an array.
Definition: transform.h:1336
Performance counters for profiling via the PAPI library.
Definition: analyzer.h:37
DLDevice Device
Definition: device_api.h:42
A device-independent managed NDArray abstraction.
A managed object in the TVM runtime.
Runtime container of the functions generated by TVM. This is used to support dynamically link,...
Definition: profiling.h:335
Timer timer
Definition: profiling.h:341
std::vector< std::pair< MetricCollector, ObjectRef > > extra_collectors
Definition: profiling.h:347
String name
Definition: profiling.h:339
Device dev
Definition: profiling.h:337
std::unordered_map< std::string, ffi::Any > extra_metrics
Definition: profiling.h:343
Wrapper for Device because Device is not passable across the ffi::Function interface.
Definition: profiling.h:163
TVM_DECLARE_BASE_OBJECT_INFO(DeviceWrapperNode, Object)
Device device
Definition: profiling.h:165
DeviceWrapperNode(Device device)
Definition: profiling.h:168
static constexpr const char * _type_key
Definition: profiling.h:170