tvm
profiling.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
24 #ifndef TVM_RUNTIME_PROFILING_H_
25 #define TVM_RUNTIME_PROFILING_H_
26 
27 #include <tvm/ffi/container/array.h>
28 #include <tvm/ffi/container/map.h>
29 #include <tvm/ffi/function.h>
30 #include <tvm/runtime/base.h>
31 #include <tvm/runtime/device_api.h>
32 #include <tvm/runtime/module.h>
33 #include <tvm/runtime/object.h>
34 #include <tvm/runtime/tensor.h>
35 
36 #include <stack>
37 #include <string>
38 #include <unordered_map>
39 #include <utility>
40 #include <vector>
41 
42 namespace tvm {
43 
44 namespace runtime {
45 
/*!
 * \brief Abstract base class for timer implementations.
 *
 * Start, Stop and SyncAndGetElapsedNanos are pure virtual, so every concrete
 * timer has to provide all three.
 */
52 class TimerNode : public Object {
53  public:
    /*! \brief Start the timer. */
58  virtual void Start() = 0;
    /*! \brief Stop the timer. */
63  virtual void Stop() = 0;
    /*!
     * \brief Synchronize timer state and return elapsed time between Start and Stop.
     * \return The elapsed time as a 64-bit integer (nanoseconds, going by the
     *  method name -- TODO confirm against the implementations).
     */
74  virtual int64_t SyncAndGetElapsedNanos() = 0;
75 
    /*! \brief Virtual destructor with an empty body. */
76  virtual ~TimerNode() {}
77 
    // Compile-time flag, set to true for this node type.
    // NOTE(review): presumably read by the FFI object machinery -- confirm.
78  static constexpr const bool _type_mutable = true;
    // Registers this type under the key "runtime.TimerNode" with Object as parent.
79  TVM_FFI_DECLARE_OBJECT_INFO("runtime.TimerNode", TimerNode, Object);
80 };
81 
/*!
 * \brief Timer for a specific device.
 *
 * Reference type deriving from ObjectRef.
 */
88 class Timer : public ObjectRef {
89  public:
    /*!
     * \brief Get a device specific timer.
     * \param dev The device the timer is requested for.
     * \return A Timer for `dev`.
     * NOTE(review): the name suggests the returned timer has already been
     * started -- confirm against the implementation.
     */
145  static TVM_DLL Timer Start(Device dev);
146 
148 };
149 
158 
159 namespace profiling {
/*!
 * \brief Wrapper for `Device` because `Device` is not passable across the
 *  ffi::Function interface.
 */
163 struct DeviceWrapperNode : public Object {
166 
    // Registers this type under the key "runtime.profiling.DeviceWrapper".
169  TVM_FFI_DECLARE_OBJECT_INFO("runtime.profiling.DeviceWrapper", DeviceWrapperNode, Object);
170 };
171 
/*! \brief Wrapper for `Device`. Reference type deriving from ObjectRef. */
173 class DeviceWrapper : public ObjectRef {
174  public:
    /*!
     * \brief Wrap a device.
     *
     * Allocates a DeviceWrapperNode from `dev` with ffi::make_object and stores
     * it in `data_`.
     * \param dev The device to wrap.
     */
175  explicit DeviceWrapper(Device dev) { data_ = ffi::make_object<DeviceWrapperNode>(dev); }
177 };
178 
/*!
 * \brief Data collected from a profiling run. Includes per-call metrics and
 *  per-device metrics.
 */
181 class ReportNode : public Object {
182  public:
    /*! \brief A list of function calls and the metrics recorded for that call. */
190  ffi::Array<ffi::Map<ffi::String, ffi::Any>> calls;
    /*! \brief Metrics collected for the entire run of the model on a per-device basis. */
198  ffi::Map<ffi::String, ffi::Map<ffi::String, ffi::Any>> device_metrics;
    /*!
     * \brief String-keyed configuration values stored with the report.
     * NOTE(review): presumably the configuration handed to the Profiler -- confirm.
     */
203  ffi::Map<ffi::String, ffi::Any> configuration;
    /*!
     * \brief Output `calls` in CSV format.
     * \return The CSV text.
     */
208  ffi::String AsCSV() const;
    /*!
     * \brief Create a human readable table of profiling metrics.
     * \param sort Defaults to true; presumably orders the rows -- TODO confirm the sort key.
     * \param aggregate Defaults to true; presumably merges rows that describe
     *  the same call -- TODO confirm.
     * \param compute_col_sums Defaults to true; presumably adds per-column
     *  totals -- TODO confirm.
     * \return The table text.
     */
223  ffi::String AsTable(bool sort = true, bool aggregate = true, bool compute_col_sums = true) const;
    /*!
     * \brief Convert this report to JSON.
     * \return The JSON text.
     */
256  ffi::String AsJSON() const;
    // Registers this type under the key "runtime.profiling.Report" as a final type.
257  TVM_FFI_DECLARE_OBJECT_INFO_FINAL("runtime.profiling.Report", ReportNode, Object);
258 };
259 
/*! \brief Reference type for ReportNode, deriving from ObjectRef. */
260 class Report : public ObjectRef {
261  public:
    /*!
     * \brief Construct a Report from its three components.
     * \param calls Per-call metrics; same type as ReportNode::calls.
     * \param device_metrics Per-device metrics; same type as ReportNode::device_metrics.
     * \param configuration Configuration values; same type as ReportNode::configuration.
     */
267  explicit Report(ffi::Array<ffi::Map<ffi::String, ffi::Any>> calls,
268  ffi::Map<ffi::String, ffi::Map<ffi::String, ffi::Any>> device_metrics,
269  ffi::Map<ffi::String, ffi::Any> configuration);
270 
    /*!
     * \brief Build a Report from a JSON string.
     * \param json The JSON text to read.
     * \return The resulting Report.
     * NOTE(review): presumably accepts the output of ReportNode::AsJSON -- confirm.
     */
275  static Report FromJSON(ffi::String json);
277 };
278 
/*!
 * \brief Interface for user defined profiling metric collection.
 *
 * Init, Start and Stop are pure virtual, so every collector has to provide
 * all three.
 */
297 class MetricCollectorNode : public Object {
298  public:
    /*!
     * \brief Initialization call. Called before profiling has started, so it
     *  is the place for any expensive precomputation.
     * \param devs The devices, each wrapped in a DeviceWrapper.
     */
303  virtual void Init(ffi::Array<DeviceWrapper> devs) = 0;
    /*!
     * \brief Start collecting metrics for a function call.
     * \param dev The device the call runs on.
     * \return An object owned by the collector.
     * NOTE(review): presumably this is the object later handed back to Stop -- confirm.
     */
310  virtual ObjectRef Start(Device dev) = 0;
    /*!
     * \brief Stop collecting metrics.
     * \param obj NOTE(review): presumably the object returned by the matching
     *  Start call -- confirm.
     * \return A map from metric name to metric value.
     */
316  virtual ffi::Map<ffi::String, ffi::Any> Stop(ffi::ObjectRef obj) = 0;
317 
    /*! \brief Virtual destructor with an empty body. */
318  virtual ~MetricCollectorNode() {}
319 
    // Compile-time flag, set to true for this node type.
    // NOTE(review): presumably read by the FFI object machinery -- confirm.
320  static constexpr const bool _type_mutable = true;
    // Registers this type under the key "runtime.profiling.MetricCollector".
321  TVM_FFI_DECLARE_OBJECT_INFO("runtime.profiling.MetricCollector", MetricCollectorNode, Object);
322 };
323 
/*! \brief Wrapper for MetricCollectorNode. Reference type deriving from ObjectRef. */
325 class MetricCollector : public ObjectRef {
326  public:
328 };
329 
/*!
 * \brief Per-call record used by Profiler, which keeps CallFrame values in
 *  both a vector and a stack.
 */
331 struct CallFrame {
    /*! \brief Name of the call. */
335  ffi::String name;
    /*! \brief Additional metrics for this call, keyed by metric name. */
339  std::unordered_map<std::string, ffi::Any> extra_metrics;
    /*!
     * \brief Metric collectors attached to this call, each paired with an ObjectRef.
     * NOTE(review): presumably the ObjectRef is what MetricCollectorNode::Start
     * returned for this call -- confirm.
     */
343  std::vector<std::pair<MetricCollector, ObjectRef>> extra_collectors;
344 };
345 
/*!
 * \brief Profiler that is switched on and off with Start/Stop and records
 *  individual calls delimited by StartCall/StopCall.
 */
365 class Profiler {
366  public:
    /*!
     * \brief Construct a profiler.
     * \param devs The devices to profile on.
     * \param metric_collectors User supplied metric collectors.
     * \param configuration Optional configuration values; defaults to empty.
     */
380  explicit Profiler(std::vector<Device> devs, std::vector<MetricCollector> metric_collectors,
381  std::unordered_map<ffi::String, ffi::Any> configuration = {});
    /*! \brief Start the profiler. */
386  void Start();
    /*! \brief Stop the profiler. */
391  void Stop();
    /*!
     * \brief Start a function call.
     * \param name Name of the call.
     * \param dev The device the call runs on.
     * \param extra_metrics Optional additional metrics for this call; defaults to empty.
     */
402  void StartCall(ffi::String name, Device dev,
403  std::unordered_map<std::string, ffi::Any> extra_metrics = {});
    /*!
     * \brief Stop the last StartCall.
     * \param extra_metrics Optional additional metrics for the call being
     *  stopped; defaults to empty.
     */
408  void StopCall(std::unordered_map<std::string, ffi::Any> extra_metrics = {});
    /*!
     * \brief Check if the profiler is currently running.
     * \return The current value of `is_running_`.
     */
418  bool IsRunning() const { return is_running_; }
419 
420  private:
    // Devices supplied at construction (presumably; the constructor body is not in this header).
421  std::vector<Device> devs_;
    // Running flag reported by IsRunning(); starts out false.
422  bool is_running_{false};
    // Recorded call frames. NOTE(review): presumably the finished calls -- confirm.
423  std::vector<CallFrame> calls_;
    // Stack of call frames. NOTE(review): presumably calls started but not yet stopped -- confirm.
424  std::stack<CallFrame> in_flight_;
    // Metric collectors (presumably the `metric_collectors` constructor argument).
425  std::vector<MetricCollector> collectors_;
    // Configuration values (presumably the `configuration` constructor argument).
426  std::unordered_map<ffi::String, ffi::Any> configuration_;
427 };
428 
429 /*! \brief A duration in time. */
430 class DurationNode : public Object {
431  public:
432  /*! \brief The duration as a floating point number of microseconds. */
433  double microseconds;
434 
435  /*! \brief Construct a new duration.
436  * \param a The duration in microseconds.
437  */
438  explicit DurationNode(double a) : microseconds(a) {}
    // Registers this type under the key "runtime.profiling.Duration" as a final type.
439  TVM_FFI_DECLARE_OBJECT_INFO_FINAL("runtime.profiling.Duration", DurationNode, Object);
440 };
441 
442 /*! \brief A percentage of something. */
443 class PercentNode : public Object {
444  public:
445  /*! \brief The percent as a floating point value out of 100%, i.e. if `percent` is 10 then we have 10%. */
446  double percent;
447 
448  /*! \brief Construct a new percentage.
449  * \param a The percentage out of 100.
450  */
451  explicit PercentNode(double a) : percent(a) {}
    // Registers this type under the key "runtime.profiling.Percent" as a final type.
452  TVM_FFI_DECLARE_OBJECT_INFO_FINAL("runtime.profiling.Percent", PercentNode, Object);
453 };
454 
455 /*! \brief A count of something. */
456 class CountNode : public Object {
457  public:
458  /*! \brief The actual count. */
459  int64_t value;
460 
461  /*! \brief Construct a new count.
462  * \param a The count.
463  */
464  explicit CountNode(int64_t a) : value(a) {}
    // Registers this type under the key "runtime.profiling.Count" as a final type.
465  TVM_FFI_DECLARE_OBJECT_INFO_FINAL("runtime.profiling.Count", CountNode, Object);
466 };
467 
468 /*! \brief A ratio of two things. */
469 class RatioNode : public Object {
470  public:
471  /*! \brief The ratio as a double precision floating point number. */
472  double ratio;
473 
474  /*! \brief Construct a new ratio.
475  * \param a The ratio.
476  */
477  explicit RatioNode(double a) : ratio(a) {}
    // Registers this type under the key "runtime.profiling.Ratio" as a final type.
478  TVM_FFI_DECLARE_OBJECT_INFO_FINAL("runtime.profiling.Ratio", RatioNode, Object);
479 };
480 
/*!
 * \brief ffi::String representation of an array of Tensor shapes.
 * \param shapes The tensors whose shapes are rendered.
 * \return The shapes as a string.
 */
485 ffi::String ShapeString(const std::vector<Tensor>& shapes);
/*!
 * \brief String representation of a shape that is itself passed as a Tensor.
 * \param shape Tensor describing the shape.
 *  NOTE(review): presumably its elements are the dimensions -- confirm.
 * \param dtype The data type that accompanies the shape.
 * \return The shape as a string.
 */
491 ffi::String ShapeString(Tensor shape, DLDataType dtype);
/*!
 * \brief String representation of a shape given as a vector of dimensions.
 * \param shape The dimensions.
 * \param dtype The data type that accompanies the shape.
 * \return The shape as a string.
 */
497 ffi::String ShapeString(const std::vector<int64_t>& shape, DLDataType dtype);
498 
/*!
 * \brief Collect performance information of a function execution. Usually
 *  used with a compiled PrimFunc.
 * \param mod The module to look the function up in (presumably -- TODO confirm).
 * \param func_name Name of the function to profile.
 * \param device_type Device type, passed as an int.
 * \param device_id Device id, passed as an int.
 * \param warmup_iters Number of warm-up iterations (presumably run before
 *  metrics are collected -- TODO confirm).
 * \param collectors Metric collectors to use while profiling.
 * \return An ffi::Function.
 *  NOTE(review): presumably calling it runs the profiled function and yields
 *  the collected metrics -- confirm.
 */
530 ffi::Function ProfileFunction(ffi::Module mod, std::string func_name, int device_type,
531  int device_id, int warmup_iters,
532  ffi::Array<MetricCollector> collectors);
533 
582  int min_repeat_ms, int limit_zero_time_iterations,
583  int cooldown_interval_ms, int repeats_to_cooldown,
584  int cache_flush_bytes = 0, ffi::Function f_preproc = nullptr);
585 
586 } // namespace profiling
587 } // namespace runtime
588 } // namespace tvm
589 
590 #endif // TVM_RUNTIME_PROFILING_H_
Managed Tensor. The array is backed by reference counted blocks.
Definition: tensor.h:53
Base class for all implementations.
Definition: profiling.h:52
virtual int64_t SyncAndGetElapsedNanos()=0
Synchronize timer state and return elapsed time between Start and Stop.
TVM_FFI_DECLARE_OBJECT_INFO("runtime.TimerNode", TimerNode, Object)
virtual void Stop()=0
Stop the timer.
virtual void Start()=0
Start the timer.
static constexpr const bool _type_mutable
Definition: profiling.h:78
virtual ~TimerNode()
Definition: profiling.h:76
Timer for a specific device.
Definition: profiling.h:88
TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(Timer, ObjectRef, TimerNode)
static Timer Start(Device dev)
Get a device specific timer.
Definition: profiling.h:456
CountNode(int64_t a)
Definition: profiling.h:464
int64_t value
Definition: profiling.h:459
TVM_FFI_DECLARE_OBJECT_INFO_FINAL("runtime.profiling.Count", CountNode, Object)
Wrapper for Device.
Definition: profiling.h:173
TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(DeviceWrapper, ObjectRef, DeviceWrapperNode)
DeviceWrapper(Device dev)
Definition: profiling.h:175
Definition: profiling.h:430
double microseconds
Definition: profiling.h:433
DurationNode(double a)
Definition: profiling.h:438
TVM_FFI_DECLARE_OBJECT_INFO_FINAL("runtime.profiling.Duration", DurationNode, Object)
Interface for user defined profiling metric collection.
Definition: profiling.h:297
TVM_FFI_DECLARE_OBJECT_INFO("runtime.profiling.MetricCollector", MetricCollectorNode, Object)
virtual ObjectRef Start(Device dev)=0
Start collecting metrics for a function call.
static constexpr const bool _type_mutable
Definition: profiling.h:320
virtual ffi::Map< ffi::String, ffi::Any > Stop(ffi::ObjectRef obj)=0
Stop collecting metrics.
virtual ~MetricCollectorNode()
Definition: profiling.h:318
virtual void Init(ffi::Array< DeviceWrapper > devs)=0
Initialization call. Called before profiling has started. Any expensive precomputation should happen ...
Wrapper for MetricCollectorNode.
Definition: profiling.h:325
TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(MetricCollector, ObjectRef, MetricCollectorNode)
Definition: profiling.h:443
PercentNode(double a)
Definition: profiling.h:451
double percent
Definition: profiling.h:446
TVM_FFI_DECLARE_OBJECT_INFO_FINAL("runtime.profiling.Percent", PercentNode, Object)
Definition: profiling.h:365
bool IsRunning() const
Check if the profiler is currently running.
Definition: profiling.h:418
Profiler(std::vector< Device > devs, std::vector< MetricCollector > metric_collectors, std::unordered_map< ffi::String, ffi::Any > configuration={})
void Stop()
Stop the profiler.
void StopCall(std::unordered_map< std::string, ffi::Any > extra_metrics={})
Stop the last StartCall.
void StartCall(ffi::String name, Device dev, std::unordered_map< std::string, ffi::Any > extra_metrics={})
Start a function call.
profiling::Report Report()
A report of total runtime between Start and Stop as well as individual statistics for each StartCall-...
void Start()
Start the profiler.
Definition: profiling.h:469
RatioNode(double a)
Definition: profiling.h:477
TVM_FFI_DECLARE_OBJECT_INFO_FINAL("runtime.profiling.Ratio", RatioNode, Object)
double ratio
Definition: profiling.h:472
Data collected from a profiling run. Includes per-call metrics and per-device metrics.
Definition: profiling.h:181
ffi::String AsJSON() const
Convert this report to JSON.
ffi::Map< ffi::String, ffi::Map< ffi::String, ffi::Any > > device_metrics
Metrics collected for the entire run of the model on a per-device basis.
Definition: profiling.h:198
ffi::String AsTable(bool sort=true, bool aggregate=true, bool compute_col_sums=true) const
Create a human readable table of profiling metrics.
TVM_FFI_DECLARE_OBJECT_INFO_FINAL("runtime.profiling.Report", ReportNode, Object)
ffi::Array< ffi::Map< ffi::String, ffi::Any > > calls
A list of function calls and the metrics recorded for that call.
Definition: profiling.h:190
ffi::Map< ffi::String, ffi::Any > configuration
Definition: profiling.h:203
ffi::String AsCSV() const
Output calls in CSV format.
Definition: profiling.h:260
static Report FromJSON(ffi::String json)
TVM_FFI_DEFINE_OBJECT_REF_METHODS_NOTNULLABLE(Report, ObjectRef, ReportNode)
Report(ffi::Array< ffi::Map< ffi::String, ffi::Any >> calls, ffi::Map< ffi::String, ffi::Map< ffi::String, ffi::Any >> device_metrics, ffi::Map< ffi::String, ffi::Any > configuration)
Abstract device memory management API.
tvm::relax::Function Function
Definition: transform.h:42
ffi::Function ProfileFunction(ffi::Module mod, std::string func_name, int device_type, int device_id, int warmup_iters, ffi::Array< MetricCollector > collectors)
Collect performance information of a function execution. Usually used with a compiled PrimFunc (via t...
ffi::String ShapeString(const std::vector< Tensor > &shapes)
ffi::String representation of an array of Tensor shapes
ffi::Function WrapTimeEvaluator(ffi::Function f, Device dev, int number, int repeat, int min_repeat_ms, int limit_zero_time_iterations, int cooldown_interval_ms, int repeats_to_cooldown, int cache_flush_bytes=0, ffi::Function f_preproc=nullptr)
Wrap a timer function to measure the time cost of a given packed function.
Timer DefaultTimer(Device dev)
Default timer if one does not exist for the device.
constexpr const char * device_id
The allocation device for global malloc in host.
Definition: stmt.h:1061
constexpr const char * device_type
The device type.
Definition: stmt.h:1063
tvm::PrimExpr mod(const tvm::PrimExpr &a, const tvm::PrimExpr &b)
Definition: broadcast.h:308
Tensor shape(const Tensor &src, DataType dtype, const std::string name="T_shape", const std::string tag=kInjective)
Get the shape of input tensor.
Definition: transform.h:1960
Tensor repeat(const Tensor &x, int repeats, int axis, std::string name="T_repeat", std::string tag=kBroadcast)
Creates an operation to repeat elements of an array.
Definition: transform.h:1349
Performance counters for profiling via the PAPI library.
Definition: analyzer.h:37
DLDevice Device
Definition: device_api.h:42
A managed object in the TVM runtime.
Runtime container of the functions generated by TVM, This is used to support dynamically link,...
A device-independent managed Tensor abstraction.
Definition: profiling.h:331
Timer timer
Definition: profiling.h:337
std::vector< std::pair< MetricCollector, ObjectRef > > extra_collectors
Definition: profiling.h:343
Device dev
Definition: profiling.h:333
std::unordered_map< std::string, ffi::Any > extra_metrics
Definition: profiling.h:339
ffi::String name
Definition: profiling.h:335
Wrapper for Device because Device is not passable across the ffi::Function interface.
Definition: profiling.h:163
Device device
Definition: profiling.h:165
DeviceWrapperNode(Device device)
Definition: profiling.h:168
TVM_FFI_DECLARE_OBJECT_INFO("runtime.profiling.DeviceWrapper", DeviceWrapperNode, Object)