tvm
profiling.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
24 #ifndef TVM_RUNTIME_PROFILING_H_
25 #define TVM_RUNTIME_PROFILING_H_
26 
27 #include <tvm/ffi/container/array.h>
28 #include <tvm/ffi/container/map.h>
29 #include <tvm/ffi/function.h>
30 #include <tvm/runtime/base.h>
31 #include <tvm/runtime/device_api.h>
32 #include <tvm/runtime/module.h>
33 #include <tvm/runtime/ndarray.h>
34 #include <tvm/runtime/object.h>
35 
36 #include <stack>
37 #include <string>
38 #include <unordered_map>
39 #include <utility>
40 #include <vector>
41 
42 namespace tvm {
43 
44 namespace runtime {
45 
/*! \brief Base class for all timer implementations. */
52 class TimerNode : public Object {
53  public:
 /*! \brief Start the timer. */
58  virtual void Start() = 0;
 /*! \brief Stop the timer. */
63  virtual void Stop() = 0;
 /*!
  * \brief Synchronize timer state and return elapsed time between Start and Stop.
  * \return The elapsed time (nanoseconds, going by the method name).
  */
74  virtual int64_t SyncAndGetElapsedNanos() = 0;
75 
 /*! \brief Virtual destructor for this polymorphic base class. */
76  virtual ~TimerNode() {}
77 
 /*! \brief Type key string for this object type. */
78  static constexpr const char* _type_key = "runtime.TimerNode";
80 };
81 
/*! \brief Timer for a specific device. */
88 class Timer : public ObjectRef {
89  public:
 /*!
  * \brief Get a device specific timer.
  * \param dev The device the timer is requested for.
  * \return A Timer for `dev`.
  */
145  static TVM_DLL Timer Start(Device dev);
146 
148 };
149 
158 
159 namespace profiling {
/*! \brief Wrapper for Device because Device is not passable across the ffi::Function interface. */
163 struct DeviceWrapperNode : public Object {
166 
169 
 /*! \brief Type key string for this object type. */
170  static constexpr const char* _type_key = "runtime.profiling.DeviceWrapper";
172 };
173 
/*! \brief Wrapper for Device. */
175 class DeviceWrapper : public ObjectRef {
176  public:
 /*!
  * \brief Create a wrapper whose node is a DeviceWrapperNode constructed from `dev`.
  * \param dev The device to wrap.
  */
177  explicit DeviceWrapper(Device dev) { data_ = make_object<DeviceWrapperNode>(dev); }
179 };
180 
/*! \brief Data collected from a profiling run. Includes per-call metrics and per-device metrics. */
183 class ReportNode : public Object {
184  public:
 /*! \brief A list of function calls and the metrics recorded for that call. */
192  Array<Map<String, ffi::Any>> calls;
 /*! \brief Metrics collected for the entire run of the model on a per-device basis. */
200  Map<String, Map<String, ffi::Any>> device_metrics;
 /*! \brief Configuration entries stored with the report, keyed by name. */
205  Map<String, ffi::Any> configuration;
 /*! \brief Output `calls` in CSV format. */
210  String AsCSV() const;
 /*!
  * \brief Create a human readable table of profiling metrics.
  * \param sort Defaults to true. NOTE(review): presumably orders the rows; confirm the sort key.
  * \param aggregate Defaults to true. NOTE(review): presumably merges repeated calls; confirm.
  * \param compute_col_sums Defaults to true. NOTE(review): presumably adds per-column totals; confirm.
  * \return The table as a string.
  */
225  String AsTable(bool sort = true, bool aggregate = true, bool compute_col_sums = true) const;
 /*! \brief Convert this report to JSON. */
258  String AsJSON() const;
259 
 /*! \brief Type key string for this object type. */
260  static constexpr const char* _type_key = "runtime.profiling.Report";
262 };
263 
/*! \brief Object reference class for ReportNode. */
264 class Report : public ObjectRef {
265  public:
 /*!
  * \brief Construct a Report from its three components.
  * \param calls Per-call metrics (same type as ReportNode::calls).
  * \param device_metrics Per-device metrics (same type as ReportNode::device_metrics).
  * \param configuration Configuration entries (same type as ReportNode::configuration).
  */
271  explicit Report(Array<Map<String, ffi::Any>> calls,
272  Map<String, Map<String, ffi::Any>> device_metrics,
273  Map<String, ffi::Any> configuration);
274 
 /*!
  * \brief Build a Report from a JSON string.
  * \param json The serialized report.
  *        NOTE(review): presumably the format produced by ReportNode::AsJSON; confirm.
  * \return The deserialized Report.
  */
279  static Report FromJSON(String json);
281 };
282 
/*! \brief Interface for user defined profiling metric collection. */
301 class MetricCollectorNode : public Object {
302  public:
 /*!
  * \brief Initialization call. Called before profiling has started.
  * \param devs The devices, each wrapped in a DeviceWrapper.
  */
307  virtual void Init(Array<DeviceWrapper> devs) = 0;
 /*!
  * \brief Start collecting metrics for a function call.
  * \param dev The device the call runs on.
  * \return An object for this collection.
  *         NOTE(review): presumably this is what gets passed back to Stop; confirm.
  */
314  virtual ObjectRef Start(Device dev) = 0;
 /*!
  * \brief Stop collecting metrics.
  * \param obj NOTE(review): presumably the object returned by the matching Start call; confirm.
  * \return The collected metrics as a map keyed by name.
  */
320  virtual Map<String, ffi::Any> Stop(ffi::ObjectRef obj) = 0;
321 
 /*! \brief Virtual destructor for this polymorphic interface. */
322  virtual ~MetricCollectorNode() {}
323 
 /*! \brief Type key string for this object type. */
324  static constexpr const char* _type_key = "runtime.profiling.MetricCollector";
326 };
327 
/*! \brief Wrapper for MetricCollectorNode. */
329 class MetricCollector : public ObjectRef {
330  public:
332 };
333 
/*!
 * \brief Record for a single call; Profiler stores these in `calls_` and `in_flight_`.
 */
335 struct CallFrame {
 /*! \brief Name associated with the call. */
339  String name;
 /*! \brief Additional metrics for the call, keyed by metric name. */
343  std::unordered_map<std::string, ffi::Any> extra_metrics;
 /*!
  * \brief Pairs of a metric collector and an object associated with it.
  *        NOTE(review): presumably the object is the one returned by
  *        MetricCollectorNode::Start; confirm.
  */
347  std::vector<std::pair<MetricCollector, ObjectRef>> extra_collectors;
348 };
349 
/*!
 * \brief Records calls bracketed by StartCall/StopCall while running between Start and Stop.
 */
369 class Profiler {
370  public:
 /*!
  * \brief Construct a profiler.
  * \param devs The devices to profile.
  * \param metric_collectors Additional metric collectors.
  * \param configuration Optional configuration entries; empty by default.
  */
384  explicit Profiler(std::vector<Device> devs, std::vector<MetricCollector> metric_collectors,
385  std::unordered_map<String, ffi::Any> configuration = {});
 /*! \brief Start the profiler. */
390  void Start();
 /*! \brief Stop the profiler. */
395  void Stop();
 /*!
  * \brief Start a function call.
  * \param name Name of the call.
  * \param dev Device the call runs on.
  * \param extra_metrics Optional additional metrics for this call; empty by default.
  */
406  void StartCall(String name, Device dev,
407  std::unordered_map<std::string, ffi::Any> extra_metrics = {});
 /*!
  * \brief Stop the last StartCall.
  * \param extra_metrics Optional additional metrics for this call; empty by default.
  */
412  void StopCall(std::unordered_map<std::string, ffi::Any> extra_metrics = {});
 /*!
  * \brief Check if the profiler is currently running.
  * \return The value of `is_running_`.
  */
422  bool IsRunning() const { return is_running_; }
423 
424  private:
 /*! \brief Devices handed to this profiler. */
425  std::vector<Device> devs_;
 /*! \brief Running flag reported by IsRunning(); initially false. */
426  bool is_running_{false};
 /*! \brief Call frames. NOTE(review): presumably the completed calls; confirm. */
427  std::vector<CallFrame> calls_;
 /*! \brief Stack of call frames. NOTE(review): presumably calls started but not yet stopped; confirm. */
428  std::stack<CallFrame> in_flight_;
 /*! \brief Metric collectors. */
429  std::vector<MetricCollector> collectors_;
 /*! \brief Configuration entries. */
430  std::unordered_map<String, ffi::Any> configuration_;
431 };
432 
433 /*! \brief A duration in time. */
434 class DurationNode : public Object {
435  public:
436  /*! \brief The duration as a floating point number of microseconds. */
437  double microseconds;
438 
439  /*! \brief Construct a new duration.
440  * \param a The duration in microseconds.
441  */
442  explicit DurationNode(double a) : microseconds(a) {}
443 
 /*! \brief Type key string for this object type. */
444  static constexpr const char* _type_key = "runtime.profiling.Duration";
446 };
447 
448 /*! \brief A percentage of something. */
449 class PercentNode : public Object {
450  public:
451  /*! \brief The percent as a floating point value out of 100%, i.e. if `percent` is 10 then we have 10%. */
452  double percent;
453 
454  /*! \brief Construct a new percentage.
455  * \param a The percentage out of 100.
456  */
457  explicit PercentNode(double a) : percent(a) {}
458 
 /*! \brief Type key string for this object type. */
459  static constexpr const char* _type_key = "runtime.profiling.Percent";
461 };
462 
463 /*! \brief A count of something. */
464 class CountNode : public Object {
465  public:
466  /*! \brief The actual count. */
467  int64_t value;
468 
469  /*! \brief Construct a new count.
470  * \param a The count.
471  */
472  explicit CountNode(int64_t a) : value(a) {}
473 
 /*! \brief Type key string for this object type. */
474  static constexpr const char* _type_key = "runtime.profiling.Count";
476 };
477 
478 /*! \brief A ratio of two things. */
479 class RatioNode : public Object {
480  public:
481  /*! \brief The ratio as a double precision floating point number. */
482  double ratio;
483 
484  /*! \brief Construct a new ratio.
485  * \param a The ratio.
486  */
487  explicit RatioNode(double a) : ratio(a) {}
488 
 /*! \brief Type key string for this object type. */
489  static constexpr const char* _type_key = "runtime.profiling.Ratio";
491 };
492 
/*!
 * \brief String representation of an array of NDArray shapes.
 * \param shapes The NDArrays.
 * \return The string representation.
 */
497 String ShapeString(const std::vector<NDArray>& shapes);
/*!
 * \brief Overload taking a single NDArray plus a data type.
 *        NOTE(review): presumably `shape` holds the shape values rather than tensor data; confirm.
 * \param shape The NDArray.
 * \param dtype The data type.
 * \return The string representation.
 */
503 String ShapeString(NDArray shape, DLDataType dtype);
/*!
 * \brief Overload taking the shape as a vector of int64_t plus a data type.
 * \param shape The shape dimensions.
 * \param dtype The data type.
 * \return The string representation.
 */
509 String ShapeString(const std::vector<int64_t>& shape, DLDataType dtype);
510 
/*!
 * \brief Collect performance information of a function execution.
 * \param mod The module containing the function.
 * \param func_name Name of the function in `mod`.
 * \param device_type Device type, as an integer.
 * \param device_id Device id.
 * \param warmup_iters Warmup iteration count.
 *        NOTE(review): presumably iterations run before measurement starts; confirm.
 * \param collectors Metric collectors to use.
 * \return An ffi::Function. NOTE(review): presumably calling it runs the profile; confirm.
 */
542 ffi::Function ProfileFunction(ffi::Module mod, std::string func_name, int device_type,
543  int device_id, int warmup_iters, Array<MetricCollector> collectors);
544 
593  int min_repeat_ms, int limit_zero_time_iterations,
594  int cooldown_interval_ms, int repeats_to_cooldown,
595  int cache_flush_bytes = 0, ffi::Function f_preproc = nullptr);
596 
597 } // namespace profiling
598 } // namespace runtime
599 } // namespace tvm
600 
601 #endif // TVM_RUNTIME_PROFILING_H_
Managed NDArray. The array is backed by reference counted blocks.
Definition: ndarray.h:53
Base class for all implementations.
Definition: profiling.h:52
virtual int64_t SyncAndGetElapsedNanos()=0
Synchronize timer state and return elapsed time between Start and Stop.
static constexpr const char * _type_key
Definition: profiling.h:78
virtual void Stop()=0
Stop the timer.
virtual void Start()=0
Start the timer.
TVM_DECLARE_BASE_OBJECT_INFO(TimerNode, Object)
virtual ~TimerNode()
Definition: profiling.h:76
Timer for a specific device.
Definition: profiling.h:88
TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Timer, ObjectRef, TimerNode)
static Timer Start(Device dev)
Get a device specific timer.
Definition: profiling.h:464
static constexpr const char * _type_key
Definition: profiling.h:474
TVM_DECLARE_FINAL_OBJECT_INFO(CountNode, Object)
CountNode(int64_t a)
Definition: profiling.h:472
int64_t value
Definition: profiling.h:467
Wrapper for Device.
Definition: profiling.h:175
TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(DeviceWrapper, ObjectRef, DeviceWrapperNode)
DeviceWrapper(Device dev)
Definition: profiling.h:177
Definition: profiling.h:434
double microseconds
Definition: profiling.h:437
DurationNode(double a)
Definition: profiling.h:442
static constexpr const char * _type_key
Definition: profiling.h:444
TVM_DECLARE_FINAL_OBJECT_INFO(DurationNode, Object)
Interface for user defined profiling metric collection.
Definition: profiling.h:301
static constexpr const char * _type_key
Definition: profiling.h:324
virtual ObjectRef Start(Device dev)=0
Start collecting metrics for a function call.
virtual void Init(Array< DeviceWrapper > devs)=0
Initialization call. Called before profiling has started. Any expensive precomputation should happen ...
TVM_DECLARE_BASE_OBJECT_INFO(MetricCollectorNode, Object)
virtual Map< String, ffi::Any > Stop(ffi::ObjectRef obj)=0
Stop collecting metrics.
virtual ~MetricCollectorNode()
Definition: profiling.h:322
Wrapper for MetricCollectorNode.
Definition: profiling.h:329
TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(MetricCollector, ObjectRef, MetricCollectorNode)
Definition: profiling.h:449
PercentNode(double a)
Definition: profiling.h:457
static constexpr const char * _type_key
Definition: profiling.h:459
double percent
Definition: profiling.h:452
TVM_DECLARE_FINAL_OBJECT_INFO(PercentNode, Object)
Definition: profiling.h:369
bool IsRunning() const
Check if the profiler is currently running.
Definition: profiling.h:422
void StartCall(String name, Device dev, std::unordered_map< std::string, ffi::Any > extra_metrics={})
Start a function call.
void Stop()
Stop the profiler.
void StopCall(std::unordered_map< std::string, ffi::Any > extra_metrics={})
Stop the last StartCall.
Profiler(std::vector< Device > devs, std::vector< MetricCollector > metric_collectors, std::unordered_map< String, ffi::Any > configuration={})
profiling::Report Report()
A report of total runtime between Start and Stop as well as individual statistics for each StartCall-...
void Start()
Start the profiler.
Definition: profiling.h:479
TVM_DECLARE_FINAL_OBJECT_INFO(RatioNode, Object)
RatioNode(double a)
Definition: profiling.h:487
double ratio
Definition: profiling.h:482
static constexpr const char * _type_key
Definition: profiling.h:489
Data collected from a profiling run. Includes per-call metrics and per-device metrics.
Definition: profiling.h:183
static constexpr const char * _type_key
Definition: profiling.h:260
String AsTable(bool sort=true, bool aggregate=true, bool compute_col_sums=true) const
Create a human readable table of profiling metrics.
String AsJSON() const
Convert this report to JSON.
Array< Map< String, ffi::Any > > calls
A list of function calls and the metrics recorded for that call.
Definition: profiling.h:192
Map< String, ffi::Any > configuration
Definition: profiling.h:205
String AsCSV() const
Output calls in CSV format.
TVM_DECLARE_FINAL_OBJECT_INFO(ReportNode, Object)
Map< String, Map< String, ffi::Any > > device_metrics
Metrics collected for the entire run of the model on a per-device basis.
Definition: profiling.h:200
Definition: profiling.h:264
Report(Array< Map< String, ffi::Any >> calls, Map< String, Map< String, ffi::Any >> device_metrics, Map< String, ffi::Any > configuration)
TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(Report, ObjectRef, ReportNode)
static Report FromJSON(String json)
Abstract device memory management API.
tvm::relax::Function Function
Definition: transform.h:42
String ShapeString(const std::vector< NDArray > &shapes)
String representation of an array of NDArray shapes.
ffi::Function WrapTimeEvaluator(ffi::Function f, Device dev, int number, int repeat, int min_repeat_ms, int limit_zero_time_iterations, int cooldown_interval_ms, int repeats_to_cooldown, int cache_flush_bytes=0, ffi::Function f_preproc=nullptr)
Wrap a timer function to measure the time cost of a given packed function.
ffi::Function ProfileFunction(ffi::Module mod, std::string func_name, int device_type, int device_id, int warmup_iters, Array< MetricCollector > collectors)
Collect performance information of a function execution. Usually used with a compiled PrimFunc (via t...
Timer DefaultTimer(Device dev)
Default timer if one does not exist for the device.
constexpr const char * device_id
The allocation device for global malloc in host.
Definition: stmt.h:1090
constexpr const char * device_type
The device type.
Definition: stmt.h:1092
tvm::PrimExpr mod(const tvm::PrimExpr &a, const tvm::PrimExpr &b)
Definition: broadcast.h:306
Tensor shape(const Tensor &src, DataType dtype, const std::string name="T_shape", const std::string tag=kInjective)
Get the shape of input tensor.
Definition: transform.h:1945
Tensor repeat(const Tensor &x, int repeats, int axis, std::string name="T_repeat", std::string tag=kBroadcast)
Creates an operation to repeat elements of an array.
Definition: transform.h:1336
Performance counters for profiling via the PAPI library.
Definition: analyzer.h:37
DLDevice Device
Definition: device_api.h:42
A device-independent managed NDArray abstraction.
A managed object in the TVM runtime.
Runtime container of the functions generated by TVM, This is used to support dynamically link,...
Definition: profiling.h:335
Timer timer
Definition: profiling.h:341
std::vector< std::pair< MetricCollector, ObjectRef > > extra_collectors
Definition: profiling.h:347
String name
Definition: profiling.h:339
Device dev
Definition: profiling.h:337
std::unordered_map< std::string, ffi::Any > extra_metrics
Definition: profiling.h:343
Wrapper for Device because Device is not passable across the ffi::Function interface.
Definition: profiling.h:163
TVM_DECLARE_BASE_OBJECT_INFO(DeviceWrapperNode, Object)
Device device
Definition: profiling.h:165
DeviceWrapperNode(Device device)
Definition: profiling.h:168
static constexpr const char * _type_key
Definition: profiling.h:170