tvm
profiling.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
24 #ifndef TVM_RUNTIME_PROFILING_H_
25 #define TVM_RUNTIME_PROFILING_H_
26 
29 #include <tvm/runtime/device_api.h>
30 #include <tvm/runtime/object.h>
32 #include <tvm/runtime/registry.h>
33 
34 #include <stack>
35 #include <string>
36 #include <unordered_map>
37 #include <utility>
38 #include <vector>
39 
40 namespace tvm {
41 
42 namespace runtime {
43 
/*! \brief Base class for all timer implementations. */
50 class TimerNode : public Object {
51  public:
    /*! \brief Start the timer. */
56  virtual void Start() = 0;
    /*! \brief Stop the timer. */
61  virtual void Stop() = 0;
    /*!
     * \brief Synchronize timer state and return elapsed time between Start and Stop.
     * \return The elapsed time as a 64-bit integer; the method name indicates nanoseconds.
     */
72  virtual int64_t SyncAndGetElapsedNanos() = 0;
73 
    /*! \brief Virtual destructor so derived timers can be destroyed through a base pointer. */
74  virtual ~TimerNode() {}
75 
    /*! \brief Type key string associated with this node type. */
76  static constexpr const char* _type_key = "TimerNode";
78 };
79 
/*! \brief Timer for a specific device. */
86 class Timer : public ObjectRef {
87  public:
    /*!
     * \brief Get a device specific timer.
     * \param dev The device to get a timer for.
     * \return A Timer object.
     *         NOTE(review): presumably the returned timer has already been started - confirm.
     */
140  static TVM_DLL Timer Start(Device dev);
141 
143 };
144 
153 
154 namespace profiling {
/*!
 * \brief Wrapper for `Device` because `Device` is not passable across the PackedFunc interface.
 *
 * NOTE(review): the member declarations of this struct are not visible in this listing.
 */
158 struct DeviceWrapperNode : public Object {
161 
164 
    /*! \brief Type key string associated with this node type. */
165  static constexpr const char* _type_key = "runtime.profiling.DeviceWrapper";
167 };
168 
/*! \brief Wrapper for `Device`. */
170 class DeviceWrapper : public ObjectRef {
171  public:
    /*!
     * \brief Construct a wrapper by allocating a DeviceWrapperNode from `dev` and storing it in `data_`.
     * \param dev The device to wrap.
     */
172  explicit DeviceWrapper(Device dev) { data_ = make_object<DeviceWrapperNode>(dev); }
174 };
175 
/*! \brief Data collected from a profiling run. Includes per-call metrics and per-device metrics. */
178 class ReportNode : public Object {
179  public:
    /*!
     * \brief Output calls in CSV format.
     * \return The CSV text as a String.
     */
205  String AsCSV() const;
    /*!
     * \brief Create a human readable table of profiling metrics.
     * \param sort Defaults to true.
     * \param aggregate Defaults to true.
     * \param compute_col_sums Defaults to true.
     * \return The table as a String.
     *
     * NOTE(review): the exact effect of each flag is not visible in this listing; presumably they
     * toggle row sorting, merging of repeated calls, and a column-sum row - confirm.
     */
220  String AsTable(bool sort = true, bool aggregate = true, bool compute_col_sums = true) const;
    /*!
     * \brief Convert this report to JSON.
     * \return The JSON text as a String.
     */
253  String AsJSON() const;
254 
    /*! \brief Type key string associated with this node type. */
255  static constexpr const char* _type_key = "runtime.profiling.Report";
257 };
258 
/*! \brief Object reference type declared alongside ReportNode. */
259 class Report : public ObjectRef {
260  public:
    /*
     * NOTE(review): the opening line of this constructor declaration is not visible in this
     * listing; only its trailing `device_metrics` and `configuration` parameters appear below.
     */
267  Map<String, Map<String, ObjectRef>> device_metrics,
268  Map<String, ObjectRef> configuration);
269 
    /*!
     * \brief Build a Report from the string `json`.
     * \param json The input string.
     *        NOTE(review): presumably the format produced by ReportNode::AsJSON - confirm.
     * \return The resulting Report.
     */
274  static Report FromJSON(String json);
276 };
277 
/*! \brief Interface for user defined profiling metric collection. */
296 class MetricCollectorNode : public Object {
297  public:
    /*!
     * \brief Initialization call. Called before profiling has started.
     * \param devs The wrapped devices passed to the collector.
     *        NOTE(review): presumably the devices that will be profiled - confirm.
     */
302  virtual void Init(Array<DeviceWrapper> devs) = 0;
    /*!
     * \brief Start collecting metrics for a function call.
     * \param dev The device argument for this call.
     * \return An ObjectRef.
     *         NOTE(review): presumably an opaque handle that is later passed back to stop
     *         collection - confirm.
     */
309  virtual ObjectRef Start(Device dev) = 0;
316 
    /*! \brief Virtual destructor so derived collectors can be destroyed through a base pointer. */
317  virtual ~MetricCollectorNode() {}
318 
    /*! \brief Type key string associated with this node type. */
319  static constexpr const char* _type_key = "runtime.profiling.MetricCollector";
321 };
322 
/*! \brief Wrapper for MetricCollectorNode. */
324 class MetricCollector : public ObjectRef {
325  public:
327 };
328 
/*!
 * \brief Per-call record type.
 *
 * NOTE(review): some members of this struct are not visible in this listing.
 */
330 struct CallFrame {
    /*! \brief Additional metrics for this call, keyed by a std::string name. */
338  std::unordered_map<std::string, ObjectRef> extra_metrics;
    /*!
     * \brief Pairs of a MetricCollector and an ObjectRef.
     *        NOTE(review): presumably the ObjectRef is the value returned by that collector's
     *        Start call - confirm.
     */
342  std::vector<std::pair<MetricCollector, ObjectRef>> extra_collectors;
343 };
344 
/*!
 * \brief Profiler that is started and stopped as a whole (Start/Stop) and records individual
 *        function calls in between (StartCall/StopCall).
 */
364 class Profiler {
365  public:
    /*!
     * \brief Construct a profiler.
     * \param devs The devices passed to the profiler.
     * \param metric_collectors The metric collectors passed to the profiler.
     * \param configuration Optional string-keyed configuration entries; defaults to empty.
     *
     * NOTE(review): how each argument is used is not visible in this listing.
     */
379  explicit Profiler(std::vector<Device> devs, std::vector<MetricCollector> metric_collectors,
380  std::unordered_map<String, ObjectRef> configuration = {});
    /*! \brief Start the profiler. */
385  void Start();
    /*! \brief Stop the profiler. */
390  void Stop();
    /*!
     * \brief Start a function call.
     * \param name The name given to this call.
     * \param dev The device argument for this call.
     * \param extra_metrics Optional additional metrics keyed by name; defaults to empty.
     */
401  void StartCall(String name, Device dev,
402  std::unordered_map<std::string, ObjectRef> extra_metrics = {});
    /*!
     * \brief Stop the last StartCall.
     * \param extra_metrics Optional additional metrics keyed by name; defaults to empty.
     */
407  void StopCall(std::unordered_map<std::string, ObjectRef> extra_metrics = {});
    /*!
     * \brief Check if the profiler is currently running.
     * \return The current value of `is_running_`.
     */
417  bool IsRunning() const { return is_running_; }
418 
419  private:
    /*! \brief Stored list of devices. */
420  std::vector<Device> devs_;
    /*! \brief Running flag returned by IsRunning(); initialized to false. */
421  bool is_running_{false};
    /*! \brief Vector of call frames. NOTE(review): presumably the completed calls - confirm. */
422  std::vector<CallFrame> calls_;
    /*! \brief Stack of call frames. NOTE(review): presumably calls started but not yet stopped - confirm. */
423  std::stack<CallFrame> in_flight_;
    /*! \brief Stored list of metric collectors. */
424  std::vector<MetricCollector> collectors_;
    /*! \brief Stored string-keyed configuration entries. */
425  std::unordered_map<String, ObjectRef> configuration_;
426 };
427 
428 /*! \brief A duration in time, stored as a floating point number of microseconds. */
429 class DurationNode : public Object {
430  public:
431  /*! \brief The duration as a floating point number of microseconds. */
432  double microseconds;
433 
434  /*! \brief Construct a new duration.
435  * \param a The duration in microseconds.
436  */
437  explicit DurationNode(double a) : microseconds(a) {}
438 
    /*! \brief Type key string associated with this node type. */
439  static constexpr const char* _type_key = "runtime.profiling.Duration";
441 };
442 
443 /*! \brief A percentage of something. */
444 class PercentNode : public Object {
445  public:
446  /*! \brief The percent as a floating point value out of 100%, i.e. if `percent` is 10 then we have 10%. */
447  double percent;
448 
449  /*! \brief Construct a new percentage.
450  * \param a The percentage out of 100.
451  */
452  explicit PercentNode(double a) : percent(a) {}
453 
    /*! \brief Type key string associated with this node type. */
454  static constexpr const char* _type_key = "runtime.profiling.Percent";
456 };
457 
458 /*! \brief A count of something, stored as a 64-bit signed integer. */
459 class CountNode : public Object {
460  public:
461  /*! \brief The actual count. */
462  int64_t value;
463 
464  /*! \brief Construct a new count.
465  * \param a The count.
466  */
467  explicit CountNode(int64_t a) : value(a) {}
468 
    /*! \brief Type key string associated with this node type. */
469  static constexpr const char* _type_key = "runtime.profiling.Count";
471 };
472 
473 /*! \brief A ratio of two things. */
474 class RatioNode : public Object {
475  public:
476  /*! \brief The ratio as a double precision floating point number. */
477  double ratio;
478 
479  /*! \brief Construct a new ratio.
480  * \param a The ratio.
481  */
482  explicit RatioNode(double a) : ratio(a) {}
483 
    /*! \brief Type key string associated with this node type. */
484  static constexpr const char* _type_key = "runtime.profiling.Ratio";
486 };
487 
/*!
 * \brief String representation of an array of NDArray shapes.
 * \param shapes The NDArrays to describe.
 * \return The string representation.
 */
492 String ShapeString(const std::vector<NDArray>& shapes);
/*!
 * \brief Overload taking a single NDArray `shape` and a data type.
 *        NOTE(review): presumably `shape` holds the shape values rather than the data - confirm.
 * \param shape The NDArray argument.
 * \param dtype The data type argument.
 * \return The string representation.
 */
498 String ShapeString(NDArray shape, DLDataType dtype);
/*!
 * \brief Overload taking a shape given as a vector of int64_t and a data type.
 * \param shape The shape values.
 * \param dtype The data type argument.
 * \return The string representation.
 */
504 String ShapeString(const std::vector<int64_t>& shape, DLDataType dtype);
505 
/*!
 * \brief Collect performance information of a function execution.
 * \param mod The module argument.
 * \param func_name The function name argument.
 *        NOTE(review): presumably looked up inside `mod` - confirm.
 * \param device_type The device type, as an int.
 * \param device_id The device id, as an int.
 * \param warmup_iters The warmup iteration count argument.
 *        NOTE(review): presumably the number of untimed runs before collection - confirm.
 * \param collectors The metric collectors to use.
 * \return A PackedFunc.
 *         NOTE(review): its calling convention and result are not visible in this listing.
 */
537 PackedFunc ProfileFunction(Module mod, std::string func_name, int device_type, int device_id,
538  int warmup_iters, Array<MetricCollector> collectors);
539 
/*!
 * \brief Wrap a timer function to measure the time cost of a given packed function.
 * \param f The packed function to measure.
 * \param dev The device argument.
 * \param number,repeat,min_repeat_ms,limit_zero_time_iterations,cooldown_interval_ms,repeats_to_cooldown
 *        Integer measurement settings.
 *        NOTE(review): their exact semantics are not visible in this listing - confirm against
 *        the full header documentation.
 * \param cache_flush_bytes Defaults to 0.
 * \param f_preproc Optional packed function; defaults to nullptr.
 *        NOTE(review): presumably run before each measurement - confirm.
 * \return A PackedFunc wrapping `f`.
 */
587 PackedFunc WrapTimeEvaluator(PackedFunc f, Device dev, int number, int repeat, int min_repeat_ms,
588  int limit_zero_time_iterations, int cooldown_interval_ms,
589  int repeats_to_cooldown, int cache_flush_bytes = 0,
590  PackedFunc f_preproc = nullptr);
591 
592 } // namespace profiling
593 } // namespace runtime
594 } // namespace tvm
595 
596 #endif // TVM_RUNTIME_PROFILING_H_
Array, container representing a contiguous sequence of ObjectRefs.
Definition: array.h:289
Map container of NodeRef->NodeRef in DSL graph. Map implements copy on write semantics,...
Definition: map.h:1271
Module container of TVM.
Definition: module.h:79
Managed NDArray. The array is backed by reference counted blocks.
Definition: ndarray.h:51
Base class of all object reference.
Definition: object.h:519
ObjectPtr< Object > data_
Internal pointer that backs the reference.
Definition: object.h:605
base class of all object containers.
Definition: object.h:171
Packed function is a type-erased function. The arguments are passed by packed format.
Definition: packed_func.h:139
Reference to string objects.
Definition: string.h:98
Base class for all implementations.
Definition: profiling.h:50
virtual int64_t SyncAndGetElapsedNanos()=0
Synchronize timer state and return elapsed time between Start and Stop.
static constexpr const char * _type_key
Definition: profiling.h:76
virtual void Stop()=0
Stop the timer.
virtual void Start()=0
Start the timer.
TVM_DECLARE_BASE_OBJECT_INFO(TimerNode, Object)
virtual ~TimerNode()
Definition: profiling.h:74
Timer for a specific device.
Definition: profiling.h:86
TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Timer, ObjectRef, TimerNode)
static Timer Start(Device dev)
Get a device specific timer.
Definition: profiling.h:459
static constexpr const char * _type_key
Definition: profiling.h:469
TVM_DECLARE_FINAL_OBJECT_INFO(CountNode, Object)
CountNode(int64_t a)
Definition: profiling.h:467
int64_t value
Definition: profiling.h:462
Wrapper for Device.
Definition: profiling.h:170
TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(DeviceWrapper, ObjectRef, DeviceWrapperNode)
DeviceWrapper(Device dev)
Definition: profiling.h:172
Definition: profiling.h:429
double microseconds
Definition: profiling.h:432
DurationNode(double a)
Definition: profiling.h:437
static constexpr const char * _type_key
Definition: profiling.h:439
TVM_DECLARE_FINAL_OBJECT_INFO(DurationNode, Object)
Interface for user defined profiling metric collection.
Definition: profiling.h:296
static constexpr const char * _type_key
Definition: profiling.h:319
virtual ObjectRef Start(Device dev)=0
Start collecting metrics for a function call.
virtual void Init(Array< DeviceWrapper > devs)=0
Initialization call. Called before profiling has started. Any expensive precomputation should happen ...
TVM_DECLARE_BASE_OBJECT_INFO(MetricCollectorNode, Object)
virtual Map< String, ObjectRef > Stop(ObjectRef obj)=0
Stop collecting metrics.
virtual ~MetricCollectorNode()
Definition: profiling.h:317
Wrapper for MetricCollectorNode.
Definition: profiling.h:324
TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(MetricCollector, ObjectRef, MetricCollectorNode)
Definition: profiling.h:444
PercentNode(double a)
Definition: profiling.h:452
static constexpr const char * _type_key
Definition: profiling.h:454
double percent
Definition: profiling.h:447
TVM_DECLARE_FINAL_OBJECT_INFO(PercentNode, Object)
Definition: profiling.h:364
bool IsRunning() const
Check if the profiler is currently running.
Definition: profiling.h:417
void StartCall(String name, Device dev, std::unordered_map< std::string, ObjectRef > extra_metrics={})
Start a function call.
void Stop()
Stop the profiler.
Profiler(std::vector< Device > devs, std::vector< MetricCollector > metric_collectors, std::unordered_map< String, ObjectRef > configuration={})
void StopCall(std::unordered_map< std::string, ObjectRef > extra_metrics={})
Stop the last StartCall.
profiling::Report Report()
A report of total runtime between Start and Stop as well as individual statistics for each StartCall-...
void Start()
Start the profiler.
Definition: profiling.h:474
TVM_DECLARE_FINAL_OBJECT_INFO(RatioNode, Object)
RatioNode(double a)
Definition: profiling.h:482
double ratio
Definition: profiling.h:477
static constexpr const char * _type_key
Definition: profiling.h:484
Data collected from a profiling run. Includes per-call metrics and per-device metrics.
Definition: profiling.h:178
Map< String, ObjectRef > configuration
Definition: profiling.h:200
static constexpr const char * _type_key
Definition: profiling.h:255
String AsTable(bool sort=true, bool aggregate=true, bool compute_col_sums=true) const
Create a human readable table of profiling metrics.
String AsJSON() const
Convert this report to JSON.
Array< Map< String, ObjectRef > > calls
A list of function calls and the metrics recorded for that call.
Definition: profiling.h:187
String AsCSV() const
Output calls in CSV format.
TVM_DECLARE_FINAL_OBJECT_INFO(ReportNode, Object)
Map< String, Map< String, ObjectRef > > device_metrics
Metrics collected for the entire run of the model on a per-device basis.
Definition: profiling.h:195
Definition: profiling.h:259
TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(Report, ObjectRef, ReportNode)
Report(Array< Map< String, ObjectRef >> calls, Map< String, Map< String, ObjectRef >> device_metrics, Map< String, ObjectRef > configuration)
static Report FromJSON(String json)
Abstract device memory management API.
Runtime Map container types.
String ShapeString(const std::vector< NDArray > &shapes)
String representation of an array of NDArray shapes.
PackedFunc ProfileFunction(Module mod, std::string func_name, int device_type, int device_id, int warmup_iters, Array< MetricCollector > collectors)
Collect performance information of a function execution. Usually used with a compiled PrimFunc (via t...
PackedFunc WrapTimeEvaluator(PackedFunc f, Device dev, int number, int repeat, int min_repeat_ms, int limit_zero_time_iterations, int cooldown_interval_ms, int repeats_to_cooldown, int cache_flush_bytes=0, PackedFunc f_preproc=nullptr)
Wrap a timer function to measure the time cost of a given packed function.
Timer DefaultTimer(Device dev)
Default timer if one does not exist for the device.
constexpr const char * device_id
The allocation device for global malloc in host.
Definition: stmt.h:1416
constexpr const char * device_type
The device type.
Definition: stmt.h:1418
tvm::PrimExpr mod(const tvm::PrimExpr &a, const tvm::PrimExpr &b)
Definition: broadcast.h:290
Tensor shape(const Tensor &src, DataType dtype, const std::string name="T_shape", const std::string tag=kInjective)
Get the shape of input tensor.
Definition: transform.h:1853
Tensor repeat(const Tensor &x, int repeats, int axis, std::string name="T_repeat", std::string tag=kBroadcast)
Creates an operation to repeat elements of an array.
Definition: transform.h:1244
runtime implementation for LibTorch/TorchScript.
Definition: analyzer.h:36
DLDevice Device
Definition: ndarray.h:43
A managed object in the TVM runtime.
Type-erased function used across TVM API.
This file defines the TVM global function registry.
Definition: profiling.h:330
Timer timer
Definition: profiling.h:336
std::unordered_map< std::string, ObjectRef > extra_metrics
Definition: profiling.h:338
std::vector< std::pair< MetricCollector, ObjectRef > > extra_collectors
Definition: profiling.h:342
String name
Definition: profiling.h:334
Device dev
Definition: profiling.h:332
Wrapper for Device because Device is not passable across the PackedFunc interface.
Definition: profiling.h:158
TVM_DECLARE_BASE_OBJECT_INFO(DeviceWrapperNode, Object)
Device device
Definition: profiling.h:160
DeviceWrapperNode(Device device)
Definition: profiling.h:163
static constexpr const char * _type_key
Definition: profiling.h:165