tvm
threading_backend.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
24 #ifndef TVM_RUNTIME_THREADING_BACKEND_H_
25 #define TVM_RUNTIME_THREADING_BACKEND_H_
26 
28 
29 #include <algorithm>
30 #include <functional>
31 #include <memory>
32 #include <vector>
33 
34 #if defined(__linux__) || defined(__ANDROID__)
35 #if defined(__ANDROID__)
#ifndef CPU_SET
// Fallback cpu_set_t and CPU_* helpers, used only when the platform headers
// did not already provide CPU_SET.
// The set is a fixed-size bitmap of CPU_SETSIZE bits stored in 64-bit words.
#define CPU_SETSIZE 1024
#define __NCPUBITS (8 * sizeof(uint64_t))
typedef struct {
  uint64_t __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;

// Mark `cpu` as a member of `*cpusetp`.
// The mask is built from 1ULL (at least 64 bits wide) so that shifting by up
// to 63 is well defined even where `unsigned long` is only 32 bits.
#define CPU_SET(cpu, cpusetp) \
  ((cpusetp)->__bits[(cpu) / __NCPUBITS] |= (1ULL << ((cpu) % __NCPUBITS)))
// Clear every bit of `*cpusetp`. Requires memset to be declared at the use site.
#define CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t))
// True when `cpu` is a member of `*cpusetp`.
// Fully parenthesised so it composes safely inside larger expressions.
#define CPU_ISSET(cpu, cpusetp) \
  ((((cpusetp)->__bits[(cpu) / __NCPUBITS]) & (1ULL << ((cpu) % __NCPUBITS))) != 0)
// True when the two sets (passed as objects, not pointers) are bytewise equal.
// Requires memcmp to be declared at the use site.
#define CPU_EQUAL(left, right) (memcmp(&(left), &(right), sizeof(cpu_set_t)) == 0)

#endif
52 #endif
53 #endif
54 
55 namespace tvm {
56 namespace runtime {
57 namespace threading {
58 
63 class ThreadGroup {
64  public:
65  class Impl;
66 
79  TVM_DLL ThreadGroup(int num_workers, std::function<void(int)> worker_callback,
80  bool exclude_worker0 = false);
81  TVM_DLL ~ThreadGroup();
82 
86  TVM_DLL void Join();
87 
88  enum AffinityMode : int {
89  kBig = 1,
90  kLittle = -1,
91  /*Different threads will get different affinities.*/
93  /*All threads will get the same core group affinity.*/
95  };
109  TVM_DLL int Configure(AffinityMode mode, int nthreads, bool exclude_worker0,
110  std::vector<unsigned int> cpus = {});
111 
112  private:
113  Impl* impl_;
114 };
115 
119 TVM_DLL void Yield();
123 TVM_DLL int MaxConcurrency();
127 TVM_DLL void SetMaxConcurrency(int value);
134 TVM_DLL void ResetThreadPool();
135 
144  std::vector<unsigned int> cpus);
145 
150 TVM_DLL int32_t NumThreads();
151 
152 } // namespace threading
153 
/*!
 * \brief Run a for-loop body over [begin, end) using the runtime's threading backend.
 *
 * Forward declaration; the definition follows the `detail` helpers below.
 *
 * \tparam T Callable type, invoked as `flambda(i)` with an int64_t index.
 * \param flambda The loop body.
 * \param begin First index (inclusive).
 * \param end Last index (exclusive).
 */
template <typename T>
inline void parallel_for_with_threading_backend(T flambda, int64_t begin, int64_t end);
175 
176 namespace detail {
177 
178 // The detailed implementation of `parallel_for_with_threading_backend`.
179 // Because these helpers are templates, their definitions must stay in
180 // this header; they cannot be moved into a .cc file.
181 
182 template <typename T>
183 struct ParallelForWithThreadingBackendLambdaInvoker {
184  static int TVMParallelLambdaInvoke(int task_id, TVMParallelGroupEnv* penv, void* cdata) {
185  int num_task = penv->num_task;
186  // Convert void* back to lambda type.
187  T* lambda_ptr = static_cast<T*>(cdata);
188  // Invoke the lambda with the task id (thread id).
189  (*lambda_ptr)(task_id, num_task);
190  return 0;
191  }
192 };
193 
194 template <typename T>
195 inline void parallel_launch_with_threading_backend(T flambda) {
196  // Launch the lambda by passing its address.
197  void* cdata = &flambda;
198  TVMBackendParallelLaunch(ParallelForWithThreadingBackendLambdaInvoker<T>::TVMParallelLambdaInvoke,
199  cdata, /*num_task=*/0);
200 }
201 
202 } // namespace detail
203 
204 template <typename T>
205 inline void parallel_for_with_threading_backend(T flambda, int64_t begin, int64_t end) {
206  if (end - begin == 1) {
207  flambda(begin);
208  return;
209  }
210 
211  auto flaunch = [begin, end, flambda](int task_id, int num_task) {
212  // For each thread, do static division and call into flambda.
213  int64_t total_len = end - begin;
214  int64_t step = (total_len + num_task - 1) / num_task;
215  int64_t local_begin = std::min(begin + step * task_id, end);
216  int64_t local_end = std::min(local_begin + step, end);
217  for (int64_t i = local_begin; i < local_end; ++i) {
218  flambda(i);
219  }
220  };
221  // Launch with all threads.
222  detail::parallel_launch_with_threading_backend(flaunch);
223 }
224 
225 } // namespace runtime
226 } // namespace tvm
227 
228 #endif // TVM_RUNTIME_THREADING_BACKEND_H_
TVM runtime backend API.
int TVMBackendParallelLaunch(FTVMParallelLambda flambda, void *cdata, int num_task)
Backend function for running parallel jobs.
A platform-agnostic abstraction for managing a collection of thread pool threads.
Definition: threading_backend.h:63
void Join()
Blocks until all non-main threads in the pool finish.
AffinityMode
Definition: threading_backend.h:88
@ kBig
Definition: threading_backend.h:89
@ kLittle
Definition: threading_backend.h:90
@ kSpecifyOneCorePerThread
Definition: threading_backend.h:92
@ kSpecifyThreadShareAllCore
Definition: threading_backend.h:94
int Configure(AffinityMode mode, int nthreads, bool exclude_worker0, std::vector< unsigned int > cpus={})
configure the CPU id affinity
ThreadGroup(int num_workers, std::function< void(int)> worker_callback, bool exclude_worker0=false)
Creates a collection of threads which run a provided function.
void Configure(tvm::runtime::threading::ThreadGroup::AffinityMode mode, int nthreads, std::vector< unsigned int > cpus)
Configuring the CPU affinity mode for the working threads.
int32_t NumThreads()
Get the number of threads being used by the TVM runtime.
void ResetThreadPool()
Reset the threads in the pool. All current threads are destroyed and new ones are created.
void Yield()
Platform-agnostic no-op.
void SetMaxConcurrency(int value)
Setting the maximum number of available cores.
void parallel_for_with_threading_backend(T flambda, int64_t begin, int64_t end)
Definition: threading_backend.h:205
runtime implementation for LibTorch/TorchScript.
Definition: analyzer.h:36
PrimExpr min(PrimExpr a, PrimExpr b, Span span=Span())
take minimum of two values
Environment for TVM parallel task.
Definition: c_backend_api.h:119
int32_t num_task
total amount of task
Definition: c_backend_api.h:125