api/doxygen/threading__backend_8h_source.html

 /*

  * Licensed to the Apache Software Foundation (ASF) under one

  * or more contributor license agreements.  See the NOTICE file

  * distributed with this work for additional information

  * regarding copyright ownership.  The ASF licenses this file

  * to you under the Apache License, Version 2.0 (the

  * "License"); you may not use this file except in compliance

  * with the License.  You may obtain a copy of the License at

  *

  *   http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing,

  * software distributed under the License is distributed on an

  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

  * KIND, either express or implied.  See the License for the

  * specific language governing permissions and limitations

  * under the License.

  */


 #ifndef TVM_RUNTIME_THREADING_BACKEND_H_

 #define TVM_RUNTIME_THREADING_BACKEND_H_


 #include <tvm/runtime/c_backend_api.h>


 #include <algorithm>

 #include <functional>

 #include <memory>

 #include <vector>


 #if defined(__linux__) || defined(__ANDROID__)
 #if defined(__ANDROID__)
 #ifndef CPU_SET
 // Fallback cpu_set_t and CPU_* macros, used only on Android builds where the
 // toolchain headers have not already defined CPU_SET.
 #define CPU_SETSIZE 1024
 // Number of CPU bits stored in one word of cpu_set_t::__bits.
 #define __NCPUBITS (8 * sizeof(uint64_t))
 typedef struct {
   uint64_t __bits[CPU_SETSIZE / __NCPUBITS];
 } cpu_set_t;

 // Mark `cpu` as a member of the set pointed to by `cpusetp`.
 // The mask is built from a uint64_t so that shifting by up to 63 bits is well
 // defined even when `unsigned long` is only 32 bits wide.
 #define CPU_SET(cpu, cpusetp) \
   ((cpusetp)->__bits[(cpu) / __NCPUBITS] |= (static_cast<uint64_t>(1) << ((cpu) % __NCPUBITS)))
 // Clear every CPU from the set pointed to by `cpusetp`.
 #define CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t))
 // Evaluates to true when `cpu` is a member of the set pointed to by `cpusetp`.
 // Fully parenthesized so the macro can be combined with other operators.
 #define CPU_ISSET(cpu, cpusetp)                  \
   ((((cpusetp)->__bits[(cpu) / __NCPUBITS]) &    \
     (static_cast<uint64_t>(1) << ((cpu) % __NCPUBITS))) != 0)
 // Evaluates to true when two cpu_set_t objects (passed as lvalues, not
 // pointers) have identical contents.
 #define CPU_EQUAL(left, right) (memcmp(&(left), &(right), sizeof(cpu_set_t)) == 0)

 #endif
 #endif
 #endif


 namespace tvm {

 namespace runtime {

 namespace threading {


 /*!
  * \brief A platform-agnostic abstraction for managing a collection of
  *        thread pool threads.
  *
  * The class only holds a pointer to its implementation (`Impl`), which is
  * declared here but defined elsewhere.
  */
 class ThreadGroup {

  public:

   /*! \brief Implementation class; only forward-declared in this header. */
   class Impl;


   /*!
    * \brief Creates a collection of threads which run a provided function.
    * \param num_workers Number of workers requested.
    * \param worker_callback Function run by the threads; it receives one int
    *        (presumably the worker id -- confirm against the implementation).
    * \param exclude_worker0 Defaults to false. NOTE(review): presumably controls
    *        whether worker 0 is run by the group or left to the caller -- confirm.
    */
   TVM_DLL ThreadGroup(int num_workers, std::function<void(int)> worker_callback,

                       bool exclude_worker0 = false);

   TVM_DLL ~ThreadGroup();


   /*! \brief Blocks until all non-main threads in the pool finish. */
   TVM_DLL void Join();


   /*!
    * \brief Affinity modes accepted by Configure().
    * NOTE(review): kBig / kLittle presumably select big or little cores on
    * heterogeneous CPUs -- confirm against the implementation.
    */
   enum AffinityMode : int {

     kBig = 1,

     kLittle = -1,

     /*Different threads will get different affinities.*/

     kSpecifyOneCorePerThread = -2,

     /*All threads will get the same core group affinity.*/

     kSpecifyThreadShareAllCore = -3,

   };

   /*!
    * \brief Configure the CPU id affinity.
    * \param mode The affinity mode to apply.
    * \param nthreads Requested number of threads.
    * \param exclude_worker0 See the constructor parameter of the same name.
    * \param cpus List of CPU ids; defaults to an empty list.
    * \return An int whose meaning is not visible in this header
    *         (presumably the number of threads in use -- confirm).
    */
   TVM_DLL int Configure(AffinityMode mode, int nthreads, bool exclude_worker0,

                         std::vector<unsigned int> cpus = {});


  private:

   /*! \brief Pointer to the implementation object. */
   Impl* impl_;

 };


 /*! \brief Platform-agnostic no-op. */
 TVM_DLL void Yield();

 /*!
  * \brief Query the maximum concurrency.
  * NOTE(review): presumably the counterpart of SetMaxConcurrency() -- confirm.
  */
 TVM_DLL int MaxConcurrency();

 /*!
  * \brief Set the maximum number of available cores.
  * \param value The new maximum.
  */
 TVM_DLL void SetMaxConcurrency(int value);

 /*!
  * \brief Reset the threads in the pool. All current threads are destroyed
  *        and new ones are created.
  */
 TVM_DLL void ResetThreadPool();


 /*!
  * \brief Configure the CPU affinity mode for the working threads.
  * \param mode The affinity mode to apply.
  * \param nthreads Requested number of threads.
  * \param cpus List of CPU ids.
  */
 TVM_DLL void Configure(tvm::runtime::threading::ThreadGroup::AffinityMode mode, int nthreads,

                        std::vector<unsigned int> cpus);


 /*! \brief Get the number of threads being used by the TVM runtime. */
 TVM_DLL int32_t NumThreads();


 }  // namespace threading


 /*!
  * \brief Forward declaration; the definition follows the `detail` helpers
  *        further down in this header.
  *
  * Invokes flambda(i) for indices i in the half-open range [begin, end).
  *
  * \param flambda Callable taking a single int64_t index.
  * \param begin First index (inclusive).
  * \param end Last index (exclusive).
  */
 template <typename T>

 inline void parallel_for_with_threading_backend(T flambda, int64_t begin, int64_t end);


 namespace detail {


 // The detailed implementation of `parallel_for_with_threading_backend`.
 // Because these helpers are templates, their definitions must be visible
 // wherever they are instantiated, so they cannot be placed in .cc files.


 /*!
  * \brief Trampoline that lets a C++ callable of type T be used as a
  *        C-style parallel-launch callback.
  * \tparam T Callable type invoked as callable(int task_id, int num_task).
  */
 template <typename T>
 struct ParallelForWithThreadingBackendLambdaInvoker {
   /*!
    * \brief Recover the callable from `cdata` and run it for one task.
    * \param task_id Id of the task being executed.
    * \param penv Parallel environment; only `num_task` is read.
    * \param cdata Type-erased pointer to an object of type T.
    * \return Always 0.
    */
   static int TVMParallelLambdaInvoke(int task_id, TVMParallelGroupEnv* penv, void* cdata) {
     const int task_count = penv->num_task;
     // cdata is the address of the callable, erased to void* by the launcher.
     T& callable = *static_cast<T*>(cdata);
     callable(task_id, task_count);
     return 0;
   }
 };


 template <typename T>

 inline void parallel_launch_with_threading_backend(T flambda) {

   // Launch the lambda by passing its address.

   void* cdata = &flambda;

   TVMBackendParallelLaunch(ParallelForWithThreadingBackendLambdaInvoker<T>::TVMParallelLambdaInvoke,

                            cdata, /*num_task=*/0);

 }


 }  // namespace detail


 /*!
  * \brief Run flambda(i) for every index i in [begin, end) through the
  *        threading backend.
  *
  * The range is split statically: each task receives a contiguous chunk of
  * ceil((end - begin) / num_task) indices and walks it in increasing order.
  *
  * \param flambda Callable invoked as flambda(int64_t index). It is captured
  *        by copy in the closure handed to the backend.
  * \param begin First index (inclusive).
  * \param end Last index (exclusive).
  */
 template <typename T>
 inline void parallel_for_with_threading_backend(T flambda, int64_t begin, int64_t end) {
   // Empty or inverted range: every per-task loop would be empty anyway, so
   // skip the parallel launch entirely.
   if (end <= begin) {
     return;
   }
   // A single element is cheaper to run inline than to dispatch.
   if (end - begin == 1) {
     flambda(begin);
     return;
   }

   auto flaunch = [begin, end, flambda](int task_id, int num_task) {
     // For each thread, do static division and call into flambda.
     int64_t total_len = end - begin;
     // Ceiling division so that the chunks cover the whole range.
     int64_t step = (total_len + num_task - 1) / num_task;
     // Clamp to `end`: trailing tasks may receive an empty chunk.
     int64_t local_begin = std::min(begin + step * task_id, end);
     int64_t local_end = std::min(local_begin + step, end);
     for (int64_t i = local_begin; i < local_end; ++i) {
       flambda(i);
     }
   };
   // Launch with all threads.
   detail::parallel_launch_with_threading_backend(flaunch);
 }


 }  // namespace runtime

 }  // namespace tvm


 #endif  // TVM_RUNTIME_THREADING_BACKEND_H_

c_backend_api.h
TVM runtime backend API.

TVMBackendParallelLaunch
int TVMBackendParallelLaunch(FTVMParallelLambda flambda, void *cdata, int num_task)
Backend function for running parallel jobs.

tvm::runtime::threading::ThreadGroup
A platform-agnostic abstraction for managing a collection of thread pool threads.
Definition: threading_backend.h:63

tvm::runtime::threading::ThreadGroup::Join
void Join()
Blocks until all non-main threads in the pool finish.

tvm::runtime::threading::ThreadGroup::AffinityMode
AffinityMode
Definition: threading_backend.h:88

tvm::runtime::threading::ThreadGroup::kBig
@ kBig
Definition: threading_backend.h:89

tvm::runtime::threading::ThreadGroup::kLittle
@ kLittle
Definition: threading_backend.h:90

tvm::runtime::threading::ThreadGroup::kSpecifyOneCorePerThread
@ kSpecifyOneCorePerThread
Definition: threading_backend.h:92

tvm::runtime::threading::ThreadGroup::kSpecifyThreadShareAllCore
@ kSpecifyThreadShareAllCore
Definition: threading_backend.h:94

tvm::runtime::threading::ThreadGroup::Configure
int Configure(AffinityMode mode, int nthreads, bool exclude_worker0, std::vector< unsigned int > cpus={})
Configure the CPU id affinity.

tvm::runtime::threading::ThreadGroup::~ThreadGroup
~ThreadGroup()

tvm::runtime::threading::ThreadGroup::ThreadGroup
ThreadGroup(int num_workers, std::function< void(int)> worker_callback, bool exclude_worker0=false)
Creates a collection of threads which run a provided function.

tvm::runtime::threading::Configure
void Configure(tvm::runtime::threading::ThreadGroup::AffinityMode mode, int nthreads, std::vector< unsigned int > cpus)
Configuring the CPU affinity mode for the working threads.

tvm::runtime::threading::NumThreads
int32_t NumThreads()
Get the number of threads being used by the TVM runtime.

tvm::runtime::threading::ResetThreadPool
void ResetThreadPool()
Reset the threads in the pool. All current threads are destroyed and new ones are created.

tvm::runtime::threading::Yield
void Yield()
Platform-agnostic no-op.

tvm::runtime::threading::SetMaxConcurrency
void SetMaxConcurrency(int value)
Setting the maximum number of available cores.

tvm::runtime::threading::MaxConcurrency
int MaxConcurrency()

tvm::runtime::parallel_for_with_threading_backend
void parallel_for_with_threading_backend(T flambda, int64_t begin, int64_t end)
Definition: threading_backend.h:205

tvm
runtime implementation for LibTorch/TorchScript.
Definition: analyzer.h:36

tvm::min
PrimExpr min(PrimExpr a, PrimExpr b, Span span=Span())
take minimum of two values

TVMParallelGroupEnv
Environment for TVM parallel task.
Definition: c_backend_api.h:119

TVMParallelGroupEnv::num_task
int32_t num_task
Total number of tasks.
Definition: c_backend_api.h:125