tvm
/workspace/include/tvm/runtime/threading_backend.h

parallel_for_with_threading_backend

Execute the given lambda function in parallel with the threading backend in TVM.

Template Parameters
    T        The type of the lambda: "void (int i)".

Parameters
    flambda  The lambda to be executed in parallel. It should have the signature "void (int i)".
    begin    The start index of this parallel loop (inclusive).
    end      The end index of this parallel loop (exclusive).

The for loop

    for (int i = 0; i < 10; i++) { a[i] = i; }

should work the same as:

    parallel_for_with_threading_backend([&a](int i) { a[i] = i; }, 0, 10);
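To make the equivalence concrete, here is a minimal usage sketch (illustrative only: the
FillSquares helper, the vector, and the squaring lambda are assumptions, not part of TVM):

    #include <tvm/runtime/threading_backend.h>

    #include <cstdint>
    #include <vector>

    // Fill out[i] = i * i, distributing the iterations over the TVM worker threads.
    void FillSquares(std::vector<int64_t>* out) {
      tvm::runtime::parallel_for_with_threading_backend(
          [out](int64_t i) { (*out)[i] = i * i; },
          /*begin=*/0, /*end=*/static_cast<int64_t>(out->size()));
    }

Each index in [begin, end) is executed exactly once; the static partitioning into per-thread
ranges is done inside the header below.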

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef TVM_RUNTIME_THREADING_BACKEND_H_
#define TVM_RUNTIME_THREADING_BACKEND_H_
#include <tvm/runtime/c_backend_api.h>
#include <tvm/runtime/c_runtime_api.h>

#include <algorithm>
#include <functional>
#include <memory>
#include <vector>
#if defined(__linux__) || defined(__ANDROID__)
#if defined(__ANDROID__)
#ifndef CPU_SET
#define CPU_SETSIZE 1024
#define __NCPUBITS (8 * sizeof(uint64_t))
typedef struct {
  uint64_t __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;
#define CPU_SET(cpu, cpusetp) \
  ((cpusetp)->__bits[(cpu) / __NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
#define CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t))
#define CPU_ISSET(cpu, cpusetp) \
  (1UL << ((cpu) % __NCPUBITS)) == \
      ((cpusetp)->__bits[(cpu) / __NCPUBITS] & (1UL << ((cpu) % __NCPUBITS)))
#define CPU_EQUAL(left, right) (memcmp(&left, &right, sizeof(cpu_set_t)) == 0)
#endif
#endif
#endif
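// Illustrative note (an assumption, not part of the upstream header): with the shim above,
// an affinity mask limiting a thread to cores 0 and 2 could be built as
//   cpu_set_t mask;
//   CPU_ZERO(&mask);
//   CPU_SET(0, &mask);
//   CPU_SET(2, &mask);
// and then applied with sched_setaffinity() on Linux/Android.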
namespace tvm {
namespace runtime {
namespace threading {
// A platform-agnostic abstraction for managing a collection of thread-pool threads.
class ThreadGroup {
 public:
  class Impl;
  // Creates a collection of threads which run a provided function.
  TVM_DLL ThreadGroup(int num_workers, std::function<void(int)> worker_callback,
                      bool exclude_worker0 = false);
  TVM_DLL ~ThreadGroup();
  // Blocks until all non-main threads in the pool finish.
  TVM_DLL void Join();
  enum AffinityMode : int {
    kBig = 1,
    kLittle = -1,
    /* Different threads will get different affinities. */
    kSpecifyOneCorePerThread = -2,
    /* All threads will get the same core group affinity. */
    kSpecifyThreadShareAllCore = -3,
  };
  // Configures the CPU id affinity.
  TVM_DLL int Configure(AffinityMode mode, int nthreads, bool exclude_worker0,
                        std::vector<unsigned int> cpus = {});

 private:
  Impl* impl_;
};
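// Illustrative usage sketch (assumptions, not from this header): the worker callback runs
// once per worker thread; Join() then waits for the non-main workers to finish.
//
//   tvm::runtime::threading::ThreadGroup group(
//       /*num_workers=*/4, [](int worker_id) { /* per-worker work */ },
//       /*exclude_worker0=*/false);
//   group.Configure(tvm::runtime::threading::ThreadGroup::kBig,
//                   /*nthreads=*/4, /*exclude_worker0=*/false);
//   group.Join();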
// Platform-agnostic no-op.
TVM_DLL void Yield();
// Returns the maximum concurrency available to the runtime.
TVM_DLL int MaxConcurrency();
// Sets the maximum number of available cores.
TVM_DLL void SetMaxConcurrency(int value);
// Resets the threads in the pool: all current threads are destroyed and new ones are created.
TVM_DLL void ResetThreadPool();
// Configures the CPU affinity mode for the worker threads.
TVM_DLL void Configure(tvm::runtime::threading::ThreadGroup::AffinityMode mode, int nthreads,
                       std::vector<unsigned int> cpus);
// Gets the number of threads being used by the TVM runtime.
TVM_DLL int32_t NumThreads();
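// Illustrative sketch (assumptions, not from this header): cap the worker count and pin the
// workers to big cores before launching parallel work.
//
//   using namespace tvm::runtime::threading;
//   if (MaxConcurrency() > 4) SetMaxConcurrency(4);
//   Configure(ThreadGroup::kBig, /*nthreads=*/4, /*cpus=*/{});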
} // namespace threading
// Execute the given lambda function in parallel with the threading backend in TVM.
// flambda should have the signature "void (int i)"; the loop range is [begin, end).
template <typename T>
inline void parallel_for_with_threading_backend(T flambda, int64_t begin, int64_t end);
namespace detail {
// The detailed implementation of `parallel_for_with_threading_backend`.
// To avoid template expansion, the implementation cannot be placed
// in .cc files.
template <typename T>
struct ParallelForWithThreadingBackendLambdaInvoker {
  static int TVMParallelLambdaInvoke(int task_id, TVMParallelGroupEnv* penv, void* cdata) {
    int num_task = penv->num_task;
    // Convert void* back to lambda type.
    T* lambda_ptr = static_cast<T*>(cdata);
    // Invoke the lambda with the task id (thread id).
    (*lambda_ptr)(task_id, num_task);
    return 0;
  }
};
template <typename T>
inline void parallel_launch_with_threading_backend(T flambda) {
  // Launch the lambda by passing its address.
  void* cdata = &flambda;
  TVMBackendParallelLaunch(ParallelForWithThreadingBackendLambdaInvoker<T>::TVMParallelLambdaInvoke,
                           cdata, /*num_task=*/0);
}
} // namespace detail
template <typename T>
inline void parallel_for_with_threading_backend(T flambda, int64_t begin, int64_t end) {
  if (end - begin == 1) {
    // A single iteration does not need the thread pool; run it inline.
    flambda(begin);
    return;
  }
  auto flaunch = [begin, end, flambda](int task_id, int num_task) {
    // For each thread, do static division and call into flambda.
    int64_t total_len = end - begin;
    int64_t step = (total_len + num_task - 1) / num_task;
    int64_t local_begin = std::min(begin + step * task_id, end);
    int64_t local_end = std::min(local_begin + step, end);
    for (int64_t i = local_begin; i < local_end; ++i) {
      flambda(i);
    }
  };
  // Launch with all threads.
  detail::parallel_launch_with_threading_backend(flaunch);
}
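// Worked example of the static division above (illustrative, not part of the upstream
// header): with begin = 0, end = 10 and num_task = 4, step = (10 + 3) / 4 = 3, so the
// per-task ranges are [0, 3), [3, 6), [6, 9) and [9, 10). A task id whose range would start
// past the work gets an empty range, because local_begin is clamped to end.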
} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_THREADING_BACKEND_H_
Cross-references

TVM runtime backend API (c_backend_api.h):
    int TVMBackendParallelLaunch(FTVMParallelLambda flambda, void* cdata, int num_task)
        Backend function for running parallel jobs.
    TVMParallelGroupEnv
        Environment for a TVM parallel task; its int32_t num_task field gives the total number of tasks.

tvm::runtime::threading::ThreadGroup:
    ThreadGroup(int num_workers, std::function<void(int)> worker_callback, bool exclude_worker0 = false)
        Creates a collection of threads which run a provided function.
    void Join()
        Blocks until all non-main threads in the pool finish.
    enum AffinityMode : int { kBig, kLittle, kSpecifyOneCorePerThread, kSpecifyThreadShareAllCore }
    int Configure(AffinityMode mode, int nthreads, bool exclude_worker0, std::vector<unsigned int> cpus = {})
        Configures the CPU id affinity.

tvm::runtime::threading (free functions):
    void Configure(tvm::runtime::threading::ThreadGroup::AffinityMode mode, int nthreads, std::vector<unsigned int> cpus)
        Configures the CPU affinity mode for the worker threads.
    int32_t NumThreads()
        Gets the number of threads being used by the TVM runtime.
    void ResetThreadPool()
        Resets the threads in the pool; all current threads are destroyed and new ones are created.
    void Yield()
        Platform-agnostic no-op.
    void SetMaxConcurrency(int value)
        Sets the maximum number of available cores.

tvm::runtime:
    void parallel_for_with_threading_backend(T flambda, int64_t begin, int64_t end)
        Executes the given lambda in parallel with the threading backend (see the documentation above).
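For reference, a minimal hedged sketch of calling the backend launch API directly, which is
what parallel_launch_with_threading_backend wraps (MyTask, LaunchExample, and the
num_workers_seen variable are illustrative assumptions, not TVM code):

    #include <tvm/runtime/c_backend_api.h>

    // FTVMParallelLambda callback: each worker receives a distinct task_id in
    // [0, penv->num_task) plus the user-supplied cdata pointer.
    static int MyTask(int task_id, TVMParallelGroupEnv* penv, void* cdata) {
      int* num_workers_seen = static_cast<int*>(cdata);
      if (task_id == 0) {
        // penv->num_task is the total number of tasks launched by the runtime.
        *num_workers_seen = penv->num_task;
      }
      return 0;  // a non-zero return would report an error to the runtime
    }

    void LaunchExample() {
      int num_workers_seen = 0;
      // num_task = 0 lets the runtime decide how many worker threads to use,
      // mirroring the /*num_task=*/0 call inside the header.
      TVMBackendParallelLaunch(MyTask, &num_workers_seen, /*num_task=*/0);
    }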