Commit 6040b6f9 by Zhao Wu Committed by Wuwei Lin

[ThreadPool] Solve thread transitions issue (#4344)

* [ThreadPool] Solve thread transitions issue

* Use pthread_atfork to avoid the master thread's affinity being inherited by the child.

* Code Format

* comment of exclude_worker0_

* set full cpu affinity

* Redundant blank line

* CPPLint

* CPPLint namespace

* CPPLint

* Fix the wrong logic of binding the master thread.
parent 3a133550
......@@ -283,6 +283,10 @@ class ThreadPool {
// The SpscTaskQueue only hosts ONE item at a time
queues_.emplace_back(std::unique_ptr<SpscTaskQueue>(new SpscTaskQueue()));
}
const char* exclude_worker0 = getenv("TVM_EXCLUDE_WORKER0");
if (exclude_worker0 && atoi(exclude_worker0) == 0) {
exclude_worker0_ = false;
}
threads_ = std::unique_ptr<tvm::runtime::threading::ThreadGroup>(
new tvm::runtime::threading::ThreadGroup(
num_workers_, [this](int worker_id) { this->RunWorker(worker_id); },
......@@ -369,7 +373,7 @@ class ThreadPool {
int num_workers_;
// number of workers used (can be restricted with affinity pref)
int num_workers_used_;
// if excluding worker 0 and using master to run task 0
// if or not to exclude worker 0 and use master to run task 0
#ifndef _LIBCPP_SGX_CONFIG
bool exclude_worker0_{true};
#else
......
......@@ -133,25 +133,44 @@ class ThreadGroup::Impl {
sizeof(cpu_set_t), &cpuset);
#endif
}
if (exclude_worker0) { // bind the master thread to core 0
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
if (reverse) {
CPU_SET(sorted_order_[sorted_order_.size() - 1], &cpuset);
} else {
CPU_SET(sorted_order_[0], &cpuset);
}
if (exclude_worker0) { // master thread run task
#if defined(__ANDROID__)
sched_setaffinity(pthread_self(),
sizeof(cpu_set_t), &cpuset);
SetFullCpuAffinity();
#else
pthread_setaffinity_np(pthread_self(),
sizeof(cpu_set_t), &cpuset);
// if we set TVM_BIND_MASTER_THREAD to be 1, we will bind master thread
// to core 0.
const char* bind_master_thread = getenv("TVM_BIND_MASTER_THREAD");
if (bind_master_thread && atoi(bind_master_thread) == 1) {
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
if (reverse) {
CPU_SET(sorted_order_[sorted_order_.size() - 1], &cpuset);
} else {
CPU_SET(sorted_order_[0], &cpuset);
}
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
}
pthread_atfork(nullptr, nullptr, ThreadGroup::Impl::SetFullCpuAffinity);
#endif
}
#endif
}
// Reset the calling thread's CPU affinity to the full set of hardware threads.
//
// Registered via pthread_atfork (see the call site above) so that a forked
// child does not inherit a narrow affinity mask pinned onto the master
// thread; on Android it is also called directly to give the master thread
// the full mask. No-op on platforms other than Linux/Android.
static void SetFullCpuAffinity() {
#if defined(__linux__) || defined(__ANDROID__)
  // hardware_concurrency() is allowed to return 0 when the CPU count is not
  // computable; an empty mask would make the affinity call fail with EINVAL,
  // so leave the affinity unchanged in that case.
  const unsigned num_cpus = std::thread::hardware_concurrency();
  if (num_cpus == 0) return;
  cpu_set_t cpuset;
  CPU_ZERO(&cpuset);
  for (unsigned i = 0; i < num_cpus; ++i) {
    CPU_SET(i, &cpuset);
  }
#if defined(__ANDROID__)
  // Bionic lacks pthread_setaffinity_np, so sched_setaffinity is used.
  // NOTE(review): sched_setaffinity takes a pid_t/tid while pthread_self()
  // returns a pthread_t — this matches the rest of this file, but confirm
  // the intent on Android.
  sched_setaffinity(pthread_self(), sizeof(cpu_set_t), &cpuset);
#else
  pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
#endif
#endif
}
void InitSortedOrder() {
unsigned int threads = std::thread::hardware_concurrency();
std::vector<std::pair <unsigned int, int64_t> > max_freqs;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment