일상 코딩
[C++/CPP] 19.06. multi-threading Example inner product 벡터 내적을 통한 멀티 쓰레딩 예제 본문
C++/따배C++ 19강 모던 C++ 기능들
[C++/CPP] 19.06. multi-threading Example inner product 벡터 내적을 통한 멀티 쓰레딩 예제
polarcompass 2021. 12. 16. 07:27
#include<chrono>
#include<iostream>
#include<mutex>
#include<random>
#include<thread>
#include<utility>
#include<vector>
#include<atomic>
#include<future>
#include<numeric>
#include<algorithm>
#include<execution>
using namespace std;
// File-level mutex; dotProductLock acquires it around every update of the
// shared accumulator so that concurrent worker threads do not race on it.
mutex mtx;
/**
 * Adds the dot product of the index range [i_start, i_end) of v0 and v1
 * onto `sum`, with NO synchronization.
 *
 * `sum` is updated with a plain read-modify-write, so running several calls
 * concurrently against the same `sum` is a data race and the accumulated
 * total cannot be trusted. This variant exists to demonstrate that failure.
 *
 * @param v0       first operand; must hold at least i_end elements
 * @param v1       second operand; must hold at least i_end elements
 * @param i_start  first index to process (inclusive). Taken by rvalue
 *                 reference, so direct callers must pass a temporary;
 *                 std::thread does, because it invokes the callable with
 *                 its decayed argument copies as rvalues.
 * @param i_end    one past the last index to process
 * @param sum      accumulator the products are added onto
 */
void dotProductNaive(const std::vector<int> &v0, const std::vector<int> &v1,
                     const unsigned &&i_start, const unsigned &&i_end, unsigned long long &sum)
{
    for (unsigned i = i_start; i < i_end; ++i)
    {
        // Widen before multiplying: an int * int product overflows (undefined
        // behaviour) for large elements before it ever reaches the 64-bit sum.
        const long long product = static_cast<long long>(v0[i]) * v1[i];
        sum += static_cast<unsigned long long>(product);
    }
}
/**
 * Adds the dot product of the index range [i_start, i_end) of v0 and v1
 * onto `sum`, taking the file-level mutex `mtx` around every single update.
 *
 * The result is correct when several threads share one `sum`, but the mutex
 * is acquired once per element, so the threads spend most of their time
 * contending for it.
 *
 * @param v0       first operand; must hold at least i_end elements
 * @param v1       second operand; must hold at least i_end elements
 * @param i_start  first index to process (inclusive)
 * @param i_end    one past the last index to process
 * @param sum      shared accumulator, only modified while `mtx` is held
 */
void dotProductLock(const std::vector<int> &v0, const std::vector<int> &v1,
                    const unsigned i_start, const unsigned i_end, unsigned long long &sum)
{
    // Taking the lock once around the whole loop instead would let only one
    // thread execute its loop at a time.
    for (unsigned i = i_start; i < i_end; ++i)
    {
        // Compute outside the critical section, and widen before multiplying
        // so that large elements do not overflow int.
        const long long product = static_cast<long long>(v0[i]) * v1[i];

        std::scoped_lock lock(mtx); // C++17
        sum += static_cast<unsigned long long>(product);
    }
}
/**
 * Adds the dot product of the index range [i_start, i_end) of v0 and v1
 * onto the atomic accumulator `sum`.
 *
 * Every element performs one atomic read-modify-write on `sum`, which makes
 * concurrent calls sharing the same `sum` produce a correct total without a
 * mutex, at the cost of one atomic operation per element.
 *
 * @param v0       first operand; must hold at least i_end elements
 * @param v1       second operand; must hold at least i_end elements
 * @param i_start  first index to process (inclusive)
 * @param i_end    one past the last index to process
 * @param sum      shared atomic accumulator
 */
void dotProductAtomic(const std::vector<int> &v0, const std::vector<int> &v1,
                      const unsigned i_start, const unsigned i_end, std::atomic<unsigned long long> &sum)
{
    for (unsigned i = i_start; i < i_end; ++i)
    {
        // Widen before multiplying: an int * int product overflows (undefined
        // behaviour) for large elements before it reaches the 64-bit sum.
        const long long product = static_cast<long long>(v0[i]) * v1[i];
        sum += static_cast<unsigned long long>(product);
    }
}
/**
 * Computes the dot product of the index range [i_start, i_end) of v0 and v1
 * and returns it, touching no shared state.
 *
 * Intended to be run as an independent task whose result the caller combines
 * afterwards. The partial sum is accumulated in, and returned as, an int, so
 * the caller has to keep each range small enough for its total to fit.
 *
 * @param v0       first operand; must hold at least i_end elements
 * @param v1       second operand; must hold at least i_end elements
 * @param i_start  first index to process (inclusive)
 * @param i_end    one past the last index to process
 * @return         the partial dot product as int (0 for an empty range)
 */
auto dotProductFuture(const std::vector<int> &v0, const std::vector<int> &v1,
                      const unsigned i_start, const unsigned i_end)
{
    int partial = 0; // private to this call, so no synchronization is needed

    unsigned idx = i_start;
    while (idx < i_end)
    {
        partial += v0[idx] * v1[idx];
        ++idx;
    }

    return partial;
}
/**
 * Benchmark driver: fills two vectors with random integers in [1, 10] and
 * computes their inner product six ways, printing for each one a label, the
 * elapsed time in seconds, the result, and a blank line.
 *
 *   1. std::inner_product        - single-threaded reference value
 *   2. dotProductNaive           - threads, no synchronization (wrong result)
 *   3. dotProductLock            - threads, mutex around every update
 *   4. dotProductAtomic          - threads, atomic accumulator
 *   5. dotProductFuture          - tasks with private partial sums
 *   6. std::transform_reduce     - parallel algorithm from the standard library
 *
 * @return 0 on completion
 */
int main()
{
    const long long n_data = 100000000;
    const unsigned n_threads = 4;

    // Initialize the vectors; reserve up front so push_back never reallocates.
    std::vector<int> v0, v1;
    v0.reserve(n_data);
    v1.reserve(n_data);

    // Random numbers, uniformly distributed over [1, 10].
    std::random_device seed;
    std::mt19937 engine(seed());
    std::uniform_int_distribution<> uniformDist(1, 10);
    for (long long i = 0; i < n_data; ++i)
    {
        v0.push_back(uniformDist(engine));
        v1.push_back(uniformDist(engine));
    }

    // Work split: every worker gets n_per_thread elements, and the last one
    // also takes whatever is left over when n_data is not a multiple of
    // n_threads, so no element is ever skipped.
    const unsigned n_per_thread = static_cast<unsigned>(n_data / n_threads);
    const auto chunk_begin = [=](const unsigned t) { return t * n_per_thread; };
    const auto chunk_end = [=](const unsigned t) {
        return (t + 1 == n_threads) ? static_cast<unsigned>(n_data) : (t + 1) * n_per_thread;
    };

    // Prints the elapsed time since `sta`, then the result, then a blank line.
    const auto report = [](const std::chrono::steady_clock::time_point sta, const unsigned long long sum) {
        const std::chrono::duration<double> dur = std::chrono::steady_clock::now() - sta;
        std::cout << dur.count() << std::endl;
        std::cout << sum << std::endl;
        std::cout << std::endl;
    };

    std::cout << "std::inner_product" << std::endl;
    {
        const auto sta = std::chrono::steady_clock::now();
        const auto sum = std::inner_product(v0.begin(), v0.end(), v1.begin(), 0ull);
        report(sta, sum);
    }

    // The answer does not match the reference: the workers race on `sum`.
    // The data race is deliberate here, to show what goes wrong.
    std::cout << "Naive" << std::endl;
    {
        unsigned long long sum = 0;
        std::vector<std::thread> threads;
        threads.reserve(n_threads);

        const auto sta = std::chrono::steady_clock::now();

        // Launch the workers.
        for (unsigned t = 0; t < n_threads; ++t)
        {
            threads.emplace_back(dotProductNaive, std::cref(v0), std::cref(v1),
                                 chunk_begin(t), chunk_end(t), std::ref(sum));
        }
        // Wait for all of them.
        for (auto &worker : threads)
        {
            worker.join();
        }

        report(sta, sum);
    }

    std::cout << "Lock guard" << std::endl;
    // Up to and including this one: examples of threading done badly
    // (correct result here, but a lock is taken for every element).
    {
        unsigned long long sum = 0;
        std::vector<std::thread> threads;
        threads.reserve(n_threads);

        const auto sta = std::chrono::steady_clock::now();

        // Launch the workers.
        for (unsigned t = 0; t < n_threads; ++t)
        {
            threads.emplace_back(dotProductLock, std::cref(v0), std::cref(v1),
                                 chunk_begin(t), chunk_end(t), std::ref(sum));
        }
        // Wait for all of them.
        for (auto &worker : threads)
        {
            worker.join();
        }

        report(sta, sum);
    }

    std::cout << "Atomic" << std::endl;
    // Making a variable that is updated this frequently atomic
    // slows the computation down.
    {
        std::atomic<unsigned long long> sum{0};
        std::vector<std::thread> threads;
        threads.reserve(n_threads);

        const auto sta = std::chrono::steady_clock::now();

        // Launch the workers.
        for (unsigned t = 0; t < n_threads; ++t)
        {
            threads.emplace_back(dotProductAtomic, std::cref(v0), std::cref(v1),
                                 chunk_begin(t), chunk_end(t), std::ref(sum));
        }
        // Wait for all of them.
        for (auto &worker : threads)
        {
            worker.join();
        }

        report(sta, sum.load());
    }

    std::cout << "Task base, future, promise" << std::endl;
    {
        unsigned long long sum = 0;
        std::vector<std::future<int>> futures;
        futures.reserve(n_threads);

        const auto sta = std::chrono::steady_clock::now();

        // Launch the tasks. The worker function accumulates into a local
        // variable, so every task computes its part independently and the
        // partial results are gathered with get() at the end.
        // std::launch::async is requested explicitly: with the default policy
        // the implementation may defer a task and run it serially inside get().
        for (unsigned t = 0; t < n_threads; ++t)
        {
            futures.emplace_back(std::async(std::launch::async, dotProductFuture,
                                            std::cref(v0), std::cref(v1),
                                            chunk_begin(t), chunk_end(t)));
        }
        // Collect every partial result; get() blocks until its task is done.
        for (auto &partial : futures)
        {
            sum += partial.get();
        }

        report(sta, sum);
    }

    std::cout << "std::transform_reduce" << std::endl;
    {
        const auto sta = std::chrono::steady_clock::now();
        const auto sum = std::transform_reduce(std::execution::par, v0.begin(), v0.end(), v1.begin(), 0ull);
        report(sta, sum);
    }

    return 0;
}
728x90
'C++ > 따배C++ 19강 모던 C++ 기능들' 카테고리의 다른 글
[C++/CPP] 19.08. 자료형 추론 type inference, auto decltype (0) | 2022.01.02 |
---|---|
[C++/CPP] 19.07. Perfect forwarding std::forward (0) | 2021.12.30 |
[C++/CPP] 19.05 Task base, async, future, promise 사용법 (0) | 2021.12.15 |
[C++/CPP] 19.04 Race Condition and std::atomic, std::scoped_lock (0) | 2021.12.12 |
[C++/CPP] 19.03 std::thread와 멀티 쓰레딩 기초 (0) | 2021.12.12 |