簡述
會用多個版本來寫。操作是用指令行來操作。
除了第二版本是在内容上基于第一個版本的完善,其他都是降低算法的複雜程度的,可以放心閱讀
文章目錄
-
- 簡述
- 版本1
- 版本2
- 版本3
- 版本4
版本1
- 要求n被thread_count整除 不然可以會有計算錯誤。
#include <iostream>
#include <omp.h>
#define FUN(x) (x * x)
using namespace std;
#pragma warning(disable : 4996)
void Trap(double a, double b, int n, double *global_result_p);
int main(int argc, char **argv) {
if (argc < 5) return 0;
double global_result = 0.0;
double a, b;
int n;
int thread_count = strtol(argv[1], NULL, 10);
a = strtod(argv[2], NULL);
b = strtod(argv[3], NULL);
n = strtol(argv[4], NULL, 10);
// n 必須被 thread_count 整除
#pragma omp parallel num_threads (thread_count)
Trap(a, b, n, &global_result);
cout << "With n = " << n << " trapzoids, our estimate\n";
cout << "of the integral from " << a << " to " << b <<" = "<< global_result<< endl;
}
void Trap(double a, double b, int n, double *global_result_p) {
double h, x, my_result;
double local_a, local_b;
int i, local_n;
int my_rank = omp_get_thread_num();
int thread_count = omp_get_num_threads();
h = (b - a) / n;
local_n = n / thread_count; // same in all threads
local_a = a + my_rank * local_n * h;
local_b = local_a + local_n * h;
x = local_a; // initial x
my_result = (FUN(local_a) + FUN(local_b)) / 2;
for (i = 1; i < local_n; ++i) {
x += h;
my_result += FUN(x);
}
my_result *= h;
# pragma omp critical
*global_result_p += my_result;
}
編譯好源檔案之後,就用下面的指令執行
- 這裡是用4個線程,來計算在0,1區間上的 x 2 x^2 x2的用n=64劃分的梯形積分公式。結果接近1/3 可以通過提高n來提高精度。但這個版本隻能計算n為thread_count的整數倍的時候
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 64
With n = 64 trapzoids, our estimate
of the integral from 0 to 1 = 0.333374
版本2
- 可以使用不被thread_count整除的n
- 改進代碼: 添加了一個變量left表示剩餘。剩餘的讓前left個線程每個再多算點。(一般來說線程數目都會比n要小很多。(不然在算法上就會不精确(n不夠大的話)。))
left = n % thread_count;
if (my_rank < left) local_n += 1;
if (my_rank < left){ local_a = a + my_rank * local_n * h; }
else { local_a = a + left * (local_n + 1) * h + (my_rank - left) * local_n * h; }
實際的代碼:
#include <iostream>
#include <omp.h>
#define FUN(x) (x * x)
using namespace std;
#pragma warning(disable : 4996)
void Trap(double a, double b, int n, double *global_result_p);
int main(int argc, char **argv) {
if (argc < 5) return 0;
double global_result = 0.0;
double a, b;
int n;
int thread_count = strtol(argv[1], NULL, 10);
a = strtod(argv[2], NULL);
b = strtod(argv[3], NULL);
n = strtol(argv[4], NULL, 10);
#pragma omp parallel num_threads (thread_count)
Trap(a, b, n, &global_result);
cout << "With n = " << n << " trapzoids, our estimate\n";
cout << "of the integral from " << a << " to " << b <<" = "<< global_result<< endl;
}
void Trap(double a, double b, int n, double *global_result_p) {
double h, x, my_result;
double local_a, local_b;
int i, local_n, left;
int my_rank = omp_get_thread_num();
int thread_count = omp_get_num_threads();
h = (b - a) / n;
local_n = n / thread_count; // same in all threads
left = n % thread_count;
if (my_rank < left) local_n += 1;
if (my_rank < left){ local_a = a + my_rank * local_n * h; }
else { local_a = a + left * (local_n + 1) * h + (my_rank - left) * local_n * h; }
local_b = local_a + local_n * h;
x = local_a; // initial x
my_result = (FUN(local_a) + FUN(local_b)) / 2;
for (i = 1; i < local_n; ++i) {
x += h;
my_result += FUN(x);
}
my_result *= h;
# pragma omp critical
*global_result_p += my_result;
}
發現精度不斷的提升,在隻改變n的數目的情況下。
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 64
With n = 64 trapzoids, our estimate
of the integral from 0 to 1 = 0.333374
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 65
With n = 65 trapzoids, our estimate
of the integral from 0 to 1 = 0.333373
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 66
With n = 66 trapzoids, our estimate
of the integral from 0 to 1 = 0.333372
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 67
With n = 67 trapzoids, our estimate
of the integral from 0 to 1 = 0.33337
版本3
- 對于部分對指針不太熟的人來說,我們可以做出下面的改進
- 修改部分:
#pragma omp parallel num_threads (thread_count)
{
double my_result = Trap(a, b, n);
#pragma omp critical
global_result += my_result;
}
#include <iostream>
#include <omp.h>
#define FUN(x) (x * x)
using namespace std;
#pragma warning(disable : 4996)
double Trap(double a, double b, int n);
int main(int argc, char **argv) {
if (argc < 5) return 0;
double global_result = 0.0;
double a, b;
int n;
int thread_count = strtol(argv[1], NULL, 10);
a = strtod(argv[2], NULL);
b = strtod(argv[3], NULL);
n = strtol(argv[4], NULL, 10);
#pragma omp parallel num_threads (thread_count)
{
double my_result = Trap(a, b, n);
#pragma omp critical
global_result += my_result;
}
cout << "With n = " << n << " trapzoids, our estimate\n";
cout << "of the integral from " << a << " to " << b <<" = "<< global_result<< endl;
}
double Trap(double a, double b, int n) {
double h, x, my_result;
double local_a, local_b;
int i, local_n, left;
int my_rank = omp_get_thread_num();
int thread_count = omp_get_num_threads();
h = (b - a) / n;
local_n = n / thread_count; // same in all threads
left = n % thread_count;
if (my_rank < left) local_n += 1;
if (my_rank < left){ local_a = a + my_rank * local_n * h; }
else { local_a = a + left * (local_n + 1) * h + (my_rank - left) * local_n * h; }
local_b = local_a + local_n * h;
x = local_a; // initial x
my_result = (FUN(local_a) + FUN(local_b)) / 2;
for (i = 1; i < local_n; ++i) {
x += h;
my_result += FUN(x);
}
my_result *= h;
return my_result;
}
結果
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 67
With n = 67 trapzoids, our estimate
of the integral from 0 to 1 = 0.33337
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 68
With n = 68 trapzoids, our estimate
of the integral from 0 to 1 = 0.333369
PS D:\C++\VS\repo\OpenMP-TEST\Debug>
版本4
- 使用規約變量和規約操作
- 會讓代碼更加簡單
#pragma omp parallel num_threads (thread_count) reduction(+:global_result)
{
global_result += Trap(a, b, n);
}
- 跟版本3是一樣的。就是讓代碼更加簡單了而已,用了map-reduce的思路
-
前面的放操作符,後面的設定變量。reduction(<operator>: <variable list>)
- 但是有需要注意的,如果是減法就需要小心了。在每個線程上确實是使用了減法,但是全局上也是可能做了減法,(例如
)這樣的事情是有可能發生的。一般來說,滿足交換律的都不用擔心這點hh-1 - (-5)= 4