天天看點

梯形積分法【OpenMP實作】多個版本

簡述

會用多個版本來寫。操作是用指令行來操作。

除了第二版本是在内容上基于第一個版本的完善,其他都是降低算法的複雜程度的,可以放心閱讀

文章目錄

    • 簡述
    • 版本1
    • 版本2
    • 版本3
    • 版本4

版本1

  • 要求n被thread_count整除 不然可以會有計算錯誤。
#include <iostream>
#include <omp.h>

#define FUN(x) (x * x)

using namespace std;
#pragma warning(disable : 4996)
void Trap(double a, double b, int n, double *global_result_p);
int main(int argc, char **argv) {
	if (argc < 5) return 0;
	double global_result = 0.0;
	double a, b;
	int n;
	int thread_count = strtol(argv[1], NULL, 10);
	a = strtod(argv[2], NULL);
	b = strtod(argv[3], NULL);
	n = strtol(argv[4], NULL, 10);

	// n 必須被 thread_count 整除
#pragma omp parallel num_threads (thread_count)
	Trap(a, b, n, &global_result);

	cout << "With n = " << n << " trapzoids, our estimate\n";
	cout << "of the integral from " << a << " to " << b <<" = "<< global_result<< endl;
}

void Trap(double a, double b, int n, double *global_result_p) {
	double h, x, my_result;
	double local_a, local_b;
	int i, local_n;
	int my_rank = omp_get_thread_num();
	int thread_count = omp_get_num_threads();

	h = (b - a) / n;
	local_n = n / thread_count;  // same in all threads
	local_a = a + my_rank * local_n * h;
	local_b = local_a + local_n * h;
	x = local_a; // initial x
	my_result = (FUN(local_a) + FUN(local_b)) / 2;
	for (i = 1; i < local_n; ++i) {
		x += h;
		my_result += FUN(x);
	}
	my_result *= h;
# pragma omp critical 
	*global_result_p += my_result;
}
           

編譯好源檔案之後,就用下面的指令執行

  • 這裡是用4個線程,來計算在0,1區間上的 x 2 x^2 x2的用n=64劃分的梯形積分公式。結果接近1/3 可以通過提高n來提高精度。但這個版本隻能計算n為thread_count的整數倍的時候
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 64
With n = 64 trapzoids, our estimate
of the integral from 0 to 1 = 0.333374
           

版本2

  • 可以使用不被thread_count整除的n
  • 改進代碼: 添加了一個變量left表示剩餘。剩餘的讓前left個線程每個再多算點。(一般來說線程數目都會比n要小很多。(不然在算法上就會不精确(n不夠大的話)。))
left = n % thread_count;
if (my_rank < left) local_n += 1;
if (my_rank < left){ local_a = a + my_rank * local_n * h; }
else { local_a = a + left * (local_n + 1) * h + (my_rank - left) * local_n * h; }
           

實際的代碼:

#include <iostream>
#include <omp.h>

#define FUN(x) (x * x)

using namespace std;
#pragma warning(disable : 4996)
void Trap(double a, double b, int n, double *global_result_p);
int main(int argc, char **argv) {
	if (argc < 5) return 0;
	double global_result = 0.0;
	double a, b;
	int n;
	int thread_count = strtol(argv[1], NULL, 10);
	a = strtod(argv[2], NULL);
	b = strtod(argv[3], NULL);
	n = strtol(argv[4], NULL, 10);

#pragma omp parallel num_threads (thread_count)
	Trap(a, b, n, &global_result);

	cout << "With n = " << n << " trapzoids, our estimate\n";
	cout << "of the integral from " << a << " to " << b <<" = "<< global_result<< endl;
}

void Trap(double a, double b, int n, double *global_result_p) {
	double h, x, my_result;
	double local_a, local_b;
	int i, local_n, left;
	int my_rank = omp_get_thread_num();
	int thread_count = omp_get_num_threads();

	h = (b - a) / n;

	local_n = n / thread_count;  // same in all threads
	left = n % thread_count;
	if (my_rank < left) local_n += 1;
	if (my_rank < left){ local_a = a + my_rank * local_n * h; }
	else { local_a = a + left * (local_n + 1) * h + (my_rank - left) * local_n * h; }

	local_b = local_a + local_n * h;
	x = local_a; // initial x
	my_result = (FUN(local_a) + FUN(local_b)) / 2;
	for (i = 1; i < local_n; ++i) {
		x += h;
		my_result += FUN(x);
	}
	my_result *= h;
# pragma omp critical 
	*global_result_p += my_result;
}
           

發現精度不斷的提升,在隻改變n的數目的情況下。

PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 64
With n = 64 trapzoids, our estimate
of the integral from 0 to 1 = 0.333374
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 65
With n = 65 trapzoids, our estimate
of the integral from 0 to 1 = 0.333373
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 66
With n = 66 trapzoids, our estimate
of the integral from 0 to 1 = 0.333372
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 67
With n = 67 trapzoids, our estimate
of the integral from 0 to 1 = 0.33337
           

版本3

  • 對于部分對指針不太熟的人來說,我們可以做出下面的改進
  • 修改部分:
#pragma omp parallel num_threads (thread_count)
	{
		double my_result = Trap(a, b, n);
#pragma omp critical 
		global_result += my_result;
	}
           
#include <iostream>
#include <omp.h>

#define FUN(x) (x * x)

using namespace std;
#pragma warning(disable : 4996)
double Trap(double a, double b, int n);
int main(int argc, char **argv) {
	if (argc < 5) return 0;
	double global_result = 0.0;
	double a, b;
	int n;
	int thread_count = strtol(argv[1], NULL, 10);
	a = strtod(argv[2], NULL);
	b = strtod(argv[3], NULL);
	n = strtol(argv[4], NULL, 10);


#pragma omp parallel num_threads (thread_count)
	{
		double my_result = Trap(a, b, n);
#pragma omp critical 
		global_result += my_result;
	}

	cout << "With n = " << n << " trapzoids, our estimate\n";
	cout << "of the integral from " << a << " to " << b <<" = "<< global_result<< endl;
}

double Trap(double a, double b, int n) {
	double h, x, my_result;
	double local_a, local_b;
	int i, local_n, left;
	int my_rank = omp_get_thread_num();
	int thread_count = omp_get_num_threads();

	h = (b - a) / n;

	local_n = n / thread_count;  // same in all threads
	left = n % thread_count;
	if (my_rank < left) local_n += 1;
	if (my_rank < left){ local_a = a + my_rank * local_n * h; }
	else { local_a = a + left * (local_n + 1) * h + (my_rank - left) * local_n * h; }

	local_b = local_a + local_n * h;
	x = local_a; // initial x
	my_result = (FUN(local_a) + FUN(local_b)) / 2;
	for (i = 1; i < local_n; ++i) {
		x += h;
		my_result += FUN(x);
	}
	my_result *= h;
	return my_result;
}
           

結果

PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 67
With n = 67 trapzoids, our estimate
of the integral from 0 to 1 = 0.33337
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 68
With n = 68 trapzoids, our estimate
of the integral from 0 to 1 = 0.333369
PS D:\C++\VS\repo\OpenMP-TEST\Debug>
           

版本4

  • 使用規約變量和規約操作
  • 會讓代碼更加簡單
#pragma omp parallel num_threads (thread_count) reduction(+:global_result)
	{
		global_result += Trap(a, b, n);
	}
           
  • 跟版本3是一樣的。就是讓代碼更加簡單了而已,用了map-reduce的思路
  • reduction(<operator>: <variable list>)

    前面的放操作符,後面的設定變量。
  • 但是有需要注意的,如果是減法就需要小心了。在每個線程上确實是使用了減法,但是全局上也是可能做了減法,(例如

    -1 - (-5)= 4

    )這樣的事情是有可能發生的。一般來說,滿足交換律的都不用擔心這點hh
梯形積分法【OpenMP實作】多個版本

繼續閱讀