一个简单的MPI并行计算程序及性能分析

安装MPICH并编译quad_mpi.c

本实验所用的平台为Ubuntu 18.04.3 LTS,Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz × 32。本实验使用MPI并行计算数值积分,并考察数值积分结果与解析积分结果之间的误差。

# 首先创建目录用于下载并保存MPI源码
mkdir mpich-3.1
cd mpich-3.1
mkdir src
wget http://www.mpich.org/static/downloads/3.1.4/mpich-3.1.4.tar.gz
sudo tar -zxvf mpich-3.1.4.tar.gz

# 指定安装文件夹并编译安装
cd mpich-3.1.4
sudo ./configure -prefix=/home/zsh/mpich-3.1
sudo make
sudo make install

# 打开~/.bashrc并在文件最后添加以下环境变量
export MPI_ROOT=/home/zsh/mpich-3.1
export PATH=$MPI_ROOT/bin:$PATH
export MANPATH=$MPI_ROOT/man:$MANPATH

# 更新配置文件使其生效
source ~/.bashrc

# 检查mpicc和mpirun是否加入到环境变量中
which mpicc
which mpirun

# 编译quad_mpi.c文件
mpicc quad_mpi.c -o quad_mpi

使用不同数目的处理器核心运行代码并记录时间

# 分别使用4, 16, 32个处理器核心运行代码
mpirun -np 4 ./quad_mpi
mpirun -np 16 ./quad_mpi
mpirun -np 32 ./quad_mpi

可以看到,当处理器个数分别为4, 16, 32个时,程序的运行时间分别为0.169s, 0.029s, 0.081s;当处理器个数最多(32个)时,程序的运行时间反而上升。因此并不是处理器个数越多,程序运行得就越快:多核间的通信也需要耗时,具体的应用场景需要具体分析。

改变离散点个数,观察误差与离散点数目之间的关系

将离散点个数n分别设为100000(缩小100倍),10000000(不变)以及1000000000(扩大100倍),使用4个核进行计算,并记录数值积分解与解析解之间的误差,结果如下:

从结果可得,实验所得误差分别为7.8e-04, 7.8e-06, 7.8e-08,随离散点数目的增加而降低。

更换被积函数,比较数值积分与解析积分之间的误差

将被积函数替换为 $f(x) = 66 / (pi ( 66 x x x + 66 ) )$,积分下限$A=0$,积分上限$B=66$。使用4个处理器核心进行计算,且离散点个数设为10000000,结果如下:

可以看到,数值积分与解析积分之间的误差为9.35e-07。

quad_mpi.c 代码

# include <math.h>
# include <mpi.h>
# include <stdio.h>
# include <stdlib.h>
# include <time.h>

int main ( int argc, char *argv[] );
double f ( double x );
void timestamp ( );

/******************************************************************************/

int main ( int argc, char *argv[] )

/******************************************************************************/
/*
  Purpose:

    MAIN is the main program for QUAD_MPI.

  Discussion:

    The program estimates the integral of F over [A,B] and compares the
    estimate with the stored value EXACT.

    Process 0 (the master) performs no quadrature itself.  It splits [A,B]
    into P_NUM-1 equal subintervals and sends the end points of one
    subinterval to each of the other processes.  Every worker evaluates F
    at MY_N equally spaced points of its subinterval (end points included),
    scales the sum by ( MY_B - MY_A ) / MY_N, and the partial results are
    summed onto the master with MPI_Reduce.

    Because the master only hands out work, at least two processes are
    required, and every worker needs at least two evaluation points.  If
    either condition fails, the program prints an error message, shuts MPI
    down and returns a nonzero status instead of dividing by zero.

  Licensing:

    This code is distributed under the GNU LGPL license.

  Modified:

    19 July 2010

  Author:

    John Burkardt

  Parameters:

    Input, int ARGC, char *ARGV[], the command line arguments, which are
    handed to MPI_Init.

    Output, int MAIN, 0 on normal completion, EXIT_FAILURE if the process
    count or the number of points per process is too small.
*/
{
  double a;
  double b;
  double error;
  double exact;
  int i;
  int master = 0;
  double my_a;
  double my_b;
  int my_id;
  int my_n = 0;
  double my_total;
  int n;
  int p;
  int p_num;
  int source;
  MPI_Status status;
  int tag;
  int target;
  double total = 0.0;
  double wtime = 0.0;
  double x;

  a = 0.0;
  b = 10.0;
  n = 10000000;
  exact = 0.49936338107645674464;
/*
  Initialize MPI.
*/
  MPI_Init ( &argc, &argv );
/*
  Get this processor's ID.
*/
  MPI_Comm_rank ( MPI_COMM_WORLD, &my_id );
/*
  Get the number of processes.
*/
  MPI_Comm_size ( MPI_COMM_WORLD, &p_num );
/*
  Reject configurations that would divide by zero below:
  P_NUM - 1 is the number of workers, and each worker divides by MY_N - 1.
  Every process evaluates the same test, so all of them leave together.
*/
  if ( p_num < 2 || n / ( p_num - 1 ) < 2 )
  {
    if ( my_id == master )
    {
      fprintf ( stderr, "\n" );
      fprintf ( stderr, "QUAD_MPI - Fatal error!\n" );
      fprintf ( stderr, "  At least 2 processes are required, and each worker\n" );
      fprintf ( stderr, "  process needs at least 2 evaluation points.\n" );
    }
    MPI_Finalize ( );
    return EXIT_FAILURE;
  }

  if ( my_id == master )
  {
/*
  We want N to be the total number of evaluations.
  If necessary, we adjust N to be divisible by the number of worker processes.
*/
    my_n = n / ( p_num - 1 );
    n = ( p_num - 1 ) * my_n;

    wtime = MPI_Wtime ( );

    timestamp ( );
    printf ( "\n" );
    printf ( "QUAD_MPI\n" );
    printf ( " C/MPI version\n" );
    printf ( " Estimate an integral of f(x) from A to B.\n" );
    printf ( " f(x) = 50 / (pi * ( 2500 * x * x + 1 ) )\n" );
    printf ( "\n" );
    printf ( " A = %f\n", a );
    printf ( " B = %f\n", b );
    printf ( " N = %d\n", n );
    printf ( " EXACT = %24.16f\n", exact );
    printf ( "\n" );
    printf ( " Use MPI to divide the computation among\n" );
    printf ( " multiple processes.\n" );
  }
/*
  The master tells every process how many points each worker evaluates.
*/
  source = master;
  MPI_Bcast ( &my_n, 1, MPI_INT, source, MPI_COMM_WORLD );
/*
  Process 0 assigns each worker process a subinterval of [A,B].
  Tag 1 carries the left end point, tag 2 the right end point.
*/
  if ( my_id == master )
  {
    for ( p = 1; p <= p_num - 1; p++ )
    {
      my_a = ( ( double ) ( p_num - p ) * a
             + ( double ) ( p - 1 ) * b )
             / ( double ) ( p_num - 1 );

      target = p;
      tag = 1;
      MPI_Send ( &my_a, 1, MPI_DOUBLE, target, tag, MPI_COMM_WORLD );

      my_b = ( ( double ) ( p_num - p - 1 ) * a
             + ( double ) ( p ) * b )
             / ( double ) ( p_num - 1 );

      target = p;
      tag = 2;
      MPI_Send ( &my_b, 1, MPI_DOUBLE, target, tag, MPI_COMM_WORLD );
    }
/*
  The master contributes nothing to the reduction.
*/
    total = 0.0;
    my_total = 0.0;
  }
/*
  Worker processes receive MY_A, MY_B, and compute their part of the integral.
*/
  else
  {
    source = master;
    tag = 1;
    MPI_Recv ( &my_a, 1, MPI_DOUBLE, source, tag, MPI_COMM_WORLD, &status );

    source = master;
    tag = 2;
    MPI_Recv ( &my_b, 1, MPI_DOUBLE, source, tag, MPI_COMM_WORLD, &status );

    my_total = 0.0;
    for ( i = 1; i <= my_n; i++ )
    {
/*
  X runs from MY_A ( I = 1 ) to MY_B ( I = MY_N ) in equal steps.
*/
      x = ( ( double ) ( my_n - i ) * my_a
          + ( double ) ( i - 1 ) * my_b )
          / ( double ) ( my_n - 1 );
      my_total = my_total + f ( x );
    }

    my_total = ( my_b - my_a ) * my_total / ( double ) ( my_n );

    printf ( " Process %d contributed MY_TOTAL = %f\n", my_id, my_total );
  }
/*
  Each process sends its value to the master process, which sums them.
*/
  MPI_Reduce ( &my_total, &total, 1, MPI_DOUBLE, MPI_SUM, master, MPI_COMM_WORLD );
/*
  The master reports the estimate, its error, and the elapsed wall clock time.
*/
  if ( my_id == master )
  {
    error = fabs ( total - exact );
    wtime = MPI_Wtime ( ) - wtime;

    printf ( "\n" );
    printf ( " Estimate = %24.16f\n", total );
    printf ( " Error = %e\n\n", error );
    printf ( " Time = %f\n\n", wtime );
  }
/*
  Terminate MPI.
*/
  MPI_Finalize ( );
/*
  Terminate.
*/
  if ( my_id == master )
  {
    printf ( "\n" );
    printf ( "QUAD_MPI:\n" );
    printf ( " Normal end of execution.\n" );
    printf ( "\n" );
    timestamp ( );
  }

  return 0;
}
/******************************************************************************/

double f ( double x )

/******************************************************************************/
/*
  Purpose:

    F evaluates the integrand 50 / ( pi * ( 2500 * x * x + 1 ) ).

  Parameters:

    Input, double X, the point at which the integrand is evaluated.

    Output, double F, the value of the integrand at X.
*/
{
  const double pi = 3.141592653589793;
  const double denominator = pi * ( 2500.0 * x * x + 1.0 );

  return 50.0 / denominator;
}
/******************************************************************************/

void timestamp ( void )

/******************************************************************************/
/*
  Purpose:

    TIMESTAMP prints the current YMDHMS date as a time stamp.

  Discussion:

    The current local time is formatted with "%d %B %Y %I:%M:%S %p" and
    written to standard output, followed by a newline.

    If the local time cannot be obtained, or the formatted text does not
    fit in the buffer, a placeholder line is printed instead, because the
    buffer contents are not usable in that case.

  Example:

    31 May 2001 09:45:54 AM

  Licensing:

    This code is distributed under the GNU LGPL license.

  Modified:

    24 September 2003

  Author:

    John Burkardt

  Parameters:

    None
*/
{
# define TIME_SIZE 40

  char time_buffer[TIME_SIZE];
  size_t length = 0;
  const struct tm *tm;
  time_t now;

  now = time ( NULL );
  tm = localtime ( &now );
/*
  LOCALTIME may return NULL; STRFTIME must not be called in that case.
*/
  if ( tm != NULL )
  {
    length = strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm );
  }
/*
  A zero length means nothing usable was written to TIME_BUFFER.
*/
  if ( length == 0 )
  {
    printf ( "(time unavailable)\n" );
  }
  else
  {
    printf ( "%s\n", time_buffer );
  }

  return;
# undef TIME_SIZE
}