1 : |
dalvarez |
14 |
/******************************************************************************/
|
2 : |
|
|
/* */
|
3 : |
|
|
/* Copyright (c) 2008, 2009, 2010 */
|
4 : |
|
|
/* Computer Architecture Group (CAG) */
|
5 : |
|
|
/* University of A Coruña, Spain */
|
6 : |
|
|
/* (http://gac.des.udc.es) */
|
7 : |
|
|
/* Galicia Supercomputing Center (CESGA) */
|
8 : |
|
|
/* (http://www.cesga.es) */
|
9 : |
|
|
/* Hewlett-Packard Spain (HP) */
|
10 : |
|
|
/* (http://www.hp.es) */
|
11 : |
|
|
/* */
|
12 : |
|
|
/* This file is part of UPC Operations Microbenchmarking Suite (UOMS). */
|
13 : |
|
|
/* */
|
14 : |
|
|
/* UOMS is free software: you can redistribute it and/or modify */
|
15 : |
|
|
/* it under the terms of the GNU Lesser General Public License as published */
|
16 : |
|
|
/* by the Free Software Foundation, either version 3 of the License, or */
|
17 : |
|
|
/* (at your option) any later version. */
|
18 : |
|
|
/* */
|
19 : |
|
|
/* UOMS is distributed in the hope that it will be useful, */
|
20 : |
|
|
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
|
21 : |
|
|
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
|
22 : |
|
|
/* GNU Lesser General Public License for more details. */
|
23 : |
|
|
/* */
|
24 : |
|
|
/* You should have received a copy of the GNU Lesser General Public License */
|
25 : |
|
|
/* along with UOMS. If not, see <http://www.gnu.org/licenses/>. */
|
26 : |
|
|
/* */
|
27 : |
|
|
/******************************************************************************/
|
28 : |
dalvarez |
1 |
|
29 : |
dalvarez |
14 |
/******************************************************************************/
|
30 : |
|
|
/* */
|
31 : |
|
|
/* FUNDING: This development has been funded by Hewlett-Packard Spain */
|
32 : |
|
|
/* */
|
33 : |
|
|
/* Project Name: */
|
34 : |
|
|
/* UPCHACO (2008-2011) */
|
35 : |
|
|
/* Subproject: */
|
36 : |
|
|
/* Improving UPC Usability and Performance in Constellation Systems: */
|
37 : |
|
|
/* Implementation/Extensions of UPC Libraries. */
|
38 : |
|
|
/* (UPCPUProject -> UPC Performance and Usability Project) */
|
39 : |
|
|
/* */
|
40 : |
|
|
/******************************************************************************/
|
41 : |
dalvarez |
1 |
|
42 : |
dalvarez |
14 |
/******************************************************************************
|
43 : |
dalvarez |
1 |
|
44 : |
|
|
For further documentation, see
|
45 : |
|
|
|
46 : |
|
|
[1] Files under doc/
|
47 : |
|
|
|
48 : |
dalvarez |
14 |
*******************************************************************************/
|
49 : |
dalvarez |
1 |
|
50 : |
|
|
#include <stdio.h>
|
51 : |
|
|
#include <time.h>
|
52 : |
|
|
#include <sys/utsname.h>
|
53 : |
|
|
#include "defines.h" |
54 : |
|
|
#include "headers.h" |
55 : |
|
|
#include "timers/timers.h" |
56 : |
|
|
|
57 : |
|
|
extern FILE* unit;
|
58 : |
|
|
extern int cache_invalidation;
|
59 : |
|
|
extern int warmup;
|
60 : |
|
|
extern char * char_reduce_op;
|
61 : |
|
|
extern char * char_sync_mode;
|
62 : |
|
|
extern int num_sizes;
|
63 : |
|
|
extern long *sizes;
|
64 : |
|
|
extern char * valid_bms[NUM_BMS];
|
65 : |
|
|
extern int num_bms;
|
66 : |
|
|
extern int *bm_list;
|
67 : |
dalvarez |
15 |
extern int *bm_list;
|
68 : |
|
|
extern uint64_t timeLimit;
|
69 : |
|
|
extern int time_limit_set;
|
70 : |
dalvarez |
1 |
|
71 : |
|
|
/*
|
72 : |
|
|
Gets the aggreated bandwidth factor
|
73 : |
|
|
*/
|
74 : |
|
|
double fAggreatedBw(int operation_code){
|
75 : |
|
|
|
76 : |
|
|
double factor=1.0;
|
77 : |
|
|
|
78 : |
|
|
switch (operation_code) {
|
79 : |
dalvarez |
14 |
case FORALL_R:
|
80 : |
|
|
case FORALL_W:
|
81 : |
|
|
case FORALL_RW:
|
82 : |
|
|
case FOR_R:
|
83 : |
|
|
case FOR_W:
|
84 : |
|
|
case FOR_RW:
|
85 : |
dalvarez |
1 |
case BROADCAST:
|
86 : |
dalvarez |
14 |
case SCATTER:
|
87 : |
|
|
case GATHER:
|
88 : |
|
|
case PERMUTE:
|
89 : |
dalvarez |
1 |
factor = THREADS;
|
90 : |
|
|
break;
|
91 : |
|
|
case EXCHANGE:
|
92 : |
|
|
factor = THREADS*THREADS;
|
93 : |
|
|
break;
|
94 : |
dalvarez |
14 |
case GATHERALL:
|
95 : |
dalvarez |
1 |
factor = THREADS+THREADS*THREADS;
|
96 : |
|
|
break;
|
97 : |
|
|
default:
|
98 : |
|
|
factor = 1.0;
|
99 : |
|
|
break;
|
100 : |
|
|
}
|
101 : |
|
|
return factor;
|
102 : |
|
|
}
|
103 : |
|
|
|
104 : |
|
|
/*
|
105 : |
|
|
Prints usage
|
106 : |
|
|
*/
|
107 : |
|
|
void print_usage(char *appname){
|
108 : |
|
|
printf("\nUOMS usage:\n");
|
109 : |
dalvarez |
15 |
printf("[Your UPC runtime launch command] %s [-help | -version | [-off_cache] [-warmup] [-reduce_op OP] [-sync_mode MODE] [-msglen FILE | [-minsize SIZE] [-maxsize SIZE] ] [-time SECONDS] [-input FILE] [output_file]]\n",appname);
|
110 : |
dalvarez |
1 |
printf("Where:\n");
|
111 : |
|
|
printf("-help: Print this usage information and exits\n");
|
112 : |
|
|
printf("-version: Print UOMS version and exits\n");
|
113 : |
|
|
printf("-off_cache: Enable cache invalidation\n");
|
114 : |
|
|
printf("-warmup: Enable a warmup iteration\n");
|
115 : |
|
|
printf("-reduce_op OP: Choose the reduce operation to be performed by upc_all_reduce and upc_all_prefix_reduce\n");
|
116 : |
|
|
printf("Valid operations are:\n");
|
117 : |
|
|
printf(" - UPC_ADD (default)\n");
|
118 : |
|
|
printf(" - UPC_MULT\n");
|
119 : |
|
|
printf(" - UPC_AND\n");
|
120 : |
|
|
printf(" - UPC_OR\n");
|
121 : |
|
|
printf(" - UPC_XOR\n");
|
122 : |
|
|
printf(" - UPC_LOGAND\n");
|
123 : |
|
|
printf(" - UPC_LOGOR\n");
|
124 : |
|
|
printf(" - UPC_MIN\n");
|
125 : |
|
|
printf(" - UPC_MAX\n");
|
126 : |
|
|
printf("-sync_mode MODE: Choose the synchronization mode for the collective operations\n");
|
127 : |
|
|
printf("Valid modes are:\n");
|
128 : |
|
|
printf(" - UPC_IN_ALLSYNC|UPC_OUT_ALLSYNC (default)\n");
|
129 : |
|
|
printf(" - UPC_IN_ALLSYNC|UPC_OUT_MYSYNC\n");
|
130 : |
|
|
printf(" - UPC_IN_ALLSYNC|UPC_OUT_NOSYNC\n");
|
131 : |
|
|
printf(" - UPC_IN_MYSYNC|UPC_OUT_ALLSYNC\n");
|
132 : |
|
|
printf(" - UPC_IN_MYSYNC|UPC_OUT_MYSYNC\n");
|
133 : |
|
|
printf(" - UPC_IN_MYSYNC|UPC_OUT_NOSYNC\n");
|
134 : |
|
|
printf(" - UPC_IN_NOSYNC|UPC_OUT_ALLSYNC\n");
|
135 : |
|
|
printf(" - UPC_IN_NOSYNC|UPC_OUT_MYSYNC\n");
|
136 : |
|
|
printf(" - UPC_IN_NOSYNC|UPC_OUT_NOSYNC\n");
|
137 : |
|
|
printf("-msglen FILE: Read user defined block sizes from FILE (in bytes). If specified it will override -minsize and -maxsize\n");
|
138 : |
dalvarez |
15 |
printf("-minsize SIZE: Specifies the minimum block size (in bytes). Sizes will increase by a factor of 2. Default is 4\n");
|
139 : |
|
|
printf("-maxsize SIZE: Specifies the maximum block size (in bytes). Default is 16MB\n");
|
140 : |
|
|
printf("-time SECONDS: Specifies the maximum run time in seconds for each block size. Disabled by default. Important: this setting will not interrupt an ongoing operation\n");
|
141 : |
dalvarez |
1 |
printf("-input FILE: Read user defined list of benchmarks to run from FILE\n");
|
142 : |
|
|
printf("Valid benchmark names are:\n");
|
143 : |
|
|
for(int i = 0; i < NUM_BMS; i++){
|
144 : |
|
|
printf(" - %s\n",valid_bms[i]);
|
145 : |
|
|
}
|
146 : |
|
|
printf("output_file: Alternative output file (rather than stdout)\n\n");
|
147 : |
|
|
}
|
148 : |
|
|
|
149 : |
|
|
/*
|
150 : |
|
|
Prints performance data acording to the header type
|
151 : |
|
|
*/
|
152 : |
|
|
void print_performance_data(int operation_code, int cursize,int niterations,uint64_t minTime, uint64_t maxTime, uint64_t totalTime) {
|
153 : |
|
|
if (operation_header(operation_code)==1) {
|
154 : |
|
|
if (!MYTHREAD)
|
155 : |
|
|
fprintf(unit,"%14d %13llu %13llu %10.2lf\n",
|
156 : |
|
|
niterations,ticksToNS(minTime),ticksToNS(maxTime),(((double)ticksToNS(totalTime))/niterations));
|
157 : |
|
|
}
|
158 : |
|
|
else if (operation_header(operation_code)==2) {
|
159 : |
|
|
if (!MYTHREAD)
|
160 : |
|
|
fprintf(unit,"%13d %13d %13llu %13llu %12.2lf\n",
|
161 : |
|
|
cursize,niterations,ticksToNS(minTime),ticksToNS(maxTime),(((double)ticksToNS(totalTime))/niterations));
|
162 : |
|
|
}
|
163 : |
|
|
else if (operation_header(operation_code)==3) {
|
164 : |
|
|
if (!MYTHREAD) {
|
165 : |
|
|
double opBw = (((double)fAggreatedBw(operation_code)*cursize)/((double) ticksToNS(minTime)/1000));
|
166 : |
|
|
fprintf(unit,"%13d %13d %13llu %13llu %12.2lf %12.2lf \n",
|
167 : |
|
|
cursize,niterations,ticksToNS(minTime),ticksToNS(maxTime),(((double)ticksToNS(totalTime))/niterations),opBw);
|
168 : |
|
|
}
|
169 : |
|
|
}
|
170 : |
|
|
fflush(unit);
|
171 : |
|
|
return;
|
172 : |
|
|
}
|
173 : |
|
|
|
174 : |
|
|
/*
|
175 : |
|
|
Prints UOMS version
|
176 : |
|
|
*/
|
177 : |
|
|
void print_version(){
|
178 : |
|
|
printf("UPC Operations Microbenchmarking Suite (UOMS) version: %s\n",VERSION);
|
179 : |
|
|
}
|
180 : |
|
|
|
181 : |
|
|
/*
|
182 : |
|
|
Prints general benchmark info
|
183 : |
|
|
*/
|
184 : |
|
|
void UOMS_general_info(){
|
185 : |
|
|
|
186 : |
dalvarez |
15 |
time_t T;
|
187 : |
|
|
struct utsname info;
|
188 : |
dalvarez |
1 |
|
189 : |
dalvarez |
15 |
time(&T);
|
190 : |
|
|
uname( &info );
|
191 : |
dalvarez |
1 |
|
192 : |
|
|
|
193 : |
dalvarez |
15 |
fprintf(unit,"#-----------------------------------------------------\n");
|
194 : |
|
|
fprintf(unit,"# UPC Operations Microbenchmark Suite V%s \n",VERSION);
|
195 : |
|
|
fprintf(unit,"#-----------------------------------------------------\n");
|
196 : |
|
|
fprintf(unit,"# Date : %s",asctime(localtime(&T)));
|
197 : |
|
|
fprintf(unit,"# Machine : %s\n",info.machine);
|
198 : |
|
|
fprintf(unit,"# System : %s\n",info.sysname);
|
199 : |
|
|
fprintf(unit,"# Release : %s\n",info.release);
|
200 : |
|
|
fprintf(unit,"\n");
|
201 : |
dalvarez |
1 |
if(cache_invalidation == 1)
|
202 : |
dalvarez |
15 |
fprintf(unit,"# Cache invalidation : Enabled\n");
|
203 : |
dalvarez |
1 |
else
|
204 : |
dalvarez |
15 |
fprintf(unit,"# Cache invalidation : Disabled\n");
|
205 : |
|
|
fprintf(unit,"\n");
|
206 : |
dalvarez |
1 |
if(warmup == 1)
|
207 : |
dalvarez |
15 |
fprintf(unit,"# Warmup iteration : Enabled\n");
|
208 : |
dalvarez |
1 |
else
|
209 : |
dalvarez |
15 |
fprintf(unit,"# Warmup iteration : Disabled\n");
|
210 : |
|
|
fprintf(unit,"\n");
|
211 : |
|
|
if(time_limit_set == 1){
|
212 : |
|
|
fprintf(unit,"# Time limit per block size : %ld seconds\n",timeLimit);
|
213 : |
|
|
fprintf(unit,"\n");
|
214 : |
|
|
}
|
215 : |
dalvarez |
1 |
fprintf(unit,"# Problem sizes:\n");
|
216 : |
|
|
for(int i = 0; i < num_sizes; i++){
|
217 : |
|
|
fprintf(unit,"# %ld\n",sizes[i]);
|
218 : |
|
|
}
|
219 : |
dalvarez |
15 |
fprintf(unit,"\n");
|
220 : |
|
|
fprintf(unit,"# Synchronization mode : %s \n",char_sync_mode);
|
221 : |
|
|
fprintf(unit,"\n");
|
222 : |
|
|
fprintf(unit,"# Reduce Op : %s \n",char_reduce_op);
|
223 : |
|
|
fprintf(unit,"\n");
|
224 : |
|
|
fprintf(unit,"\n");
|
225 : |
|
|
fprintf(unit,"# List of Benchmarks to run:\n");
|
226 : |
|
|
fprintf(unit,"\n");
|
227 : |
dalvarez |
1 |
for(int i = 0; i < num_bms; i++){
|
228 : |
|
|
fprintf(unit,"# %s\n",valid_bms[bm_list[i]]);
|
229 : |
|
|
}
|
230 : |
dalvarez |
15 |
fprintf(unit,"\n");
|
231 : |
dalvarez |
1 |
}
|
232 : |
|
|
|
233 : |
|
|
|
234 : |
|
|
void UOMS_function_info(int operation,int nthreads,int headertype){
|
235 : |
|
|
|
236 : |
|
|
char *funcname;
|
237 : |
|
|
|
238 : |
dalvarez |
14 |
switch (operation) {
|
239 : |
|
|
case FORALL_R:
|
240 : |
|
|
case FORALL_W:
|
241 : |
|
|
case FORALL_RW:
|
242 : |
|
|
case FOR_R:
|
243 : |
|
|
case FOR_W:
|
244 : |
|
|
case FOR_RW:
|
245 : |
|
|
case BROADCAST:
|
246 : |
|
|
case SCATTER:
|
247 : |
|
|
case GATHER:
|
248 : |
|
|
case GATHERALL:
|
249 : |
|
|
case EXCHANGE:
|
250 : |
|
|
case PERMUTE:
|
251 : |
|
|
case BARRIER:
|
252 : |
|
|
case MEMGET:
|
253 : |
|
|
case MEMPUT:
|
254 : |
|
|
case MEMCPY:
|
255 : |
|
|
case LMEMGET:
|
256 : |
|
|
case LMEMPUT:
|
257 : |
|
|
case LMEMCPY:
|
258 : |
|
|
case SMEMCPY:
|
259 : |
|
|
case MEMMOVE:
|
260 : |
|
|
case ALLALLOC:
|
261 : |
|
|
case FREE:
|
262 : |
|
|
case REDUCE_C:
|
263 : |
|
|
case PREFIX_REDUCE_C:
|
264 : |
|
|
case REDUCE_UC:
|
265 : |
|
|
case PREFIX_REDUCE_UC:
|
266 : |
|
|
case REDUCE_S:
|
267 : |
|
|
case PREFIX_REDUCE_S:
|
268 : |
|
|
case REDUCE_US:
|
269 : |
|
|
case PREFIX_REDUCE_US:
|
270 : |
|
|
case REDUCE_I:
|
271 : |
|
|
case PREFIX_REDUCE_I:
|
272 : |
|
|
case REDUCE_UI:
|
273 : |
|
|
case PREFIX_REDUCE_UI:
|
274 : |
|
|
case REDUCE_L:
|
275 : |
|
|
case PREFIX_REDUCE_L:
|
276 : |
|
|
case REDUCE_UL:
|
277 : |
|
|
case PREFIX_REDUCE_UL:
|
278 : |
|
|
case REDUCE_F:
|
279 : |
|
|
case PREFIX_REDUCE_F:
|
280 : |
|
|
case REDUCE_D:
|
281 : |
|
|
case PREFIX_REDUCE_D:
|
282 : |
|
|
case REDUCE_LD:
|
283 : |
|
|
case PREFIX_REDUCE_LD:
|
284 : |
dalvarez |
1 |
#ifdef ASYNC_MEM_TEST
|
285 : |
dalvarez |
14 |
case AMEMGET:
|
286 : |
|
|
case AMEMPUT:
|
287 : |
|
|
case AMEMCPY:
|
288 : |
|
|
case ALMEMGET:
|
289 : |
|
|
case ALMEMPUT:
|
290 : |
|
|
case ALMEMCPY:
|
291 : |
dalvarez |
1 |
#endif
|
292 : |
dalvarez |
7 |
#ifdef ASYNCI_MEM_TEST
|
293 : |
dalvarez |
14 |
case AIMEMGET:
|
294 : |
|
|
case AIMEMPUT:
|
295 : |
|
|
case AIMEMCPY:
|
296 : |
|
|
case AILMEMGET:
|
297 : |
|
|
case AILMEMPUT:
|
298 : |
|
|
case AILMEMCPY:
|
299 : |
dalvarez |
7 |
#endif
|
300 : |
dalvarez |
14 |
funcname = valid_bms[operation];
|
301 : |
|
|
break;
|
302 : |
|
|
default: funcname = "Not yet defined";
|
303 : |
dalvarez |
1 |
}
|
304 : |
|
|
|
305 : |
|
|
|
306 : |
|
|
fprintf(unit,"\n");
|
307 : |
|
|
fprintf(unit,"#---------------------------------------------------\n");
|
308 : |
|
|
fprintf(unit,"# Benchmarking %s \n",funcname);
|
309 : |
|
|
fprintf(unit,"# #processes = %d \n",nthreads);
|
310 : |
|
|
fprintf(unit,"#---------------------------------------------------\n");
|
311 : |
|
|
if (headertype==1)
|
312 : |
|
|
fprintf(unit," #repetitions t_min[nsec] t_max[nsec] t_avg[nsec]\n");
|
313 : |
|
|
else if (headertype==2)
|
314 : |
|
|
fprintf(unit," #bytes #repetitions t_min[nsec] t_max[nsec] t_avg[nsec]\n");
|
315 : |
|
|
else if (headertype==3)
|
316 : |
|
|
fprintf(unit," #bytes #repetitions t_min[nsec] t_max[nsec] t_avg[nsec] Bw_aggregated[MB/sec]\n");
|
317 : |
|
|
|
318 : |
|
|
}
|
319 : |
|
|
|
320 : |
|
|
|