1 : |
dalvarez |
1 |
/*****************************************************************************/
|
2 : |
|
|
/* */
|
3 : |
|
|
/* Copyright (c) 2008, 2009, 2010 */
|
4 : |
|
|
/* Computer Architecture Group (CAG) */
|
5 : |
|
|
/* University of A Coruña, Spain */
|
6 : |
|
|
/* (http://gac.des.udc.es) */
|
7 : |
|
|
/* Galicia Supercomputing Center (CESGA) */
|
8 : |
|
|
/* (http://www.cesga.es) */
|
9 : |
|
|
/* Hewlett-Packard Spain (HP) */
|
10 : |
|
|
/* (http://www.hp.es) */
|
11 : |
|
|
/* */
|
12 : |
|
|
/* This file is part of UPC Operations Microbenchmarking Suite (UOMS). */
|
13 : |
|
|
/* */
|
14 : |
|
|
/* UOMS is free software: you can redistribute it and/or modify */
|
15 : |
|
|
/* it under the terms of the GNU Lesser General Public License as published */
|
16 : |
|
|
/* by the Free Software Foundation, either version 3 of the License, or */
|
17 : |
|
|
/* (at your option) any later version. */
|
18 : |
|
|
/* */
|
19 : |
|
|
/* UOMS is distributed in the hope that it will be useful, */
|
20 : |
|
|
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
|
21 : |
|
|
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
|
22 : |
|
|
/* GNU Lesser General Public License for more details. */
|
23 : |
|
|
/* */
|
24 : |
|
|
/* You should have received a copy of the GNU Lesser General Public License */
|
25 : |
|
|
/* along with UOMS. If not, see <http://www.gnu.org/licenses/>. */
|
26 : |
|
|
/* */
|
27 : |
|
|
/*****************************************************************************/
|
28 : |
|
|
|
29 : |
|
|
/*****************************************************************************/
|
30 : |
|
|
/* */
|
31 : |
|
|
/* FUNDING: This development has been funded by Hewlett-Packard Spain */
|
32 : |
|
|
/* */
|
33 : |
|
|
/* Project Name: */
|
34 : |
|
|
/* UPCHACO (2008-2011) */
|
35 : |
|
|
/* Subproject: */
|
36 : |
|
|
/* Improving UPC Usability and Performance in Constellation Systems: */
|
37 : |
|
|
/* Implementation/Extensions of UPC Libraries. */
|
38 : |
|
|
/* (UPCPUProject -> UPC Performance and Usability Project) */
|
39 : |
|
|
/* */
|
40 : |
|
|
/*****************************************************************************/
|
41 : |
|
|
|
42 : |
|
|
/*****************************************************************************
|
43 : |
|
|
|
44 : |
|
|
For further documentation, see
|
45 : |
|
|
|
46 : |
|
|
[1] Files under doc/
|
47 : |
|
|
|
48 : |
|
|
******************************************************************************/
|
49 : |
|
|
|
50 : |
|
|
#include <sched.h>
|
51 : |
|
|
#include <stdio.h>
|
52 : |
|
|
#include <stdlib.h>
|
53 : |
|
|
#include <time.h>
|
54 : |
|
|
#include <sys/utsname.h>
|
55 : |
|
|
#include <sys/time.h>
|
56 : |
|
|
#include <values.h>
|
57 : |
|
|
#include <stdio.h>
|
58 : |
|
|
#include <unistd.h>
|
59 : |
|
|
#include <math.h>
|
60 : |
|
|
#include <string.h>
|
61 : |
|
|
|
62 : |
|
|
/* UPC */
|
63 : |
|
|
#include <upc.h>
|
64 : |
|
|
#include <upc_strict.h>
|
65 : |
|
|
#include <upc_collective.h>
|
66 : |
|
|
|
67 : |
|
|
/* Timers */
|
68 : |
|
|
#include "timers.h"
|
69 : |
|
|
|
70 : |
|
|
#include "defines.h" |
71 : |
|
|
#include "headers.h" |
72 : |
|
|
|
73 : |
|
|
/* Output stream for all benchmark reports (may be stdout, stderr or a file). */
FILE* unit;

/*
   Distributed array used in various benchmarks
*/
shared char *distArr;

/*
   Broadcast array
*/
shared [] char *broadcastArr;

/*
   Scatter array
*/
shared [] char *scatterArr;

/*
   Gather array
*/
shared [] char *gatherArr;

/*
   Gatherall array
*/
shared char *gatherallArr;

/*
   Exchange array
*/
shared char *exchangeArr;

/*
   Permute array
*/
shared char *permuteArr;

/*
   Reduce array (will store only 1 element)
*/
shared [] char *reduceArr;

/*
   Prefix reduce array
*/
shared char *prefixReduceArr;

/*
   Pointer for memory allocation and freeing test
*/
shared char *mem_alloc_tests_pointer;

/*
   Arrays for p2p benchmarking (shared source/destination pairs plus
   thread-private buffers for local memcpy/memmove tests)
*/
shared char *p2pDistArr;
shared char *p2pDistArr2;
char *p2pLocalArr;
char *p2pLocalArr2;

shared double b;// for reduction result
shared int perm[THREADS]; //for permutation
shared long times[THREADS]; //for timing

/* Message sizes to benchmark (filled in by init()). */
long *sizes;
int num_sizes = 0;

/* Benchmark codes selected for this run (filled in by init()). */
int *bm_list;
int num_bms = 0;
char * valid_bms[NUM_BMS];

/* 1 -> rotate through buckets each iteration to defeat cache reuse. */
int cache_invalidation = 0;

/* Reduction operator and its printable name. */
upc_op_t reduce_op = UPC_ADD;
char * char_reduce_op = "UPC_ADD";

/* 1 -> run an untimed warm-up pass before the measured pass. */
int warmup;

/* Synchronization mode passed to all UPC collectives, and its printable name. */
upc_flag_t sync_mode = UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC;
char * char_sync_mode = "UPC_IN_ALLSYNC|UPC_OUT_ALLSYNC";
|
|
int main(int argc, char **argv)
|
155 : |
|
|
/*
|
156 : |
|
|
|
157 : |
|
|
Input variables:
|
158 : |
|
|
|
159 : |
|
|
-argc (type int)
|
160 : |
|
|
Number of command line arguments
|
161 : |
|
|
|
162 : |
|
|
-argv (type char **)
|
163 : |
|
|
List of command line arguments
|
164 : |
|
|
|
165 : |
|
|
Return value (type int)
|
166 : |
|
|
0 always
|
167 : |
|
|
|
168 : |
|
|
*/
|
169 : |
|
|
{
|
170 : |
|
|
|
171 : |
|
|
init(argc, argv);
|
172 : |
|
|
|
173 : |
|
|
if (!MYTHREAD)
|
174 : |
|
|
UOMS_general_info();
|
175 : |
|
|
|
176 : |
|
|
for(int i = 0; i < num_bms; i++){
|
177 : |
|
|
if(p2poperation(bm_list[i]) == 0){
|
178 : |
|
|
bench(bm_list[i]);
|
179 : |
|
|
}
|
180 : |
|
|
else{
|
181 : |
|
|
if (THREADS == 2){
|
182 : |
|
|
affinity_bench(bm_list[i]);
|
183 : |
|
|
}
|
184 : |
|
|
else{
|
185 : |
|
|
if(MYTHREAD == 0){
|
186 : |
|
|
fprintf(unit,"\n#---------------------------------------------------\n");
|
187 : |
|
|
fprintf(unit,"# Skipping benchmark %s. To run it use only 2 threads.\n",valid_bms[bm_list[i]]);
|
188 : |
|
|
fprintf(unit,"#---------------------------------------------------\n\n");
|
189 : |
|
|
}
|
190 : |
|
|
}
|
191 : |
|
|
}
|
192 : |
|
|
}
|
193 : |
|
|
|
194 : |
|
|
if(unit != stdout && unit != stderr){
|
195 : |
|
|
fclose(unit);
|
196 : |
|
|
}
|
197 : |
|
|
|
198 : |
|
|
return 0;
|
199 : |
|
|
}
|
200 : |
|
|
|
201 : |
|
|
|
202 : |
|
|
|
203 : |
|
|
int p2poperation(int operation_code) {
|
204 : |
|
|
switch (operation_code) {
|
205 : |
|
|
case LMEMCPY:
|
206 : |
|
|
case MEMCPY:
|
207 : |
|
|
case LMEMGET:
|
208 : |
|
|
case MEMGET:
|
209 : |
|
|
case LMEMPUT:
|
210 : |
|
|
case MEMPUT:
|
211 : |
|
|
#ifdef ASYNC_MEM_TEST
|
212 : |
|
|
case ALMEMCPY:
|
213 : |
|
|
case AMEMCPY:
|
214 : |
|
|
case ALMEMGET:
|
215 : |
|
|
case AMEMGET:
|
216 : |
|
|
case ALMEMPUT:
|
217 : |
|
|
case AMEMPUT:
|
218 : |
|
|
#endif
|
219 : |
|
|
case SMEMCPY:
|
220 : |
|
|
case MEMMOVE:
|
221 : |
|
|
return 1;
|
222 : |
|
|
default:
|
223 : |
|
|
break;
|
224 : |
|
|
}
|
225 : |
|
|
return 0;
|
226 : |
|
|
}
|
227 : |
|
|
|
228 : |
|
|
/*
|
229 : |
|
|
Generic bench function
|
230 : |
|
|
*/
|
231 : |
|
|
void bench(int operation_code) {
|
232 : |
|
|
|
233 : |
|
|
long int cursize;
|
234 : |
|
|
long int niterations, iter;
|
235 : |
|
|
uint64_t start, end;
|
236 : |
|
|
uint64_t minTime, totalTime, maxTime, prevMinTime;
|
237 : |
|
|
|
238 : |
|
|
if (!MYTHREAD)
|
239 : |
|
|
UOMS_function_info(operation_code,THREADS,operation_header(operation_code));
|
240 : |
|
|
|
241 : |
|
|
int global_iter;
|
242 : |
|
|
int initial_iter = (warmup)?0:1;
|
243 : |
|
|
|
244 : |
|
|
int mem_is_ok;
|
245 : |
|
|
|
246 : |
|
|
for(global_iter = initial_iter; global_iter <2; global_iter++) {
|
247 : |
|
|
|
248 : |
|
|
prevMinTime = 0;
|
249 : |
|
|
|
250 : |
|
|
/*
|
251 : |
|
|
Benchmarking coll with cursize-size and niterations-iterations
|
252 : |
|
|
*/
|
253 : |
|
|
for(int cursize_index=0;cursize_index<num_sizes;cursize_index++){
|
254 : |
|
|
|
255 : |
|
|
cursize = sizes[cursize_index];
|
256 : |
|
|
|
257 : |
|
|
/*
|
258 : |
|
|
Skip test that do not achieve the minimum size
|
259 : |
|
|
*/
|
260 : |
|
|
if(cursize <= 0){
|
261 : |
|
|
continue;
|
262 : |
|
|
}
|
263 : |
|
|
else if(operation_code == REDUCE_C || operation_code == PREFIX_REDUCE_C){
|
264 : |
|
|
if(cursize<sizeof(char)){
|
265 : |
|
|
continue;
|
266 : |
|
|
}
|
267 : |
|
|
}
|
268 : |
|
|
else if(operation_code == REDUCE_UC || operation_code == PREFIX_REDUCE_UC){
|
269 : |
|
|
if(cursize<sizeof(unsigned char)){
|
270 : |
|
|
continue;
|
271 : |
|
|
}
|
272 : |
|
|
}
|
273 : |
|
|
else if(operation_code == REDUCE_S || operation_code == PREFIX_REDUCE_S){
|
274 : |
|
|
if(cursize<sizeof(short)){
|
275 : |
|
|
continue;
|
276 : |
|
|
}
|
277 : |
|
|
}
|
278 : |
|
|
else if(operation_code == REDUCE_US || operation_code == PREFIX_REDUCE_US){
|
279 : |
|
|
if(cursize<sizeof(unsigned short)){
|
280 : |
|
|
continue;
|
281 : |
|
|
}
|
282 : |
|
|
}
|
283 : |
|
|
else if(operation_code == REDUCE_I || operation_code == PREFIX_REDUCE_I){
|
284 : |
|
|
if(cursize<sizeof(int)){
|
285 : |
|
|
continue;
|
286 : |
|
|
}
|
287 : |
|
|
}
|
288 : |
|
|
else if(operation_code == REDUCE_UI || operation_code == PREFIX_REDUCE_UI){
|
289 : |
|
|
if(cursize<sizeof(unsigned int)){
|
290 : |
|
|
continue;
|
291 : |
|
|
}
|
292 : |
|
|
}
|
293 : |
|
|
else if(operation_code == REDUCE_L || operation_code == PREFIX_REDUCE_L){
|
294 : |
|
|
if(cursize<sizeof(long)){
|
295 : |
|
|
continue;
|
296 : |
|
|
}
|
297 : |
|
|
}
|
298 : |
|
|
else if(operation_code == REDUCE_UL || operation_code == PREFIX_REDUCE_UL){
|
299 : |
|
|
if(cursize<sizeof(unsigned long)){
|
300 : |
|
|
continue;
|
301 : |
|
|
}
|
302 : |
|
|
}
|
303 : |
|
|
else if(operation_code == REDUCE_F || operation_code == PREFIX_REDUCE_F){
|
304 : |
|
|
if(cursize<sizeof(float)){
|
305 : |
|
|
continue;
|
306 : |
|
|
}
|
307 : |
|
|
}
|
308 : |
|
|
else if(operation_code == REDUCE_D || operation_code == PREFIX_REDUCE_D){
|
309 : |
|
|
if(cursize<sizeof(double)){
|
310 : |
|
|
continue;
|
311 : |
|
|
}
|
312 : |
|
|
}
|
313 : |
|
|
else if(operation_code == REDUCE_LD || operation_code == PREFIX_REDUCE_LD){
|
314 : |
|
|
if(cursize<sizeof(long double)){
|
315 : |
|
|
continue;
|
316 : |
|
|
}
|
317 : |
|
|
}
|
318 : |
|
|
|
319 : |
|
|
long int nbuckets;
|
320 : |
|
|
niterations = niters(cursize);
|
321 : |
|
|
|
322 : |
|
|
if(cache_invalidation == 1){
|
323 : |
|
|
nbuckets=niterations;
|
324 : |
|
|
}
|
325 : |
|
|
else{
|
326 : |
|
|
nbuckets=1;
|
327 : |
|
|
}
|
328 : |
|
|
|
329 : |
|
|
if(allocate_arrays(operation_code,cursize,nbuckets) == -1) // Unsuccesfully allocation
|
330 : |
|
|
continue;
|
331 : |
|
|
|
332 : |
|
|
upc_barrier;
|
333 : |
|
|
|
334 : |
|
|
minTime=MAXLONG;
|
335 : |
|
|
maxTime=0L;
|
336 : |
|
|
totalTime=0L;
|
337 : |
|
|
|
338 : |
|
|
uint64_t tmax;
|
339 : |
|
|
|
340 : |
|
|
for(iter=0;iter<niterations;iter++){
|
341 : |
|
|
|
342 : |
|
|
/*
|
343 : |
|
|
For this benchmark the array should be allocated per iteration
|
344 : |
|
|
*/
|
345 : |
|
|
if(operation_code == FREE){
|
346 : |
|
|
mem_alloc_tests_pointer = upc_all_alloc(THREADS,cursize);
|
347 : |
|
|
UPCMEM_OK(mem_alloc_tests_pointer);
|
348 : |
|
|
if(mem_is_ok == -1)
|
349 : |
|
|
continue;
|
350 : |
|
|
}
|
351 : |
|
|
|
352 : |
|
|
upc_barrier;
|
353 : |
|
|
|
354 : |
|
|
start = getTicks();
|
355 : |
|
|
function(operation_code,cursize,(iter%nbuckets)*cursize);
|
356 : |
|
|
upc_barrier;
|
357 : |
|
|
end = getTicks() - start;
|
358 : |
|
|
|
359 : |
|
|
if(operation_code == ALLALLOC){
|
360 : |
|
|
UPCMEM_OK(mem_alloc_tests_pointer);
|
361 : |
|
|
if(mem_is_ok == -1)
|
362 : |
|
|
continue;
|
363 : |
|
|
upc_barrier;
|
364 : |
|
|
if(MYTHREAD == 0)
|
365 : |
|
|
upc_free(mem_alloc_tests_pointer);
|
366 : |
|
|
}
|
367 : |
|
|
|
368 : |
|
|
times[MYTHREAD]=end;
|
369 : |
|
|
upc_barrier;
|
370 : |
|
|
if (MYTHREAD == 0) {
|
371 : |
|
|
int i;
|
372 : |
|
|
tmax = 0L;
|
373 : |
|
|
for(i=0;i<THREADS;i++) {
|
374 : |
|
|
if (times[i]>tmax)
|
375 : |
|
|
tmax=times[i];
|
376 : |
|
|
}
|
377 : |
|
|
}
|
378 : |
|
|
end=tmax;
|
379 : |
|
|
|
380 : |
|
|
totalTime+=end;
|
381 : |
|
|
|
382 : |
|
|
//in order to avoid irregular latencies for short messages
|
383 : |
|
|
if (end<minTime)
|
384 : |
|
|
minTime = ((end<prevMinTime)&&(cursize<32*1024))?prevMinTime:end;
|
385 : |
|
|
if (end>maxTime)
|
386 : |
|
|
maxTime = end;
|
387 : |
|
|
}
|
388 : |
|
|
|
389 : |
|
|
upc_barrier;
|
390 : |
|
|
|
391 : |
|
|
free_arrays(operation_code);
|
392 : |
|
|
|
393 : |
|
|
if(global_iter)
|
394 : |
|
|
print_performance_data(operation_code,cursize,niterations,minTime,maxTime,totalTime);
|
395 : |
|
|
prevMinTime = minTime;
|
396 : |
|
|
|
397 : |
|
|
upc_barrier;
|
398 : |
|
|
|
399 : |
|
|
if(operation_code == BARRIER){
|
400 : |
|
|
break;
|
401 : |
|
|
}
|
402 : |
|
|
|
403 : |
|
|
}
|
404 : |
|
|
|
405 : |
|
|
}//fi global_iter
|
406 : |
|
|
|
407 : |
|
|
return;
|
408 : |
|
|
|
409 : |
|
|
}
|
410 : |
|
|
|
411 : |
|
|
|
412 : |
|
|
|
413 : |
|
|
/*
   Call the corresponding function

   Dispatches one timed invocation of the benchmark identified by
   operation_code.  cursize is the message size in bytes; offset selects
   the current cache-invalidation bucket (0 when invalidation is off).
   Collective operations are executed by every thread; point-to-point
   operations only by thread 0.  Indexing note: p2pDistArr[1+THREADS*offset]
   targets an element with affinity to thread 1 (remote transfer), while
   p2pDistArr[THREADS*offset] has affinity to thread 0 (local transfer).
*/
void function(int operation_code, long int cursize,long int offset){

   switch (operation_code) {
      /* ---- Collectives: distArr is the per-thread distributed buffer ---- */
      case BROADCAST:
         upc_all_broadcast(&(distArr[THREADS*offset]),&(broadcastArr[offset]), cursize, sync_mode);
         break;
      case SCATTER:
         upc_all_scatter(&(distArr[THREADS*offset]),&(scatterArr[THREADS*offset]), cursize, sync_mode);
         break;
      case GATHER:
         upc_all_gather( &(gatherArr[THREADS*offset]),&(distArr[THREADS*offset]), cursize, sync_mode);
         break;
      case GATHERALL:
         upc_all_gather_all( &(gatherallArr[THREADS*THREADS*offset]),&(distArr[THREADS*offset]), cursize, sync_mode);
         break;
      case EXCHANGE:
         upc_all_exchange(&(exchangeArr[THREADS*THREADS*offset]), &(distArr[THREADS*THREADS*offset]), cursize, sync_mode );
         break;
      case PERMUTE:
         upc_all_permute(&(permuteArr[THREADS*offset]), &(distArr[THREADS*offset]), perm, cursize, sync_mode );
         break;
      /* ---- Reductions: nelems = (cursize/sizeof(T))*THREADS, blk_size = cursize/sizeof(T) ---- */
      case REDUCE_C:
         upc_all_reduceC((shared char *)reduceArr, (shared char*)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(char))*THREADS, cursize/sizeof(char), NULL, sync_mode );
         break;
      case PREFIX_REDUCE_C:
         upc_all_prefix_reduceC((shared char *)&(distArr[THREADS*offset]), (shared char *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(char))*THREADS, cursize/sizeof(char), NULL, sync_mode );
         break;
      case REDUCE_UC:
         upc_all_reduceUC((shared unsigned char *)reduceArr, (shared unsigned char *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned char))*THREADS, cursize/sizeof(unsigned char), NULL, sync_mode );
         break;
      case PREFIX_REDUCE_UC:
         upc_all_prefix_reduceUC((shared unsigned char *)&(distArr[THREADS*offset]), (shared unsigned char *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned char))*THREADS, cursize/sizeof(unsigned char), NULL, sync_mode );
         break;
      case REDUCE_S:
         upc_all_reduceS((shared short *)reduceArr, (shared short *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(short))*THREADS, cursize/sizeof(short), NULL, sync_mode );
         break;
      case PREFIX_REDUCE_S:
         upc_all_prefix_reduceS((shared short *)&(distArr[THREADS*offset]), (shared short *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(short))*THREADS, cursize/sizeof(short), NULL, sync_mode );
         break;
      case REDUCE_US:
         upc_all_reduceUS((shared unsigned short *)reduceArr, (shared unsigned short *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned short))*THREADS, cursize/sizeof(unsigned short), NULL, sync_mode );
         break;
      case PREFIX_REDUCE_US:
         upc_all_prefix_reduceUS((shared unsigned short *)&(distArr[THREADS*offset]), (shared unsigned short *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned short))*THREADS, cursize/sizeof(unsigned short), NULL, sync_mode );
         break;
      case REDUCE_I:
         upc_all_reduceI((shared int *)reduceArr, (shared int *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(int))*THREADS, cursize/sizeof(int), NULL, sync_mode );
         break;
      case PREFIX_REDUCE_I:
         upc_all_prefix_reduceI((shared int *)&(distArr[THREADS*offset]), (shared int *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(int))*THREADS, cursize/sizeof(int), NULL, sync_mode );
         break;
      case REDUCE_UI:
         upc_all_reduceUI((shared unsigned int *)reduceArr, (shared unsigned int *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned int))*THREADS, cursize/sizeof(unsigned int), NULL, sync_mode );
         break;
      case PREFIX_REDUCE_UI:
         upc_all_prefix_reduceUI((shared unsigned int *)&(distArr[THREADS*offset]), (shared unsigned int *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned int))*THREADS, cursize/sizeof(unsigned int), NULL, sync_mode );
         break;
      case REDUCE_L:
         upc_all_reduceL((shared long *)reduceArr, (shared long *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(long))*THREADS, cursize/sizeof(long), NULL, sync_mode );
         break;
      case PREFIX_REDUCE_L:
         upc_all_prefix_reduceL((shared long *)&(distArr[THREADS*offset]), (shared long *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(long))*THREADS, cursize/sizeof(long), NULL, sync_mode );
         break;
      case REDUCE_UL:
         upc_all_reduceUL((shared unsigned long *)reduceArr, (shared unsigned long *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned long))*THREADS, cursize/sizeof(unsigned long), NULL, sync_mode );
         break;
      case PREFIX_REDUCE_UL:
         upc_all_prefix_reduceUL((shared unsigned long *)&(distArr[THREADS*offset]), (shared unsigned long *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned long))*THREADS, cursize/sizeof(unsigned long), NULL, sync_mode );
         break;
      case REDUCE_F:
         upc_all_reduceF((shared float *)reduceArr, (shared float *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(float))*THREADS, cursize/sizeof(float), NULL, sync_mode );
         break;
      case PREFIX_REDUCE_F:
         upc_all_prefix_reduceF((shared float *)&(distArr[THREADS*offset]), (shared float *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(float))*THREADS, cursize/sizeof(float), NULL, sync_mode );
         break;
      case REDUCE_D:
         upc_all_reduceD((shared double *)reduceArr, (shared double *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(double))*THREADS, cursize/sizeof(double), NULL, sync_mode );
         break;
      case PREFIX_REDUCE_D:
         upc_all_prefix_reduceD((shared double *)&(distArr[THREADS*offset]), (shared double *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(double))*THREADS, cursize/sizeof(double), NULL, sync_mode );
         break;
      case REDUCE_LD:
         upc_all_reduceLD((shared long double *)reduceArr, (shared long double *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(long double))*THREADS, cursize/sizeof(long double), NULL, sync_mode );
         break;
      case PREFIX_REDUCE_LD:
         upc_all_prefix_reduceLD((shared long double *)&(distArr[THREADS*offset]),(shared long double *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(long double))*THREADS, cursize/sizeof(long double), NULL, sync_mode );
         break;
      case BARRIER:
         upc_barrier;
         break;
      /* ---- Point-to-point: executed by thread 0 only ---- */
      case MEMGET: /* remote get (source has affinity to thread 1) */
         if (!MYTHREAD)
            upc_memget((p2pLocalArr+offset),&(p2pDistArr[1+THREADS*offset]),cursize);
         break;
      case LMEMGET: /* local get (source has affinity to thread 0) */
         if (!MYTHREAD)
            upc_memget((p2pLocalArr+offset),&(p2pDistArr[THREADS*offset]),cursize);
         break;
#ifdef ASYNC_MEM_TEST
      case AMEMGET: /* asynchronous remote get */
         if (!MYTHREAD){
            upc_memget_asynci((p2pLocalArr+offset),&(p2pDistArr[1+THREADS*offset]),cursize);
            upc_waitsynci();
         }
         break;
      case ALMEMGET: /* asynchronous local get */
         if (!MYTHREAD){
            upc_memget_asynci((p2pLocalArr+offset),&(p2pDistArr[THREADS*offset]),cursize);
            upc_waitsynci();
         }
         break;
#endif
      case MEMPUT: /* remote put (destination has affinity to thread 1) */
         if (!MYTHREAD)
            upc_memput(&(p2pDistArr[1+THREADS*offset]),p2pLocalArr+offset,cursize);
         break;
      case LMEMPUT: /* local put (destination has affinity to thread 0) */
         if (!MYTHREAD)
            upc_memput(&(p2pDistArr[THREADS*offset]),p2pLocalArr+offset,cursize);
         break;
#ifdef ASYNC_MEM_TEST
      case AMEMPUT: /* asynchronous remote put */
         if (!MYTHREAD){
            upc_memput_asynci(&(p2pDistArr[1+THREADS*offset]),p2pLocalArr+offset,cursize);
            upc_waitsynci();
         }
         break;
      case ALMEMPUT: /* asynchronous local put */
         if (!MYTHREAD){
            upc_memput_asynci(&(p2pDistArr[THREADS*offset]),p2pLocalArr+offset,cursize);
            upc_waitsynci();
         }
         break;
#endif
      case MEMCPY: /* remote shared-to-shared copy */
         if (!MYTHREAD)
            upc_memcpy(&(p2pDistArr[1+THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize);
         break;
      case LMEMCPY: /* local shared-to-shared copy */
         if (!MYTHREAD)
            upc_memcpy(&(p2pDistArr[THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize);
         break;
#ifdef ASYNC_MEM_TEST
      case AMEMCPY: /* asynchronous remote shared-to-shared copy */
         if (!MYTHREAD){
            upc_memcpy_asynci(&(p2pDistArr[1+THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize);
            upc_waitsynci();
         }
         break;
      case ALMEMCPY: /* asynchronous local shared-to-shared copy */
         if (!MYTHREAD){
            upc_memcpy_asynci(&(p2pDistArr[THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize);
            upc_waitsynci();
         }
         break;
#endif
      case SMEMCPY: /* plain libc memcpy between private buffers (baseline) */
         if (!MYTHREAD)
            memcpy(p2pLocalArr2+offset,p2pLocalArr+offset,cursize);
         break;
      case MEMMOVE: /* plain libc memmove between private buffers (baseline) */
         if (!MYTHREAD)
            memmove(p2pLocalArr2+offset,p2pLocalArr+offset,cursize);
         break;
      /* ---- Memory management benchmarks ---- */
      case ALLALLOC:
         mem_alloc_tests_pointer = upc_all_alloc(THREADS,cursize);
         break;
      case FREE:
         if(!MYTHREAD)
            upc_free(mem_alloc_tests_pointer);
         break;

      default:
         break;
   }
   return;
}
|
615 : |
|
|
|