Log In | Get Help   
Home My Page Projects Code Snippets Project Openings UPC Operations Microbenchmarking Suite
Summary Activity Tracker Lists Docs News SCM Files
[uoms] Annotation of /trunk/uoms/src/UOMS.upc
[uoms] / trunk / uoms / src / UOMS.upc Repository:
ViewVC logotype

Annotation of /trunk/uoms/src/UOMS.upc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 15 - (view) (download)

1 : dalvarez 14 /******************************************************************************/
2 :     /* */
3 :     /* Copyright (c) 2008, 2009, 2010 */
4 :     /* Computer Architecture Group (CAG) */
5 :     /* University of A Coruña, Spain */
6 :     /* (http://gac.des.udc.es) */
7 :     /* Galicia Supercomputing Center (CESGA) */
8 :     /* (http://www.cesga.es) */
9 :     /* Hewlett-Packard Spain (HP) */
10 :     /* (http://www.hp.es) */
11 :     /* */
12 :     /* This file is part of UPC Operations Microbenchmarking Suite (UOMS). */
13 :     /* */
14 :     /* UOMS is free software: you can redistribute it and/or modify */
15 :     /* it under the terms of the GNU Lesser General Public License as published */
16 :     /* by the Free Software Foundation, either version 3 of the License, or */
17 :     /* (at your option) any later version. */
18 :     /* */
19 :     /* UOMS is distributed in the hope that it will be useful, */
20 :     /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
21 :     /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
22 :     /* GNU Lesser General Public License for more details. */
23 :     /* */
24 :     /* You should have received a copy of the GNU Lesser General Public License */
25 :     /* along with UOMS. If not, see <http://www.gnu.org/licenses/>. */
26 :     /* */
27 :     /******************************************************************************/
28 : dalvarez 1
29 : dalvarez 14 /******************************************************************************/
30 :     /* */
31 :     /* FUNDING: This development has been funded by Hewlett-Packard Spain */
32 :     /* */
33 :     /* Project Name: */
34 :     /* UPCHACO (2008-2011) */
35 :     /* Subproject: */
36 :     /* Improving UPC Usability and Performance in Constellation Systems: */
37 :     /* Implementation/Extensions of UPC Libraries. */
38 :     /* (UPCPUProject -> UPC Performance and Usability Project) */
39 :     /* */
40 :     /******************************************************************************/
41 : dalvarez 1
42 : dalvarez 14 /******************************************************************************
43 : dalvarez 1
44 :     For further documentation, see
45 :    
46 :     [1] Files under doc/
47 :    
48 : dalvarez 14 *******************************************************************************/
49 : dalvarez 1
50 :     #include <sched.h>
51 :     #include <stdio.h>
52 :     #include <stdlib.h>
53 :     #include <time.h>
54 :     #include <sys/utsname.h>
55 :     #include <sys/time.h>
56 :     #include <values.h>
57 :     #include <stdio.h>
58 :     #include <unistd.h>
59 :     #include <math.h>
60 :     #include <string.h>
61 :    
62 :     /* UPC */
63 :     #include <upc.h>
64 :     #include <upc_collective.h>
65 :    
66 :     /* Timers */
67 :     #include "timers.h"
68 :    
69 :     #include "defines.h"
70 :     #include "headers.h"
71 :    
/* Output stream for all benchmark reports (stdout, stderr, or a log file;
   chosen during init()). */
FILE* unit;

/*
 Distributed array used in various benchmarks
*/
shared char *distArr;

/*
 Broadcast array (source buffer, entirely on one thread)
*/
shared [] char *broadcastArr;

/*
 Scatter array (source buffer, entirely on one thread)
*/
shared [] char *scatterArr;

/*
 Gather array (destination buffer, entirely on one thread)
*/
shared [] char *gatherArr;

/*
 Gatherall array
*/
shared char *gatherallArr;

/*
 Exchange array
*/
shared char *exchangeArr;

/*
 Permute array
*/
shared char *permuteArr;

/*
 Reduce array (will store only 1 element)
*/
shared [] char *reduceArr;

/*
 Prefix reduce array
*/
shared char *prefixReduceArr;

/*
 Pointer for memory allocation and freeing test
*/
shared char *mem_alloc_tests_pointer;

/*
 Arrays for p2p benchmarking (shared source/destination pairs plus
 thread-private buffers for the local memcpy/memmove variants)
*/
shared char *p2pDistArr;
shared char *p2pDistArr2;
char *p2pLocalArr;
char *p2pLocalArr2;

shared double b;// for reduction result
shared int perm[THREADS]; //for permutation
shared long threads_times[THREADS]; //for timing (one per-iteration time per thread)

/* Message sizes to benchmark, parsed during init() */
long *sizes;
int num_sizes = 0;

/* Benchmark selection: indices into valid_bms */
int *bm_list;
int num_bms = 0;
char * valid_bms[NUM_BMS];

/* When 1, rotate through one buffer bucket per iteration to defeat caching */
int cache_invalidation = 0;

/* Reduction operator used by all REDUCE_* / PREFIX_REDUCE_* benchmarks */
upc_op_t reduce_op = UPC_ADD;
char * char_reduce_op = "UPC_ADD";

/* When non-zero, an extra untimed warm-up pass is run first */
int warmup;

int time_limit_set = 0;

/* Synchronization mode passed to all UPC collectives */
upc_flag_t sync_mode = UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC;
char * char_sync_mode = "UPC_IN_ALLSYNC|UPC_OUT_ALLSYNC";

/* Sink for loop results so aggressive optimizers cannot drop read loops */
int global_aux_var = 0;

/* Per-size wall-clock budget in seconds (only honored if time_limit_set) */
uint64_t timeLimit;
158 :    
159 : dalvarez 1 int main(int argc, char **argv)
160 :     /*
161 :    
162 :     Input variables:
163 :    
164 :     -argc (type int)
165 :     Number of command line arguments
166 :    
167 :     -argv (type char **)
168 :     List of command line arguments
169 :    
170 :     Return value (type int)
171 :     0 always
172 :    
173 :     */
174 :     {
175 :    
176 :     init(argc, argv);
177 :    
178 :     if (!MYTHREAD)
179 :     UOMS_general_info();
180 :    
181 :     for(int i = 0; i < num_bms; i++){
182 :     if(p2poperation(bm_list[i]) == 0){
183 :     bench(bm_list[i]);
184 :     }
185 :     else{
186 :     if (THREADS == 2){
187 :     affinity_bench(bm_list[i]);
188 :     }
189 :     else{
190 :     if(MYTHREAD == 0){
191 :     fprintf(unit,"\n#---------------------------------------------------\n");
192 :     fprintf(unit,"# Skipping benchmark %s. To run it use only 2 threads.\n",valid_bms[bm_list[i]]);
193 :     fprintf(unit,"#---------------------------------------------------\n\n");
194 :     }
195 :     }
196 :     }
197 :     }
198 :    
199 :     if(unit != stdout && unit != stderr){
200 :     fclose(unit);
201 :     }
202 :    
203 :     return 0;
204 :     }
205 :    
206 :    
207 :    
208 :     int p2poperation(int operation_code) {
209 :     switch (operation_code) {
210 :     case LMEMCPY:
211 :     case MEMCPY:
212 :     case LMEMGET:
213 :     case MEMGET:
214 :     case LMEMPUT:
215 :     case MEMPUT:
216 :     #ifdef ASYNC_MEM_TEST
217 :     case ALMEMCPY:
218 :     case AMEMCPY:
219 :     case ALMEMGET:
220 :     case AMEMGET:
221 :     case ALMEMPUT:
222 :     case AMEMPUT:
223 :     #endif
224 : dalvarez 7 #ifdef ASYNCI_MEM_TEST
225 :     case AILMEMCPY:
226 :     case AIMEMCPY:
227 :     case AILMEMGET:
228 :     case AIMEMGET:
229 :     case AILMEMPUT:
230 :     case AIMEMPUT:
231 :     #endif
232 : dalvarez 1 case SMEMCPY:
233 :     case MEMMOVE:
234 :     return 1;
235 :     default:
236 :     break;
237 :     }
238 :     return 0;
239 :     }
240 :    
241 :     /*
242 :     Generic bench function
243 :     */
244 :     void bench(int operation_code) {
245 :    
246 :     long int cursize;
247 :     long int niterations, iter;
248 :     uint64_t start, end;
249 :     uint64_t minTime, totalTime, maxTime, prevMinTime;
250 :    
251 :     if (!MYTHREAD)
252 :     UOMS_function_info(operation_code,THREADS,operation_header(operation_code));
253 :    
254 :     int global_iter;
255 :     int initial_iter = (warmup)?0:1;
256 :    
257 :     int mem_is_ok;
258 :    
259 :     for(global_iter = initial_iter; global_iter <2; global_iter++) {
260 :    
261 :     prevMinTime = 0;
262 :    
263 :     /*
264 :     Benchmarking coll with cursize-size and niterations-iterations
265 :     */
266 :     for(int cursize_index=0;cursize_index<num_sizes;cursize_index++){
267 :    
268 :     cursize = sizes[cursize_index];
269 :    
270 :     /*
271 :     Skip test that do not achieve the minimum size
272 :     */
273 :     if(cursize <= 0){
274 :     continue;
275 :     }
276 :     else if(operation_code == REDUCE_C || operation_code == PREFIX_REDUCE_C){
277 :     if(cursize<sizeof(char)){
278 :     continue;
279 :     }
280 :     }
281 :     else if(operation_code == REDUCE_UC || operation_code == PREFIX_REDUCE_UC){
282 :     if(cursize<sizeof(unsigned char)){
283 :     continue;
284 :     }
285 :     }
286 :     else if(operation_code == REDUCE_S || operation_code == PREFIX_REDUCE_S){
287 :     if(cursize<sizeof(short)){
288 :     continue;
289 :     }
290 :     }
291 :     else if(operation_code == REDUCE_US || operation_code == PREFIX_REDUCE_US){
292 :     if(cursize<sizeof(unsigned short)){
293 :     continue;
294 :     }
295 :     }
296 :     else if(operation_code == REDUCE_I || operation_code == PREFIX_REDUCE_I){
297 :     if(cursize<sizeof(int)){
298 :     continue;
299 :     }
300 :     }
301 :     else if(operation_code == REDUCE_UI || operation_code == PREFIX_REDUCE_UI){
302 :     if(cursize<sizeof(unsigned int)){
303 :     continue;
304 :     }
305 :     }
306 :     else if(operation_code == REDUCE_L || operation_code == PREFIX_REDUCE_L){
307 :     if(cursize<sizeof(long)){
308 :     continue;
309 :     }
310 :     }
311 :     else if(operation_code == REDUCE_UL || operation_code == PREFIX_REDUCE_UL){
312 :     if(cursize<sizeof(unsigned long)){
313 :     continue;
314 :     }
315 :     }
316 :     else if(operation_code == REDUCE_F || operation_code == PREFIX_REDUCE_F){
317 :     if(cursize<sizeof(float)){
318 :     continue;
319 :     }
320 :     }
321 :     else if(operation_code == REDUCE_D || operation_code == PREFIX_REDUCE_D){
322 :     if(cursize<sizeof(double)){
323 :     continue;
324 :     }
325 :     }
326 :     else if(operation_code == REDUCE_LD || operation_code == PREFIX_REDUCE_LD){
327 :     if(cursize<sizeof(long double)){
328 :     continue;
329 :     }
330 :     }
331 :    
332 :     long int nbuckets;
333 :     niterations = niters(cursize);
334 :    
335 :     if(cache_invalidation == 1){
336 :     nbuckets=niterations;
337 :     }
338 :     else{
339 :     nbuckets=1;
340 :     }
341 :    
342 : dalvarez 14 if(allocate_arrays(operation_code,cursize,nbuckets) == -1) // Unsuccessful allocation
343 : dalvarez 1 continue;
344 :    
345 :     upc_barrier;
346 :    
347 :     minTime=MAXLONG;
348 :     maxTime=0L;
349 :     totalTime=0L;
350 :    
351 :     uint64_t tmax;
352 :    
353 :     for(iter=0;iter<niterations;iter++){
354 :    
355 :     /*
356 :     For this benchmark the array should be allocated per iteration
357 :     */
358 :     if(operation_code == FREE){
359 :     mem_alloc_tests_pointer = upc_all_alloc(THREADS,cursize);
360 :     UPCMEM_OK(mem_alloc_tests_pointer);
361 :     if(mem_is_ok == -1)
362 :     continue;
363 :     }
364 :    
365 :     upc_barrier;
366 :    
367 :     start = getTicks();
368 :     function(operation_code,cursize,(iter%nbuckets)*cursize);
369 :     upc_barrier;
370 :     end = getTicks() - start;
371 :    
372 : dalvarez 14 /*
373 :     For this benchmark the array should be freed per iteration
374 :     */
375 : dalvarez 1 if(operation_code == ALLALLOC){
376 :     UPCMEM_OK(mem_alloc_tests_pointer);
377 :     if(mem_is_ok == -1)
378 :     continue;
379 :     upc_barrier;
380 :     if(MYTHREAD == 0)
381 :     upc_free(mem_alloc_tests_pointer);
382 :     }
383 :    
384 : dalvarez 7 threads_times[MYTHREAD]=end;
385 : dalvarez 1 upc_barrier;
386 :     if (MYTHREAD == 0) {
387 :     int i;
388 :     tmax = 0L;
389 :     for(i=0;i<THREADS;i++) {
390 : dalvarez 7 if (threads_times[i]>tmax)
391 :     tmax=threads_times[i];
392 : dalvarez 1 }
393 :     }
394 :     end=tmax;
395 :    
396 :     totalTime+=end;
397 :    
398 :     //in order to avoid irregular latencies for short messages
399 :     if (end<minTime)
400 :     minTime = ((end<prevMinTime)&&(cursize<32*1024))?prevMinTime:end;
401 :     if (end>maxTime)
402 :     maxTime = end;
403 : dalvarez 15
404 :     if (time_limit_set == 1){
405 :     if (ticksToNS(totalTime)/(1000*1000*1000) >= timeLimit){
406 :     niterations = iter+1;
407 :     break;
408 :     }
409 :     }
410 :    
411 : dalvarez 1 }
412 :    
413 :     upc_barrier;
414 :    
415 :     free_arrays(operation_code);
416 :    
417 :     if(global_iter)
418 :     print_performance_data(operation_code,cursize,niterations,minTime,maxTime,totalTime);
419 :     prevMinTime = minTime;
420 :    
421 :     upc_barrier;
422 :    
423 :     if(operation_code == BARRIER){
424 :     break;
425 :     }
426 :    
427 :     }
428 :    
429 :     }//fi global_iter
430 :    
431 :     return;
432 :    
433 :     }
434 :    
435 :    
436 :    
437 :     /*
438 :     Call the corresponding function
439 :     */
/*
 * Call the corresponding function.
 *
 * Dispatches on operation_code and performs one timed instance of the
 * selected UPC operation over cursize bytes (per thread). `offset` is the
 * bucket offset used for cache invalidation; each case scales it by the
 * stride its buffers were allocated with. In the point-to-point cases the
 * "L"-prefixed codes address &p2pDistArr[THREADS*offset] (index affinity
 * presumably local to thread 0 — the default cyclic layout puts index 0 on
 * thread 0) while the unprefixed codes address 1+THREADS*offset
 * (presumably remote; verify against the allocation in allocate_arrays).
 */
void function(int operation_code, long int cursize,long int offset){

    int aux_var = 0;

    switch (operation_code) {
    /* ---- upc_forall / for loops over the distributed array ---- */
    case FORALL_R:
        upc_forall(int i = offset; i < cursize*THREADS+offset; i++; &distArr[i]){
            aux_var+=distArr[i];
        }
        /*
         Some compilers with aggressive optimizations may drop the whole loop if they detect that
         aux_var is not going to be used. Writing its value to an external variable may prevent this.
        */
        global_aux_var = aux_var;
        break;
    case FORALL_W:
        upc_forall(int i = offset; i < cursize*THREADS+offset; i++; &distArr[i]){
            distArr[i] = i;
        }
        break;
    case FORALL_RW:
        upc_forall(int i = offset; i < cursize*THREADS+offset; i++; &distArr[i]){
            distArr[i] += i;
        }
        break;
    case FOR_R:
        /* Plain for-loop variant: only thread 0 traverses the whole array */
        if(MYTHREAD == 0){
            for(int i = offset; i < cursize*THREADS+offset; i++){
                aux_var+=distArr[i];
            }
        }
        /*
         Some compilers with aggressive optimizations may drop the whole loop if they detect that
         aux_var is not going to be used. Writing its value to an external variable may prevent this.
        */
        global_aux_var = aux_var;
        break;
    case FOR_W:
        if(MYTHREAD == 0){
            for(int i = offset; i < cursize*THREADS+offset; i++){
                distArr[i] = i;
            }
        }
        break;
    case FOR_RW:
        if(MYTHREAD == 0){
            for(int i = offset; i < cursize*THREADS+offset; i++){
                distArr[i] += i;
            }
        }
        break;
    /* ---- collectives ---- */
    case BROADCAST:
        upc_all_broadcast(&(distArr[THREADS*offset]),&(broadcastArr[offset]), cursize, sync_mode);
        break;
    case SCATTER:
        upc_all_scatter(&(distArr[THREADS*offset]),&(scatterArr[THREADS*offset]), cursize, sync_mode);
        break;
    case GATHER:
        upc_all_gather( &(gatherArr[THREADS*offset]),&(distArr[THREADS*offset]), cursize, sync_mode);
        break;
    case GATHERALL:
        upc_all_gather_all( &(gatherallArr[THREADS*THREADS*offset]),&(distArr[THREADS*offset]), cursize, sync_mode);
        break;
    case EXCHANGE:
        upc_all_exchange(&(exchangeArr[THREADS*THREADS*offset]), &(distArr[THREADS*THREADS*offset]), cursize, sync_mode );
        break;
    case PERMUTE:
        upc_all_permute(&(permuteArr[THREADS*offset]), &(distArr[THREADS*offset]), perm, cursize, sync_mode );
        break;
    /* ---- reductions: nelems = (cursize/sizeof(T))*THREADS, blk_size = cursize/sizeof(T) ---- */
    case REDUCE_C:
        upc_all_reduceC((shared char *)reduceArr, (shared char*)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(char))*THREADS, cursize/sizeof(char), NULL, sync_mode );
        break;
    case PREFIX_REDUCE_C:
        upc_all_prefix_reduceC((shared char *)&(distArr[THREADS*offset]), (shared char *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(char))*THREADS, cursize/sizeof(char), NULL, sync_mode );
        break;
    case REDUCE_UC:
        upc_all_reduceUC((shared unsigned char *)reduceArr, (shared unsigned char *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned char))*THREADS, cursize/sizeof(unsigned char), NULL, sync_mode );
        break;
    case PREFIX_REDUCE_UC:
        upc_all_prefix_reduceUC((shared unsigned char *)&(distArr[THREADS*offset]), (shared unsigned char *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned char))*THREADS, cursize/sizeof(unsigned char), NULL, sync_mode );
        break;
    case REDUCE_S:
        upc_all_reduceS((shared short *)reduceArr, (shared short *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(short))*THREADS, cursize/sizeof(short), NULL, sync_mode );
        break;
    case PREFIX_REDUCE_S:
        upc_all_prefix_reduceS((shared short *)&(distArr[THREADS*offset]), (shared short *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(short))*THREADS, cursize/sizeof(short), NULL, sync_mode );
        break;
    case REDUCE_US:
        upc_all_reduceUS((shared unsigned short *)reduceArr, (shared unsigned short *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned short))*THREADS, cursize/sizeof(unsigned short), NULL, sync_mode );
        break;
    case PREFIX_REDUCE_US:
        upc_all_prefix_reduceUS((shared unsigned short *)&(distArr[THREADS*offset]), (shared unsigned short *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned short))*THREADS, cursize/sizeof(unsigned short), NULL, sync_mode );
        break;
    case REDUCE_I:
        upc_all_reduceI((shared int *)reduceArr, (shared int *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(int))*THREADS, cursize/sizeof(int), NULL, sync_mode );
        break;
    case PREFIX_REDUCE_I:
        upc_all_prefix_reduceI((shared int *)&(distArr[THREADS*offset]), (shared int *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(int))*THREADS, cursize/sizeof(int), NULL, sync_mode );
        break;
    case REDUCE_UI:
        upc_all_reduceUI((shared unsigned int *)reduceArr, (shared unsigned int *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned int))*THREADS, cursize/sizeof(unsigned int), NULL, sync_mode );
        break;
    case PREFIX_REDUCE_UI:
        upc_all_prefix_reduceUI((shared unsigned int *)&(distArr[THREADS*offset]), (shared unsigned int *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned int))*THREADS, cursize/sizeof(unsigned int), NULL, sync_mode );
        break;
    case REDUCE_L:
        upc_all_reduceL((shared long *)reduceArr, (shared long *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(long))*THREADS, cursize/sizeof(long), NULL, sync_mode );
        break;
    case PREFIX_REDUCE_L:
        upc_all_prefix_reduceL((shared long *)&(distArr[THREADS*offset]), (shared long *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(long))*THREADS, cursize/sizeof(long), NULL, sync_mode );
        break;
    case REDUCE_UL:
        upc_all_reduceUL((shared unsigned long *)reduceArr, (shared unsigned long *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned long))*THREADS, cursize/sizeof(unsigned long), NULL, sync_mode );
        break;
    case PREFIX_REDUCE_UL:
        upc_all_prefix_reduceUL((shared unsigned long *)&(distArr[THREADS*offset]), (shared unsigned long *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(unsigned long))*THREADS, cursize/sizeof(unsigned long), NULL, sync_mode );
        break;
    case REDUCE_F:
        upc_all_reduceF((shared float *)reduceArr, (shared float *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(float))*THREADS, cursize/sizeof(float), NULL, sync_mode );
        break;
    case PREFIX_REDUCE_F:
        upc_all_prefix_reduceF((shared float *)&(distArr[THREADS*offset]), (shared float *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(float))*THREADS, cursize/sizeof(float), NULL, sync_mode );
        break;
    case REDUCE_D:
        upc_all_reduceD((shared double *)reduceArr, (shared double *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(double))*THREADS, cursize/sizeof(double), NULL, sync_mode );
        break;
    case PREFIX_REDUCE_D:
        upc_all_prefix_reduceD((shared double *)&(distArr[THREADS*offset]), (shared double *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(double))*THREADS, cursize/sizeof(double), NULL, sync_mode );
        break;
    case REDUCE_LD:
        upc_all_reduceLD((shared long double *)reduceArr, (shared long double *)&(distArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(long double))*THREADS, cursize/sizeof(long double), NULL, sync_mode );
        break;
    case PREFIX_REDUCE_LD:
        upc_all_prefix_reduceLD((shared long double *)&(distArr[THREADS*offset]),(shared long double *)&(prefixReduceArr[THREADS*offset]),
            reduce_op, (cursize/sizeof(long double))*THREADS, cursize/sizeof(long double), NULL, sync_mode );
        break;
    case BARRIER:
        upc_barrier;
        break;
    /* ---- point-to-point transfers: driven by thread 0 only ---- */
    case MEMGET:
        if (!MYTHREAD)
            upc_memget((p2pLocalArr+offset),&(p2pDistArr[1+THREADS*offset]),cursize);
        break;
    case LMEMGET:
        if (!MYTHREAD)
            upc_memget((p2pLocalArr+offset),&(p2pDistArr[THREADS*offset]),cursize);
        break;
#ifdef ASYNC_MEM_TEST
    case AMEMGET:
        if (!MYTHREAD){
            upc_handle_t handler = upc_memget_async((p2pLocalArr+offset),&(p2pDistArr[1+THREADS*offset]),cursize);
            upc_waitsync(handler);
        }
        break;
    case ALMEMGET:
        if (!MYTHREAD){
            upc_handle_t handler = upc_memget_async((p2pLocalArr+offset),&(p2pDistArr[THREADS*offset]),cursize);
            upc_waitsync(handler);
        }
        break;
#endif
#ifdef ASYNCI_MEM_TEST
    case AIMEMGET:
        if (!MYTHREAD){
            upc_memget_asynci((p2pLocalArr+offset),&(p2pDistArr[1+THREADS*offset]),cursize);
            upc_waitsynci();
        }
        break;
    case AILMEMGET:
        if (!MYTHREAD){
            upc_memget_asynci((p2pLocalArr+offset),&(p2pDistArr[THREADS*offset]),cursize);
            upc_waitsynci();
        }
        break;
#endif
    case MEMPUT:
        if (!MYTHREAD)
            upc_memput(&(p2pDistArr[1+THREADS*offset]),p2pLocalArr+offset,cursize);
        break;
    case LMEMPUT:
        if (!MYTHREAD)
            upc_memput(&(p2pDistArr[THREADS*offset]),p2pLocalArr+offset,cursize);
        break;
#ifdef ASYNC_MEM_TEST
    case AMEMPUT:
        if (!MYTHREAD){
            upc_handle_t handler = upc_memput_async(&(p2pDistArr[1+THREADS*offset]),p2pLocalArr+offset,cursize);
            upc_waitsync(handler);
        }
        break;
    case ALMEMPUT:
        if (!MYTHREAD){
            upc_handle_t handler = upc_memput_async(&(p2pDistArr[THREADS*offset]),p2pLocalArr+offset,cursize);
            upc_waitsync(handler);
        }
        break;
#endif
#ifdef ASYNCI_MEM_TEST
    case AIMEMPUT:
        if (!MYTHREAD){
            upc_memput_asynci(&(p2pDistArr[1+THREADS*offset]),p2pLocalArr+offset,cursize);
            upc_waitsynci();
        }
        break;
    case AILMEMPUT:
        if (!MYTHREAD){
            upc_memput_asynci(&(p2pDistArr[THREADS*offset]),p2pLocalArr+offset,cursize);
            upc_waitsynci();
        }
        break;
#endif
    case MEMCPY:
        if (!MYTHREAD)
            upc_memcpy(&(p2pDistArr[1+THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize);
        break;
    case LMEMCPY:
        if (!MYTHREAD)
            upc_memcpy(&(p2pDistArr[THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize);
        break;
#ifdef ASYNC_MEM_TEST
    case AMEMCPY:
        if (!MYTHREAD){
            upc_handle_t handler = upc_memcpy_async(&(p2pDistArr[1+THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize);
            upc_waitsync(handler);
        }
        break;
    case ALMEMCPY:
        if (!MYTHREAD){
            upc_handle_t handler = upc_memcpy_async(&(p2pDistArr[THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize);
            upc_waitsync(handler);
        }
        break;
#endif
#ifdef ASYNCI_MEM_TEST
    case AIMEMCPY:
        if (!MYTHREAD){
            upc_memcpy_asynci(&(p2pDistArr[1+THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize);
            upc_waitsynci();
        }
        break;
    case AILMEMCPY:
        if (!MYTHREAD){
            upc_memcpy_asynci(&(p2pDistArr[THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize);
            upc_waitsynci();
        }
        break;
#endif
    /* ---- thread-private baselines (libc copies) ---- */
    case SMEMCPY:
        if (!MYTHREAD)
            memcpy(p2pLocalArr2+offset,p2pLocalArr+offset,cursize);
        break;
    case MEMMOVE:
        if (!MYTHREAD)
            memmove(p2pLocalArr2+offset,p2pLocalArr+offset,cursize);
        break;
    /* ---- allocation benchmarks (bench() frees/allocates the counterpart) ---- */
    case ALLALLOC:
        mem_alloc_tests_pointer = upc_all_alloc(THREADS,cursize);
        break;
    case FREE:
        if(!MYTHREAD)
            upc_free(mem_alloc_tests_pointer);
        break;

    default:
        break;
    }
    return;
}
729 :    

root@forge.cesga.es
ViewVC Help
Powered by ViewVC 1.0.0  

Powered By FusionForge