1 |
/*****************************************************************************/ |
/******************************************************************************/ |
2 |
/* */ |
/* */ |
3 |
/* Copyright (c) 2008, 2009, 2010 */ |
/* Copyright (c) 2008, 2009, 2010 */ |
4 |
/* Computer Architecture Group (CAG) */ |
/* Computer Architecture Group (CAG) */ |
24 |
/* You should have received a copy of the GNU Lesser General Public License */ |
/* You should have received a copy of the GNU Lesser General Public License */ |
25 |
/* along with UOMS. If not, see <http://www.gnu.org/licenses/>. */ |
/* along with UOMS. If not, see <http://www.gnu.org/licenses/>. */ |
26 |
/* */ |
/* */ |
27 |
/*****************************************************************************/ |
/******************************************************************************/ |
28 |
|
|
29 |
/*****************************************************************************/ |
/******************************************************************************/ |
30 |
/* */ |
/* */ |
31 |
/* FUNDING: This development has been funded by Hewlett-Packard Spain */ |
/* FUNDING: This development has been funded by Hewlett-Packard Spain */ |
32 |
/* */ |
/* */ |
35 |
/* Subproject: */ |
/* Subproject: */ |
36 |
/* Improving UPC Usability and Performance in Constellation Systems: */ |
/* Improving UPC Usability and Performance in Constellation Systems: */ |
37 |
/* Implementation/Extensions of UPC Libraries. */ |
/* Implementation/Extensions of UPC Libraries. */ |
38 |
/* (UPCPUProject -> UPC Performance and Usability Project) */ |
/* (UPCPUProject -> UPC Performance and Usability Project) */ |
39 |
/* */ |
/* */ |
40 |
/*****************************************************************************/ |
/******************************************************************************/ |
41 |
|
|
42 |
/***************************************************************************** |
/****************************************************************************** |
43 |
|
|
44 |
For further documentation, see |
For further documentation, see |
45 |
|
|
46 |
[1] Files under doc/ |
[1] Files under doc/ |
47 |
|
|
48 |
******************************************************************************/ |
*******************************************************************************/ |
49 |
|
|
50 |
#include <sched.h> |
#include <sched.h> |
51 |
#include <stdio.h> |
#include <stdio.h> |
61 |
|
|
62 |
/* UPC */ |
/* UPC */ |
63 |
#include <upc.h> |
#include <upc.h> |
|
#include <upc_strict.h> |
|
64 |
#include <upc_collective.h> |
#include <upc_collective.h> |
65 |
|
|
66 |
/* Timers */ |
/* Timers */ |
131 |
|
|
132 |
shared double b;// for reduction result |
shared double b;// for reduction result |
133 |
shared int perm[THREADS]; //for permutation |
shared int perm[THREADS]; //for permutation |
134 |
shared long times[THREADS]; //for timing |
shared long threads_times[THREADS]; //for timing |
135 |
|
|
136 |
long *sizes; |
long *sizes; |
137 |
int num_sizes = 0; |
int num_sizes = 0; |
150 |
upc_flag_t sync_mode = UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC; |
upc_flag_t sync_mode = UPC_IN_ALLSYNC | UPC_OUT_ALLSYNC; |
151 |
char * char_sync_mode = "UPC_IN_ALLSYNC|UPC_OUT_ALLSYNC"; |
char * char_sync_mode = "UPC_IN_ALLSYNC|UPC_OUT_ALLSYNC"; |
152 |
|
|
153 |
|
int global_aux_var = 0; |
154 |
|
|
155 |
int main(int argc, char **argv) |
int main(int argc, char **argv) |
156 |
/* |
/* |
157 |
|
|
217 |
case ALMEMPUT: |
case ALMEMPUT: |
218 |
case AMEMPUT: |
case AMEMPUT: |
219 |
#endif |
#endif |
220 |
|
#ifdef ASYNCI_MEM_TEST |
221 |
|
case AILMEMCPY: |
222 |
|
case AIMEMCPY: |
223 |
|
case AILMEMGET: |
224 |
|
case AIMEMGET: |
225 |
|
case AILMEMPUT: |
226 |
|
case AIMEMPUT: |
227 |
|
#endif |
228 |
case SMEMCPY: |
case SMEMCPY: |
229 |
case MEMMOVE: |
case MEMMOVE: |
230 |
return 1; |
return 1; |
335 |
nbuckets=1; |
nbuckets=1; |
336 |
} |
} |
337 |
|
|
338 |
if(allocate_arrays(operation_code,cursize,nbuckets) == -1) // Unsuccesfully allocation |
if(allocate_arrays(operation_code,cursize,nbuckets) == -1) // Unsuccessful allocation |
339 |
continue; |
continue; |
340 |
|
|
341 |
upc_barrier; |
upc_barrier; |
365 |
upc_barrier; |
upc_barrier; |
366 |
end = getTicks() - start; |
end = getTicks() - start; |
367 |
|
|
368 |
|
/* |
369 |
|
For this benchmark the array should be freed per iteration |
370 |
|
*/ |
371 |
if(operation_code == ALLALLOC){ |
if(operation_code == ALLALLOC){ |
372 |
UPCMEM_OK(mem_alloc_tests_pointer); |
UPCMEM_OK(mem_alloc_tests_pointer); |
373 |
if(mem_is_ok == -1) |
if(mem_is_ok == -1) |
377 |
upc_free(mem_alloc_tests_pointer); |
upc_free(mem_alloc_tests_pointer); |
378 |
} |
} |
379 |
|
|
380 |
times[MYTHREAD]=end; |
threads_times[MYTHREAD]=end; |
381 |
upc_barrier; |
upc_barrier; |
382 |
if (MYTHREAD == 0) { |
if (MYTHREAD == 0) { |
383 |
int i; |
int i; |
384 |
tmax = 0L; |
tmax = 0L; |
385 |
for(i=0;i<THREADS;i++) { |
for(i=0;i<THREADS;i++) { |
386 |
if (times[i]>tmax) |
if (threads_times[i]>tmax) |
387 |
tmax=times[i]; |
tmax=threads_times[i]; |
388 |
} |
} |
389 |
} |
} |
390 |
end=tmax; |
end=tmax; |
427 |
*/ |
*/ |
428 |
void function(int operation_code, long int cursize,long int offset){ |
void function(int operation_code, long int cursize,long int offset){ |
429 |
|
|
430 |
|
int aux_var = 0; |
431 |
|
|
432 |
switch (operation_code) { |
switch (operation_code) { |
433 |
|
case FORALL_R: |
434 |
|
upc_forall(int i = offset; i < cursize*THREADS+offset; i++; &distArr[i]){ |
435 |
|
aux_var+=distArr[i]; |
436 |
|
} |
437 |
|
/* |
438 |
|
Some compilers with aggressive optimizations may drop the whole loop if they detect that |
439 |
|
aux_var is not going to be used. Writing its value to an external variable may prevent this. |
440 |
|
*/ |
441 |
|
global_aux_var = aux_var; |
442 |
|
break; |
443 |
|
case FORALL_W: |
444 |
|
upc_forall(int i = offset; i < cursize*THREADS+offset; i++; &distArr[i]){ |
445 |
|
distArr[i] = i; |
446 |
|
} |
447 |
|
break; |
448 |
|
case FORALL_RW: |
449 |
|
upc_forall(int i = offset; i < cursize*THREADS+offset; i++; &distArr[i]){ |
450 |
|
distArr[i] += i; |
451 |
|
} |
452 |
|
break; |
453 |
|
case FOR_R: |
454 |
|
if(MYTHREAD == 0){ |
455 |
|
for(int i = offset; i < cursize*THREADS+offset; i++){ |
456 |
|
aux_var+=distArr[i]; |
457 |
|
} |
458 |
|
} |
459 |
|
/* |
460 |
|
Some compilers with aggressive optimizations may drop the whole loop if they detect that |
461 |
|
aux_var is not going to be used. Writing its value to an external variable may prevent this. |
462 |
|
*/ |
463 |
|
global_aux_var = aux_var; |
464 |
|
break; |
465 |
|
case FOR_W: |
466 |
|
if(MYTHREAD == 0){ |
467 |
|
for(int i = offset; i < cursize*THREADS+offset; i++){ |
468 |
|
distArr[i] = i; |
469 |
|
} |
470 |
|
} |
471 |
|
break; |
472 |
|
case FOR_RW: |
473 |
|
if(MYTHREAD == 0){ |
474 |
|
for(int i = offset; i < cursize*THREADS+offset; i++){ |
475 |
|
distArr[i] += i; |
476 |
|
} |
477 |
|
} |
478 |
|
break; |
479 |
case BROADCAST: |
case BROADCAST: |
480 |
upc_all_broadcast(&(distArr[THREADS*offset]),&(broadcastArr[offset]), cursize, sync_mode); |
upc_all_broadcast(&(distArr[THREADS*offset]),&(broadcastArr[offset]), cursize, sync_mode); |
481 |
break; |
break; |
596 |
#ifdef ASYNC_MEM_TEST |
#ifdef ASYNC_MEM_TEST |
597 |
case AMEMGET: |
case AMEMGET: |
598 |
if (!MYTHREAD){ |
if (!MYTHREAD){ |
599 |
|
upc_handle_t handler = upc_memget_async((p2pLocalArr+offset),&(p2pDistArr[1+THREADS*offset]),cursize); |
600 |
|
upc_waitsync(handler); |
601 |
|
} |
602 |
|
break; |
603 |
|
case ALMEMGET: |
604 |
|
if (!MYTHREAD){ |
605 |
|
upc_handle_t handler = upc_memget_async((p2pLocalArr+offset),&(p2pDistArr[THREADS*offset]),cursize); |
606 |
|
upc_waitsync(handler); |
607 |
|
} |
608 |
|
break; |
609 |
|
#endif |
610 |
|
#ifdef ASYNCI_MEM_TEST |
611 |
|
case AIMEMGET: |
612 |
|
if (!MYTHREAD){ |
613 |
upc_memget_asynci((p2pLocalArr+offset),&(p2pDistArr[1+THREADS*offset]),cursize); |
upc_memget_asynci((p2pLocalArr+offset),&(p2pDistArr[1+THREADS*offset]),cursize); |
614 |
upc_waitsynci(); |
upc_waitsynci(); |
615 |
} |
} |
616 |
break; |
break; |
617 |
case ALMEMGET: |
case AILMEMGET: |
618 |
if (!MYTHREAD){ |
if (!MYTHREAD){ |
619 |
upc_memget_asynci((p2pLocalArr+offset),&(p2pDistArr[THREADS*offset]),cursize); |
upc_memget_asynci((p2pLocalArr+offset),&(p2pDistArr[THREADS*offset]),cursize); |
620 |
upc_waitsynci(); |
upc_waitsynci(); |
632 |
#ifdef ASYNC_MEM_TEST |
#ifdef ASYNC_MEM_TEST |
633 |
case AMEMPUT: |
case AMEMPUT: |
634 |
if (!MYTHREAD){ |
if (!MYTHREAD){ |
635 |
|
upc_handle_t handler = upc_memput_async(&(p2pDistArr[1+THREADS*offset]),p2pLocalArr+offset,cursize); |
636 |
|
upc_waitsync(handler); |
637 |
|
} |
638 |
|
break; |
639 |
|
case ALMEMPUT: |
640 |
|
if (!MYTHREAD){ |
641 |
|
upc_handle_t handler = upc_memput_async(&(p2pDistArr[THREADS*offset]),p2pLocalArr+offset,cursize); |
642 |
|
upc_waitsync(handler); |
643 |
|
} |
644 |
|
break; |
645 |
|
#endif |
646 |
|
#ifdef ASYNCI_MEM_TEST |
647 |
|
case AIMEMPUT: |
648 |
|
if (!MYTHREAD){ |
649 |
upc_memput_asynci(&(p2pDistArr[1+THREADS*offset]),p2pLocalArr+offset,cursize); |
upc_memput_asynci(&(p2pDistArr[1+THREADS*offset]),p2pLocalArr+offset,cursize); |
650 |
upc_waitsynci(); |
upc_waitsynci(); |
651 |
} |
} |
652 |
break; |
break; |
653 |
case ALMEMPUT: |
case AILMEMPUT: |
654 |
if (!MYTHREAD){ |
if (!MYTHREAD){ |
655 |
upc_memput_asynci(&(p2pDistArr[THREADS*offset]),p2pLocalArr+offset,cursize); |
upc_memput_asynci(&(p2pDistArr[THREADS*offset]),p2pLocalArr+offset,cursize); |
656 |
upc_waitsynci(); |
upc_waitsynci(); |
668 |
#ifdef ASYNC_MEM_TEST |
#ifdef ASYNC_MEM_TEST |
669 |
case AMEMCPY: |
case AMEMCPY: |
670 |
if (!MYTHREAD){ |
if (!MYTHREAD){ |
671 |
|
upc_handle_t handler = upc_memcpy_async(&(p2pDistArr[1+THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize); |
672 |
|
upc_waitsync(handler); |
673 |
|
} |
674 |
|
break; |
675 |
|
case ALMEMCPY: |
676 |
|
if (!MYTHREAD){ |
677 |
|
upc_handle_t handler = upc_memcpy_async(&(p2pDistArr[THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize); |
678 |
|
upc_waitsync(handler); |
679 |
|
} |
680 |
|
break; |
681 |
|
#endif |
682 |
|
#ifdef ASYNCI_MEM_TEST |
683 |
|
case AIMEMCPY: |
684 |
|
if (!MYTHREAD){ |
685 |
upc_memcpy_asynci(&(p2pDistArr[1+THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize); |
upc_memcpy_asynci(&(p2pDistArr[1+THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize); |
686 |
upc_waitsynci(); |
upc_waitsynci(); |
687 |
} |
} |
688 |
break; |
break; |
689 |
case ALMEMCPY: |
case AILMEMCPY: |
690 |
if (!MYTHREAD){ |
if (!MYTHREAD){ |
691 |
upc_memcpy_asynci(&(p2pDistArr[THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize); |
upc_memcpy_asynci(&(p2pDistArr[THREADS*offset]),&(p2pDistArr2[THREADS*offset]),cursize); |
692 |
upc_waitsynci(); |
upc_waitsynci(); |