/******************************************************************************/ /* */ /* Copyright (c) 2008, 2009, 2010 */ /* Computer Architecture Group (CAG) */ /* University of A Coruña, Spain */ /* (http://gac.des.udc.es) */ /* Galicia Supercomputing Center (CESGA) */ /* (http://www.cesga.es) */ /* Hewlett-Packard Spain (HP) */ /* (http://www.hp.es) */ /* */ /* This file is part of UPC Operations Microbenchmarking Suite (UOMS). */ /* */ /* UOMS is free software: you can redistribute it and/or modify */ /* it under the terms of the GNU Lesser General Public License as published */ /* by the Free Software Foundation, either version 3 of the License, or */ /* (at your option) any later version. */ /* */ /* UOMS is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU Lesser General Public License for more details. */ /* */ /* You should have received a copy of the GNU Lesser General Public License */ /* along with UOMS. If not, see . */ /* */ /******************************************************************************/ /******************************************************************************/ /* */ /* FUNDING: This development has been funded by Hewlett-Packard Spain */ /* */ /* Project Name: */ /* UPCHACO (2008-2011) */ /* Subproject: */ /* Improving UPC Usability and Performance in Constellation Systems: */ /* Implementation/Extensions of UPC Libraries. */ /* (UPCPUProject -> UPC Performance and Usability Project) */ /* */ /******************************************************************************/ /****************************************************************************** For further documentation, see [1] Files under doc/ *******************************************************************************/ #include #include #include #include "defines.h" #include "headers.h" #include "timers/timers.h" extern FILE* unit; extern int cache_invalidation; extern int warmup; extern char * char_reduce_op; extern char * char_sync_mode; extern int num_sizes; extern long *sizes; extern char * valid_bms[NUM_BMS]; extern int num_bms; extern int *bm_list; extern int *bm_list; extern uint64_t timeLimit; extern int time_limit_set; /* Gets the aggreated bandwidth factor */ double fAggreatedBw(int operation_code){ double factor=1.0; switch (operation_code) { case FORALL_R: case FORALL_W: case FORALL_RW: case FOR_R: case FOR_W: case FOR_RW: case BROADCAST: case SCATTER: case GATHER: case PERMUTE: factor = THREADS; break; case EXCHANGE: factor = THREADS*THREADS; break; case GATHERALL: factor = THREADS+THREADS*THREADS; break; default: factor = 1.0; break; } return factor; } /* Prints usage */ void print_usage(char *appname){ printf("\nUOMS usage:\n"); printf("[Your UPC runtime launch command] %s [-help | -version | [-off_cache] [-warmup] [-reduce_op OP] [-sync_mode MODE] [-msglen FILE | [-minsize SIZE] [-maxsize SIZE] ] [-time SECONDS] [-input FILE] [output_file]]\n",appname); printf("Where:\n"); printf("-help: Print this usage information and exits\n"); printf("-version: Print UOMS version and exits\n"); printf("-off_cache: Enable cache invalidation\n"); printf("-warmup: Enable a warmup iteration\n"); printf("-reduce_op OP: Choose the reduce operation to be performed by upc_all_reduce and upc_all_prefix_reduce\n"); printf("Valid operations are:\n"); printf(" - UPC_ADD (default)\n"); printf(" - UPC_MULT\n"); printf(" - UPC_AND\n"); printf(" - UPC_OR\n"); printf(" - UPC_XOR\n"); printf(" - UPC_LOGAND\n"); printf(" - UPC_LOGOR\n"); printf(" - UPC_MIN\n"); printf(" - UPC_MAX\n"); printf("-sync_mode MODE: Choose the synchronization mode for the collective operations\n"); printf("Valid modes are:\n"); printf(" - UPC_IN_ALLSYNC|UPC_OUT_ALLSYNC (default)\n"); printf(" - UPC_IN_ALLSYNC|UPC_OUT_MYSYNC\n"); printf(" - UPC_IN_ALLSYNC|UPC_OUT_NOSYNC\n"); printf(" - UPC_IN_MYSYNC|UPC_OUT_ALLSYNC\n"); printf(" - UPC_IN_MYSYNC|UPC_OUT_MYSYNC\n"); printf(" - UPC_IN_MYSYNC|UPC_OUT_NOSYNC\n"); printf(" - UPC_IN_NOSYNC|UPC_OUT_ALLSYNC\n"); printf(" - UPC_IN_NOSYNC|UPC_OUT_MYSYNC\n"); printf(" - UPC_IN_NOSYNC|UPC_OUT_NOSYNC\n"); printf("-msglen FILE: Read user defined block sizes from FILE (in bytes). If specified it will override -minsize and -maxsize\n"); printf("-minsize SIZE: Specifies the minimum block size (in bytes). Sizes will increase by a factor of 2. Default is 4\n"); printf("-maxsize SIZE: Specifies the maximum block size (in bytes). Default is 16MB\n"); printf("-time SECONDS: Specifies the maximum run time in seconds for each block size. Disabled by default. Important: this setting will not interrupt an ongoing operation\n"); printf("-input FILE: Read user defined list of benchmarks to run from FILE\n"); printf("Valid benchmark names are:\n"); for(int i = 0; i < NUM_BMS; i++){ printf(" - %s\n",valid_bms[i]); } printf("output_file: Alternative output file (rather than stdout)\n\n"); } /* Prints performance data acording to the header type */ void print_performance_data(int operation_code, int cursize,int niterations,uint64_t minTime, uint64_t maxTime, uint64_t totalTime) { if (operation_header(operation_code)==1) { if (!MYTHREAD) fprintf(unit,"%14d %13llu %13llu %10.2lf\n", niterations,ticksToNS(minTime),ticksToNS(maxTime),(((double)ticksToNS(totalTime))/niterations)); } else if (operation_header(operation_code)==2) { if (!MYTHREAD) fprintf(unit,"%13d %13d %13llu %13llu %12.2lf\n", cursize,niterations,ticksToNS(minTime),ticksToNS(maxTime),(((double)ticksToNS(totalTime))/niterations)); } else if (operation_header(operation_code)==3) { if (!MYTHREAD) { double opBw = (((double)fAggreatedBw(operation_code)*cursize)/((double) ticksToNS(minTime)/1000)); fprintf(unit,"%13d %13d %13llu %13llu %12.2lf %12.2lf \n", cursize,niterations,ticksToNS(minTime),ticksToNS(maxTime),(((double)ticksToNS(totalTime))/niterations),opBw); } } fflush(unit); return; } /* Prints UOMS version */ void print_version(){ printf("UPC Operations Microbenchmarking Suite (UOMS) version: %s\n",VERSION); } /* Prints general benchmark info */ void UOMS_general_info(){ time_t T; struct utsname info; time(&T); uname( &info ); fprintf(unit,"#-----------------------------------------------------\n"); fprintf(unit,"# UPC Operations Microbenchmark Suite V%s \n",VERSION); fprintf(unit,"#-----------------------------------------------------\n"); fprintf(unit,"# Date : %s",asctime(localtime(&T))); fprintf(unit,"# Machine : %s\n",info.machine); fprintf(unit,"# System : %s\n",info.sysname); fprintf(unit,"# Release : %s\n",info.release); fprintf(unit,"\n"); if(cache_invalidation == 1) fprintf(unit,"# Cache invalidation : Enabled\n"); else fprintf(unit,"# Cache invalidation : Disabled\n"); fprintf(unit,"\n"); if(warmup == 1) fprintf(unit,"# Warmup iteration : Enabled\n"); else fprintf(unit,"# Warmup iteration : Disabled\n"); fprintf(unit,"\n"); if(time_limit_set == 1){ fprintf(unit,"# Time limit per block size : %ld seconds\n",timeLimit); fprintf(unit,"\n"); } fprintf(unit,"# Problem sizes:\n"); for(int i = 0; i < num_sizes; i++){ fprintf(unit,"# %ld\n",sizes[i]); } fprintf(unit,"\n"); fprintf(unit,"# Synchronization mode : %s \n",char_sync_mode); fprintf(unit,"\n"); fprintf(unit,"# Reduce Op : %s \n",char_reduce_op); fprintf(unit,"\n"); fprintf(unit,"\n"); fprintf(unit,"# List of Benchmarks to run:\n"); fprintf(unit,"\n"); for(int i = 0; i < num_bms; i++){ fprintf(unit,"# %s\n",valid_bms[bm_list[i]]); } fprintf(unit,"\n"); } void UOMS_function_info(int operation,int nthreads,int headertype){ char *funcname; switch (operation) { case FORALL_R: case FORALL_W: case FORALL_RW: case FOR_R: case FOR_W: case FOR_RW: case BROADCAST: case SCATTER: case GATHER: case GATHERALL: case EXCHANGE: case PERMUTE: case BARRIER: case MEMGET: case MEMPUT: case MEMCPY: case LMEMGET: case LMEMPUT: case LMEMCPY: case SMEMCPY: case MEMMOVE: case ALLALLOC: case FREE: case REDUCE_C: case PREFIX_REDUCE_C: case REDUCE_UC: case PREFIX_REDUCE_UC: case REDUCE_S: case PREFIX_REDUCE_S: case REDUCE_US: case PREFIX_REDUCE_US: case REDUCE_I: case PREFIX_REDUCE_I: case REDUCE_UI: case PREFIX_REDUCE_UI: case REDUCE_L: case PREFIX_REDUCE_L: case REDUCE_UL: case PREFIX_REDUCE_UL: case REDUCE_F: case PREFIX_REDUCE_F: case REDUCE_D: case PREFIX_REDUCE_D: case REDUCE_LD: case PREFIX_REDUCE_LD: #ifdef ASYNC_MEM_TEST case AMEMGET: case AMEMPUT: case AMEMCPY: case ALMEMGET: case ALMEMPUT: case ALMEMCPY: #endif #ifdef ASYNCI_MEM_TEST case AIMEMGET: case AIMEMPUT: case AIMEMCPY: case AILMEMGET: case AILMEMPUT: case AILMEMCPY: #endif funcname = valid_bms[operation]; break; default: funcname = "Not yet defined"; } fprintf(unit,"\n"); fprintf(unit,"#---------------------------------------------------\n"); fprintf(unit,"# Benchmarking %s \n",funcname); fprintf(unit,"# #processes = %d \n",nthreads); fprintf(unit,"#---------------------------------------------------\n"); if (headertype==1) fprintf(unit," #repetitions t_min[nsec] t_max[nsec] t_avg[nsec]\n"); else if (headertype==2) fprintf(unit," #bytes #repetitions t_min[nsec] t_max[nsec] t_avg[nsec]\n"); else if (headertype==3) fprintf(unit," #bytes #repetitions t_min[nsec] t_max[nsec] t_avg[nsec] Bw_aggregated[MB/sec]\n"); }