/*****************************************************************************/
/* */
/* Copyright (c) 2008, 2009, 2010 */
/* Computer Architecture Group (CAG) */
/* University of A Coruña, Spain */
/* (http://gac.des.udc.es) */
/* Galicia Supercomputing Center (CESGA) */
/* (http://www.cesga.es) */
/* Hewlett-Packard Spain (HP) */
/* (http://www.hp.es) */
/* */
/* This file is part of UPC Operations Microbenchmarking Suite (UOMS). */
/* */
/* UOMS is free software: you can redistribute it and/or modify */
/* it under the terms of the GNU Lesser General Public License as published */
/* by the Free Software Foundation, either version 3 of the License, or */
/* (at your option) any later version. */
/* */
/* UOMS is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
/* GNU Lesser General Public License for more details. */
/* */
/* You should have received a copy of the GNU Lesser General Public License */
/* along with UOMS. If not, see <http://www.gnu.org/licenses/>. */
/* */
/*****************************************************************************/
/*****************************************************************************/
/* */
/* FUNDING: This development has been funded by Hewlett-Packard Spain */
/* */
/* Project Name: */
/* UPCHACO (2008-2011) */
/* Subproject: */
/* Improving UPC Usability and Performance in Constellation Systems: */
/* Implementation/Extensions of UPC Libraries. */
/* (UPCPUProject -> UPC Performance and Usability Project) */
/* */
/*****************************************************************************/
/*****************************************************************************
For further documentation, see
[1] Files under doc/
******************************************************************************/
#include
#include
#include
#include "defines.h"
#include "headers.h"
#include "timers/timers.h"
extern FILE* unit;
extern int cache_invalidation;
extern int warmup;
extern char * char_reduce_op;
extern char * char_sync_mode;
extern int num_sizes;
extern long *sizes;
extern char * valid_bms[NUM_BMS];
extern int num_bms;
extern int *bm_list;
/*
 Gets the aggregated bandwidth factor.

 Returns the multiplier that print_performance_data() applies to the block
 size when it computes the aggregated bandwidth of an operation:
   BROADCAST, SCATTER, GATHER, PERMUTE -> THREADS
   EXCHANGE                            -> THREADS * THREADS
   GATHERALL                           -> THREADS + THREADS * THREADS
   any other operation code            -> 1.0

 The products are evaluated in double precision rather than in integer
 arithmetic so that a large thread count cannot overflow the intermediate
 result; for every value that fits in an int the result is unchanged.
*/
double fAggreatedBw(int operation_code){
	const double nthreads = (double)THREADS;

	switch (operation_code) {
	case BROADCAST:
	case SCATTER:
	case GATHER:
	case PERMUTE:
		return nthreads;
	case EXCHANGE:
		return nthreads * nthreads;
	case GATHERALL:
		return nthreads + nthreads * nthreads;
	default:
		return 1.0;
	}
}
/*
 Prints the command-line usage text to stdout.

 appname: program name substituted into the synopsis line.

 The list of valid benchmark names is taken from valid_bms[0..NUM_BMS-1].
*/
void print_usage(char *appname){
	/* Fixed help lines printed between the synopsis and the benchmark list. */
	static const char *const help_lines[] = {
		"Where:\n",
		"-help: Print this usage information and exits\n",
		"-version: Print UOMS version and exits\n",
		"-off_cache: Enable cache invalidation\n",
		"-warmup: Enable a warmup iteration\n",
		"-reduce_op OP: Choose the reduce operation to be performed by upc_all_reduce and upc_all_prefix_reduce\n",
		"Valid operations are:\n",
		" - UPC_ADD (default)\n",
		" - UPC_MULT\n",
		" - UPC_AND\n",
		" - UPC_OR\n",
		" - UPC_XOR\n",
		" - UPC_LOGAND\n",
		" - UPC_LOGOR\n",
		" - UPC_MIN\n",
		" - UPC_MAX\n",
		"-sync_mode MODE: Choose the synchronization mode for the collective operations\n",
		"Valid modes are:\n",
		" - UPC_IN_ALLSYNC|UPC_OUT_ALLSYNC (default)\n",
		" - UPC_IN_ALLSYNC|UPC_OUT_MYSYNC\n",
		" - UPC_IN_ALLSYNC|UPC_OUT_NOSYNC\n",
		" - UPC_IN_MYSYNC|UPC_OUT_ALLSYNC\n",
		" - UPC_IN_MYSYNC|UPC_OUT_MYSYNC\n",
		" - UPC_IN_MYSYNC|UPC_OUT_NOSYNC\n",
		" - UPC_IN_NOSYNC|UPC_OUT_ALLSYNC\n",
		" - UPC_IN_NOSYNC|UPC_OUT_MYSYNC\n",
		" - UPC_IN_NOSYNC|UPC_OUT_NOSYNC\n",
		"-msglen FILE: Read user defined block sizes from FILE (in bytes). If specified it will override -minsize and -maxsize\n",
		"-minsize SIZE: Specifies the minimum block size (in bytes). Sizes will increase by a factor of 2\n",
		"-maxsize SIZE: Specifies the maximum block size (in bytes)\n",
		"-input FILE: Read user defined list of benchmarks to run from FILE\n",
		"Valid benchmark names are:\n"
	};
	const size_t help_count = sizeof help_lines / sizeof help_lines[0];
	size_t line;
	int bm;

	fputs("\nUOMS usage:\n", stdout);
	printf("[Your UPC runtime launch command] %s [-help | -version | [-off_cache] [-warmup] [-reduce_op OP] [-sync_mode MODE] [-msglen FILE | [-minsize SIZE] [-maxsize SIZE] ] [-input FILE] [output_file]]\n",appname);

	for (line = 0; line < help_count; line++) {
		fputs(help_lines[line], stdout);
	}

	/* One entry per benchmark name known to the suite. */
	for (bm = 0; bm < NUM_BMS; bm++) {
		printf(" - %s\n", valid_bms[bm]);
	}

	fputs("output_file: Alternative output file (rather than stdout)\n\n", stdout);
}
/*
 Prints performance data according to the header type.

 The row layout is selected by operation_header(operation_code):
   1: #repetitions, t_min, t_max, t_avg
   2: #bytes, #repetitions, t_min, t_max, t_avg
   3: the columns of layout 2 followed by the aggregated bandwidth
 Any other header type prints nothing.

 operation_code: benchmark identifier
 cursize:        block size of this measurement (printed in layouts 2 and 3)
 niterations:    number of repetitions; also the divisor of the average
 minTime, maxTime, totalTime: timings in timer ticks, converted with
                 ticksToNS() before printing

 Only thread 0 (MYTHREAD == 0) writes the row; every calling thread
 flushes `unit`.
*/
void print_performance_data(int operation_code, int cursize,int niterations,uint64_t minTime, uint64_t maxTime, uint64_t totalTime) {
	/* Queried once instead of once per branch. */
	const int header = operation_header(operation_code);

	if (!MYTHREAD && header >= 1 && header <= 3) {
		/*
		 ticksToNS() is declared elsewhere; the explicit casts make the
		 arguments match the %llu conversions whatever integer type it
		 returns (uint64_t is not necessarily unsigned long long).
		*/
		const unsigned long long min_ns = (unsigned long long)ticksToNS(minTime);
		const unsigned long long max_ns = (unsigned long long)ticksToNS(maxTime);
		const double avg_ns = ((double)ticksToNS(totalTime))/niterations;

		switch (header) {
		case 1:
			fprintf(unit,"%14d %13llu %13llu %10.2lf\n",
				niterations,min_ns,max_ns,avg_ns);
			break;
		case 2:
			fprintf(unit,"%13d %13d %13llu %13llu %12.2lf\n",
				cursize,niterations,min_ns,max_ns,avg_ns);
			break;
		default: {
			/* Bytes moved divided by the best time in microseconds. */
			const double opBw = (((double)fAggreatedBw(operation_code)*cursize)/((double)min_ns/1000));
			fprintf(unit,"%13d %13d %13llu %13llu %12.2lf %12.2lf \n",
				cursize,niterations,min_ns,max_ns,avg_ns,opBw);
			break;
		}
		}
	}

	fflush(unit);
	return;
}
/*
 Prints the UOMS version string (VERSION) to stdout.
*/
void print_version(){
	fprintf(stdout,
		"UPC Operations Microbenchmarking Suite (UOMS) version: %s\n",
		VERSION);
}
/*
 Prints general benchmark info.

 Writes to `unit` a banner with the suite version, the current local date,
 the machine/system/release fields reported by uname(), the cache
 invalidation and warmup settings, the list of problem sizes, the
 synchronization mode, the reduce operation and the benchmarks to run.

 If uname() fails, or the current time cannot be converted to local time,
 the affected fields are printed as "unknown" instead of reading
 uninitialised or NULL data.
*/
void UOMS_general_info(){
	time_t now;
	struct utsname host;
	struct tm *local_now;
	int host_ok;
	int i;

	time(&now);
	local_now = localtime(&now);
	/* uname() reports failure with a negative return value. */
	host_ok = (uname(&host) >= 0);

	fprintf(unit,"#---------------------------------------------------\n");
	fprintf(unit,"# UPC Operations Microbenchmark Suite V%s \n",VERSION);
	fprintf(unit,"#---------------------------------------------------\n");

	/* asctime() already terminates its result with a newline. */
	if (local_now != NULL)
		fprintf(unit,"# Date : %s",asctime(local_now));
	else
		fprintf(unit,"# Date : %s","unknown\n");

	fprintf(unit,"# Machine : %s\n",host_ok ? host.machine : "unknown");
	fprintf(unit,"# System : %s\n",host_ok ? host.sysname : "unknown");
	fprintf(unit,"# Release : %s\n",host_ok ? host.release : "unknown");
	fprintf(unit,"\n");

	if(cache_invalidation == 1)
		fprintf(unit,"# Cache invalidation : Enabled\n");
	else
		fprintf(unit,"# Cache invalidation : Disabled\n");
	fprintf(unit,"\n");

	if(warmup == 1)
		fprintf(unit,"# Warmup iteration : Enabled\n");
	else
		fprintf(unit,"# Warmup iteration : Disabled\n");
	fprintf(unit,"\n");

	fprintf(unit,"# Problem sizes:\n");
	for(i = 0; i < num_sizes; i++){
		fprintf(unit,"# %ld\n",sizes[i]);
	}
	fprintf(unit,"\n");

	fprintf(unit,"# Synchronization mode : %s \n",char_sync_mode);
	fprintf(unit,"\n");
	fprintf(unit,"# Reduce Op : %s \n",char_reduce_op);
	fprintf(unit,"\n");
	fprintf(unit,"\n");

	/* bm_list holds indices into valid_bms for the selected benchmarks. */
	fprintf(unit,"# List of Benchmarks to run:\n");
	fprintf(unit,"\n");
	for(i = 0; i < num_bms; i++){
		fprintf(unit,"# %s\n",valid_bms[bm_list[i]]);
	}
	fprintf(unit,"\n");
}
/*
 Prints the per-benchmark banner and the column header line to `unit`.

 operation:  benchmark identifier; for the operation codes listed below the
             printed name is valid_bms[operation], otherwise it is
             "Not yet defined"
 nthreads:   value printed in the "#processes" line
 headertype: selects the column header (1, 2 or 3); any other value
             prints no column header
*/
void UOMS_function_info(int operation,int nthreads,int headertype){
	const char *bench_name = "Not yet defined";
	const char *columns = NULL;

	switch (operation) {
	case BROADCAST:
	case SCATTER:
	case GATHER:
	case GATHERALL:
	case EXCHANGE:
	case PERMUTE:
	case BARRIER:
	case MEMGET:
	case MEMPUT:
	case MEMCPY:
	case LMEMGET:
	case LMEMPUT:
	case LMEMCPY:
	case SMEMCPY:
	case MEMMOVE:
	case ALLALLOC:
	case FREE:
	case REDUCE_C:
	case PREFIX_REDUCE_C:
	case REDUCE_UC:
	case PREFIX_REDUCE_UC:
	case REDUCE_S:
	case PREFIX_REDUCE_S:
	case REDUCE_US:
	case PREFIX_REDUCE_US:
	case REDUCE_I:
	case PREFIX_REDUCE_I:
	case REDUCE_UI:
	case PREFIX_REDUCE_UI:
	case REDUCE_L:
	case PREFIX_REDUCE_L:
	case REDUCE_UL:
	case PREFIX_REDUCE_UL:
	case REDUCE_F:
	case PREFIX_REDUCE_F:
	case REDUCE_D:
	case PREFIX_REDUCE_D:
	case REDUCE_LD:
	case PREFIX_REDUCE_LD:
#ifdef ASYNC_MEM_TEST
	case AMEMGET:
	case AMEMPUT:
	case AMEMCPY:
	case ALMEMGET:
	case ALMEMPUT:
	case ALMEMCPY:
#endif
#ifdef ASYNCI_MEM_TEST
	case AIMEMGET:
	case AIMEMPUT:
	case AIMEMCPY:
	case AILMEMGET:
	case AILMEMPUT:
	case AILMEMCPY:
#endif
		bench_name = valid_bms[operation];
		break;
	default:
		/* Keep the placeholder name for unlisted operation codes. */
		break;
	}

	fputs("\n", unit);
	fputs("#---------------------------------------------------\n", unit);
	fprintf(unit,"# Benchmarking %s \n",bench_name);
	fprintf(unit,"# #processes = %d \n",nthreads);
	fputs("#---------------------------------------------------\n", unit);

	/* Pick the column header matching the row layout of this benchmark. */
	switch (headertype) {
	case 1:
		columns = " #repetitions t_min[nsec] t_max[nsec] t_avg[nsec]\n";
		break;
	case 2:
		columns = " #bytes #repetitions t_min[nsec] t_max[nsec] t_avg[nsec]\n";
		break;
	case 3:
		columns = " #bytes #repetitions t_min[nsec] t_max[nsec] t_avg[nsec] Bw_aggregated[MB/sec]\n";
		break;
	default:
		break;
	}

	if (columns != NULL) {
		fputs(columns, unit);
	}
}