/******************************************************************************/
/* */
/* Copyright (c) 2008, 2009, 2010 */
/* Computer Architecture Group (CAG) */
/* University of A Coruña, Spain */
/* (http://gac.des.udc.es) */
/* Galicia Supercomputing Center (CESGA) */
/* (http://www.cesga.es) */
/* Hewlett-Packard Spain (HP) */
/* (http://www.hp.es) */
/* */
/* This file is part of UPC Operations Microbenchmarking Suite (UOMS). */
/* */
/* UOMS is free software: you can redistribute it and/or modify */
/* it under the terms of the GNU Lesser General Public License as published */
/* by the Free Software Foundation, either version 3 of the License, or */
/* (at your option) any later version. */
/* */
/* UOMS is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
/* GNU Lesser General Public License for more details. */
/* */
/* You should have received a copy of the GNU Lesser General Public License */
/* along with UOMS. If not, see <http://www.gnu.org/licenses/>. */
/* */
/******************************************************************************/
/******************************************************************************/
/* */
/* FUNDING: This development has been funded by Hewlett-Packard Spain */
/* */
/* Project Name: */
/* UPCHACO (2008-2011) */
/* Subproject: */
/* Improving UPC Usability and Performance in Constellation Systems: */
/* Implementation/Extensions of UPC Libraries. */
/* (UPCPUProject -> UPC Performance and Usability Project) */
/* */
/******************************************************************************/
/******************************************************************************
For further documentation, see
[1] Files under doc/
*******************************************************************************/
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "headers.h"
#include "defines.h"
/*
Affinity related functions
*/
extern FILE *unit;
extern int minsize, maxsize;
/*
 Write the CPU mask of a single core to `out` as a string of `total`
 characters, one per core index starting at 0: '1' where the index is set
 in the mask, '0' elsewhere. No newline or terminator is written.

 The mask is built with CPU_ZERO/CPU_SET and queried with CPU_ISSET, the
 same macros affinity_bench uses to build the set it hands to
 sched_setaffinity.

 Characters are streamed directly to `out`, so no temporary buffer has to
 be allocated (and nothing can fail on allocation). A `total` <= 0 writes
 nothing.
*/
static void UOMS_print_core_mask(FILE *out, int core, long total){
    long idx;
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(core,&mask);
    for(idx = 0; idx < total; idx++)
        fputc(CPU_ISSET(idx,&mask) ? '1' : '0', out);
}

/*
 Print to the global output stream `unit` a banner describing the pair of
 cores used for the next affinity test.

 core0, core1: the two core indices being tested
 total:        number of cores per node; also the length of each printed
               mask string

 The banner contains the two core numbers, the core count, and one '0'/'1'
 mask string per core.
*/
void UOMS_coreaffinity_info(int core0, int core1, long total){
    fprintf(unit,"\n");
    fprintf(unit,"#---------------------------------------------------------\n");
    fprintf(unit,"# using #cores = %d and %d (Number of cores per node: %ld)\n",core0,core1,total);

    /* Same bytes as a single "# CPU Mask: %s (core %d), %s (core %d)\n"
       line, emitted piecewise so the masks need no heap storage */
    fprintf(unit,"# CPU Mask: ");
    UOMS_print_core_mask(unit,core0,total);
    fprintf(unit," (core %d), ",core0);
    UOMS_print_core_mask(unit,core1,total);
    fprintf(unit," (core %d)\n",core1);

    fprintf(unit,"#---------------------------------------------------------\n");
    fprintf(unit,"\n");
}
/*
 Run one affinity test for the core pair (core0, core1).

 Thread 0 prints the banner for the pair. Every thread then computes its
 target core as core0 + MYTHREAD*(core1-core0) (thread 0 -> core0,
 thread 1 -> core1), requests that affinity for itself and runs
 bench(operation_code).

 If sched_setaffinity fails the error is reported on stderr, since the
 results of this test would otherwise be silently attributed to a core the
 thread is not bound to. bench() is still called in that case, exactly as
 when the return value was ignored.
 NOTE(review): presumably bench() synchronizes all threads, so skipping it
 on a single thread could hang the run - confirm before changing this.
*/
static void UOMS_pinned_bench(int core0, int core1, long num_cores, int operation_code){
    cpu_set_t mask;
    int cpu;

    if(!MYTHREAD)
        UOMS_coreaffinity_info(core0,core1,num_cores);

    cpu = core0 + MYTHREAD*(core1-core0);
    CPU_ZERO(&mask);
    CPU_SET(cpu,&mask);
    if(sched_setaffinity(0,sizeof(cpu_set_t),&mask) != 0){
        /* perror first: it reads errno, which later stdio calls may change */
        perror("sched_setaffinity");
        fprintf(stderr,"Thread %d: unable to set affinity to core %d, keeping the previous affinity for this test\n",(int)MYTHREAD,cpu);
    }

    bench(operation_code);
}

/*
 Study the UPC performance fixing the affinity of the UPC THREADS
 to specific cores.

 operation_code: forwarded unchanged to bench() for every tested core pair.

 The number of cores is NUMCORES when that macro is defined at compile
 time, otherwise the value reported by sysconf(_SC_NPROCESSORS_ONLN). If
 it is smaller than 1, thread 0 prints a message and no test is run.
*/
void affinity_bench(int operation_code){
    long num_cores;
    long half;
    int i;

    /*
     Determine the number of cores
    */
#ifndef NUMCORES
    num_cores = sysconf(_SC_NPROCESSORS_ONLN);
#else
    num_cores = NUMCORES;
#endif
    if(num_cores < 1){
        if(!MYTHREAD)
            printf("Unable to determine the correct number of cores\n");
        return;
    }

    /*
     For completeness, mono and dual-core (and 3-core) machines get a
     single test: core 0 with core 1, or core 0 with itself when there is
     only one core
    */
    if(num_cores < 4){
        UOMS_pinned_bench(0, (num_cores < 2) ? 0 : 1, num_cores, operation_code);
        return;
    }

    /*
     This code will test communications using core 0 with power-of-2 cores
     and core [num_cores/2] with power-of-2+num_cores/2 cores.
     This way it will perform a couple of redundant tests, but it will
     also test all the important configurations from locality perspective
     in NUMA machines.
     The second part (core [num_cores/2] with power-of-2+num_cores/2 cores)
     is only important when testing communications between 2 nodes, being
     each one a NUMA machine with the network interface attached to just
     one cell.
    */
    half = num_cores/2;
    for(i = 1; i <= half; i *= 2){
        /* First part: core 0 paired with core i */
        UOMS_pinned_bench(0, i, num_cores, operation_code);

        /* Second part: core num_cores/2 paired with core num_cores/2 + i,
           skipped when that partner index is not an existing core */
        if(half + i < num_cores)
            UOMS_pinned_bench((int)half, (int)(half + i), num_cores, operation_code);
    }
}