1 : |
dalvarez |
14 |
/******************************************************************************/
|
2 : |
|
|
/* */
|
3 : |
|
|
/* Copyright (c) 2008, 2009, 2010 */
|
4 : |
|
|
/* Computer Architecture Group (CAG) */
|
5 : |
|
|
/* University of A Coruña, Spain */
|
6 : |
|
|
/* (http://gac.des.udc.es) */
|
7 : |
|
|
/* Galicia Supercomputing Center (CESGA) */
|
8 : |
|
|
/* (http://www.cesga.es) */
|
9 : |
|
|
/* Hewlett-Packard Spain (HP) */
|
10 : |
|
|
/* (http://www.hp.es) */
|
11 : |
|
|
/* */
|
12 : |
|
|
/* This file is part of UPC Operations Microbenchmarking Suite (UOMS). */
|
13 : |
|
|
/* */
|
14 : |
|
|
/* UOMS is free software: you can redistribute it and/or modify */
|
15 : |
|
|
/* it under the terms of the GNU Lesser General Public License as published */
|
16 : |
|
|
/* by the Free Software Foundation, either version 3 of the License, or */
|
17 : |
|
|
/* (at your option) any later version. */
|
18 : |
|
|
/* */
|
19 : |
|
|
/* UOMS is distributed in the hope that it will be useful, */
|
20 : |
|
|
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
|
21 : |
|
|
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
|
22 : |
|
|
/* GNU Lesser General Public License for more details. */
|
23 : |
|
|
/* */
|
24 : |
|
|
/* You should have received a copy of the GNU Lesser General Public License */
|
25 : |
|
|
/* along with UOMS. If not, see <http://www.gnu.org/licenses/>. */
|
26 : |
|
|
/* */
|
27 : |
|
|
/******************************************************************************/
|
28 : |
dalvarez |
1 |
|
29 : |
dalvarez |
14 |
/******************************************************************************/
|
30 : |
|
|
/* */
|
31 : |
|
|
/* FUNDING: This development has been funded by Hewlett-Packard Spain */
|
32 : |
|
|
/* */
|
33 : |
|
|
/* Project Name: */
|
34 : |
|
|
/* UPCHACO (2008-2011) */
|
35 : |
|
|
/* Subproject: */
|
36 : |
|
|
/* Improving UPC Usability and Performance in Constellation Systems: */
|
37 : |
|
|
/* Implementation/Extensions of UPC Libraries. */
|
38 : |
|
|
/* (UPCPUProject -> UPC Performance and Usability Project) */
|
39 : |
|
|
/* */
|
40 : |
|
|
/******************************************************************************/
|
41 : |
dalvarez |
1 |
|
42 : |
dalvarez |
14 |
/******************************************************************************
|
43 : |
dalvarez |
1 |
|
44 : |
|
|
For further documentation, see
|
45 : |
|
|
|
46 : |
|
|
[1] Files under doc/
|
47 : |
|
|
|
48 : |
dalvarez |
14 |
*******************************************************************************/
|
49 : |
dalvarez |
1 |
|
50 : |
|
|
#include <sched.h>
|
51 : |
|
|
#include <stdio.h>
|
52 : |
|
|
#include <stdlib.h>
|
53 : |
|
|
#include <unistd.h>
|
54 : |
|
|
#include "headers.h" |
55 : |
|
|
#include "defines.h" |
56 : |
|
|
|
57 : |
|
|
/*
|
58 : |
|
|
Affinity related functions
|
59 : |
|
|
*/
|
60 : |
|
|
|
61 : |
|
|
extern FILE *unit;
|
62 : |
|
|
extern int minsize, maxsize;
|
63 : |
|
|
|
64 : |
|
|
void UOMS_coreaffinity_info(int core0, int core1, long total){
|
65 : |
|
|
|
66 : |
|
|
int i;
|
67 : |
|
|
|
68 : |
|
|
char *smask0;
|
69 : |
|
|
char *smask1;
|
70 : |
|
|
cpu_set_t mask0, mask1;
|
71 : |
|
|
CPU_ZERO(&mask0);
|
72 : |
|
|
CPU_ZERO(&mask1);
|
73 : |
|
|
CPU_SET(core0 + 0*(core1-core0),&mask0);
|
74 : |
|
|
CPU_SET(core0 + 1*(core1-core0),&mask1);
|
75 : |
|
|
|
76 : |
|
|
smask0 = malloc(sizeof(char)*(total+1));
|
77 : |
|
|
smask1 = malloc(sizeof(char)*(total+1));
|
78 : |
|
|
|
79 : |
|
|
for(i=0;i<total;i++){
|
80 : |
|
|
if(CPU_ISSET(i,&mask0))
|
81 : |
|
|
smask0[i] = '1';
|
82 : |
|
|
else
|
83 : |
|
|
smask0[i] = '0';
|
84 : |
|
|
|
85 : |
|
|
if(CPU_ISSET(i,&mask1))
|
86 : |
|
|
smask1[i] = '1';
|
87 : |
|
|
else
|
88 : |
|
|
smask1[i] = '0';
|
89 : |
|
|
}
|
90 : |
|
|
smask0[total] = '\0';
|
91 : |
|
|
smask1[total] = '\0';
|
92 : |
|
|
|
93 : |
|
|
fprintf(unit,"\n");
|
94 : |
|
|
fprintf(unit,"#---------------------------------------------------------\n");
|
95 : |
|
|
fprintf(unit,"# using #cores = %d and %d (Number of cores per node: %ld)\n",core0,core1,total);
|
96 : |
|
|
fprintf(unit,"# CPU Mask: %s (core %d), %s (core %d)\n",smask0,core0,smask1,core1);
|
97 : |
|
|
fprintf(unit,"#---------------------------------------------------------\n");
|
98 : |
|
|
fprintf(unit,"\n");
|
99 : |
|
|
|
100 : |
|
|
free(smask0);
|
101 : |
|
|
free(smask1);
|
102 : |
|
|
}
|
103 : |
|
|
|
104 : |
|
|
/*
|
105 : |
|
|
Study the UPC performance fixing the affinity of the UPC THREADS
|
106 : |
|
|
to specific cores
|
107 : |
|
|
*/
|
108 : |
|
|
void affinity_bench(int operation_code){
|
109 : |
|
|
|
110 : |
|
|
long num_cores;
|
111 : |
|
|
cpu_set_t mask;
|
112 : |
|
|
int cpu, core0, core1;
|
113 : |
|
|
|
114 : |
|
|
int i;
|
115 : |
|
|
|
116 : |
|
|
/*
|
117 : |
|
|
Determine the number of cores
|
118 : |
|
|
*/
|
119 : |
|
|
#ifndef NUMCORES
|
120 : |
|
|
num_cores = sysconf(_SC_NPROCESSORS_ONLN);
|
121 : |
|
|
#else
|
122 : |
|
|
num_cores = NUMCORES;
|
123 : |
|
|
#endif
|
124 : |
|
|
if(num_cores < 1){
|
125 : |
|
|
if(!MYTHREAD)
|
126 : |
|
|
printf("Unable to determine the correct number of cores\n");
|
127 : |
|
|
return;
|
128 : |
|
|
}
|
129 : |
|
|
|
130 : |
|
|
/*
|
131 : |
|
|
This code will test communications using core 0 with power-of-2 cores
|
132 : |
|
|
and core [num_cores/2] with power-of-2+num_cores/2 cores
|
133 : |
|
|
|
134 : |
|
|
This way it will perform a couple of redundant tests, but it will
|
135 : |
|
|
also test all the important configurations from locality perspective
|
136 : |
|
|
in NUMA machines
|
137 : |
|
|
|
138 : |
|
|
The second part of the code (core [num_cores/2] with
|
139 : |
|
|
power-of-2+num_cores/2 cores) is only important when testing communications
|
140 : |
|
|
between 2 nodes, being each one a NUMA machine with the network
|
141 : |
|
|
interface attached to just one cell
|
142 : |
|
|
*/
|
143 : |
|
|
if(num_cores >= 4){
|
144 : |
|
|
for(i = 1; i<=num_cores/2; i*=2){
|
145 : |
|
|
//First part
|
146 : |
|
|
core0 = 0;
|
147 : |
|
|
core1 = i;
|
148 : |
|
|
CPU_ZERO(&mask);
|
149 : |
|
|
if (!MYTHREAD)
|
150 : |
|
|
UOMS_coreaffinity_info(core0,core1,num_cores);
|
151 : |
|
|
cpu = core0 + MYTHREAD*(core1-core0);
|
152 : |
|
|
CPU_SET(cpu,&mask);
|
153 : |
|
|
sched_setaffinity(0,sizeof(cpu_set_t),&mask);
|
154 : |
|
|
bench(operation_code);
|
155 : |
|
|
|
156 : |
|
|
//Second part
|
157 : |
|
|
core0 = num_cores/2;
|
158 : |
|
|
core1 = core0 + i;
|
159 : |
|
|
if(core1 >= num_cores)
|
160 : |
|
|
continue;
|
161 : |
|
|
CPU_ZERO(&mask);
|
162 : |
|
|
if (!MYTHREAD)
|
163 : |
|
|
UOMS_coreaffinity_info(core0,core1,num_cores);
|
164 : |
|
|
cpu = core0 + MYTHREAD*(core1-core0);
|
165 : |
|
|
CPU_SET(cpu,&mask);
|
166 : |
|
|
sched_setaffinity(0,sizeof(cpu_set_t),&mask);
|
167 : |
|
|
bench(operation_code);
|
168 : |
|
|
}
|
169 : |
|
|
}
|
170 : |
|
|
else{
|
171 : |
|
|
/*
|
172 : |
|
|
For completition, this code test mono and dual-core machines
|
173 : |
|
|
*/
|
174 : |
|
|
core0 = 0;
|
175 : |
|
|
if(num_cores < 2)
|
176 : |
|
|
core1 = 0;
|
177 : |
|
|
else
|
178 : |
|
|
core1 = 1;
|
179 : |
|
|
CPU_ZERO(&mask);
|
180 : |
|
|
if (!MYTHREAD)
|
181 : |
|
|
UOMS_coreaffinity_info(core0,core1,num_cores);
|
182 : |
|
|
cpu = core0 + MYTHREAD*core1;
|
183 : |
|
|
CPU_SET(cpu,&mask);
|
184 : |
|
|
sched_setaffinity(0,sizeof(cpu_set_t),&mask);
|
185 : |
|
|
bench(operation_code);
|
186 : |
|
|
}
|
187 : |
|
|
}
|