1 : |
agomez |
1 |
#include "netcommunication.h" |
2 : |
|
|
#include <iostream>
|
3 : |
|
|
|
4 : |
|
|
// Builds the master-side communication object.
//
// funcNameArgs: element 0 is the name of the program to start on the slaves,
//               the remaining elements are the arguments passed to it.  The
//               process exits with EXIT_FAILURE if the vector is empty.
// nh:           stored in nHostInn; startPVM() uses a positive value to
//               override the host count reported by MPI.
//
// All strings are deep-copied, so the caller keeps ownership of funcNameArgs.
NetCommunication::NetCommunication(const CharPtrVector& funcNameArgs, int nh) {
  // pvmConst contains information about which tags and dataencoding to use
  pvmConst = new PVMConstants();
  nHostInn = nh;

  if (funcNameArgs.Size() <= 0) {
    cerr << "Must have name of the function to start on slaves\n";
    exit(EXIT_FAILURE);
  }

  slaveProgram = new char[strlen(funcNameArgs[0]) + 1];
  strcpy(slaveProgram, funcNameArgs[0]);

  // Copy the arguments into a NULL-terminated argv-style array.
  numarg = funcNameArgs.Size() - 1;
  slaveArguments = new char*[numarg + 1];
  for (int i = 0; i < numarg; i++) {
    slaveArguments[i] = new char[strlen(funcNameArgs[i + 1]) + 1];
    strcpy(slaveArguments[i], funcNameArgs[i + 1]);
  }
  slaveArguments[numarg] = NULL;

  numVar = -1;
  mytid = -1;
  nhost = 0;
  narch = 0;
  numProcesses = 0;
  numGoodProcesses = 0;
  NETSTARTED = 0;

  // Every array the destructor releases must start out as NULL; hostTids and
  // dataIDs are only allocated in startPVM(), so without this the destructor
  // would delete[] indeterminate pointers when startPVM() was never reached.
  tids = NULL;
  status = NULL;
  hostTids = NULL;
  dataIDs = NULL;

  ERROR = -1;
  SUCCESS = 1;

  likelihoodHJ = 0.0;
  likelihoodSA = 0.0;
  likelihoodBFGS = 0.0;
  convergedSA = 0;
  convergedHJ = 0;
  convergedBFGS = 0;
  maxNumHosts = 500;
}
|
45 : |
|
|
|
46 : |
|
|
// Releases the bookkeeping arrays and the copied slave command line, shuts the
// network down if it is still marked as started, and finally frees pvmConst
// (which stopNetCommunication() still reads for the stop tag).
NetCommunication::~NetCommunication()
{
  // delete[] accepts a null pointer, so no explicit NULL tests are required.
  delete[] tids;
  delete[] status;
  delete[] hostTids;
  delete[] dataIDs;

  delete[] slaveProgram;
  int argIdx = 0;
  while (argIdx < numarg) {
    delete[] slaveArguments[argIdx];
    ++argIdx;
  }
  delete[] slaveArguments;

  if (NETSTARTED == 1) {
    stopNetCommunication();
  }
  delete pvmConst;
}
|
65 : |
|
|
|
66 : |
|
|
// ********************************************************
|
67 : |
|
|
// Functions for starting and stopping netcommunication
|
68 : |
|
|
// ********************************************************
|
69 : |
|
|
int NetCommunication::startPVM()
|
70 : |
|
|
{
|
71 : |
|
|
/*
|
72 : |
|
|
Þetta fall er afgreitt í bili!
|
73 : |
|
|
ATH: Þarf e.t.v. að skoða parametrana í MPI_Init.
|
74 : |
|
|
*/
|
75 : |
|
|
int info;
|
76 : |
|
|
// Held að það sé í lagi að hafa bara NULL hér...
|
77 : |
|
|
MPI_Init(NULL, NULL);
|
78 : |
|
|
if (mytid < 0)
|
79 : |
|
|
{
|
80 : |
|
|
MPI_Comm_rank(MPI_COMM_WORLD, &mytid);
|
81 : |
|
|
if (mytid < 0)
|
82 : |
|
|
{
|
83 : |
|
|
printErrorMsg("Error in netcommunication - MPI not started");
|
84 : |
|
|
return ERROR;
|
85 : |
|
|
}
|
86 : |
|
|
|
87 : |
|
|
int flag;
|
88 : |
|
|
flag = 0;
|
89 : |
|
|
// Checks whether MPI_Init has been called successfully.
|
90 : |
|
|
MPI_Initialized(&flag);
|
91 : |
|
|
if (!flag)
|
92 : |
|
|
{
|
93 : |
|
|
printErrorMsg("Error in netcommunication - MPI_Init has not been called!");
|
94 : |
|
|
}
|
95 : |
|
|
// nhost á að vera 1 hérna
|
96 : |
|
|
MPI_Comm_size(MPI_COMM_WORLD, &nhost);
|
97 : |
|
|
|
98 : |
|
|
tids = new int[maxNumHosts];
|
99 : |
|
|
status = new int[maxNumHosts];
|
100 : |
|
|
hostTids = new int[maxNumHosts]; //Added jongud
|
101 : |
|
|
dataIDs = new int[maxNumHosts]; //Added jongud
|
102 : |
|
|
|
103 : |
|
|
if (nHostInn > 0)
|
104 : |
|
|
nhost = nHostInn;
|
105 : |
|
|
}
|
106 : |
|
|
return 1;
|
107 : |
|
|
}
|
108 : |
|
|
|
109 : |
|
|
int NetCommunication::startNetCommunication()
|
110 : |
|
|
{
|
111 : |
|
|
/*
|
112 : |
|
|
Þetta fall er afgreitt í bili!
|
113 : |
|
|
*/
|
114 : |
|
|
int i, OK, info;
|
115 : |
|
|
if (NETSTARTED == 1 && mytid >= 0)
|
116 : |
|
|
{
|
117 : |
|
|
// have alredy enrolled in MPI and spawned program on slaves
|
118 : |
|
|
cerr << "Warning in netcommunication - already enrolled in MPI and running " << slaveProgram << " on slaves\n";
|
119 : |
|
|
return SUCCESS;
|
120 : |
|
|
}
|
121 : |
|
|
else
|
122 : |
|
|
{
|
123 : |
|
|
if (numVar <= 0)
|
124 : |
|
|
{
|
125 : |
|
|
cerr << "Error in netcommunication - number of variables must be positive\n";
|
126 : |
|
|
return ERROR;
|
127 : |
|
|
}
|
128 : |
|
|
OK = startPVM();
|
129 : |
|
|
int* errcodes = new int[nhost];
|
130 : |
|
|
if (OK == 1)
|
131 : |
|
|
{
|
132 : |
|
|
/*
|
133 : |
|
|
Í PVM var notað pvm_catchout(stdin) til að fá output úr
|
134 : |
|
|
child processunum, í MPI setur maður flag á eftir mpirun
|
135 : |
|
|
til að fá sambærilega hegðun, þá búast til skrár fyrir hvert
|
136 : |
|
|
process sem skrifa út allt sem kemur úr stdout og stderr í þeim.
|
137 : |
|
|
*/
|
138 : |
|
|
MPI_Comm_spawn(slaveProgram, slaveArguments, nhost, MPI_INFO_NULL,
|
139 : |
|
|
0, MPI_COMM_WORLD, &intercomm, errcodes);
|
140 : |
|
|
int tidsCounter;
|
141 : |
|
|
for (i = 0; i < nhost; i++)
|
142 : |
|
|
{
|
143 : |
|
|
tidsCounter = i;
|
144 : |
|
|
if(errcodes[i] == 0)
|
145 : |
|
|
{
|
146 : |
|
|
numProcesses++;
|
147 : |
|
|
numGoodProcesses++;
|
148 : |
|
|
status[i] = 1;
|
149 : |
|
|
tids[i]=i;
|
150 : |
|
|
}
|
151 : |
|
|
else
|
152 : |
|
|
{
|
153 : |
|
|
cerr << "Error in netcommunication - unable to spawn process\n";
|
154 : |
|
|
return ERROR;
|
155 : |
|
|
}
|
156 : |
|
|
}
|
157 : |
|
|
delete [] errcodes;
|
158 : |
|
|
}
|
159 : |
|
|
|
160 : |
|
|
/*
|
161 : |
|
|
Þýðandinn var eitthvað að kvarta hérna...
|
162 : |
|
|
for (i = 0; i < nhost; i++)
|
163 : |
|
|
{
|
164 : |
|
|
|
165 : |
|
|
//hosts to be monitored for deletion, suspension and resumption
|
166 : |
|
|
hostTids[i] = hostp[i].hi_tid;
|
167 : |
|
|
}
|
168 : |
|
|
|
169 : |
|
|
*/
|
170 : |
|
|
if (OK == 1)
|
171 : |
|
|
{
|
172 : |
|
|
// Have started slaveProgram slaveArguments on all nhost hosts.
|
173 : |
|
|
// send initial info to all slave processes
|
174 : |
|
|
OK = startProcesses();
|
175 : |
|
|
if (OK == 1)
|
176 : |
|
|
{
|
177 : |
|
|
NETSTARTED = 1;
|
178 : |
|
|
return SUCCESS;
|
179 : |
|
|
}
|
180 : |
|
|
else if (OK == -1)
|
181 : |
|
|
{
|
182 : |
|
|
return ERROR;
|
183 : |
|
|
}
|
184 : |
|
|
else
|
185 : |
|
|
{
|
186 : |
|
|
printErrorMsg("Error in netcommunication - unrecognised return value");
|
187 : |
|
|
stopNetCommunication();
|
188 : |
|
|
return ERROR;
|
189 : |
|
|
}
|
190 : |
|
|
|
191 : |
|
|
}
|
192 : |
|
|
else if (OK == 0)
|
193 : |
|
|
{
|
194 : |
|
|
stopNetCommunication();
|
195 : |
|
|
return ERROR;
|
196 : |
|
|
}
|
197 : |
|
|
else
|
198 : |
|
|
{
|
199 : |
|
|
printErrorMsg("Error in netcommunication - unrecognised return value");
|
200 : |
|
|
stopNetCommunication();
|
201 : |
|
|
return ERROR;
|
202 : |
|
|
}
|
203 : |
|
|
}
|
204 : |
|
|
}
|
205 : |
|
|
|
206 : |
|
|
void NetCommunication::stopNetCommunication()
|
207 : |
|
|
{
|
208 : |
|
|
/*
|
209 : |
|
|
Fínt í bili.
|
210 : |
|
|
*/
|
211 : |
|
|
int i, tid, info, numTasks;
|
212 : |
|
|
int stopparam = -1;
|
213 : |
|
|
|
214 : |
|
|
MPI_Comm_rank(MPI_COMM_WORLD, &tid);
|
215 : |
|
|
if (tid >= 0)
|
216 : |
|
|
{
|
217 : |
|
|
for(int i=0; i<nhost; i++)
|
218 : |
|
|
{
|
219 : |
|
|
MPI_Send(&stopparam, 1, MPI_INT, i, pvmConst->getStopTag(), intercomm);
|
220 : |
|
|
}
|
221 : |
|
|
MPI_Finalize();
|
222 : |
|
|
}
|
223 : |
|
|
mytid = -1;
|
224 : |
|
|
NETSTARTED = 0;
|
225 : |
|
|
}
|
226 : |
|
|
|
227 : |
|
|
int NetCommunication::startProcesses()
|
228 : |
|
|
{
|
229 : |
|
|
/*
|
230 : |
|
|
Þetta fall er afgreitt í bili!
|
231 : |
|
|
*/
|
232 : |
|
|
//Send number of variables, group name and number of processes to spawned processes
|
233 : |
|
|
int cansend = 1;
|
234 : |
|
|
int i, info;
|
235 : |
|
|
|
236 : |
|
|
for (i = 0; i < nhost; i++)
|
237 : |
|
|
{
|
238 : |
|
|
// send initial message to all spawned processes
|
239 : |
|
|
cansend = sendInitialMessage(i);
|
240 : |
|
|
if (cansend == -1)
|
241 : |
|
|
{
|
242 : |
|
|
// Error occured in sending inital message to process with id = i
|
243 : |
|
|
printErrorMsg("Error in netcommunication - unable to send message");
|
244 : |
|
|
return ERROR;
|
245 : |
|
|
}
|
246 : |
|
|
else if (cansend == 0)
|
247 : |
|
|
{
|
248 : |
|
|
printErrorMsg("Error in netcommunication - unable to send message");
|
249 : |
|
|
status[i] = -1;
|
250 : |
|
|
return ERROR;
|
251 : |
|
|
}
|
252 : |
|
|
else if (cansend == 1)
|
253 : |
|
|
{
|
254 : |
|
|
status[i] = 1;
|
255 : |
|
|
}
|
256 : |
|
|
else
|
257 : |
|
|
{
|
258 : |
|
|
printErrorMsg("Error in netcommunication - unrecognised return value");
|
259 : |
|
|
stopNetCommunication();
|
260 : |
|
|
return ERROR;
|
261 : |
|
|
}
|
262 : |
|
|
}
|
263 : |
|
|
return SUCCESS;
|
264 : |
|
|
}
|
265 : |
|
|
|
266 : |
|
|
int NetCommunication::sendInitialMessage(int id)
|
267 : |
|
|
{
|
268 : |
|
|
int OK, info;
|
269 : |
|
|
|
270 : |
|
|
if (id < 0 || id >= nhost)
|
271 : |
|
|
{
|
272 : |
|
|
printErrorMsg("Error in netcommunication - invalid slave ID");
|
273 : |
|
|
return 0;
|
274 : |
|
|
}
|
275 : |
|
|
|
276 : |
|
|
// check if process with identity = id is up and running
|
277 : |
|
|
// spurning að sleppa þessu í bili, leyfum þessu að vera á meðan hitt er klárað.
|
278 : |
|
|
OK = checkProcess(id);
|
279 : |
|
|
if (OK == 1)
|
280 : |
|
|
{
|
281 : |
|
|
MPI_Comm parentcomm;
|
282 : |
|
|
MPI_Comm_get_parent( &parentcomm );
|
283 : |
|
|
if(parentcomm == MPI_COMM_NULL)
|
284 : |
|
|
{
|
285 : |
|
|
MPI_Send(&numVar, 1, MPI_INT, id, pvmConst->getStartTag(), intercomm);
|
286 : |
|
|
MPI_Send(&id, 1, MPI_INT, id, pvmConst->getStartTag(), intercomm);
|
287 : |
|
|
}
|
288 : |
|
|
else
|
289 : |
|
|
{
|
290 : |
|
|
printErrorMsg("Error in netcommunication - slave calling master send");
|
291 : |
|
|
stopNetCommunication();
|
292 : |
|
|
}
|
293 : |
|
|
return SUCCESS;
|
294 : |
|
|
|
295 : |
|
|
}
|
296 : |
|
|
else if (OK == -1)
|
297 : |
|
|
{
|
298 : |
|
|
printErrorMsg("Error in netcommunication - unable to check status");
|
299 : |
|
|
stopNetCommunication();
|
300 : |
|
|
return ERROR;
|
301 : |
|
|
}
|
302 : |
|
|
else if (OK == 0)
|
303 : |
|
|
{
|
304 : |
|
|
printErrorMsg("Error in netcommunication - unable to send initial message");
|
305 : |
|
|
return OK;
|
306 : |
|
|
}
|
307 : |
|
|
else
|
308 : |
|
|
{
|
309 : |
|
|
printErrorMsg("Error in netcommunication - unrecognised return value");
|
310 : |
|
|
stopNetCommunication();
|
311 : |
|
|
return ERROR;
|
312 : |
|
|
}
|
313 : |
|
|
}
|
314 : |
|
|
|
315 : |
|
|
int NetCommunication::checkProcess(int id) {
|
316 : |
|
|
/*
|
317 : |
|
|
Þetta fall er ekki að gera neitt, því það sendir adrei nein
|
318 : |
|
|
út með tagginu getTaskDiedTag()
|
319 : |
|
|
*/
|
320 : |
|
|
int info, bufID, recvTid, flag;
|
321 : |
|
|
MPI_Status stats, recvstats;
|
322 : |
|
|
assert(id >= 0);
|
323 : |
|
|
assert(id < numProcesses);
|
324 : |
|
|
|
325 : |
|
|
//bufID = pvm_probe(tids[id], pvmConst->getTaskDiedTag());
|
326 : |
|
|
|
327 : |
|
|
// Non-blocking probe which checks for a message with this tag, if there is no message then
|
328 : |
|
|
// flag is false, otherwise it is true, then something is maybe wrong with the process!
|
329 : |
|
|
|
330 : |
|
|
// ATH: Þetta flag mun líklega alltaf vera false... það tékkar bara strax og heldur svo
|
331 : |
|
|
// áfram, þetta virkar eins og pvm_probe, svo þetta ætti að vera í lagi hér.
|
332 : |
|
|
// Held samt að það sé irrelevant að vera með þetta checkprocess núna, það á allt að
|
333 : |
|
|
// vera í lagi...
|
334 : |
|
|
bufID = MPI_Iprobe(id, pvmConst->getTaskDiedTag(), intercomm, &flag, &stats);
|
335 : |
|
|
if (flag == true) {
|
336 : |
|
|
// message has arrived from tids[id] that has halted
|
337 : |
|
|
//info = pvm_recv(tids[id], pvmConst->getTaskDiedTag());
|
338 : |
|
|
|
339 : |
|
|
// Blocking receive-message for bookkeeping of status of the process.
|
340 : |
|
|
MPI_Recv(&recvTid, 1, MPI_INT, stats.MPI_SOURCE, pvmConst->getTaskDiedTag(), intercomm, &recvstats);
|
341 : |
|
|
//if (info < 0) {
|
342 : |
|
|
// printErrorMsg("Error in netcommunication - unable to check process");
|
343 : |
|
|
// return ERROR;
|
344 : |
|
|
//}
|
345 : |
|
|
|
346 : |
|
|
//info = pvm_upkint(&recvTid, 1, 1);
|
347 : |
|
|
//if (info < 0) {
|
348 : |
|
|
// printErrorMsg("Error in netcommunication - unable to check process");
|
349 : |
|
|
// return ERROR;
|
350 : |
|
|
//}
|
351 : |
|
|
|
352 : |
|
|
|
353 : |
|
|
if (recvTid != tids[id])
|
354 : |
|
|
return ERROR;
|
355 : |
|
|
|
356 : |
|
|
status[id] = -1;
|
357 : |
|
|
numGoodProcesses--;
|
358 : |
|
|
return 0;
|
359 : |
|
|
|
360 : |
|
|
}
|
361 : |
|
|
else
|
362 : |
|
|
{
|
363 : |
|
|
return SUCCESS;
|
364 : |
|
|
}
|
365 : |
|
|
}
|
366 : |
|
|
|
367 : |
|
|
void NetCommunication::checkProcesses() {
|
368 : |
|
|
/*
|
369 : |
|
|
Þetta fall er komið í bili.
|
370 : |
|
|
Þetta fall erl íka í raun óþarfi...
|
371 : |
|
|
*/
|
372 : |
|
|
int i, info, tidDown, flag;
|
373 : |
|
|
MPI_Request req;
|
374 : |
|
|
MPI_Status nonb;
|
375 : |
|
|
MPI_Irecv(&tidDown,1,MPI_INT,MPI_ANY_SOURCE,pvmConst->getTaskDiedTag(),intercomm,&req);
|
376 : |
|
|
MPI_Test(&req, &flag, &nonb);
|
377 : |
|
|
while (flag == true) {
|
378 : |
|
|
// got message that task is down, receive it
|
379 : |
|
|
i = 0;
|
380 : |
|
|
while ((tids[i] != tidDown) && (i < numProcesses))
|
381 : |
|
|
i++;
|
382 : |
|
|
|
383 : |
|
|
assert((i >= 0) && (i < numProcesses));
|
384 : |
|
|
status[i] = -1;
|
385 : |
|
|
numGoodProcesses--;
|
386 : |
|
|
MPI_Irecv (&tidDown,1,MPI_INT,MPI_ANY_SOURCE,pvmConst->getTaskDiedTag(),intercomm,&req);
|
387 : |
|
|
MPI_Test(&req, &flag, &nonb);
|
388 : |
|
|
}
|
389 : |
|
|
}
|
390 : |
|
|
|
391 : |
|
|
void NetCommunication::getHealthOfProcesses(int* procTids) {
|
392 : |
|
|
/*
|
393 : |
|
|
Þetta fall er afgreitt.
|
394 : |
|
|
|
395 : |
|
|
*/
|
396 : |
|
|
checkProcesses();
|
397 : |
|
|
int i;
|
398 : |
|
|
for (i = 0; i < numProcesses; i++)
|
399 : |
|
|
procTids[i] = status[i];
|
400 : |
|
|
}
|
401 : |
|
|
|
402 : |
|
|
// ********************************************************
|
403 : |
|
|
// Functions for sending and receiving messages
|
404 : |
|
|
// ********************************************************
|
405 : |
|
|
int NetCommunication::sendData(const ParameterVector& sendP) {
|
406 : |
|
|
/*
|
407 : |
|
|
Komið í bili, þarf samt að skoða MPI_PACK eða eitthvað álíka til að
|
408 : |
|
|
raða inn í buffer og senda strengina, gæti verið að maður þurfi þá að
|
409 : |
|
|
pakka int með sem er lengd char fylkisins. Það er samt bara kallað á þetta
|
410 : |
|
|
fall einu sinni í byrjun til að senda switches, svo að það ætti að vera í lagi.
|
411 : |
|
|
!!! ATH !!!
|
412 : |
|
|
Passa að allir sem kalla á þetta sendi communicator !!!
|
413 : |
|
|
!!!
|
414 : |
|
|
*/
|
415 : |
|
|
// must absolutely check if this is possible or can not delete
|
416 : |
|
|
// stringValue now.}}}}}}}}}}}}}x
|
417 : |
|
|
int i, info;
|
418 : |
|
|
char** stringValue;
|
419 : |
|
|
if (NETSTARTED == 1) {
|
420 : |
|
|
stringValue = new char*[numVar];
|
421 : |
|
|
for (i = 0; i < numVar; i++) {
|
422 : |
|
|
stringValue[i] = new char(strlen(sendP[i].getName())+1);
|
423 : |
|
|
strcpy(stringValue[i], sendP[i].getName());
|
424 : |
|
|
// This was done with pvm_mcast in the old version, it works similar to this, but it might be
|
425 : |
|
|
// broadcasting the data via a tree structure, this should not create too much overhead.
|
426 : |
|
|
for(int j = 0; j<nhost; j++)
|
427 : |
|
|
{
|
428 : |
|
|
MPI_Send(stringValue[i],strlen(stringValue[i]), MPI_BYTE, j, pvmConst->getMasterSendStringTag(), intercomm);
|
429 : |
|
|
}
|
430 : |
|
|
};
|
431 : |
|
|
assert(sendP.Size() >= numVar);
|
432 : |
|
|
|
433 : |
|
|
for (i = 0; i < numVar; i++)
|
434 : |
|
|
delete [] stringValue[i];
|
435 : |
|
|
delete [] stringValue;
|
436 : |
|
|
if (info < 0) {
|
437 : |
|
|
printErrorMsg("Error in netcommunication - unable to send data");
|
438 : |
|
|
stopNetCommunication();
|
439 : |
|
|
return ERROR;
|
440 : |
|
|
} else
|
441 : |
|
|
return SUCCESS;
|
442 : |
|
|
|
443 : |
|
|
} else {
|
444 : |
|
|
printErrorMsg("Error in netcommunication - unable to send data");
|
445 : |
|
|
return ERROR;
|
446 : |
|
|
}
|
447 : |
|
|
}
|
448 : |
|
|
|
449 : |
|
|
int NetCommunication::sendData(const ParameterVector& sendP, int processID) {
|
450 : |
|
|
/*
|
451 : |
|
|
Búið í bili...
|
452 : |
|
|
*/
|
453 : |
|
|
int i, info;
|
454 : |
|
|
char** stringValue;
|
455 : |
|
|
if (NETSTARTED == 1)
|
456 : |
|
|
{
|
457 : |
|
|
stringValue = new char*[numVar];
|
458 : |
|
|
for (i = 0; i < numVar; i++)
|
459 : |
|
|
{
|
460 : |
|
|
stringValue[i] = new char(strlen(sendP[i].getName())+1);
|
461 : |
|
|
strcpy(stringValue[i], sendP[i].getName());
|
462 : |
|
|
// This could be causing some overhead, could consider packing it in a buffer
|
463 : |
|
|
// before I send it, like the old pvm version, let's see if this works ok.
|
464 : |
|
|
// I think this function is only called once.
|
465 : |
|
|
MPI_Send(stringValue[i],strlen(stringValue[i]), MPI_BYTE,tids[processID],pvmConst->getMasterSendStringTag(),intercomm);
|
466 : |
|
|
};
|
467 : |
|
|
assert(sendP.Size() >= numVar);
|
468 : |
|
|
|
469 : |
|
|
for (i = 0; i < numVar; i++)
|
470 : |
|
|
delete [] stringValue[i];
|
471 : |
|
|
delete [] stringValue;
|
472 : |
|
|
return SUCCESS;
|
473 : |
|
|
}
|
474 : |
|
|
else
|
475 : |
|
|
{
|
476 : |
|
|
printErrorMsg("Error in netcommunication - unable to send data");
|
477 : |
|
|
return ERROR;
|
478 : |
|
|
}
|
479 : |
|
|
}
|
480 : |
|
|
|
481 : |
|
|
int NetCommunication::sendBoundData(const DoubleVector& sendP)
|
482 : |
|
|
{
|
483 : |
|
|
/*
|
484 : |
|
|
Komið í bili...
|
485 : |
|
|
*/
|
486 : |
|
|
int i, info;
|
487 : |
|
|
double* temp;
|
488 : |
|
|
|
489 : |
|
|
if (NETSTARTED == 1)
|
490 : |
|
|
{
|
491 : |
|
|
temp = new double[numVar];
|
492 : |
|
|
for (i = 0; i < numVar; i++)
|
493 : |
|
|
temp[i] = sendP[i];
|
494 : |
|
|
for(int j = 0; j< nhost; j++)
|
495 : |
|
|
{
|
496 : |
|
|
// This was originally done with pvm_mcast, question if this is causing overhead.
|
497 : |
|
|
MPI_Send(temp, numVar , MPI_DOUBLE,j,pvmConst->getMasterSendBoundTag(),intercomm);
|
498 : |
|
|
}
|
499 : |
|
|
delete[] temp;
|
500 : |
|
|
return SUCCESS;
|
501 : |
|
|
}
|
502 : |
|
|
else
|
503 : |
|
|
{
|
504 : |
|
|
printErrorMsg("Error in netcommunication - unable to send data");
|
505 : |
|
|
return ERROR;
|
506 : |
|
|
}
|
507 : |
|
|
}
|
508 : |
|
|
|
509 : |
|
|
int NetCommunication::sendBoundData(const DoubleVector& sendP, int processID)
|
510 : |
|
|
{
|
511 : |
|
|
/*
|
512 : |
|
|
Komið í bili!
|
513 : |
|
|
*/
|
514 : |
|
|
int i, info;
|
515 : |
|
|
double* temp;
|
516 : |
|
|
|
517 : |
|
|
if (NETSTARTED == 1)
|
518 : |
|
|
{
|
519 : |
|
|
temp = new double[numVar];
|
520 : |
|
|
for (i = 0; i < numVar; i++)
|
521 : |
|
|
temp[i] = sendP[i];
|
522 : |
|
|
|
523 : |
|
|
MPI_Send(temp,numVar, MPI_DOUBLE,tids[processID],pvmConst->getMasterSendBoundTag(),intercomm);
|
524 : |
|
|
delete[] temp;
|
525 : |
|
|
return SUCCESS;
|
526 : |
|
|
}
|
527 : |
|
|
else
|
528 : |
|
|
{
|
529 : |
|
|
printErrorMsg("Error in netcommunication - unable to send data");
|
530 : |
|
|
return ERROR;
|
531 : |
|
|
}
|
532 : |
|
|
}
|
533 : |
|
|
|
534 : |
|
|
// Sends one work item (tag, x_id and the numVar-long x array of sendP) to the
// slave tids[processID], after checking that the slave has not been reported
// dead.
//
// Returns SUCCESS when the three messages were sent, 0 when checkProcess()
// reports the slave as down, and ERROR when the network is not started or the
// check fails (the network is stopped in the latter case).
int NetCommunication::sendData(NetDataVariables* sendP, int processID)
{
  assert(processID >= 0);
  assert(processID < numProcesses);

  if (NETSTARTED != 1)
  {
    printErrorMsg("Error in netcommunication - unable to send data");
    return ERROR;
  }

  // check if process with id = processID is up and running
  const int processState = checkProcess(processID);
  switch (processState)
  {
    case -1:
      printErrorMsg("Error in netcommunication - invalid process ID");
      stopNetCommunication();
      return ERROR;

    case 0:
      // process with id = processID is not up and running
      return processState;

    case 1:
      MPI_Send(&sendP->tag, 1, MPI_INT, tids[processID], pvmConst->getMasterSendVarTag(), intercomm);
      MPI_Send(&sendP->x_id, 1, MPI_INT, tids[processID], pvmConst->getMasterSendVarTag(), intercomm);
      MPI_Send(sendP->x, numVar, MPI_DOUBLE, tids[processID], pvmConst->getMasterSendVarTag(), intercomm);
      return SUCCESS;

    default:
      break;
  }

  // Any other value from checkProcess() is unexpected.
  printErrorMsg("Error in netcommunication - unable to send data");
  stopNetCommunication();
  return ERROR;
}
|
582 : |
|
|
|
583 : |
|
|
// Blocks until a result arrives from any slave and stores it in *rp.  The
// first message (tag) is accepted from any source; result, who and x_id are
// then read from that same source, all with the master-receive-data tag.
// The received result is also echoed to stdout.
//
// Returns SUCCESS, or ERROR when the network has not been started.
//
// NOTE(review): the original author intended to raise an error when one of
// the receives fails, possibly using MPI_Probe; no such check exists yet.
int NetCommunication::receiveData(NetDataResult* rp)
{
  if (NETSTARTED != 1)
  {
    printErrorMsg("Error in netcommunication - unable to receive data");
    return ERROR;
  }

  MPI_Status headStatus, restStatus;
  MPI_Recv(&rp->tag, 1, MPI_INT, MPI_ANY_SOURCE, pvmConst->getMasterReceiveDataTag(), intercomm, &headStatus);

  // The remaining fields must come from whichever slave sent the tag.
  const int sender = headStatus.MPI_SOURCE;
  MPI_Recv(&rp->result, 1, MPI_DOUBLE, sender, pvmConst->getMasterReceiveDataTag(), intercomm, &restStatus);
  MPI_Recv(&rp->who, 1, MPI_INT, sender, pvmConst->getMasterReceiveDataTag(), intercomm, &restStatus);
  MPI_Recv(&rp->x_id, 1, MPI_INT, sender, pvmConst->getMasterReceiveDataTag(), intercomm, &restStatus);

  cout << "Skrifa result úr netcomm: " << rp->result << "\n";
  return SUCCESS;
}
|
608 : |
|
|
|
609 : |
|
|
// ********************************************************
|
610 : |
|
|
// Functions which set/return information about netcommunication
|
611 : |
|
|
// ********************************************************
|
612 : |
|
|
int NetCommunication::getNumHosts()
|
613 : |
|
|
{
|
614 : |
|
|
return nhost;
|
615 : |
|
|
}
|
616 : |
|
|
|
617 : |
|
|
int NetCommunication::getNumProcesses()
|
618 : |
|
|
{
|
619 : |
|
|
return numProcesses;
|
620 : |
|
|
}
|
621 : |
|
|
|
622 : |
|
|
int NetCommunication::getNumVar()
|
623 : |
|
|
{
|
624 : |
|
|
return numVar;
|
625 : |
|
|
}
|
626 : |
|
|
|
627 : |
|
|
int NetCommunication::getNumRunningProcesses()
|
628 : |
|
|
{
|
629 : |
|
|
return numGoodProcesses;
|
630 : |
|
|
}
|
631 : |
|
|
|
632 : |
|
|
int NetCommunication::netCommStarted()
|
633 : |
|
|
{
|
634 : |
|
|
return NETSTARTED;
|
635 : |
|
|
}
|
636 : |
|
|
|
637 : |
|
|
void NetCommunication::setNumInSendVar(int nVar)
|
638 : |
|
|
{
|
639 : |
|
|
if (nVar <= 0)
|
640 : |
|
|
{
|
641 : |
|
|
cerr << "Error in netcommunication - number of variables must be positive\n";
|
642 : |
|
|
exit(EXIT_FAILURE);
|
643 : |
|
|
}
|
644 : |
|
|
numVar = nVar;
|
645 : |
|
|
}
|
646 : |
|
|
|
647 : |
|
|
void NetCommunication::printErrorMsg(const char* errorMsg)
|
648 : |
|
|
{
|
649 : |
|
|
/*
|
650 : |
|
|
Eina fallið sem ég virðist þurfa að eiga eitthvað við hér...
|
651 : |
|
|
*/
|
652 : |
|
|
char* msg;
|
653 : |
|
|
msg = new char[strlen(errorMsg) + 1];
|
654 : |
|
|
strcpy(msg, errorMsg);
|
655 : |
|
|
// Ákvað að gera þetta svona, vona að þetta flood-i ekki command line...
|
656 : |
|
|
cout << msg << "\n";
|
657 : |
|
|
delete[] msg;
|
658 : |
|
|
cerr << errorMsg << endl;
|
659 : |
|
|
}
|
660 : |
|
|
|
661 : |
|
|
int NetCommunication::netError() {
|
662 : |
|
|
return ERROR;
|
663 : |
|
|
}
|
664 : |
|
|
|
665 : |
|
|
int NetCommunication::netSuccess() {
|
666 : |
|
|
return SUCCESS;
|
667 : |
|
|
}
|
668 : |
|
|
|
669 : |
|
|
// Builds the master communication object from the command line settings and
// sets up the receive timeout taken from info->getWaitMaster():
//   -1    no timeout record (tmout stays NULL)
//   >= 0  timeout of that many seconds
//   other fatal: error on stderr and exit(EXIT_FAILURE)
//
// The timeval is now allocated only when it is actually needed; previously it
// was allocated unconditionally and leaked when wait == -1 reset tmout to NULL.
MasterCommunication::MasterCommunication(CommandLineInfo* info)
  : NetCommunication(info->getFunction(), info->getNumProc())
{
  const int wait = info->getWaitMaster();
  tmout = NULL;

  if (wait >= 0)
  {
    tmout = new timeval;
    tmout->tv_sec = wait;
    tmout->tv_usec = 0;
  }
  else if (wait != -1)
  {
    cerr << "Error in netcommunication - invalid value for wait " << wait << "\n";
    exit(EXIT_FAILURE);
  }
}
|
687 : |
|
|
|
688 : |
|
|
// Releases the timeout record; tmout is NULL when no timeout was requested,
// which delete accepts.
MasterCommunication::~MasterCommunication() {
  delete tmout;
}
|
692 : |
|
|
|
693 : |
|
|
// Blocks until a result arrives from any slave and stores it in *rp.  The
// first message (tag) is accepted from any source; result, who and x_id are
// then read from that same source, all with the master-receive-data tag.
//
// Returns SUCCESS, or ERROR when the network has not been started.
//
// NOTE(review): the PVM version used a receive with timeout here; the original
// author intended to raise an error when a receive fails, possibly via
// MPI_Probe, but no such check exists yet.
int MasterCommunication::receiveData(NetDataResult* rp)
{
  if (NETSTARTED != 1)
  {
    printErrorMsg("Error in netcommunication - unable to receive data");
    return ERROR;
  }

  MPI_Status headStatus, restStatus;
  MPI_Recv(&rp->tag, 1, MPI_INT, MPI_ANY_SOURCE, pvmConst->getMasterReceiveDataTag(), intercomm, &headStatus);

  // The remaining fields must come from whichever slave sent the tag.
  const int sender = headStatus.MPI_SOURCE;
  MPI_Recv(&rp->result, 1, MPI_DOUBLE, sender, pvmConst->getMasterReceiveDataTag(), intercomm, &restStatus);
  MPI_Recv(&rp->who, 1, MPI_INT, sender, pvmConst->getMasterReceiveDataTag(), intercomm, &restStatus);
  MPI_Recv(&rp->x_id, 1, MPI_INT, sender, pvmConst->getMasterReceiveDataTag(), intercomm, &restStatus);

  return SUCCESS;
}
|