1 : |
agomez |
1 |
#!/usr/bin/python
|
2 : |
agomez |
9 |
#
|
3 : |
|
|
# BonFIRE Virtual Clusters on Federated Clouds Demonstration Kit
|
4 : |
|
|
#
|
5 : |
|
|
# Copyright (c) Fundacion Centro Tecnologico de Supercomputacion de Galicia 2012
|
6 : |
|
|
#
|
7 : |
|
|
# License GPL Version 3
|
8 : |
|
|
#
|
9 : |
|
|
# The research leading to these results has received funding from
|
10 : |
|
|
# the European Community's Seventh Framework Programme (FP7/2007-2013)
|
11 : |
|
|
# under agreement number 257386
|
12 : |
|
|
#
|
13 : |
|
|
# This software is provided with ABSOLUTELY NO WARRANTY
|
14 : |
|
|
#
|
15 : |
agomez |
1 |
import sys
|
16 : |
|
|
import time
|
17 : |
|
|
|
18 : |
|
|
from logger import log,configure
|
19 : |
|
|
|
20 : |
|
|
# Pause, in seconds, passed to time.sleep() after every synchronisation pass.
UPDATER_PERIOD = 30

# Logging setup: with no command-line argument, log to the console; otherwise
# the first argument is used as the log file and console output is disabled.
# Debug output is enabled in both cases.
if not sys.argv[1:]:
    configure(debug=True, console=True)
else:
    configure(logfile=sys.argv[1], debug=True, console=False)
|
26 : |
|
|
|
27 : |
|
|
def _is_client(compute):
    """Return True when the compute's hostname contains "client"."""
    return "client" in compute['hostname']


def _is_finished(compute):
    """Return True when the compute's state contains "DONE" or "FAILED"."""
    return "DONE" in compute['state'] or "FAILED" in compute['state']


def _is_active_client(compute):
    """Return True for a client compute that is neither DONE nor FAILED."""
    return _is_client(compute) and not _is_finished(compute)


# Main body of the updater. Everything runs inside one try so that any
# unexpected error (including a failing import of the project modules) is
# written to the configured log before the script ends.
try:
    import bonfire
    import ogs
    import hostsfile

    default_bonfire = "/etc/default/bonfire"
    bonfire.read_defaults(default_bonfire)
    hosts_file = "/etc/hosts"

    log.info("Bonfire URI: %s" % bonfire.uri)
    # The password is deliberately masked: credentials must not end up in
    # log files in clear text.
    log.info("Credentials: %s:%s" % (bonfire.user, "********"))
    log.info("Experiment: %s" % bonfire.experiment_id)

    # Configure OGS once at start-up, retrying every 10 seconds until all
    # three settings have been applied without an exception.
    log.debug("Set OGS all.q queue to use /bin/bash")
    while True:
        try:
            ogs.modify_queue("all.q", {"shell": "/bin/bash"})
            ogs.modify_queue("all.q", {"rerun": "TRUE"})
            ogs.modify_global({"max_unheard": "00:02:00",
                               "reschedule_unknown": "00:01:00"})
            break
        except Exception as excpt:
            log.exception(excpt)
            time.sleep(10)

    # Endless synchronisation loop: fetch the experiment, then bring the
    # hosts file and OGS in line with it. Each step has its own try/except
    # so that a failure in one step does not skip the others.
    while True:
        try:
            log.info("Get experiment")
            experiment = bonfire.get_experiment(bonfire.experiment_id)

            try:
                # Update /etc/hosts with active clients whose IP is not
                # listed there yet.
                log.info("Hosts file")
                hosts = hostsfile.load(hosts_file)
                # "in" replaces the Python-2-only has_key() call.
                new_hosts = [
                    (compute['ip'], compute['hostname'])
                    for compute in experiment['computes']
                    if _is_active_client(compute)
                    and compute['ip'] not in hosts
                ]
                log.debug(new_hosts)
                for ip, hostname in new_hosts:
                    # Add to hosts list file
                    hostsfile.append(hosts_file, ip, hostname)
            except Exception as excpt:
                log.exception(excpt)

            try:
                # Update OGS with active clients that are not execution
                # hosts yet.
                log.info("Update OGS")
                hosts = ogs.get_execution_hosts()
                new_hosts = [
                    (compute['ip'], compute['hostname'], float(compute['cpu']))
                    for compute in experiment['computes']
                    if _is_active_client(compute)
                    and compute['hostname'] not in hosts
                ]
                log.debug(new_hosts)
                for ip, hostname, cpu in new_hosts:
                    # Each host is handled on its own so that one failing
                    # host cannot prevent the remaining ones from being added.
                    try:
                        ogs.new_host(hostname, cpu)
                    except Exception as excpt:
                        log.exception(excpt)
            except Exception as excpt:
                log.exception(excpt)

            try:
                # Remove clients that are DONE or FAILED from OGS.
                log.info("Remove invalid hosts")
                done_hostnames = [
                    compute['hostname']
                    for compute in experiment['computes']
                    if _is_client(compute) and _is_finished(compute)
                ]
                log.debug(done_hostnames)
                for hostname in done_hostnames:
                    # Finished hosts are submitted for removal on every
                    # pass; isolating each call keeps one failing removal
                    # from blocking the hosts listed after it.
                    try:
                        ogs.remove_host(hostname)
                    except Exception as excpt:
                        log.exception(excpt)
            except Exception as excpt:
                log.exception(excpt)
        except Exception as excpt:
            log.exception(excpt)
        time.sleep(UPDATER_PERIOD)
except Exception as excpt:
    log.exception(excpt)
|