1 : |
agomez |
10 |
#
|
2 : |
|
|
# BonFIRE Virtual Clusters on Federated Clouds Demonstration Kit
|
3 : |
|
|
#
|
4 : |
|
|
# Copyright (c) Fundacion Centro Tecnologico de Supercomputacion de Galicia 2012
|
5 : |
|
|
#
|
6 : |
|
|
# License GPL Version 3
|
7 : |
|
|
#
|
8 : |
|
|
# The research leading to these results has received funding from
|
9 : |
|
|
# the European Community's Seventh Framework Programme (FP7/2007-2013)
|
10 : |
|
|
# under agreement number 257386
|
11 : |
|
|
#
|
12 : |
|
|
# This software is provided with ABSOLUTELY NO WARRANTY
|
13 : |
|
|
#
|
14 : |
|
|
import os
|
15 : |
|
|
import subprocess
|
16 : |
|
|
import tempfile
|
17 : |
|
|
import vcutil
|
18 : |
|
|
import xml.dom.minidom
|
19 : |
|
|
|
20 : |
|
|
def add_administration_host(hostname):
    """Run ``qconf -ah`` for *hostname* to add it as an administration host."""
    command = "qconf -ah %s" % hostname
    vcutil.execute(command)
|
22 : |
|
|
|
23 : |
|
|
def delete_administration_host(hostname):
    """Run ``qconf -dh`` for *hostname* to drop it from the administration hosts."""
    command = "qconf -dh %s" % hostname
    vcutil.execute(command)
|
25 : |
|
|
|
26 : |
|
|
def get_administration_hosts():
    """Return the stripped, non-empty lines printed by ``qconf -sh``.

    A ``vcutil.CommandError`` whose output contains
    "no submit host defined" is taken to mean "nothing configured" and gives
    an empty list; any other ``vcutil.CommandError`` is re-raised.
    """
    # NOTE(review): the tolerated message talks about *submit* hosts although
    # this command lists administration hosts -- confirm the text is intended.
    try:
        stdout, _unused = vcutil.execute("qconf -sh")
    except vcutil.CommandError as error:
        if "no submit host defined" in error.output:
            return []
        raise error
    names = []
    for line in stdout.split('\n'):
        name = line.strip()
        if len(name) > 0:
            names.append(name)
    return names
|
35 : |
|
|
|
36 : |
|
|
def add_execution_host(hostname):
    """Add *hostname* as an execution host via ``qconf -Ae``.

    A host description with every optional attribute set to ``NONE`` is
    written to a temporary file, the file is passed to ``qconf -Ae`` and it is
    removed afterwards, whether or not the command succeeded.

    Any exception raised by ``vcutil.execute`` propagates to the caller.
    """
    template = """hostname %s
load_scaling NONE
complex_values NONE
user_lists NONE
xuser_lists NONE
projects NONE
xprojects NONE
usage_scaling NONE
report_variables NONE
""" % hostname
    # mkstemp() hands back an already-open descriptor; wrap it instead of
    # re-opening the path so the descriptor is not leaked.
    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as handle:
            handle.write(template)
        vcutil.execute("qconf -Ae %s" % path)
    finally:
        # Clean up even when qconf fails.
        os.remove(path)
|
52 : |
|
|
|
53 : |
|
|
def delete_execution_host(hostname):
    """Run ``qconf -de`` for *hostname* to delete that execution host."""
    command = "qconf -de %s" % hostname
    vcutil.execute(command)
|
55 : |
|
|
|
56 : |
|
|
def get_execution_hosts():
    """Return the stripped, non-empty lines printed by ``qconf -sel``.

    A ``vcutil.CommandError`` whose output contains
    "no execution host defined" gives an empty list; any other
    ``vcutil.CommandError`` is re-raised.
    """
    try:
        stdout, _unused = vcutil.execute("qconf -sel")
    except vcutil.CommandError as error:
        if "no execution host defined" in error.output:
            return []
        raise error
    names = []
    for line in stdout.split('\n'):
        name = line.strip()
        if len(name) > 0:
            names.append(name)
    return names
|
65 : |
|
|
|
66 : |
|
|
def add_submit_host(hostname):
    """Run ``qconf -as`` for *hostname* to add it as a submit host."""
    command = "qconf -as %s" % hostname
    vcutil.execute(command)
|
68 : |
|
|
|
69 : |
|
|
def delete_submit_host(hostname):
    """Run ``qconf -ds`` for *hostname* to drop it from the submit hosts."""
    command = "qconf -ds %s" % hostname
    vcutil.execute(command)
|
71 : |
|
|
|
72 : |
|
|
def get_submit_hosts():
    """Return the stripped, non-empty lines printed by ``qconf -ss``.

    A ``vcutil.CommandError`` whose output contains
    "no submit host defined" gives an empty list; any other
    ``vcutil.CommandError`` is re-raised.
    """
    try:
        stdout, _unused = vcutil.execute("qconf -ss")
    except vcutil.CommandError as error:
        if "no submit host defined" in error.output:
            return []
        raise error
    names = []
    for line in stdout.split('\n'):
        name = line.strip()
        if len(name) > 0:
            names.append(name)
    return names
|
81 : |
|
|
|
82 : |
|
|
def get_group(groupname):
    """Return the member tokens of host group *groupname* from ``qconf -shgrp``.

    The first output line is skipped, as is the first whitespace-separated
    token of the remainder; tokens consisting only of backslashes are dropped
    and one "NONE" entry, if present, is removed.
    """
    stdout, _unused = vcutil.execute("qconf -shgrp %s" % groupname)
    # Everything after the first newline, minus its leading keyword token.
    remainder = stdout.split('\n', 1)[1]
    tokens = remainder.split()[1:]
    members = []
    for token in tokens:
        # A token that is empty once backslashes are stripped carries no name.
        if len(token.strip('\\')) > 0:
            members.append(token)
    if "NONE" in members:
        members.remove("NONE")
    return members
|
89 : |
|
|
|
90 : |
|
|
def add_to_group(groupname,hostname):
    """Add *hostname* to the host group *groupname* via ``qconf -Mhgrp``.

    The current members are read with ``get_group``.  When *hostname* is
    already among them nothing is changed; otherwise the extended member list
    is written to a temporary host-group file that is passed to qconf and
    removed afterwards, whether or not the command succeeded.

    Any exception raised by ``vcutil.execute`` propagates to the caller.
    """
    hosts = get_group(groupname)
    if hostname in hosts:
        return
    hosts.append(hostname)
    template = "group_name %s\nhostlist %s" % (groupname, " ".join(hosts))
    # mkstemp() hands back an already-open descriptor; wrap it instead of
    # re-opening the path so the descriptor is not leaked.
    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as handle:
            handle.write(template)
        vcutil.execute("qconf -Mhgrp %s" % path)
    finally:
        # Clean up even when qconf fails.
        os.remove(path)
|
101 : |
|
|
|
102 : |
|
|
def delete_from_group(groupname,hostname):
    """Remove *hostname* from the host group *groupname* via ``qconf -Mhgrp``.

    The current members are read with ``get_group``.  When *hostname* is not
    among them nothing is changed.  Otherwise the reduced member list (or the
    single entry "NONE" when it became empty) is written to a temporary
    host-group file that is passed to qconf and removed afterwards, whether
    or not the command succeeded.

    Any exception raised by ``vcutil.execute`` propagates to the caller.
    """
    hosts = get_group(groupname)
    if hostname not in hosts:
        return
    hosts.remove(hostname)
    if not hosts:
        # An empty group is written with the placeholder entry "NONE".
        hosts.append("NONE")
    template = "group_name %s\nhostlist %s" % (groupname, " ".join(hosts))
    # mkstemp() hands back an already-open descriptor; wrap it instead of
    # re-opening the path so the descriptor is not leaked.
    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as handle:
            handle.write(template)
        vcutil.execute("qconf -Mhgrp %s" % path)
    finally:
        # Clean up even when qconf fails.
        os.remove(path)
|
115 : |
|
|
|
116 : |
|
|
"""
|
117 : |
|
|
def get_jobs_at_host(hostname):
|
118 : |
|
|
output,err = vcutil.execute("qstat -xml -q *@%s" % hostname)
|
119 : |
|
|
dom = xml.dom.minidom.parseString(output)
|
120 : |
|
|
jobs = [node.firstChild.nodeValue for node in dom.getElementsByTagName("JB_job_number") ]
|
121 : |
|
|
return jobs
|
122 : |
|
|
"""
|
123 : |
|
|
|
124 : |
|
|
def get_tasks_at_host(hostname):
    """Return ``(jobid, taskid)`` pairs found in qstat's XML for *hostname*.

    ``qstat -xml -f -g d -q *@<hostname>`` is run with ``ignore_error=True``.
    If its output cannot be parsed as XML an empty list is returned.
    Otherwise every ``Queue-List`` element whose ``name`` text contains
    *hostname* is scanned, and each of its ``job_list`` elements yields one
    pair.  ``taskid`` is ``None`` when the job has no ``tasks`` element.
    """
    tasks = []
    output, _unused = vcutil.execute(
        "qstat -xml -f -g d -q *@%s" % hostname, ignore_error=True)
    try:
        dom = xml.dom.minidom.parseString(output)
    except Exception:
        # Unparseable output is treated as "no tasks".  Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return tasks
    # NOTE(review): the host test is a substring match on the queue instance
    # name, so one host name that is a prefix of another also matches.
    instances = [
        ql for ql in dom.getElementsByTagName("Queue-List")
        if hostname in ql.getElementsByTagName("name")[0].firstChild.nodeValue
    ]
    for instance in instances:
        for job in instance.getElementsByTagName("job_list"):
            jobid = job.getElementsByTagName("JB_job_number")[0].firstChild.nodeValue
            task_nodes = job.getElementsByTagName("tasks")
            taskid = None
            if len(task_nodes) > 0:
                taskid = task_nodes[0].firstChild.nodeValue
            tasks.append((jobid, taskid))
    return tasks
|
139 : |
|
|
|
140 : |
|
|
def delete_job(jobid,taskid = None, force=False):
    """Run ``qdel`` on *jobid*, or on ``<jobid>.<taskid>`` when *taskid* is truthy.

    With *force* set, ``-f`` is passed to qdel.
    """
    flags = "-f" if force else ""
    target = "%s.%s" % (jobid, taskid) if taskid else jobid
    vcutil.execute("qdel %s %s" % (flags, target))
|
147 : |
|
|
|
148 : |
|
|
def resched_job(jobid,taskid = None, force=False):
    """Run ``qmod -rj`` on *jobid*, or on ``<jobid>.<taskid>`` when *taskid* is truthy.

    With *force* set, ``-f`` is passed to qmod.
    """
    flags = "-f" if force else ""
    target = "%s.%s" % (jobid, taskid) if taskid else jobid
    vcutil.execute("qmod %s -rj %s" % (flags, target))
|
155 : |
|
|
|
156 : |
|
|
def get_queues():
    """Return the stripped, non-empty lines printed by ``qconf -sql``.

    A ``vcutil.CommandError`` whose output contains
    "no cqueue list defined" gives an empty list; any other
    ``vcutil.CommandError`` is re-raised.
    """
    try:
        stdout, _unused = vcutil.execute("qconf -sql")
    except vcutil.CommandError as error:
        if "no cqueue list defined" in error.output:
            return []
        raise error
    names = []
    for line in stdout.split('\n'):
        name = line.strip()
        if len(name) > 0:
            names.append(name)
    return names
|
165 : |
|
|
|
166 : |
|
|
def get_queue(queue_name):
    """Return the ``qconf -sq`` output for *queue_name* as a dict.

    Backslash-newline continuations are joined first.  Every non-blank line
    is then split at its first run of whitespace into key and value.
    """
    stdout, _unused = vcutil.execute("qconf -sq %s" % queue_name)
    # Glue continued lines back together before splitting into records.
    unfolded = stdout.replace("\\\n", "")
    settings = {}
    for raw in unfolded.split('\n'):
        entry = raw.strip()
        if not entry:
            continue
        key, value = entry.split(None, 1)
        settings[key] = value
    return settings
|
176 : |
|
|
|
177 : |
|
|
def modify_queue(queue_name,mods):
    """Apply the key/value overrides in *mods* to queue *queue_name*.

    The current settings are read with ``get_queue`` and updated with *mods*.
    They are written as ``key value`` lines to a temporary file that is
    passed to ``qconf -Mq`` and removed afterwards, whether or not the
    command succeeded.

    Any exception raised by ``vcutil.execute`` propagates to the caller.
    """
    queue = get_queue(queue_name)
    queue.update(mods)
    content = "\n".join(["%s %s" % item for item in queue.items()])
    # mkstemp() hands back an already-open descriptor; wrap it instead of
    # re-opening the path so the descriptor is not leaked.
    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'w') as handle:
            handle.write(content)
        vcutil.execute("qconf -Mq %s" % path)
    finally:
        # Clean up even when qconf fails.
        os.remove(path)
|
186 : |
|
|
|
187 : |
|
|
def get_global():
    """Return the ``qconf -sconf`` output as a dict.

    Backslash-newline continuations are joined first.  Blank lines and lines
    starting with ``#`` are skipped; every other line is split at its first
    run of whitespace into key and value.
    """
    stdout, _unused = vcutil.execute("qconf -sconf")
    # Glue continued lines back together before splitting into records.
    unfolded = stdout.replace("\\\n", "")
    settings = {}
    for raw in unfolded.split('\n'):
        entry = raw.strip()
        if not entry or entry.startswith('#'):
            continue
        key, value = entry.split(None, 1)
        settings[key] = value
    return settings
|
197 : |
|
|
|
198 : |
|
|
def modify_global(mods):
    """Apply the key/value overrides in *mods* to the ``qconf -sconf`` settings.

    The current settings are read with ``get_global`` and updated with
    *mods*.  They are written as ``key value`` lines to a file named
    ``global``, which is passed to ``qconf -Mconf``.  The file lives in a
    freshly created private temporary directory; both are removed
    afterwards, whether or not the command succeeded.

    Any exception raised by ``vcutil.execute`` propagates to the caller.
    """
    conf = get_global()
    conf.update(mods)
    # The file keeps the fixed name 'global' (presumably qconf -Mconf takes
    # the configuration name from the file name -- confirm against qconf(1)).
    # A private directory avoids the predictable, shared <tmpdir>/global path.
    directory = tempfile.mkdtemp()
    path = os.path.join(directory, 'global')
    try:
        with open(path, 'w') as handle:
            handle.write("\n".join(["%s %s" % item for item in conf.items()]))
            handle.write("\n")
        vcutil.execute("qconf -Mconf %s" % path)
    finally:
        # Clean up even when writing or qconf fails.
        if os.path.exists(path):
            os.remove(path)
        os.rmdir(directory)
|
208 : |
|
|
|
209 : |
|
|
def set_slots_host(queue_name,host,number):
    """Set the per-host slot count of *host* in queue *queue_name*.

    The queue's ``slots`` value is read with ``get_queue`` and split at
    commas.  An existing ``host=N`` entry is replaced, otherwise a new
    ``[host=number]`` entry is appended, and the result is stored with
    ``modify_queue``.

    *number* is converted with ``int()`` and raised to at least 1; ``None``
    is treated as 1.  A value ``int()`` cannot convert raises ``ValueError``
    or ``TypeError``.
    """
    slots = get_queue(queue_name)['slots']
    entries = [s.strip().strip('[]') for s in slots.split(",")]
    # Convert before clamping so string input such as "0" is clamped too.
    number = 1 if number is None else max(1, int(number))
    new_slots = []
    added = False
    for slot in entries:
        if slot.split('=')[0] == host:
            slot = "%s=%i" % (host, number)
            added = True
        if "=" in slot:
            # Entries containing "=" are written back inside brackets.
            slot = "[%s]" % (slot,)
        new_slots.append(slot)
    if not added:
        new_slots.append("[%s=%i]" % (host, number))
    modify_queue(queue_name, {'slots': ",".join(new_slots)})
|
228 : |
|
|
|
229 : |
|
|
def del_slots_host(queue_name,host):
    """Remove the per-host slot entry of *host* from queue *queue_name*.

    The queue's ``slots`` value is read with ``get_queue`` and split at
    commas.  The entry whose part before ``=`` equals *host* is dropped and
    the remaining entries are stored with ``modify_queue``.
    """
    slots = get_queue(queue_name)['slots']
    # Strip surrounding whitespace before the brackets, as set_slots_host
    # does; otherwise " [host=2]" keeps its "[" and never matches *host*.
    entries = [s.strip().strip('[]') for s in slots.split(",")]
    new_slots = []
    for slot in entries:
        if slot.split('=')[0] == host:
            continue
        if "=" in slot:
            # Entries containing "=" are written back inside brackets.
            slot = "[%s]" % (slot,)
        new_slots.append(slot)
    modify_queue(queue_name, {'slots': ",".join(new_slots)})
|
240 : |
|
|
|
241 : |
|
|
def start_execd():
    """Run the ``/etc/init.d/sgeexecd`` script through ``vcutil.execute``."""
    command = "/etc/init.d/sgeexecd"
    vcutil.execute(command)
|
243 : |
|
|
|
244 : |
|
|
def start_sgemaster():
    """Run the ``/etc/init.d/sgemaster`` script through ``vcutil.execute``."""
    command = "/etc/init.d/sgemaster"
    vcutil.execute(command)
|
246 : |
|
|
|
247 : |
|
|
def start_qmaster():
    """Run ``/etc/init.d/sgemaster -qmaster`` through ``vcutil.execute``."""
    command = "/etc/init.d/sgemaster -qmaster"
    vcutil.execute(command)
|
249 : |
|
|
|
250 : |
|
|
def start_shadowd():
    """Run ``/etc/init.d/sgemaster -shadowd`` through ``vcutil.execute``."""
    command = "/etc/init.d/sgemaster -shadowd"
    vcutil.execute(command)
|
252 : |
|
|
|
253 : |
|
|
def remove_host(hostname):
    """Take *hostname* out of the cluster if it is a known execution host.

    In order: the host is removed from the "@allhosts" group, each task
    reported by ``get_tasks_at_host`` is force-rescheduled, and the host is
    deleted as execution host and as administration host.  Nothing happens
    when *hostname* is not returned by ``get_execution_hosts``.
    """
    if hostname not in get_execution_hosts():
        return
    delete_from_group("@allhosts", hostname)
    # Tasks are rescheduled rather than deleted (the delete_job call is
    # disabled).
    for job, task in get_tasks_at_host(hostname):
        resched_job(job, task, force=True)
    delete_execution_host(hostname)
    delete_administration_host(hostname)
    # The host's slot entry in 'all.q' is left in place (the del_slots_host
    # call is disabled).
|
262 : |
|
|
|
263 : |
|
|
def new_host(hostname, slots=1):
    """Bring *hostname* into the cluster unless it already is an execution host.

    In order: the host is added to the "@allhosts" group, registered as an
    administration host, and given *slots* slots in queue 'all.q'.  Nothing
    happens when *hostname* is already returned by ``get_execution_hosts``.
    """
    if hostname in get_execution_hosts():
        return
    add_to_group("@allhosts", hostname)
    add_administration_host(hostname)
    set_slots_host('all.q', hostname, slots)
|
268 : |
|
|
|
269 : |
|
|
if __name__== "__main__":
    # Manual smoke test: dump the host lists and the members of one host
    # group.  Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print(get_administration_hosts())
    print("------")
    print(get_submit_hosts())
    print("------")
    print(get_execution_hosts())
    print("------")
    group="@allhosts"
    node="node253"  # not used below; kept from the original script
    print(get_group(group))
    print("------")
|
280 : |
|
|
|
281 : |
|
|
|
282 : |
|
|
|
283 : |
|
|
|
284 : |
|
|
|
285 : |
|
|
|