#
# BonFIRE Virtual Clusters on Federated Clouds Demonstration Kit
#
# Copyright (c) Fundacion Centro Tecnologico de Supercomputacion de Galicia 2012
#
# License Apache Software
#
# The research leading to these results has received funding from
# the European Community's Seventh Framework Programme (FP7/2007-2013)
# under agreement number 257386
#
# This software is provided with ABSOLUTELY NO WARRANTY
#
"""Thin wrappers around the Grid Engine command-line tools.

Every function builds a ``qconf``/``qstat``/``qdel``/``qmod`` (or init
script) command line and runs it through ``vcutil.execute``.
"""

import os
import subprocess
import tempfile
import vcutil
import xml.dom.minidom


# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------

def _list_entries(command, empty_messages):
    """Run *command* and return its non-blank output lines, stripped.

    A ``vcutil.CommandError`` whose ``output`` contains one of
    *empty_messages* is treated as "nothing defined" and yields ``[]``;
    any other ``CommandError`` is re-raised unchanged.
    """
    try:
        output, _err = vcutil.execute(command)
    except vcutil.CommandError as e:
        if any(message in e.output for message in empty_messages):
            return []
        raise
    return [entry.strip() for entry in output.split('\n') if entry.strip()]


def _qconf_with_file(option, content, path=None):
    """Write *content* to a file and run ``qconf <option> <file>``.

    When *path* is None a fresh temporary file is created with
    ``tempfile.mkstemp`` (its descriptor is wrapped, so it is closed
    properly); otherwise *path* is created/overwritten.  The file is
    removed afterwards even if the command fails.
    """
    if path is None:
        fd, path = tempfile.mkstemp()
        handle = os.fdopen(fd, 'w')
    else:
        handle = open(path, 'w')
    try:
        try:
            handle.write(content)
        finally:
            handle.close()
        vcutil.execute("qconf %s %s" % (option, path))
    finally:
        os.remove(path)


def _parse_settings(output, skip_comments=False):
    """Turn ``key value`` lines of qconf output into a dict.

    Backslash-newline continuations are joined first.  Blank lines are
    ignored, and lines starting with ``#`` are ignored when
    *skip_comments* is true.  A line holding a key without a value raises
    ``ValueError`` (same as the original unpacking did).
    """
    settings = {}
    joined = "".join(output.split("\\\n"))
    for line in joined.split('\n'):
        line = line.strip()
        if not line or (skip_comments and line.startswith('#')):
            continue
        key, value = line.split(None, 1)
        settings[key] = value
    return settings


def _split_slots(slots_value):
    """Split a queue ``slots`` value into entries without brackets/whitespace."""
    return [entry.strip().strip('[]') for entry in slots_value.split(",")]


def _format_slot(entry):
    """Re-add the square brackets around ``host=number`` entries."""
    return "[%s]" % (entry,) if "=" in entry else entry


def _store_group(groupname, hosts):
    """Rewrite host group *groupname* with *hosts* via ``qconf -Mhgrp``."""
    content = "group_name %s\nhostlist %s" % (groupname, " ".join(hosts))
    _qconf_with_file("-Mhgrp", content)


# ---------------------------------------------------------------------------
# Administration / execution / submit hosts
# ---------------------------------------------------------------------------

def add_administration_host(hostname):
    """Add *hostname* as administration host (``qconf -ah``)."""
    vcutil.execute("qconf -ah %s" % hostname)


def delete_administration_host(hostname):
    """Delete administration host *hostname* (``qconf -dh``)."""
    vcutil.execute("qconf -dh %s" % hostname)


def get_administration_hosts():
    """Return the host names printed by ``qconf -sh`` ([] when none)."""
    # NOTE(review): the original only matched "no submit host defined" here,
    # which looks like a copy/paste slip from get_submit_hosts.  Both
    # messages are accepted; confirm the exact qconf wording.
    return _list_entries("qconf -sh",
                         ("no administrative host defined",
                          "no submit host defined"))


def add_execution_host(hostname):
    """Add *hostname* as execution host from a template (``qconf -Ae``)."""
    template = "\n".join([
        "hostname %s",
        "load_scaling NONE",
        "complex_values NONE",
        "user_lists NONE",
        "xuser_lists NONE",
        "projects NONE",
        "xprojects NONE",
        "usage_scaling NONE",
        "report_variables NONE",
        "",
    ]) % hostname
    _qconf_with_file("-Ae", template)


def delete_execution_host(hostname):
    """Delete execution host *hostname* (``qconf -de``)."""
    vcutil.execute("qconf -de %s" % hostname)


def get_execution_hosts():
    """Return the host names printed by ``qconf -sel`` ([] when none)."""
    return _list_entries("qconf -sel", ("no execution host defined",))


def add_submit_host(hostname):
    """Add *hostname* as submit host (``qconf -as``)."""
    vcutil.execute("qconf -as %s" % hostname)


def delete_submit_host(hostname):
    """Delete submit host *hostname* (``qconf -ds``)."""
    vcutil.execute("qconf -ds %s" % hostname)


def get_submit_hosts():
    """Return the host names printed by ``qconf -ss`` ([] when none)."""
    return _list_entries("qconf -ss", ("no submit host defined",))


# ---------------------------------------------------------------------------
# Host groups
# ---------------------------------------------------------------------------

def get_group(groupname):
    """Return the hosts listed in host group *groupname* (``qconf -shgrp``).

    The first output line is dropped, the remainder is split on
    whitespace and its first token (the ``hostlist`` keyword) skipped.
    Bare continuation backslashes and the ``NONE`` placeholder are not
    returned.
    """
    output, _err = vcutil.execute("qconf -shgrp %s" % groupname)
    tokens = output.split('\n', 1)[1].split()[1:]
    return [token for token in tokens
            if token.strip('\\') and token != "NONE"]


def add_to_group(groupname, hostname):
    """Add *hostname* to host group *groupname* unless already a member."""
    hosts = get_group(groupname)
    # NOTE(review): the source this was rebuilt from had lost its
    # indentation; the group is rewritten only when membership changes.
    if hostname in hosts:
        return
    hosts.append(hostname)
    _store_group(groupname, hosts)


def delete_from_group(groupname, hostname):
    """Remove *hostname* from host group *groupname* if it is a member.

    An emptied group is written with the ``NONE`` placeholder as host list.
    """
    hosts = get_group(groupname)
    # NOTE(review): as in add_to_group, the rewrite is assumed to be
    # conditional on the membership test.
    if hostname not in hosts:
        return
    hosts.remove(hostname)
    if not hosts:
        hosts.append("NONE")
    _store_group(groupname, hosts)


# ---------------------------------------------------------------------------
# Jobs and tasks
# ---------------------------------------------------------------------------

# Disabled variant kept for reference (was a module-level string literal):
#
# def get_jobs_at_host(hostname):
#     output,err = vcutil.execute("qstat -xml -q *@%s" % hostname)
#     dom = xml.dom.minidom.parseString(output)
#     jobs = [node.firstChild.nodeValue for node in dom.getElementsByTagName("JB_job_number") ]
#     return jobs

def get_tasks_at_host(hostname):
    """Return ``(jobid, taskid)`` pairs found by qstat for *hostname*.

    ``taskid`` is None for jobs without a ``tasks`` element.  Output that
    cannot be parsed as XML yields an empty list (the command is run with
    ``ignore_error=True``, so its failure is not reported either).
    """
    output, _err = vcutil.execute(
        "qstat -xml -f -g d -q *@%s" % hostname, ignore_error = True)
    try:
        dom = xml.dom.minidom.parseString(output)
    except Exception:
        # Best effort: unparsable output means "no tasks known".
        return []

    tasks = []
    for instance in dom.getElementsByTagName("Queue-List"):
        instance_name = instance.getElementsByTagName("name")[0].firstChild.nodeValue
        # Substring match, as in the original.
        if hostname not in instance_name:
            continue
        for job in instance.getElementsByTagName("job_list"):
            jobid = job.getElementsByTagName("JB_job_number")[0].firstChild.nodeValue
            task_nodes = job.getElementsByTagName("tasks")
            taskid = None
            if len(task_nodes) > 0:
                taskid = task_nodes[0].firstChild.nodeValue
            tasks.append((jobid, taskid))
    return tasks


def delete_job(jobid, taskid = None, force=False):
    """Run ``qdel`` on *jobid* (``jobid.taskid`` when *taskid* is given).

    *force* adds the ``-f`` option.
    """
    opts = "-f" if force else ""
    if taskid:
        jobid = "%s.%s" % (jobid, taskid)
    vcutil.execute("qdel %s %s" % (opts, jobid,))


def resched_job(jobid, taskid = None, force=False):
    """Run ``qmod -rj`` on *jobid* (``jobid.taskid`` when *taskid* is given).

    *force* adds the ``-f`` option.
    """
    opts = "-f" if force else ""
    if taskid:
        jobid = "%s.%s" % (jobid, taskid)
    vcutil.execute("qmod %s -rj %s" % (opts, jobid,))


# ---------------------------------------------------------------------------
# Queues and global configuration
# ---------------------------------------------------------------------------

def get_queues():
    """Return the queue names printed by ``qconf -sql`` ([] when none)."""
    return _list_entries("qconf -sql", ("no cqueue list defined",))


def get_queue(queue_name):
    """Return the ``qconf -sq`` output for *queue_name* as a dict."""
    output, _err = vcutil.execute("qconf -sq %s" % queue_name)
    return _parse_settings(output)


def modify_queue(queue_name, mods):
    """Apply the key/value pairs in *mods* to *queue_name* (``qconf -Mq``).

    The current settings are read with get_queue, updated with *mods* and
    written back as a whole.
    """
    queue = get_queue(queue_name)
    queue.update(mods)
    content = "\n".join(["%s %s" % items for items in queue.items()])
    _qconf_with_file("-Mq", content)


def get_global():
    """Return the ``qconf -sconf`` output as a dict (``#`` lines skipped)."""
    output, _err = vcutil.execute("qconf -sconf")
    return _parse_settings(output, skip_comments=True)


def modify_global(mods):
    """Apply the key/value pairs in *mods* to the global configuration."""
    conf = get_global()
    conf.update(mods)
    content = "\n".join(["%s %s" % items for items in conf.items()]) + "\n"
    # The file is deliberately named "global" inside the temp directory, as
    # in the original.  NOTE(review): presumably qconf -Mconf identifies the
    # configuration by file name - confirm before switching to mkstemp.
    _qconf_with_file("-Mconf", content,
                     path=tempfile.gettempdir() + '/global')


def set_slots_host(queue_name, host, number):
    """Set the slot count of *host* in *queue_name* to *number* (minimum 1).

    An existing ``host=...`` entry is replaced in place; otherwise a new
    ``[host=number]`` entry is appended to the queue's ``slots`` value.
    """
    number = max(1, int(number))
    host_entry = "[%s=%i]" % (host, number)

    new_slots = []
    replaced = False
    for slot in _split_slots(get_queue(queue_name)['slots']):
        if slot.split('=')[0] == host:
            new_slots.append(host_entry)
            replaced = True
        else:
            new_slots.append(_format_slot(slot))
    if not replaced:
        new_slots.append(host_entry)
    modify_queue(queue_name, {'slots': ",".join(new_slots)})


def del_slots_host(queue_name, host):
    """Drop the ``host=...`` entry of *host* from *queue_name*'s slots."""
    remaining = [_format_slot(slot)
                 for slot in _split_slots(get_queue(queue_name)['slots'])
                 if slot.split('=')[0] != host]
    modify_queue(queue_name, {'slots': ",".join(remaining)})


# ---------------------------------------------------------------------------
# Daemons
# ---------------------------------------------------------------------------

def start_execd():
    """Run the ``/etc/init.d/sgeexecd`` script."""
    vcutil.execute("/etc/init.d/sgeexecd")


def start_sgemaster():
    """Run the ``/etc/init.d/sgemaster`` script."""
    vcutil.execute("/etc/init.d/sgemaster")


def start_qmaster():
    """Run ``/etc/init.d/sgemaster -qmaster``."""
    vcutil.execute("/etc/init.d/sgemaster -qmaster")


def start_shadowd():
    """Run ``/etc/init.d/sgemaster -shadowd``."""
    vcutil.execute("/etc/init.d/sgemaster -shadowd")


# ---------------------------------------------------------------------------
# High-level host management
# ---------------------------------------------------------------------------

def remove_host(hostname):
    """Take *hostname* out of the cluster if it is an execution host.

    Removes it from ``@allhosts``, reschedules the tasks found on it and
    deletes it as execution and administration host.
    """
    # NOTE(review): nesting reconstructed from whitespace-collapsed source;
    # every step is assumed to be conditional on the host being known.
    if hostname not in get_execution_hosts():
        return
    delete_from_group("@allhosts", hostname)
    for jobid, taskid in get_tasks_at_host(hostname):
        # delete_job(jobid,taskid,force=True)
        resched_job(jobid, taskid, force=True)
    delete_execution_host(hostname)
    delete_administration_host(hostname)
    # del_slots_host('all.q',hostname)


def new_host(hostname, slots=1):
    """Add *hostname* to the cluster unless it already is an execution host.

    Adds it to ``@allhosts``, registers it as administration host and sets
    its slot count in ``all.q`` to *slots*.
    """
    # NOTE(review): nesting reconstructed from whitespace-collapsed source;
    # every step is assumed to be conditional on the host being new.
    if hostname in get_execution_hosts():
        return
    add_to_group("@allhosts", hostname)
    add_administration_host(hostname)
    set_slots_host('all.q', hostname, slots)


if __name__ == "__main__":
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(get_administration_hosts())
    print("------")
    print(get_submit_hosts())
    print("------")
    print(get_execution_hosts())
    print("------")
    group = "@allhosts"
    print(get_group(group))
    print("------")