29 |
def ea_bf(exp,iter): |
def ea_bf(exp,iter): |
30 |
"""It Submits and deletes experiments, checks if the VMs are available and saves the time""" |
"""It Submits and deletes experiments, checks if the VMs are available and saves the time""" |
31 |
|
|
32 |
#: Url experiment manager y data |
#: URL of the experiment manager and data
33 |
#user = 'agomez' |
|
|
#user = cr.getuser('~/.restfully/api.bonfire-project.eu.yml') |
|
|
#passwd = 'elpais00' |
|
|
#passwd = cr.getpass('~/.restfully/api.bonfire-project.eu.yml') |
|
34 |
theurl = 'https://api.bonfire-project.eu:443/managed_experiments' |
theurl = 'https://api.bonfire-project.eu:443/managed_experiments' |
35 |
#theurl= 'https://api.integration.bonfire.grid5000.fr/managed_experiments' |
#theurl= 'https://api.integration.bonfire.grid5000.fr/managed_experiments' |
36 |
raiz = 'https://api.bonfire-project.eu:443' |
raiz = 'https://api.bonfire-project.eu:443' |
39 |
app=0 |
app=0 |
40 |
ela=0 |
ela=0 |
41 |
|
|
42 |
#Construimos el objeto job que nos permite enviar, borrar, comprobar logs y le pasamos las credenciales |
#Create the experiment manager object which allows submission, deletion, and log checking.
43 |
job = em.sub() |
job = em.sub() |
44 |
|
|
45 |
|
#Inform it about user credentials |
46 |
job.user = cr.getuser('~/.restfully/api.bonfire-project.eu.yml') |
job.user = cr.getuser('~/.restfully/api.bonfire-project.eu.yml') |
47 |
job.passwd = cr.getpass('~/.restfully/api.bonfire-project.eu.yml') |
job.passwd = cr.getpass('~/.restfully/api.bonfire-project.eu.yml') |
48 |
|
|
49 |
#Enviamos y procesamos el envio |
#Send the request |
50 |
|
|
51 |
#Diccionario con tiempos |
# Dictionary of times |
52 |
#sub_time = envio |
#sub_time = time of the submission
53 |
#broker_time = registro en el broker from log info |
#broker_time = Registry from broker got from log info |
54 |
#ssh_able = accesible por ssh |
#ssh_able = ssh accessible
55 |
#del_time = borrado |
#del_time = deleted |
56 |
#ssh_disable = no accesible por ssh |
#ssh_disable = cannot be connected via ssh
57 |
|
|
58 |
|
|
59 |
#Creamos experimento enviando json |
#Create the experiment sending the JSON file |
60 |
stats_log = dict() |
stats_log = dict() |
61 |
sub_time = time.time() |
sub_time = time.time() |
62 |
#print 'exp', exp |
#print 'exp', exp |
89 |
print '----------------\n' |
print '----------------\n' |
90 |
|
|
91 |
|
|
92 |
#Pedimos el log y lo procesamos |
# Get the log and process it |
93 |
compute=[] |
compute=[] |
94 |
deployed_log = '' |
deployed_log = '' |
95 |
log_error=0 |
log_error=0 |
98 |
j=0 |
j=0 |
99 |
print 'Deploying experiment...' |
print 'Deploying experiment...' |
100 |
while('deployed' not in deployed_log and log_error is 0 ): |
while('deployed' not in deployed_log and log_error is 0 ): |
101 |
#print 'Comprobando log del experimento...', job_infor['experiment'] |
#print 'Checking experiment log...', job_infor['experiment'] |
102 |
#print |
#print |
103 |
respuesta_log = job.log(raiz + job_infor['log']) |
respuesta_log = job.log(raiz + job_infor['log']) |
104 |
#print(respuesta_log) |
#print(respuesta_log) |
126 |
compute_uris=dict() |
compute_uris=dict() |
127 |
if log_error is 0 and 'deployed' in deployed_log: |
if log_error is 0 and 'deployed' in deployed_log: |
128 |
status_cluster='' |
status_cluster='' |
129 |
#Tiempo de referencia para medir que todos los WN esten desplegados y cambio de estado |
# Reference time to measure that all WN have been deployed and change state |
130 |
#t2=time.time() |
#t2=time.time() |
131 |
waiting = 0 |
waiting = 0 |
132 |
statusini='' |
statusini='' |
133 |
wnstatus=dict() |
wnstatus=dict() |
134 |
while 'Failed' not in status and waiting < 1200 and 'Running' not in status_cluster and 'Failed' not in status_cluster: |
while 'Failed' not in status and waiting < 1200 and 'Running' not in status_cluster and 'Failed' not in status_cluster: |
135 |
j=0 #Para que cluster deployed todos los nodos tienen que estar running. |
j=0 # A deployed cluster means that all nodes must be running. |
136 |
#print "Failed' not in status", 'Failed' not in status |
#print "Failed' not in status", 'Failed' not in status |
137 |
#print "waiting < 1200", waiting < 1200 |
#print "waiting < 1200", waiting < 1200 |
138 |
#print "Running not in status_cluster", 'Running' not in status_cluster |
#print "Running not in status_cluster", 'Running' not in status_cluster |
139 |
#print "Failed not in status_cluster", 'Failed' not in status_cluster |
#print "Failed not in status_cluster", 'Failed' not in status_cluster |
140 |
"""Recorre todos los nodos del cluster""" |
"""Loop on all of cluster nodes""" |
141 |
for com in uris_log['computes']: |
for com in uris_log['computes']: |
142 |
#print (('Running' or 'Failed') not in status and waiting < 1200, status) |
#print (('Running' or 'Failed') not in status and waiting < 1200, status) |
143 |
#print 'status', 'Running' not in status and 'Failed' not in status and waiting < 1200, status |
#print 'status', 'Running' not in status and 'Failed' not in status and waiting < 1200, status |
176 |
#print ('Compute '+com+'is running'), j |
#print ('Compute '+com+'is running'), j |
177 |
print (compute_info['hostname'], status) |
print (compute_info['hostname'], status) |
178 |
if 'client' in compute_info['hostname']: |
if 'client' in compute_info['hostname']: |
179 |
#Nodo de referencia para elasticidad |
# Elasticity reference node |
180 |
ela_ref=com |
ela_ref=com |
181 |
|
|
182 |
if j == len(uris_log['computes']): |
if j == len(uris_log['computes']): |
199 |
print 'Possible Broker Error', respuesta_compute |
print 'Possible Broker Error', respuesta_compute |
200 |
print '----------------' |
print '----------------' |
201 |
|
|
202 |
"""Muestra los nodos desplegados y caracteristicas""" |
"""Show the deployed nodes and their carasteristics""" |
203 |
nodes=0 |
nodes=0 |
204 |
for i in compute_uris.keys(): |
for i in compute_uris.keys(): |
205 |
print compute_uris[i] |
print compute_uris[i] |
211 |
print 'Master', master |
print 'Master', master |
212 |
print '----------------\n' |
print '----------------\n' |
213 |
|
|
214 |
#Probamos si se puede acceder por ssh |
# Check if it is ssh-available |
215 |
if log_error is 0 and 'deployed' in deployed_log and 'Running' in status_cluster: |
if log_error is 0 and 'deployed' in deployed_log and 'Running' in status_cluster: |
216 |
for uri in compute_uris.keys(): |
for uri in compute_uris.keys(): |
217 |
compute_info=compute_uris[uri] |
compute_info=compute_uris[uri] |
255 |
ap.sub_app(str(master)) |
ap.sub_app(str(master)) |
256 |
stats_log['app']=time.time()-app_time |
stats_log['app']=time.time()-app_time |
257 |
|
|
258 |
#Elasticidad:add nodes |
#Elasticity:add nodes |
259 |
if ela == 1: |
if ela == 1: |
260 |
#Actualizamos nodos totales cluster |
#Update the total number of nodes
261 |
nodes=nodes+1 |
nodes=nodes+1 |
262 |
#Pedimos info de un compute ya desplegado |
#Ask for info about one compute node |
263 |
ela_ref_info=compute_uris[ela_ref] |
ela_ref_info=compute_uris[ela_ref] |
264 |
if '444' in uris_log['broker']: |
if '444' in uris_log['broker']: |
265 |
#Montamos la url para solicitar al broker un compute |
#Create the URL to request a new node |
266 |
theurl_comp=uris_log['broker'].replace(':444','')+'/computes' |
theurl_comp=uris_log['broker'].replace(':444','')+'/computes' |
267 |
#print theurl_comp |
#print theurl_comp |
268 |
#Desplegamos nuevo nodo |
#Deploy a new node |
269 |
#print theurl_comp,ela_ref_info['name'],ela_ref_info['instance'],ela_ref_info['disk_ref'],ela_ref_info['net_ref'],ela_ref_info['loc_ref'] |
#print theurl_comp,ela_ref_info['name'],ela_ref_info['instance'],ela_ref_info['disk_ref'],ela_ref_info['net_ref'],ela_ref_info['loc_ref'] |
270 |
ela_time=time.time() |
ela_time=time.time() |
271 |
node_output=job.submit_xml(theurl_comp,ela_ref_info['name'],ela_ref_info['instance'],ela_ref_info['disk_ref'],ela_ref_info['net_ref'],ela_ref_info['loc_ref']) |
node_output=job.submit_xml(theurl_comp,ela_ref_info['name'],ela_ref_info['instance'],ela_ref_info['disk_ref'],ela_ref_info['net_ref'],ela_ref_info['loc_ref']) |
272 |
#print 'node_output', node_output |
#print 'node_output', node_output |
273 |
#Parseamos info y comprobamos estado |
#Parse return and check status |
274 |
#compute_info = xml_conv.xml_compute_parse(node_output) |
#compute_info = xml_conv.xml_compute_parse(node_output) |
275 |
#status=compute_info['state'].capitalize() |
#status=compute_info['state'].capitalize() |
276 |
status='' |
status='' |
283 |
print '\n-------------------' |
print '\n-------------------' |
284 |
print 'Warning: Compute status error.' |
print 'Warning: Compute status error.' |
285 |
print compute_info |
print compute_info |
286 |
print 'Respuesta', respuesta_compute |
print 'Answer', respuesta_compute |
287 |
print 'Trying again...' |
print 'Trying again...' |
288 |
time.sleep(5) |
time.sleep(5) |
289 |
node_output = job.computes(ela_ref) |
node_output = job.computes(ela_ref) |
308 |
compute_uris[ela_ref]=compute_info |
compute_uris[ela_ref]=compute_info |
309 |
print '\n' |
print '\n' |
310 |
|
|
311 |
#Parseamos la info del nuevo nodo y evaluamos cuando esta ssh-available |
#Parse information about new node and check if ssh-available |
312 |
ssh_test.ssh_open(compute_info['ip'],compute_info['hostname']) |
ssh_test.ssh_open(compute_info['ip'],compute_info['hostname']) |
313 |
stats_log['ela_ssh']=time.time()-ela_time |
stats_log['ela_ssh']=time.time()-ela_time |
314 |
print '------------------------' |
print '------------------------' |
315 |
print 'Node is ssh-available' |
print 'Node is ssh-available' |
316 |
print '------------------------'+'\n' |
print '------------------------'+'\n' |
317 |
#Evaluamos si se ha added el nodo al sistema de colas |
#Evaluate if the node has been added to OGS |
318 |
nodes_qhost=0 |
nodes_qhost=0 |
319 |
while nodes_qhost != nodes: |
while nodes_qhost != nodes: |
320 |
out=ssh_test.ssh_qhost(master) |
out=ssh_test.ssh_qhost(master) |
322 |
error_try=0 |
error_try=0 |
323 |
try: |
try: |
324 |
nodes_qhost=xml_conv.xml_qhost(out) |
nodes_qhost=xml_conv.xml_qhost(out) |
325 |
print 'Nodos contados y totales', nodes_qhost, nodes |
print 'Counted nodes and total', nodes_qhost, nodes |
326 |
except Exception, inst: |
except Exception, inst: |
327 |
print '\n-------------------' |
print '\n-------------------' |
328 |
print 'Warning: XML reply is not correct.' |
print 'Warning: XML reply is not correct.' |
337 |
if iter is 0: |
if iter is 0: |
338 |
for i in stats_log: |
for i in stats_log: |
339 |
print i+'\t'+str(stats_log[str(i)]) |
print i+'\t'+str(stats_log[str(i)]) |
340 |
#Imprimimos tiempos |
#Print times |
341 |
#Borramos el experimento |
#Delete experiment |
342 |
print '----------------' |
print '----------------' |
343 |
print 'Deleting experiment...', raiz + job_infor['experiment'] |
print 'Deleting experiment...', raiz + job_infor['experiment'] |
344 |
print '----------------\n' |
print '----------------\n' |
350 |
|
|
351 |
|
|
352 |
|
|
353 |
#Probamos si se puede acceder por ssh |
#Check if ssh-available |
354 |
# if log_error is 0 and 'deployed' in deployed_log and 'Running' in status: |
# if log_error is 0 and 'deployed' in deployed_log and 'Running' in status: |
355 |
# for uri in compute_uris.keys(): |
# for uri in compute_uris.keys(): |
356 |
# compute_info=compute_uris[uri] |
# compute_info=compute_uris[uri] |
399 |
for j in stats_log: |
for j in stats_log: |
400 |
print(str(stats_log[str(j)])+'\t') |
print(str(stats_log[str(j)])+'\t') |
401 |
|
|
402 |
#print 'Tiempo ssh', stats_log['ssh_close'] - stats_log['disable'] |
#print 'Time ssh', stats_log['ssh_close'] - stats_log['disable'] |
403 |
# print 'Borrado: ', time.asctime(time.localtime(stats_log['del_time'])) |
# print 'Delete: ', time.asctime(time.localtime(stats_log['del_time'])) |
404 |
|
|
405 |
else: |
else: |
406 |
print 'Experiment deleted. Broker api error.' |
print 'Experiment deleted. Broker api error.' |