Log In | Get Help   
Home My Page Projects Code Snippets Project Openings BonFIRE VCOC Demonstration Kit
Summary Activity SCM Files Wiki
[bonfiredemokit] View of /virt-cluster/vc/vc-main-init.py
[bonfiredemokit] / virt-cluster / vc / vc-main-init.py Repository:
ViewVC logotype

View of /virt-cluster/vc/vc-main-init.py

Parent Directory | Revision Log


Revision 14 - (download) (as text) (annotate)
Mon Oct 8 09:46:28 2012 UTC (11 years, 7 months ago) by agomez
File size: 10826 byte(s)
New license from GPL to Apache
Cleaning some files that are not used.
#!/usr/bin/python
#
# BonFIRE Virtual Clusters on Federated Clouds Demonstration Kit
#
# Copyright (c) Fundacion Centro Tecnologico de Supercomputacion de Galicia 2012
# 
# License Apache Software
#
# The research leading to these results has received funding from 
# the European Community's Seventh Framework Programme (FP7/2007-2013) 
# under agreement number 257386
#
# This software is provided with ABSOLUTELY NO WARRANTY
# 
import os
import os.path
import subprocess
import sys
import time

from logger import log,configure
# An optional first command-line argument names the log file; without it
# everything is logged to the console instead.
if sys.argv[1:]:
    configure(logfile=sys.argv[1], debug=True, console=False)
else:
    configure(debug=True, console=True)

def _run_remote(ssh, command):
    """Run *command* through the SSH client and return what it wrote to stdout.

    Reading stdout to the end also waits for the remote command to close
    its output, so calls are effectively sequential.
    """
    return ssh.exec_command(command)[1].read()


def _write_remote(sftp, path, content):
    """Overwrite the remote file *path* with *content* over SFTP.

    The remote handle is closed even if the write fails.
    """
    remote_file = sftp.open(path, 'w')
    try:
        remote_file.write(content)
    finally:
        remote_file.close()


# Main provisioning sequence.  The master node sets up the shared volume
# (DRBD + OCFS2 when a shadow node exists, plain ext3 otherwise), exports it
# over NFS, installs OGS and starts the host-updater script, and drives the
# same steps on the shadow node over SSH.  The shadow node itself only
# writes its hosts file and waits for the master.
try:
    # Project modules are imported inside the guarded section so that an
    # import failure is reported through the configured logger.
    import bonfire
    import hostsfile
    import ogs
    import vcutil

    # When True the cluster may start with only a master node; when False
    # the script waits until a shadow node is listed as well.
    allow_single_cluster = True
    # Seconds to pause between polls of the experiment description.
    poll_interval = 2
    default_bonfire = "/etc/default/bonfire"
    bonfire.read_defaults(default_bonfire)
    hosts_file = "/etc/hosts"
    log.info("Bonfire URI: %s" % bonfire.uri)
    # The password is masked: credentials must not end up in log files.
    log.info("Credentials: %s:%s" % (bonfire.user, "********"))
    log.info("Experiment: %s" % bonfire.experiment_id)

    # Basic hostsfile for main nodes
    master_host = None
    shadow_host = None
    log.info("Get experiment")
    while True:
        experiment = bonfire.get_experiment(bonfire.experiment_id)
        for compute in experiment['computes']:
            if compute["hostname"].startswith("master"):
                master_host = (compute['ip'], compute['hostname'])
            elif compute["hostname"].startswith("shadow"):
                shadow_host = (compute['ip'], compute['hostname'])
        # The master is always required; the shadow only when a
        # single-node cluster is not allowed.
        if master_host is not None and (allow_single_cluster or shadow_host is not None):
            break
        # Do not hammer the API while the nodes are still being created.
        time.sleep(poll_interval)

    main_hosts = [('127.0.0.1', 'localhost'), master_host]
    if shadow_host:
        main_hosts.append(shadow_host)
    log.debug(main_hosts)
    hostsfile.store(hosts_file, main_hosts)

    # NOTE: zeroing /dev/xvde with dd before use was tried in earlier
    # revisions and is intentionally not done here.

    if bonfire.hostname.startswith("master"):
        log.info("I am master node")

        backup_hostname = ""
        if shadow_host:
            backup_hostname = shadow_host[1]
        # Answer file for the unattended OGS installer (inst_sge -auto).
        # Placeholders, in order: admin host, submit hosts (master and
        # shadow), shadow host.
        ogs_conf = """
SGE_ROOT=\"/shared/ogs\"
SGE_QMASTER_PORT=\"6444\"
SGE_EXECD_PORT=\"6445\"
SGE_ENABLE_SMF=\"false\"
SGE_ENABLE_ST=\"false\"
SGE_CLUSTER_NAME=\"virtual\"
CELL_NAME=\"default\"
ADMIN_USER=\"\"
QMASTER_SPOOL_DIR=\"/shared/ogs/default/spool/qmaster\"
EXECD_SPOOL_DIR=\"/shared/ogs/default/spool\"
GID_RANGE=\"20000-20100\"
SPOOLING_METHOD=\"classic\"
DB_SPOOLING_SERVER=\"none\"
PAR_EXECD_INST_COUNT=\"20\"
ADMIN_HOST_LIST=\"%s\"
SUBMIT_HOST_LIST=\"%s %s\"
EXEC_HOST_LIST=\"\"
EXECD_SPOOL_DIR_LOCAL=\"\"
COPY_COMMAND=\"scp\"
DEFAULT_DOMAIN=\"none\"
ADMIN_MAIL=\"none\"
ADD_TO_RC=\"false\"
SET_FILE_PERMS=\"true\"
RESCHEDULE_JOBS=\"wait\"
SCHEDD_CONF=\"1\"
SHADOW_HOST=\"%s\"
REMOVE_RC=\"false\"
HOSTNAME_RESOLVING=\"true\"
""" % (master_host[1], master_host[1], backup_hostname, backup_hostname)

        if shadow_host:
            # DRBD settings shared by both nodes.
            drbd_global = """global {
usage-count no;
}

common {
protocol C;
meta-disk internal;

syncer {
rate 10M;
}

disk {
on-io-error   detach;
}
}"""

            # DRBD resource r0: /dev/xvde on master and shadow, both
            # allowed to be primary at the same time.
            drbd_res = """resource r0 {
   net {
    allow-two-primaries;
    after-sb-0pri disconnect;
    after-sb-1pri disconnect;
    after-sb-2pri disconnect;
  }

  on %s {
    device    /dev/drbd1;
    disk      /dev/xvde;
    address   %s:7789;
    meta-disk internal;
  }
  on %s {
    device    /dev/drbd1;
    disk      /dev/xvde;
    address   %s:7789;
    meta-disk internal;
  }
}""" % (master_host[1], master_host[0], shadow_host[1], shadow_host[0])

            # Two-node OCFS2 cluster layout (node 0 = master, node 1 = shadow).
            ocfs_conf = """cluster:
    node_count = 2
    name = ocfs2
node:
    ip_port = 7777
    ip_address = %s
    number = 0
    name = %s
    cluster = ocfs2
node:
    ip_port = 7777
    ip_address = %s
    number = 1
    name = %s
    cluster = ocfs2""" % (master_host[0], master_host[1], shadow_host[0], shadow_host[1])

            # Install DRBD
            log.info("Master - Configure DRBD")
            with open("/etc/drbd.d/global_common.conf", 'w') as conf_file:
                conf_file.write(drbd_global)
            with open("/etc/drbd.d/r0.res", 'w') as conf_file:
                conf_file.write(drbd_res)
            log.debug(vcutil.execute("modprobe drbd"))

            log.info("Master - Prepare SSH access to shadow")
            import paramiko
            ssh = paramiko.SSHClient()
            ssh.load_system_host_keys()
            # NOTE(review): unknown host keys are accepted automatically and
            # the root password below is hard-coded; key-based login
            # (key_filename="/root/.ssh/id_rsa") was the earlier approach.
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            sftp = None
            # Retry every 10 seconds until the shadow accepts the connection.
            while True:
                try:
                    ssh.connect(shadow_host[1], username="root", password="bonfire", timeout=10)
                    sftp = ssh.open_sftp()
                    log.info("Connected to: %s" % str(_run_remote(ssh, "hostname")))
                    break
                except Exception as err:
                    log.info("Shadow not available: ")
                    log.exception(err)
                    time.sleep(10)

            log.info("Shadow - Configure DRBD")
            _write_remote(sftp, "/etc/drbd.d/global_common.conf", drbd_global)
            _write_remote(sftp, "/etc/drbd.d/r0.res", drbd_res)
            log.debug(_run_remote(ssh, "modprobe drbd"))
            log.debug(_run_remote(ssh, "/etc/init.d/drbd restart"))
            log.debug(_run_remote(ssh, "drbdadm -- --force create-md r0"))
            log.debug(vcutil.execute("drbd-overview"))
            log.debug(_run_remote(ssh, "drbdadm up r0"))
            # The first element of the result is searched for the DRBD
            # connection state below.
            output = vcutil.execute("drbd-overview")
            log.debug(output)
            log.info("Master - Connecting DRBD")
            # Recreate the local metadata and bring the resource up again
            # until the overview reports the peers as connected.
            while "Connected" not in output[0]:
                time.sleep(2)
                log.debug(vcutil.execute("drbdadm down r0"))
                log.debug(vcutil.execute("drbdadm -- --force wipe-md r0"))
                log.debug(vcutil.execute("/etc/init.d/drbd restart"))
                log.debug(vcutil.execute("drbdadm -- --force create-md r0"))
                log.debug(vcutil.execute("drbdadm up r0"))
                output = vcutil.execute("drbd-overview")
                log.debug(output)
                # Give the pending connection attempt time to settle.
                while "WFConnection" in output[0]:
                    time.sleep(2)
                    output = vcutil.execute("drbd-overview")
                    log.debug(output)

            log.info("Master - Syncing volumes...")
            log.debug(vcutil.execute("drbd-overview"))
            # Clearing the bitmap skips the initial full resync; the
            # alternative (--overwrite-data-of-peer and waiting for
            # UpToDate/UpToDate) is intentionally not used.
            log.debug(vcutil.execute("drbdadm -- --clear-bitmap new-current-uuid r0"))
            log.debug(vcutil.execute("drbd-overview"))
            log.debug(vcutil.execute("drbdadm primary r0"))
            log.debug(vcutil.execute("drbd-overview"))

            log.info("Shadow - Setting both as primary...")
            log.debug(_run_remote(ssh, "drbdadm primary all"))

            # Install OCFS & NFS
            log.info("Master - Configuring OCFS")
            with open("/etc/ocfs2/cluster.conf", 'w') as conf_file:
                conf_file.write(ocfs_conf)
            log.debug(vcutil.execute("""/etc/init.d/ocfs2 restart
/etc/init.d/o2cb restart
yes | mkfs -t ocfs2 -N 2 -F -L ocfs2_drbd1 /dev/drbd1
mkdir -p /volume
mount /dev/drbd1 /volume"""))
        else:
            # Single-node cluster: plain ext3 volume bind-mounted on /shared.
            log.info("Master - Formatting volume")
            log.debug(vcutil.execute("""mkfs -t ext3 /dev/xvde
mkdir -p /volume
mount /dev/xvde /volume
mkdir /shared
mount --bind /volume /shared"""))

        log.info("Master - Configuring NFS")
        log.debug(vcutil.execute("""echo '/volume  172.18.0.0/16(rw,sync,no_root_squash,no_subtree_check)' >> /etc/exports
/etc/init.d/nfs-kernel-server restart"""))

        if shadow_host:
            # Each main node mounts its own NFS export of /volume on /shared.
            log.info("Master - Re-mounting volumes...")
            log.debug(vcutil.execute("""mkdir -p /shared
mount -t nfs -o nordirplus,hard,nointr,rw %s:/volume /shared""" % master_host[1]))

            log.info("Shadow - Configuring OCFS")
            _write_remote(sftp, "/etc/ocfs2/cluster.conf", ocfs_conf)
            log.debug(_run_remote(ssh, """/etc/init.d/ocfs2 restart
/etc/init.d/o2cb restart
mkdir -p /volume
mount /dev/drbd1 /volume"""))

            log.info("Shadow - Configuring NFS")
            log.debug(_run_remote(ssh, """echo '/volume  172.18.0.0/16(rw,sync,no_root_squash,no_subtree_check)' >> /etc/exports
/etc/init.d/nfs-kernel-server restart"""))

            log.info("Shadow - Re-mounting volumes...")
            log.debug(_run_remote(ssh, """mkdir -p /shared
mount -t nfs -o nordirplus,hard,nointr,rw %s:/volume /shared
df -h""" % shadow_host[1]))

        # Install OGS & VC scripts
        log.info("Master - Installing OGS...")
        log.debug(vcutil.execute("""mkdir -p /shared/ogs
mkdir -p /shared/home
df -h"""))
        with open("/shared/ogs/ogs.conf", 'w') as conf_file:
            conf_file.write(ogs_conf)
        log.debug(vcutil.execute("""export SGE_ROOT=/shared/ogs
tar xzf /root/sge_root.tar.gz -C $SGE_ROOT/..
chown -R root:root $SGE_ROOT
cd $SGE_ROOT
./inst_sge -m -auto $SGE_ROOT/ogs.conf
echo \"source $SGE_ROOT/default/common/settings.sh\" >> /root/.bashrc
echo \"export SGE_CHECK_INTERVAL=45\" >> /root/.bashrc
echo \"export SGE_GET_ACTIVE_INTERVAL=90\" >> /root/.bashrc
echo \"export SGE_DELAY_TIME=120\" >> /root/.bashrc
cp $SGE_ROOT/default/common/sgemaster /etc/init.d"""))

        log.info("Master - Initializing Virtual Cluster script...")
        # The updater gets its log file as an argument and is started with
        # fork=True so this script does not wait for it.
        vcutil.execute(". /root/.bashrc; python -u /root/vc/vc-main-host-updater.py /var/log/vc-main-host-updater.log", fork = True)

        if shadow_host:
            log.info("Shadow - Installing OGS...")
            log.debug(_run_remote(ssh, """export SGE_ROOT=/shared/ogs
cd $SGE_ROOT
./inst_sge -sm -auto $SGE_ROOT/ogs.conf
echo \". $SGE_ROOT/default/common/settings.sh\" >> /root/.bashrc
echo \"export SGE_CHECK_INTERVAL=45\" >> /root/.bashrc
echo \"export SGE_GET_ACTIVE_INTERVAL=90\" >> /root/.bashrc
echo \"export SGE_DELAY_TIME=120\" >> /root/.bashrc
cp $SGE_ROOT/default/common/sgemaster /etc/init.d"""))

            log.info("Shadow - Initializing Virtual Cluster script...")
            log.debug(_run_remote(ssh, "nohup < /dev/null python -u /root/vc/vc-main-host-updater.py /var/log/vc-main-host-updater.log &"))
    else:
        log.info("I am shadow node, additional startup actions for me will be performed by master node")
except Exception as excpt:
    # Top-level boundary: any failure is recorded with its traceback and the
    # script then ends normally.
    log.exception(excpt)

root@forge.cesga.es
ViewVC Help
Powered by ViewVC 1.0.0  

Powered By FusionForge