Log In | Get Help   
Home My Page Projects Code Snippets Project Openings BonFIRE VCOC Demonstration Kit
Summary Activity SCM Files Wiki
[bonfiredemokit] Annotation of /virt-cluster/vc/vc-main-init.py
[bonfiredemokit] / virt-cluster / vc / vc-main-init.py Repository:
ViewVC logotype

Annotation of /virt-cluster/vc/vc-main-init.py

Parent Directory | Revision Log


Revision 9 - (view) (download) (as text)

#!/usr/bin/python
#
# BonFIRE Virtual Clusters on Federated Clouds Demonstration Kit
#
# Copyright (c) Fundacion Centro Tecnologico de Supercomputacion de Galicia 2012
#
# License GPL Version 3
#
# The research leading to these results has received funding from
# the European Community's Seventh Framework Programme (FP7/2007-2013)
# under agreement number 257386
#
# This software is provided with ABSOLUTELY NO WARRANTY
#
"""Initialise the main nodes (master and optional shadow) of a virtual cluster.

The script looks up the ``master*`` and ``shadow*`` computes of the current
BonFIRE experiment and stores them in /etc/hosts.  When it runs on the master
node it additionally prepares the shared volume (DRBD + OCFS2 when a shadow
exists, plain ext3 otherwise), exports it over NFS, installs OGS and starts
the host-updater script.  The shadow node is configured remotely by the
master over SSH, so on the shadow this script only updates /etc/hosts.

Usage: vc-main-init.py [logfile]
"""
import os
import os.path
import subprocess
import sys
import time

from logger import log,configure
if len(sys.argv)>1:
    configure(logfile = sys.argv[1], debug=True, console=False)
else:
    configure(debug=True, console=True)

# When True a cluster with only a master node (no shadow) is acceptable.
ALLOW_SINGLE_CLUSTER = True
DEFAULT_BONFIRE = "/etc/default/bonfire"
HOSTS_FILE = "/etc/hosts"
# Pause between two experiment look-ups while the main nodes are not listed yet.
EXPERIMENT_POLL_SECONDS = 5

# NOTE(review): the leading whitespace inside the configuration templates
# below could not be recovered; every line is therefore flush left.  DRBD and
# the OGS auto-install file do not care, but the OCFS2 cluster.conf format is
# usually documented as requiring indented parameters -- verify against the
# repository copy before deploying.

# Placeholders: admin host, submit host (master), submit host (shadow), shadow host.
OGS_CONF_TEMPLATE = """
SGE_ROOT="/shared/ogs"
SGE_QMASTER_PORT="6444"
SGE_EXECD_PORT="6445"
SGE_ENABLE_SMF="false"
SGE_ENABLE_ST="false"
SGE_CLUSTER_NAME="virtual"
CELL_NAME="default"
ADMIN_USER=""
QMASTER_SPOOL_DIR="/shared/ogs/default/spool/qmaster"
EXECD_SPOOL_DIR="/shared/ogs/default/spool"
GID_RANGE="20000-20100"
SPOOLING_METHOD="classic"
DB_SPOOLING_SERVER="none"
PAR_EXECD_INST_COUNT="20"
ADMIN_HOST_LIST="%s"
SUBMIT_HOST_LIST="%s %s"
EXEC_HOST_LIST=""
EXECD_SPOOL_DIR_LOCAL=""
COPY_COMMAND="scp"
DEFAULT_DOMAIN="none"
ADMIN_MAIL="none"
ADD_TO_RC="false"
SET_FILE_PERMS="true"
RESCHEDULE_JOBS="wait"
SCHEDD_CONF="1"
SHADOW_HOST="%s"
REMOVE_RC="false"
HOSTNAME_RESOLVING="true"
"""

DRBD_GLOBAL_CONF = """global {
usage-count no;
}

common {
protocol C;
meta-disk internal;

syncer {
rate 10M;
}

disk {
on-io-error detach;
}
}"""

# Placeholders: master hostname, master IP, shadow hostname, shadow IP.
DRBD_RESOURCE_TEMPLATE = """resource r0 {
net {
allow-two-primaries;
after-sb-0pri disconnect;
after-sb-1pri disconnect;
after-sb-2pri disconnect;
}

on %s {
device /dev/drbd1;
disk /dev/xvde;
address %s:7789;
meta-disk internal;
}
on %s {
device /dev/drbd1;
disk /dev/xvde;
address %s:7789;
meta-disk internal;
}
}"""

# Placeholders: master IP, master hostname, shadow IP, shadow hostname.
OCFS_CONF_TEMPLATE = """cluster:
node_count = 2
name = ocfs2
node:
ip_port = 7777
ip_address = %s
number = 0
name = %s
cluster = ocfs2
node:
ip_port = 7777
ip_address = %s
number = 1
name = %s
cluster = ocfs2"""


def _script(*lines):
    """Join shell command lines into one newline-separated script string."""
    return "\n".join(lines)


# Appends the export on every run; identical on master and shadow.
NFS_EXPORT_SCRIPT = _script(
    "echo '/volume 172.18.0.0/16(rw,sync,no_root_squash,no_subtree_check)' >> /etc/exports",
    "/etc/init.d/nfs-kernel-server restart",
)


def write_local_file(path, content):
    """Overwrite the local file *path* with *content*."""
    with open(path, 'w') as conf_file:
        conf_file.write(content)


def write_remote_file(sftp, path, content):
    """Overwrite *path* on the remote side of the SFTP session with *content*.

    The remote handle is closed even if the write fails.
    """
    remote_file = sftp.open(path, 'w')
    try:
        remote_file.write(content)
    finally:
        remote_file.close()


def run_remote(ssh, command):
    """Run *command* through *ssh* and return everything it wrote to stdout."""
    return ssh.exec_command(command)[1].read()


def find_main_hosts(allow_single_cluster):
    """Poll the experiment until the main nodes are listed.

    Returns a ``(master_host, shadow_host)`` pair of ``(ip, hostname)``
    tuples.  ``shadow_host`` may be None only when *allow_single_cluster* is
    true; ``master_host`` is never None.
    """
    master_host = None
    shadow_host = None
    log.info("Get experiment")
    while True:
        experiment = bonfire.get_experiment(bonfire.experiment_id)
        for compute in experiment['computes']:
            hostname = compute["hostname"]
            if hostname.startswith("master"):
                master_host = (compute['ip'], hostname)
            elif hostname.startswith("shadow"):
                shadow_host = (compute['ip'], hostname)
        # The master is always required; the shadow only when single-node
        # clusters are not allowed.  (The previous condition could leave the
        # loop with master_host still None once a shadow had been seen.)
        if master_host is not None and (allow_single_cluster or shadow_host is not None):
            return master_host, shadow_host
        # Do not hammer the API while the computes are still being created.
        time.sleep(EXPERIMENT_POLL_SECONDS)


def connect_to_shadow(shadow_hostname):
    """Open SSH and SFTP sessions to the shadow node, retrying every 10 s.

    Returns ``(ssh, sftp)``.  Never gives up: it loops until the shadow
    accepts the connection.
    """
    # Imported here so that paramiko is only needed when a shadow exists.
    import paramiko
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    # SECURITY NOTE(review): unknown host keys are accepted automatically.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    while True:
        try:
            # SECURITY NOTE(review): hard-coded root password; key based
            # authentication (key_filename="/root/.ssh/id_rsa") was the
            # disabled alternative in the previous revision.
            ssh.connect(shadow_hostname, username="root", password="bonfire", timeout=10)
            sftp = ssh.open_sftp()
            log.info("Connected to: %s" % str(run_remote(ssh, "hostname")))
            return ssh, sftp
        except Exception as err:
            log.info("Shadow not available: ")
            log.exception(err)
            time.sleep(10)


def connect_drbd():
    """Re-create the local DRBD metadata until the resource reports Connected."""
    output = vcutil.execute("drbd-overview")
    log.debug(output)
    log.info("Master - Connecting DRBD")
    while "Connected" not in output[0]:
        time.sleep(2)
        for command in (
            "drbdadm down r0",
            "drbdadm -- --force wipe-md r0",
            "/etc/init.d/drbd restart",
            "drbdadm -- --force create-md r0",
            "drbdadm up r0",
        ):
            log.debug(vcutil.execute(command))
        output = vcutil.execute("drbd-overview")
        log.debug(output)
        # Give the peers time to find each other before judging the attempt.
        while "WFConnection" in output[0]:
            time.sleep(2)
            output = vcutil.execute("drbd-overview")
            log.debug(output)


def setup_replicated_volume(master_host, shadow_host, ocfs_conf):
    """Set up DRBD between master and shadow and mount OCFS2 on the master.

    Returns the ``(ssh, sftp)`` sessions to the shadow for later steps.
    Previously disabled steps (zeroing /dev/xvde with dd, and a full
    --overwrite-data-of-peer sync) are intentionally not performed.
    """
    drbd_res = DRBD_RESOURCE_TEMPLATE % (
        master_host[1], master_host[0], shadow_host[1], shadow_host[0])

    #Install DRBD
    log.info("Master - Configure DRBD")
    write_local_file("/etc/drbd.d/global_common.conf", DRBD_GLOBAL_CONF)
    write_local_file("/etc/drbd.d/r0.res", drbd_res)
    log.debug(vcutil.execute("modprobe drbd"))

    log.info("Master - Prepare SSH access to shadow")
    ssh, sftp = connect_to_shadow(shadow_host[1])

    log.info("Shadow - Configure DRBD")
    write_remote_file(sftp, "/etc/drbd.d/global_common.conf", DRBD_GLOBAL_CONF)
    write_remote_file(sftp, "/etc/drbd.d/r0.res", drbd_res)
    log.debug(run_remote(ssh, "modprobe drbd"))
    log.debug(run_remote(ssh, "/etc/init.d/drbd restart"))
    log.debug(run_remote(ssh, "drbdadm -- --force create-md r0"))
    log.debug(vcutil.execute("drbd-overview"))
    log.debug(run_remote(ssh, "drbdadm up r0"))
    connect_drbd()

    log.info("Master - Syncing volumes...")
    log.debug(vcutil.execute("drbd-overview"))
    # NOTE(review): presumably used to skip the initial full resync of the
    # freshly created volumes -- confirm against the DRBD documentation.
    log.debug(vcutil.execute("drbdadm -- --clear-bitmap new-current-uuid r0"))
    log.debug(vcutil.execute("drbd-overview"))
    log.debug(vcutil.execute("drbdadm primary r0"))
    log.debug(vcutil.execute("drbd-overview"))

    log.info("Shadow - Setting both as primary...")
    log.debug(run_remote(ssh, "drbdadm primary all"))

    #Install OCFS & NFS
    log.info("Master - Configuring OCFS")
    write_local_file("/etc/ocfs2/cluster.conf", ocfs_conf)
    log.debug(vcutil.execute(_script(
        "/etc/init.d/ocfs2 restart",
        "/etc/init.d/o2cb restart",
        "yes | mkfs -t ocfs2 -N 2 -F -L ocfs2_drbd1 /dev/drbd1",
        "mkdir -p /volume",
        "mount /dev/drbd1 /volume",
    )))
    return ssh, sftp


def setup_local_volume():
    """Format /dev/xvde as ext3 and expose it as /volume and /shared."""
    log.info("Master - Formatting volume")
    log.debug(vcutil.execute(_script(
        "mkfs -t ext3 /dev/xvde",
        "mkdir -p /volume",
        "mount /dev/xvde /volume",
        "mkdir /shared",
        "mount --bind /volume /shared",
    )))


def setup_shadow_storage(ssh, sftp, master_host, shadow_host, ocfs_conf):
    """Mount /shared over NFS on the master, then mirror the setup on the shadow."""
    log.info("Master - Re-mounting volumes...")
    log.debug(vcutil.execute(_script(
        "mkdir -p /shared",
        "mount -t nfs -o nordirplus,hard,nointr,rw %s:/volume /shared",
    ) % master_host[1]))

    log.info("Shadow - Configuring OCFS")
    write_remote_file(sftp, "/etc/ocfs2/cluster.conf", ocfs_conf)
    log.debug(run_remote(ssh, _script(
        "/etc/init.d/ocfs2 restart",
        "/etc/init.d/o2cb restart",
        "mkdir -p /volume",
        "mount /dev/drbd1 /volume",
    )))

    log.info("Shadow - Configuring NFS")
    log.debug(run_remote(ssh, NFS_EXPORT_SCRIPT))

    log.info("Shadow - Re-mounting volumes...")
    # Each main node mounts /shared from its own NFS export.
    log.debug(run_remote(ssh, _script(
        "mkdir -p /shared",
        "mount -t nfs -o nordirplus,hard,nointr,rw %s:/volume /shared",
        "df -h",
    ) % shadow_host[1]))


def install_master_ogs(ogs_conf):
    """Unpack and auto-install OGS on the master, then start the host updater."""
    #Install OGS & VC scripts
    log.info("Master - Installing OGS...")
    log.debug(vcutil.execute(_script(
        "mkdir -p /shared/ogs",
        "mkdir -p /shared/home",
        "df -h",
    )))
    write_local_file("/shared/ogs/ogs.conf", ogs_conf)
    log.debug(vcutil.execute(_script(
        "export SGE_ROOT=/shared/ogs",
        "tar xzf /root/sge_root.tar.gz -C $SGE_ROOT/..",
        "chown -R root:root $SGE_ROOT",
        "cd $SGE_ROOT",
        "./inst_sge -m -auto $SGE_ROOT/ogs.conf",
        'echo "source $SGE_ROOT/default/common/settings.sh" >> /root/.bashrc',
        'echo "export SGE_CHECK_INTERVAL=45" >> /root/.bashrc',
        'echo "export SGE_GET_ACTIVE_INTERVAL=90" >> /root/.bashrc',
        'echo "export SGE_DELAY_TIME=120" >> /root/.bashrc',
        "cp $SGE_ROOT/default/common/sgemaster /etc/init.d",
    )))

    log.info("Master - Initializing Virtual Cluster script...")
    vcutil.execute(". /root/.bashrc; python -u /root/vc/vc-main-host-updater.py /var/log/vc-main-host-updater.log", fork = True)


def install_shadow_ogs(ssh):
    """Install OGS as shadow master over SSH and start its host updater."""
    log.info("Shadow - Installing OGS...")
    log.debug(run_remote(ssh, _script(
        "export SGE_ROOT=/shared/ogs",
        "cd $SGE_ROOT",
        "./inst_sge -sm -auto $SGE_ROOT/ogs.conf",
        'echo ". $SGE_ROOT/default/common/settings.sh" >> /root/.bashrc',
        'echo "export SGE_CHECK_INTERVAL=45" >> /root/.bashrc',
        'echo "export SGE_GET_ACTIVE_INTERVAL=90" >> /root/.bashrc',
        'echo "export SGE_DELAY_TIME=120" >> /root/.bashrc',
        "cp $SGE_ROOT/default/common/sgemaster /etc/init.d",
    )))

    log.info("Shadow - Initializing Virtual Cluster script...")
    log.debug(run_remote(ssh, "nohup < /dev/null python -u /root/vc/vc-main-host-updater.py /var/log/vc-main-host-updater.log &"))


def setup_master(master_host, shadow_host):
    """Run every master-side step: storage, NFS, OGS and the shadow's setup.

    *master_host* and *shadow_host* are ``(ip, hostname)`` tuples;
    *shadow_host* is None for a single-node cluster.
    """
    log.info("I am master node")

    backup_hostname = shadow_host[1] if shadow_host else ""
    ogs_conf = OGS_CONF_TEMPLATE % (
        master_host[1], master_host[1], backup_hostname, backup_hostname)

    ssh = None
    sftp = None
    ocfs_conf = None
    if shadow_host:
        ocfs_conf = OCFS_CONF_TEMPLATE % (
            master_host[0], master_host[1], shadow_host[0], shadow_host[1])
        ssh, sftp = setup_replicated_volume(master_host, shadow_host, ocfs_conf)
    else:
        setup_local_volume()

    log.info("Master - Configuring NFS")
    log.debug(vcutil.execute(NFS_EXPORT_SCRIPT))

    if shadow_host:
        setup_shadow_storage(ssh, sftp, master_host, shadow_host, ocfs_conf)

    install_master_ogs(ogs_conf)

    if shadow_host:
        install_shadow_ogs(ssh)


def main():
    """Publish the main hosts in /etc/hosts and, on the master, set everything up."""
    bonfire.read_defaults(DEFAULT_BONFIRE)
    log.info("Bonfire URI: %s" % bonfire.uri)
    # The password is masked on purpose: credentials must not end up in logs.
    log.info("Credentials: %s:%s" % (bonfire.user, "********",))
    log.info("Experiment: %s" % bonfire.experiment_id)

    #Basic hostsfile for main nodes
    master_host, shadow_host = find_main_hosts(ALLOW_SINGLE_CLUSTER)
    main_hosts = [('127.0.0.1','localhost'), master_host]
    if shadow_host:
        main_hosts.append(shadow_host)
    log.debug(main_hosts)
    hostsfile.store(HOSTS_FILE, main_hosts)

    if bonfire.hostname.startswith("master"):
        setup_master(master_host, shadow_host)
    else:
        log.info("I am shadow node, additional startup actions for me will be performed by master node")


try:
    # Project modules are imported inside the try block so that an import
    # failure is written to the configured log as well.
    import bonfire
    import hostsfile
    import ogs  # not referenced below; kept in case importing it matters -- TODO confirm
    import vcutil
    main()
except Exception as excpt:
    log.exception(excpt)

root@forge.cesga.es
ViewVC Help
Powered by ViewVC 1.0.0  

Powered By FusionForge