1 : |
agomez |
10 |
#
|
2 : |
|
|
# BonFIRE Virtual Clusters on Federated Clouds Demonstration Kit
|
3 : |
|
|
#
|
4 : |
|
|
# Copyright (c) Fundacion Centro Tecnologico de Supercomputacion de Galicia 2012
|
5 : |
|
|
#
|
6 : |
agomez |
14 |
# License Apache Software
|
7 : |
agomez |
10 |
#
|
8 : |
|
|
# The research leading to these results has received funding from
|
9 : |
|
|
# the European Community's Seventh Framework Programme (FP7/2007-2013)
|
10 : |
|
|
# under agreement number 257386
|
11 : |
|
|
#
|
12 : |
|
|
# This software is provided with ABSOLUTELY NO WARRANTY
|
13 : |
|
|
#
|
14 : |
|
|
require 'rubygems'
|
15 : |
|
|
require 'restfully'
|
16 : |
|
|
require 'restfully/addons/bonfire'
|
17 : |
|
|
|
18 : |
|
|
## Editable parameters
# Key used to look up the target entry in session.root.locations.
# Enable exactly one LOCATION line.
#LOCATION = "de-hlrs"
LOCATION = "fr-inria".freeze
#LOCATION = "uk-epcc"
##

# Name and description submitted with the experiment (a timestamp is
# appended to the description at submission time).
EXPERIMENT_NAME = "Build image master".freeze
EXPERIMENT_DESCRIPTION = "Build image master for virtual cluster".freeze
# Sent as the experiment's :walltime (presumably seconds -- confirm against
# the BonFIRE API documentation).
EXPERIMENT_WALLTIME = 3600
# Name of the existing storage used as the build machine's disk.
IMAGE_NAME = "BonFIRE Debian Squeeze 2G v3".freeze
# Name of the network the build machine's NIC is attached to.
WAN_NAME = "BonFIRE WAN".freeze
|
29 : |
|
|
|
30 : |
|
|
# Open the Restfully session used for every API call below.
# NOTE(review): :gateway and :keys look like the SSH gateway host and private
# key used to reach the VMs -- confirm against the Restfully BonFIRE addon.
session = Restfully::Session.new(
  :configuration_file => "~/.restfully/api.bonfire-project.eu",
  :cache              => false,
  :gateway            => "ssh.fr-inria.bonfire-project.eu",
  :keys               => ["~/.ssh/id_rsa"]
)

# Log verbosity; switch to the DEBUG line for more detail.
session.logger.level = Logger::INFO
#session.logger.level = Logger::DEBUG

# Name under which the finished disk is saved; it embeds the :username entry
# of the session configuration.
image_owner = session.config[:username]
NEW_IMAGE_NAME = "VirtualClusterMaster-#{image_owner}-v0.1"
|
40 : |
|
|
|
41 : |
|
|
# Handle on the submitted experiment; stays nil until submission succeeds so
# the rescue clause knows whether there is anything to delete.
experiment = nil

# Build the master image end to end:
#   1. submit an experiment and validate location, base image and network,
#   2. delete any previous image named NEW_IMAGE_NAME,
#   3. launch one machine and wait until it is RUNNING and reachable by SSH,
#   4. install packages and upload killproc.sh, sge_root.tar.gz and vc/*.py,
#   5. save the disk as NEW_IMAGE_NAME, shut down, wait for DONE,
#   6. delete the experiment.
# Any exception is logged and, after a 30 second grace period, the experiment
# is deleted.
begin
  # Logs the result of a remote command or upload when it returned something.
  log_output = lambda { |output| session.logger.info output unless output.nil? }

  session.logger.info "Deploying experiment..."
  experiment = session.root.experiments.submit(
    :name => EXPERIMENT_NAME,
    :description => "#{EXPERIMENT_DESCRIPTION} - #{Time.now}",
    # :status => "waiting",
    :walltime => EXPERIMENT_WALLTIME
  )

  location = session.root.locations[LOCATION.to_sym]
  # Check for nil BEFORE reading location['name']; otherwise an unknown
  # LOCATION surfaces as a NoMethodError instead of this message.
  fail "Can't select the machine location" if location.nil?
  session.logger.info "Chosen location is: #{location['name']}"

  # Resolve the base image and network up front so that a wrong name aborts
  # the run before the previous image is deleted and before nil is submitted
  # as a disk or NIC.
  base_image = location.storages.find { |s| s['name'] == IMAGE_NAME }
  fail "Can't find the base image '#{IMAGE_NAME}'" if base_image.nil?
  wan = location.networks.find { |n| n['name'] == WAN_NAME }
  fail "Can't find the network '#{WAN_NAME}'" if wan.nil?

  session.logger.info "Deleting previous version"
  previous_image = location.storages.find { |s| s["name"] == NEW_IMAGE_NAME }
  previous_image.delete unless previous_image.nil?

  session.logger.info "Launching machine..."
  machine = experiment.computes.submit(
    :name => "machine-experiment#{experiment['id']}",
    :instance_type => "small",
    :disk => [{:storage => base_image}],
    :nic => [
      {:network => wan}
    ],
    :location => location,
    :context => {}
  )
  hostname = "#{machine['name']}-#{machine['id']}"
  session.logger.info "Machine: #{hostname} #{machine['nic'][0]['ip']}"

  session.logger.info "Starting experiment..."
  experiment.update(:status => "running")

  #Build SSH keys
  #session.logger.info "Building SSH keypair..."
  #ssh_priv = "/tmp/ssh-#{Random.rand(1000)}"
  #ssh_public = ssh_priv + ".pub"
  #`ssh-keygen -f #{ssh_priv} -t rsa -P ""`

  session.logger.info "Checking if machine is ready..."
  # Poll every 20 seconds until the VM is RUNNING and accepts SSH; a FAILED
  # state aborts the build.
  until machine.reload['state'] == 'RUNNING' && machine.ssh.accessible?
    fail "Machine has failed" if machine['state'] == 'FAILED'
    session.logger.info "Machine is not ready. Waiting..."
    sleep 20
  end

  # Shell command that pipes answers into debconf-set-selections so the
  # ocfs2-tools package can be configured without prompts. The string body is
  # deliberately unindented: it is sent to the remote shell verbatim.
  ocfs_debconf = "cat << EOF | debconf-set-selections
ocfs2-tools ocfs2-tools/idle_timeout select 30000
ocfs2-tools ocfs2-tools/reconnect_delay select 2000
ocfs2-tools ocfs2-tools/init select true
ocfs2-tools ocfs2-tools/clustername select ocfs2
ocfs2-tools ocfs2-tools/heartbeat_threshold select 31
ocfs2-tools ocfs2-tools/keepalive_delay select 2000

ocfs2-tools ocfs2-tools/idle_timeout seen true
ocfs2-tools ocfs2-tools/reconnect_delay seen true
ocfs2-tools ocfs2-tools/init seen true
ocfs2-tools ocfs2-tools/clustername seen true
ocfs2-tools ocfs2-tools/heartbeat_threshold seen true
ocfs2-tools ocfs2-tools/keepalive_delay seen true
EOF"

  session.logger.info "Machine is ready"
  machine.ssh do |ssh|
    session.logger.info "Preinstallation..."
    log_output.call(ssh.exec!(ocfs_debconf))
    log_output.call(ssh.scp.upload!('killproc.sh', '/root/'))

    session.logger.info "Installing software..."
    # NOTE(review): inside this double-quoted Ruby string the backslashes in
    # "i\ " and "i\p" are consumed by Ruby, so sed receives "i sleep 5" and
    # "ipython ..." (presumably relying on GNU sed's one-line insert form --
    # confirm). Left unchanged to keep the remote commands identical.
    log_output.call(ssh.exec!("apt-get update
apt-get install -y -q python-paramiko
apt-get install -y -q drbd8-utils ocfs2-tools nfs-kernel-server
apt-get autoclean
dpkg-reconfigure --frontend=noninteractive ocfs2-tools
apt-get install -y -q ocfs2-tools
echo 'source /etc/default/bonfire' >> /root/.bashrc
sed -i '2 i\ sleep 5' /etc/rc.local
sed -i '2 i\ /root/killproc.sh &' /etc/rc.local
sed -i '/^exit/ i\python -u /root/vc/vc-main-init.py /var/log/vc-main-init.log' /etc/rc.local
chmod 755 /etc/rc.local
chmod u+x /root/killproc.sh"))

    session.logger.info "Uploading OGS..."
    log_output.call(ssh.scp.upload!('sge_root.tar.gz', '/root/'))

    #session.logger.info "Uploading SSH keys..."
    #output = ssh.exec!("mkdir -p /root/.ssh")
    #session.logger.info output unless output.nil?
    #output = ssh.scp.upload!(ssh_priv, '/root/.ssh/id_rsa')
    #session.logger.info output unless output.nil?
    #output = ssh.scp.upload!(ssh_public, '/root/.ssh/id_rsa.pub')
    #session.logger.info output unless output.nil?
    #output = ssh.exec!("cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys")
    #session.logger.info output unless output.nil?

    session.logger.info "Uploading Virtual Cluster scripts..."
    log_output.call(ssh.exec!("mkdir -p /root/vc"))
    # Paths are relative to the directory the script is started from.
    Dir.glob("vc/*.py").each do |file|
      log_output.call(ssh.scp.upload!(file, '/root/vc/'))
    end
  end
  session.logger.warn "Image installation finished"

  session.logger.warn "Saving image and shutting down"
  machine.update(:disk => [{:save_as => {:name => NEW_IMAGE_NAME}}])
  machine.update(:state => 'SHUTDOWN')

  # Poll every 20 seconds until the machine reports DONE. A FAILED machine is
  # treated as an error so this loop cannot spin forever.
  until machine.reload['state'] == 'DONE'
    fail "Machine has failed" if machine['state'] == 'FAILED'
    session.logger.info "Machine is #{machine['state']}."
    sleep 20
  end

  experiment.delete

  session.logger.info "Image built: #{NEW_IMAGE_NAME}"
  session.logger.warn "Experiment terminated!"

# Exception (not just StandardError) is rescued on purpose: an Interrupt
# raised by CTRL-C during the build must also reach this cleanup path.
# A CTRL-C during the sleep below is raised from inside the handler, so it
# skips the delete and keeps the VMs.
rescue Exception => e
  session.logger.error "#{e.class.name}: #{e.message}"
  session.logger.error e.backtrace.join("\n")
  session.logger.warn "Cleaning up in 30 seconds. Hit CTRL-C now to keep your VMs..."
  sleep 30
  experiment.delete unless experiment.nil?
end
|