Blame view

GanetiCluster.py 10.6 KB
f795df3ae   Thanasis Naskos   initial commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
  from pysqlite2 import dbapi2 as sqlite
  #import sys, os
  import sys, time, paramiko, os, requests, base64, json, time
  import Utils
  from Instance import Instance
  
  # Default HTTP headers (JSON in, JSON out) used by the GanetiCluster methods
  # below when they assemble the keyword arguments for requests.request().
  headers = {
      "accept": "application/json",
      "content-type": "application/json",
      "user-agent": "Ganeti RAPI Client",
  }
  
  
  class GanetiCluster(object):
      '''
      This class holds all instances that take part in the virtual cluster.
      It can create and stop new instances and, working in conjunction with the
      db-specific classes, set up various environments.
      '''
  
  
      def __init__(self,properties_file):
          '''
          Build the cluster client and make sure the local sqlite table exists.

          properties_file -- passed to Utils.Utils(); the resulting object is kept
                             as self.utils and supplies self.utils.db_file, the
                             path of the sqlite file.

          If the "instances" table can be read, the stored rows are printed;
          otherwise (sqlite.DatabaseError) the table is created.
          '''
          self.utils = Utils.Utils(properties_file)
          # NOTE(review): endpoint and credentials are hard-coded in the source;
          # they should be moved to configuration.
          self.base_url = 'https://192.168.1.6:5080/2/'
          self.username = "ganeti_webmgr"
          self.password = "9CCNPcF4SJyX"

          # Make sure the sqlite file exists. If not, create it and the table we need.
          con = sqlite.connect(self.utils.db_file)
          cur = con.cursor()
          try:
              instances = cur.execute('select * from instances'
                              ).fetchall()
          except sqlite.DatabaseError:
              cur.execute('create table instances(id text, image_id text, dns_name text, ip_address text, mac_address text, vpn text, state text, instance_type text, launch_time text, placement text, username text, password text)')
              con.commit()
          else:
              print("""Already discovered instances from previous database file. Use describe_instances without arguments to update.
              """)
              # Single formatted argument: prints the header line followed by
              # the rows, on Python 2 and 3 alike.
              print("%s%s" % ("Found records:\n", instances))
          finally:
              # Release the handles even when an unexpected error escapes.
              cur.close()
              con.close()
          
          
      def describe_instances(self, state=None, pattern=None):
          '''
          Return the instances that carry this user's owner tag.

          state   -- None: query the cluster, keep every owned instance and hand
                     the result to self.utils.refresh_instance_db();
                     "pollDB": do not contact the cluster, read the rows of the
                     local sqlite "instances" table instead;
                     any other value: query the cluster and keep only owned
                     instances whose status equals it (the db is not refreshed).
          pattern -- optional substring; when given, only instances whose id
                     contains it are returned.

          Returns a list of Instance objects. When a pattern is given and nothing
          matches, None is returned instead of an empty list.
          '''
          instances = []
          if state != "pollDB":
              # NOTE(review): certificate verification is switched off for all
              # calls to the cluster.
              kwargs = {
                  "headers": headers,
                  "auth": (self.username, self.password),
                  "verify": False,
                  "params":"bulk=1"
              }
              r = requests.request("get", self.base_url+"instances", **kwargs)
              owner_tag = "owner:"+self.utils.ownerTag
              for info in json.loads(r.content):
                  server_id = str(info['name'])

                  # Check ownership first: instances of other owners need not be
                  # present in mac_ip_mapping, and looking them up would abort
                  # the whole listing.
                  r_tags = requests.request("get", self.base_url+"instances/"+server_id+"/tags", **kwargs)
                  if owner_tag not in json.loads(r_tags.content):
                      continue

                  #TODO check the image id
                  server_image_id = str(info['os'])
                  server_state = str(info['status'])
                  server_instance_type = str(info['disk_usage'])+"_"+str(info['oper_vcpus'])+"_"+str(info['oper_ram'])
                  server_launch_time = str(info['mtime'])
                  server_placement = str(info['pnode'])
                  server_vpn = str(info['pnode'])+":"+str(info['network_port'])
                  server_dns_name = str(info['name'])
                  server_mac_address = str(info['nic.macs'][0])
                  server_ip_address = self.utils.mac_ip_mapping[server_dns_name][1]

                  instance = Instance(server_id, server_image_id, server_dns_name, server_ip_address, server_mac_address, server_vpn, server_state, server_instance_type, server_launch_time, server_placement, "root","secretpw")
                  if not state or state == server_state:
                      instances.append(instance)
              ## if simple call
              if not state:
                  self.utils.refresh_instance_db(instances)
          else:
              ## read from the local database
              con = sqlite.connect(self.utils.db_file)
              cur = con.cursor()
              instancesfromDB = []
              try:
                  instancesfromDB = cur.execute('select * from instances'
                              ).fetchall()
              except sqlite.DatabaseError:
                  con.rollback()
              finally:
                  cur.close()
                  con.close()

              for instancefromDB in instancesfromDB:
                  instances.append(self.utils.return_instance_from_tuple(instancefromDB))

          ## if you are using patterns and state, show only matching state and id's
          if not pattern:
              return instances
          matched_instances = [instance for instance in instances
                               if pattern in instance.id]
          # Callers get None, not [], when the pattern matches nothing.
          return matched_instances or None
  
          
      def describe_images(self, pattern=None):
          '''
          Issue a GET on <base_url>os and return the decoded JSON reply.

          pattern is accepted for interface compatibility but is not used.
          '''
          credentials = (self.username, self.password)
          response = requests.request(
              "get",
              self.base_url+"os",
              headers=headers,
              auth=credentials,
              verify=False
          )
          return json.loads(response.content)
  
      def run_instances(self, instance_count, os, disk, cpu, ram):
          '''
          Start instance_count instances.

          The names come from self.utils.getFree_dns_names(int(instance_count));
          everything else is forwarded unchanged to run_instances_names(), whose
          result is returned.
          '''
          wanted = int(instance_count)
          free_names = self.utils.getFree_dns_names(wanted)
          return self.run_instances_names(free_names, os, disk, cpu, ram)
      
      def run_instances_names(self, names, os, disk, cpu, ram):
          '''
          Create one instance per name, wait for the creation jobs to finish and
          tag the instances that were created successfully.

          names -- dns names; each must be a key of self.utils.mac_ip_mapping,
                   whose value is indexed as [0] = mac and [1] = ip.
          os    -- value sent as "os_type" (the name shadows the os module
                   inside this method only).
          disk, cpu, ram -- strings (they are concatenated into the instance
                   type); ram must additionally be accepted by int().

          Placement: the first int(rabbit_maxmem/ram) instances are pinned to
          rabbit, the following int(panther_maxmem/ram) to panther and all
          others to deer.

          Returns the list of Instance objects whose job ended with "success".
          Jobs that end in "error" or "canceled" are reported on stdout and
          their instances are left out.
          '''
          kwargs = {
              "headers": headers,
              "auth": (self.username, self.password),
              "verify": False
          }
          pending_instances = {}
          jobIDs = []
          # Loop-invariant node capacities, expressed in numbers of VMs.
          rabbit_limit = int(self.utils.rabbit_maxmem/int(ram))
          panther_limit = rabbit_limit+int(self.utils.panther_maxmem/int(ram))

          for running_vm_cnt, name in enumerate(names):
              print("running instance: " + name)
              mac_address = self.utils.mac_ip_mapping[name][0]
              ip_address = self.utils.mac_ip_mapping[name][1]

              body = {
                  "__version__": 1,
                  "mode": 'create',
                  "hypervisor": 'kvm',
                  "os_type": os,
                  "instance_name": name,
                  "disk_template": 'plain',
                  "disks": [{'size':disk}],
                  "beparams": {"memory":ram,"maxmem":ram,"minmem":ram,'vcpus':cpu},
                  "hvparams": {'vnc_bind_address':'0.0.0.0'},
                  "nics": [{'ip':ip_address,'mac':mac_address}]
              }
              if running_vm_cnt < rabbit_limit:
                  body['pnode'] = 'rabbit.delab.csd.auth.gr'
              elif running_vm_cnt < panther_limit:
                  body['pnode'] = 'panther.delab.csd.auth.gr'
              else:
                  body['pnode'] = 'deer.delab.csd.auth.gr'

              r = requests.request("post", self.base_url+"instances", data=json.dumps(body), **kwargs)
              jobID = json.loads(r.content)
              jobIDs.append(jobID)

              pending_instances[str(jobID)] = Instance(name, os, name, ip_address, mac_address, "", "", disk+"_"+cpu+"_"+ram, str(time.time()), "", "root","secretpw")

          wait_body = json.dumps({
              "fields": ['status'],
          })
          instances = []
          failures = []
          for JID in jobIDs:
              status = ""
              # "canceled" is final as well; waiting for "success" alone would
              # spin forever on a canceled job.
              while status not in ("success", "error", "canceled"):
                  time.sleep(5)
                  r = requests.request("get", self.base_url+"jobs/"+str(JID)+"/wait", data=wait_body, **kwargs)
                  answer = json.loads(r.content)
                  if answer is None:
                      # The wait call is documented to answer null when it saw
                      # no change; just poll again.
                      continue
                  status = answer['job_info'][0]
              if status == 'success':
                  instances.append(pending_instances[str(JID)])
              else:
                  failures.append(JID)

          if failures:
              print("%s %s" % ("instance creation jobs that did not succeed:", failures))
          self.tag_instances_owner(instances)
          return instances
  
      def tag_instances_owner(self, instances):
          '''
          Attach the tag "owner:" + self.utils.ownerTag to each given instance.

          One PUT on <base_url>instances/<id>/tags is sent per instance. The
          decoded replies are gathered in a local list only; nothing is returned.
          '''
          submitted = []
          tag_headers = {
              "accept": "application/json",
              "user-agent": "Ganeti RAPI Client",
          }
          for inst in instances:
              print("tagging instance: " + inst.id)
              credentials = (self.username, self.password)
              query = {
                  "tag": ['owner:'+self.utils.ownerTag]
              }
              reply = requests.request(
                  "put",
                  self.base_url+"instances/"+inst.id+"/tags",
                  headers=tag_headers,
                  auth=credentials,
                  verify=False,
                  params=query
              )
              submitted.append(json.loads(reply.content))
          
      def terminate_instances(self, names):
          '''
          Send a delete request for every name in names.

          After each request the name is appended to self.utils.free_ips. The
          reply of the cluster is not inspected.
          '''
          for target in names:
              credentials = (self.username, self.password)
              # Formats to the same text the two-value print statement produced.
              print("%s %s" % ("Terminating: ", target))
              requests.request(
                  "delete",
                  self.base_url+"instances/"+str(target),
                  headers=headers,
                  auth=credentials,
                  verify=False
              )
              self.utils.free_ips.append(target)
              
      
      def block_until_running (self, instances):
          '''
          Wait for every given instance to report the status "running".

          Each instance is polled every 5 seconds for at most 300 seconds; an
          instance that is still not running afterwards is reported on stdout.
          Afterwards describe_instances() is called so the local db is refreshed,
          and the result of block_until_ping(instances) is returned. All given
          instances are passed on, including those that timed out.
          '''
          kwargs = {
              "headers": headers,
              "auth": (self.username, self.password),
              "verify": False
          }
          remaining = len(instances)
          for instance in instances:
              print("%s %s %s" % ("Waiting for", remaining, "instances to run"))
              name = instance.dns_name
              status = ""
              timeout = 300
              while status != "running" and timeout > 0:
                  time.sleep(5)
                  r = requests.request("get", self.base_url+"instances/"+str(name), **kwargs)
                  status = json.loads(r.content)['status']
                  timeout = timeout - 5
              # Decide on the observed status, not on the counter: the instance
              # may have come up on the very last poll.
              if status != "running":
                  print("timeout while waiting for running state: " + str(name))
              else:
                  remaining = remaining - 1
          #call describe_instances() to store instances to db
          self.describe_instances()
          return self.block_until_ping(instances)
  
      def block_until_ping(self, instances):
          '''
          Wait for every given instance to answer a ping on its ip_address.

          Up to 150 attempts are made per instance ("ping -c 1 -W 4", followed by
          a one second pause after a failed attempt); an instance that never
          answers is reported on stdout.

          Returns the instances argument unchanged, including instances that did
          not answer.
          '''
          remaining = len(instances)
          for instance in instances:
              print("%s %s %s" % ("Waiting for", remaining, "instances to ping"))
              ping = False
              timeout = 150
              while (not ping and timeout > 0):
                  response = os.system("ping -c 1 -W 4 " + instance.ip_address)
                  if response == 0:
                      ping = True
                  else:
                      time.sleep(1)
                  timeout = timeout - 1
              # Decide on the ping result, not on the counter: the final attempt
              # may have been the successful one.
              if not ping:
                  print("timeout while trying to ping: " + str(instance.ip_address))
              else:
                  remaining = remaining - 1
          return instances
              
  if __name__ == "__main__":
      # Manual smoke test against the live cluster: list, start two instances,
      # wait for them, list again and terminate the first two listed instances.
      euca = GanetiCluster('Coordinator_gf_r1_0p_6a_4r_4cl_smg_s_first_True_safe_timelimit.properties')
      euca.describe_instances()
      # A list of dns names is given, so run_instances_names() is the matching
      # entry point; run_instances() expects a count and calls int() on it.
      # ram is passed as a plain number because run_instances_names() calls
      # int(ram) -- NOTE(review): '1024' assumes the unit is MiB (was '1G'); confirm.
      started = euca.run_instances_names(['vm50.delab.csd.auth.gr','vm51.delab.csd.auth.gr'],'snf-image+hadoop_limit','5G','2','1024')
      # block_until_running() already finishes by calling block_until_ping().
      euca.block_until_running(started)
      k = euca.describe_instances()
      euca.terminate_instances([k[0].dns_name,k[1].dns_name])