ia64/xen-unstable

view tools/python/xen/xend/XendCheckpoint.py @ 19263:bb281d11fa99

xend: Make /var/lib/xen if not already present.
Signed-off-by: Christoph Egger <christoph.egger@amd.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Mar 03 11:37:31 2009 +0000 (2009-03-03)
parents b1b9cf7a2d36
children db53046ca5f0
line source
1 # Copyright (C) 2005 Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
2 # Copyright (C) 2005 XenSource Ltd
4 # This file is subject to the terms and conditions of the GNU General
5 # Public License. See the file "COPYING" in the main directory of
6 # this archive for more details.
8 import os
9 import os.path
10 import re
11 import string
12 import threading
13 import fcntl
14 from struct import pack, unpack, calcsize
16 from xen.util.xpopen import xPopen3
17 import xen.util.auxbin
18 import xen.lowlevel.xc
20 from xen.xend import balloon, sxp, image
21 from xen.xend.XendError import XendError, VmError
22 from xen.xend.XendLogging import log
23 from xen.xend.XendConfig import XendConfig
24 from xen.xend.XendConstants import *
25 from xen.xend import XendNode
# Markers written at the start of the guest record and of the optional
# qemu device model record inside a saved-state stream.
SIGNATURE = "LinuxGuestRecord"
QEMU_SIGNATURE = "QemuDeviceModelRecord"

# Names of the helper programs; they are resolved to full paths with
# xen.util.auxbin.pathTo() at the point of use.
XC_SAVE = "xc_save"
XC_RESTORE = "xc_restore"

# Number of bytes read per os.read() call while copying qemu state.
dm_batch = 512

# Native sizes, in bytes, of the C types int, unsigned int and unsigned long.
sizeof_int = calcsize("i")
sizeof_unsigned_int = calcsize("I")
sizeof_unsigned_long = calcsize("L")

# Module-wide handle onto the low-level xc interface.
xc = xen.lowlevel.xc.xc()
def write_exact(fd, buf, errmsg):
    """Write the whole of buf to the file descriptor fd.

    os.write() is allowed to accept only part of the data (this is normal
    for pipes and sockets), so the unwritten tail is retried until the
    complete buffer has been handed over.

    fd     -- open file descriptor to write to
    buf    -- data to write
    errmsg -- message carried by the XendError raised on failure

    Raises XendError(errmsg) if a write makes no progress before the
    buffer is exhausted.  Exceptions raised by os.write() itself are
    propagated unchanged.
    """
    remaining = buf
    while True:
        written = os.write(fd, remaining)
        if written == len(remaining):
            return
        if written <= 0:
            # Nothing was accepted: give up instead of spinning forever.
            raise XendError(errmsg)
        remaining = remaining[written:]
def read_exact(fd, size, errmsg):
    """Read exactly size bytes from the file descriptor fd.

    os.read() may return fewer bytes than requested, so reads are repeated
    until the requested amount has been collected.

    fd     -- open file descriptor to read from
    size   -- number of bytes wanted
    errmsg -- message carried by the XendError raised on failure

    Returns the data read; an empty string when size is 0.

    Raises XendError(errmsg) if size is negative (for instance a corrupt
    length field) or if end-of-file is reached before size bytes were
    read.  Exceptions raised by os.read() itself are propagated unchanged.
    """
    if size < 0:
        log.error("read_exact: invalid size %d" % size)
        raise XendError(errmsg)

    # Collect the pieces and join once; repeated string concatenation is
    # quadratic in the amount of data read.
    chunks = []

    def joined():
        if not chunks:
            return ''
        # An empty slice of the first chunk is an empty separator of the
        # same string type as the data returned by os.read().
        return chunks[0][:0].join(chunks)

    remaining = size
    while remaining > 0:
        chunk = os.read(fd, remaining)
        if not chunk:
            log.error("read_exact: EOF trying to read %d (buf='%s')" %
                      (remaining, joined()))
            raise XendError(errmsg)
        chunks.append(chunk)
        remaining -= len(chunk)
    return joined()
def insert_after(list, pred, value):
    """Insert value right after the first sub-list whose head equals pred.

    Only elements that are exactly of list type are examined.  The list is
    modified in place; if no element matches, it is left untouched.
    Nothing is returned.
    """
    list_type = type([])
    position = 0
    for entry in list:
        position += 1
        if type(entry) != list_type:
            continue
        if entry[0] == pred:
            # position already points one past the matching entry.
            list.insert(position, value)
            return
68 def save(fd, dominfo, network, live, dst, checkpoint=False, node=-1):
69 try:
70 if not os.path.isdir("/var/lib/xen"):
71 os.makedirs("/var/lib/xen")
72 except Exception, exn:
73 log.exception("Can't create directory '/var/lib/xen'")
74 raise XendError("Can't create directory '/var/lib/xen'")
76 write_exact(fd, SIGNATURE, "could not write guest state file: signature")
78 sxprep = dominfo.sxpr()
80 if node > -1:
81 insert_after(sxprep,'vcpus',['node', str(node)])
83 config = sxp.to_string(sxprep)
85 domain_name = dominfo.getName()
86 # Rename the domain temporarily, so that we don't get a name clash if this
87 # domain is migrating (live or non-live) to the local host. Doing such a
88 # thing is useful for debugging.
89 dominfo.setName('migrating-' + domain_name)
91 try:
92 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP1, domain_name)
94 write_exact(fd, pack("!i", len(config)),
95 "could not write guest state file: config len")
96 write_exact(fd, config, "could not write guest state file: config")
98 image_cfg = dominfo.info.get('image', {})
99 hvm = dominfo.info.is_hvm()
101 # xc_save takes three customization parameters: maxit, max_f, and
102 # flags the last controls whether or not save is 'live', while the
103 # first two further customize behaviour when 'live' save is
104 # enabled. Passing "0" simply uses the defaults compiled into
105 # libxenguest; see the comments and/or code in xc_linux_save() for
106 # more information.
107 cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd),
108 str(dominfo.getDomid()), "0", "0",
109 str(int(live) | (int(hvm) << 2)) ]
110 log.debug("[xc_save]: %s", string.join(cmd))
112 def saveInputHandler(line, tochild):
113 log.debug("In saveInputHandler %s", line)
114 if line == "suspend":
115 log.debug("Suspending %d ...", dominfo.getDomid())
116 dominfo.shutdown('suspend')
117 dominfo.waitForShutdown()
118 if line in ('suspend', 'suspended'):
119 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2,
120 domain_name)
121 log.info("Domain %d suspended.", dominfo.getDomid())
122 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3,
123 domain_name)
124 if hvm:
125 dominfo.image.saveDeviceModel()
127 if line == "suspend":
128 tochild.write("done\n")
129 tochild.flush()
130 log.debug('Written done')
132 forkHelper(cmd, fd, saveInputHandler, False)
134 # put qemu device model state
135 if os.path.exists("/var/lib/xen/qemu-save.%d" % dominfo.getDomid()):
136 write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature")
137 qemu_fd = os.open("/var/lib/xen/qemu-save.%d" % dominfo.getDomid(),
138 os.O_RDONLY)
139 while True:
140 buf = os.read(qemu_fd, dm_batch)
141 if len(buf):
142 write_exact(fd, buf, "could not write device model state")
143 else:
144 break
145 os.close(qemu_fd)
146 os.remove("/var/lib/xen/qemu-save.%d" % dominfo.getDomid())
148 if checkpoint:
149 dominfo.resumeDomain()
150 else:
151 dominfo.destroy()
152 dominfo.testDeviceComplete()
153 try:
154 dominfo.setName(domain_name, False)
155 except VmError:
156 # Ignore this. The name conflict (hopefully) arises because we
157 # are doing localhost migration; if we are doing a suspend of a
158 # persistent VM, we need the rename, and don't expect the
159 # conflict. This needs more thought.
160 pass
162 except Exception, exn:
163 log.exception("Save failed on domain %s (%s) - resuming.", domain_name,
164 dominfo.getDomid())
165 dominfo.resumeDomain()
167 try:
168 dominfo.setName(domain_name)
169 except:
170 log.exception("Failed to reset the migrating domain's name")
172 raise exn
175 def restore(xd, fd, dominfo = None, paused = False, relocating = False):
176 try:
177 if not os.path.isdir("/var/lib/xen"):
178 os.makedirs("/var/lib/xen")
179 except Exception, exn:
180 log.exception("Can't create directory '/var/lib/xen'")
181 raise XendError("Can't create directory '/var/lib/xen'")
183 signature = read_exact(fd, len(SIGNATURE),
184 "not a valid guest state file: signature read")
185 if signature != SIGNATURE:
186 raise XendError("not a valid guest state file: found '%s'" %
187 signature)
189 l = read_exact(fd, sizeof_int,
190 "not a valid guest state file: config size read")
191 vmconfig_size = unpack("!i", l)[0]
192 vmconfig_buf = read_exact(fd, vmconfig_size,
193 "not a valid guest state file: config read")
195 p = sxp.Parser()
196 p.input(vmconfig_buf)
197 if not p.ready:
198 raise XendError("not a valid guest state file: config parse")
200 vmconfig = p.get_val()
202 if not relocating:
203 domconfig = XendConfig(sxp_obj = vmconfig)
204 othervm = xd.domain_lookup_nr(domconfig["name_label"])
205 if othervm is None or othervm.domid is None:
206 othervm = xd.domain_lookup_nr(domconfig["uuid"])
207 if othervm is not None and othervm.domid is not None:
208 raise VmError("Domain '%s' already exists with ID '%d'" % (domconfig["name_label"], othervm.domid))
210 if dominfo:
211 dominfo.resume()
212 else:
213 dominfo = xd.restore_(vmconfig)
215 # repin domain vcpus if a target node number was specified
216 # this is done prior to memory allocation to aide in memory
217 # distribution for NUMA systems.
218 nodenr = -1
219 for i,l in enumerate(vmconfig):
220 if type(l) == type([]):
221 if l[0] == 'node':
222 nodenr = int(l[1])
224 if nodenr >= 0:
225 node_to_cpu = XendNode.instance().xc.physinfo()['node_to_cpu']
226 if nodenr < len(node_to_cpu):
227 for v in range(0, dominfo.info['VCPUs_max']):
228 xc.vcpu_setaffinity(dominfo.domid, v, node_to_cpu[nodenr])
230 store_port = dominfo.getStorePort()
231 console_port = dominfo.getConsolePort()
233 assert store_port
234 assert console_port
236 # if hvm, pass mem size to calculate the store_mfn
237 image_cfg = dominfo.info.get('image', {})
238 is_hvm = dominfo.info.is_hvm()
239 if is_hvm:
240 apic = int(dominfo.info['platform'].get('apic', 0))
241 pae = int(dominfo.info['platform'].get('pae', 0))
242 log.info("restore hvm domain %d, apic=%d, pae=%d",
243 dominfo.domid, apic, pae)
244 else:
245 apic = 0
246 pae = 0
248 try:
249 restore_image = image.create(dominfo, dominfo.info)
250 memory = restore_image.getRequiredAvailableMemory(
251 dominfo.info['memory_dynamic_max'] / 1024)
252 maxmem = restore_image.getRequiredAvailableMemory(
253 dominfo.info['memory_static_max'] / 1024)
254 shadow = restore_image.getRequiredShadowMemory(
255 dominfo.info['shadow_memory'] * 1024,
256 dominfo.info['memory_static_max'] / 1024)
258 log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, ",
259 dominfo.info['shadow_memory'],
260 dominfo.info['memory_static_max'],
261 dominfo.info['memory_static_min'])
263 # Round shadow up to a multiple of a MiB, as shadow_mem_control
264 # takes MiB and we must not round down and end up under-providing.
265 shadow = ((shadow + 1023) / 1024) * 1024
267 # set memory limit
268 xc.domain_setmaxmem(dominfo.getDomid(), maxmem)
270 balloon.free(memory + shadow, dominfo)
272 shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow / 1024)
273 dominfo.info['shadow_memory'] = shadow_cur
275 cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
276 fd, dominfo.getDomid(),
277 store_port, console_port, int(is_hvm), pae, apic])
278 log.debug("[xc_restore]: %s", string.join(cmd))
280 handler = RestoreInputHandler()
282 forkHelper(cmd, fd, handler.handler, True)
284 # We don't want to pass this fd to any other children -- we
285 # might need to recover the disk space that backs it.
286 try:
287 flags = fcntl.fcntl(fd, fcntl.F_GETFD)
288 flags |= fcntl.FD_CLOEXEC
289 fcntl.fcntl(fd, fcntl.F_SETFD, flags)
290 except:
291 pass
293 if handler.store_mfn is None:
294 raise XendError('Could not read store MFN')
296 if not is_hvm and handler.console_mfn is None:
297 raise XendError('Could not read console MFN')
299 # get qemu state and create a tmp file for dm restore
300 # Even PV guests may have QEMU stat, but its not currently
301 # used so only bother with HVM currently.
302 if is_hvm:
303 qemu_signature = read_exact(fd, len(QEMU_SIGNATURE),
304 "invalid device model signature read")
305 if qemu_signature != QEMU_SIGNATURE:
306 raise XendError("not a valid device model state: found '%s'" %
307 qemu_signature)
308 qemu_fd = os.open("/var/lib/xen/qemu-save.%d" % dominfo.getDomid(),
309 os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
310 while True:
311 buf = os.read(fd, dm_batch)
312 if len(buf):
313 write_exact(qemu_fd, buf,
314 "could not write dm state to tmp file")
315 else:
316 break
317 os.close(qemu_fd)
318 restore_image.setCpuid()
321 os.read(fd, 1) # Wait for source to close connection
323 dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
325 #
326 # We shouldn't hold the domains_lock over a waitForDevices
327 # As this function sometime gets called holding this lock,
328 # we must release it and re-acquire it appropriately
329 #
330 from xen.xend import XendDomain
332 lock = True;
333 try:
334 XendDomain.instance().domains_lock.release()
335 except:
336 lock = False;
338 try:
339 dominfo.waitForDevices() # Wait for backends to set up
340 except Exception, exn:
341 log.exception(exn)
343 if lock:
344 XendDomain.instance().domains_lock.acquire()
346 if not paused:
347 dominfo.unpause()
349 return dominfo
350 except:
351 dominfo.destroy()
352 raise
class RestoreInputHandler:
    """Picks the store and console MFNs out of the restore helper's output.

    Both attributes start as None and keep the most recent value parsed
    from a matching line.
    """

    def __init__(self):
        self.console_mfn = None
        self.store_mfn = None


    def handler(self, line, _):
        """Record the number from a "store-mfn N" or "console-mfn N" line.

        Lines of any other form are ignored.  The second argument is unused.
        """
        store = re.match(r"^(store-mfn) (\d+)$", line)
        if store is not None:
            self.store_mfn = int(store.group(2))
            return

        console = re.match(r"^(console-mfn) (\d+)$", line)
        if console is not None:
            self.console_mfn = int(console.group(2))
371 def forkHelper(cmd, fd, inputHandler, closeToChild):
372 child = xPopen3(cmd, True, -1, [fd, xc.handle()])
374 if closeToChild:
375 child.tochild.close()
377 thread = threading.Thread(target = slurp, args = (child.childerr,))
378 thread.start()
380 try:
381 try:
382 while 1:
383 line = child.fromchild.readline()
384 if line == "":
385 break
386 else:
387 line = line.rstrip()
388 log.debug('%s', line)
389 inputHandler(line, child.tochild)
391 except IOError, exn:
392 raise XendError('Error reading from child process for %s: %s' %
393 (cmd, exn))
394 finally:
395 child.fromchild.close()
396 if not closeToChild:
397 child.tochild.close()
398 thread.join()
399 child.childerr.close()
400 status = child.wait()
402 if status >> 8 == 127:
403 raise XendError("%s failed: popen failed" % string.join(cmd))
404 elif status != 0:
405 raise XendError("%s failed" % string.join(cmd))
def slurp(infile):
    """Copy every line of infile into the log until end-of-file.

    Lines are stripped of surrounding whitespace.  A line starting with
    "ERROR: " is logged at error level without that prefix; every other
    line is logged at info level.
    """
    for raw in iter(infile.readline, ""):
        text = raw.strip()
        error = re.match(r"^ERROR: (.*)", text)
        if error is not None:
            log.error('%s', error.group(1))
        else:
            log.info('%s', text)