ia64/xen-unstable

view tools/python/xen/xend/XendCheckpoint.py @ 15873:7dfc9a7a0d4e

Xend: "shadow_memory" setting in xm config is in MiB, not bytes.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Tue Sep 11 12:30:39 2007 +0100 (2007-09-11)
parents 32f331858d75
children e8905caa47b5
line source
1 # Copyright (C) 2005 Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
2 # Copyright (C) 2005 XenSource Ltd
4 # This file is subject to the terms and conditions of the GNU General
5 # Public License. See the file "COPYING" in the main directory of
6 # this archive for more details.
8 import os
9 import re
10 import string
11 import threading
12 import fcntl
13 from struct import pack, unpack, calcsize
15 from xen.util.xpopen import xPopen3
16 import xen.util.auxbin
17 import xen.lowlevel.xc
19 from xen.xend import balloon, sxp, image
20 from xen.xend.XendError import XendError, VmError
21 from xen.xend.XendLogging import log
22 from xen.xend.XendConfig import XendConfig
23 from xen.xend.XendConstants import *
# Marker written at the very start of a guest state file by save() and
# verified by restore().
SIGNATURE = "LinuxGuestRecord"
# Marker that precedes the qemu device-model state appended for HVM guests.
QEMU_SIGNATURE = "QemuDeviceModelRecord"
# Chunk size, in bytes, passed to os.read() when copying device-model state.
dm_batch = 512
# Names of the helper programs; resolved to full paths with
# xen.util.auxbin.pathTo() before being launched.
XC_SAVE = "xc_save"
XC_RESTORE = "xc_restore"
# Sizes of native C types as reported by struct.calcsize().  Only sizeof_int
# is referenced in the visible part of this file (length of the config-size
# field read by restore()).
sizeof_int = calcsize("i")
sizeof_unsigned_int = calcsize("I")
sizeof_unsigned_long = calcsize("L")
# Module-wide xen.lowlevel.xc handle, created once at import time and shared
# by restore() and forkHelper().
xc = xen.lowlevel.xc.xc()
def write_exact(fd, buf, errmsg):
    """Write the whole of buf to the file descriptor fd.

    os.write() is allowed to accept fewer bytes than it was given (for
    example on a pipe or socket, or when interrupted by a signal), so the
    unwritten tail is retried until nothing is left.

    fd     -- OS-level file descriptor open for writing.
    buf    -- string of bytes to write.
    errmsg -- message for the XendError raised when no progress is made.

    Raises XendError(errmsg) if os.write() reports that it wrote nothing.
    Errors raised by os.write() itself propagate unchanged.
    """
    remaining = buf
    while remaining:
        written = os.write(fd, remaining)
        if written <= 0:
            # No forward progress: give up rather than spin forever.
            raise XendError(errmsg)
        remaining = remaining[written:]
def read_exact(fd, size, errmsg):
    """Read exactly size bytes from the file descriptor fd and return them.

    os.read() may return fewer bytes than asked for, so reads are repeated
    for the outstanding amount until the request is satisfied.

    fd     -- OS-level file descriptor open for reading.
    size   -- number of bytes wanted.
    errmsg -- message for the XendError raised on premature end of file.

    Raises XendError(errmsg) if end of file is hit first; the shortfall and
    the data collected so far are logged before raising.
    """
    data = ''
    outstanding = size
    while outstanding:
        chunk = os.read(fd, outstanding)
        if len(chunk) == 0:
            # An empty read means EOF: the peer closed before sending it all.
            log.error("read_exact: EOF trying to read %d (buf='%s')" %
                      (outstanding, data))
            raise XendError(errmsg)
        outstanding -= len(chunk)
        data += chunk
    return data
58 def save(fd, dominfo, network, live, dst, checkpoint=False):
59 write_exact(fd, SIGNATURE, "could not write guest state file: signature")
61 config = sxp.to_string(dominfo.sxpr())
63 domain_name = dominfo.getName()
64 # Rename the domain temporarily, so that we don't get a name clash if this
65 # domain is migrating (live or non-live) to the local host. Doing such a
66 # thing is useful for debugging.
67 dominfo.setName('migrating-' + domain_name)
69 try:
70 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP1, domain_name)
72 write_exact(fd, pack("!i", len(config)),
73 "could not write guest state file: config len")
74 write_exact(fd, config, "could not write guest state file: config")
76 image_cfg = dominfo.info.get('image', {})
77 hvm = dominfo.info.is_hvm()
79 # xc_save takes three customization parameters: maxit, max_f, and
80 # flags the last controls whether or not save is 'live', while the
81 # first two further customize behaviour when 'live' save is
82 # enabled. Passing "0" simply uses the defaults compiled into
83 # libxenguest; see the comments and/or code in xc_linux_save() for
84 # more information.
85 cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd),
86 str(dominfo.getDomid()), "0", "0",
87 str(int(live) | (int(hvm) << 2)) ]
88 log.debug("[xc_save]: %s", string.join(cmd))
90 def saveInputHandler(line, tochild):
91 log.debug("In saveInputHandler %s", line)
92 if line == "suspend":
93 log.debug("Suspending %d ...", dominfo.getDomid())
94 dominfo.shutdown('suspend')
95 dominfo.waitForShutdown()
96 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2,
97 domain_name)
98 log.info("Domain %d suspended.", dominfo.getDomid())
99 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3,
100 domain_name)
101 if hvm:
102 dominfo.image.saveDeviceModel()
104 tochild.write("done\n")
105 tochild.flush()
106 log.debug('Written done')
108 forkHelper(cmd, fd, saveInputHandler, False)
110 # put qemu device model state
111 if hvm:
112 write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature")
113 qemu_fd = os.open("/var/lib/xen/qemu-save.%d" % dominfo.getDomid(),
114 os.O_RDONLY)
115 while True:
116 buf = os.read(qemu_fd, dm_batch)
117 if len(buf):
118 write_exact(fd, buf, "could not write device model state")
119 else:
120 break
121 os.close(qemu_fd)
122 os.remove("/var/lib/xen/qemu-save.%d" % dominfo.getDomid())
124 if checkpoint:
125 dominfo.resumeDomain()
126 else:
127 dominfo.destroyDomain()
128 dominfo.testDeviceComplete()
129 try:
130 dominfo.setName(domain_name)
131 except VmError:
132 # Ignore this. The name conflict (hopefully) arises because we
133 # are doing localhost migration; if we are doing a suspend of a
134 # persistent VM, we need the rename, and don't expect the
135 # conflict. This needs more thought.
136 pass
138 except Exception, exn:
139 log.exception("Save failed on domain %s (%s).", domain_name,
140 dominfo.getDomid())
142 dominfo.resumeDomain()
143 log.debug("XendCheckpoint.save: resumeDomain")
145 try:
146 dominfo.setName(domain_name)
147 except:
148 log.exception("Failed to reset the migrating domain's name")
150 raise exn
153 def restore(xd, fd, dominfo = None, paused = False):
154 signature = read_exact(fd, len(SIGNATURE),
155 "not a valid guest state file: signature read")
156 if signature != SIGNATURE:
157 raise XendError("not a valid guest state file: found '%s'" %
158 signature)
160 l = read_exact(fd, sizeof_int,
161 "not a valid guest state file: config size read")
162 vmconfig_size = unpack("!i", l)[0]
163 vmconfig_buf = read_exact(fd, vmconfig_size,
164 "not a valid guest state file: config read")
166 p = sxp.Parser()
167 p.input(vmconfig_buf)
168 if not p.ready:
169 raise XendError("not a valid guest state file: config parse")
171 vmconfig = p.get_val()
173 if dominfo:
174 dominfo.resume()
175 else:
176 dominfo = xd.restore_(vmconfig)
178 store_port = dominfo.getStorePort()
179 console_port = dominfo.getConsolePort()
181 assert store_port
182 assert console_port
184 # if hvm, pass mem size to calculate the store_mfn
185 image_cfg = dominfo.info.get('image', {})
186 is_hvm = dominfo.info.is_hvm()
187 if is_hvm:
188 apic = int(dominfo.info['platform'].get('apic', 0))
189 pae = int(dominfo.info['platform'].get('pae', 0))
190 log.info("restore hvm domain %d, apic=%d, pae=%d",
191 dominfo.domid, apic, pae)
192 else:
193 apic = 0
194 pae = 0
196 try:
197 restore_image = image.create(dominfo, dominfo.info)
198 memory = restore_image.getRequiredAvailableMemory(
199 dominfo.info['memory_dynamic_max'] / 1024)
200 maxmem = restore_image.getRequiredAvailableMemory(
201 dominfo.info['memory_static_max'] / 1024)
202 shadow = restore_image.getRequiredShadowMemory(
203 dominfo.info['shadow_memory'] * 1024,
204 dominfo.info['memory_static_max'] / 1024)
206 log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, ",
207 dominfo.info['shadow_memory'],
208 dominfo.info['memory_static_max'],
209 dominfo.info['memory_static_min'])
211 # Round shadow up to a multiple of a MiB, as shadow_mem_control
212 # takes MiB and we must not round down and end up under-providing.
213 shadow = ((shadow + 1023) / 1024) * 1024
215 # set memory limit
216 xc.domain_setmaxmem(dominfo.getDomid(), maxmem)
218 balloon.free(memory + shadow)
220 shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow / 1024)
221 dominfo.info['shadow_memory'] = shadow_cur
223 cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
224 fd, dominfo.getDomid(),
225 store_port, console_port, int(is_hvm), pae, apic])
226 log.debug("[xc_restore]: %s", string.join(cmd))
228 handler = RestoreInputHandler()
230 forkHelper(cmd, fd, handler.handler, True)
232 # We don't want to pass this fd to any other children -- we
233 # might need to recover the disk space that backs it.
234 try:
235 flags = fcntl.fcntl(fd, fcntl.F_GETFD)
236 flags |= fcntl.FD_CLOEXEC
237 fcntl.fcntl(fd, fcntl.F_SETFD, flags)
238 except:
239 pass
241 if handler.store_mfn is None:
242 raise XendError('Could not read store MFN')
244 if not is_hvm and handler.console_mfn is None:
245 raise XendError('Could not read console MFN')
247 # get qemu state and create a tmp file for dm restore
248 if is_hvm:
249 qemu_signature = read_exact(fd, len(QEMU_SIGNATURE),
250 "invalid device model signature read")
251 if qemu_signature != QEMU_SIGNATURE:
252 raise XendError("not a valid device model state: found '%s'" %
253 qemu_signature)
254 qemu_fd = os.open("/var/lib/xen/qemu-save.%d" % dominfo.getDomid(),
255 os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
256 while True:
257 buf = os.read(fd, dm_batch)
258 if len(buf):
259 write_exact(qemu_fd, buf,
260 "could not write dm state to tmp file")
261 else:
262 break
263 os.close(qemu_fd)
266 os.read(fd, 1) # Wait for source to close connection
268 dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
270 #
271 # We shouldn't hold the domains_lock over a waitForDevices
272 # As this function sometime gets called holding this lock,
273 # we must release it and re-acquire it appropriately
274 #
275 from xen.xend import XendDomain
277 lock = True;
278 try:
279 XendDomain.instance().domains_lock.release()
280 except:
281 lock = False;
283 try:
284 dominfo.waitForDevices() # Wait for backends to set up
285 except Exception, exn:
286 log.exception(exn)
288 if lock:
289 XendDomain.instance().domains_lock.acquire()
291 if not paused:
292 dominfo.unpause()
294 return dominfo
295 except:
296 dominfo.destroy()
297 raise
class RestoreInputHandler:
    """Collects the frame numbers printed by the xc_restore helper.

    store_mfn and console_mfn start out as None and are filled in when
    handler() sees a "store-mfn <n>" or "console-mfn <n>" line.
    """

    def __init__(self):
        self.console_mfn = None
        self.store_mfn = None

    def handler(self, line, _):
        """Inspect one line of helper output; the second argument is unused.

        Lines matching neither pattern are ignored.
        """
        store = re.match(r"^(store-mfn) (\d+)$", line)
        if store:
            self.store_mfn = int(store.group(2))
            return

        console = re.match(r"^(console-mfn) (\d+)$", line)
        if console:
            self.console_mfn = int(console.group(2))
316 def forkHelper(cmd, fd, inputHandler, closeToChild):
317 child = xPopen3(cmd, True, -1, [fd, xc.handle()])
319 if closeToChild:
320 child.tochild.close()
322 thread = threading.Thread(target = slurp, args = (child.childerr,))
323 thread.start()
325 try:
326 try:
327 while 1:
328 line = child.fromchild.readline()
329 if line == "":
330 break
331 else:
332 line = line.rstrip()
333 log.debug('%s', line)
334 inputHandler(line, child.tochild)
336 except IOError, exn:
337 raise XendError('Error reading from child process for %s: %s' %
338 (cmd, exn))
339 finally:
340 child.fromchild.close()
341 if not closeToChild:
342 child.tochild.close()
343 thread.join()
344 child.childerr.close()
345 status = child.wait()
347 if status >> 8 == 127:
348 raise XendError("%s failed: popen failed" % string.join(cmd))
349 elif status != 0:
350 raise XendError("%s failed" % string.join(cmd))
def slurp(infile):
    """Drain infile line by line into the log until end of file.

    Each line is stripped of surrounding whitespace.  Lines starting with
    "ERROR: " are logged at error level with that prefix removed; every
    other line is logged at info level.
    """
    for raw in iter(infile.readline, ""):
        text = raw.strip()
        found = re.match(r"^ERROR: (.*)", text)
        if found is not None:
            log.error('%s', found.group(1))
        else:
            log.info('%s', text)