ia64/xen-unstable

view tools/python/xen/xend/XendCheckpoint.py @ 15749:d8b5b02c52cf

[HVM] [TOOLS] Move device state save earlier in suspend path
Signed-off-by: Zhai Edwin <edwin.zhai@intel.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Mon Aug 13 16:47:11 2007 +0100 (2007-08-13)
parents c585f993385c
children 458e8b37aec8
line source
1 # Copyright (C) 2005 Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
2 # Copyright (C) 2005 XenSource Ltd
4 # This file is subject to the terms and conditions of the GNU General
5 # Public License. See the file "COPYING" in the main directory of
6 # this archive for more details.
8 import os
9 import re
10 import string
11 import threading
12 import fcntl
13 from struct import pack, unpack, calcsize
15 from xen.util.xpopen import xPopen3
16 import xen.util.auxbin
17 import xen.lowlevel.xc
19 from xen.xend import balloon, sxp
20 from xen.xend.XendError import XendError, VmError
21 from xen.xend.XendLogging import log
22 from xen.xend.XendConfig import XendConfig
23 from xen.xend.XendConstants import *
# Markers written into the saved-state stream: SIGNATURE opens the stream and
# QEMU_SIGNATURE precedes the device model state of an HVM guest.
SIGNATURE = "LinuxGuestRecord"
QEMU_SIGNATURE = "QemuDeviceModelRecord"

# Number of bytes copied per read when transferring device model state.
dm_batch = 512

# Names of the helper programs; resolved with xen.util.auxbin.pathTo().
XC_SAVE = "xc_save"
XC_RESTORE = "xc_restore"

# Native sizes, in bytes, of the C types used when (un)packing headers.
sizeof_int = calcsize("i")
sizeof_unsigned_int = calcsize("I")
sizeof_unsigned_long = calcsize("L")

# Shared handle on the low-level Xen control interface.
xc = xen.lowlevel.xc.xc()
def write_exact(fd, buf, errmsg):
    """Write the whole of buf to the file descriptor fd.

    os.write() is allowed to accept fewer bytes than it was given (this
    happens on pipes and sockets), so the unwritten tail is retried until
    the complete buffer has been written.

    fd     -- file descriptor to write to
    buf    -- data to write
    errmsg -- message for the XendError raised on failure

    Raises XendError(errmsg) if a write makes no progress.  Errors raised
    by os.write() itself propagate unchanged.
    """
    remaining = buf
    while True:
        written = os.write(fd, remaining)
        if written >= len(remaining):
            return
        if written <= 0:
            # No progress: give up rather than spin forever.
            raise XendError(errmsg)
        remaining = remaining[written:]
def read_exact(fd, size, errmsg):
    """Read exactly size bytes from the file descriptor fd.

    fd     -- file descriptor to read from
    size   -- number of bytes wanted
    errmsg -- message for the XendError raised on failure

    Returns the data read (an empty string when size is 0).

    Raises XendError(errmsg) if size is negative or if end-of-file is
    reached before size bytes have been read.  Errors raised by os.read()
    itself propagate unchanged.
    """
    if size < 0:
        # A negative length can only come from a damaged header; report it
        # with the caller's message instead of handing it to os.read().
        raise XendError(errmsg)

    chunks = []
    remaining = size
    while remaining > 0:
        chunk = os.read(fd, remaining)
        if not chunk:
            # "chunk" is the empty value of whatever type os.read() returns,
            # so it doubles as the separator for joining what we have so far.
            log.error("read_exact: EOF trying to read %d (buf='%s')" %
                      (remaining, chunk.join(chunks)))
            raise XendError(errmsg)
        chunks.append(chunk)
        remaining = remaining - len(chunk)

    if not chunks:
        return ''
    # Join once at the end instead of re-copying the buffer on every read;
    # the separator is an empty value of the same type as the chunks.
    return chunks[0][:0].join(chunks)
58 def save(fd, dominfo, network, live, dst, checkpoint=False):
59 write_exact(fd, SIGNATURE, "could not write guest state file: signature")
61 config = sxp.to_string(dominfo.sxpr())
63 domain_name = dominfo.getName()
64 # Rename the domain temporarily, so that we don't get a name clash if this
65 # domain is migrating (live or non-live) to the local host. Doing such a
66 # thing is useful for debugging.
67 dominfo.setName('migrating-' + domain_name)
69 try:
70 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP1, domain_name)
72 write_exact(fd, pack("!i", len(config)),
73 "could not write guest state file: config len")
74 write_exact(fd, config, "could not write guest state file: config")
76 image_cfg = dominfo.info.get('image', {})
77 hvm = dominfo.info.is_hvm()
79 # xc_save takes three customization parameters: maxit, max_f, and
80 # flags the last controls whether or not save is 'live', while the
81 # first two further customize behaviour when 'live' save is
82 # enabled. Passing "0" simply uses the defaults compiled into
83 # libxenguest; see the comments and/or code in xc_linux_save() for
84 # more information.
85 cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd),
86 str(dominfo.getDomid()), "0", "0",
87 str(int(live) | (int(hvm) << 2)) ]
88 log.debug("[xc_save]: %s", string.join(cmd))
90 def saveInputHandler(line, tochild):
91 log.debug("In saveInputHandler %s", line)
92 if line == "suspend":
93 log.debug("Suspending %d ...", dominfo.getDomid())
94 dominfo.shutdown('suspend')
95 dominfo.waitForShutdown()
96 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2,
97 domain_name)
98 log.info("Domain %d suspended.", dominfo.getDomid())
99 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3,
100 domain_name)
101 if hvm:
102 dominfo.image.saveDeviceModel()
104 tochild.write("done\n")
105 tochild.flush()
106 log.debug('Written done')
108 forkHelper(cmd, fd, saveInputHandler, False)
110 # put qemu device model state
111 if hvm:
112 write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature")
113 qemu_fd = os.open("/var/lib/xen/qemu-save.%d" % dominfo.getDomid(),
114 os.O_RDONLY)
115 while True:
116 buf = os.read(qemu_fd, dm_batch)
117 if len(buf):
118 write_exact(fd, buf, "could not write device model state")
119 else:
120 break
121 os.close(qemu_fd)
122 os.remove("/var/lib/xen/qemu-save.%d" % dominfo.getDomid())
124 if checkpoint:
125 dominfo.resumeDomain()
126 else:
127 dominfo.destroyDomain()
128 dominfo.testDeviceComplete()
129 try:
130 dominfo.setName(domain_name)
131 except VmError:
132 # Ignore this. The name conflict (hopefully) arises because we
133 # are doing localhost migration; if we are doing a suspend of a
134 # persistent VM, we need the rename, and don't expect the
135 # conflict. This needs more thought.
136 pass
138 except Exception, exn:
139 log.exception("Save failed on domain %s (%s).", domain_name,
140 dominfo.getDomid())
142 dominfo.resumeDomain()
143 log.debug("XendCheckpoint.save: resumeDomain")
145 try:
146 dominfo.setName(domain_name)
147 except:
148 log.exception("Failed to reset the migrating domain's name")
150 raise exn
153 def restore(xd, fd, dominfo = None, paused = False):
154 signature = read_exact(fd, len(SIGNATURE),
155 "not a valid guest state file: signature read")
156 if signature != SIGNATURE:
157 raise XendError("not a valid guest state file: found '%s'" %
158 signature)
160 l = read_exact(fd, sizeof_int,
161 "not a valid guest state file: config size read")
162 vmconfig_size = unpack("!i", l)[0]
163 vmconfig_buf = read_exact(fd, vmconfig_size,
164 "not a valid guest state file: config read")
166 p = sxp.Parser()
167 p.input(vmconfig_buf)
168 if not p.ready:
169 raise XendError("not a valid guest state file: config parse")
171 vmconfig = p.get_val()
173 if dominfo:
174 dominfo.resume()
175 else:
176 dominfo = xd.restore_(vmconfig)
178 store_port = dominfo.getStorePort()
179 console_port = dominfo.getConsolePort()
181 assert store_port
182 assert console_port
184 nr_pfns = (dominfo.getMemoryTarget() + 3) / 4
186 # if hvm, pass mem size to calculate the store_mfn
187 image_cfg = dominfo.info.get('image', {})
188 is_hvm = dominfo.info.is_hvm()
189 if is_hvm:
190 apic = int(dominfo.info['platform'].get('apic', 0))
191 pae = int(dominfo.info['platform'].get('pae', 0))
192 log.info("restore hvm domain %d, apic=%d, pae=%d",
193 dominfo.domid, apic, pae)
194 else:
195 apic = 0
196 pae = 0
198 try:
199 shadow = dominfo.info['shadow_memory']
200 log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, ",
201 dominfo.info['shadow_memory'],
202 dominfo.info['memory_static_max'],
203 dominfo.info['memory_static_min'])
205 balloon.free(xc.pages_to_kib(nr_pfns) + shadow * 1024)
207 shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow)
208 dominfo.info['shadow_memory'] = shadow_cur
210 xc.domain_setmaxmem(dominfo.getDomid(), dominfo.getMemoryMaximum())
212 cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
213 fd, dominfo.getDomid(),
214 store_port, console_port, int(is_hvm), pae, apic])
215 log.debug("[xc_restore]: %s", string.join(cmd))
217 handler = RestoreInputHandler()
219 forkHelper(cmd, fd, handler.handler, True)
221 # We don't want to pass this fd to any other children -- we
222 # might need to recover ths disk space that backs it.
223 try:
224 flags = fcntl.fcntl(fd, fcntl.F_GETFD)
225 flags |= fcntl.FD_CLOEXEC
226 fcntl.fcntl(fd, fcntl.F_SETFD, flags)
227 except:
228 pass
230 if handler.store_mfn is None:
231 raise XendError('Could not read store MFN')
233 if not is_hvm and handler.console_mfn is None:
234 raise XendError('Could not read console MFN')
236 # get qemu state and create a tmp file for dm restore
237 if is_hvm:
238 qemu_signature = read_exact(fd, len(QEMU_SIGNATURE),
239 "invalid device model signature read")
240 if qemu_signature != QEMU_SIGNATURE:
241 raise XendError("not a valid device model state: found '%s'" %
242 qemu_signature)
243 qemu_fd = os.open("/var/lib/xen/qemu-save.%d" % dominfo.getDomid(),
244 os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
245 while True:
246 buf = os.read(fd, dm_batch)
247 if len(buf):
248 write_exact(qemu_fd, buf,
249 "could not write dm state to tmp file")
250 else:
251 break
252 os.close(qemu_fd)
255 os.read(fd, 1) # Wait for source to close connection
257 dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
259 #
260 # We shouldn't hold the domains_lock over a waitForDevices
261 # As this function sometime gets called holding this lock,
262 # we must release it and re-acquire it appropriately
263 #
264 from xen.xend import XendDomain
266 lock = True;
267 try:
268 XendDomain.instance().domains_lock.release()
269 except:
270 lock = False;
272 try:
273 dominfo.waitForDevices() # Wait for backends to set up
274 except Exception, exn:
275 log.exception(exn)
277 if lock:
278 XendDomain.instance().domains_lock.acquire()
280 if not paused:
281 dominfo.unpause()
283 return dominfo
284 except:
285 dominfo.destroy()
286 raise
class RestoreInputHandler:
    """Records the store and console MFNs found in lines of helper output.

    Both attributes start out as None and are filled in by handler() when
    a matching "store-mfn <n>" or "console-mfn <n>" line is seen.
    """

    def __init__(self):
        self.store_mfn = None
        self.console_mfn = None


    def handler(self, line, _):
        """Parse one output line; the second argument is ignored."""
        store = re.match(r"^(store-mfn) (\d+)$", line)
        if store is not None:
            self.store_mfn = int(store.group(2))
            return

        console = re.match(r"^(console-mfn) (\d+)$", line)
        if console is not None:
            self.console_mfn = int(console.group(2))
305 def forkHelper(cmd, fd, inputHandler, closeToChild):
306 child = xPopen3(cmd, True, -1, [fd, xc.handle()])
308 if closeToChild:
309 child.tochild.close()
311 thread = threading.Thread(target = slurp, args = (child.childerr,))
312 thread.start()
314 try:
315 try:
316 while 1:
317 line = child.fromchild.readline()
318 if line == "":
319 break
320 else:
321 line = line.rstrip()
322 log.debug('%s', line)
323 inputHandler(line, child.tochild)
325 except IOError, exn:
326 raise XendError('Error reading from child process for %s: %s' %
327 (cmd, exn))
328 finally:
329 child.fromchild.close()
330 if not closeToChild:
331 child.tochild.close()
332 thread.join()
333 child.childerr.close()
334 status = child.wait()
336 if status >> 8 == 127:
337 raise XendError("%s failed: popen failed" % string.join(cmd))
338 elif status != 0:
339 raise XendError("%s failed" % string.join(cmd))
def slurp(infile):
    """Log every line read from infile until end-of-file.

    A line of the form "ERROR: <text>" is logged at error level with the
    prefix removed; every other line is logged at info level.  Leading and
    trailing whitespace is stripped first.
    """
    # readline() returns "" only at end-of-file.
    for raw in iter(infile.readline, ""):
        text = raw.strip()
        found = re.match(r"^ERROR: (.*)", text)
        if found is not None:
            log.error('%s', found.group(1))
        else:
            log.info('%s', text)