ia64/xen-unstable

view tools/python/xen/xend/XendCheckpoint.py @ 15786:505021d029eb

xend: Fix memory ballooning logic during save/restore. Should be same
as during domain creation.
Signed-off-by: Chris Lalancette <clalance@redhat.com>
author kfraser@localhost.localdomain
date Tue Aug 28 16:06:32 2007 +0100 (2007-08-28)
parents 458e8b37aec8
children 32f331858d75
line source
1 # Copyright (C) 2005 Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
2 # Copyright (C) 2005 XenSource Ltd
4 # This file is subject to the terms and conditions of the GNU General
5 # Public License. See the file "COPYING" in the main directory of
6 # this archive for more details.
8 import os
9 import re
10 import string
11 import threading
12 import fcntl
13 from struct import pack, unpack, calcsize
15 from xen.util.xpopen import xPopen3
16 import xen.util.auxbin
17 import xen.lowlevel.xc
19 from xen.xend import balloon, sxp, image
20 from xen.xend.XendError import XendError, VmError
21 from xen.xend.XendLogging import log
22 from xen.xend.XendConfig import XendConfig
23 from xen.xend.XendConstants import *
# Marker that save() writes at the very start of a guest state file and that
# restore() verifies before reading anything else.
SIGNATURE = "LinuxGuestRecord"
# Marker that save() writes ahead of the HVM device-model state and that
# restore() checks before copying that state out again.
QEMU_SIGNATURE = "QemuDeviceModelRecord"
# Number of bytes requested per os.read() while copying device-model state.
dm_batch = 512
# Helper program names; resolved to full paths with xen.util.auxbin.pathTo().
XC_SAVE = "xc_save"
XC_RESTORE = "xc_restore"

# Sizes of the native C types, as reported by struct.calcsize().
# restore() uses sizeof_int as the length of the packed config-size field.
sizeof_int = calcsize("i")
sizeof_unsigned_int = calcsize("I")
sizeof_unsigned_long = calcsize("L")

# Shared xen.lowlevel.xc.xc instance: restore() calls domain_setmaxmem() and
# shadow_mem_control() on it, and forkHelper() passes its handle() to xPopen3.
xc = xen.lowlevel.xc.xc()
def write_exact(fd, buf, errmsg):
    """Write the whole of buf to the file descriptor fd.

    os.write() is allowed to accept fewer bytes than it was given (for
    instance on a pipe or a socket), so the unwritten tail is retried until
    the complete buffer has gone out.

    @param fd:     open file descriptor to write to
    @param buf:    data to write
    @param errmsg: message carried by the XendError raised on failure
    @raise XendError: if the descriptor stops accepting data before the
                      whole buffer has been written
    """
    total = len(buf)
    written = 0
    while written < total:
        count = os.write(fd, buf[written:])
        if count <= 0:
            # No forward progress is possible; fail instead of spinning.
            raise XendError(errmsg)
        written += count
def read_exact(fd, size, errmsg):
    """Read exactly size bytes from fd and return them as a single string.

    Short reads are retried until the requested amount has arrived.  If
    end-of-file is reached first, the condition is logged together with the
    data received so far and XendError(errmsg) is raised.
    """
    chunks = []
    remaining = size
    while remaining != 0:
        chunk = os.read(fd, remaining)
        if len(chunk) == 0:
            log.error("read_exact: EOF trying to read %d (buf='%s')" % \
                      (remaining, ''.join(chunks)))
            raise XendError(errmsg)
        remaining -= len(chunk)
        chunks.append(chunk)
    return ''.join(chunks)
58 def save(fd, dominfo, network, live, dst, checkpoint=False):
59 write_exact(fd, SIGNATURE, "could not write guest state file: signature")
61 config = sxp.to_string(dominfo.sxpr())
63 domain_name = dominfo.getName()
64 # Rename the domain temporarily, so that we don't get a name clash if this
65 # domain is migrating (live or non-live) to the local host. Doing such a
66 # thing is useful for debugging.
67 dominfo.setName('migrating-' + domain_name)
69 try:
70 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP1, domain_name)
72 write_exact(fd, pack("!i", len(config)),
73 "could not write guest state file: config len")
74 write_exact(fd, config, "could not write guest state file: config")
76 image_cfg = dominfo.info.get('image', {})
77 hvm = dominfo.info.is_hvm()
79 # xc_save takes three customization parameters: maxit, max_f, and
80 # flags the last controls whether or not save is 'live', while the
81 # first two further customize behaviour when 'live' save is
82 # enabled. Passing "0" simply uses the defaults compiled into
83 # libxenguest; see the comments and/or code in xc_linux_save() for
84 # more information.
85 cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd),
86 str(dominfo.getDomid()), "0", "0",
87 str(int(live) | (int(hvm) << 2)) ]
88 log.debug("[xc_save]: %s", string.join(cmd))
90 def saveInputHandler(line, tochild):
91 log.debug("In saveInputHandler %s", line)
92 if line == "suspend":
93 log.debug("Suspending %d ...", dominfo.getDomid())
94 dominfo.shutdown('suspend')
95 dominfo.waitForShutdown()
96 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2,
97 domain_name)
98 log.info("Domain %d suspended.", dominfo.getDomid())
99 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3,
100 domain_name)
101 if hvm:
102 dominfo.image.saveDeviceModel()
104 tochild.write("done\n")
105 tochild.flush()
106 log.debug('Written done')
108 forkHelper(cmd, fd, saveInputHandler, False)
110 # put qemu device model state
111 if hvm:
112 write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature")
113 qemu_fd = os.open("/var/lib/xen/qemu-save.%d" % dominfo.getDomid(),
114 os.O_RDONLY)
115 while True:
116 buf = os.read(qemu_fd, dm_batch)
117 if len(buf):
118 write_exact(fd, buf, "could not write device model state")
119 else:
120 break
121 os.close(qemu_fd)
122 os.remove("/var/lib/xen/qemu-save.%d" % dominfo.getDomid())
124 if checkpoint:
125 dominfo.resumeDomain()
126 else:
127 dominfo.destroyDomain()
128 dominfo.testDeviceComplete()
129 try:
130 dominfo.setName(domain_name)
131 except VmError:
132 # Ignore this. The name conflict (hopefully) arises because we
133 # are doing localhost migration; if we are doing a suspend of a
134 # persistent VM, we need the rename, and don't expect the
135 # conflict. This needs more thought.
136 pass
138 except Exception, exn:
139 log.exception("Save failed on domain %s (%s).", domain_name,
140 dominfo.getDomid())
142 dominfo.resumeDomain()
143 log.debug("XendCheckpoint.save: resumeDomain")
145 try:
146 dominfo.setName(domain_name)
147 except:
148 log.exception("Failed to reset the migrating domain's name")
150 raise exn
153 def restore(xd, fd, dominfo = None, paused = False):
154 signature = read_exact(fd, len(SIGNATURE),
155 "not a valid guest state file: signature read")
156 if signature != SIGNATURE:
157 raise XendError("not a valid guest state file: found '%s'" %
158 signature)
160 l = read_exact(fd, sizeof_int,
161 "not a valid guest state file: config size read")
162 vmconfig_size = unpack("!i", l)[0]
163 vmconfig_buf = read_exact(fd, vmconfig_size,
164 "not a valid guest state file: config read")
166 p = sxp.Parser()
167 p.input(vmconfig_buf)
168 if not p.ready:
169 raise XendError("not a valid guest state file: config parse")
171 vmconfig = p.get_val()
173 if dominfo:
174 dominfo.resume()
175 else:
176 dominfo = xd.restore_(vmconfig)
178 store_port = dominfo.getStorePort()
179 console_port = dominfo.getConsolePort()
181 assert store_port
182 assert console_port
184 # if hvm, pass mem size to calculate the store_mfn
185 image_cfg = dominfo.info.get('image', {})
186 is_hvm = dominfo.info.is_hvm()
187 if is_hvm:
188 apic = int(dominfo.info['platform'].get('apic', 0))
189 pae = int(dominfo.info['platform'].get('pae', 0))
190 log.info("restore hvm domain %d, apic=%d, pae=%d",
191 dominfo.domid, apic, pae)
192 else:
193 apic = 0
194 pae = 0
196 try:
197 restore_image = image.create(dominfo, dominfo.info['image'],
198 dominfo.info['device'])
199 memory = restore_image.getRequiredAvailableMemory(
200 dominfo.info['memory'] * 1024)
201 maxmem = restore_image.getRequiredAvailableMemory(
202 dominfo.info['maxmem'] * 1024)
203 shadow = restore_image.getRequiredShadowMemory(
204 dominfo.info['shadow_memory'] * 1024,
205 dominfo.info['maxmem'] * 1024)
207 log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, ",
208 dominfo.info['shadow_memory'],
209 dominfo.info['memory_static_max'],
210 dominfo.info['memory_static_min'])
212 # Round shadow up to a multiple of a MiB, as shadow_mem_control
213 # takes MiB and we must not round down and end up under-providing.
214 shadow = ((shadow + 1023) / 1024) * 1024
216 # set memory limit
217 xc.domain_setmaxmem(dominfo.getDomid(), maxmem)
219 balloon.free(memory + shadow)
221 shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow / 1024)
222 dominfo.info['shadow_memory'] = shadow_cur
224 cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE),
225 fd, dominfo.getDomid(),
226 store_port, console_port, int(is_hvm), pae, apic])
227 log.debug("[xc_restore]: %s", string.join(cmd))
229 handler = RestoreInputHandler()
231 forkHelper(cmd, fd, handler.handler, True)
233 # We don't want to pass this fd to any other children -- we
234 # might need to recover the disk space that backs it.
235 try:
236 flags = fcntl.fcntl(fd, fcntl.F_GETFD)
237 flags |= fcntl.FD_CLOEXEC
238 fcntl.fcntl(fd, fcntl.F_SETFD, flags)
239 except:
240 pass
242 if handler.store_mfn is None:
243 raise XendError('Could not read store MFN')
245 if not is_hvm and handler.console_mfn is None:
246 raise XendError('Could not read console MFN')
248 # get qemu state and create a tmp file for dm restore
249 if is_hvm:
250 qemu_signature = read_exact(fd, len(QEMU_SIGNATURE),
251 "invalid device model signature read")
252 if qemu_signature != QEMU_SIGNATURE:
253 raise XendError("not a valid device model state: found '%s'" %
254 qemu_signature)
255 qemu_fd = os.open("/var/lib/xen/qemu-save.%d" % dominfo.getDomid(),
256 os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
257 while True:
258 buf = os.read(fd, dm_batch)
259 if len(buf):
260 write_exact(qemu_fd, buf,
261 "could not write dm state to tmp file")
262 else:
263 break
264 os.close(qemu_fd)
267 os.read(fd, 1) # Wait for source to close connection
269 dominfo.completeRestore(handler.store_mfn, handler.console_mfn)
271 #
272 # We shouldn't hold the domains_lock over a waitForDevices
273 # As this function sometime gets called holding this lock,
274 # we must release it and re-acquire it appropriately
275 #
276 from xen.xend import XendDomain
278 lock = True;
279 try:
280 XendDomain.instance().domains_lock.release()
281 except:
282 lock = False;
284 try:
285 dominfo.waitForDevices() # Wait for backends to set up
286 except Exception, exn:
287 log.exception(exn)
289 if lock:
290 XendDomain.instance().domains_lock.acquire()
292 if not paused:
293 dominfo.unpause()
295 return dominfo
296 except:
297 dominfo.destroy()
298 raise
class RestoreInputHandler:
    """Collects the MFN values reported in a helper's output lines.

    store_mfn and console_mfn start out as None and are filled in when a
    "store-mfn <digits>" or "console-mfn <digits>" line is passed to
    handler().
    """

    def __init__(self):
        self.store_mfn = None
        self.console_mfn = None


    def handler(self, line, _):
        """Record the number carried by line, if it is one of the MFN lines.

        The second argument is accepted and ignored.  Lines matching neither
        pattern leave both attributes untouched.
        """
        store = re.match(r"^(store-mfn) (\d+)$", line)
        if store:
            self.store_mfn = int(store.group(2))
            return

        console = re.match(r"^(console-mfn) (\d+)$", line)
        if console:
            self.console_mfn = int(console.group(2))
317 def forkHelper(cmd, fd, inputHandler, closeToChild):
318 child = xPopen3(cmd, True, -1, [fd, xc.handle()])
320 if closeToChild:
321 child.tochild.close()
323 thread = threading.Thread(target = slurp, args = (child.childerr,))
324 thread.start()
326 try:
327 try:
328 while 1:
329 line = child.fromchild.readline()
330 if line == "":
331 break
332 else:
333 line = line.rstrip()
334 log.debug('%s', line)
335 inputHandler(line, child.tochild)
337 except IOError, exn:
338 raise XendError('Error reading from child process for %s: %s' %
339 (cmd, exn))
340 finally:
341 child.fromchild.close()
342 if not closeToChild:
343 child.tochild.close()
344 thread.join()
345 child.childerr.close()
346 status = child.wait()
348 if status >> 8 == 127:
349 raise XendError("%s failed: popen failed" % string.join(cmd))
350 elif status != 0:
351 raise XendError("%s failed" % string.join(cmd))
def slurp(infile):
    """Drain infile line by line into the log until end-of-file.

    Each line is stripped of surrounding whitespace.  Lines starting with
    "ERROR: " are logged at error level without that prefix; all other lines
    are logged at info level.
    """
    # readline() returns "" only at end-of-file, which ends the iteration.
    for raw in iter(infile.readline, ""):
        text = raw.strip()
        found = re.match(r"^ERROR: (.*)", text)
        if found is not None:
            log.error('%s', found.group(1))
        else:
            log.info('%s', text)