ia64/xen-unstable

view tools/python/xen/xend/XendDomainInfo.py @ 9808:83b092a2cbe6

The attached patch replaces the current numbers identifying the
individual steps of device migration with constants.

Signed-off-by: Stefan Berger <stefanb@us.ibm.com>
author emellor@leeni.uk.xensource.com
date Fri Apr 21 11:57:34 2006 +0100 (2006-04-21)
parents 1fe63743a147
children 681a18bf049e
line source
1 #===========================================================================
2 # This library is free software; you can redistribute it and/or
3 # modify it under the terms of version 2.1 of the GNU Lesser General Public
4 # License as published by the Free Software Foundation.
5 #
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
10 #
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, write to the Free Software
13 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14 #============================================================================
15 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
16 # Copyright (C) 2005, 2006 XenSource Ltd
17 #============================================================================
19 """Representation of a single domain.
20 Includes support for domain construction, using
21 open-ended configurations.
23 Author: Mike Wray <mike.wray@hp.com>
25 """
27 import errno
28 import logging
29 import string
30 import time
31 import threading
33 import xen.lowlevel.xc
34 from xen.util import asserts
35 from xen.util.blkif import blkdev_uname_to_file
37 import balloon
38 import image
39 import sxp
40 import uuid
41 import XendDomain
42 import XendRoot
44 from xen.xend.XendBootloader import bootloader
45 from xen.xend.XendError import XendError, VmError
47 from xen.xend.xenstore.xstransact import xstransact, complete
48 from xen.xend.xenstore.xsutil import GetDomainPath, IntroduceDomain
49 from xen.xend.xenstore.xswatch import xswatch
# Shutdown codes, as reported in a domain's 'shutdown_reason'.
DOMAIN_POWEROFF = 0   # poweroff
DOMAIN_REBOOT = 1     # reboot
DOMAIN_SUSPEND = 2    # suspend
DOMAIN_CRASH = 3      # crash
DOMAIN_HALT = 4       # halt

# Map shutdown codes to strings.
shutdown_reasons = dict([
    (DOMAIN_POWEROFF, "poweroff"),
    (DOMAIN_REBOOT, "reboot"),
    (DOMAIN_SUSPEND, "suspend"),
    (DOMAIN_CRASH, "crash"),
    (DOMAIN_HALT, "halt"),
])

# Values accepted for the on_poweroff / on_reboot / on_crash settings.
restart_modes = ["restart", "destroy", "preserve", "rename-restart"]

STATE_DOM_OK = 1
STATE_DOM_SHUTDOWN = 2

SHUTDOWN_TIMEOUT = 30.0

ZOMBIE_PREFIX = 'Zombie-'

# Constants for the different stages of external device migration.
DEV_MIGRATE_TEST = 0
DEV_MIGRATE_STEP1 = 1
DEV_MIGRATE_STEP2 = 2
DEV_MIGRATE_STEP3 = 3

# Minimum time between domain restarts in seconds.
MINIMUM_RESTART_TIME = 20

RESTART_IN_PROGRESS = 'xend/restart_in_progress'
# Module-wide instance of the low-level xc interface class.
xc = xen.lowlevel.xc.xc()
# Instance returned by XendRoot (queried below for console limit / dump
# settings).
xroot = XendRoot.instance()

# Logger shared by everything in this module.
log = logging.getLogger("xend.XendDomainInfo")
#log.setLevel(logging.TRACE)
##
# All parameters of VMs that may be configured on-the-fly, or at start-up.
# Each entry is a (name, converter) pair.
#
VM_CONFIG_PARAMS = [
    ('name', str),
    ('on_poweroff', str),
    ('on_reboot', str),
    ('on_crash', str),
]


##
# Configuration entries that we expect to round-trip -- be read from the
# config file or xc, written to save-files (i.e. through sxpr), and reused as
# config on restart or restore, all without munging.  Some configuration
# entries are munged for backwards compatibility reasons, or because they
# don't come out of xc in the same form as they are specified in the config
# file, so those are handled separately.
ROUNDTRIPPING_CONFIG_ENTRIES = [
    ('uuid', str),
    ('ssidref', int),
    ('vcpus', int),
    ('vcpu_avail', int),
    ('cpu_weight', float),
    ('memory', int),
    ('maxmem', int),
    ('bootloader', str),
] + VM_CONFIG_PARAMS


##
# All entries written to the store.  This is VM_CONFIG_PARAMS, plus those
# entries written to the store that cannot be reconfigured on-the-fly.
#
VM_STORE_ENTRIES = [
    ('uuid', str),
    ('ssidref', int),
    ('vcpus', int),
    ('vcpu_avail', int),
    ('memory', int),
    ('maxmem', int),
    ('start_time', float),
] + VM_CONFIG_PARAMS
158 #
159 # There are a number of CPU-related fields:
160 #
161 # vcpus: the number of virtual CPUs this domain is configured to use.
162 # vcpu_avail: a bitmap telling the guest domain whether it may use each of
163 # its VCPUs. This is translated to
164 # <dompath>/cpu/<id>/availability = {online,offline} for use
165 # by the guest domain.
166 # cpumap: a list of bitmaps, one for each VCPU, giving the physical
167 # CPUs that that VCPU may use.
168 # cpu: a configuration setting requesting that VCPU 0 is pinned to
169 # the specified physical CPU.
170 #
171 # vcpus and vcpu_avail settings persist with the VM (i.e. they are persistent
172 # across save, restore, migrate, and restart). The other settings are only
173 # specific to the domain, so are lost when the VM moves.
174 #
def create(config):
    """Build a brand-new VM from a configuration and bring it up.

    @param config: configuration
    @return: the constructed XendDomainInfo
    @raise: VmError for invalid configuration
    """

    log.debug("XendDomainInfo.create(%s)", config)

    vm = XendDomainInfo(parseConfig(config))
    try:
        # The construction steps, in the order they have to run.
        for step in ('construct', 'initDomain', 'storeVmDetails',
                     'storeDomDetails', 'registerWatches',
                     'refreshShutdown'):
            getattr(vm, step)()
        return vm
    except:
        # Tear the half-built domain down again, then let the caller see
        # the original failure.
        log.exception('Domain construction failed')
        vm.destroy()
        raise
201 def recreate(xeninfo, priv):
202 """Create the VM object for an existing domain. The domain must not
203 be dying, as the paths in the store should already have been removed,
204 and asking us to recreate them causes problems."""
206 log.debug("XendDomainInfo.recreate(%s)", xeninfo)
208 assert not xeninfo['dying']
210 domid = xeninfo['dom']
211 uuid1 = xeninfo['handle']
212 xeninfo['uuid'] = uuid.toString(uuid1)
213 dompath = GetDomainPath(domid)
214 if not dompath:
215 raise XendError(
216 'No domain path in store for existing domain %d' % domid)
218 log.info("Recreating domain %d, UUID %s.", domid, xeninfo['uuid'])
219 try:
220 vmpath = xstransact.Read(dompath, "vm")
221 if not vmpath:
222 raise XendError(
223 'No vm path in store for existing domain %d' % domid)
224 uuid2_str = xstransact.Read(vmpath, "uuid")
225 if not uuid2_str:
226 raise XendError(
227 'No vm/uuid path in store for existing domain %d' % domid)
229 uuid2 = uuid.fromString(uuid2_str)
231 if uuid1 != uuid2:
232 raise XendError(
233 'Uuid in store does not match uuid for existing domain %d: '
234 '%s != %s' % (domid, uuid2_str, xeninfo['uuid']))
236 vm = XendDomainInfo(xeninfo, domid, dompath, True, priv)
238 except Exception, exn:
239 if priv:
240 log.warn(str(exn))
242 vm = XendDomainInfo(xeninfo, domid, dompath, True, priv)
243 vm.recreateDom()
244 vm.removeVm()
245 vm.storeVmDetails()
246 vm.storeDomDetails()
248 vm.registerWatches()
249 vm.refreshShutdown(xeninfo)
250 return vm
def restore(config):
    """Create a domain and a VM object to do a restore.

    @param config: domain configuration
    @return: the XendDomainInfo, constructed with its resume flag set and
             cleared again by endRestore once the steps have run
    """

    log.debug("XendDomainInfo.restore(%s)", config)

    vm = XendDomainInfo(parseConfig(config), None, None, False, False, True)
    try:
        for step in ('construct', 'storeVmDetails', 'createDevices',
                     'createChannels', 'storeDomDetails', 'endRestore'):
            getattr(vm, step)()
        return vm
    except:
        # Destroy the partially restored domain and re-raise.
        vm.destroy()
        raise
275 def parseConfig(config):
276 def get_cfg(name, conv = None):
277 val = sxp.child_value(config, name)
279 if conv and not val is None:
280 try:
281 return conv(val)
282 except TypeError, exn:
283 raise VmError(
284 'Invalid setting %s = %s in configuration: %s' %
285 (name, val, str(exn)))
286 else:
287 return val
290 log.debug("parseConfig: config is %s", config)
292 result = {}
294 for e in ROUNDTRIPPING_CONFIG_ENTRIES:
295 result[e[0]] = get_cfg(e[0], e[1])
297 result['cpu'] = get_cfg('cpu', int)
298 result['cpus'] = get_cfg('cpus', str)
299 result['image'] = get_cfg('image')
301 try:
302 if result['image']:
303 v = sxp.child_value(result['image'], 'vcpus')
304 if result['vcpus'] is None and v is not None:
305 result['vcpus'] = int(v)
306 elif v is not None and int(v) != result['vcpus']:
307 log.warn(('Image VCPUs setting overrides vcpus=%d elsewhere.'
308 ' Using %s VCPUs for VM %s.') %
309 (result['vcpus'], v, result['uuid']))
310 result['vcpus'] = int(v)
311 except TypeError, exn:
312 raise VmError(
313 'Invalid configuration setting: vcpus = %s: %s' %
314 (sxp.child_value(result['image'], 'vcpus', 1), str(exn)))
316 try:
317 # support legacy config files with 'cpu' parameter
318 # NB: prepending to list to support previous behavior
319 # where 'cpu' parameter pinned VCPU0.
320 if result['cpu']:
321 if result['cpus']:
322 result['cpus'] = "%s,%s" % (str(result['cpu']), result['cpus'])
323 else:
324 result['cpus'] = str(result['cpu'])
326 # convert 'cpus' string to list of ints
327 # 'cpus' supports a list of ranges (0-3), seperated by
328 # commas, and negation, (^1).
329 # Precedence is settled by order of the string:
330 # "0-3,^1" -> [0,2,3]
331 # "0-3,^1,1" -> [0,1,2,3]
332 if result['cpus']:
333 cpus = []
334 for c in result['cpus'].split(','):
335 if c.find('-') != -1:
336 (x,y) = c.split('-')
337 for i in range(int(x),int(y)+1):
338 cpus.append(int(i))
339 else:
340 # remove this element from the list
341 if c[0] == '^':
342 cpus = [x for x in cpus if x != int(c[1:])]
343 else:
344 cpus.append(int(c))
346 result['cpus'] = cpus
348 except ValueError, exn:
349 raise VmError(
350 'Invalid configuration setting: cpus = %s: %s' %
351 (result['cpus'], exn))
353 result['backend'] = []
354 for c in sxp.children(config, 'backend'):
355 result['backend'].append(sxp.name(sxp.child0(c)))
357 result['device'] = []
358 for d in sxp.children(config, 'device'):
359 c = sxp.child0(d)
360 result['device'].append((sxp.name(c), c))
362 # Configuration option "restart" is deprecated. Parse it, but
363 # let on_xyz override it if they are present.
364 restart = get_cfg('restart')
365 if restart:
366 def handle_restart(event, val):
367 if result[event] is None:
368 result[event] = val
370 if restart == "onreboot":
371 handle_restart('on_poweroff', 'destroy')
372 handle_restart('on_reboot', 'restart')
373 handle_restart('on_crash', 'destroy')
374 elif restart == "always":
375 handle_restart('on_poweroff', 'restart')
376 handle_restart('on_reboot', 'restart')
377 handle_restart('on_crash', 'restart')
378 elif restart == "never":
379 handle_restart('on_poweroff', 'destroy')
380 handle_restart('on_reboot', 'destroy')
381 handle_restart('on_crash', 'destroy')
382 else:
383 log.warn("Ignoring malformed and deprecated config option "
384 "restart = %s", restart)
386 log.debug("parseConfig: result is %s", result)
387 return result
def domain_by_name(name):
    """Look a domain up by name through the XendDomain instance.

    @param name: domain name
    @return: whatever domain_lookup_by_name_nr returns for that name
    """
    registry = XendDomain.instance()
    return registry.domain_lookup_by_name_nr(name)
def shutdown_reason(code):
    """Translate a shutdown code into its textual reason.

    @param code: shutdown code
    @type  code: int
    @return: shutdown reason, or "?" for a code not in shutdown_reasons
    @rtype:  string
    """
    try:
        return shutdown_reasons[code]
    except KeyError:
        return "?"
404 def dom_get(dom):
405 """Get info from xen for an existing domain.
407 @param dom: domain id
408 @return: info or None
409 """
410 try:
411 domlist = xc.domain_getinfo(dom, 1)
412 if domlist and dom == domlist[0]['dom']:
413 return domlist[0]
414 except Exception, err:
415 # ignore missing domain
416 log.trace("domain_getinfo(%d) failed, ignoring: %s", dom, str(err))
417 return None
420 class XendDomainInfo:
def __init__(self, info, domid = None, dompath = None, augment = False,
             priv = False, resume = False):
    """Set up the in-memory state for one domain.

    @param info:    info dictionary (kept by reference as self.info)
    @param domid:   domain id; falls back to info['dom'] when omitted
    @param dompath: domain path in the store, if known
    @param augment: if true, fill in self.info from the store first
    @param priv:    passed to augmentInfo
    @param resume:  initial value of the 'resume' info entry
    """

    self.info = info

    # Give the domain a fresh UUID unless one was supplied.
    if not self.infoIsSet('uuid'):
        self.info['uuid'] = uuid.toString(uuid.create())

    if domid is None and 'dom' in info:
        domid = int(info['dom'])
    self.domid = domid

    self.vmpath = XendDomain.VMROOT + self.info['uuid']
    self.dompath = dompath

    if augment:
        self.augmentInfo(priv)

    self.validateInfo()

    self.image = None

    self.store_port = self.store_mfn = None
    self.console_port = self.console_mfn = None

    self.vmWatch = self.shutdownWatch = None

    self.shutdownStartTime = None

    self.state = STATE_DOM_OK
    self.state_updated = threading.Condition()
    self.refresh_shutdown_lock = threading.Condition()

    self.setResume(resume)
463 ## private:
def readVMDetails(self, params):
    """Read the specified parameters from the store.

    @param params: sequence of entries handed on to gatherVm
    @return: the gathered values, or [] if a store entry is invalid
    """
    try:
        details = self.gatherVm(*params)
    except ValueError:
        # One of the int/float entries in params has a corresponding store
        # entry that is invalid.  We recover, because older versions of
        # Xend may have put the entry there (memory/target, for example),
        # but this is in general a bad situation to have reached.
        log.exception(
            "Store corrupted at %s!  Domain %d's configuration may be "
            "affected.", self.vmpath, self.domid)
        details = []
    return details
def storeChanged(self, _):
    """Watch callback for this VM's store path: pull changed settings
    into self.info and refresh the domain section of the store if
    anything differed.

    @param _: unused watch argument
    @return: 1 (presumably keeps the watch registered -- confirm against
             xswatch)
    """
    log.trace("XendDomainInfo.storeChanged");

    changed = False

    # Track the flag in this scope: an assignment inside a nested function
    # would only bind a local of that function, so changes to these
    # parameters would never be noticed.  readVMDetails returns [] when the
    # store is corrupted, in which case the loop does nothing.
    stored = self.readVMDetails(VM_CONFIG_PARAMS)
    for (entry, value) in zip(VM_CONFIG_PARAMS, stored):
        name = entry[0]
        if value is not None and self.info[name] != value:
            self.info[name] = value
            changed = True

    im = self.readVm('image')
    current_im = self.info['image']
    if (im is not None and
        (current_im is None or sxp.to_string(current_im) != im)):
        self.info['image'] = sxp.from_string(im)
        changed = True

    if changed:
        # Update the domain section of the store, as this contains some
        # parameters derived from the VM configuration.
        self.storeDomDetails()

    return 1
def augmentInfo(self, priv):
    """Augment self.info, as given to us through {@link #recreate}, with
    values taken from the store.  This recovers those values known to xend
    but not to the hypervisor.

    @param priv: if true, 'memory' and 'maxmem' are not taken from the
                 store
    """
    def useIfNeeded(name, val):
        if not self.infoIsSet(name) and val is not None:
            self.info[name] = val

    # Always work on a copy: appending to VM_STORE_ENTRIES itself would
    # grow the module-level table by one ('image', str) entry per call.
    entries = VM_STORE_ENTRIES[:]
    if priv:
        entries.remove(('memory', int))
        entries.remove(('maxmem', int))
    entries.append(('image', str))

    # readVMDetails returns [] on a corrupted store; nothing is applied
    # in that case.
    for (entry, val) in zip(entries, self.readVMDetails(entries)):
        useIfNeeded(entry[0], val)

    device = []
    for c in controllerClasses:
        devconfig = self.getDeviceConfigurations(c)
        if devconfig:
            device.extend([(c, x) for x in devconfig])
    useIfNeeded('device', device)
536 def validateInfo(self):
537 """Validate and normalise the info block. This has either been parsed
538 by parseConfig, or received from xc through recreate and augmented by
539 the current store contents.
540 """
541 def defaultInfo(name, val):
542 if not self.infoIsSet(name):
543 self.info[name] = val()
545 try:
546 defaultInfo('name', lambda: "Domain-%d" % self.domid)
547 defaultInfo('ssidref', lambda: 0)
548 defaultInfo('on_poweroff', lambda: "destroy")
549 defaultInfo('on_reboot', lambda: "restart")
550 defaultInfo('on_crash', lambda: "restart")
551 defaultInfo('cpu', lambda: None)
552 defaultInfo('cpus', lambda: [])
553 defaultInfo('cpu_weight', lambda: 1.0)
555 # some domains don't have a config file (e.g. dom0 )
556 # to set number of vcpus so we derive available cpus
557 # from max_vcpu_id which is present for running domains.
558 if not self.infoIsSet('vcpus') and self.infoIsSet('max_vcpu_id'):
559 avail = int(self.info['max_vcpu_id'])+1
560 else:
561 avail = int(1)
563 defaultInfo('vcpus', lambda: avail)
564 defaultInfo('online_vcpus', lambda: self.info['vcpus'])
565 defaultInfo('max_vcpu_id', lambda: self.info['vcpus']-1)
566 defaultInfo('vcpu_avail', lambda: (1 << self.info['vcpus']) - 1)
568 defaultInfo('memory', lambda: 0)
569 defaultInfo('maxmem', lambda: 0)
570 defaultInfo('bootloader', lambda: None)
571 defaultInfo('backend', lambda: [])
572 defaultInfo('device', lambda: [])
573 defaultInfo('image', lambda: None)
575 self.check_name(self.info['name'])
577 if isinstance(self.info['image'], str):
578 self.info['image'] = sxp.from_string(self.info['image'])
580 if self.info['memory'] == 0:
581 if self.infoIsSet('mem_kb'):
582 self.info['memory'] = (self.info['mem_kb'] + 1023) / 1024
584 if self.info['maxmem'] < self.info['memory']:
585 self.info['maxmem'] = self.info['memory']
587 for (n, c) in self.info['device']:
588 if not n or not c or n not in controllerClasses:
589 raise VmError('invalid device (%s, %s)' %
590 (str(n), str(c)))
592 for event in ['on_poweroff', 'on_reboot', 'on_crash']:
593 if self.info[event] not in restart_modes:
594 raise VmError('invalid restart event: %s = %s' %
595 (event, str(self.info[event])))
597 except KeyError, exn:
598 log.exception(exn)
599 raise VmError('Unspecified domain detail: %s' % exn)
def readVm(self, *args):
    """Call xstransact.Read with self.vmpath followed by args."""
    vm_path = self.vmpath
    return xstransact.Read(vm_path, *args)
def writeVm(self, *args):
    """Call xstransact.Write with self.vmpath followed by args."""
    vm_path = self.vmpath
    return xstransact.Write(vm_path, *args)
def removeVm(self, *args):
    """Call xstransact.Remove with self.vmpath followed by args."""
    vm_path = self.vmpath
    return xstransact.Remove(vm_path, *args)
def gatherVm(self, *args):
    """Call xstransact.Gather with self.vmpath followed by args."""
    vm_path = self.vmpath
    return xstransact.Gather(vm_path, *args)
615 ## public:
def storeVm(self, *args):
    """Call xstransact.Store with self.vmpath followed by args."""
    vm_path = self.vmpath
    return xstransact.Store(vm_path, *args)
621 ## private:
def readDom(self, *args):
    """Call xstransact.Read with self.dompath followed by args."""
    dom_path = self.dompath
    return xstransact.Read(dom_path, *args)
def writeDom(self, *args):
    """Call xstransact.Write with self.dompath followed by args."""
    dom_path = self.dompath
    return xstransact.Write(dom_path, *args)
630 ## public:
def removeDom(self, *args):
    """Call xstransact.Remove with self.dompath followed by args."""
    dom_path = self.dompath
    return xstransact.Remove(dom_path, *args)
def recreateDom(self):
    """Run _recreateDom through complete() on this domain's store path."""
    complete(self.dompath, self._recreateDom)
def _recreateDom(self, t):
    """Reset the node behind transaction t: remove it, create it again,
    and set its permissions for this domain.

    @param t: transaction object supplying remove, mkdir and
              set_permissions
    """
    permissions = { 'dom' : self.domid }
    t.remove()
    t.mkdir()
    t.set_permissions(permissions)
644 ## private:
def storeDom(self, *args):
    """Call xstransact.Store with self.dompath followed by args."""
    dom_path = self.dompath
    return xstransact.Store(dom_path, *args)
650 ## public:
def completeRestore(self, store_mfn, console_mfn):
    """Record the store and console frame numbers of a restored domain,
    then introduce it, write its details and start watching it.

    @param store_mfn:   value kept as self.store_mfn
    @param console_mfn: value kept as self.console_mfn
    """

    log.debug("XendDomainInfo.completeRestore")

    self.store_mfn, self.console_mfn = store_mfn, console_mfn

    for finish in (self.introduceDomain, self.storeDomDetails,
                   self.registerWatches, self.refreshShutdown):
        finish()

    log.debug("XendDomainInfo.completeRestore done")
def storeVmDetails(self):
    """Write every set VM_STORE_ENTRIES value (as a string), plus the
    image configuration if present, to the VM section of the store."""
    names = [entry[0] for entry in VM_STORE_ENTRIES]
    to_store = dict([(n, str(self.info[n]))
                     for n in names if self.infoIsSet(n)])

    if self.infoIsSet('image'):
        to_store['image'] = sxp.to_string(self.info['image'])

    log.debug("Storing VM details: %s", to_store)

    self.writeVm(to_store)
def storeDomDetails(self):
    """Write this domain's details (id, VM path, name, console limit,
    memory target, known ports / ring references and per-VCPU
    availability) to the domain section of the store."""
    to_store = {
        'domid':              str(self.domid),
        'vm':                 self.vmpath,
        'name':               self.info['name'],
        'console/limit':      str(xroot.get_console_limit() * 1024),
        'memory/target':      str(self.info['memory'] * 1024)
        }

    # These are only written once they have been assigned.
    optional = [
        ('console/port',     self.console_port),
        ('console/ring-ref', self.console_mfn),
        ('store/port',       self.store_port),
        ('store/ring-ref',   self.store_mfn),
        ]
    for (key, value) in optional:
        if value is not None:
            to_store[key] = str(value)

    to_store.update(self.vcpuDomDetails())

    log.debug("Storing domain details: %s", to_store)

    self.writeDom(to_store)
707 ## private:
def vcpuDomDetails(self):
    """Build the per-VCPU availability entries for the store.

    @return: dictionary mapping "cpu/<n>/availability" to 'online' or
             'offline' for each of the info['vcpus'] VCPUs, according to
             bit n of info['vcpu_avail']
    """
    result = {}
    for vcpu in range(0, self.info['vcpus']):
        if self.info['vcpu_avail'] & (1 << vcpu):
            status = 'online'
        else:
            status = 'offline'
        result["cpu/%d/availability" % vcpu] = status
    return result
722 ## public:
def registerWatches(self):
    """Register a watch on this VM's entries in the store, and the
    domain's control/shutdown node, so that when they are changed
    externally, we keep up to date.  This should only be called by {@link
    #create}, {@link #recreate}, or {@link #restore}, once the domain's
    details have been written, but before the new instance is returned."""
    shutdown_node = self.dompath + '/control/shutdown'
    self.vmWatch = xswatch(self.vmpath, self.storeChanged)
    self.shutdownWatch = xswatch(shutdown_node, self.handleShutdownWatch)
def getDomid(self):
    """Return this domain's id (self.domid)."""
    domid = self.domid
    return domid
def setName(self, name):
    """Rename this VM: validate the name, record it in self.info and
    write it to the VM section of the store.

    @param name: new name; check_name decides whether it is acceptable
    """
    self.check_name(name)
    self.info.update({'name': name})
    self.storeVm("name", name)
def getName(self):
    """Return the 'name' entry of self.info."""
    name = self.info['name']
    return name
def getDomainPath(self):
    """Return this domain's path in the store (self.dompath)."""
    path = self.dompath
    return path
def getStorePort(self):
    """Return self.store_port.

    For use only by image.py and XendCheckpoint.py."""
    port = self.store_port
    return port
def getConsolePort(self):
    """Return self.console_port.

    For use only by image.py and XendCheckpoint.py"""
    port = self.console_port
    return port
def getVCpuCount(self):
    """Return the 'vcpus' entry of self.info."""
    count = self.info['vcpus']
    return count
def setVCpuCount(self, vcpus):
    """Mark the first `vcpus` VCPUs as available and publish the new
    availability to the store.

    @param vcpus: number of VCPUs to make available
    """
    mask = (1 << vcpus) - 1
    self.info['vcpu_avail'] = mask
    self.storeVm('vcpu_avail', mask)
    self.writeDom(self.vcpuDomDetails())
def getSsidref(self):
    """Return the 'ssidref' entry of self.info."""
    ssidref = self.info['ssidref']
    return ssidref
def getMemoryTarget(self):
    """Get this domain's target memory size, in KB."""
    memory = self.info['memory']
    return memory * 1024
def getResume(self):
    """Return the 'resume' entry of self.info formatted as a string."""
    flag = self.info['resume']
    return "%s" % flag
def endRestore(self):
    """Clear the resume flag by calling setResume(False)."""
    resuming = False
    self.setResume(resuming)
def setResume(self, state):
    """Store state as the 'resume' entry of self.info.

    @param state: new value of the resume flag
    """
    self.info.update({'resume': state})
def refreshShutdown(self, xeninfo = None):
    """Examine the domain's current state and act on it: clean up a
    vanished or dying domain, handle a crash or shutdown, or destroy a
    domain whose shutdown request has timed out.

    The checks run while holding refresh_shutdown_lock; a restart, if
    one is needed, is dispatched through maybeRestart after the lock has
    been released.

    @param xeninfo: domain info dictionary; fetched with dom_get when
                    omitted
    """
    # If set at the end of this method, a restart is required, with the
    # given reason.  This restart has to be done out of the scope of
    # refresh_shutdown_lock.
    restart_reason = None

    self.refresh_shutdown_lock.acquire()
    try:
        if xeninfo is None:
            xeninfo = dom_get(self.domid)
            if xeninfo is None:
                # The domain no longer exists.  This will occur if we have
                # scheduled a timer to check for shutdown timeouts and the
                # shutdown succeeded.  It will also occur if someone
                # destroys a domain beneath us.  We clean up the domain,
                # just in case, but we can't clean up the VM, because that
                # VM may have migrated to a different domain on this
                # machine.
                self.cleanupDomain()
                return

        if xeninfo['dying']:
            # Dying means that a domain has been destroyed, but has not
            # yet been cleaned up by Xen.  This state could persist
            # indefinitely if, for example, another domain has some of its
            # pages mapped.  We might like to diagnose this problem in the
            # future, but for now all we do is make sure that it's not us
            # holding the pages, by calling cleanupDomain.  We can't
            # clean up the VM, as above.
            self.cleanupDomain()
            return

        elif xeninfo['crashed']:
            if self.readDom('xend/shutdown_completed'):
                # We've seen this shutdown already, but we are preserving
                # the domain for debugging.  Leave it alone.
                return

            log.warn('Domain has crashed: name=%s id=%d.',
                     self.info['name'], self.domid)

            if xroot.get_enable_dump():
                self.dumpCore()

            # Handled after the lock is released, via on_crash.
            restart_reason = 'crash'

        elif xeninfo['shutdown']:
            if self.readDom('xend/shutdown_completed'):
                # We've seen this shutdown already, but we are preserving
                # the domain for debugging.  Leave it alone.
                return

            else:
                reason = shutdown_reason(xeninfo['shutdown_reason'])

                log.info('Domain has shutdown: name=%s id=%d reason=%s.',
                         self.info['name'], self.domid, reason)

                self.clearRestart()

                if reason == 'suspend':
                    self.state_set(STATE_DOM_SHUTDOWN)
                    # Don't destroy the domain.  XendCheckpoint will do
                    # this once it has finished.  However, stop watching
                    # the VM path now, otherwise we will end up with one
                    # watch for the old domain, and one for the new.
                    self.unwatchVm()
                elif reason in ['poweroff', 'reboot']:
                    # Handled after the lock is released, via
                    # on_poweroff / on_reboot.
                    restart_reason = reason
                else:
                    # Any other reason: destroy the domain.
                    self.destroy()

        elif self.dompath is None:
            # We have yet to manage to call introduceDomain on this
            # domain.  This can happen if a restore is in progress, or has
            # failed.  Ignore this domain.
            pass
        else:
            # Domain is alive.  If we are shutting it down, then check
            # the timeout on that, and destroy it if necessary.

            if self.shutdownStartTime:
                # Time left before the shutdown request expires.
                timeout = (SHUTDOWN_TIMEOUT - time.time() +
                           self.shutdownStartTime)
                if timeout < 0:
                    log.info(
                        "Domain shutdown timeout expired: name=%s id=%s",
                        self.info['name'], self.domid)
                    self.destroy()
    finally:
        self.refresh_shutdown_lock.release()

    if restart_reason:
        self.maybeRestart(restart_reason)
def handleShutdownWatch(self, _):
    """Watch callback for the domain's control/shutdown node.  For any
    request other than 'suspend', note when the shutdown started and
    schedule a refreshShutdown call for when SHUTDOWN_TIMEOUT has passed.

    @param _: unused watch argument
    @return: True (presumably keeps the watch registered -- confirm
             against xswatch)
    """
    log.debug('XendDomainInfo.handleShutdownWatch')

    reason = self.readDom('control/shutdown')
    if not reason or reason == 'suspend':
        return True

    sst = self.readDom('xend/shutdown_start_time')
    now = time.time()
    if sst:
        # A start time is already recorded in the store: count from it.
        self.shutdownStartTime = float(sst)
        timeout = float(sst) + SHUTDOWN_TIMEOUT - now
    else:
        self.shutdownStartTime = now
        self.storeDom('xend/shutdown_start_time', now)
        timeout = SHUTDOWN_TIMEOUT

    log.trace(
        "Scheduling refreshShutdown on domain %d in %ds.",
        self.domid, timeout)
    timer = threading.Timer(timeout, self.refreshShutdown)
    timer.start()

    return True
def shutdown(self, reason):
    """Request a shutdown by writing reason to the domain's
    control/shutdown node.

    @param reason: one of the strings in shutdown_reasons
    @raise: XendError if reason is not a known shutdown reason
    """
    known = shutdown_reasons.values()
    if reason not in known:
        raise XendError('Invalid reason: %s' % reason)
    self.storeDom("control/shutdown", reason)
912 ## private:
def clearRestart(self):
    """Remove the xend/shutdown_start_time entry from the domain's
    section of the store."""
    entry = "xend/shutdown_start_time"
    self.removeDom(entry)
def maybeRestart(self, reason):
    """Run the action configured for a shutdown reason.

    @param reason: suffix of the 'on_<reason>' setting to consult
    """
    # Dispatch to the correct method based upon the configured on_{reason}
    # behaviour.
    handlers = {
        "destroy"        : self.destroy,
        "restart"        : self.restart,
        "preserve"       : self.preserve,
        "rename-restart" : self.renameRestart,
        }
    action = handlers[self.info['on_' + reason]]
    action()
def renameRestart(self):
    """Call restart with True as its only argument."""
    rename = True
    self.restart(rename)
def dumpCore(self):
    """Create a core dump for this domain.  Nothrow guarantee."""

    try:
        name_and_id = (self.info['name'], self.domid)
        corefile = "/var/xen/dump/%s.%s.core" % name_and_id
        xc.domain_dumpcore(self.domid, corefile)

    except:
        # Deliberately swallow everything: failing to dump must not get
        # in the way of handling the crash itself.
        log.exception("XendDomainInfo.dumpCore failed: id = %s name = %s",
                      self.domid, self.info['name'])
944 ## public:
def setMemoryTarget(self, target):
    """Set the memory target of this domain.

    The value is recorded in self.info and the VM section of the store
    as given, and in the domain section multiplied by 1024.

    @param target In MiB.
    """
    log.debug("Setting memory target of domain %s (%d) to %d MiB.",
              self.info['name'], self.domid, target)

    self.info['memory'] = target
    target_kb = target << 10
    self.storeVm("memory", target)
    self.storeDom("memory/target", target_kb)
def update(self, info = None):
    """Update with info from xc.domain_getinfo().

    @param info: domain info dictionary; looked up with dom_get when
                 omitted.  If none can be found, nothing is changed.
    """

    log.trace("XendDomainInfo.update(%s) on domain %d", info, self.domid)

    info = info or dom_get(self.domid)
    if not info:
        return

    self.info.update(info)
    self.validateInfo()
    self.refreshShutdown(info)

    log.trace("XendDomainInfo.update done on domain %d: %s", self.domid,
              self.info)
977 ## private:
def state_set(self, state):
    """Change self.state while holding state_updated, waking every
    waiter if the value actually changed.

    @param state: new state value
    """
    cond = self.state_updated
    cond.acquire()
    try:
        if self.state == state:
            return
        self.state = state
        cond.notifyAll()
    finally:
        cond.release()
989 ## public:
def waitForShutdown(self):
    """Block on state_updated until self.state is no longer
    STATE_DOM_OK."""
    cond = self.state_updated
    cond.acquire()
    try:
        # Re-check after every wake-up; wait() may return while the state
        # is still unchanged.
        while self.state == STATE_DOM_OK:
            cond.wait()
    finally:
        cond.release()
def __str__(self):
    """Return a short "<domain id=... name=... memory=... ssidref=...>"
    summary of this domain."""
    pieces = [
        "<domain",
        " id=", str(self.domid),
        " name=", self.info['name'],
        " memory=", str(self.info['memory']),
        " ssidref=", str(self.info['ssidref']),
        ">",
        ]
    return "".join(pieces)

__repr__ = __str__
1012 ## private:
def createDevice(self, deviceClass, devconfig):
    """Forward to createDevice on the controller for deviceClass.

    @param deviceClass: device type name, resolved by getDeviceController
    @param devconfig:   device configuration handed to the controller
    @return: the controller's result
    """
    controller = self.getDeviceController(deviceClass)
    return controller.createDevice(devconfig)
def waitForDevices_(self, deviceClass):
    """Forward to waitForDevices on the controller for deviceClass.

    @param deviceClass: device type name, resolved by getDeviceController
    @return: the controller's result
    """
    controller = self.getDeviceController(deviceClass)
    return controller.waitForDevices()
def waitForDevice(self, deviceClass, devid):
    """Forward to waitForDevice on the controller for deviceClass.

    @param deviceClass: device type name, resolved by getDeviceController
    @param devid:       device id handed to the controller
    @return: the controller's result
    """
    controller = self.getDeviceController(deviceClass)
    return controller.waitForDevice(devid)
def reconfigureDevice(self, deviceClass, devid, devconfig):
    """Forward to reconfigureDevice on the controller for deviceClass.

    @param deviceClass: device type name, resolved by getDeviceController
    @param devid:       device id handed to the controller
    @param devconfig:   new device configuration
    @return: the controller's result
    """
    controller = self.getDeviceController(deviceClass)
    return controller.reconfigureDevice(devid, devconfig)
1031 ## public:
def destroyDevice(self, deviceClass, devid):
    """Forward to destroyDevice on the controller for deviceClass.

    @param deviceClass: device type name, resolved by getDeviceController
    @param devid:       device id handed to the controller
    @return: the controller's result
    """
    controller = self.getDeviceController(deviceClass)
    return controller.destroyDevice(devid)
def getDeviceSxprs(self, deviceClass):
    """Return sxprs() from the controller for deviceClass.

    @param deviceClass: device type name, resolved by getDeviceController
    """
    controller = self.getDeviceController(deviceClass)
    return controller.sxprs()
1041 ## private:
def getDeviceConfigurations(self, deviceClass):
    """Return configurations() from the controller for deviceClass.

    @param deviceClass: device type name, resolved by getDeviceController
    """
    controller = self.getDeviceController(deviceClass)
    return controller.configurations()
def getDeviceController(self, name):
    """Instantiate the controller class registered for a device type.

    @param name: device type name, a key of controllerClasses
    @return: a new controller instance constructed with this domain
    @raise: XendError if the device type is unknown
    """
    if name in controllerClasses:
        controller_class = controllerClasses[name]
        return controller_class(self)

    raise XendError("unknown device type: " + str(name))
1054 ## public:
def sxpr(self):
    """Build the s-expression (nested list) describing this domain:
    its id, the round-tripping configuration entries that are set, the
    image, device configurations, a six-character state string, and
    shutdown / timing / frame details where available.

    @return: list starting with 'domain'
    """
    sxpr = ['domain',
            ['domid', self.domid]]

    for (key, _conv) in ROUNDTRIPPING_CONFIG_ENTRIES:
        if self.infoIsSet(key):
            sxpr.append([key, self.info[key]])

    if self.infoIsSet('image'):
        sxpr.append(['image', self.info['image']])

    for cls in controllerClasses:
        for config in self.getDeviceConfigurations(cls):
            sxpr.append(['device', config])

    def stateChar(name):
        # First letter of the flag if it is on, '-' if it is off, and
        # '?' if the flag is unknown.
        if name not in self.info:
            return '?'
        if self.info[name]:
            return name[0]
        return '-'

    flags = ['running', 'blocked', 'paused', 'shutdown', 'crashed',
             'dying']
    state = "".join([stateChar(flag) for flag in flags])

    sxpr.append(['state', state])
    if self.infoIsSet('shutdown'):
        reason = shutdown_reason(self.info['shutdown_reason'])
        sxpr.append(['shutdown_reason', reason])
    if self.infoIsSet('cpu_time'):
        sxpr.append(['cpu_time', self.info['cpu_time']/1e9])
    sxpr.append(['online_vcpus', self.info['online_vcpus']])

    if self.infoIsSet('start_time'):
        started = self.info['start_time']
        up_time = time.time() - started
        sxpr.append(['up_time', str(up_time) ])
        sxpr.append(['start_time', str(started) ])

    if self.store_mfn:
        sxpr.append(['store_mfn', self.store_mfn])
    if self.console_mfn:
        sxpr.append(['console_mfn', self.console_mfn])

    return sxpr
1107 def getVCPUInfo(self):
1108 try:
1109 # We include the domain name and ID, to help xm.
1110 sxpr = ['domain',
1111 ['domid', self.domid],
1112 ['name', self.info['name']],
1113 ['vcpu_count', self.info['online_vcpus']]]
1115 for i in range(0, self.info['max_vcpu_id']+1):
1116 info = xc.vcpu_getinfo(self.domid, i)
1118 sxpr.append(['vcpu',
1119 ['number', i],
1120 ['online', info['online']],
1121 ['blocked', info['blocked']],
1122 ['running', info['running']],
1123 ['cpu_time', info['cpu_time'] / 1e9],
1124 ['cpu', info['cpu']],
1125 ['cpumap', info['cpumap']]])
1127 return sxpr
1129 except RuntimeError, exn:
1130 raise XendError(str(exn))
1133 ## private:
def check_name(self, name):
    """Check if a vm name is valid. Valid names contain alphabetic characters,
    digits, or characters in '_-.:/+'.
    The same name cannot be used for more than one vm at the same time.

    @param name: name
    @raise: VmError if invalid
    """
    if name is None or name == '':
        raise VmError('missing vm name')

    allowed = string.digits + '_-.:/+' + string.ascii_letters
    for c in name:
        if c not in allowed:
            raise VmError('invalid vm name')

    # The name is well-formed; now make sure no other domain holds it.
    dominfo = domain_by_name(name)
    if dominfo:
        if self.domid is None:
            raise VmError("VM name '%s' already in use by domain %d" %
                          (name, dominfo.domid))
        if dominfo.domid != self.domid:
            raise VmError("VM name '%s' is used in both domains %d and %d" %
                          (name, self.domid, dominfo.domid))
def construct(self):
    """Construct the domain: create it through xc, record its id and
    store path, reset its section of the store, and set its maximum
    number of VCPUs.

    @raise: VmError on error
    """

    log.debug('XendDomainInfo.construct: %s %s',
              self.domid,
              self.info['ssidref'])

    handle = uuid.fromString(self.info['uuid'])
    self.domid = xc.domain_create(
        dom = 0, ssidref = self.info['ssidref'], handle = handle)

    if self.domid < 0:
        raise VmError('Creating domain failed: name=%s' %
                      self.info['name'])

    self.dompath = GetDomainPath(self.domid)

    self.recreateDom()

    # Set maximum number of vcpus in domain
    max_vcpus = int(self.info['vcpus'])
    xc.domain_max_vcpus(self.domid, max_vcpus)
1188 def introduceDomain(self):
1189 assert self.domid is not None
1190 assert self.store_mfn is not None
1191 assert self.store_port is not None
1193 try:
1194 IntroduceDomain(self.domid, self.store_mfn, self.store_port)
1195 except RuntimeError, exn:
1196 raise XendError(str(exn))
1199 def initDomain(self):
1200 log.debug('XendDomainInfo.initDomain: %s %s',
1201 self.domid,
1202 self.info['cpu_weight'])
1204 if not self.infoIsSet('image'):
1205 raise VmError('Missing image in configuration')
1207 try:
1208 self.image = image.create(self,
1209 self.info['image'],
1210 self.info['device'])
1212 xc.domain_setcpuweight(self.domid, self.info['cpu_weight'])
1214 # repin domain vcpus if a restricted cpus list is provided
1215 # this is done prior to memory allocation to aide in memory
1216 # distribution for NUMA systems.
1217 cpus = self.info['cpus']
1218 if cpus is not None and len(cpus) > 0:
1219 for v in range(0, self.info['max_vcpu_id']+1):
1220 # pincpu takes a list of ints
1221 cpu = [ int( cpus[v % len(cpus)] ) ]
1222 xc.vcpu_setaffinity(self.domid, v, cpu)
1224 m = self.image.getDomainMemory(self.info['memory'] * 1024)
1225 balloon.free(m)
1226 xc.domain_setmaxmem(self.domid, m)
1227 xc.domain_memory_increase_reservation(self.domid, m, 0, 0)
1229 self.createChannels()
1231 channel_details = self.image.createImage()
1233 self.store_mfn = channel_details['store_mfn']
1234 if 'console_mfn' in channel_details:
1235 self.console_mfn = channel_details['console_mfn']
1237 self.introduceDomain()
1239 self.createDevices()
1241 if self.info['bootloader']:
1242 self.image.cleanupBootloading()
1244 self.info['start_time'] = time.time()
1246 except RuntimeError, exn:
1247 raise VmError(str(exn))
1250 ## public:
def cleanupDomain(self):
    """Cleanup domain resources; release devices.

    Runs entirely under refresh_shutdown_lock.  Failures while destroying
    the image, removing the domain path or renaming the domain are logged
    and otherwise ignored, so that the remaining steps still run.
    Intended to be idempotent and not to throw.
    """
    self.refresh_shutdown_lock.acquire()
    try:
        self.unwatchShutdown()

        self.release_devices()

        if self.image:
            try:
                self.image.destroy()
            except:
                log.exception(
                    "XendDomainInfo.cleanup: image.destroy() failed.")
            # Forget the image whether or not destroy() succeeded.
            self.image = None

        try:
            self.removeDom()
        except:
            log.exception("Removing domain path failed.")

        try:
            # Mark the domain as a zombie, but only once.
            current_name = self.info['name']
            if not current_name.startswith(ZOMBIE_PREFIX):
                self.info['name'] = ZOMBIE_PREFIX + current_name
        except:
            log.exception("Renaming Zombie failed.")

        self.state_set(STATE_DOM_SHUTDOWN)
    finally:
        self.refresh_shutdown_lock.release()
def cleanupVm(self):
    """Cleanup VM resources: drop the VM watch, then remove the VM path.

    A failure while removing the VM path is logged and swallowed.
    Intended to be idempotent and not to throw.
    """
    self.unwatchVm()

    try:
        self.removeVm()
    except:
        # Best effort only; there is nothing else to do here.
        log.exception("Removing VM path failed.")
1297 ## private:
def unwatchVm(self):
    """Remove the watch on the VM path, if any.

    self.vmWatch is always reset to None, even when unwatching fails;
    any failure is logged and swallowed.  Idempotent.
    """
    try:
        # Nested try blocks: the finally clause clears the attribute, the
        # outer except clause then logs whatever went wrong.
        try:
            watch = self.vmWatch
            if watch:
                watch.unwatch()
        finally:
            self.vmWatch = None
    except:
        log.exception("Unwatching VM path failed.")
def unwatchShutdown(self):
    """Remove the watch on the domain's control/shutdown node, if any.

    self.shutdownWatch is always reset to None, even when unwatching
    fails; any failure is logged and swallowed.  Idempotent.  Expects to
    be protected by the refresh_shutdown_lock.
    """
    try:
        # Nested try blocks: the finally clause clears the attribute, the
        # outer except clause then logs whatever went wrong.
        try:
            watch = self.shutdownWatch
            if watch:
                watch.unwatch()
        finally:
            self.shutdownWatch = None
    except:
        log.exception("Unwatching control/shutdown failed.")
1328 ## public:
def destroy(self):
    """Cleanup VM and destroy domain.

    The domain itself is only destroyed when a domain path has been
    assigned.  Intended not to throw.
    """
    log.debug("XendDomainInfo.destroy: domid=%s", self.domid)

    self.cleanupVm()

    if self.dompath is None:
        # No domain path was ever assigned, so only the VM was cleaned up.
        return
    self.destroyDomain()
def destroyDomain(self):
    """Destroy the domain (when it has an id) and clean up after it.

    A failure of xc.domain_destroy is logged and does not prevent the
    subsequent cleanupDomain() call.
    """
    log.debug("XendDomainInfo.destroyDomain(%s)", self.domid)

    try:
        if self.domid is not None:
            xc.domain_destroy(self.domid)
    except:
        log.exception("XendDomainInfo.destroy: xc.domain_destroy failed.")

    # Always release the domain's resources, destroyed or not.
    self.cleanupDomain()
1352 ## private:
def release_devices(self):
    """Release all domain's devices.

    Walks every registered device class below "<dompath>/device" inside a
    store transaction and removes each listed entry.  The whole pass is
    repeated until the transaction commits.  Failures of individual
    removals are logged and swallowed.
    """
    committed = False
    while not committed:
        t = xstransact("%s/device" % self.dompath)
        for devclass in controllerClasses.keys():
            for dev in t.list(devclass):
                # NOTE(review): 'dev' is passed to remove() without the
                # device-class prefix -- confirm that xstransact resolves
                # it to the intended node.
                try:
                    t.remove(dev)
                except:
                    # Log and swallow any exceptions in removal --
                    # there's nothing more we can do.
                    log.exception(
                        "Device release failed: %s; %s; %s",
                        self.info['name'], devclass, dev)
        committed = t.commit()
def createChannels(self):
    """Create the channels to the domain.

    Two event channels are allocated, in this order: one stored as
    store_port and one stored as console_port.
    """
    # Kept as two separate statements so that store_port is already set
    # should the second allocation fail.
    self.store_port = self.createChannel()
    self.console_port = self.createChannel()
def createChannel(self):
    """Create an event channel to the domain.

    @return: whatever xc.evtchn_alloc_unbound returns for an unbound
             channel between this domain and domain 0
    @raise: any exception from the allocation, after logging it
    """
    try:
        port = xc.evtchn_alloc_unbound(dom=self.domid, remote_dom=0)
    except:
        log.exception("Exception in alloc_unbound(%d)", self.domid)
        raise
    return port
1390 ## public:
def createDevices(self):
    """Create the devices for a vm.

    Every (class, config) pair in info['device'] is created in order;
    afterwards the image, if there is one, creates its device model.

    @raise: VmError for invalid devices
    """
    for (devclass, devconfig) in self.info['device']:
        self.createDevice(devclass, devconfig)

    if self.image:
        self.image.createDeviceModel()
1404 ## public:
def testMigrateDevices(self, live, dst):
    """Notify all devices about the intention to migrate.

    Each configured device is asked with the DEV_MIGRATE_TEST step; the
    first one answering with a non-zero result aborts the check.

    @param live: passed through to the device controller
    @param dst:  passed through to the device controller
    @raise: XendError for a device that cannot be migrated
    """
    for (devclass, devconfig) in self.info['device']:
        status = self.migrateDevice(devclass, devconfig, live, dst,
                                    DEV_MIGRATE_TEST)
        if status != 0:
            raise XendError("Device of type '%s' refuses migration." %
                            devclass)
def migrateDevices(self, live, dst, step, domName=''):
    """Notify the devices about migration.

    Every configured device is told about migration step 'step'.  If any
    notification fails, all devices are asked to recover and the original
    exception is re-raised: devices that had already completed this step
    recover from 'step', the failing device and all later ones recover
    from 'step - 1'.
    """
    completed = 0
    try:
        for (devclass, devconfig) in self.info['device']:
            self.migrateDevice(devclass, devconfig, live, dst, step,
                               domName)
            completed += 1
    except:
        for index, (devclass, devconfig) in enumerate(self.info['device']):
            # The first 'completed' devices finished the current step;
            # everything after them never got that far.
            if index < completed:
                recover_step = step
            else:
                recover_step = step - 1
            self.recoverMigrateDevice(devclass, devconfig, live, dst,
                                      recover_step, domName)
        raise
def migrateDevice(self, deviceClass, deviceConfig, live, dst, step,
                  domName=''):
    """Forward one migration step to the controller of deviceClass.

    @return: the result of the controller's migrate() call
    """
    controller = self.getDeviceController(deviceClass)
    return controller.migrate(deviceConfig, live, dst, step, domName)
def recoverMigrateDevice(self, deviceClass, deviceConfig, live, dst, step,
                         domName=''):
    """Forward a migration recovery request to the controller of
    deviceClass.

    @return: the result of the controller's recover_migrate() call
    """
    controller = self.getDeviceController(deviceClass)
    return controller.recover_migrate(deviceConfig, live, dst, step,
                                      domName)
def waitForDevices(self):
    """Wait for this domain's configured devices to connect.

    Delegates to waitForDevices_ once per registered device class.

    @raise: VmError if any device fails to initialise.
    """
    for devclass in controllerClasses:
        self.waitForDevices_(devclass)
def device_create(self, dev_config):
    """Create a new device.

    The device is created, waited for, and only then recorded in
    info['device'].

    @param dev_config: device configuration
    @return: the s-expression the device's controller reports for it
    """
    devclass = sxp.name(dev_config)
    new_id = self.createDevice(devclass, dev_config)
    self.waitForDevice(devclass, new_id)
    self.info['device'].append((devclass, dev_config))
    controller = self.getDeviceController(devclass)
    return controller.sxpr(new_id)
def device_configure(self, dev_config, devid):
    """Configure an existing device.

    The device class is taken from the name of the configuration
    s-expression.

    @param dev_config: device configuration
    @param devid:      device id
    """
    self.reconfigureDevice(sxp.name(dev_config), devid, dev_config)
def pause(self):
    """Pause this domain via xc.domain_pause."""
    xc.domain_pause(self.domid)
def unpause(self):
    """Unpause this domain via xc.domain_unpause."""
    xc.domain_unpause(self.domid)
1475 ## private:
def restart(self, rename = False):
    """Restart the domain after it has exited.

    Two guards protect against restart loops: a RESTART_IN_PROGRESS
    marker left behind by an earlier, failed restart, and a minimum time
    (MINIMUM_RESTART_TIME) between two restarts.  When either guard
    trips, the domain is destroyed instead of restarted.  Any failure
    during the restart itself is logged, not propagated.

    @param rename True if the old domain is to be renamed and preserved,
    False if it is to be destroyed.
    """
    self.configure_bootloader()
    # Captured before the old domain is renamed or destroyed.
    config = self.sxpr()

    if self.readVm(RESTART_IN_PROGRESS):
        log.error('Xend failed during restart of domain %d.  '
                  'Refusing to restart to avoid loops.',
                  self.domid)
        self.destroy()
        return

    self.writeVm(RESTART_IN_PROGRESS, 'True')

    now = time.time()
    previous = self.readVm('xend/previous_restart_time')
    if previous:
        elapsed = now - float(previous)
        if elapsed < MINIMUM_RESTART_TIME:
            log.error(
                'VM %s restarting too fast (%f seconds since the last '
                'restart).  Refusing to restart to avoid loops.',
                self.info['name'], elapsed)
            self.destroy()
            return

    self.writeVm('xend/previous_restart_time', str(now))

    try:
        if rename:
            self.preserveForRestart()
        else:
            self.unwatchVm()
            self.destroyDomain()

        # new_dom's VM will be the same as this domain's VM, except where
        # the rename flag has instructed us to call preserveForRestart.
        # In that case, it is important that we remove the
        # RESTART_IN_PROGRESS node from the new domain, not the old one,
        # once the new one is available.

        new_dom = None
        try:
            new_dom = XendDomain.instance().domain_create(config)
            new_dom.unpause()
            new_dom.removeVm(RESTART_IN_PROGRESS)
        except:
            if new_dom:
                # Creation worked but a later step failed: clear the
                # marker on the new domain and tear it down again.
                new_dom.removeVm(RESTART_IN_PROGRESS)
                new_dom.destroy()
            else:
                self.removeVm(RESTART_IN_PROGRESS)
            raise
    except:
        log.exception('Failed to restart domain %d.', self.domid)
def preserveForRestart(self):
    """Preserve a domain that has been shut down, by giving it a new UUID,
    cloning the VM details, and giving it a new name.  This allows us to
    keep this domain for debugging, but restart a new one in its place
    preserving the restart semantics (name and UUID preserved).
    """
    fresh_name = self.generateUniqueName()
    fresh_uuid = uuid.toString(uuid.create())
    log.info("Renaming dead domain %s (%d, %s) to %s (%s).",
             self.info['name'], self.domid, self.info['uuid'],
             fresh_name, fresh_uuid)

    # Detach from the old VM path and devices before switching identity.
    self.unwatchVm()
    self.release_devices()

    self.info['name'] = fresh_name
    self.info['uuid'] = fresh_uuid
    self.vmpath = XendDomain.VMROOT + fresh_uuid

    # Write the VM details under the new path, then mark the domain as
    # preserved.
    self.storeVmDetails()
    self.preserve()
def preserve(self):
    """Keep a dead domain around: drop its VM watch, record
    'xend/shutdown_completed' in the domain's store entry and move it to
    the STATE_DOM_SHUTDOWN state.
    """
    log.info("Preserving dead domain %s (%d).",
             self.info['name'], self.domid)
    self.unwatchVm()
    self.storeDom('xend/shutdown_completed', 'True')
    self.state_set(STATE_DOM_SHUTDOWN)
1569 # private:
def generateUniqueName(self):
    """Return the first name of the form "<name>-<n>" (n = 1, 2, ...)
    that check_name() accepts.
    """
    suffix = 1
    while True:
        candidate = "%s-%d" % (self.info['name'], suffix)
        try:
            self.check_name(candidate)
        except VmError:
            # Rejected (invalid or already taken); try the next suffix.
            suffix += 1
        else:
            return candidate
def configure_bootloader(self):
    """Run the configured bootloader, if any, and store the image
    configuration it produces in info['image'].

    Does nothing when no bootloader is configured.  The bootloader is run
    against the first 'vbd' device found in this domain's configuration.

    @raise: VmError if a bootloader is configured but no disk is found
    """
    if not self.info['bootloader']:
        return
    # if we're restarting with a bootloader, we need to run it
    blcfg = None
    config = self.sxpr()
    # FIXME: this assumes that we want to use the first disk
    for dev in sxp.children(config, "device"):
        disk = sxp.child(dev, "vbd")
        if disk is None:
            continue
        fn = blkdev_uname_to_file(sxp.child_value(disk, "uname"))
        blcfg = bootloader(self.info['bootloader'], fn, 1,
                           self.info['vcpus'])
        # Stop at the first disk.  Without this the bootloader was run
        # once per vbd and the result for the *last* disk silently won.
        break
    if blcfg is None:
        msg = "Had a bootloader specified, but can't find disk"
        log.error(msg)
        raise VmError(msg)
    self.info['image'] = sxp.to_string(blcfg)
def send_sysrq(self, key):
    """Write a sysrq request for 'key' to the domain's "control/sysrq"
    store node.

    @param key: the sysrq key; must pass asserts.isCharConvertible
    """
    asserts.isCharConvertible(key)

    sysrq_char = '%c' % key
    self.storeDom("control/sysrq", sysrq_char)
def infoIsSet(self, name):
    """Tell whether info has an entry for 'name' whose value is not None.

    @return: True or False
    """
    if name not in self.info:
        return False
    return self.info[name] is not None
1613 #============================================================================
1614 # Register device controllers and their device config types.
# A map from device-class names to the subclass of DevController that
# implements the device control specific to that device-class.
controllerClasses = {}
def addControllerClass(device_class, cls):
    """Register a subclass of DevController to handle the named device-class.

    The class is tagged with the device-class name (cls.deviceClass) and
    then stored in controllerClasses under that name.
    """
    cls.deviceClass = device_class
    controllerClasses[device_class] = cls
1627 from xen.xend.server import blkif, netif, tpmif, pciif, iopif, irqif, usbif
# Register the controller for every supported device class, in this order.
for _device_class, _controller in (
        ('vbd', blkif.BlkifController),
        ('vif', netif.NetifController),
        ('vtpm', tpmif.TPMifController),
        ('pci', pciif.PciController),
        ('ioports', iopif.IOPortsController),
        ('irq', irqif.IRQController),
        ('usb', usbif.UsbifController)):
    addControllerClass(_device_class, _controller)
# Do not leave the loop variables behind in the module namespace.
del _device_class, _controller