ia64/xen-unstable

view tools/python/xen/xend/XendDomainInfo.py @ 8751:0bd023cf351e

I have previously posted a patch with changes to the xenbus for being
able to detect the reason why a frontend is created, which can be either
due to domain creation or a restore. The attached patch now puts this
functionality into XenD and allows driver implementations in Xend to
read the state and see whether a domain is currently being resumed or
created and write this value into the xenstore for hotplug scripts to
read it. The state of a domain being resumed is set when a
XendDomainInfo object is created and reset at the end of the restore
function.

Signed-off-by: Stefan Berger <stefanb@us.ibm.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Feb 03 11:54:05 2006 +0100 (2006-02-03)
parents dd5649730b32
children 66260f1a9bde
line source
1 #===========================================================================
2 # This library is free software; you can redistribute it and/or
3 # modify it under the terms of version 2.1 of the GNU Lesser General Public
4 # License as published by the Free Software Foundation.
5 #
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
10 #
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, write to the Free Software
13 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14 #============================================================================
15 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
16 # Copyright (C) 2005 XenSource Ltd
17 #============================================================================
19 """Representation of a single domain.
20 Includes support for domain construction, using
21 open-ended configurations.
23 Author: Mike Wray <mike.wray@hp.com>
25 """
27 import logging
28 import string
29 import time
30 import threading
32 import xen.lowlevel.xc
33 from xen.util import asserts
34 from xen.util.blkif import blkdev_uname_to_file
36 import balloon
37 import image
38 import sxp
39 import uuid
40 import XendDomain
41 import XendRoot
43 from xen.xend.XendBootloader import bootloader
44 from xen.xend.XendError import XendError, VmError
46 from xen.xend.xenstore.xstransact import xstransact, complete
47 from xen.xend.xenstore.xsutil import GetDomainPath, IntroduceDomain
48 from xen.xend.xenstore.xswatch import xswatch
# Shutdown codes, as reported in a domain's 'shutdown_reason'.
DOMAIN_POWEROFF = 0    # poweroff
DOMAIN_REBOOT = 1      # reboot
DOMAIN_SUSPEND = 2     # suspend
DOMAIN_CRASH = 3       # crash
DOMAIN_HALT = 4        # halt

# Map shutdown codes to strings.
shutdown_reasons = {
    DOMAIN_POWEROFF: "poweroff",
    DOMAIN_REBOOT: "reboot",
    DOMAIN_SUSPEND: "suspend",
    DOMAIN_CRASH: "crash",
    DOMAIN_HALT: "halt",
}

# The values accepted for the on_poweroff / on_reboot / on_crash settings.
restart_modes = ["restart", "destroy", "preserve", "rename-restart"]

# Values held in XendDomainInfo.state.
STATE_DOM_OK = 1
STATE_DOM_SHUTDOWN = 2

# Seconds allowed for a requested shutdown before the domain is destroyed.
SHUTDOWN_TIMEOUT = 30

ZOMBIE_PREFIX = 'Zombie-'

# Minimum time between domain restarts in seconds.
MINIMUM_RESTART_TIME = 20

RESTART_IN_PROGRESS = 'xend/restart_in_progress'
# Module-wide handle on the low-level hypervisor control interface; used
# below for domain_getinfo, domain_create, vcpu_setaffinity and friends.
xc = xen.lowlevel.xc.xc()
# Module-wide XendRoot instance; consulted below for the console limit and
# the enable-dump setting.
xroot = XendRoot.instance()

# Logger shared by everything in this module.
log = logging.getLogger("xend.XendDomainInfo")
#log.setLevel(logging.TRACE)
##
# All parameters of VMs that may be configured on-the-fly, or at start-up.
# Each entry is a (name, converter) pair.
#
VM_CONFIG_PARAMS = [
    ('name', str),
    ('on_poweroff', str),
    ('on_reboot', str),
    ('on_crash', str),
]


##
# Configuration entries that we expect to round-trip -- be read from the
# config file or xc, written to save-files (i.e. through sxpr), and reused as
# config on restart or restore, all without munging.  Some configuration
# entries are munged for backwards compatibility reasons, or because they
# don't come out of xc in the same form as they are specified in the config
# file, so those are handled separately.
ROUNDTRIPPING_CONFIG_ENTRIES = [
    ('uuid', str),
    ('ssidref', int),
    ('vcpus', int),
    ('vcpu_avail', int),
    ('cpu_weight', float),
    ('memory', int),
    ('maxmem', int),
    ('bootloader', str),
]

# The on-the-fly parameters round-trip as well.
ROUNDTRIPPING_CONFIG_ENTRIES.extend(VM_CONFIG_PARAMS)


##
# All entries written to the store.  This is VM_CONFIG_PARAMS, plus those
# entries written to the store that cannot be reconfigured on-the-fly.
#
VM_STORE_ENTRIES = [
    ('uuid', str),
    ('ssidref', int),
    ('vcpus', int),
    ('vcpu_avail', int),
    ('memory', int),
    ('maxmem', int),
    ('start_time', float),
]

VM_STORE_ENTRIES.extend(VM_CONFIG_PARAMS)
151 #
152 # There are a number of CPU-related fields:
153 #
154 # vcpus: the number of virtual CPUs this domain is configured to use.
155 # vcpu_avail: a bitmap telling the guest domain whether it may use each of
156 # its VCPUs. This is translated to
157 # <dompath>/cpu/<id>/availability = {online,offline} for use
158 # by the guest domain.
159 # cpumap: a list of bitmaps, one for each VCPU, giving the physical
160 # CPUs that that VCPU may use.
161 # cpu: a configuration setting requesting that VCPU 0 is pinned to
162 # the specified physical CPU.
163 #
164 # vcpus and vcpu_avail settings persist with the VM (i.e. they are persistent
165 # across save, restore, migrate, and restart). The other settings are only
166 # specific to the domain, so are lost when the VM moves.
167 #
def create(config):
    """Create a VM from a configuration.

    The new XendDomainInfo is constructed, initialised, written to the
    store, and has its watch registered before it is returned.  If any step
    fails, the failure is logged, the partially built domain is destroyed,
    and the original exception is re-raised.

    @param config: configuration, in the form accepted by parseConfig
    @return: the new XendDomainInfo
    @raise: VmError for invalid configuration
    """

    log.debug("XendDomainInfo.create(%s)", config)

    dominfo = XendDomainInfo(parseConfig(config))
    try:
        dominfo.construct()
        dominfo.initDomain()
        dominfo.storeVmDetails()
        dominfo.storeDomDetails()
        dominfo.registerWatch()
        dominfo.refreshShutdown()
    except:
        # Clean up whatever was built, then let the caller see the error.
        log.exception('Domain construction failed')
        dominfo.destroy()
        raise
    return dominfo
194 def recreate(xeninfo, priv):
195 """Create the VM object for an existing domain. The domain must not
196 be dying, as the paths in the store should already have been removed,
197 and asking us to recreate them causes problems."""
199 log.debug("XendDomainInfo.recreate(%s)", xeninfo)
201 assert not xeninfo['dying']
203 domid = xeninfo['dom']
204 uuid1 = xeninfo['handle']
205 xeninfo['uuid'] = uuid.toString(uuid1)
206 dompath = GetDomainPath(domid)
207 if not dompath:
208 raise XendError(
209 'No domain path in store for existing domain %d' % domid)
211 log.info("Recreating domain %d, UUID %s.", domid, xeninfo['uuid'])
212 try:
213 vmpath = xstransact.Read(dompath, "vm")
214 if not vmpath:
215 raise XendError(
216 'No vm path in store for existing domain %d' % domid)
217 uuid2_str = xstransact.Read(vmpath, "uuid")
218 if not uuid2_str:
219 raise XendError(
220 'No vm/uuid path in store for existing domain %d' % domid)
222 uuid2 = uuid.fromString(uuid2_str)
224 if uuid1 != uuid2:
225 raise XendError(
226 'Uuid in store does not match uuid for existing domain %d: '
227 '%s != %s' % (domid, uuid2_str, xeninfo['uuid']))
229 vm = XendDomainInfo(xeninfo, domid, dompath, True, priv)
231 except Exception, exn:
232 if priv:
233 log.warn(str(exn))
235 vm = XendDomainInfo(xeninfo, domid, dompath, True, priv)
236 vm.recreateDom()
237 vm.removeVm()
238 vm.storeVmDetails()
239 vm.storeDomDetails()
241 vm.registerWatch()
242 vm.refreshShutdown(xeninfo)
243 return vm
def restore(config):
    """Create a domain and a VM object to do a restore.

    The XendDomainInfo is created with its resume flag set; the flag is
    cleared again by endRestore once the devices, channels and store
    details are in place.  On any failure the domain is destroyed and the
    exception is re-raised.

    @param config: domain configuration
    @return: the new XendDomainInfo
    """

    log.debug("XendDomainInfo.restore(%s)", config)

    dominfo = XendDomainInfo(parseConfig(config), resume = True)
    try:
        dominfo.construct()
        dominfo.storeVmDetails()
        dominfo.createDevices()
        dominfo.createChannels()
        dominfo.storeDomDetails()
        dominfo.endRestore()
    except:
        dominfo.destroy()
        raise
    return dominfo
268 def parseConfig(config):
269 def get_cfg(name, conv = None):
270 val = sxp.child_value(config, name)
272 if conv and not val is None:
273 try:
274 return conv(val)
275 except TypeError, exn:
276 raise VmError(
277 'Invalid setting %s = %s in configuration: %s' %
278 (name, val, str(exn)))
279 else:
280 return val
283 log.debug("parseConfig: config is %s", config)
285 result = {}
287 for e in ROUNDTRIPPING_CONFIG_ENTRIES:
288 result[e[0]] = get_cfg(e[0], e[1])
290 result['cpu'] = get_cfg('cpu', int)
291 result['cpus'] = get_cfg('cpus', str)
292 result['image'] = get_cfg('image')
294 try:
295 if result['image']:
296 v = sxp.child_value(result['image'], 'vcpus')
297 if v is not None and int(v) != result['vcpus']:
298 log.warn(('Image VCPUs setting overrides vcpus=%d elsewhere.'
299 ' Using %s VCPUs for VM %s.') %
300 (result['vcpus'], v, result['uuid']))
301 result['vcpus'] = int(v)
302 except TypeError, exn:
303 raise VmError(
304 'Invalid configuration setting: vcpus = %s: %s' %
305 (sxp.child_value(result['image'], 'vcpus', 1), str(exn)))
307 try:
308 # support legacy config files with 'cpu' parameter
309 # NB: prepending to list to support previous behavior
310 # where 'cpu' parameter pinned VCPU0.
311 if result['cpu']:
312 if result['cpus']:
313 result['cpus'] = "%s,%s" % (str(result['cpu']), result['cpus'])
314 else:
315 result['cpus'] = str(result['cpu'])
317 # convert 'cpus' string to list of ints
318 # 'cpus' supports a list of ranges (0-3), seperated by
319 # commas, and negation, (^1).
320 # Precedence is settled by order of the string:
321 # "0-3,^1" -> [0,2,3]
322 # "0-3,^1,1" -> [0,1,2,3]
323 if result['cpus']:
324 cpus = []
325 for c in result['cpus'].split(','):
326 if c.find('-') != -1:
327 (x,y) = c.split('-')
328 for i in range(int(x),int(y)+1):
329 cpus.append(int(i))
330 else:
331 # remove this element from the list
332 if c[0] == '^':
333 cpus = [x for x in cpus if x != int(c[1])]
334 else:
335 cpus.append(int(c))
337 result['cpus'] = cpus
339 except ValueError, exn:
340 raise VmError(
341 'Invalid configuration setting: cpus = %s: %s' %
342 (result['cpus'], exn))
344 result['backend'] = []
345 for c in sxp.children(config, 'backend'):
346 result['backend'].append(sxp.name(sxp.child0(c)))
348 result['device'] = []
349 for d in sxp.children(config, 'device'):
350 c = sxp.child0(d)
351 result['device'].append((sxp.name(c), c))
353 # Configuration option "restart" is deprecated. Parse it, but
354 # let on_xyz override it if they are present.
355 restart = get_cfg('restart')
356 if restart:
357 def handle_restart(event, val):
358 if result[event] is None:
359 result[event] = val
361 if restart == "onreboot":
362 handle_restart('on_poweroff', 'destroy')
363 handle_restart('on_reboot', 'restart')
364 handle_restart('on_crash', 'destroy')
365 elif restart == "always":
366 handle_restart('on_poweroff', 'restart')
367 handle_restart('on_reboot', 'restart')
368 handle_restart('on_crash', 'restart')
369 elif restart == "never":
370 handle_restart('on_poweroff', 'destroy')
371 handle_restart('on_reboot', 'destroy')
372 handle_restart('on_crash', 'destroy')
373 else:
374 log.warn("Ignoring malformed and deprecated config option "
375 "restart = %s", restart)
377 log.debug("parseConfig: result is %s", result)
378 return result
def domain_by_name(name):
    """Look up a domain by name through the XendDomain singleton.

    @param name: domain name
    @return: whatever XendDomain's domain_lookup_by_name_nr returns
    """
    xd = XendDomain.instance()
    return xd.domain_lookup_by_name_nr(name)
def shutdown_reason(code):
    """Translate a shutdown code into its name.

    @param code: shutdown code
    @type  code: int
    @return: shutdown reason, or "?" for a code not in shutdown_reasons
    @rtype:  string
    """
    if code in shutdown_reasons:
        return shutdown_reasons[code]
    return "?"
395 def dom_get(dom):
396 """Get info from xen for an existing domain.
398 @param dom: domain id
399 @return: info or None
400 """
401 try:
402 domlist = xc.domain_getinfo(dom, 1)
403 if domlist and dom == domlist[0]['dom']:
404 return domlist[0]
405 except Exception, err:
406 # ignore missing domain
407 log.trace("domain_getinfo(%d) failed, ignoring: %s", dom, str(err))
408 return None
411 class XendDomainInfo:
def __init__(self, info, domid = None, dompath = None, augment = False,
             priv = False, resume = False):
    """Set up the in-memory representation of a domain.

    @param info: info dictionary (from parseConfig, or from xc)
    @param domid: domain ID; if None, info['dom'] is used when present
    @param dompath: the domain's path in the store, if already known
    @param augment: if true, fill gaps in info from the store
                    (see augmentInfo) before validating
    @param priv: passed through to augmentInfo
    @param resume: initial value of the resume flag (see setResume)
    @raise: VmError if the info block fails validation
    """

    self.info = info

    if not self.infoIsSet('uuid'):
        self.info['uuid'] = uuid.toString(uuid.create())

    # An explicit domid wins over the one recorded in the info block.
    if domid is None and 'dom' in info:
        domid = int(info['dom'])
    self.domid = domid

    self.vmpath = XendDomain.VMROOT + self.info['uuid']
    self.dompath = dompath

    if augment:
        self.augmentInfo(priv)

    self.validateInfo()

    self.image = None

    self.store_port = None
    self.store_mfn = None
    self.console_port = None
    self.console_mfn = None

    self.vmWatch = None

    self.state = STATE_DOM_OK
    self.state_updated = threading.Condition()
    self.refresh_shutdown_lock = threading.Condition()

    self.setResume(resume)
451 ## private:
def readVMDetails(self, params):
    """Read the specified parameters from the store.

    @param params: sequence of entries passed on to gatherVm
    @return: the gathered values, or an empty list if a store entry could
             not be converted (ValueError)
    """
    try:
        details = self.gatherVm(*params)
    except ValueError:
        # One of the int/float entries in params has a corresponding store
        # entry that is invalid.  We recover, because older versions of
        # Xend may have put the entry there (memory/target, for example),
        # but this is in general a bad situation to have reached.
        log.exception(
            "Store corrupted at %s! Domain %d's configuration may be "
            "affected.", self.vmpath, self.domid)
        details = []
    return details
def storeChanged(self, _):
    """Watch callback (registered by registerWatch): bring self.info back
    in step with the VM's entries in the store.

    The VM_CONFIG_PARAMS and the image are re-read.  If any of them
    differs from self.info, self.info is updated and the domain section of
    the store is rewritten.

    @param _: ignored
    @return: always 1 (presumably tells xswatch to keep the watch
             registered -- confirm against xswatch)
    """
    log.trace("XendDomainInfo.storeChanged")

    changed = False

    # This must be a plain loop: an assignment to 'changed' inside a
    # nested function would bind a new local there and leave this one
    # untouched, so the update below would never be triggered.
    # readVMDetails returns [] on a corrupted store, in which case there
    # is nothing to compare.
    for (param, val) in zip(VM_CONFIG_PARAMS,
                            self.readVMDetails(VM_CONFIG_PARAMS)):
        name = param[0]
        if val is not None and self.info[name] != val:
            self.info[name] = val
            changed = True

    im = self.readVm('image')
    current_im = self.info['image']
    if (im is not None and
        (current_im is None or sxp.to_string(current_im) != im)):
        self.info['image'] = sxp.from_string(im)
        changed = True

    if changed:
        # Update the domain section of the store, as this contains some
        # parameters derived from the VM configuration.
        self.storeDomDetails()

    return 1
def augmentInfo(self, priv):
    """Augment self.info, as given to us through {@link #recreate}, with
    values taken from the store.  This recovers those values known to xend
    but not to the hypervisor.

    Values already set in self.info are never overwritten.

    @param priv: if true, 'memory' and 'maxmem' are not taken from the
                 store
    """
    def useIfNeeded(name, val):
        if not self.infoIsSet(name) and val is not None:
            self.info[name] = val

    # Always work on a copy: appending to VM_STORE_ENTRIES itself would
    # grow the module-level list on every call.
    entries = VM_STORE_ENTRIES[:]
    if priv:
        entries.remove(('memory', int))
        entries.remove(('maxmem', int))
    entries.append(('image', str))

    # readVMDetails returns [] on a corrupted store, in which case nothing
    # is taken from it.
    for (entry, val) in zip(entries, self.readVMDetails(entries)):
        useIfNeeded(entry[0], val)

    device = []
    for c in controllerClasses:
        devconfig = self.getDeviceConfigurations(c)
        if devconfig:
            device.extend([(c, x) for x in devconfig])
    useIfNeeded('device', device)
def validateInfo(self):
    """Validate and normalise the info block.  This has either been parsed
    by parseConfig, or received from xc through recreate and augmented by
    the current store contents.

    Missing entries are given defaults, the name is checked, a string
    image is parsed, memory/maxmem are reconciled, and the device list and
    restart settings are checked.

    @raise: VmError for an invalid name, device or restart setting, or if
            a required entry is missing from the info block
    """
    # The default is passed as a callable so that it is only computed
    # when the entry is actually missing.
    def defaultInfo(name, val):
        if not self.infoIsSet(name):
            self.info[name] = val()

    try:
        defaultInfo('name',         lambda: "Domain-%d" % self.domid)
        defaultInfo('ssidref',      lambda: 0)
        defaultInfo('on_poweroff',  lambda: "destroy")
        defaultInfo('on_reboot',    lambda: "restart")
        defaultInfo('on_crash',     lambda: "restart")
        defaultInfo('cpu',          lambda: None)
        defaultInfo('cpus',         lambda: [])
        defaultInfo('cpu_weight',   lambda: 1.0)

        # some domains don't have a config file (e.g. dom0 )
        # to set number of vcpus so we derive available cpus
        # from max_vcpu_id which is present for running domains.
        if not self.infoIsSet('vcpus') and self.infoIsSet('max_vcpu_id'):
            avail = int(self.info['max_vcpu_id'])+1
        else:
            avail = int(1)

        # The remaining VCPU defaults are all derived from 'vcpus', so
        # that one has to be settled first.
        defaultInfo('vcpus',        lambda: avail)
        defaultInfo('online_vcpus', lambda: self.info['vcpus'])
        defaultInfo('max_vcpu_id',  lambda: self.info['vcpus']-1)
        defaultInfo('vcpu_avail',   lambda: (1 << self.info['vcpus']) - 1)

        defaultInfo('memory',       lambda: 0)
        defaultInfo('maxmem',       lambda: 0)
        defaultInfo('bootloader',   lambda: None)
        defaultInfo('backend',      lambda: [])
        defaultInfo('device',       lambda: [])
        defaultInfo('image',        lambda: None)

        self.check_name(self.info['name'])

        # The image may arrive as a string (augmentInfo reads it from the
        # store as str); parse it.
        if isinstance(self.info['image'], str):
            self.info['image'] = sxp.from_string(self.info['image'])

        # No configured memory: fall back to mem_kb, converted from KiB
        # to MiB, rounding up.
        if self.info['memory'] == 0:
            if self.infoIsSet('mem_kb'):
                self.info['memory'] = (self.info['mem_kb'] + 1023) / 1024

        # maxmem is never allowed to be below memory.
        if self.info['maxmem'] < self.info['memory']:
            self.info['maxmem'] = self.info['memory']

        for (n, c) in self.info['device']:
            if not n or not c or n not in controllerClasses:
                raise VmError('invalid device (%s, %s)' %
                              (str(n), str(c)))

        for event in ['on_poweroff', 'on_reboot', 'on_crash']:
            if self.info[event] not in restart_modes:
                raise VmError('invalid restart event: %s = %s' %
                              (event, str(self.info[event])))

    except KeyError, exn:
        log.exception(exn)
        raise VmError('Unspecified domain detail: %s' % exn)
def readVm(self, *args):
    """Call xstransact.Read on this VM's path (self.vmpath) with the
    given arguments, and return the result."""
    return xstransact.Read(self.vmpath, *args)
def writeVm(self, *args):
    """Call xstransact.Write on this VM's path (self.vmpath) with the
    given arguments, and return the result."""
    return xstransact.Write(self.vmpath, *args)
def removeVm(self, *args):
    """Call xstransact.Remove on this VM's path (self.vmpath) with the
    given arguments, and return the result."""
    return xstransact.Remove(self.vmpath, *args)
def gatherVm(self, *args):
    """Call xstransact.Gather on this VM's path (self.vmpath) with the
    given arguments, and return the result."""
    return xstransact.Gather(self.vmpath, *args)
603 ## public:
def storeVm(self, *args):
    """Call xstransact.Store on this VM's path (self.vmpath) with the
    given arguments, and return the result."""
    return xstransact.Store(self.vmpath, *args)
609 ## private:
def readDom(self, *args):
    """Call xstransact.Read on this domain's path (self.dompath) with the
    given arguments, and return the result."""
    return xstransact.Read(self.dompath, *args)
def writeDom(self, *args):
    """Call xstransact.Write on this domain's path (self.dompath) with the
    given arguments, and return the result."""
    return xstransact.Write(self.dompath, *args)
618 ## public:
def removeDom(self, *args):
    """Call xstransact.Remove on this domain's path (self.dompath) with
    the given arguments, and return the result."""
    return xstransact.Remove(self.dompath, *args)
def recreateDom(self):
    """Run _recreateDom on this domain's path (self.dompath) through
    xstransact's complete()."""
    complete(self.dompath, self._recreateDom)
def _recreateDom(self, t):
    """Body of recreateDom: remove the path, make it afresh, and set its
    permissions with this domain's ID as 'dom'.

    @param t: the transaction object supplied by complete()
    """
    t.remove()
    t.mkdir()
    perms = { 'dom' : self.domid }
    t.set_permissions(perms)
632 ## private:
def storeDom(self, *args):
    """Call xstransact.Store on this domain's path (self.dompath) with the
    given arguments, and return the result."""
    return xstransact.Store(self.dompath, *args)
638 ## public:
def completeRestore(self, store_mfn, console_mfn):
    """Finish a restore: record the store and console frame numbers, then
    introduce the domain, write its details to the store, register the VM
    watch and refresh the shutdown state.

    @param store_mfn: saved as self.store_mfn
    @param console_mfn: saved as self.console_mfn
    """

    log.debug("XendDomainInfo.completeRestore")

    self.store_mfn = store_mfn
    self.console_mfn = console_mfn

    for step in (self.introduceDomain, self.storeDomDetails,
                 self.registerWatch, self.refreshShutdown):
        step()

    log.debug("XendDomainInfo.completeRestore done")
def storeVmDetails(self):
    """Write the VM section of the store: every entry of VM_STORE_ENTRIES
    that is set in self.info (as a string), plus the image, if set, in
    its sxp string form."""
    to_store = {}

    for entry in VM_STORE_ENTRIES:
        name = entry[0]
        if self.infoIsSet(name):
            to_store[name] = str(self.info[name])

    if self.infoIsSet('image'):
        to_store['image'] = sxp.to_string(self.info['image'])

    log.debug("Storing VM details: %s", to_store)

    self.writeVm(to_store)
def storeDomDetails(self):
    """Write the domain section of the store: domid, vm path, name,
    console limit, memory target, whichever console/store ports and
    ring-refs are known, and the per-VCPU availability entries."""
    to_store = {
        'domid':          str(self.domid),
        'vm':             self.vmpath,
        'name':           self.info['name'],
        'console/limit':  str(xroot.get_console_limit() * 1024),
        'memory/target':  str(self.info['memory'] * 1024)
        }

    # These are only written once they have been assigned.
    optional = [
        ('console/port',     self.console_port),
        ('console/ring-ref', self.console_mfn),
        ('store/port',       self.store_port),
        ('store/ring-ref',   self.store_mfn),
        ]
    for (key, val) in optional:
        if val is not None:
            to_store[key] = str(val)

    to_store.update(self.vcpuDomDetails())

    log.debug("Storing domain details: %s", to_store)

    self.writeDom(to_store)
695 ## private:
697 def vcpuDomDetails(self):
698 def availability(n):
699 if self.info['vcpu_avail'] & (1 << n):
700 return 'online'
701 else:
702 return 'offline'
704 result = {}
705 for v in range(0, self.info['vcpus']):
706 result["cpu/%d/availability" % v] = availability(v)
707 return result
710 ## public:
def registerWatch(self):
    """Register a watch on this VM's entries in the store, so that
    when they are changed externally, we keep up to date.  This should
    only be called by {@link #create}, {@link #recreate}, or {@link
    #restore}, once the domain's details have been written, but before the
    new instance is returned.

    The watch is kept in self.vmWatch and calls storeChanged."""
    watch = xswatch(self.vmpath, self.storeChanged)
    self.vmWatch = watch
721 def getDomid(self):
722 return self.domid
def setName(self, name):
    """Rename the VM: check the name, record it in self.info, and store
    it under "name" in the VM's section of the store.

    @param name: the new name
    @raise: VmError if check_name rejects the name
    """
    self.check_name(name)
    self.info['name'] = name
    self.storeVm("name", name)
729 def getName(self):
730 return self.info['name']
732 def getDomainPath(self):
733 return self.dompath
736 def getStorePort(self):
737 """For use only by image.py and XendCheckpoint.py."""
738 return self.store_port
741 def getConsolePort(self):
742 """For use only by image.py and XendCheckpoint.py"""
743 return self.console_port
746 def getVCpuCount(self):
747 return self.info['vcpus']
def setVCpuCount(self, vcpus):
    """Make the first `vcpus` VCPUs available to the guest.

    info['vcpu_avail'] becomes a mask with the low `vcpus` bits set; the
    mask is stored in the VM section of the store and the per-VCPU
    availability entries are rewritten in the domain section.

    @param vcpus: number of VCPUs to mark as available
    """
    avail_mask = (1 << vcpus) - 1
    self.info['vcpu_avail'] = avail_mask
    self.storeVm('vcpu_avail', self.info['vcpu_avail'])
    self.writeDom(self.vcpuDomDetails())
756 def getSsidref(self):
757 return self.info['ssidref']
759 def getMemoryTarget(self):
760 """Get this domain's target memory size, in KB."""
761 return self.info['memory'] * 1024
763 def getResume(self):
764 return "%s" % self.info['resume']
766 def endRestore(self):
767 self.setResume(False)
769 def setResume(self, state):
770 self.info['resume'] = state
def refreshShutdown(self, xeninfo = None):
    """Check the state of the domain and act on it: clean up a vanished
    or dying domain, handle a crash or a completed shutdown, and police
    the timeout on a shutdown that is still in progress.

    @param xeninfo: info dictionary for this domain; if None it is
                    fetched with dom_get
    """
    # If set at the end of this method, a restart is required, with the
    # given reason.  This restart has to be done out of the scope of
    # refresh_shutdown_lock.
    restart_reason = None

    self.refresh_shutdown_lock.acquire()
    try:
        if xeninfo is None:
            xeninfo = dom_get(self.domid)
            if xeninfo is None:
                # The domain no longer exists.  This will occur if we have
                # scheduled a timer to check for shutdown timeouts and the
                # shutdown succeeded.  It will also occur if someone
                # destroys a domain beneath us.  We clean up the domain,
                # just in case, but we can't clean up the VM, because that
                # VM may have migrated to a different domain on this
                # machine.
                self.cleanupDomain()
                return

        if xeninfo['dying']:
            # Dying means that a domain has been destroyed, but has not
            # yet been cleaned up by Xen.  This state could persist
            # indefinitely if, for example, another domain has some of its
            # pages mapped.  We might like to diagnose this problem in the
            # future, but for now all we do is make sure that it's not us
            # holding the pages, by calling cleanupDomain.  We can't
            # clean up the VM, as above.
            self.cleanupDomain()
            return

        elif xeninfo['crashed']:
            if self.readDom('xend/shutdown_completed'):
                # We've seen this shutdown already, but we are preserving
                # the domain for debugging.  Leave it alone.
                return

            log.warn('Domain has crashed: name=%s id=%d.',
                     self.info['name'], self.domid)

            if xroot.get_enable_dump():
                self.dumpCore()

            # Acted upon below, once the lock has been released.
            restart_reason = 'crash'

        elif xeninfo['shutdown']:
            if self.readDom('xend/shutdown_completed'):
                # We've seen this shutdown already, but we are preserving
                # the domain for debugging.  Leave it alone.
                return

            else:
                reason = shutdown_reason(xeninfo['shutdown_reason'])

                log.info('Domain has shutdown: name=%s id=%d reason=%s.',
                         self.info['name'], self.domid, reason)

                # The shutdown has happened, so the timeout no longer
                # applies.
                self.clearRestart()

                if reason == 'suspend':
                    self.state_set(STATE_DOM_SHUTDOWN)
                    # Don't destroy the domain.  XendCheckpoint will do
                    # this once it has finished.  However, stop watching
                    # the VM path now, otherwise we will end up with one
                    # watch for the old domain, and one for the new.
                    self.unwatchVm()
                elif reason in ['poweroff', 'reboot']:
                    restart_reason = reason
                else:
                    self.destroy()

        elif self.dompath is None:
            # We have yet to manage to call introduceDomain on this
            # domain.  This can happen if a restore is in progress, or has
            # failed.  Ignore this domain.
            pass
        else:
            # Domain is alive.  If we are shutting it down, then check
            # the timeout on that, and destroy it if necessary.

            sst = self.readDom('xend/shutdown_start_time')
            if sst:
                sst = float(sst)
                # Seconds left before the shutdown request times out.
                timeout = SHUTDOWN_TIMEOUT - time.time() + sst
                if timeout < 0:
                    log.info(
                        "Domain shutdown timeout expired: name=%s id=%s",
                        self.info['name'], self.domid)
                    self.destroy()
                else:
                    log.debug(
                        "Scheduling refreshShutdown on domain %d in %ds.",
                        self.domid, timeout)
                    # Check again when the timeout is due.
                    threading.Timer(timeout, self.refreshShutdown).start()
    finally:
        self.refresh_shutdown_lock.release()

    if restart_reason:
        self.maybeRestart(restart_reason)
def shutdown(self, reason):
    """Request a shutdown by writing the reason to "control/shutdown" in
    the domain's section of the store.  For every reason except 'suspend'
    the time of the request is recorded as well, for the shutdown timeout
    check in refreshShutdown.

    @param reason: one of the values of shutdown_reasons
    @raise: XendError if the reason is not recognised
    """
    if reason not in shutdown_reasons.values():
        raise XendError('Invalid reason: %s' % reason)
    self.storeDom("control/shutdown", reason)
    if reason == 'suspend':
        return
    self.storeDom('xend/shutdown_start_time', time.time())
882 ## private:
def clearRestart(self):
    """Remove the recorded shutdown start time from the domain's section
    of the store."""
    self.removeDom("xend/shutdown_start_time")
def maybeRestart(self, reason):
    """Carry out the behaviour configured for the given event.

    @param reason: event name; the setting consulted is
                   info['on_' + reason]
    """
    # Dispatch to the correct method based upon the configured on_{reason}
    # behaviour.
    handlers = {
        "destroy"        : self.destroy,
        "restart"        : self.restart,
        "preserve"       : self.preserve,
        "rename-restart" : self.renameRestart,
        }
    behaviour = self.info['on_' + reason]
    handler = handlers[behaviour]
    handler()
def renameRestart(self):
    """Handler for the "rename-restart" behaviour: calls restart with
    True as its argument."""
    self.restart(True)
def dumpCore(self):
    """Create a core dump for this domain.  Nothrow guarantee.

    The dump is written to /var/xen/dump/<name>.<domid>.core; any failure
    is logged and otherwise ignored."""

    try:
        name_and_id = (self.info['name'], self.domid)
        corefile = "/var/xen/dump/%s.%s.core" % name_and_id
        xc.domain_dumpcore(self.domid, corefile)

    except:
        log.exception("XendDomainInfo.dumpCore failed: id = %s name = %s",
                      self.domid, self.info['name'])
914 ## public:
def setMemoryTarget(self, target):
    """Set the memory target of this domain.

    The target is recorded in info['memory'] and in the VM section of the
    store in MiB, and in the domain section ("memory/target") in KiB.

    @param target: In MiB.
    """
    log.debug("Setting memory target of domain %s (%d) to %d MiB.",
              self.info['name'], self.domid, target)

    target_kib = target << 10

    self.info['memory'] = target
    self.storeVm("memory", target)
    self.storeDom("memory/target", target_kib)
def update(self, info = None):
    """Update with info from xc.domain_getinfo().

    The info is merged into self.info, which is then re-validated, and
    the shutdown state is refreshed.

    @param info: info dictionary; if empty or None it is fetched with
                 dom_get, and nothing is done if that finds nothing
    """

    log.trace("XendDomainInfo.update(%s) on domain %d", info, self.domid)

    if not info:
        info = dom_get(self.domid)
    if not info:
        return

    self.info.update(info)
    self.validateInfo()
    self.refreshShutdown(info)

    log.trace("XendDomainInfo.update done on domain %d: %s", self.domid,
              self.info)
947 ## private:
949 def state_set(self, state):
950 self.state_updated.acquire()
951 try:
952 if self.state != state:
953 self.state = state
954 self.state_updated.notifyAll()
955 finally:
956 self.state_updated.release()
959 ## public:
def waitForShutdown(self):
    """Block until self.state is no longer STATE_DOM_OK, waiting on
    state_updated for each change."""
    cond = self.state_updated
    cond.acquire()
    try:
        while self.state == STATE_DOM_OK:
            cond.wait()
    finally:
        cond.release()
970 def __str__(self):
971 s = "<domain"
972 s += " id=" + str(self.domid)
973 s += " name=" + self.info['name']
974 s += " memory=" + str(self.info['memory'])
975 s += " ssidref=" + str(self.info['ssidref'])
976 s += ">"
977 return s
979 __repr__ = __str__
982 ## private:
def createDevice(self, deviceClass, devconfig):
    """Pass devconfig to createDevice on the controller for deviceClass,
    and return the result.

    @raise: XendError for an unknown device class
    """
    controller = self.getDeviceController(deviceClass)
    return controller.createDevice(devconfig)
def waitForDevices_(self, deviceClass):
    """Call waitForDevices on the controller for deviceClass, and return
    the result.

    @raise: XendError for an unknown device class
    """
    controller = self.getDeviceController(deviceClass)
    return controller.waitForDevices()
def waitForDevice(self, deviceClass, devid):
    """Call waitForDevice(devid) on the controller for deviceClass, and
    return the result.

    @raise: XendError for an unknown device class
    """
    controller = self.getDeviceController(deviceClass)
    return controller.waitForDevice(devid)
def reconfigureDevice(self, deviceClass, devid, devconfig):
    """Call reconfigureDevice(devid, devconfig) on the controller for
    deviceClass, and return the result.

    @raise: XendError for an unknown device class
    """
    controller = self.getDeviceController(deviceClass)
    return controller.reconfigureDevice(devid, devconfig)
1001 ## public:
def destroyDevice(self, deviceClass, devid):
    """Call destroyDevice(devid) on the controller for deviceClass, and
    return the result.

    @raise: XendError for an unknown device class
    """
    controller = self.getDeviceController(deviceClass)
    return controller.destroyDevice(devid)
def getDeviceSxprs(self, deviceClass):
    """Return the result of sxprs() on the controller for deviceClass.

    @raise: XendError for an unknown device class
    """
    controller = self.getDeviceController(deviceClass)
    return controller.sxprs()
1011 ## private:
def getDeviceConfigurations(self, deviceClass):
    """Return the result of configurations() on the controller for
    deviceClass.

    @raise: XendError for an unknown device class
    """
    controller = self.getDeviceController(deviceClass)
    return controller.configurations()
def getDeviceController(self, name):
    """Instantiate the controller class registered for a device type.

    @param name: device type, a key of controllerClasses
    @return: a new controller, constructed with this domain as argument
    @raise: XendError for an unknown device type
    """
    if name in controllerClasses:
        return controllerClasses[name](self)

    raise XendError("unknown device type: " + str(name))
1024 ## public:
def sxpr(self):
    """Build the s-expression (nested lists) describing this domain.

    It holds the domid, every round-tripping config entry that is set,
    the image, the device configurations, a state string and, where
    known, the shutdown reason, CPU time, online VCPUs, up/start time and
    the store and console frame numbers.

    @return: list starting with 'domain'
    """
    def stateChar(flag):
        # First letter of the flag if it is set, '-' if it is clear,
        # '?' if it is unknown.
        if flag not in self.info:
            return '?'
        if self.info[flag]:
            return flag[0]
        return '-'

    result = ['domain',
              ['domid', self.domid]]

    for entry in ROUNDTRIPPING_CONFIG_ENTRIES:
        name = entry[0]
        if self.infoIsSet(name):
            result.append([name, self.info[name]])

    if self.infoIsSet('image'):
        result.append(['image', self.info['image']])

    for cls in controllerClasses:
        for config in self.getDeviceConfigurations(cls):
            result.append(['device', config])

    flags = ['running', 'blocked', 'paused', 'shutdown', 'crashed',
             'dying']
    state = "".join([stateChar(flag) for flag in flags])

    result.append(['state', state])
    if self.infoIsSet('shutdown'):
        reason = shutdown_reason(self.info['shutdown_reason'])
        result.append(['shutdown_reason', reason])
    if self.infoIsSet('cpu_time'):
        result.append(['cpu_time', self.info['cpu_time']/1e9])
    result.append(['online_vcpus', self.info['online_vcpus']])

    if self.infoIsSet('start_time'):
        up_time = time.time() - self.info['start_time']
        result.append(['up_time', str(up_time) ])
        result.append(['start_time', str(self.info['start_time']) ])

    if self.store_mfn:
        result.append(['store_mfn', self.store_mfn])
    if self.console_mfn:
        result.append(['console_mfn', self.console_mfn])

    return result
1077 def getVCPUInfo(self):
1078 try:
1079 # We include the domain name and ID, to help xm.
1080 sxpr = ['domain',
1081 ['domid', self.domid],
1082 ['name', self.info['name']],
1083 ['vcpu_count', self.info['online_vcpus']]]
1085 for i in range(0, self.info['max_vcpu_id']+1):
1086 info = xc.vcpu_getinfo(self.domid, i)
1088 sxpr.append(['vcpu',
1089 ['number', i],
1090 ['online', info['online']],
1091 ['blocked', info['blocked']],
1092 ['running', info['running']],
1093 ['cpu_time', info['cpu_time'] / 1e9],
1094 ['cpu', info['cpu']],
1095 ['cpumap', info['cpumap']]])
1097 return sxpr
1099 except RuntimeError, exn:
1100 raise XendError(str(exn))
1103 ## private:
def check_name(self, name):
    """Check if a vm name is valid. Valid names contain alphabetic characters,
    digits, or characters in '_-.:/+'.
    The same name cannot be used for more than one vm at the same time.

    @param name: name
    @raise: VmError if invalid
    """
    if name is None or name == '':
        raise VmError('missing vm name')

    allowed = string.digits + '_-.:/+' + string.ascii_letters
    for c in name:
        if c not in allowed:
            raise VmError('invalid vm name')

    # The name is well-formed; now make sure nobody else is using it.
    owner = domain_by_name(name)
    if not owner:
        return
    if self.domid is None:
        raise VmError("VM name '%s' already in use by domain %d" %
                      (name, owner.domid))
    if owner.domid != self.domid:
        raise VmError("VM name '%s' is used in both domains %d and %d" %
                      (name, self.domid, owner.domid))
def construct(self):
    """Construct the domain.

    A new domain is created through xc with this VM's ssidref and uuid,
    its store path is looked up and created afresh, and its maximum
    number of VCPUs is set.

    @raise: VmError on error
    """

    log.debug('XendDomainInfo.construct: %s %s',
              self.domid,
              self.info['ssidref'])

    handle = uuid.fromString(self.info['uuid'])
    self.domid = xc.domain_create(dom = 0,
                                  ssidref = self.info['ssidref'],
                                  handle = handle)

    if self.domid < 0:
        raise VmError('Creating domain failed: name=%s' %
                      self.info['name'])

    self.dompath = GetDomainPath(self.domid)

    self.recreateDom()

    # Set maximum number of vcpus in domain
    max_vcpus = int(self.info['vcpus'])
    xc.domain_max_vcpus(self.domid, max_vcpus)
1158 def introduceDomain(self):
1159 assert self.domid is not None
1160 assert self.store_mfn is not None
1161 assert self.store_port is not None
1163 try:
1164 IntroduceDomain(self.domid, self.store_mfn, self.store_port)
1165 except RuntimeError, exn:
1166 raise XendError(str(exn))
1169 def initDomain(self):
1170 log.debug('XendDomainInfo.initDomain: %s %s',
1171 self.domid,
1172 self.info['cpu_weight'])
1174 if not self.infoIsSet('image'):
1175 raise VmError('Missing image in configuration')
1177 try:
1178 self.image = image.create(self,
1179 self.info['image'],
1180 self.info['device'])
1182 xc.domain_setcpuweight(self.domid, self.info['cpu_weight'])
1184 # repin domain vcpus if a restricted cpus list is provided
1185 # this is done prior to memory allocation to aide in memory
1186 # distribution for NUMA systems.
1187 cpus = self.info['cpus']
1188 if cpus is not None and len(cpus) > 0:
1189 for v in range(0, self.info['max_vcpu_id']+1):
1190 # pincpu takes a list of ints
1191 cpu = [ int( cpus[v % len(cpus)] ) ]
1192 xc.vcpu_setaffinity(self.domid, v, cpu)
1194 m = self.image.getDomainMemory(self.info['memory'] * 1024)
1195 balloon.free(m)
1196 xc.domain_setmaxmem(self.domid, m)
1197 xc.domain_memory_increase_reservation(self.domid, m, 0, 0)
1199 self.createChannels()
1201 channel_details = self.image.createImage()
1203 self.store_mfn = channel_details['store_mfn']
1204 if 'console_mfn' in channel_details:
1205 self.console_mfn = channel_details['console_mfn']
1207 self.introduceDomain()
1209 self.createDevices()
1211 if self.info['bootloader']:
1212 self.image.cleanupBootloading()
1214 self.info['start_time'] = time.time()
1216 except RuntimeError, exn:
1217 raise VmError(str(exn))
1220 ## public:
def cleanupDomain(self):
    """Cleanup domain resources; release devices.  Documented as idempotent
    with a nothrow guarantee: failures of the image teardown, the domain
    path removal and the zombie rename are logged and swallowed."""

    self.release_devices()

    if self.image:
        try:
            self.image.destroy()
        except:
            log.exception(
                "XendDomainInfo.cleanup: image.destroy() failed.")
        # Drop the reference whether or not destroy() succeeded.
        self.image = None

    try:
        self.removeDom()
    except:
        log.exception("Removing domain path failed.")

    try:
        current = self.info['name']
        if not current.startswith(ZOMBIE_PREFIX):
            # Prefix only once so repeated cleanups do not stack prefixes.
            self.info['name'] = ZOMBIE_PREFIX + current
    except:
        log.exception("Renaming Zombie failed.")

    self.state_set(STATE_DOM_SHUTDOWN)
def cleanupVm(self):
    """Cleanup VM resources: drop the VM watch, then remove the VM path.
    Documented as idempotent with a nothrow guarantee; a failure of
    removeVm() is logged and swallowed."""

    self.unwatchVm()

    try:
        self.removeVm()
    except:
        # Best effort only -- record the failure and carry on.
        log.exception("Removing VM path failed.")
1261 ## private:
def unwatchVm(self):
    """Remove the watch on the VM path, if any, and clear self.vmWatch.
    Documented as idempotent with a nothrow guarantee; a failing unwatch()
    is logged and swallowed."""

    try:
        try:
            watch = self.vmWatch
            if watch:
                watch.unwatch()
        finally:
            # Forget the watch even if unwatch() raised.
            self.vmWatch = None
    except:
        log.exception("Unwatching VM path failed.")
1277 ## public:
def destroy(self):
    """Cleanup VM and destroy domain.  Documented as nothrow.

    The domain itself is only destroyed when a domain path has been set.
    """

    log.debug("XendDomainInfo.destroy: domid=%s", self.domid)

    self.cleanupVm()
    if self.dompath is None:
        return
    self.destroyDomain()
def destroyDomain(self):
    """Destroy the domain through xc (when a domain id is set), then run
    cleanupDomain().  A failure of xc.domain_destroy() is logged and
    swallowed so that the cleanup still runs."""
    log.debug("XendDomainInfo.destroyDomain(%s)", self.domid)

    try:
        domid = self.domid
        if domid is not None:
            xc.domain_destroy(domid)
    except:
        log.exception("XendDomainInfo.destroy: xc.domain_destroy failed.")

    self.cleanupDomain()
1301 ## private:
def release_devices(self):
    """Release all domain's devices.  Documented as nothrow; only failures
    of the individual remove() calls are caught here.

    Works inside an xstransact rooted at "<dompath>/device" and starts over
    with a fresh transaction until commit() reports success.
    """

    committed = False
    while not committed:
        t = xstransact("%s/device" % self.dompath)
        for n in controllerClasses.keys():
            for d in t.list(n):
                # NOTE(review): d is whatever t.list(n) yields.  If that is
                # a bare child name, remove(d) addresses "device/<d>" and
                # not "device/<n>/<d>" -- confirm against xstransact.
                try:
                    t.remove(d)
                except:
                    # Log and swallow any exceptions in removal --
                    # there's nothing more we can do.
                    log.exception(
                        "Device release failed: %s; %s; %s",
                        self.info['name'], n, d)
        committed = t.commit()
def createChannels(self):
    """Create the channels to the domain: one stored as self.store_port,
    then one stored as self.console_port.
    """
    store = self.createChannel()
    self.store_port = store
    console = self.createChannel()
    self.console_port = console
def createChannel(self):
    """Create an event channel to the domain.

    Returns the value of xc.evtchn_alloc_unbound() for this domain with
    remote domain 0.  Any exception is logged and then re-raised.
    """
    try:
        port = xc.evtchn_alloc_unbound(dom=self.domid, remote_dom=0)
    except:
        log.exception("Exception in alloc_unbound(%d)", self.domid)
        raise
    return port
1339 ## public:
def createDevices(self):
    """Create the devices for a vm.

    Every (device class, configuration) pair in self.info['device'] is
    passed to createDevice(); afterwards the image, if any, is asked to
    create its device model.

    @raise: VmError for invalid devices
    """

    for dev_class, dev_cfg in self.info['device']:
        self.createDevice(dev_class, dev_cfg)

    img = self.image
    if img:
        img.createDeviceModel()
def waitForDevices(self):
    """Wait for this domain's configured devices to connect, one registered
    device class at a time.

    @raise: VmError if any device fails to initialise.
    """
    for device_class in controllerClasses:
        self.waitForDevices_(device_class)
def device_create(self, dev_config):
    """Create a new device, wait for it, and record it in
    self.info['device'].

    @param dev_config: device configuration
    @return: the sxpr() of the device as reported by its device controller
    """
    device_class = sxp.name(dev_config)
    new_id = self.createDevice(device_class, dev_config)
    self.waitForDevice(device_class, new_id)
    # Only remember the device once it has been created and waited for.
    self.info['device'].append((device_class, dev_config))
    controller = self.getDeviceController(device_class)
    return controller.sxpr(new_id)
def device_configure(self, dev_config, devid):
    """Configure an existing device.  The device class is taken from the
    name of the configuration s-expression.

    @param dev_config: device configuration
    @param devid: device id
    """
    device_class = sxp.name(dev_config)
    self.reconfigureDevice(device_class, devid, dev_config)
def pause(self):
    """Pause this domain via xc.domain_pause()."""
    domid = self.domid
    xc.domain_pause(domid)
def unpause(self):
    """Unpause this domain via xc.domain_unpause()."""
    domid = self.domid
    xc.domain_unpause(domid)
1392 ## private:
def restart(self, rename = False):
    """Restart the domain after it has exited.

    Two guards protect against restart loops: a RESTART_IN_PROGRESS marker
    left in the VM store by an earlier, unfinished restart, and a minimum
    interval (MINIMUM_RESTART_TIME) since the previously recorded restart.
    If either guard trips, the domain is destroyed instead of restarted.
    Failures during the restart itself are logged and not propagated.

    @param rename True if the old domain is to be renamed and preserved,
    False if it is to be destroyed.
    """

    self.configure_bootloader()
    # Snapshot the configuration before the old domain is torn down.
    config = self.sxpr()

    if self.readVm(RESTART_IN_PROGRESS):
        log.error('Xend failed during restart of domain %d. '
                  'Refusing to restart to avoid loops.',
                  self.domid)
        self.destroy()
        return

    self.writeVm(RESTART_IN_PROGRESS, 'True')

    now = time.time()
    previous = self.readVm('xend/previous_restart_time')
    if previous:
        elapsed = now - float(previous)
        if elapsed < MINIMUM_RESTART_TIME:
            log.error(
                'VM %s restarting too fast (%f seconds since the last '
                'restart). Refusing to restart to avoid loops.',
                self.info['name'], elapsed)
            self.destroy()
            return

    self.writeVm('xend/previous_restart_time', str(now))

    try:
        if not rename:
            self.unwatchVm()
            self.destroyDomain()
        else:
            self.preserveForRestart()

        # new_dom's VM will be the same as this domain's VM, except where
        # the rename flag has instructed us to call preserveForRestart.
        # In that case, it is important that we remove the
        # RESTART_IN_PROGRESS node from the new domain, not the old one,
        # once the new one is available.

        replacement = None
        try:
            replacement = XendDomain.instance().domain_create(config)
            replacement.unpause()
            replacement.removeVm(RESTART_IN_PROGRESS)
        except:
            if replacement:
                replacement.removeVm(RESTART_IN_PROGRESS)
                replacement.destroy()
            else:
                # Creation itself failed; clear the marker on this domain.
                self.removeVm(RESTART_IN_PROGRESS)
            raise
    except:
        log.exception('Failed to restart domain %d.', self.domid)
def preserveForRestart(self):
    """Preserve a domain that has been shut down, by giving it a new UUID,
    cloning the VM details, and giving it a new name.  This allows us to
    keep this domain for debugging, but restart a new one in its place
    preserving the restart semantics (name and UUID preserved).
    """

    fresh_name = self.generateUniqueName()
    fresh_uuid = uuid.toString(uuid.create())
    log.info("Renaming dead domain %s (%d, %s) to %s (%s).",
             self.info['name'], self.domid, self.info['uuid'],
             fresh_name, fresh_uuid)

    self.unwatchVm()
    self.release_devices()

    # Re-home this domain under the new identity, then store its details
    # at the new VM path.
    self.info['name'] = fresh_name
    self.info['uuid'] = fresh_uuid
    self.vmpath = XendDomain.VMROOT + fresh_uuid
    self.storeVmDetails()

    self.preserve()
def preserve(self):
    """Keep a dead domain around: drop the VM watch, write
    'xend/shutdown_completed' = 'True' through storeDom(), and set the
    state to STATE_DOM_SHUTDOWN."""
    log.info("Preserving dead domain %s (%d).",
             self.info['name'], self.domid)

    self.unwatchVm()
    self.storeDom('xend/shutdown_completed', 'True')
    self.state_set(STATE_DOM_SHUTDOWN)
1486 # private:
def generateUniqueName(self):
    """Return the first name of the form "<current name>-<n>", n = 1, 2,
    ..., that check_name() accepts without raising VmError."""
    suffix = 1
    while True:
        candidate = "%s-%d" % (self.info['name'], suffix)
        try:
            self.check_name(candidate)
        except VmError:
            # Rejected -- try the next suffix.
            suffix += 1
        else:
            return candidate
def configure_bootloader(self):
    """Re-run the configured bootloader and store its result as the image
    configuration.  Does nothing when no bootloader is configured.

    @raise: VmError if no "device" entry is found in the configuration or
            the bootloader yields no configuration
    """
    if not self.info['bootloader']:
        return

    # if we're restarting with a bootloader, we need to run it
    # FIXME: this assumes the disk is the first device and
    # that we're booting from the first disk
    boot_config = None
    # FIXME: this assumes that we want to use the first disk
    first_dev = sxp.child_value(self.sxpr(), "device")
    if first_dev:
        uname = sxp.child_value(first_dev, "uname")
        disk_file = blkdev_uname_to_file(uname)
        boot_config = bootloader(self.info['bootloader'], disk_file, 1,
                                 self.info['vcpus'])

    if boot_config is None:
        msg = "Had a bootloader specified, but can't find disk"
        log.error(msg)
        raise VmError(msg)

    self.info['image'] = sxp.to_string(boot_config)
def send_sysrq(self, key):
    """Write the sysrq key, formatted with '%c', to "control/sysrq" via
    storeDom().

    @param key: value checked with asserts.isCharConvertible()
    """
    asserts.isCharConvertible(key)

    sysrq_char = '%c' % key
    self.storeDom("control/sysrq", sysrq_char)
def infoIsSet(self, name):
    """Return True if self.info has an entry for name whose value is not
    None, False otherwise."""
    if name not in self.info:
        return False
    return self.info[name] is not None
1531 #============================================================================
1532 # Register device controllers and their device config types.
# A map from device-class names to the subclass of DevController that
# implements the device control specific to that device-class.
controllerClasses = {}

def addControllerClass(device_class, cls):
    """Register a subclass of DevController to handle the named device-class.

    The class is tagged with the name (as cls.deviceClass) and then stored
    in controllerClasses under that name.
    """
    setattr(cls, 'deviceClass', device_class)
    controllerClasses[device_class] = cls
# Import the device-controller modules and register one controller class
# for each device-class name.
from xen.xend.server import blkif, netif, tpmif, pciif, iopif, usbif
addControllerClass('vbd', blkif.BlkifController)
addControllerClass('vif', netif.NetifController)
addControllerClass('vtpm', tpmif.TPMifController)
addControllerClass('pci', pciif.PciController)
addControllerClass('ioports', iopif.IOPortsController)
addControllerClass('usb', usbif.UsbifController)