ia64/xen-unstable

changeset 7151:4a2c162d3e7c

Within the store, split the persistent information regarding a VM from the
transient information regarding a domain. This allows live localhost migration,
which is important for testing migration, and also allows migration when we
have a distributed store. The backend paths in the store now refer to the
frontend domain ID, not its UUID, and blktap has changed to match.

To support this split in the information, the cleanup procedure has likewise
been split into separate domain cleanup and VM cleanup steps.

Change the save-restore interface between XendDomain, XendDomainInfo, and
XendCheckpoint, to remove some intermingling, in particular taking XendDomain
out of the loop for the restore procedure.

Improve the recovery procedure so that a failure to recreate dom0 no longer
results in an attempt to destroy dom0.

Add a lock around XendDomain.refresh and one around
XendDomainInfo.refreshShutdown, to improve the behaviour when recreating
domains at startup. There are still races here, so more thought needs to be
given to the locking.

Live reconfiguration of an existing domain is temporarily broken.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@ewan
date Fri Sep 30 13:41:10 2005 +0100 (2005-09-30)
parents a39510ad5c59
children 034ada135379
files tools/blktap/xenbus.c tools/python/xen/xend/XendCheckpoint.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/XendDomainInfo.py tools/python/xen/xend/server/DevController.py
line diff
     1.1 --- a/tools/blktap/xenbus.c	Fri Sep 30 11:55:49 2005 +0100
     1.2 +++ b/tools/blktap/xenbus.c	Fri Sep 30 13:41:10 2005 +0100
     1.3 @@ -116,25 +116,25 @@ int xs_exists(struct xs_handle *h, const
     1.4  
     1.5  
     1.6  /* This assumes that the domain name we are looking for is unique! */
     1.7 -char *get_dom_uuid(struct xs_handle *h, const char *name)
     1.8 +char *get_dom_domid(struct xs_handle *h, const char *name)
     1.9  {
    1.10 -    char **e, *val, *uuid = NULL;
    1.11 +    char **e, *val, *domid = NULL;
    1.12      int num, i, len;
    1.13      char *path;
    1.14  
    1.15 -    e = xs_directory(h, "/domain", &num);
    1.16 +    e = xs_directory(h, "/local/domain", &num);
    1.17  
    1.18      i=0;
    1.19      while (i < num) {
    1.20 -        asprintf(&path, "/domain/%s/name", e[i]);
    1.21 +        asprintf(&path, "/local/domain/%s/name", e[i]);
    1.22          val = xs_read(h, path, &len);
    1.23          free(path);
    1.24          if (val == NULL)
    1.25              continue;
    1.26          if (strcmp(val, name) == 0) {
    1.27              /* match! */
    1.28 -            asprintf(&path, "/domain/%s/uuid", e[i]);
    1.29 -            uuid = xs_read(h, path, &len);
    1.30 +            asprintf(&path, "/local/domain/%s/domid", e[i]);
    1.31 +            domid = xs_read(h, path, &len);
    1.32              free(val);
    1.33              free(path);
    1.34              break;
    1.35 @@ -144,7 +144,7 @@ char *get_dom_uuid(struct xs_handle *h, 
    1.36      }
    1.37  
    1.38      free(e);
    1.39 -    return uuid;
    1.40 +    return domid;
    1.41  }
    1.42  
    1.43  static int strsep_len(const char *str, char c, unsigned int len)
    1.44 @@ -553,15 +553,15 @@ static void blkback_probe(struct xs_hand
    1.45  
    1.46  int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname)
    1.47  {
    1.48 -    char *uuid, *path;
    1.49 +    char *domid, *path;
    1.50      struct xenbus_watch *vbd_watch;
    1.51      int er;
    1.52  
    1.53 -    uuid = get_dom_uuid(h, domname);
    1.54 +    domid = get_dom_domid(h, domname);
    1.55  
    1.56 -    DPRINTF("%s: %s\n", domname, (uuid != NULL) ? uuid : "[ not found! ]");
    1.57 +    DPRINTF("%s: %s\n", domname, (domid != NULL) ? domid : "[ not found! ]");
    1.58  
    1.59 -    asprintf(&path, "/domain/%s/backend/vbd", uuid);
    1.60 +    asprintf(&path, "/local/domain/%s/backend/vbd", domid);
    1.61      if (path == NULL) 
    1.62          return -ENOMEM;
    1.63  
     2.1 --- a/tools/python/xen/xend/XendCheckpoint.py	Fri Sep 30 11:55:49 2005 +0100
     2.2 +++ b/tools/python/xen/xend/XendCheckpoint.py	Fri Sep 30 13:41:10 2005 +0100
     2.3 @@ -56,7 +56,7 @@ def save(xd, fd, dominfo, live):
     2.4      # simply uses the defaults compiled into libxenguest; see the comments 
     2.5      # and/or code in xc_linux_save() for more information. 
     2.6      cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd),
     2.7 -           str(dominfo.domid), "0", "0", str(int(live)) ]
     2.8 +           str(dominfo.getDomid()), "0", "0", str(int(live)) ]
     2.9      log.info("[xc_save] " + join(cmd))
    2.10      child = xPopen3(cmd, True, -1, [fd, xc.handle()])
    2.11      
    2.12 @@ -76,10 +76,10 @@ def save(xd, fd, dominfo, live):
    2.13              if fd == child.fromchild.fileno():
    2.14                  l = child.fromchild.readline()
    2.15                  if l.rstrip() == "suspend":
    2.16 -                    log.info("suspending %d" % dominfo.domid)
    2.17 -                    xd.domain_shutdown(dominfo.domid, reason='suspend')
    2.18 +                    log.info("suspending %d" % dominfo.getDomid())
    2.19 +                    xd.domain_shutdown(dominfo.getDomid(), reason='suspend')
    2.20                      dominfo.state_wait(XendDomainInfo.STATE_VM_SUSPENDED)
    2.21 -                    log.info("suspend %d done" % dominfo.domid)
    2.22 +                    log.info("suspend %d done" % dominfo.getDomid())
    2.23                      child.tochild.write("done\n")
    2.24                      child.tochild.flush()
    2.25          if filter(lambda (fd, event): event & select.POLLHUP, r):
    2.26 @@ -90,11 +90,10 @@ def save(xd, fd, dominfo, live):
    2.27      if child.wait() != 0:
    2.28          raise XendError("xc_save failed: %s" % lasterr)
    2.29  
    2.30 -    dominfo.closeStoreChannel()
    2.31 -    xd.domain_destroy(dominfo.domid)
    2.32 +    dominfo.destroy()
    2.33      return None
    2.34  
    2.35 -def restore(xd, fd):
    2.36 +def restore(fd):
    2.37      signature = read_exact(fd, len(SIGNATURE),
    2.38          "not a valid guest state file: signature read")
    2.39      if signature != SIGNATURE:
    2.40 @@ -113,7 +112,7 @@ def restore(xd, fd):
    2.41          raise XendError("not a valid guest state file: config parse")
    2.42  
    2.43      vmconfig = p.get_val()
    2.44 -    dominfo = xd.domain_configure(vmconfig)
    2.45 +    dominfo = XendDomainInfo.restore(vmconfig)
    2.46  
    2.47      l = read_exact(fd, sizeof_unsigned_long,
    2.48                     "not a valid guest state file: pfn count read")
    2.49 @@ -133,7 +132,7 @@ def restore(xd, fd):
    2.50          console_evtchn = 0
    2.51  
    2.52      cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd),
    2.53 -           str(dominfo.domid), str(nr_pfns),
    2.54 +           str(dominfo.getDomid()), str(nr_pfns),
    2.55             str(store_evtchn), str(console_evtchn)]
    2.56      log.info("[xc_restore] " + join(cmd))
    2.57      child = xPopen3(cmd, True, -1, [fd, xc.handle()])
    2.58 @@ -161,10 +160,10 @@ def restore(xd, fd):
    2.59                          if dominfo.store_channel:
    2.60                              dominfo.setStoreRef(int(m.group(2)))
    2.61                              if dominfo.store_mfn >= 0:
    2.62 -                                IntroduceDomain(dominfo.domid,
    2.63 +                                IntroduceDomain(dominfo.getDomid(),
    2.64                                                  dominfo.store_mfn,
    2.65                                                  dominfo.store_channel.port1,
    2.66 -                                                dominfo.path)
    2.67 +                                                dominfo.getDomainPath())
    2.68                      m = re.match(r"^(console-mfn) (\d+)\n$", l)
    2.69                      if m:
    2.70                          dominfo.setConsoleRef(int(m.group(2)))
     3.1 --- a/tools/python/xen/xend/XendDomain.py	Fri Sep 30 11:55:49 2005 +0100
     3.2 +++ b/tools/python/xen/xend/XendDomain.py	Fri Sep 30 13:41:10 2005 +0100
     3.3 @@ -22,6 +22,7 @@
     3.4   Needs to be persistent for one uptime.
     3.5  """
     3.6  import os
     3.7 +import threading
     3.8  
     3.9  import xen.lowlevel.xc
    3.10  
    3.11 @@ -57,6 +58,9 @@ class XendDomain:
    3.12  
    3.13      """Dict of domain info indexed by domain id."""
    3.14      domains = None
    3.15 +
    3.16 +
    3.17 +    ## public:
    3.18      
    3.19      def __init__(self):
    3.20          # Hack alert. Python does not support mutual imports, but XendDomainInfo
    3.21 @@ -65,6 +69,7 @@ class XendDomain:
    3.22          # So we stuff the XendDomain instance (self) into xroot's components.
    3.23          xroot.add_component("xen.xend.XendDomain", self)
    3.24          self.domains = XendDomainDict()
    3.25 +        self.refresh_lock = threading.Condition()
    3.26          self.watchReleaseDomain()
    3.27          self.refresh()
    3.28          self.dom0_setup()
    3.29 @@ -94,6 +99,9 @@ class XendDomain:
    3.30          doms = self.list_sorted()
    3.31          return map(lambda x: x.getName(), doms)
    3.32  
    3.33 +
    3.34 +    ## private:
    3.35 +
    3.36      def onReleaseDomain(self):
    3.37          self.refresh()
    3.38  
    3.39 @@ -135,9 +143,6 @@ class XendDomain:
    3.40  
    3.41      def dom0_setup(self):
    3.42          dom0 = self.domain_lookup(PRIV_DOMAIN)
    3.43 -        if not dom0:
    3.44 -            dom0 = self.recreate_domain(self.xen_domain(PRIV_DOMAIN))
    3.45 -        dom0.dom0_init_store()
    3.46          dom0.dom0_enforce_vcpus()
    3.47  
    3.48  
    3.49 @@ -150,10 +155,10 @@ class XendDomain:
    3.50          if info.getDomid() in self.domains:
    3.51              notify = False
    3.52          self.domains[info.getDomid()] = info
    3.53 -        info.exportToDB()
    3.54 -        if notify:
    3.55 -            eserver.inject('xend.domain.create', [info.getName(),
    3.56 -                                                  info.getDomid()])
    3.57 +        #info.exportToDB()
    3.58 +        #if notify:
    3.59 +        #    eserver.inject('xend.domain.create', [info.getName(),
    3.60 +        #                                          info.getDomid()])
    3.61  
    3.62      def _delete_domain(self, domid, notify=True):
    3.63          """Remove a domain from the tables.
    3.64 @@ -164,8 +169,8 @@ class XendDomain:
    3.65          info = self.domains.get(domid)
    3.66          if info:
    3.67              del self.domains[domid]
    3.68 -            info.cleanup()
    3.69 -            info.delete()
    3.70 +            info.cleanupDomain()
    3.71 +            info.cleanupVm()
    3.72              if notify:
    3.73                  eserver.inject('xend.domain.died', [info.getName(),
    3.74                                                      info.getDomid()])
    3.75 @@ -174,25 +179,36 @@ class XendDomain:
    3.76      def refresh(self):
    3.77          """Refresh domain list from Xen.
    3.78          """
    3.79 -        doms = self.xen_domains()
    3.80 -        for d in self.domains.values():
    3.81 -            info = doms.get(d.getDomid())
    3.82 -            if info:
    3.83 -                d.update(info)
    3.84 -            else:
    3.85 -                self._delete_domain(d.getDomid())
    3.86 -        for d in doms:
    3.87 -            if d not in self.domains:
    3.88 -                try:
    3.89 -                    self.recreate_domain(doms[d])
    3.90 -                except:
    3.91 -                    log.exception(
    3.92 -                        "Failed to recreate information for domain %d.  "
    3.93 -                        "Destroying it in the hope of recovery.", d)
    3.94 +        self.refresh_lock.acquire()
    3.95 +        try:
    3.96 +            doms = self.xen_domains()
    3.97 +            for d in self.domains.values():
    3.98 +                info = doms.get(d.getDomid())
    3.99 +                if info:
   3.100 +                    d.update(info)
   3.101 +                else:
   3.102 +                    self._delete_domain(d.getDomid())
   3.103 +            for d in doms:
   3.104 +                if d not in self.domains and not doms[d]['dying']:
   3.105                      try:
   3.106 -                        xc.domain_destroy(dom = d)
   3.107 +                        self.recreate_domain(doms[d])
   3.108                      except:
   3.109 -                        log.exception('Destruction of %d failed.', d)
   3.110 +                        if d == PRIV_DOMAIN:
   3.111 +                            log.exception(
   3.112 +                                "Failed to recreate information for domain "
   3.113 +                                "%d.  Doing nothing except crossing my "
   3.114 +                                "fingers.", d)
   3.115 +                        else:
   3.116 +                            log.exception(
   3.117 +                                "Failed to recreate information for domain "
   3.118 +                                "%d.  Destroying it in the hope of "
   3.119 +                                "recovery.", d)
   3.120 +                            try:
   3.121 +                                xc.domain_destroy(dom = d)
   3.122 +                            except:
   3.123 +                                log.exception('Destruction of %d failed.', d)
   3.124 +        finally:
   3.125 +            self.refresh_lock.release()
   3.126  
   3.127  
   3.128      def update_domain(self, id):
   3.129 @@ -208,6 +224,9 @@ class XendDomain:
   3.130          else:
   3.131              self._delete_domain(id)
   3.132  
   3.133 +
   3.134 +    ## public:
   3.135 +
   3.136      def domain_create(self, config):
   3.137          """Create a domain from a configuration.
   3.138  
   3.139 @@ -219,19 +238,12 @@ class XendDomain:
   3.140          return dominfo
   3.141  
   3.142      def domain_configure(self, config):
   3.143 -        """Configure an existing domain. This is intended for internal
   3.144 -        use by domain restore and migrate.
   3.145 +        """Configure an existing domain.
   3.146  
   3.147          @param vmconfig: vm configuration
   3.148          """
   3.149 -        # We accept our configuration specified as ['config' [...]], which
   3.150 -        # some tools or configuration files may be using.  For save-restore,
   3.151 -        # we use the value of XendDomainInfo.sxpr() directly, which has no
   3.152 -        # such item.
   3.153 -        nested = sxp.child_value(config, 'config')
   3.154 -        if nested:
   3.155 -            config = nested
   3.156 -        return XendDomainInfo.restore(config)
   3.157 +        # !!!
   3.158 +        raise XendError("Unsupported")
   3.159  
   3.160      def domain_restore(self, src):
   3.161          """Restore a domain from file.
   3.162 @@ -241,7 +253,7 @@ class XendDomain:
   3.163  
   3.164          try:
   3.165              fd = os.open(src, os.O_RDONLY)
   3.166 -            dominfo = XendCheckpoint.restore(self, fd)
   3.167 +            dominfo = XendCheckpoint.restore(fd)
   3.168              self._add_domain(dominfo)
   3.169              return dominfo
   3.170          except OSError, ex:
     4.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Fri Sep 30 11:55:49 2005 +0100
     4.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Fri Sep 30 13:41:10 2005 +0100
     4.3 @@ -94,8 +94,8 @@ SIF_TPM_BE_DOMAIN = (1<<7)
     4.4  SHUTDOWN_TIMEOUT = 30
     4.5  
     4.6  
     4.7 -DOMROOT = '/domain'
     4.8 -VMROOT  = '/domain'
     4.9 +DOMROOT = '/local/domain/'
    4.10 +VMROOT  = '/vm/'
    4.11  
    4.12  
    4.13  xc = xen.lowlevel.xc.new()
    4.14 @@ -116,6 +116,31 @@ ROUNDTRIPPING_CONFIG_ENTRIES = [
    4.15      ]
    4.16  
    4.17  
    4.18 +def restore(config):
    4.19 +    """Create a domain and a VM object to do a restore.
    4.20 +
    4.21 +    @param config:    domain configuration
    4.22 +    """
    4.23 +
    4.24 +    log.debug("XendDomainInfo.restore(%s)", config)
    4.25 +
    4.26 +    try:
    4.27 +        uuid    =     sxp.child_value(config, 'uuid')
    4.28 +        ssidref = int(sxp.child_value(config, 'ssidref'))
    4.29 +    except TypeError, exn:
    4.30 +        raise VmError('Invalid ssidref in config: %s' % exn)
    4.31 +
    4.32 +    vm = XendDomainInfo(uuid, XendDomainInfo.parseConfig(config),
    4.33 +                        xc.domain_create(ssidref = ssidref))
    4.34 +    vm.storeVmDetails()
    4.35 +    vm.configure()
    4.36 +    vm.create_channel()
    4.37 +#         vm.exportToDB()
    4.38 +#    vm.refreshShutdown()
    4.39 +    vm.storeDomDetails()
    4.40 +    return vm
    4.41 +
    4.42 +
    4.43  def domain_exists(name):
    4.44      # See comment in XendDomain constructor.
    4.45      xd = get_component('xen.xend.XendDomain')
    4.46 @@ -161,7 +186,7 @@ class XendDomainInfo:
    4.47          @raise: VmError for invalid configuration
    4.48          """
    4.49  
    4.50 -        log.debug("XendDomainInfo.create(...)")
    4.51 +        log.debug("XendDomainInfo.create(%s)", config)
    4.52          
    4.53          vm = cls(getUuid(), cls.parseConfig(config))
    4.54          vm.construct()
    4.55 @@ -172,10 +197,14 @@ class XendDomainInfo:
    4.56  
    4.57  
    4.58      def recreate(cls, xeninfo):
    4.59 -        """Create the VM object for an existing domain."""
    4.60 +        """Create the VM object for an existing domain.  The domain must not
    4.61 +        be dying, as the paths in the store should already have been removed,
    4.62 +        and asking us to recreate them causes problems."""
    4.63  
    4.64          log.debug("XendDomainInfo.recreate(%s)", xeninfo)
    4.65  
    4.66 +        assert not xeninfo['dying']
    4.67 +
    4.68          domid = xeninfo['dom']
    4.69          try:
    4.70              dompath = GetDomainPath(domid)
    4.71 @@ -191,47 +220,31 @@ class XendDomainInfo:
    4.72                  raise XendError(
    4.73                      'No vm/uuid path in store for existing domain %d' % domid)
    4.74  
    4.75 +            log.info("Recreating domain %d, UUID %s.", domid, uuid)
    4.76 +
    4.77 +            vm = cls(uuid, xeninfo, domid, True)
    4.78 +
    4.79          except Exception, exn:
    4.80              log.warn(str(exn))
    4.81 +
    4.82              uuid = getUuid()
    4.83  
    4.84 -        log.info("Recreating domain %d, uuid %s", domid, uuid)
    4.85 +            log.info("Recreating domain %d with new UUID %s.", domid, uuid)
    4.86  
    4.87 -        vm = cls(uuid, xeninfo, domid, True)
    4.88 +            vm = cls(uuid, xeninfo, domid, True)
    4.89 +            vm.storeVmDetails()
    4.90 +            vm.storeDomDetails()
    4.91 +
    4.92 +        vm.create_channel()
    4.93 +        if domid == 0:
    4.94 +            vm.initStoreConnection()
    4.95 +
    4.96          vm.refreshShutdown(xeninfo)
    4.97          return vm
    4.98  
    4.99      recreate = classmethod(recreate)
   4.100  
   4.101  
   4.102 -    def restore(cls, config, uuid = None):
   4.103 -        """Create a domain and a VM object to do a restore.
   4.104 -
   4.105 -        @param config:    domain configuration
   4.106 -        @param uuid:      uuid to use
   4.107 -        """
   4.108 -        
   4.109 -        log.debug("XendDomainInfo.restore(%s, %s)", config, uuid)
   4.110 -
   4.111 -        if not uuid:
   4.112 -            uuid = getUuid()
   4.113 -
   4.114 -        try:
   4.115 -            ssidref = int(sxp.child_value(config, 'ssidref'))
   4.116 -        except TypeError, exn:
   4.117 -            raise VmError('Invalid ssidref in config: %s' % exn)
   4.118 -
   4.119 -        vm = cls(uuid, cls.parseConfig(config),
   4.120 -                 xc.domain_create(ssidref = ssidref))
   4.121 -        vm.create_channel()
   4.122 -        vm.configure()
   4.123 -        vm.exportToDB()
   4.124 -        vm.refreshShutdown()
   4.125 -        return vm
   4.126 -
   4.127 -    restore = classmethod(restore)
   4.128 -
   4.129 -
   4.130      def parseConfig(cls, config):
   4.131          def get_cfg(name, conv = None):
   4.132              val = sxp.child_value(config, name)
   4.133 @@ -294,8 +307,6 @@ class XendDomainInfo:
   4.134          self.uuid = uuid
   4.135          self.info = info
   4.136  
   4.137 -        self.path = DOMROOT + "/" + uuid
   4.138 -
   4.139          if domid:
   4.140              self.domid = domid
   4.141          elif 'dom' in info:
   4.142 @@ -303,6 +314,12 @@ class XendDomainInfo:
   4.143          else:
   4.144              self.domid = None
   4.145  
   4.146 +        self.vmpath  = VMROOT + uuid
   4.147 +        if self.domid is None:
   4.148 +            self.dompath = None
   4.149 +        else:
   4.150 +            self.dompath = DOMROOT + str(self.domid)
   4.151 +
   4.152          if augment:
   4.153              self.augmentInfo()
   4.154  
   4.155 @@ -317,9 +334,7 @@ class XendDomainInfo:
   4.156  
   4.157          self.state = STATE_VM_OK
   4.158          self.state_updated = threading.Condition()
   4.159 -
   4.160 -        self.writeVm("uuid", self.uuid)
   4.161 -        self.storeDom("vm", self.path)
   4.162 +        self.refresh_shutdown_lock = threading.Condition()
   4.163  
   4.164  
   4.165      def augmentInfo(self):
   4.166 @@ -332,14 +347,22 @@ class XendDomainInfo:
   4.167                  self.info[name] = val
   4.168  
   4.169          params = (("name", str),
   4.170 -                  ("restart-mode", str),
   4.171 +                  ("restart_mode", str),
   4.172                    ("image",        str),
   4.173 -                  ("start-time", float))
   4.174 +                  ("start_time", float))
   4.175  
   4.176          from_store = self.gatherVm(*params)
   4.177  
   4.178          map(lambda x, y: useIfNeeded(x[0], y), params, from_store)
   4.179  
   4.180 +        device = []
   4.181 +        for c in controllerClasses:
   4.182 +            devconfig = self.getDeviceConfigurations(c)
   4.183 +            if devconfig:
   4.184 +                device.extend(map(lambda x: (c, x), devconfig))
   4.185 +
   4.186 +        useIfNeeded('device', device)
   4.187 +
   4.188  
   4.189      def validateInfo(self):
   4.190          """Validate and normalise the info block.  This has either been parsed
   4.191 @@ -377,7 +400,7 @@ class XendDomainInfo:
   4.192              # mem_kb.
   4.193  
   4.194              def discard_negatives(name):
   4.195 -                if self.infoIsSet(name) and self.info[name] <= 0:
   4.196 +                if self.infoIsSet(name) and self.info[name] < 0:
   4.197                      del self.info[name]
   4.198  
   4.199              def valid_KiB_(mb_name, kb_name):
   4.200 @@ -403,7 +426,7 @@ class XendDomainInfo:
   4.201  
   4.202              def valid_KiB(mb_name, kb_name):
   4.203                  result = valid_KiB_(mb_name, kb_name)
   4.204 -                if result <= 0:
   4.205 +                if result is None or result < 0:
   4.206                      raise VmError('Invalid %s / %s: %s' %
   4.207                                    (mb_name, kb_name, result))
   4.208                  else:
   4.209 @@ -452,42 +475,60 @@ class XendDomainInfo:
   4.210  
   4.211  
   4.212      def readVm(self, *args):
   4.213 -        return xstransact.Read(self.path, *args)
   4.214 +        return xstransact.Read(self.vmpath, *args)
   4.215  
   4.216      def writeVm(self, *args):
   4.217 -        return xstransact.Write(self.path, *args)
   4.218 +        return xstransact.Write(self.vmpath, *args)
   4.219  
   4.220      def removeVm(self, *args):
   4.221 -        return xstransact.Remove(self.path, *args)
   4.222 +        return xstransact.Remove(self.vmpath, *args)
   4.223  
   4.224      def gatherVm(self, *args):
   4.225 -        return xstransact.Gather(self.path, *args)
   4.226 +        return xstransact.Gather(self.vmpath, *args)
   4.227  
   4.228      def storeVm(self, *args):
   4.229 -        return xstransact.Store(self.path, *args)
   4.230 +        return xstransact.Store(self.vmpath, *args)
   4.231  
   4.232      def readDom(self, *args):
   4.233 -        return xstransact.Read(self.path, *args)
   4.234 +        return xstransact.Read(self.dompath, *args)
   4.235  
   4.236      def writeDom(self, *args):
   4.237 -        return xstransact.Write(self.path, *args)
   4.238 +        return xstransact.Write(self.dompath, *args)
   4.239  
   4.240      def removeDom(self, *args):
   4.241 -        return xstransact.Remove(self.path, *args)
   4.242 +        return xstransact.Remove(self.dompath, *args)
   4.243  
   4.244      def gatherDom(self, *args):
   4.245 -        return xstransact.Gather(self.path, *args)
   4.246 +        return xstransact.Gather(self.dompath, *args)
   4.247  
   4.248      def storeDom(self, *args):
   4.249 -        return xstransact.Store(self.path, *args)
   4.250 +        return xstransact.Store(self.dompath, *args)
   4.251  
   4.252  
   4.253 -    def exportToDB(self):
   4.254 +    def storeVmDetails(self):
   4.255 +        to_store = {
   4.256 +            'uuid':               self.uuid,
   4.257 +
   4.258 +            # !!!
   4.259 +            'memory/target':      str(self.info['memory_KiB'])
   4.260 +            }
   4.261 +
   4.262 +        if self.infoIsSet('image'):
   4.263 +            to_store['image'] = sxp.to_string(self.info['image'])
   4.264 +
   4.265 +        for k in ['name', 'ssidref', 'restart_mode']:
   4.266 +            if self.infoIsSet(k):
   4.267 +                to_store[k] = str(self.info[k])
   4.268 +
   4.269 +        log.debug("Storing VM details: %s" % str(to_store))
   4.270 +
   4.271 +        self.writeVm(to_store)
   4.272 +
   4.273 +
   4.274 +    def storeDomDetails(self):
   4.275          to_store = {
   4.276              'domid':              str(self.domid),
   4.277 -            'uuid':               self.uuid,
   4.278 -
   4.279 -            'xend/restart_mode':  str(self.info['restart_mode']),
   4.280 +            'vm':                 self.vmpath,
   4.281  
   4.282              'memory/target':      str(self.info['memory_KiB'])
   4.283              }
   4.284 @@ -496,11 +537,9 @@ class XendDomainInfo:
   4.285              if v:
   4.286                  to_store[k] = str(v)
   4.287  
   4.288 -        to_store['image'] = sxp.to_string(self.info['image'])
   4.289 +        log.debug("Storing domain details: %s" % str(to_store))
   4.290  
   4.291 -        log.debug("Storing %s" % str(to_store))
   4.292 -
   4.293 -        self.writeVm(to_store)
   4.294 +        self.writeDom(to_store)
   4.295  
   4.296  
   4.297      def setDomid(self, domid):
   4.298 @@ -522,8 +561,8 @@ class XendDomainInfo:
   4.299      def getName(self):
   4.300          return self.info['name']
   4.301  
   4.302 -    def getPath(self):
   4.303 -        return self.path
   4.304 +    def getDomainPath(self):
   4.305 +        return self.dompath
   4.306  
   4.307      def getUuid(self):
   4.308          return self.uuid
   4.309 @@ -549,78 +588,97 @@ class XendDomainInfo:
   4.310  
   4.311  
   4.312      def refreshShutdown(self, xeninfo = None):
   4.313 -        if xeninfo is None:
   4.314 -            xeninfo = dom_get(self.domid)
   4.315 +        # If set at the end of this method, a restart is required, with the
   4.316 +        # given reason.  This restart has to be done out of the scope of
   4.317 +        # refresh_shutdown_lock.
   4.318 +        restart_reason = None
   4.319 +        
   4.320 +        self.refresh_shutdown_lock.acquire()
   4.321 +        try:
   4.322              if xeninfo is None:
   4.323 -                # The domain no longer exists.  This will occur if we have
   4.324 -                # scheduled a timer to check for shutdown timeouts and the
   4.325 -                # shutdown succeeded.
   4.326 +                xeninfo = dom_get(self.domid)
   4.327 +                if xeninfo is None:
   4.328 +                    # The domain no longer exists.  This will occur if we have
   4.329 +                    # scheduled a timer to check for shutdown timeouts and the
   4.330 +                    # shutdown succeeded.  It will also occur if someone
   4.331 +                    # destroys a domain beneath us.  We clean up, just in
   4.332 +                    # case.
   4.333 +                    self.cleanupDomain()
   4.334 +                    self.cleanupVm()
   4.335 +                    return
   4.336 +
   4.337 +            if xeninfo['dying']:
   4.338 +                # Dying means that a domain has been destroyed, but has not
   4.339 +                # yet been cleaned up by Xen.  This could persist indefinitely
   4.340 +                # if, for example, another domain has some of its pages
   4.341 +                # mapped.  We might like to diagnose this problem in the
   4.342 +                # future, but for now all we do is make sure that it's not
   4.343 +                # us holding the pages, by calling the cleanup methods.
   4.344 +                self.cleanupDomain()
   4.345 +                self.cleanupVm()
   4.346                  return
   4.347  
   4.348 -        if xeninfo['dying']:
   4.349 -            # Dying means that a domain has been destroyed, but has not yet
   4.350 -            # been cleaned up by Xen.  This could persist indefinitely if,
   4.351 -            # for example, another domain has some of its pages mapped.
   4.352 -            # We might like to diagnose this problem in the future, but for
   4.353 -            # now all we can sensibly do is ignore it.
   4.354 -            pass
   4.355 -
   4.356 -        elif xeninfo['crashed']:
   4.357 -            log.warn('Domain has crashed: name=%s id=%d.',
   4.358 -                     self.info['name'], self.domid)
   4.359 -
   4.360 -            if xroot.get_enable_dump():
   4.361 -                self.dumpCore()
   4.362 -
   4.363 -            self.maybeRestart('crashed')
   4.364 -
   4.365 -        elif xeninfo['shutdown']:
   4.366 -            reason = shutdown_reason(xeninfo['shutdown_reason'])
   4.367 -
   4.368 -            log.info('Domain has shutdown: name=%s id=%d reason=%s.',
   4.369 -                     self.info['name'], self.domid, reason)
   4.370 -
   4.371 -            self.clearRestart()
   4.372 +            elif xeninfo['crashed']:
   4.373 +                log.warn('Domain has crashed: name=%s id=%d.',
   4.374 +                         self.info['name'], self.domid)
   4.375  
   4.376 -            if reason == 'suspend':
   4.377 -                self.state_set(STATE_VM_SUSPENDED)
   4.378 -                # Don't destroy the domain.  XendCheckpoint will do this once
   4.379 -                # it has finished.
   4.380 -            elif reason in ['poweroff', 'reboot']:
   4.381 -                self.maybeRestart(reason)
   4.382 -            else:
   4.383 -                self.destroy()
   4.384 -
   4.385 -        else:
   4.386 -            # Domain is alive.  If we are shutting it down, then check
   4.387 -            # the timeout on that, and destroy it if necessary.
   4.388 +                if xroot.get_enable_dump():
   4.389 +                    self.dumpCore()
   4.390  
   4.391 -            sst = self.readVm('xend/shutdown_start_time')
   4.392 -            if sst:
   4.393 -                sst = float(sst)
   4.394 -                timeout = SHUTDOWN_TIMEOUT - time.time() + sst
   4.395 -                if timeout < 0:
   4.396 -                    log.info(
   4.397 -                        "Domain shutdown timeout expired: name=%s id=%s",
   4.398 -                        self.info['name'], self.domid)
   4.399 +                restart_reason = 'crashed'
   4.400 +
   4.401 +            elif xeninfo['shutdown']:
   4.402 +                reason = shutdown_reason(xeninfo['shutdown_reason'])
   4.403 +
   4.404 +                log.info('Domain has shutdown: name=%s id=%d reason=%s.',
   4.405 +                         self.info['name'], self.domid, reason)
   4.406 +
   4.407 +                self.clearRestart()
   4.408 +
   4.409 +                if reason == 'suspend':
   4.410 +                    self.state_set(STATE_VM_SUSPENDED)
   4.411 +                    # Don't destroy the domain.  XendCheckpoint will do this
   4.412 +                    # once it has finished.
   4.413 +                elif reason in ['poweroff', 'reboot']:
   4.414 +                    restart_reason = reason
   4.415 +                else:
   4.416                      self.destroy()
   4.417 -                else:
   4.418 -                    log.debug(
   4.419 -                        "Scheduling refreshShutdown on domain %d in %ds.",
   4.420 -                        self.domid, timeout)
   4.421 -                    scheduler.later(timeout, self.refreshShutdown)
   4.422 +
   4.423 +            else:
   4.424 +                # Domain is alive.  If we are shutting it down, then check
   4.425 +                # the timeout on that, and destroy it if necessary.
   4.426 +
   4.427 +                sst = self.readDom('xend/shutdown_start_time')
   4.428 +                if sst:
   4.429 +                    sst = float(sst)
   4.430 +                    timeout = SHUTDOWN_TIMEOUT - time.time() + sst
   4.431 +                    if timeout < 0:
   4.432 +                        log.info(
   4.433 +                            "Domain shutdown timeout expired: name=%s id=%s",
   4.434 +                            self.info['name'], self.domid)
   4.435 +                        self.destroy()
   4.436 +                    else:
   4.437 +                        log.debug(
   4.438 +                            "Scheduling refreshShutdown on domain %d in %ds.",
   4.439 +                            self.domid, timeout)
   4.440 +                        scheduler.later(timeout, self.refreshShutdown)
   4.441 +        finally:
   4.442 +            self.refresh_shutdown_lock.release()
   4.443 +
   4.444 +        if restart_reason:
   4.445 +            self.maybeRestart(restart_reason)
   4.446  
   4.447  
   4.448      def shutdown(self, reason):
   4.449          if not reason in shutdown_reasons.values():
   4.450              raise XendError('invalid reason:' + reason)
   4.451 -        self.storeVm("control/shutdown", reason)
   4.452 +        self.storeDom("control/shutdown", reason)
   4.453          if not reason == 'suspend':
   4.454 -            self.storeVm('xend/shutdown_start_time', time.time())
   4.455 +            self.storeDom('xend/shutdown_start_time', time.time())
   4.456  
   4.457  
   4.458      def clearRestart(self):
   4.459 -        self.removeVm("xend/shutdown_start_time")
   4.460 +        self.removeDom("xend/shutdown_start_time")
   4.461  
   4.462  
   4.463      def maybeRestart(self, reason):
   4.464 @@ -647,12 +705,10 @@ class XendDomainInfo:
   4.465          """Close the given channel, if set, and remove the given entry in the
   4.466          store.  Nothrow guarantee."""
   4.467          
   4.468 +        if channel:
   4.469 +            channel.close()
   4.470          try:
   4.471 -            try:
   4.472 -                if channel:
   4.473 -                    channel.close()
   4.474 -            finally:
   4.475 -                self.removeDom(entry)
   4.476 +            self.removeDom(entry)
   4.477          except Exception, exn:
   4.478              log.exception(exn)
   4.479          
   4.480 @@ -753,6 +809,10 @@ class XendDomainInfo:
   4.481  
   4.482      ## private:
   4.483  
   4.484 +    def getDeviceConfigurations(self, deviceClass):
   4.485 +        return self.getDeviceController(deviceClass).configurations()
   4.486 +
   4.487 +
   4.488      def getDeviceController(self, name):
   4.489          if name not in controllerClasses:
   4.490              raise XendError("unknown device type: " + str(name))
   4.491 @@ -864,9 +924,8 @@ class XendDomainInfo:
   4.492  
   4.493  
   4.494      def construct(self):
   4.495 -        """Construct the vm instance from its configuration.
   4.496 +        """Construct the domain.
   4.497  
   4.498 -        @param config: configuration
   4.499          @raise: VmError on error
   4.500          """
   4.501  
   4.502 @@ -881,17 +940,18 @@ class XendDomainInfo:
   4.503                            self.info['name'])
   4.504  
   4.505          try:
   4.506 +            self.dompath = DOMROOT + str(self.domid)
   4.507 +
   4.508              self.initDomain()
   4.509              self.construct_image()
   4.510              self.configure()
   4.511 -            self.exportToDB()
   4.512 -        except Exception, ex:
   4.513 -            # Catch errors, cleanup and re-raise.
   4.514 -            print 'Domain construction error:', ex
   4.515 -            import traceback
   4.516 -            traceback.print_exc()
   4.517 +            self.storeVmDetails()
   4.518 +            self.storeDomDetails()
   4.519 +        except Exception:
   4.520 +            log.exception('Domain construction failed')
   4.521              self.destroy()
   4.522 -            raise
   4.523 +            raise VmError('Creating domain failed: name=%s' %
   4.524 +                          self.info['name'])
   4.525  
   4.526  
   4.527      def initDomain(self):
   4.528 @@ -926,38 +986,29 @@ class XendDomainInfo:
   4.529                    self.domid, self.info['name'], self.info['memory_KiB'])
   4.530  
   4.531  
   4.532 -    def configure_vcpus(self, vcpus):
   4.533 +    def configure_vcpus(self):
   4.534          d = {}
   4.535 -        for v in range(0, vcpus):
   4.536 +        for v in range(0, self.info['vcpus']):
   4.537              d["cpu/%d/availability" % v] = "online"
   4.538          self.writeVm(d)
   4.539  
   4.540 +
   4.541      def construct_image(self):
   4.542          """Construct the boot image for the domain.
   4.543          """
   4.544          self.create_channel()
   4.545          self.image.createImage()
   4.546 -        self.exportToDB()
   4.547 -        if self.store_channel and self.store_mfn >= 0:
   4.548 -            IntroduceDomain(self.domid, self.store_mfn,
   4.549 -                            self.store_channel.port1, self.path)
   4.550 -        # get the configured value of vcpus and update store
   4.551 -        self.configure_vcpus(self.info['vcpus'])
   4.552 +#        !!! self.exportToDB()
   4.553 +        IntroduceDomain(self.domid, self.store_mfn,
   4.554 +                        self.store_channel.port1, self.dompath)
   4.555 +        self.configure_vcpus()
   4.556  
   4.557  
   4.558      ## public:
   4.559  
   4.560 -    def delete(self):
   4.561 -        """Delete the vm's db.
   4.562 -        """
   4.563 -        try:
   4.564 -            xstransact.Remove(self.path, 'domid')
   4.565 -        except Exception, ex:
   4.566 -            log.warning("error in domain db delete: %s", ex)
   4.567 -
   4.568 -
   4.569 -    def cleanup(self):
   4.570 -        """Cleanup vm resources: release devices.  Nothrow guarantee."""
   4.571 +    def cleanupDomain(self):
   4.572 +        """Cleanup domain resources; release devices.  Idempotent.  Nothrow
   4.573 +        guarantee."""
   4.574  
   4.575          self.state_set(STATE_VM_TERMINATED)
   4.576          self.release_devices()
   4.577 @@ -972,24 +1023,29 @@ class XendDomainInfo:
   4.578                      "XendDomainInfo.cleanup: image.destroy() failed.")
   4.579              self.image = None
   4.580  
   4.581 +        try:
   4.582 +            self.removeDom()
   4.583 +        except Exception:
   4.584 +            log.exception("Removing domain path failed.")
   4.585  
   4.586 -    def destroy(self):
   4.587 -        """Cleanup vm and destroy domain.  Nothrow guarantee."""
   4.588  
   4.589 -        log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
   4.590 -
   4.591 -        self.cleanup()
   4.592 +    def cleanupVm(self):
   4.593 +        """Cleanup VM resources.  Idempotent.  Nothrow guarantee."""
   4.594  
   4.595          try:
   4.596              self.removeVm()
   4.597          except Exception:
   4.598              log.exception("Removing VM path failed.")
   4.599  
   4.600 -        try:
   4.601 -            self.removeDom()
   4.602 -        except Exception:
   4.603 -            log.exception("Removing domain path failed.")
   4.604  
   4.605 +    def destroy(self):
   4.606 +        """Cleanup VM and destroy domain.  Nothrow guarantee."""
   4.607 +
   4.608 +        log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
   4.609 +
   4.610 +        self.cleanupDomain()
   4.611 +        self.cleanupVm()
   4.612 +        
   4.613          try:
   4.614              if self.domid is not None:
   4.615                  xc.domain_destroy(dom=self.domid)
   4.616 @@ -1002,11 +1058,12 @@ class XendDomainInfo:
   4.617          """
   4.618          return self.state == STATE_VM_TERMINATED
   4.619  
   4.620 +
   4.621      def release_devices(self):
   4.622 -        """Release all vm devices.  Nothrow guarantee."""
   4.623 +        """Release all domain's devices.  Nothrow guarantee."""
   4.624  
   4.625          while True:
   4.626 -            t = xstransact("%s/device" % self.path)
   4.627 +            t = xstransact("%s/device" % self.dompath)
   4.628              for n in controllerClasses.keys():
   4.629                  for d in t.list(n):
   4.630                      try:
   4.631 @@ -1020,6 +1077,7 @@ class XendDomainInfo:
   4.632              if t.commit():
   4.633                  break
   4.634  
   4.635 +
   4.636      def eventChannel(self, path=None):
   4.637          """Create an event channel to the domain.
   4.638          
   4.639 @@ -1030,9 +1088,29 @@ class XendDomainInfo:
   4.640              try:
   4.641                  port = int(self.readDom(path))
   4.642              except:
   4.643 -                # if anything goes wrong, assume the port was not yet set
   4.644 +                # The port is not yet set, i.e. the channel has not yet been
   4.645 +                # created.
   4.646                  pass
   4.647          ret = channel.eventChannel(0, self.domid, port1=port, port2=0)
   4.648 +
   4.649 +        # Stale port information from above causes an Invalid Argument to be
   4.650 +        # thrown by the eventChannel call below.  To recover, we throw away
   4.651 +        # port if it turns out to be bad, and just create a new channel.
   4.652 +        # If creating a new channel with two new ports fails, then something
   4.653 +        # else is going wrong, so we bail.
   4.654 +        while True:
   4.655 +            try:
   4.656 +                ret = channel.eventChannel(0, self.domid, port1 = port,
   4.657 +                                           port2 = 0)
   4.658 +                break
   4.659 +            except:
   4.660 +                log.exception("Exception in eventChannel(0, %d, %d, %d)",
   4.661 +                              self.domid, port, 0)
   4.662 +                if port == 0:
   4.663 +                    raise
   4.664 +                else:
   4.665 +                    port = 0
   4.666 +                    log.error("Recovering from above exception.")
   4.667          self.storeDom(path, ret.port1)
   4.668          return ret
   4.669          
   4.670 @@ -1113,10 +1191,11 @@ class XendDomainInfo:
   4.671          """Restart the domain after it has exited. """
   4.672  
   4.673          #            self.restart_check()
   4.674 -        self.cleanup()
   4.675  
   4.676          config = self.sxpr()
   4.677  
   4.678 +        self.cleanupDomain()
   4.679 +
   4.680          if self.readVm('xend/restart_in_progress'):
   4.681              log.error('Xend failed during restart of domain %d.  '
   4.682                        'Refusing to restart to avoid loops.',
   4.683 @@ -1188,26 +1267,23 @@ class XendDomainInfo:
   4.684          self.storeVm("cpu/%d/availability" % vcpu, availability)
   4.685  
   4.686      def send_sysrq(self, key=0):
   4.687 -        self.storeVm("control/sysrq", '%c' % key)
   4.688 +        self.storeDom("control/sysrq", '%c' % key)
   4.689  
   4.690 -    def dom0_init_store(self):
   4.691 -        if not self.store_channel:
   4.692 -            self.store_channel = self.eventChannel("store/port")
   4.693 -            if not self.store_channel:
   4.694 -                return
   4.695 +
   4.696 +    def initStoreConnection(self):
   4.697          ref = xc.init_store(self.store_channel.port2)
   4.698          if ref and ref >= 0:
   4.699              self.setStoreRef(ref)
   4.700              try:
   4.701                  IntroduceDomain(self.domid, ref, self.store_channel.port1,
   4.702 -                                self.path)
   4.703 +                                self.dompath)
   4.704              except RuntimeError, ex:
   4.705                  if ex.args[0] == errno.EISCONN:
   4.706                      pass
   4.707                  else:
   4.708                      raise
   4.709 -            # get run-time value of vcpus and update store
   4.710 -            self.configure_vcpus(dom_get(self.domid)['vcpus'])
   4.711 +        self.configure_vcpus()
   4.712 +
   4.713  
   4.714      def dom0_enforce_vcpus(self):
   4.715          dom = 0
     5.1 --- a/tools/python/xen/xend/server/DevController.py	Fri Sep 30 11:55:49 2005 +0100
     5.2 +++ b/tools/python/xen/xend/server/DevController.py	Fri Sep 30 13:41:10 2005 +0100
     5.3 @@ -219,7 +219,7 @@ class DevController:
     5.4      def backendPath(self, backdom, devid):
     5.5          """@param backdom [XendDomainInfo] The backend domain info."""
     5.6  
     5.7 -        return "%s/backend/%s/%s/%d" % (backdom.getPath(),
     5.8 +        return "%s/backend/%s/%s/%d" % (backdom.getDomainPath(),
     5.9                                          self.deviceClass,
    5.10                                          self.vm.getUuid(), devid)
    5.11  
    5.12 @@ -229,9 +229,9 @@ class DevController:
    5.13  
    5.14  
    5.15      def frontendRoot(self):
    5.16 -        return "%s/device/%s" % (self.vm.getPath(), self.deviceClass)
    5.17 +        return "%s/device/%s" % (self.vm.getDomainPath(), self.deviceClass)
    5.18  
    5.19  
    5.20      def frontendMiscPath(self):
    5.21 -        return "%s/device-misc/%s" % (self.vm.getPath(),
    5.22 +        return "%s/device-misc/%s" % (self.vm.getDomainPath(),
    5.23                                        self.deviceClass)