ia64/xen-unstable

changeset 7181:9c6b39746b78

Fix localhost live migration. We were over-vigorously wiping out the store
entries when a domain closed and on save, which meant that the /vm entries
disappeared when a localhost migration occurred. XendCheckpoint has had extra
exception handling and logging added. It also now calls back through
XendDomain.restore_, which has the correct locking semantics to prevent race
conditions during migration.

Added assertions to XendCheckpoint to ensure that the channels are set after
XendDomainInfo.restore. I don't see why they would not be, and the old code
meant that in the case that they were not, IntroduceDomain would not be called
on the new domain, breaking Xend restart.

relocate.py now calls through XendDomain.domain_restore_fd rather than directly
into XendCheckpoint, both to isolate XendCheckpoint from the rest of the world
and to allow XendDomain to pass itself into XendCheckpoint for a callback.

Simplify the XendCheckpoint / XendDomainInfo interlock, giving only two
states, OK and TERMINATED. If XendCheckpoint asks for a suspend, but sees a
shutdown, it is valid for it to proceed -- either way the domain has stopped.
Higher level tools may wish to disallow this, but at the very least, there is
no sense in waiting for a suspend that will never come.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@ewan
date Tue Oct 04 11:14:50 2005 +0100 (2005-10-04)
parents a6154af4fc43
children 96cc6aa196b6
files tools/python/xen/xend/XendCheckpoint.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/XendDomainInfo.py tools/python/xen/xend/server/relocate.py
line diff
     1.1 --- a/tools/python/xen/xend/XendCheckpoint.py	Tue Oct 04 11:01:38 2005 +0100
     1.2 +++ b/tools/python/xen/xend/XendCheckpoint.py	Tue Oct 04 11:14:50 2005 +0100
     1.3 @@ -1,4 +1,5 @@
     1.4  # Copyright (C) 2005 Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
     1.5 +# Copyright (C) 2005 XenSource Ltd
     1.6  
     1.7  # This file is subject to the terms and conditions of the GNU General
     1.8  # Public License.  See the file "COPYING" in the main directory of
     1.9 @@ -15,7 +16,6 @@ from xen.util.xpopen import xPopen3
    1.10  
    1.11  import xen.lowlevel.xc
    1.12  
    1.13 -import XendDomainInfo
    1.14  from xen.xend.xenstore.xsutil import IntroduceDomain
    1.15  
    1.16  from XendError import XendError
    1.17 @@ -42,58 +42,75 @@ def read_exact(fd, size, errmsg):
    1.18          raise XendError(errmsg)
    1.19      return buf
    1.20  
    1.21 -def save(xd, fd, dominfo, live):
    1.22 +def save(fd, dominfo, live):
    1.23      write_exact(fd, SIGNATURE, "could not write guest state file: signature")
    1.24  
    1.25      config = sxp.to_string(dominfo.sxpr())
    1.26 -    write_exact(fd, pack("!i", len(config)),
    1.27 -                "could not write guest state file: config len")
    1.28 -    write_exact(fd, config, "could not write guest state file: config")
    1.29 +
    1.30 +    domain_name = dominfo.getName()
    1.31 +
    1.32 +    if live:
    1.33 +        dominfo.setName('migrating-' + domain_name)
    1.34 +
    1.35 +    try:
    1.36 +        write_exact(fd, pack("!i", len(config)),
    1.37 +                    "could not write guest state file: config len")
    1.38 +        write_exact(fd, config, "could not write guest state file: config")
    1.39  
    1.40 -    # xc_save takes three customization parameters: maxit, max_f, and flags
    1.41 -    # the last controls whether or not save is 'live', while the first two
    1.42 -    # further customize behaviour when 'live' save is enabled. Passing "0"
    1.43 -    # simply uses the defaults compiled into libxenguest; see the comments 
    1.44 -    # and/or code in xc_linux_save() for more information. 
    1.45 -    cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd),
    1.46 -           str(dominfo.getDomid()), "0", "0", str(int(live)) ]
    1.47 -    log.info("[xc_save] " + join(cmd))
    1.48 -    child = xPopen3(cmd, True, -1, [fd, xc.handle()])
    1.49 +        # xc_save takes three customization parameters: maxit, max_f, and
    1.50 +        # flags the last controls whether or not save is 'live', while the
    1.51 +        # first two further customize behaviour when 'live' save is
    1.52 +        # enabled. Passing "0" simply uses the defaults compiled into
    1.53 +        # libxenguest; see the comments and/or code in xc_linux_save() for
    1.54 +        # more information.
    1.55 +        cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd),
    1.56 +               str(dominfo.getDomid()), "0", "0", str(int(live)) ]
    1.57 +        log.info("[xc_save] " + join(cmd))
    1.58 +        child = xPopen3(cmd, True, -1, [fd, xc.handle()])
    1.59      
    1.60 -    lasterr = ""
    1.61 -    p = select.poll()
    1.62 -    p.register(child.fromchild.fileno())
    1.63 -    p.register(child.childerr.fileno())
    1.64 -    while True: 
    1.65 -        r = p.poll()
    1.66 -        for (fd, event) in r:
    1.67 -            if not event & select.POLLIN:
    1.68 -                continue
    1.69 -            if fd == child.childerr.fileno():
    1.70 -                l = child.childerr.readline()
    1.71 -                log.error(l.rstrip())
    1.72 -                lasterr = l.rstrip()
    1.73 -            if fd == child.fromchild.fileno():
    1.74 -                l = child.fromchild.readline()
    1.75 -                if l.rstrip() == "suspend":
    1.76 -                    log.info("suspending %d" % dominfo.getDomid())
    1.77 -                    xd.domain_shutdown(dominfo.getDomid(), reason='suspend')
    1.78 -                    dominfo.state_wait(XendDomainInfo.STATE_VM_SUSPENDED)
    1.79 -                    log.info("suspend %d done" % dominfo.getDomid())
    1.80 -                    child.tochild.write("done\n")
    1.81 -                    child.tochild.flush()
    1.82 -        if filter(lambda (fd, event): event & select.POLLHUP, r):
    1.83 -            break
    1.84 +        lasterr = ""
    1.85 +        p = select.poll()
    1.86 +        p.register(child.fromchild.fileno())
    1.87 +        p.register(child.childerr.fileno())
    1.88 +        while True: 
    1.89 +            r = p.poll()
    1.90 +            for (fd, event) in r:
    1.91 +                if not event & select.POLLIN:
    1.92 +                    continue
    1.93 +                if fd == child.childerr.fileno():
    1.94 +                    l = child.childerr.readline()
    1.95 +                    log.error(l.rstrip())
    1.96 +                    lasterr = l.rstrip()
    1.97 +                if fd == child.fromchild.fileno():
    1.98 +                    l = child.fromchild.readline()
    1.99 +                    if l.rstrip() == "suspend":
   1.100 +                        log.info("suspending %d", dominfo.getDomid())
   1.101 +                        dominfo.shutdown('suspend')
   1.102 +                        dominfo.waitForShutdown()
   1.103 +                        log.info("suspend %d done", dominfo.getDomid())
   1.104 +                        child.tochild.write("done\n")
   1.105 +                        child.tochild.flush()
   1.106 +            if filter(lambda (fd, event): event & select.POLLHUP, r):
   1.107 +                break
   1.108  
   1.109 -    if child.wait() >> 8 == 127:
   1.110 -        lasterr = "popen %s failed" % PATH_XC_SAVE
   1.111 -    if child.wait() != 0:
   1.112 -        raise XendError("xc_save failed: %s" % lasterr)
   1.113 +        if child.wait() >> 8 == 127:
   1.114 +            lasterr = "popen %s failed" % PATH_XC_SAVE
   1.115 +        if child.wait() != 0:
   1.116 +            raise XendError("xc_save failed: %s" % lasterr)
   1.117  
   1.118 -    dominfo.destroy()
   1.119 -    return None
   1.120 +        dominfo.destroyDomain()
   1.121 +    except Exception, exn:
   1.122 +        log.exception("Save failed on domain %s (%d).", domain_name,
   1.123 +                      dominfo.getDomid())
   1.124 +        try:
   1.125 +            if live:
   1.126 +                dominfo.setName(domain_name)
   1.127 +        except:
   1.128 +            log.exception("Failed to reset the migrating domain's name")
   1.129 +        raise Exception, exn
   1.130  
   1.131 -def restore(fd):
   1.132 +
   1.133 +def restore(xd, fd):
   1.134      signature = read_exact(fd, len(SIGNATURE),
   1.135          "not a valid guest state file: signature read")
   1.136      if signature != SIGNATURE:
   1.137 @@ -112,71 +129,72 @@ def restore(fd):
   1.138          raise XendError("not a valid guest state file: config parse")
   1.139  
   1.140      vmconfig = p.get_val()
   1.141 -    dominfo = XendDomainInfo.restore(vmconfig)
   1.142 +
   1.143 +    dominfo = xd.restore_(vmconfig)
   1.144  
   1.145 -    l = read_exact(fd, sizeof_unsigned_long,
   1.146 -                   "not a valid guest state file: pfn count read")
   1.147 -    nr_pfns = unpack("=L", l)[0]   # XXX endianess
   1.148 -    if nr_pfns > 1024*1024:     # XXX
   1.149 -        raise XendError(
   1.150 -            "not a valid guest state file: pfn count out of range")
   1.151 +    assert dominfo.store_channel
   1.152 +    assert dominfo.console_channel
   1.153  
   1.154 -    if dominfo.store_channel:
   1.155 -        store_evtchn = dominfo.store_channel.port2
   1.156 -    else:
   1.157 -        store_evtchn = 0
   1.158 +    try:
   1.159 +        l = read_exact(fd, sizeof_unsigned_long,
   1.160 +                       "not a valid guest state file: pfn count read")
   1.161 +        nr_pfns = unpack("=L", l)[0]   # XXX endianess
   1.162 +        if nr_pfns > 1024*1024:     # XXX
   1.163 +            raise XendError(
   1.164 +                "not a valid guest state file: pfn count out of range")
   1.165  
   1.166 -    if dominfo.console_channel:
   1.167 +        store_evtchn = dominfo.store_channel.port2
   1.168          console_evtchn = dominfo.console_channel.port2
   1.169 -    else:
   1.170 -        console_evtchn = 0
   1.171  
   1.172 -    cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd),
   1.173 -           str(dominfo.getDomid()), str(nr_pfns),
   1.174 -           str(store_evtchn), str(console_evtchn)]
   1.175 -    log.info("[xc_restore] " + join(cmd))
   1.176 -    child = xPopen3(cmd, True, -1, [fd, xc.handle()])
   1.177 -    child.tochild.close()
   1.178 +        cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd),
   1.179 +               str(dominfo.getDomid()), str(nr_pfns),
   1.180 +               str(store_evtchn), str(console_evtchn)]
   1.181 +        log.info("[xc_restore] " + join(cmd))
   1.182 +        child = xPopen3(cmd, True, -1, [fd, xc.handle()])
   1.183 +        child.tochild.close()
   1.184  
   1.185 -    lasterr = ""
   1.186 -    p = select.poll()
   1.187 -    p.register(child.fromchild.fileno())
   1.188 -    p.register(child.childerr.fileno())
   1.189 -    while True:
   1.190 -        r = p.poll()
   1.191 -        for (fd, event) in r:
   1.192 -            if not event & select.POLLIN:
   1.193 -                continue
   1.194 -            if fd == child.childerr.fileno():
   1.195 -                l = child.childerr.readline()
   1.196 -                log.error(l.rstrip())
   1.197 -                lasterr = l.rstrip()
   1.198 -            if fd == child.fromchild.fileno():
   1.199 -                l = child.fromchild.readline()
   1.200 -                while l:
   1.201 -                    log.info(l.rstrip())
   1.202 -                    m = re.match(r"^(store-mfn) (\d+)\n$", l)
   1.203 -                    if m:
   1.204 -                        if dominfo.store_channel:
   1.205 +        lasterr = ""
   1.206 +        p = select.poll()
   1.207 +        p.register(child.fromchild.fileno())
   1.208 +        p.register(child.childerr.fileno())
   1.209 +        while True:
   1.210 +            r = p.poll()
   1.211 +            for (fd, event) in r:
   1.212 +                if not event & select.POLLIN:
   1.213 +                    continue
   1.214 +                if fd == child.childerr.fileno():
   1.215 +                    l = child.childerr.readline()
   1.216 +                    log.error(l.rstrip())
   1.217 +                    lasterr = l.rstrip()
   1.218 +                if fd == child.fromchild.fileno():
   1.219 +                    l = child.fromchild.readline()
   1.220 +                    while l:
   1.221 +                        log.info(l.rstrip())
   1.222 +                        m = re.match(r"^(store-mfn) (\d+)\n$", l)
   1.223 +                        if m:
   1.224                              store_mfn = int(m.group(2))
   1.225                              dominfo.setStoreRef(store_mfn)
   1.226                              IntroduceDomain(dominfo.getDomid(),
   1.227                                              store_mfn,
   1.228                                              dominfo.store_channel.port1,
   1.229                                              dominfo.getDomainPath())
   1.230 -                    m = re.match(r"^(console-mfn) (\d+)\n$", l)
   1.231 -                    if m:
   1.232 -                        dominfo.setConsoleRef(int(m.group(2)))
   1.233 -                    try:
   1.234 -                        l = child.fromchild.readline()
   1.235 -                    except:
   1.236 -                        l = None
   1.237 -        if filter(lambda (fd, event): event & select.POLLHUP, r):
   1.238 -            break
   1.239 +                        m = re.match(r"^(console-mfn) (\d+)\n$", l)
   1.240 +                        if m:
   1.241 +                            dominfo.setConsoleRef(int(m.group(2)))
   1.242 +                        try:
   1.243 +                            l = child.fromchild.readline()
   1.244 +                        except:
   1.245 +                            l = None
   1.246 +            if filter(lambda (fd, event): event & select.POLLHUP, r):
   1.247 +                break
   1.248  
   1.249 -    if child.wait() >> 8 == 127:
   1.250 -        lasterr = "popen %s failed" % PATH_XC_RESTORE
   1.251 -    if child.wait() != 0:
   1.252 -        raise XendError("xc_restore failed: %s" % lasterr)
   1.253 +        if child.wait() >> 8 == 127:
   1.254 +            lasterr = "popen %s failed" % PATH_XC_RESTORE
   1.255 +        if child.wait() != 0:
   1.256 +            raise XendError("xc_restore failed: %s" % lasterr)
   1.257  
   1.258 -    return dominfo
   1.259 +        return dominfo
   1.260 +    except:
   1.261 +        log.exception("Restore failed")
   1.262 +        dominfo.destroy()
   1.263 +        raise
     2.1 --- a/tools/python/xen/xend/XendDomain.py	Tue Oct 04 11:01:38 2005 +0100
     2.2 +++ b/tools/python/xen/xend/XendDomain.py	Tue Oct 04 11:14:50 2005 +0100
     2.3 @@ -239,14 +239,42 @@ class XendDomain:
     2.4          """
     2.5  
     2.6          try:
     2.7 -            fd = os.open(src, os.O_RDONLY)
     2.8 -            dominfo = XendCheckpoint.restore(fd)
     2.9 -            self._add_domain(dominfo)
    2.10 -            return dominfo
    2.11 +            return self.domain_restore_fd(os.open(src, os.O_RDONLY))
    2.12          except OSError, ex:
    2.13              raise XendError("can't read guest state file %s: %s" %
    2.14                              (src, ex[1]))
    2.15  
    2.16 +    def domain_restore_fd(self, fd):
    2.17 +        """Restore a domain from the given file descriptor."""
    2.18 +
    2.19 +        try:
    2.20 +            XendCheckpoint.restore(self, fd)
    2.21 +        except Exception, ex:
    2.22 +            log.exception("Restore failed")
    2.23 +            raise
    2.24 +
    2.25 +
    2.26 +    def restore_(self, config):
    2.27 +        """Create a domain as part of the restore process.  This is called
    2.28 +        only from {@link XendCheckpoint}.
    2.29 +
    2.30 +        A restore request comes into XendDomain through {@link
    2.31 +        #domain_restore} or {@link #domain_restore_fd}.  That request is
    2.32 +        forwarded immediately to XendCheckpoint which, when it is ready, will
    2.33 +        call this method.  It is necessary to come through here rather than go
    2.34 +        directly to {@link XendDomainInfo.restore} because we need to
    2.35 +        serialise the domain creation process, but cannot lock
    2.36 +        domain_restore_fd as a whole, otherwise we will deadlock waiting for
    2.37 +        the old domain to die.
    2.38 +        """
    2.39 +        self.domains_lock.acquire()
    2.40 +        try:
    2.41 +            dominfo = XendDomainInfo.restore(config)
    2.42 +            self._add_domain(dominfo)
    2.43 +            return dominfo
    2.44 +        finally:
    2.45 +            self.domains_lock.release()
    2.46 +
    2.47  
    2.48      def domain_lookup(self, id):
    2.49          self.domains_lock.acquire()
    2.50 @@ -384,19 +412,8 @@ class XendDomain:
    2.51          port = xroot.get_xend_relocation_port()
    2.52          sock = relocate.setupRelocation(dst, port)
    2.53  
    2.54 -        # temporarily rename domain for localhost migration
    2.55 -        if dst == "localhost":
    2.56 -            dominfo.setName("tmp-" + dominfo.getName())
    2.57 -
    2.58 -        try:
    2.59 -            XendCheckpoint.save(self, sock.fileno(), dominfo, live)
    2.60 -        except:
    2.61 -            if dst == "localhost":
    2.62 -                dominfo.setName(
    2.63 -                    string.replace(dominfo.getName(), "tmp-", "", 1))
    2.64 -            raise
    2.65 +        XendCheckpoint.save(sock.fileno(), dominfo, live)
    2.66          
    2.67 -        return None
    2.68  
    2.69      def domain_save(self, id, dst):
    2.70          """Start saving a domain to file.
    2.71 @@ -411,7 +428,7 @@ class XendDomain:
    2.72              fd = os.open(dst, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
    2.73  
    2.74              # For now we don't support 'live checkpoint' 
    2.75 -            return XendCheckpoint.save(self, fd, dominfo, False)
    2.76 +            return XendCheckpoint.save(fd, dominfo, False)
    2.77  
    2.78          except OSError, ex:
    2.79              raise XendError("can't write guest state file %s: %s" %
     3.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Tue Oct 04 11:01:38 2005 +0100
     3.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Tue Oct 04 11:14:50 2005 +0100
     3.3 @@ -80,7 +80,6 @@ restart_modes = [
     3.4  
     3.5  STATE_VM_OK         = "ok"
     3.6  STATE_VM_TERMINATED = "terminated"
     3.7 -STATE_VM_SUSPENDED  = "suspended"
     3.8  
     3.9  """Flag for a block device backend domain."""
    3.10  SIF_BLK_BE_DOMAIN = (1<<4)
    3.11 @@ -624,21 +623,22 @@ class XendDomainInfo:
    3.12                      # The domain no longer exists.  This will occur if we have
    3.13                      # scheduled a timer to check for shutdown timeouts and the
    3.14                      # shutdown succeeded.  It will also occur if someone
    3.15 -                    # destroys a domain beneath us.  We clean up, just in
    3.16 -                    # case.
    3.17 +                    # destroys a domain beneath us.  We clean up the domain,
    3.18 +                    # just in case, but we can't clean up the VM, because that
    3.19 +                    # VM may have migrated to a different domain on this
    3.20 +                    # machine.
    3.21                      self.cleanupDomain()
    3.22 -                    self.cleanupVm()
    3.23                      return
    3.24  
    3.25              if xeninfo['dying']:
    3.26                  # Dying means that a domain has been destroyed, but has not
    3.27 -                # yet been cleaned up by Xen.  This could persist indefinitely
    3.28 -                # if, for example, another domain has some of its pages
    3.29 -                # mapped.  We might like to diagnose this problem in the
    3.30 -                # future, but for now all we do is make sure that it's not
    3.31 -                # us holding the pages, by calling the cleanup methods.
    3.32 +                # yet been cleaned up by Xen.  This state could persist
    3.33 +                # indefinitely if, for example, another domain has some of its
    3.34 +                # pages mapped.  We might like to diagnose this problem in the
    3.35 +                # future, but for now all we do is make sure that it's not us
    3.36 +                # holding the pages, by calling cleanupDomain.  We can't
    3.37 +                # clean up the VM, as above.
    3.38                  self.cleanupDomain()
    3.39 -                self.cleanupVm()
    3.40                  return
    3.41  
    3.42              elif xeninfo['crashed']:
    3.43 @@ -651,10 +651,11 @@ class XendDomainInfo:
    3.44                  restart_reason = 'crash'
    3.45  
    3.46              elif xeninfo['shutdown']:
    3.47 -                if self.readDom('xend/shutdown'):
    3.48 +                if self.readDom('xend/shutdown_completed'):
    3.49                      # We've seen this shutdown already, but we are preserving
    3.50                      # the domain for debugging.  Leave it alone.
    3.51 -                    pass
    3.52 +                    return
    3.53 +
    3.54                  else:
    3.55                      reason = shutdown_reason(xeninfo['shutdown_reason'])
    3.56  
    3.57 @@ -664,7 +665,7 @@ class XendDomainInfo:
    3.58                      self.clearRestart()
    3.59  
    3.60                      if reason == 'suspend':
    3.61 -                        self.state_set(STATE_VM_SUSPENDED)
    3.62 +                        self.state_set(STATE_VM_TERMINATED)
    3.63                          # Don't destroy the domain.  XendCheckpoint will do
    3.64                          # this once it has finished.
    3.65                      elif reason in ['poweroff', 'reboot']:
    3.66 @@ -701,7 +702,7 @@ class XendDomainInfo:
    3.67          if not reason in shutdown_reasons.values():
    3.68              raise XendError('invalid reason:' + reason)
    3.69          self.storeDom("control/shutdown", reason)
    3.70 -        if not reason == 'suspend':
    3.71 +        if reason != 'suspend':
    3.72              self.storeDom('xend/shutdown_start_time', time.time())
    3.73  
    3.74  
    3.75 @@ -720,11 +721,6 @@ class XendDomainInfo:
    3.76           "rename-restart" : self.renameRestart}[self.info['on_' + reason]]()
    3.77  
    3.78  
    3.79 -    def preserve(self):
    3.80 -        log.info("Preserving dead domain %s (%d).", self.info['name'],
    3.81 -                 self.domid)
    3.82 -
    3.83 -
    3.84      def renameRestart(self):
    3.85          self.restart(True)
    3.86  
    3.87 @@ -814,9 +810,9 @@ class XendDomainInfo:
    3.88  
    3.89      ## public:
    3.90  
    3.91 -    def state_wait(self, state):
    3.92 +    def waitForShutdown(self):
    3.93          self.state_updated.acquire()
    3.94 -        while self.state != state:
    3.95 +        while self.state == STATE_VM_OK:
    3.96              self.state_updated.wait()
    3.97          self.state_updated.release()
    3.98  
    3.99 @@ -1054,7 +1050,6 @@ class XendDomainInfo:
   3.100          """Cleanup domain resources; release devices.  Idempotent.  Nothrow
   3.101          guarantee."""
   3.102  
   3.103 -        self.state_set(STATE_VM_TERMINATED)
   3.104          self.release_devices()
   3.105          self.closeStoreChannel()
   3.106          self.closeConsoleChannel()
   3.107 @@ -1087,8 +1082,14 @@ class XendDomainInfo:
   3.108  
   3.109          log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
   3.110  
   3.111 +        self.cleanupVm()
   3.112 +        self.destroyDomain()
   3.113 +
   3.114 +
   3.115 +    def destroyDomain(self):
   3.116 +        log.debug("XendDomainInfo.destroyDomain(%s)", str(self.domid))
   3.117 +
   3.118          self.cleanupDomain()
   3.119 -        self.cleanupVm()
   3.120          
   3.121          try:
   3.122              if self.domid is not None:
   3.123 @@ -1096,6 +1097,8 @@ class XendDomainInfo:
   3.124          except Exception:
   3.125              log.exception("XendDomainInfo.destroy: xc.domain_destroy failed.")
   3.126  
   3.127 +        self.state_set(STATE_VM_TERMINATED)
   3.128 +
   3.129  
   3.130      ## private:
   3.131  
   3.132 @@ -1243,14 +1246,18 @@ class XendDomainInfo:
   3.133  
   3.134          try:
   3.135              if rename:
   3.136 -                self.preserveShutdownDomain()
   3.137 +                self.preserveForRestart()
   3.138              else:
   3.139 -                self.cleanupDomain()
   3.140                  self.destroy()
   3.141                  
   3.142              try:
   3.143                  xd = get_component('xen.xend.XendDomain')
   3.144 -                xd.domain_unpause(xd.domain_create(config).getDomid())
   3.145 +                new_dom = xd.domain_create(config)
   3.146 +                try:
   3.147 +                    xc.domain_unpause(new_dom.getDomid())
   3.148 +                except:
   3.149 +                    new_dom.destroy()
   3.150 +                    raise
   3.151              except Exception, exn:
   3.152                  log.exception('Failed to restart domain %d.', self.domid)
   3.153          finally:
   3.154 @@ -1260,7 +1267,7 @@ class XendDomainInfo:
   3.155          #        self.exportToDB()
   3.156  
   3.157  
   3.158 -    def preserveShutdownDomain(self):
   3.159 +    def preserveForRestart(self):
   3.160          """Preserve a domain that has been shut down, by giving it a new UUID,
   3.161          cloning the VM details, and giving it a new name.  This allows us to
   3.162          keep this domain for debugging, but restart a new one in its place
   3.163 @@ -1276,8 +1283,14 @@ class XendDomainInfo:
   3.164          self.uuid = new_uuid
   3.165          self.vmpath = VMROOT + new_uuid
   3.166          self.storeVmDetails()
   3.167 -        self.storeDom('vm', self.vmpath)
   3.168 -        self.storeDom('xend/shutdown', 'True')
   3.169 +        self.preserve()
   3.170 +
   3.171 +
   3.172 +    def preserve(self):
   3.173 +        log.info("Preserving dead domain %s (%d).", self.info['name'],
   3.174 +                 self.domid)
   3.175 +        self.storeDom('xend/shutdown_completed', 'True')
   3.176 +        self.set_state(STATE_VM_TERMINATED)
   3.177  
   3.178  
   3.179      def generateShutdownName(self):
     4.1 --- a/tools/python/xen/xend/server/relocate.py	Tue Oct 04 11:01:38 2005 +0100
     4.2 +++ b/tools/python/xen/xend/server/relocate.py	Tue Oct 04 11:14:50 2005 +0100
     4.3 @@ -28,7 +28,6 @@ from xen.xend import EventServer
     4.4  from xen.xend.XendError import XendError
     4.5  from xen.xend import XendRoot
     4.6  from xen.xend.XendLogging import log
     4.7 -from xen.xend import XendCheckpoint
     4.8  
     4.9  
    4.10  eserver = EventServer.instance()
    4.11 @@ -120,7 +119,8 @@ class RelocationProtocol(protocol.Protoc
    4.12          if self.transport:
    4.13              self.send_reply(["ready", name])
    4.14              self.transport.sock.setblocking(1)
    4.15 -            XendCheckpoint.restore(self.transport.sock.fileno())
    4.16 +            xd = xroot.get_component("xen.xend.XendDomain")
    4.17 +            xd.domain_restore_fd(self.transport.sock.fileno())
    4.18              self.transport.sock.setblocking(0)
    4.19          else:
    4.20              log.error(name + ": no transport")