ia64/xen-unstable

changeset 7209:bc325488f47f

Merge the child-process handling of the save and restore functions into one
forkHelper function. Change the handling of file descriptor closure to wait
until both stdout and stderr descriptors have closed. This may fix the
intermittent bug seen whereby xm restore; xend restart leaves the domain in a
misconfigured state, presumably because IntroduceDomain is not being called by
XendCheckpoint.restore.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@ewan
date Tue Oct 04 17:49:52 2005 +0100 (2005-10-04)
parents 84c585a99a7b
children db651da37899
files tools/python/xen/xend/XendCheckpoint.py
line diff
     1.1 --- a/tools/python/xen/xend/XendCheckpoint.py	Tue Oct 04 16:38:04 2005 +0100
     1.2 +++ b/tools/python/xen/xend/XendCheckpoint.py	Tue Oct 04 17:49:52 2005 +0100
     1.3 @@ -8,8 +8,8 @@
     1.4  import os
     1.5  import re
     1.6  import select
     1.7 +import string
     1.8  import sxp
     1.9 -from string import join
    1.10  from struct import pack, unpack, calcsize
    1.11  
    1.12  from xen.util.xpopen import xPopen3
    1.13 @@ -65,40 +65,22 @@ def save(fd, dominfo, live):
    1.14          # more information.
    1.15          cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd),
    1.16                 str(dominfo.getDomid()), "0", "0", str(int(live)) ]
    1.17 -        log.info("[xc_save] " + join(cmd))
    1.18 -        child = xPopen3(cmd, True, -1, [fd, xc.handle()])
    1.19 -    
    1.20 -        lasterr = ""
    1.21 -        p = select.poll()
    1.22 -        p.register(child.fromchild.fileno())
    1.23 -        p.register(child.childerr.fileno())
    1.24 -        while True: 
    1.25 -            r = p.poll()
    1.26 -            for (fd, event) in r:
    1.27 -                if not event & select.POLLIN:
    1.28 -                    continue
    1.29 -                if fd == child.childerr.fileno():
    1.30 -                    l = child.childerr.readline()
    1.31 -                    log.error(l.rstrip())
    1.32 -                    lasterr = l.rstrip()
    1.33 -                if fd == child.fromchild.fileno():
    1.34 -                    l = child.fromchild.readline()
    1.35 -                    if l.rstrip() == "suspend":
    1.36 -                        log.info("suspending %d", dominfo.getDomid())
    1.37 -                        dominfo.shutdown('suspend')
    1.38 -                        dominfo.waitForShutdown()
    1.39 -                        log.info("suspend %d done", dominfo.getDomid())
    1.40 -                        child.tochild.write("done\n")
    1.41 -                        child.tochild.flush()
    1.42 -            if filter(lambda (fd, event): event & select.POLLHUP, r):
    1.43 -                break
    1.44 +        log.debug("[xc_save]: %s", string.join(cmd))
    1.45  
    1.46 -        if child.wait() >> 8 == 127:
    1.47 -            lasterr = "popen %s failed" % PATH_XC_SAVE
    1.48 -        if child.wait() != 0:
    1.49 -            raise XendError("xc_save failed: %s" % lasterr)
    1.50 +        def saveInputHandler(line, tochild):
    1.51 +            log.debug("In saveInputHandler %s", line)
    1.52 +            if line == "suspend":
    1.53 +                log.debug("Suspending %d ...", dominfo.getDomid())
    1.54 +                dominfo.shutdown('suspend')
    1.55 +                dominfo.waitForShutdown()
    1.56 +                log.info("Domain %d suspended.", dominfo.getDomid())
    1.57 +                tochild.write("done\n")
    1.58 +                tochild.flush()
    1.59 +
    1.60 +        forkHelper(cmd, fd, saveInputHandler, False)
    1.61  
    1.62          dominfo.destroyDomain()
    1.63 +
    1.64      except Exception, exn:
    1.65          log.exception("Save failed on domain %s (%d).", domain_name,
    1.66                        dominfo.getDomid())
    1.67 @@ -149,52 +131,66 @@ def restore(xd, fd):
    1.68          cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd),
    1.69                 str(dominfo.getDomid()), str(nr_pfns),
    1.70                 str(store_evtchn), str(console_evtchn)]
    1.71 -        log.info("[xc_restore] " + join(cmd))
    1.72 -        child = xPopen3(cmd, True, -1, [fd, xc.handle()])
    1.73 -        child.tochild.close()
    1.74 +        log.debug("[xc_restore]: %s", string.join(cmd))
    1.75  
    1.76 -        lasterr = ""
    1.77 -        p = select.poll()
    1.78 -        p.register(child.fromchild.fileno())
    1.79 -        p.register(child.childerr.fileno())
    1.80 -        while True:
    1.81 -            r = p.poll()
    1.82 -            for (fd, event) in r:
    1.83 -                if not event & select.POLLIN:
    1.84 -                    continue
    1.85 -                if fd == child.childerr.fileno():
    1.86 -                    l = child.childerr.readline()
    1.87 -                    log.error(l.rstrip())
    1.88 -                    lasterr = l.rstrip()
    1.89 -                if fd == child.fromchild.fileno():
    1.90 -                    l = child.fromchild.readline()
    1.91 -                    while l:
    1.92 -                        log.info(l.rstrip())
    1.93 -                        m = re.match(r"^(store-mfn) (\d+)\n$", l)
    1.94 -                        if m:
    1.95 -                            store_mfn = int(m.group(2))
    1.96 -                            dominfo.setStoreRef(store_mfn)
    1.97 -                            IntroduceDomain(dominfo.getDomid(),
    1.98 -                                            store_mfn,
    1.99 -                                            dominfo.store_channel.port1,
   1.100 -                                            dominfo.getDomainPath())
   1.101 -                        m = re.match(r"^(console-mfn) (\d+)\n$", l)
   1.102 -                        if m:
   1.103 -                            dominfo.setConsoleRef(int(m.group(2)))
   1.104 -                        try:
   1.105 -                            l = child.fromchild.readline()
   1.106 -                        except:
   1.107 -                            l = None
   1.108 -            if filter(lambda (fd, event): event & select.POLLHUP, r):
   1.109 -                break
   1.110 +        def restoreInputHandler(line, _):
   1.111 +            m = re.match(r"^(store-mfn) (\d+)$", line)
   1.112 +            if m:
   1.113 +                store_mfn = int(m.group(2))
   1.114 +                dominfo.setStoreRef(store_mfn)
   1.115 +                IntroduceDomain(dominfo.getDomid(),
   1.116 +                                store_mfn,
   1.117 +                                dominfo.store_channel.port1,
   1.118 +                                dominfo.getDomainPath())
   1.119 +            else:
   1.120 +                m = re.match(r"^(console-mfn) (\d+)$", line)
   1.121 +                if m:
   1.122 +                    dominfo.setConsoleRef(int(m.group(2)))
   1.123  
   1.124 -        if child.wait() >> 8 == 127:
   1.125 -            lasterr = "popen %s failed" % PATH_XC_RESTORE
   1.126 -        if child.wait() != 0:
   1.127 -            raise XendError("xc_restore failed: %s" % lasterr)
   1.128 +        forkHelper(cmd, fd, restoreInputHandler, True)
   1.129  
   1.130          return dominfo
   1.131      except:
   1.132 -        log.exception("Restore failed")
   1.133          dominfo.destroy()
   1.134          raise
   1.135 +
   1.136 +
   1.137 +def forkHelper(cmd, fd, inputHandler, closeToChild):
   1.138 +    """Run cmd as a child process and relay its output until it exits.
   1.139 +
   1.140 +    Every non-empty line the child writes to stdout is logged at debug
   1.141 +    level and passed to inputHandler(line, child.tochild).  Every line it
   1.142 +    writes to stderr is logged as an error, and the most recent one is
   1.143 +    quoted in the exception raised on failure.  The loop only ends once
   1.144 +    both the stdout and the stderr descriptors have reported hangup/error.
   1.145 +
   1.146 +    cmd           -- list of strings forming the child's command line
   1.147 +    fd            -- descriptor passed to xPopen3 together with xc.handle()
   1.148 +    inputHandler  -- callable taking (line, tochild)
   1.149 +    closeToChild  -- if true, close the pipe to the child straight away
   1.150 +
   1.151 +    Raises XendError if the child's wait status is non-zero.
   1.152 +    """
   1.153 +    child = xPopen3(cmd, True, -1, [fd, xc.handle()])
   1.154 +
   1.155 +    if closeToChild:
   1.156 +        child.tochild.close()
   1.157 +
   1.158 +    outfd = child.fromchild.fileno()
   1.159 +    errfd = child.childerr.fileno()
   1.160 +    # Bound up front: the child may fail without writing to stderr, and the
   1.161 +    # XendError below must not trip over an undefined name in that case.
   1.162 +    lasterr = ""
   1.163 +
   1.164 +    fds = [outfd, errfd]
   1.165 +    p = select.poll()
   1.166 +    for pollfd in fds:
   1.167 +        p.register(pollfd)
   1.168 +    while len(fds) > 0:
   1.169 +        for (pollfd, event) in p.poll():
   1.170 +            closed = event & (select.POLLHUP | select.POLLERR)
   1.171 +            # POLLIN can be reported together with POLLHUP when the child
   1.172 +            # exits with output still queued, so read before unregistering
   1.173 +            # and, on hangup, keep reading until end of file.
   1.174 +            drain = event & select.POLLHUP
   1.175 +            if event & select.POLLIN and not event & select.POLLERR:
   1.176 +                if pollfd == errfd:
   1.177 +                    raw = child.childerr.readline()
   1.178 +                    while raw:
   1.179 +                        lasterr = raw.rstrip()
   1.180 +                        log.error('%s', lasterr)
   1.181 +                        if not drain:
   1.182 +                            break
   1.183 +                        raw = child.childerr.readline()
   1.184 +                else:
   1.185 +                    raw = child.fromchild.readline()
   1.186 +                    while raw:
   1.187 +                        l = raw.rstrip()
   1.188 +                        if l:
   1.189 +                            log.debug('%s', l)
   1.190 +                            inputHandler(l, child.tochild)
   1.191 +                        elif not drain:
   1.192 +                            # A blank line hands control back to poll().
   1.193 +                            break
   1.194 +                        try:
   1.195 +                            raw = child.fromchild.readline()
   1.196 +                        except (IOError, OSError, ValueError):
   1.197 +                            raw = None
   1.198 +            if closed:
   1.199 +                fds.remove(pollfd)
   1.200 +                p.unregister(pollfd)
   1.201 +
   1.202 +    child.fromchild.close()
   1.203 +    child.childerr.close()
   1.204 +
   1.205 +    # Collect the status once; an exit code of 127 is reported as "popen failed".
   1.206 +    status = child.wait()
   1.207 +    if status >> 8 == 127:
   1.208 +        lasterr = "popen failed"
   1.209 +    if status != 0:
   1.210 +        raise XendError("%s failed: %s" % (string.join(cmd), lasterr))