ia64/xen-unstable

changeset 17849:6f189de0f73d

xend: detect and report qemu-dm failure

Currently, when qemu-dm fails, typically its exit status is lost and
xend doesn't notice. In the patch below I use a fifo (named pipe) to
detect qemu-dm's termination and report the exit status to the
logfile, if possible. (If xend has been restarted since the domain
was created, this isn't possible but we can still know that it failed
and report that fact.)

It would be better to have a failure of qemu crash the domain, by
calling the SCHEDOP_shutdown hypercall with SHUTDOWN_crash. However
if you have on_crash=restart and the configuration is broken in some
way that causes qemu-dm to bomb out straight away this causes xend to
spin endlessly restarting the doomed domain. This is a general
problem with on_*=restart and ought to be fixed separately. When it
is fixed, we can safely arrange for domains whose dm has crashed to be
themselves forcibly crashed.

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 12 17:01:31 2008 +0100 (2008-06-12)
parents 6b1795ee1b19
children 8182f5158dc2
files tools/python/xen/util/oshelp.py tools/python/xen/util/utils.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/XendLogging.py tools/python/xen/xend/image.py
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/tools/python/xen/util/oshelp.py	Thu Jun 12 17:01:31 2008 +0100
     1.3 @@ -0,0 +1,20 @@
     1.4 +import fcntl
     1.5 +import os
     1.6 +
     1.7 +def fcntl_setfd_cloexec(file, bool):
     1.8 +        f = fcntl.fcntl(file, fcntl.F_GETFD)
     1.9 +        if bool: f |= fcntl.FD_CLOEXEC
    1.10 +        else: f &= ~fcntl.FD_CLOEXEC
    1.11 +        fcntl.fcntl(file, fcntl.F_SETFD)
    1.12 +
    1.13 +def waitstatus_description(st):
    1.14 +        if os.WIFEXITED(st):
    1.15 +                es = os.WEXITSTATUS(st)
    1.16 +                if es: return "exited with nonzero status %i" % es
    1.17 +                else: return "exited"
    1.18 +        elif os.WIFSIGNALED(st):
    1.19 +                s = "died due to signal %i" % os.WTERMSIG(st)
    1.20 +                if os.WCOREDUMP(st): s += " (core dumped)"
    1.21 +                return s
    1.22 +        else:
    1.23 +                return "failed with unexpected wait status %i" % st
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/tools/python/xen/util/utils.py	Thu Jun 12 17:01:31 2008 +0100
     2.3 @@ -0,0 +1,6 @@
     2.4 +import traceback
     2.5 +import sys
     2.6 +
     2.7 +def exception_string(e):
     2.8 +        (ty,v,tb) = sys.exc_info()
     2.9 +        return traceback.format_exception_only(ty,v)
     3.1 --- a/tools/python/xen/xend/XendDomain.py	Thu Jun 12 16:55:09 2008 +0100
     3.2 +++ b/tools/python/xen/xend/XendDomain.py	Thu Jun 12 17:01:31 2008 +0100
     3.3 @@ -34,7 +34,7 @@ import xen.lowlevel.xc
     3.4  
     3.5  from xen.xend import XendOptions, XendCheckpoint, XendDomainInfo
     3.6  from xen.xend.PrettyPrint import prettyprint
     3.7 -from xen.xend import XendConfig
     3.8 +from xen.xend import XendConfig, image
     3.9  from xen.xend.XendError import XendError, XendInvalidDomain, VmError
    3.10  from xen.xend.XendError import VMBadState
    3.11  from xen.xend.XendLogging import log
    3.12 @@ -179,6 +179,8 @@ class XendDomain:
    3.13                          log.exception("Failed to create reference to running "
    3.14                                        "domain id: %d" % dom['domid'])
    3.15  
    3.16 +            image.cleanup_stale_sentinel_fifos()
    3.17 +
    3.18              # add all managed domains as dormant domains.
    3.19              for dom in managed:
    3.20                  dom_uuid = dom.get('uuid')
     4.1 --- a/tools/python/xen/xend/XendLogging.py	Thu Jun 12 16:55:09 2008 +0100
     4.2 +++ b/tools/python/xen/xend/XendLogging.py	Thu Jun 12 17:01:31 2008 +0100
     4.3 @@ -25,10 +25,10 @@ import tempfile
     4.4  import types
     4.5  import logging
     4.6  import logging.handlers
     4.7 -import fcntl
     4.8  
     4.9  from xen.util import mkdir
    4.10  from xen.xend.server import params
    4.11 +from xen.util import oshelp
    4.12  
    4.13  
    4.14  __all__ = [ 'log', 'init', 'getLogFilename' ]
    4.15 @@ -103,9 +103,7 @@ class XendRotatingFileHandler(logging.ha
    4.16      # entire FileHandler, StreamHandler & RotatingFileHandler classes which
    4.17      # is even worse
    4.18      def setCloseOnExec(self):
    4.19 -        flags = fcntl.fcntl(self.stream.fileno(), fcntl.F_GETFD)
    4.20 -        flags |= fcntl.FD_CLOEXEC
    4.21 -        fcntl.fcntl(self.stream.fileno(), fcntl.F_SETFD, flags)
    4.22 +        oshelp.fcntl_setfd_cloexec(self.stream, True)
    4.23          
    4.24  
    4.25  def init(filename, level):
     5.1 --- a/tools/python/xen/xend/image.py	Thu Jun 12 16:55:09 2008 +0100
     5.2 +++ b/tools/python/xen/xend/image.py	Thu Jun 12 17:01:31 2008 +0100
     5.3 @@ -22,6 +22,12 @@ import re
     5.4  import math
     5.5  import time
     5.6  import signal
     5.7 +import thread
     5.8 +import fcntl
     5.9 +import sys
    5.10 +import errno
    5.11 +import glob
    5.12 +import traceback
    5.13  
    5.14  import xen.lowlevel.xc
    5.15  from xen.xend.XendConstants import *
    5.16 @@ -32,11 +38,23 @@ from xen.xend.xenstore.xstransact import
    5.17  from xen.xend.xenstore.xswatch import xswatch
    5.18  from xen.xend import arch
    5.19  from xen.xend import XendOptions
    5.20 +from xen.util import oshelp
    5.21 +from xen.util import utils
    5.22  
    5.23  xc = xen.lowlevel.xc.xc()
    5.24  
    5.25  MAX_GUEST_CMDLINE = 1024
    5.26  
    5.27 +sentinel_path_prefix = '/var/run/xend/dm-'
    5.28 +sentinel_fifos_inuse = { }
    5.29 +
    5.30 +def cleanup_stale_sentinel_fifos():
    5.31 +    for path in glob.glob(sentinel_path_prefix + '*.fifo'):
    5.32 +        if path in sentinel_fifos_inuse: continue
    5.33 +        try: os.unlink(path)
    5.34 +        except OSError, e:
    5.35 +            log.warning('could not delete stale fifo %s: %s',
    5.36 +                path, utils.exception_string(e))
    5.37  
    5.38  def create(vm, vmConfig):
    5.39      """Create an image handler for a vm.
    5.40 @@ -324,6 +342,13 @@ class ImageHandler:
    5.41          args = args + self.dmargs
    5.42          return args
    5.43  
    5.44 +    def _openSentinel(self, sentinel_path_fifo):
    5.45 +        self.sentinel_fifo = file(sentinel_path_fifo, 'r')
    5.46 +        self.sentinel_lock = thread.allocate_lock()
    5.47 +        oshelp.fcntl_setfd_cloexec(self.sentinel_fifo, True)
    5.48 +        sentinel_fifos_inuse[sentinel_path_fifo] = 1
    5.49 +        self.sentinel_path_fifo = sentinel_path_fifo
    5.50 +
    5.51      def createDeviceModel(self, restore = False):
    5.52          if self.device_model is None:
    5.53              return
    5.54 @@ -339,21 +364,29 @@ class ImageHandler:
    5.55              env['XAUTHORITY'] = self.xauthority
    5.56          if self.vncconsole:
    5.57              args = args + ([ "-vncviewer" ])
    5.58 +        unique_id = "%i-%i" % (self.vm.getDomid(), time.time())
    5.59 +        sentinel_path = sentinel_path_prefix + unique_id
    5.60 +        sentinel_path_fifo = sentinel_path + '.fifo'
    5.61 +        os.mkfifo(sentinel_path_fifo, 0600)
    5.62 +        sentinel_write = file(sentinel_path_fifo, 'r+')
    5.63 +        self._openSentinel(sentinel_path_fifo)
    5.64 +        self.vm.storeDom("image/device-model-fifo", sentinel_path_fifo)
    5.65          xstransact.Mkdir("/local/domain/0/device-model/%i" % self.vm.getDomid())
    5.66          xstransact.SetPermissions("/local/domain/0/device-model/%i" % self.vm.getDomid(),
    5.67                          { 'dom': self.vm.getDomid(), 'read': True, 'write': True })
    5.68          log.info("spawning device models: %s %s", self.device_model, args)
    5.69          # keep track of pid and spawned options to kill it later
    5.70  
    5.71 -        logfile = "/var/log/xen/qemu-dm-%s.log" %  str(self.vm.info['name_label'])
    5.72 -        if os.path.exists(logfile):
    5.73 -            if os.path.exists(logfile + ".1"):
    5.74 -                os.unlink(logfile + ".1")
    5.75 -            os.rename(logfile, logfile + ".1")
    5.76 +        self.logfile = "/var/log/xen/qemu-dm-%s.log" %  str(self.vm.info['name_label'])
    5.77 +        if os.path.exists(self.logfile):
    5.78 +            if os.path.exists(self.logfile + ".1"):
    5.79 +                os.unlink(self.logfile + ".1")
    5.80 +            os.rename(self.logfile, self.logfile + ".1")
    5.81  
    5.82          null = os.open("/dev/null", os.O_RDONLY)
    5.83 -        logfd = os.open(logfile, os.O_WRONLY|os.O_CREAT|os.O_TRUNC)
    5.84 +        logfd = os.open(self.logfile, os.O_WRONLY|os.O_CREAT|os.O_TRUNC|os.O_APPEND)
    5.85          
    5.86 +        sys.stderr.flush()
    5.87          pid = os.fork()
    5.88          if pid == 0: #child
    5.89              try:
    5.90 @@ -362,18 +395,26 @@ class ImageHandler:
    5.91                  os.dup2(logfd, 2)
    5.92                  os.close(null)
    5.93                  os.close(logfd)
    5.94 +                self.sentinel_fifo.close()
    5.95                  try:
    5.96                      os.execve(self.device_model, args, env)
    5.97 -                except:
    5.98 -                    os._exit(127)
    5.99 +                except Exception, e:
   5.100 +                    print >>sys.stderr, (
   5.101 +                        'failed to set up fds or execute dm %s: %s' %
   5.102 +                        (self.device_model, utils.exception_string(e)))
   5.103 +                    os._exit(126)
   5.104              except:
   5.105                  os._exit(127)
   5.106          else:
   5.107              self.pid = pid
   5.108              os.close(null)
   5.109              os.close(logfd)
   5.110 +        sentinel_write.close()
   5.111          self.vm.storeDom("image/device-model-pid", self.pid)
   5.112          log.info("device model pid: %d", self.pid)
   5.113 +        # we would very much prefer not to have a thread here and instead
   5.114 +        #  have a callback but sadly we don't have Twisted in xend
   5.115 +        self.sentinel_thread = thread.start_new_thread(self._sentinel_watch,())
   5.116  
   5.117      def signalDeviceModel(self, cmd, ret, par = None):
   5.118          if self.device_model is None:
   5.119 @@ -419,46 +460,118 @@ class ImageHandler:
   5.120          xstransact.Store("/local/domain/0/device-model/%i"
   5.121                           % self.vm.getDomid(), ('command', 'continue'))
   5.122  
   5.123 +    def _dmfailed(self, message):
   5.124 +        log.warning("domain %s: %s", self.vm.getName(), message)
   5.125 +        # ideally we would like to forcibly crash the domain with
   5.126 +        # something like
   5.127 +        #    xc.domain_shutdown(self.vm.getDomid(), DOMAIN_CRASH)
   5.128 +        # but this can easily lead to very rapid restart loops against
   5.129 +        # which we currently have no protection
   5.130 +
   5.131      def recreate(self):
   5.132          if self.device_model is None:
   5.133              return
   5.134 -        self.pid = self.vm.gatherDom(('image/device-model-pid', int))
   5.135 +        name = self.vm.getName()
   5.136 +        sentinel_path_fifo = self.vm.readDom('image/device-model-fifo')
   5.137 +        fifo_fd = -1
   5.138 +        log.debug("rediscovering %s", sentinel_path_fifo)
   5.139 +        if sentinel_path_fifo is None:
   5.140 +            log.debug("%s device model no sentinel, cannot rediscover", name)
   5.141 +        else:
   5.142 +            try:
   5.143 +                # We open it O_WRONLY because that fails ENXIO if no-one
   5.144 +                # has it open for reading (see SuSv3).  The dm process got
   5.145 +                # a read/write descriptor from our earlier invocation.
   5.146 +                fifo_fd = os.open(sentinel_path_fifo, os.O_WRONLY|os.O_NONBLOCK)
   5.147 +            except OSError, e:
   5.148 +                if e.errno == errno.ENXIO:
   5.149 +                    self._dmfailed("%s device model no longer running"%name)
   5.150 +                elif e.errno == errno.ENOENT:
   5.151 +                    log.debug("%s device model sentinel %s absent!",
   5.152 +                            name, sentinel_path_fifo)
   5.153 +                else:
   5.154 +                    raise
   5.155 +        if fifo_fd >= 0:
   5.156 +            self._openSentinel(sentinel_path_fifo)
   5.157 +            os.close(fifo_fd)
   5.158 +            self.pid = self.vm.gatherDom(('image/device-model-pid', int))
   5.159 +            log.debug("%s device model rediscovered, pid %s sentinel fifo %s",
   5.160 +                    name, self.pid, sentinel_path_fifo)
   5.161 +            self.sentinel_thread = thread.start_new_thread(self._sentinel_watch,())
   5.162 +
   5.163 +    def _sentinel_watch(self):
   5.164 +        log.info("waiting for sentinel_fifo")
   5.165 +        try: self.sentinel_fifo.read(1)
   5.166 +        except OSError, e: pass
   5.167 +        self.sentinel_lock.acquire()
   5.168 +        try:
   5.169 +            if self.pid:
   5.170 +                (p,st) = os.waitpid(self.pid, os.WNOHANG)
   5.171 +                if p == self.pid:
   5.172 +                    message = oshelp.waitstatus_description(st)
   5.173 +                else:
   5.174 +                    # obviously it is malfunctioning, kill it now
   5.175 +                    try:
   5.176 +                        os.kill(self.pid, signal.SIGKILL)
   5.177 +                        message = "malfunctioning (closed sentinel), killed"
   5.178 +                    except:
   5.179 +                        message = "malfunctioning or died ?"
   5.180 +                message = "pid %d: %s" % (self.pid, message)
   5.181 +            else:
   5.182 +                message = "no longer running"
   5.183 +        except Exception, e:
   5.184 +            message = "waitpid failed: %s" % utils.exception_string(e)
   5.185 +        message = "device model failure: %s" % message
   5.186 +        try: message += "; see %s " % self.logfile
   5.187 +        except: pass
   5.188 +        self._dmfailed(message)
   5.189 +        self.pid = None
   5.190 +        self.sentinel_lock.release()
   5.191  
   5.192      def destroyDeviceModel(self):
   5.193          if self.device_model is None:
   5.194              return
   5.195          if self.pid:
   5.196 -            try:
   5.197 -                os.kill(self.pid, signal.SIGHUP)
   5.198 -            except OSError, exn:
   5.199 -                log.exception(exn)
   5.200 +            self.sentinel_lock.acquire()
   5.201              try:
   5.202 -                # Try to reap the child every 100ms for 10s. Then SIGKILL it.
   5.203 -                for i in xrange(100):
   5.204 -                    (p, rv) = os.waitpid(self.pid, os.WNOHANG)
   5.205 -                    if p == self.pid:
   5.206 -                        break
   5.207 -                    time.sleep(0.1)
   5.208 -                else:
   5.209 -                    log.warning("DeviceModel %d took more than 10s "
   5.210 -                                "to terminate: sending SIGKILL" % self.pid)
   5.211 +                try:
   5.212 +                    os.kill(self.pid, signal.SIGHUP)
   5.213 +                except OSError, exn:
   5.214 +                    log.exception(exn)
   5.215 +                try:
   5.216 +                    # Try to reap the child every 100ms for 10s. Then SIGKILL it.
   5.217 +                    for i in xrange(100):
   5.218 +                        (p, rv) = os.waitpid(self.pid, os.WNOHANG)
   5.219 +                        if p == self.pid:
   5.220 +                            break
   5.221 +                        time.sleep(0.1)
   5.222 +                    else:
   5.223 +                        log.warning("DeviceModel %d took more than 10s "
   5.224 +                                    "to terminate: sending SIGKILL" % self.pid)
   5.225 +                        os.kill(self.pid, signal.SIGKILL)
   5.226 +                        os.waitpid(self.pid, 0)
   5.227 +                except OSError, exn:
   5.228 +                    # This is expected if Xend has been restarted within the
   5.229 +                    # life of this domain.  In this case, we can kill the process,
   5.230 +                    # but we can't wait for it because it's not our child.
   5.231 +                    # We just make really sure it's going away (SIGKILL) first.
   5.232                      os.kill(self.pid, signal.SIGKILL)
   5.233 -                    os.waitpid(self.pid, 0)
   5.234 -            except OSError, exn:
   5.235 -                # This is expected if Xend has been restarted within the
   5.236 -                # life of this domain.  In this case, we can kill the process,
   5.237 -                # but we can't wait for it because it's not our child.
   5.238 -                # We just make really sure it's going away (SIGKILL) first.
   5.239 -                os.kill(self.pid, signal.SIGKILL)
   5.240 -            self.pid = None
   5.241 -            state = xstransact.Remove("/local/domain/0/device-model/%i"
   5.242 -                                      % self.vm.getDomid())
   5.243 +                state = xstransact.Remove("/local/domain/0/device-model/%i"
   5.244 +                                          % self.vm.getDomid())
   5.245 +            finally:
   5.246 +                self.pid = None
   5.247 +                self.sentinel_lock.release()
   5.248              
   5.249              try:
   5.250                  os.unlink('/var/run/tap/qemu-read-%d' % self.vm.getDomid())
   5.251                  os.unlink('/var/run/tap/qemu-write-%d' % self.vm.getDomid())
   5.252              except:
   5.253                  pass
   5.254 +            try:
   5.255 +                del sentinel_fifos_inuse[self.sentinel_path_fifo]
   5.256 +                os.unlink(self.sentinel_path_fifo)
   5.257 +            except:
   5.258 +                pass
   5.259  
   5.260      def setCpuid(self):
   5.261          xc.domain_set_policy_cpuid(self.vm.getDomid())