ia64/xen-unstable

changeset 8305:1aaa1abab953

Add one more fork to the Xend start-up process, this time with the parent
staying alive to watch the child. If the child crashes, then the parent
starts another one in its place. This provides robustness in the face of
crashes like the one currently tracked as bug #411.

Tidy up chunks of SrvDaemon.py on the way past.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@leeni.uk.xensource.com
date Fri Dec 09 10:49:29 2005 +0000 (2005-12-09)
parents efc71a3e9f6f
children a12bd9e8fdf8
files tools/python/xen/xend/server/SrvDaemon.py tools/python/xen/xend/server/SrvServer.py
line diff
     1.1 --- a/tools/python/xen/xend/server/SrvDaemon.py	Thu Dec 08 16:17:53 2005 +0000
     1.2 +++ b/tools/python/xen/xend/server/SrvDaemon.py	Fri Dec 09 10:49:29 2005 +0000
     1.3 @@ -23,73 +23,37 @@ import relocate
     1.4  from params import *
     1.5  
     1.6  
     1.7 +XEND_PROCESS_NAME = 'xend'
     1.8 +
     1.9 +
    1.10  class Daemon:
    1.11      """The xend daemon.
    1.12      """
    1.13      def __init__(self):
    1.14 -        self.shutdown = 0
    1.15 -        self.traceon = 0
    1.16 +        self.traceon = False
    1.17          self.tracefile = None
    1.18          self.traceindent = 0
    1.19          self.child = 0 
    1.20 -        
    1.21 -    def read_pid(self, pidfile):
    1.22 -        """Read process id from a file.
    1.23 -
    1.24 -        @param pidfile: file to read
    1.25 -        @return pid or 0
    1.26 -        """
    1.27 -        if os.path.isfile(pidfile) and os.path.getsize(pidfile):
    1.28 -            try:
    1.29 -                f = open(pidfile, 'r')
    1.30 -                try:
    1.31 -                    return int(f.read())
    1.32 -                finally:
    1.33 -                    f.close()
    1.34 -            except:
    1.35 -                return 0
    1.36 -        else:
    1.37 -            return 0
    1.38  
    1.39 -    def find_process(self, pid, name):
    1.40 -        """Search for a process.
    1.41  
    1.42 -        @param pid: process id
    1.43 -        @param name: process name
    1.44 -        @return: pid if found, 0 otherwise
    1.45 -        """
    1.46 -        running = 0
    1.47 -        if pid:
    1.48 -            lines = os.popen('ps %d 2>/dev/null' % pid).readlines()
    1.49 -            exp = '^ *%d.+%s' % (pid, name)
    1.50 -            for line in lines:
    1.51 -                if re.search(exp, line):
    1.52 -                    running = pid
    1.53 -                    break
    1.54 -        return running
    1.55 -
    1.56 -    def cleanup_process(self, pidfile, name, kill):
    1.57 -        """Clean up the pidfile for a process.
    1.58 +    def cleanup_xend(self, kill):
    1.59 +        """Clean up the Xend pidfile.
    1.60          If a running process is found, kills it if 'kill' is true.
    1.61  
    1.62 -        @param pidfile: pid file
    1.63 -        @param name: process name
    1.64          @param kill: whether to kill the process
    1.65          @return running process id or 0
    1.66          """
    1.67          running = 0
    1.68 -        pid = self.read_pid(pidfile)
    1.69 -        if self.find_process(pid, name):
    1.70 +        pid = read_pid(XEND_PID_FILE)
    1.71 +        if find_process(pid, XEND_PROCESS_NAME):
    1.72              if kill:
    1.73 -                os.kill(pid, 1)
    1.74 +                os.kill(pid, signal.SIGTERM)
    1.75              else:
    1.76                  running = pid
    1.77 -        if running == 0 and os.path.isfile(pidfile):
    1.78 -            os.remove(pidfile)
    1.79 +        if running == 0 and os.path.isfile(XEND_PID_FILE):
    1.80 +            os.remove(XEND_PID_FILE)
    1.81          return running
    1.82  
    1.83 -    def cleanup_xend(self, kill):
    1.84 -        return self.cleanup_process(XEND_PID_FILE, "xend", kill)
    1.85  
    1.86      def status(self):
    1.87          """Returns the status of the xend daemon.
    1.88 @@ -97,15 +61,15 @@ class Daemon:
    1.89          0  Running
    1.90          3  Not running
    1.91          """
    1.92 -        if self.cleanup_process(XEND_PID_FILE, "xend", False) == 0:
    1.93 +        if self.cleanup_xend(False) == 0:
    1.94              return 3
    1.95          else:
    1.96              return 0
    1.97  
    1.98 -    def fork_pid(self, pidfile):
    1.99 -        """Fork and write the pid of the child to 'pidfile'.
   1.100  
   1.101 -        @param pidfile: pid file
   1.102 +    def fork_pid(self):
   1.103 +        """Fork and write the pid of the child to XEND_PID_FILE.
   1.104 +
   1.105          @return: pid of child in parent, 0 in child
   1.106          """
   1.107  
   1.108 @@ -113,7 +77,7 @@ class Daemon:
   1.109  
   1.110          if self.child:
   1.111              # Parent
   1.112 -            pidfile = open(pidfile, 'w')
   1.113 +            pidfile = open(XEND_PID_FILE, 'w')
   1.114              try:
   1.115                  pidfile.write(str(self.child))
   1.116              finally:
   1.117 @@ -121,6 +85,7 @@ class Daemon:
   1.118  
   1.119          return self.child
   1.120  
   1.121 +
   1.122      def daemonize(self):
   1.123          if not XEND_DAEMONIZE: return
   1.124   
   1.125 @@ -132,9 +97,9 @@ class Daemon:
   1.126          # Fork, this allows the group leader to exit,
   1.127          # which means the child can never again regain control of the
   1.128          # terminal
   1.129 -        if self.fork_pid(XEND_PID_FILE):
   1.130 -            self.exit()
   1.131 - 
   1.132 +        if os.fork():
   1.133 +            os._exit(0)
   1.134 +
   1.135          # Detach from standard file descriptors, and redirect them to
   1.136          # /dev/null or the log as appropriate.
   1.137          os.close(0)
   1.138 @@ -189,7 +154,41 @@ class Daemon:
   1.139              # Child
   1.140              self.daemonize()
   1.141              self.tracing(trace)
   1.142 -            self.run(os.fdopen(w, 'w'))
   1.143 +
   1.144 +            # If Xend proper segfaults, then we want to restart it.  Thus,
   1.145 +            # we fork a child for running Xend itself, and if it segfaults
   1.146 +            # (or exits any way other than cleanly) then we run it again.
   1.147 +            # The first time through we want the server to write to the (r,w)
   1.148 +            # pipe created above, so that we do not exit until the server is
   1.149 +            # ready to receive requests.  All subsequent restarts we don't
   1.150 +            # want this behaviour, or the pipe will eventually fill up, so
   1.151 +            # we just pass None into run in subsequent cases (by clearing w
   1.152 +            # in the parent of the first fork).
   1.153 +            while True:
   1.154 +                pid = self.fork_pid()
   1.155 +                if pid:
   1.156 +                    os.close(w)
   1.157 +                    w = False
   1.158 +
   1.159 +                    (_, status) = os.waitpid(pid, 0)
   1.160 +
   1.161 +                    if os.WIFEXITED(status):
   1.162 +                        code = os.WEXITSTATUS(status)
   1.163 +                        log.info('Xend exited with status %d.', code)
   1.164 +                        sys.exit(code)
   1.165 +
   1.166 +                    if os.WIFSIGNALED(status):
   1.167 +                        sig = os.WTERMSIG(status)
   1.168 +
   1.169 +                        if sig in (signal.SIGINT, signal.SIGTERM):
   1.170 +                            log.info('Xend stopped due to signal %d.', sig)
   1.171 +                            sys.exit(0)
   1.172 +                        else:
   1.173 +                            log.fatal(
   1.174 +                                'Xend died due to signal %d!  Restarting it.',
   1.175 +                                sig)
   1.176 +                else:
   1.177 +                    self.run(w and os.fdopen(w, 'w') or None)
   1.178  
   1.179          return ret
   1.180  
   1.181 @@ -290,18 +289,11 @@ class Daemon:
   1.182              if XEND_DEBUG:
   1.183                  traceback.print_exc()
   1.184              log.exception("Exception starting xend (%s)" % ex)
   1.185 -            status.write('1')
   1.186 -            status.close()
   1.187 -            self.exit(1)
   1.188 +            if status:
   1.189 +                status.write('1')
   1.190 +                status.close()
   1.191 +            sys.exit(1)
   1.192              
   1.193 -    def exit(self, rc=0):
   1.194 -        # Calling sys.exit() raises a SystemExit exception, which only
   1.195 -        # kills the current thread. Calling os._exit() makes the whole
   1.196 -        # Python process exit immediately. There doesn't seem to be another
   1.197 -        # way to exit a Python with running threads.
   1.198 -        #sys.exit(rc)
   1.199 -        os._exit(rc)
   1.200 -
   1.201  def instance():
   1.202      global inst
   1.203      try:
   1.204 @@ -311,10 +303,47 @@ def instance():
   1.205      return inst
   1.206  
   1.207  
   1.208 +def read_pid(pidfile):
   1.209 +    """Read process id from a file.
   1.210 +
   1.211 +    @param pidfile: file to read
   1.212 +    @return pid or 0
   1.213 +    """
   1.214 +    if os.path.isfile(pidfile) and os.path.getsize(pidfile):
   1.215 +        try:
   1.216 +            f = open(pidfile, 'r')
   1.217 +            try:
   1.218 +                return int(f.read())
   1.219 +            finally:
   1.220 +                f.close()
   1.221 +        except:
   1.222 +            return 0
   1.223 +    else:
   1.224 +        return 0
   1.225 +
   1.226 +
   1.227 +def find_process(pid, name):
   1.228 +    """Search for a process.
   1.229 +
   1.230 +    @param pid: process id
   1.231 +    @param name: process name
   1.232 +    @return: pid if found, 0 otherwise
   1.233 +    """
   1.234 +    running = 0
   1.235 +    if pid:
   1.236 +        lines = os.popen('ps %d 2>/dev/null' % pid).readlines()
   1.237 +        exp = '^ *%d.+%s' % (pid, name)
   1.238 +        for line in lines:
   1.239 +            if re.search(exp, line):
   1.240 +                running = pid
   1.241 +                break
   1.242 +    return running
   1.243 +
   1.244 +
   1.245  def main(argv = None):
   1.246      global XEND_DAEMONIZE
   1.247      
   1.248 -    XEND_DAEMONIZE = 0
   1.249 +    XEND_DAEMONIZE = False
   1.250      if argv is None:
   1.251          argv = sys.argv
   1.252  
     2.1 --- a/tools/python/xen/xend/server/SrvServer.py	Thu Dec 08 16:17:53 2005 +0000
     2.2 +++ b/tools/python/xen/xend/server/SrvServer.py	Fri Dec 09 10:49:29 2005 +0000
     2.3 @@ -68,7 +68,8 @@ class XendServers:
     2.4          # Running the network script will spawn another process, which takes
     2.5          # the status fd with it unless we set FD_CLOEXEC.  Failing to do this
     2.6          # causes the read in SrvDaemon to hang even when we have written here.
     2.7 -        fcntl.fcntl(status, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
     2.8 +        if status:
     2.9 +            fcntl.fcntl(status, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
    2.10          
    2.11          Vifctl.network('start')
    2.12          threads = []
    2.13 @@ -93,8 +94,9 @@ class XendServers:
    2.14              if threads_left:
    2.15                  time.sleep(.5)
    2.16  
    2.17 -        status.write('0')
    2.18 -        status.close()
    2.19 +        if status:
    2.20 +            status.write('0')
    2.21 +            status.close()
    2.22  
    2.23          for t in threads:
    2.24              t.join()