ia64/xen-unstable

changeset 9130:c8c0f99193d4

Change the way that reboot-timeouts are handled. Rather than refreshing the
list of domains each time XendDomainInfo.refresh is called, and spawning
threads for each shutdown still in progress, we watch the control/shutdown
node and spawn just the one thread immediately. When the daemon starts (say
after a crash) all domains are checked for entries in the store indicating a
reboot in progress.

This avoids a large cost on the store and in threads whenever the list of domains
is refreshed. That refresh happens every time the server is queried, so the cost
can be quite high. The cost is especially high when using xm shutdown -a -w, so
this operation should perform a lot better now.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@leeni.uk.xensource.com
date Fri Mar 03 15:41:56 2006 +0100 (2006-03-03)
parents 2274f293af41
children 9e19dcb05f56
files tools/python/xen/xend/XendDomainInfo.py
line diff
     1.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Fri Mar 03 15:37:28 2006 +0100
     1.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Fri Mar 03 15:41:56 2006 +0100
     1.3 @@ -13,7 +13,7 @@
     1.4  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     1.5  #============================================================================
     1.6  # Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
     1.7 -# Copyright (C) 2005 XenSource Ltd
     1.8 +# Copyright (C) 2005, 2006 XenSource Ltd
     1.9  #============================================================================
    1.10  
    1.11  """Representation of a single domain.
    1.12 @@ -82,7 +82,7 @@ restart_modes = [
    1.13  STATE_DOM_OK       = 1
    1.14  STATE_DOM_SHUTDOWN = 2
    1.15  
    1.16 -SHUTDOWN_TIMEOUT = 30
    1.17 +SHUTDOWN_TIMEOUT = 30.0
    1.18  
    1.19  ZOMBIE_PREFIX = 'Zombie-'
    1.20  
    1.21 @@ -182,7 +182,7 @@ def create(config):
    1.22          vm.initDomain()
    1.23          vm.storeVmDetails()
    1.24          vm.storeDomDetails()
    1.25 -        vm.registerWatch()
    1.26 +        vm.registerWatches()
    1.27          vm.refreshShutdown()
    1.28          return vm
    1.29      except:
    1.30 @@ -238,7 +238,7 @@ def recreate(xeninfo, priv):
    1.31          vm.storeVmDetails()
    1.32          vm.storeDomDetails()
    1.33  
    1.34 -    vm.registerWatch()
    1.35 +    vm.registerWatches()
    1.36      vm.refreshShutdown(xeninfo)
    1.37      return vm
    1.38  
    1.39 @@ -443,7 +443,10 @@ class XendDomainInfo:
    1.40          self.console_mfn = None
    1.41  
    1.42          self.vmWatch = None
    1.43 +        self.shutdownWatch = None
    1.44  
    1.45 +        self.shutdownStartTime = None
    1.46 +        
    1.47          self.state = STATE_DOM_OK
    1.48          self.state_updated = threading.Condition()
    1.49          self.refresh_shutdown_lock = threading.Condition()
    1.50 @@ -648,7 +651,7 @@ class XendDomainInfo:
    1.51  
    1.52          self.introduceDomain()
    1.53          self.storeDomDetails()
    1.54 -        self.registerWatch()
    1.55 +        self.registerWatches()
    1.56          self.refreshShutdown()
    1.57  
    1.58          log.debug("XendDomainInfo.completeRestore done")
    1.59 @@ -711,13 +714,15 @@ class XendDomainInfo:
    1.60  
    1.61      ## public:
    1.62  
    1.63 -    def registerWatch(self):
    1.64 -        """Register a watch on this VM's entries in the store, so that
    1.65 -        when they are changed externally, we keep up to date.  This should
    1.66 -        only be called by {@link #create}, {@link #recreate}, or {@link
    1.67 -        #restore}, once the domain's details have been written, but before the
    1.68 -        new instance is returned."""
    1.69 +    def registerWatches(self):
    1.70 +        """Register a watch on this VM's entries in the store, and the
    1.71 +        domain's control/shutdown node, so that when they are changed
    1.72 +        externally, we keep up to date.  This should only be called by {@link
    1.73 +        #create}, {@link #recreate}, or {@link #restore}, once the domain's
    1.74 +        details have been written, but before the new instance is returned."""
    1.75          self.vmWatch = xswatch(self.vmpath, self.storeChanged)
    1.76 +        self.shutdownWatch = xswatch(self.dompath + '/control/shutdown',
    1.77 +                                     self.handleShutdownWatch)
    1.78  
    1.79  
    1.80      def getDomid(self):
    1.81 @@ -852,20 +857,14 @@ class XendDomainInfo:
    1.82                  # Domain is alive.  If we are shutting it down, then check
    1.83                  # the timeout on that, and destroy it if necessary.
    1.84  
    1.85 -                sst = self.readDom('xend/shutdown_start_time')
    1.86 -                if sst:
    1.87 -                    sst = float(sst)
    1.88 -                    timeout = SHUTDOWN_TIMEOUT - time.time() + sst
    1.89 +                if self.shutdownStartTime:
    1.90 +                    timeout = (SHUTDOWN_TIMEOUT - time.time() +
    1.91 +                               self.shutdownStartTime)
    1.92                      if timeout < 0:
    1.93                          log.info(
    1.94                              "Domain shutdown timeout expired: name=%s id=%s",
    1.95                              self.info['name'], self.domid)
    1.96                          self.destroy()
    1.97 -                    else:
    1.98 -                        log.debug(
    1.99 -                            "Scheduling refreshShutdown on domain %d in %ds.",
   1.100 -                            self.domid, timeout)
   1.101 -                        threading.Timer(timeout, self.refreshShutdown).start()
   1.102          finally:
   1.103              self.refresh_shutdown_lock.release()
   1.104  
   1.105 @@ -873,12 +872,34 @@ class XendDomainInfo:
   1.106              self.maybeRestart(restart_reason)
   1.107  
   1.108  
   1.109 +    def handleShutdownWatch(self, _):
   1.110 +        log.debug('XendDomainInfo.handleShutdownWatch')
   1.111 +        
   1.112 +        reason = self.readDom('control/shutdown')
   1.113 +
   1.114 +        if reason and reason != 'suspend':
   1.115 +            sst = self.readDom('xend/shutdown_start_time')
   1.116 +            now = time.time()
   1.117 +            if sst:
   1.118 +                self.shutdownStartTime = float(sst)
   1.119 +                timeout = float(sst) + SHUTDOWN_TIMEOUT - now
   1.120 +            else:
   1.121 +                self.shutdownStartTime = now
   1.122 +                self.storeDom('xend/shutdown_start_time', now)
   1.123 +                timeout = SHUTDOWN_TIMEOUT
   1.124 +
   1.125 +            log.trace(
   1.126 +                "Scheduling refreshShutdown on domain %d in %ds.",
   1.127 +                self.domid, timeout)
   1.128 +            threading.Timer(timeout, self.refreshShutdown).start()
   1.129 +
   1.130 +        return 1
   1.131 +
   1.132 +
   1.133      def shutdown(self, reason):
   1.134          if not reason in shutdown_reasons.values():
   1.135              raise XendError('Invalid reason: %s' % reason)
   1.136          self.storeDom("control/shutdown", reason)
   1.137 -        if reason != 'suspend':
   1.138 -            self.storeDom('xend/shutdown_start_time', time.time())
   1.139  
   1.140  
   1.141      ## private:
   1.142 @@ -1225,6 +1246,8 @@ class XendDomainInfo:
   1.143          """Cleanup domain resources; release devices.  Idempotent.  Nothrow
   1.144          guarantee."""
   1.145  
   1.146 +        self.unwatchShutdown()
   1.147 +
   1.148          self.release_devices()
   1.149  
   1.150          if self.image:
   1.151 @@ -1276,6 +1299,20 @@ class XendDomainInfo:
   1.152              log.exception("Unwatching VM path failed.")
   1.153  
   1.154  
   1.155 +    def unwatchShutdown(self):
   1.156 +        """Remove the watch on the domain's control/shutdown node, if any.
   1.157 +        Idempotent.  Nothrow guarantee."""
   1.158 +
   1.159 +        try:
   1.160 +            try:
   1.161 +                if self.shutdownWatch:
   1.162 +                    self.shutdownWatch.unwatch()
   1.163 +            finally:
   1.164 +                self.shutdownWatch = None
   1.165 +        except:
   1.166 +            log.exception("Unwatching control/shutdown failed.")
   1.167 +
   1.168 +
   1.169      ## public:
   1.170  
   1.171      def destroy(self):