direct-io.hg

changeset 15164:d93e560c1d50

Do not destroy domains that time out when shutting down.

Instead of violently destroying a domain that is not responding to a
shutdown request, rename the domain to indicate the problem and leave
it alone; this allows the admin to take corrective action (which may
or may not include destroying the domain).

Signed-off-by: John Levon <john.levon@sun.com>
author kfraser@localhost.localdomain
date Fri May 25 10:15:37 2007 +0100 (2007-05-25)
parents 16e376ed5638
children 4730ec3d5ab3
files tools/python/xen/xend/XendDomainInfo.py
line diff
     1.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Thu May 24 15:45:43 2007 +0100
     1.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Fri May 25 10:15:37 2007 +0100
     1.3 @@ -362,6 +362,7 @@ class XendDomainInfo:
     1.4          self.vmWatch = None
     1.5          self.shutdownWatch = None
     1.6          self.shutdownStartTime = None
     1.7 +        self.unresponsive = False
     1.8          self._resume = resume
     1.9  
    1.10          self.state_updated = threading.Condition()
    1.11 @@ -1128,21 +1129,25 @@ class XendDomainInfo:
    1.12                  # failed.  Ignore this domain.
    1.13                  pass
    1.14              else:
    1.15 -                # Domain is alive.  If we are shutting it down, then check
    1.16 -                # the timeout on that, and destroy it if necessary.
    1.17 +                # Domain is alive.
    1.18                  if xeninfo['paused']:
    1.19                      self._stateSet(DOM_STATE_PAUSED)
    1.20                  else:
    1.21                      self._stateSet(DOM_STATE_RUNNING)
    1.22                      
    1.23 -                if self.shutdownStartTime:
    1.24 +                if self.shutdownStartTime and not self.unresponsive:
    1.25                      timeout = (SHUTDOWN_TIMEOUT - time.time() +
    1.26                                 self.shutdownStartTime)
    1.27                      if timeout < 0:
    1.28 +                        # The domain is not responding to shutdown requests.
    1.29 +                        # Log a message, and rename the domain to indicate the
    1.30 +                        # state; we keep the domain running, however, to
    1.31 +                        # allow corrective action.
    1.32                          log.info(
    1.33                              "Domain shutdown timeout expired: name=%s id=%s",
    1.34                              self.info['name_label'], self.domid)
    1.35 -                        self.destroy()
    1.36 +                        self.setName('unresponsive-' + self.getName())
    1.37 +                        self.unresponsive = True
    1.38          finally:
    1.39              self.refresh_shutdown_lock.release()
    1.40  
    1.41 @@ -1431,6 +1436,7 @@ class XendDomainInfo:
    1.42          log.debug('XendDomainInfo.constructDomain')
    1.43  
    1.44          self.shutdownStartTime = None
    1.45 +        self.unresponsive = False
    1.46  
    1.47          hvm = self.info.is_hvm()
    1.48          if hvm: