ia64/xen-unstable

changeset 4104:c738e3f51a55

bitkeeper revision 1.1159.269.2 (4231ceccPlcoLKWWixfu3trU9tb-_A)

Improve error reporting for save/restore/migrate.

Signed-off-by: Mike Wray <mike.wray@hp.com>
author mjw@wray-m-3.hpl.hp.com
date Fri Mar 11 17:01:00 2005 +0000 (2005-03-11)
parents 552e3748f0e7
children 1e38b5603716
files tools/python/xen/xend/XendMigrate.py tools/python/xen/xend/server/SrvDomain.py
line diff
     1.1 --- a/tools/python/xen/xend/XendMigrate.py	Fri Mar 11 13:09:40 2005 +0000
     1.2 +++ b/tools/python/xen/xend/XendMigrate.py	Fri Mar 11 17:01:00 2005 +0000
     1.3 @@ -6,12 +6,14 @@ import errno
     1.4  import sys
     1.5  import socket
     1.6  import time
     1.7 +import types
     1.8  
     1.9  from twisted.internet import reactor
    1.10  from twisted.internet import defer
    1.11  #defer.Deferred.debug = 1
    1.12  from twisted.internet.protocol import Protocol
    1.13  from twisted.internet.protocol import ClientFactory
    1.14 +from twisted.python.failure import Failure
    1.15  
    1.16  import sxp
    1.17  import XendDB
    1.18 @@ -45,11 +47,9 @@ class Xfrd(Protocol):
    1.19          sxp.show(req, out=self.transport)
    1.20  
    1.21      def loseConnection(self):
    1.22 -        print 'Xfrd>loseConnection>'
    1.23          self.transport.loseConnection()
    1.24  
    1.25      def connectionLost(self, reason):
    1.26 -        print 'Xfrd>connectionLost>', reason
    1.27          self.xinfo.connectionLost(reason)
    1.28  
    1.29      def dataReceived(self, data):
    1.30 @@ -70,17 +70,15 @@ class XfrdClientFactory(ClientFactory):
    1.31          self.xinfo = xinfo
    1.32  
    1.33      def startedConnecting(self, connector):
    1.34 -        print 'Started to connect', 'self=', self, 'connector=', connector
    1.35 +        pass
    1.36  
    1.37      def buildProtocol(self, addr):
    1.38 -        print 'buildProtocol>', addr
    1.39          return Xfrd(self.xinfo)
    1.40  
    1.41      def clientConnectionLost(self, connector, reason):
    1.42 -        print 'clientConnectionLost>', 'connector=', connector, 'reason=', reason
    1.43 +        pass
    1.44  
    1.45      def clientConnectionFailed(self, connector, reason):
    1.46 -        print 'clientConnectionFailed>', 'connector=', connector, 'reason=', reason
    1.47          self.xinfo.error(reason)
    1.48  
    1.49  class XfrdInfo:
    1.50 @@ -90,7 +88,7 @@ class XfrdInfo:
    1.51  
    1.52      """Suspend timeout (seconds).
    1.53      We set a timeout because suspending a domain can hang."""
    1.54 -    timeout = 30
    1.55 +    timeout = 10
    1.56  
    1.57      def __init__(self):
    1.58          from xen.xend import XendDomain
    1.59 @@ -98,6 +96,9 @@ class XfrdInfo:
    1.60          self.deferred = defer.Deferred()
    1.61          self.suspended = {}
    1.62          self.paused = {}
    1.63 +        self.state = 'init'
    1.64 +        # List of errors encountered.
    1.65 +        self.errors = []
    1.66          
    1.67      def vmconfig(self):
    1.68          dominfo = self.xd.domain_get(self.src_dom)
    1.69 @@ -107,12 +108,38 @@ class XfrdInfo:
    1.70              val = None
    1.71          return val
    1.72  
    1.73 +    def add_error(self, err):
    1.74 +        """Add an error to the error list.
    1.75 +        Returns the error added (which may have been unwrapped if it
    1.76 +        was a Twisted Failure).
    1.77 +        """
    1.78 +        while isinstance(err, Failure):
    1.79 +            err = err.value
    1.80 +        if err not in self.errors:
    1.81 +            self.errors.append(err)
    1.82 +        return err
    1.83 +
    1.84 +    def error_summary(self, msg=None):
    1.85 +        """Get a XendError summarising the errors (if any).
    1.86 +        """
    1.87 +        if msg is None:
    1.88 +            msg = "errors"
    1.89 +        if self.errors:
    1.90 +            errmsg = msg + ': ' + ', '.join(map(str, self.errors))
    1.91 +        else:
    1.92 +            errmsg = msg
    1.93 +        return XendError(errmsg)
    1.94 +
    1.95 +    def get_errors(self):
    1.96 +        """Get the list of errors.
    1.97 +        """
    1.98 +        return self.errors
    1.99 +
   1.100      def error(self, err):
   1.101 -        print 'Error>', err
   1.102          self.state = 'error'
   1.103 +        self.add_error(err)
   1.104          if not self.deferred.called:
   1.105 -            print 'Error> calling errback'
   1.106 -            self.deferred.errback(err)
   1.107 +            self.deferred.errback(self.error_summary())
   1.108  
   1.109      def dispatch(self, xfrd, val):
   1.110          
   1.111 @@ -139,28 +166,23 @@ class XfrdInfo:
   1.112              cbok(val)
   1.113  
   1.114      def unknown(self, xfrd, val):
   1.115 -        print 'unknown>', val
   1.116          xfrd.loseConnection()
   1.117          return None
   1.118  
   1.119      def xfr_err(self, xfrd, val):
   1.120          # If we get an error with non-zero code the operation failed.
   1.121          # An error with code zero indicates hello success.
   1.122 -        print 'xfr_err>', val
   1.123          v = sxp.child0(val)
   1.124 -        print 'xfr_err>', type(v), v
   1.125          err = int(sxp.child0(val))
   1.126          if not err: return
   1.127 -        self.error(err);
   1.128 +        self.error("transfer daemon (xfrd) error: " + str(err))
   1.129          xfrd.loseConnection()
   1.130          return None
   1.131  
   1.132      def xfr_progress(self, xfrd, val):
   1.133 -        print 'xfr_progress>', val
   1.134          return None
   1.135  
   1.136      def xfr_vm_destroy(self, xfrd, val):
   1.137 -        print 'xfr_vm_destroy>', val
   1.138          try:
   1.139              vmid = sxp.child0(val)
   1.140              val = self.xd.domain_destroy(vmid)
   1.141 @@ -168,28 +190,32 @@ class XfrdInfo:
   1.142                  del self.paused[vmid]
   1.143              if vmid in self.suspended:
   1.144                  del self.suspended[vmid]
   1.145 -        except:
   1.146 +        except StandardError, err:
   1.147 +            self.add_error("vm_destroy failed")
   1.148 +            self.add_error(err)
   1.149              val = errno.EINVAL
   1.150          return ['xfr.err', val]
   1.151      
   1.152      def xfr_vm_pause(self, xfrd, val):
   1.153 -        print 'xfr_vm_pause>', val
   1.154          try:
   1.155              vmid = sxp.child0(val)
   1.156              val = self.xd.domain_pause(vmid)
   1.157              self.paused[vmid] = 1
   1.158 -        except:
   1.159 +        except StandardError, err:
   1.160 +            self.add_error("vm_pause failed")
   1.161 +            self.add_error(err)
   1.162              val = errno.EINVAL
   1.163          return ['xfr.err', val]
   1.164  
   1.165      def xfr_vm_unpause(self, xfrd, val):
   1.166 -        print 'xfr_vm_unpause>', val
   1.167          try:
   1.168              vmid = sxp.child0(val)
   1.169              val = self.xd.domain_unpause(vmid)
   1.170              if vmid in self.paused:
   1.171                  del self.paused[vmid]
   1.172 -        except:
   1.173 +        except StandardError, err:
   1.174 +            self.add_error("vm_unpause failed")
   1.175 +            self.add_error(err)
   1.176              val = errno.EINVAL
   1.177          return ['xfr.err', val]
   1.178  
   1.179 @@ -199,7 +225,6 @@ class XfrdInfo:
   1.180          Suspending can hang, so we set a timeout and fail if it
   1.181          takes too long.
   1.182          """
   1.183 -        print 'xfr_vm_suspend>', val
   1.184          try:
   1.185              vmid = sxp.child0(val)
   1.186              d = defer.Deferred()
   1.187 @@ -208,15 +233,15 @@ class XfrdInfo:
   1.188              # the domain died. Set a timeout and error handler so the subscriptions
   1.189              # will be cleaned up if suspending hangs or there is an error.
   1.190              def onSuspended(e, v):
   1.191 -                print 'xfr_vm_suspend>onSuspended>', e, v
   1.192                  if v[1] != vmid: return
   1.193                  subscribe(on=0)
   1.194 -                d.callback(v)
   1.195 +                if not d.called:
   1.196 +                    d.callback(v)
   1.197                  
   1.198              def onDied(e, v):
   1.199 -                print 'xfr_vm_suspend>onDied>', e, v
   1.200                  if v[1] != vmid: return
   1.201 -                d.errback(XendError('Domain died'))
   1.202 +                if not d.called:
   1.203 +                    d.errback(XendError('Domain %s died while suspending' % vmid))
   1.204                  
   1.205              def subscribe(on=1):
   1.206                  if on:
   1.207 @@ -227,24 +252,25 @@ class XfrdInfo:
   1.208                  action('xend.domain.died', onDied)
   1.209  
   1.210              def cberr(err):
   1.211 -                print 'xfr_vm_suspend>cberr>', err
   1.212                  subscribe(on=0)
   1.213 +                self.add_error("suspend failed")
   1.214 +                self.add_error(err)
   1.215                  return err
   1.216  
   1.217 +            d.addErrback(cberr)
   1.218 +            d.setTimeout(self.timeout)
   1.219              subscribe()
   1.220              val = self.xd.domain_shutdown(vmid, reason='suspend')
   1.221              self.suspended[vmid] = 1
   1.222 -            d.addErrback(cberr)
   1.223 -            d.setTimeout(self.timeout)
   1.224              return d
   1.225          except Exception, err:
   1.226 -            print 'xfr_vm_suspend> Exception', err
   1.227 +            self.add_error("suspend failed")
   1.228 +            self.add_error(err)
   1.229              traceback.print_exc()
   1.230              val = errno.EINVAL
   1.231          return ['xfr.err', val]
   1.232  
   1.233      def connectionLost(self, reason=None):
   1.234 -        print 'XfrdInfo>connectionLost>', reason
   1.235          for vmid in self.suspended:
   1.236              try:
   1.237                  self.xd.domain_destroy(vmid)
   1.238 @@ -279,7 +305,7 @@ class XendMigrateInfo(XfrdInfo):
   1.239                  ['id',    self.xid   ],
   1.240                  ['state', self.state ],
   1.241                  ['live',  self.live  ],
   1.242 -                ['resource', self.resource] ]
   1.243 +                ['resource', self.resource ] ]
   1.244          sxpr_src = ['src', ['host', self.src_host], ['domain', self.src_dom] ]
   1.245          sxpr.append(sxpr_src)
   1.246          sxpr_dst = ['dst', ['host', self.dst_host] ]
   1.247 @@ -291,12 +317,12 @@ class XendMigrateInfo(XfrdInfo):
   1.248      def request(self, xfrd):
   1.249          vmconfig = self.vmconfig()
   1.250          if not vmconfig:
   1.251 +            self.error(XendError("vm config not found"))
   1.252              xfrd.loseConnection()
   1.253              return
   1.254 -        log.info('Migrate BEGIN: ' + str(self.sxpr()))
   1.255 +        log.info('Migrate BEGIN: %s' % str(self.sxpr()))
   1.256          eserver.inject('xend.domain.migrate',
   1.257 -                       [ self.dominfo.name, self.dominfo.id,
   1.258 -                         "begin", self.sxpr() ])
   1.259 +                       [ self.dominfo.name, self.dominfo.id, "begin", self.sxpr() ])
   1.260          xfrd.request(['xfr.migrate',
   1.261                        self.src_dom,
   1.262                        vmconfig,
   1.263 @@ -305,19 +331,6 @@ class XendMigrateInfo(XfrdInfo):
   1.264                        self.live,
   1.265                        self.resource ])
   1.266          
   1.267 -##     def xfr_vm_suspend(self, xfrd, val):
   1.268 -##         def cbok(val):
   1.269 -##             # Special case for localhost: destroy devices early.
   1.270 -##             if self.dst_host in ["localhost", "127.0.0.1"]:
   1.271 -##                 self.dominfo.restart_cancel()
   1.272 -##                 self.dominfo.cleanup()
   1.273 -##                 self.dominfo.destroy_console()
   1.274 -##             return val
   1.275 -            
   1.276 -##         d = XfrdInfo.xfr_vm_suspend(self, xfrd, val)
   1.277 -##         d.addCallback(cbok)
   1.278 -##         return d
   1.279 -    
   1.280      def xfr_migrate_ok(self, xfrd, val):
   1.281          dom = int(sxp.child0(val))
   1.282          self.state = 'ok'
   1.283 @@ -327,17 +340,15 @@ class XendMigrateInfo(XfrdInfo):
   1.284              self.deferred.callback(self)
   1.285  
   1.286      def connectionLost(self, reason=None):
   1.287 -        print 'XfrdMigrateInfo>connectionLost>', reason
   1.288          XfrdInfo.connectionLost(self, reason)
   1.289          if self.state =='ok':
   1.290              log.info('Migrate OK: ' + str(self.sxpr()))
   1.291          else:
   1.292              self.state = 'error'
   1.293 -            self.error(XendError("migrate failed"))
   1.294 +            self.error("migrate failed")
   1.295              log.info('Migrate ERROR: ' + str(self.sxpr()))
   1.296          eserver.inject('xend.domain.migrate',
   1.297 -                       [ self.dominfo.name, self.dominfo.id,
   1.298 -                         self.state, self.sxpr() ])
   1.299 +                       [ self.dominfo.name, self.dominfo.id, self.state, self.sxpr() ])
   1.300  
   1.301  class XendSaveInfo(XfrdInfo):
   1.302      """Representation of a save in-progress and its interaction with xfrd.
   1.303 @@ -361,16 +372,15 @@ class XendSaveInfo(XfrdInfo):
   1.304          return sxpr
   1.305  
   1.306      def request(self, xfrd):
   1.307 -        print '***request>', self.vmconfig()
   1.308          vmconfig = self.vmconfig()
   1.309          if not vmconfig:
   1.310 +            self.error(XendError("vm config not found"))
   1.311              xfrd.loseConnection()
   1.312              return
   1.313 -        print '***request> begin'
   1.314          log.info('Save BEGIN: ' + str(self.sxpr()))
   1.315          eserver.inject('xend.domain.save',
   1.316 -                       [self.dominfo.name, self.dominfo.id,
   1.317 -                        "begin", self.sxpr()])
   1.318 +                       [ self.dominfo.name, self.dominfo.id,
   1.319 +                         "begin", self.sxpr() ])
   1.320          xfrd.request(['xfr.save', self.src_dom, vmconfig, self.file ])
   1.321          
   1.322      def xfr_save_ok(self, xfrd, val):
   1.323 @@ -380,13 +390,12 @@ class XendSaveInfo(XfrdInfo):
   1.324              self.deferred.callback(self)
   1.325  
   1.326      def connectionLost(self, reason=None):
   1.327 -        print 'XfrdSaveInfo>connectionLost>', reason
   1.328          XfrdInfo.connectionLost(self, reason)
   1.329          if self.state =='ok':
   1.330              log.info('Save OK: ' + str(self.sxpr()))
   1.331          else:
   1.332              self.state = 'error'
   1.333 -            self.error(XendError("save failed"))
   1.334 +            self.error("save failed")
   1.335              log.info('Save ERROR: ' + str(self.sxpr()))
   1.336          eserver.inject('xend.domain.save',
   1.337                         [ self.dominfo.name, self.dominfo.id,
   1.338 @@ -409,8 +418,9 @@ class XendRestoreInfo(XfrdInfo):
   1.339           return sxpr
   1.340  
   1.341      def request(self, xfrd):
   1.342 -        print '***request>', self.file
   1.343          log.info('restore BEGIN: ' + str(self.sxpr()))
   1.344 +        eserver.inject('xend.restore', [ 'begin', self.sxpr()])
   1.345 +                       
   1.346          xfrd.request(['xfr.restore', self.file ])
   1.347          
   1.348      def xfr_restore_ok(self, xfrd, val):
   1.349 @@ -419,8 +429,17 @@ class XendRestoreInfo(XfrdInfo):
   1.350          self.state = 'ok'
   1.351          if not self.deferred.called:
   1.352              self.deferred.callback(dominfo)
   1.353 +         
   1.354 +    def connectionLost(self, reason=None):
   1.355 +        XfrdInfo.connectionLost(self, reason)
   1.356 +        if self.state =='ok':
   1.357 +            log.info('Restore OK: ' + self.file)
   1.358 +        else:
   1.359 +            self.state = 'error'
   1.360 +            self.error("restore failed")
   1.361 +            log.info('Restore ERROR: ' + str(self.sxpr()))
   1.362 +        eserver.inject('xend.restore', [ self.state,  self.sxpr()])
   1.363  
   1.364 -         
   1.365  class XendMigrate:
   1.366      """External api for interaction with xfrd for migrate and save.
   1.367      Singleton.
   1.368 @@ -445,7 +464,6 @@ class XendMigrate:
   1.369          self.db.saveall("", self.session_db)
   1.370  
   1.371      def sync_session(self, xid):
   1.372 -        print 'sync_session>', type(xid), xid, self.session_db[xid]
   1.373          self.db.save(xid, self.session_db[xid])
   1.374  
   1.375      def close(self):
   1.376 @@ -458,7 +476,6 @@ class XendMigrate:
   1.377          self.sync_session(xid)
   1.378  
   1.379      def _delete_session(self, xid):
   1.380 -        print '***_delete_session>', xid
   1.381          if xid in self.session:
   1.382              del self.session[xid]
   1.383          if xid in self.session_db:
   1.384 @@ -482,16 +499,23 @@ class XendMigrate:
   1.385          @param info: session
   1.386          @return: deferred
   1.387          """
   1.388 -        def cbremove(val):
   1.389 -            print '***cbremove>', val
   1.390 +        dfr = defer.Deferred()
   1.391 +        def cbok(val):
   1.392              self._delete_session(info.xid)
   1.393 +            if not dfr.called:
   1.394 +                dfr.callback(val)
   1.395              return val
   1.396 +        def cberr(err):
   1.397 +            self._delete_session(info.xid)
   1.398 +            if not dfr.called:
   1.399 +                dfr.errback(err)
   1.400 +            return err
   1.401          self._add_session(info)
   1.402 -        info.deferred.addCallback(cbremove)
   1.403 -        info.deferred.addErrback(cbremove)
   1.404 +        info.deferred.addCallback(cbok)
   1.405 +        info.deferred.addErrback(cberr)
   1.406          xcf = XfrdClientFactory(info)
   1.407          reactor.connectTCP('localhost', XFRD_PORT, xcf)
   1.408 -        return info.deferred
   1.409 +        return dfr
   1.410      
   1.411      def migrate_begin(self, dominfo, host, port=XFRD_PORT, live=0, resource=0):
   1.412          """Begin to migrate a domain to another host.
     2.1 --- a/tools/python/xen/xend/server/SrvDomain.py	Fri Mar 11 13:09:40 2005 +0000
     2.2 +++ b/tools/python/xen/xend/server/SrvDomain.py	Fri Mar 11 17:01:00 2005 +0000
     2.3 @@ -29,13 +29,8 @@ class SrvDomain(SrvDir):
     2.4                      [['dom', 'int'],
     2.5                       ['config', 'sxpr']])
     2.6          deferred = fn(req.args, {'dom': self.dom.dom})
     2.7 -        deferred.addErrback(self._op_configure_err, req)
     2.8          return deferred
     2.9  
    2.10 -    def _op_configure_err(self, err, req):
    2.11 -        req.setResponseCode(http.BAD_REQUEST, "Error: "+ str(err))
    2.12 -        return str(err)
    2.13 -        
    2.14      def op_unpause(self, op, req):
    2.15          val = self.xd.domain_unpause(self.dom.name)
    2.16          return val
    2.17 @@ -68,16 +63,11 @@ class SrvDomain(SrvDir):
    2.18                       ['file', 'str']])
    2.19          deferred = fn(req.args, {'dom': self.dom.id})
    2.20          deferred.addCallback(self._op_save_cb, req)
    2.21 -        deferred.addErrback(self._op_save_err, req)
    2.22          return deferred
    2.23  
    2.24      def _op_save_cb(self, val, req):
    2.25          return 0
    2.26  
    2.27 -    def _op_save_err(self, err, req):
    2.28 -        req.setResponseCode(http.BAD_REQUEST, "Error: "+ str(err))
    2.29 -        return str(err)
    2.30 -        
    2.31      def op_migrate(self, op, req):
    2.32          fn = FormFn(self.xd.domain_migrate,
    2.33                      [['dom', 'str'],
    2.34 @@ -85,9 +75,7 @@ class SrvDomain(SrvDir):
    2.35                       ['live', 'int'],
    2.36                       ['resource', 'int']])
    2.37          deferred = fn(req.args, {'dom': self.dom.id})
    2.38 -        print 'op_migrate>', deferred
    2.39          deferred.addCallback(self._op_migrate_cb, req)
    2.40 -        deferred.addErrback(self._op_migrate_err, req)
    2.41          return deferred
    2.42  
    2.43      def _op_migrate_cb(self, info, req):
    2.44 @@ -101,11 +89,6 @@ class SrvDomain(SrvDir):
    2.45          print '_op_migrate_cb> url=', url
    2.46          return url
    2.47  
    2.48 -    def _op_migrate_err(self, err, req):
    2.49 -        print '_op_migrate_err>', err, req
    2.50 -        req.setResponseCode(http.BAD_REQUEST, "Error: "+ str(err))
    2.51 -        return str(err)
    2.52 -
    2.53      def op_pincpu(self, op, req):
    2.54          fn = FormFn(self.xd.domain_pincpu,
    2.55                      [['dom', 'str'],