ia64/xen-unstable

changeset 17735:eafb252f1412

xend: take care of dead qemu-dm process

This patch fixes xend so that when a fatal error happens (e.g. the
qemu-dm process is killed), it logs an error message, marks the domain
as crashed, and then performs the action configured for a crash in the
domain config file. It adds code to xend that checks the status of each
HVM domain's device model (DM) process every 30 seconds.

Signed-off-by: Xiaowei Hu <xiaowei.hu@oracle.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon May 26 08:53:25 2008 +0100 (2008-05-26)
parents 14f03101b61c
children 9df61d6601ba
files tools/python/xen/xend/server/SrvServer.py
line diff
     1.1 --- a/tools/python/xen/xend/server/SrvServer.py	Mon May 26 08:45:59 2008 +0100
     1.2 +++ b/tools/python/xen/xend/server/SrvServer.py	Mon May 26 08:53:25 2008 +0100
     1.3 @@ -44,6 +44,7 @@ import fcntl
     1.4  import re
     1.5  import time
     1.6  import signal
     1.7 +import os
     1.8  from threading import Thread
     1.9  
    1.10  from xen.web.httpserver import HttpServer, UnixHttpServer
    1.11 @@ -148,14 +149,27 @@ class XendServers:
    1.12  
    1.13              # Reaching this point means we can auto start domains
    1.14              try:
    1.15 -                xenddomain().autostart_domains()
    1.16 +                dom = xenddomain()
    1.17 +                dom.autostart_domains()
    1.18              except Exception, e:
    1.19                  log.exception("Failed while autostarting domains")
    1.20  
    1.21              # loop to keep main thread alive until it receives a SIGTERM
    1.22              self.running = True
    1.23              while self.running:
    1.24 -                time.sleep(100000000)
    1.25 +                # loop to destroy those hvm domain that whoes DM has dead unexpectedly.
    1.26 +                for item in dom.domains.values():
    1.27 +                    if item.info.is_hvm():
    1.28 +                        device_model_pid = item.gatherDom(('image/device-model-pid', str))
    1.29 +                        dm_stat_cmd = "ps -o stat --no-headers -p"+device_model_pid
    1.30 +                        dm_stat = os.popen(dm_stat_cmd).readline().rstrip()
    1.31 +                        if dm_stat == 'Z':
    1.32 +                            log.warn("Devices Model for domain " + str(item.domid) + "was killed unexpectedly")
    1.33 +                            item.info['crashed'] = 1
    1.34 +                            item.refreshShutdown(item.info)
    1.35 +                        else:
    1.36 +                            continue
    1.37 +                time.sleep(30)
    1.38                  
    1.39              if self.reloadingConfig:
    1.40                  log.info("Restarting all XML-RPC and Xen-API servers...")