ia64/xen-unstable

changeset 13557:207523704fb1

Implement clean return from save/restore failure (so that original
domain can continue execution).
Signed-off-by: Andrei Petrov <andrei.petrov@xensource.com>
author kfraser@localhost.localdomain
date Mon Jan 22 15:15:32 2007 +0000 (2007-01-22)
parents baa9b76ea3e1
children 66cc1685d957
files tools/libxc/xc_resume.c tools/libxc/xg_save_restore.h tools/python/xen/lowlevel/xc/xc.c tools/python/xen/xend/XendCheckpoint.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/XendDomainInfo.py
line diff
     1.1 --- a/tools/libxc/xc_resume.c	Mon Jan 22 14:13:26 2007 +0000
     1.2 +++ b/tools/libxc/xc_resume.c	Mon Jan 22 15:15:32 2007 +0000
     1.3 @@ -1,5 +1,6 @@
     1.4  #include "xc_private.h"
     1.5 -
     1.6 +#include "xg_private.h"
     1.7 +#include "xg_save_restore.h"
     1.8  
     1.9  #if defined(__i386__) || defined(__x86_64__)
    1.10  static int modify_returncode(int xc_handle, uint32_t domid)
    1.11 @@ -22,19 +23,7 @@ static int modify_returncode(int xc_hand
    1.12  }
    1.13  #endif
    1.14  
    1.15 -
    1.16 -/*
    1.17 - * Resume execution of a domain after suspend shutdown.
    1.18 - * This can happen in one of two ways:
    1.19 - *  1. Resume with special return code.
    1.20 - *  2. Reset guest environment so it believes it is resumed in a new
    1.21 - *     domain context.
    1.22 - * (2) should be used only for guests which cannot handle the special
    1.23 - * new return code. (1) is always safe (but slower).
    1.24 - * 
    1.25 - * XXX Only (2) is implemented below. We need to use (1) by default!
    1.26 - */
    1.27 -int xc_domain_resume(int xc_handle, uint32_t domid)
    1.28 +static int xc_domain_resume_cooperative(int xc_handle, uint32_t domid)
    1.29  {
    1.30      DECLARE_DOMCTL;
    1.31      int rc;
    1.32 @@ -50,3 +39,142 @@ int xc_domain_resume(int xc_handle, uint
    1.33      domctl.domain = domid;
    1.34      return do_domctl(xc_handle, &domctl);
    1.35  }
    1.36 +
    1.37 +static int xc_domain_resume_any(int xc_handle, uint32_t domid)
    1.38 +{
    1.39 +    DECLARE_DOMCTL;
    1.40 +    int i, rc = -1;
    1.41 +
    1.42 +    /*
    1.43 +     * (x86 only) Rewrite store_mfn and console_mfn back to MFN (from PFN).
    1.44 +     */
    1.45 +#if defined(__i386__) || defined(__x86_64__)
    1.46 +    xc_dominfo_t info;
    1.47 +    unsigned long mfn, max_pfn = 0;
    1.48 +    vcpu_guest_context_t ctxt;
    1.49 +    start_info_t *start_info;
    1.50 +    shared_info_t *shinfo = NULL;
    1.51 +    xen_pfn_t *p2m_frame_list_list = NULL;
    1.52 +    xen_pfn_t *p2m_frame_list = NULL;
    1.53 +    xen_pfn_t *p2m = NULL;
    1.54 +
    1.55 +    if ( xc_domain_getinfo(xc_handle, domid, 1, &info) != 1 )
    1.56 +    {
    1.57 +        PERROR("Could not get domain info");
    1.58 +        goto out;
    1.59 +    }
    1.60 +
    1.61 +    /* Map the shared info frame */
    1.62 +    shinfo = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
    1.63 +                                  PROT_READ, info.shared_info_frame);
    1.64 +    if ( shinfo == NULL )
    1.65 +    {
    1.66 +        ERROR("Couldn't map shared info");
    1.67 +        goto out;
    1.68 +    }
    1.69 +
    1.70 +    max_pfn = shinfo->arch.max_pfn;
    1.71 +
    1.72 +    p2m_frame_list_list =
    1.73 +        xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, PROT_READ,
    1.74 +                             shinfo->arch.pfn_to_mfn_frame_list_list);
    1.75 +    if ( p2m_frame_list_list == NULL )
    1.76 +    {
    1.77 +        ERROR("Couldn't map p2m_frame_list_list");
    1.78 +        goto out;
    1.79 +    }
    1.80 +
    1.81 +    p2m_frame_list = xc_map_foreign_batch(xc_handle, domid, PROT_READ,
    1.82 +                                          p2m_frame_list_list,
    1.83 +                                          P2M_FLL_ENTRIES);
    1.84 +    if ( p2m_frame_list == NULL )
    1.85 +    {
    1.86 +        ERROR("Couldn't map p2m_frame_list");
    1.87 +        goto out;
    1.88 +    }
    1.89 +
    1.90 +    /* Map all the frames of the pfn->mfn table. For migrate to succeed,
    1.91 +       the guest must not change which frames are used for this purpose.
    1.92 +       (it's not clear why it would want to change them, and we'll be OK
    1.93 +       from a safety POV anyhow). */
    1.94 +    p2m = xc_map_foreign_batch(xc_handle, domid, PROT_READ,
    1.95 +                               p2m_frame_list,
    1.96 +                               P2M_FL_ENTRIES);
    1.97 +    if ( p2m == NULL )
    1.98 +    {
    1.99 +        ERROR("Couldn't map p2m table");
   1.100 +        goto out;
   1.101 +    }
   1.102 +
   1.103 +    if ( lock_pages(&ctxt, sizeof(ctxt)) )
   1.104 +    {
   1.105 +        ERROR("Unable to lock ctxt");
   1.106 +        goto out;
   1.107 +    }
   1.108 +
   1.109 +    if ( xc_vcpu_getcontext(xc_handle, domid, 0, &ctxt) )
   1.110 +    {
   1.111 +        ERROR("Could not get vcpu context");
   1.112 +        goto out;
   1.113 +    }
   1.114 +
   1.115 +    mfn = ctxt.user_regs.edx;
   1.116 +
   1.117 +    start_info = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
   1.118 +                                      PROT_READ | PROT_WRITE, mfn);
   1.119 +    if ( start_info == NULL )
   1.120 +    {
   1.121 +        ERROR("Couldn't map start_info");
   1.122 +        goto out;
   1.123 +    }
   1.124 +
   1.125 +    start_info->store_mfn        = p2m[start_info->store_mfn];
   1.126 +    start_info->console.domU.mfn = p2m[start_info->console.domU.mfn];
   1.127 +
   1.128 +    munmap(start_info, PAGE_SIZE);
   1.129 +#endif /* defined(__i386__) || defined(__x86_64__) */
   1.130 +
   1.131 +    /* Reset all secondary CPU states. */
   1.132 +    for ( i = 1; i <= info.max_vcpu_id; i++ )
   1.133 +        xc_vcpu_setcontext(xc_handle, domid, i, NULL);
   1.134 +
   1.135 +    /* Ready to resume domain execution now. */
   1.136 +    domctl.cmd = XEN_DOMCTL_resumedomain;
   1.137 +    domctl.domain = domid;
   1.138 +    rc = do_domctl(xc_handle, &domctl);
   1.139 +
   1.140 +#if defined(__i386__) || defined(__x86_64__)
   1.141 + out:
   1.142 +    unlock_pages((void *)&ctxt, sizeof ctxt);
   1.143 +    if (p2m)
   1.144 +        munmap(p2m, P2M_FL_ENTRIES*PAGE_SIZE);
   1.145 +    if (p2m_frame_list)
   1.146 +        munmap(p2m_frame_list, P2M_FLL_ENTRIES*PAGE_SIZE);
   1.147 +    if (p2m_frame_list_list)
   1.148 +        munmap(p2m_frame_list_list, PAGE_SIZE);
   1.149 +    if (shinfo)
   1.150 +        munmap(shinfo, PAGE_SIZE);
   1.151 +#endif
   1.152 +
   1.153 +    return rc;
   1.154 +}
   1.155 +
   1.156 +/*
   1.157 + * Resume execution of a domain after suspend shutdown.
   1.158 + * This can happen in one of two ways:
   1.159 + *  1. Resume with special return code.
   1.160 + *  2. Reset guest environment so it believes it is resumed in a new
   1.161 + *     domain context.
   1.162 + * (2) should be used only for guests which cannot handle the special
   1.163 + * new return code. (1) is always safe (but slower).
   1.164 + */
   1.165 +int xc_domain_resume(int xc_handle, uint32_t domid)
   1.166 +{
   1.167 +    /*
   1.168 +     * XXX: Implement a way to select between options (1) and (2).
   1.169 +     * Or expose the options as two different methods to Python.
   1.170 +     */
   1.171 +    return (0
   1.172 +            ? xc_domain_resume_cooperative(xc_handle, domid)
   1.173 +            : xc_domain_resume_any(xc_handle, domid));
   1.174 +}
     2.1 --- a/tools/libxc/xg_save_restore.h	Mon Jan 22 14:13:26 2007 +0000
     2.2 +++ b/tools/libxc/xg_save_restore.h	Mon Jan 22 15:15:32 2007 +0000
     2.3 @@ -34,11 +34,10 @@
     2.4  **
     2.5  ** Returns 1 on success, 0 on failure.
     2.6  */
     2.7 -static int get_platform_info(int xc_handle, uint32_t dom,
     2.8 -                             /* OUT */ unsigned long *max_mfn,
     2.9 -                             /* OUT */ unsigned long *hvirt_start,
    2.10 -                             /* OUT */ unsigned int *pt_levels)
    2.11 -
    2.12 +static inline int get_platform_info(int xc_handle, uint32_t dom,
    2.13 +                                    /* OUT */ unsigned long *max_mfn,
    2.14 +                                    /* OUT */ unsigned long *hvirt_start,
    2.15 +                                    /* OUT */ unsigned int *pt_levels)
    2.16  {
    2.17      xen_capabilities_info_t xen_caps = "";
    2.18      xen_platform_parameters_t xen_params;
     3.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Mon Jan 22 14:13:26 2007 +0000
     3.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Mon Jan 22 15:15:32 2007 +0000
     3.3 @@ -1064,9 +1064,9 @@ static PyMethodDef pyxc_methods[] = {
     3.4        "Destroy a domain.\n"
     3.5        " dom [int]:    Identifier of domain to be destroyed.\n\n"
     3.6        "Returns: [int] 0 on success; -1 on error.\n" },
     3.7 -    
     3.8 +
     3.9      { "domain_resume", 
    3.10 -      (PyCFunction)pyxc_domain_resume, 
    3.11 +      (PyCFunction)pyxc_domain_resume,
    3.12        METH_VARARGS, "\n"
    3.13        "Resume execution of a suspended domain.\n"
    3.14        " dom [int]: Identifier of domain to be resumed.\n\n"
     4.1 --- a/tools/python/xen/xend/XendCheckpoint.py	Mon Jan 22 14:13:26 2007 +0000
     4.2 +++ b/tools/python/xen/xend/XendCheckpoint.py	Mon Jan 22 15:15:32 2007 +0000
     4.3 @@ -122,6 +122,8 @@ def save(fd, dominfo, network, live, dst
     4.4              os.remove("/tmp/xen.qemu-dm.%d" % dominfo.getDomid())
     4.5  
     4.6          dominfo.destroyDomain()
     4.7 +        dominfo.testDeviceComplete()
     4.8 +
     4.9          try:
    4.10              dominfo.setName(domain_name)
    4.11          except VmError:
    4.12 @@ -134,11 +136,31 @@ def save(fd, dominfo, network, live, dst
    4.13      except Exception, exn:
    4.14          log.exception("Save failed on domain %s (%s).", domain_name,
    4.15                        dominfo.getDomid())
    4.16 +
    4.17 +        dominfo._releaseDevices()
    4.18 +        dominfo.testDeviceComplete()
    4.19 +        dominfo.testvifsComplete()
    4.20 +        log.debug("XendCheckpoint.save: devices released")
    4.21 +
    4.22 +        dominfo._resetChannels()
    4.23 +
    4.24 +        dominfo._removeDom('control/shutdown')
    4.25 +        dominfo._removeDom('device-misc/vif/nextDeviceID')
    4.26 +
    4.27 +        dominfo._createChannels()
    4.28 +        dominfo._introduceDomain()
    4.29 +        dominfo._storeDomDetails()
    4.30 +
    4.31 +        dominfo._createDevices()
    4.32 +        log.debug("XendCheckpoint.save: devices created")
    4.33 +
    4.34 +        dominfo.resumeDomain()
    4.35 +        log.debug("XendCheckpoint.save: resumeDomain")
    4.36 +
    4.37          try:
    4.38              dominfo.setName(domain_name)
    4.39          except:
    4.40              log.exception("Failed to reset the migrating domain's name")
    4.41 -        raise Exception, exn
    4.42  
    4.43  
    4.44  def restore(xd, fd, dominfo = None, paused = False):
     5.1 --- a/tools/python/xen/xend/XendDomain.py	Mon Jan 22 14:13:26 2007 +0000
     5.2 +++ b/tools/python/xen/xend/XendDomain.py	Mon Jan 22 15:15:32 2007 +0000
     5.3 @@ -1166,7 +1166,6 @@ class XendDomain:
     5.4          sock.send("receive\n")
     5.5          sock.recv(80)
     5.6          XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst)
     5.7 -        dominfo.testDeviceComplete()
     5.8          sock.close()
     5.9  
    5.10      def domain_save(self, domid, dst):
     6.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Mon Jan 22 14:13:26 2007 +0000
     6.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Mon Jan 22 15:15:32 2007 +0000
     6.3 @@ -1580,6 +1580,16 @@ class XendDomainInfo:
     6.4              log.exception("Exception in alloc_unbound(%d)", self.domid)
     6.5              raise
     6.6  
     6.7 +    def _resetChannels(self):
     6.8 +        """Reset all event channels in the domain.
     6.9 +        """
    6.10 +        try:
    6.11 +            return xc.evtchn_reset(dom=self.domid)
    6.12 +        except:
    6.13 +            log.exception("Exception in evtcnh_reset(%d)", self.domid)
    6.14 +            raise
    6.15 +
    6.16 +
    6.17      #
    6.18      # Bootloader configuration
    6.19      #
    6.20 @@ -1737,6 +1747,25 @@ class XendDomainInfo:
    6.21                  log.info("Dev still active but hit max loop timeout")
    6.22                  break
    6.23  
    6.24 +    def testvifsComplete(self):
    6.25 +        """ In case vifs are released and then created for the same
    6.26 +        domain, we need to wait for the devices to shut down.
    6.27 +        """
    6.28 +        start = time.time()
    6.29 +        while True:
    6.30 +            test = 0
    6.31 +            diff = time.time() - start
    6.32 +            for i in self.getDeviceController('vif').deviceIDs():
    6.33 +                test = 1
    6.34 +                log.info("Dev %s still active, looping...", i)
    6.35 +                time.sleep(0.1)
    6.36 +                
    6.37 +            if test == 0:
    6.38 +                break
    6.39 +            if diff >= MIGRATE_TIMEOUT:
    6.40 +                log.info("Dev still active but hit max loop timeout")
    6.41 +                break
    6.42 +
    6.43      def _storeVmDetails(self):
    6.44          to_store = {}
    6.45