ia64/xen-unstable

changeset 16227:168beb9a27a5

x86: Save/restore new syscall/sysenter context info.
Signed-off-by: Keir Fraser <keir@xensource.com>
author Keir Fraser <keir@xensource.com>
date Thu Oct 25 14:24:52 2007 +0100 (2007-10-25)
parents e5a0ed682280
children 65b961265093
files tools/libxc/xc_domain_restore.c tools/libxc/xc_domain_save.c xen/arch/x86/domctl.c xen/include/public/domctl.h
line diff
     1.1 --- a/tools/libxc/xc_domain_restore.c	Thu Oct 25 12:39:22 2007 +0100
     1.2 +++ b/tools/libxc/xc_domain_restore.c	Thu Oct 25 14:24:52 2007 +0100
     1.3 @@ -169,7 +169,8 @@ static int uncanonicalize_pagetable(int 
     1.4  
     1.5  
     1.6  /* Load the p2m frame list, plus potential extended info chunk */
     1.7 -static xen_pfn_t *load_p2m_frame_list(int io_fd, int *pae_extended_cr3)
     1.8 +static xen_pfn_t *load_p2m_frame_list(
     1.9 +    int io_fd, int *pae_extended_cr3, int *ext_vcpucontext)
    1.10  {
    1.11      xen_pfn_t *p2m_frame_list;
    1.12      vcpu_guest_context_either_t ctxt;
    1.13 @@ -200,7 +201,8 @@ static xen_pfn_t *load_p2m_frame_list(in
    1.14              
    1.15              /* 4-character chunk signature + 4-byte remaining chunk size. */
    1.16              if ( !read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
    1.17 -                 !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes)) )
    1.18 +                 !read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes)) ||
    1.19 +                 (tot_bytes < (chunk_bytes + 8)) )
    1.20              {
    1.21                  ERROR("read extended-info chunk signature failed");
    1.22                  return NULL;
    1.23 @@ -240,6 +242,10 @@ static xen_pfn_t *load_p2m_frame_list(in
    1.24                       & (1UL << VMASST_TYPE_pae_extended_cr3) )
    1.25                      *pae_extended_cr3 = 1;
    1.26              }
    1.27 +            else if ( !strncmp(chunk_sig, "extv", 4) )
    1.28 +            {
    1.29 +                *ext_vcpucontext = 1;
    1.30 +            }
    1.31              
    1.32              /* Any remaining bytes of this chunk: read and discard. */
    1.33              while ( chunk_bytes )
    1.34 @@ -289,7 +295,7 @@ int xc_domain_restore(int xc_handle, int
    1.35                        unsigned int hvm, unsigned int pae)
    1.36  {
    1.37      DECLARE_DOMCTL;
    1.38 -    int rc = 1, i, j, n, m, pae_extended_cr3 = 0;
    1.39 +    int rc = 1, frc, i, j, n, m, pae_extended_cr3 = 0, ext_vcpucontext = 0;
    1.40      unsigned long mfn, pfn;
    1.41      unsigned int prev_pc, this_pc;
    1.42      int verify = 0;
    1.43 @@ -373,7 +379,8 @@ int xc_domain_restore(int xc_handle, int
    1.44      if ( !hvm ) 
    1.45      {
    1.46          /* Load the p2m frame list, plus potential extended info chunk */
    1.47 -        p2m_frame_list = load_p2m_frame_list(io_fd, &pae_extended_cr3);
    1.48 +        p2m_frame_list = load_p2m_frame_list(
    1.49 +            io_fd, &pae_extended_cr3, &ext_vcpucontext);
    1.50          if ( !p2m_frame_list )
    1.51              goto out;
    1.52  
    1.53 @@ -382,13 +389,12 @@ int xc_domain_restore(int xc_handle, int
    1.54          domctl.domain = dom;
    1.55          domctl.cmd    = XEN_DOMCTL_set_address_size;
    1.56          domctl.u.address_size.size = guest_width * 8;
    1.57 -        rc = do_domctl(xc_handle, &domctl);
    1.58 -        if ( rc != 0 )
    1.59 +        frc = do_domctl(xc_handle, &domctl);
    1.60 +        if ( frc != 0 )
    1.61          {
    1.62              ERROR("Unable to set guest address size.");
    1.63              goto out;
    1.64          }
    1.65 -        rc = 1;
    1.66      }
    1.67  
    1.68      /* We want zeroed memory so use calloc rather than malloc. */
    1.69 @@ -713,18 +719,19 @@ int xc_domain_restore(int xc_handle, int
    1.70              goto out;
    1.71          }
    1.72                  
    1.73 -        if ( (rc = xc_set_hvm_param(xc_handle, dom, 
    1.74 -                                    HVM_PARAM_IOREQ_PFN, magic_pfns[0]))
    1.75 -             || (rc = xc_set_hvm_param(xc_handle, dom, 
    1.76 -                                       HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]))
    1.77 -             || (rc = xc_set_hvm_param(xc_handle, dom, 
    1.78 -                                       HVM_PARAM_STORE_PFN, magic_pfns[2]))
    1.79 -             || (rc = xc_set_hvm_param(xc_handle, dom, 
    1.80 -                                       HVM_PARAM_PAE_ENABLED, pae))
    1.81 -             || (rc = xc_set_hvm_param(xc_handle, dom, 
    1.82 -                                       HVM_PARAM_STORE_EVTCHN, store_evtchn)) )
    1.83 +        if ( (frc = xc_set_hvm_param(xc_handle, dom, 
    1.84 +                                     HVM_PARAM_IOREQ_PFN, magic_pfns[0]))
    1.85 +             || (frc = xc_set_hvm_param(xc_handle, dom, 
    1.86 +                                        HVM_PARAM_BUFIOREQ_PFN, magic_pfns[1]))
    1.87 +             || (frc = xc_set_hvm_param(xc_handle, dom, 
    1.88 +                                        HVM_PARAM_STORE_PFN, magic_pfns[2]))
    1.89 +             || (frc = xc_set_hvm_param(xc_handle, dom, 
    1.90 +                                        HVM_PARAM_PAE_ENABLED, pae))
    1.91 +             || (frc = xc_set_hvm_param(xc_handle, dom, 
    1.92 +                                        HVM_PARAM_STORE_EVTCHN,
    1.93 +                                        store_evtchn)) )
    1.94          {
    1.95 -            ERROR("error setting HVM params: %i", rc);
    1.96 +            ERROR("error setting HVM params: %i", frc);
    1.97              goto out;
    1.98          }
    1.99          *store_mfn = magic_pfns[2];
   1.100 @@ -750,10 +757,15 @@ int xc_domain_restore(int xc_handle, int
   1.101              goto out;
   1.102          }
   1.103          
   1.104 -        rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len);
   1.105 -        if ( rc ) 
   1.106 +        frc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_len);
   1.107 +        if ( frc )
   1.108 +        {
   1.109              ERROR("error setting the HVM context");
   1.110 -       
   1.111 +            goto out;
   1.112 +        }
   1.113 +
   1.114 +        /* HVM success! */
   1.115 +        rc = 0;
   1.116          goto out;
   1.117      }
   1.118  
   1.119 @@ -929,7 +941,7 @@ int xc_domain_restore(int xc_handle, int
   1.120      {
   1.121          unsigned int count = 0;
   1.122          unsigned long *pfntab;
   1.123 -        int nr_frees, rc;
   1.124 +        int nr_frees;
   1.125  
   1.126          if ( !read_exact(io_fd, &count, sizeof(count)) ||
   1.127               (count > (1U << 28)) ) /* up to 1TB of address space */
   1.128 @@ -973,10 +985,10 @@ int xc_domain_restore(int xc_handle, int
   1.129              };
   1.130              set_xen_guest_handle(reservation.extent_start, pfntab);
   1.131  
   1.132 -            if ( (rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
   1.133 -                                    &reservation)) != nr_frees )
   1.134 +            if ( (frc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
   1.135 +                                     &reservation)) != nr_frees )
   1.136              {
   1.137 -                ERROR("Could not decrease reservation : %d", rc);
   1.138 +                ERROR("Could not decrease reservation : %d", frc);
   1.139                  goto out;
   1.140              }
   1.141              else
   1.142 @@ -1091,13 +1103,29 @@ int xc_domain_restore(int xc_handle, int
   1.143          domctl.domain = (domid_t)dom;
   1.144          domctl.u.vcpucontext.vcpu = i;
   1.145          set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt.c);
   1.146 -        rc = xc_domctl(xc_handle, &domctl);
   1.147 -        if ( rc != 0 )
   1.148 +        frc = xc_domctl(xc_handle, &domctl);
   1.149 +        if ( frc != 0 )
   1.150          {
   1.151              ERROR("Couldn't build vcpu%d", i);
   1.152              goto out;
   1.153          }
   1.154 -        rc = 1;
   1.155 +
   1.156 +        if ( !ext_vcpucontext )
   1.157 +            continue;
   1.158 +        if ( !read_exact(io_fd, &domctl.u.ext_vcpucontext, 128) ||
   1.159 +             (domctl.u.ext_vcpucontext.vcpu != i) )
   1.160 +        {
   1.161 +            ERROR("Error when reading extended ctxt %d", i);
   1.162 +            goto out;
   1.163 +        }
   1.164 +        domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
   1.165 +        domctl.domain = dom;
   1.166 +        frc = xc_domctl(xc_handle, &domctl);
   1.167 +        if ( frc != 0 )
   1.168 +        {
   1.169 +            ERROR("Couldn't set extended vcpu%d info\n", i);
   1.170 +            goto out;
   1.171 +        }
   1.172      }
   1.173  
   1.174      if ( !read_exact(io_fd, shared_info_page, PAGE_SIZE) )
     2.1 --- a/tools/libxc/xc_domain_save.c	Thu Oct 25 12:39:22 2007 +0100
     2.2 +++ b/tools/libxc/xc_domain_save.c	Thu Oct 25 14:24:52 2007 +0100
     2.3 @@ -777,16 +777,18 @@ static xen_pfn_t *map_and_save_p2m_table
     2.4       */
     2.5      {
     2.6          unsigned long signature = ~0UL;
     2.7 -        uint32_t chunk_sz = ((guest_width==8) 
     2.8 -                             ? sizeof(ctxt.x64) 
     2.9 -                             : sizeof(ctxt.x32));
    2.10 -        uint32_t tot_sz   = chunk_sz + 8;
    2.11 -        char chunk_sig[]  = "vcpu";
    2.12 +        uint32_t chunk1_sz = ((guest_width==8) 
    2.13 +                              ? sizeof(ctxt.x64) 
    2.14 +                              : sizeof(ctxt.x32));
    2.15 +        uint32_t chunk2_sz = 0;
    2.16 +        uint32_t tot_sz    = (chunk1_sz + 8) + (chunk2_sz + 8);
    2.17          if ( !write_exact(io_fd, &signature, sizeof(signature)) ||
    2.18 -             !write_exact(io_fd, &tot_sz,    sizeof(tot_sz)) ||
    2.19 -             !write_exact(io_fd, &chunk_sig, 4) ||
    2.20 -             !write_exact(io_fd, &chunk_sz,  sizeof(chunk_sz)) ||
    2.21 -             !write_exact(io_fd, &ctxt,      chunk_sz) )
    2.22 +             !write_exact(io_fd, &tot_sz, sizeof(tot_sz)) ||
    2.23 +             !write_exact(io_fd, "vcpu", 4) ||
    2.24 +             !write_exact(io_fd, &chunk1_sz, sizeof(chunk1_sz)) ||
    2.25 +             !write_exact(io_fd, &ctxt, chunk1_sz) ||
    2.26 +             !write_exact(io_fd, "extv", 4) ||
    2.27 +             !write_exact(io_fd, &chunk2_sz, sizeof(chunk2_sz)) )
    2.28          {
    2.29              ERROR("write: extended info");
    2.30              goto out;
    2.31 @@ -830,6 +832,7 @@ int xc_domain_save(int xc_handle, int io
    2.32                     void (*qemu_flip_buffer)(int, int))
    2.33  {
    2.34      xc_dominfo_t info;
    2.35 +    DECLARE_DOMCTL;
    2.36  
    2.37      int rc = 1, frc, i, j, last_iter, iter = 0;
    2.38      int live  = (flags & XCFLAGS_LIVE);
    2.39 @@ -1095,7 +1098,6 @@ int xc_domain_save(int xc_handle, int io
    2.40          while ( N < p2m_size )
    2.41          {
    2.42              unsigned int this_pc = (N * 100) / p2m_size;
    2.43 -            int rc;
    2.44  
    2.45              if ( (this_pc - prev_pc) >= 5 )
    2.46              {
    2.47 @@ -1107,10 +1109,10 @@ int xc_domain_save(int xc_handle, int io
    2.48              {
     2.49                  /* Slightly wasteful to peek the whole array every time,
    2.50                     but this is fast enough for the moment. */
    2.51 -                rc = xc_shadow_control(
    2.52 +                frc = xc_shadow_control(
    2.53                      xc_handle, dom, XEN_DOMCTL_SHADOW_OP_PEEK, to_skip, 
    2.54                      p2m_size, NULL, 0, NULL);
    2.55 -                if ( rc != p2m_size )
    2.56 +                if ( frc != p2m_size )
    2.57                  {
    2.58                      ERROR("Error peeking shadow bitmap");
    2.59                      goto out;
    2.60 @@ -1601,6 +1603,20 @@ int xc_domain_save(int xc_handle, int io
    2.61              ERROR("Error when writing to state file (1) (errno %d)", errno);
    2.62              goto out;
    2.63          }
    2.64 +
    2.65 +        domctl.cmd = XEN_DOMCTL_get_ext_vcpucontext;
    2.66 +        domctl.domain = dom;
    2.67 +        domctl.u.ext_vcpucontext.vcpu = i;
    2.68 +        if ( xc_domctl(xc_handle, &domctl) < 0 )
    2.69 +        {
    2.70 +            ERROR("No extended context for VCPU%d", i);
    2.71 +            goto out;
    2.72 +        }
    2.73 +        if ( !write_exact(io_fd, &domctl.u.ext_vcpucontext, 128) )
    2.74 +        {
    2.75 +            ERROR("Error when writing to state file (2) (errno %d)", errno);
    2.76 +            goto out;
    2.77 +        }
    2.78      }
    2.79  
    2.80      /*
     3.1 --- a/xen/arch/x86/domctl.c	Thu Oct 25 12:39:22 2007 +0100
     3.2 +++ b/xen/arch/x86/domctl.c	Thu Oct 25 14:24:52 2007 +0100
     3.3 @@ -26,7 +26,6 @@
     3.4  #include <asm/hvm/cacheattr.h>
     3.5  #include <asm/processor.h>
     3.6  #include <xsm/xsm.h>
     3.7 -#include <xen/list.h>
     3.8  #include <asm/iommu.h>
     3.9  
    3.10  long arch_do_domctl(
    3.11 @@ -697,6 +696,79 @@ long arch_do_domctl(
    3.12      }
    3.13      break;
    3.14  
    3.15 +    case XEN_DOMCTL_set_ext_vcpucontext:
    3.16 +    case XEN_DOMCTL_get_ext_vcpucontext:
    3.17 +    {
    3.18 +        struct xen_domctl_ext_vcpucontext *evc;
    3.19 +        struct domain *d;
    3.20 +        struct vcpu *v;
    3.21 +
    3.22 +        evc = &domctl->u.ext_vcpucontext;
    3.23 +
    3.24 +        ret = (evc->size < sizeof(*evc)) ? -EINVAL : 0;
    3.25 +        evc->size = sizeof(*evc);
    3.26 +        if ( ret != 0 )
    3.27 +            break;
    3.28 +
    3.29 +        ret = -ESRCH;
    3.30 +        d = rcu_lock_domain_by_id(domctl->domain);
    3.31 +        if ( d == NULL )
    3.32 +            break;
    3.33 +
    3.34 +        ret = -ESRCH;
    3.35 +        if ( (evc->vcpu >= MAX_VIRT_CPUS) ||
    3.36 +             ((v = d->vcpu[evc->vcpu]) == NULL) )
    3.37 +            goto ext_vcpucontext_out;
    3.38 +
    3.39 +        if ( domctl->cmd == XEN_DOMCTL_get_ext_vcpucontext )
    3.40 +        {
    3.41 +#ifdef __x86_64__
    3.42 +            evc->sysenter_callback_cs      = v->arch.sysenter_callback_cs;
    3.43 +            evc->sysenter_callback_eip     = v->arch.sysenter_callback_eip;
    3.44 +            evc->sysenter_disables_events  = v->arch.sysenter_disables_events;
    3.45 +            evc->syscall32_callback_cs     = v->arch.syscall32_callback_cs;
    3.46 +            evc->syscall32_callback_eip    = v->arch.syscall32_callback_eip;
    3.47 +            evc->syscall32_disables_events = v->arch.syscall32_disables_events;
    3.48 +#else
    3.49 +            evc->sysenter_callback_cs      = 0;
    3.50 +            evc->sysenter_callback_eip     = 0;
    3.51 +            evc->sysenter_disables_events  = 0;
    3.52 +            evc->syscall32_callback_cs     = 0;
    3.53 +            evc->syscall32_callback_eip    = 0;
    3.54 +            evc->syscall32_disables_events = 0;
    3.55 +#endif
    3.56 +        }
    3.57 +        else
    3.58 +        {
    3.59 +#ifdef __x86_64__
    3.60 +            fixup_guest_code_selector(d, evc->sysenter_callback_cs);
    3.61 +            v->arch.sysenter_callback_cs      = evc->sysenter_callback_cs;
    3.62 +            v->arch.sysenter_callback_eip     = evc->sysenter_callback_eip;
    3.63 +            v->arch.sysenter_disables_events  = evc->sysenter_disables_events;
    3.64 +            fixup_guest_code_selector(d, evc->syscall32_callback_cs);
    3.65 +            v->arch.syscall32_callback_cs     = evc->syscall32_callback_cs;
    3.66 +            v->arch.syscall32_callback_eip    = evc->syscall32_callback_eip;
    3.67 +            v->arch.syscall32_disables_events = evc->syscall32_disables_events;
    3.68 +#else
    3.69 +            /* We do not support syscall/syscall32/sysenter on 32-bit Xen. */
    3.70 +            ret = -EINVAL;
    3.71 +            if ( (evc->sysenter_callback_cs & ~3) ||
    3.72 +                 evc->sysenter_callback_eip ||
    3.73 +                 (evc->syscall32_callback_cs & ~3) ||
    3.74 +                 evc->syscall32_callback_eip )
    3.75 +                goto ext_vcpucontext_out;
    3.76 +#endif
    3.77 +        }
    3.78 +
    3.79 +        ret = 0;
    3.80 +
    3.81 +    ext_vcpucontext_out:
    3.82 +        rcu_unlock_domain(d);
    3.83 +        if ( copy_to_guest(u_domctl, domctl, 1) )
    3.84 +            ret = -EFAULT;
    3.85 +    }
    3.86 +    break;
    3.87 +
    3.88      default:
    3.89          ret = -ENOSYS;
    3.90          break;
     4.1 --- a/xen/include/public/domctl.h	Thu Oct 25 12:39:22 2007 +0100
     4.2 +++ b/xen/include/public/domctl.h	Thu Oct 25 14:24:52 2007 +0100
     4.3 @@ -515,6 +515,31 @@ typedef struct xen_domctl_pin_mem_cachea
     4.4  DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t);
     4.5  
     4.6  
     4.7 +#define XEN_DOMCTL_set_ext_vcpucontext 42
     4.8 +#define XEN_DOMCTL_get_ext_vcpucontext 43
     4.9 +struct xen_domctl_ext_vcpucontext {
    4.10 +    /* IN: VCPU that this call applies to. */
    4.11 +    uint32_t         vcpu;
    4.12 +    /*
    4.13 +     * SET: Size of struct (IN)
    4.14 +     * GET: Size of struct (OUT)
    4.15 +     */
    4.16 +    uint32_t         size;
    4.17 +#if defined(__i386__) || defined(__x86_64__)
    4.18 +    /* SYSCALL from 32-bit mode and SYSENTER callback information. */
    4.19 +    /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */
    4.20 +    uint64_aligned_t syscall32_callback_eip;
    4.21 +    uint64_aligned_t sysenter_callback_eip;
    4.22 +    uint16_t         syscall32_callback_cs;
    4.23 +    uint16_t         sysenter_callback_cs;
    4.24 +    uint8_t          syscall32_disables_events;
    4.25 +    uint8_t          sysenter_disables_events;
    4.26 +#endif
    4.27 +};
    4.28 +typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t;
    4.29 +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t);
    4.30 +
    4.31 +
    4.32  struct xen_domctl {
    4.33      uint32_t cmd;
    4.34      uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
    4.35 @@ -549,6 +574,7 @@ struct xen_domctl {
    4.36          struct xen_domctl_memory_mapping    memory_mapping;
    4.37          struct xen_domctl_ioport_mapping    ioport_mapping;
    4.38          struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr;
    4.39 +        struct xen_domctl_ext_vcpucontext   ext_vcpucontext;
    4.40          uint8_t                             pad[128];
    4.41      } u;
    4.42  };