ia64/xen-unstable

changeset 19648:f0e2df69a8eb

x86 hvm: Allow cross-vendor migration

Intercept #UD and emulate SYSCALL/SYSENTER/SYSEXIT as necessary.

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 15:01:36 2009 +0100 (2009-05-26)
parents 1c627434605e
children fcc71d023408
files xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/svm/vmcb.c xen/arch/x86/hvm/vmx/vmcs.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/x86_emulate/x86_emulate.c xen/include/asm-x86/hvm/svm/vmcb.h xen/include/public/arch-x86/hvm/save.h
line diff
     1.1 --- a/xen/arch/x86/hvm/svm/svm.c	Tue May 26 11:52:31 2009 +0100
     1.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Tue May 26 15:01:36 2009 +0100
     1.3 @@ -37,6 +37,7 @@
     1.4  #include <asm/debugreg.h>
     1.5  #include <asm/msr.h>
     1.6  #include <asm/spinlock.h>
     1.7 +#include <asm/hvm/emulate.h>
     1.8  #include <asm/hvm/hvm.h>
     1.9  #include <asm/hvm/support.h>
    1.10  #include <asm/hvm/io.h>
    1.11 @@ -199,9 +200,9 @@ static int svm_vmcb_save(struct vcpu *v,
    1.12      c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
    1.13      c->cr4 = v->arch.hvm_vcpu.guest_cr[4];
    1.14  
    1.15 -    c->sysenter_cs = vmcb->sysenter_cs;
    1.16 -    c->sysenter_esp = vmcb->sysenter_esp;
    1.17 -    c->sysenter_eip = vmcb->sysenter_eip;
    1.18 +    c->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs;
    1.19 +    c->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp;
    1.20 +    c->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip;
    1.21  
    1.22      c->pending_event = 0;
    1.23      c->error_code = 0;
    1.24 @@ -258,9 +259,9 @@ static int svm_vmcb_restore(struct vcpu 
    1.25      svm_update_guest_cr(v, 2);
    1.26      svm_update_guest_cr(v, 4);
    1.27  
    1.28 -    vmcb->sysenter_cs =  c->sysenter_cs;
    1.29 -    vmcb->sysenter_esp = c->sysenter_esp;
    1.30 -    vmcb->sysenter_eip = c->sysenter_eip;
    1.31 +    v->arch.hvm_svm.guest_sysenter_cs = c->sysenter_cs;
    1.32 +    v->arch.hvm_svm.guest_sysenter_esp = c->sysenter_esp;
    1.33 +    v->arch.hvm_svm.guest_sysenter_eip = c->sysenter_eip;
    1.34  
    1.35      if ( paging_mode_hap(v->domain) )
    1.36      {
    1.37 @@ -286,7 +287,7 @@ static int svm_vmcb_restore(struct vcpu 
    1.38      return 0;
    1.39  }
    1.40  
    1.41 -        
    1.42 +
    1.43  static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
    1.44  {
    1.45      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    1.46 @@ -985,6 +986,16 @@ static int svm_msr_read_intercept(struct
    1.47          msr_content = v->arch.hvm_vcpu.guest_efer;
    1.48          break;
    1.49  
    1.50 +    case MSR_IA32_SYSENTER_CS:
    1.51 +        msr_content = v->arch.hvm_svm.guest_sysenter_cs;
    1.52 +        break;
    1.53 +    case MSR_IA32_SYSENTER_ESP:
    1.54 +        msr_content = v->arch.hvm_svm.guest_sysenter_esp;
    1.55 +        break;
    1.56 +    case MSR_IA32_SYSENTER_EIP:
    1.57 +        msr_content = v->arch.hvm_svm.guest_sysenter_eip;
    1.58 +        break;
    1.59 +
    1.60      case MSR_IA32_MC4_MISC: /* Threshold register */
    1.61      case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
    1.62          /*
    1.63 @@ -1067,6 +1078,16 @@ static int svm_msr_write_intercept(struc
    1.64      case MSR_K8_VM_HSAVE_PA:
    1.65          goto gpf;
    1.66  
    1.67 +    case MSR_IA32_SYSENTER_CS:
    1.68 +        v->arch.hvm_svm.guest_sysenter_cs = msr_content;
    1.69 +        break;
    1.70 +    case MSR_IA32_SYSENTER_ESP:
    1.71 +        v->arch.hvm_svm.guest_sysenter_esp = msr_content;
    1.72 +        break;
    1.73 +    case MSR_IA32_SYSENTER_EIP:
    1.74 +        v->arch.hvm_svm.guest_sysenter_eip = msr_content;
    1.75 +        break;
    1.76 +
    1.77      case MSR_IA32_DEBUGCTLMSR:
    1.78          vmcb->debugctlmsr = msr_content;
    1.79          if ( !msr_content || !cpu_has_svm_lbrv )
    1.80 @@ -1165,6 +1186,66 @@ static void svm_vmexit_do_rdtsc(struct c
    1.81      hvm_rdtsc_intercept(regs);
    1.82  }
    1.83  
    1.84 +static void svm_dump_regs(const char *from, struct cpu_user_regs *regs)
    1.85 +{
    1.86 +    printk("Dumping guest's current registers at %s...\n", from);
    1.87 +    printk("Size of regs = 0x%lx, address = %p\n",
    1.88 +           sizeof(struct cpu_user_regs), regs);
    1.89 +
    1.90 +    printk("r15 = 0x%016"PRIx64", r14 = 0x%016"PRIx64"\n",
    1.91 +           regs->r15, regs->r14);
    1.92 +    printk("r13 = 0x%016"PRIx64", r12 = 0x%016"PRIx64"\n",
    1.93 +           regs->r13, regs->r12);
    1.94 +    printk("rbp = 0x%016"PRIx64", rbx = 0x%016"PRIx64"\n",
    1.95 +           regs->rbp, regs->rbx);
    1.96 +    printk("r11 = 0x%016"PRIx64", r10 = 0x%016"PRIx64"\n",
    1.97 +           regs->r11, regs->r10);
    1.98 +    printk("r9  = 0x%016"PRIx64", r8  = 0x%016"PRIx64"\n",
    1.99 +           regs->r9, regs->r8);
   1.100 +    printk("rax = 0x%016"PRIx64", rcx = 0x%016"PRIx64"\n",
   1.101 +           regs->rax, regs->rcx);
   1.102 +    printk("rdx = 0x%016"PRIx64", rsi = 0x%016"PRIx64"\n",
   1.103 +           regs->rdx, regs->rsi);
   1.104 +    printk("rdi = 0x%016"PRIx64", rsp = 0x%016"PRIx64"\n",
   1.105 +           regs->rdi, regs->rsp);
   1.106 +    printk("error code = 0x%08"PRIx32", entry_vector = 0x%08"PRIx32"\n",
   1.107 +           regs->error_code, regs->entry_vector);
   1.108 +    printk("rip = 0x%016"PRIx64", rflags = 0x%016"PRIx64"\n",
   1.109 +           regs->rip, regs->rflags);
   1.110 +}
   1.111 +
   1.112 +static void svm_vmexit_ud_intercept(struct cpu_user_regs *regs)
   1.113 +{
   1.114 +    struct hvm_emulate_ctxt ctxt;
   1.115 +    int rc;
   1.116 +
   1.117 +    hvm_emulate_prepare(&ctxt, regs);
   1.118 +
   1.119 +    rc = hvm_emulate_one(&ctxt);
   1.120 +
   1.121 +    switch ( rc )
   1.122 +    {
   1.123 +    case X86EMUL_UNHANDLEABLE:
   1.124 +        gdprintk(XENLOG_WARNING,
   1.125 +                 "instruction emulation failed @ %04x:%lx: "
   1.126 +                 "%02x %02x %02x %02x %02x %02x\n",
   1.127 +                 hvmemul_get_seg_reg(x86_seg_cs, &ctxt)->sel,
   1.128 +                 ctxt.insn_buf_eip,
   1.129 +                 ctxt.insn_buf[0], ctxt.insn_buf[1],
   1.130 +                 ctxt.insn_buf[2], ctxt.insn_buf[3],
   1.131 +                 ctxt.insn_buf[4], ctxt.insn_buf[5]);
   1.132 +         return;
   1.133 +    case X86EMUL_EXCEPTION:
   1.134 +        if ( ctxt.exn_pending )
   1.135 +            hvm_inject_exception(ctxt.exn_vector, ctxt.exn_error_code, 0);
   1.136 +        break;
   1.137 +    default:
   1.138 +        break;
   1.139 +    }
   1.140 +
   1.141 +    hvm_emulate_writeback(&ctxt);
   1.142 +}
   1.143 +
   1.144  static void wbinvd_ipi(void *info)
   1.145  {
   1.146      wbinvd();
   1.147 @@ -1229,6 +1310,7 @@ asmlinkage void svm_vmexit_handler(struc
   1.148      if ( unlikely(exit_reason == VMEXIT_INVALID) )
   1.149      {
   1.150          svm_dump_vmcb(__func__, vmcb);
   1.151 +        svm_dump_regs(__func__, regs);
   1.152          goto exit_and_crash;
   1.153      }
   1.154  
   1.155 @@ -1305,6 +1387,10 @@ asmlinkage void svm_vmexit_handler(struc
   1.156          break;
   1.157      }
   1.158  
   1.159 +    case VMEXIT_EXCEPTION_UD:
   1.160 +        svm_vmexit_ud_intercept(regs);
   1.161 +        break;
   1.162 +
   1.163      /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
   1.164      case VMEXIT_EXCEPTION_MC:
   1.165          HVMTRACE_0D(MCE);
     2.1 --- a/xen/arch/x86/hvm/svm/vmcb.c	Tue May 26 11:52:31 2009 +0100
     2.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c	Tue May 26 15:01:36 2009 +0100
     2.3 @@ -150,9 +150,6 @@ static int construct_vmcb(struct vcpu *v
     2.4      svm_disable_intercept_for_msr(v, MSR_LSTAR);
     2.5      svm_disable_intercept_for_msr(v, MSR_STAR);
     2.6      svm_disable_intercept_for_msr(v, MSR_SYSCALL_MASK);
     2.7 -    svm_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
     2.8 -    svm_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
     2.9 -    svm_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
    2.10  
    2.11      vmcb->msrpm_base_pa = (u64)virt_to_maddr(arch_svm->msrpm);
    2.12      vmcb->iopm_base_pa  = (u64)virt_to_maddr(hvm_io_bitmap);
    2.13 @@ -222,7 +219,10 @@ static int construct_vmcb(struct vcpu *v
    2.14  
    2.15      paging_update_paging_modes(v);
    2.16  
    2.17 -    vmcb->exception_intercepts = HVM_TRAP_MASK | (1U << TRAP_no_device);
    2.18 +    vmcb->exception_intercepts =
    2.19 +        HVM_TRAP_MASK
    2.20 +        | (1U << TRAP_no_device)
    2.21 +        | (1U << TRAP_invalid_op);
    2.22  
    2.23      if ( paging_mode_hap(v->domain) )
    2.24      {
     3.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Tue May 26 11:52:31 2009 +0100
     3.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Tue May 26 15:01:36 2009 +0100
     3.3 @@ -668,7 +668,8 @@ static int construct_vmcs(struct vcpu *v
     3.4      __vmwrite(EXCEPTION_BITMAP,
     3.5                HVM_TRAP_MASK
     3.6                | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault))
     3.7 -              | (1U << TRAP_no_device));
     3.8 +              | (1U << TRAP_no_device)
     3.9 +              | (1U << TRAP_invalid_op));
    3.10  
    3.11      v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
    3.12      hvm_update_guest_cr(v, 0);
     4.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Tue May 26 11:52:31 2009 +0100
     4.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Tue May 26 15:01:36 2009 +0100
     4.3 @@ -37,6 +37,7 @@
     4.4  #include <asm/spinlock.h>
     4.5  #include <asm/paging.h>
     4.6  #include <asm/p2m.h>
     4.7 +#include <asm/hvm/emulate.h>
     4.8  #include <asm/hvm/hvm.h>
     4.9  #include <asm/hvm/support.h>
    4.10  #include <asm/hvm/vmx/vmx.h>
    4.11 @@ -2248,6 +2249,38 @@ asmlinkage void vmx_enter_realmode(struc
    4.12      regs->eflags |= (X86_EFLAGS_VM | X86_EFLAGS_IOPL);
    4.13  }
    4.14  
    4.15 +static void vmx_vmexit_ud_intercept(struct cpu_user_regs *regs)
    4.16 +{
    4.17 +     struct hvm_emulate_ctxt ctxt;
    4.18 +     int rc;
    4.19 + 
    4.20 +     hvm_emulate_prepare(&ctxt, regs);
    4.21 + 
    4.22 +     rc = hvm_emulate_one(&ctxt);
    4.23 + 
    4.24 +     switch ( rc )
    4.25 +     {
    4.26 +     case X86EMUL_UNHANDLEABLE:
    4.27 +         gdprintk(XENLOG_WARNING,
    4.28 +                  "instruction emulation failed @ %04x:%lx: "
    4.29 +                  "%02x %02x %02x %02x %02x %02x\n",
    4.30 +                  hvmemul_get_seg_reg(x86_seg_cs, &ctxt)->sel,
    4.31 +                  ctxt.insn_buf_eip,
    4.32 +                  ctxt.insn_buf[0], ctxt.insn_buf[1],
    4.33 +                  ctxt.insn_buf[2], ctxt.insn_buf[3],
    4.34 +                  ctxt.insn_buf[4], ctxt.insn_buf[5]);
    4.35 +          return;
    4.36 +     case X86EMUL_EXCEPTION:
    4.37 +         if ( ctxt.exn_pending )
    4.38 +             hvm_inject_exception(ctxt.exn_vector, ctxt.exn_error_code, 0);
    4.39 +         break;
    4.40 +     default:
    4.41 +         break;
    4.42 +     }
    4.43 + 
    4.44 +     hvm_emulate_writeback(&ctxt);
    4.45 +}
    4.46 +
    4.47  asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
    4.48  {
    4.49      unsigned int exit_reason, idtv_info;
    4.50 @@ -2434,6 +2467,9 @@ asmlinkage void vmx_vmexit_handler(struc
    4.51              HVMTRACE_0D(MCE);
    4.52              do_machine_check(regs);
    4.53              break;
    4.54 +        case TRAP_invalid_op:
    4.55 +            vmx_vmexit_ud_intercept(regs);
    4.56 +            break;
    4.57          default:
    4.58              goto exit_and_crash;
    4.59          }
     5.1 --- a/xen/arch/x86/x86_emulate/x86_emulate.c	Tue May 26 11:52:31 2009 +0100
     5.2 +++ b/xen/arch/x86/x86_emulate/x86_emulate.c	Tue May 26 15:01:36 2009 +0100
     5.3 @@ -172,7 +172,7 @@ static uint8_t opcode_table[256] = {
     5.4  
     5.5  static uint8_t twobyte_table[256] = {
     5.6      /* 0x00 - 0x07 */
     5.7 -    SrcMem16|ModRM, ImplicitOps|ModRM, 0, 0, 0, 0, ImplicitOps, 0,
     5.8 +    SrcMem16|ModRM, ImplicitOps|ModRM, 0, 0, 0, ImplicitOps, ImplicitOps, 0,
     5.9      /* 0x08 - 0x0F */
    5.10      ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps|ModRM, 0, 0,
    5.11      /* 0x10 - 0x17 */
    5.12 @@ -186,7 +186,8 @@ static uint8_t twobyte_table[256] = {
    5.13      /* 0x28 - 0x2F */
    5.14      0, 0, 0, 0, 0, 0, 0, 0,
    5.15      /* 0x30 - 0x37 */
    5.16 -    ImplicitOps, ImplicitOps, ImplicitOps, 0, 0, 0, 0, 0,
    5.17 +    ImplicitOps, ImplicitOps, ImplicitOps, 0,
    5.18 +    ImplicitOps, ImplicitOps, 0, 0,
    5.19      /* 0x38 - 0x3F */
    5.20      0, 0, 0, 0, 0, 0, 0, 0,
    5.21      /* 0x40 - 0x47 */
    5.22 @@ -280,7 +281,17 @@ struct operand {
    5.23  };
    5.24  
    5.25  /* MSRs. */
    5.26 -#define MSR_TSC   0x10
    5.27 +#define MSR_TSC          0x00000010
    5.28 +#define MSR_SYSENTER_CS  0x00000174
    5.29 +#define MSR_SYSENTER_ESP 0x00000175
    5.30 +#define MSR_SYSENTER_EIP 0x00000176
    5.31 +#define MSR_EFER         0xc0000080
    5.32 +#define EFER_SCE         (1u<<0)
    5.33 +#define EFER_LMA         (1u<<10)
    5.34 +#define MSR_STAR         0xc0000081
    5.35 +#define MSR_LSTAR        0xc0000082
    5.36 +#define MSR_CSTAR        0xc0000083
    5.37 +#define MSR_FMASK        0xc0000084
    5.38  
    5.39  /* Control register flags. */
    5.40  #define CR0_PE    (1<<0)
    5.41 @@ -942,6 +953,20 @@ in_protmode(
    5.42  }
    5.43  
    5.44  static int
    5.45 +in_longmode(
    5.46 +    struct x86_emulate_ctxt *ctxt,
    5.47 +    struct x86_emulate_ops *ops)
    5.48 +{
    5.49 +    uint64_t efer;
    5.50 +
    5.51 +    if (ops->read_msr == NULL)
    5.52 +        return -1;
    5.53 +
    5.54 +    ops->read_msr(MSR_EFER, &efer, ctxt);
    5.55 +    return !!(efer & EFER_LMA);
    5.56 +}
    5.57 +
    5.58 +static int
    5.59  realmode_load_seg(
    5.60      enum x86_segment seg,
    5.61      uint16_t sel,
    5.62 @@ -3544,6 +3569,71 @@ x86_emulate(
    5.63          break;
    5.64      }
    5.65  
    5.66 +    case 0x05: /* syscall */ {
    5.67 +        uint64_t msr_content;
    5.68 +        struct segment_register cs = { 0 }, ss = { 0 };
    5.69 +        int rc;
    5.70 +
    5.71 +        fail_if(ops->read_msr == NULL);
    5.72 +        fail_if(ops->read_segment == NULL);
    5.73 +        fail_if(ops->write_segment == NULL);
    5.74 +
    5.75 +        generate_exception_if(in_realmode(ctxt, ops), EXC_UD, 0);
    5.76 +        generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, 0);
    5.77 +        generate_exception_if(lock_prefix, EXC_UD, 0);
    5.78 +
    5.79 +        /* Inject #UD if syscall/sysret are disabled. */
    5.80 +        rc = ops->read_msr(MSR_EFER, &msr_content, ctxt);
    5.81 +        fail_if(rc != 0);
    5.82 +        generate_exception_if((msr_content & EFER_SCE) == 0, EXC_UD, 0);
    5.83 +
    5.84 +        rc = ops->read_msr(MSR_STAR, &msr_content, ctxt);
    5.85 +        fail_if(rc != 0);
    5.86 +
    5.87 +        msr_content >>= 32;
    5.88 +        cs.sel = (uint16_t)(msr_content & 0xfffc);
    5.89 +        ss.sel = (uint16_t)(msr_content + 8);
    5.90 +
    5.91 +        cs.base = ss.base = 0; /* flat segment */
    5.92 +        cs.limit = ss.limit = ~0u;  /* 4GB limit */
    5.93 +        cs.attr.bytes = 0xc9b; /* G+DB+P+S+Code */
    5.94 +        ss.attr.bytes = 0xc93; /* G+DB+P+S+Data */
    5.95 +
    5.96 +        if ( in_longmode(ctxt, ops) )
    5.97 +        {
    5.98 +            cs.attr.fields.db = 0;
    5.99 +            cs.attr.fields.l = 1;
   5.100 +
   5.101 +            _regs.rcx = _regs.rip;
   5.102 +            _regs.r11 = _regs.eflags & ~EFLG_RF;
   5.103 +
   5.104 +            rc = ops->read_msr(mode_64bit() ? MSR_LSTAR : MSR_CSTAR,
   5.105 +                               &msr_content, ctxt);
   5.106 +            fail_if(rc != 0);
   5.107 +
   5.108 +            _regs.rip = msr_content;
   5.109 +
   5.110 +            rc = ops->read_msr(MSR_FMASK, &msr_content, ctxt);
   5.111 +            fail_if(rc != 0);
   5.112 +            _regs.eflags &= ~(msr_content | EFLG_RF);
   5.113 +        }
   5.114 +        else
   5.115 +        {
   5.116 +            rc = ops->read_msr(MSR_STAR, &msr_content, ctxt);
   5.117 +            fail_if(rc != 0);
   5.118 +
   5.119 +            _regs.rcx = _regs.rip;
   5.120 +            _regs.eip = (uint32_t)msr_content;
   5.121 +            _regs.eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
   5.122 +        }
   5.123 +
   5.124 +        if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) ||
   5.125 +             (rc = ops->write_segment(x86_seg_ss, &ss, ctxt)) )
   5.126 +            goto done;
   5.127 +
   5.128 +        break;
   5.129 +    }
   5.130 +
   5.131      case 0x06: /* clts */
   5.132          generate_exception_if(!mode_ring0(), EXC_GP, 0);
   5.133          fail_if((ops->read_cr == NULL) || (ops->write_cr == NULL));
   5.134 @@ -3645,6 +3735,122 @@ x86_emulate(
   5.135              dst.type = OP_NONE;
   5.136          break;
   5.137  
   5.138 +    case 0x34: /* sysenter */ {
   5.139 +        uint64_t msr_content;
   5.140 +        struct segment_register cs, ss;
   5.141 +        int rc;
   5.142 +
   5.143 +        fail_if(ops->read_msr == NULL);
   5.144 +        fail_if(ops->read_segment == NULL);
   5.145 +        fail_if(ops->write_segment == NULL);
   5.146 +
   5.147 +        generate_exception_if(mode_ring0(), EXC_GP, 0);
   5.148 +        generate_exception_if(in_realmode(ctxt, ops), EXC_GP, 0);
   5.149 +        generate_exception_if(!in_protmode(ctxt, ops), EXC_GP, 0);
   5.150 +        generate_exception_if(lock_prefix, EXC_UD, 0);
   5.151 +
   5.152 +        rc = ops->read_msr(MSR_SYSENTER_CS, &msr_content, ctxt);
   5.153 +        fail_if(rc != 0);
   5.154 +
   5.155 +        if ( mode_64bit() )
   5.156 +            generate_exception_if(msr_content == 0, EXC_GP, 0);
   5.157 +        else
   5.158 +            generate_exception_if((msr_content & 0xfffc) == 0, EXC_GP, 0);
   5.159 +
   5.160 +        _regs.eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
   5.161 +
   5.162 +        ops->read_segment(x86_seg_cs, &cs, ctxt);
   5.163 +        cs.sel = (uint16_t)msr_content & ~3; /* SELECTOR_RPL_MASK */
   5.164 +        cs.base = 0;   /* flat segment */
   5.165 +        cs.limit = ~0u;  /* 4GB limit */
   5.166 +        cs.attr.bytes = 0xc9b; /* G+DB+P+S+Code */
   5.167 +
   5.168 +        ss.sel = cs.sel + 8;
   5.169 +        ss.base = 0;   /* flat segment */
   5.170 +        ss.limit = ~0u;  /* 4GB limit */
   5.171 +        ss.attr.bytes = 0xc93; /* G+DB+P+S+Data */
   5.172 +
   5.173 +        if ( in_longmode(ctxt, ops) )
   5.174 +        {
   5.175 +            cs.attr.fields.db = 0;
   5.176 +            cs.attr.fields.l = 1;
   5.177 +        }
   5.178 +
   5.179 +        rc = ops->write_segment(x86_seg_cs, &cs, ctxt);
   5.180 +        fail_if(rc != 0);
   5.181 +        rc = ops->write_segment(x86_seg_ss, &ss, ctxt);
   5.182 +        fail_if(rc != 0);
   5.183 +
   5.184 +        rc = ops->read_msr(MSR_SYSENTER_EIP, &msr_content, ctxt);
   5.185 +        fail_if(rc != 0);
   5.186 +        _regs.rip = msr_content;
   5.187 +
   5.188 +        rc = ops->read_msr(MSR_SYSENTER_ESP, &msr_content, ctxt);
   5.189 +        fail_if(rc != 0);
   5.190 +        _regs.rsp = msr_content;
   5.191 +
   5.192 +        break;
   5.193 +    }
   5.194 +
   5.195 +    case 0x35: /* sysexit */ {
   5.196 +        uint64_t msr_content;
   5.197 +        struct segment_register cs, ss;
   5.198 +        int user64 = !!(rex_prefix & 8); /* REX.W */
   5.199 +        int rc;
   5.200 +
   5.201 +        fail_if(ops->read_msr == NULL);
   5.202 +        fail_if(ops->read_segment == NULL);
   5.203 +        fail_if(ops->write_segment == NULL);
   5.204 +
   5.205 +        generate_exception_if(!mode_ring0(), EXC_GP, 0);
   5.206 +        generate_exception_if(in_realmode(ctxt, ops), EXC_GP, 0);
   5.207 +        generate_exception_if(!in_protmode(ctxt, ops), EXC_GP, 0);
   5.208 +        generate_exception_if(lock_prefix, EXC_UD, 0);
   5.209 +
   5.210 +        rc = ops->read_msr(MSR_SYSENTER_CS, &msr_content, ctxt);
   5.211 +        fail_if(rc != 0);
   5.212 +        rc = ops->read_segment(x86_seg_cs, &cs, ctxt);
   5.213 +        fail_if(rc != 0);
   5.214 +
   5.215 +        if ( user64 )
   5.216 +        {
   5.217 +            cs.sel = (uint16_t)(msr_content + 32);
   5.218 +            ss.sel = (cs.sel + 8);
   5.219 +            generate_exception_if(msr_content == 0, EXC_GP, 0);
   5.220 +        }
   5.221 +        else
   5.222 +        {
   5.223 +            cs.sel = (uint16_t)(msr_content + 16);
   5.224 +            ss.sel = (uint16_t)(msr_content + 24);
   5.225 +            generate_exception_if((msr_content & 0xfffc) == 0, EXC_GP, 0);
   5.226 +        }
   5.227 +
   5.228 +        cs.sel |= 0x3;   /* SELECTOR_RPL_MASK */
   5.229 +        cs.base = 0;   /* flat segment */
   5.230 +        cs.limit = ~0u;  /* 4GB limit */
   5.231 +        cs.attr.bytes = 0xcfb; /* G+DB+P+DPL3+S+Code */
   5.232 +
   5.233 +        ss.sel |= 0x3;   /* SELECTOR_RPL_MASK */
   5.234 +        ss.base = 0;   /* flat segment */
   5.235 +        ss.limit = ~0u;  /* 4GB limit */
   5.236 +        ss.attr.bytes = 0xcf3; /* G+DB+P+DPL3+S+Data */
   5.237 +
   5.238 +        if ( user64 )
   5.239 +        {
   5.240 +            cs.attr.fields.db = 0;
   5.241 +            cs.attr.fields.l = 1;
   5.242 +        }
   5.243 +
   5.244 +        rc = ops->write_segment(x86_seg_cs, &cs, ctxt);
   5.245 +        fail_if(rc != 0);
   5.246 +        rc = ops->write_segment(x86_seg_ss, &ss, ctxt);
   5.247 +        fail_if(rc != 0);
   5.248 +
   5.249 +        _regs.rip = _regs.rdx;
   5.250 +        _regs.rsp = _regs.rcx;
   5.251 +        break;
   5.252 +    }
   5.253 +
   5.254      case 0x6f: /* movq mm/m64,mm */ {
   5.255          uint8_t stub[] = { 0x0f, 0x6f, modrm, 0xc3 };
   5.256          struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
     6.1 --- a/xen/include/asm-x86/hvm/svm/vmcb.h	Tue May 26 11:52:31 2009 +0100
     6.2 +++ b/xen/include/asm-x86/hvm/svm/vmcb.h	Tue May 26 15:01:36 2009 +0100
     6.3 @@ -459,6 +459,15 @@ struct arch_svm_struct {
     6.4      unsigned long *msrpm;
     6.5      int    launch_core;
     6.6      bool_t vmcb_in_sync;    /* VMCB sync'ed with VMSAVE? */
     6.7 +
     6.8 +    /* Upper four bytes are undefined in the VMCB, therefore we can't
     6.9 +     * use the fields in the VMCB. Writing a 64-bit value and then reading
    6.10 +     * it back is fine unless there's a VMRUN/VMEXIT in between, which
    6.11 +     * clears the upper four bytes.
    6.12 +     */
    6.13 +    uint64_t guest_sysenter_cs;
    6.14 +    uint64_t guest_sysenter_esp;
    6.15 +    uint64_t guest_sysenter_eip;
    6.16  };
    6.17  
    6.18  struct vmcb_struct *alloc_vmcb(void);
     7.1 --- a/xen/include/public/arch-x86/hvm/save.h	Tue May 26 11:52:31 2009 +0100
     7.2 +++ b/xen/include/public/arch-x86/hvm/save.h	Tue May 26 15:01:36 2009 +0100
     7.3 @@ -123,9 +123,7 @@ struct hvm_hw_cpu {
     7.4      uint32_t tr_arbytes;
     7.5      uint32_t ldtr_arbytes;
     7.6  
     7.7 -    uint32_t sysenter_cs;
     7.8 -    uint32_t padding0;
     7.9 -
    7.10 +    uint64_t sysenter_cs;
    7.11      uint64_t sysenter_esp;
    7.12      uint64_t sysenter_eip;
    7.13