ia64/xen-unstable

changeset 16989:92734271810a

vmx realmode: Emulate protected-mode transition while CS and SS have
bad selector values (bottom two bits non-zero).

Allows the openSUSE 10.3 install CD to boot. Unfortunately the SUSE Linux
10.1 install CD still fails to work...

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Feb 05 15:45:10 2008 +0000 (2008-02-05)
parents df6b8bed2845
children bf4a24c172d2
files xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/vmx/realmode.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/hvm/vmx/x86_32/exits.S xen/arch/x86/hvm/vmx/x86_64/exits.S xen/arch/x86/mm/shadow/common.c xen/arch/x86/x86_32/asm-offsets.c xen/arch/x86/x86_64/asm-offsets.c xen/arch/x86/x86_emulate.c xen/include/asm-x86/hvm/vmx/vmcs.h xen/include/asm-x86/x86_emulate.h
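
For orientation: the patch replaces the old "emulate while CR0.PE is clear" vmentry test with a set of emulation flags (VMXEMUL_REALMODE, VMXEMUL_BAD_CS, VMXEMUL_BAD_SS), so emulation continues after the guest sets CR0.PE for as long as CS or SS still holds a selector with non-zero bottom bits. Below is a minimal standalone sketch of that decision, assuming simplified stand-in types; the struct and helper names are illustrative only, and the real logic lives in vmx_update_guest_cr() and realmode_write_segment() in the diff that follows.

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-ins for the real Xen state (illustration only). */
    #define X86_CR0_PE        0x00000001u
    #define VMXEMUL_REALMODE  1   /* CR0.PE == 0   */
    #define VMXEMUL_BAD_CS    2   /* CS.RPL != CPL */
    #define VMXEMUL_BAD_SS    4   /* SS.RPL != CPL */

    struct fake_vcpu {
        uint32_t guest_cr0;
        uint16_t cs_sel, ss_sel;
        uint8_t  vmxemul;         /* mirrors arch.hvm_vmx.vmxemul */
    };

    /*
     * Recompute the emulation flags the way the patch does on CR0 and
     * segment-register writes: keep emulating while in real mode, or while
     * CS/SS still carry selectors whose bottom two bits are non-zero.
     */
    static void update_vmxemul(struct fake_vcpu *v)
    {
        v->vmxemul = 0;
        if ( !(v->guest_cr0 & X86_CR0_PE) )
            v->vmxemul |= VMXEMUL_REALMODE;
        if ( v->cs_sel & 3 )
            v->vmxemul |= VMXEMUL_BAD_CS;
        if ( v->ss_sel & 3 )
            v->vmxemul |= VMXEMUL_BAD_SS;
    }

    int main(void)
    {
        /*
         * Guest has just set CR0.PE but CS still holds a real-mode segment
         * value with non-zero low bits.  The vmentry path
         * ("testb $0xff,VCPU_vmx_emul; jnz vmx_goto_realmode") keeps
         * emulating until both CS and SS are reloaded with clean selectors.
         */
        struct fake_vcpu v = { .guest_cr0 = X86_CR0_PE,
                               .cs_sel = 0x1002, .ss_sel = 0x0000 };
        update_vmxemul(&v);
        printf("vmxemul=%#x -> %s\n", (unsigned)v.vmxemul,
               v.vmxemul ? "keep emulating" : "VMENTER");

        v.cs_sel = 0x0008;            /* far jump to a clean selector */
        update_vmxemul(&v);
        printf("vmxemul=%#x -> %s\n", (unsigned)v.vmxemul,
               v.vmxemul ? "keep emulating" : "VMENTER");
        return 0;
    }
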
line diff
     1.1 --- a/xen/arch/x86/hvm/svm/svm.c	Tue Feb 05 10:40:10 2008 +0000
     1.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Tue Feb 05 15:45:10 2008 +0000
     1.3 @@ -578,8 +578,8 @@ static unsigned long svm_get_segment_bas
     1.4      case x86_seg_gdtr: return vmcb->gdtr.base;
     1.5      case x86_seg_idtr: return vmcb->idtr.base;
     1.6      case x86_seg_ldtr: svm_sync_vmcb(v); return vmcb->ldtr.base;
     1.7 +    default: BUG();
     1.8      }
     1.9 -    BUG();
    1.10      return 0;
    1.11  }
    1.12  
     2.1 --- a/xen/arch/x86/hvm/vmx/realmode.c	Tue Feb 05 10:40:10 2008 +0000
     2.2 +++ b/xen/arch/x86/hvm/vmx/realmode.c	Tue Feb 05 15:45:10 2008 +0000
     2.3 @@ -118,6 +118,18 @@ static void realmode_deliver_exception(
     2.4      }
     2.5  }
     2.6  
     2.7 +static uint32_t virtual_to_linear(
     2.8 +    enum x86_segment seg,
     2.9 +    uint32_t offset,
    2.10 +    struct realmode_emulate_ctxt *rm_ctxt)
    2.11 +{
    2.12 +    uint32_t addr = offset;
    2.13 +    if ( seg == x86_seg_none )
    2.14 +        return addr;
    2.15 +    ASSERT(is_x86_user_segment(seg));
    2.16 +    return addr + rm_ctxt->seg_reg[seg].base;
    2.17 +}
    2.18 +
    2.19  static int
    2.20  realmode_read(
    2.21      enum x86_segment seg,
    2.22 @@ -127,14 +139,17 @@ realmode_read(
    2.23      enum hvm_access_type access_type,
    2.24      struct realmode_emulate_ctxt *rm_ctxt)
    2.25  {
    2.26 -    uint32_t addr = rm_ctxt->seg_reg[seg].base + offset;
    2.27 +    uint32_t addr = virtual_to_linear(seg, offset, rm_ctxt);
    2.28  
    2.29      *val = 0;
    2.30  
    2.31 -    if ( hvm_copy_from_guest_phys(val, addr, bytes) )
    2.32 +    if ( hvm_copy_from_guest_virt_nofault(val, addr, bytes) )
    2.33      {
    2.34          struct vcpu *curr = current;
    2.35  
    2.36 +        if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
    2.37 +            return X86EMUL_UNHANDLEABLE;
    2.38 +
    2.39          if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
    2.40              return X86EMUL_UNHANDLEABLE;
    2.41  
    2.42 @@ -202,12 +217,15 @@ realmode_emulate_write(
    2.43  {
    2.44      struct realmode_emulate_ctxt *rm_ctxt =
    2.45          container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
    2.46 -    uint32_t addr = rm_ctxt->seg_reg[seg].base + offset;
    2.47 +    uint32_t addr = virtual_to_linear(seg, offset, rm_ctxt);
    2.48  
    2.49 -    if ( hvm_copy_to_guest_phys(addr, &val, bytes) )
    2.50 +    if ( hvm_copy_to_guest_virt_nofault(addr, &val, bytes) )
    2.51      {
    2.52          struct vcpu *curr = current;
    2.53  
    2.54 +        if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
    2.55 +            return X86EMUL_UNHANDLEABLE;
    2.56 +
    2.57          if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
    2.58              return X86EMUL_UNHANDLEABLE;
    2.59  
    2.60 @@ -244,7 +262,10 @@ realmode_rep_ins(
    2.61      struct realmode_emulate_ctxt *rm_ctxt =
    2.62          container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
    2.63      struct vcpu *curr = current;
    2.64 -    uint32_t paddr = rm_ctxt->seg_reg[dst_seg].base + dst_offset;
    2.65 +    uint32_t paddr = virtual_to_linear(dst_seg, dst_offset, rm_ctxt);
    2.66 +
    2.67 +    if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
    2.68 +        return X86EMUL_UNHANDLEABLE;
    2.69  
    2.70      if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
    2.71          return X86EMUL_UNHANDLEABLE;
    2.72 @@ -277,7 +298,10 @@ realmode_rep_outs(
    2.73      struct realmode_emulate_ctxt *rm_ctxt =
    2.74          container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
    2.75      struct vcpu *curr = current;
    2.76 -    uint32_t paddr = rm_ctxt->seg_reg[src_seg].base + src_offset;
    2.77 +    uint32_t paddr = virtual_to_linear(src_seg, src_offset, rm_ctxt);
    2.78 +
    2.79 +    if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
    2.80 +        return X86EMUL_UNHANDLEABLE;
    2.81  
    2.82      if ( curr->arch.hvm_vmx.real_mode_io_in_progress )
    2.83          return X86EMUL_UNHANDLEABLE;
    2.84 @@ -310,9 +334,29 @@ realmode_write_segment(
    2.85  {
    2.86      struct realmode_emulate_ctxt *rm_ctxt =
    2.87          container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
    2.88 -    memcpy(&rm_ctxt->seg_reg[seg], reg, sizeof(struct segment_register));
    2.89 +    struct vcpu *curr = current;
    2.90 +
    2.91 +    if ( seg == x86_seg_cs )
    2.92 +    {
    2.93 +        if ( reg->attr.fields.dpl != 0 )
    2.94 +            return X86EMUL_UNHANDLEABLE;
    2.95 +        curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_CS;
    2.96 +        if ( reg->sel & 3 )
    2.97 +            curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_CS;
    2.98 +    }
    2.99 +
   2.100      if ( seg == x86_seg_ss )
   2.101 +    {
   2.102 +        if ( reg->attr.fields.dpl != 0 )
   2.103 +            return X86EMUL_UNHANDLEABLE;
   2.104 +        curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_SS;
   2.105 +        if ( reg->sel & 3 )
   2.106 +            curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_SS;
   2.107          rm_ctxt->flags.mov_ss = 1;
   2.108 +    }
   2.109 +
   2.110 +    memcpy(&rm_ctxt->seg_reg[seg], reg, sizeof(struct segment_register));
   2.111 +
   2.112      return X86EMUL_OKAY;
   2.113  }
   2.114  
   2.115 @@ -336,7 +380,7 @@ realmode_read_io(
   2.116  
   2.117      if ( !curr->arch.hvm_vmx.real_mode_io_completed )
   2.118          return X86EMUL_RETRY;
   2.119 -    
   2.120 +
   2.121      *val = curr->arch.hvm_vmx.real_mode_io_data;
   2.122      curr->arch.hvm_vmx.real_mode_io_completed = 0;
   2.123  
   2.124 @@ -506,11 +550,19 @@ static int realmode_hlt(
   2.125  
   2.126  static int realmode_inject_hw_exception(
   2.127      uint8_t vector,
   2.128 +    uint16_t error_code,
   2.129      struct x86_emulate_ctxt *ctxt)
   2.130  {
   2.131      struct realmode_emulate_ctxt *rm_ctxt =
   2.132          container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
   2.133  
   2.134 +    /* We don't emulate protected-mode exception delivery. */
   2.135 +    if ( current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
   2.136 +        return X86EMUL_UNHANDLEABLE;
   2.137 +
   2.138 +    if ( error_code != 0 )
   2.139 +        return X86EMUL_UNHANDLEABLE;
   2.140 +
   2.141      rm_ctxt->exn_vector = vector;
   2.142      rm_ctxt->exn_insn_len = 0;
   2.143  
   2.144 @@ -525,6 +577,10 @@ static int realmode_inject_sw_interrupt(
   2.145      struct realmode_emulate_ctxt *rm_ctxt =
   2.146          container_of(ctxt, struct realmode_emulate_ctxt, ctxt);
   2.147  
   2.148 +    /* We don't emulate protected-mode exception delivery. */
   2.149 +    if ( current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
   2.150 +        return X86EMUL_UNHANDLEABLE;
   2.151 +
   2.152      rm_ctxt->exn_vector = vector;
   2.153      rm_ctxt->exn_insn_len = insn_len;
   2.154  
   2.155 @@ -568,12 +624,22 @@ static void realmode_emulate_one(struct 
   2.156      struct vcpu *curr = current;
   2.157      u32 new_intr_shadow;
   2.158      int rc, io_completed;
   2.159 +    unsigned long addr;
   2.160  
   2.161 -    rm_ctxt->insn_buf_eip = regs->eip;
   2.162 -    (void)hvm_copy_from_guest_phys(
   2.163 -        rm_ctxt->insn_buf,
   2.164 -        (uint32_t)(rm_ctxt->seg_reg[x86_seg_cs].base + regs->eip),
   2.165 -        sizeof(rm_ctxt->insn_buf));
   2.166 +    rm_ctxt->ctxt.addr_size =
   2.167 +        rm_ctxt->seg_reg[x86_seg_cs].attr.fields.db ? 32 : 16;
   2.168 +    rm_ctxt->ctxt.sp_size =
   2.169 +        rm_ctxt->seg_reg[x86_seg_ss].attr.fields.db ? 32 : 16;
   2.170 +
   2.171 +    rm_ctxt->insn_buf_eip = (uint32_t)regs->eip;
   2.172 +    addr = virtual_to_linear(x86_seg_cs, regs->eip, rm_ctxt);
   2.173 +    if ( hvm_fetch_from_guest_virt_nofault(rm_ctxt->insn_buf, addr,
   2.174 +                                           sizeof(rm_ctxt->insn_buf))
   2.175 +         != HVMCOPY_okay )
   2.176 +    {
   2.177 +        gdprintk(XENLOG_ERR, "Failed to pre-fetch instruction bytes.\n");
   2.178 +        goto fail;
   2.179 +    }
   2.180  
   2.181      rm_ctxt->flag_word = 0;
   2.182  
   2.183 @@ -670,39 +736,35 @@ void vmx_realmode(struct cpu_user_regs *
   2.184      for ( i = 0; i < 10; i++ )
   2.185          hvm_get_segment_register(curr, i, &rm_ctxt.seg_reg[i]);
   2.186  
   2.187 -    rm_ctxt.ctxt.addr_size =
   2.188 -        rm_ctxt.seg_reg[x86_seg_cs].attr.fields.db ? 32 : 16;
   2.189 -    rm_ctxt.ctxt.sp_size =
   2.190 -        rm_ctxt.seg_reg[x86_seg_ss].attr.fields.db ? 32 : 16;
   2.191 -
   2.192      rm_ctxt.intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
   2.193  
   2.194      if ( curr->arch.hvm_vmx.real_mode_io_in_progress ||
   2.195           curr->arch.hvm_vmx.real_mode_io_completed )
   2.196          realmode_emulate_one(&rm_ctxt);
   2.197  
   2.198 -    if ( intr_info & INTR_INFO_VALID_MASK )
   2.199 +    /* Only deliver interrupts into emulated real mode. */
   2.200 +    if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) &&
   2.201 +         (intr_info & INTR_INFO_VALID_MASK) )
   2.202      {
   2.203          realmode_deliver_exception((uint8_t)intr_info, 0, &rm_ctxt);
   2.204          __vmwrite(VM_ENTRY_INTR_INFO, 0);
   2.205      }
   2.206  
   2.207 -    while ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) &&
   2.208 +    while ( curr->arch.hvm_vmx.vmxemul &&
   2.209              !softirq_pending(smp_processor_id()) &&
   2.210 -            !hvm_local_events_need_delivery(curr) &&
   2.211 -            !curr->arch.hvm_vmx.real_mode_io_in_progress )
   2.212 +            !curr->arch.hvm_vmx.real_mode_io_in_progress &&
   2.213 +            /* Check for pending interrupts only in proper real mode. */
   2.214 +            ((curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ||
   2.215 +             !hvm_local_events_need_delivery(curr)) )
   2.216          realmode_emulate_one(&rm_ctxt);
   2.217  
   2.218 -    /*
   2.219 -     * Cannot enter protected mode with bogus selector RPLs and DPLs. Hence we
   2.220 -     * fix up as best we can, even though this deviates from native execution
   2.221 -     */
   2.222 -    if  ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
   2.223 +    if ( !curr->arch.hvm_vmx.vmxemul )
   2.224      {
   2.225 -        /* CS.RPL == SS.RPL == SS.DPL == 0. */
   2.226 -        rm_ctxt.seg_reg[x86_seg_cs].sel &= ~3;
   2.227 -        rm_ctxt.seg_reg[x86_seg_ss].sel &= ~3;
   2.228 -        /* DS,ES,FS,GS: The most uninvasive trick is to set DPL == RPL. */
   2.229 +        /*
   2.230 +         * Cannot enter protected mode with bogus selector RPLs and DPLs.
   2.231 +         * At this point CS.RPL == SS.RPL == CS.DPL == SS.DPL == 0. For
   2.232 +         * DS, ES, FS and GS the most uninvasive trick is to set DPL == RPL.
   2.233 +         */
   2.234          rm_ctxt.seg_reg[x86_seg_ds].attr.fields.dpl =
   2.235              rm_ctxt.seg_reg[x86_seg_ds].sel & 3;
   2.236          rm_ctxt.seg_reg[x86_seg_es].attr.fields.dpl =
     3.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Tue Feb 05 10:40:10 2008 +0000
     3.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Tue Feb 05 15:45:10 2008 +0000
     3.3 @@ -1061,6 +1061,10 @@ static void vmx_update_guest_cr(struct v
     3.4                  vmx_fpu_enter(v);
     3.5          }
     3.6  
     3.7 +        v->arch.hvm_vmx.vmxemul &= ~VMXEMUL_REALMODE;
     3.8 +        if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
     3.9 +            v->arch.hvm_vmx.vmxemul |= VMXEMUL_REALMODE;
    3.10 +
    3.11          v->arch.hvm_vcpu.hw_cr[0] =
    3.12              v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
    3.13          __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
     4.1 --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S	Tue Feb 05 10:40:10 2008 +0000
     4.2 +++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S	Tue Feb 05 15:45:10 2008 +0000
     4.3 @@ -116,8 +116,8 @@ ENTRY(vmx_asm_do_vmentry)
     4.4          VMWRITE(UREGS_eflags)
     4.5  
     4.6  #ifndef VMXASSIST
     4.7 -        testb $X86_CR0_PE,VCPU_hvm_guest_cr0(%ebx)
     4.8 -        jz   vmx_goto_realmode
     4.9 +        testb $0xff,VCPU_vmx_emul(%ebx)
    4.10 +        jnz  vmx_goto_realmode
    4.11  #endif
    4.12  
    4.13          cmpb $0,VCPU_vmx_launched(%ebx)
     5.1 --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S	Tue Feb 05 10:40:10 2008 +0000
     5.2 +++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S	Tue Feb 05 15:45:10 2008 +0000
     5.3 @@ -135,8 +135,8 @@ ENTRY(vmx_asm_do_vmentry)
     5.4          VMWRITE(UREGS_eflags)
     5.5  
     5.6  #ifndef VMXASSIST
     5.7 -        testb $X86_CR0_PE,VCPU_hvm_guest_cr0(%rbx)
     5.8 -        jz   vmx_goto_realmode
     5.9 +        testb $0xff,VCPU_vmx_emul(%rbx)
    5.10 +        jnz  vmx_goto_realmode
    5.11  #endif
    5.12  
    5.13          cmpb $0,VCPU_vmx_launched(%rbx)
     6.1 --- a/xen/arch/x86/mm/shadow/common.c	Tue Feb 05 10:40:10 2008 +0000
     6.2 +++ b/xen/arch/x86/mm/shadow/common.c	Tue Feb 05 15:45:10 2008 +0000
     6.3 @@ -176,6 +176,8 @@ hvm_emulate_read(enum x86_segment seg,
     6.4                   unsigned int bytes,
     6.5                   struct x86_emulate_ctxt *ctxt)
     6.6  {
     6.7 +    if ( !is_x86_user_segment(seg) )
     6.8 +        return X86EMUL_UNHANDLEABLE;
     6.9      return hvm_read(seg, offset, val, bytes, hvm_access_read,
    6.10                      container_of(ctxt, struct sh_emulate_ctxt, ctxt));
    6.11  }
    6.12 @@ -191,6 +193,8 @@ hvm_emulate_insn_fetch(enum x86_segment 
    6.13          container_of(ctxt, struct sh_emulate_ctxt, ctxt);
    6.14      unsigned int insn_off = offset - sh_ctxt->insn_buf_eip;
    6.15  
    6.16 +    ASSERT(seg == x86_seg_cs);
    6.17 +
    6.18      /* Fall back if requested bytes are not in the prefetch cache. */
    6.19      if ( unlikely((insn_off + bytes) > sh_ctxt->insn_buf_bytes) )
    6.20          return hvm_read(seg, offset, val, bytes,
    6.21 @@ -215,6 +219,9 @@ hvm_emulate_write(enum x86_segment seg,
    6.22      unsigned long addr;
    6.23      int rc;
    6.24  
    6.25 +    if ( !is_x86_user_segment(seg) )
    6.26 +        return X86EMUL_UNHANDLEABLE;
    6.27 +
    6.28      /* How many emulations could we save if we unshadowed on stack writes? */
    6.29      if ( seg == x86_seg_ss )
    6.30          perfc_incr(shadow_fault_emulate_stack);
    6.31 @@ -242,6 +249,9 @@ hvm_emulate_cmpxchg(enum x86_segment seg
    6.32      unsigned long addr;
    6.33      int rc;
    6.34  
    6.35 +    if ( !is_x86_user_segment(seg) )
    6.36 +        return X86EMUL_UNHANDLEABLE;
    6.37 +
    6.38      rc = hvm_translate_linear_addr(
    6.39          seg, offset, bytes, hvm_access_write, sh_ctxt, &addr);
    6.40      if ( rc )
    6.41 @@ -266,6 +276,9 @@ hvm_emulate_cmpxchg8b(enum x86_segment s
    6.42      unsigned long addr;
    6.43      int rc;
    6.44  
    6.45 +    if ( !is_x86_user_segment(seg) )
    6.46 +        return X86EMUL_UNHANDLEABLE;
    6.47 +
    6.48      rc = hvm_translate_linear_addr(
    6.49          seg, offset, 8, hvm_access_write, sh_ctxt, &addr);
    6.50      if ( rc )
    6.51 @@ -292,6 +305,9 @@ pv_emulate_read(enum x86_segment seg,
    6.52  {
    6.53      unsigned int rc;
    6.54  
    6.55 +    if ( !is_x86_user_segment(seg) )
    6.56 +        return X86EMUL_UNHANDLEABLE;
    6.57 +
    6.58      *val = 0;
    6.59      if ( (rc = copy_from_user((void *)val, (void *)offset, bytes)) != 0 )
    6.60      {
    6.61 @@ -312,6 +328,8 @@ pv_emulate_write(enum x86_segment seg,
    6.62      struct sh_emulate_ctxt *sh_ctxt =
    6.63          container_of(ctxt, struct sh_emulate_ctxt, ctxt);
    6.64      struct vcpu *v = current;
    6.65 +    if ( !is_x86_user_segment(seg) )
    6.66 +        return X86EMUL_UNHANDLEABLE;
    6.67      return v->arch.paging.mode->shadow.x86_emulate_write(
    6.68          v, offset, &val, bytes, sh_ctxt);
    6.69  }
    6.70 @@ -327,6 +345,8 @@ pv_emulate_cmpxchg(enum x86_segment seg,
    6.71      struct sh_emulate_ctxt *sh_ctxt =
    6.72          container_of(ctxt, struct sh_emulate_ctxt, ctxt);
    6.73      struct vcpu *v = current;
    6.74 +    if ( !is_x86_user_segment(seg) )
    6.75 +        return X86EMUL_UNHANDLEABLE;
    6.76      return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
    6.77          v, offset, old, new, bytes, sh_ctxt);
    6.78  }
    6.79 @@ -343,6 +363,8 @@ pv_emulate_cmpxchg8b(enum x86_segment se
    6.80      struct sh_emulate_ctxt *sh_ctxt =
    6.81          container_of(ctxt, struct sh_emulate_ctxt, ctxt);
    6.82      struct vcpu *v = current;
    6.83 +    if ( !is_x86_user_segment(seg) )
    6.84 +        return X86EMUL_UNHANDLEABLE;
    6.85      return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
    6.86          v, offset, old_lo, old_hi, new_lo, new_hi, sh_ctxt);
    6.87  }
     7.1 --- a/xen/arch/x86/x86_32/asm-offsets.c	Tue Feb 05 10:40:10 2008 +0000
     7.2 +++ b/xen/arch/x86/x86_32/asm-offsets.c	Tue Feb 05 15:45:10 2008 +0000
     7.3 @@ -84,7 +84,7 @@ void __dummy__(void)
     7.4      BLANK();
     7.5  
     7.6      OFFSET(VCPU_vmx_launched, struct vcpu, arch.hvm_vmx.launched);
     7.7 -    OFFSET(VCPU_hvm_guest_cr0, struct vcpu, arch.hvm_vcpu.guest_cr[0]);
     7.8 +    OFFSET(VCPU_vmx_emul, struct vcpu, arch.hvm_vmx.vmxemul);
     7.9      OFFSET(VCPU_hvm_guest_cr2, struct vcpu, arch.hvm_vcpu.guest_cr[2]);
    7.10      BLANK();
    7.11  
     8.1 --- a/xen/arch/x86/x86_64/asm-offsets.c	Tue Feb 05 10:40:10 2008 +0000
     8.2 +++ b/xen/arch/x86/x86_64/asm-offsets.c	Tue Feb 05 15:45:10 2008 +0000
     8.3 @@ -103,7 +103,7 @@ void __dummy__(void)
     8.4      BLANK();
     8.5  
     8.6      OFFSET(VCPU_vmx_launched, struct vcpu, arch.hvm_vmx.launched);
     8.7 -    OFFSET(VCPU_hvm_guest_cr0, struct vcpu, arch.hvm_vcpu.guest_cr[0]);
     8.8 +    OFFSET(VCPU_vmx_emul, struct vcpu, arch.hvm_vmx.vmxemul);
     8.9      OFFSET(VCPU_hvm_guest_cr2, struct vcpu, arch.hvm_vcpu.guest_cr[2]);
    8.10      BLANK();
    8.11  
     9.1 --- a/xen/arch/x86/x86_emulate.c	Tue Feb 05 10:40:10 2008 +0000
     9.2 +++ b/xen/arch/x86/x86_emulate.c	Tue Feb 05 15:45:10 2008 +0000
     9.3 @@ -303,7 +303,11 @@ struct operand {
     9.4  #define EXC_OF  4
     9.5  #define EXC_BR  5
     9.6  #define EXC_UD  6
     9.7 +#define EXC_TS 10
     9.8 +#define EXC_NP 11
     9.9 +#define EXC_SS 12
    9.10  #define EXC_GP 13
    9.11 +#define EXC_PF 14
    9.12  
    9.13  /*
    9.14   * Instruction emulation:
    9.15 @@ -500,12 +504,12 @@ do {                                    
    9.16      if ( rc ) goto done;                                \
    9.17  } while (0)
    9.18  
    9.19 -#define generate_exception_if(p, e)                                     \
    9.20 -({  if ( (p) ) {                                                        \
    9.21 -        fail_if(ops->inject_hw_exception == NULL);                      \
    9.22 -        rc = ops->inject_hw_exception(e, ctxt) ? : X86EMUL_EXCEPTION;   \
    9.23 -        goto done;                                                      \
    9.24 -    }                                                                   \
    9.25 +#define generate_exception_if(p, e)                                      \
    9.26 +({  if ( (p) ) {                                                         \
    9.27 +        fail_if(ops->inject_hw_exception == NULL);                       \
    9.28 +        rc = ops->inject_hw_exception(e, 0, ctxt) ? : X86EMUL_EXCEPTION; \
    9.29 +        goto done;                                                       \
    9.30 +    }                                                                    \
    9.31  })
    9.32  
    9.33  /*
    9.34 @@ -774,7 +778,7 @@ in_realmode(
    9.35  }
    9.36  
    9.37  static int
    9.38 -load_seg(
    9.39 +realmode_load_seg(
    9.40      enum x86_segment seg,
    9.41      uint16_t sel,
    9.42      struct x86_emulate_ctxt *ctxt,
    9.43 @@ -783,11 +787,6 @@ load_seg(
    9.44      struct segment_register reg;
    9.45      int rc;
    9.46  
    9.47 -    if ( !in_realmode(ctxt, ops) ||
    9.48 -         (ops->read_segment == NULL) ||
    9.49 -         (ops->write_segment == NULL) )
    9.50 -        return X86EMUL_UNHANDLEABLE;
    9.51 -
    9.52      if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 )
    9.53          return rc;
    9.54  
    9.55 @@ -797,6 +796,148 @@ load_seg(
    9.56      return ops->write_segment(seg, &reg, ctxt);
    9.57  }
    9.58  
    9.59 +static int
    9.60 +protmode_load_seg(
    9.61 +    enum x86_segment seg,
    9.62 +    uint16_t sel,
    9.63 +    struct x86_emulate_ctxt *ctxt,
    9.64 +    struct x86_emulate_ops *ops)
    9.65 +{
    9.66 +    struct segment_register desctab, cs, segr;
    9.67 +    struct { uint32_t a, b; } desc;
    9.68 +    unsigned long val;
    9.69 +    uint8_t dpl, rpl, cpl;
    9.70 +    int rc, fault_type = EXC_TS;
    9.71 +
    9.72 +    /* NULL selector? */
    9.73 +    if ( (sel & 0xfffc) == 0 )
    9.74 +    {
    9.75 +        if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) )
    9.76 +            goto raise_exn;
    9.77 +        memset(&segr, 0, sizeof(segr));
    9.78 +        return ops->write_segment(seg, &segr, ctxt);
    9.79 +    }
    9.80 +
    9.81 +    /* LDT descriptor must be in the GDT. */
    9.82 +    if ( (seg == x86_seg_ldtr) && (sel & 4) )
    9.83 +        goto raise_exn;
    9.84 +
    9.85 +    if ( (rc = ops->read_segment(x86_seg_cs, &cs, ctxt)) ||
    9.86 +         (rc = ops->read_segment((sel & 4) ? x86_seg_ldtr : x86_seg_gdtr,
    9.87 +                                 &desctab, ctxt)) )
    9.88 +        return rc;
    9.89 +
    9.90 +    /* Check against descriptor table limit. */
    9.91 +    if ( ((sel & 0xfff8) + 7) > desctab.limit )
    9.92 +        goto raise_exn;
    9.93 +
    9.94 +    do {
    9.95 +        if ( (rc = ops->read(x86_seg_none, desctab.base + (sel & 0xfff8),
    9.96 +                             &val, 4, ctxt)) )
    9.97 +            return rc;
    9.98 +        desc.a = val;
    9.99 +        if ( (rc = ops->read(x86_seg_none, desctab.base + (sel & 0xfff8) + 4,
   9.100 +                             &val, 4, ctxt)) )
   9.101 +            return rc;
   9.102 +        desc.b = val;
   9.103 +
   9.104 +        /* Segment present in memory? */
   9.105 +        if ( !(desc.b & (1u<<15)) )
   9.106 +        {
   9.107 +            fault_type = EXC_NP;
   9.108 +            goto raise_exn;
   9.109 +        }
   9.110 +
   9.111 +        /* LDT descriptor is a system segment. All others are code/data. */
   9.112 +        if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) )
   9.113 +            goto raise_exn;
   9.114 +
   9.115 +        dpl = (desc.b >> 13) & 3;
   9.116 +        rpl = sel & 3;
   9.117 +        cpl = cs.sel & 3;
   9.118 +
   9.119 +        switch ( seg )
   9.120 +        {
   9.121 +        case x86_seg_cs:
   9.122 +            /* Code segment? */
   9.123 +            if ( !(desc.b & (1u<<11)) )
   9.124 +                goto raise_exn;
   9.125 +            /* Non-conforming segment: check DPL against RPL. */
   9.126 +            if ( ((desc.b & (6u<<9)) != 6) && (dpl != rpl) )
   9.127 +                goto raise_exn;
   9.128 +            break;
   9.129 +        case x86_seg_ss:
   9.130 +            /* Writable data segment? */
   9.131 +            if ( (desc.b & (5u<<9)) != (1u<<9) )
   9.132 +                goto raise_exn;
   9.133 +            if ( (dpl != cpl) || (dpl != rpl) )
   9.134 +                goto raise_exn;
   9.135 +            break;
   9.136 +        case x86_seg_ldtr:
   9.137 +            /* LDT system segment? */
   9.138 +            if ( (desc.b & (15u<<8)) != (2u<<8) )
   9.139 +                goto raise_exn;
   9.140 +            goto skip_accessed_flag;
   9.141 +        default:
   9.142 +            /* Readable code or data segment? */
   9.143 +            if ( (desc.b & (5u<<9)) == (4u<<9) )
   9.144 +                goto raise_exn;
   9.145 +            /* Non-conforming segment: check DPL against RPL and CPL. */
   9.146 +            if ( ((desc.b & (6u<<9)) != 6) && ((dpl < cpl) || (dpl < rpl)) )
   9.147 +                goto raise_exn;
   9.148 +            break;
   9.149 +        }
   9.150 +
   9.151 +        /* Ensure Accessed flag is set. */
   9.152 +        rc = ((desc.b & 0x100) ? X86EMUL_OKAY : 
   9.153 +              ops->cmpxchg(
   9.154 +                  x86_seg_none, desctab.base + (sel & 0xfff8) + 4, desc.b,
   9.155 +                  desc.b | 0x100, 4, ctxt));
   9.156 +    } while ( rc == X86EMUL_CMPXCHG_FAILED );
   9.157 +
   9.158 +    if ( rc )
   9.159 +        return rc;
   9.160 +
   9.161 +    /* Force the Accessed flag in our local copy. */
   9.162 +    desc.b |= 0x100;
   9.163 +
   9.164 + skip_accessed_flag:
   9.165 +    segr.base = (((desc.b <<  0) & 0xff000000u) |
   9.166 +                 ((desc.b << 16) & 0x00ff0000u) |
   9.167 +                 ((desc.a >> 16) & 0x0000ffffu));
   9.168 +    segr.attr.bytes = (((desc.b >>  8) & 0x00ffu) |
   9.169 +                       ((desc.b >> 12) & 0x0f00u));
   9.170 +    segr.limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu);
   9.171 +    if ( segr.attr.fields.g )
   9.172 +        segr.limit = (segr.limit << 12) | 0xfffu;
   9.173 +    segr.sel = sel;
   9.174 +    return ops->write_segment(seg, &segr, ctxt);
   9.175 +
   9.176 + raise_exn:
   9.177 +    if ( ops->inject_hw_exception == NULL )
   9.178 +        return X86EMUL_UNHANDLEABLE;
   9.179 +    if ( (rc = ops->inject_hw_exception(fault_type, sel & 0xfffc, ctxt)) )
   9.180 +        return rc;
   9.181 +    return X86EMUL_EXCEPTION;
   9.182 +}
   9.183 +
   9.184 +static int
   9.185 +load_seg(
   9.186 +    enum x86_segment seg,
   9.187 +    uint16_t sel,
   9.188 +    struct x86_emulate_ctxt *ctxt,
   9.189 +    struct x86_emulate_ops *ops)
   9.190 +{
   9.191 +    if ( (ops->read_segment == NULL) ||
   9.192 +         (ops->write_segment == NULL) )
   9.193 +        return X86EMUL_UNHANDLEABLE;
   9.194 +
   9.195 +    if ( in_realmode(ctxt, ops) )
   9.196 +        return realmode_load_seg(seg, sel, ctxt, ops);
   9.197 +
   9.198 +    return protmode_load_seg(seg, sel, ctxt, ops);
   9.199 +}
   9.200 +
   9.201  void *
   9.202  decode_register(
   9.203      uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
   9.204 @@ -1858,7 +1999,7 @@ x86_emulate(
   9.205      if ( (_regs.eflags & EFLG_TF) &&
   9.206           (rc == X86EMUL_OKAY) &&
   9.207           (ops->inject_hw_exception != NULL) )
   9.208 -        rc = ops->inject_hw_exception(EXC_DB, ctxt) ? : X86EMUL_EXCEPTION;
   9.209 +        rc = ops->inject_hw_exception(EXC_DB, 0, ctxt) ? : X86EMUL_EXCEPTION;
   9.210  
   9.211   done:
   9.212      return rc;
    10.1 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h	Tue Feb 05 10:40:10 2008 +0000
    10.2 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h	Tue Feb 05 15:45:10 2008 +0000
    10.3 @@ -95,10 +95,20 @@ struct arch_vmx_struct {
    10.4      unsigned long        host_cr0;
    10.5  
    10.6  #ifdef VMXASSIST
    10.7 +
    10.8      unsigned long        vmxassist_enabled:1;
    10.9      unsigned long        irqbase_mode:1;
   10.10      unsigned char        pm_irqbase[2];
   10.11 +
   10.12  #else
   10.13 +
   10.14 +    /* Are we emulating rather than VMENTERing? */
   10.15 +#define VMXEMUL_REALMODE 1  /* Yes, because CR0.PE == 0   */
   10.16 +#define VMXEMUL_BAD_CS   2  /* Yes, because CS.RPL != CPL */
   10.17 +#define VMXEMUL_BAD_SS   4  /* Yes, because SS.RPL != CPL */
   10.18 +    uint8_t              vmxemul;
   10.19 +
   10.20 +    /* I/O request in flight to device model. */
   10.21      bool_t               real_mode_io_in_progress;
   10.22      bool_t               real_mode_io_completed;
   10.23      unsigned long        real_mode_io_data;
    11.1 --- a/xen/include/asm-x86/x86_emulate.h	Tue Feb 05 10:40:10 2008 +0000
    11.2 +++ b/xen/include/asm-x86/x86_emulate.h	Tue Feb 05 15:45:10 2008 +0000
    11.3 @@ -39,9 +39,18 @@ enum x86_segment {
    11.4      x86_seg_tr,
    11.5      x86_seg_ldtr,
    11.6      x86_seg_gdtr,
    11.7 -    x86_seg_idtr
    11.8 +    x86_seg_idtr,
    11.9 +    /*
   11.10 +     * Dummy: used to emulate direct processor accesses to management
   11.11 +     * structures (TSS, GDT, LDT, IDT, etc.) which use linear addressing
   11.12 +     * (no segment component) and bypass usual segment- and page-level
   11.13 +     * protection checks.
   11.14 +     */
   11.15 +    x86_seg_none
   11.16  };
   11.17  
   11.18 +#define is_x86_user_segment(seg) ((unsigned)(seg) <= x86_seg_gs)
   11.19 +
   11.20  /* 
   11.21   * Attribute for segment selector. This is a copy of bit 40:47 & 52:55 of the
   11.22   * segment descriptor. It happens to match the format of an AMD SVM VMCB.
   11.23 @@ -333,6 +342,7 @@ struct x86_emulate_ops
   11.24      /* inject_hw_exception */
   11.25      int (*inject_hw_exception)(
   11.26          uint8_t vector,
   11.27 +        uint16_t error_code,
   11.28          struct x86_emulate_ctxt *ctxt);
   11.29  
   11.30      /* inject_sw_interrupt */
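
A footnote on the descriptor unpacking added in protmode_load_seg() above: the two descriptor words are split into base, attributes and limit exactly as the architecture lays them out. The standalone sketch below applies the same bit shuffling to a hypothetical flat 32-bit code-segment descriptor; the struct is a simplified stand-in for Xen's struct segment_register, for illustration only.

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-in for Xen's struct segment_register. */
    struct seg {
        uint32_t base;
        uint32_t limit;
        uint16_t attr;   /* descriptor bits 40-47 and 52-55, as in the patch */
    };

    /* Unpack a GDT/LDT descriptor the same way protmode_load_seg() does. */
    static struct seg decode_desc(uint32_t a, uint32_t b)
    {
        struct seg s;
        s.base = ((b <<  0) & 0xff000000u) |   /* base 31:24 */
                 ((b << 16) & 0x00ff0000u) |   /* base 23:16 */
                 ((a >> 16) & 0x0000ffffu);    /* base 15:0  */
        s.attr = ((b >>  8) & 0x00ffu) |       /* type, S, DPL, P  */
                 ((b >> 12) & 0x0f00u);        /* AVL, L, D/B, G   */
        s.limit = (b & 0x000f0000u) | (a & 0x0000ffffu);
        if ( s.attr & 0x800 )                  /* G bit: 4KiB granularity */
            s.limit = (s.limit << 12) | 0xfffu;
        return s;
    }

    int main(void)
    {
        /* Flat code segment: base 0, limit 0xfffff pages, type 0xb, G=1. */
        struct seg s = decode_desc(0x0000ffffu, 0x00cf9b00u);
        printf("base=%#x limit=%#x attr=%#x\n", s.base, s.limit, s.attr);
        /* Expected output: base=0 limit=0xffffffff attr=0xc9b */
        return 0;
    }
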