ia64/xen-unstable

changeset 9073:a376bab39768

SVM patch to add 64bit hypervisor (hv) support.
This patch only modifies svm files.
Tested with c/s 9015 with a 32bit hv using UP Dom0, with unmodified UP Linux
and WinXP SP1 guests.
Tested with c/s 9015 with a 64bit hv using UP Dom0, with 32bit and 64bit UP
Linux guests and a 32bit WinXP SP1 guest.

Signed-off-by: Tom Woller <thomas.woller@amd.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Feb 28 22:57:38 2006 +0100 (2006-02-28)
parents a66763eb86fe
children bd816eee9cf6
files xen/arch/x86/hvm/svm/emulate.c xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/svm/vmcb.c xen/arch/x86/hvm/svm/x86_64/exits.S xen/include/asm-x86/hvm/svm/emulate.h xen/include/asm-x86/hvm/svm/vmcb.h
line diff
     1.1 --- a/xen/arch/x86/hvm/svm/emulate.c	Tue Feb 28 19:00:15 2006 +0100
     1.2 +++ b/xen/arch/x86/hvm/svm/emulate.c	Tue Feb 28 22:57:38 2006 +0100
     1.3 @@ -86,7 +86,7 @@ static inline unsigned long DECODE_GPR_V
     1.4      case 0x7:
     1.5          value = regs->edi;
     1.6          break;
     1.7 -#if X86_64
     1.8 +#if __x86_64__
     1.9      case 0x8:
    1.10          value = regs->r8;
    1.11          break;
    1.12 @@ -318,20 +318,14 @@ unsigned long get_effective_addr_sib(str
    1.13  
    1.14  
    1.15  /* Get the register/mode number of src register in ModRM register. */
    1.16 -unsigned int decode_dest_reg(u8 m)
    1.17 +unsigned int decode_dest_reg(u8 prefix, u8 m)
    1.18  {
    1.19 -#if __x86_64__
    1.20 -    ASSERT(0); /* Need to adjust for REX prefix if applicable */
    1.21 -#endif
    1.22 -    return (m >> 3) & 7;
    1.23 +    return DECODE_MODRM_REG(prefix, m);
    1.24  }
    1.25  
    1.26 -unsigned int decode_src_reg(u8 m)
    1.27 +unsigned int decode_src_reg(u8 prefix, u8 m)
    1.28  {
    1.29 -#if __x86_64__
    1.30 -    ASSERT(0); /* Need to adjust for REX prefix if applicable */
    1.31 -#endif
    1.32 -    return m & 7;
    1.33 +    return DECODE_MODRM_RM(prefix, m);
    1.34  }
    1.35  
    1.36  
    1.37 @@ -431,7 +425,7 @@ static const u8 *opc_bytes[INSTR_MAX_COU
    1.38   * The caller can either pass a NULL pointer to the guest_eip_buf, or a pointer
    1.39   * to enough bytes to satisfy the instruction including prefix bytes.
    1.40   */
    1.41 -unsigned int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
    1.42 +int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
    1.43          enum instruction_index *list, unsigned int list_count, 
    1.44          u8 *guest_eip_buf, enum instruction_index *match)
    1.45  {
     2.1 --- a/xen/arch/x86/hvm/svm/svm.c	Tue Feb 28 19:00:15 2006 +0100
     2.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Tue Feb 28 22:57:38 2006 +0100
     2.3 @@ -164,7 +164,7 @@ void asidpool_retire( struct vmcb_struct
     2.4  }
     2.5  
     2.6  static inline void svm_inject_exception(struct vmcb_struct *vmcb, 
     2.7 -                                        int trap, int error_code)
     2.8 +                                        int trap, int ev, int error_code)
     2.9  {
    2.10      eventinj_t event;
    2.11  
    2.12 @@ -172,7 +172,7 @@ static inline void svm_inject_exception(
    2.13      event.fields.v = 1;
    2.14      event.fields.type = EVENTTYPE_EXCEPTION;
    2.15      event.fields.vector = trap;
    2.16 -    event.fields.ev = 1;
    2.17 +    event.fields.ev = ev;
    2.18      event.fields.errorcode = error_code;
    2.19  
    2.20      ASSERT(vmcb->eventinj.fields.v == 0);
    2.21 @@ -237,109 +237,62 @@ void svm_load_cpu_guest_regs(struct vcpu
    2.22  }
    2.23  
    2.24  #ifdef __x86_64__
    2.25 -static struct svm_msr_state percpu_msr[NR_CPUS];
    2.26 -
    2.27 -static u32 msr_data_index[VMX_MSR_COUNT] =
    2.28 -{
    2.29 -    MSR_LSTAR, MSR_STAR, MSR_CSTAR,
    2.30 -    MSR_SYSCALL_MASK, MSR_EFER,
    2.31 -};
    2.32  
    2.33  void svm_save_segments(struct vcpu *v)
    2.34  {
    2.35 -    rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_svm.msr_content.shadow_gs);
    2.36  }
    2.37 -
    2.38 -/*
    2.39 - * To avoid MSR save/restore at every VM exit/entry time, we restore
    2.40 - * the x86_64 specific MSRs at domain switch time. Since those MSRs are
    2.41 - * are not modified once set for generic domains, we don't save them,
    2.42 - * but simply reset them to the values set at percpu_traps_init().
    2.43 - */
    2.44  void svm_load_msrs(void)
    2.45  {
    2.46 -    struct svm_msr_state *host_state = &percpu_msr[smp_processor_id()];
    2.47 -    int i;
    2.48 -
    2.49 -    while ( host_state->flags )
    2.50 -    {
    2.51 -        i = find_first_set_bit(host_state->flags);
    2.52 -        wrmsrl(msr_data_index[i], host_state->msr_items[i]);
    2.53 -        clear_bit(i, &host_state->flags);
    2.54 -    }
    2.55  }
    2.56 -
    2.57 -static void svm_save_init_msrs(void)
    2.58 +void svm_restore_msrs(struct vcpu *v)
    2.59  {
    2.60 -    struct svm_msr_state *host_state = &percpu_msr[smp_processor_id()];
    2.61 -    int i;
    2.62 -
    2.63 -    for ( i = 0; i < SVM_MSR_COUNT; i++ )
    2.64 -        rdmsrl(msr_data_index[i], host_state->msr_items[i]);
    2.65  }
    2.66  
    2.67 -#define CASE_READ_MSR(address)                               \
    2.68 -    case MSR_ ## address:                                    \
    2.69 -    msr_content = msr->msr_items[SVM_INDEX_MSR_ ## address]; \
    2.70 -    break
    2.71 -
    2.72 -#define CASE_WRITE_MSR(address)                              \
    2.73 -    case MSR_ ## address:                                    \
    2.74 -    msr->msr_items[SVM_INDEX_MSR_ ## address] = msr_content; \
    2.75 -    if (!test_bit(SVM_INDEX_MSR_ ## address, &msr->flags))   \
    2.76 -    {                                                        \
    2.77 -        set_bit(SVM_INDEX_MSR_ ## address, &msr->flags);     \
    2.78 -    }                                                        \
    2.79 -    break
    2.80 -
    2.81 -
    2.82  #define IS_CANO_ADDRESS(add) 1
    2.83  
    2.84  static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
    2.85  {
    2.86      u64 msr_content = 0;
    2.87      struct vcpu *vc = current;
    2.88 -    struct svm_msr_state *msr = &vc->arch.hvm_svm.msr_content;
    2.89 +    //    struct svm_msr_state *msr = &vc->arch.hvm_svm.msr_content;
    2.90      struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
    2.91  
    2.92      switch (regs->ecx)
    2.93      {
    2.94      case MSR_EFER:
    2.95 -        msr_content = msr->msr_items[SVM_INDEX_MSR_EFER];
    2.96 -        HVM_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %llx\n", 
    2.97 -                (unsigned long long)msr_content);
    2.98 -
    2.99 -        if (test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state))
   2.100 -            msr_content |= 1 << _EFER_LME;
   2.101 -
   2.102 -        if (SVM_LONG_GUEST(vc))
   2.103 -            msr_content |= 1 << _EFER_LMA;
   2.104 -
   2.105 +        // msr_content = msr->msr_items[SVM_INDEX_MSR_EFER];
   2.106 +        msr_content = vmcb->efer;      
   2.107 +        msr_content &= ~EFER_SVME;
   2.108          break;
   2.109  
   2.110      case MSR_FS_BASE:
   2.111 -        if (!(SVM_LONG_GUEST(vc)))
   2.112 -            /* XXX should it be GP fault */
   2.113 -            domain_crash_synchronous();
   2.114 -        
   2.115          msr_content = vmcb->fs.base;
   2.116          break;
   2.117  
   2.118      case MSR_GS_BASE:
   2.119 -        if (!(SVM_LONG_GUEST(vc)))
   2.120 -            domain_crash_synchronous();
   2.121 -
   2.122          msr_content = vmcb->gs.base;
   2.123          break;
   2.124  
   2.125      case MSR_SHADOW_GS_BASE:
   2.126 -        msr_content = msr->shadow_gs;
   2.127 +        msr_content = vmcb->kerngsbase;
   2.128          break;
   2.129  
   2.130 -    CASE_READ_MSR(STAR);
   2.131 -    CASE_READ_MSR(LSTAR);
   2.132 -    CASE_READ_MSR(CSTAR);
   2.133 -    CASE_READ_MSR(SYSCALL_MASK);
   2.134 +    case MSR_STAR:
   2.135 +         msr_content = vmcb->star;
   2.136 +         break;
   2.137 + 
   2.138 +    case MSR_LSTAR:
   2.139 +         msr_content = vmcb->lstar;
   2.140 +         break;
   2.141 + 
   2.142 +    case MSR_CSTAR:
   2.143 +         msr_content = vmcb->cstar;
   2.144 +         break;
   2.145 + 
   2.146 +    case MSR_SYSCALL_MASK:
   2.147 +         msr_content = vmcb->sfmask;
   2.148 +         break;
   2.149 +
   2.150      default:
   2.151          return 0;
   2.152      }
   2.153 @@ -356,8 +309,6 @@ static inline int long_mode_do_msr_write
   2.154  {
   2.155      u64 msr_content = regs->eax | ((u64)regs->edx << 32); 
   2.156      struct vcpu *vc = current;
   2.157 -    struct svm_msr_state *msr = &vc->arch.hvm_svm.msr_content;
   2.158 -    struct svm_msr_state *host_state = &percpu_msr[smp_processor_id()];
   2.159      struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
   2.160  
   2.161      HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx msr_content %lx\n", 
   2.162 @@ -373,26 +324,20 @@ static inline int long_mode_do_msr_write
   2.163                      || !test_bit(SVM_CPU_STATE_PAE_ENABLED,
   2.164                                   &vc->arch.hvm_svm.cpu_state))
   2.165              {
   2.166 -                svm_inject_exception(vmcb, TRAP_gp_fault, 0);
   2.167 +                svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
   2.168              }
   2.169          }
   2.170  
   2.171          if (msr_content & EFER_LME)
   2.172              set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
   2.173  
   2.174 +        /* We have already recorded that we want LME, so it will be set 
   2.175 +         * next time CR0 gets updated. So we clear that bit and continue.
   2.176 +         */
   2.177 +        if ((msr_content ^ vmcb->efer) & EFER_LME)
   2.178 +            msr_content &= ~EFER_LME;  
   2.179          /* No update for LME/LMA since it have no effect */
   2.180 -        msr->msr_items[SVM_INDEX_MSR_EFER] = msr_content;
   2.181 -        if (msr_content & ~(EFER_LME | EFER_LMA))
   2.182 -        {
   2.183 -            msr->msr_items[SVM_INDEX_MSR_EFER] = msr_content;
   2.184 -            if (!test_bit(SVM_INDEX_MSR_EFER, &msr->flags))
   2.185 -            { 
   2.186 -                rdmsrl(MSR_EFER, host_state->msr_items[SVM_INDEX_MSR_EFER]);
   2.187 -                set_bit(SVM_INDEX_MSR_EFER, &host_state->flags);
   2.188 -                set_bit(SVM_INDEX_MSR_EFER, &msr->flags);  
   2.189 -                wrmsrl(MSR_EFER, msr_content);
   2.190 -            }
   2.191 -        }
   2.192 +        vmcb->efer = msr_content | EFER_SVME;
   2.193          break;
   2.194  
   2.195      case MSR_FS_BASE:
   2.196 @@ -403,63 +348,42 @@ static inline int long_mode_do_msr_write
   2.197          if (!IS_CANO_ADDRESS(msr_content))
   2.198          {
   2.199              HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
   2.200 -            svm_inject_exception(vmcb, TRAP_gp_fault, 0);
   2.201 +            svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
   2.202          }
   2.203  
   2.204          if (regs->ecx == MSR_FS_BASE)
   2.205 -	    vmcb->fs.base = msr_content;
   2.206 +            vmcb->fs.base = msr_content;
   2.207          else 
   2.208 -	    vmcb->gs.base = msr_content;
   2.209 +            vmcb->gs.base = msr_content;
   2.210          break;
   2.211  
   2.212      case MSR_SHADOW_GS_BASE:
   2.213 -        if (!(SVM_LONG_GUEST(vc)))
   2.214 -            domain_crash_synchronous();
   2.215 -
   2.216 -        vc->arch.hvm_svm.msr_content.shadow_gs = msr_content;
   2.217 -        wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
   2.218 -        break;
   2.219 -
   2.220 -    CASE_WRITE_MSR(STAR);
   2.221 -    CASE_WRITE_MSR(LSTAR);
   2.222 -    CASE_WRITE_MSR(CSTAR);
   2.223 -    CASE_WRITE_MSR(SYSCALL_MASK);
   2.224 +         vmcb->kerngsbase = msr_content;
   2.225 +         break;
   2.226 + 
   2.227 +    case MSR_STAR:
   2.228 +         vmcb->star = msr_content;
   2.229 +         break;
   2.230 + 
   2.231 +    case MSR_LSTAR:
   2.232 +         vmcb->lstar = msr_content;
   2.233 +         break;
   2.234 + 
   2.235 +    case MSR_CSTAR:
   2.236 +         vmcb->cstar = msr_content;
   2.237 +         break;
   2.238 + 
   2.239 +    case MSR_SYSCALL_MASK:
   2.240 +         vmcb->sfmask = msr_content;
   2.241 +         break;
   2.242 +
   2.243      default:
   2.244          return 0;
   2.245      }
   2.246      return 1;
   2.247  }
   2.248  
   2.249 -void
   2.250 -svm_restore_msrs(struct vcpu *v)
   2.251 -{
   2.252 -    int i = 0;
   2.253 -    struct svm_msr_state *guest_state;
   2.254 -    struct svm_msr_state *host_state;
   2.255 -    unsigned long guest_flags;
   2.256 -
   2.257 -    guest_state = &v->arch.hvm_svm.msr_content;;
   2.258 -    host_state = &percpu_msr[smp_processor_id()];
   2.259 -
   2.260 -    wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs);
   2.261 -    guest_flags = guest_state->flags;
   2.262 -    if (!guest_flags)
   2.263 -        return;
   2.264 -
   2.265 -    while (guest_flags){
   2.266 -        i = find_first_set_bit(guest_flags);
   2.267 -
   2.268 -        HVM_DBG_LOG(DBG_LEVEL_2,
   2.269 -                    "restore guest's index %d msr %lx with %lx\n",
   2.270 -                    i, (unsigned long) msr_data_index[i], (unsigned long) guest_state->msr_items[i]);
   2.271 -        set_bit(i, &host_state->flags);
   2.272 -        wrmsrl(msr_data_index[i], guest_state->msr_items[i]);
   2.273 -        clear_bit(i, &guest_flags);
   2.274 -    }
   2.275 -}
   2.276  #else
   2.277 -#define	svm_save_init_msrs()	((void)0)
   2.278 -
   2.279  static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
   2.280  {
   2.281      return 0;
   2.282 @@ -497,11 +421,30 @@ int svm_instruction_length(struct vcpu *
   2.283  {
   2.284      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   2.285      unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode;
   2.286 -
   2.287 -    mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
   2.288 +    /* check which operating mode the guest is running */
   2.289 +    if( vmcb->efer & EFER_LMA )
   2.290 +        mode = vmcb->cs.attributes.fields.l ? 8 : 4;
   2.291 +    else
   2.292 +        mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
   2.293      return svm_instrlen(guest_cpu_user_regs(), mode);
   2.294  }
   2.295  
   2.296 +unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
   2.297 +{
   2.298 +    switch ( num )
   2.299 +    {
   2.300 +    case 0:
   2.301 +        return v->arch.hvm_svm.cpu_shadow_cr0;
   2.302 +    case 2:
   2.303 +        return v->arch.hvm_svm.cpu_cr2;
   2.304 +    case 3:
   2.305 +        return v->arch.hvm_svm.cpu_cr3;
   2.306 +    default:
   2.307 +        BUG();
   2.308 +    }
   2.309 +    return 0;                   /* dummy */
   2.310 +}
   2.311 +
   2.312  int start_svm(void)
   2.313  {
   2.314      u32 eax, ecx, edx;
   2.315 @@ -519,8 +462,6 @@ int start_svm(void)
   2.316      asidpool_init(smp_processor_id());    
   2.317      printk("AMD SVM Extension is enabled for cpu %d.\n", smp_processor_id());
   2.318      
   2.319 -    svm_save_init_msrs();
   2.320 -
   2.321      /* Setup HVM interfaces */
   2.322      hvm_funcs.disable = stop_svm;
   2.323  
   2.324 @@ -542,6 +483,7 @@ int start_svm(void)
   2.325      hvm_funcs.realmode = svm_realmode;
   2.326      hvm_funcs.paging_enabled = svm_paging_enabled;
   2.327      hvm_funcs.instruction_length = svm_instruction_length;
   2.328 +    hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;
   2.329  
   2.330      hvm_enabled = 1;    
   2.331  
   2.332 @@ -631,8 +573,17 @@ void save_svm_cpu_user_regs(struct vcpu 
   2.333  }
   2.334  
   2.335  #if defined (__x86_64__)
   2.336 -void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *c )
   2.337 +void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v )
   2.338  {
   2.339 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   2.340 +
   2.341 +    regs->rip    = vmcb->rip;
   2.342 +    regs->rsp    = vmcb->rsp;
   2.343 +    regs->rflags = vmcb->rflags;
   2.344 +    regs->cs     = vmcb->cs.sel;
   2.345 +    regs->ds     = vmcb->ds.sel;
   2.346 +    regs->es     = vmcb->es.sel;
   2.347 +    regs->ss     = vmcb->ss.sel;
   2.348  }
   2.349  #elif defined (__i386__)
   2.350  void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
   2.351 @@ -882,9 +833,9 @@ static int svm_do_page_fault(unsigned lo
   2.352  	/* No support for APIC */
   2.353          if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
   2.354          { 
   2.355 -            unsigned long inst_len;
   2.356 -	    inst_len = svm_instruction_length(v);
   2.357 -            if (inst_len == (unsigned long)-1)
   2.358 +            int inst_len;
   2.359 +            inst_len = svm_instruction_length(v);
   2.360 +            if (inst_len == -1)
   2.361              {
   2.362                  printf("%s: INST_LEN - Unable to decode properly.\n", __func__);
   2.363                  domain_crash_synchronous();
   2.364 @@ -937,6 +888,14 @@ static void svm_do_general_protection_fa
   2.365      eip = vmcb->rip;
   2.366      error_code = vmcb->exitinfo1;
   2.367  
   2.368 +    if (vmcb->idtr.limit == 0) {
   2.369 +        printf("Huh? We got a GP Fault with an invalid IDTR!\n");
   2.370 +        svm_dump_vmcb(__func__, vmcb);
   2.371 +        svm_dump_regs(__func__, regs);
   2.372 +        svm_dump_inst(vmcb->rip); 
   2.373 +        __hvm_bug(regs);
   2.374 +    }
   2.375 +
   2.376      HVM_DBG_LOG(DBG_LEVEL_1,
   2.377                  "svm_general_protection_fault: eip = %lx, erro_code = %lx",
   2.378                  eip, error_code);
   2.379 @@ -949,7 +908,7 @@ static void svm_do_general_protection_fa
   2.380  
   2.381      
   2.382      /* Reflect it back into the guest */
   2.383 -    svm_inject_exception(vmcb, TRAP_gp_fault, error_code);
   2.384 +    svm_inject_exception(vmcb, TRAP_gp_fault, 1, error_code);
   2.385  }
   2.386  
   2.387  /* Reserved bits: [31:14], [12:1] */
   2.388 @@ -961,7 +920,7 @@ static void svm_vmexit_do_cpuid(struct v
   2.389      unsigned int eax, ebx, ecx, edx;
   2.390      unsigned long eip;
   2.391      struct vcpu *v = current;
   2.392 -    unsigned int inst_len;
   2.393 +    int inst_len;
   2.394  
   2.395      ASSERT(vmcb);
   2.396  
   2.397 @@ -978,8 +937,10 @@ static void svm_vmexit_do_cpuid(struct v
   2.398  
   2.399      if (input == 1)
   2.400      {
   2.401 +#ifndef __x86_64__
   2.402          if ( hvm_apic_support(v->domain) &&
   2.403                  !vlapic_global_enabled((VLAPIC(v))) )
   2.404 +#endif
   2.405              clear_bit(X86_FEATURE_APIC, &edx);
   2.406  	    
   2.407  #if CONFIG_PAGING_LEVELS < 3
   2.408 @@ -1019,6 +980,7 @@ static void svm_vmexit_do_cpuid(struct v
   2.409              eip, input, eax, ebx, ecx, edx);
   2.410  
   2.411      inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
   2.412 +    ASSERT(inst_len > 0);
   2.413      __update_guest_eip(vmcb, inst_len);
   2.414  }
   2.415  
   2.416 @@ -1111,9 +1073,11 @@ static void svm_dr_access (struct vcpu *
   2.417      unsigned long *reg_p = 0;
   2.418      unsigned int gpreg = 0;
   2.419      unsigned long eip;
   2.420 -    unsigned int inst_len; 
   2.421 +    int inst_len; 
   2.422 +    int index;
   2.423      struct vmcb_struct *vmcb;
   2.424      u8 buffer[MAX_INST_LEN];
   2.425 +    u8 prefix = 0;
   2.426  
   2.427      vmcb = v->arch.hvm_svm.vmcb;
   2.428      
   2.429 @@ -1121,13 +1085,15 @@ static void svm_dr_access (struct vcpu *
   2.430  
   2.431      eip = vmcb->rip;
   2.432      inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
   2.433 -
   2.434 -    ASSERT(buffer[0] == 0x0f && (buffer[1] & 0xFD) == 0x21);
   2.435 -
   2.436 -    gpreg = decode_src_reg(buffer[2]);
   2.437 -#if DEBUG
   2.438 -    ASSERT(reg == decode_dest_reg(buffer[2]));
   2.439 -#endif
   2.440 +    index = skip_prefix_bytes(buffer, sizeof(buffer));
   2.441 +    
   2.442 +    ASSERT(buffer[index+0] == 0x0f && (buffer[index+1] & 0xFD) == 0x21);
   2.443 +
   2.444 +    if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
   2.445 +        prefix = buffer[index-1];
   2.446 +
   2.447 +    gpreg = decode_src_reg(prefix, buffer[index + 2]);
   2.448 +    ASSERT(reg == decode_dest_reg(prefix, buffer[index + 2]));
   2.449  
   2.450      HVM_DBG_LOG(DBG_LEVEL_1, "svm_dr_access : eip=%lx, reg=%d, gpreg = %x",
   2.451              eip, reg, gpreg);
   2.452 @@ -1148,6 +1114,7 @@ static void svm_dr_access (struct vcpu *
   2.453          __hvm_bug(regs);
   2.454          break;
   2.455      }
   2.456 +    ASSERT(inst_len > 0);
   2.457      __update_guest_eip(vmcb, inst_len);
   2.458  }
   2.459  
   2.460 @@ -1405,7 +1372,7 @@ static int svm_set_cr0(unsigned long val
   2.461                      &v->arch.hvm_svm.cpu_state))
   2.462          {
   2.463              HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
   2.464 -            svm_inject_exception(vmcb, TRAP_gp_fault, 0);
   2.465 +            svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
   2.466          }
   2.467  
   2.468          if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
   2.469 @@ -1468,7 +1435,7 @@ static int svm_set_cr0(unsigned long val
   2.470       */
   2.471      if ((value & X86_CR0_PE) == 0) {
   2.472      	if (value & X86_CR0_PG) {
   2.473 -            svm_inject_exception(vmcb, TRAP_gp_fault, 0);
   2.474 +            svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
   2.475              return 0;
   2.476          }
   2.477  
   2.478 @@ -1503,7 +1470,7 @@ static void mov_from_cr(int cr, int gp, 
   2.479          value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
   2.480          break;
   2.481      case 4:
   2.482 -        value = vmcb->cr4;
   2.483 +        value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
   2.484          break;
   2.485      case 8:
   2.486  #if 0
   2.487 @@ -1602,12 +1569,19 @@ static int mov_to_cr(int gpreg, int cr, 
   2.488  
   2.489      case 4:         
   2.490          /* CR4 */
   2.491 -        if (value & X86_CR4_PAE)
   2.492 -            __hvm_bug(regs);    /* not implemented */
   2.493 -
   2.494 -        old_cr = vmcb->cr4;
   2.495 -        
   2.496 -        vmcb->cr4 = value;
   2.497 +        if (value & X86_CR4_PAE) {
   2.498 +            set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
   2.499 +        } else {
   2.500 +            if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
   2.501 +                         &v->arch.hvm_svm.cpu_state)) {
   2.502 +                svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
   2.503 +            }
   2.504 +            clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
   2.505 +        }
   2.506 +
   2.507 +        old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
   2.508 +        v->arch.hvm_svm.cpu_shadow_cr4 = value;
   2.509 +        vmcb->cr4 = value | SVM_CR4_HOST_MASK;
   2.510    
   2.511          /*
   2.512           * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
   2.513 @@ -1636,10 +1610,12 @@ static int svm_cr_access(struct vcpu *v,
   2.514          struct cpu_user_regs *regs)
   2.515  {
   2.516      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   2.517 -    unsigned int inst_len = 0;
   2.518 +    int inst_len = 0;
   2.519 +    int index;
   2.520      unsigned int gpreg;
   2.521      unsigned long value;
   2.522 -    u8 buffer[6];   
   2.523 +    u8 buffer[MAX_INST_LEN];   
   2.524 +    u8 prefix = 0;
   2.525      int result = 1;
   2.526      enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
   2.527      enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
   2.528 @@ -1648,29 +1624,41 @@ static int svm_cr_access(struct vcpu *v,
   2.529      ASSERT(vmcb);
   2.530  
   2.531      inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
   2.532 +    /* get index to first actual instruction byte - as we will need to know where the 
   2.533 +     * prefix lives later on
   2.534 +     */
   2.535 +    index = skip_prefix_bytes(buffer, sizeof(buffer));
   2.536      
   2.537      if (type == TYPE_MOV_TO_CR) 
   2.538      {
   2.539          inst_len = __get_instruction_length_from_list(vmcb, list_a, 
   2.540 -                ARR_SIZE(list_a), buffer, &match);
   2.541 +                ARR_SIZE(list_a), &buffer[index], &match);
   2.542      }
   2.543      else
   2.544      {
   2.545          inst_len = __get_instruction_length_from_list(vmcb, list_b, 
   2.546 -                ARR_SIZE(list_b), buffer, &match);
   2.547 +                ARR_SIZE(list_b), &buffer[index], &match);
   2.548      }
   2.549  
   2.550 +    ASSERT(inst_len > 0);
   2.551 +
   2.552 +    inst_len += index;
   2.553 +
   2.554 +    /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
   2.555 +    if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
   2.556 +        prefix = buffer[index-1];
   2.557 +
   2.558      HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);
   2.559  
   2.560      switch (match) 
   2.561      {
   2.562      case INSTR_MOV2CR:
   2.563 -        gpreg = decode_src_reg(buffer[2]);
   2.564 +        gpreg = decode_src_reg(prefix, buffer[index+2]);
   2.565          result = mov_to_cr(gpreg, cr, regs);
   2.566          break;
   2.567  
   2.568      case INSTR_MOVCR2:
   2.569 -        gpreg = decode_src_reg(buffer[2]);
   2.570 +        gpreg = decode_src_reg(prefix, buffer[index+2]);
   2.571          mov_from_cr(cr, gpreg, regs);
   2.572          break;
   2.573  
   2.574 @@ -1686,7 +1674,7 @@ static int svm_cr_access(struct vcpu *v,
   2.575          if (svm_dbg_on)
   2.576              svm_dump_inst(svm_rip2pointer(vmcb));
   2.577          
   2.578 -        gpreg = decode_src_reg(buffer[2]);
   2.579 +        gpreg = decode_src_reg(prefix, buffer[index+2]);
   2.580          value = get_reg(gpreg, regs, vmcb) & 0xF;
   2.581  
   2.582          if (svm_dbg_on)
   2.583 @@ -1704,7 +1692,7 @@ static int svm_cr_access(struct vcpu *v,
   2.584      case INSTR_SMSW:
   2.585          svm_dump_inst(svm_rip2pointer(vmcb));
   2.586          value = v->arch.hvm_svm.cpu_shadow_cr0;
   2.587 -        gpreg = decode_src_reg(buffer[2]);
   2.588 +        gpreg = decode_src_reg(prefix, buffer[index+2]);
   2.589          set_reg(gpreg, value, regs, vmcb);
   2.590  
   2.591          if (svm_dbg_on)
   2.592 @@ -1727,7 +1715,7 @@ static int svm_cr_access(struct vcpu *v,
   2.593  static inline void svm_do_msr_access(struct vcpu *v, struct cpu_user_regs *regs)
   2.594  {
   2.595      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   2.596 -    unsigned int  inst_len;
   2.597 +    int  inst_len;
   2.598      int64_t tsc_sum;
   2.599  
   2.600      ASSERT(vmcb);
   2.601 @@ -1868,7 +1856,7 @@ void svm_handle_invlpg(const short invlp
   2.602      struct vcpu *v = current;
   2.603      u8 opcode[MAX_INST_SIZE], prefix, length = MAX_INST_SIZE;
   2.604      unsigned long g_vaddr;
   2.605 -    unsigned int inst_len;
   2.606 +    int inst_len;
   2.607      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   2.608  
   2.609      ASSERT(vmcb);
   2.610 @@ -1885,6 +1873,7 @@ void svm_handle_invlpg(const short invlp
   2.611      if (invlpga)
   2.612      {
   2.613          inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
   2.614 +        ASSERT(inst_len > 0);
   2.615          __update_guest_eip(vmcb, inst_len);
   2.616  
   2.617          /* 
   2.618 @@ -1898,6 +1887,7 @@ void svm_handle_invlpg(const short invlp
   2.619          /* What about multiple prefix codes? */
   2.620          prefix = (is_prefix(opcode[0])?opcode[0]:0);
   2.621          inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
   2.622 +        ASSERT(inst_len > 0);
   2.623  
   2.624          inst_len--;
   2.625          length -= inst_len;
   2.626 @@ -1949,7 +1939,10 @@ static int svm_do_vmmcall_reset_to_realm
   2.627      v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;
   2.628  
   2.629      vmcb->cr2 = 0;
   2.630 -    vmcb->cr4 = 0;
   2.631 +    vmcb->efer = EFER_SVME;
   2.632 +
   2.633 +    vmcb->cr4 = SVM_CR4_HOST_MASK;
   2.634 +    v->arch.hvm_svm.cpu_shadow_cr4 = 0;
   2.635  
   2.636      /* This will jump to ROMBIOS */
   2.637      vmcb->rip = 0xFFF0;
   2.638 @@ -2019,12 +2012,13 @@ static int svm_do_vmmcall_reset_to_realm
   2.639  static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
   2.640  {
   2.641      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   2.642 -    unsigned int inst_len;
   2.643 +    int inst_len;
   2.644  
   2.645      ASSERT(vmcb);
   2.646      ASSERT(regs);
   2.647  
   2.648      inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
   2.649 +    ASSERT(inst_len > 0);
   2.650  
   2.651      /* VMMCALL sanity check */
   2.652      if (vmcb->cpl > get_vmmcall_cpl(regs->edi))
   2.653 @@ -2478,7 +2472,7 @@ asmlinkage void svm_vmexit_handler(struc
   2.654          {
   2.655              v->arch.hvm_svm.injecting_event = 1;
   2.656              /* Inject #PG using Interruption-Information Fields */
   2.657 -            svm_inject_exception(vmcb, TRAP_page_fault, regs.error_code);
   2.658 +            svm_inject_exception(vmcb, TRAP_page_fault, 1, regs.error_code);
   2.659  
   2.660              v->arch.hvm_svm.cpu_cr2 = va;
   2.661              vmcb->cr2 = va;
     3.1 --- a/xen/arch/x86/hvm/svm/vmcb.c	Tue Feb 28 19:00:15 2006 +0100
     3.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c	Tue Feb 28 22:57:38 2006 +0100
     3.3 @@ -190,7 +190,6 @@ static int construct_init_vmcb_guest(str
     3.4      unsigned long eflags;
     3.5      unsigned long shadow_cr;
     3.6      struct vmcb_struct *vmcb = arch_svm->vmcb;
     3.7 -    struct Xgt_desc_struct desc;
     3.8  
     3.9      /* Allows IRQs to be shares */
    3.10      vmcb->vintr.fields.intr_masking = 1;
    3.11 @@ -224,9 +223,9 @@ static int construct_init_vmcb_guest(str
    3.12      vmcb->fs.base = 0;
    3.13      vmcb->gs.base = 0;
    3.14  
    3.15 -    __asm__ __volatile__ ("sidt  (%0) \n" :: "a"(&desc) : "memory");
    3.16 -    vmcb->idtr.base = desc.address;
    3.17 -    vmcb->idtr.limit = desc.size;
    3.18 +    /* Guest Interrupt descriptor table */
    3.19 +    vmcb->idtr.base = 0;
    3.20 +    vmcb->idtr.limit = 0;
    3.21  
    3.22      /* Set up segment attributes */
    3.23      attrib.bytes = 0;
    3.24 @@ -248,15 +247,11 @@ static int construct_init_vmcb_guest(str
    3.25      attrib.fields.type = 0xb;   /* type=0xb -> executable/readable, accessed */
    3.26      vmcb->cs.attributes = attrib;
    3.27  
    3.28 -    /* Global descriptor table */
    3.29 -    //NMERGE7500 - can probably remove access to gdtr
    3.30 -    vmcb->gdtr.base = regs->edx;
    3.31 -    regs->edx = 0;
    3.32 -    ASSERT(regs->eax <= 0xFFFF); /* Make sure we're in the limit */
    3.33 -    vmcb->gdtr.limit = regs->eax;
    3.34 -    regs->eax = 0;
    3.35 +    /* Guest Global descriptor table */
    3.36 +    vmcb->gdtr.base = 0;
    3.37 +    vmcb->gdtr.limit = 0;
    3.38  
    3.39 -    /* Local Descriptor Table */
    3.40 +    /* Guest Local Descriptor Table */
    3.41      attrib.fields.s = 0; /* not code or data segement */
    3.42      attrib.fields.type = 0x2; /* LDT */
    3.43      attrib.fields.db = 0; /* 16-bit */
    3.44 @@ -279,11 +274,10 @@ static int construct_init_vmcb_guest(str
    3.45      /* CR3 is set in svm_final_setup_guest */
    3.46  
    3.47      __asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) :); 
    3.48 -    shadow_cr = crn;
    3.49 -    vmcb->cr4 = shadow_cr;
    3.50 +    arch_svm->cpu_shadow_cr4 = crn & ~(X86_CR4_PGE | X86_CR4_PSE);
    3.51 +    vmcb->cr4 = crn | SVM_CR4_HOST_MASK;
    3.52  
    3.53 -//MERGE7500 - should write a 0 instead to rsp?
    3.54 -    vmcb->rsp = regs->esp;
    3.55 +    vmcb->rsp = 0;
    3.56      vmcb->rip = regs->eip;
    3.57  
    3.58      eflags = regs->eflags & ~HVM_EFLAGS_RESERVED_0; /* clear 0s */
     4.1 --- a/xen/arch/x86/hvm/svm/x86_64/exits.S	Tue Feb 28 19:00:15 2006 +0100
     4.2 +++ b/xen/arch/x86/hvm/svm/x86_64/exits.S	Tue Feb 28 22:57:38 2006 +0100
     4.3 @@ -107,8 +107,6 @@ ENTRY(svm_asm_do_launch)
     4.4          movq %rax, VMCB_rax(%rcx)
     4.5          movq VCPU_svm_hsa_pa(%rbx), %rax
     4.6          VMSAVE
     4.7 -	/* XXX FPU SAVE */
     4.8 -	/* XXX DO TSC OFFSET */
     4.9  
    4.10          movq VCPU_svm_vmcb_pa(%rbx), %rax
    4.11          popq %r15
    4.12 @@ -137,9 +135,7 @@ ENTRY(svm_asm_do_launch)
    4.13          VMSAVE
    4.14          /* rax is the only register we're allowed to touch here... */
    4.15  
    4.16 -	/* XXX FPU SAVE */
    4.17          GET_CURRENT(%rax)
    4.18 -	/* XXX DO TSC OFFSET */
    4.19          movq VCPU_svm_hsa_pa(%rax), %rax
    4.20          VMLOAD
    4.21  
     5.1 --- a/xen/include/asm-x86/hvm/svm/emulate.h	Tue Feb 28 19:00:15 2006 +0100
     5.2 +++ b/xen/include/asm-x86/hvm/svm/emulate.h	Tue Feb 28 22:57:38 2006 +0100
     5.3 @@ -83,15 +83,15 @@ extern unsigned long get_effective_addr_
     5.4          struct cpu_user_regs *regs, const u8 prefix, const u8 *operand, 
     5.5          u8 *size);
     5.6  extern OPERATING_MODE get_operating_mode (struct vmcb_struct *vmcb);
     5.7 -extern unsigned int decode_dest_reg(u8 modrm);
     5.8 -extern unsigned int decode_src_reg(u8 modrm);
     5.9 +extern unsigned int decode_dest_reg(u8 prefix, u8 modrm);
    5.10 +extern unsigned int decode_src_reg(u8 prefix, u8 modrm);
    5.11  extern unsigned long svm_rip2pointer(struct vmcb_struct *vmcb);
    5.12 -extern unsigned int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
    5.13 +extern int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
    5.14          enum instruction_index *list, unsigned int list_count, 
    5.15          u8 *guest_eip_buf, enum instruction_index *match);
    5.16  
    5.17  
    5.18 -static inline unsigned int __get_instruction_length(struct vmcb_struct *vmcb, 
    5.19 +static inline int __get_instruction_length(struct vmcb_struct *vmcb, 
    5.20          enum instruction_index instr, u8 *guest_eip_buf)
    5.21  {
    5.22      return __get_instruction_length_from_list(vmcb, &instr, 1, guest_eip_buf, 
    5.23 @@ -138,9 +138,20 @@ static inline unsigned int is_prefix(u8 
    5.24  }
    5.25  
    5.26  
    5.27 +static inline int skip_prefix_bytes(u8 *buf, size_t size)
    5.28 +{
    5.29 +    int index;
    5.30 +    for (index = 0; index < size && is_prefix(buf[index]); index ++)  
    5.31 +        /* do nothing */ ;
    5.32 +    return index;
    5.33 +}
    5.34 +
    5.35 +
    5.36 +
    5.37  static void inline __update_guest_eip(struct vmcb_struct *vmcb, 
    5.38 -        unsigned long inst_len) 
    5.39 +        int inst_len) 
    5.40  {
    5.41 +    ASSERT(inst_len > 0);
    5.42      vmcb->rip += inst_len;
    5.43  }
    5.44  
     6.1 --- a/xen/include/asm-x86/hvm/svm/vmcb.h	Tue Feb 28 19:00:15 2006 +0100
     6.2 +++ b/xen/include/asm-x86/hvm/svm/vmcb.h	Tue Feb 28 22:57:38 2006 +0100
     6.3 @@ -269,21 +269,6 @@ enum {
     6.4  #define SVM_LONG_GUEST(ed)    \
     6.5    (test_bit(SVM_CPU_STATE_LMA_ENABLED, &ed->arch.hvm_svm.cpu_state))
     6.6  
     6.7 -enum {
     6.8 -    SVM_INDEX_MSR_LSTAR = 0,
     6.9 -    SVM_INDEX_MSR_STAR,
    6.10 -    SVM_INDEX_MSR_CSTAR,
    6.11 -    SVM_INDEX_MSR_SYSCALL_MASK,
    6.12 -    SVM_INDEX_MSR_EFER,
    6.13 -
    6.14 -    SVM_MSR_COUNT,
    6.15 -};
    6.16 -
    6.17 -struct svm_msr_state {
    6.18 -    unsigned long flags;
    6.19 -    unsigned long msr_items[SVM_MSR_COUNT];
    6.20 -    unsigned long shadow_gs;
    6.21 -};
    6.22  
    6.23  /* 
    6.24   * Attribute for segment selector. This is a copy of bit 40:47 & 52:55 of the
    6.25 @@ -449,7 +434,7 @@ struct vmcb_struct {
    6.26  
    6.27  struct arch_svm_struct {
    6.28      struct vmcb_struct	*vmcb;
    6.29 -    void		*host_save_area;
    6.30 +    void		        *host_save_area;
    6.31      u64                 host_save_pa;
    6.32      u64                 vmcb_pa;
    6.33      u32                 *iopm;
    6.34 @@ -461,11 +446,11 @@ struct arch_svm_struct {
    6.35      u32                 asid_core;
    6.36      
    6.37      unsigned long       flags;      /* VMCB flags */
    6.38 -    unsigned long       cpu_shadow_cr0; /* copy of guest read shadow CR0 */
    6.39 +    unsigned long       cpu_shadow_cr0; /* Guest value for CR0 */
    6.40 +    unsigned long       cpu_shadow_cr4; /* Guest value for CR4 */
    6.41      unsigned long       cpu_cr2;
    6.42      unsigned long       cpu_cr3;
    6.43      unsigned long       cpu_state;
    6.44 -    struct svm_msr_state msr_content;
    6.45      struct timer        hlt_timer;  /* hlt ins emulation wakeup timer */
    6.46  };
    6.47  
    6.48 @@ -487,6 +472,14 @@ enum {
    6.49  #define VMCB_EFLAGS_RESERVED_0          0xffc08028 /* bitmap for 0 */
    6.50  #define VMCB_EFLAGS_RESERVED_1          0x00000002 /* bitmap for 1 */
    6.51  
    6.52 +/* These bits in the CR4 are owned by the host */
    6.53 +#ifdef __i386__
    6.54 +#define SVM_CR4_HOST_MASK (0)
    6.55 +#else
    6.56 +#define SVM_CR4_HOST_MASK (X86_CR4_PAE)
    6.57 +#endif
    6.58 +
    6.59 +
    6.60  #endif /* ASM_X86_HVM_SVM_VMCS_H__ */
    6.61  
    6.62  /*