ia64/xen-unstable

changeset 5659:22ccddf93c40

To avoid MSR save/restore at every VM exit/entry time, we restore the
x86_64 specific MSRs at domain switch time if modified. In VMX domains,
we modify those upon requests from the guests to that end. Note that
IA32_EFER.LME and IA32_EFER.LMA are saved/restored by H/W on every VM
exit. For the usual domains (i.e. dom0 and domU), those MSRs are not
modified once set at initialization time, so we don't save them when
switched out, but simply reset them (if modified) to the initial values
when switched in. This patch also includes extended handling for 64-bit
guests. Please apply.

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Chengyuan Li <chengyuan.li@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Signed-off-by: Arun Sharma <arun.sharma@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Mon Jul 04 08:21:35 2005 +0000 (2005-07-04)
parents 50ac4630390b
children 82d686a6b63e
files xen/arch/x86/domain.c xen/arch/x86/vmx.c xen/include/asm-x86/msr.h xen/include/asm-x86/vmx_vmcs.h
line diff
     1.1 --- a/xen/arch/x86/domain.c	Mon Jul 04 08:20:20 2005 +0000
     1.2 +++ b/xen/arch/x86/domain.c	Mon Jul 04 08:21:35 2005 +0000
     1.3 @@ -296,6 +296,8 @@ void arch_do_boot_vcpu(struct vcpu *v)
     1.4  }
     1.5  
     1.6  #ifdef CONFIG_VMX
     1.7 +static int vmx_switch_on;
     1.8 +
     1.9  void arch_vmx_do_resume(struct vcpu *v) 
    1.10  {
    1.11      u64 vmcs_phys_ptr = (u64) virt_to_phys(v->arch.arch_vmx.vmcs);
    1.12 @@ -364,6 +366,9 @@ static int vmx_final_setup_guest(
    1.13                             SHM_translate|SHM_external);
    1.14      }
    1.15  
    1.16 +    if (!vmx_switch_on)
    1.17 +        vmx_switch_on = 1;
    1.18 +
    1.19      return 0;
    1.20  
    1.21  out:
    1.22 @@ -441,9 +446,12 @@ int arch_set_info_guest(
    1.23      }
    1.24      else
    1.25      {
    1.26 -        if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d, 
    1.27 -                                PGT_base_page_table) )
    1.28 -            return -EINVAL;
    1.29 +#ifdef __x86_64__
    1.30 +        if ( !(c->flags & VGCF_VMX_GUEST) )
    1.31 +#endif
    1.32 +            if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d, 
    1.33 +                  PGT_base_page_table) )
    1.34 +                return -EINVAL;
    1.35      }
    1.36  
    1.37      if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
    1.38 @@ -524,6 +532,12 @@ void toggle_guest_mode(struct vcpu *v)
    1.39          : "=r" (__r) : "r" (value), "0" (__r) );\
    1.40      __r; })
    1.41  
    1.42 +#if CONFIG_VMX
    1.43 +#define load_msrs(_p, _n)     if (vmx_switch_on) vmx_load_msrs((_p), (_n))
    1.44 +#else
    1.45 +#define load_msrs(_p, _n)     ((void)0)
    1.46 +#endif 
    1.47 +
    1.48  static void load_segments(struct vcpu *p, struct vcpu *n)
    1.49  {
    1.50      struct vcpu_guest_context *pctxt = &p->arch.guest_context;
    1.51 @@ -681,6 +695,7 @@ long do_switch_to_user(void)
    1.52  #elif defined(__i386__)
    1.53  
    1.54  #define load_segments(_p, _n) ((void)0)
    1.55 +#define load_msrs(_p, _n)     ((void)0)
    1.56  #define save_segments(_p)     ((void)0)
    1.57  #define clear_segments()      ((void)0)
    1.58  
    1.59 @@ -780,6 +795,7 @@ void context_switch(struct vcpu *prev, s
    1.60          {
    1.61              load_LDT(next);
    1.62              load_segments(realprev, next);
    1.63 +            load_msrs(realprev, next);
    1.64          }
    1.65      }
    1.66  
     2.1 --- a/xen/arch/x86/vmx.c	Mon Jul 04 08:20:20 2005 +0000
     2.2 +++ b/xen/arch/x86/vmx.c	Mon Jul 04 08:21:35 2005 +0000
     2.3 @@ -46,6 +46,214 @@ int vmcs_size;
     2.4  unsigned int opt_vmx_debug_level = 0;
     2.5  integer_param("vmx_debug", opt_vmx_debug_level);
     2.6  
     2.7 +#ifdef __x86_64__
     2.8 +static struct msr_state percpu_msr[NR_CPUS];
     2.9 +
    2.10 +static u32 msr_data_index[VMX_MSR_COUNT] =
    2.11 +{
    2.12 +    MSR_LSTAR, MSR_STAR, MSR_CSTAR,
    2.13 +    MSR_SYSCALL_MASK, MSR_EFER,
    2.14 +};
    2.15 +
    2.16 +/*
    2.17 + * To avoid MSR save/restore at every VM exit/entry time, we restore
    2.18 + * the x86_64 specific MSRs at domain switch time. Since those MSRs are
    2.19 + * not modified once set for generic domains, we don't save them, 
    2.20 + * but simply reset them to the values set at percpu_traps_init().
    2.21 + */
    2.22 +void vmx_load_msrs(struct vcpu *p, struct vcpu *n)
    2.23 +{
    2.24 +    struct msr_state *host_state;
    2.25 +    host_state = &percpu_msr[smp_processor_id()];
    2.26 +
    2.27 +    while (host_state->flags){
    2.28 +        int i;
    2.29 +
    2.30 +        i = find_first_set_bit(host_state->flags);
    2.31 +        wrmsrl(msr_data_index[i], host_state->msr_items[i]);
    2.32 +        clear_bit(i, &host_state->flags);
    2.33 +    }
    2.34 +}
    2.35 +
    2.36 +static void vmx_save_init_msrs(void)
    2.37 +{
    2.38 +    struct msr_state *host_state;
    2.39 +    host_state = &percpu_msr[smp_processor_id()];
    2.40 +    int i;
    2.41 +
    2.42 +    for (i = 0; i < VMX_MSR_COUNT; i++)
    2.43 +        rdmsrl(msr_data_index[i], host_state->msr_items[i]);
    2.44 +}
    2.45 +
    2.46 +#define CASE_READ_MSR(address)              \
    2.47 +    case MSR_ ## address:                 \
    2.48 +    msr_content = msr->msr_items[VMX_INDEX_MSR_ ## address]; \
    2.49 +    break
    2.50 +
    2.51 +#define CASE_WRITE_MSR(address)   \
    2.52 +    case MSR_ ## address:                   \
    2.53 +    msr->msr_items[VMX_INDEX_MSR_ ## address] = msr_content; \
    2.54 +    if (!test_bit(VMX_INDEX_MSR_ ## address, &msr->flags)){ \
    2.55 +    	set_bit(VMX_INDEX_MSR_ ## address, &msr->flags);   \
    2.56 +    }\
    2.57 +    break
    2.58 +
    2.59 +#define IS_CANO_ADDRESS(add) 1
    2.60 +static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
    2.61 +{
    2.62 +    u64     msr_content = 0;
    2.63 +    struct vcpu *vc = current;
    2.64 +    struct msr_state * msr = &vc->arch.arch_vmx.msr_content;
    2.65 +    switch(regs->ecx){
    2.66 +        case MSR_EFER:
    2.67 +            msr_content = msr->msr_items[VMX_INDEX_MSR_EFER];
    2.68 +            VMX_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %llx\n", (unsigned long long)msr_content);
    2.69 +            if (test_bit(VMX_CPU_STATE_LME_ENABLED,
    2.70 +                          &vc->arch.arch_vmx.cpu_state))
    2.71 +                msr_content |= 1 << _EFER_LME;
    2.72 +
    2.73 +            if (VMX_LONG_GUEST(vc))
    2.74 +                msr_content |= 1 << _EFER_LMA;
    2.75 +            break;
    2.76 +        case MSR_FS_BASE:
    2.77 +            if (!(VMX_LONG_GUEST(vc)))
    2.78 +                /* XXX should it be GP fault */
    2.79 +                domain_crash();
    2.80 +            __vmread(GUEST_FS_BASE, &msr_content);
    2.81 +            break;
    2.82 +        case MSR_GS_BASE:
    2.83 +            if (!(VMX_LONG_GUEST(vc)))
    2.84 +                domain_crash();
    2.85 +            __vmread(GUEST_GS_BASE, &msr_content);
    2.86 +            break;
    2.87 +        case MSR_SHADOW_GS_BASE:
    2.88 +            msr_content = msr->shadow_gs;
    2.89 +            break;
    2.90 +
    2.91 +        CASE_READ_MSR(STAR);
    2.92 +        CASE_READ_MSR(LSTAR);
    2.93 +        CASE_READ_MSR(CSTAR);
    2.94 +        CASE_READ_MSR(SYSCALL_MASK);
    2.95 +        default:
    2.96 +            return 0;
    2.97 +    }
    2.98 +    VMX_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %lx\n", msr_content);
    2.99 +    regs->eax = msr_content & 0xffffffff;
   2.100 +    regs->edx = msr_content >> 32;
   2.101 +    return 1;
   2.102 +}
   2.103 +
   2.104 +static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
   2.105 +{
   2.106 +    u64     msr_content = regs->eax | ((u64)regs->edx << 32); 
   2.107 +    struct vcpu *vc = current;
   2.108 +    struct msr_state * msr = &vc->arch.arch_vmx.msr_content;
   2.109 +    struct msr_state * host_state = 
   2.110 +		&percpu_msr[smp_processor_id()];
   2.111 +
   2.112 +    VMX_DBG_LOG(DBG_LEVEL_1, " mode_do_msr_write msr %lx msr_content %lx\n", 
   2.113 +                regs->ecx, msr_content);
   2.114 +
   2.115 +    switch (regs->ecx){
   2.116 +        case MSR_EFER:
   2.117 +            if ((msr_content & EFER_LME) ^
   2.118 +                  test_bit(VMX_CPU_STATE_LME_ENABLED,
   2.119 +                           &vc->arch.arch_vmx.cpu_state)){
   2.120 +                if (test_bit(VMX_CPU_STATE_PG_ENABLED,
   2.121 +                             &vc->arch.arch_vmx.cpu_state) ||
   2.122 +                    !test_bit(VMX_CPU_STATE_PAE_ENABLED,
   2.123 +                        &vc->arch.arch_vmx.cpu_state)){
   2.124 +                     vmx_inject_exception(vc, TRAP_gp_fault, 0);
   2.125 +                }
   2.126 +            }
   2.127 +            if (msr_content & EFER_LME)
   2.128 +                set_bit(VMX_CPU_STATE_LME_ENABLED,
   2.129 +                        &vc->arch.arch_vmx.cpu_state);
   2.130 +            /* No update for LME/LMA since it has no effect */
   2.131 +            msr->msr_items[VMX_INDEX_MSR_EFER] =
   2.132 +                  msr_content;
   2.133 +            if (msr_content & ~(EFER_LME | EFER_LMA)){
   2.134 +                msr->msr_items[VMX_INDEX_MSR_EFER] = msr_content;
   2.135 +                if (!test_bit(VMX_INDEX_MSR_EFER, &msr->flags)){ 
   2.136 +                    rdmsrl(MSR_EFER,
   2.137 +                            host_state->msr_items[VMX_INDEX_MSR_EFER]);
   2.138 +                      set_bit(VMX_INDEX_MSR_EFER, &host_state->flags);
   2.139 +                      set_bit(VMX_INDEX_MSR_EFER, &msr->flags);  
   2.140 +                      wrmsrl(MSR_EFER, msr_content);
   2.141 +                }
   2.142 +            }
   2.143 +            break;
   2.144 +
   2.145 +        case MSR_FS_BASE:
   2.146 +        case MSR_GS_BASE:
   2.147 +           if (!(VMX_LONG_GUEST(vc)))
   2.148 +                domain_crash();
   2.149 +           if (!IS_CANO_ADDRESS(msr_content)){
   2.150 +               VMX_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
   2.151 +               vmx_inject_exception(vc, TRAP_gp_fault, 0);
   2.152 +           }
   2.153 +           if (regs->ecx == MSR_FS_BASE)
   2.154 +               __vmwrite(GUEST_FS_BASE, msr_content);
   2.155 +           else 
   2.156 +               __vmwrite(GUEST_GS_BASE, msr_content);
   2.157 +           break;
   2.158 +
   2.159 +        case MSR_SHADOW_GS_BASE:
   2.160 +           if (!(VMX_LONG_GUEST(vc)))
   2.161 +               domain_crash();
   2.162 +           vc->arch.arch_vmx.msr_content.shadow_gs = msr_content;
   2.163 +           wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
   2.164 +           break;
   2.165 +
   2.166 +           CASE_WRITE_MSR(STAR);
   2.167 +           CASE_WRITE_MSR(LSTAR);
   2.168 +           CASE_WRITE_MSR(CSTAR);
   2.169 +           CASE_WRITE_MSR(SYSCALL_MASK);
   2.170 +        default:
   2.171 +            return 0;
   2.172 +    }
   2.173 +    return 1;
   2.174 +}
   2.175 +
   2.176 +void
   2.177 +vmx_restore_msrs(struct vcpu *d)
   2.178 +{
   2.179 +    int i = 0;
   2.180 +    struct msr_state *guest_state;
   2.181 +    struct msr_state *host_state;
   2.182 +    unsigned long guest_flags ;
   2.183 +
   2.184 +    guest_state = &d->arch.arch_vmx.msr_content;;
   2.185 +    host_state = &percpu_msr[smp_processor_id()];
   2.186 +
   2.187 +    wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs);
   2.188 +    guest_flags = guest_state->flags;
   2.189 +    if (!guest_flags)
   2.190 +        return;
   2.191 +
   2.192 +    while (guest_flags){
   2.193 +        i = find_first_set_bit(guest_flags);
   2.194 +
   2.195 +        VMX_DBG_LOG(DBG_LEVEL_2,
   2.196 +          "restore guest's index %d msr %lx with %lx\n",
   2.197 +          i, (unsigned long) msr_data_index[i], (unsigned long) guest_state->msr_items[i]);
   2.198 +        set_bit(i, &host_state->flags);
   2.199 +        wrmsrl(msr_data_index[i], guest_state->msr_items[i]);
   2.200 +        clear_bit(i, &guest_flags);
   2.201 +    }
   2.202 +}
   2.203 +
   2.204 +#else  /* __i386__ */
   2.205 +#define  vmx_save_init_msrs()   ((void)0)
   2.206 +
   2.207 +static inline int  long_mode_do_msr_read(struct cpu_user_regs *regs){
   2.208 +    return 0;
   2.209 +}
   2.210 +static inline int  long_mode_do_msr_write(struct cpu_user_regs *regs){
   2.211 +    return 0;
   2.212 +}
   2.213 +#endif
   2.214 +
   2.215  extern long evtchn_send(int lport);
   2.216  extern long do_block(void);
   2.217  void do_nmi(struct cpu_user_regs *, unsigned long);
   2.218 @@ -93,6 +301,8 @@ int start_vmx(void)
   2.219          printk("VMXON is done\n");
   2.220      }
   2.221  
   2.222 +    vmx_save_init_msrs();
   2.223 +
   2.224      return 1;
   2.225  }
   2.226  
   2.227 @@ -122,7 +332,6 @@ static void inline __update_guest_eip(un
   2.228  static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs) 
   2.229  {
   2.230      unsigned long eip;
   2.231 -    l1_pgentry_t gpte;
   2.232      unsigned long gpa; /* FIXME: PAE */
   2.233      int result;
   2.234  
   2.235 @@ -139,13 +348,16 @@ static int vmx_do_page_fault(unsigned lo
   2.236          handle_mmio(va, va);
   2.237          return 1;
   2.238      }
   2.239 -    gpte = gva_to_gpte(va);
   2.240 -    if (!(l1e_get_flags(gpte) & _PAGE_PRESENT) )
   2.241 -            return 0;
   2.242 -    gpa = l1e_get_paddr(gpte) + (va & ~PAGE_MASK);
   2.243 +    gpa = gva_to_gpa(va);
   2.244  
   2.245      /* Use 1:1 page table to identify MMIO address space */
   2.246 -    if (mmio_space(gpa)){
   2.247 +    if ( mmio_space(gpa) ){
   2.248 +        if (gpa >= 0xFEE00000) { /* workaround for local APIC */
   2.249 +            u32 inst_len;
   2.250 +            __vmread(INSTRUCTION_LEN, &(inst_len));
   2.251 +            __update_guest_eip(inst_len);
   2.252 +            return 1;
   2.253 +        }
   2.254          handle_mmio(va, gpa);
   2.255          return 1;
   2.256      }
   2.257 @@ -196,9 +408,11 @@ static void vmx_vmexit_do_cpuid(unsigned
   2.258      cpuid(input, &eax, &ebx, &ecx, &edx);
   2.259  
   2.260      if (input == 1) {
   2.261 +#ifdef __i386__
   2.262          clear_bit(X86_FEATURE_PSE, &edx);
   2.263          clear_bit(X86_FEATURE_PAE, &edx);
   2.264          clear_bit(X86_FEATURE_PSE36, &edx);
   2.265 +#endif
   2.266      }
   2.267  
   2.268      regs->eax = (unsigned long) eax;
   2.269 @@ -386,8 +600,6 @@ static void vmx_io_instruction(struct cp
   2.270           * selector is null.
   2.271           */
   2.272          if (!vm86 && check_for_null_selector(eip)) {
   2.273 -            printf("String I/O with null selector (cs:eip=0x%lx:0x%lx)\n",
   2.274 -                cs, eip);
   2.275              laddr = (p->dir == IOREQ_WRITE) ? regs->esi : regs->edi;
   2.276          }
   2.277          p->pdata_valid = 1;
   2.278 @@ -709,10 +921,10 @@ error:
   2.279  static int vmx_set_cr0(unsigned long value)
   2.280  {
   2.281      struct vcpu *d = current;
   2.282 -    unsigned long old_base_mfn, mfn;
   2.283 +    unsigned long mfn;
   2.284      unsigned long eip;
   2.285      int paging_enabled;
   2.286 -
   2.287 +    unsigned long vm_entry_value;
   2.288      /* 
   2.289       * CR0: We don't want to lose PE and PG.
   2.290       */
   2.291 @@ -733,10 +945,42 @@ static int vmx_set_cr0(unsigned long val
   2.292              printk("Invalid CR3 value = %lx", d->arch.arch_vmx.cpu_cr3);
   2.293              domain_crash_synchronous(); /* need to take a clean path */
   2.294          }
   2.295 +
   2.296 +#if defined(__x86_64__)
   2.297 +        if (test_bit(VMX_CPU_STATE_LME_ENABLED,
   2.298 +              &d->arch.arch_vmx.cpu_state) &&
   2.299 +          !test_bit(VMX_CPU_STATE_PAE_ENABLED,
   2.300 +              &d->arch.arch_vmx.cpu_state)){
   2.301 +            VMX_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
   2.302 +            vmx_inject_exception(d, TRAP_gp_fault, 0);
   2.303 +        }
   2.304 +        if (test_bit(VMX_CPU_STATE_LME_ENABLED,
   2.305 +              &d->arch.arch_vmx.cpu_state)){
   2.306 +            /* PAE should be enabled at this point */
   2.307 +            VMX_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
   2.308 +            set_bit(VMX_CPU_STATE_LMA_ENABLED,
   2.309 +              &d->arch.arch_vmx.cpu_state);
   2.310 +            __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
   2.311 +            vm_entry_value |= VM_ENTRY_CONTROLS_IA_32E_MODE;
   2.312 +            __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
   2.313 +
   2.314 +        }
   2.315 +
   2.316 +	unsigned long crn;
   2.317 +        /* update CR4's PAE if needed */
   2.318 +        __vmread(GUEST_CR4, &crn);
   2.319 +        if ( (!(crn & X86_CR4_PAE)) &&
   2.320 +          test_bit(VMX_CPU_STATE_PAE_ENABLED,
   2.321 +              &d->arch.arch_vmx.cpu_state)){
   2.322 +            VMX_DBG_LOG(DBG_LEVEL_1, "enable PAE on cr4\n");
   2.323 +            __vmwrite(GUEST_CR4, crn | X86_CR4_PAE);
   2.324 +        }
   2.325 +#elif defined( __i386__)
   2.326 +       	unsigned long old_base_mfn;
   2.327          old_base_mfn = pagetable_get_pfn(d->arch.guest_table);
   2.328          if (old_base_mfn)
   2.329              put_page(pfn_to_page(old_base_mfn));
   2.330 -
   2.331 +#endif
   2.332          /*
   2.333           * Now arch.guest_table points to machine physical.
   2.334           */
   2.335 @@ -760,6 +1004,24 @@ static int vmx_set_cr0(unsigned long val
   2.336       * a partition disables the CR0.PE bit.
   2.337       */
   2.338      if ((value & X86_CR0_PE) == 0) {
   2.339 +        if ( value & X86_CR0_PG ) {
   2.340 +            /* inject GP here */
   2.341 +            vmx_inject_exception(d, TRAP_gp_fault, 0);
   2.342 +            return 0;
   2.343 +        } else {
   2.344 +            /* 
   2.345 +             * Disable paging here.
   2.346 +             * Same to PE == 1 && PG == 0
   2.347 +             */
   2.348 +            if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
   2.349 +                         &d->arch.arch_vmx.cpu_state)){
   2.350 +                clear_bit(VMX_CPU_STATE_LMA_ENABLED,
   2.351 +                          &d->arch.arch_vmx.cpu_state);
   2.352 +                __vmread(VM_ENTRY_CONTROLS, &vm_entry_value);
   2.353 +                vm_entry_value &= ~VM_ENTRY_CONTROLS_IA_32E_MODE;
   2.354 +                __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
   2.355 +            }
   2.356 +        }
   2.357  	__vmread(GUEST_RIP, &eip);
   2.358  	VMX_DBG_LOG(DBG_LEVEL_1,
   2.359  	    "Disabling CR0.PE at %%eip 0x%lx\n", eip);
   2.360 @@ -791,6 +1053,26 @@ static int vmx_set_cr0(unsigned long val
   2.361  #define CASE_GET_REG(REG, reg)  \
   2.362      case REG_ ## REG: value = regs->reg; break
   2.363  
   2.364 +#define CASE_EXTEND_SET_REG \
   2.365 +      CASE_EXTEND_REG(S)
   2.366 +#define CASE_EXTEND_GET_REG \
   2.367 +      CASE_EXTEND_REG(G)
   2.368 +
   2.369 +#ifdef __i386__
   2.370 +#define CASE_EXTEND_REG(T)
   2.371 +#else
   2.372 +#define CASE_EXTEND_REG(T)    \
   2.373 +    CASE_ ## T ## ET_REG(R8, r8); \
   2.374 +    CASE_ ## T ## ET_REG(R9, r9); \
   2.375 +    CASE_ ## T ## ET_REG(R10, r10); \
   2.376 +    CASE_ ## T ## ET_REG(R11, r11); \
   2.377 +    CASE_ ## T ## ET_REG(R12, r12); \
   2.378 +    CASE_ ## T ## ET_REG(R13, r13); \
   2.379 +    CASE_ ## T ## ET_REG(R14, r14); \
   2.380 +    CASE_ ## T ## ET_REG(R15, r15);
   2.381 +#endif
   2.382 +
   2.383 +
   2.384  /*
   2.385   * Write to control registers
   2.386   */
   2.387 @@ -808,6 +1090,7 @@ static int mov_to_cr(int gp, int cr, str
   2.388          CASE_GET_REG(EBP, ebp);
   2.389          CASE_GET_REG(ESI, esi);
   2.390          CASE_GET_REG(EDI, edi);
   2.391 +        CASE_EXTEND_GET_REG
   2.392      case REG_ESP:
   2.393          __vmread(GUEST_RSP, &value);
   2.394          break;
   2.395 @@ -878,12 +1161,30 @@ static int mov_to_cr(int gp, int cr, str
   2.396          break;
   2.397      }
   2.398      case 4:         
   2.399 +    {
   2.400          /* CR4 */
   2.401 -        if (value & X86_CR4_PAE)
   2.402 -            __vmx_bug(regs);    /* not implemented */
   2.403 +        unsigned long old_guest_cr;
   2.404 +        unsigned long pae_disabled = 0;
   2.405 +
   2.406 +        __vmread(GUEST_CR4, &old_guest_cr);
   2.407 +        if (value & X86_CR4_PAE){
   2.408 +            set_bit(VMX_CPU_STATE_PAE_ENABLED, &d->arch.arch_vmx.cpu_state);
   2.409 +            if(!vmx_paging_enabled(d))
   2.410 +                pae_disabled = 1;
   2.411 +        } else {
   2.412 +            if (test_bit(VMX_CPU_STATE_LMA_ENABLED,
   2.413 +                         &d->arch.arch_vmx.cpu_state)){
   2.414 +                vmx_inject_exception(d, TRAP_gp_fault, 0);
   2.415 +            }
   2.416 +            clear_bit(VMX_CPU_STATE_PAE_ENABLED, &d->arch.arch_vmx.cpu_state);
   2.417 +        }
   2.418 +
   2.419          __vmread(CR4_READ_SHADOW, &old_cr);
   2.420 -        
   2.421 -        __vmwrite(GUEST_CR4, (value | X86_CR4_VMXE));
   2.422 +        if (pae_disabled)
   2.423 +            __vmwrite(GUEST_CR4, ((value & ~X86_CR4_PAE) | X86_CR4_VMXE));
   2.424 +        else
   2.425 +            __vmwrite(GUEST_CR4, value| X86_CR4_VMXE);
   2.426 +
   2.427          __vmwrite(CR4_READ_SHADOW, value);
   2.428  
   2.429          /*
   2.430 @@ -891,10 +1192,10 @@ static int mov_to_cr(int gp, int cr, str
   2.431           * all TLB entries except global entries.
   2.432           */
   2.433          if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
   2.434 -            vmx_shadow_clear_state(d->domain);
   2.435              shadow_sync_all(d->domain);
   2.436          }
   2.437          break;
   2.438 +    }
   2.439      default:
   2.440          printk("invalid cr: %d\n", gp);
   2.441          __vmx_bug(regs);
   2.442 @@ -1000,7 +1301,9 @@ static inline void vmx_do_msr_read(struc
   2.443              regs->edx = 0;
   2.444              break;
   2.445          default:
   2.446 -            rdmsr(regs->ecx, regs->eax, regs->edx);
   2.447 +            if(long_mode_do_msr_read(regs))
   2.448 +                return;
   2.449 +            rdmsr_user(regs->ecx, regs->eax, regs->edx);
   2.450              break;
   2.451      }
   2.452  
   2.453 @@ -1026,6 +1329,7 @@ static inline void vmx_do_msr_write(stru
   2.454              __vmwrite(GUEST_SYSENTER_EIP, regs->eax);
   2.455              break;
   2.456          default:
   2.457 +            long_mode_do_msr_write(regs);
   2.458              break;
   2.459      }
   2.460  
     3.1 --- a/xen/include/asm-x86/msr.h	Mon Jul 04 08:20:20 2005 +0000
     3.2 +++ b/xen/include/asm-x86/msr.h	Mon Jul 04 08:21:35 2005 +0000
     3.3 @@ -18,6 +18,8 @@
     3.4  			  : /* no outputs */ \
     3.5  			  : "c" (msr), "a" (val1), "d" (val2))
     3.6  
     3.7 +#define wrmsrl(msr,val) wrmsr(msr,(__u32)((__u64)(val)),((__u64)(val))>>32) 
     3.8 +
     3.9  #define rdmsr_user(msr,val1,val2) ({\
    3.10      int _rc; \
    3.11      __asm__ __volatile__( \
     4.1 --- a/xen/include/asm-x86/vmx_vmcs.h	Mon Jul 04 08:20:20 2005 +0000
     4.2 +++ b/xen/include/asm-x86/vmx_vmcs.h	Mon Jul 04 08:21:35 2005 +0000
     4.3 @@ -27,6 +27,14 @@
     4.4  extern int start_vmx(void);
     4.5  extern void stop_vmx(void);
     4.6  
     4.7 +#if defined (__x86_64__)
     4.8 +extern void vmx_load_msrs(struct vcpu *p, struct vcpu *n);
     4.9 +void vmx_restore_msrs(struct vcpu *d);
    4.10 +#else
    4.11 +#define vmx_load_msrs(_p, _n)      ((void)0)
    4.12 +#define vmx_restore_msrs(_v)       ((void)0)
    4.13 +#endif
    4.14 +
    4.15  void vmx_enter_scheduler(void);
    4.16  
    4.17  enum {
    4.18 @@ -87,7 +95,6 @@ struct vmcs_struct *alloc_vmcs(void);
    4.19  void free_vmcs(struct vmcs_struct *);
    4.20  int  load_vmcs(struct arch_vmx_struct *, u64);
    4.21  int  store_vmcs(struct arch_vmx_struct *, u64);
    4.22 -void dump_vmcs(void);
    4.23  int  construct_vmcs(struct arch_vmx_struct *, struct cpu_user_regs *, 
    4.24                      struct vcpu_guest_context *, int);
    4.25