ia64/xen-unstable

changeset 15105:9e9c09c75110

x86: fix EFER handling

Introduce a per-CPU shadow of the current EFER contents, as the context-switch
code must rewrite this MSR so that all guests run with appropriate EFER.SCE
and EFER.NX settings. Remove EFER from the set of MSRs that VMX deals with in
a generic fashion.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
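
For reference, the per-CPU shadow introduced in xen/include/asm-x86/msr.h below caches the last value written to EFER, so later reads and read-modify-write updates (e.g. toggling EFER.SCE on context switch) avoid an extra rdmsr. The following is a minimal, self-contained sketch of the same caching pattern, not the patch itself: the MSR accesses are stubbed with a plain variable and an ordinary static stands in for the per-CPU variable, purely for illustration.

    #include <stdint.h>
    #include <stdio.h>

    #define EFER_SCE (1UL << 0)
    #define EFER_NX  (1UL << 11)

    /* Stand-ins for the real MSR and rdmsrl/wrmsrl; a plain static replaces
     * the per-CPU variable in this single-threaded illustration. */
    static uint64_t hw_efer = EFER_SCE | EFER_NX;   /* pretend hardware state */
    static uint64_t efer_shadow;                    /* cached copy, 0 = not yet read */

    static uint64_t read_efer(void)
    {
        if ( !efer_shadow )
            efer_shadow = hw_efer;  /* first use: one real MSR read */
        return efer_shadow;         /* subsequent uses: no MSR access */
    }

    static void write_efer(uint64_t val)
    {
        efer_shadow = val;          /* keep the shadow in sync ... */
        hw_efer = val;              /* ... and update the real MSR */
    }

    int main(void)
    {
        /* Context-switch style update: clear SCE, as done for a 32-on-64 PV guest. */
        if ( read_efer() & EFER_SCE )
            write_efer(read_efer() ^ EFER_SCE);
        printf("EFER now %#llx\n", (unsigned long long)read_efer());
        return 0;
    }

The "zero means not yet read" convention is cheap and safe here: if EFER genuinely reads back as zero, the cache simply re-reads the MSR, which is harmless.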
author kfraser@localhost.localdomain
date Tue May 15 10:43:18 2007 +0100 (2007-05-15)
parents f4390e34ad12
children c027880b50b4
files xen/arch/x86/domain.c xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/vmx/vmcs.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/mm/shadow/multi.c xen/include/asm-x86/cpufeature.h xen/include/asm-x86/hvm/hvm.h xen/include/asm-x86/hvm/svm/svm.h xen/include/asm-x86/hvm/vmx/vmcs.h xen/include/asm-x86/hvm/vmx/vmx.h xen/include/asm-x86/msr.h
line diff
     1.1 --- a/xen/arch/x86/domain.c	Tue May 15 10:28:28 2007 +0100
     1.2 +++ b/xen/arch/x86/domain.c	Tue May 15 10:43:18 2007 +0100
     1.3 @@ -47,6 +47,7 @@
     1.4  #endif
     1.5  
     1.6  DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
     1.7 +DEFINE_PER_CPU(__u64, efer);
     1.8  
     1.9  static void paravirt_ctxt_switch_from(struct vcpu *v);
    1.10  static void paravirt_ctxt_switch_to(struct vcpu *v);
    1.11 @@ -1135,21 +1136,18 @@ void context_switch(struct vcpu *prev, s
    1.12          __context_switch();
    1.13  
    1.14  #ifdef CONFIG_COMPAT
    1.15 -        if ( is_idle_vcpu(prev) ||
    1.16 -             (is_pv_32on64_domain(prev->domain) !=
    1.17 -              is_pv_32on64_domain(next->domain)) )
    1.18 +        if ( !is_hvm_vcpu(next) &&
    1.19 +             (is_idle_vcpu(prev) ||
    1.20 +              is_hvm_vcpu(prev) ||
    1.21 +              is_pv_32on64_vcpu(prev) != is_pv_32on64_vcpu(next)) )
    1.22          {
    1.23 -            uint32_t efer_lo, efer_hi;
    1.24 +            uint64_t efer = read_efer();
    1.25  
    1.26              local_flush_tlb_one(GDT_VIRT_START(next) +
    1.27                                  FIRST_RESERVED_GDT_BYTE);
    1.28  
    1.29 -            rdmsr(MSR_EFER, efer_lo, efer_hi);
    1.30 -            if ( !is_pv_32on64_domain(next->domain) == !(efer_lo & EFER_SCE) )
    1.31 -            {
    1.32 -                efer_lo ^= EFER_SCE;
    1.33 -                wrmsr(MSR_EFER, efer_lo, efer_hi);
    1.34 -            }
    1.35 +            if ( !is_pv_32on64_vcpu(next) == !(efer & EFER_SCE) )
    1.36 +                write_efer(efer ^ EFER_SCE);
    1.37          }
    1.38  #endif
    1.39  
     2.1 --- a/xen/arch/x86/hvm/svm/svm.c	Tue May 15 10:28:28 2007 +0100
     2.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Tue May 15 10:43:18 2007 +0100
     2.3 @@ -93,11 +93,8 @@ static inline void svm_inject_exception(
     2.4  
     2.5  static void stop_svm(void)
     2.6  {
     2.7 -    u32 eax, edx;    
     2.8      /* We turn off the EFER_SVME bit. */
     2.9 -    rdmsr(MSR_EFER, eax, edx);
    2.10 -    eax &= ~EFER_SVME;
    2.11 -    wrmsr(MSR_EFER, eax, edx);
    2.12 +    write_efer(read_efer() & ~EFER_SVME);
    2.13  }
    2.14  
    2.15  static void svm_store_cpu_guest_regs(
    2.16 @@ -138,7 +135,13 @@ static inline int long_mode_do_msr_write
    2.17      {
    2.18      case MSR_EFER:
    2.19          /* Offending reserved bit will cause #GP. */
    2.20 -        if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
    2.21 +#ifdef __x86_64__
    2.22 +        if ( (msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE)) ||
    2.23 +#else
    2.24 +        if ( (msr_content & ~(EFER_NX | EFER_SCE)) ||
    2.25 +#endif
    2.26 +             (!cpu_has_nx && (msr_content & EFER_NX)) ||
    2.27 +             (!cpu_has_syscall && (msr_content & EFER_SCE)) )
    2.28          {
    2.29              gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
    2.30                       "EFER: %"PRIx64"\n", msr_content);
    2.31 @@ -495,7 +498,7 @@ int svm_vmcb_restore(struct vcpu *v, str
    2.32  }
    2.33  
    2.34          
    2.35 -void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
    2.36 +static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
    2.37  {
    2.38      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    2.39  
    2.40 @@ -511,7 +514,7 @@ void svm_save_cpu_state(struct vcpu *v, 
    2.41  }
    2.42  
    2.43  
    2.44 -void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
    2.45 +static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
    2.46  {
    2.47      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    2.48  
    2.49 @@ -530,13 +533,13 @@ void svm_load_cpu_state(struct vcpu *v, 
    2.50      hvm_set_guest_time(v, data->tsc);
    2.51  }
    2.52  
    2.53 -void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
    2.54 +static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
    2.55  {
    2.56      svm_save_cpu_state(v, ctxt);
    2.57      svm_vmcb_save(v, ctxt);
    2.58  }
    2.59  
    2.60 -int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
    2.61 +static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
    2.62  {
    2.63      svm_load_cpu_state(v, ctxt);
    2.64      if (svm_vmcb_restore(v, ctxt)) {
    2.65 @@ -911,6 +914,7 @@ static struct hvm_function_table svm_fun
    2.66      .paging_enabled       = svm_paging_enabled,
    2.67      .long_mode_enabled    = svm_long_mode_enabled,
    2.68      .pae_enabled          = svm_pae_enabled,
    2.69 +    .nx_enabled           = svm_nx_enabled,
    2.70      .interrupts_enabled   = svm_interrupts_enabled,
    2.71      .guest_x86_mode       = svm_guest_x86_mode,
    2.72      .get_guest_ctrl_reg   = svm_get_ctrl_reg,
    2.73 @@ -967,9 +971,7 @@ int start_svm(void)
    2.74           ((root_vmcb[cpu] = alloc_vmcb()) == NULL) )
    2.75          return 0;
    2.76  
    2.77 -    rdmsr(MSR_EFER, eax, edx);
    2.78 -    eax |= EFER_SVME;
    2.79 -    wrmsr(MSR_EFER, eax, edx);
    2.80 +    write_efer(read_efer() | EFER_SVME);
    2.81  
    2.82      svm_npt_detect();
    2.83  
     3.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Tue May 15 10:28:28 2007 +0100
     3.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Tue May 15 10:43:18 2007 +0100
     3.3 @@ -285,11 +285,6 @@ static void construct_vmcs(struct vcpu *
     3.4  
     3.5      vmx_vmcs_enter(v);
     3.6  
     3.7 -    v->arch.hvm_vmx.cpu_cr2 = 0;
     3.8 -    v->arch.hvm_vmx.cpu_cr3 = 0;
     3.9 -    memset(&v->arch.hvm_vmx.msr_state, 0, sizeof(v->arch.hvm_vmx.msr_state));
    3.10 -    v->arch.hvm_vmx.vmxassist_enabled = 0;
    3.11 -
    3.12      /* VMCS controls. */
    3.13      __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
    3.14      __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
     4.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Tue May 15 10:28:28 2007 +0100
     4.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Tue May 15 10:43:18 2007 +0100
     4.3 @@ -89,7 +89,7 @@ static DEFINE_PER_CPU(struct vmx_msr_sta
     4.4  static u32 msr_index[VMX_MSR_COUNT] =
     4.5  {
     4.6      MSR_LSTAR, MSR_STAR, MSR_CSTAR,
     4.7 -    MSR_SYSCALL_MASK, MSR_EFER,
     4.8 +    MSR_SYSCALL_MASK
     4.9  };
    4.10  
    4.11  static void vmx_save_host_msrs(void)
    4.12 @@ -116,8 +116,7 @@ static inline int long_mode_do_msr_read(
    4.13  
    4.14      switch ( (u32)regs->ecx ) {
    4.15      case MSR_EFER:
    4.16 -        HVM_DBG_LOG(DBG_LEVEL_2, "EFER msr_content 0x%"PRIx64, msr_content);
    4.17 -        msr_content = guest_msr_state->msrs[VMX_INDEX_MSR_EFER];
    4.18 +        msr_content = v->arch.hvm_vmx.efer;
    4.19          break;
    4.20  
    4.21      case MSR_FS_BASE:
    4.22 @@ -129,7 +128,7 @@ static inline int long_mode_do_msr_read(
    4.23          goto check_long_mode;
    4.24  
    4.25      case MSR_SHADOW_GS_BASE:
    4.26 -        msr_content = guest_msr_state->shadow_gs;
    4.27 +        msr_content = v->arch.hvm_vmx.shadow_gs;
    4.28      check_long_mode:
    4.29          if ( !(vmx_long_mode_enabled(v)) )
    4.30          {
    4.31 @@ -181,7 +180,9 @@ static inline int long_mode_do_msr_write
    4.32      {
    4.33      case MSR_EFER:
    4.34          /* offending reserved bit will cause #GP */
    4.35 -        if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
    4.36 +        if ( (msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE)) ||
    4.37 +             (!cpu_has_nx && (msr_content & EFER_NX)) ||
    4.38 +             (!cpu_has_syscall && (msr_content & EFER_SCE)) )
    4.39          {
    4.40              gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
    4.41                       "EFER: %"PRIx64"\n", msr_content);
    4.42 @@ -189,7 +190,7 @@ static inline int long_mode_do_msr_write
    4.43          }
    4.44  
    4.45          if ( (msr_content & EFER_LME)
    4.46 -             &&  !(guest_msr_state->msrs[VMX_INDEX_MSR_EFER] & EFER_LME) )
    4.47 +             &&  !(v->arch.hvm_vmx.efer & EFER_LME) )
    4.48          {
    4.49              if ( unlikely(vmx_paging_enabled(v)) )
    4.50              {
    4.51 @@ -199,7 +200,7 @@ static inline int long_mode_do_msr_write
    4.52              }
    4.53          }
    4.54          else if ( !(msr_content & EFER_LME)
    4.55 -                  && (guest_msr_state->msrs[VMX_INDEX_MSR_EFER] & EFER_LME) )
    4.56 +                  && (v->arch.hvm_vmx.efer & EFER_LME) )
    4.57          {
    4.58              if ( unlikely(vmx_paging_enabled(v)) )
    4.59              {
    4.60 @@ -209,7 +210,11 @@ static inline int long_mode_do_msr_write
    4.61              }
    4.62          }
    4.63  
    4.64 -        guest_msr_state->msrs[VMX_INDEX_MSR_EFER] = msr_content;
    4.65 +        if ( (msr_content ^ v->arch.hvm_vmx.efer) & (EFER_NX|EFER_SCE) )
    4.66 +            write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
    4.67 +                       (msr_content & (EFER_NX|EFER_SCE)));
    4.68 +
    4.69 +        v->arch.hvm_vmx.efer = msr_content;
    4.70          break;
    4.71  
    4.72      case MSR_FS_BASE:
    4.73 @@ -227,7 +232,7 @@ static inline int long_mode_do_msr_write
    4.74              __vmwrite(GUEST_GS_BASE, msr_content);
    4.75          else
    4.76          {
    4.77 -            v->arch.hvm_vmx.msr_state.shadow_gs = msr_content;
    4.78 +            v->arch.hvm_vmx.shadow_gs = msr_content;
    4.79              wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
    4.80          }
    4.81  
    4.82 @@ -279,12 +284,14 @@ static void vmx_restore_host_msrs(void)
    4.83          wrmsrl(msr_index[i], host_msr_state->msrs[i]);
    4.84          clear_bit(i, &host_msr_state->flags);
    4.85      }
    4.86 +    if ( cpu_has_nx && !(read_efer() & EFER_NX) )
    4.87 +        write_efer(read_efer() | EFER_NX);
    4.88  }
    4.89  
    4.90  static void vmx_save_guest_msrs(struct vcpu *v)
    4.91  {
    4.92      /* MSR_SHADOW_GS_BASE may have been changed by swapgs instruction. */
    4.93 -    rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.msr_state.shadow_gs);
    4.94 +    rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
    4.95  }
    4.96  
    4.97  static void vmx_restore_guest_msrs(struct vcpu *v)
    4.98 @@ -296,11 +303,9 @@ static void vmx_restore_guest_msrs(struc
    4.99      guest_msr_state = &v->arch.hvm_vmx.msr_state;
   4.100      host_msr_state = &this_cpu(host_msr_state);
   4.101  
   4.102 -    wrmsrl(MSR_SHADOW_GS_BASE, guest_msr_state->shadow_gs);
   4.103 +    wrmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_vmx.shadow_gs);
   4.104  
   4.105      guest_flags = guest_msr_state->flags;
   4.106 -    if ( !guest_flags )
   4.107 -        return;
   4.108  
   4.109      while ( guest_flags ) {
   4.110          i = find_first_set_bit(guest_flags);
   4.111 @@ -312,23 +317,90 @@ static void vmx_restore_guest_msrs(struc
   4.112          wrmsrl(msr_index[i], guest_msr_state->msrs[i]);
   4.113          clear_bit(i, &guest_flags);
   4.114      }
   4.115 +
   4.116 +    if ( (v->arch.hvm_vmx.efer ^ read_efer()) & (EFER_NX|EFER_SCE) )
   4.117 +    {
   4.118 +        HVM_DBG_LOG(DBG_LEVEL_2,
   4.119 +                    "restore guest's EFER with value %lx",
   4.120 +                    v->arch.hvm_vmx.efer);
   4.121 +        write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
   4.122 +                   (v->arch.hvm_vmx.efer & (EFER_NX|EFER_SCE)));
   4.123 +    }
   4.124  }
   4.125  
   4.126  #else  /* __i386__ */
   4.127  
   4.128  #define vmx_save_host_msrs()        ((void)0)
   4.129 -#define vmx_restore_host_msrs()     ((void)0)
   4.130 +
   4.131 +static void vmx_restore_host_msrs(void)
   4.132 +{
   4.133 +    if ( cpu_has_nx && !(read_efer() & EFER_NX) )
   4.134 +        write_efer(read_efer() | EFER_NX);
   4.135 +}
   4.136 +
   4.137  #define vmx_save_guest_msrs(v)      ((void)0)
   4.138 -#define vmx_restore_guest_msrs(v)   ((void)0)
   4.139 +
   4.140 +static void vmx_restore_guest_msrs(struct vcpu *v)
   4.141 +{
   4.142 +    if ( (v->arch.hvm_vmx.efer ^ read_efer()) & EFER_NX )
   4.143 +    {
   4.144 +        HVM_DBG_LOG(DBG_LEVEL_2,
   4.145 +                    "restore guest's EFER with value %lx",
   4.146 +                    v->arch.hvm_vmx.efer);
   4.147 +        write_efer((read_efer() & ~EFER_NX) |
   4.148 +                   (v->arch.hvm_vmx.efer & EFER_NX));
   4.149 +    }
   4.150 +}
   4.151  
   4.152  static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
   4.153  {
   4.154 -    return 0;
   4.155 +    u64 msr_content = 0;
   4.156 +    struct vcpu *v = current;
   4.157 +
   4.158 +    switch ( regs->ecx ) {
   4.159 +    case MSR_EFER:
   4.160 +        msr_content = v->arch.hvm_vmx.efer;
   4.161 +        break;
   4.162 +
   4.163 +    default:
   4.164 +        return 0;
   4.165 +    }
   4.166 +
   4.167 +    regs->eax = msr_content >>  0;
   4.168 +    regs->edx = msr_content >> 32;
   4.169 +
   4.170 +    return 1;
   4.171  }
   4.172  
   4.173  static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
   4.174  {
   4.175 -    return 0;
   4.176 +    u64 msr_content = regs->eax | ((u64)regs->edx << 32);
   4.177 +    struct vcpu *v = current;
   4.178 +
   4.179 +    switch ( regs->ecx )
   4.180 +    {
   4.181 +    case MSR_EFER:
   4.182 +        /* offending reserved bit will cause #GP */
   4.183 +        if ( (msr_content & ~EFER_NX) ||
   4.184 +             (!cpu_has_nx && (msr_content & EFER_NX)) )
   4.185 +        {
   4.186 +            gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
   4.187 +                     "EFER: %"PRIx64"\n", msr_content);
   4.188 +            vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
   4.189 +            return 0;
   4.190 +        }
   4.191 +
   4.192 +        if ( (msr_content ^ v->arch.hvm_vmx.efer) & EFER_NX )
   4.193 +            write_efer((read_efer() & ~EFER_NX) | (msr_content & EFER_NX));
   4.194 +
   4.195 +        v->arch.hvm_vmx.efer = msr_content;
   4.196 +        break;
   4.197 +
   4.198 +    default:
   4.199 +        return 0;
   4.200 +    }
   4.201 +
   4.202 +    return 1;
   4.203  }
   4.204  
   4.205  #endif /* __i386__ */
   4.206 @@ -636,7 +708,7 @@ int vmx_vmcs_restore(struct vcpu *v, str
   4.207      return -EINVAL;
   4.208  }
   4.209  
   4.210 -#ifdef HVM_DEBUG_SUSPEND
   4.211 +#if defined(__x86_64__) && defined(HVM_DEBUG_SUSPEND)
   4.212  static void dump_msr_state(struct vmx_msr_state *m)
   4.213  {
   4.214      int i = 0;
   4.215 @@ -647,17 +719,16 @@ static void dump_msr_state(struct vmx_ms
   4.216      printk("\n");
   4.217  }
   4.218  #else
   4.219 -static void dump_msr_state(struct vmx_msr_state *m)
   4.220 -{
   4.221 -}
   4.222 +#define dump_msr_state(m) ((void)0)
   4.223  #endif
   4.224          
   4.225 -void vmx_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
   4.226 +static void vmx_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
   4.227  {
   4.228 +#ifdef __x86_64__
   4.229      struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
   4.230      unsigned long guest_flags = guest_state->flags;
   4.231  
   4.232 -    data->shadow_gs = guest_state->shadow_gs;
   4.233 +    data->shadow_gs = v->arch.hvm_vmx.shadow_gs;
   4.234  
   4.235      /* save msrs */
   4.236      data->msr_flags        = guest_flags;
   4.237 @@ -665,15 +736,18 @@ void vmx_save_cpu_state(struct vcpu *v, 
   4.238      data->msr_star         = guest_state->msrs[VMX_INDEX_MSR_STAR];
   4.239      data->msr_cstar        = guest_state->msrs[VMX_INDEX_MSR_CSTAR];
   4.240      data->msr_syscall_mask = guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
   4.241 -    data->msr_efer         = guest_state->msrs[VMX_INDEX_MSR_EFER];
   4.242 +#endif
   4.243 +
   4.244 +    data->msr_efer = v->arch.hvm_vmx.efer;
   4.245  
   4.246      data->tsc = hvm_get_guest_time(v);
   4.247      
   4.248      dump_msr_state(guest_state);
   4.249  }
   4.250  
   4.251 -void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
   4.252 +static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
   4.253  {
   4.254 +#ifdef __x86_64__
   4.255      struct vmx_msr_state *guest_state = &v->arch.hvm_vmx.msr_state;
   4.256  
   4.257      /* restore msrs */
   4.258 @@ -682,9 +756,11 @@ void vmx_load_cpu_state(struct vcpu *v, 
   4.259      guest_state->msrs[VMX_INDEX_MSR_STAR]         = data->msr_star;
   4.260      guest_state->msrs[VMX_INDEX_MSR_CSTAR]        = data->msr_cstar;
   4.261      guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK] = data->msr_syscall_mask;
   4.262 -    guest_state->msrs[VMX_INDEX_MSR_EFER]         = data->msr_efer;
   4.263 -
   4.264 -    guest_state->shadow_gs = data->shadow_gs;
   4.265 +
   4.266 +    v->arch.hvm_vmx.shadow_gs = data->shadow_gs;
   4.267 +#endif
   4.268 +
   4.269 +    v->arch.hvm_vmx.efer = data->msr_efer;
   4.270  
   4.271      v->arch.hvm_vmx.vmxassist_enabled = !(data->cr0 & X86_CR0_PE);
   4.272  
   4.273 @@ -694,7 +770,7 @@ void vmx_load_cpu_state(struct vcpu *v, 
   4.274  }
   4.275  
   4.276  
   4.277 -void vmx_save_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
   4.278 +static void vmx_save_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
   4.279  {
   4.280      vmx_save_cpu_state(v, ctxt);
   4.281      vmx_vmcs_enter(v);
   4.282 @@ -702,7 +778,7 @@ void vmx_save_vmcs_ctxt(struct vcpu *v, 
   4.283      vmx_vmcs_exit(v);
   4.284  }
   4.285  
   4.286 -int vmx_load_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
   4.287 +static int vmx_load_vmcs_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
   4.288  {
   4.289      vmx_load_cpu_state(v, ctxt);
   4.290      if (vmx_vmcs_restore(v, ctxt)) {
   4.291 @@ -1016,6 +1092,11 @@ static int vmx_pae_enabled(struct vcpu *
   4.292      return (vmx_paging_enabled(v) && (cr4 & X86_CR4_PAE));
   4.293  }
   4.294  
   4.295 +static int vmx_nx_enabled(struct vcpu *v)
   4.296 +{
   4.297 +    return v->arch.hvm_vmx.efer & EFER_NX;
   4.298 +}
   4.299 +
   4.300  static int vmx_interrupts_enabled(struct vcpu *v) 
   4.301  {
   4.302      unsigned long eflags = __vmread(GUEST_RFLAGS); 
   4.303 @@ -1096,6 +1177,7 @@ static struct hvm_function_table vmx_fun
   4.304      .paging_enabled       = vmx_paging_enabled,
   4.305      .long_mode_enabled    = vmx_long_mode_enabled,
   4.306      .pae_enabled          = vmx_pae_enabled,
   4.307 +    .nx_enabled           = vmx_nx_enabled,
   4.308      .interrupts_enabled   = vmx_interrupts_enabled,
   4.309      .guest_x86_mode       = vmx_guest_x86_mode,
   4.310      .get_guest_ctrl_reg   = vmx_get_ctrl_reg,
   4.311 @@ -1996,8 +2078,7 @@ static int vmx_set_cr0(unsigned long val
   4.312              else
   4.313              {
   4.314                  HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode\n");
   4.315 -                v->arch.hvm_vmx.msr_state.msrs[VMX_INDEX_MSR_EFER]
   4.316 -                    |= EFER_LMA;
   4.317 +                v->arch.hvm_vmx.efer |= EFER_LMA;
   4.318                  vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
   4.319                  vm_entry_value |= VM_ENTRY_IA32E_MODE;
   4.320                  __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
   4.321 @@ -2046,8 +2127,7 @@ static int vmx_set_cr0(unsigned long val
   4.322               */
   4.323              if ( vmx_long_mode_enabled(v) )
   4.324              {
   4.325 -                v->arch.hvm_vmx.msr_state.msrs[VMX_INDEX_MSR_EFER]
   4.326 -                    &= ~EFER_LMA;
   4.327 +                v->arch.hvm_vmx.efer &= ~EFER_LMA;
   4.328                  vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
   4.329                  vm_entry_value &= ~VM_ENTRY_IA32E_MODE;
   4.330                  __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
   4.331 @@ -2079,7 +2159,7 @@ static int vmx_set_cr0(unsigned long val
   4.332      {
   4.333          if ( vmx_long_mode_enabled(v) )
   4.334          {
   4.335 -            v->arch.hvm_vmx.msr_state.msrs[VMX_INDEX_MSR_EFER] &= ~EFER_LMA;
   4.336 +            v->arch.hvm_vmx.efer &= ~EFER_LMA;
   4.337              vm_entry_value = __vmread(VM_ENTRY_CONTROLS);
   4.338              vm_entry_value &= ~VM_ENTRY_IA32E_MODE;
   4.339              __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value);
     5.1 --- a/xen/arch/x86/mm/shadow/multi.c	Tue May 15 10:28:28 2007 +0100
     5.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Tue May 15 10:43:18 2007 +0100
     5.3 @@ -181,11 +181,11 @@ guest_supports_superpages(struct vcpu *v
     5.4  static inline int
     5.5  guest_supports_nx(struct vcpu *v)
     5.6  {
     5.7 +    if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
     5.8 +        return 0;
     5.9      if ( !is_hvm_vcpu(v) )
    5.10 -        return cpu_has_nx;
    5.11 -
    5.12 -    // XXX - fix this!
    5.13 -    return 1;
    5.14 +        return 1;
    5.15 +    return hvm_nx_enabled(v);
    5.16  }
    5.17  
    5.18  
     6.1 --- a/xen/include/asm-x86/cpufeature.h	Tue May 15 10:28:28 2007 +0100
     6.2 +++ b/xen/include/asm-x86/cpufeature.h	Tue May 15 10:43:18 2007 +0100
     6.3 @@ -114,6 +114,7 @@
     6.4  #define cpu_has_xmm2		boot_cpu_has(X86_FEATURE_XMM2)
     6.5  #define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
     6.6  #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
     6.7 +#define cpu_has_syscall		boot_cpu_has(X86_FEATURE_SYSCALL)
     6.8  #define cpu_has_mp		boot_cpu_has(X86_FEATURE_MP)
     6.9  #define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)
    6.10  #define cpu_has_k6_mtrr		boot_cpu_has(X86_FEATURE_K6_MTRR)
    6.11 @@ -136,6 +137,7 @@
    6.12  #define cpu_has_xmm2		1
    6.13  #define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
    6.14  #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
    6.15 +#define cpu_has_syscall		1
    6.16  #define cpu_has_mp		1
    6.17  #define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)
    6.18  #define cpu_has_k6_mtrr		0
     7.1 --- a/xen/include/asm-x86/hvm/hvm.h	Tue May 15 10:28:28 2007 +0100
     7.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Tue May 15 10:43:18 2007 +0100
     7.3 @@ -93,14 +93,17 @@ struct hvm_function_table {
     7.4       * 1) determine whether paging is enabled,
     7.5       * 2) determine whether long mode is enabled,
     7.6       * 3) determine whether PAE paging is enabled,
     7.7 -     * 4) determine whether interrupts are enabled or not,
     7.8 -     * 5) determine the mode the guest is running in,
     7.9 -     * 6) return the current guest control-register value
    7.10 -     * 7) return the current guest segment descriptor base
    7.11 +     * 4) determine whether NX is enabled,
    7.12 +     * 5) determine whether interrupts are enabled or not,
    7.13 +     * 6) determine the mode the guest is running in,
    7.14 +     * 7) return the current guest control-register value
    7.15 +     * 8) return the current guest segment descriptor base
    7.16 +     * 9) return the current guest segment descriptor
    7.17       */
    7.18      int (*paging_enabled)(struct vcpu *v);
    7.19      int (*long_mode_enabled)(struct vcpu *v);
    7.20      int (*pae_enabled)(struct vcpu *v);
    7.21 +    int (*nx_enabled)(struct vcpu *v);
    7.22      int (*interrupts_enabled)(struct vcpu *v);
    7.23      int (*guest_x86_mode)(struct vcpu *v);
    7.24      unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num);
    7.25 @@ -199,6 +202,12 @@ hvm_interrupts_enabled(struct vcpu *v)
    7.26  }
    7.27  
    7.28  static inline int
    7.29 +hvm_nx_enabled(struct vcpu *v)
    7.30 +{
    7.31 +    return hvm_funcs.nx_enabled(v);
    7.32 +}
    7.33 +
    7.34 +static inline int
    7.35  hvm_guest_x86_mode(struct vcpu *v)
    7.36  {
    7.37      return hvm_funcs.guest_x86_mode(v);
     8.1 --- a/xen/include/asm-x86/hvm/svm/svm.h	Tue May 15 10:28:28 2007 +0100
     8.2 +++ b/xen/include/asm-x86/hvm/svm/svm.h	Tue May 15 10:43:18 2007 +0100
     8.3 @@ -60,6 +60,11 @@ static inline int svm_pae_enabled(struct
     8.4      return svm_paging_enabled(v) && (guest_cr4 & X86_CR4_PAE);
     8.5  }
     8.6  
     8.7 +static inline int svm_nx_enabled(struct vcpu *v)
     8.8 +{
     8.9 +    return v->arch.hvm_svm.cpu_shadow_efer & EFER_NX;
    8.10 +}
    8.11 +
    8.12  static inline int svm_pgbit_test(struct vcpu *v)
    8.13  {
    8.14      return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
     9.1 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h	Tue May 15 10:28:28 2007 +0100
     9.2 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h	Tue May 15 10:43:18 2007 +0100
     9.3 @@ -39,7 +39,6 @@ enum {
     9.4      VMX_INDEX_MSR_STAR,
     9.5      VMX_INDEX_MSR_CSTAR,
     9.6      VMX_INDEX_MSR_SYSCALL_MASK,
     9.7 -    VMX_INDEX_MSR_EFER,
     9.8  
     9.9      VMX_MSR_COUNT
    9.10  };
    9.11 @@ -47,7 +46,6 @@ enum {
    9.12  struct vmx_msr_state {
    9.13      unsigned long flags;
    9.14      unsigned long msrs[VMX_MSR_COUNT];
    9.15 -    unsigned long shadow_gs;
    9.16  };
    9.17  
    9.18  struct arch_vmx_struct {
    9.19 @@ -76,7 +74,11 @@ struct arch_vmx_struct {
    9.20      unsigned long        cpu_shadow_cr4; /* copy of guest read shadow CR4 */
    9.21      unsigned long        cpu_cr2; /* save CR2 */
    9.22      unsigned long        cpu_cr3;
    9.23 +#ifdef __x86_64__
    9.24      struct vmx_msr_state msr_state;
    9.25 +    unsigned long        shadow_gs;
    9.26 +#endif
    9.27 +    unsigned long        efer;
    9.28      unsigned long        vmxassist_enabled:1;
    9.29  };
    9.30  
    10.1 --- a/xen/include/asm-x86/hvm/vmx/vmx.h	Tue May 15 10:28:28 2007 +0100
    10.2 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h	Tue May 15 10:43:18 2007 +0100
    10.3 @@ -261,14 +261,12 @@ static inline int vmx_paging_enabled(str
    10.4  
    10.5  static inline int vmx_long_mode_enabled(struct vcpu *v)
    10.6  {
    10.7 -    u64 efer = v->arch.hvm_vmx.msr_state.msrs[VMX_INDEX_MSR_EFER];
    10.8 -    return efer & EFER_LMA;
    10.9 +    return v->arch.hvm_vmx.efer & EFER_LMA;
   10.10  }
   10.11  
   10.12  static inline int vmx_lme_is_set(struct vcpu *v)
   10.13  {
   10.14 -    u64 efer = v->arch.hvm_vmx.msr_state.msrs[VMX_INDEX_MSR_EFER];
   10.15 -    return efer & EFER_LME;
   10.16 +    return v->arch.hvm_vmx.efer & EFER_LME;
   10.17  }
   10.18  
   10.19  static inline int vmx_pgbit_test(struct vcpu *v)
    11.1 --- a/xen/include/asm-x86/msr.h	Tue May 15 10:28:28 2007 +0100
    11.2 +++ b/xen/include/asm-x86/msr.h	Tue May 15 10:43:18 2007 +0100
    11.3 @@ -3,6 +3,9 @@
    11.4  
    11.5  #ifndef __ASSEMBLY__
    11.6  
    11.7 +#include <xen/smp.h>
    11.8 +#include <xen/percpu.h>
    11.9 +
   11.10  #define rdmsr(msr,val1,val2) \
   11.11       __asm__ __volatile__("rdmsr" \
   11.12  			  : "=a" (val1), "=d" (val2) \
   11.13 @@ -142,6 +145,25 @@ static inline void wrmsrl(unsigned int m
   11.14  #define EFER_NX (1<<_EFER_NX)
   11.15  #define EFER_SVME (1<<_EFER_SVME)
   11.16  
   11.17 +#ifndef __ASSEMBLY__
   11.18 +
   11.19 +DECLARE_PER_CPU(__u64, efer);
   11.20 +
   11.21 +static inline __u64 read_efer(void)
   11.22 +{
   11.23 +    if (!this_cpu(efer))
   11.24 +        rdmsrl(MSR_EFER, this_cpu(efer));
   11.25 +    return this_cpu(efer);
   11.26 +}
   11.27 +
   11.28 +static inline void write_efer(__u64 val)
   11.29 +{
   11.30 +    this_cpu(efer) = val;
   11.31 +    wrmsrl(MSR_EFER, val);
   11.32 +}
   11.33 +
   11.34 +#endif
   11.35 +
   11.36  /* Intel MSRs. Some also available on other CPUs */
   11.37  #define MSR_IA32_PLATFORM_ID	0x17
   11.38