ia64/xen-unstable

changeset 14640:a545ac9028d2

hvm: Avoid separate nested-page-table control-register logic in SVM code.
Clean up VMX set_cr0() function a little to avoid unnecessary diffs
with SVM version.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Thu Mar 29 12:04:35 2007 +0100 (2007-03-29)
parents 98b049ed2540
children 31f20aaac818
files xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/vmx/vmx.c
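In outline, the patch deletes the separate npt_set_cr0()/npt_mov_to_cr()/npt_mov_from_cr() path and routes both HAP and shadow paging through one svm_set_cr0()/mov_to_cr(), branching on paging_mode_hap() only where the two modes genuinely differ; vmx_set_cr0() is then reshuffled to match. As a minimal standalone sketch of the CR0-write checks the two vendors now share (set_cr0_core() is hypothetical; the real functions operate on VMCB/VMCS and shadow state, and under shadow paging additionally force PG and WP on in the hardware-visible CR0):

    #include <stdio.h>

    /* Relevant architectural bits, named as in the Xen headers. */
    #define X86_CR0_PE  (1UL << 0)
    #define X86_CR0_ET  (1UL << 4)
    #define X86_CR0_PG  (1UL << 31)
    #define X86_CR4_PAE (1UL << 5)
    #define EFER_LME    (1UL << 8)
    #define EFER_LMA    (1UL << 10)

    /* Returns 0 where the guest would take a #GP, 1 on success. */
    static int set_cr0_core(unsigned long *cr0, unsigned long *efer,
                            unsigned long cr4, unsigned long value)
    {
        unsigned long old_value = *cr0;

        value |= X86_CR0_ET;                    /* ET is reserved, always 1. */

        if ( (value & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG )
            return 0;                           /* PG without PE: #GP. */

        if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) )
        {
            if ( (*efer & EFER_LME) && !(cr4 & X86_CR4_PAE) )
                return 0;                       /* LME set, PAE clear: #GP. */
            if ( *efer & EFER_LME )
                *efer |= EFER_LMA;              /* Entering long mode. */
        }
        else if ( !(value & X86_CR0_PG) && (old_value & X86_CR0_PG) )
            *efer &= ~EFER_LMA;                 /* PG cleared: LMA drops. */

        *cr0 = value;
        return 1;
    }

    int main(void)
    {
        unsigned long cr0 = X86_CR0_ET, efer = EFER_LME, cr4 = X86_CR4_PAE;

        /* Legal: enabling PE+PG with LME and PAE set switches LMA on. */
        printf("ok=%d lma=%d\n",
               set_cr0_core(&cr0, &efer, cr4, X86_CR0_PE|X86_CR0_PG),
               !!(efer & EFER_LMA));

        /* Illegal: PG without PE must fault. */
        printf("ok=%d\n", set_cr0_core(&cr0, &efer, cr4, X86_CR0_PG));
        return 0;
    }
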
line diff
     1.1 --- a/xen/arch/x86/hvm/svm/svm.c	Thu Mar 29 11:01:41 2007 +0100
     1.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Thu Mar 29 12:04:35 2007 +0100
     1.3 @@ -205,7 +205,7 @@ static inline int long_mode_do_msr_write
     1.4      switch ( ecx )
     1.5      {
     1.6      case MSR_EFER:
     1.7 -        /* offending reserved bit will cause #GP */
     1.8 +        /* Offending reserved bit will cause #GP. */
     1.9          if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
    1.10          {
    1.11              gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
    1.12 @@ -213,53 +213,33 @@ static inline int long_mode_do_msr_write
    1.13              goto gp_fault;
    1.14          }
    1.15  
    1.16 -        /* 
    1.17 -         * update the VMCB's EFER with the intended value along with
    1.18 -         * that crucial EFER.SVME bit =)
    1.19 -         */
    1.20 -        vmcb->efer = msr_content | EFER_SVME;
    1.21 -
    1.22  #ifdef __x86_64__
    1.23 -
    1.24 -        /*
    1.25 -         * Check for EFER.LME transitions from 0->1 or 1->0.  Do the
    1.26 -         * sanity checks and then make sure that both EFER.LME and
    1.27 -         * EFER.LMA are cleared. (EFER.LME can't be set in the vmcb
    1.28 -         * until the guest also sets CR0.PG, since even if the guest has
    1.29 -         * paging "disabled", the vmcb's CR0 always has PG set.)
    1.30 -         */
    1.31          if ( (msr_content & EFER_LME) && !svm_lme_is_set(v) )
    1.32          {
    1.33 -            /* EFER.LME transition from 0 to 1 */
    1.34 -            
    1.35 -            if ( svm_paging_enabled(v) ||
    1.36 -                 !svm_cr4_pae_is_set(v) )
    1.37 +            /* EFER.LME transition from 0 to 1. */
    1.38 +            if ( svm_paging_enabled(v) || !svm_cr4_pae_is_set(v) )
    1.39              {
    1.40                  gdprintk(XENLOG_WARNING, "Trying to set LME bit when "
    1.41                           "in paging mode or PAE bit is not set\n");
    1.42                  goto gp_fault;
    1.43              }
    1.44 -
    1.45 -            vmcb->efer &= ~(EFER_LME | EFER_LMA);
    1.46          }
    1.47          else if ( !(msr_content & EFER_LME) && svm_lme_is_set(v) )
    1.48          {
    1.49 -            /* EFER.LME transistion from 1 to 0 */
    1.50 -            
    1.51 +            /* EFER.LME transition from 1 to 0. */
    1.52              if ( svm_paging_enabled(v) )
    1.53              {
    1.54                  gdprintk(XENLOG_WARNING, 
    1.55                           "Trying to clear EFER.LME while paging enabled\n");
    1.56                  goto gp_fault;
    1.57              }
    1.58 -
    1.59 -            vmcb->efer &= ~(EFER_LME | EFER_LMA);
    1.60          }
    1.61 -
    1.62  #endif /* __x86_64__ */
    1.63  
    1.64 -        /* update the guest EFER's shadow with the intended value */
    1.65          v->arch.hvm_svm.cpu_shadow_efer = msr_content;
    1.66 +        vmcb->efer = msr_content | EFER_SVME;
    1.67 +        if ( !svm_paging_enabled(v) )
    1.68 +            vmcb->efer &= ~(EFER_LME | EFER_LMA);
    1.69  
    1.70          break;
    1.71  
    1.72 @@ -1594,63 +1574,23 @@ static void svm_io_instruction(struct vc
    1.73      }
    1.74  }
    1.75  
    1.76 -static int npt_set_cr0(unsigned long value) 
    1.77 -{
    1.78 -    struct vcpu *v = current;
    1.79 -    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    1.80 -  
    1.81 -    /* ET is reserved and should be always be 1*/
    1.82 -    value |= X86_CR0_ET;
    1.83 -
    1.84 -    /* Check whether the guest is about to turn on long mode. 
    1.85 -     * If it is, set EFER.LME and EFER.LMA.  Update the shadow EFER.LMA
    1.86 -     * bit too, so svm_long_mode_enabled() will work.
    1.87 -     */
    1.88 -    if ( (value & X86_CR0_PG) && svm_lme_is_set(v) &&
    1.89 -         (vmcb->cr4 & X86_CR4_PAE) && (vmcb->cr0 & X86_CR0_PE) )
    1.90 -    {
    1.91 -        v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA;
    1.92 -        vmcb->efer |= EFER_LMA | EFER_LME;
    1.93 -    }
    1.94 -
    1.95 -    /* Whenever CR0.PG is cleared under long mode, LMA will be cleared 
    1.96 -     * immediatly. We emulate this process for svm_long_mode_enabled().
    1.97 -     */
    1.98 -    if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
    1.99 -    {
   1.100 -        if ( svm_long_mode_enabled(v) )
   1.101 -        {
   1.102 -            v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA;
   1.103 -        }
   1.104 -    }
   1.105 -    
   1.106 -    vmcb->cr0 = value | X86_CR0_WP;
   1.107 -    v->arch.hvm_svm.cpu_shadow_cr0 = value;
   1.108 -
   1.109 -    /* TS cleared? Then initialise FPU now. */
   1.110 -    if ( !(value & X86_CR0_TS) ) {
   1.111 -        setup_fpu(v);
   1.112 -        vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
   1.113 -    }
   1.114 -    
   1.115 -    paging_update_paging_modes(v);
   1.116 -    
   1.117 -    return 1;
   1.118 -}
   1.119 -
   1.120  static int svm_set_cr0(unsigned long value)
   1.121  {
   1.122      struct vcpu *v = current;
   1.123 -    unsigned long mfn;
   1.124 -    int paging_enabled;
   1.125 +    unsigned long mfn, old_value = v->arch.hvm_svm.cpu_shadow_cr0;
   1.126      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   1.127      unsigned long old_base_mfn;
   1.128    
   1.129 -    /* We don't want to lose PG.  ET is reserved and should be always be 1*/
   1.130 -    paging_enabled = svm_paging_enabled(v);
   1.131 +    HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
   1.132 +
    1.133 +    /* ET is reserved and should always be 1. */
   1.134      value |= X86_CR0_ET;
   1.135 -    vmcb->cr0 = value | X86_CR0_PG | X86_CR0_WP;
   1.136 -    v->arch.hvm_svm.cpu_shadow_cr0 = value;
   1.137 +
   1.138 +    if ( (value & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG )
   1.139 +    {
   1.140 +        svm_inject_exception(v, TRAP_gp_fault, 1, 0);
   1.141 +        return 0;
   1.142 +    }
   1.143  
   1.144      /* TS cleared? Then initialise FPU now. */
   1.145      if ( !(value & X86_CR0_TS) )
   1.146 @@ -1659,152 +1599,72 @@ static int svm_set_cr0(unsigned long val
   1.147          vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
   1.148      }
   1.149  
   1.150 -    HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
   1.151 -
   1.152 -    if ( ((value & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG))
   1.153 -         && !paging_enabled ) 
   1.154 +    if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) )
   1.155      {
   1.156 -        /* The guest CR3 must be pointing to the guest physical. */
   1.157 -        mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
   1.158 -        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
   1.159 -        {
   1.160 -            gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n", 
   1.161 -                     v->arch.hvm_svm.cpu_cr3, mfn);
   1.162 -            domain_crash(v->domain);
   1.163 -            return 0;
   1.164 -        }
   1.165 -
   1.166  #if defined(__x86_64__)
   1.167 -        if ( svm_lme_is_set(v) && !svm_cr4_pae_is_set(v) )
   1.168 -        {
   1.169 -            HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
   1.170 -            svm_inject_exception(v, TRAP_gp_fault, 1, 0);
   1.171 -        }
   1.172 -
   1.173          if ( svm_lme_is_set(v) )
   1.174          {
   1.175 +            if ( !svm_cr4_pae_is_set(v) )
   1.176 +            {
   1.177 +                HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
   1.178 +                svm_inject_exception(v, TRAP_gp_fault, 1, 0);
   1.179 +                return 0;
   1.180 +            }
   1.181              HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
   1.182              v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA;
   1.183              vmcb->efer |= EFER_LMA | EFER_LME;
   1.184          }
   1.185  #endif  /* __x86_64__ */
   1.186  
   1.187 -        /* Now arch.guest_table points to machine physical. */
   1.188 -        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
   1.189 -        v->arch.guest_table = pagetable_from_pfn(mfn);
   1.190 -        if ( old_base_mfn )
   1.191 -            put_page(mfn_to_page(old_base_mfn));
   1.192 -        paging_update_paging_modes(v);
   1.193 -
   1.194 -        HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
   1.195 -                    (unsigned long) (mfn << PAGE_SHIFT));
   1.196 -    }
   1.197 +        if ( !paging_mode_hap(v->domain) )
   1.198 +        {
   1.199 +            /* The guest CR3 must be pointing to the guest physical. */
   1.200 +            mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
   1.201 +            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
   1.202 +            {
   1.203 +                gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n", 
   1.204 +                         v->arch.hvm_svm.cpu_cr3, mfn);
   1.205 +                domain_crash(v->domain);
   1.206 +                return 0;
   1.207 +            }
   1.208  
   1.209 -    if ( !((value & X86_CR0_PE) && (value & X86_CR0_PG)) && paging_enabled )
   1.210 -        if ( v->arch.hvm_svm.cpu_cr3 ) {
   1.211 -            put_page(mfn_to_page(get_mfn_from_gpfn(
   1.212 -                v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
   1.213 -            v->arch.guest_table = pagetable_null();
   1.214 -        }
   1.215 +            /* Now arch.guest_table points to machine physical. */
   1.216 +            old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
   1.217 +            v->arch.guest_table = pagetable_from_pfn(mfn);
   1.218 +            if ( old_base_mfn )
   1.219 +                put_page(mfn_to_page(old_base_mfn));
   1.220  
   1.221 -    /*
   1.222 -     * SVM implements paged real-mode and when we return to real-mode
   1.223 -     * we revert back to the physical mappings that the domain builder
   1.224 -     * created.
   1.225 -     */
   1.226 -    if ((value & X86_CR0_PE) == 0) {
   1.227 -        if (value & X86_CR0_PG) {
   1.228 -            svm_inject_exception(v, TRAP_gp_fault, 1, 0);
   1.229 -            return 0;
   1.230 +            HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
   1.231 +                        (unsigned long) (mfn << PAGE_SHIFT));
   1.232          }
   1.233 -        paging_update_paging_modes(v);
   1.234      }
   1.235 -    else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
   1.236 +    else if ( !(value & X86_CR0_PG) && (old_value & X86_CR0_PG) )
   1.237      {
   1.238 +        /* When CR0.PG is cleared, LMA is cleared immediately. */
   1.239          if ( svm_long_mode_enabled(v) )
   1.240          {
   1.241              vmcb->efer &= ~(EFER_LME | EFER_LMA);
   1.242              v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA;
   1.243          }
   1.244 -        /* we should take care of this kind of situation */
   1.245 +
   1.246 +        if ( !paging_mode_hap(v->domain) && v->arch.hvm_svm.cpu_cr3 )
   1.247 +        {
   1.248 +            put_page(mfn_to_page(get_mfn_from_gpfn(
   1.249 +                v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT)));
   1.250 +            v->arch.guest_table = pagetable_null();
   1.251 +        }
   1.252 +    }
   1.253 +
   1.254 +    vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0 = value;
   1.255 +    if ( !paging_mode_hap(v->domain) )
   1.256 +        vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
   1.257 +
   1.258 +    if ( (value ^ old_value) & X86_CR0_PG )
   1.259          paging_update_paging_modes(v);
   1.260 -    }
   1.261  
   1.262      return 1;
   1.263  }
   1.264  
   1.265 -static int npt_mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
   1.266 -{  
   1.267 -    unsigned long value;
   1.268 -    struct vcpu *v = current;
   1.269 -    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   1.270 -    struct vlapic *vlapic = vcpu_vlapic(v);
   1.271 -
   1.272 -    value = get_reg(gpreg, regs, vmcb);
   1.273 -
   1.274 -    switch ( cr )
   1.275 -    {
   1.276 -    case 0:
   1.277 -        return npt_set_cr0(value);
   1.278 -
   1.279 -    case 3:
   1.280 -        vmcb->cr3 = value;
   1.281 -        v->arch.hvm_svm.cpu_cr3 = value;
   1.282 -        break;
   1.283 -
   1.284 -    case 4: /* CR4 */
   1.285 -        vmcb->cr4 = value;
   1.286 -        v->arch.hvm_svm.cpu_shadow_cr4 = value;
   1.287 -        paging_update_paging_modes(v);
   1.288 -        break;
   1.289 -
   1.290 -    case 8:
   1.291 -        vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
   1.292 -        vmcb->vintr.fields.tpr = value & 0x0F;
   1.293 -        break;
   1.294 -
   1.295 -    default:
   1.296 -        gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
   1.297 -        domain_crash(v->domain);
   1.298 -        return 0;
   1.299 -    }
   1.300 -    
   1.301 -    return 1;
   1.302 -}
   1.303 -
   1.304 -static void npt_mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
   1.305 -{
   1.306 -    unsigned long value = 0;
   1.307 -    struct vcpu *v = current;
   1.308 -    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   1.309 -    struct vlapic *vlapic = vcpu_vlapic(v);
   1.310 -
   1.311 -    switch ( cr )
   1.312 -    {
   1.313 -    case 0:
   1.314 -        value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr0;
   1.315 -        break;
   1.316 -    case 2:
   1.317 -        value = vmcb->cr2;
   1.318 -        break;
   1.319 -    case 3:
   1.320 -        value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
   1.321 -        break;
   1.322 -    case 4:
   1.323 -        value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
   1.324 -       break;
   1.325 -    case 8:
   1.326 -        value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
   1.327 -        value = (value & 0xF0) >> 4;
   1.328 -        break;
   1.329 -    default:
   1.330 -        domain_crash(v->domain);
   1.331 -        return;
   1.332 -    }
   1.333 -    
   1.334 -    set_reg(gp, value, regs, vmcb);
   1.335 -}
   1.336 -
   1.337  /*
   1.338   * Read from control registers. CR0 and CR4 are read from the shadow.
   1.339   */
   1.340 @@ -1864,12 +1724,18 @@ static int mov_to_cr(int gpreg, int cr, 
   1.341      HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
   1.342      HVM_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
   1.343  
   1.344 -    switch (cr) 
   1.345 +    switch ( cr )
   1.346      {
   1.347      case 0: 
   1.348          return svm_set_cr0(value);
   1.349  
   1.350 -    case 3: 
   1.351 +    case 3:
   1.352 +        if ( paging_mode_hap(v->domain) )
   1.353 +        {
   1.354 +            vmcb->cr3 = v->arch.hvm_svm.cpu_cr3 = value;
   1.355 +            break;
   1.356 +        }
   1.357 +
   1.358          /* If paging is not enabled yet, simply copy the value to CR3. */
   1.359          if ( !svm_paging_enabled(v) )
   1.360          {
   1.361 @@ -1914,6 +1780,13 @@ static int mov_to_cr(int gpreg, int cr, 
   1.362          break;
   1.363  
   1.364      case 4: /* CR4 */
   1.365 +        if ( paging_mode_hap(v->domain) )
   1.366 +        {
   1.367 +            vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 = value;
   1.368 +            paging_update_paging_modes(v);
   1.369 +            break;
   1.370 +        }
   1.371 +
   1.372          old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
   1.373          if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
   1.374          {
   1.375 @@ -2034,18 +1907,12 @@ static int svm_cr_access(struct vcpu *v,
   1.376      {
   1.377      case INSTR_MOV2CR:
   1.378          gpreg = decode_src_reg(prefix, buffer[index+2]);
   1.379 -        if ( paging_mode_hap(v->domain) )
   1.380 -            result = npt_mov_to_cr(gpreg, cr, regs);
   1.381 -        else
   1.382 -            result = mov_to_cr(gpreg, cr, regs);
   1.383 +        result = mov_to_cr(gpreg, cr, regs);
   1.384          break;
   1.385  
   1.386      case INSTR_MOVCR2:
   1.387          gpreg = decode_src_reg(prefix, buffer[index+2]);
   1.388 -        if ( paging_mode_hap(v->domain) )
   1.389 -            npt_mov_from_cr(cr, gpreg, regs);
   1.390 -        else
   1.391 -            mov_from_cr(cr, gpreg, regs);
   1.392 +        mov_from_cr(cr, gpreg, regs);
   1.393          break;
   1.394  
   1.395      case INSTR_CLTS:
   1.396 @@ -2059,13 +1926,8 @@ static int svm_cr_access(struct vcpu *v,
   1.397      case INSTR_LMSW:
   1.398          gpreg = decode_src_reg(prefix, buffer[index+2]);
   1.399          value = get_reg(gpreg, regs, vmcb) & 0xF;
   1.400 -
   1.401          value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value;
   1.402 -
   1.403 -        if ( paging_mode_hap(v->domain) )
   1.404 -            result = npt_set_cr0(value);
   1.405 -        else
   1.406 -            result = svm_set_cr0(value);
   1.407 +        result = svm_set_cr0(value);
   1.408          break;
   1.409  
   1.410      case INSTR_SMSW:
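
The first hunk of the svm.c diff above also replaces the incremental vmcb->efer fix-ups with a single write-back at the end of the MSR_EFER case: the guest's intended EFER is kept in the shadow, and the VMCB copy is derived from it. A standalone toy of that derivation (vmcb_efer() is hypothetical; the real code tests svm_paging_enabled(v)):

    #include <stdio.h>

    #define EFER_LME  (1UL << 8)
    #define EFER_LMA  (1UL << 10)
    #define EFER_NX   (1UL << 11)
    #define EFER_SVME (1UL << 12)

    /* Derive the VMCB's EFER from the guest-visible shadow value, as the
     * rewritten MSR_EFER case does: SVME must always be set, and LME/LMA
     * are masked while guest paging is off, because under shadow paging
     * the VMCB's CR0.PG is always 1 and an unmasked LME would switch the
     * guest into long mode early. */
    static unsigned long vmcb_efer(unsigned long shadow_efer, int paging_enabled)
    {
        unsigned long efer = shadow_efer | EFER_SVME;
        if ( !paging_enabled )
            efer &= ~(EFER_LME | EFER_LMA);
        return efer;
    }

    int main(void)
    {
        printf("%#lx\n", vmcb_efer(EFER_LME | EFER_NX, 0)); /* LME hidden  */
        printf("%#lx\n", vmcb_efer(EFER_LME | EFER_NX, 1)); /* LME visible */
        return 0;
    }
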
     2.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Mar 29 11:01:41 2007 +0100
     2.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Thu Mar 29 12:04:35 2007 +0100
     2.3 @@ -1840,11 +1840,16 @@ static int vmx_set_cr0(unsigned long val
     2.4      unsigned long old_cr0;
     2.5      unsigned long old_base_mfn;
     2.6  
     2.7 -    /*
     2.8 -     * CR0: We don't want to lose PE and PG.
     2.9 -     */
    2.10 -    old_cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
    2.11 -    paging_enabled = (old_cr0 & X86_CR0_PE) && (old_cr0 & X86_CR0_PG);
    2.12 +    HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
    2.13 +
     2.14 +    /* ET is reserved and should always be 1. */
    2.15 +    value |= X86_CR0_ET;
    2.16 +
    2.17 +    if ( (value & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG )
    2.18 +    {
    2.19 +        vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
    2.20 +        return 0;
    2.21 +    }
    2.22  
    2.23      /* TS cleared? Then initialise FPU now. */
    2.24      if ( !(value & X86_CR0_TS) )
    2.25 @@ -1853,6 +1858,9 @@ static int vmx_set_cr0(unsigned long val
    2.26          __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device);
    2.27      }
    2.28  
    2.29 +    old_cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
    2.30 +    paging_enabled = old_cr0 & X86_CR0_PG;
    2.31 +
    2.32      v->arch.hvm_vmx.cpu_cr0 = (value | X86_CR0_PE | X86_CR0_PG 
    2.33                                 | X86_CR0_NE | X86_CR0_WP);
    2.34      __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
    2.35 @@ -1860,8 +1868,6 @@ static int vmx_set_cr0(unsigned long val
    2.36      v->arch.hvm_vmx.cpu_shadow_cr0 = value;
    2.37      __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
    2.38  
    2.39 -    HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
    2.40 -
    2.41      if ( (value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled )
    2.42      {
    2.43          /*