ia64/xen-unstable

changeset 16626:d9ab9eb2bfee

HVM: support unaligned and page-crossing writes in the shadow emulator
so that we can use it to support guests that clear CR0.WP.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Sat Dec 15 18:29:27 2007 +0000 (2007-12-15)
parents 44a98411d230
children d3881629d572
files xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/private.h xen/include/asm-x86/hvm/hvm.h
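
The idea, in brief: Xen keeps CR0.WP set in the real control register for HVM guests, so the shadow pagetables are always write-protected. If the guest kernel clears its own (virtual) CR0.WP and then writes through a read-only mapping, the write faults into the shadow code, which must emulate the instruction rather than reflect the fault; that in turn means the emulator can be handed destinations that are unaligned or that cross a page boundary, which is what most of this patch adds. A condensed restatement of the two key decisions (using hvm_wp_enabled(), added by this changeset, and the existing PFEC_* flags; this is a sketch of the hunks below, not extra code):

    /* Guest walk (mandatory_flags): don't demand the R/W right for
     * supervisor accesses when the guest believes CR0.WP is clear. */
    if ( is_hvm_vcpu(v) && unlikely(!hvm_wp_enabled(v))
         && !(pfec & PFEC_user_mode) )
        pfec &= ~PFEC_write_access;

    /* Fault path (sh_page_fault): emulate a non-user write fault on a
     * present page instead of reflecting it to the guest. */
    if ( is_hvm_domain(d) && unlikely(!hvm_wp_enabled(v))
         && regs->error_code == (PFEC_write_access|PFEC_page_present) )
        goto emulate;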
line diff
     1.1 --- a/xen/arch/x86/mm/shadow/multi.c	Sat Dec 15 18:26:52 2007 +0000
     1.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Sat Dec 15 18:29:27 2007 +0000
     1.3 @@ -61,12 +61,6 @@
     1.4   * and if we do flush, re-do the walk.  If anything has changed, then 
     1.5   * pause all the other vcpus and do the walk *again*.
     1.6   *
     1.7 - * WP DISABLED
     1.8 - * Consider how to implement having the WP bit of CR0 set to 0.  
     1.9 - * Since we need to be able to cause write faults to pagetables, this might
    1.10 - * end up looking like not having the (guest) pagetables present at all in 
    1.11 - * HVM guests...
    1.12 - *
    1.13   * PSE disabled / PSE36
    1.14   * We don't support any modes other than PSE enabled, PSE36 disabled.
    1.15   * Neither of those would be hard to change, but we'd need to be able to 
    1.16 @@ -219,11 +213,17 @@ static uint32_t mandatory_flags(struct v
    1.17          /* 1   1   1   0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
    1.18          /* 1   1   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
    1.19      };
    1.20 -    uint32_t f = flags[(pfec & 0x1f) >> 1];
    1.21 +
    1.22      /* Don't demand not-NX if the CPU wouldn't enforce it. */
    1.23      if ( !guest_supports_nx(v) )
    1.24 -        f &= ~_PAGE_NX_BIT;
    1.25 -    return f;
    1.26 +        pfec &= ~PFEC_insn_fetch;
    1.27 +
    1.28 +    /* Don't demand R/W if the CPU wouldn't enforce it. */
    1.29 +    if ( is_hvm_vcpu(v) && unlikely(!hvm_wp_enabled(v)) 
    1.30 +         && !(pfec & PFEC_user_mode) )
    1.31 +        pfec &= ~PFEC_write_access;
    1.32 +
    1.33 +    return flags[(pfec & 0x1f) >> 1];
    1.34  }
    1.35  
    1.36  /* Modify a guest pagetable entry to set the Accessed and Dirty bits.
    1.37 @@ -262,7 +262,8 @@ static uint32_t set_ad_bits(void *guest_
    1.38   * from any guest PT pages we see, as we will be shadowing them soon
    1.39   * and will rely on the contents' not having changed.
    1.40   * 
    1.41 - * Returns 0 for success or non-zero if the walk did not complete.
    1.42 + * Returns 0 for success, or the set of permission bits that we failed on 
    1.43 + * if the walk did not complete.
    1.44   * N.B. This is different from the old return code but almost no callers
    1.45   * checked the old return code anyway.
    1.46   */
    1.47 @@ -2717,8 +2718,9 @@ static int sh_page_fault(struct vcpu *v,
    1.48      fetch_type_t ft = 0;
    1.49      p2m_type_t p2mt;
    1.50  
    1.51 -    SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
    1.52 -                   v->domain->domain_id, v->vcpu_id, va, regs->error_code);
    1.53 +    SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u, rip=%lx\n",
    1.54 +                  v->domain->domain_id, v->vcpu_id, va, regs->error_code,
    1.55 +                  regs->rip);
    1.56  
    1.57      perfc_incr(shadow_fault);
    1.58      //
    1.59 @@ -2790,7 +2792,7 @@ static int sh_page_fault(struct vcpu *v,
    1.60      shadow_lock(d);
    1.61      
    1.62      shadow_audit_tables(v);
    1.63 -                   
    1.64 +    
    1.65      if ( guest_walk_tables(v, va, &gw, regs->error_code, 1) != 0 )
    1.66      {
    1.67          perfc_incr(shadow_fault_bail_real_fault);
    1.68 @@ -2883,6 +2885,16 @@ static int sh_page_fault(struct vcpu *v,
    1.69          goto mmio;
    1.70      }
    1.71  
    1.72 +    /* In HVM guests, we force CR0.WP always to be set, so that the
    1.73 +     * pagetables are always write-protected.  If the guest thinks
    1.74 +     * CR0.WP is clear, we must emulate faulting supervisor writes to
    1.75 +     * allow the guest to write through read-only PTEs.  Emulate if the 
    1.76 +     * fault was a non-user write to a present page.  */
    1.77 +    if ( is_hvm_domain(d) 
    1.78 +         && unlikely(!hvm_wp_enabled(v)) 
    1.79 +         && regs->error_code == (PFEC_write_access|PFEC_page_present) )
    1.80 +        goto emulate;
    1.81 +
    1.82      perfc_incr(shadow_fault_fixed);
    1.83      d->arch.paging.log_dirty.fault_count++;
    1.84      reset_early_unshadow(v);
    1.85 @@ -3968,25 +3980,17 @@ int sh_remove_l3_shadow(struct vcpu *v, 
    1.86  /**************************************************************************/
    1.87  /* Handling HVM guest writes to pagetables  */
    1.88  
    1.89 -/* Check that the user is allowed to perform this write. 
    1.90 - * Returns a mapped pointer to write to, and the mfn it's on,
    1.91 - * or NULL for error. */
    1.92 -static inline void * emulate_map_dest(struct vcpu *v,
    1.93 -                                      unsigned long vaddr,
    1.94 -                                      struct sh_emulate_ctxt *sh_ctxt,
    1.95 -                                      mfn_t *mfnp)
    1.96 +/* Translate a VA to an MFN, injecting a page-fault if we fail */
    1.97 +static mfn_t emulate_gva_to_mfn(struct vcpu *v,
    1.98 +                                unsigned long vaddr,
    1.99 +                                struct sh_emulate_ctxt *sh_ctxt)
   1.100  {
   1.101 -    uint32_t pfec;
   1.102      unsigned long gfn;
   1.103      mfn_t mfn;
   1.104      p2m_type_t p2mt;
   1.105 -
   1.106 -    /* We don't emulate user-mode writes to page tables */
   1.107 -    if ( ring_3(sh_ctxt->ctxt.regs) ) 
   1.108 -        return NULL;
   1.109 -
   1.110 -    /* Translate the VA, and exit with a page-fault if we fail */
   1.111 -    pfec = PFEC_page_present | PFEC_write_access;
   1.112 +    uint32_t pfec = PFEC_page_present | PFEC_write_access;
   1.113 +
   1.114 +    /* Translate the VA to a GFN */
   1.115      gfn = sh_gva_to_gfn(v, vaddr, &pfec);
   1.116      if ( gfn == INVALID_GFN ) 
   1.117      {
   1.118 @@ -3994,84 +3998,184 @@ static inline void * emulate_map_dest(st
   1.119              hvm_inject_exception(TRAP_page_fault, pfec, vaddr);
   1.120          else
   1.121              propagate_page_fault(vaddr, pfec);
   1.122 -        return NULL;
   1.123 +        return _mfn(INVALID_MFN);
   1.124      }
   1.125  
   1.126 -    /* Translate the GFN */
   1.127 +    /* Translate the GFN to an MFN */
   1.128      mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt);
   1.129      if ( p2m_is_ram(p2mt) )
   1.130      {
   1.131          ASSERT(mfn_valid(mfn));
   1.132 -        *mfnp = mfn;
   1.133          v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
   1.134 -        return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
   1.135 +        return mfn;
   1.136 +    }
   1.137 + 
   1.138 +    return _mfn(INVALID_MFN);
   1.139 +}
   1.140 +
   1.141 +/* Check that the user is allowed to perform this write. 
   1.142 + * Returns a mapped pointer to write to, or NULL for error. */
   1.143 +static void * emulate_map_dest(struct vcpu *v,
   1.144 +                               unsigned long vaddr,
   1.145 +                               u32 bytes,
   1.146 +                               struct sh_emulate_ctxt *sh_ctxt)
   1.147 +{
   1.148 +    unsigned long offset;
   1.149 +    void *map = NULL;
   1.150 +
   1.151 +    /* We don't emulate user-mode writes to page tables */
   1.152 +    if ( ring_3(sh_ctxt->ctxt.regs) ) 
   1.153 +        return NULL;
   1.154 +
   1.155 +    sh_ctxt->mfn1 = emulate_gva_to_mfn(v, vaddr, sh_ctxt);
   1.156 +    if ( !mfn_valid(sh_ctxt->mfn1) ) 
   1.157 +        return NULL;
   1.158 +
   1.159 +    /* Unaligned writes mean probably this isn't a pagetable */
   1.160 +    if ( vaddr & (bytes - 1) )
   1.161 +        sh_remove_shadows(v, sh_ctxt->mfn1, 0, 0 /* Slow, can fail */ );
   1.162 +
   1.163 +    if ( likely(((vaddr + bytes - 1) & PAGE_MASK) == (vaddr & PAGE_MASK)) )
   1.164 +    {
   1.165 +        /* Whole write fits on a single page */
   1.166 +        sh_ctxt->mfn2 = _mfn(INVALID_MFN);
   1.167 +        map = sh_map_domain_page(sh_ctxt->mfn1) + (vaddr & ~PAGE_MASK);
   1.168      }
   1.169      else 
   1.170 -        return NULL;
   1.171 -}
   1.172 -
   1.173 -static int safe_not_to_verify_write(mfn_t gmfn, void *dst, void *src, 
   1.174 -                                    int bytes)
   1.175 -{
   1.176 -#if (SHADOW_OPTIMIZATIONS & SHOPT_SKIP_VERIFY)
   1.177 -    struct page_info *pg = mfn_to_page(gmfn);
   1.178 -    if ( !(pg->shadow_flags & SHF_32) 
   1.179 -         && ((unsigned long)dst & 7) == 0 )
   1.180      {
   1.181 -        /* Not shadowed 32-bit: aligned 64-bit writes that leave the
   1.182 -         * present bit unset are safe to ignore. */
   1.183 -        if ( (*(u64*)src & _PAGE_PRESENT) == 0 
   1.184 -             && (*(u64*)dst & _PAGE_PRESENT) == 0 )
   1.185 -            return 1;
   1.186 +        /* Cross-page emulated writes are only supported for HVM guests; 
   1.187 +         * PV guests ought to know better */
   1.188 +        if ( !is_hvm_vcpu(v) )
   1.189 +            return NULL;
   1.190 +
   1.191 +        /* This write crosses a page boundary.  Translate the second page */
   1.192 +        sh_ctxt->mfn2 = emulate_gva_to_mfn(v, (vaddr + bytes - 1) & PAGE_MASK,
   1.193 +                                           sh_ctxt);
   1.194 +        if ( !mfn_valid(sh_ctxt->mfn2) ) 
   1.195 +            return NULL;
   1.196 +
   1.197 +        /* Cross-page writes mean probably not a pagetable */
   1.198 +        sh_remove_shadows(v, sh_ctxt->mfn2, 0, 0 /* Slow, can fail */ );
   1.199 +        
   1.200 +        /* Hack: we map the pages into the vcpu's LDT space, since we
   1.201 +         * know that we're not going to need the LDT for HVM guests, 
   1.202 +         * and only HVM guests are allowed unaligned writes. */
   1.203 +        ASSERT(is_hvm_vcpu(v));
   1.204 +        map = (void *)LDT_VIRT_START(v);
   1.205 +        offset = l1_linear_offset((unsigned long) map);
   1.206 +        l1e_write(&__linear_l1_table[offset],
   1.207 +                  l1e_from_pfn(mfn_x(sh_ctxt->mfn1), __PAGE_HYPERVISOR));
   1.208 +        l1e_write(&__linear_l1_table[offset + 1],
   1.209 +                  l1e_from_pfn(mfn_x(sh_ctxt->mfn2), __PAGE_HYPERVISOR));
   1.210 +        flush_tlb_local();
   1.211 +        map += (vaddr & ~PAGE_MASK);
   1.212      }
   1.213 -    else if ( !(pg->shadow_flags & (SHF_PAE|SHF_64)) 
   1.214 -              && ((unsigned long)dst & 3) == 0 )
   1.215 +    
   1.216 +#if (SHADOW_OPTIMIZATIONS & SHOPT_SKIP_VERIFY)
   1.217 +    /* Remember if the bottom bit was clear, so we can choose not to run
   1.218 +     * the change through the verify code if it's still clear afterwards */
   1.219 +    sh_ctxt->low_bit_was_clear = map != NULL && !(*(u8 *)map & _PAGE_PRESENT);
   1.220 +#endif
   1.221 +
   1.222 +    return map;
   1.223 +}
   1.224 +
   1.225 +/* Tidy up after the emulated write: mark pages dirty, verify the new
   1.226 + * contents, and undo the mapping */
   1.227 +static void emulate_unmap_dest(struct vcpu *v,
   1.228 +                               void *addr,
   1.229 +                               u32 bytes,
   1.230 +                               struct sh_emulate_ctxt *sh_ctxt)
   1.231 +{
   1.232 +    u32 b1 = bytes, b2 = 0, shflags;
   1.233 +
   1.234 +    ASSERT(mfn_valid(sh_ctxt->mfn1));
   1.235 +
   1.236 +    /* If we are writing lots of PTE-aligned zeros, might want to unshadow */
   1.237 +    if ( likely(bytes >= 4)
   1.238 +         && (*(u32 *)addr == 0)
   1.239 +         && ((unsigned long) addr & ((sizeof (guest_intpte_t)) - 1)) == 0 )
   1.240 +        check_for_early_unshadow(v, sh_ctxt->mfn1);
   1.241 +    else
   1.242 +        reset_early_unshadow(v);
   1.243 +
   1.244 +    /* We can avoid re-verifying the page contents after the write if:
   1.245 +     *  - it was no larger than the PTE type of this pagetable;
   1.246 +     *  - it was aligned to the PTE boundaries; and
   1.247 +     *  - _PAGE_PRESENT was clear before and after the write. */
   1.248 +    shflags = mfn_to_page(sh_ctxt->mfn1)->shadow_flags;
   1.249 +#if (SHADOW_OPTIMIZATIONS & SHOPT_SKIP_VERIFY)
   1.250 +    if ( sh_ctxt->low_bit_was_clear
   1.251 +         && !(*(u8 *)addr & _PAGE_PRESENT)
   1.252 +         && ((!(shflags & SHF_32)
   1.253 +              /* Not shadowed 32-bit: aligned 64-bit writes that leave
   1.254 +               * the present bit unset are safe to ignore. */
   1.255 +              && ((unsigned long)addr & 7) == 0
   1.256 +              && bytes <= 8)
   1.257 +             ||
   1.258 +             (!(shflags & (SHF_PAE|SHF_64))
   1.259 +              /* Not shadowed PAE/64-bit: aligned 32-bit writes that
   1.260 +               * leave the present bit unset are safe to ignore. */
   1.261 +              && ((unsigned long)addr & 3) == 0
   1.262 +              && bytes <= 4)) )
   1.263      {
   1.264 -        /* Not shadowed PAE/64-bit: aligned 32-bit writes that leave the
   1.265 -         * present bit unset are safe to ignore. */
   1.266 -        if ( (*(u32*)src & _PAGE_PRESENT) == 0 
   1.267 -             && (*(u32*)dst & _PAGE_PRESENT) == 0 )
   1.268 -            return 1;        
   1.269 +        /* Writes with this alignment constraint can't possibly cross pages */
   1.270 +        ASSERT(!mfn_valid(sh_ctxt->mfn2)); 
   1.271      }
   1.272 -#endif
   1.273 -    return 0;
   1.274 +    else 
   1.275 +#endif /* SHADOW_OPTIMIZATIONS & SHOPT_SKIP_VERIFY */
   1.276 +    {        
   1.277 +        if ( unlikely(mfn_valid(sh_ctxt->mfn2)) )
   1.278 +        {
   1.279 +            /* Validate as two writes, one to each page */
   1.280 +            b1 = PAGE_SIZE - (((unsigned long)addr) & ~PAGE_MASK);
   1.281 +            b2 = bytes - b1;
   1.282 +            ASSERT(b2 < bytes);
   1.283 +        }
   1.284 +        if ( likely(b1 > 0) )
   1.285 +            sh_validate_guest_pt_write(v, sh_ctxt->mfn1, addr, b1);
   1.286 +        if ( unlikely(b2 > 0) )
   1.287 +            sh_validate_guest_pt_write(v, sh_ctxt->mfn2, addr + b1, b2);
   1.288 +    }
   1.289 +
   1.290 +    paging_mark_dirty(v->domain, mfn_x(sh_ctxt->mfn1));
   1.291 +
   1.292 +    if ( unlikely(mfn_valid(sh_ctxt->mfn2)) )
   1.293 +    {
   1.294 +        unsigned long offset;
   1.295 +        paging_mark_dirty(v->domain, mfn_x(sh_ctxt->mfn2));
   1.296 +        /* Undo the hacky two-frame contiguous map. */
   1.297 +        ASSERT(((unsigned long) addr & PAGE_MASK) == LDT_VIRT_START(v));
   1.298 +        offset = l1_linear_offset((unsigned long) addr);
   1.299 +        l1e_write(&__linear_l1_table[offset], l1e_empty());
   1.300 +        l1e_write(&__linear_l1_table[offset + 1], l1e_empty());
   1.301 +        flush_tlb_all();
   1.302 +    }
   1.303 +    else 
   1.304 +        sh_unmap_domain_page(addr);
   1.305  }
   1.306  
   1.307 -
   1.308  int
   1.309  sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
   1.310                        u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
   1.311  {
   1.312 -    mfn_t mfn;
   1.313      void *addr;
   1.314 -    int skip;
   1.315 -
   1.316 -    if ( vaddr & (bytes-1) )
   1.317 +
   1.318 +    /* Unaligned writes are only acceptable on HVM */
   1.319 +    if ( (vaddr & (bytes - 1)) && !is_hvm_vcpu(v)  )
   1.320          return X86EMUL_UNHANDLEABLE;
   1.321  
   1.322 -    ASSERT(((vaddr & ~PAGE_MASK) + bytes) <= PAGE_SIZE);
   1.323      shadow_lock(v->domain);
   1.324 -
   1.325 -    addr = emulate_map_dest(v, vaddr, sh_ctxt, &mfn);
   1.326 +    addr = emulate_map_dest(v, vaddr, bytes, sh_ctxt);
   1.327      if ( addr == NULL )
   1.328      {
   1.329          shadow_unlock(v->domain);
   1.330          return X86EMUL_EXCEPTION;
   1.331      }
   1.332  
   1.333 -    skip = safe_not_to_verify_write(mfn, addr, src, bytes);
   1.334      memcpy(addr, src, bytes);
   1.335 -    if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes);
   1.336 -
   1.337 -    /* If we are writing zeros to this page, might want to unshadow */
   1.338 -    if ( likely(bytes >= 4) && (*(u32 *)addr == 0) && is_lo_pte(vaddr) )
   1.339 -        check_for_early_unshadow(v, mfn);
   1.340 -    else
   1.341 -        reset_early_unshadow(v);
   1.342 -    
   1.343 -    paging_mark_dirty(v->domain, mfn_x(mfn));
   1.344 -
   1.345 -    sh_unmap_domain_page(addr);
   1.346 +
   1.347 +    emulate_unmap_dest(v, addr, bytes, sh_ctxt);
   1.348      shadow_audit_tables(v);
   1.349      shadow_unlock(v->domain);
   1.350      return X86EMUL_OKAY;
   1.351 @@ -4082,26 +4186,23 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
   1.352                          unsigned long old, unsigned long new,
   1.353                          unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
   1.354  {
   1.355 -    mfn_t mfn;
   1.356      void *addr;
   1.357      unsigned long prev;
   1.358 -    int rv = X86EMUL_OKAY, skip;
   1.359 -
   1.360 -    ASSERT(bytes <= sizeof(unsigned long));
   1.361 +    int rv = X86EMUL_OKAY;
   1.362 +
   1.363 +    /* Unaligned writes are only acceptable on HVM */
   1.364 +    if ( (vaddr & (bytes - 1)) && !is_hvm_vcpu(v)  )
   1.365 +        return X86EMUL_UNHANDLEABLE;
   1.366 +
   1.367      shadow_lock(v->domain);
   1.368  
   1.369 -    if ( vaddr & (bytes-1) )
   1.370 -        return X86EMUL_UNHANDLEABLE;
   1.371 -
   1.372 -    addr = emulate_map_dest(v, vaddr, sh_ctxt, &mfn);
   1.373 +    addr = emulate_map_dest(v, vaddr, bytes, sh_ctxt);
   1.374      if ( addr == NULL )
   1.375      {
   1.376          shadow_unlock(v->domain);
   1.377          return X86EMUL_EXCEPTION;
   1.378      }
   1.379  
   1.380 -    skip = safe_not_to_verify_write(mfn, &new, &old, bytes);
   1.381 -
   1.382      switch ( bytes )
   1.383      {
   1.384      case 1: prev = cmpxchg(((u8 *)addr), old, new);  break;
   1.385 @@ -4113,26 +4214,14 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
   1.386          prev = ~old;
   1.387      }
   1.388  
   1.389 -    if ( prev == old )
   1.390 -    {
   1.391 -        if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, bytes);
   1.392 -    }
   1.393 -    else
   1.394 +    if ( prev != old ) 
   1.395          rv = X86EMUL_CMPXCHG_FAILED;
   1.396  
   1.397      SHADOW_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx"
   1.398                    " wanted %#lx now %#lx bytes %u\n",
   1.399                    vaddr, prev, old, new, *(unsigned long *)addr, bytes);
   1.400  
   1.401 -    /* If we are writing zeros to this page, might want to unshadow */
   1.402 -    if ( likely(bytes >= 4) && (*(u32 *)addr == 0) && is_lo_pte(vaddr) )
   1.403 -        check_for_early_unshadow(v, mfn);
   1.404 -    else
   1.405 -        reset_early_unshadow(v);
   1.406 -
   1.407 -    paging_mark_dirty(v->domain, mfn_x(mfn));
   1.408 -
   1.409 -    sh_unmap_domain_page(addr);
   1.410 +    emulate_unmap_dest(v, addr, bytes, sh_ctxt);
   1.411      shadow_audit_tables(v);
   1.412      shadow_unlock(v->domain);
   1.413      return rv;
   1.414 @@ -4144,17 +4233,17 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v,
   1.415                            unsigned long new_lo, unsigned long new_hi,
   1.416                            struct sh_emulate_ctxt *sh_ctxt)
   1.417  {
   1.418 -    mfn_t mfn;
   1.419      void *addr;
   1.420      u64 old, new, prev;
   1.421 -    int rv = X86EMUL_OKAY, skip;
   1.422 -
   1.423 -    if ( vaddr & 7 )
   1.424 +    int rv = X86EMUL_OKAY;
   1.425 +
   1.426 +    /* Unaligned writes are only acceptable on HVM */
   1.427 +    if ( (vaddr & 7) && !is_hvm_vcpu(v) )
   1.428          return X86EMUL_UNHANDLEABLE;
   1.429  
   1.430      shadow_lock(v->domain);
   1.431  
   1.432 -    addr = emulate_map_dest(v, vaddr, sh_ctxt, &mfn);
   1.433 +    addr = emulate_map_dest(v, vaddr, 8, sh_ctxt);
   1.434      if ( addr == NULL )
   1.435      {
   1.436          shadow_unlock(v->domain);
   1.437 @@ -4163,25 +4252,12 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v,
   1.438  
   1.439      old = (((u64) old_hi) << 32) | (u64) old_lo;
   1.440      new = (((u64) new_hi) << 32) | (u64) new_lo;
   1.441 -    skip = safe_not_to_verify_write(mfn, &new, &old, 8);
   1.442      prev = cmpxchg(((u64 *)addr), old, new);
   1.443  
   1.444 -    if ( prev == old )
   1.445 -    {
   1.446 -        if ( !skip ) sh_validate_guest_pt_write(v, mfn, addr, 8);
   1.447 -    }
   1.448 -    else
   1.449 +    if ( prev != old )
   1.450          rv = X86EMUL_CMPXCHG_FAILED;
   1.451  
   1.452 -    /* If we are writing zeros to this page, might want to unshadow */
   1.453 -    if ( *(u32 *)addr == 0 )
   1.454 -        check_for_early_unshadow(v, mfn);
   1.455 -    else
   1.456 -        reset_early_unshadow(v);
   1.457 -
   1.458 -    paging_mark_dirty(v->domain, mfn_x(mfn));
   1.459 -
   1.460 -    sh_unmap_domain_page(addr);
   1.461 +    emulate_unmap_dest(v, addr, 8, sh_ctxt);
   1.462      shadow_audit_tables(v);
   1.463      shadow_unlock(v->domain);
   1.464      return rv;
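
A note on the page-crossing case handled above: emulate_map_dest() maps the two destination frames at consecutive virtual addresses (reusing the vcpu's LDT slots, which HVM guests never use), so the emulated write or cmpxchg can be performed with a single access across the seam; emulate_unmap_dest() then validates the result as two writes, one per frame, before undoing the mapping. The split arithmetic is simple but easy to get wrong by one; the standalone program below (not Xen code, with a made-up example address) just demonstrates it:

    /* Demonstration of the b1/b2 split used by emulate_unmap_dest() when a
     * write crosses a page boundary.  Plain user-space C; PAGE_SIZE and the
     * example address are illustrative only. */
    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    int main(void)
    {
        unsigned long vaddr = 0xb7301ffeUL;  /* hypothetical destination VA */
        unsigned long bytes = 8;             /* e.g. one 64-bit PTE */
        unsigned long b1 = bytes, b2 = 0;

        if ( ((vaddr + bytes - 1) & PAGE_MASK) != (vaddr & PAGE_MASK) )
        {
            /* Crosses a boundary: validate as two writes, one per page. */
            b1 = PAGE_SIZE - (vaddr & ~PAGE_MASK);
            b2 = bytes - b1;
        }
        printf("first frame: %lu bytes, second frame: %lu bytes\n", b1, b2);
        return 0;   /* prints 2 and 6 for this example */
    }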
     2.1 --- a/xen/arch/x86/mm/shadow/private.h	Sat Dec 15 18:26:52 2007 +0000
     2.2 +++ b/xen/arch/x86/mm/shadow/private.h	Sat Dec 15 18:29:27 2007 +0000
     2.3 @@ -429,13 +429,6 @@ int shadow_cmpxchg_guest_entry(struct vc
     2.4  #undef pagetable_from_page
     2.5  #define pagetable_from_page(pg) pagetable_from_mfn(page_to_mfn(pg))
     2.6  
     2.7 -
     2.8 -#if GUEST_PAGING_LEVELS >= 3
     2.9 -# define is_lo_pte(_vaddr) (((_vaddr)&0x4)==0)
    2.10 -#else
    2.11 -# define is_lo_pte(_vaddr) (1)
    2.12 -#endif
    2.13 -
    2.14  static inline int
    2.15  sh_mfn_is_a_page_table(mfn_t gmfn)
    2.16  {
    2.17 @@ -664,14 +657,23 @@ static inline void sh_unpin(struct vcpu 
    2.18  struct sh_emulate_ctxt {
    2.19      struct x86_emulate_ctxt ctxt;
    2.20  
    2.21 -    /* [HVM] Cache of up to 31 bytes of instruction. */
    2.22 +    /* Cache of up to 31 bytes of instruction. */
    2.23      uint8_t insn_buf[31];
    2.24      uint8_t insn_buf_bytes;
    2.25      unsigned long insn_buf_eip;
    2.26  
    2.27 -    /* [HVM] Cache of segment registers already gathered for this emulation. */
    2.28 +    /* Cache of segment registers already gathered for this emulation. */
    2.29      unsigned int valid_seg_regs;
    2.30      struct segment_register seg_reg[6];
    2.31 +
    2.32 +    /* MFNs being written to in write/cmpxchg callbacks */
    2.33 +    mfn_t mfn1, mfn2;
    2.34 +
    2.35 +#if (SHADOW_OPTIMIZATIONS & SHOPT_SKIP_VERIFY)
    2.36 +    /* Special case for avoiding having to verify writes: remember 
    2.37 +     * whether the old value had its low bit (_PAGE_PRESENT) clear. */
    2.38 +    int low_bit_was_clear:1;
    2.39 +#endif
    2.40  };
    2.41  
    2.42  struct x86_emulate_ops *shadow_init_emulation(
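
The three new sh_emulate_ctxt fields carry state between the map and unmap halves of a single emulated write: mfn1/mfn2 are the frame(s) the destination resolves to (mfn2 stays INVALID_MFN in the common single-page case), and low_bit_was_clear records whether _PAGE_PRESENT was clear before the write, so the re-verify step can be skipped if it is still clear afterwards. In outline, the write callback in multi.c uses them as sketched here (a condensed restatement; locking and the PV alignment check are omitted):

    addr = emulate_map_dest(v, vaddr, bytes, sh_ctxt);  /* fills mfn1, mfn2,
                                                         * low_bit_was_clear */
    if ( addr == NULL )
        return X86EMUL_EXCEPTION;
    memcpy(addr, src, bytes);                 /* may span the mfn1/mfn2 seam */
    emulate_unmap_dest(v, addr, bytes, sh_ctxt);  /* validate, mark dirty,
                                                   * undo the mapping */
    return X86EMUL_OKAY;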
     3.1 --- a/xen/include/asm-x86/hvm/hvm.h	Sat Dec 15 18:26:52 2007 +0000
     3.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Sat Dec 15 18:29:27 2007 +0000
     3.3 @@ -144,6 +144,8 @@ u64 hvm_get_guest_tsc(struct vcpu *v);
     3.4  
     3.5  #define hvm_paging_enabled(v) \
     3.6      (!!((v)->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG))
     3.7 +#define hvm_wp_enabled(v) \
     3.8 +    (!!((v)->arch.hvm_vcpu.guest_cr[0] & X86_CR0_WP))
     3.9  #define hvm_pae_enabled(v) \
    3.10      (hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE))
    3.11  #define hvm_nx_enabled(v) \
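
For reference, hvm_wp_enabled() follows the same pattern as the neighbouring macros: CR0.WP is bit 16 of CR0, and the test is against the guest's virtual CR0 (guest_cr[0]), which for an HVM guest may differ from the value Xen actually loads into hardware, as the sh_page_fault() comment above notes. A standalone illustration (not Xen code; the sample CR0 values are made up but typical):

    #include <stdio.h>
    #include <stdint.h>

    #define X86_CR0_WP (1UL << 16)            /* architectural bit position */

    static int wp_enabled(uint64_t guest_cr0)
    {
        return !!(guest_cr0 & X86_CR0_WP);
    }

    int main(void)
    {
        uint64_t wp_set   = 0x80010033;       /* PG|WP|NE|ET|MP|PE */
        uint64_t wp_clear = 0x80000033;       /* same, WP cleared  */
        printf("WP set:   %d\n", wp_enabled(wp_set));
        printf("WP clear: %d\n", wp_enabled(wp_clear));
        return 0;
    }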