ia64/xen-unstable

changeset 11084:9727328c008e

[XEN] Remove batched writable pagetable logic.

Benchmarks show it provides little or no benefit (except
on synthetic benchmarks). Also it is complicated and
likely to hinder efforts to reduce locking granularity.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Fri Aug 11 16:07:22 2006 +0100 (2006-08-11)
parents 1d817bfc5ed9
children 44aa6f86830d
files xen/arch/ia64/xen/mm.c xen/arch/x86/domain.c xen/arch/x86/mm.c xen/arch/x86/traps.c xen/include/asm-ia64/mm.h xen/include/asm-powerpc/mm.h xen/include/asm-x86/domain.h xen/include/asm-x86/mm.h xen/include/asm-x86/perfc.h xen/include/xen/mm.h
line diff
     1.1 --- a/xen/arch/ia64/xen/mm.c	Fri Aug 11 14:22:54 2006 +0100
     1.2 +++ b/xen/arch/ia64/xen/mm.c	Fri Aug 11 16:07:22 2006 +0100
     1.3 @@ -1580,11 +1580,6 @@ void pgtable_quicklist_free(void *pgtabl
     1.4  	free_xenheap_page(pgtable_entry);
     1.5  }
     1.6  
     1.7 -void cleanup_writable_pagetable(struct domain *d)
     1.8 -{
     1.9 -  return;
    1.10 -}
    1.11 -
    1.12  void put_page_type(struct page_info *page)
    1.13  {
    1.14      u32 nx, x, y = page->u.inuse.type_info;
    1.15 @@ -1692,22 +1687,6 @@ int get_page_type(struct page_info *page
    1.16              {
    1.17                  if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
    1.18                  {
    1.19 -                    if ( current->domain == page_get_owner(page) )
    1.20 -                    {
    1.21 -                        /*
    1.22 -                         * This ensures functions like set_gdt() see up-to-date
    1.23 -                         * type info without needing to clean up writable p.t.
    1.24 -                         * state on the fast path.
    1.25 -                         */
    1.26 -                        LOCK_BIGLOCK(current->domain);
    1.27 -                        cleanup_writable_pagetable(current->domain);
    1.28 -                        y = page->u.inuse.type_info;
    1.29 -                        UNLOCK_BIGLOCK(current->domain);
    1.30 -                        /* Can we make progress now? */
    1.31 -                        if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) ||
    1.32 -                             ((y & PGT_count_mask) == 0) )
    1.33 -                            goto again;
    1.34 -                    }
    1.35                      if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
    1.36                           ((type & PGT_type_mask) != PGT_l1_page_table) )
    1.37                          MEM_LOG("Bad type (saw %08x != exp %08x) "
     2.1 --- a/xen/arch/x86/domain.c	Fri Aug 11 14:22:54 2006 +0100
     2.2 +++ b/xen/arch/x86/domain.c	Fri Aug 11 16:07:22 2006 +0100
     2.3 @@ -154,7 +154,7 @@ void free_vcpu_struct(struct vcpu *v)
     2.4  int arch_domain_create(struct domain *d)
     2.5  {
     2.6      l1_pgentry_t gdt_l1e;
     2.7 -    int vcpuid, pdpt_order, rc;
     2.8 +    int vcpuid, pdpt_order;
     2.9  #ifdef __x86_64__
    2.10      int i;
    2.11  #endif
    2.12 @@ -215,9 +215,6 @@ int arch_domain_create(struct domain *d)
    2.13          if ( (d->shared_info = alloc_xenheap_page()) == NULL )
    2.14              goto fail_nomem;
    2.15  
    2.16 -        if ( (rc = ptwr_init(d)) != 0 )
    2.17 -            goto fail_nomem;
    2.18 -
    2.19          memset(d->shared_info, 0, PAGE_SIZE);
    2.20          share_xen_page_with_guest(
    2.21              virt_to_page(d->shared_info), d, XENSHARE_writable);
    2.22 @@ -927,8 +924,6 @@ void domain_relinquish_resources(struct 
    2.23  
    2.24      BUG_ON(!cpus_empty(d->domain_dirty_cpumask));
    2.25  
    2.26 -    ptwr_destroy(d);
    2.27 -
    2.28      /* Drop the in-use references to page-table bases. */
    2.29      for_each_vcpu ( d, v )
    2.30      {
     3.1 --- a/xen/arch/x86/mm.c	Fri Aug 11 14:22:54 2006 +0100
     3.2 +++ b/xen/arch/x86/mm.c	Fri Aug 11 16:07:22 2006 +0100
     3.3 @@ -1669,10 +1669,8 @@ int get_page_type(struct page_info *page
     3.4                           *     enter a recursive loop via get_page_from_l1e()
     3.5                           *     during pagetable revalidation.
     3.6                           */
     3.7 -                        LOCK_BIGLOCK(current->domain);
     3.8 -                        cleanup_writable_pagetable(current->domain);
     3.9 +                        sync_pagetable_state(current->domain);
    3.10                          y = page->u.inuse.type_info;
    3.11 -                        UNLOCK_BIGLOCK(current->domain);
    3.12                          /* Can we make progress now? */
    3.13                          if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) ||
    3.14                               ((y & PGT_count_mask) == 0) )
    3.15 @@ -1750,8 +1748,6 @@ int new_guest_cr3(unsigned long mfn)
    3.16      int okay;
    3.17      unsigned long old_base_mfn;
    3.18  
    3.19 -    ASSERT(writable_pagetable_in_sync(d));
    3.20 -
    3.21      if ( shadow_mode_refcounts(d) )
    3.22      {
    3.23          okay = get_page_from_pagenr(mfn, d);
    3.24 @@ -1940,7 +1936,7 @@ int do_mmuext_op(
    3.25  
    3.26      LOCK_BIGLOCK(d);
    3.27  
    3.28 -    cleanup_writable_pagetable(d);
    3.29 +    sync_pagetable_state(d);
    3.30  
    3.31      if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
    3.32      {
    3.33 @@ -2193,7 +2189,7 @@ int do_mmu_update(
    3.34  
    3.35      LOCK_BIGLOCK(d);
    3.36  
    3.37 -    cleanup_writable_pagetable(d);
    3.38 +    sync_pagetable_state(d);
    3.39  
    3.40      if ( unlikely(shadow_mode_enabled(d)) )
    3.41          check_pagetable(v, "pre-mmu"); /* debug */
    3.42 @@ -2704,7 +2700,7 @@ int do_update_va_mapping(unsigned long v
    3.43  
    3.44      LOCK_BIGLOCK(d);
    3.45  
    3.46 -    cleanup_writable_pagetable(d);
    3.47 +    sync_pagetable_state(d);
    3.48  
    3.49      if ( unlikely(shadow_mode_enabled(d)) )
    3.50          check_pagetable(v, "pre-va"); /* debug */
    3.51 @@ -3102,131 +3098,6 @@ long arch_memory_op(int op, XEN_GUEST_HA
    3.52   * Writable Pagetables
    3.53   */
    3.54  
    3.55 -#ifdef VVERBOSE
    3.56 -int ptwr_debug = 0x0;
    3.57 -#define PTWR_PRINTK(_f, _a...) \
    3.58 - do { if ( unlikely(ptwr_debug) ) printk( _f , ## _a ); } while ( 0 )
    3.59 -#define PTWR_PRINT_WHICH (which ? 'I' : 'A')
    3.60 -#else
    3.61 -#define PTWR_PRINTK(_f, _a...) ((void)0)
    3.62 -#endif
    3.63 -
    3.64 -
    3.65 -#ifdef PERF_ARRAYS
    3.66 -
    3.67 -/**************** writeable pagetables profiling functions *****************/
    3.68 -
    3.69 -#define ptwr_eip_buckets        256
    3.70 -
    3.71 -int ptwr_eip_stat_threshold[] = {1, 10, 50, 100, L1_PAGETABLE_ENTRIES};
    3.72 -
    3.73 -#define ptwr_eip_stat_thresholdN (sizeof(ptwr_eip_stat_threshold)/sizeof(int))
    3.74 -
    3.75 -struct {
    3.76 -    unsigned long eip;
    3.77 -    domid_t       id;
    3.78 -    u32           val[ptwr_eip_stat_thresholdN];
    3.79 -} typedef ptwr_eip_stat_t;
    3.80 -
    3.81 -ptwr_eip_stat_t ptwr_eip_stats[ptwr_eip_buckets];
    3.82 -
    3.83 -static inline unsigned int ptwr_eip_stat_hash( unsigned long eip, domid_t id )
    3.84 -{
    3.85 -    return (((unsigned long) id) ^ eip ^ (eip>>8) ^ (eip>>16) ^ (eip>24)) % 
    3.86 -        ptwr_eip_buckets;
    3.87 -}
    3.88 -
    3.89 -static void ptwr_eip_stat_inc(u32 *n)
    3.90 -{
    3.91 -    unsigned int i, j;
    3.92 -
    3.93 -    if ( ++(*n) != 0 )
    3.94 -        return;
    3.95 -
    3.96 -    *n = ~0;
    3.97 -
    3.98 -    /* Re-scale all buckets. */
    3.99 -    for ( i = 0; i < ptwr_eip_buckets; i++ )
   3.100 -        for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
   3.101 -            ptwr_eip_stats[i].val[j] >>= 1;
   3.102 -}
   3.103 -
   3.104 -static void ptwr_eip_stat_update(unsigned long eip, domid_t id, int modified)
   3.105 -{
   3.106 -    unsigned int i, j, b;
   3.107 -
   3.108 -    i = b = ptwr_eip_stat_hash(eip, id);
   3.109 -
   3.110 -    do
   3.111 -    {
   3.112 -        if ( !ptwr_eip_stats[i].eip )
   3.113 -        {
   3.114 -            /* doesn't exist */
   3.115 -            ptwr_eip_stats[i].eip = eip;
   3.116 -            ptwr_eip_stats[i].id = id;
   3.117 -            memset(ptwr_eip_stats[i].val,0, sizeof(ptwr_eip_stats[i].val));
   3.118 -        }
   3.119 -
   3.120 -        if ( ptwr_eip_stats[i].eip == eip && ptwr_eip_stats[i].id == id)
   3.121 -        {
   3.122 -            for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
   3.123 -                if ( modified <= ptwr_eip_stat_threshold[j] )
   3.124 -                    break;
   3.125 -            BUG_ON(j >= ptwr_eip_stat_thresholdN);
   3.126 -            ptwr_eip_stat_inc(&ptwr_eip_stats[i].val[j]);
   3.127 -            return;
   3.128 -        }
   3.129 -
   3.130 -        i = (i+1) % ptwr_eip_buckets;
   3.131 -    }
   3.132 -    while ( i != b );
   3.133 -   
   3.134 -    printk("ptwr_eip_stat: too many EIPs in use!\n");
   3.135 -    
   3.136 -    ptwr_eip_stat_print();
   3.137 -    ptwr_eip_stat_reset();
   3.138 -}
   3.139 -
   3.140 -void ptwr_eip_stat_reset(void)
   3.141 -{
   3.142 -    memset(ptwr_eip_stats, 0, sizeof(ptwr_eip_stats));
   3.143 -}
   3.144 -
   3.145 -void ptwr_eip_stat_print(void)
   3.146 -{
   3.147 -    struct domain *e;
   3.148 -    domid_t d;
   3.149 -    unsigned int i, j;
   3.150 -
   3.151 -    for_each_domain( e )
   3.152 -    {
   3.153 -        d = e->domain_id;
   3.154 -
   3.155 -        for ( i = 0; i < ptwr_eip_buckets; i++ )
   3.156 -        {
   3.157 -            if ( !ptwr_eip_stats[i].eip || ptwr_eip_stats[i].id != d )
   3.158 -                continue;
   3.159 -
   3.160 -            printk("D %5d  eip %p ",
   3.161 -                   ptwr_eip_stats[i].id, (void *)ptwr_eip_stats[i].eip);
   3.162 -
   3.163 -            for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
   3.164 -                printk("<=%u %4u \t",
   3.165 -                       ptwr_eip_stat_threshold[j],
   3.166 -                       ptwr_eip_stats[i].val[j]);
   3.167 -            printk("\n");
   3.168 -        }
   3.169 -    }
   3.170 -}
   3.171 -
   3.172 -#else /* PERF_ARRAYS */
   3.173 -
   3.174 -#define ptwr_eip_stat_update(eip, id, modified) ((void)0)
   3.175 -
   3.176 -#endif
   3.177 -
   3.178 -/*******************************************************************/
   3.179 -
   3.180  /* Re-validate a given p.t. page, given its prior snapshot */
   3.181  int revalidate_l1(
   3.182      struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
   3.183 @@ -3277,112 +3148,6 @@ int revalidate_l1(
   3.184      return modified;
   3.185  }
   3.186  
   3.187 -
   3.188 -/* Flush the given writable p.t. page and write-protect it again. */
   3.189 -void ptwr_flush(struct domain *d, const int which)
   3.190 -{
   3.191 -    unsigned long l1va;
   3.192 -    l1_pgentry_t  *pl1e, pte, *ptep;
   3.193 -    l2_pgentry_t  *pl2e;
   3.194 -    unsigned int   modified;
   3.195 -
   3.196 -#ifdef CONFIG_X86_64
   3.197 -    struct vcpu *v = current;
   3.198 -    int user_mode = !(v->arch.flags & TF_kernel_mode);
   3.199 -#endif
   3.200 -
   3.201 -    ASSERT(!shadow_mode_enabled(d));
   3.202 -
   3.203 -    if ( unlikely(d->arch.ptwr[which].vcpu != current) )
   3.204 -        /* Don't use write_ptbase: it may switch to guest_user on x86/64! */
   3.205 -        __write_ptbase(pagetable_get_pfn(
   3.206 -            d->arch.ptwr[which].vcpu->arch.guest_table));
   3.207 -    else
   3.208 -        TOGGLE_MODE();
   3.209 -
   3.210 -    l1va = d->arch.ptwr[which].l1va;
   3.211 -    ptep = (l1_pgentry_t *)&linear_pg_table[l1_linear_offset(l1va)];
   3.212 -
   3.213 -    /*
   3.214 -     * STEP 1. Write-protect the p.t. page so no more updates can occur.
   3.215 -     */
   3.216 -
   3.217 -    if ( unlikely(__get_user(pte.l1, &ptep->l1)) )
   3.218 -    {
   3.219 -        MEM_LOG("ptwr: Could not read pte at %p", ptep);
   3.220 -        /*
   3.221 -         * Really a bug. We could read this PTE during the initial fault,
   3.222 -         * and pagetables can't have changed meantime.
   3.223 -         */
   3.224 -        BUG();
   3.225 -    }
   3.226 -    PTWR_PRINTK("[%c] disconnected_l1va at %p is %"PRIpte"\n",
   3.227 -                PTWR_PRINT_WHICH, ptep, l1e_get_intpte(pte));
   3.228 -    l1e_remove_flags(pte, _PAGE_RW);
   3.229 -
   3.230 -    /* Write-protect the p.t. page in the guest page table. */
   3.231 -    if ( unlikely(__put_user(pte, ptep)) )
   3.232 -    {
   3.233 -        MEM_LOG("ptwr: Could not update pte at %p", ptep);
   3.234 -        /*
   3.235 -         * Really a bug. We could write this PTE during the initial fault,
   3.236 -         * and pagetables can't have changed meantime.
   3.237 -         */
   3.238 -        BUG();
   3.239 -    }
   3.240 -
   3.241 -    /* Ensure that there are no stale writable mappings in any TLB. */
   3.242 -    /* NB. INVLPG is a serialising instruction: flushes pending updates. */
   3.243 -    flush_tlb_one_mask(d->domain_dirty_cpumask, l1va);
   3.244 -    PTWR_PRINTK("[%c] disconnected_l1va at %p now %"PRIpte"\n",
   3.245 -                PTWR_PRINT_WHICH, ptep, l1e_get_intpte(pte));
   3.246 -
   3.247 -    /*
   3.248 -     * STEP 2. Validate any modified PTEs.
   3.249 -     */
   3.250 -
   3.251 -    if ( likely(d == current->domain) )
   3.252 -    {
   3.253 -        pl1e = map_domain_page(l1e_get_pfn(pte));
   3.254 -        modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
   3.255 -        unmap_domain_page(pl1e);
   3.256 -        perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
   3.257 -        ptwr_eip_stat_update(d->arch.ptwr[which].eip, d->domain_id, modified);
   3.258 -        d->arch.ptwr[which].prev_nr_updates = modified;
   3.259 -    }
   3.260 -    else
   3.261 -    {
   3.262 -        /*
   3.263 -         * Must make a temporary global mapping, since we are running in the
   3.264 -         * wrong address space, so no access to our own mapcache.
   3.265 -         */
   3.266 -        pl1e = map_domain_page_global(l1e_get_pfn(pte));
   3.267 -        modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
   3.268 -        unmap_domain_page_global(pl1e);
   3.269 -    }
   3.270 -
   3.271 -    /*
   3.272 -     * STEP 3. Reattach the L1 p.t. page into the current address space.
   3.273 -     */
   3.274 -
   3.275 -    if ( which == PTWR_PT_ACTIVE )
   3.276 -    {
   3.277 -        pl2e = &__linear_l2_table[d->arch.ptwr[which].l2_idx];
   3.278 -        l2e_add_flags(*pl2e, _PAGE_PRESENT); 
   3.279 -    }
   3.280 -
   3.281 -    /*
   3.282 -     * STEP 4. Final tidy-up.
   3.283 -     */
   3.284 -
   3.285 -    d->arch.ptwr[which].l1va = 0;
   3.286 -
   3.287 -    if ( unlikely(d->arch.ptwr[which].vcpu != current) )
   3.288 -        write_ptbase(current);
   3.289 -    else 
   3.290 -        TOGGLE_MODE();
   3.291 -}
   3.292 -
   3.293  static int ptwr_emulated_update(
   3.294      unsigned long addr,
   3.295      paddr_t old,
   3.296 @@ -3390,7 +3155,7 @@ static int ptwr_emulated_update(
   3.297      unsigned int bytes,
   3.298      unsigned int do_cmpxchg)
   3.299  {
   3.300 -    unsigned long pfn, l1va;
   3.301 +    unsigned long pfn;
   3.302      struct page_info *page;
   3.303      l1_pgentry_t pte, ol1e, nl1e, *pl1e;
   3.304      struct domain *d = current->domain;
   3.305 @@ -3428,24 +3193,6 @@ static int ptwr_emulated_update(
   3.306          old  |= full;
   3.307      }
   3.308  
   3.309 -#if 0 /* XXX KAF: I don't think this can happen. */
   3.310 -    /*
   3.311 -     * We must not emulate an update to a PTE that is temporarily marked
   3.312 -     * writable by the batched ptwr logic, else we can corrupt page refcnts! 
   3.313 -     */
   3.314 -    if ( ((l1va = d->arch.ptwr[PTWR_PT_ACTIVE].l1va) != 0) &&
   3.315 -         (l1_linear_offset(l1va) == l1_linear_offset(addr)) )
   3.316 -        ptwr_flush(d, PTWR_PT_ACTIVE);
   3.317 -    if ( ((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) &&
   3.318 -         (l1_linear_offset(l1va) == l1_linear_offset(addr)) )
   3.319 -        ptwr_flush(d, PTWR_PT_INACTIVE);
   3.320 -#else
   3.321 -    BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_ACTIVE].l1va) != 0) &&
   3.322 -           (l1_linear_offset(l1va) == l1_linear_offset(addr)));
   3.323 -    BUG_ON(((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) &&
   3.324 -           (l1_linear_offset(l1va) == l1_linear_offset(addr)));
   3.325 -#endif
   3.326 -
   3.327      /* Read the PTE that maps the page being updated. */
   3.328      if ( __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
   3.329                            sizeof(pte)) )
   3.330 @@ -3545,239 +3292,53 @@ static struct x86_emulate_ops ptwr_emula
   3.331  int ptwr_do_page_fault(struct domain *d, unsigned long addr, 
   3.332                         struct cpu_user_regs *regs)
   3.333  {
   3.334 -    unsigned long    pfn;
   3.335 +    unsigned long     pfn;
   3.336      struct page_info *page;
   3.337 -    l1_pgentry_t    *pl1e, pte;
   3.338 -    l2_pgentry_t    *pl2e, l2e;
   3.339 -    int              which, flags;
   3.340 -    unsigned long    l2_idx;
   3.341 +    l1_pgentry_t      pte;
   3.342 +    l2_pgentry_t     *pl2e, l2e;
   3.343      struct x86_emulate_ctxt emul_ctxt;
   3.344  
   3.345 -    ASSERT(!shadow_mode_enabled(d));
   3.346 +    LOCK_BIGLOCK(d);
   3.347  
   3.348      /*
   3.349       * Attempt to read the PTE that maps the VA being accessed. By checking for
   3.350       * PDE validity in the L2 we avoid many expensive fixups in __get_user().
   3.351 -     * NB. The L2 entry cannot be detached due to existing ptwr work: the
   3.352 -     * caller already checked that.
   3.353       */
   3.354      pl2e = &__linear_l2_table[l2_linear_offset(addr)];
   3.355      if ( __copy_from_user(&l2e, pl2e, sizeof(l2e)) ||
   3.356          !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
   3.357           __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
   3.358                            sizeof(pte)) )
   3.359 -    {
   3.360 -        return 0;
   3.361 -    }
   3.362 +        goto bail;
   3.363  
   3.364      pfn  = l1e_get_pfn(pte);
   3.365      page = mfn_to_page(pfn);
   3.366  
   3.367 -#ifdef CONFIG_X86_64
   3.368 -#define WRPT_PTE_FLAGS (_PAGE_RW | _PAGE_PRESENT | _PAGE_USER)
   3.369 -#else
   3.370 -#define WRPT_PTE_FLAGS (_PAGE_RW | _PAGE_PRESENT)
   3.371 -#endif
   3.372 -
   3.373 -    /*
   3.374 -     * Check the required flags for a valid wrpt mapping. If the page is
   3.375 -     * already writable then we can return straight to the guest (SMP race).
   3.376 -     * We decide whether or not to propagate the fault by testing for write
   3.377 -     * permissions in page directories by writing back to the linear mapping.
   3.378 -     */
   3.379 -    if ( (flags = l1e_get_flags(pte) & WRPT_PTE_FLAGS) == WRPT_PTE_FLAGS )
   3.380 -        return __put_user(
   3.381 -            pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1) ?
   3.382 -            0 : EXCRET_not_a_fault;
   3.383 -
   3.384      /* We are looking only for read-only mappings of p.t. pages. */
   3.385 -    if ( ((flags | _PAGE_RW) != WRPT_PTE_FLAGS) ||
   3.386 +    if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
   3.387           ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
   3.388           ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
   3.389           (page_get_owner(page) != d) )
   3.390 -    {
   3.391 -        return 0;
   3.392 -    }
   3.393 -
   3.394 -#if 0 /* Leave this in as useful for debugging */ 
   3.395 -    goto emulate; 
   3.396 -#endif
   3.397 -
   3.398 -    PTWR_PRINTK("ptwr_page_fault on l1 pt at va %lx, pfn %lx, eip %lx\n",
   3.399 -                addr, pfn, (unsigned long)regs->eip);
   3.400 -    
   3.401 -    /* Get the L2 index at which this L1 p.t. is always mapped. */
   3.402 -    l2_idx = page->u.inuse.type_info & PGT_va_mask;
   3.403 -    if ( unlikely(l2_idx >= PGT_va_unknown) )
   3.404 -        goto emulate; /* Urk! This L1 is mapped in multiple L2 slots! */
   3.405 -    l2_idx >>= PGT_va_shift;
   3.406 -
   3.407 -    if ( unlikely(l2_idx == l2_linear_offset(addr)) )
   3.408 -        goto emulate; /* Urk! Pagetable maps itself! */
   3.409 -
   3.410 -    /*
   3.411 -     * Is the L1 p.t. mapped into the current address space? If so we call it
   3.412 -     * an ACTIVE p.t., otherwise it is INACTIVE.
   3.413 -     */
   3.414 -    pl2e = &__linear_l2_table[l2_idx];
   3.415 -    which = PTWR_PT_INACTIVE;
   3.416 -
   3.417 -    if ( (__get_user(l2e.l2, &pl2e->l2) == 0) && (l2e_get_pfn(l2e) == pfn) )
   3.418 -    {
   3.419 -        /*
   3.420 -         * Check the PRESENT bit to set ACTIVE mode.
   3.421 -         * If the PRESENT bit is clear, we may be conflicting with the current 
   3.422 -         * ACTIVE p.t. (it may be the same p.t. mapped at another virt addr).
   3.423 -         * The ptwr_flush call below will restore the PRESENT bit.
   3.424 -         */
   3.425 -        if ( likely(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
   3.426 -             (d->arch.ptwr[PTWR_PT_ACTIVE].l1va &&
   3.427 -              (l2_idx == d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx)) )
   3.428 -            which = PTWR_PT_ACTIVE;
   3.429 -    }
   3.430 -
   3.431 -    /*
   3.432 -     * Multi-processor guest? Then ensure that the page table is hooked into
   3.433 -     * at most one L2, and also ensure that there is only one mapping of the
   3.434 -     * page table itself (or there can be conflicting writable mappings from
   3.435 -     * other VCPUs).
   3.436 -     */
   3.437 -    if ( d->vcpu[0]->next_in_list != NULL )
   3.438 -    {
   3.439 -        if ( /* Hooked into at most one L2 table (which this VCPU maps)? */
   3.440 -             ((page->u.inuse.type_info & PGT_count_mask) != 
   3.441 -              (!!(page->u.inuse.type_info & PGT_pinned) +
   3.442 -               (which == PTWR_PT_ACTIVE))) ||
   3.443 -             /* PTEs are mapped read-only in only one place? */
   3.444 -             ((page->count_info & PGC_count_mask) !=
   3.445 -              (!!(page->count_info & PGC_allocated) +       /* alloc count */
   3.446 -               (page->u.inuse.type_info & PGT_count_mask) + /* type count  */
   3.447 -               1)) )                                        /* map count   */
   3.448 -        {
   3.449 -            /* Could be conflicting writable mappings from other VCPUs. */
   3.450 -            cleanup_writable_pagetable(d);
   3.451 -            goto emulate;
   3.452 -        }
   3.453 -    }
   3.454 -
   3.455 -    /*
   3.456 -     * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a
   3.457 -     * time. If there is already one, we must flush it out.
   3.458 -     */
   3.459 -    if ( d->arch.ptwr[which].l1va )
   3.460 -        ptwr_flush(d, which);
   3.461 -
   3.462 -    /*
   3.463 -     * If last batch made no updates then we are probably stuck. Emulate this 
   3.464 -     * update to ensure we make progress.
   3.465 -     */
   3.466 -    if ( d->arch.ptwr[which].prev_nr_updates == 0 )
   3.467 -    {
   3.468 -        /* Ensure that we don't get stuck in an emulation-only rut. */
   3.469 -        d->arch.ptwr[which].prev_nr_updates = 1;
   3.470 -        goto emulate;
   3.471 -    }
   3.472 -
   3.473 -    PTWR_PRINTK("[%c] batched ptwr_page_fault at va %lx, pt for %08lx, "
   3.474 -                "pfn %lx\n", PTWR_PRINT_WHICH, addr,
   3.475 -                l2_idx << L2_PAGETABLE_SHIFT, pfn);
   3.476 -
   3.477 -    /* For safety, disconnect the L1 p.t. page from current space. */
   3.478 -    if ( which == PTWR_PT_ACTIVE )
   3.479 -    {
   3.480 -        l2e_remove_flags(l2e, _PAGE_PRESENT);
   3.481 -        if ( unlikely(__copy_to_user(pl2e, &l2e, sizeof(l2e))) )
   3.482 -        {
   3.483 -            MEM_LOG("ptwr: Could not unhook l2e at %p", pl2e);
   3.484 -            domain_crash(d);
   3.485 -            return 0;
   3.486 -        }
   3.487 -        flush_tlb_mask(d->domain_dirty_cpumask);
   3.488 -    }
   3.489 -    
   3.490 -    /* Temporarily map the L1 page, and make a copy of it. */
   3.491 -    pl1e = map_domain_page(pfn);
   3.492 -    memcpy(d->arch.ptwr[which].page, pl1e, PAGE_SIZE);
   3.493 -    unmap_domain_page(pl1e);
   3.494 -
   3.495 -    /* Finally, make the p.t. page writable by the guest OS. */
   3.496 -    l1e_add_flags(pte, _PAGE_RW);
   3.497 -    if ( unlikely(__put_user(pte.l1,
   3.498 -                             &linear_pg_table[l1_linear_offset(addr)].l1)) )
   3.499 -    {
   3.500 -        MEM_LOG("ptwr: Could not update pte at %p",
   3.501 -                &linear_pg_table[l1_linear_offset(addr)]);
   3.502 -        domain_crash(d);
   3.503 -        return 0;
   3.504 -    }
   3.505 -    
   3.506 -    /*
   3.507 -     * Now record the writable pagetable state *after* any accesses that can
   3.508 -     * cause a recursive page fault (i.e., those via the *_user() accessors).
   3.509 -     * Otherwise we can enter ptwr_flush() with half-done ptwr state.
   3.510 -     */
   3.511 -    d->arch.ptwr[which].l1va   = addr | 1;
   3.512 -    d->arch.ptwr[which].l2_idx = l2_idx;
   3.513 -    d->arch.ptwr[which].vcpu   = current;
   3.514 -#ifdef PERF_ARRAYS
   3.515 -    d->arch.ptwr[which].eip    = regs->eip;
   3.516 -#endif
   3.517 -
   3.518 -    return EXCRET_fault_fixed;
   3.519 -
   3.520 - emulate:
   3.521 +        goto bail;
   3.522 +
   3.523      emul_ctxt.regs = guest_cpu_user_regs();
   3.524      emul_ctxt.cr2  = addr;
   3.525      emul_ctxt.mode = X86EMUL_MODE_HOST;
   3.526      if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) )
   3.527 -        return 0;
   3.528 +        goto bail;
   3.529 +
   3.530 +    UNLOCK_BIGLOCK(d);
   3.531      perfc_incrc(ptwr_emulations);
   3.532      return EXCRET_fault_fixed;
   3.533 -}
   3.534 -
   3.535 -int ptwr_init(struct domain *d)
   3.536 -{
   3.537 -    void *x = alloc_xenheap_page();
   3.538 -    void *y = alloc_xenheap_page();
   3.539 -
   3.540 -    if ( (x == NULL) || (y == NULL) )
   3.541 -    {
   3.542 -        free_xenheap_page(x);
   3.543 -        free_xenheap_page(y);
   3.544 -        return -ENOMEM;
   3.545 -    }
   3.546 -
   3.547 -    d->arch.ptwr[PTWR_PT_ACTIVE].page   = x;
   3.548 -    d->arch.ptwr[PTWR_PT_INACTIVE].page = y;
   3.549 -
   3.550 +
   3.551 + bail:
   3.552 +    UNLOCK_BIGLOCK(d);
   3.553      return 0;
   3.554  }
   3.555  
   3.556 -void ptwr_destroy(struct domain *d)
   3.557 -{
   3.558 -    LOCK_BIGLOCK(d);
   3.559 -    cleanup_writable_pagetable(d);
   3.560 -    UNLOCK_BIGLOCK(d);
   3.561 -    free_xenheap_page(d->arch.ptwr[PTWR_PT_ACTIVE].page);
   3.562 -    free_xenheap_page(d->arch.ptwr[PTWR_PT_INACTIVE].page);
   3.563 -}
   3.564 -
   3.565 -void cleanup_writable_pagetable(struct domain *d)
   3.566 +void sync_pagetable_state(struct domain *d)
   3.567  {
   3.568 -    if ( unlikely(!VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
   3.569 -        return;
   3.570 -
   3.571 -    if ( unlikely(shadow_mode_enabled(d)) )
   3.572 -    {
   3.573 -        shadow_sync_all(d);
   3.574 -    }
   3.575 -    else
   3.576 -    {
   3.577 -        if ( d->arch.ptwr[PTWR_PT_ACTIVE].l1va )
   3.578 -            ptwr_flush(d, PTWR_PT_ACTIVE);
   3.579 -        if ( d->arch.ptwr[PTWR_PT_INACTIVE].l1va )
   3.580 -            ptwr_flush(d, PTWR_PT_INACTIVE);
   3.581 -    }
   3.582 +    shadow_sync_all(d);
   3.583  }
   3.584  
   3.585  int map_pages_to_xen(
     4.1 --- a/xen/arch/x86/traps.c	Fri Aug 11 14:22:54 2006 +0100
     4.2 +++ b/xen/arch/x86/traps.c	Fri Aug 11 16:07:22 2006 +0100
     4.3 @@ -713,7 +713,7 @@ static int handle_gdt_ldt_mapping_fault(
     4.4      {
     4.5          /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
     4.6          LOCK_BIGLOCK(d);
     4.7 -        cleanup_writable_pagetable(d);
     4.8 +        sync_pagetable_state(d);
     4.9          ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
    4.10          UNLOCK_BIGLOCK(d);
    4.11  
    4.12 @@ -849,7 +849,7 @@ static int spurious_page_fault(
    4.13      int            is_spurious;
    4.14  
    4.15      LOCK_BIGLOCK(d);
    4.16 -    cleanup_writable_pagetable(d);
    4.17 +    sync_pagetable_state(d);
    4.18      is_spurious = __spurious_page_fault(addr, regs);
    4.19      UNLOCK_BIGLOCK(d);
    4.20  
    4.21 @@ -878,33 +878,11 @@ static int fixup_page_fault(unsigned lon
    4.22      if ( unlikely(shadow_mode_enabled(d)) )
    4.23          return shadow_fault(addr, regs);
    4.24  
    4.25 -    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
    4.26 -    {
    4.27 -        LOCK_BIGLOCK(d);
    4.28 -        if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
    4.29 -             unlikely(l2_linear_offset(addr) ==
    4.30 -                      d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
    4.31 -        {
    4.32 -            ptwr_flush(d, PTWR_PT_ACTIVE);
    4.33 -            UNLOCK_BIGLOCK(d);
    4.34 -            return EXCRET_fault_fixed;
    4.35 -        }
    4.36 -
    4.37 -        /*
    4.38 -         * Note it is *not* safe to check PGERR_page_present here. It can be
    4.39 -         * clear, due to unhooked page table, when we would otherwise expect
    4.40 -         * it to be set. We have an aversion to trusting that flag in Xen, and
    4.41 -         * guests ought to be leery too.
    4.42 -         */
    4.43 -        if ( guest_kernel_mode(v, regs) &&
    4.44 -             (regs->error_code & PGERR_write_access) &&
    4.45 -             ptwr_do_page_fault(d, addr, regs) )
    4.46 -        {
    4.47 -            UNLOCK_BIGLOCK(d);
    4.48 -            return EXCRET_fault_fixed;
    4.49 -        }
    4.50 -        UNLOCK_BIGLOCK(d);
    4.51 -    }
    4.52 +    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) &&
    4.53 +         guest_kernel_mode(v, regs) &&
    4.54 +         ((regs->error_code & (PGERR_write_access|PGERR_page_present)) ==
    4.55 +          (PGERR_write_access|PGERR_page_present)) )
    4.56 +        return ptwr_do_page_fault(d, addr, regs) ? EXCRET_fault_fixed : 0;
    4.57  
    4.58      return 0;
    4.59  }
    4.60 @@ -1324,7 +1302,7 @@ static int emulate_privileged_op(struct 
    4.61  
    4.62          case 3: /* Write CR3 */
    4.63              LOCK_BIGLOCK(v->domain);
    4.64 -            cleanup_writable_pagetable(v->domain);
    4.65 +            sync_pagetable_state(v->domain);
    4.66              (void)new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg)));
    4.67              UNLOCK_BIGLOCK(v->domain);
    4.68              break;
     5.1 --- a/xen/include/asm-ia64/mm.h	Fri Aug 11 14:22:54 2006 +0100
     5.2 +++ b/xen/include/asm-ia64/mm.h	Fri Aug 11 16:07:22 2006 +0100
     5.3 @@ -500,4 +500,6 @@ extern u64 translate_domain_pte(u64 ptev
     5.4  int steal_page(
     5.5      struct domain *d, struct page_info *page, unsigned int memflags);
     5.6  
     5.7 +#define sync_pagetable_state(d) ((void)0)
     5.8 +
     5.9  #endif /* __ASM_IA64_MM_H__ */
     6.1 --- a/xen/include/asm-powerpc/mm.h	Fri Aug 11 14:22:54 2006 +0100
     6.2 +++ b/xen/include/asm-powerpc/mm.h	Fri Aug 11 16:07:22 2006 +0100
     6.3 @@ -35,8 +35,6 @@
     6.4  extern unsigned long xenheap_phys_end;
     6.5  #define IS_XEN_HEAP_FRAME(_pfn) (page_to_mfn(_pfn) < xenheap_phys_end)
     6.6  
     6.7 -#define cleanup_writable_pagetable(_d)
     6.8 -
     6.9  /*
    6.10   * Per-page-frame information.
    6.11   * 
    6.12 @@ -226,4 +224,6 @@ static inline unsigned long gmfn_to_mfn(
    6.13  extern int steal_page(struct domain *d, struct page_info *page,
    6.14                          unsigned int memflags);
    6.15  
    6.16 +#define sync_pagetable_state(d) ((void)0)
    6.17 +
    6.18  #endif
     7.1 --- a/xen/include/asm-x86/domain.h	Fri Aug 11 14:22:54 2006 +0100
     7.2 +++ b/xen/include/asm-x86/domain.h	Fri Aug 11 16:07:22 2006 +0100
     7.3 @@ -70,9 +70,6 @@ struct arch_domain
     7.4      struct mapcache mapcache;
     7.5  #endif
     7.6  
     7.7 -    /* Writable pagetables. */
     7.8 -    struct ptwr_info ptwr[2];
     7.9 -
    7.10      /* I/O-port admin-specified access capabilities. */
    7.11      struct rangeset *ioport_caps;
    7.12  
     8.1 --- a/xen/include/asm-x86/mm.h	Fri Aug 11 14:22:54 2006 +0100
     8.2 +++ b/xen/include/asm-x86/mm.h	Fri Aug 11 16:07:22 2006 +0100
     8.3 @@ -198,7 +198,8 @@ static inline int get_page(struct page_i
     8.4               unlikely(d != _domain) )                /* Wrong owner? */
     8.5          {
     8.6              if ( !_shadow_mode_refcounts(domain) )
     8.7 -                DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" PRtype_info "\n",
     8.8 +                DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%"
     8.9 +                        PRtype_info "\n",
    8.10                          page_to_mfn(page), domain, unpickle_domptr(d),
    8.11                          x, page->u.inuse.type_info);
    8.12              return 0;
    8.13 @@ -307,48 +308,11 @@ void memguard_unguard_range(void *p, uns
    8.14  
    8.15  void memguard_guard_stack(void *p);
    8.16  
    8.17 -/* Writable Pagetables */
    8.18 -struct ptwr_info {
    8.19 -    /* Linear address where the guest is updating the p.t. page. */
    8.20 -    unsigned long l1va;
    8.21 -    /* Copy of the p.t. page, taken before guest is given write access. */
    8.22 -    l1_pgentry_t *page;
    8.23 -    /* Index in L2 page table where this L1 p.t. is always hooked. */
    8.24 -    unsigned int l2_idx; /* NB. Only used for PTWR_PT_ACTIVE. */
    8.25 -    /* Info about last ptwr update batch. */
    8.26 -    unsigned int prev_nr_updates;
    8.27 -    /* VCPU which created writable mapping. */
    8.28 -    struct vcpu *vcpu;
    8.29 -    /* EIP of the original write fault (stats collection only). */
    8.30 -    unsigned long eip;
    8.31 -};
    8.32 -
    8.33 -#define PTWR_PT_ACTIVE 0
    8.34 -#define PTWR_PT_INACTIVE 1
    8.35 -
    8.36 -#define PTWR_CLEANUP_ACTIVE 1
    8.37 -#define PTWR_CLEANUP_INACTIVE 2
    8.38 -
    8.39 -int  ptwr_init(struct domain *);
    8.40 -void ptwr_destroy(struct domain *);
    8.41 -void ptwr_flush(struct domain *, const int);
    8.42  int  ptwr_do_page_fault(struct domain *, unsigned long,
    8.43                          struct cpu_user_regs *);
    8.44  int  revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);
    8.45  
    8.46 -void cleanup_writable_pagetable(struct domain *d);
    8.47 -#define sync_pagetable_state(d)                                 \
    8.48 -    do {                                                        \
    8.49 -        LOCK_BIGLOCK(d);                                        \
    8.50 -        /* Avoid racing with ptwr_destroy(). */                 \
    8.51 -        if ( !test_bit(_DOMF_dying, &(d)->domain_flags) )       \
    8.52 -            cleanup_writable_pagetable(d);                      \
    8.53 -        UNLOCK_BIGLOCK(d);                                      \
    8.54 -    } while ( 0 )
    8.55 -
    8.56 -#define writable_pagetable_in_sync(d)           \
    8.57 -    (!((d)->arch.ptwr[PTWR_PT_ACTIVE].l1va |    \
    8.58 -       (d)->arch.ptwr[PTWR_PT_INACTIVE].l1va))
    8.59 +void sync_pagetable_state(struct domain *d);
    8.60  
    8.61  int audit_adjust_pgtables(struct domain *d, int dir, int noisy);
    8.62  
    8.63 @@ -370,18 +334,6 @@ void audit_domains(void);
    8.64  
    8.65  #endif
    8.66  
    8.67 -#ifdef PERF_ARRAYS
    8.68 -
    8.69 -void ptwr_eip_stat_reset(void);
    8.70 -void ptwr_eip_stat_print(void);
    8.71 -
    8.72 -#else
    8.73 -
    8.74 -#define ptwr_eip_stat_reset() ((void)0)
    8.75 -#define ptwr_eip_stat_print() ((void)0)
    8.76 -
    8.77 -#endif
    8.78 -
    8.79  int new_guest_cr3(unsigned long pfn);
    8.80  
    8.81  void propagate_page_fault(unsigned long addr, u16 error_code);
     9.1 --- a/xen/include/asm-x86/perfc.h	Fri Aug 11 14:22:54 2006 +0100
     9.2 +++ b/xen/include/asm-x86/perfc.h	Fri Aug 11 16:07:22 2006 +0100
     9.3 @@ -2,21 +2,15 @@
     9.4  #define __ASM_PERFC_H__
     9.5  #include <asm/mm.h>
     9.6  
     9.7 -static inline void arch_perfc_printall (void)
     9.8 +static inline void arch_perfc_printall(void)
     9.9  {
    9.10 -#ifdef PERF_ARRAYS
    9.11 -    ptwr_eip_stat_print();
    9.12 -#endif
    9.13  }
    9.14  
    9.15 -static inline void arch_perfc_reset (void)
    9.16 +static inline void arch_perfc_reset(void)
    9.17  {
    9.18 -#ifdef PERF_ARRAYS
    9.19 -    ptwr_eip_stat_reset();
    9.20 -#endif
    9.21  }
    9.22  
    9.23 -static inline void arch_perfc_gather (void)
    9.24 +static inline void arch_perfc_gather(void)
    9.25  {
    9.26  }
    9.27  
    10.1 --- a/xen/include/xen/mm.h	Fri Aug 11 14:22:54 2006 +0100
    10.2 +++ b/xen/include/xen/mm.h	Fri Aug 11 16:07:22 2006 +0100
    10.3 @@ -95,10 +95,6 @@ unsigned long avail_scrub_pages(void);
    10.4  
    10.5  #include <asm/mm.h>
    10.6  
    10.7 -#ifndef sync_pagetable_state
    10.8 -#define sync_pagetable_state(d) ((void)0)
    10.9 -#endif
   10.10 -
   10.11  int guest_remove_page(struct domain *d, unsigned long gmfn);
   10.12  
   10.13  #endif /* __XEN_MM_H__ */