ia64/xen-unstable

changeset 4239:2e28ec68d40b

bitkeeper revision 1.1236.32.14 (423eb7a0HqJL37tAErMbIXIQw6Q3Jg)

Added prediction of where to find the last writable PTE for a given page;
greatly speeds up promotion of a page to be used as a page table.

Removed some broken concepts of write protecting PDEs and higher level
entries. To write protect a page, all we need to do is write protect all
L1 entries that point at it.

Fixed a bug with translated IO pages; gotta check that MFNs are really backed
by RAM before we go looking in the frame_table for them...

Signed-off-by: michael.fetterman@cl.cam.ac.uk
author mafetter@fleming.research
date Mon Mar 21 12:01:36 2005 +0000 (2005-03-21)
parents 170dd1839b49
children e113e917d480
files xen/arch/x86/audit.c xen/arch/x86/mm.c xen/arch/x86/shadow.c xen/include/asm-x86/mm.h xen/include/asm-x86/shadow.h xen/include/xen/perfc.h xen/include/xen/perfc_defn.h
line diff
     1.1 --- a/xen/arch/x86/audit.c	Sat Mar 19 15:06:34 2005 +0000
     1.2 +++ b/xen/arch/x86/audit.c	Mon Mar 21 12:01:36 2005 +0000
     1.3 @@ -333,22 +333,26 @@ int audit_adjust_pgtables(struct domain 
     1.4                  smfn = a->smfn;
     1.5                  page = &frame_table[smfn];
     1.6  
     1.7 -                adjust(pfn_to_page(gmfn), 0);
     1.8 -
     1.9                  switch ( a->gpfn_and_flags & PGT_type_mask ) {
    1.10 +                case PGT_writable_pred:
    1.11 +                    break;
    1.12                  case PGT_snapshot:
    1.13 +                    adjust(pfn_to_page(gmfn), 0);
    1.14                      break;
    1.15                  case PGT_l1_shadow:
    1.16 +                    adjust(pfn_to_page(gmfn), 0);
    1.17                      adjust_l1_page(smfn);
    1.18                      if ( page->u.inuse.type_info & PGT_pinned )
    1.19                          adjust(page, 0);
    1.20                      break;
    1.21                  case PGT_hl2_shadow:
    1.22 +                    adjust(pfn_to_page(gmfn), 0);
    1.23                      adjust_hl2_page(smfn);
    1.24                      if ( page->u.inuse.type_info & PGT_pinned )
    1.25                          adjust(page, 0);
    1.26                      break;
    1.27                  case PGT_l2_shadow:
    1.28 +                    adjust(pfn_to_page(gmfn), 0);
    1.29                      adjust_l2_page(smfn);
    1.30                      if ( page->u.inuse.type_info & PGT_pinned )
    1.31                          adjust(page, 0);
    1.32 @@ -619,6 +623,7 @@ void _audit_domain(struct domain *d, int
    1.33                          scan_for_pfn_in_mfn(d, xmfn, a->smfn);
    1.34                          break;
    1.35                      case PGT_snapshot:
    1.36 +                    case PGT_writable_pred:
    1.37                          break;
    1.38                      default:
    1.39                          BUG();
    1.40 @@ -835,6 +840,9 @@ void _audit_domain(struct domain *d, int
    1.41                          errors++;
    1.42                      }
    1.43                      break;
    1.44 +                case PGT_writable_pred:
    1.45 +                    // XXX - nothing to check?
    1.46 +                    break;
    1.47  
    1.48                  default:
    1.49                      BUG();
     2.1 --- a/xen/arch/x86/mm.c	Sat Mar 19 15:06:34 2005 +0000
     2.2 +++ b/xen/arch/x86/mm.c	Mon Mar 21 12:01:36 2005 +0000
     2.3 @@ -268,7 +268,7 @@ int map_ldt_shadow_page(unsigned int off
     2.4      if ( unlikely(shadow_mode_enabled(d)) )
     2.5      {
     2.6          shadow_lock(d);
     2.7 -        shadow_remove_all_write_access(d, PGT_l1_shadow, PGT_l1_shadow, gpfn, gmfn);
     2.8 +        shadow_remove_all_write_access(d, gpfn, gmfn);
     2.9      }
    2.10  
    2.11      res = get_page_and_type(&frame_table[gmfn], d, PGT_ldt_page);
     3.1 --- a/xen/arch/x86/shadow.c	Sat Mar 19 15:06:34 2005 +0000
     3.2 +++ b/xen/arch/x86/shadow.c	Mon Mar 21 12:01:36 2005 +0000
     3.3 @@ -48,7 +48,6 @@ static inline int
     3.4  shadow_promote(struct domain *d, unsigned long gpfn, unsigned long gmfn,
     3.5                 unsigned long new_type)
     3.6  {
     3.7 -    unsigned long min_type, max_type;
     3.8      struct pfn_info *page = pfn_to_page(gmfn);
     3.9      int pinned = 0, okay = 1;
    3.10  
    3.11 @@ -61,20 +60,11 @@ shadow_promote(struct domain *d, unsigne
    3.12      }
    3.13  
    3.14      if ( unlikely(page_is_page_table(page)) )
    3.15 -    {
    3.16 -        min_type = shadow_max_pgtable_type(d, gpfn) + PGT_l1_shadow;
    3.17 -        max_type = new_type;
    3.18 -    }
    3.19 -    else
    3.20 -    {
    3.21 -        min_type = PGT_l1_shadow;
    3.22 -        max_type = PGT_l1_shadow;
    3.23 -    }
    3.24 -    FSH_LOG("shadow_promote gpfn=%p gmfn=%p nt=%p min=%p max=%p",
    3.25 -            gpfn, gmfn, new_type, min_type, max_type);
    3.26 -
    3.27 -    if ( (min_type <= max_type) &&
    3.28 -         !shadow_remove_all_write_access(d, min_type, max_type, gpfn, gmfn) )
    3.29 +        return 1;
    3.30 +
    3.31 +    FSH_LOG("shadow_promote gpfn=%p gmfn=%p nt=%p", gpfn, gmfn, new_type);
    3.32 +
    3.33 +    if ( !shadow_remove_all_write_access(d, gpfn, gmfn) )
    3.34          return 0;
    3.35  
    3.36      // To convert this page to use as a page table, the writable count
    3.37 @@ -1737,114 +1727,192 @@ int __shadow_out_of_sync(struct exec_dom
    3.38      return 0;
    3.39  }
    3.40  
    3.41 +#define GPFN_TO_GPTEPAGE(_gpfn) ((_gpfn) / (PAGE_SIZE / sizeof(l1_pgentry_t)))
    3.42 +static inline unsigned long
    3.43 +predict_writable_pte_page(struct domain *d, unsigned long gpfn)
    3.44 +{
    3.45 +    return __shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), PGT_writable_pred);
    3.46 +}
    3.47 +
    3.48 +static inline void
    3.49 +increase_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
    3.50 +{
    3.51 +    unsigned long score = prediction & PGT_score_mask;
    3.52 +    int create = (score == 0);
    3.53 +
    3.54 +    // saturating addition
    3.55 +    score = (score + (1u << PGT_score_shift)) & PGT_score_mask;
    3.56 +    score = score ? score : PGT_score_mask;
    3.57 +
    3.58 +    prediction = (prediction & PGT_mfn_mask) | score;
    3.59 +
    3.60 +    //printk("increase gpfn=%p pred=%p create=%d\n", gpfn, prediction, create);
    3.61 +    set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
    3.62 +
    3.63 +    if ( create )
    3.64 +        perfc_incr(writable_pte_predictions);
    3.65 +}
    3.66 +
    3.67 +static inline void
    3.68 +decrease_writable_pte_prediction(struct domain *d, unsigned long gpfn, unsigned long prediction)
    3.69 +{
    3.70 +    unsigned long score = prediction & PGT_score_mask;
    3.71 +    ASSERT(score);
    3.72 +
    3.73 +    // divide score by 2...  We don't like bad predictions.
    3.74 +    //
    3.75 +    score = (score >> 1) & PGT_score_mask;
    3.76 +
    3.77 +    prediction = (prediction & PGT_mfn_mask) | score;
    3.78 +
    3.79 +    //printk("decrease gpfn=%p pred=%p score=%p\n", gpfn, prediction, score);
    3.80 +
    3.81 +    if ( score )
    3.82 +        set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
    3.83 +    else
    3.84 +    {
    3.85 +        delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
    3.86 +        perfc_decr(writable_pte_predictions);
    3.87 +    }
    3.88 +}
    3.89 +
    3.90  static u32 remove_all_write_access_in_ptpage(
    3.91 -    struct domain *d, unsigned long pt_mfn, unsigned long readonly_mfn)
    3.92 +    struct domain *d, unsigned long pt_pfn, unsigned long pt_mfn,
    3.93 +    unsigned long readonly_gpfn, unsigned long readonly_gmfn,
    3.94 +    u32 max_refs_to_find, unsigned long prediction)
    3.95  {
    3.96      unsigned long *pt = map_domain_mem(pt_mfn << PAGE_SHIFT);
    3.97      unsigned long match =
    3.98 -        (readonly_mfn << PAGE_SHIFT) | _PAGE_RW | _PAGE_PRESENT;
    3.99 +        (readonly_gmfn << PAGE_SHIFT) | _PAGE_RW | _PAGE_PRESENT;
   3.100      unsigned long mask = PAGE_MASK | _PAGE_RW | _PAGE_PRESENT;
   3.101      int i;
   3.102 -    u32 count = 0;
   3.103 +    u32 found = 0;
   3.104      int is_l1_shadow =
   3.105          ((frame_table[pt_mfn].u.inuse.type_info & PGT_type_mask) ==
   3.106           PGT_l1_shadow);
   3.107  
   3.108 +#define MATCH_ENTRY(_i) (((pt[_i] ^ match) & mask) == 0)
   3.109 +
   3.110 +    // returns true if all refs have been found and fixed.
   3.111 +    //
   3.112 +    int fix_entry(int i)
   3.113 +    {
   3.114 +        unsigned long old = pt[i];
   3.115 +        unsigned long new = old & ~_PAGE_RW;
   3.116 +
   3.117 +        if ( is_l1_shadow && !shadow_get_page_from_l1e(mk_l1_pgentry(new), d) )
   3.118 +            BUG();
   3.119 +        found++;
   3.120 +        pt[i] = new;
   3.121 +        if ( is_l1_shadow )
   3.122 +            put_page_from_l1e(mk_l1_pgentry(old), d);
   3.123 +
   3.124 +#if 0
   3.125 +        printk("removed write access to pfn=%p mfn=%p in smfn=%p entry %x "
   3.126 +               "is_l1_shadow=%d\n",
   3.127 +               readonly_gpfn, readonly_gmfn, pt_mfn, i, is_l1_shadow);
   3.128 +#endif
   3.129 +
   3.130 +        return (found == max_refs_to_find);
   3.131 +    }
   3.132 +
   3.133 +    if ( MATCH_ENTRY(readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1)) &&
   3.134 +         fix_entry(readonly_gpfn & (L1_PAGETABLE_ENTRIES - 1)) )
   3.135 +    {
   3.136 +        perfc_incrc(remove_write_fast_exit);
   3.137 +        increase_writable_pte_prediction(d, readonly_gpfn, prediction);
   3.138 +        unmap_domain_mem(pt);
   3.139 +        return found;
   3.140 +    }
   3.141 + 
   3.142      for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
   3.143      {
   3.144 -        if ( unlikely(((pt[i] ^ match) & mask) == 0) )
   3.145 -        {
   3.146 -            unsigned long old = pt[i];
   3.147 -            unsigned long new = old & ~_PAGE_RW;
   3.148 -
   3.149 -            if ( is_l1_shadow &&
   3.150 -                 !shadow_get_page_from_l1e(mk_l1_pgentry(new), d) )
   3.151 -                BUG();
   3.152 -
   3.153 -            count++;
   3.154 -            pt[i] = new;
   3.155 -
   3.156 -            if ( is_l1_shadow )
   3.157 -                put_page_from_l1e(mk_l1_pgentry(old), d);
   3.158 -
   3.159 -            FSH_LOG("removed write access to mfn=%p in smfn=%p entry %x "
   3.160 -                    "is_l1_shadow=%d",
   3.161 -                    readonly_mfn, pt_mfn, i, is_l1_shadow);
   3.162 -        }
   3.163 +        if ( unlikely(MATCH_ENTRY(i)) && fix_entry(i) )
   3.164 +            break;
   3.165      }
   3.166  
   3.167      unmap_domain_mem(pt);
   3.168  
   3.169 -    return count;
   3.170 +    return found;
   3.171 +#undef MATCH_ENTRY
   3.172  }
   3.173  
   3.174  int shadow_remove_all_write_access(
   3.175 -    struct domain *d, unsigned min_type, unsigned max_type,
   3.176 -    unsigned long gpfn, unsigned long gmfn)
   3.177 +    struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
   3.178  {
   3.179      int i;
   3.180      struct shadow_status *a;
   3.181 -    unsigned long sl1mfn = __shadow_status(d, gpfn, PGT_l1_shadow);
   3.182 -    u32 count = 0;
   3.183 -    u32 write_refs;
   3.184 +    u32 found = 0, fixups, write_refs;
   3.185 +    unsigned long prediction, predicted_gpfn, predicted_smfn;
   3.186  
   3.187      ASSERT(spin_is_locked(&d->arch.shadow_lock));
   3.188 -    ASSERT(gmfn);
   3.189 +    ASSERT(VALID_MFN(readonly_gmfn));
   3.190  
   3.191      perfc_incrc(remove_write_access);
   3.192  
   3.193 -    if ( (frame_table[gmfn].u.inuse.type_info & PGT_type_mask) ==
   3.194 +    // If it's not a writable page, then no writable refs can be outstanding.
   3.195 +    //
   3.196 +    if ( (frame_table[readonly_gmfn].u.inuse.type_info & PGT_type_mask) !=
   3.197           PGT_writable_page )
   3.198      {
   3.199 -        write_refs = (frame_table[gmfn].u.inuse.type_info & PGT_count_mask);
   3.200 -        if ( write_refs &&
   3.201 -             (frame_table[gmfn].u.inuse.type_info & PGT_pinned) )
   3.202 -            write_refs--;
   3.203 -        if ( write_refs == 0 )
   3.204 +        perfc_incrc(remove_write_not_writable);
   3.205 +        return 1;
   3.206 +    }
   3.207 +
   3.208 +    // How many outstanding writable PTEs for this page are there?
   3.209 +    //
   3.210 +    write_refs = (frame_table[readonly_gmfn].u.inuse.type_info & PGT_count_mask);
   3.211 +    if ( write_refs && (frame_table[readonly_gmfn].u.inuse.type_info & PGT_pinned) )
   3.212 +        write_refs--;
   3.213 +
   3.214 +    if ( write_refs == 0 )
   3.215 +    {
   3.216 +        perfc_incrc(remove_write_no_work);
   3.217 +        return 1;
   3.218 +    }
   3.219 +
   3.220 +    // Before searching all the L1 page tables, check the typical culprit first.
   3.221 +    //
   3.222 +    if ( (prediction = predict_writable_pte_page(d, readonly_gpfn)) )
   3.223 +    {
   3.224 +        predicted_gpfn = prediction & PGT_mfn_mask;
   3.225 +        if ( (predicted_smfn = __shadow_status(d, predicted_gpfn, PGT_l1_shadow)) &&
   3.226 +             (fixups = remove_all_write_access_in_ptpage(d, predicted_gpfn, predicted_smfn, readonly_gpfn, readonly_gmfn, write_refs, prediction)) )
   3.227          {
   3.228 -            perfc_incrc(remove_write_access_easy);
   3.229 -            return 1;
   3.230 +            found += fixups;
   3.231 +            if ( found == write_refs )
   3.232 +            {
   3.233 +                perfc_incrc(remove_write_predicted);
   3.234 +                return 1;
   3.235 +            }
   3.236 +        }
   3.237 +        else
   3.238 +        {
   3.239 +            perfc_incrc(remove_write_bad_prediction);
   3.240 +            decrease_writable_pte_prediction(d, readonly_gpfn, prediction);
   3.241          }
   3.242      }
   3.243  
   3.244 +    // Search all the shadow L1 page tables...
   3.245 +    //
   3.246      for (i = 0; i < shadow_ht_buckets; i++)
   3.247      {
   3.248          a = &d->arch.shadow_ht[i];
   3.249          while ( a && a->gpfn_and_flags )
   3.250          {
   3.251 -            if ( ((a->gpfn_and_flags & PGT_type_mask) >= min_type) &&
   3.252 -                 ((a->gpfn_and_flags & PGT_type_mask) <= max_type) )
   3.253 +            if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow )
   3.254              {
   3.255 -                switch ( a->gpfn_and_flags & PGT_type_mask )
   3.256 -                {
   3.257 -                case PGT_l1_shadow:
   3.258 -                    count +=
   3.259 -                        remove_all_write_access_in_ptpage(d, a->smfn, gmfn);
   3.260 -                    if ( count == write_refs )
   3.261 -                        return 1;
   3.262 -                    break;
   3.263 -                case PGT_l2_shadow:
   3.264 -                    if ( sl1mfn )
   3.265 -                        count +=
   3.266 -                            remove_all_write_access_in_ptpage(d, a->smfn,
   3.267 -                                                              sl1mfn);
   3.268 -                    if ( count == write_refs )
   3.269 -                        return 1;
   3.270 -                    break;
   3.271 -                case PGT_hl2_shadow:
   3.272 -                    // nothing to do here...
   3.273 -                    break;
   3.274 -                default:
   3.275 -                    // need to flush this out for 4 level page tables.
   3.276 -                    BUG();
   3.277 -                }
   3.278 +                found += remove_all_write_access_in_ptpage(d, a->gpfn_and_flags & PGT_mfn_mask, a->smfn, readonly_gpfn, readonly_gmfn, write_refs - found, a->gpfn_and_flags & PGT_mfn_mask);
   3.279 +                if ( found == write_refs )
   3.280 +                    return 1;
   3.281              }
   3.282 +
   3.283              a = a->next;
   3.284          }
   3.285      }
   3.286  
   3.287      FSH_LOG("%s: looking for %d refs, found %d refs\n",
   3.288 -            __func__, write_refs, count);
   3.289 +            __func__, write_refs, found);
   3.290  
   3.291      return 0;
   3.292  }
   3.293 @@ -1881,7 +1949,7 @@ static u32 remove_all_access_in_page(
   3.294      return count;
   3.295  }
   3.296  
   3.297 -u32 shadow_remove_all_access(struct domain *d, unsigned long gmfn)
   3.298 +u32 shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn)
   3.299  {
   3.300      int i;
   3.301      struct shadow_status *a;
   3.302 @@ -1894,11 +1962,23 @@ u32 shadow_remove_all_access(struct doma
   3.303          a = &d->arch.shadow_ht[i];
   3.304          while ( a && a->gpfn_and_flags )
   3.305          {
   3.306 -            if ( ((a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow) ||
   3.307 -                 ((a->gpfn_and_flags & PGT_type_mask) == PGT_hl2_shadow) )
   3.308 +            switch (a->gpfn_and_flags & PGT_type_mask)
   3.309              {
   3.310 -                count += remove_all_access_in_page(d, a->smfn, gmfn);
   3.311 +            case PGT_l1_shadow:
   3.312 +            case PGT_l2_shadow:
   3.313 +            case PGT_l3_shadow:
   3.314 +            case PGT_l4_shadow:
   3.315 +            case PGT_hl2_shadow:
   3.316 +                count += remove_all_access_in_page(d, a->smfn, forbidden_gmfn);
   3.317 +                break;
   3.318 +            case PGT_snapshot:
   3.319 +            case PGT_writable_pred:
   3.320 +                // these can't hold refs to the forbidden page
   3.321 +                break;
   3.322 +            default:
   3.323 +                BUG();
   3.324              }
   3.325 +
   3.326              a = a->next;
   3.327          }
   3.328      }
   3.329 @@ -2659,6 +2739,7 @@ int _check_all_pagetables(struct exec_do
   3.330                  BUG(); // XXX - ought to fix this...
   3.331                  break;
   3.332              case PGT_snapshot:
    3.333 +            case PGT_writable_pred:
   3.334                  break;
   3.335              default:
   3.336                  errors++;
     4.1 --- a/xen/include/asm-x86/mm.h	Sat Mar 19 15:06:34 2005 +0000
     4.2 +++ b/xen/include/asm-x86/mm.h	Mon Mar 21 12:01:36 2005 +0000
     4.3 @@ -76,6 +76,7 @@ struct pfn_info
     4.4  #define PGT_l4_shadow       PGT_l4_page_table
     4.5  #define PGT_hl2_shadow      (5<<29)
     4.6  #define PGT_snapshot        (6<<29)
     4.7 +#define PGT_writable_pred   (7<<29) /* predicted gpfn with writable ref */
     4.8  
     4.9  #define PGT_type_mask       (7<<29) /* Bits 29-31. */
    4.10  
    4.11 @@ -95,7 +96,10 @@ struct pfn_info
    4.12   /* 17-bit count of uses of this frame as its current type. */
    4.13  #define PGT_count_mask      ((1U<<17)-1)
    4.14  
    4.15 -#define PGT_mfn_mask        ((1U<<21)-1) /* mfn mask for shadow types */
    4.16 +#define PGT_mfn_mask        ((1U<<20)-1) /* mfn mask for shadow types */
    4.17 +
    4.18 +#define PGT_score_shift     20
    4.19 +#define PGT_score_mask      (((1U<<4)-1)<<PGT_score_shift)
    4.20  
    4.21   /* Cleared when the owning guest 'frees' this page. */
    4.22  #define _PGC_allocated      31
    4.23 @@ -169,8 +173,7 @@ int alloc_page_type(struct pfn_info *pag
    4.24  void free_page_type(struct pfn_info *page, unsigned int type);
    4.25  extern void invalidate_shadow_ldt(struct exec_domain *d);
    4.26  extern int shadow_remove_all_write_access(
    4.27 -    struct domain *d, unsigned min_type, unsigned max_type,
    4.28 -    unsigned long gpfn, unsigned long gmfn);
    4.29 +    struct domain *d, unsigned long gpfn, unsigned long gmfn);
    4.30  extern u32 shadow_remove_all_access( struct domain *d, unsigned long gmfn);
    4.31  
    4.32  static inline void put_page(struct pfn_info *page)
     5.1 --- a/xen/include/asm-x86/shadow.h	Sat Mar 19 15:06:34 2005 +0000
     5.2 +++ b/xen/include/asm-x86/shadow.h	Mon Mar 21 12:01:36 2005 +0000
     5.3 @@ -182,8 +182,12 @@ extern unsigned long gpfn_to_mfn_safe(
     5.4  
     5.5  struct shadow_status {
     5.6      unsigned long gpfn_and_flags; /* Guest pfn plus flags. */
     5.7 -    struct shadow_status *next;   /* Pull-to-front list.   */
     5.8 +    struct shadow_status *next;   /* Pull-to-front list per hash bucket. */
     5.9      unsigned long smfn;           /* Shadow mfn.           */
    5.10 +
    5.11 +    // Pull-to-front list of L1s/L2s from which we check when removing
    5.12 +    // write access to a page.
    5.13 +    //struct list_head next_to_check;
    5.14  };
    5.15  
    5.16  #define shadow_ht_extra_size 128
    5.17 @@ -625,7 +629,7 @@ static inline void hl2e_propagate_from_g
    5.18          else
    5.19              mfn = __gpfn_to_mfn(d, pfn);
    5.20  
    5.21 -        if ( VALID_MFN(mfn) )
    5.22 +        if ( VALID_MFN(mfn) && (mfn < max_page) )
    5.23              hl2e = (mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR;
    5.24      }
    5.25  
    5.26 @@ -838,17 +842,19 @@ static void shadow_audit(struct domain *
    5.27          perfc_value(shadow_l1_pages) +
    5.28          perfc_value(shadow_l2_pages) +
    5.29          perfc_value(hl2_table_pages) +
    5.30 -        perfc_value(snapshot_pages)
    5.31 +        perfc_value(snapshot_pages) +
    5.32 +        perfc_value(writable_pte_predictions)
    5.33          ) - live;
    5.34  #ifdef PERF_COUNTERS
    5.35      if ( (abs < -1) || (abs > 1) )
    5.36      {
    5.37 -        printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d\n",
    5.38 +        printk("live=%d free=%d l1=%d l2=%d hl2=%d snapshot=%d writable_ptes=%d\n",
    5.39                 live, free,
    5.40                 perfc_value(shadow_l1_pages),
    5.41                 perfc_value(shadow_l2_pages),
    5.42                 perfc_value(hl2_table_pages),
    5.43 -               perfc_value(snapshot_pages));
    5.44 +               perfc_value(snapshot_pages),
    5.45 +               perfc_value(writable_pte_predictions));
    5.46          BUG();
    5.47      }
    5.48  #endif
    5.49 @@ -941,13 +947,22 @@ static inline unsigned long __shadow_sta
    5.50      ASSERT(gpfn == (gpfn & PGT_mfn_mask));
    5.51      ASSERT(stype && !(stype & ~PGT_type_mask));
    5.52  
    5.53 -    if ( VALID_MFN(gmfn) &&
    5.54 -         ((stype != PGT_snapshot)
    5.55 -          ? !mfn_is_page_table(gmfn)
    5.56 -          : !mfn_out_of_sync(gmfn)) )
    5.57 +    if ( VALID_MFN(gmfn) && (gmfn < max_page) &&
    5.58 +         (stype != PGT_writable_pred) &&
    5.59 +         ((stype == PGT_snapshot)
    5.60 +          ? !mfn_out_of_sync(gmfn)
    5.61 +          : !mfn_is_page_table(gmfn)) )
    5.62      {
    5.63          perfc_incrc(shadow_status_shortcut);
    5.64 +#ifndef NDEBUG
    5.65          ASSERT(___shadow_status(d, gpfn, stype) == 0);
    5.66 +
    5.67 +        // Undo the affects of the above ASSERT on ___shadow_status()'s perf
    5.68 +        // counters.
    5.69 +        //
    5.70 +        perfc_decrc(shadow_status_calls);
    5.71 +        perfc_decrc(shadow_status_miss);
    5.72 +#endif
    5.73          return 0;
    5.74      }
    5.75  
    5.76 @@ -978,21 +993,26 @@ shadow_max_pgtable_type(struct domain *d
    5.77          {
    5.78              type = x->gpfn_and_flags & PGT_type_mask;
    5.79  
    5.80 -            // Treat an HL2 as if it's an L1
    5.81 -            //
    5.82 -            if ( type == PGT_hl2_shadow )
    5.83 +            switch ( type )
    5.84 +            {
    5.85 +            case PGT_hl2_shadow:
    5.86 +                // Treat an HL2 as if it's an L1
    5.87 +                //
    5.88                  type = PGT_l1_shadow;
    5.89 -
    5.90 -            // Ignore snapshots -- they don't in and of themselves constitute
    5.91 -            // treating a page as a page table
    5.92 -            //
    5.93 -            if ( type == PGT_snapshot )
    5.94 +                break;
    5.95 +            case PGT_snapshot:
    5.96 +            case PGT_writable_pred:
    5.97 +                // Ignore snapshots and writable-PTE predictions -- they don't
    5.98 +                // in and of themselves constitute treating a page as a page table
    5.99 +                //
   5.100                  goto next;
   5.101 -
   5.102 -            // Early exit if we found the max possible value
   5.103 -            //
   5.104 -            if ( type == PGT_base_page_table )
   5.105 +            case PGT_base_page_table:
   5.106 +                // Early exit if we found the max possible value
   5.107 +                //
   5.108                  return type;
   5.109 +            default:
   5.110 +                break;
   5.111 +            }
   5.112  
   5.113              if ( type > pttype )
   5.114                  pttype = type;
   5.115 @@ -1116,7 +1136,8 @@ static inline void delete_shadow_status(
   5.116  
   5.117   found:
   5.118      // release ref to page
   5.119 -    put_page(pfn_to_page(gmfn));
   5.120 +    if ( stype != PGT_writable_pred )
   5.121 +        put_page(pfn_to_page(gmfn));
   5.122  
   5.123      shadow_audit(d, 0);
   5.124  }
   5.125 @@ -1129,15 +1150,16 @@ static inline void set_shadow_status(
   5.126      int i;
   5.127      unsigned long key = gpfn | stype;
   5.128  
   5.129 -    SH_VVLOG("set gpfn=%p gmfn=%p smfn=%p t=%p\n", gpfn, gmfn, smfn, stype);
   5.130 +    SH_VVLOG("set gpfn=%p gmfn=%p smfn=%p t=%p", gpfn, gmfn, smfn, stype);
   5.131  
   5.132      ASSERT(spin_is_locked(&d->arch.shadow_lock));
   5.133  
   5.134      ASSERT(shadow_mode_translate(d) || gpfn);
   5.135      ASSERT(!(gpfn & ~PGT_mfn_mask));
   5.136 -    
   5.137 -    ASSERT(pfn_is_ram(gmfn)); // XXX need to be more graceful
   5.138 -    ASSERT(smfn && !(smfn & ~PGT_mfn_mask));
   5.139 +
   5.140 +    // XXX - need to be more graceful.
   5.141 +    ASSERT(VALID_MFN(gmfn));
   5.142 +
   5.143      ASSERT(stype && !(stype & ~PGT_type_mask));
   5.144  
   5.145      x = head = hash_bucket(d, gpfn);
   5.146 @@ -1149,17 +1171,24 @@ static inline void set_shadow_status(
   5.147      // grab a reference to the guest page to represent the entry in the shadow
   5.148      // hash table
   5.149      //
   5.150 -    get_page(pfn_to_page(gmfn), d);
   5.151 +    // XXX - Should PGT_writable_pred grab a page ref?
   5.152 +    //     - Who/how are these hash table entry refs flushed if/when a page
   5.153 +    //       is given away by the domain?
   5.154 +    //
   5.155 +    if ( stype != PGT_writable_pred )
   5.156 +        get_page(pfn_to_page(gmfn), d);
   5.157  
   5.158      /*
   5.159       * STEP 1. If page is already in the table, update it in place.
   5.160       */
   5.161      do
   5.162      {
   5.163 -        if ( x->gpfn_and_flags == key )
   5.164 +        if ( unlikely(x->gpfn_and_flags == key) )
   5.165          {
   5.166 -            BUG();
   5.167 +            if ( stype != PGT_writable_pred )
   5.168 +                BUG(); // we should never replace entries into the hash table
   5.169              x->smfn = smfn;
    5.170 +            // Only PGT_writable_pred reaches here (other types BUG() above),
    5.171 +            // and writable_pred entries never take a page ref -- see the
    5.172 +            // conditional get_page() above; gmfn may even be 0 for predictions.
    5.173 +            if ( stype != PGT_writable_pred )
    5.174 +                put_page(pfn_to_page(gmfn)); // already had a ref...
   5.171              goto done;
   5.172          }
   5.173  
   5.174 @@ -1221,6 +1250,13 @@ static inline void set_shadow_status(
   5.175  
   5.176   done:
   5.177      shadow_audit(d, 0);
   5.178 +
   5.179 +    if ( stype <= PGT_l4_shadow )
   5.180 +    {
   5.181 +        // add to front of list of pages to check when removing write
   5.182 +        // permissions for a page...
   5.183 +        //
   5.184 +    }
   5.185  }
   5.186  
   5.187  /************************************************************************/
     6.1 --- a/xen/include/xen/perfc.h	Sat Mar 19 15:06:34 2005 +0000
     6.2 +++ b/xen/include/xen/perfc.h	Mon Mar 21 12:01:36 2005 +0000
     6.3 @@ -65,6 +65,7 @@ extern struct perfcounter perfcounters;
     6.4  #define perfc_incr(x)     atomic_inc(&perfcounters.x[0])
     6.5  #define perfc_decr(x)     atomic_dec(&perfcounters.x[0])
     6.6  #define perfc_incrc(x)    atomic_inc(&perfcounters.x[smp_processor_id()])
     6.7 +#define perfc_decrc(x)    atomic_dec(&perfcounters.x[smp_processor_id()])
     6.8  #define perfc_incra(x,y)  \
     6.9    { if(y<(sizeof(perfcounters.x)/sizeof(*perfcounters.x))) \
    6.10      atomic_inc(&perfcounters.x[y]); }
     7.1 --- a/xen/include/xen/perfc_defn.h	Sat Mar 19 15:06:34 2005 +0000
     7.2 +++ b/xen/include/xen/perfc_defn.h	Mon Mar 21 12:01:36 2005 +0000
     7.3 @@ -38,6 +38,7 @@ PERFSTATUS( shadow_l2_pages, "current # 
     7.4  PERFSTATUS( shadow_l1_pages, "current # shadow L1 pages" )
     7.5  PERFSTATUS( hl2_table_pages, "current # hl2 pages" )
     7.6  PERFSTATUS( snapshot_pages,  "current # fshadow snapshot pages" )
     7.7 +PERFSTATUS( writable_pte_predictions, "# writable pte predictions")
     7.8  
     7.9  PERFCOUNTER_CPU(shadow_status_shortcut, "fastpath miss on shadow cache")
    7.10  PERFCOUNTER_CPU(shadow_status_calls,    "calls to ___shadow_status" )
    7.11 @@ -73,4 +74,8 @@ PERFCOUNTER_CPU(write_fault_bail,       
    7.12  PERFCOUNTER_CPU(read_fault_bail,                   "sf bailed due to read_fault")
    7.13  PERFCOUNTER_CPU(exception_fixed,                   "pre-exception fixed")
    7.14  PERFCOUNTER_CPU(remove_write_access,               "calls to remove_write_access")
    7.15 -PERFCOUNTER_CPU(remove_write_access_easy,          "easy outs of remove_write_access")
    7.16 +PERFCOUNTER_CPU(remove_write_no_work,              "no work in remove_write_access")
    7.17 +PERFCOUNTER_CPU(remove_write_not_writable,         "remove_write non-writable page")
    7.18 +PERFCOUNTER_CPU(remove_write_fast_exit,            "remove_write hit predicted entry")
    7.19 +PERFCOUNTER_CPU(remove_write_predicted,            "remove_write predict hit&exit")
    7.20 +PERFCOUNTER_CPU(remove_write_bad_prediction,       "remove_write bad prediction")