ia64/xen-unstable

changeset 4800:dff93c0ff33e

bitkeeper revision 1.1389.5.36 (427f7513GY6Vv1b-toMAsdXzaHGvTQ)

Hand merge
author mafetter@fleming.research
date Mon May 09 14:34:59 2005 +0000 (2005-05-09)
parents 979aa5d4764e 251ac792d8c1
children 86285c9c18c1 3404966959f2
files xen/arch/x86/audit.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/mm.c xen/arch/x86/shadow.c xen/arch/x86/traps.c xen/arch/x86/vmx.c xen/include/asm-x86/mm.h xen/include/asm-x86/page.h xen/include/asm-x86/shadow.h xen/include/asm-x86/x86_32/domain_page.h xen/include/xen/lib.h xen/include/xen/perfc_defn.h
line diff
     1.1 --- a/xen/arch/x86/audit.c	Sun May 08 12:06:10 2005 +0000
     1.2 +++ b/xen/arch/x86/audit.c	Mon May 09 14:34:59 2005 +0000
     1.3 @@ -49,7 +49,8 @@ static int l1, l2, oos_count, page_count
     1.4  int audit_adjust_pgtables(struct domain *d, int dir, int noisy)
     1.5  {
     1.6      int errors = 0;
     1.7 -    int shadow_enabled = shadow_mode_enabled(d) ? 1 : 0;
     1.8 +    int shadow_refcounts = !!shadow_mode_refcounts(d);
     1.9 +    int shadow_enabled = !!shadow_mode_enabled(d);
    1.10      int l2limit;
    1.11  
    1.12      void _adjust(struct pfn_info *page, int adjtype ADJUST_EXTRA_ARGS)
    1.13 @@ -119,7 +120,7 @@ int audit_adjust_pgtables(struct domain 
    1.14              page->count_info += dir;
    1.15      }
    1.16  
    1.17 -    void adjust_l2_page(unsigned long mfn)
    1.18 +    void adjust_l2_page(unsigned long mfn, int shadow)
    1.19      {
    1.20          unsigned long *pt = map_domain_mem(mfn << PAGE_SHIFT);
    1.21          int i;
    1.22 @@ -133,7 +134,7 @@ int audit_adjust_pgtables(struct domain 
    1.23  
    1.24                  if ( noisy )
    1.25                  {
    1.26 -                    if ( shadow_enabled )
    1.27 +                    if ( shadow )
    1.28                      {
    1.29                          if ( page_get_owner(l1page) != NULL )
    1.30                          {
    1.31 @@ -145,6 +146,17 @@ int audit_adjust_pgtables(struct domain 
    1.32                              errors++;
    1.33                              continue;
    1.34                          }
    1.35 +
    1.36 +                        u32 page_type = l1page->u.inuse.type_info & PGT_type_mask;
    1.37 +
    1.38 +                        if ( page_type != PGT_l1_shadow )
    1.39 +                        {
    1.40 +                            printk("Audit %d: [Shadow L2 mfn=%lx i=%x] "
    1.41 +                                   "Expected Shadow L1 t=%x mfn=%lx\n",
    1.42 +                                   d->id, mfn, i,
    1.43 +                                   l1page->u.inuse.type_info, l1mfn);
    1.44 +                            errors++;
    1.45 +                        }
    1.46                      }
    1.47                      else
    1.48                      {
    1.49 @@ -154,7 +166,9 @@ int audit_adjust_pgtables(struct domain 
    1.50                                     "belonging to other dom %p (id=%d)\n",
    1.51                                     l1mfn,
    1.52                                     page_get_owner(l1page),
    1.53 -                                   page_get_owner(l1page)->id);
    1.54 +                                   (page_get_owner(l1page)
    1.55 +                                    ? page_get_owner(l1page)->id
    1.56 +                                    : -1));
    1.57                              errors++;
    1.58                              continue;
    1.59                          }
    1.60 @@ -179,7 +193,7 @@ int audit_adjust_pgtables(struct domain 
    1.61                      }
    1.62                  }
    1.63  
    1.64 -                adjust(l1page, !shadow_enabled);
    1.65 +                adjust(l1page, !shadow);
    1.66              }
    1.67          }
    1.68  
    1.69 @@ -280,7 +294,7 @@ int audit_adjust_pgtables(struct domain 
    1.70                              errors++;
    1.71                          }
    1.72  
    1.73 -                        if ( shadow_enabled &&
    1.74 +                        if ( shadow_refcounts &&
    1.75                               page_is_page_table(gpage) &&
    1.76                               ! page_out_of_sync(gpage) )
    1.77                          {
    1.78 @@ -336,19 +350,21 @@ int audit_adjust_pgtables(struct domain 
    1.79                      break;
    1.80                  case PGT_l1_shadow:
    1.81                      adjust(pfn_to_page(gmfn), 0);
    1.82 -                    adjust_l1_page(smfn);
    1.83 +                    if ( shadow_refcounts )
    1.84 +                        adjust_l1_page(smfn);
    1.85                      if ( page->u.inuse.type_info & PGT_pinned )
    1.86                          adjust(page, 0);
    1.87                      break;
    1.88                  case PGT_hl2_shadow:
    1.89                      adjust(pfn_to_page(gmfn), 0);
    1.90 -                    adjust_hl2_page(smfn);
    1.91 +                    if ( shadow_refcounts )
    1.92 +                        adjust_hl2_page(smfn);
    1.93                      if ( page->u.inuse.type_info & PGT_pinned )
    1.94                          adjust(page, 0);
    1.95                      break;
    1.96                  case PGT_l2_shadow:
    1.97                      adjust(pfn_to_page(gmfn), 0);
    1.98 -                    adjust_l2_page(smfn);
    1.99 +                    adjust_l2_page(smfn, 1);
   1.100                      if ( page->u.inuse.type_info & PGT_pinned )
   1.101                          adjust(page, 0);
   1.102                      break;
   1.103 @@ -391,45 +407,43 @@ int audit_adjust_pgtables(struct domain 
   1.104          struct exec_domain *ed;
   1.105  
   1.106          for_each_exec_domain(d, ed)
   1.107 -            {
   1.108 -                if ( !shadow_enabled )
   1.109 -                {
   1.110 -                    if ( pagetable_val(ed->arch.guest_table) )
   1.111 -                        adjust(&frame_table[pagetable_val(ed->arch.guest_table)
   1.112 -                                            >> PAGE_SHIFT], 1);
   1.113 -                }
   1.114 -                else
   1.115 -                {
   1.116 -                    if ( pagetable_val(ed->arch.guest_table) )
   1.117 -                        adjust(&frame_table[pagetable_val(ed->arch.guest_table)
   1.118 -                                            >> PAGE_SHIFT], 0);
   1.119 -                    if ( pagetable_val(ed->arch.shadow_table) )
   1.120 -                        adjust(&frame_table[pagetable_val(ed->arch.shadow_table)
   1.121 -                                            >> PAGE_SHIFT], 0);
   1.122 -                    if ( ed->arch.monitor_shadow_ref )
   1.123 -                        adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
   1.124 -                }
   1.125 -            }
   1.126 +        {
   1.127 +            if ( pagetable_val(ed->arch.guest_table) )
   1.128 +                adjust(&frame_table[pagetable_get_pfn(ed->arch.guest_table)], 1);
   1.129 +            if ( pagetable_val(ed->arch.shadow_table) )
   1.130 +                adjust(&frame_table[pagetable_get_pfn(ed->arch.shadow_table)], 0);
   1.131 +            if ( ed->arch.monitor_shadow_ref )
   1.132 +                adjust(&frame_table[ed->arch.monitor_shadow_ref], 0);
   1.133 +        }
   1.134      }
   1.135  
   1.136      void adjust_guest_pages()
   1.137      {
   1.138          struct list_head *list_ent = d->page_list.next;
   1.139          struct pfn_info *page;
   1.140 -        unsigned long mfn;
   1.141 +        unsigned long mfn, snapshot_mfn;
   1.142  
   1.143          while ( list_ent != &d->page_list )
   1.144          {
   1.145              u32 page_type;
   1.146  
   1.147              page = list_entry(list_ent, struct pfn_info, list);
   1.148 -            mfn = page_to_pfn(page);
   1.149 +            snapshot_mfn = mfn = page_to_pfn(page);
   1.150              page_type = page->u.inuse.type_info & PGT_type_mask;
   1.151  
   1.152              BUG_ON(page_get_owner(page) != d);
   1.153  
   1.154              page_count++;
   1.155  
   1.156 +            if ( shadow_enabled && !shadow_refcounts &&
   1.157 +                 page_out_of_sync(page) )
   1.158 +            {
   1.159 +                unsigned long gpfn = __mfn_to_gpfn(d, mfn);
   1.160 +                ASSERT( VALID_M2P(gpfn) );
   1.161 +                snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
   1.162 +                ASSERT( snapshot_mfn );
   1.163 +            }
   1.164 +
   1.165              switch ( page_type )
   1.166              {
   1.167              case PGT_l2_page_table:
   1.168 @@ -437,7 +451,7 @@ int audit_adjust_pgtables(struct domain 
   1.169  
   1.170                  if ( noisy )
   1.171                  {
   1.172 -                    if ( shadow_enabled )
   1.173 +                    if ( shadow_refcounts )
   1.174                      {
   1.175                          printk("Audit %d: found an L2 guest page "
   1.176                                 "mfn=%lx t=%08x c=%08x while in shadow mode\n",
   1.177 @@ -446,19 +460,22 @@ int audit_adjust_pgtables(struct domain 
   1.178                          errors++;
   1.179                      }
   1.180  
   1.181 -                    if ( (page->u.inuse.type_info & PGT_validated) !=
   1.182 -                         PGT_validated )
   1.183 +                    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
   1.184                      {
   1.185 -                        printk("Audit %d: L2 mfn=%lx not validated %08x\n",
   1.186 -                               d->id, mfn, page->u.inuse.type_info);
   1.187 -                        errors++;
   1.188 -                    }
   1.189 +                        if ( (page->u.inuse.type_info & PGT_validated) !=
   1.190 +                             PGT_validated )
   1.191 +                        {
   1.192 +                            printk("Audit %d: L2 mfn=%lx not validated %08x\n",
   1.193 +                                   d->id, mfn, page->u.inuse.type_info);
   1.194 +                            errors++;
   1.195 +                        }
   1.196  
   1.197 -                    if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
   1.198 -                    {
   1.199 -                        printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n",
   1.200 -                               d->id, mfn, page->u.inuse.type_info);
   1.201 -                        errors++;
   1.202 +                        if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
   1.203 +                        {
   1.204 +                            printk("Audit %d: L2 mfn=%lx not pinned t=%08x\n",
   1.205 +                                   d->id, mfn, page->u.inuse.type_info);
   1.206 +                            errors++;
   1.207 +                        }
   1.208                      }
   1.209                  }
   1.210  
   1.211 @@ -466,7 +483,7 @@ int audit_adjust_pgtables(struct domain 
   1.212                      adjust(page, 1);
   1.213  
   1.214                  if ( page->u.inuse.type_info & PGT_validated )
   1.215 -                    adjust_l2_page(mfn);
   1.216 +                    adjust_l2_page(snapshot_mfn, 0);
   1.217  
   1.218                  break;
   1.219  
   1.220 @@ -475,7 +492,7 @@ int audit_adjust_pgtables(struct domain 
   1.221  
   1.222                  if ( noisy )
   1.223                  {
   1.224 -                    if ( shadow_enabled )
   1.225 +                    if ( shadow_refcounts )
   1.226                      {
   1.227                          printk("found an L1 guest page mfn=%lx t=%08x c=%08x "
   1.228                                 "while in shadow mode\n",
   1.229 @@ -483,21 +500,24 @@ int audit_adjust_pgtables(struct domain 
   1.230                          errors++;
   1.231                      }
   1.232  
   1.233 -                    if ( (page->u.inuse.type_info & PGT_validated) != PGT_validated )
   1.234 +                    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
   1.235                      {
   1.236 -                        printk("Audit %d: L1 not validated mfn=%lx t=%08x\n",
   1.237 -                               d->id, mfn, page->u.inuse.type_info);
   1.238 -                        errors++;
   1.239 -                    }
   1.240 -
   1.241 -                    if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
   1.242 -                    {
   1.243 -                        if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
   1.244 +                        if ( (page->u.inuse.type_info & PGT_validated) !=
   1.245 +                             PGT_validated )
   1.246                          {
   1.247 -                            printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n",
   1.248 +                            printk("Audit %d: L1 not validated mfn=%lx t=%08x\n",
   1.249                                     d->id, mfn, page->u.inuse.type_info);
   1.250                              errors++;
   1.251                          }
   1.252 +
   1.253 +                        if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
   1.254 +                        {
   1.255 +                            if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
   1.256 +                            {
   1.257 +                                printk("Audit %d: L1 mfn=%lx not pinned t=%08x\n",
   1.258 +                                       d->id, mfn, page->u.inuse.type_info);
   1.259 +                            }
   1.260 +                        }
   1.261                      }
   1.262                  }
   1.263                  
   1.264 @@ -505,7 +525,7 @@ int audit_adjust_pgtables(struct domain 
   1.265                      adjust(page, 1);
   1.266  
   1.267                  if ( page->u.inuse.type_info & PGT_validated )
   1.268 -                    adjust_l1_page(mfn);
   1.269 +                    adjust_l1_page(snapshot_mfn);
   1.270  
   1.271                  break;
   1.272  
   1.273 @@ -520,7 +540,7 @@ int audit_adjust_pgtables(struct domain 
   1.274                  break;
   1.275  
   1.276              case PGT_writable_page:
   1.277 -                if ( shadow_enabled )
   1.278 +                if ( shadow_refcounts )
   1.279                  {
   1.280                      // In shadow mode, writable pages can get pinned by
   1.281                      // paravirtualized guests that think they are pinning
   1.282 @@ -589,6 +609,8 @@ void audit_pagelist(struct domain *d)
   1.283  
   1.284  void _audit_domain(struct domain *d, int flags)
   1.285  {
   1.286 +    int shadow_refcounts = !!shadow_mode_refcounts(d);
   1.287 +
   1.288      void scan_for_pfn_in_mfn(struct domain *d, unsigned long xmfn,
   1.289                               unsigned long mfn)
   1.290      {
   1.291 @@ -608,8 +630,29 @@ void _audit_domain(struct domain *d, int
   1.292          unmap_domain_mem(pt);           
   1.293      }
   1.294  
   1.295 +    void scan_for_pfn_in_grant_table(struct domain *d, unsigned xmfn)
   1.296 +    {
   1.297 +        int i;
   1.298 +        active_grant_entry_t *act = d->grant_table->active;
   1.299 +
   1.300 +        spin_lock(&d->grant_table->lock);
   1.301 +
   1.302 +        for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
   1.303 +        {
   1.304 +            if ( act[i].pin && (act[i].frame == xmfn) )
   1.305 +            {
   1.306 +                printk("     found active grant table entry i=%d dom=%d pin=%d\n",
   1.307 +                       i, act[i].domid, act[i].pin);
   1.308 +            }
   1.309 +        }
   1.310 +
   1.311 +        spin_unlock(&d->grant_table->lock);
   1.312 +    }
   1.313 +
   1.314      void scan_for_pfn(struct domain *d, unsigned long xmfn)
   1.315      {
   1.316 +        scan_for_pfn_in_grant_table(d, xmfn);
   1.317 +
   1.318          if ( !shadow_mode_enabled(d) )
   1.319          {
   1.320              struct list_head *list_ent = d->page_list.next;
   1.321 @@ -688,7 +731,7 @@ void _audit_domain(struct domain *d, int
   1.322  
   1.323      // Maybe we should just be using BIGLOCK?
   1.324      //
   1.325 -    if ( !(flags & AUDIT_ALREADY_LOCKED) )
   1.326 +    if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) )
   1.327          shadow_lock(d);
   1.328  
   1.329      spin_lock(&d->page_alloc_lock);
   1.330 @@ -716,7 +759,7 @@ void _audit_domain(struct domain *d, int
   1.331              errors++;
   1.332          }
   1.333  
   1.334 -        if ( shadow_mode_enabled(d) &&
   1.335 +        if ( shadow_mode_refcounts(d) &&
   1.336               (page_type == PGT_writable_page) &&
   1.337               !(page->u.inuse.type_info & PGT_validated) )
   1.338          {
   1.339 @@ -764,7 +807,9 @@ void _audit_domain(struct domain *d, int
   1.340                         mfn);
   1.341                  errors++;
   1.342              }
   1.343 -            if ( page_type != PGT_writable_page )
   1.344 +            if ( shadow_refcounts
   1.345 +                 ? (page_type != PGT_writable_page)
   1.346 +                 : !(page_type && (page_type <= PGT_l4_page_table)) )
   1.347              {
   1.348                  printk("out of sync page mfn=%lx has strange type "
   1.349                         "t=%08x c=%08x\n",
   1.350 @@ -821,7 +866,7 @@ void _audit_domain(struct domain *d, int
   1.351                         d->id, page->u.inuse.type_info, 
   1.352                         page->tlbflush_timestamp,
   1.353                         page->count_info, mfn);
   1.354 -                errors++;
   1.355 +                //errors++;
   1.356              }
   1.357              break;
   1.358          default:
   1.359 @@ -835,7 +880,7 @@ void _audit_domain(struct domain *d, int
   1.360                     page->count_info,
   1.361                     page->u.inuse.type_info, 
   1.362                     page->tlbflush_timestamp, mfn );
   1.363 -            errors++;
   1.364 +            //errors++;
   1.365              scan_for_pfn_remote(mfn);
   1.366          }
   1.367  
   1.368 @@ -870,6 +915,8 @@ void _audit_domain(struct domain *d, int
   1.369                                 d->id, page_to_pfn(page),
   1.370                                 page->u.inuse.type_info,
   1.371                                 page->count_info);
   1.372 +                        printk("a->gpfn_and_flags=%p\n",
   1.373 +                               (void *)a->gpfn_and_flags);
   1.374                          errors++;
   1.375                      }
   1.376                      break;
   1.377 @@ -905,7 +952,7 @@ void _audit_domain(struct domain *d, int
   1.378                 "pages=%d oos=%d l1=%d l2=%d ctot=%d ttot=%d\n",
   1.379                 d->id, page_count, oos_count, l1, l2, ctot, ttot);
   1.380  
   1.381 -    if ( !(flags & AUDIT_ALREADY_LOCKED) )
   1.382 +    if ( !(flags & AUDIT_SHADOW_ALREADY_LOCKED) )
   1.383          shadow_unlock(d);
   1.384  
   1.385      if ( d != current->domain )
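
[Annotation] The audit.c hunks above split one predicate into two: shadow_mode_enabled(d) (this domain has shadow page tables at all) versus shadow_mode_refcounts(d) (Xen, rather than the guest, holds the reference counts on guest page-table pages). Below is a minimal sketch of how such mode predicates are typically built from the SHM_* flag bits passed to shadow_mode_enable() in the later hunks; the bit positions and the arch.shadow_mode field name are assumptions for illustration only, not the real definitions in xen/include/asm-x86/shadow.h.

/* Sketch only -- bit values and field name are illustrative assumptions. */
#define SHM_enable     (1 << 0)  /* this domain uses shadow page tables        */
#define SHM_refcounts  (1 << 1)  /* Xen (not the guest) refcounts guest PTs    */
#define SHM_translate  (1 << 2)  /* Xen maintains the gpfn -> mfn translation  */
#define SHM_external   (1 << 3)  /* page tables are managed outside the guest  */

#define shadow_mode_enabled(d)   ((d)->arch.shadow_mode & SHM_enable)
#define shadow_mode_refcounts(d) ((d)->arch.shadow_mode & SHM_refcounts)

/* The audit code snapshots both as 0/1 once, then tests the cheap locals: */
int shadow_refcounts = !!shadow_mode_refcounts(d);
int shadow_enabled   = !!shadow_mode_enabled(d);
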
     2.1 --- a/xen/arch/x86/domain.c	Sun May 08 12:06:10 2005 +0000
     2.2 +++ b/xen/arch/x86/domain.c	Mon May 09 14:34:59 2005 +0000
     2.3 @@ -364,7 +364,8 @@ static int vmx_final_setup_guest(
     2.4  
     2.5          /* Put the domain in shadow mode even though we're going to be using
     2.6           * the shared 1:1 page table initially. It shouldn't hurt */
     2.7 -        shadow_mode_enable(ed->domain, SHM_enable|SHM_translate|SHM_external);
     2.8 +        shadow_mode_enable(ed->domain,
     2.9 +                           SHM_enable|SHM_refcounts|SHM_translate|SHM_external);
    2.10      }
    2.11  
    2.12      return 0;
    2.13 @@ -432,7 +433,7 @@ int arch_set_info_guest(
    2.14      phys_basetab = c->pt_base;
    2.15      ed->arch.guest_table = mk_pagetable(phys_basetab);
    2.16  
    2.17 -    if ( shadow_mode_enabled(d) )
    2.18 +    if ( shadow_mode_refcounts(d) )
    2.19      {
    2.20          if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) )
    2.21              return -EINVAL;
    2.22 @@ -981,17 +982,21 @@ void domain_relinquish_resources(struct 
    2.23      {
    2.24          if ( pagetable_val(ed->arch.guest_table) != 0 )
    2.25          {
    2.26 -            (shadow_mode_enabled(d) ? put_page : put_page_and_type)
    2.27 -                (&frame_table[pagetable_val(
    2.28 -                    ed->arch.guest_table) >> PAGE_SHIFT]);
    2.29 +            if ( shadow_mode_refcounts(d) )
    2.30 +                put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
    2.31 +            else
    2.32 +                put_page_and_type(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
    2.33 +
    2.34              ed->arch.guest_table = mk_pagetable(0);
    2.35          }
    2.36  
    2.37          if ( pagetable_val(ed->arch.guest_table_user) != 0 )
    2.38          {
    2.39 -            (shadow_mode_enabled(d) ? put_page : put_page_and_type)
    2.40 -                (&frame_table[pagetable_val(
    2.41 -                    ed->arch.guest_table_user) >> PAGE_SHIFT]);
    2.42 +            if ( shadow_mode_refcounts(d) )
    2.43 +                put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
    2.44 +            else
    2.45 +                put_page_and_type(&frame_table[pagetable_get_pfn(ed->arch.guest_table_user)]);
    2.46 +
    2.47              ed->arch.guest_table_user = mk_pagetable(0);
    2.48          }
    2.49  
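
[Annotation] Several hunks in domain.c (and in mm.c and shadow.c below) replace the open-coded "pagetable_val(t) >> PAGE_SHIFT" with pagetable_get_pfn(t). A minimal sketch of the accessor pair, assuming the usual one-word pagetable_t wrapper; the real definitions live in xen/include/asm-x86/page.h.

/* Sketch, assuming a one-word pagetable_t wrapper as in asm-x86/page.h. */
typedef struct { unsigned long pt; } pagetable_t;

#define mk_pagetable(x)       ((pagetable_t) { (x) })
#define pagetable_val(x)      ((x).pt)
#define pagetable_get_pfn(x)  ((x).pt >> PAGE_SHIFT)

/* e.g. the relinquish path above becomes
 *     put_page(&frame_table[pagetable_get_pfn(ed->arch.guest_table)]);
 * instead of shifting pagetable_val() by PAGE_SHIFT at every call site. */
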
     3.1 --- a/xen/arch/x86/domain_build.c	Sun May 08 12:06:10 2005 +0000
     3.2 +++ b/xen/arch/x86/domain_build.c	Mon May 09 14:34:59 2005 +0000
     3.3 @@ -547,7 +547,7 @@ int construct_dom0(struct domain *d,
     3.4      if ( opt_dom0_shadow || opt_dom0_translate )
     3.5      {
     3.6          shadow_mode_enable(d, (opt_dom0_translate
     3.7 -                               ? SHM_enable | SHM_translate
     3.8 +                               ? SHM_enable | SHM_refcounts | SHM_translate
     3.9                                 : SHM_enable));
    3.10          if ( opt_dom0_translate )
    3.11          {
    3.12 @@ -570,7 +570,7 @@ int construct_dom0(struct domain *d,
    3.13              idle_pg_table[1] = root_create_phys(pagetable_val(d->arch.phys_table),
    3.14                                                  __PAGE_HYPERVISOR);
    3.15              translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT),
    3.16 -                                pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT);
    3.17 +                                pagetable_get_pfn(ed->arch.guest_table));
    3.18              idle_pg_table[1] = root_empty();
    3.19              local_flush_tlb();
    3.20          }
     4.1 --- a/xen/arch/x86/mm.c	Sun May 08 12:06:10 2005 +0000
     4.2 +++ b/xen/arch/x86/mm.c	Mon May 09 14:34:59 2005 +0000
     4.3 @@ -316,7 +316,7 @@ int map_ldt_shadow_page(unsigned int off
     4.4  
     4.5      res = get_page_and_type(&frame_table[gmfn], d, PGT_ldt_page);
     4.6  
     4.7 -    if ( !res && unlikely(shadow_mode_enabled(d)) )
     4.8 +    if ( !res && unlikely(shadow_mode_refcounts(d)) )
     4.9      {
    4.10          shadow_lock(d);
    4.11          shadow_remove_all_write_access(d, gpfn, gmfn);
    4.12 @@ -392,7 +392,7 @@ get_linear_pagetable(
    4.13      struct pfn_info *page;
    4.14      unsigned long pfn;
    4.15  
    4.16 -    ASSERT( !shadow_mode_enabled(d) );
    4.17 +    ASSERT( !shadow_mode_refcounts(d) );
    4.18  
    4.19      if ( (root_get_flags(re) & _PAGE_RW) )
    4.20      {
    4.21 @@ -482,7 +482,7 @@ get_page_from_l2e(
    4.22  {
    4.23      int rc;
    4.24  
    4.25 -    ASSERT(!shadow_mode_enabled(d));
    4.26 +    ASSERT(!shadow_mode_refcounts(d));
    4.27  
    4.28      if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
    4.29          return 1;
    4.30 @@ -512,6 +512,8 @@ static int
    4.31  get_page_from_l3e(
    4.32      l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
    4.33  {
    4.34 +    ASSERT( !shadow_mode_refcounts(d) );
    4.35 +
    4.36      if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
    4.37          return 1;
    4.38  
    4.39 @@ -533,6 +535,8 @@ get_page_from_l4e(
    4.40  {
    4.41      int rc;
    4.42  
    4.43 +    ASSERT( !shadow_mode_refcounts(d) );
    4.44 +
    4.45      if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
    4.46          return 1;
    4.47  
    4.48 @@ -641,7 +645,7 @@ static int alloc_l1_table(struct pfn_inf
    4.49      l1_pgentry_t  *pl1e;
    4.50      int            i;
    4.51  
    4.52 -    ASSERT(!shadow_mode_enabled(d));
    4.53 +    ASSERT(!shadow_mode_refcounts(d));
    4.54  
    4.55      pl1e = map_domain_mem(pfn << PAGE_SHIFT);
    4.56  
    4.57 @@ -670,10 +674,12 @@ static int alloc_l2_table(struct pfn_inf
    4.58      l2_pgentry_t  *pl2e;
    4.59      int            i;
    4.60  
    4.61 +    // See the code in shadow_promote() to understand why this is here...
    4.62      if ( (PGT_base_page_table == PGT_l2_page_table) &&
    4.63 -         shadow_mode_enabled(d) )
    4.64 +         unlikely(shadow_mode_refcounts(d)) )
    4.65          return 1;
    4.66 -    ASSERT( !shadow_mode_enabled(d) );
    4.67 +
    4.68 +    ASSERT( !shadow_mode_refcounts(d) );
    4.69     
    4.70      pl2e = map_domain_mem(pfn << PAGE_SHIFT);
    4.71  
    4.72 @@ -716,7 +722,7 @@ static int alloc_l3_table(struct pfn_inf
    4.73      l3_pgentry_t  *pl3e = page_to_virt(page);
    4.74      int            i;
    4.75  
    4.76 -    ASSERT( !shadow_mode_enabled(d) );
    4.77 +    ASSERT( !shadow_mode_refcounts(d) );
    4.78  
    4.79      for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
    4.80          if ( is_guest_l3_slot(i) &&
    4.81 @@ -741,10 +747,12 @@ static int alloc_l4_table(struct pfn_inf
    4.82      l4_pgentry_t  *pl4e = page_to_virt(page);
    4.83      int            i;
    4.84  
    4.85 +    // See the code in shadow_promote() to understand why this is here...
    4.86      if ( (PGT_base_page_table == PGT_l4_page_table) &&
    4.87 -         shadow_mode_enabled(d) )
    4.88 +         shadow_mode_refcounts(d) )
    4.89          return 1;
    4.90 -    ASSERT( !shadow_mode_enabled(d) );
    4.91 +
    4.92 +    ASSERT( !shadow_mode_refcounts(d) );
    4.93  
    4.94      for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
    4.95          if ( is_guest_l4_slot(i) &&
    4.96 @@ -861,11 +869,12 @@ static int mod_l1_entry(l1_pgentry_t *pl
    4.97      l1_pgentry_t ol1e;
    4.98      struct domain *d = current->domain;
    4.99  
   4.100 -    ASSERT( !shadow_mode_enabled(d) );
   4.101 -
   4.102      if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
   4.103          return 0;
   4.104  
   4.105 +    if ( unlikely(shadow_mode_refcounts(d)) )
   4.106 +        return update_l1e(pl1e, ol1e, nl1e);
   4.107 +
   4.108      if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
   4.109      {
   4.110          if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
   4.111 @@ -893,7 +902,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
   4.112          if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
   4.113              return 0;
   4.114      }
   4.115 -    
   4.116 +
   4.117      put_page_from_l1e(ol1e, d);
   4.118      return 1;
   4.119  }
   4.120 @@ -1095,8 +1104,19 @@ int alloc_page_type(struct pfn_info *pag
   4.121  void free_page_type(struct pfn_info *page, unsigned int type)
   4.122  {
   4.123      struct domain *owner = page_get_owner(page);
   4.124 -    if ( likely(owner != NULL) && unlikely(shadow_mode_enabled(owner)) )
   4.125 -        return;
   4.126 +    unsigned long gpfn;
   4.127 +
   4.128 +    if ( owner != NULL )
   4.129 +    {
   4.130 +        if ( unlikely(shadow_mode_refcounts(owner)) )
   4.131 +            return;
   4.132 +        if ( unlikely(shadow_mode_enabled(owner)) )
   4.133 +        {
   4.134 +            gpfn = __mfn_to_gpfn(owner, page_to_pfn(page));
   4.135 +            ASSERT(VALID_M2P(gpfn));
   4.136 +            remove_shadow(owner, gpfn, type);
   4.137 +        }
   4.138 +    }
   4.139  
   4.140      switch ( type )
   4.141      {
   4.142 @@ -1287,7 +1307,7 @@ int new_guest_cr3(unsigned long mfn)
   4.143      int okay;
   4.144      unsigned long old_base_mfn;
   4.145  
   4.146 -    if ( shadow_mode_enabled(d) )
   4.147 +    if ( shadow_mode_refcounts(d) )
   4.148          okay = get_page_from_pagenr(mfn, d);
   4.149      else
   4.150          okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
   4.151 @@ -1296,24 +1316,24 @@ int new_guest_cr3(unsigned long mfn)
   4.152      {
   4.153          invalidate_shadow_ldt(ed);
   4.154  
   4.155 -        old_base_mfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT;
   4.156 +        old_base_mfn = pagetable_get_pfn(ed->arch.guest_table);
   4.157          ed->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
   4.158          update_pagetables(ed); /* update shadow_table and monitor_table */
   4.159  
   4.160          write_ptbase(ed);
   4.161  
   4.162 -        if ( shadow_mode_enabled(d) )
   4.163 +        if ( shadow_mode_refcounts(d) )
   4.164              put_page(&frame_table[old_base_mfn]);
   4.165          else
   4.166              put_page_and_type(&frame_table[old_base_mfn]);
   4.167  
   4.168 -        /* CR3 holds its own ref to its shadow. */
   4.169 +        /* CR3 also holds a ref to its shadow... */
   4.170          if ( shadow_mode_enabled(d) )
   4.171          {
   4.172              if ( ed->arch.monitor_shadow_ref )
   4.173                  put_shadow_ref(ed->arch.monitor_shadow_ref);
   4.174              ed->arch.monitor_shadow_ref =
   4.175 -                pagetable_val(ed->arch.monitor_table) >> PAGE_SHIFT;
   4.176 +                pagetable_get_pfn(ed->arch.monitor_table);
   4.177              ASSERT(!page_get_owner(&frame_table[ed->arch.monitor_shadow_ref]));
   4.178              get_shadow_ref(ed->arch.monitor_shadow_ref);
   4.179          }
   4.180 @@ -1486,7 +1506,7 @@ int do_mmuext_op(
   4.181              type = PGT_l1_page_table | PGT_va_mutable;
   4.182  
   4.183          pin_page:
   4.184 -            if ( shadow_mode_enabled(FOREIGNDOM) )
   4.185 +            if ( shadow_mode_refcounts(FOREIGNDOM) )
   4.186                  type = PGT_writable_page;
   4.187  
   4.188              okay = get_page_and_type_from_pagenr(op.mfn, type, FOREIGNDOM);
   4.189 @@ -1557,7 +1577,7 @@ int do_mmuext_op(
   4.190              else
   4.191              {
   4.192                  unsigned long old_mfn =
   4.193 -                    pagetable_val(ed->arch.guest_table_user) >> PAGE_SHIFT;
   4.194 +                    pagetable_get_pfn(ed->arch.guest_table_user);
   4.195                  ed->arch.guest_table_user = mk_pagetable(op.mfn << PAGE_SHIFT);
   4.196                  if ( old_mfn != 0 )
   4.197                      put_page_and_type(&frame_table[old_mfn]);
   4.198 @@ -1785,13 +1805,16 @@ int do_mmu_update(
   4.199      unsigned int foreigndom)
   4.200  {
   4.201      mmu_update_t req;
   4.202 -    unsigned long va = 0, mfn, prev_mfn = 0, gpfn;
   4.203 +    void *va;
   4.204 +    unsigned long gpfn, mfn;
   4.205      struct pfn_info *page;
   4.206      int rc = 0, okay = 1, i = 0, cpu = smp_processor_id();
   4.207      unsigned int cmd, done = 0;
   4.208      struct exec_domain *ed = current;
   4.209      struct domain *d = ed->domain;
   4.210      u32 type_info;
   4.211 +    struct map_dom_mem_cache mapcache = MAP_DOM_MEM_CACHE_INIT;
   4.212 +    struct map_dom_mem_cache sh_mapcache = MAP_DOM_MEM_CACHE_INIT;
   4.213  
   4.214      LOCK_BIGLOCK(d);
   4.215  
   4.216 @@ -1841,8 +1864,6 @@ int do_mmu_update(
   4.217          }
   4.218  
   4.219          cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
   4.220 -        mfn = req.ptr >> PAGE_SHIFT;
   4.221 -
   4.222          okay = 0;
   4.223  
   4.224          switch ( cmd )
   4.225 @@ -1851,73 +1872,75 @@ int do_mmu_update(
   4.226               * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
   4.227               */
   4.228          case MMU_NORMAL_PT_UPDATE:
   4.229 +
   4.230 +            gpfn = req.ptr >> PAGE_SHIFT;
   4.231 +            mfn = __gpfn_to_mfn(d, gpfn);
   4.232 +
   4.233              if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
   4.234              {
   4.235                  MEM_LOG("Could not get page for normal update");
   4.236                  break;
   4.237              }
   4.238  
   4.239 -            if ( likely(prev_mfn == mfn) )
   4.240 -            {
   4.241 -                va = (va & PAGE_MASK) | (req.ptr & ~PAGE_MASK);
   4.242 -            }
   4.243 -            else
   4.244 -            {
   4.245 -                if ( prev_mfn != 0 )
   4.246 -                    unmap_domain_mem((void *)va);
   4.247 -                va = (unsigned long)map_domain_mem(req.ptr);
   4.248 -                prev_mfn = mfn;
   4.249 -            }
   4.250 -
   4.251 +            va = map_domain_mem_with_cache(req.ptr, &mapcache);
   4.252              page = &frame_table[mfn];
   4.253 +
   4.254              switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
   4.255              {
   4.256              case PGT_l1_page_table: 
   4.257 -                ASSERT(!shadow_mode_enabled(d));
   4.258 +                ASSERT( !shadow_mode_refcounts(d) );
   4.259                  if ( likely(get_page_type(
   4.260                      page, type_info & (PGT_type_mask|PGT_va_mask))) )
   4.261                  {
   4.262 -                    l1_pgentry_t pte;
   4.263 +                    l1_pgentry_t l1e;
   4.264  
   4.265                      /* FIXME: doesn't work with PAE */
   4.266 -                    pte = l1e_create_phys(req.val, req.val);
   4.267 -                    okay = mod_l1_entry((l1_pgentry_t *)va, pte);
   4.268 +                    l1e = l1e_create_phys(req.val, req.val);
   4.269 +                    okay = mod_l1_entry(va, l1e);
   4.270 +                    if ( okay && unlikely(shadow_mode_enabled(d)) )
   4.271 +                        shadow_l1_normal_pt_update(d, req.ptr, l1e, &sh_mapcache);
   4.272                      put_page_type(page);
   4.273                  }
   4.274                  break;
   4.275              case PGT_l2_page_table:
   4.276 -                ASSERT(!shadow_mode_enabled(d));
   4.277 +                ASSERT( !shadow_mode_refcounts(d) );
   4.278                  if ( likely(get_page_type(page, PGT_l2_page_table)) )
   4.279                  {
   4.280                      l2_pgentry_t l2e;
   4.281  
   4.282                      /* FIXME: doesn't work with PAE */
   4.283                      l2e = l2e_create_phys(req.val, req.val);
   4.284 -                    okay = mod_l2_entry((l2_pgentry_t *)va, l2e, mfn);
   4.285 +                    okay = mod_l2_entry(va, l2e, mfn);
   4.286 +                    if ( okay && unlikely(shadow_mode_enabled(d)) )
   4.287 +                        shadow_l2_normal_pt_update(d, req.ptr, l2e, &sh_mapcache);
   4.288                      put_page_type(page);
   4.289                  }
   4.290                  break;
   4.291  #ifdef __x86_64__
   4.292              case PGT_l3_page_table:
   4.293 -                ASSERT(!shadow_mode_enabled(d));
   4.294 +                ASSERT( !shadow_mode_refcounts(d) );
   4.295                  if ( likely(get_page_type(page, PGT_l3_page_table)) )
   4.296                  {
   4.297                      l3_pgentry_t l3e;
   4.298  
   4.299                      /* FIXME: doesn't work with PAE */
   4.300                      l3e = l3e_create_phys(req.val,req.val);
   4.301 -                    okay = mod_l3_entry((l3_pgentry_t *)va, l3e, mfn);
   4.302 +                    okay = mod_l3_entry(va, l3e, mfn);
   4.303 +                    if ( okay && unlikely(shadow_mode_enabled(d)) )
   4.304 +                        shadow_l3_normal_pt_update(d, req.ptr, l3e, &sh_mapcache);
   4.305                      put_page_type(page);
   4.306                  }
   4.307                  break;
   4.308              case PGT_l4_page_table:
   4.309 -                ASSERT(!shadow_mode_enabled(d));
   4.310 +                ASSERT( !shadow_mode_refcounts(d) );
   4.311                  if ( likely(get_page_type(page, PGT_l4_page_table)) )
   4.312                  {
   4.313                      l4_pgentry_t l4e;
   4.314  
   4.315                      l4e = l4e_create_phys(req.val,req.val);
   4.316 -                    okay = mod_l4_entry((l4_pgentry_t *)va, l4e, mfn);
   4.317 +                    okay = mod_l4_entry(va, l4e, mfn);
   4.318 +                    if ( okay && unlikely(shadow_mode_enabled(d)) )
   4.319 +                        shadow_l4_normal_pt_update(d, req.ptr, l4e, &sh_mapcache);
   4.320                      put_page_type(page);
   4.321                  }
   4.322                  break;
   4.323 @@ -1932,9 +1955,6 @@ int do_mmu_update(
   4.324                          if ( shadow_mode_log_dirty(d) )
   4.325                              __mark_dirty(d, mfn);
   4.326  
   4.327 -                        gpfn = __mfn_to_gpfn(d, mfn);
   4.328 -                        ASSERT(VALID_M2P(gpfn));
   4.329 -
   4.330                          if ( page_is_page_table(page) &&
   4.331                               !page_out_of_sync(page) )
   4.332                          {
   4.333 @@ -1953,24 +1973,29 @@ int do_mmu_update(
   4.334                  break;
   4.335              }
   4.336  
   4.337 +            unmap_domain_mem_with_cache(va, &mapcache);
   4.338 +
   4.339              put_page(page);
   4.340              break;
   4.341  
   4.342          case MMU_MACHPHYS_UPDATE:
   4.343  
   4.344 +            mfn = req.ptr >> PAGE_SHIFT;
   4.345 +            gpfn = req.val;
   4.346 +
   4.347              /* HACK ALERT...  Need to think about this some more... */
   4.348              if ( unlikely(shadow_mode_translate(FOREIGNDOM) && IS_PRIV(d)) )
   4.349              {
   4.350 -                rc = FOREIGNDOM->next_io_page++;
   4.351 -                printk("privileged guest dom%d requests mfn=%lx for dom%d, "
   4.352 -                       "gets pfn=%x\n",
   4.353 -                       d->id, mfn, FOREIGNDOM->id, rc);
   4.354 -                set_machinetophys(mfn, rc);
   4.355 -                set_p2m_entry(FOREIGNDOM, rc, mfn);
   4.356 +                shadow_lock(FOREIGNDOM);
   4.357 +                printk("privileged guest dom%d requests pfn=%lx to map mfn=%lx for dom%d\n",
   4.358 +                       d->id, gpfn, mfn, FOREIGNDOM->id);
   4.359 +                set_machinetophys(mfn, gpfn);
   4.360 +                set_p2m_entry(FOREIGNDOM, gpfn, mfn, NULL, NULL);
   4.361                  okay = 1;
   4.362 +                shadow_unlock(FOREIGNDOM);
   4.363                  break;
   4.364              }
   4.365 -            
   4.366 +
   4.367              if ( unlikely(!get_page_from_pagenr(mfn, FOREIGNDOM)) )
   4.368              {
   4.369                  MEM_LOG("Could not get page for mach->phys update");
   4.370 @@ -1983,7 +2008,7 @@ int do_mmu_update(
   4.371                  break;
   4.372              }
   4.373  
   4.374 -            set_machinetophys(mfn, req.val);
   4.375 +            set_machinetophys(mfn, gpfn);
   4.376              okay = 1;
   4.377  
   4.378              /*
   4.379 @@ -2012,8 +2037,8 @@ int do_mmu_update(
   4.380      }
   4.381  
   4.382   out:
   4.383 -    if ( prev_mfn != 0 )
   4.384 -        unmap_domain_mem((void *)va);
   4.385 +    unmap_domain_mem_cache(&mapcache);
   4.386 +    unmap_domain_mem_cache(&sh_mapcache);
   4.387  
   4.388      process_deferred_ops(cpu);
   4.389  
   4.390 @@ -2031,73 +2056,6 @@ int do_mmu_update(
   4.391  /* This function assumes the caller is holding the domain's BIGLOCK
   4.392   * and is running in a shadow mode
   4.393   */
   4.394 -int update_shadow_va_mapping(unsigned long va,
   4.395 -                             l1_pgentry_t val,
   4.396 -                             struct exec_domain *ed,
   4.397 -                             struct domain *d)
   4.398 -{
   4.399 -    unsigned long l1mfn;
   4.400 -    l1_pgentry_t spte;
   4.401 -    int rc = 0;
   4.402 -
   4.403 -    check_pagetable(ed, "pre-va"); /* debug */
   4.404 -    shadow_lock(d);
   4.405 -        
   4.406 -    // This is actually overkill - we don't need to sync the L1 itself,
   4.407 -    // just everything involved in getting to this L1 (i.e. we need
   4.408 -    // linear_pg_table[l1_linear_offset(va)] to be in sync)...
   4.409 -    //
   4.410 -    __shadow_sync_va(ed, va);
   4.411 -
   4.412 -#if 1 /* keep check_pagetables() happy */
   4.413 -    /*
   4.414 -     * However, the above doesn't guarantee that there's no snapshot of
   4.415 -     * the L1 table in question; it just says that the relevant L2 and L1
   4.416 -     * entries for VA are in-sync.  There might still be a snapshot.
   4.417 -     *
   4.418 -     * The checking code in _check_pagetables() assumes that no one will
   4.419 -     * mutate the shadow of a page that has a snapshot.  It's actually
   4.420 -     * OK to not sync this page, but it seems simpler to:
   4.421 -     * 1) keep all code paths the same, and
   4.422 -     * 2) maintain the invariant for _check_pagetables(), rather than try
   4.423 -     *    to teach it about this boundary case.
   4.424 -     * So we flush this L1 page, if it's out of sync.
   4.425 -     */
   4.426 -    l1mfn = l2e_get_pfn(linear_l2_table(ed)[l2_table_offset(va)]);
   4.427 -    if ( mfn_out_of_sync(l1mfn) )
   4.428 -    {
   4.429 -        perfc_incrc(extra_va_update_sync);
   4.430 -        __shadow_sync_mfn(d, l1mfn);
   4.431 -    }
   4.432 -#endif /* keep check_pagetables() happy */
   4.433 -
   4.434 -    if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
   4.435 -                                 &val, sizeof(val))))
   4.436 -    {
   4.437 -        rc = -EINVAL;
   4.438 -        goto out;
   4.439 -    }
   4.440 -
   4.441 -    // also need to update the shadow
   4.442 -
   4.443 -    l1pte_propagate_from_guest(d, val, &spte);
   4.444 -    shadow_set_l1e(va, spte, 0);
   4.445 -
   4.446 -    /*
   4.447 -     * If we're in log-dirty mode then we need to note that we've updated
   4.448 -     * the PTE in the PT-holding page. We need the machine frame number
   4.449 -     * for this.
   4.450 -     */
   4.451 -    if ( shadow_mode_log_dirty(d) )
   4.452 -        mark_dirty(d, va_to_l1mfn(ed, va));
   4.453 -
   4.454 - out:
   4.455 -    shadow_unlock(d);
   4.456 -    check_pagetable(ed, "post-va"); /* debug */
   4.457 -
   4.458 -    return rc;
   4.459 -}
   4.460 -
   4.461  int update_grant_va_mapping(unsigned long va,
   4.462                              l1_pgentry_t _nl1e, 
   4.463                              struct domain *d,
   4.464 @@ -2116,11 +2074,17 @@ int update_grant_va_mapping(unsigned lon
   4.465      
   4.466      cleanup_writable_pagetable(d);
   4.467  
   4.468 +    // This is actually overkill - we don't need to sync the L1 itself,
   4.469 +    // just everything involved in getting to this L1 (i.e. we need
   4.470 +    // linear_pg_table[l1_linear_offset(va)] to be in sync)...
   4.471 +    //
   4.472 +    __shadow_sync_va(ed, va);
   4.473 +
   4.474      pl1e = &linear_pg_table[l1_linear_offset(va)];
   4.475  
   4.476      if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
   4.477          rc = -EINVAL;
   4.478 -    else
   4.479 +    else if ( !shadow_mode_refcounts(d) )
   4.480      {
   4.481          if ( update_l1e(pl1e, ol1e, _nl1e) )
   4.482          {
   4.483 @@ -2133,9 +2097,14 @@ int update_grant_va_mapping(unsigned lon
   4.484          else
   4.485              rc = -EINVAL;
   4.486      }
   4.487 +    else
   4.488 +    {
   4.489 +        printk("grant tables and shadow mode currently don't work together\n");
   4.490 +        BUG();
   4.491 +    }
   4.492  
   4.493      if ( unlikely(shadow_mode_enabled(d)) )
   4.494 -        update_shadow_va_mapping(va, _nl1e, ed, d);
   4.495 +        shadow_do_update_va_mapping(va, _nl1e, ed);
   4.496  
   4.497      return rc;
   4.498  }
   4.499 @@ -2161,6 +2130,13 @@ int do_update_va_mapping(unsigned long v
   4.500      cleanup_writable_pagetable(d);
   4.501  
   4.502      if ( unlikely(shadow_mode_enabled(d)) )
   4.503 +        check_pagetable(ed, "pre-va"); /* debug */
   4.504 +
   4.505 +    if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
   4.506 +                                val)) )
   4.507 +        rc = -EINVAL;
   4.508 +
   4.509 +    if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) )
   4.510      {
   4.511          if ( unlikely(percpu_info[cpu].foreign &&
   4.512                        (shadow_mode_translate(d) ||
   4.513 @@ -2173,11 +2149,10 @@ int do_update_va_mapping(unsigned long v
   4.514              domain_crash();
   4.515          }
   4.516      
   4.517 -        rc = update_shadow_va_mapping(va, val, ed, d);
   4.518 +        rc = shadow_do_update_va_mapping(va, val, ed);
   4.519 +
   4.520 +        check_pagetable(ed, "post-va"); /* debug */
   4.521      }
   4.522 -    else if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
   4.523 -                                     val)) )
   4.524 -        rc = -EINVAL;
   4.525  
   4.526      switch ( flags & UVMF_FLUSHTYPE_MASK )
   4.527      {
   4.528 @@ -2468,14 +2443,68 @@ int ptwr_debug = 0x0;
   4.529  #define PTWR_PRINTK(_f, _a...) ((void)0)
   4.530  #endif
   4.531  
   4.532 +/* Re-validate a given p.t. page, given its prior snapshot */
   4.533 +int revalidate_l1(struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
   4.534 +{
   4.535 +    l1_pgentry_t ol1e, nl1e;
   4.536 +    int modified = 0, i;
   4.537 +
   4.538 +#if 0
   4.539 +    if ( d->id )
   4.540 +        printk("%s: l1page mfn=%lx snapshot mfn=%lx\n", __func__,
   4.541 +               l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned long)l1page)]),
   4.542 +               l1e_get_pfn(linear_pg_table[l1_linear_offset((unsigned long)snapshot)]));
   4.543 +#endif
   4.544 +
   4.545 +    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
   4.546 +    {
   4.547 +        ol1e = snapshot[i];
   4.548 +        nl1e = l1page[i];
   4.549 +
   4.550 +        if ( likely(l1e_get_value(ol1e) == l1e_get_value(nl1e)) )
   4.551 +            continue;
   4.552 +
   4.553 +        /* Update number of entries modified. */
   4.554 +        modified++;
   4.555 +
   4.556 +        /*
   4.557 +         * Fast path for PTEs that have merely been write-protected
   4.558 +         * (e.g., during a Unix fork()). A strict reduction in privilege.
   4.559 +         */
   4.560 +        if ( likely(l1e_get_value(ol1e) == (l1e_get_value(nl1e)|_PAGE_RW)) )
   4.561 +        {
   4.562 +            if ( likely(l1e_get_flags(nl1e) & _PAGE_PRESENT) )
   4.563 +                put_page_type(&frame_table[l1e_get_pfn(nl1e)]);
   4.564 +            continue;
   4.565 +        }
   4.566 +
   4.567 +        if ( unlikely(!get_page_from_l1e(nl1e, d)) )
   4.568 +        {
   4.569 +            MEM_LOG("ptwr: Could not re-validate l1 page\n");
   4.570 +            /*
   4.571 +             * Make the remaining p.t's consistent before crashing, so the
   4.572 +             * reference counts are correct.
   4.573 +             */
   4.574 +            memcpy(&l1page[i], &snapshot[i],
   4.575 +                   (L1_PAGETABLE_ENTRIES - i) * sizeof(l1_pgentry_t));
   4.576 +            domain_crash();
   4.577 +            break;
   4.578 +        }
   4.579 +        
   4.580 +        put_page_from_l1e(ol1e, d);
   4.581 +    }
   4.582 +
   4.583 +    return modified;
   4.584 +}
   4.585 +
   4.586 +
   4.587  /* Flush the given writable p.t. page and write-protect it again. */
   4.588  void ptwr_flush(struct domain *d, const int which)
   4.589  {
   4.590      unsigned long  pte, *ptep, l1va;
   4.591 -    l1_pgentry_t  *pl1e, ol1e, nl1e;
   4.592 +    l1_pgentry_t  *pl1e;
   4.593      l2_pgentry_t  *pl2e;
   4.594 -    int            i;
   4.595 -    unsigned int   modified = 0;
   4.596 +    unsigned int   modified;
   4.597  
   4.598      ASSERT(!shadow_mode_enabled(d));
   4.599  
   4.600 @@ -2524,45 +2553,8 @@ void ptwr_flush(struct domain *d, const 
   4.601       */
   4.602  
   4.603      pl1e = d->arch.ptwr[which].pl1e;
   4.604 -    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
   4.605 -    {
   4.606 -        ol1e = d->arch.ptwr[which].page[i];
   4.607 -        nl1e = pl1e[i];
   4.608 -
   4.609 -        if ( likely(l1e_get_value(ol1e) == l1e_get_value(nl1e)) )
   4.610 -            continue;
   4.611 -
   4.612 -        /* Update number of entries modified. */
   4.613 -        modified++;
   4.614 -
   4.615 -        /*
   4.616 -         * Fast path for PTEs that have merely been write-protected
   4.617 -         * (e.g., during a Unix fork()). A strict reduction in privilege.
   4.618 -         */
   4.619 -        if ( likely(l1e_get_value(ol1e) == (l1e_get_value(nl1e)|_PAGE_RW)) )
   4.620 -        {
   4.621 -            if ( likely(l1e_get_flags(nl1e) & _PAGE_PRESENT) )
   4.622 -                put_page_type(&frame_table[l1e_get_pfn(nl1e)]);
   4.623 -            continue;
   4.624 -        }
   4.625 -
   4.626 -        if ( unlikely(!get_page_from_l1e(nl1e, d)) )
   4.627 -        {
   4.628 -            MEM_LOG("ptwr: Could not re-validate l1 page\n");
   4.629 -            /*
   4.630 -             * Make the remaining p.t's consistent before crashing, so the
   4.631 -             * reference counts are correct.
   4.632 -             */
   4.633 -            memcpy(&pl1e[i], &d->arch.ptwr[which].page[i],
   4.634 -                   (L1_PAGETABLE_ENTRIES - i) * sizeof(l1_pgentry_t));
   4.635 -            domain_crash();
   4.636 -            break;
   4.637 -        }
   4.638 -        
   4.639 -        put_page_from_l1e(ol1e, d);
   4.640 -    }
   4.641 +    modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
   4.642      unmap_domain_mem(pl1e);
   4.643 -    
   4.644      perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
   4.645      d->arch.ptwr[which].prev_nr_updates  = modified;
   4.646  
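
[Annotation] The do_mmu_update() rewrite above drops the hand-rolled prev_mfn/va pair in favour of a small mapping cache (map_domain_mem_with_cache / unmap_domain_mem_with_cache / unmap_domain_mem_cache, declared in xen/include/asm-x86/x86_32/domain_page.h, one of the files in this changeset). A rough sketch of the idea follows; the field names, the INIT value and the exact semantics are assumptions, not the real interface. The point is that repeated updates hitting the same page-table frame skip the map/unmap round trip.

/* Rough sketch only: names and behaviour assumed for illustration;
 * see asm-x86/x86_32/domain_page.h for the real interface.          */
struct map_dom_mem_cache {
    unsigned long pa;   /* machine address of the cached frame, 0 if none */
    void         *va;   /* virtual address of that mapping                */
};
#define MAP_DOM_MEM_CACHE_INIT ((struct map_dom_mem_cache) { 0, NULL })

static inline void *map_domain_mem_with_cache(unsigned long pa,
                                              struct map_dom_mem_cache *cache)
{
    if ( cache->pa != (pa & PAGE_MASK) )
    {
        if ( cache->pa != 0 )
            unmap_domain_mem(cache->va);
        cache->va = map_domain_mem(pa & PAGE_MASK);
        cache->pa = pa & PAGE_MASK;
    }
    return (void *)((unsigned long)cache->va + (pa & ~PAGE_MASK));
}

/* unmap_domain_mem_with_cache() can then be a no-op (the mapping is kept
 * for the next iteration) and unmap_domain_mem_cache() tears it down once
 * at the end of do_mmu_update(), as the hunks above do.                   */
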
     5.1 --- a/xen/arch/x86/shadow.c	Sun May 08 12:06:10 2005 +0000
     5.2 +++ b/xen/arch/x86/shadow.c	Mon May 09 14:34:59 2005 +0000
     5.3 @@ -30,11 +30,17 @@
     5.4  #include <xen/sched.h>
     5.5  #include <xen/trace.h>
     5.6  
     5.7 +#define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
     5.8 +
     5.9  static void shadow_free_snapshot(struct domain *d,
    5.10                                   struct out_of_sync_entry *entry);
    5.11  static void remove_out_of_sync_entries(struct domain *d, unsigned long smfn);
    5.12  static void free_writable_pte_predictions(struct domain *d);
    5.13  
    5.14 +#if SHADOW_DEBUG
    5.15 +static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
    5.16 +#endif
    5.17 +
    5.18  /********
    5.19  
    5.20  There's a per-domain shadow table spin lock which works fine for SMP
    5.21 @@ -62,6 +68,9 @@ shadow_promote(struct domain *d, unsigne
    5.22          __shadow_sync_mfn(d, gmfn);
    5.23      }
    5.24  
    5.25 +    if ( !shadow_mode_refcounts(d) )
    5.26 +        return 1;
    5.27 +
    5.28      if ( unlikely(page_is_page_table(page)) )
    5.29          return 1;
    5.30  
    5.31 @@ -89,7 +98,7 @@ shadow_promote(struct domain *d, unsigne
    5.32      // TLB flushes required when promoting a writable page, and also deal
    5.33      // with any outstanding (external) writable refs to this page (by
    5.34      // refusing to promote it).  The pinning headache complicates this
    5.35 -    // code -- it would all much get simpler if we stop using
    5.36 +    // code -- it would all get much simpler if we stop using
    5.37      // shadow_lock() and move the shadow code to BIGLOCK().
    5.38      //
    5.39      if ( unlikely(!get_page(page, d)) )
    5.40 @@ -130,6 +139,9 @@ shadow_promote(struct domain *d, unsigne
    5.41  static inline void
    5.42  shadow_demote(struct domain *d, unsigned long gpfn, unsigned long gmfn)
    5.43  {
    5.44 +    if ( !shadow_mode_refcounts(d) )
    5.45 +        return;
    5.46 +
    5.47      ASSERT(frame_table[gmfn].count_info & PGC_page_table);
    5.48  
    5.49      if ( shadow_max_pgtable_type(d, gpfn, NULL) == PGT_none )
    5.50 @@ -210,7 +222,7 @@ alloc_shadow_page(struct domain *d,
    5.51          else
    5.52          {
    5.53              page = alloc_domheap_page(NULL);
    5.54 -            void *l1 = map_domain_mem(page_to_pfn(page) << PAGE_SHIFT);
    5.55 +            void *l1 = map_domain_mem(page_to_phys(page));
    5.56              memset(l1, 0, PAGE_SIZE);
    5.57              unmap_domain_mem(l1);
    5.58          }
    5.59 @@ -312,7 +324,7 @@ free_shadow_l1_table(struct domain *d, u
    5.60  
    5.61      for ( i = min; i <= max; i++ )
    5.62      {
    5.63 -        put_page_from_l1e(pl1e[i], d);
    5.64 +        shadow_put_page_from_l1e(pl1e[i], d);
    5.65          pl1e[i] = l1e_empty();
    5.66      }
    5.67  
    5.68 @@ -348,21 +360,20 @@ free_shadow_hl2_table(struct domain *d, 
    5.69  static void inline
    5.70  free_shadow_l2_table(struct domain *d, unsigned long smfn)
    5.71  {
    5.72 -    unsigned long *pl2e = map_domain_mem(smfn << PAGE_SHIFT);
    5.73 +    l2_pgentry_t *pl2e = map_domain_mem(smfn << PAGE_SHIFT);
    5.74      int i, external = shadow_mode_external(d);
    5.75  
    5.76      for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
    5.77          if ( external || is_guest_l2_slot(i) )
    5.78 -            if ( pl2e[i] & _PAGE_PRESENT )
    5.79 -                put_shadow_ref(pl2e[i] >> PAGE_SHIFT);
    5.80 +            if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
    5.81 +                put_shadow_ref(l2e_get_pfn(pl2e[i]));
    5.82  
    5.83      if ( (PGT_base_page_table == PGT_l2_page_table) &&
    5.84           shadow_mode_translate(d) && !external )
    5.85      {
    5.86          // free the ref to the hl2
    5.87          //
    5.88 -        put_shadow_ref(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]
    5.89 -                       >> PAGE_SHIFT);
    5.90 +        put_shadow_ref(l2e_get_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]));
    5.91      }
    5.92  
    5.93      unmap_domain_mem(pl2e);
    5.94 @@ -428,6 +439,26 @@ void free_shadow_page(unsigned long smfn
    5.95          free_domheap_page(page);
    5.96  }
    5.97  
    5.98 +void
    5.99 +remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
   5.100 +{
   5.101 +    unsigned long smfn;
   5.102 +
   5.103 +    //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
   5.104 +
   5.105 +    shadow_lock(d);
   5.106 +
   5.107 +    while ( stype >= PGT_l1_shadow )
   5.108 +    {
   5.109 +        smfn = __shadow_status(d, gpfn, stype);
   5.110 +        if ( smfn && MFN_PINNED(smfn) )
   5.111 +            shadow_unpin(smfn);
   5.112 +        stype -= PGT_l1_shadow;
   5.113 +    }
   5.114 +
   5.115 +    shadow_unlock(d);
   5.116 +}
   5.117 +
   5.118  static void inline
   5.119  release_out_of_sync_entry(struct domain *d, struct out_of_sync_entry *entry)
   5.120  {
   5.121 @@ -537,15 +568,22 @@ static void free_shadow_pages(struct dom
   5.122      //
   5.123      free_out_of_sync_state(d);
   5.124  
   5.125 -    // second, remove any outstanding refs from ed->arch.shadow_table...
   5.126 +    // second, remove any outstanding refs from ed->arch.shadow_table
   5.127 +    // and CR3.
   5.128      //
   5.129      for_each_exec_domain(d, ed)
   5.130      {
   5.131          if ( pagetable_val(ed->arch.shadow_table) )
   5.132          {
   5.133 -            put_shadow_ref(pagetable_val(ed->arch.shadow_table) >> PAGE_SHIFT);
   5.134 +            put_shadow_ref(pagetable_get_pfn(ed->arch.shadow_table));
   5.135              ed->arch.shadow_table = mk_pagetable(0);
   5.136          }
   5.137 +
   5.138 +        if ( ed->arch.monitor_shadow_ref )
   5.139 +        {
   5.140 +            put_shadow_ref(ed->arch.monitor_shadow_ref);
   5.141 +            ed->arch.monitor_shadow_ref = 0;
   5.142 +        }
   5.143      }
   5.144  
   5.145      // For external shadows, remove the monitor table's refs
   5.146 @@ -584,7 +622,6 @@ static void free_shadow_pages(struct dom
   5.147      // under us...  First, collect the list of pinned pages, then
   5.148      // free them.
   5.149      //
   5.150 -#define PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned)
   5.151      for ( i = 0; i < shadow_ht_buckets; i++ )
   5.152      {
   5.153          u32 count;
   5.154 @@ -596,7 +633,7 @@ static void free_shadow_pages(struct dom
   5.155  
   5.156          count = 0;
   5.157          for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
   5.158 -            if ( PINNED(x->smfn) )
   5.159 +            if ( MFN_PINNED(x->smfn) )
   5.160                  count++;
   5.161          if ( !count )
   5.162              continue;
   5.163 @@ -604,7 +641,7 @@ static void free_shadow_pages(struct dom
   5.164          mfn_list = xmalloc_array(unsigned long, count);
   5.165          count = 0;
   5.166          for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
   5.167 -            if ( PINNED(x->smfn) )
   5.168 +            if ( MFN_PINNED(x->smfn) )
   5.169                  mfn_list[count++] = x->smfn;
   5.170  
   5.171          while ( count )
   5.172 @@ -613,7 +650,18 @@ static void free_shadow_pages(struct dom
   5.173          }
   5.174          xfree(mfn_list);
   5.175      }
   5.176 -#undef PINNED
   5.177 +
    5.178 +    // Now free the pre-zeroed pages from the domain
   5.179 +    //
   5.180 +    struct list_head *list_ent, *tmp;
   5.181 +    list_for_each_safe(list_ent, tmp, &d->arch.free_shadow_frames)
   5.182 +    {
   5.183 +        list_del(list_ent);
   5.184 +        perfc_decr(free_l1_pages);
   5.185 +
   5.186 +        struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
   5.187 +        free_domheap_page(page);
   5.188 +    }
   5.189  
   5.190      shadow_audit(d, 0);
   5.191  
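
Aside on the hunk above: the file-local PINNED() helper is dropped in favour of a shared MFN_PINNED() macro whose definition is not visible in this excerpt. Judging from the line deleted above, it presumably reduces to the same type_info test; a minimal sketch under that assumption:

    /* Assumed-equivalent definition (mirrors the deleted local helper);
     * the real macro lives elsewhere in the tree. */
    #define MFN_PINNED(_mfn) \
        (frame_table[_mfn].u.inuse.type_info & PGT_pinned)
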
   5.192 @@ -624,9 +672,9 @@ void shadow_mode_init(void)
   5.193  {
   5.194  }
   5.195  
   5.196 -int _shadow_mode_enabled(struct domain *d)
   5.197 +int _shadow_mode_refcounts(struct domain *d)
   5.198  {
   5.199 -    return shadow_mode_enabled(d);
   5.200 +    return shadow_mode_refcounts(d);
   5.201  }
   5.202  
   5.203  static void alloc_monitor_pagetable(struct exec_domain *ed)
   5.204 @@ -706,7 +754,7 @@ void free_monitor_pagetable(struct exec_
   5.205      /*
   5.206       * Then free monitor_table.
   5.207       */
   5.208 -    mfn = (pagetable_val(ed->arch.monitor_table)) >> PAGE_SHIFT;
   5.209 +    mfn = pagetable_get_pfn(ed->arch.monitor_table);
   5.210      free_domheap_page(&frame_table[mfn]);
   5.211  
   5.212      ed->arch.monitor_table = mk_pagetable(0);
   5.213 @@ -714,7 +762,9 @@ void free_monitor_pagetable(struct exec_
   5.214  }
   5.215  
   5.216  int
   5.217 -set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn)
   5.218 +set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn,
   5.219 +              struct map_dom_mem_cache *l2cache,
   5.220 +              struct map_dom_mem_cache *l1cache)
   5.221  {
   5.222      unsigned long phystab = pagetable_val(d->arch.phys_table);
   5.223      l2_pgentry_t *l2, l2e;
   5.224 @@ -724,26 +774,29 @@ set_p2m_entry(struct domain *d, unsigned
   5.225  
   5.226      ASSERT( phystab );
   5.227  
   5.228 -    l2 = map_domain_mem(phystab);
   5.229 +    l2 = map_domain_mem_with_cache(phystab, l2cache);
   5.230      l2e = l2[l2_table_offset(va)];
   5.231 -    if ( !l2e_get_value(l2e) ) /* FIXME: check present bit? */
   5.232 +    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
   5.233      {
   5.234          l1page = alloc_domheap_page(NULL);
   5.235          if ( !l1page )
   5.236 +        {
   5.237 +            unmap_domain_mem_with_cache(l2, l2cache);
   5.238              return 0;
   5.239 -
   5.240 -        l1 = map_domain_mem(page_to_pfn(l1page) << PAGE_SHIFT);
   5.241 +        }
   5.242 +
   5.243 +        l1 = map_domain_mem_with_cache(page_to_phys(l1page), l1cache);
   5.244          memset(l1, 0, PAGE_SIZE);
   5.245 -        unmap_domain_mem(l1);
   5.246 +        unmap_domain_mem_with_cache(l1, l1cache);
   5.247  
   5.248          l2e = l2e_create_pfn(page_to_pfn(l1page), __PAGE_HYPERVISOR);
   5.249          l2[l2_table_offset(va)] = l2e;
   5.250      }
   5.251 -    unmap_domain_mem(l2);
   5.252 -
   5.253 -    l1 = map_domain_mem(l2e_get_phys(l2e));
   5.254 +    unmap_domain_mem_with_cache(l2, l2cache);
   5.255 +
   5.256 +    l1 = map_domain_mem_with_cache(l2e_get_phys(l2e), l1cache);
   5.257      l1[l1_table_offset(va)] = l1e_create_pfn(mfn, __PAGE_HYPERVISOR);
   5.258 -    unmap_domain_mem(l1);
   5.259 +    unmap_domain_mem_with_cache(l1, l1cache);
   5.260  
   5.261      return 1;
   5.262  }
   5.263 @@ -755,14 +808,16 @@ alloc_p2m_table(struct domain *d)
   5.264      struct pfn_info *page, *l2page;
   5.265      l2_pgentry_t *l2;
   5.266      unsigned long mfn, pfn;
   5.267 +    struct map_dom_mem_cache l2cache = MAP_DOM_MEM_CACHE_INIT;
   5.268 +    struct map_dom_mem_cache l1cache = MAP_DOM_MEM_CACHE_INIT;
   5.269  
   5.270      l2page = alloc_domheap_page(NULL);
   5.271      if ( !l2page )
   5.272          return 0;
   5.273 -    d->arch.phys_table = mk_pagetable(page_to_pfn(l2page) << PAGE_SHIFT);
   5.274 -    l2 = map_domain_mem(page_to_pfn(l2page) << PAGE_SHIFT);
   5.275 +    d->arch.phys_table = mk_pagetable(page_to_phys(l2page));
   5.276 +    l2 = map_domain_mem_with_cache(page_to_phys(l2page), &l2cache);
   5.277      memset(l2, 0, PAGE_SIZE);
   5.278 -    unmap_domain_mem(l2);
   5.279 +    unmap_domain_mem_with_cache(l2, &l2cache);
   5.280  
   5.281      list_ent = d->page_list.next;
   5.282      while ( list_ent != &d->page_list )
   5.283 @@ -773,7 +828,7 @@ alloc_p2m_table(struct domain *d)
   5.284          ASSERT(pfn != INVALID_M2P_ENTRY);
   5.285          ASSERT(pfn < (1u<<20));
   5.286  
   5.287 -        set_p2m_entry(d, pfn, mfn);
   5.288 +        set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
   5.289  
   5.290          list_ent = page->list.next;
   5.291      }
   5.292 @@ -787,12 +842,15 @@ alloc_p2m_table(struct domain *d)
   5.293          if ( (pfn != INVALID_M2P_ENTRY) &&
   5.294               (pfn < (1u<<20)) )
   5.295          {
   5.296 -            set_p2m_entry(d, pfn, mfn);
   5.297 +            set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
   5.298          }
   5.299  
   5.300          list_ent = page->list.next;
   5.301      }
   5.302  
   5.303 +    unmap_domain_mem_cache(&l2cache);
   5.304 +    unmap_domain_mem_cache(&l1cache);
   5.305 +
   5.306      return 1;
   5.307  }
   5.308  
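
Aside on set_p2m_entry()/alloc_p2m_table() above: the map_dom_mem_cache arguments exist so that repeated lookups of the same L2/L1 frame do not pay a fresh map_domain_mem() on every call. The cache internals are not part of this excerpt; below is a minimal sketch of the calling pattern only, mirroring alloc_p2m_table(). touch_frames is a hypothetical helper, not from this changeset.

    /* Sketch of the intended usage pattern for the mapping cache. */
    static void touch_frames(unsigned long *mfns, int n)
    {
        struct map_dom_mem_cache cache = MAP_DOM_MEM_CACHE_INIT;
        int i;

        for ( i = 0; i < n; i++ )
        {
            u32 *p = map_domain_mem_with_cache(mfns[i] << PAGE_SHIFT, &cache);
            p[0] = 0;                                   /* touch the frame */
            unmap_domain_mem_with_cache(p, &cache);
        }

        unmap_domain_mem_cache(&cache);   /* drop whatever is still mapped */
    }
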
   5.309 @@ -915,13 +973,13 @@ int __shadow_mode_enable(struct domain *
   5.310          {
   5.311              // external guests provide their own memory for their P2M maps.
   5.312              //
   5.313 -            ASSERT( d == page_get_owner(&frame_table[pagetable_val(
   5.314 -                                        d->arch.phys_table)>>PAGE_SHIFT]) );
   5.315 +            ASSERT( d == page_get_owner(
   5.316 +                        &frame_table[pagetable_get_pfn(d->arch.phys_table)]) );
   5.317          }
   5.318      }
   5.319  
   5.320      printk("audit1\n");
   5.321 -    _audit_domain(d, AUDIT_ALREADY_LOCKED | AUDIT_ERRORS_OK);
   5.322 +    _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
   5.323      printk("audit1 done\n");
   5.324  
   5.325      // Get rid of any shadow pages from any previous shadow mode.
   5.326 @@ -929,15 +987,9 @@ int __shadow_mode_enable(struct domain *
   5.327      free_shadow_pages(d);
   5.328  
   5.329      printk("audit2\n");
   5.330 -    _audit_domain(d, AUDIT_ALREADY_LOCKED | AUDIT_ERRORS_OK);
   5.331 +    _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
   5.332      printk("audit2 done\n");
   5.333  
   5.334 -    // Turn off writable page tables.
   5.335 -    // It doesn't mix with shadow mode.
   5.336 -    // And shadow mode offers a superset of functionality.
   5.337 -    //
   5.338 -    vm_assist(d, VMASST_CMD_disable, VMASST_TYPE_writable_pagetables);
   5.339 -
   5.340      /*
    5.341       * Tear down its counts by disassembling its page-table-based ref counts.
   5.342       * Also remove CR3's gcount/tcount.
   5.343 @@ -959,23 +1011,27 @@ int __shadow_mode_enable(struct domain *
   5.344       * Assert that no pages are left with L1/L2/L3/L4 type.
   5.345       */
   5.346      audit_adjust_pgtables(d, -1, 1);
   5.347 +
   5.348      d->arch.shadow_mode = mode;
   5.349  
   5.350 -    struct list_head *list_ent = d->page_list.next;
   5.351 -    while ( list_ent != &d->page_list )
   5.352 +    if ( shadow_mode_refcounts(d) )
   5.353      {
   5.354 -        struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
   5.355 -        if ( !get_page_type(page, PGT_writable_page) )
   5.356 -            BUG();
   5.357 -        put_page_type(page);
   5.358 -
   5.359 -        list_ent = page->list.next;
   5.360 +        struct list_head *list_ent = d->page_list.next;
   5.361 +        while ( list_ent != &d->page_list )
   5.362 +        {
   5.363 +            struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
   5.364 +            if ( !get_page_type(page, PGT_writable_page) )
   5.365 +                BUG();
   5.366 +            put_page_type(page);
   5.367 +
   5.368 +            list_ent = page->list.next;
   5.369 +        }
   5.370      }
   5.371  
   5.372      audit_adjust_pgtables(d, 1, 1);
   5.373  
   5.374      printk("audit3\n");
   5.375 -    _audit_domain(d, AUDIT_ALREADY_LOCKED);
   5.376 +    _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
   5.377      printk("audit3 done\n");
   5.378  
   5.379      return 0;
   5.380 @@ -1120,8 +1176,8 @@ void __shadow_mode_disable(struct domain
   5.381       * Currently this does not fix up page ref counts, so it is valid to call
   5.382       * only when a domain is being destroyed.
   5.383       */
   5.384 -    BUG_ON(!test_bit(DF_DYING, &d->flags));
   5.385 -    d->arch.shadow_tainted_refcnts = 1;
   5.386 +    BUG_ON(!test_bit(DF_DYING, &d->flags) && shadow_mode_refcounts(d));
   5.387 +    d->arch.shadow_tainted_refcnts = shadow_mode_refcounts(d);
   5.388  
   5.389      free_shadow_pages(d);
   5.390      free_writable_pte_predictions(d);
   5.391 @@ -1138,11 +1194,17 @@ void __shadow_mode_disable(struct domain
   5.392          }
   5.393      }
   5.394  #endif
   5.395 -    
   5.396 +
   5.397      d->arch.shadow_mode = 0;
   5.398  
   5.399      free_shadow_ht_entries(d);
   5.400      free_out_of_sync_entries(d);
   5.401 +
   5.402 +    struct exec_domain *ed;
   5.403 +    for_each_exec_domain(d, ed)
   5.404 +    {
   5.405 +        update_pagetables(ed);
   5.406 +    }
   5.407  }
   5.408  
   5.409  static int shadow_mode_table_op(
   5.410 @@ -1281,6 +1343,7 @@ int shadow_mode_control(struct domain *d
   5.411      switch ( op )
   5.412      {
   5.413      case DOM0_SHADOW_CONTROL_OP_OFF:
   5.414 +        __shadow_sync_all(d);
   5.415          __shadow_mode_disable(d);
   5.416          break;
   5.417  
   5.418 @@ -1298,7 +1361,7 @@ int shadow_mode_control(struct domain *d
   5.419      case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
   5.420          free_shadow_pages(d);
   5.421          rc = __shadow_mode_enable(
   5.422 -            d, d->arch.shadow_mode|SHM_enable|SHM_translate);
   5.423 +            d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
   5.424          break;
   5.425  
   5.426      default:
   5.427 @@ -1560,23 +1623,23 @@ void shadow_map_l1_into_current_l2(unsig
   5.428  
   5.429      if ( init_table )
   5.430      {
   5.431 +        l1_pgentry_t sl1e;
   5.432 +        int index = l1_table_offset(va);
   5.433 +        int min = 1, max = 0;
   5.434 +
   5.435          gpl1e = &(linear_pg_table[l1_linear_offset(va) &
   5.436                                ~(L1_PAGETABLE_ENTRIES-1)]);
   5.437  
   5.438          spl1e = &(shadow_linear_pg_table[l1_linear_offset(va) &
   5.439                                       ~(L1_PAGETABLE_ENTRIES-1)]);
   5.440  
   5.441 -        l1_pgentry_t sl1e;
   5.442 -        int index = l1_table_offset(va);
   5.443 -        int min = 1, max = 0;
   5.444 -
   5.445          for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
   5.446          {
   5.447              l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
   5.448              if ( (l1e_get_flags(sl1e) & _PAGE_PRESENT) &&
   5.449 -                 !shadow_get_page_from_l1e(sl1e, d) )
   5.450 +                 unlikely(!shadow_get_page_from_l1e(sl1e, d)) )
   5.451                  sl1e = l1e_empty();
   5.452 -            if ( l1e_get_value(sl1e) == 0 ) /* FIXME: check flags? */
   5.453 +            if ( l1e_get_flags(sl1e) == 0 )
   5.454              {
   5.455                  // First copy entries from 0 until first invalid.
   5.456                  // Then copy entries from index until first invalid.
   5.457 @@ -1695,7 +1758,8 @@ shadow_make_snapshot(
   5.458      if ( !get_shadow_ref(smfn) )
   5.459          BUG();
   5.460  
   5.461 -    if ( shadow_max_pgtable_type(d, gpfn, &sl1mfn) == PGT_l1_shadow )
   5.462 +    if ( shadow_mode_refcounts(d) &&
   5.463 +         (shadow_max_pgtable_type(d, gpfn, &sl1mfn) == PGT_l1_shadow) )
   5.464          min_max = pfn_to_page(sl1mfn)->tlbflush_timestamp;
   5.465      pfn_to_page(smfn)->tlbflush_timestamp = min_max;
   5.466  
   5.467 @@ -1748,7 +1812,18 @@ shadow_mark_mfn_out_of_sync(struct exec_
   5.468  
   5.469      ASSERT(spin_is_locked(&d->arch.shadow_lock));
   5.470      ASSERT(pfn_valid(mfn));
   5.471 -    ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page);
   5.472 +
   5.473 +#ifndef NDEBUG
   5.474 +    u32 type = page->u.inuse.type_info & PGT_type_mask;
   5.475 +    if ( shadow_mode_refcounts(d) )
   5.476 +    {
   5.477 +        ASSERT(type == PGT_writable_page);
   5.478 +    }
   5.479 +    else
   5.480 +    {
   5.481 +        ASSERT(type && (type < PGT_l4_page_table));
   5.482 +    }
   5.483 +#endif
   5.484  
   5.485      FSH_LOG("%s(gpfn=%lx, mfn=%lx) c=%08x t=%08x", __func__,
   5.486              gpfn, mfn, page->count_info, page->u.inuse.type_info);
   5.487 @@ -1766,6 +1841,10 @@ shadow_mark_mfn_out_of_sync(struct exec_
   5.488      entry->snapshot_mfn = shadow_make_snapshot(d, gpfn, mfn);
   5.489      entry->writable_pl1e = -1;
   5.490  
   5.491 +#if SHADOW_DEBUG
   5.492 +    mark_shadows_as_reflecting_snapshot(d, gpfn);
   5.493 +#endif
   5.494 +
   5.495      // increment guest's ref count to represent the entry in the
   5.496      // full shadow out-of-sync list.
   5.497      //
   5.498 @@ -1859,7 +1938,7 @@ static int snapshot_entry_matches(
   5.499  int __shadow_out_of_sync(struct exec_domain *ed, unsigned long va)
   5.500  {
   5.501      struct domain *d = ed->domain;
   5.502 -    unsigned long l2mfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT;
   5.503 +    unsigned long l2mfn = pagetable_get_pfn(ed->arch.guest_table);
   5.504      l2_pgentry_t l2e;
   5.505      unsigned long l1mfn;
   5.506  
   5.507 @@ -1867,6 +1946,10 @@ int __shadow_out_of_sync(struct exec_dom
   5.508  
   5.509      perfc_incrc(shadow_out_of_sync_calls);
   5.510  
   5.511 +    // PERF BUG: snapshot_entry_matches will call map_domain_mem() on the l2
   5.512 +    // page, but it's already available at ed->arch.guest_vtable...
   5.513 +    // Ditto for the sl2 page and ed->arch.shadow_vtable.
   5.514 +    //
   5.515      if ( page_out_of_sync(&frame_table[l2mfn]) &&
   5.516           !snapshot_entry_matches(ed, l2mfn, l2_table_offset(va)) )
   5.517          return 1;
   5.518 @@ -1881,6 +1964,10 @@ int __shadow_out_of_sync(struct exec_dom
   5.519      if ( !VALID_MFN(l1mfn) )
   5.520          return 0;
   5.521  
   5.522 +    // PERF BUG: snapshot_entry_matches will call map_domain_mem() on the l1
   5.523 +    // page, but it's already available at linear_pg_table[l1_linear_offset()].
   5.524 +    // Ditto for the sl1 page and shadow_linear_pg_table[]...
   5.525 +    //
   5.526      if ( page_out_of_sync(&frame_table[l1mfn]) &&
   5.527           !snapshot_entry_matches(ed, l1mfn, l1_table_offset(va)) )
   5.528          return 1;
   5.529 @@ -2002,7 +2089,7 @@ static u32 remove_all_write_access_in_pt
   5.530          found++;
   5.531          pt[i] = new;
   5.532          if ( is_l1_shadow )
   5.533 -            put_page_from_l1e(old, d);
   5.534 +            shadow_put_page_from_l1e(old, d);
   5.535  
   5.536  #if 0
   5.537          printk("removed write access to pfn=%lx mfn=%lx in smfn=%lx entry %x "
   5.538 @@ -2060,8 +2147,7 @@ int shadow_remove_all_write_access(
   5.539      //
   5.540      write_refs =
   5.541          (frame_table[readonly_gmfn].u.inuse.type_info & PGT_count_mask);
   5.542 -    if ( write_refs &&
   5.543 -         (frame_table[readonly_gmfn].u.inuse.type_info & PGT_pinned) )
   5.544 +    if ( write_refs && MFN_PINNED(readonly_gmfn) )
   5.545      {
   5.546          write_refs--;
   5.547      }
   5.548 @@ -2141,7 +2227,7 @@ static u32 remove_all_access_in_page(
   5.549              count++;
   5.550  
   5.551              if ( is_l1_shadow )
   5.552 -                put_page_from_l1e(ol2e, d);
   5.553 +                shadow_put_page_from_l1e(ol2e, d);
   5.554              else /* must be an hl2 page */
   5.555                  put_page(&frame_table[forbidden_gmfn]);
   5.556          }
   5.557 @@ -2210,8 +2296,23 @@ static int resync_all(struct domain *d, 
   5.558          if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
   5.559              continue;
   5.560  
   5.561 -        if ( !(smfn = __shadow_status(d, entry->gpfn, stype)) )
   5.562 -            continue;
   5.563 +        smfn = __shadow_status(d, entry->gpfn, stype);
   5.564 +
   5.565 +        if ( !smfn )
   5.566 +        {
   5.567 +            if ( shadow_mode_refcounts(d) )
   5.568 +                continue;
   5.569 +
    5.570 +            // For lightweight shadows, even when no shadow page exists,
   5.571 +            // we need to resync the refcounts to the new contents of the
   5.572 +            // guest page.
   5.573 +            // This only applies when we have writable page tables.
   5.574 +            //
   5.575 +            if ( (stype == PGT_l1_shadow) && !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
   5.576 +                continue;
   5.577 +            if ( (stype != PGT_l1_shadow) && !shadow_mode_write_all(d) )
   5.578 +                continue;
   5.579 +        }
   5.580  
   5.581          FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx",
   5.582                  stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn);
   5.583 @@ -2221,12 +2322,29 @@ static int resync_all(struct domain *d, 
   5.584          //
   5.585          guest    = map_domain_mem(entry->gmfn         << PAGE_SHIFT);
   5.586          snapshot = map_domain_mem(entry->snapshot_mfn << PAGE_SHIFT);
   5.587 -        shadow   = map_domain_mem(smfn                << PAGE_SHIFT);
   5.588 +
   5.589 +        if ( smfn )
   5.590 +            shadow = map_domain_mem(smfn << PAGE_SHIFT);
   5.591 +        else
   5.592 +            shadow = NULL;
   5.593 +
   5.594          unshadow = 0;
   5.595  
   5.596          switch ( stype ) {
   5.597          case PGT_l1_shadow:
   5.598          {
   5.599 +            l1_pgentry_t *guest1 = guest;
   5.600 +            l1_pgentry_t *shadow1 = shadow;
   5.601 +            l1_pgentry_t *snapshot1 = snapshot;
   5.602 +
   5.603 +            ASSERT(VM_ASSIST(d, VMASST_TYPE_writable_pagetables));
   5.604 +
   5.605 +            if ( !shadow_mode_refcounts(d) )
   5.606 +                revalidate_l1(d, guest1, snapshot1);
   5.607 +
   5.608 +            if ( !smfn )
   5.609 +                break;
   5.610 +
   5.611              u32 min_max_shadow = pfn_to_page(smfn)->tlbflush_timestamp;
   5.612              int min_shadow = SHADOW_MIN(min_max_shadow);
   5.613              int max_shadow = SHADOW_MAX(min_max_shadow);
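
Aside on the PGT_l1_shadow case above: revalidate_l1() (declared in mm.h further down in this changeset) is what keeps guest refcounts honest for the non-refcount, "lightweight" shadow modes; its body is not in this excerpt. From the way it is called against the guest L1 and its snapshot, it presumably walks the two tables, taking a ref for each entry that appeared and dropping the ref each vanished entry was holding. A sketch under that assumption, with a placeholder name:

    /* Hypothetical sketch -- not the real revalidate_l1(). */
    static int revalidate_l1_sketch(struct domain *d,
                                    l1_pgentry_t *guest, l1_pgentry_t *snapshot)
    {
        int i, modified = 0;

        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
        {
            if ( !l1e_has_changed(&guest[i], &snapshot[i], PAGE_FLAG_MASK) )
                continue;

            modified++;

            /* Ref the new mapping; un-ref what the snapshot was holding. */
            if ( (l1e_get_flags(guest[i]) & _PAGE_PRESENT) &&
                 unlikely(!get_page_from_l1e(guest[i], d)) )
                guest[i] = l1e_empty();
            if ( l1e_get_flags(snapshot[i]) & _PAGE_PRESENT )
                put_page_from_l1e(snapshot[i], d);
        }

        return modified;
    }
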
   5.614 @@ -2236,10 +2354,6 @@ static int resync_all(struct domain *d, 
   5.615              int min_snapshot = SHADOW_MIN(min_max_snapshot);
   5.616              int max_snapshot = SHADOW_MAX(min_max_snapshot);
   5.617  
   5.618 -            l1_pgentry_t *guest1 = guest;
   5.619 -            l1_pgentry_t *shadow1 = shadow;
   5.620 -            l1_pgentry_t *snapshot1 = snapshot;
   5.621 -
   5.622              changed = 0;
   5.623  
   5.624              for ( i = min_shadow; i <= max_shadow; i++ )
   5.625 @@ -2270,6 +2384,9 @@ static int resync_all(struct domain *d, 
   5.626              l2_pgentry_t *shadow2 = shadow;
   5.627              l2_pgentry_t *snapshot2 = snapshot;
   5.628  
   5.629 +            ASSERT(shadow_mode_write_all(d));
   5.630 +            BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
   5.631 +
   5.632              changed = 0;
   5.633              for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   5.634              {
   5.635 @@ -2295,8 +2412,7 @@ static int resync_all(struct domain *d, 
   5.636                  //       Need a better solution long term.
   5.637                  if ( !(l2e_get_flags(new_pde) & _PAGE_PRESENT) &&
   5.638                       unlikely(l2e_get_value(new_pde) != 0) &&
   5.639 -                     !unshadow &&
   5.640 -                     (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
   5.641 +                     !unshadow && MFN_PINNED(smfn) )
   5.642                      unshadow = 1;
   5.643              }
   5.644              if ( max == -1 )
   5.645 @@ -2311,6 +2427,9 @@ static int resync_all(struct domain *d, 
   5.646              l2_pgentry_t *snapshot2 = snapshot;
   5.647              l1_pgentry_t *shadow2 = shadow;
   5.648              
   5.649 +            ASSERT(shadow_mode_write_all(d));
   5.650 +            BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented
   5.651 +
   5.652              changed = 0;
   5.653              for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   5.654              {
   5.655 @@ -2338,7 +2457,8 @@ static int resync_all(struct domain *d, 
   5.656              BUG();
   5.657          }
   5.658  
   5.659 -        unmap_domain_mem(shadow);
   5.660 +        if ( smfn )
   5.661 +            unmap_domain_mem(shadow);
   5.662          unmap_domain_mem(snapshot);
   5.663          unmap_domain_mem(guest);
   5.664  
   5.665 @@ -2351,7 +2471,7 @@ static int resync_all(struct domain *d, 
   5.666                  unsigned long hl2mfn;
   5.667  
   5.668                  if ( (hl2mfn = __shadow_status(d, entry->gpfn, PGT_hl2_shadow)) &&
   5.669 -                     (frame_table[hl2mfn].u.inuse.type_info & PGT_pinned) )
   5.670 +                     MFN_PINNED(hl2mfn) )
   5.671                      shadow_unpin(hl2mfn);
   5.672              }
   5.673          }
   5.674 @@ -2388,7 +2508,7 @@ void __shadow_sync_all(struct domain *d)
   5.675               !shadow_get_page_from_l1e(npte, d) )
   5.676              BUG();
   5.677          *ppte = npte;
   5.678 -        put_page_from_l1e(opte, d);
   5.679 +        shadow_put_page_from_l1e(opte, d);
   5.680  
   5.681          unmap_domain_mem(ppte);
   5.682      }
   5.683 @@ -2475,13 +2595,23 @@ int shadow_fault(unsigned long va, struc
   5.684      /* Write fault? */
   5.685      if ( regs->error_code & 2 )  
   5.686      {
   5.687 +        int allow_writes = 0;
   5.688 +
   5.689          if ( unlikely(!(l1e_get_flags(gpte) & _PAGE_RW)) )
   5.690          {
   5.691 -            /* Write fault on a read-only mapping. */
   5.692 -            SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%lx)", 
   5.693 -                     l1e_get_value(gpte));
   5.694 -            perfc_incrc(shadow_fault_bail_ro_mapping);
   5.695 -            goto fail;
   5.696 +            if ( shadow_mode_page_writable(d, l1e_get_pfn(gpte)) )
   5.697 +            {
   5.698 +                allow_writes = 1;
   5.699 +                l1e_add_flags(&gpte, _PAGE_RW);
   5.700 +            }
   5.701 +            else
   5.702 +            {
   5.703 +                /* Write fault on a read-only mapping. */
   5.704 +                SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%lx)", 
   5.705 +                         l1e_get_value(gpte));
   5.706 +                perfc_incrc(shadow_fault_bail_ro_mapping);
   5.707 +                goto fail;
   5.708 +            }
   5.709          }
   5.710  
   5.711          if ( !l1pte_write_fault(ed, &gpte, &spte, va) )
   5.712 @@ -2491,6 +2621,9 @@ int shadow_fault(unsigned long va, struc
   5.713              shadow_unlock(d);
   5.714              return 0;
   5.715          }
   5.716 +
   5.717 +        if ( allow_writes )
   5.718 +            l1e_remove_flags(&gpte, _PAGE_RW);
   5.719      }
   5.720      else
   5.721      {
   5.722 @@ -2506,22 +2639,23 @@ int shadow_fault(unsigned long va, struc
   5.723      /*
   5.724       * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
   5.725       */
   5.726 -
   5.727 -    /* XXX Watch out for read-only L2 entries! (not used in Linux). */
   5.728 -    if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
   5.729 -                                 &gpte, sizeof(gpte))) )
   5.730 +    if ( l1e_has_changed(&orig_gpte, &gpte, PAGE_FLAG_MASK) )
   5.731      {
   5.732 -        printk("shadow_fault() failed, crashing domain %d "
   5.733 -               "due to a read-only L2 page table (gpde=%lx), va=%lx\n",
   5.734 -               d->id, l2e_get_value(gpde), va);
   5.735 -        domain_crash_synchronous();
   5.736 +        /* XXX Watch out for read-only L2 entries! (not used in Linux). */
   5.737 +        if ( unlikely(__copy_to_user(&linear_pg_table[l1_linear_offset(va)],
   5.738 +                                     &gpte, sizeof(gpte))) )
   5.739 +        {
   5.740 +            printk("%s() failed, crashing domain %d "
   5.741 +                   "due to a read-only L2 page table (gpde=%lx), va=%lx\n",
   5.742 +                   __func__, d->id, l2e_get_value(gpde), va);
   5.743 +            domain_crash_synchronous();
   5.744 +        }
   5.745 +
   5.746 +        // if necessary, record the page table page as dirty
   5.747 +        if ( unlikely(shadow_mode_log_dirty(d)) )
   5.748 +            __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gpde)));
   5.749      }
   5.750  
   5.751 -    // if necessary, record the page table page as dirty
   5.752 -    if ( unlikely(shadow_mode_log_dirty(d)) &&
   5.753 -         l1e_has_changed(&orig_gpte, &gpte, PAGE_FLAG_MASK))
   5.754 -        mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gpde)));
   5.755 -
   5.756      shadow_set_l1e(va, spte, 1);
   5.757  
   5.758      perfc_incrc(shadow_fault_fixed);
   5.759 @@ -2537,6 +2671,109 @@ int shadow_fault(unsigned long va, struc
   5.760      return 0;
   5.761  }
   5.762  
   5.763 +void shadow_l1_normal_pt_update(
   5.764 +    struct domain *d,
   5.765 +    unsigned long pa, l1_pgentry_t gpte,
   5.766 +    struct map_dom_mem_cache *cache)
   5.767 +{
   5.768 +    unsigned long sl1mfn;    
   5.769 +    l1_pgentry_t *spl1e, spte;
   5.770 +
   5.771 +    shadow_lock(d);
   5.772 +
   5.773 +    sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
   5.774 +    if ( sl1mfn )
   5.775 +    {
   5.776 +        SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%08lx",
   5.777 +                 (void *)pa, l1e_get_value(gpte));
   5.778 +        l1pte_propagate_from_guest(current->domain, gpte, &spte);
   5.779 +
   5.780 +        spl1e = map_domain_mem_with_cache(sl1mfn << PAGE_SHIFT, cache);
   5.781 +        spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = spte;
   5.782 +        unmap_domain_mem_with_cache(spl1e, cache);
   5.783 +    }
   5.784 +
   5.785 +    shadow_unlock(d);
   5.786 +}
   5.787 +
   5.788 +void shadow_l2_normal_pt_update(
   5.789 +    struct domain *d,
   5.790 +    unsigned long pa, l2_pgentry_t gpde,
   5.791 +    struct map_dom_mem_cache *cache)
   5.792 +{
   5.793 +    unsigned long sl2mfn;
   5.794 +    l2_pgentry_t *spl2e;
   5.795 +
   5.796 +    shadow_lock(d);
   5.797 +
   5.798 +    sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l2_shadow);
   5.799 +    if ( sl2mfn )
   5.800 +    {
   5.801 +        SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%08lx",
   5.802 +                 (void *)pa, l2e_get_value(gpde));
   5.803 +        spl2e = map_domain_mem_with_cache(sl2mfn << PAGE_SHIFT, cache);
   5.804 +        validate_pde_change(d, gpde,
   5.805 +                            &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
   5.806 +        unmap_domain_mem_with_cache(spl2e, cache);
   5.807 +    }
   5.808 +
   5.809 +    shadow_unlock(d);
   5.810 +}
   5.811 +
   5.812 +#ifdef __x86_64__
   5.813 +void shadow_l3_normal_pt_update(
   5.814 +    struct domain *d,
   5.815 +    unsigned long pa, l3_pgentry_t gpde,
   5.816 +    struct map_dom_mem_cache *cache)
   5.817 +{
   5.818 +    BUG(); // not yet implemented
   5.819 +}
   5.820 +
   5.821 +void shadow_l4_normal_pt_update(
   5.822 +    struct domain *d,
   5.823 +    unsigned long pa, l4_pgentry_t gpde,
   5.824 +    struct map_dom_mem_cache *cache)
   5.825 +{
   5.826 +    BUG(); // not yet implemented
   5.827 +}
   5.828 +#endif
   5.829 +
   5.830 +int shadow_do_update_va_mapping(unsigned long va,
   5.831 +                                l1_pgentry_t val,
   5.832 +                                struct exec_domain *ed)
   5.833 +{
   5.834 +    struct domain *d = ed->domain;
   5.835 +    l1_pgentry_t spte;
   5.836 +    int rc = 0;
   5.837 +
   5.838 +    shadow_lock(d);
   5.839 +
   5.840 +    //printk("%s(va=%p, val=%p)\n", __func__, (void *)va, (void *)l1e_get_value(val));
   5.841 +        
   5.842 +    // This is actually overkill - we don't need to sync the L1 itself,
   5.843 +    // just everything involved in getting to this L1 (i.e. we need
   5.844 +    // linear_pg_table[l1_linear_offset(va)] to be in sync)...
   5.845 +    //
   5.846 +    __shadow_sync_va(ed, va);
   5.847 +
   5.848 +    l1pte_propagate_from_guest(d, val, &spte);
   5.849 +    shadow_set_l1e(va, spte, 0);
   5.850 +
   5.851 +    /*
   5.852 +     * If we're in log-dirty mode then we need to note that we've updated
   5.853 +     * the PTE in the PT-holding page. We need the machine frame number
   5.854 +     * for this.
   5.855 +     */
   5.856 +    if ( shadow_mode_log_dirty(d) )
   5.857 +        __mark_dirty(d, va_to_l1mfn(ed, va));
   5.858 +
   5.859 +// out:
   5.860 +    shadow_unlock(d);
   5.861 +
   5.862 +    return rc;
   5.863 +}
   5.864 +
   5.865 +
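
Aside: shadow_do_update_va_mapping() (declared in shadow.h below) is presumably the shadow-mode branch of the generic update_va_mapping path; the actual dispatch site is not in this excerpt. A hypothetical caller sketch, with a placeholder name:

    /* Hypothetical dispatch sketch only; update_va_mapping_sketch is not
     * from this changeset. */
    static int update_va_mapping_sketch(unsigned long va, l1_pgentry_t val,
                                        struct exec_domain *ed)
    {
        if ( unlikely(shadow_mode_enabled(ed->domain)) )
            return shadow_do_update_va_mapping(va, val, ed);

        return 0;   /* non-shadow path omitted */
    }
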
   5.866  /*
   5.867   * What lives where in the 32-bit address space in the various shadow modes,
   5.868   * and what it uses to get/maintain that mapping.
   5.869 @@ -2566,7 +2803,7 @@ int shadow_fault(unsigned long va, struc
   5.870  void __update_pagetables(struct exec_domain *ed)
   5.871  {
   5.872      struct domain *d = ed->domain;
   5.873 -    unsigned long gmfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT;
   5.874 +    unsigned long gmfn = pagetable_get_pfn(ed->arch.guest_table);
   5.875      unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
   5.876      unsigned long smfn, hl2mfn, old_smfn;
   5.877  
   5.878 @@ -2595,7 +2832,7 @@ void __update_pagetables(struct exec_dom
   5.879          smfn = shadow_l2_table(d, gpfn, gmfn);
   5.880      if ( !get_shadow_ref(smfn) )
   5.881          BUG();
   5.882 -    old_smfn = pagetable_val(ed->arch.shadow_table) >> PAGE_SHIFT;
   5.883 +    old_smfn = pagetable_get_pfn(ed->arch.shadow_table);
   5.884      ed->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
   5.885      if ( old_smfn )
   5.886          put_shadow_ref(old_smfn);
   5.887 @@ -2665,6 +2902,47 @@ void __update_pagetables(struct exec_dom
   5.888  
   5.889  #if SHADOW_DEBUG
   5.890  
   5.891 +// The following is entirely for _check_pagetable()'s benefit.
   5.892 +// _check_pagetable() wants to know whether a given entry in a
   5.893 +// shadow page table is supposed to be the shadow of the guest's
   5.894 +// current entry, or the shadow of the entry held in the snapshot
   5.895 +// taken above.
   5.896 +//
   5.897 +// Here, we mark all currently existing entries as reflecting
   5.898 +// the snapshot, above.  All other places in xen that update
   5.899 +// the shadow will keep the shadow in sync with the guest's
   5.900 +// entries (via l1pte_propagate_from_guest and friends), which clear
   5.901 +// the SHADOW_REFLECTS_SNAPSHOT bit.
   5.902 +//
   5.903 +static void
   5.904 +mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn)
   5.905 +{
   5.906 +    unsigned long smfn;
   5.907 +    l1_pgentry_t *l1e;
   5.908 +    l2_pgentry_t *l2e;
   5.909 +    unsigned i;
   5.910 +
   5.911 +    if ( (smfn = __shadow_status(d, gpfn, PGT_l1_shadow)) )
   5.912 +    {
   5.913 +        l1e = map_domain_mem(smfn << PAGE_SHIFT);
   5.914 +        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
   5.915 +            if ( is_guest_l1_slot(i) &&
   5.916 +                 (l1e_get_flags(l1e[i]) & _PAGE_PRESENT) )
   5.917 +                l1e_add_flags(&l1e[i], SHADOW_REFLECTS_SNAPSHOT);
   5.918 +        unmap_domain_mem(l1e);
   5.919 +    }
   5.920 +
   5.921 +    if ( (smfn = __shadow_status(d, gpfn, PGT_l2_shadow)) )
   5.922 +    {
   5.923 +        l2e = map_domain_mem(smfn << PAGE_SHIFT);
   5.924 +        for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   5.925 +            if ( is_guest_l2_slot(i) &&
   5.926 +                 (l2e_get_flags(l2e[i]) & _PAGE_PRESENT) )
   5.927 +                l2e_add_flags(&l2e[i], SHADOW_REFLECTS_SNAPSHOT);
   5.928 +        unmap_domain_mem(l2e);
   5.929 +    }
   5.930 +}
   5.931 +
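
Aside on the debug machinery above: SHADOW_REFLECTS_SNAPSHOT (defined in shadow.h below as bit 9) sits inside the software-available PTE bits, which is why the comparison mask in check_pte() below now also strips _PAGE_AVAIL. A quick check of the arithmetic:

    /* _PAGE_AVAIL              == 0xe00       (bits 9..11, software use)
     * SHADOW_REFLECTS_SNAPSHOT == 1u << 9 == 0x200
     * 0x200 & ~0xe00           == 0           -- the debug bit is fully
     * covered by the mask, so it can never trip the "Corrupt?" test. */
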
   5.932  // BUG: these are not SMP safe...
   5.933  static int sh_l2_present;
   5.934  static int sh_l1_present;
   5.935 @@ -2687,96 +2965,109 @@ int shadow_status_noswap;
   5.936  
   5.937  #define FAIL(_f, _a...)                                                      \
   5.938      do {                                                                     \
   5.939 -        printk("XXX %s-FAIL (%d,%d,%d)" _f " at %s(%d)\n",                   \
   5.940 +        printk("XXX %s-FAIL (%d,%d,%d) " _f " at %s(%d)\n",                  \
   5.941                 sh_check_name, level, l2_idx, l1_idx, ## _a,                  \
   5.942                 __FILE__, __LINE__);                                          \
   5.943 -        printk("g=%lx s=%lx &g=%p &s=%p"                                     \
   5.944 -               " v2m(&g)=%08lx v2m(&s)=%08lx ea=%08x\n",                     \
   5.945 -               l1e_get_value(gpte), l1e_get_value(spte), pgpte, pspte,       \
   5.946 -               v2m(ed, pgpte), v2m(ed, pspte),                               \
   5.947 +        printk("guest_pte=%lx eff_guest_pte=%lx shadow_pte=%lx "             \
   5.948 +               "snapshot_pte=%lx &guest=%p &shadow=%p &snap=%p "             \
   5.949 +               "v2m(&guest)=%p v2m(&shadow)=%p v2m(&snap)=%p ea=%08x\n",     \
   5.950 +               l1e_get_value(guest_pte), l1e_get_value(eff_guest_pte),       \
   5.951 +               l1e_get_value(shadow_pte), l1e_get_value(snapshot_pte),       \
   5.952 +               p_guest_pte, p_shadow_pte, p_snapshot_pte,                    \
   5.953 +               (void *)v2m(ed, p_guest_pte), (void *)v2m(ed, p_shadow_pte),  \
   5.954 +               (void *)v2m(ed, p_snapshot_pte),                              \
   5.955                 (l2_idx << L2_PAGETABLE_SHIFT) |                              \
   5.956                 (l1_idx << L1_PAGETABLE_SHIFT));                              \
   5.957          errors++;                                                            \
   5.958      } while ( 0 )
   5.959  
   5.960  static int check_pte(
   5.961 -    struct exec_domain *ed, l1_pgentry_t *pgpte, l1_pgentry_t *pspte, 
   5.962 -    int level, int l2_idx, int l1_idx, int oos_ptes)
   5.963 +    struct exec_domain *ed,
   5.964 +    l1_pgentry_t *p_guest_pte,
   5.965 +    l1_pgentry_t *p_shadow_pte,
   5.966 +    l1_pgentry_t *p_snapshot_pte,
   5.967 +    int level, int l2_idx, int l1_idx)
   5.968  {
   5.969      struct domain *d = ed->domain;
   5.970 -    l1_pgentry_t gpte = *pgpte;
   5.971 -    l1_pgentry_t spte = *pspte;
   5.972 -    unsigned long mask, gpfn, smfn, gmfn;
   5.973 -    int errors = 0;
   5.974 +    l1_pgentry_t guest_pte = *p_guest_pte;
   5.975 +    l1_pgentry_t shadow_pte = *p_shadow_pte;
   5.976 +    l1_pgentry_t snapshot_pte = p_snapshot_pte ? *p_snapshot_pte : l1e_empty();
   5.977 +    l1_pgentry_t eff_guest_pte;
   5.978 +    unsigned long mask, eff_guest_pfn, eff_guest_mfn, shadow_mfn;
   5.979 +    int errors = 0, guest_writable;
   5.980      int page_table_page;
   5.981  
   5.982 -    if ( (l1e_get_value(spte) == 0) ||
   5.983 -         (l1e_get_value(spte) == 0xdeadface) ||
   5.984 -         (l1e_get_value(spte) == 0x00000E00) )
   5.985 +    if ( (l1e_get_value(shadow_pte) == 0) ||
   5.986 +         (l1e_get_value(shadow_pte) == 0xdeadface) ||
   5.987 +         (l1e_get_value(shadow_pte) == 0x00000E00) )
   5.988          return errors;  /* always safe */
   5.989  
   5.990 -    if ( !(l1e_get_flags(spte) & _PAGE_PRESENT) )
   5.991 -        FAIL("Non zero not present spte");
   5.992 +    if ( !(l1e_get_flags(shadow_pte) & _PAGE_PRESENT) )
   5.993 +        FAIL("Non zero not present shadow_pte");
   5.994  
   5.995      if ( level == 2 ) sh_l2_present++;
   5.996      if ( level == 1 ) sh_l1_present++;
   5.997  
   5.998 -    if ( !(l1e_get_flags(gpte) & _PAGE_PRESENT) )
   5.999 +    if ( (l1e_get_flags(shadow_pte) & SHADOW_REFLECTS_SNAPSHOT) && p_snapshot_pte )
  5.1000 +        eff_guest_pte = snapshot_pte;
  5.1001 +    else
  5.1002 +        eff_guest_pte = guest_pte;
  5.1003 +
  5.1004 +    if ( !(l1e_get_flags(eff_guest_pte) & _PAGE_PRESENT) )
  5.1005          FAIL("Guest not present yet shadow is");
  5.1006  
  5.1007 -    mask = ~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|PAGE_MASK);
  5.1008 -
  5.1009 -    if ( (l1e_get_value(spte) & mask) != (l1e_get_value(gpte) & mask) )
  5.1010 +    mask = ~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|_PAGE_AVAIL|PAGE_MASK);
  5.1011 +
  5.1012 +    if ( ((l1e_get_value(shadow_pte) & mask) != (l1e_get_value(eff_guest_pte) & mask)) )
  5.1013          FAIL("Corrupt?");
  5.1014  
  5.1015      if ( (level == 1) &&
  5.1016 -         (l1e_get_flags(spte) & _PAGE_DIRTY) &&
  5.1017 -         !(l1e_get_flags(gpte) & _PAGE_DIRTY) && !oos_ptes )
  5.1018 +         (l1e_get_flags(shadow_pte) & _PAGE_DIRTY) &&
  5.1019 +         !(l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY) )
  5.1020          FAIL("Dirty coherence");
  5.1021  
  5.1022 -    if ( (l1e_get_flags(spte) & _PAGE_ACCESSED) &&
  5.1023 -         !(l1e_get_flags(gpte) & _PAGE_ACCESSED) && !oos_ptes )
  5.1024 +    if ( (l1e_get_flags(shadow_pte) & _PAGE_ACCESSED) &&
  5.1025 +         !(l1e_get_flags(eff_guest_pte) & _PAGE_ACCESSED) )
  5.1026          FAIL("Accessed coherence");
  5.1027  
  5.1028 -    if ( l1e_get_flags(spte) & _PAGE_GLOBAL )
  5.1029 +    if ( l1e_get_flags(shadow_pte) & _PAGE_GLOBAL )
  5.1030          FAIL("global bit set in shadow");
  5.1031  
  5.1032 -    smfn = l1e_get_pfn(spte);
  5.1033 -    gpfn = l1e_get_pfn(gpte);
  5.1034 -    gmfn = __gpfn_to_mfn(d, gpfn);
  5.1035 -
  5.1036 -    if ( !VALID_MFN(gmfn) )
  5.1037 -        FAIL("%s: invalid gpfn=%lx gpte=%lx\n", __func__, gpfn,
  5.1038 -             l1e_get_value(gpte));
  5.1039 -
  5.1040 -    page_table_page = mfn_is_page_table(gmfn);
  5.1041 -
  5.1042 -    if ( (l1e_get_flags(spte) & _PAGE_RW ) &&
  5.1043 -         !(l1e_get_flags(gpte) & _PAGE_RW) && !oos_ptes )
  5.1044 +    eff_guest_pfn = l1e_get_pfn(eff_guest_pte);
  5.1045 +    eff_guest_mfn = __gpfn_to_mfn(d, eff_guest_pfn);
  5.1046 +    shadow_mfn = l1e_get_pfn(shadow_pte);
  5.1047 +
  5.1048 +    if ( !VALID_MFN(eff_guest_mfn) && !shadow_mode_refcounts(d) )
  5.1049 +        FAIL("%s: invalid eff_guest_pfn=%lx eff_guest_pte=%lx\n", __func__, eff_guest_pfn,
  5.1050 +             l1e_get_value(eff_guest_pte));
  5.1051 +
  5.1052 +    page_table_page = mfn_is_page_table(eff_guest_mfn);
  5.1053 +
  5.1054 +    guest_writable =
  5.1055 +        (l1e_get_flags(eff_guest_pte) & _PAGE_RW) ||
  5.1056 +        (VM_ASSIST(d, VMASST_TYPE_writable_pagetables) && (level == 1) && mfn_out_of_sync(eff_guest_mfn));
  5.1057 +
  5.1058 +    if ( (l1e_get_flags(shadow_pte) & _PAGE_RW ) && !guest_writable )
  5.1059      {
  5.1060 -        printk("gpfn=%lx gmfn=%lx smfn=%lx t=0x%08x page_table_page=%d "
  5.1061 -               "oos_ptes=%d\n",
  5.1062 -               gpfn, gmfn, smfn,
  5.1063 -               frame_table[gmfn].u.inuse.type_info,
  5.1064 -               page_table_page, oos_ptes);
  5.1065 +        printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
  5.1066 +               eff_guest_pfn, eff_guest_mfn, shadow_mfn,
  5.1067 +               frame_table[eff_guest_mfn].u.inuse.type_info,
  5.1068 +               page_table_page);
  5.1069          FAIL("RW coherence");
  5.1070      }
  5.1071  
  5.1072      if ( (level == 1) &&
  5.1073 -         (l1e_get_flags(spte) & _PAGE_RW ) &&
  5.1074 -         !((l1e_get_flags(gpte) & _PAGE_RW) &&
  5.1075 -           (l1e_get_flags(gpte) & _PAGE_DIRTY)) &&
  5.1076 -         !oos_ptes )
  5.1077 +         (l1e_get_flags(shadow_pte) & _PAGE_RW ) &&
  5.1078 +         !(guest_writable && (l1e_get_flags(eff_guest_pte) & _PAGE_DIRTY)) )
  5.1079      {
  5.1080 -        printk("gpfn=%lx gmfn=%lx smfn=%lx t=0x%08x page_table_page=%d "
  5.1081 -               "oos_ptes=%d\n",
  5.1082 -               gpfn, gmfn, smfn,
  5.1083 -               frame_table[gmfn].u.inuse.type_info,
  5.1084 -               page_table_page, oos_ptes);
  5.1085 +        printk("eff_guest_pfn=%lx eff_guest_mfn=%lx shadow_mfn=%lx t=0x%08x page_table_page=%d\n",
  5.1086 +               eff_guest_pfn, eff_guest_mfn, shadow_mfn,
  5.1087 +               frame_table[eff_guest_mfn].u.inuse.type_info,
  5.1088 +               page_table_page);
  5.1089          FAIL("RW2 coherence");
  5.1090      }
  5.1091   
  5.1092 -    if ( gmfn == smfn )
  5.1093 +    if ( eff_guest_mfn == shadow_mfn )
  5.1094      {
  5.1095          if ( level > 1 )
  5.1096              FAIL("Linear map ???");    /* XXX this will fail on BSD */
  5.1097 @@ -2788,9 +3079,9 @@ static int check_pte(
  5.1098  
  5.1099          if ( level == 2 )
  5.1100          {
  5.1101 -            if ( __shadow_status(d, gpfn, PGT_l1_shadow) != smfn )
  5.1102 -                FAIL("smfn problem gpfn=%lx smfn=%lx", gpfn,
  5.1103 -                     __shadow_status(d, gpfn, PGT_l1_shadow));
  5.1104 +            if ( __shadow_status(d, eff_guest_pfn, PGT_l1_shadow) != shadow_mfn )
  5.1105 +                FAIL("shadow_mfn problem eff_guest_pfn=%lx shadow_mfn=%lx", eff_guest_pfn,
  5.1106 +                     __shadow_status(d, eff_guest_pfn, PGT_l1_shadow));
  5.1107          }
  5.1108          else
  5.1109              BUG(); // XXX -- not handled yet.
  5.1110 @@ -2807,24 +3098,29 @@ static int check_l1_table(
  5.1111  {
  5.1112      struct domain *d = ed->domain;
  5.1113      int i;
  5.1114 -    l1_pgentry_t *gpl1e, *spl1e;
  5.1115 -    int errors = 0, oos_ptes = 0;
  5.1116 +    unsigned long snapshot_mfn;
  5.1117 +    l1_pgentry_t *p_guest, *p_shadow, *p_snapshot = NULL;
  5.1118 +    int errors = 0;
  5.1119  
  5.1120      if ( page_out_of_sync(pfn_to_page(gmfn)) )
  5.1121      {
  5.1122 -        gmfn = __shadow_status(d, gpfn, PGT_snapshot);
  5.1123 -        oos_ptes = 1;
  5.1124 -        ASSERT(gmfn);
  5.1125 +        snapshot_mfn = __shadow_status(d, gpfn, PGT_snapshot);
  5.1126 +        ASSERT(snapshot_mfn);
  5.1127 +        p_snapshot = map_domain_mem(snapshot_mfn << PAGE_SHIFT);
  5.1128      }
  5.1129  
  5.1130 -    gpl1e = map_domain_mem(gmfn << PAGE_SHIFT);
  5.1131 -    spl1e = map_domain_mem(smfn << PAGE_SHIFT);
  5.1132 +    p_guest  = map_domain_mem(gmfn << PAGE_SHIFT);
  5.1133 +    p_shadow = map_domain_mem(smfn << PAGE_SHIFT);
  5.1134  
  5.1135      for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
  5.1136 -        errors += check_pte(ed, &gpl1e[i], &spl1e[i], 1, l2_idx, i, oos_ptes);
  5.1137 +        errors += check_pte(ed, p_guest+i, p_shadow+i,
  5.1138 +                            p_snapshot ? p_snapshot+i : NULL,
  5.1139 +                            1, l2_idx, i);
  5.1140   
  5.1141 -    unmap_domain_mem(spl1e);
  5.1142 -    unmap_domain_mem(gpl1e);
  5.1143 +    unmap_domain_mem(p_shadow);
  5.1144 +    unmap_domain_mem(p_guest);
  5.1145 +    if ( p_snapshot )
  5.1146 +        unmap_domain_mem(p_snapshot);
  5.1147  
  5.1148      return errors;
  5.1149  }
  5.1150 @@ -2909,7 +3205,8 @@ int check_l2_table(
  5.1151          errors += check_pte(ed,
  5.1152                              (l1_pgentry_t*)(&gpl2e[i]), /* Hmm, dirty ... */
  5.1153                              (l1_pgentry_t*)(&spl2e[i]),
  5.1154 -                            2, i, 0, 0);
  5.1155 +                            NULL,
  5.1156 +                            2, i, 0);
  5.1157  
  5.1158      unmap_domain_mem(spl2e);
  5.1159      unmap_domain_mem(gpl2e);
     6.1 --- a/xen/arch/x86/traps.c	Sun May 08 12:06:10 2005 +0000
     6.2 +++ b/xen/arch/x86/traps.c	Mon May 09 14:34:59 2005 +0000
     6.3 @@ -271,7 +271,8 @@ asmlinkage int do_page_fault(struct cpu_
     6.4  
     6.5      perfc_incrc(page_faults);
     6.6  
     6.7 -    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
     6.8 +    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
     6.9 +                !shadow_mode_enabled(d)) )
    6.10      {
    6.11          LOCK_BIGLOCK(d);
    6.12          if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
    6.13 @@ -287,8 +288,6 @@ asmlinkage int do_page_fault(struct cpu_
    6.14               ((regs->error_code & 3) == 3) && /* write-protection fault */
    6.15               ptwr_do_page_fault(d, addr) )
    6.16          {
    6.17 -            if ( unlikely(shadow_mode_enabled(d)) )
    6.18 -                (void)shadow_fault(addr, regs);
    6.19              UNLOCK_BIGLOCK(d);
    6.20              return EXCRET_fault_fixed;
    6.21          }
     7.1 --- a/xen/arch/x86/vmx.c	Sun May 08 12:06:10 2005 +0000
     7.2 +++ b/xen/arch/x86/vmx.c	Mon May 09 14:34:59 2005 +0000
     7.3 @@ -672,7 +672,7 @@ static int vmx_set_cr0(unsigned long val
     7.4                          d->arch.arch_vmx.cpu_cr3);
     7.5              domain_crash_synchronous(); /* need to take a clean path */
     7.6          }
     7.7 -        old_base_mfn = pagetable_val(d->arch.guest_table) >> PAGE_SHIFT;
     7.8 +        old_base_mfn = pagetable_get_pfn(d->arch.guest_table);
     7.9          if (old_base_mfn)
    7.10              put_page(pfn_to_page(old_base_mfn));
    7.11  
    7.12 @@ -798,7 +798,7 @@ static int mov_to_cr(int gp, int cr, str
    7.13                          "Invalid CR3 value=%lx", value);
    7.14                  domain_crash_synchronous(); /* need to take a clean path */
    7.15              }
    7.16 -            old_base_mfn = pagetable_val(d->arch.guest_table) >> PAGE_SHIFT;
    7.17 +            old_base_mfn = pagetable_get_pfn(d->arch.guest_table);
    7.18              d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
    7.19              if (old_base_mfn)
    7.20                  put_page(pfn_to_page(old_base_mfn));
     8.1 --- a/xen/include/asm-x86/mm.h	Sun May 08 12:06:10 2005 +0000
     8.2 +++ b/xen/include/asm-x86/mm.h	Mon May 09 14:34:59 2005 +0000
     8.3 @@ -150,7 +150,7 @@ extern void invalidate_shadow_ldt(struct
     8.4  extern int shadow_remove_all_write_access(
     8.5      struct domain *d, unsigned long gpfn, unsigned long gmfn);
     8.6  extern u32 shadow_remove_all_access( struct domain *d, unsigned long gmfn);
     8.7 -extern int _shadow_mode_enabled(struct domain *d);
     8.8 +extern int _shadow_mode_refcounts(struct domain *d);
     8.9  
    8.10  static inline void put_page(struct pfn_info *page)
    8.11  {
    8.12 @@ -182,7 +182,7 @@ static inline int get_page(struct pfn_in
    8.13               unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
    8.14               unlikely(d != _domain) )                /* Wrong owner? */
    8.15          {
    8.16 -            if ( !_shadow_mode_enabled(domain) )
    8.17 +            if ( !_shadow_mode_refcounts(domain) )
    8.18                  DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%08x\n",
    8.19                          page_to_pfn(page), domain, unpickle_domptr(d),
    8.20                          x, page->u.inuse.type_info);
    8.21 @@ -315,14 +315,21 @@ int  ptwr_init(struct domain *);
    8.22  void ptwr_destroy(struct domain *);
    8.23  void ptwr_flush(struct domain *, const int);
    8.24  int  ptwr_do_page_fault(struct domain *, unsigned long);
    8.25 +int  revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);
    8.26  
    8.27  #define cleanup_writable_pagetable(_d)                                      \
    8.28      do {                                                                    \
    8.29 -        if ( unlikely(VM_ASSIST((_d), VMASST_TYPE_writable_pagetables)) ) { \
    8.30 -            if ( (_d)->arch.ptwr[PTWR_PT_ACTIVE].l1va )                     \
    8.31 -                ptwr_flush((_d), PTWR_PT_ACTIVE);                           \
    8.32 -            if ( (_d)->arch.ptwr[PTWR_PT_INACTIVE].l1va )                   \
    8.33 -                ptwr_flush((_d), PTWR_PT_INACTIVE);                         \
    8.34 +        if ( likely(VM_ASSIST((_d), VMASST_TYPE_writable_pagetables)) )     \
    8.35 +        {                                                                   \
    8.36 +            if ( likely(!shadow_mode_enabled(_d)) )                         \
    8.37 +            {                                                               \
    8.38 +                if ( (_d)->arch.ptwr[PTWR_PT_ACTIVE].l1va )                 \
    8.39 +                    ptwr_flush((_d), PTWR_PT_ACTIVE);                       \
    8.40 +                if ( (_d)->arch.ptwr[PTWR_PT_INACTIVE].l1va )               \
    8.41 +                    ptwr_flush((_d), PTWR_PT_INACTIVE);                     \
    8.42 +            }                                                               \
    8.43 +            else                                                            \
    8.44 +                shadow_sync_all(_d);                                        \
    8.45          }                                                                   \
    8.46      } while ( 0 )
    8.47  
    8.48 @@ -330,9 +337,9 @@ int audit_adjust_pgtables(struct domain 
    8.49  
    8.50  #ifndef NDEBUG
    8.51  
    8.52 -#define AUDIT_ALREADY_LOCKED ( 1u << 0 )
    8.53 -#define AUDIT_ERRORS_OK      ( 1u << 1 )
    8.54 -#define AUDIT_QUIET          ( 1u << 2 )
    8.55 +#define AUDIT_SHADOW_ALREADY_LOCKED ( 1u << 0 )
    8.56 +#define AUDIT_ERRORS_OK             ( 1u << 1 )
    8.57 +#define AUDIT_QUIET                 ( 1u << 2 )
    8.58  
    8.59  void _audit_domain(struct domain *d, int flags);
    8.60  #define audit_domain(_d) _audit_domain((_d), AUDIT_ERRORS_OK)
     9.1 --- a/xen/include/asm-x86/page.h	Sun May 08 12:06:10 2005 +0000
     9.2 +++ b/xen/include/asm-x86/page.h	Mon May 09 14:34:59 2005 +0000
     9.3 @@ -23,6 +23,7 @@
     9.4  #ifndef __ASSEMBLY__
     9.5  typedef struct { unsigned long pt_lo; } pagetable_t;
     9.6  #define pagetable_val(_x)   ((_x).pt_lo)
     9.7 +#define pagetable_get_pfn(_x) ((_x).pt_lo >> PAGE_SHIFT)
     9.8  #define mk_pagetable(_x)    ( (pagetable_t) { (_x) } )
     9.9  #endif
    9.10  
    9.11 @@ -103,6 +104,7 @@ extern void paging_init(void);
    9.12  #define _PAGE_PAT      0x080UL
    9.13  #define _PAGE_PSE      0x080UL
    9.14  #define _PAGE_GLOBAL   0x100UL
    9.15 +#define _PAGE_AVAIL    0xe00UL
    9.16  
    9.17  #define __PAGE_HYPERVISOR \
    9.18      (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
    10.1 --- a/xen/include/asm-x86/shadow.h	Sun May 08 12:06:10 2005 +0000
    10.2 +++ b/xen/include/asm-x86/shadow.h	Mon May 09 14:34:59 2005 +0000
    10.3 @@ -33,11 +33,17 @@
    10.4  /* Shadow PT operation mode : shadow-mode variable in arch_domain. */
    10.5  
    10.6  #define SHM_enable    (1<<0) /* we're in one of the shadow modes */
    10.7 -#define SHM_log_dirty (1<<1) /* enable log dirty mode */
    10.8 -#define SHM_translate (1<<2) /* do p2m tranaltion on guest tables */
    10.9 -#define SHM_external  (1<<3) /* external page table, not used by Xen */
   10.10 +#define SHM_refcounts (1<<1) /* refcounts based on shadow tables instead of
   10.11 +                                guest tables */
   10.12 +#define SHM_write_all (1<<2) /* allow write access to all guest pt pages,
   10.13 +                                regardless of pte write permissions */
   10.14 +#define SHM_log_dirty (1<<3) /* enable log dirty mode */
    10.15 +#define SHM_translate (1<<4) /* do p2m translation on guest tables */
   10.16 +#define SHM_external  (1<<5) /* external page table, not used by Xen */
   10.17  
   10.18  #define shadow_mode_enabled(_d)   ((_d)->arch.shadow_mode)
   10.19 +#define shadow_mode_refcounts(_d) ((_d)->arch.shadow_mode & SHM_refcounts)
   10.20 +#define shadow_mode_write_all(_d) ((_d)->arch.shadow_mode & SHM_write_all)
   10.21  #define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty)
   10.22  #define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate)
   10.23  #define shadow_mode_external(_d)  ((_d)->arch.shadow_mode & SHM_external)
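
Aside: the renumbered SHM_* bits compose into d->arch.shadow_mode. For instance, the DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE path in shadow.c above ORs in the combination sketched here:

    /* Illustration only: the bits requested by the ENABLE_TRANSLATE path. */
    unsigned int mode = SHM_enable | SHM_refcounts | SHM_translate;
    /* => shadow_mode_refcounts(d) and shadow_mode_translate(d) become true,
     *    while shadow_mode_write_all(d) and shadow_mode_external(d) stay
     *    false unless already set. */
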
   10.24 @@ -72,7 +78,29 @@ extern void free_monitor_pagetable(struc
   10.25  extern void __shadow_sync_all(struct domain *d);
   10.26  extern int __shadow_out_of_sync(struct exec_domain *ed, unsigned long va);
   10.27  extern int set_p2m_entry(
   10.28 -    struct domain *d, unsigned long pfn, unsigned long mfn);
   10.29 +    struct domain *d, unsigned long pfn, unsigned long mfn,
   10.30 +    struct map_dom_mem_cache *l2cache,
   10.31 +    struct map_dom_mem_cache *l1cache);
   10.32 +extern void remove_shadow(struct domain *d, unsigned long gpfn, u32 stype);
   10.33 +
   10.34 +extern void shadow_l1_normal_pt_update(struct domain *d,
   10.35 +                                       unsigned long pa, l1_pgentry_t l1e,
   10.36 +                                       struct map_dom_mem_cache *cache);
   10.37 +extern void shadow_l2_normal_pt_update(struct domain *d,
   10.38 +                                       unsigned long pa, l2_pgentry_t l2e,
   10.39 +                                       struct map_dom_mem_cache *cache);
   10.40 +#ifdef __x86_64__
   10.41 +extern void shadow_l3_normal_pt_update(struct domain *d,
   10.42 +                                       unsigned long pa, l3_pgentry_t l3e,
   10.43 +                                       struct map_dom_mem_cache *cache);
   10.44 +extern void shadow_l4_normal_pt_update(struct domain *d,
   10.45 +                                       unsigned long pa, l4_pgentry_t l4e,
   10.46 +                                       struct map_dom_mem_cache *cache);
   10.47 +#endif
   10.48 +extern int shadow_do_update_va_mapping(unsigned long va,
   10.49 +                                       l1_pgentry_t val,
   10.50 +                                       struct exec_domain *ed);
   10.51 +
   10.52  
   10.53  static inline unsigned long __shadow_status(
   10.54      struct domain *d, unsigned long gpfn, unsigned long stype);
   10.55 @@ -82,7 +110,13 @@ extern void vmx_shadow_clear_state(struc
   10.56  
   10.57  static inline int page_is_page_table(struct pfn_info *page)
   10.58  {
   10.59 -    return page->count_info & PGC_page_table;
   10.60 +    struct domain *owner = page_get_owner(page);
   10.61 +
   10.62 +    if ( owner && shadow_mode_refcounts(owner) )
   10.63 +        return page->count_info & PGC_page_table;
   10.64 +
   10.65 +    u32 type_info = page->u.inuse.type_info & PGT_type_mask;
   10.66 +    return type_info && (type_info <= PGT_l4_page_table);
   10.67  }
   10.68  
   10.69  static inline int mfn_is_page_table(unsigned long mfn)
   10.70 @@ -90,7 +124,7 @@ static inline int mfn_is_page_table(unsi
   10.71      if ( !pfn_valid(mfn) )
   10.72          return 0;
   10.73  
   10.74 -    return frame_table[mfn].count_info & PGC_page_table;
   10.75 +    return page_is_page_table(pfn_to_page(mfn));
   10.76  }
   10.77  
   10.78  static inline int page_out_of_sync(struct pfn_info *page)
   10.79 @@ -103,7 +137,7 @@ static inline int mfn_out_of_sync(unsign
   10.80      if ( !pfn_valid(mfn) )
   10.81          return 0;
   10.82  
   10.83 -    return frame_table[mfn].count_info & PGC_out_of_sync;
   10.84 +    return page_out_of_sync(pfn_to_page(mfn));
   10.85  }
   10.86  
   10.87  
   10.88 @@ -191,10 +225,12 @@ static inline void shadow_mode_disable(s
   10.89        : (mfn) )
   10.90  
   10.91  #define __gpfn_to_mfn(_d, gpfn)                        \
   10.92 -    ( (shadow_mode_translate(_d))                      \
   10.93 -      ? ({ ASSERT(current->domain == (_d));            \
   10.94 -           phys_to_machine_mapping(gpfn); })           \
   10.95 -      : (gpfn) )
   10.96 +    ({                                                 \
   10.97 +        ASSERT(current->domain == (_d));               \
   10.98 +        (shadow_mode_translate(_d))                    \
   10.99 +        ? phys_to_machine_mapping(gpfn)                \
  10.100 +        : (gpfn);                                      \
  10.101 +    })
  10.102  
  10.103  #define __gpfn_to_mfn_foreign(_d, gpfn)                \
  10.104      ( (shadow_mode_translate(_d))                      \
  10.105 @@ -237,6 +273,8 @@ struct out_of_sync_entry {
  10.106  
  10.107  #if SHADOW_DEBUG
  10.108  extern int shadow_status_noswap;
  10.109 +#define _SHADOW_REFLECTS_SNAPSHOT ( 9)
  10.110 +#define SHADOW_REFLECTS_SNAPSHOT  (1u << _SHADOW_REFLECTS_SNAPSHOT)
  10.111  #endif
  10.112  
  10.113  #ifdef VERBOSE
  10.114 @@ -292,15 +330,18 @@ shadow_get_page_from_l1e(l1_pgentry_t l1
  10.115  
  10.116      ASSERT(l1e_get_flags(l1e) & _PAGE_PRESENT);
  10.117  
  10.118 +    if ( !shadow_mode_refcounts(d) )
  10.119 +        return 1;
  10.120 +
  10.121      nl1e = l1e;
  10.122      l1e_remove_flags(&nl1e, _PAGE_GLOBAL);
  10.123      res = get_page_from_l1e(nl1e, d);
  10.124  
  10.125      if ( unlikely(!res) && IS_PRIV(d) && !shadow_mode_translate(d) &&
  10.126 -         !(l1e_get_flags(l1e) & L1_DISALLOW_MASK) &&
  10.127 -         (mfn = l1e_get_pfn(l1e)) &&
  10.128 +         !(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) &&
  10.129 +         (mfn = l1e_get_pfn(nl1e)) &&
  10.130           pfn_valid(mfn) &&
  10.131 -         (owner = page_get_owner(pfn_to_page(l1e_get_pfn(l1e)))) &&
  10.132 +         (owner = page_get_owner(pfn_to_page(mfn))) &&
  10.133           (d != owner) )
  10.134      {
  10.135          res = get_page_from_l1e(nl1e, owner);
  10.136 @@ -319,6 +360,103 @@ shadow_get_page_from_l1e(l1_pgentry_t l1
  10.137      return res;
  10.138  }
  10.139  
  10.140 +static inline void
  10.141 +shadow_put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
  10.142 +{
  10.143 +    if ( !shadow_mode_refcounts(d) )
  10.144 +        return;
  10.145 +
  10.146 +    put_page_from_l1e(l1e, d);
  10.147 +}
  10.148 +
  10.149 +static inline void
  10.150 +shadow_put_page_type(struct domain *d, struct pfn_info *page)
  10.151 +{
  10.152 +    if ( !shadow_mode_refcounts(d) )
  10.153 +        return;
  10.154 +
  10.155 +    put_page_type(page);
  10.156 +}
  10.157 +
  10.158 +static inline int shadow_get_page(struct domain *d,
  10.159 +                                  struct pfn_info *page,
  10.160 +                                  struct domain *owner)
  10.161 +{
  10.162 +    if ( !shadow_mode_refcounts(d) )
  10.163 +        return 1;
  10.164 +    return get_page(page, owner);
  10.165 +}
  10.166 +
  10.167 +static inline void shadow_put_page(struct domain *d,
  10.168 +                                   struct pfn_info *page)
  10.169 +{
  10.170 +    if ( !shadow_mode_refcounts(d) )
  10.171 +        return;
  10.172 +    put_page(page);
  10.173 +}
  10.174 +
  10.175 +/************************************************************************/
  10.176 +
  10.177 +static inline int __mark_dirty(struct domain *d, unsigned int mfn)
  10.178 +{
  10.179 +    unsigned long pfn;
  10.180 +    int           rc = 0;
  10.181 +
  10.182 +    ASSERT(spin_is_locked(&d->arch.shadow_lock));
  10.183 +    ASSERT(d->arch.shadow_dirty_bitmap != NULL);
  10.184 +
  10.185 +    if ( !VALID_MFN(mfn) )
  10.186 +        return rc;
  10.187 +
  10.188 +    // N.B. This doesn't use __mfn_to_gpfn().
  10.189 +    // This wants the nice compact set of PFNs from 0..domain's max,
  10.190 +    // which __mfn_to_gpfn() only returns for translated domains.
  10.191 +    //
  10.192 +    pfn = machine_to_phys_mapping[mfn];
  10.193 +
  10.194 +    /*
  10.195 +     * Values with the MSB set denote MFNs that aren't really part of the 
  10.196 +     * domain's pseudo-physical memory map (e.g., the shared info frame).
  10.197 +     * Nothing to do here...
  10.198 +     */
  10.199 +    if ( unlikely(IS_INVALID_M2P_ENTRY(pfn)) )
  10.200 +        return rc;
  10.201 +
  10.202 +    if ( likely(pfn < d->arch.shadow_dirty_bitmap_size) )
  10.203 +    {
  10.204 +        /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
  10.205 +        if ( !__test_and_set_bit(pfn, d->arch.shadow_dirty_bitmap) )
  10.206 +        {
  10.207 +            d->arch.shadow_dirty_count++;
  10.208 +            rc = 1;
  10.209 +        }
  10.210 +    }
  10.211 +#ifndef NDEBUG
  10.212 +    else if ( mfn < max_page )
  10.213 +    {
  10.214 +        SH_LOG("mark_dirty OOR! mfn=%x pfn=%lx max=%x (dom %p)",
  10.215 +               mfn, pfn, d->arch.shadow_dirty_bitmap_size, d);
  10.216 +        SH_LOG("dom=%p caf=%08x taf=%08x", 
  10.217 +               page_get_owner(&frame_table[mfn]),
  10.218 +               frame_table[mfn].count_info, 
  10.219 +               frame_table[mfn].u.inuse.type_info );
  10.220 +    }
  10.221 +#endif
  10.222 +
  10.223 +    return rc;
  10.224 +}
  10.225 +
  10.226 +
  10.227 +static inline int mark_dirty(struct domain *d, unsigned int mfn)
  10.228 +{
  10.229 +    int rc;
  10.230 +    shadow_lock(d);
  10.231 +    rc = __mark_dirty(d, mfn);
  10.232 +    shadow_unlock(d);
  10.233 +    return rc;
  10.234 +}
  10.235 +
  10.236 +
  10.237  /************************************************************************/
  10.238  
  10.239  static inline void
  10.240 @@ -350,10 +488,15 @@ static inline void
  10.241  __guest_set_l2e(
  10.242      struct exec_domain *ed, unsigned long va, l2_pgentry_t value)
  10.243  {
  10.244 +    struct domain *d = ed->domain;
  10.245 +
  10.246      ed->arch.guest_vtable[l2_table_offset(va)] = value;
  10.247  
  10.248 -    if ( unlikely(shadow_mode_translate(ed->domain)) )
  10.249 +    if ( unlikely(shadow_mode_translate(d)) )
  10.250          update_hl2e(ed, va);
  10.251 +
  10.252 +    if ( unlikely(shadow_mode_log_dirty(d)) )
  10.253 +        __mark_dirty(d, pagetable_get_pfn(ed->arch.guest_table));
  10.254  }
  10.255  
  10.256  static inline void
  10.257 @@ -380,11 +523,12 @@ update_hl2e(struct exec_domain *ed, unsi
  10.258      if ( (l1e_has_changed(&old_hl2e, &new_hl2e, _PAGE_PRESENT)) )
  10.259      {
  10.260          if ( (l1e_get_flags(new_hl2e) & _PAGE_PRESENT) &&
  10.261 -             !get_page(pfn_to_page(l1e_get_pfn(new_hl2e)), ed->domain) )
  10.262 +             !shadow_get_page(ed->domain, pfn_to_page(l1e_get_pfn(new_hl2e)),
  10.263 +                              ed->domain) )
  10.264              new_hl2e = l1e_empty();
  10.265          if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
  10.266          {
  10.267 -            put_page(pfn_to_page(l1e_get_pfn(old_hl2e)));
  10.268 +            shadow_put_page(ed->domain, pfn_to_page(l1e_get_pfn(old_hl2e)));
  10.269              need_flush = 1;
  10.270          }
  10.271      }
  10.272 @@ -401,7 +545,7 @@ update_hl2e(struct exec_domain *ed, unsi
  10.273  static inline void shadow_drop_references(
  10.274      struct domain *d, struct pfn_info *page)
  10.275  {
  10.276 -    if ( likely(!shadow_mode_enabled(d)) ||
  10.277 +    if ( likely(!shadow_mode_refcounts(d)) ||
  10.278           ((page->u.inuse.type_info & PGT_count_mask) == 0) )
  10.279          return;
  10.280  
  10.281 @@ -423,7 +567,7 @@ static inline void shadow_drop_reference
  10.282  static inline void shadow_sync_and_drop_references(
  10.283      struct domain *d, struct pfn_info *page)
  10.284  {
  10.285 -    if ( likely(!shadow_mode_enabled(d)) )
  10.286 +    if ( likely(!shadow_mode_refcounts(d)) )
  10.287          return;
  10.288  
  10.289      shadow_lock(d);
  10.290 @@ -522,64 +666,6 @@ shadow_unpin(unsigned long smfn)
  10.291  
  10.292  /************************************************************************/
  10.293  
  10.294 -static inline int __mark_dirty(struct domain *d, unsigned int mfn)
  10.295 -{
  10.296 -    unsigned long pfn;
  10.297 -    int           rc = 0;
  10.298 -
  10.299 -    ASSERT(spin_is_locked(&d->arch.shadow_lock));
  10.300 -    ASSERT(d->arch.shadow_dirty_bitmap != NULL);
  10.301 -
  10.302 -    if ( !VALID_MFN(mfn) )
  10.303 -        return rc;
  10.304 -
  10.305 -    pfn = __mfn_to_gpfn(d, mfn);
  10.306 -
  10.307 -    /*
  10.308 -     * Values with the MSB set denote MFNs that aren't really part of the 
  10.309 -     * domain's pseudo-physical memory map (e.g., the shared info frame).
  10.310 -     * Nothing to do here...
  10.311 -     */
  10.312 -    if ( unlikely(IS_INVALID_M2P_ENTRY(pfn)) )
  10.313 -        return rc;
  10.314 -
  10.315 -    if ( likely(pfn < d->arch.shadow_dirty_bitmap_size) )
  10.316 -    {
  10.317 -        /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
  10.318 -        if ( !__test_and_set_bit(pfn, d->arch.shadow_dirty_bitmap) )
  10.319 -        {
  10.320 -            d->arch.shadow_dirty_count++;
  10.321 -            rc = 1;
  10.322 -        }
  10.323 -    }
  10.324 -#ifndef NDEBUG
  10.325 -    else if ( mfn < max_page )
  10.326 -    {
  10.327 -        SH_LOG("mark_dirty OOR! mfn=%x pfn=%lx max=%x (dom %p)",
  10.328 -               mfn, pfn, d->arch.shadow_dirty_bitmap_size, d);
  10.329 -        SH_LOG("dom=%p caf=%08x taf=%08x\n", 
  10.330 -               page_get_owner(&frame_table[mfn]),
  10.331 -               frame_table[mfn].count_info, 
  10.332 -               frame_table[mfn].u.inuse.type_info );
  10.333 -    }
  10.334 -#endif
  10.335 -
  10.336 -    return rc;
  10.337 -}
  10.338 -
  10.339 -
  10.340 -static inline int mark_dirty(struct domain *d, unsigned int mfn)
  10.341 -{
  10.342 -    int rc;
  10.343 -    shadow_lock(d);
  10.344 -    rc = __mark_dirty(d, mfn);
  10.345 -    shadow_unlock(d);
  10.346 -    return rc;
  10.347 -}
  10.348 -
  10.349 -
  10.350 -/************************************************************************/
  10.351 -
  10.352  extern void shadow_mark_va_out_of_sync(
  10.353      struct exec_domain *ed, unsigned long gpfn, unsigned long mfn,
  10.354      unsigned long va);
  10.355 @@ -666,8 +752,10 @@ static inline void l1pte_propagate_from_
  10.356            (_PAGE_PRESENT|_PAGE_ACCESSED)) &&
  10.357           VALID_MFN(mfn = __gpfn_to_mfn(d, l1e_get_pfn(gpte))) )
  10.358      {
  10.359 -        spte = l1e_create_pfn(mfn, l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
  10.360 -        
  10.361 +        spte = l1e_create_pfn(mfn,
  10.362 +                              l1e_get_flags(gpte) &
  10.363 +                              ~(_PAGE_GLOBAL | _PAGE_AVAIL));
  10.364 +
  10.365          if ( shadow_mode_log_dirty(d) ||
  10.366               !(l1e_get_flags(gpte) & _PAGE_DIRTY) ||
  10.367               mfn_is_page_table(mfn) )
  10.368 @@ -729,14 +817,13 @@ static inline void l2pde_general(
  10.369      spde = l2e_empty();
  10.370      if ( (l2e_get_flags(gpde) & _PAGE_PRESENT) && (sl1mfn != 0) )
  10.371      {
  10.372 -        spde = l2e_create_pfn(sl1mfn, 
  10.373 -                              l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED);
  10.374 -        l2e_add_flags(&gpde, _PAGE_ACCESSED); /* N.B. PDEs do not have a dirty bit. */
  10.375 +        spde = l2e_create_pfn(sl1mfn,
  10.376 +                              (l2e_get_flags(gpde) | _PAGE_RW | _PAGE_ACCESSED)
  10.377 +                              & ~(_PAGE_AVAIL));
  10.378  
  10.379 -        // XXX mafetter: Hmm...
  10.380 -        //     Shouldn't the dirty log be checked/updated here?
  10.381 -        //     Actually, it needs to be done in this function's callers.
  10.382 -        //
  10.383 +        /* N.B. PDEs do not have a dirty bit. */
  10.384 +        l2e_add_flags(&gpde, _PAGE_ACCESSED);
  10.385 +
  10.386          *gpde_p = gpde;
  10.387      }
  10.388  
  10.389 @@ -769,34 +856,57 @@ validate_pte_change(
  10.390      l1_pgentry_t *shadow_pte_p)
  10.391  {
  10.392      l1_pgentry_t old_spte, new_spte;
  10.393 +    int need_flush = 0;
  10.394  
  10.395      perfc_incrc(validate_pte_calls);
  10.396  
  10.397 -#if 0
  10.398 -    FSH_LOG("validate_pte(old=%lx new=%lx)", old_pte, new_pte);
  10.399 -#endif
  10.400 -
  10.401 -    old_spte = *shadow_pte_p;
  10.402      l1pte_propagate_from_guest(d, new_pte, &new_spte);
  10.403  
  10.404 -    // only do the ref counting if something important changed.
  10.405 -    //
  10.406 -    if ( ((l1e_get_value(old_spte) | l1e_get_value(new_spte)) & _PAGE_PRESENT ) &&
  10.407 -         l1e_has_changed(&old_spte, &new_spte, _PAGE_RW | _PAGE_PRESENT) )
  10.408 +    if ( shadow_mode_refcounts(d) )
  10.409      {
  10.410 -        perfc_incrc(validate_pte_changes);
  10.411 +        old_spte = *shadow_pte_p;
  10.412  
  10.413 -        if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
  10.414 -             !shadow_get_page_from_l1e(new_spte, d) )
  10.415 -            new_spte = l1e_empty();
  10.416 -        if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
  10.417 -            put_page_from_l1e(old_spte, d);
  10.418 +        if ( l1e_get_value(old_spte) == l1e_get_value(new_spte) )
  10.419 +        {
  10.420 +            // No accounting required...
  10.421 +            //
  10.422 +            perfc_incrc(validate_pte_changes1);
  10.423 +        }
  10.424 +        else if ( l1e_get_value(old_spte) == (l1e_get_value(new_spte)|_PAGE_RW) )
  10.425 +        {
  10.426 +            // Fast path for PTEs that have merely been write-protected
  10.427 +            // (e.g., during a Unix fork()). A strict reduction in privilege.
  10.428 +            //
  10.429 +            perfc_incrc(validate_pte_changes2);
  10.430 +            if ( likely(l1e_get_flags(new_spte) & _PAGE_PRESENT) )
  10.431 +                shadow_put_page_type(d, &frame_table[l1e_get_pfn(new_spte)]);
  10.432 +        }
  10.433 +        else if ( ((l1e_get_flags(old_spte) | l1e_get_flags(new_spte)) &
  10.434 +                   _PAGE_PRESENT ) &&
  10.435 +                  l1e_has_changed(&old_spte, &new_spte, _PAGE_RW | _PAGE_PRESENT) )
  10.436 +        {
  10.437 +            // only do the ref counting if something important changed.
  10.438 +            //
  10.439 +            perfc_incrc(validate_pte_changes3);
  10.440 +
  10.441 +            if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
  10.442 +                 !shadow_get_page_from_l1e(new_spte, d) )
  10.443 +                new_spte = l1e_empty();
  10.444 +            if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
  10.445 +            {
  10.446 +                shadow_put_page_from_l1e(old_spte, d);
  10.447 +                need_flush = 1;
  10.448 +            }
  10.449 +        }
  10.450 +        else
  10.451 +        {
  10.452 +            perfc_incrc(validate_pte_changes4);
  10.453 +        }
  10.454      }
  10.455  
  10.456      *shadow_pte_p = new_spte;
  10.457  
  10.458 -    // paranoia rules!
  10.459 -    return 1;
  10.460 +    return need_flush;
  10.461  }
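
The validate_pte_changes2 fast path above relies on a small bit identity: if the only difference between the old and new shadow PTEs is that _PAGE_RW was cleared, then old == (new | _PAGE_RW), so the entry was merely write-protected and the writable type reference taken for the old mapping can simply be dropped. A standalone check of that condition, using the usual x86 flag values and made-up frame addresses:

    #include <assert.h>
    #include <stdio.h>

    #define _PAGE_PRESENT  0x001UL
    #define _PAGE_RW       0x002UL
    #define _PAGE_ACCESSED 0x020UL

    int main(void)
    {
        unsigned long old_spte = 0x12345000UL | _PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED;
        unsigned long new_spte = old_spte & ~_PAGE_RW;   /* RW cleared, nothing else */

        /* Fast path applies: the entry was merely write-protected. */
        assert(old_spte == (new_spte | _PAGE_RW));

        /* Fast path does not apply if anything else changed as well,
         * e.g. the entry now points at a different frame. */
        unsigned long moved_spte = (new_spte & ~0xfffff000UL) | 0x67890000UL;
        assert(old_spte != (moved_spte | _PAGE_RW));

        printf("write-protect fast-path condition holds\n");
        return 0;
    }
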
  10.462  
  10.463  // returns true if a tlb flush is needed
  10.464 @@ -808,6 +918,7 @@ validate_hl2e_change(
  10.465      l1_pgentry_t *shadow_hl2e_p)
  10.466  {
  10.467      l1_pgentry_t old_hl2e, new_hl2e;
  10.468 +    int need_flush = 0;
  10.469  
  10.470      perfc_incrc(validate_hl2e_calls);
  10.471  
  10.472 @@ -825,14 +936,15 @@ validate_hl2e_change(
  10.473               !get_page(pfn_to_page(l1e_get_pfn(new_hl2e)), d) )
  10.474              new_hl2e = l1e_empty();
  10.475          if ( l1e_get_flags(old_hl2e) & _PAGE_PRESENT )
  10.476 +        {
  10.477              put_page(pfn_to_page(l1e_get_pfn(old_hl2e)));
  10.478 +            need_flush = 1;
  10.479 +        }
  10.480      }
  10.481  
  10.482      *shadow_hl2e_p = new_hl2e;
  10.483  
  10.484 -    // paranoia rules!
  10.485 -    return 1;
  10.486 -    
  10.487 +    return need_flush;
  10.488  }
  10.489  
  10.490  // returns true if a tlb flush is needed
  10.491 @@ -844,15 +956,13 @@ validate_pde_change(
  10.492      l2_pgentry_t *shadow_pde_p)
  10.493  {
  10.494      l2_pgentry_t old_spde, new_spde;
  10.495 +    int need_flush = 0;
  10.496  
  10.497      perfc_incrc(validate_pde_calls);
  10.498  
  10.499      old_spde = *shadow_pde_p;
  10.500      l2pde_propagate_from_guest(d, &new_gpde, &new_spde);
  10.501  
  10.502 -    // XXX Shouldn't we propagate the new_gpde to the guest?
  10.503 -    // And then mark the guest's L2 page as dirty?
  10.504 -
  10.505      // Only do the ref counting if something important changed.
  10.506      //
  10.507      if ( ((l2e_get_value(old_spde) | l2e_get_value(new_spde)) & _PAGE_PRESENT) &&
  10.508 @@ -864,13 +974,15 @@ validate_pde_change(
  10.509               !get_shadow_ref(l2e_get_pfn(new_spde)) )
  10.510              BUG();
  10.511          if ( l2e_get_flags(old_spde) & _PAGE_PRESENT )
  10.512 +        {
  10.513              put_shadow_ref(l2e_get_pfn(old_spde));
  10.514 +            need_flush = 1;
  10.515 +        }
  10.516      }
  10.517  
  10.518      *shadow_pde_p = new_spde;
  10.519  
  10.520 -    // paranoia rules!
  10.521 -    return 1;
  10.522 +    return need_flush;
  10.523  }
  10.524  
  10.525  /*********************************************************************/
  10.526 @@ -1035,10 +1147,19 @@ static inline unsigned long __shadow_sta
  10.527      {
  10.528          perfc_incrc(shadow_status_shortcut);
  10.529  #ifndef NDEBUG
  10.530 -        ASSERT(___shadow_status(d, gpfn, stype) == 0);
  10.531 +        if ( ___shadow_status(d, gpfn, stype) != 0 )
  10.532 +        {
  10.533 +            printk("d->id=%d gpfn=%lx gmfn=%lx stype=%lx c=%x t=%x "
  10.534 +                   "mfn_out_of_sync(gmfn)=%d mfn_is_page_table(gmfn)=%d\n",
  10.535 +                   d->id, gpfn, gmfn, stype,
  10.536 +                   frame_table[gmfn].count_info,
  10.537 +                   frame_table[gmfn].u.inuse.type_info,
  10.538 +                   mfn_out_of_sync(gmfn), mfn_is_page_table(gmfn));
  10.539 +            BUG();
  10.540 +        }
  10.541  
  10.542 -        // Undo the affects of the above ASSERT on ___shadow_status()'s perf
  10.543 -        // counters.
   10.544 +        // Undo the effects of the above call to ___shadow_status() on the perf
   10.545 +        // counters, since that call is really just part of an assertion.
  10.546          //
  10.547          perfc_decrc(shadow_status_calls);
  10.548          perfc_decrc(shadow_status_miss);
  10.549 @@ -1056,12 +1177,12 @@ static inline unsigned long __shadow_sta
  10.550   *
  10.551   * Either returns PGT_none, or PGT_l{1,2,3,4}_page_table.
  10.552   */
  10.553 -static inline unsigned long
  10.554 +static inline u32
  10.555  shadow_max_pgtable_type(struct domain *d, unsigned long gpfn,
  10.556                          unsigned long *smfn)
  10.557  {
  10.558      struct shadow_status *x;
  10.559 -    unsigned long pttype = PGT_none, type;
  10.560 +    u32 pttype = PGT_none, type;
  10.561  
  10.562      ASSERT(spin_is_locked(&d->arch.shadow_lock));
  10.563      ASSERT(gpfn == (gpfn & PGT_mfn_mask));
  10.564 @@ -1379,7 +1500,6 @@ shadow_set_l1e(unsigned long va, l1_pgen
  10.565      struct exec_domain *ed = current;
  10.566      struct domain *d = ed->domain;
  10.567      l2_pgentry_t sl2e;
  10.568 -    l1_pgentry_t old_spte;
  10.569  
  10.570  #if 0
  10.571      printk("shadow_set_l1e(va=%p, new_spte=%p, create=%d)\n",
  10.572 @@ -1424,17 +1544,20 @@ shadow_set_l1e(unsigned long va, l1_pgen
  10.573          }
  10.574      }
  10.575  
  10.576 -    old_spte = shadow_linear_pg_table[l1_linear_offset(va)];
  10.577 -
  10.578 -    // only do the ref counting if something important changed.
  10.579 -    //
  10.580 -    if ( l1e_has_changed(&old_spte, &new_spte, _PAGE_RW | _PAGE_PRESENT) )
  10.581 +    if ( shadow_mode_refcounts(d) )
  10.582      {
  10.583 -        if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
  10.584 -             !shadow_get_page_from_l1e(new_spte, d) )
  10.585 -            new_spte = l1e_empty();
  10.586 -        if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
  10.587 -            put_page_from_l1e(old_spte, d);
  10.588 +        l1_pgentry_t old_spte = shadow_linear_pg_table[l1_linear_offset(va)];
  10.589 +
  10.590 +        // only do the ref counting if something important changed.
  10.591 +        //
  10.592 +        if ( l1e_has_changed(&old_spte, &new_spte, _PAGE_RW | _PAGE_PRESENT) )
  10.593 +        {
  10.594 +            if ( (l1e_get_flags(new_spte) & _PAGE_PRESENT) &&
  10.595 +                 !shadow_get_page_from_l1e(new_spte, d) )
  10.596 +                new_spte = l1e_empty();
  10.597 +            if ( l1e_get_flags(old_spte) & _PAGE_PRESENT )
  10.598 +                shadow_put_page_from_l1e(old_spte, d);
  10.599 +        }
  10.600      }
  10.601  
  10.602      shadow_linear_pg_table[l1_linear_offset(va)] = new_spte;
  10.603 @@ -1444,6 +1567,27 @@ shadow_set_l1e(unsigned long va, l1_pgen
  10.604  
  10.605  /************************************************************************/
  10.606  
  10.607 +static inline int
  10.608 +shadow_mode_page_writable(struct domain *d, unsigned long gpfn)
  10.609 +{
  10.610 +    unsigned long mfn = __gpfn_to_mfn(d, gpfn);
  10.611 +    u32 type = frame_table[mfn].u.inuse.type_info & PGT_type_mask;
  10.612 +
  10.613 +    if ( shadow_mode_refcounts(d) &&
  10.614 +         (type == PGT_writable_page) )
  10.615 +        type = shadow_max_pgtable_type(d, gpfn, NULL);
  10.616 +
  10.617 +    if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
  10.618 +         (type == PGT_l1_page_table) )
  10.619 +        return 1;
  10.620 +
  10.621 +    if ( shadow_mode_write_all(d) &&
  10.622 +         type && (type <= PGT_l4_page_table) )
  10.623 +        return 1;
  10.624 +
  10.625 +    return 0;
  10.626 +}
  10.627 +
  10.628  static inline l1_pgentry_t gva_to_gpte(unsigned long gva)
  10.629  {
  10.630      l2_pgentry_t gpde;
    11.1 --- a/xen/include/asm-x86/x86_32/domain_page.h	Sun May 08 12:06:10 2005 +0000
    11.2 +++ b/xen/include/asm-x86/x86_32/domain_page.h	Mon May 09 14:34:59 2005 +0000
    11.3 @@ -26,4 +26,51 @@ extern void *map_domain_mem(unsigned lon
    11.4   */
    11.5  extern void unmap_domain_mem(void *va);
    11.6  
    11.7 +struct map_dom_mem_cache {
    11.8 +    unsigned long pa;
    11.9 +    void *va;
   11.10 +};
   11.11 +
   11.12 +#define MAP_DOM_MEM_CACHE_INIT { .pa = 0 }
   11.13 +
   11.14 +static inline void *
   11.15 +map_domain_mem_with_cache(unsigned long pa,
   11.16 +                          struct map_dom_mem_cache *cache)
   11.17 +{
   11.18 +    if ( likely(cache != NULL) )
   11.19 +    {
   11.20 +        if ( likely(cache->pa) )
   11.21 +        {
   11.22 +            if ( likely((pa & PAGE_MASK) == (cache->pa & PAGE_MASK)) )
   11.23 +                goto done;
   11.24 +            unmap_domain_mem(cache->va);
   11.25 +        }
   11.26 +        cache->pa = (pa & PAGE_MASK) | 1;
   11.27 +        cache->va = map_domain_mem(cache->pa);
   11.28 +    done:
   11.29 +        return (void *)(((unsigned long)cache->va & PAGE_MASK) |
   11.30 +                        (pa & ~PAGE_MASK));
   11.31 +    }
   11.32 +
   11.33 +    return map_domain_mem(pa);
   11.34 +}
   11.35 +
   11.36 +static inline void
   11.37 +unmap_domain_mem_with_cache(void *va,
   11.38 +                            struct map_dom_mem_cache *cache)
   11.39 +{
   11.40 +    if ( unlikely(!cache) )
   11.41 +        unmap_domain_mem(va);
   11.42 +}
   11.43 +
   11.44 +static inline void
   11.45 +unmap_domain_mem_cache(struct map_dom_mem_cache *cache)
   11.46 +{
   11.47 +    if ( likely(cache != NULL) && likely(cache->pa) )
   11.48 +    {
   11.49 +        unmap_domain_mem(cache->va);
   11.50 +        cache->pa = 0;
   11.51 +    }
   11.52 +}
   11.53 +
   11.54  #endif /* __ASM_DOMAIN_PAGE_H__ */
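
These helpers are what the new struct map_dom_mem_cache * parameters on set_p2m_entry() and the shadow_l?_normal_pt_update() routines feed: when successive accesses fall within the same page, the cached mapping is reused instead of being torn down and re-established each time. A hedged usage sketch (touch_entries() and the entry layout are illustrative only, not taken from the patch):

    /* Walk a run of machine addresses that mostly share a page, mapping each
     * through one cache so the common case is a cheap cache hit. */
    static void touch_entries(unsigned long base_pa, int nr)
    {
        struct map_dom_mem_cache cache = MAP_DOM_MEM_CACHE_INIT;
        int i;

        for ( i = 0; i < nr; i++ )
        {
            unsigned long pa = base_pa + i * sizeof(unsigned long);
            unsigned long *entry = map_domain_mem_with_cache(pa, &cache);

            *entry |= 1;                            /* some per-entry update */

            /* A no-op while the cache holds the mapping; kept for symmetry
             * with the uncached map/unmap pair. */
            unmap_domain_mem_with_cache(entry, &cache);
        }

        unmap_domain_mem_cache(&cache);             /* drop the final mapping */
    }
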
    12.1 --- a/xen/include/xen/lib.h	Sun May 08 12:06:10 2005 +0000
    12.2 +++ b/xen/include/xen/lib.h	Mon May 09 14:34:59 2005 +0000
    12.3 @@ -15,7 +15,7 @@
    12.4  #define BUG_ON(_p) do { if (_p) BUG(); } while ( 0 )
    12.5  
    12.6  #ifndef NDEBUG
    12.7 -#define ASSERT(_p) if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s\n", #_p , __LINE__, __FILE__); BUG(); }
    12.8 +#define ASSERT(_p) { if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s\n", #_p , __LINE__, __FILE__); BUG(); } }
    12.9  #else
   12.10  #define ASSERT(_p) ((void)0)
   12.11  #endif
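
The extra braces around ASSERT()'s body are most likely there to defuse the classic macro dangling-else hazard: with the old single-if form, an else following an ASSERT() used without a trailing semicolon pairs with the macro's internal if rather than the caller's. A minimal standalone reproduction (printf/abort stand in for printk/BUG):

    #include <stdio.h>
    #include <stdlib.h>

    /* Old form: a bare if statement. */
    #define ASSERT_OLD(_p) if ( !(_p) ) { printf("Assertion '%s' failed\n", #_p); abort(); }
    /* New form: the same body wrapped in its own block. */
    #define ASSERT_NEW(_p) { if ( !(_p) ) { printf("Assertion '%s' failed\n", #_p); abort(); } }

    static void demo(int *p)
    {
        if ( p != NULL )
            ASSERT_OLD(*p > 0)        /* note: no trailing ';' */
        else
            printf("p was NULL\n");   /* pairs with ASSERT_OLD's inner if, so
                                       * it runs whenever p is non-NULL and the
                                       * assertion passes; with ASSERT_NEW the
                                       * else pairs with the outer if instead. */
    }

    int main(void)
    {
        int x = 5;
        demo(&x);    /* surprisingly prints "p was NULL" */
        return 0;
    }
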
    13.1 --- a/xen/include/xen/perfc_defn.h	Sun May 08 12:06:10 2005 +0000
    13.2 +++ b/xen/include/xen/perfc_defn.h	Mon May 09 14:34:59 2005 +0000
    13.3 @@ -86,12 +86,14 @@ PERFCOUNTER_CPU(resync_hl2,             
    13.4  PERFCOUNTER_CPU(shadow_make_snapshot,              "snapshots created")
    13.5  PERFCOUNTER_CPU(shadow_mark_mfn_out_of_sync_calls, "calls to shadow_mk_out_of_sync")
    13.6  PERFCOUNTER_CPU(shadow_out_of_sync_calls,          "calls to shadow_out_of_sync")
    13.7 -PERFCOUNTER_CPU(extra_va_update_sync,              "extra syncs for bug in chk_pgtb")
    13.8  PERFCOUNTER_CPU(snapshot_entry_matches_calls,      "calls to ss_entry_matches")
    13.9  PERFCOUNTER_CPU(snapshot_entry_matches_true,       "ss_entry_matches returns true")
   13.10  
   13.11  PERFCOUNTER_CPU(validate_pte_calls,                "calls to validate_pte_change")
   13.12 -PERFCOUNTER_CPU(validate_pte_changes,              "validate_pte makes changes")
   13.13 +PERFCOUNTER_CPU(validate_pte_changes1,             "validate_pte makes changes1")
   13.14 +PERFCOUNTER_CPU(validate_pte_changes2,             "validate_pte makes changes2")
   13.15 +PERFCOUNTER_CPU(validate_pte_changes3,             "validate_pte makes changes3")
   13.16 +PERFCOUNTER_CPU(validate_pte_changes4,             "validate_pte makes changes4")
   13.17  PERFCOUNTER_CPU(validate_pde_calls,                "calls to validate_pde_change")
   13.18  PERFCOUNTER_CPU(validate_pde_changes,              "validate_pde makes changes")
   13.19  PERFCOUNTER_CPU(shadow_get_page_fail,   "shadow_get_page_from_l1e fails" )