ia64/xen-unstable

changeset 4184:8f0c441d9e25

bitkeeper revision 1.1249 (42387345w4RJ2RC5ifMnONI8xxsgWA)

manual merge with Michael's latest

Signed-off-by: michael.fetterman@cl.cam.ac.uk
author rneugeba@wyvis.research.intel-research.net
date Wed Mar 16 17:56:21 2005 +0000 (2005-03-16)
parents cf77cd925ef3 d617bb4a2907
children 105bb57fc414
files .rootkeys linux-2.6.10-xen-sparse/arch/xen/i386/mm/hypervisor.c linux-2.6.10-xen-sparse/arch/xen/i386/mm/init.c linux-2.6.10-xen-sparse/arch/xen/i386/mm/pgtable.c xen/arch/x86/audit.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/mm.c xen/arch/x86/shadow.c xen/arch/x86/traps.c xen/arch/x86/vmx.c xen/arch/x86/x86_32/domain_page.c xen/common/dom_mem_ops.c xen/common/keyhandler.c xen/common/page_alloc.c xen/common/schedule.c xen/drivers/char/console.c xen/include/asm-x86/domain.h xen/include/asm-x86/mm.h xen/include/asm-x86/page.h xen/include/asm-x86/shadow.h xen/include/asm-x86/x86_32/page.h xen/include/xen/domain.h xen/include/xen/perfc_defn.h ~/audit.c
line diff
     1.1 --- a/.rootkeys	Wed Mar 16 17:31:35 2005 +0000
     1.2 +++ b/.rootkeys	Wed Mar 16 17:56:21 2005 +0000
     1.3 @@ -951,7 +951,7 @@ 3ddb79bcZbRBzT3elFWSX7u6NtMagQ xen/arch/
     1.4  3ddb79bcBQF85CfLS4i1WGZ4oLLaCA xen/arch/x86/Rules.mk
     1.5  3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi.c
     1.6  3ddb79bcsjinG9k1KcvbVBuas1R2dA xen/arch/x86/apic.c
     1.7 -42386d3bKw0QftYe-cDL6_4WiATRTw xen/arch/x86/audit.c
     1.8 +42360b3244-Q6BpEKhR_A1YtG1wPNQ xen/arch/x86/audit.c
     1.9  3ddb79c4yGZ7_22QAFFwPzqP4NSHwA xen/arch/x86/boot/mkelf32.c
    1.10  3ddb79bcSC_LvnmFlX-T5iTgaR0SKg xen/arch/x86/boot/x86_32.S
    1.11  40e42bdbNu4MjI750THP_8J1S-Sa0g xen/arch/x86/boot/x86_64.S
    1.12 @@ -1239,3 +1239,4 @@ 3eb3c87fPL2T_zBb0bHlbZY-ACEKRw xen/tools
    1.13  3eb3c87fmKYTC5GCh_rydFakZp9ayw xen/tools/figlet/README
    1.14  3eb3c87fdQKQ5OBGbM-KjZfi9Us4ng xen/tools/figlet/figlet.c
    1.15  3eb3c87fS7DNbg0i6yhFs28UIqAK5g xen/tools/figlet/xen.flf
    1.16 +42386d3bKw0QftYe-cDL6_4WiATRTw ~/audit.c
     2.1 --- a/xen/arch/x86/audit.c	Wed Mar 16 17:31:35 2005 +0000
     2.2 +++ b/xen/arch/x86/audit.c	Wed Mar 16 17:56:21 2005 +0000
     2.3 @@ -25,25 +25,17 @@
     2.4  #include <xen/kernel.h>
     2.5  #include <xen/lib.h>
     2.6  #include <xen/mm.h>
     2.7 -//#include <xen/sched.h>
     2.8 -//#include <xen/errno.h>
     2.9  #include <xen/perfc.h>
    2.10 -//#include <xen/irq.h>
    2.11 -//#include <xen/softirq.h>
    2.12  #include <asm/shadow.h>
    2.13  #include <asm/page.h>
    2.14  #include <asm/flushtlb.h>
    2.15 -//#include <asm/io.h>
    2.16 -//#include <asm/uaccess.h>
    2.17 -//#include <asm/domain_page.h>
    2.18 -//#include <asm/ldt.h>
    2.19  
    2.20  // XXX SMP bug -- these should not be statics...
    2.21  //
    2.22  static int ttot=0, ctot=0, io_mappings=0, lowmem_mappings=0;
    2.23  static int l1, l2, oos_count, page_count;
    2.24  
    2.25 -#define FILE_AND_LINE 1
    2.26 +#define FILE_AND_LINE 0
    2.27  
    2.28  #if FILE_AND_LINE
    2.29  #define adjust(_p, _a) _adjust((_p), (_a), __FILE__, __LINE__)
    2.30 @@ -73,7 +65,7 @@ int audit_adjust_pgtables(struct domain 
    2.31              if ( page_get_owner(page) == NULL )
    2.32              {
    2.33                  APRINTK("adjust(mfn=%p, dir=%d, adjtype=%d) owner=NULL",
    2.34 -                        page_to_pfn(page), dir, adjtype, file, line);
    2.35 +                        page_to_pfn(page), dir, adjtype);
    2.36                  errors++;
    2.37              }
    2.38  
     3.1 --- a/xen/arch/x86/mm.c	Wed Mar 16 17:31:35 2005 +0000
     3.2 +++ b/xen/arch/x86/mm.c	Wed Mar 16 17:56:21 2005 +0000
     3.3 @@ -1859,8 +1859,6 @@ int do_mmu_update(
     3.4                  break;
     3.5  #endif /* __x86_64__ */
     3.6              default:
     3.7 -                printk("do_mmu_update writable update: ma=%p val=%p\n",
     3.8 -                       req.ptr, req.val);
     3.9                  if ( likely(get_page_type(page, PGT_writable_page)) )
    3.10                  {
    3.11                      if ( shadow_mode_enabled(d) )
    3.12 @@ -2004,6 +2002,8 @@ int do_update_va_mapping(unsigned long v
    3.13      }
    3.14      else
    3.15      {
    3.16 +        unsigned long l1mfn;
    3.17 +
    3.18          if ( unlikely(percpu_info[cpu].foreign &&
    3.19                        (shadow_mode_translate(d) ||
    3.20                         shadow_mode_translate(percpu_info[cpu].foreign))) )
    3.21 @@ -2024,6 +2024,29 @@ int do_update_va_mapping(unsigned long v
    3.22          //
    3.23          __shadow_sync_va(ed, va);
    3.24  
    3.25 +#if 1 /* keep check_pagetables() happy */
    3.26 +        /*
    3.27 +         * However, the above doesn't guarantee that there's no snapshot of
    3.28 +         * the L1 table in question; it just says that the relevant L2 and L1
    3.29 +         * entries for VA are in-sync.  There might still be a snapshot.
    3.30 +         *
    3.31 +         * The checking code in _check_pagetables() assumes that no one will
    3.32 +         * mutate the shadow of a page that has a snapshot.  It's actually
    3.33 +         * OK to not sync this page, but it seems simpler to:
    3.34 +         * 1) keep all code paths the same, and
    3.35 +         * 2) maintain the invariant for _check_pagetables(), rather than try
    3.36 +         *    to teach it about this boundary case.
    3.37 +         * So we flush this L1 page, if it's out of sync.
    3.38 +         */
    3.39 +        l1mfn = (l2_pgentry_val(linear_l2_table(ed)[l2_table_offset(va)]) >>
    3.40 +                 PAGE_SHIFT);
    3.41 +        if ( mfn_out_of_sync(l1mfn) )
    3.42 +        {
    3.43 +            perfc_incrc(extra_va_update_sync);
    3.44 +            __shadow_sync_mfn(d, l1mfn);
    3.45 +        }
    3.46 +#endif /* keep check_pagetables() happy */
    3.47 +
    3.48          if ( unlikely(__put_user(val, &l1_pgentry_val(
    3.49                                       linear_pg_table[l1_linear_offset(va)]))) )
    3.50              err = -EINVAL;
     4.1 --- a/xen/arch/x86/shadow.c	Wed Mar 16 17:31:35 2005 +0000
     4.2 +++ b/xen/arch/x86/shadow.c	Wed Mar 16 17:56:21 2005 +0000
     4.3 @@ -60,7 +60,7 @@ shadow_promote(struct domain *d, unsigne
     4.4          __shadow_sync_mfn(d, gmfn);
     4.5      }
     4.6  
     4.7 -    if ( unlikely(mfn_is_page_table(gmfn)) )
     4.8 +    if ( unlikely(page_is_page_table(page)) )
     4.9      {
    4.10          min_type = shadow_max_pgtable_type(d, gpfn) + PGT_l1_shadow;
    4.11          max_type = new_type;
    4.12 @@ -70,7 +70,7 @@ shadow_promote(struct domain *d, unsigne
    4.13          min_type = PGT_l1_shadow;
    4.14          max_type = PGT_l1_shadow;
    4.15      }
    4.16 -    FSH_LOG("shadow_promote gpfn=%p gmfn=%p nt=%p min=%p max=%p\n",
    4.17 +    FSH_LOG("shadow_promote gpfn=%p gmfn=%p nt=%p min=%p max=%p",
    4.18              gmfn, gmfn, new_type, min_type, max_type);
    4.19  
    4.20      if ( min_type <= max_type )
    4.21 @@ -99,7 +99,7 @@ shadow_promote(struct domain *d, unsigne
    4.22      if ( get_page_type(page, PGT_base_page_table) )
    4.23      {
    4.24          put_page_type(page);
    4.25 -        set_bit(_PGC_page_table, &frame_table[gmfn].count_info);
    4.26 +        set_bit(_PGC_page_table, &page->count_info);
    4.27      }
    4.28      else
    4.29      {
    4.30 @@ -299,8 +299,6 @@ free_shadow_hl2_table(struct domain *d, 
    4.31  static void inline
    4.32  free_shadow_l2_table(struct domain *d, unsigned long smfn)
    4.33  {
    4.34 -    printk("free_shadow_l2_table(smfn=%p)\n", smfn);
    4.35 -
    4.36      unsigned long *pl2e = map_domain_mem(smfn << PAGE_SHIFT);
    4.37      int i, external = shadow_mode_external(d);
    4.38  
    4.39 @@ -388,7 +386,12 @@ release_out_of_sync_entry(struct domain 
    4.40      // Only use entries that have low bits clear...
    4.41      //
    4.42      if ( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
    4.43 +    {
    4.44          put_shadow_ref(entry->writable_pl1e >> PAGE_SHIFT);
    4.45 +        entry->writable_pl1e = -2;
    4.46 +    }
    4.47 +    else
    4.48 +        ASSERT( entry->writable_pl1e == -1 );
    4.49  
    4.50      // Free the snapshot
    4.51      //
    4.52 @@ -399,38 +402,63 @@ static void remove_out_of_sync_entries(s
    4.53  {
    4.54      struct out_of_sync_entry *entry = d->arch.out_of_sync;
    4.55      struct out_of_sync_entry **prev = &d->arch.out_of_sync;
    4.56 +    struct out_of_sync_entry *found = NULL;
    4.57  
    4.58 +    // NB: Be careful not to call something that manipulates this list
    4.59 +    //     while walking it.  Collect the results into a separate list
    4.60 +    //     first, then walk that list.
    4.61 +    //
    4.62      while ( entry )
    4.63      {
    4.64          if ( entry->gmfn == gmfn )
    4.65          {
    4.66 -            release_out_of_sync_entry(d, entry);
    4.67 -            *prev = entry = entry->next;
    4.68 +            // remove from out of sync list
    4.69 +            *prev = entry->next;
    4.70 +
    4.71 +            // add to found list
    4.72 +            entry->next = found;
    4.73 +            found = entry;
    4.74 +
    4.75 +            entry = *prev;
    4.76              continue;
    4.77          }
    4.78          prev = &entry->next;
    4.79          entry = entry->next;
    4.80      }
    4.81 +
    4.82 +    prev = NULL;
    4.83 +    entry = found;
    4.84 +    while ( entry )
    4.85 +    {
    4.86 +        release_out_of_sync_entry(d, entry);
    4.87 +
    4.88 +        prev = &entry->next;
    4.89 +        entry = entry->next;
    4.90 +    }
    4.91 +
    4.92 +    // Add found list to free list
    4.93 +    if ( prev )
    4.94 +    {
    4.95 +        *prev = d->arch.out_of_sync_free;
    4.96 +        d->arch.out_of_sync_free = found;
    4.97 +    }
    4.98  }
    4.99  
   4.100  static void free_out_of_sync_state(struct domain *d)
   4.101  {
   4.102      struct out_of_sync_entry *entry;
   4.103 -    struct out_of_sync_entry **tail = NULL;
   4.104  
   4.105 -    // Add the list of out-of-sync entries to the free list of entries.
   4.106 -    // Not the smartest code.  But it works.
   4.107 +    // NB: Be careful not to call something that manipulates this list
   4.108 +    //     while walking it.  Remove one item at a time, and always
   4.109 +    //     restart from start of list.
   4.110      //
   4.111 -    for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
   4.112 +    while ( (entry = d->arch.out_of_sync) )
   4.113      {
   4.114 +        d->arch.out_of_sync = entry->next;
   4.115          release_out_of_sync_entry(d, entry);
   4.116 -        tail = &entry->next;
   4.117 -    }
   4.118 -    if ( tail )
   4.119 -    {
   4.120 -        *tail = d->arch.out_of_sync_free;
   4.121 -        d->arch.out_of_sync_free = d->arch.out_of_sync;
   4.122 -        d->arch.out_of_sync = NULL;
   4.123 +
   4.124 +        entry->next = d->arch.out_of_sync_free;
   4.125 +        d->arch.out_of_sync_free = entry;
   4.126      }
   4.127  }
   4.128  
   4.129 @@ -1103,7 +1131,8 @@ static unsigned long shadow_l2_table(
   4.130              // shadow_mode_translate (but not external) sl2 tables hold a
   4.131              // ref to their hl2.
   4.132              //
   4.133 -            get_shadow_ref(hl2mfn);
   4.134 +            if ( !get_shadow_ref(hl2mfn) )
   4.135 +                BUG();
   4.136              
   4.137              spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
   4.138                  mk_l2_pgentry((hl2mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   4.139 @@ -1178,7 +1207,8 @@ void shadow_map_l1_into_current_l2(unsig
   4.140      ASSERT( !(old_sl2e & _PAGE_PRESENT) );
   4.141  #endif
   4.142  
   4.143 -    get_shadow_ref(sl1mfn);
   4.144 +    if ( !get_shadow_ref(sl1mfn) )
   4.145 +        BUG();
   4.146      l2pde_general(d, &gl2e, &sl2e, sl1mfn);
   4.147      __guest_set_l2e(ed, va, gl2e);
   4.148      __shadow_set_l2e(ed, va, sl2e);
   4.149 @@ -1195,9 +1225,13 @@ void shadow_map_l1_into_current_l2(unsig
   4.150  
   4.151          for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
   4.152          {
   4.153 -            l1pte_propagate_from_guest(d, gpl1e[i], &spl1e[i]);
   4.154 -            if ( spl1e[i] & _PAGE_PRESENT )
   4.155 -                get_page_from_l1e(mk_l1_pgentry(spl1e[i]), d);
   4.156 +            unsigned long sl1e;
   4.157 +
   4.158 +            l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
   4.159 +            if ( (sl1e & _PAGE_PRESENT) &&
   4.160 +                 !shadow_get_page_from_l1e(mk_l1_pgentry(sl1e), d) )
   4.161 +                sl1e = 0;
   4.162 +            spl1e[i] = sl1e;
   4.163          }
   4.164      }
   4.165  }
   4.166 @@ -1293,7 +1327,8 @@ shadow_make_snapshot(
   4.167          BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
   4.168      }
   4.169  
   4.170 -    get_shadow_ref(smfn);
   4.171 +    if ( !get_shadow_ref(smfn) )
   4.172 +        BUG();
   4.173  
   4.174      original = map_domain_mem(gmfn << PAGE_SHIFT);
   4.175      snapshot = map_domain_mem(smfn << PAGE_SHIFT);
   4.176 @@ -1336,13 +1371,7 @@ shadow_mark_mfn_out_of_sync(struct exec_
   4.177  
   4.178      ASSERT(spin_is_locked(&d->arch.shadow_lock));
   4.179      ASSERT(pfn_is_ram(mfn));
   4.180 -    //ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page);
   4.181 -    if (!((page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page))
   4.182 -    {
   4.183 -        printk("assertion failed: gpfn=%p gmfn=%p t=%p\n",
   4.184 -               gpfn, mfn, page->u.inuse.type_info);
   4.185 -        BUG();
   4.186 -    }
   4.187 +    ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page);
   4.188  
   4.189      FSH_LOG("mark_mfn_out_of_sync(gpfn=%p, mfn=%p) c=%p t=%p",
   4.190              gpfn, mfn, page->count_info, page->u.inuse.type_info);
   4.191 @@ -1373,7 +1402,7 @@ shadow_mark_mfn_out_of_sync(struct exec_
   4.192      return entry;
   4.193  }
   4.194  
   4.195 -void shadow_mark_out_of_sync(
   4.196 +void shadow_mark_va_out_of_sync(
   4.197      struct exec_domain *ed, unsigned long gpfn, unsigned long mfn, unsigned long va)
   4.198  {
   4.199      struct out_of_sync_entry *entry =
   4.200 @@ -1402,7 +1431,8 @@ void shadow_mark_out_of_sync(
   4.201      // Increment shadow's page count to represent the reference
   4.202      // inherent in entry->writable_pl1e
   4.203      //
   4.204 -    get_shadow_ref(sl2e >> PAGE_SHIFT);
   4.205 +    if ( !get_shadow_ref(sl2e >> PAGE_SHIFT) )
   4.206 +        BUG();
   4.207  
   4.208      FSH_LOG("mark_out_of_sync(va=%p -> writable_pl1e=%p)",
   4.209              va, entry->writable_pl1e);
   4.210 @@ -1502,8 +1532,9 @@ static u32 remove_all_write_access_in_pt
   4.211              unsigned long old = pt[i];
   4.212              unsigned long new = old & ~_PAGE_RW;
   4.213  
   4.214 -            if ( is_l1_shadow )
   4.215 -                get_page_from_l1e(mk_l1_pgentry(new), d);
   4.216 +            if ( is_l1_shadow &&
   4.217 +                 !shadow_get_page_from_l1e(mk_l1_pgentry(new), d) )
   4.218 +                BUG();
   4.219  
   4.220              count++;
   4.221              pt[i] = new;
   4.222 @@ -1512,7 +1543,7 @@ static u32 remove_all_write_access_in_pt
   4.223                  put_page_from_l1e(mk_l1_pgentry(old), d);
   4.224  
   4.225              FSH_LOG("removed write access to mfn=%p in smfn=%p entry %x "
   4.226 -                    "is_l1_shadow=%d\n",
   4.227 +                    "is_l1_shadow=%d",
   4.228                      readonly_mfn, pt_mfn, i, is_l1_shadow);
   4.229          }
   4.230      }
   4.231 @@ -1633,6 +1664,7 @@ static int resync_all(struct domain *d, 
   4.232      unsigned long smfn;
   4.233      unsigned long *guest, *shadow, *snapshot;
   4.234      int need_flush = 0, external = shadow_mode_external(d);
   4.235 +    int unshadow;
   4.236  
   4.237      ASSERT(spin_is_locked(&d->arch.shadow_lock));
   4.238  
   4.239 @@ -1653,6 +1685,7 @@ static int resync_all(struct domain *d, 
   4.240          guest    = map_domain_mem(entry->gmfn         << PAGE_SHIFT);
   4.241          snapshot = map_domain_mem(entry->snapshot_mfn << PAGE_SHIFT);
   4.242          shadow   = map_domain_mem(smfn                << PAGE_SHIFT);
   4.243 +        unshadow = 0;
   4.244  
   4.245          switch ( stype ) {
   4.246          case PGT_l1_shadow:
   4.247 @@ -1686,6 +1719,16 @@ static int resync_all(struct domain *d, 
   4.248                      //
   4.249                      // snapshot[i] = new_pde;
   4.250                  }
   4.251 +
   4.252 +                // XXX - This hack works for linux guests.
   4.253 +                //       Need a better solution long term.
   4.254 +                if ( !(new_pde & _PAGE_PRESENT) && unlikely(new_pde != 0) &&
   4.255 +                     (frame_table[smfn].u.inuse.type_info & PGT_pinned) &&
   4.256 +                     !unshadow )
   4.257 +                {
   4.258 +                    perfc_incrc(unshadow_l2_count);
   4.259 +                    unshadow = 1;
   4.260 +                }
   4.261              }
   4.262              break;
   4.263          default:
   4.264 @@ -1696,6 +1739,9 @@ static int resync_all(struct domain *d, 
   4.265          unmap_domain_mem(shadow);
   4.266          unmap_domain_mem(snapshot);
   4.267          unmap_domain_mem(guest);
   4.268 +
   4.269 +        if ( unlikely(unshadow) )
   4.270 +            shadow_unpin(smfn);
   4.271      }
   4.272  
   4.273      return need_flush;
   4.274 @@ -1724,7 +1770,9 @@ void __shadow_sync_all(struct domain *d)
   4.275          unsigned long opte = *ppte;
   4.276          unsigned long npte = opte & ~_PAGE_RW;
   4.277  
   4.278 -        get_page_from_l1e(mk_l1_pgentry(npte), d);
   4.279 +        if ( (npte & _PAGE_PRESENT) &&
   4.280 +             !shadow_get_page_from_l1e(mk_l1_pgentry(npte), d) )
   4.281 +            BUG();
   4.282          *ppte = npte;
   4.283          put_page_from_l1e(mk_l1_pgentry(opte), d);
   4.284  
   4.285 @@ -1884,7 +1932,7 @@ void __update_pagetables(struct exec_dom
   4.286      struct domain *d = ed->domain;
   4.287      unsigned long gmfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT;
   4.288      unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
   4.289 -    unsigned long smfn, hl2mfn;
   4.290 +    unsigned long smfn, hl2mfn, old_smfn;
   4.291  
   4.292      int max_mode = ( shadow_mode_external(d) ? SHM_external
   4.293                       : shadow_mode_translate(d) ? SHM_translate
   4.294 @@ -1909,10 +1957,12 @@ void __update_pagetables(struct exec_dom
   4.295       */
   4.296      if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) )
   4.297          smfn = shadow_l2_table(d, gpfn, gmfn);
   4.298 -    get_shadow_ref(smfn);
   4.299 -    if ( pagetable_val(ed->arch.shadow_table) )
   4.300 -        put_shadow_ref(pagetable_val(ed->arch.shadow_table) >> PAGE_SHIFT);
   4.301 +    if ( !get_shadow_ref(smfn) )
   4.302 +        BUG();
   4.303 +    old_smfn = pagetable_val(ed->arch.shadow_table) >> PAGE_SHIFT;
   4.304      ed->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
   4.305 +    if ( old_smfn )
   4.306 +        put_shadow_ref(old_smfn);
   4.307  
   4.308      SH_VVLOG("0: __update_pagetables(gmfn=%p, smfn=%p)", gmfn, smfn);
   4.309  
   4.310 @@ -1937,7 +1987,8 @@ void __update_pagetables(struct exec_dom
   4.311      {
   4.312          if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
   4.313              hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
   4.314 -        get_shadow_ref(hl2mfn);
   4.315 +        if ( !get_shadow_ref(hl2mfn) )
   4.316 +            BUG();
   4.317  
   4.318          if ( ed->arch.hl2_vtable )
   4.319              unmap_domain_mem(ed->arch.hl2_vtable);
     5.1 --- a/xen/common/keyhandler.c	Wed Mar 16 17:31:35 2005 +0000
     5.2 +++ b/xen/common/keyhandler.c	Wed Mar 16 17:56:21 2005 +0000
     5.3 @@ -188,7 +188,7 @@ void initialize_keytable(void)
     5.4      register_keyhandler(
     5.5          'o', audit_domains_key,  "audit domains >0 EXPERIMENTAL");
     5.6      register_keyhandler(
     5.7 -        'T', debugtrace_key, "dump debugtrace");
     5.8 +        'T', debugtrace_key, "toggle debugtrace to console/buffer");
     5.9  #endif
    5.10  
    5.11  #ifdef PERF_COUNTERS
     6.1 --- a/xen/drivers/char/console.c	Wed Mar 16 17:31:35 2005 +0000
     6.2 +++ b/xen/drivers/char/console.c	Wed Mar 16 17:56:21 2005 +0000
     6.3 @@ -511,7 +511,10 @@ void debugtrace_dump(void)
     6.4  
     6.5      spin_lock_irqsave(&debugtrace_lock, flags);
     6.6  
     6.7 +    printk("debugtrace_dump() starting\n");
     6.8 +
     6.9      /* Print oldest portion of the ring. */
    6.10 +    ASSERT(debugtrace_buf[debugtrace_bytes - 1] == 0);
    6.11      serial_puts(sercon_handle, &debugtrace_buf[debugtrace_prd]);
    6.12  
    6.13      /* Print youngest portion of the ring. */
    6.14 @@ -520,6 +523,8 @@ void debugtrace_dump(void)
    6.15  
    6.16      memset(debugtrace_buf, '\0', debugtrace_bytes);
    6.17  
    6.18 +    printk("debugtrace_dump() finished\n");
    6.19 +
    6.20      spin_unlock_irqrestore(&debugtrace_lock, flags);
    6.21  
    6.22      watchdog_on = _watchdog_on;
    6.23 @@ -538,6 +543,8 @@ void debugtrace_printk(const char *fmt, 
    6.24  
    6.25      spin_lock_irqsave(&debugtrace_lock, flags);
    6.26  
    6.27 +    ASSERT(debugtrace_buf[debugtrace_bytes - 1] == 0);
    6.28 +
    6.29      va_start(args, fmt);
    6.30      (void)vsnprintf(buf, sizeof(buf), fmt, args);
    6.31      va_end(args);
    6.32 @@ -563,20 +570,24 @@ void debugtrace_printk(const char *fmt, 
    6.33  static int __init debugtrace_init(void)
    6.34  {
    6.35      int order;
    6.36 -    unsigned int kbytes;
    6.37 +    unsigned int kbytes, bytes;
    6.38  
    6.39      /* Round size down to next power of two. */
    6.40      while ( (kbytes = (debugtrace_kilobytes & (debugtrace_kilobytes-1))) != 0 )
    6.41          debugtrace_kilobytes = kbytes;
    6.42  
    6.43 -    debugtrace_bytes = debugtrace_kilobytes << 10;
    6.44 -    if ( debugtrace_bytes == 0 )
    6.45 +    bytes = debugtrace_kilobytes << 10;
    6.46 +    if ( bytes == 0 )
    6.47          return 0;
    6.48  
    6.49 -    order = get_order(debugtrace_bytes);
    6.50 +    order = get_order(bytes);
    6.51      debugtrace_buf = (unsigned char *)alloc_xenheap_pages(order);
    6.52      ASSERT(debugtrace_buf != NULL);
    6.53  
    6.54 +    memset(debugtrace_buf, '\0', bytes);
    6.55 +
    6.56 +    debugtrace_bytes = bytes;
    6.57 +
    6.58      memset(debugtrace_buf, '\0', debugtrace_bytes);
    6.59  
    6.60      return 0;
     7.1 --- a/xen/include/asm-x86/shadow.h	Wed Mar 16 17:31:35 2005 +0000
     7.2 +++ b/xen/include/asm-x86/shadow.h	Wed Mar 16 17:56:21 2005 +0000
     7.3 @@ -68,6 +68,33 @@ static inline unsigned long __shadow_sta
     7.4  
     7.5  extern void vmx_shadow_clear_state(struct domain *);
     7.6  
     7.7 +static inline int page_is_page_table(struct pfn_info *page)
     7.8 +{
     7.9 +    return page->count_info & PGC_page_table;
    7.10 +}
    7.11 +
    7.12 +static inline int mfn_is_page_table(unsigned long mfn)
    7.13 +{
    7.14 +    if ( !pfn_is_ram(mfn) )
    7.15 +        return 0;
    7.16 +
    7.17 +    return frame_table[mfn].count_info & PGC_page_table;
    7.18 +}
    7.19 +
    7.20 +static inline int page_out_of_sync(struct pfn_info *page)
    7.21 +{
    7.22 +    return page->count_info & PGC_out_of_sync;
    7.23 +}
    7.24 +
    7.25 +static inline int mfn_out_of_sync(unsigned long mfn)
    7.26 +{
    7.27 +    if ( !pfn_is_ram(mfn) )
    7.28 +        return 0;
    7.29 +
    7.30 +    return frame_table[mfn].count_info & PGC_out_of_sync;
    7.31 +}
    7.32 +
    7.33 +
    7.34  /************************************************************************/
    7.35  
    7.36  static void inline
    7.37 @@ -215,6 +242,39 @@ extern int shadow_status_noswap;
    7.38  
    7.39  /************************************************************************/
    7.40  
    7.41 +static inline int
    7.42 +shadow_get_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
    7.43 +{
    7.44 +    int res = get_page_from_l1e(l1e, d);
    7.45 +    unsigned long mfn;
    7.46 +    struct domain *owner;
    7.47 +
    7.48 +    ASSERT( l1_pgentry_val(l1e) & _PAGE_PRESENT );
    7.49 +
    7.50 +    if ( unlikely(!res) && IS_PRIV(d) && !shadow_mode_translate(d) &&
    7.51 +         !(l1_pgentry_val(l1e) & L1_DISALLOW_MASK) &&
    7.52 +         (mfn = l1_pgentry_to_pfn(l1e)) &&
    7.53 +         pfn_is_ram(mfn) &&
    7.54 +         (owner = page_get_owner(pfn_to_page(l1_pgentry_to_pfn(l1e)))) &&
    7.55 +         (d != owner) )
    7.56 +    {
    7.57 +        res = get_page_from_l1e(l1e, owner);
    7.58 +        printk("tried to map mfn %p from domain %d into shadow page tables "
    7.59 +               "of domain %d; %s\n",
    7.60 +               mfn, owner->id, d->id, res ? "success" : "failed");
    7.61 +    }
    7.62 +
    7.63 +    if ( unlikely(!res) )
    7.64 +    {
    7.65 +        perfc_incrc(shadow_get_page_fail);
    7.66 +        FSH_LOG("%s failed to get ref l1e=%p\n", l1_pgentry_val(l1e));
    7.67 +    }
    7.68 +
    7.69 +    return res;
    7.70 +}
    7.71 +
    7.72 +/************************************************************************/
    7.73 +
    7.74  static inline void
    7.75  __shadow_get_l2e(
    7.76      struct exec_domain *ed, unsigned long va, unsigned long *psl2e)
    7.77 @@ -256,8 +316,9 @@ static inline void
    7.78          //
    7.79          if ( (old_hl2e ^ new_hl2e) & (PAGE_MASK | _PAGE_PRESENT) )
    7.80          {
    7.81 -            if ( new_hl2e & _PAGE_PRESENT )
    7.82 -                get_page_from_l1e(mk_l1_pgentry(new_hl2e), ed->domain);
    7.83 +            if ( (new_hl2e & _PAGE_PRESENT) &&
    7.84 +                 !shadow_get_page_from_l1e(mk_l1_pgentry(new_hl2e), ed->domain) )
    7.85 +                new_hl2e = 0;
    7.86              if ( old_hl2e & _PAGE_PRESENT )
    7.87                  put_page_from_l1e(mk_l1_pgentry(old_hl2e), ed->domain);
    7.88          }
    7.89 @@ -314,8 +375,9 @@ put_shadow_ref(unsigned long smfn)
    7.90  
    7.91      if ( unlikely(x == 0) )
    7.92      {
    7.93 -        printk("put_shadow_ref underflow, gmfn=%p smfn=%p\n",
    7.94 -               frame_table[smfn].u.inuse.type_info & PGT_mfn_mask, smfn);
    7.95 +        printk("put_shadow_ref underflow, oc=%p t=%p\n",
    7.96 +               frame_table[smfn].count_info,
    7.97 +               frame_table[smfn].u.inuse.type_info);
    7.98          BUG();
    7.99      }
   7.100  
   7.101 @@ -335,7 +397,8 @@ shadow_pin(unsigned long smfn)
   7.102      ASSERT( !(frame_table[smfn].u.inuse.type_info & PGT_pinned) );
   7.103  
   7.104      frame_table[smfn].u.inuse.type_info |= PGT_pinned;
   7.105 -    get_shadow_ref(smfn);
   7.106 +    if ( !get_shadow_ref(smfn) )
   7.107 +        BUG();
   7.108  }
   7.109  
   7.110  static inline void
   7.111 @@ -403,7 +466,7 @@ static inline int mark_dirty(struct doma
   7.112  
   7.113  /************************************************************************/
   7.114  
   7.115 -extern void shadow_mark_out_of_sync(
   7.116 +extern void shadow_mark_va_out_of_sync(
   7.117      struct exec_domain *ed, unsigned long gpfn, unsigned long mfn,
   7.118      unsigned long va);
   7.119  
   7.120 @@ -436,7 +499,7 @@ static inline void l1pte_write_fault(
   7.121          __mark_dirty(d, mfn);
   7.122  
   7.123      if ( mfn_is_page_table(mfn) )
   7.124 -        shadow_mark_out_of_sync(ed, gpfn, mfn, va);
   7.125 +        shadow_mark_va_out_of_sync(ed, gpfn, mfn, va);
   7.126  
   7.127      *gpte_p = gpte;
   7.128      *spte_p = spte;
   7.129 @@ -474,26 +537,20 @@ static inline void l1pte_read_fault(
   7.130  static inline void l1pte_propagate_from_guest(
   7.131      struct domain *d, unsigned long gpte, unsigned long *spte_p)
   7.132  { 
   7.133 -    unsigned long spte = *spte_p;
   7.134      unsigned long pfn = gpte >> PAGE_SHIFT;
   7.135      unsigned long mfn = __gpfn_to_mfn(d, pfn);
   7.136 +    unsigned long spte;
   7.137  
   7.138  #if SHADOW_VERBOSE_DEBUG
   7.139 -    unsigned long old_spte = spte;
   7.140 +    unsigned long old_spte = *spte_p;
   7.141  #endif
   7.142  
   7.143 -    if ( unlikely(!mfn) )
   7.144 -    {
   7.145 -        // likely an MMIO address space mapping...
   7.146 -        //
   7.147 -        *spte_p = 0;
   7.148 -        return;
   7.149 -    }
   7.150 +    spte = 0;
   7.151  
   7.152 -    spte = 0;
   7.153 -    if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == 
   7.154 -         (_PAGE_PRESENT|_PAGE_ACCESSED) ) {
   7.155 -        
   7.156 +    if ( mfn &&
   7.157 +         ((gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
   7.158 +          (_PAGE_PRESENT|_PAGE_ACCESSED)) ) {
   7.159 +
   7.160          spte = (mfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
   7.161          
   7.162          if ( shadow_mode_log_dirty(d) ||
   7.163 @@ -506,7 +563,7 @@ static inline void l1pte_propagate_from_
   7.164  
   7.165  #if SHADOW_VERBOSE_DEBUG
   7.166      if ( old_spte || spte || gpte )
   7.167 -        debugtrace_printk("l1pte_propagate_from_guest: gpte=0x%p, old spte=0x%p, new spte=0x%p\n", gpte, old_spte, spte);
   7.168 +        SH_VLOG("l1pte_propagate_from_guest: gpte=0x%p, old spte=0x%p, new spte=0x%p", gpte, old_spte, spte);
   7.169  #endif
   7.170  
   7.171      *spte_p = spte;
   7.172 @@ -541,9 +598,10 @@ static inline void l2pde_general(
   7.173  static inline void l2pde_propagate_from_guest(
   7.174      struct domain *d, unsigned long *gpde_p, unsigned long *spde_p)
   7.175  {
   7.176 -    unsigned long gpde = *gpde_p, sl1mfn;
   7.177 +    unsigned long gpde = *gpde_p, sl1mfn = 0;
   7.178  
   7.179 -    sl1mfn =  __shadow_status(d, gpde >> PAGE_SHIFT, PGT_l1_shadow);
   7.180 +    if ( gpde & _PAGE_PRESENT )
   7.181 +        sl1mfn =  __shadow_status(d, gpde >> PAGE_SHIFT, PGT_l1_shadow);
   7.182      l2pde_general(d, gpde_p, spde_p, sl1mfn);
   7.183  }
   7.184      
   7.185 @@ -559,26 +617,31 @@ validate_pte_change(
   7.186  {
   7.187      unsigned long old_spte, new_spte;
   7.188  
   7.189 -    perfc_incrc(validate_pte_change);
   7.190 +    perfc_incrc(validate_pte_calls);
   7.191  
   7.192  #if 0
   7.193      FSH_LOG("validate_pte(old=%p new=%p)\n", old_pte, new_pte);
   7.194  #endif
   7.195  
   7.196      old_spte = *shadow_pte_p;
   7.197 -    l1pte_propagate_from_guest(d, new_pte, shadow_pte_p);
   7.198 -    new_spte = *shadow_pte_p;
   7.199 +    l1pte_propagate_from_guest(d, new_pte, &new_spte);
   7.200  
   7.201      // only do the ref counting if something important changed.
   7.202      //
   7.203 -    if ( (old_spte ^ new_spte) & (PAGE_MASK | _PAGE_RW | _PAGE_PRESENT) )
   7.204 +    if ( ((old_spte | new_spte) & _PAGE_PRESENT ) &&
   7.205 +         ((old_spte ^ new_spte) & (PAGE_MASK | _PAGE_RW | _PAGE_PRESENT)) )
   7.206      {
   7.207 -        if ( new_spte & _PAGE_PRESENT )
   7.208 -            get_page_from_l1e(mk_l1_pgentry(new_spte), d);
   7.209 +        perfc_incrc(validate_pte_changes);
   7.210 +
   7.211 +        if ( (new_spte & _PAGE_PRESENT) &&
   7.212 +             !shadow_get_page_from_l1e(mk_l1_pgentry(new_spte), d) )
   7.213 +            new_spte = 0;
   7.214          if ( old_spte & _PAGE_PRESENT )
   7.215              put_page_from_l1e(mk_l1_pgentry(old_spte), d);
   7.216      }
   7.217  
   7.218 +    *shadow_pte_p = new_spte;
   7.219 +
   7.220      // paranoia rules!
   7.221      return 1;
   7.222  }
   7.223 @@ -588,27 +651,35 @@ validate_pte_change(
   7.224  static int inline
   7.225  validate_pde_change(
   7.226      struct domain *d,
   7.227 -    unsigned long new_pde,
   7.228 +    unsigned long new_gpde,
   7.229      unsigned long *shadow_pde_p)
   7.230  {
   7.231 -    unsigned long old_spde = *shadow_pde_p;
   7.232 -    unsigned long new_spde;
   7.233 -
   7.234 -    perfc_incrc(validate_pde_change);
   7.235 +    unsigned long old_spde, new_spde;
   7.236  
   7.237 -    l2pde_propagate_from_guest(d, &new_pde, shadow_pde_p);
   7.238 -    new_spde = *shadow_pde_p;
   7.239 +    perfc_incrc(validate_pde_calls);
   7.240  
   7.241 -    // only do the ref counting if something important changed.
   7.242 +    old_spde = *shadow_pde_p;
   7.243 +    l2pde_propagate_from_guest(d, &new_gpde, &new_spde);
   7.244 +
   7.245 +    // XXX Shouldn't we propagate the new_gpde to the guest?
   7.246 +    // And then mark the guest's L2 page as dirty?
   7.247 +
   7.248 +    // Only do the ref counting if something important changed.
   7.249      //
   7.250 -    if ( (old_spde ^ new_spde) & (PAGE_MASK | _PAGE_PRESENT) )
   7.251 +    if ( ((old_spde | new_spde) & _PAGE_PRESENT) &&
   7.252 +         ((old_spde ^ new_spde) & (PAGE_MASK | _PAGE_PRESENT)) )
   7.253      {
   7.254 -        if ( new_spde & _PAGE_PRESENT )
   7.255 -            get_shadow_ref(new_spde >> PAGE_SHIFT);
   7.256 +        perfc_incrc(validate_pde_changes);
   7.257 +
   7.258 +        if ( (new_spde & _PAGE_PRESENT) &&
   7.259 +             !get_shadow_ref(new_spde >> PAGE_SHIFT) )
   7.260 +            BUG();
   7.261          if ( old_spde & _PAGE_PRESENT )
   7.262              put_shadow_ref(old_spde >> PAGE_SHIFT);
   7.263      }
   7.264  
   7.265 +    *shadow_pde_p = new_spde;
   7.266 +
   7.267      // paranoia rules!
   7.268      return 1;
   7.269  }
   7.270 @@ -676,6 +747,9 @@ static void shadow_audit(struct domain *
   7.271          BUG();
   7.272      }
   7.273  #endif
   7.274 +
   7.275 +    // XXX ought to add some code to audit the out-of-sync entries, too.
   7.276 +    //
   7.277  }
   7.278  #else
   7.279  #define shadow_audit(p, print) ((void)0)
   7.280 @@ -696,16 +770,12 @@ static inline struct shadow_status *hash
   7.281   *      It returns the shadow's mfn, or zero if it doesn't exist.
   7.282   */
   7.283  
   7.284 -static inline unsigned long __shadow_status(
   7.285 +static inline unsigned long ___shadow_status(
   7.286      struct domain *d, unsigned long gpfn, unsigned long stype)
   7.287  {
   7.288      struct shadow_status *p, *x, *head;
   7.289      unsigned long key = gpfn | stype;
   7.290  
   7.291 -    ASSERT(spin_is_locked(&d->arch.shadow_lock));
   7.292 -    ASSERT(gpfn == (gpfn & PGT_mfn_mask));
   7.293 -    ASSERT(stype && !(stype & ~PGT_type_mask));
   7.294 -
   7.295      perfc_incrc(shadow_status_calls);
   7.296  
   7.297      x = head = hash_bucket(d, gpfn);
   7.298 @@ -755,6 +825,27 @@ static inline unsigned long __shadow_sta
   7.299      return 0;
   7.300  }
   7.301  
   7.302 +static inline unsigned long __shadow_status(
   7.303 +    struct domain *d, unsigned long gpfn, unsigned long stype)
   7.304 +{
   7.305 +    unsigned long gmfn = __gpfn_to_mfn(d, gpfn);
   7.306 +
   7.307 +    ASSERT(spin_is_locked(&d->arch.shadow_lock));
   7.308 +    ASSERT(gpfn == (gpfn & PGT_mfn_mask));
   7.309 +    ASSERT(stype && !(stype & ~PGT_type_mask));
   7.310 +
   7.311 +    if ( gmfn && ((stype != PGT_snapshot)
   7.312 +                  ? !mfn_is_page_table(gmfn)
   7.313 +                  : !mfn_out_of_sync(gmfn)) )
   7.314 +    {
   7.315 +        perfc_incrc(shadow_status_shortcut);
   7.316 +        ASSERT(___shadow_status(d, gpfn, stype) == 0);
   7.317 +        return 0;
   7.318 +    }
   7.319 +
   7.320 +    return ___shadow_status(d, gmfn, stype);
   7.321 +}
   7.322 +
   7.323  /*
   7.324   * Not clear if pull-to-front is worth while for this or not,
   7.325   * as it generally needs to scan the entire bucket anyway.
   7.326 @@ -955,6 +1046,7 @@ static inline void set_shadow_status(
   7.327      {
   7.328          if ( x->gpfn_and_flags == key )
   7.329          {
   7.330 +            BUG();
   7.331              x->smfn = smfn;
   7.332              goto done;
   7.333          }
   7.334 @@ -1059,7 +1151,8 @@ shadow_set_l1e(unsigned long va, unsigne
   7.335              if ( sl1mfn )
   7.336              {
   7.337                  perfc_incrc(shadow_set_l1e_unlinked);
   7.338 -                get_shadow_ref(sl1mfn);
   7.339 +                if ( !get_shadow_ref(sl1mfn) )
   7.340 +                    BUG();
   7.341                  l2pde_general(d, &gpde, &sl2e, sl1mfn);
   7.342                  __guest_set_l2e(ed, va, gpde);
   7.343                  __shadow_set_l2e(ed, va, sl2e);
   7.344 @@ -1074,17 +1167,19 @@ shadow_set_l1e(unsigned long va, unsigne
   7.345      }
   7.346  
   7.347      old_spte = l1_pgentry_val(shadow_linear_pg_table[l1_linear_offset(va)]);
   7.348 -    shadow_linear_pg_table[l1_linear_offset(va)] = mk_l1_pgentry(new_spte);
   7.349  
   7.350      // only do the ref counting if something important changed.
   7.351      //
   7.352      if ( (old_spte ^ new_spte) & (PAGE_MASK | _PAGE_RW | _PAGE_PRESENT) )
   7.353      {
   7.354 -        if ( new_spte & _PAGE_PRESENT )
   7.355 -            get_page_from_l1e(mk_l1_pgentry(new_spte), d);
   7.356 +        if ( (new_spte & _PAGE_PRESENT) &&
   7.357 +             !shadow_get_page_from_l1e(mk_l1_pgentry(new_spte), d) )
   7.358 +            new_spte = 0;
   7.359          if ( old_spte & _PAGE_PRESENT )
   7.360              put_page_from_l1e(mk_l1_pgentry(old_spte), d);
   7.361      }
   7.362 +
   7.363 +    shadow_linear_pg_table[l1_linear_offset(va)] = mk_l1_pgentry(new_spte);
   7.364  }
   7.365  
   7.366  /************************************************************************/
     8.1 --- a/xen/include/xen/perfc_defn.h	Wed Mar 16 17:31:35 2005 +0000
     8.2 +++ b/xen/include/xen/perfc_defn.h	Wed Mar 16 17:56:21 2005 +0000
     8.3 @@ -55,12 +55,14 @@ PERFCOUNTER_CPU( shadow_set_l1e_force_ma
     8.4  PERFCOUNTER_CPU( shadow_set_l1e_unlinked,  "shadow_set_l1e found unlinked l1" )
     8.5  PERFCOUNTER_CPU( shadow_set_l1e_fail,      "shadow_set_l1e failed (no sl1)" )
     8.6  PERFCOUNTER_CPU( shadow_invlpg_faults,     "shadow_invlpg's get_user faulted")
     8.7 +PERFCOUNTER_CPU( unshadow_l2_count,        "unpinned L2 count")
     8.8  
     8.9  
    8.10  /* STATUS counters do not reset when 'P' is hit */
    8.11  PERFSTATUS( snapshot_pages,  "current # fshadow snapshot pages" )
    8.12  
    8.13 -PERFCOUNTER_CPU(shadow_status_calls,    "calls to __shadow_status" )
    8.14 +PERFCOUNTER_CPU(shadow_status_shortcut, "fastpath miss on shadow cache")
    8.15 +PERFCOUNTER_CPU(shadow_status_calls,    "calls to ___shadow_status" )
    8.16  PERFCOUNTER_CPU(shadow_status_miss,     "missed shadow cache" )
    8.17  PERFCOUNTER_CPU(shadow_status_hit_head, "hits on head of bucket" )
    8.18  
    8.19 @@ -68,6 +70,7 @@ PERFCOUNTER_CPU(shadow_sync_all,        
    8.20  PERFCOUNTER_CPU(shadow_make_snapshot,              "snapshots created")
    8.21  PERFCOUNTER_CPU(shadow_mark_mfn_out_of_sync_calls, "calls to shadow_mk_out_of_sync")
    8.22  PERFCOUNTER_CPU(shadow_out_of_sync_calls,          "calls to shadow_out_of_sync")
    8.23 +PERFCOUNTER_CPU(extra_va_update_sync,              "extra syncs for bug in chk_pgtb")
    8.24  PERFCOUNTER_CPU(snapshot_entry_matches_calls,      "calls to ss_entry_matches")
    8.25  PERFCOUNTER_CPU(snapshot_entry_matches_true,       "ss_entry_matches returns true")
    8.26  
    8.27 @@ -76,5 +79,7 @@ PERFCOUNTER_CPU(shadow_fault_bail_pde_no
    8.28  PERFCOUNTER_CPU(shadow_fault_bail_pte_not_present, "sf bailed due to pte not present")
    8.29  PERFCOUNTER_CPU(shadow_fault_bail_ro_mapping,      "sf bailed due to a ro mapping")
    8.30  PERFCOUNTER_CPU(shadow_fault_fixed,                "sf fixed the pgfault")
    8.31 -PERFCOUNTER_CPU(validate_pte_change,               "calls to validate_pte_change")
    8.32 -PERFCOUNTER_CPU(validate_pde_change,               "calls to validate_pde_change")
    8.33 +PERFCOUNTER_CPU(validate_pte_calls,                "calls to validate_pte_change")
    8.34 +PERFCOUNTER_CPU(validate_pte_changes,              "validate_pte makes changes")
    8.35 +PERFCOUNTER_CPU(validate_pde_calls,                "calls to validate_pde_change")
    8.36 +PERFCOUNTER_CPU(validate_pde_changes,              "validate_pde makes changes")
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/~/audit.c	Wed Mar 16 17:56:21 2005 +0000
     9.3 @@ -0,0 +1,817 @@
     9.4 +/******************************************************************************
     9.5 + * arch/x86/audit.c
     9.6 + * 
     9.7 + * Copyright (c) 2002-2005 K A Fraser
     9.8 + * Copyright (c) 2004 Christian Limpach
     9.9 + * Copyright (c) 2005 Michael A Fetterman
    9.10 + * 
    9.11 + * This program is free software; you can redistribute it and/or modify
    9.12 + * it under the terms of the GNU General Public License as published by
    9.13 + * the Free Software Foundation; either version 2 of the License, or
    9.14 + * (at your option) any later version.
    9.15 + * 
    9.16 + * This program is distributed in the hope that it will be useful,
    9.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    9.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    9.19 + * GNU General Public License for more details.
    9.20 + * 
    9.21 + * You should have received a copy of the GNU General Public License
    9.22 + * along with this program; if not, write to the Free Software
    9.23 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    9.24 + */
    9.25 +
    9.26 +#include <xen/config.h>
    9.27 +#include <xen/init.h>
    9.28 +#include <xen/kernel.h>
    9.29 +#include <xen/lib.h>
    9.30 +#include <xen/mm.h>
    9.31 +//#include <xen/sched.h>
    9.32 +//#include <xen/errno.h>
    9.33 +#include <xen/perfc.h>
    9.34 +//#include <xen/irq.h>
    9.35 +//#include <xen/softirq.h>
    9.36 +#include <asm/shadow.h>
    9.37 +#include <asm/page.h>
    9.38 +#include <asm/flushtlb.h>
    9.39 +//#include <asm/io.h>
    9.40 +//#include <asm/uaccess.h>
    9.41 +//#include <asm/domain_page.h>
    9.42 +//#include <asm/ldt.h>
    9.43 +
    9.44 +// XXX SMP bug -- these should not be statics...
    9.45 +//
    9.46 +static int ttot=0, ctot=0, io_mappings=0, lowmem_mappings=0;
    9.47 +static int l1, l2, oos_count, page_count;
    9.48 +
    9.49 +#define FILE_AND_LINE 1
    9.50 +
    9.51 +#if FILE_AND_LINE
    9.52 +#define adjust(_p, _a) _adjust((_p), (_a), __FILE__, __LINE__)
    9.53 +#define ADJUST_EXTRA_ARGS ,const char *file, int line
    9.54 +#define APRINTK(_f, _a...) printk(_f " %s:%d\n", ## _a, file, line)
    9.55 +#else
    9.56 +#define adjust _adjust
    9.57 +#define ADJUST_EXTRA_ARGS
    9.58 +#define APRINTK(_f, _a...) printk(_f "\n", ##_a)
    9.59 +#endif
    9.60 +
    9.61 +int audit_adjust_pgtables(struct domain *d, int dir, int noisy)
    9.62 +{
    9.63 +    int errors = 0;
    9.64 +    int shadow_enabled = shadow_mode_enabled(d) ? 1 : 0;
    9.65 +
    9.66 +    void _adjust(struct pfn_info *page, int adjtype ADJUST_EXTRA_ARGS)
    9.67 +    {
    9.68 +        if ( adjtype )
    9.69 +        {
    9.70 +            // adjust the type count
    9.71 +            //
    9.72 +            int tcount = page->u.inuse.type_info & PGT_count_mask;
    9.73 +            tcount += dir;
    9.74 +            ttot++;
    9.75 +
    9.76 +            if ( page_get_owner(page) == NULL )
    9.77 +            {
    9.78 +                APRINTK("adjust(mfn=%p, dir=%d, adjtype=%d) owner=NULL",
    9.79 +                        page_to_pfn(page), dir, adjtype, file, line);
    9.80 +                errors++;
    9.81 +            }
    9.82 +
    9.83 +            if ( tcount < 0 )
    9.84 +            {
    9.85 +                APRINTK("Audit %d: type count went below zero mfn=%x t=%x ot=%x",
    9.86 +                        d->id, page-frame_table,
    9.87 +                        page->u.inuse.type_info,
    9.88 +                        page->tlbflush_timestamp);
    9.89 +                errors++;
    9.90 +            }
    9.91 +            else if ( (tcount & ~PGT_count_mask) != 0 )
    9.92 +            {
    9.93 +                APRINTK("Audit %d: type count overflowed mfn=%x t=%x ot=%x",
    9.94 +                        d->id, page-frame_table,
    9.95 +                        page->u.inuse.type_info,
    9.96 +                        page->tlbflush_timestamp);
    9.97 +                errors++;
    9.98 +            }
    9.99 +            else
   9.100 +                page->u.inuse.type_info += dir;
   9.101 +        }
   9.102 +
   9.103 +        // adjust the general count
   9.104 +        //
   9.105 +        int count = page->count_info & PGC_count_mask;
   9.106 +        count += dir;
   9.107 +        ctot++;
   9.108 +
   9.109 +        if ( count < 0 )
   9.110 +        {
   9.111 +            APRINTK("Audit %d: general count went below zero pfn=%x t=%x ot=%x",
   9.112 +                    d->id, page-frame_table,
   9.113 +                    page->u.inuse.type_info,
   9.114 +                    page->tlbflush_timestamp);
   9.115 +            errors++;
   9.116 +        }
   9.117 +        else if ( (count & ~PGT_count_mask) != 0 )
   9.118 +        {
   9.119 +            APRINTK("Audit %d: general count overflowed pfn=%x t=%x ot=%x",
   9.120 +                    d->id, page-frame_table,
   9.121 +                    page->u.inuse.type_info,
   9.122 +                    page->tlbflush_timestamp);
   9.123 +            errors++;
   9.124 +        }
   9.125 +        else
   9.126 +            page->count_info += dir;
   9.127 +    }
   9.128 +
   9.129 +    void adjust_l2_page(unsigned long mfn, int adjtype)
   9.130 +    {
   9.131 +        unsigned long *pt = map_domain_mem(mfn << PAGE_SHIFT);
   9.132 +        int i, limit;
   9.133 +
   9.134 +        if ( shadow_mode_external(d) )
   9.135 +            limit = L2_PAGETABLE_ENTRIES;
   9.136 +        else
   9.137 +            limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
   9.138 +
   9.139 +        for ( i = 0; i < limit; i++ )
   9.140 +        {
   9.141 +            if ( pt[i] & _PAGE_PRESENT )
   9.142 +            {
   9.143 +                unsigned long l1mfn = pt[i] >> PAGE_SHIFT;
   9.144 +                struct pfn_info *l1page = pfn_to_page(l1mfn);
   9.145 +
   9.146 +                if ( noisy )
   9.147 +                {
   9.148 +                    if ( shadow_enabled )
   9.149 +                    {
   9.150 +                        if ( page_get_owner(l1page) != NULL )
   9.151 +                        {
   9.152 +                            printk("L2: Bizarre shadow L1 page mfn=%p "
   9.153 +                                   "belonging to a domain %p (id=%d)\n",
   9.154 +                                   l1mfn,
   9.155 +                                   page_get_owner(l1page),
   9.156 +                                   page_get_owner(l1page)->id);
   9.157 +                            errors++;
   9.158 +                            continue;
   9.159 +                        }
   9.160 +                    }
   9.161 +                    else
   9.162 +                    {
   9.163 +                        if ( page_get_owner(l1page) != d )
   9.164 +                        {
   9.165 +                            printk("L2: Skip bizarre L1 page mfn=%p "
   9.166 +                                   "belonging to other dom %p (id=%d)\n",
   9.167 +                                   l1mfn,
   9.168 +                                   page_get_owner(l1page),
   9.169 +                                   page_get_owner(l1page)->id);
   9.170 +                            errors++;
   9.171 +                            continue;
   9.172 +                        }
   9.173 +
   9.174 +                        u32 page_type = l1page->u.inuse.type_info & PGT_type_mask;
   9.175 +
   9.176 +                        if ( page_type == PGT_l2_page_table )
   9.177 +                        {
   9.178 +                            printk("Audit %d: [%x] Found %s Linear PT "
   9.179 +                                   "t=%x mfn=%p\n",
   9.180 +                                   d->id, i, (l1mfn==mfn) ? "Self" : "Other",
   9.181 +                                   l1page->u.inuse.type_info, l1mfn);
   9.182 +                        }
   9.183 +                        else if ( page_type != PGT_l1_page_table )
   9.184 +                        {
   9.185 +                            printk("Audit %d: [L2 mfn=%p i=%x] "
   9.186 +                                   "Expected L1 t=%x mfn=%p\n",
   9.187 +                                   d->id, mfn, i,
   9.188 +                                   l1page->u.inuse.type_info, l1mfn);
   9.189 +                            errors++;
   9.190 +                        }
   9.191 +                    }
   9.192 +                }
   9.193 +
   9.194 +                adjust(l1page, adjtype);
   9.195 +            }
   9.196 +        }
   9.197 +
   9.198 +        unmap_domain_mem(pt);
   9.199 +    }
   9.200 +
   9.201 +    void adjust_l1_page(unsigned long l1mfn)
   9.202 +    {
   9.203 +        unsigned long *pt = map_domain_mem(l1mfn << PAGE_SHIFT);
   9.204 +        int i;
   9.205 +
   9.206 +        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
   9.207 +        {
   9.208 +            if ( pt[i] & _PAGE_PRESENT )
   9.209 +            {
   9.210 +                unsigned long gmfn = pt[i] >> PAGE_SHIFT;
   9.211 +                struct pfn_info *gpage = pfn_to_page(gmfn);
   9.212 +
   9.213 +                if ( gmfn < 0x100 )
   9.214 +                {
   9.215 +                    lowmem_mappings++;
   9.216 +                    continue;
   9.217 +                }
   9.218 +
   9.219 +                if ( gmfn > max_page )
   9.220 +                {
   9.221 +                    io_mappings++;
   9.222 +                    continue;
   9.223 +                }
   9.224 +
   9.225 +                if ( noisy )
   9.226 +                {
   9.227 +                    if ( pt[i] & _PAGE_RW )
   9.228 +                    {
   9.229 +                        // If it's not a writable page, complain.
   9.230 +                        //
   9.231 +                        if ( !((gpage->u.inuse.type_info & PGT_type_mask) ==
   9.232 +                               PGT_writable_page) )
   9.233 +                        {
   9.234 +                            printk("Audit %d: [l1mfn=%p, i=%x] Illegal RW "
   9.235 +                                   "t=%x mfn=%p\n",
   9.236 +                                   d->id, l1mfn, i,
   9.237 +                                   gpage->u.inuse.type_info, gmfn);
   9.238 +                            errors++;
   9.239 +                        }
   9.240 +
   9.241 +                        if ( shadow_enabled &&
   9.242 +                             page_is_page_table(gpage) &&
   9.243 +                             ! page_out_of_sync(gpage) )
   9.244 +                        {
   9.245 +                            printk("Audit %d: [l1mfn=%p, i=%x] Illegal RW of "
   9.246 +                                   "page table gmfn=%p\n",
   9.247 +                                   d->id, l1mfn, i, gmfn);
   9.248 +                            errors++;
   9.249 +                        }
   9.250 +                    }
   9.251 +
   9.252 +                    if ( page_get_owner(gpage) != d )
   9.253 +                    {
   9.254 +                        printk("Audit %d: [l1mfn=%p,i=%x] Skip foreign page "
   9.255 +                               "dom=%p (id=%d) mfn=%p c=%08x t=%08x\n",
   9.256 +                               d->id, l1mfn, i,
   9.257 +                               page_get_owner(gpage),
   9.258 +                               page_get_owner(gpage)->id,
   9.259 +                               gmfn,
   9.260 +                               gpage->count_info,
   9.261 +                               gpage->u.inuse.type_info);
   9.262 +                        continue;
   9.263 +                    }
   9.264 +                }
   9.265 +
   9.266 +                adjust(gpage, (pt[i] & _PAGE_RW) ? 1 : 0);
   9.267 +            }
   9.268 +        }
   9.269 +
   9.270 +        unmap_domain_mem(pt);
   9.271 +    }
   9.272 +
   9.273 +    void adjust_shadow_tables()
   9.274 +    {
   9.275 +        struct shadow_status *a;
   9.276 +        unsigned long smfn, gmfn;
   9.277 +        struct pfn_info *page;
   9.278 +        int i;
   9.279 +
   9.280 +        for ( i = 0; i < shadow_ht_buckets; i++ )
   9.281 +        {
   9.282 +            a = &d->arch.shadow_ht[i];
   9.283 +            while ( a && a->gpfn_and_flags )
   9.284 +            {
   9.285 +                gmfn = __gpfn_to_mfn(d, a->gpfn_and_flags & PGT_mfn_mask);
   9.286 +                smfn = a->smfn;
   9.287 +                page = &frame_table[smfn];
   9.288 +
   9.289 +                adjust(pfn_to_page(gmfn), 0);
   9.290 +
   9.291 +                switch ( a->gpfn_and_flags & PGT_type_mask ) {
   9.292 +                case PGT_snapshot:
   9.293 +                    break;
   9.294 +                case PGT_l1_shadow:
   9.295 +                case PGT_hl2_shadow:
   9.296 +                    adjust_l1_page(smfn);
   9.297 +                    if ( page->u.inuse.type_info & PGT_pinned )
   9.298 +                        adjust(page, 0);
   9.299 +                    break;
   9.300 +                case PGT_l2_shadow:
   9.301 +                    adjust_l2_page(smfn, 0);
   9.302 +                    if ( page->u.inuse.type_info & PGT_pinned )
   9.303 +                        adjust(page, 0);
   9.304 +                    break;
   9.305 +                default:
   9.306 +                    BUG();
   9.307 +                    break;
   9.308 +                }
   9.309 +
   9.310 +                a = a->next;
   9.311 +            }
   9.312 +        }
   9.313 +    }
   9.314 +
   9.315 +    void adjust_oos_list()
   9.316 +    {
   9.317 +        struct out_of_sync_entry *oos;
   9.318 +
   9.319 +        if ( (oos = d->arch.out_of_sync) )
   9.320 +            ASSERT(shadow_enabled);
   9.321 +
   9.322 +        while ( oos )
   9.323 +        {
   9.324 +            adjust(pfn_to_page(oos->gmfn), 0);
   9.325 +
   9.326 +            // Only use entries that have low bits clear...
   9.327 +            //
   9.328 +            if ( !(oos->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
   9.329 +                adjust(pfn_to_page(oos->writable_pl1e >> PAGE_SHIFT), 0);
   9.330 +
   9.331 +            oos = oos->next;
   9.332 +            oos_count++;
   9.333 +        }
   9.334 +    }
   9.335 +
   9.336 +    void adjust_for_pgtbase()
   9.337 +    {
   9.338 +        struct exec_domain *ed;
   9.339 +
   9.340 +        for_each_exec_domain(d, ed)
   9.341 +            {
   9.342 +                if ( !shadow_enabled )
   9.343 +                {
   9.344 +                    if ( pagetable_val(ed->arch.guest_table) )
   9.345 +                        adjust(&frame_table[pagetable_val(ed->arch.guest_table)
   9.346 +                                            >> PAGE_SHIFT], 1);
   9.347 +                }
   9.348 +                else
   9.349 +                {
   9.350 +                    if ( pagetable_val(ed->arch.guest_table) )
   9.351 +                        adjust(&frame_table[pagetable_val(ed->arch.guest_table)
   9.352 +                                            >> PAGE_SHIFT], 0);
   9.353 +                    if ( pagetable_val(ed->arch.shadow_table) )
   9.354 +                        adjust(&frame_table[pagetable_val(ed->arch.shadow_table)
   9.355 +                                            >> PAGE_SHIFT], 0);
   9.356 +                }
   9.357 +            }
   9.358 +    }
   9.359 +
   9.360 +    void adjust_guest_pages()
   9.361 +    {
   9.362 +        struct list_head *list_ent = d->page_list.next;
   9.363 +        struct pfn_info *page;
   9.364 +        unsigned long mfn;
   9.365 +
   9.366 +        while ( list_ent != &d->page_list )
   9.367 +        {
   9.368 +            u32 page_type;
   9.369 +
   9.370 +            page = list_entry(list_ent, struct pfn_info, list);
   9.371 +            mfn = page_to_pfn(page);
   9.372 +            page_type = page->u.inuse.type_info & PGT_type_mask;
   9.373 +
   9.374 +            if ( page_get_owner(page) != d )
   9.375 +                BUG();
   9.376 +
   9.377 +            page_count++;
   9.378 +
   9.379 +            switch ( page_type )
   9.380 +            {
   9.381 +            case PGT_l2_page_table:
   9.382 +                l2++;
   9.383 +
   9.384 +                if ( noisy )
   9.385 +                {
   9.386 +                    if ( shadow_enabled )
   9.387 +                    {
   9.388 +                        printk("Audit %d: found an L2 guest page "
   9.389 +                               "mfn=%p t=%08x c=%08x while in shadow mode\n",
   9.390 +                               mfn, page->u.inuse.type_info, page->count_info);
   9.391 +                        errors++;
   9.392 +                    }
   9.393 +
   9.394 +                    if ( (page->u.inuse.type_info & PGT_validated) !=
   9.395 +                         PGT_validated )
   9.396 +                    {
   9.397 +                        printk("Audit %d: L2 mfn=%p not validated %p\n",
   9.398 +                               d->id, mfn, page->u.inuse.type_info);
   9.399 +                        errors++;
   9.400 +                    }
   9.401 +
   9.402 +                    if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
   9.403 +                    {
   9.404 +                        printk("Audit %d: L2 mfn=%p not pinned t=%p\n",
   9.405 +                               d->id, mfn, page->u.inuse.type_info);
   9.406 +                        errors++;
   9.407 +                    }
   9.408 +                }
   9.409 +
   9.410 +                if ( page->u.inuse.type_info & PGT_pinned )
   9.411 +                    adjust(page, 1);
   9.412 +
   9.413 +                if ( page->u.inuse.type_info & PGT_validated )
   9.414 +                    adjust_l2_page(mfn, 1);
   9.415 +
   9.416 +                break;
   9.417 +
   9.418 +            case PGT_l1_page_table:
   9.419 +                l1++;
   9.420 +
   9.421 +                if ( noisy )
   9.422 +                {
   9.423 +                    if ( shadow_enabled )
   9.424 +                    {
   9.425 +                        printk("found an L1 guest page mfn=%p t=%08x c=%08x while in shadow mode\n",
   9.426 +                               mfn, page->u.inuse.type_info, page->count_info);
   9.427 +                        errors++;
   9.428 +                    }
   9.429 +
   9.430 +                    if ( (page->u.inuse.type_info & PGT_validated) != PGT_validated )
   9.431 +                    {
   9.432 +                        printk("Audit %d: L1 not validated mfn=%p t=%p\n",
   9.433 +                               d->id, mfn, page->u.inuse.type_info);
   9.434 +                        errors++;
   9.435 +                    }
   9.436 +
   9.437 +                    if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
   9.438 +                    {
   9.439 +                        if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
   9.440 +                        {
   9.441 +                            printk("Audit %d: L1 mfn=%p not pinned t=%p\n",
   9.442 +                                   d->id, mfn, page->u.inuse.type_info);
   9.443 +                            errors++;
   9.444 +                        }
   9.445 +                    }
   9.446 +                }
   9.447 +                
   9.448 +                if ( page->u.inuse.type_info & PGT_pinned )
   9.449 +                    adjust(page, 1);
   9.450 +
   9.451 +                if ( page->u.inuse.type_info & PGT_validated )
   9.452 +                    adjust_l1_page(mfn);
   9.453 +
   9.454 +                break;
   9.455 +
   9.456 +            case PGT_gdt_page:
   9.457 +                ASSERT( !page_out_of_sync(page) );
   9.458 +                adjust(page, 1);
   9.459 +                break;
   9.460 +
   9.461 +            case PGT_ldt_page:
   9.462 +                ASSERT( !page_out_of_sync(page) );
   9.463 +                adjust(page, 1);
   9.464 +                break;
   9.465 +
   9.466 +            case PGT_writable_page:
   9.467 +                if ( shadow_enabled )
   9.468 +                {
   9.469 +                    // In shadow mode, writable pages can get pinned by
   9.470 +                    // paravirtualized guests that think they are pinning
   9.471 +                    // their L1s and/or L2s.
   9.472 +                    //
   9.473 +                    if ( page->u.inuse.type_info & PGT_pinned )
   9.474 +                        adjust(page, 1);
   9.475 +                }
   9.476 +            }
   9.477 +
   9.478 +            list_ent = page->list.next;
   9.479 +        }
   9.480 +    }
   9.481 +
   9.482 +    adjust_for_pgtbase();
   9.483 +
   9.484 +    adjust_guest_pages();
   9.485 +
   9.486 +    if ( shadow_enabled )
   9.487 +    {
   9.488 +        adjust_oos_list();
   9.489 +        adjust_shadow_tables();
   9.490 +    }
   9.491 +
   9.492 +    return errors;
   9.493 +}
   9.494 +
   9.495 +
   9.496 +#ifndef NDEBUG
   9.497 +
   9.498 +void _audit_domain(struct domain *d, int flags, const char *file, int line)
   9.499 +{
   9.500 +    void scan_for_pfn_in_mfn(struct domain *d, unsigned long xmfn,
   9.501 +                             unsigned long mfn)
   9.502 +    {
   9.503 +        struct pfn_info *page = &frame_table[mfn];
   9.504 +        unsigned long *pt = map_domain_mem(mfn);
   9.505 +        int i;
   9.506 +
   9.507 +        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
   9.508 +        {
   9.509 +            if ( (pt[i] & _PAGE_PRESENT) && ((pt[i] >> PAGE_SHIFT) == xmfn) )
   9.510 +                printk("     found dom=%d mfn=%p t=%x c=%x pt[i=%x]=%p\n",
   9.511 +                       d->id, mfn, page->u.inuse.type_info,
   9.512 +                       page->count_info, i, pt[i]);
   9.513 +        }
   9.514 +
   9.515 +        unmap_domain_mem(pt);           
   9.516 +    }
   9.517 +
   9.518 +    void scan_for_pfn(struct domain *d, unsigned long xmfn)
   9.519 +    {
   9.520 +        if ( !shadow_mode_enabled(d) )
   9.521 +        {
   9.522 +            struct list_head *list_ent = d->page_list.next;
   9.523 +            struct pfn_info *page;
   9.524 +
   9.525 +            while ( list_ent != &d->page_list )
   9.526 +            {
   9.527 +                page = list_entry(list_ent, struct pfn_info, list);
   9.528 +
   9.529 +                switch ( page->u.inuse.type_info & PGT_type_mask )
   9.530 +                {
   9.531 +                case PGT_l1_page_table:
   9.532 +                case PGT_l2_page_table:
   9.533 +                    scan_for_pfn_in_mfn(d, xmfn, page_to_pfn(page));
   9.534 +                    break;
   9.535 +                default:
   9.536 +                    break;
   9.537 +                }
   9.538 +
   9.539 +                list_ent = page->list.next;
   9.540 +            }
   9.541 +        }
   9.542 +        else
   9.543 +        {
   9.544 +            struct shadow_status *a;
   9.545 +            int i;
   9.546 +            
   9.547 +            for ( i = 0; i < shadow_ht_buckets; i++ )
   9.548 +            {
   9.549 +                a = &d->arch.shadow_ht[i];
   9.550 +                while ( a && a->gpfn_and_flags )
   9.551 +                {
   9.552 +                    switch ( a->gpfn_and_flags & PGT_type_mask )
   9.553 +                    {
   9.554 +                    case PGT_l1_shadow:
   9.555 +                    case PGT_l2_shadow:
   9.556 +                    case PGT_hl2_shadow:
   9.557 +                        scan_for_pfn_in_mfn(d, xmfn, a->smfn);
   9.558 +                        break;
   9.559 +                    case PGT_snapshot:
   9.560 +                        break;
   9.561 +                    default:
   9.562 +                        BUG();
   9.563 +                        break;
   9.564 +                    }
   9.565 +                    a = a->next;
   9.566 +                }
   9.567 +            }
   9.568 +        }
   9.569 +    }
   9.570 +
   9.571 +    void scan_for_pfn_remote(unsigned long xmfn)
   9.572 +    {
   9.573 +        struct domain *e;
   9.574 +        for_each_domain ( e )
   9.575 +            scan_for_pfn( e, xmfn );
   9.576 +    } 
   9.577 +
   9.578 +    unsigned long mfn;
   9.579 +    struct list_head *list_ent;
   9.580 +    struct pfn_info *page;
   9.581 +    int errors = 0;
   9.582 +
   9.583 +    if ( d != current->domain )
   9.584 +        domain_pause(d);
   9.585 +    synchronise_pagetables(~0UL);
   9.586 +
   9.587 +    // Maybe we should just be using BIGLOCK?
   9.588 +    //
   9.589 +    if ( !(flags & AUDIT_ALREADY_LOCKED) )
   9.590 +        shadow_lock(d);
   9.591 +
   9.592 +    spin_lock(&d->page_alloc_lock);
   9.593 +
   9.594 +    /* PHASE 0 */
   9.595 +
   9.596 +    list_ent = d->page_list.next;
   9.597 +    while ( list_ent != &d->page_list )
   9.598 +    {
   9.599 +        u32 page_type;
   9.600 +
   9.601 +        page = list_entry(list_ent, struct pfn_info, list);
   9.602 +        mfn = page_to_pfn(page);
   9.603 +        page_type = page->u.inuse.type_info & PGT_type_mask;
   9.604 +
   9.605 +        if ( page_get_owner(page) != d )
   9.606 +            BUG();
   9.607 +
   9.608 +        if ( (page->u.inuse.type_info & PGT_count_mask) >
   9.609 +             (page->count_info & PGC_count_mask) )
   9.610 +        {
   9.611 +            printk("taf(%08x) > caf(%08x) mfn=%p\n",
   9.612 +                   page->u.inuse.type_info, page->count_info, mfn);
   9.613 +            errors++;
   9.614 +        }
   9.615 +
   9.616 +        if ( shadow_mode_enabled(d) &&
   9.617 +             (page_type == PGT_writable_page) &&
   9.618 +             !(page->u.inuse.type_info & PGT_validated) )
   9.619 +        {
   9.620 +            printk("shadow mode writable page not validated mfn=%p t=%08x c=%08x\n",
   9.621 +                   mfn, page->u.inuse.type_info, page->count_info);
   9.622 +            errors++;
   9.623 +        }
   9.624 + 
   9.625 +#if 0   /* SYSV shared memory pages plus writeable files. */
   9.626 +        if ( page_type == PGT_writable_page && 
   9.627 +             (page->u.inuse.type_info & PGT_count_mask) > 1 )
   9.628 +        {
   9.629 +            printk("writeable page with type count >1: mfn=%lx t=%x c=%x\n",
   9.630 +                  mfn,
   9.631 +                  page->u.inuse.type_info,
   9.632 +                  page->count_info );
   9.633 +            errors++;
   9.634 +            scan_for_pfn_remote(mfn);
   9.635 +        }
   9.636 +#endif
   9.637 +
   9.638 +        if ( page_type == PGT_none && 
   9.639 +             (page->u.inuse.type_info & PGT_count_mask) > 0 )
   9.640 +        {
   9.641 +            printk("normal page with type count >0: mfn=%lx t=%x c=%x\n",
   9.642 +                  mfn,
   9.643 +                  page->u.inuse.type_info,
   9.644 +                  page->count_info );
   9.645 +            errors++;
   9.646 +        }
   9.647 +
   9.648 +        if ( page_out_of_sync(page) )
   9.649 +        {
   9.650 +            if ( !page_is_page_table(page) )
   9.651 +            {
   9.652 +                printk("out of sync page mfn=%p is not a page table\n", mfn);
   9.653 +                errors++;
   9.654 +            }
   9.655 +            unsigned long pfn = __mfn_to_gpfn(d, mfn);
   9.656 +            if ( !__shadow_status(d, pfn, PGT_snapshot) )
   9.657 +            {
   9.658 +                printk("out of sync page mfn=%p doesn't have a snapshot\n");
   9.659 +                errors++;
   9.660 +            }
   9.661 +            if ( page_type != PGT_writable_page )
   9.662 +            {
   9.663 +                printk("out of sync page mfn=%p has strange type t=%08x c=%08x\n",
   9.664 +                       mfn, page->u.inuse.type_info, page->count_info);
   9.665 +                errors++;
   9.666 +            }
   9.667 +        }
   9.668 +
   9.669 +        /* Use tlbflush_timestamp to store original type_info. */
   9.670 +        page->tlbflush_timestamp = page->u.inuse.type_info;
   9.671 +
   9.672 +        list_ent = page->list.next;
   9.673 +    }
   9.674 +
   9.675 +    /* PHASE 1 */
   9.676 +    io_mappings = lowmem_mappings = 0;
   9.677 +
   9.678 +    errors += audit_adjust_pgtables(d, -1, 1);
   9.679 +
   9.680 +    if ( !(flags & AUDIT_QUIET) &&
   9.681 +         ((io_mappings > 0) || (lowmem_mappings > 0)) )
   9.682 +        printk("Audit %d: Found %d lowmem mappings and %d io mappings\n",
   9.683 +               d->id, lowmem_mappings, io_mappings);
   9.684 +
   9.685 +    /* PHASE 2 */
   9.686 +
   9.687 +    list_ent = d->page_list.next;
   9.688 +    while ( list_ent != &d->page_list )
   9.689 +    {
   9.690 +        page = list_entry(list_ent, struct pfn_info, list);
   9.691 +        mfn = page_to_pfn(page);
   9.692 +
   9.693 +        switch ( page->u.inuse.type_info & PGT_type_mask)
   9.694 +        {
   9.695 +        case PGT_l1_page_table:
   9.696 +        case PGT_l2_page_table:
   9.697 +            if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
   9.698 +            {
   9.699 +                printk("Audit %d: type count!=0 t=%x ot=%x c=%x mfn=%lx\n",
   9.700 +                       d->id, page->u.inuse.type_info, 
   9.701 +                       page->tlbflush_timestamp,
   9.702 +                       page->count_info, mfn);
   9.703 +                errors++;
   9.704 +                scan_for_pfn_remote(mfn);
   9.705 +            }
   9.706 +            break;
   9.707 +        case PGT_none:
   9.708 +        case PGT_writable_page:
   9.709 +        case PGT_gdt_page:
   9.710 +        case PGT_ldt_page:
   9.711 +            if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
   9.712 +            {
   9.713 +                printk("Audit %d: type count!=0 t=%x ot=%x c=%x mfn=%lx\n",
   9.714 +                       d->id, page->u.inuse.type_info, 
   9.715 +                       page->tlbflush_timestamp,
   9.716 +                       page->count_info, mfn);
   9.717 +                errors++;
   9.718 +            }
   9.719 +            break;
   9.720 +        default:
   9.721 +            BUG(); // XXX fix me...
   9.722 +        }
   9.723 +        
   9.724 +        if ( (page->count_info & PGC_count_mask) != 1 )
   9.725 +        {
   9.726 +            printk("Audit %d: gen count!=1 (c=%x) t=%x ot=%x mfn=%lx\n",
   9.727 +                   d->id,
   9.728 +                   page->count_info,
   9.729 +                   page->u.inuse.type_info, 
   9.730 +                   page->tlbflush_timestamp, mfn );
   9.731 +            errors++;
   9.732 +            scan_for_pfn_remote(mfn);
   9.733 +        }
   9.734 +
   9.735 +        list_ent = page->list.next;
   9.736 +    }
   9.737 +
   9.738 +    if ( shadow_mode_enabled(d) )
   9.739 +    {
   9.740 +        struct shadow_status *a;
   9.741 +        struct pfn_info *page;
   9.742 +        u32 page_type;
   9.743 +        int i;
   9.744 +
   9.745 +        for ( i = 0; i < shadow_ht_buckets; i++ )
   9.746 +        {
   9.747 +            a = &d->arch.shadow_ht[i];
   9.748 +            while ( a && a->gpfn_and_flags )
   9.749 +            {
   9.750 +                page = pfn_to_page(a->smfn);
   9.751 +                page_type = a->gpfn_and_flags & PGT_type_mask;
   9.752 +
   9.753 +                switch ( page_type ) {
   9.754 +                case PGT_snapshot:
   9.755 +                    // XXX -- what should we check here?
   9.756 +                    break;
   9.757 +                case PGT_l1_shadow:
   9.758 +                case PGT_l2_shadow:
   9.759 +                    if ( ((page->u.inuse.type_info & PGT_type_mask) != page_type ) ||
   9.760 +                         (page->count_info != 0) )
   9.761 +                    {
   9.762 +                        printk("Audit %d: shadow page counts wrong mfn=%p t=%x c=%x\n",
   9.763 +                               d->id, page_to_pfn(page),
   9.764 +                               page->u.inuse.type_info,
   9.765 +                               page->count_info);
   9.766 +                        errors++;
   9.767 +                    }
   9.768 +                    break;
   9.769 +
   9.770 +                case PGT_hl2_shadow: // haven't thought about this case yet.
   9.771 +                default:
   9.772 +                    BUG();
   9.773 +                    break;
   9.774 +                }
   9.775 +
   9.776 +                a = a->next;
   9.777 +            }
   9.778 +        }
   9.779 +    }
   9.780 +
   9.781 +    /* PHASE 3 */
   9.782 +    ctot = ttot = page_count = l1 = l2 = oos_count = 0;
   9.783 +
   9.784 +    audit_adjust_pgtables(d, 1, 0);
   9.785 +
   9.786 +#if 0
   9.787 +    // This covers our sins of trashing the tlbflush_timestamps...
   9.788 +    //
   9.789 +    local_flush_tlb();
   9.790 +#endif
   9.791 +
   9.792 +    spin_unlock(&d->page_alloc_lock);
   9.793 +
   9.794 +    if ( !(flags & AUDIT_QUIET) )
   9.795 +        printk("Audit dom%d (%s:%d) Done. "
   9.796 +               "pages=%d oos=%d l1=%d l2=%d ctot=%d ttot=%d\n",
   9.797 +               d->id, file, line, page_count, oos_count, l1, l2, ctot, ttot );
   9.798 +
   9.799 +    if ( !(flags & AUDIT_ALREADY_LOCKED) )
   9.800 +        shadow_unlock(d);
   9.801 +
   9.802 +    if ( d != current->domain )
   9.803 +        domain_unpause(d);
   9.804 +
   9.805 +    if ( errors && !(flags & AUDIT_ERRORS_OK) )
   9.806 +        BUG();
   9.807 +}
   9.808 +
   9.809 +void audit_domains(void)
   9.810 +{
   9.811 +    struct domain *d;
   9.812 +    for_each_domain ( d )
   9.813 +        audit_domain(d);
   9.814 +}
   9.815 +
   9.816 +void audit_domains_key(unsigned char key)
   9.817 +{
   9.818 +    audit_domains();
   9.819 +}
   9.820 +#endif