ia64/xen-unstable

changeset 4184:8f0c441d9e25

bitkeeper revision 1.1249 (42387345w4RJ2RC5ifMnONI8xxsgWA)

manual merge with Michael's latest

Signed-off-by: michael.fetterman@cl.cam.ac.uk
author rneugeba@wyvis.research.intel-research.net
date Wed Mar 16 17:56:21 2005 +0000 (2005-03-16)
parents cf77cd925ef3 d617bb4a2907
children 105bb57fc414
files .rootkeys linux-2.6.10-xen-sparse/arch/xen/i386/mm/hypervisor.c linux-2.6.10-xen-sparse/arch/xen/i386/mm/init.c linux-2.6.10-xen-sparse/arch/xen/i386/mm/pgtable.c xen/arch/x86/audit.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/mm.c xen/arch/x86/shadow.c xen/arch/x86/traps.c xen/arch/x86/vmx.c xen/arch/x86/x86_32/domain_page.c xen/common/dom_mem_ops.c xen/common/keyhandler.c xen/common/page_alloc.c xen/common/schedule.c xen/drivers/char/console.c xen/include/asm-x86/domain.h xen/include/asm-x86/mm.h xen/include/asm-x86/page.h xen/include/asm-x86/shadow.h xen/include/asm-x86/x86_32/page.h xen/include/xen/domain.h xen/include/xen/perfc_defn.h ~/audit.c
line diff
     1.1 --- a/.rootkeys	Wed Mar 16 17:31:35 2005 +0000
     1.2 +++ b/.rootkeys	Wed Mar 16 17:56:21 2005 +0000
     1.3 @@ -951,7 +951,7 @@ 3ddb79bcZbRBzT3elFWSX7u6NtMagQ xen/arch/
     1.4  3ddb79bcBQF85CfLS4i1WGZ4oLLaCA xen/arch/x86/Rules.mk
     1.5  3e5636e5FAYZ5_vQnmgwFJfSdmO5Mw xen/arch/x86/acpi.c
     1.6  3ddb79bcsjinG9k1KcvbVBuas1R2dA xen/arch/x86/apic.c
     1.7 -42386d3bKw0QftYe-cDL6_4WiATRTw xen/arch/x86/audit.c
     1.8 +42360b3244-Q6BpEKhR_A1YtG1wPNQ xen/arch/x86/audit.c
     1.9  3ddb79c4yGZ7_22QAFFwPzqP4NSHwA xen/arch/x86/boot/mkelf32.c
    1.10  3ddb79bcSC_LvnmFlX-T5iTgaR0SKg xen/arch/x86/boot/x86_32.S
    1.11  40e42bdbNu4MjI750THP_8J1S-Sa0g xen/arch/x86/boot/x86_64.S
    1.12 @@ -1239,3 +1239,4 @@ 3eb3c87fPL2T_zBb0bHlbZY-ACEKRw xen/tools
    1.13  3eb3c87fmKYTC5GCh_rydFakZp9ayw xen/tools/figlet/README
    1.14  3eb3c87fdQKQ5OBGbM-KjZfi9Us4ng xen/tools/figlet/figlet.c
    1.15  3eb3c87fS7DNbg0i6yhFs28UIqAK5g xen/tools/figlet/xen.flf
    1.16 +42386d3bKw0QftYe-cDL6_4WiATRTw ~/audit.c
     5.1 --- a/xen/arch/x86/audit.c	Wed Mar 16 17:31:35 2005 +0000
     5.2 +++ b/xen/arch/x86/audit.c	Wed Mar 16 17:56:21 2005 +0000
     5.3 @@ -25,25 +25,17 @@
     5.4  #include <xen/kernel.h>
     5.5  #include <xen/lib.h>
     5.6  #include <xen/mm.h>
     5.7 -//#include <xen/sched.h>
     5.8 -//#include <xen/errno.h>
     5.9  #include <xen/perfc.h>
    5.10 -//#include <xen/irq.h>
    5.11 -//#include <xen/softirq.h>
    5.12  #include <asm/shadow.h>
    5.13  #include <asm/page.h>
    5.14  #include <asm/flushtlb.h>
    5.15 -//#include <asm/io.h>
    5.16 -//#include <asm/uaccess.h>
    5.17 -//#include <asm/domain_page.h>
    5.18 -//#include <asm/ldt.h>
    5.19  
    5.20  // XXX SMP bug -- these should not be statics...
    5.21  //
    5.22  static int ttot=0, ctot=0, io_mappings=0, lowmem_mappings=0;
    5.23  static int l1, l2, oos_count, page_count;
    5.24  
    5.25 -#define FILE_AND_LINE 1
    5.26 +#define FILE_AND_LINE 0
    5.27  
    5.28  #if FILE_AND_LINE
    5.29  #define adjust(_p, _a) _adjust((_p), (_a), __FILE__, __LINE__)
    5.30 @@ -73,7 +65,7 @@ int audit_adjust_pgtables(struct domain 
    5.31              if ( page_get_owner(page) == NULL )
    5.32              {
    5.33                  APRINTK("adjust(mfn=%p, dir=%d, adjtype=%d) owner=NULL",
    5.34 -                        page_to_pfn(page), dir, adjtype, file, line);
    5.35 +                        page_to_pfn(page), dir, adjtype);
    5.36                  errors++;
    5.37              }
    5.38  
     8.1 --- a/xen/arch/x86/mm.c	Wed Mar 16 17:31:35 2005 +0000
     8.2 +++ b/xen/arch/x86/mm.c	Wed Mar 16 17:56:21 2005 +0000
     8.3 @@ -1859,8 +1859,6 @@ int do_mmu_update(
     8.4                  break;
     8.5  #endif /* __x86_64__ */
     8.6              default:
     8.7 -                printk("do_mmu_update writable update: ma=%p val=%p\n",
     8.8 -                       req.ptr, req.val);
     8.9                  if ( likely(get_page_type(page, PGT_writable_page)) )
    8.10                  {
    8.11                      if ( shadow_mode_enabled(d) )
    8.12 @@ -2004,6 +2002,8 @@ int do_update_va_mapping(unsigned long v
    8.13      }
    8.14      else
    8.15      {
    8.16 +        unsigned long l1mfn;
    8.17 +
    8.18          if ( unlikely(percpu_info[cpu].foreign &&
    8.19                        (shadow_mode_translate(d) ||
    8.20                         shadow_mode_translate(percpu_info[cpu].foreign))) )
    8.21 @@ -2024,6 +2024,29 @@ int do_update_va_mapping(unsigned long v
    8.22          //
    8.23          __shadow_sync_va(ed, va);
    8.24  
    8.25 +#if 1 /* keep check_pagetables() happy */
    8.26 +        /*
    8.27 +         * However, the above doesn't guarantee that there's no snapshot of
    8.28 +         * the L1 table in question; it just says that the relevant L2 and L1
    8.29 +         * entries for VA are in-sync.  There might still be a snapshot.
    8.30 +         *
    8.31 +         * The checking code in _check_pagetables() assumes that no one will
    8.32 +         * mutate the shadow of a page that has a snapshot.  It's actually
    8.33 +         * OK to not sync this page, but it seems simpler to:
    8.34 +         * 1) keep all code paths the same, and
    8.35 +         * 2) maintain the invariant for _check_pagetables(), rather than try
    8.36 +         *    to teach it about this boundary case.
    8.37 +         * So we flush this L1 page, if it's out of sync.
    8.38 +         */
    8.39 +        l1mfn = (l2_pgentry_val(linear_l2_table(ed)[l2_table_offset(va)]) >>
    8.40 +                 PAGE_SHIFT);
    8.41 +        if ( mfn_out_of_sync(l1mfn) )
    8.42 +        {
    8.43 +            perfc_incrc(extra_va_update_sync);
    8.44 +            __shadow_sync_mfn(d, l1mfn);
    8.45 +        }
    8.46 +#endif /* keep check_pagetables() happy */
    8.47 +
    8.48          if ( unlikely(__put_user(val, &l1_pgentry_val(
    8.49                                       linear_pg_table[l1_linear_offset(va)]))) )
    8.50              err = -EINVAL;
     9.1 --- a/xen/arch/x86/shadow.c	Wed Mar 16 17:31:35 2005 +0000
     9.2 +++ b/xen/arch/x86/shadow.c	Wed Mar 16 17:56:21 2005 +0000
     9.3 @@ -60,7 +60,7 @@ shadow_promote(struct domain *d, unsigne
     9.4          __shadow_sync_mfn(d, gmfn);
     9.5      }
     9.6  
     9.7 -    if ( unlikely(mfn_is_page_table(gmfn)) )
     9.8 +    if ( unlikely(page_is_page_table(page)) )
     9.9      {
    9.10          min_type = shadow_max_pgtable_type(d, gpfn) + PGT_l1_shadow;
    9.11          max_type = new_type;
    9.12 @@ -70,7 +70,7 @@ shadow_promote(struct domain *d, unsigne
    9.13          min_type = PGT_l1_shadow;
    9.14          max_type = PGT_l1_shadow;
    9.15      }
    9.16 -    FSH_LOG("shadow_promote gpfn=%p gmfn=%p nt=%p min=%p max=%p\n",
    9.17 +    FSH_LOG("shadow_promote gpfn=%p gmfn=%p nt=%p min=%p max=%p",
    9.18              gmfn, gmfn, new_type, min_type, max_type);
    9.19  
    9.20      if ( min_type <= max_type )
    9.21 @@ -99,7 +99,7 @@ shadow_promote(struct domain *d, unsigne
    9.22      if ( get_page_type(page, PGT_base_page_table) )
    9.23      {
    9.24          put_page_type(page);
    9.25 -        set_bit(_PGC_page_table, &frame_table[gmfn].count_info);
    9.26 +        set_bit(_PGC_page_table, &page->count_info);
    9.27      }
    9.28      else
    9.29      {
    9.30 @@ -299,8 +299,6 @@ free_shadow_hl2_table(struct domain *d, 
    9.31  static void inline
    9.32  free_shadow_l2_table(struct domain *d, unsigned long smfn)
    9.33  {
    9.34 -    printk("free_shadow_l2_table(smfn=%p)\n", smfn);
    9.35 -
    9.36      unsigned long *pl2e = map_domain_mem(smfn << PAGE_SHIFT);
    9.37      int i, external = shadow_mode_external(d);
    9.38  
    9.39 @@ -388,7 +386,12 @@ release_out_of_sync_entry(struct domain 
    9.40      // Only use entries that have low bits clear...
    9.41      //
    9.42      if ( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
    9.43 +    {
    9.44          put_shadow_ref(entry->writable_pl1e >> PAGE_SHIFT);
    9.45 +        entry->writable_pl1e = -2;
    9.46 +    }
    9.47 +    else
    9.48 +        ASSERT( entry->writable_pl1e == -1 );
    9.49  
    9.50      // Free the snapshot
    9.51      //
    9.52 @@ -399,38 +402,63 @@ static void remove_out_of_sync_entries(s
    9.53  {
    9.54      struct out_of_sync_entry *entry = d->arch.out_of_sync;
    9.55      struct out_of_sync_entry **prev = &d->arch.out_of_sync;
    9.56 +    struct out_of_sync_entry *found = NULL;
    9.57  
    9.58 +    // NB: Be careful not to call something that manipulates this list
    9.59 +    //     while walking it.  Collect the results into a separate list
    9.60 +    //     first, then walk that list.
    9.61 +    //
    9.62      while ( entry )
    9.63      {
    9.64          if ( entry->gmfn == gmfn )
    9.65          {
    9.66 -            release_out_of_sync_entry(d, entry);
    9.67 -            *prev = entry = entry->next;
    9.68 +            // remove from out of sync list
    9.69 +            *prev = entry->next;
    9.70 +
    9.71 +            // add to found list
    9.72 +            entry->next = found;
    9.73 +            found = entry;
    9.74 +
    9.75 +            entry = *prev;
    9.76              continue;
    9.77          }
    9.78          prev = &entry->next;
    9.79          entry = entry->next;
    9.80      }
    9.81 +
    9.82 +    prev = NULL;
    9.83 +    entry = found;
    9.84 +    while ( entry )
    9.85 +    {
    9.86 +        release_out_of_sync_entry(d, entry);
    9.87 +
    9.88 +        prev = &entry->next;
    9.89 +        entry = entry->next;
    9.90 +    }
    9.91 +
    9.92 +    // Add found list to free list
    9.93 +    if ( prev )
    9.94 +    {
    9.95 +        *prev = d->arch.out_of_sync_free;
    9.96 +        d->arch.out_of_sync_free = found;
    9.97 +    }
    9.98  }
    9.99  
   9.100  static void free_out_of_sync_state(struct domain *d)
   9.101  {
   9.102      struct out_of_sync_entry *entry;
   9.103 -    struct out_of_sync_entry **tail = NULL;
   9.104  
   9.105 -    // Add the list of out-of-sync entries to the free list of entries.
   9.106 -    // Not the smartest code.  But it works.
   9.107 +    // NB: Be careful not to call something that manipulates this list
   9.108 +    //     while walking it.  Remove one item at a time, and always
   9.109 +    //     restart from start of list.
   9.110      //
   9.111 -    for ( entry = d->arch.out_of_sync; entry; entry = entry->next)
   9.112 +    while ( (entry = d->arch.out_of_sync) )
   9.113      {
   9.114 +        d->arch.out_of_sync = entry->next;
   9.115          release_out_of_sync_entry(d, entry);
   9.116 -        tail = &entry->next;
   9.117 -    }
   9.118 -    if ( tail )
   9.119 -    {
   9.120 -        *tail = d->arch.out_of_sync_free;
   9.121 -        d->arch.out_of_sync_free = d->arch.out_of_sync;
   9.122 -        d->arch.out_of_sync = NULL;
   9.123 +
   9.124 +        entry->next = d->arch.out_of_sync_free;
   9.125 +        d->arch.out_of_sync_free = entry;
   9.126      }
   9.127  }
   9.128  
   9.129 @@ -1103,7 +1131,8 @@ static unsigned long shadow_l2_table(
   9.130              // shadow_mode_translate (but not external) sl2 tables hold a
   9.131              // ref to their hl2.
   9.132              //
   9.133 -            get_shadow_ref(hl2mfn);
   9.134 +            if ( !get_shadow_ref(hl2mfn) )
   9.135 +                BUG();
   9.136              
   9.137              spl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
   9.138                  mk_l2_pgentry((hl2mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   9.139 @@ -1178,7 +1207,8 @@ void shadow_map_l1_into_current_l2(unsig
   9.140      ASSERT( !(old_sl2e & _PAGE_PRESENT) );
   9.141  #endif
   9.142  
   9.143 -    get_shadow_ref(sl1mfn);
   9.144 +    if ( !get_shadow_ref(sl1mfn) )
   9.145 +        BUG();
   9.146      l2pde_general(d, &gl2e, &sl2e, sl1mfn);
   9.147      __guest_set_l2e(ed, va, gl2e);
   9.148      __shadow_set_l2e(ed, va, sl2e);
   9.149 @@ -1195,9 +1225,13 @@ void shadow_map_l1_into_current_l2(unsig
   9.150  
   9.151          for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
   9.152          {
   9.153 -            l1pte_propagate_from_guest(d, gpl1e[i], &spl1e[i]);
   9.154 -            if ( spl1e[i] & _PAGE_PRESENT )
   9.155 -                get_page_from_l1e(mk_l1_pgentry(spl1e[i]), d);
   9.156 +            unsigned long sl1e;
   9.157 +
   9.158 +            l1pte_propagate_from_guest(d, gpl1e[i], &sl1e);
   9.159 +            if ( (sl1e & _PAGE_PRESENT) &&
   9.160 +                 !shadow_get_page_from_l1e(mk_l1_pgentry(sl1e), d) )
   9.161 +                sl1e = 0;
   9.162 +            spl1e[i] = sl1e;
   9.163          }
   9.164      }
   9.165  }
   9.166 @@ -1293,7 +1327,8 @@ shadow_make_snapshot(
   9.167          BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
   9.168      }
   9.169  
   9.170 -    get_shadow_ref(smfn);
   9.171 +    if ( !get_shadow_ref(smfn) )
   9.172 +        BUG();
   9.173  
   9.174      original = map_domain_mem(gmfn << PAGE_SHIFT);
   9.175      snapshot = map_domain_mem(smfn << PAGE_SHIFT);
   9.176 @@ -1336,13 +1371,7 @@ shadow_mark_mfn_out_of_sync(struct exec_
   9.177  
   9.178      ASSERT(spin_is_locked(&d->arch.shadow_lock));
   9.179      ASSERT(pfn_is_ram(mfn));
   9.180 -    //ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page);
   9.181 -    if (!((page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page))
   9.182 -    {
   9.183 -        printk("assertion failed: gpfn=%p gmfn=%p t=%p\n",
   9.184 -               gpfn, mfn, page->u.inuse.type_info);
   9.185 -        BUG();
   9.186 -    }
   9.187 +    ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page);
   9.188  
   9.189      FSH_LOG("mark_mfn_out_of_sync(gpfn=%p, mfn=%p) c=%p t=%p",
   9.190              gpfn, mfn, page->count_info, page->u.inuse.type_info);
   9.191 @@ -1373,7 +1402,7 @@ shadow_mark_mfn_out_of_sync(struct exec_
   9.192      return entry;
   9.193  }
   9.194  
   9.195 -void shadow_mark_out_of_sync(
   9.196 +void shadow_mark_va_out_of_sync(
   9.197      struct exec_domain *ed, unsigned long gpfn, unsigned long mfn, unsigned long va)
   9.198  {
   9.199      struct out_of_sync_entry *entry =
   9.200 @@ -1402,7 +1431,8 @@ void shadow_mark_out_of_sync(
   9.201      // Increment shadow's page count to represent the reference
   9.202      // inherent in entry->writable_pl1e
   9.203      //
   9.204 -    get_shadow_ref(sl2e >> PAGE_SHIFT);
   9.205 +    if ( !get_shadow_ref(sl2e >> PAGE_SHIFT) )
   9.206 +        BUG();
   9.207  
   9.208      FSH_LOG("mark_out_of_sync(va=%p -> writable_pl1e=%p)",
   9.209              va, entry->writable_pl1e);
   9.210 @@ -1502,8 +1532,9 @@ static u32 remove_all_write_access_in_pt
   9.211              unsigned long old = pt[i];
   9.212              unsigned long new = old & ~_PAGE_RW;
   9.213  
   9.214 -            if ( is_l1_shadow )
   9.215 -                get_page_from_l1e(mk_l1_pgentry(new), d);
   9.216 +            if ( is_l1_shadow &&
   9.217 +                 !shadow_get_page_from_l1e(mk_l1_pgentry(new), d) )
   9.218 +                BUG();
   9.219  
   9.220              count++;
   9.221              pt[i] = new;
   9.222 @@ -1512,7 +1543,7 @@ static u32 remove_all_write_access_in_pt
   9.223                  put_page_from_l1e(mk_l1_pgentry(old), d);
   9.224  
   9.225              FSH_LOG("removed write access to mfn=%p in smfn=%p entry %x "
   9.226 -                    "is_l1_shadow=%d\n",
   9.227 +                    "is_l1_shadow=%d",
   9.228                      readonly_mfn, pt_mfn, i, is_l1_shadow);
   9.229          }
   9.230      }
   9.231 @@ -1633,6 +1664,7 @@ static int resync_all(struct domain *d, 
   9.232      unsigned long smfn;
   9.233      unsigned long *guest, *shadow, *snapshot;
   9.234      int need_flush = 0, external = shadow_mode_external(d);
   9.235 +    int unshadow;
   9.236  
   9.237      ASSERT(spin_is_locked(&d->arch.shadow_lock));
   9.238  
   9.239 @@ -1653,6 +1685,7 @@ static int resync_all(struct domain *d, 
   9.240          guest    = map_domain_mem(entry->gmfn         << PAGE_SHIFT);
   9.241          snapshot = map_domain_mem(entry->snapshot_mfn << PAGE_SHIFT);
   9.242          shadow   = map_domain_mem(smfn                << PAGE_SHIFT);
   9.243 +        unshadow = 0;
   9.244  
   9.245          switch ( stype ) {
   9.246          case PGT_l1_shadow:
   9.247 @@ -1686,6 +1719,16 @@ static int resync_all(struct domain *d, 
   9.248                      //
   9.249                      // snapshot[i] = new_pde;
   9.250                  }
   9.251 +
   9.252 +                // XXX - This hack works for linux guests.
   9.253 +                //       Need a better solution long term.
   9.254 +                if ( !(new_pde & _PAGE_PRESENT) && unlikely(new_pde != 0) &&
   9.255 +                     (frame_table[smfn].u.inuse.type_info & PGT_pinned) &&
   9.256 +                     !unshadow )
   9.257 +                {
   9.258 +                    perfc_incrc(unshadow_l2_count);
   9.259 +                    unshadow = 1;
   9.260 +                }
   9.261              }
   9.262              break;
   9.263          default:
   9.264 @@ -1696,6 +1739,9 @@ static int resync_all(struct domain *d, 
   9.265          unmap_domain_mem(shadow);
   9.266          unmap_domain_mem(snapshot);
   9.267          unmap_domain_mem(guest);
   9.268 +
   9.269 +        if ( unlikely(unshadow) )
   9.270 +            shadow_unpin(smfn);
   9.271      }
   9.272  
   9.273      return need_flush;
   9.274 @@ -1724,7 +1770,9 @@ void __shadow_sync_all(struct domain *d)
   9.275          unsigned long opte = *ppte;
   9.276          unsigned long npte = opte & ~_PAGE_RW;
   9.277  
   9.278 -        get_page_from_l1e(mk_l1_pgentry(npte), d);
   9.279 +        if ( (npte & _PAGE_PRESENT) &&
   9.280 +             !shadow_get_page_from_l1e(mk_l1_pgentry(npte), d) )
   9.281 +            BUG();
   9.282          *ppte = npte;
   9.283          put_page_from_l1e(mk_l1_pgentry(opte), d);
   9.284  
   9.285 @@ -1884,7 +1932,7 @@ void __update_pagetables(struct exec_dom
   9.286      struct domain *d = ed->domain;
   9.287      unsigned long gmfn = pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT;
   9.288      unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
   9.289 -    unsigned long smfn, hl2mfn;
   9.290 +    unsigned long smfn, hl2mfn, old_smfn;
   9.291  
   9.292      int max_mode = ( shadow_mode_external(d) ? SHM_external
   9.293                       : shadow_mode_translate(d) ? SHM_translate
   9.294 @@ -1909,10 +1957,12 @@ void __update_pagetables(struct exec_dom
   9.295       */
   9.296      if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) )
   9.297          smfn = shadow_l2_table(d, gpfn, gmfn);
   9.298 -    get_shadow_ref(smfn);
   9.299 -    if ( pagetable_val(ed->arch.shadow_table) )
   9.300 -        put_shadow_ref(pagetable_val(ed->arch.shadow_table) >> PAGE_SHIFT);
   9.301 +    if ( !get_shadow_ref(smfn) )
   9.302 +        BUG();
   9.303 +    old_smfn = pagetable_val(ed->arch.shadow_table) >> PAGE_SHIFT;
   9.304      ed->arch.shadow_table = mk_pagetable(smfn << PAGE_SHIFT);
   9.305 +    if ( old_smfn )
   9.306 +        put_shadow_ref(old_smfn);
   9.307  
   9.308      SH_VVLOG("0: __update_pagetables(gmfn=%p, smfn=%p)", gmfn, smfn);
   9.309  
   9.310 @@ -1937,7 +1987,8 @@ void __update_pagetables(struct exec_dom
   9.311      {
   9.312          if ( unlikely(!(hl2mfn = __shadow_status(d, gpfn, PGT_hl2_shadow))) )
   9.313              hl2mfn = shadow_hl2_table(d, gpfn, gmfn, smfn);
   9.314 -        get_shadow_ref(hl2mfn);
   9.315 +        if ( !get_shadow_ref(hl2mfn) )
   9.316 +            BUG();
   9.317  
   9.318          if ( ed->arch.hl2_vtable )
   9.319              unmap_domain_mem(ed->arch.hl2_vtable);
    14.1 --- a/xen/common/keyhandler.c	Wed Mar 16 17:31:35 2005 +0000
    14.2 +++ b/xen/common/keyhandler.c	Wed Mar 16 17:56:21 2005 +0000
    14.3 @@ -188,7 +188,7 @@ void initialize_keytable(void)
    14.4      register_keyhandler(
    14.5          'o', audit_domains_key,  "audit domains >0 EXPERIMENTAL");
    14.6      register_keyhandler(
    14.7 -        'T', debugtrace_key, "dump debugtrace");
    14.8 +        'T', debugtrace_key, "toggle debugtrace to console/buffer");
    14.9  #endif
   14.10  
   14.11  #ifdef PERF_COUNTERS
    17.1 --- a/xen/drivers/char/console.c	Wed Mar 16 17:31:35 2005 +0000
    17.2 +++ b/xen/drivers/char/console.c	Wed Mar 16 17:56:21 2005 +0000
    17.3 @@ -511,7 +511,10 @@ void debugtrace_dump(void)
    17.4  
    17.5      spin_lock_irqsave(&debugtrace_lock, flags);
    17.6  
    17.7 +    printk("debugtrace_dump() starting\n");
    17.8 +
    17.9      /* Print oldest portion of the ring. */
   17.10 +    ASSERT(debugtrace_buf[debugtrace_bytes - 1] == 0);
   17.11      serial_puts(sercon_handle, &debugtrace_buf[debugtrace_prd]);
   17.12  
   17.13      /* Print youngest portion of the ring. */
   17.14 @@ -520,6 +523,8 @@ void debugtrace_dump(void)
   17.15  
   17.16      memset(debugtrace_buf, '\0', debugtrace_bytes);
   17.17  
   17.18 +    printk("debugtrace_dump() finished\n");
   17.19 +
   17.20      spin_unlock_irqrestore(&debugtrace_lock, flags);
   17.21  
   17.22      watchdog_on = _watchdog_on;
   17.23 @@ -538,6 +543,8 @@ void debugtrace_printk(const char *fmt, 
   17.24  
   17.25      spin_lock_irqsave(&debugtrace_lock, flags);
   17.26  
   17.27 +    ASSERT(debugtrace_buf[debugtrace_bytes - 1] == 0);
   17.28 +
   17.29      va_start(args, fmt);
   17.30      (void)vsnprintf(buf, sizeof(buf), fmt, args);
   17.31      va_end(args);
   17.32 @@ -563,20 +570,24 @@ void debugtrace_printk(const char *fmt, 
   17.33  static int __init debugtrace_init(void)
   17.34  {
   17.35      int order;
   17.36 -    unsigned int kbytes;
   17.37 +    unsigned int kbytes, bytes;
   17.38  
   17.39      /* Round size down to next power of two. */
   17.40      while ( (kbytes = (debugtrace_kilobytes & (debugtrace_kilobytes-1))) != 0 )
   17.41          debugtrace_kilobytes = kbytes;
   17.42  
   17.43 -    debugtrace_bytes = debugtrace_kilobytes << 10;
   17.44 -    if ( debugtrace_bytes == 0 )
   17.45 +    bytes = debugtrace_kilobytes << 10;
   17.46 +    if ( bytes == 0 )
   17.47          return 0;
   17.48  
   17.49 -    order = get_order(debugtrace_bytes);
   17.50 +    order = get_order(bytes);
   17.51      debugtrace_buf = (unsigned char *)alloc_xenheap_pages(order);
   17.52      ASSERT(debugtrace_buf != NULL);
   17.53  
   17.54 +    memset(debugtrace_buf, '\0', bytes);
   17.55 +
   17.56 +    debugtrace_bytes = bytes;
   17.57 +
   17.58      memset(debugtrace_buf, '\0', debugtrace_bytes);
   17.59  
   17.60      return 0;
    21.1 --- a/xen/include/asm-x86/shadow.h	Wed Mar 16 17:31:35 2005 +0000
    21.2 +++ b/xen/include/asm-x86/shadow.h	Wed Mar 16 17:56:21 2005 +0000
    21.3 @@ -68,6 +68,33 @@ static inline unsigned long __shadow_sta
    21.4  
    21.5  extern void vmx_shadow_clear_state(struct domain *);
    21.6  
    21.7 +static inline int page_is_page_table(struct pfn_info *page)
    21.8 +{
    21.9 +    return page->count_info & PGC_page_table;
   21.10 +}
   21.11 +
   21.12 +static inline int mfn_is_page_table(unsigned long mfn)
   21.13 +{
   21.14 +    if ( !pfn_is_ram(mfn) )
   21.15 +        return 0;
   21.16 +
   21.17 +    return frame_table[mfn].count_info & PGC_page_table;
   21.18 +}
   21.19 +
   21.20 +static inline int page_out_of_sync(struct pfn_info *page)
   21.21 +{
   21.22 +    return page->count_info & PGC_out_of_sync;
   21.23 +}
   21.24 +
   21.25 +static inline int mfn_out_of_sync(unsigned long mfn)
   21.26 +{
   21.27 +    if ( !pfn_is_ram(mfn) )
   21.28 +        return 0;
   21.29 +
   21.30 +    return frame_table[mfn].count_info & PGC_out_of_sync;
   21.31 +}
   21.32 +
   21.33 +
   21.34  /************************************************************************/
   21.35  
   21.36  static void inline
   21.37 @@ -215,6 +242,39 @@ extern int shadow_status_noswap;
   21.38  
   21.39  /************************************************************************/
   21.40  
   21.41 +static inline int
   21.42 +shadow_get_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
   21.43 +{
   21.44 +    int res = get_page_from_l1e(l1e, d);
   21.45 +    unsigned long mfn;
   21.46 +    struct domain *owner;
   21.47 +
   21.48 +    ASSERT( l1_pgentry_val(l1e) & _PAGE_PRESENT );
   21.49 +
   21.50 +    if ( unlikely(!res) && IS_PRIV(d) && !shadow_mode_translate(d) &&
   21.51 +         !(l1_pgentry_val(l1e) & L1_DISALLOW_MASK) &&
   21.52 +         (mfn = l1_pgentry_to_pfn(l1e)) &&
   21.53 +         pfn_is_ram(mfn) &&
   21.54 +         (owner = page_get_owner(pfn_to_page(l1_pgentry_to_pfn(l1e)))) &&
   21.55 +         (d != owner) )
   21.56 +    {
   21.57 +        res = get_page_from_l1e(l1e, owner);
   21.58 +        printk("tried to map mfn %p from domain %d into shadow page tables "
   21.59 +               "of domain %d; %s\n",
   21.60 +               mfn, owner->id, d->id, res ? "success" : "failed");
   21.61 +    }
   21.62 +
   21.63 +    if ( unlikely(!res) )
   21.64 +    {
   21.65 +        perfc_incrc(shadow_get_page_fail);
   21.66 +        FSH_LOG("%s failed to get ref l1e=%p\n", l1_pgentry_val(l1e));
   21.67 +    }
   21.68 +
   21.69 +    return res;
   21.70 +}
   21.71 +
   21.72 +/************************************************************************/
   21.73 +
   21.74  static inline void
   21.75  __shadow_get_l2e(
   21.76      struct exec_domain *ed, unsigned long va, unsigned long *psl2e)
   21.77 @@ -256,8 +316,9 @@ static inline void
   21.78          //
   21.79          if ( (old_hl2e ^ new_hl2e) & (PAGE_MASK | _PAGE_PRESENT) )
   21.80          {
   21.81 -            if ( new_hl2e & _PAGE_PRESENT )
   21.82 -                get_page_from_l1e(mk_l1_pgentry(new_hl2e), ed->domain);
   21.83 +            if ( (new_hl2e & _PAGE_PRESENT) &&
   21.84 +                 !shadow_get_page_from_l1e(mk_l1_pgentry(new_hl2e), ed->domain) )
   21.85 +                new_hl2e = 0;
   21.86              if ( old_hl2e & _PAGE_PRESENT )
   21.87                  put_page_from_l1e(mk_l1_pgentry(old_hl2e), ed->domain);
   21.88          }
   21.89 @@ -314,8 +375,9 @@ put_shadow_ref(unsigned long smfn)
   21.90  
   21.91      if ( unlikely(x == 0) )
   21.92      {
   21.93 -        printk("put_shadow_ref underflow, gmfn=%p smfn=%p\n",
   21.94 -               frame_table[smfn].u.inuse.type_info & PGT_mfn_mask, smfn);
   21.95 +        printk("put_shadow_ref underflow, oc=%p t=%p\n",
   21.96 +               frame_table[smfn].count_info,
   21.97 +               frame_table[smfn].u.inuse.type_info);
   21.98          BUG();
   21.99      }
  21.100  
  21.101 @@ -335,7 +397,8 @@ shadow_pin(unsigned long smfn)
  21.102      ASSERT( !(frame_table[smfn].u.inuse.type_info & PGT_pinned) );
  21.103  
  21.104      frame_table[smfn].u.inuse.type_info |= PGT_pinned;
  21.105 -    get_shadow_ref(smfn);
  21.106 +    if ( !get_shadow_ref(smfn) )
  21.107 +        BUG();
  21.108  }
  21.109  
  21.110  static inline void
  21.111 @@ -403,7 +466,7 @@ static inline int mark_dirty(struct doma
  21.112  
  21.113  /************************************************************************/
  21.114  
  21.115 -extern void shadow_mark_out_of_sync(
  21.116 +extern void shadow_mark_va_out_of_sync(
  21.117      struct exec_domain *ed, unsigned long gpfn, unsigned long mfn,
  21.118      unsigned long va);
  21.119  
  21.120 @@ -436,7 +499,7 @@ static inline void l1pte_write_fault(
  21.121          __mark_dirty(d, mfn);
  21.122  
  21.123      if ( mfn_is_page_table(mfn) )
  21.124 -        shadow_mark_out_of_sync(ed, gpfn, mfn, va);
  21.125 +        shadow_mark_va_out_of_sync(ed, gpfn, mfn, va);
  21.126  
  21.127      *gpte_p = gpte;
  21.128      *spte_p = spte;
  21.129 @@ -474,26 +537,20 @@ static inline void l1pte_read_fault(
  21.130  static inline void l1pte_propagate_from_guest(
  21.131      struct domain *d, unsigned long gpte, unsigned long *spte_p)
  21.132  { 
  21.133 -    unsigned long spte = *spte_p;
  21.134      unsigned long pfn = gpte >> PAGE_SHIFT;
  21.135      unsigned long mfn = __gpfn_to_mfn(d, pfn);
  21.136 +    unsigned long spte;
  21.137  
  21.138  #if SHADOW_VERBOSE_DEBUG
  21.139 -    unsigned long old_spte = spte;
  21.140 +    unsigned long old_spte = *spte_p;
  21.141  #endif
  21.142  
  21.143 -    if ( unlikely(!mfn) )
  21.144 -    {
  21.145 -        // likely an MMIO address space mapping...
  21.146 -        //
  21.147 -        *spte_p = 0;
  21.148 -        return;
  21.149 -    }
  21.150 +    spte = 0;
  21.151  
  21.152 -    spte = 0;
  21.153 -    if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == 
  21.154 -         (_PAGE_PRESENT|_PAGE_ACCESSED) ) {
  21.155 -        
  21.156 +    if ( mfn &&
  21.157 +         ((gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
  21.158 +          (_PAGE_PRESENT|_PAGE_ACCESSED)) ) {
  21.159 +
  21.160          spte = (mfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
  21.161          
  21.162          if ( shadow_mode_log_dirty(d) ||
  21.163 @@ -506,7 +563,7 @@ static inline void l1pte_propagate_from_
  21.164  
  21.165  #if SHADOW_VERBOSE_DEBUG
  21.166      if ( old_spte || spte || gpte )
  21.167 -        debugtrace_printk("l1pte_propagate_from_guest: gpte=0x%p, old spte=0x%p, new spte=0x%p\n", gpte, old_spte, spte);
  21.168 +        SH_VLOG("l1pte_propagate_from_guest: gpte=0x%p, old spte=0x%p, new spte=0x%p", gpte, old_spte, spte);
  21.169  #endif
  21.170  
  21.171      *spte_p = spte;
  21.172 @@ -541,9 +598,10 @@ static inline void l2pde_general(
  21.173  static inline void l2pde_propagate_from_guest(
  21.174      struct domain *d, unsigned long *gpde_p, unsigned long *spde_p)
  21.175  {
  21.176 -    unsigned long gpde = *gpde_p, sl1mfn;
  21.177 +    unsigned long gpde = *gpde_p, sl1mfn = 0;
  21.178  
  21.179 -    sl1mfn =  __shadow_status(d, gpde >> PAGE_SHIFT, PGT_l1_shadow);
  21.180 +    if ( gpde & _PAGE_PRESENT )
  21.181 +        sl1mfn =  __shadow_status(d, gpde >> PAGE_SHIFT, PGT_l1_shadow);
  21.182      l2pde_general(d, gpde_p, spde_p, sl1mfn);
  21.183  }
  21.184      
  21.185 @@ -559,26 +617,31 @@ validate_pte_change(
  21.186  {
  21.187      unsigned long old_spte, new_spte;
  21.188  
  21.189 -    perfc_incrc(validate_pte_change);
  21.190 +    perfc_incrc(validate_pte_calls);
  21.191  
  21.192  #if 0
  21.193      FSH_LOG("validate_pte(old=%p new=%p)\n", old_pte, new_pte);
  21.194  #endif
  21.195  
  21.196      old_spte = *shadow_pte_p;
  21.197 -    l1pte_propagate_from_guest(d, new_pte, shadow_pte_p);
  21.198 -    new_spte = *shadow_pte_p;
  21.199 +    l1pte_propagate_from_guest(d, new_pte, &new_spte);
  21.200  
  21.201      // only do the ref counting if something important changed.
  21.202      //
  21.203 -    if ( (old_spte ^ new_spte) & (PAGE_MASK | _PAGE_RW | _PAGE_PRESENT) )
  21.204 +    if ( ((old_spte | new_spte) & _PAGE_PRESENT ) &&
  21.205 +         ((old_spte ^ new_spte) & (PAGE_MASK | _PAGE_RW | _PAGE_PRESENT)) )
  21.206      {
  21.207 -        if ( new_spte & _PAGE_PRESENT )
  21.208 -            get_page_from_l1e(mk_l1_pgentry(new_spte), d);
  21.209 +        perfc_incrc(validate_pte_changes);
  21.210 +
  21.211 +        if ( (new_spte & _PAGE_PRESENT) &&
  21.212 +             !shadow_get_page_from_l1e(mk_l1_pgentry(new_spte), d) )
  21.213 +            new_spte = 0;
  21.214          if ( old_spte & _PAGE_PRESENT )
  21.215              put_page_from_l1e(mk_l1_pgentry(old_spte), d);
  21.216      }
  21.217  
  21.218 +    *shadow_pte_p = new_spte;
  21.219 +
  21.220      // paranoia rules!
  21.221      return 1;
  21.222  }
  21.223 @@ -588,27 +651,35 @@ validate_pte_change(
  21.224  static int inline
  21.225  validate_pde_change(
  21.226      struct domain *d,
  21.227 -    unsigned long new_pde,
  21.228 +    unsigned long new_gpde,
  21.229      unsigned long *shadow_pde_p)
  21.230  {
  21.231 -    unsigned long old_spde = *shadow_pde_p;
  21.232 -    unsigned long new_spde;
  21.233 +    unsigned long old_spde, new_spde;
  21.234 +
  21.235 +    perfc_incrc(validate_pde_calls);
  21.236  
  21.237 -    perfc_incrc(validate_pde_change);
  21.238 +    old_spde = *shadow_pde_p;
  21.239 +    l2pde_propagate_from_guest(d, &new_gpde, &new_spde);
  21.240 +
  21.241 +    // XXX Shouldn't we propagate the new_gpde to the guest?
  21.242 +    // And then mark the guest's L2 page as dirty?
  21.243  
  21.244 -    l2pde_propagate_from_guest(d, &new_pde, shadow_pde_p);
  21.245 -    new_spde = *shadow_pde_p;
  21.246 -
  21.247 -    // only do the ref counting if something important changed.
  21.248 +    // Only do the ref counting if something important changed.
  21.249      //
  21.250 -    if ( (old_spde ^ new_spde) & (PAGE_MASK | _PAGE_PRESENT) )
  21.251 +    if ( ((old_spde | new_spde) & _PAGE_PRESENT) &&
  21.252 +         ((old_spde ^ new_spde) & (PAGE_MASK | _PAGE_PRESENT)) )
  21.253      {
  21.254 -        if ( new_spde & _PAGE_PRESENT )
  21.255 -            get_shadow_ref(new_spde >> PAGE_SHIFT);
  21.256 +        perfc_incrc(validate_pde_changes);
  21.257 +
  21.258 +        if ( (new_spde & _PAGE_PRESENT) &&
  21.259 +             !get_shadow_ref(new_spde >> PAGE_SHIFT) )
  21.260 +            BUG();
  21.261          if ( old_spde & _PAGE_PRESENT )
  21.262              put_shadow_ref(old_spde >> PAGE_SHIFT);
  21.263      }
  21.264  
  21.265 +    *shadow_pde_p = new_spde;
  21.266 +
  21.267      // paranoia rules!
  21.268      return 1;
  21.269  }
  21.270 @@ -676,6 +747,9 @@ static void shadow_audit(struct domain *
  21.271          BUG();
  21.272      }
  21.273  #endif
  21.274 +
  21.275 +    // XXX ought to add some code to audit the out-of-sync entries, too.
  21.276 +    //
  21.277  }
  21.278  #else
  21.279  #define shadow_audit(p, print) ((void)0)
  21.280 @@ -696,16 +770,12 @@ static inline struct shadow_status *hash
  21.281   *      It returns the shadow's mfn, or zero if it doesn't exist.
  21.282   */
  21.283  
  21.284 -static inline unsigned long __shadow_status(
  21.285 +static inline unsigned long ___shadow_status(
  21.286      struct domain *d, unsigned long gpfn, unsigned long stype)
  21.287  {
  21.288      struct shadow_status *p, *x, *head;
  21.289      unsigned long key = gpfn | stype;
  21.290  
  21.291 -    ASSERT(spin_is_locked(&d->arch.shadow_lock));
  21.292 -    ASSERT(gpfn == (gpfn & PGT_mfn_mask));
  21.293 -    ASSERT(stype && !(stype & ~PGT_type_mask));
  21.294 -
  21.295      perfc_incrc(shadow_status_calls);
  21.296  
  21.297      x = head = hash_bucket(d, gpfn);
  21.298 @@ -755,6 +825,27 @@ static inline unsigned long __shadow_sta
  21.299      return 0;
  21.300  }
  21.301  
  21.302 +static inline unsigned long __shadow_status(
  21.303 +    struct domain *d, unsigned long gpfn, unsigned long stype)
  21.304 +{
  21.305 +    unsigned long gmfn = __gpfn_to_mfn(d, gpfn);
  21.306 +
  21.307 +    ASSERT(spin_is_locked(&d->arch.shadow_lock));
  21.308 +    ASSERT(gpfn == (gpfn & PGT_mfn_mask));
  21.309 +    ASSERT(stype && !(stype & ~PGT_type_mask));
  21.310 +
  21.311 +    if ( gmfn && ((stype != PGT_snapshot)
  21.312 +                  ? !mfn_is_page_table(gmfn)
  21.313 +                  : !mfn_out_of_sync(gmfn)) )
  21.314 +    {
  21.315 +        perfc_incrc(shadow_status_shortcut);
  21.316 +        ASSERT(___shadow_status(d, gpfn, stype) == 0);
  21.317 +        return 0;
  21.318 +    }
  21.319 +
  21.320 +    return ___shadow_status(d, gmfn, stype);
  21.321 +}
  21.322 +
  21.323  /*
  21.324   * Not clear if pull-to-front is worth while for this or not,
  21.325   * as it generally needs to scan the entire bucket anyway.
  21.326 @@ -955,6 +1046,7 @@ static inline void set_shadow_status(
  21.327      {
  21.328          if ( x->gpfn_and_flags == key )
  21.329          {
  21.330 +            BUG();
  21.331              x->smfn = smfn;
  21.332              goto done;
  21.333          }
  21.334 @@ -1059,7 +1151,8 @@ shadow_set_l1e(unsigned long va, unsigne
  21.335              if ( sl1mfn )
  21.336              {
  21.337                  perfc_incrc(shadow_set_l1e_unlinked);
  21.338 -                get_shadow_ref(sl1mfn);
  21.339 +                if ( !get_shadow_ref(sl1mfn) )
  21.340 +                    BUG();
  21.341                  l2pde_general(d, &gpde, &sl2e, sl1mfn);
  21.342                  __guest_set_l2e(ed, va, gpde);
  21.343                  __shadow_set_l2e(ed, va, sl2e);
  21.344 @@ -1074,17 +1167,19 @@ shadow_set_l1e(unsigned long va, unsigne
  21.345      }
  21.346  
  21.347      old_spte = l1_pgentry_val(shadow_linear_pg_table[l1_linear_offset(va)]);
  21.348 -    shadow_linear_pg_table[l1_linear_offset(va)] = mk_l1_pgentry(new_spte);
  21.349  
  21.350      // only do the ref counting if something important changed.
  21.351      //
  21.352      if ( (old_spte ^ new_spte) & (PAGE_MASK | _PAGE_RW | _PAGE_PRESENT) )
  21.353      {
  21.354 -        if ( new_spte & _PAGE_PRESENT )
  21.355 -            get_page_from_l1e(mk_l1_pgentry(new_spte), d);
  21.356 +        if ( (new_spte & _PAGE_PRESENT) &&
  21.357 +             !shadow_get_page_from_l1e(mk_l1_pgentry(new_spte), d) )
  21.358 +            new_spte = 0;
  21.359          if ( old_spte & _PAGE_PRESENT )
  21.360              put_page_from_l1e(mk_l1_pgentry(old_spte), d);
  21.361      }
  21.362 +
  21.363 +    shadow_linear_pg_table[l1_linear_offset(va)] = mk_l1_pgentry(new_spte);
  21.364  }
  21.365  
  21.366  /************************************************************************/
    24.1 --- a/xen/include/xen/perfc_defn.h	Wed Mar 16 17:31:35 2005 +0000
    24.2 +++ b/xen/include/xen/perfc_defn.h	Wed Mar 16 17:56:21 2005 +0000
    24.3 @@ -55,12 +55,14 @@ PERFCOUNTER_CPU( shadow_set_l1e_force_ma
    24.4  PERFCOUNTER_CPU( shadow_set_l1e_unlinked,  "shadow_set_l1e found unlinked l1" )
    24.5  PERFCOUNTER_CPU( shadow_set_l1e_fail,      "shadow_set_l1e failed (no sl1)" )
    24.6  PERFCOUNTER_CPU( shadow_invlpg_faults,     "shadow_invlpg's get_user faulted")
    24.7 +PERFCOUNTER_CPU( unshadow_l2_count,        "unpinned L2 count")
    24.8  
    24.9  
   24.10  /* STATUS counters do not reset when 'P' is hit */
   24.11  PERFSTATUS( snapshot_pages,  "current # fshadow snapshot pages" )
   24.12  
   24.13 -PERFCOUNTER_CPU(shadow_status_calls,    "calls to __shadow_status" )
   24.14 +PERFCOUNTER_CPU(shadow_status_shortcut, "fastpath miss on shadow cache")
   24.15 +PERFCOUNTER_CPU(shadow_status_calls,    "calls to ___shadow_status" )
   24.16  PERFCOUNTER_CPU(shadow_status_miss,     "missed shadow cache" )
   24.17  PERFCOUNTER_CPU(shadow_status_hit_head, "hits on head of bucket" )
   24.18  
   24.19 @@ -68,6 +70,7 @@ PERFCOUNTER_CPU(shadow_sync_all,        
   24.20  PERFCOUNTER_CPU(shadow_make_snapshot,              "snapshots created")
   24.21  PERFCOUNTER_CPU(shadow_mark_mfn_out_of_sync_calls, "calls to shadow_mk_out_of_sync")
   24.22  PERFCOUNTER_CPU(shadow_out_of_sync_calls,          "calls to shadow_out_of_sync")
   24.23 +PERFCOUNTER_CPU(extra_va_update_sync,              "extra syncs for bug in chk_pgtb")
   24.24  PERFCOUNTER_CPU(snapshot_entry_matches_calls,      "calls to ss_entry_matches")
   24.25  PERFCOUNTER_CPU(snapshot_entry_matches_true,       "ss_entry_matches returns true")
   24.26  
   24.27 @@ -76,5 +79,7 @@ PERFCOUNTER_CPU(shadow_fault_bail_pde_no
   24.28  PERFCOUNTER_CPU(shadow_fault_bail_pte_not_present, "sf bailed due to pte not present")
   24.29  PERFCOUNTER_CPU(shadow_fault_bail_ro_mapping,      "sf bailed due to a ro mapping")
   24.30  PERFCOUNTER_CPU(shadow_fault_fixed,                "sf fixed the pgfault")
   24.31 -PERFCOUNTER_CPU(validate_pte_change,               "calls to validate_pte_change")
   24.32 -PERFCOUNTER_CPU(validate_pde_change,               "calls to validate_pde_change")
   24.33 +PERFCOUNTER_CPU(validate_pte_calls,                "calls to validate_pte_change")
   24.34 +PERFCOUNTER_CPU(validate_pte_changes,              "validate_pte makes changes")
   24.35 +PERFCOUNTER_CPU(validate_pde_calls,                "calls to validate_pde_change")
   24.36 +PERFCOUNTER_CPU(validate_pde_changes,              "validate_pde makes changes")
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/~/audit.c	Wed Mar 16 17:56:21 2005 +0000
    25.3 @@ -0,0 +1,817 @@
    25.4 +/******************************************************************************
    25.5 + * arch/x86/audit.c
    25.6 + * 
    25.7 + * Copyright (c) 2002-2005 K A Fraser
    25.8 + * Copyright (c) 2004 Christian Limpach
    25.9 + * Copyright (c) 2005 Michael A Fetterman
   25.10 + * 
   25.11 + * This program is free software; you can redistribute it and/or modify
   25.12 + * it under the terms of the GNU General Public License as published by
   25.13 + * the Free Software Foundation; either version 2 of the License, or
   25.14 + * (at your option) any later version.
   25.15 + * 
   25.16 + * This program is distributed in the hope that it will be useful,
   25.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   25.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   25.19 + * GNU General Public License for more details.
   25.20 + * 
   25.21 + * You should have received a copy of the GNU General Public License
   25.22 + * along with this program; if not, write to the Free Software
   25.23 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   25.24 + */
   25.25 +
   25.26 +#include <xen/config.h>
   25.27 +#include <xen/init.h>
   25.28 +#include <xen/kernel.h>
   25.29 +#include <xen/lib.h>
   25.30 +#include <xen/mm.h>
   25.31 +//#include <xen/sched.h>
   25.32 +//#include <xen/errno.h>
   25.33 +#include <xen/perfc.h>
   25.34 +//#include <xen/irq.h>
   25.35 +//#include <xen/softirq.h>
   25.36 +#include <asm/shadow.h>
   25.37 +#include <asm/page.h>
   25.38 +#include <asm/flushtlb.h>
   25.39 +//#include <asm/io.h>
   25.40 +//#include <asm/uaccess.h>
   25.41 +//#include <asm/domain_page.h>
   25.42 +//#include <asm/ldt.h>
   25.43 +
   25.44 +// XXX SMP bug -- these should not be statics...
   25.45 +//
   25.46 +static int ttot=0, ctot=0, io_mappings=0, lowmem_mappings=0;
   25.47 +static int l1, l2, oos_count, page_count;
   25.48 +
   25.49 +#define FILE_AND_LINE 1
   25.50 +
   25.51 +#if FILE_AND_LINE
   25.52 +#define adjust(_p, _a) _adjust((_p), (_a), __FILE__, __LINE__)
   25.53 +#define ADJUST_EXTRA_ARGS ,const char *file, int line
   25.54 +#define APRINTK(_f, _a...) printk(_f " %s:%d\n", ## _a, file, line)
   25.55 +#else
   25.56 +#define adjust _adjust
   25.57 +#define ADJUST_EXTRA_ARGS
   25.58 +#define APRINTK(_f, _a...) printk(_f "\n", ##_a)
   25.59 +#endif
   25.60 +
   25.61 +int audit_adjust_pgtables(struct domain *d, int dir, int noisy)
   25.62 +{
   25.63 +    int errors = 0;
   25.64 +    int shadow_enabled = shadow_mode_enabled(d) ? 1 : 0;
   25.65 +
   25.66 +    void _adjust(struct pfn_info *page, int adjtype ADJUST_EXTRA_ARGS)
   25.67 +    {
   25.68 +        if ( adjtype )
   25.69 +        {
   25.70 +            // adjust the type count
   25.71 +            //
   25.72 +            int tcount = page->u.inuse.type_info & PGT_count_mask;
   25.73 +            tcount += dir;
   25.74 +            ttot++;
   25.75 +
   25.76 +            if ( page_get_owner(page) == NULL )
   25.77 +            {
   25.78 +                APRINTK("adjust(mfn=%p, dir=%d, adjtype=%d) owner=NULL",
   25.79 +                        page_to_pfn(page), dir, adjtype, file, line);
   25.80 +                errors++;
   25.81 +            }
   25.82 +
   25.83 +            if ( tcount < 0 )
   25.84 +            {
   25.85 +                APRINTK("Audit %d: type count went below zero mfn=%x t=%x ot=%x",
   25.86 +                        d->id, page-frame_table,
   25.87 +                        page->u.inuse.type_info,
   25.88 +                        page->tlbflush_timestamp);
   25.89 +                errors++;
   25.90 +            }
   25.91 +            else if ( (tcount & ~PGT_count_mask) != 0 )
   25.92 +            {
   25.93 +                APRINTK("Audit %d: type count overflowed mfn=%x t=%x ot=%x",
   25.94 +                        d->id, page-frame_table,
   25.95 +                        page->u.inuse.type_info,
   25.96 +                        page->tlbflush_timestamp);
   25.97 +                errors++;
   25.98 +            }
   25.99 +            else
  25.100 +                page->u.inuse.type_info += dir;
  25.101 +        }
  25.102 +
  25.103 +        // adjust the general count
  25.104 +        //
  25.105 +        int count = page->count_info & PGC_count_mask;
  25.106 +        count += dir;
  25.107 +        ctot++;
  25.108 +
  25.109 +        if ( count < 0 )
  25.110 +        {
  25.111 +            APRINTK("Audit %d: general count went below zero pfn=%x t=%x ot=%x",
  25.112 +                    d->id, page-frame_table,
  25.113 +                    page->u.inuse.type_info,
  25.114 +                    page->tlbflush_timestamp);
  25.115 +            errors++;
  25.116 +        }
  25.117 +        else if ( (count & ~PGT_count_mask) != 0 )
  25.118 +        {
  25.119 +            APRINTK("Audit %d: general count overflowed pfn=%x t=%x ot=%x",
  25.120 +                    d->id, page-frame_table,
  25.121 +                    page->u.inuse.type_info,
  25.122 +                    page->tlbflush_timestamp);
  25.123 +            errors++;
  25.124 +        }
  25.125 +        else
  25.126 +            page->count_info += dir;
  25.127 +    }
  25.128 +
  25.129 +    void adjust_l2_page(unsigned long mfn, int adjtype)
  25.130 +    {
  25.131 +        unsigned long *pt = map_domain_mem(mfn << PAGE_SHIFT);
  25.132 +        int i, limit;
  25.133 +
  25.134 +        if ( shadow_mode_external(d) )
  25.135 +            limit = L2_PAGETABLE_ENTRIES;
  25.136 +        else
  25.137 +            limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
  25.138 +
  25.139 +        for ( i = 0; i < limit; i++ )
  25.140 +        {
  25.141 +            if ( pt[i] & _PAGE_PRESENT )
  25.142 +            {
  25.143 +                unsigned long l1mfn = pt[i] >> PAGE_SHIFT;
  25.144 +                struct pfn_info *l1page = pfn_to_page(l1mfn);
  25.145 +
  25.146 +                if ( noisy )
  25.147 +                {
  25.148 +                    if ( shadow_enabled )
  25.149 +                    {
  25.150 +                        if ( page_get_owner(l1page) != NULL )
  25.151 +                        {
  25.152 +                            printk("L2: Bizarre shadow L1 page mfn=%p "
  25.153 +                                   "belonging to a domain %p (id=%d)\n",
  25.154 +                                   l1mfn,
  25.155 +                                   page_get_owner(l1page),
  25.156 +                                   page_get_owner(l1page)->id);
  25.157 +                            errors++;
  25.158 +                            continue;
  25.159 +                        }
  25.160 +                    }
  25.161 +                    else
  25.162 +                    {
  25.163 +                        if ( page_get_owner(l1page) != d )
  25.164 +                        {
  25.165 +                            printk("L2: Skip bizarre L1 page mfn=%p "
  25.166 +                                   "belonging to other dom %p (id=%d)\n",
  25.167 +                                   l1mfn,
  25.168 +                                   page_get_owner(l1page),
  25.169 +                                   page_get_owner(l1page)->id);
  25.170 +                            errors++;
  25.171 +                            continue;
  25.172 +                        }
  25.173 +
  25.174 +                        u32 page_type = l1page->u.inuse.type_info & PGT_type_mask;
  25.175 +
  25.176 +                        if ( page_type == PGT_l2_page_table )
  25.177 +                        {
  25.178 +                            printk("Audit %d: [%x] Found %s Linear PT "
  25.179 +                                   "t=%x mfn=%p\n",
  25.180 +                                   d->id, i, (l1mfn==mfn) ? "Self" : "Other",
  25.181 +                                   l1page->u.inuse.type_info, l1mfn);
  25.182 +                        }
  25.183 +                        else if ( page_type != PGT_l1_page_table )
  25.184 +                        {
  25.185 +                            printk("Audit %d: [L2 mfn=%p i=%x] "
  25.186 +                                   "Expected L1 t=%x mfn=%p\n",
  25.187 +                                   d->id, mfn, i,
  25.188 +                                   l1page->u.inuse.type_info, l1mfn);
  25.189 +                            errors++;
  25.190 +                        }
  25.191 +                    }
  25.192 +                }
  25.193 +
  25.194 +                adjust(l1page, adjtype);
  25.195 +            }
  25.196 +        }
  25.197 +
  25.198 +        unmap_domain_mem(pt);
  25.199 +    }
  25.200 +
  25.201 +    void adjust_l1_page(unsigned long l1mfn)
  25.202 +    {
  25.203 +        unsigned long *pt = map_domain_mem(l1mfn << PAGE_SHIFT);
  25.204 +        int i;
  25.205 +
  25.206 +        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
  25.207 +        {
  25.208 +            if ( pt[i] & _PAGE_PRESENT )
  25.209 +            {
  25.210 +                unsigned long gmfn = pt[i] >> PAGE_SHIFT;
  25.211 +                struct pfn_info *gpage = pfn_to_page(gmfn);
  25.212 +
  25.213 +                if ( gmfn < 0x100 )
  25.214 +                {
  25.215 +                    lowmem_mappings++;
  25.216 +                    continue;
  25.217 +                }
  25.218 +
  25.219 +                if ( gmfn > max_page )
  25.220 +                {
  25.221 +                    io_mappings++;
  25.222 +                    continue;
  25.223 +                }
  25.224 +
  25.225 +                if ( noisy )
  25.226 +                {
  25.227 +                    if ( pt[i] & _PAGE_RW )
  25.228 +                    {
  25.229 +                        // If it's not a writable page, complain.
  25.230 +                        //
  25.231 +                        if ( !((gpage->u.inuse.type_info & PGT_type_mask) ==
  25.232 +                               PGT_writable_page) )
  25.233 +                        {
  25.234 +                            printk("Audit %d: [l1mfn=%p, i=%x] Illegal RW "
  25.235 +                                   "t=%x mfn=%p\n",
  25.236 +                                   d->id, l1mfn, i,
  25.237 +                                   gpage->u.inuse.type_info, gmfn);
  25.238 +                            errors++;
  25.239 +                        }
  25.240 +
  25.241 +                        if ( shadow_enabled &&
  25.242 +                             page_is_page_table(gpage) &&
  25.243 +                             ! page_out_of_sync(gpage) )
  25.244 +                        {
  25.245 +                            printk("Audit %d: [l1mfn=%p, i=%x] Illegal RW of "
  25.246 +                                   "page table gmfn=%p\n",
  25.247 +                                   d->id, l1mfn, i, gmfn);
  25.248 +                            errors++;
  25.249 +                        }
  25.250 +                    }
  25.251 +
  25.252 +                    if ( page_get_owner(gpage) != d )
  25.253 +                    {
  25.254 +                        printk("Audit %d: [l1mfn=%p,i=%x] Skip foreign page "
  25.255 +                               "dom=%p (id=%d) mfn=%p c=%08x t=%08x\n",
  25.256 +                               d->id, l1mfn, i,
  25.257 +                               page_get_owner(gpage),
  25.258 +                               page_get_owner(gpage)->id,
  25.259 +                               gmfn,
  25.260 +                               gpage->count_info,
  25.261 +                               gpage->u.inuse.type_info);
  25.262 +                        continue;
  25.263 +                    }
  25.264 +                }
  25.265 +
  25.266 +                adjust(gpage, (pt[i] & _PAGE_RW) ? 1 : 0);
  25.267 +            }
  25.268 +        }
  25.269 +
  25.270 +        unmap_domain_mem(pt);
  25.271 +    }
  25.272 +
  25.273 +    void adjust_shadow_tables()
  25.274 +    {
  25.275 +        struct shadow_status *a;
  25.276 +        unsigned long smfn, gmfn;
  25.277 +        struct pfn_info *page;
  25.278 +        int i;
  25.279 +
  25.280 +        for ( i = 0; i < shadow_ht_buckets; i++ )
  25.281 +        {
  25.282 +            a = &d->arch.shadow_ht[i];
  25.283 +            while ( a && a->gpfn_and_flags )
  25.284 +            {
  25.285 +                gmfn = __gpfn_to_mfn(d, a->gpfn_and_flags & PGT_mfn_mask);
  25.286 +                smfn = a->smfn;
  25.287 +                page = &frame_table[smfn];
  25.288 +
  25.289 +                adjust(pfn_to_page(gmfn), 0);
  25.290 +
  25.291 +                switch ( a->gpfn_and_flags & PGT_type_mask ) {
  25.292 +                case PGT_snapshot:
  25.293 +                    break;
  25.294 +                case PGT_l1_shadow:
  25.295 +                case PGT_hl2_shadow:
  25.296 +                    adjust_l1_page(smfn);
  25.297 +                    if ( page->u.inuse.type_info & PGT_pinned )
  25.298 +                        adjust(page, 0);
  25.299 +                    break;
  25.300 +                case PGT_l2_shadow:
  25.301 +                    adjust_l2_page(smfn, 0);
  25.302 +                    if ( page->u.inuse.type_info & PGT_pinned )
  25.303 +                        adjust(page, 0);
  25.304 +                    break;
  25.305 +                default:
  25.306 +                    BUG();
  25.307 +                    break;
  25.308 +                }
  25.309 +
  25.310 +                a = a->next;
  25.311 +            }
  25.312 +        }
  25.313 +    }
  25.314 +
  25.315 +    void adjust_oos_list()
  25.316 +    {
  25.317 +        struct out_of_sync_entry *oos;
  25.318 +
  25.319 +        if ( (oos = d->arch.out_of_sync) )
  25.320 +            ASSERT(shadow_enabled);
  25.321 +
  25.322 +        while ( oos )
  25.323 +        {
  25.324 +            adjust(pfn_to_page(oos->gmfn), 0);
  25.325 +
  25.326 +            // Only use entries that have low bits clear...
  25.327 +            //
  25.328 +            if ( !(oos->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
  25.329 +                adjust(pfn_to_page(oos->writable_pl1e >> PAGE_SHIFT), 0);
  25.330 +
  25.331 +            oos = oos->next;
  25.332 +            oos_count++;
  25.333 +        }
  25.334 +    }
  25.335 +
  25.336 +    void adjust_for_pgtbase()
  25.337 +    {
  25.338 +        struct exec_domain *ed;
  25.339 +
  25.340 +        for_each_exec_domain(d, ed)
  25.341 +            {
  25.342 +                if ( !shadow_enabled )
  25.343 +                {
  25.344 +                    if ( pagetable_val(ed->arch.guest_table) )
  25.345 +                        adjust(&frame_table[pagetable_val(ed->arch.guest_table)
  25.346 +                                            >> PAGE_SHIFT], 1);
  25.347 +                }
  25.348 +                else
  25.349 +                {
  25.350 +                    if ( pagetable_val(ed->arch.guest_table) )
  25.351 +                        adjust(&frame_table[pagetable_val(ed->arch.guest_table)
  25.352 +                                            >> PAGE_SHIFT], 0);
  25.353 +                    if ( pagetable_val(ed->arch.shadow_table) )
  25.354 +                        adjust(&frame_table[pagetable_val(ed->arch.shadow_table)
  25.355 +                                            >> PAGE_SHIFT], 0);
  25.356 +                }
  25.357 +            }
  25.358 +    }
  25.359 +
  25.360 +    void adjust_guest_pages()
  25.361 +    {
  25.362 +        struct list_head *list_ent = d->page_list.next;
  25.363 +        struct pfn_info *page;
  25.364 +        unsigned long mfn;
  25.365 +
  25.366 +        while ( list_ent != &d->page_list )
  25.367 +        {
  25.368 +            u32 page_type;
  25.369 +
  25.370 +            page = list_entry(list_ent, struct pfn_info, list);
  25.371 +            mfn = page_to_pfn(page);
  25.372 +            page_type = page->u.inuse.type_info & PGT_type_mask;
  25.373 +
  25.374 +            if ( page_get_owner(page) != d )
  25.375 +                BUG();
  25.376 +
  25.377 +            page_count++;
  25.378 +
  25.379 +            switch ( page_type )
  25.380 +            {
  25.381 +            case PGT_l2_page_table:
  25.382 +                l2++;
  25.383 +
  25.384 +                if ( noisy )
  25.385 +                {
  25.386 +                    if ( shadow_enabled )
  25.387 +                    {
  25.388 +                        printk("Audit %d: found an L2 guest page "
  25.389 +                               "mfn=%p t=%08x c=%08x while in shadow mode\n",
  25.390 +                               mfn, page->u.inuse.type_info, page->count_info);
  25.391 +                        errors++;
  25.392 +                    }
  25.393 +
  25.394 +                    if ( (page->u.inuse.type_info & PGT_validated) !=
  25.395 +                         PGT_validated )
  25.396 +                    {
  25.397 +                        printk("Audit %d: L2 mfn=%p not validated %p\n",
  25.398 +                               d->id, mfn, page->u.inuse.type_info);
  25.399 +                        errors++;
  25.400 +                    }
  25.401 +
  25.402 +                    if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  25.403 +                    {
  25.404 +                        printk("Audit %d: L2 mfn=%p not pinned t=%p\n",
  25.405 +                               d->id, mfn, page->u.inuse.type_info);
  25.406 +                        errors++;
  25.407 +                    }
  25.408 +                }
  25.409 +
  25.410 +                if ( page->u.inuse.type_info & PGT_pinned )
  25.411 +                    adjust(page, 1);
  25.412 +
  25.413 +                if ( page->u.inuse.type_info & PGT_validated )
  25.414 +                    adjust_l2_page(mfn, 1);
  25.415 +
  25.416 +                break;
  25.417 +
  25.418 +            case PGT_l1_page_table:
  25.419 +                l1++;
  25.420 +
  25.421 +                if ( noisy )
  25.422 +                {
  25.423 +                    if ( shadow_enabled )
  25.424 +                    {
  25.425 +                        printk("found an L1 guest page mfn=%p t=%08x c=%08x while in shadow mode\n",
  25.426 +                               mfn, page->u.inuse.type_info, page->count_info);
  25.427 +                        errors++;
  25.428 +                    }
  25.429 +
  25.430 +                    if ( (page->u.inuse.type_info & PGT_validated) != PGT_validated )
  25.431 +                    {
  25.432 +                        printk("Audit %d: L1 not validated mfn=%p t=%p\n",
  25.433 +                               d->id, mfn, page->u.inuse.type_info);
  25.434 +                        errors++;
  25.435 +                    }
  25.436 +
  25.437 +                    if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned )
  25.438 +                    {
  25.439 +                        if ( !VM_ASSIST(d, VMASST_TYPE_writable_pagetables) )
  25.440 +                        {
  25.441 +                            printk("Audit %d: L1 mfn=%p not pinned t=%p\n",
  25.442 +                                   d->id, mfn, page->u.inuse.type_info);
  25.443 +                            errors++;
  25.444 +                        }
  25.445 +                    }
  25.446 +                }
  25.447 +                
  25.448 +                if ( page->u.inuse.type_info & PGT_pinned )
  25.449 +                    adjust(page, 1);
  25.450 +
  25.451 +                if ( page->u.inuse.type_info & PGT_validated )
  25.452 +                    adjust_l1_page(mfn);
  25.453 +
  25.454 +                break;
  25.455 +
  25.456 +            case PGT_gdt_page:
  25.457 +                ASSERT( !page_out_of_sync(page) );
  25.458 +                adjust(page, 1);
  25.459 +                break;
  25.460 +
  25.461 +            case PGT_ldt_page:
  25.462 +                ASSERT( !page_out_of_sync(page) );
  25.463 +                adjust(page, 1);
  25.464 +                break;
  25.465 +
  25.466 +            case PGT_writable_page:
  25.467 +                if ( shadow_enabled )
  25.468 +                {
  25.469 +                    // In shadow mode, writable pages can get pinned by
  25.470 +                    // paravirtualized guests that think they are pinning
  25.471 +                    // their L1s and/or L2s.
  25.472 +                    //
  25.473 +                    if ( page->u.inuse.type_info & PGT_pinned )
  25.474 +                        adjust(page, 1);
  25.475 +                }
  25.476 +            }
  25.477 +
  25.478 +            list_ent = page->list.next;
  25.479 +        }
  25.480 +    }
  25.481 +
  25.482 +    adjust_for_pgtbase();
  25.483 +
  25.484 +    adjust_guest_pages();
  25.485 +
  25.486 +    if ( shadow_enabled )
  25.487 +    {
  25.488 +        adjust_oos_list();
  25.489 +        adjust_shadow_tables();
  25.490 +    }
  25.491 +
  25.492 +    return errors;
  25.493 +}
  25.494 +
  25.495 +
  25.496 +#ifndef NDEBUG
  25.497 +
  25.498 +void _audit_domain(struct domain *d, int flags, const char *file, int line)
  25.499 +{
  25.500 +    void scan_for_pfn_in_mfn(struct domain *d, unsigned long xmfn,
  25.501 +                             unsigned long mfn)
  25.502 +    {
  25.503 +        struct pfn_info *page = &frame_table[mfn];
  25.504 +        unsigned long *pt = map_domain_mem(mfn);
  25.505 +        int i;
  25.506 +
  25.507 +        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
  25.508 +        {
  25.509 +            if ( (pt[i] & _PAGE_PRESENT) && ((pt[i] >> PAGE_SHIFT) == xmfn) )
  25.510 +                printk("     found dom=%d mfn=%p t=%x c=%x pt[i=%x]=%p\n",
  25.511 +                       d->id, mfn, page->u.inuse.type_info,
  25.512 +                       page->count_info, i, pt[i]);
  25.513 +        }
  25.514 +
  25.515 +        unmap_domain_mem(pt);           
  25.516 +    }
  25.517 +
  25.518 +    void scan_for_pfn(struct domain *d, unsigned long xmfn)
  25.519 +    {
  25.520 +        if ( !shadow_mode_enabled(d) )
  25.521 +        {
  25.522 +            struct list_head *list_ent = d->page_list.next;
  25.523 +            struct pfn_info *page;
  25.524 +
  25.525 +            while ( list_ent != &d->page_list )
  25.526 +            {
  25.527 +                page = list_entry(list_ent, struct pfn_info, list);
  25.528 +
  25.529 +                switch ( page->u.inuse.type_info & PGT_type_mask )
  25.530 +                {
  25.531 +                case PGT_l1_page_table:
  25.532 +                case PGT_l2_page_table:
  25.533 +                    scan_for_pfn_in_mfn(d, xmfn, page_to_pfn(page));
  25.534 +                    break;
  25.535 +                default:
  25.536 +                    break;
  25.537 +                }
  25.538 +
  25.539 +                list_ent = page->list.next;
  25.540 +            }
  25.541 +        }
  25.542 +        else
  25.543 +        {
  25.544 +            struct shadow_status *a;
  25.545 +            int i;
  25.546 +            
  25.547 +            for ( i = 0; i < shadow_ht_buckets; i++ )
  25.548 +            {
  25.549 +                a = &d->arch.shadow_ht[i];
  25.550 +                while ( a && a->gpfn_and_flags )
  25.551 +                {
  25.552 +                    switch ( a->gpfn_and_flags & PGT_type_mask )
  25.553 +                    {
  25.554 +                    case PGT_l1_shadow:
  25.555 +                    case PGT_l2_shadow:
  25.556 +                    case PGT_hl2_shadow:
  25.557 +                        scan_for_pfn_in_mfn(d, xmfn, a->smfn);
  25.558 +                        break;
  25.559 +                    case PGT_snapshot:
  25.560 +                        break;
  25.561 +                    default:
  25.562 +                        BUG();
  25.563 +                        break;
  25.564 +                    }
  25.565 +                    a = a->next;
  25.566 +                }
  25.567 +            }
  25.568 +        }
  25.569 +    }
  25.570 +
  25.571 +    void scan_for_pfn_remote(unsigned long xmfn)
  25.572 +    {
  25.573 +        struct domain *e;
  25.574 +        for_each_domain ( e )
  25.575 +            scan_for_pfn( e, xmfn );
  25.576 +    } 
  25.577 +
  25.578 +    unsigned long mfn;
  25.579 +    struct list_head *list_ent;
  25.580 +    struct pfn_info *page;
  25.581 +    int errors = 0;
  25.582 +
  25.583 +    if ( d != current->domain )
  25.584 +        domain_pause(d);
  25.585 +    synchronise_pagetables(~0UL);
  25.586 +
  25.587 +    // Maybe we should just be using BIGLOCK?
  25.588 +    //
  25.589 +    if ( !(flags & AUDIT_ALREADY_LOCKED) )
  25.590 +        shadow_lock(d);
  25.591 +
  25.592 +    spin_lock(&d->page_alloc_lock);
  25.593 +
  25.594 +    /* PHASE 0 */
  25.595 +
  25.596 +    list_ent = d->page_list.next;
  25.597 +    while ( list_ent != &d->page_list )
  25.598 +    {
  25.599 +        u32 page_type;
  25.600 +
  25.601 +        page = list_entry(list_ent, struct pfn_info, list);
  25.602 +        mfn = page_to_pfn(page);
  25.603 +        page_type = page->u.inuse.type_info & PGT_type_mask;
  25.604 +
  25.605 +        if ( page_get_owner(page) != d )
  25.606 +            BUG();
  25.607 +
  25.608 +        if ( (page->u.inuse.type_info & PGT_count_mask) >
  25.609 +             (page->count_info & PGC_count_mask) )
  25.610 +        {
  25.611 +            printk("taf(%08x) > caf(%08x) mfn=%p\n",
  25.612 +                   page->u.inuse.type_info, page->count_info, mfn);
  25.613 +            errors++;
  25.614 +        }
  25.615 +
  25.616 +        if ( shadow_mode_enabled(d) &&
  25.617 +             (page_type == PGT_writable_page) &&
  25.618 +             !(page->u.inuse.type_info & PGT_validated) )
  25.619 +        {
  25.620 +            printk("shadow mode writable page not validated mfn=%p t=%08x c=%08x\n",
  25.621 +                   mfn, page->u.inuse.type_info, page->count_info);
  25.622 +            errors++;
  25.623 +        }
  25.624 + 
  25.625 +#if 0   /* SYSV shared memory pages plus writeable files. */
  25.626 +        if ( page_type == PGT_writable_page && 
  25.627 +             (page->u.inuse.type_info & PGT_count_mask) > 1 )
  25.628 +        {
  25.629 +            printk("writeable page with type count >1: mfn=%lx t=%x c=%x\n",
  25.630 +                  mfn,
  25.631 +                  page->u.inuse.type_info,
  25.632 +                  page->count_info );
  25.633 +            errors++;
  25.634 +            scan_for_pfn_remote(mfn);
  25.635 +        }
  25.636 +#endif
  25.637 +
  25.638 +        if ( page_type == PGT_none && 
  25.639 +             (page->u.inuse.type_info & PGT_count_mask) > 0 )
  25.640 +        {
  25.641 +            printk("normal page with type count >0: mfn=%lx t=%x c=%x\n",
  25.642 +                  mfn,
  25.643 +                  page->u.inuse.type_info,
  25.644 +                  page->count_info );
  25.645 +            errors++;
  25.646 +        }
  25.647 +
  25.648 +        if ( page_out_of_sync(page) )
  25.649 +        {
  25.650 +            if ( !page_is_page_table(page) )
  25.651 +            {
  25.652 +                printk("out of sync page mfn=%p is not a page table\n", mfn);
  25.653 +                errors++;
  25.654 +            }
  25.655 +            unsigned long pfn = __mfn_to_gpfn(d, mfn);
  25.656 +            if ( !__shadow_status(d, pfn, PGT_snapshot) )
  25.657 +            {
  25.658 +                printk("out of sync page mfn=%p doesn't have a snapshot\n");
  25.659 +                errors++;
  25.660 +            }
  25.661 +            if ( page_type != PGT_writable_page )
  25.662 +            {
  25.663 +                printk("out of sync page mfn=%p has strange type t=%08x c=%08x\n",
  25.664 +                       mfn, page->u.inuse.type_info, page->count_info);
  25.665 +                errors++;
  25.666 +            }
  25.667 +        }
  25.668 +
  25.669 +        /* Use tlbflush_timestamp to store original type_info. */
  25.670 +        page->tlbflush_timestamp = page->u.inuse.type_info;
  25.671 +
  25.672 +        list_ent = page->list.next;
  25.673 +    }
  25.674 +
  25.675 +    /* PHASE 1 */
  25.676 +    io_mappings = lowmem_mappings = 0;
  25.677 +
  25.678 +    errors += audit_adjust_pgtables(d, -1, 1);
  25.679 +
  25.680 +    if ( !(flags & AUDIT_QUIET) &&
  25.681 +         ((io_mappings > 0) || (lowmem_mappings > 0)) )
  25.682 +        printk("Audit %d: Found %d lowmem mappings and %d io mappings\n",
  25.683 +               d->id, lowmem_mappings, io_mappings);
  25.684 +
  25.685 +    /* PHASE 2 */
  25.686 +
  25.687 +    list_ent = d->page_list.next;
  25.688 +    while ( list_ent != &d->page_list )
  25.689 +    {
  25.690 +        page = list_entry(list_ent, struct pfn_info, list);
  25.691 +        mfn = page_to_pfn(page);
  25.692 +
  25.693 +        switch ( page->u.inuse.type_info & PGT_type_mask)
  25.694 +        {
  25.695 +        case PGT_l1_page_table:
  25.696 +        case PGT_l2_page_table:
  25.697 +            if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
  25.698 +            {
  25.699 +                printk("Audit %d: type count!=0 t=%x ot=%x c=%x mfn=%lx\n",
  25.700 +                       d->id, page->u.inuse.type_info, 
  25.701 +                       page->tlbflush_timestamp,
  25.702 +                       page->count_info, mfn);
  25.703 +                errors++;
  25.704 +                scan_for_pfn_remote(mfn);
  25.705 +            }
  25.706 +            break;
  25.707 +        case PGT_none:
  25.708 +        case PGT_writable_page:
  25.709 +        case PGT_gdt_page:
  25.710 +        case PGT_ldt_page:
  25.711 +            if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
  25.712 +            {
  25.713 +                printk("Audit %d: type count!=0 t=%x ot=%x c=%x mfn=%lx\n",
  25.714 +                       d->id, page->u.inuse.type_info, 
  25.715 +                       page->tlbflush_timestamp,
  25.716 +                       page->count_info, mfn);
  25.717 +                errors++;
  25.718 +            }
  25.719 +            break;
  25.720 +        default:
  25.721 +            BUG(); // XXX fix me...
  25.722 +        }
  25.723 +        
  25.724 +        if ( (page->count_info & PGC_count_mask) != 1 )
  25.725 +        {
  25.726 +            printk("Audit %d: gen count!=1 (c=%x) t=%x ot=%x mfn=%lx\n",
  25.727 +                   d->id,
  25.728 +                   page->count_info,
  25.729 +                   page->u.inuse.type_info, 
  25.730 +                   page->tlbflush_timestamp, mfn );
  25.731 +            errors++;
  25.732 +            scan_for_pfn_remote(mfn);
  25.733 +        }
  25.734 +
  25.735 +        list_ent = page->list.next;
  25.736 +    }
  25.737 +
  25.738 +    if ( shadow_mode_enabled(d) )
  25.739 +    {
  25.740 +        struct shadow_status *a;
  25.741 +        struct pfn_info *page;
  25.742 +        u32 page_type;
  25.743 +        int i;
  25.744 +
  25.745 +        for ( i = 0; i < shadow_ht_buckets; i++ )
  25.746 +        {
  25.747 +            a = &d->arch.shadow_ht[i];
  25.748 +            while ( a && a->gpfn_and_flags )
  25.749 +            {
  25.750 +                page = pfn_to_page(a->smfn);
  25.751 +                page_type = a->gpfn_and_flags & PGT_type_mask;
  25.752 +
  25.753 +                switch ( page_type ) {
  25.754 +                case PGT_snapshot:
  25.755 +                    // XXX -- what should we check here?
  25.756 +                    break;
  25.757 +                case PGT_l1_shadow:
  25.758 +                case PGT_l2_shadow:
  25.759 +                    if ( ((page->u.inuse.type_info & PGT_type_mask) != page_type ) ||
  25.760 +                         (page->count_info != 0) )
  25.761 +                    {
  25.762 +                        printk("Audit %d: shadow page counts wrong mfn=%p t=%x c=%x\n",
  25.763 +                               d->id, page_to_pfn(page),
  25.764 +                               page->u.inuse.type_info,
  25.765 +                               page->count_info);
  25.766 +                        errors++;
  25.767 +                    }
  25.768 +                    break;
  25.769 +
  25.770 +                case PGT_hl2_shadow: // haven't thought about this case yet.
  25.771 +                default:
  25.772 +                    BUG();
  25.773 +                    break;
  25.774 +                }
  25.775 +
  25.776 +                a = a->next;
  25.777 +            }
  25.778 +        }
  25.779 +    }
  25.780 +
  25.781 +    /* PHASE 3 */
  25.782 +    ctot = ttot = page_count = l1 = l2 = oos_count = 0;
  25.783 +
  25.784 +    audit_adjust_pgtables(d, 1, 0);
  25.785 +
  25.786 +#if 0
  25.787 +    // This covers our sins of trashing the tlbflush_timestamps...
  25.788 +    //
  25.789 +    local_flush_tlb();
  25.790 +#endif
  25.791 +
  25.792 +    spin_unlock(&d->page_alloc_lock);
  25.793 +
  25.794 +    if ( !(flags & AUDIT_QUIET) )
  25.795 +        printk("Audit dom%d (%s:%d) Done. "
  25.796 +               "pages=%d oos=%d l1=%d l2=%d ctot=%d ttot=%d\n",
  25.797 +               d->id, file, line, page_count, oos_count, l1, l2, ctot, ttot );
  25.798 +
  25.799 +    if ( !(flags & AUDIT_ALREADY_LOCKED) )
  25.800 +        shadow_unlock(d);
  25.801 +
  25.802 +    if ( d != current->domain )
  25.803 +        domain_unpause(d);
  25.804 +
  25.805 +    if ( errors && !(flags & AUDIT_ERRORS_OK) )
  25.806 +        BUG();
  25.807 +}
  25.808 +
  25.809 +void audit_domains(void)
  25.810 +{
  25.811 +    struct domain *d;
  25.812 +    for_each_domain ( d )
  25.813 +        audit_domain(d);
  25.814 +}
  25.815 +
  25.816 +void audit_domains_key(unsigned char key)
  25.817 +{
  25.818 +    audit_domains();
  25.819 +}
  25.820 +#endif