ia64/xen-unstable

changeset 4212:7f9cdb03e7fd

bitkeeper revision 1.1236.32.11 (423b097bvEBDPFFtDR44bf9tw_JCqg)

dom0 runs well in shadow translate mode!

Signed-off-by: michael.fetterman@cl.cam.ac.uk
author mafetter@fleming.research
date Fri Mar 18 17:01:47 2005 +0000 (2005-03-18)
parents e38db244d654
children d509bef98bd9 6352a018ed51
files xen/arch/x86/audit.c xen/arch/x86/domain.c xen/arch/x86/extable.c xen/arch/x86/mm.c xen/arch/x86/shadow.c xen/arch/x86/vmx.c xen/arch/x86/x86_32/domain_build.c xen/arch/x86/x86_32/domain_page.c xen/arch/x86/x86_32/traps.c xen/common/page_alloc.c xen/include/asm-x86/mm.h xen/include/asm-x86/shadow.h xen/include/asm-x86/vmx_platform.h xen/include/asm-x86/x86_32/domain_page.h xen/include/xen/perfc_defn.h xen/include/xen/sched.h
line diff
     1.1 --- a/xen/arch/x86/audit.c	Thu Mar 17 12:25:14 2005 +0000
     1.2 +++ b/xen/arch/x86/audit.c	Fri Mar 18 17:01:47 2005 +0000
     1.3 @@ -36,7 +36,7 @@ static int ttot=0, ctot=0, io_mappings=0
     1.4  static int l1, l2, oos_count, page_count;
     1.5  
     1.6  #define FILE_AND_LINE 0
     1.7 -//#define MFN_TO_WATCH 0x4700
     1.8 +//#define MFN2_TO_WATCH 0x1d8
     1.9  
    1.10  #if FILE_AND_LINE
    1.11  #define adjust(_p, _a) _adjust((_p), (_a), __FILE__, __LINE__)
    1.12 @@ -56,10 +56,10 @@ int audit_adjust_pgtables(struct domain 
    1.13  
    1.14      void _adjust(struct pfn_info *page, int adjtype ADJUST_EXTRA_ARGS)
    1.15      {
    1.16 -#ifdef MFN_TO_WATCH
    1.17 -        if (page_to_pfn(page) == MFN_TO_WATCH)
    1.18 +#ifdef MFN2_TO_WATCH
    1.19 +        if (page_to_pfn(page) == MFN2_TO_WATCH)
    1.20          {
    1.21 -            APRINTK("adjust(mfn=%p, dir=%d, adjtype=%d) MFN_TO_WATCH",
    1.22 +            APRINTK("adjust(mfn=%p, dir=%d, adjtype=%d)",
    1.23                      page_to_pfn(page), dir, adjtype);
    1.24          }
    1.25  #endif
    1.26 @@ -548,6 +548,9 @@ int audit_adjust_pgtables(struct domain 
    1.27          adjust_shadow_tables();
    1.28      }
    1.29  
    1.30 +    //printk("d->shared_info=%p __pa()=%p\n", d->shared_info, __pa(d->shared_info));
    1.31 +    adjust(virt_to_page(d->shared_info), 1);
    1.32 +
    1.33      return errors;
    1.34  }
    1.35  
     2.1 --- a/xen/arch/x86/domain.c	Thu Mar 17 12:25:14 2005 +0000
     2.2 +++ b/xen/arch/x86/domain.c	Fri Mar 18 17:01:47 2005 +0000
     2.3 @@ -238,7 +238,7 @@ void arch_do_createdomain(struct exec_do
     2.4          d->shared_info = (void *)alloc_xenheap_page();
     2.5          memset(d->shared_info, 0, PAGE_SIZE);
     2.6          ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid];
     2.7 -        SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
     2.8 +        SHARE_PFN_WITH_DOMAIN2(virt_to_page(d->shared_info), d);
     2.9          machine_to_phys_mapping[virt_to_phys(d->shared_info) >> 
    2.10                                 PAGE_SHIFT] = INVALID_M2P_ENTRY;
    2.11  
     3.1 --- a/xen/arch/x86/extable.c	Thu Mar 17 12:25:14 2005 +0000
     3.2 +++ b/xen/arch/x86/extable.c	Fri Mar 18 17:01:47 2005 +0000
     3.3 @@ -3,6 +3,11 @@
     3.4  #include <xen/spinlock.h>
     3.5  #include <asm/uaccess.h>
     3.6  
     3.7 +#ifdef PERF_COUNTERS
     3.8 +#include <xen/sched.h>
     3.9 +#include <xen/perfc.h>
    3.10 +#endif
    3.11 +
    3.12  extern struct exception_table_entry __start___ex_table[];
    3.13  extern struct exception_table_entry __stop___ex_table[];
    3.14  extern struct exception_table_entry __start___pre_ex_table[];
    3.15 @@ -69,5 +74,9 @@ search_pre_exception_table(struct xen_re
    3.16      unsigned long fixup = search_one_table(
    3.17          __start___pre_ex_table, __stop___pre_ex_table-1, addr);
    3.18      DPRINTK("Pre-exception: %p -> %p\n", addr, fixup);
    3.19 +#ifdef PERF_COUNTERS
    3.20 +    if ( fixup )
    3.21 +        perfc_incrc(exception_fixed);
    3.22 +#endif
    3.23      return fixup;
    3.24  }
     4.1 --- a/xen/arch/x86/mm.c	Thu Mar 17 12:25:14 2005 +0000
     4.2 +++ b/xen/arch/x86/mm.c	Fri Mar 18 17:01:47 2005 +0000
     4.3 @@ -262,7 +262,7 @@ int map_ldt_shadow_page(unsigned int off
     4.4  
     4.5      gpfn = l1_pgentry_to_pfn(mk_l1_pgentry(l1e));
     4.6      gmfn = __gpfn_to_mfn(d, gpfn);
     4.7 -    if ( unlikely(!gmfn) )
     4.8 +    if ( unlikely(!VALID_MFN(gmfn)) )
     4.9          return 0;
    4.10  
    4.11      if ( unlikely(shadow_mode_enabled(d)) )
    4.12 @@ -1088,7 +1088,7 @@ void free_page_type(struct pfn_info *pag
    4.13  }
    4.14  
    4.15  
    4.16 -void put_page_type(struct pfn_info *page)
    4.17 +void _put_page_type(struct pfn_info *page)
    4.18  {
    4.19      u32 nx, x, y = page->u.inuse.type_info;
    4.20  
    4.21 @@ -1143,7 +1143,7 @@ void put_page_type(struct pfn_info *page
    4.22  }
    4.23  
    4.24  
    4.25 -int get_page_type(struct pfn_info *page, u32 type)
    4.26 +int _get_page_type(struct pfn_info *page, u32 type)
    4.27  {
    4.28      u32 nx, x, y = page->u.inuse.type_info;
    4.29  
    4.30 @@ -1286,8 +1286,7 @@ static int do_extended_command(unsigned 
    4.31      unsigned int cmd = val & MMUEXT_CMD_MASK, type;
    4.32      struct exec_domain *ed = current;
    4.33      struct domain *d = ed->domain, *e;
    4.34 -    unsigned long gpfn = ptr >> PAGE_SHIFT;
    4.35 -    unsigned long mfn = __gpfn_to_mfn(d, gpfn);
    4.36 +    unsigned long mfn = ptr >> PAGE_SHIFT;
    4.37      struct pfn_info *page = &frame_table[mfn];
    4.38      u32 x, y, _d, _nd;
    4.39      domid_t domid;
    4.40 @@ -1304,15 +1303,6 @@ static int do_extended_command(unsigned 
    4.41          type = PGT_l1_page_table | PGT_va_mutable;
    4.42  
    4.43      pin_page:
    4.44 -        if ( unlikely(percpu_info[cpu].foreign &&
    4.45 -                      (shadow_mode_translate(d) ||
    4.46 -                       shadow_mode_translate(percpu_info[cpu].foreign))) )
    4.47 -        {
    4.48 -            // oops -- we should be using the foreign domain's P2M
    4.49 -            mfn = __gpfn_to_mfn(FOREIGNDOM, gpfn);
    4.50 -            page = &frame_table[mfn];
    4.51 -        }
    4.52 -
    4.53          if ( shadow_mode_enabled(FOREIGNDOM) )
    4.54              type = PGT_writable_page;
    4.55  
    4.56 @@ -1349,15 +1339,6 @@ static int do_extended_command(unsigned 
    4.57  #endif /* __x86_64__ */
    4.58  
    4.59      case MMUEXT_UNPIN_TABLE:
    4.60 -        if ( unlikely(percpu_info[cpu].foreign &&
    4.61 -                      (shadow_mode_translate(d) ||
    4.62 -                       shadow_mode_translate(percpu_info[cpu].foreign))) )
    4.63 -        {
    4.64 -            // oops -- we should be using the foreign domain's P2M
    4.65 -            mfn = __gpfn_to_mfn(FOREIGNDOM, gpfn);
    4.66 -            page = &frame_table[mfn];
    4.67 -        }
    4.68 -
    4.69          if ( unlikely(!(okay = get_page_from_pagenr(mfn, FOREIGNDOM))) )
    4.70          {
    4.71              MEM_LOG("mfn %p bad domain (dom=%p)",
    4.72 @@ -1723,9 +1704,7 @@ int do_mmu_update(
    4.73      cleanup_writable_pagetable(d);
    4.74  
    4.75      if ( unlikely(shadow_mode_enabled(d)) )
    4.76 -    {
    4.77          check_pagetable(ed, "pre-mmu"); /* debug */
    4.78 -    }
    4.79  
    4.80      /*
    4.81       * If we are resuming after preemption, read how much work we have already
    4.82 @@ -1783,8 +1762,7 @@ int do_mmu_update(
    4.83          }
    4.84  
    4.85          cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
    4.86 -        gpfn = req.ptr >> PAGE_SHIFT;
    4.87 -        mfn = __gpfn_to_mfn(d, gpfn);
    4.88 +        mfn = req.ptr >> PAGE_SHIFT;
    4.89  
    4.90          okay = 0;
    4.91  
    4.92 @@ -1867,6 +1845,8 @@ int do_mmu_update(
    4.93                          if ( shadow_mode_log_dirty(d) )
    4.94                              __mark_dirty(d, mfn);
    4.95  
    4.96 +                        gpfn = __mfn_to_gpfn(d, mfn);
    4.97 +                        ASSERT(gpfn);
    4.98                          if ( page_is_page_table(page) )
    4.99                              shadow_mark_mfn_out_of_sync(ed, gpfn, mfn);
   4.100                      }
   4.101 @@ -1886,6 +1866,21 @@ int do_mmu_update(
   4.102              break;
   4.103  
   4.104          case MMU_MACHPHYS_UPDATE:
   4.105 +
    4.106 +            // HACK ALERT...  Think about this later...
   4.107 +            //
   4.108 +            if ( unlikely(shadow_mode_translate(FOREIGNDOM) && IS_PRIV(d)) )
   4.109 +            {
   4.110 +                rc = FOREIGNDOM->next_io_page++;
   4.111 +                printk("privileged guest dom%d requests mfn=%p for dom%d, gets pfn=%p\n",
   4.112 +                       d->id, mfn, FOREIGNDOM->id, rc);
   4.113 +                set_machinetophys(mfn, rc);
   4.114 +                set_p2m_entry(FOREIGNDOM, rc, mfn);
   4.115 +                okay = 1;
   4.116 +                break;
   4.117 +            }
   4.118 +            BUG();
   4.119 +            
   4.120              if ( unlikely(!get_page_from_pagenr(mfn, FOREIGNDOM)) )
   4.121              {
   4.122                  MEM_LOG("Could not get page for mach->phys update");
   4.123 @@ -2250,7 +2245,7 @@ long do_update_descriptor(
   4.124  
   4.125      LOCK_BIGLOCK(dom);
   4.126  
   4.127 -    if ( !(mfn = __gpfn_to_mfn(dom, gpfn)) ) {
   4.128 +    if ( !VALID_MFN(mfn = __gpfn_to_mfn(dom, gpfn)) ) {
   4.129          UNLOCK_BIGLOCK(dom);
   4.130          return -EINVAL;
   4.131      }
     5.1 --- a/xen/arch/x86/shadow.c	Thu Mar 17 12:25:14 2005 +0000
     5.2 +++ b/xen/arch/x86/shadow.c	Fri Mar 18 17:01:47 2005 +0000
     5.3 @@ -191,14 +191,6 @@ alloc_shadow_page(struct domain *d,
     5.4      unsigned long smfn;
     5.5      int pin = 0;
     5.6  
     5.7 -    if ( (psh_type != PGT_snapshot) &&
     5.8 -         !shadow_promote(d, gpfn, gmfn, psh_type) )
     5.9 -    {
    5.10 -        FSH_LOG("promotion of pfn=%p mfn=%p failed!  external gnttab refs?\n",
    5.11 -                gpfn, gmfn);
    5.12 -        return 0;
    5.13 -    }
    5.14 -
    5.15      page = alloc_domheap_page(NULL);
    5.16      if ( unlikely(page == NULL) )
    5.17      {
    5.18 @@ -222,11 +214,15 @@ alloc_shadow_page(struct domain *d,
    5.19      switch ( psh_type )
    5.20      {
    5.21      case PGT_l1_shadow:
    5.22 +        if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
    5.23 +            goto oom;
    5.24          perfc_incr(shadow_l1_pages);
    5.25          d->arch.shadow_page_count++;
    5.26          break;
    5.27  
    5.28      case PGT_l2_shadow:
    5.29 +        if ( !shadow_promote(d, gpfn, gmfn, psh_type) )
    5.30 +            goto oom;
    5.31          perfc_incr(shadow_l2_pages);
    5.32          d->arch.shadow_page_count++;
    5.33          if ( PGT_l2_page_table == PGT_root_page_table )
    5.34 @@ -235,13 +231,16 @@ alloc_shadow_page(struct domain *d,
    5.35          break;
    5.36  
    5.37      case PGT_hl2_shadow:
    5.38 +        // Treat an hl2 as an L1 for purposes of promotion.
    5.39 +        // For external mode domains, treat them as an L2 for purposes of
    5.40 +        // pinning.
    5.41 +        //
    5.42 +        if ( !shadow_promote(d, gpfn, gmfn, PGT_l1_shadow) )
    5.43 +            goto oom;
    5.44          perfc_incr(hl2_table_pages);
    5.45          d->arch.hl2_page_count++;
    5.46 -
    5.47 -        // treat an hl2 as an L1 for purposes of promotion,
    5.48 -        // and as an L2 for purposes of pinning.
    5.49 -        //
    5.50 -        if ( PGT_l2_page_table == PGT_root_page_table )
    5.51 +        if ( shadow_mode_external(d) &&
    5.52 +             (PGT_l2_page_table == PGT_root_page_table) )
    5.53              pin = 1;
    5.54  
    5.55          break;
    5.56 @@ -263,6 +262,12 @@ alloc_shadow_page(struct domain *d,
    5.57          shadow_pin(smfn);
    5.58  
    5.59      return smfn;
    5.60 +
    5.61 +  oom:
    5.62 +    FSH_LOG("promotion of pfn=%p mfn=%p failed!  external gnttab refs?\n",
    5.63 +            gpfn, gmfn);
    5.64 +    free_domheap_page(page);
    5.65 +    return 0;
    5.66  }
    5.67  
    5.68  static void inline
    5.69 @@ -280,9 +285,7 @@ free_shadow_l1_table(struct domain *d, u
    5.70  static void inline
    5.71  free_shadow_hl2_table(struct domain *d, unsigned long smfn)
    5.72  {
    5.73 -    printk("free_shadow_hl2_table(smfn=%p)\n", smfn);
    5.74 -
    5.75 -    l1_pgentry_t *pl1e = map_domain_mem(smfn << PAGE_SHIFT);
    5.76 +    l1_pgentry_t *hl2 = map_domain_mem(smfn << PAGE_SHIFT);
    5.77      int i, limit;
    5.78  
    5.79      if ( shadow_mode_external(d) )
    5.80 @@ -291,9 +294,13 @@ free_shadow_hl2_table(struct domain *d, 
    5.81          limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
    5.82  
    5.83      for ( i = 0; i < limit; i++ )
    5.84 -        put_page_from_l1e(pl1e[i], d);
    5.85 -
    5.86 -    unmap_domain_mem(pl1e);
    5.87 +    {
    5.88 +        unsigned long hl2e = l1_pgentry_val(hl2[i]);
    5.89 +        if ( hl2e & _PAGE_PRESENT )
    5.90 +            put_page(pfn_to_page(hl2e >> PAGE_SHIFT));
    5.91 +    }
    5.92 +
    5.93 +    unmap_domain_mem(hl2);
    5.94  }
    5.95  
    5.96  static void inline
    5.97 @@ -330,7 +337,7 @@ void free_shadow_page(unsigned long smfn
    5.98  
    5.99      ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
   5.100  
   5.101 -    delete_shadow_status(d, gpfn, type);
   5.102 +    delete_shadow_status(d, gpfn, gmfn, type);
   5.103  
   5.104      switch ( type )
   5.105      {
   5.106 @@ -607,14 +614,58 @@ void free_monitor_pagetable(struct exec_
   5.107      ed->arch.monitor_vtable = 0;
   5.108  }
   5.109  
   5.110 +int
   5.111 +set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn)
   5.112 +{
   5.113 +    unsigned long phystab = pagetable_val(d->arch.phys_table);
   5.114 +    l2_pgentry_t *l2, l2e;
   5.115 +    l1_pgentry_t *l1;
   5.116 +    struct pfn_info *l1page;
   5.117 +    unsigned long va = pfn << PAGE_SHIFT;
   5.118 +
   5.119 +    ASSERT( phystab );
   5.120 +
   5.121 +#ifdef WATCH_MAP_DOMAIN_CALLERS
   5.122 +    int old_map_domain_mem_noisy = map_domain_mem_noisy;
   5.123 +    map_domain_mem_noisy = 0;
   5.124 +#endif
   5.125 +
   5.126 +    l2 = map_domain_mem(phystab);
   5.127 +    if ( !l2_pgentry_val(l2e = l2[l2_table_offset(va)]) )
   5.128 +    {
   5.129 +        l1page = alloc_domheap_page(NULL);
   5.130 +        if ( !l1page )
   5.131 +            return 0;
   5.132 +
   5.133 +        l1 = map_domain_mem(page_to_pfn(l1page) << PAGE_SHIFT);
   5.134 +        memset(l1, 0, PAGE_SIZE);
   5.135 +        unmap_domain_mem(l1);
   5.136 +
   5.137 +        l2e = l2[l2_table_offset(va)] =
   5.138 +            mk_l2_pgentry((page_to_pfn(l1page) << PAGE_SHIFT) |
   5.139 +                          __PAGE_HYPERVISOR);
   5.140 +    }
   5.141 +    unmap_domain_mem(l2);
   5.142 +
   5.143 +    l1 = map_domain_mem(l2_pgentry_val(l2e) & PAGE_MASK);
   5.144 +    l1[l1_table_offset(va)] = mk_l1_pgentry((mfn << PAGE_SHIFT) |
   5.145 +                                            __PAGE_HYPERVISOR);
   5.146 +    unmap_domain_mem(l1);
   5.147 +
   5.148 +#ifdef WATCH_MAP_DOMAIN_CALLERS
   5.149 +    map_domain_mem_noisy = old_map_domain_mem_noisy;
   5.150 +#endif
   5.151 +
   5.152 +    return 1;
   5.153 +}
   5.154 +
   5.155  static int
   5.156  alloc_p2m_table(struct domain *d)
   5.157  {
   5.158      struct list_head *list_ent;
   5.159 -    struct pfn_info *page, *l2page, *l1page;
   5.160 -    l2_pgentry_t *l2, l2e, last_l2e = mk_l2_pgentry(0);
   5.161 -    l1_pgentry_t *l1 = NULL;
   5.162 -    unsigned long va, mfn, pfn;
   5.163 +    struct pfn_info *page, *l2page;
   5.164 +    l2_pgentry_t *l2;
   5.165 +    unsigned long mfn, pfn;
   5.166  
   5.167      l2page = alloc_domheap_page(NULL);
   5.168      if ( !l2page )
   5.169 @@ -622,6 +673,7 @@ alloc_p2m_table(struct domain *d)
   5.170      d->arch.phys_table = mk_pagetable(page_to_pfn(l2page) << PAGE_SHIFT);
   5.171      l2 = map_domain_mem(page_to_pfn(l2page) << PAGE_SHIFT);
   5.172      memset(l2, 0, PAGE_SIZE);
   5.173 +    unmap_domain_mem(l2);
   5.174  
   5.175      list_ent = d->page_list.next;
   5.176      while ( list_ent != &d->page_list )
   5.177 @@ -632,34 +684,11 @@ alloc_p2m_table(struct domain *d)
   5.178          ASSERT(pfn != INVALID_M2P_ENTRY);
   5.179          ASSERT(pfn < (1u<<20));
   5.180  
   5.181 -        va = pfn << PAGE_SHIFT;
   5.182 -        if ( !l2_pgentry_val(l2e = l2[l2_table_offset(va)]) )
   5.183 -        {
   5.184 -            l1page = alloc_domheap_page(NULL);
   5.185 -            if ( !l1page )
   5.186 -                return 0;
   5.187 -            l2e = l2[l2_table_offset(va)] =
   5.188 -                mk_l2_pgentry((page_to_pfn(l1page) << PAGE_SHIFT) |
   5.189 -                              __PAGE_HYPERVISOR);
   5.190 -        }
   5.191 -
   5.192 -        if ( l2_pgentry_val(last_l2e) != l2_pgentry_val(l2e) )
   5.193 -        {
   5.194 -            if ( l1 )
   5.195 -                unmap_domain_mem(l1);
   5.196 -            l1 = map_domain_mem(l2_pgentry_val(l2e) & PAGE_MASK);
   5.197 -            last_l2e = l2e;
   5.198 -        }
   5.199 -
   5.200 -        l1[l1_table_offset(va)] = mk_l1_pgentry((mfn << PAGE_SHIFT) |
   5.201 -                                                __PAGE_HYPERVISOR);
   5.202 +        set_p2m_entry(d, pfn, mfn);
   5.203 +
   5.204          list_ent = page->list.next;
   5.205      }
   5.206  
   5.207 -    if ( l1 )
   5.208 -        unmap_domain_mem(l1);
   5.209 -    unmap_domain_mem(l2);
   5.210 -
   5.211      return 1;
   5.212  }
   5.213  
   5.214 @@ -782,8 +811,8 @@ int __shadow_mode_enable(struct domain *
   5.215          {
   5.216              // external guests provide their own memory for their P2M maps.
   5.217              //
   5.218 -            unsigned long mfn = pagetable_val(d->arch.phys_table)>>PAGE_SHIFT;
   5.219 -            ASSERT( d == page_get_owner(&frame_table[mfn]) );
   5.220 +            ASSERT( d == page_get_owner(&frame_table[pagetable_val(
   5.221 +                                        d->arch.phys_table)>>PAGE_SHIFT]) );
   5.222          }
   5.223      }
   5.224  
   5.225 @@ -1174,7 +1203,7 @@ void vmx_shadow_clear_state(struct domai
   5.226      shadow_unlock(d);
   5.227  }
   5.228  
   5.229 -static unsigned long
   5.230 +unsigned long
   5.231  gpfn_to_mfn_safe(struct domain *d, unsigned long gpfn)
   5.232  {
   5.233      ASSERT( shadow_mode_translate(d) );
   5.234 @@ -1190,7 +1219,7 @@ gpfn_to_mfn_safe(struct domain *d, unsig
   5.235      {
   5.236          printk("gpfn_to_mfn_safe(d->id=%d, gpfn=%p) => 0 l2e=%p\n",
   5.237                 d->id, gpfn, l2_pgentry_val(l2e));
   5.238 -        return 0;
   5.239 +        return INVALID_MFN;
   5.240      }
   5.241      unsigned long l1tab = l2_pgentry_val(l2e) & PAGE_MASK;
   5.242      l1_pgentry_t *l1 = map_domain_mem(l1tab);
   5.243 @@ -1204,7 +1233,7 @@ gpfn_to_mfn_safe(struct domain *d, unsig
   5.244      {
   5.245          printk("gpfn_to_mfn_safe(d->id=%d, gpfn=%p) => 0 l1e=%p\n",
   5.246                 d->id, gpfn, l1_pgentry_val(l1e));
   5.247 -        return 0;
   5.248 +        return INVALID_MFN;
   5.249      }
   5.250  
   5.251      return l1_pgentry_val(l1e) >> PAGE_SHIFT;
   5.252 @@ -1237,48 +1266,18 @@ shadow_hl2_table(struct domain *d, unsig
   5.253      else
   5.254          limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
   5.255  
   5.256 -    if ( unlikely(current->domain != d) && !shadow_mode_external(d) )
   5.257 +    for ( i = 0; i < limit; i++ )
   5.258      {
   5.259 -        // Can't use __gpfn_to_mfn() if we don't have one of this domain's
   5.260 -        // page tables currently installed.  What a pain in the neck!
   5.261 -        //
   5.262 -        // This isn't common -- it only happens during shadow mode setup
   5.263 -        // and mode changes.
   5.264 -        //
   5.265 -        perfc_incrc(shadow_hl2_other_domain);
   5.266 -        for ( i = 0; i < limit; i++ )
   5.267 -        {
   5.268 -            unsigned long gl2e = l2_pgentry_val(gl2[i]);
   5.269 -            unsigned long mfn;
   5.270 -
   5.271 -            if ( (gl2e & _PAGE_PRESENT) &&
   5.272 -                 (mfn = gpfn_to_mfn_safe(d, gl2e >> PAGE_SHIFT)) )
   5.273 -            {
   5.274 -                hl2[i] = mk_l1_pgentry((mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   5.275 -                get_page(pfn_to_page(mfn), d);
   5.276 -            }
   5.277 -            else
   5.278 -            {
   5.279 -                hl2[i] = mk_l1_pgentry(0);
   5.280 -            }
   5.281 -        }
   5.282 -    }
   5.283 -    else
   5.284 -    {
   5.285 -        for ( i = 0; i < limit; i++ )
   5.286 -        {
   5.287 -            unsigned long gl2e = l2_pgentry_val(gl2[i]);
   5.288 -            unsigned long mfn;
   5.289 -
   5.290 -            if ( (gl2e & _PAGE_PRESENT) &&
   5.291 -                 (mfn = __gpfn_to_mfn(d, gl2e >> PAGE_SHIFT)) )
   5.292 -            {
   5.293 -                hl2[i] = mk_l1_pgentry((mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   5.294 -                get_page(pfn_to_page(mfn), d);
   5.295 -            }
   5.296 -            else
   5.297 -                hl2[i] = mk_l1_pgentry(0);
   5.298 -        }
   5.299 +        unsigned long gl2e = l2_pgentry_val(gl2[i]);
   5.300 +        unsigned long hl2e;
   5.301 +
   5.302 +        hl2e_propagate_from_guest(d, gl2e, &hl2e);
   5.303 +
   5.304 +        if ( (hl2e & _PAGE_PRESENT) &&
   5.305 +             !get_page(pfn_to_page(hl2e >> PAGE_SHIFT), d) )
   5.306 +            hl2e = 0;
   5.307 +
   5.308 +        hl2[i] = mk_l1_pgentry(hl2e);
   5.309      }
   5.310  
   5.311      if ( !shadow_mode_external(d) )
   5.312 @@ -1402,7 +1401,7 @@ void shadow_map_l1_into_current_l2(unsig
   5.313          SH_VVLOG("4a: l1 not shadowed");
   5.314  
   5.315          gl1mfn = __gpfn_to_mfn(d, gl1pfn);
   5.316 -        if ( unlikely(!gl1mfn) )
   5.317 +        if ( unlikely(!VALID_MFN(gl1mfn)) )
   5.318          {
   5.319              // Attempt to use an invalid pfn as an L1 page.
   5.320              // XXX this needs to be more graceful!
   5.321 @@ -1727,7 +1726,7 @@ int __shadow_out_of_sync(struct exec_dom
   5.322      l1mfn = __gpfn_to_mfn(d, l2e >> PAGE_SHIFT);
   5.323  
   5.324      // If the l1 pfn is invalid, it can't be out of sync...
   5.325 -    if ( !l1mfn )
   5.326 +    if ( !VALID_MFN(l1mfn) )
   5.327          return 0;
   5.328  
   5.329      if ( page_out_of_sync(&frame_table[l1mfn]) &&
   5.330 @@ -1786,10 +1785,24 @@ u32 shadow_remove_all_write_access(
   5.331      struct shadow_status *a;
   5.332      unsigned long sl1mfn = __shadow_status(d, gpfn, PGT_l1_shadow);
   5.333      u32 count = 0;
   5.334 +    u32 write_refs;
   5.335  
   5.336      ASSERT(spin_is_locked(&d->arch.shadow_lock));
   5.337      ASSERT(gmfn);
   5.338  
   5.339 +    perfc_incrc(remove_write_access);
   5.340 +
   5.341 +    if ( (frame_table[gmfn].u.inuse.type_info & PGT_type_mask) ==
   5.342 +         PGT_writable_page )
   5.343 +    {
   5.344 +        write_refs = (frame_table[gmfn].u.inuse.type_info & PGT_count_mask);
   5.345 +        if ( write_refs == 0 )
   5.346 +        {
   5.347 +            perfc_incrc(remove_write_access_easy);
   5.348 +            return 0;
   5.349 +        }
   5.350 +    }
   5.351 +
   5.352      for (i = 0; i < shadow_ht_buckets; i++)
   5.353      {
   5.354          a = &d->arch.shadow_ht[i];
   5.355 @@ -1948,8 +1961,8 @@ static int resync_all(struct domain *d, 
   5.356                  // XXX - This hack works for linux guests.
   5.357                  //       Need a better solution long term.
   5.358                  if ( !(new_pde & _PAGE_PRESENT) && unlikely(new_pde != 0) &&
   5.359 -                     (frame_table[smfn].u.inuse.type_info & PGT_pinned) &&
   5.360 -                     !unshadow )
   5.361 +                     !unshadow &&
   5.362 +                     (frame_table[smfn].u.inuse.type_info & PGT_pinned) )
   5.363                  {
   5.364                      perfc_incrc(unshadow_l2_count);
   5.365                      unshadow = 1;
   5.366 @@ -1957,7 +1970,22 @@ static int resync_all(struct domain *d, 
   5.367              }
   5.368              break;
   5.369          default:
   5.370 -            BUG();
   5.371 +            for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   5.372 +            {
   5.373 +                if ( !is_guest_l2_slot(i) && !external )
   5.374 +                    continue;
   5.375 +
   5.376 +                unsigned new_pde = guest[i];
   5.377 +                if ( new_pde != snapshot[i] )
   5.378 +                {
   5.379 +                    need_flush |= validate_hl2e_change(d, new_pde, &shadow[i]);
   5.380 +
   5.381 +                    // can't update snapshots of linear page tables -- they
   5.382 +                    // are used multiple times...
   5.383 +                    //
   5.384 +                    // snapshot[i] = new_pde;
   5.385 +                }
   5.386 +            }
   5.387              break;
   5.388          }
   5.389  
   5.390 @@ -2090,11 +2118,23 @@ int shadow_fault(unsigned long va, struc
   5.391              return 0;
   5.392          }
   5.393  
   5.394 -        l1pte_write_fault(ed, &gpte, &spte, va);
   5.395 +        if ( !l1pte_write_fault(ed, &gpte, &spte, va) )
   5.396 +        {
   5.397 +            SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed");
   5.398 +            perfc_incrc(write_fault_bail);
   5.399 +            shadow_unlock(d);
   5.400 +            return 0;
   5.401 +        }
   5.402      }
   5.403      else
   5.404      {
   5.405 -        l1pte_read_fault(d, &gpte, &spte);
   5.406 +        if ( !l1pte_read_fault(d, &gpte, &spte) )
   5.407 +        {
   5.408 +            SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed");
   5.409 +            perfc_incrc(read_fault_bail);
   5.410 +            shadow_unlock(d);
   5.411 +            return 0;
   5.412 +        }
   5.413      }
   5.414  
   5.415      /*
   5.416 @@ -2189,7 +2229,7 @@ void __update_pagetables(struct exec_dom
   5.417      if ( old_smfn )
   5.418          put_shadow_ref(old_smfn);
   5.419  
   5.420 -    SH_VVLOG("0: __update_pagetables(gmfn=%p, smfn=%p)", gmfn, smfn);
   5.421 +    SH_VVLOG("__update_pagetables(gmfn=%p, smfn=%p)", gmfn, smfn);
   5.422  
   5.423      /*
   5.424       * arch.shadow_vtable
   5.425 @@ -2314,6 +2354,9 @@ static int check_pte(
   5.426      gpfn = gpte >> PAGE_SHIFT;
   5.427      gmfn = __gpfn_to_mfn(d, gpfn);
   5.428  
   5.429 +    if ( !VALID_MFN(gmfn) )
    5.430 +        FAIL("%s: invalid gpfn=%p gpte=%p\n", __func__, gpfn, gpte);
   5.431 +
   5.432      page_table_page = mfn_is_page_table(gmfn);
   5.433  
   5.434      if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) && !oos_ptes )
   5.435 @@ -2505,7 +2548,7 @@ int _check_pagetable(struct exec_domain 
   5.436      unsigned long ptbase_mfn = 0;
   5.437      int errors = 0, limit, oos_pdes = 0;
   5.438  
   5.439 -    audit_domain(d);
   5.440 +    _audit_domain(d, AUDIT_QUIET);
   5.441      shadow_lock(d);
   5.442  
   5.443      sh_check_name = s;
     6.1 --- a/xen/arch/x86/vmx.c	Thu Mar 17 12:25:14 2005 +0000
     6.2 +++ b/xen/arch/x86/vmx.c	Fri Mar 18 17:01:47 2005 +0000
     6.3 @@ -416,8 +416,8 @@ static void mov_to_cr(int gp, int cr, st
     6.4              /*
     6.5               * The guest CR3 must be pointing to the guest physical.
     6.6               */
     6.7 -            if (!(mfn = phys_to_machine_mapping(
     6.8 -                      d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT))) 
     6.9 +            if (!VALID_MFN(mfn = phys_to_machine_mapping(
    6.10 +                               d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)))
    6.11              {
    6.12                  VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx", 
    6.13                          d->arch.arch_vmx.cpu_cr3);
     7.1 --- a/xen/arch/x86/x86_32/domain_build.c	Thu Mar 17 12:25:14 2005 +0000
     7.2 +++ b/xen/arch/x86/x86_32/domain_build.c	Fri Mar 18 17:01:47 2005 +0000
     7.3 @@ -329,12 +329,22 @@ int construct_dom0(struct domain *d,
     7.4      /* Copy the initial ramdisk. */
     7.5      if ( initrd_len != 0 )
     7.6          memcpy((void *)vinitrd_start, initrd_start, initrd_len);
     7.7 -    
     7.8 +
     7.9 +    d->next_io_page = d->max_pages;
    7.10 +
    7.11      /* Set up start info area. */
    7.12      si = (start_info_t *)vstartinfo_start;
    7.13      memset(si, 0, PAGE_SIZE);
    7.14      si->nr_pages     = d->tot_pages;
    7.15 +#define NASTY_HACK
    7.16 +#ifdef NASTY_HACK
    7.17 +    si->shared_info  = d->next_io_page << PAGE_SHIFT;
    7.18 +    set_machinetophys(virt_to_phys(d->shared_info) >> PAGE_SHIFT,
    7.19 +                      d->next_io_page);
    7.20 +    d->next_io_page++;
    7.21 +#else
    7.22      si->shared_info  = virt_to_phys(d->shared_info);
    7.23 +#endif
    7.24      si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
    7.25      si->pt_base      = vpt_start;
    7.26      si->nr_pt_frames = nr_pt_pages;
    7.27 @@ -344,11 +354,13 @@ int construct_dom0(struct domain *d,
    7.28      for ( pfn = 0; pfn < d->tot_pages; pfn++ )
    7.29      {
    7.30          mfn = pfn + (alloc_start>>PAGE_SHIFT);
    7.31 +#if 0
    7.32  #ifndef NDEBUG
    7.33  #define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
    7.34          if ( pfn > REVERSE_START )
    7.35              mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
    7.36  #endif
    7.37 +#endif
    7.38          ((u32 *)vphysmap_start)[pfn] = mfn;
    7.39          machine_to_phys_mapping[mfn] = pfn;
    7.40      }
     8.1 --- a/xen/arch/x86/x86_32/domain_page.c	Thu Mar 17 12:25:14 2005 +0000
     8.2 +++ b/xen/arch/x86/x86_32/domain_page.c	Fri Mar 18 17:01:47 2005 +0000
     8.3 @@ -26,6 +26,11 @@ static spinlock_t map_lock = SPIN_LOCK_U
     8.4  /* Use a spare PTE bit to mark entries ready for recycling. */
     8.5  #define READY_FOR_TLB_FLUSH (1<<10)
     8.6  
     8.7 +#ifdef WATCH_MAP_DOMAIN_CALLERS
     8.8 +int map_domain_mem_noisy = 1;
     8.9 +#endif
    8.10 +
    8.11 +
    8.12  static void flush_all_ready_maps(void)
    8.13  {
    8.14      unsigned long *cache = mapcache;
    8.15 @@ -38,8 +43,7 @@ static void flush_all_ready_maps(void)
    8.16      while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 );
    8.17  }
    8.18  
    8.19 -
    8.20 -void *map_domain_mem(unsigned long pa)
    8.21 +void *_map_domain_mem(unsigned long pa)
    8.22  {
    8.23      unsigned long va;
    8.24      unsigned int idx, cpu = smp_processor_id();
     9.1 --- a/xen/arch/x86/x86_32/traps.c	Thu Mar 17 12:25:14 2005 +0000
     9.2 +++ b/xen/arch/x86/x86_32/traps.c	Fri Mar 18 17:01:47 2005 +0000
     9.3 @@ -150,6 +150,8 @@ void show_registers(struct xen_regs *reg
     9.4             ds, es, fs, gs, ss, cs);
     9.5  
     9.6      show_stack((unsigned long *)&regs->esp);
     9.7 +    if ( GUEST_MODE(regs) )
     9.8 +        show_guest_stack();
     9.9  } 
    9.10  
    9.11  void show_page_walk(unsigned long addr)
    10.1 --- a/xen/common/page_alloc.c	Thu Mar 17 12:25:14 2005 +0000
    10.2 +++ b/xen/common/page_alloc.c	Fri Mar 18 17:01:47 2005 +0000
    10.3 @@ -350,6 +350,11 @@ void scrub_heap_pages(void)
    10.4  
    10.5      printk("Scrubbing Free RAM: ");
    10.6  
    10.7 +#ifdef WATCH_MAP_DOMAIN_CALLERS
    10.8 +    int old_map_domain_mem_noisy = map_domain_mem_noisy;
    10.9 +    map_domain_mem_noisy = 0;
   10.10 +#endif
   10.11 +
   10.12      for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
   10.13      {
   10.14          /* Every 100MB, print a progress dot and appease the watchdog. */
   10.15 @@ -376,6 +381,10 @@ void scrub_heap_pages(void)
   10.16          spin_unlock_irqrestore(&heap_lock, flags);
   10.17      }
   10.18  
   10.19 +#ifdef WATCH_MAP_DOMAIN_CALLERS
   10.20 +    map_domain_mem_noisy = old_map_domain_mem_noisy;
   10.21 +#endif
   10.22 +
   10.23      printk("done.\n");
   10.24  }
   10.25  
    11.1 --- a/xen/include/asm-x86/mm.h	Thu Mar 17 12:25:14 2005 +0000
    11.2 +++ b/xen/include/asm-x86/mm.h	Fri Mar 18 17:01:47 2005 +0000
    11.3 @@ -144,6 +144,21 @@ static inline u32 pickle_domptr(struct d
    11.4          list_add_tail(&(_pfn)->list, &(_dom)->xenpage_list);                \
    11.5          spin_unlock(&(_dom)->page_alloc_lock);                              \
    11.6      } while ( 0 )
    11.7 +#define SHARE_PFN_WITH_DOMAIN2(_pfn, _dom)                                  \
    11.8 +    do {                                                                    \
    11.9 +        page_set_owner((_pfn), (_dom));                                     \
   11.10 +        /* The incremented type count is intended to pin to 'writable'. */  \
   11.11 +        (_pfn)->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;  \
   11.12 +        wmb(); /* install valid domain ptr before updating refcnt. */       \
   11.13 +        spin_lock(&(_dom)->page_alloc_lock);                                \
   11.14 +        /* _dom holds an allocation reference + writable ref */             \
   11.15 +        ASSERT((_pfn)->count_info == 0);                                    \
   11.16 +        (_pfn)->count_info |= PGC_allocated | 2;                            \
   11.17 +        if ( unlikely((_dom)->xenheap_pages++ == 0) )                       \
   11.18 +            get_knownalive_domain(_dom);                                    \
   11.19 +        list_add_tail(&(_pfn)->list, &(_dom)->page_list);                   \
   11.20 +        spin_unlock(&(_dom)->page_alloc_lock);                              \
   11.21 +    } while ( 0 )
   11.22  
   11.23  extern struct pfn_info *frame_table;
   11.24  extern unsigned long frame_table_size;
   11.25 @@ -188,6 +203,7 @@ static inline int get_page(struct pfn_in
   11.26               unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
   11.27               unlikely(d != _domain) )                /* Wrong owner? */
   11.28          {
   11.29 +          if ( !domain->arch.shadow_mode )
   11.30              DPRINTK("Error pfn %p: rd=%p(%d), od=%p(%d), caf=%08x, taf=%08x\n",
   11.31                      page_to_pfn(page), domain, (domain ? domain->id : -1),
   11.32                      page_get_owner(page),
   11.33 @@ -206,8 +222,36 @@ static inline int get_page(struct pfn_in
   11.34      return 1;
   11.35  }
   11.36  
   11.37 -void put_page_type(struct pfn_info *page);
   11.38 -int  get_page_type(struct pfn_info *page, u32 type);
   11.39 +//#define MFN1_TO_WATCH 0x1d8
   11.40 +#ifdef MFN1_TO_WATCH
   11.41 +#define get_page_type(__p, __t) (                                             \
   11.42 +{                                                                             \
   11.43 +    struct pfn_info *_p = (__p);                                              \
   11.44 +    u32 _t = (__t);                                                           \
   11.45 +    if ( page_to_pfn(_p) == MFN1_TO_WATCH )                                   \
   11.46 +        printk("get_page_type(%x) c=%p ot=%p @ %s:%d in %s\n",                \
   11.47 +               MFN1_TO_WATCH, frame_table[MFN1_TO_WATCH].count_info,          \
   11.48 +               frame_table[MFN1_TO_WATCH].u.inuse.type_info,                  \
   11.49 +               __FILE__, __LINE__, __func__);                                 \
   11.50 +    _get_page_type(_p, _t);                                                   \
   11.51 +})
   11.52 +#define put_page_type(__p) (                                                  \
   11.53 +{                                                                             \
   11.54 +    struct pfn_info *_p = (__p);                                              \
   11.55 +    if ( page_to_pfn(_p) == MFN1_TO_WATCH )                                   \
   11.56 +        printk("put_page_type(%x) c=%p ot=%p @ %s:%d in %s\n",                \
   11.57 +               MFN1_TO_WATCH, frame_table[MFN1_TO_WATCH].count_info,          \
   11.58 +               frame_table[MFN1_TO_WATCH].u.inuse.type_info,                  \
   11.59 +               __FILE__, __LINE__, __func__);                                 \
   11.60 +    _put_page_type(_p);                                                       \
   11.61 +})
   11.62 +#else
   11.63 +#define _get_page_type get_page_type
   11.64 +#define _put_page_type put_page_type
   11.65 +#endif
   11.66 +
   11.67 +void _put_page_type(struct pfn_info *page);
   11.68 +int  _get_page_type(struct pfn_info *page, u32 type);
   11.69  int  get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
   11.70  void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
   11.71  
   11.72 @@ -266,6 +310,8 @@ void synchronise_pagetables(unsigned lon
   11.73   * been used by the read-only MPT map.
   11.74   */
   11.75  #define __phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START)
   11.76 +#define INVALID_MFN               (~0UL)
   11.77 +#define VALID_MFN(_mfn)           (!((_mfn) & (1U<<31)))
   11.78  
   11.79  /* Returns the machine physical */
   11.80  static inline unsigned long phys_to_machine_mapping(unsigned long pfn) 
   11.81 @@ -273,10 +319,11 @@ static inline unsigned long phys_to_mach
   11.82      unsigned long mfn;
   11.83      l1_pgentry_t pte;
   11.84  
   11.85 -   if (__get_user(l1_pgentry_val(pte), (__phys_to_machine_mapping + pfn)))
   11.86 -       mfn = 0;
   11.87 +   if ( !__get_user(l1_pgentry_val(pte), (__phys_to_machine_mapping + pfn)) &&
   11.88 +        (l1_pgentry_val(pte) & _PAGE_PRESENT) )
   11.89 +       mfn = l1_pgentry_to_phys(pte) >> PAGE_SHIFT;
   11.90     else
   11.91 -       mfn = l1_pgentry_to_phys(pte) >> PAGE_SHIFT;
   11.92 +       mfn = INVALID_MFN;
   11.93  
   11.94     return mfn; 
   11.95  }
    12.1 --- a/xen/include/asm-x86/shadow.h	Thu Mar 17 12:25:14 2005 +0000
    12.2 +++ b/xen/include/asm-x86/shadow.h	Fri Mar 18 17:01:47 2005 +0000
    12.3 @@ -49,7 +49,7 @@
    12.4       (PERDOMAIN_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
    12.5  
    12.6  #define shadow_lock_init(_d) spin_lock_init(&(_d)->arch.shadow_lock)
    12.7 -#define shadow_lock(_d)      spin_lock(&(_d)->arch.shadow_lock)
    12.8 +#define shadow_lock(_d)      do { ASSERT(!spin_is_locked(&(_d)->arch.shadow_lock)); spin_lock(&(_d)->arch.shadow_lock); } while (0)
    12.9  #define shadow_unlock(_d)    spin_unlock(&(_d)->arch.shadow_lock)
   12.10  
   12.11  extern void shadow_mode_init(void);
   12.12 @@ -62,6 +62,8 @@ extern struct out_of_sync_entry *shadow_
   12.13  extern void free_monitor_pagetable(struct exec_domain *ed);
   12.14  extern void __shadow_sync_all(struct domain *d);
   12.15  extern int __shadow_out_of_sync(struct exec_domain *ed, unsigned long va);
   12.16 +extern int set_p2m_entry(
   12.17 +    struct domain *d, unsigned long pfn, unsigned long mfn);
   12.18  
   12.19  static inline unsigned long __shadow_status(
   12.20      struct domain *d, unsigned long gpfn, unsigned long stype);
   12.21 @@ -173,6 +175,9 @@ static inline void shadow_mode_disable(s
   12.22             phys_to_machine_mapping(gpfn); })           \
   12.23        : (gpfn) )
   12.24  
   12.25 +extern unsigned long gpfn_to_mfn_safe(
   12.26 +    struct domain *d, unsigned long gpfn);
   12.27 +
   12.28  /************************************************************************/
   12.29  
   12.30  struct shadow_status {
   12.31 @@ -268,7 +273,7 @@ shadow_get_page_from_l1e(l1_pgentry_t l1
   12.32      if ( unlikely(!res) )
   12.33      {
   12.34          perfc_incrc(shadow_get_page_fail);
   12.35 -        FSH_LOG("%s failed to get ref l1e=%p\n", l1_pgentry_val(l1e));
   12.36 +        FSH_LOG("%s failed to get ref l1e=%p\n", __func__, l1_pgentry_val(l1e));
   12.37      }
   12.38  
   12.39      return res;
   12.40 @@ -311,7 +316,7 @@ static inline void
   12.41          unsigned long old_hl2e =
   12.42              l1_pgentry_val(ed->arch.hl2_vtable[l2_table_offset(va)]);
   12.43          unsigned long new_hl2e =
   12.44 -            (mfn ? ((mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR) : 0);
   12.45 +            (VALID_MFN(mfn) ? ((mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR) : 0);
   12.46  
   12.47          // only do the ref counting if something important changed.
   12.48          //
   12.49 @@ -332,11 +337,36 @@ static inline void
   12.50  
   12.51  /************************************************************************/
   12.52  
   12.53 +//#define MFN3_TO_WATCH 0x1ff6e
   12.54 +#ifdef MFN3_TO_WATCH
   12.55 +#define get_shadow_ref(__s) (                                                 \
   12.56 +{                                                                             \
   12.57 +    unsigned long _s = (__s);                                                 \
   12.58 +    if ( _s == MFN3_TO_WATCH )                                                \
   12.59 +        printk("get_shadow_ref(%x) oc=%d @ %s:%d in %s\n",                    \
   12.60 +               MFN3_TO_WATCH, frame_table[_s].count_info,                     \
   12.61 +               __FILE__, __LINE__, __func__);                                 \
   12.62 +    _get_shadow_ref(_s);                                                      \
   12.63 +})
   12.64 +#define put_shadow_ref(__s) (                                                 \
   12.65 +{                                                                             \
   12.66 +    unsigned long _s = (__s);                                                 \
   12.67 +    if ( _s == MFN3_TO_WATCH )                                                \
   12.68 +        printk("put_shadow_ref(%x) oc=%d @ %s:%d in %s\n",                    \
   12.69 +               MFN3_TO_WATCH, frame_table[_s].count_info,                     \
   12.70 +               __FILE__, __LINE__, __func__);                                 \
   12.71 +    _put_shadow_ref(_s);                                                      \
   12.72 +})
   12.73 +#else
   12.74 +#define _get_shadow_ref get_shadow_ref
   12.75 +#define _put_shadow_ref put_shadow_ref
   12.76 +#endif
   12.77 +
   12.78  /*
   12.79   * Add another shadow reference to smfn.
   12.80   */
   12.81  static inline int
   12.82 -get_shadow_ref(unsigned long smfn)
   12.83 +_get_shadow_ref(unsigned long smfn)
   12.84  {
   12.85      u32 x, nx;
   12.86  
   12.87 @@ -365,7 +395,7 @@ extern void free_shadow_page(unsigned lo
   12.88   * Drop a shadow reference to smfn.
   12.89   */
   12.90  static inline void
   12.91 -put_shadow_ref(unsigned long smfn)
   12.92 +_put_shadow_ref(unsigned long smfn)
   12.93  {
   12.94      u32 x, nx;
   12.95  
   12.96 @@ -420,6 +450,9 @@ static inline int __mark_dirty(struct do
   12.97      ASSERT(spin_is_locked(&d->arch.shadow_lock));
   12.98      ASSERT(d->arch.shadow_dirty_bitmap != NULL);
   12.99  
  12.100 +    if ( !VALID_MFN(mfn) )
  12.101 +        return rc;
  12.102 +
  12.103      pfn = __mfn_to_gpfn(d, mfn);
  12.104  
  12.105      /*
  12.106 @@ -471,7 +504,7 @@ extern void shadow_mark_va_out_of_sync(
  12.107      struct exec_domain *ed, unsigned long gpfn, unsigned long mfn,
  12.108      unsigned long va);
  12.109  
  12.110 -static inline void l1pte_write_fault(
  12.111 +static inline int l1pte_write_fault(
  12.112      struct exec_domain *ed, unsigned long *gpte_p, unsigned long *spte_p,
  12.113      unsigned long va)
  12.114  {
  12.115 @@ -479,34 +512,36 @@ static inline void l1pte_write_fault(
  12.116      unsigned long gpte = *gpte_p;
  12.117      unsigned long spte;
  12.118      unsigned long gpfn = gpte >> PAGE_SHIFT;
  12.119 -    unsigned long mfn = __gpfn_to_mfn(d, gpfn);
  12.120 +    unsigned long gmfn = __gpfn_to_mfn(d, gpfn);
  12.121  
  12.122 -    //printk("l1pte_write_fault gmfn=%p\n", mfn);
  12.123 +    //printk("l1pte_write_fault gmfn=%p\n", gmfn);
  12.124  
  12.125 -    if ( unlikely(!mfn) )
  12.126 +    if ( unlikely(!VALID_MFN(gmfn)) )
  12.127      {
  12.128          SH_LOG("l1pte_write_fault: invalid gpfn=%p", gpfn);
  12.129          *spte_p = 0;
  12.130 -        return;
  12.131 +        return 0;
  12.132      }
  12.133  
  12.134      ASSERT(gpte & _PAGE_RW);
  12.135      gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
  12.136 -    spte = (mfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
  12.137 +    spte = (gmfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
  12.138  
  12.139      SH_VVLOG("l1pte_write_fault: updating spte=0x%p gpte=0x%p", spte, gpte);
  12.140  
  12.141      if ( shadow_mode_log_dirty(d) )
  12.142 -        __mark_dirty(d, mfn);
  12.143 +        __mark_dirty(d, gmfn);
  12.144  
  12.145 -    if ( mfn_is_page_table(mfn) )
  12.146 -        shadow_mark_va_out_of_sync(ed, gpfn, mfn, va);
  12.147 +    if ( mfn_is_page_table(gmfn) )
  12.148 +        shadow_mark_va_out_of_sync(ed, gpfn, gmfn, va);
  12.149  
  12.150      *gpte_p = gpte;
  12.151      *spte_p = spte;
  12.152 +
  12.153 +    return 1;
  12.154  }
  12.155  
  12.156 -static inline void l1pte_read_fault(
  12.157 +static inline int l1pte_read_fault(
  12.158      struct domain *d, unsigned long *gpte_p, unsigned long *spte_p)
  12.159  { 
  12.160      unsigned long gpte = *gpte_p;
  12.161 @@ -514,11 +549,11 @@ static inline void l1pte_read_fault(
  12.162      unsigned long pfn = gpte >> PAGE_SHIFT;
  12.163      unsigned long mfn = __gpfn_to_mfn(d, pfn);
  12.164  
  12.165 -    if ( unlikely(!mfn) )
  12.166 +    if ( unlikely(!VALID_MFN(mfn)) )
  12.167      {
  12.168          SH_LOG("l1pte_read_fault: invalid gpfn=%p", pfn);
  12.169          *spte_p = 0;
  12.170 -        return;
  12.171 +        return 0;
  12.172      }
  12.173  
  12.174      gpte |= _PAGE_ACCESSED;
  12.175 @@ -533,21 +568,22 @@ static inline void l1pte_read_fault(
  12.176      SH_VVLOG("l1pte_read_fault: updating spte=0x%p gpte=0x%p", spte, gpte);
  12.177      *gpte_p = gpte;
  12.178      *spte_p = spte;
  12.179 +
  12.180 +    return 1;
  12.181  }
  12.182  
  12.183  static inline void l1pte_propagate_from_guest(
  12.184      struct domain *d, unsigned long gpte, unsigned long *spte_p)
  12.185  { 
  12.186      unsigned long pfn = gpte >> PAGE_SHIFT;
  12.187 -    unsigned long mfn = __gpfn_to_mfn(d, pfn);
  12.188 -    unsigned long spte;
  12.189 +    unsigned long mfn, spte;
  12.190  
  12.191      spte = 0;
  12.192  
  12.193 -    if ( mfn &&
  12.194 -         ((gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
  12.195 -          (_PAGE_PRESENT|_PAGE_ACCESSED)) ) {
  12.196 -
  12.197 +    if ( ((gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
  12.198 +          (_PAGE_PRESENT|_PAGE_ACCESSED)) &&
  12.199 +         VALID_MFN(mfn = __gpfn_to_mfn(d, pfn)) )
  12.200 +    {
  12.201          spte = (mfn << PAGE_SHIFT) | (gpte & ~PAGE_MASK);
  12.202          
  12.203          if ( shadow_mode_log_dirty(d) ||
  12.204 @@ -557,13 +593,46 @@ static inline void l1pte_propagate_from_
  12.205              spte &= ~_PAGE_RW;
  12.206          }
  12.207      }
  12.208 +
  12.209  #if 0
  12.210 -
  12.211      if ( spte || gpte )
  12.212 -        SH_VLOG("%s: gpte=0x%p, new spte=0x%p", __func__, gpte, spte);
  12.213 +        SH_VVLOG("%s: gpte=%p, new spte=%p", __func__, gpte, spte);
  12.214 +#endif
  12.215 +
  12.216 +    *spte_p = spte;
  12.217 +}
  12.218 +
  12.219 +static inline void hl2e_propagate_from_guest(
  12.220 +    struct domain *d, unsigned long gpde, unsigned long *hl2e_p)
  12.221 +{
  12.222 +    unsigned long pfn = gpde >> PAGE_SHIFT;
  12.223 +    unsigned long mfn, hl2e;
  12.224 +
  12.225 +    hl2e = 0;
  12.226  
  12.227 -#endif
  12.228 -    *spte_p = spte;
  12.229 +    if ( gpde & _PAGE_PRESENT )
  12.230 +    {
  12.231 +        if ( unlikely((current->domain != d) && !shadow_mode_external(d)) )
  12.232 +        {
  12.233 +            // Can't use __gpfn_to_mfn() if we don't have one of this domain's
  12.234 +            // page tables currently installed.  What a pain in the neck!
  12.235 +            //
  12.236 +            // This isn't common -- it only happens during shadow mode setup
  12.237 +            // and mode changes.
  12.238 +            //
  12.239 +            mfn = gpfn_to_mfn_safe(d, pfn);
  12.240 +        }
  12.241 +        else
  12.242 +            mfn = __gpfn_to_mfn(d, pfn);
  12.243 +
  12.244 +        if ( VALID_MFN(mfn) )
  12.245 +            hl2e = (mfn << PAGE_SHIFT) | __PAGE_HYPERVISOR;
  12.246 +    }
  12.247 +
  12.248 +    if ( hl2e || gpde )
  12.249 +        SH_VVLOG("%s: gpde=%p hl2e=%p", __func__, gpde, hl2e);
  12.250 +
  12.251 +    *hl2e_p = hl2e;
  12.252  }
  12.253  
  12.254  static inline void l2pde_general(
  12.255 @@ -590,7 +659,7 @@ static inline void l2pde_general(
  12.256      }
  12.257  
  12.258      if ( spde || gpde )
  12.259 -        SH_VLOG("%s: gpde=0x%p, new spde=0x%p", __func__, gpde, spde);
  12.260 +        SH_VVLOG("%s: gpde=%p, new spde=%p", __func__, gpde, spde);
  12.261  
  12.262      *spde_p = spde;
  12.263  }
  12.264 @@ -649,6 +718,42 @@ validate_pte_change(
  12.265  // returns true if a tlb flush is needed
  12.266  //
  12.267  static int inline
  12.268 +validate_hl2e_change(
  12.269 +    struct domain *d,
  12.270 +    unsigned long new_gpde,
  12.271 +    unsigned long *shadow_hl2e_p)
  12.272 +{
  12.273 +    unsigned long old_hl2e, new_hl2e;
  12.274 +
  12.275 +    perfc_incrc(validate_hl2e_calls);
  12.276 +
  12.277 +    old_hl2e = *shadow_hl2e_p;
  12.278 +    hl2e_propagate_from_guest(d, new_gpde, &new_hl2e);
  12.279 +
  12.280 +    // Only do the ref counting if something important changed.
  12.281 +    //
  12.282 +    if ( ((old_hl2e | new_hl2e) & _PAGE_PRESENT) &&
  12.283 +         ((old_hl2e ^ new_hl2e) & (PAGE_MASK | _PAGE_PRESENT)) )
  12.284 +    {
  12.285 +        perfc_incrc(validate_hl2e_changes);
  12.286 +
  12.287 +        if ( (new_hl2e & _PAGE_PRESENT) &&
  12.288 +             !get_page(pfn_to_page(new_hl2e >> PAGE_SHIFT), d) )
  12.289 +            new_hl2e = 0;
  12.290 +        if ( old_hl2e & _PAGE_PRESENT )
  12.291 +            put_page(pfn_to_page(old_hl2e >> PAGE_SHIFT));
  12.292 +    }
  12.293 +
  12.294 +    *shadow_hl2e_p = new_hl2e;
  12.295 +
  12.296 +    // paranoia rules!
  12.297 +    return 1;
  12.298 +    
  12.299 +}
  12.300 +
  12.301 +// returns true if a tlb flush is needed
  12.302 +//
  12.303 +static int inline
  12.304  validate_pde_change(
  12.305      struct domain *d,
  12.306      unsigned long new_gpde,
  12.307 @@ -830,15 +935,16 @@ static inline unsigned long __shadow_sta
  12.308  {
  12.309      unsigned long gmfn = ((current->domain == d)
  12.310                            ? __gpfn_to_mfn(d, gpfn)
  12.311 -                          : 0);
  12.312 +                          : INVALID_MFN);
  12.313  
  12.314      ASSERT(spin_is_locked(&d->arch.shadow_lock));
  12.315      ASSERT(gpfn == (gpfn & PGT_mfn_mask));
  12.316      ASSERT(stype && !(stype & ~PGT_type_mask));
  12.317  
  12.318 -    if ( gmfn && ((stype != PGT_snapshot)
  12.319 -                  ? !mfn_is_page_table(gmfn)
  12.320 -                  : !mfn_out_of_sync(gmfn)) )
  12.321 +    if ( VALID_MFN(gmfn) &&
  12.322 +         ((stype != PGT_snapshot)
  12.323 +          ? !mfn_is_page_table(gmfn)
  12.324 +          : !mfn_out_of_sync(gmfn)) )
  12.325      {
  12.326          perfc_incrc(shadow_status_shortcut);
  12.327          ASSERT(___shadow_status(d, gpfn, stype) == 0);
  12.328 @@ -939,7 +1045,7 @@ static inline void put_shadow_status(str
  12.329  
  12.330  
  12.331  static inline void delete_shadow_status( 
  12.332 -    struct domain *d, unsigned int gpfn, unsigned int stype)
  12.333 +    struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype)
  12.334  {
  12.335      struct shadow_status *p, *x, *n, *head;
  12.336      unsigned long key = gpfn | stype;
  12.337 @@ -1010,7 +1116,7 @@ static inline void delete_shadow_status(
  12.338  
  12.339   found:
  12.340      // release ref to page
  12.341 -    put_page(pfn_to_page(__gpfn_to_mfn(d, gpfn)));
  12.342 +    put_page(pfn_to_page(gmfn));
  12.343  
  12.344      shadow_audit(d, 0);
  12.345  }
  12.346 @@ -1026,7 +1132,10 @@ static inline void set_shadow_status(
  12.347      SH_VVLOG("set gpfn=%p gmfn=%p smfn=%p t=%p\n", gpfn, gmfn, smfn, stype);
  12.348  
  12.349      ASSERT(spin_is_locked(&d->arch.shadow_lock));
  12.350 -    ASSERT(gpfn && !(gpfn & ~PGT_mfn_mask));
  12.351 +
  12.352 +    ASSERT(shadow_mode_translate(d) || gpfn);
  12.353 +    ASSERT(!(gpfn & ~PGT_mfn_mask));
  12.354 +    
  12.355      ASSERT(pfn_is_ram(gmfn)); // XXX need to be more graceful
  12.356      ASSERT(smfn && !(smfn & ~PGT_mfn_mask));
  12.357      ASSERT(stype && !(stype & ~PGT_type_mask));
    13.1 --- a/xen/include/asm-x86/vmx_platform.h	Thu Mar 17 12:25:14 2005 +0000
    13.2 +++ b/xen/include/asm-x86/vmx_platform.h	Fri Mar 18 17:01:47 2005 +0000
    13.3 @@ -87,6 +87,7 @@ struct virutal_platform_def {
    13.4  extern void handle_mmio(unsigned long, unsigned long);
    13.5  extern int vmx_setup_platform(struct exec_domain *, execution_context_t *);
    13.6  
    13.7 -#define mmio_space(gpa) (!phys_to_machine_mapping((gpa) >> PAGE_SHIFT))
    13.8 +// XXX - think about this -- maybe use bit 30 of the mfn to signify an MMIO frame.
    13.9 +#define mmio_space(gpa) (!VALID_MFN(phys_to_machine_mapping((gpa) >> PAGE_SHIFT)))
   13.10  
   13.11  #endif
    14.1 --- a/xen/include/asm-x86/x86_32/domain_page.h	Thu Mar 17 12:25:14 2005 +0000
    14.2 +++ b/xen/include/asm-x86/x86_32/domain_page.h	Fri Mar 18 17:01:47 2005 +0000
    14.3 @@ -13,12 +13,28 @@
    14.4  extern unsigned long *mapcache;
    14.5  #define MAPCACHE_ENTRIES        1024
    14.6  
    14.7 +
    14.8 +//#define WATCH_MAP_DOMAIN_CALLERS 1
    14.9 +#ifdef WATCH_MAP_DOMAIN_CALLERS
   14.10 +extern int map_domain_mem_noisy;
   14.11 +#define map_domain_mem(__mdm_pa) (                                            \
   14.12 +{                                                                             \
   14.13 +    unsigned long _mdm_pa = (__mdm_pa);                                       \
   14.14 +    if ( map_domain_mem_noisy )                                               \
   14.15 +        printk("map_domain_mem(%p) @ %s:%d in %s\n",                          \
   14.16 +               _mdm_pa, __FILE__, __LINE__, __func__);                        \
   14.17 +    _map_domain_mem(_mdm_pa);                                                 \
   14.18 +})
   14.19 +#else
   14.20 +#define _map_domain_mem map_domain_mem
   14.21 +#endif
   14.22 +
   14.23  /*
   14.24   * Maps a given physical address, returning corresponding virtual address.
   14.25   * The entire page containing that VA is now accessible until a 
   14.26   * corresponding call to unmap_domain_mem().
   14.27   */
   14.28 -extern void *map_domain_mem(unsigned long pa);
   14.29 +extern void *_map_domain_mem(unsigned long pa);
   14.30  
   14.31  /*
   14.32   * Pass a VA within a page previously mapped with map_domain_mem().
    15.1 --- a/xen/include/xen/perfc_defn.h	Thu Mar 17 12:25:14 2005 +0000
    15.2 +++ b/xen/include/xen/perfc_defn.h	Fri Mar 18 17:01:47 2005 +0000
    15.3 @@ -66,5 +66,11 @@ PERFCOUNTER_CPU(validate_pte_calls,     
    15.4  PERFCOUNTER_CPU(validate_pte_changes,              "validate_pte makes changes")
    15.5  PERFCOUNTER_CPU(validate_pde_calls,                "calls to validate_pde_change")
    15.6  PERFCOUNTER_CPU(validate_pde_changes,              "validate_pde makes changes")
    15.7 -PERFCOUNTER_CPU(shadow_hl2_other_domain,           "shadow_hl2 from other domain")
    15.8 +PERFCOUNTER_CPU(validate_hl2e_calls,               "calls to validate_hl2e_change")
    15.9 +PERFCOUNTER_CPU(validate_hl2e_changes,             "validate_hl2e makes changes")
   15.10  PERFCOUNTER_CPU(gpfn_to_mfn_safe,                  "calls to gpfn_to_mfn_safe")
   15.11 +PERFCOUNTER_CPU(write_fault_bail,                  "sf bailed due to write_fault")
   15.12 +PERFCOUNTER_CPU(read_fault_bail,                   "sf bailed due to read_fault")
   15.13 +PERFCOUNTER_CPU(exception_fixed,                   "pre-exception fixed")
   15.14 +PERFCOUNTER_CPU(remove_write_access,               "calls to remove_write_access")
   15.15 +PERFCOUNTER_CPU(remove_write_access_easy,          "easy outs of remove_write_access")
    16.1 --- a/xen/include/xen/sched.h	Thu Mar 17 12:25:14 2005 +0000
    16.2 +++ b/xen/include/xen/sched.h	Fri Mar 18 17:01:47 2005 +0000
    16.3 @@ -117,6 +117,7 @@ struct domain
    16.4      struct list_head xenpage_list;    /* linked list, of size xenheap_pages */
    16.5      unsigned int     tot_pages;       /* number of pages currently possesed */
    16.6      unsigned int     max_pages;       /* maximum value for tot_pages        */
    16.7 +    unsigned int     next_io_page;    /* next io pfn to give to domain      */
    16.8      unsigned int     xenheap_pages;   /* # pages allocated from Xen heap    */
    16.9  
   16.10      /* Scheduling. */