ia64/xen-unstable

changeset 2635:0174982516f6

bitkeeper revision 1.1159.1.229 (416d3ad1BpCS1RVPjkX14HUpsanlGw)

Shadow pagetable walkthrough.
author kaf24@freefall.cl.cam.ac.uk
date Wed Oct 13 14:25:21 2004 +0000 (2004-10-13)
parents 7565994e86cb
children 611881eac889 ab1802b30bfa
files tools/libxc/xc_linux_save.c xen/arch/x86/memory.c xen/arch/x86/shadow.c xen/include/asm-x86/shadow.h xen/include/hypervisor-ifs/dom0_ops.h xen/include/xen/lib.h
line diff
     1.1 --- a/tools/libxc/xc_linux_save.c	Wed Oct 13 03:33:39 2004 +0000
     1.2 +++ b/tools/libxc/xc_linux_save.c	Wed Oct 13 14:25:21 2004 +0000
     1.3 @@ -210,7 +210,7 @@ static int analysis_phase( int xc_handle
     1.4          int i;
     1.5  
     1.6          xc_shadow_control( xc_handle, domid, 
     1.7 -                           DOM0_SHADOW_CONTROL_OP_CLEAN2,
     1.8 +                           DOM0_SHADOW_CONTROL_OP_CLEAN,
     1.9                             arr, nr_pfns, NULL);
    1.10          printf("#Flush\n");
    1.11          for ( i = 0; i < 100; i++ )
    1.12 @@ -829,7 +829,7 @@ int xc_linux_save(int xc_handle, XcIOCon
    1.13              } 
    1.14  
    1.15              if ( xc_shadow_control( xc_handle, domid, 
    1.16 -                                    DOM0_SHADOW_CONTROL_OP_CLEAN2,
    1.17 +                                    DOM0_SHADOW_CONTROL_OP_CLEAN,
    1.18                                      to_send, nr_pfns, &stats ) != nr_pfns ) 
    1.19              {
    1.20                  xcio_error(ioctxt, "Error flushing shadow PT");
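
A note on the tools side of this change: DOM0_SHADOW_CONTROL_OP_CLEAN2 is
gone, and both the analysis phase and the live-save loop now use the single
DOM0_SHADOW_CONTROL_OP_CLEAN operation, which returns the dirty bitmap and
resets it in one call. A minimal sketch of the calling pattern, using only
what appears in this file (the surrounding loop and the done/out names are
illustrative, not part of this changeset):

    /* Each CLEAN call yields the pages dirtied since the previous call.
     * On success it returns the number of pages the bitmap covers. */
    while ( !done )
    {
        if ( xc_shadow_control( xc_handle, domid,
                                DOM0_SHADOW_CONTROL_OP_CLEAN,
                                to_send, nr_pfns, &stats ) != nr_pfns )
        {
            xcio_error(ioctxt, "Error flushing shadow PT");
            goto out;
        }
        /* ... transmit the pages whose bits are set in to_send ... */
    }
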
     2.1 --- a/xen/arch/x86/memory.c	Wed Oct 13 03:33:39 2004 +0000
     2.2 +++ b/xen/arch/x86/memory.c	Wed Oct 13 14:25:21 2004 +0000
     2.3 @@ -1503,7 +1503,7 @@ int do_update_va_mapping(unsigned long p
     2.4      {
     2.5          unsigned long sval;
     2.6  
     2.7 -        l1pte_no_fault(&d->mm, &val, &sval);
     2.8 +        l1pte_propagate_from_guest(&d->mm, &val, &sval);
     2.9  
    2.10          if ( unlikely(__put_user(sval, ((unsigned long *)(
    2.11              &shadow_linear_pg_table[page_nr])))) )
    2.12 @@ -1521,9 +1521,9 @@ int do_update_va_mapping(unsigned long p
    2.13           * for this.
    2.14           */
    2.15          if ( d->mm.shadow_mode == SHM_logdirty )
    2.16 -            mark_dirty( &current->mm, va_to_l1mfn(page_nr<<PAGE_SHIFT) );  
    2.17 +            mark_dirty(&current->mm, va_to_l1mfn(page_nr << PAGE_SHIFT));  
    2.18    
    2.19 -        check_pagetable(d, d->mm.pagetable, "va"); /* debug */
    2.20 +        check_pagetable(&d->mm, d->mm.pagetable, "va"); /* debug */
    2.21      }
    2.22  
    2.23      deferred_ops = percpu_info[cpu].deferred_ops;
    2.24 @@ -1613,7 +1613,7 @@ void ptwr_flush(const int which)
    2.25      if ( unlikely(d->mm.shadow_mode) )
    2.26      {
    2.27          /* Write-protect the p.t. page in the shadow page table. */
    2.28 -        l1pte_no_fault(&d->mm, &pte, &spte);
    2.29 +        l1pte_propagate_from_guest(&d->mm, &pte, &spte);
    2.30          __put_user(
    2.31              spte, (unsigned long *)&shadow_linear_pg_table[l1va>>PAGE_SHIFT]);
    2.32  
    2.33 @@ -1657,7 +1657,7 @@ void ptwr_flush(const int which)
    2.34              if ( likely(l1_pgentry_val(nl1e) & _PAGE_PRESENT) )
    2.35              {
    2.36                  if ( unlikely(sl1e != NULL) )
    2.37 -                    l1pte_no_fault(
    2.38 +                    l1pte_propagate_from_guest(
    2.39                          &d->mm, &l1_pgentry_val(nl1e), 
    2.40                          &l1_pgentry_val(sl1e[i]));
    2.41                  put_page_type(&frame_table[l1_pgentry_to_pagenr(nl1e)]);
    2.42 @@ -1672,7 +1672,7 @@ void ptwr_flush(const int which)
    2.43          }
    2.44          
    2.45          if ( unlikely(sl1e != NULL) )
    2.46 -            l1pte_no_fault(
    2.47 +            l1pte_propagate_from_guest(
    2.48                  &d->mm, &l1_pgentry_val(nl1e), &l1_pgentry_val(sl1e[i]));
    2.49  
    2.50          if ( unlikely(l1_pgentry_val(ol1e) & _PAGE_PRESENT) )
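
The renaming of l1pte_no_fault() to l1pte_propagate_from_guest() throughout
this file describes the helper more honestly: it derives a shadow PTE from a
guest PTE outside of fault context. Its body lives in shadow.h (not shown in
this diff), but the check_pte() audit later in this changeset pins down its
contract: a shadow entry may be present only if the guest entry is, dirty
only if the guest entry is, and writable only if the guest entry is both
writable and dirty. A minimal sketch consistent with those invariants (the
sketch_ name is illustrative, not from this changeset):

    /* Illustrative only: produce a shadow PTE that cannot violate the
     * coherence rules audited by check_pte(). */
    static inline void sketch_l1pte_propagate(
        unsigned long *gpte_p, unsigned long *spte_p)
    {
        unsigned long gpte = *gpte_p, spte = 0;

        /* Shadow only entries the guest has marked present and accessed. */
        if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED)) ==
             (_PAGE_PRESENT|_PAGE_ACCESSED) )
        {
            /* Keep the shadow read-only until the guest PTE is dirty, so
             * the first write still faults and the fault handler can log
             * it (cf. l1pte_write_fault() in shadow_fault() below). */
            spte = (gpte & _PAGE_DIRTY) ? gpte : (gpte & ~_PAGE_RW);
        }

        *spte_p = spte;
    }
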
     3.1 --- a/xen/arch/x86/shadow.c	Wed Oct 13 03:33:39 2004 +0000
     3.2 +++ b/xen/arch/x86/shadow.c	Wed Oct 13 14:25:21 2004 +0000
     3.3 @@ -9,7 +9,6 @@
     3.4  #include <xen/event.h>
     3.5  #include <xen/trace.h>
     3.6  
     3.7 -
     3.8  /********
     3.9  
    3.10  To use these shadow page tables, guests must not rely on the ACCESSED
    3.11 @@ -28,166 +27,141 @@ hypercall lock anyhow (at least initiall
    3.12  
    3.13  ********/
    3.14  
    3.15 -
    3.16 -static inline void free_shadow_page(struct mm_struct *m, 
    3.17 -                                    struct pfn_info *page)
    3.18 +static inline void free_shadow_page(
    3.19 +    struct mm_struct *m, struct pfn_info *page)
    3.20  {
    3.21 -    unsigned long type = page->u.inuse.type_info & PGT_type_mask;
    3.22 -
    3.23      m->shadow_page_count--;
    3.24  
    3.25 -    if (type == PGT_l1_page_table)
    3.26 +    switch ( page->u.inuse.type_info & PGT_type_mask )
    3.27 +    {
    3.28 +    case PGT_l1_page_table:
    3.29          perfc_decr(shadow_l1_pages);
    3.30 -    else if (type == PGT_l2_page_table)
    3.31 +        break;
    3.32 +
    3.33 +    case PGT_l2_page_table:
    3.34          perfc_decr(shadow_l2_pages);
    3.35 -    else printk("Free shadow weird page type pfn=%08x type=%08x\n",
    3.36 -                frame_table-page, page->u.inuse.type_info);
    3.37 -    
    3.38 +        break;
    3.39 +
    3.40 +    default:
    3.41 +        printk("Free shadow weird page type pfn=%08x type=%08x\n",
     3.42 +               page - frame_table, page->u.inuse.type_info);
    3.43 +        break;
    3.44 +    }
    3.45 +
    3.46      free_domheap_page(page);
    3.47  }
    3.48  
    3.49 -static void __free_shadow_table( struct mm_struct *m )
    3.50 +static void __free_shadow_table(struct mm_struct *m)
    3.51  {
    3.52 -    int j, free=0;
    3.53 -    struct shadow_status *a,*next;
    3.54 +    int                   i, free = 0;
    3.55 +    struct shadow_status *x, *n;
    3.56   
    3.57 -    // the code assumes you're not using the page tables i.e.
    3.58 -    // the domain is stopped and cr3 is something else!!
    3.59 +    /*
    3.60 +     * WARNING! The shadow page table must not currently be in use!
     3.61 +     * e.g., you are expected to have paused the domain and synchronised CR3.
    3.62 +     */
    3.63 +
    3.64 +    shadow_audit(m, 1);
    3.65  
    3.66 -    // walk the hash table and call free_shadow_page on all pages
    3.67 +    /* Free each hash chain in turn. */
    3.68 +    for ( i = 0; i < shadow_ht_buckets; i++ )
    3.69 +    {
    3.70 +        /* Skip empty buckets. */
    3.71 +        x = &m->shadow_ht[i];
    3.72 +        if ( x->pfn == 0 )
    3.73 +            continue;
    3.74 +
    3.75 +        /* Free the head page. */
    3.76 +        free_shadow_page(
    3.77 +            m, &frame_table[x->spfn_and_flags & PSH_pfn_mask]);
    3.78  
    3.79 -    shadow_audit(m,1);
    3.80 +        /* Reinitialise the head node. */
    3.81 +        x->pfn            = 0;
    3.82 +        x->spfn_and_flags = 0;
    3.83 +        n                 = x->next;
    3.84 +        x->next           = NULL;
    3.85 +
    3.86 +        free++;
    3.87  
    3.88 -    for(j=0;j<shadow_ht_buckets;j++)
    3.89 -    {
    3.90 -        a = &m->shadow_ht[j];        
    3.91 -        if (a->pfn)
    3.92 -        {
    3.93 -            free_shadow_page( m, 
    3.94 -                              &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
    3.95 -            a->pfn = 0;
    3.96 -            a->spfn_and_flags = 0;
    3.97 +        /* Iterate over non-head nodes. */
    3.98 +        for ( x = n; x != NULL; x = n )
    3.99 +        { 
   3.100 +            /* Free the shadow page. */
   3.101 +            free_shadow_page(
   3.102 +                m, &frame_table[x->spfn_and_flags & PSH_pfn_mask]);
   3.103 +
   3.104 +            /* Re-initialise the chain node. */
   3.105 +            x->pfn            = 0;
   3.106 +            x->spfn_and_flags = 0;
   3.107 +
   3.108 +            /* Add to the free list. */
   3.109 +            n                 = x->next;
   3.110 +            x->next           = m->shadow_ht_free;
   3.111 +            m->shadow_ht_free = x;
   3.112 +
   3.113              free++;
   3.114          }
   3.115 -        next=a->next;
   3.116 -        a->next=NULL;
   3.117 -        a=next;
   3.118 -        while(a)
   3.119 -        { 
   3.120 -            struct shadow_status *next = a->next;
   3.121  
   3.122 -            free_shadow_page( m, 
   3.123 -                              &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
   3.124 -            a->pfn = 0;
   3.125 -            a->spfn_and_flags = 0;
   3.126 -            free++;
   3.127 -            a->next = m->shadow_ht_free;           
   3.128 -            m->shadow_ht_free = a;
   3.129 -            a=next;
   3.130 -        }
   3.131 -        shadow_audit(m,0);
   3.132 +        shadow_audit(m, 0);
   3.133      }
   3.134 -    SH_LOG("Free shadow table. Freed= %d",free);
   3.135 +
   3.136 +    SH_LOG("Free shadow table. Freed=%d.", free);
   3.137  }
   3.138  
   3.139 -
   3.140 -#define TABLE_OP_ZERO_L2 1
   3.141 -#define TABLE_OP_ZERO_L1 2
   3.142 -#define TABLE_OP_FREE_L1 3
   3.143 +static inline int __clear_shadow_page(
   3.144 +    struct mm_struct *m, struct shadow_status *x)
   3.145 +{
   3.146 +    unsigned long   *p;
   3.147 +    int              restart = 0;
   3.148 +    struct pfn_info *spage = &frame_table[x->spfn_and_flags & PSH_pfn_mask];
   3.149  
   3.150 -static inline int shadow_page_op( struct mm_struct *m, unsigned int op, 
   3.151 -								  unsigned int gpfn,
   3.152 -                                  struct pfn_info *spfn_info, int *work )
   3.153 -{
   3.154 -    unsigned int spfn = spfn_info-frame_table;
   3.155 -	int restart = 0;
   3.156 -
   3.157 -    switch( op )
   3.158 +    switch ( spage->u.inuse.type_info & PGT_type_mask )
   3.159      {
   3.160 -	case TABLE_OP_ZERO_L2:
   3.161 -	{
   3.162 -		if ( (spfn_info->u.inuse.type_info & PGT_type_mask) == 
   3.163 -             PGT_l2_page_table )
   3.164 -		{
   3.165 -			unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
   3.166 -#ifdef __i386__
   3.167 -			memset(spl1e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*spl1e));
   3.168 -#endif
   3.169 -			unmap_domain_mem( spl1e );
   3.170 -		}
   3.171 -    }
   3.172 -	break;
   3.173 -	
   3.174 -	case TABLE_OP_ZERO_L1:
   3.175 -	{
   3.176 -		if ( (spfn_info->u.inuse.type_info & PGT_type_mask) == 
   3.177 -             PGT_l1_page_table )
   3.178 -		{
   3.179 -			unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
   3.180 -			memset( spl1e, 0, ENTRIES_PER_L1_PAGETABLE * sizeof(*spl1e) );
   3.181 -			unmap_domain_mem( spl1e );
   3.182 -		}
   3.183 -    }
   3.184 -	break;
   3.185 +        /* We clear L2 pages by zeroing the guest entries. */
   3.186 +    case PGT_l2_page_table:
   3.187 +        p = map_domain_mem((spage - frame_table) << PAGE_SHIFT);
   3.188 +        memset(p, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*p));
   3.189 +        unmap_domain_mem(p);
   3.190 +        break;
   3.191  
   3.192 -	case TABLE_OP_FREE_L1:
   3.193 -	{
   3.194 -		if ( (spfn_info->u.inuse.type_info & PGT_type_mask) == 
   3.195 -             PGT_l1_page_table )
   3.196 -		{
   3.197 -			// lock is already held
   3.198 -			delete_shadow_status( m, gpfn );
   3.199 -			free_shadow_page( m, spfn_info );
   3.200 -			restart = 1; // we need to go to start of list again
   3.201 -		}
   3.202 +        /* We clear L1 pages by freeing them: no benefit from zeroing them. */
   3.203 +    case PGT_l1_page_table:
   3.204 +        delete_shadow_status(m, x->pfn);
   3.205 +        free_shadow_page(m, spage);
   3.206 +        restart = 1; /* We need to go to start of list again. */
   3.207 +        break;
   3.208      }
   3.209  
   3.210 -	break;
   3.211 -	
   3.212 -	default:
   3.213 -		BUG();
   3.214 -
   3.215 -    }
   3.216      return restart;
   3.217  }
   3.218  
   3.219 -static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
   3.220 +static void __clear_shadow_state(struct mm_struct *m)
   3.221  {
   3.222 -    int j, work=0;
   3.223 -    struct shadow_status *a, *next;
   3.224 +    int                   i;
   3.225 +    struct shadow_status *x;
   3.226   
   3.227 -    // the code assumes you're not using the page tables i.e.
   3.228 -    // the domain is stopped and cr3 is something else!!
   3.229 +    shadow_audit(m, 1);
   3.230  
   3.231 -    // walk the hash table and call free_shadow_page on all pages
   3.232 -
   3.233 -    shadow_audit(m,1);
   3.234 -
   3.235 -    for(j=0;j<shadow_ht_buckets;j++)
   3.236 +    for ( i = 0; i < shadow_ht_buckets; i++ )
   3.237      {
   3.238 -	retry:
   3.239 -        a = &m->shadow_ht[j];     
   3.240 -		next = a->next;
   3.241 -        if (a->pfn)
   3.242 -        {
   3.243 -            if ( shadow_page_op( m, op, a->pfn,								 
   3.244 -								 &frame_table[a->spfn_and_flags & PSH_pfn_mask], 
   3.245 -								 &work ) )
   3.246 -				goto retry;
   3.247 -        }
   3.248 -        a=next;
   3.249 -        while(a)
   3.250 -        { 
   3.251 -			next = a->next;
   3.252 -            if ( shadow_page_op( m, op, a->pfn,
   3.253 -								 &frame_table[a->spfn_and_flags & PSH_pfn_mask],
   3.254 -								 &work ) )
   3.255 -				goto retry;
   3.256 -            a=next;
   3.257 -        }
   3.258 -        shadow_audit(m,0);
   3.259 +    retry:
   3.260 +        /* Skip empty buckets. */
   3.261 +        x = &m->shadow_ht[i];
   3.262 +        if ( x->pfn == 0 )
   3.263 +            continue;
   3.264 +
   3.265 +        if ( __clear_shadow_page(m, x) )
   3.266 +            goto retry;
   3.267 +
   3.268 +        for ( x = x->next; x != NULL; x = x->next )
   3.269 +            if ( __clear_shadow_page(m, x) )
   3.270 +                goto retry;
   3.271 +
   3.272 +        shadow_audit(m, 0);
   3.273      }
   3.274 -    SH_VLOG("Scan shadow table. Work=%d l1=%d l2=%d", work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
   3.275 +
   3.276 +    SH_VLOG("Scan shadow table. l1=%d l2=%d",
   3.277 +            perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
   3.278  }
   3.279  
   3.280  
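Both walkers above depend on the same hash-table layout: m->shadow_ht is a
fixed array of shadow_ht_buckets head nodes, with pfn == 0 marking a bucket
that is empty (and therefore has nothing chained behind it); overflow nodes
are carved out of the shadow_ht_extras blocks and recycled through the
m->shadow_ht_free list rather than returned to the allocator. A sketch of a
lookup against this layout (the real __shadow_status() lives in shadow.h;
the modulo hash here is only a placeholder):

    /* Illustrative only. Returns (shadow pfn | PSH_* flags), or 0. */
    static unsigned long sketch_lookup(struct mm_struct *m, unsigned long gpfn)
    {
        struct shadow_status *x = &m->shadow_ht[gpfn % shadow_ht_buckets];

        if ( x->pfn == 0 )
            return 0;               /* empty head node => empty chain */

        for ( ; x != NULL; x = x->next )
            if ( x->pfn == gpfn )
                return x->spfn_and_flags;

        return 0;
    }
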
   3.281 @@ -195,297 +169,215 @@ void shadow_mode_init(void)
   3.282  {
   3.283  }
   3.284  
   3.285 -int shadow_mode_enable( struct domain *p, unsigned int mode )
   3.286 +int shadow_mode_enable(struct domain *p, unsigned int mode)
   3.287  {
   3.288      struct mm_struct *m = &p->mm;
   3.289 -    struct shadow_status **fptr;
   3.290 -    int i;
   3.291  
   3.292 -    // allocate hashtable
   3.293 -    m->shadow_ht = xmalloc(shadow_ht_buckets * 
   3.294 -                           sizeof(struct shadow_status));
   3.295 -    if( m->shadow_ht == NULL )
   3.296 -        goto nomem;
   3.297 -
   3.298 -    memset(m->shadow_ht, 0, shadow_ht_buckets * sizeof(struct shadow_status));
   3.299 -
   3.300 -    // allocate space for first lot of extra nodes
   3.301 -    m->shadow_ht_extras = xmalloc(sizeof(void*) + 
   3.302 -                                  (shadow_ht_extra_size * 
   3.303 -                                   sizeof(struct shadow_status)));
   3.304 -    if( m->shadow_ht_extras == NULL )
   3.305 +    m->shadow_ht = xmalloc(
   3.306 +        shadow_ht_buckets * sizeof(struct shadow_status));
   3.307 +    if ( m->shadow_ht == NULL )
   3.308          goto nomem;
   3.309 -
   3.310 -    memset( m->shadow_ht_extras, 0, sizeof(void*) + (shadow_ht_extra_size * 
   3.311 -                                                     sizeof(struct shadow_status)) );
   3.312 -
   3.313 -    m->shadow_extras_count++;
   3.314 - 
   3.315 -    // add extras to free list
   3.316 -    fptr = &m->shadow_ht_free;
   3.317 -    for ( i=0; i<shadow_ht_extra_size; i++ )
   3.318 -    {
   3.319 -        *fptr = &m->shadow_ht_extras[i];
   3.320 -        fptr = &(m->shadow_ht_extras[i].next);
   3.321 -    }
   3.322 -    *fptr = NULL;
   3.323 -    *((struct shadow_status ** ) 
   3.324 -      &m->shadow_ht_extras[shadow_ht_extra_size]) = NULL;
   3.325 +    memset(m->shadow_ht, 0, shadow_ht_buckets * sizeof(struct shadow_status));
   3.326  
   3.327      if ( mode == SHM_logdirty )
   3.328      {
   3.329 -        m->shadow_dirty_bitmap_size = (p->max_pages+63)&(~63);
   3.330 +        m->shadow_dirty_bitmap_size = (p->max_pages + 63) & ~63;
   3.331          m->shadow_dirty_bitmap = 
   3.332 -            xmalloc( m->shadow_dirty_bitmap_size/8);
   3.333 -        if( m->shadow_dirty_bitmap == NULL )
   3.334 +            xmalloc(m->shadow_dirty_bitmap_size/8);
   3.335 +        if ( m->shadow_dirty_bitmap == NULL )
   3.336          {
   3.337              m->shadow_dirty_bitmap_size = 0;
   3.338 -			BUG();
   3.339              goto nomem;
   3.340          }
   3.341 -        memset(m->shadow_dirty_bitmap,0,m->shadow_dirty_bitmap_size/8);
   3.342 +        memset(m->shadow_dirty_bitmap, 0, m->shadow_dirty_bitmap_size/8);
   3.343      }
   3.344  
   3.345      m->shadow_mode = mode;
   3.346  
   3.347 -    // call shadow_mk_pagetable
   3.348 -    __shadow_mk_pagetable( m );
   3.349 +    __shadow_mk_pagetable(m);
   3.350      return 0;
   3.351  
   3.352 -nomem:
   3.353 -    if( m->shadow_ht ) {
   3.354 -		xfree( m->shadow_ht ); m->shadow_ht = NULL; };
   3.355 -
   3.356 -    if( m->shadow_ht_extras )  {
   3.357 -		xfree( m->shadow_ht_extras ); m->shadow_ht_extras = NULL; };
   3.358 -
   3.359 + nomem:
   3.360 +    if ( m->shadow_ht != NULL )
    3.361 +        xfree(m->shadow_ht);
   3.362 +    m->shadow_ht = NULL;
   3.363      return -ENOMEM;
   3.364  }
   3.365  
   3.366  void __shadow_mode_disable(struct domain *d)
   3.367  {
   3.368      struct mm_struct *m = &d->mm;
   3.369 -    struct shadow_status *next;
   3.370 +    struct shadow_status *x, *n;
   3.371  
   3.372      __free_shadow_table(m);
   3.373      m->shadow_mode = 0;
   3.374  
   3.375      SH_VLOG("freed tables count=%d l1=%d l2=%d",
   3.376 -           m->shadow_page_count, perfc_value(shadow_l1_pages), 
   3.377 -           perfc_value(shadow_l2_pages));
   3.378 +            m->shadow_page_count, perfc_value(shadow_l1_pages), 
   3.379 +            perfc_value(shadow_l2_pages));
   3.380  
   3.381 -    next = m->shadow_ht_extras;
   3.382 -    while ( next )
   3.383 +    n = m->shadow_ht_extras;
   3.384 +    while ( (x = n) != NULL )
   3.385      {
   3.386 -        struct shadow_status * this = next;
   3.387          m->shadow_extras_count--;
   3.388 -        next = *((struct shadow_status **)(&next[shadow_ht_extra_size]));
   3.389 -        xfree(this);
   3.390 +        n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
   3.391 +        xfree(x);
   3.392      }
   3.393  
   3.394 +    m->shadow_ht_extras = NULL;
   3.395 +    ASSERT(m->shadow_extras_count == 0);
   3.396      SH_LOG("freed extras, now %d", m->shadow_extras_count);
   3.397  
   3.398 -    if ( m->shadow_dirty_bitmap  )
   3.399 +    if ( m->shadow_dirty_bitmap != NULL )
   3.400      {
   3.401 -        xfree( m->shadow_dirty_bitmap );
   3.402 +        xfree(m->shadow_dirty_bitmap);
   3.403          m->shadow_dirty_bitmap = 0;
   3.404          m->shadow_dirty_bitmap_size = 0;
   3.405      }
   3.406  
   3.407 -    // free the hashtable itself
   3.408 -    xfree( m->shadow_ht );
   3.409 -
   3.410 -	m->shadow_ht = NULL;
   3.411 -	m->shadow_ht_extras = NULL;
   3.412 +    xfree(m->shadow_ht);
   3.413 +    m->shadow_ht = NULL;
   3.414  }
   3.415  
   3.416 -static int shadow_mode_table_op(struct domain *d, 
   3.417 -							    dom0_shadow_control_t *sc)
   3.418 +static int shadow_mode_table_op(
   3.419 +    struct domain *d, dom0_shadow_control_t *sc)
   3.420  {
   3.421 -    unsigned int op = sc->op;
   3.422 +    unsigned int      op = sc->op;
   3.423      struct mm_struct *m = &d->mm;
   3.424 -    int rc = 0;
   3.425 -
   3.426 -    // since Dom0 did the hypercall, we should be running with it's page
   3.427 -    // tables right now. Calling flush on yourself would be really
   3.428 -    // stupid.
   3.429 +    int               i, rc = 0;
   3.430  
   3.431      ASSERT(spin_is_locked(&d->mm.shadow_lock));
   3.432  
   3.433 -    if ( m == &current->mm )
   3.434 -    {
   3.435 -        printk("Don't try and flush your own page tables!\n");
   3.436 -        return -EINVAL;
   3.437 -    }
   3.438 -   
   3.439 -    SH_VLOG("shadow mode table op %08lx %08lx count %d",pagetable_val( m->pagetable),pagetable_val(m->shadow_table), m->shadow_page_count);
   3.440 +    SH_VLOG("shadow mode table op %08lx %08lx count %d",
   3.441 +            pagetable_val(m->pagetable), pagetable_val(m->shadow_table),
   3.442 +            m->shadow_page_count);
   3.443  
   3.444 -    shadow_audit(m,1);
   3.445 +    shadow_audit(m, 1);
   3.446  
   3.447 -    switch(op)
   3.448 +    switch ( op )
   3.449      {
   3.450      case DOM0_SHADOW_CONTROL_OP_FLUSH:
   3.451          __free_shadow_table( m );  
   3.452  
   3.453 -		d->mm.shadow_fault_count       = 0;
   3.454 -		d->mm.shadow_dirty_count       = 0;
   3.455 -		d->mm.shadow_dirty_net_count   = 0;
   3.456 -		d->mm.shadow_dirty_block_count = 0;
   3.457 +        d->mm.shadow_fault_count       = 0;
   3.458 +        d->mm.shadow_dirty_count       = 0;
   3.459 +        d->mm.shadow_dirty_net_count   = 0;
   3.460 +        d->mm.shadow_dirty_block_count = 0;
   3.461  
   3.462          break;
   3.463     
   3.464 -    case DOM0_SHADOW_CONTROL_OP_CLEAN:   // zero all-non hypervisor
   3.465 -	{
   3.466 -		__scan_shadow_table( m, TABLE_OP_ZERO_L2 );
   3.467 -		__scan_shadow_table( m, TABLE_OP_ZERO_L1 );
   3.468 +    case DOM0_SHADOW_CONTROL_OP_CLEAN:
   3.469 +        __clear_shadow_state(m);
   3.470  
   3.471 -		goto send_bitmap;
   3.472 -	}
   3.473 -		
   3.474 +        sc->stats.fault_count       = d->mm.shadow_fault_count;
   3.475 +        sc->stats.dirty_count       = d->mm.shadow_dirty_count;
   3.476 +        sc->stats.dirty_net_count   = d->mm.shadow_dirty_net_count;
   3.477 +        sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;
   3.478  
   3.479 -    case DOM0_SHADOW_CONTROL_OP_CLEAN2:  // zero all L2, free L1s
   3.480 -    {
   3.481 -		int i,j,zero=1;
   3.482 -		
   3.483 -		__scan_shadow_table( m, TABLE_OP_ZERO_L2 );
   3.484 -		__scan_shadow_table( m, TABLE_OP_FREE_L1 );
   3.485 -		
   3.486 -	send_bitmap:
   3.487 -		sc->stats.fault_count       = d->mm.shadow_fault_count;
   3.488 -		sc->stats.dirty_count       = d->mm.shadow_dirty_count;
   3.489 -		sc->stats.dirty_net_count   = d->mm.shadow_dirty_net_count;
   3.490 -		sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;
   3.491 -
   3.492 -		d->mm.shadow_fault_count       = 0;
   3.493 -		d->mm.shadow_dirty_count       = 0;
   3.494 -		d->mm.shadow_dirty_net_count   = 0;
   3.495 -		d->mm.shadow_dirty_block_count = 0;
   3.496 -	
   3.497 -		sc->pages = d->max_pages;
   3.498 +        d->mm.shadow_fault_count       = 0;
   3.499 +        d->mm.shadow_dirty_count       = 0;
   3.500 +        d->mm.shadow_dirty_net_count   = 0;
   3.501 +        d->mm.shadow_dirty_block_count = 0;
   3.502 + 
   3.503 +        if ( (d->max_pages > sc->pages) || 
   3.504 +             (sc->dirty_bitmap == NULL) || 
   3.505 +             (d->mm.shadow_dirty_bitmap == NULL) )
   3.506 +        {
   3.507 +            rc = -EINVAL;
   3.508 +            goto out;
   3.509 +        }
   3.510 + 
   3.511 +        sc->pages = d->max_pages;
   3.512  
   3.513 -		if( d->max_pages > sc->pages || 
   3.514 -			!sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap )
   3.515 -		{
   3.516 -			rc = -EINVAL;
   3.517 -			goto out;
   3.518 -		}
   3.519 +#define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
   3.520 +        for ( i = 0; i < d->max_pages; i += chunk )
   3.521 +        {
   3.522 +            int bytes = ((((d->max_pages - i) > chunk) ?
   3.523 +                          chunk : (d->max_pages - i)) + 7) / 8;
   3.524 +     
   3.525 +            copy_to_user(
   3.526 +                sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
   3.527 +                d->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
   3.528 +                bytes);
   3.529 +     
   3.530 +            memset(
   3.531 +                d->mm.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
   3.532 +                0, bytes);
   3.533 +        }
   3.534 +
   3.535 +        break;
   3.536  
   3.537 -	
   3.538 -#define chunk (8*1024) // do this in 1KB chunks for L1 cache
   3.539 -	
   3.540 -		for(i=0;i<d->max_pages;i+=chunk)
   3.541 -		{
   3.542 -			int bytes = ((  ((d->max_pages-i) > (chunk))?
   3.543 -							(chunk):(d->max_pages-i) ) + 7) / 8;
   3.544 -	    
   3.545 -			copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
   3.546 -						  d->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
   3.547 -						  bytes );
   3.548 -	    
   3.549 -			for(j=0; zero && j<bytes/sizeof(unsigned long);j++)
   3.550 -			{
   3.551 -				if( d->mm.shadow_dirty_bitmap[j] != 0 )
   3.552 -					zero = 0;
   3.553 -			}
   3.554 +    case DOM0_SHADOW_CONTROL_OP_PEEK:
   3.555 +        sc->stats.fault_count       = d->mm.shadow_fault_count;
   3.556 +        sc->stats.dirty_count       = d->mm.shadow_dirty_count;
   3.557 +        sc->stats.dirty_net_count   = d->mm.shadow_dirty_net_count;
   3.558 +        sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;
   3.559 + 
   3.560 +        if ( (d->max_pages > sc->pages) || 
   3.561 +             (sc->dirty_bitmap == NULL) || 
   3.562 +             (d->mm.shadow_dirty_bitmap == NULL) )
   3.563 +        {
   3.564 +            rc = -EINVAL;
   3.565 +            goto out;
   3.566 +        }
   3.567 + 
   3.568 +        sc->pages = d->max_pages;
   3.569 +        copy_to_user(
   3.570 +            sc->dirty_bitmap, d->mm.shadow_dirty_bitmap, (d->max_pages+7)/8);
   3.571  
   3.572 -			memset( d->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
   3.573 -					0, bytes);
   3.574 -		}
   3.575 +        break;
   3.576  
   3.577 -#if 0   /* This optimisation is dangerous for some uses of this function.
   3.578 -		 disable for the moment */
   3.579 -        /* Might as well stop the domain as an optimization. */
   3.580 -		if ( zero )
   3.581 -            domain_pause_by_systemcontroller(d);
   3.582 -#endif
   3.583 -
   3.584 -		break;
   3.585 +    default:
   3.586 +        BUG();
   3.587      }
   3.588  
   3.589 -    case DOM0_SHADOW_CONTROL_OP_PEEK:
   3.590 -    {
   3.591 -		int i;
   3.592 -
   3.593 -		sc->stats.fault_count       = d->mm.shadow_fault_count;
   3.594 -		sc->stats.dirty_count       = d->mm.shadow_dirty_count;
   3.595 -		sc->stats.dirty_net_count   = d->mm.shadow_dirty_net_count;
   3.596 -		sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;
   3.597 -	
   3.598 -		if( d->max_pages > sc->pages || 
   3.599 -			!sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap )
   3.600 -		{
   3.601 -			rc = -EINVAL;
   3.602 -			goto out;
   3.603 -		}
   3.604 -	
   3.605 -		sc->pages = d->max_pages;
   3.606 -	
   3.607 -#define chunk (8*1024) // do this in 1KB chunks for L1 cache
   3.608 -	
   3.609 -		for(i=0;i<d->max_pages;i+=chunk)
   3.610 -		{
   3.611 -			int bytes = ((  ((d->max_pages-i) > (chunk))?
   3.612 -							(chunk):(d->max_pages-i) ) + 7) / 8;
   3.613 -	    
   3.614 -			copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
   3.615 -						  d->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
   3.616 -						  bytes );	    
   3.617 -		}
   3.618 -
   3.619 -		break;
   3.620 -    }
   3.621 -
   3.622 -	default:
   3.623 -		BUG();
   3.624 -
   3.625 -    }
   3.626 -
   3.627 -
   3.628 -out:
   3.629 -
   3.630 + out:
   3.631      SH_VLOG("shadow mode table op : page count %d", m->shadow_page_count);
   3.632 -
   3.633 -    shadow_audit(m,1);
   3.634 -
   3.635 -    // call shadow_mk_pagetable
   3.636 -    __shadow_mk_pagetable( m );
   3.637 -
   3.638 +    shadow_audit(m, 1);
   3.639 +    __shadow_mk_pagetable(m);
   3.640      return rc;
   3.641  }
   3.642  
   3.643  int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
   3.644  {
   3.645      unsigned int cmd = sc->op;
   3.646 -    int rc = 0;
   3.647 +    int          rc  = 0;
   3.648 +
   3.649 +    if ( unlikely(d == current) )
   3.650 +    {
   3.651 +        DPRINTK("Don't try to do a shadow op on yourself!\n");
   3.652 +        return -EINVAL;
   3.653 +    }   
   3.654  
   3.655      domain_pause(d);
   3.656      synchronise_pagetables(~0UL);
   3.657  
   3.658      shadow_lock(&d->mm);
   3.659  
   3.660 -    if ( cmd == DOM0_SHADOW_CONTROL_OP_OFF )
   3.661 +    switch ( cmd )
   3.662      {
   3.663 +    case DOM0_SHADOW_CONTROL_OP_OFF:
   3.664          shadow_mode_disable(d);
   3.665 -    }
   3.666 -    else if ( cmd == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST )
   3.667 -    {
   3.668 +        break;
   3.669 +
   3.670 +    case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
   3.671          shadow_mode_disable(d);
   3.672          rc = shadow_mode_enable(d, SHM_test);
   3.673 -    } 
   3.674 -    else if ( cmd == DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY )
   3.675 -    {
   3.676 +        break;
   3.677 +
   3.678 +    case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
   3.679          shadow_mode_disable(d);
   3.680          rc = shadow_mode_enable(d, SHM_logdirty);
   3.681 -    } 
   3.682 -    else if ( shadow_mode(d) && 
   3.683 -              (cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH) && 
   3.684 -              (cmd <= DOM0_SHADOW_CONTROL_OP_CLEAN2) )
   3.685 -    {
   3.686 -        rc = shadow_mode_table_op(d, sc);
   3.687 -    }
   3.688 -    else
   3.689 -    {
   3.690 -        rc = -EINVAL;
   3.691 +        break;
   3.692 +
   3.693 +    default:
   3.694 +        if ( shadow_mode(d) && 
   3.695 +             (cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH) && 
   3.696 +             (cmd <= DOM0_SHADOW_CONTROL_OP_PEEK) )
   3.697 +            rc = shadow_mode_table_op(d, sc);
   3.698 +        else
   3.699 +            rc = -EINVAL;
   3.700 +        break;
   3.701      }
   3.702  
   3.703      shadow_unlock(&d->mm);
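
Worked numbers for the CLEAN path's chunking: chunk is 8*1024 *pages*, so
each pass moves (8192 + 7)/8 = 1024 bytes of bitmap, small enough to stay
resident in the L1 cache while it is copied out and then cleared; the final
pass rounds the tail up to whole bytes. PEEK copies the whole bitmap without
clearing it. An illustrative userspace analogue of the copy-and-clear loop
(memcpy stands in for copy_to_user; all names here are placeholders):

    #include <string.h>

    #define CHUNK_PAGES (8*1024)    /* => 1024 bytes of bitmap per pass */

    static void sketch_copy_and_clear(unsigned char *dst, unsigned char *src,
                                      unsigned int max_pages)
    {
        unsigned int i;

        for ( i = 0; i < max_pages; i += CHUNK_PAGES )
        {
            int bytes = (((max_pages - i) > CHUNK_PAGES ?
                          CHUNK_PAGES : (max_pages - i)) + 7) / 8;

            memcpy(dst + (i/8), src + (i/8), bytes);
            memset(src + (i/8), 0, bytes);  /* reset for the next round */
        }
    }
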
   3.704 @@ -497,80 +389,74 @@ int shadow_mode_control(struct domain *d
   3.705  
   3.706  static inline struct pfn_info *alloc_shadow_page(struct mm_struct *m)
   3.707  {
   3.708 -	struct pfn_info *page;
   3.709 +    struct pfn_info *page = alloc_domheap_page(NULL);
   3.710 +
   3.711      m->shadow_page_count++;
   3.712 -    page = alloc_domheap_page(NULL);
   3.713  
   3.714 -	if( unlikely(page == NULL) )
   3.715 -	{
   3.716 -		printk("Couldn't alloc shadow page! count=%d\n",
   3.717 -			   m->shadow_page_count);
   3.718 -		SH_VLOG("Shadow tables l1=%d l2=%d",
   3.719 -				perfc_value(shadow_l1_pages), 
   3.720 -				perfc_value(shadow_l2_pages));
   3.721 -		BUG();  // FIXME: try a shadow flush to free up some memory
   3.722 -	}
   3.723 +    if ( unlikely(page == NULL) )
   3.724 +    {
   3.725 +        printk("Couldn't alloc shadow page! count=%d\n",
   3.726 +               m->shadow_page_count);
   3.727 +        SH_VLOG("Shadow tables l1=%d l2=%d",
   3.728 +                perfc_value(shadow_l1_pages), 
   3.729 +                perfc_value(shadow_l2_pages));
   3.730 +        BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
   3.731 +    }
   3.732  
   3.733 -	return page;
   3.734 +    return page;
   3.735  }
   3.736  
   3.737 -void unshadow_table( unsigned long gpfn, unsigned int type )
   3.738 +void unshadow_table(unsigned long gpfn, unsigned int type)
   3.739  {
   3.740 -    unsigned long spfn;
   3.741 -	struct domain *d = frame_table[gpfn].u.inuse.domain;
   3.742 +    unsigned long  spfn;
   3.743 +    struct domain *d = frame_table[gpfn].u.inuse.domain;
   3.744  
   3.745 -    SH_VLOG("unshadow_table type=%08x gpfn=%08lx",
   3.746 -            type,
   3.747 -            gpfn );
   3.748 +    SH_VLOG("unshadow_table type=%08x gpfn=%08lx", type, gpfn);
   3.749  
   3.750      perfc_incrc(unshadow_table_count);
   3.751  
   3.752 -    // this function is the same for both l1 and l2 tables
   3.753 -
   3.754 -    // even in the SMP guest case, there won't be a race here as
   3.755 -    // this CPU was the one that cmpxchg'ed the page to invalid
   3.756 -
   3.757 +    /*
   3.758 +     * This function is the same for all p.t. pages. Even for multi-processor 
   3.759 +     * guests there won't be a race here as this CPU was the one that 
   3.760 +     * cmpxchg'ed the page to invalid.
   3.761 +     */
   3.762      spfn = __shadow_status(&d->mm, gpfn) & PSH_pfn_mask;
   3.763 -
   3.764      delete_shadow_status(&d->mm, gpfn);
   3.765 -
   3.766 -    free_shadow_page(&d->mm, &frame_table[spfn] );
   3.767 -
   3.768 +    free_shadow_page(&d->mm, &frame_table[spfn]);
   3.769  }
   3.770  
   3.771 -
   3.772  unsigned long shadow_l2_table( 
   3.773 -    struct mm_struct *m, unsigned long gpfn )
   3.774 +    struct mm_struct *m, unsigned long gpfn)
   3.775  {
   3.776      struct pfn_info *spfn_info;
   3.777 -    unsigned long spfn;
   3.778 -    l2_pgentry_t *spl2e, *gpl2e;
   3.779 -    int i;
   3.780 +    unsigned long    spfn;
   3.781 +    l2_pgentry_t    *spl2e;
   3.782  
   3.783 -    SH_VVLOG("shadow_l2_table( %08lx )",gpfn);
   3.784 +    SH_VVLOG("shadow_l2_table( %08lx )", gpfn);
   3.785  
   3.786      perfc_incrc(shadow_l2_table_count);
   3.787  
   3.788 -    // XXX in future, worry about racing in SMP guests 
   3.789 -    //      -- use cmpxchg with PSH_pending flag to show progress (and spin)
   3.790 -
   3.791 -    spfn_info = alloc_shadow_page(m);
   3.792 -
   3.793 -    ASSERT( spfn_info ); // XXX deal with failure later e.g. blow cache
    3.794 +    if ( (spfn_info = alloc_shadow_page(m)) == NULL )
   3.795 +        BUG(); /* XXX Deal gracefully with failure. */
   3.796  
   3.797      spfn_info->u.inuse.type_info = PGT_l2_page_table;
   3.798      perfc_incr(shadow_l2_pages);
   3.799  
   3.800 -    spfn = (unsigned long) (spfn_info - frame_table);
   3.801 +    spfn = spfn_info - frame_table;
   3.802  
   3.803 -    // mark pfn as being shadowed, update field to point at shadow
   3.804 +    /* Mark pfn as being shadowed; update field to point at shadow. */
   3.805      set_shadow_status(m, gpfn, spfn | PSH_shadowed);
   3.806   
   3.807 -    // we need to do this before the linear map is set up
   3.808 -    spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT);
   3.809 +    spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
   3.810 +
   3.811 +    /*
   3.812 +     * We could proactively fill in PDEs for pages that are already shadowed.
   3.813 +     * However, we tried it and it didn't help performance. This is simpler.
   3.814 +     */
   3.815 +    memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
   3.816  
   3.817  #ifdef __i386__
   3.818 -    // get hypervisor and 2x linear PT mapings installed 
    3.819 +    /* Install hypervisor and 2x linear p.t. mappings. */
   3.820      memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
   3.821             &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
   3.822             HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
   3.823 @@ -579,218 +465,187 @@ unsigned long shadow_l2_table(
   3.824      spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
   3.825          mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   3.826      spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
   3.827 -        mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) | 
   3.828 +        mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) |
   3.829                        __PAGE_HYPERVISOR);
   3.830  #endif
   3.831  
   3.832 -    // can't use the linear map as we may not be in the right PT
   3.833 -    gpl2e = (l2_pgentry_t *) map_domain_mem(gpfn << PAGE_SHIFT);
   3.834 -
   3.835 -    // proactively create entries for pages that are already shadowed
   3.836 -    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
   3.837 -    {
   3.838 -        unsigned long spte = 0;
   3.839 -
   3.840 -#if 0  // Turns out this doesn't really help
   3.841 -        unsigned long gpte;
   3.842 -
   3.843 -        gpte = l2_pgentry_val(gpl2e[i]);
   3.844 -
   3.845 -        if (gpte & _PAGE_PRESENT)
   3.846 -        {
   3.847 -            unsigned long s_sh = 
   3.848 -                __shadow_status(p, gpte>>PAGE_SHIFT);
   3.849 +    unmap_domain_mem(spl2e);
   3.850  
   3.851 -            l2pde_general( m, &gpte, &spte, s_sh );
   3.852 -
   3.853 -        }
   3.854 -#endif
   3.855 -
   3.856 -        spl2e[i] = mk_l2_pgentry( spte );
   3.857 -
   3.858 -    }
   3.859 -
   3.860 -    // its arguable we should 'preemptively shadow' a few active L1 pages
   3.861 -    // to avoid taking a string of faults when 'jacking' a running domain
   3.862 -
   3.863 -    unmap_domain_mem( gpl2e );
   3.864 -    unmap_domain_mem( spl2e );
   3.865 -
   3.866 -    SH_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn);
   3.867 -
   3.868 +    SH_VLOG("shadow_l2_table( %08lx -> %08lx)", gpfn, spfn);
   3.869      return spfn;
   3.870  }
   3.871  
   3.872 +static void shadow_map_l1_into_current_l2(unsigned long va)
   3.873 +{ 
   3.874 +    struct mm_struct *m = &current->mm;
   3.875 +    unsigned long    *gpl1e, *spl1e, gpde, spde, gl1pfn, sl1pfn, sl1ss;
   3.876 +    struct pfn_info  *sl1pfn_info;
   3.877 +    int               i;
   3.878  
   3.879 -int shadow_fault( unsigned long va, long error_code )
   3.880 +    gpde = l2_pgentry_val(linear_l2_table[va >> L2_PAGETABLE_SHIFT]);
   3.881 +
   3.882 +    gl1pfn = gpde >> PAGE_SHIFT;
   3.883 +
   3.884 +    sl1ss = __shadow_status(m, gl1pfn);
   3.885 +    if ( !(sl1ss & PSH_shadowed) )
   3.886 +    {
   3.887 +        /* This L1 is NOT already shadowed so we need to shadow it. */
    3.888 +        SH_VVLOG("4a: l1 not shadowed ( %08lx )", gl1pfn);
   3.889 +
   3.890 +        sl1pfn_info = alloc_shadow_page(m);
   3.891 +        sl1pfn_info->u.inuse.type_info = PGT_l1_page_table;
   3.892 +   
   3.893 +        sl1pfn = sl1pfn_info - frame_table;
   3.894 +
   3.895 +        perfc_incrc(shadow_l1_table_count);
   3.896 +        perfc_incr(shadow_l1_pages);
   3.897 +
   3.898 +        set_shadow_status(m, gl1pfn, PSH_shadowed | sl1pfn);
   3.899 +
   3.900 +        l2pde_general(m, &gpde, &spde, sl1pfn);
   3.901 +
   3.902 +        linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
   3.903 +        shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] =
   3.904 +            mk_l2_pgentry(spde);
   3.905 +
   3.906 +        gpl1e = (unsigned long *) &(linear_pg_table[
   3.907 +            (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1)]);
   3.908 +
   3.909 +        spl1e = (unsigned long *) &shadow_linear_pg_table[
   3.910 +            (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1)];
   3.911 +
   3.912 +        for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
   3.913 +            l1pte_propagate_from_guest(m, &gpl1e[i], &spl1e[i]);
   3.914 +    }
   3.915 +    else
   3.916 +    {
   3.917 +        /* This L1 is shadowed already, but the L2 entry is missing. */
    3.918 +        sl1pfn = sl1ss & PSH_pfn_mask;
    3.919 +
    3.920 +        SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )", sl1pfn);
   3.921 +        l2pde_general(m, &gpde, &spde, sl1pfn);
   3.922 +
   3.923 +        linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
   3.924 +        shadow_linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
   3.925 +    }              
   3.926 +}
   3.927 +
   3.928 +int shadow_fault(unsigned long va, long error_code)
   3.929  {
   3.930      unsigned long gpte, spte;
   3.931      struct mm_struct *m = &current->mm;
   3.932  
   3.933      SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
   3.934  
   3.935 -    check_pagetable( current, current->mm.pagetable, "pre-sf" );
   3.936 +    check_pagetable(m, current->mm.pagetable, "pre-sf");
   3.937  
   3.938 -    if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
   3.939 +    /*
   3.940 +     * STEP 1. A fast-reject set of checks with no locking.
   3.941 +     */
   3.942 +
   3.943 +    if ( unlikely(__get_user(gpte, (unsigned long *)
   3.944 +                             &linear_pg_table[va >> PAGE_SHIFT])) )
   3.945      {
   3.946          SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
   3.947 -        return 0;  // propagate to guest
   3.948 +        return 0;
   3.949      }
   3.950  
   3.951 -    if ( ! (gpte & _PAGE_PRESENT) )
   3.952 +    if ( !(gpte & _PAGE_PRESENT) )
   3.953      {
   3.954          SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
   3.955 -        return 0;  // we're not going to be able to help
   3.956 -    }
   3.957 -
   3.958 -    if ( (error_code & 2)  && ! (gpte & _PAGE_RW) )
   3.959 -    {
   3.960 -        // write fault on RO page
   3.961          return 0;
   3.962      }
   3.963  
   3.964 -    // take the lock and reread gpte
   3.965 +    if ( (error_code & 2)  && !(gpte & _PAGE_RW) )
   3.966 +    {
   3.967 +        /* Write fault on a read-only mapping. */
   3.968 +        return 0;
   3.969 +    }
   3.970 +
   3.971 +    /*
   3.972 +     * STEP 2. Take the shadow lock and re-check the guest PTE.
   3.973 +     */
   3.974  
   3.975      shadow_lock(m);
   3.976 -	
   3.977 -    if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
   3.978 + 
   3.979 +    if ( unlikely(__get_user(gpte, (unsigned long *)
   3.980 +                             &linear_pg_table[va >> PAGE_SHIFT])) )
   3.981      {
   3.982          SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
   3.983          shadow_unlock(m);
   3.984 -        return 0;  // propagate to guest
   3.985 +        return 0;
   3.986      }
   3.987  
   3.988      if ( unlikely(!(gpte & _PAGE_PRESENT)) )
   3.989      {
   3.990          SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
   3.991          shadow_unlock(m);
   3.992 -        return 0;  // we're not going to be able to help
   3.993 +        return 0;
   3.994      }
   3.995  
   3.996 -    if ( error_code & 2  )  
   3.997 -    {  // write fault
   3.998 -        if ( likely(gpte & _PAGE_RW) )
   3.999 +    /* Write fault? */
  3.1000 +    if ( error_code & 2 )  
  3.1001 +    {
  3.1002 +        if ( unlikely(!(gpte & _PAGE_RW)) )
  3.1003          {
  3.1004 -            l1pte_write_fault( m, &gpte, &spte );
  3.1005 +            /* Write fault on a read-only mapping. */
  3.1006 +            SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%lx)", gpte);
  3.1007 +            shadow_unlock(m);
  3.1008 +            return 0;
  3.1009          }
  3.1010 -        else
  3.1011 -        {   // write fault on RO page
  3.1012 -            SH_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
  3.1013 -            shadow_unlock(m);
  3.1014 -            return 0; // propagate to guest
  3.1015 -            // not clear whether we should set accessed bit here...
  3.1016 -        }
  3.1017 +
  3.1018 +        l1pte_write_fault(m, &gpte, &spte);
  3.1019      }
  3.1020      else
  3.1021      {
  3.1022 -        l1pte_read_fault( m, &gpte, &spte );
  3.1023 +        l1pte_read_fault(m, &gpte, &spte);
  3.1024      }
  3.1025  
  3.1026 -    SH_VVLOG("plan: gpte=%08lx  spte=%08lx", gpte, spte );
  3.1027 -
  3.1028 -    // write back updated gpte
  3.1029 -    // XXX watch out for read-only L2 entries! (not used in Linux)
  3.1030 -    if ( unlikely( __put_user( gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
  3.1031 -        domain_crash();  // fixme!
  3.1032 -
  3.1033 -    if ( unlikely( __put_user( spte, (unsigned long*)&shadow_linear_pg_table[va>>PAGE_SHIFT])) )
  3.1034 -    { 
  3.1035 -        // failed:
  3.1036 -        //  the L1 may not be shadowed, or the L2 entry may be insufficient
  3.1037 -
  3.1038 -        unsigned long gpde, spde, gl1pfn, sl1pfn, sl1ss;
  3.1039 -
  3.1040 -        SH_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx  spte=%08lx",gpte,spte );
  3.1041 -
  3.1042 -        gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]);
  3.1043 -
  3.1044 -        gl1pfn = gpde>>PAGE_SHIFT;
  3.1045 +    /*
  3.1046 +     * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
  3.1047 +     */
  3.1048  
  3.1049 -        sl1ss = __shadow_status(&current->mm, gl1pfn);
  3.1050 -        if ( ! (sl1ss & PSH_shadowed) )
  3.1051 -        {
  3.1052 -            // this L1 is NOT already shadowed so we need to shadow it
  3.1053 -            struct pfn_info *sl1pfn_info;
  3.1054 -            unsigned long *gpl1e, *spl1e;
  3.1055 -            int i;
  3.1056 -            sl1pfn_info = alloc_shadow_page( &current->mm ); 
  3.1057 -            sl1pfn_info->u.inuse.type_info = PGT_l1_page_table;
  3.1058 -			
  3.1059 -            sl1pfn = sl1pfn_info - frame_table;
  3.1060 -
  3.1061 -            SH_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
  3.1062 -            perfc_incrc(shadow_l1_table_count);
  3.1063 -            perfc_incr(shadow_l1_pages);
  3.1064 -
  3.1065 -            set_shadow_status(&current->mm, gl1pfn, PSH_shadowed | sl1pfn);
  3.1066 -
  3.1067 -            l2pde_general( m, &gpde, &spde, sl1pfn );
  3.1068 +    /* XXX Watch out for read-only L2 entries! (not used in Linux). */
  3.1069 +    if ( unlikely(__put_user(gpte, (unsigned long *)
  3.1070 +                             &linear_pg_table[va >> PAGE_SHIFT])) )
  3.1071 +        domain_crash();
  3.1072  
  3.1073 -            linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
  3.1074 -            shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] =  mk_l2_pgentry(spde);
  3.1075 -
  3.1076 -            gpl1e = (unsigned long *) &(linear_pg_table[
  3.1077 -                (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]);
  3.1078 -
  3.1079 -            spl1e = (unsigned long *) &shadow_linear_pg_table[
  3.1080 -                (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ];
  3.1081 -
  3.1082 -
  3.1083 -            for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
  3.1084 -            {
  3.1085 -                l1pte_no_fault( m, &gpl1e[i], &spl1e[i] );
  3.1086 -            }
  3.1087 -
  3.1088 +    /*
  3.1089 +     * Update of shadow PTE can fail because the L1 p.t. is not shadowed,
  3.1090 +     * or because the shadow isn't linked into this shadow L2 p.t.
  3.1091 +     */
  3.1092 +    if ( unlikely(__put_user(spte, (unsigned long *)
  3.1093 +                             &shadow_linear_pg_table[va >> PAGE_SHIFT])) )
  3.1094 +    {
  3.1095 +        SH_VVLOG("3: not shadowed/mapped gpte=%08lx spte=%08lx", gpte, spte);
  3.1096 +        shadow_map_l1_into_current_l2(va);
  3.1097 +        shadow_linear_pg_table[va >> PAGE_SHIFT] = mk_l1_pgentry(spte);
  3.1098 +    }
  3.1099  
  3.1100 -        }
  3.1101 -        else
  3.1102 -        {
  3.1103 -            // this L1 was shadowed (by another PT) but we didn't have an L2
  3.1104 -            // entry for it
  3.1105 -
  3.1106 -            SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
  3.1107 -
  3.1108 -            sl1pfn = sl1ss & PSH_pfn_mask;
  3.1109 -            l2pde_general( m, &gpde, &spde, sl1pfn );
  3.1110 -
  3.1111 -            linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
  3.1112 -            shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
  3.1113 -   
  3.1114 -        }              
  3.1115 -
  3.1116 -        shadow_linear_pg_table[va>>PAGE_SHIFT] = mk_l1_pgentry(spte);
  3.1117 -        // (we need to do the above even if we've just made the shadow L1)
  3.1118 -
  3.1119 -    } // end of fixup writing the shadow L1 directly failed
  3.1120 -     
  3.1121      perfc_incrc(shadow_fixup_count);
  3.1122 -
  3.1123 -	m->shadow_fault_count++;
  3.1124 -
  3.1125 -    check_pagetable( current, current->mm.pagetable, "post-sf" );
  3.1126 +    m->shadow_fault_count++;
  3.1127  
  3.1128      shadow_unlock(m);
  3.1129  
  3.1130 -    return 1; // let's try the faulting instruction again...
  3.1131 -
  3.1132 +    check_pagetable(m, current->mm.pagetable, "post-sf");
  3.1133 +    return 1;
  3.1134  }
  3.1135  
  3.1136  
  3.1137 -void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
  3.1138 -                                 unsigned long *prev_spfn_ptr,
  3.1139 -                                 l1_pgentry_t **prev_spl1e_ptr )
  3.1140 +void shadow_l1_normal_pt_update(
  3.1141 +    unsigned long pa, unsigned long gpte,
  3.1142 +    unsigned long *prev_spfn_ptr,
  3.1143 +    l1_pgentry_t **prev_spl1e_ptr)
  3.1144  {
  3.1145 -    unsigned long gpfn, spfn, spte, prev_spfn = *prev_spfn_ptr;    
  3.1146 -    l1_pgentry_t * spl1e, * prev_spl1e = *prev_spl1e_ptr;
  3.1147 -
  3.1148 +    unsigned long spfn, spte, prev_spfn = *prev_spfn_ptr;    
  3.1149 +    l1_pgentry_t *spl1e, *prev_spl1e = *prev_spl1e_ptr;
  3.1150  
  3.1151 -    SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%p\n",
  3.1152 -             pa,gpte,prev_spfn, prev_spl1e);
  3.1153 +    /* N.B. To get here, we know the l1 page *must* be shadowed. */
  3.1154 +    SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, "
  3.1155 +             "prev_spfn=%08lx, prev_spl1e=%p\n",
  3.1156 +             pa, gpte, prev_spfn, prev_spl1e);
  3.1157  
  3.1158 -    // to get here, we know the l1 page *must* be shadowed
  3.1159 -
  3.1160 -    gpfn = pa >> PAGE_SHIFT;
  3.1161 -    spfn = __shadow_status(&current->mm, gpfn) & PSH_pfn_mask;
  3.1162 +    spfn = __shadow_status(&current->mm, pa >> PAGE_SHIFT) & PSH_pfn_mask;
  3.1163  
  3.1164      if ( spfn == prev_spfn )
  3.1165      {
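
shadow_fault() and shadow_map_l1_into_current_l2() above index
shadow_linear_pg_table[] directly by va >> PAGE_SHIFT. This works because
shadow_l2_table() installs the shadow L2 into itself at
SH_LINEAR_PT_VIRT_START, so that 4MB window aliases every shadow L1 page as
one flat array of PTEs (illustrative sketch; the sketch_ name is not from
this changeset):

    /* What &shadow_linear_pg_table[va >> PAGE_SHIFT] computes. */
    static inline unsigned long *sketch_shadow_pte_slot(unsigned long va)
    {
        return (unsigned long *)SH_LINEAR_PT_VIRT_START + (va >> PAGE_SHIFT);
    }

    /* __put_user() is still required when storing through the slot: the
     * access faults whenever the relevant shadow L1 is absent, which is
     * exactly the case shadow_map_l1_into_current_l2() repairs. */
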
  3.1166 @@ -798,54 +653,44 @@ void shadow_l1_normal_pt_update( unsigne
  3.1167      }
  3.1168      else
  3.1169      {
  3.1170 -        if( prev_spl1e ) unmap_domain_mem( prev_spl1e );
  3.1171 -        spl1e = (l1_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
  3.1172 +        if ( prev_spl1e != NULL )
   3.1173 +            unmap_domain_mem(prev_spl1e);
  3.1174 +        spl1e = (l1_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
  3.1175          *prev_spfn_ptr  = spfn;
  3.1176          *prev_spl1e_ptr = spl1e;
  3.1177      }
  3.1178  
  3.1179 -    // XXX we assume only pagetables can be shadowed; 
  3.1180 -    // this will have to change to allow arbitrary CoW etc.
  3.1181 -
  3.1182 -    l1pte_no_fault( &current->mm, &gpte, &spte );
  3.1183 -
  3.1184 -
  3.1185 -    spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t) ] = mk_l1_pgentry( spte );
  3.1186 -
  3.1187 +    l1pte_propagate_from_guest(&current->mm, &gpte, &spte);
  3.1188 +    spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = mk_l1_pgentry(spte);
  3.1189  }
  3.1190  
  3.1191 -void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte )
  3.1192 +void shadow_l2_normal_pt_update(unsigned long pa, unsigned long gpte)
  3.1193  {
  3.1194 -    unsigned long gpfn, spfn, spte;
  3.1195 -    l2_pgentry_t * sp2le;
  3.1196 -    unsigned long s_sh=0;
  3.1197 +    unsigned long spfn, spte;
  3.1198 +    l2_pgentry_t *spl2e;
  3.1199 +    unsigned long s_sh;
  3.1200  
  3.1201 +    /* N.B. To get here, we know the l2 page *must* be shadowed. */
  3.1202      SH_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte);
  3.1203  
  3.1204 -    // to get here, we know the l2 page has a shadow
  3.1205 +    spfn = __shadow_status(&current->mm, pa >> PAGE_SHIFT) & PSH_pfn_mask;
  3.1206 +
  3.1207 +    s_sh = (gpte & _PAGE_PRESENT) ?
  3.1208 +        __shadow_status(&current->mm, gpte >> PAGE_SHIFT) : 0;
  3.1209  
  3.1210 -    gpfn = pa >> PAGE_SHIFT;
  3.1211 -    spfn = __shadow_status(&current->mm, gpfn) & PSH_pfn_mask;
  3.1212 +    /* XXXX Should mark guest pte as DIRTY and ACCESSED too! */
  3.1213 +    l2pde_general(&current->mm, &gpte, &spte, s_sh);
  3.1214 +    spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
  3.1215 +    spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)] = mk_l2_pgentry(spte);
  3.1216 +    unmap_domain_mem(spl2e);
  3.1217 +}
  3.1218  
  3.1219  
  3.1220 -    spte = 0;
  3.1221  
  3.1222 -    if( gpte & _PAGE_PRESENT )
  3.1223 -        s_sh = __shadow_status(&current->mm, gpte >> PAGE_SHIFT);
  3.1224 -
  3.1225 -    sp2le = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
  3.1226 -    // no real need for a cache here
  3.1227  
  3.1228 -    l2pde_general( &current->mm, &gpte, &spte, s_sh );
  3.1229 -
  3.1230 -    // XXXX Should mark guest pte as DIRTY and ACCESSED too!!!!!
  3.1231 -
  3.1232 -    sp2le[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t) ] = 
  3.1233 -        mk_l2_pgentry( spte );
  3.1234 -
  3.1235 -    unmap_domain_mem( (void *) sp2le );
  3.1236 -}
  3.1237 -
  3.1238 +/************************************************************************/
  3.1239 +/************************************************************************/
  3.1240 +/************************************************************************/
  3.1241  
  3.1242  #if SHADOW_DEBUG
  3.1243  
  3.1244 @@ -853,29 +698,34 @@ static int sh_l2_present;
  3.1245  static int sh_l1_present;
  3.1246  char * sh_check_name;
  3.1247  
  3.1248 -#define FAIL(_f, _a...)                             \
  3.1249 -{printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n",  sh_check_name, level, i, ## _a , gpte, spte ); BUG();}
  3.1250 +#define FAIL(_f, _a...)                                        \
  3.1251 +    do {                                                       \
  3.1252 +        printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n",  \
  3.1253 +               sh_check_name, level, i, ## _a , gpte, spte);   \
  3.1254 +        BUG();                                                 \
  3.1255 +    } while ( 0 )
  3.1256  
  3.1257 -static int check_pte( struct mm_struct *m, 
  3.1258 -                      unsigned long gpte, unsigned long spte, int level, int i )
  3.1259 +static int check_pte(
  3.1260 +    struct mm_struct *m, unsigned long gpte, unsigned long spte, 
  3.1261 +    int level, int i)
  3.1262  {
  3.1263      unsigned long mask, gpfn, spfn;
  3.1264  
  3.1265 -    if ( spte == 0 || spte == 0xdeadface || spte == 0x00000E00)
  3.1266 -        return 1;  // always safe
  3.1267 +    if ( (spte == 0) || (spte == 0xdeadface) || (spte == 0x00000E00) )
  3.1268 +        return 1;  /* always safe */
  3.1269  
  3.1270      if ( !(spte & _PAGE_PRESENT) )
  3.1271          FAIL("Non zero not present spte");
  3.1272  
  3.1273 -    if( level == 2 ) sh_l2_present++;
  3.1274 -    if( level == 1 ) sh_l1_present++;
  3.1275 +    if ( level == 2 ) sh_l2_present++;
  3.1276 +    if ( level == 1 ) sh_l1_present++;
  3.1277  
  3.1278      if ( !(gpte & _PAGE_PRESENT) )
  3.1279          FAIL("Guest not present yet shadow is");
  3.1280  
  3.1281      mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|0xFFFFF000);
  3.1282  
  3.1283 -    if ( (spte & mask) != (gpte & mask ) )
  3.1284 +    if ( (spte & mask) != (gpte & mask) )
  3.1285          FAIL("Corrupt?");
  3.1286  
  3.1287      if ( (spte & _PAGE_DIRTY ) && !(gpte & _PAGE_DIRTY) )
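
The FAIL() and FAILPT() macros are rewrapped in do { ... } while ( 0 ) so
that a multi-statement macro expands to exactly one statement. Without the
wrapper, the bare brace-block form breaks under an unbraced if/else, because
the `;` the caller writes after the expanded block orphans the else:

    /* With the old {...} form this is a syntax error; with do/while(0)
     * it parses as intended. */
    if ( !(gpte & _PAGE_PRESENT) )
        FAIL("Guest not present yet shadow is");
    else
        return 1;
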
  3.1288 @@ -887,108 +737,97 @@ static int check_pte( struct mm_struct *
  3.1289      if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) )
  3.1290          FAIL("RW coherence");
  3.1291  
  3.1292 -    if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY) ))
  3.1293 +    if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY)) )
  3.1294          FAIL("RW2 coherence");
  3.1295   
  3.1296 -    spfn = spte>>PAGE_SHIFT;
  3.1297 -    gpfn = gpte>>PAGE_SHIFT;
  3.1298 +    spfn = spte >> PAGE_SHIFT;
  3.1299 +    gpfn = gpte >> PAGE_SHIFT;
  3.1300  
  3.1301      if ( gpfn == spfn )
  3.1302      {
  3.1303          if ( level > 1 )
  3.1304 -            FAIL("Linear map ???");    // XXX this will fail on BSD
  3.1305 -
  3.1306 -        return 1;
  3.1307 +            FAIL("Linear map ???");    /* XXX this will fail on BSD */
  3.1308      }
  3.1309      else
  3.1310      {
  3.1311          if ( level < 2 )
  3.1312              FAIL("Shadow in L1 entry?");
  3.1313  
  3.1314 -        if ( __shadow_status(p, gpfn) != (PSH_shadowed | spfn) )
  3.1315 -            FAIL("spfn problem g.sf=%08lx", 
  3.1316 -                 __shadow_status(p, gpfn) );
  3.1317 +        if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
  3.1318 +            FAIL("spfn problem g.sf=%08lx", __shadow_status(m, gpfn));
  3.1319      }
  3.1320  
  3.1321      return 1;
  3.1322  }
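
check_pte accepts a shadow entry that differs from the guest entry only in the DIRTY, ACCESSED and RW bits and the frame address; any other difference trips the "Corrupt?" check. A self-contained sketch of that mask test, using the conventional x86 flag values (assumed here rather than taken from the tree's headers):

    #include <stdio.h>

    #define _PAGE_RW        0x002UL
    #define _PAGE_ACCESSED  0x020UL
    #define _PAGE_DIRTY     0x040UL

    /* Bits the shadow may legitimately change: D, A, RW and the pfn. */
    static const unsigned long mask = ~(_PAGE_DIRTY | _PAGE_ACCESSED |
                                        _PAGE_RW | 0xFFFFF000UL);

    int main(void)
    {
        unsigned long gpte = 0x12345067UL;  /* P|RW|A|D set          */
        unsigned long spte = 0x99999065UL;  /* RW cleared, other pfn */
        printf("coherent: %s\n",
               ((gpte & mask) == (spte & mask)) ? "yes" : "no");
        return 0;
    }
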
  3.1323  
  3.1324  
  3.1325 -static int check_l1_table( struct mm_struct *m, unsigned long va, 
  3.1326 -                           unsigned long g2, unsigned long s2 )
  3.1327 +static int check_l1_table(
  3.1328 +    struct mm_struct *m, unsigned long va, 
  3.1329 +    unsigned long g2, unsigned long s2)
  3.1330  {
  3.1331 -    int j;
  3.1332 +    int i;
  3.1333      unsigned long *gpl1e, *spl1e;
  3.1334  
  3.1335 -    //gpl1e = (unsigned long *) &(linear_pg_table[ va>>PAGE_SHIFT]);
  3.1336 -    //spl1e = (unsigned long *) &(shadow_linear_pg_table[ va>>PAGE_SHIFT]);
  3.1337 -
  3.1338 -    gpl1e = map_domain_mem( g2<<PAGE_SHIFT );
  3.1339 -    spl1e = map_domain_mem( s2<<PAGE_SHIFT );
  3.1340 +    gpl1e = map_domain_mem(g2 << PAGE_SHIFT);
  3.1341 +    spl1e = map_domain_mem(s2 << PAGE_SHIFT);
  3.1342  
  3.1343 -    for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
  3.1344 -    {
  3.1345 -        unsigned long gpte = gpl1e[j];
  3.1346 -        unsigned long spte = spl1e[j];
  3.1347 -  
  3.1348 -        check_pte( p, gpte, spte, 1, j );
  3.1349 -    }
  3.1350 +    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
  3.1351 +        check_pte(m, gpl1e[i], spl1e[i], 1, i);
  3.1352   
  3.1353 -    unmap_domain_mem( spl1e );
  3.1354 -    unmap_domain_mem( gpl1e );
  3.1355 +    unmap_domain_mem(spl1e);
  3.1356 +    unmap_domain_mem(gpl1e);
  3.1357  
  3.1358      return 1;
  3.1359  }
  3.1360  
  3.1361 -#define FAILPT(_f, _a...)                             \
  3.1362 -{printk("XXX FAIL %s-PT" _f "\n", s, ## _a ); BUG();}
  3.1363 +#define FAILPT(_f, _a...)                                      \
  3.1364 +    do {                                                       \
   3.1365 +        printk("XXX FAIL %s-PT" _f "\n", s, ## _a);            \
  3.1366 +        BUG();                                                 \
  3.1367 +    } while ( 0 )
  3.1368  
  3.1369 -int check_pagetable( struct mm_struct *m, pagetable_t pt, char *s )
  3.1370 +int check_pagetable(struct mm_struct *m, pagetable_t pt, char *s)
  3.1371  {
  3.1372      unsigned long gptbase = pagetable_val(pt);
  3.1373      unsigned long gpfn, spfn;
  3.1374 -    int i;
  3.1375 +    int           i;
  3.1376      l2_pgentry_t *gpl2e, *spl2e;
  3.1377  
  3.1378      sh_check_name = s;
  3.1379  
  3.1380 -    SH_VVLOG("%s-PT Audit",s);
  3.1381 +    SH_VVLOG("%s-PT Audit", s);
  3.1382  
  3.1383      sh_l2_present = sh_l1_present = 0;
  3.1384  
  3.1385 -    gpfn =  gptbase >> PAGE_SHIFT;
  3.1386 +    gpfn = gptbase >> PAGE_SHIFT;
  3.1387  
  3.1388 -    if ( ! (__shadow_status(p, gpfn) & PSH_shadowed) )
  3.1389 +    if ( !(__shadow_status(m, gpfn) & PSH_shadowed) )
  3.1390      {
  3.1391          printk("%s-PT %08lx not shadowed\n", s, gptbase);
  3.1392 -
  3.1393 -        if( __shadow_status(p, gpfn) != 0 ) BUG();
  3.1394 -
  3.1395 +        if ( __shadow_status(m, gpfn) != 0 )
  3.1396 +            BUG();
  3.1397          return 0;
  3.1398      }
  3.1399   
  3.1400 -    spfn = __shadow_status(p, gpfn) & PSH_pfn_mask;
  3.1401 +    spfn = __shadow_status(m, gpfn) & PSH_pfn_mask;
  3.1402  
  3.1403 -    if ( ! __shadow_status(p, gpfn) == (PSH_shadowed | spfn) )
  3.1404 +    if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
  3.1405          FAILPT("ptbase shadow inconsistent1");
  3.1406  
  3.1407      gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT );
  3.1408      spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
  3.1409  
  3.1410 -    //ipl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
  3.1411 -
  3.1412 -
  3.1413 -    if ( memcmp( &spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
  3.1414 -                 &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
  3.1415 -                 ((SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT))-DOMAIN_ENTRIES_PER_L2_PAGETABLE)
  3.1416 -                 * sizeof(l2_pgentry_t)) )
  3.1417 +    if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
  3.1418 +                &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
  3.1419 +                ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
  3.1420 +                 DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
  3.1421      {
  3.1422          printk("gpfn=%08lx spfn=%08lx\n", gpfn, spfn);
  3.1423 -        for (i=DOMAIN_ENTRIES_PER_L2_PAGETABLE; 
  3.1424 -             i<(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT));
  3.1425 -             i++ )
  3.1426 +        for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE; 
  3.1427 +              i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
  3.1428 +              i++ )
  3.1429              printk("+++ (%d) %08lx %08lx\n",i,
  3.1430 -                   l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]) );
  3.1431 +                   l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
  3.1432          FAILPT("hypervisor entries inconsistent");
  3.1433      }
  3.1434  
  3.1435 @@ -996,49 +835,41 @@ int check_pagetable( struct mm_struct *m
  3.1436            l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
  3.1437          FAILPT("hypervisor linear map inconsistent");
  3.1438  
  3.1439 -    if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) != 
  3.1440 +    if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> 
  3.1441 +                              L2_PAGETABLE_SHIFT]) != 
  3.1442            ((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR)) )
  3.1443          FAILPT("hypervisor shadow linear map inconsistent %08lx %08lx",
  3.1444 -               l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]),
  3.1445 -               (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR
  3.1446 -            );
  3.1447 +               l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
  3.1448 +                                   L2_PAGETABLE_SHIFT]),
  3.1449 +               (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
  3.1450  
  3.1451      if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
  3.1452 -          ((__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) | __PAGE_HYPERVISOR))) )
  3.1453 +          ((__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) | 
  3.1454 +            __PAGE_HYPERVISOR))) )
  3.1455          FAILPT("hypervisor per-domain map inconsistent");
  3.1456  
  3.1457  
  3.1458 -    // check the whole L2
  3.1459 +    /* Check the whole L2. */
  3.1460      for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
  3.1461 -    {
  3.1462 -        unsigned long gpte = l2_pgentry_val(gpl2e[i]);
  3.1463 -        unsigned long spte = l2_pgentry_val(spl2e[i]);
  3.1464 +        check_pte(m, l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]), 2, i);
  3.1465  
  3.1466 -        check_pte( p, gpte, spte, 2, i );
  3.1467 -    }
  3.1468 -
  3.1469 -
  3.1470 -    // go back and recurse
  3.1471 +    /* Go back and recurse. */
  3.1472      for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
  3.1473      {
  3.1474 -        unsigned long gpte = l2_pgentry_val(gpl2e[i]);
  3.1475 -        unsigned long spte = l2_pgentry_val(spl2e[i]);
  3.1476 -
  3.1477 -        if ( spte )    
  3.1478 -            check_l1_table( p,
  3.1479 -                            i<<L2_PAGETABLE_SHIFT,
  3.1480 -                            gpte>>PAGE_SHIFT, spte>>PAGE_SHIFT );
  3.1481 -
  3.1482 +        if ( l2_pgentry_val(spl2e[i]) != 0 )
  3.1483 +            check_l1_table(
  3.1484 +                m, i << L2_PAGETABLE_SHIFT,
  3.1485 +                l2_pgentry_val(gpl2e[i]) >> PAGE_SHIFT, 
  3.1486 +                l2_pgentry_val(spl2e[i]) >> PAGE_SHIFT);
  3.1487      }
  3.1488  
  3.1489 -    unmap_domain_mem( spl2e );
  3.1490 -    unmap_domain_mem( gpl2e );
  3.1491 +    unmap_domain_mem(spl2e);
  3.1492 +    unmap_domain_mem(gpl2e);
  3.1493  
  3.1494      SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
  3.1495 -             sh_l2_present, sh_l1_present );
  3.1496 +             sh_l2_present, sh_l1_present);
  3.1497   
  3.1498      return 1;
  3.1499  }
  3.1500  
  3.1501 -
  3.1502  #endif
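
When SHADOW_DEBUG is off, check_pagetable compiles away to ((void)0) (see the stub at the end of shadow.h below), so audit calls can stay at their call sites for free. A hedged sketch of that stub pattern with invented names:

    /* Compile-time stub pattern, assuming a DEBUG switch. */
    #define DEBUG 0

    #if DEBUG
    extern int check_invariants(const char *who);
    #else
    #define check_invariants(who) ((void)0)  /* valid in expression position */
    #endif

    void update_state(void)
    {
        /* ... mutate state ... */
        check_invariants("update_state");    /* costs nothing when DEBUG == 0 */
    }
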
     4.1 --- a/xen/include/asm-x86/shadow.h	Wed Oct 13 03:33:39 2004 +0000
     4.2 +++ b/xen/include/asm-x86/shadow.h	Wed Oct 13 14:25:21 2004 +0000
     4.3 @@ -8,21 +8,19 @@
     4.4  #include <xen/perfc.h>
     4.5  #include <asm/processor.h>
     4.6  
     4.7 -
     4.8  /* Shadow PT flag bits in pfn_info */
     4.9  #define PSH_shadowed    (1<<31) /* page has a shadow. PFN points to shadow */
    4.10 -#define PSH_pending     (1<<29) /* page is in the process of being shadowed */
    4.11  #define PSH_pfn_mask    ((1<<21)-1)
    4.12  
    4.13  /* Shadow PT operation mode : shadowmode variable in mm_struct */
    4.14  #define SHM_test        (1) /* just run domain on shadow PTs */
    4.15  #define SHM_logdirty    (2) /* log pages that are dirtied */
    4.16  #define SHM_translate   (3) /* lookup machine pages in translation table */
    4.17 -//#define SHM_cow       (4) /* copy on write all dirtied pages */
    4.18 -
    4.19 +#define SHM_cow         (4) /* copy on write all dirtied pages */
    4.20  
    4.21  #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
    4.22 -#define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
    4.23 +#define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
    4.24 +     (SH_LINEAR_PT_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
    4.25  
    4.26  #define shadow_mode(_d)      ((_d)->mm.shadow_mode)
    4.27  #define shadow_lock_init(_d) spin_lock_init(&(_d)->mm.shadow_lock)
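
shadow_linear_l2_table locates the L2 view of the shadow linear mapping by scaling the virtual base down by the L2/L1 coverage ratio, the usual recursive page-table trick. A small arithmetic sketch with the standard 32-bit non-PAE shifts; the base address is a made-up example, not the tree's actual constant:

    #include <stdio.h>

    #define L1_PAGETABLE_SHIFT 12            /* 4KB pages        */
    #define L2_PAGETABLE_SHIFT 22            /* 4MB per L2 entry */
    #define SH_LINEAR_BASE     0xFC400000UL  /* example base, assumed */

    int main(void)
    {
        /* Each step down a level divides the address by 2^(22-12) = 1024. */
        unsigned long l2 = SH_LINEAR_BASE +
            (SH_LINEAR_BASE >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT));
        printf("L1 view at %08lx, L2 view at %08lx\n", SH_LINEAR_BASE, l2);
        return 0;
    }
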
    4.28 @@ -32,9 +30,9 @@
    4.29  extern void shadow_mode_init(void);
    4.30  extern int shadow_mode_control(struct domain *p, dom0_shadow_control_t *sc);
    4.31  extern int shadow_fault(unsigned long va, long error_code);
    4.32 -extern void shadow_l1_normal_pt_update(unsigned long pa, unsigned long gpte, 
    4.33 -                                       unsigned long *prev_spfn_ptr,
    4.34 -                                       l1_pgentry_t **prev_spl1e_ptr);
    4.35 +extern void shadow_l1_normal_pt_update(
    4.36 +    unsigned long pa, unsigned long gpte, 
    4.37 +    unsigned long *prev_spfn_ptr, l1_pgentry_t **prev_spl1e_ptr);
    4.38  extern void shadow_l2_normal_pt_update(unsigned long pa, unsigned long gpte);
    4.39  extern void unshadow_table(unsigned long gpfn, unsigned int type);
    4.40  extern int shadow_mode_enable(struct domain *p, unsigned int mode);
    4.41 @@ -47,20 +45,19 @@ static inline void shadow_mode_disable(s
    4.42  }
    4.43  
    4.44  extern unsigned long shadow_l2_table( 
    4.45 -    struct mm_struct *m, unsigned long gpfn );
    4.46 +    struct mm_struct *m, unsigned long gpfn);
    4.47  
    4.48 -#define SHADOW_DEBUG 0
    4.49 +#define SHADOW_DEBUG      0
    4.50  #define SHADOW_HASH_DEBUG 0
    4.51 -#define SHADOW_OPTIMISE 1
    4.52  
    4.53  struct shadow_status {
    4.54 -    unsigned long pfn;            // gpfn 
    4.55 -    unsigned long spfn_and_flags; // spfn plus flags
    4.56 -    struct shadow_status *next;   // use pull-to-front list.
    4.57 +    unsigned long pfn;            /* Guest pfn.             */
    4.58 +    unsigned long spfn_and_flags; /* Shadow pfn plus flags. */
    4.59 +    struct shadow_status *next;   /* Pull-to-front list.    */
    4.60  };
    4.61  
    4.62 -#define shadow_ht_extra_size         128 /*128*/
    4.63 -#define shadow_ht_buckets            256 /*256*/
    4.64 +#define shadow_ht_extra_size 128
    4.65 +#define shadow_ht_buckets    256
    4.66  
    4.67  #ifdef VERBOSE
    4.68  #define SH_LOG(_f, _a...)                             \
    4.69 @@ -89,63 +86,60 @@ printk("DOM%u: (file=shadow.c, line=%d) 
    4.70  
    4.71  /************************************************************************/
    4.72  
    4.73 -static inline int __mark_dirty( struct mm_struct *m, unsigned int mfn )
     4.74 +static inline int __mark_dirty(struct mm_struct *m, unsigned int mfn)
    4.75  {
    4.76 -    unsigned int pfn;
    4.77 -    int rc = 0;
    4.78 +    unsigned long pfn;
    4.79 +    int           rc = 0;
    4.80  
    4.81      ASSERT(spin_is_locked(&m->shadow_lock));
    4.82 +    ASSERT(m->shadow_dirty_bitmap != NULL);
    4.83  
    4.84      pfn = machine_to_phys_mapping[mfn];
    4.85  
    4.86 -    /* We use values with the top bit set to mark MFNs that aren't
    4.87 -       really part of the domain's psuedo-physical memory map e.g.
    4.88 -       the shared info frame. Nothing to do here...
    4.89 -    */
    4.90 -    if ( unlikely(pfn & 0x80000000U) ) return rc; 
    4.91 +    /*
    4.92 +     * Values with the MSB set denote MFNs that aren't really part of the 
    4.93 +     * domain's pseudo-physical memory map (e.g., the shared info frame).
    4.94 +     * Nothing to do here...
    4.95 +     */
    4.96 +    if ( unlikely(pfn & 0x80000000UL) )
    4.97 +        return rc;
    4.98  
    4.99 -    ASSERT(m->shadow_dirty_bitmap);
   4.100 -    if( likely(pfn<m->shadow_dirty_bitmap_size) )
   4.101 +    if ( likely(pfn < m->shadow_dirty_bitmap_size) )
   4.102      {
   4.103 -        /* These updates occur with mm.shadow_lock held, so use 
   4.104 -           (__) version of test_and_set */
   4.105 -        if ( __test_and_set_bit( pfn, m->shadow_dirty_bitmap ) == 0 )
   4.106 +        /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
   4.107 +        if ( !__test_and_set_bit(pfn, m->shadow_dirty_bitmap) )
   4.108          {
   4.109 -            // if we set it
   4.110              m->shadow_dirty_count++;
   4.111              rc = 1;
   4.112          }
   4.113      }
   4.114 -    else
   4.115 +#ifndef NDEBUG
   4.116 +    else if ( mfn < max_page )
   4.117      {
   4.118 -		if ( mfn < max_page )
   4.119 -		{
   4.120 -			SH_LOG("mark_dirty OOR! mfn=%x pfn=%x max=%x (mm %p)",
   4.121 -				   mfn, pfn, m->shadow_dirty_bitmap_size, m );
   4.122 -			SH_LOG("dom=%p caf=%08x taf=%08x\n", 
   4.123 -				   frame_table[mfn].u.inuse.domain,
   4.124 -				   frame_table[mfn].count_info, 
   4.125 -				   frame_table[mfn].u.inuse.type_info );
   4.126 -			{
   4.127 -				extern void show_trace(unsigned long *esp);		
   4.128 -				unsigned long *esp;
   4.129 -				__asm__ __volatile__ ("movl %%esp,%0" : "=r" (esp) : );
   4.130 -				show_trace(esp);
   4.131 -			}
   4.132 -		}
    4.133 +        SH_LOG("mark_dirty OOR! mfn=%x pfn=%lx max=%x (mm %p)",
    4.134 +               mfn, pfn, m->shadow_dirty_bitmap_size, m);
    4.135 +        SH_LOG("dom=%p caf=%08x taf=%08x\n", 
    4.136 +               frame_table[mfn].u.inuse.domain,
    4.137 +               frame_table[mfn].count_info, 
    4.138 +               frame_table[mfn].u.inuse.type_info);
   4.139 +        {
   4.140 +            extern void show_trace(unsigned long *esp);
   4.141 +            unsigned long *esp;
   4.142 +            __asm__ __volatile__ ("movl %%esp,%0" : "=r" (esp) : );
   4.143 +            show_trace(esp);
   4.144 +        }
   4.145      }
   4.146 +#endif
   4.147  
   4.148      return rc;
   4.149  }
   4.150  
   4.151  
   4.152 -static inline int mark_dirty( struct mm_struct *m, unsigned int mfn )
   4.153 +static inline int mark_dirty(struct mm_struct *m, unsigned int mfn)
   4.154  {
   4.155      int rc;
   4.156 -    //ASSERT(local_irq_is_enabled());
   4.157 -    //if(spin_is_locked(&m->shadow_lock)) printk("+");
   4.158      shadow_lock(m);
   4.159 -    rc = __mark_dirty( m, mfn );
   4.160 +    rc = __mark_dirty(m, mfn);
   4.161      shadow_unlock(m);
   4.162      return rc;
   4.163  }
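
mark_dirty is just the lock-taking wrapper around __mark_dirty, which sets one bit per pseudo-physical frame and counts each frame once. A hedged model of the core operation, using a plain array in place of Xen's bitop helpers:

    #include <stdio.h>
    #include <string.h>

    #define BITMAP_FRAMES 1024
    static unsigned long dirty[BITMAP_FRAMES / (8 * sizeof(unsigned long))];
    static int dirty_count;

    /* Non-atomic test-and-set: safe only if the caller holds the lock
     * that serialises all bitmap updates (shadow_lock in the real code). */
    static int test_and_set(unsigned long pfn)
    {
        unsigned long *w = &dirty[pfn / (8 * sizeof(unsigned long))];
        unsigned long  m = 1UL << (pfn % (8 * sizeof(unsigned long)));
        int was_set = !!(*w & m);
        *w |= m;
        return was_set;
    }

    int main(void)
    {
        memset(dirty, 0, sizeof(dirty));
        if ( !test_and_set(42) )
            dirty_count++;      /* first hit: frame newly dirtied   */
        if ( !test_and_set(42) )
            dirty_count++;      /* second hit: already dirty, no-op */
        printf("dirty_count=%d\n", dirty_count);  /* prints 1 */
        return 0;
    }
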
   4.164 @@ -159,19 +153,19 @@ static inline void l1pte_write_fault(
   4.165      unsigned long gpte = *gpte_p;
   4.166      unsigned long spte = *spte_p;
   4.167  
   4.168 +    ASSERT(gpte & _PAGE_RW);
   4.169 +
   4.170 +    gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
   4.171 +
   4.172      switch ( m->shadow_mode )
   4.173      {
   4.174      case SHM_test:
   4.175 -        spte = gpte;
   4.176 -        gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
   4.177 -        spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
   4.178 +        spte = gpte | _PAGE_RW;
   4.179          break;
   4.180  
   4.181      case SHM_logdirty:
   4.182 -        spte = gpte;
   4.183 -        gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
   4.184 -        spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
   4.185 -        __mark_dirty( m, (gpte >> PAGE_SHIFT) );
   4.186 +        spte = gpte | _PAGE_RW;
   4.187 +        __mark_dirty(m, gpte >> PAGE_SHIFT);
   4.188          break;
   4.189      }
   4.190  
   4.191 @@ -185,21 +179,16 @@ static inline void l1pte_read_fault(
   4.192      unsigned long gpte = *gpte_p;
   4.193      unsigned long spte = *spte_p;
   4.194  
   4.195 +    gpte |= _PAGE_ACCESSED;
   4.196 +
   4.197      switch ( m->shadow_mode )
   4.198      {
   4.199      case SHM_test:
   4.200 -        spte = gpte;
   4.201 -        gpte |= _PAGE_ACCESSED;
   4.202 -        spte |= _PAGE_ACCESSED;
   4.203 -        if ( ! (gpte & _PAGE_DIRTY ) )
   4.204 -            spte &= ~ _PAGE_RW;
   4.205 +        spte = (gpte & _PAGE_DIRTY) ? gpte : (gpte & ~_PAGE_RW);
   4.206          break;
   4.207  
   4.208      case SHM_logdirty:
   4.209 -        spte = gpte;
   4.210 -        gpte |= _PAGE_ACCESSED;
   4.211 -        spte |= _PAGE_ACCESSED;
   4.212 -        spte &= ~ _PAGE_RW;
   4.213 +        spte = gpte & ~_PAGE_RW;
   4.214          break;
   4.215      }
   4.216  
   4.217 @@ -207,7 +196,7 @@ static inline void l1pte_read_fault(
   4.218      *spte_p = spte;
   4.219  }
   4.220  
   4.221 -static inline void l1pte_no_fault(
   4.222 +static inline void l1pte_propagate_from_guest(
   4.223      struct mm_struct *m, unsigned long *gpte_p, unsigned long *spte_p)
   4.224  { 
   4.225      unsigned long gpte = *gpte_p;
   4.226 @@ -219,22 +208,14 @@ static inline void l1pte_no_fault(
   4.227          spte = 0;
   4.228          if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == 
   4.229               (_PAGE_PRESENT|_PAGE_ACCESSED) )
   4.230 -        {
   4.231 -            spte = gpte;
   4.232 -            if ( ! (gpte & _PAGE_DIRTY ) )
   4.233 -                spte &= ~ _PAGE_RW;
   4.234 -        }
   4.235 +            spte = (gpte & _PAGE_DIRTY) ? gpte : (gpte & ~_PAGE_RW);
   4.236          break;
   4.237  
   4.238      case SHM_logdirty:
   4.239          spte = 0;
   4.240          if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == 
   4.241               (_PAGE_PRESENT|_PAGE_ACCESSED) )
   4.242 -        {
   4.243 -            spte = gpte;
   4.244 -            spte &= ~ _PAGE_RW;
   4.245 -        }
   4.246 -
   4.247 +            spte = gpte & ~_PAGE_RW;
   4.248          break;
   4.249      }
   4.250  
   4.251 @@ -243,7 +224,7 @@ static inline void l1pte_no_fault(
   4.252  }
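
The three l1pte_* helpers differ only in the rights they grant: the write-fault path sets RW plus DIRTY/ACCESSED, the read-fault path grants RW only if the guest entry is already dirty, and the propagation path above additionally rejects entries the guest has not yet accessed. A standalone sketch of the SHM_test propagation rule (flag values assumed as usual):

    #include <stdio.h>

    #define _PAGE_PRESENT  0x001UL
    #define _PAGE_RW       0x002UL
    #define _PAGE_ACCESSED 0x020UL
    #define _PAGE_DIRTY    0x040UL

    /* SHM_test propagation: shadow mirrors the guest, but RW is granted
     * only once the guest entry is marked dirty. */
    static unsigned long propagate(unsigned long gpte)
    {
        if ( (gpte & (_PAGE_PRESENT | _PAGE_ACCESSED)) !=
             (_PAGE_PRESENT | _PAGE_ACCESSED) )
            return 0;                /* force a fault to set ACCESSED first */
        return (gpte & _PAGE_DIRTY) ? gpte : (gpte & ~_PAGE_RW);
    }

    int main(void)
    {
        printf("%08lx\n", propagate(_PAGE_PRESENT));             /* 0       */
        printf("%08lx\n", propagate(_PAGE_PRESENT | _PAGE_ACCESSED |
                                    _PAGE_RW));                  /* RW off  */
        printf("%08lx\n", propagate(_PAGE_PRESENT | _PAGE_ACCESSED |
                                    _PAGE_RW | _PAGE_DIRTY));    /* as-is   */
        return 0;
    }
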
   4.253  
   4.254  static inline void l2pde_general(
   4.255 -    struct mm_struct *m, 
   4.256 +    struct mm_struct *m,
   4.257      unsigned long *gpde_p,
   4.258      unsigned long *spde_p,
   4.259      unsigned long sl1pfn)
   4.260 @@ -253,18 +234,16 @@ static inline void l2pde_general(
   4.261  
   4.262      spde = 0;
   4.263  
   4.264 -    if ( sl1pfn )
   4.265 +    if ( sl1pfn != 0 )
   4.266      {
   4.267 -        spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) | 
   4.268 +        spde = (gpde & ~PAGE_MASK) | (sl1pfn << PAGE_SHIFT) | 
   4.269              _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
   4.270 -        gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
   4.271 +        gpde |= _PAGE_ACCESSED | _PAGE_DIRTY;
   4.272  
   4.273 -        if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gpde & PAGE_MASK)  ) )
   4.274 -        {   
   4.275 -            // detect linear map, and keep pointing at guest
   4.276 -            SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
   4.277 +        /* Detect linear p.t. mappings and write-protect them. */
   4.278 +        if ( (frame_table[sl1pfn].u.inuse.type_info & PGT_type_mask) ==
   4.279 +             PGT_l2_page_table )
   4.280              spde = gpde & ~_PAGE_RW;
   4.281 -        }
   4.282      }
   4.283  
   4.284      *gpde_p = gpde;
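
The rewritten l2pde_general detects a linear page-table mapping by the type of the frame being pointed at, rather than by comparing addresses, and write-protects it. A much-simplified model; the frame-table layout and type tag here are invented for illustration, and the real code also sets ACCESSED/DIRTY on the shadow entry:

    #include <stdio.h>

    #define _PAGE_RW          0x002UL
    #define PAGE_MASK         (~0xFFFUL)
    #define PGT_l2_page_table 0x2UL     /* illustrative type tag */

    struct frame { unsigned long type; };
    static struct frame frame_table[16];

    /* If the "L1" being pointed at is really an L2, the guest has built a
     * linear mapping; keep the guest frame but strip write access so the
     * guest cannot modify page tables behind the shadow code's back. */
    static unsigned long shadow_pde(unsigned long gpde, unsigned long sl1pfn)
    {
        unsigned long spde = (gpde & ~PAGE_MASK) | (sl1pfn << 12) | _PAGE_RW;
        if ( frame_table[sl1pfn].type == PGT_l2_page_table )
            spde = gpde & ~_PAGE_RW;
        return spde;
    }

    int main(void)
    {
        frame_table[3].type = PGT_l2_page_table;
        printf("normal: %08lx\n", shadow_pde(0x00002027UL, 5));
        printf("linear: %08lx\n", shadow_pde(0x00003027UL, 3));
        return 0;
    }
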
   4.285 @@ -276,324 +255,316 @@ static inline void l2pde_general(
   4.286  #if SHADOW_HASH_DEBUG
   4.287  static void shadow_audit(struct mm_struct *m, int print)
   4.288  {
   4.289 -    int live=0, free=0, j=0, abs;
   4.290 +    int live = 0, free = 0, j = 0, abs;
   4.291      struct shadow_status *a;
   4.292  
   4.293 -    for( j = 0; j < shadow_ht_buckets; j++ )
   4.294 +    for ( j = 0; j < shadow_ht_buckets; j++ )
   4.295      {
   4.296          a = &m->shadow_ht[j];        
   4.297 -        if(a->pfn){live++; ASSERT(a->spfn_and_flags&PSH_pfn_mask);}
   4.298 -        ASSERT((a->pfn&0xf0000000)==0);
   4.299 -        ASSERT(a->pfn<0x00100000);
   4.300 -        a=a->next;
   4.301 -        while(a && live<9999)
   4.302 +        if ( a->pfn ) { live++; ASSERT(a->spfn_and_flags & PSH_pfn_mask); }
   4.303 +        ASSERT(a->pfn < 0x00100000UL);
   4.304 +        a = a->next;
   4.305 +        while ( a && (live < 9999) )
   4.306          { 
   4.307              live++; 
   4.308 -            if(a->pfn == 0 || a->spfn_and_flags == 0)
   4.309 +            if ( (a->pfn == 0) || (a->spfn_and_flags == 0) )
   4.310              {
   4.311                  printk("XXX live=%d pfn=%08lx sp=%08lx next=%p\n",
   4.312                         live, a->pfn, a->spfn_and_flags, a->next);
   4.313                  BUG();
   4.314              }
   4.315 -            ASSERT(a->pfn);
   4.316 -            ASSERT((a->pfn&0xf0000000)==0);
   4.317 -            ASSERT(a->pfn<0x00100000);
   4.318 -            ASSERT(a->spfn_and_flags&PSH_pfn_mask);
   4.319 -            a=a->next; 
   4.320 +            ASSERT(a->pfn < 0x00100000UL);
   4.321 +            ASSERT(a->spfn_and_flags & PSH_pfn_mask);
   4.322 +            a = a->next; 
   4.323          }
   4.324 -        ASSERT(live<9999);
   4.325 +        ASSERT(live < 9999);
   4.326      }
   4.327  
   4.328 -    a = m->shadow_ht_free;
   4.329 -    while(a) { free++; a=a->next; }
   4.330 +    for ( a = m->shadow_ht_free; a != NULL; a = a->next )
   4.331 +        free++; 
   4.332  
   4.333 -    if(print) printk("Xlive=%d free=%d\n",live,free);
    4.334 +    if ( print )
    4.335 +        printk("Xlive=%d free=%d\n", live, free);
   4.336  
   4.337 -    abs=(perfc_value(shadow_l1_pages)+perfc_value(shadow_l2_pages))-live;
   4.338 -    if( abs < -1 || abs > 1 )
   4.339 +    abs = (perfc_value(shadow_l1_pages) + perfc_value(shadow_l2_pages)) - live;
   4.340 +    if ( (abs < -1) || (abs > 1) )
   4.341      {
   4.342          printk("live=%d free=%d l1=%d l2=%d\n",live,free,
   4.343                 perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) );
   4.344          BUG();
   4.345      }
   4.346 -
   4.347  }
   4.348 -
   4.349  #else
   4.350 -#define shadow_audit(p, print)
   4.351 +#define shadow_audit(p, print) ((void)0)
   4.352  #endif
   4.353  
   4.354  
   4.355  
   4.356 -static inline struct shadow_status* hash_bucket( struct mm_struct *m,
   4.357 -                                                 unsigned int gpfn )
   4.358 +static inline struct shadow_status *hash_bucket(
   4.359 +    struct mm_struct *m, unsigned int gpfn)
   4.360  {
   4.361 -    return &(m->shadow_ht[gpfn % shadow_ht_buckets]);
   4.362 +    return &m->shadow_ht[gpfn % shadow_ht_buckets];
   4.363  }
   4.364  
   4.365  
   4.366 -static inline unsigned long __shadow_status( struct mm_struct *m,
   4.367 -                                             unsigned int gpfn )
   4.368 +static inline unsigned long __shadow_status(
   4.369 +    struct mm_struct *m, unsigned int gpfn)
   4.370  {
   4.371 -    struct shadow_status **ob, *b, *B = hash_bucket( m, gpfn );
   4.372 +    struct shadow_status *p, *x, *head;
   4.373  
   4.374 -    b = B;
   4.375 -    ob = NULL;
   4.376 +    x = head = hash_bucket(m, gpfn);
   4.377 +    p = NULL;
   4.378  
   4.379 -    SH_VVLOG("lookup gpfn=%08x bucket=%p", gpfn, b );
   4.380 -    shadow_audit(m,0);  // if in debug mode
   4.381 +    SH_VVLOG("lookup gpfn=%08x bucket=%p", gpfn, x);
   4.382 +    shadow_audit(m, 0);
   4.383  
   4.384      do
   4.385      {
   4.386 -        if ( b->pfn == gpfn )
   4.387 -        {
   4.388 -            unsigned long t;
   4.389 -            struct shadow_status *x;
   4.390 +        ASSERT(x->pfn || ((x == head) && (x->next == NULL)));
   4.391  
   4.392 -            // swap with head
   4.393 -            t=B->pfn; B->pfn=b->pfn; b->pfn=t;
   4.394 -            t=B->spfn_and_flags; B->spfn_and_flags=b->spfn_and_flags; 
   4.395 -            b->spfn_and_flags=t;
   4.396 +        if ( x->pfn == gpfn )
   4.397 +        {
   4.398 +            /* Pull-to-front if 'x' isn't already the head item. */
   4.399 +            if ( unlikely(x != head) )
   4.400 +            {
   4.401 +                /* Delete 'x' from list and reinsert immediately after head. */
   4.402 +                p->next = x->next;
   4.403 +                x->next = head->next;
   4.404 +                head->next = x;
   4.405  
   4.406 -            if( ob )
   4.407 -            {   // pull to front
   4.408 -                *ob=b->next;
   4.409 -                x=B->next;
   4.410 -                B->next=b;
   4.411 -                b->next=x;
   4.412 +                /* Swap 'x' contents with head contents. */
   4.413 +                SWAP(head->pfn, x->pfn);
   4.414 +                SWAP(head->spfn_and_flags, x->spfn_and_flags);
   4.415              }
   4.416 -            return B->spfn_and_flags;
   4.417 +
   4.418 +            return head->spfn_and_flags;
   4.419          }
   4.420 -#if SHADOW_HASH_DEBUG
   4.421 -        else
   4.422 -        {
   4.423 -            if(b!=B)ASSERT(b->pfn);
   4.424 -        }
   4.425 -#endif
   4.426 -        ob=&b->next;
   4.427 -        b=b->next;
   4.428 +
   4.429 +        p = x;
   4.430 +        x = x->next;
   4.431      }
   4.432 -    while (b);
   4.433 +    while ( x != NULL );
   4.434  
   4.435      return 0;
   4.436  }
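
The lookup above keeps the swap-with-head optimisation but now reinserts the found node directly after the (array-embedded) head instead of juggling a pointer-to-pointer. A self-contained model of the pull-to-front bucket:

    #include <stdio.h>

    struct node { unsigned long key, val; struct node *next; };

    /* Pull-to-front: hot keys migrate toward the embedded head element,
     * so repeat lookups terminate after one or two comparisons. */
    static unsigned long lookup(struct node *head, unsigned long key)
    {
        struct node *p = NULL, *x = head;
        do {
            if ( x->key == key )
            {
                if ( x != head )
                {
                    unsigned long t;
                    p->next = x->next;       /* unlink x            */
                    x->next = head->next;    /* reinsert after head */
                    head->next = x;
                    t = head->key; head->key = x->key; x->key = t;
                    t = head->val; head->val = x->val; x->val = t;
                }
                return head->val;
            }
            p = x;
            x = x->next;
        } while ( x != NULL );
        return 0;
    }

    int main(void)
    {
        struct node c = { 3, 30, NULL }, b = { 2, 20, &c }, a = { 1, 10, &b };
        printf("%lu\n", lookup(&a, 3));  /* 30; key 3 is now at the head */
        printf("%lu\n", lookup(&a, 3));  /* 30 again, first comparison   */
        return 0;
    }
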
   4.437  
   4.438 -/* we can make this locking more fine grained e.g. per shadow page if it 
   4.439 -ever becomes a problem, but since we need a spin lock on the hash table 
   4.440 -anyway its probably not worth being too clever. */
   4.441 -
   4.442 -static inline unsigned long get_shadow_status( struct mm_struct *m,
   4.443 -                                               unsigned int gpfn )
   4.444 +/*
    4.445 + * N.B. We can make this locking more fine-grained (e.g., per shadow page) if
   4.446 + * it ever becomes a problem, but since we need a spin lock on the hash table 
   4.447 + * anyway it's probably not worth being too clever.
   4.448 + */
   4.449 +static inline unsigned long get_shadow_status(
    4.450 +    struct mm_struct *m, unsigned int gpfn)
   4.451  {
   4.452      unsigned long res;
   4.453  
   4.454 -    /* If we get here, we know that this domain is running in shadow mode. 
   4.455 -       We also know that some sort of update has happened to the underlying
   4.456 -       page table page: either a PTE has been updated, or the page has
   4.457 -       changed type. If we're in log dirty mode, we should set the approrpiate
   4.458 -       bit in the dirty bitmap.
   4.459 -       NB: the VA update path doesn't use this so needs to be handled 
   4.460 -       independnetly. 
   4.461 -    */
   4.462 +    ASSERT(m->shadow_mode);
   4.463  
   4.464 -    //ASSERT(local_irq_is_enabled());
   4.465 -    //if(spin_is_locked(&m->shadow_lock)) printk("*");
   4.466 +    /*
   4.467 +     * If we get here we know that some sort of update has happened to the
   4.468 +     * underlying page table page: either a PTE has been updated, or the page
   4.469 +     * has changed type. If we're in log dirty mode, we should set the
   4.470 +     * appropriate bit in the dirty bitmap.
   4.471 +     * N.B. The VA update path doesn't use this and is handled independently. 
   4.472 +     */
   4.473 +
   4.474      shadow_lock(m);
   4.475  
   4.476 -    if( m->shadow_mode == SHM_logdirty )
   4.477 +    if ( m->shadow_mode == SHM_logdirty )
   4.478          __mark_dirty( m, gpfn );
   4.479  
   4.480 -    res = __shadow_status( m, gpfn );
   4.481 -    if (!res) 
   4.482 +    if ( !(res = __shadow_status(m, gpfn)) )
   4.483          shadow_unlock(m);
   4.484 +
   4.485      return res;
   4.486  }
   4.487  
   4.488  
   4.489 -static inline void put_shadow_status( struct mm_struct *m )
   4.490 +static inline void put_shadow_status(
   4.491 +    struct mm_struct *m)
   4.492  {
   4.493      shadow_unlock(m);
   4.494  }
   4.495  
   4.496  
   4.497 -static inline void delete_shadow_status( struct mm_struct *m,
   4.498 -                                         unsigned int gpfn )
   4.499 +static inline void delete_shadow_status( 
   4.500 +    struct mm_struct *m, unsigned int gpfn)
   4.501  {
   4.502 -    struct shadow_status *b, *B, **ob;
   4.503 +    struct shadow_status *p, *x, *n, *head;
   4.504  
   4.505      ASSERT(spin_is_locked(&m->shadow_lock));
   4.506 +    ASSERT(gpfn != 0);
   4.507  
   4.508 -    B = b = hash_bucket( m, gpfn );
   4.509 +    head = hash_bucket(m, gpfn);
   4.510  
   4.511 -    SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b );
   4.512 -    shadow_audit(m,0);
   4.513 -    ASSERT(gpfn);
   4.514 +    SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b);
   4.515 +    shadow_audit(m, 0);
   4.516  
   4.517 -    if( b->pfn == gpfn )
   4.518 +    /* Match on head item? */
   4.519 +    if ( head->pfn == gpfn )
   4.520      {
   4.521 -        if (b->next)
   4.522 +        if ( (n = head->next) != NULL )
   4.523          {
   4.524 -            struct shadow_status *D=b->next;
   4.525 -            b->spfn_and_flags = b->next->spfn_and_flags;
   4.526 -            b->pfn = b->next->pfn;
   4.527 +            /* Overwrite head with contents of following node. */
   4.528 +            head->pfn            = n->pfn;
   4.529 +            head->spfn_and_flags = n->spfn_and_flags;
   4.530  
   4.531 -            b->next = b->next->next;
   4.532 -            D->next = m->shadow_ht_free;
   4.533 -            D->pfn = 0;
   4.534 -            D->spfn_and_flags = 0;
   4.535 -            m->shadow_ht_free = D;
   4.536 +            /* Delete following node. */
   4.537 +            head->next           = n->next;
   4.538 +
   4.539 +            /* Add deleted node to the free list. */
   4.540 +            n->pfn            = 0;
   4.541 +            n->spfn_and_flags = 0;
   4.542 +            n->next           = m->shadow_ht_free;
   4.543 +            m->shadow_ht_free = n;
   4.544          }
   4.545          else
   4.546          {
   4.547 -            b->pfn = 0;
   4.548 -            b->spfn_and_flags = 0;
   4.549 +            /* This bucket is now empty. Initialise the head node. */
   4.550 +            head->pfn            = 0;
   4.551 +            head->spfn_and_flags = 0;
   4.552          }
   4.553  
   4.554 -#if SHADOW_HASH_DEBUG
   4.555 -        if( __shadow_status(m,gpfn) ) BUG();  
   4.556 -        shadow_audit(m,0);
   4.557 -#endif
   4.558 -        return;
   4.559 +        goto found;
   4.560      }
   4.561  
   4.562 -    ob = &b->next;
   4.563 -    b=b->next;
   4.564 +    p = head;
   4.565 +    x = head->next;
   4.566  
   4.567      do
   4.568      {
   4.569 -        if ( b->pfn == gpfn )
   4.570 +        if ( x->pfn == gpfn )
   4.571          {
   4.572 -            b->pfn = 0;
   4.573 -            b->spfn_and_flags = 0;
   4.574 +            /* Delete matching node. */
   4.575 +            p->next = x->next;
   4.576  
   4.577 -            // b is in the list
   4.578 -            *ob=b->next;
   4.579 -            b->next = m->shadow_ht_free;
   4.580 -            m->shadow_ht_free = b;
   4.581 +            /* Add deleted node to the free list. */
   4.582 +            x->pfn            = 0;
   4.583 +            x->spfn_and_flags = 0;
   4.584 +            x->next           = m->shadow_ht_free;
   4.585 +            m->shadow_ht_free = x;
   4.586  
   4.587 -#if SHADOW_HASH_DEBUG
   4.588 -            if( __shadow_status(m,gpfn) ) BUG();
   4.589 -#endif
   4.590 -            shadow_audit(m,0);
   4.591 -            return;
   4.592 +            goto found;
   4.593          }
   4.594  
   4.595 -        ob = &b->next;
   4.596 -        b=b->next;
   4.597 +        p = x;
   4.598 +        x = x->next;
   4.599      }
   4.600 -    while (b);
   4.601 +    while ( x != NULL );
   4.602  
   4.603 -    // if we got here, it wasn't in the list
   4.604 +    /* If we got here, it wasn't in the list! */
   4.605      BUG();
   4.606 +
   4.607 + found:
   4.608 +    shadow_audit(m, 0);
   4.609  }
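
Because each bucket head is embedded in the hash-table array it can never be unlinked, so deleting a head entry means copying the successor's contents into the head and recycling the successor. A hedged model, using free() where the real code pushes onto shadow_ht_free:

    #include <stdio.h>
    #include <stdlib.h>

    struct node { unsigned long key, val; struct node *next; };

    /* Delete the head of a list whose first node is embedded (not freeable):
     * overwrite the head with its successor and release the successor. */
    static void delete_head(struct node *head)
    {
        struct node *n = head->next;
        if ( n != NULL )
        {
            head->key  = n->key;
            head->val  = n->val;
            head->next = n->next;
            free(n);
        }
        else
        {
            head->key = head->val = 0;   /* bucket is now empty */
        }
    }

    int main(void)
    {
        struct node head = { 1, 10, NULL };
        struct node *n = malloc(sizeof(*n));
        n->key = 2; n->val = 20; n->next = NULL;
        head.next = n;
        delete_head(&head);
        printf("key=%lu val=%lu\n", head.key, head.val);  /* 2, 20 */
        return 0;
    }
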
   4.610  
   4.611  
   4.612 -static inline void set_shadow_status( struct mm_struct *m,
   4.613 -                                      unsigned int gpfn, unsigned long s )
   4.614 +static inline void set_shadow_status(
   4.615 +    struct mm_struct *m, unsigned int gpfn, unsigned long s)
   4.616  {
   4.617 -    struct shadow_status *b, *B, *extra, **fptr;
   4.618 +    struct shadow_status *x, *head, *extra;
   4.619      int i;
   4.620  
   4.621      ASSERT(spin_is_locked(&m->shadow_lock));
   4.622 +    ASSERT(gpfn != 0);
   4.623 +    ASSERT(s & PSH_shadowed);
   4.624  
   4.625 -    B = b = hash_bucket( m, gpfn );
   4.626 +    x = head = hash_bucket(m, gpfn);
   4.627     
   4.628 -    ASSERT(gpfn);
   4.629 -    SH_VVLOG("set gpfn=%08x s=%08lx bucket=%p(%p)", gpfn, s, b, b->next );
   4.630 +    SH_VVLOG("set gpfn=%08x s=%08lx bucket=%p(%p)", gpfn, s, b, b->next);
   4.631 +    shadow_audit(m, 0);
   4.632  
   4.633 -    shadow_audit(m,0);
   4.634 +    /*
   4.635 +     * STEP 1. If page is already in the table, update it in place.
   4.636 +     */
   4.637  
   4.638      do
   4.639      {
   4.640 -        if ( b->pfn == gpfn )
   4.641 +        if ( x->pfn == gpfn )
   4.642          {
   4.643 -            b->spfn_and_flags = s;
   4.644 -            shadow_audit(m,0);
   4.645 -            return;
   4.646 +            x->spfn_and_flags = s;
   4.647 +            goto done;
   4.648          }
   4.649  
   4.650 -        b=b->next;
   4.651 +        x = x->next;
   4.652      }
   4.653 -    while (b);
   4.654 +    while ( x != NULL );
   4.655  
   4.656 -    // if we got here, this is an insert rather than update
   4.657 -
   4.658 -    ASSERT( s );  // deletes must have succeeded by here
   4.659 +    /*
   4.660 +     * STEP 2. The page must be inserted into the table.
   4.661 +     */
   4.662  
   4.663 -    if ( B->pfn == 0 )
   4.664 +    /* If the bucket is empty then insert the new page as the head item. */
   4.665 +    if ( head->pfn == 0 )
   4.666      {
   4.667 -        // we can use this head
   4.668 -        ASSERT( B->next == 0 );
   4.669 -        B->pfn = gpfn;
   4.670 -        B->spfn_and_flags = s;
   4.671 -        shadow_audit(m,0);
   4.672 -        return;
   4.673 +        head->pfn            = gpfn;
   4.674 +        head->spfn_and_flags = s;
   4.675 +        ASSERT(head->next == NULL);
   4.676 +        goto done;
   4.677      }
   4.678  
   4.679 -    if( unlikely(m->shadow_ht_free == NULL) )
   4.680 +    /* We need to allocate a new node. Ensure the quicklist is non-empty. */
   4.681 +    if ( unlikely(m->shadow_ht_free == NULL) )
   4.682      {
   4.683 -        SH_LOG("allocate more shadow hashtable blocks");
   4.684 +        SH_LOG("Allocate more shadow hashtable blocks.");
   4.685 +
   4.686 +        extra = xmalloc(
   4.687 +            sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
   4.688  
   4.689 -        // we need to allocate more space
   4.690 -        extra = xmalloc(sizeof(void*) + (shadow_ht_extra_size * 
   4.691 -                                         sizeof(struct shadow_status)));
   4.692 +        /* XXX Should be more graceful here. */
   4.693 +        if ( extra == NULL )
   4.694 +            BUG();
   4.695  
   4.696 -        if( ! extra ) BUG(); // should be more graceful here....
   4.697 +        memset(extra, 0, sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
   4.698  
   4.699 -        memset(extra, 0, sizeof(void*) + (shadow_ht_extra_size * 
   4.700 -                                          sizeof(struct shadow_status)));
   4.701 -
   4.702 +        /* Record the allocation block so it can be correctly freed later. */
   4.703          m->shadow_extras_count++;
   4.704 +        *((struct shadow_status **)&extra[shadow_ht_extra_size]) = 
   4.705 +            m->shadow_ht_extras;
   4.706 +        m->shadow_ht_extras = &extra[0];
   4.707  
   4.708 -        // add extras to free list
   4.709 -        fptr = &m->shadow_ht_free;
   4.710 -        for ( i=0; i<shadow_ht_extra_size; i++ )
   4.711 -        {
   4.712 -            *fptr = &extra[i];
   4.713 -            fptr = &(extra[i].next);
   4.714 -        }
   4.715 -        *fptr = NULL;
   4.716 +        /* Thread a free chain through the newly-allocated nodes. */
   4.717 +        for ( i = 0; i < (shadow_ht_extra_size - 1); i++ )
   4.718 +            extra[i].next = &extra[i+1];
   4.719 +        extra[i].next = NULL;
   4.720  
   4.721 -        *((struct shadow_status ** ) &extra[shadow_ht_extra_size]) = 
   4.722 -            m->shadow_ht_extras;
   4.723 -        m->shadow_ht_extras = extra;
   4.724 -
   4.725 +        /* Add the new nodes to the free list. */
   4.726 +        m->shadow_ht_free = &extra[0];
   4.727      }
   4.728  
   4.729 -    // should really put this in B to go right to front
   4.730 -    b = m->shadow_ht_free;
   4.731 -    m->shadow_ht_free = b->next;
   4.732 -    b->spfn_and_flags = s;
   4.733 -    b->pfn = gpfn;
   4.734 -    b->next = B->next;
   4.735 -    B->next = b;
   4.736 +    /* Allocate a new node from the quicklist. */
   4.737 +    x                 = m->shadow_ht_free;
   4.738 +    m->shadow_ht_free = x->next;
   4.739  
   4.740 -    shadow_audit(m,0);
   4.741 +    /* Initialise the new node and insert directly after the head item. */
   4.742 +    x->pfn            = gpfn;
   4.743 +    x->spfn_and_flags = s;
   4.744 +    x->next           = head->next;
   4.745 +    head->next        = x;
   4.746  
   4.747 -    return;
   4.748 + done:
   4.749 +    shadow_audit(m, 0);
   4.750  }
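
When the free list is exhausted, set_shadow_status allocates shadow_ht_extra_size nodes in one block, threads a free chain through them, and stashes a pointer after the last node so whole blocks can be walked and freed later. A sketch of that scheme, with a small block size for demonstration:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define EXTRA_SIZE 4   /* shadow_ht_extra_size is 128 in the real code */

    struct node { unsigned long key; struct node *next; };

    static struct node *free_list;
    static struct node *blocks;   /* chain of allocation blocks */

    static int grow_free_list(void)
    {
        int i;
        /* One trailing pointer after the array links blocks together. */
        struct node *extra = malloc(sizeof(void *) +
                                    EXTRA_SIZE * sizeof(struct node));
        if ( extra == NULL )
            return -1;
        memset(extra, 0, sizeof(void *) + EXTRA_SIZE * sizeof(struct node));

        /* Record the block so it can be found and freed later. */
        *((struct node **)&extra[EXTRA_SIZE]) = blocks;
        blocks = extra;

        /* Thread a free chain through the new nodes. */
        for ( i = 0; i < (EXTRA_SIZE - 1); i++ )
            extra[i].next = &extra[i+1];
        extra[i].next = free_list;
        free_list = &extra[0];
        return 0;
    }

    int main(void)
    {
        int n = 0;
        struct node *x;
        if ( grow_free_list() != 0 )
            return 1;
        for ( x = free_list; x != NULL; x = x->next )
            n++;
        printf("free nodes: %d\n", n);   /* prints 4 */
        return 0;
    }
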
   4.751  
   4.752 -static inline void __shadow_mk_pagetable( struct mm_struct *mm )
   4.753 +static inline void __shadow_mk_pagetable(struct mm_struct *mm)
   4.754  {
   4.755 -    unsigned long gpfn, spfn=0;
   4.756 -
   4.757 -    gpfn =  pagetable_val(mm->pagetable) >> PAGE_SHIFT;
   4.758 +    unsigned long gpfn = pagetable_val(mm->pagetable) >> PAGE_SHIFT;
   4.759 +    unsigned long spfn = __shadow_status(mm, gpfn);
   4.760  
   4.761 -    if ( unlikely((spfn=__shadow_status(mm, gpfn)) == 0 ) )
   4.762 -    {
   4.763 -        spfn = shadow_l2_table(mm, gpfn );
   4.764 -    }      
   4.765 -    mm->shadow_table = mk_pagetable(spfn<<PAGE_SHIFT);
   4.766 +    if ( unlikely(spfn == 0) )
   4.767 +        spfn = shadow_l2_table(mm, gpfn);
   4.768 +
   4.769 +    mm->shadow_table = mk_pagetable(spfn << PAGE_SHIFT);
   4.770  }
   4.771  
   4.772 -static inline void shadow_mk_pagetable( struct mm_struct *mm )
   4.773 +static inline void shadow_mk_pagetable(struct mm_struct *mm)
   4.774  {
   4.775      SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
   4.776               pagetable_val(mm->pagetable), mm->shadow_mode );
   4.777  
   4.778      if ( unlikely(mm->shadow_mode) )
   4.779      {
   4.780 -        //ASSERT(local_irq_is_enabled());
   4.781          shadow_lock(mm);
   4.782          __shadow_mk_pagetable(mm);
   4.783          shadow_unlock(mm);
   4.784 @@ -602,17 +573,12 @@ static inline void shadow_mk_pagetable( 
   4.785      SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d ) sh=%08lx",
   4.786               pagetable_val(mm->pagetable), mm->shadow_mode, 
   4.787               pagetable_val(mm->shadow_table) );
   4.788 -
   4.789  }
   4.790  
   4.791 -
   4.792  #if SHADOW_DEBUG
   4.793  extern int check_pagetable(struct mm_struct *m, pagetable_t pt, char *s);
   4.794  #else
   4.795  #define check_pagetable(m, pt, s) ((void)0)
   4.796  #endif
   4.797  
   4.798 -
   4.799  #endif /* XEN_SHADOW_H */
   4.800 -
   4.801 -
     5.1 --- a/xen/include/hypervisor-ifs/dom0_ops.h	Wed Oct 13 03:33:39 2004 +0000
     5.2 +++ b/xen/include/hypervisor-ifs/dom0_ops.h	Wed Oct 13 14:25:21 2004 +0000
     5.3 @@ -19,7 +19,7 @@
     5.4   * This makes sure that old versions of dom0 tools will stop working in a
     5.5   * well-defined way (rather than crashing the machine, for instance).
     5.6   */
     5.7 -#define DOM0_INTERFACE_VERSION   0xAAAA0015
     5.8 +#define DOM0_INTERFACE_VERSION   0xAAAA0016
     5.9  
    5.10  #define MAX_DOMAIN_NAME    16
    5.11  
    5.12 @@ -275,7 +275,6 @@ typedef struct {
    5.13  #define DOM0_SHADOW_CONTROL_OP_FLUSH       10     /* table ops */
    5.14  #define DOM0_SHADOW_CONTROL_OP_CLEAN       11
    5.15  #define DOM0_SHADOW_CONTROL_OP_PEEK        12
    5.16 -#define DOM0_SHADOW_CONTROL_OP_CLEAN2      13
    5.17  
    5.18  typedef struct dom0_shadow_control
    5.19  {
     6.1 --- a/xen/include/xen/lib.h	Wed Oct 13 03:33:39 2004 +0000
     6.2 +++ b/xen/include/xen/lib.h	Wed Oct 13 14:25:21 2004 +0000
     6.3 @@ -11,6 +11,9 @@
     6.4  #define ASSERT(_p) ((void)0)
     6.5  #endif
     6.6  
     6.7 +#define SWAP(_a, _b) \
     6.8 +   do { typeof(_a) _t = (_a); (_a) = (_b); (_b) = _t; } while ( 0 )
     6.9 +
    6.10  #define reserve_bootmem(_p,_l) \
    6.11  printk("Memory Reservation 0x%lx, %lu bytes\n", (_p), (_l))
    6.12