ia64/xen-unstable

changeset 1260:ae024a256b34

bitkeeper revision 1.832 (4069e6efDAxnvoZE5ananXkWGDiyig)

shadow tables code refactoring stage 1
author iap10@labyrinth.cl.cam.ac.uk
date Tue Mar 30 21:30:23 2004 +0000 (2004-03-30)
parents a0d70d73a737
children b9b7a37dc588
files xen/common/shadow.c xen/include/asm-i386/processor.h
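The diff below is mostly an indentation cleanup of xen/common/shadow.c
(tabs replaced by four-space indents, plus a new emacs mode line). The
main functional changes are: the log-dirty bitmap allocation in
shadow_mode_enable() now records its size in a new
shadow_dirty_bitmap_size field, rounded up to a multiple of 64 pages,
and the guest-to-shadow PTE propagation logic is factored out of
shadow_fault() and shadow_l1_normal_pt_update() into the new
l1pte_write_fault()/l1pte_read_fault()/l1pte_no_fault() helpers, which
switch on m->shadow_mode.

The sizing rule can be tried in isolation with the following minimal
standalone sketch (not part of the changeset; dirty_bitmap_bits() is a
hypothetical name). Rounding the page count up to a multiple of 64
keeps the bitmap a whole number of machine words, and dividing by 8
gives the allocation size in bytes.

    #include <stdio.h>

    /* Mirrors (p->max_pages+63)&(~63) in shadow_mode_enable() below:
     * round the page count up to the next multiple of 64. */
    static unsigned int dirty_bitmap_bits(unsigned int max_pages)
    {
        return (max_pages + 63) & ~63u;
    }

    int main(void)
    {
        unsigned int pages = 1000;   /* hypothetical domain size */
        unsigned int bits  = dirty_bitmap_bits(pages);
        printf("%u pages -> %u bits -> %u bytes\n", pages, bits, bits / 8);
        /* prints: 1000 pages -> 1024 bits -> 128 bytes */
        return 0;
    }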
line diff
     1.1 --- a/xen/common/shadow.c	Tue Mar 30 15:44:27 2004 +0000
     1.2 +++ b/xen/common/shadow.c	Tue Mar 30 21:30:23 2004 +0000
     1.3 @@ -1,4 +1,4 @@
     1.4 -/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*- */
     1.5 +/* -*-  Mode:C++; c-set-style:BSD; c-basic-offset:4; tab-width:4 -*- */
     1.6  
     1.7  #include <xen/config.h>
     1.8  #include <xen/types.h>
     1.9 @@ -30,19 +30,18 @@ static inline void free_shadow_page( str
    1.10  									 struct pfn_info *pfn_info )
    1.11  {
    1.12      unsigned long flags;
    1.13 -	unsigned long type = pfn_info->type_and_flags & PGT_type_mask;
    1.14 +    unsigned long type = pfn_info->type_and_flags & PGT_type_mask;
    1.15  
    1.16 -	m->shadow_page_count--;
    1.17 +    m->shadow_page_count--;
    1.18  
    1.19 -	if (type == PGT_l1_page_table)
    1.20 +    if (type == PGT_l1_page_table)
    1.21  		perfc_decr(shadow_l1_pages);
    1.22      else if (type == PGT_l2_page_table)
    1.23  		perfc_decr(shadow_l2_pages);
    1.24 -	else printk("Free shadow weird page type pfn=%08x type=%08lx\n",
    1.25 +    else printk("Free shadow weird page type pfn=%08x type=%08lx\n",
    1.26  				frame_table-pfn_info, pfn_info->type_and_flags);
    1.27  				
    1.28 -
    1.29 -	pfn_info->type_and_flags = 0;
    1.30 +    pfn_info->type_and_flags = 0;
    1.31  
    1.32      spin_lock_irqsave(&free_list_lock, flags);
    1.33      list_add(&pfn_info->list, &free_list);
    1.34 @@ -52,210 +51,218 @@ static inline void free_shadow_page( str
    1.35  
    1.36  static void __free_shadow_table( struct mm_struct *m )
    1.37  {
    1.38 -	int j, free=0;
    1.39 -	struct shadow_status *a,*next;
    1.40 +    int j, free=0;
    1.41 +    struct shadow_status *a,*next;
    1.42  	
    1.43 -	// the code assumes you're not using the page tables i.e.
    1.44 +    // the code assumes you're not using the page tables i.e.
    1.45      // the domain is stopped and cr3 is something else!!
    1.46  
    1.47      // walk the hash table and call free_shadow_page on all pages
    1.48  
    1.49 -	shadow_audit(m,1);
    1.50 +    shadow_audit(m,1);
    1.51  
    1.52      for(j=0;j<shadow_ht_buckets;j++)
    1.53 -    {
    1.54 -        a = &m->shadow_ht[j];        
    1.55 -        if (a->pfn)
    1.56 -        {
    1.57 -            free_shadow_page( m, 
    1.58 -                          &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
    1.59 -            a->pfn = 0;
    1.60 -            a->spfn_and_flags = 0;
    1.61 -            free++;
    1.62 +	{
    1.63 +		a = &m->shadow_ht[j];        
    1.64 +		if (a->pfn)
    1.65 +		{
    1.66 +			free_shadow_page( m, 
    1.67 +							  &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
    1.68 +			a->pfn = 0;
    1.69 +			a->spfn_and_flags = 0;
    1.70 +			free++;
    1.71          }
    1.72 -        next=a->next;
    1.73 -        a->next=NULL;
    1.74 -        a=next;
    1.75 -        while(a)
    1.76 +		next=a->next;
    1.77 +		a->next=NULL;
    1.78 +		a=next;
    1.79 +		while(a)
    1.80  		{ 
    1.81 -            struct shadow_status *next = a->next;
    1.82 +			struct shadow_status *next = a->next;
    1.83  
    1.84 -            free_shadow_page( m, 
    1.85 -                          &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
    1.86 -            a->pfn = 0;
    1.87 -            a->spfn_and_flags = 0;
    1.88 -            free++;
    1.89 -            a->next = m->shadow_ht_free;           
    1.90 -            m->shadow_ht_free = a;
    1.91 -            a=next;
    1.92 +			free_shadow_page( m, 
    1.93 +							  &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
    1.94 +			a->pfn = 0;
    1.95 +			a->spfn_and_flags = 0;
    1.96 +			free++;
    1.97 +			a->next = m->shadow_ht_free;           
    1.98 +			m->shadow_ht_free = a;
    1.99 +			a=next;
   1.100  		}
   1.101 -	shadow_audit(m,0);
   1.102 -	}
   1.103 -   SH_LOG("Free shadow table. Freed= %d",free);
   1.104 +		shadow_audit(m,0);
   1.105 +    }
   1.106 +    SH_LOG("Free shadow table. Freed= %d",free);
   1.107  }
   1.108  
   1.109  static inline int shadow_page_op( struct mm_struct *m, unsigned int op,
   1.110 -                                   struct pfn_info *spfn_info )
   1.111 +								  struct pfn_info *spfn_info )
   1.112  {
   1.113      int work = 0;
   1.114      unsigned int spfn = spfn_info-frame_table;
   1.115  
   1.116      switch( op )
   1.117      {
   1.118 -        case DOM0_SHADOW_CONTROL_OP_CLEAN:
   1.119 -        {
   1.120 -            int i;
   1.121 -            if ( (spfn_info->type_and_flags & PGT_type_mask) == 
   1.122 -                                                      PGT_l1_page_table )
   1.123 -            {
   1.124 -                unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
   1.125 +    case DOM0_SHADOW_CONTROL_OP_CLEAN:
   1.126 +    {
   1.127 +		int i;
   1.128 +		if ( (spfn_info->type_and_flags & PGT_type_mask) == 
   1.129 +			 PGT_l1_page_table )
   1.130 +		{
   1.131 +			unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
   1.132  
   1.133 -                for (i=0;i<ENTRIES_PER_L1_PAGETABLE;i++)
   1.134 -                {                    
   1.135 -                    if ( spl1e[i] & _PAGE_RW )
   1.136 -                    {
   1.137 -                        work++;
   1.138 -                        spl1e[i] &= ~_PAGE_RW;
   1.139 -                    }
   1.140 -                }
   1.141 -                unmap_domain_mem( spl1e );
   1.142 -            }
   1.143 -        }
   1.144 +			for (i=0;i<ENTRIES_PER_L1_PAGETABLE;i++)
   1.145 +			{                    
   1.146 +				if ( spl1e[i] & _PAGE_RW )
   1.147 +				{
   1.148 +					work++;
   1.149 +					spl1e[i] &= ~_PAGE_RW;
   1.150 +				}
   1.151 +			}
   1.152 +			unmap_domain_mem( spl1e );
   1.153 +		}
   1.154 +    }
   1.155      }
   1.156      return work;
   1.157  }
   1.158  static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
   1.159  {
   1.160 -	int j, work=0;
   1.161 -	struct shadow_status *a;
   1.162 +    int j, work=0;
   1.163 +    struct shadow_status *a;
   1.164  	
   1.165 -	// the code assumes you're not using the page tables i.e.
   1.166 +    // the code assumes you're not using the page tables i.e.
   1.167      // the domain is stopped and cr3 is something else!!
   1.168  
    1.169      // walk the hash table and call shadow_page_op on all pages
   1.170  
   1.171 -	shadow_audit(m,1);
   1.172 +    shadow_audit(m,1);
   1.173  
   1.174      for(j=0;j<shadow_ht_buckets;j++)
   1.175      {
   1.176 -        a = &m->shadow_ht[j];        
   1.177 -        if (a->pfn)
   1.178 +		a = &m->shadow_ht[j];        
   1.179 +		if (a->pfn)
   1.180          {
   1.181 -            work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
   1.182 +			work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
   1.183          }
   1.184 -        a=a->next;
   1.185 -        while(a)
   1.186 +		a=a->next;
   1.187 +		while(a)
   1.188  		{ 
   1.189 -            work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
   1.190 -            a=a->next;
   1.191 +			work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
   1.192 +			a=a->next;
   1.193  		}
   1.194 -	shadow_audit(m,0);
   1.195 -	}
   1.196 -   SH_LOG("Scan shadow table. Work=%d l1=%d l2=%d", work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
   1.197 +		shadow_audit(m,0);
   1.198 +    }
   1.199 +    SH_LOG("Scan shadow table. Work=%d l1=%d l2=%d", work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
   1.200  }
   1.201  
   1.202  
   1.203  int shadow_mode_enable( struct task_struct *p, unsigned int mode )
   1.204  {
   1.205      struct mm_struct *m = &p->mm;
   1.206 -	struct shadow_status **fptr;
   1.207 -	int i;
   1.208 +    struct shadow_status **fptr;
   1.209 +    int i;
   1.210  
   1.211  
   1.212 -	spin_lock_init(&m->shadow_lock);
   1.213 -	spin_lock(&m->shadow_lock);
   1.214 +    spin_lock_init(&m->shadow_lock);
   1.215 +    spin_lock(&m->shadow_lock);
   1.216  
   1.217      m->shadow_mode = mode;
   1.218  	
   1.219 -	// allocate hashtable
   1.220 +    // allocate hashtable
   1.221      m->shadow_ht = kmalloc( shadow_ht_buckets * 
   1.222 -							   sizeof(struct shadow_status), GFP_KERNEL );
   1.223 -	if( ! m->shadow_ht )
   1.224 +							sizeof(struct shadow_status), GFP_KERNEL );
   1.225 +    if( ! m->shadow_ht )
   1.226  		goto nomem;
   1.227  
   1.228 -	memset( m->shadow_ht, 0, shadow_ht_buckets * 
   1.229 -							   sizeof(struct shadow_status) );
   1.230 +    memset( m->shadow_ht, 0, shadow_ht_buckets * 
   1.231 +			sizeof(struct shadow_status) );
   1.232  
   1.233  
   1.234 -	// allocate space for first lot of extra nodes
   1.235 +    // allocate space for first lot of extra nodes
   1.236      m->shadow_ht_extras = kmalloc( sizeof(void*) + (shadow_ht_extra_size * 
   1.237 -							   sizeof(struct shadow_status)), GFP_KERNEL );
   1.238 +													sizeof(struct shadow_status)), GFP_KERNEL );
   1.239  
   1.240 -	if( ! m->shadow_ht_extras )
   1.241 +    if( ! m->shadow_ht_extras )
   1.242  		goto nomem;
   1.243  
   1.244 -	memset( m->shadow_ht_extras, 0, sizeof(void*) + (shadow_ht_extra_size * 
   1.245 -							   sizeof(struct shadow_status)) );
   1.246 +    memset( m->shadow_ht_extras, 0, sizeof(void*) + (shadow_ht_extra_size * 
   1.247 +													 sizeof(struct shadow_status)) );
   1.248  
   1.249      m->shadow_extras_count++;
   1.250  	
   1.251      // add extras to free list
   1.252 -	fptr = &m->shadow_ht_free;
   1.253 -	for ( i=0; i<shadow_ht_extra_size; i++ )
   1.254 -	{
   1.255 +    fptr = &m->shadow_ht_free;
   1.256 +    for ( i=0; i<shadow_ht_extra_size; i++ )
   1.257 +    {
   1.258  		*fptr = &m->shadow_ht_extras[i];
   1.259  		fptr = &(m->shadow_ht_extras[i].next);
   1.260 -	}
   1.261 -	*fptr = NULL;
   1.262 -	*((struct shadow_status ** ) 
   1.263 -        &m->shadow_ht_extras[shadow_ht_extra_size]) = NULL;
   1.264 +    }
   1.265 +    *fptr = NULL;
   1.266 +    *((struct shadow_status ** ) 
   1.267 +      &m->shadow_ht_extras[shadow_ht_extra_size]) = NULL;
   1.268  
   1.269      if ( mode == SHM_logdirty )
   1.270      {
   1.271 -        m->shadow_dirty_bitmap = kmalloc( p->max_pages/8, GFP_KERNEL );
   1.272 -        if( !m->shadow_dirty_bitmap  ) goto nomem;
   1.273 -        memset(m->shadow_dirty_bitmap,0,p->max_pages/8);
   1.274 +		m->shadow_dirty_bitmap_size = (p->max_pages+63)&(~63);
   1.275 +		m->shadow_dirty_bitmap = 
   1.276 +			kmalloc( m->shadow_dirty_bitmap_size/8, GFP_KERNEL );
   1.277 +
   1.278 +		if( !m->shadow_dirty_bitmap  )
   1.279 +		{
   1.280 +			m->shadow_dirty_bitmap_size = 0;
   1.281 +			goto nomem;
   1.282 +		}
   1.283 +		memset(m->shadow_dirty_bitmap,0,m->shadow_dirty_bitmap_size/8);
   1.284      }
   1.285  
   1.286 -	spin_unlock(&m->shadow_lock);
   1.287 +    spin_unlock(&m->shadow_lock);
   1.288  
   1.289      // call shadow_mk_pagetable
   1.290 -	shadow_mk_pagetable( m );
   1.291 +    shadow_mk_pagetable( m );
   1.292  
   1.293 -	return 0;
   1.294 +    return 0;
   1.295  
   1.296  nomem:
   1.297 -	spin_unlock(&m->shadow_lock);
   1.298 -	return -ENOMEM;
   1.299 +    spin_unlock(&m->shadow_lock);
   1.300 +    return -ENOMEM;
   1.301  }
   1.302  
   1.303  static void shadow_mode_disable( struct task_struct *p )
   1.304  {
   1.305      struct mm_struct *m = &p->mm;
   1.306 -	struct shadow_status *next;
   1.307 +    struct shadow_status *next;
   1.308  
   1.309      spin_lock(&m->shadow_lock);
   1.310 -	__free_shadow_table( m );
   1.311 -	m->shadow_mode = 0;
   1.312 -	spin_unlock(&m->shadow_lock);
   1.313 +    __free_shadow_table( m );
   1.314 +    m->shadow_mode = 0;
   1.315 +    spin_unlock(&m->shadow_lock);
   1.316  
   1.317 -	SH_LOG("freed tables count=%d l1=%d l2=%d",
   1.318 +    SH_LOG("freed tables count=%d l1=%d l2=%d",
   1.319  		   m->shadow_page_count, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
   1.320  
   1.321 -	next = m->shadow_ht_extras;
   1.322 -	while( next )
   1.323 +    next = m->shadow_ht_extras;
   1.324 +    while( next )
   1.325      {
   1.326  		struct shadow_status * this = next;
   1.327  		m->shadow_extras_count--;
   1.328  		next = *((struct shadow_status **)(&next[shadow_ht_extra_size]));
   1.329  		kfree( this );
   1.330 -	}
   1.331 +    }
   1.332  
   1.333 -	SH_LOG("freed extras, now %d", m->shadow_extras_count);
   1.334 +    SH_LOG("freed extras, now %d", m->shadow_extras_count);
   1.335  
   1.336      if( m->shadow_dirty_bitmap  )
   1.337      {
   1.338 -        kfree( m->shadow_dirty_bitmap );
   1.339 +		kfree( m->shadow_dirty_bitmap );
   1.340  		m->shadow_dirty_bitmap = 0;
   1.341 +		m->shadow_dirty_bitmap_size = 0;
   1.342      }
   1.343  
   1.344      // free the hashtable itself
   1.345 -	kfree( &m->shadow_ht[0] );
   1.346 +    kfree( &m->shadow_ht[0] );
   1.347  }
   1.348  
   1.349  static void shadow_mode_table_op( struct task_struct *p, unsigned int op )
   1.350  {
   1.351 -	struct mm_struct *m = &p->mm;
   1.352 +    struct mm_struct *m = &p->mm;
   1.353  
    1.354      // since Dom0 did the hypercall, we should be running with its page
   1.355      // tables right now. Calling flush on yourself would be really
   1.356 @@ -263,8 +270,8 @@ static void shadow_mode_table_op( struct
   1.357  
   1.358      if ( m == &current->mm )
   1.359      {
   1.360 -        printk("Don't try and flush your own page tables!\n");
   1.361 -        return;
   1.362 +		printk("Don't try and flush your own page tables!\n");
   1.363 +		return;
   1.364      }
   1.365     
   1.366  
   1.367 @@ -277,17 +284,17 @@ static void shadow_mode_table_op( struct
   1.368      switch(op)
   1.369      {
   1.370      case DOM0_SHADOW_CONTROL_OP_FLUSH:
   1.371 -	    __free_shadow_table( m );
   1.372 -        break;
   1.373 +		__free_shadow_table( m );
   1.374 +		break;
   1.375     
   1.376      case DOM0_SHADOW_CONTROL_OP_CLEAN:
   1.377 -       __scan_shadow_table( m, op );
   1.378 -       if( m->shadow_dirty_bitmap )
   1.379 -           memset(m->shadow_dirty_bitmap,0,p->max_pages/8);
   1.380 -       break;
   1.381 +		__scan_shadow_table( m, op );
   1.382 +		if( m->shadow_dirty_bitmap )
   1.383 +			memset(m->shadow_dirty_bitmap,0,m->shadow_dirty_bitmap_size/8);
   1.384 +		break;
   1.385      }
   1.386  
   1.387 -	spin_unlock(&m->shadow_lock);
   1.388 +    spin_unlock(&m->shadow_lock);
   1.389  
   1.390      SH_LOG("shadow mode table op : page count %d", m->shadow_page_count);
   1.391  
   1.392 @@ -305,140 +312,214 @@ int shadow_mode_control( struct task_str
   1.393   
   1.394      // don't call if already shadowed...
   1.395  
    1.396 -	// synchronously stop domain
    1.397 +    // synchronously stop domain
   1.398      if( 0 && !(p->state & TASK_STOPPED) && !(p->state & TASK_PAUSED))
   1.399      {
   1.400 -        printk("about to pause domain\n");
   1.401 -	    sched_pause_sync(p);
   1.402 -        printk("paused domain\n");
   1.403 -        we_paused = 1;
   1.404 +		printk("about to pause domain\n");
   1.405 +		sched_pause_sync(p);
   1.406 +		printk("paused domain\n");
   1.407 +		we_paused = 1;
   1.408      }
   1.409  
   1.410 -	if ( p->mm.shadow_mode && op == DOM0_SHADOW_CONTROL_OP_OFF )
   1.411 +    if ( p->mm.shadow_mode && op == DOM0_SHADOW_CONTROL_OP_OFF )
   1.412      {
   1.413  		shadow_mode_disable(p);
   1.414 -	}
   1.415 -	else if ( op == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST )
   1.416 -	{
   1.417 -        if(p->mm.shadow_mode) shadow_mode_disable(p);
   1.418 -        shadow_mode_enable(p, SHM_test);
   1.419 -	}	
   1.420 -	else if ( p->mm.shadow_mode && op >= DOM0_SHADOW_CONTROL_OP_FLUSH && op<=DOM0_SHADOW_CONTROL_OP_CLEAN )
   1.421 +    }
   1.422 +    else if ( op == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST )
   1.423 +    {
   1.424 +		if(p->mm.shadow_mode) shadow_mode_disable(p);
   1.425 +		shadow_mode_enable(p, SHM_test);
   1.426 +    }	
   1.427 +    else if ( p->mm.shadow_mode && op >= DOM0_SHADOW_CONTROL_OP_FLUSH && op<=DOM0_SHADOW_CONTROL_OP_CLEAN )
   1.428      {
   1.429  		shadow_mode_table_op(p, op);
   1.430      }
   1.431 -	else
   1.432 +    else
   1.433      {
   1.434 -        if ( we_paused ) wake_up(p);
   1.435 +		if ( we_paused ) wake_up(p);
   1.436  		return -EINVAL;
   1.437      }
   1.438  
   1.439      if ( we_paused ) wake_up(p);
   1.440 -	return 0;
   1.441 +    return 0;
   1.442  }
   1.443  
   1.444  
   1.445  
   1.446  static inline struct pfn_info *alloc_shadow_page( struct mm_struct *m )
   1.447  {
   1.448 -	m->shadow_page_count++;
   1.449 +    m->shadow_page_count++;
   1.450 +
   1.451 +    return alloc_domain_page( NULL );
   1.452 +}
   1.453 +
   1.454 +/************************************************************************/
   1.455 +
   1.456 +static inline void l1pte_write_fault( struct mm_struct *m, 
   1.457 +									  unsigned long *gpte_p, unsigned long *spte_p )
   1.458 +{ 
   1.459 +    unsigned long gpte = *gpte_p;
   1.460 +    unsigned long spte = *spte_p;
   1.461  
   1.462 -	return alloc_domain_page( NULL );
   1.463 +    switch( m->shadow_mode )
   1.464 +    {
   1.465 +    case SHM_test:
   1.466 +		spte = gpte;
   1.467 +		gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
   1.468 +		spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; 			
   1.469 +		break;
   1.470 +
   1.471 +    case SHM_logdirty:
   1.472 +		break;
   1.473 +    }
   1.474 +
   1.475 +    *gpte_p = gpte;
   1.476 +    *spte_p = spte;
   1.477  }
   1.478  
   1.479 +static inline void l1pte_read_fault( struct mm_struct *m, 
   1.480 +									 unsigned long *gpte_p, unsigned long *spte_p )
   1.481 +{ 
   1.482 +    unsigned long gpte = *gpte_p;
   1.483 +    unsigned long spte = *spte_p;
   1.484  
   1.485 +    switch( m->shadow_mode )
   1.486 +    {
   1.487 +    case SHM_test:
   1.488 +		spte = gpte;
   1.489 +		gpte |= _PAGE_ACCESSED;
   1.490 +		spte |= _PAGE_ACCESSED; 			
   1.491 +		if ( ! (gpte & _PAGE_DIRTY ) )
   1.492 +			spte &= ~ _PAGE_RW;
   1.493 +		break;
   1.494 +
   1.495 +    case SHM_logdirty:
   1.496 +		break;
   1.497 +    }
   1.498 +
   1.499 +    *gpte_p = gpte;
   1.500 +    *spte_p = spte;
   1.501 +}
   1.502 +
   1.503 +static inline void l1pte_no_fault( struct mm_struct *m, 
   1.504 +								   unsigned long *gpte_p, unsigned long *spte_p )
   1.505 +{ 
   1.506 +    unsigned long gpte = *gpte_p;
   1.507 +    unsigned long spte = *spte_p;
   1.508 +
   1.509 +    switch( m->shadow_mode )
   1.510 +    {
   1.511 +    case SHM_test:
   1.512 +		spte = 0;
   1.513 +		if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == 
   1.514 +			 (_PAGE_PRESENT|_PAGE_ACCESSED) )
   1.515 +		{
   1.516 +			if ( ! (gpte & _PAGE_DIRTY ) )
   1.517 +				spte &= ~ _PAGE_RW;
   1.518 +		}
   1.519 +		break;
   1.520 +
   1.521 +    case SHM_logdirty:
   1.522 +		break;
   1.523 +    }
   1.524 +
   1.525 +    *gpte_p = gpte;
   1.526 +    *spte_p = spte;
   1.527 +}
   1.528 +
   1.529 +/*********************************************************************/
   1.530  
   1.531  void unshadow_table( unsigned long gpfn, unsigned int type )
   1.532  {
   1.533 -	unsigned long spfn;
   1.534 +    unsigned long spfn;
   1.535  
   1.536      SH_VLOG("unshadow_table type=%08x gpfn=%08lx",
   1.537 -		 type,
   1.538 -		 gpfn );
   1.539 +			type,
   1.540 +			gpfn );
   1.541  
   1.542 -	perfc_incrc(unshadow_table_count);
   1.543 +    perfc_incrc(unshadow_table_count);
   1.544  
   1.545 -	// this function is the same for both l1 and l2 tables
   1.546 +    // this function is the same for both l1 and l2 tables
   1.547  
   1.548 -	// even in the SMP guest case, there won't be a race here as
   1.549 +    // even in the SMP guest case, there won't be a race here as
   1.550      // this CPU was the one that cmpxchg'ed the page to invalid
   1.551  
   1.552 -	spfn = __shadow_status(&current->mm, gpfn) & PSH_pfn_mask;
   1.553 +    spfn = __shadow_status(&current->mm, gpfn) & PSH_pfn_mask;
   1.554  
   1.555 -	delete_shadow_status(&current->mm, gpfn);
   1.556 +    delete_shadow_status(&current->mm, gpfn);
   1.557  
   1.558  #if 0 // XXX leave as might be useful for later debugging
   1.559 -	{ 
   1.560 +    { 
   1.561  		int i;
   1.562  		unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
   1.563  
   1.564  		for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
   1.565 -	        {
   1.566 -				spl1e[i] = 0xdead0000;
   1.567 -			}
   1.568 +		{
   1.569 +			spl1e[i] = 0xdead0000;
   1.570 +		}
   1.571  		unmap_domain_mem( spl1e );
   1.572 -	}
   1.573 +    }
   1.574  #endif
   1.575  
   1.576 -	free_shadow_page( &current->mm, &frame_table[spfn] );
   1.577 +    free_shadow_page( &current->mm, &frame_table[spfn] );
   1.578  
   1.579  }
   1.580  
   1.581  
   1.582  unsigned long shadow_l2_table( 
   1.583 -                     struct mm_struct *m, unsigned long gpfn )
   1.584 +    struct mm_struct *m, unsigned long gpfn )
   1.585  {
   1.586 -	struct pfn_info *spfn_info;
   1.587 -	unsigned long spfn;
   1.588 -	l2_pgentry_t *spl2e, *gpl2e;
   1.589 -	int i;
   1.590 +    struct pfn_info *spfn_info;
   1.591 +    unsigned long spfn;
   1.592 +    l2_pgentry_t *spl2e, *gpl2e;
   1.593 +    int i;
   1.594  
   1.595 -	SH_VVLOG("shadow_l2_table( %08lx )",gpfn);
   1.596 +    SH_VVLOG("shadow_l2_table( %08lx )",gpfn);
   1.597  
   1.598 -	perfc_incrc(shadow_l2_table_count);
   1.599 +    perfc_incrc(shadow_l2_table_count);
   1.600  
   1.601      // XXX in future, worry about racing in SMP guests 
   1.602      //      -- use cmpxchg with PSH_pending flag to show progress (and spin)
   1.603  
   1.604 -	spfn_info = alloc_shadow_page(m);
   1.605 +    spfn_info = alloc_shadow_page(m);
   1.606  
   1.607      ASSERT( spfn_info ); // XXX deal with failure later e.g. blow cache
   1.608  
   1.609 -	spfn_info->type_and_flags = PGT_l2_page_table;
   1.610 -	perfc_incr(shadow_l2_pages);
   1.611 +    spfn_info->type_and_flags = PGT_l2_page_table;
   1.612 +    perfc_incr(shadow_l2_pages);
   1.613  
   1.614 -	spfn = (unsigned long) (spfn_info - frame_table);
   1.615 +    spfn = (unsigned long) (spfn_info - frame_table);
   1.616  
   1.617 -	// mark pfn as being shadowed, update field to point at shadow
   1.618 -	set_shadow_status(m, gpfn, spfn | PSH_shadowed);
   1.619 +    // mark pfn as being shadowed, update field to point at shadow
   1.620 +    set_shadow_status(m, gpfn, spfn | PSH_shadowed);
   1.621  	
   1.622 -	// we need to do this before the linear map is set up
   1.623 -	spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT);
   1.624 +    // we need to do this before the linear map is set up
   1.625 +    spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT);
   1.626  
    1.627 -	// get hypervisor and 2x linear PT mappings installed 
   1.628 -	memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
   1.629 -           &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
   1.630 -           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
    1.631 +    // get hypervisor and 2x linear PT mappings installed 
   1.632 +    memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
   1.633 +		   &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
   1.634 +		   HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
   1.635      spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
   1.636 -        mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   1.637 +		mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   1.638      spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
   1.639 -        mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   1.640 +		mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
   1.641      spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
   1.642 -        mk_l2_pgentry(__pa(frame_table[gpfn].u.domain->mm.perdomain_pt) | 
   1.643 -                      __PAGE_HYPERVISOR);
   1.644 +		mk_l2_pgentry(__pa(frame_table[gpfn].u.domain->mm.perdomain_pt) | 
   1.645 +					  __PAGE_HYPERVISOR);
   1.646  
   1.647 -	// can't use the linear map as we may not be in the right PT
   1.648 -	gpl2e = (l2_pgentry_t *) map_domain_mem(gpfn << PAGE_SHIFT);
   1.649 +    // can't use the linear map as we may not be in the right PT
   1.650 +    gpl2e = (l2_pgentry_t *) map_domain_mem(gpfn << PAGE_SHIFT);
   1.651  
   1.652 -	// proactively create entries for pages that are already shadowed
   1.653 -	for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
   1.654 -	{
   1.655 +    // proactively create entries for pages that are already shadowed
   1.656 +    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
   1.657 +    {
   1.658  		unsigned long spte = 0;
   1.659  
   1.660  #if 0  // Turns out this doesn't really help
   1.661 -        unsigned long gpte;
   1.662 +		unsigned long gpte;
   1.663  
   1.664 -        gpte = l2_pgentry_val(gpl2e[i]);
   1.665 +		gpte = l2_pgentry_val(gpl2e[i]);
   1.666  
   1.667  		if (gpte & _PAGE_PRESENT)
   1.668  		{
   1.669 @@ -450,203 +531,183 @@ unsigned long shadow_l2_table(
   1.670  				if ( unlikely( (__shadow_status(p, gpte>>PAGE_SHIFT) & PGT_type_mask) == PGT_l2_page_table) )
   1.671                  {
   1.672  					printk("Linear mapping detected\n");
   1.673 -  				    spte = gpte & ~_PAGE_RW;
   1.674 +					spte = gpte & ~_PAGE_RW;
   1.675                  }
   1.676  				else
   1.677                  {
   1.678 -  				    spte = ( gpte & ~PAGE_MASK ) | (s_sh<<PAGE_SHIFT) |
   1.679 +					spte = ( gpte & ~PAGE_MASK ) | (s_sh<<PAGE_SHIFT) |
   1.680  						_PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED ;
   1.681  				}
   1.682  				// XXX should probably update guest to ACCESSED|DIRTY too...
   1.683  
   1.684 -		    }
   1.685 +			}
   1.686  
   1.687  		}
   1.688  #endif
   1.689  
   1.690  		spl2e[i] = mk_l2_pgentry( spte );
   1.691  
   1.692 -	}
   1.693 +    }
   1.694  
    1.695 -	// it's arguable we should 'preemptively shadow' a few active L1 pages
    1.696 +    // it's arguable we should 'preemptively shadow' a few active L1 pages
   1.697      // to avoid taking a string of faults when 'jacking' a running domain
   1.698  
   1.699      unmap_domain_mem( gpl2e );
   1.700      unmap_domain_mem( spl2e );
   1.701  
   1.702 -	SH_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn);
   1.703 +    SH_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn);
   1.704  
   1.705 -	return spfn;
   1.706 +    return spfn;
   1.707  }
   1.708  
   1.709  
   1.710  int shadow_fault( unsigned long va, long error_code )
   1.711  {
   1.712 -	unsigned long gpte, spte;
   1.713 +    unsigned long gpte, spte;
   1.714 +    struct mm_struct *m = &current->mm;
   1.715  
   1.716 -	SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
   1.717 +    SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
   1.718  
   1.719      check_pagetable( current, current->mm.pagetable, "pre-sf" );
   1.720  
   1.721 -	if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
   1.722 -	{
   1.723 +    if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
   1.724 +    {
   1.725  		SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
   1.726  		return 0;  // propagate to guest
   1.727 -	}
   1.728 +    }
   1.729  
   1.730 -	if ( ! (gpte & _PAGE_PRESENT) )
   1.731 -	{
   1.732 +    if ( ! (gpte & _PAGE_PRESENT) )
   1.733 +    {
   1.734  		SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
   1.735  		return 0;  // we're not going to be able to help
   1.736      }
   1.737  
   1.738      if ( (error_code & 2)  && ! (gpte & _PAGE_RW) )
   1.739      {
   1.740 -	    // write fault on RO page
   1.741 -	    return 0;
   1.742 -	}
   1.743 +		// write fault on RO page
   1.744 +		return 0;
   1.745 +    }
   1.746  
   1.747      spin_lock(&current->mm.shadow_lock);
   1.748      // take the lock and reread gpte
   1.749  
   1.750 -	if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
   1.751 -	{
   1.752 +    if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
   1.753 +    {
   1.754  		SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
   1.755 -		spin_unlock(&current->mm.shadow_lock);
   1.756 +		spin_unlock(&m->shadow_lock);
   1.757  		return 0;  // propagate to guest
   1.758 -	}
   1.759 +    }
   1.760  
   1.761 -	if ( unlikely(!(gpte & _PAGE_PRESENT)) )
   1.762 -	{
   1.763 +    if ( unlikely(!(gpte & _PAGE_PRESENT)) )
   1.764 +    {
   1.765  		SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
   1.766 -		spin_unlock(&current->mm.shadow_lock);
   1.767 +		spin_unlock(&m->shadow_lock);
   1.768  		return 0;  // we're not going to be able to help
   1.769      }
   1.770  
   1.771 -    spte = gpte;
   1.772 -
   1.773 -	if ( error_code & 2  )  
   1.774 -	{  // write fault
   1.775 +    if ( error_code & 2  )  
   1.776 +    {  // write fault
   1.777  		if ( likely(gpte & _PAGE_RW) )
   1.778 -	    {
   1.779 -			gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
   1.780 -			spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; 
   1.781 -            // (we're about to dirty it anyhow...)
   1.782 +		{
   1.783 +			l1pte_write_fault( m, &gpte, &spte );
   1.784  		}
   1.785  		else
   1.786  		{   // write fault on RO page
   1.787 -            SH_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
   1.788 -            spin_unlock(&current->mm.shadow_lock);
   1.789 +			SH_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
   1.790 +			spin_unlock(&m->shadow_lock);
   1.791  			return 0; // propagate to guest
   1.792  			// not clear whether we should set accessed bit here...
   1.793  		}
   1.794 -	}
   1.795 -	else
   1.796 -	{
   1.797 -		gpte |= _PAGE_ACCESSED;
   1.798 -        spte |= _PAGE_ACCESSED; // about to happen anyway
   1.799 -		if ( ! (gpte & _PAGE_DIRTY) ) 
   1.800 -			spte &= ~_PAGE_RW;  // force clear unless already dirty
   1.801 -	}
   1.802 +    }
   1.803 +    else
   1.804 +    {
   1.805 +		l1pte_read_fault( m, &gpte, &spte );
   1.806 +    }
   1.807  
   1.808 - 	SH_VVLOG("plan: gpte=%08lx  spte=%08lx", gpte, spte );
   1.809 +    SH_VVLOG("plan: gpte=%08lx  spte=%08lx", gpte, spte );
   1.810  
   1.811 -	// write back updated gpte
   1.812 +    // write back updated gpte
   1.813      // XXX watch out for read-only L2 entries! (not used in Linux)
   1.814 -	if ( unlikely( __put_user( gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
   1.815 +    if ( unlikely( __put_user( gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
   1.816  		BUG();  // fixme!
   1.817  
   1.818      if ( unlikely( __put_user( spte, (unsigned long*)&shadow_linear_pg_table[va>>PAGE_SHIFT])) )
   1.819 -	{ 
   1.820 +    { 
   1.821  		// failed:
   1.822 -        //  the L1 may not be shadowed, or the L2 entry may be insufficient
   1.823 +		//  the L1 may not be shadowed, or the L2 entry may be insufficient
   1.824  
   1.825  		unsigned long gpde, spde, gl1pfn, sl1pfn;
   1.826  
   1.827 -        SH_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx  spte=%08lx",gpte,spte );
   1.828 +		SH_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx  spte=%08lx",gpte,spte );
   1.829  
   1.830 -        gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]);
   1.831 +		gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]);
   1.832  
   1.833 -        gl1pfn = gpde>>PAGE_SHIFT;
   1.834 +		gl1pfn = gpde>>PAGE_SHIFT;
   1.835  
   1.836          
   1.837 -        if ( ! (sl1pfn=__shadow_status(&current->mm, gl1pfn) ) )
   1.838 +		if ( ! (sl1pfn=__shadow_status(&current->mm, gl1pfn) ) )
   1.839          {
   1.840 -            // this L1 is NOT already shadowed so we need to shadow it
   1.841 -            struct pfn_info *sl1pfn_info;
   1.842 -            unsigned long *gpl1e, *spl1e;
   1.843 -            int i;
   1.844 -            sl1pfn_info = alloc_shadow_page( &current->mm ); 
   1.845 -  	        sl1pfn_info->type_and_flags = PGT_l1_page_table;
   1.846 +			// this L1 is NOT already shadowed so we need to shadow it
   1.847 +			struct pfn_info *sl1pfn_info;
   1.848 +			unsigned long *gpl1e, *spl1e;
   1.849 +			int i;
   1.850 +			sl1pfn_info = alloc_shadow_page( &current->mm ); 
   1.851 +			sl1pfn_info->type_and_flags = PGT_l1_page_table;
   1.852  
   1.853 -            sl1pfn = sl1pfn_info - frame_table;
   1.854 +			sl1pfn = sl1pfn_info - frame_table;
   1.855  
   1.856 -            SH_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
   1.857 -	        perfc_incrc(shadow_l1_table_count);
   1.858 -	        perfc_incr(shadow_l1_pages);
   1.859 +			SH_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
   1.860 +			perfc_incrc(shadow_l1_table_count);
   1.861 +			perfc_incr(shadow_l1_pages);
   1.862  
   1.863 -            set_shadow_status(&current->mm, gl1pfn, PSH_shadowed | sl1pfn);
   1.864 +			set_shadow_status(&current->mm, gl1pfn, PSH_shadowed | sl1pfn);
   1.865  
   1.866 -            gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
   1.867 -            spde = (gpde & ~PAGE_MASK) | _PAGE_RW | (sl1pfn<<PAGE_SHIFT);
   1.868 +			gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
   1.869 +			spde = (gpde & ~PAGE_MASK) | _PAGE_RW | (sl1pfn<<PAGE_SHIFT);
   1.870          
   1.871  
   1.872 -            linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
   1.873 -            shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] =  mk_l2_pgentry(spde);
   1.874 +			linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
   1.875 +			shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] =  mk_l2_pgentry(spde);
   1.876  
   1.877 -            gpl1e = (unsigned long *) &(linear_pg_table[
   1.878 -                         (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]);
   1.879 +			gpl1e = (unsigned long *) &(linear_pg_table[
   1.880 +				(va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]);
   1.881  
   1.882 -            spl1e = (unsigned long *) &shadow_linear_pg_table[
   1.883 -                         (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ];
   1.884 +			spl1e = (unsigned long *) &shadow_linear_pg_table[
   1.885 +				(va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ];
   1.886  
   1.887  
   1.888 -			// XXX can only do this is the shadow/guest is writeable
   1.889 -            // disable write protection if ! gpde & _PAGE_RW ????
   1.890 -
   1.891 -            for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
   1.892 -	        {
   1.893 -#if SHADOW_OPTIMISE
   1.894 -                if ( (gpl1e[i] & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == 
   1.895 -                                (_PAGE_PRESENT|_PAGE_ACCESSED) )
   1.896 -                {
   1.897 -                    spl1e[i] = gpl1e[i];
   1.898 -                    if ( !(gpl1e[i] & _PAGE_DIRTY) )
   1.899 -                        spl1e[i] &= ~_PAGE_RW;
   1.900 -                }
   1.901 -                else
   1.902 -#endif
   1.903 -                    spl1e[i] = 0;
   1.904 +			for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
   1.905 +			{
   1.906 +				l1pte_no_fault( m, &gpl1e[i], &spl1e[i] );
   1.907              }
   1.908  
   1.909  
   1.910          }
   1.911 -        else
   1.912 +		else
   1.913          {
   1.914 -            // this L1 was shadowed (by another PT) but we didn't have an L2
   1.915 -            // entry for it
   1.916 +			// this L1 was shadowed (by another PT) but we didn't have an L2
   1.917 +			// entry for it
   1.918  
   1.919 -            SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
   1.920 +			SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
   1.921  
   1.922 -		    spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
   1.923 +			spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
   1.924  
   1.925 -            gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
   1.926 +			gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
   1.927  
   1.928  
   1.929  			if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gl1pfn<<PAGE_SHIFT)  ) )
   1.930  			{   // detect linear map, and keep pointing at guest
   1.931 -                SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
   1.932 +				SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
   1.933  				spde = (spde & ~PAGE_MASK) | (gl1pfn<<PAGE_SHIFT);
   1.934  			}
   1.935  
   1.936 -            linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
   1.937 -            shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
   1.938 +			linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
   1.939 +			shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
   1.940  			
   1.941 -
   1.942          }              
   1.943  
   1.944 -        shadow_linear_pg_table[va>>PAGE_SHIFT] = mk_l1_pgentry(spte);
   1.945 -        // (we need to do the above even if we've just made the shadow L1)
   1.946 +		shadow_linear_pg_table[va>>PAGE_SHIFT] = mk_l1_pgentry(spte);
   1.947 +		// (we need to do the above even if we've just made the shadow L1)
   1.948  
   1.949      } // end of fixup writing the shadow L1 directly failed
   1.950      	
   1.951 @@ -654,7 +715,7 @@ int shadow_fault( unsigned long va, long
   1.952  
   1.953      check_pagetable( current, current->mm.pagetable, "post-sf" );
   1.954  
   1.955 -    spin_unlock(&current->mm.shadow_lock);
   1.956 +    spin_unlock(&m->shadow_lock);
   1.957  
   1.958      return 1; // let's try the faulting instruction again...
   1.959  
   1.960 @@ -663,14 +724,14 @@ int shadow_fault( unsigned long va, long
   1.961  
   1.962  void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
   1.963                                   unsigned long *prev_spfn_ptr,
   1.964 -				 l1_pgentry_t **prev_spl1e_ptr )
   1.965 +								 l1_pgentry_t **prev_spl1e_ptr )
   1.966  {
   1.967      unsigned long gpfn, spfn, spte, prev_spfn = *prev_spfn_ptr;    
   1.968      l1_pgentry_t * spl1e, * prev_spl1e = *prev_spl1e_ptr;
   1.969  
   1.970  
   1.971 -SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%p\n",
   1.972 -pa,gpte,prev_spfn, prev_spl1e);
   1.973 +    SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%p\n",
   1.974 +			 pa,gpte,prev_spfn, prev_spl1e);
   1.975  
   1.976      // to get here, we know the l1 page *must* be shadowed
   1.977  
   1.978 @@ -679,34 +740,24 @@ pa,gpte,prev_spfn, prev_spl1e);
   1.979  
   1.980      if ( spfn == prev_spfn )
   1.981      {
   1.982 -        spl1e = prev_spl1e;
   1.983 +		spl1e = prev_spl1e;
   1.984      }
   1.985      else
   1.986      {
   1.987 -        if( prev_spl1e ) unmap_domain_mem( prev_spl1e );
   1.988 -        spl1e = (l1_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
   1.989 -	    *prev_spfn_ptr  = spfn;
   1.990 -	    *prev_spl1e_ptr = spl1e;
   1.991 +		if( prev_spl1e ) unmap_domain_mem( prev_spl1e );
   1.992 +		spl1e = (l1_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
   1.993 +		*prev_spfn_ptr  = spfn;
   1.994 +		*prev_spl1e_ptr = spl1e;
   1.995      }
   1.996 -	// XXX we assume only pagetables can be shadowed; this will have to change
   1.997 -	// to allow arbitrary CoW etc.
   1.998 -
   1.999 -    spte = 0;
  1.1000  
  1.1001 -#if SHADOW_OPTIMISE
  1.1002 -	if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == 
  1.1003 -		 (_PAGE_PRESENT|_PAGE_ACCESSED) )
  1.1004 -    {
  1.1005 -        spte = gpte;
  1.1006 -		if ( !(gpte & _PAGE_DIRTY ) )
  1.1007 -			gpte &= ~ _PAGE_RW;
  1.1008 -	}
  1.1009 -#endif
  1.1010 +    // XXX we assume only pagetables can be shadowed; 
  1.1011 +    // this will have to change to allow arbitrary CoW etc.
  1.1012  
  1.1013 -	spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t) ] = 
  1.1014 -		mk_l1_pgentry( spte );
  1.1015 +    l1pte_no_fault( &current->mm, &gpte, &spte );
  1.1016 +
  1.1017  
  1.1018 -	//unmap_domain_mem( (void *) spl1e );
  1.1019 +    spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t) ] = mk_l1_pgentry( spte );
  1.1020 +
  1.1021  }
  1.1022  
  1.1023  void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte )
  1.1024 @@ -725,30 +776,30 @@ void shadow_l2_normal_pt_update( unsigne
  1.1025  
  1.1026      spte = 0;
  1.1027  
  1.1028 -	if( gpte & _PAGE_PRESENT )
  1.1029 +    if( gpte & _PAGE_PRESENT )
  1.1030  		s_sh = __shadow_status(&current->mm, gpte >> PAGE_SHIFT);
  1.1031  
  1.1032      sp2le = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
  1.1033      // no real need for a cache here
  1.1034  		
  1.1035 -	if ( s_sh ) // PSH_shadowed
  1.1036 -	{
  1.1037 +    if ( s_sh ) // PSH_shadowed
  1.1038 +    {
  1.1039  		if ( unlikely( (frame_table[gpte>>PAGE_SHIFT].type_and_flags & PGT_type_mask) == PGT_l2_page_table) )
  1.1040  		{ 
  1.1041 -            // linear page table case
  1.1042 +			// linear page table case
  1.1043  			spte = (gpte & ~_PAGE_RW) | _PAGE_DIRTY | _PAGE_ACCESSED; 
  1.1044 -	    }
  1.1045 -	    else
  1.1046 +		}
  1.1047 +		else
  1.1048  			spte = (gpte & ~PAGE_MASK) | (s_sh<<PAGE_SHIFT) | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
  1.1049  
  1.1050 -	}
  1.1051 +    }
  1.1052  
  1.1053 -	// XXXX Should mark guest pte as DIRTY and ACCESSED too!!!!!
  1.1054 +    // XXXX Should mark guest pte as DIRTY and ACCESSED too!!!!!
  1.1055  
  1.1056 -	sp2le[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t) ] = 
  1.1057 +    sp2le[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t) ] = 
  1.1058  		mk_l2_pgentry( spte );
  1.1059  
  1.1060 -	unmap_domain_mem( (void *) sp2le );
  1.1061 +    unmap_domain_mem( (void *) sp2le );
  1.1062  }
  1.1063  
  1.1064  
  1.1065 @@ -762,87 +813,87 @@ char * sh_check_name;
  1.1066  {printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n",  sh_check_name, level, i, ## _a , gpte, spte ); BUG();}
  1.1067  
  1.1068  static int check_pte( struct mm_struct *m, 
  1.1069 -			   unsigned long gpte, unsigned long spte, int level, int i )
  1.1070 +					  unsigned long gpte, unsigned long spte, int level, int i )
  1.1071  {
  1.1072 -	unsigned long mask, gpfn, spfn;
  1.1073 +    unsigned long mask, gpfn, spfn;
  1.1074  
  1.1075      if ( spte == 0 || spte == 0xdeadface || spte == 0x00000E00)
  1.1076 -        return 1;  // always safe
  1.1077 +		return 1;  // always safe
  1.1078  
  1.1079      if ( !(spte & _PAGE_PRESENT) )
  1.1080 -        FAIL("Non zero not present spte");
  1.1081 +		FAIL("Non zero not present spte");
  1.1082  
  1.1083 -	if( level == 2 ) sh_l2_present++;
  1.1084 -	if( level == 1 ) sh_l1_present++;
  1.1085 +    if( level == 2 ) sh_l2_present++;
  1.1086 +    if( level == 1 ) sh_l1_present++;
  1.1087  
  1.1088      if ( !(gpte & _PAGE_PRESENT) )
  1.1089 -        FAIL("Guest not present yet shadow is");
  1.1090 +		FAIL("Guest not present yet shadow is");
  1.1091  
  1.1092      mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|0xFFFFF000);
  1.1093  
  1.1094      if ( (spte & mask) != (gpte & mask ) )
  1.1095  		FAIL("Corrupt?");
  1.1096  
  1.1097 -	if ( (spte & _PAGE_DIRTY ) && !(gpte & _PAGE_DIRTY) )
  1.1098 +    if ( (spte & _PAGE_DIRTY ) && !(gpte & _PAGE_DIRTY) )
  1.1099  		FAIL("Dirty coherence");
  1.1100  
  1.1101 -	if ( (spte & _PAGE_ACCESSED ) && !(gpte & _PAGE_ACCESSED) )
  1.1102 +    if ( (spte & _PAGE_ACCESSED ) && !(gpte & _PAGE_ACCESSED) )
  1.1103  		FAIL("Accessed coherence");
  1.1104  
  1.1105 -	if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) )
  1.1106 +    if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) )
  1.1107  		FAIL("RW coherence");
  1.1108  
  1.1109 -	if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY) ))
  1.1110 +    if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY) ))
  1.1111  		FAIL("RW2 coherence");
  1.1112  	
  1.1113 -	spfn = spte>>PAGE_SHIFT;
  1.1114 -	gpfn = gpte>>PAGE_SHIFT;
  1.1115 +    spfn = spte>>PAGE_SHIFT;
  1.1116 +    gpfn = gpte>>PAGE_SHIFT;
  1.1117  
  1.1118 -	if ( gpfn == spfn )
  1.1119 +    if ( gpfn == spfn )
  1.1120      {
  1.1121  		if ( level > 1 )
  1.1122  			FAIL("Linear map ???");			 // XXX this will fail on BSD
  1.1123  
  1.1124  		return 1;
  1.1125 -	}
  1.1126 -	else
  1.1127 -	{
  1.1128 +    }
  1.1129 +    else
  1.1130 +    {
  1.1131  		if ( level < 2 )
  1.1132  			FAIL("Shadow in L1 entry?");
  1.1133  
  1.1134  		if ( __shadow_status(p, gpfn) != (PSH_shadowed | spfn) )
  1.1135  			FAIL("spfn problem g.sf=%08lx", 
  1.1136  				 __shadow_status(p, gpfn) );
  1.1137 -	}
  1.1138 +    }
  1.1139  
  1.1140 -	return 1;
  1.1141 +    return 1;
  1.1142  }
  1.1143  
  1.1144  
  1.1145  static int check_l1_table( struct mm_struct *m, unsigned long va, 
  1.1146 -					unsigned long g2, unsigned long s2 )
  1.1147 +						   unsigned long g2, unsigned long s2 )
  1.1148  {
  1.1149 -	int j;
  1.1150 -	unsigned long *gpl1e, *spl1e;
  1.1151 +    int j;
  1.1152 +    unsigned long *gpl1e, *spl1e;
  1.1153  
  1.1154 -	//gpl1e = (unsigned long *) &(linear_pg_table[ va>>PAGE_SHIFT]);
  1.1155 -	//spl1e = (unsigned long *) &(shadow_linear_pg_table[ va>>PAGE_SHIFT]);
  1.1156 +    //gpl1e = (unsigned long *) &(linear_pg_table[ va>>PAGE_SHIFT]);
  1.1157 +    //spl1e = (unsigned long *) &(shadow_linear_pg_table[ va>>PAGE_SHIFT]);
  1.1158  
  1.1159 -	gpl1e = map_domain_mem( g2<<PAGE_SHIFT );
  1.1160 -	spl1e = map_domain_mem( s2<<PAGE_SHIFT );
  1.1161 +    gpl1e = map_domain_mem( g2<<PAGE_SHIFT );
  1.1162 +    spl1e = map_domain_mem( s2<<PAGE_SHIFT );
  1.1163  
  1.1164 -	for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
  1.1165 -	{
  1.1166 +    for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
  1.1167 +    {
  1.1168  		unsigned long gpte = gpl1e[j];
  1.1169  		unsigned long spte = spl1e[j];
  1.1170  		
  1.1171  		check_pte( p, gpte, spte, 1, j );
  1.1172 -	}
  1.1173 +    }
  1.1174  	
  1.1175 -	unmap_domain_mem( spl1e );
  1.1176 -	unmap_domain_mem( gpl1e );
  1.1177 +    unmap_domain_mem( spl1e );
  1.1178 +    unmap_domain_mem( gpl1e );
  1.1179  
  1.1180 -	return 1;
  1.1181 +    return 1;
  1.1182  }
  1.1183  
  1.1184  #define FAILPT(_f, _a...)                             \
  1.1185 @@ -850,44 +901,44 @@ static int check_l1_table( struct mm_str
  1.1186  
  1.1187  int check_pagetable( struct mm_struct *m, pagetable_t pt, char *s )
  1.1188  {
  1.1189 -	unsigned long gptbase = pagetable_val(pt);
  1.1190 -	unsigned long gpfn, spfn;
  1.1191 -	int i;
  1.1192 -	l2_pgentry_t *gpl2e, *spl2e;
  1.1193 +    unsigned long gptbase = pagetable_val(pt);
  1.1194 +    unsigned long gpfn, spfn;
  1.1195 +    int i;
  1.1196 +    l2_pgentry_t *gpl2e, *spl2e;
  1.1197  
  1.1198 -	sh_check_name = s;
  1.1199 +    sh_check_name = s;
  1.1200  
  1.1201      SH_VVLOG("%s-PT Audit",s);
  1.1202  
  1.1203 -	sh_l2_present = sh_l1_present = 0;
  1.1204 +    sh_l2_present = sh_l1_present = 0;
  1.1205  
  1.1206 -	gpfn =  gptbase >> PAGE_SHIFT;
  1.1207 +    gpfn =  gptbase >> PAGE_SHIFT;
  1.1208  
  1.1209 -	if ( ! (__shadow_status(p, gpfn) & PSH_shadowed) )
  1.1210 -	{
  1.1211 +    if ( ! (__shadow_status(p, gpfn) & PSH_shadowed) )
  1.1212 +    {
  1.1213  		printk("%s-PT %08lx not shadowed\n", s, gptbase);
  1.1214  
  1.1215  		if( __shadow_status(p, gpfn) != 0 ) BUG();
  1.1216  
  1.1217  		return 0;
  1.1218 -	}
  1.1219 +    }
  1.1220  	
  1.1221      spfn = __shadow_status(p, gpfn) & PSH_pfn_mask;
  1.1222  
  1.1223 -	if ( ! __shadow_status(p, gpfn) == (PSH_shadowed | spfn) )
  1.1224 +    if ( ! __shadow_status(p, gpfn) == (PSH_shadowed | spfn) )
  1.1225  		FAILPT("ptbase shadow inconsistent1");
  1.1226  
  1.1227 -	gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT );
  1.1228 -	spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
  1.1229 +    gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT );
  1.1230 +    spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
  1.1231  
  1.1232 -	//ipl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
  1.1233 +    //ipl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
  1.1234  
  1.1235  
  1.1236 -	if ( memcmp( &spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
  1.1237 -			&gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
  1.1238 -			((SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT))-DOMAIN_ENTRIES_PER_L2_PAGETABLE)
  1.1239 -			* sizeof(l2_pgentry_t)) )
  1.1240 -	{
  1.1241 +    if ( memcmp( &spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
  1.1242 +				 &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
  1.1243 +				 ((SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT))-DOMAIN_ENTRIES_PER_L2_PAGETABLE)
  1.1244 +				 * sizeof(l2_pgentry_t)) )
  1.1245 +    {
  1.1246  		printk("gpfn=%08lx spfn=%08lx\n", gpfn, spfn);
  1.1247  		for (i=DOMAIN_ENTRIES_PER_L2_PAGETABLE; 
  1.1248  			 i<(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT));
  1.1249 @@ -895,54 +946,54 @@ int check_pagetable( struct mm_struct *m
  1.1250  			printk("+++ (%d) %08lx %08lx\n",i,
  1.1251  				   l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]) );
  1.1252  		FAILPT("hypervisor entries inconsistent");
  1.1253 -	}
  1.1254 +    }
  1.1255  
  1.1256 -	if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) != 
  1.1257 +    if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) != 
  1.1258  		  l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
  1.1259  		FAILPT("hypervisor linear map inconsistent");
  1.1260  
  1.1261 -	if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) != 
  1.1262 +    if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) != 
  1.1263  		  ((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR)) )
  1.1264  		FAILPT("hypervisor shadow linear map inconsistent %08lx %08lx",
  1.1265  			   l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]),
  1.1266 -			   		  (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR
  1.1267 -			   );
  1.1268 +			   (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR
  1.1269 +			);
  1.1270  
  1.1271 -	if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
  1.1272 +    if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
  1.1273  		  ((__pa(frame_table[gpfn].u.domain->mm.perdomain_pt) | __PAGE_HYPERVISOR))) )
  1.1274  		FAILPT("hypervisor per-domain map inconsistent");
  1.1275  
  1.1276  
  1.1277 -	// check the whole L2
  1.1278 -	for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
  1.1279 -	{
  1.1280 +    // check the whole L2
  1.1281 +    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
  1.1282 +    {
  1.1283  		unsigned long gpte = l2_pgentry_val(gpl2e[i]);
  1.1284  		unsigned long spte = l2_pgentry_val(spl2e[i]);
  1.1285  
  1.1286  		check_pte( p, gpte, spte, 2, i );
  1.1287 -	}
  1.1288 +    }
  1.1289  
  1.1290  
  1.1291 -	// go back and recurse
  1.1292 -	for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
  1.1293 -	{
  1.1294 +    // go back and recurse
  1.1295 +    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
  1.1296 +    {
  1.1297  		unsigned long gpte = l2_pgentry_val(gpl2e[i]);
  1.1298  		unsigned long spte = l2_pgentry_val(spl2e[i]);
  1.1299  
  1.1300  		if ( spte )	   
  1.1301  			check_l1_table( p,
  1.1302 -				i<<L2_PAGETABLE_SHIFT,
  1.1303 -				gpte>>PAGE_SHIFT, spte>>PAGE_SHIFT );
  1.1304 +							i<<L2_PAGETABLE_SHIFT,
  1.1305 +							gpte>>PAGE_SHIFT, spte>>PAGE_SHIFT );
  1.1306  
  1.1307 -	}
  1.1308 +    }
  1.1309  
  1.1310 -	unmap_domain_mem( spl2e );
  1.1311 -	unmap_domain_mem( gpl2e );
  1.1312 +    unmap_domain_mem( spl2e );
  1.1313 +    unmap_domain_mem( gpl2e );
  1.1314  
  1.1315 -	SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
  1.1316 -		   sh_l2_present, sh_l1_present );
  1.1317 +    SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
  1.1318 +			 sh_l2_present, sh_l1_present );
  1.1319  	
  1.1320 -	return 1;
  1.1321 +    return 1;
  1.1322  }
  1.1323  
  1.1324  
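The three l1pte_* helpers introduced above centralise how a guest PTE
is propagated into its shadow for each shadow mode (the SHM_logdirty
cases are left empty at this stage). A standalone sketch of the
SHM_test read-fault rule follows (illustrative only; the flag values
are the usual i386 ones and the function name is hypothetical): the
shadow starts as a copy of the guest entry, the accessed bit is set on
both, and write permission is stripped unless the guest entry is
already dirty, so the first real write still faults and the dirty bit
can be set then.

    #define _PAGE_RW       0x002
    #define _PAGE_ACCESSED 0x020
    #define _PAGE_DIRTY    0x040

    /* Sketch of the SHM_test arm of l1pte_read_fault() above. */
    static void sketch_l1pte_read_fault(unsigned long *gpte_p,
                                        unsigned long *spte_p)
    {
        unsigned long gpte = *gpte_p;
        unsigned long spte = gpte;      /* shadow starts as a copy */

        gpte |= _PAGE_ACCESSED;         /* the access is happening anyway */
        spte |= _PAGE_ACCESSED;
        if ( !(gpte & _PAGE_DIRTY) )
            spte &= ~_PAGE_RW;          /* first write must still fault */

        *gpte_p = gpte;
        *spte_p = spte;
    }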
     2.1 --- a/xen/include/asm-i386/processor.h	Tue Mar 30 15:44:27 2004 +0000
     2.2 +++ b/xen/include/asm-i386/processor.h	Tue Mar 30 21:30:23 2004 +0000
     2.3 @@ -427,6 +427,7 @@ struct mm_struct {
     2.4      struct shadow_status *shadow_ht_free;
     2.5      struct shadow_status *shadow_ht_extras; // extra allocation units
     2.6      unsigned int *shadow_dirty_bitmap;
     2.7 +    unsigned int shadow_dirty_bitmap_size;  // in pages, bit per page
     2.8      unsigned int shadow_page_count;
     2.9      unsigned int shadow_max_page_count;
    2.10      unsigned int shadow_extras_count;
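The new shadow_dirty_bitmap_size field is counted in pages (one bit
per page), so later log-dirty code can bound-check and clear the
bitmap without recomputing the rounding from p->max_pages, as the
CLEAN path in shadow_mode_table_op() now does. A hedged sketch of how
a mark-dirty path might use it (no such function is part of this
changeset; the struct and function names below are stand-ins):

    struct mm_sketch {                  /* stand-in for the fields above */
        unsigned int *shadow_dirty_bitmap;
        unsigned int  shadow_dirty_bitmap_size;  /* in pages, bit per page */
    };

    /* shadow_dirty_bitmap is an array of 32-bit words, so index by
     * pfn/32 and set bit pfn%32; the size field bounds the check. */
    static inline void sketch_mark_dirty(struct mm_sketch *m,
                                         unsigned long pfn)
    {
        if ( m->shadow_dirty_bitmap && pfn < m->shadow_dirty_bitmap_size )
            m->shadow_dirty_bitmap[pfn / 32] |= 1u << (pfn % 32);
    }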