ia64/xen-unstable

changeset 1216:a9abf4f7b2f6

bitkeeper revision 1.794.1.4 (40623aebq_XP4MvV6YJsXGleofDYNg)

shadow mode improvements:
use a hash table to avoid increasing the size of pfn_info.
improved locking in preparation for SMP guests.
author iap10@tetris.cl.cam.ac.uk
date Thu Mar 25 01:50:35 2004 +0000 (2004-03-25)
parents 3ef84b0712aa
children ee66ecf7b0f5
files xen/arch/i386/process.c xen/arch/i386/traps.c xen/common/debug.c xen/common/domain.c xen/common/memory.c xen/common/perfc.c xen/common/shadow.c xen/include/asm-i386/config.h xen/include/asm-i386/processor.h xen/include/xeno/mm.h xen/include/xeno/shadow.h xen/net/dev.c
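The pattern to watch throughout the diff is the lock-on-hit convention around the new per-domain shadow hash table: get_shadow_status() looks up a guest pfn and, when it returns non-zero, leaves mm.shadow_lock held so the caller can safely update the shadow page; the caller then releases it with put_shadow_status(). A minimal sketch of the convention, using the names introduced below (the caller body is illustrative only):

    /* Sketch of the calling convention: get_shadow_status() takes
     * mm.shadow_lock and releases it itself only on a miss (return 0).
     * A non-zero return obliges the caller to put_shadow_status(). */
    unsigned long status;

    if ( p->mm.shadow_mode &&
         (status = get_shadow_status(p, gpfn)) != 0 )
    {
        /* mm.shadow_lock is held here: safe to touch the shadow. */
        /* ... e.g. update or zap the shadow entry for gpfn ... */
        put_shadow_status(p);    /* drops mm.shadow_lock */
    }
    /* on a miss (status == 0) the lock has already been dropped */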
line diff
     1.1 --- a/xen/arch/i386/process.c	Sun Mar 21 19:14:29 2004 +0000
     1.2 +++ b/xen/arch/i386/process.c	Thu Mar 25 01:50:35 2004 +0000
     1.3 @@ -282,25 +282,14 @@ void switch_to(struct task_struct *prev_
     1.4      }
     1.5  
     1.6      /* Switch page tables.  */
     1.7 -#ifdef CONFIG_SHADOW
     1.8 -
     1.9 -    /*    printk("switch_to %08lx, %08lx\n", next_p->mm.pagetable,
    1.10 -	   next_p->mm.shadowtable);*/
    1.11 -
    1.12 -
    1.13 -    if( next_p->mm.shadowmode )
    1.14 +    if( next_p->mm.shadow_mode )
    1.15        {
    1.16 -	check_pagetable( next_p->mm.pagetable, "switch" );
    1.17 -	write_cr3_counted(pagetable_val(next_p->mm.shadowtable));
    1.18 +	check_pagetable( next_p, next_p->mm.pagetable, "switch" );
    1.19 +	write_cr3_counted(pagetable_val(next_p->mm.shadow_table));
    1.20        }
    1.21      else
    1.22 -#endif
    1.23        write_cr3_counted(pagetable_val(next_p->mm.pagetable));
    1.24  
    1.25 -
    1.26 -
    1.27 -
    1.28 -
    1.29      set_current(next_p);
    1.30  
    1.31      /* Switch GDT and LDT. */
     2.1 --- a/xen/arch/i386/traps.c	Sun Mar 21 19:14:29 2004 +0000
     2.2 +++ b/xen/arch/i386/traps.c	Thu Mar 25 01:50:35 2004 +0000
     2.3 @@ -339,13 +339,11 @@ asmlinkage void do_page_fault(struct pt_
     2.4              return; /* successfully copied the mapping */
     2.5      }
     2.6  
     2.7 -#ifdef CONFIG_SHADOW
     2.8 -    if ( p->mm.shadowmode && addr < PAGE_OFFSET &&
     2.9 +    if ( unlikely( p->mm.shadow_mode ) && addr < PAGE_OFFSET &&
    2.10  	 shadow_fault( addr, error_code ) )
    2.11        {
    2.12  	return; // return true if fault was handled 
    2.13        }
    2.14 -#endif
    2.15  
    2.16      if ( unlikely(!(regs->xcs & 3)) )
    2.17          goto fault_in_hypervisor;
     3.1 --- a/xen/common/debug.c	Sun Mar 21 19:14:29 2004 +0000
     3.2 +++ b/xen/common/debug.c	Thu Mar 25 01:50:35 2004 +0000
     3.3 @@ -91,11 +91,9 @@ int pdb_change_values(domid_t domain, u_
     3.4  
     3.5      if ((addr >> PAGE_SHIFT) == ((addr + length - 1) >> PAGE_SHIFT))
     3.6      {
     3.7 -#ifdef CONFIG_SHADOW
     3.8 -        if (p->mm.shadowmode )
     3.9 -          l2_table = map_domain_mem(pagetable_val(p->mm.shadowtable));
    3.10 +        if (p->mm.shadow_mode )
    3.11 +          l2_table = map_domain_mem(pagetable_val(p->mm.shadow_table));
    3.12  	else
    3.13 -#endif
    3.14            l2_table = map_domain_mem(pagetable_val(p->mm.pagetable));
    3.15  
    3.16  	l2_table += l2_table_offset(addr);
     4.1 --- a/xen/common/domain.c	Sun Mar 21 19:14:29 2004 +0000
     4.2 +++ b/xen/common/domain.c	Thu Mar 25 01:50:35 2004 +0000
     4.3 @@ -341,12 +341,14 @@ void free_domain_page(struct pfn_info *p
     4.4          if ( !(page->count_and_flags & PGC_zombie) )
     4.5          {
     4.6              page->tlbflush_timestamp = tlbflush_clock;
     4.7 -            page->u.cpu_mask = 1 << p->processor;
     4.8 -
     4.9 -            spin_lock(&p->page_list_lock);
    4.10 -            list_del(&page->list);
    4.11 -            p->tot_pages--;
    4.12 -            spin_unlock(&p->page_list_lock);
    4.13 +	    if (p)
    4.14 +	    {
    4.15 +                page->u.cpu_mask = 1 << p->processor;
    4.16 +                spin_lock(&p->page_list_lock);
    4.17 +		list_del(&page->list);
    4.18 +		p->tot_pages--;
    4.19 +		spin_unlock(&p->page_list_lock);
    4.20 +	    }
    4.21          }
    4.22  
    4.23          page->count_and_flags = 0;
    4.24 @@ -547,10 +549,6 @@ int final_setup_guestos(struct task_stru
    4.25      get_page_and_type(&frame_table[phys_l2tab>>PAGE_SHIFT], p, 
    4.26                        PGT_l2_page_table);
    4.27  
    4.28 -#ifdef CONFIG_SHADOW
    4.29 -    p->mm.shadowtable = shadow_mk_pagetable(phys_l2tab, p->mm.shadowmode);
    4.30 -#endif
    4.31 -
    4.32      /* Set up the shared info structure. */
    4.33      update_dom_time(p->shared_info);
    4.34  
    4.35 @@ -852,15 +850,10 @@ int setup_guestos(struct task_struct *p,
    4.36  
    4.37      set_bit(PF_CONSTRUCTED, &p->flags);
    4.38  
    4.39 -#ifdef CONFIG_SHADOW
    4.40 -
    4.41 -printk("Engage shadow mode for dom 0\n");
    4.42 -    p->mm.shadowmode = SHM_test; // XXXXX IAP
    4.43 -    p->mm.shadowtable = shadow_mk_pagetable(phys_l2tab, p->mm.shadowmode );
     4.44 +#if 1 // XXXXX IAP DO NOT CHECK IN ENABLED !!!!!!!
    4.45 +    shadow_mode_enable(p, SHM_test); 
    4.46  #endif
    4.47  
    4.48 -
    4.49 -
    4.50      new_thread(p, 
    4.51                 (unsigned long)virt_load_address, 
    4.52                 (unsigned long)virt_stack_address, 
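The free_domain_page() change above pairs with the new alloc_shadow_page() in shadow.c: shadow pages are allocated via alloc_domain_page(NULL), i.e. with no owning domain, so the owner bookkeeping must now be skipped for ownerless pages. A hedged sketch of the guard (page_owner() is a hypothetical stand-in; the hunk does not show how 'p' is obtained):

    struct task_struct *owner = page_owner(page);  /* hypothetical helper */

    if ( owner != NULL )   /* NULL for e.g. shadow page-table pages */
    {
        page->u.cpu_mask = 1 << owner->processor;
        spin_lock(&owner->page_list_lock);
        list_del(&page->list);
        owner->tot_pages--;
        spin_unlock(&owner->page_list_lock);
    }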
     5.1 --- a/xen/common/memory.c	Sun Mar 21 19:14:29 2004 +0000
     5.2 +++ b/xen/common/memory.c	Thu Mar 25 01:50:35 2004 +0000
     5.3 @@ -765,20 +765,22 @@ void free_page_type(struct pfn_info *pag
     5.4      {
     5.5      case PGT_l1_page_table:
     5.6          free_l1_table(page);
     5.7 -#ifdef CONFIG_SHADOW
     5.8 -	// assume we're in shadow mode if PSH_shadowed set
     5.9 -	if ( current->mm.shadowmode && page->shadow_and_flags & PSH_shadowed )
    5.10 +	if ( unlikely(current->mm.shadow_mode) && 
    5.11 +	     (get_shadow_status(current, page-frame_table) & PSH_shadowed) )
    5.12 +	{
    5.13  	    unshadow_table( page-frame_table, type );
    5.14 -#endif
    5.15 +	    put_shadow_status(current);
    5.16 +        }
    5.17  	return;
    5.18  
    5.19      case PGT_l2_page_table:
    5.20          free_l2_table(page);
    5.21 -#ifdef CONFIG_SHADOW
    5.22 -	// assume we're in shadow mode if PSH_shadowed set
    5.23 -	if ( current->mm.shadowmode && page->shadow_and_flags & PSH_shadowed )
    5.24 +	if ( unlikely(current->mm.shadow_mode) && 
    5.25 +	     (get_shadow_status(current, page-frame_table) & PSH_shadowed) )
    5.26 +	{
    5.27  	    unshadow_table( page-frame_table, type );
    5.28 -#endif
    5.29 +	    put_shadow_status(current);
    5.30 +        }
    5.31  	return;
    5.32  
    5.33      default:
    5.34 @@ -848,21 +850,22 @@ static int do_extended_command(unsigned 
    5.35              put_page_and_type(&frame_table[pagetable_val(current->mm.pagetable)
    5.36                                            >> PAGE_SHIFT]);
    5.37              current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
    5.38 -#ifdef CONFIG_SHADOW            
    5.39 -	    current->mm.shadowtable = 
    5.40 -	      shadow_mk_pagetable(pfn << PAGE_SHIFT, current->mm.shadowmode);
    5.41 -#endif
    5.42 -            invalidate_shadow_ldt();
    5.43 +
    5.44 +	    if( unlikely(current->mm.shadow_mode))
    5.45 +	      current->mm.shadow_table = 
    5.46 +		shadow_mk_pagetable(current, pfn<<PAGE_SHIFT);
    5.47  
    5.48 +            invalidate_shadow_ldt();
    5.49 +	    
    5.50 +	    // start using the new PT straight away
    5.51              percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
    5.52 -#ifdef CONFIG_SHADOW
    5.53 -            if ( unlikely(current->mm.shadowmode) )
    5.54 +            if ( unlikely(current->mm.shadow_mode) )
    5.55  	    {
    5.56 -                check_pagetable( current->mm.pagetable, "pre-stlb-flush" );
    5.57 -	        write_cr3_counted(pagetable_val(current->mm.shadowtable));
    5.58 +                check_pagetable( current, 
    5.59 +				 current->mm.pagetable, "pre-stlb-flush" );
    5.60 +	        write_cr3_counted(pagetable_val(current->mm.shadow_table));
    5.61              }
    5.62              else
    5.63 -#endif	  
    5.64  	        write_cr3_counted(pagetable_val(current->mm.pagetable));
    5.65          }
    5.66          else
    5.67 @@ -947,10 +950,8 @@ int do_mmu_update(mmu_update_t *ureqs, i
    5.68      struct pfn_info *page;
    5.69      int rc = 0, okay = 1, i, cpu = smp_processor_id();
    5.70      unsigned int cmd;
    5.71 -#ifdef CONFIG_SHADOW
    5.72      unsigned long prev_spfn = 0;
    5.73      l1_pgentry_t *prev_spl1e = 0;
    5.74 -#endif
    5.75  
    5.76      perfc_incrc(calls_to_mmu_update); 
    5.77      perfc_addc(num_page_updates, count);
    5.78 @@ -1002,11 +1003,14 @@ int do_mmu_update(mmu_update_t *ureqs, i
    5.79                      okay = mod_l1_entry((l1_pgentry_t *)va, 
    5.80                                          mk_l1_pgentry(req.val)); 
    5.81  
    5.82 -#ifdef CONFIG_SHADOW
    5.83 -		    if ( okay && page->shadow_and_flags & PSH_shadowed )
    5.84 +		    if ( okay && unlikely(current->mm.shadow_mode) &&
    5.85 +			 (get_shadow_status(current, page-frame_table) &
    5.86 +			  PSH_shadowed) )
    5.87 +		    {
    5.88  		        shadow_l1_normal_pt_update( req.ptr, req.val, 
    5.89  						    &prev_spfn, &prev_spl1e );
    5.90 -#endif
    5.91 +			put_shadow_status(current);
    5.92 +		    }
    5.93  
    5.94                      put_page_type(page);
    5.95                  }
    5.96 @@ -1017,10 +1021,14 @@ int do_mmu_update(mmu_update_t *ureqs, i
    5.97                      okay = mod_l2_entry((l2_pgentry_t *)va, 
    5.98                                          mk_l2_pgentry(req.val),
    5.99                                          pfn); 
   5.100 -#ifdef CONFIG_SHADOW
   5.101 -		    if ( okay && page->shadow_and_flags & PSH_shadowed )
   5.102 +
   5.103 +		    if ( okay && unlikely(current->mm.shadow_mode) &&
   5.104 +			 (get_shadow_status(current, page-frame_table) & 
   5.105 +			  PSH_shadowed) )
   5.106 +		    {
   5.107  		        shadow_l2_normal_pt_update( req.ptr, req.val );
   5.108 -#endif
   5.109 +			put_shadow_status(current);
   5.110 +		    }
   5.111  
   5.112                      put_page_type(page);
   5.113                  }
   5.114 @@ -1032,19 +1040,11 @@ int do_mmu_update(mmu_update_t *ureqs, i
   5.115                      okay = 1;
   5.116                      put_page_type(page);
   5.117  
   5.118 -#ifdef CONFIG_SHADOW
   5.119 -		    if ( page->shadow_and_flags & PSH_shadowed )
   5.120 -		        BUG(); 
   5.121 -		        // at present, we shouldn't be shadowing such pages
   5.122 -#endif
   5.123 -
   5.124 -
    5.125 +                    // at present, we don't shadow such pages
   5.126                  }
   5.127                  break;
   5.128              }
   5.129  
   5.130 -check_pagetable( current->mm.pagetable, "mmu" ); // XXX XXX XXX XXX XXX
   5.131 -            
   5.132              put_page(page);
   5.133  
   5.134              break;
   5.135 @@ -1087,25 +1087,22 @@ check_pagetable( current->mm.pagetable, 
   5.136      if ( prev_pfn != 0 )
   5.137          unmap_domain_mem((void *)va);
   5.138  
   5.139 -#ifdef CONFIG_SHADOW
   5.140      if( prev_spl1e != 0 ) 
   5.141          unmap_domain_mem((void *)prev_spl1e);
   5.142 -#endif
   5.143  
   5.144      deferred_ops = percpu_info[cpu].deferred_ops;
   5.145      percpu_info[cpu].deferred_ops = 0;
   5.146  
   5.147      if ( deferred_ops & DOP_FLUSH_TLB )
   5.148      {
   5.149 -#ifdef CONFIG_SHADOW
   5.150 -        if ( unlikely(current->mm.shadowmode) )
   5.151 +        if ( unlikely(current->mm.shadow_mode) )
   5.152  	{
   5.153 -            check_pagetable( current->mm.pagetable, "pre-stlb-flush" );
   5.154 -	    write_cr3_counted(pagetable_val(current->mm.shadowtable));
   5.155 +            check_pagetable( current, 
   5.156 +			     current->mm.pagetable, "pre-stlb-flush" );
   5.157 +	    write_cr3_counted(pagetable_val(current->mm.shadow_table));
   5.158          }
   5.159          else
   5.160 -#endif	  
   5.161 -	  write_cr3_counted(pagetable_val(current->mm.pagetable));
   5.162 +  	    write_cr3_counted(pagetable_val(current->mm.pagetable));
   5.163      }
   5.164  
   5.165      if ( deferred_ops & DOP_RELOAD_LDT )
   5.166 @@ -1142,9 +1139,7 @@ int do_update_va_mapping(unsigned long p
   5.167                                  mk_l1_pgentry(val))) )
   5.168          err = -EINVAL;
   5.169  
   5.170 -#ifdef CONFIG_SHADOW
   5.171 -
   5.172 -    if ( unlikely(p->mm.shadowmode) )
   5.173 +    if ( unlikely(p->mm.shadow_mode) )
   5.174      {
   5.175          unsigned long sval = 0;
   5.176  
   5.177 @@ -1164,14 +1159,14 @@ int do_update_va_mapping(unsigned long p
   5.178  	{
    5.179 	    // Since L2's are guaranteed RW, failure indicates the page
   5.180  	    // was not shadowed, so ignore.
   5.181 -            
   5.182 +            perfc_incrc(shadow_update_va_fail);
   5.183  	    //MEM_LOG("update_va_map: couldn't write update\n");	
   5.184  	}
   5.185 +
   5.186 +	check_pagetable( p, p->mm.pagetable, "va" ); // debug
   5.187 +    
   5.188      }
   5.189  
   5.190 -check_pagetable( p->mm.pagetable, "va" );
   5.191 -
   5.192 -#endif
   5.193  
   5.194      deferred_ops = percpu_info[cpu].deferred_ops;
   5.195      percpu_info[cpu].deferred_ops = 0;
   5.196 @@ -1179,12 +1174,10 @@ check_pagetable( p->mm.pagetable, "va" )
   5.197      if ( unlikely(deferred_ops & DOP_FLUSH_TLB) || 
   5.198           unlikely(flags & UVMF_FLUSH_TLB) )
   5.199      {
   5.200 -#ifdef CONFIG_SHADOW
   5.201 -        if ( unlikely(p->mm.shadowmode) )
   5.202 -          write_cr3_counted(pagetable_val(p->mm.shadowtable));
   5.203 +        if ( unlikely(p->mm.shadow_mode) )
   5.204 +            write_cr3_counted(pagetable_val(p->mm.shadow_table));
   5.205          else
   5.206 -#endif
   5.207 -          write_cr3_counted(pagetable_val(p->mm.pagetable));
   5.208 +            write_cr3_counted(pagetable_val(p->mm.pagetable));
   5.209      }
   5.210      else if ( unlikely(flags & UVMF_INVLPG) )
   5.211          __flush_tlb_one(page_nr << PAGE_SHIFT);
     6.1 --- a/xen/common/perfc.c	Sun Mar 21 19:14:29 2004 +0000
     6.2 +++ b/xen/common/perfc.c	Thu Mar 25 01:50:35 2004 +0000
     6.3 @@ -103,7 +103,7 @@ void perfc_reset(u_char key, void *dev_i
     6.4              for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ )
     6.5  	      	atomic_set(&counters[j],0);
     6.6          case TYPE_S_ARRAY:
     6.7 -            counters += j;
     6.8 +            counters += perfc_info[i].nr_elements;
     6.9              break;
    6.10          }
    6.11      }
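The one-line perfc.c change above fixes a switch fall-through bug: the array case above TYPE_S_ARRAY (not shown in the hunk) falls into it, and the old 'counters += j' only worked on that path because the loop leaves j equal to nr_elements; for a counter that is TYPE_S_ARRAY on its own, j held a stale value from an earlier iteration. A sketch of the corrected shape (case names partly assumed, since the hunk only shows TYPE_S_ARRAY):

    switch ( perfc_info[i].type )
    {
    case TYPE_ARRAY:                      /* assumed name of the case above */
        for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ )
            atomic_set(&counters[j], 0);
        /* falls through */
    case TYPE_S_ARRAY:
        counters += perfc_info[i].nr_elements;   /* not 'j' */
        break;
    }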
     7.1 --- a/xen/common/shadow.c	Sun Mar 21 19:14:29 2004 +0000
     7.2 +++ b/xen/common/shadow.c	Thu Mar 25 01:50:35 2004 +0000
     7.3 @@ -7,70 +7,161 @@
     7.4  #include <asm/domain_page.h>
     7.5  #include <asm/page.h>
     7.6  
     7.7 -#ifdef CONFIG_SHADOW
     7.8 -
     7.9 -
    7.10 -#if SHADOW_DEBUG
    7.11 -#define MEM_VLOG(_f, _a...)                             \
    7.12 -  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
    7.13 -         current->domain , __LINE__ , ## _a )
    7.14 -#else
    7.15 -#define MEM_VLOG(_f, _a...) 
    7.16 -#endif
    7.17 -
    7.18 -#if 0
    7.19 -#define MEM_VVLOG(_f, _a...)                             \
    7.20 -  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
    7.21 -         current->domain , __LINE__ , ## _a )
    7.22 -#else
    7.23 -#define MEM_VVLOG(_f, _a...) 
    7.24 -#endif
    7.25 -
    7.26  
    7.27  /********
    7.28  
    7.29  To use these shadow page tables, guests must not rely on the ACCESSED
    7.30  and DIRTY bits on L2 pte's being accurate -- they will typically all be set.
    7.31  
    7.32 +
    7.33  I doubt this will break anything. (If guests want to use the va_update
    7.34  mechanism they've signed up for this anyhow...)
    7.35  
    7.36  ********/
    7.37  
    7.38  
    7.39 -pagetable_t shadow_mk_pagetable( unsigned long gptbase, 
    7.40 -					unsigned int shadowmode )
    7.41 +int shadow_mode_enable( struct task_struct *p, unsigned int mode )
    7.42  {
    7.43 -	unsigned long gpfn, spfn=0;
    7.44 +	struct shadow_status **fptr;
    7.45 +	int i;
    7.46 +
     7.47 +	// synchronously stop domain
    7.48 +    // XXX for the moment, only use on already stopped domains!!!
    7.49 +
    7.50 +	spin_lock_init(&p->mm.shadow_lock);
    7.51 +	spin_lock(&p->mm.shadow_lock);
    7.52  
    7.53 -	MEM_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
    7.54 -			 gptbase, shadowmode );
    7.55 +    p->mm.shadow_mode = mode;
    7.56 +	
    7.57 +	// allocate hashtable
    7.58 +    p->mm.shadow_ht = kmalloc( shadow_ht_buckets * 
    7.59 +							   sizeof(struct shadow_status), GFP_KERNEL );
    7.60 +	if( ! p->mm.shadow_ht )
    7.61 +		goto nomem;
    7.62 +
    7.63 +	memset( p->mm.shadow_ht, 0, shadow_ht_buckets * 
    7.64 +							   sizeof(struct shadow_status) );
    7.65 +
    7.66  
    7.67 -	if ( unlikely(shadowmode) ) 
    7.68 +	// allocate space for first lot of extra nodes
    7.69 +    p->mm.shadow_ht_extras = kmalloc( sizeof(void*) + (shadow_ht_extra_size * 
    7.70 +							   sizeof(struct shadow_status)), GFP_KERNEL );
    7.71 +
    7.72 +	if( ! p->mm.shadow_ht_extras )
    7.73 +		goto nomem;
    7.74 +
    7.75 +	memset( p->mm.shadow_ht_extras, 0, sizeof(void*) + (shadow_ht_extra_size * 
    7.76 +							   sizeof(struct shadow_status)) );
    7.77 +	
    7.78 +    // add extras to free list
    7.79 +	fptr = &p->mm.shadow_ht_free;
    7.80 +	for ( i=0; i<shadow_ht_extra_size; i++ )
    7.81  	{
    7.82 -		gpfn =  gptbase >> PAGE_SHIFT;
    7.83 -		
    7.84 -		if ( likely(frame_table[gpfn].shadow_and_flags & PSH_shadowed) )
    7.85 -		{
    7.86 -			spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
    7.87 +		*fptr = &p->mm.shadow_ht_extras[i];
    7.88 +		fptr = &(p->mm.shadow_ht_extras[i].next);
    7.89 +	}
    7.90 +	*fptr = NULL;
    7.91 +	*((struct shadow_status ** ) &p->mm.shadow_ht_extras[shadow_ht_extra_size]) = NULL;
    7.92 +
    7.93 +	spin_unlock(&p->mm.shadow_lock);
    7.94 +
    7.95 +    // call shadow_mk_pagetable
    7.96 +	p->mm.shadow_table = shadow_mk_pagetable( p, 
    7.97 +											  pagetable_val(p->mm.pagetable) );
    7.98 +
    7.99 +	return 0;
   7.100 +
   7.101 +nomem:
   7.102 +	spin_unlock(&p->mm.shadow_lock);
   7.103 +	return -ENOMEM;
   7.104 +}
   7.105 +
   7.106 +void shadow_mode_disable( )
   7.107 +{
   7.108 +
   7.109 +    // free the hash buckets as you go
   7.110 +
   7.111 +    // free the hashtable itself
   7.112 +}
   7.113 +
   7.114 +
   7.115 +static inline void free_shadow_page( struct task_struct *p, unsigned int pfn )
   7.116 +{
   7.117 +    unsigned long flags;
   7.118 +
   7.119 +	p->mm.shadow_page_count--;
   7.120 +
   7.121 +    spin_lock_irqsave(&free_list_lock, flags);
   7.122 +    list_add(&frame_table[pfn].list, &free_list);
   7.123 +    free_pfns++;
   7.124 +    spin_unlock_irqrestore(&free_list_lock, flags);
   7.125 +}
   7.126 +
   7.127 +static inline struct pfn_info *alloc_shadow_page( struct task_struct *p )
   7.128 +{
   7.129 +	p->mm.shadow_page_count++;
   7.130 +
   7.131 +	return alloc_domain_page( NULL );
   7.132 +}
   7.133 +
   7.134 +
   7.135 +static void __free_shadow_table( struct task_struct *p )
   7.136 +{
   7.137 +	int j;
   7.138 +	struct shadow_status *a;
   7.139 +	
    7.140 +	// the code assumes you're not using the page tables, i.e.
   7.141 +    // the domain is stopped and cr3 is something else!!
   7.142 +
   7.143 +    // walk the hash table and call free_shadow_page on all pages
   7.144 +
   7.145 +    for(j=0;j<shadow_ht_buckets;j++)
   7.146 +    {
   7.147 +        a = &p->mm.shadow_ht[j];        
   7.148 +        if (a->pfn)
   7.149 +        {
   7.150 +            free_shadow_page( p, a->spfn_and_flags & PSH_pfn_mask );
   7.151 +            a->pfn = 0;
   7.152 +            a->spfn_and_flags = 0;
   7.153 +        }
   7.154 +        a=a->next;
   7.155 +        while(a)
   7.156 +		{ 
   7.157 +            struct shadow_status *next = a->next;
   7.158 +            free_shadow_page( p, a->spfn_and_flags & PSH_pfn_mask );
   7.159 +            a->pfn = 0;
   7.160 +            a->spfn_and_flags = 0;
   7.161 +            a->next = p->mm.shadow_ht_free;
   7.162 +            p->mm.shadow_ht_free = a;
   7.163 +            a=next;
   7.164  		}
   7.165 -		else
   7.166 -		{
   7.167 -			spfn = shadow_l2_table( gpfn );
   7.168 -		}      
   7.169  	}
   7.170 +}
   7.171  
   7.172 -	return mk_pagetable(spfn << PAGE_SHIFT);
   7.173 +static void flush_shadow_table( struct task_struct *p )
   7.174 +{
   7.175 +	
   7.176 +    // XXX synchronously stop domain (needed for SMP guests)
   7.177 +
   7.178 +    // switch to idle task's page tables
   7.179 + 
   7.180 +    // walk the hash table and call free_shadow_page on all pages
   7.181 +	spin_lock(&p->mm.shadow_lock);
   7.182 +	__free_shadow_table( p );
   7.183 +	spin_unlock(&p->mm.shadow_lock);
   7.184 +
   7.185 +    // XXX unpause domain
   7.186  }
   7.187  
   7.188 +
   7.189 +
   7.190  void unshadow_table( unsigned long gpfn, unsigned int type )
   7.191  {
   7.192  	unsigned long spfn;
   7.193  
   7.194 -    MEM_VLOG("unshadow_table type=%08x gpfn=%08lx, spfn=%08lx",
   7.195 +    SH_VLOG("unshadow_table type=%08x gpfn=%08lx",
   7.196  		 type,
   7.197 -		 gpfn,
   7.198 -		 frame_table[gpfn].shadow_and_flags & PSH_pfn_mask );
   7.199 +		 gpfn );
   7.200  
   7.201  	perfc_incrc(unshadow_table_count);
   7.202  
   7.203 @@ -79,9 +170,8 @@ void unshadow_table( unsigned long gpfn,
   7.204  	// even in the SMP guest case, there won't be a race here as
   7.205      // this CPU was the one that cmpxchg'ed the page to invalid
   7.206  
   7.207 -	spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
   7.208 -	frame_table[gpfn].shadow_and_flags=0;
   7.209 -	frame_table[spfn].shadow_and_flags=0;
   7.210 +	spfn = __shadow_status(current, gpfn) & PSH_pfn_mask;
   7.211 +	delete_shadow_status(current, gpfn);
   7.212  
   7.213  #if 0 // XXX leave as might be useful for later debugging
   7.214  	{ 
   7.215 @@ -101,27 +191,21 @@ void unshadow_table( unsigned long gpfn,
   7.216      else
   7.217  		perfc_decr(shadow_l2_pages);
   7.218  
   7.219 -	//free_domain_page( &frame_table[spfn] );
   7.220 -
   7.221 -	{
   7.222 -    unsigned long flags;
   7.223 -    spin_lock_irqsave(&free_list_lock, flags);
   7.224 -    list_add(&frame_table[spfn].list, &free_list);
   7.225 -    free_pfns++;
   7.226 -    spin_unlock_irqrestore(&free_list_lock, flags);
   7.227 -	}
   7.228 +	free_shadow_page( current, spfn );
   7.229  
   7.230  }
   7.231  
   7.232  
   7.233 -unsigned long shadow_l2_table( unsigned long gpfn )
   7.234 +static unsigned long shadow_l2_table( 
   7.235 +                     struct task_struct *p, unsigned long gpfn )
   7.236  {
   7.237  	struct pfn_info *spfn_info;
   7.238  	unsigned long spfn;
   7.239  	l2_pgentry_t *spl2e, *gpl2e;
   7.240  	int i;
   7.241  
   7.242 -	MEM_VVLOG("shadow_l2_table( %08lx )",gpfn);
   7.243 +	SH_VVLOG("shadow_l2_table( %08lx )",gpfn);
   7.244 +	spin_lock(&p->mm.shadow_lock);
   7.245  
   7.246  	perfc_incrc(shadow_l2_table_count);
   7.247  	perfc_incr(shadow_l2_pages);
   7.248 @@ -129,17 +213,14 @@ unsigned long shadow_l2_table( unsigned 
   7.249      // XXX in future, worry about racing in SMP guests 
   7.250      //      -- use cmpxchg with PSH_pending flag to show progress (and spin)
   7.251  
   7.252 -	spfn_info = alloc_domain_page( NULL ); // XXX account properly later 
   7.253 +	spfn_info = alloc_shadow_page(p);
   7.254  
   7.255      ASSERT( spfn_info ); // XXX deal with failure later e.g. blow cache
   7.256  
   7.257  	spfn = (unsigned long) (spfn_info - frame_table);
   7.258  
   7.259  	// mark pfn as being shadowed, update field to point at shadow
   7.260 -	frame_table[gpfn].shadow_and_flags = spfn | PSH_shadowed;
   7.261 -
   7.262 -	// mark shadow pfn as being a shadow, update field to point at  pfn	
   7.263 -	frame_table[spfn].shadow_and_flags = gpfn | PSH_shadow;
   7.264 +	set_shadow_status(p, gpfn, spfn | PSH_shadowed);
   7.265  	
   7.266  	// we need to do this before the linear map is set up
   7.267  	spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT);
   7.268 @@ -172,11 +253,11 @@ unsigned long shadow_l2_table( unsigned 
   7.269  		if (gpte & _PAGE_PRESENT)
   7.270  		{
   7.271  			unsigned long s_sh = 
   7.272 -				frame_table[ gpte>>PAGE_SHIFT ].shadow_and_flags;
   7.273 +				__shadow_status(p, gpte>>PAGE_SHIFT);
   7.274  
   7.275  			if( s_sh & PSH_shadowed ) // PSH_shadowed
   7.276  			{
   7.277 -				if ( unlikely( (frame_table[gpte>>PAGE_SHIFT].type_and_flags & PGT_type_mask) == PGT_l2_page_table) )
   7.278 +				if ( unlikely( (__shadow_status(p, gpte>>PAGE_SHIFT) & PGT_type_mask) == PGT_l2_page_table) )
   7.279                  {
   7.280  					printk("Linear mapping detected\n");
   7.281    				    spte = gpte & ~_PAGE_RW;
   7.282 @@ -203,33 +284,61 @@ unsigned long shadow_l2_table( unsigned 
   7.283      unmap_domain_mem( gpl2e );
   7.284      unmap_domain_mem( spl2e );
   7.285  
   7.286 -	MEM_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn);
   7.287 +	SH_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn);
   7.288  
   7.289 -
   7.290 +	spin_unlock(&p->mm.shadow_lock);
   7.291  	return spfn;
   7.292  }
   7.293  
   7.294 +pagetable_t shadow_mk_pagetable( struct task_struct *p, 
   7.295 +											   unsigned long gptbase)
   7.296 +{
   7.297 +	unsigned long gpfn, spfn=0;
   7.298 +
   7.299 +	SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
   7.300 +			 gptbase, p->mm.shadow_mode );
   7.301 +
   7.302 +	if ( likely(p->mm.shadow_mode) )  // should always be true if we're here
   7.303 +	{
   7.304 +		gpfn =  gptbase >> PAGE_SHIFT;
   7.305 +		
   7.306 +		if ( unlikely((spfn=__shadow_status(p, gpfn)) == 0 ) )
   7.307 +		{
   7.308 +			spfn = shadow_l2_table(p, gpfn );
   7.309 +		}      
   7.310 +	}
   7.311 +
   7.312 +	SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
   7.313 +			 gptbase, p->mm.shadow_mode );
   7.314 +
   7.315 +	return mk_pagetable(spfn<<PAGE_SHIFT);
   7.316 +}
   7.317  
   7.318  int shadow_fault( unsigned long va, long error_code )
   7.319  {
   7.320  	unsigned long gpte, spte;
   7.321  
   7.322 -	MEM_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
   7.323 +	SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
   7.324  
   7.325 -    check_pagetable( current->mm.pagetable, "pre-sf" );
   7.326 +    spin_lock(&current->mm.shadow_lock);
   7.327 +
   7.328 +    check_pagetable( current, current->mm.pagetable, "pre-sf" );
   7.329  
   7.330  	if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
   7.331  	{
   7.332 -		MEM_VVLOG("shadow_fault - EXIT: read gpte faulted" );
   7.333 +		SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
   7.334 +        spin_unlock(&current->mm.shadow_lock);
   7.335  		return 0;  // propagate to guest
   7.336  	}
   7.337  
   7.338  	if ( ! (gpte & _PAGE_PRESENT) )
   7.339  	{
   7.340 -		MEM_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
   7.341 +		SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
   7.342 +        spin_unlock(&current->mm.shadow_lock);
   7.343  		return 0;  // we're not going to be able to help
   7.344      }
   7.345  
   7.346 +
   7.347      spte = gpte;
   7.348  
   7.349  	if ( error_code & 2  )  
   7.350 @@ -242,7 +351,8 @@ int shadow_fault( unsigned long va, long
   7.351  		}
   7.352  		else
   7.353  		{   // write fault on RO page
   7.354 -            MEM_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
   7.355 +            SH_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
   7.356 +            spin_unlock(&current->mm.shadow_lock);
   7.357  			return 0; // propagate to guest
   7.358  			// not clear whether we should set accessed bit here...
   7.359  		}
   7.360 @@ -255,7 +365,7 @@ int shadow_fault( unsigned long va, long
   7.361  			spte &= ~_PAGE_RW;  // force clear unless already dirty
   7.362  	}
   7.363  
   7.364 - 	MEM_VVLOG("plan: gpte=%08lx  spte=%08lx", gpte, spte );
   7.365 + 	SH_VVLOG("plan: gpte=%08lx  spte=%08lx", gpte, spte );
   7.366  
   7.367  	// write back updated gpte
   7.368      // XXX watch out for read-only L2 entries! (not used in Linux)
   7.369 @@ -269,13 +379,13 @@ int shadow_fault( unsigned long va, long
   7.370  
   7.371  		unsigned long gpde, spde, gl1pfn, sl1pfn;
   7.372  
   7.373 -        MEM_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx  spte=%08lx",gpte,spte );
   7.374 +        SH_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx  spte=%08lx",gpte,spte );
   7.375  
   7.376          gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]);
   7.377  
   7.378          gl1pfn = gpde>>PAGE_SHIFT;
   7.379  
   7.380 -        if ( ! (frame_table[gl1pfn].shadow_and_flags & PSH_shadowed ) )
   7.381 +        if ( ! (sl1pfn=__shadow_status(current, gl1pfn) ) )
   7.382          {
   7.383              // this L1 is NOT already shadowed so we need to shadow it
   7.384              struct pfn_info *sl1pfn_info;
   7.385 @@ -284,12 +394,11 @@ int shadow_fault( unsigned long va, long
   7.386              sl1pfn_info = alloc_domain_page( NULL ); // XXX account properly! 
   7.387              sl1pfn = sl1pfn_info - frame_table;
   7.388  
   7.389 -            MEM_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
   7.390 +            SH_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
   7.391  	        perfc_incrc(shadow_l1_table_count);
   7.392  	        perfc_incr(shadow_l1_pages);
   7.393  
   7.394 -            sl1pfn_info->shadow_and_flags = PSH_shadow | gl1pfn;
   7.395 -            frame_table[gl1pfn].shadow_and_flags = PSH_shadowed | sl1pfn;
   7.396 +            set_shadow_status(current, gl1pfn, PSH_shadowed | sl1pfn);
   7.397  
   7.398              gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
   7.399              spde = (gpde & ~PAGE_MASK) | _PAGE_RW | (sl1pfn<<PAGE_SHIFT);
   7.400 @@ -330,9 +439,7 @@ int shadow_fault( unsigned long va, long
   7.401              // this L1 was shadowed (by another PT) but we didn't have an L2
   7.402              // entry for it
   7.403  
   7.404 -            sl1pfn = frame_table[gl1pfn].shadow_and_flags & PSH_pfn_mask;
   7.405 -
   7.406 -            MEM_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
   7.407 +            SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
   7.408  
   7.409  		    spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
   7.410  
   7.411 @@ -341,7 +448,7 @@ int shadow_fault( unsigned long va, long
   7.412  
   7.413  			if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gl1pfn<<PAGE_SHIFT)  ) )
   7.414  			{   // detect linear map, and keep pointing at guest
   7.415 -                MEM_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
   7.416 +                SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
   7.417  				spde = (spde & ~PAGE_MASK) | (gl1pfn<<PAGE_SHIFT);
   7.418  			}
   7.419  
   7.420 @@ -358,7 +465,9 @@ int shadow_fault( unsigned long va, long
   7.421      	
   7.422      perfc_incrc(shadow_fixup_count);
   7.423  
   7.424 -    check_pagetable( current->mm.pagetable, "post-sf" );
   7.425 +    check_pagetable( current, current->mm.pagetable, "post-sf" );
   7.426 +
   7.427 +    spin_unlock(&current->mm.shadow_lock);
   7.428  
   7.429      return 1; // let's try the faulting instruction again...
   7.430  
   7.431 @@ -373,13 +482,13 @@ void shadow_l1_normal_pt_update( unsigne
   7.432      l1_pgentry_t * spl1e, * prev_spl1e = *prev_spl1e_ptr;
   7.433  
   7.434  
   7.435 -MEM_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%08lx\n",
   7.436 +SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%08lx\n",
   7.437  pa,gpte,prev_spfn, prev_spl1e);
   7.438  
   7.439      // to get here, we know the l1 page *must* be shadowed
   7.440  
   7.441      gpfn = pa >> PAGE_SHIFT;
   7.442 -    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
   7.443 +    spfn = __shadow_status(current, gpfn) & PSH_pfn_mask;
   7.444  
   7.445      if ( spfn == prev_spfn )
   7.446      {
   7.447 @@ -417,21 +526,23 @@ void shadow_l2_normal_pt_update( unsigne
   7.448  {
   7.449      unsigned long gpfn, spfn, spte;
   7.450      l2_pgentry_t * sp2le;
   7.451 -    unsigned long s_sh;
   7.452 +    unsigned long s_sh=0;
   7.453  
   7.454 -    MEM_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte);
   7.455 +    SH_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte);
   7.456  
   7.457      // to get here, we know the l2 page has a shadow
   7.458  
   7.459      gpfn = pa >> PAGE_SHIFT;
   7.460 -    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
   7.461 +    spfn = __shadow_status(current, gpfn) & PSH_pfn_mask;
   7.462 +
   7.463 +
   7.464 +    spte = 0;
   7.465 +
   7.466 +	if( gpte & _PAGE_PRESENT )
   7.467 +		s_sh = __shadow_status(current, gpte >> PAGE_SHIFT);
   7.468  
   7.469      sp2le = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
   7.470      // no real need for a cache here
   7.471 -
   7.472 -    spte = 0;
   7.473 -
   7.474 -    s_sh = frame_table[gpte >> PAGE_SHIFT].shadow_and_flags;
   7.475  		
   7.476  	if ( s_sh ) // PSH_shadowed
   7.477  	{
   7.478 @@ -463,7 +574,8 @@ char * sh_check_name;
   7.479  #define FAIL(_f, _a...)                             \
   7.480  {printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n",  sh_check_name, level, i, ## _a , gpte, spte ); BUG();}
   7.481  
   7.482 -int check_pte( unsigned long gpte, unsigned long spte, int level, int i )
   7.483 +static int check_pte( struct task_struct *p, 
   7.484 +			   unsigned long gpte, unsigned long spte, int level, int i )
   7.485  {
   7.486  	unsigned long mask, gpfn, spfn;
   7.487  
   7.488 @@ -504,42 +616,24 @@ int check_pte( unsigned long gpte, unsig
   7.489  		if ( level > 1 )
   7.490  			FAIL("Linear map ???");			 // XXX this will fail on BSD
   7.491  
   7.492 -#if 0 // might be a RO mapping of a page table page
   7.493 -		if ( frame_table[gpfn].shadow_and_flags != 0 )
   7.494 -        {
   7.495 -			FAIL("Should have been shadowed g.sf=%08lx s.sf=%08lx", 
   7.496 -				 frame_table[gpfn].shadow_and_flags,
   7.497 -				 frame_table[spfn].shadow_and_flags);
   7.498 -        }
   7.499 -		else
   7.500 -#endif
   7.501 -			return 1;
   7.502 +		return 1;
   7.503  	}
   7.504  	else
   7.505  	{
   7.506  		if ( level < 2 )
   7.507  			FAIL("Shadow in L1 entry?");
   7.508  
   7.509 -		if ( frame_table[gpfn].shadow_and_flags != (PSH_shadowed | spfn) )
   7.510 -			FAIL("spfn problem g.sf=%08lx s.sf=%08lx [g.sf]=%08lx [s.sf]=%08lx", 
   7.511 -				 frame_table[gpfn].shadow_and_flags,
   7.512 -				 frame_table[spfn].shadow_and_flags,
   7.513 -				 frame_table[frame_table[gpfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags,
   7.514 -				 frame_table[frame_table[spfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags
   7.515 -				 );
   7.516 -
   7.517 -		if ( frame_table[spfn].shadow_and_flags != (PSH_shadow | gpfn) )
   7.518 -			FAIL("gpfn problem g.sf=%08lx s.sf=%08lx", 
   7.519 -				 frame_table[gpfn].shadow_and_flags,
   7.520 -				 frame_table[spfn].shadow_and_flags);
   7.521 -
   7.522 +		if ( __shadow_status(p, gpfn) != (PSH_shadowed | spfn) )
   7.523 +			FAIL("spfn problem g.sf=%08lx", 
   7.524 +				 __shadow_status(p, gpfn) );
   7.525  	}
   7.526  
   7.527  	return 1;
   7.528  }
   7.529  
   7.530  
   7.531 -int check_l1_table( unsigned long va, unsigned long g2, unsigned long s2 )
   7.532 +static int check_l1_table( struct task_struct *p, unsigned long va, 
   7.533 +					unsigned long g2, unsigned long s2 )
   7.534  {
   7.535  	int j;
   7.536  	unsigned long *gpl1e, *spl1e;
   7.537 @@ -555,7 +649,7 @@ int check_l1_table( unsigned long va, un
   7.538  		unsigned long gpte = gpl1e[j];
   7.539  		unsigned long spte = spl1e[j];
   7.540  		
   7.541 -		check_pte( gpte, spte, 1, j );
   7.542 +		check_pte( p, gpte, spte, 1, j );
   7.543  	}
   7.544  	
   7.545  	unmap_domain_mem( spl1e );
   7.546 @@ -567,7 +661,7 @@ int check_l1_table( unsigned long va, un
   7.547  #define FAILPT(_f, _a...)                             \
   7.548  {printk("XXX FAIL %s-PT" _f "\n", s, ## _a ); BUG();}
   7.549  
   7.550 -int check_pagetable( pagetable_t pt, char *s )
   7.551 +int check_pagetable( struct task_struct *p, pagetable_t pt, char *s )
   7.552  {
   7.553  	unsigned long gptbase = pagetable_val(pt);
   7.554  	unsigned long gpfn, spfn;
   7.555 @@ -576,29 +670,26 @@ int check_pagetable( pagetable_t pt, cha
   7.556  
   7.557  	sh_check_name = s;
   7.558  
   7.559 -    MEM_VVLOG("%s-PT Audit",s);
   7.560 +    SH_VVLOG("%s-PT Audit",s);
   7.561  
   7.562  	sh_l2_present = sh_l1_present = 0;
   7.563  
   7.564  	gpfn =  gptbase >> PAGE_SHIFT;
   7.565  
   7.566 -	if ( ! (frame_table[gpfn].shadow_and_flags & PSH_shadowed) )
   7.567 +	if ( ! (__shadow_status(p, gpfn) & PSH_shadowed) )
   7.568  	{
   7.569  		printk("%s-PT %08lx not shadowed\n", s, gptbase);
   7.570  
   7.571 -		if( frame_table[gpfn].shadow_and_flags != 0 ) BUG();
   7.572 +		if( __shadow_status(p, gpfn) != 0 ) BUG();
   7.573  
   7.574  		return 0;
   7.575  	}
   7.576  	
   7.577 -    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
   7.578 +    spfn = __shadow_status(p, gpfn) & PSH_pfn_mask;
   7.579  
   7.580 -	if ( ! frame_table[gpfn].shadow_and_flags == (PSH_shadowed | spfn) )
    7.581 +	if ( __shadow_status(p, gpfn) != (PSH_shadowed | spfn) )
   7.582  		FAILPT("ptbase shadow inconsistent1");
   7.583  
   7.584 -	if ( ! frame_table[spfn].shadow_and_flags == (PSH_shadow | gpfn) )
   7.585 -		FAILPT("ptbase shadow inconsistent2");
   7.586 -
   7.587  	gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT );
   7.588  	spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
   7.589  
   7.590 @@ -641,7 +732,7 @@ int check_pagetable( pagetable_t pt, cha
   7.591  		unsigned long gpte = l2_pgentry_val(gpl2e[i]);
   7.592  		unsigned long spte = l2_pgentry_val(spl2e[i]);
   7.593  
   7.594 -		check_pte( gpte, spte, 2, i );
   7.595 +		check_pte( p, gpte, spte, 2, i );
   7.596  	}
   7.597  
   7.598  
   7.599 @@ -652,7 +743,7 @@ int check_pagetable( pagetable_t pt, cha
   7.600  		unsigned long spte = l2_pgentry_val(spl2e[i]);
   7.601  
   7.602  		if ( spte )	   
   7.603 -			check_l1_table( 
   7.604 +			check_l1_table( p,
   7.605  				i<<L2_PAGETABLE_SHIFT,
   7.606  				gpte>>PAGE_SHIFT, spte>>PAGE_SHIFT );
   7.607  
   7.608 @@ -661,7 +752,7 @@ int check_pagetable( pagetable_t pt, cha
   7.609  	unmap_domain_mem( spl2e );
   7.610  	unmap_domain_mem( gpl2e );
   7.611  
   7.612 -	MEM_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
   7.613 +	SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
   7.614  		   sh_l2_present, sh_l1_present );
   7.615  	
   7.616  	return 1;
   7.617 @@ -671,7 +762,6 @@ int check_pagetable( pagetable_t pt, cha
   7.618  #endif
   7.619  
   7.620  
   7.621 -#endif // CONFIG_SHADOW
   7.622  
   7.623  
   7.624  
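shadow_mode_enable() above threads the freshly allocated extra nodes onto mm.shadow_ht_free with a pointer-to-pointer cursor, which needs no special case for the empty list. A self-contained sketch of the idiom with generic names (compilable as-is):

    #include <stdio.h>
    #include <stddef.h>

    struct node { struct node *next; };

    /* Thread nodes[0..n-1] into a NULL-terminated free list at *head,
     * the way shadow_mode_enable() builds shadow_ht_free. */
    static void build_free_list(struct node **head, struct node *nodes, int n)
    {
        struct node **fptr = head;      /* where the next link is written */
        int i;
        for ( i = 0; i < n; i++ )
        {
            *fptr = &nodes[i];
            fptr = &nodes[i].next;
        }
        *fptr = NULL;                   /* terminate the list */
    }

    int main(void)
    {
        struct node pool[4], *head, *p;
        int len = 0;
        build_free_list(&head, pool, 4);
        for ( p = head; p != NULL; p = p->next )
            len++;
        printf("free list length: %d\n", len);   /* prints 4 */
        return 0;
    }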
     8.1 --- a/xen/include/asm-i386/config.h	Sun Mar 21 19:14:29 2004 +0000
     8.2 +++ b/xen/include/asm-i386/config.h	Thu Mar 25 01:50:35 2004 +0000
     8.3 @@ -40,8 +40,6 @@
     8.4  
     8.5  #define CONFIG_XEN_ATTENTION_KEY 1
     8.6  
     8.7 -#define CONFIG_SHADOW 1
     8.8 -
     8.9  
    8.10  #define HZ 100
    8.11  
     9.1 --- a/xen/include/asm-i386/processor.h	Sun Mar 21 19:14:29 2004 +0000
     9.2 +++ b/xen/include/asm-i386/processor.h	Thu Mar 25 01:50:35 2004 +0000
     9.3 @@ -12,6 +12,7 @@
     9.4  #include <asm/cpufeature.h>
     9.5  #include <asm/desc.h>
     9.6  #include <xeno/config.h>
     9.7 +#include <xeno/spinlock.h>
     9.8  #include <hypervisor-ifs/hypervisor-if.h>
     9.9  
    9.10  struct task_struct;
    9.11 @@ -416,10 +417,14 @@ struct mm_struct {
    9.12      l1_pgentry_t *perdomain_pt;
    9.13      pagetable_t  pagetable;
    9.14  
    9.15 -#ifdef CONFIG_SHADOW
    9.16 -    unsigned int shadowmode;  /* flags to control shadow table operation */
    9.17 -    pagetable_t  shadowtable;
    9.18 -#endif
    9.19 +    unsigned int shadow_mode;  /* flags to control shadow table operation */
    9.20 +    pagetable_t  shadow_table;
    9.21 +    spinlock_t shadow_lock;
    9.22 +    struct shadow_status *shadow_ht;
    9.23 +    struct shadow_status *shadow_ht_free;
    9.24 +    struct shadow_status *shadow_ht_extras; // extra allocation units
    9.25 +    unsigned int shadow_page_count;
    9.26 +    unsigned int shadow_max_page_count;
    9.27  
    9.28      /* Current LDT details. */
    9.29      unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
    10.1 --- a/xen/include/xeno/mm.h	Sun Mar 21 19:14:29 2004 +0000
    10.2 +++ b/xen/include/xeno/mm.h	Thu Mar 25 01:50:35 2004 +0000
    10.3 @@ -67,10 +67,6 @@ struct pfn_info
    10.4      unsigned long       type_and_flags;
    10.5      /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
    10.6      unsigned long       tlbflush_timestamp;
    10.7 -#ifdef CONFIG_SHADOW
    10.8 -    /* Shadow page status: top bits flags, bottom bits are a pfn */
    10.9 -    unsigned long       shadow_and_flags;  
   10.10 -#endif
   10.11  };
   10.12  
   10.13   /* The following page types are MUTUALLY EXCLUSIVE. */
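Dropping shadow_and_flags from pfn_info is where the commit message's space saving comes from: frame_table holds one pfn_info per machine page, so the field cost a word per page whether or not the page was ever shadowed. As a rough, illustrative calculation on i386 (4-byte longs, 4 KB pages): a 4 GB machine has 2^20 frames, so the field alone accounted for 4 MB of frame_table, while the replacement hash table costs 256 buckets plus 128-entry extension blocks of 12-byte shadow_status entries, i.e. a few kilobytes up front, growing only with the number of pages actually shadowed.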
    11.1 --- a/xen/include/xeno/shadow.h	Sun Mar 21 19:14:29 2004 +0000
    11.2 +++ b/xen/include/xeno/shadow.h	Thu Mar 25 01:50:35 2004 +0000
    11.3 @@ -3,15 +3,13 @@
    11.4  #ifndef _XENO_SHADOW_H
    11.5  #define _XENO_SHADOW_H
    11.6  
    11.7 -#ifdef CONFIG_SHADOW
    11.8 -
    11.9  #include <xeno/config.h>
   11.10  #include <xeno/types.h>
   11.11  #include <xeno/mm.h>
   11.12 +#include <xeno/perfc.h>
   11.13  
   11.14  /* Shadow PT flag bits in pfn_info */
   11.15  #define PSH_shadowed	(1<<31) /* page has a shadow. PFN points to shadow */
   11.16 -#define PSH_shadow	    (1<<30) /* page is a shadow. PFN points to orig page */
   11.17  #define PSH_pending	    (1<<29) /* page is in the process of being shadowed */
   11.18  #define PSH_pfn_mask	((1<<21)-1)
   11.19  
   11.20 @@ -24,28 +22,323 @@
   11.21  #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
   11.22  #define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
   11.23  
   11.24 -extern pagetable_t shadow_mk_pagetable( unsigned long gptbase, unsigned int shadowmode );
   11.25 -extern void unshadow_table( unsigned long gpfn, unsigned int type );
   11.26 -extern unsigned long shadow_l2_table( unsigned long gpfn );
   11.27 +extern pagetable_t shadow_mk_pagetable( struct task_struct *p, 
   11.28 +										unsigned long gptbase);
   11.29  extern int shadow_fault( unsigned long va, long error_code );
   11.30  extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte, 
   11.31  										unsigned long *prev_spfn_ptr,
   11.32  										l1_pgentry_t **prev_spl1e_ptr  );
   11.33  extern void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte );
   11.34 -
   11.35 +extern void unshadow_table( unsigned long gpfn, unsigned int type );
   11.36 +extern int shadow_mode_enable( struct task_struct *p, unsigned int mode );
   11.37  
   11.38  #define SHADOW_DEBUG 0
   11.39 +#define SHADOW_HASH_DEBUG 0
   11.40  #define SHADOW_OPTIMISE 1
   11.41  
   11.42 -#endif // end of CONFIG_SHADOW
   11.43 +struct shadow_status {
   11.44 +    unsigned long pfn;            // gpfn 
   11.45 +    unsigned long spfn_and_flags; // spfn plus flags
   11.46 +    struct shadow_status *next;   // use pull-to-front list.
   11.47 +};
   11.48 +
   11.49 +#define shadow_ht_extra_size         128 /*128*/
   11.50 +#define shadow_ht_buckets            256 /*256*/
   11.51 +
   11.52 +#ifndef NDEBUG
   11.53 +#define SH_LOG(_f, _a...)                             \
   11.54 +  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
   11.55 +         current->domain , __LINE__ , ## _a )
   11.56 +#else
   11.57 +#define SH_LOG(_f, _a...) 
   11.58 +#endif
   11.59  
   11.60  #if SHADOW_DEBUG
   11.61 -extern int check_pagetable( pagetable_t pt, char *s );
   11.62 +#define SH_VLOG(_f, _a...)                             \
   11.63 +  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
   11.64 +         current->domain , __LINE__ , ## _a )
   11.65  #else
   11.66 -#define check_pagetable( pt, s )
   11.67 +#define SH_VLOG(_f, _a...) 
   11.68 +#endif
   11.69 +
   11.70 +#if 0
   11.71 +#define SH_VVLOG(_f, _a...)                             \
   11.72 +  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
   11.73 +         current->domain , __LINE__ , ## _a )
   11.74 +#else
   11.75 +#define SH_VVLOG(_f, _a...) 
   11.76  #endif
   11.77  
   11.78  
   11.79  
   11.80 +#if SHADOW_HASH_DEBUG
   11.81 +static void shadow_audit(struct task_struct *p, int print)
   11.82 +{
   11.83 +	int live=0, free=0, j=0, abs;
   11.84 +	struct shadow_status *a;
   11.85 +	
   11.86 +    for(j=0;j<shadow_ht_buckets;j++)
   11.87 +    {
   11.88 +        a = &p->mm.shadow_ht[j];        
   11.89 +		if(a->pfn) live++;
   11.90 +        while(a->next && live<9999)
   11.91 +		{ 
   11.92 +			live++; 
   11.93 +			if(a->pfn == 0)
   11.94 +			{
   11.95 +				printk("XXX live=%d pfn=%08lx sp=%08lx next=%p\n",
   11.96 +					   live, a->pfn, a->spfn_and_flags, a->next);
   11.97 +				BUG();
   11.98 +			}
   11.99 +			a=a->next; 
  11.100 +		}
  11.101 +		ASSERT(live<9999);
  11.102 +	}
  11.103 +
  11.104 +    a = p->mm.shadow_ht_free;
  11.105 +    while(a) { free++; a=a->next; }
  11.106 +
  11.107 +    if(print) printk("live=%d free=%d\n",live,free);
  11.108 +
  11.109 +	abs=(perfc_value(shadow_l1_pages)+perfc_value(shadow_l2_pages))-live;
  11.110 +	if( abs < -1 || abs > 1 )
  11.111 +	{
  11.112 +		printk("live=%d free=%d l1=%d l2=%d\n",live,free,
  11.113 +			  perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) );
  11.114 +		BUG();
  11.115 +    }
  11.116 +
  11.117 +}
  11.118 +
  11.119 +#else
  11.120 +#define shadow_audit(p, print)
  11.121 +#endif
  11.122 +
  11.123 +static inline struct shadow_status* hash_bucket( struct task_struct *p,
  11.124 +												 unsigned int gpfn )
  11.125 +{
  11.126 +    return &(p->mm.shadow_ht[gpfn % shadow_ht_buckets]);
  11.127 +}
  11.128 +
  11.129 +
  11.130 +static inline unsigned long __shadow_status( struct task_struct *p,
  11.131 +										   unsigned int gpfn )
  11.132 +{
  11.133 +	struct shadow_status **ob, *b, *B = hash_bucket( p, gpfn );
  11.134 +
  11.135 +    b = B;
  11.136 +    ob = NULL;
  11.137 +
  11.138 +	SH_VVLOG("lookup gpfn=%08lx bucket=%08lx", gpfn, b );
  11.139 +	shadow_audit(p,0);  // if in debug mode
  11.140 +
  11.141 +	do
  11.142 +	{
  11.143 +		if ( b->pfn == gpfn )
  11.144 +		{
  11.145 +			unsigned long t;
  11.146 +			struct shadow_status *x;
  11.147 +
  11.148 +			// swap with head
  11.149 +			t=B->pfn; B->pfn=b->pfn; b->pfn=t;
  11.150 +			t=B->spfn_and_flags; B->spfn_and_flags=b->spfn_and_flags; 
  11.151 +			    b->spfn_and_flags=t;
  11.152 +
  11.153 +			if(ob)
  11.154 +			{   // pull to front
  11.155 +				*ob=b->next;
  11.156 +				x=B->next;
  11.157 +				B->next=b;	
  11.158 +				b->next=x;
  11.159 +			}			
  11.160 +			return B->spfn_and_flags;
  11.161 +		}
  11.162 +#if SHADOW_HASH_DEBUG
  11.163 +		else
  11.164 +		{
  11.165 +			if(b!=B)ASSERT(b->pfn);
  11.166 +		}
  11.167 +#endif
  11.168 +		ob=&b->next;
  11.169 +		b=b->next;
  11.170 +	}
  11.171 +	while (b);
  11.172 +
  11.173 +	return 0;
  11.174 +}
  11.175 +
   11.176 +/* We can make this locking more fine-grained (e.g. per shadow page) if it
   11.177 +ever becomes a problem, but since we need a spin lock on the hash table
   11.178 +anyway it's probably not worth being too clever. */
  11.179 +
  11.180 +static inline unsigned long get_shadow_status( struct task_struct *p,
  11.181 +										   unsigned int gpfn )
  11.182 +{
  11.183 +	unsigned long res;
  11.184 +
  11.185 +	spin_lock(&p->mm.shadow_lock);
  11.186 +	res = __shadow_status( p, gpfn );
  11.187 +	if (!res) spin_unlock(&p->mm.shadow_lock);
  11.188 +	return res;
  11.189 +}
  11.190 +
  11.191 +
  11.192 +static inline void put_shadow_status( struct task_struct *p )
  11.193 +{
  11.194 +	spin_unlock(&p->mm.shadow_lock);
  11.195 +}
  11.196 +
  11.197 +
  11.198 +static inline void delete_shadow_status( struct task_struct *p,
  11.199 +									  unsigned int gpfn )
  11.200 +{
  11.201 +	struct shadow_status *b, *B, **ob;
  11.202 +
  11.203 +	B = b = hash_bucket( p, gpfn );
  11.204 +
  11.205 +	SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b );
  11.206 +	shadow_audit(p,0);
  11.207 +	ASSERT(gpfn);
  11.208 +
  11.209 +	if( b->pfn == gpfn )
  11.210 +    {
  11.211 +		if (b->next)
  11.212 +		{
  11.213 +			struct shadow_status *D=b->next;
  11.214 +			b->spfn_and_flags = b->next->spfn_and_flags;
  11.215 +			b->pfn = b->next->pfn;
  11.216 +
  11.217 +			b->next = b->next->next;
  11.218 +			D->next = p->mm.shadow_ht_free;
  11.219 +			p->mm.shadow_ht_free = D;
  11.220 +		}
  11.221 +		else
  11.222 +		{
  11.223 +			b->pfn = 0;
  11.224 +			b->spfn_and_flags = 0;
  11.225 +		}
  11.226 +
  11.227 +#if SHADOW_HASH_DEBUG
  11.228 +		if( __shadow_status(p,gpfn) ) BUG();  
  11.229 +#endif
  11.230 +		return;
  11.231 +    }
  11.232 +
  11.233 +	ob = &b->next;
  11.234 +	b=b->next;
  11.235 +
  11.236 +	do
  11.237 +	{
  11.238 +		if ( b->pfn == gpfn )			
  11.239 +		{
  11.240 +			b->pfn = 0;
  11.241 +			b->spfn_and_flags = 0;
  11.242 +
  11.243 +			// b is in the list
  11.244 +            *ob=b->next;
  11.245 +			b->next = p->mm.shadow_ht_free;
  11.246 +			p->mm.shadow_ht_free = b;
  11.247 +
  11.248 +#if SHADOW_HASH_DEBUG
  11.249 +			if( __shadow_status(p,gpfn) ) BUG();
  11.250 +#endif
  11.251 +			return;
  11.252 +		}
  11.253 +
  11.254 +		ob = &b->next;
  11.255 +		b=b->next;
  11.256 +	}
  11.257 +	while (b);
  11.258 +
  11.259 +	// if we got here, it wasn't in the list
  11.260 +    BUG();
  11.261 +}
  11.262 +
  11.263 +
  11.264 +static inline void set_shadow_status( struct task_struct *p,
  11.265 +									  unsigned int gpfn, unsigned long s )
  11.266 +{
  11.267 +	struct shadow_status *b, *B, *extra, **fptr;
  11.268 +    int i;
  11.269 +
  11.270 +	B = b = hash_bucket( p, gpfn );
  11.271 +   
  11.272 +    ASSERT(gpfn);
  11.273 +    ASSERT(s);
  11.274 +    SH_VVLOG("set gpfn=%08x s=%08lx bucket=%p(%p)", gpfn, s, b, b->next );
  11.275 +    shadow_audit(p,0);
  11.276 +
  11.277 +	do
  11.278 +	{
  11.279 +		if ( b->pfn == gpfn )			
  11.280 +		{
  11.281 +			b->spfn_and_flags = s;
  11.282 +			return;
  11.283 +		}
  11.284 +
  11.285 +		b=b->next;
  11.286 +	}
  11.287 +	while (b);
  11.288 +
  11.289 +	// if we got here, this is an insert rather than update
  11.290 +
  11.291 +    ASSERT( s );  // deletes must have succeeded by here
  11.292 +
  11.293 +    if ( B->pfn == 0 )
  11.294 +	{
  11.295 +		// we can use this head
  11.296 +        ASSERT( B->next == 0 );
  11.297 +		B->pfn = gpfn;
  11.298 +		B->spfn_and_flags = s;
  11.299 +		return;
  11.300 +	}
  11.301 +
  11.302 +    if( unlikely(p->mm.shadow_ht_free == NULL) )
  11.303 +    {
  11.304 +        SH_LOG("allocate more shadow hashtable blocks");
  11.305 +
  11.306 +        // we need to allocate more space
  11.307 +        extra = kmalloc( sizeof(void*) + (shadow_ht_extra_size * 
  11.308 +							   sizeof(struct shadow_status)), GFP_KERNEL );
  11.309 +
  11.310 +	    if( ! extra ) BUG(); // should be more graceful here....
  11.311 +
  11.312 +	    memset( extra, 0, sizeof(void*) + (shadow_ht_extra_size * 
  11.313 +							   sizeof(struct shadow_status)) );
  11.314 +	
  11.315 +        // add extras to free list
  11.316 +	    fptr = &p->mm.shadow_ht_free;
  11.317 +	    for ( i=0; i<shadow_ht_extra_size; i++ )
  11.318 + 	    {
  11.319 +		    *fptr = &extra[i];
  11.320 +		    fptr = &(extra[i].next);
  11.321 +	    }
  11.322 +	    *fptr = NULL;
  11.323 +
   11.324 +	    *((struct shadow_status ** ) &extra[shadow_ht_extra_size]) = 
  11.325 +                                            p->mm.shadow_ht_extras;
  11.326 +        p->mm.shadow_ht_extras = extra;
  11.327 +
  11.328 +    }
  11.329 +
  11.330 +	// should really put this in B to go right to front
  11.331 +	b = p->mm.shadow_ht_free;
  11.332 +    p->mm.shadow_ht_free = b->next;
  11.333 +    b->spfn_and_flags = s;
  11.334 +	b->pfn = gpfn;
  11.335 +	b->next = B->next;
  11.336 +	B->next = b;
  11.337 +
  11.338 +	return;
  11.339 +}
  11.340 +
  11.341 +
  11.342 +
  11.343 +#if SHADOW_DEBUG
  11.344 +extern int check_pagetable( struct task_struct *p, pagetable_t pt, char *s );
  11.345 +#else
  11.346 +#define check_pagetable( p, pt, s )
  11.347 +#endif
  11.348 +
  11.349  
  11.350  #endif
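__shadow_status() above keeps each bucket chain in roughly most-recently-used order: on a hit it swaps the entry's contents with the bucket head and splices the node to the front, so hot gpfns are found in one or two probes. A self-contained sketch of the same move-to-front idea on a plain linked list (generic names; the real code additionally swaps payloads with a fixed head node, since the bucket heads are embedded in the shadow_ht array):

    #include <stdio.h>
    #include <stddef.h>

    struct entry { unsigned long key, val; struct entry *next; };

    /* Move-to-front lookup: on a hit, unlink the entry and reinsert
     * it at the head so it is found first next time. */
    static unsigned long lookup_mtf(struct entry **head, unsigned long key)
    {
        struct entry **pp, *e;
        for ( pp = head; (e = *pp) != NULL; pp = &e->next )
        {
            if ( e->key == key )
            {
                *pp = e->next;       /* unlink */
                e->next = *head;     /* reinsert at front */
                *head = e;
                return e->val;
            }
        }
        return 0;                    /* miss, as in __shadow_status() */
    }

    int main(void)
    {
        struct entry c = { 3, 30, NULL }, b = { 2, 20, &c }, a = { 1, 10, &b };
        struct entry *head = &a;
        printf("%lu\n", lookup_mtf(&head, 3));  /* 30; key 3 moves to head */
        printf("%lu\n", head->key);             /* 3 */
        return 0;
    }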
    12.1 --- a/xen/net/dev.c	Sun Mar 21 19:14:29 2004 +0000
    12.2 +++ b/xen/net/dev.c	Thu Mar 25 01:50:35 2004 +0000
    12.3 @@ -494,6 +494,7 @@ void deliver_packet(struct sk_buff *skb,
    12.4      unsigned short size;
    12.5      unsigned char  offset, status = RING_STATUS_OK;
    12.6      struct task_struct *p = vif->domain;
    12.7 +    unsigned long spte_pfn;
    12.8  
    12.9      memcpy(skb->mac.ethernet->h_dest, vif->vmac, ETH_ALEN);
   12.10      if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
   12.11 @@ -546,21 +547,18 @@ void deliver_packet(struct sk_buff *skb,
   12.12          goto out;
   12.13      }
   12.14  
   12.15 -
   12.16 -#ifdef CONFIG_SHADOW
   12.17 -    if ( pte_page->shadow_and_flags & PSH_shadowed )
   12.18 +    if ( p->mm.shadow_mode && 
   12.19 +	 (spte_pfn=get_shadow_status(p, pte_page-frame_table)) )
   12.20      {
   12.21 -        unsigned long spte_pfn = pte_page->shadow_and_flags & PSH_pfn_mask;
   12.22  	unsigned long *sptr = map_domain_mem( (spte_pfn<<PAGE_SHIFT) |
   12.23  			(((unsigned long)ptep)&~PAGE_MASK) );
   12.24  
   12.25 -        // save the fault later
   12.26 +        // avoid the fault later
   12.27  	*sptr = new_pte;
   12.28  
   12.29 -	unmap_domain_mem( sptr );
   12.30 +	unmap_domain_mem(sptr);
   12.31 +	put_shadow_status(p);
   12.32      }
   12.33 -#endif
   12.34 -
   12.35  
   12.36      machine_to_phys_mapping[new_page - frame_table] 
   12.37          = machine_to_phys_mapping[old_page - frame_table];
   12.38 @@ -2068,7 +2066,7 @@ static void get_rx_bufs(net_vif_t *vif)
   12.39      rx_shadow_entry_t *srx;
   12.40      unsigned long  pte_pfn, buf_pfn;
   12.41      struct pfn_info *pte_page, *buf_page;
   12.42 -    unsigned long *ptep, pte;
   12.43 +    unsigned long *ptep, pte, spfn;
   12.44  
   12.45      spin_lock(&vif->rx_lock);
   12.46  
   12.47 @@ -2114,21 +2112,16 @@ static void get_rx_bufs(net_vif_t *vif)
   12.48              goto rx_unmap_and_continue;
   12.49          }
   12.50  
   12.51 -#ifdef CONFIG_SHADOW
   12.52 -	{
   12.53 -	    if ( frame_table[rx.addr>>PAGE_SHIFT].shadow_and_flags & PSH_shadowed )
   12.54 -	      {
   12.55 -		unsigned long spfn = 
   12.56 -		  frame_table[rx.addr>>PAGE_SHIFT].shadow_and_flags & PSH_pfn_mask;
   12.57 -		unsigned long * sptr = map_domain_mem( (spfn<<PAGE_SHIFT) | (rx.addr&~PAGE_MASK) );
   12.58 +	if ( p->mm.shadow_mode && 
   12.59 +	     (spfn=get_shadow_status(p, rx.addr>>PAGE_SHIFT)) )
   12.60 +	  {
   12.61 +	    unsigned long * sptr = 
   12.62 +	      map_domain_mem( (spfn<<PAGE_SHIFT) | (rx.addr&~PAGE_MASK) );
   12.63  
   12.64 -		*sptr = 0;
   12.65 -		unmap_domain_mem( sptr );
   12.66 -
   12.67 -	      }
   12.68 -
   12.69 -	}
   12.70 -#endif
   12.71 +	    *sptr = 0;
   12.72 +	    unmap_domain_mem( sptr );
   12.73 +	    put_shadow_status(p);
   12.74 +	  }
   12.75          
   12.76          buf_pfn  = pte >> PAGE_SHIFT;
   12.77          buf_page = &frame_table[buf_pfn];