direct-io.hg

changeset 1417:8ae534da09e8

bitkeeper revision 1.921 (40b23b32vMbvKCdgtCukVpQP62ZvYA)

Add more stats to migration code, scan pages in pseudo-random permutation,
misc cleanups.
author iap10@labyrinth.cl.cam.ac.uk
date Mon May 24 18:13:06 2004 +0000 (2004-05-24)
parents 0681a064d114
children e8a6046f7535 16c2273aaf2c
files tools/xc/lib/xc.h tools/xc/lib/xc_domain.c tools/xc/lib/xc_linux_restore.c tools/xc/lib/xc_linux_save.c tools/xc/lib/xc_private.c tools/xc/py/Xc.c xen/common/dom0_ops.c xen/common/shadow.c xen/include/asm-i386/processor.h xen/include/hypervisor-ifs/dom0_ops.h xen/include/xen/shadow.h xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c
line diff
     1.1 --- a/tools/xc/lib/xc.h	Fri May 21 10:57:45 2004 +0000
     1.2 +++ b/tools/xc/lib/xc.h	Mon May 24 18:13:06 2004 +0000
     1.3 @@ -59,7 +59,9 @@ int xc_shadow_control(int xc_handle,
     1.4                        u64 domid, 
     1.5                        unsigned int sop,
     1.6  		      unsigned long *dirty_bitmap,
     1.7 -		      unsigned long pages);
     1.8 +		      unsigned long pages,
     1.9 +		      unsigned long *fault_count,
    1.10 +		      unsigned long *dirty_count);
    1.11  
    1.12  
    1.13  #define XCFLAGS_VERBOSE 1
     2.1 --- a/tools/xc/lib/xc_domain.c	Fri May 21 10:57:45 2004 +0000
     2.2 +++ b/tools/xc/lib/xc_domain.c	Mon May 24 18:13:06 2004 +0000
     2.3 @@ -112,7 +112,9 @@ int xc_shadow_control(int xc_handle,
     2.4                        u64 domid, 
     2.5                        unsigned int sop,
     2.6  		      unsigned long *dirty_bitmap,
     2.7 -		      unsigned long pages)
     2.8 +		      unsigned long pages,
     2.9 +		      unsigned long *fault_count,
    2.10 +		      unsigned long *dirty_count)
    2.11  {
    2.12      int rc;
    2.13      dom0_op_t op;
    2.14 @@ -124,6 +126,9 @@ int xc_shadow_control(int xc_handle,
    2.15  
    2.16      rc = do_dom0_op(xc_handle, &op);
    2.17  
    2.18 +    if(fault_count) *fault_count = op.u.shadow_control.fault_count;
    2.19 +    if(dirty_count) *dirty_count = op.u.shadow_control.dirty_count;
    2.20 +
    2.21      if ( rc == 0 )
    2.22  	return op.u.shadow_control.pages;
    2.23      else
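
For callers of the widened interface, a minimal sketch (the wrapper name is illustrative and not part of this changeset; it simply mirrors the CLEAN2 call that xc_linux_save makes below). Passing NULL for either counter remains valid for callers that do not want the stats:

    /* Illustrative wrapper, not part of the changeset: clean the log-dirty
     * bitmap for one round and collect the new per-round statistics. */
    static int clean_dirty_bitmap(int xc_handle, u64 domid,
                                  unsigned long *to_send, unsigned long nr_pfns,
                                  unsigned long *faults, unsigned long *dirtied)
    {
        /* NULL is accepted for either counter if the caller does not care. */
        return xc_shadow_control(xc_handle, domid,
                                 DOM0_SHADOW_CONTROL_OP_CLEAN2,
                                 to_send, nr_pfns, faults, dirtied);
    }
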
     3.1 --- a/tools/xc/lib/xc_linux_restore.c	Fri May 21 10:57:45 2004 +0000
     3.2 +++ b/tools/xc/lib/xc_linux_restore.c	Mon May 24 18:13:06 2004 +0000
     3.3 @@ -88,6 +88,9 @@ int xc_linux_restore(int xc_handle,
      3.4      /* A table containing the type of each PFN (/not/ MFN!). */
     3.5      unsigned long *pfn_type = NULL;
     3.6  
     3.7 +    /* A table of MFNs to map in the current region */
     3.8 +    unsigned long *region_mfn = NULL;
     3.9 +
    3.10      /* A temporary mapping, and a copy, of one frame of guest memory. */
    3.11      unsigned long *ppage;
    3.12  
    3.13 @@ -97,10 +100,12 @@ int xc_linux_restore(int xc_handle,
    3.14      /* A table mapping each PFN to its new MFN. */
    3.15      unsigned long *pfn_to_mfn_table = NULL;
    3.16  
    3.17 +    /* used by mapper for updating the domain's copy of the table */
    3.18 +    unsigned long *live_pfn_to_mfn_table = NULL;
    3.19 +
    3.20      /* A temporary mapping of the guest's suspend record. */
    3.21      suspend_record_t *p_srec;
    3.22  
    3.23 -    mfn_mapper_t *region_mapper, *mapper_handle1;
    3.24      char *region_base;
    3.25  
    3.26      mmu_t *mmu = NULL;
    3.27 @@ -154,12 +159,20 @@ int xc_linux_restore(int xc_handle,
    3.28      /* We want zeroed memory so use calloc rather than malloc. */
    3.29      pfn_to_mfn_table = calloc(1, 4 * nr_pfns);
    3.30      pfn_type         = calloc(1, 4 * nr_pfns);    
    3.31 +    region_mfn       = calloc(1, 4 * MAX_BATCH_SIZE);    
    3.32  
    3.33 -    if ( (pfn_to_mfn_table == NULL) || (pfn_type == NULL) )
    3.34 +    if ( (pfn_to_mfn_table == NULL) || (pfn_type == NULL) || 
    3.35 +	 (region_mfn == NULL) )
    3.36      {
    3.37          errno = ENOMEM;
    3.38          goto out;
    3.39      }
    3.40 +    
    3.41 +    if ( mlock(region_mfn, 4 * MAX_BATCH_SIZE ) )
    3.42 +    {
    3.43 +        ERROR("Could not mlock region_mfn");
    3.44 +        goto out;
    3.45 +    }
    3.46  
    3.47      /* Set the domain's name to that from the restore file */
    3.48      if ( xc_domain_setname( xc_handle, dom, name ) )
    3.49 @@ -206,15 +219,6 @@ int xc_linux_restore(int xc_handle,
    3.50          goto out;
    3.51      }
    3.52  
    3.53 -
    3.54 -    if ( (region_mapper = mfn_mapper_init(xc_handle, dom,
    3.55 -					  MAX_BATCH_SIZE*PAGE_SIZE, 
    3.56 -					  PROT_WRITE )) 
    3.57 -	 == NULL )
    3.58 -        goto out;
    3.59 -
    3.60 -    region_base = mfn_mapper_base( region_mapper );
    3.61 -
    3.62      verbose_printf("Reloading memory pages:   0%%");
    3.63  
    3.64      /*
    3.65 @@ -227,7 +231,7 @@ int xc_linux_restore(int xc_handle,
    3.66      while(1)
    3.67      {
    3.68  	int j;
    3.69 -	unsigned long region_pfn_type[1024];
    3.70 +	unsigned long region_pfn_type[MAX_BATCH_SIZE];
    3.71  
    3.72          this_pc = (n * 100) / nr_pfns;
    3.73          if ( (this_pc - prev_pc) >= 5 )
    3.74 @@ -270,30 +274,31 @@ int xc_linux_restore(int xc_handle,
    3.75  
    3.76  	for(i=0;i<j;i++)
    3.77  	{
    3.78 -            if ((region_pfn_type[i]>>29) == 7)
    3.79 -		continue;
    3.80 -
    3.81 -	    pfn = region_pfn_type[i] & ~PGT_type_mask;
    3.82 -	    mfn = pfn_to_mfn_table[pfn];
    3.83 -	    
    3.84 -	    mfn_mapper_queue_entry( region_mapper, i<<PAGE_SHIFT, 
    3.85 -				    mfn, PAGE_SIZE );
    3.86 +            if ( (region_pfn_type[i] & LTAB_MASK) == XTAB)
    3.87 +		region_mfn[i] = 0; // we know map will fail, but don't care
    3.88 +	    else
    3.89 +	    {		
    3.90 +		pfn = region_pfn_type[i] & ~LTAB_MASK;
    3.91 +		region_mfn[i] = pfn_to_mfn_table[pfn];
    3.92 +	    }	    	    
    3.93  	}
    3.94 -
    3.95 -	if( mfn_mapper_flush_queue(region_mapper) )
    3.96 +	
    3.97 +	if ( (region_base = mfn_mapper_map_batch( xc_handle, dom, 
    3.98 +						  PROT_WRITE,
    3.99 +						  region_mfn,
   3.100 +						  j )) == 0)
   3.101  	{
   3.102 -	    ERROR("Couldn't map page region");
   3.103 +	    PERROR("map batch failed");
   3.104  	    goto out;
   3.105  	}
   3.106  
   3.107 -
   3.108  	for(i=0;i<j;i++)
   3.109  	{
   3.110  	    unsigned long *ppage;
   3.111  
   3.112 -	    pfn = region_pfn_type[i] & ~PGT_type_mask;
   3.113 +	    pfn = region_pfn_type[i] & ~LTAB_MASK;
   3.114  
   3.115 -            if ((region_pfn_type[i]>>29) == 7)
   3.116 +            if ( (region_pfn_type[i] & LTAB_MASK) == XTAB)
   3.117  		continue;
   3.118  
   3.119              if (pfn>nr_pfns)
   3.120 @@ -302,7 +307,7 @@ int xc_linux_restore(int xc_handle,
   3.121  		goto out;
   3.122  	    }
   3.123  
   3.124 -	    region_pfn_type[i] &= PGT_type_mask;
   3.125 +	    region_pfn_type[i] &= LTAB_MASK;
   3.126  
   3.127  	    pfn_type[pfn] = region_pfn_type[i];
   3.128  
   3.129 @@ -334,7 +339,7 @@ int xc_linux_restore(int xc_handle,
   3.130  
   3.131  			if ( xpfn >= nr_pfns )
   3.132  			{
   3.133 -			    ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=0x%x nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns);
   3.134 +			    ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=0x%x nr_pfns=%d",region_pfn_type[i]>>28,i,k,xpfn,nr_pfns);
   3.135  			    goto out;
   3.136  			}
   3.137  
   3.138 @@ -355,17 +360,11 @@ int xc_linux_restore(int xc_handle,
   3.139  
   3.140  			if ( xpfn >= nr_pfns )
   3.141  			{
   3.142 -			    ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=%d nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns);
   3.143 +			    ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=%d nr_pfns=%d",region_pfn_type[i]>>28,i,k,xpfn,nr_pfns);
   3.144  
   3.145  			    goto out;
   3.146  			}
   3.147 -#if 0
   3.148 -			if ( region_pfn_type[pfn] != L1TAB )
   3.149 -			{
   3.150 -			    ERROR("Page table mistyping");
   3.151 -			    goto out;
   3.152 -			}
   3.153 -#endif
   3.154 +
   3.155  			ppage[k] &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PSE);
   3.156  			ppage[k] |= pfn_to_mfn_table[xpfn] << PAGE_SHIFT;
   3.157  		    }
   3.158 @@ -399,17 +398,21 @@ int xc_linux_restore(int xc_handle,
   3.159  
   3.160  	    if ( add_mmu_update(xc_handle, mmu,
   3.161  				(mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) )
   3.162 +	    {
    3.163 +		printf("machphys mfn=%ld pfn=%ld\n",mfn,pfn);
   3.164  		goto out;
   3.165 +	    }
   3.166  
   3.167  	} // end of 'batch' for loop
   3.168  
   3.169 +	munmap( region_base, j*PAGE_SIZE );
   3.170  	n+=j; // crude stats
   3.171  
   3.172      }
   3.173  
   3.174 -    DPRINTF("Received all pages\n");
   3.175 +    printf("Received all pages\n");
   3.176  
   3.177 -    mfn_mapper_close( region_mapper );
   3.178 +    DPRINTF("Received all pages\n");
   3.179  
   3.180      /*
   3.181       * Pin page tables. Do this after writing to them as otherwise Xen
   3.182 @@ -424,7 +427,8 @@ int xc_linux_restore(int xc_handle,
   3.183                                  MMU_EXTENDED_COMMAND,
   3.184                                  MMUEXT_PIN_L1_TABLE) )
   3.185  	    {
   3.186 -		printf("ERR pin L1 pfn=%lx mfn=%lx\n");
   3.187 +		printf("ERR pin L1 pfn=%lx mfn=%lx\n",
   3.188 +		       i, pfn_to_mfn_table[i]);
   3.189                  goto out;
   3.190  	    }
   3.191          }
   3.192 @@ -435,7 +439,8 @@ int xc_linux_restore(int xc_handle,
   3.193                                  MMU_EXTENDED_COMMAND,
   3.194                                  MMUEXT_PIN_L2_TABLE) )
   3.195  	    {
   3.196 -		printf("ERR pin L2 pfn=%lx mfn=%lx\n");
   3.197 +		printf("ERR pin L2 pfn=%lx mfn=%lx\n",
   3.198 +		       i, pfn_to_mfn_table[i]);
   3.199                  goto out;
   3.200  	    }
   3.201          }
   3.202 @@ -456,7 +461,7 @@ int xc_linux_restore(int xc_handle,
   3.203  
   3.204      /* Uncanonicalise the suspend-record frame number and poke resume rec. */
   3.205      pfn = ctxt.cpu_ctxt.esi;
   3.206 -    if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) )
   3.207 +    if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) )
   3.208      {
   3.209          ERROR("Suspend record frame number is bad");
   3.210          goto out;
   3.211 @@ -477,7 +482,7 @@ int xc_linux_restore(int xc_handle,
   3.212      for ( i = 0; i < ctxt.gdt_ents; i += 512 )
   3.213      {
   3.214          pfn = ctxt.gdt_frames[i];
   3.215 -        if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) )
   3.216 +        if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) )
   3.217          {
   3.218              ERROR("GDT frame number is bad");
   3.219              goto out;
   3.220 @@ -509,37 +514,33 @@ int xc_linux_restore(int xc_handle,
   3.221  
   3.222  
   3.223      /* Uncanonicalise the pfn-to-mfn table frame-number list. */
   3.224 -    if ( (mapper_handle1 = mfn_mapper_init(xc_handle, dom,
   3.225 -					   1024*1024, PROT_WRITE )) 
   3.226 -	 == NULL )
   3.227 -        goto out;
   3.228 -	
   3.229      for ( i = 0; i < (nr_pfns+1023)/1024; i++ )
   3.230      {
   3.231  	unsigned long pfn, mfn;
   3.232  
   3.233          pfn = pfn_to_mfn_frame_list[i];
   3.234 -        if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) )
   3.235 +        if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NOTAB) )
   3.236          {
   3.237              ERROR("PFN-to-MFN frame number is bad");
   3.238              goto out;
   3.239          }
   3.240  	mfn = pfn_to_mfn_table[pfn];
   3.241 -
   3.242 -	mfn_mapper_queue_entry( mapper_handle1, i<<PAGE_SHIFT, 
   3.243 -				mfn, PAGE_SIZE );
   3.244 +	pfn_to_mfn_frame_list[i] = mfn;
   3.245      }
   3.246      
   3.247 -    if ( mfn_mapper_flush_queue(mapper_handle1) )
   3.248 +    if ( (live_pfn_to_mfn_table = mfn_mapper_map_batch( xc_handle, dom, 
   3.249 +				  PROT_WRITE,
   3.250 +				  pfn_to_mfn_frame_list,
   3.251 +				  (nr_pfns+1023)/1024 )) == 0 )
   3.252      {
   3.253          ERROR("Couldn't map pfn_to_mfn table");
   3.254          goto out;
   3.255      }
   3.256  
   3.257 -    memcpy( mfn_mapper_base( mapper_handle1 ), pfn_to_mfn_table, 
   3.258 +    memcpy( live_pfn_to_mfn_table, pfn_to_mfn_table, 
   3.259  	    nr_pfns*sizeof(unsigned long) );
   3.260  
   3.261 -    mfn_mapper_close( mapper_handle1 );
   3.262 +    munmap( live_pfn_to_mfn_table, ((nr_pfns+1023)/1024)*PAGE_SIZE );
   3.263  
   3.264      /*
   3.265       * Safety checking of saved context:
     4.1 --- a/tools/xc/lib/xc_linux_save.c	Fri May 21 10:57:45 2004 +0000
     4.2 +++ b/tools/xc/lib/xc_linux_save.c	Mon May 24 18:13:06 2004 +0000
     4.3 @@ -64,36 +64,94 @@
     4.4  
     4.5  
     4.6  /* test_bit */
     4.7 -inline int test_bit ( int nr, volatile void * addr)
     4.8 +static inline int test_bit ( int nr, volatile void * addr)
     4.9  {
    4.10      return ( ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] >> 
    4.11  	     (nr % (sizeof(unsigned long)*8) ) ) & 1;
    4.12  }
    4.13  
    4.14 -inline void clear_bit ( int nr, volatile void * addr)
    4.15 +static inline void clear_bit ( int nr, volatile void * addr)
    4.16  {
    4.17      ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] &= 
    4.18  	~(1 << (nr % (sizeof(unsigned long)*8) ) );
    4.19  }
    4.20  
    4.21 -inline void set_bit ( int nr, volatile void * addr)
    4.22 +static inline void set_bit ( int nr, volatile void * addr)
    4.23  {
    4.24      ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] |= 
    4.25  	(1 << (nr % (sizeof(unsigned long)*8) ) );
    4.26  }
    4.27 +/*
    4.28 + * hweightN: returns the hamming weight (i.e. the number
     4.29 + * of bits set) of an N-bit word
    4.30 + */
    4.31  
    4.32 -long long tv_to_us( struct timeval *new )
    4.33 +static inline unsigned int hweight32(unsigned int w)
    4.34 +{
    4.35 +        unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
    4.36 +        res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
    4.37 +        res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
    4.38 +        res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
    4.39 +        return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
    4.40 +}
    4.41 +
    4.42 +static inline int count_bits ( int nr, volatile void *addr)
    4.43 +{
    4.44 +    int i, count = 0;
    4.45 +    unsigned long *p = (unsigned long *)addr;
    4.46 +    // we know the array is padded to unsigned long
    4.47 +    for(i=0;i<nr/(sizeof(unsigned long)*8);i++,p++)
    4.48 +	count += hweight32( *p );
    4.49 +    return count;
    4.50 +}
    4.51 +
    4.52 +static inline int permute( int i, int nr, int order_nr  )
    4.53 +{
    4.54 +    /* Need a simple permutation function so that we scan pages in a
    4.55 +       pseudo random order, enabling us to get a better estimate of
    4.56 +       the domain's page dirtying rate as we go (there are often 
    4.57 +       contiguous ranges of pfns that have similar behaviour, and we
     4.58 +       want to mix them up). */
    4.59 +
     4.60 +    /* e.g. nr->order 15->4 16->4 17->5 */
    4.61 +    /* 512MB domain, 128k pages, order 17 */
    4.62 +
    4.63 +    /*
    4.64 +      QPONMLKJIHGFEDCBA  
    4.65 +             QPONMLKJIH  
    4.66 +      GFEDCBA  
    4.67 +     */
    4.68 +    
    4.69 +    /*
    4.70 +      QPONMLKJIHGFEDCBA  
    4.71 +                  EDCBA  
    4.72 +             QPONM
    4.73 +      LKJIHGF
    4.74 +      */
    4.75 +
    4.76 +    do
    4.77 +    {
    4.78 +	i = ( ( i>>(order_nr-10))  | ( i<<10 ) ) &
    4.79 +	    ((1<<order_nr)-1);
    4.80 +    }
    4.81 +    while ( i >= nr ); // this won't ever loop if nr is a power of 2
    4.82 +
    4.83 +    return i;
    4.84 +}
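
A quick aside on why the rotation above is safe to use as a scan order: it is a bijection on [0, 2^order_nr), and rotating again whenever the result lands at or above nr restricts it to a bijection on [0, nr) (the loop always terminates because the walk must eventually return to the starting index, which is below nr). A small standalone harness, assuming an illustrative 100000-page domain, can confirm this; it is not part of the patch:

    /* Standalone check (illustrative only): permute() visits every index in
     * [0, nr) exactly once, provided nr <= 1 << order_nr and order_nr >= 10. */
    #include <stdio.h>
    #include <stdlib.h>

    static int permute(int i, int nr, int order_nr)
    {
        do {
            i = ((i >> (order_nr - 10)) | (i << 10)) & ((1 << order_nr) - 1);
        } while (i >= nr);              /* never loops if nr is a power of 2 */
        return i;
    }

    int main(void)
    {
        int nr = 100000, order_nr = 17, N;      /* illustrative domain size */
        char *seen = calloc(nr, 1);

        for (N = 0; N < nr; N++)
            seen[permute(N, nr, order_nr)]++;
        for (N = 0; N < nr; N++)
            if (seen[N] != 1)
            {
                printf("index %d visited %d times\n", N, seen[N]);
                return 1;
            }
        printf("OK: each of %d pfns visited exactly once\n", nr);
        free(seen);
        return 0;
    }
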
    4.85 +
    4.86 +static long long tv_to_us( struct timeval *new )
    4.87  {
    4.88      return (new->tv_sec * 1000000) + new->tv_usec;
    4.89  }
    4.90  
    4.91 -long long tvdelta( struct timeval *new, struct timeval *old )
    4.92 +static long long tvdelta( struct timeval *new, struct timeval *old )
    4.93  {
    4.94      return ((new->tv_sec - old->tv_sec)*1000000 ) + 
    4.95  	(new->tv_usec - old->tv_usec);
    4.96  }
    4.97  
    4.98 -int track_cpu_usage( int xc_handle, u64 domid, int pages, int print )
    4.99 +static int track_cpu_usage( int xc_handle, u64 domid, int faults,
   4.100 +			    int pages_sent, int pages_dirtied, int print )
   4.101  {
   4.102      static struct timeval wall_last;
   4.103      static long long      d0_cpu_last;
   4.104 @@ -123,11 +181,13 @@ int track_cpu_usage( int xc_handle, u64 
   4.105      d1_cpu_delta  = (d1_cpu_now - d1_cpu_last)/1000;
   4.106  
   4.107      if(print)
   4.108 -	printf("interval %lldms, dom0 used %lldms (%d%%), target used %lldms (%d%%), b/w %dMb/s\n",
   4.109 +	printf("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, dirtied %dMb/s\n",
   4.110  	       wall_delta, 
   4.111 -	       d0_cpu_delta, (int)((d0_cpu_delta*100)/wall_delta),
   4.112 -	       d1_cpu_delta, (int)((d1_cpu_delta*100)/wall_delta),
   4.113 -	       (int)((pages*PAGE_SIZE*8)/(wall_delta*1000)));
   4.114 +	       (int)((d0_cpu_delta*100)/wall_delta),
   4.115 +	       (int)((d1_cpu_delta*100)/wall_delta),
   4.116 +	       (int)((pages_sent*PAGE_SIZE*8)/(wall_delta*1000)),
   4.117 +	       (int)((pages_dirtied*PAGE_SIZE*8)/(wall_delta*1000))
   4.118 +	    );
   4.119  
   4.120      d0_cpu_last  = d0_cpu_now;
   4.121      d1_cpu_last  = d1_cpu_now;
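
To pin down the units in the new stats line (wall_delta is in milliseconds): assuming 4 KB pages, sending 1000 pages in a 100 ms interval prints (1000 * 4096 * 8) / (100 * 1000) = 327 Mb/s, i.e. bits moved divided by the interval in microseconds; the dirtied rate is derived the same way from pages_dirtied.
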
   4.122 @@ -144,13 +204,14 @@ int xc_linux_save(int xc_handle,
   4.123  		  void *writerst )
   4.124  {
   4.125      dom0_op_t op;
   4.126 -    int rc = 1, i, j, k, n, last_iter, iter = 0;
   4.127 +    int rc = 1, i, j, k, last_iter, iter = 0;
   4.128      unsigned long mfn;
   4.129      int verbose = flags & XCFLAGS_VERBOSE;
   4.130      int live = flags & XCFLAGS_LIVE;
   4.131      int debug = flags & XCFLAGS_DEBUG;
   4.132      int sent_last_iter, sent_this_iter, skip_this_iter;
   4.133 -    
   4.134 +    unsigned long dirtied_this_iter, faults_this_iter;
   4.135 +
   4.136      /* Important tuning parameters */
   4.137      int max_iters  = 29; // limit us to 30 times round loop
   4.138      int max_factor = 3;  // never send more than 3x nr_pfns 
   4.139 @@ -192,6 +253,9 @@ int xc_linux_save(int xc_handle,
   4.140      /* number of pages we're dealing with */
   4.141      unsigned long nr_pfns;
   4.142  
   4.143 +    /* power of 2 order of nr_pfns */
   4.144 +    int order_nr; 
   4.145 +
   4.146      /* bitmap of pages:
   4.147         - that should be sent this iteration (unless later marked as skip); 
   4.148         - to skip this iteration because already dirty;
   4.149 @@ -310,7 +374,7 @@ int xc_linux_save(int xc_handle,
   4.150      { 
   4.151  	if ( xc_shadow_control( xc_handle, domid, 
   4.152  			   DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
   4.153 -			   NULL, 0 ) < 0 )
   4.154 +			   NULL, 0, NULL, NULL ) < 0 )
   4.155  	{
   4.156  	    ERROR("Couldn't enable shadow mode");
   4.157  	    goto out;
   4.158 @@ -361,6 +425,11 @@ int xc_linux_save(int xc_handle,
   4.159  
   4.160      }
   4.161  
   4.162 +    /* calculate the power of 2 order of nr_pfns, e.g.
   4.163 +     15->4 16->4 17->5 */
   4.164 +    for( i=nr_pfns-1, order_nr=0; i ; i>>=1, order_nr++ );
   4.165 +
   4.166 +printf("nr_pfns=%d order_nr=%d\n",nr_pfns, order_nr);
   4.167  
   4.168      /* We want zeroed memory so use calloc rather than malloc. */
   4.169      pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
   4.170 @@ -415,25 +484,26 @@ int xc_linux_save(int xc_handle,
   4.171          goto out;
   4.172      }
   4.173  
   4.174 -    track_cpu_usage( xc_handle, domid, 0, 0);
   4.175 +    track_cpu_usage( xc_handle, domid, 0, 0, 0, 0 );
   4.176  
   4.177      /* Now write out each data page, canonicalising page tables as we go... */
   4.178      
   4.179      while(1)
   4.180      {
   4.181 -	unsigned int prev_pc, batch, sent_this_iter;
   4.182 +	unsigned int prev_pc, sent_this_iter, N, batch;
   4.183  
   4.184  	iter++;
   4.185 -
   4.186  	sent_this_iter = 0;
   4.187  	skip_this_iter = 0;
   4.188  	prev_pc = 0;
   4.189 +	N=0;
   4.190 +
   4.191  	verbose_printf("Saving memory pages: iter %d   0%%", iter);
   4.192  
   4.193 -	n=0;
   4.194 -	while( n < nr_pfns )
   4.195 +	while( N < nr_pfns )
   4.196  	{
   4.197 -	    unsigned int this_pc = (n * 100) / nr_pfns;
   4.198 +	    unsigned int this_pc = (N * 100) / nr_pfns;
   4.199 +
   4.200  	    if ( (this_pc - prev_pc) >= 5 )
   4.201  	    {
   4.202  		verbose_printf("\b\b\b\b%3d%%", this_pc);
   4.203 @@ -444,9 +514,9 @@ int xc_linux_save(int xc_handle,
   4.204  	       but this is fast enough for the moment. */
   4.205  
   4.206  	    if ( !last_iter && 
   4.207 -		 xc_shadow_control( xc_handle, domid, 
   4.208 -				    DOM0_SHADOW_CONTROL_OP_PEEK,
   4.209 -				    to_skip, nr_pfns ) != nr_pfns ) 
   4.210 +		 xc_shadow_control(xc_handle, domid, 
   4.211 +				   DOM0_SHADOW_CONTROL_OP_PEEK,
   4.212 +				   to_skip, nr_pfns, NULL, NULL) != nr_pfns ) 
   4.213  	    {
   4.214  		ERROR("Error peeking shadow bitmap");
   4.215  		goto out;
   4.216 @@ -456,8 +526,9 @@ int xc_linux_save(int xc_handle,
   4.217  	    /* load pfn_type[] with the mfn of all the pages we're doing in
   4.218  	       this batch. */
   4.219  
   4.220 -	    for( batch = 0; batch < BATCH_SIZE && n < nr_pfns ; n++ )
   4.221 +	    for( batch = 0; batch < BATCH_SIZE && N < nr_pfns ; N++ )
   4.222  	    {
   4.223 +		int n = permute(N, nr_pfns, order_nr );
   4.224  
   4.225  		if(0 && debug)
   4.226  		    fprintf(stderr,"%d pfn= %08lx mfn= %08lx %d   [mfn]= %08lx\n",
   4.227 @@ -528,7 +599,7 @@ int xc_linux_save(int xc_handle,
   4.228  	    
   4.229  	    for( j = 0; j < batch; j++ )
   4.230  	    {
   4.231 -		if((pfn_type[j]>>29) == 7)
   4.232 +		if( (pfn_type[j] & LTAB_MASK) == XTAB)
   4.233  		{
   4.234  		    DDPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
   4.235  		    continue;
   4.236 @@ -537,16 +608,16 @@ int xc_linux_save(int xc_handle,
   4.237  		if(0 && debug)
   4.238  		    fprintf(stderr,"%d pfn= %08lx mfn= %08lx [mfn]= %08lx sum= %08lx\n",
   4.239  			    iter, 
   4.240 -			    (pfn_type[j] & PGT_type_mask) | pfn_batch[j],
   4.241 +			    (pfn_type[j] & LTAB_MASK) | pfn_batch[j],
   4.242  			    pfn_type[j],
   4.243 -			    live_mfn_to_pfn_table[pfn_type[j]&(~PGT_type_mask)],
   4.244 +			    live_mfn_to_pfn_table[pfn_type[j]&(~LTAB_MASK)],
   4.245  			    csum_page(region_base + (PAGE_SIZE*j))
   4.246  			);
   4.247  
   4.248  		/* canonicalise mfn->pfn */
   4.249 -		pfn_type[j] = (pfn_type[j] & PGT_type_mask) |
   4.250 +		pfn_type[j] = (pfn_type[j] & LTAB_MASK) |
   4.251  		    pfn_batch[j];
   4.252 -		//live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask];
   4.253 +		//live_mfn_to_pfn_table[pfn_type[j]&~LTAB_MASK];
   4.254  
   4.255  	    }
   4.256  
   4.257 @@ -568,20 +639,20 @@ int xc_linux_save(int xc_handle,
   4.258  	    {
   4.259  		/* write out pages in batch */
   4.260  		
   4.261 -		if((pfn_type[j]>>29) == 7)
   4.262 +		if( (pfn_type[j] & LTAB_MASK) == XTAB)
   4.263  		{
   4.264  		    DDPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
   4.265  		    continue;
   4.266  		}
   4.267  		
   4.268 -		if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) || 
   4.269 -		     ((pfn_type[j] & PGT_type_mask) == L2TAB) )
   4.270 +		if ( ((pfn_type[j] & LTAB_MASK) == L1TAB) || 
   4.271 +		     ((pfn_type[j] & LTAB_MASK) == L2TAB) )
   4.272  		{
   4.273  		    
   4.274  		    memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
   4.275  		    
   4.276  		    for ( k = 0; 
   4.277 -			  k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ? 
   4.278 +			  k < (((pfn_type[j] & LTAB_MASK) == L2TAB) ? 
   4.279  		       (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024); 
   4.280  			  k++ )
   4.281  		    {
   4.282 @@ -610,9 +681,9 @@ int xc_linux_save(int xc_handle,
   4.283  			page[k] &= PAGE_SIZE - 1;
   4.284  			page[k] |= pfn << PAGE_SHIFT;
   4.285  			
   4.286 -#if DEBUG
   4.287 +#if 0
   4.288  			printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
   4.289 -			       pfn_type[j]>>29,
   4.290 +			       pfn_type[j]>>28,
   4.291  			       j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
   4.292  #endif			  
   4.293  			
   4.294 @@ -646,13 +717,13 @@ int xc_linux_save(int xc_handle,
   4.295  
   4.296  	total_sent += sent_this_iter;
   4.297  
   4.298 -	verbose_printf("\b\b\b\b100%% (pages sent= %d, skipped= %d )\n", 
   4.299 -		       sent_this_iter, skip_this_iter );
   4.300 +	verbose_printf("\r %d: sent %d, skipped %d, ", 
   4.301 +		       iter, sent_this_iter, skip_this_iter );
   4.302  
   4.303 -	track_cpu_usage( xc_handle, domid, sent_this_iter, 1);
   4.304 -	
   4.305  	if ( last_iter )
   4.306  	{
   4.307 +	    track_cpu_usage( xc_handle, domid, 0, sent_this_iter, 0, 1);
   4.308 +
   4.309  	    verbose_printf("Total pages sent= %d (%.2fx)\n", 
   4.310  			   total_sent, ((float)total_sent)/nr_pfns );
   4.311  	    verbose_printf("(of which %d were fixups)\n", needed_to_fix  );
   4.312 @@ -683,7 +754,7 @@ int xc_linux_save(int xc_handle,
   4.313  	    if ( 
   4.314  		 // ( sent_this_iter > (sent_last_iter * 0.95) ) ||		 
   4.315  		 (iter >= max_iters) || 
   4.316 -		 (sent_this_iter+skip_this_iter < 10) || 
   4.317 +		 (sent_this_iter+skip_this_iter < 50) || 
   4.318  		 (total_sent > nr_pfns*max_factor) )
   4.319  	    {
   4.320  		DPRINTF("Start last iteration\n");
   4.321 @@ -695,7 +766,8 @@ int xc_linux_save(int xc_handle,
   4.322  
   4.323  	    if ( xc_shadow_control( xc_handle, domid, 
   4.324  				    DOM0_SHADOW_CONTROL_OP_CLEAN2,
   4.325 -				    to_send, nr_pfns ) != nr_pfns ) 
   4.326 +				    to_send, nr_pfns, &faults_this_iter,
   4.327 +				    &dirtied_this_iter) != nr_pfns ) 
   4.328  	    {
   4.329  		ERROR("Error flushing shadow PT");
   4.330  		goto out;
   4.331 @@ -703,6 +775,10 @@ int xc_linux_save(int xc_handle,
   4.332  
   4.333  	    sent_last_iter = sent_this_iter;
   4.334  
   4.335 +	    //dirtied_this_iter = count_bits( nr_pfns, to_send ); 
   4.336 +	    track_cpu_usage( xc_handle, domid, faults_this_iter,
   4.337 +			     sent_this_iter, dirtied_this_iter, 1);
   4.338 +	    
   4.339  	}
   4.340  
   4.341  
     5.1 --- a/tools/xc/lib/xc_private.c	Fri May 21 10:57:45 2004 +0000
     5.2 +++ b/tools/xc/lib/xc_private.c	Mon May 24 18:13:06 2004 +0000
     5.3 @@ -97,178 +97,9 @@ void * mfn_mapper_map_single(int xc_hand
     5.4      return addr;
     5.5  }
     5.6  
     5.7 -mfn_mapper_t * mfn_mapper_init(int xc_handle, domid_t dom, int size, int prot)
     5.8 -{
     5.9 -    mfn_mapper_t * t;
    5.10 -    t = calloc( 1, sizeof(mfn_mapper_t)+
    5.11 -		mfn_mapper_queue_size*sizeof(privcmd_mmap_entry_t) );
    5.12 -    if (!t) return NULL;
    5.13 -    t->xc_handle = xc_handle;
    5.14 -    t->size = size;
    5.15 -    t->prot = prot;
    5.16 -    t->error = 0;
    5.17 -    t->max_queue_size = mfn_mapper_queue_size;
    5.18 -    t->addr = mmap( NULL, size, prot, MAP_SHARED, xc_handle, 0 );
    5.19 -    if (!t->addr)
    5.20 -    {
    5.21 -	free(t);
    5.22 -	return NULL;
    5.23 -    }
    5.24 -    t->ioctl.num = 0;
    5.25 -    t->ioctl.dom = dom;
    5.26 -    t->ioctl.entry = (privcmd_mmap_entry_t *) &t[1];
    5.27 -    return t;
    5.28 -}
    5.29 -
    5.30 -void * mfn_mapper_base(mfn_mapper_t *t)
    5.31 -{
    5.32 -    return t->addr;
    5.33 -}
    5.34 -
    5.35 -void mfn_mapper_close(mfn_mapper_t *t)
    5.36 -{
    5.37 -    if(t->addr) munmap( t->addr, t->size );
    5.38 -    free(t);    
    5.39 -}
    5.40 -
    5.41 -static int __mfn_mapper_flush_queue(mfn_mapper_t *t)
    5.42 -{
    5.43 -    int rc;
    5.44 -    rc = ioctl( t->xc_handle, IOCTL_PRIVCMD_MMAP, &t->ioctl );
    5.45 -    t->ioctl.num = 0;    
    5.46 -    if(rc && !t->error) 
    5.47 -	t->error = rc;
    5.48 -    return rc;
    5.49 -}
    5.50 -
    5.51 -int mfn_mapper_flush_queue(mfn_mapper_t *t)
    5.52 -{
    5.53 -    int rc;
    5.54 -    
    5.55 -    rc = __mfn_mapper_flush_queue(t);
    5.56 -
    5.57 -    if ( t->error )
    5.58 -    {
    5.59 -	rc = t->error;
    5.60 -    }
    5.61 -
    5.62 -    t->error = 0;
    5.63 -    return rc;
    5.64 -}
    5.65 -
    5.66 -void * mfn_mapper_queue_entry(mfn_mapper_t *t, int offset, 
    5.67 -			      unsigned long mfn, int size)
    5.68 -{
    5.69 -    privcmd_mmap_entry_t *entry, *prev;
    5.70 -    int pages;
    5.71 -
    5.72 -    offset &= PAGE_MASK;
    5.73 -    pages =(size+PAGE_SIZE-1)>>PAGE_SHIFT;
    5.74 -    entry = &t->ioctl.entry[t->ioctl.num];       
    5.75 -
    5.76 -    if ( t->ioctl.num > 0 )
    5.77 -    {
    5.78 -	prev = &t->ioctl.entry[t->ioctl.num-1];       
    5.79 -
    5.80 -	if ( (prev->va+(prev->npages*PAGE_SIZE)) == 
    5.81 -	     ((unsigned long)t->addr+offset) &&
    5.82 -	     (prev->mfn+prev->npages) == mfn )
    5.83 -	{
    5.84 -	    prev->npages += pages;
    5.85 -	    return t->addr+offset;
    5.86 -	}
    5.87 -    }
    5.88 -     
    5.89 -    entry->va = (unsigned long)t->addr+offset;
    5.90 -    entry->mfn = mfn;
    5.91 -    entry->npages = pages;
    5.92 -    t->ioctl.num++;       
    5.93 -
    5.94 -    if(t->ioctl.num == t->max_queue_size)
    5.95 -    {
    5.96 -	if ( __mfn_mapper_flush_queue(t) )
    5.97 -	    return 0;
    5.98 -    }
    5.99 -
   5.100 -    return t->addr+offset;
   5.101 -}
   5.102 -
   5.103 -
   5.104  /*******************/
   5.105  
   5.106 -#if 0
   5.107 -
   5.108 -mfn_typer_t *mfn_typer_init(int xc_handle, domid_t dom, int num )
   5.109 -{
   5.110 -    mfn_typer_t *t;
   5.111 -    multicall_entry_t *m;
   5.112 -    dom0_op_compact_getpageframeinfo_t *d;
   5.113 -
   5.114 -    t = calloc(1, sizeof(mfn_typer_t) );
   5.115 -    m = calloc(num, sizeof(multicall_entry_t));
   5.116 -    d = calloc(num, sizeof(dom0_op_compact_getpageframeinfo_t));
   5.117 -
   5.118 -    if (!t || !m || !d)
   5.119 -    {
   5.120 -	if(t) free(t);	
   5.121 -	if(m) free(m);
   5.122 -	if(d) free(d);
   5.123 -	return NULL;
   5.124 -    }
   5.125 -
   5.126 -printf("sizeof(m)=%d sizeof(d)=%d m=%p d=%p\n",sizeof(multicall_entry_t), sizeof(dom0_op_compact_getpageframeinfo_t),m,d);
   5.127 -
   5.128 -    if ( (mlock(m, sizeof(multicall_entry_t)*num ) != 0) || 
   5.129 -	 (mlock(d, sizeof(dom0_op_compact_getpageframeinfo_t)*num ) != 0) )
   5.130 -    {
   5.131 -        PERROR("Could not lock memory for Xen hypercall");
   5.132 -        return NULL;
   5.133 -    }
   5.134 -    
   5.135 -    t->xc_handle = xc_handle;
   5.136 -    t->max = num;
   5.137 -    t->nr_multicall_ents=0;
   5.138 -    t->multicall_list=m;
   5.139 -    t->gpf_list=d;
   5.140 -    t->dom = dom;
   5.141 -
   5.142 -    return t;
   5.143 -}
   5.144 -
   5.145 -void mfn_typer_queue_entry(mfn_typer_t *t, unsigned long mfn )
   5.146 -{
   5.147 -    int i = t->nr_multicall_ents;
   5.148 -    multicall_entry_t *m = &t->multicall_list[i];
   5.149 -    dom0_op_compact_getpageframeinfo_t *d = &t->gpf_list[i];
   5.150 -
   5.151 -    d->cmd = DOM0_GETPAGEFRAMEINFO;
   5.152 -    d->interface_version = DOM0_INTERFACE_VERSION;
   5.153 -    d->getpageframeinfo.pfn = mfn;
   5.154 -    d->getpageframeinfo.domain = t->dom;
   5.155 -    d->getpageframeinfo.type = 1000; //~0UL;
   5.156 -      
   5.157 -    m->op = __HYPERVISOR_dom0_op;
   5.158 -    m->args[0] = (unsigned long)d;
   5.159 -   
   5.160 -    t->nr_multicall_ents++;
   5.161 -}
   5.162 -
   5.163 -int mfn_typer_flush_queue(mfn_typer_t *t)
   5.164 -{
   5.165 -    if (t->nr_multicall_ents == 0) return 0;
   5.166 -    do_multicall_op(t->xc_handle, t->multicall_list, t->nr_multicall_ents);
   5.167 -    t->nr_multicall_ents = 0;
   5.168 -}
   5.169 -
   5.170 -unsigned int mfn_typer_get_result(mfn_typer_t *t, int idx)
   5.171 -{
   5.172 -    return t->gpf_list[idx].getpageframeinfo.type;
   5.173 -}
   5.174 -
   5.175 -#endif
   5.176 -
   5.177  /* NB: arr must be mlock'ed */
   5.178 -
   5.179  int get_pfn_type_batch(int xc_handle, 
   5.180  		       u64 dom, int num, unsigned long *arr)
   5.181  {
   5.182 @@ -362,8 +193,10 @@ int add_mmu_update(int xc_handle, mmu_t 
   5.183  {
   5.184      mmu->updates[mmu->idx].ptr = ptr;
   5.185      mmu->updates[mmu->idx].val = val;
   5.186 +
   5.187      if ( ++mmu->idx == MAX_MMU_UPDATES )
   5.188          return flush_mmu_updates(xc_handle, mmu);
   5.189 +
   5.190      return 0;
   5.191  }
   5.192  
     6.1 --- a/tools/xc/py/Xc.c	Fri May 21 10:57:45 2004 +0000
     6.2 +++ b/tools/xc/py/Xc.c	Mon May 24 18:13:06 2004 +0000
     6.3 @@ -380,7 +380,7 @@ static PyObject *pyxc_linux_restore(PyOb
     6.4  	    do { 
     6.5  		rc = read( (int) fd, ((char*)buf)+tot, count-tot ); 
     6.6  		if ( rc < 0 ) { perror("READ"); return rc; }
     6.7 -		if ( rc == 0 ) { printf("read: need %d, tot=%d got zero\n"); return -1; }
     6.8 +		if ( rc == 0 ) { printf("read: need %d, tot=%d got zero\n",count-tot,tot); return -1; }
     6.9  		tot += rc;
    6.10  	    } 
    6.11              while ( tot < count );
    6.12 @@ -1296,7 +1296,7 @@ static PyObject *pyxc_shadow_control(PyO
    6.13                                        &dom, &op) )
    6.14          return NULL;
    6.15  
    6.16 -    if ( xc_shadow_control(xc->xc_handle, dom, op, NULL, 0) < 0 )
    6.17 +    if ( xc_shadow_control(xc->xc_handle, dom, op, NULL, 0, NULL, NULL) < 0 )
    6.18          return PyErr_SetFromErrno(xc_error);
    6.19      
    6.20      Py_INCREF(zero);
     7.1 --- a/xen/common/dom0_ops.c	Fri May 21 10:57:45 2004 +0000
     7.2 +++ b/xen/common/dom0_ops.c	Mon May 24 18:13:06 2004 +0000
     7.3 @@ -397,7 +397,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
     7.4          {
     7.5              ret = 0;
     7.6  
     7.7 -            op->u.getpageframeinfo.type = NONE;
     7.8 +            op->u.getpageframeinfo.type = NOTAB;
     7.9  
    7.10              if ( (page->type_and_flags & PGT_count_mask) != 0 )
    7.11              {
    7.12 @@ -645,11 +645,17 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
    7.13  		    switch( page->type_and_flags & PGT_type_mask )
    7.14  		    {
    7.15  		    case PGT_l1_page_table:
    7.16 +			type = L1TAB;
    7.17 +			break;
    7.18  		    case PGT_l2_page_table:
    7.19 +			type = L2TAB;
    7.20 +			break;
    7.21  		    case PGT_l3_page_table:
    7.22 +			type = L3TAB;
    7.23 +			break;
    7.24  		    case PGT_l4_page_table:
    7.25 -			type = page->type_and_flags & PGT_type_mask;
    7.26 -
    7.27 +			type = L4TAB;
    7.28 +			break;
    7.29  		    }
    7.30  		    l_arr[j] |= type;
    7.31  		    put_page(page);
    7.32 @@ -657,7 +663,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
    7.33  		else
    7.34  		{
    7.35  		e2_err:
    7.36 -		    l_arr[j] |= PGT_type_mask; /* error */
    7.37 +		    l_arr[j] |= XTAB;
    7.38  		}
    7.39  
    7.40  	    }
     8.1 --- a/xen/common/shadow.c	Fri May 21 10:57:45 2004 +0000
     8.2 +++ b/xen/common/shadow.c	Mon May 24 18:13:06 2004 +0000
     8.3 @@ -33,12 +33,34 @@ hypercall lock anyhow (at least initiall
     8.4  
     8.5  FIXME:
     8.6  
     8.7 -1. Flush needs to avoid blowing away the L2 page that another CPU may be using!
     8.8 +The shadow table flush command is dangerous on SMP systems as the
     8.9 +guest may be using the L2 on one CPU while the other is trying to 
    8.10 +blow the table away. 
    8.11 +
    8.12 +The current save restore code works around this by not calling FLUSH,
     8.13 +but by calling CLEAN2 which leaves all L2s intact (this is probably
    8.14 +quicker anyhow).
    8.15  
    8.16 -fix using cpu_raise_softirq
    8.17 +Even so, we have to be very careful. The flush code may need to cause
    8.18 +a TLB flush on another CPU. It needs to do this while holding the
    8.19 +shadow table lock. The trouble is, the guest may be in the shadow page
    8.20 +fault handler spinning waiting to grab the shadow lock. It may have
     8.21 +interrupts disabled, hence we can't use the normal flush_tlb_cpu
    8.22 +mechanism.
    8.23  
    8.24 -have a flag to count in, (after switching to init's PTs) 
    8.25 -spinlock, reload cr3_shadow, unlock
     8.26 +For the moment, we have a grim hack whereby the spinlock in the shadow
    8.27 +fault handler is actually a try lock, in a loop with a helper for the
    8.28 +tlb flush code.
    8.29 +
     8.30 +A better solution would be to take a new flush lock, then raise a
    8.31 +per-domain soft irq on the other CPU.  The softirq will switch to
    8.32 +init's PTs, then do an atomic inc of a variable to count himself in,
    8.33 +then spin on a lock.  Having noticed that the other guy has counted
    8.34 +in, flush the shadow table, then release him by dropping the lock. He
    8.35 +will then reload cr3 from mm.page_table on the way out of the softirq.
    8.36 +
     8.37 +In domain-softirq context we know that the guy holds no locks and has
    8.38 +interrupts enabled. Nothing can go wrong ;-)
    8.39  
    8.40  **/
    8.41  
    8.42 @@ -364,6 +386,11 @@ static int shadow_mode_table_op( struct 
    8.43  			rc = -EINVAL;
    8.44  			goto out;
    8.45  		}
    8.46 +
    8.47 +		sc->fault_count = p->mm.shadow_fault_count;
    8.48 +		sc->dirty_count = p->mm.shadow_dirty_count;
    8.49 +		p->mm.shadow_fault_count = 0;
    8.50 +		p->mm.shadow_dirty_count = 0;
    8.51  	
    8.52  		sc->pages = p->tot_pages;
    8.53  	
    8.54 @@ -746,6 +773,8 @@ int shadow_fault( unsigned long va, long
    8.55       
    8.56      perfc_incrc(shadow_fixup_count);
    8.57  
    8.58 +	m->shadow_fault_count++;
    8.59 +
    8.60      check_pagetable( current, current->mm.pagetable, "post-sf" );
    8.61  
    8.62      spin_unlock(&m->shadow_lock);
     9.1 --- a/xen/include/asm-i386/processor.h	Fri May 21 10:57:45 2004 +0000
     9.2 +++ b/xen/include/asm-i386/processor.h	Mon May 24 18:13:06 2004 +0000
     9.3 @@ -445,17 +445,27 @@ struct mm_struct {
     9.4      l1_pgentry_t *perdomain_pt;
     9.5      pagetable_t  pagetable;
     9.6  
     9.7 +    /* shadow mode status and controls */
     9.8      unsigned int shadow_mode;  /* flags to control shadow table operation */
     9.9      pagetable_t  shadow_table;
    9.10      spinlock_t shadow_lock;
    9.11 +    unsigned int shadow_max_page_count; // currently unused
    9.12 +
    9.13 +    /* shadow hashtable */
    9.14      struct shadow_status *shadow_ht;
    9.15      struct shadow_status *shadow_ht_free;
    9.16      struct shadow_status *shadow_ht_extras; /* extra allocation units */
    9.17 +    unsigned int shadow_extras_count;
    9.18 +
    9.19 +    /* shadow dirty bitmap */
    9.20      unsigned long *shadow_dirty_bitmap;
    9.21      unsigned int shadow_dirty_bitmap_size;  /* in pages, bit per page */
    9.22 -    unsigned int shadow_page_count;
    9.23 -    unsigned int shadow_max_page_count;
    9.24 -    unsigned int shadow_extras_count;
    9.25 +
    9.26 +    /* shadow mode stats */
    9.27 +    unsigned int shadow_page_count;     
    9.28 +    unsigned int shadow_fault_count;     
    9.29 +    unsigned int shadow_dirty_count;     
    9.30 +
    9.31  
    9.32      /* Current LDT details. */
    9.33      unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
    10.1 --- a/xen/include/hypervisor-ifs/dom0_ops.h	Fri May 21 10:57:45 2004 +0000
    10.2 +++ b/xen/include/hypervisor-ifs/dom0_ops.h	Mon May 24 18:13:06 2004 +0000
    10.3 @@ -150,6 +150,13 @@ typedef struct dom0_settime_st
    10.4  } dom0_settime_t;
    10.5  
    10.6  #define DOM0_GETPAGEFRAMEINFO 18
    10.7 +#define NOTAB 0         /* normal page */
    10.8 +#define L1TAB (1<<28)
    10.9 +#define L2TAB (2<<28)
   10.10 +#define L3TAB (3<<28)
   10.11 +#define L4TAB (4<<28)
   10.12 +#define XTAB  (0xf<<28) /* invalid page */
   10.13 +#define LTAB_MASK XTAB
   10.14  typedef struct dom0_getpageframeinfo_st
   10.15  {
   10.16      /* IN variables. */
   10.17 @@ -157,8 +164,7 @@ typedef struct dom0_getpageframeinfo_st
   10.18      domid_t domain;        /* To which domain does the frame belong?    */
   10.19      /* OUT variables. */
   10.20      /* Is the page PINNED to a type? */
   10.21 -    enum { NONE, L1TAB=(1<<29), L2TAB=(2<<29), L3TAB=(3<<29), L4TAB=(4<<29) } type;
   10.22 -#define PGT_type_mask (7<<29)
   10.23 +    unsigned long type;    /* see above type defs */
   10.24  } dom0_getpageframeinfo_t;
   10.25  
   10.26  
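
For reference, consumers unpack these words as in the xc_linux_save/restore code above; a couple of illustrative helpers (not part of the changeset), assuming the constants above are in scope:

    /* Illustrative only: split a packed word into its type and frame parts. */
    static inline unsigned long entry_type(unsigned long entry)
    {
        return entry & LTAB_MASK;      /* NOTAB, L1TAB..L4TAB, or XTAB        */
    }

    static inline unsigned long entry_pfn(unsigned long entry)
    {
        return entry & ~LTAB_MASK;     /* frame number in the low 28 bits     */
    }

    /* e.g.  if ( entry_type(e) == XTAB ) skip the frame;
     *       if ( entry_type(e) == L1TAB || entry_type(e) == L2TAB )
     *           canonicalise it as a page table. */
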
   10.27 @@ -251,6 +257,9 @@ typedef struct dom0_shadow_control_st
    10.28      unsigned long  *dirty_bitmap; // pointer to mlocked buffer
   10.29      /* IN/OUT variables */
   10.30      unsigned long  pages;  // size of buffer, updated with actual size
    10.31 +    /* OUT variables */
   10.32 +    unsigned long fault_count;
   10.33 +    unsigned long dirty_count;
   10.34  } dom0_shadow_control_t;
   10.35  
   10.36  #define DOM0_SETDOMAINNAME     26
    11.1 --- a/xen/include/xen/shadow.h	Fri May 21 10:57:45 2004 +0000
    11.2 +++ b/xen/include/xen/shadow.h	Mon May 24 18:13:06 2004 +0000
    11.3 @@ -95,8 +95,12 @@ printk("DOM%lld: (file=shadow.c, line=%d
    11.4      ASSERT(m->shadow_dirty_bitmap);
    11.5      if( likely(pfn<m->shadow_dirty_bitmap_size) )
    11.6      {
    11.7 -	/* These updates occur with mm.shadow_lock held */
    11.8 -	__set_bit( pfn, m->shadow_dirty_bitmap );
    11.9 +		/* These updates occur with mm.shadow_lock held, so use 
   11.10 +		   (__) version of test_and_set */
   11.11 +		if( ! __test_and_set_bit( pfn, m->shadow_dirty_bitmap ) )
   11.12 +		{
   11.13 +			m->shadow_dirty_count++;
   11.14 +		}
   11.15      }
   11.16      else
   11.17      {
    12.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c	Fri May 21 10:57:45 2004 +0000
    12.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c	Mon May 24 18:13:06 2004 +0000
    12.3 @@ -172,7 +172,7 @@ static int privcmd_ioctl(struct inode *i
    12.4                                        v);
    12.5  
    12.6  	    if ( unlikely(HYPERVISOR_mmu_update(u, v - u + 1, NULL) < 0) )
    12.7 -		put_user( 0xe0000000 | mfn, p );
    12.8 +		put_user( 0xF0000000 | mfn, p );
    12.9  
   12.10  	    v = w;
   12.11  	}