ia64/xen-unstable

changeset 1563:eec2392c11f7

bitkeeper revision 1.1012 (40dabc774KHZDbPvbtWWNX1C-LNl5Q)

manual merge
author iap10@labyrinth.cl.cam.ac.uk
date Thu Jun 24 11:35:19 2004 +0000 (2004-06-24)
parents 6802d4f0ce7f 16c2273aaf2c
children 8ce3066ba3ec
files tools/xc/lib/xc.h tools/xc/lib/xc_domain.c tools/xc/lib/xc_linux_save.c tools/xc/py/Xc.c xen/common/shadow.c xen/include/asm-x86/processor.h xen/include/hypervisor-ifs/dom0_ops.h xen/include/xen/shadow.h
line diff
     1.1 --- a/tools/xc/lib/xc.h	Thu Jun 24 10:28:58 2004 +0000
     1.2 +++ b/tools/xc/lib/xc.h	Thu Jun 24 11:35:19 2004 +0000
     1.3 @@ -36,6 +36,14 @@ typedef struct {
     1.4      unsigned long max_memkb;
     1.5  } xc_dominfo_t;
     1.6  
     1.7 +typedef struct xc_shadow_control_stats_st
     1.8 +{
     1.9 +    unsigned long fault_count;
    1.10 +    unsigned long dirty_count;
    1.11 +    unsigned long dirty_net_count;     
    1.12 +    unsigned long dirty_block_count;     
    1.13 +} xc_shadow_control_stats_t;
    1.14 +
    1.15  int xc_domain_create(int xc_handle, 
    1.16                       unsigned int mem_kb, 
    1.17                       const char *name,
    1.18 @@ -60,8 +68,7 @@ int xc_shadow_control(int xc_handle,
    1.19                        unsigned int sop,
    1.20  		      unsigned long *dirty_bitmap,
    1.21  		      unsigned long pages,
    1.22 -		      unsigned long *fault_count,
    1.23 -		      unsigned long *dirty_count);
    1.24 +		      xc_shadow_control_stats_t *stats);
    1.25  
    1.26  
    1.27  #define XCFLAGS_VERBOSE 1
     2.1 --- a/tools/xc/lib/xc_domain.c	Thu Jun 24 10:28:58 2004 +0000
     2.2 +++ b/tools/xc/lib/xc_domain.c	Thu Jun 24 11:35:19 2004 +0000
     2.3 @@ -120,10 +120,9 @@ int xc_domain_getinfo(int xc_handle,
     2.4  int xc_shadow_control(int xc_handle,
     2.5                        u32 domid, 
     2.6                        unsigned int sop,
     2.7 -                      unsigned long *dirty_bitmap,
     2.8 -                      unsigned long pages,
     2.9 -                      unsigned long *fault_count,
    2.10 -                      unsigned long *dirty_count)
    2.11 +		      unsigned long *dirty_bitmap,
    2.12 +		      unsigned long pages,
    2.13 +		      xc_shadow_control_stats_t *stats )
    2.14  {
    2.15      int rc;
    2.16      dom0_op_t op;
    2.17 @@ -135,10 +134,8 @@ int xc_shadow_control(int xc_handle,
    2.18  
    2.19      rc = do_dom0_op(xc_handle, &op);
    2.20  
    2.21 -    if ( fault_count ) 
    2.22 -        *fault_count = op.u.shadow_control.fault_count;
    2.23 -    if ( dirty_count )
    2.24 -        *dirty_count = op.u.shadow_control.dirty_count;
    2.25 +    if(stats) memcpy(stats, &op.u.shadow_control.stats,
    2.26 +		     sizeof(xc_shadow_control_stats_t));
    2.27  
    2.28      return (rc == 0) ? op.u.shadow_control.pages : rc;
    2.29  }
     3.1 --- a/tools/xc/lib/xc_linux_save.c	Thu Jun 24 10:28:58 2004 +0000
     3.2 +++ b/tools/xc/lib/xc_linux_save.c	Thu Jun 24 11:35:19 2004 +0000
     3.3 @@ -47,8 +47,8 @@
     3.4         (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)) ) || \
     3.5  \
     3.6         (live_mfn_to_pfn_table[_mfn] >= 0x80000000 && \
     3.7 - live_mfn_to_pfn_table[_mfn] <= 0x80000003 ) || \
     3.8 - live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == 0x80000004 )  )
     3.9 +	live_mfn_to_pfn_table[_mfn] <= 0x80000003 ) || \
    3.10 +	live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == 0x80000004 )  )
    3.11       
    3.12  /* Returns TRUE if MFN is successfully converted to a PFN. */
    3.13  #define translate_mfn_to_pfn(_pmfn)         \
    3.14 @@ -67,19 +67,19 @@
    3.15  static inline int test_bit ( int nr, volatile void * addr)
    3.16  {
    3.17      return ( ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] >> 
    3.18 -             (nr % (sizeof(unsigned long)*8) ) ) & 1;
    3.19 +	     (nr % (sizeof(unsigned long)*8) ) ) & 1;
    3.20  }
    3.21  
    3.22  static inline void clear_bit ( int nr, volatile void * addr)
    3.23  {
    3.24      ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] &= 
    3.25 -        ~(1 << (nr % (sizeof(unsigned long)*8) ) );
    3.26 +	~(1 << (nr % (sizeof(unsigned long)*8) ) );
    3.27  }
    3.28  
    3.29  static inline void set_bit ( int nr, volatile void * addr)
    3.30  {
    3.31      ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] |= 
    3.32 -        (1 << (nr % (sizeof(unsigned long)*8) ) );
    3.33 +	(1 << (nr % (sizeof(unsigned long)*8) ) );
    3.34  }
    3.35  /*
    3.36   * hweightN: returns the hamming weight (i.e. the number
    3.37 @@ -88,20 +88,20 @@ static inline void set_bit ( int nr, vol
    3.38  
    3.39  static inline unsigned int hweight32(unsigned int w)
    3.40  {
    3.41 -    unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
    3.42 -    res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
    3.43 -    res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
    3.44 -    res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
    3.45 -    return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
    3.46 +        unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
    3.47 +        res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
    3.48 +        res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
    3.49 +        res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
    3.50 +        return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
    3.51  }
    3.52  
    3.53  static inline int count_bits ( int nr, volatile void *addr)
    3.54  {
    3.55      int i, count = 0;
    3.56      unsigned long *p = (unsigned long *)addr;
    3.57 -    /* we know the array is padded to unsigned long */
    3.58 +    // we know the array is padded to unsigned long
    3.59      for(i=0;i<nr/(sizeof(unsigned long)*8);i++,p++)
    3.60 -        count += hweight32( *p );
    3.61 +	count += hweight32( *p );
    3.62      return count;
    3.63  }
    3.64  
    3.65 @@ -131,10 +131,10 @@ static inline int permute( int i, int nr
    3.66  
    3.67      do
    3.68      {
    3.69 -        i = ( ( i>>(order_nr-10))  | ( i<<10 ) ) &
    3.70 -            ((1<<order_nr)-1);
    3.71 +	i = ( ( i>>(order_nr-10))  | ( i<<10 ) ) &
    3.72 +	    ((1<<order_nr)-1);
    3.73      }
    3.74 -    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */
    3.75 +    while ( i >= nr ); // this won't ever loop if nr is a power of 2
    3.76  
    3.77      return i;
    3.78  }
    3.79 @@ -144,14 +144,22 @@ static long long tv_to_us( struct timeva
    3.80      return (new->tv_sec * 1000000) + new->tv_usec;
    3.81  }
    3.82  
    3.83 -static long long tvdelta( struct timeval *new, struct timeval *old )
    3.84 +static long long llgettimeofday()
    3.85 +{
    3.86 +    struct timeval now;
    3.87 +    gettimeofday(&now, NULL);
    3.88 +    return tv_to_us(&now);
    3.89 +}
    3.90 +
    3.91 +static long long tv_delta( struct timeval *new, struct timeval *old )
    3.92  {
    3.93      return ((new->tv_sec - old->tv_sec)*1000000 ) + 
    3.94 -        (new->tv_usec - old->tv_usec);
    3.95 +	(new->tv_usec - old->tv_usec);
    3.96  }
    3.97  
    3.98 -static int track_cpu_usage( int xc_handle, u32 domid, int faults,
    3.99 -                            int pages_sent, int pages_dirtied, int print )
   3.100 +static int print_stats( int xc_handle, u32 domid, 
   3.101 +			int pages_sent, xc_shadow_control_stats_t *stats,
   3.102 +			int print )
   3.103  {
   3.104      static struct timeval wall_last;
   3.105      static long long      d0_cpu_last;
   3.106 @@ -162,18 +170,17 @@ static int track_cpu_usage( int xc_handl
   3.107      long long             d0_cpu_now, d0_cpu_delta;
   3.108      long long             d1_cpu_now, d1_cpu_delta;
   3.109  
   3.110 -
   3.111      gettimeofday(&wall_now, NULL);
   3.112  
   3.113      d0_cpu_now = xc_domain_get_cpu_usage( xc_handle, 0 )/1000;
   3.114      d1_cpu_now = xc_domain_get_cpu_usage( xc_handle, domid )/1000;
   3.115  
   3.116 -    if ( d0_cpu_now == -1 || d1_cpu_now == -1 ) 
   3.117 +    if ( d0_cpu_now == -1 || d1_cpu_now == -1 )	
   3.118      {
   3.119 -        printf("ARRHHH!!\n");
   3.120 +	printf("ARRHHH!!\n");
   3.121      }
   3.122  
   3.123 -    wall_delta = tvdelta(&wall_now,&wall_last)/1000;
   3.124 +    wall_delta = tv_delta(&wall_now,&wall_last)/1000;
   3.125  
   3.126      if ( wall_delta == 0 ) wall_delta = 1;
   3.127  
   3.128 @@ -181,28 +188,65 @@ static int track_cpu_usage( int xc_handl
   3.129      d1_cpu_delta  = (d1_cpu_now - d1_cpu_last)/1000;
   3.130  
   3.131      if(print)
   3.132 -        printf("delta %lldms, dom0 %d%%, target %d%%, "
   3.133 -               "sent %dMb/s, dirtied %dMb/s\n",
   3.134 -               wall_delta, 
   3.135 -               (int)((d0_cpu_delta*100)/wall_delta),
   3.136 -               (int)((d1_cpu_delta*100)/wall_delta),
   3.137 -               (int)((pages_sent*PAGE_SIZE*8)/(wall_delta*1000)),
   3.138 -               (int)((pages_dirtied*PAGE_SIZE*8)/(wall_delta*1000))
   3.139 -            );
   3.140 +	printf("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, dirtied %dMb/s\n",
   3.141 +	       wall_delta, 
   3.142 +	       (int)((d0_cpu_delta*100)/wall_delta),
   3.143 +	       (int)((d1_cpu_delta*100)/wall_delta),
   3.144 +	       (int)((pages_sent*PAGE_SIZE*8)/(wall_delta*1000)),
   3.145 +	       (int)((stats->dirty_count*PAGE_SIZE*8)/(wall_delta*1000))
   3.146 +	    );
   3.147  
   3.148      d0_cpu_last  = d0_cpu_now;
   3.149      d1_cpu_last  = d1_cpu_now;
   3.150 -    wall_last = wall_now; 
   3.151 +    wall_last = wall_now;	
   3.152  
   3.153      return 0;
   3.154  }
   3.155  
   3.156  
   3.157 +static int analysis_phase( int xc_handle, u32 domid, 
   3.158 +			   int nr_pfns, unsigned long *arr )
   3.159 +{
   3.160 +    long long start, now;
   3.161 +    xc_shadow_control_stats_t stats;
   3.162 +
   3.163 +    start = llgettimeofday();
   3.164 +
   3.165 +    while(0)
   3.166 +    {
   3.167 +	int i;
   3.168 +
   3.169 +	xc_shadow_control( xc_handle, domid, 
   3.170 +			   DOM0_SHADOW_CONTROL_OP_CLEAN2,
   3.171 +			   arr, nr_pfns, NULL);
   3.172 +	printf("#Flush\n");
   3.173 +	for(i=0;i<100;i++)
   3.174 +	{	    
   3.175 +	    usleep(10000);	    
   3.176 +	    now = llgettimeofday();
   3.177 +	    xc_shadow_control( xc_handle, domid, 
   3.178 +			       DOM0_SHADOW_CONTROL_OP_PEEK,
   3.179 +			       NULL, 0, &stats);
   3.180 +
   3.181 +	    printf("now= %lld faults= %ld dirty= %ld dirty_net= %ld dirty_block= %ld\n", 
   3.182 +		   ((now-start)+500)/1000, 
   3.183 +		   stats.fault_count, stats.dirty_count,
   3.184 +		   stats.dirty_net_count, stats.dirty_block_count );
   3.185 +
   3.186 +	}
   3.187 +
   3.188 +
   3.189 +    }
   3.190 +    
   3.191 +
   3.192 +    return -1;
   3.193 +}
   3.194 +
   3.195  int xc_linux_save(int xc_handle,
   3.196                    u32 domid, 
   3.197 -                  unsigned int flags,
   3.198 -                  int (*writerfn)(void *, const void *, size_t),
   3.199 -                  void *writerst )
   3.200 +		  unsigned int flags,
   3.201 +		  int (*writerfn)(void *, const void *, size_t),
   3.202 +		  void *writerst )
   3.203  {
   3.204      dom0_op_t op;
   3.205      int rc = 1, i, j, k, last_iter, iter = 0;
   3.206 @@ -211,11 +255,10 @@ int xc_linux_save(int xc_handle,
   3.207      int live = flags & XCFLAGS_LIVE;
   3.208      int debug = flags & XCFLAGS_DEBUG;
   3.209      int sent_last_iter, sent_this_iter, skip_this_iter;
   3.210 -    unsigned long dirtied_this_iter, faults_this_iter;
   3.211  
   3.212      /* Important tuning parameters */
   3.213 -    int max_iters  = 29; /* limit us to 30 times round loop */
   3.214 -    int max_factor = 3;  /* never send more than 3x nr_pfns */
   3.215 +    int max_iters  = 29; // limit us to 30 times round loop
   3.216 +    int max_factor = 3;  // never send more than 3x nr_pfns 
   3.217  
   3.218      /* The new domain's shared-info frame number. */
   3.219      unsigned long shared_info_frame;
   3.220 @@ -262,6 +305,8 @@ int xc_linux_save(int xc_handle,
   3.221         - to skip this iteration because already dirty;
   3.222         - to fixup by sending at the end if not already resent; */
   3.223      unsigned long *to_send, *to_skip, *to_fix;
   3.224 +    
   3.225 +    xc_shadow_control_stats_t stats;
   3.226  
   3.227      int needed_to_fix = 0;
   3.228      int total_sent    = 0;
   3.229 @@ -273,10 +318,11 @@ int xc_linux_save(int xc_handle,
   3.230      }
   3.231  
   3.232      /* Ensure that the domain exists, and that it is stopped. */
   3.233 -    if ( xc_domain_pause(xc_handle, domid) )
   3.234 +
   3.235 +    if ( xc_domain_pause( xc_handle, domid, &op, &ctxt ) )
   3.236      {
   3.237 -        PERROR("Could not pause domain");
   3.238 -        goto out;
   3.239 +	PERROR("Could not pause domain");
   3.240 +	goto out;
   3.241      }
   3.242  
   3.243      memcpy(name, op.u.getdomaininfo.name, sizeof(name));
   3.244 @@ -292,8 +338,8 @@ int xc_linux_save(int xc_handle,
   3.245      /* Map the suspend-record MFN to pin it. The page must be owned by 
   3.246         domid for this to succeed. */
   3.247      p_srec = mfn_mapper_map_single(xc_handle, domid,
   3.248 -                                   sizeof(*p_srec), PROT_READ, 
   3.249 -                                   ctxt.cpu_ctxt.esi );
   3.250 +				 sizeof(*p_srec), PROT_READ, 
   3.251 +				 ctxt.cpu_ctxt.esi );
   3.252  
   3.253      if (!p_srec)
   3.254      {
   3.255 @@ -312,9 +358,9 @@ int xc_linux_save(int xc_handle,
   3.256  
   3.257      /* the pfn_to_mfn_frame_list fits in a single page */
   3.258      live_pfn_to_mfn_frame_list = 
   3.259 -        mfn_mapper_map_single(xc_handle, domid, 
   3.260 -                              PAGE_SIZE, PROT_READ, 
   3.261 -                              p_srec->pfn_to_mfn_frame_list );
   3.262 +	mfn_mapper_map_single(xc_handle, domid, 
   3.263 +			      PAGE_SIZE, PROT_READ, 
   3.264 +			      p_srec->pfn_to_mfn_frame_list );
   3.265  
   3.266      if (!live_pfn_to_mfn_frame_list)
   3.267      {
   3.268 @@ -324,20 +370,20 @@ int xc_linux_save(int xc_handle,
   3.269  
   3.270      /* Track the mfn_to_pfn table down from the domains PT */
   3.271      {
   3.272 -        unsigned long *pgd;
   3.273 -        unsigned long mfn_to_pfn_table_start_mfn;
   3.274 +	unsigned long *pgd;
   3.275 +	unsigned long mfn_to_pfn_table_start_mfn;
   3.276  
   3.277 -        pgd = mfn_mapper_map_single(xc_handle, domid, 
   3.278 -                                    PAGE_SIZE, PROT_READ, 
   3.279 -                                    ctxt.pt_base>>PAGE_SHIFT);
   3.280 +	pgd = mfn_mapper_map_single(xc_handle, domid, 
   3.281 +				PAGE_SIZE, PROT_READ, 
   3.282 +				ctxt.pt_base>>PAGE_SHIFT);
   3.283  
   3.284 -        mfn_to_pfn_table_start_mfn = 
   3.285 -            pgd[HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT]>>PAGE_SHIFT;
   3.286 +	mfn_to_pfn_table_start_mfn = 
   3.287 +	    pgd[HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT]>>PAGE_SHIFT;
   3.288  
   3.289 -        live_mfn_to_pfn_table = 
   3.290 -            mfn_mapper_map_single(xc_handle, DOMID_SELF, 
   3.291 -                                  PAGE_SIZE*1024, PROT_READ, 
   3.292 -                                  mfn_to_pfn_table_start_mfn );
   3.293 +	live_mfn_to_pfn_table = 
   3.294 +	    mfn_mapper_map_single(xc_handle, ~0ULL, 
   3.295 +				  PAGE_SIZE*1024, PROT_READ, 
   3.296 +				  mfn_to_pfn_table_start_mfn );
   3.297      }
   3.298  
   3.299      /* Map all the frames of the pfn->mfn table. For migrate to succeed, 
   3.300 @@ -346,9 +392,9 @@ int xc_linux_save(int xc_handle,
   3.301         from a safety POV anyhow. */
   3.302  
   3.303      live_pfn_to_mfn_table = mfn_mapper_map_batch( xc_handle, domid, 
   3.304 -                                                  PROT_READ,
   3.305 -                                                  live_pfn_to_mfn_frame_list,
   3.306 -                                                  (nr_pfns+1023)/1024 );  
   3.307 +						  PROT_READ,
   3.308 +						  live_pfn_to_mfn_frame_list,
   3.309 +						  (nr_pfns+1023)/1024 );  
   3.310      if( !live_pfn_to_mfn_table )
   3.311      {
   3.312          PERROR("Couldn't map pfn_to_mfn table");
   3.313 @@ -372,64 +418,63 @@ int xc_linux_save(int xc_handle,
   3.314  
   3.315      if( live )
   3.316      { 
   3.317 -        if ( xc_shadow_control( xc_handle, domid, 
   3.318 -                                DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
   3.319 -                                NULL, 0, NULL, NULL ) < 0 )
   3.320 -        {
   3.321 -            ERROR("Couldn't enable shadow mode");
   3.322 -            goto out;
   3.323 -        }
   3.324 +	if ( xc_shadow_control( xc_handle, domid, 
   3.325 +			   DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
   3.326 +			   NULL, 0, NULL ) < 0 )
   3.327 +	{
   3.328 +	    ERROR("Couldn't enable shadow mode");
   3.329 +	    goto out;
   3.330 +	}
   3.331  
   3.332 -        if ( xc_domain_unpause(xc_handle, domid) < 0 )
   3.333 -        {
   3.334 -            ERROR("Couldn't unpause domain");
   3.335 -            goto out;
   3.336 -        }
   3.337 +	if ( xc_domain_unpause( xc_handle, domid ) < 0 )
   3.338 +	{
   3.339 +	    ERROR("Couldn't unpause domain");
   3.340 +	    goto out;
   3.341 +	}
   3.342  
   3.343 -        last_iter = 0;
   3.344 -        sent_last_iter = 1<<20; /* 4GB's worth of pages */
   3.345 +	last_iter = 0;
   3.346 +	sent_last_iter = 1<<20; // 4GB's worth of pages
   3.347      }
   3.348      else
   3.349 -        last_iter = 1;
   3.350 +	last_iter = 1;
   3.351  
   3.352 +    /* calculate the power of 2 order of nr_pfns, e.g.
   3.353 +     15->4 16->4 17->5 */
   3.354 +    for( i=nr_pfns-1, order_nr=0; i ; i>>=1, order_nr++ );
   3.355  
   3.356      /* Setup to_send bitmap */
   3.357      {
   3.358 -        int sz = (nr_pfns/8) + 8; /* includes slop at end of array */
   3.359 - 
   3.360 -        to_send = malloc( sz );
   3.361 -        to_fix  = calloc( 1, sz );
   3.362 -        to_skip = malloc( sz );
   3.363 +	int sz = (nr_pfns/8) + 8; // includes slop at end of array
   3.364 +	
   3.365 +	to_send = malloc( sz );
   3.366 +	to_fix  = calloc( 1, sz );
   3.367 +	to_skip = malloc( sz );
   3.368  
   3.369 -        if (!to_send || !to_fix || !to_skip)
   3.370 -        {
   3.371 -            ERROR("Couldn't allocate to_send array");
   3.372 -            goto out;
   3.373 -        }
   3.374 +	if (!to_send || !to_fix || !to_skip)
   3.375 +	{
   3.376 +	    ERROR("Couldn't allocate to_send array");
   3.377 +	    goto out;
   3.378 +	}
   3.379  
   3.380 -        memset( to_send, 0xff, sz );
   3.381 +	memset( to_send, 0xff, sz );
   3.382  
   3.383 -        if ( mlock( to_send, sz ) )
   3.384 -        {
   3.385 -            PERROR("Unable to mlock to_send");
   3.386 -            return 1;
   3.387 -        }
   3.388 +	if ( mlock( to_send, sz ) )
   3.389 +	{
   3.390 +	    PERROR("Unable to mlock to_send");
   3.391 +	    return 1;
   3.392 +	}
   3.393  
   3.394 -        /* (to fix is local only) */
   3.395 +	/* (to fix is local only) */
   3.396  
   3.397 -        if ( mlock( to_skip, sz ) )
   3.398 -        {
   3.399 -            PERROR("Unable to mlock to_skip");
   3.400 -            return 1;
   3.401 -        }
   3.402 +	if ( mlock( to_skip, sz ) )
   3.403 +	{
   3.404 +	    PERROR("Unable to mlock to_skip");
   3.405 +	    return 1;
   3.406 +	}
   3.407  
   3.408      }
   3.409  
   3.410 -    /* calculate the power of 2 order of nr_pfns, e.g.
   3.411 -       15->4 16->4 17->5 */
   3.412 -    for( i=nr_pfns-1, order_nr=0; i ; i>>=1, order_nr++ );
   3.413 -
   3.414 -    printf("nr_pfns=%d order_nr=%d\n",nr_pfns, order_nr);
   3.415 +    analysis_phase( xc_handle, domid, nr_pfns, to_skip );
   3.416  
   3.417      /* We want zeroed memory so use calloc rather than malloc. */
   3.418      pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
   3.419 @@ -443,8 +488,8 @@ int xc_linux_save(int xc_handle,
   3.420  
   3.421      if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) )
   3.422      {
   3.423 -        ERROR("Unable to mlock");
   3.424 -        goto out;
   3.425 +	ERROR("Unable to mlock");
   3.426 +	goto out;
   3.427      }
   3.428  
   3.429  
   3.430 @@ -456,16 +501,16 @@ int xc_linux_save(int xc_handle,
   3.431      {
   3.432          mfn = live_pfn_to_mfn_table[i];
   3.433  
   3.434 -        if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0x80000004) )
   3.435 -            printf("i=0x%x mfn=%x live_mfn_to_pfn_table=%x\n",
   3.436 -                   i,mfn,live_mfn_to_pfn_table[mfn]);
   3.437 +	if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0x80000004) )
   3.438 +	    printf("i=0x%x mfn=%x live_mfn_to_pfn_table=%x\n",
   3.439 +		   i,mfn,live_mfn_to_pfn_table[mfn]);
   3.440      }
   3.441  #endif
   3.442  
   3.443      /* Map the shared info frame */
   3.444      live_shinfo = mfn_mapper_map_single(xc_handle, domid,
   3.445 -                                        PAGE_SIZE, PROT_READ,
   3.446 -                                        shared_info_frame);
   3.447 +					PAGE_SIZE, PROT_READ,
   3.448 +					shared_info_frame);
   3.449  
   3.450      if (!live_shinfo)
   3.451      {
   3.452 @@ -484,294 +529,300 @@ int xc_linux_save(int xc_handle,
   3.453          goto out;
   3.454      }
   3.455  
   3.456 -    track_cpu_usage( xc_handle, domid, 0, 0, 0, 0 );
   3.457 +    print_stats( xc_handle, domid, 0, &stats, 0 );
   3.458  
   3.459      /* Now write out each data page, canonicalising page tables as we go... */
   3.460      
   3.461      while(1)
   3.462      {
   3.463 -        unsigned int prev_pc, sent_this_iter, N, batch;
   3.464 +	unsigned int prev_pc, sent_this_iter, N, batch;
   3.465  
   3.466 -        iter++;
   3.467 -        sent_this_iter = 0;
   3.468 -        skip_this_iter = 0;
   3.469 -        prev_pc = 0;
   3.470 -        N=0;
   3.471 +	iter++;
   3.472 +	sent_this_iter = 0;
   3.473 +	skip_this_iter = 0;
   3.474 +	prev_pc = 0;
   3.475 +	N=0;
   3.476  
   3.477 -        verbose_printf("Saving memory pages: iter %d   0%%", iter);
   3.478 +	verbose_printf("Saving memory pages: iter %d   0%%", iter);
   3.479  
   3.480 -        while( N < nr_pfns )
   3.481 -        {
   3.482 -            unsigned int this_pc = (N * 100) / nr_pfns;
   3.483 +	while( N < nr_pfns )
   3.484 +	{
   3.485 +	    unsigned int this_pc = (N * 100) / nr_pfns;
   3.486  
   3.487 -            if ( (this_pc - prev_pc) >= 5 )
   3.488 -            {
   3.489 -                verbose_printf("\b\b\b\b%3d%%", this_pc);
   3.490 -                prev_pc = this_pc;
   3.491 -            }
   3.492 +	    if ( (this_pc - prev_pc) >= 5 )
   3.493 +	    {
   3.494 +		verbose_printf("\b\b\b\b%3d%%", this_pc);
   3.495 +		prev_pc = this_pc;
   3.496 +	    }
   3.497  
   3.498 -            /* slightly wasteful to peek the whole array evey time, 
   3.499 -               but this is fast enough for the moment. */
   3.500 +	    /* slightly wasteful to peek the whole array evey time, 
   3.501 +	       but this is fast enough for the moment. */
   3.502  
   3.503 -            if ( !last_iter && 
   3.504 -                 xc_shadow_control(xc_handle, domid, 
   3.505 -                                   DOM0_SHADOW_CONTROL_OP_PEEK,
   3.506 -                                   to_skip, nr_pfns, NULL, NULL) != nr_pfns ) 
   3.507 -            {
   3.508 -                ERROR("Error peeking shadow bitmap");
   3.509 -                goto out;
   3.510 -            }
   3.511 -     
   3.512 +	    if ( !last_iter && 
   3.513 +		 xc_shadow_control(xc_handle, domid, 
   3.514 +				   DOM0_SHADOW_CONTROL_OP_PEEK,
   3.515 +				   to_skip, nr_pfns, NULL) != nr_pfns ) 
   3.516 +	    {
   3.517 +		ERROR("Error peeking shadow bitmap");
   3.518 +		goto out;
   3.519 +	    }
   3.520 +	    
   3.521  
   3.522 -            /* load pfn_type[] with the mfn of all the pages we're doing in
   3.523 -               this batch. */
   3.524 +	    /* load pfn_type[] with the mfn of all the pages we're doing in
   3.525 +	       this batch. */
   3.526  
   3.527 -            for( batch = 0; batch < BATCH_SIZE && N < nr_pfns ; N++ )
   3.528 -            {
   3.529 -                int n = permute(N, nr_pfns, order_nr );
   3.530 +	    for( batch = 0; batch < BATCH_SIZE && N < nr_pfns ; N++ )
   3.531 +	    {
   3.532 +		int n = permute(N, nr_pfns, order_nr );
   3.533  
   3.534 -                if(0 && debug)
   3.535 -                    fprintf(stderr,"%d pfn= %08lx mfn= %08lx %d   "
   3.536 -                            "[mfn]= %08lx\n",
   3.537 -                            iter, n, live_pfn_to_mfn_table[n],
   3.538 -                            test_bit(n,to_send),
   3.539 -                            live_mfn_to_pfn_table[
   3.540 -                                live_pfn_to_mfn_table[n]&0xFFFFF]);
   3.541 +		if(0 && debug)
   3.542 +		    fprintf(stderr,"%d pfn= %08lx mfn= %08lx %d   [mfn]= %08lx\n",
   3.543 +			    iter, n, live_pfn_to_mfn_table[n],
   3.544 +			    test_bit(n,to_send),
   3.545 +			    live_mfn_to_pfn_table[live_pfn_to_mfn_table[n]&0xFFFFF]);
   3.546  
   3.547 -                if (!last_iter && test_bit(n, to_send) && test_bit(n, to_skip))
   3.548 -                    skip_this_iter++; /* stats keeping */
   3.549 +		if (!last_iter && test_bit(n, to_send) && test_bit(n, to_skip))
   3.550 +		    skip_this_iter++; // stats keeping
   3.551  
   3.552 -                if (! ( (test_bit(n, to_send) && !test_bit(n, to_skip)) ||
   3.553 -                        (test_bit(n, to_send) && last_iter) ||
   3.554 -                        (test_bit(n, to_fix)  && last_iter) )   )
   3.555 -                    continue;
   3.556 +		if (! ( (test_bit(n, to_send) && !test_bit(n, to_skip)) ||
   3.557 +			(test_bit(n, to_send) && last_iter) ||
   3.558 +			(test_bit(n, to_fix)  && last_iter) )   )
   3.559 +		    continue;
   3.560  
   3.561 -                /* we get here if:
   3.562 -                   1. page is marked to_send & hasn't already been re-dirtied
   3.563 -                   2. (ignore to_skip in last iteration)
   3.564 -                   3. add in pages that still need fixup (net bufs)
   3.565 -                */
   3.566 -  
   3.567 -                pfn_batch[batch] = n;
   3.568 -                pfn_type[batch] = live_pfn_to_mfn_table[n];
   3.569 +		/* we get here if:
   3.570 +		   1. page is marked to_send & hasn't already been re-dirtied
   3.571 +		   2. (ignore to_skip in last iteration)
   3.572 +		   3. add in pages that still need fixup (net bufs)
   3.573 +		 */
   3.574 +		
   3.575 +		pfn_batch[batch] = n;
   3.576 +		pfn_type[batch] = live_pfn_to_mfn_table[n];
   3.577  
   3.578 -                if( pfn_type[batch] == 0x80000004 )
   3.579 -                {
   3.580 -                    /* not currently in pusedo-physical map -- set bit
   3.581 -                       in to_fix that we must send this page in last_iter
   3.582 -                       unless its sent sooner anyhow */
   3.583 +		if( pfn_type[batch] == 0x80000004 )
   3.584 +		{
   3.585 +		    /* not currently in pusedo-physical map -- set bit
   3.586 +		       in to_fix that we must send this page in last_iter
   3.587 +		       unless its sent sooner anyhow */
   3.588  
   3.589 -                    set_bit( n, to_fix );
   3.590 -                    if( iter>1 )
   3.591 -                        DDPRINTF("Urk! netbuf race: iter %d, pfn %lx."
   3.592 -                                 " mfn %lx\n",
   3.593 -                                 iter,n,pfn_type[batch]);
   3.594 -                    continue;
   3.595 -                }
   3.596 +		    set_bit( n, to_fix );
   3.597 +		    if( iter>1 )
   3.598 +			DDPRINTF("Urk! netbuf race: iter %d, pfn %lx. mfn %lx\n",
   3.599 +			       iter,n,pfn_type[batch]);
   3.600 +		    continue;
   3.601 +		}
   3.602  
   3.603 -                if ( last_iter && test_bit(n, to_fix) && 
   3.604 -                     !test_bit(n, to_send) )
   3.605 -                {
   3.606 -                    needed_to_fix++;
   3.607 -                    DPRINTF("Fix! iter %d, pfn %lx. mfn %lx\n",
   3.608 -                            iter,n,pfn_type[batch]);
   3.609 -                }
   3.610 +		if ( last_iter && test_bit(n, to_fix ) && !test_bit(n, to_send ))
   3.611 +		{
   3.612 +		    needed_to_fix++;
   3.613 +		    DPRINTF("Fix! iter %d, pfn %lx. mfn %lx\n",
   3.614 +			       iter,n,pfn_type[batch]);
   3.615 +		}
   3.616  
   3.617 -                clear_bit( n, to_fix ); 
   3.618 +		clear_bit( n, to_fix ); 
   3.619  
   3.620 -                batch++;
   3.621 -            }
   3.622 -     
   3.623 -            DDPRINTF("batch %d:%d (n=%d)\n",iter,batch,n);
   3.624 +		batch++;
   3.625 +	    }
   3.626 +	    
   3.627 +	    DDPRINTF("batch %d:%d (n=%d)\n",iter,batch,n);
   3.628  
   3.629 -            if ( batch == 0 ) 
   3.630 -                goto skip; /* very unlikely */
   3.631 -      
   3.632 -            if ( (region_base = mfn_mapper_map_batch(xc_handle, domid, 
   3.633 -                                                     PROT_READ,
   3.634 -                                                     pfn_type,
   3.635 -                                                     batch)) == 0 )
   3.636 -            {
   3.637 -                PERROR("map batch failed");
   3.638 -                goto out;
   3.639 -            }
   3.640 -     
   3.641 -            if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) )
   3.642 -            {
   3.643 -                ERROR("get_pfn_type_batch failed");
   3.644 -                goto out;
   3.645 -            }
   3.646 -     
   3.647 -            for ( j = 0; j < batch; j++ )
   3.648 -            {
   3.649 -                if ( (pfn_type[j] & LTAB_MASK) == XTAB )
   3.650 -                {
   3.651 -                    DDPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
   3.652 -                    continue;
   3.653 -                }
   3.654 -  
   3.655 -                if ( 0 && debug )
   3.656 -                    fprintf(stderr,"%d pfn= %08lx mfn= %08lx "
   3.657 -                            "[mfn]= %08lx sum= %08lx\n",
   3.658 -                            iter, 
   3.659 -                            (pfn_type[j] & LTAB_MASK) | pfn_batch[j],
   3.660 -                            pfn_type[j],
   3.661 -                            live_mfn_to_pfn_table[pfn_type[j]&(~LTAB_MASK)],
   3.662 -                            csum_page(region_base + (PAGE_SIZE*j))
   3.663 -                        );
   3.664 +	    if(batch == 0) goto skip; // vanishingly unlikely...
   3.665 + 	    
   3.666 +	    if ( (region_base = mfn_mapper_map_batch( xc_handle, domid, 
   3.667 +						      PROT_READ,
   3.668 +						      pfn_type,
   3.669 +						      batch )) == 0)
   3.670 +	    {
   3.671 +		PERROR("map batch failed");
   3.672 +		goto out;
   3.673 +	    }
   3.674 +	    
   3.675 +	    if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) )
   3.676 +	    {
   3.677 +		ERROR("get_pfn_type_batch failed");
   3.678 +		goto out;
   3.679 +	    }
   3.680 +	    
   3.681 +	    for( j = 0; j < batch; j++ )
   3.682 +	    {
   3.683 +		if( (pfn_type[j] & LTAB_MASK) == XTAB)
   3.684 +		{
   3.685 +		    DDPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
   3.686 +		    continue;
   3.687 +		}
   3.688 +		
   3.689 +		if(0 && debug)
   3.690 +		    fprintf(stderr,"%d pfn= %08lx mfn= %08lx [mfn]= %08lx sum= %08lx\n",
   3.691 +			    iter, 
   3.692 +			    (pfn_type[j] & LTAB_MASK) | pfn_batch[j],
   3.693 +			    pfn_type[j],
   3.694 +			    live_mfn_to_pfn_table[pfn_type[j]&(~LTAB_MASK)],
   3.695 +			    csum_page(region_base + (PAGE_SIZE*j))
   3.696 +			);
   3.697  
   3.698 -                /* canonicalise mfn->pfn */
   3.699 -                pfn_type[j] = (pfn_type[j] & LTAB_MASK) |
   3.700 -                    pfn_batch[j];
   3.701 -            }
   3.702 +		/* canonicalise mfn->pfn */
   3.703 +		pfn_type[j] = (pfn_type[j] & LTAB_MASK) |
   3.704 +		    pfn_batch[j];
   3.705 +		//live_mfn_to_pfn_table[pfn_type[j]&~LTAB_MASK];
   3.706 +
   3.707 +	    }
   3.708  
   3.709 -     
   3.710 -            if ( (*writerfn)(writerst, &batch, sizeof(int) ) )
   3.711 -            {
   3.712 -                ERROR("Error when writing to state file (2)");
   3.713 -                goto out;
   3.714 -            }
   3.715 +	    
   3.716 +	    if ( (*writerfn)(writerst, &batch, sizeof(int) ) )
   3.717 +	    {
   3.718 +		ERROR("Error when writing to state file (2)");
   3.719 +		goto out;
   3.720 +	    }
   3.721  
   3.722 -            if ( (*writerfn)(writerst, pfn_type, sizeof(unsigned long)*j ) )
   3.723 -            {
   3.724 -                ERROR("Error when writing to state file (3)");
   3.725 -                goto out;
   3.726 -            }
   3.727 -     
   3.728 -            /* entering this loop, pfn_type is now in pfns (Not mfns) */
   3.729 -            for( j = 0; j < batch; j++ )
   3.730 -            {
   3.731 -                /* write out pages in batch */
   3.732 -  
   3.733 -                if( (pfn_type[j] & LTAB_MASK) == XTAB)
   3.734 -                {
   3.735 -                    DDPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
   3.736 -                    continue;
   3.737 -                }
   3.738 -  
   3.739 -                if ( ((pfn_type[j] & LTAB_MASK) == L1TAB) || 
   3.740 -                     ((pfn_type[j] & LTAB_MASK) == L2TAB) )
   3.741 -                {
   3.742 -      
   3.743 -                    memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
   3.744 -      
   3.745 -                    for ( k = 0; 
   3.746 -                          k < (((pfn_type[j] & LTAB_MASK) == L2TAB) ? 
   3.747 -                               (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 
   3.748 -                               1024); 
   3.749 -                          k++ )
   3.750 -                    {
   3.751 -                        unsigned long pfn;
   3.752 +	    if ( (*writerfn)(writerst, pfn_type, sizeof(unsigned long)*j ) )
   3.753 +	    {
   3.754 +		ERROR("Error when writing to state file (3)");
   3.755 +		goto out;
   3.756 +	    }
   3.757 +	    
   3.758 +	    /* entering this loop, pfn_type is now in pfns (Not mfns) */
   3.759 +	    for( j = 0; j < batch; j++ )
   3.760 +	    {
   3.761 +		/* write out pages in batch */
   3.762 +		
   3.763 +		if( (pfn_type[j] & LTAB_MASK) == XTAB)
   3.764 +		{
   3.765 +		    DDPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
   3.766 +		    continue;
   3.767 +		}
   3.768 +		
   3.769 +		if ( ((pfn_type[j] & LTAB_MASK) == L1TAB) || 
   3.770 +		     ((pfn_type[j] & LTAB_MASK) == L2TAB) )
   3.771 +		{
   3.772 +		    
   3.773 +		    memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
   3.774 +		    
   3.775 +		    for ( k = 0; 
   3.776 +			  k < (((pfn_type[j] & LTAB_MASK) == L2TAB) ? 
   3.777 +		       (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024); 
   3.778 +			  k++ )
   3.779 +		    {
   3.780 +			unsigned long pfn;
   3.781  
   3.782 -                        if ( !(page[k] & _PAGE_PRESENT) ) continue;
   3.783 -                        mfn = page[k] >> PAGE_SHIFT;      
   3.784 -                        pfn = live_mfn_to_pfn_table[mfn];
   3.785 +			if ( !(page[k] & _PAGE_PRESENT) ) continue;
   3.786 +			mfn = page[k] >> PAGE_SHIFT;		    
   3.787 +			pfn = live_mfn_to_pfn_table[mfn];
   3.788 +
   3.789 +			if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
   3.790 +			{
   3.791 +			    // I don't think this should ever happen
   3.792  
   3.793 -                        if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
   3.794 -                        {
   3.795 -                            /* I don't think this should ever happen */
   3.796 -                            printf("FNI %d : [%08lx,%d] pte=%08lx, "
   3.797 -                                   "mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
   3.798 -                                   j, pfn_type[j], k,
   3.799 -                                   page[k], mfn, live_mfn_to_pfn_table[mfn],
   3.800 -                                   (live_mfn_to_pfn_table[mfn]<nr_pfns)? 
   3.801 -                                   live_pfn_to_mfn_table[
   3.802 -                                       live_mfn_to_pfn_table[mfn]]:0xdeadbeef);
   3.803 -                            pfn = 0; /* be suspicious, very suspicious */
   3.804 -                        }
   3.805 -                        page[k] &= PAGE_SIZE - 1;
   3.806 -                        page[k] |= pfn << PAGE_SHIFT;
   3.807 -                    } /* end of page table rewrite for loop */
   3.808 -      
   3.809 -                    if ( (*writerfn)(writerst, page, PAGE_SIZE) )
   3.810 -                    {
   3.811 -                        ERROR("Error when writing to state file (4)");
   3.812 -                        goto out;
   3.813 -                    }
   3.814 -      
   3.815 -                }  /* end of it's a PT page */
   3.816 -                else
   3.817 -                {  /* normal page */
   3.818 +			    printf("FNI %d : [%08lx,%d] pte=%08lx, mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
   3.819 +				   j, pfn_type[j], k,
   3.820 +				   page[k], mfn, live_mfn_to_pfn_table[mfn],
   3.821 +				   (live_mfn_to_pfn_table[mfn]<nr_pfns)? 
   3.822 +				   live_pfn_to_mfn_table[live_mfn_to_pfn_table[mfn]]: 0xdeadbeef);
   3.823 +
   3.824 +			    pfn = 0; // be suspicious, very suspicious
   3.825 +			    
   3.826 +			    //goto out;  // let's try our luck
   3.827 +
   3.828  
   3.829 -                    if ( (*writerfn)(writerst, region_base + (PAGE_SIZE*j), 
   3.830 -                                     PAGE_SIZE) )
   3.831 -                    {
   3.832 -                        ERROR("Error when writing to state file (5)");
   3.833 -                        goto out;
   3.834 -                    }
   3.835 -                }
   3.836 -            } /* end of the write out for this batch */
   3.837 -     
   3.838 -            sent_this_iter += batch;
   3.839 +			}
   3.840 +			page[k] &= PAGE_SIZE - 1;
   3.841 +			page[k] |= pfn << PAGE_SHIFT;
   3.842 +			
   3.843 +#if 0
   3.844 +			printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
   3.845 +			       pfn_type[j]>>28,
   3.846 +			       j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
   3.847 +#endif			  
   3.848 +			
   3.849 +		    } /* end of page table rewrite for loop */
   3.850 +		    
   3.851 +		    if ( (*writerfn)(writerst, page, PAGE_SIZE) )
   3.852 +		    {
   3.853 +			ERROR("Error when writing to state file (4)");
   3.854 +			goto out;
   3.855 +		    }
   3.856 +		    
   3.857 +		}  /* end of it's a PT page */
   3.858 +		else
   3.859 +		{  /* normal page */
   3.860  
   3.861 -        } /* end of this while loop for this iteration */
   3.862 +		    if ( (*writerfn)(writerst, region_base + (PAGE_SIZE*j), PAGE_SIZE) )
   3.863 +		    {
   3.864 +			ERROR("Error when writing to state file (5)");
   3.865 +			goto out;
   3.866 +		    }
   3.867 +		}
   3.868 +	    } /* end of the write out for this batch */
   3.869 +	    
   3.870 +	    sent_this_iter += batch;
   3.871  
   3.872 -        munmap(region_base, batch*PAGE_SIZE);
   3.873 +	} /* end of this while loop for this iteration */
   3.874 +
   3.875 +	munmap(region_base, batch*PAGE_SIZE);
   3.876  
   3.877      skip: 
   3.878  
   3.879 -        total_sent += sent_this_iter;
   3.880 +	total_sent += sent_this_iter;
   3.881  
   3.882 -        verbose_printf("\r %d: sent %d, skipped %d, ", 
   3.883 -                       iter, sent_this_iter, skip_this_iter );
   3.884 +	verbose_printf("\r %d: sent %d, skipped %d, ", 
   3.885 +		       iter, sent_this_iter, skip_this_iter );
   3.886  
   3.887 -        if ( last_iter )
   3.888 -        {
   3.889 -            track_cpu_usage( xc_handle, domid, 0, sent_this_iter, 0, 1);
   3.890 +	if ( last_iter )
   3.891 +	{
   3.892 +	    print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
   3.893  
   3.894 -            verbose_printf("Total pages sent= %d (%.2fx)\n", 
   3.895 -                           total_sent, ((float)total_sent)/nr_pfns );
   3.896 -            verbose_printf("(of which %d were fixups)\n", needed_to_fix  );
   3.897 -        }       
   3.898 +	    verbose_printf("Total pages sent= %d (%.2fx)\n", 
   3.899 +			   total_sent, ((float)total_sent)/nr_pfns );
   3.900 +	    verbose_printf("(of which %d were fixups)\n", needed_to_fix  );
   3.901 +	}       
   3.902  
   3.903 -        if ( debug && last_iter )
   3.904 -        {
   3.905 -            int minusone = -1;
   3.906 -            memset( to_send, 0xff, nr_pfns/8 );
   3.907 -            debug = 0;
   3.908 -            printf("Entering debug resend-all mode\n");
   3.909 +	if ( debug && last_iter )
   3.910 +	{
   3.911 +	    int minusone = -1;
   3.912 +	    memset( to_send, 0xff, (nr_pfns+8)/8 );
   3.913 +	    debug = 0;
   3.914 +	    printf("Entering debug resend-all mode\n");
   3.915      
   3.916 -            /* send "-1" to put receiver into debug mode */
   3.917 -            if ( (*writerfn)(writerst, &minusone, sizeof(int)) )
   3.918 -            {
   3.919 -                ERROR("Error when writing to state file (6)");
   3.920 -                goto out;
   3.921 -            }
   3.922 +	    /* send "-1" to put receiver into debug mode */
   3.923 +	    if ( (*writerfn)(writerst, &minusone, sizeof(int)) )
   3.924 +	    {
   3.925 +		ERROR("Error when writing to state file (6)");
   3.926 +		goto out;
   3.927 +	    }
   3.928  
   3.929 -            continue;
   3.930 -        }
   3.931 +	    continue;
   3.932 +	}
   3.933  
   3.934 -        if ( last_iter )
   3.935 -            break;
   3.936 +	if ( last_iter )
   3.937 +	    break;
   3.938  
   3.939 -        if ( live )
   3.940 -        {
   3.941 -            if ( (iter >= max_iters) || 
   3.942 -                 (sent_this_iter+skip_this_iter < 50) || 
   3.943 -                 (total_sent > nr_pfns*max_factor) )
   3.944 -            {
   3.945 -                DPRINTF("Start last iteration\n");
   3.946 -                last_iter = 1;
   3.947 +	if ( live )
   3.948 +	{
   3.949 +	    if ( 
   3.950 +		 // ( sent_this_iter > (sent_last_iter * 0.95) ) ||		 
   3.951 +		 (iter >= max_iters) || 
   3.952 +		 (sent_this_iter+skip_this_iter < 50) || 
   3.953 +		 (total_sent > nr_pfns*max_factor) )
   3.954 +	    {
   3.955 +		DPRINTF("Start last iteration\n");
   3.956 +		last_iter = 1;
   3.957  
   3.958 -                xc_domain_pause(xc_handle, domid);
   3.959 -            } 
   3.960 +		xc_domain_pause( xc_handle, domid, &op, NULL );
   3.961  
   3.962 -            if ( xc_shadow_control( xc_handle, domid, 
   3.963 -                                    DOM0_SHADOW_CONTROL_OP_CLEAN2,
   3.964 -                                    to_send, nr_pfns, &faults_this_iter,
   3.965 -                                    &dirtied_this_iter) != nr_pfns ) 
   3.966 -            {
   3.967 -                ERROR("Error flushing shadow PT");
   3.968 -                goto out;
   3.969 -            }
   3.970 +	    } 
   3.971  
   3.972 -            sent_last_iter = sent_this_iter;
   3.973 +	    if ( xc_shadow_control( xc_handle, domid, 
   3.974 +				    DOM0_SHADOW_CONTROL_OP_CLEAN2,
   3.975 +				    to_send, nr_pfns, &stats ) != nr_pfns ) 
   3.976 +	    {
   3.977 +		ERROR("Error flushing shadow PT");
   3.978 +		goto out;
   3.979 +	    }
   3.980  
   3.981 -            /* dirtied_this_iter = count_bits( nr_pfns, to_send ); */
   3.982 -            track_cpu_usage( xc_handle, domid, faults_this_iter,
   3.983 -                             sent_this_iter, dirtied_this_iter, 1);
   3.984 -     
   3.985 -        }
   3.986 +	    sent_last_iter = sent_this_iter;
   3.987 +
   3.988 +	    print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
   3.989 +	    
   3.990 +	}
   3.991 +
   3.992  
   3.993      } /* end of while 1 */
   3.994  
   3.995 @@ -783,8 +834,8 @@ int xc_linux_save(int xc_handle,
   3.996      /* Zero terminate */
   3.997      if ( (*writerfn)(writerst, &rc, sizeof(int)) )
   3.998      {
   3.999 -        ERROR("Error when writing to state file (6)");
  3.1000 -        goto out;
  3.1001 +	ERROR("Error when writing to state file (6)");
  3.1002 +	goto out;
  3.1003      }
  3.1004  
  3.1005      /* Get the final execution context */
  3.1006 @@ -792,10 +843,10 @@ int xc_linux_save(int xc_handle,
  3.1007      op.u.getdomaininfo.domain = (domid_t)domid;
  3.1008      op.u.getdomaininfo.ctxt = &ctxt;
  3.1009      if ( (do_dom0_op(xc_handle, &op) < 0) || 
  3.1010 -         ((u32)op.u.getdomaininfo.domain != domid) )
  3.1011 +	 ((u32)op.u.getdomaininfo.domain != domid) )
  3.1012      {
  3.1013 -        PERROR("Could not get info on domain");
  3.1014 -        goto out;
  3.1015 +	PERROR("Could not get info on domain");
  3.1016 +	goto out;
  3.1017      }
  3.1018  
  3.1019      /* Canonicalise the suspend-record frame number. */
  3.1020 @@ -821,18 +872,18 @@ int xc_linux_save(int xc_handle,
  3.1021          ERROR("PT base is not in range of pseudophys map");
  3.1022          goto out;
  3.1023      }
  3.1024 -    ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << 
  3.1025 -        PAGE_SHIFT;
  3.1026 +    ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
  3.1027  
  3.1028 -    if ( (*writerfn)(writerst, &ctxt,       sizeof(ctxt)) ||
  3.1029 -         (*writerfn)(writerst, live_shinfo, PAGE_SIZE) )
  3.1030 +    if ( (*writerfn)(writerst, &ctxt,                 sizeof(ctxt)) ||
  3.1031 +         (*writerfn)(writerst, live_shinfo,           PAGE_SIZE) )
  3.1032      {
  3.1033          ERROR("Error when writing to state file (1)");
  3.1034          goto out;
  3.1035      }
  3.1036      munmap(live_shinfo, PAGE_SIZE);
  3.1037  
  3.1038 - out:
  3.1039 +out:
  3.1040 +
  3.1041      if ( pfn_type != NULL )
  3.1042          free(pfn_type);
  3.1043  
     4.1 --- a/tools/xc/py/Xc.c	Thu Jun 24 10:28:58 2004 +0000
     4.2 +++ b/tools/xc/py/Xc.c	Thu Jun 24 11:35:19 2004 +0000
     4.3 @@ -939,7 +939,7 @@ static PyObject *pyxc_shadow_control(PyO
     4.4                                        &dom, &op) )
     4.5          return NULL;
     4.6  
     4.7 -    if ( xc_shadow_control(xc->xc_handle, dom, op, NULL, 0, NULL, NULL) < 0 )
     4.8 +    if ( xc_shadow_control(xc->xc_handle, dom, op, NULL, 0, NULL) < 0 )
     4.9          return PyErr_SetFromErrno(xc_error);
    4.10      
    4.11      Py_INCREF(zero);
     5.1 --- a/xen/common/shadow.c	Thu Jun 24 10:28:58 2004 +0000
     5.2 +++ b/xen/common/shadow.c	Thu Jun 24 11:35:19 2004 +0000
     5.3 @@ -48,7 +48,7 @@ fault handler spinning waiting to grab t
     5.4  intterupts disabled, hence we can't use the normal flush_tlb_cpu
     5.5  mechanism.
     5.6  
     5.7 -For the moment, we have a grim hace whereby the spinlock in the shadow
     5.8 +For the moment, we have a grim race whereby the spinlock in the shadow
     5.9  fault handler is actually a try lock, in a loop with a helper for the
    5.10  tlb flush code.
    5.11  
    5.12 @@ -374,6 +374,17 @@ static int shadow_mode_table_op(struct d
    5.13  		__scan_shadow_table( m, TABLE_OP_FREE_L1 );
    5.14  		
    5.15  	send_bitmap:
    5.16 +		sc->stats.fault_count       = d->mm.shadow_fault_count;
    5.17 +		sc->stats.dirty_count       = d->mm.shadow_dirty_count;
    5.18 +		sc->stats.dirty_net_count   = d->mm.shadow_dirty_net_count;
    5.19 +		sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;
    5.20 +
    5.21 +		d->mm.shadow_fault_count       = 0;
    5.22 +		d->mm.shadow_dirty_count       = 0;
    5.23 +		d->mm.shadow_dirty_net_count   = 0;
    5.24 +		d->mm.shadow_dirty_block_count = 0;
    5.25 +	
    5.26 +		sc->pages = d->tot_pages;
    5.27  
    5.28  		if( d->tot_pages > sc->pages || 
    5.29  			!sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap )
    5.30 @@ -382,12 +393,6 @@ static int shadow_mode_table_op(struct d
    5.31  			goto out;
    5.32  		}
    5.33  
    5.34 -		sc->fault_count = d->mm.shadow_fault_count;
    5.35 -		sc->dirty_count = d->mm.shadow_dirty_count;
    5.36 -		d->mm.shadow_fault_count = 0;
    5.37 -		d->mm.shadow_dirty_count = 0;
    5.38 -	
    5.39 -		sc->pages = d->tot_pages;
    5.40  	
    5.41  #define chunk (8*1024) // do this in 1KB chunks for L1 cache
    5.42  	
    5.43 @@ -420,6 +425,11 @@ static int shadow_mode_table_op(struct d
    5.44      case DOM0_SHADOW_CONTROL_OP_PEEK:
    5.45      {
    5.46  		int i;
    5.47 +
    5.48 +		sc->stats.fault_count       = p->mm.shadow_fault_count;
    5.49 +		sc->stats.dirty_count       = p->mm.shadow_dirty_count;
    5.50 +		sc->stats.dirty_net_count   = p->mm.shadow_dirty_net_count;
    5.51 +		sc->stats.dirty_block_count = p->mm.shadow_dirty_block_count;
    5.52  	
    5.53  		if( d->tot_pages > sc->pages || 
    5.54  			!sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap )
    5.55 @@ -716,7 +726,7 @@ int shadow_fault( unsigned long va, long
    5.56              int i;
    5.57              sl1pfn_info = alloc_shadow_page( &current->mm ); 
    5.58              sl1pfn_info->type_and_flags = PGT_l1_page_table;
    5.59 -
    5.60 +			
    5.61              sl1pfn = sl1pfn_info - frame_table;
    5.62  
    5.63              SH_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
     6.1 --- a/xen/include/asm-x86/processor.h	Thu Jun 24 10:28:58 2004 +0000
     6.2 +++ b/xen/include/asm-x86/processor.h	Thu Jun 24 11:35:19 2004 +0000
     6.3 @@ -373,6 +373,8 @@ struct mm_struct {
     6.4      unsigned int shadow_page_count;     
     6.5      unsigned int shadow_fault_count;     
     6.6      unsigned int shadow_dirty_count;     
     6.7 +    unsigned int shadow_dirty_net_count;     
     6.8 +    unsigned int shadow_dirty_block_count;     
     6.9  
    6.10      /* Current LDT details. */
    6.11      unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
     7.1 --- a/xen/include/hypervisor-ifs/dom0_ops.h	Thu Jun 24 10:28:58 2004 +0000
     7.2 +++ b/xen/include/hypervisor-ifs/dom0_ops.h	Thu Jun 24 11:35:19 2004 +0000
     7.3 @@ -255,10 +255,20 @@ typedef struct {
     7.4  #define DOM0_SHADOW_CONTROL_OP_OFF         0
     7.5  #define DOM0_SHADOW_CONTROL_OP_ENABLE_TEST 1
     7.6  #define DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY 2
     7.7 +#define DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE 3
     7.8  #define DOM0_SHADOW_CONTROL_OP_FLUSH       10     /* table ops */
     7.9  #define DOM0_SHADOW_CONTROL_OP_CLEAN       11
    7.10  #define DOM0_SHADOW_CONTROL_OP_PEEK        12
    7.11  #define DOM0_SHADOW_CONTROL_OP_CLEAN2      13
    7.12 +
    7.13 +typedef struct dom0_shadow_control
    7.14 +{
    7.15 +    u32 fault_count;
    7.16 +    u32 dirty_count;
    7.17 +    u32 dirty_net_count;     
    7.18 +    u32 dirty_block_count;     
    7.19 +} dom0_shadow_control_stats_t;
    7.20 +
    7.21  typedef struct {
    7.22      /* IN variables. */
    7.23      domid_t        domain;            /*  0 */
    7.24 @@ -269,11 +279,9 @@ typedef struct {
    7.25      memory_t       pages;  /* 16: size of buffer, updated with actual size */
    7.26      MEMORY_PADDING;
    7.27      /* OUT variables. */
    7.28 -    memory_t       fault_count;       /* 24 */
    7.29 -    MEMORY_PADDING;
    7.30 -    memory_t       dirty_count;       /* 32 */
    7.31 -    MEMORY_PADDING;
    7.32 -} PACKED dom0_shadow_control_t; /* 40 bytes */
    7.33 +    dom0_shadow_control_stats_t;
    7.34 +} PACKED dom0_shadow_control_t;
    7.35 +
    7.36  
    7.37  #define DOM0_SETDOMAINNAME     26
    7.38  typedef struct {
     8.1 --- a/xen/include/xen/shadow.h	Thu Jun 24 10:28:58 2004 +0000
     8.2 +++ b/xen/include/xen/shadow.h	Thu Jun 24 11:35:19 2004 +0000
     8.3 @@ -17,8 +17,9 @@
     8.4  /* Shadow PT operation mode : shadowmode variable in mm_struct */
     8.5  #define SHM_test        (1) /* just run domain on shadow PTs */
     8.6  #define SHM_logdirty    (2) /* log pages that are dirtied */
     8.7 -#define SHM_cow         (3) /* copy on write all dirtied pages */
     8.8 -#define SHM_translate   (4) /* lookup machine pages in translation table */
     8.9 +#define SHM_translate   (3) /* lookup machine pages in translation table */
    8.10 +//#define SHM_cow       (4) /* copy on write all dirtied pages */
    8.11 +
    8.12  
    8.13  #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
    8.14  #define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
    8.15 @@ -76,9 +77,10 @@ printk("DOM%u: (file=shadow.c, line=%d) 
    8.16  
    8.17  /************************************************************************/
    8.18  
    8.19 -static inline void __mark_dirty( struct mm_struct *m, unsigned int mfn )
    8.20 +static inline int __mark_dirty( struct mm_struct *m, unsigned int mfn )
    8.21  {
    8.22      unsigned int pfn;
    8.23 +    int rc = 0;
    8.24  
    8.25      ASSERT(spin_is_locked(&m->shadow_lock));
    8.26  
    8.27 @@ -88,15 +90,19 @@ static inline void __mark_dirty( struct 
    8.28         really part of the domain's psuedo-physical memory map e.g.
    8.29         the shared info frame. Nothing to do here...
    8.30      */
    8.31 -    if ( unlikely(pfn & 0x80000000U) ) return; 
    8.32 +    if ( unlikely(pfn & 0x80000000U) ) return rc; 
    8.33  
    8.34      ASSERT(m->shadow_dirty_bitmap);
    8.35      if( likely(pfn<m->shadow_dirty_bitmap_size) )
    8.36      {
    8.37 -        /* These updates occur with mm.shadow_lock held, so use 
    8.38 -           (__) version of test_and_set */
    8.39 -        if( !__test_and_set_bit( pfn, m->shadow_dirty_bitmap) )
    8.40 -            m->shadow_dirty_count++;
    8.41 +	/* These updates occur with mm.shadow_lock held, so use 
    8.42 +	   (__) version of test_and_set */
    8.43 +	if( __test_and_set_bit( pfn, m->shadow_dirty_bitmap ) == 0 )
    8.44 +	{
    8.45 +	    // if we set it
    8.46 +	    m->shadow_dirty_count++;
    8.47 +	    rc = 1;
    8.48 +	}
    8.49      }
    8.50      else
    8.51      {
    8.52 @@ -108,17 +114,20 @@ static inline void __mark_dirty( struct 
    8.53                 frame_table[mfn].count_and_flags, 
    8.54                 frame_table[mfn].type_and_flags );
    8.55      }
    8.56 -
    8.57 +	
    8.58 +    return rc;
    8.59  }
    8.60  
    8.61  
    8.62 -static inline void mark_dirty( struct mm_struct *m, unsigned int mfn )
    8.63 +static inline int mark_dirty( struct mm_struct *m, unsigned int mfn )
    8.64  {
    8.65 +    int rc;
    8.66      ASSERT(local_irq_is_enabled());
    8.67      //if(spin_is_locked(&m->shadow_lock)) printk("+");
    8.68      spin_lock(&m->shadow_lock);
    8.69 -    __mark_dirty( m, mfn );
    8.70 +    rc = __mark_dirty( m, mfn );
    8.71      spin_unlock(&m->shadow_lock);
    8.72 +    return rc;
    8.73  }
    8.74  
    8.75