ia64/xen-unstable

changeset 2753:8d83a86ca86b

bitkeeper revision 1.1159.135.1 (417fd3b8e-uijqG_Stg0EouCeWPftA)

Add adaptive network-bandwidth rate limiting to migration, and
prefix info and debug messages with timestamps.
author cl349@freefall.cl.cam.ac.uk
date Wed Oct 27 16:58:32 2004 +0000 (2004-10-27)
parents 35abe76a49b1
children 1511d2acc1a4
files tools/libxc/xc_io.c tools/libxc/xc_linux_restore.c tools/libxc/xc_linux_save.c tools/python/xen/xm/migrate.py
line diff
     1.1 --- a/tools/libxc/xc_io.c	Wed Oct 27 14:26:56 2004 +0000
     1.2 +++ b/tools/libxc/xc_io.c	Wed Oct 27 16:58:32 2004 +0000
     1.3 @@ -1,4 +1,13 @@
     1.4  #include "xc_io.h"
     1.5 +#include <sys/time.h>
     1.6 +
     1.7 +void xcio_timestamp(XcIOContext *ctxt, const char *msg){
     1.8 +  struct timeval tv;
     1.9 +
    1.10 +  gettimeofday(&tv, NULL);
    1.11 +  if (msg[0] != '\b' && msg[0] != '\r')
    1.12 +      fprintf(stdout, "[%08ld.%06ld] ", tv.tv_sec, tv.tv_usec);
    1.13 +}
    1.14  
    1.15  void xcio_error(XcIOContext *ctxt, const char *msg, ...){
    1.16    va_list args;
    1.17 @@ -15,8 +24,10 @@ void xcio_info(XcIOContext *ctxt, const 
    1.18  
    1.19    if(0 && !(ctxt->flags & XCFLAGS_VERBOSE)) return;
    1.20    va_start(args, msg);
    1.21 +  xcio_timestamp(ctxt, msg);
    1.22    vfprintf(stdout, msg, args); fprintf(stdout, "\n");
    1.23    IOStream_vprint(ctxt->info, msg, args);
    1.24 +  fflush(stdout);
    1.25    va_end(args);
    1.26  }
    1.27  
    1.28 @@ -25,6 +36,7 @@ void xcio_debug(XcIOContext *ctxt, const
    1.29  
    1.30    if(0 && !(ctxt->flags & XCFLAGS_DEBUG)) return;
    1.31    va_start(args, msg);
    1.32 +  xcio_timestamp(ctxt, msg);
    1.33    vfprintf(stdout, msg, args); fprintf(stdout, "\n");
    1.34    IOStream_vprint(ctxt->info, msg, args);
    1.35    va_end(args);
     2.1 --- a/tools/libxc/xc_linux_restore.c	Wed Oct 27 14:26:56 2004 +0000
     2.2 +++ b/tools/libxc/xc_linux_restore.c	Wed Oct 27 16:58:32 2004 +0000
     2.3 @@ -130,6 +130,8 @@ int xc_linux_restore(int xc_handle, XcIO
     2.4      /* used by debug verify code */
     2.5      unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
     2.6  
     2.7 +    xcio_info(ioctxt, "xc_linux_restore start\n");
     2.8 +
     2.9      if ( mlock(&ctxt, sizeof(ctxt) ) )
    2.10      {
    2.11          /* needed for when we do the build dom0 op, 
    2.12 @@ -194,7 +196,7 @@ int xc_linux_restore(int xc_handle, XcIO
    2.13      }
    2.14      
    2.15      ioctxt->domain = dom;
    2.16 -    printf("Created domain %ld\n",dom);
    2.17 +    xcio_info(ioctxt, "Created domain %ld\n",dom);
    2.18  
    2.19      /* Get the domain's shared-info frame. */
    2.20      op.cmd = DOM0_GETDOMAININFO;
    2.21 @@ -433,7 +435,7 @@ int xc_linux_restore(int xc_handle, XcIO
    2.22          n+=j; /* crude stats */
    2.23      }
    2.24  
    2.25 -    DPRINTF("Received all pages\n");
    2.26 +    xcio_info(ioctxt, "Received all pages\n");
    2.27  
    2.28      /*
    2.29       * Pin page tables. Do this after writing to them as otherwise Xen
    2.30 @@ -473,7 +475,8 @@ int xc_linux_restore(int xc_handle, XcIO
    2.31  
    2.32      if ( finish_mmu_updates(xc_handle, mmu) ) goto out;
    2.33  
    2.34 -    xcio_info(ioctxt, "\b\b\b\b100%%\nMemory reloaded.\n");
    2.35 +    xcio_info(ioctxt, "\b\b\b\b100%%\n");
    2.36 +    xcio_info(ioctxt, "Memory reloaded.\n");
    2.37  
    2.38      /* Get the list of PFNs that are not in the psuedo-phys map */
    2.39      {
    2.40 @@ -647,7 +650,9 @@ int xc_linux_restore(int xc_handle, XcIO
    2.41          xcio_error(ioctxt, "Bad LDT base or size");
    2.42          goto out;
    2.43      }
    2.44 -   
    2.45 +
    2.46 +    xcio_info(ioctxt, "Domain ready to be built.\n");
    2.47 +
    2.48      op.cmd = DOM0_BUILDDOMAIN;
    2.49      op.u.builddomain.domain   = (domid_t)dom;
    2.50      op.u.builddomain.ctxt = &ctxt;
    2.51 @@ -661,6 +666,7 @@ int xc_linux_restore(int xc_handle, XcIO
    2.52  
    2.53      if ( ioctxt->flags & XCFLAGS_CONFIGURE )
    2.54      {
    2.55 +        xcio_info(ioctxt, "Domain ready to be unpaused\n");
    2.56          op.cmd = DOM0_UNPAUSEDOMAIN;
    2.57          op.u.unpausedomain.domain = (domid_t)dom;
    2.58          rc = do_dom0_op(xc_handle, &op);
     3.1 --- a/tools/libxc/xc_linux_save.c	Wed Oct 27 14:26:56 2004 +0000
     3.2 +++ b/tools/libxc/xc_linux_save.c	Wed Oct 27 16:58:32 2004 +0000
     3.3 @@ -9,9 +9,12 @@
     3.4  #include <sys/time.h>
     3.5  #include "xc_private.h"
     3.6  #include <asm-xen/suspend.h>
     3.7 +#include <time.h>
     3.8  
     3.9  #define BATCH_SIZE 1024   /* 1024 pages (4MB) at a time */
    3.10  
    3.11 +#define MAX_MBIT_RATE 500
    3.12 +
    3.13  #define DEBUG  0
    3.14  #define DDEBUG 0
    3.15  
    3.16 @@ -138,6 +141,80 @@ static long long tv_delta( struct timeva
    3.17          (new->tv_usec - old->tv_usec);
    3.18  }
    3.19  
    3.20 +
    3.21 +#define START_MBIT_RATE ioctxt->resource
    3.22 +
    3.23 +static int mbit_rate, ombit_rate = 0;
    3.24 +static int burst_time_us = -1;
    3.25 +
    3.26 +#define MBIT_RATE mbit_rate
    3.27 +#define BURST_BUDGET (100*1024)
    3.28 +
    3.29 +/* 
    3.30 +   1000000/((100)*1024*1024/8/(100*1024))
    3.31 +   7812
    3.32 +   1000000/((100)*1024/8/(100))
    3.33 +   7812
    3.34 +   1000000/((100)*128/(100))
    3.35 +   7812
    3.36 +   100000000/((100)*128)
    3.37 +   7812
    3.38 +   100000000/128
    3.39 +   781250
    3.40 + */
    3.41 +#define RATE_TO_BTU 781250
    3.42 +#define BURST_TIME_US burst_time_us
    3.43 +
    3.44 +static int xcio_ratewrite(XcIOContext *ioctxt, void *buf, int n){
    3.45 +    static int budget = 0;
    3.46 +    static struct timeval last_put = { 0 };
    3.47 +    struct timeval now;
    3.48 +    struct timespec delay;
    3.49 +    long long delta;
    3.50 +    int rc;
    3.51 +
    3.52 +    if (START_MBIT_RATE == 0)
    3.53 +	return xcio_write(ioctxt, buf, n);
    3.54 +    
    3.55 +    budget -= n;
    3.56 +    if (budget < 0) {
    3.57 +	if (MBIT_RATE != ombit_rate) {
    3.58 +	    BURST_TIME_US = RATE_TO_BTU / MBIT_RATE;
    3.59 +	    ombit_rate = MBIT_RATE;
    3.60 +	    xcio_info(ioctxt,
    3.61 +		      "rate limit: %d mbit/s burst budget %d slot time %d\n",
    3.62 +		      MBIT_RATE, BURST_BUDGET, BURST_TIME_US);
    3.63 +	}
    3.64 +	if (last_put.tv_sec == 0) {
    3.65 +	    budget += BURST_BUDGET;
    3.66 +	    gettimeofday(&last_put, NULL);
    3.67 +	} else {
    3.68 +	    while (budget < 0) {
    3.69 +		gettimeofday(&now, NULL);
    3.70 +		delta = tv_delta(&now, &last_put);
    3.71 +		while (delta > BURST_TIME_US) {
    3.72 +		    budget += BURST_BUDGET;
    3.73 +		    last_put.tv_usec += BURST_TIME_US;
    3.74 +		    if (last_put.tv_usec > 1000000) {
    3.75 +			last_put.tv_usec -= 1000000;
    3.76 +			last_put.tv_sec++;
    3.77 +		    }
    3.78 +		    delta -= BURST_TIME_US;
    3.79 +		}
    3.80 +		if (budget > 0)
    3.81 +		    break;
    3.82 +		delay.tv_sec = 0;
    3.83 +		delay.tv_nsec = 1000 * (BURST_TIME_US - delta);
    3.84 +		while (delay.tv_nsec > 0)
    3.85 +		    if (nanosleep(&delay, &delay) == 0)
    3.86 +			break;
    3.87 +	    }
    3.88 +	}
    3.89 +    }
    3.90 +    rc = IOStream_write(ioctxt->io, buf, n);
    3.91 +    return (rc == n ? 0 : rc);
    3.92 +}
    3.93 +
    3.94  static int print_stats( int xc_handle, u32 domid, 
    3.95                          int pages_sent, xc_shadow_control_stats_t *stats,
    3.96                          int print )
    3.97 @@ -168,12 +245,20 @@ static int print_stats( int xc_handle, u
    3.98  
    3.99      if ( print )
   3.100          printf("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
   3.101 -               "dirtied %dMb/s\n",
   3.102 +               "dirtied %dMb/s %ld pages\n",
   3.103                 wall_delta, 
   3.104                 (int)((d0_cpu_delta*100)/wall_delta),
   3.105                 (int)((d1_cpu_delta*100)/wall_delta),
   3.106 -               (int)((pages_sent*PAGE_SIZE*8)/(wall_delta*1000)),
   3.107 -               (int)((stats->dirty_count*PAGE_SIZE*8)/(wall_delta*1000)));
   3.108 +               (int)((pages_sent*PAGE_SIZE)/(wall_delta*(1000/8))),
   3.109 +               (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))),
   3.110 +               stats->dirty_count);
   3.111 +
   3.112 +    if (((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8))) > mbit_rate) {
   3.113 +	mbit_rate = (int)((stats->dirty_count*PAGE_SIZE)/(wall_delta*(1000/8)))
   3.114 +	    + 50;
   3.115 +	if (mbit_rate > MAX_MBIT_RATE)
   3.116 +	    mbit_rate = MAX_MBIT_RATE;
   3.117 +    }
   3.118  
   3.119      d0_cpu_last  = d0_cpu_now;
   3.120      d1_cpu_last  = d1_cpu_now;
   3.121 @@ -198,14 +283,15 @@ static int write_vmconfig(XcIOContext *i
   3.122  }
   3.123  
   3.124  static int analysis_phase( int xc_handle, u32 domid, 
   3.125 -                           int nr_pfns, unsigned long *arr )
   3.126 +                           int nr_pfns, unsigned long *arr, int runs )
   3.127  {
   3.128      long long start, now;
   3.129      xc_shadow_control_stats_t stats;
   3.130 +    int j;
   3.131  
   3.132      start = llgettimeofday();
   3.133  
   3.134 -    while ( 0 )
   3.135 +    for (j = 0; j < runs; j++)
   3.136      {
   3.137          int i;
   3.138  
   3.139 @@ -213,9 +299,9 @@ static int analysis_phase( int xc_handle
   3.140                             DOM0_SHADOW_CONTROL_OP_CLEAN,
   3.141                             arr, nr_pfns, NULL);
   3.142          printf("#Flush\n");
   3.143 -        for ( i = 0; i < 100; i++ )
   3.144 +        for ( i = 0; i < 40; i++ )
   3.145          {     
   3.146 -            usleep(10000);     
   3.147 +            usleep(50000);     
   3.148              now = llgettimeofday();
   3.149              xc_shadow_control( xc_handle, domid, 
   3.150                                 DOM0_SHADOW_CONTROL_OP_PEEK,
   3.151 @@ -345,6 +431,10 @@ int xc_linux_save(int xc_handle, XcIOCon
   3.152  
   3.153      int needed_to_fix = 0;
   3.154      int total_sent    = 0;
   3.155 +
   3.156 +    MBIT_RATE = START_MBIT_RATE;
   3.157 +
   3.158 +    xcio_info(ioctxt, "xc_linux_save start %d\n", domid);
   3.159      
   3.160      if (mlock(&ctxt, sizeof(ctxt))) {
   3.161          xcio_perror(ioctxt, "Unable to mlock ctxt");
   3.162 @@ -440,7 +530,6 @@ int xc_linux_save(int xc_handle, XcIOCon
   3.163          }
   3.164  
   3.165          last_iter = 0;
   3.166 -        sent_last_iter = 1<<20; /* 4GB of pages */
   3.167      } else{
   3.168  	/* This is a non-live suspend. Issue the call back to get the
   3.169  	 domain suspended */
   3.170 @@ -455,6 +544,7 @@ int xc_linux_save(int xc_handle, XcIOCon
   3.171  	}
   3.172  
   3.173      }
   3.174 +    sent_last_iter = 1<<20; /* 4GB of pages */
   3.175  
   3.176      /* calculate the power of 2 order of nr_pfns, e.g.
   3.177         15->4 16->4 17->5 */
   3.178 @@ -493,7 +583,7 @@ int xc_linux_save(int xc_handle, XcIOCon
   3.179  
   3.180      }
   3.181  
   3.182 -    analysis_phase( xc_handle, domid, nr_pfns, to_skip );
   3.183 +    analysis_phase( xc_handle, domid, nr_pfns, to_skip, 0 );
   3.184  
   3.185      /* We want zeroed memory so use calloc rather than malloc. */
   3.186      pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
   3.187 @@ -744,14 +834,14 @@ int xc_linux_save(int xc_handle, XcIOCon
   3.188     
   3.189                      } /* end of page table rewrite for loop */
   3.190        
   3.191 -                    if ( xcio_write(ioctxt, page, PAGE_SIZE) ){
   3.192 +                    if ( xcio_ratewrite(ioctxt, page, PAGE_SIZE) ){
   3.193                          xcio_error(ioctxt, "Error when writing to state file (4)");
   3.194                          goto out;
   3.195                      }
   3.196        
   3.197                  }  /* end of it's a PT page */ else {  /* normal page */
   3.198  
   3.199 -                    if ( xcio_write(ioctxt, region_base + (PAGE_SIZE*j), 
   3.200 +                    if ( xcio_ratewrite(ioctxt, region_base + (PAGE_SIZE*j), 
   3.201                                       PAGE_SIZE) ){
   3.202                          xcio_error(ioctxt, "Error when writing to state file (5)");
   3.203                          goto out;
   3.204 @@ -801,7 +891,8 @@ int xc_linux_save(int xc_handle, XcIOCon
   3.205          if ( live )
   3.206          {
   3.207              if ( 
   3.208 -                /* ( sent_this_iter > (sent_last_iter * 0.95) ) || */
   3.209 +                ( ( sent_this_iter > sent_last_iter ) &&
   3.210 +		  (mbit_rate == MAX_MBIT_RATE ) ) ||
   3.211                  (iter >= max_iters) || 
   3.212                  (sent_this_iter+skip_this_iter < 50) || 
   3.213                  (total_sent > nr_pfns*max_factor) )
   3.214 @@ -816,11 +907,11 @@ int xc_linux_save(int xc_handle, XcIOCon
   3.215  		    goto out;
   3.216  		}
   3.217  
   3.218 -		printf("SUSPEND flags %08lx shinfo %08lx eip %08lx esi %08lx\n", 
   3.219 -		       op.u.getdomaininfo.flags, op.u.getdomaininfo.shared_info_frame,
   3.220 -		       ctxt.cpu_ctxt.eip, ctxt.cpu_ctxt.esi );
   3.221 -
   3.222 -
   3.223 +		xcio_info(ioctxt,
   3.224 +                          "SUSPEND flags %08lx shinfo %08lx eip %08lx "
   3.225 +                          "esi %08lx\n", op.u.getdomaininfo.flags,
   3.226 +                          op.u.getdomaininfo.shared_info_frame,
   3.227 +                          ctxt.cpu_ctxt.eip, ctxt.cpu_ctxt.esi );
   3.228              } 
   3.229  
   3.230              if ( xc_shadow_control( xc_handle, domid, 
     4.1 --- a/tools/python/xen/xm/migrate.py	Wed Oct 27 14:26:56 2004 +0000
     4.2 +++ b/tools/python/xen/xm/migrate.py	Wed Oct 27 16:58:32 2004 +0000
     4.3 @@ -26,7 +26,7 @@ gopts.opt('live', short='l',
     4.4            fn=set_true, default=0,
     4.5            use="Use live migration.")
     4.6  
     4.7 -gopts.opt('resource', short='r',
     4.8 +gopts.opt('resource', short='r', val='MBIT',
     4.9            fn=set_int, default=0,
    4.10            use="Set level of resource usage for migration.")
    4.11