ia64/xen-unstable

changeset 1408:1ad6b2dcc1f8

bitkeeper revision 1.911.1.3 (40ac8592YPN8CVevw_ez5NasKUdPag)

live migration improvements
author iap10@labyrinth.cl.cam.ac.uk
date Thu May 20 10:16:50 2004 +0000 (2004-05-20)
parents c44c3a19fe95
children 27b5376a7ec5
files tools/examples/xc_dom_control.py tools/xc/lib/xc_linux_restore.c tools/xc/lib/xc_linux_save.c tools/xc/py/Xc.c xen/common/shadow.c xen/include/hypervisor-ifs/dom0_ops.h
line diff
     1.1 --- a/tools/examples/xc_dom_control.py	Wed May 19 21:56:34 2004 +0000
     1.2 +++ b/tools/examples/xc_dom_control.py	Thu May 20 10:16:50 2004 +0000
     1.3 @@ -136,17 +136,7 @@ elif cmd == 'suspend':
     1.4          pid = int(fd.readline())
     1.5          os.kill(pid, signal.SIGTERM)
     1.6  
     1.7 -    """
     1.8 -    xc.domain_stop( dom=dom )
     1.9 -    XXX
    1.10 -    while not xc.domain_getinfo( first_dom=dom, max_doms=1 )[0]['stopped']:
    1.11 -	print "Sleep..."
    1.12 -	time.sleep(0.001);
    1.13 -    """
    1.14 -
    1.15      rc = xc.linux_save( dom=dom, state_file=file, progress=1)
    1.16 -    if rc == 0 : xc.domain_destroy( dom=dom, force=1 )
    1.17 -    else: xc.domain_start( dom=dom )  # sensible for production use
    1.18  
    1.19  elif cmd == 'cpu_bvtslice':
    1.20      if len(sys.argv) < 3:
     2.1 --- a/tools/xc/lib/xc_linux_restore.c	Wed May 19 21:56:34 2004 +0000
     2.2 +++ b/tools/xc/lib/xc_linux_restore.c	Thu May 20 10:16:50 2004 +0000
     2.3 @@ -592,14 +592,11 @@ int xc_linux_restore(int xc_handle,
     2.4  
     2.5  
     2.6   out:
     2.7 -    if ( rc != 0 )  // destroy is something went wrong
     2.8 +    if ( rc != 0 )  // destroy if something went wrong
     2.9      {
    2.10          if ( dom != 0 )
    2.11          {
    2.12 -            op.cmd = DOM0_DESTROYDOMAIN;
    2.13 -            op.u.destroydomain.domain = (domid_t)dom;
    2.14 -            op.u.destroydomain.force  = 1;
    2.15 -            (void)do_dom0_op(xc_handle, &op);
    2.16 +	    xc_domain_destroy( xc_handle, dom, 1 );
    2.17          }
    2.18      }
    2.19  
     3.1 --- a/tools/xc/lib/xc_linux_save.c	Wed May 19 21:56:34 2004 +0000
     3.2 +++ b/tools/xc/lib/xc_linux_save.c	Thu May 20 10:16:50 2004 +0000
     3.3 @@ -95,7 +95,7 @@ int xc_linux_save(int xc_handle,
     3.4      int verbose = flags & XCFLAGS_VERBOSE;
     3.5      int live = flags & XCFLAGS_LIVE;
     3.6      int debug = flags & XCFLAGS_DEBUG;
     3.7 -    int sent_last_iter, sent_this_iter, max_iters;
     3.8 +    int sent_last_iter, sent_this_iter, skip_this_iter, max_iters;
     3.9  
    3.10      /* Remember if we stopped the guest, so we can restart it on exit. */
    3.11      int we_stopped_it = 0;
    3.12 @@ -137,8 +137,11 @@ int xc_linux_save(int xc_handle,
    3.13      /* number of pages we're dealing with */
    3.14      unsigned long nr_pfns;
    3.15  
    3.16 -    /* bitmap of pages left to send */
    3.17 -    unsigned long *to_send, *to_fix;
    3.18 +    /* bitmap of pages:
    3.19 +       - that should be sent this iteration (unless later marked as skip); 
    3.20 +       - to skip this iteration because already dirty;
    3.21 +       - to fixup by sending at the end if not already resent; */
    3.22 +    unsigned long *to_send, *to_skip, *to_fix;
    3.23  
    3.24      int needed_to_fix = 0;
    3.25      int total_sent    = 0;
    3.26 @@ -289,7 +292,7 @@ int xc_linux_save(int xc_handle,
    3.27  
    3.28  	last_iter = 0;
    3.29  	sent_last_iter = 1<<20; // 4GB's worth of pages
    3.30 -	max_iters = 9; // limit us to 10 time round loop
    3.31 +	max_iters = 19; // limit us to 20 times round loop
    3.32      }
    3.33      else
    3.34  	last_iter = 1;
    3.35 @@ -301,12 +304,14 @@ int xc_linux_save(int xc_handle,
    3.36  	
    3.37  	to_send = malloc( sz );
    3.38  	to_fix  = calloc( 1, sz );
    3.39 +	to_skip = malloc( sz );
    3.40  
    3.41 -	if (!to_send || !to_fix)
    3.42 +	if (!to_send || !to_fix || !to_skip)
    3.43  	{
    3.44  	    ERROR("Couldn't allocate to_send array");
    3.45  	    goto out;
    3.46  	}
    3.47 +
    3.48  	memset( to_send, 0xff, sz );
    3.49  
    3.50  	if ( mlock( to_send, sz ) )
    3.51 @@ -314,6 +319,15 @@ int xc_linux_save(int xc_handle,
    3.52  	    PERROR("Unable to mlock to_send");
    3.53  	    return 1;
    3.54  	}
    3.55 +
    3.56 +	/* (to fix is local only) */
    3.57 +
    3.58 +	if ( mlock( to_skip, sz ) )
    3.59 +	{
    3.60 +	    PERROR("Unable to mlock to_skip");
    3.61 +	    return 1;
    3.62 +	}
    3.63 +
    3.64      }
    3.65  
    3.66  
    3.67 @@ -379,6 +393,7 @@ int xc_linux_save(int xc_handle,
    3.68  	iter++;
    3.69  
    3.70  	sent_this_iter = 0;
    3.71 +	skip_this_iter = 0;
    3.72  	prev_pc = 0;
    3.73  	verbose_printf("Saving memory pages: iter %d   0%%", iter);
    3.74  
    3.75 @@ -392,6 +407,18 @@ int xc_linux_save(int xc_handle,
    3.76  		prev_pc = this_pc;
    3.77  	    }
    3.78  
     3.79 +	    /* slightly wasteful to peek the whole array every time, 
    3.80 +	       but this is fast enough for the moment. */
    3.81 +
    3.82 +	    if ( !last_iter && 
    3.83 +		 xc_shadow_control( xc_handle, domid, 
    3.84 +				    DOM0_SHADOW_CONTROL_OP_PEEK,
    3.85 +				    to_skip, nr_pfns ) != nr_pfns ) 
    3.86 +	    {
    3.87 +		ERROR("Error peeking shadow bitmap");
    3.88 +		goto out;
    3.89 +	    }
    3.90 +	    
    3.91  
    3.92  	    /* load pfn_type[] with the mfn of all the pages we're doing in
    3.93  	       this batch. */
    3.94 @@ -405,15 +432,29 @@ int xc_linux_save(int xc_handle,
    3.95  			    test_bit(n,to_send),
    3.96  			    live_mfn_to_pfn_table[live_pfn_to_mfn_table[n]&0xFFFFF]);
    3.97  
    3.98 +		if (!last_iter && test_bit(n, to_send) && test_bit(n, to_skip))
    3.99 +		    skip_this_iter++; // stats keeping
   3.100  
   3.101 -		if ( !test_bit(n, to_send ) &&
   3.102 -		    !( last_iter && test_bit(n, to_fix ) ) ) continue;
   3.103 +		if (! ( (test_bit(n, to_send) && !test_bit(n, to_skip)) ||
   3.104 +			(test_bit(n, to_send) && last_iter) ||
   3.105 +			(test_bit(n, to_fix)  && last_iter) )   )
   3.106 +		    continue;
   3.107 +
   3.108 +		/* we get here if:
   3.109 +		   1. page is marked to_send & hasn't already been re-dirtied
   3.110 +		   2. (ignore to_skip in last iteration)
   3.111 +		   3. add in pages that still need fixup (net bufs)
   3.112 +		 */
   3.113  		
   3.114  		pfn_batch[batch] = n;
   3.115  		pfn_type[batch] = live_pfn_to_mfn_table[n];
   3.116  
   3.117  		if( pfn_type[batch] == 0x80000004 )
   3.118  		{
    3.119 +		    /* not currently in pseudo-physical map -- set bit
    3.120 +		       in to_fix so that we send this page in last_iter
    3.121 +		       unless it's sent sooner anyhow */
   3.122 +
   3.123  		    set_bit( n, to_fix );
   3.124  		    if( iter>1 )
   3.125  			DDPRINTF("Urk! netbuf race: iter %d, pfn %lx. mfn %lx\n",
   3.126 @@ -572,7 +613,8 @@ int xc_linux_save(int xc_handle,
   3.127  
   3.128  	total_sent += sent_this_iter;
   3.129  
   3.130 -	verbose_printf("\b\b\b\b100%% (%d pages)\n", sent_this_iter );
   3.131 +	verbose_printf("\b\b\b\b100%% (pages sent= %d, skipped= %d )\n", 
   3.132 +		       sent_this_iter, skip_this_iter );
   3.133  	
   3.134  	if ( last_iter )
   3.135  	{
   3.136 @@ -604,7 +646,8 @@ int xc_linux_save(int xc_handle,
   3.137  	if ( live )
   3.138  	{
   3.139  	    if ( ( sent_this_iter > (sent_last_iter * 0.95) ) ||
   3.140 -		 (iter >= max_iters) || (sent_this_iter < 10) )
   3.141 +		 (iter >= max_iters) || (sent_this_iter < 10) || 
   3.142 +		 (total_sent > nr_pfns*2) )
   3.143  	    {
   3.144  		DPRINTF("Start last iteration\n");
   3.145  		last_iter = 1;
   3.146 @@ -685,14 +728,6 @@ int xc_linux_save(int xc_handle,
   3.147      munmap(live_shinfo, PAGE_SIZE);
   3.148  
   3.149  out:
   3.150 -    /* Restart the domain if we had to stop it to save its state. */
   3.151 -    if ( we_stopped_it )
   3.152 -    {
   3.153 -	printf("Restart domain\n");
   3.154 -        op.cmd = DOM0_STARTDOMAIN;
   3.155 -        op.u.startdomain.domain = (domid_t)domid;
   3.156 -        (void)do_dom0_op(xc_handle, &op);
   3.157 -    }
   3.158  
   3.159      if ( pfn_type != NULL )
   3.160          free(pfn_type);
     4.1 --- a/tools/xc/py/Xc.c	Wed May 19 21:56:34 2004 +0000
     4.2 +++ b/tools/xc/py/Xc.c	Thu May 20 10:16:50 2004 +0000
     4.3 @@ -214,6 +214,7 @@ static PyObject *pyxc_linux_save(PyObjec
     4.4  	struct hostent *h;
     4.5  	struct sockaddr_in s;
     4.6  	int sockbufsize;
     4.7 +	int rc = -1;
     4.8  
     4.9  	int writerfn(void *fd, const void *buf, size_t count)
    4.10  	{
    4.11 @@ -257,12 +258,24 @@ static PyObject *pyxc_linux_save(PyObjec
    4.12  	if ( xc_linux_save(xc->xc_handle, dom, flags, 
    4.13                             writerfn, (void*)sd) == 0 )
    4.14  	{
    4.15 -	    close(sd);
    4.16 -	    Py_INCREF(zero);
    4.17 -	    return zero;
    4.18 +	    if ( read( sd, &rc, sizeof(int) ) != sizeof(int) )
    4.19 +		goto serr;
    4.20 +		
    4.21 +	    if ( rc == 0 )
    4.22 +	    {
    4.23 +		printf("Migration succesful -- destroy local copy\n");
    4.24 +		xc_domain_destroy( xc->xc_handle, dom, 1 );
    4.25 +		close(sd);
    4.26 +		Py_INCREF(zero);
    4.27 +		return zero;
    4.28 +	    }
    4.29 +	    else
    4.30 +		errno = rc;
    4.31  	}
    4.32  
    4.33      serr:
    4.34 +	printf("Migration failed -- restart local copy\n");
    4.35 +	xc_domain_start( xc->xc_handle, dom );
    4.36  	PyErr_SetFromErrno(xc_error);
    4.37  	if ( sd >= 0 ) close(sd);
    4.38  	return NULL;
    4.39 @@ -355,7 +368,7 @@ static PyObject *pyxc_linux_restore(PyOb
    4.40  	struct sockaddr_in s, d, p;
    4.41  	socklen_t dlen, plen;
    4.42  	int sockbufsize;
    4.43 -	int on = 1;
    4.44 +	int on = 1, rc = -1;
    4.45  
    4.46  	int readerfn(void *fd, void *buf, size_t count)
    4.47  	{
    4.48 @@ -413,13 +426,18 @@ static PyObject *pyxc_linux_restore(PyOb
    4.49                          sizeof sockbufsize) < 0 ) 
    4.50  	    goto serr;
    4.51  
    4.52 -	if ( xc_linux_restore(xc->xc_handle, dom, flags, 
    4.53 -                              readerfn, (void*)sd, &dom) == 0 )
    4.54 +	rc = xc_linux_restore(xc->xc_handle, dom, flags, 
    4.55 +                              readerfn, (void*)sd, &dom);
    4.56 +
    4.57 +	write( sd, &rc, sizeof(int) ); 
    4.58 +
    4.59 +	if (rc == 0)
    4.60  	{
    4.61  	    close(sd);
    4.62  	    Py_INCREF(zero);
    4.63  	    return zero;
    4.64  	}
    4.65 +	errno = rc;
    4.66  
    4.67      serr:
    4.68  	PyErr_SetFromErrno(xc_error);
     5.1 --- a/xen/common/shadow.c	Wed May 19 21:56:34 2004 +0000
     5.2 +++ b/xen/common/shadow.c	Thu May 20 10:16:50 2004 +0000
     5.3 @@ -110,10 +110,10 @@ static void __free_shadow_table( struct 
     5.4  }
     5.5  
     5.6  static inline int shadow_page_op( struct mm_struct *m, unsigned int op,
     5.7 -                                  struct pfn_info *spfn_info )
     5.8 +                                  struct pfn_info *spfn_info, int *work )
     5.9  {
    5.10 -    int work = 0;
    5.11      unsigned int spfn = spfn_info-frame_table;
    5.12 +	int restart = 0;
    5.13  
    5.14      switch( op )
    5.15      {
    5.16 @@ -129,7 +129,7 @@ static inline int shadow_page_op( struct
    5.17              {                    
    5.18                  if ( (spl1e[i] & _PAGE_PRESENT ) && (spl1e[i] & _PAGE_RW) )
    5.19                  {
    5.20 -                    work++;
    5.21 +                    *work++;
    5.22                      spl1e[i] &= ~_PAGE_RW;
    5.23                  }
    5.24              }
    5.25 @@ -138,14 +138,36 @@ static inline int shadow_page_op( struct
    5.26      }
    5.27  	break;
    5.28  
    5.29 +    case DOM0_SHADOW_CONTROL_OP_CLEAN2:
    5.30 +    {
    5.31 +        if ( (spfn_info->type_and_flags & PGT_type_mask) == 
    5.32 +             PGT_l1_page_table )
    5.33 +        {
    5.34 +			delete_shadow_status( m, frame_table-spfn_info );
    5.35 +			restart = 1; // we need to go to start of list again
    5.36 +		}
    5.37 +		else if ( (spfn_info->type_and_flags & PGT_type_mask) == 
    5.38 +             PGT_l2_page_table )
    5.39 +		{
    5.40 +			unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
    5.41 +			memset( spl1e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*spl1e) );
    5.42 +			unmap_domain_mem( spl1e );
    5.43 +		}
    5.44 +		else
    5.45 +			BUG();
    5.46      }
    5.47 -    return work;
    5.48 +	break;
    5.49 +
    5.50 +
    5.51 +
    5.52 +    }
    5.53 +    return restart;
    5.54  }
    5.55  
    5.56  static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
    5.57  {
    5.58      int j, work=0;
    5.59 -    struct shadow_status *a;
    5.60 +    struct shadow_status *a, *next;
    5.61   
    5.62      // the code assumes you're not using the page tables i.e.
    5.63      // the domain is stopped and cr3 is something else!!
    5.64 @@ -156,16 +178,25 @@ static void __scan_shadow_table( struct 
    5.65  
    5.66      for(j=0;j<shadow_ht_buckets;j++)
    5.67      {
    5.68 -        a = &m->shadow_ht[j];        
    5.69 +	retry:
    5.70 +        a = &m->shadow_ht[j];     
    5.71 +		next = a->next;
    5.72          if (a->pfn)
    5.73          {
    5.74 -            work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
    5.75 +            if ( shadow_page_op( m, op, 
    5.76 +							&frame_table[a->spfn_and_flags & PSH_pfn_mask], 
    5.77 +							&work ) )
    5.78 +				goto retry;
    5.79          }
    5.80 -        a=a->next;
    5.81 +        a=next;
    5.82          while(a)
    5.83          { 
    5.84 -            work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
    5.85 -            a=a->next;
    5.86 +			next = a->next;
    5.87 +            if ( shadow_page_op( m, op, 
    5.88 +							&frame_table[a->spfn_and_flags & PSH_pfn_mask],
    5.89 +							&work ) )
    5.90 +				goto retry;
    5.91 +            a=next;
    5.92          }
    5.93          shadow_audit(m,0);
    5.94      }
    5.95 @@ -304,7 +335,8 @@ static int shadow_mode_table_op( struct 
    5.96          __free_shadow_table( m );
    5.97          break;
    5.98     
    5.99 -    case DOM0_SHADOW_CONTROL_OP_CLEAN:
   5.100 +    case DOM0_SHADOW_CONTROL_OP_CLEAN:   // zero all-non hypervisor
   5.101 +    case DOM0_SHADOW_CONTROL_OP_CLEAN2:  // zero all L2, free L1s
   5.102      {
   5.103  		int i,j,zero=1;
   5.104  		
   5.105 @@ -418,7 +450,7 @@ int shadow_mode_control( struct task_str
   5.106          if(p->mm.shadow_mode) shadow_mode_disable(p);
   5.107          shadow_mode_enable(p, SHM_logdirty);
   5.108      } 
   5.109 -    else if ( p->mm.shadow_mode && cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH && cmd<=DOM0_SHADOW_CONTROL_OP_PEEK )
   5.110 +    else if ( p->mm.shadow_mode && cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH && cmd<=DOM0_SHADOW_CONTROL_OP_CLEAN2 )
   5.111      {
   5.112          rc = shadow_mode_table_op(p, sc);
   5.113      }
     6.1 --- a/xen/include/hypervisor-ifs/dom0_ops.h	Wed May 19 21:56:34 2004 +0000
     6.2 +++ b/xen/include/hypervisor-ifs/dom0_ops.h	Thu May 20 10:16:50 2004 +0000
     6.3 @@ -236,9 +236,10 @@ typedef struct dom0_sched_id_st
     6.4  #define DOM0_SHADOW_CONTROL_OP_OFF         0
     6.5  #define DOM0_SHADOW_CONTROL_OP_ENABLE_TEST 1
     6.6  #define DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY 2
     6.7 -#define DOM0_SHADOW_CONTROL_OP_FLUSH       10
     6.8 +#define DOM0_SHADOW_CONTROL_OP_FLUSH       10     /* table ops */
     6.9  #define DOM0_SHADOW_CONTROL_OP_CLEAN       11
    6.10  #define DOM0_SHADOW_CONTROL_OP_PEEK        12
    6.11 +#define DOM0_SHADOW_CONTROL_OP_CLEAN2      13
    6.12  typedef struct dom0_shadow_control_st
    6.13  {
    6.14      /* IN variables. */