ia64/xen-unstable

changeset 1359:8d56cd44e887

bitkeeper revision 1.896 (40a0e9e8M0uaTwE5LBe9sIhr2vdX7Q)

Live migration initial checkin.
author iap10@labyrinth.cl.cam.ac.uk
date Tue May 11 14:57:44 2004 +0000 (2004-05-11)
parents ef16fc03abbf
children 0fab6364d23b
files tools/examples/xc_dom_control.py tools/xc/lib/xc.h tools/xc/lib/xc_domain.c tools/xc/lib/xc_linux_build.c tools/xc/lib/xc_linux_restore.c tools/xc/lib/xc_linux_save.c tools/xc/lib/xc_private.c tools/xc/lib/xc_private.h tools/xc/py/Xc.c tools/xend/lib/utils.c xen/common/dom0_ops.c xen/common/domain.c xen/common/memory.c xen/common/network.c xen/common/shadow.c xen/drivers/block/xen_block.c xen/include/asm-i386/processor.h xen/include/hypervisor-ifs/dom0_ops.h xen/include/xen/mm.h xen/include/xen/shadow.h xen/net/dev.c xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c xenolinux-2.4.26-sparse/arch/xen/kernel/setup.c xenolinux-2.4.26-sparse/arch/xen/kernel/time.c xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h
     1.1 --- a/tools/examples/xc_dom_control.py	Tue May 11 14:31:55 2004 +0000
     1.2 +++ b/tools/examples/xc_dom_control.py	Tue May 11 14:57:44 2004 +0000
     1.3 @@ -139,10 +139,12 @@ elif cmd == 'suspend':
     1.4      xc.domain_stop( dom=dom )
     1.5      
     1.6      while not xc.domain_getinfo( first_dom=dom, max_doms=1 )[0]['stopped']:
     1.7 -	time.sleep(0.1);
     1.8 +	print "Sleep..."
     1.9 +	time.sleep(0.001);
    1.10  
    1.11      rc = xc.linux_save( dom=dom, state_file=file, progress=1)
    1.12      if rc == 0 : xc.domain_destroy( dom=dom, force=1 )
    1.13 +    else: xc.domain_start( dom=dom )  # if the save failed, restart the domain (sensible for production use)
    1.14  
    1.15  elif cmd == 'cpu_bvtslice':
    1.16      if len(sys.argv) < 3:
     2.1 --- a/tools/xc/lib/xc.h	Tue May 11 14:31:55 2004 +0000
     2.2 +++ b/tools/xc/lib/xc.h	Tue May 11 14:57:44 2004 +0000
     2.3 @@ -57,7 +57,10 @@ int xc_domain_getinfo(int xc_handle,
     2.4  
     2.5  int xc_shadow_control(int xc_handle,
     2.6                        u64 domid, 
     2.7 -                      unsigned int sop);
     2.8 +                      unsigned int sop,
     2.9 +		      unsigned long *dirty_bitmap,
    2.10 +		      unsigned long pages);
    2.11 +
    2.12  
    2.13  #define XCFLAGS_VERBOSE 1
    2.14  #define XCFLAGS_LIVE    2
    2.15 @@ -247,11 +250,6 @@ int xc_readconsolering(int xc_handle,
    2.16  int xc_physinfo(int xc_handle,
    2.17                  xc_physinfo_t *info);
    2.18  
    2.19 -
    2.20 -int xc_shadow_control(int xc_handle,
    2.21 -                      u64 domid, 
    2.22 -                      unsigned int sop);
    2.23 -
    2.24  int xc_domain_setname(int xc_handle,
    2.25                        u64 domid, 
    2.26                        char *name);
     3.1 --- a/tools/xc/lib/xc_domain.c	Tue May 11 14:31:55 2004 +0000
     3.2 +++ b/tools/xc/lib/xc_domain.c	Tue May 11 14:57:44 2004 +0000
     3.3 @@ -109,13 +109,24 @@ int xc_domain_getinfo(int xc_handle,
     3.4  
     3.5  int xc_shadow_control(int xc_handle,
     3.6                        u64 domid, 
     3.7 -                      unsigned int sop)
     3.8 +                      unsigned int sop,
     3.9 +		      unsigned long *dirty_bitmap,
    3.10 +		      unsigned long pages)
    3.11  {
    3.12 +    int rc;
    3.13      dom0_op_t op;
    3.14      op.cmd = DOM0_SHADOW_CONTROL;
    3.15      op.u.shadow_control.domain = (domid_t)domid;
    3.16      op.u.shadow_control.op     = sop;
    3.17 -    return do_dom0_op(xc_handle, &op);
    3.18 +    op.u.shadow_control.dirty_bitmap = dirty_bitmap;
    3.19 +    op.u.shadow_control.pages  = pages;
    3.20 +
    3.21 +    rc = do_dom0_op(xc_handle, &op);
    3.22 +
    3.23 +    if ( rc == 0 )
    3.24 +	return op.u.shadow_control.pages;
    3.25 +    else
    3.26 +	return rc;
    3.27  }
    3.28  
    3.29  int xc_domain_setname(int xc_handle,
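
Note on usage: the extended xc_shadow_control() now carries a dirty-bitmap
buffer and its size, and on success returns the number of pages the bitmap
covers (negative on error). A minimal caller sketch, not part of this
changeset -- buffer sizing and error handling are illustrative only:

    #include <stdlib.h>
    #include <sys/mman.h>

    int poll_dirty_bitmap(int xc_handle, u64 domid, unsigned long nr_pfns)
    {
        unsigned long sz = (nr_pfns + 7) / 8;
        unsigned long *bitmap = malloc(sz);

        /* mlock the buffer: the hypervisor copy_to_user()s into it. */
        if ( (bitmap == NULL) || mlock(bitmap, sz) )
            return -1;

        if ( xc_shadow_control(xc_handle, domid,
                               DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
                               NULL, 0) < 0 )
            return -1;

        /* ... let the domain run and dirty some pages ... */

        /* Fetch and clear the dirty log accumulated since enable. */
        if ( xc_shadow_control(xc_handle, domid,
                               DOM0_SHADOW_CONTROL_OP_CLEAN,
                               bitmap, nr_pfns) < 0 )
            return -1;

        return 0;
    }
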
     4.1 --- a/tools/xc/lib/xc_linux_build.c	Tue May 11 14:31:55 2004 +0000
     4.2 +++ b/tools/xc/lib/xc_linux_build.c	Tue May 11 14:57:44 2004 +0000
     4.3 @@ -284,7 +284,7 @@ static int setup_guestos(int xc_handle,
     4.4  
     4.5      /* shared_info page starts its life empty. */
     4.6      shared_info = map_pfn_writeable(pm_handle, shared_info_frame);
     4.7 -    memset(shared_info, 0, PAGE_SIZE);
     4.8 +    memset(shared_info, 0, sizeof(shared_info_t));
     4.9      /* Mask all upcalls... */
    4.10      for ( i = 0; i < MAX_VIRT_CPUS; i++ )
    4.11          shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
     5.1 --- a/tools/xc/lib/xc_linux_restore.c	Tue May 11 14:31:55 2004 +0000
     5.2 +++ b/tools/xc/lib/xc_linux_restore.c	Tue May 11 14:57:44 2004 +0000
     5.3 @@ -230,9 +230,16 @@ int xc_linux_restore(int xc_handle,
     5.4              goto out;
     5.5          }
     5.6  
     5.7 -	//printf("batch=%d\n",j);
     5.8 +	printf("batch %d\n",j);
     5.9  	
    5.10 -	if(j==0) break;  // our work here is done
    5.11 +	if (j == 0) 
    5.12 +	    break;  // our work here is done
    5.13 +
    5.14 +	if( j > MAX_BATCH_SIZE )
    5.15 +	{
    5.16 +	    ERROR("Max batch size exceeded. Giving up.");
    5.17 +	    goto out;
    5.18 +	}
    5.19  	
    5.20          if ( (*readerfn)(readerst, region_pfn_type, j*sizeof(unsigned long)) )
    5.21          {
    5.22 @@ -242,6 +249,9 @@ int xc_linux_restore(int xc_handle,
    5.23  
    5.24  	for(i=0;i<j;i++)
    5.25  	{
    5.26 +            if ((region_pfn_type[i]>>29) == 7)
    5.27 +		continue;
    5.28 +
    5.29  	    pfn = region_pfn_type[i] & ~PGT_type_mask;
    5.30  	    mfn = pfn_to_mfn_table[pfn];
    5.31  	    
    5.32 @@ -261,9 +271,15 @@ int xc_linux_restore(int xc_handle,
    5.33  	    unsigned long *ppage;
    5.34  
    5.35  	    pfn = region_pfn_type[i] & ~PGT_type_mask;
    5.36 +
    5.37 +//if(n>=nr_pfns || ((region_pfn_type[i] & PGT_type_mask) == L2TAB) ) printf("pfn=%08lx mfn=%x\n",region_pfn_type[i],pfn_to_mfn_table[pfn]);
    5.38 +
    5.39  	    	  	    
    5.40  //if(pfn_type[i])printf("^pfn=%d %08lx\n",pfn,pfn_type[i]);
    5.41  
    5.42 +            if ((region_pfn_type[i]>>29) == 7)
    5.43 +		continue;
    5.44 +
    5.45              if (pfn>nr_pfns)
    5.46  	    {
    5.47  		ERROR("pfn out of range");
    5.48 @@ -304,7 +320,7 @@ int xc_linux_restore(int xc_handle,
    5.49  
    5.50  			if ( xpfn >= nr_pfns )
    5.51  			{
    5.52 -			    ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=%d nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns);
    5.53 +			    ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=0x%x nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns);
    5.54  			    goto out;
    5.55  			}
    5.56  #if 0
    5.57 @@ -355,17 +371,19 @@ int xc_linux_restore(int xc_handle,
    5.58  	    default:
    5.59  		ERROR("Bogus page type %x page table is out of range. i=%d nr_pfns=%d",region_pfn_type[i],i,nr_pfns);
    5.60  		goto out;
    5.61 -	    }
    5.62 +
    5.63 +	    } // end of page type switch statement
    5.64  
    5.65  	    if ( add_mmu_update(xc_handle, mmu,
    5.66  				(mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) )
    5.67  		goto out;
    5.68  
    5.69 -	}
    5.70 +	} // end of 'batch' for loop
    5.71  
    5.72  	n+=j; // crude stats
    5.73  
    5.74      }
    5.75 +printf("RECEIVED ALL PAGES\n");
    5.76  
    5.77      mfn_mapper_close( region_mapper );
    5.78  
    5.79 @@ -381,7 +399,10 @@ int xc_linux_restore(int xc_handle,
    5.80                                  (pfn_to_mfn_table[i]<<PAGE_SHIFT) | 
    5.81                                  MMU_EXTENDED_COMMAND,
    5.82                                  MMUEXT_PIN_L1_TABLE) )
    5.83 +	    {
     5.84 +		printf("ERR pin L1 pfn=%lx mfn=%lx\n", (unsigned long)i, pfn_to_mfn_table[i]);
    5.85                  goto out;
    5.86 +	    }
    5.87          }
    5.88          else if ( pfn_type[i] == L2TAB )
    5.89          {
    5.90 @@ -389,7 +410,10 @@ int xc_linux_restore(int xc_handle,
    5.91                                  (pfn_to_mfn_table[i]<<PAGE_SHIFT) | 
    5.92                                  MMU_EXTENDED_COMMAND,
    5.93                                  MMUEXT_PIN_L2_TABLE) )
    5.94 +	    {
     5.95 +		printf("ERR pin L2 pfn=%lx mfn=%lx\n", (unsigned long)i, pfn_to_mfn_table[i]);
    5.96                  goto out;
    5.97 +	    }
    5.98          }
    5.99      }
   5.100  
   5.101 @@ -421,6 +445,8 @@ int xc_linux_restore(int xc_handle,
   5.102      p_srec->resume_info.flags       = 0;
   5.103      unmap_pfn(pm_handle, p_srec);
   5.104  
   5.105 +printf("new shared info is %lx\n", shared_info_frame);
   5.106 +
   5.107      /* Uncanonicalise each GDT frame number. */
   5.108      if ( ctxt.gdt_ents > 8192 )
   5.109      {
   5.110 @@ -451,7 +477,7 @@ int xc_linux_restore(int xc_handle,
   5.111  
   5.112      /* Copy saved contents of shared-info page. No checking needed. */
   5.113      ppage = map_pfn_writeable(pm_handle, shared_info_frame);
   5.114 -    memcpy(ppage, shared_info, PAGE_SIZE);
   5.115 +    memcpy(ppage, shared_info, sizeof(shared_info_t));
   5.116      unmap_pfn(pm_handle, ppage);
   5.117  
   5.118  
   5.119 @@ -528,7 +554,9 @@ int xc_linux_restore(int xc_handle,
   5.120      op.u.builddomain.ctxt = &ctxt;
   5.121      rc = do_dom0_op(xc_handle, &op);
   5.122  
   5.123 +printf("NORMAL EXIT RESTORE\n");
   5.124   out:
   5.125 +printf("EXIT RESTORE\n");
   5.126      if ( mmu != NULL )
   5.127          free(mmu);
   5.128  
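
Note on the page-type encoding consumed above: each region_pfn_type[]
entry carries the page type in its top three bits and the canonical
(pseudophysical) frame number in the rest; type 7 marks a page the saver
could not map, which the restore loop skips. A small decode sketch -- the
shift value is inferred from the (x>>29)==7 tests, and PGT_type_mask comes
from the Xen headers:

    #define PGT_TYPE_SHIFT 29    /* assumed: matches the >>29 tests above */

    static int entry_is_bogus(unsigned long entry)
    {
        return (entry >> PGT_TYPE_SHIFT) == 7;  /* unmappable: skip it */
    }

    static unsigned long entry_pfn(unsigned long entry)
    {
        return entry & ~PGT_type_mask;          /* strip the type bits */
    }
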
     6.1 --- a/tools/xc/lib/xc_linux_save.c	Tue May 11 14:31:55 2004 +0000
     6.2 +++ b/tools/xc/lib/xc_linux_save.c	Tue May 11 14:57:44 2004 +0000
     6.3 @@ -22,11 +22,17 @@
     6.4  /*
     6.5   * Returns TRUE if the given machine frame number has a unique mapping
     6.6   * in the guest's pseudophysical map.
      6.7 + * m2p values 0x80000000-0x80000003 mark the shared_info and blk/net rings.
     6.8   */
     6.9  #define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
    6.10 -    (((_mfn) < (1024*1024)) &&          \
    6.11 -     (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)))
    6.12 -
    6.13 +    (((_mfn) < (1024*1024)) && \
    6.14 +     ( ( (live_mfn_to_pfn_table[_mfn] < nr_pfns) && \
    6.15 +       (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)) ) || \
    6.16 +\
    6.17 +       (live_mfn_to_pfn_table[_mfn] >= 0x80000000 && \
    6.18 +	live_mfn_to_pfn_table[_mfn] <= 0x80000003 ) || \
    6.19 +	live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == 0x80000004 )  )
    6.20 +     
    6.21  /* Returns TRUE if MFN is successfully converted to a PFN. */
    6.22  #define translate_mfn_to_pfn(_pmfn)         \
    6.23  ({                                          \
    6.24 @@ -40,6 +46,14 @@
    6.25  })
    6.26  
    6.27  
    6.28 +/* test_bit */
    6.29 +inline int test_bit ( int nr, volatile void * addr)
    6.30 +{
    6.31 +    return ( ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] >> 
    6.32 +	     (nr % (sizeof(unsigned long)*8) ) ) & 1;
    6.33 +}
    6.34 +
    6.35 +
    6.36  int xc_linux_save(int xc_handle,
    6.37                    u64 domid, 
    6.38  		  unsigned int flags,
    6.39 @@ -47,14 +61,11 @@ int xc_linux_save(int xc_handle,
    6.40  		  void *writerst )
    6.41  {
    6.42      dom0_op_t op;
    6.43 -    int rc = 1, i, j, k, n;
    6.44 +    int rc = 1, i, j, k, n, last_iter, iter = 0;
    6.45      unsigned long mfn;
    6.46 -    unsigned int prev_pc, this_pc;
    6.47      int verbose = flags & XCFLAGS_VERBOSE;
    6.48 -    //int live = flags & XCFLAGS_LIVE;
    6.49 -
    6.50 -    /* state of the new MFN mapper */
    6.51 -    mfn_mapper_t *mapper_handle1, *mapper_handle2;
    6.52 +    int live = 1; //flags & XCFLAGS_LIVE;     // XXXXXXXXXXXXXXXXXXX
    6.53 +    int sent_last_iter, sent_this_iter, max_iters;
    6.54  
    6.55      /* Remember if we stopped the guest, so we can restart it on exit. */
    6.56      int we_stopped_it = 0;
    6.57 @@ -90,8 +101,13 @@ int xc_linux_save(int xc_handle,
    6.58      unsigned char *region_base;
    6.59  
    6.60      /* A temporary mapping, and a copy, of the guest's suspend record. */
    6.61 -    suspend_record_t *p_srec, srec;
    6.62 +    suspend_record_t *p_srec;
    6.63  
    6.64 +    /* number of pages we're dealing with */
    6.65 +    unsigned long nr_pfns;
    6.66 +
    6.67 +    /* bitmap of pages left to send */
    6.68 +    unsigned long *to_send;
    6.69  
    6.70      if ( mlock(&ctxt, sizeof(ctxt) ) )
    6.71      {
    6.72 @@ -129,21 +145,24 @@ int xc_linux_save(int xc_handle,
    6.73              goto out;
    6.74          }
    6.75  
    6.76 -        sleep(1);
    6.77 +        usleep(1000); // 1ms
    6.78 +	printf("Sleep for 1ms\n");
    6.79      }
    6.80  
    6.81 +#if 1
    6.82      /* A cheesy test to see whether the domain contains valid state. */
    6.83      if ( ctxt.pt_base == 0 )
    6.84      {
    6.85          ERROR("Domain is not in a valid Linux guest OS state");
    6.86          goto out;
    6.87      }
    6.88 +#endif
    6.89  
    6.90  
    6.91      /* Map the suspend-record MFN to pin it. The page must be owned by 
    6.92         domid for this to succeed. */
    6.93      p_srec = mfn_mapper_map_single(xc_handle, domid,
    6.94 -				 sizeof(srec), PROT_READ, 
    6.95 +				 sizeof(*p_srec), PROT_READ, 
    6.96  				 ctxt.cpu_ctxt.esi );
    6.97  
    6.98      if (!p_srec)
    6.99 @@ -152,10 +171,10 @@ int xc_linux_save(int xc_handle,
   6.100          goto out;
   6.101      }
   6.102  
   6.103 -    memcpy( &srec, p_srec, sizeof(srec) );
   6.104 +    nr_pfns = p_srec->nr_pfns;
   6.105  
   6.106      /* cheesy sanity check */
   6.107 -    if ( srec.nr_pfns > 1024*1024 )
   6.108 +    if ( nr_pfns > 1024*1024 )
   6.109      {
   6.110          ERROR("Invalid state record -- pfn count out of range");
   6.111          goto out;
   6.112 @@ -165,55 +184,13 @@ int xc_linux_save(int xc_handle,
   6.113      live_pfn_to_mfn_frame_list = 
   6.114  	mfn_mapper_map_single(xc_handle, domid, 
   6.115  			      PAGE_SIZE, PROT_READ, 
   6.116 -			      srec.pfn_to_mfn_frame_list );
   6.117 +			      p_srec->pfn_to_mfn_frame_list );
   6.118  
   6.119      if (!live_pfn_to_mfn_frame_list)
   6.120      {
   6.121          ERROR("Couldn't map pfn_to_mfn_frame_list");
   6.122          goto out;
   6.123      }
   6.124 -   
   6.125 -
   6.126 -    if ( (mapper_handle1 = mfn_mapper_init(xc_handle, domid,
   6.127 -					   1024*1024, PROT_READ )) 
   6.128 -	 == NULL )
   6.129 -        goto out;
   6.130 -	
   6.131 -    for ( i = 0; i < (srec.nr_pfns+1023)/1024; i++ )
   6.132 -    {
   6.133 -	/* Grab a copy of the pfn-to-mfn table frame list. 
   6.134 -	 This has the effect of preventing the page from being freed and
   6.135 -	 given to another domain. (though the domain is stopped anyway...) */
   6.136 -	mfn_mapper_queue_entry( mapper_handle1, i<<PAGE_SHIFT, 
   6.137 -				live_pfn_to_mfn_frame_list[i],
   6.138 -				PAGE_SIZE );
   6.139 -    }
   6.140 -    
   6.141 -    if ( mfn_mapper_flush_queue(mapper_handle1) )
   6.142 -    {
   6.143 -        ERROR("Couldn't map pfn_to_mfn table");
   6.144 -        goto out;
   6.145 -    }
   6.146 -
   6.147 -    live_pfn_to_mfn_table = mfn_mapper_base( mapper_handle1 );
   6.148 -
   6.149 -
   6.150 -
   6.151 -    /* We want zeroed memory so use calloc rather than malloc. */
   6.152 -    pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
   6.153 -
   6.154 -    if ( (pfn_type == NULL) )
   6.155 -    {
   6.156 -        errno = ENOMEM;
   6.157 -        goto out;
   6.158 -    }
   6.159 -
   6.160 -    if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) )
   6.161 -    {
   6.162 -	ERROR("Unable to mlock");
   6.163 -	goto out;
   6.164 -    }
   6.165 -
   6.166  
   6.167      /* Track the mfn_to_pfn table down from the domains PT */
   6.168      {
   6.169 @@ -233,49 +210,106 @@ int xc_linux_save(int xc_handle,
   6.170  				  mfn_to_pfn_table_start_mfn );
   6.171      }
   6.172  
   6.173 +    /* Map all the frames of the pfn->mfn table. For migrate to succeed, 
   6.174 +       the guest must not change which frames are used for this purpose. 
    6.175 +       (it's not clear why it would want to change them, and we'll be OK
    6.176 +       from a safety POV anyhow.) */
   6.177 +
   6.178 +    live_pfn_to_mfn_table = mfn_mapper_map_batch( xc_handle, domid, 
   6.179 +						  PROT_READ,
   6.180 +						  live_pfn_to_mfn_frame_list,
   6.181 +						  (nr_pfns+1023)/1024 );  
   6.182 +    if( !live_pfn_to_mfn_table )
   6.183 +    {
   6.184 +        PERROR("Couldn't map pfn_to_mfn table");
   6.185 +        goto out;
   6.186 +    }
   6.187 +
   6.188 +    for(i=0;i<(nr_pfns+1023)/1024 ;i++)
   6.189 +	printf("LF: %d %x\n",i,live_pfn_to_mfn_frame_list[i]);
   6.190 +
   6.191 +
    6.192 +    /* At this point, we can start the domain again if we're doing a
   6.193 +       live suspend */
   6.194 +
   6.195 +    if( live )
   6.196 +    { 
   6.197 +#if 1
   6.198 +	if ( xc_shadow_control( xc_handle, domid, 
   6.199 +			   DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
   6.200 +			   NULL, 0 ) < 0 )
   6.201 +	{
   6.202 +	    ERROR("Couldn't enable shadow mode");
   6.203 +	    goto out;
   6.204 +	}
   6.205 +#endif 
   6.206 +	if ( xc_domain_start( xc_handle, domid ) < 0 )
   6.207 +	{
   6.208 +	    ERROR("Couldn't restart domain");
   6.209 +	    goto out;
   6.210 +	}
   6.211 +//exit(-1);
   6.212 +	last_iter = 0;
   6.213 +	sent_last_iter = 1<<20; // 4GB's worth of pages
    6.214 +	max_iters = 8; // limit us to 9 times round the loop
   6.215 +    }
   6.216 +    else
   6.217 +	last_iter = 1;
   6.218 +
   6.219 +
   6.220 +    /* Setup to_send bitmap */
   6.221 +    {
   6.222 +	int sz = (nr_pfns/8) + 8; // includes slop at end of array
   6.223 +	
   6.224 +	to_send = malloc( sz );
   6.225 +
   6.226 +	if (!to_send)
   6.227 +	{
   6.228 +	    ERROR("Couldn't allocate to_send array");
   6.229 +	    goto out;
   6.230 +	}
   6.231 +	memset( to_send, 0xff, sz );
   6.232 +
   6.233 +	if ( mlock( to_send, sz ) )
   6.234 +	{
   6.235 +	    PERROR("Unable to mlock to_send");
   6.236 +	    return 1;
   6.237 +	}
   6.238 +    }
   6.239 +
   6.240 +
   6.241 +    /* We want zeroed memory so use calloc rather than malloc. */
   6.242 +    pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
   6.243 +
   6.244 +    if ( (pfn_type == NULL) )
   6.245 +    {
   6.246 +        errno = ENOMEM;
   6.247 +        goto out;
   6.248 +    }
   6.249 +
   6.250 +    if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) )
   6.251 +    {
   6.252 +	ERROR("Unable to mlock");
   6.253 +	goto out;
   6.254 +    }
   6.255 +
   6.256  
   6.257      /*
   6.258       * Quick belt and braces sanity check.
   6.259       */
   6.260  
   6.261 -    for ( i = 0; i < srec.nr_pfns; i++ )
   6.262 +    for ( i = 0; i < nr_pfns; i++ )
   6.263      {
   6.264          mfn = live_pfn_to_mfn_table[i];
   6.265  
   6.266 -	if( live_mfn_to_pfn_table[mfn] != i )
   6.267 -	    printf("i=%d mfn=%d live_mfn_to_pfn_table=%d\n",
   6.268 +	if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0x80000004) )
   6.269 +	    printf("i=0x%x mfn=%x live_mfn_to_pfn_table=%x\n",
   6.270  		   i,mfn,live_mfn_to_pfn_table[mfn]);
   6.271      }
   6.272  
   6.273 -
   6.274 -    /* Canonicalise the suspend-record frame number. */
   6.275 -    if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) )
   6.276 -    {
   6.277 -        ERROR("State record is not in range of pseudophys map");
   6.278 -        goto out;
   6.279 -    }
   6.280 -
   6.281 -    /* Canonicalise each GDT frame number. */
   6.282 -    for ( i = 0; i < ctxt.gdt_ents; i += 512 )
   6.283 -    {
   6.284 -        if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) )
   6.285 -        {
   6.286 -            ERROR("GDT frame is not in range of pseudophys map");
   6.287 -            goto out;
   6.288 -        }
   6.289 -    }
   6.290 -
   6.291 -    /* Canonicalise the page table base pointer. */
   6.292 -    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) )
   6.293 -    {
   6.294 -        ERROR("PT base is not in range of pseudophys map");
   6.295 -        goto out;
   6.296 -    }
   6.297 -    ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
   6.298 -
   6.299      /* Canonicalise the pfn-to-mfn table frame-number list. */
   6.300      memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
   6.301 -    for ( i = 0; i < srec.nr_pfns; i += 1024 )
   6.302 +    for ( i = 0; i < nr_pfns; i += 1024 )
   6.303      {
   6.304          if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
   6.305          {
   6.306 @@ -284,7 +318,7 @@ int xc_linux_save(int xc_handle,
   6.307          }
   6.308      }
   6.309  
   6.310 -    /* Start writing out the saved-domain record. */
   6.311 +    /* Map the shared info frame */
   6.312      live_shinfo = mfn_mapper_map_single(xc_handle, domid,
   6.313  					PAGE_SIZE, PROT_READ,
   6.314  					shared_info_frame);
   6.315 @@ -295,164 +329,290 @@ int xc_linux_save(int xc_handle,
   6.316          goto out;
   6.317      }
   6.318  
   6.319 +    /* Start writing out the saved-domain record. */
   6.320 +
   6.321      if ( (*writerfn)(writerst, "LinuxGuestRecord",    16) ||
   6.322           (*writerfn)(writerst, name,                  sizeof(name)) ||
   6.323 -         (*writerfn)(writerst, &srec.nr_pfns,         sizeof(unsigned long)) ||
   6.324 -         (*writerfn)(writerst, &ctxt,                 sizeof(ctxt)) ||
   6.325 -         (*writerfn)(writerst, live_shinfo,           PAGE_SIZE) ||
   6.326 +         (*writerfn)(writerst, &nr_pfns,              sizeof(unsigned long)) ||
   6.327           (*writerfn)(writerst, pfn_to_mfn_frame_list, PAGE_SIZE) )
   6.328      {
   6.329          ERROR("Error when writing to state file (1)");
   6.330          goto out;
   6.331      }
   6.332 -    munmap(live_shinfo, PAGE_SIZE);
   6.333 -
   6.334 -    verbose_printf("Saving memory pages:   0%%");
   6.335 -
   6.336 -    if ( (mapper_handle2 = mfn_mapper_init(xc_handle, domid,
   6.337 -					   BATCH_SIZE*4096, PROT_READ )) 
   6.338 -	 == NULL )
   6.339 -        goto out;
   6.340 -
   6.341 -    region_base = mfn_mapper_base( mapper_handle2 );
   6.342  
   6.343      /* Now write out each data page, canonicalising page tables as we go... */
   6.344 -    prev_pc = 0;
   6.345 -    for ( n = 0; n < srec.nr_pfns; )
   6.346 +
   6.347 +    while(1)
   6.348      {
   6.349 -        this_pc = (n * 100) / srec.nr_pfns;
   6.350 -        if ( (this_pc - prev_pc) >= 5 )
   6.351 -        {
   6.352 -            verbose_printf("\b\b\b\b%3d%%", this_pc);
   6.353 -            prev_pc = this_pc;
   6.354 -        }
   6.355 +	unsigned int prev_pc, batch, sent_this_iter;
   6.356 +
   6.357 +	iter++;
   6.358 +
   6.359 +	sent_this_iter = 0;
   6.360 +	prev_pc = 0;
   6.361 +	verbose_printf("Saving memory pages: iter %d   0%%", iter);
   6.362  
   6.363 -	for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
   6.364 -	{		
   6.365 -	    pfn_type[j] = live_pfn_to_mfn_table[i];
   6.366 -	}
   6.367 +	n=0;
   6.368 +	while( n < nr_pfns )
   6.369 +	{
   6.370 +	    unsigned int this_pc = (n * 100) / nr_pfns;
   6.371 +	    if ( (this_pc - prev_pc) >= 5 )
   6.372 +	    {
   6.373 +		verbose_printf("\b\b\b\b%3d%%", this_pc);
   6.374 +		prev_pc = this_pc;
   6.375 +	    }
   6.376  
   6.377  
   6.378 -	for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
   6.379 -	{
   6.380 -	    /* queue up mappings for all of the pages in this batch */
   6.381 +	    /* load pfn_type[] with the mfn of all the pages we're doing in
   6.382 +	       this batch. */
   6.383 +
   6.384 +	    for( batch = 0; batch < BATCH_SIZE && n < nr_pfns ; n++ )
   6.385 +	    {
   6.386 +		if ( !test_bit(n, to_send ) ) continue;
   6.387 +
   6.388 +		pfn_type[batch] = live_pfn_to_mfn_table[n];
   6.389 +
   6.390 +		if( pfn_type[batch] == 0x80000004 )
   6.391 +		{
   6.392 +		    //printf("Skip netbuf pfn %lx. mfn %lx\n",n,pfn_type[batch]);
   6.393 +		    continue;
   6.394 +		}
   6.395  
   6.396 -//printf("region n=%d j=%d i=%d mfn=%d\n",n,j,i,live_pfn_to_mfn_table[i]);
   6.397 -	    mfn_mapper_queue_entry( mapper_handle2, j<<PAGE_SHIFT, 
   6.398 -				    live_pfn_to_mfn_table[i],
   6.399 -				    PAGE_SIZE );
   6.400 -	}
   6.401 +//if(iter>1) printf("pfn=%x mfn=%x\n",n,pfn_type[batch]);
   6.402 +		
   6.403 +		batch++;
   6.404 +	    }
   6.405 +
   6.406 +	    for( j = 0; j < batch; j++ )
   6.407 +	    {
   6.408 +
   6.409 +		if( (pfn_type[j] &0xfffff) == 0x0000004 )
   6.410 +		{
    6.411 +		    printf("Skip netbuf entry %d mfn %lx\n",j,pfn_type[j]);
   6.412 +		}
   6.413 +
   6.414 +		
   6.415 +	    }
   6.416 +
   6.417 +	    
   6.418 +	    printf("batch %d:%d (n=%d)\n",iter,batch,n);
   6.419  
   6.420 -	if( mfn_mapper_flush_queue(mapper_handle2) )
   6.421 -	{
   6.422 -	    ERROR("Couldn't map page region");
   6.423 -	    goto out;
   6.424 -	}
   6.425 -
   6.426 -	if ( get_pfn_type_batch(xc_handle, domid, j, pfn_type) )
   6.427 -	{
   6.428 -	    ERROR("get_pfn_type_batch failed");
   6.429 -	    goto out;
   6.430 -	}
   6.431 -	
   6.432 -	for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
   6.433 -	{
   6.434 -	    if((pfn_type[j]>>29) == 7)
   6.435 +	    if(batch == 0) goto skip; // vanishingly unlikely...
   6.436 + 	    
   6.437 +	    if ( (region_base = mfn_mapper_map_batch( xc_handle, domid, 
   6.438 +						      PROT_READ,
   6.439 +						      pfn_type,
   6.440 +						      batch )) == 0)
   6.441 +	    {
   6.442 +		PERROR("map batch failed");
   6.443 +		goto out;
   6.444 +	    }
   6.445 +	    
   6.446 +	    if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) )
   6.447  	    {
   6.448 -		ERROR("bogus page");
   6.449 +		ERROR("get_pfn_type_batch failed");
   6.450 +		goto out;
   6.451 +	    }
   6.452 +	    
   6.453 +	    for( j = 0; j < batch; j++ )
   6.454 +	    {
   6.455 +		if((pfn_type[j]>>29) == 7)
   6.456 +		{
   6.457 +		    //printf("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
   6.458 +		    continue;
   6.459 +		}
   6.460 +//if((pfn_type[j] & PGT_type_mask) == L2TAB) printf("L2 pfn=%08lx mfn=%lx\n",pfn_type[j],live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask]);
   6.461 +		
   6.462 +		/* canonicalise mfn->pfn */
   6.463 +		pfn_type[j] = (pfn_type[j] & PGT_type_mask) |
   6.464 +		    live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask];
   6.465 +	    }
   6.466 +	    
   6.467 +	    
   6.468 +	    if ( (*writerfn)(writerst, &batch, sizeof(int) ) )
   6.469 +	    {
   6.470 +		ERROR("Error when writing to state file (2)");
   6.471  		goto out;
   6.472  	    }
   6.473  
   6.474 -	    /* canonicalise mfn->pfn */
   6.475 -	    pfn_type[j] = (pfn_type[j] & PGT_type_mask) |
   6.476 -		live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask];
   6.477 +	    if ( (*writerfn)(writerst, pfn_type, sizeof(unsigned long)*j ) )
   6.478 +	    {
   6.479 +		ERROR("Error when writing to state file (3)");
   6.480 +		goto out;
   6.481 +	    }
   6.482  	    
   6.483 -/*	    if(pfn_type[j]>>29)
   6.484 -		    printf("i=%d type=%d\n",i,pfn_type[i]);    */
   6.485 +	    /* entering this loop, pfn_type is now in pfns (Not mfns) */
   6.486 +	    for( j = 0; j < batch; j++ )
   6.487 +	    {
   6.488 +		/* write out pages in batch */
   6.489 +		
   6.490 +		if((pfn_type[j]>>29) == 7)
   6.491 +		{
   6.492 +		    //printf("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
   6.493 +		    continue;
   6.494 +		}
   6.495 +		
   6.496 +		if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) || 
   6.497 +		     ((pfn_type[j] & PGT_type_mask) == L2TAB) )
   6.498 +		{
   6.499 +		    
   6.500 +		    memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
   6.501 +		    
   6.502 +		    for ( k = 0; 
   6.503 +			  k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ? 
   6.504 +		       (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024); 
   6.505 +			  k++ )
   6.506 +		    {
   6.507 +			unsigned long pfn;
   6.508 +
   6.509 +			if ( !(page[k] & _PAGE_PRESENT) ) continue;
   6.510 +			mfn = page[k] >> PAGE_SHIFT;		    
   6.511 +			pfn = live_mfn_to_pfn_table[mfn];
   6.512 +
   6.513 +			if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
   6.514 +			{
   6.515 +			    printf("FNI %d : [%08lx,%d] pte=%08lx, mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
   6.516 +				   j, pfn_type[j], k,
   6.517 +				   page[k], mfn, live_mfn_to_pfn_table[mfn],
   6.518 +				   (live_mfn_to_pfn_table[mfn]<nr_pfns)? 
   6.519 +				live_pfn_to_mfn_table[live_mfn_to_pfn_table[mfn]]: 0xdeadbeef);
   6.520 +			    pfn = 0; // be suspicious
   6.521 +			    
   6.522 +//			    ERROR("Frame number in pagetable page is invalid");
   6.523 +//			    goto out;
   6.524 +
   6.525 +
   6.526 +			}
   6.527 +			page[k] &= PAGE_SIZE - 1;
   6.528 +			page[k] |= pfn << PAGE_SHIFT;
   6.529 +			
   6.530 +			/*
   6.531 +			  printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
   6.532 +			  pfn_type[j]>>29,
   6.533 +			  j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
   6.534 +			  */
   6.535 +			
   6.536 +		    } /* end of page table rewrite for loop */
   6.537 +		    
   6.538 +		    if ( (*writerfn)(writerst, page, PAGE_SIZE) )
   6.539 +		    {
   6.540 +			ERROR("Error when writing to state file (4)");
   6.541 +			goto out;
   6.542 +		    }
   6.543 +		    
   6.544 +		}  /* end of it's a PT page */
   6.545 +		else
   6.546 +		{  /* normal page */
   6.547 +		    if ( (*writerfn)(writerst, region_base + (PAGE_SIZE*j), PAGE_SIZE) )
   6.548 +		    {
   6.549 +			ERROR("Error when writing to state file (5)");
   6.550 +			goto out;
   6.551 +		    }
   6.552 +		}
   6.553 +	    } /* end of the write out for this batch */
   6.554 +	    
   6.555 +	    sent_this_iter += batch;
   6.556 +
   6.557 +	} /* end of this while loop for this iteration */
   6.558 +
   6.559 +	munmap(region_base, batch*PAGE_SIZE);
   6.560 +
   6.561 +    skip: 
   6.562 +	
   6.563 +	verbose_printf("\b\b\b\b100%% (%d pages)\n", sent_this_iter );
   6.564 +	
   6.565 +	if ( last_iter )
   6.566 +	    break;
   6.567 +
   6.568 +	if ( live )
   6.569 +	{
   6.570 +	    if ( sent_this_iter < (sent_last_iter * 0.95) && iter < max_iters )
   6.571 +	    {
   6.572 +		// we seem to be doing OK, keep going
   6.573 +	    }
   6.574 +	    else
   6.575 +	    {
   6.576 +		printf("Start last iteration\n");
   6.577 +		last_iter = 1;
   6.578 +
   6.579 +		xc_domain_stop_sync( xc_handle, domid );
   6.580 +
   6.581 +	    } 
   6.582 +
   6.583 +	    if ( xc_shadow_control( xc_handle, domid, 
   6.584 +				    DOM0_SHADOW_CONTROL_OP_CLEAN,
   6.585 +				    to_send, nr_pfns ) != nr_pfns ) 
   6.586 +	    {
   6.587 +		ERROR("Error flushing shadow PT");
   6.588 +		goto out;
   6.589 +	    }
   6.590 +
   6.591 +#if 0
   6.592 +	    if(last_iter) memset(to_send, 0xff, (nr_pfns+7)/8 );
   6.593 +#endif
   6.594 +
   6.595 +	    sent_last_iter = sent_this_iter;
   6.596  	}
   6.597  
   6.598  
   6.599 -	if ( (*writerfn)(writerst, &j, sizeof(int) ) )
   6.600 -	{
   6.601 -	    ERROR("Error when writing to state file (2)");
   6.602 -	    goto out;
   6.603 -	}
   6.604 -
   6.605 -	if ( (*writerfn)(writerst, pfn_type, sizeof(unsigned long)*j ) )
   6.606 -	{
   6.607 -	    ERROR("Error when writing to state file (3)");
   6.608 -	    goto out;
   6.609 -	}
   6.610 -
   6.611 -
   6.612 -	for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
   6.613 -	{
   6.614 -	    /* write out pages in batch */
   6.615 -
   6.616 -	    if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) || 
   6.617 -		 ((pfn_type[j] & PGT_type_mask) == L2TAB) )
   6.618 -	    {
   6.619 -		
   6.620 -		memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
   6.621 -
   6.622 -		for ( k = 0; 
   6.623 -		      k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ? 
   6.624 -			   (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024); 
   6.625 -		      k++ )
   6.626 -		{
   6.627 -		    if ( !(page[k] & _PAGE_PRESENT) ) continue;
   6.628 -		    mfn = page[k] >> PAGE_SHIFT;		    
   6.629 +    } /* end of while 1 */
   6.630  
   6.631 -		    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
   6.632 -		    {
   6.633 -			ERROR("Frame number in pagetable page is invalid");
   6.634 -			goto out;
   6.635 -		    }
   6.636 -		    page[k] &= PAGE_SIZE - 1;
   6.637 - 		    page[k] |= live_mfn_to_pfn_table[mfn] << PAGE_SHIFT;
   6.638 -
   6.639 -		    /*
   6.640 -		    printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
   6.641 -			   pfn_type[j]>>29,
   6.642 -			   j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
   6.643 -			   */
   6.644 -
   6.645 -		}
   6.646 -
   6.647 -		if ( (*writerfn)(writerst, page, PAGE_SIZE) )
   6.648 -		{
   6.649 -		    ERROR("Error when writing to state file (4)");
   6.650 -		    goto out;
   6.651 -		}
   6.652 -
   6.653 -
   6.654 -	    }
   6.655 -	    else
   6.656 -	    {
   6.657 -		if ( (*writerfn)(writerst, region_base + (PAGE_SIZE*j), PAGE_SIZE) )
   6.658 -		{
   6.659 -		    ERROR("Error when writing to state file (5)");
   6.660 -		    goto out;
   6.661 -		}
   6.662 -	    }
   6.663 -	}
   6.664 -	
   6.665 -	n+=j; /* i is the master loop counter */
   6.666 -    }
   6.667 -
   6.668 -    verbose_printf("\b\b\b\b100%%\nMemory saved.\n");
   6.669 +printf("All memory is saved\n");
   6.670  
   6.671      /* Success! */
   6.672      rc = 0;
   6.673 -
   6.674 +    
   6.675      /* Zero terminate */
   6.676      if ( (*writerfn)(writerst, &rc, sizeof(int)) )
   6.677      {
   6.678  	ERROR("Error when writing to state file (6)");
   6.679  	goto out;
   6.680      }
   6.681 -    
   6.682  
   6.683 +    /* Get the final execution context */
   6.684 +    op.cmd = DOM0_GETDOMAININFO;
   6.685 +    op.u.getdomaininfo.domain = (domid_t)domid;
   6.686 +    op.u.getdomaininfo.ctxt = &ctxt;
   6.687 +    if ( (do_dom0_op(xc_handle, &op) < 0) || 
   6.688 +	 ((u64)op.u.getdomaininfo.domain != domid) )
   6.689 +    {
   6.690 +	PERROR("Could not get info on domain");
   6.691 +	goto out;
   6.692 +    }
   6.693 +printf("A\n");    
   6.694 +    /* Canonicalise the suspend-record frame number. */
   6.695 +    if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) )
   6.696 +    {
   6.697 +        ERROR("State record is not in range of pseudophys map");
   6.698 +        goto out;
   6.699 +    }
   6.700 +printf("B\n");    
   6.701 +    /* Canonicalise each GDT frame number. */
   6.702 +    for ( i = 0; i < ctxt.gdt_ents; i += 512 )
   6.703 +    {
   6.704 +        if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) )
   6.705 +        {
   6.706 +            ERROR("GDT frame is not in range of pseudophys map");
   6.707 +            goto out;
   6.708 +        }
   6.709 +    }
   6.710 +printf("C\n");    
   6.711 +    /* Canonicalise the page table base pointer. */
   6.712 +    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) )
   6.713 +    {
   6.714 +        ERROR("PT base is not in range of pseudophys map");
   6.715 +        goto out;
   6.716 +    }
   6.717 +    ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
   6.718 +printf("D\n");    
   6.719 +    if ( (*writerfn)(writerst, &ctxt,                 sizeof(ctxt)) ||
   6.720 +         (*writerfn)(writerst, live_shinfo,           PAGE_SIZE) )
   6.721 +    {
    6.722 +        ERROR("Error when writing to state file (7)");
   6.723 +        goto out;
   6.724 +    }
   6.725 +    munmap(live_shinfo, PAGE_SIZE);
   6.726 +printf("E\n");        
   6.727  out:
   6.728      /* Restart the domain if we had to stop it to save its state. */
   6.729      if ( we_stopped_it )
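
Note on the overall structure: the rewritten save loop above is classic
iterative pre-copy. Round one sends every page; each later round resends
only pages dirtied in the meantime; when progress stalls (the 95%
heuristic) or max_iters is reached, the domain is stopped and one final
consistent round is sent, followed by the now-canonicalised CPU context
and shared info. Condensed control flow -- a reading aid only, where
send_marked_pages() is a hypothetical stand-in for the batch machinery:

    last_iter = !live;
    memset(to_send, 0xff, sz);                /* round 1: send everything */

    while ( 1 )
    {
        sent_this_iter = send_marked_pages(to_send);   /* hypothetical */

        if ( last_iter )
            break;

        if ( !(sent_this_iter < (sent_last_iter * 0.95) &&
               iter < max_iters) )
        {
            last_iter = 1;                    /* freeze guest, final pass */
            xc_domain_stop_sync(xc_handle, domid);
        }

        /* Atomically fetch and reset the dirty log for the next round. */
        if ( xc_shadow_control(xc_handle, domid,
                               DOM0_SHADOW_CONTROL_OP_CLEAN,
                               to_send, nr_pfns) != nr_pfns )
            break;

        sent_last_iter = sent_this_iter;
    }
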
     7.1 --- a/tools/xc/lib/xc_private.c	Tue May 11 14:31:55 2004 +0000
     7.2 +++ b/tools/xc/lib/xc_private.c	Tue May 11 14:57:44 2004 +0000
     7.3 @@ -47,6 +47,31 @@ void unmap_pfn(int pm_handle, void *vadd
     7.4  
     7.5  /*******************/
     7.6  
     7.7 +void * mfn_mapper_map_batch(int xc_handle, domid_t dom, int prot,
     7.8 +			    unsigned long *arr, int num )
     7.9 +{
    7.10 +    privcmd_mmapbatch_t ioctlx; 
    7.11 +    void *addr;
    7.12 +    addr = mmap( NULL, num*PAGE_SIZE, prot, MAP_SHARED, xc_handle, 0 );
    7.13 +    if (addr)
    7.14 +    {
    7.15 +	ioctlx.num=num;
    7.16 +	ioctlx.dom=dom;
    7.17 +	ioctlx.addr=(unsigned long)addr;
    7.18 +	ioctlx.arr=arr;
    7.19 +	if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx ) <0 )
    7.20 +	{
     7.21 +	    perror("IOCTL_PRIVCMD_MMAPBATCH ioctl failed");
    7.22 +	    munmap(addr, num*PAGE_SIZE);
    7.23 +	    return 0;
    7.24 +	}
    7.25 +    }
    7.26 +    return addr;
    7.27 +
    7.28 +}
    7.29 +
    7.30 +/*******************/
    7.31 +
    7.32  void * mfn_mapper_map_single(int xc_handle, domid_t dom,
    7.33  			     int size, int prot,
    7.34  			     unsigned long mfn )
    7.35 @@ -64,7 +89,10 @@ void * mfn_mapper_map_single(int xc_hand
    7.36  	entry.mfn=mfn;
    7.37  	entry.npages=(size+PAGE_SIZE-1)>>PAGE_SHIFT;
    7.38  	if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx ) <0 )
    7.39 +	{
    7.40 +	    munmap(addr, size);
    7.41  	    return 0;
    7.42 +	}
    7.43      }
    7.44      return addr;
    7.45  }
    7.46 @@ -295,7 +323,7 @@ static int flush_mmu_updates(int xc_hand
    7.47  
    7.48      hypercall.op     = __HYPERVISOR_mmu_update;
    7.49      hypercall.arg[0] = (unsigned long)mmu->updates;
    7.50 -    hypercall.arg[1] = (unsigned long)mmu->idx;
    7.51 +    hypercall.arg[1] = (unsigned long)&(mmu->idx);
    7.52  
    7.53      if ( mlock(mmu->updates, sizeof(mmu->updates)) != 0 )
    7.54      {
    7.55 @@ -342,3 +370,47 @@ int finish_mmu_updates(int xc_handle, mm
    7.56  {
    7.57      return flush_mmu_updates(xc_handle, mmu);
    7.58  }
    7.59 +
    7.60 +
    7.61 +/***********************************************************/
    7.62 +
    7.63 +/* this function is a hack until we get proper synchronous domain stop */
    7.64 +
    7.65 +int xc_domain_stop_sync( int xc_handle, domid_t domid )
    7.66 +{
    7.67 +    dom0_op_t op;
    7.68 +
    7.69 +    while (1)
    7.70 +    {
    7.71 +        op.cmd = DOM0_STOPDOMAIN;
    7.72 +        op.u.stopdomain.domain = (domid_t)domid;
    7.73 +        if ( do_dom0_op(xc_handle, &op) != 0 )
    7.74 +        {
    7.75 +            PERROR("Stopping target domain failed");
    7.76 +            goto out;
    7.77 +        }
    7.78 +
    7.79 +        usleep(1000); // 1ms
    7.80 +	printf("Sleep for 1ms\n");
    7.81 +
    7.82 +        op.cmd = DOM0_GETDOMAININFO;
    7.83 +        op.u.getdomaininfo.domain = (domid_t)domid;
    7.84 +        op.u.getdomaininfo.ctxt = NULL;
    7.85 +        if ( (do_dom0_op(xc_handle, &op) < 0) || 
    7.86 +             ((u64)op.u.getdomaininfo.domain != domid) )
    7.87 +        {
    7.88 +            PERROR("Could not get info on domain");
    7.89 +            goto out;
    7.90 +        }
    7.91 +
    7.92 +        if ( op.u.getdomaininfo.state == DOMSTATE_STOPPED )
    7.93 +	{
    7.94 +	    printf("Domain %lld stopped\n",domid);
    7.95 +            return 0;
    7.96 +	}
    7.97 +
    7.98 +    }
    7.99 +
   7.100 +out:
   7.101 +    return -1;    
   7.102 +}
     8.1 --- a/tools/xc/lib/xc_private.h	Tue May 11 14:31:55 2004 +0000
     8.2 +++ b/tools/xc/lib/xc_private.h	Tue May 11 14:57:44 2004 +0000
     8.3 @@ -232,6 +232,9 @@ typedef struct mfn_mapper {
     8.4  void * mfn_mapper_map_single(int xc_handle, domid_t dom, int size, int prot, 
     8.5  			     unsigned long mfn );
     8.6  
     8.7 +void * mfn_mapper_map_batch(int xc_handle, domid_t dom, int prot,
     8.8 +			    unsigned long *arr, int num );
     8.9 +
    8.10  mfn_mapper_t * mfn_mapper_init(int xc_handle, domid_t dom, int size, int prot);
    8.11  
    8.12  void * mfn_mapper_base(mfn_mapper_t *t);
    8.13 @@ -245,5 +248,6 @@ void * mfn_mapper_queue_entry(mfn_mapper
    8.14  
    8.15  /*********************/
    8.16  
    8.17 +int xc_domain_stop_sync( int xc_handle, domid_t dom );
    8.18  
    8.19  #endif /* __XC_PRIVATE_H__ */
     9.1 --- a/tools/xc/py/Xc.c	Tue May 11 14:31:55 2004 +0000
     9.2 +++ b/tools/xc/py/Xc.c	Tue May 11 14:57:44 2004 +0000
     9.3 @@ -190,16 +190,17 @@ static PyObject *pyxc_linux_save(PyObjec
     9.4  
     9.5      u64   dom;
     9.6      char *state_file;
     9.7 -    int   progress = 1;
     9.8 +    int   progress = 1, live = 0;
     9.9      unsigned int flags = 0;
    9.10  
    9.11 -    static char *kwd_list[] = { "dom", "state_file", "progress", NULL };
    9.12 +    static char *kwd_list[] = { "dom", "state_file", "progress", "live", NULL };
    9.13  
    9.14 -    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Ls|i", kwd_list, 
    9.15 -                                      &dom, &state_file, &progress) )
    9.16 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Ls|ii", kwd_list, 
    9.17 +                                      &dom, &state_file, &progress, &live) )
    9.18          return NULL;
    9.19  
    9.20      if (progress) flags |= XCFLAGS_VERBOSE;
    9.21 +    if (live)     flags |= XCFLAGS_LIVE;
    9.22  
    9.23      if (strncmp(state_file,"tcp:", strlen("tcp:")) == 0)
    9.24      {
    9.25 @@ -1273,7 +1274,7 @@ static PyObject *pyxc_shadow_control(PyO
    9.26                                        &dom, &op) )
    9.27          return NULL;
    9.28  
    9.29 -    if ( xc_shadow_control(xc->xc_handle, dom, op) != 0 )
    9.30 +    if ( xc_shadow_control(xc->xc_handle, dom, op, NULL, 0) < 0 )
    9.31          return PyErr_SetFromErrno(xc_error);
    9.32      
    9.33      Py_INCREF(zero);
    10.1 --- a/tools/xend/lib/utils.c	Tue May 11 14:31:55 2004 +0000
    10.2 +++ b/tools/xend/lib/utils.c	Tue May 11 14:57:44 2004 +0000
    10.3 @@ -723,6 +723,11 @@ static PyObject *xu_port_new(PyObject *s
    10.4          goto fail4;
    10.5      }
    10.6  
    10.7 +    xup->interface->tx_resp_prod = 0;
    10.8 +    xup->interface->rx_req_prod  = 0;
    10.9 +    xup->interface->tx_req_prod = 0;
   10.10 +    xup->interface->rx_resp_prod = 0;
   10.11 +
   10.12      xup->tx_req_cons  = 0;
   10.13      xup->tx_resp_prod = 0;
   10.14      xup->rx_req_prod  = 0;
    11.1 --- a/xen/common/dom0_ops.c	Tue May 11 14:31:55 2004 +0000
    11.2 +++ b/xen/common/dom0_ops.c	Tue May 11 14:57:44 2004 +0000
    11.3 @@ -525,10 +525,10 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
    11.4  	p = find_domain_by_id( op->u.shadow_control.domain );
    11.5  	if ( p )
    11.6  	{
    11.7 -            ret = shadow_mode_control(p, op->u.shadow_control.op );
    11.8 +            ret = shadow_mode_control(p, &op->u.shadow_control );
    11.9  	    put_task_struct(p);
   11.10 -        }
   11.11 -	
   11.12 +	    copy_to_user(u_dom0_op, op, sizeof(*op));
   11.13 +        }	
   11.14      }
   11.15      break;
   11.16  
    12.1 --- a/xen/common/domain.c	Tue May 11 14:31:55 2004 +0000
    12.2 +++ b/xen/common/domain.c	Tue May 11 14:57:44 2004 +0000
    12.3 @@ -89,9 +89,15 @@ struct task_struct *do_createdomain(domi
    12.4          memset(p->shared_info, 0, PAGE_SIZE);
    12.5          SHARE_PFN_WITH_DOMAIN(virt_to_page(p->shared_info), p);
    12.6          
    12.7 +	machine_to_phys_mapping[virt_to_phys(p->shared_info) >> PAGE_SHIFT] =
    12.8 +	    0x80000000UL;  // set m2p table to magic marker (helps debug)
    12.9 +
   12.10          p->mm.perdomain_pt = (l1_pgentry_t *)get_free_page(GFP_KERNEL);
   12.11          memset(p->mm.perdomain_pt, 0, PAGE_SIZE);
   12.12          
   12.13 +	machine_to_phys_mapping[virt_to_phys(p->mm.perdomain_pt) >> PAGE_SHIFT] =
   12.14 +	    0x0fffdeadUL;  // set m2p table to magic marker (helps debug)
   12.15 +
   12.16          init_blkdev_info(p);
   12.17          
   12.18          /* Per-domain PCI-device list. */
   12.19 @@ -486,6 +492,7 @@ void free_all_dom_mem(struct task_struct
   12.20  unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes)
   12.21  {
   12.22      unsigned int alloc_pfns, nr_pages;
   12.23 +    struct pfn_info *page;
   12.24  
   12.25      nr_pages = (kbytes + ((PAGE_SIZE-1)>>10)) >> (PAGE_SHIFT - 10);
   12.26      p->max_pages = nr_pages; /* this can now be controlled independently */
   12.27 @@ -493,13 +500,16 @@ unsigned int alloc_new_dom_mem(struct ta
   12.28      /* grow the allocation if necessary */
   12.29      for ( alloc_pfns = p->tot_pages; alloc_pfns < nr_pages; alloc_pfns++ )
   12.30      {
   12.31 -        if ( unlikely(alloc_domain_page(p) == NULL) ||
   12.32 +        if ( unlikely((page=alloc_domain_page(p)) == NULL) ||
   12.33               unlikely(free_pfns < (SLACK_DOMAIN_MEM_KILOBYTES >> 
   12.34                                     (PAGE_SHIFT-10))) )
   12.35          {
   12.36              free_all_dom_mem(p);
   12.37              return -ENOMEM;
   12.38          }
   12.39 +
   12.40 +	/* initialise the machine_to_phys_mapping table entry to the likely pfn */
   12.41 +	machine_to_phys_mapping[page-frame_table] = alloc_pfns;
   12.42      }
   12.43  
   12.44      p->tot_pages = nr_pages;
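
Note on the magic markers: several distinctive machine-to-phys values are
introduced across this changeset so that stray entries are recognisable in
debug output. Gathered here for reference (the enum itself is
illustrative, not part of the checkin; the netbuf value is inferred from
the 0x80000004 tests in the save path):

    enum m2p_debug_marker {
        M2P_SHARED_INFO  = 0x80000000UL, /* domain.c: shared_info page    */
        M2P_NET_RING     = 0x80000001UL, /* network.c: vif comms ring     */
        M2P_BLK_RING     = 0x80000002UL, /* xen_block.c: blk comms ring   */
        M2P_NET_BUF      = 0x80000004UL, /* netbuf pages, skipped on save */
        M2P_PERDOMAIN_PT = 0x0fffdeadUL, /* domain.c: perdomain_pt page   */
    };
    /* memory.c additionally pre-fills the whole table with 0x55 bytes. */
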
    13.1 --- a/xen/common/memory.c	Tue May 11 14:31:55 2004 +0000
    13.2 +++ b/xen/common/memory.c	Tue May 11 14:57:44 2004 +0000
    13.3 @@ -213,7 +213,12 @@ void __init init_frametable(unsigned lon
    13.4         belonging to the machine_to_phys_mapping to CPU0 idle task */
    13.5      
    13.6      mfn = virt_to_phys((void *)RDWR_MPT_VIRT_START)>>PAGE_SHIFT;
    13.7 -//    for(i=0;i<nr_pages;i+=1024,mfn++)
    13.8 +
    13.9 +    /* initialise to a magic value of 0x55555555 so bugs are easier to spot later */
   13.10 +    memset( machine_to_phys_mapping, 0x55, 4*1024*1024 );
   13.11 +
   13.12 +    /* The array is sized for a 4GB machine regardless of actual mem size. 
   13.13 +       This costs 4MB -- may want to fix some day */
   13.14      for(i=0;i<1024*1024;i+=1024,mfn++)
   13.15      {
   13.16  	frame_table[mfn].count_and_flags = 1 | PGC_allocated;
   13.17 @@ -325,7 +330,7 @@ static int get_page_from_pagenr(unsigned
   13.18  
   13.19      if ( unlikely(!get_page(page, p)) )
   13.20      {
   13.21 -        MEM_LOG("Could not get page ref for pfn %08lx\n", page_nr);
   13.22 +        MEM_LOG("Could not get page ref for pfn %08lx", page_nr);
   13.23          return 0;
   13.24      }
   13.25  
   13.26 @@ -944,8 +949,9 @@ static int do_extended_command(unsigned 
   13.27  }
   13.28  
   13.29  
   13.30 -int do_mmu_update(mmu_update_t *ureqs, int count)
   13.31 +int do_mmu_update(mmu_update_t *ureqs, int * p_count)
   13.32  {
   13.33 +    int count;
   13.34      mmu_update_t req;
   13.35      unsigned long va = 0, deferred_ops, pfn, prev_pfn = 0;
   13.36      struct pfn_info *page;
   13.37 @@ -954,6 +960,11 @@ int do_mmu_update(mmu_update_t *ureqs, i
   13.38      unsigned long prev_spfn = 0;
   13.39      l1_pgentry_t *prev_spl1e = 0;
   13.40  
   13.41 +    if ( unlikely( get_user(count, p_count) ) )
   13.42 +    {
   13.43 +	return -EFAULT;
   13.44 +    }
   13.45 +
   13.46      perfc_incrc(calls_to_mmu_update); 
   13.47      perfc_addc(num_page_updates, count);
   13.48  
   13.49 @@ -1110,6 +1121,9 @@ int do_mmu_update(mmu_update_t *ureqs, i
   13.50          percpu_info[cpu].gps = percpu_info[cpu].pts = NULL;
   13.51      }
   13.52  
   13.53 +    if ( unlikely(rc) )
   13.54 +	put_user( count, p_count );
   13.55 +
   13.56      return rc;
   13.57  }
   13.58  
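
Note on the revised do_mmu_update() contract: the count is now passed by
reference, read with get_user() on entry and written back with put_user()
if the call fails, so the caller can tell how far processing got. The
user-space half is the flush_mmu_updates() change above, which passes
&mmu->idx as the hypercall argument. In outline (a sketch of the contract
only; names other than do_mmu_update are illustrative):

    mmu_update_t updates[MAX_BATCH];  /* MAX_BATCH: illustrative bound */
    int count = n;                    /* IN: number of valid entries   */

    rc = do_mmu_update(updates, &count);  /* via __HYPERVISOR_mmu_update */
    if ( rc != 0 )
        report_failure(count);        /* OUT: count written back on error */
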
    14.1 --- a/xen/common/network.c	Tue May 11 14:31:55 2004 +0000
    14.2 +++ b/xen/common/network.c	Tue May 11 14:57:44 2004 +0000
    14.3 @@ -111,6 +111,9 @@ net_vif_t *create_net_vif(domid_t dom)
    14.4      clear_page(new_ring);
    14.5      SHARE_PFN_WITH_DOMAIN(virt_to_page(new_ring), p);
    14.6  
    14.7 +    machine_to_phys_mapping[virt_to_phys(new_ring)>>PAGE_SHIFT] = 
    14.8 +	0x80000001; // magic value aids debugging
    14.9 +
   14.10      /*
   14.11       * Fill in the new vif struct. Note that, while the vif's refcnt is
   14.12       * non-zero, we hold a reference to the task structure.
    15.1 --- a/xen/common/shadow.c	Tue May 11 14:31:55 2004 +0000
    15.2 +++ b/xen/common/shadow.c	Tue May 11 14:57:44 2004 +0000
    15.3 @@ -123,6 +123,7 @@ static inline int shadow_page_op( struct
    15.4      }
    15.5      return work;
    15.6  }
    15.7 +
    15.8  static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
    15.9  {
   15.10      int j, work=0;
   15.11 @@ -150,7 +151,7 @@ static void __scan_shadow_table( struct 
   15.12          }
   15.13          shadow_audit(m,0);
   15.14      }
   15.15 -    SH_LOG("Scan shadow table. Work=%d l1=%d l2=%d", work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
   15.16 +    SH_VLOG("Scan shadow table. Work=%d l1=%d l2=%d", work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
   15.17  }
   15.18  
   15.19  
   15.20 @@ -160,7 +161,6 @@ int shadow_mode_enable( struct task_stru
   15.21      struct shadow_status **fptr;
   15.22      int i;
   15.23  
   15.24 -
   15.25      spin_lock_init(&m->shadow_lock);
   15.26      spin_lock(&m->shadow_lock);
   15.27  
   15.28 @@ -217,7 +217,6 @@ int shadow_mode_enable( struct task_stru
   15.29  
   15.30      // call shadow_mk_pagetable
   15.31      shadow_mk_pagetable( m );
   15.32 -
   15.33      return 0;
   15.34  
   15.35   nomem:
   15.36 @@ -260,9 +259,12 @@ void shadow_mode_disable( struct task_st
   15.37      kfree( &m->shadow_ht[0] );
   15.38  }
   15.39  
   15.40 -static void shadow_mode_table_op( struct task_struct *p, unsigned int op )
   15.41 +static int shadow_mode_table_op( struct task_struct *p, 
   15.42 +								  dom0_shadow_control_t *sc )
   15.43  {
   15.44 +	unsigned int op = sc->op;
   15.45      struct mm_struct *m = &p->mm;
   15.46 +	int rc = 0;
   15.47  
   15.48      // since Dom0 did the hypercall, we should be running with it's page
   15.49      // tables right now. Calling flush on yourself would be really
   15.50 @@ -271,13 +273,13 @@ static void shadow_mode_table_op( struct
   15.51      if ( m == &current->mm )
   15.52      {
   15.53          printk("Don't try and flush your own page tables!\n");
   15.54 -        return;
   15.55 +        return -EINVAL;
   15.56      }
   15.57     
   15.58  
   15.59      spin_lock(&m->shadow_lock);
   15.60  
   15.61 -    SH_LOG("shadow mode table op %08lx %08lx count %d",pagetable_val( m->pagetable),pagetable_val(m->shadow_table), m->shadow_page_count);
   15.62 +    SH_VLOG("shadow mode table op %08lx %08lx count %d",pagetable_val( m->pagetable),pagetable_val(m->shadow_table), m->shadow_page_count);
   15.63  
   15.64      shadow_audit(m,1);
   15.65  
   15.66 @@ -288,27 +290,60 @@ static void shadow_mode_table_op( struct
   15.67          break;
   15.68     
   15.69      case DOM0_SHADOW_CONTROL_OP_CLEAN:
   15.70 -        __scan_shadow_table( m, op );
   15.71 -        // we used to bzero dirty bitmap here, but now leave this to user space
   15.72 -        // if we were double buffering we'd do the flip here
   15.73 +	{
   15.74 +		int i;
   15.75 +
   15.76 +	    __scan_shadow_table( m, op );
   15.77 +
   15.78 +	    if( p->tot_pages > sc->pages || 
   15.79 +			!sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap )
   15.80 +	    {
   15.81 +			rc = -EINVAL;
   15.82 +			goto out;
   15.83 +	    }
   15.84 +	    
   15.85 +	    sc->pages = p->tot_pages;
   15.86 +	   
   15.87 +#define chunk (8*1024) // do this in 1KB chunks for L1 cache
   15.88 +
   15.89 +	    for(i=0;i<p->tot_pages;i+=chunk)
   15.90 +	    {
   15.91 +			int bytes = ((  ((p->tot_pages-i) > (chunk))?
   15.92 +				(chunk):(p->tot_pages-i) ) + 7) / 8;
   15.93 +
   15.94 +			copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
   15.95 +						  p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
   15.96 +						  bytes );
   15.97 +
   15.98 +			memset( p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
   15.99 +				   0, bytes);
  15.100 +		}
  15.101 +
  15.102          break;
  15.103 +	}
  15.104      }
  15.105  
  15.106 +
  15.107 +out:
  15.108 +
  15.109      spin_unlock(&m->shadow_lock);
  15.110  
  15.111 -    SH_LOG("shadow mode table op : page count %d", m->shadow_page_count);
  15.112 +    SH_VLOG("shadow mode table op : page count %d", m->shadow_page_count);
  15.113  
  15.114      shadow_audit(m,1);
  15.115  
  15.116      // call shadow_mk_pagetable
  15.117      shadow_mk_pagetable( m );
  15.118  
  15.119 +	return rc;
  15.120  }
  15.121  
  15.122  
  15.123 -int shadow_mode_control( struct task_struct *p, unsigned int op )
  15.124 +int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc )
  15.125  {
  15.126      int  we_paused = 0;
  15.127 +	unsigned int cmd = sc->op;
  15.128 +	int rc = 0;
  15.129   
  15.130      // don't call if already shadowed...
  15.131  
  15.132 @@ -321,18 +356,23 @@ int shadow_mode_control( struct task_str
  15.133          we_paused = 1;
  15.134      }
  15.135  
  15.136 -    if ( p->mm.shadow_mode && op == DOM0_SHADOW_CONTROL_OP_OFF )
  15.137 +    if ( p->mm.shadow_mode && cmd == DOM0_SHADOW_CONTROL_OP_OFF )
  15.138      {
  15.139          shadow_mode_disable(p);
  15.140      }
  15.141 -    else if ( op == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST )
  15.142 +    else if ( cmd == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST )
  15.143      {
  15.144          if(p->mm.shadow_mode) shadow_mode_disable(p);
  15.145          shadow_mode_enable(p, SHM_test);
  15.146      } 
  15.147 -    else if ( p->mm.shadow_mode && op >= DOM0_SHADOW_CONTROL_OP_FLUSH && op<=DOM0_SHADOW_CONTROL_OP_CLEAN )
  15.148 +    else if ( cmd == DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY )
  15.149      {
  15.150 -        shadow_mode_table_op(p, op);
  15.151 +        if(p->mm.shadow_mode) shadow_mode_disable(p);
  15.152 +        shadow_mode_enable(p, SHM_logdirty);
  15.153 +    } 
  15.154 +    else if ( p->mm.shadow_mode && cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH && cmd<=DOM0_SHADOW_CONTROL_OP_CLEAN )
  15.155 +    {
  15.156 +        rc = shadow_mode_table_op(p, sc);
  15.157      }
  15.158      else
  15.159      {
  15.160 @@ -341,7 +381,7 @@ int shadow_mode_control( struct task_str
  15.161      }
  15.162  
  15.163      if ( we_paused ) wake_up(p);
  15.164 -    return 0;
  15.165 +    return rc;
  15.166  }
  15.167  
  15.168  
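
Note on the chunking in the CLEAN handler above: the dirty bitmap is
copied out and cleared 8*1024 page-bits at a time, i.e. at most
8192/8 = 1024 bytes (1KB) of bitmap per pass, small enough to stay in the
L1 cache. The same loop with the bit/byte arithmetic spelled out (names
shortened; illustrative only):

    #define CHUNK (8*1024)                       /* page-bits per pass */

    for ( i = 0; i < tot_pages; i += CHUNK )
    {
        int bits  = ((tot_pages - i) > CHUNK) ? CHUNK : (tot_pages - i);
        int bytes = (bits + 7) / 8;              /* <= 1KB per pass     */
        int off   = i / (8 * sizeof(unsigned long)); /* offset in longs */

        copy_to_user(user_bitmap + off, dirty_bitmap + off, bytes);
        memset(dirty_bitmap + off, 0, bytes);
    }
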
    16.1 --- a/xen/drivers/block/xen_block.c	Tue May 11 14:31:55 2004 +0000
    16.2 +++ b/xen/drivers/block/xen_block.c	Tue May 11 14:57:44 2004 +0000
    16.3 @@ -19,6 +19,7 @@
    16.4  #include <xen/interrupt.h>
    16.5  #include <xen/vbd.h>
    16.6  #include <xen/slab.h>
    16.7 +#include <xen/shadow.h>
    16.8  
    16.9  /*
   16.10   * These are rather arbitrary. They are fairly large because adjacent requests
   16.11 @@ -358,9 +359,18 @@ static void unlock_buffer(unsigned long 
   16.12            pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
   16.13            pfn++ )
   16.14      {
   16.15 +
   16.16 +	/* Find the domain from the frame_table. Yuk... */
   16.17 +	struct task_struct *p = frame_table[pfn].u.domain;
   16.18 +
   16.19 +	if( p->mm.shadow_mode == SHM_logdirty )
   16.20 +	    mark_dirty( &p->mm, pfn );	
   16.21 +
   16.22 +
   16.23          if ( writeable_buffer )
   16.24              put_page_type(&frame_table[pfn]);
   16.25          put_page(&frame_table[pfn]);
   16.26 +
   16.27      }
   16.28  }
   16.29  
   16.30 @@ -597,6 +607,10 @@ void init_blkdev_info(struct task_struct
   16.31      p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
   16.32      clear_page(p->blk_ring_base);
   16.33      SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p);
   16.34 +
   16.35 +    machine_to_phys_mapping[virt_to_phys(p->blk_ring_base)>>PAGE_SHIFT] =
   16.36 +	0x80000002; // magic value aids debugging
   16.37 +
   16.38      p->blkdev_list.next = NULL;
   16.39      spin_lock_init(&p->vbd_lock);
   16.40  }
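
Note on the unlock_buffer() change above: block-device DMA lands in guest
memory without taking a shadow page fault, so log-dirty mode would never
see those writes. The driver therefore marks the touched frames dirty by
hand when the I/O completes -- the essence of the pattern:

    /* after block I/O into frame 'pfn', owned by task 'p' */
    struct task_struct *p = frame_table[pfn].u.domain;

    if ( p->mm.shadow_mode == SHM_logdirty )
        mark_dirty(&p->mm, pfn);  /* set this page's bit in the dirty log */
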
    17.1 --- a/xen/include/asm-i386/processor.h	Tue May 11 14:31:55 2004 +0000
    17.2 +++ b/xen/include/asm-i386/processor.h	Tue May 11 14:57:44 2004 +0000
    17.3 @@ -449,7 +449,7 @@ struct mm_struct {
    17.4      struct shadow_status *shadow_ht;
    17.5      struct shadow_status *shadow_ht_free;
    17.6      struct shadow_status *shadow_ht_extras; /* extra allocation units */
    17.7 -    unsigned int *shadow_dirty_bitmap;
    17.8 +    unsigned long *shadow_dirty_bitmap;
    17.9      unsigned int shadow_dirty_bitmap_size;  /* in pages, bit per page */
   17.10      unsigned int shadow_page_count;
   17.11      unsigned int shadow_max_page_count;
    18.1 --- a/xen/include/hypervisor-ifs/dom0_ops.h	Tue May 11 14:31:55 2004 +0000
    18.2 +++ b/xen/include/hypervisor-ifs/dom0_ops.h	Tue May 11 14:57:44 2004 +0000
    18.3 @@ -243,6 +243,9 @@ typedef struct dom0_shadow_control_st
    18.4      /* IN variables. */
    18.5      domid_t      domain;
    18.6      int          op;
     18.7 +    unsigned long *dirty_bitmap;  /* pointer to mlocked buffer */
     18.8 +    /* IN/OUT variables. */
     18.9 +    unsigned long  pages;         /* size of buffer, updated with actual size */
   18.10  } dom0_shadow_control_t;
   18.11  
   18.12  #define DOM0_SETDOMAINNAME     26
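Because the hypervisor copies the log straight into the buffer that dirty_bitmap points at, the caller must pass memory that is locked into dom0. A minimal sketch of preparing such a buffer, assuming one dirty bit per guest page and standard libc calls (error handling elided; names are illustrative):

    #include <stdlib.h>
    #include <sys/mman.h>

    unsigned long pages = nr_pfns;       /* guest memory size in pages */
    size_t bytes = (pages + 7) / 8;      /* one dirty bit per page */
    unsigned long *bitmap = calloc(1, bytes);

    if ( bitmap == NULL || mlock(bitmap, bytes) != 0 )
        /* bail out: the hypervisor must not write to pageable memory */;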
    19.1 --- a/xen/include/xen/mm.h	Tue May 11 14:31:55 2004 +0000
    19.2 +++ b/xen/include/xen/mm.h	Tue May 11 14:57:44 2004 +0000
    19.3 @@ -164,8 +164,8 @@ static inline int get_page(struct pfn_in
    19.4               unlikely(x & PGC_zombie) ||             /* Zombie? */
    19.5               unlikely(p != domain) )                 /* Wrong owner? */
    19.6          {
    19.7 -            DPRINTK("Error pfn %08lx: ed=%p(%lld), sd=%p(%lld), caf=%08x\n",
    19.8 -                    page_to_pfn(page), domain, (domain)?domain->domain:1234, p, (p)?p->domain:1234, x);
    19.9 +            DPRINTK("Error pfn %08lx: ed=%p(%lld), sd=%p(%lld), caf=%08x, taf=%08x\n",
   19.10 +                    page_to_pfn(page), domain, (domain)?domain->domain:999, p, (p && !((x & PGC_count_mask) == 0))?p->domain:999, x, page->type_and_flags);
   19.11              return 0;
   19.12          }
   19.13          __asm__ __volatile__(
   19.14 @@ -314,7 +314,7 @@ int check_descriptor(unsigned long a, un
   19.15  #define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START)
   19.16  
   19.17  /* Part of the domain API. */
   19.18 -int do_mmu_update(mmu_update_t *updates, int count);
   19.19 +int do_mmu_update(mmu_update_t *updates, int *count);
   19.20  
   19.21  #define DEFAULT_GDT_ENTRIES     ((LAST_RESERVED_GDT_ENTRY*8)+7)
   19.22  #define DEFAULT_GDT_ADDRESS     ((unsigned long)gdt_table)
    20.1 --- a/xen/include/xen/shadow.h	Tue May 11 14:31:55 2004 +0000
    20.2 +++ b/xen/include/xen/shadow.h	Tue May 11 14:57:44 2004 +0000
    20.3 @@ -23,7 +23,7 @@
    20.4  #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
    20.5  #define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
    20.6  
    20.7 -extern int shadow_mode_control( struct task_struct *p, unsigned int op );
    20.8 +extern int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc );
    20.9  extern int shadow_fault( unsigned long va, long error_code );
   20.10  extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte, 
   20.11  										unsigned long *prev_spfn_ptr,
   20.12 @@ -50,7 +50,7 @@ struct shadow_status {
   20.13  
   20.14  #ifndef NDEBUG
   20.15  #define SH_LOG(_f, _a...)                             \
   20.16 -  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
   20.17 +  printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
   20.18           current->domain , __LINE__ , ## _a )
   20.19  #else
   20.20  #define SH_LOG(_f, _a...) 
   20.21 @@ -58,7 +58,7 @@ struct shadow_status {
   20.22  
   20.23  #if SHADOW_DEBUG
   20.24  #define SH_VLOG(_f, _a...)                             \
   20.25 -  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
   20.26 +  printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
   20.27           current->domain , __LINE__ , ## _a )
   20.28  #else
   20.29  #define SH_VLOG(_f, _a...) 
   20.30 @@ -66,19 +66,27 @@ struct shadow_status {
   20.31  
   20.32  #if 0
   20.33  #define SH_VVLOG(_f, _a...)                             \
   20.34 -  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
   20.35 +  printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
   20.36           current->domain , __LINE__ , ## _a )
   20.37  #else
   20.38  #define SH_VVLOG(_f, _a...) 
   20.39  #endif
   20.40  
   20.41  
   20.42 -
   20.43  /************************************************************************/
   20.44  
   20.45  static inline void mark_dirty( struct mm_struct *m, unsigned int mfn )
   20.46  {
   20.47 -	unsigned int pfn = machine_to_phys_mapping[mfn];
   20.48 +	unsigned int pfn;
   20.49 +
   20.50 +	pfn = machine_to_phys_mapping[mfn];
   20.51 +
    20.52 +	/* We use values with the top bit set to mark MFNs that aren't
    20.53 +	   really part of the domain's pseudo-physical memory map, e.g.
    20.54 +	   the shared info frame. Nothing to do here... */
   20.56 +	if ( unlikely(pfn & 0x80000000U) ) return; 
   20.57 +
   20.58  	ASSERT(m->shadow_dirty_bitmap);
   20.59  	if( likely(pfn<m->shadow_dirty_bitmap_size) )
   20.60  	{
   20.61 @@ -91,7 +99,14 @@ static inline void mark_dirty( struct mm
   20.62  	}
   20.63  	else
   20.64  	{
   20.65 -		SH_LOG("mark_dirty pfn out of range attempt!");
    20.67 +		SH_LOG("mark_dirty OOR! mfn=%x pfn=%x max=%x (mm %p)",
    20.68 +			   mfn, pfn, m->shadow_dirty_bitmap_size, m );
    20.69 +		SH_LOG("dom=%lld caf=%08x taf=%08x",
    20.70 +			   frame_table[mfn].u.domain->domain,
    20.71 +			   frame_table[mfn].count_and_flags,
    20.72 +			   frame_table[mfn].type_and_flags );
   20.74  	}
   20.75  
   20.76  }
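The in-range branch elided by the diff context simply sets pfn's bit in the per-domain dirty bitmap. For reference, a minimal sketch of that step (the checked-in code may differ, e.g. by using an atomic test-and-set so it can also count newly-dirtied pages):

    static inline void sketch_set_dirty(unsigned long *bitmap,
                                        unsigned int pfn)
    {
        unsigned int bpl = sizeof(unsigned long) * 8;   /* bits per word */
        bitmap[pfn / bpl] |= 1UL << (pfn % bpl);
    }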
   20.77 @@ -116,7 +131,7 @@ static inline void l1pte_write_fault( st
   20.78  		spte = gpte;
   20.79  		gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
   20.80  		spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; 			
   20.81 -		mark_dirty( m, gpte >> PAGE_SHIFT );
   20.82 +		mark_dirty( m, (gpte >> PAGE_SHIFT) );
   20.83  		break;
   20.84      }
   20.85  
   20.86 @@ -343,7 +358,7 @@ static inline unsigned long get_shadow_s
   20.87  
   20.88  	if( m->shadow_mode == SHM_logdirty )
   20.89  		mark_dirty( m, gpfn );
    20.90  
   20.92  	spin_lock(&m->shadow_lock);
   20.93  	res = __shadow_status( m, gpfn );
   20.94  	if (!res) spin_unlock(&m->shadow_lock);
    21.1 --- a/xen/net/dev.c	Tue May 11 14:31:55 2004 +0000
    21.2 +++ b/xen/net/dev.c	Tue May 11 14:57:44 2004 +0000
    21.3 @@ -547,6 +547,9 @@ void deliver_packet(struct sk_buff *skb,
    21.4          goto out;
    21.5      }
    21.6  
    21.7 +    machine_to_phys_mapping[new_page - frame_table] = 
    21.8 +	machine_to_phys_mapping[old_page - frame_table];
    21.9 +
   21.10      if ( p->mm.shadow_mode && 
   21.11  	 (spte_pfn=get_shadow_status(&p->mm, pte_page-frame_table)) )
   21.12      {
   21.13 @@ -557,17 +560,15 @@ void deliver_packet(struct sk_buff *skb,
   21.14  	*sptr = new_pte;
   21.15  	unmap_domain_mem(sptr);
   21.16  
   21.17 -	if( p->mm.shadow_mode == SHM_logdirty )
   21.18 -		mark_dirty( &p->mm, new_page-frame_table );
   21.19 -
   21.20  	put_shadow_status(&p->mm);
   21.21      }
   21.22 -
   21.23 -    machine_to_phys_mapping[new_page - frame_table] 
   21.24 -        = machine_to_phys_mapping[old_page - frame_table];
   21.25      
   21.26      unmap_domain_mem(ptep);
   21.27  
   21.28 +    /* if in shadow mode, mark the buffer as dirty */
   21.29 +    if( p->mm.shadow_mode == SHM_logdirty )
   21.30 +	mark_dirty( &p->mm, (new_page-frame_table) );
   21.31 +
   21.32      /* Updates must happen before releasing the descriptor. */
   21.33      smp_wmb();
   21.34  
   21.35 @@ -2143,8 +2144,6 @@ static void get_rx_bufs(net_vif_t *vif)
   21.36              put_page_and_type(pte_page);
   21.37              make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
   21.38              goto rx_unmap_and_continue;
   21.39 -
   21.40 -	    /* XXX IAP should SHADOW_CONFIG do something here? */
   21.41          }
   21.42  
   21.43          /*
   21.44 @@ -2156,9 +2155,11 @@ static void get_rx_bufs(net_vif_t *vif)
   21.45                                0) != 
   21.46                        (PGC_allocated | PGC_tlb_flush_on_type_change | 2)) )
   21.47          {
   21.48 -            DPRINTK("Page held more than once %08x %s\n", 
    21.49 +            DPRINTK("Page held more than once mfn=%x caf=%08x owner=%s\n",
   21.50 +		    buf_page-frame_table,
   21.51                      buf_page->count_and_flags,
   21.52  		    (buf_page->u.domain)?buf_page->u.domain->name:"None");
   21.53 +
   21.54              if ( !get_page_type(buf_page, PGT_writeable_page) )
   21.55                  put_page(buf_page);
   21.56              else if ( cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) !=
   21.57 @@ -2264,6 +2265,13 @@ long flush_bufs_for_vif(net_vif_t *vif)
   21.58  
   21.59          put_page_and_type(&frame_table[rx->pte_ptr >> PAGE_SHIFT]);
   21.60  
   21.61 +	/* if in shadow mode, mark the PTE as dirty */
   21.62 +	if( p->mm.shadow_mode == SHM_logdirty )
   21.63 +	    mark_dirty( &p->mm, rx->pte_ptr>>PAGE_SHIFT );
    21.64 +	/* Assume the shadow page table is about to be blown away,
    21.65 +	   and that it's not worth marking the buffer as dirty. */
    21.66 +
   21.68          make_rx_response(vif, rx->id, 0, RING_STATUS_DROPPED, 0);
   21.69      }
   21.70      vif->rx_cons = i;
    22.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c	Tue May 11 14:31:55 2004 +0000
    22.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c	Tue May 11 14:57:44 2004 +0000
    22.3 @@ -527,8 +527,6 @@ static void reset_xlblk_interface(void)
    22.4  {
    22.5      block_io_op_t op; 
    22.6  
    22.7 -    nr_pending = 0;
    22.8 -
    22.9      op.cmd = BLOCK_IO_OP_RESET;
   22.10      if ( HYPERVISOR_block_io_op(&op) != 0 )
   22.11          printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
   22.12 @@ -549,6 +547,8 @@ int __init xlblk_init(void)
   22.13  {
   22.14      int error; 
   22.15  
   22.16 +    nr_pending = 0;
   22.17 +
   22.18      reset_xlblk_interface();
   22.19  
   22.20      xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
    23.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c	Tue May 11 14:31:55 2004 +0000
    23.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c	Tue May 11 14:57:44 2004 +0000
    23.3 @@ -103,12 +103,12 @@ static int privcmd_ioctl(struct inode *i
    23.4  		if (msg[j].va + (msg[j].npages<<PAGE_SHIFT) > vma->vm_end)
    23.5  		    return -EINVAL;
    23.6  
    23.7 -		if (rc = direct_remap_area_pages(vma->vm_mm, 
    23.8 +		if ( (rc = direct_remap_area_pages(vma->vm_mm, 
    23.9  					    msg[j].va&PAGE_MASK, 
   23.10  					    msg[j].mfn<<PAGE_SHIFT, 
   23.11  					    msg[j].npages<<PAGE_SHIFT, 
   23.12  					    vma->vm_page_prot,
   23.13 -					    mmapcmd.dom))
    23.14 +					    mmapcmd.dom)) < 0 )
   23.15  		    return rc;
   23.16  	    }
   23.17  	}
   23.18 @@ -116,6 +116,91 @@ static int privcmd_ioctl(struct inode *i
   23.19      }
   23.20      break;
   23.21  
   23.22 +    case IOCTL_PRIVCMD_MMAPBATCH:
   23.23 +    {
   23.24 +#define MAX_DIRECTMAP_MMU_QUEUE 130
   23.25 +	mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
   23.26 +	privcmd_mmapbatch_t m;
   23.27 +	struct vm_area_struct *vma = NULL;
   23.28 +	unsigned long *p, addr;
   23.29 +	unsigned long mfn;
   23.30 +	int i;
   23.31 +
   23.32 +        if ( copy_from_user(&m, (void *)data, sizeof(m)) )
   23.33 +	{ ret = -EFAULT; goto batch_err; }
   23.34 +
   23.35 +	vma = find_vma( current->mm, m.addr );
   23.36 +
   23.37 +	if (!vma)
   23.38 +	{ ret = -EINVAL; goto batch_err; }
   23.39 +
   23.40 +	if (m.addr > PAGE_OFFSET)
   23.41 +	{ ret = -EFAULT; goto batch_err; }
   23.42 +
   23.43 +	if (m.addr + (m.num<<PAGE_SHIFT) > vma->vm_end)
   23.44 +	{ ret = -EFAULT; goto batch_err; }
   23.45 +
   23.46 +	// everything fits inside the vma
    23.47 +
   23.52 +	if ( m.dom != 0 )
   23.53 +	{
   23.54 +	    u[0].val  = (unsigned long)(m.dom<<16) & ~0xFFFFUL;
   23.55 +	    u[0].ptr  = (unsigned long)(m.dom<< 0) & ~0xFFFFUL;
   23.56 +	    u[1].val  = (unsigned long)(m.dom>>16) & ~0xFFFFUL;
   23.57 +	    u[1].ptr  = (unsigned long)(m.dom>>32) & ~0xFFFFUL;
   23.58 +	    u[0].ptr |= MMU_EXTENDED_COMMAND;
   23.59 +	    u[0].val |= MMUEXT_SET_SUBJECTDOM_L;
   23.60 +	    u[1].ptr |= MMU_EXTENDED_COMMAND;
   23.61 +	    u[1].val |= MMUEXT_SET_SUBJECTDOM_H;
   23.62 +	    v = w = &u[2];
   23.63 +	}
   23.64 +	else
   23.65 +	{
   23.66 +	    v = w = &u[0];
   23.67 +	}
   23.68 +
   23.69 +	p = m.arr;
   23.70 +	addr = m.addr;
   23.72 +	for (i=0; i<m.num; i++, addr+=PAGE_SIZE, p++)
   23.73 +	{
    23.74 +	    int count;  /* HYPERVISOR_mmu_update() takes an int * */
   23.75 +	    if ( get_user(mfn, p) ) return -EFAULT;
   23.76 +
   23.77 +	    v->val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot) |
   23.78 +		_PAGE_IO;
   23.79 +
   23.80 +	    __direct_remap_area_pages( vma->vm_mm,
   23.81 +				       addr, 
   23.82 +				       PAGE_SIZE, 
   23.83 +				       v);
   23.84 +	    v++;
   23.85 +	    count = v-u;
   23.87 +
   23.88 +	    if ( HYPERVISOR_mmu_update(u, &count) < 0 )
   23.89 +	    {
   23.91 +		put_user( 0xe0000000 | mfn, p );
   23.92 +	    }
   23.93 +	    v=w;
   23.94 +	}
   23.95 +	ret = 0;
   23.96 +	break;
   23.97 +
   23.98 +    batch_err:
    23.99 +	printk("batch_err ret=%d vma=%p addr=%lx num=%d arr=%p %lx-%lx\n",
   23.100 +	       ret, vma, m.addr, m.num, m.arr, vma ? vma->vm_start : 0UL, vma ? vma->vm_end : 0UL);
  23.101 +	break;
  23.102 +    }
  23.103 +    break;
   23.104 +
  23.107      default:
  23.108          ret = -EINVAL;
  23.109      	break;
    24.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c	Tue May 11 14:31:55 2004 +0000
    24.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c	Tue May 11 14:57:44 2004 +0000
    24.3 @@ -248,6 +248,8 @@ static void network_alloc_rx_buffers(str
    24.4          np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = 
    24.5              virt_to_machine(get_ppte(skb->head));
    24.6  
     24.7 +	/* Shadow optimisation: disown this page from the p->m (phys-to-machine) map. */
    24.8 +	phys_to_machine_mapping[virt_to_phys(skb->head)>>PAGE_SHIFT] = 0x80000004;
    24.9          np->rx_bufs_to_notify++;
   24.10      }
   24.11      while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE );
   24.12 @@ -364,6 +366,9 @@ static inline void _network_interrupt(st
   24.13          skb = np->rx_skbs[rx->id];
   24.14          ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
   24.15  
   24.16 +        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
   24.17 +            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
   24.18 +
   24.19          if ( unlikely(rx->status != RING_STATUS_OK) )
   24.20          {
   24.21              /* Gate this error. We get a (valid) slew of them on suspend. */
   24.22 @@ -382,9 +387,6 @@ static inline void _network_interrupt(st
   24.23          skb_shinfo(skb)->nr_frags = 0;
   24.24          skb_shinfo(skb)->frag_list = NULL;
   24.25                                  
   24.26 -        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
   24.27 -            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
   24.28 -
   24.29          skb->data = skb->tail = skb->head + rx->offset;
   24.30          skb_put(skb, rx->size);
   24.31          skb->protocol = eth_type_trans(skb, dev);
    25.1 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/setup.c	Tue May 11 14:31:55 2004 +0000
    25.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/setup.c	Tue May 11 14:57:44 2004 +0000
    25.3 @@ -1161,11 +1161,11 @@ static void stop_task(void *unused)
    25.4          virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
    25.5      suspend_record->nr_pfns = max_pfn;
    25.6  
    25.7 -    j = 0;
    25.8 -    for ( i = 0; i < max_pfn; i += (PAGE_SIZE / sizeof(unsigned long)) )
    25.9 -        pfn_to_mfn_frame_list[j++] = 
   25.10 +    for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
   25.11 +    {	
   25.12 +        pfn_to_mfn_frame_list[j] = 
   25.13              virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
   25.14 -
   25.15 +    }
   25.16      /*
   25.17       * NB. This is /not/ a full dev_close() as that loses route information!
   25.18       * Instead we do essentialy the same as dev_close() but without notifying
   25.19 @@ -1207,7 +1207,9 @@ static void stop_task(void *unused)
   25.20      memcpy(&start_info, &suspend_record->resume_info, sizeof(start_info));
   25.21  
   25.22      set_fixmap(FIX_SHARED_INFO, start_info.shared_info);
   25.23 +
   25.24      HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
   25.25 +
   25.26      memset(empty_zero_page, 0, PAGE_SIZE);
   25.27  
   25.28      irq_resume();
    26.1 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/time.c	Tue May 11 14:31:55 2004 +0000
    26.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/time.c	Tue May 11 14:57:44 2004 +0000
    26.3 @@ -62,6 +62,7 @@
    26.4  #include <linux/smp.h>
    26.5  #include <linux/irq.h>
    26.6  #include <linux/sysctl.h>
    26.7 +#include <linux/sysrq.h>
    26.8  
    26.9  spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
   26.10  extern rwlock_t xtime_lock;
   26.11 @@ -581,6 +582,10 @@ static void dbg_time_int(int irq, void *
   26.12             timer->expires,(u32)(t_st>>32), (u32)t_st);
   26.13      printk(KERN_ALERT "time: processed_system_time=0x%X:%08X\n",
   26.14             (u32)(processed_system_time>>32), (u32)processed_system_time);
    26.15 +
   26.17 +    handle_sysrq('t',NULL,NULL,NULL);
   26.18 +
   26.19  }
   26.20  
   26.21  static struct irqaction dbg_time = {
    27.1 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c	Tue May 11 14:31:55 2004 +0000
    27.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c	Tue May 11 14:57:44 2004 +0000
    27.3 @@ -317,16 +317,17 @@ asmlinkage void do_general_protection(st
    27.4  		__asm__ __volatile__ ( "sldt %0" : "=r" (ldt) );
    27.5  		if ( ldt == 0 )
    27.6  		{
    27.7 -			mmu_update_t u;
    27.8 -			u.ptr  = MMU_EXTENDED_COMMAND;
    27.9 -			u.ptr |= (unsigned long)&default_ldt[0];
   27.10 -			u.val  = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
   27.11 -			if ( unlikely(HYPERVISOR_mmu_update(&u, 1) < 0) )
   27.12 -			{
   27.13 -				show_trace(NULL);
   27.14 -				panic("Failed to install default LDT");
   27.15 -			}
   27.16 -			return;
   27.17 +		    int count = 1;
   27.18 +		    mmu_update_t u;
   27.19 +		    u.ptr  = MMU_EXTENDED_COMMAND;
   27.20 +		    u.ptr |= (unsigned long)&default_ldt[0];
   27.21 +		    u.val  = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
   27.22 +		    if ( unlikely(HYPERVISOR_mmu_update(&u, &count) < 0) )
   27.23 +		    {
   27.24 +			show_trace(NULL);
   27.25 +			panic("Failed to install default LDT");
   27.26 +		    }
   27.27 +		    return;
   27.28  		}
   27.29  	}
   27.30  
    28.1 --- a/xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c	Tue May 11 14:31:55 2004 +0000
    28.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c	Tue May 11 14:57:44 2004 +0000
    28.3 @@ -37,12 +37,13 @@ static void DEBUG_allow_pt_reads(void)
    28.4      int i;
    28.5      for ( i = idx-1; i >= 0; i-- )
    28.6      {
    28.7 +	int count = 1;
    28.8          pte = update_debug_queue[i].ptep;
    28.9          if ( pte == NULL ) continue;
   28.10          update_debug_queue[i].ptep = NULL;
   28.11          update.ptr = virt_to_machine(pte);
   28.12          update.val = update_debug_queue[i].pteval;
   28.13 -        HYPERVISOR_mmu_update(&update, 1);
   28.14 +        HYPERVISOR_mmu_update(&update, &count);
   28.15      }
   28.16  }
   28.17  static void DEBUG_disallow_pt_read(unsigned long va)
   28.18 @@ -51,6 +52,7 @@ static void DEBUG_disallow_pt_read(unsig
   28.19      pmd_t *pmd;
   28.20      pgd_t *pgd;
   28.21      unsigned long pteval;
   28.22 +    int count = 1;
   28.23      /*
   28.24       * We may fault because of an already outstanding update.
   28.25       * That's okay -- it'll get fixed up in the fault handler.
   28.26 @@ -62,7 +64,7 @@ static void DEBUG_disallow_pt_read(unsig
   28.27      update.ptr = virt_to_machine(pte);
   28.28      pteval = *(unsigned long *)pte;
   28.29      update.val = pteval & ~_PAGE_PRESENT;
   28.30 -    HYPERVISOR_mmu_update(&update, 1);
   28.31 +    HYPERVISOR_mmu_update(&update, &count);
   28.32      update_debug_queue[idx].ptep = pte;
   28.33      update_debug_queue[idx].pteval = pteval;
   28.34  }
   28.35 @@ -100,7 +102,7 @@ void MULTICALL_flush_page_update_queue(v
   28.36          wmb(); /* Make sure index is cleared first to avoid double updates. */
   28.37          queue_multicall2(__HYPERVISOR_mmu_update, 
   28.38                           (unsigned long)update_queue, 
   28.39 -                         _idx);
    28.40 +                         (unsigned long)&_idx);
   28.41      }
   28.42      spin_unlock_irqrestore(&update_lock, flags);
   28.43  }
   28.44 @@ -116,7 +118,7 @@ static inline void __flush_page_update_q
   28.45  #endif
   28.46      idx = 0;
   28.47      wmb(); /* Make sure index is cleared first to avoid double updates. */
   28.48 -    if ( unlikely(HYPERVISOR_mmu_update(update_queue, _idx) < 0) )
   28.49 +    if ( unlikely(HYPERVISOR_mmu_update(update_queue, &_idx) < 0) )
   28.50          panic("Failed to execute MMU updates");
   28.51  }
   28.52  
    29.1 --- a/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c	Tue May 11 14:31:55 2004 +0000
    29.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c	Tue May 11 14:57:44 2004 +0000
    29.3 @@ -27,16 +27,12 @@
    29.4  #define direct_mk_pte_phys(physpage, pgprot) \
    29.5    __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
    29.6  
    29.7 -static inline int direct_remap_area_pte(pte_t *pte, 
    29.8 +static inline void direct_remap_area_pte(pte_t *pte, 
    29.9                                          unsigned long address, 
   29.10                                          unsigned long size,
   29.11 -                                        unsigned long machine_addr, 
   29.12 -                                        pgprot_t prot,
   29.13 -                                        domid_t  domid)
   29.14 +					mmu_update_t **v)
   29.15  {
   29.16      unsigned long end;
   29.17 -#define MAX_DIRECTMAP_MMU_QUEUE 130
   29.18 -    mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v, *w;
   29.19  
   29.20      address &= ~PMD_MASK;
   29.21      end = address + size;
   29.22 @@ -45,7 +41,87 @@ static inline int direct_remap_area_pte(
   29.23      if (address >= end)
   29.24          BUG();
   29.25  
   29.26 -    /* If not I/O mapping then specify General-Purpose Subject Domain (GPS). */
   29.27 +    do {
   29.28 +#if 0 // XXX
   29.29 +        if (!pte_none(*pte)) {
   29.30 +            printk("direct_remap_area_pte: page already exists\n");
   29.31 +            BUG();
   29.32 +        }
   29.33 +#endif
   29.34 +        (*v)->ptr = virt_to_machine(pte);
   29.35 +        (*v)++;
   29.36 +        address += PAGE_SIZE;
   29.37 +        pte++;
    29.38 +    } while (address && (address < end));
    29.39 +}
   29.41 +
   29.42 +static inline int direct_remap_area_pmd(struct mm_struct *mm,
   29.43 +                                        pmd_t *pmd, 
   29.44 +                                        unsigned long address, 
   29.45 +                                        unsigned long size,
   29.46 +					mmu_update_t **v)
   29.47 +{
   29.48 +    unsigned long end;
   29.49 +
   29.50 +    address &= ~PGDIR_MASK;
   29.51 +    end = address + size;
   29.52 +    if (end > PGDIR_SIZE)
   29.53 +        end = PGDIR_SIZE;
   29.54 +    if (address >= end)
   29.55 +        BUG();
   29.56 +    do {
   29.57 +        pte_t * pte = pte_alloc(mm, pmd, address);
   29.58 +        if (!pte)
   29.59 +            return -ENOMEM;
   29.60 +        direct_remap_area_pte(pte, address, end - address, v);
   29.61 +
   29.62 +        address = (address + PMD_SIZE) & PMD_MASK;
   29.63 +        pmd++;
   29.64 +    } while (address && (address < end));
   29.65 +    return 0;
   29.66 +}
   29.67 + 
   29.68 +int __direct_remap_area_pages(struct mm_struct *mm,
   29.69 +			      unsigned long address, 
   29.70 +			      unsigned long size, 
   29.71 +			      mmu_update_t *v)
   29.72 +{
   29.73 +    pgd_t * dir;
   29.74 +    unsigned long end = address + size;
   29.75 +
   29.76 +    dir = pgd_offset(mm, address);
   29.77 +    flush_cache_all();
   29.78 +    if (address >= end)
   29.79 +        BUG();
   29.80 +    spin_lock(&mm->page_table_lock);
    29.81 +    do {
    29.82 +        pmd_t *pmd = pmd_alloc(mm, dir, address);
    29.83 +        if (!pmd || direct_remap_area_pmd(mm, pmd, address, end - address, &v))
    29.84 +        {
    29.85 +            spin_unlock(&mm->page_table_lock);  /* don't leak the lock */
    29.86 +            return -ENOMEM;
    29.87 +        }
    29.88 +        address = (address + PGDIR_SIZE) & PGDIR_MASK;
    29.89 +        dir++;
    29.90 +    } while (address && (address < end));
   29.90 +    spin_unlock(&mm->page_table_lock);
   29.91 +    flush_tlb_all();
   29.92 +    return 0;
   29.93 +}
   29.94 +
   29.95 +
   29.96 +int direct_remap_area_pages(struct mm_struct *mm,
   29.97 +                            unsigned long address, 
   29.98 +                            unsigned long machine_addr,
   29.99 +                            unsigned long size, 
  29.100 +                            pgprot_t prot,
  29.101 +                            domid_t  domid)
  29.102 +{
  29.103 +    int i, count;
  29.104 +    unsigned long start_address;
  29.105 +#define MAX_DIRECTMAP_MMU_QUEUE 130
  29.106 +    mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
  29.107 +
  29.108      if ( domid != 0 )
  29.109      {
  29.110          u[0].val  = (unsigned long)(domid<<16) & ~0xFFFFUL;
  29.111 @@ -63,98 +139,46 @@ static inline int direct_remap_area_pte(
  29.112          v = w = &u[0];
  29.113      }
  29.114  
  29.115 -    do {
  29.116 -        if ( (v-u) == MAX_DIRECTMAP_MMU_QUEUE )
  29.117 -        {
  29.118 -            if ( HYPERVISOR_mmu_update(u, MAX_DIRECTMAP_MMU_QUEUE) < 0 )
  29.119 -                return -EFAULT;
  29.120 -            v = w;
  29.121 -        }
  29.122 -#if 0  /* thanks to new ioctl mmaping interface this is no longer a bug */
  29.123 -        if (!pte_none(*pte)) {
  29.124 -            printk("direct_remap_area_pte: page already exists\n");
  29.125 -            BUG();
  29.126 -        }
  29.127 -#endif
  29.128 -        v->ptr = virt_to_machine(pte);
  29.129 +    start_address = address;
  29.130 +
  29.131 +    for(i=0; i<size; 
  29.132 +	i+=PAGE_SIZE, machine_addr+=PAGE_SIZE, address+=PAGE_SIZE, v++)
  29.133 +    {
  29.134 +	if( (v-u) == MAX_DIRECTMAP_MMU_QUEUE )
  29.135 +	{
   29.136 +	    /* get the PTE pointers filled in */
  29.137 +	    __direct_remap_area_pages( mm,
  29.138 +				       start_address, 
  29.139 +				       address-start_address, 
  29.140 +				       w);
  29.141 +	    
  29.142 +	    count = v-u;
  29.143 +	    if ( HYPERVISOR_mmu_update(u, &count) < 0 )
  29.144 +		return -EFAULT;	    
  29.145 +	    v=w;
  29.146 +	    start_address = address;
  29.147 +	}
  29.148 +
  29.149 +	/* fill in the machine addresses */
  29.150          v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot) | _PAGE_IO;
  29.151 -        v++;
  29.152 -        address += PAGE_SIZE;
  29.153 -        machine_addr += PAGE_SIZE;
  29.154 -        pte++;
  29.155 -    } while (address && (address < end));
  29.156 +    }
  29.157  
  29.158 -    if ( ((v-w) != 0) && (HYPERVISOR_mmu_update(u, v-u) < 0) )
  29.159 -        return -EFAULT;
  29.160 +    if(v!=w)
  29.161 +    {
   29.162 +	/* get the PTE pointers filled in */
  29.163 +	__direct_remap_area_pages( mm,
  29.164 +				   start_address, 
  29.165 +				   address-start_address, 
  29.166 +				   w);	 
  29.167 +	count = v-u;
  29.168 +	if ( HYPERVISOR_mmu_update(u, &count) < 0 )
  29.169 +	    return -EFAULT;	    
  29.170  
  29.171 +    }
  29.172 +    
  29.173      return 0;
  29.174  }
  29.175  
  29.176 -static inline int direct_remap_area_pmd(struct mm_struct *mm,
  29.177 -                                        pmd_t *pmd, 
  29.178 -                                        unsigned long address, 
  29.179 -                                        unsigned long size,
  29.180 -                                        unsigned long machine_addr,
  29.181 -                                        pgprot_t prot,
  29.182 -                                        domid_t  domid)
  29.183 -{
  29.184 -    int error = 0;
  29.185 -    unsigned long end;
  29.186 -
  29.187 -    address &= ~PGDIR_MASK;
  29.188 -    end = address + size;
  29.189 -    if (end > PGDIR_SIZE)
  29.190 -        end = PGDIR_SIZE;
  29.191 -    machine_addr -= address;
  29.192 -    if (address >= end)
  29.193 -        BUG();
  29.194 -    do {
  29.195 -        pte_t * pte = pte_alloc(mm, pmd, address);
  29.196 -        if (!pte)
  29.197 -            return -ENOMEM;
  29.198 -        error = direct_remap_area_pte(pte, address, end - address, 
  29.199 -                                      address + machine_addr, prot, domid);
  29.200 -        if ( error )
  29.201 -            break;
  29.202 -        address = (address + PMD_SIZE) & PMD_MASK;
  29.203 -        pmd++;
  29.204 -    } while (address && (address < end));
  29.205 -    return error;
  29.206 -}
  29.207 - 
  29.208 -int direct_remap_area_pages(struct mm_struct *mm,
  29.209 -                            unsigned long address, 
  29.210 -                            unsigned long machine_addr,
  29.211 -                            unsigned long size, 
  29.212 -                            pgprot_t prot,
  29.213 -                            domid_t  domid)
  29.214 -{
  29.215 -    int error = 0;
  29.216 -    pgd_t * dir;
  29.217 -    unsigned long end = address + size;
  29.218 -
  29.219 -    machine_addr -= address;
  29.220 -    dir = pgd_offset(mm, address);
  29.221 -    flush_cache_all();
  29.222 -    if (address >= end)
  29.223 -        BUG();
  29.224 -    spin_lock(&mm->page_table_lock);
  29.225 -    do {
  29.226 -        pmd_t *pmd = pmd_alloc(mm, dir, address);
  29.227 -        error = -ENOMEM;
  29.228 -        if (!pmd)
  29.229 -            break;
  29.230 -        error = direct_remap_area_pmd(mm, pmd, address, end - address,
  29.231 -                                      machine_addr + address, prot, domid);
  29.232 -        if (error)
  29.233 -            break;
  29.234 -        address = (address + PGDIR_SIZE) & PGDIR_MASK;
  29.235 -        dir++;
  29.236 -    } while (address && (address < end));
  29.237 -    spin_unlock(&mm->page_table_lock);
  29.238 -    flush_tlb_all();
  29.239 -    return error;
  29.240 -}
  29.241  
  29.242  #endif /* CONFIG_XEN_PRIVILEGED_GUEST */
  29.243  
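The rewrite above splits the old single-pass remap into two passes so the hypercalls batch: the caller queues machine addresses, __direct_remap_area_pages() walks the page tables and fills in each entry's PTE pointer, and the whole queue is flushed with one HYPERVISOR_mmu_update(). A sizing note (illustrative arithmetic, not from the source):

    /* When remapping a foreign domain the first two of the 130 queue
     * slots carry the MMUEXT_SET_SUBJECTDOM_L/H prefix, so each flush
     * applies at most 128 PTE updates; for dom0 itself all 130 slots
     * carry updates. */
    int prefix    = (domid != 0) ? 2 : 0;
    int per_batch = MAX_DIRECTMAP_MMU_QUEUE - prefix;   /* 128 or 130 */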
    30.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h	Tue May 11 14:31:55 2004 +0000
    30.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h	Tue May 11 14:57:44 2004 +0000
    30.3 @@ -153,7 +153,7 @@ static inline int HYPERVISOR_set_trap_ta
    30.4      return ret;
    30.5  }
    30.6  
    30.7 -static inline int HYPERVISOR_mmu_update(mmu_update_t *req, int count)
    30.8 +static inline int HYPERVISOR_mmu_update(mmu_update_t *req, int *count)
    30.9  {
   30.10      int ret;
   30.11      __asm__ __volatile__ (
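Passing count by pointer makes it an in/out parameter: the hypervisor can presumably write back how many updates it actually consumed, which the new batched callers use to detect partially-applied batches (the matching do_mmu_update() change is in this changeset but outside this excerpt). An illustrative calling pattern, with the write-back semantics stated as an assumption:

    int count = nr_entries;             /* in: number of queued updates */
    if ( HYPERVISOR_mmu_update(updates, &count) < 0 )
        printk(KERN_WARNING
               "mmu_update failed; %d of %d entries processed\n",
               count, nr_entries);      /* out: how far it got (assumed) */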
    31.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h	Tue May 11 14:31:55 2004 +0000
    31.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h	Tue May 11 14:57:44 2004 +0000
    31.3 @@ -276,4 +276,11 @@ extern int direct_remap_area_pages(struc
    31.4                                     pgprot_t prot,
    31.5                                     domid_t  domid);
    31.6  
    31.7 +extern int __direct_remap_area_pages(struct mm_struct *mm,
    31.8 +				     unsigned long address, 
    31.9 +				     unsigned long size, 
   31.10 +				     mmu_update_t *v);
    31.11 +
   31.14  #endif /* _I386_PGALLOC_H */
    32.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h	Tue May 11 14:31:55 2004 +0000
    32.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h	Tue May 11 14:57:44 2004 +0000
    32.3 @@ -25,6 +25,13 @@ typedef struct privcmd_mmap {
    32.4      privcmd_mmap_entry_t *entry;
    32.5  } privcmd_mmap_t; 
    32.6  
    32.7 +typedef struct privcmd_mmapbatch {
    32.8 +    int num;     // number of pages to populate
    32.9 +    domid_t dom; // target domain 
   32.10 +    unsigned long addr;  // virtual address
   32.11 +    unsigned long *arr; // array of mfns - top nibble set on err
   32.12 +} privcmd_mmapbatch_t; 
   32.13 +
   32.14  typedef struct privcmd_blkmsg
   32.15  {
   32.16      unsigned long op;
   32.17 @@ -50,5 +57,7 @@ typedef struct privcmd_blkmsg
   32.18      _IOC(_IOC_NONE, 'P', 1, 0)
   32.19  #define IOCTL_PRIVCMD_MMAP             \
   32.20      _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t))
    32.21 +#define IOCTL_PRIVCMD_MMAPBATCH             \
    32.22 +    _IOC(_IOC_NONE, 'P', 3, sizeof(privcmd_mmapbatch_t))  /* distinct nr from IOCTL_PRIVCMD_MMAP */
   32.23  
   32.24  #endif /* __PROC_CMD_H__ */
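A hedged user-space sketch of driving the new batch interface end to end: reserve a virtual window by mmap()ing the privcmd device, then ask the kernel to populate it from an array of machine frames in a single ioctl. fd, domid, nr and mfns are illustrative, as is the assumption that PAGE_SHIFT is 12 on i386; only the struct fields and the ioctl name come from the header above.

    privcmd_mmapbatch_t m;

    m.num  = nr;                                  /* pages to populate */
    m.dom  = domid;                               /* target domain */
    m.addr = (unsigned long)mmap(NULL, nr << PAGE_SHIFT,
                                 PROT_READ | PROT_WRITE,
                                 MAP_SHARED, fd, 0);
    m.arr  = mfns;                                /* caller-filled MFN array */

    if ( ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &m) < 0 )
        /* whole call failed; see batch_err in dom0/core.c above */;

    /* Per-page failures are reported in place: the handler rewrites the
       failing entry in mfns[] with its top bits set (0xe0000000 | mfn). */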