ia64/xen-unstable

changeset 1333:80919e3d24af

bitkeeper revision 1.881 (409285285QhBEC23wsKBevdQUow-YA)

faster suspend
author iap10@labyrinth.cl.cam.ac.uk
date Fri Apr 30 16:56:08 2004 +0000 (2004-04-30)
parents d41f26deb150
children f7c9d0a33a21
files tools/Makefile tools/examples/xc_dom_control.py tools/xc/lib/Makefile tools/xc/lib/xc_linux_restore.c tools/xc/lib/xc_linux_save.c tools/xc/lib/xc_private.c tools/xc/lib/xc_private.h xen/common/kernel.c xen/common/memory.c xen/include/xen/sched.h xen/net/dev.c xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h
line diff
     1.1 --- a/tools/Makefile	Tue Apr 27 16:15:55 2004 +0000
     1.2 +++ b/tools/Makefile	Fri Apr 30 16:56:08 2004 +0000
     1.3 @@ -17,6 +17,10 @@ install: all
     1.4  	$(MAKE) -C xenctl install
     1.5  	$(MAKE) -C xend install
     1.6  
     1.7 +dist: $(TARGET)
     1.8 +	$(MAKE) prefix=`pwd`/../../install dist=yes install
     1.9 +
    1.10 +
    1.11  clean:
    1.12  	$(MAKE) -C balloon clean
    1.13  	$(MAKE) -C xc clean
     2.1 --- a/tools/examples/xc_dom_control.py	Tue Apr 27 16:15:55 2004 +0000
     2.2 +++ b/tools/examples/xc_dom_control.py	Fri Apr 30 16:56:08 2004 +0000
     2.3 @@ -137,6 +137,7 @@ elif cmd == 'suspend':
     2.4          os.kill(pid, signal.SIGTERM)
     2.5  
     2.6      xc.domain_stop( dom=dom )
     2.7 +    time.sleep(0.1);
     2.8      rc = xc.linux_save( dom=dom, state_file=file, progress=1)
     2.9      if rc == 0 : xc.domain_destroy( dom=dom, force=1 )
    2.10  
     3.1 --- a/tools/xc/lib/Makefile	Tue Apr 27 16:15:55 2004 +0000
     3.2 +++ b/tools/xc/lib/Makefile	Fri Apr 30 16:56:08 2004 +0000
     3.3 @@ -4,7 +4,7 @@ MINOR    = 0
     3.4  SONAME   = libxc.so.$(MAJOR)
     3.5  
     3.6  CC       = gcc
     3.7 -CFLAGS   = -c -Wall -O3 -fno-strict-aliasing
     3.8 +CFLAGS   = -c -Werror -O3 -fno-strict-aliasing
     3.9  CFLAGS  += -I../../../xen/include/hypervisor-ifs
    3.10  CFLAGS  += -I../../xend/lib
    3.11  CFLAGS  += -I../../../xenolinux-sparse/include
     4.1 --- a/tools/xc/lib/xc_linux_restore.c	Tue Apr 27 16:15:55 2004 +0000
     4.2 +++ b/tools/xc/lib/xc_linux_restore.c	Fri Apr 30 16:56:08 2004 +0000
     4.3 @@ -246,7 +246,7 @@ int xc_linux_restore(int xc_handle,
     4.4                  {
     4.5                      if ( (pfn = ppage[j] >> PAGE_SHIFT) >= nr_pfns )
     4.6                      {
     4.7 -                        ERROR("Frame number in page table is out of range");
     4.8 +                        ERROR("Frame number in type %d page table is out of range. i=%d j=%d pfn=%d nr_pfns=%d",pfn_type[i],i,j,pfn,nr_pfns);
     4.9                          goto out;
    4.10                      }
    4.11                      if ( (pfn_type[pfn] != NONE) && (ppage[j] & _PAGE_RW) )
     5.1 --- a/tools/xc/lib/xc_linux_save.c	Tue Apr 27 16:15:55 2004 +0000
     5.2 +++ b/tools/xc/lib/xc_linux_save.c	Fri Apr 30 16:56:08 2004 +0000
     5.3 @@ -24,7 +24,7 @@
     5.4   */
     5.5  #define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
     5.6      (((_mfn) < (1024*1024)) &&          \
     5.7 -     (pfn_to_mfn_table[mfn_to_pfn_table[_mfn]] == (_mfn)))
     5.8 +     (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)))
     5.9  
    5.10  /* Returns TRUE if MFN is successfully converted to a PFN. */
    5.11  #define translate_mfn_to_pfn(_pmfn)         \
    5.12 @@ -34,7 +34,7 @@
    5.13      if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )   \
    5.14          _res = 0;                           \
    5.15      else                                    \
    5.16 -        *(_pmfn) = mfn_to_pfn_table[mfn];   \
    5.17 +        *(_pmfn) = live_mfn_to_pfn_table[mfn];   \
    5.18      _res;                                   \
    5.19  })
    5.20  
    5.21 @@ -80,7 +80,7 @@ int xc_linux_save(int xc_handle,
    5.22                    int verbose)
    5.23  {
    5.24      dom0_op_t op;
    5.25 -    int rc = 1, i, j;
    5.26 +    int rc = 1, i, j, k, n;
    5.27      unsigned long mfn;
    5.28      unsigned int prev_pc, this_pc;
    5.29  
    5.30 @@ -103,25 +103,25 @@ int xc_linux_save(int xc_handle,
    5.31      unsigned long *pfn_type = NULL;
    5.32  
    5.33      /* A temporary mapping, and a copy, of one frame of guest memory. */
    5.34 -    unsigned long *ppage, page[1024];
    5.35 +    unsigned long page[1024];
    5.36  
    5.37      /* A copy of the pfn-to-mfn table frame list. */
    5.38 -    unsigned long *pfn_to_mfn_frame_list;
    5.39 -    /* A live mapping of the pfn-to-mfn table frame list. */
    5.40      unsigned long *live_pfn_to_mfn_frame_list;
    5.41 +    unsigned long pfn_to_mfn_frame_list[1024];
    5.42  
    5.43 -    /* A table translating each PFN to its current MFN. */
    5.44 -    unsigned long *pfn_to_mfn_table = NULL;
    5.45      /* Live mapping of the table mapping each PFN to its current MFN. */
    5.46      unsigned long *live_pfn_to_mfn_table = NULL;
    5.47 -    /* A table translating each current MFN to its canonical PFN. */
    5.48 -    unsigned long *mfn_to_pfn_table = NULL;
    5.49 +    /* Live mapping of system MFN to PFN table. */
    5.50 +    unsigned long *live_mfn_to_pfn_table = NULL;
    5.51      
    5.52      /* Live mapping of shared info structure */
    5.53      unsigned long *live_shinfo;
    5.54  
    5.55 +    /* base of the region in which domain memory is mapped */
    5.56 +    unsigned char *region_base;
    5.57 +
    5.58      /* A temporary mapping, and a copy, of the guest's suspend record. */
    5.59 -    suspend_record_t *srec;
    5.60 +    suspend_record_t *p_srec, srec;
    5.61  
    5.62      /* The name and descriptor of the file that we are writing to. */
    5.63      int    fd;
    5.64 @@ -187,118 +187,126 @@ int xc_linux_save(int xc_handle,
    5.65      }
    5.66  
    5.67  
    5.68 -    /* Map the suspend-record MFN to pin it */
    5.69 -    srec = mfn_mapper_map_single(xc_handle, PROT_READ, 
    5.70 -				 ctxt.cpu_ctxt.esi, sizeof(*srec));
    5.71 +    /* Map the suspend-record MFN to pin it. The page must be owned by 
    5.72 +       domid for this to succeed. */
    5.73 +    p_srec = mfn_mapper_map_single(xc_handle, domid,
    5.74 +				 sizeof(srec), PROT_READ, 
    5.75 +				 ctxt.cpu_ctxt.esi );
    5.76  
    5.77 -    /* Is the suspend-record MFN actually valid for this domain? */
    5.78 -    if ( !check_pfn_ownership(xc_handle, ctxt.cpu_ctxt.esi, domid) )
    5.79 +    if (!p_srec)
    5.80      {
    5.81 -        ERROR("Invalid state record pointer");
    5.82 +        ERROR("Couldn't map state record");
    5.83          goto out;
    5.84      }
    5.85  
    5.86 +    memcpy( &srec, p_srec, sizeof(srec) );
    5.87 +
    5.88      /* cheesy sanity check */
    5.89 -    if ( srec->nr_pfns > 1024*1024 )
    5.90 +    if ( srec.nr_pfns > 1024*1024 )
    5.91      {
    5.92          ERROR("Invalid state record -- pfn count out of range");
    5.93          goto out;
    5.94      }
    5.95  
    5.96 -    
    5.97      /* the pfn_to_mfn_frame_list fits in a single page */
    5.98      live_pfn_to_mfn_frame_list = 
    5.99 -	mfn_mapper_map_single(xc_handle, PROT_WRITE, 
   5.100 -			      srec->pfn_to_mfn_frame_list, 
   5.101 -			      PAGE_SIZE);
   5.102 +	mfn_mapper_map_single(xc_handle, domid, 
   5.103 +			      PAGE_SIZE, PROT_READ, 
   5.104 +			      srec.pfn_to_mfn_frame_list );
   5.105  
   5.106 -    if ( !check_pfn_ownership(xc_handle, 
   5.107 -			      srec->pfn_to_mfn_frame_list, domid) )
   5.108 +    if (!live_pfn_to_mfn_frame_list)
   5.109      {
   5.110 -	ERROR("Invalid pfn-to-mfn frame list pointer");
   5.111 -	goto out;
   5.112 +        ERROR("Couldn't map pfn_to_mfn_frame_list");
   5.113 +        goto out;
   5.114      }
   5.115 +   
   5.116  
   5.117 -    memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
   5.118 -
   5.119 -    if ( (mapper_handle1 = mfn_mapper_init(xc_handle, 1024*1024, PROT_READ )) 
   5.120 +    if ( (mapper_handle1 = mfn_mapper_init(xc_handle, domid,
   5.121 +					   1024*1024, PROT_READ )) 
   5.122  	 == NULL )
   5.123          goto out;
   5.124  	
   5.125 -    for ( i = 0; i < (srec->nr_pfns+1023)/1024; i++ )
   5.126 +    for ( i = 0; i < (srec.nr_pfns+1023)/1024; i++ )
   5.127      {
   5.128  	/* Grab a copy of the pfn-to-mfn table frame list. 
   5.129  	 This has the effect of preventing the page from being freed and
   5.130  	 given to another domain. (though the domain is stopped anyway...) */
   5.131  	mfn_mapper_queue_entry( mapper_handle1, i<<PAGE_SHIFT, 
   5.132 -				pfn_to_mfn_frame_list[i],
   5.133 +				live_pfn_to_mfn_frame_list[i],
   5.134  				PAGE_SIZE );
   5.135      }
   5.136      
   5.137 -    mfn_mapper_flush_queue(mapper_handle1);
   5.138 -
   5.139 -    /* Now they're pined, check they're the right dom. We assume
   5.140 -       they're not going to change, otherwise the suspend is going to fail, 
   5.141 -       with only itself to blame. */
   5.142 -
   5.143 -    for ( i = 0; i < (srec->nr_pfns+1023)/1024; i++ )
   5.144 +    if ( mfn_mapper_flush_queue(mapper_handle1) )
   5.145      {
   5.146 -	if ( !check_pfn_ownership(xc_handle, 
   5.147 -				  pfn_to_mfn_frame_list[i], domid) )
   5.148 -	{
   5.149 -	    ERROR("Invalid pfn-to-mfn frame list pointer");
   5.150 -	    goto out;
   5.151 -	}
   5.152 +        ERROR("Couldn't map pfn_to_mfn table");
   5.153 +        goto out;
   5.154      }
   5.155  
   5.156      live_pfn_to_mfn_table = mfn_mapper_base( mapper_handle1 );
   5.157  
   5.158 +
   5.159 +
   5.160      /* We want zeroed memory so use calloc rather than malloc. */
   5.161 -    mfn_to_pfn_table = calloc(1, 4 * 1024 * 1024);
   5.162 -    pfn_to_mfn_table = calloc(1, 4 * srec->nr_pfns);
   5.163 -    pfn_type         = calloc(1, 4 * srec->nr_pfns);
   5.164 +    pfn_type         = calloc(1, 4 * srec.nr_pfns);
   5.165  
   5.166 -    if ( (mfn_to_pfn_table == NULL) ||
   5.167 -         (pfn_to_mfn_table == NULL) ||
   5.168 -         (pfn_type == NULL) )
   5.169 +    if ( (pfn_type == NULL) )
   5.170      {
   5.171          errno = ENOMEM;
   5.172          goto out;
   5.173      }
   5.174  
   5.175  
   5.176 +
   5.177 +    /* Track the mfn_to_pfn table down from the domain's PT */
   5.178 +    {
   5.179 +	unsigned long *pgd;
   5.180 +	unsigned long mfn_to_pfn_table_start_mfn;
   5.181 +
   5.182 +    pgd = mfn_mapper_map_single(xc_handle, domid, 
   5.183 +				PAGE_SIZE, PROT_READ, 
   5.184 +				ctxt.pt_base>>PAGE_SHIFT);
   5.185 +/*
   5.186 +    printf("pt mfn=%d pfn=%d type=%08x pte=%08x\n",ctxt.pt_base>>PAGE_SHIFT,
   5.187 +	   mfn_to_pfn_table[ctxt.pt_base>>PAGE_SHIFT],
   5.188 +	   pfn_type[mfn_to_pfn_table[ctxt.pt_base>>PAGE_SHIFT]],
   5.189 +	   pgd[HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT] );
   5.190 +*/
   5.191 +    mfn_to_pfn_table_start_mfn = pgd[HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT]>>PAGE_SHIFT;
   5.192 +
   5.193 +    live_mfn_to_pfn_table = 
   5.194 +	mfn_mapper_map_single(xc_handle, ~0ULL, 
   5.195 +			      PAGE_SIZE*1024, PROT_READ, 
   5.196 +			      mfn_to_pfn_table_start_mfn );
   5.197 +    }
   5.198 +
   5.199 +
   5.200      /*
   5.201       * Construct the local pfn-to-mfn and mfn-to-pfn tables. On exit from this
   5.202       * loop we have each MFN mapped at most once. Note that there may be MFNs
   5.203       * that aren't mapped at all: we detect these by MFN_IS_IN_PSEUDOPHYS_MAP.
   5.204       */
   5.205  
   5.206 -    for ( i = 0; i < srec->nr_pfns; i++ )
   5.207 +    for ( i = 0; i < srec.nr_pfns; i++ )
   5.208      {
   5.209          mfn = live_pfn_to_mfn_table[i];
   5.210  
   5.211 -        if ( !check_pfn_ownership(xc_handle, mfn, domid) )
   5.212 -        {
   5.213 -            ERROR("Invalid frame specified with pfn-to-mfn table");
   5.214 -            goto out;
   5.215 -        }
   5.216 +#if 1  /* XXX use the master mfn_to_pfn table???? */
   5.217 +
   5.218  
   5.219 -        /* Did we map this MFN already? That would be invalid! */
   5.220 -        if ( mfn_to_pfn_table[mfn] )
   5.221 -        {
   5.222 -            ERROR("A machine frame appears twice in pseudophys space");
   5.223 -            goto out;
   5.224 -        }
   5.225 -
   5.226 -        mfn_to_pfn_table[mfn] = i;
   5.227 -	pfn_to_mfn_table[i] = live_pfn_to_mfn_table[i];
   5.228 +	if( live_mfn_to_pfn_table[mfn] != i )
   5.229 +	    printf("i=%d mfn=%d live_mfn_to_pfn_table=%d\n",
   5.230 +		   i,mfn,live_mfn_to_pfn_table[mfn]);
   5.231  
   5.232          /* Query page type by MFN, but store it by PFN. */
   5.233          if ( (pfn_type[i] = get_pfn_type(xc_handle, mfn, domid)) == 
   5.234               GETPFN_ERR )
   5.235              goto out;
   5.236 +#endif
   5.237 +
   5.238      }
   5.239  
   5.240 +
   5.241 +
   5.242      /* Canonicalise the suspend-record frame number. */
   5.243      if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) )
   5.244      {
   5.245 @@ -322,9 +330,10 @@ int xc_linux_save(int xc_handle,
   5.246          ERROR("PT base is not in range of pseudophys map");
   5.247          goto out;
   5.248      }
   5.249 -    ctxt.pt_base = mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
   5.250 +    ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
   5.251  
   5.252      /* Canonicalise the pfn-to-mfn table frame-number list. */
   5.253 +    memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
   5.254      for ( i = 0; i < srec.nr_pfns; i += 1024 )
   5.255      {
   5.256          if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
   5.257 @@ -335,8 +344,15 @@ int xc_linux_save(int xc_handle,
   5.258      }
   5.259  
   5.260      /* Start writing out the saved-domain record. */
   5.261 -    live_shinfo = mfn_mapper_map_single(xc_handle, PROT_READ,
   5.262 -				  shared_info_frame, PAGE_SIZE);
   5.263 +    live_shinfo = mfn_mapper_map_single(xc_handle, domid,
   5.264 +					PAGE_SIZE, PROT_READ,
   5.265 +					shared_info_frame);
   5.266 +
   5.267 +    if (!live_shinfo)
   5.268 +    {
   5.269 +        ERROR("Couldn't map live_shinfo");
   5.270 +        goto out;
   5.271 +    }
   5.272  
   5.273      /* Belts and braces safety check on the shared info record */
   5.274      if ( !check_pfn_ownership(xc_handle, shared_info_frame, domid) )
   5.275 @@ -360,7 +376,9 @@ int xc_linux_save(int xc_handle,
   5.276  
   5.277      verbose_printf("Saving memory pages:   0%%");
   5.278  
   5.279 -    if ( (mapper_handle2 = mfn_mapper_init(xc_handle, 
   5.280 +#define BATCH_SIZE 1024   /* 1024 pages (4MB) at a time */
   5.281 +
   5.282 +    if ( (mapper_handle2 = mfn_mapper_init(xc_handle, domid,
   5.283  					   BATCH_SIZE*4096, PROT_READ )) 
   5.284  	 == NULL )
   5.285          goto out;
   5.286 @@ -383,12 +401,19 @@ int xc_linux_save(int xc_handle,
   5.287  	{
   5.288  	    /* queue up mappings for all of the pages in this batch */
   5.289  
   5.290 +//printf("region n=%d j=%d i=%d mfn=%d\n",n,j,i,live_pfn_to_mfn_table[i]);
   5.291  	    mfn_mapper_queue_entry( mapper_handle2, j<<PAGE_SHIFT, 
   5.292 -				    pfn_to_mfn_frame_list[i],
   5.293 +				    live_pfn_to_mfn_table[i],
   5.294  				    PAGE_SIZE );
   5.295  	}
   5.296 -	mfn_mapper_flush_queue(mapper_handle2);
   5.297 -	   
   5.298 +
   5.299 +	if( mfn_mapper_flush_queue(mapper_handle2) )
   5.300 +	{
   5.301 +	    ERROR("Couldn't map page region");
   5.302 +	    goto out;
   5.303 +	}
   5.304 +
   5.305 +#if 0	   
   5.306  	typer_handle = get_type_init( xc_handle, BATCH_SIZE )
   5.307  
   5.308  	for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
   5.309 @@ -399,42 +424,51 @@ int xc_linux_save(int xc_handle,
   5.310  				  pfn_to_mfn_frame_list[i] );
   5.311  	}
   5.312  
   5.313 +	region_type = get_type;
   5.314  
   5.315 -	region_type = get_type;
   5.316 +#endif
   5.317  
   5.318  	for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
   5.319  	{
   5.320  	    /* write out pages in batch */
   5.321  
   5.322 -	    mfn = pfn_to_mfn_table[i];
   5.323 -
   5.324 -	    ppage = map_pfn_readonly(pm_handle, mfn);
   5.325 -	    memcpy(page, ppage, PAGE_SIZE);
   5.326 -	    unmap_pfn(pm_handle, ppage);
   5.327 -
   5.328  	    if ( (pfn_type[i] == L1TAB) || (pfn_type[i] == L2TAB) )
   5.329  	    {
   5.330 -		for ( j = 0; 
   5.331 -		      j < ((pfn_type[i] == L2TAB) ? 
   5.332 +		
   5.333 +		memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
   5.334 +
   5.335 +		for ( k = 0; 
   5.336 +		      k < ((pfn_type[i] == L2TAB) ? 
   5.337  			   (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024); 
   5.338 -		      j++ )
   5.339 +		      k++ )
   5.340  		{
   5.341 -		    if ( !(page[j] & _PAGE_PRESENT) ) continue;
   5.342 -		    mfn = page[j] >> PAGE_SHIFT;
   5.343 +		    if ( !(page[k] & _PAGE_PRESENT) ) continue;
   5.344 +		    mfn = page[k] >> PAGE_SHIFT;
   5.345  		    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
   5.346  		    {
   5.347  			ERROR("Frame number in pagetable page is invalid");
   5.348  			goto out;
   5.349  		    }
   5.350 -		    page[j] &= PAGE_SIZE - 1;
   5.351 -		    page[j] |= mfn_to_pfn_table[mfn] << PAGE_SHIFT;
   5.352 +		    page[k] &= PAGE_SIZE - 1;
   5.353 + 		    page[k] |= live_mfn_to_pfn_table[mfn] << PAGE_SHIFT;
   5.354 +
   5.355  		}
   5.356 -	    }
   5.357  
   5.358 -	    if ( !checked_write(gfd, page, PAGE_SIZE) )
   5.359 +		if ( !checked_write(gfd, page, PAGE_SIZE) )
   5.360 +		{
   5.361 +		    ERROR("Error when writing to state file");
   5.362 +		    goto out;
   5.363 +		}
   5.364 +
   5.365 +
   5.366 +	    }
   5.367 +	    else
   5.368  	    {
   5.369 -		ERROR("Error when writing to state file");
   5.370 -		goto out;
   5.371 +		if ( !checked_write(gfd, region_base + (PAGE_SIZE*j), PAGE_SIZE) )
   5.372 +		{
   5.373 +		    ERROR("Error when writing to state file");
   5.374 +		    goto out;
   5.375 +		}
   5.376  	    }
   5.377  	}
   5.378  	
   5.379 @@ -450,6 +484,7 @@ out:
   5.380      /* Restart the domain if we had to stop it to save its state. */
   5.381      if ( we_stopped_it )
   5.382      {
   5.383 +	printf("Restart domain\n");
   5.384          op.cmd = DOM0_STARTDOMAIN;
   5.385          op.u.startdomain.domain = (domid_t)domid;
   5.386          (void)do_dom0_op(xc_handle, &op);
   5.387 @@ -457,13 +492,6 @@ out:
   5.388  
   5.389      gzclose(gfd);
   5.390  
   5.391 -    if ( pm_handle >= 0 )
   5.392 -        (void)close_pfn_mapper(pm_handle);
   5.393 -
   5.394 -    if ( pfn_to_mfn_table != NULL )
   5.395 -        free(pfn_to_mfn_table);
   5.396 -    if ( mfn_to_pfn_table != NULL )
   5.397 -        free(mfn_to_pfn_table);
   5.398      if ( pfn_type != NULL )
   5.399          free(pfn_type);
   5.400  
     6.1 --- a/tools/xc/lib/xc_private.c	Tue Apr 27 16:15:55 2004 +0000
     6.2 +++ b/tools/xc/lib/xc_private.c	Fri Apr 30 16:56:08 2004 +0000
     6.3 @@ -47,8 +47,9 @@ void unmap_pfn(int pm_handle, void *vadd
     6.4  
     6.5  /*******************/
     6.6  
     6.7 -void * mfn_mapper_map_single(int xc_handle, int prot, 
     6.8 -			     unsigned long mfn, int size)
     6.9 +void * mfn_mapper_map_single(int xc_handle, domid_t dom,
    6.10 +			     int size, int prot,
    6.11 +			     unsigned long mfn )
    6.12  {
    6.13      privcmd_mmap_t ioctlx; 
    6.14      privcmd_mmap_entry_t entry; 
    6.15 @@ -57,6 +58,7 @@ void * mfn_mapper_map_single(int xc_hand
    6.16      if (addr)
    6.17      {
    6.18  	ioctlx.num=1;
    6.19 +	ioctlx.dom=dom;
    6.20  	ioctlx.entry=&entry;
    6.21  	entry.va=(unsigned long) addr;
    6.22  	entry.mfn=mfn;
    6.23 @@ -67,7 +69,7 @@ void * mfn_mapper_map_single(int xc_hand
    6.24      return addr;
    6.25  }
    6.26  
    6.27 -mfn_mapper_t * mfn_mapper_init(int xc_handle, int size, int prot)
    6.28 +mfn_mapper_t * mfn_mapper_init(int xc_handle, domid_t dom, int size, int prot)
    6.29  {
    6.30      mfn_mapper_t * t;
    6.31      t = calloc( 1, sizeof(mfn_mapper_t)+
    6.32 @@ -76,6 +78,7 @@ mfn_mapper_t * mfn_mapper_init(int xc_ha
    6.33      t->xc_handle = xc_handle;
    6.34      t->size = size;
    6.35      t->prot = prot;
    6.36 +    t->error = 0;
    6.37      t->max_queue_size = mfn_mapper_queue_size;
    6.38      t->addr = mmap( NULL, size, prot, MAP_SHARED, xc_handle, 0 );
    6.39      if (!t->addr)
    6.40 @@ -84,6 +87,7 @@ mfn_mapper_t * mfn_mapper_init(int xc_ha
    6.41  	return NULL;
    6.42      }
    6.43      t->ioctl.num = 0;
    6.44 +    t->ioctl.dom = dom;
    6.45      t->ioctl.entry = (privcmd_mmap_entry_t *) &t[1];
    6.46      return t;
    6.47  }
    6.48 @@ -99,14 +103,29 @@ void mfn_mapper_close(mfn_mapper_t *t)
    6.49      free(t);    
    6.50  }
    6.51  
    6.52 +static int __mfn_mapper_flush_queue(mfn_mapper_t *t)
    6.53 +{
    6.54 +    int rc;
    6.55 +    rc = ioctl( t->xc_handle, IOCTL_PRIVCMD_MMAP, &t->ioctl );
    6.56 +    t->ioctl.num = 0;    
    6.57 +    if(rc && !t->error) 
    6.58 +	t->error = rc;
    6.59 +    return rc;
    6.60 +}
    6.61 +
    6.62  int mfn_mapper_flush_queue(mfn_mapper_t *t)
    6.63  {
    6.64      int rc;
    6.65 +    
    6.66 +    rc = __mfn_mapper_flush_queue(t);
    6.67  
    6.68 -    rc = ioctl( t->xc_handle, IOCTL_PRIVCMD_MMAP, &t->ioctl );
    6.69 -    if (rc<0) return rc;
    6.70 -    t->ioctl.num = 0;
    6.71 -    return 0;
    6.72 +    if ( t->error )
    6.73 +    {
    6.74 +	rc = t->error;
    6.75 +    }
    6.76 +
    6.77 +    t->error = 0;
    6.78 +    return rc;
    6.79  }
    6.80  
    6.81  void * mfn_mapper_queue_entry(mfn_mapper_t *t, int offset, 
    6.82 @@ -123,30 +142,113 @@ void * mfn_mapper_queue_entry(mfn_mapper
    6.83      {
    6.84  	prev = &t->ioctl.entry[t->ioctl.num-1];       
    6.85  
    6.86 -	if ( (prev->va+(prev->npages*PAGE_SIZE)) == (t->addr+offset) &&
    6.87 +	if ( (prev->va+(prev->npages*PAGE_SIZE)) == 
    6.88 +	     ((unsigned long)t->addr+offset) &&
    6.89  	     (prev->mfn+prev->npages) == mfn )
    6.90  	{
    6.91  	    prev->npages += pages;
    6.92 -printf("merge\n");
    6.93  	    return t->addr+offset;
    6.94  	}
    6.95      }
    6.96       
    6.97 -    entry->va = t->addr+offset;
    6.98 +    entry->va = (unsigned long)t->addr+offset;
    6.99      entry->mfn = mfn;
   6.100      entry->npages = pages;
   6.101      t->ioctl.num++;       
   6.102  
   6.103      if(t->ioctl.num == t->max_queue_size)
   6.104      {
   6.105 -	if ( mfn_mapper_flush_queue(t) )
   6.106 -	return 0;
   6.107 +	if ( __mfn_mapper_flush_queue(t) )
   6.108 +	    return 0;
   6.109      }
   6.110  
   6.111      return t->addr+offset;
   6.112  }
   6.113  
   6.114  
   6.115 +/*******************/
   6.116 +
   6.117 +typedef struct dom0_op_compact_getpageframeinfo {
   6.118 +    unsigned long cmd;
   6.119 +    unsigned long interface_version; /* DOM0_INTERFACE_VERSION */
   6.120 +    dom0_getpageframeinfo_t getpageframeinfo;
   6.121 +}  dom0_op_compact_getpageframeinfo_t;
   6.122 +
   6.123 +
   6.124 +typedef struct mfn_typer {
   6.125 +    domid_t dom;
   6.126 +    int max;
   6.127 +    int nr_multicall_ents;
   6.128 +    multicall_entry_t *multicall_list;
   6.129 +    dom0_op_compact_getpageframeinfo_t *gpf_list;
   6.130 +} mfn_typer_t;
   6.131 +
   6.132 +
   6.133 +mfn_typer_t *mfn_typer_init(int xc_handle, domid_t dom, int num );
   6.134 +
   6.135 +void mfn_typer_queue_entry(mfn_typer_t *t, unsigned long mfn );
   6.136 +
   6.137 +int mfn_typer_flush_queue(mfn_typer_t *t);
   6.138 +
   6.139 +unsigned int mfn_typer_get_result(mfn_typer_t *t, int idx);
   6.140 +
   6.141 +mfn_typer_t *mfn_typer_init(int xc_handle, domid_t dom, int num )
   6.142 +{
   6.143 +    mfn_typer_t *t;
   6.144 +    multicall_entry_t *m;
   6.145 +    dom0_op_compact_getpageframeinfo_t *d;
   6.146 +
   6.147 +    t = calloc(1, sizeof(mfn_typer_t) );
   6.148 +    m = calloc(1, sizeof(multicall_entry_t)*num );
   6.149 +    d = calloc(1, sizeof(dom0_op_compact_getpageframeinfo_t)*num );
   6.150 +
   6.151 +    if (!t || !m || !d)
   6.152 +    {
   6.153 +	if(t) free(t);	
   6.154 +	if(m) free(m);
   6.155 +	if(d) free(d);
   6.156 +	return NULL;
   6.157 +    }
   6.158 +
   6.159 +    t->max = num;
   6.160 +    t->nr_multicall_ents=0;
   6.161 +    t->multicall_list=m;
   6.162 +    t->gpf_list=d;
   6.163 +    t->dom = dom;
   6.164 +
   6.165 +    return t;
   6.166 +}
   6.167 +
   6.168 +void mfn_typer_queue_entry(mfn_typer_t *t, unsigned long mfn )
   6.169 +{
   6.170 +    int i = t->nr_multicall_ents;
   6.171 +    multicall_entry_t *m = &t->multicall_list[i];
   6.172 +    dom0_op_compact_getpageframeinfo_t *d = &t->gpf_list[i];
   6.173 +
   6.174 +    d->cmd = DOM0_GETPAGEFRAMEINFO;
   6.175 +    d->interface_version = DOM0_INTERFACE_VERSION;
   6.176 +    d->getpageframeinfo.pfn = mfn;
   6.177 +    d->getpageframeinfo.domain = t->dom;
   6.178 +    d->getpageframeinfo.type = ~0UL;
   6.179 +      
   6.180 +    m->op = __HYPERVISOR_dom0_op;
   6.181 +    m->args[0] = (unsigned long)d;
   6.182 +   
   6.183 +    t->nr_multicall_ents++;
   6.184 +}
   6.185 +
   6.186 +int mfn_typer_flush_queue(mfn_typer_t *t)
   6.187 +{
   6.188 +    if (t->nr_multicall_ents == 0) return 0;
   6.189 +    (void)HYPERVISOR_multicall(t->multicall_list, t->nr_multicall_ents);
   6.190 +    t->nr_multicall_ents = 0;
   6.191 +}
   6.192 +
   6.193 +unsigned int mfn_typer_get_result(mfn_typer_t *t, int idx)
   6.194 +{
   6.195 +    return t->gpf_list[idx].getpageframeinfo.type;
   6.196 +}
   6.197 +
   6.198  
   6.199  
   6.200  /*******************/
     7.1 --- a/tools/xc/lib/xc_private.h	Tue Apr 27 16:15:55 2004 +0000
     7.2 +++ b/tools/xc/lib/xc_private.h	Fri Apr 30 16:56:08 2004 +0000
     7.3 @@ -16,8 +16,6 @@
     7.4  
     7.5  #include "xc.h"
     7.6  
     7.7 -#include <asm-xen/proc_cmd.h>
     7.8 -
     7.9  /* from xen/include/hypervisor-ifs */
    7.10  #include <hypervisor-if.h>
    7.11  #include <dom0_ops.h>
    7.12 @@ -25,6 +23,10 @@
    7.13  #include <event_channel.h>
    7.14  #include <sched_ctl.h>
    7.15  
    7.16 +#include <asm-xen/proc_cmd.h>
    7.17 +
    7.18 +
    7.19 +
    7.20  /* from xend/lib */
    7.21  #include <domain_controller.h>
    7.22  
    7.23 @@ -188,6 +190,7 @@ typedef struct privcmd_mmap_entry {
    7.24  
    7.25  typedef struct privcmd_mmap {
    7.26      int num;
    7.27 +    domid_t dom;
    7.28      privcmd_mmap_entry_t *entry;
    7.29  } privcmd_mmap_t; 
    7.30  */
    7.31 @@ -198,16 +201,17 @@ typedef struct mfn_mapper {
    7.32      int xc_handle;
    7.33      int size;
    7.34      int prot;
    7.35 +    int error;
    7.36      int max_queue_size;
    7.37      void * addr;
    7.38      privcmd_mmap_t ioctl; 
    7.39      
    7.40  } mfn_mapper_t;
    7.41  
    7.42 -void * mfn_mapper_map_single(int xc_handle, int prot, 
    7.43 -			     unsigned long mfn, int size);
    7.44 +void * mfn_mapper_map_single(int xc_handle, domid_t dom, int size, int prot, 
    7.45 +			     unsigned long mfn );
    7.46  
    7.47 -mfn_mapper_t * mfn_mapper_init(int xc_handle, int size, int prot);
    7.48 +mfn_mapper_t * mfn_mapper_init(int xc_handle, domid_t dom, int size, int prot);
    7.49  
    7.50  void * mfn_mapper_base(mfn_mapper_t *t);
    7.51  
     8.1 --- a/xen/common/kernel.c	Tue Apr 27 16:15:55 2004 +0000
     8.2 +++ b/xen/common/kernel.c	Fri Apr 30 16:56:08 2004 +0000
     8.3 @@ -255,6 +255,9 @@ void cmain(unsigned long magic, multiboo
     8.4  
     8.5      start_of_day();
     8.6  
     8.7 +    /* Add CPU0 idle task to the task hash list */
     8.8 +    task_hash[TASK_HASH(IDLE_DOMAIN_ID)] = &idle0_task;
     8.9 +
    8.10      /* Create initial domain 0. */
    8.11      dom0_params.memory_kb = opt_dom0_mem;
    8.12      new_dom = do_createdomain(0, 0);
     9.1 --- a/xen/common/memory.c	Tue Apr 27 16:15:55 2004 +0000
     9.2 +++ b/xen/common/memory.c	Fri Apr 30 16:56:08 2004 +0000
     9.3 @@ -194,6 +194,9 @@ static struct {
     9.4   */
     9.5  void __init init_frametable(unsigned long nr_pages)
     9.6  {
     9.7 +    int i;
     9.8 +    unsigned long mfn;
     9.9 +
    9.10      memset(percpu_info, 0, sizeof(percpu_info));
    9.11  
    9.12      max_page = nr_pages;
    9.13 @@ -206,8 +209,20 @@ void __init init_frametable(unsigned lon
    9.14      INIT_LIST_HEAD(&free_list);    
    9.15      free_pfns = 0;
    9.16  
    9.17 +    /* so that we can map them later, set the ownership of pages
    9.18 +       belonging to the machine_to_phys_mapping to CPU0 idle task */
    9.19 +    
    9.20 +    mfn = virt_to_phys((void *)RDWR_MPT_VIRT_START)>>PAGE_SHIFT;
    9.21 +//    for(i=0;i<nr_pages;i+=1024,mfn++)
    9.22 +    for(i=0;i<1024*1024;i+=1024,mfn++)
    9.23 +    {
    9.24 +	frame_table[mfn].count_and_flags = 1 | PGC_allocated;
    9.25 +	frame_table[mfn].type_and_flags = 1 | PGT_gdt_page; // anything non RW
    9.26 +	frame_table[mfn].u.domain = &idle0_task;
    9.27 +    }
    9.28  }
    9.29  
    9.30 +
    9.31  void add_to_domain_alloc_list(unsigned long ps, unsigned long pe)
    9.32  {
    9.33      struct pfn_info *pf;
    9.34 @@ -895,7 +910,8 @@ static int do_extended_command(unsigned 
    9.35          break;
    9.36  
    9.37      case MMUEXT_SET_SUBJECTDOM_H:
    9.38 -        percpu_info[cpu].subject_id |= (domid_t)((ptr&~0xFFFF)|(val>>16))<<32;
    9.39 +        percpu_info[cpu].subject_id |= ((domid_t)((ptr&~0xFFFF)|(val>>16)))<<32;
    9.40 +
    9.41          if ( !IS_PRIV(current) )
    9.42          {
    9.43              MEM_LOG("Dom %llu has no privilege to set subject domain",
    10.1 --- a/xen/include/xen/sched.h	Tue Apr 27 16:15:55 2004 +0000
    10.2 +++ b/xen/include/xen/sched.h	Fri Apr 30 16:56:08 2004 +0000
    10.3 @@ -209,7 +209,8 @@ struct task_struct
    10.4      mm:          IDLE0_MM,       \
    10.5      addr_limit:  KERNEL_DS,      \
    10.6      thread:      INIT_THREAD,    \
    10.7 -    flags:       1<<PF_IDLETASK  \
    10.8 +    flags:       1<<PF_IDLETASK, \
    10.9 +    refcnt:      ATOMIC_INIT(1)  \
   10.10  }
   10.11  
   10.12  extern struct task_struct idle0_task;
    11.1 --- a/xen/net/dev.c	Tue Apr 27 16:15:55 2004 +0000
    11.2 +++ b/xen/net/dev.c	Fri Apr 30 16:56:08 2004 +0000
    11.3 @@ -2156,8 +2156,9 @@ static void get_rx_bufs(net_vif_t *vif)
    11.4                                0) != 
    11.5                        (PGC_allocated | PGC_tlb_flush_on_type_change | 2)) )
    11.6          {
    11.7 -            DPRINTK("Page held more than once %08x\n", 
    11.8 -                    buf_page->count_and_flags);
    11.9 +            DPRINTK("Page held more than once %08x %s\n", 
   11.10 +                    buf_page->count_and_flags,
   11.11 +		    (buf_page->u.domain)?buf_page->u.domain->name:"None");
   11.12              if ( !get_page_type(buf_page, PGT_writeable_page) )
   11.13                  put_page(buf_page);
   11.14              else if ( cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) !=
    12.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c	Tue Apr 27 16:15:55 2004 +0000
    12.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c	Fri Apr 30 16:56:08 2004 +0000
    12.3 @@ -67,7 +67,7 @@ static int privcmd_ioctl(struct inode *i
    12.4  #define PRIVCMD_MMAP_SZ 32
    12.5  	privcmd_mmap_t mmapcmd;
    12.6  	privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ], *p;
    12.7 -	int i;
    12.8 +	int i, rc;
    12.9  
   12.10          if ( copy_from_user(&mmapcmd, (void *)data, sizeof(mmapcmd)) )
   12.11              return -EFAULT;
   12.12 @@ -95,12 +95,13 @@ static int privcmd_ioctl(struct inode *i
   12.13  		if (msg[j].va + (msg[j].npages<<PAGE_SHIFT) > vma->vm_end)
   12.14  		    return -EINVAL;
   12.15  
   12.16 -		if (direct_remap_area_pages(vma->vm_mm, 
   12.17 +		if (rc = direct_remap_area_pages(vma->vm_mm, 
   12.18  					    msg[j].va&PAGE_MASK, 
   12.19  					    msg[j].mfn<<PAGE_SHIFT, 
   12.20  					    msg[j].npages<<PAGE_SHIFT, 
   12.21 -					    vma->vm_page_prot))
   12.22 -		    return -EINVAL;
   12.23 +					    vma->vm_page_prot,
   12.24 +					    mmapcmd.dom))
   12.25 +		    return rc;
   12.26  	    }
   12.27  	}
   12.28  	ret = 0;
    13.1 --- a/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c	Tue Apr 27 16:15:55 2004 +0000
    13.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c	Fri Apr 30 16:56:08 2004 +0000
    13.3 @@ -27,7 +27,7 @@
    13.4  #define direct_mk_pte_phys(physpage, pgprot) \
    13.5    __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
    13.6  
    13.7 -static inline void direct_remap_area_pte(pte_t *pte, 
    13.8 +static inline int direct_remap_area_pte(pte_t *pte, 
    13.9                                           unsigned long address, 
   13.10                                           unsigned long size,
   13.11                                           unsigned long machine_addr, 
   13.12 @@ -39,6 +39,9 @@ static inline void direct_remap_area_pte
   13.13      mmu_update_t *u, *v;
   13.14      u = v = vmalloc(3*PAGE_SIZE); /* plenty */
   13.15  
   13.16 +    if (!u) 
   13.17 +	return -ENOMEM;
   13.18 +
   13.19      /* If not I/O mapping then specify General-Purpose Subject Domain (GPS). */
   13.20      if ( domid != 0 )
   13.21      {
   13.22 @@ -74,10 +77,15 @@ static inline void direct_remap_area_pte
   13.23          pte++;
   13.24      } while (address && (address < end));
   13.25  
   13.26 -    if ( ((v-u) != 0) && (HYPERVISOR_mmu_update(u, v-u) < 0) )
   13.27 +    if ( ((v-u) > 2) && (HYPERVISOR_mmu_update(u, v-u) < 0) )
   13.28 +    {
   13.29          printk(KERN_WARNING "Failed to ioremap %08lx->%08lx (%08lx)\n",
   13.30                 end-size, end, machine_addr-size);
   13.31 +	return -EINVAL;
   13.32 +    }
   13.33 +
   13.34      vfree(u);
   13.35 +    return 0;
   13.36  }
   13.37  
   13.38  static inline int direct_remap_area_pmd(struct mm_struct *mm,
   13.39 @@ -89,6 +97,7 @@ static inline int direct_remap_area_pmd(
   13.40                                          domid_t  domid)
   13.41  {
   13.42      unsigned long end;
   13.43 +    int rc;
   13.44  
   13.45      address &= ~PGDIR_MASK;
   13.46      end = address + size;
   13.47 @@ -101,8 +110,11 @@ static inline int direct_remap_area_pmd(
   13.48          pte_t * pte = pte_alloc(mm, pmd, address);
   13.49          if (!pte)
   13.50              return -ENOMEM;
   13.51 -        direct_remap_area_pte(pte, address, end - address, 
   13.52 -                              address + machine_addr, prot, domid);
   13.53 +
   13.54 +        if ( rc = direct_remap_area_pte(pte, address, end - address, 
   13.55 +                              address + machine_addr, prot, domid) )
   13.56 +	    return rc;
   13.57 +
   13.58          address = (address + PMD_SIZE) & PMD_MASK;
   13.59          pmd++;
   13.60      } while (address && (address < end));
   13.61 @@ -120,8 +132,8 @@ int direct_remap_area_pages(struct mm_st
   13.62      pgd_t * dir;
   13.63      unsigned long end = address + size;
   13.64  
   13.65 -printk("direct_remap_area_pages va=%08lx ma=%08lx size=%d\n",
   13.66 -       address, machine_addr, size);
   13.67 +/*printk("direct_remap_area_pages va=%08lx ma=%08lx size=%d\n",
   13.68 +       address, machine_addr, size);*/
   13.69  
   13.70      machine_addr -= address;
   13.71      dir = pgd_offset(mm, address);
    14.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h	Tue Apr 27 16:15:55 2004 +0000
    14.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h	Fri Apr 30 16:56:08 2004 +0000
    14.3 @@ -21,6 +21,7 @@ typedef struct privcmd_mmap_entry {
    14.4  
    14.5  typedef struct privcmd_mmap {
    14.6      int num;
    14.7 +    domid_t dom; /* target domain */
    14.8      privcmd_mmap_entry_t *entry;
    14.9  } privcmd_mmap_t; 
   14.10