ia64/xen-unstable

changeset 2116:6c0df1212e95

bitkeeper revision 1.1159.1.7 (4113d1c6Eo4646ueDNflRbNI2S3wLg)

Merge labyrinth.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into labyrinth.cl.cam.ac.uk:/auto/anfs/scratch/tetris/iap10/xeno-clone/xeno.bk
author iap10@labyrinth.cl.cam.ac.uk
date Fri Aug 06 18:45:26 2004 +0000 (2004-08-06)
parents 1b4dbe8d9172 b9edbe5d4952
children bab0a551830b
files .rootkeys linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c linux-2.4.26-xen-sparse/arch/xen/mm/init.c tools/misc/setdomainmaxmem
line diff
     1.1 --- a/.rootkeys	Fri Aug 06 18:15:02 2004 +0000
     1.2 +++ b/.rootkeys	Fri Aug 06 18:45:26 2004 +0000
     1.3 @@ -298,6 +298,7 @@ 40c9c469kT0H9COWzA4XzPBjWK0WsA tools/mis
     1.4  4022a73cEKvrYe_DVZW2JlAxobg9wg tools/misc/nsplitd/Makefile
     1.5  4022a73cKms4Oq030x2JBzUB426lAQ tools/misc/nsplitd/nsplitd.c
     1.6  3f870808_8aFBAcZbWiWGdgrGQyIEw tools/misc/p4perf.h
     1.7 +4113d1afyPjO8m8-9E1pVBDHzGe1jQ tools/misc/setdomainmaxmem
     1.8  3f5ef5a2ir1kVAthS14Dc5QIRCEFWg tools/misc/xen-clone
     1.9  3f5ef5a2dTZP0nnsFoeq2jRf3mWDDg tools/misc/xen-clone.README
    1.10  3f870808zS6T6iFhqYPGelroZlVfGQ tools/misc/xen_cpuperf.c
     2.1 --- a/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c	Fri Aug 06 18:15:02 2004 +0000
     2.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c	Fri Aug 06 18:45:26 2004 +0000
     2.3 @@ -17,6 +17,8 @@
     2.4  #include <linux/mman.h>
     2.5  #include <linux/smp_lock.h>
     2.6  #include <linux/pagemap.h>
     2.7 +#include <linux/bootmem.h>
     2.8 +#include <linux/highmem.h>
     2.9  #include <linux/vmalloc.h>
    2.10  
    2.11  #include <asm/hypervisor.h>
    2.12 @@ -39,7 +41,7 @@ typedef struct user_balloon_op {
    2.13  
    2.14  static struct proc_dir_entry *balloon_pde;
    2.15  unsigned long credit;
    2.16 -static unsigned long current_pages, max_pages;
    2.17 +static unsigned long current_pages, most_seen_pages;
    2.18  
    2.19  static inline pte_t *get_ptep(unsigned long addr)
    2.20  {
    2.21 @@ -69,41 +71,43 @@ static unsigned long inflate_balloon(uns
    2.22      parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
    2.23      if ( parray == NULL )
    2.24      {
    2.25 -        printk("inflate_balloon: Unable to vmalloc parray\n");
    2.26 -        return 0;
    2.27 +        printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n");
    2.28 +        return -EFAULT;
    2.29      }
    2.30  
    2.31      currp = parray;
    2.32  
    2.33 -    for ( i = 0; i < num_pages; i++ )
    2.34 +    for ( i = 0; i < num_pages; i++, currp++ )
    2.35      {
    2.36 -        /* NB. Should be GFP_ATOMIC for a less aggressive inflation. */
    2.37 -        vaddr = __get_free_page(GFP_KERNEL);
    2.38 +	struct page *page = alloc_page(GFP_HIGHUSER);
    2.39 +	unsigned long pfn =  page - mem_map;
    2.40  
    2.41          /* If allocation fails then free all reserved pages. */
    2.42 -        if ( vaddr == 0 )
    2.43 +        if ( page == 0 )
    2.44          {
    2.45 -            printk("Unable to inflate balloon by %ld, only %ld pages free.",
    2.46 +            printk(KERN_ERR "Unable to inflate balloon by %ld, only %ld pages free.",
    2.47                     num_pages, i);
    2.48              currp = parray;
    2.49 -            for(j = 0; j < i; j++){
    2.50 -                free_page(*currp++);
    2.51 +            for(j = 0; j < i; j++, ++currp){
    2.52 +                __free_page((struct page *) (mem_map + *currp));
    2.53              }
    2.54 +	    ret = -EFAULT;
    2.55              goto cleanup;
    2.56          }
    2.57  
    2.58 -        *currp++ = vaddr;
    2.59 +        *currp = pfn;
    2.60      }
    2.61  
    2.62  
    2.63 -    currp = parray;
    2.64 -    for ( i = 0; i < num_pages; i++ )
    2.65 +    for ( i = 0, currp = parray; i < num_pages; i++, currp++ )
    2.66      {
    2.67 -        curraddr = *currp;
    2.68 -        *currp = virt_to_machine(*currp) >> PAGE_SHIFT;
    2.69 -        queue_l1_entry_update(get_ptep(curraddr), 0);
    2.70 -        phys_to_machine_mapping[__pa(curraddr) >> PAGE_SHIFT] = DEAD;
    2.71 -        currp++;
    2.72 +	unsigned long mfn = phys_to_machine_mapping[*currp];
    2.73 +        curraddr = page_address(mem_map + *currp);
    2.74 +	if (curraddr)
    2.75 +            queue_l1_entry_update(get_ptep(curraddr), 0);
    2.76 +
    2.77 +        phys_to_machine_mapping[*currp] = DEAD;
    2.78 +        *currp = mfn;
    2.79      }
    2.80  
    2.81      XEN_flush_page_update_queue();
    2.82 @@ -112,7 +116,7 @@ static unsigned long inflate_balloon(uns
    2.83                                  parray, num_pages, 0);
    2.84      if ( unlikely(ret != num_pages) )
    2.85      {
    2.86 -        printk("Unable to inflate balloon, error %lx\n", ret);
    2.87 +        printk(KERN_ERR "Unable to inflate balloon, error %lx\n", ret);
    2.88          goto cleanup;
    2.89      }
    2.90  
    2.91 @@ -130,7 +134,7 @@ static unsigned long inflate_balloon(uns
    2.92   * phys->machine mapping table looking for DEAD entries and populates
    2.93   * them.
    2.94   */
    2.95 -static unsigned long process_new_pages(unsigned long * parray, 
    2.96 +static unsigned long process_returned_pages(unsigned long * parray, 
    2.97                                         unsigned long num)
    2.98  {
    2.99      /* currently, this function is rather simplistic as 
   2.100 @@ -140,7 +144,7 @@ static unsigned long process_new_pages(u
   2.101       * incorporated here.
   2.102       */
   2.103       
   2.104 -    unsigned long tot_pages = start_info.nr_pages;   
   2.105 +    unsigned long tot_pages = most_seen_pages;   
   2.106      unsigned long * curr = parray;
   2.107      unsigned long num_installed;
   2.108      unsigned long i;
   2.109 @@ -152,29 +156,18 @@ static unsigned long process_new_pages(u
   2.110          {
   2.111              phys_to_machine_mapping[i] = *curr;
   2.112              queue_machphys_update(*curr, i);
   2.113 -            queue_l1_entry_update(
   2.114 +	    if (i<max_low_pfn)
   2.115 +              queue_l1_entry_update(
   2.116                  get_ptep((unsigned long)__va(i << PAGE_SHIFT)),
   2.117                  ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
   2.118  
   2.119 -            *curr = (unsigned long)__va(i << PAGE_SHIFT);
   2.120 +            __free_page(mem_map + i);
   2.121 +
   2.122              curr++;
   2.123              num_installed++;
   2.124          }
   2.125      }
   2.126  
   2.127 -    /*
   2.128 -     * This is tricky (and will also change for machine addrs that 
   2.129 -     * are mapped to not previously released addresses). We free pages
   2.130 -     * that were allocated by get_free_page (the mappings are different 
   2.131 -     * now, of course).
   2.132 -     */
   2.133 -    curr = parray;
   2.134 -    for ( i = 0; i < num_installed; i++ )
   2.135 -    {
   2.136 -        free_page(*curr);
   2.137 -        curr++;
   2.138 -    }
   2.139 -
   2.140      return num_installed;
   2.141  }
   2.142  
   2.143 @@ -185,14 +178,15 @@ unsigned long deflate_balloon(unsigned l
   2.144  
   2.145      if ( num_pages > credit )
   2.146      {
   2.147 -        printk("Can not allocate more pages than previously released.\n");
   2.148 +        printk(KERN_ERR "deflate_balloon: %d pages > %d credit.\n",
   2.149 +			num_pages, credit);
   2.150          return -EAGAIN;
   2.151      }
   2.152  
   2.153      parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
   2.154      if ( parray == NULL )
   2.155      {
   2.156 -        printk("inflate_balloon: Unable to vmalloc parray\n");
   2.157 +        printk(KERN_ERR "deflate_balloon: Unable to vmalloc parray\n");
   2.158          return 0;
   2.159      }
   2.160  
   2.161 @@ -202,14 +196,16 @@ unsigned long deflate_balloon(unsigned l
   2.162                                  parray, num_pages, 0);
   2.163      if ( unlikely(ret != num_pages) )
   2.164      {
   2.165 -        printk("Unable to deflate balloon, error %lx\n", ret);
   2.166 +        printk(KERN_ERR "deflate_balloon: xen increase_reservation err %lx\n",
   2.167 +			ret);
   2.168          goto cleanup;
   2.169      }
   2.170  
   2.171 -    if ( (ret = process_new_pages(parray, num_pages)) < num_pages )
   2.172 +    if ( (ret = process_returned_pages(parray, num_pages)) < num_pages )
   2.173      {
   2.174 -        printk("Unable to deflate balloon by specified %lx pages, only %lx.\n",
   2.175 -               num_pages, ret);
   2.176 +        printk(KERN_WARNING
   2.177 +	   "deflate_balloon: restored only %lx of %lx pages.\n",
   2.178 +           ret, num_pages);
   2.179          goto cleanup;
   2.180      }
   2.181  
   2.182 @@ -224,20 +220,170 @@ unsigned long deflate_balloon(unsigned l
   2.183  
   2.184  #define PAGE_TO_MB_SHIFT 8
   2.185  
   2.186 +/*
   2.187 + * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c 
   2.188 + * The loops do go through all of low memory (ZONE_NORMAL).  The
   2.189 + * old pages have _PAGE_PRESENT set and so get skipped.
   2.190 + * If low memory is not full, the new pages are used to fill it, going
   2.191 + * from cur_low_pfn to low_pfn.   High memory is not direct mapped, so
   2.192 + * no extension is needed for new high memory.
   2.193 + */
   2.194 +
   2.195 +static void pagetable_extend (int cur_low_pfn, int newpages)
   2.196 +{
   2.197 +    unsigned long vaddr, end;
   2.198 +    pgd_t *kpgd, *pgd, *pgd_base;
   2.199 +    int i, j, k;
   2.200 +    pmd_t *kpmd, *pmd;
   2.201 +    pte_t *kpte, *pte, *pte_base;
   2.202 +    int low_pfn = min(cur_low_pfn+newpages,(int)max_low_pfn);
   2.203 +
   2.204 +    /*
   2.205 +     * This can be zero as well - no problem, in that case we exit
   2.206 +     * the loops anyway due to the PTRS_PER_* conditions.
   2.207 +     */
   2.208 +    end = (unsigned long)__va(low_pfn*PAGE_SIZE);
   2.209 +
   2.210 +    pgd_base = init_mm.pgd;
   2.211 +    i = __pgd_offset(PAGE_OFFSET);
   2.212 +    pgd = pgd_base + i;
   2.213 +
   2.214 +    for (; i < PTRS_PER_PGD; pgd++, i++) {
   2.215 +        vaddr = i*PGDIR_SIZE;
   2.216 +        if (end && (vaddr >= end))
   2.217 +            break;
   2.218 +        pmd = (pmd_t *)pgd;
   2.219 +        for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
   2.220 +            vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
   2.221 +            if (end && (vaddr >= end))
   2.222 +                break;
   2.223 +
   2.224 +            /* Filled in for us already? */
   2.225 +            if ( pmd_val(*pmd) & _PAGE_PRESENT )
   2.226 +                continue;
   2.227 +
   2.228 +            pte_base = pte = (pte_t *) __get_free_page(GFP_KERNEL);
   2.229 +
   2.230 +            for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
   2.231 +                vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
   2.232 +                if (end && (vaddr >= end))
   2.233 +                    break;
   2.234 +                *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
   2.235 +            }
   2.236 +            kpgd = pgd_offset_k((unsigned long)pte_base);
   2.237 +            kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
   2.238 +            kpte = pte_offset(kpmd, (unsigned long)pte_base);
   2.239 +            queue_l1_entry_update(kpte,
   2.240 +                                  (*(unsigned long *)kpte)&~_PAGE_RW);
   2.241 +            set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
   2.242 +            XEN_flush_page_update_queue();
   2.243 +        }
   2.244 +    }
   2.245 +}
   2.246 +
   2.247 +/*
   2.248 + * claim_new_pages() asks xen to increase this domain's memory reservation
   2.249 + * and return a list of the new pages of memory.  These new pages are
   2.250 + * added to the free list of the memory manager.
   2.251 + *
   2.252 + * Available RAM does not normally change while Linux runs.  To make this work,
   2.253 + * the linux mem= boottime command line param must say how big memory could
   2.254 + * possibly grow.  Then setup_arch() in arch/xen/kernel/setup.c
   2.255 + * sets max_pfn, max_low_pfn and the zones according to
   2.256 + * this max memory size.   The page tables themselves can only be
   2.257 + * extended after xen has assigned new pages to this domain.
   2.258 + */
   2.259 +
   2.260 +static unsigned long
   2.261 +claim_new_pages(unsigned long num_pages)
   2.262 +{
   2.263 +    unsigned long new_page_cnt, pfn;
   2.264 +    unsigned long * parray, *curr;
   2.265 +
   2.266 +    if (most_seen_pages+num_pages> max_pfn)
   2.267 +        num_pages = max_pfn-most_seen_pages;
   2.268 +    if (num_pages==0) return 0;
   2.269 +
   2.270 +    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
   2.271 +    if ( parray == NULL )
   2.272 +    {
   2.273 +        printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n");
   2.274 +        return 0;
   2.275 +    }
   2.276 +
   2.277 +    XEN_flush_page_update_queue();
   2.278 +    new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 
   2.279 +                                parray, num_pages, 0);
   2.280 +    if (new_page_cnt != num_pages)
   2.281 +    {
   2.282 +        printk(KERN_WARNING
   2.283 +            "claim_new_pages: xen granted only %lu of %lu requested pages\n",
   2.284 +            new_page_cnt, num_pages);
   2.285 +
   2.286 +	/* XXX
   2.287 +	 * Avoid a xen lockup when the user forgot to run setdomainmaxmem.
   2.288 +	 * Xen can usually dribble out a few pages and then hangs.
   2.289 +	 */
   2.290 +	if (new_page_cnt < 1000) {
   2.291 +            printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
   2.292 +	    HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
   2.293 +                                parray, new_page_cnt, 0);
   2.294 +            return -EFAULT;
   2.295 +	}
   2.296 +    }
   2.297 +    memcpy(phys_to_machine_mapping+most_seen_pages, parray,
   2.298 +            new_page_cnt * sizeof(unsigned long));
   2.299 +
   2.300 +    pagetable_extend(most_seen_pages,new_page_cnt);
   2.301 +
   2.302 +    for (pfn = most_seen_pages, curr = parray;
   2.303 +	    pfn < most_seen_pages+new_page_cnt;
   2.304 +            pfn++, curr++ )
   2.305 +    {
   2.306 +        struct page *page = mem_map + pfn;
   2.307 +
   2.308 +#ifndef CONFIG_HIGHMEM
   2.309 +	if (pfn>=max_low_pfn) {
   2.310 +            printk(KERN_WARNING "Warning only %ldMB will be used.\n",
   2.311 +               pfn>>PAGE_TO_MB_SHIFT);
   2.312 +            printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
   2.313 +	    break;
   2.314 +	}
   2.315 +#endif
   2.316 +	queue_machphys_update(*curr, pfn);
   2.317 +	XEN_flush_page_update_queue();
   2.318 +	if (pfn<max_low_pfn)  {
   2.319 +		queue_l1_entry_update(get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
   2.320 +			((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
   2.321 +		XEN_flush_page_update_queue();
   2.322 +		}
   2.323 +
   2.324 +        /* this next bit mimics arch/xen/mm/init.c:one_highpage_init() */
   2.325 +        ClearPageReserved(page);
   2.326 +        if (pfn>=max_low_pfn) set_bit(PG_highmem, &page->flags);
   2.327 +        set_page_count(page, 1);
   2.328 +        __free_page(page);
   2.329 +    }
   2.330 +
   2.331 +    vfree(parray);
   2.332 +
   2.333 +    return new_page_cnt;
   2.334 +}
   2.335 +
   2.336  static int balloon_write(struct file *file, const char *buffer,
   2.337                           u_long count, void *data)
   2.338  {
   2.339      char memstring[64], *endchar;
   2.340      int len, i;
   2.341 -    unsigned long pages;
   2.342 -    unsigned long long target;
   2.343 +    unsigned long target;
   2.344 +    unsigned long long targetbytes;
   2.345  
   2.346      /* Only admin can play with the balloon :) */
   2.347      if ( !capable(CAP_SYS_ADMIN) )
   2.348          return -EPERM;
   2.349  
   2.350      if (count>sizeof memstring) {
   2.351 -	    return -EFBIG;
   2.352 +        return -EFBIG;
   2.353      }
   2.354  
   2.355      len = strnlen_user(buffer, count);
   2.356 @@ -248,53 +394,66 @@ static int balloon_write(struct file *fi
   2.357  
   2.358      endchar = memstring;
   2.359      for(i=0; i<len; ++i,++endchar) {
   2.360 -	    if ('0'>memstring[i] || memstring[i]>'9') break;
   2.361 +        if ('0'>memstring[i] || memstring[i]>'9') break;
   2.362      }
   2.363      if (i==0) return -EBADMSG;
   2.364  
   2.365 -    target = memparse(memstring,&endchar);
   2.366 -    pages = target >> PAGE_SHIFT;
   2.367 +    targetbytes = memparse(memstring,&endchar);
   2.368 +    target = targetbytes >> PAGE_SHIFT;
   2.369 +
   2.370 +    if (target < current_pages) {
   2.371 +        int change = inflate_balloon(current_pages-target);
   2.372 +        if (change<=0) return change;
   2.373  
   2.374 -    if (pages < current_pages) {
   2.375 -	    int change = inflate_balloon(current_pages-pages);
   2.376 -	    if (change<0) return change;
   2.377 +        current_pages -= change;
   2.378 +        printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n",
   2.379 +            change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
   2.380 +    }
   2.381 +    else if (target > current_pages) {
   2.382 +        int change, reclaim = min(target,most_seen_pages) - current_pages;
   2.383  
   2.384 -	    current_pages -= change;
   2.385 -    	    printk("Relinquish %dMB to xen. Domain now has %ldMB\n",
   2.386 -		    change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
   2.387 +        if (reclaim) {
   2.388 +            change = deflate_balloon( reclaim);
   2.389 +            if (change<=0) return change;
   2.390 +            current_pages += change;
   2.391 +            printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
   2.392 +                change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
   2.393 +        }
   2.394 +
   2.395 +        if (most_seen_pages<target) {
   2.396 +            int growth = claim_new_pages(target-most_seen_pages);
   2.397 +	    if (growth<=0) return growth;
   2.398 +            most_seen_pages += growth;
   2.399 +            current_pages += growth;
   2.400 +            printk(KERN_INFO "Granted %dMB new mem by xen. Domain now has %luMB\n",
   2.401 +                growth>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
   2.402 +        }
   2.403      }
   2.404 -    else if (pages > current_pages) {
   2.405 -	    int change = deflate_balloon(min(pages,max_pages) - current_pages);
   2.406 -	    if (change<0) return change;
   2.407  
   2.408 -	    current_pages += change;
   2.409 -    	    printk("Reclaim %dMB from xen. Domain now has %ldMB\n",
   2.410 -		    change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
   2.411 -    }
   2.412  
   2.413      return len;
   2.414  }
   2.415  
   2.416  
   2.417  static int balloon_read(char *page, char **start, off_t off,
   2.418 -	  int count, int *eof, void *data)
   2.419 +      int count, int *eof, void *data)
   2.420  {
   2.421 -	int len;
   2.422 -	len = sprintf(page,"%lu\n",current_pages<<PAGE_SHIFT);
   2.423 +    int len;
   2.424 +    len = sprintf(page,"%lu\n",current_pages<<PAGE_SHIFT);
   2.425  
   2.426 -	if (len <= off+count) *eof = 1;
   2.427 -	*start = page + off;
   2.428 -	len -= off;
   2.429 -	if (len>count) len = count;
   2.430 -	if (len<0) len = 0;
   2.431 -	return len;
   2.432 +    if (len <= off+count) *eof = 1;
   2.433 +    *start = page + off;
   2.434 +    len -= off;
   2.435 +    if (len>count) len = count;
   2.436 +    if (len<0) len = 0;
   2.437 +    return len;
   2.438  }
   2.439  
   2.440  static int __init init_module(void)
   2.441  {
   2.442      printk(KERN_ALERT "Starting Xen Balloon driver\n");
   2.443  
   2.444 -    max_pages = current_pages = start_info.nr_pages;
   2.445 +    most_seen_pages = current_pages = min(start_info.nr_pages,max_pfn);
   2.446      if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL )
   2.447      {
   2.448          printk(KERN_ALERT "Unable to create balloon driver proc entry!");
   2.449 @@ -304,6 +463,17 @@ static int __init init_module(void)
   2.450      balloon_pde->write_proc = balloon_write;
   2.451      balloon_pde->read_proc = balloon_read;
   2.452  
   2.453 +    /* 
   2.454 +     * Make a new phys map if mem= says xen can give us memory to grow.
   2.455 +     */
   2.456 +    if (max_pfn > start_info.nr_pages) {
   2.457 +        extern unsigned long *phys_to_machine_mapping;
   2.458 +        unsigned long *newmap;
   2.459 +        newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
   2.460 +        phys_to_machine_mapping = memcpy(newmap, phys_to_machine_mapping,
   2.461 +            start_info.nr_pages * sizeof(unsigned long));
   2.462 +    }
   2.463 +
   2.464      return 0;
   2.465  }
   2.466  
     3.1 --- a/linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c	Fri Aug 06 18:15:02 2004 +0000
     3.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c	Fri Aug 06 18:45:26 2004 +0000
     3.3 @@ -120,10 +120,15 @@ union start_info_union start_info_union;
     3.4  static char command_line[COMMAND_LINE_SIZE];
     3.5  char saved_command_line[COMMAND_LINE_SIZE];
     3.6  
     3.7 -static void __init parse_mem_cmdline (char ** cmdline_p)
     3.8 +/* parse_mem_cmdline()
     3.9 + * returns the value of the mem= boot param converted to pages or 0
    3.10 + */ 
    3.11 +static int __init parse_mem_cmdline (char ** cmdline_p)
    3.12  {
    3.13      char c = ' ', *to = command_line, *from = saved_command_line;
    3.14      int len = 0;
    3.15 +    unsigned long long bytes;
    3.16 +    int mem_param = 0;
    3.17  
    3.18      /* Save unparsed command line copy for /proc/cmdline */
    3.19      memcpy(saved_command_line, start_info.cmd_line, COMMAND_LINE_SIZE);
    3.20 @@ -145,8 +150,9 @@ static void __init parse_mem_cmdline (ch
    3.21              } else if (!memcmp(from+4, "exactmap", 8)) {
    3.22                  from += 8+4;
    3.23              } else {
    3.24 -                (void)memparse(from+4, &from);
    3.25 -                if (*from == '@')
    3.26 +                bytes = memparse(from+4, &from);
    3.27 +                mem_param = bytes>>PAGE_SHIFT;
    3.28 +		if (*from == '@')
    3.29                      (void)memparse(from+1, &from);
    3.30              }
    3.31          }
    3.32 @@ -160,6 +166,8 @@ static void __init parse_mem_cmdline (ch
    3.33      }
    3.34      *to = '\0';
    3.35      *cmdline_p = command_line;
    3.36 +
    3.37 +    return mem_param;
    3.38  }
    3.39  
    3.40  /*
    3.41 @@ -194,7 +202,9 @@ int xen_module_init(struct module *mod)
    3.42  
    3.43  void __init setup_arch(char **cmdline_p)
    3.44  {
    3.45 -    unsigned long bootmap_size, start_pfn, max_low_pfn;
    3.46 +    unsigned long bootmap_size, start_pfn, lmax_low_pfn;
    3.47 +    int mem_param;  /* user specified memory size in pages */
    3.48 +    int boot_pfn;   /* low pages available for bootmem */
    3.49  
    3.50      extern void hypervisor_callback(void);
    3.51      extern void failsafe_callback(void);
    3.52 @@ -252,7 +262,16 @@ void __init setup_arch(char **cmdline_p)
    3.53      init_mm.end_data = (unsigned long) &_edata;
    3.54      init_mm.brk = (unsigned long) &_end;
    3.55  
    3.56 -    parse_mem_cmdline(cmdline_p);
    3.57 +    /* The mem= kernel command line param overrides the detected amount
    3.58 +     * of memory.   For xenolinux, if this override is larger than detected
    3.59 +     * memory, then boot using only detected memory and make provisions to
    3.60 +     * use all of the override value.   The hypervisor can give this
    3.61 +     * domain more memory later on and it will be added to the free
    3.62 +     * lists at that time.   See claim_new_pages() in
    3.63 +     * arch/xen/drivers/balloon/balloon.c
    3.64 +     */
    3.65 +    mem_param = parse_mem_cmdline(cmdline_p);
    3.66 +    if (!mem_param) mem_param = start_info.nr_pages;
    3.67  
    3.68  #define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
    3.69  #define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
    3.70 @@ -269,9 +288,9 @@ void __init setup_arch(char **cmdline_p)
    3.71      /*
    3.72       * Determine low and high memory ranges:
    3.73       */
    3.74 -    max_low_pfn = max_pfn = start_info.nr_pages;
    3.75 -    if (max_low_pfn > MAXMEM_PFN) {
    3.76 -        max_low_pfn = MAXMEM_PFN;
    3.77 +    lmax_low_pfn = max_pfn = mem_param;
    3.78 +    if (lmax_low_pfn > MAXMEM_PFN) {
    3.79 +        lmax_low_pfn = MAXMEM_PFN;
    3.80  #ifndef CONFIG_HIGHMEM
    3.81          /* Maximum memory usable is what is directly addressable */
    3.82          printk(KERN_WARNING "Warning only %ldMB will be used.\n",
    3.83 @@ -314,12 +333,20 @@ void __init setup_arch(char **cmdline_p)
    3.84       * bootstrap page table. We are guaranteed to get >=512kB unused 'padding'
    3.85       * for our own use after all bootstrap elements (see hypervisor-if.h).
    3.86       */
    3.87 -    bootmap_size = init_bootmem(start_pfn, max_low_pfn);
    3.88 -    free_bootmem(0, PFN_PHYS(max_low_pfn));
    3.89 +    boot_pfn = min((int)start_info.nr_pages,lmax_low_pfn);
    3.90 +    bootmap_size = init_bootmem(start_pfn,boot_pfn);
    3.91 +    free_bootmem(0, PFN_PHYS(boot_pfn));
    3.92      reserve_bootmem(__pa(&_stext), 
    3.93                      PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1 - 
    3.94                      __pa(&_stext));
    3.95  
    3.96 +    /* init_bootmem() set the global max_low_pfn to boot_pfn.  Now max_low_pfn 
    3.97 +     * can be set to the override value.
    3.98 +     */
    3.99 +    max_low_pfn = lmax_low_pfn;
   3.100 +
   3.101 +
   3.102 +
   3.103  #ifdef CONFIG_BLK_DEV_INITRD
   3.104      if ( start_info.mod_start != 0 )
   3.105      {
     4.1 --- a/linux-2.4.26-xen-sparse/arch/xen/mm/init.c	Fri Aug 06 18:15:02 2004 +0000
     4.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/mm/init.c	Fri Aug 06 18:45:26 2004 +0000
     4.3 @@ -219,11 +219,17 @@ static void __init pagetable_init (void)
     4.4      pmd_t *kpmd, *pmd;
     4.5      pte_t *kpte, *pte, *pte_base;
     4.6  
     4.7 +    /* create tables only for boot_pfn frames.  max_low_pfn may be sized for
     4.8 +     * pages yet to be allocated from the hypervisor, or it may be set
     4.9 +     * to override the start_info amount of memory
    4.10 +     */
    4.11 +    int boot_pfn = min(start_info.nr_pages,max_low_pfn);
    4.12 +
    4.13      /*
    4.14       * This can be zero as well - no problem, in that case we exit
    4.15       * the loops anyway due to the PTRS_PER_* conditions.
    4.16       */
    4.17 -    end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);
    4.18 +    end = (unsigned long)__va(boot_pfn *PAGE_SIZE);
    4.19  
    4.20      pgd_base = init_mm.pgd;
    4.21      i = __pgd_offset(PAGE_OFFSET);
    4.22 @@ -308,7 +314,6 @@ void __init paging_init(void)
    4.23      pagetable_init();
    4.24  
    4.25      zone_sizes_init();
    4.26 -
    4.27      /* Switch to the real shared_info page, and clear the dummy page. */
    4.28      set_fixmap(FIX_SHARED_INFO, start_info.shared_info);
    4.29      HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
    4.30 @@ -368,11 +373,18 @@ static int __init free_pages_init(void)
    4.31  #endif
    4.32      int reservedpages, pfn;
    4.33  
    4.34 +    /* add only boot_pfn pages of low memory to free list.
    4.35 +     * max_low_pfn may be sized for
    4.36 +     * pages yet to be allocated from the hypervisor, or it may be set
    4.37 +     * to override the start_info amount of memory
    4.38 +     */
    4.39 +    int boot_pfn = min(start_info.nr_pages,max_low_pfn);
    4.40 +
    4.41      /* this will put all low memory onto the freelists */
    4.42      totalram_pages += free_all_bootmem();
    4.43  
    4.44      reservedpages = 0;
    4.45 -    for (pfn = 0; pfn < max_low_pfn; pfn++) {
    4.46 +    for (pfn = 0; pfn < boot_pfn ; pfn++) {
    4.47          /*
    4.48           * Only count reserved RAM pages
    4.49           */
    4.50 @@ -380,7 +392,7 @@ static int __init free_pages_init(void)
    4.51              reservedpages++;
    4.52      }
    4.53  #ifdef CONFIG_HIGHMEM
    4.54 -    for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--)
    4.55 +    for (pfn = start_info.nr_pages-1; pfn >= highstart_pfn; pfn--)
    4.56          one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro);
    4.57      totalram_pages += totalhigh_pages;
    4.58  #endif
    4.59 @@ -460,11 +472,11 @@ void free_initrd_mem(unsigned long start
    4.60  
    4.61  void si_meminfo(struct sysinfo *val)
    4.62  {
    4.63 -    val->totalram = totalram_pages;
    4.64 +    val->totalram = max_pfn;
    4.65      val->sharedram = 0;
    4.66      val->freeram = nr_free_pages();
    4.67      val->bufferram = atomic_read(&buffermem_pages);
    4.68 -    val->totalhigh = totalhigh_pages;
    4.69 +    val->totalhigh = max_pfn-max_low_pfn;
    4.70      val->freehigh = nr_free_highpages();
    4.71      val->mem_unit = PAGE_SIZE;
    4.72      return;
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/tools/misc/setdomainmaxmem	Fri Aug 06 18:45:26 2004 +0000
     5.3 @@ -0,0 +1,34 @@
     5.4 +#!/usr/bin/env perl
     5.5 +
     5.6 +use strict;
     5.7 +require "sys/ioctl.ph";
     5.8 +
     5.9 +sub SIZEOF_HYPERCALL () { 24; }
    5.10 +sub STRUCT_PRIVCMD_HYPERCALL () {"L P";}
    5.11 +sub IOCTL_PRIVCMD_HYPERCALL ()
    5.12 +        { &_IOC( &_IOC_NONE, ord('P'), 0, SIZEOF_HYPERCALL );}
    5.13 +sub __HYPERVISOR_dom0_op () {7;}
    5.14 +sub DOM0_INTERFACE_VERSION () {0xaaaa0010;}
    5.15 +sub DOM0_SETDOMAINMAXMEM () {28;}
    5.16 +sub STRUCT_DOM0_OP_PREFIX () {"L L";}
    5.17 +sub STRUCT_SETDOMAINMAXMEM () {STRUCT_DOM0_OP_PREFIX."L x4 L";}
    5.18 +sub XEN_PRIVCMD () {"/proc/xen/privcmd";}
    5.19 +
    5.20 +sub setdomainmaxmem($$) {
    5.21 +    my ($domain,$bytes) = @_;
    5.22 +    my $msg = pack(STRUCT_SETDOMAINMAXMEM,DOM0_SETDOMAINMAXMEM,
    5.23 +        DOM0_INTERFACE_VERSION,  $domain, $bytes);
    5.24 +    my $cmd = pack(STRUCT_PRIVCMD_HYPERCALL,__HYPERVISOR_dom0_op,$msg);
    5.25 +    open(XEN,XEN_PRIVCMD) or die "$!\n";
    5.26 +    ioctl(XEN, IOCTL_PRIVCMD_HYPERCALL, $cmd) or die "ioctl: $!";
    5.27 +    close XEN;
    5.28 +}
    5.29 +
    5.30 +my ($bytes,$suffix) = $ARGV[1] =~ m/(^\d+)([mMkKgG])/;
    5.31 +$bytes<<=10 if $suffix =~ m/[kK]/;
    5.32 +$bytes<<=20 if $suffix =~ m/[mM]/;
    5.33 +$bytes<<=30 if $suffix =~ m/[gG]/;
    5.34 +
    5.35 +printf "set domain $ARGV[0] to $bytes\n";
    5.36 +setdomainmaxmem($ARGV[0],$bytes);
    5.37 +