ia64/xen-unstable
changeset 2115:b9edbe5d4952
bitkeeper revision 1.1159.4.1 (4113d1b8oHof-8weGeQ3gNQFteLGEg)
David Becker writes:
This patch adds support for high memory to /proc/xen/memory_target,
and it restores the mem= linux boot parameter. This is for 2.4.26 only.
I haven't looked at 2.6 yet. (Remember that CONFIG_HIGHMEM
is not enabled by default in the xen0 and xenU configs.)
I have racks of 2g and 4g machines, hence my interest.
The 'mem=' param tells linux the maximum amount of memory it can use.
The feature originated for hosts where the normal memory detection mechanisms
failed. These days its main use in stock linux is to artificially
constrain how much memory linux uses, for testing low-memory systems.
With this patch, mem= now means the maximum memory linux could ever use.
When a domain is created with less than the mem= value, linux behaves
as though the 'missing' memory were already allocated. To give the
domain more memory, first run setdomainmaxmem in Dom-0 to raise Xen's
limit, then write the new total to /proc/xen/memory_target in the
domain. When mem= is not explicitly set, it defaults to the
boot-time size of the domain.
dom-0# xm create name=dom-1 memory=100 extra='-b mem=2g'
dom-0# setdomainmaxmem 1 2g
dom-1# echo 2g > /proc/xen/memory_target
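The write handler behind /proc/xen/memory_target ties these steps together. What follows is a minimal user-space sketch, not the driver code itself, of the decision logic the patched balloon_write() applies to a written target: shrink by inflating the balloon, grow first by deflating (reclaiming pages the domain held before) and only then by claiming genuinely new pages from Xen, bounded by the mem= limit. The stub functions and the example page counts (a 100MB domain booted with mem=2g) are placeholders standing in for the real hypercall-backed routines.

#include <stdio.h>

/* Hypothetical example state: a 100MB domain booted with mem=2g (4K pages). */
static unsigned long current_pages   = 25600;   /* pages currently owned      */
static unsigned long most_seen_pages = 25600;   /* high-water mark so far     */
static unsigned long max_pfn         = 524288;  /* upper bound from mem=2g    */

/* Stubs: in the driver these issue dom_mem_op hypercalls to Xen. */
static long inflate_balloon(unsigned long n) { return (long)n; }
static long deflate_balloon(unsigned long n) { return (long)n; }
static long claim_new_pages(unsigned long n)
{
    /* Never claim beyond the mem= limit. */
    return n > max_pfn - most_seen_pages ? (long)(max_pfn - most_seen_pages)
                                         : (long)n;
}

static void set_target(unsigned long target_pages)
{
    if (target_pages < current_pages) {
        /* Shrink: hand pages back to Xen. */
        current_pages -= inflate_balloon(current_pages - target_pages);
    } else if (target_pages > current_pages) {
        /* First reclaim pages this domain has held before (deflate) ...   */
        unsigned long cap = target_pages < most_seen_pages ? target_pages
                                                           : most_seen_pages;
        unsigned long reclaim = cap - current_pages;
        if (reclaim)
            current_pages += deflate_balloon(reclaim);
        /* ... then ask Xen for genuinely new pages, up to the mem= limit. */
        if (most_seen_pages < target_pages) {
            long growth = claim_new_pages(target_pages - most_seen_pages);
            if (growth > 0) {
                most_seen_pages += growth;
                current_pages   += growth;
            }
        }
    }
    printf("domain now has %lu pages (%lu MB)\n",
           current_pages, current_pages >> 8);
}

int main(void)
{
    set_target(524288);        /* dom-1# echo 2g > /proc/xen/memory_target */
    return 0;
}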
author    iap10@labyrinth.cl.cam.ac.uk
date      Fri Aug 06 18:45:12 2004 +0000 (2004-08-06)
parents   b142982228ea
children  6c0df1212e95
files     .rootkeys linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c linux-2.4.26-xen-sparse/arch/xen/mm/init.c tools/misc/setdomainmaxmem
line diff
1.1 --- a/.rootkeys Fri Aug 06 15:55:31 2004 +0000 1.2 +++ b/.rootkeys Fri Aug 06 18:45:12 2004 +0000 1.3 @@ -298,6 +298,7 @@ 40c9c469kT0H9COWzA4XzPBjWK0WsA tools/mis 1.4 4022a73cEKvrYe_DVZW2JlAxobg9wg tools/misc/nsplitd/Makefile 1.5 4022a73cKms4Oq030x2JBzUB426lAQ tools/misc/nsplitd/nsplitd.c 1.6 3f870808_8aFBAcZbWiWGdgrGQyIEw tools/misc/p4perf.h 1.7 +4113d1afyPjO8m8-9E1pVBDHzGe1jQ tools/misc/setdomainmaxmem 1.8 3f5ef5a2ir1kVAthS14Dc5QIRCEFWg tools/misc/xen-clone 1.9 3f5ef5a2dTZP0nnsFoeq2jRf3mWDDg tools/misc/xen-clone.README 1.10 3f870808zS6T6iFhqYPGelroZlVfGQ tools/misc/xen_cpuperf.c
2.1 --- a/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c Fri Aug 06 15:55:31 2004 +0000 2.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c Fri Aug 06 18:45:12 2004 +0000 2.3 @@ -17,6 +17,8 @@ 2.4 #include <linux/mman.h> 2.5 #include <linux/smp_lock.h> 2.6 #include <linux/pagemap.h> 2.7 +#include <linux/bootmem.h> 2.8 +#include <linux/highmem.h> 2.9 #include <linux/vmalloc.h> 2.10 2.11 #include <asm/hypervisor.h> 2.12 @@ -39,7 +41,7 @@ typedef struct user_balloon_op { 2.13 2.14 static struct proc_dir_entry *balloon_pde; 2.15 unsigned long credit; 2.16 -static unsigned long current_pages, max_pages; 2.17 +static unsigned long current_pages, most_seen_pages; 2.18 2.19 static inline pte_t *get_ptep(unsigned long addr) 2.20 { 2.21 @@ -69,41 +71,43 @@ static unsigned long inflate_balloon(uns 2.22 parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long)); 2.23 if ( parray == NULL ) 2.24 { 2.25 - printk("inflate_balloon: Unable to vmalloc parray\n"); 2.26 - return 0; 2.27 + printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n"); 2.28 + return -EFAULT; 2.29 } 2.30 2.31 currp = parray; 2.32 2.33 - for ( i = 0; i < num_pages; i++ ) 2.34 + for ( i = 0; i < num_pages; i++, currp++ ) 2.35 { 2.36 - /* NB. Should be GFP_ATOMIC for a less aggressive inflation. */ 2.37 - vaddr = __get_free_page(GFP_KERNEL); 2.38 + struct page *page = alloc_page(GFP_HIGHUSER); 2.39 + unsigned long pfn = page - mem_map; 2.40 2.41 /* If allocation fails then free all reserved pages. */ 2.42 - if ( vaddr == 0 ) 2.43 + if ( page == 0 ) 2.44 { 2.45 - printk("Unable to inflate balloon by %ld, only %ld pages free.", 2.46 + printk(KERN_ERR "Unable to inflate balloon by %ld, only %ld pages free.", 2.47 num_pages, i); 2.48 currp = parray; 2.49 - for(j = 0; j < i; j++){ 2.50 - free_page(*currp++); 2.51 + for(j = 0; j < i; j++, ++currp){ 2.52 + __free_page((struct page *) (mem_map + *currp)); 2.53 } 2.54 + ret = -EFAULT; 2.55 goto cleanup; 2.56 } 2.57 2.58 - *currp++ = vaddr; 2.59 + *currp = pfn; 2.60 } 2.61 2.62 2.63 - currp = parray; 2.64 - for ( i = 0; i < num_pages; i++ ) 2.65 + for ( i = 0, currp = parray; i < num_pages; i++, currp++ ) 2.66 { 2.67 - curraddr = *currp; 2.68 - *currp = virt_to_machine(*currp) >> PAGE_SHIFT; 2.69 - queue_l1_entry_update(get_ptep(curraddr), 0); 2.70 - phys_to_machine_mapping[__pa(curraddr) >> PAGE_SHIFT] = DEAD; 2.71 - currp++; 2.72 + unsigned long mfn = phys_to_machine_mapping[*currp]; 2.73 + curraddr = page_address(mem_map + *currp); 2.74 + if (curraddr) 2.75 + queue_l1_entry_update(get_ptep(curraddr), 0); 2.76 + 2.77 + phys_to_machine_mapping[*currp] = DEAD; 2.78 + *currp = mfn; 2.79 } 2.80 2.81 XEN_flush_page_update_queue(); 2.82 @@ -112,7 +116,7 @@ static unsigned long inflate_balloon(uns 2.83 parray, num_pages, 0); 2.84 if ( unlikely(ret != num_pages) ) 2.85 { 2.86 - printk("Unable to inflate balloon, error %lx\n", ret); 2.87 + printk(KERN_ERR "Unable to inflate balloon, error %lx\n", ret); 2.88 goto cleanup; 2.89 } 2.90 2.91 @@ -130,7 +134,7 @@ static unsigned long inflate_balloon(uns 2.92 * phys->machine mapping table looking for DEAD entries and populates 2.93 * them. 2.94 */ 2.95 -static unsigned long process_new_pages(unsigned long * parray, 2.96 +static unsigned long process_returned_pages(unsigned long * parray, 2.97 unsigned long num) 2.98 { 2.99 /* currently, this function is rather simplistic as 2.100 @@ -140,7 +144,7 @@ static unsigned long process_new_pages(u 2.101 * incorporated here. 
2.102 */ 2.103 2.104 - unsigned long tot_pages = start_info.nr_pages; 2.105 + unsigned long tot_pages = most_seen_pages; 2.106 unsigned long * curr = parray; 2.107 unsigned long num_installed; 2.108 unsigned long i; 2.109 @@ -152,29 +156,18 @@ static unsigned long process_new_pages(u 2.110 { 2.111 phys_to_machine_mapping[i] = *curr; 2.112 queue_machphys_update(*curr, i); 2.113 - queue_l1_entry_update( 2.114 + if (i<max_low_pfn) 2.115 + queue_l1_entry_update( 2.116 get_ptep((unsigned long)__va(i << PAGE_SHIFT)), 2.117 ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL)); 2.118 2.119 - *curr = (unsigned long)__va(i << PAGE_SHIFT); 2.120 + __free_page(mem_map + i); 2.121 + 2.122 curr++; 2.123 num_installed++; 2.124 } 2.125 } 2.126 2.127 - /* 2.128 - * This is tricky (and will also change for machine addrs that 2.129 - * are mapped to not previously released addresses). We free pages 2.130 - * that were allocated by get_free_page (the mappings are different 2.131 - * now, of course). 2.132 - */ 2.133 - curr = parray; 2.134 - for ( i = 0; i < num_installed; i++ ) 2.135 - { 2.136 - free_page(*curr); 2.137 - curr++; 2.138 - } 2.139 - 2.140 return num_installed; 2.141 } 2.142 2.143 @@ -185,14 +178,15 @@ unsigned long deflate_balloon(unsigned l 2.144 2.145 if ( num_pages > credit ) 2.146 { 2.147 - printk("Can not allocate more pages than previously released.\n"); 2.148 + printk(KERN_ERR "deflate_balloon: %d pages > %d credit.\n", 2.149 + num_pages, credit); 2.150 return -EAGAIN; 2.151 } 2.152 2.153 parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long)); 2.154 if ( parray == NULL ) 2.155 { 2.156 - printk("inflate_balloon: Unable to vmalloc parray\n"); 2.157 + printk(KERN_ERR "deflate_balloon: Unable to vmalloc parray\n"); 2.158 return 0; 2.159 } 2.160 2.161 @@ -202,14 +196,16 @@ unsigned long deflate_balloon(unsigned l 2.162 parray, num_pages, 0); 2.163 if ( unlikely(ret != num_pages) ) 2.164 { 2.165 - printk("Unable to deflate balloon, error %lx\n", ret); 2.166 + printk(KERN_ERR "deflate_balloon: xen increase_reservation err %lx\n", 2.167 + ret); 2.168 goto cleanup; 2.169 } 2.170 2.171 - if ( (ret = process_new_pages(parray, num_pages)) < num_pages ) 2.172 + if ( (ret = process_returned_pages(parray, num_pages)) < num_pages ) 2.173 { 2.174 - printk("Unable to deflate balloon by specified %lx pages, only %lx.\n", 2.175 - num_pages, ret); 2.176 + printk(KERN_WARNING 2.177 + "deflate_balloon: restored only %lx of %lx pages.\n", 2.178 + ret, num_pages); 2.179 goto cleanup; 2.180 } 2.181 2.182 @@ -224,20 +220,170 @@ unsigned long deflate_balloon(unsigned l 2.183 2.184 #define PAGE_TO_MB_SHIFT 8 2.185 2.186 +/* 2.187 + * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c 2.188 + * The loops do go through all of low memory (ZONE_NORMAL). The 2.189 + * old pages have _PAGE_PRESENT set and so get skipped. 2.190 + * If low memory is not full, the new pages are used to fill it, going 2.191 + * from cur_low_pfn to low_pfn. high memory is not direct mapped so 2.192 + * no extension is needed for new high memory. 2.193 + */ 2.194 + 2.195 +static void pagetable_extend (int cur_low_pfn, int newpages) 2.196 +{ 2.197 + unsigned long vaddr, end; 2.198 + pgd_t *kpgd, *pgd, *pgd_base; 2.199 + int i, j, k; 2.200 + pmd_t *kpmd, *pmd; 2.201 + pte_t *kpte, *pte, *pte_base; 2.202 + int low_pfn = min(cur_low_pfn+newpages,(int)max_low_pfn); 2.203 + 2.204 + /* 2.205 + * This can be zero as well - no problem, in that case we exit 2.206 + * the loops anyway due to the PTRS_PER_* conditions. 
2.207 + */ 2.208 + end = (unsigned long)__va(low_pfn*PAGE_SIZE); 2.209 + 2.210 + pgd_base = init_mm.pgd; 2.211 + i = __pgd_offset(PAGE_OFFSET); 2.212 + pgd = pgd_base + i; 2.213 + 2.214 + for (; i < PTRS_PER_PGD; pgd++, i++) { 2.215 + vaddr = i*PGDIR_SIZE; 2.216 + if (end && (vaddr >= end)) 2.217 + break; 2.218 + pmd = (pmd_t *)pgd; 2.219 + for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { 2.220 + vaddr = i*PGDIR_SIZE + j*PMD_SIZE; 2.221 + if (end && (vaddr >= end)) 2.222 + break; 2.223 + 2.224 + /* Filled in for us already? */ 2.225 + if ( pmd_val(*pmd) & _PAGE_PRESENT ) 2.226 + continue; 2.227 + 2.228 + pte_base = pte = (pte_t *) __get_free_page(GFP_KERNEL); 2.229 + 2.230 + for (k = 0; k < PTRS_PER_PTE; pte++, k++) { 2.231 + vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; 2.232 + if (end && (vaddr >= end)) 2.233 + break; 2.234 + *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL); 2.235 + } 2.236 + kpgd = pgd_offset_k((unsigned long)pte_base); 2.237 + kpmd = pmd_offset(kpgd, (unsigned long)pte_base); 2.238 + kpte = pte_offset(kpmd, (unsigned long)pte_base); 2.239 + queue_l1_entry_update(kpte, 2.240 + (*(unsigned long *)kpte)&~_PAGE_RW); 2.241 + set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); 2.242 + XEN_flush_page_update_queue(); 2.243 + } 2.244 + } 2.245 +} 2.246 + 2.247 +/* 2.248 + * claim_new_pages() asks xen to increase this domain's memory reservation 2.249 + * and return a list of the new pages of memory. This new pages are 2.250 + * added to the free list of the memory manager. 2.251 + * 2.252 + * Available RAM does not normally change while Linux runs. To make this work, 2.253 + * the linux mem= boottime command line param must say how big memory could 2.254 + * possibly grow. Then setup_arch() in arch/xen/kernel/setup.c 2.255 + * sets max_pfn, max_low_pfn and the zones according to 2.256 + * this max memory size. The page tables themselves can only be 2.257 + * extended after xen has assigned new pages to this domain. 2.258 + */ 2.259 + 2.260 +static unsigned long 2.261 +claim_new_pages(unsigned long num_pages) 2.262 +{ 2.263 + unsigned long new_page_cnt, pfn; 2.264 + unsigned long * parray, *curr; 2.265 + 2.266 + if (most_seen_pages+num_pages> max_pfn) 2.267 + num_pages = max_pfn-most_seen_pages; 2.268 + if (num_pages==0) return 0; 2.269 + 2.270 + parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long)); 2.271 + if ( parray == NULL ) 2.272 + { 2.273 + printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n"); 2.274 + return 0; 2.275 + } 2.276 + 2.277 + XEN_flush_page_update_queue(); 2.278 + new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 2.279 + parray, num_pages, 0); 2.280 + if (new_page_cnt != num_pages) 2.281 + { 2.282 + printk(KERN_WARNING 2.283 + "claim_new_pages: xen granted only %lu of %lu requested pages\n", 2.284 + new_page_cnt, num_pages); 2.285 + 2.286 + /* XXX 2.287 + * avoid xen lockup when user forgot to setdomainmaxmem. 
xen 2.288 + * usually can dribble out a few pages and then hangs 2.289 + */ 2.290 + if (new_page_cnt < 1000) { 2.291 + printk(KERN_WARNING "Remember to use setdomainmaxmem\n"); 2.292 + HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 2.293 + parray, new_page_cnt, 0); 2.294 + return -EFAULT; 2.295 + } 2.296 + } 2.297 + memcpy(phys_to_machine_mapping+most_seen_pages, parray, 2.298 + new_page_cnt * sizeof(unsigned long)); 2.299 + 2.300 + pagetable_extend(most_seen_pages,new_page_cnt); 2.301 + 2.302 + for (pfn = most_seen_pages, curr = parray; 2.303 + pfn < most_seen_pages+new_page_cnt; 2.304 + pfn++, curr++ ) 2.305 + { 2.306 + struct page *page = mem_map + pfn; 2.307 + 2.308 +#ifndef CONFIG_HIGHMEM 2.309 + if (pfn>=max_low_pfn) { 2.310 + printk(KERN_WARNING "Warning only %ldMB will be used.\n", 2.311 + pfn>>PAGE_TO_MB_SHIFT); 2.312 + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); 2.313 + break; 2.314 + } 2.315 +#endif 2.316 + queue_machphys_update(*curr, pfn); 2.317 + XEN_flush_page_update_queue(); 2.318 + if (pfn<max_low_pfn) { 2.319 + queue_l1_entry_update(get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)), 2.320 + ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL)); 2.321 + XEN_flush_page_update_queue(); 2.322 + } 2.323 + 2.324 + /* this next bit mimics arch/xen/mm/init.c:one_highpage_init() */ 2.325 + ClearPageReserved(page); 2.326 + if (pfn>=max_low_pfn) set_bit(PG_highmem, &page->flags); 2.327 + set_page_count(page, 1); 2.328 + __free_page(page); 2.329 + } 2.330 + 2.331 + vfree(parray); 2.332 + 2.333 + return new_page_cnt; 2.334 +} 2.335 + 2.336 static int balloon_write(struct file *file, const char *buffer, 2.337 u_long count, void *data) 2.338 { 2.339 char memstring[64], *endchar; 2.340 int len, i; 2.341 - unsigned long pages; 2.342 - unsigned long long target; 2.343 + unsigned long target; 2.344 + unsigned long long targetbytes; 2.345 2.346 /* Only admin can play with the balloon :) */ 2.347 if ( !capable(CAP_SYS_ADMIN) ) 2.348 return -EPERM; 2.349 2.350 if (count>sizeof memstring) { 2.351 - return -EFBIG; 2.352 + return -EFBIG; 2.353 } 2.354 2.355 len = strnlen_user(buffer, count); 2.356 @@ -248,53 +394,66 @@ static int balloon_write(struct file *fi 2.357 2.358 endchar = memstring; 2.359 for(i=0; i<len; ++i,++endchar) { 2.360 - if ('0'>memstring[i] || memstring[i]>'9') break; 2.361 + if ('0'>memstring[i] || memstring[i]>'9') break; 2.362 } 2.363 if (i==0) return -EBADMSG; 2.364 2.365 - target = memparse(memstring,&endchar); 2.366 - pages = target >> PAGE_SHIFT; 2.367 + targetbytes = memparse(memstring,&endchar); 2.368 + target = targetbytes >> PAGE_SHIFT; 2.369 + 2.370 + if (target < current_pages) { 2.371 + int change = inflate_balloon(current_pages-target); 2.372 + if (change<=0) return change; 2.373 2.374 - if (pages < current_pages) { 2.375 - int change = inflate_balloon(current_pages-pages); 2.376 - if (change<0) return change; 2.377 + current_pages -= change; 2.378 + printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n", 2.379 + change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT); 2.380 + } 2.381 + else if (target > current_pages) { 2.382 + int change, reclaim = min(target,most_seen_pages) - current_pages; 2.383 2.384 - current_pages -= change; 2.385 - printk("Relinquish %dMB to xen. 
Domain now has %ldMB\n", 2.386 - change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT); 2.387 + if (reclaim) { 2.388 + change = deflate_balloon( reclaim); 2.389 + if (change<=0) return change; 2.390 + current_pages += change; 2.391 + printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n", 2.392 + change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT); 2.393 + } 2.394 + 2.395 + if (most_seen_pages<target) { 2.396 + int growth = claim_new_pages(target-most_seen_pages); 2.397 + if (growth<=0) return growth; 2.398 + most_seen_pages += growth; 2.399 + current_pages += growth; 2.400 + printk(KERN_INFO "Granted %dMB new mem by xen. Domain now has %luMB\n", 2.401 + growth>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT); 2.402 + } 2.403 } 2.404 - else if (pages > current_pages) { 2.405 - int change = deflate_balloon(min(pages,max_pages) - current_pages); 2.406 - if (change<0) return change; 2.407 2.408 - current_pages += change; 2.409 - printk("Reclaim %dMB from xen. Domain now has %ldMB\n", 2.410 - change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT); 2.411 - } 2.412 2.413 return len; 2.414 } 2.415 2.416 2.417 static int balloon_read(char *page, char **start, off_t off, 2.418 - int count, int *eof, void *data) 2.419 + int count, int *eof, void *data) 2.420 { 2.421 - int len; 2.422 - len = sprintf(page,"%lu\n",current_pages<<PAGE_SHIFT); 2.423 + int len; 2.424 + len = sprintf(page,"%lu\n",current_pages<<PAGE_SHIFT); 2.425 2.426 - if (len <= off+count) *eof = 1; 2.427 - *start = page + off; 2.428 - len -= off; 2.429 - if (len>count) len = count; 2.430 - if (len<0) len = 0; 2.431 - return len; 2.432 + if (len <= off+count) *eof = 1; 2.433 + *start = page + off; 2.434 + len -= off; 2.435 + if (len>count) len = count; 2.436 + if (len<0) len = 0; 2.437 + return len; 2.438 } 2.439 2.440 static int __init init_module(void) 2.441 { 2.442 printk(KERN_ALERT "Starting Xen Balloon driver\n"); 2.443 2.444 - max_pages = current_pages = start_info.nr_pages; 2.445 + most_seen_pages = current_pages = min(start_info.nr_pages,max_pfn); 2.446 if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL ) 2.447 { 2.448 printk(KERN_ALERT "Unable to create balloon driver proc entry!"); 2.449 @@ -304,6 +463,17 @@ static int __init init_module(void) 2.450 balloon_pde->write_proc = balloon_write; 2.451 balloon_pde->read_proc = balloon_read; 2.452 2.453 + /* 2.454 + * make a new phys map if mem= says xen can give us memory to grow 2.455 + */ 2.456 + if (max_pfn > start_info.nr_pages) { 2.457 + extern unsigned long *phys_to_machine_mapping; 2.458 + unsigned long *newmap; 2.459 + newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long)); 2.460 + phys_to_machine_mapping = memcpy(newmap, phys_to_machine_mapping, 2.461 + start_info.nr_pages * sizeof(unsigned long)); 2.462 + } 2.463 + 2.464 return 0; 2.465 } 2.466
3.1 --- a/linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c Fri Aug 06 15:55:31 2004 +0000 3.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c Fri Aug 06 18:45:12 2004 +0000 3.3 @@ -120,10 +120,15 @@ union start_info_union start_info_union; 3.4 static char command_line[COMMAND_LINE_SIZE]; 3.5 char saved_command_line[COMMAND_LINE_SIZE]; 3.6 3.7 -static void __init parse_mem_cmdline (char ** cmdline_p) 3.8 +/* parse_mem_cmdline() 3.9 + * returns the value of the mem= boot param converted to pages or 0 3.10 + */ 3.11 +static int __init parse_mem_cmdline (char ** cmdline_p) 3.12 { 3.13 char c = ' ', *to = command_line, *from = saved_command_line; 3.14 int len = 0; 3.15 + unsigned long long bytes; 3.16 + int mem_param = 0; 3.17 3.18 /* Save unparsed command line copy for /proc/cmdline */ 3.19 memcpy(saved_command_line, start_info.cmd_line, COMMAND_LINE_SIZE); 3.20 @@ -145,8 +150,9 @@ static void __init parse_mem_cmdline (ch 3.21 } else if (!memcmp(from+4, "exactmap", 8)) { 3.22 from += 8+4; 3.23 } else { 3.24 - (void)memparse(from+4, &from); 3.25 - if (*from == '@') 3.26 + bytes = memparse(from+4, &from); 3.27 + mem_param = bytes>>PAGE_SHIFT; 3.28 + if (*from == '@') 3.29 (void)memparse(from+1, &from); 3.30 } 3.31 } 3.32 @@ -160,6 +166,8 @@ static void __init parse_mem_cmdline (ch 3.33 } 3.34 *to = '\0'; 3.35 *cmdline_p = command_line; 3.36 + 3.37 + return mem_param; 3.38 } 3.39 3.40 /* 3.41 @@ -194,7 +202,9 @@ int xen_module_init(struct module *mod) 3.42 3.43 void __init setup_arch(char **cmdline_p) 3.44 { 3.45 - unsigned long bootmap_size, start_pfn, max_low_pfn; 3.46 + unsigned long bootmap_size, start_pfn, lmax_low_pfn; 3.47 + int mem_param; /* user specified memory size in pages */ 3.48 + int boot_pfn; /* low pages available for bootmem */ 3.49 3.50 extern void hypervisor_callback(void); 3.51 extern void failsafe_callback(void); 3.52 @@ -252,7 +262,16 @@ void __init setup_arch(char **cmdline_p) 3.53 init_mm.end_data = (unsigned long) &_edata; 3.54 init_mm.brk = (unsigned long) &_end; 3.55 3.56 - parse_mem_cmdline(cmdline_p); 3.57 + /* The mem= kernel command line param overrides the detected amount 3.58 + * of memory. For xenolinux, if this override is larger than detected 3.59 + * memory, then boot using only detected memory and make provisions to 3.60 + * use all of the override value. The hypervisor can give this 3.61 + * domain more memory later on and it will be added to the free 3.62 + * lists at that time. See claim_new_pages() in 3.63 + * arch/xen/drivers/balloon/balloon.c 3.64 + */ 3.65 + mem_param = parse_mem_cmdline(cmdline_p); 3.66 + if (!mem_param) mem_param = start_info.nr_pages; 3.67 3.68 #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) 3.69 #define PFN_DOWN(x) ((x) >> PAGE_SHIFT) 3.70 @@ -269,9 +288,9 @@ void __init setup_arch(char **cmdline_p) 3.71 /* 3.72 * Determine low and high memory ranges: 3.73 */ 3.74 - max_low_pfn = max_pfn = start_info.nr_pages; 3.75 - if (max_low_pfn > MAXMEM_PFN) { 3.76 - max_low_pfn = MAXMEM_PFN; 3.77 + lmax_low_pfn = max_pfn = mem_param; 3.78 + if (lmax_low_pfn > MAXMEM_PFN) { 3.79 + lmax_low_pfn = MAXMEM_PFN; 3.80 #ifndef CONFIG_HIGHMEM 3.81 /* Maximum memory usable is what is directly addressable */ 3.82 printk(KERN_WARNING "Warning only %ldMB will be used.\n", 3.83 @@ -314,12 +333,20 @@ void __init setup_arch(char **cmdline_p) 3.84 * bootstrap page table. We are guaranteed to get >=512kB unused 'padding' 3.85 * for our own use after all bootstrap elements (see hypervisor-if.h). 
3.86 */ 3.87 - bootmap_size = init_bootmem(start_pfn, max_low_pfn); 3.88 - free_bootmem(0, PFN_PHYS(max_low_pfn)); 3.89 + boot_pfn = min((int)start_info.nr_pages,lmax_low_pfn); 3.90 + bootmap_size = init_bootmem(start_pfn,boot_pfn); 3.91 + free_bootmem(0, PFN_PHYS(boot_pfn)); 3.92 reserve_bootmem(__pa(&_stext), 3.93 PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE-1 - 3.94 __pa(&_stext)); 3.95 3.96 + /* init_bootmem() set the global max_low_pfn to boot_pfn. Now max_low_pfn 3.97 + * can be set to the override value. 3.98 + */ 3.99 + max_low_pfn = lmax_low_pfn; 3.100 + 3.101 + 3.102 + 3.103 #ifdef CONFIG_BLK_DEV_INITRD 3.104 if ( start_info.mod_start != 0 ) 3.105 {
4.1 --- a/linux-2.4.26-xen-sparse/arch/xen/mm/init.c Fri Aug 06 15:55:31 2004 +0000 4.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/mm/init.c Fri Aug 06 18:45:12 2004 +0000 4.3 @@ -219,11 +219,17 @@ static void __init pagetable_init (void) 4.4 pmd_t *kpmd, *pmd; 4.5 pte_t *kpte, *pte, *pte_base; 4.6 4.7 + /* create tables only for boot_pfn frames. max_low_pfn may be sized for 4.8 + * pages yet to be allocated from the hypervisor, or it may be set 4.9 + * to override the start_info amount of memory 4.10 + */ 4.11 + int boot_pfn = min(start_info.nr_pages,max_low_pfn); 4.12 + 4.13 /* 4.14 * This can be zero as well - no problem, in that case we exit 4.15 * the loops anyway due to the PTRS_PER_* conditions. 4.16 */ 4.17 - end = (unsigned long)__va(max_low_pfn*PAGE_SIZE); 4.18 + end = (unsigned long)__va(boot_pfn *PAGE_SIZE); 4.19 4.20 pgd_base = init_mm.pgd; 4.21 i = __pgd_offset(PAGE_OFFSET); 4.22 @@ -308,7 +314,6 @@ void __init paging_init(void) 4.23 pagetable_init(); 4.24 4.25 zone_sizes_init(); 4.26 - 4.27 /* Switch to the real shared_info page, and clear the dummy page. */ 4.28 set_fixmap(FIX_SHARED_INFO, start_info.shared_info); 4.29 HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); 4.30 @@ -368,11 +373,18 @@ static int __init free_pages_init(void) 4.31 #endif 4.32 int reservedpages, pfn; 4.33 4.34 + /* add only boot_pfn pages of low memory to free list. 4.35 + * max_low_pfn may be sized for 4.36 + * pages yet to be allocated from the hypervisor, or it may be set 4.37 + * to override the start_info amount of memory 4.38 + */ 4.39 + int boot_pfn = min(start_info.nr_pages,max_low_pfn); 4.40 + 4.41 /* this will put all low memory onto the freelists */ 4.42 totalram_pages += free_all_bootmem(); 4.43 4.44 reservedpages = 0; 4.45 - for (pfn = 0; pfn < max_low_pfn; pfn++) { 4.46 + for (pfn = 0; pfn < boot_pfn ; pfn++) { 4.47 /* 4.48 * Only count reserved RAM pages 4.49 */ 4.50 @@ -380,7 +392,7 @@ static int __init free_pages_init(void) 4.51 reservedpages++; 4.52 } 4.53 #ifdef CONFIG_HIGHMEM 4.54 - for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--) 4.55 + for (pfn = start_info.nr_pages-1; pfn >= highstart_pfn; pfn--) 4.56 one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro); 4.57 totalram_pages += totalhigh_pages; 4.58 #endif 4.59 @@ -460,11 +472,11 @@ void free_initrd_mem(unsigned long start 4.60 4.61 void si_meminfo(struct sysinfo *val) 4.62 { 4.63 - val->totalram = totalram_pages; 4.64 + val->totalram = max_pfn; 4.65 val->sharedram = 0; 4.66 val->freeram = nr_free_pages(); 4.67 val->bufferram = atomic_read(&buffermem_pages); 4.68 - val->totalhigh = totalhigh_pages; 4.69 + val->totalhigh = max_pfn-max_low_pfn; 4.70 val->freehigh = nr_free_highpages(); 4.71 val->mem_unit = PAGE_SIZE; 4.72 return;
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/tools/misc/setdomainmaxmem Fri Aug 06 18:45:12 2004 +0000 5.3 @@ -0,0 +1,34 @@ 5.4 +#!/usr/bin/env perl 5.5 + 5.6 +use strict; 5.7 +require "sys/ioctl.ph"; 5.8 + 5.9 +sub SIZEOF_HYPERCALL () { 24; } 5.10 +sub STRUCT_PRIVCMD_HYPERCALL () {"L P";} 5.11 +sub IOCTL_PRIVCMD_HYPERCALL () 5.12 + { &_IOC( &_IOC_NONE, ord('P'), 0, SIZEOF_HYPERCALL );} 5.13 +sub __HYPERVISOR_dom0_op () {7;} 5.14 +sub DOM0_INTERFACE_VERSION () {0xaaaa0010;} 5.15 +sub DOM0_SETDOMAINMAXMEM () {28;} 5.16 +sub STRUCT_DOM0_OP_PREFIX () {"L L";} 5.17 +sub STRUCT_SETDOMAINMAXMEM () {STRUCT_DOM0_OP_PREFIX."L x4 L";} 5.18 +sub XEN_PRIVCMD () {"/proc/xen/privcmd";} 5.19 + 5.20 +sub setdomainmaxmem($$) { 5.21 + my ($domain,$bytes) = @_; 5.22 + my $msg = pack(STRUCT_SETDOMAINMAXMEM,DOM0_SETDOMAINMAXMEM, 5.23 + DOM0_INTERFACE_VERSION, $domain, $bytes); 5.24 + my $cmd = pack(STRUCT_PRIVCMD_HYPERCALL,__HYPERVISOR_dom0_op,$msg); 5.25 + open(XEN,XEN_PRIVCMD) or die "$!\n"; 5.26 + ioctl(XEN, IOCTL_PRIVCMD_HYPERCALL, $cmd) or die "ioctl: $!"; 5.27 + close XEN; 5.28 +} 5.29 + 5.30 +my ($bytes,$suffix) = $ARGV[1] =~ m/(^\d+)([mMkKgG])/; 5.31 +$bytes<<=10 if $suffix =~ m/[kK]/; 5.32 +$bytes<<=20 if $suffix =~ m/[mM]/; 5.33 +$bytes<<=30 if $suffix =~ m/[gG]/; 5.34 + 5.35 +printf "set domain $ARGV[0] to $bytes\n"; 5.36 +setdomainmaxmem($ARGV[0],$bytes); 5.37 +