ia64/xen-unstable
changeset 19797:8440fc9f7a25
x86-64: do not pass unmanageable amounts of memory to Dom0
Due to address space restrictions it is not possible to successfully
pass more than about 500Gb to a Linux Dom0 unless its kernel specifies
a non-default phys-to-machine map location via XEN_ELFNOTE_INIT_P2M.
For non-Linux Dom0 kernels I can't say whether the limit could be set
to close to 1Tb, but since passing such huge amounts of memory isn't
very useful anyway (and can be enforced via dom0_mem=), the patch
doesn't attempt to guess the kernel type and restricts the memory
amount in all cases.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Due to address space restrictions it is not possible to successfully
pass more than about 500Gb to a Linux Dom0 unless its kernel specifies
a non-default phys-to-machine map location via XEN_ELFNOTE_INIT_P2M.
For non-Linux Dom0 kernels I can't say whether the limit could be set
to close to 1Tb, but since passing such huge amounts of memory isn't
very useful anyway (and can be enforced via dom0_mem=), the patch
doesn't attempt to guess the kernel type and restricts the memory
amount in all cases.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
author | Keir Fraser <keir.fraser@citrix.com> |
---|---|
date | Thu Jun 18 10:31:17 2009 +0100 (2009-06-18) |
parents | 1c01814f9a25 |
children | af06333d4c5d |
files | xen/arch/x86/domain_build.c |
line diff
1.1 --- a/xen/arch/x86/domain_build.c Thu Jun 18 10:30:28 2009 +0100 1.2 +++ b/xen/arch/x86/domain_build.c Thu Jun 18 10:31:17 2009 +0100 1.3 @@ -146,33 +146,65 @@ static struct page_info * __init alloc_c 1.4 return page; 1.5 } 1.6 1.7 -static unsigned long __init compute_dom0_nr_pages(void) 1.8 +static unsigned long __init compute_dom0_nr_pages( 1.9 +#ifdef __x86_64__ 1.10 + unsigned long vstart, unsigned long vend, size_t sizeof_long) 1.11 +#else 1.12 + void) 1.13 +#endif 1.14 { 1.15 unsigned long avail = avail_domheap_pages() + initial_images_nrpages(); 1.16 + unsigned long nr_pages = dom0_nrpages; 1.17 + unsigned long min_pages = dom0_min_nrpages; 1.18 + unsigned long max_pages = dom0_max_nrpages; 1.19 1.20 /* 1.21 * If domain 0 allocation isn't specified, reserve 1/16th of available 1.22 * memory for things like DMA buffers. This reservation is clamped to 1.23 * a maximum of 128MB. 1.24 */ 1.25 - if ( dom0_nrpages == 0 ) 1.26 - { 1.27 - dom0_nrpages = avail; 1.28 - dom0_nrpages = min(dom0_nrpages / 16, 128L << (20 - PAGE_SHIFT)); 1.29 - dom0_nrpages = -dom0_nrpages; 1.30 - } 1.31 + if ( nr_pages == 0 ) 1.32 + nr_pages = -min(avail / 16, 128UL << (20 - PAGE_SHIFT)); 1.33 1.34 /* Negative memory specification means "all memory - specified amount". */ 1.35 - if ( dom0_nrpages < 0 ) dom0_nrpages += avail; 1.36 - if ( dom0_min_nrpages < 0 ) dom0_min_nrpages += avail; 1.37 - if ( dom0_max_nrpages < 0 ) dom0_max_nrpages += avail; 1.38 + if ( (long)nr_pages < 0 ) nr_pages += avail; 1.39 + if ( (long)min_pages < 0 ) min_pages += avail; 1.40 + if ( (long)max_pages < 0 ) max_pages += avail; 1.41 1.42 /* Clamp dom0 memory according to min/max limits and available memory. 
*/ 1.43 - dom0_nrpages = max(dom0_nrpages, dom0_min_nrpages); 1.44 - dom0_nrpages = min(dom0_nrpages, dom0_max_nrpages); 1.45 - dom0_nrpages = min(dom0_nrpages, (long)avail); 1.46 + nr_pages = max(nr_pages, min_pages); 1.47 + nr_pages = min(nr_pages, max_pages); 1.48 + nr_pages = min(nr_pages, avail); 1.49 1.50 - return dom0_nrpages; 1.51 +#ifdef __x86_64__ 1.52 + if ( vstart && dom0_nrpages <= 0 && 1.53 + (dom0_min_nrpages <= 0 || nr_pages > min_pages) ) 1.54 + { 1.55 + /* 1.56 + * Legacy Linux kernels (i.e. such without a XEN_ELFNOTE_INIT_P2M 1.57 + * note) require that there is enough virtual space beyond the initial 1.58 + * allocation to set up their initial page tables. This space is 1.59 + * roughly the same size as the p2m table, so make sure the initial 1.60 + * allocation doesn't consume more than about half the space that's 1.61 + * available between params.virt_base and the address space end. 1.62 + */ 1.63 + unsigned long end = vend + nr_pages * sizeof_long; 1.64 + 1.65 + if ( end > vstart ) 1.66 + end += end - vstart; 1.67 + if ( end <= vstart || 1.68 + (sizeof_long < sizeof(end) && end > (1UL << (8 * sizeof_long))) ) 1.69 + { 1.70 + end = sizeof_long >= sizeof(end) ? 
0 : 1UL << (8 * sizeof_long); 1.71 + nr_pages = (end - vend) / (2 * sizeof_long); 1.72 + if ( dom0_min_nrpages > 0 && nr_pages < min_pages ) 1.73 + nr_pages = min_pages; 1.74 + printk("Dom0 memory clipped to %lu pages\n", nr_pages); 1.75 + } 1.76 + } 1.77 +#endif 1.78 + 1.79 + return nr_pages; 1.80 } 1.81 1.82 static void __init process_dom0_ioports_disable(void) 1.83 @@ -282,8 +314,6 @@ int __init construct_dom0( 1.84 1.85 d->max_pages = ~0U; 1.86 1.87 - nr_pages = compute_dom0_nr_pages(); 1.88 - 1.89 if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 ) 1.90 return rc; 1.91 1.92 @@ -342,9 +372,18 @@ int __init construct_dom0( 1.93 d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1; 1.94 v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0]; 1.95 1.96 - if ( nr_pages != (unsigned int)nr_pages ) 1.97 - nr_pages = UINT_MAX; 1.98 + nr_pages = compute_dom0_nr_pages(parms.virt_base, 1.99 + round_pgup(parms.virt_kend) + round_pgup(initrd_len), 1.100 + sizeof(unsigned int)); 1.101 } 1.102 + else if (parms.p2m_base != UNSET_ADDR) 1.103 + nr_pages = compute_dom0_nr_pages(0, 0, 0); 1.104 + else 1.105 + nr_pages = compute_dom0_nr_pages(parms.virt_base, 1.106 + round_pgup(parms.virt_kend) + round_pgup(initrd_len), 1.107 + sizeof(unsigned long)); 1.108 +#else 1.109 + nr_pages = compute_dom0_nr_pages(); 1.110 #endif 1.111 1.112 if ( parms.pae == PAEKERN_extended_cr3 )