ia64/linux-2.6.18-xen.hg

changeset 908:baeb818cd2dc

x86-64: do not pass unmanageable amounts of memory to Dom0

Due to address space restrictions it is not possible to successfully
pass more than about 500Gb to a Linux Dom0 unless its kernel specifies
a non-default phys-to-machine map location via XEN_ELFNOTE_INIT_P2M.

For non-Linux Dom0 kernels I can't say whether the limit could be set
to close to 1Tb, but since passing such huge amounts of memory isn't
very useful anyway (and can be enforced via dom0_mem=), the patch
doesn't attempt to guess the kernel type and restricts the memory
amount in all cases.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 18 10:32:16 2009 +0100 (2009-06-18)
parents cad6f60f0506
children 865707f40531
files arch/x86_64/kernel/head-xen.S arch/x86_64/kernel/setup-xen.c arch/x86_64/mm/init-xen.c include/asm-x86_64/mach-xen/asm/pgtable.h
line diff
     1.1 --- a/arch/x86_64/kernel/head-xen.S	Thu Jun 18 10:24:18 2009 +0100
     1.2 +++ b/arch/x86_64/kernel/head-xen.S	Thu Jun 18 10:32:16 2009 +0100
     1.3 @@ -77,9 +77,6 @@ NEXT_PAGE(level3_kernel_pgt)
     1.4  NEXT_PAGE(level3_user_pgt)
     1.5          .fill	512,8,0
     1.6  
     1.7 -NEXT_PAGE(level2_kernel_pgt)
     1.8 -	.fill	512,8,0
     1.9 -
    1.10  NEXT_PAGE(hypercall_page)
    1.11  	CFI_STARTPROC
    1.12  	.rept 0x1000 / 0x20
     2.1 --- a/arch/x86_64/kernel/setup-xen.c	Thu Jun 18 10:24:18 2009 +0100
     2.2 +++ b/arch/x86_64/kernel/setup-xen.c	Thu Jun 18 10:32:16 2009 +0100
     2.3 @@ -524,10 +524,12 @@ contig_initmem_init(unsigned long start_
     2.4  		panic("Cannot find bootmem map of size %ld\n",bootmap_size);
     2.5  	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
     2.6  #ifdef CONFIG_XEN
     2.7 -	e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
     2.8 -#else
     2.9 +	if (xen_start_info->nr_pages < end_pfn)
    2.10 +		e820_bootmem_free(NODE_DATA(0), 0,
    2.11 +				  xen_start_info->nr_pages<<PAGE_SHIFT);
    2.12 +	else
    2.13 +#endif
    2.14  	e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
    2.15 -#endif
    2.16  	reserve_bootmem(bootmap, bootmap_size);
    2.17  } 
    2.18  #endif
     3.1 --- a/arch/x86_64/mm/init-xen.c	Thu Jun 18 10:24:18 2009 +0100
     3.2 +++ b/arch/x86_64/mm/init-xen.c	Thu Jun 18 10:32:16 2009 +0100
     3.3 @@ -527,8 +527,6 @@ void __init xen_init_pt(void)
     3.4  	page = (unsigned long *)xen_start_info->pt_base;
     3.5  	addr = page[pgd_index(__START_KERNEL_map)];
     3.6  	addr_to_page(addr, page);
     3.7 -	addr = page[pud_index(__START_KERNEL_map)];
     3.8 -	addr_to_page(addr, page);
     3.9  
    3.10  #if CONFIG_XEN_COMPAT <= 0x030002
    3.11  	/* On Xen 3.0.2 and older we may need to explicitly specify _PAGE_USER
    3.12 @@ -539,7 +537,9 @@ void __init xen_init_pt(void)
    3.13  
    3.14  		/* Mess with the initial mapping of page 0. It's not needed. */
    3.15  		BUILD_BUG_ON(__START_KERNEL <= __START_KERNEL_map);
    3.16 -		addr = page[pmd_index(__START_KERNEL_map)];
    3.17 +		addr = page[pud_index(__START_KERNEL_map)];
    3.18 +		addr_to_page(addr, pg);
    3.19 +		addr = pg[pmd_index(__START_KERNEL_map)];
    3.20  		addr_to_page(addr, pg);
    3.21  		pte.pte = pg[pte_index(__START_KERNEL_map)];
    3.22  		BUG_ON(!(pte.pte & _PAGE_PRESENT));
    3.23 @@ -560,9 +560,10 @@ void __init xen_init_pt(void)
    3.24  	/* Construct mapping of initial pte page in our own directories. */
    3.25  	init_level4_pgt[pgd_index(__START_KERNEL_map)] = 
    3.26  		__pgd(__pa_symbol(level3_kernel_pgt) | _PAGE_TABLE);
    3.27 -	level3_kernel_pgt[pud_index(__START_KERNEL_map)] = 
    3.28 -		__pud(__pa_symbol(level2_kernel_pgt) | _PAGE_TABLE);
    3.29 -	memcpy(level2_kernel_pgt, page, PAGE_SIZE);
    3.30 +	memcpy(level3_kernel_pgt + pud_index(__START_KERNEL_map),
    3.31 +	       page + pud_index(__START_KERNEL_map),
    3.32 +	       (PTRS_PER_PUD - pud_index(__START_KERNEL_map))
    3.33 +	       * sizeof(*level3_kernel_pgt));
    3.34  
    3.35  	__user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] =
    3.36  		__pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
    3.37 @@ -575,8 +576,6 @@ void __init xen_init_pt(void)
    3.38  				 XENFEAT_writable_page_tables);
    3.39  	early_make_page_readonly(level3_user_pgt,
    3.40  				 XENFEAT_writable_page_tables);
    3.41 -	early_make_page_readonly(level2_kernel_pgt,
    3.42 -				 XENFEAT_writable_page_tables);
    3.43  
    3.44  	if (!xen_feature(XENFEAT_writable_page_tables)) {
    3.45  		xen_pgd_pin(__pa_symbol(init_level4_pgt));
    3.46 @@ -608,6 +607,23 @@ static void __init extend_init_mapping(u
    3.47  	while (va < (__START_KERNEL_map
    3.48  		     + (start_pfn << PAGE_SHIFT)
    3.49  		     + tables_space)) {
    3.50 +		if (!(pmd_index(va) | pte_index(va))) {
    3.51 +			pud_t *pud;
    3.52 +
    3.53 +			page = (unsigned long *)init_level4_pgt;
    3.54 +			addr = page[pgd_index(va)];
    3.55 +			addr_to_page(addr, page);
    3.56 +			pud = (pud_t *)&page[pud_index(va)];
    3.57 +			if (pud_none(*pud)) {
    3.58 +				page = alloc_static_page(&phys);
    3.59 +				early_make_page_readonly(
    3.60 +					page, XENFEAT_writable_page_tables);
    3.61 +				set_pud(pud, __pud(phys | _KERNPG_TABLE));
    3.62 +			} else {
    3.63 +				addr = page[pud_index(va)];
    3.64 +				addr_to_page(addr, page);
    3.65 +			}
    3.66 +		}
    3.67  		pmd = (pmd_t *)&page[pmd_index(va)];
    3.68  		if (pmd_none(*pmd)) {
    3.69  			pte_page = alloc_static_page(&phys);
    3.70 @@ -630,6 +646,15 @@ static void __init extend_init_mapping(u
    3.71  
    3.72  	/* Finally, blow away any spurious initial mappings. */
    3.73  	while (1) {
    3.74 +		if (!(pmd_index(va) | pte_index(va))) {
    3.75 +			page = (unsigned long *)init_level4_pgt;
    3.76 +			addr = page[pgd_index(va)];
    3.77 +			addr_to_page(addr, page);
    3.78 +			if (pud_none(((pud_t *)page)[pud_index(va)]))
    3.79 +				break;
    3.80 +			addr = page[pud_index(va)];
    3.81 +			addr_to_page(addr, page);
    3.82 +		}
    3.83  		pmd = (pmd_t *)&page[pmd_index(va)];
    3.84  		if (pmd_none(*pmd))
    3.85  			break;
     4.1 --- a/include/asm-x86_64/mach-xen/asm/pgtable.h	Thu Jun 18 10:24:18 2009 +0100
     4.2 +++ b/include/asm-x86_64/mach-xen/asm/pgtable.h	Thu Jun 18 10:32:16 2009 +0100
     4.3 @@ -137,11 +137,11 @@ static inline void pgd_clear (pgd_t * pg
     4.4  #define FIRST_USER_ADDRESS	0
     4.5  
     4.6  #ifndef __ASSEMBLY__
     4.7 -#define MAXMEM		 0x3fffffffffffUL
     4.8 +#define MAXMEM		 0x6fffffffffUL
     4.9  #define VMALLOC_START    0xffffc20000000000UL
    4.10  #define VMALLOC_END      0xffffe1ffffffffffUL
    4.11  #define MODULES_VADDR    0xffffffff88000000UL
    4.12 -#define MODULES_END      0xfffffffffff00000UL
    4.13 +#define MODULES_END      0xffffffffff000000UL
    4.14  #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
    4.15  
    4.16  #define _PAGE_BIT_PRESENT	0