xenbits.xensource.com Git - legacy/linux-2.6.18-xen.git/commitdiff
x86-64: do not pass unmanageable amounts of memory to Dom0
author: Keir Fraser <keir.fraser@citrix.com>
Thu, 18 Jun 2009 09:32:16 +0000 (10:32 +0100)
committer: Keir Fraser <keir.fraser@citrix.com>
Thu, 18 Jun 2009 09:32:16 +0000 (10:32 +0100)
Due to address space restrictions it is not possible to successfully
pass more than about 500Gb to a Linux Dom0 unless its kernel specifies
a non-default phys-to-machine map location via XEN_ELFNOTE_INIT_P2M.

For non-Linux Dom0 kernels I can't say whether the limit could be set
to close to 1Tb, but since passing such huge amounts of memory isn't
very useful anyway (and can be enforced via dom0_mem=), the patch
doesn't attempt to guess the kernel type and restricts the memory
amount in all cases.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
arch/x86_64/kernel/head-xen.S
arch/x86_64/kernel/setup-xen.c
arch/x86_64/mm/init-xen.c
include/asm-x86_64/mach-xen/asm/pgtable.h

index d451a1de21187f5aacb342e326749f86a7c105e4..037c0badc882979cce4f81d42e2344a2a14ff755 100644 (file)
@@ -77,9 +77,6 @@ NEXT_PAGE(level3_kernel_pgt)
 NEXT_PAGE(level3_user_pgt)
         .fill  512,8,0
 
-NEXT_PAGE(level2_kernel_pgt)
-       .fill   512,8,0
-
 NEXT_PAGE(hypercall_page)
        CFI_STARTPROC
        .rept 0x1000 / 0x20
index 5d8d94e1ce44bf3556ec0c060e1b5e14ddff5a73..1683ad172122607081fff32448ccfe575a83f6ac 100644 (file)
@@ -524,10 +524,12 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
                panic("Cannot find bootmem map of size %ld\n",bootmap_size);
        bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
 #ifdef CONFIG_XEN
-       e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
-#else
-       e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
+       if (xen_start_info->nr_pages < end_pfn)
+               e820_bootmem_free(NODE_DATA(0), 0,
+                                 xen_start_info->nr_pages<<PAGE_SHIFT);
+       else
 #endif
+       e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
        reserve_bootmem(bootmap, bootmap_size);
 } 
 #endif
index a977e43d87649e31070decd233abab1197f70eca..e0f78310b3736ad002ff412060d30546d5e04534 100644 (file)
@@ -527,8 +527,6 @@ void __init xen_init_pt(void)
        page = (unsigned long *)xen_start_info->pt_base;
        addr = page[pgd_index(__START_KERNEL_map)];
        addr_to_page(addr, page);
-       addr = page[pud_index(__START_KERNEL_map)];
-       addr_to_page(addr, page);
 
 #if CONFIG_XEN_COMPAT <= 0x030002
        /* On Xen 3.0.2 and older we may need to explicitly specify _PAGE_USER
@@ -539,7 +537,9 @@ void __init xen_init_pt(void)
 
                /* Mess with the initial mapping of page 0. It's not needed. */
                BUILD_BUG_ON(__START_KERNEL <= __START_KERNEL_map);
-               addr = page[pmd_index(__START_KERNEL_map)];
+               addr = page[pud_index(__START_KERNEL_map)];
+               addr_to_page(addr, pg);
+               addr = pg[pmd_index(__START_KERNEL_map)];
                addr_to_page(addr, pg);
                pte.pte = pg[pte_index(__START_KERNEL_map)];
                BUG_ON(!(pte.pte & _PAGE_PRESENT));
@@ -560,9 +560,10 @@ void __init xen_init_pt(void)
        /* Construct mapping of initial pte page in our own directories. */
        init_level4_pgt[pgd_index(__START_KERNEL_map)] = 
                __pgd(__pa_symbol(level3_kernel_pgt) | _PAGE_TABLE);
-       level3_kernel_pgt[pud_index(__START_KERNEL_map)] = 
-               __pud(__pa_symbol(level2_kernel_pgt) | _PAGE_TABLE);
-       memcpy(level2_kernel_pgt, page, PAGE_SIZE);
+       memcpy(level3_kernel_pgt + pud_index(__START_KERNEL_map),
+              page + pud_index(__START_KERNEL_map),
+              (PTRS_PER_PUD - pud_index(__START_KERNEL_map))
+              * sizeof(*level3_kernel_pgt));
 
        __user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] =
                __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
@@ -575,8 +576,6 @@ void __init xen_init_pt(void)
                                 XENFEAT_writable_page_tables);
        early_make_page_readonly(level3_user_pgt,
                                 XENFEAT_writable_page_tables);
-       early_make_page_readonly(level2_kernel_pgt,
-                                XENFEAT_writable_page_tables);
 
        if (!xen_feature(XENFEAT_writable_page_tables)) {
                xen_pgd_pin(__pa_symbol(init_level4_pgt));
@@ -608,6 +607,23 @@ static void __init extend_init_mapping(unsigned long tables_space)
        while (va < (__START_KERNEL_map
                     + (start_pfn << PAGE_SHIFT)
                     + tables_space)) {
+               if (!(pmd_index(va) | pte_index(va))) {
+                       pud_t *pud;
+
+                       page = (unsigned long *)init_level4_pgt;
+                       addr = page[pgd_index(va)];
+                       addr_to_page(addr, page);
+                       pud = (pud_t *)&page[pud_index(va)];
+                       if (pud_none(*pud)) {
+                               page = alloc_static_page(&phys);
+                               early_make_page_readonly(
+                                       page, XENFEAT_writable_page_tables);
+                               set_pud(pud, __pud(phys | _KERNPG_TABLE));
+                       } else {
+                               addr = page[pud_index(va)];
+                               addr_to_page(addr, page);
+                       }
+               }
                pmd = (pmd_t *)&page[pmd_index(va)];
                if (pmd_none(*pmd)) {
                        pte_page = alloc_static_page(&phys);
@@ -630,6 +646,15 @@ static void __init extend_init_mapping(unsigned long tables_space)
 
        /* Finally, blow away any spurious initial mappings. */
        while (1) {
+               if (!(pmd_index(va) | pte_index(va))) {
+                       page = (unsigned long *)init_level4_pgt;
+                       addr = page[pgd_index(va)];
+                       addr_to_page(addr, page);
+                       if (pud_none(((pud_t *)page)[pud_index(va)]))
+                               break;
+                       addr = page[pud_index(va)];
+                       addr_to_page(addr, page);
+               }
                pmd = (pmd_t *)&page[pmd_index(va)];
                if (pmd_none(*pmd))
                        break;
index 8ea3863367e811dc9ae47304e56f2fa53e9c3f01..ea5fc556580ad0bf25242c2c6fb8a8d6453c7834 100644 (file)
@@ -137,11 +137,11 @@ static inline void pgd_clear (pgd_t * pgd)
 #define FIRST_USER_ADDRESS     0
 
 #ifndef __ASSEMBLY__
-#define MAXMEM          0x3fffffffffffUL
+#define MAXMEM          0x6fffffffffUL
 #define VMALLOC_START    0xffffc20000000000UL
 #define VMALLOC_END      0xffffe1ffffffffffUL
 #define MODULES_VADDR    0xffffffff88000000UL
-#define MODULES_END      0xfffffffffff00000UL
+#define MODULES_END      0xffffffffff000000UL
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 
 #define _PAGE_BIT_PRESENT      0