ia64/linux-2.6.18-xen.hg

annotate arch/x86_64/mm/init-xen.c @ 888:9090872bac23

xen/x86-64: fix phys_pmd_init() (regression from c/s 547)

I didn't pay attention to the fact that 'end' must always be an upper
bound, while xen_start_info->nr_pages must additionally be taken into
account during boot.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri May 29 09:17:16 2009 +0100 (2009-05-29)
parents e410857fd83c
children baeb818cd2dc
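
For illustration only, the guard this changeset restores in phys_pmd_init()
(the keir@888 lines in the listing below) can be sketched as a standalone C
function: 'end' always caps the range being mapped, and before bootmem is up
the domain's initial allocation (xen_start_info->nr_pages) is an additional
limit. The function name, the PAGE_SHIFT value and the figures in main() are
illustrative stand-ins, not kernel code.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12 /* assumed 4K pages, for the sketch only */

/* Sketch of the check in phys_pmd_init(): an address gets a present PTE only
 * if it lies below 'end', and, before bootmem is up, also below the domain's
 * initial allocation of nr_pages. */
static int address_is_mappable(uint64_t address, uint64_t end,
                               int after_bootmem, uint64_t nr_pages)
{
	if (address >= end)
		return 0;	/* 'end' is always an upper bound */
	if (!after_bootmem && (address >> PAGE_SHIFT) >= nr_pages)
		return 0;	/* boot-time only: honour nr_pages as well */
	return 1;
}

int main(void)
{
	/* Example: 512MB initial allocation, direct mapping requested up to 1GB. */
	uint64_t end = 1ULL << 30;
	uint64_t nr_pages = (512ULL << 20) >> PAGE_SHIFT;

	printf("%d\n", address_is_mappable(768ULL << 20, end, 0, nr_pages)); /* 0: beyond nr_pages at boot */
	printf("%d\n", address_is_mappable(768ULL << 20, end, 1, nr_pages)); /* 1: after bootmem only 'end' applies */
	return 0;
}

After bootmem is up, pages beyond nr_pages are brought online elsewhere (see
the XEN loop in mem_init() further down), so only 'end' applies at that point.
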
rev   line source
ian@26 1 /*
ian@26 2 * linux/arch/x86_64/mm/init.c
ian@26 3 *
ian@26 4 * Copyright (C) 1995 Linus Torvalds
ian@26 5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
ian@26 6 * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
ian@26 7 *
ian@26 8 * Jun Nakajima <jun.nakajima@intel.com>
ian@26 9 * Modified for Xen.
ian@26 10 */
ian@26 11
ian@26 12 #include <linux/signal.h>
ian@26 13 #include <linux/sched.h>
ian@26 14 #include <linux/kernel.h>
ian@26 15 #include <linux/errno.h>
ian@26 16 #include <linux/string.h>
ian@26 17 #include <linux/types.h>
ian@26 18 #include <linux/ptrace.h>
ian@26 19 #include <linux/mman.h>
ian@26 20 #include <linux/mm.h>
ian@26 21 #include <linux/swap.h>
ian@26 22 #include <linux/smp.h>
ian@26 23 #include <linux/init.h>
ian@26 24 #include <linux/pagemap.h>
ian@26 25 #include <linux/bootmem.h>
ian@26 26 #include <linux/proc_fs.h>
ian@26 27 #include <linux/pci.h>
ian@26 28 #include <linux/poison.h>
ian@26 29 #include <linux/dma-mapping.h>
ian@26 30 #include <linux/module.h>
ian@26 31 #include <linux/memory_hotplug.h>
ian@26 32
ian@26 33 #include <asm/processor.h>
ian@26 34 #include <asm/system.h>
ian@26 35 #include <asm/uaccess.h>
ian@26 36 #include <asm/pgtable.h>
ian@26 37 #include <asm/pgalloc.h>
ian@26 38 #include <asm/dma.h>
ian@26 39 #include <asm/fixmap.h>
ian@26 40 #include <asm/e820.h>
ian@26 41 #include <asm/apic.h>
ian@26 42 #include <asm/tlb.h>
ian@26 43 #include <asm/mmu_context.h>
ian@26 44 #include <asm/proto.h>
ian@26 45 #include <asm/smp.h>
ian@26 46 #include <asm/sections.h>
ian@26 47
ian@26 48 #include <xen/features.h>
ian@26 49
ian@26 50 #ifndef Dprintk
ian@26 51 #define Dprintk(x...)
ian@26 52 #endif
ian@26 53
ian@26 54 struct dma_mapping_ops* dma_ops;
ian@26 55 EXPORT_SYMBOL(dma_ops);
ian@26 56
ian@26 57 #if CONFIG_XEN_COMPAT <= 0x030002
ian@26 58 unsigned int __kernel_page_user;
ian@26 59 EXPORT_SYMBOL(__kernel_page_user);
ian@26 60 #endif
ian@26 61
keir@554 62 int after_bootmem;
keir@554 63
ian@26 64 static unsigned long dma_reserve __initdata;
ian@26 65
ian@26 66 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
ian@26 67 extern unsigned long start_pfn;
ian@26 68
ian@26 69 /*
ian@26 70 * Use this until direct mapping is established, i.e. before __va() is
ian@26 71 * available in init_memory_mapping().
ian@26 72 */
ian@26 73
ian@26 74 #define addr_to_page(addr, page) \
ian@26 75 (addr) &= PHYSICAL_PAGE_MASK; \
ian@26 76 (page) = ((unsigned long *) ((unsigned long) \
ian@26 77 (((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) + \
ian@26 78 __START_KERNEL_map)))
ian@26 79
ian@26 80 static void __meminit early_make_page_readonly(void *va, unsigned int feature)
ian@26 81 {
ian@26 82 unsigned long addr, _va = (unsigned long)va;
ian@26 83 pte_t pte, *ptep;
ian@26 84 unsigned long *page = (unsigned long *) init_level4_pgt;
ian@26 85
keir@554 86 BUG_ON(after_bootmem);
keir@554 87
ian@26 88 if (xen_feature(feature))
ian@26 89 return;
ian@26 90
ian@26 91 addr = (unsigned long) page[pgd_index(_va)];
ian@26 92 addr_to_page(addr, page);
ian@26 93
ian@26 94 addr = page[pud_index(_va)];
ian@26 95 addr_to_page(addr, page);
ian@26 96
ian@26 97 addr = page[pmd_index(_va)];
ian@26 98 addr_to_page(addr, page);
ian@26 99
ian@26 100 ptep = (pte_t *) &page[pte_index(_va)];
ian@26 101
ian@26 102 pte.pte = ptep->pte & ~_PAGE_RW;
ian@26 103 if (HYPERVISOR_update_va_mapping(_va, pte, 0))
ian@26 104 BUG();
ian@26 105 }
ian@26 106
ian@26 107 static void __make_page_readonly(void *va)
ian@26 108 {
ian@26 109 pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
ian@26 110 unsigned long addr = (unsigned long) va;
ian@26 111
ian@26 112 pgd = pgd_offset_k(addr);
ian@26 113 pud = pud_offset(pgd, addr);
ian@26 114 pmd = pmd_offset(pud, addr);
ian@26 115 ptep = pte_offset_kernel(pmd, addr);
ian@26 116
ian@26 117 pte.pte = ptep->pte & ~_PAGE_RW;
ian@26 118 if (HYPERVISOR_update_va_mapping(addr, pte, 0))
ian@26 119 xen_l1_entry_update(ptep, pte); /* fallback */
ian@26 120
ian@26 121 if ((addr >= VMALLOC_START) && (addr < VMALLOC_END))
ian@26 122 __make_page_readonly(__va(pte_pfn(pte) << PAGE_SHIFT));
ian@26 123 }
ian@26 124
ian@26 125 static void __make_page_writable(void *va)
ian@26 126 {
ian@26 127 pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
ian@26 128 unsigned long addr = (unsigned long) va;
ian@26 129
ian@26 130 pgd = pgd_offset_k(addr);
ian@26 131 pud = pud_offset(pgd, addr);
ian@26 132 pmd = pmd_offset(pud, addr);
ian@26 133 ptep = pte_offset_kernel(pmd, addr);
ian@26 134
ian@26 135 pte.pte = ptep->pte | _PAGE_RW;
ian@26 136 if (HYPERVISOR_update_va_mapping(addr, pte, 0))
ian@26 137 xen_l1_entry_update(ptep, pte); /* fallback */
ian@26 138
ian@26 139 if ((addr >= VMALLOC_START) && (addr < VMALLOC_END))
ian@26 140 __make_page_writable(__va(pte_pfn(pte) << PAGE_SHIFT));
ian@26 141 }
ian@26 142
ian@26 143 void make_page_readonly(void *va, unsigned int feature)
ian@26 144 {
ian@26 145 if (!xen_feature(feature))
ian@26 146 __make_page_readonly(va);
ian@26 147 }
ian@26 148
ian@26 149 void make_page_writable(void *va, unsigned int feature)
ian@26 150 {
ian@26 151 if (!xen_feature(feature))
ian@26 152 __make_page_writable(va);
ian@26 153 }
ian@26 154
ian@26 155 void make_pages_readonly(void *va, unsigned nr, unsigned int feature)
ian@26 156 {
ian@26 157 if (xen_feature(feature))
ian@26 158 return;
ian@26 159
ian@26 160 while (nr-- != 0) {
ian@26 161 __make_page_readonly(va);
ian@26 162 va = (void*)((unsigned long)va + PAGE_SIZE);
ian@26 163 }
ian@26 164 }
ian@26 165
ian@26 166 void make_pages_writable(void *va, unsigned nr, unsigned int feature)
ian@26 167 {
ian@26 168 if (xen_feature(feature))
ian@26 169 return;
ian@26 170
ian@26 171 while (nr-- != 0) {
ian@26 172 __make_page_writable(va);
ian@26 173 va = (void*)((unsigned long)va + PAGE_SIZE);
ian@26 174 }
ian@26 175 }
ian@26 176
ian@26 177 /*
ian@26 178 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
ian@26 179 * physical space, so we can cache the location of the first one and move
ian@26 180 * around without checking the pgd every time.
ian@26 181 */
ian@26 182
ian@26 183 void show_mem(void)
ian@26 184 {
ian@26 185 long i, total = 0, reserved = 0;
ian@26 186 long shared = 0, cached = 0;
ian@26 187 pg_data_t *pgdat;
ian@26 188 struct page *page;
ian@26 189
ian@26 190 printk(KERN_INFO "Mem-info:\n");
ian@26 191 show_free_areas();
ian@26 192 printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
ian@26 193
ian@26 194 for_each_online_pgdat(pgdat) {
ian@26 195 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
ian@26 196 page = pfn_to_page(pgdat->node_start_pfn + i);
ian@26 197 total++;
ian@26 198 if (PageReserved(page))
ian@26 199 reserved++;
ian@26 200 else if (PageSwapCache(page))
ian@26 201 cached++;
ian@26 202 else if (page_count(page))
ian@26 203 shared += page_count(page) - 1;
ian@26 204 }
ian@26 205 }
ian@26 206 printk(KERN_INFO "%lu pages of RAM\n", total);
ian@26 207 printk(KERN_INFO "%lu reserved pages\n",reserved);
ian@26 208 printk(KERN_INFO "%lu pages shared\n",shared);
ian@26 209 printk(KERN_INFO "%lu pages swap cached\n",cached);
ian@26 210 }
ian@26 211
ian@26 212
ian@26 213 static __init void *spp_getpage(void)
ian@26 214 {
ian@26 215 void *ptr;
ian@26 216 if (after_bootmem)
ian@26 217 ptr = (void *) get_zeroed_page(GFP_ATOMIC);
ian@26 218 else if (start_pfn < table_end) {
ian@26 219 ptr = __va(start_pfn << PAGE_SHIFT);
ian@26 220 start_pfn++;
ian@26 221 memset(ptr, 0, PAGE_SIZE);
ian@26 222 } else
ian@26 223 ptr = alloc_bootmem_pages(PAGE_SIZE);
ian@26 224 if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
ian@26 225 panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
ian@26 226
ian@26 227 Dprintk("spp_getpage %p\n", ptr);
ian@26 228 return ptr;
ian@26 229 }
ian@26 230
kfraser@53 231 #define pgd_offset_u(address) (__user_pgd(init_level4_pgt) + pgd_index(address))
kfraser@53 232 #define pud_offset_u(address) (level3_user_pgt + pud_index(address))
ian@26 233
ian@26 234 static __init void set_pte_phys(unsigned long vaddr,
ian@26 235 unsigned long phys, pgprot_t prot, int user_mode)
ian@26 236 {
ian@26 237 pgd_t *pgd;
ian@26 238 pud_t *pud;
ian@26 239 pmd_t *pmd;
ian@26 240 pte_t *pte, new_pte;
ian@26 241
ian@26 242 Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
ian@26 243
ian@26 244 pgd = (user_mode ? pgd_offset_u(vaddr) : pgd_offset_k(vaddr));
ian@26 245 if (pgd_none(*pgd)) {
ian@26 246 printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
ian@26 247 return;
ian@26 248 }
ian@26 249 pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr));
ian@26 250 if (pud_none(*pud)) {
ian@26 251 pmd = (pmd_t *) spp_getpage();
ian@26 252 make_page_readonly(pmd, XENFEAT_writable_page_tables);
ian@26 253 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
ian@26 254 if (pmd != pmd_offset(pud, 0)) {
ian@26 255 printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
ian@26 256 return;
ian@26 257 }
ian@26 258 }
ian@26 259 pmd = pmd_offset(pud, vaddr);
ian@26 260 if (pmd_none(*pmd)) {
ian@26 261 pte = (pte_t *) spp_getpage();
ian@26 262 make_page_readonly(pte, XENFEAT_writable_page_tables);
ian@26 263 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
ian@26 264 if (pte != pte_offset_kernel(pmd, 0)) {
ian@26 265 printk("PAGETABLE BUG #02!\n");
ian@26 266 return;
ian@26 267 }
ian@26 268 }
ian@26 269 if (pgprot_val(prot))
ian@26 270 new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
ian@26 271 else
ian@26 272 new_pte = __pte(0);
ian@26 273
ian@26 274 pte = pte_offset_kernel(pmd, vaddr);
keir@700 275 if (!pte_none(*pte) && __pte_val(new_pte) &&
keir@283 276 __pte_val(*pte) != (__pte_val(new_pte) & __supported_pte_mask))
ian@26 277 pte_ERROR(*pte);
ian@26 278 set_pte(pte, new_pte);
ian@26 279
ian@26 280 /*
ian@26 281 * It's enough to flush this one mapping.
ian@26 282 * (PGE mappings get flushed as well)
ian@26 283 */
ian@26 284 __flush_tlb_one(vaddr);
ian@26 285 }
ian@26 286
ian@26 287 static __init void set_pte_phys_ma(unsigned long vaddr,
ian@26 288 unsigned long phys, pgprot_t prot)
ian@26 289 {
ian@26 290 pgd_t *pgd;
ian@26 291 pud_t *pud;
ian@26 292 pmd_t *pmd;
ian@26 293 pte_t *pte, new_pte;
ian@26 294
ian@26 295 Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
ian@26 296
ian@26 297 pgd = pgd_offset_k(vaddr);
ian@26 298 if (pgd_none(*pgd)) {
ian@26 299 printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
ian@26 300 return;
ian@26 301 }
ian@26 302 pud = pud_offset(pgd, vaddr);
ian@26 303 if (pud_none(*pud)) {
ian@26 304
ian@26 305 pmd = (pmd_t *) spp_getpage();
ian@26 306 make_page_readonly(pmd, XENFEAT_writable_page_tables);
ian@26 307 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
ian@26 308 if (pmd != pmd_offset(pud, 0)) {
ian@26 309 printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
ian@26 310 return;
ian@26 311 }
ian@26 312 }
ian@26 313 pmd = pmd_offset(pud, vaddr);
ian@26 314 if (pmd_none(*pmd)) {
ian@26 315 pte = (pte_t *) spp_getpage();
ian@26 316 make_page_readonly(pte, XENFEAT_writable_page_tables);
ian@26 317 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
ian@26 318 if (pte != pte_offset_kernel(pmd, 0)) {
ian@26 319 printk("PAGETABLE BUG #02!\n");
ian@26 320 return;
ian@26 321 }
ian@26 322 }
ian@26 323 new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot);
ian@26 324
ian@26 325 pte = pte_offset_kernel(pmd, vaddr);
keir@700 326 if (!pte_none(*pte) && __pte_val(new_pte) &&
keir@623 327 #ifdef CONFIG_ACPI
keir@623 328 /* __acpi_map_table() fails to properly call clear_fixmap() */
keir@623 329 (vaddr < __fix_to_virt(FIX_ACPI_END) ||
keir@623 330 vaddr > __fix_to_virt(FIX_ACPI_BEGIN)) &&
keir@623 331 #endif
keir@623 332 __pte_val(*pte) != (__pte_val(new_pte) & __supported_pte_mask))
keir@623 333 pte_ERROR(*pte);
ian@26 334 set_pte(pte, new_pte);
ian@26 335
ian@26 336 /*
ian@26 337 * It's enough to flush this one mapping.
ian@26 338 * (PGE mappings get flushed as well)
ian@26 339 */
ian@26 340 __flush_tlb_one(vaddr);
ian@26 341 }
ian@26 342
ian@26 343 /* NOTE: this is meant to be run only at boot */
ian@26 344 void __init
ian@26 345 __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
ian@26 346 {
ian@26 347 unsigned long address = __fix_to_virt(idx);
ian@26 348
ian@26 349 if (idx >= __end_of_fixed_addresses) {
ian@26 350 printk("Invalid __set_fixmap\n");
ian@26 351 return;
ian@26 352 }
ian@26 353 switch (idx) {
ian@26 354 case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
kfraser@53 355 set_pte_phys(address, phys, prot, 0);
kfraser@53 356 set_pte_phys(address, phys, prot, 1);
ian@26 357 break;
ian@26 358 default:
ian@26 359 set_pte_phys_ma(address, phys, prot);
ian@26 360 break;
ian@26 361 }
ian@26 362 }
ian@26 363
ian@26 364 unsigned long __initdata table_start, table_end;
ian@26 365
ian@26 366 static __meminit void *alloc_static_page(unsigned long *phys)
ian@26 367 {
ian@26 368 unsigned long va = (start_pfn << PAGE_SHIFT) + __START_KERNEL_map;
ian@26 369
ian@26 370 if (after_bootmem) {
ian@26 371 void *adr = (void *)get_zeroed_page(GFP_ATOMIC);
ian@26 372
ian@26 373 *phys = __pa(adr);
ian@26 374 return adr;
ian@26 375 }
ian@26 376
ian@26 377 *phys = start_pfn << PAGE_SHIFT;
ian@26 378 start_pfn++;
ian@26 379 memset((void *)va, 0, PAGE_SIZE);
ian@26 380 return (void *)va;
ian@26 381 }
ian@26 382
ian@26 383 #define PTE_SIZE PAGE_SIZE
ian@26 384
ian@26 385 static inline int make_readonly(unsigned long paddr)
ian@26 386 {
ian@26 387 extern char __vsyscall_0;
ian@26 388 int readonly = 0;
ian@26 389
ian@26 390 /* Make new page tables read-only. */
ian@26 391 if (!xen_feature(XENFEAT_writable_page_tables)
ian@26 392 && (paddr >= (table_start << PAGE_SHIFT))
ian@26 393 && (paddr < (table_end << PAGE_SHIFT)))
ian@26 394 readonly = 1;
ian@26 395 /* Make old page tables read-only. */
ian@26 396 if (!xen_feature(XENFEAT_writable_page_tables)
ian@26 397 && (paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
ian@26 398 && (paddr < (start_pfn << PAGE_SHIFT)))
ian@26 399 readonly = 1;
ian@26 400
ian@26 401 /*
ian@26 402 * No need for writable mapping of kernel image. This also ensures that
ian@26 403 * page and descriptor tables embedded inside don't have writable
ian@26 404 * mappings. Exclude the vsyscall area here, allowing alternative
ian@26 405 * instruction patching to work.
ian@26 406 */
ian@26 407 if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end))
ian@26 408 && !(paddr >= __pa_symbol(&__vsyscall_0)
ian@26 409 && paddr < __pa_symbol(&__vsyscall_0) + PAGE_SIZE))
ian@26 410 readonly = 1;
ian@26 411
ian@26 412 return readonly;
ian@26 413 }
ian@26 414
ian@26 415 #ifndef CONFIG_XEN
ian@26 416 /* Must run before zap_low_mappings */
ian@26 417 __init void *early_ioremap(unsigned long addr, unsigned long size)
ian@26 418 {
ian@26 419 unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
ian@26 420
ian@26 421 /* actually usually some more */
ian@26 422 if (size >= LARGE_PAGE_SIZE) {
ian@26 423 printk("SMBIOS area too long %lu\n", size);
ian@26 424 return NULL;
ian@26 425 }
ian@26 426 set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
ian@26 427 map += LARGE_PAGE_SIZE;
ian@26 428 set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
ian@26 429 __flush_tlb();
ian@26 430 return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
ian@26 431 }
ian@26 432
ian@26 433 /* To avoid virtual aliases later */
ian@26 434 __init void early_iounmap(void *addr, unsigned long size)
ian@26 435 {
ian@26 436 if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
ian@26 437 printk("early_iounmap: bad address %p\n", addr);
ian@26 438 set_pmd(temp_mappings[0].pmd, __pmd(0));
ian@26 439 set_pmd(temp_mappings[1].pmd, __pmd(0));
ian@26 440 __flush_tlb();
ian@26 441 }
ian@26 442 #endif
ian@26 443
ian@26 444 static void __meminit
ian@26 445 phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
ian@26 446 {
ian@26 447 int i, k;
ian@26 448
ian@26 449 for (i = 0; i < PTRS_PER_PMD; pmd++, i++) {
ian@26 450 unsigned long pte_phys;
ian@26 451 pte_t *pte, *pte_save;
ian@26 452
keir@554 453 if (address >= end)
ian@26 454 break;
ian@26 455 pte = alloc_static_page(&pte_phys);
ian@26 456 pte_save = pte;
ian@26 457 for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) {
ian@26 458 unsigned long pteval = address | _PAGE_NX | _KERNPG_TABLE;
ian@26 459
keir@888 460 if (address >= end ||
keir@888 461 (!after_bootmem &&
keir@888 462 (address >> PAGE_SHIFT) >= xen_start_info->nr_pages))
ian@26 463 pteval = 0;
ian@26 464 else if (make_readonly(address))
ian@26 465 pteval &= ~_PAGE_RW;
keir@554 466 set_pte(pte, __pte(pteval & __supported_pte_mask));
ian@26 467 }
keir@554 468 if (!after_bootmem) {
keir@554 469 early_make_page_readonly(pte_save, XENFEAT_writable_page_tables);
keir@554 470 *pmd = __pmd(pte_phys | _KERNPG_TABLE);
keir@554 471 } else {
keir@554 472 make_page_readonly(pte_save, XENFEAT_writable_page_tables);
keir@554 473 set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
keir@554 474 }
ian@26 475 }
ian@26 476 }
ian@26 477
ian@26 478 static void __meminit
ian@26 479 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
ian@26 480 {
ian@26 481 pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
ian@26 482
ian@26 483 if (pmd_none(*pmd)) {
ian@26 484 spin_lock(&init_mm.page_table_lock);
ian@26 485 phys_pmd_init(pmd, address, end);
ian@26 486 spin_unlock(&init_mm.page_table_lock);
ian@26 487 __flush_tlb_all();
ian@26 488 }
ian@26 489 }
ian@26 490
ian@26 491 static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
ian@26 492 {
ian@26 493 long i = pud_index(address);
ian@26 494
ian@26 495 pud = pud + i;
ian@26 496
ian@26 497 if (after_bootmem && pud_val(*pud)) {
ian@26 498 phys_pmd_update(pud, address, end);
ian@26 499 return;
ian@26 500 }
ian@26 501
ian@26 502 for (; i < PTRS_PER_PUD; pud++, i++) {
ian@26 503 unsigned long paddr, pmd_phys;
ian@26 504 pmd_t *pmd;
ian@26 505
ian@26 506 paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
ian@26 507 if (paddr >= end)
ian@26 508 break;
ian@26 509
ian@26 510 pmd = alloc_static_page(&pmd_phys);
keir@554 511
ian@26 512 spin_lock(&init_mm.page_table_lock);
keir@554 513 *pud = __pud(pmd_phys | _KERNPG_TABLE);
ian@26 514 phys_pmd_init(pmd, paddr, end);
ian@26 515 spin_unlock(&init_mm.page_table_lock);
keir@554 516
keir@554 517 early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
ian@26 518 }
ian@26 519 __flush_tlb();
ian@26 520 }
ian@26 521
ian@26 522 void __init xen_init_pt(void)
ian@26 523 {
ian@26 524 unsigned long addr, *page;
ian@26 525
ian@26 526 /* Find the initial pte page that was built for us. */
ian@26 527 page = (unsigned long *)xen_start_info->pt_base;
ian@26 528 addr = page[pgd_index(__START_KERNEL_map)];
ian@26 529 addr_to_page(addr, page);
ian@26 530 addr = page[pud_index(__START_KERNEL_map)];
ian@26 531 addr_to_page(addr, page);
ian@26 532
ian@26 533 #if CONFIG_XEN_COMPAT <= 0x030002
ian@26 534 /* On Xen 3.0.2 and older we may need to explicitly specify _PAGE_USER
ian@26 535 in kernel PTEs. We check that here. */
ian@26 536 if (HYPERVISOR_xen_version(XENVER_version, NULL) <= 0x30000) {
ian@26 537 unsigned long *pg;
ian@26 538 pte_t pte;
ian@26 539
ian@26 540 /* Mess with the initial mapping of page 0. It's not needed. */
ian@26 541 BUILD_BUG_ON(__START_KERNEL <= __START_KERNEL_map);
ian@26 542 addr = page[pmd_index(__START_KERNEL_map)];
ian@26 543 addr_to_page(addr, pg);
ian@26 544 pte.pte = pg[pte_index(__START_KERNEL_map)];
ian@26 545 BUG_ON(!(pte.pte & _PAGE_PRESENT));
ian@26 546
ian@26 547 /* If _PAGE_USER isn't set, we obviously do not need it. */
ian@26 548 if (pte.pte & _PAGE_USER) {
ian@26 549 /* _PAGE_USER is needed, but is it set implicitly? */
ian@26 550 pte.pte &= ~_PAGE_USER;
ian@26 551 if ((HYPERVISOR_update_va_mapping(__START_KERNEL_map,
ian@26 552 pte, 0) != 0) ||
ian@26 553 !(pg[pte_index(__START_KERNEL_map)] & _PAGE_USER))
ian@26 554 /* We need to explicitly specify _PAGE_USER. */
ian@26 555 __kernel_page_user = _PAGE_USER;
ian@26 556 }
ian@26 557 }
ian@26 558 #endif
ian@26 559
ian@26 560 /* Construct mapping of initial pte page in our own directories. */
ian@26 561 init_level4_pgt[pgd_index(__START_KERNEL_map)] =
ian@26 562 __pgd(__pa_symbol(level3_kernel_pgt) | _PAGE_TABLE);
ian@26 563 level3_kernel_pgt[pud_index(__START_KERNEL_map)] =
kfraser@53 564 __pud(__pa_symbol(level2_kernel_pgt) | _PAGE_TABLE);
kfraser@53 565 memcpy(level2_kernel_pgt, page, PAGE_SIZE);
kfraser@53 566
kfraser@53 567 __user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] =
kfraser@53 568 __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
ian@26 569
ian@26 570 early_make_page_readonly(init_level4_pgt,
ian@26 571 XENFEAT_writable_page_tables);
kfraser@53 572 early_make_page_readonly(__user_pgd(init_level4_pgt),
ian@26 573 XENFEAT_writable_page_tables);
ian@26 574 early_make_page_readonly(level3_kernel_pgt,
ian@26 575 XENFEAT_writable_page_tables);
ian@26 576 early_make_page_readonly(level3_user_pgt,
ian@26 577 XENFEAT_writable_page_tables);
ian@26 578 early_make_page_readonly(level2_kernel_pgt,
ian@26 579 XENFEAT_writable_page_tables);
ian@26 580
ian@26 581 if (!xen_feature(XENFEAT_writable_page_tables)) {
ian@26 582 xen_pgd_pin(__pa_symbol(init_level4_pgt));
kfraser@53 583 xen_pgd_pin(__pa_symbol(__user_pgd(init_level4_pgt)));
ian@26 584 }
ian@26 585 }
ian@26 586
ian@26 587 static void __init extend_init_mapping(unsigned long tables_space)
ian@26 588 {
ian@26 589 unsigned long va = __START_KERNEL_map;
ian@26 590 unsigned long phys, addr, *pte_page;
ian@26 591 pmd_t *pmd;
ian@26 592 pte_t *pte, new_pte;
ian@26 593 unsigned long *page = (unsigned long *)init_level4_pgt;
ian@26 594
ian@26 595 addr = page[pgd_index(va)];
ian@26 596 addr_to_page(addr, page);
ian@26 597 addr = page[pud_index(va)];
ian@26 598 addr_to_page(addr, page);
ian@26 599
ian@26 600 /* Kill mapping of low 1MB. */
ian@26 601 while (va < (unsigned long)&_text) {
keir@394 602 if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
keir@394 603 BUG();
ian@26 604 va += PAGE_SIZE;
ian@26 605 }
ian@26 606
ian@26 607 /* Ensure init mappings cover kernel text/data and initial tables. */
ian@26 608 while (va < (__START_KERNEL_map
ian@26 609 + (start_pfn << PAGE_SHIFT)
ian@26 610 + tables_space)) {
ian@26 611 pmd = (pmd_t *)&page[pmd_index(va)];
ian@26 612 if (pmd_none(*pmd)) {
ian@26 613 pte_page = alloc_static_page(&phys);
ian@26 614 early_make_page_readonly(
ian@26 615 pte_page, XENFEAT_writable_page_tables);
ian@26 616 set_pmd(pmd, __pmd(phys | _KERNPG_TABLE));
ian@26 617 } else {
ian@26 618 addr = page[pmd_index(va)];
ian@26 619 addr_to_page(addr, pte_page);
ian@26 620 }
ian@26 621 pte = (pte_t *)&pte_page[pte_index(va)];
ian@26 622 if (pte_none(*pte)) {
ian@26 623 new_pte = pfn_pte(
ian@26 624 (va - __START_KERNEL_map) >> PAGE_SHIFT,
ian@26 625 __pgprot(_KERNPG_TABLE));
ian@26 626 xen_l1_entry_update(pte, new_pte);
ian@26 627 }
ian@26 628 va += PAGE_SIZE;
ian@26 629 }
ian@26 630
ian@26 631 /* Finally, blow away any spurious initial mappings. */
ian@26 632 while (1) {
ian@26 633 pmd = (pmd_t *)&page[pmd_index(va)];
ian@26 634 if (pmd_none(*pmd))
ian@26 635 break;
keir@394 636 if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
keir@394 637 BUG();
ian@26 638 va += PAGE_SIZE;
ian@26 639 }
ian@26 640 }
ian@26 641
ian@26 642 static void __init find_early_table_space(unsigned long end)
ian@26 643 {
ian@26 644 unsigned long puds, pmds, ptes, tables;
ian@26 645
ian@26 646 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
ian@26 647 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
ian@26 648 ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
ian@26 649
ian@26 650 tables = round_up(puds * 8, PAGE_SIZE) +
ian@26 651 round_up(pmds * 8, PAGE_SIZE) +
ian@26 652 round_up(ptes * 8, PAGE_SIZE);
ian@26 653
ian@26 654 extend_init_mapping(tables);
ian@26 655
ian@26 656 table_start = start_pfn;
ian@26 657 table_end = table_start + (tables>>PAGE_SHIFT);
ian@26 658
ian@26 659 early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
ian@26 660 end, table_start << PAGE_SHIFT,
ian@26 661 (table_start << PAGE_SHIFT) + tables);
ian@26 662 }
ian@26 663
ian@26 664 static void xen_finish_init_mapping(void)
ian@26 665 {
ian@26 666 unsigned long i, start, end;
ian@26 667
ian@26 668 /* Re-vector virtual addresses pointing into the initial
ian@26 669 mapping to the just-established permanent ones. */
ian@26 670 xen_start_info = __va(__pa(xen_start_info));
ian@26 671 xen_start_info->pt_base = (unsigned long)
ian@26 672 __va(__pa(xen_start_info->pt_base));
ian@26 673 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
ian@26 674 phys_to_machine_mapping =
ian@26 675 __va(__pa(xen_start_info->mfn_list));
ian@26 676 xen_start_info->mfn_list = (unsigned long)
ian@26 677 phys_to_machine_mapping;
ian@26 678 }
ian@26 679 if (xen_start_info->mod_start)
ian@26 680 xen_start_info->mod_start = (unsigned long)
ian@26 681 __va(__pa(xen_start_info->mod_start));
ian@26 682
ian@26 683 /* Destroy the Xen-created mappings beyond the kernel image as
ian@26 684 * well as the temporary mappings created above. Prevents
ian@26 685 * overlap with modules area (if init mapping is very big).
ian@26 686 */
ian@26 687 start = PAGE_ALIGN((unsigned long)_end);
ian@26 688 end = __START_KERNEL_map + (table_end << PAGE_SHIFT);
ian@26 689 for (; start < end; start += PAGE_SIZE)
keir@394 690 if (HYPERVISOR_update_va_mapping(start, __pte_ma(0), 0))
keir@394 691 BUG();
ian@26 692
ian@26 693 /* Allocate pte's for initial fixmaps from 'start_pfn' allocator. */
ian@26 694 table_end = ~0UL;
ian@26 695
ian@26 696 /*
ian@26 697 * Prefetch pte's for the bt_ioremap() area. It gets used before the
ian@26 698 * boot-time allocator is online, so allocate-on-demand would fail.
ian@26 699 */
ian@26 700 for (i = FIX_BTMAP_END; i <= FIX_BTMAP_BEGIN; i++)
ian@26 701 __set_fixmap(i, 0, __pgprot(0));
ian@26 702
ian@26 703 /* Switch to the real shared_info page, and clear the dummy page. */
ian@26 704 set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
ian@26 705 HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
ian@26 706 memset(empty_zero_page, 0, sizeof(empty_zero_page));
ian@26 707
ian@26 708 /* Set up mapping of lowest 1MB of physical memory. */
ian@26 709 for (i = 0; i < NR_FIX_ISAMAPS; i++)
ian@26 710 if (is_initial_xendomain())
ian@26 711 set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
ian@26 712 else
ian@26 713 __set_fixmap(FIX_ISAMAP_BEGIN - i,
ian@26 714 virt_to_mfn(empty_zero_page)
ian@26 715 << PAGE_SHIFT,
ian@26 716 PAGE_KERNEL_RO);
ian@26 717
ian@26 718 /* Disable the 'start_pfn' allocator. */
ian@26 719 table_end = start_pfn;
ian@26 720 }
ian@26 721
ian@26 722 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
ian@26 723 This runs before bootmem is initialized and gets pages directly from the
ian@26 724 physical memory. To access them they are temporarily mapped. */
ian@26 725 void __meminit init_memory_mapping(unsigned long start, unsigned long end)
ian@26 726 {
ian@26 727 unsigned long next;
ian@26 728
ian@26 729 Dprintk("init_memory_mapping\n");
ian@26 730
ian@26 731 /*
ian@26 732 * Find space for the kernel direct mapping tables.
ian@26 733 * Later we should allocate these tables in the local node of the memory
ian@26 734 * mapped. Unfortunately this is done currently before the nodes are
ian@26 735 * discovered.
ian@26 736 */
ian@26 737 if (!after_bootmem)
ian@26 738 find_early_table_space(end);
ian@26 739
ian@26 740 start = (unsigned long)__va(start);
ian@26 741 end = (unsigned long)__va(end);
ian@26 742
ian@26 743 for (; start < end; start = next) {
ian@26 744 unsigned long pud_phys;
ian@26 745 pgd_t *pgd = pgd_offset_k(start);
ian@26 746 pud_t *pud;
ian@26 747
keir@554 748 if (after_bootmem)
ian@26 749 pud = pud_offset(pgd, start & PGDIR_MASK);
keir@554 750 else
ian@26 751 pud = alloc_static_page(&pud_phys);
ian@26 752 next = start + PGDIR_SIZE;
ian@26 753 if (next > end)
ian@26 754 next = end;
ian@26 755 phys_pud_init(pud, __pa(start), __pa(next));
keir@554 756 if (!after_bootmem) {
keir@554 757 early_make_page_readonly(pud, XENFEAT_writable_page_tables);
ian@26 758 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
keir@554 759 }
ian@26 760 }
ian@26 761
ian@26 762 if (!after_bootmem) {
ian@26 763 BUG_ON(start_pfn != table_end);
ian@26 764 xen_finish_init_mapping();
ian@26 765 }
ian@26 766
ian@26 767 __flush_tlb_all();
ian@26 768 }
ian@26 769
ian@26 770 void __cpuinit zap_low_mappings(int cpu)
ian@26 771 {
ian@26 772 /* this is not required for Xen */
ian@26 773 #if 0
ian@26 774 swap_low_mappings();
ian@26 775 #endif
ian@26 776 }
ian@26 777
ian@26 778 /* Compute zone sizes for the DMA and DMA32 zones in a node. */
ian@26 779 __init void
ian@26 780 size_zones(unsigned long *z, unsigned long *h,
ian@26 781 unsigned long start_pfn, unsigned long end_pfn)
ian@26 782 {
ian@26 783 int i;
ian@26 784 unsigned long w;
ian@26 785
ian@26 786 for (i = 0; i < MAX_NR_ZONES; i++)
ian@26 787 z[i] = 0;
ian@26 788
ian@26 789 if (start_pfn < MAX_DMA_PFN)
ian@26 790 z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
ian@26 791 if (start_pfn < MAX_DMA32_PFN) {
ian@26 792 unsigned long dma32_pfn = MAX_DMA32_PFN;
ian@26 793 if (dma32_pfn > end_pfn)
ian@26 794 dma32_pfn = end_pfn;
ian@26 795 z[ZONE_DMA32] = dma32_pfn - start_pfn;
ian@26 796 }
ian@26 797 z[ZONE_NORMAL] = end_pfn - start_pfn;
ian@26 798
ian@26 799 /* Remove lower zones from higher ones. */
ian@26 800 w = 0;
ian@26 801 for (i = 0; i < MAX_NR_ZONES; i++) {
ian@26 802 if (z[i])
ian@26 803 z[i] -= w;
ian@26 804 w += z[i];
ian@26 805 }
ian@26 806
ian@26 807 /* Compute holes */
ian@26 808 w = start_pfn;
ian@26 809 for (i = 0; i < MAX_NR_ZONES; i++) {
ian@26 810 unsigned long s = w;
ian@26 811 w += z[i];
ian@26 812 h[i] = e820_hole_size(s, w);
ian@26 813 }
ian@26 814
ian@26 815 /* Add the space needed for mem_map to the holes too. */
ian@26 816 for (i = 0; i < MAX_NR_ZONES; i++)
ian@26 817 h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
ian@26 818
ian@26 819 /* The 16MB DMA zone has the kernel and other misc mappings.
ian@26 820 Account them too */
ian@26 821 if (h[ZONE_DMA]) {
ian@26 822 h[ZONE_DMA] += dma_reserve;
ian@26 823 if (h[ZONE_DMA] >= z[ZONE_DMA]) {
ian@26 824 printk(KERN_WARNING
ian@26 825 "Kernel too large and filling up ZONE_DMA?\n");
ian@26 826 h[ZONE_DMA] = z[ZONE_DMA];
ian@26 827 }
ian@26 828 }
ian@26 829 }
ian@26 830
ian@26 831 #ifndef CONFIG_NUMA
ian@26 832 void __init paging_init(void)
ian@26 833 {
ian@26 834 unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
ian@26 835
ian@26 836 memory_present(0, 0, end_pfn);
ian@26 837 sparse_init();
ian@26 838 size_zones(zones, holes, 0, end_pfn);
ian@26 839 free_area_init_node(0, NODE_DATA(0), zones,
ian@26 840 __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
ian@26 841
ian@26 842 init_mm.context.pinned = 1;
ian@26 843 }
ian@26 844 #endif
ian@26 845
ian@26 846 /* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
ian@26 847 from the CPU leading to inconsistent cache lines. address and size
ian@26 848 must be aligned to 2MB boundaries.
ian@26 849 Does nothing when the mapping doesn't exist. */
ian@26 850 void __init clear_kernel_mapping(unsigned long address, unsigned long size)
ian@26 851 {
ian@26 852 unsigned long end = address + size;
ian@26 853
ian@26 854 BUG_ON(address & ~LARGE_PAGE_MASK);
ian@26 855 BUG_ON(size & ~LARGE_PAGE_MASK);
ian@26 856
ian@26 857 for (; address < end; address += LARGE_PAGE_SIZE) {
ian@26 858 pgd_t *pgd = pgd_offset_k(address);
ian@26 859 pud_t *pud;
ian@26 860 pmd_t *pmd;
ian@26 861 if (pgd_none(*pgd))
ian@26 862 continue;
ian@26 863 pud = pud_offset(pgd, address);
ian@26 864 if (pud_none(*pud))
ian@26 865 continue;
ian@26 866 pmd = pmd_offset(pud, address);
ian@26 867 if (!pmd || pmd_none(*pmd))
ian@26 868 continue;
keir@283 869 if (0 == (__pmd_val(*pmd) & _PAGE_PSE)) {
ian@26 870 /* Could handle this, but it should not happen currently. */
ian@26 871 printk(KERN_ERR
ian@26 872 "clear_kernel_mapping: mapping has been split. will leak memory\n");
ian@26 873 pmd_ERROR(*pmd);
ian@26 874 }
ian@26 875 set_pmd(pmd, __pmd(0));
ian@26 876 }
ian@26 877 __flush_tlb_all();
ian@26 878 }
ian@26 879
ian@26 880 /*
ian@26 881 * Memory hotplug specific functions
ian@26 882 */
ian@26 883 void online_page(struct page *page)
ian@26 884 {
ian@26 885 ClearPageReserved(page);
ian@26 886 init_page_count(page);
ian@26 887 __free_page(page);
ian@26 888 totalram_pages++;
ian@26 889 num_physpages++;
ian@26 890 }
ian@26 891
ian@26 892 #ifdef CONFIG_MEMORY_HOTPLUG
ian@26 893 /*
ian@26 894 * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
ian@26 895 * via probe interface of sysfs. If acpi notifies hot-add event, then it
ian@26 896 * can tell node id by searching dsdt. But, probe interface doesn't have
ian@26 897 * node id. So, return 0 as node id at this time.
ian@26 898 */
ian@26 899 #ifdef CONFIG_NUMA
ian@26 900 int memory_add_physaddr_to_nid(u64 start)
ian@26 901 {
ian@26 902 return 0;
ian@26 903 }
ian@26 904 #endif
ian@26 905
ian@26 906 /*
ian@26 907 * Memory is added always to NORMAL zone. This means you will never get
ian@26 908 * additional DMA/DMA32 memory.
ian@26 909 */
ian@26 910 int arch_add_memory(int nid, u64 start, u64 size)
ian@26 911 {
ian@26 912 struct pglist_data *pgdat = NODE_DATA(nid);
ian@26 913 struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
ian@26 914 unsigned long start_pfn = start >> PAGE_SHIFT;
ian@26 915 unsigned long nr_pages = size >> PAGE_SHIFT;
ian@26 916 int ret;
ian@26 917
ian@26 918 ret = __add_pages(zone, start_pfn, nr_pages);
ian@26 919 if (ret)
ian@26 920 goto error;
ian@26 921
ian@26 922 init_memory_mapping(start, (start + size -1));
ian@26 923
ian@26 924 return ret;
ian@26 925 error:
ian@26 926 printk("%s: Problem encountered in __add_pages!\n", __func__);
ian@26 927 return ret;
ian@26 928 }
ian@26 929 EXPORT_SYMBOL_GPL(arch_add_memory);
ian@26 930
ian@26 931 int remove_memory(u64 start, u64 size)
ian@26 932 {
ian@26 933 return -EINVAL;
ian@26 934 }
ian@26 935 EXPORT_SYMBOL_GPL(remove_memory);
ian@26 936
ian@26 937 #else /* CONFIG_MEMORY_HOTPLUG */
ian@26 938 /*
ian@26 939 * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
ian@26 940 * just online the pages.
ian@26 941 */
ian@26 942 int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
ian@26 943 {
ian@26 944 int err = -EIO;
ian@26 945 unsigned long pfn;
ian@26 946 unsigned long total = 0, mem = 0;
ian@26 947 for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
ian@26 948 if (pfn_valid(pfn)) {
ian@26 949 online_page(pfn_to_page(pfn));
ian@26 950 err = 0;
ian@26 951 mem++;
ian@26 952 }
ian@26 953 total++;
ian@26 954 }
ian@26 955 if (!err) {
ian@26 956 z->spanned_pages += total;
ian@26 957 z->present_pages += mem;
ian@26 958 z->zone_pgdat->node_spanned_pages += total;
ian@26 959 z->zone_pgdat->node_present_pages += mem;
ian@26 960 }
ian@26 961 return err;
ian@26 962 }
ian@26 963 #endif /* CONFIG_MEMORY_HOTPLUG */
ian@26 964
ian@26 965 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
ian@26 966 kcore_vsyscall;
ian@26 967
ian@26 968 void __init mem_init(void)
ian@26 969 {
ian@26 970 long codesize, reservedpages, datasize, initsize;
ian@26 971 unsigned long pfn;
ian@26 972
ian@26 973 pci_iommu_alloc();
ian@26 974
ian@26 975 /* How many end-of-memory variables you have, grandma! */
ian@26 976 max_low_pfn = end_pfn;
ian@26 977 max_pfn = end_pfn;
ian@26 978 num_physpages = end_pfn;
ian@26 979 high_memory = (void *) __va(end_pfn * PAGE_SIZE);
ian@26 980
ian@26 981 /* clear the zero-page */
ian@26 982 memset(empty_zero_page, 0, PAGE_SIZE);
ian@26 983
ian@26 984 reservedpages = 0;
ian@26 985
ian@26 986 /* this will put all low memory onto the freelists */
ian@26 987 #ifdef CONFIG_NUMA
ian@26 988 totalram_pages = numa_free_all_bootmem();
ian@26 989 #else
ian@26 990 totalram_pages = free_all_bootmem();
ian@26 991 #endif
ian@26 992 /* XEN: init and count pages outside initial allocation. */
ian@26 993 for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
ian@26 994 ClearPageReserved(pfn_to_page(pfn));
ian@26 995 init_page_count(pfn_to_page(pfn));
ian@26 996 totalram_pages++;
ian@26 997 }
ian@26 998 reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
ian@26 999
ian@26 1000 after_bootmem = 1;
ian@26 1001
ian@26 1002 codesize = (unsigned long) &_etext - (unsigned long) &_text;
ian@26 1003 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
ian@26 1004 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
ian@26 1005
ian@26 1006 /* Register memory areas for /proc/kcore */
ian@26 1007 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
ian@26 1008 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
ian@26 1009 VMALLOC_END-VMALLOC_START);
ian@26 1010 kclist_add(&kcore_kernel, &_stext, _end - _stext);
ian@26 1011 kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
ian@26 1012 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
ian@26 1013 VSYSCALL_END - VSYSCALL_START);
ian@26 1014
ian@26 1015 printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
ian@26 1016 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
ian@26 1017 end_pfn << (PAGE_SHIFT-10),
ian@26 1018 codesize >> 10,
ian@26 1019 reservedpages << (PAGE_SHIFT-10),
ian@26 1020 datasize >> 10,
ian@26 1021 initsize >> 10);
ian@26 1022
ian@26 1023 #ifndef CONFIG_XEN
ian@26 1024 #ifdef CONFIG_SMP
ian@26 1025 /*
ian@26 1026 * Sync boot_level4_pgt mappings with the init_level4_pgt
ian@26 1027 * except for the low identity mappings which are already zapped
ian@26 1028 * in init_level4_pgt. This sync-up is essential for AP's bringup
ian@26 1029 */
ian@26 1030 memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
ian@26 1031 #endif
ian@26 1032 #endif
ian@26 1033 }
ian@26 1034
ian@26 1035 void free_init_pages(char *what, unsigned long begin, unsigned long end)
ian@26 1036 {
ian@26 1037 unsigned long addr;
ian@26 1038
ian@26 1039 if (begin >= end)
ian@26 1040 return;
ian@26 1041
ian@26 1042 printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
ian@26 1043 for (addr = begin; addr < end; addr += PAGE_SIZE) {
ian@26 1044 ClearPageReserved(virt_to_page(addr));
ian@26 1045 init_page_count(virt_to_page(addr));
ian@26 1046 memset((void *)(addr & ~(PAGE_SIZE-1)),
ian@26 1047 POISON_FREE_INITMEM, PAGE_SIZE);
ian@26 1048 if (addr >= __START_KERNEL_map) {
ian@26 1049 /* make_readonly() reports all kernel addresses. */
ian@26 1050 __make_page_writable(__va(__pa(addr)));
ian@26 1051 if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
ian@26 1052 pgd_t *pgd = pgd_offset_k(addr);
ian@26 1053 pud_t *pud = pud_offset(pgd, addr);
ian@26 1054 pmd_t *pmd = pmd_offset(pud, addr);
ian@26 1055 pte_t *pte = pte_offset_kernel(pmd, addr);
ian@26 1056
ian@26 1057 xen_l1_entry_update(pte, __pte(0)); /* fallback */
ian@26 1058 }
ian@26 1059 }
ian@26 1060 free_page(addr);
ian@26 1061 totalram_pages++;
ian@26 1062 }
ian@26 1063 }
ian@26 1064
ian@26 1065 void free_initmem(void)
ian@26 1066 {
ian@26 1067 memset(__initdata_begin, POISON_FREE_INITDATA,
ian@26 1068 __initdata_end - __initdata_begin);
ian@26 1069 free_init_pages("unused kernel memory",
ian@26 1070 (unsigned long)(&__init_begin),
ian@26 1071 (unsigned long)(&__init_end));
ian@26 1072 }
ian@26 1073
ian@26 1074 #ifdef CONFIG_DEBUG_RODATA
ian@26 1075
ian@26 1076 void mark_rodata_ro(void)
ian@26 1077 {
ian@26 1078 unsigned long addr = (unsigned long)__start_rodata;
ian@26 1079
ian@26 1080 for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
ian@26 1081 change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);
ian@26 1082
ian@26 1083 printk ("Write protecting the kernel read-only data: %luk\n",
ian@26 1084 (__end_rodata - __start_rodata) >> 10);
ian@26 1085
ian@26 1086 /*
ian@26 1087 * change_page_attr_addr() requires a global_flush_tlb() call after it.
ian@26 1088 * We do this after the printk so that if something went wrong in the
ian@26 1089 * change, the printk gets out at least to give a better debug hint
ian@26 1090 * of who is the culprit.
ian@26 1091 */
ian@26 1092 global_flush_tlb();
ian@26 1093 }
ian@26 1094 #endif
ian@26 1095
ian@26 1096 #ifdef CONFIG_BLK_DEV_INITRD
ian@26 1097 void free_initrd_mem(unsigned long start, unsigned long end)
ian@26 1098 {
ian@26 1099 free_init_pages("initrd memory", start, end);
ian@26 1100 }
ian@26 1101 #endif
ian@26 1102
ian@26 1103 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
ian@26 1104 {
ian@26 1105 /* Should check here against the e820 map to avoid double free */
ian@26 1106 #ifdef CONFIG_NUMA
ian@26 1107 int nid = phys_to_nid(phys);
ian@26 1108 reserve_bootmem_node(NODE_DATA(nid), phys, len);
ian@26 1109 #else
ian@26 1110 reserve_bootmem(phys, len);
ian@26 1111 #endif
ian@26 1112 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
ian@26 1113 dma_reserve += len / PAGE_SIZE;
ian@26 1114 }
ian@26 1115
ian@26 1116 int kern_addr_valid(unsigned long addr)
ian@26 1117 {
ian@26 1118 unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
ian@26 1119 pgd_t *pgd;
ian@26 1120 pud_t *pud;
ian@26 1121 pmd_t *pmd;
ian@26 1122 pte_t *pte;
ian@26 1123
ian@26 1124 if (above != 0 && above != -1UL)
ian@26 1125 return 0;
ian@26 1126
ian@26 1127 pgd = pgd_offset_k(addr);
ian@26 1128 if (pgd_none(*pgd))
ian@26 1129 return 0;
ian@26 1130
ian@26 1131 pud = pud_offset(pgd, addr);
ian@26 1132 if (pud_none(*pud))
ian@26 1133 return 0;
ian@26 1134
ian@26 1135 pmd = pmd_offset(pud, addr);
ian@26 1136 if (pmd_none(*pmd))
ian@26 1137 return 0;
ian@26 1138 if (pmd_large(*pmd))
ian@26 1139 return pfn_valid(pmd_pfn(*pmd));
ian@26 1140
ian@26 1141 pte = pte_offset_kernel(pmd, addr);
ian@26 1142 if (pte_none(*pte))
ian@26 1143 return 0;
ian@26 1144 return pfn_valid(pte_pfn(*pte));
ian@26 1145 }
ian@26 1146
ian@26 1147 #ifdef CONFIG_SYSCTL
ian@26 1148 #include <linux/sysctl.h>
ian@26 1149
ian@26 1150 extern int exception_trace, page_fault_trace;
ian@26 1151
ian@26 1152 static ctl_table debug_table2[] = {
ian@26 1153 { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
ian@26 1154 proc_dointvec },
ian@26 1155 { 0, }
ian@26 1156 };
ian@26 1157
ian@26 1158 static ctl_table debug_root_table2[] = {
ian@26 1159 { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
ian@26 1160 .child = debug_table2 },
ian@26 1161 { 0 },
ian@26 1162 };
ian@26 1163
ian@26 1164 static __init int x8664_sysctl_init(void)
ian@26 1165 {
ian@26 1166 register_sysctl_table(debug_root_table2, 1);
ian@26 1167 return 0;
ian@26 1168 }
ian@26 1169 __initcall(x8664_sysctl_init);
ian@26 1170 #endif
ian@26 1171
ian@26 1172 /* A pseudo VMA to allow ptrace access for the vsyscall page. This only
ian@26 1173 covers the 64bit vsyscall page now. 32bit has a real VMA now and does
ian@26 1174 not need special handling anymore. */
ian@26 1175
ian@26 1176 static struct vm_area_struct gate_vma = {
ian@26 1177 .vm_start = VSYSCALL_START,
ian@26 1178 .vm_end = VSYSCALL_END,
ian@26 1179 .vm_page_prot = PAGE_READONLY
ian@26 1180 };
ian@26 1181
ian@26 1182 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
ian@26 1183 {
ian@26 1184 #ifdef CONFIG_IA32_EMULATION
ian@26 1185 if (test_tsk_thread_flag(tsk, TIF_IA32))
ian@26 1186 return NULL;
ian@26 1187 #endif
ian@26 1188 return &gate_vma;
ian@26 1189 }
ian@26 1190
ian@26 1191 int in_gate_area(struct task_struct *task, unsigned long addr)
ian@26 1192 {
ian@26 1193 struct vm_area_struct *vma = get_gate_vma(task);
ian@26 1194 if (!vma)
ian@26 1195 return 0;
ian@26 1196 return (addr >= vma->vm_start) && (addr < vma->vm_end);
ian@26 1197 }
ian@26 1198
ian@26 1199 /* Use this when you have no reliable task/vma, typically from interrupt
ian@26 1200 * context. It is less reliable than using the task's vma and may give
ian@26 1201 * false positives.
ian@26 1202 */
ian@26 1203 int in_gate_area_no_task(unsigned long addr)
ian@26 1204 {
ian@26 1205 return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
ian@26 1206 }