ia64/xen-unstable

view linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c @ 5066:c4353a81ae5b

bitkeeper revision 1.1159.258.150 (428e6fdeb6W1XbDj2YkcF53xkdUozA)

[PATCH] Re: PAE

> We're taking quite a chunk out of lowmem, though....

Uhm, well, no, we don't. We should, though; the fix is below.

Current code in unstable takes the address space away from the
vmalloc area, not the lowmem area. vmalloc space is 128 MB by
default, so the non-PAE hypervisor hole fits in and it works
nevertheless. The larger PAE mode hypervisor hole doesn't fit
in, so it breaks once you use enough memory (somewhere around
800-900 MB). I think that is the bug Scott Parish ran into.

Gerd
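To make the address-space arithmetic above concrete, here is a small standalone C sketch (illustration only, not the actual fix). It assumes the hypervisor hole sits at the top of the 1 GB kernel address range and uses guessed hole sizes of 64 MB (non-PAE) and 168 MB (PAE); the MB/KERNEL_SPACE/check names are invented for the example. With the hole charged only to the 128 MB vmalloc reserve, the direct lowmem mapping still ends at 896 MB, so the small hole never collides with it, while the larger PAE hole starts below 896 MB and is hit once roughly 800-900 MB of RAM gets mapped.

/*
 * Illustration only (not the actual fix): why carving the hypervisor
 * hole out of the vmalloc reserve works for a small non-PAE hole but
 * collides with lowmem for the larger PAE hole.  Hole sizes below are
 * assumptions for the example, not values taken from Xen.
 */
#include <stdio.h>

#define MB(x)           ((unsigned long)(x) << 20)
#define KERNEL_SPACE    MB(1024)   /* 1 GB above PAGE_OFFSET on i386 */
#define VMALLOC_RESERVE MB(128)    /* default __VMALLOC_RESERVE      */
#define LOWMEM_TOP      (KERNEL_SPACE - VMALLOC_RESERVE)   /* 896 MB */

static void check(const char *name, unsigned long hole, unsigned long ram)
{
        /* Hole charged only to the vmalloc area: the direct mapping still
         * ends at 896 MB, while the hole occupies the top of kernel space. */
        unsigned long hole_start = KERNEL_SPACE - hole;
        unsigned long lowmem_end = ram < LOWMEM_TOP ? ram : LOWMEM_TOP;

        printf("%-30s hole starts at %4lu MB, lowmem ends at %4lu MB -> %s\n",
               name, hole_start >> 20, lowmem_end >> 20,
               lowmem_end > hole_start ? "COLLISION" : "ok");
}

int main(void)
{
        unsigned long ram = MB(900);   /* in the range where breakage was reported */

        check("non-PAE (assumed 64 MB hole):", MB(64), ram);
        check("PAE (assumed 168 MB hole):", MB(168), ram);
        return 0;
}

Charging the hole to lowmem instead moves the end of the direct mapping below the hole, which is what the fix described above does.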
author kraxel@bytesex.org[kaf24]
date Fri May 20 23:16:46 2005 +0000 (2005-05-20)
parents a71203a12503
children 6640eb3cb41d
line source
/*
 *  linux/arch/i386/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/efi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm-xen/hypervisor.h>

unsigned int __VMALLOC_RESERVE = 128 << 20;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
unsigned long highstart_pfn, highend_pfn;

static int noinline do_test_wp_bit(void);

/*
 * Creates a middle page table and puts a pointer to it in the
 * given global directory entry. This only returns the gd entry
 * in non-PAE compilation mode, since the middle layer is folded.
 */
static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
        pud_t *pud;
        pmd_t *pmd_table;

#ifdef CONFIG_X86_PAE
        pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
        set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
        pud = pud_offset(pgd, 0);
        if (pmd_table != pmd_offset(pud, 0))
                BUG();
#else
        pud = pud_offset(pgd, 0);
        pmd_table = pmd_offset(pud, 0);
#endif

        return pmd_table;
}

/*
 * Create a page table and place a pointer to it in a middle page
 * directory entry.
 */
static pte_t * __init one_page_table_init(pmd_t *pmd)
{
        if (pmd_none(*pmd)) {
                pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
                make_page_readonly(page_table);
                set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
                if (page_table != pte_offset_kernel(pmd, 0))
                        BUG();

                return page_table;
        }

        return pte_offset_kernel(pmd, 0);
}

/*
 * This function initializes a certain range of kernel virtual memory
 * with new bootmem page tables, everywhere page tables are missing in
 * the given range.
 */

/*
 * NOTE: The pagetables are allocated contiguous on the physical space
 * so we can cache the place of the first one and move around without
 * checking the pgd every time.
 */
static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        int pgd_idx, pmd_idx;
        unsigned long vaddr;

        vaddr = start;
        pgd_idx = pgd_index(vaddr);
        pmd_idx = pmd_index(vaddr);
        pgd = pgd_base + pgd_idx;

        for ( ; (pgd_idx < PTRS_PER_PGD_NO_HV) && (vaddr != end); pgd++, pgd_idx++) {
                if (pgd_none(*pgd))
                        one_md_table_init(pgd);
                pud = pud_offset(pgd, vaddr);
                pmd = pmd_offset(pud, vaddr);
                for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
                        if (pmd_none(*pmd))
                                one_page_table_init(pmd);

                        vaddr += PMD_SIZE;
                }
                pmd_idx = 0;
        }
}

static inline int is_kernel_text(unsigned long addr)
{
        if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
                return 1;
        return 0;
}

/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET.
 */
static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
{
        unsigned long pfn;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        int pgd_idx, pmd_idx, pte_ofs;

        unsigned long max_ram_pfn = xen_start_info.nr_pages;
        if (max_ram_pfn > max_low_pfn)
                max_ram_pfn = max_low_pfn;

        pgd_idx = pgd_index(PAGE_OFFSET);
        pgd = pgd_base + pgd_idx;
        pfn = 0;
        pmd_idx = pmd_index(PAGE_OFFSET);
        pte_ofs = pte_index(PAGE_OFFSET);

        for (; pgd_idx < PTRS_PER_PGD_NO_HV; pgd++, pgd_idx++) {
                pmd = one_md_table_init(pgd);
                if (pfn >= max_low_pfn)
                        continue;
                pmd += pmd_idx;
                for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
                        unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;

                        /* Map with big pages if possible, otherwise create normal page tables. */
                        if (cpu_has_pse) {
                                unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;

                                if (is_kernel_text(address) || is_kernel_text(address2))
                                        set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
                                else
                                        set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
                                pfn += PTRS_PER_PTE;
                        } else {
                                pte = one_page_table_init(pmd);

                                pte += pte_ofs;
                                for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
                                        /* XEN: Only map initial RAM allocation. */
                                        if ((pfn >= max_ram_pfn) || pte_present(*pte))
                                                continue;
                                        if (is_kernel_text(address))
                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
                                        else
                                                set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
                                }
                                pte_ofs = 0;
                        }
                        flush_page_update_queue();
                }
                pmd_idx = 0;
        }
}

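/*
 * Pages in this physical range trigger an erratum on some Pentium Pro
 * chips and are therefore kept reserved on affected CPUs.
 */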
static inline int page_kills_ppro(unsigned long pagenr)
{
        if (pagenr >= 0x70000 && pagenr <= 0x7003F)
                return 1;
        return 0;
}

extern int is_available_memory(efi_memory_desc_t *);

static inline int page_is_ram(unsigned long pagenr)
{
        int i;
        unsigned long addr, end;

        if (efi_enabled) {
                efi_memory_desc_t *md;

                for (i = 0; i < memmap.nr_map; i++) {
                        md = &memmap.map[i];
                        if (!is_available_memory(md))
                                continue;
                        addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
                        end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;

                        if ((pagenr >= addr) && (pagenr < end))
                                return 1;
                }
                return 0;
        }

        for (i = 0; i < e820.nr_map; i++) {

                if (e820.map[i].type != E820_RAM)       /* not usable memory */
                        continue;
                /*
                 *      !!!FIXME!!! Some BIOSen report areas as RAM that
                 *      are not. Notably the 640->1Mb area. We need a sanity
                 *      check here.
                 */
                addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
                end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
                if ((pagenr >= addr) && (pagenr < end))
                        return 1;
        }
        return 0;
}

#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
pgprot_t kmap_prot;

EXPORT_SYMBOL(kmap_prot);
EXPORT_SYMBOL(kmap_pte);

#define kmap_get_fixmap_pte(vaddr) \
        pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))

void __init kmap_init(void)
{
        unsigned long kmap_vstart;

        /* cache the first kmap pte */
        kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
        kmap_pte = kmap_get_fixmap_pte(kmap_vstart);

        kmap_prot = PAGE_KERNEL;
}

void __init permanent_kmaps_init(pgd_t *pgd_base)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        unsigned long vaddr;

        vaddr = PKMAP_BASE;
        page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);

        pgd = swapper_pg_dir + pgd_index(vaddr);
        pud = pud_offset(pgd, vaddr);
        pmd = pmd_offset(pud, vaddr);
        pte = pte_offset_kernel(pmd, vaddr);
        pkmap_page_table = pte;
}

void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
{
        if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
                ClearPageReserved(page);
                set_bit(PG_highmem, &page->flags);
                set_page_count(page, 1);
                if (pfn < xen_start_info.nr_pages)
                        __free_page(page);
                totalhigh_pages++;
        } else
                SetPageReserved(page);
}

#ifndef CONFIG_DISCONTIGMEM
void __init set_highmem_pages_init(int bad_ppro)
{
        int pfn;
        for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
                one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
        totalram_pages += totalhigh_pages;
}
#else
extern void set_highmem_pages_init(int);
#endif /* !CONFIG_DISCONTIGMEM */

#else
#define kmap_init() do { } while (0)
#define permanent_kmaps_init(pgd_base) do { } while (0)
#define set_highmem_pages_init(bad_ppro) do { } while (0)
#endif /* CONFIG_HIGHMEM */

unsigned long long __PAGE_KERNEL = _PAGE_KERNEL;
unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;

#ifndef CONFIG_DISCONTIGMEM
#define remap_numa_kva() do {} while (0)
#else
extern void __init remap_numa_kva(void);
#endif

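/*
 * Build the kernel page tables in swapper_pg_dir: copy the page directory
 * handed over by Xen, pin the new directory and switch to it, map lowmem,
 * and set up the fixmap and kmap page-table structure.
 */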
static void __init pagetable_init (void)
{
        unsigned long vaddr;
        pgd_t *pgd_base = swapper_pg_dir;
        pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;

#ifdef CONFIG_X86_PAE
        int i;
        /* Init entries of the first-level page table to the zero page */
        for (i = 0; i < PTRS_PER_PGD; i++)
                set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
#endif

        /* Enable PSE if available */
        if (cpu_has_pse) {
                set_in_cr4(X86_CR4_PSE);
        }

        /* Enable PGE if available */
        if (cpu_has_pge) {
                set_in_cr4(X86_CR4_PGE);
                __PAGE_KERNEL |= _PAGE_GLOBAL;
                __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
        }

        /*
         * Switch to proper mm_init page directory. Initialise from the current
         * page directory, write-protect the new page directory, then switch to
         * it. We clean up by write-enabling and then freeing the old page dir.
         */
        memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
        make_page_readonly(pgd_base);
        queue_pgd_pin(__pa(pgd_base));
        load_cr3(pgd_base);
        queue_pgd_unpin(__pa(old_pgd));
        make_page_writable(old_pgd);
        __flush_tlb_all();
        free_bootmem(__pa(old_pgd), PAGE_SIZE);

        kernel_physical_mapping_init(pgd_base);
        remap_numa_kva();

        /*
         * Fixed mappings, only the page table structure has to be
         * created - mappings will be set by set_fixmap():
         */
        vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
        page_table_range_init(vaddr, 0, pgd_base);

        permanent_kmaps_init(pgd_base);

#ifdef CONFIG_X86_PAE
        /*
         * Add low memory identity-mappings - SMP needs it when
         * starting up on an AP from real-mode. In the non-PAE
         * case we already have these mappings through head.S.
         * All user-space mappings are explicitly cleared after
         * SMP startup.
         */
        pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
#endif
}

#if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND)
/*
 * Swap suspend & friends need this for resume because things like the intel-agp
 * driver might have split up a kernel 4MB mapping.
 */
char __nosavedata swsusp_pg_dir[PAGE_SIZE]
        __attribute__ ((aligned (PAGE_SIZE)));

static inline void save_pg_dir(void)
{
        memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
}
#else
static inline void save_pg_dir(void)
{
}
#endif

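/*
 * Clear the identity mappings of low memory once they are no longer
 * needed, saving a copy of the page directory first (for software
 * suspend, when configured).
 */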
void zap_low_mappings (void)
{
        int i;

        save_pg_dir();

        /*
         * Zap initial low-memory mappings.
         *
         * Note that "pgd_clear()" doesn't do it for
         * us, because pgd_clear() is a no-op on i386.
         */
        for (i = 0; i < USER_PTRS_PER_PGD; i++)
#ifdef CONFIG_X86_PAE
                set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
                set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
        flush_tlb_all();
}

#ifndef CONFIG_DISCONTIGMEM
void __init zone_sizes_init(void)
{
        unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
        unsigned int /*max_dma,*/ high, low;

        /*
         * XEN: Our notion of "DMA memory" is fake when running over Xen.
         * We simply put all RAM in the DMA zone so that those drivers which
         * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
         * Those drivers that *do* require lowmem are screwed anyway when
         * running over Xen!
         */
        /*max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;*/
        low = max_low_pfn;
        high = highend_pfn;

        /*if (low < max_dma)*/
                zones_size[ZONE_DMA] = low;
        /*else*/ {
                /*zones_size[ZONE_DMA] = max_dma;*/
                /*zones_size[ZONE_NORMAL] = low - max_dma;*/
#ifdef CONFIG_HIGHMEM
                zones_size[ZONE_HIGHMEM] = high - low;
#endif
        }
        free_area_init(zones_size);
}
#else
extern void zone_sizes_init(void);
#endif /* !CONFIG_DISCONTIGMEM */

static int disable_nx __initdata = 0;
u64 __supported_pte_mask = ~_PAGE_NX;

/*
 * noexec = on|off
 *
 * Control non executable mappings.
 *
 * on      Enable
 * off     Disable
 */
void __init noexec_setup(const char *str)
{
        if (!strncmp(str, "on",2) && cpu_has_nx) {
                __supported_pte_mask |= _PAGE_NX;
                disable_nx = 0;
        } else if (!strncmp(str,"off",3)) {
                disable_nx = 1;
                __supported_pte_mask &= ~_PAGE_NX;
        }
}

int nx_enabled = 0;
#ifdef CONFIG_X86_PAE

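/*
 * Probe the extended CPUID NX (execute-disable) capability and, unless
 * "noexec=off" was given, turn it on via the EFER MSR.
 */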
static void __init set_nx(void)
{
        unsigned int v[4], l, h;

        if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
                cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
                if ((v[3] & (1 << 20)) && !disable_nx) {
                        rdmsr(MSR_EFER, l, h);
                        l |= EFER_NX;
                        wrmsr(MSR_EFER, l, h);
                        nx_enabled = 1;
                        __supported_pte_mask |= _PAGE_NX;
                }
        }
}

/*
 * Enables/disables executability of a given kernel page and
 * returns the previous setting.
 */
int __init set_kernel_exec(unsigned long vaddr, int enable)
{
        pte_t *pte;
        int ret = 1;

        if (!nx_enabled)
                goto out;

        pte = lookup_address(vaddr);
        BUG_ON(!pte);

        if (!pte_exec_kernel(*pte))
                ret = 0;

        if (enable)
                pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
        else
                pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
        __flush_tlb_all();
out:
        return ret;
}

#endif

/*
 * paging_init() sets up the page tables - note that the first 8MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
void __init paging_init(void)
{
#ifdef CONFIG_XEN_PHYSDEV_ACCESS
        int i;
#endif

#ifdef CONFIG_X86_PAE
        set_nx();
        if (nx_enabled)
                printk("NX (Execute Disable) protection: active\n");
#endif

        pagetable_init();

#ifdef CONFIG_X86_PAE
        /*
         * We will bail out later - printk doesn't work right now so
         * the user would just see a hanging kernel.
         */
        if (cpu_has_pae)
                set_in_cr4(X86_CR4_PAE);
#endif
        __flush_tlb_all();

        kmap_init();
        zone_sizes_init();

        /* Switch to the real shared_info page, and clear the dummy page. */
        flush_page_update_queue();
        set_fixmap_ma(FIX_SHARED_INFO, xen_start_info.shared_info);
        HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
        memset(empty_zero_page, 0, sizeof(empty_zero_page));

#ifdef CONFIG_XEN_PHYSDEV_ACCESS
        /* Setup mapping of lower 1st MB */
        for (i = 0; i < NR_FIX_ISAMAPS; i++)
                if (xen_start_info.flags & SIF_PRIVILEGED)
                        set_fixmap_ma(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
                else
                        set_fixmap_ma_ro(FIX_ISAMAP_BEGIN - i,
                                         virt_to_machine(empty_zero_page));
#endif
}

/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
 * used to involve black magic jumps to work around some nasty CPU bugs,
 * but fortunately the switch to using exceptions got rid of all that.
 */

void __init test_wp_bit(void)
{
        printk("Checking if this processor honours the WP bit even in supervisor mode... ");

        /* Any page-aligned address will do, the test is non-destructive */
        __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
        boot_cpu_data.wp_works_ok = do_test_wp_bit();
        clear_fixmap(FIX_WP_TEST);

        if (!boot_cpu_data.wp_works_ok) {
                printk("No.\n");
#ifdef CONFIG_X86_WP_WORKS_OK
                panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
#endif
        } else {
                printk("Ok.\n");
        }
}

#ifndef CONFIG_DISCONTIGMEM
static void __init set_max_mapnr_init(void)
{
#ifdef CONFIG_HIGHMEM
        max_mapnr = num_physpages = highend_pfn;
#else
        max_mapnr = num_physpages = max_low_pfn;
#endif
}
#define __free_all_bootmem() free_all_bootmem()
#else
#define __free_all_bootmem() free_all_bootmem_node(NODE_DATA(0))
extern void set_max_mapnr_init(void);
#endif /* !CONFIG_DISCONTIGMEM */

static struct kcore_list kcore_mem, kcore_vmalloc;

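/*
 * Final memory accounting: hand all bootmem pages to the buddy allocator,
 * initialise any low-memory pages outside Xen's initial allocation, count
 * reserved pages, set up highmem, and print the memory summary.
 */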
void __init mem_init(void)
{
        extern int ppro_with_ram_bug(void);
        int codesize, reservedpages, datasize, initsize;
        int tmp;
        int bad_ppro;
        unsigned long pfn;

#ifndef CONFIG_DISCONTIGMEM
        if (!mem_map)
                BUG();
#endif

        bad_ppro = ppro_with_ram_bug();

#ifdef CONFIG_HIGHMEM
        /* check that fixmap and pkmap do not overlap */
        if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
                printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n");
                printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
                                PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START);
                BUG();
        }
#endif

        set_max_mapnr_init();

#ifdef CONFIG_HIGHMEM
        high_memory = (void *) __va(highstart_pfn * PAGE_SIZE);
#else
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
#endif
        printk("vmalloc area: %lx-%lx, maxmem %lx\n",
               VMALLOC_START,VMALLOC_END,MAXMEM);
        BUG_ON(VMALLOC_START > VMALLOC_END);

        /* this will put all low memory onto the freelists */
        totalram_pages += __free_all_bootmem();
        /* XEN: init and count low-mem pages outside initial allocation. */
        for (pfn = xen_start_info.nr_pages; pfn < max_low_pfn; pfn++) {
                ClearPageReserved(&mem_map[pfn]);
                set_page_count(&mem_map[pfn], 1);
                totalram_pages++;
        }

        reservedpages = 0;
        for (tmp = 0; tmp < max_low_pfn; tmp++)
                /*
                 * Only count reserved RAM pages
                 */
                if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
                        reservedpages++;

        set_highmem_pages_init(bad_ppro);

        codesize = (unsigned long) &_etext - (unsigned long) &_text;
        datasize = (unsigned long) &_edata - (unsigned long) &_etext;
        initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);

        printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
                num_physpages << (PAGE_SHIFT-10),
                codesize >> 10,
                reservedpages << (PAGE_SHIFT-10),
                datasize >> 10,
                initsize >> 10,
                (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
               );

#ifdef CONFIG_X86_PAE
        if (!cpu_has_pae)
                panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
#endif
        if (boot_cpu_data.wp_works_ok < 0)
                test_wp_bit();

        /*
         * Subtle. SMP is doing its boot stuff late (because it has to
         * fork idle threads) - but it also needs low mappings for the
         * protected-mode entry to work. We zap these entries only after
         * the WP-bit has been tested.
         */
#ifndef CONFIG_SMP
        zap_low_mappings();
#endif
}

kmem_cache_t *pgd_cache;
kmem_cache_t *pmd_cache;
kmem_cache_t *pte_cache;

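/*
 * Create the slab caches used for page-table allocations: a pte cache,
 * a pmd cache (only when the pmd level is not folded), and a pgd cache.
 */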
void __init pgtable_cache_init(void)
{
        pte_cache = kmem_cache_create("pte",
                                PTRS_PER_PTE*sizeof(pte_t),
                                PTRS_PER_PTE*sizeof(pte_t),
                                0,
                                pte_ctor,
                                pte_dtor);
        if (!pte_cache)
                panic("pgtable_cache_init(): Cannot create pte cache");
        if (PTRS_PER_PMD > 1) {
                pmd_cache = kmem_cache_create("pmd",
                                        PTRS_PER_PMD*sizeof(pmd_t),
                                        PTRS_PER_PMD*sizeof(pmd_t),
                                        0,
                                        pmd_ctor,
                                        NULL);
                if (!pmd_cache)
                        panic("pgtable_cache_init(): cannot create pmd cache");
        }
        pgd_cache = kmem_cache_create("pgd",
                                PTRS_PER_PGD*sizeof(pgd_t),
                                PTRS_PER_PGD*sizeof(pgd_t),
                                0,
                                pgd_ctor,
                                pgd_dtor);
        if (!pgd_cache)
                panic("pgtable_cache_init(): Cannot create pgd cache");
}

/*
 * This function cannot be __init, since exceptions don't work in that
 * section. Put this after the callers, so that it cannot be inlined.
 */
static int noinline do_test_wp_bit(void)
{
        char tmp_reg;
        int flag;

        __asm__ __volatile__(
                "       movb %0,%1      \n"
                "1:     movb %1,%0      \n"
                "       xorl %2,%2      \n"
                "2:                     \n"
                ".section __ex_table,\"a\"\n"
                "       .align 4        \n"
                "       .long 1b,2b     \n"
                ".previous              \n"
                :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
                 "=q" (tmp_reg),
                 "=r" (flag)
                :"2" (1)
                :"memory");

        return flag;
}

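/*
 * Release the .init sections: poison each page, return it to the page
 * allocator, and report how much memory was freed.
 */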
void free_initmem(void)
{
        unsigned long addr;

        addr = (unsigned long)(&__init_begin);
        for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                set_page_count(virt_to_page(addr), 1);
                memset((void *)addr, 0xcc, PAGE_SIZE);
                free_page(addr);
                totalram_pages++;
        }
        printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (__init_end - __init_begin) >> 10);
}

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        if (start < end)
                printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
        for (; start < end; start += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(start));
                set_page_count(virt_to_page(start), 1);
                free_page(start);
                totalram_pages++;
        }
}
#endif