ia64/xen-unstable

view linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c @ 6294:1a0723cd37f1

Fix many uses of machine addresses in XenLinux. Primarily
this fixes users of virt_to_machine/machine_to_virt to
use virt_to_mfn/mfn_to_virt where that is more appropriate.

This should be a big step to improved PAE stability.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Aug 19 16:06:43 2005 +0000 (2005-08-19)
parents 47d49e8b8042
children f51fe43c5d1c 5f4724c13040 81576d3d1ca8 3a8f27c6d56c
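
For readers following the commit message: under Xen a guest deals with two frame numberings, the pseudo-physical frame number (pfn) the kernel uses internally and the machine frame number (mfn) the hypervisor and hardware use. virt_to_machine() yields a full machine byte address, while virt_to_mfn() yields a frame number; interfaces that expect a frame number (such as pfn_pte_ma() in the code below) must be given an mfn, not a byte address, which is what this changeset converts call sites to do. A minimal sketch of the relationship, assuming the standard XenLinux virt_to_machine()/PAGE_SHIFT definitions; the helper name example_virt_to_mfn is ours, for illustration only, and is not part of this file:

    /* Sketch only: virt_to_machine() returns a machine byte address;
     * shifting away the page offset gives the same mfn that
     * virt_to_mfn() returns for the page containing 'vaddr'. */
    static inline unsigned long example_virt_to_mfn(void *vaddr)
    {
        return virt_to_machine(vaddr) >> PAGE_SHIFT;
    }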
line source
/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 *
 *  Jun Nakajima <jun.nakajima@intel.com>
 *  Modified for Xen.
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>

extern unsigned long *contiguous_bitmap;

#if defined(CONFIG_SWIOTLB)
extern void swiotlb_init(void);
#endif

#ifndef Dprintk
#define Dprintk(x...)
#endif

extern char _stext[];

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

extern unsigned long start_pfn;

static int init_mapping_done;

/*
 * Use this until direct mapping is established, i.e. before __va() is
 * available in init_memory_mapping().
 */

#define addr_to_page(addr, page)    \
    (addr) &= PHYSICAL_PAGE_MASK;   \
    (page) = ((unsigned long *) ((unsigned long)(((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) + __START_KERNEL_map)))

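/*
 * __make_page_readonly()/__make_page_writable() walk init_level4_pgt by
 * hand, using addr_to_page() to hop from one table level to the next,
 * because they run before init_memory_mapping() has set up the direct
 * mapping.  The leaf PTE is updated via xen_l1_entry_update() since the
 * page-table pages themselves are read-only under Xen.
 */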
static void __make_page_readonly(unsigned long va)
{
    unsigned long addr;
    pte_t pte, *ptep;
    unsigned long *page = (unsigned long *) init_level4_pgt;

    addr = (unsigned long) page[pgd_index(va)];
    addr_to_page(addr, page);

    addr = page[pud_index(va)];
    addr_to_page(addr, page);

    addr = page[pmd_index(va)];
    addr_to_page(addr, page);

    ptep = (pte_t *) &page[pte_index(va)];
    pte.pte = (ptep->pte & ~_PAGE_RW);
    xen_l1_entry_update(ptep, pte);
    __flush_tlb_one(addr);
}

static void __make_page_writable(unsigned long va)
{
    unsigned long addr;
    pte_t pte, *ptep;
    unsigned long *page = (unsigned long *) init_level4_pgt;

    addr = (unsigned long) page[pgd_index(va)];
    addr_to_page(addr, page);

    addr = page[pud_index(va)];
    addr_to_page(addr, page);

    addr = page[pmd_index(va)];
    addr_to_page(addr, page);

    ptep = (pte_t *) &page[pte_index(va)];
    pte.pte = (ptep->pte | _PAGE_RW);
    xen_l1_entry_update(ptep, pte);
    __flush_tlb_one(addr);
}

/*
 * Assume the translation is already established.
 */
void make_page_readonly(void *va)
{
    pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
    unsigned long addr = (unsigned long) va;

    if (!init_mapping_done) {
        __make_page_readonly(addr);
        return;
    }

    pgd = pgd_offset_k(addr);
    pud = pud_offset(pgd, addr);
    pmd = pmd_offset(pud, addr);
    ptep = pte_offset_kernel(pmd, addr);
    pte.pte = (ptep->pte & ~_PAGE_RW);
    xen_l1_entry_update(ptep, pte);
    __flush_tlb_one(addr);
}

void make_page_writable(void *va)
{
    pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
    unsigned long addr = (unsigned long) va;

    if (!init_mapping_done) {
        __make_page_writable(addr);
        return;
    }

    pgd = pgd_offset_k(addr);
    pud = pud_offset(pgd, addr);
    pmd = pmd_offset(pud, addr);
    ptep = pte_offset_kernel(pmd, addr);
    pte.pte = (ptep->pte | _PAGE_RW);
    xen_l1_entry_update(ptep, pte);
    __flush_tlb_one(addr);
}

void make_pages_readonly(void *va, unsigned nr)
{
    while (nr-- != 0) {
        make_page_readonly(va);
        va = (void *)((unsigned long)va + PAGE_SIZE);
    }
}

void make_pages_writable(void *va, unsigned nr)
{
    while (nr-- != 0) {
        make_page_writable(va);
        va = (void *)((unsigned long)va + PAGE_SIZE);
    }
}

/*
 * NOTE: pagetable_init() allocates all the fixmap pagetables contiguously
 * in physical space, so we can cache the location of the first one and
 * move around without checking the pgd every time.
 */

void show_mem(void)
{
    int i, total = 0, reserved = 0;
    int shared = 0, cached = 0;
    pg_data_t *pgdat;
    struct page *page;

    printk("Mem-info:\n");
    show_free_areas();
    printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

    for_each_pgdat(pgdat) {
        for (i = 0; i < pgdat->node_spanned_pages; ++i) {
            page = pfn_to_page(pgdat->node_start_pfn + i);
            total++;
            if (PageReserved(page))
                reserved++;
            else if (PageSwapCache(page))
                cached++;
            else if (page_count(page))
                shared += page_count(page) - 1;
        }
    }
    printk("%d pages of RAM\n", total);
    printk("%d reserved pages\n", reserved);
    printk("%d pages shared\n", shared);
    printk("%d pages swap cached\n", cached);
}

/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end[];
extern char __init_begin, __init_end;

int after_bootmem;

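/*
 * spp_getpage() hands back a zeroed page for a new page-table level:
 * from the bootmem allocator while we are still booting, from
 * get_zeroed_page(GFP_ATOMIC) once after_bootmem is set.
 */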
static void *spp_getpage(void)
{
    void *ptr;

    if (after_bootmem)
        ptr = (void *) get_zeroed_page(GFP_ATOMIC);
    else
        ptr = alloc_bootmem_pages(PAGE_SIZE);

    if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
        panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem ? "after bootmem" : "");

    Dprintk("spp_getpage %p\n", ptr);
    return ptr;
}

#define pgd_offset_u(address) (pgd_t *)(init_level4_user_pgt + pgd_index(address))

static inline pud_t *pud_offset_u(unsigned long address)
{
    pud_t *pud = level3_user_pgt;

    return pud + pud_index(address);
}

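/*
 * Install a single pte mapping 'vaddr' to the pseudo-physical address
 * 'phys', in either the kernel page tables or the user-mode copy used
 * for the vsyscall area.  Missing pmd/pte table pages are allocated with
 * spp_getpage(), made read-only and pinned, as Xen requires for
 * page-table pages.
 */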
static void set_pte_phys(unsigned long vaddr,
                         unsigned long phys, pgprot_t prot, int user_mode)
{
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte, new_pte;

    Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

    pgd = (user_mode ? pgd_offset_u(vaddr) : pgd_offset_k(vaddr));
    if (pgd_none(*pgd)) {
        printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
        return;
    }

    pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr));
    if (pud_none(*pud)) {
        pmd = (pmd_t *) spp_getpage();
        make_page_readonly(pmd);
        xen_pmd_pin(__pa(pmd));
        set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
        if (pmd != pmd_offset(pud, 0)) {
            printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
            return;
        }
    }

    pmd = pmd_offset(pud, vaddr);
    if (pmd_none(*pmd)) {
        pte = (pte_t *) spp_getpage();
        make_page_readonly(pte);
        xen_pte_pin(__pa(pte));
        set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
        if (pte != pte_offset_kernel(pmd, 0)) {
            printk("PAGETABLE BUG #02!\n");
            return;
        }
    }
    new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

    pte = pte_offset_kernel(pmd, vaddr);
    if (!pte_none(*pte) &&
        pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
        pte_ERROR(*pte);
    set_pte(pte, new_pte);

    /*
     * It's enough to flush this one mapping.
     * (PGE mappings get flushed as well)
     */
    __flush_tlb_one(vaddr);
}

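/*
 * Same as set_pte_phys(), but 'phys' is a machine address: the pte is
 * built with pfn_pte_ma() and written with xen_l1_entry_update(), since
 * the pte page is already read-only under Xen.
 */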
static void set_pte_phys_ma(unsigned long vaddr,
                            unsigned long phys, pgprot_t prot)
{
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte, new_pte;

    Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

    pgd = pgd_offset_k(vaddr);
    if (pgd_none(*pgd)) {
        printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
        return;
    }
    pud = pud_offset(pgd, vaddr);
    if (pud_none(*pud)) {
        pmd = (pmd_t *) spp_getpage();
        make_page_readonly(pmd);
        xen_pmd_pin(__pa(pmd));

        set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));

        if (pmd != pmd_offset(pud, 0)) {
            printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0));
            return;
        }
    }
    pmd = pmd_offset(pud, vaddr);
    if (pmd_none(*pmd)) {
        pte = (pte_t *) spp_getpage();
        make_page_readonly(pte);
        xen_pte_pin(__pa(pte));

        set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
        if (pte != pte_offset_kernel(pmd, 0)) {
            printk("PAGETABLE BUG #02!\n");
            return;
        }
    }
    new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot);
    pte = pte_offset_kernel(pmd, vaddr);

    /*
     * Note that the pte page is already RO, thus we want to use
     * xen_l1_entry_update(), not set_pte().
     */
    xen_l1_entry_update(pte,
                        pfn_pte_ma(phys >> PAGE_SHIFT, prot));

    /*
     * It's enough to flush this one mapping.
     * (PGE mappings get flushed as well)
     */
    __flush_tlb_one(vaddr);
}

#define SET_FIXMAP_KERNEL 0
#define SET_FIXMAP_USER 1

/* NOTE: this is meant to be run only at boot */
void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
    unsigned long address = __fix_to_virt(idx);

    if (idx >= __end_of_fixed_addresses) {
        printk("Invalid __set_fixmap\n");
        return;
    }
    switch (idx) {
    case VSYSCALL_FIRST_PAGE:
        set_pte_phys(address, phys, prot, SET_FIXMAP_KERNEL);
        break;
    default:
        set_pte_phys_ma(address, phys, prot);
        break;
    }
}

/*
 * At this point only the vsyscall area is supported.
 */
void __set_fixmap_user (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
    unsigned long address = __fix_to_virt(idx);

    if (idx >= __end_of_fixed_addresses) {
        printk("Invalid __set_fixmap\n");
        return;
    }

    set_pte_phys(address, phys, prot, SET_FIXMAP_USER);
}

unsigned long __initdata table_start, table_end, tables_space;

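/*
 * Return the machine frame number (mfn) backing kernel virtual address
 * 'addr', as opposed to the pseudo-physical pfn.
 */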
unsigned long get_machine_pfn(unsigned long addr)
{
    pud_t *pud = pud_offset_k(addr);
    pmd_t *pmd = pmd_offset(pud, addr);
    pte_t *pte = pte_offset_kernel(pmd, addr);

    return pte_mfn(*pte);
}

#define ALIGN_TO_4K __attribute__((section(".data.page_aligned")))
#define MAX_LOW_PAGES 0x20
static unsigned long __init_pgt[MAX_LOW_PAGES][512] ALIGN_TO_4K;
static int __init_pgt_index;

/*
 * We start using from start_pfn
 */
static __init void *alloc_static_page(unsigned long *phys)
{
    int i = __init_pgt_index++;

    if (__init_pgt_index >= MAX_LOW_PAGES) {
        printk("Need to increase MAX_LOW_PAGES");
        BUG();
    }

    *phys = __pa(__init_pgt[i]);

    return (void *) __init_pgt[i];
}

/*
 * Get RO page
 */
static void __init *alloc_low_page(unsigned long *phys)
{
    unsigned long pfn = table_end++;

    *phys = (pfn << PAGE_SHIFT);
    memset((void *) ((pfn << PAGE_SHIFT) + __START_KERNEL_map), 0, PAGE_SIZE);
    return (void *)((pfn << PAGE_SHIFT) + __START_KERNEL_map);
}

#define PTE_SIZE PAGE_SIZE

static inline void __set_pte(pte_t *dst, pte_t val)
{
    *dst = val;
}

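/*
 * phys_pud_init() fills in one pud's worth of the direct mapping,
 * allocating pmd and pte pages from the early table area and marking
 * each new table page read-only and pinning it before hooking it in.
 * Pages that back the page tables themselves are mapped without
 * _PAGE_RW.
 */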
void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
    long i, j, k;
    unsigned long paddr;

    i = pud_index(address);
    pud = pud + i;

    for (; i < PTRS_PER_PUD; pud++, i++) {
        unsigned long pmd_phys;
        pmd_t *pmd;

        paddr = address + i*PUD_SIZE;
        if (paddr >= end) {
            for (; i < PTRS_PER_PUD; i++, pud++)
                set_pud(pud, __pud(0));
            break;
        }

        pmd = alloc_low_page(&pmd_phys);
        make_page_readonly(pmd);
        xen_pmd_pin(pmd_phys);
        set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));

        for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
            unsigned long pte_phys;
            pte_t *pte, *pte_save;

            if (paddr >= end) {
                for (; j < PTRS_PER_PMD; j++, pmd++)
                    set_pmd(pmd, __pmd(0));
                break;
            }
            pte = alloc_low_page(&pte_phys);
            pte_save = pte;
            for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr += PTE_SIZE) {
                if (paddr < (table_start << PAGE_SHIFT) + tables_space) {
                    __set_pte(pte, __pte(paddr | (_KERNPG_TABLE & ~_PAGE_RW)));
                    continue;
                }
                if (paddr >= end) {
                    for (; k < PTRS_PER_PTE; k++, pte++)
                        __set_pte(pte, __pte(0));
                    break;
                }
                __set_pte(pte, __pte(paddr | _KERNPG_TABLE));
            }
            pte = pte_save;
            make_page_readonly(pte);
            xen_pte_pin(pte_phys);
            set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
        }
    }
    __flush_tlb();
}

static void __init find_early_table_space(unsigned long end)
{
    unsigned long puds, pmds, ptes;

    puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
    pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
    ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;

    tables_space = round_up(puds * 8, PAGE_SIZE) +
                   round_up(pmds * 8, PAGE_SIZE) +
                   round_up(ptes * 8, PAGE_SIZE);
}

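/*
 * Copy the kernel mappings that Xen set up for the __START_KERNEL_map
 * area out of the boot page tables into the kernel's own
 * level2_kernel_pgt/level3_kernel_pgt, then hook level3_kernel_pgt into
 * init_level4_pgt.
 */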
static void xen_copy_pt(void)
{
    unsigned long va = __START_KERNEL_map;
    unsigned long addr, *pte_page;
    int i;
    pud_t *pud; pmd_t *pmd; pte_t *pte;
    unsigned long *page = (unsigned long *) init_level4_pgt;

    addr = (unsigned long) page[pgd_index(va)];
    addr_to_page(addr, page);

    pud = (pud_t *) &page[pud_index(va)];
    addr = page[pud_index(va)];
    addr_to_page(addr, page);

    level3_kernel_pgt[pud_index(va)] =
        __pud(__pa_symbol(level2_kernel_pgt) | _KERNPG_TABLE | _PAGE_USER);

    for (;;) {
        pmd = (pmd_t *) &page[pmd_index(va)];
        if (pmd_present(*pmd)) {
            level2_kernel_pgt[pmd_index(va)] = *pmd;
            /*
             * if pmd is valid, check pte.
             */
            addr = page[pmd_index(va)];
            addr_to_page(addr, pte_page);

            for (i = 0; i < PTRS_PER_PTE; i++) {
                pte = (pte_t *) &pte_page[pte_index(va)];
                if (pte_present(*pte))
                    va += PAGE_SIZE;
                else
                    break;
            }

        } else
            break;
    }

    init_level4_pgt[pgd_index(__START_KERNEL_map)] =
        mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
}

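/*
 * Build the kernel's initial page tables from the ones provided by Xen
 * at xen_start_info.pt_base: copy the boot pgd, rebuild the kernel-text
 * mappings via xen_copy_pt(), make the new page-table pages read-only
 * and pin them with the hypervisor, then install level3_user_pgt in the
 * user pgd (used for the vsyscall mapping).
 */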
void __init xen_init_pt(void)
{
    memcpy((void *)init_level4_pgt,
           (void *)xen_start_info.pt_base, PAGE_SIZE);

    memset((void *)level3_kernel_pgt, 0, PAGE_SIZE);
    memset((void *)level2_kernel_pgt, 0, PAGE_SIZE);

    xen_copy_pt();

    make_page_readonly(init_level4_pgt);
    make_page_readonly(level3_kernel_pgt);
    make_page_readonly(level2_kernel_pgt);
    make_page_readonly(init_level4_user_pgt);
    make_page_readonly(level3_user_pgt); /* for vsyscall stuff */

    xen_pgd_pin(__pa_symbol(init_level4_pgt));
    xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
    xen_pud_pin(__pa_symbol(level3_kernel_pgt));
    xen_pud_pin(__pa_symbol(level3_user_pgt));
    xen_pmd_pin(__pa_symbol(level2_kernel_pgt));

    set_pgd((pgd_t *)(init_level4_user_pgt + 511),
            mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
}

/*
 * Extend kernel mapping to access pages for page tables. The initial
 * mapping done by Xen is minimal (e.g. 8MB) and we need to extend the
 * mapping for early initialization.
 */

#define MIN_INIT_SIZE 0x800000
static unsigned long current_size, extended_size;

void __init extend_init_mapping(void)
{
    unsigned long va = __START_KERNEL_map;
    unsigned long addr, *pte_page;

    unsigned long phys;
    pmd_t *pmd;
    pte_t *pte, new_pte;
    unsigned long *page = (unsigned long *) init_level4_pgt;
    int i;

    addr = (unsigned long) page[pgd_index(va)];
    addr_to_page(addr, page);

    addr = page[pud_index(va)];
    addr_to_page(addr, page);

    for (;;) {
        pmd = (pmd_t *) &page[pmd_index(va)];
        if (pmd_present(*pmd)) {
            /*
             * if pmd is valid, check pte.
             */
            addr = page[pmd_index(va)];
            addr_to_page(addr, pte_page);

            for (i = 0; i < PTRS_PER_PTE; i++) {
                pte = (pte_t *) &pte_page[pte_index(va)];

                if (pte_present(*pte)) {
                    va += PAGE_SIZE;
                    current_size += PAGE_SIZE;
                } else
                    break;
            }

        } else
            break;
    }

    for (; va < __START_KERNEL_map + current_size + tables_space; ) {
        pmd = (pmd_t *) &page[pmd_index(va)];

        if (pmd_none(*pmd)) {
            pte_page = (unsigned long *) alloc_static_page(&phys);
            make_page_readonly(pte_page);
            xen_pte_pin(phys);
            set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));

            for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) {
                new_pte = pfn_pte((va - __START_KERNEL_map) >> PAGE_SHIFT,
                                  __pgprot(_KERNPG_TABLE | _PAGE_USER));

                pte = (pte_t *) &pte_page[pte_index(va)];
                xen_l1_entry_update(pte, new_pte);
                extended_size += PAGE_SIZE;
            }
        }
    }
}

/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from the
   physical memory. To access them they are temporarily mapped. */
void __init init_memory_mapping(unsigned long start, unsigned long end)
{
    unsigned long next;

    Dprintk("init_memory_mapping\n");

    find_early_table_space(end);
    extend_init_mapping();
    start_pfn = current_size >> PAGE_SHIFT;

    table_start = start_pfn;
    table_end = table_start;

    start = (unsigned long)__va(start);
    end = (unsigned long)__va(end);

    for (; start < end; start = next) {
        unsigned long pud_phys;
        pud_t *pud = alloc_low_page(&pud_phys);
        make_page_readonly(pud);
        xen_pud_pin(pud_phys);
        next = start + PGDIR_SIZE;
        if (next > end)
            next = end;
        phys_pud_init(pud, __pa(start), __pa(next));
        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
    }

689 printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
690 table_start<<PAGE_SHIFT,
691 table_end<<PAGE_SHIFT);
693 start_pfn = ((current_size + extended_size) >> PAGE_SHIFT);
695 /*
696 * TBD: Need to calculate at runtime
697 */
699 __flush_tlb_all();
700 init_mapping_done = 1;
701 }
extern struct x8664_pda cpu_pda[NR_CPUS];

void zap_low_mappings(void)
{
    /* this is not required for Xen */
#if 0
    swap_low_mappings();
#endif
}

#ifndef CONFIG_DISCONTIGMEM
void __init paging_init(void)
{
    {
        unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
        /* unsigned int max_dma; */
        /* max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; */
        /* if (end_pfn < max_dma) */
        zones_size[ZONE_DMA] = end_pfn;
#if 0
        else {
            zones_size[ZONE_DMA] = max_dma;
            zones_size[ZONE_NORMAL] = end_pfn - max_dma;
        }
#endif
        free_area_init(zones_size);
    }

    set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
    HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);

    memset(empty_zero_page, 0, sizeof(empty_zero_page));

#ifdef CONFIG_XEN_PHYSDEV_ACCESS
    {
        int i;
        /* Setup mapping of lower 1st MB */
        for (i = 0; i < NR_FIX_ISAMAPS; i++)
            if (xen_start_info.flags & SIF_PRIVILEGED)
                set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
            else
                __set_fixmap(FIX_ISAMAP_BEGIN - i,
                             virt_to_mfn(empty_zero_page) << PAGE_SHIFT,
                             PAGE_KERNEL_RO);
    }
#endif

}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
    unsigned long end = address + size;

    BUG_ON(address & ~LARGE_PAGE_MASK);
    BUG_ON(size & ~LARGE_PAGE_MASK);

    for (; address < end; address += LARGE_PAGE_SIZE) {
        pgd_t *pgd = pgd_offset_k(address);
        pud_t *pud;
        pmd_t *pmd;
        if (pgd_none(*pgd))
            continue;
        pud = pud_offset(pgd, address);
        if (pud_none(*pud))
            continue;
        pmd = pmd_offset(pud, address);
        if (!pmd || pmd_none(*pmd))
            continue;
        if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
            /* Could handle this, but it should not happen currently. */
            printk(KERN_ERR
                   "clear_kernel_mapping: mapping has been split. will leak memory\n");
            pmd_ERROR(*pmd);
        }
        set_pmd(pmd, __pmd(0));
    }
    __flush_tlb_all();
}

static inline int page_is_ram (unsigned long pagenr)
{
    if (pagenr < start_pfn || pagenr >= end_pfn)
        return 0;

    return 1;
}

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
                         kcore_vsyscall;

void __init mem_init(void)
{
    int codesize, reservedpages, datasize, initsize;
    int tmp;

    contiguous_bitmap = alloc_bootmem_low_pages(
        (end_pfn + 2*BITS_PER_LONG) >> 3);
    BUG_ON(!contiguous_bitmap);
    memset(contiguous_bitmap, 0, (end_pfn + 2*BITS_PER_LONG) >> 3);

#if defined(CONFIG_SWIOTLB)
    swiotlb_init();
#endif

    /* How many end-of-memory variables you have, grandma! */
    max_low_pfn = end_pfn;
    max_pfn = end_pfn;
    num_physpages = end_pfn;
    high_memory = (void *) __va(end_pfn * PAGE_SIZE);

    /* clear the zero-page */
    memset(empty_zero_page, 0, PAGE_SIZE);

    reservedpages = 0;

    /* this will put all low memory onto the freelists */
#ifdef CONFIG_DISCONTIGMEM
    totalram_pages += numa_free_all_bootmem();
    tmp = 0;
    /* should count reserved pages here for all nodes */
#else
    max_mapnr = end_pfn;
    if (!mem_map) BUG();

    totalram_pages += free_all_bootmem();

    for (tmp = 0; tmp < end_pfn; tmp++)
        /*
         * Only count reserved RAM pages
         */
        if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
            reservedpages++;
#endif

    after_bootmem = 1;

    codesize = (unsigned long) &_etext - (unsigned long) &_text;
    datasize = (unsigned long) &_edata - (unsigned long) &_etext;
    initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

    /* Register memory areas for /proc/kcore */
    kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
    kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
               VMALLOC_END-VMALLOC_START);
    kclist_add(&kcore_kernel, &_stext, _end - _stext);
    kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
    kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
               VSYSCALL_END - VSYSCALL_START);

    printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
           (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
           end_pfn << (PAGE_SHIFT-10),
           codesize >> 10,
           reservedpages << (PAGE_SHIFT-10),
           datasize >> 10,
           initsize >> 10);

    /*
     * Subtle. SMP is doing its boot stuff late (because it has to
     * fork idle threads) - but it also needs low mappings for the
     * protected-mode entry to work. We zap these entries only after
     * the WP-bit has been tested.
     */
#ifndef CONFIG_SMP
    zap_low_mappings();
#endif
}

extern char __initdata_begin[], __initdata_end[];

void free_initmem(void)
{
#ifdef __DO_LATER__
    /*
     * Some pages can be pinned, but some are not. Unpinning such pages
     * triggers BUG().
     */
    unsigned long addr;

    addr = (unsigned long)(&__init_begin);
    for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
        ClearPageReserved(virt_to_page(addr));
        set_page_count(virt_to_page(addr), 1);
        memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
        xen_pte_unpin(__pa(addr));
        make_page_writable(__va(__pa(addr)));
        /*
         * Make pages from __PAGE_OFFSET address as well
         */
        make_page_writable((void *)addr);
        free_page(addr);
        totalram_pages++;
    }
    memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
    printk("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
#endif
}

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
    if (start < (unsigned long)&_end)
        return;
    printk("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
    for (; start < end; start += PAGE_SIZE) {
        ClearPageReserved(virt_to_page(start));
        set_page_count(virt_to_page(start), 1);
        free_page(start);
        totalram_pages++;
    }
}
#endif

void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
    /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_DISCONTIGMEM
    int nid = phys_to_nid(phys);
    reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
    reserve_bootmem(phys, len);
#endif
}

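/*
 * kern_addr_valid() checks that 'addr' is a canonical kernel address and
 * walks the page tables down to the pte (or a large pmd) to verify that
 * a valid pfn backs it.
 */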
int kern_addr_valid(unsigned long addr)
{
    unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;

    if (above != 0 && above != -1UL)
        return 0;

    pgd = pgd_offset_k(addr);
    if (pgd_none(*pgd))
        return 0;

    pud = pud_offset_k(addr);
    if (pud_none(*pud))
        return 0;

    pmd = pmd_offset(pud, addr);
    if (pmd_none(*pmd))
        return 0;
    if (pmd_large(*pmd))
        return pfn_valid(pmd_pfn(*pmd));

    pte = pte_offset_kernel(pmd, addr);
    if (pte_none(*pte))
        return 0;
    return pfn_valid(pte_pfn(*pte));
}

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>

extern int exception_trace, page_fault_trace;

static ctl_table debug_table2[] = {
    { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
      proc_dointvec },
#ifdef CONFIG_CHECKING
    { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL,
      proc_dointvec },
#endif
    { 0, }
};

static ctl_table debug_root_table2[] = {
    { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
      .child = debug_table2 },
    { 0 },
};

static __init int x8664_sysctl_init(void)
{
    register_sysctl_table(debug_root_table2, 1);
    return 0;
}
__initcall(x8664_sysctl_init);
#endif

/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
    .vm_start = VSYSCALL_START,
    .vm_end = VSYSCALL_END,
    .vm_page_prot = PAGE_READONLY
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
    if (test_tsk_thread_flag(tsk, TIF_IA32))
        return NULL;
#endif
    return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
    struct vm_area_struct *vma = get_gate_vma(task);

    if (!vma)
        return 0;
    return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context. It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
    return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}