ia64/xen-unstable: linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c @ 6422:e24fd7012ffb

author:   cl349@firebug.cl.cam.ac.uk
date:     Thu Aug 25 10:09:39 2005 +0000 (2005-08-25)
parents:  2f20c2fce2c5 c42a9e2f6c5b
children: 4abd299ef2f6
/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 *
 *  Jun Nakajima <jun.nakajima@intel.com>
 *      Modified for Xen.
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>

extern unsigned long *contiguous_bitmap;

#if defined(CONFIG_SWIOTLB)
extern void swiotlb_init(void);
#endif

#ifndef Dprintk
#define Dprintk(x...)
#endif

extern char _stext[];

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

extern unsigned long start_pfn;

static int init_mapping_done;

/*
 * Use this until direct mapping is established, i.e. before __va() is
 * available in init_memory_mapping().
 */

#define addr_to_page(addr, page)        \
        (addr) &= PHYSICAL_PAGE_MASK;   \
        (page) = ((unsigned long *) ((unsigned long)(((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) + __START_KERNEL_map)))
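/*
 * addr_to_page() takes a page-table entry's machine address, strips the flag
 * bits, converts the machine frame to a pseudo-physical frame with
 * mfn_to_pfn(), and yields the virtual address of that page-table page
 * through the __START_KERNEL_map alias, so the early code below can walk
 * page tables before the direct mapping (and hence __va()) exists.
 */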
static void __make_page_readonly(unsigned long va)
{
        unsigned long addr;
        pte_t pte, *ptep;
        unsigned long *page = (unsigned long *) init_level4_pgt;

        addr = (unsigned long) page[pgd_index(va)];
        addr_to_page(addr, page);

        addr = page[pud_index(va)];
        addr_to_page(addr, page);

        addr = page[pmd_index(va)];
        addr_to_page(addr, page);

        ptep = (pte_t *) &page[pte_index(va)];
        pte.pte = (ptep->pte & ~_PAGE_RW);
        xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
}

static void __make_page_writable(unsigned long va)
{
        unsigned long addr;
        pte_t pte, *ptep;
        unsigned long *page = (unsigned long *) init_level4_pgt;

        addr = (unsigned long) page[pgd_index(va)];
        addr_to_page(addr, page);

        addr = page[pud_index(va)];
        addr_to_page(addr, page);

        addr = page[pmd_index(va)];
        addr_to_page(addr, page);

        ptep = (pte_t *) &page[pte_index(va)];
        pte.pte = (ptep->pte | _PAGE_RW);
        xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
}

/*
 * Assume the translation is already established.
 */
void make_page_readonly(void *va)
{
        pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
        unsigned long addr = (unsigned long) va;

        if (!init_mapping_done) {
                __make_page_readonly(addr);
                return;
        }

        pgd = pgd_offset_k(addr);
        pud = pud_offset(pgd, addr);
        pmd = pmd_offset(pud, addr);
        ptep = pte_offset_kernel(pmd, addr);
        pte.pte = (ptep->pte & ~_PAGE_RW);
        xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
}

void make_page_writable(void *va)
{
        pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
        unsigned long addr = (unsigned long) va;

        if (!init_mapping_done) {
                __make_page_writable(addr);
                return;
        }

        pgd = pgd_offset_k(addr);
        pud = pud_offset(pgd, addr);
        pmd = pmd_offset(pud, addr);
        ptep = pte_offset_kernel(pmd, addr);
        pte.pte = (ptep->pte | _PAGE_RW);
        xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
}

void make_pages_readonly(void *va, unsigned nr)
{
        while ( nr-- != 0 ) {
                make_page_readonly(va);
                va = (void *)((unsigned long)va + PAGE_SIZE);
        }
}

void make_pages_writable(void *va, unsigned nr)
{
        while ( nr-- != 0 ) {
                make_page_writable(va);
                va = (void *)((unsigned long)va + PAGE_SIZE);
        }
}
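/*
 * Typical usage in this file (see set_pte_phys() and xen_init_pt() below):
 * a page-table page is made read-only and then pinned, e.g.
 *
 *      make_page_readonly(pmd);
 *      xen_pmd_pin(__pa(pmd));
 *
 * since Xen will not accept a page as a page table while the guest still
 * has it mapped writable.
 */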
/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously
 * in physical space so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

void show_mem(void)
{
        int i, total = 0, reserved = 0;
        int shared = 0, cached = 0;
        pg_data_t *pgdat;
        struct page *page;

        printk("Mem-info:\n");
        show_free_areas();
        printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

        for_each_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        page = pfn_to_page(pgdat->node_start_pfn + i);
                        total++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk("%d pages of RAM\n", total);
        printk("%d reserved pages\n",reserved);
        printk("%d pages shared\n",shared);
        printk("%d pages swap cached\n",cached);
}

/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end[];
extern char __init_begin, __init_end;

int after_bootmem;

static void *spp_getpage(void)
{
        void *ptr;
        if (after_bootmem)
                ptr = (void *) get_zeroed_page(GFP_ATOMIC);
        else
                ptr = alloc_bootmem_pages(PAGE_SIZE);
        if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
                panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");

        Dprintk("spp_getpage %p\n", ptr);
        return ptr;
}
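/*
 * Helpers for the separate "user" top-level pagetable
 * (init_level4_user_pgt/level3_user_pgt) kept for the vsyscall fixmap:
 * pgd_offset_u()/pud_offset_u() mirror pgd_offset_k()/pud_offset() but
 * index into the user tables instead.
 */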
#define pgd_offset_u(address) (pgd_t *)(init_level4_user_pgt + pgd_index(address))

static inline pud_t *pud_offset_u(unsigned long address)
{
        pud_t *pud = level3_user_pgt;

        return pud + pud_index(address);
}

static void set_pte_phys(unsigned long vaddr,
                         unsigned long phys, pgprot_t prot, int user_mode)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = (user_mode ? pgd_offset_u(vaddr) : pgd_offset_k(vaddr));

        if (pgd_none(*pgd)) {
                printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }

        pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr));

        if (pud_none(*pud)) {
                pmd = (pmd_t *) spp_getpage();

                make_page_readonly(pmd);
                xen_pmd_pin(__pa(pmd));
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
                        return;
                }
        }

        pmd = pmd_offset(pud, vaddr);

        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                make_page_readonly(pte);

                xen_pte_pin(__pa(pte));
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
        new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

        pte = pte_offset_kernel(pmd, vaddr);

        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
        set_pte(pte, new_pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}
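/*
 * set_pte_phys_ma() is the machine-address variant of set_pte_phys(): here
 * 'phys' is already a machine (frame) address, so the pte is built with
 * pfn_pte_ma() and installed with xen_l1_entry_update() rather than
 * set_pte(), because the pte page itself has been made read-only.
 */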
static void set_pte_phys_ma(unsigned long vaddr,
                            unsigned long phys, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = pgd_offset_k(vaddr);
        if (pgd_none(*pgd)) {
                printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {

                pmd = (pmd_t *) spp_getpage();
                make_page_readonly(pmd);
                xen_pmd_pin(__pa(pmd));

                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));

                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
                        return;
                }
        }
        pmd = pmd_offset(pud, vaddr);

        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                make_page_readonly(pte);
                xen_pte_pin(__pa(pte));

                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
        new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot);
        pte = pte_offset_kernel(pmd, vaddr);

        /*
         * Note that the pte page is already RO, thus we want to use
         * xen_l1_entry_update(), not set_pte().
         */
        xen_l1_entry_update(pte,
                            pfn_pte_ma(phys >> PAGE_SHIFT, prot));

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}
#define SET_FIXMAP_KERNEL 0
#define SET_FIXMAP_USER 1
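/*
 * SET_FIXMAP_KERNEL installs the fixmap pte in the kernel pagetable,
 * SET_FIXMAP_USER in the separate user-visible pagetable (only used for the
 * vsyscall page); the flag is simply the user_mode argument passed to
 * set_pte_phys() above.
 */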
/* NOTE: this is meant to be run only at boot */
void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }
        switch (idx) {
        case VSYSCALL_FIRST_PAGE:
                set_pte_phys(address, phys, prot, SET_FIXMAP_KERNEL);
                break;
        default:
                set_pte_phys_ma(address, phys, prot);
                break;
        }
}

/*
 * At this point it only supports vsyscall area.
 */
void __set_fixmap_user (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }

        set_pte_phys(address, phys, prot, SET_FIXMAP_USER);
}

unsigned long __initdata table_start, table_end, tables_space;
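/*
 * get_machine_pfn() walks the live kernel pagetables and returns the machine
 * frame number that currently backs the given kernel virtual address.
 */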
unsigned long get_machine_pfn(unsigned long addr)
{
        pud_t *pud = pud_offset_k(addr);
        pmd_t *pmd = pmd_offset(pud, addr);
        pte_t *pte = pte_offset_kernel(pmd, addr);

        return pte_mfn(*pte);
}

#define ALIGN_TO_4K __attribute__((section(".data.page_aligned")))
#define MAX_LOW_PAGES 0x20
static unsigned long __init_pgt[MAX_LOW_PAGES][512] ALIGN_TO_4K;
static int __init_pgt_index;
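/*
 * __init_pgt is a small pool of statically reserved, page-aligned pages
 * (MAX_LOW_PAGES of them) that alloc_static_page() hands out for early
 * pagetable construction, before the bootmem allocator or the direct
 * mapping is usable.
 */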
/*
 * We start using from start_pfn
 */
static __init void *alloc_static_page(unsigned long *phys)
{
        int i = __init_pgt_index++;

        if (__init_pgt_index >= MAX_LOW_PAGES) {
                printk("Need to increase MAX_LOW_PAGES");
                BUG();
        }

        *phys = __pa(__init_pgt[i]);

        return (void *) __init_pgt[i];
}

/*
 * Get RO page
 */
static void __init *alloc_low_page(unsigned long *phys)
{
        unsigned long pfn = table_end++;

        *phys = (pfn << PAGE_SHIFT);
        memset((void *) ((pfn << PAGE_SHIFT) + __START_KERNEL_map), 0, PAGE_SIZE);
        return (void *)((pfn << PAGE_SHIFT) + __START_KERNEL_map);
}

#define PTE_SIZE PAGE_SIZE

static inline void __set_pte(pte_t *dst, pte_t val)
{
        *dst = val;
}
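/*
 * make_readonly() decides whether a pseudo-physical address must be mapped
 * read-only in the direct mapping: the new pagetable pages being built here,
 * the initial pagetables supplied by Xen (xen_start_info.pt_base), and the
 * kernel image itself.
 */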
static inline int make_readonly(unsigned long paddr)
{
        int readonly = 0;

        /* Make new page tables read-only. */
        if ((paddr < ((table_start << PAGE_SHIFT) + tables_space)) &&
            (paddr >= (table_start << PAGE_SHIFT)))
                readonly = 1;

        /* Make old page tables read-only. */
        if ((paddr < ((xen_start_info.pt_base - __START_KERNEL_map) +
                      (xen_start_info.nr_pt_frames << PAGE_SHIFT))) &&
            (paddr >= (xen_start_info.pt_base - __START_KERNEL_map)))
                readonly = 1;

        /*
         * No need for writable mapping of kernel image. This also ensures that
         * page and descriptor tables embedded inside don't have writable
         * mappings.
         */
        if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)))
                readonly = 1;

        return readonly;
}
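/*
 * phys_pud_init() fills one pud's worth of the direct mapping with 4kB ptes
 * (no 2MB pages here): it allocates a pmd page per pud entry and a pte page
 * per pmd entry, marks ptes read-only where make_readonly() requires it, and
 * makes each new pagetable page read-only and pins it with Xen before
 * installing it in the level above.
 */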
void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
        long i, j, k;
        unsigned long paddr;

        i = pud_index(address);
        pud = pud + i;

        for (; i < PTRS_PER_PUD; pud++, i++) {
                unsigned long pmd_phys;
                pmd_t *pmd;

                paddr = address + i*PUD_SIZE;
                if (paddr >= end) {
                        for (; i < PTRS_PER_PUD; i++, pud++)
                                set_pud(pud, __pud(0));
                        break;
                }

                pmd = alloc_low_page(&pmd_phys);
                make_page_readonly(pmd);
                xen_pmd_pin(pmd_phys);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));

                for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
                        unsigned long pte_phys;
                        pte_t *pte, *pte_save;

                        if (paddr >= end) {
                                for (; j < PTRS_PER_PMD; j++, pmd++)
                                        set_pmd(pmd, __pmd(0));
                                break;
                        }
                        pte = alloc_low_page(&pte_phys);
                        pte_save = pte;
                        for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr += PTE_SIZE) {
                                if (make_readonly(paddr)) {
                                        __set_pte(pte,
                                                  __pte(paddr | (_KERNPG_TABLE & ~_PAGE_RW)));
                                        continue;
                                }
                                if (paddr >= end) {
                                        for (; k < PTRS_PER_PTE; k++, pte++)
                                                __set_pte(pte, __pte(0));
                                        break;
                                }
                                __set_pte(pte, __pte(paddr | _KERNPG_TABLE));
                        }
                        pte = pte_save;
                        make_page_readonly(pte);
                        xen_pte_pin(pte_phys);
                        set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
                }
        }
        __flush_tlb();
}
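/*
 * Estimate how much memory the new direct-mapping pagetables will occupy:
 * one 8-byte entry per pud/pmd/pte needed to cover 'end', each level rounded
 * up to whole pages. The result, tables_space, is consumed by
 * extend_init_mapping() and make_readonly().
 */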
static void __init find_early_table_space(unsigned long end)
{
        unsigned long puds, pmds, ptes;

        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;

        tables_space = round_up(puds * 8, PAGE_SIZE) +
                       round_up(pmds * 8, PAGE_SIZE) +
                       round_up(ptes * 8, PAGE_SIZE);
}
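/*
 * xen_copy_pt() copies the __START_KERNEL_map mappings that Xen built for us
 * into the kernel's own level2_kernel_pgt/level3_kernel_pgt, walking the
 * borrowed tables until it runs out of present entries, and finally points
 * init_level4_pgt at the new level3 table.
 */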
static void xen_copy_pt(void)
{
        unsigned long va = __START_KERNEL_map;
        unsigned long addr, *pte_page;
        int i;
        pud_t *pud; pmd_t *pmd; pte_t *pte;
        unsigned long *page = (unsigned long *) init_level4_pgt;

        addr = (unsigned long) page[pgd_index(va)];
        addr_to_page(addr, page);

        pud = (pud_t *) &page[pud_index(va)];
        addr = page[pud_index(va)];
        addr_to_page(addr, page);

        level3_kernel_pgt[pud_index(va)] =
                __pud(__pa_symbol(level2_kernel_pgt) | _KERNPG_TABLE | _PAGE_USER);

        for (;;) {
                pmd = (pmd_t *) &page[pmd_index(va)];
                if (pmd_present(*pmd)) {
                        level2_kernel_pgt[pmd_index(va)] = *pmd;
                        /*
                         * if pmd is valid, check pte.
                         */
                        addr = page[pmd_index(va)];
                        addr_to_page(addr, pte_page);

                        for (i = 0; i < PTRS_PER_PTE; i++) {
                                pte = (pte_t *) &pte_page[pte_index(va)];
                                if (pte_present(*pte))
                                        va += PAGE_SIZE;
                                else
                                        break;
                        }

                } else
                        break;
        }

        init_level4_pgt[pgd_index(__START_KERNEL_map)] =
                mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
}
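/*
 * xen_init_pt() takes ownership of the boot pagetables: it points every
 * CPU's cur_pgd at init_mm.pgd, clones Xen's top level into init_level4_pgt,
 * rebuilds the kernel level2/level3 tables via xen_copy_pt(), makes all of
 * these pagetable pages read-only and pins them, and finally wires
 * level3_user_pgt into slot 511 of the user pgd for the vsyscall mapping.
 */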
void __init xen_init_pt(void)
{
        int i;

        for (i = 0; i < NR_CPUS; i++)
                per_cpu(cur_pgd, i) = init_mm.pgd;

        memcpy((void *)init_level4_pgt,
               (void *)xen_start_info.pt_base, PAGE_SIZE);

        memset((void *)level3_kernel_pgt, 0, PAGE_SIZE);
        memset((void *)level2_kernel_pgt, 0, PAGE_SIZE);

        xen_copy_pt();

        make_page_readonly(init_level4_pgt);
        make_page_readonly(level3_kernel_pgt);
        make_page_readonly(level2_kernel_pgt);
        make_page_readonly(init_level4_user_pgt);
        make_page_readonly(level3_user_pgt); /* for vsyscall stuff */

        xen_pgd_pin(__pa_symbol(init_level4_pgt));
        xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
        xen_pud_pin(__pa_symbol(level3_kernel_pgt));
        xen_pud_pin(__pa_symbol(level3_user_pgt));
        xen_pmd_pin(__pa_symbol(level2_kernel_pgt));

        set_pgd((pgd_t *)(init_level4_user_pgt + 511),
                mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
}
/*
 * Extend kernel mapping to access pages for page tables. The initial
 * mapping done by Xen is minimal (e.g. 8MB) and we need to extend the
 * mapping for early initialization.
 */

#define MIN_INIT_SIZE 0x800000
static unsigned long current_size, extended_size;
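/*
 * current_size ends up holding how much of the kernel mapping Xen already
 * provided (counted in extend_init_mapping() by walking the present ptes),
 * while extended_size counts what we add on top to cover the pagetable
 * pages described by tables_space.
 */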
void __init extend_init_mapping(void)
{
        unsigned long va = __START_KERNEL_map;
        unsigned long addr, *pte_page;

        unsigned long phys;
        pmd_t *pmd;
        pte_t *pte, new_pte;
        unsigned long *page = (unsigned long *) init_level4_pgt;
        int i;

        addr = (unsigned long) page[pgd_index(va)];
        addr_to_page(addr, page);

        addr = page[pud_index(va)];
        addr_to_page(addr, page);

        for (;;) {
                pmd = (pmd_t *) &page[pmd_index(va)];
                if (pmd_present(*pmd)) {
                        /*
                         * if pmd is valid, check pte.
                         */
                        addr = page[pmd_index(va)];
                        addr_to_page(addr, pte_page);

                        for (i = 0; i < PTRS_PER_PTE; i++) {
                                pte = (pte_t *) &pte_page[pte_index(va)];

                                if (pte_present(*pte)) {
                                        va += PAGE_SIZE;
                                        current_size += PAGE_SIZE;
                                } else
                                        break;
                        }

                } else
                        break;
        }

        for (; va < __START_KERNEL_map + current_size + tables_space; ) {
                pmd = (pmd_t *) &page[pmd_index(va)];

                if (pmd_none(*pmd)) {
                        pte_page = (unsigned long *) alloc_static_page(&phys);
                        make_page_readonly(pte_page);
                        xen_pte_pin(phys);
                        set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));

                        for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) {
                                new_pte = pfn_pte((va - __START_KERNEL_map) >> PAGE_SHIFT,
                                                  __pgprot(_KERNPG_TABLE | _PAGE_USER));

                                pte = (pte_t *) &pte_page[pte_index(va)];
                                xen_l1_entry_update(pte, new_pte);
                                extended_size += PAGE_SIZE;
                        }
                }
        }
}
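/*
 * Note the ordering in init_memory_mapping() below: find_early_table_space()
 * runs first so that tables_space is known, because the loop above extends
 * the mapping exactly far enough to cover those future pagetable pages.
 */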
/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from the
   physical memory. To access them they are temporarily mapped. */
void __init init_memory_mapping(unsigned long start, unsigned long end)
{
        unsigned long next;

        Dprintk("init_memory_mapping\n");

        find_early_table_space(end);
        extend_init_mapping();
        start_pfn = current_size >> PAGE_SHIFT;

        table_start = start_pfn;
        table_end = table_start;

        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);

        for (; start < end; start = next) {
                unsigned long pud_phys;
                pud_t *pud = alloc_low_page(&pud_phys);
                make_page_readonly(pud);
                xen_pud_pin(pud_phys);
                next = start + PGDIR_SIZE;
                if (next > end)
                        next = end;
                phys_pud_init(pud, __pa(start), __pa(next));
                set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
        }

        printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
               table_start<<PAGE_SHIFT,
               table_end<<PAGE_SHIFT);

        start_pfn = ((current_size + extended_size) >> PAGE_SHIFT);

        /*
         * TBD: Need to calculate at runtime
         */

        __flush_tlb_all();
        init_mapping_done = 1;
}
extern struct x8664_pda cpu_pda[NR_CPUS];

void zap_low_mappings(void)
{
        /* this is not required for Xen */
#if 0
        swap_low_mappings();
#endif
}

#ifndef CONFIG_DISCONTIGMEM
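/*
 * paging_init(): with the direct mapping in place, size the memory zones
 * (everything is placed in ZONE_DMA for now), map the Xen shared_info page
 * through the fixmap, and, when CONFIG_XEN_PHYSDEV_ACCESS is enabled, map
 * the ISA range in the low 1MB for privileged (SIF_PRIVILEGED) domains;
 * unprivileged domains get the empty zero page there instead, read-only.
 */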
void __init paging_init(void)
{
        {
                unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
                /* unsigned int max_dma; */
                /* max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; */
                /* if (end_pfn < max_dma) */
                zones_size[ZONE_DMA] = end_pfn;
#if 0
                else {
                        zones_size[ZONE_DMA] = max_dma;
                        zones_size[ZONE_NORMAL] = end_pfn - max_dma;
                }
#endif
                free_area_init(zones_size);
        }

        set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
        HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);

        memset(empty_zero_page, 0, sizeof(empty_zero_page));

#ifdef CONFIG_XEN_PHYSDEV_ACCESS
        {
                int i;
                /* Setup mapping of lower 1st MB */
                for (i = 0; i < NR_FIX_ISAMAPS; i++)
                        if (xen_start_info.flags & SIF_PRIVILEGED)
                                set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
                        else
                                __set_fixmap(FIX_ISAMAP_BEGIN - i,
                                             virt_to_mfn(empty_zero_page) << PAGE_SHIFT,
                                             PAGE_KERNEL_RO);
        }
#endif

}
#endif
/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
        unsigned long end = address + size;

        BUG_ON(address & ~LARGE_PAGE_MASK);
        BUG_ON(size & ~LARGE_PAGE_MASK);

        for (; address < end; address += LARGE_PAGE_SIZE) {
                pgd_t *pgd = pgd_offset_k(address);
                pud_t *pud;
                pmd_t *pmd;
                if (pgd_none(*pgd))
                        continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud))
                        continue;
                pmd = pmd_offset(pud, address);
                if (!pmd || pmd_none(*pmd))
                        continue;
                if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
                        /* Could handle this, but it should not happen currently. */
                        printk(KERN_ERR
                               "clear_kernel_mapping: mapping has been split. will leak memory\n");
                        pmd_ERROR(*pmd);
                }
                set_pmd(pmd, __pmd(0));
        }
        __flush_tlb_all();
}

static inline int page_is_ram (unsigned long pagenr)
{
        if (pagenr < start_pfn || pagenr >= end_pfn)
                return 0;

        return 1;
}

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
                         kcore_vsyscall;
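/*
 * mem_init(): allocate and zero the contiguous_bitmap declared above (used
 * elsewhere in the Xen tree to track machine-contiguous regions), set up the
 * swiotlb if configured, hand all bootmem pages over to the page allocator,
 * count reserved pages, register the /proc/kcore regions, and print the
 * usual memory summary.
 */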
void __init mem_init(void)
{
        int codesize, reservedpages, datasize, initsize;
        int tmp;

        contiguous_bitmap = alloc_bootmem_low_pages(
                (end_pfn + 2*BITS_PER_LONG) >> 3);
        BUG_ON(!contiguous_bitmap);
        memset(contiguous_bitmap, 0, (end_pfn + 2*BITS_PER_LONG) >> 3);

#if defined(CONFIG_SWIOTLB)
        swiotlb_init();
#endif

        /* How many end-of-memory variables you have, grandma! */
        max_low_pfn = end_pfn;
        max_pfn = end_pfn;
        num_physpages = end_pfn;
        high_memory = (void *) __va(end_pfn * PAGE_SIZE);

        /* clear the zero-page */
        memset(empty_zero_page, 0, PAGE_SIZE);

        reservedpages = 0;

        /* this will put all low memory onto the freelists */
#ifdef CONFIG_DISCONTIGMEM
        totalram_pages += numa_free_all_bootmem();
        tmp = 0;
        /* should count reserved pages here for all nodes */
#else
        max_mapnr = end_pfn;
        if (!mem_map) BUG();

        totalram_pages += free_all_bootmem();

        for (tmp = 0; tmp < end_pfn; tmp++)
                /*
                 * Only count reserved RAM pages
                 */
                if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
                        reservedpages++;
#endif

        after_bootmem = 1;

        codesize = (unsigned long) &_etext - (unsigned long) &_text;
        datasize = (unsigned long) &_edata - (unsigned long) &_etext;
        initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

        /* Register memory areas for /proc/kcore */
        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);
        kclist_add(&kcore_kernel, &_stext, _end - _stext);
        kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
        kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
                   VSYSCALL_END - VSYSCALL_START);

        printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
               (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
               end_pfn << (PAGE_SHIFT-10),
               codesize >> 10,
               reservedpages << (PAGE_SHIFT-10),
               datasize >> 10,
               initsize >> 10);

        /*
         * Subtle. SMP is doing its boot stuff late (because it has to
         * fork idle threads) - but it also needs low mappings for the
         * protected-mode entry to work. We zap these entries only after
         * the WP-bit has been tested.
         */
#ifndef CONFIG_SMP
        zap_low_mappings();
#endif
}
extern char __initdata_begin[], __initdata_end[];

void free_initmem(void)
{
#ifdef __DO_LATER__
        /*
         * Some pages can be pinned, but some are not. Unpinning such pages
         * triggers BUG().
         */
        unsigned long addr;

        addr = (unsigned long)(&__init_begin);
        for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                set_page_count(virt_to_page(addr), 1);
                memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
                xen_pte_unpin(__pa(addr));
                make_page_writable(__va(__pa(addr)));
                /*
                 * Make pages from __PAGE_OFFSET address as well
                 */
                make_page_writable((void *)addr);
                free_page(addr);
                totalram_pages++;
        }
        memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
        printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
#endif
}

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        if (start < (unsigned long)&_end)
                return;
        printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
        for (; start < end; start += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(start));
                set_page_count(virt_to_page(start), 1);
                free_page(start);
                totalram_pages++;
        }
}
#endif

void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
        /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_DISCONTIGMEM
        int nid = phys_to_nid(phys);
        reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
        reserve_bootmem(phys, len);
#endif
}
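/*
 * kern_addr_valid(): a kernel virtual address is considered valid if it is
 * canonical (the bits above __VIRTUAL_MASK_SHIFT are all zeros or all ones)
 * and the pagetable walk reaches a present pte (or a 2MB large pmd) with a
 * valid pfn.
 */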
int kern_addr_valid(unsigned long addr)
{
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (above != 0 && above != -1UL)
                return 0;

        pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd))
                return 0;

        pud = pud_offset_k(addr);
        if (pud_none(*pud))
                return 0;

        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return 0;
        if (pmd_large(*pmd))
                return pfn_valid(pmd_pfn(*pmd));

        pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte))
                return 0;
        return pfn_valid(pte_pfn(*pte));
}
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>

extern int exception_trace, page_fault_trace;

static ctl_table debug_table2[] = {
        { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
          proc_dointvec },
#ifdef CONFIG_CHECKING
        { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL,
          proc_dointvec },
#endif
        { 0, }
};

static ctl_table debug_root_table2[] = {
        { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
          .child = debug_table2 },
        { 0 },
};

static __init int x8664_sysctl_init(void)
{
        register_sysctl_table(debug_root_table2, 1);
        return 0;
}
__initcall(x8664_sysctl_init);
#endif
/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
        .vm_start = VSYSCALL_START,
        .vm_end = VSYSCALL_END,
        .vm_page_prot = PAGE_READONLY
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
        if (test_tsk_thread_flag(tsk, TIF_IA32))
                return NULL;
#endif
        return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
        struct vm_area_struct *vma = get_gate_vma(task);
        if (!vma)
                return 0;
        return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context. It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}