ia64/xen-unstable: linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
changeset: 6432:b54144915ae6 ("merge?")
author:    cl349@firebug.cl.cam.ac.uk
date:      Thu Aug 25 16:26:30 2005 +0000
parents:   3428d58a85e1 8d31f9a9c423
children:  0610add7c3fe

/*
 * linux/arch/x86_64/mm/init.c
 *
 * Copyright (C) 1995 Linus Torvalds
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 *
 * Jun Nakajima <jun.nakajima@intel.com>
 *      Modified for Xen.
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>

extern unsigned long *contiguous_bitmap;

#if defined(CONFIG_SWIOTLB)
extern void swiotlb_init(void);
#endif

#ifndef Dprintk
#define Dprintk(x...)
#endif

extern char _stext[];

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
extern unsigned long start_pfn;

static int init_mapping_done;

/*
 * Use this until the direct mapping is established, i.e. before __va() is
 * available in init_memory_mapping().
 */

#define addr_to_page(addr, page)                                \
        (addr) &= PHYSICAL_PAGE_MASK;                           \
        (page) = ((unsigned long *) ((unsigned long)(((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) + __START_KERNEL_map)))
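
/*
 * Boot-time helpers: walk init_level4_pgt by hand (via addr_to_page) down
 * to the PTE for 'va' and toggle its _PAGE_RW bit through
 * xen_l1_entry_update().  Used while the direct mapping is not yet
 * available, so __va() cannot be used.
 */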
static void __make_page_readonly(unsigned long va)
{
        unsigned long addr;
        pte_t pte, *ptep;
        unsigned long *page = (unsigned long *) init_level4_pgt;

        addr = (unsigned long) page[pgd_index(va)];
        addr_to_page(addr, page);

        addr = page[pud_index(va)];
        addr_to_page(addr, page);

        addr = page[pmd_index(va)];
        addr_to_page(addr, page);

        ptep = (pte_t *) &page[pte_index(va)];
        pte.pte = (ptep->pte & ~_PAGE_RW);
        xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
}

static void __make_page_writable(unsigned long va)
{
        unsigned long addr;
        pte_t pte, *ptep;
        unsigned long *page = (unsigned long *) init_level4_pgt;

        addr = (unsigned long) page[pgd_index(va)];
        addr_to_page(addr, page);

        addr = page[pud_index(va)];
        addr_to_page(addr, page);

        addr = page[pmd_index(va)];
        addr_to_page(addr, page);

        ptep = (pte_t *) &page[pte_index(va)];
        pte.pte = (ptep->pte | _PAGE_RW);
        xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
}

/*
 * Assume the translation is already established.
 */
void make_page_readonly(void *va)
{
        pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
        unsigned long addr = (unsigned long) va;

        if (!init_mapping_done) {
                __make_page_readonly(addr);
                return;
        }

        pgd = pgd_offset_k(addr);
        pud = pud_offset(pgd, addr);
        pmd = pmd_offset(pud, addr);
        ptep = pte_offset_kernel(pmd, addr);
        pte.pte = (ptep->pte & ~_PAGE_RW);
        xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
}

void make_page_writable(void *va)
{
        pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
        unsigned long addr = (unsigned long) va;

        if (!init_mapping_done) {
                __make_page_writable(addr);
                return;
        }

        pgd = pgd_offset_k(addr);
        pud = pud_offset(pgd, addr);
        pmd = pmd_offset(pud, addr);
        ptep = pte_offset_kernel(pmd, addr);
        pte.pte = (ptep->pte | _PAGE_RW);
        xen_l1_entry_update(ptep, pte);
        __flush_tlb_one(addr);
}

void make_pages_readonly(void *va, unsigned nr)
{
        while ( nr-- != 0 ) {
                make_page_readonly(va);
                va = (void *)((unsigned long)va + PAGE_SIZE);
        }
}

void make_pages_writable(void *va, unsigned nr)
{
        while ( nr-- != 0 ) {
                make_page_writable(va);
                va = (void *)((unsigned long)va + PAGE_SIZE);
        }
}

/*
 * NOTE: pagetable_init() allocates all the fixmap page tables contiguously
 * in physical space, so we can cache the location of the first one and move
 * around without checking the pgd every time.
 */
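
/*
 * Print a summary of memory usage for debugging: free areas, free swap,
 * and counts of total, reserved, shared, and swap-cached pages across all
 * nodes.
 */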
void show_mem(void)
{
        int i, total = 0, reserved = 0;
        int shared = 0, cached = 0;
        pg_data_t *pgdat;
        struct page *page;

        printk("Mem-info:\n");
        show_free_areas();
        printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));

        for_each_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        page = pfn_to_page(pgdat->node_start_pfn + i);
                        total++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk("%d pages of RAM\n", total);
        printk("%d reserved pages\n", reserved);
        printk("%d pages shared\n", shared);
        printk("%d pages swap cached\n", cached);
}

/* References to section boundaries */

extern char _text, _etext, _edata, __bss_start, _end[];
extern char __init_begin, __init_end;

int after_bootmem;
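
/*
 * Allocate a zeroed page for an intermediate page-table level: from the
 * bootmem allocator before the buddy allocator is up, from
 * get_zeroed_page() afterwards.
 */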
static void *spp_getpage(void)
{
        void *ptr;
        if (after_bootmem)
                ptr = (void *) get_zeroed_page(GFP_ATOMIC);
        else
                ptr = alloc_bootmem_pages(PAGE_SIZE);
        if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
                panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");

        Dprintk("spp_getpage %p\n", ptr);
        return ptr;
}

#define pgd_offset_u(address) (pgd_t *)(init_level4_user_pgt + pgd_index(address))

static inline pud_t *pud_offset_u(unsigned long address)
{
        pud_t *pud = level3_user_pgt;

        return pud + pud_index(address);
}
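
/*
 * Install a single 4K mapping of 'vaddr' -> pseudo-physical 'phys' in
 * either the kernel or the user copy of the page tables (used for the
 * fixmap/vsyscall areas).  Intermediate levels are allocated with
 * spp_getpage(), made read-only and pinned before being hooked in, as Xen
 * requires for page-table pages.
 */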
static void set_pte_phys(unsigned long vaddr,
                         unsigned long phys, pgprot_t prot, int user_mode)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = (user_mode ? pgd_offset_u(vaddr) : pgd_offset_k(vaddr));

        if (pgd_none(*pgd)) {
                printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }

        pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr));

        if (pud_none(*pud)) {
                pmd = (pmd_t *) spp_getpage();

                make_page_readonly(pmd);
                xen_pmd_pin(__pa(pmd));
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
                        return;
                }
        }

        pmd = pmd_offset(pud, vaddr);

        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                make_page_readonly(pte);

                xen_pte_pin(__pa(pte));
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
        new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

        pte = pte_offset_kernel(pmd, vaddr);

        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
        set_pte(pte, new_pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}
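
/*
 * Same as set_pte_phys(), but 'phys' is a machine address, so the PTE is
 * built with pfn_pte_ma() and written with xen_l1_entry_update() (the PTE
 * page itself is already read-only).
 */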
static void set_pte_phys_ma(unsigned long vaddr,
                            unsigned long phys, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = pgd_offset_k(vaddr);
        if (pgd_none(*pgd)) {
                printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {

                pmd = (pmd_t *) spp_getpage();
                make_page_readonly(pmd);
                xen_pmd_pin(__pa(pmd));

                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));

                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
                        return;
                }
        }
        pmd = pmd_offset(pud, vaddr);

        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                make_page_readonly(pte);
                xen_pte_pin(__pa(pte));

                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }

        new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot);
        pte = pte_offset_kernel(pmd, vaddr);

        /*
         * Note that the pte page is already RO, thus we want to use
         * xen_l1_entry_update(), not set_pte().
         */
        xen_l1_entry_update(pte,
                            pfn_pte_ma(phys >> PAGE_SHIFT, prot));

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

#define SET_FIXMAP_KERNEL 0
#define SET_FIXMAP_USER 1

/* NOTE: this is meant to be run only at boot */
void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }
        switch (idx) {
        case VSYSCALL_FIRST_PAGE:
                set_pte_phys(address, phys, prot, SET_FIXMAP_KERNEL);
                break;
        default:
                set_pte_phys_ma(address, phys, prot);
                break;
        }
}

/*
 * At this point it only supports vsyscall area.
 */
void __set_fixmap_user (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }

        set_pte_phys(address, phys, prot, SET_FIXMAP_USER);
}

unsigned long __initdata table_start, table_end, tables_space;
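
/*
 * Return the machine frame number backing the kernel virtual address
 * 'addr', by walking the live kernel page tables.
 */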
unsigned long get_machine_pfn(unsigned long addr)
{
        pud_t* pud = pud_offset_k(addr);
        pmd_t* pmd = pmd_offset(pud, addr);
        pte_t *pte = pte_offset_kernel(pmd, addr);

        return pte_mfn(*pte);
}

#define ALIGN_TO_4K __attribute__((section(".data.page_aligned")))
#define MAX_LOW_PAGES 0x20
static unsigned long __init_pgt[MAX_LOW_PAGES][512] ALIGN_TO_4K;
static int __init_pgt_index;

/*
 * We start using from start_pfn
 */
static __init void *alloc_static_page(unsigned long *phys)
{
        int i = __init_pgt_index++;

        if (__init_pgt_index >= MAX_LOW_PAGES) {
                printk("Need to increase MAX_LOW_PAGES");
                BUG();
        }

        *phys = __pa(__init_pgt[i]);

        return (void *) __init_pgt[i];
}

/*
 * Get RO page
 */
static void __init *alloc_low_page(unsigned long *phys)
{
        unsigned long pfn = table_end++;

        *phys = (pfn << PAGE_SHIFT);
        memset((void *) ((pfn << PAGE_SHIFT) + __START_KERNEL_map), 0, PAGE_SIZE);
        return (void *)((pfn << PAGE_SHIFT) + __START_KERNEL_map);
}

#define PTE_SIZE PAGE_SIZE

static inline void __set_pte(pte_t *dst, pte_t val)
{
        *dst = val;
}
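
/*
 * Decide whether the physical address 'paddr' must be mapped read-only in
 * the direct mapping: the new boot-time page tables, the page tables Xen
 * handed us at start of day, and the kernel image itself all qualify.
 */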
static inline int make_readonly(unsigned long paddr)
{
        int readonly = 0;

        /* Make new page tables read-only. */
        if ((paddr < ((table_start << PAGE_SHIFT) + tables_space)) &&
            (paddr >= (table_start << PAGE_SHIFT)))
                readonly = 1;

        /* Make old page tables read-only. */
        if ((paddr < ((xen_start_info.pt_base - __START_KERNEL_map) +
                      (xen_start_info.nr_pt_frames << PAGE_SHIFT))) &&
            (paddr >= (xen_start_info.pt_base - __START_KERNEL_map)))
                readonly = 1;

        /*
         * No need for writable mapping of kernel image. This also ensures that
         * page and descriptor tables embedded inside don't have writable mappings.
         */
        if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)))
                readonly = 1;

        return readonly;
}
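
/*
 * Populate the pmd and pte levels under 'pud' for the physical range
 * [address, end): each new table page is allocated with alloc_low_page(),
 * made read-only and pinned, and PTEs covering ranges flagged by
 * make_readonly() are installed without _PAGE_RW.
 */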
void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
        long i, j, k;
        unsigned long paddr;

        i = pud_index(address);
        pud = pud + i;

        for (; i < PTRS_PER_PUD; pud++, i++) {
                unsigned long pmd_phys;
                pmd_t *pmd;

                paddr = address + i*PUD_SIZE;
                if (paddr >= end) {
                        for (; i < PTRS_PER_PUD; i++, pud++)
                                set_pud(pud, __pud(0));
                        break;
                }

                pmd = alloc_low_page(&pmd_phys);
                make_page_readonly(pmd);
                xen_pmd_pin(pmd_phys);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));

                for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
                        unsigned long pte_phys;
                        pte_t *pte, *pte_save;

                        if (paddr >= end) {
                                for (; j < PTRS_PER_PMD; j++, pmd++)
                                        set_pmd(pmd, __pmd(0));
                                break;
                        }
                        pte = alloc_low_page(&pte_phys);
                        pte_save = pte;
                        for (k = 0; k < PTRS_PER_PTE; pte++, k++, paddr += PTE_SIZE) {
                                if (make_readonly(paddr)) {
                                        __set_pte(pte,
                                                  __pte(paddr | (_KERNPG_TABLE & ~_PAGE_RW)));
                                        continue;
                                }
                                if (paddr >= end) {
                                        for (; k < PTRS_PER_PTE; k++, pte++)
                                                __set_pte(pte, __pte(0));
                                        break;
                                }
                                __set_pte(pte, __pte(paddr | _KERNPG_TABLE));
                        }
                        pte = pte_save;
                        make_page_readonly(pte);
                        xen_pte_pin(pte_phys);
                        set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
                }
        }
        __flush_tlb();
}
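
/*
 * Work out how much space the direct-mapping page tables will need (one
 * 8-byte entry per pud/pmd/pte, each level rounded up to whole pages) and
 * record it in tables_space.
 */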
static void __init find_early_table_space(unsigned long end)
{
        unsigned long puds, pmds, ptes;

        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;

        tables_space = round_up(puds * 8, PAGE_SIZE) +
                       round_up(pmds * 8, PAGE_SIZE) +
                       round_up(ptes * 8, PAGE_SIZE);
}
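
/*
 * Switch from the initial page tables provided by Xen to the kernel's own
 * init_level4_pgt: copy the pre-built mapping of the kernel area into
 * level2_kernel_pgt, make every new page-table page read-only, pin it
 * with the hypervisor, and hook the user-mode level3 table into the last
 * slot of init_level4_user_pgt.
 */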
void __init xen_init_pt(void)
{
        unsigned long addr, *page;
        int i;

        for (i = 0; i < NR_CPUS; i++)
                per_cpu(cur_pgd, i) = init_mm.pgd;

        memset((void *)init_level4_pgt, 0, PAGE_SIZE);
        memset((void *)level3_kernel_pgt, 0, PAGE_SIZE);
        memset((void *)level2_kernel_pgt, 0, PAGE_SIZE);

        /* Find the initial pte page that was built for us. */
        page = (unsigned long *)xen_start_info.pt_base;
        addr = page[pgd_index(__START_KERNEL_map)];
        addr_to_page(addr, page);
        addr = page[pud_index(__START_KERNEL_map)];
        addr_to_page(addr, page);

        /* Construct mapping of initial pte page in our own directories. */
        init_level4_pgt[pgd_index(__START_KERNEL_map)] =
                mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
        level3_kernel_pgt[pud_index(__START_KERNEL_map)] =
                __pud(__pa_symbol(level2_kernel_pgt) |
                      _KERNPG_TABLE | _PAGE_USER);
        memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE);

        make_page_readonly(init_level4_pgt);
        make_page_readonly(init_level4_user_pgt);
        make_page_readonly(level3_kernel_pgt);
        make_page_readonly(level3_user_pgt);
        make_page_readonly(level2_kernel_pgt);

        xen_pgd_pin(__pa_symbol(init_level4_pgt));
        xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
        xen_pud_pin(__pa_symbol(level3_kernel_pgt));
        xen_pud_pin(__pa_symbol(level3_user_pgt));
        xen_pmd_pin(__pa_symbol(level2_kernel_pgt));

        set_pgd((pgd_t *)(init_level4_user_pgt + 511),
                mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
}

/*
 * Extend kernel mapping to access pages for page tables. The initial
 * mapping done by Xen is minimal (e.g. 8MB) and we need to extend the
 * mapping for early initialization.
 */
static unsigned long current_size, extended_size;

void __init extend_init_mapping(void)
{
        unsigned long va = __START_KERNEL_map;
        unsigned long phys, addr, *pte_page;
        pmd_t *pmd;
        pte_t *pte, new_pte;
        unsigned long *page = (unsigned long *) init_level4_pgt;
        int i;

        addr = page[pgd_index(va)];
        addr_to_page(addr, page);
        addr = page[pud_index(va)];
        addr_to_page(addr, page);

        for (;;) {
                pmd = (pmd_t *)&page[pmd_index(va)];
                if (!pmd_present(*pmd))
                        break;
                addr = page[pmd_index(va)];
                addr_to_page(addr, pte_page);
                for (i = 0; i < PTRS_PER_PTE; i++) {
                        pte = (pte_t *) &pte_page[pte_index(va)];
                        if (!pte_present(*pte))
                                break;
                        va += PAGE_SIZE;
                        current_size += PAGE_SIZE;
                }
        }

        while (va < __START_KERNEL_map + current_size + tables_space) {
                pmd = (pmd_t *) &page[pmd_index(va)];
                if (!pmd_none(*pmd))
                        continue;
                pte_page = (unsigned long *) alloc_static_page(&phys);
                make_page_readonly(pte_page);
                xen_pte_pin(phys);
                set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
                for (i = 0; i < PTRS_PER_PTE; i++, va += PAGE_SIZE) {
                        new_pte = pfn_pte(
                                (va - __START_KERNEL_map) >> PAGE_SHIFT,
                                __pgprot(_KERNPG_TABLE | _PAGE_USER));
                        pte = (pte_t *)&pte_page[pte_index(va)];
                        xen_l1_entry_update(pte, new_pte);
                        extended_size += PAGE_SIZE;
                }
        }

        /* Kill mapping of low 1MB. */
        for (va = __START_KERNEL_map; va < (unsigned long)&_text; va += PAGE_SIZE)
                HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0);
}

/* Set up the direct mapping of the physical memory at PAGE_OFFSET.
   This runs before bootmem is initialized and gets pages directly from the
   physical memory. To access them they are temporarily mapped. */
void __init init_memory_mapping(unsigned long start, unsigned long end)
{
        unsigned long next;

        Dprintk("init_memory_mapping\n");

        find_early_table_space(end);
        extend_init_mapping();
        start_pfn = current_size >> PAGE_SHIFT;

        table_start = start_pfn;
        table_end = table_start;

        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);

        for (; start < end; start = next) {
                unsigned long pud_phys;
                pud_t *pud = alloc_low_page(&pud_phys);
                make_page_readonly(pud);
                xen_pud_pin(pud_phys);
                next = start + PGDIR_SIZE;
                if (next > end)
                        next = end;
                phys_pud_init(pud, __pa(start), __pa(next));
                set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
        }

        printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", end,
               table_start << PAGE_SHIFT,
               table_end << PAGE_SHIFT);

        start_pfn = ((current_size + extended_size) >> PAGE_SHIFT);

        __flush_tlb_all();
        init_mapping_done = 1;
}

extern struct x8664_pda cpu_pda[NR_CPUS];

void zap_low_mappings(void)
{
        /* this is not required for Xen */
#if 0
        swap_low_mappings();
#endif
}

#ifndef CONFIG_DISCONTIGMEM
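/*
 * Set up the zone sizes (all memory goes into ZONE_DMA for now), map the
 * Xen shared-info page through the fixmap, and map the low ISA region:
 * to the real machine frames for privileged domains, to the zero page
 * (read-only) otherwise.
 */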
void __init paging_init(void)
{
        {
                unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
                /* unsigned int max_dma; */
                /* max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; */
                /* if (end_pfn < max_dma) */
                zones_size[ZONE_DMA] = end_pfn;
#if 0
                else {
                        zones_size[ZONE_DMA] = max_dma;
                        zones_size[ZONE_NORMAL] = end_pfn - max_dma;
                }
#endif
                free_area_init(zones_size);
        }

        set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
        HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);

        memset(empty_zero_page, 0, sizeof(empty_zero_page));

#ifdef CONFIG_XEN_PHYSDEV_ACCESS
        {
                int i;
                /* Setup mapping of lower 1st MB */
                for (i = 0; i < NR_FIX_ISAMAPS; i++)
                        if (xen_start_info.flags & SIF_PRIVILEGED)
                                set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
                        else
                                __set_fixmap(FIX_ISAMAP_BEGIN - i,
                                             virt_to_mfn(empty_zero_page) << PAGE_SHIFT,
                                             PAGE_KERNEL_RO);
        }
#endif

}
#endif

/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
   from the CPU leading to inconsistent cache lines. address and size
   must be aligned to 2MB boundaries.
   Does nothing when the mapping doesn't exist. */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
        unsigned long end = address + size;

        BUG_ON(address & ~LARGE_PAGE_MASK);
        BUG_ON(size & ~LARGE_PAGE_MASK);

        for (; address < end; address += LARGE_PAGE_SIZE) {
                pgd_t *pgd = pgd_offset_k(address);
                pud_t *pud;
                pmd_t *pmd;
                if (pgd_none(*pgd))
                        continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud))
                        continue;
                pmd = pmd_offset(pud, address);
                if (!pmd || pmd_none(*pmd))
                        continue;
                if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
                        /* Could handle this, but it should not happen currently. */
                        printk(KERN_ERR
                               "clear_kernel_mapping: mapping has been split. will leak memory\n");
                        pmd_ERROR(*pmd);
                }
                set_pmd(pmd, __pmd(0));
        }
        __flush_tlb_all();
}

static inline int page_is_ram (unsigned long pagenr)
{
        if (pagenr < start_pfn || pagenr >= end_pfn)
                return 0;

        return 1;
}

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
                         kcore_vsyscall;
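
/*
 * Late memory setup: allocate the contiguous_bitmap used by the Xen
 * contiguous-region code, initialize the software IOTLB if configured,
 * hand all boot memory over to the page allocator, register /proc/kcore
 * areas, and print the memory banner.
 */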
void __init mem_init(void)
{
        int codesize, reservedpages, datasize, initsize;
        int tmp;

        contiguous_bitmap = alloc_bootmem_low_pages(
                (end_pfn + 2*BITS_PER_LONG) >> 3);
        BUG_ON(!contiguous_bitmap);
        memset(contiguous_bitmap, 0, (end_pfn + 2*BITS_PER_LONG) >> 3);

#if defined(CONFIG_SWIOTLB)
        swiotlb_init();
#endif

        /* How many end-of-memory variables you have, grandma! */
        max_low_pfn = end_pfn;
        max_pfn = end_pfn;
        num_physpages = end_pfn;
        high_memory = (void *) __va(end_pfn * PAGE_SIZE);

        /* clear the zero-page */
        memset(empty_zero_page, 0, PAGE_SIZE);

        reservedpages = 0;

        /* this will put all low memory onto the freelists */
#ifdef CONFIG_DISCONTIGMEM
        totalram_pages += numa_free_all_bootmem();
        tmp = 0;
        /* should count reserved pages here for all nodes */
#else
        max_mapnr = end_pfn;
        if (!mem_map) BUG();

        totalram_pages += free_all_bootmem();

        for (tmp = 0; tmp < end_pfn; tmp++)
                /*
                 * Only count reserved RAM pages
                 */
                if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
                        reservedpages++;
#endif

        after_bootmem = 1;

        codesize = (unsigned long) &_etext - (unsigned long) &_text;
        datasize = (unsigned long) &_edata - (unsigned long) &_etext;
        initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

        /* Register memory areas for /proc/kcore */
        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END-VMALLOC_START);
        kclist_add(&kcore_kernel, &_stext, _end - _stext);
        kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
        kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
                   VSYSCALL_END - VSYSCALL_START);

        printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
               (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
               end_pfn << (PAGE_SHIFT-10),
               codesize >> 10,
               reservedpages << (PAGE_SHIFT-10),
               datasize >> 10,
               initsize >> 10);

        /*
         * Subtle. SMP is doing its boot stuff late (because it has to
         * fork idle threads) - but it also needs low mappings for the
         * protected-mode entry to work. We zap these entries only after
         * the WP-bit has been tested.
         */
#ifndef CONFIG_SMP
        zap_low_mappings();
#endif
}

extern char __initdata_begin[], __initdata_end[];

void free_initmem(void)
{
#ifdef __DO_LATER__
        /*
         * Some pages can be pinned, but some are not. Unpinning such pages
         * triggers BUG().
         */
        unsigned long addr;

        addr = (unsigned long)(&__init_begin);
        for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                set_page_count(virt_to_page(addr), 1);
                memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
                xen_pte_unpin(__pa(addr));
                make_page_writable(__va(__pa(addr)));
                /*
                 * Make pages from __PAGE_OFFSET address as well
                 */
                make_page_writable((void *)addr);
                free_page(addr);
                totalram_pages++;
        }
        memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
        printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
#endif
}

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        if (start < (unsigned long)&_end)
                return;
        printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
        for (; start < end; start += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(start));
                set_page_count(virt_to_page(start), 1);
                free_page(start);
                totalram_pages++;
        }
}
#endif

void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
        /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_DISCONTIGMEM
        int nid = phys_to_nid(phys);
        reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
        reserve_bootmem(phys, len);
#endif
}
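
/*
 * Check whether 'addr' is a canonical kernel virtual address backed by a
 * valid page, walking the page tables and handling 2MB large pages.
 */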
int kern_addr_valid(unsigned long addr)
{
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (above != 0 && above != -1UL)
                return 0;

        pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd))
                return 0;

        pud = pud_offset_k(addr);
        if (pud_none(*pud))
                return 0;

        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return 0;
        if (pmd_large(*pmd))
                return pfn_valid(pmd_pfn(*pmd));

        pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte))
                return 0;
        return pfn_valid(pte_pfn(*pte));
}

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>

extern int exception_trace, page_fault_trace;

static ctl_table debug_table2[] = {
        { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
          proc_dointvec },
#ifdef CONFIG_CHECKING
        { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL,
          proc_dointvec },
#endif
        { 0, }
};

static ctl_table debug_root_table2[] = {
        { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
          .child = debug_table2 },
        { 0 },
};

static __init int x8664_sysctl_init(void)
{
        register_sysctl_table(debug_root_table2, 1);
        return 0;
}
__initcall(x8664_sysctl_init);
#endif

/* A pseudo VMA to allow ptrace access to the vsyscall page. This only
   covers the 64-bit vsyscall page now; 32-bit has a real VMA and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
        .vm_start = VSYSCALL_START,
        .vm_end = VSYSCALL_END,
        .vm_page_prot = PAGE_READONLY
};

struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
        if (test_tsk_thread_flag(tsk, TIF_IA32))
                return NULL;
#endif
        return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
        struct vm_area_struct *vma = get_gate_vma(task);
        if (!vma)
                return 0;
        return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/* Use this when you have no reliable task/vma, typically from interrupt
 * context. It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}