direct-io.hg
changeset 3221:1a3005f838bc
bitkeeper revision 1.1159.183.35 (41acab40rFEtjpNAJkBrPgWMpUHO5w)
Merge scramble.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xen-2.0-testing.bk
into scramble.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xen-unstable.bk
author   | kaf24@scramble.cl.cam.ac.uk
date     | Tue Nov 30 17:17:52 2004 +0000 (2004-11-30)
parents  | d7e53f4de825 6f0846972a4c
children | 89652683e374
files    | linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c
         | linux-2.4.28-xen-sparse/arch/xen/mm/init.c
         | linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c
         | linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c
         | linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c
         | linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h
         | xen/arch/x86/x86_32/entry.S
line diff
--- a/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c  Tue Nov 30 09:01:47 2004 +0000
+++ b/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c  Tue Nov 30 17:17:52 2004 +0000
@@ -275,7 +275,8 @@ void __init setup_arch(char **cmdline_p)
      * arch/xen/drivers/balloon/balloon.c
      */
     mem_param = parse_mem_cmdline(cmdline_p);
-    if (!mem_param) mem_param = xen_start_info.nr_pages;
+    if (mem_param < xen_start_info.nr_pages)
+        mem_param = xen_start_info.nr_pages;
 
 #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
 #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
@@ -303,6 +304,7 @@ void __init setup_arch(char **cmdline_p)
             printk(KERN_WARNING "Use a PAE enabled kernel.\n");
         else
             printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
+        max_pfn = lmax_low_pfn;
 #else /* !CONFIG_HIGHMEM */
 #ifndef CONFIG_X86_PAE
         if (max_pfn > MAX_NONPAE_PFN) {
@@ -350,8 +352,6 @@ void __init setup_arch(char **cmdline_p)
      */
     max_low_pfn = lmax_low_pfn;
 
-
-
 #ifdef CONFIG_BLK_DEV_INITRD
     if ( xen_start_info.mod_start != 0 )
     {
@@ -375,6 +375,20 @@ void __init setup_arch(char **cmdline_p)
 
     paging_init();
 
+    /* Make sure we have a large enough P->M table. */
+    if ( max_pfn > xen_start_info.nr_pages )
+    {
+        phys_to_machine_mapping = alloc_bootmem_low_pages(
+            max_pfn * sizeof(unsigned long));
+        memset(phys_to_machine_mapping, ~0, max_pfn * sizeof(unsigned long));
+        memcpy(phys_to_machine_mapping,
+               (unsigned long *)xen_start_info.mfn_list,
+               xen_start_info.nr_pages * sizeof(unsigned long));
+        free_bootmem(__pa(xen_start_info.mfn_list),
+                     PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
+                                     sizeof(unsigned long))));
+    }
+
     pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
     for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
     {
--- a/linux-2.4.28-xen-sparse/arch/xen/mm/init.c  Tue Nov 30 09:01:47 2004 +0000
+++ b/linux-2.4.28-xen-sparse/arch/xen/mm/init.c  Tue Nov 30 17:17:52 2004 +0000
@@ -213,23 +213,16 @@ static void __init fixrange_init (unsign
 
 static void __init pagetable_init (void)
 {
-    unsigned long vaddr, end;
+    unsigned long vaddr, end, ram_end;
     pgd_t *kpgd, *pgd, *pgd_base;
     int i, j, k;
     pmd_t *kpmd, *pmd;
     pte_t *kpte, *pte, *pte_base;
 
-    /* create tables only for boot_pfn frames. max_low_pfn may be sized for
-     * pages yet to be allocated from the hypervisor, or it may be set
-     * to override the xen_start_info amount of memory
-     */
-    int boot_pfn = min(xen_start_info.nr_pages,max_low_pfn);
-
-    /*
-     * This can be zero as well - no problem, in that case we exit
-     * the loops anyway due to the PTRS_PER_* conditions.
-     */
-    end = (unsigned long)__va(boot_pfn *PAGE_SIZE);
+    end = (unsigned long)__va(max_low_pfn * PAGE_SIZE);
+    ram_end = (unsigned long)__va(xen_start_info.nr_pages * PAGE_SIZE);
+    if ( ram_end > end )
+        ram_end = end;
 
     pgd_base = init_mm.pgd;
     i = __pgd_offset(PAGE_OFFSET);
@@ -237,12 +230,12 @@ static void __init pagetable_init (void)
 
     for (; i < PTRS_PER_PGD; pgd++, i++) {
         vaddr = i*PGDIR_SIZE;
-        if (end && (vaddr >= end))
+        if (vaddr >= end)
             break;
         pmd = (pmd_t *)pgd;
         for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
             vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
-            if (end && (vaddr >= end))
+            if (vaddr >= end)
                 break;
 
             /* Filled in for us already? */
@@ -250,10 +243,11 @@ static void __init pagetable_init (void)
                 continue;
 
             pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+            clear_page(pte_base);
 
             for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
                 vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
-                if (end && (vaddr >= end))
+                if (vaddr >= ram_end)
                     break;
                 *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
             }
@@ -329,28 +323,14 @@ static inline int page_is_ram (unsigned
     return 1;
 }
 
-static inline int page_kills_ppro(unsigned long pagenr)
-{
-    return 0;
-}
-
 #ifdef CONFIG_HIGHMEM
-void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+void __init one_highpage_init(struct page *page, int free_page)
 {
-    if (!page_is_ram(pfn)) {
-        SetPageReserved(page);
-        return;
-    }
-
-    if (bad_ppro && page_kills_ppro(pfn)) {
-        SetPageReserved(page);
-        return;
-    }
-
     ClearPageReserved(page);
     set_bit(PG_highmem, &page->flags);
     atomic_set(&page->count, 1);
-    __free_page(page);
+    if ( free_page )
+        __free_page(page);
     totalhigh_pages++;
 }
 #endif /* CONFIG_HIGHMEM */
@@ -392,8 +372,9 @@ static int __init free_pages_init(void)
             reservedpages++;
     }
 #ifdef CONFIG_HIGHMEM
-    for (pfn = xen_start_info.nr_pages-1; pfn >= highstart_pfn; pfn--)
-        one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro);
+    for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--)
+        one_highpage_init((struct page *) (mem_map + pfn), pfn,
+                          (pfn < xen_start_info.nr_pages));
     totalram_pages += totalhigh_pages;
 #endif
     return reservedpages;
--- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c  Tue Nov 30 09:01:47 2004 +0000
+++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c  Tue Nov 30 17:17:52 2004 +0000
@@ -52,6 +52,9 @@
 #include "setup_arch_pre.h"
 #include <bios_ebda.h>
 
+/* Allows setting of maximum possible memory size */
+static unsigned long xen_override_max_pfn;
+
 int disable_pse __initdata = 0;
 
 /*
@@ -718,8 +721,13 @@ static void __init parse_cmdline_early (
             unsigned long long mem_size;
 
             mem_size = memparse(from+4, &from);
+#if 0
             limit_regions(mem_size);
             userdef=1;
+#else
+            xen_override_max_pfn =
+                (unsigned long)(mem_size>>PAGE_SHIFT);
+#endif
         }
     }
 
@@ -857,6 +865,7 @@ static void __init parse_cmdline_early (
     }
 }
 
+#if 0 /* !XEN */
 /*
  * Callback for efi_memory_walk.
  */
@@ -873,7 +882,6 @@ efi_find_max_pfn(unsigned long start, un
     return 0;
 }
 
-
 /*
  * Find the highest page frame number we have available
 */
@@ -900,6 +908,15 @@ void __init find_max_pfn(void)
             max_pfn = end;
     }
 }
+#else
+/* We don't use the fake e820 because we need to respond to user override. */
+void __init find_max_pfn(void)
+{
+    if ( xen_override_max_pfn < xen_start_info.nr_pages )
+        xen_override_max_pfn = xen_start_info.nr_pages;
+    max_pfn = xen_override_max_pfn;
+}
+#endif /* XEN */
 
 /*
  * Determine low and high memory ranges:
@@ -1414,6 +1431,21 @@ void __init setup_arch(char **cmdline_p)
 #endif
     paging_init();
 
+    /* Make sure we have a large enough P->M table. */
+    if (max_pfn > xen_start_info.nr_pages) {
+        phys_to_machine_mapping = alloc_bootmem_low_pages(
+            max_pfn * sizeof(unsigned long));
+        memset(phys_to_machine_mapping, ~0,
+               max_pfn * sizeof(unsigned long));
+        memcpy(phys_to_machine_mapping,
+               (unsigned long *)xen_start_info.mfn_list,
+               xen_start_info.nr_pages * sizeof(unsigned long));
+        free_bootmem(
+            __pa(xen_start_info.mfn_list),
+            PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
+                            sizeof(unsigned long))));
+    }
+
     pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
     for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
     {
--- a/linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c  Tue Nov 30 09:01:47 2004 +0000
+++ b/linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c  Tue Nov 30 17:17:52 2004 +0000
@@ -77,6 +77,12 @@ static pte_t * __init one_page_table_ini
 {
     if (pmd_none(*pmd)) {
         pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+        /* XEN: Make the new p.t. read-only. */
+        pgd_t *kpgd = pgd_offset_k((unsigned long)page_table);
+        pmd_t *kpmd = pmd_offset(kpgd, (unsigned long)page_table);
+        pte_t *kpte = pte_offset_kernel(kpmd, (unsigned long)page_table);
+        xen_l1_entry_update(
+            kpte, (*(unsigned long *)kpte)&~_PAGE_RW);
         set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
         if (page_table != pte_offset_kernel(pmd, 0))
             BUG();
@@ -141,25 +147,6 @@ void __init protect_page(pgd_t *pgd, voi
                   pte_val_ma(*pte) | _PAGE_RW);
 }
 
-void __init protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode)
-{
-    pmd_t *pmd;
-    pte_t *pte;
-    int pgd_idx, pmd_idx;
-
-    protect_page(dpgd, spgd, mode);
-
-    for (pgd_idx = 0; pgd_idx < PTRS_PER_PGD_NO_HV; spgd++, pgd_idx++) {
-        pmd = pmd_offset(spgd, 0);
-        if (pmd_none(*pmd))
-            continue;
-        for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) {
-            pte = pte_offset_kernel(pmd, 0);
-            protect_page(dpgd, pte, mode);
-        }
-    }
-}
-
 static inline int is_kernel_text(unsigned long addr)
 {
     if (addr >= (unsigned long)_stext && addr <= (unsigned long)__init_end)
@@ -180,6 +167,10 @@ static void __init kernel_physical_mappi
     pte_t *pte;
     int pgd_idx, pmd_idx, pte_ofs;
 
+    unsigned long max_ram_pfn = xen_start_info.nr_pages;
+    if (max_ram_pfn > max_low_pfn)
+        max_ram_pfn = max_low_pfn;
+
     pgd_idx = pgd_index(PAGE_OFFSET);
     pgd = pgd_base + pgd_idx;
     pfn = 0;
@@ -207,7 +198,10 @@ static void __init kernel_physical_mappi
             pte = one_page_table_init(pmd);
 
             pte += pte_ofs;
-            for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
+            /* XEN: Only map initial RAM allocation. */
+            for (; pte_ofs < PTRS_PER_PTE && pfn < max_ram_pfn; pte++, pfn++, pte_ofs++) {
+                if (pte_present(*pte))
+                    continue;
                 if (is_kernel_text(address))
                     set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
                 else
@@ -311,7 +305,8 @@ void __init one_highpage_init(struct pag
         ClearPageReserved(page);
         set_bit(PG_highmem, &page->flags);
         set_page_count(page, 1);
-        __free_page(page);
+        if (pfn < xen_start_info.nr_pages)
+            __free_page(page);
         totalhigh_pages++;
     } else
         SetPageReserved(page);
@@ -347,7 +342,8 @@ extern void __init remap_numa_kva(void);
 static void __init pagetable_init (void)
 {
     unsigned long vaddr;
-    pgd_t *pgd_base = swapper_pg_dir;
+    pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
+    pgd_t *new_pgd = swapper_pg_dir;
 
 #ifdef CONFIG_X86_PAE
     int i;
@@ -368,7 +364,22 @@ static void __init pagetable_init (void)
         __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
     }
 
-    kernel_physical_mapping_init(pgd_base);
+    /*
+     * Switch to proper mm_init page directory. Initialise from the current
+     * page directory, write-protect the new page directory, then switch to
+     * it. We clean up by write-enabling and then freeing the old page dir.
+     */
+    memcpy(new_pgd, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
+    protect_page(new_pgd, new_pgd, PROT_ON);
+    queue_pgd_pin(__pa(new_pgd));
+    load_cr3(new_pgd);
+    queue_pgd_unpin(__pa(old_pgd));
+    __flush_tlb_all(); /* implicit flush */
+    protect_page(new_pgd, old_pgd, PROT_OFF);
+    flush_page_update_queue();
+    free_bootmem(__pa(old_pgd), PAGE_SIZE);
+
+    kernel_physical_mapping_init(new_pgd);
     remap_numa_kva();
 
     /*
@@ -376,9 +387,9 @@ static void __init pagetable_init (void)
      * created - mappings will be set by set_fixmap():
     */
     vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-    page_table_range_init(vaddr, 0, pgd_base);
+    page_table_range_init(vaddr, 0, new_pgd);
 
-    permanent_kmaps_init(pgd_base);
+    permanent_kmaps_init(new_pgd);
 
 #ifdef CONFIG_X86_PAE
     /*
@@ -388,7 +399,7 @@ static void __init pagetable_init (void)
     * All user-space mappings are explicitly cleared after
     * SMP startup.
     */
-    pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+    new_pgd[0] = new_pgd[USER_PTRS_PER_PGD];
 #endif
 }
 
@@ -545,8 +556,6 @@ out:
 */
 void __init paging_init(void)
 {
-    pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
-    pgd_t *new_pgd = swapper_pg_dir;
 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
     int i;
 #endif
@@ -559,25 +568,6 @@ void __init paging_init(void)
 
     pagetable_init();
 
-    /*
-     * Write-protect both page tables within both page tables.
-     * That's three ops, as the old p.t. is already protected
-     * within the old p.t. Then pin the new table, switch tables,
-     * and unprotect the old table.
-     */
-    protect_pagetable(new_pgd, old_pgd, PROT_ON);
-    protect_pagetable(new_pgd, new_pgd, PROT_ON);
-    protect_pagetable(old_pgd, new_pgd, PROT_ON);
-    queue_pgd_pin(__pa(new_pgd));
-    load_cr3(new_pgd);
-    queue_pgd_unpin(__pa(old_pgd));
-    __flush_tlb_all(); /* implicit flush */
-    protect_pagetable(new_pgd, old_pgd, PROT_OFF);
-    flush_page_update_queue();
-
-    /* Completely detached from old tables, so free them. */
-    free_bootmem(__pa(old_pgd), xen_start_info.nr_pt_frames << PAGE_SHIFT);
-
 #ifdef CONFIG_X86_PAE
     /*
     * We will bail out later - printk doesn't work right now so
--- a/linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c  Tue Nov 30 09:01:47 2004 +0000
+++ b/linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c  Tue Nov 30 17:17:52 2004 +0000
@@ -4,6 +4,7 @@
  * Xen balloon driver - enables returning/claiming memory to/from Xen.
  *
  * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
  *
  * This file may be distributed separately from the Linux kernel, or
  * incorporated into other software packages, subject to the following license:
@@ -48,19 +49,10 @@
 #include <asm/tlb.h>
 #include <linux/list.h>
 
-/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */
-#define USER_INFLATE_BALLOON 1 /* return mem to hypervisor */
-#define USER_DEFLATE_BALLOON 2 /* claim mem from hypervisor */
-typedef struct user_balloon_op {
-    unsigned int op;
-    unsigned long size;
-} user_balloon_op_t;
-/* END OF USER DEFINE */
-
 static struct proc_dir_entry *balloon_pde;
 
 unsigned long credit;
-static unsigned long current_pages, most_seen_pages;
+static unsigned long current_pages;
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 /* Use the private and mapping fields of struct page as a list. */
@@ -78,71 +70,66 @@ static unsigned long current_pages, most
 #define pte_offset_kernel pte_offset
 #endif
 
+#define IPRINTK(fmt, args...) \
+    printk(KERN_INFO "xen_mem: " fmt, ##args)
+#define WPRINTK(fmt, args...) \
+    printk(KERN_WARNING "xen_mem: " fmt, ##args)
+
 /* List of ballooned pages, threaded through the mem_map array. */
 LIST_HEAD(ballooned_pages);
 
-/** add_ballooned_page - remember we've ballooned a pfn */
-void add_ballooned_page(unsigned long pfn)
+/* balloon_append: add the given page to the balloon. */
+void balloon_append(struct page *page)
 {
-    struct page *p = mem_map + pfn;
-
-    list_add(PAGE_TO_LIST(p), &ballooned_pages);
+    list_add(PAGE_TO_LIST(page), &ballooned_pages);
 }
 
-/* rem_ballooned_page - recall a ballooned page and remove from list. */
-struct page *rem_ballooned_page(void)
+/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
+struct page *balloon_retrieve(void)
 {
-    if(!list_empty(&ballooned_pages))
-    {
-        struct page *ret;
+    struct page *page;
 
-        ret = LIST_TO_PAGE(ballooned_pages.next);
-        UNLIST_PAGE(ret);
+    if ( list_empty(&ballooned_pages) )
+        return NULL;
 
-        return ret;
-    }
-    else
-        return NULL;
+    page = LIST_TO_PAGE(ballooned_pages.next);
+    UNLIST_PAGE(page);
+    return page;
 }
 
 static inline pte_t *get_ptep(unsigned long addr)
 {
-    pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
+    pgd_t *pgd;
+    pmd_t *pmd;
+
     pgd = pgd_offset_k(addr);
-
     if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();
 
     pmd = pmd_offset(pgd, addr);
     if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG();
 
-    ptep = pte_offset_kernel(pmd, addr);
-
-    return ptep;
+    return pte_offset_kernel(pmd, addr);
 }
 
 /* Main function for relinquishing memory. */
 static unsigned long inflate_balloon(unsigned long num_pages)
-
 {
-    unsigned long *parray;
-    unsigned long *currp;
-    unsigned long curraddr;
-    unsigned long ret = 0;
-    unsigned long i, j;
+    unsigned long *parray, *currp, curraddr, ret = 0, i, j, mfn, pfn;
+    struct page *page;
 
     parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
     if ( parray == NULL )
     {
-        printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n");
-        return -EFAULT;
+        WPRINTK("inflate_balloon: Unable to vmalloc parray\n");
+        return -ENOMEM;
     }
 
     currp = parray;
 
     for ( i = 0; i < num_pages; i++, currp++ )
     {
-        struct page *page = alloc_page(GFP_HIGHUSER);
-        unsigned long pfn = page - mem_map;
+        page = alloc_page(GFP_HIGHUSER);
+        pfn = page - mem_map;
 
        /* If allocation fails then free all reserved pages. */
        if ( page == NULL )
@@ -160,10 +147,9 @@ static unsigned long inflate_balloon(uns
        *currp = pfn;
     }
 
-
     for ( i = 0, currp = parray; i < num_pages; i++, currp++ )
     {
-        unsigned long mfn = phys_to_machine_mapping[*currp];
+        mfn = phys_to_machine_mapping[*currp];
        curraddr = (unsigned long)page_address(mem_map + *currp);
        /* Blow away page contents for security, and also p.t. ref if any. */
        if ( curraddr != 0 )
@@ -180,7 +166,7 @@ static unsigned long inflate_balloon(uns
        }
 #endif
 
-        add_ballooned_page(*currp);
+        balloon_append(&mem_map[*currp]);
 
        phys_to_machine_mapping[*currp] = INVALID_P2M_ENTRY;
        *currp = mfn;
@@ -206,62 +192,45 @@ static unsigned long inflate_balloon(uns
     return ret;
 }
 
-/*
- * Install new mem pages obtained by deflate_balloon. function walks
- * phys->machine mapping table looking for DEAD entries and populates
- * them.
- */
-static unsigned long process_returned_pages(unsigned long * parray,
-                                       unsigned long num)
+/* Install a set of new pages (@mfn_list, @nr_mfns) into the memory map. */
+static unsigned long process_returned_pages(
+    unsigned long *mfn_list, unsigned long nr_mfns)
 {
-    /* currently, this function is rather simplistic as
-     * it is assumed that domain reclaims only number of
-     * pages previously released. this is to change soon
-     * and the code to extend page tables etc. will be
-     * incorporated here.
-     */
-
-    unsigned long * curr = parray;
-    unsigned long num_installed;
-
+    unsigned long pfn, i;
     struct page *page;
 
-    num_installed = 0;
-    while ( (page = rem_ballooned_page()) != NULL )
+    for ( i = 0; i < nr_mfns; i++ )
     {
-        unsigned long pfn;
-
-        if ( num_installed == num )
+        if ( (page = balloon_retrieve()) != NULL )
            break;
 
        pfn = page - mem_map;
-
-        if(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
-        {
-            printk("BUG: Tried to unballoon existing page!");
+        if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY )
            BUG();
-        }
 
-        phys_to_machine_mapping[pfn] = *curr;
-        queue_machphys_update(*curr, pfn);
-        if (pfn<max_low_pfn)
+        /* Update P->M and M->P tables. */
+        phys_to_machine_mapping[pfn] = mfn_list[i];
+        queue_machphys_update(mfn_list[i], pfn);
+
+        /* Link back into the page tables if it's not a highmem page. */
+        if ( pfn < max_low_pfn )
            queue_l1_entry_update(
                get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
-                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
-
-        __free_page(mem_map + pfn);
+                (mfn_list[i] << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
 
-        curr++;
-        num_installed++;
+        /* Finally, relinquish the memory back to the system allocator. */
+        ClearPageReserved(page);
+        set_page_count(page, 1);
+        __free_page(page);
     }
 
-    return num_installed;
+    return i;
 }
 
 unsigned long deflate_balloon(unsigned long num_pages)
 {
     unsigned long ret;
-    unsigned long * parray;
+    unsigned long *parray;
 
     if ( num_pages > credit )
     {
@@ -305,205 +274,25 @@ unsigned long deflate_balloon(unsigned l
 
 #define PAGE_TO_MB_SHIFT 8
 
-/*
- * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c
- * The loops do go through all of low memory (ZONE_NORMAL). The
- * old pages have _PAGE_PRESENT set and so get skipped.
- * If low memory is not full, the new pages are used to fill it, going
- * from cur_low_pfn to low_pfn. high memory is not direct mapped so
- * no extension is needed for new high memory.
- */
-
-static void pagetable_extend (int cur_low_pfn, int newpages)
-{
-    unsigned long vaddr, end;
-    pgd_t *kpgd, *pgd, *pgd_base;
-    int i, j, k;
-    pmd_t *kpmd, *pmd;
-    pte_t *kpte, *pte, *pte_base;
-    int low_pfn = min(cur_low_pfn+newpages,(int)max_low_pfn);
-
-    /*
-     * This can be zero as well - no problem, in that case we exit
-     * the loops anyway due to the PTRS_PER_* conditions.
-     */
-    end = (unsigned long)__va(low_pfn*PAGE_SIZE);
-
-    pgd_base = init_mm.pgd;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-    i = pgd_index(PAGE_OFFSET);
-#else
-    i = __pgd_offset(PAGE_OFFSET);
-#endif
-    pgd = pgd_base + i;
-
-    for (; i < PTRS_PER_PGD; pgd++, i++) {
-        vaddr = i*PGDIR_SIZE;
-        if (end && (vaddr >= end))
-            break;
-        pmd = (pmd_t *)pgd;
-        for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
-            vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
-            if (end && (vaddr >= end))
-                break;
-
-            /* Filled in for us already? */
-            if ( pmd_val(*pmd) & _PAGE_PRESENT )
-                continue;
-
-            pte_base = pte = (pte_t *) __get_free_page(GFP_KERNEL);
-
-            for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
-                vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
-                if (end && (vaddr >= end))
-                    break;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-                *pte = mk_pte(virt_to_page(vaddr), PAGE_KERNEL);
-#else
-                *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
-#endif
-            }
-            kpgd = pgd_offset_k((unsigned long)pte_base);
-            kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
-            kpte = pte_offset_kernel(kpmd, (unsigned long)pte_base);
-            queue_l1_entry_update(kpte,
-                                  (*(unsigned long *)kpte)&~_PAGE_RW);
-            set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
-            XEN_flush_page_update_queue();
-        }
-    }
-}
-
-/*
- * claim_new_pages() asks xen to increase this domain's memory reservation
- * and return a list of the new pages of memory. This new pages are
- * added to the free list of the memory manager.
- *
- * Available RAM does not normally change while Linux runs. To make this work,
- * the linux mem= boottime command line param must say how big memory could
- * possibly grow. Then setup_arch() in arch/xen/kernel/setup.c
- * sets max_pfn, max_low_pfn and the zones according to
- * this max memory size. The page tables themselves can only be
- * extended after xen has assigned new pages to this domain.
- */
-
-static unsigned long
-claim_new_pages(unsigned long num_pages)
-{
-    unsigned long new_page_cnt, pfn;
-    unsigned long * parray, *curr;
-
-    if (most_seen_pages+num_pages> max_pfn)
-        num_pages = max_pfn-most_seen_pages;
-    if (num_pages==0) return -EINVAL;
-
-    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
-    if ( parray == NULL )
-    {
-        printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n");
-        return 0;
-    }
-
-    new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
-                                         parray, num_pages, 0);
-    if ( new_page_cnt != num_pages )
-    {
-        printk(KERN_WARNING
-            "claim_new_pages: xen granted only %lu of %lu requested pages\n",
-            new_page_cnt, num_pages);
-
-        /*
-         * Avoid xen lockup when user forgot to setdomainmaxmem. Xen
-         * usually can dribble out a few pages and then hangs.
-         */
-        if ( new_page_cnt < 1000 )
-        {
-            printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
-            HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
-                                  parray, new_page_cnt, 0);
-            return -EFAULT;
-        }
-    }
-    memcpy(phys_to_machine_mapping+most_seen_pages, parray,
-           new_page_cnt * sizeof(unsigned long));
-
-    pagetable_extend(most_seen_pages,new_page_cnt);
-
-    for ( pfn = most_seen_pages, curr = parray;
-          pfn < most_seen_pages+new_page_cnt;
-          pfn++, curr++ )
-    {
-        struct page *page = mem_map + pfn;
-
-#ifndef CONFIG_HIGHMEM
-        if ( pfn>=max_low_pfn )
-        {
-            printk(KERN_WARNING "Warning only %ldMB will be used.\n",
-               pfn>>PAGE_TO_MB_SHIFT);
-            printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
-            break;
-        }
-#endif
-        queue_machphys_update(*curr, pfn);
-        if ( pfn < max_low_pfn )
-            queue_l1_entry_update(
-                get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
-                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
-
-        XEN_flush_page_update_queue();
-
-        /* this next bit mimics arch/xen/mm/init.c:one_highpage_init() */
-        ClearPageReserved(page);
-        if ( pfn >= max_low_pfn )
-            set_bit(PG_highmem, &page->flags);
-        set_page_count(page, 1);
-        __free_page(page);
-    }
-
-    vfree(parray);
-
-    return new_page_cnt;
-}
-
-
 static int balloon_try_target(int target)
 {
     int change, reclaim;
 
     if ( target < current_pages )
     {
-        int change = inflate_balloon(current_pages-target);
-        if ( change <= 0 )
+        if ( (change = inflate_balloon(current_pages-target)) <= 0 )
            return change;
-
        current_pages -= change;
        printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n",
            change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
     }
-    else if ( target > current_pages )
+    else if ( (reclaim = target - current_pages) > 0 )
     {
-        reclaim = min((unsigned long)target,most_seen_pages) - current_pages;
-
-        if ( reclaim )
-        {
-            change = deflate_balloon( reclaim );
-            if ( change <= 0 )
-                return change;
-            current_pages += change;
-            printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
-                change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
-        }
-
-        if ( most_seen_pages < target )
-        {
-            int growth = claim_new_pages(target-most_seen_pages);
-            if ( growth <= 0 )
-                return growth;
-            most_seen_pages += growth;
-            current_pages += growth;
-            printk(KERN_INFO "Granted %dMB new mem. Dom now has %luMB\n",
-                growth>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
-        }
+        if ( (change = deflate_balloon(reclaim)) <= 0 )
+            return change;
+        current_pages += change;
+        printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
+            change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
     }
 
     return 1;
@@ -640,12 +429,15 @@ static int balloon_read(char *page, char
 
 static int __init balloon_init(void)
 {
-    printk(KERN_ALERT "Starting Xen Balloon driver\n");
+    unsigned long pfn;
+    struct page *page;
 
-    most_seen_pages = current_pages = min(xen_start_info.nr_pages,max_pfn);
+    IPRINTK("Initialising balloon driver.\n");
+
+    current_pages = min(xen_start_info.nr_pages, max_pfn);
     if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL )
     {
-        printk(KERN_ALERT "Unable to create balloon driver proc entry!");
+        WPRINTK("Unable to create balloon driver proc entry!");
        return -1;
     }
 
@@ -661,18 +453,12 @@ static int __init balloon_init(void)
     (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx,
                                     CALLBACK_IN_BLOCKING_CONTEXT);
 
-    /*
-     * make_module a new phys map if mem= says xen can give us memory to grow
-     */
-    if ( max_pfn > xen_start_info.nr_pages )
+    /* Initialise the balloon with excess memory space. */
+    for ( pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++ )
     {
-        extern unsigned long *phys_to_machine_mapping;
-        unsigned long *newmap;
-        newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
-        memset(newmap, ~0, max_pfn * sizeof(unsigned long));
-        memcpy(newmap, phys_to_machine_mapping,
-               xen_start_info.nr_pages * sizeof(unsigned long));
-        phys_to_machine_mapping = newmap;
+        page = &mem_map[pfn];
+        if ( !PageReserved(page) )
+            balloon_append(page);
     }
 
     return 0;
--- a/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h  Tue Nov 30 09:01:47 2004 +0000
+++ b/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h  Tue Nov 30 17:17:52 2004 +0000
@@ -59,7 +59,6 @@ void do_hypervisor_callback(struct pt_re
 #define PROT_ON  1
 #define PROT_OFF 0
 void /* __init */ protect_page(pgd_t *dpgd, void *page, int mode);
-void /* __init */ protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode);
 
 /* arch/xen/i386/kernel/head.S */
 void lgdt_finish(void);
--- a/xen/arch/x86/x86_32/entry.S  Tue Nov 30 09:01:47 2004 +0000
+++ b/xen/arch/x86/x86_32/entry.S  Tue Nov 30 17:17:52 2004 +0000
@@ -341,6 +341,7 @@ process_guest_exception_and_events:
         leal DOMAIN_trap_bounce(%ebx),%edx
         testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%edx)
         jz   test_all_events
+        cli  # create_bounce_frame needs CLI for pre-exceptions to work
         call create_bounce_frame
         jmp  test_all_events