ia64/xen-unstable
changeset 6450:5978be010bec
The attached patch enables x86_64 xenlinux with "late pin, early
unpin", which is already implemented for x86_32. Since we now pin only
the root page table rather than every other level, overall performance
is better, especially with workloads that require heavy memory
management operations.
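In outline: each mm starts out on an "unpinned" list, its page tables are
made read-only and its root (pgd) pinned only the first time the mm is
switched to, and it is unpinned again as soon as the address space is torn
down. The toy user-space model below is illustration only, not part of the
patch; the names mirror mm_pin/mm_unpin/switch_mm from the diff further down:

/* Toy model of "late pin, early unpin"; the real code is in the diff below. */
#include <stdio.h>
#include <stdbool.h>

struct mm { const char *name; bool pinned; };

static void mm_pin(struct mm *mm)   { mm->pinned = true;  printf("pin   %s\n", mm->name); }
static void mm_unpin(struct mm *mm) { mm->pinned = false; printf("unpin %s\n", mm->name); }

/* Late pin: pin only when the mm is first run on a CPU. */
static void switch_mm(struct mm *next)
{
	if (!next->pinned)
		mm_pin(next);
	/* ... load the (now pinned) root into cr3 ... */
}

/* Early unpin: as soon as the address space is being torn down. */
static void exit_mmap(struct mm *mm)
{
	if (mm->pinned)
		mm_unpin(mm);
	/* ... teardown then runs on writable page tables ... */
}

int main(void)
{
	struct mm a = { "task-a", false };
	switch_mm(&a);   /* pins on first use */
	switch_mm(&a);   /* already pinned: no further hypercalls */
	exit_mmap(&a);   /* unpinned before teardown */
	return 0;
}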
On an 8-way x86_64 xenlinux (dom0), the kernel build improved by about
10% (using make -j32). Even on a small setup such as a UP HT system, I
see about a 3% performance gain with the kernel build (make -j4).
Lmbench also shows improvements in fork/exec/sh:
Processor, Processes - times in microseconds - smaller is better
--------------------------------------------------------------------
Host                 OS  Mhz null null      open slct sig  sig  fork exec sh
                             call  I/O stat clos TCP  inst hndl proc proc proc
--------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
          Linux 2.6.12- 3786 1.13 1.36 3.93 6.04 10.5 1.43 4.33 536. 1446 3614
          Linux 2.6.12- 3786 1.13 1.36 3.91 6.03 10.4 1.44 4.38 346. 1050 2831
Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
unpin", which is already implemented for x86_32. Since we now only pin
the root rather than any of the other levels, the overall performance
became better especially with workloads that require heavy memory
management operations.
On 8-way x86_64 xenlinux (dom0) the kernel build was improved by about
10% (using make -j32). Even a small setup like a UP HT system, I see
about 3% performance gain with kernel build (make -j4).
Lmbench also shows improvements in fork/exec/sh:
Processor, Processes - times in microseconds - smaller is better
--------------------------------------------------------------------
Host OS Mhz null null open slct sig sig fork exec sh =20
call I/O stat clos TCP inst hndl proc proc proc
--------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ----=20
Linux 2.6.12- 3786 1.13 1.36 3.93 6.04 10.5 1.43 4.33 536. 1446 3614
Linux 2.6.12- 3786 1.13 1.36 3.91 6.03 10.4 1.44 4.38 346. 1050 2831
Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
author      kaf24@firebug.cl.cam.ac.uk
date        Fri Aug 26 11:02:14 2005 +0000 (2005-08-26)
parents     edeee85c90b1
children    2b95125015a5
files       linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c
            linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
            linux-2.6-xen-sparse/arch/xen/x86_64/mm/pageattr.c
            linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu.h
            linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h
            linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h
            linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h
line diff
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c	Fri Aug 26 11:00:14 2005 +0000
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c	Fri Aug 26 11:02:14 2005 +0000
@@ -105,14 +105,19 @@ int init_new_context(struct task_struct
 	struct mm_struct * old_mm;
 	int retval = 0;
 
+	memset(&mm->context, 0, sizeof(mm->context));
 	init_MUTEX(&mm->context.sem);
-	mm->context.size = 0;
 	old_mm = current->mm;
 	if (old_mm && old_mm->context.size > 0) {
 		down(&old_mm->context.sem);
 		retval = copy_ldt(&mm->context, &old_mm->context);
 		up(&old_mm->context.sem);
 	}
+	if (retval == 0) {
+		spin_lock(&mm_unpinned_lock);
+		list_add(&mm->context.unpinned, &mm_unpinned);
+		spin_unlock(&mm_unpinned_lock);
+	}
 	return retval;
 }
 
@@ -134,6 +139,11 @@ void destroy_context(struct mm_struct *m
 		kfree(mm->context.ldt);
 		mm->context.size = 0;
 	}
+	if (!mm->context.pinned) {
+		spin_lock(&mm_unpinned_lock);
+		list_del(&mm->context.unpinned);
+		spin_unlock(&mm_unpinned_lock);
+	}
 }
 
 static int read_ldt(void __user * ptr, unsigned long bytecount)
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c	Fri Aug 26 11:00:14 2005 +0000
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c	Fri Aug 26 11:02:14 2005 +0000
@@ -712,6 +712,7 @@ void __init paging_init(void)
 	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
 
 	memset(empty_zero_page, 0, sizeof(empty_zero_page));
+	init_mm.context.pinned = 1;
 
 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
 	{
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/pageattr.c	Fri Aug 26 11:00:14 2005 +0000
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/pageattr.c	Fri Aug 26 11:02:14 2005 +0000
@@ -12,19 +12,145 @@
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
+#include <asm/io.h>
+
+#ifdef CONFIG_XEN
 #include <asm/pgalloc.h>
-#include <asm/io.h>
+#include <asm/mmu_context.h>
+
+LIST_HEAD(mm_unpinned);
+DEFINE_SPINLOCK(mm_unpinned_lock);
+
+static inline void mm_walk_set_prot(void *pt, pgprot_t flags)
+{
+	struct page *page = virt_to_page(pt);
+	unsigned long pfn = page_to_pfn(page);
+
+	BUG_ON(HYPERVISOR_update_va_mapping(
+		(unsigned long)__va(pfn << PAGE_SHIFT),
+		pfn_pte(pfn, flags), 0));
+}
+
+static void mm_walk(struct mm_struct *mm, pgprot_t flags)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	int g,u,m;
+
+	pgd = mm->pgd;
+	for (g = 0; g <= USER_PTRS_PER_PGD; g++, pgd++) {
+		if (pgd_none(*pgd))
+			continue;
+		pud = pud_offset(pgd, 0);
+		if (PTRS_PER_PUD > 1) /* not folded */
+			mm_walk_set_prot(pud,flags);
+		for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+			if (pud_none(*pud))
+				continue;
+			pmd = pmd_offset(pud, 0);
+			if (PTRS_PER_PMD > 1) /* not folded */
+				mm_walk_set_prot(pmd,flags);
+			for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+				if (pmd_none(*pmd))
+					continue;
+				pte = pte_offset_kernel(pmd,0);
+				mm_walk_set_prot(pte,flags);
+			}
+		}
+	}
+}
+
+void mm_pin(struct mm_struct *mm)
+{
+	spin_lock(&mm->page_table_lock);
+
+	mm_walk(mm, PAGE_KERNEL_RO);
+	BUG_ON(HYPERVISOR_update_va_mapping(
+		(unsigned long)mm->pgd,
+		pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO),
+		UVMF_TLB_FLUSH));
+	BUG_ON(HYPERVISOR_update_va_mapping(
+		(unsigned long)__user_pgd(mm->pgd),
+		pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, PAGE_KERNEL_RO),
+		UVMF_TLB_FLUSH));
+	xen_pgd_pin(__pa(mm->pgd)); /* kernel */
+	xen_pgd_pin(__pa(__user_pgd(mm->pgd))); /* user */
+	mm->context.pinned = 1;
+	spin_lock(&mm_unpinned_lock);
+	list_del(&mm->context.unpinned);
+	spin_unlock(&mm_unpinned_lock);
+
+	spin_unlock(&mm->page_table_lock);
+}
+
+void mm_unpin(struct mm_struct *mm)
+{
+	spin_lock(&mm->page_table_lock);
+
+	xen_pgd_unpin(__pa(mm->pgd));
+	xen_pgd_unpin(__pa(__user_pgd(mm->pgd)));
+	BUG_ON(HYPERVISOR_update_va_mapping(
+		(unsigned long)mm->pgd,
+		pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0));
+	BUG_ON(HYPERVISOR_update_va_mapping(
+		(unsigned long)__user_pgd(mm->pgd),
+		pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, PAGE_KERNEL), 0));
+	mm_walk(mm, PAGE_KERNEL);
+	xen_tlb_flush();
+	mm->context.pinned = 0;
+	spin_lock(&mm_unpinned_lock);
+	list_add(&mm->context.unpinned, &mm_unpinned);
+	spin_unlock(&mm_unpinned_lock);
+
+	spin_unlock(&mm->page_table_lock);
+}
+
+void mm_pin_all(void)
+{
+	while (!list_empty(&mm_unpinned))
+		mm_pin(list_entry(mm_unpinned.next, struct mm_struct,
+				  context.unpinned));
+}
+
+void _arch_exit_mmap(struct mm_struct *mm)
+{
+	struct task_struct *tsk = current;
+
+	task_lock(tsk);
+
+	/*
+	 * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
+	 * *much* faster this way, as no tlb flushes means bigger wrpt batches.
+	 */
+	if ( tsk->active_mm == mm )
+	{
+		tsk->active_mm = &init_mm;
+		atomic_inc(&init_mm.mm_count);
+
+		switch_mm(mm, &init_mm, tsk);
+
+		atomic_dec(&mm->mm_count);
+		BUG_ON(atomic_read(&mm->mm_count) == 0);
+	}
+
+	task_unlock(tsk);
+
+	if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) )
+		mm_unpin(mm);
+}
 
 void pte_free(struct page *pte)
 {
-	pte_t *ptep;
-
-	ptep = pfn_to_kaddr(page_to_pfn(pte));
+	unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
 
-	xen_pte_unpin(__pa(ptep));
-	make_page_writable(ptep);
-	__free_page(pte);
+	if (!pte_write(*virt_to_ptep(va)))
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));
+	__free_page(pte);
 }
+#endif /* CONFIG_XEN */
 
 static inline pte_t *lookup_address(unsigned long address)
 {
@@ -78,7 +204,7 @@ static void flush_kernel_map(void *addre
 	} else
 		asm volatile("wbinvd":::"memory");
 	if (address)
-		__flush_tlb_one((unsigned long) address);
+		__flush_tlb_one(address);
 	else
 		__flush_tlb_all();
 }
@@ -166,14 +292,17 @@ static int
 		BUG();
 
 	/* on x86-64 the direct mapping set at boot is not using 4k pages */
-//	BUG_ON(PageReserved(kpte_page));
 	/*
 	 * ..., but the XEN guest kernels (currently) do:
 	 * If the pte was reserved, it means it was created at boot
 	 * time (not via split_large_page) and in turn we must not
	 * replace it with a large page.
 	 */
-	if (!PageReserved(kpte_page)) {
+#ifndef CONFIG_XEN
+	BUG_ON(PageReserved(kpte_page));
+#else
+	if (!PageReserved(kpte_page))
+#endif
 	switch (page_count(kpte_page)) {
 	case 1:
 		save_page(address, kpte_page);
@@ -182,7 +311,6 @@ static int
 	case 0:
 		BUG(); /* memleak and failed 2M page regeneration */
 	}
-	}
 	return 0;
 }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu.h	Fri Aug 26 11:02:14 2005 +0000
@@ -0,0 +1,33 @@
+#ifndef __x86_64_MMU_H
+#define __x86_64_MMU_H
+
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+
+/*
+ * The x86_64 doesn't have a mmu context, but
+ * we put the segment information here.
+ *
+ * cpu_vm_mask is used to optimize ldt flushing.
+ */
+typedef struct {
+	void *ldt;
+	rwlock_t ldtlock;
+	int size;
+	struct semaphore sem;
+#ifdef CONFIG_XEN
+	unsigned pinned:1;
+	struct list_head unpinned;
+#endif
+} mm_context_t;
+
+#ifdef CONFIG_XEN
+extern struct list_head mm_unpinned;
+extern spinlock_t mm_unpinned_lock;
+
+/* mm/memory.c:exit_mmap hook */
+extern void _arch_exit_mmap(struct mm_struct *mm);
+#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
+#endif
+
+#endif
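Note: the arch_exit_mmap() hook declared above is the "early unpin" entry
point; it is expected to be invoked from exit_mmap() in mm/memory.c before
unmap_vmas() runs (that generic-code change is not shown in this changeset).
The small self-contained sketch below only illustrates the optional-hook
pattern and call placement; struct mm_struct and exit_mmap here are stand-ins,
not the kernel's definitions:

/* Sketch only -- the real mm/memory.c caller is not part of this diff. */
#include <stdio.h>

struct mm_struct { int pinned; };

/* If an architecture header (like mmu.h above) does not define the hook,
 * fall back to a no-op so generic code can call it unconditionally. */
#ifndef arch_exit_mmap
#define arch_exit_mmap(mm) do { (void)(mm); } while (0)
#endif

static void exit_mmap(struct mm_struct *mm)
{
	/* Early unpin happens here, before the address space is torn down,
	 * so the rest of the teardown runs on writable page tables. */
	arch_exit_mmap(mm);
	printf("teardown continues, pinned=%d\n", mm->pinned);
}

int main(void)
{
	struct mm_struct mm = { .pinned = 1 };
	exit_mmap(&mm);
	return 0;
}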
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h	Fri Aug 26 11:00:14 2005 +0000
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h	Fri Aug 26 11:02:14 2005 +0000
@@ -58,6 +58,9 @@ static inline void __prepare_arch_switch
 	}
 }
 
+extern void mm_pin(struct mm_struct *mm);
+extern void mm_unpin(struct mm_struct *mm);
+void mm_pin_all(void);
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
@@ -66,6 +69,9 @@ static inline void switch_mm(struct mm_s
 	struct mmuext_op _op[3], *op = _op;
 
 	if (likely(prev != next)) {
+		if (!next->context.pinned)
+			mm_pin(next);
+
 		/* stop flush ipis for the previous mm */
 		clear_bit(cpu, &prev->cpu_vm_mask);
 #if 0 /* XEN: no lazy tlb */
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h	Fri Aug 26 11:00:14 2005 +0000
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h	Fri Aug 26 11:02:14 2005 +0000
@@ -21,12 +21,27 @@ static inline void pmd_populate_kernel(s
 
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
 {
-	set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
+	if (unlikely((mm)->context.pinned)) {
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			(unsigned long)__va(page_to_pfn(pte) << PAGE_SHIFT),
+			pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0));
+		set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
+	} else {
+		*(pmd) = __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT));
+	}
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-	set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
+	if (unlikely((mm)->context.pinned)) {
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			(unsigned long)pmd,
+			pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT,
+				PAGE_KERNEL_RO), 0));
+		set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
+	} else {
+		*(pud) = __pud(_PAGE_TABLE | __pa(pmd));
+	}
 }
 
 /*
@@ -35,53 +50,54 @@ static inline void pud_populate(struct m
  */
 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 {
-	set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
-	set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
-}
-
-extern __inline__ pmd_t *get_pmd(void)
-{
-	pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
-	if (!pmd)
-		return NULL;
-	make_page_readonly(pmd);
-	xen_pmd_pin(__pa(pmd));
-	return pmd;
+	if (unlikely((mm)->context.pinned)) {
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			(unsigned long)pud,
+			pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT,
+				PAGE_KERNEL_RO), 0));
+		set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
+		set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
+	} else {
+		*(pgd) = __pgd(_PAGE_TABLE | __pa(pud));
+		*(__user_pgd(pgd)) = *(pgd);
+	}
 }
 
 extern __inline__ void pmd_free(pmd_t *pmd)
 {
-	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
-	xen_pmd_unpin(__pa(pmd));
-	make_page_writable(pmd);
+	pte_t *ptep = virt_to_ptep(pmd);
+
+	if (!pte_write(*ptep)) {
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			(unsigned long)pmd,
+			pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT, PAGE_KERNEL),
+			0));
+	}
 	free_page((unsigned long)pmd);
 }
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	pmd_t *pmd = (pmd_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-	if (!pmd)
-		return NULL;
-	make_page_readonly(pmd);
-	xen_pmd_pin(__pa(pmd));
 	return pmd;
 }
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	pud_t *pud = (pud_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-	if (!pud)
-		return NULL;
-	make_page_readonly(pud);
-	xen_pud_pin(__pa(pud));
 	return pud;
 }
 
 static inline void pud_free(pud_t *pud)
 {
-	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
-	xen_pud_unpin(__pa(pud));
-	make_page_writable(pud);
+	pte_t *ptep = virt_to_ptep(pud);
+
+	if (!pte_write(*ptep)) {
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			(unsigned long)pud,
+			pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT, PAGE_KERNEL),
+			0));
+	}
 	free_page((unsigned long)pud);
 }
 
@@ -107,10 +123,6 @@ static inline pgd_t *pgd_alloc(struct mm
 	       (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
 
 	memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */
-	make_pages_readonly(pgd, 2);
-
-	xen_pgd_pin(__pa(pgd)); /* kernel */
-	xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
 	/*
 	 * Set level3_user_pgt for vsyscall area
 	 */
@@ -121,31 +133,45 @@ static inline pgd_t *pgd_alloc(struct mm
 
 static inline void pgd_free(pgd_t *pgd)
 {
-	BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
-	xen_pgd_unpin(__pa(pgd));
-	xen_pgd_unpin(__pa(__user_pgd(pgd)));
-	make_pages_writable(pgd, 2);
+	pte_t *ptep = virt_to_ptep(pgd);
+
+	if (!pte_write(*ptep)) {
+		xen_pgd_unpin(__pa(pgd));
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			(unsigned long)pgd,
+			pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, PAGE_KERNEL),
+			0));
+	}
+
+	ptep = virt_to_ptep(__user_pgd(pgd));
+
+	if (!pte_write(*ptep)) {
+		xen_pgd_unpin(__pa(__user_pgd(pgd)));
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			(unsigned long)__user_pgd(pgd),
+			pfn_pte(virt_to_phys(__user_pgd(pgd))>>PAGE_SHIFT,
+				PAGE_KERNEL),
+			0));
+	}
+
 	free_pages((unsigned long)pgd, 1);
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
 	pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-	if (!pte)
-		return NULL;
-	make_page_readonly(pte);
-	xen_pte_pin(__pa(pte));
+	if (pte)
+		make_page_readonly(pte);
+
 	return pte;
 }
 
 static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-	if (!pte)
-		return NULL;
-	make_page_readonly(pte);
-	xen_pte_pin(__pa(pte));
-	return virt_to_page((unsigned long)pte);
+	struct page *pte;
+
+	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	return pte;
 }
 
 /* Should really implement gc for free page table pages. This could be
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h	Fri Aug 26 11:00:14 2005 +0000
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h	Fri Aug 26 11:02:14 2005 +0000
@@ -18,7 +18,7 @@ extern unsigned long pgkern_mask;
 
 #define __flush_tlb_all() __flush_tlb_global()
 
-#define __flush_tlb_one(addr) xen_invlpg(addr)
+#define __flush_tlb_one(addr) xen_invlpg((unsigned long)addr)
 
 
 /*