ia64/xen-unstable
changeset 14736:14c25e48a557
linux: improve x86 page table handling performance
Where possible,
- use hypercalls instead of writing to read-only pages
- fold TLB flushes into page table update hypercalls (see the condensed ptep_clear_flush() sketch after this list)
- on PAE, use single-access updates instead of two-access ones
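As a condensed illustration of the first two points (taken from the pgtable-2level.h hunk below, with commentary added), the new ptep_clear_flush() clears a PTE and flushes the TLB on every CPU in the mm's cpu_vm_mask with a single HYPERVISOR_update_va_mapping() hypercall, falling back to a direct store plus flush_tlb_page() only when the hypercall does not apply or fails:

#define ptep_clear_flush(vma, addr, ptep)			\
({								\
	pte_t *__ptep = (ptep);					\
	pte_t __res = *__ptep;					\
	/* One hypercall both clears the entry and		\
	   invalidates the TLB (UVMF_INVLPG) on all CPUs in	\
	   the mm's mask (UVMF_MULTI). */			\
	if (!pte_none(__res) &&					\
	    ((vma)->vm_mm != current->mm ||			\
	     HYPERVISOR_update_va_mapping(addr, __pte(0),	\
		(unsigned long)(vma)->vm_mm->cpu_vm_mask.bits|	\
			UVMF_INVLPG|UVMF_MULTI))) {		\
		/* Fallback: clear directly, flush explicitly. */ \
		__ptep->pte_low = 0;				\
		flush_tlb_page(vma, addr);			\
	}							\
	__res;							\
})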
The single change to PAE pte_clear() yields a 25-30% boost for kernel
builds on a 4x2x2-CPU (16-way), 8GB box; the other changes together
yield improvements of 2-5%.
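That pte_clear() change is the third point in action (condensed from the pgtable-3level.h hunk below, with commentary added): on PAE, clearing an entry previously took two stores (low half, write barrier, high half), each of which Xen must trap and emulate while the page table is write-protected; the new code attempts a single hypercall first and keeps the two-store sequence only as the fallback:

static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	/* One hypercall suffices for the current or kernel address space... */
	if ((mm != current->mm && mm != &init_mm)
	    || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
		/* ...otherwise clear in two accesses, P-bit (low half) first. */
		ptep->pte_low = 0;
		smp_wmb();
		ptep->pte_high = 0;
	}
}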
Also, adjust backward compatibility handling in a few more places.
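The compatibility adjustments are the new CONFIG_XEN_COMPAT_030002 conditionals in both pgtable.h hunks below: only when compatibility with the Xen 3.0.2 writable-page-table behaviour is configured does pmd_present() still have to accept an entry whose _PAGE_PRESENT bit has been temporarily cleared:

#ifdef CONFIG_XEN_COMPAT_030002
/* wr.p.t. can temporarily clear _PAGE_PRESENT, so test the whole value. */
#define pmd_present(x)	(pmd_val(x))
#else
#define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
#endif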
Signed-off-by: Jan Beulich <jbeulich@novell.com>
author    kfraser@localhost.localdomain
date      Thu Apr 05 09:10:33 2007 +0100 (2007-04-05)
parents   07d3208c0ca3
children  e5931b5e6cc5
files     linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h
          linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h
          linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h
          linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
line diff
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h	Thu Apr 05 08:59:12 2007 +0100
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h	Thu Apr 05 09:10:33 2007 +0100
@@ -36,8 +36,37 @@
 #define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
 #define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
 
-#define ptep_get_and_clear(mm,addr,xp)	__pte_ma(xchg(&(xp)->pte_low, 0))
+#define pte_none(x)		(!(x).pte_low)
+
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = *ptep;
+	if (!pte_none(pte)) {
+		if (mm != &init_mm)
+			pte = __pte_ma(xchg(&ptep->pte_low, 0));
+		else
+			HYPERVISOR_update_va_mapping(addr, __pte(0), 0);
+	}
+	return pte;
+}
+
+#define ptep_clear_flush(vma, addr, ptep)			\
+({								\
+	pte_t *__ptep = (ptep);					\
+	pte_t __res = *__ptep;					\
+	if (!pte_none(__res) &&					\
+	    ((vma)->vm_mm != current->mm ||			\
+	     HYPERVISOR_update_va_mapping(addr, __pte(0),	\
+		(unsigned long)(vma)->vm_mm->cpu_vm_mask.bits|	\
+			UVMF_INVLPG|UVMF_MULTI))) {		\
+		__ptep->pte_low = 0;				\
+		flush_tlb_page(vma, addr);			\
+	}							\
+	__res;							\
+})
+
 #define pte_same(a, b)		((a).pte_low == (b).pte_low)
+
 #define __pte_mfn(_pte)		((_pte).pte_low >> PAGE_SHIFT)
 #define pte_mfn(_pte)		((_pte).pte_low & _PAGE_PRESENT ? \
 	__pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte)))
@@ -46,7 +75,6 @@
 
 #define pte_page(_pte)	pfn_to_page(pte_pfn(_pte))
 
-#define pte_none(x)		(!(x).pte_low)
 #define pfn_pte(pfn, prot)	__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
 #define pfn_pmd(pfn, prot)	__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h	Thu Apr 05 08:59:12 2007 +0100
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h	Thu Apr 05 09:10:33 2007 +0100
@@ -99,6 +99,11 @@ static inline void pud_clear (pud_t * pu
 #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
 			pmd_index(address))
 
+static inline int pte_none(pte_t pte)
+{
+	return !(pte.pte_low | pte.pte_high);
+}
+
 /*
  * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
  * entry, so clear the bottom half first and enforce ordering with a compiler
@@ -106,24 +111,50 @@ static inline void pud_clear (pud_t * pu
  */
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	ptep->pte_low = 0;
-	smp_wmb();
-	ptep->pte_high = 0;
+	if ((mm != current->mm && mm != &init_mm)
+	    || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
+		ptep->pte_low = 0;
+		smp_wmb();
+		ptep->pte_high = 0;
+	}
 }
 
 #define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
 
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	pte_t res;
+	pte_t pte = *ptep;
+	if (!pte_none(pte)) {
+		if (mm != &init_mm) {
+			uint64_t val = pte_val_ma(pte);
+			if (__cmpxchg64(ptep, val, 0) != val) {
+				/* xchg acts as a barrier before the setting of the high bits */
+				pte.pte_low = xchg(&ptep->pte_low, 0);
+				pte.pte_high = ptep->pte_high;
+				ptep->pte_high = 0;
+			}
+		} else
+			HYPERVISOR_update_va_mapping(addr, __pte(0), 0);
+	}
+	return pte;
+}
 
-	/* xchg acts as a barrier before the setting of the high bits */
-	res.pte_low = xchg(&ptep->pte_low, 0);
-	res.pte_high = ptep->pte_high;
-	ptep->pte_high = 0;
-
-	return res;
-}
+#define ptep_clear_flush(vma, addr, ptep)			\
+({								\
+	pte_t *__ptep = (ptep);					\
+	pte_t __res = *__ptep;					\
+	if (!pte_none(__res) &&					\
+	    ((vma)->vm_mm != current->mm ||			\
+	     HYPERVISOR_update_va_mapping(addr, __pte(0),	\
+		(unsigned long)(vma)->vm_mm->cpu_vm_mask.bits|	\
+			UVMF_INVLPG|UVMF_MULTI))) {		\
+		__ptep->pte_low = 0;				\
+		smp_wmb();					\
+		__ptep->pte_high = 0;				\
+		flush_tlb_page(vma, addr);			\
+	}							\
+	__res;							\
+})
 
 static inline int pte_same(pte_t a, pte_t b)
 {
@@ -132,11 +163,6 @@ static inline int pte_same(pte_t a, pte_
 
 #define pte_page(x)	pfn_to_page(pte_pfn(x))
 
-static inline int pte_none(pte_t pte)
-{
-	return !pte.pte_low && !pte.pte_high;
-}
-
 #define __pte_mfn(_pte)	(((_pte).pte_low >> PAGE_SHIFT) | \
 			 ((_pte).pte_high << (32-PAGE_SHIFT)))
 #define pte_mfn(_pte)	((_pte).pte_low & _PAGE_PRESENT ? \
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h	Thu Apr 05 08:59:12 2007 +0100
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h	Thu Apr 05 09:10:33 2007 +0100
@@ -210,9 +210,13 @@ extern unsigned long pg0[];
 
 /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
 #define pmd_none(x)	(!(unsigned long)pmd_val(x))
+#ifdef CONFIG_XEN_COMPAT_030002
 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
    can temporarily clear it. */
 #define pmd_present(x)	(pmd_val(x))
+#else
+#define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
+#endif
 #define pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
 
 
@@ -252,36 +256,47 @@ static inline pte_t pte_mkhuge(pte_t pte
 # include <asm/pgtable-2level.h>
 #endif
 
-static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
-{
-	if (!pte_dirty(*ptep))
-		return 0;
-	return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
-}
+#define ptep_test_and_clear_dirty(vma, addr, ptep)		\
+({								\
+	pte_t __pte = *(ptep);					\
+	int __ret = pte_dirty(__pte);				\
+	if (__ret) {						\
+		__pte = pte_mkclean(__pte);			\
+		if ((vma)->vm_mm != current->mm ||		\
+		    HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
+			(ptep)->pte_low = __pte.pte_low;	\
+	}							\
+	__ret;							\
+})
 
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
-{
-	if (!pte_young(*ptep))
-		return 0;
-	return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
-}
+#define ptep_test_and_clear_young(vma, addr, ptep)		\
+({								\
+	pte_t __pte = *(ptep);					\
+	int __ret = pte_young(__pte);				\
+	if (__ret)						\
+		__pte = pte_mkold(__pte);			\
+	if ((vma)->vm_mm != current->mm ||			\
+	    HYPERVISOR_update_va_mapping(addr, __pte, 0))	\
+		(ptep)->pte_low = __pte.pte_low;		\
+	__ret;							\
+})
 
-static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
-{
-	pte_t pte;
-	if (full) {
-		pte = *ptep;
-		pte_clear(mm, addr, ptep);
-	} else {
-		pte = ptep_get_and_clear(mm, addr, ptep);
-	}
-	return pte;
-}
+#define ptep_get_and_clear_full(mm, addr, ptep, full)		\
+	((full) ? ({						\
+		pte_t __res = *(ptep);				\
+		if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) \
+			xen_l1_entry_update(ptep, __pte(0));	\
+		else						\
+			*(ptep) = __pte(0);			\
+		__res;						\
+	 }) :							\
+	 ptep_get_and_clear(mm, addr, ptep))
 
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-	if (pte_write(*ptep))
-		clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
+	pte_t pte = *ptep;
+	if (pte_write(pte))
+		set_pte_at(mm, addr, ptep, pte_wrprotect(pte));
 }
 
 /*
@@ -418,6 +433,20 @@ extern void noexec_setup(const char *str
 #define pte_unmap_nested(pte) do { } while (0)
 #endif
 
+#define __HAVE_ARCH_PTEP_ESTABLISH
+#define ptep_establish(vma, address, ptep, pteval)		\
+	do {							\
+		if ( likely((vma)->vm_mm == current->mm) ) {	\
+			BUG_ON(HYPERVISOR_update_va_mapping(address, \
+				pteval,				\
+				(unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
+					UVMF_INVLPG|UVMF_MULTI)); \
+		} else {					\
+			xen_l1_entry_update(ptep, pteval);	\
+			flush_tlb_page(vma, address);		\
+		}						\
+	} while (0)
+
 /*
  * The i386 doesn't have any external MMU info: the kernel page
  * tables contain all the necessary information.
@@ -430,27 +459,12 @@ extern void noexec_setup(const char *str
  */
 #define update_mmu_cache(vma,address,pte) do { } while (0)
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
-	do {							\
-		if (__dirty) {					\
-			if ( likely((__vma)->vm_mm == current->mm) ) { \
-				BUG_ON(HYPERVISOR_update_va_mapping(__address, \
-					__entry,		\
-					(unsigned long)(__vma)->vm_mm->cpu_vm_mask.bits| \
-						UVMF_INVLPG|UVMF_MULTI)); \
-			} else {				\
-				xen_l1_entry_update(__ptep, __entry); \
-				flush_tlb_page(__vma, __address); \
-			}					\
-		}						\
+#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
+	do {							\
+		if (dirty)					\
+			ptep_establish(vma, address, ptep, entry); \
 	} while (0)
 
-#define __HAVE_ARCH_PTEP_ESTABLISH
-#define ptep_establish(__vma, __address, __ptep, __entry)	\
-do {								\
-	ptep_set_access_flags(__vma, __address, __ptep, __entry, 1); \
-} while (0)
-
 #include <xen/features.h>
 void make_lowmem_page_readonly(void *va, unsigned int feature);
 void make_lowmem_page_writable(void *va, unsigned int feature);
@@ -508,6 +522,7 @@ direct_remap_pfn_range(vma,from,pfn,size
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 #define __HAVE_ARCH_PTE_SAME
 #include <asm-generic/pgtable.h>
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Thu Apr 05 08:59:12 2007 +0100
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Thu Apr 05 09:10:33 2007 +0100
@@ -93,11 +93,6 @@ extern unsigned long empty_zero_page[PAG
 #define pgd_none(x)	(!pgd_val(x))
 #define pud_none(x)	(!pud_val(x))
 
-#define set_pte_batched(pteptr, pteval) \
-	queue_l1_entry_update(pteptr, (pteval))
-
-extern inline int pud_present(pud_t pud)	{ return !pud_none(pud); }
-
 static inline void set_pte(pte_t *dst, pte_t val)
 {
 	*dst = val;
@@ -123,41 +118,6 @@ static inline void pgd_clear (pgd_t * pg
 #define pud_page(pud) \
 	((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
 
-/*
- * A note on implementation of this atomic 'get-and-clear' operation.
- * This is actually very simple because Xen Linux can only run on a single
- * processor. Therefore, we cannot race other processors setting the 'accessed'
- * or 'dirty' bits on a page-table entry.
- * Even if pages are shared between domains, that is not a problem because
- * each domain will have separate page tables, with their own versions of
- * accessed & dirty state.
- */
-#define ptep_get_and_clear(mm,addr,xp)	__pte_ma(xchg(&(xp)->pte, 0))
-
-#if 0
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp)
-{
-	pte_t pte = *xp;
-	if (pte.pte)
-		set_pte(xp, __pte_ma(0));
-	return pte;
-}
-#endif
-
-struct mm_struct;
-
-static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
-{
-	pte_t pte;
-	if (full) {
-		pte = *ptep;
-		*ptep = __pte(0);
-	} else {
-		pte = ptep_get_and_clear(mm, addr, ptep);
-	}
-	return pte;
-}
-
 #define pte_same(a, b)		((a).pte == (b).pte)
 
 #define pte_pgprot(a)	(__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
@@ -318,6 +278,46 @@ static inline pte_t pfn_pte(unsigned lon
 	return __pte(pte);
 }
 
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = *ptep;
+	if (!pte_none(pte)) {
+		if (mm != &init_mm)
+			pte = __pte_ma(xchg(&ptep->pte, 0));
+		else
+			HYPERVISOR_update_va_mapping(addr, __pte(0), 0);
+	}
+	return pte;
+}
+
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
+{
+	if (full) {
+		pte_t pte = *ptep;
+		if (mm->context.pinned)
+			xen_l1_entry_update(ptep, __pte(0));
+		else
+			*ptep = __pte(0);
+		return pte;
+	}
+	return ptep_get_and_clear(mm, addr, ptep);
+}
+
+#define ptep_clear_flush(vma, addr, ptep)			\
+({								\
+	pte_t *__ptep = (ptep);					\
+	pte_t __res = *__ptep;					\
+	if (!pte_none(__res) &&					\
+	    ((vma)->vm_mm != current->mm ||			\
+	     HYPERVISOR_update_va_mapping(addr, __pte(0),	\
+		(unsigned long)(vma)->vm_mm->cpu_vm_mask.bits|	\
+			UVMF_INVLPG|UVMF_MULTI))) {		\
+		__ptep->pte = 0;				\
+		flush_tlb_page(vma, addr);			\
+	}							\
+	__res;							\
+})
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
@@ -346,31 +346,29 @@ static inline pte_t pte_mkyoung(pte_t pt
 static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; }
 static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
 
-struct vm_area_struct;
+#define ptep_test_and_clear_dirty(vma, addr, ptep)		\
+({								\
+	pte_t __pte = *(ptep);					\
+	int __ret = pte_dirty(__pte);				\
+	if (__ret)						\
+		set_pte_at((vma)->vm_mm, addr, ptep, pte_mkclean(__pte)); \
+	__ret;							\
+})
 
-static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
-{
-	pte_t pte = *ptep;
-	int ret = pte_dirty(pte);
-	if (ret)
-		set_pte(ptep, pte_mkclean(pte));
-	return ret;
-}
-
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
-{
-	pte_t pte = *ptep;
-	int ret = pte_young(pte);
-	if (ret)
-		set_pte(ptep, pte_mkold(pte));
-	return ret;
-}
+#define ptep_test_and_clear_young(vma, addr, ptep)		\
+({								\
+	pte_t __pte = *(ptep);					\
+	int __ret = pte_young(__pte);				\
+	if (__ret)						\
+		set_pte_at((vma)->vm_mm, addr, ptep, pte_mkold(__pte)); \
+	__ret;							\
+})
 
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	pte_t pte = *ptep;
 	if (pte_write(pte))
-		set_pte(ptep, pte_wrprotect(pte));
+		set_pte_at(mm, addr, ptep, pte_wrprotect(pte));
 }
 
 /*
@@ -403,6 +401,7 @@ static inline int pmd_large(pmd_t pte) {
 /* to find an entry in a page-table-directory. */
 #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 #define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
+#define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT)
 
 /* PMD  - Level 2 access */
 #define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
@@ -412,9 +411,13 @@ static inline int pmd_large(pmd_t pte) {
 #define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
 			pmd_index(address))
 #define pmd_none(x)	(!pmd_val(x))
+#ifdef CONFIG_XEN_COMPAT_030002
 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
   can temporarily clear it. */
 #define pmd_present(x)	(pmd_val(x))
+#else
+#define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
+#endif
 #define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
 #define pmd_bad(x) ((pmd_val(x) & ~(PTE_MASK | _PAGE_USER | _PAGE_PRESENT)) \
 		    != (_KERNPG_TABLE & ~(_PAGE_USER | _PAGE_PRESENT)))
@@ -468,25 +471,34 @@ static inline pte_t pte_modify(pte_t pte
 
 #define update_mmu_cache(vma,address,pte) do { } while (0)
 
+/*
+ * Rules for using ptep_establish: the pte MUST be a user pte, and
+ * must be a present->present transition.
+ */
+#define __HAVE_ARCH_PTEP_ESTABLISH
+#define ptep_establish(vma, address, ptep, pteval)		\
+	do {							\
+		if ( likely((vma)->vm_mm == current->mm) ) {	\
+			BUG_ON(HYPERVISOR_update_va_mapping(address, \
+				pteval,				\
+				(unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
+					UVMF_INVLPG|UVMF_MULTI)); \
+		} else {					\
+			xen_l1_entry_update(ptep, pteval);	\
+			flush_tlb_page(vma, address);		\
+		}						\
+	} while (0)
+
 /* We only update the dirty/accessed state if we set
  * the dirty bit by hand in the kernel, since the hardware
 * will do the accessed bit for us, and we don't want to
 * race with other CPU's that might be updating the dirty
 * bit at the same time. */
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
-	do {							\
-		if (__dirty) {					\
-			if ( likely((__vma)->vm_mm == current->mm) ) { \
-				BUG_ON(HYPERVISOR_update_va_mapping(__address, \
-					__entry,		\
-					(unsigned long)(__vma)->vm_mm->cpu_vm_mask.bits| \
-						UVMF_INVLPG|UVMF_MULTI)); \
-			} else {				\
-				xen_l1_entry_update(__ptep, __entry); \
-				flush_tlb_page(__vma, __address); \
-			}					\
-		}						\
+#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
+	do {							\
+		if (dirty)					\
+			ptep_establish(vma, address, ptep, entry); \
 	} while (0)
 
 /* Encode and de-code a swap entry */
@@ -506,6 +518,8 @@ extern int kern_addr_valid(unsigned long
 
 #define DOMID_LOCAL (0xFFFFU)
 
+struct vm_area_struct;
+
 int direct_remap_pfn_range(struct vm_area_struct *vma,
                            unsigned long address,
                            unsigned long mfn,
@@ -551,6 +565,7 @@ int touch_pte_range(struct mm_struct *mm
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 #define __HAVE_ARCH_PTE_SAME
 #include <asm-generic/pgtable.h>