ia64/xen-unstable
changeset 18775:5fd51e1e9c79
x86: PV support for hugepages
Hugepage support must be enabled via the hypervisor command line
option "allowhugepage". There is currently no support in the tools for
saving/restoring/migrating guests who use hugepages.
Signed-off-by: Dave McCracken <dave.mccracken@oracle.com>
Hugepage support must be enabled via the hypervisor command line
option "allowhugepage". There is currently no support in the tools for
saving/restoring/migrating guests who use hugepages.
Signed-off-by: Dave McCracken <dave.mccracken@oracle.com>
author | Keir Fraser <keir.fraser@citrix.com> |
---|---|
date | Wed Nov 05 10:57:21 2008 +0000 (2008-11-05) |
parents | 1e437b5b418a |
children | 3af208e6f850 |
files | xen/arch/x86/mm.c xen/arch/x86/traps.c xen/include/asm-x86/mm.h xen/include/asm-x86/x86_32/page.h xen/include/asm-x86/x86_64/page.h |
line diff
--- a/xen/arch/x86/mm.c	Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/arch/x86/mm.c	Wed Nov 05 10:57:21 2008 +0000
@@ -160,6 +160,9 @@ unsigned long total_pages;
 
 #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
 
+int opt_allow_hugepage;
+boolean_param("allowhugepage", opt_allow_hugepage);
+
 #define l1_disallow_mask(d)                                     \
     ((d != dom_io) &&                                           \
      (rangeset_is_empty((d)->iomem_caps) &&                     \
@@ -586,6 +589,28 @@ static int get_page_and_type_from_pagenr
     return rc;
 }
 
+static int get_data_page(
+    struct page_info *page, struct domain *d, int writeable)
+{
+    int rc;
+
+    if ( writeable )
+        rc = get_page_and_type(page, d, PGT_writable_page);
+    else
+        rc = get_page(page, d);
+
+    return rc;
+}
+
+static void put_data_page(
+    struct page_info *page, int writeable)
+{
+    if ( writeable )
+        put_page_and_type(page);
+    else
+        put_page(page);
+}
+
 /*
  * We allow root tables to map each other (a.k.a. linear page tables). It
  * needs some special care with reference counts and access permissions:
@@ -700,10 +725,9 @@ get_page_from_l1e(
      * contribute to writeable mapping refcounts.  (This allows the
      * qemu-dm helper process in dom0 to map the domain's memory without
      * messing up the count of "real" writable mappings.) */
-    okay = (((l1f & _PAGE_RW) &&
-             !(unlikely(paging_mode_external(d) && (d != curr->domain))))
-            ? get_page_and_type(page, d, PGT_writable_page)
-            : get_page(page, d));
+    okay = get_data_page(
+        page, d,
+        (l1f & _PAGE_RW) && !(paging_mode_external(d) && (d != curr->domain)));
     if ( !okay )
     {
         MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
@@ -751,6 +775,7 @@ static int
 get_page_from_l2e(
     l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
 {
+    unsigned long mfn = l2e_get_pfn(l2e);
     int rc;
 
     if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
@@ -762,10 +787,37 @@ get_page_from_l2e(
         return -EINVAL;
     }
 
-    rc = get_page_and_type_from_pagenr(
-        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0, 0);
-    if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
-        rc = 0;
+    if ( !(l2e_get_flags(l2e) & _PAGE_PSE) )
+    {
+        rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d, 0, 0);
+        if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+            rc = 0;
+    }
+    else if ( !opt_allow_hugepage || (mfn & (L1_PAGETABLE_ENTRIES-1)) )
+    {
+        rc = -EINVAL;
+    }
+    else
+    {
+        unsigned long m = mfn;
+        int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW);
+
+        do {
+            rc = get_data_page(mfn_to_page(m), d, writeable);
+            if ( unlikely(!rc) )
+            {
+                while ( m-- > mfn )
+                    put_data_page(mfn_to_page(m), writeable);
+                return -EINVAL;
+            }
+        } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+
+#ifdef __x86_64__
+        map_pages_to_xen(
+            (unsigned long)mfn_to_virt(mfn), mfn, L1_PAGETABLE_ENTRIES,
+            PAGE_HYPERVISOR | l2e_get_flags(l2e));
+#endif
+    }
 
     return rc;
 }
@@ -954,13 +1006,24 @@ void put_page_from_l1e(l1_pgentry_t l1e,
  */
 static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
 {
-    if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
-         (l2e_get_pfn(l2e) != pfn) )
+    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || (l2e_get_pfn(l2e) == pfn) )
+        return 1;
+
+    if ( l2e_get_flags(l2e) & _PAGE_PSE )
+    {
+        unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
+        int writeable = l2e_get_flags(l2e) & _PAGE_RW;
+        ASSERT(opt_allow_hugepage && !(mfn & (L1_PAGETABLE_ENTRIES-1)));
+        do {
+            put_data_page(mfn_to_page(m), writeable);
+        } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+    }
+    else
     {
         put_page_and_type(l2e_get_page(l2e));
-        return 0;
     }
-    return 1;
+
+    return 0;
 }
 
 static int __put_page_type(struct page_info *, int preemptible);
--- a/xen/arch/x86/traps.c	Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/arch/x86/traps.c	Wed Nov 05 10:57:21 2008 +0000
@@ -723,7 +723,8 @@ static void pv_cpuid(struct cpu_user_reg
     {
         /* Modify Feature Information. */
         __clear_bit(X86_FEATURE_VME, &d);
-        __clear_bit(X86_FEATURE_PSE, &d);
+        if ( !opt_allow_hugepage )
+            __clear_bit(X86_FEATURE_PSE, &d);
         __clear_bit(X86_FEATURE_PGE, &d);
         __clear_bit(X86_FEATURE_MCE, &d);
         __clear_bit(X86_FEATURE_MCA, &d);
@@ -2003,9 +2004,12 @@ static int emulate_privileged_op(struct
         case 4: /* Read CR4 */
             /*
              * Guests can read CR4 to see what features Xen has enabled. We
-             * therefore lie about PGE & PSE as they are unavailable to guests.
+             * therefore lie about PGE as it is unavailable to guests.
+             * Also disallow PSE if hugepages are not enabled.
              */
-            *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE);
+            *reg = read_cr4() & ~X86_CR4_PGE;
+            if ( !opt_allow_hugepage )
+                *reg &= ~X86_CR4_PSE;
             break;
 
         default:
--- a/xen/include/asm-x86/mm.h	Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/include/asm-x86/mm.h	Wed Nov 05 10:57:21 2008 +0000
@@ -263,6 +263,7 @@ pae_copy_root(struct vcpu *v, l3_pgentry
 
 int check_descriptor(const struct domain *, struct desc_struct *d);
 
+extern int opt_allow_hugepage;
 
 /******************************************************************************
  * With shadow pagetables, the different kinds of address start
--- a/xen/include/asm-x86/x86_32/page.h	Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/include/asm-x86/x86_32/page.h	Wed Nov 05 10:57:21 2008 +0000
@@ -112,7 +112,7 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
 #define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
-#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK & ~_PAGE_PSE)
 #define L3_DISALLOW_MASK 0xFFFFF1FEU /* must-be-zero */
 
 #endif /* __X86_32_PAGE_H__ */
--- a/xen/include/asm-x86/x86_64/page.h	Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/include/asm-x86/x86_64/page.h	Wed Nov 05 10:57:21 2008 +0000
@@ -115,7 +115,7 @@ typedef l4_pgentry_t root_pgentry_t;
 #define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
 
 #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
-#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK & ~_PAGE_PSE)
 #define L3_DISALLOW_MASK (BASE_DISALLOW_MASK)
 #define L4_DISALLOW_MASK (BASE_DISALLOW_MASK)
 