direct-io.hg

changeset 11226:fc5736e0a2eb

[x86_64] Guests no longer set _PAGE_USER on kernel mappings.
This may allow guest kernels to be run outside ring 3 in future, and
also provides scope for optimisations today (e.g., using the global bit on
user mappings).
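
A minimal sketch of the compensation this change relies on, keyed to the
adjust_guest_l*e macros added to xen/arch/x86/mm.c below: on x86_64 the
hypervisor, not the guest, now ORs _PAGE_USER into every present entry a
guest installs, since the guest kernel itself runs in ring 3 and must still
reach its own mappings. PTE_PRESENT, PTE_USER, pte_t and adjust_guest_pte
are simplified stand-ins for Xen's _PAGE_PRESENT, _PAGE_USER, l1_pgentry_t
and adjust_guest_l1e(); this is an illustration, not the hypervisor code.

    #include <stdint.h>
    #include <stdio.h>

    #define PTE_PRESENT (1UL << 0)   /* stand-in for _PAGE_PRESENT */
    #define PTE_USER    (1UL << 2)   /* stand-in for _PAGE_USER    */

    typedef uint64_t pte_t;

    /* Mirrors adjust_guest_l1e(): supplement _PAGE_USER on any present
     * entry, so kernel mappings stay reachable from ring 3 even though
     * the guest no longer sets the bit itself. */
    static void adjust_guest_pte(pte_t *pte)
    {
        if (*pte & PTE_PRESENT)
            *pte |= PTE_USER;
    }

    int main(void)
    {
        /* Guest-built kernel mapping: present, but _PAGE_USER left clear. */
        pte_t pte = 0x1234000UL | PTE_PRESENT;
        adjust_guest_pte(&pte);
        printf("pte=%#llx user=%s\n", (unsigned long long)pte,
               (pte & PTE_USER) ? "set" : "clear");
        return 0;
    }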

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Tue Aug 22 15:26:40 2006 +0100 (2006-08-22)
parents d3a9bcf61c33
children bb8d13705aac
files linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h tools/libxc/xc_linux_build.c xen/arch/x86/domain_build.c xen/arch/x86/mm.c
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Tue Aug 22 15:13:07 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Tue Aug 22 15:26:40 2006 +0100
     1.3 @@ -282,9 +282,6 @@ void __iomem * __ioremap(unsigned long p
     1.4  	area->phys_addr = phys_addr;
     1.5  	addr = (void __iomem *) area->addr;
     1.6  	flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
     1.7 -#ifdef __x86_64__
     1.8 -	flags |= _PAGE_USER;
     1.9 -#endif
    1.10  	if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
    1.11  				     phys_addr>>PAGE_SHIFT,
    1.12  				     size, __pgprot(flags), domid)) {
     2.1 --- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c	Tue Aug 22 15:13:07 2006 +0100
     2.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c	Tue Aug 22 15:26:40 2006 +0100
     2.3 @@ -529,7 +529,7 @@ void __init xen_init_pt(void)
     2.4  		mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
     2.5  	level3_kernel_pgt[pud_index(__START_KERNEL_map)] = 
     2.6  		__pud(__pa_symbol(level2_kernel_pgt) |
     2.7 -		      _KERNPG_TABLE | _PAGE_USER);
     2.8 +		      _KERNPG_TABLE);
     2.9  	memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE);
    2.10  
    2.11  	early_make_page_readonly(init_level4_pgt,
    2.12 @@ -578,7 +578,7 @@ void __init extend_init_mapping(unsigned
    2.13  			pte_page = alloc_static_page(&phys);
    2.14  			early_make_page_readonly(
    2.15  				pte_page, XENFEAT_writable_page_tables);
    2.16 -			set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
    2.17 +			set_pmd(pmd, __pmd(phys | _KERNPG_TABLE));
    2.18  		} else {
    2.19  			addr = page[pmd_index(va)];
    2.20  			addr_to_page(addr, pte_page);
    2.21 @@ -587,7 +587,7 @@ void __init extend_init_mapping(unsigned
    2.22  		if (pte_none(*pte)) {
    2.23  			new_pte = pfn_pte(
    2.24  				(va - __START_KERNEL_map) >> PAGE_SHIFT, 
    2.25 -				__pgprot(_KERNPG_TABLE | _PAGE_USER));
    2.26 +				__pgprot(_KERNPG_TABLE));
    2.27  			xen_l1_entry_update(pte, new_pte);
    2.28  		}
    2.29  		va += PAGE_SIZE;
     3.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Tue Aug 22 15:13:07 2006 +0100
     3.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Tue Aug 22 15:26:40 2006 +0100
     3.3 @@ -206,7 +206,7 @@ static inline pte_t ptep_get_and_clear_f
     3.4  #define _PAGE_NX        (1UL<<_PAGE_BIT_NX)
     3.5  
     3.6  #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
     3.7 -#define _KERNPG_TABLE	_PAGE_TABLE
     3.8 +#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
     3.9  
    3.10  #define _PAGE_CHG_MASK	(PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
    3.11  
    3.12 @@ -219,22 +219,21 @@ static inline pte_t ptep_get_and_clear_f
    3.13  #define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
    3.14  #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
    3.15  #define __PAGE_KERNEL \
    3.16 -	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
    3.17 +	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
    3.18  #define __PAGE_KERNEL_EXEC \
    3.19 -	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER )
    3.20 +	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
    3.21  #define __PAGE_KERNEL_NOCACHE \
    3.22 -	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
    3.23 +	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX)
    3.24  #define __PAGE_KERNEL_RO \
    3.25 -	(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
    3.26 +	(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
    3.27  #define __PAGE_KERNEL_VSYSCALL \
    3.28 -	(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_USER )
    3.29 +	(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
    3.30  #define __PAGE_KERNEL_VSYSCALL_NOCACHE \
    3.31 -	(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD | _PAGE_USER )
    3.32 +	(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD)
    3.33  #define __PAGE_KERNEL_LARGE \
    3.34 -	(__PAGE_KERNEL | _PAGE_PSE | _PAGE_USER )
    3.35 +	(__PAGE_KERNEL | _PAGE_PSE)
    3.36  #define __PAGE_KERNEL_LARGE_EXEC \
    3.37 -	(__PAGE_KERNEL_EXEC | _PAGE_PSE | _PAGE_USER )
    3.38 -
    3.39 +	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
    3.40  
    3.41  /*
    3.42   * We don't support GLOBAL page in xenolinux64
    3.43 @@ -423,7 +422,7 @@ static inline pud_t *pud_offset_k(pgd_t 
    3.44     can temporarily clear it. */
    3.45  #define pmd_present(x)	(pmd_val(x))
    3.46  #define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
    3.47 -#define	pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
    3.48 +#define	pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
    3.49  #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
    3.50  #define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
    3.51  
     4.1 --- a/tools/libxc/xc_linux_build.c	Tue Aug 22 15:13:07 2006 +0100
     4.2 +++ b/tools/libxc/xc_linux_build.c	Tue Aug 22 15:26:40 2006 +0100
     4.3 @@ -16,15 +16,11 @@
     4.4  /* Handy for printing out '0' prepended values at native pointer size */
     4.5  #define _p(a) ((void *) ((ulong)a))
     4.6  
     4.7 -#if defined(__i386__)
     4.8  #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
     4.9  #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    4.10 +#if defined(__i386__)
    4.11  #define L3_PROT (_PAGE_PRESENT)
    4.12 -#endif
    4.13 -
    4.14 -#if defined(__x86_64__)
    4.15 -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    4.16 -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    4.17 +#elif defined(__x86_64__)
    4.18  #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    4.19  #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    4.20  #endif
     5.1 --- a/xen/arch/x86/domain_build.c	Tue Aug 22 15:13:07 2006 +0100
     5.2 +++ b/xen/arch/x86/domain_build.c	Tue Aug 22 15:26:40 2006 +0100
     5.3 @@ -66,15 +66,11 @@ boolean_param("dom0_shadow", opt_dom0_sh
     5.4  static char opt_dom0_ioports_disable[200] = "";
     5.5  string_param("dom0_ioports_disable", opt_dom0_ioports_disable);
     5.6  
     5.7 -#if defined(__i386__)
     5.8 -/* No ring-3 access in initial leaf page tables. */
     5.9  #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
    5.10  #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    5.11 +#if CONFIG_PAGING_LEVELS == 3
    5.12  #define L3_PROT (_PAGE_PRESENT)
    5.13 -#elif defined(__x86_64__)
    5.14 -/* Allow ring-3 access in long mode as guest cannot use ring 1. */
    5.15 -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    5.16 -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    5.17 +#elif CONFIG_PAGING_LEVELS == 4
    5.18  #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    5.19  #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    5.20  #endif
     6.1 --- a/xen/arch/x86/mm.c	Tue Aug 22 15:13:07 2006 +0100
     6.2 +++ b/xen/arch/x86/mm.c	Tue Aug 22 15:26:40 2006 +0100
     6.3 @@ -707,6 +707,35 @@ get_page_from_l4e(
     6.4  }
     6.5  #endif /* 4 level */
     6.6  
     6.7 +#ifdef __x86_64__
     6.8 +#define adjust_guest_l1e(pl1e)                                  \
     6.9 +    do  {                                                       \
    6.10 +        if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) )    \
    6.11 +            l1e_add_flags((pl1e), _PAGE_USER);                  \
    6.12 +    } while ( 0 )
    6.13 +
    6.14 +#define adjust_guest_l2e(pl2e)                                  \
    6.15 +    do {                                                        \
    6.16 +        if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) )    \
    6.17 +            l2e_add_flags((pl2e), _PAGE_USER);                  \
    6.18 +    } while ( 0 )
    6.19 +
    6.20 +#define adjust_guest_l3e(pl3e)                                  \
    6.21 +    do {                                                        \
    6.22 +        if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) )    \
    6.23 +            l3e_add_flags((pl3e), _PAGE_USER);                  \
    6.24 +    } while ( 0 )
    6.25 +
    6.26 +#define adjust_guest_l4e(pl4e)                                  \
    6.27 +    do {                                                        \
    6.28 +        if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) )    \
    6.29 +            l4e_add_flags((pl4e), _PAGE_USER);                  \
    6.30 +    } while ( 0 )
    6.31 +#else
    6.32 +#define adjust_guest_l1e(_p) ((void)0)
    6.33 +#define adjust_guest_l2e(_p) ((void)0)
    6.34 +#define adjust_guest_l3e(_p) ((void)0)
    6.35 +#endif
    6.36  
    6.37  void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
    6.38  {
    6.39 @@ -806,10 +835,14 @@ static int alloc_l1_table(struct page_in
    6.40      pl1e = map_domain_page(pfn);
    6.41  
    6.42      for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
    6.43 +    {
    6.44          if ( is_guest_l1_slot(i) &&
    6.45               unlikely(!get_page_from_l1e(pl1e[i], d)) )
    6.46              goto fail;
    6.47  
    6.48 +        adjust_guest_l1e(pl1e[i]);
    6.49 +    }
    6.50 +
    6.51      unmap_domain_page(pl1e);
    6.52      return 1;
    6.53  
    6.54 @@ -985,6 +1018,8 @@ static int alloc_l2_table(struct page_in
    6.55          if ( is_guest_l2_slot(type, i) &&
    6.56               unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) )
    6.57              goto fail;
    6.58 +        
    6.59 +        adjust_guest_l2e(pl2e[i]);
    6.60      }
    6.61  
    6.62  #if CONFIG_PAGING_LEVELS == 2
    6.63 @@ -1053,6 +1088,8 @@ static int alloc_l3_table(struct page_in
    6.64          if ( is_guest_l3_slot(i) &&
    6.65               unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) )
    6.66              goto fail;
    6.67 +        
    6.68 +        adjust_guest_l3e(pl3e[i]);
    6.69      }
    6.70  
    6.71      if ( !create_pae_xen_mappings(pl3e) )
    6.72 @@ -1093,6 +1130,8 @@ static int alloc_l4_table(struct page_in
    6.73          if ( is_guest_l4_slot(i) &&
    6.74               unlikely(!get_page_from_l4e(pl4e[i], pfn, d, vaddr)) )
    6.75              goto fail;
    6.76 +
    6.77 +        adjust_guest_l4e(pl4e[i]);
    6.78      }
    6.79  
    6.80      /* Xen private mappings. */
    6.81 @@ -1255,6 +1294,8 @@ static int mod_l1_entry(l1_pgentry_t *pl
    6.82              return 0;
    6.83          }
    6.84  
    6.85 +        adjust_guest_l1e(nl1e);
    6.86 +
    6.87          /* Fast path for identical mapping, r/w and presence. */
    6.88          if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
    6.89              return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current);
    6.90 @@ -1336,6 +1377,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
    6.91              return 0;
    6.92          }
    6.93  
    6.94 +        adjust_guest_l2e(nl2e);
    6.95 +
    6.96          /* Fast path for identical mapping and presence. */
    6.97          if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
    6.98              return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn);
    6.99 @@ -1398,6 +1441,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
   6.100              return 0;
   6.101          }
   6.102  
   6.103 +        adjust_guest_l3e(nl3e);
   6.104 +
   6.105          /* Fast path for identical mapping and presence. */
   6.106          if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
   6.107              return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn);
   6.108 @@ -1464,6 +1509,8 @@ static int mod_l4_entry(l4_pgentry_t *pl
   6.109              return 0;
   6.110          }
   6.111  
   6.112 +        adjust_guest_l4e(nl4e);
   6.113 +
   6.114          /* Fast path for identical mapping and presence. */
   6.115          if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
   6.116              return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn);
   6.117 @@ -2402,7 +2449,7 @@ int do_mmu_update(
   6.118  
   6.119  
   6.120  static int create_grant_pte_mapping(
   6.121 -    unsigned long pte_addr, l1_pgentry_t _nl1e, struct vcpu *v)
   6.122 +    unsigned long pte_addr, l1_pgentry_t nl1e, struct vcpu *v)
   6.123  {
   6.124      int rc = GNTST_okay;
   6.125      void *va;
   6.126 @@ -2414,6 +2461,8 @@ static int create_grant_pte_mapping(
   6.127  
   6.128      ASSERT(spin_is_locked(&d->big_lock));
   6.129  
   6.130 +    adjust_guest_l1e(nl1e);
   6.131 +
   6.132      gmfn = pte_addr >> PAGE_SHIFT;
   6.133      mfn = gmfn_to_mfn(d, gmfn);
   6.134  
   6.135 @@ -2437,7 +2486,7 @@ static int create_grant_pte_mapping(
   6.136      }
   6.137  
   6.138      ol1e = *(l1_pgentry_t *)va;
   6.139 -    if ( !update_l1e(va, ol1e, _nl1e, mfn, v) )
   6.140 +    if ( !update_l1e(va, ol1e, nl1e, mfn, v) )
   6.141      {
   6.142          put_page_type(page);
   6.143          rc = GNTST_general_error;
   6.144 @@ -2526,17 +2575,19 @@ static int destroy_grant_pte_mapping(
   6.145  
   6.146  
   6.147  static int create_grant_va_mapping(
   6.148 -    unsigned long va, l1_pgentry_t _nl1e, struct vcpu *v)
   6.149 +    unsigned long va, l1_pgentry_t nl1e, struct vcpu *v)
   6.150  {
   6.151      l1_pgentry_t *pl1e, ol1e;
   6.152      struct domain *d = v->domain;
   6.153      
   6.154      ASSERT(spin_is_locked(&d->big_lock));
   6.155  
   6.156 +    adjust_guest_l1e(nl1e);
   6.157 +
   6.158      pl1e = &linear_pg_table[l1_linear_offset(va)];
   6.159  
   6.160      if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ||
   6.161 -         !update_l1e(pl1e, ol1e, _nl1e, 
   6.162 +         !update_l1e(pl1e, ol1e, nl1e, 
   6.163                      l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]), v) )
   6.164          return GNTST_general_error;
   6.165  
   6.166 @@ -3139,6 +3190,8 @@ static int ptwr_emulated_update(
   6.167          }
   6.168      }
   6.169  
   6.170 +    adjust_guest_l1e(nl1e);
   6.171 +
   6.172      /* Checked successfully: do the update (write or cmpxchg). */
   6.173      pl1e = map_domain_page(page_to_mfn(page));
   6.174      pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));