ia64/xen-unstable

changeset 16147:ca2984b17fcf

x86: Tighten handling of page-type attributes and make
map_pages_to_xen() smarter and safer.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author Keir Fraser <keir@xensource.com>
date Wed Oct 17 14:38:19 2007 +0100 (2007-10-17)
parents b4278beaf354
children 765600a13e4a
files xen/arch/x86/mm.c xen/arch/x86/smp.c xen/include/asm-x86/mm.h xen/include/asm-x86/page.h xen/include/asm-x86/x86_32/page-3level.h xen/include/asm-x86/x86_32/page.h xen/include/asm-x86/x86_64/page.h
line diff
     1.1 --- a/xen/arch/x86/mm.c	Wed Oct 17 13:12:03 2007 +0100
     1.2 +++ b/xen/arch/x86/mm.c	Wed Oct 17 14:38:19 2007 +0100
     1.3 @@ -149,6 +149,13 @@ struct page_info *frame_table;
     1.4  unsigned long max_page;
     1.5  unsigned long total_pages;
     1.6  
     1.7 +#define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
     1.8 +
     1.9 +#define l1_disallow_mask(d)                                     \
    1.10 +    ((rangeset_is_empty((d)->iomem_caps) &&                     \
    1.11 +      rangeset_is_empty((d)->arch.ioport_caps)) ?               \
    1.12 +     L1_DISALLOW_MASK : (L1_DISALLOW_MASK & ~PAGE_CACHE_ATTRS))
    1.13 +
    1.14  #ifdef CONFIG_COMPAT
    1.15  l2_pgentry_t *compat_idle_pg_table_l2 = NULL;
    1.16  #define l3_disallow_mask(d) (!is_pv_32on64_domain(d) ?  \
    1.17 @@ -612,14 +619,17 @@ get_page_from_l1e(
    1.18  {
    1.19      unsigned long mfn = l1e_get_pfn(l1e);
    1.20      struct page_info *page = mfn_to_page(mfn);
    1.21 +    unsigned int disallow_mask;
    1.22      int okay;
    1.23  
    1.24      if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
    1.25          return 1;
    1.26  
    1.27 -    if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
    1.28 +    disallow_mask = l1_disallow_mask((d == dom_io) ? current->domain : d);
    1.29 +    if ( unlikely(l1e_get_flags(l1e) & disallow_mask) )
    1.30      {
    1.31 -        MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
    1.32 +        MEM_LOG("Bad L1 flags %x",
    1.33 +                l1e_get_flags(l1e) & disallow_mask);
    1.34          return 0;
    1.35      }
    1.36  
    1.37 @@ -1367,10 +1377,10 @@ static int mod_l1_entry(l1_pgentry_t *pl
    1.38          ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
    1.39          nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
    1.40  
    1.41 -        if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
    1.42 +        if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
    1.43          {
    1.44              MEM_LOG("Bad L1 flags %x",
    1.45 -                    l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
    1.46 +                    l1e_get_flags(nl1e) & l1_disallow_mask(d));
    1.47              return 0;
    1.48          }
    1.49  
    1.50 @@ -1574,7 +1584,7 @@ static int mod_l4_entry(struct domain *d
    1.51  
    1.52  #endif
    1.53  
    1.54 -int alloc_page_type(struct page_info *page, unsigned long type)
    1.55 +static int alloc_page_type(struct page_info *page, unsigned long type)
    1.56  {
    1.57      struct domain *owner = page_get_owner(page);
    1.58  
    1.59 @@ -3524,37 +3534,71 @@ void free_xen_pagetable(void *v)
    1.60          free_domheap_page(virt_to_page(v));
    1.61  }
    1.62  
     1.63 +/* Convert to/from superpage-mapping flags for map_pages_to_xen(). */
    1.64 +#define l1f_to_l2f(f) ((f) | _PAGE_PSE)
    1.65 +#define l2f_to_l1f(f) ((f) & ~_PAGE_PSE)
    1.66 +
    1.67 +/*
    1.68 + * map_pages_to_xen() can be called with interrupts disabled:
    1.69 + *  * During early bootstrap; or
    1.70 + *  * alloc_xenheap_pages() via memguard_guard_range
    1.71 + * In these cases it is safe to use flush_area_local():
    1.72 + *  * Because only the local CPU is online; or
    1.73 + *  * Because stale TLB entries do not matter for memguard_[un]guard_range().
    1.74 + */
    1.75 +#define flush_area(v,f) (!local_irq_is_enabled() ?              \
    1.76 +                         flush_area_local((const void *)v, f) : \
    1.77 +                         flush_area_all((const void *)v, f))
    1.78 +
    1.79  int map_pages_to_xen(
    1.80      unsigned long virt,
    1.81      unsigned long mfn,
    1.82      unsigned long nr_mfns,
    1.83 -    unsigned long flags)
    1.84 +    unsigned int flags)
    1.85  {
    1.86      l2_pgentry_t *pl2e, ol2e;
    1.87      l1_pgentry_t *pl1e, ol1e;
    1.88      unsigned int  i;
    1.89  
    1.90 -    unsigned int  map_small_pages = !!(flags & MAP_SMALL_PAGES);
    1.91 -    flags &= ~MAP_SMALL_PAGES;
    1.92 -
    1.93      while ( nr_mfns != 0 )
    1.94      {
    1.95          pl2e = virt_to_xen_l2e(virt);
    1.96  
    1.97          if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) &&
    1.98               (nr_mfns >= (1<<PAGETABLE_ORDER)) &&
    1.99 -             !map_small_pages )
   1.100 +             !(flags & (_PAGE_PAT|MAP_SMALL_PAGES)) )
   1.101          {
   1.102              /* Super-page mapping. */
   1.103              ol2e = *pl2e;
   1.104 -            l2e_write_atomic(pl2e, l2e_from_pfn(mfn, flags|_PAGE_PSE));
   1.105 +            l2e_write_atomic(pl2e, l2e_from_pfn(mfn, l1f_to_l2f(flags)));
   1.106  
   1.107              if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
   1.108              {
   1.109 -                flush_area_local((const void *)virt,
   1.110 -                                 FLUSH_TLB_GLOBAL|FLUSH_LEVEL(2));
   1.111 -                if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
   1.112 -                    free_xen_pagetable(mfn_to_virt(l2e_get_pfn(ol2e)));
   1.113 +                unsigned int flush_flags = FLUSH_TLB | FLUSH_LEVEL(2);
   1.114 +
   1.115 +                if ( l2e_get_flags(ol2e) & _PAGE_PSE )
   1.116 +                {
   1.117 +                    if ( l2e_get_flags(ol2e) & _PAGE_GLOBAL )
   1.118 +                        flush_flags |= FLUSH_TLB_GLOBAL;
   1.119 +                    if ( (l2e_get_flags(ol2e) ^ l1f_to_l2f(flags)) &
   1.120 +                         l1f_to_l2f(PAGE_CACHE_ATTRS) )
   1.121 +                        flush_flags |= FLUSH_CACHE;
   1.122 +                    flush_area(virt, flush_flags);
   1.123 +                }
   1.124 +                else
   1.125 +                {
   1.126 +                    pl1e = l2e_to_l1e(ol2e);
   1.127 +                    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
   1.128 +                    {
   1.129 +                        if ( l1e_get_flags(pl1e[i]) & _PAGE_GLOBAL )
   1.130 +                            flush_flags |= FLUSH_TLB_GLOBAL;
   1.131 +                        if ( (l1e_get_flags(pl1e[i]) ^ flags) &
   1.132 +                             PAGE_CACHE_ATTRS )
   1.133 +                            flush_flags |= FLUSH_CACHE;
   1.134 +                    }
   1.135 +                    flush_area(virt, flush_flags);
   1.136 +                    free_xen_pagetable(pl1e);
   1.137 +                }
   1.138              }
   1.139  
   1.140              virt    += 1UL << L2_PAGETABLE_SHIFT;
   1.141 @@ -3567,32 +3611,83 @@ int map_pages_to_xen(
   1.142              if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
   1.143              {
   1.144                  pl1e = alloc_xen_pagetable();
   1.145 +                if ( pl1e == NULL )
   1.146 +                    return -ENOMEM;
   1.147                  clear_page(pl1e);
   1.148                  l2e_write(pl2e, l2e_from_pfn(virt_to_mfn(pl1e),
   1.149                                               __PAGE_HYPERVISOR));
   1.150              }
   1.151              else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
   1.152              {
   1.153 +                unsigned int flush_flags = FLUSH_TLB | FLUSH_LEVEL(2);
   1.154 +
   1.155 +                /* Skip this PTE if there is no change. */
   1.156 +                if ( (((l2e_get_pfn(*pl2e) & ~(L1_PAGETABLE_ENTRIES - 1)) +
   1.157 +                       l1_table_offset(virt)) == mfn) &&
   1.158 +                     (((l2f_to_l1f(l2e_get_flags(*pl2e)) ^ flags) &
   1.159 +                       ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0) )
   1.160 +                {
   1.161 +                    virt    += 1UL << L1_PAGETABLE_SHIFT;
   1.162 +                    mfn     += 1UL;
   1.163 +                    nr_mfns -= 1UL;
   1.164 +                    continue;
   1.165 +                }
   1.166 +
   1.167                  pl1e = alloc_xen_pagetable();
   1.168 +                if ( pl1e == NULL )
   1.169 +                    return -ENOMEM;
   1.170 +
   1.171                  for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
   1.172                      l1e_write(&pl1e[i],
   1.173                                l1e_from_pfn(l2e_get_pfn(*pl2e) + i,
   1.174 -                                           l2e_get_flags(*pl2e) & ~_PAGE_PSE));
   1.175 +                                           l2f_to_l1f(l2e_get_flags(*pl2e))));
   1.176 +
   1.177 +                if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL )
   1.178 +                    flush_flags |= FLUSH_TLB_GLOBAL;
   1.179 +
   1.180                  l2e_write_atomic(pl2e, l2e_from_pfn(virt_to_mfn(pl1e),
   1.181                                                      __PAGE_HYPERVISOR));
   1.182 -                flush_area_local((const void *)virt,
   1.183 -                                 FLUSH_TLB_GLOBAL|FLUSH_LEVEL(2));
   1.184 +                flush_area(virt, flush_flags);
   1.185              }
   1.186  
   1.187              pl1e  = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
   1.188              ol1e  = *pl1e;
   1.189              l1e_write_atomic(pl1e, l1e_from_pfn(mfn, flags));
   1.190              if ( (l1e_get_flags(ol1e) & _PAGE_PRESENT) )
   1.191 -                flush_tlb_one_local(virt);
   1.192 +            {
   1.193 +                unsigned int flush_flags = FLUSH_TLB | FLUSH_LEVEL(1);
   1.194 +                if ( l1e_get_flags(ol1e) & _PAGE_GLOBAL )
   1.195 +                    flush_flags |= FLUSH_TLB_GLOBAL;
   1.196 +                if ( (l1e_get_flags(ol1e) ^ flags) & PAGE_CACHE_ATTRS )
   1.197 +                    flush_flags |= FLUSH_CACHE;
   1.198 +                flush_area(virt, flush_flags);
   1.199 +            }
   1.200  
   1.201              virt    += 1UL << L1_PAGETABLE_SHIFT;
   1.202              mfn     += 1UL;
   1.203              nr_mfns -= 1UL;
   1.204 +
   1.205 +            if ( (flags == PAGE_HYPERVISOR) &&
   1.206 +                 ((nr_mfns == 0) ||
   1.207 +                  ((((virt >> PAGE_SHIFT) | mfn) &
   1.208 +                    ((1 << PAGETABLE_ORDER) - 1)) == 0)) )
   1.209 +            {
   1.210 +                unsigned long base_mfn;
   1.211 +                pl1e = l2e_to_l1e(*pl2e);
   1.212 +                base_mfn = l1e_get_pfn(*pl1e) & ~(L1_PAGETABLE_ENTRIES - 1);
   1.213 +                for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++, pl1e++ )
   1.214 +                    if ( (l1e_get_pfn(*pl1e) != (base_mfn + i)) ||
   1.215 +                         (l1e_get_flags(*pl1e) != flags) )
   1.216 +                        break;
   1.217 +                if ( i == L1_PAGETABLE_ENTRIES )
   1.218 +                {
   1.219 +                    ol2e = *pl2e;
   1.220 +                    l2e_write_atomic(pl2e, l2e_from_pfn(base_mfn,
   1.221 +                                                        l1f_to_l2f(flags)));
   1.222 +                    flush_area(virt, FLUSH_TLB_GLOBAL | FLUSH_LEVEL(2));
   1.223 +                    free_xen_pagetable(l2e_to_l1e(ol2e));
   1.224 +                }
   1.225 +            }
   1.226          }
   1.227      }
   1.228  
   1.229 @@ -3659,6 +3754,7 @@ void destroy_xen_mappings(unsigned long 
   1.230              {
   1.231                  /* Empty: zap the L2E and free the L1 page. */
   1.232                  l2e_write_atomic(pl2e, l2e_empty());
   1.233 +                flush_all(FLUSH_TLB_GLOBAL); /* flush before free */
   1.234                  free_xen_pagetable(pl1e);
   1.235              }
   1.236          }
     2.1 --- a/xen/arch/x86/smp.c	Wed Oct 17 13:12:03 2007 +0100
     2.2 +++ b/xen/arch/x86/smp.c	Wed Oct 17 14:38:19 2007 +0100
     2.3 @@ -182,7 +182,7 @@ fastcall void smp_invalidate_interrupt(v
     2.4  void flush_area_mask(cpumask_t mask, const void *va, unsigned int flags)
     2.5  {
     2.6      ASSERT(local_irq_is_enabled());
     2.7 -    
     2.8 +
     2.9      if ( cpu_isset(smp_processor_id(), mask) )
    2.10      {
    2.11          flush_area_local(va, flags);
     3.1 --- a/xen/include/asm-x86/mm.h	Wed Oct 17 13:12:03 2007 +0100
     3.2 +++ b/xen/include/asm-x86/mm.h	Wed Oct 17 14:38:19 2007 +0100
     3.3 @@ -144,7 +144,6 @@ extern unsigned long max_page;
     3.4  extern unsigned long total_pages;
     3.5  void init_frametable(void);
     3.6  
     3.7 -int alloc_page_type(struct page_info *page, unsigned long type);
     3.8  void free_page_type(struct page_info *page, unsigned long type);
     3.9  int _shadow_mode_refcounts(struct domain *d);
    3.10  
     4.1 --- a/xen/include/asm-x86/page.h	Wed Oct 17 13:12:03 2007 +0100
     4.2 +++ b/xen/include/asm-x86/page.h	Wed Oct 17 14:38:19 2007 +0100
     4.3 @@ -355,13 +355,12 @@ void free_xen_pagetable(void *v);
     4.4  l2_pgentry_t *virt_to_xen_l2e(unsigned long v);
     4.5  
     4.6  /* Map machine page range in Xen virtual address space. */
     4.7 -#define MAP_SMALL_PAGES (1UL<<16) /* don't use superpages for the mapping */
     4.8 -int
     4.9 -map_pages_to_xen(
    4.10 +#define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages for the mapping */
    4.11 +int map_pages_to_xen(
    4.12      unsigned long virt,
    4.13      unsigned long mfn,
    4.14      unsigned long nr_mfns,
    4.15 -    unsigned long flags);
    4.16 +    unsigned int flags);
    4.17  void destroy_xen_mappings(unsigned long v, unsigned long e);
    4.18  
    4.19  #endif /* !__ASSEMBLY__ */
     5.1 --- a/xen/include/asm-x86/x86_32/page-3level.h	Wed Oct 17 13:12:03 2007 +0100
     5.2 +++ b/xen/include/asm-x86/x86_32/page-3level.h	Wed Oct 17 14:38:19 2007 +0100
     5.3 @@ -85,6 +85,6 @@ typedef l3_pgentry_t root_pgentry_t;
     5.4  #define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF))
     5.5  #define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF))
     5.6  
     5.7 -#define L3_DISALLOW_MASK 0xFFFFF1E6U /* must-be-zero */
     5.8 +#define L3_DISALLOW_MASK 0xFFFFF1FEU /* must-be-zero */
     5.9  
    5.10  #endif /* __X86_32_PAGE_3LEVEL_H__ */
     6.1 --- a/xen/include/asm-x86/x86_32/page.h	Wed Oct 17 13:12:03 2007 +0100
     6.2 +++ b/xen/include/asm-x86/x86_32/page.h	Wed Oct 17 14:38:19 2007 +0100
     6.3 @@ -33,10 +33,10 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
     6.4      (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB)
     6.5  
     6.6  /*
     6.7 - * Disallow unused flag bits plus PAT, PSE and GLOBAL.
     6.8 + * Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
     6.9   * Permit the NX bit if the hardware supports it.
    6.10   */
    6.11 -#define BASE_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX)
    6.12 +#define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
    6.13  
    6.14  #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
    6.15  #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
     7.1 --- a/xen/include/asm-x86/x86_64/page.h	Wed Oct 17 13:12:03 2007 +0100
     7.2 +++ b/xen/include/asm-x86/x86_64/page.h	Wed Oct 17 14:38:19 2007 +0100
     7.3 @@ -105,18 +105,18 @@ typedef l4_pgentry_t root_pgentry_t;
     7.4  #define _PAGE_NX     (cpu_has_nx ? _PAGE_NX_BIT : 0U)
     7.5  
     7.6  /*
     7.7 - * Disallow unused flag bits plus PAT, PSE and GLOBAL.
     7.8 + * Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
     7.9   * Permit the NX bit if the hardware supports it.
    7.10   * Note that range [62:52] is available for software use on x86/64.
    7.11   */
    7.12 -#define BASE_DISALLOW_MASK (0xFF800180U & ~_PAGE_NX)
    7.13 +#define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
    7.14  
    7.15  #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
    7.16  #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
    7.17  #define L3_DISALLOW_MASK (BASE_DISALLOW_MASK)
    7.18  #define L4_DISALLOW_MASK (BASE_DISALLOW_MASK)
    7.19  
    7.20 -#define COMPAT_L3_DISALLOW_MASK 0xFFFFF1E6U
    7.21 +#define COMPAT_L3_DISALLOW_MASK 0xFFFFF1FEU
    7.22  
    7.23  #define PAGE_HYPERVISOR         (__PAGE_HYPERVISOR         | _PAGE_GLOBAL)
    7.24  #define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL)