ia64/xen-unstable

changeset 10209:d5f98d23427a

Fix the support for PAE pgdirs above 4GB that was introduced in
changeset 10173:954f4dea9da6336aaa35d0706aed55fde7909644.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue May 30 11:44:23 2006 +0100 (2006-05-30)
parents 6d476981e3a5
children 1dd2062668b2
files xen/arch/x86/domain.c xen/arch/x86/mm.c xen/include/asm-x86/domain.h xen/include/asm-x86/fixmap.h
line diff
     1.1 --- a/xen/arch/x86/domain.c	Sun May 28 15:49:17 2006 +0100
     1.2 +++ b/xen/arch/x86/domain.c	Tue May 30 11:44:23 2006 +0100
     1.3 @@ -146,6 +146,8 @@ struct vcpu *alloc_vcpu_struct(struct do
     1.4      v->arch.guest_vl4table = __linear_l4_table;
     1.5  #endif
     1.6  
     1.7 +    pae_l3_cache_init(&v->arch.pae_l3_cache);
     1.8 +
     1.9      return v;
    1.10  }
    1.11  
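The only domain.c change: the new cache's spinlock must be initialized when the vcpu structure is allocated, before write_ptbase() can ever run on this vcpu. On non-PAE builds the call compiles to nothing:

    /* From the domain.h hunk below:
     *   #define pae_l3_cache_init(c) spin_lock_init(&(c)->lock)   (PAE)
     *   #define pae_l3_cache_init(c) ((void)0)                    (non-PAE)
     */
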
     2.1 --- a/xen/arch/x86/mm.c	Sun May 28 15:49:17 2006 +0100
     2.2 +++ b/xen/arch/x86/mm.c	Tue May 30 11:44:23 2006 +0100
     2.3 @@ -260,39 +260,79 @@ void share_xen_page_with_privileged_gues
     2.4      share_xen_page_with_guest(page, dom_xen, readonly);
     2.5  }
     2.6  
     2.7 +#if defined(CONFIG_X86_PAE)
     2.8 +
     2.9 +#ifdef NDEBUG
     2.10 +/* Only PDPTs above the 4GB boundary need to be shadowed in low memory. */
    2.11 +#define l3tab_needs_shadow(mfn) (mfn >= 0x100000)
    2.12 +#else
    2.13 +/* In debug builds we aggressively shadow PDPTs to exercise code paths. */
    2.14 +#define l3tab_needs_shadow(mfn) ((mfn << PAGE_SHIFT) != __pa(idle_pg_table))
    2.15 +#endif
    2.16 +
    2.17 +static l1_pgentry_t *fix_pae_highmem_pl1e;
    2.18 +
     2.19 +/* Cache a pointer to the L1 entry mapping the first PAE high-memory fixmap slot. */
    2.20 +static int __init cache_pae_fixmap_address(void)
    2.21 +{
    2.22 +    unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0);
    2.23 +    l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base);
    2.24 +    fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base);
    2.25 +    return 0;
    2.26 +}
    2.27 +__initcall(cache_pae_fixmap_address);
    2.28 +
    2.29  static void __write_ptbase(unsigned long mfn)
    2.30  {
    2.31 -#ifdef CONFIG_X86_PAE
    2.32 -    if ( mfn >= 0x100000 )
    2.33 +    l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
    2.34 +    struct pae_l3_cache *cache = &current->arch.pae_l3_cache;
    2.35 +    unsigned int cpu = smp_processor_id();
    2.36 +
    2.37 +    /* Fast path 1: does this mfn need a shadow at all? */
    2.38 +    if ( !l3tab_needs_shadow(mfn) )
    2.39      {
    2.40 -        l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
    2.41 -        struct vcpu *v = current;
    2.42 -        unsigned long flags;
    2.43 -
    2.44 -        /* Protects against re-entry and against __pae_flush_pgd(). */
    2.45 -        local_irq_save(flags);
    2.46 -
    2.47 -        /* Pick an unused low-memory L3 cache slot. */
    2.48 -        v->arch.lowmem_l3tab_inuse ^= 1;
    2.49 -        lowmem_l3tab = v->arch.lowmem_l3tab[v->arch.lowmem_l3tab_inuse];
    2.50 -        v->arch.lowmem_l3tab_high_mfn[v->arch.lowmem_l3tab_inuse] = mfn;
    2.51 -
    2.52 -        /* Map the guest L3 table and copy to the chosen low-memory cache. */
    2.53 -        highmem_l3tab = map_domain_page(mfn);
    2.54 -        memcpy(lowmem_l3tab, highmem_l3tab, sizeof(v->arch.lowmem_l3tab));
    2.55 -        unmap_domain_page(highmem_l3tab);
    2.56 -
    2.57 -        /* Install the low-memory L3 table in CR3. */
    2.58 -        write_cr3(__pa(lowmem_l3tab));
    2.59 -
    2.60 -        local_irq_restore(flags);
    2.61 +        write_cr3(mfn << PAGE_SHIFT);
    2.62          return;
    2.63      }
    2.64 -#endif
    2.65 -
    2.66 +
    2.67 +    /* Caching logic is not interrupt safe. */
    2.68 +    ASSERT(!in_irq());
    2.69 +
    2.70 +    /* Fast path 2: is this mfn already cached? */
    2.71 +    if ( cache->high_mfn == mfn )
    2.72 +    {
    2.73 +        write_cr3(__pa(cache->table[cache->inuse_idx]));
    2.74 +        return;
    2.75 +    }
    2.76 +
    2.77 +    /* Protects against pae_flush_pgd(). */
    2.78 +    spin_lock(&cache->lock);
    2.79 +
    2.80 +    cache->inuse_idx ^= 1;
    2.81 +    cache->high_mfn   = mfn;
    2.82 +
    2.83 +    /* Map the guest L3 table and copy to the chosen low-memory cache. */
    2.84 +    *(fix_pae_highmem_pl1e - cpu) = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
    2.85 +    highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
    2.86 +    lowmem_l3tab  = cache->table[cache->inuse_idx];
    2.87 +    memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0]));
    2.88 +    *(fix_pae_highmem_pl1e - cpu) = l1e_empty();
    2.89 +
    2.90 +    /* Install the low-memory L3 table in CR3. */
    2.91 +    write_cr3(__pa(lowmem_l3tab));
    2.92 +
    2.93 +    spin_unlock(&cache->lock);
    2.94 +}
    2.95 +
    2.96 +#else /* !CONFIG_X86_PAE */
    2.97 +
    2.98 +static void __write_ptbase(unsigned long mfn)
    2.99 +{
   2.100      write_cr3(mfn << PAGE_SHIFT);
   2.101  }
   2.102  
   2.103 +#endif /* !CONFIG_X86_PAE */
   2.104 +
   2.105  void write_ptbase(struct vcpu *v)
   2.106  {
   2.107      __write_ptbase(pagetable_get_pfn(v->arch.monitor_table));
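
The rewritten __write_ptbase() has three cases: a direct CR3 load when no shadow is needed, a cheap reload when the requested MFN is already cached, and a full refill that maps the guest PDPT through a per-CPU fixmap slot and copies its 32 bytes into the idle cache slot. A minimal, self-contained model of the double-buffering invariant (hypothetical names, not Xen code): a refill always writes the slot the CPU is not currently running on, so the live PDPT is never modified in place.

    #include <string.h>

    typedef unsigned long long pdpt_entry_t;   /* stand-in for l3_pgentry_t */

    struct l3_cache_model {
        pdpt_entry_t  table[2][4];   /* two low-memory shadow PDPTs */
        unsigned long high_mfn;      /* guest MFN currently shadowed */
        unsigned int  inuse_idx;     /* slot CR3 currently points at */
    };

    /* Returns the shadow PDPT whose physical address would go into CR3.
     * On a miss we flip to the idle slot *before* copying, so the PDPT
     * the processor is walking is never rewritten underneath it. */
    static pdpt_entry_t *model_switch(struct l3_cache_model *c,
                                      unsigned long mfn,
                                      const pdpt_entry_t *guest_l3)
    {
        if (c->high_mfn != mfn) {
            c->inuse_idx ^= 1;
            c->high_mfn   = mfn;
            memcpy(c->table[c->inuse_idx], guest_l3, sizeof(c->table[0]));
        }
        return c->table[c->inuse_idx];
    }
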
   2.108 @@ -804,48 +844,39 @@ static int create_pae_xen_mappings(l3_pg
   2.109      return 1;
   2.110  }
   2.111  
   2.112 -struct pae_flush_pgd {
   2.113 -    unsigned long l3tab_mfn;
   2.114 -    unsigned int  l3tab_idx;
   2.115 -    l3_pgentry_t  nl3e;
   2.116 -};
   2.117 -
   2.118 -static void __pae_flush_pgd(void *data)
   2.119 -{
   2.120 -    struct pae_flush_pgd *args = data;
   2.121 -    struct vcpu *v = this_cpu(curr_vcpu);
   2.122 -    int i = v->arch.lowmem_l3tab_inuse;
   2.123 -    intpte_t _ol3e, _nl3e, _pl3e;
   2.124 -    l3_pgentry_t *l3tab_ptr;
   2.125 -
   2.126 -    ASSERT(!local_irq_is_enabled());
   2.127 -
   2.128 -    if ( v->arch.lowmem_l3tab_high_mfn[i] != args->l3tab_mfn )
   2.129 -        return;
   2.130 -
   2.131 -    l3tab_ptr = &v->arch.lowmem_l3tab[i][args->l3tab_idx];
   2.132 -
   2.133 -    _ol3e = l3e_get_intpte(*l3tab_ptr);
   2.134 -    _nl3e = l3e_get_intpte(args->nl3e);
   2.135 -    _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
   2.136 -    BUG_ON(_pl3e != _ol3e);
   2.137 -}
   2.138 -
   2.139  /* Flush a pgdir update into low-memory caches. */
   2.140  static void pae_flush_pgd(
   2.141      unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
   2.142  {
   2.143      struct domain *d = page_get_owner(mfn_to_page(mfn));
   2.144 -    struct pae_flush_pgd args = {
   2.145 -        .l3tab_mfn = mfn,
   2.146 -        .l3tab_idx = idx,
   2.147 -        .nl3e      = nl3e };
   2.148 +    struct vcpu   *v;
   2.149 +    intpte_t       _ol3e, _nl3e, _pl3e;
   2.150 +    l3_pgentry_t  *l3tab_ptr;
   2.151 +    struct pae_l3_cache *cache;
   2.152  
    2.153      /* If the pgdir needs no low-memory shadow then there is nothing to flush. */
   2.154 -    if ( mfn < 0x100000 )
   2.155 +    if ( !l3tab_needs_shadow(mfn) )
   2.156          return;
   2.157  
   2.158 -    on_selected_cpus(d->domain_dirty_cpumask, __pae_flush_pgd, &args, 1, 1);
   2.159 +    for_each_vcpu ( d, v )
   2.160 +    {
   2.161 +        cache = &v->arch.pae_l3_cache;
   2.162 +
   2.163 +        spin_lock(&cache->lock);
   2.164 +
   2.165 +        if ( cache->high_mfn == mfn )
   2.166 +        {
   2.167 +            l3tab_ptr = &cache->table[cache->inuse_idx][idx];
   2.168 +            _ol3e = l3e_get_intpte(*l3tab_ptr);
   2.169 +            _nl3e = l3e_get_intpte(nl3e);
   2.170 +            _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
   2.171 +            BUG_ON(_pl3e != _ol3e);
   2.172 +        }
   2.173 +
   2.174 +        spin_unlock(&cache->lock);
   2.175 +    }
   2.176 +
   2.177 +    flush_tlb_mask(d->domain_dirty_cpumask);
   2.178  }
   2.179  
   2.180  static inline int l1_backptr(
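
Where the old code sent an IPI (__pae_flush_pgd()) to patch the in-use copy on whichever processor was running the affected pgdir, the new code walks every vCPU of the owning domain and fixes any matching cached copy directly, under the same lock __write_ptbase() takes, then issues a single TLB flush over the domain's dirty cpumask. The cmpxchg/BUG_ON pair only asserts that nothing raced the locked update. A sketch of that pattern, using a GCC builtin as a stand-in for Xen's cmpxchg():

    typedef unsigned long long intpte;          /* stand-in for intpte_t */

    /* Replace one entry of a cached PDPT copy. The expected old value is
     * re-read from the cache itself, so with the cache lock held the
     * compare-and-swap can fail only if something else raced us. */
    static void model_flush_entry(intpte *cached_slot, intpte new_e)
    {
        intpte old_e = *cached_slot;
        intpte prev  = __sync_val_compare_and_swap(cached_slot, old_e, new_e);
        if (prev != old_e)
            __builtin_trap();                   /* Xen: BUG_ON(_pl3e != _ol3e) */
    }
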
   2.181 @@ -3708,11 +3739,10 @@ int map_pages_to_xen(
   2.182  }
   2.183  
   2.184  void __set_fixmap(
   2.185 -    enum fixed_addresses idx, unsigned long p, unsigned long flags)
   2.186 +    enum fixed_addresses idx, unsigned long mfn, unsigned long flags)
   2.187  {
   2.188 -    if ( unlikely(idx >= __end_of_fixed_addresses) )
   2.189 -        BUG();
   2.190 -    map_pages_to_xen(fix_to_virt(idx), p >> PAGE_SHIFT, 1, flags);
   2.191 +    BUG_ON(idx >= __end_of_fixed_addresses);
   2.192 +    map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags);
   2.193  }
   2.194  
   2.195  #ifdef MEMORY_GUARD
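
__set_fixmap() now takes a machine frame number instead of a physical address, which is what lets __write_ptbase() install an arbitrary MFN through the FIX_PAE_HIGHMEM_* slots with l1e_from_pfn(). The wrappers in fixmap.h (file 4 below) keep the old physical-address interface by shifting for the caller; an illustrative expansion, with a hypothetical address value:

    /* set_fixmap(FIX_APIC_BASE, 0xfee00000) now expands to
     *     __set_fixmap(FIX_APIC_BASE, 0xfee00000 >> PAGE_SHIFT,
     *                  PAGE_HYPERVISOR);
     * so map_pages_to_xen() receives the frame number 0xfee00 directly.
     */
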
     3.1 --- a/xen/include/asm-x86/domain.h	Sun May 28 15:49:17 2006 +0100
     3.2 +++ b/xen/include/asm-x86/domain.h	Tue May 30 11:44:23 2006 +0100
     3.3 @@ -114,23 +114,32 @@ struct arch_domain
     3.4      unsigned long first_reserved_pfn;
     3.5  } __cacheline_aligned;
     3.6  
     3.7 +#ifdef CONFIG_X86_PAE
     3.8 +struct pae_l3_cache {
     3.9 +    /*
    3.10 +     * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest
    3.11 +     * supplies a >=4GB PAE L3 table. We need two because we cannot set up
    3.12 +     * an L3 table while we are currently running on it (without using
    3.13 +     * expensive atomic 64-bit operations).
    3.14 +     */
    3.15 +    l3_pgentry_t  table[2][4] __attribute__((__aligned__(32)));
    3.16 +    unsigned long high_mfn;  /* The >=4GB MFN being shadowed. */
    3.17 +    unsigned int  inuse_idx; /* Which of the two cache slots is in use? */
    3.18 +    spinlock_t    lock;
    3.19 +};
    3.20 +#define pae_l3_cache_init(c) spin_lock_init(&(c)->lock)
    3.21 +#else /* !CONFIG_X86_PAE */
    3.22 +struct pae_l3_cache { };
    3.23 +#define pae_l3_cache_init(c) ((void)0)
    3.24 +#endif
    3.25 +
    3.26  struct arch_vcpu
    3.27  {
     3.28      /* Needs 16-byte alignment for FXSAVE/FXRSTOR. */
    3.29      struct vcpu_guest_context guest_context
    3.30      __attribute__((__aligned__(16)));
    3.31  
    3.32 -#ifdef CONFIG_X86_PAE
    3.33 -    /*
    3.34 -     * Two low-memory (<4GB) PAE L3 tables, used as fallback when the guest
    3.35 -     * supplies a >=4GB PAE L3 table. We need two because we cannot set up
    3.36 -     * an L3 table while we are currently running on it (without using
    3.37 -     * expensive atomic 64-bit operations).
    3.38 -     */
    3.39 -    l3_pgentry_t  lowmem_l3tab[2][4] __attribute__((__aligned__(32)));
    3.40 -    unsigned long lowmem_l3tab_high_mfn[2]; /* The >=4GB MFN being shadowed. */
    3.41 -    unsigned int  lowmem_l3tab_inuse;       /* Which lowmem_l3tab is in use? */
    3.42 -#endif
    3.43 +    struct pae_l3_cache pae_l3_cache;
    3.44  
    3.45      unsigned long      flags; /* TF_ */
    3.46  
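
A PAE PDPT is 4 entries of 8 bytes, i.e. 32 bytes, and in PAE mode CR3 carries the PDPT base in bits 31:5, so each table[] slot must be 32-byte aligned and, per the comment above, kept below 4GB; the __aligned__(32) attribute supplies the alignment. A hypothetical compile-time check of the size assumption (not part of the patch):

    /* Fails to compile if entries are not 8 bytes, i.e. if a PDPT
     * would not be exactly 32 bytes. */
    typedef char pdpt_is_32_bytes[(4 * sizeof(unsigned long long) == 32) ? 1 : -1];
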
     4.1 --- a/xen/include/asm-x86/fixmap.h	Sun May 28 15:49:17 2006 +0100
     4.2 +++ b/xen/include/asm-x86/fixmap.h	Tue May 30 11:44:23 2006 +0100
     4.3 @@ -25,6 +25,10 @@
     4.4   * from the end of virtual memory backwards.
     4.5   */
     4.6  enum fixed_addresses {
     4.7 +#ifdef CONFIG_X86_PAE
     4.8 +    FIX_PAE_HIGHMEM_0,
     4.9 +    FIX_PAE_HIGHMEM_END = FIX_PAE_HIGHMEM_0 + NR_CPUS-1,
    4.10 +#endif
    4.11      FIX_APIC_BASE,
    4.12      FIX_IO_APIC_BASE_0,
    4.13      FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
    4.14 @@ -40,13 +44,13 @@ enum fixed_addresses {
    4.15  #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
    4.16  
    4.17  extern void __set_fixmap(
    4.18 -    enum fixed_addresses idx, unsigned long p, unsigned long flags);
    4.19 +    enum fixed_addresses idx, unsigned long mfn, unsigned long flags);
    4.20  
    4.21  #define set_fixmap(idx, phys) \
    4.22 -    __set_fixmap(idx, phys, PAGE_HYPERVISOR)
    4.23 +    __set_fixmap(idx, (phys)>>PAGE_SHIFT, PAGE_HYPERVISOR)
    4.24  
    4.25  #define set_fixmap_nocache(idx, phys) \
    4.26 -    __set_fixmap(idx, phys, PAGE_HYPERVISOR_NOCACHE)
    4.27 +    __set_fixmap(idx, (phys)>>PAGE_SHIFT, PAGE_HYPERVISOR_NOCACHE)
    4.28  
    4.29  #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
    4.30  #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
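
Because __fix_to_virt() subtracts from FIXADDR_TOP, fixmap virtual addresses descend as the enum index grows: CPU n's FIX_PAE_HIGHMEM_* page sits n pages below CPU 0's, hence n L1 entries earlier in the page table. That is why mm.c above writes *(fix_pae_highmem_pl1e - cpu) rather than adding. Worked arithmetic, assuming 4KB pages (PAGE_SHIFT == 12):

    /* fix_to_virt(FIX_PAE_HIGHMEM_0 + n)
     *     == FIXADDR_TOP - ((FIX_PAE_HIGHMEM_0 + n) << 12)
     *     == fix_to_virt(FIX_PAE_HIGHMEM_0) - n * 4096
     *
     * i.e. n L1 entries *before* the one cached for slot 0 by
     * cache_pae_fixmap_address() in xen/arch/x86/mm.c.
     */
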