ia64/xen-unstable

changeset 12254:5ec45b464563

[XEN] Avoid taking domain biglock in the page-fault handler.
This avoids a deadlock with the shadow_lock.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@localhost.localdomain
date Sat Nov 04 19:26:29 2006 +0000 (2006-11-04)
parents 13ea4bea8237
children cf8e65797826
files xen/arch/x86/traps.c xen/include/asm-x86/mm.h xen/include/asm-x86/page.h xen/include/asm-x86/x86_32/page-2level.h xen/include/asm-x86/x86_32/page-3level.h xen/include/asm-x86/x86_64/page.h
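
Why the biglock has to go: the fault path used to take the per-domain biglock and then reach code that takes the shadow_lock, while some competing path can end up holding the shadow_lock first. Two locks acquired in opposite orders on two CPUs is the classic AB-BA deadlock. The sketch below is a minimal standalone illustration of that pattern using pthreads; fault_path() and shadow_path() are hypothetical stand-ins, not Xen code, and the exact competing path is not named by this changeset. Output depends on scheduling; the point is the opposite acquisition order.

    /* deadlock_demo.c -- build: cc deadlock_demo.c -pthread */
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t biglock     = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t shadow_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Mirrors the old #PF path: biglock first, then shadow_lock. */
    static void *fault_path(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&biglock);
        pthread_mutex_lock(&shadow_lock);
        pthread_mutex_unlock(&shadow_lock);
        pthread_mutex_unlock(&biglock);
        return NULL;
    }

    /* Hypothetical competing path taking the locks in the opposite order;
     * trylock is used so the demo reports the inversion instead of hanging. */
    static void *shadow_path(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&shadow_lock);
        if ( pthread_mutex_trylock(&biglock) != 0 )
            puts("lock-order inversion: a blocking acquire would deadlock here");
        else
            pthread_mutex_unlock(&biglock);
        pthread_mutex_unlock(&shadow_lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t a, b;
        pthread_create(&a, NULL, fault_path, NULL);
        pthread_create(&b, NULL, shadow_path, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
    }

Dropping the biglock from the fault path (first hunks of xen/arch/x86/traps.c below) means the fault handler no longer participates in any such cycle.
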
line diff
     1.1 --- a/xen/arch/x86/traps.c	Fri Nov 03 16:51:28 2006 +0000
     1.2 +++ b/xen/arch/x86/traps.c	Sat Nov 04 19:26:29 2006 +0000
     1.3 @@ -704,12 +704,6 @@ void propagate_page_fault(unsigned long 
     1.4  static int handle_gdt_ldt_mapping_fault(
     1.5      unsigned long offset, struct cpu_user_regs *regs)
     1.6  {
     1.7 -    extern int map_ldt_shadow_page(unsigned int);
     1.8 -
     1.9 -    struct vcpu *v = current;
    1.10 -    struct domain *d  = v->domain;
    1.11 -    int ret;
    1.12 -
    1.13      /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
    1.14      unsigned int is_ldt_area = (offset >> (GDT_LDT_VCPU_VA_SHIFT-1)) & 1;
    1.15      unsigned int vcpu_area   = (offset >> GDT_LDT_VCPU_VA_SHIFT);
    1.16 @@ -723,18 +717,15 @@ static int handle_gdt_ldt_mapping_fault(
    1.17      if ( likely(is_ldt_area) )
    1.18      {
    1.19          /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
    1.20 -        LOCK_BIGLOCK(d);
    1.21 -        ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
    1.22 -        UNLOCK_BIGLOCK(d);
    1.23 -
    1.24 -        if ( unlikely(ret == 0) )
    1.25 +        if ( unlikely(map_ldt_shadow_page(offset >> PAGE_SHIFT) == 0) )
    1.26          {
    1.27              /* In hypervisor mode? Leave it to the #PF handler to fix up. */
    1.28              if ( !guest_mode(regs) )
    1.29                  return 0;
    1.30              /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
    1.31              propagate_page_fault(
    1.32 -                v->arch.guest_context.ldt_base + offset, regs->error_code);
    1.33 +                current->arch.guest_context.ldt_base + offset,
    1.34 +                regs->error_code);
    1.35          }
    1.36      }
    1.37      else
    1.38 @@ -787,7 +778,7 @@ static int __spurious_page_fault(
    1.39  
    1.40  #if CONFIG_PAGING_LEVELS >= 4
    1.41      l4t = map_domain_page(mfn);
    1.42 -    l4e = l4t[l4_table_offset(addr)];
    1.43 +    l4e = l4e_read_atomic(&l4t[l4_table_offset(addr)]);
    1.44      mfn = l4e_get_pfn(l4e);
    1.45      unmap_domain_page(l4t);
    1.46      if ( ((l4e_get_flags(l4e) & required_flags) != required_flags) ||
    1.47 @@ -800,7 +791,7 @@ static int __spurious_page_fault(
    1.48  #ifdef CONFIG_X86_PAE
    1.49      l3t += (cr3 & 0xFE0UL) >> 3;
    1.50  #endif
    1.51 -    l3e = l3t[l3_table_offset(addr)];
    1.52 +    l3e = l3e_read_atomic(&l3t[l3_table_offset(addr)]);
    1.53      mfn = l3e_get_pfn(l3e);
    1.54      unmap_domain_page(l3t);
    1.55  #ifdef CONFIG_X86_PAE
    1.56 @@ -814,7 +805,7 @@ static int __spurious_page_fault(
    1.57  #endif
    1.58  
    1.59      l2t = map_domain_page(mfn);
    1.60 -    l2e = l2t[l2_table_offset(addr)];
    1.61 +    l2e = l2e_read_atomic(&l2t[l2_table_offset(addr)]);
    1.62      mfn = l2e_get_pfn(l2e);
    1.63      unmap_domain_page(l2t);
    1.64      if ( ((l2e_get_flags(l2e) & required_flags) != required_flags) ||
    1.65 @@ -827,7 +818,7 @@ static int __spurious_page_fault(
    1.66      }
    1.67  
    1.68      l1t = map_domain_page(mfn);
    1.69 -    l1e = l1t[l1_table_offset(addr)];
    1.70 +    l1e = l1e_read_atomic(&l1t[l1_table_offset(addr)]);
    1.71      mfn = l1e_get_pfn(l1e);
    1.72      unmap_domain_page(l1t);
    1.73      if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) ||
    1.74 @@ -856,12 +847,16 @@ static int __spurious_page_fault(
    1.75  static int spurious_page_fault(
    1.76      unsigned long addr, struct cpu_user_regs *regs)
    1.77  {
    1.78 -    struct domain *d = current->domain;
    1.79 -    int            is_spurious;
    1.80 +    unsigned long flags;
    1.81 +    int           is_spurious;
    1.82  
    1.83 -    LOCK_BIGLOCK(d);
    1.84 +    /*
    1.85 +     * Disabling interrupts prevents TLB flushing, and hence prevents
    1.86 +     * page tables from becoming invalid under our feet during the walk.
    1.87 +     */
    1.88 +    local_irq_save(flags);
    1.89      is_spurious = __spurious_page_fault(addr, regs);
    1.90 -    UNLOCK_BIGLOCK(d);
    1.91 +    local_irq_restore(flags);
    1.92  
    1.93      return is_spurious;
    1.94  }
    1.95 @@ -878,11 +873,7 @@ static int fixup_page_fault(unsigned lon
    1.96          if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
    1.97              return handle_gdt_ldt_mapping_fault(
    1.98                  addr - GDT_LDT_VIRT_START, regs);
    1.99 -        /*
   1.100 -         * Do not propagate spurious faults in the hypervisor area to the
   1.101 -         * guest. It cannot fix them up.
   1.102 -         */
   1.103 -        return (spurious_page_fault(addr, regs) ? EXCRET_not_a_fault : 0);
   1.104 +        return 0;
   1.105      }
   1.106  
   1.107      if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
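
A note on the spurious_page_fault() hunks above: the new comment says disabling interrupts prevents TLB flushing, and the unstated half of the argument is that Xen frees or reuses a page-table page only after every CPU that might still cache translations for it has acknowledged a TLB-flush IPI. A CPU walking with IRQs off cannot take that IPI, so every table it dereferences stays allocated until local_irq_restore(). A hedged sketch of the free-side handshake follows; send_tlb_flush_ipi() and wait_for_all_acks() are hypothetical names, not Xen functions:

    /* Illustrative free-side handshake (hypothetical helpers, not Xen source). */
    void free_pgtable_page(struct page_info *pg, cpumask_t users)
    {
        send_tlb_flush_ipi(users);   /* hypothetical: ask user CPUs to flush */
        wait_for_all_acks(users);    /* hypothetical: stalls while any user
                                      * CPU still has interrupts disabled */
        free_domheap_page(pg);       /* only now may the page be reused */
    }

The irq-off window is itself the pin, which is why no lock is needed and why the walk pairs naturally with the lNe_read_atomic() reads used above and defined in page.h below.
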
     2.1 --- a/xen/include/asm-x86/mm.h	Fri Nov 03 16:51:28 2006 +0000
     2.2 +++ b/xen/include/asm-x86/mm.h	Sat Nov 04 19:26:29 2006 +0000
     2.3 @@ -179,8 +179,8 @@ void init_frametable(void);
     2.4  
     2.5  int alloc_page_type(struct page_info *page, unsigned long type);
     2.6  void free_page_type(struct page_info *page, unsigned long type);
     2.7 -extern void invalidate_shadow_ldt(struct vcpu *d);
     2.8 -extern int _shadow_mode_refcounts(struct domain *d);
     2.9 +void invalidate_shadow_ldt(struct vcpu *d);
    2.10 +int _shadow_mode_refcounts(struct domain *d);
    2.11  
    2.12  static inline void put_page(struct page_info *page)
    2.13  {
    2.14 @@ -385,4 +385,6 @@ long subarch_memory_op(int op, XEN_GUEST
    2.15  int steal_page(
    2.16      struct domain *d, struct page_info *page, unsigned int memflags);
    2.17  
    2.18 +int map_ldt_shadow_page(unsigned int);
    2.19 +
    2.20  #endif /* __ASM_X86_MM_H__ */
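
Beyond tidying the externs, hoisting the map_ldt_shadow_page() prototype into mm.h (replacing the function-local declaration removed from traps.c above) gives the compiler one declaration to check both the definition and every call site against; a block-scope extern must be kept in sync by hand, and C will not catch a drift across translation units. A minimal illustration, with hypothetical file contents rather than the Xen sources:

    /* mm.c */
    int map_ldt_shadow_page(unsigned int off)
    {
        return off == 0;   /* placeholder body */
    }

    /* traps.c, old style: the block-scope extern below declares the wrong
     * parameter type, yet this file compiles cleanly -- C checks a call only
     * against declarations visible in the same translation unit. */
    int caller(void)
    {
        extern int map_ldt_shadow_page(unsigned long);   /* stale prototype */
        return map_ldt_shadow_page(0UL);                 /* undefined behaviour */
    }
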
     3.1 --- a/xen/include/asm-x86/page.h	Fri Nov 03 16:51:28 2006 +0000
     3.2 +++ b/xen/include/asm-x86/page.h	Sat Nov 04 19:26:29 2006 +0000
     3.3 @@ -25,6 +25,18 @@
     3.4  # include <asm/x86_64/page.h>
     3.5  #endif
     3.6  
     3.7 +/* Read a pte atomically from memory. */
     3.8 +#define l1e_read_atomic(l1ep) l1e_from_intpte(pte_read_atomic(l1ep))
     3.9 +#define l2e_read_atomic(l2ep) l2e_from_intpte(pte_read_atomic(l2ep))
    3.10 +#define l3e_read_atomic(l3ep) l3e_from_intpte(pte_read_atomic(l3ep))
    3.11 +#define l4e_read_atomic(l4ep) l4e_from_intpte(pte_read_atomic(l4ep))
    3.12 +
    3.13 +/* Write a pte atomically to memory. */
    3.14 +#define l1e_write_atomic(l1ep, l1e) pte_write_atomic(l1ep, l1e_get_intpte(l1e))
    3.15 +#define l2e_write_atomic(l2ep, l2e) pte_write_atomic(l2ep, l2e_get_intpte(l2e))
    3.16 +#define l3e_write_atomic(l3ep, l3e) pte_write_atomic(l3ep, l3e_get_intpte(l3e))
    3.17 +#define l4e_write_atomic(l4ep, l4e) pte_write_atomic(l4ep, l4e_get_intpte(l4e))
    3.18 +
    3.19  /* Get direct integer representation of a pte's contents (intpte_t). */
    3.20  #define l1e_get_intpte(x)          ((x).l1)
    3.21  #define l2e_get_intpte(x)          ((x).l2)
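
A hedged usage sketch of the new accessors, borrowing the l1t and addr names from the __spurious_page_fault() walk above: take the entry as one atomic snapshot, and publish any modification as one atomic store, so a concurrent lock-free walker can never observe a half-written entry. The RW-stripping update is illustrative only:

    /* inside some function with l1t mapped and addr in hand */
    l1_pgentry_t *pl1e = &l1t[l1_table_offset(addr)];
    l1_pgentry_t  l1e  = l1e_read_atomic(pl1e);     /* whole-entry snapshot */

    if ( l1e_get_flags(l1e) & _PAGE_PRESENT )
        /* publish the modified entry as one atomic store, e.g. dropping RW */
        l1e_write_atomic(pl1e, l1e_from_pfn(l1e_get_pfn(l1e),
                                            l1e_get_flags(l1e) & ~_PAGE_RW));
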
     4.1 --- a/xen/include/asm-x86/x86_32/page-2level.h	Fri Nov 03 16:51:28 2006 +0000
     4.2 +++ b/xen/include/asm-x86/x86_32/page-2level.h	Sat Nov 04 19:26:29 2006 +0000
     4.3 @@ -28,6 +28,9 @@ typedef l2_pgentry_t root_pgentry_t;
     4.4  
     4.5  #endif /* !__ASSEMBLY__ */
     4.6  
     4.7 +#define pte_read_atomic(ptep)       (*(intpte_t *)(ptep))
      4.8 +#define pte_write_atomic(ptep, pte) (*(intpte_t *)(ptep) = (pte))
     4.9 +
    4.10  /* root table */
    4.11  #define root_get_pfn              l2e_get_pfn
    4.12  #define root_get_flags            l2e_get_flags
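
These 2-level definitions (and the identical x86_64 ones at the end of the changeset) get away with plain dereferences because intpte_t is a single machine word in those configurations, and x86 makes naturally aligned word-sized loads and stores atomic. Only PAE, below, needs more. A standalone restatement of the assumption:

    #include <stdint.h>

    typedef uint32_t intpte_t;   /* 2-level x86-32: the whole PTE is one word */

    /* Naturally aligned accesses no wider than the machine word are atomic
     * on x86, so the plain dereferences above already satisfy "atomic". */
    _Static_assert(sizeof(intpte_t) == 4, "PTE is a single 32-bit word");
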
     5.1 --- a/xen/include/asm-x86/x86_32/page-3level.h	Fri Nov 03 16:51:28 2006 +0000
     5.2 +++ b/xen/include/asm-x86/x86_32/page-3level.h	Sat Nov 04 19:26:29 2006 +0000
     5.3 @@ -38,6 +38,17 @@ typedef l3_pgentry_t root_pgentry_t;
     5.4  
     5.5  #endif /* !__ASSEMBLY__ */
     5.6  
     5.7 +#define pte_read_atomic(ptep) ({                                            \
     5.8 +    intpte_t __pte = *(intpte_t *)(ptep), __npte;                           \
     5.9 +    while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, __pte)) != __pte ) \
    5.10 +        __pte = __npte;                                                     \
    5.11 +    __pte; })
    5.12 +#define pte_write_atomic(ptep, pte) do {                                    \
    5.13 +    intpte_t __pte = *(intpte_t *)(ptep), __npte;                           \
    5.14 +    while ( (__npte = cmpxchg((intpte_t *)(ptep), __pte, (pte))) != __pte ) \
    5.15 +        __pte = __npte;                                                     \
    5.16 +} while ( 0 )
    5.17 +
    5.18  /* root table */
    5.19  #define root_get_pfn              l3e_get_pfn
    5.20  #define root_get_flags            l3e_get_flags
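
PAE is the case that earns its cmpxchg loop: the 64-bit entry lives on a 32-bit CPU, so an unsynchronized access is split into two 32-bit halves and can be observed torn across a concurrent update. cmpxchg8b moves all 8 bytes at once, and comparing-and-swapping a value with itself is a side-effect-free atomic read. A standalone sketch of the same trick via GCC builtins follows (illustrative, not the Xen macro; build for i686 or later so cmpxchg8b is available):

    #include <stdint.h>

    typedef uint64_t intpte_t;   /* PAE: 64-bit PTE on a 32-bit CPU */

    static intpte_t pte_read_atomic_demo(intpte_t *ptep)
    {
        intpte_t old = 0;
        /* CAS 0 -> 0: if *ptep != 0 the exchange fails and `old` receives
         * the current value; if *ptep == 0, storing 0 changes nothing.
         * Either way `old` is an atomic 64-bit snapshot. */
        __atomic_compare_exchange_n(ptep, &old, (intpte_t)0, 0,
                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
        return old;
    }

The write side loops for the same reason: it retries until the cmpxchg lands against the value it last read, so the final update is a single atomic 8-byte store.
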
     6.1 --- a/xen/include/asm-x86/x86_64/page.h	Fri Nov 03 16:51:28 2006 +0000
     6.2 +++ b/xen/include/asm-x86/x86_64/page.h	Sat Nov 04 19:26:29 2006 +0000
     6.3 @@ -41,6 +41,9 @@ typedef l4_pgentry_t root_pgentry_t;
     6.4  
     6.5  #endif /* !__ASSEMBLY__ */
     6.6  
     6.7 +#define pte_read_atomic(ptep)       (*(intpte_t *)(ptep))
      6.8 +#define pte_write_atomic(ptep, pte) (*(intpte_t *)(ptep) = (pte))
     6.9 +
    6.10  /* Given a virtual address, get an entry offset into a linear page table. */
    6.11  #define l1_linear_offset(_a) (((_a) & VADDR_MASK) >> L1_PAGETABLE_SHIFT)
    6.12  #define l2_linear_offset(_a) (((_a) & VADDR_MASK) >> L2_PAGETABLE_SHIFT)