ia64/xen-unstable

changeset 9448:d78dedc4831f

Detect spurious faults taken in the hypervisor that are
due to writable pagetable logic.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Mar 24 12:14:58 2006 +0100 (2006-03-24)
parents dfbf0939350c
children 905cfaa5986b
files xen/arch/x86/mm.c xen/arch/x86/traps.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/traps.c xen/include/asm-x86/processor.h
line diff
     1.1 --- a/xen/arch/x86/mm.c	Fri Mar 24 10:59:31 2006 +0100
     1.2 +++ b/xen/arch/x86/mm.c	Fri Mar 24 12:14:58 2006 +0100
     1.3 @@ -3351,8 +3351,9 @@ int ptwr_do_page_fault(struct domain *d,
     1.4       * permissions in page directories by writing back to the linear mapping.
     1.5       */
     1.6      if ( (flags = l1e_get_flags(pte) & WRPT_PTE_FLAGS) == WRPT_PTE_FLAGS )
     1.7 -        return !__put_user(
     1.8 -            pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1);
     1.9 +        return __put_user(
    1.10 +            pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1) ?
    1.11 +            0 : EXCRET_not_a_fault;
    1.12  
    1.13      /* We are looking only for read-only mappings of p.t. pages. */
    1.14      if ( ((flags | _PAGE_RW) != WRPT_PTE_FLAGS) ||
     2.1 --- a/xen/arch/x86/traps.c	Fri Mar 24 10:59:31 2006 +0100
     2.2 +++ b/xen/arch/x86/traps.c	Fri Mar 24 12:14:58 2006 +0100
     2.3 @@ -620,6 +620,46 @@ static int fixup_page_fault(unsigned lon
     2.4      return 0;
     2.5  }
     2.6  
     2.7 +static int spurious_page_fault(unsigned long addr, struct cpu_user_regs *regs)
     2.8 +{
     2.9 +    struct vcpu   *v = current;
    2.10 +    struct domain *d = v->domain;
    2.11 +    int            rc;
    2.12 +
    2.13 +    /*
    2.14 +     * The only possible reason for a spurious page fault not to be picked
    2.15 +     * up already is that a page directory was unhooked by writable page table
    2.16 +     * logic and then reattached before the faulting VCPU could detect it.
    2.17 +     */
    2.18 +    if ( is_idle_domain(d) ||               /* no ptwr in idle domain       */
    2.19 +         IN_HYPERVISOR_RANGE(addr) ||       /* no ptwr on hypervisor addrs  */
    2.20 +         shadow_mode_enabled(d) ||          /* no ptwr logic in shadow mode */
    2.21 +         ((regs->error_code & 0x1d) != 0) ) /* simple not-present fault?    */
    2.22 +        return 0;
    2.23 +
    2.24 +    LOCK_BIGLOCK(d);
    2.25 +
    2.26 +    /*
    2.27 +     * The page directory could have been detached again while we weren't
    2.28 +     * holding the per-domain lock. Detect that and fix up if it's the case.
    2.29 +     */
    2.30 +    if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
    2.31 +         unlikely(l2_linear_offset(addr) ==
    2.32 +                  d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
    2.33 +    {
    2.34 +        ptwr_flush(d, PTWR_PT_ACTIVE);
    2.35 +        rc = 1;
    2.36 +    }
    2.37 +    else
    2.38 +    {
    2.39 +        /* Okay, walk the page tables. Only check for not-present faults. */
    2.40 +        rc = __spurious_page_fault(addr);
    2.41 +    }
    2.42 +
    2.43 +    UNLOCK_BIGLOCK(d);
    2.44 +    return rc;
    2.45 +}
    2.46 +
    2.47  /*
    2.48   * #PF error code:
    2.49   *  Bit 0: Protection violation (=1) ; Page not present (=0)
    2.50 @@ -644,6 +684,13 @@ asmlinkage int do_page_fault(struct cpu_
    2.51  
    2.52      if ( unlikely(!guest_mode(regs)) )
    2.53      {
    2.54 +        if ( spurious_page_fault(addr, regs) )
    2.55 +        {
    2.56 +            DPRINTK("Spurious fault in domain %u:%u at addr %lx\n",
    2.57 +                    current->domain->domain_id, current->vcpu_id, addr);
    2.58 +            return EXCRET_not_a_fault;
    2.59 +        }
    2.60 +
    2.61          if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    2.62          {
    2.63              perfc_incrc(copy_user_faults);
     3.1 --- a/xen/arch/x86/x86_32/traps.c	Fri Mar 24 10:59:31 2006 +0100
     3.2 +++ b/xen/arch/x86/x86_32/traps.c	Fri Mar 24 12:14:58 2006 +0100
     3.3 @@ -70,38 +70,77 @@ void show_registers(struct cpu_user_regs
     3.4  
     3.5  void show_page_walk(unsigned long addr)
     3.6  {
     3.7 -    unsigned long mfn = read_cr3() >> PAGE_SHIFT;
     3.8 -    intpte_t *ptab, ent;
     3.9 -    unsigned long pfn; 
    3.10 +    unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
    3.11 +#ifdef CONFIG_X86_PAE
    3.12 +    l3_pgentry_t l3e, *l3t;
    3.13 +#endif
    3.14 +    l2_pgentry_t l2e, *l2t;
    3.15 +    l1_pgentry_t l1e, *l1t;
    3.16  
    3.17      printk("Pagetable walk from %08lx:\n", addr);
    3.18  
    3.19  #ifdef CONFIG_X86_PAE
    3.20 -    ptab = map_domain_page(mfn);
    3.21 -    ent  = ptab[l3_table_offset(addr)];
    3.22 -    pfn  = get_gpfn_from_mfn((u32)(ent >> PAGE_SHIFT)); 
    3.23 -    printk(" L3 = %"PRIpte" %08lx\n", ent, pfn);
    3.24 -    unmap_domain_page(ptab);
    3.25 -    if ( !(ent & _PAGE_PRESENT) )
    3.26 +    l3t = map_domain_page(mfn);
    3.27 +    l3e = l3t[l3_table_offset(addr)];
    3.28 +    mfn = l3e_get_pfn(l3e);
    3.29 +    pfn = get_gpfn_from_mfn(mfn);
    3.30 +    printk(" L3 = %"PRIpte" %08lx\n", l3e_get_intpte(l3e), pfn);
    3.31 +    unmap_domain_page(l3t);
    3.32 +    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
    3.33          return;
    3.34 -    mfn = ent >> PAGE_SHIFT;
    3.35  #endif
    3.36  
    3.37 -    ptab = map_domain_page(mfn);
    3.38 -    ent  = ptab[l2_table_offset(addr)];
    3.39 -    pfn  = get_gpfn_from_mfn((u32)(ent >> PAGE_SHIFT));
    3.40 -    printk("  L2 = %"PRIpte" %08lx %s\n", ent, pfn, 
    3.41 -           (ent & _PAGE_PSE) ? "(PSE)" : "");
    3.42 -    unmap_domain_page(ptab);
    3.43 -    if ( !(ent & _PAGE_PRESENT) || (ent & _PAGE_PSE) )
    3.44 +    l2t = map_domain_page(mfn);
    3.45 +    l2e = l2t[l2_table_offset(addr)];
    3.46 +    mfn = l2e_get_pfn(l2e);
    3.47 +    pfn = get_gpfn_from_mfn(mfn);
    3.48 +    printk("  L2 = %"PRIpte" %08lx %s\n", l2e_get_intpte(l2e), pfn, 
    3.49 +           (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
    3.50 +    unmap_domain_page(l2t);
    3.51 +    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
    3.52 +         (l2e_get_flags(l2e) & _PAGE_PSE) )
    3.53          return;
    3.54 -    mfn = ent >> PAGE_SHIFT;
    3.55  
    3.56 -    ptab = map_domain_page(ent >> PAGE_SHIFT);
    3.57 -    ent  = ptab[l1_table_offset(addr)];
    3.58 -    pfn  = get_gpfn_from_mfn((u32)(ent >> PAGE_SHIFT));
    3.59 -    printk("   L1 = %"PRIpte" %08lx\n", ent, pfn);
    3.60 -    unmap_domain_page(ptab);
    3.61 +    l1t = map_domain_page(mfn);
    3.62 +    l1e = l1t[l1_table_offset(addr)];
    3.63 +    mfn = l1e_get_pfn(l1e);
    3.64 +    pfn = get_gpfn_from_mfn(mfn);
    3.65 +    printk("   L1 = %"PRIpte" %08lx\n", l1e_get_intpte(l1e), pfn);
    3.66 +    unmap_domain_page(l1t);
    3.67 +}
    3.68 +
    3.69 +int __spurious_page_fault(unsigned long addr)
    3.70 +{
    3.71 +    unsigned long mfn = read_cr3() >> PAGE_SHIFT;
    3.72 +#ifdef CONFIG_X86_PAE
    3.73 +    l3_pgentry_t l3e, *l3t;
    3.74 +#endif
    3.75 +    l2_pgentry_t l2e, *l2t;
    3.76 +    l1_pgentry_t l1e, *l1t;
    3.77 +
    3.78 +#ifdef CONFIG_X86_PAE
    3.79 +    l3t = map_domain_page(mfn);
    3.80 +    l3e = l3t[l3_table_offset(addr)];
    3.81 +    mfn = l3e_get_pfn(l3e);
    3.82 +    unmap_domain_page(l3t);
    3.83 +    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
    3.84 +        return 0;
    3.85 +#endif
    3.86 +
    3.87 +    l2t = map_domain_page(mfn);
    3.88 +    l2e = l2t[l2_table_offset(addr)];
    3.89 +    mfn = l2e_get_pfn(l2e);
    3.90 +    unmap_domain_page(l2t);
    3.91 +    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
    3.92 +        return 0;
    3.93 +    if ( l2e_get_flags(l2e) & _PAGE_PSE )
    3.94 +        return 1;
    3.95 +
    3.96 +    l1t = map_domain_page(mfn);
    3.97 +    l1e = l1t[l1_table_offset(addr)];
    3.98 +    mfn = l1e_get_pfn(l1e);
    3.99 +    unmap_domain_page(l1t);
   3.100 +    return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
   3.101  }
   3.102  
   3.103  #define DOUBLEFAULT_STACK_SIZE 1024
     4.1 --- a/xen/arch/x86/x86_64/traps.c	Fri Mar 24 10:59:31 2006 +0100
     4.2 +++ b/xen/arch/x86/x86_64/traps.c	Fri Mar 24 12:14:58 2006 +0100
     4.3 @@ -70,31 +70,79 @@ void show_registers(struct cpu_user_regs
     4.4  
     4.5  void show_page_walk(unsigned long addr)
     4.6  {
     4.7 -    unsigned long page = read_cr3();
     4.8 -    
     4.9 +    unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
    4.10 +    l4_pgentry_t l4e, *l4t;
    4.11 +    l3_pgentry_t l3e, *l3t;
    4.12 +    l2_pgentry_t l2e, *l2t;
    4.13 +    l1_pgentry_t l1e, *l1t;
    4.14 +
    4.15      printk("Pagetable walk from %016lx:\n", addr);
    4.16  
    4.17 -    page &= PAGE_MASK;
    4.18 -    page = ((unsigned long *) __va(page))[l4_table_offset(addr)];
    4.19 -    printk(" L4 = %016lx\n", page);
    4.20 -    if ( !(page & _PAGE_PRESENT) )
    4.21 +    l4t = mfn_to_virt(mfn);
    4.22 +    l4e = l4t[l4_table_offset(addr)];
    4.23 +    mfn = l4e_get_pfn(l4e);
    4.24 +    pfn = get_gpfn_from_mfn(mfn);
    4.25 +    printk(" L4 = %"PRIpte" %016lx\n", l4e_get_intpte(l4e), pfn);
    4.26 +    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
    4.27          return;
    4.28  
    4.29 -    page &= PAGE_MASK;
    4.30 -    page = ((unsigned long *) __va(page))[l3_table_offset(addr)];
    4.31 -    printk("  L3 = %016lx\n", page);
    4.32 -    if ( !(page & _PAGE_PRESENT) )
    4.33 +    l3t = mfn_to_virt(mfn);
    4.34 +    l3e = l3t[l3_table_offset(addr)];
    4.35 +    mfn = l3e_get_pfn(l3e);
    4.36 +    pfn = get_gpfn_from_mfn(mfn);
    4.37 +    printk("  L3 = %"PRIpte" %016lx\n", l3e_get_intpte(l3e), pfn);
    4.38 +    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
    4.39          return;
    4.40  
    4.41 -    page &= PAGE_MASK;
    4.42 -    page = ((unsigned long *) __va(page))[l2_table_offset(addr)];
    4.43 -    printk("   L2 = %016lx %s\n", page, (page & _PAGE_PSE) ? "(2MB)" : "");
    4.44 -    if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) )
    4.45 +    l2t = mfn_to_virt(mfn);
    4.46 +    l2e = l2t[l2_table_offset(addr)];
    4.47 +    mfn = l2e_get_pfn(l2e);
    4.48 +    pfn = get_gpfn_from_mfn(mfn);
    4.49 +    printk("   L2 = %"PRIpte" %016lx %s\n", l2e_get_intpte(l2e), pfn,
    4.50 +           (l2e_get_flags(l2e) & _PAGE_PSE) ? "(PSE)" : "");
    4.51 +    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
    4.52 +         (l2e_get_flags(l2e) & _PAGE_PSE) )
    4.53          return;
    4.54  
    4.55 -    page &= PAGE_MASK;
    4.56 -    page = ((unsigned long *) __va(page))[l1_table_offset(addr)];
    4.57 -    printk("    L1 = %016lx\n", page);
    4.58 +    l1t = mfn_to_virt(mfn);
    4.59 +    l1e = l1t[l1_table_offset(addr)];
    4.60 +    mfn = l1e_get_pfn(l1e);
    4.61 +    pfn = get_gpfn_from_mfn(mfn);
    4.62 +    printk("    L1 = %"PRIpte" %016lx\n", l1e_get_intpte(l1e), pfn);
    4.63 +}
    4.64 +
    4.65 +int __spurious_page_fault(unsigned long addr)
    4.66 +{
    4.67 +    unsigned long mfn = read_cr3() >> PAGE_SHIFT;
    4.68 +    l4_pgentry_t l4e, *l4t;
    4.69 +    l3_pgentry_t l3e, *l3t;
    4.70 +    l2_pgentry_t l2e, *l2t;
    4.71 +    l1_pgentry_t l1e, *l1t;
    4.72 +
    4.73 +    l4t = mfn_to_virt(mfn);
    4.74 +    l4e = l4t[l4_table_offset(addr)];
    4.75 +    mfn = l4e_get_pfn(l4e);
    4.76 +    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
    4.77 +        return 0;
    4.78 +
    4.79 +    l3t = mfn_to_virt(mfn);
    4.80 +    l3e = l3t[l3_table_offset(addr)];
    4.81 +    mfn = l3e_get_pfn(l3e);
    4.82 +    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
    4.83 +        return 0;
    4.84 +
    4.85 +    l2t = mfn_to_virt(mfn);
    4.86 +    l2e = l2t[l2_table_offset(addr)];
    4.87 +    mfn = l2e_get_pfn(l2e);
    4.88 +    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
    4.89 +        return 0;
    4.90 +    if ( l2e_get_flags(l2e) & _PAGE_PSE )
    4.91 +        return 1;
    4.92 +
    4.93 +    l1t = mfn_to_virt(mfn);
    4.94 +    l1e = l1t[l1_table_offset(addr)];
    4.95 +    mfn = l1e_get_pfn(l1e);
    4.96 +    return !!(l1e_get_flags(l1e) & _PAGE_PRESENT);
    4.97  }
    4.98  
    4.99  asmlinkage void double_fault(void);
     5.1 --- a/xen/include/asm-x86/processor.h	Fri Mar 24 10:59:31 2006 +0100
     5.2 +++ b/xen/include/asm-x86/processor.h	Fri Mar 24 12:14:58 2006 +0100
     5.3 @@ -524,6 +524,7 @@ extern always_inline void prefetchw(cons
     5.4  void show_stack(struct cpu_user_regs *regs);
     5.5  void show_registers(struct cpu_user_regs *regs);
     5.6  void show_page_walk(unsigned long addr);
     5.7 +int __spurious_page_fault(unsigned long addr);
     5.8  asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs);
     5.9  
    5.10  extern void mtrr_ap_init(void);