ia64/xen-unstable

changeset 10472:e23961a8ce7e

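[XEN] Fix page-fault handler to not trust bit 0 (the page-present
flag, PGERR_page_present) of the error code. It can be clear, even
when we would expect it to be set, because the writable-pagetable
logic may have unhooked a page table. Various other cleanups too.
Spurious fault detection logic is simplified.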
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Sun Jun 18 19:24:00 2006 +0100 (2006-06-18)
parents 5033ffe8f533
children 7713276d159e
files xen/arch/x86/mm.c xen/arch/x86/traps.c xen/arch/x86/x86_32/seg_fixup.c xen/arch/x86/x86_emulate.c
line diff
     1.1 --- a/xen/arch/x86/mm.c	Sat Jun 17 12:57:03 2006 +0100
     1.2 +++ b/xen/arch/x86/mm.c	Sun Jun 18 19:24:00 2006 +0100
     1.3 @@ -3351,7 +3351,7 @@ static int ptwr_emulated_update(
     1.4          addr &= ~(sizeof(paddr_t)-1);
     1.5          if ( copy_from_user(&full, (void *)addr, sizeof(paddr_t)) )
     1.6          {
     1.7 -            propagate_page_fault(addr, 4); /* user mode, read fault */
     1.8 +            propagate_page_fault(addr, 0); /* read fault */
     1.9              return X86EMUL_PROPAGATE_FAULT;
    1.10          }
    1.11          /* Mask out bits provided by caller. */
    1.12 @@ -3483,12 +3483,12 @@ int ptwr_do_page_fault(struct domain *d,
    1.13      unsigned long    l2_idx;
    1.14      struct x86_emulate_ctxt emul_ctxt;
    1.15  
    1.16 -    if ( unlikely(shadow_mode_enabled(d)) )
    1.17 -        return 0;
    1.18 +    ASSERT(!shadow_mode_enabled(d));
    1.19  
    1.20      /*
    1.21       * Attempt to read the PTE that maps the VA being accessed. By checking for
    1.22       * PDE validity in the L2 we avoid many expensive fixups in __get_user().
    1.23 +     * NB. The L2 entry cannot be detached as the caller already checked that.
    1.24       */
    1.25      if ( !(l2e_get_flags(__linear_l2_table[l2_linear_offset(addr)]) &
    1.26             _PAGE_PRESENT) ||
    1.27 @@ -3579,7 +3579,7 @@ int ptwr_do_page_fault(struct domain *d,
    1.28      }
    1.29  
    1.30      /*
    1.31 -     * We only allow one ACTIVE and one INACTIVE p.t. to be updated at at 
    1.32 +     * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a
    1.33       * time. If there is already one, we must flush it out.
    1.34       */
    1.35      if ( d->arch.ptwr[which].l1va )
     2.1 --- a/xen/arch/x86/traps.c	Sat Jun 17 12:57:03 2006 +0100
     2.2 +++ b/xen/arch/x86/traps.c	Sun Jun 18 19:24:00 2006 +0100
     2.3 @@ -547,6 +547,7 @@ static int handle_gdt_ldt_mapping_fault(
     2.4      {
     2.5          /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
     2.6          LOCK_BIGLOCK(d);
     2.7 +        cleanup_writable_pagetable(d);
     2.8          ret = map_ldt_shadow_page(offset >> PAGE_SHIFT);
     2.9          UNLOCK_BIGLOCK(d);
    2.10  
    2.11 @@ -654,41 +655,14 @@ static int __spurious_page_fault(
    2.12  static int spurious_page_fault(
    2.13      unsigned long addr, struct cpu_user_regs *regs)
    2.14  {
    2.15 -    struct vcpu   *v = current;
    2.16 -    struct domain *d = v->domain;
    2.17 +    struct domain *d = current->domain;
    2.18      int            is_spurious;
    2.19  
    2.20      LOCK_BIGLOCK(d);
    2.21 -
    2.22 +    cleanup_writable_pagetable(d);
    2.23      is_spurious = __spurious_page_fault(addr, regs);
    2.24 -    if ( is_spurious )
    2.25 -        goto out;
    2.26 +    UNLOCK_BIGLOCK(d);
    2.27  
    2.28 -    /*
    2.29 -     * The only possible reason for a spurious page fault not to be picked
    2.30 -     * up already is that a page directory was unhooked by writable page table
    2.31 -     * logic and then reattached before the faulting VCPU could detect it.
    2.32 -     */
    2.33 -    if ( is_idle_domain(d) ||               /* no ptwr in idle domain       */
    2.34 -         IN_HYPERVISOR_RANGE(addr) ||       /* no ptwr on hypervisor addrs  */
    2.35 -         shadow_mode_enabled(d) ||          /* no ptwr logic in shadow mode */
    2.36 -         (regs->error_code & PGERR_page_present) ) /* not-present fault?    */
    2.37 -        goto out;
    2.38 -
    2.39 -    /*
    2.40 -     * The page directory could have been detached again while we weren't
    2.41 -     * holding the per-domain lock. Detect that and fix up if it's the case.
    2.42 -     */
    2.43 -    if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
    2.44 -         unlikely(l2_linear_offset(addr) ==
    2.45 -                  d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
    2.46 -    {
    2.47 -        ptwr_flush(d, PTWR_PT_ACTIVE);
    2.48 -        is_spurious = 1;
    2.49 -    }
    2.50 -
    2.51 - out:
    2.52 -    UNLOCK_BIGLOCK(d);
    2.53      return is_spurious;
    2.54  }
    2.55  
    2.56 @@ -696,7 +670,6 @@ static int fixup_page_fault(unsigned lon
    2.57  {
    2.58      struct vcpu   *v = current;
    2.59      struct domain *d = v->domain;
    2.60 -    int            rc;
    2.61  
    2.62      if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
    2.63      {
    2.64 @@ -709,10 +682,7 @@ static int fixup_page_fault(unsigned lon
    2.65           * Do not propagate spurious faults in the hypervisor area to the
    2.66           * guest. It cannot fix them up.
    2.67           */
    2.68 -        LOCK_BIGLOCK(d);
    2.69 -        rc = __spurious_page_fault(addr, regs);
    2.70 -        UNLOCK_BIGLOCK(d);
    2.71 -        return rc;
    2.72 +        return (spurious_page_fault(addr, regs) ? EXCRET_not_a_fault : 0);
    2.73      }
    2.74  
    2.75      if ( unlikely(shadow_mode_enabled(d)) )
    2.76 @@ -730,12 +700,14 @@ static int fixup_page_fault(unsigned lon
    2.77              return EXCRET_fault_fixed;
    2.78          }
    2.79  
    2.80 +        /*
    2.81 +         * Note it is *not* safe to check PGERR_page_present here. It can be
    2.82 +         * clear, due to unhooked page table, when we would otherwise expect
    2.83 +         * it to be set. We have an aversion to trusting that flag in Xen, and
    2.84 +         * guests ought to be leery too.
    2.85 +         */
    2.86          if ( guest_kernel_mode(v, regs) &&
    2.87 -             /* Protection violation on write? No reserved-bit violation? */
    2.88 -             ((regs->error_code & (PGERR_page_present |
    2.89 -                                   PGERR_write_access |
    2.90 -                                   PGERR_reserved_bit)) ==
    2.91 -              (PGERR_page_present | PGERR_write_access)) &&
    2.92 +             (regs->error_code & PGERR_write_access) &&
    2.93               ptwr_do_page_fault(d, addr, regs) )
    2.94          {
    2.95              UNLOCK_BIGLOCK(d);
    2.96 @@ -870,8 +842,6 @@ static inline int admin_io_okay(
    2.97      (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
    2.98  
    2.99  /* Propagate a fault back to the guest kernel. */
   2.100 -#define USER_READ_FAULT  (PGERR_user_mode)
   2.101 -#define USER_WRITE_FAULT (PGERR_user_mode | PGERR_write_access)
   2.102  #define PAGE_FAULT(_faultaddr, _errcode)        \
   2.103  ({  propagate_page_fault(_faultaddr, _errcode); \
   2.104      return EXCRET_fault_fixed;                  \
   2.105 @@ -881,7 +851,7 @@ static inline int admin_io_okay(
   2.106  #define insn_fetch(_type, _size, _ptr)          \
   2.107  ({  unsigned long _x;                           \
   2.108      if ( get_user(_x, (_type *)eip) )           \
   2.109 -        PAGE_FAULT(eip, USER_READ_FAULT);       \
   2.110 +        PAGE_FAULT(eip, 0); /* read fault */    \
   2.111      eip += _size; (_type)_x; })
   2.112  
   2.113  static int emulate_privileged_op(struct cpu_user_regs *regs)
   2.114 @@ -950,17 +920,17 @@ static int emulate_privileged_op(struct 
   2.115              case 1:
   2.116                  data = (u8)inb_user((u16)regs->edx, v, regs);
   2.117                  if ( put_user((u8)data, (u8 *)regs->edi) )
   2.118 -                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
   2.119 +                    PAGE_FAULT(regs->edi, PGERR_write_access);
   2.120                  break;
   2.121              case 2:
   2.122                  data = (u16)inw_user((u16)regs->edx, v, regs);
   2.123                  if ( put_user((u16)data, (u16 *)regs->edi) )
   2.124 -                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
   2.125 +                    PAGE_FAULT(regs->edi, PGERR_write_access);
   2.126                  break;
   2.127              case 4:
   2.128                  data = (u32)inl_user((u16)regs->edx, v, regs);
   2.129                  if ( put_user((u32)data, (u32 *)regs->edi) )
   2.130 -                    PAGE_FAULT(regs->edi, USER_WRITE_FAULT);
   2.131 +                    PAGE_FAULT(regs->edi, PGERR_write_access);
   2.132                  break;
   2.133              }
   2.134              regs->edi += (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes);
   2.135 @@ -975,17 +945,17 @@ static int emulate_privileged_op(struct 
   2.136              {
   2.137              case 1:
   2.138                  if ( get_user(data, (u8 *)regs->esi) )
   2.139 -                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
   2.140 +                    PAGE_FAULT(regs->esi, 0); /* read fault */
   2.141                  outb_user((u8)data, (u16)regs->edx, v, regs);
   2.142                  break;
   2.143              case 2:
   2.144                  if ( get_user(data, (u16 *)regs->esi) )
   2.145 -                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
   2.146 +                    PAGE_FAULT(regs->esi, 0); /* read fault */
   2.147                  outw_user((u16)data, (u16)regs->edx, v, regs);
   2.148                  break;
   2.149              case 4:
   2.150                  if ( get_user(data, (u32 *)regs->esi) )
   2.151 -                    PAGE_FAULT(regs->esi, USER_READ_FAULT);
   2.152 +                    PAGE_FAULT(regs->esi, 0); /* read fault */
   2.153                  outl_user((u32)data, (u16)regs->edx, v, regs);
   2.154                  break;
   2.155              }
   2.156 @@ -1168,7 +1138,7 @@ static int emulate_privileged_op(struct 
   2.157              v->arch.guest_context.ctrlreg[2] = *reg;
   2.158              v->vcpu_info->arch.cr2           = *reg;
   2.159              break;
   2.160 -            
   2.161 +
   2.162          case 3: /* Write CR3 */
   2.163              LOCK_BIGLOCK(v->domain);
   2.164              cleanup_writable_pagetable(v->domain);
     3.1 --- a/xen/arch/x86/x86_32/seg_fixup.c	Sat Jun 17 12:57:03 2006 +0100
     3.2 +++ b/xen/arch/x86/x86_32/seg_fixup.c	Sun Jun 18 19:24:00 2006 +0100
     3.3 @@ -464,7 +464,7 @@ int gpf_emulate_4gb(struct cpu_user_regs
     3.4      return 0;
     3.5  
     3.6   page_fault:
     3.7 -    propagate_page_fault((unsigned long)pb, 4);
     3.8 +    propagate_page_fault((unsigned long)pb, 0); /* read fault */
     3.9      return EXCRET_fault_fixed;
    3.10  }
    3.11  
     4.1 --- a/xen/arch/x86/x86_emulate.c	Sat Jun 17 12:57:03 2006 +0100
     4.2 +++ b/xen/arch/x86/x86_emulate.c	Sun Jun 18 19:24:00 2006 +0100
     4.3 @@ -1146,7 +1146,7 @@ x86_emulate_read_std(
     4.4      *val = 0;
     4.5      if ( copy_from_user((void *)val, (void *)addr, bytes) )
     4.6      {
     4.7 -        propagate_page_fault(addr, 4); /* user mode, read fault */
     4.8 +        propagate_page_fault(addr, 0); /* read fault */
     4.9          return X86EMUL_PROPAGATE_FAULT;
    4.10      }
    4.11      return X86EMUL_CONTINUE;
    4.12 @@ -1161,7 +1161,7 @@ x86_emulate_write_std(
    4.13  {
    4.14      if ( copy_to_user((void *)addr, (void *)&val, bytes) )
    4.15      {
    4.16 -        propagate_page_fault(addr, 6); /* user mode, write fault */
    4.17 +        propagate_page_fault(addr, PGERR_write_access); /* write fault */
    4.18          return X86EMUL_PROPAGATE_FAULT;
    4.19      }
    4.20      return X86EMUL_CONTINUE;