ia64/xen-unstable

changeset 11786:4fdcccd22352

[XEN] Fix race in shadow invlpg
This fixes a crash under mmstress in SMP linux guests, where one vcpu
could remove shadow entries when another was reading them for invlpg.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author Tim Deegan <tim.deegan@xensource.com>
date Thu Oct 12 11:08:48 2006 +0100 (2006-10-12)
parents bd2be8a8fc72
children b0ee6789e428
files xen/arch/x86/mm/shadow/multi.c xen/include/asm-x86/perfc_defn.h
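
The race described above is between one vcpu tearing down shadow pagetables and another vcpu reading them through the shadow linear map in sh_invlpg().  The fix below stops dereferencing pointers into that map directly and instead copies each entry into a local variable with __copy_from_user(), so a concurrent teardown can at worst make the copy fault, never crash the reader.  The following is only a minimal, self-contained sketch of that "copy, then test" pattern: the type, macro and helper names are illustrative rather than Xen's, and a plain memcpy() stands in for __copy_from_user()'s fault-tolerant copy.

/* Sketch of the lock-free read pattern the patch adopts (illustrative
 * names, not Xen's).  In Xen the copy is __copy_from_user(), which
 * recovers from the page fault raised if another vcpu has just freed
 * the shadow table; a plain memcpy() stands in for it here. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint64_t shadow_entry_t;           /* illustrative entry type        */
#define SH_PRESENT 0x1ULL                  /* illustrative _PAGE_PRESENT bit */

/* Stand-in for __copy_from_user(): 0 on success, nonzero on fault.
 * This sketch cannot actually fault, so it always succeeds. */
static int copy_entry_safely(shadow_entry_t *dst, const shadow_entry_t *src)
{
    memcpy(dst, src, sizeof(*dst));
    return 0;
}

/* Read one shadow entry without holding the shadow lock.  The caller
 * never dereferences 'slot' directly, so a racing teardown can only
 * make the copy fail, which we treat as "nothing shadowed". */
static int read_shadow_entry(const shadow_entry_t *slot, shadow_entry_t *out)
{
    shadow_entry_t local;

    if ( copy_entry_safely(&local, slot) != 0 )
        return 0;                          /* mapping vanished under us */
    if ( !(local & SH_PRESENT) )
        return 0;                          /* nothing shadowed here */
    *out = local;
    return 1;
}

int main(void)
{
    shadow_entry_t table[1] = { 0x1000 | SH_PRESENT };
    shadow_entry_t e;
    printf("present: %d\n", read_shadow_entry(&table[0], &e));
    return 0;
}
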
line diff
     1.1 --- a/xen/arch/x86/mm/shadow/multi.c	Thu Oct 12 10:56:41 2006 +0100
     1.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Thu Oct 12 11:08:48 2006 +0100
     1.3 @@ -1375,80 +1375,6 @@ static int shadow_set_l1e(struct vcpu *v
     1.4  
     1.5  
     1.6  /**************************************************************************/
     1.7 -/* These functions take a vcpu and a virtual address, and return a pointer
     1.8 - * to the appropriate level N entry from the shadow tables.  
     1.9 - * If the necessary tables are not present in the shadow, they return NULL. */
    1.10 -
    1.11 -/* N.B. The use of GUEST_PAGING_LEVELS here is correct.  If the shadow has
    1.12 - * more levels than the guest, the upper levels are always fixed and do not 
    1.13 - * reflect any information from the guest, so we do not use these functions 
    1.14 - * to access them. */
    1.15 -
    1.16 -#if GUEST_PAGING_LEVELS >= 4
    1.17 -static shadow_l4e_t *
    1.18 -shadow_get_l4e(struct vcpu *v, unsigned long va)
    1.19 -{
    1.20 -    /* Reading the top level table is always valid. */
    1.21 -    return sh_linear_l4_table(v) + shadow_l4_linear_offset(va);
    1.22 -}
    1.23 -#endif /* GUEST_PAGING_LEVELS >= 4 */
    1.24 -
    1.25 -
    1.26 -#if GUEST_PAGING_LEVELS >= 3
    1.27 -static shadow_l3e_t *
    1.28 -shadow_get_l3e(struct vcpu *v, unsigned long va)
    1.29 -{
    1.30 -#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
    1.31 -    /* Get the l4 */
    1.32 -    shadow_l4e_t *sl4e = shadow_get_l4e(v, va);
    1.33 -    ASSERT(sl4e != NULL);
    1.34 -    if ( !(shadow_l4e_get_flags(*sl4e) & _PAGE_PRESENT) )
    1.35 -        return NULL;
    1.36 -    ASSERT(valid_mfn(shadow_l4e_get_mfn(*sl4e)));
    1.37 -    /* l4 was present; OK to get the l3 */
    1.38 -    return sh_linear_l3_table(v) + shadow_l3_linear_offset(va);
    1.39 -#else /* PAE... */
    1.40 -    /* Top level is always mapped */
    1.41 -    ASSERT(v->arch.shadow_vtable);
    1.42 -    return ((shadow_l3e_t *)v->arch.shadow_vtable) + shadow_l3_linear_offset(va);
    1.43 -#endif 
    1.44 -}
    1.45 -#endif /* GUEST_PAGING_LEVELS >= 3 */
    1.46 -
    1.47 -
    1.48 -static shadow_l2e_t *
    1.49 -shadow_get_l2e(struct vcpu *v, unsigned long va)
    1.50 -{
    1.51 -#if GUEST_PAGING_LEVELS >= 3  /* 64bit/PAE... */
    1.52 -    /* Get the l3 */
    1.53 -    shadow_l3e_t *sl3e = shadow_get_l3e(v, va);
    1.54 -    if ( sl3e == NULL || !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) )
    1.55 -        return NULL;
    1.56 -    ASSERT(valid_mfn(shadow_l3e_get_mfn(*sl3e)));
    1.57 -    /* l3 was present; OK to get the l2 */
    1.58 -#endif
    1.59 -    return sh_linear_l2_table(v) + shadow_l2_linear_offset(va);
    1.60 -}
    1.61 -
    1.62 -
    1.63 -#if 0 // avoid the compiler warning for now...
    1.64 -
    1.65 -static shadow_l1e_t *
    1.66 -shadow_get_l1e(struct vcpu *v, unsigned long va)
    1.67 -{
    1.68 -    /* Get the l2 */
    1.69 -    shadow_l2e_t *sl2e = shadow_get_l2e(v, va);
    1.70 -    if ( sl2e == NULL || !(shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT) )
    1.71 -        return NULL;
    1.72 -    ASSERT(valid_mfn(shadow_l2e_get_mfn(*sl2e)));
    1.73 -    /* l2 was present; OK to get the l1 */
    1.74 -    return sh_linear_l1_table(v) + shadow_l1_linear_offset(va);
    1.75 -}
    1.76 -
    1.77 -#endif
    1.78 -
    1.79 -
    1.80 -/**************************************************************************/
    1.81  /* Macros to walk pagetables.  These take the shadow of a pagetable and 
    1.82   * walk every "interesting" entry.  That is, they don't touch Xen mappings, 
    1.83   * and for 32-bit l2s shadowed onto PAE or 64-bit, they only touch every 
    1.84 @@ -2050,6 +1976,12 @@ sh_make_monitor_table(struct vcpu *v)
    1.85   * they are needed.  The "demand" argument is non-zero when handling
    1.86   * a demand fault (so we know what to do about accessed bits &c).
    1.87   * If the necessary tables are not present in the guest, they return NULL. */
    1.88 +
    1.89 +/* N.B. The use of GUEST_PAGING_LEVELS here is correct.  If the shadow has
    1.90 + * more levels than the guest, the upper levels are always fixed and do not 
    1.91 + * reflect any information from the guest, so we do not use these functions 
    1.92 + * to access them. */
    1.93 +
    1.94  #if GUEST_PAGING_LEVELS >= 4
    1.95  static shadow_l4e_t * shadow_get_and_create_l4e(struct vcpu *v, 
    1.96                                                  walk_t *gw, 
    1.97 @@ -3223,26 +3155,62 @@ sh_invlpg(struct vcpu *v, unsigned long 
    1.98   * instruction should be issued on the hardware, or 0 if it's safe not
    1.99   * to do so. */
   1.100  {
   1.101 -    shadow_l2e_t *ptr_sl2e = shadow_get_l2e(v, va);
   1.102 -
   1.103 -    // XXX -- might be a good thing to prefetch the va into the shadow
   1.104 -
   1.105 -    // no need to flush anything if there's no SL2...
   1.106 -    //
   1.107 -    if ( !ptr_sl2e )
   1.108 +    shadow_l2e_t sl2e;
   1.109 +    
   1.110 +    perfc_incrc(shadow_invlpg);
   1.111 +
   1.112 +    /* First check that we can safely read the shadow l2e.  SMP/PAE linux can
   1.113 +     * run as high as 6% of invlpg calls where we haven't shadowed the l2 
   1.114 +     * yet. */
   1.115 +#if SHADOW_PAGING_LEVELS == 4
   1.116 +    {
   1.117 +        shadow_l3e_t sl3e;
   1.118 +        if ( !(shadow_l4e_get_flags(
   1.119 +                   sh_linear_l4_table(v)[shadow_l4_linear_offset(va)])
   1.120 +               & _PAGE_PRESENT) )
   1.121 +            return 0;
   1.122 +        /* This must still be a copy-from-user because we don't have the
   1.123 +         * shadow lock, and the higher-level shadows might disappear
   1.124 +         * under our feet. */
   1.125 +        if ( __copy_from_user(&sl3e, (sh_linear_l3_table(v) 
   1.126 +                                      + shadow_l3_linear_offset(va)),
   1.127 +                              sizeof (sl3e)) != 0 )
   1.128 +        {
   1.129 +            perfc_incrc(shadow_invlpg_fault);
   1.130 +            return 0;
   1.131 +        }
    1.132 +        if ( !(shadow_l3e_get_flags(sl3e) & _PAGE_PRESENT) )
   1.133 +            return 0;
   1.134 +    }
   1.135 +#elif SHADOW_PAGING_LEVELS == 3
   1.136 +    if ( !(shadow_l3e_get_flags(
   1.137 +          ((shadow_l3e_t *)v->arch.shadow_vtable)[shadow_l3_linear_offset(va)])
   1.138 +           & _PAGE_PRESENT) )
   1.139 +        // no need to flush anything if there's no SL2...
   1.140          return 0;
   1.141 +#endif
   1.142 +
   1.143 +    /* This must still be a copy-from-user because we don't have the shadow
   1.144 +     * lock, and the higher-level shadows might disappear under our feet. */
   1.145 +    if ( __copy_from_user(&sl2e, 
   1.146 +                          sh_linear_l2_table(v) + shadow_l2_linear_offset(va),
   1.147 +                          sizeof (sl2e)) != 0 )
   1.148 +    {
   1.149 +        perfc_incrc(shadow_invlpg_fault);
   1.150 +        return 0;
   1.151 +    }
   1.152  
   1.153      // If there's nothing shadowed for this particular sl2e, then
   1.154      // there is no need to do an invlpg, either...
   1.155      //
   1.156 -    if ( !(shadow_l2e_get_flags(*ptr_sl2e) & _PAGE_PRESENT) )
   1.157 +    if ( !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) )
   1.158          return 0;
   1.159  
   1.160      // Check to see if the SL2 is a splintered superpage...
   1.161      // If so, then we'll need to flush the entire TLB (because that's
   1.162      // easier than invalidating all of the individual 4K pages).
   1.163      //
   1.164 -    if ( (mfn_to_page(shadow_l2e_get_mfn(*ptr_sl2e))->count_info &
   1.165 +    if ( (mfn_to_page(shadow_l2e_get_mfn(sl2e))->count_info &
   1.166            PGC_SH_type_mask) == PGC_SH_fl1_shadow )
   1.167      {
   1.168          local_flush_tlb();
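
The contract stated at the top of this hunk (diff lines 1.97-1.99) is that sh_invlpg() returns 1 when a hardware invlpg should still be issued for the address, and 0 when it is safe to skip it; in the splintered-superpage branch above the function has already flushed the whole local TLB itself, which presumably makes a further invlpg redundant.  The sketch below only illustrates that caller-side contract: apart from sh_invlpg()'s 0/1 meaning, every name (the stub body, flush_tlb_one_local(), guest_invlpg()) is an assumption, not the actual Xen call path.

/* Caller-side sketch only: sh_invlpg()'s return value tells the caller
 * whether a hardware invlpg is still required. */
#include <stdio.h>

struct vcpu;                                        /* opaque here */

/* Stub standing in for the real sh_invlpg() from this changeset. */
static int sh_invlpg(struct vcpu *v, unsigned long va)
{
    (void)v; (void)va;
    return 1;                                       /* pretend a flush is needed */
}

/* Hypothetical wrapper around the hardware invlpg instruction. */
static void flush_tlb_one_local(unsigned long va)
{
    printf("invlpg %#lx\n", va);
}

static void guest_invlpg(struct vcpu *v, unsigned long va)
{
    if ( sh_invlpg(v, va) )                         /* 0 means safe to skip */
        flush_tlb_one_local(va);
}

int main(void)
{
    guest_invlpg(NULL, 0xdeadb000UL);
    return 0;
}
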
     2.1 --- a/xen/include/asm-x86/perfc_defn.h	Thu Oct 12 10:56:41 2006 +0100
     2.2 +++ b/xen/include/asm-x86/perfc_defn.h	Thu Oct 12 11:08:48 2006 +0100
     2.3 @@ -81,8 +81,8 @@ PERFCOUNTER_CPU(shadow_up_pointer,     "
     2.4  PERFCOUNTER_CPU(shadow_unshadow_bf,    "shadow unshadow brute-force")
     2.5  PERFCOUNTER_CPU(shadow_get_page_fail,  "shadow_get_page_from_l1e failed")
     2.6  PERFCOUNTER_CPU(shadow_guest_walk,     "shadow walks guest tables")
     2.7 -PERFCOUNTER_CPU(shadow_walk_cache_hit, "shadow walk-cache hits")
     2.8 -PERFCOUNTER_CPU(shadow_walk_cache_miss, "shadow walk-cache misses")
     2.9 +PERFCOUNTER_CPU(shadow_invlpg,         "shadow emulates invlpg")
    2.10 +PERFCOUNTER_CPU(shadow_invlpg_fault,   "shadow invlpg faults")
    2.11  
    2.12  
    2.13  /*#endif*/ /* __XEN_PERFC_DEFN_H__ */
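
The perfc_defn.h hunk above replaces the walk-cache counters with the two counters that the new sh_invlpg() path increments via perfc_incrc().  perfc_defn.h is an X-macro style list: each PERFCOUNTER_CPU(name, "description") entry is expanded differently by the files that include it, which is how perfc_incrc(shadow_invlpg) in multi.c resolves to a counter slot.  Below is a minimal sketch of that idiom with made-up macro and array names; Xen's real plumbing (per-CPU storage, how the counters are dumped) differs in detail.

/* Sketch of the X-macro idiom behind a perfc_defn.h-style counter list.
 * Macro and array names here are illustrative, not Xen's. */
#include <stdio.h>

/* --- what a counter-definition header would contain -------------------- */
#define COUNTER_LIST(C)                              \
    C(shadow_invlpg,       "shadow emulates invlpg") \
    C(shadow_invlpg_fault, "shadow invlpg faults")

/* --- first expansion: an enum of counter indices ------------------------ */
#define AS_ENUM(name, desc) PERF_##name,
enum perf_counter { COUNTER_LIST(AS_ENUM) PERF_NR };

/* --- second expansion: the human-readable descriptions ------------------ */
#define AS_DESC(name, desc) desc,
static const char *perf_desc[PERF_NR] = { COUNTER_LIST(AS_DESC) };

static unsigned long perf_count[PERF_NR];

/* In Xen this bumps a per-CPU counter; a single array suffices here. */
#define perfc_incrc(name) (perf_count[PERF_##name]++)

int main(void)
{
    perfc_incrc(shadow_invlpg);
    perfc_incrc(shadow_invlpg);
    perfc_incrc(shadow_invlpg_fault);
    for (int i = 0; i < PERF_NR; i++)
        printf("%-30s %lu\n", perf_desc[i], perf_count[i]);
    return 0;
}
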