ia64/xen-unstable

changeset 2758:66b1445a11a0

bitkeeper revision 1.1159.1.295 (4180ee31v7apKAXQ_iCd672ndA6I0Q)

New TLB-flush logic. By basing NEED_FLUSH() on the current time, as
well as the CPU and page timestamps, I was able to get rid of the
tedious epoch logic. We now only need special-case logic when the
32-bit clock wraps. In debug builds I deliberately restrict the clock to
10 bits, so that the wrap logic gets exercised.
author kaf24@freefall.cl.cam.ac.uk
date Thu Oct 28 13:03:45 2004 +0000 (2004-10-28)
parents 11a5fe965981
children e20b88a13953 4b524192e62b
files xen/arch/x86/flushtlb.c xen/arch/x86/smp.c xen/include/asm-x86/flushtlb.h
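
In outline: each CPU records the clock in tlbflush_time[cpu] whenever it
flushes, each page records the clock when its mapping may have tainted a
TLB, and NEED_FLUSH() compares the two stamps against the current time. A
minimal sketch of a hypothetical caller (the struct page and its
tlbflush_timestamp field are illustrative stand-ins, not this changeset's
types):

    #include <stdint.h>

    #define NR_CPUS 32                        /* illustrative */

    static uint32_t tlbflush_clock = 1;       /* the virtual TLB clock */
    static uint32_t tlbflush_time[NR_CPUS];   /* per-CPU last-flush stamps */

    /* Hypothetical page descriptor; real Xen keeps a similar per-page stamp. */
    struct page {
        uint32_t tlbflush_timestamp;  /* set when a mapping may taint TLBs */
    };

    /* May cpu's TLB still hold a stale mapping of pg? */
    static int page_needs_flush(unsigned int cpu, const struct page *pg)
    {
        /* Same predicate as NEED_FLUSH() in the header diff below. */
        uint32_t curr      = tlbflush_clock;
        uint32_t cpu_stamp = tlbflush_time[cpu];
        uint32_t lastuse   = pg->tlbflush_timestamp;

        return (curr == 0) ||
               ((cpu_stamp <= lastuse) && (lastuse <= curr));
    }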
line diff
     1.1 --- a/xen/arch/x86/flushtlb.c	Thu Oct 28 10:22:45 2004 +0000
     1.2 +++ b/xen/arch/x86/flushtlb.c	Thu Oct 28 13:03:45 2004 +0000
     1.3 @@ -12,7 +12,14 @@
     1.4  #include <xen/softirq.h>
     1.5  #include <asm/flushtlb.h>
     1.6  
     1.7 -u32 tlbflush_clock;
     1.8 +/* Debug builds: Wrap frequently to stress-test the wrap logic. */
     1.9 +#ifdef NDEBUG
    1.10 +#define WRAP_MASK (0xFFFFFFFFU)
    1.11 +#else
    1.12 +#define WRAP_MASK (0x000003FFU)
    1.13 +#endif
    1.14 +
    1.15 +u32 tlbflush_clock = 1U;
    1.16  u32 tlbflush_time[NR_CPUS];
    1.17  
    1.18  void write_cr3(unsigned long cr3)
    1.19 @@ -20,38 +27,42 @@ void write_cr3(unsigned long cr3)
    1.20      u32 t, t1, t2;
    1.21      unsigned long flags;
    1.22  
    1.23 +    /* This non-reentrant function is sometimes called in interrupt context. */
    1.24      local_irq_save(flags);
    1.25  
    1.26      /*
    1.27 -     * Tick the clock, which is incremented by two each time. The L.S.B. is
    1.28 -     * used to decide who will control the epoch change, when one is required.
    1.29 +     * STEP 1. Increment the virtual clock *before* flushing the TLB.
    1.30 +     *         If we do it after, we race other CPUs invalidating PTEs.
    1.31 +     *         (e.g., a page invalidated after the flush might get the old 
    1.32 +     *          timestamp, but this CPU can speculatively fetch the mapping
    1.33 +     *          into its TLB after the flush but before inc'ing the clock).
    1.34       */
    1.35 +
    1.36      t = tlbflush_clock;
    1.37      do {
    1.38 -        t1 = t;      /* t1: Time before this clock tick. */
    1.39 -        t2 = t + 2;  /* t2: Time after this clock tick. */
    1.40 -        if ( unlikely(t2 & 1) )
    1.41 -        {
    1.42 -            /* Epoch change: someone else is leader. */
    1.43 -            t2 = t; /* no tick */
    1.44 +        t1 = t2 = t;
     1.45 +        /* Clock wrapped: someone else is leading a global TLB shootdown. */
    1.46 +        if ( unlikely(t1 == 0) )
    1.47              goto skip_clocktick;
    1.48 -        }
    1.49 -        else if ( unlikely((t2 & TLBCLOCK_EPOCH_MASK) == 0) )
    1.50 -        {
    1.51 -            /* Epoch change: we may become leader. */
    1.52 -            t2--; /* half tick */
    1.53 -        }
    1.54 +        t2 = (t + 1) & WRAP_MASK;
    1.55      }
    1.56      while ( unlikely((t = cmpxchg(&tlbflush_clock, t1, t2)) != t1) );
    1.57  
    1.58 -    /* Epoch change: we are the leader. */
    1.59 -    if ( unlikely(t2 & 1) )
    1.60 +    /* Clock wrapped: we will lead a global TLB shootdown. */
    1.61 +    if ( unlikely(t2 == 0) )
    1.62          raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ);
    1.63  
    1.64 +    /*
    1.65 +     * STEP 2. Update %CR3, thereby flushing the TLB.
    1.66 +     */
    1.67 +
    1.68   skip_clocktick:
    1.69      __asm__ __volatile__ ( "mov"__OS" %0, %%cr3" : : "r" (cr3) : "memory" );
    1.70  
    1.71 -    /* Update this CPU's timestamp to new time. */
    1.72 +    /*
    1.73 +     * STEP 3. Update this CPU's timestamp.
    1.74 +     */
    1.75 +
    1.76      tlbflush_time[smp_processor_id()] = t2;
    1.77  
    1.78      local_irq_restore(flags);
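
The loop above reserves time 0 as a "wrap in progress" marker: whoever
ticks the clock to 0 leads the global shootdown, and everyone who reads 0
skips ticking until time restarts. A user-space sketch of the same loop,
assuming GCC's __sync_val_compare_and_swap in place of Xen's cmpxchg
(tick_clock and lead_shootdown are illustrative names):

    #include <stdint.h>

    #define WRAP_MASK 0x3FFu   /* debug-build width, as in the patch */

    static uint32_t clock_ = 1;

    /* Tick the clock once. Sets *lead_shootdown if this caller's tick
     * wrapped the clock to 0 and must therefore lead a global flush. */
    static uint32_t tick_clock(int *lead_shootdown)
    {
        uint32_t t = clock_, t1, t2;

        *lead_shootdown = 0;
        do {
            t1 = t2 = t;
            if (t1 == 0)                  /* wrap already being handled */
                return t2;
            t2 = (t + 1) & WRAP_MASK;
        } while ((t = __sync_val_compare_and_swap(&clock_, t1, t2)) != t1);

        *lead_shootdown = (t2 == 0);      /* our tick caused the wrap */
        return t2;
    }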
     2.1 --- a/xen/arch/x86/smp.c	Thu Oct 28 10:22:45 2004 +0000
     2.2 +++ b/xen/arch/x86/smp.c	Thu Oct 28 13:03:45 2004 +0000
     2.3 @@ -261,15 +261,9 @@ void flush_tlb_mask(unsigned long mask)
     2.4      }
     2.5  }
     2.6  
     2.7 -/*
     2.8 - * NB. Must be called with no locks held and interrupts enabled.
     2.9 - *     (e.g., softirq context).
    2.10 - */
    2.11 +/* Call with no locks held and interrupts enabled (e.g., softirq context). */
    2.12  void new_tlbflush_clock_period(void)
    2.13  {
    2.14 -    /* Only the leader gets here. Noone else should tick the clock. */
    2.15 -    ASSERT(((tlbflush_clock+1) & TLBCLOCK_EPOCH_MASK) == 0);
    2.16 -
    2.17      /* Flush everyone else. We definitely flushed just before entry. */
    2.18      if ( smp_num_cpus > 1 )
    2.19      {
    2.20 @@ -285,6 +279,7 @@ void new_tlbflush_clock_period(void)
    2.21      }
    2.22  
    2.23      /* No need for atomicity: we are the only possible updater. */
    2.24 +    ASSERT(tlbflush_clock == 0);
    2.25      tlbflush_clock++;
    2.26  }
    2.27  
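
The handler's contract: the clock stays at 0 for the duration of the
shootdown, so NEED_FLUSH() conservatively returns true everywhere, and
time restarts at 1 to keep 0 reserved. A single-threaded simulation of one
full debug-build period (illustrative only; no IPIs or %cr3 writes):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define WRAP_MASK 0x3FFu

    static uint32_t clock_ = 1;

    int main(void)
    {
        unsigned long ticks = 0;

        for (;;) {
            uint32_t t2 = (clock_ + 1) & WRAP_MASK;  /* one write_cr3() tick */
            clock_ = t2;
            if (t2 == 0) {
                /* Leader path: flush everyone (elided), restart time at 1. */
                assert(clock_ == 0);
                clock_ = 1;
                break;
            }
            ticks++;
        }

        /* 1 -> 2 -> ... -> 0x3FF, then the wrap tick to 0. */
        printf("ordinary ticks before wrap: %lu\n", ticks);
        assert(ticks == WRAP_MASK - 1);
        return 0;
    }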
     3.1 --- a/xen/include/asm-x86/flushtlb.h	Thu Oct 28 10:22:45 2004 +0000
     3.2 +++ b/xen/include/asm-x86/flushtlb.h	Thu Oct 28 13:03:45 2004 +0000
     3.3 @@ -13,40 +13,36 @@
     3.4  #include <xen/config.h>
     3.5  #include <xen/smp.h>
     3.6  
     3.7 -/*
     3.8 - * Every time the TLB clock passes an "epoch", every CPU's TLB is flushed.
     3.9 - * This allows us to deal gracefully with a bounded (a.k.a. wrapping) clock.
    3.10 - */
    3.11 -#define TLBCLOCK_EPOCH_MASK ((1U<<20)-1)
    3.12 +/* The current time as shown by the virtual TLB clock. */
    3.13 +extern u32 tlbflush_clock;
    3.14 +
    3.15 +/* Time at which each CPU's TLB was last flushed. */
    3.16 +extern u32 tlbflush_time[NR_CPUS];
    3.17 +
    3.18 +#define tlbflush_current_time() tlbflush_clock
    3.19  
    3.20  /*
    3.21 - * 'cpu_stamp' is the current timestamp for the CPU we are testing.
    3.22 - * 'lastuse_stamp' is a timestamp taken when the PFN we are testing was last 
    3.23 + * @cpu_stamp is the timestamp at last TLB flush for the CPU we are testing.
    3.24 + * @lastuse_stamp is a timestamp taken when the PFN we are testing was last 
    3.25   * used for a purpose that may have caused the CPU's TLB to become tainted.
    3.26   */
    3.27  static inline int NEED_FLUSH(u32 cpu_stamp, u32 lastuse_stamp)
    3.28  {
    3.29 +    u32 curr_time = tlbflush_current_time();
    3.30      /*
    3.31 -     * Worst case in which a flush really is required:
    3.32 -     *  1. CPU has not flushed since end of last epoch.
    3.33 -     *  2. Clock has run to end of current epoch.
    3.34 -     *  THEREFORE: Maximum valid difference is (EPOCH_MASK + 1).
    3.35 -     * N.B. The clock cannot run further until the CPU has flushed once more
    3.36 -     * and updated to current time, so this is as 'far out' as it can get.
    3.37 +     * Two cases:
    3.38 +     *  1. During a wrap, the clock ticks over to 0 while CPUs catch up. For
    3.39 +     *     safety during this period, we force a flush if @curr_time == 0.
    3.40 +     *  2. Otherwise, we look to see if @cpu_stamp <= @lastuse_stamp.
     3.41 +     *     To reject false positives once the clock has wrapped, we
     3.42 +     *     also check @curr_time: if it is less than @lastuse_stamp, the
     3.43 +     *     clock definitely wrapped and no flush is needed (one is forced every wrap).
    3.44       */
    3.45 -    return ((lastuse_stamp - cpu_stamp) <= (TLBCLOCK_EPOCH_MASK + 1));
    3.46 +    return ((curr_time == 0) ||
    3.47 +            ((cpu_stamp <= lastuse_stamp) &&
    3.48 +             (lastuse_stamp <= curr_time)));
    3.49  }
    3.50  
    3.51 -/*
    3.52 - * The least significant bit of the clock indicates whether an epoch-change
    3.53 - * is in progress. All other bits form the counter that is incremented on
    3.54 - * each clock tick.
    3.55 - */
    3.56 -extern u32 tlbflush_clock;
    3.57 -extern u32 tlbflush_time[NR_CPUS];
    3.58 -
    3.59 -#define tlbflush_current_time() tlbflush_clock
    3.60 -
    3.61  extern void new_tlbflush_clock_period(void);
    3.62  
    3.63  /* Read pagetable base. */
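
The predicate is easy to exercise in isolation. A small harness, as a
sketch (need_flush mirrors NEED_FLUSH() above; the stamp values are
invented), covering the three interesting regimes:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define WRAP_MASK 0x3FFu   /* debug-build clock width */

    static uint32_t clock_;    /* stands in for tlbflush_clock */

    /* Mirror of NEED_FLUSH() from flushtlb.h. */
    static int need_flush(uint32_t cpu_stamp, uint32_t lastuse_stamp)
    {
        uint32_t curr_time = clock_;
        return (curr_time == 0) ||
               ((cpu_stamp <= lastuse_stamp) && (lastuse_stamp <= curr_time));
    }

    int main(void)
    {
        /* CPU flushed at t=5, page last used at t=9, clock now 20:
         * the stale mapping may still be cached, so flush. */
        clock_ = 20;
        assert(need_flush(5, 9));

        /* CPU flushed at t=9, after the page's last use at t=5: no flush. */
        assert(!need_flush(9, 5));

        /* Clock is 0 (wrap in progress): flush unconditionally. */
        clock_ = 0;
        assert(need_flush(9, 5));

        /* Post-wrap: a pre-wrap stamp (0x3FF) exceeds curr_time, proving a
         * wrap happened; the forced global flush already covered this CPU. */
        clock_ = 3;
        assert(!need_flush(2, WRAP_MASK));

        puts("NEED_FLUSH wrap-logic checks passed");
        return 0;
    }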