ia64/xen-unstable

changeset 2752:8a95c46fe237

bitkeeper revision 1.1159.1.290 (417fcacfkfQjkHbB1_isdievDAarQA)

Further fixes to the TLB-flush logic.
author kaf24@freefall.cl.cam.ac.uk
date Wed Oct 27 16:20:31 2004 +0000 (2004-10-27)
parents 35abe76a49b1
children 1511d2acc1a4
files xen/arch/x86/domain.c xen/arch/x86/flushtlb.c xen/arch/x86/memory.c xen/arch/x86/pdb-stub.c xen/arch/x86/smp.c xen/arch/x86/traps.c xen/common/page_alloc.c xen/include/asm-x86/flushtlb.h xen/include/asm-x86/page.h xen/include/asm-x86/processor.h
line diff
     1.1 --- a/xen/arch/x86/domain.c	Wed Oct 27 14:26:56 2004 +0000
     1.2 +++ b/xen/arch/x86/domain.c	Wed Oct 27 16:20:31 2004 +0000
     1.3 @@ -387,7 +387,6 @@ void switch_to(struct domain *prev_p, st
     1.4  
     1.5          /* Switch page tables. */
     1.6          write_ptbase(&next_p->mm);
     1.7 -        tlb_clocktick();
     1.8      }
     1.9  
    1.10      if ( unlikely(prev_p->io_bitmap != NULL) || 
     2.1 --- a/xen/arch/x86/flushtlb.c	Wed Oct 27 14:26:56 2004 +0000
     2.2 +++ b/xen/arch/x86/flushtlb.c	Wed Oct 27 16:20:31 2004 +0000
     2.3 @@ -12,38 +12,47 @@
     2.4  #include <xen/softirq.h>
     2.5  #include <asm/flushtlb.h>
     2.6  
     2.7 -unsigned long tlbflush_epoch_changing;
     2.8  u32 tlbflush_clock;
     2.9  u32 tlbflush_time[NR_CPUS];
    2.10  
    2.11 -void tlb_clocktick(void)
    2.12 +void write_cr3(unsigned long cr3)
    2.13  {
    2.14 -    u32 y, ny;
    2.15 +    u32 t, t1, t2;
    2.16      unsigned long flags;
    2.17  
    2.18      local_irq_save(flags);
    2.19  
    2.20 -    /* Tick the clock. 'y' contains the current time after the tick. */
    2.21 -    ny = tlbflush_clock;
    2.22 +    /*
    2.23 +     * Tick the clock, which is incremented by two each time. The L.S.B. is
    2.24 +     * used to decide who will control the epoch change, when one is required.
    2.25 +     */
    2.26 +    t = tlbflush_clock;
    2.27      do {
    2.28 -#ifdef CONFIG_SMP
    2.29 -        if ( unlikely(((y = ny+1) & TLBCLOCK_EPOCH_MASK) == 0) )
    2.30 +        t1 = t;      /* t1: Time before this clock tick. */
    2.31 +        t2 = t + 2;  /* t2: Time after this clock tick. */
    2.32 +        if ( unlikely(t2 & 1) )
    2.33          {
    2.34 -            /* Epoch is changing: the first to detect this is the leader. */
    2.35 -            if ( unlikely(!test_and_set_bit(0, &tlbflush_epoch_changing)) )
    2.36 -                raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ);
    2.37 -            /* The clock doesn't tick again until end of the epoch change. */
    2.38 -            y--;
    2.39 -            break;
    2.40 +            /* Epoch change: someone else is leader. */
    2.41 +            t2 = t; /* no tick */
    2.42 +            goto skip_clocktick;
    2.43 +        }
    2.44 +        else if ( unlikely((t2 & TLBCLOCK_EPOCH_MASK) == 0) )
    2.45 +        {
    2.46 +            /* Epoch change: we may become leader. */
    2.47 +            t2--; /* half tick */
    2.48          }
    2.49 -#else
    2.50 -        y = ny+1;
    2.51 -#endif
    2.52      }
    2.53 -    while ( unlikely((ny = cmpxchg(&tlbflush_clock, y-1, y)) != y-1) );
    2.54 +    while ( unlikely((t = cmpxchg(&tlbflush_clock, t1, t2)) != t1) );
    2.55 +
    2.56 +    /* Epoch change: we are the leader. */
    2.57 +    if ( unlikely(t2 & 1) )
    2.58 +        raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ);
    2.59 +
    2.60 + skip_clocktick:
    2.61 +    __asm__ __volatile__ ( "mov"__OS" %0, %%cr3" : : "r" (cr3) : "memory" );
    2.62  
    2.63      /* Update this CPU's timestamp to new time. */
    2.64 -    tlbflush_time[smp_processor_id()] = y;
    2.65 +    tlbflush_time[smp_processor_id()] = t2;
    2.66  
    2.67      local_irq_restore(flags);
    2.68  }
     3.1 --- a/xen/arch/x86/memory.c	Wed Oct 27 14:26:56 2004 +0000
     3.2 +++ b/xen/arch/x86/memory.c	Wed Oct 27 16:20:31 2004 +0000
     3.3 @@ -781,7 +781,7 @@ void put_page_type(struct pfn_info *page
     3.4          if ( unlikely((nx & PGT_count_mask) == 0) )
     3.5          {
     3.6              /* Record TLB information for flush later. Races are harmless. */
     3.7 -            page->tlbflush_timestamp = tlbflush_clock;
     3.8 +            page->tlbflush_timestamp = tlbflush_current_time();
     3.9              
    3.10              if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
    3.11                   likely(nx & PGT_validated) )
    3.12 @@ -989,13 +989,7 @@ static int do_extended_command(unsigned 
    3.13  
    3.14              write_ptbase(&d->mm);
    3.15  
    3.16 -            put_page_and_type(&frame_table[old_base_pfn]);    
    3.17 -
    3.18 -            /*
    3.19 -             * Note that we tick the clock /after/ dropping the old base's
    3.20 -             * reference count. If the page tables got freed then this will
    3.21 -             * avoid unnecessary TLB flushes when the pages are reused.  */
    3.22 -            tlb_clocktick();
    3.23 +            put_page_and_type(&frame_table[old_base_pfn]);
    3.24          }
    3.25          else
    3.26          {
     4.1 --- a/xen/arch/x86/pdb-stub.c	Wed Oct 27 14:26:56 2004 +0000
     4.2 +++ b/xen/arch/x86/pdb-stub.c	Wed Oct 27 16:20:31 2004 +0000
     4.3 @@ -1089,9 +1089,7 @@ int pdb_handle_exception(int exceptionVe
     4.4      int signal = 0;
     4.5      struct pdb_breakpoint* bkpt;
     4.6      int watchdog_save;
     4.7 -    unsigned long cr3;
     4.8 -
     4.9 -    __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
    4.10 +    unsigned long cr3 = read_cr3();
    4.11  
    4.12      /* If the exception is an int3 from user space then pdb is only
    4.13         interested if it re-wrote an instruction to set the breakpoint.
     5.1 --- a/xen/arch/x86/smp.c	Wed Oct 27 14:26:56 2004 +0000
     5.2 +++ b/xen/arch/x86/smp.c	Wed Oct 27 16:20:31 2004 +0000
     5.3 @@ -286,13 +286,6 @@ void new_tlbflush_clock_period(void)
     5.4  
     5.5      /* No need for atomicity: we are the only possible updater. */
     5.6      tlbflush_clock++;
     5.7 -
     5.8 -    /* Finally, signal the end of the epoch-change protocol. */
     5.9 -    wmb();
    5.10 -    tlbflush_epoch_changing = 0;
    5.11 -
    5.12 -    /* In case we got to the end of the next epoch already. */
    5.13 -    tlb_clocktick();
    5.14  }
    5.15  
    5.16  static void flush_tlb_all_pge_ipi(void* info)
     6.1 --- a/xen/arch/x86/traps.c	Wed Oct 27 14:26:56 2004 +0000
     6.2 +++ b/xen/arch/x86/traps.c	Wed Oct 27 16:20:31 2004 +0000
     6.3 @@ -475,8 +475,7 @@ asmlinkage void do_general_protection(st
     6.4  #ifdef XEN_DEBUGGER
     6.5              if ( pdb_initialized && (pdb_ctx.system_call != 0) )
     6.6              {
     6.7 -                unsigned long cr3; 
     6.8 -                __asm__ __volatile__ ("movl %%cr3,%0" : "=r" (cr3) : );
     6.9 +                unsigned long cr3 = read_cr3();
    6.10                  if ( cr3 == pdb_ctx.ptbr )
    6.11                      pdb_linux_syscall_enter_bkpt(regs, error_code, ti);
    6.12              }
     7.1 --- a/xen/common/page_alloc.c	Wed Oct 27 14:26:56 2004 +0000
     7.2 +++ b/xen/common/page_alloc.c	Wed Oct 27 16:20:31 2004 +0000
     7.3 @@ -451,7 +451,7 @@ void free_domheap_pages(struct pfn_info 
     7.4          for ( i = 0; i < (1 << order); i++ )
     7.5          {
     7.6              ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
     7.7 -            pg[i].tlbflush_timestamp  = tlbflush_clock;
     7.8 +            pg[i].tlbflush_timestamp  = tlbflush_current_time();
     7.9              pg[i].u.free.cpu_mask     = 1 << d->processor;
    7.10              list_del(&pg[i].list);
    7.11  
     8.1 --- a/xen/include/asm-x86/flushtlb.h	Wed Oct 27 14:26:56 2004 +0000
     8.2 +++ b/xen/include/asm-x86/flushtlb.h	Wed Oct 27 16:20:31 2004 +0000
     8.3 @@ -15,13 +15,9 @@
     8.4  
     8.5  /*
     8.6   * Every time the TLB clock passes an "epoch", every CPU's TLB is flushed.
     8.7 - * Therefore, if the current TLB time and a previously-read timestamp differ
     8.8 - * in their significant bits (i.e., ~TLBCLOCK_EPOCH_MASK), then the TLB clock
     8.9 - * has wrapped at least once and every CPU's TLB is guaranteed to have been
    8.10 - * flushed meanwhile.
    8.11   * This allows us to deal gracefully with a bounded (a.k.a. wrapping) clock.
    8.12   */
    8.13 -#define TLBCLOCK_EPOCH_MASK ((1U<<16)-1)
    8.14 +#define TLBCLOCK_EPOCH_MASK ((1U<<20)-1)
    8.15  
    8.16  /*
    8.17   * 'cpu_stamp' is the current timestamp for the CPU we are testing.
    8.18 @@ -32,22 +28,39 @@ static inline int NEED_FLUSH(u32 cpu_sta
    8.19  {
    8.20      /*
    8.21       * Worst case in which a flush really is required:
    8.22 -     *  CPU has not flushed since end of last epoch (cpu_stamp = 0x0000ffff).
    8.23 -     *  Clock has run to end of current epoch (clock = 0x0001ffff).
    8.24 -     *  Therefore maximum valid difference is 0x10000 (EPOCH_MASK + 1).
    8.25 +     *  1. CPU has not flushed since end of last epoch.
    8.26 +     *  2. Clock has run to end of current epoch.
    8.27 +     *  THEREFORE: Maximum valid difference is (EPOCH_MASK + 1).
    8.28       * N.B. The clock cannot run further until the CPU has flushed once more
    8.29 -     * and updated its stamp to 0x1ffff, so this is as 'far out' as it can get.
    8.30 +     * and updated to current time, so this is as 'far out' as it can get.
    8.31       */
    8.32      return ((lastuse_stamp - cpu_stamp) <= (TLBCLOCK_EPOCH_MASK + 1));
    8.33  }
    8.34  
    8.35 -extern unsigned long tlbflush_epoch_changing;
    8.36 +/*
    8.37 + * The least significant bit of the clock indicates whether an epoch-change
    8.38 + * is in progress. All other bits form the counter that is incremented on
    8.39 + * each clock tick.
    8.40 + */
    8.41  extern u32 tlbflush_clock;
    8.42  extern u32 tlbflush_time[NR_CPUS];
    8.43  
    8.44 -extern void tlb_clocktick(void);
    8.45 +#define tlbflush_current_time() tlbflush_clock
    8.46 +
    8.47  extern void new_tlbflush_clock_period(void);
    8.48  
    8.49 +/* Read pagetable base. */
    8.50 +static inline unsigned long read_cr3(void)
    8.51 +{
    8.52 +    unsigned long cr3;
    8.53 +    __asm__ __volatile__ (
    8.54 +        "mov"__OS" %%cr3, %0" : "=r" (cr3) : );
    8.55 +    return cr3;
    8.56 +}
    8.57 +
    8.58 +/* Write pagetable base and implicitly tick the tlbflush clock. */
    8.59 +extern void write_cr3(unsigned long cr3);
    8.60 +
    8.61  /*
    8.62   * TLB flushing:
    8.63   *
    8.64 @@ -59,6 +72,12 @@ extern void new_tlbflush_clock_period(vo
    8.65   * and page-granular flushes are available only on i486 and up.
    8.66   */
    8.67  
    8.68 +#define __flush_tlb()                                             \
    8.69 +    do {                                                          \
    8.70 +        unsigned long cr3 = read_cr3();                           \
    8.71 +        write_cr3(cr3);                                           \
    8.72 +    } while ( 0 )
    8.73 +
    8.74  #ifndef CONFIG_SMP
    8.75  
    8.76  #define flush_tlb()               __flush_tlb()
     9.1 --- a/xen/include/asm-x86/page.h	Wed Oct 27 14:26:56 2004 +0000
     9.2 +++ b/xen/include/asm-x86/page.h	Wed Oct 27 16:20:31 2004 +0000
     9.3 @@ -133,14 +133,6 @@ typedef struct { unsigned long pt_lo; } 
     9.4  extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE];
     9.5  extern void paging_init(void);
     9.6  
     9.7 -#define __flush_tlb()                                             \
     9.8 -    do {                                                          \
     9.9 -        __asm__ __volatile__ (                                    \
    9.10 -            "mov %%cr3, %%"__OP"ax; mov %%"__OP"ax, %%cr3"        \
    9.11 -            : : : "memory", __OP"ax" );                           \
    9.12 -        tlb_clocktick();                                          \
    9.13 -    } while ( 0 )
    9.14 -
    9.15  /* Flush global pages as well. */
    9.16  
    9.17  #define __pge_off()                                                     \
    10.1 --- a/xen/include/asm-x86/processor.h	Wed Oct 27 14:26:56 2004 +0000
    10.2 +++ b/xen/include/asm-x86/processor.h	Wed Oct 27 16:20:31 2004 +0000
    10.3 @@ -404,7 +404,7 @@ static inline void write_ptbase(struct m
    10.4      else
    10.5          pa = pagetable_val(mm->pagetable);
    10.6  
    10.7 -    __asm__ __volatile__ ( "mov"__OS" %0, %%cr3" : : "r" (pa) : "memory" );
    10.8 +    write_cr3(pa);
    10.9  }
   10.10  
   10.11  #define IDLE0_MM                                                    \