ia64/xen-unstable

changeset 679:76b59b68bc65

bitkeeper revision 1.394 (3f3f5bd5J74WkH-WgWI6a5W5aobvaQ)

Many files:
Forced TLB flushes when a domain page changes type are now done more lazily. Fixed a few bugs at the same time, and cleaned up perfctr output.
author kaf24@scramble.cl.cam.ac.uk
date Sun Aug 17 10:41:25 2003 +0000 (2003-08-17)
parents 8ae2c6bc95ea
children 203cbb0ec54e
files xen/arch/i386/boot/boot.S xen/arch/i386/mm.c xen/arch/i386/smp.c xen/arch/i386/traps.c xen/common/dom_mem_ops.c xen/common/domain_page.c xen/common/memory.c xen/common/perfc.c xen/drivers/block/xen_block.c xen/include/asm-i386/pgalloc.h xen/include/xeno/perfc_defn.h
line diff
     1.1 --- a/xen/arch/i386/boot/boot.S	Thu Aug 07 21:09:04 2003 +0000
     1.2 +++ b/xen/arch/i386/boot/boot.S	Sun Aug 17 10:41:25 2003 +0000
     1.3 @@ -14,13 +14,9 @@ ENTRY(start)
     1.4          /* Magic number indicating a Multiboot header. */
     1.5  	.long	0x1BADB002
     1.6  	/* Flags to bootloader (see Multiboot spec). */
     1.7 -	.long	0x00000006
     1.8 +	.long	0x00000002
     1.9  	/* Checksum: must be the negated sum of the first two fields. */
    1.10 -	.long	-0x1BADB008
    1.11 -        /* Unused loader addresses (ELF header has all this already).*/
    1.12 -        .long   0,0,0,0,0
    1.13 -        /* EGA text mode. */
    1.14 -        .long   1,0,0,0
    1.15 +	.long	-0x1BADB004
    1.16          
    1.17  hal_entry:
    1.18          /* Set up a few descriptors: on entry only CS is guaranteed good. */
     2.1 --- a/xen/arch/i386/mm.c	Thu Aug 07 21:09:04 2003 +0000
     2.2 +++ b/xen/arch/i386/mm.c	Sun Aug 17 10:41:25 2003 +0000
     2.3 @@ -241,13 +241,13 @@ long do_set_gdt(unsigned long *frame_lis
     2.4              mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);
     2.5          
     2.6          page = frame_table + frames[i];
     2.7 -        page->flags &= ~PG_type_mask;
     2.8 +        page->flags &= ~(PG_type_mask | PG_need_flush);
     2.9          page->flags |= PGT_gdt_page;
    2.10          get_page_type(page);
    2.11          get_page_tot(page);
    2.12      }
    2.13  
    2.14 -    flush_tlb();
    2.15 +    local_flush_tlb();
    2.16  
    2.17      /* Copy over first entries of the new GDT. */
    2.18      memcpy((void *)GDT_VIRT_START, gdt_table, FIRST_DOMAIN_GDT_ENTRY*8);
     3.1 --- a/xen/arch/i386/smp.c	Thu Aug 07 21:09:04 2003 +0000
     3.2 +++ b/xen/arch/i386/smp.c	Sun Aug 17 10:41:25 2003 +0000
     3.3 @@ -352,48 +352,6 @@ void flush_tlb_others(unsigned long cpum
     3.4      spin_unlock(&tlbstate_lock);
     3.5  }
     3.6  	
     3.7 -void flush_tlb_current_task(void)
     3.8 -{
     3.9 -#if 0
    3.10 -    struct mm_struct *mm = &current->mm;
    3.11 -    unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
    3.12 -
    3.13 -    local_flush_tlb();
    3.14 -    if (cpu_mask)
    3.15 -        flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
    3.16 -#endif
    3.17 -}
    3.18 -
    3.19 -void flush_tlb_mm (struct mm_struct * mm)
    3.20 -{
    3.21 -#if 0
    3.22 -    unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
    3.23 -
    3.24 -    if (current->active_mm == mm)
    3.25 -        local_flush_tlb();
    3.26 -    if (cpu_mask)
    3.27 -        flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
    3.28 -#endif
    3.29 -}
    3.30 -
    3.31 -#if 0
    3.32 -void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
    3.33 -{
    3.34 -    struct mm_struct *mm = vma->vm_mm;
    3.35 -    unsigned long cpu_mask = mm.cpu_vm_mask & ~(1 << smp_processor_id());
    3.36 -
    3.37 -    if (current->active_mm == mm) {
    3.38 -        if(current->mm)
    3.39 -            __flush_tlb_one(va);
    3.40 -        else
    3.41 -            leave_mm(smp_processor_id());
    3.42 -    }
    3.43 -
    3.44 -    if (cpu_mask)
    3.45 -        flush_tlb_others(cpu_mask, mm, va);
    3.46 -}
    3.47 -#endif
    3.48 -
    3.49  static inline void do_flush_tlb_all_local(void)
    3.50  {
    3.51      unsigned long cpu = smp_processor_id();
     4.1 --- a/xen/arch/i386/traps.c	Thu Aug 07 21:09:04 2003 +0000
     4.2 +++ b/xen/arch/i386/traps.c	Sun Aug 17 10:41:25 2003 +0000
     4.3 @@ -18,6 +18,7 @@
     4.4  #include <xeno/delay.h>
     4.5  #include <xeno/spinlock.h>
     4.6  #include <xeno/irq.h>
     4.7 +#include <xeno/perfc.h>
     4.8  #include <asm/domain_page.h>
     4.9  #include <asm/system.h>
    4.10  #include <asm/io.h>
    4.11 @@ -323,6 +324,13 @@ asmlinkage void do_page_fault(struct pt_
    4.12                  goto unlock_and_bounce_fault;
    4.13          unmap_domain_mem(ldt_page);
    4.14  
    4.15 +        if ( page->flags & PG_need_flush )
    4.16 +        {
    4.17 +            perfc_incrc(need_flush_tlb_flush);
    4.18 +            local_flush_tlb();
    4.19 +            page->flags &= ~PG_need_flush;
    4.20 +        }
    4.21 +
    4.22          page->flags &= ~PG_type_mask;
    4.23          page->flags |= PGT_ldt_page;
    4.24      }
     5.1 --- a/xen/common/dom_mem_ops.c	Thu Aug 07 21:09:04 2003 +0000
     5.2 +++ b/xen/common/dom_mem_ops.c	Sun Aug 17 10:41:25 2003 +0000
     5.3 @@ -11,6 +11,7 @@
     5.4  #include <xeno/lib.h>
     5.5  #include <xeno/mm.h>
     5.6  #include <xeno/dom_mem_ops.h>
     5.7 +#include <xeno/perfc.h>
     5.8  #include <xeno/sched.h>
     5.9  #include <xeno/event.h>
    5.10  #include <asm/domain_page.h>
    5.11 @@ -89,6 +90,7 @@ static long free_dom_mem(struct task_str
    5.12      unsigned long     i;
    5.13      unsigned long     flags;
    5.14      long              rc = 0;
    5.15 +    int               need_flush = 0;
    5.16  
    5.17      spin_lock_irqsave(&free_list_lock, flags);
    5.18      spin_lock(&p->page_lock);
    5.19 @@ -117,6 +119,8 @@ static long free_dom_mem(struct task_str
    5.20              goto out;
    5.21          }
    5.22  
    5.23 +        need_flush |= pf->flags & PG_need_flush;
    5.24 +
    5.25          pf->flags = 0;
    5.26  
    5.27          list_del(&pf->list);
    5.28 @@ -130,6 +134,12 @@ static long free_dom_mem(struct task_str
    5.29      spin_unlock(&p->page_lock);
    5.30      spin_unlock_irqrestore(&free_list_lock, flags);
    5.31      
    5.32 +    if ( need_flush )
    5.33 +    {
    5.34 +        __flush_tlb();
    5.35 +        perfc_incrc(need_flush_tlb_flush);
    5.36 +    }
    5.37 +
    5.38      return rc ? rc : bop.size;
    5.39  }
    5.40      
     6.1 --- a/xen/common/domain_page.c	Thu Aug 07 21:09:04 2003 +0000
     6.2 +++ b/xen/common/domain_page.c	Sun Aug 17 10:41:25 2003 +0000
     6.3 @@ -31,7 +31,7 @@ static void flush_all_ready_maps(void)
     6.4      do { if ( (*cache & READY_FOR_TLB_FLUSH) ) *cache = 0; }
     6.5      while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 );
     6.6  
     6.7 -    perfc_incr(domain_page_tlb_flush);
     6.8 +    perfc_incrc(domain_page_tlb_flush);
     6.9      local_flush_tlb();
    6.10  }
    6.11  
     7.1 --- a/xen/common/memory.c	Thu Aug 07 21:09:04 2003 +0000
     7.2 +++ b/xen/common/memory.c	Sun Aug 17 10:41:25 2003 +0000
     7.3 @@ -70,7 +70,6 @@
     7.4  
     7.5  /*
     7.6   * THE FOLLOWING ARE ISSUES IF GUEST OPERATING SYSTEMS BECOME SMP-CAPABLE.
     7.7 - * [THAT IS, THEY'RE NOT A PROBLEM NOW, AND MAY NOT EVER BE.]
     7.8   * -----------------------------------------------------------------------
     7.9   * 
    7.10   * *********
    7.11 @@ -83,7 +82,6 @@
    7.12   * than one, we'd probably just flush on all processors running the domain.
    7.13   * *********
    7.14   * 
    7.15 - * ** 1 **
    7.16   * The problem involves creating new page tables which might be mapped 
    7.17   * writeable in the TLB of another processor. As an example, a domain might be 
    7.18   * running in two contexts (ie. on two processors) simultaneously, using the 
    7.19 @@ -109,67 +107,15 @@
    7.20   * FLUSH_NONE, FLUSH_PAGETABLE, FLUSH_DOMAIN. A flush reduces this
    7.21   * to FLUSH_NONE, while squashed write mappings can only promote up
    7.22   * to more aggressive flush types.
    7.23 - * 
    7.24 - * ** 2 **
    7.25 - * Same problem occurs when removing a page table, at level 1 say, then
    7.26 - * making it writeable. Need a TLB flush between otherwise another processor
    7.27 - * might write an illegal mapping into the old table, while yet another
    7.28 - * processor can use the illegal mapping because of a stale level-2 TLB
    7.29 - * entry. So, removal of a table reference sets 'flush_level' appropriately,
    7.30 - * and a flush occurs on next addition of a fresh write mapping.
    7.31 - * 
    7.32 - * BETTER SOLUTION FOR BOTH 1 AND 2:
    7.33 - * When type_refcnt goes to zero, leave old type in place (don't set to
    7.34 - * PGT_none). Then, only flush if making a page table of a page with
    7.35 - * (cnt=0,type=PGT_writeable), or when adding a write mapping for a page
    7.36 - * with (cnt=0, type=PGT_pagexxx). A TLB flush will cause all pages
    7.37 - * with refcnt==0 to be reset to PGT_none. Need an array for the purpose,
    7.38 - * added to when a type_refcnt goes to zero, and emptied on a TLB flush.
    7.39 - * Either have per-domain table, or force TLB flush at end of each
    7.40 - * call to 'process_page_updates'.
    7.41 - * Most OSes will always keep a writeable reference hanging around, and
    7.42 - * page table structure is fairly static, so this mechanism should be
    7.43 - * fairly cheap.
    7.44 - * 
    7.45 - * MAYBE EVEN BETTER? [somewhat dubious: not for first cut of the code]:
    7.46 - * If we need to force an intermediate flush, those other processors
    7.47 - * spin until we complete, then do a single TLB flush. They can spin on
    7.48 - * the lock protecting 'process_page_updates', and continue when that
    7.49 - * is freed. Saves cost of setting up and servicing an IPI: later
    7.50 - * communication is synchronous. Processors trying to install the domain
    7.51 - * or domain&pagetable would also enter the spin.
    7.52 - * 
    7.53 - * ** 3 **
    7.54 - * Indeed, this problem generalises to reusing page tables at different
    7.55 - * levels of the hierarchy (conceptually, the guest OS can use the
    7.56 - * hypervisor to introduce illegal table entries by proxy). Consider
    7.57 - * unlinking a level-1 page table and reintroducing at level 2 with no
    7.58 - * TLB flush. Hypervisor can add a reference to some other level-1 table
    7.59 - * with the RW bit set. This is fine in the level-2 context, but some
    7.60 - * other processor may still be using that table in level-1 context
    7.61 - * (due to a stale TLB entry). At level 1 it may look like the
    7.62 - * processor has write access to the other level-1 page table! Therefore
    7.63 - * can add illegal values there with impunity :-(
    7.64 - * 
    7.65 - * Fortunately, the solution above generalises to this extended problem.
    7.66   */
    7.67  
    7.68 -/*
    7.69 - * UPDATE 12.11.02.: We no longer have struct page and mem_map. These
    7.70 - * have been replaced by struct pfn_info and frame_table respectively.
    7.71 - * 
    7.72 - * system_free_list is a list_head linking all system owned free pages.
    7.73 - * it is initialized in init_frametable.
    7.74 - *
    7.75 - * Boris Dragovic.
    7.76 - */
    7.77 - 
    7.78  #include <xeno/config.h>
    7.79  #include <xeno/init.h>
    7.80  #include <xeno/lib.h>
    7.81  #include <xeno/mm.h>
    7.82  #include <xeno/sched.h>
    7.83  #include <xeno/errno.h>
    7.84 +#include <xeno/perfc.h>
    7.85  #include <asm/page.h>
    7.86  #include <asm/flushtlb.h>
    7.87  #include <asm/io.h>
    7.88 @@ -305,6 +251,13 @@ static int inc_page_refcnt(unsigned long
    7.89              return -1;
    7.90          }
    7.91  
    7.92 +        if ( flags & PG_need_flush )
    7.93 +        {
    7.94 +            flush_tlb[smp_processor_id()] = 1;
    7.95 +            page->flags &= ~PG_need_flush;
    7.96 +            perfc_incrc(need_flush_tlb_flush);
    7.97 +        }
    7.98 +
    7.99          page->flags &= ~PG_type_mask;
   7.100          page->flags |= type;
   7.101      }
   7.102 @@ -540,11 +493,7 @@ static void put_page(unsigned long page_
   7.103              ((page->flags & PG_need_flush) == PG_need_flush)));
   7.104      if ( writeable )
   7.105      {
   7.106 -        if ( put_page_type(page) == 0 )
   7.107 -        {
   7.108 -            flush_tlb[smp_processor_id()] = 1;
   7.109 -            page->flags &= ~PG_need_flush;
   7.110 -        }
   7.111 +        put_page_type(page);
   7.112      }
   7.113      else if ( unlikely(((page->flags & PG_type_mask) == PGT_ldt_page) &&
   7.114                         (page_type_count(page) != 0)) )
     8.1 --- a/xen/common/perfc.c	Thu Aug 07 21:09:04 2003 +0000
     8.2 +++ b/xen/common/perfc.c	Sun Aug 17 10:41:25 2003 +0000
     8.3 @@ -25,7 +25,7 @@ struct perfcounter_t perfcounters;
     8.4  
     8.5  void perfc_printall(u_char key, void *dev_id, struct pt_regs *regs)
     8.6  {
     8.7 -    int i, j;
     8.8 +    int i, j, sum;
     8.9      s_time_t now = NOW();
    8.10      atomic_t *counters = (atomic_t *)&perfcounters;
    8.11  
    8.12 @@ -34,27 +34,27 @@ void perfc_printall(u_char key, void *de
    8.13  
    8.14      for ( i = 0; i < NR_PERFCTRS; i++ ) 
    8.15      {
    8.16 -        printk("%20s  ",  perfc_info[i].name);
    8.17 +        printk("%-32s  ",  perfc_info[i].name);
    8.18          switch ( perfc_info[i].type )
    8.19          {
    8.20          case TYPE_SINGLE:
    8.21 -            printk("%10d  0x%08x",
    8.22 -                   atomic_read(&counters[0]), 
    8.23 -                   atomic_read(&counters[0]));
    8.24 +            printk("TOTAL[%10d]", atomic_read(&counters[0]));
    8.25              counters += 1;
    8.26              break;
    8.27          case TYPE_CPU:
    8.28 +            for ( j = sum = 0; j < smp_num_cpus; j++ )
    8.29 +                sum += atomic_read(&counters[j]);
    8.30 +            printk("TOTAL[%10d]  ", sum);
    8.31              for ( j = 0; j < smp_num_cpus; j++ )
    8.32 -                printk("CPU%02d[%10d 0x%08x]  ",
    8.33 -                       j, atomic_read(&counters[j]), 
    8.34 -                       atomic_read(&counters[j]));
    8.35 +                printk("CPU%02d[%10d]  ", j, atomic_read(&counters[j]));
    8.36              counters += NR_CPUS;
    8.37              break;
    8.38          case TYPE_ARRAY:
    8.39 +            for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ )
    8.40 +                sum += atomic_read(&counters[j]);
    8.41 +            printk("TOTAL[%10d]  ", sum);
    8.42              for ( j = 0; j < perfc_info[i].nr_elements; j++ )
    8.43 -                printk("ARR%02d[%10d 0x%08x]  ",
    8.44 -                       j, atomic_read(&counters[j]), 
    8.45 -                       atomic_read(&counters[j]));
    8.46 +                printk("ARR%02d[%10d]  ", j, atomic_read(&counters[j]));
    8.47              counters += j;
    8.48              break;
    8.49          }
     9.1 --- a/xen/drivers/block/xen_block.c	Thu Aug 07 21:09:04 2003 +0000
     9.2 +++ b/xen/drivers/block/xen_block.c	Sun Aug 17 10:41:25 2003 +0000
     9.3 @@ -316,8 +316,8 @@ static void __lock_buffer(unsigned long 
     9.4          {
     9.5              if ( page->type_count == 0 )
     9.6              {
     9.7 -                page->flags &= ~(PG_type_mask | PG_need_flush);
     9.8 -                /* NB. This ref alone won't cause a TLB flush. */
     9.9 +                page->flags &= ~PG_type_mask;
    9.10 +                /* No need for PG_need_flush here. */
    9.11                  page->flags |= PGT_writeable_page;
    9.12              }
    9.13              get_page_type(page);
    9.14 @@ -340,13 +340,8 @@ static void unlock_buffer(struct task_st
    9.15            pfn++ )
    9.16      {
    9.17          page = frame_table + pfn;
    9.18 -        if ( writeable_buffer &&
    9.19 -             (put_page_type(page) == 0) &&
    9.20 -             (page->flags & PG_need_flush) )
    9.21 -        {
    9.22 -            __flush_tlb();
    9.23 -            page->flags &= ~PG_need_flush;
    9.24 -        }
    9.25 +        if ( writeable_buffer )
    9.26 +            put_page_type(page);
    9.27          put_page_tot(page);
    9.28      }
    9.29      spin_unlock_irqrestore(&p->page_lock, flags);
    10.1 --- a/xen/include/asm-i386/pgalloc.h	Thu Aug 07 21:09:04 2003 +0000
    10.2 +++ b/xen/include/asm-i386/pgalloc.h	Sun Aug 17 10:41:25 2003 +0000
    10.3 @@ -39,9 +39,6 @@
    10.4   *
    10.5   *  - flush_tlb() flushes the current mm struct TLBs
    10.6   *  - flush_tlb_all() flushes all processes TLBs
    10.7 - *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
    10.8 - *  - flush_tlb_page(vma, vmaddr) flushes one page
    10.9 - *  - flush_tlb_range(mm, start, end) flushes a range of pages
   10.10   *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
   10.11   *
   10.12   * ..but the i386 has somewhat limited tlb flushing capabilities,
   10.13 @@ -50,54 +47,19 @@
   10.14  
   10.15  #ifndef CONFIG_SMP
   10.16  
   10.17 -#define flush_tlb() __flush_tlb()
   10.18 -#define flush_tlb_all() __flush_tlb_all()
   10.19 -#define local_flush_tlb() __flush_tlb()
   10.20 -
   10.21 -static inline void flush_tlb_mm(struct mm_struct *mm)
   10.22 -{
   10.23 -	if (mm == current->active_mm)
   10.24 -		__flush_tlb();
   10.25 -}
   10.26 -
   10.27 -static inline void flush_tlb_cpu(unsigned int cpu)
   10.28 -{
   10.29 -    __flush_tlb();
   10.30 -}
   10.31 -
   10.32 -#if 0
   10.33 -static inline void flush_tlb_page(struct vm_area_struct *vma,
   10.34 -	unsigned long addr)
   10.35 -{
   10.36 -	if (vma->vm_mm == current->active_mm)
   10.37 -		__flush_tlb_one(addr);
   10.38 -}
   10.39 -#endif
   10.40 -
   10.41 -static inline void flush_tlb_range(struct mm_struct *mm,
   10.42 -	unsigned long start, unsigned long end)
   10.43 -{
   10.44 -	if (mm == current->active_mm)
   10.45 -		__flush_tlb();
   10.46 -}
   10.47 +#define flush_tlb()         __flush_tlb()
   10.48 +#define flush_tlb_all()     __flush_tlb_all()
   10.49 +#define local_flush_tlb()   __flush_tlb()
   10.50 +#define flush_tlb_cpu(_cpu) __flush_tlb()
   10.51  
   10.52  #else
   10.53  
   10.54  #include <xeno/smp.h>
   10.55  
   10.56 -#define local_flush_tlb() \
   10.57 -	__flush_tlb()
   10.58 +#define flush_tlb()	    __flush_tlb()
   10.59 +#define local_flush_tlb()   __flush_tlb()
   10.60  
   10.61  extern void flush_tlb_all(void);
   10.62 -extern void flush_tlb_current_task(void);
   10.63 -extern void flush_tlb_mm(struct mm_struct *);
   10.64 -
   10.65 -#define flush_tlb()	flush_tlb_current_task()
   10.66 -
   10.67 -static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end)
   10.68 -{
   10.69 -	flush_tlb_mm(mm);
   10.70 -}
   10.71  
   10.72  extern void flush_tlb_others(unsigned long cpumask);
   10.73  static inline void flush_tlb_cpu(unsigned int cpu)
    11.1 --- a/xen/include/xeno/perfc_defn.h	Thu Aug 07 21:09:04 2003 +0000
    11.2 +++ b/xen/include/xeno/perfc_defn.h	Sun Aug 17 10:41:25 2003 +0000
    11.3 @@ -16,4 +16,5 @@ PERFCOUNTER( net_rx_delivered, "net rx d
    11.4  PERFCOUNTER( net_rx_tlbflush, "net rx tlb flushes" )
    11.5  PERFCOUNTER( net_tx_transmitted, "net tx transmitted" )
    11.6  
    11.7 -PERFCOUNTER( domain_page_tlb_flush, "domain page tlb flushes" )
    11.8 +PERFCOUNTER_CPU( domain_page_tlb_flush, "domain page tlb flushes" )
    11.9 +PERFCOUNTER_CPU( need_flush_tlb_flush, "PG_need_flush tlb flushes" )