ia64/xen-unstable

changeset 11808:0c18c6009448

[IA64] TLB tracking

Add TLB insert tracking so that a finer-grained virtual address
range can be flushed when a page is unmapped from a domain.
This functionality is controlled by two compile-time options:
xen_ia64_tlb_track (default y) and xen_ia64_tlb_track_cnt (default n).

This patch focuses on grant table mappings.
When a page is unmapped, a full vTLB flush is normally necessary.
By tracking TLB inserts on grant-mapped pages, the full vTLB flush
can be avoided. In particular, when the vbd backend does only DMA,
dom0 never inserts a TLB entry for the grant-mapped page, and in
that case no vTLB flush is needed at all.
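
As a rough illustration, the unmap path behaves like the sketch below
(condensed from domain_page_flush() in the mm.c hunk of this patch; the
function name flush_on_unmap_sketch is made up for illustration):

    static void flush_on_unmap_sketch(struct domain *d,
                                      volatile pte_t *ptep, pte_t old_pte)
    {
        struct tlb_track_entry *entry;

        switch (tlb_track_search_and_remove(d->arch.tlb_track,
                                            ptep, old_pte, &entry)) {
        case TLB_TRACK_NOT_FOUND:
            /* tracked, but no tlb entry was ever inserted:
               nothing to flush (the DMA-only vbd backend case). */
            break;
        case TLB_TRACK_FOUND:
            /* exactly one tracked insert: flush only that entry. */
            domain_flush_vtlb_track_entry(d, entry);
            tlb_track_free_entry(d->arch.tlb_track, entry);
            break;
        default:
            /* not tracked, or inserted many times: full vTLB flush. */
            domain_flush_vtlb_all();
            break;
        }
    }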

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author awilliam@xenbuild.aw
date Sat Oct 14 17:42:00 2006 -0600 (2006-10-14)
parents 435e2275ea62
children 685586518b2e
files xen/arch/ia64/Rules.mk xen/arch/ia64/xen/Makefile xen/arch/ia64/xen/domain.c xen/arch/ia64/xen/faults.c xen/arch/ia64/xen/mm.c xen/arch/ia64/xen/tlb_track.c xen/arch/ia64/xen/vcpu.c xen/arch/ia64/xen/vhpt.c xen/include/asm-ia64/domain.h xen/include/asm-ia64/linux-xen/asm/pgtable.h xen/include/asm-ia64/p2m_entry.h xen/include/asm-ia64/perfc_defn.h xen/include/asm-ia64/tlb_track.h xen/include/asm-ia64/tlbflush.h xen/include/asm-ia64/vcpu.h xen/include/asm-ia64/vcpumask.h xen/include/asm-ia64/vhpt.h xen/include/public/arch-ia64.h
line diff
     1.1 --- a/xen/arch/ia64/Rules.mk	Sat Oct 14 16:42:15 2006 -0600
     1.2 +++ b/xen/arch/ia64/Rules.mk	Sat Oct 14 17:42:00 2006 -0600
     1.3 @@ -7,6 +7,8 @@ VALIDATE_VT	?= n
     1.4  no_warns ?= n
     1.5  xen_ia64_expose_p2m	?= y
     1.6  xen_ia64_pervcpu_vhpt	?= y
     1.7 +xen_ia64_tlb_track	?= y
     1.8 +xen_ia64_tlb_track_cnt	?= n
     1.9  
    1.10  ifneq ($(COMPILE_ARCH),$(TARGET_ARCH))
    1.11  CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux-
    1.12 @@ -44,6 +46,12 @@ endif
    1.13  ifeq ($(xen_ia64_pervcpu_vhpt),y)
    1.14  CFLAGS	+= -DCONFIG_XEN_IA64_PERVCPU_VHPT
    1.15  endif
    1.16 +ifeq ($(xen_ia64_tlb_track),y)
    1.17 +CFLAGS	+= -DCONFIG_XEN_IA64_TLB_TRACK
    1.18 +endif
    1.19 +ifeq ($(xen_ia64_tlb_track_cnt),y)
    1.20 +CFLAGS	+= -DCONFIG_TLB_TRACK_CNT
    1.21 +endif
    1.22  ifeq ($(no_warns),y)
    1.23  CFLAGS	+= -Wa,--fatal-warnings -Werror -Wno-uninitialized
    1.24  endif
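
Since these are ordinary make variables, the options can presumably be
overridden on the make command line without editing Rules.mk, e.g.
(hypothetical invocation, assuming the usual xen-unstable build flow):

    # build with tlb tracking disabled and insert counting enabled
    make -C xen xen_ia64_tlb_track=n xen_ia64_tlb_track_cnt=y
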
     2.1 --- a/xen/arch/ia64/xen/Makefile	Sat Oct 14 16:42:15 2006 -0600
     2.2 +++ b/xen/arch/ia64/xen/Makefile	Sat Oct 14 17:42:00 2006 -0600
     2.3 @@ -29,3 +29,4 @@ obj-y += xenpatch.o
     2.4  obj-y += xencomm.o
     2.5  
     2.6  obj-$(crash_debug) += gdbstub.o
     2.7 +obj-$(xen_ia64_tlb_track) += tlb_track.o
     3.1 --- a/xen/arch/ia64/xen/domain.c	Sat Oct 14 16:42:15 2006 -0600
     3.2 +++ b/xen/arch/ia64/xen/domain.c	Sat Oct 14 17:42:00 2006 -0600
     3.3 @@ -47,6 +47,7 @@
     3.4  #include <asm/dom_fw.h>
     3.5  #include <asm/shadow.h>
     3.6  #include <xen/guest_access.h>
     3.7 +#include <asm/tlb_track.h>
     3.8  
     3.9  unsigned long dom0_size = 512*1024*1024;
    3.10  unsigned long dom0_align = 64*1024*1024;
    3.11 @@ -390,6 +391,8 @@ int arch_domain_create(struct domain *d)
    3.12  	DPRINTK("%s:%d domain %d pervcpu_vhpt %d\n",
    3.13  	        __func__, __LINE__, d->domain_id, d->arch.has_pervcpu_vhpt);
    3.14  #endif
    3.15 +	if (tlb_track_create(d) < 0)
    3.16 +		goto fail_nomem1;
    3.17  	d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT));
    3.18  	if (d->shared_info == NULL)
    3.19  	    goto fail_nomem;
    3.20 @@ -418,6 +421,8 @@ int arch_domain_create(struct domain *d)
    3.21  	return 0;
    3.22  
    3.23  fail_nomem:
    3.24 +	tlb_track_destroy(d);
    3.25 +fail_nomem1:
    3.26  	if (d->arch.mm.pgd != NULL)
    3.27  	    pgd_free(d->arch.mm.pgd);
    3.28  	if (d->shared_info != NULL)
    3.29 @@ -433,6 +438,8 @@ void arch_domain_destroy(struct domain *
    3.30  	if (d->arch.shadow_bitmap != NULL)
    3.31  		xfree(d->arch.shadow_bitmap);
    3.32  
    3.33 +	tlb_track_destroy(d);
    3.34 +
    3.35  	/* Clear vTLB for the next domain.  */
    3.36  	domain_flush_tlb_vhpt(d);
    3.37  
     4.1 --- a/xen/arch/ia64/xen/faults.c	Sat Oct 14 16:42:15 2006 -0600
     4.2 +++ b/xen/arch/ia64/xen/faults.c	Sat Oct 14 17:42:00 2006 -0600
     4.3 @@ -31,6 +31,7 @@
     4.4  #include <asm/asm-xsi-offsets.h>
     4.5  #include <asm/shadow.h>
     4.6  #include <asm/uaccess.h>
     4.7 +#include <asm/p2m_entry.h>
     4.8  
     4.9  extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
    4.10  /* FIXME: where these declarations shold be there ? */
    4.11 @@ -202,8 +203,11 @@ void ia64_do_page_fault (unsigned long a
    4.12  	fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha);
    4.13  	if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) {
    4.14  		struct p2m_entry entry;
    4.15 -		pteval = translate_domain_pte(pteval, address, itir, &logps, &entry);
    4.16 -		vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,logps);
    4.17 +		unsigned long m_pteval;
    4.18 +		m_pteval = translate_domain_pte(pteval, address, itir,
    4.19 +		                                &logps, &entry);
    4.20 +		vcpu_itc_no_srlz(current, (is_data? 2: 1) | 4, 
    4.21 +		                 address, m_pteval, pteval, logps, &entry);
    4.22  		if ((fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) ||
    4.23  		    p2m_entry_retry(&entry)) {
    4.24  			/* dtlb has been purged in-between.  This dtlb was
     5.1 --- a/xen/arch/ia64/xen/mm.c	Sat Oct 14 16:42:15 2006 -0600
     5.2 +++ b/xen/arch/ia64/xen/mm.c	Sat Oct 14 17:42:00 2006 -0600
     5.3 @@ -172,13 +172,15 @@
     5.4  #include <asm/vhpt.h>
     5.5  #include <asm/vcpu.h>
     5.6  #include <asm/shadow.h>
     5.7 +#include <asm/p2m_entry.h>
     5.8 +#include <asm/tlb_track.h>
     5.9  #include <linux/efi.h>
    5.10  #include <xen/guest_access.h>
    5.11  #include <asm/page.h>
    5.12  #include <public/memory.h>
    5.13  
    5.14  static void domain_page_flush(struct domain* d, unsigned long mpaddr,
    5.15 -                              unsigned long old_mfn, unsigned long new_mfn);
    5.16 +                              volatile pte_t* ptep, pte_t old_pte);
    5.17  
    5.18  extern unsigned long ia64_iobase;
    5.19  
    5.20 @@ -798,12 +800,15 @@ flags_to_prot (unsigned long flags)
    5.21  
    5.22      res |= flags & ASSIGN_readonly ? _PAGE_AR_R: _PAGE_AR_RWX;
    5.23      res |= flags & ASSIGN_nocache ? _PAGE_MA_UC: _PAGE_MA_WB;
    5.24 +#ifdef CONFIG_XEN_IA64_TLB_TRACK
    5.25 +    res |= flags & ASSIGN_tlb_track ? _PAGE_TLB_TRACKING: 0;
    5.26 +#endif
    5.27      
    5.28      return res;
    5.29  }
    5.30  
    5.31  /* map a physical address to the specified metaphysical addr */
    5.32 -// flags: currently only ASSIGN_readonly, ASSIGN_nocache
     5.33 +// flags: currently only ASSIGN_readonly, ASSIGN_nocache, ASSIGN_tlb_track
    5.34  // This is called by assign_domain_mmio_page().
    5.35  // So accessing to pte is racy.
    5.36  int
    5.37 @@ -1034,7 +1039,7 @@ assign_domain_mach_page(struct domain *d
    5.38  // caller must call set_gpfn_from_mfn() before call if necessary.
    5.39  // because set_gpfn_from_mfn() result must be visible before pte xchg
    5.40  // caller must use memory barrier. NOTE: xchg has acquire semantics.
    5.41 -// flags: currently only ASSIGN_readonly
    5.42 +// flags: ASSIGN_xxx
    5.43  static void
    5.44  assign_domain_page_replace(struct domain *d, unsigned long mpaddr,
    5.45                             unsigned long mfn, unsigned long flags)
    5.46 @@ -1068,7 +1073,7 @@ assign_domain_page_replace(struct domain
    5.47                  set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
    5.48              }
    5.49  
    5.50 -            domain_page_flush(d, mpaddr, old_mfn, mfn);
    5.51 +            domain_page_flush(d, mpaddr, pte, old_pte);
    5.52  
    5.53              try_to_clear_PGC_allocate(d, old_page);
    5.54              put_page(old_page);
    5.55 @@ -1088,7 +1093,7 @@ assign_domain_page_cmpxchg_rel(struct do
    5.56      struct mm_struct *mm = &d->arch.mm;
    5.57      volatile pte_t* pte;
    5.58      unsigned long old_mfn;
    5.59 -    unsigned long old_arflags;
    5.60 +    unsigned long old_prot;
    5.61      pte_t old_pte;
    5.62      unsigned long new_mfn;
    5.63      unsigned long new_prot;
    5.64 @@ -1098,12 +1103,12 @@ assign_domain_page_cmpxchg_rel(struct do
    5.65      pte = lookup_alloc_domain_pte(d, mpaddr);
    5.66  
    5.67   again:
    5.68 -    old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;
    5.69 +    old_prot = pte_val(*pte) & ~_PAGE_PPN_MASK;
    5.70      old_mfn = page_to_mfn(old_page);
    5.71 -    old_pte = pfn_pte(old_mfn, __pgprot(old_arflags));
    5.72 +    old_pte = pfn_pte(old_mfn, __pgprot(old_prot));
    5.73      if (!pte_present(old_pte)) {
    5.74 -        DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx\n",
    5.75 -                __func__, pte_val(old_pte), old_arflags, old_mfn);
    5.76 +        DPRINTK("%s: old_pte 0x%lx old_prot 0x%lx old_mfn 0x%lx\n",
    5.77 +                __func__, pte_val(old_pte), old_prot, old_mfn);
    5.78          return -EINVAL;
    5.79      }
    5.80  
    5.81 @@ -1118,10 +1123,10 @@ assign_domain_page_cmpxchg_rel(struct do
    5.82              goto again;
    5.83          }
    5.84  
    5.85 -        DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx "
    5.86 +        DPRINTK("%s: old_pte 0x%lx old_prot 0x%lx old_mfn 0x%lx "
    5.87                  "ret_pte 0x%lx ret_mfn 0x%lx\n",
    5.88                  __func__,
    5.89 -                pte_val(old_pte), old_arflags, old_mfn,
    5.90 +                pte_val(old_pte), old_prot, old_mfn,
    5.91                  pte_val(ret_pte), pte_pfn(ret_pte));
    5.92          return -EINVAL;
    5.93      }
    5.94 @@ -1133,7 +1138,7 @@ assign_domain_page_cmpxchg_rel(struct do
    5.95  
    5.96      set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
    5.97  
    5.98 -    domain_page_flush(d, mpaddr, old_mfn, new_mfn);
    5.99 +    domain_page_flush(d, mpaddr, pte, old_pte);
   5.100      put_page(old_page);
   5.101      perfc_incrc(assign_domain_pge_cmpxchg_rel);
   5.102      return 0;
   5.103 @@ -1202,7 +1207,7 @@ zap_domain_page_one(struct domain *d, un
   5.104          set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
   5.105      }
   5.106  
   5.107 -    domain_page_flush(d, mpaddr, mfn, INVALID_MFN);
   5.108 +    domain_page_flush(d, mpaddr, pte, old_pte);
   5.109  
   5.110      if (page_get_owner(page) != NULL) {
   5.111          try_to_clear_PGC_allocate(d, page);
   5.112 @@ -1417,8 +1422,12 @@ create_grant_host_mapping(unsigned long 
   5.113      BUG_ON(ret == 0);
   5.114      BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
   5.115             get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
   5.116 -    assign_domain_page_replace(d, gpaddr, mfn, (flags & GNTMAP_readonly)?
   5.117 -                                              ASSIGN_readonly: ASSIGN_writable);
   5.118 +    assign_domain_page_replace(d, gpaddr, mfn,
   5.119 +#ifdef CONFIG_XEN_IA64_TLB_TRACK
   5.120 +                               ASSIGN_tlb_track |
   5.121 +#endif
   5.122 +                               ((flags & GNTMAP_readonly) ?
   5.123 +                                ASSIGN_readonly : ASSIGN_writable));
   5.124      perfc_incrc(create_grant_host_mapping);
   5.125      return GNTST_okay;
   5.126  }
   5.127 @@ -1473,7 +1482,7 @@ destroy_grant_host_mapping(unsigned long
   5.128      }
   5.129      BUG_ON(pte_pfn(old_pte) != mfn);
   5.130  
   5.131 -    domain_page_flush(d, gpaddr, mfn, INVALID_MFN);
   5.132 +    domain_page_flush(d, gpaddr, pte, old_pte);
   5.133  
   5.134      page = mfn_to_page(mfn);
   5.135      BUG_ON(page_get_owner(page) == d);//try_to_clear_PGC_allocate(d, page) is not needed.
   5.136 @@ -1645,12 +1654,43 @@ guest_physmap_remove_page(struct domain 
   5.137  //    flush finer range.
   5.138  static void
   5.139  domain_page_flush(struct domain* d, unsigned long mpaddr,
   5.140 -                  unsigned long old_mfn, unsigned long new_mfn)
   5.141 +                  volatile pte_t* ptep, pte_t old_pte)
   5.142  {
   5.143 +#ifdef CONFIG_XEN_IA64_TLB_TRACK
   5.144 +    struct tlb_track_entry* entry;
   5.145 +#endif
   5.146 +
   5.147      if (shadow_mode_enabled(d))
   5.148          shadow_mark_page_dirty(d, mpaddr >> PAGE_SHIFT);
   5.149  
   5.150 +#ifndef CONFIG_XEN_IA64_TLB_TRACK
   5.151      domain_flush_vtlb_all();
   5.152 +#else
   5.153 +    switch (tlb_track_search_and_remove(d->arch.tlb_track,
   5.154 +                                        ptep, old_pte, &entry)) {
   5.155 +    case TLB_TRACK_NOT_TRACKED:
   5.156 +        // DPRINTK("%s TLB_TRACK_NOT_TRACKED\n", __func__);
   5.157 +        domain_flush_vtlb_all();
   5.158 +        break;
   5.159 +    case TLB_TRACK_NOT_FOUND:
   5.160 +        /* do nothing */
   5.161 +        // DPRINTK("%s TLB_TRACK_NOT_FOUND\n", __func__);
   5.162 +        break;
   5.163 +    case TLB_TRACK_FOUND:
   5.164 +        // DPRINTK("%s TLB_TRACK_FOUND\n", __func__);
   5.165 +        domain_flush_vtlb_track_entry(d, entry);
   5.166 +        tlb_track_free_entry(d->arch.tlb_track, entry);
   5.167 +        break;
   5.168 +    case TLB_TRACK_MANY:
   5.169 +        DPRINTK("%s TLB_TRACK_MANY\n", __func__);
   5.170 +        domain_flush_vtlb_all();
   5.171 +        break;
   5.172 +    case TLB_TRACK_AGAIN:
   5.173 +        DPRINTK("%s TLB_TRACK_AGAIN\n", __func__);
   5.174 +        BUG();
   5.175 +        break;
   5.176 +    }
   5.177 +#endif
   5.178      perfc_incrc(domain_page_flush);
   5.179  }
   5.180  
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/xen/arch/ia64/xen/tlb_track.c	Sat Oct 14 17:42:00 2006 -0600
     6.3 @@ -0,0 +1,506 @@
     6.4 +/******************************************************************************
     6.5 + * tlb_track.c
     6.6 + *
     6.7 + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
     6.8 + *                    VA Linux Systems Japan K.K.
     6.9 + *
    6.10 + * This program is free software; you can redistribute it and/or modify
    6.11 + * it under the terms of the GNU General Public License as published by
    6.12 + * the Free Software Foundation; either version 2 of the License, or
    6.13 + * (at your option) any later version.
    6.14 + *
    6.15 + * This program is distributed in the hope that it will be useful,
    6.16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    6.17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    6.18 + * GNU General Public License for more details.
    6.19 + *
    6.20 + * You should have received a copy of the GNU General Public License
    6.21 + * along with this program; if not, write to the Free Software
    6.22 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    6.23 + *
    6.24 + */
    6.25 +
    6.26 +#include <asm/tlb_track.h>
    6.27 +#include <asm/p2m_entry.h>
    6.28 +#include <asm/vmx_mm_def.h>  /* for IA64_RR_SHIFT */
    6.29 +#include <asm/vmx_vcpu.h>    /* for VRN7 */
    6.30 +#include <asm/vcpu.h>        /* for PSCB() */
    6.31 +
    6.32 +#define CONFIG_TLB_TRACK_DEBUG
    6.33 +#ifdef CONFIG_TLB_TRACK_DEBUG
    6.34 +# define tlb_track_printd(fmt, ...)     \
    6.35 +    printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
    6.36 +#else
    6.37 +# define tlb_track_printd(fmt, ...)     do { } while (0)
    6.38 +#endif
    6.39 +
    6.40 +static int
    6.41 +tlb_track_allocate_entries(struct tlb_track* tlb_track)
    6.42 +{
    6.43 +    struct page_info* entry_page;
    6.44 +    struct tlb_track_entry* track_entries;
    6.45 +    unsigned int allocated;
    6.46 +    unsigned long i;
    6.47 +
    6.48 +    BUG_ON(tlb_track->num_free > 0);
    6.49 +    if (tlb_track->num_entries >= tlb_track->limit) {
    6.50 +        DPRINTK("%s: num_entries %d limit %d\n",
    6.51 +                __func__, tlb_track->num_entries, tlb_track->limit);
    6.52 +        return -ENOMEM;
    6.53 +    }
    6.54 +    entry_page = alloc_domheap_page(NULL);
    6.55 +    if (entry_page == NULL) {
    6.56 +        DPRINTK("%s: domheap page failed. num_entries %d limit %d\n",
    6.57 +                __func__, tlb_track->num_entries, tlb_track->limit);
    6.58 +        return -ENOMEM;
    6.59 +    }
    6.60 +
    6.61 +    list_add(&entry_page->list, &tlb_track->page_list);
    6.62 +    track_entries = (struct tlb_track_entry*)page_to_virt(entry_page);
    6.63 +    allocated = PAGE_SIZE / sizeof(track_entries[0]);
    6.64 +    tlb_track->num_entries += allocated;
    6.65 +    tlb_track->num_free += allocated;
    6.66 +    for (i = 0; i < allocated; i++) {
    6.67 +        list_add(&track_entries[i].list, &tlb_track->free_list);
    6.68 +        // tlb_track_printd("track_entries[%ld] 0x%p\n", i, &track_entries[i]);
    6.69 +    }
    6.70 +    tlb_track_printd("allocated %d num_entries %d num_free %d\n",
    6.71 +                     allocated, tlb_track->num_entries, tlb_track->num_free);
    6.72 +    return 0;
    6.73 +}
    6.74 +
    6.75 +
    6.76 +int
    6.77 +tlb_track_create(struct domain* d)
    6.78 +{
    6.79 +    struct tlb_track* tlb_track = NULL;
    6.80 +    struct page_info* hash_page = NULL;
    6.81 +    unsigned int hash_size;
    6.82 +    unsigned int hash_shift;
    6.83 +    unsigned int i;
    6.84 +
    6.85 +    tlb_track = xmalloc(struct tlb_track);
    6.86 +    if (tlb_track == NULL)
    6.87 +        goto out;
    6.88 +
    6.89 +    hash_page = alloc_domheap_page(NULL);
    6.90 +    if (hash_page == NULL)
    6.91 +        goto out;
    6.92 +
    6.93 +    spin_lock_init(&tlb_track->free_list_lock);
    6.94 +    INIT_LIST_HEAD(&tlb_track->free_list);
    6.95 +    tlb_track->limit = TLB_TRACK_LIMIT_ENTRIES;
    6.96 +    tlb_track->num_entries = 0;
    6.97 +    tlb_track->num_free = 0;
    6.98 +    INIT_LIST_HEAD(&tlb_track->page_list);
    6.99 +    if (tlb_track_allocate_entries(tlb_track) < 0)
   6.100 +        goto out;
   6.101 +
   6.102 +    spin_lock_init(&tlb_track->hash_lock);
   6.103 +    /* XXX hash size optimization */
   6.104 +    hash_size = PAGE_SIZE / sizeof(tlb_track->hash[0]);
   6.105 +    for (hash_shift = 0; (1 << (hash_shift + 1)) < hash_size; hash_shift++)
   6.106 +        /* nothing */;
   6.107 +    tlb_track->hash_size = (1 << hash_shift);
   6.108 +    tlb_track->hash_shift = hash_shift;
   6.109 +    tlb_track->hash_mask = (1 << hash_shift) - 1;
   6.110 +    tlb_track->hash = page_to_virt(hash_page);
   6.111 +    for (i = 0; i < tlb_track->hash_size; i++)
   6.112 +        INIT_LIST_HEAD(&tlb_track->hash[i]);
   6.113 +
   6.114 +    smp_mb(); /* make initialization visible before use. */
   6.115 +    d->arch.tlb_track = tlb_track;
   6.116 +    printk("%s:%d hash 0x%p hash_size %d \n",
   6.117 +           __func__, __LINE__, tlb_track->hash, tlb_track->hash_size);
   6.118 +
   6.119 +    return 0;
   6.120 +
   6.121 +out:
   6.122 +    if (hash_page != NULL)
   6.123 +        free_domheap_page(hash_page);
   6.124 +
   6.125 +    if (tlb_track != NULL)
   6.126 +        xfree(tlb_track);
   6.127 +
   6.128 +    return -ENOMEM;
   6.129 +}
   6.130 +
   6.131 +void
   6.132 +tlb_track_destroy(struct domain* d)
   6.133 +{
   6.134 +    struct tlb_track* tlb_track = d->arch.tlb_track;
   6.135 +    struct page_info* page;
   6.136 +    struct page_info* next;
   6.137 +
   6.138 +    spin_lock(&tlb_track->free_list_lock);
   6.139 +    BUG_ON(tlb_track->num_free != tlb_track->num_entries);
   6.140 +
   6.141 +    list_for_each_entry_safe(page, next, &tlb_track->page_list, list) {
   6.142 +        list_del(&page->list);
   6.143 +        free_domheap_page(page);
   6.144 +    }
   6.145 +
   6.146 +    free_domheap_page(virt_to_page(tlb_track->hash));
   6.147 +    xfree(tlb_track);
   6.148 +    // d->tlb_track = NULL;
   6.149 +}
   6.150 +
   6.151 +static struct tlb_track_entry*
   6.152 +tlb_track_get_entry(struct tlb_track* tlb_track)
   6.153 +{
   6.154 +    struct tlb_track_entry* entry = NULL;
   6.155 +    spin_lock(&tlb_track->free_list_lock);
   6.156 +    if (tlb_track->num_free == 0)
   6.157 +        (void)tlb_track_allocate_entries(tlb_track);
   6.158 +
   6.159 +    if (tlb_track->num_free > 0) {
   6.160 +        BUG_ON(list_empty(&tlb_track->free_list));
   6.161 +        entry = list_entry(tlb_track->free_list.next,
   6.162 +                           struct tlb_track_entry, list);
   6.163 +        tlb_track->num_free--;
   6.164 +        list_del(&entry->list);
   6.165 +    }
   6.166 +    spin_unlock(&tlb_track->free_list_lock);
   6.167 +    return entry;
   6.168 +}
   6.169 +
   6.170 +void
   6.171 +tlb_track_free_entry(struct tlb_track* tlb_track,
   6.172 +                     struct tlb_track_entry* entry)
   6.173 +{
   6.174 +    spin_lock(&tlb_track->free_list_lock);
   6.175 +    list_add(&entry->list, &tlb_track->free_list);
   6.176 +    tlb_track->num_free++;
   6.177 +    spin_unlock(&tlb_track->free_list_lock);
   6.178 +}
   6.179 +
   6.180 +
   6.181 +#include <linux/hash.h>
   6.182 +/* XXX hash function. */
   6.183 +static struct list_head*
   6.184 +tlb_track_hash_head(struct tlb_track* tlb_track, volatile pte_t* ptep)
   6.185 +{
   6.186 +    unsigned long hash = hash_long((unsigned long)ptep, tlb_track->hash_shift);
   6.187 +    BUG_ON(hash >= tlb_track->hash_size);
   6.188 +    BUG_ON((hash & tlb_track->hash_mask) != hash);
   6.189 +    return &tlb_track->hash[hash];
   6.190 +}
   6.191 +
   6.192 +static int
   6.193 +tlb_track_pte_zapped(pte_t old_pte, pte_t ret_pte)
   6.194 +{
   6.195 +    if (pte_pfn(old_pte) != pte_pfn(ret_pte) ||
   6.196 +        (pte_val(old_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK)) !=
   6.197 +        (pte_val(ret_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK))) {
   6.198 +        /* Other thread zapped the p2m entry. */
   6.199 +        return 1;
   6.200 +    }
   6.201 +    return 0;
   6.202 +}
   6.203 +
   6.204 +static TLB_TRACK_RET_T
   6.205 +tlb_track_insert_or_dirty(struct tlb_track* tlb_track, struct mm_struct* mm,
   6.206 +                          volatile pte_t* ptep, pte_t old_pte,
   6.207 +                          unsigned long vaddr, unsigned long rid)
   6.208 +{
   6.209 +    unsigned long mfn = pte_pfn(old_pte);
   6.210 +    struct list_head* head = tlb_track_hash_head(tlb_track, ptep);
   6.211 +    struct tlb_track_entry* entry;
   6.212 +    struct tlb_track_entry* new_entry = NULL;
   6.213 +    unsigned long bit_to_be_set = _PAGE_TLB_INSERTED;
   6.214 +    pte_t new_pte;
   6.215 +    pte_t ret_pte;
   6.216 +
   6.217 +    struct vcpu* v = current;
   6.218 +    TLB_TRACK_RET_T ret = TLB_TRACK_NOT_FOUND;
   6.219 +
   6.220 +#if 0 /* this is done at vcpu_tlb_track_insert_or_dirty() */
   6.221 +    perfc_incrc(tlb_track_iod);
   6.222 +    if (!pte_tlb_tracking(old_pte)) {
   6.223 +        perfc_incrc(tlb_track_iod_not_tracked);
   6.224 +        return TLB_TRACK_NOT_TRACKED;
   6.225 +    }
   6.226 +#endif
   6.227 +    if (pte_tlb_inserted_many(old_pte)) {
   6.228 +        perfc_incrc(tlb_track_iod_tracked_many);
   6.229 +        return TLB_TRACK_MANY;
   6.230 +    }
   6.231 +
   6.232 +    /* vaddr must be normalized so that it is in vrn7 and page aligned. */
   6.233 +    BUG_ON((vaddr >> IA64_RR_SHIFT) != VRN7);
   6.234 +    BUG_ON((vaddr & ~PAGE_MASK) != 0);
   6.235 +#if 0
   6.236 +    tlb_track_printd("\n"
   6.237 +                     "\tmfn 0x%016lx\n"
   6.238 +                     "\told_pte 0x%016lx ptep 0x%p\n"
   6.239 +                     "\tptep_val 0x%016lx vaddr 0x%016lx rid %ld\n"
   6.240 +                     "\ttlb_track 0x%p head 0x%p\n",
   6.241 +                     mfn,
   6.242 +                     pte_val(old_pte), ptep, pte_val(*ptep),
   6.243 +                     vaddr, rid,
   6.244 +                     tlb_track, head);
   6.245 +#endif
   6.246 +
   6.247 + again:
   6.248 +    /*
    6.249 +     * The zapping side may zap the p2m entry and then remove the
    6.250 +     * tlb track entry non-atomically; we may see a stale tlb track
    6.251 +     * entry here. p2m_entry_retry() handles such a case.
    6.252 +     * Alternatively, another thread may zap the p2m entry, remove
    6.253 +     * the tlb track entry and insert a new tlb track entry.
   6.254 +     */
   6.255 +    spin_lock(&tlb_track->hash_lock);
   6.256 +    list_for_each_entry(entry, head, list) {
   6.257 +        if (entry->ptep != ptep)
   6.258 +            continue;
   6.259 +
   6.260 +        if (pte_pfn(entry->pte_val) == mfn) {
   6.261 +            // tlb_track_entry_printf(entry);
   6.262 +            if (entry->vaddr == vaddr && entry->rid == rid) {
   6.263 +                // tlb_track_printd("TLB_TRACK_FOUND\n");
   6.264 +                ret = TLB_TRACK_FOUND;
   6.265 +                perfc_incrc(tlb_track_iod_found);
   6.266 +#ifdef CONFIG_TLB_TRACK_CNT
   6.267 +                entry->cnt++;
   6.268 +                if (entry->cnt > TLB_TRACK_CNT_FORCE_MANY) {
   6.269 +                    /*
   6.270 +                     * heuristics:
    6.271 +                     * If a page is used to transfer data through a
    6.272 +                     * device channel, it is unmapped after only a few
    6.273 +                     * accesses (one or two tlb inserts) once the real
    6.274 +                     * device I/O completes, i.e. within a short period.
    6.275 +                     * This page, however, has been accessed many times,
    6.276 +                     * so we guess it is used as an I/O ring and that
    6.277 +                     * tracking this entry is probably useless.
   6.278 +                     */
   6.279 +                     // tlb_track_entry_printf(entry);
   6.280 +                     // tlb_track_printd("cnt = %ld\n", entry->cnt);
   6.281 +                    perfc_incrc(tlb_track_iod_force_many);
   6.282 +                    goto force_many;
   6.283 +                }
   6.284 +#endif
   6.285 +                goto found;
   6.286 +            } else {
   6.287 +#ifdef CONFIG_TLB_TRACK_CNT
   6.288 +            force_many:
   6.289 +#endif
   6.290 +                if (!pte_tlb_inserted(old_pte)) {
   6.291 +                    printk("%s:%d racy update\n", __func__, __LINE__);
   6.292 +                    old_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED);
   6.293 +                }
   6.294 +                new_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED_MANY);
   6.295 +                ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte);
   6.296 +                if (pte_val(ret_pte) != pte_val(old_pte)) {
   6.297 +                    // tlb_track_printd("TLB_TRACK_AGAIN\n");
   6.298 +                    ret = TLB_TRACK_AGAIN;
   6.299 +                    perfc_incrc(tlb_track_iod_again);
   6.300 +                } else {
   6.301 +                    // tlb_track_printd("TLB_TRACK_MANY del entry 0x%p\n",
   6.302 +                    //                  entry);
   6.303 +                    ret = TLB_TRACK_MANY;
   6.304 +                    list_del(&entry->list);
   6.305 +                    // tlb_track_entry_printf(entry);
   6.306 +                    perfc_incrc(tlb_track_iod_tracked_many_del);
   6.307 +                }
   6.308 +                goto out;
   6.309 +            }
   6.310 +        }
   6.311 +
   6.312 +        /*
    6.313 +         * Another thread changed the p2m entry, then removed and
    6.314 +         * inserted a new tlb track entry after we read old_pte but
    6.315 +         * before we took the spinlock.
   6.316 +         */
   6.317 +        // tlb_track_printd("TLB_TRACK_AGAIN\n");
   6.318 +        ret = TLB_TRACK_AGAIN;
   6.319 +        perfc_incrc(tlb_track_iod_again);
   6.320 +        goto out;
   6.321 +    }
   6.322 +
   6.323 +    entry = NULL; // prevent freeing entry.
   6.324 +    if (pte_tlb_inserted(old_pte)) {
    6.325 +        /* Another thread removed the tlb_track_entry after we got
    6.326 +           old_pte but before we took the spinlock. */
   6.327 +        ret = TLB_TRACK_AGAIN;
   6.328 +        perfc_incrc(tlb_track_iod_again);
   6.329 +        goto out;
   6.330 +    }
   6.331 +    if (new_entry == NULL && bit_to_be_set == _PAGE_TLB_INSERTED) {
   6.332 +        spin_unlock(&tlb_track->hash_lock);
   6.333 +        new_entry = tlb_track_get_entry(tlb_track);
   6.334 +        if (new_entry == NULL) {
   6.335 +            tlb_track_printd("get_entry failed\n");
    6.336 +            /* an entry can't be allocated;
    6.337 +               fall back to full flush mode. */
   6.338 +            bit_to_be_set |= _PAGE_TLB_INSERTED_MANY;
   6.339 +            perfc_incrc(tlb_track_iod_new_failed);
   6.340 +        }
   6.341 +        // tlb_track_printd("new_entry 0x%p\n", new_entry);
   6.342 +        perfc_incrc(tlb_track_iod_new_entry);
   6.343 +        goto again;
   6.344 +    }
   6.345 +
   6.346 +    BUG_ON(pte_tlb_inserted_many(old_pte));
   6.347 +    new_pte = __pte(pte_val(old_pte) | bit_to_be_set);
   6.348 +    ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte);
   6.349 +    if (pte_val(old_pte) != pte_val(ret_pte)) {
   6.350 +        if (tlb_track_pte_zapped(old_pte, ret_pte)) {
   6.351 +            // tlb_track_printd("zapped TLB_TRACK_AGAIN\n");
   6.352 +            ret = TLB_TRACK_AGAIN;
   6.353 +            perfc_incrc(tlb_track_iod_again);
   6.354 +            goto out;
   6.355 +        }
   6.356 +
    6.357 +        /* Another thread set _PAGE_TLB_INSERTED and/or _PAGE_TLB_INSERTED_MANY. */
   6.358 +        if (pte_tlb_inserted_many(ret_pte)) {
    6.359 +            /* Another thread already set _PAGE_TLB_INSERTED_MANY
    6.360 +               and removed the entry. */
    6.361 +            // tlb_track_printd("inserted TLB_TRACK_MANY\n");
   6.362 +            BUG_ON(!pte_tlb_inserted(ret_pte));
   6.363 +            ret = TLB_TRACK_MANY;
   6.364 +            perfc_incrc(tlb_track_iod_new_many);
   6.365 +            goto out;
   6.366 +        }
   6.367 +        BUG_ON(pte_tlb_inserted(ret_pte));
   6.368 +        BUG();
   6.369 +    }
   6.370 +    if (new_entry) {
    6.371 +        // tlb_track_printd("inserting new_entry 0x%p\n", new_entry);
   6.372 +        entry = new_entry;
   6.373 +        new_entry = NULL;
   6.374 +
   6.375 +        entry->ptep = ptep;
   6.376 +        entry->pte_val = old_pte;
   6.377 +        entry->vaddr = vaddr;
   6.378 +        entry->rid = rid;
   6.379 +        cpus_clear(entry->pcpu_dirty_mask);
   6.380 +        vcpus_clear(entry->vcpu_dirty_mask);
   6.381 +        list_add(&entry->list, head);
   6.382 +
   6.383 +#ifdef CONFIG_TLB_TRACK_CNT
   6.384 +        entry->cnt = 0;
   6.385 +#endif
   6.386 +        perfc_incrc(tlb_track_iod_insert);
   6.387 +        // tlb_track_entry_printf(entry);
   6.388 +    } else {
   6.389 +        goto out;
   6.390 +    }
   6.391 +
   6.392 + found:
   6.393 +    BUG_ON(v->processor >= NR_CPUS);
   6.394 +    cpu_set(v->processor, entry->pcpu_dirty_mask);
   6.395 +    BUG_ON(v->vcpu_id >= NR_CPUS);
   6.396 +    vcpu_set(v->vcpu_id, entry->vcpu_dirty_mask);
   6.397 +    perfc_incrc(tlb_track_iod_dirtied);
   6.398 +
   6.399 + out:
   6.400 +    spin_unlock(&tlb_track->hash_lock);
   6.401 +    if (ret == TLB_TRACK_MANY && entry != NULL)
   6.402 +        tlb_track_free_entry(tlb_track, entry);
   6.403 +    if (new_entry != NULL)
   6.404 +        tlb_track_free_entry(tlb_track, new_entry);
   6.405 +    return ret;
   6.406 +}
   6.407 +
   6.408 +void
   6.409 +__vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr,
   6.410 +                                 struct p2m_entry* entry)
   6.411 +{
   6.412 +    unsigned long vrn = vaddr >> IA64_RR_SHIFT;
   6.413 +    unsigned long rid = PSCB(vcpu, rrs[vrn]);
   6.414 +    TLB_TRACK_RET_T ret;
   6.415 +
    6.416 +    /* normalize to vrn7.
    6.417 +       For linux dom0, vrn7 is the most common case. */
   6.418 +    vaddr |= VRN7 << VRN_SHIFT;
   6.419 +    vaddr &= PAGE_MASK;
   6.420 +    ret = tlb_track_insert_or_dirty(vcpu->domain->arch.tlb_track,
   6.421 +                                    &vcpu->domain->arch.mm,
   6.422 +                                    entry->ptep, entry->used,
   6.423 +                                    vaddr, rid);
   6.424 +    if (ret == TLB_TRACK_AGAIN)
   6.425 +        p2m_entry_set_retry(entry);
   6.426 +}
   6.427 +
   6.428 +TLB_TRACK_RET_T
   6.429 +tlb_track_search_and_remove(struct tlb_track* tlb_track,
   6.430 +                            volatile pte_t* ptep, pte_t old_pte,
   6.431 +                            struct tlb_track_entry** entryp)
   6.432 +{
   6.433 +    unsigned long mfn = pte_pfn(old_pte);
   6.434 +    struct list_head* head = tlb_track_hash_head(tlb_track, ptep);
   6.435 +    struct tlb_track_entry* entry;
   6.436 +
   6.437 +    perfc_incrc(tlb_track_sar);
   6.438 +    if (!pte_tlb_tracking(old_pte)) {
   6.439 +        perfc_incrc(tlb_track_sar_not_tracked);
   6.440 +        return TLB_TRACK_NOT_TRACKED;
   6.441 +    }
   6.442 +    if (!pte_tlb_inserted(old_pte)) {
   6.443 +        BUG_ON(pte_tlb_inserted_many(old_pte));
   6.444 +        perfc_incrc(tlb_track_sar_not_found);
   6.445 +        return TLB_TRACK_NOT_FOUND;
   6.446 +    }
   6.447 +    if (pte_tlb_inserted_many(old_pte)) {
   6.448 +        BUG_ON(!pte_tlb_inserted(old_pte));
   6.449 +        perfc_incrc(tlb_track_sar_many);
   6.450 +        return TLB_TRACK_MANY;
   6.451 +    }
   6.452 +
   6.453 +    spin_lock(&tlb_track->hash_lock);
   6.454 +    list_for_each_entry(entry, head, list) {
   6.455 +        if (entry->ptep != ptep)
   6.456 +            continue;
   6.457 +
   6.458 +        if (pte_pfn(entry->pte_val) == mfn) {
   6.459 +            list_del(&entry->list);
   6.460 +            spin_unlock(&tlb_track->hash_lock);
   6.461 +            *entryp = entry;
   6.462 +            perfc_incrc(tlb_track_sar_found);
   6.463 +            // tlb_track_entry_printf(entry);
   6.464 +#ifdef CONFIG_TLB_TRACK_CNT
   6.465 +            // tlb_track_printd("cnt = %ld\n", entry->cnt);
   6.466 +#endif
   6.467 +            return TLB_TRACK_FOUND;
   6.468 +        }
   6.469 +        BUG();
   6.470 +    }
   6.471 +    BUG();
   6.472 +    spin_unlock(&tlb_track->hash_lock);
   6.473 +    return TLB_TRACK_NOT_TRACKED;
   6.474 +}
   6.475 +
   6.476 +/* for debug */
   6.477 +void
   6.478 +__tlb_track_entry_printf(const char* func, int line,
   6.479 +                         const struct tlb_track_entry* entry)
   6.480 +{
   6.481 +    char pcpumask_buf[NR_CPUS + 1];
   6.482 +    char vcpumask_buf[MAX_VIRT_CPUS + 1];
   6.483 +    cpumask_scnprintf(pcpumask_buf, sizeof(pcpumask_buf),
   6.484 +                      entry->pcpu_dirty_mask);
   6.485 +    vcpumask_scnprintf(vcpumask_buf, sizeof(vcpumask_buf),
   6.486 +                       entry->vcpu_dirty_mask);
   6.487 +    printk("%s:%d\n"
   6.488 +           "\tmfn 0x%016lx\n"
   6.489 +           "\told_pte 0x%016lx ptep 0x%p\n"
   6.490 +           "\tpte_val 0x%016lx vaddr 0x%016lx rid %ld\n"
   6.491 +           "\tpcpu_dirty_mask %s vcpu_dirty_mask %s\n"
   6.492 +           "\tentry 0x%p\n",
   6.493 +           func, line,
   6.494 +           pte_pfn(entry->pte_val),
   6.495 +           pte_val(entry->pte_val), entry->ptep, pte_val(*entry->ptep),
   6.496 +           entry->vaddr, entry->rid,
   6.497 +           pcpumask_buf, vcpumask_buf,
   6.498 +           entry);
   6.499 +}
   6.500 +
   6.501 +/*
   6.502 + * Local variables:
   6.503 + * mode: C
   6.504 + * c-set-style: "BSD"
   6.505 + * c-basic-offset: 4
   6.506 + * tab-width: 4
   6.507 + * indent-tabs-mode: nil
   6.508 + * End:
   6.509 + */
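
The TLB_TRACK_AGAIN result feeds back into the itc path through
p2m_entry_set_retry(); a condensed sketch of the loop from the vcpu.c
hunks below (names from the patch, error handling omitted):

    again:
        pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry);
        vcpu_itc_no_srlz(vcpu, 2, ifa, pteval, pte, logps, &entry);
        if (p2m_entry_retry(&entry)) {
            /* either the p2m entry changed under us, or
               tlb_track_insert_or_dirty() returned TLB_TRACK_AGAIN */
            vcpu_flush_tlb_vhpt_range(ifa, logps);
            goto again;
        }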
     7.1 --- a/xen/arch/ia64/xen/vcpu.c	Sat Oct 14 16:42:15 2006 -0600
     7.2 +++ b/xen/arch/ia64/xen/vcpu.c	Sat Oct 14 17:42:00 2006 -0600
     7.3 @@ -24,6 +24,8 @@
     7.4  #include <asm/bundle.h>
     7.5  #include <asm/privop_stat.h>
     7.6  #include <asm/uaccess.h>
     7.7 +#include <asm/p2m_entry.h>
     7.8 +#include <asm/tlb_track.h>
     7.9  
    7.10  /* FIXME: where these declarations should be there ? */
    7.11  extern void getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs);
    7.12 @@ -2007,7 +2009,9 @@ IA64FAULT vcpu_set_dtr(VCPU *vcpu, u64 s
    7.13   VCPU translation cache access routines
    7.14  **************************************************************************/
    7.15  
    7.16 -void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 mp_pte, UINT64 logps)
    7.17 +void
    7.18 +vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte,
    7.19 +                 UINT64 mp_pte, UINT64 logps, struct p2m_entry* entry)
    7.20  {
    7.21  	unsigned long psr;
    7.22  	unsigned long ps = (vcpu->domain==dom0) ? logps : PAGE_SHIFT;
    7.23 @@ -2020,6 +2024,7 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64
    7.24   			      "smaller page size!\n");
    7.25  
    7.26  	BUG_ON(logps > PAGE_SHIFT);
    7.27 +	vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry);
    7.28  	psr = ia64_clear_ic();
    7.29  	ia64_itc(IorD,vaddr,pte,ps); // FIXME: look for bigger mappings
    7.30  	ia64_set_psr(psr);
    7.31 @@ -2037,7 +2042,7 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64
    7.32  	// PAGE_SIZE mapping in the vhpt for now, else purging is complicated
    7.33  	else vhpt_insert(vaddr,pte,PAGE_SHIFT<<2);
    7.34  #endif
    7.35 -	if ((mp_pte == -1UL) || (IorD & 0x4)) // don't place in 1-entry TLB
    7.36 +	if (IorD & 0x4) /* don't place in 1-entry TLB */
    7.37  		return;
    7.38  	if (IorD & 0x1) {
    7.39  		vcpu_set_tr_entry(&PSCBX(vcpu,itlb),mp_pte,ps<<2,vaddr);
    7.40 @@ -2062,7 +2067,7 @@ again:
    7.41  	pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry);
    7.42  	if (!pteval) return IA64_ILLOP_FAULT;
    7.43  	if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0]));
    7.44 -	vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps);
    7.45 +	vcpu_itc_no_srlz(vcpu, 2, ifa, pteval, pte, logps, &entry);
    7.46  	if (swap_rr0) set_metaphysical_rr0();
    7.47  	if (p2m_entry_retry(&entry)) {
    7.48  		vcpu_flush_tlb_vhpt_range(ifa, logps);
    7.49 @@ -2085,7 +2090,7 @@ again:
    7.50  	pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry);
    7.51  	if (!pteval) return IA64_ILLOP_FAULT;
    7.52  	if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0]));
    7.53 -	vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps);
    7.54 +	vcpu_itc_no_srlz(vcpu, 1, ifa, pteval, pte, logps, &entry);
    7.55  	if (swap_rr0) set_metaphysical_rr0();
    7.56  	if (p2m_entry_retry(&entry)) {
    7.57  		vcpu_flush_tlb_vhpt_range(ifa, logps);
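
A note on the IorD argument as this patch now uses it (a reading of the
code, not documented constants):

    /*
     * IorD bits in vcpu_itc_no_srlz():
     *   0x1  instruction translation (itlb)
     *   0x2  data translation (dtlb)
     *   0x4  do not record the mapping in the per-vcpu 1-entry
     *        itlb/dtlb cache
     * faults.c now passes (is_data ? 2 : 1) | 4, replacing the old
     * mp_pte == -1UL convention for skipping that cache.
     */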
     8.1 --- a/xen/arch/ia64/xen/vhpt.c	Sat Oct 14 16:42:15 2006 -0600
     8.2 +++ b/xen/arch/ia64/xen/vhpt.c	Sat Oct 14 17:42:00 2006 -0600
     8.3 @@ -18,6 +18,7 @@
     8.4  #include <asm/page.h>
     8.5  #include <asm/vhpt.h>
     8.6  #include <asm/vcpu.h>
     8.7 +#include <asm/vcpumask.h>
     8.8  #include <asm/vmmu.h>
     8.9  
    8.10  /* Defined in tlb.c  */
    8.11 @@ -42,12 +43,14 @@ void
    8.12  local_vhpt_flush(void)
    8.13  {
    8.14  	__vhpt_flush(__ia64_per_cpu_var(vhpt_paddr));
    8.15 +	perfc_incrc(local_vhpt_flush);
    8.16  }
    8.17  
    8.18  static void
    8.19  vcpu_vhpt_flush(struct vcpu* v)
    8.20  {
    8.21  	__vhpt_flush(vcpu_vhpt_maddr(v));
    8.22 +	perfc_incrc(vcpu_vhpt_flush);
    8.23  }
    8.24  
    8.25  static void
    8.26 @@ -170,6 +173,39 @@ pervcpu_vhpt_free(struct vcpu *v)
    8.27  }
    8.28  #endif
    8.29  
    8.30 +void
    8.31 +domain_purge_swtc_entries(struct domain *d)
    8.32 +{
    8.33 +	struct vcpu* v;
    8.34 +	for_each_vcpu(d, v) {
    8.35 +		if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
    8.36 +			continue;
    8.37 +
    8.38 +		/* Purge TC entries.
    8.39 +		   FIXME: clear only if match.  */
    8.40 +		vcpu_purge_tr_entry(&PSCBX(v,dtlb));
    8.41 +		vcpu_purge_tr_entry(&PSCBX(v,itlb));
    8.42 +	}
    8.43 +}
    8.44 +
    8.45 +void
    8.46 +domain_purge_swtc_entries_vcpu_dirty_mask(struct domain* d,
    8.47 +                                          vcpumask_t vcpu_dirty_mask)
    8.48 +{
    8.49 +	int vcpu;
    8.50 +
    8.51 +	for_each_vcpu_mask(vcpu, vcpu_dirty_mask) {
    8.52 +		struct vcpu* v = d->vcpu[vcpu];
    8.53 +		if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
    8.54 +			continue;
    8.55 +
    8.56 +		/* Purge TC entries.
    8.57 +		   FIXME: clear only if match.  */
    8.58 +		vcpu_purge_tr_entry(&PSCBX(v, dtlb));
    8.59 +		vcpu_purge_tr_entry(&PSCBX(v, itlb));
    8.60 +	}
    8.61 +}
    8.62 +
    8.63  // SMP: we can't assume v == current, vcpu might move to another physical cpu.
    8.64  // So memory barrier is necessary.
    8.65  // if we can guranttee that vcpu can run on only this physical cpu
    8.66 @@ -292,15 +328,7 @@ void domain_flush_vtlb_range (struct dom
    8.67  	}
    8.68  #endif
    8.69  
    8.70 -	for_each_vcpu (d, v) {
    8.71 -		if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
    8.72 -			continue;
    8.73 -
    8.74 -		/* Purge TC entries.
    8.75 -		   FIXME: clear only if match.  */
    8.76 -		vcpu_purge_tr_entry(&PSCBX(v,dtlb));
    8.77 -		vcpu_purge_tr_entry(&PSCBX(v,itlb));
    8.78 -	}
    8.79 +	domain_purge_swtc_entries(d);
    8.80  	smp_mb();
    8.81  
    8.82  	for_each_vcpu (d, v) {
    8.83 @@ -327,6 +355,83 @@ void domain_flush_vtlb_range (struct dom
    8.84  	perfc_incrc(domain_flush_vtlb_range);
    8.85  }
    8.86  
    8.87 +#ifdef CONFIG_XEN_IA64_TLB_TRACK
    8.88 +#include <asm/tlb_track.h>
    8.89 +#include <asm/vmx_vcpu.h>
    8.90 +void
    8.91 +__domain_flush_vtlb_track_entry(struct domain* d,
    8.92 +                                const struct tlb_track_entry* entry)
    8.93 +{
    8.94 +	unsigned long rr7_rid;
    8.95 +	int swap_rr0 = 0;
    8.96 +	unsigned long old_rid;
    8.97 +	unsigned long vaddr = entry->vaddr;
    8.98 +	struct vcpu* v;
    8.99 +	int cpu;
   8.100 +	int vcpu;
   8.101 +
   8.102 +	BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
   8.103 +	/*
   8.104 +	 * heuristic:
    8.105 +	 * dom0 linux accesses grant-mapped pages via the kernel
    8.106 +	 * straight-mapped area and doesn't change the rr7 rid.
    8.107 +	 * So it is likely that rr7 == entry->rid, in which case
    8.108 +	 * we can avoid the rid change.
   8.109 +	 * When blktap is supported, this heuristic should be revised.
   8.110 +	 */
   8.111 +	vcpu_get_rr(current, VRN7 << VRN_SHIFT, &rr7_rid);
   8.112 +	if (likely(rr7_rid == entry->rid)) {
   8.113 +		perfc_incrc(tlb_track_use_rr7);
   8.114 +	} else {
   8.115 +		swap_rr0 = 1;
   8.116 +		vaddr = (vaddr << 3) >> 3;// force vrn0
   8.117 +		perfc_incrc(tlb_track_swap_rr0);
   8.118 +	}
   8.119 +
   8.120 +	// tlb_track_entry_printf(entry);
   8.121 +	if (swap_rr0) {
   8.122 +		vcpu_get_rr(current, 0, &old_rid);
   8.123 +		vcpu_set_rr(current, 0, entry->rid);
   8.124 +	}
   8.125 +    
   8.126 +	if (HAS_PERVCPU_VHPT(d)) {
   8.127 +		for_each_vcpu_mask(vcpu, entry->vcpu_dirty_mask) {
   8.128 +			v = d->vcpu[vcpu];
   8.129 +			if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
   8.130 +				continue;
   8.131 +
   8.132 +			/* Invalidate VHPT entries.  */
   8.133 +			vcpu_flush_vhpt_range(v, vaddr, PAGE_SIZE);
   8.134 +		}
   8.135 +	} else {
   8.136 +		for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
   8.137 +			/* Invalidate VHPT entries.  */
   8.138 +			cpu_flush_vhpt_range(cpu, vaddr, PAGE_SIZE);
   8.139 +		}
   8.140 +	}
   8.141 +	/* ptc.ga has release semantics. */
   8.142 +
   8.143 +	/* ptc.ga  */
   8.144 +	ia64_global_tlb_purge(vaddr, vaddr + PAGE_SIZE, PAGE_SHIFT);
   8.145 +
   8.146 +	if (swap_rr0) {
   8.147 +		vcpu_set_rr(current, 0, old_rid);
   8.148 +	}
   8.149 +	perfc_incrc(domain_flush_vtlb_track_entry);
   8.150 +}
   8.151 +
   8.152 +void
   8.153 +domain_flush_vtlb_track_entry(struct domain* d,
   8.154 +                              const struct tlb_track_entry* entry)
   8.155 +{
   8.156 +	domain_purge_swtc_entries_vcpu_dirty_mask(d, entry->vcpu_dirty_mask);
   8.157 +	smp_mb();
   8.158 +
   8.159 +	__domain_flush_vtlb_track_entry(d, entry);
   8.160 +}
   8.161 +
   8.162 +#endif
   8.163 +
   8.164  static void flush_tlb_vhpt_all (struct domain *d)
   8.165  {
   8.166  	/* First VHPT.  */
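
Background for the rr7/rr0 juggling above (generic ia64 behaviour,
stated here as an aside):

    /*
     * The top three bits of an ia64 virtual address select a region
     * register:
     *     vrn = vaddr >> VRN_SHIFT;   /* bits 63..61 */
     *     rid = rr[vrn].rid;
     * A purge must match (vaddr, rid), so unless rr7 already carries
     * entry->rid, the code rewrites vaddr into vrn0 and temporarily
     * loads entry->rid into rr0 before purging.
     */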
     9.1 --- a/xen/include/asm-ia64/domain.h	Sat Oct 14 16:42:15 2006 -0600
     9.2 +++ b/xen/include/asm-ia64/domain.h	Sat Oct 14 17:42:00 2006 -0600
     9.3 @@ -13,28 +13,10 @@
     9.4  #include <asm/fpswa.h>
     9.5  #include <xen/rangeset.h>
     9.6  
     9.7 -struct p2m_entry {
     9.8 -    volatile pte_t*     pte;
     9.9 -    pte_t               used;
    9.10 -};
    9.11 -
    9.12 -static inline void
    9.13 -p2m_entry_set(struct p2m_entry* entry, volatile pte_t* pte, pte_t used)
    9.14 -{
    9.15 -    entry->pte  = pte;
    9.16 -    entry->used = used;
    9.17 -}
    9.18 -
    9.19 -static inline int
    9.20 -p2m_entry_retry(struct p2m_entry* entry)
    9.21 -{
    9.22 -    //XXX see lookup_domain_pte().
    9.23 -    //    NULL is set for invalid gpaddr for the time being.
    9.24 -    if (entry->pte == NULL)
    9.25 -        return 0;
    9.26 -
    9.27 -    return (pte_val(*entry->pte) != pte_val(entry->used));
    9.28 -}
    9.29 +struct p2m_entry;
    9.30 +#ifdef CONFIG_XEN_IA64_TLB_TRACK
    9.31 +struct tlb_track;
    9.32 +#endif
    9.33  
    9.34  extern void domain_relinquish_resources(struct domain *);
    9.35  struct vcpu;
    9.36 @@ -140,6 +122,10 @@ struct arch_domain {
    9.37      struct last_vcpu last_vcpu[NR_CPUS];
    9.38  
    9.39      struct arch_vmx_domain arch_vmx; /* Virtual Machine Extensions */
    9.40 +
    9.41 +#ifdef CONFIG_XEN_IA64_TLB_TRACK
    9.42 +    struct tlb_track*   tlb_track;
    9.43 +#endif
    9.44  };
    9.45  #define INT_ENABLE_OFFSET(v) 		  \
    9.46      (sizeof(vcpu_info_t) * (v)->vcpu_id + \
    10.1 --- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h	Sat Oct 14 16:42:15 2006 -0600
    10.2 +++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h	Sat Oct 14 17:42:00 2006 -0600
    10.3 @@ -69,6 +69,26 @@
    10.4  #define _PAGE_VIRT_D		(__IA64_UL(1) << 53)	/* Virtual dirty bit */
    10.5  #define _PAGE_PROTNONE		0
    10.6  
    10.7 +#ifdef CONFIG_XEN_IA64_TLB_TRACK
    10.8 +#define _PAGE_TLB_TRACKING_BIT          54
    10.9 +#define _PAGE_TLB_INSERTED_BIT          55
   10.10 +#define _PAGE_TLB_INSERTED_MANY_BIT     56
   10.11 +
   10.12 +#define _PAGE_TLB_TRACKING              (1UL << _PAGE_TLB_TRACKING_BIT)
   10.13 +#define _PAGE_TLB_INSERTED              (1UL << _PAGE_TLB_INSERTED_BIT)
   10.14 +#define _PAGE_TLB_INSERTED_MANY         (1UL << _PAGE_TLB_INSERTED_MANY_BIT)
   10.15 +#define _PAGE_TLB_TRACK_MASK            (_PAGE_TLB_TRACKING |		\
   10.16 +                                         _PAGE_TLB_INSERTED |		\
   10.17 +                                         _PAGE_TLB_INSERTED_MANY)
   10.18 +
   10.19 +#define pte_tlb_tracking(pte)				\
   10.20 +    ((pte_val(pte) & _PAGE_TLB_TRACKING) != 0)
   10.21 +#define pte_tlb_inserted(pte)				\
   10.22 +    ((pte_val(pte) & _PAGE_TLB_INSERTED) != 0)
   10.23 +#define pte_tlb_inserted_many(pte)			\
   10.24 +    ((pte_val(pte) & _PAGE_TLB_INSERTED_MANY) != 0)
   10.25 +#endif // CONFIG_XEN_IA64_TLB_TRACK
   10.26 +
   10.27  /* domVTI */
   10.28  #define GPFN_MEM		(0UL << 60)	/* Guest pfn is normal mem */
   10.29  #define GPFN_FRAME_BUFFER	(1UL << 60)	/* VGA framebuffer */
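
Read together with tlb_track.c, the three bits act as a small per-pte
state machine (a reconstruction, not text from the source):

    /*
     * _PAGE_TLB_TRACKING alone:
     *     trackable, no insert recorded yet; unmap needs no flush.
     * ... | _PAGE_TLB_INSERTED:
     *     one insert recorded in a tlb_track_entry; unmap flushes
     *     just that (vaddr, rid) range.
     * ... | _PAGE_TLB_INSERTED_MANY:
     *     more than one insert (or entry allocation failed); the
     *     entry is dropped and unmap falls back to a full vTLB flush.
     */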
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/xen/include/asm-ia64/p2m_entry.h	Sat Oct 14 17:42:00 2006 -0600
    11.3 @@ -0,0 +1,76 @@
    11.4 +/******************************************************************************
    11.5 + * p2m_entry.h
    11.6 + *
    11.7 + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
    11.8 + *                    VA Linux Systems Japan K.K.
    11.9 + *
   11.10 + * This program is free software; you can redistribute it and/or modify
   11.11 + * it under the terms of the GNU General Public License as published by
   11.12 + * the Free Software Foundation; either version 2 of the License, or
   11.13 + * (at your option) any later version.
   11.14 + *
   11.15 + * This program is distributed in the hope that it will be useful,
   11.16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   11.17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   11.18 + * GNU General Public License for more details.
   11.19 + *
   11.20 + * You should have received a copy of the GNU General Public License
   11.21 + * along with this program; if not, write to the Free Software
   11.22 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   11.23 + *
   11.24 + */
   11.25 +
   11.26 +#ifndef __ASM_P2M_ENTRY_H__
   11.27 +#define __ASM_P2M_ENTRY_H__
   11.28 +
   11.29 +#include <asm/pgtable.h>
   11.30 +
   11.31 +struct p2m_entry {
   11.32 +#define P2M_PTE_ALWAYS_RETRY	((volatile pte_t*) -1)
   11.33 +    volatile pte_t*     ptep;
   11.34 +    pte_t               used;
   11.35 +};
   11.36 +
   11.37 +static inline void
   11.38 +p2m_entry_set(struct p2m_entry* entry, volatile pte_t* ptep, pte_t used)
   11.39 +{
   11.40 +    entry->ptep = ptep;
   11.41 +    entry->used = used;
   11.42 +}
   11.43 +
   11.44 +static inline void
   11.45 +p2m_entry_set_retry(struct p2m_entry* entry)
   11.46 +{
   11.47 +    entry->ptep = P2M_PTE_ALWAYS_RETRY;
   11.48 +}
   11.49 +
   11.50 +static inline int
   11.51 +p2m_entry_retry(struct p2m_entry* entry)
   11.52 +{
   11.53 +    /* XXX see lookup_domain_pte().
   11.54 +       NULL is set for invalid gpaddr for the time being. */
   11.55 +    if (entry->ptep == NULL)
   11.56 +        return 0;
   11.57 +
   11.58 +    if (entry->ptep == P2M_PTE_ALWAYS_RETRY)
   11.59 +        return 1;
   11.60 +
   11.61 +#ifdef CONFIG_XEN_IA64_TLB_TRACK
   11.62 +    return ((pte_val(*entry->ptep) & ~_PAGE_TLB_TRACK_MASK) !=
   11.63 +            (pte_val(entry->used) & ~_PAGE_TLB_TRACK_MASK));
   11.64 +#else
   11.65 +    return (pte_val(*entry->ptep) != pte_val(entry->used));
   11.66 +#endif
   11.67 +}
   11.68 +
   11.69 +#endif // __ASM_P2M_ENTRY_H__
   11.70 +
   11.71 +/*
   11.72 + * Local variables:
   11.73 + * mode: C
   11.74 + * c-set-style: "BSD"
   11.75 + * c-basic-offset: 4
   11.76 + * tab-width: 4
   11.77 + * indent-tabs-mode: nil
   11.78 + * End:
   11.79 + */
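
Why the _PAGE_TLB_TRACK_MASK bits are ignored in the comparison above
(an illustrative case; the assignments are schematic):

    /*
     * The insert side sets _PAGE_TLB_INSERTED with ptep_cmpxchg_rel(),
     * which changes *ptep without invalidating the translation:
     *     entry->used  = pfn | _PAGE_TLB_TRACKING | ...;
     *     *entry->ptep = entry->used | _PAGE_TLB_INSERTED;
     * With the mask, p2m_entry_retry() still returns 0 here; without
     * it, every tracked tlb insert would force a spurious retry.
     */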
    12.1 --- a/xen/include/asm-ia64/perfc_defn.h	Sat Oct 14 16:42:15 2006 -0600
    12.2 +++ b/xen/include/asm-ia64/perfc_defn.h	Sat Oct 14 17:42:00 2006 -0600
    12.3 @@ -109,9 +109,12 @@ PERFPRIVOPADDR(thash)
    12.4  #endif
    12.5  
    12.6  // vhpt.c
    12.7 +PERFCOUNTER_CPU(local_vhpt_flush,               "local_vhpt_flush")
    12.8 +PERFCOUNTER_CPU(vcpu_vhpt_flush,                "vcpu_vhpt_flush")
    12.9  PERFCOUNTER_CPU(vcpu_flush_vtlb_all,            "vcpu_flush_vtlb_all")
   12.10  PERFCOUNTER_CPU(domain_flush_vtlb_all,          "domain_flush_vtlb_all")
   12.11  PERFCOUNTER_CPU(vcpu_flush_tlb_vhpt_range,      "vcpu_flush_tlb_vhpt_range")
   12.12 +PERFCOUNTER_CPU(domain_flush_vtlb_track_entry,  "domain_flush_vtlb_track_entry")
   12.13  PERFCOUNTER_CPU(domain_flush_vtlb_range,        "domain_flush_vtlb_range")
   12.14  
   12.15  // domain.c
   12.16 @@ -134,3 +137,30 @@ PERFCOUNTER_CPU(domain_page_flush,      
   12.17  // dom0vp
   12.18  PERFCOUNTER_CPU(dom0vp_phystomach,              "dom0vp_phystomach")
   12.19  PERFCOUNTER_CPU(dom0vp_machtophys,              "dom0vp_machtophys")
   12.20 +
   12.21 +#ifdef CONFIG_XEN_IA64_TLB_TRACK
   12.22 +// insert or dirty
   12.23 +PERFCOUNTER_CPU(tlb_track_iod,                  "tlb_track_iod")
   12.24 +PERFCOUNTER_CPU(tlb_track_iod_again,            "tlb_track_iod_again")
   12.25 +PERFCOUNTER_CPU(tlb_track_iod_not_tracked,      "tlb_track_iod_not_tracked")
   12.26 +PERFCOUNTER_CPU(tlb_track_iod_force_many,       "tlb_track_iod_force_many")
   12.27 +PERFCOUNTER_CPU(tlb_track_iod_tracked_many,     "tlb_track_iod_tracked_many")
   12.28 +PERFCOUNTER_CPU(tlb_track_iod_tracked_many_del, "tlb_track_iod_tracked_many_del")
   12.29 +PERFCOUNTER_CPU(tlb_track_iod_found,            "tlb_track_iod_found")
   12.30 +PERFCOUNTER_CPU(tlb_track_iod_new_entry,        "tlb_track_iod_new_entry")
   12.31 +PERFCOUNTER_CPU(tlb_track_iod_new_failed,       "tlb_track_iod_new_failed")
   12.32 +PERFCOUNTER_CPU(tlb_track_iod_new_many,         "tlb_track_iod_new_many")
   12.33 +PERFCOUNTER_CPU(tlb_track_iod_insert,           "tlb_track_iod_insert")
   12.34 +PERFCOUNTER_CPU(tlb_track_iod_dirtied,          "tlb_track_iod_dirtied")
   12.35 +
   12.36 +// search and remove
   12.37 +PERFCOUNTER_CPU(tlb_track_sar,                  "tlb_track_sar")
   12.38 +PERFCOUNTER_CPU(tlb_track_sar_not_tracked,      "tlb_track_sar_not_tracked")
   12.39 +PERFCOUNTER_CPU(tlb_track_sar_not_found,        "tlb_track_sar_not_found")
   12.40 +PERFCOUNTER_CPU(tlb_track_sar_found,            "tlb_track_sar_found")
   12.41 +PERFCOUNTER_CPU(tlb_track_sar_many,             "tlb_track_sar_many")
   12.42 +
   12.43 +// flush
   12.44 +PERFCOUNTER_CPU(tlb_track_use_rr7,              "tlb_track_use_rr7")
   12.45 +PERFCOUNTER_CPU(tlb_track_swap_rr0,             "tlb_track_swap_rr0")
   12.46 +#endif
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/xen/include/asm-ia64/tlb_track.h	Sat Oct 14 17:42:00 2006 -0600
    13.3 @@ -0,0 +1,152 @@
    13.4 +/******************************************************************************
    13.5 + * tlb_track.h
    13.6 + *
    13.7 + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
    13.8 + *                    VA Linux Systems Japan K.K.
    13.9 + *
   13.10 + * This program is free software; you can redistribute it and/or modify
   13.11 + * it under the terms of the GNU General Public License as published by
   13.12 + * the Free Software Foundation; either version 2 of the License, or
   13.13 + * (at your option) any later version.
   13.14 + *
   13.15 + * This program is distributed in the hope that it will be useful,
   13.16 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   13.17 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13.18 + * GNU General Public License for more details.
   13.19 + *
   13.20 + * You should have received a copy of the GNU General Public License
   13.21 + * along with this program; if not, write to the Free Software
   13.22 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   13.23 + *
   13.24 + */
   13.25 +
   13.26 +#ifndef __TLB_TRACK_H__
   13.27 +#define __TLB_TRACK_H__
   13.28 +
   13.29 +#ifdef CONFIG_XEN_IA64_TLB_TRACK
   13.30 +
   13.31 +#include <xen/sched.h>
   13.32 +#include <xen/perfc.h>
   13.33 +#include <asm/domain.h>
   13.34 +#include <xen/list.h>
   13.35 +#include <asm/p2m_entry.h>
   13.36 +#include <asm/vcpumask.h>
   13.37 +
   13.38 +// TODO: compact this structure.
   13.39 +struct tlb_track_entry {
   13.40 +    struct list_head   list;
   13.41 +
   13.42 +    volatile pte_t*     ptep;           // corresponding p2m entry
   13.43 +
   13.44 +    /* XXX should we use TR_ENTRY? */
   13.45 +    pte_t               pte_val;        // mfn and other flags
    13.46 +                                        // pte_val.p = 1:
    13.47 +                                        //   a tlb entry is inserted.
    13.48 +                                        // pte_val.p = 0:
    13.49 +                                        //   a tlb entry was inserted
    13.50 +                                        //   once (hence this entry), but
    13.51 +                                        //   a tlb purge has since been
    13.52 +                                        //   issued, so this virtual
    13.53 +                                        //   address need not be purged.
   13.54 +    unsigned long       vaddr;          // virtual address
   13.55 +    unsigned long       rid;            // rid
   13.56 +
   13.57 +    cpumask_t           pcpu_dirty_mask;
   13.58 +    vcpumask_t          vcpu_dirty_mask;
   13.59 +    // tlbflush_timestamp;
   13.60 +
   13.61 +#ifdef CONFIG_TLB_TRACK_CNT
   13.62 +#define TLB_TRACK_CNT_FORCE_MANY        256 /* XXX how many? */
   13.63 +    unsigned long       cnt;
   13.64 +#endif
   13.65 +};
   13.66 +
   13.67 +struct tlb_track {
   13.68 +
    13.69 +/* see __gnttab_map_grant_ref():
    13.70 +   a domain can map up to MAPTRACK_MAX_ENTRIES granted pages. */
   13.71 +#define TLB_TRACK_LIMIT_ENTRIES                                     \
   13.72 +    (MAPTRACK_MAX_ENTRIES * (PAGE_SIZE / sizeof(struct tlb_track)))
   13.73 +
   13.74 +    spinlock_t                  free_list_lock;
   13.75 +    struct list_head            free_list;
   13.76 +    unsigned int                limit;
   13.77 +    unsigned int                num_entries;
   13.78 +    unsigned int                num_free;
   13.79 +    struct list_head            page_list;
   13.80 +
   13.81 +    /* XXX hash table size */
   13.82 +    spinlock_t                  hash_lock;
   13.83 +    unsigned int                hash_size;
   13.84 +    unsigned int                hash_shift;
   13.85 +    unsigned int                hash_mask;
   13.86 +    struct list_head*           hash;
   13.87 +};
   13.88 +
   13.89 +int tlb_track_create(struct domain* d);
   13.90 +void tlb_track_destroy(struct domain* d);
   13.91 +
   13.92 +void tlb_track_free_entry(struct tlb_track* tlb_track,
   13.93 +                          struct tlb_track_entry* entry);
   13.94 +
   13.95 +void
   13.96 +__vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr,
   13.97 +                                 struct p2m_entry* entry);
   13.98 +static inline void
   13.99 +vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr,
  13.100 +                               struct p2m_entry* entry)
  13.101 +{
   13.102 +    /* Optimization:
   13.103 +       a non-tracked pte is by far the most common case. */
  13.104 +    perfc_incrc(tlb_track_iod);
  13.105 +    if (!pte_tlb_tracking(entry->used)) {
  13.106 +        perfc_incrc(tlb_track_iod_not_tracked);
  13.107 +        return;
  13.108 +    }
  13.109 +
  13.110 +    __vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry);
  13.111 +}
  13.112 +
  13.113 +
   13.114 +/* Return values of tlb_track_search_and_remove(); on TLB_TRACK_FOUND
   13.115 + * the removed entry is handed back through *entryp.
   13.116 + * (See the usage sketch following this file's diff.)
   13.117 + */
  13.118 +enum TLB_TRACK_RET {
  13.119 +    TLB_TRACK_NOT_TRACKED,
  13.120 +    TLB_TRACK_NOT_FOUND,
  13.121 +    TLB_TRACK_FOUND,
  13.122 +    TLB_TRACK_MANY,
  13.123 +    TLB_TRACK_AGAIN,
  13.124 +};
  13.125 +typedef enum TLB_TRACK_RET TLB_TRACK_RET_T;
  13.126 +
  13.127 +TLB_TRACK_RET_T
  13.128 +tlb_track_search_and_remove(struct tlb_track* tlb_track, 
  13.129 +                            volatile pte_t* ptep, pte_t old_pte, 
  13.130 +                            struct tlb_track_entry** entryp);
  13.131 +
  13.132 +void
  13.133 +__tlb_track_entry_printf(const char* func, int line,
  13.134 +                         const struct tlb_track_entry* entry);
  13.135 +#define tlb_track_entry_printf(entry)                       \
  13.136 +    __tlb_track_entry_printf(__func__, __LINE__, (entry))
  13.137 +#else
  13.138 +
  13.139 +#define tlb_track_create(d)                                (0)
  13.140 +#define tlb_track_destroy(d)                               do { } while (0)
  13.141 +#define vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry) do { } while (0)
  13.142 +
  13.143 +#endif /* CONFIG_XEN_IA64_TLB_TRACK */
  13.144 +
  13.145 +#endif /* __TLB_TRACK_H__ */
  13.146 +
  13.147 +/*
  13.148 + * Local variables:
  13.149 + * mode: C
  13.150 + * c-set-style: "BSD"
  13.151 + * c-basic-offset: 4
  13.152 + * tab-width: 4
  13.153 + * indent-tabs-mode: nil
  13.154 + * End:
  13.155 + */
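
The pieces above are easiest to understand from the consumer's side. What follows is an editorial sketch, not part of the patch: a hypothetical grant-unmap path showing how each TLB_TRACK_* return value could translate into a flush decision. The case-to-action mapping is inferred from the changeset description (the full vTLB flush is avoided when no insert was ever tracked); only functions declared by this patch are called, but the surrounding logic is assumed.

    /* Editorial sketch -- not part of the patch. */
    static void
    unmap_flush_sketch(struct domain* d, struct tlb_track* tlb_track,
                       volatile pte_t* ptep, pte_t old_pte)
    {
        struct tlb_track_entry* entry = NULL;

        switch (tlb_track_search_and_remove(tlb_track, ptep, old_pte,
                                            &entry)) {
        case TLB_TRACK_NOT_FOUND:
            /* Tracked, but no tlb insert was ever recorded (e.g. dom0
             * only DMAed to the page): no vTLB flush is needed at all. */
            break;
        case TLB_TRACK_FOUND:
            /* Exactly one insert recorded: flush just that virtual
             * address, then recycle the tracking entry. */
            domain_flush_vtlb_track_entry(d, entry);
            tlb_track_free_entry(tlb_track, entry);
            break;
        case TLB_TRACK_NOT_TRACKED:
        case TLB_TRACK_MANY:
        case TLB_TRACK_AGAIN:
            /* No usable record: fall back to a full vTLB flush. */
            domain_flush_vtlb_all();
            break;
        }
    }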
    14.1 --- a/xen/include/asm-ia64/tlbflush.h	Sat Oct 14 16:42:15 2006 -0600
    14.2 +++ b/xen/include/asm-ia64/tlbflush.h	Sat Oct 14 17:42:00 2006 -0600
    14.3 @@ -22,6 +22,15 @@ void domain_flush_vtlb_all (void);
    14.4  /* Global range-flush of vTLB.  */
    14.5  void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range);
    14.6  
    14.7 +#ifdef CONFIG_XEN_IA64_TLB_TRACK
    14.8 +struct tlb_track_entry;
    14.9 +void __domain_flush_vtlb_track_entry(struct domain* d,
   14.10 +                                     const struct tlb_track_entry* entry);
    14.11 +/* Global entry-flush of vTLB.  */
   14.12 +void domain_flush_vtlb_track_entry(struct domain* d,
   14.13 +                                   const struct tlb_track_entry* entry);
   14.14 +#endif
   14.15 +
   14.16  /* Flush vhpt and mTLB on every dirty cpus.  */
   14.17  void domain_flush_tlb_vhpt(struct domain *d);
   14.18  
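
Relative to domain_flush_vtlb_range(), the payoff of the entry flush is that the tracking entry carries the vaddr, rid and dirty masks, so only the places that may actually cache the translation need to be visited. A hedged sketch of the idea follows; vcpu_purge_sketch() is a hypothetical helper, and the real implementation (in vhpt.c) may differ.

    /* Editorial sketch -- not part of the patch. */
    static void
    flush_track_entry_sketch(struct domain* d,
                             const struct tlb_track_entry* entry)
    {
        int v;

        /* Purge per-vcpu software state only where the translation
         * may actually be cached... */
        for_each_vcpu_mask(v, entry->vcpu_dirty_mask)
            vcpu_purge_sketch(d->vcpu[v], entry->vaddr, entry->rid);

        /* ...then purge hardware TLBs/VHPTs on the physical cpus in
         * entry->pcpu_dirty_mask (e.g. via IPI). */
    }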
    15.1 --- a/xen/include/asm-ia64/vcpu.h	Sat Oct 14 16:42:15 2006 -0600
    15.2 +++ b/xen/include/asm-ia64/vcpu.h	Sat Oct 14 17:42:00 2006 -0600
    15.3 @@ -161,7 +161,8 @@ extern void vcpu_poke_timer(VCPU *vcpu);
    15.4  extern void vcpu_set_next_timer(VCPU *vcpu);
    15.5  extern BOOLEAN vcpu_timer_expired(VCPU *vcpu);
    15.6  extern UINT64 vcpu_deliverable_interrupts(VCPU *vcpu);
    15.7 -extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, UINT64);
    15.8 +struct p2m_entry;
    15.9 +extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, UINT64, struct p2m_entry*);
   15.10  extern UINT64 vcpu_get_tmp(VCPU *, UINT64);
   15.11  extern void vcpu_set_tmp(VCPU *, UINT64, UINT64);
   15.12  
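
The new trailing struct p2m_entry* lets the itc path hand the translation's p2m entry to the TLB tracker at insert time. A hypothetical call site, sketched under assumptions: the positional arguments keep their existing meanings, and IorD == 2 is taken to mean a data-TLB insert.

    /* Editorial sketch -- not part of the patch. */
    static void
    itc_d_sketch(VCPU* vcpu, UINT64 vaddr, UINT64 pte,
                 UINT64 mp_pte, UINT64 logps)
    {
        struct p2m_entry entry;

        /* ... a p2m lookup fills 'entry' for the translated page ... */
        vcpu_itc_no_srlz(vcpu, 2, vaddr, pte, mp_pte, logps, &entry);
        /* Inside, vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, &entry)
         * can record the insert for a later targeted flush. */
    }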
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/xen/include/asm-ia64/vcpumask.h	Sat Oct 14 17:42:00 2006 -0600
    16.3 @@ -0,0 +1,60 @@
    16.4 +#ifndef __XEN_VCPUMASK_H
    16.5 +#define __XEN_VCPUMASK_H
    16.6 +
    16.7 +/* vcpu mask
    16.8 +   stolen from cpumask.h */
    16.9 +typedef struct { DECLARE_BITMAP(bits, MAX_VIRT_CPUS); } vcpumask_t;
   16.10 +
   16.11 +#define vcpu_set(vcpu, dst) __vcpu_set((vcpu), &(dst))
   16.12 +static inline void __vcpu_set(int vcpu, volatile vcpumask_t *dstp)
   16.13 +{
   16.14 +    set_bit(vcpu, dstp->bits);
   16.15 +}
   16.16 +#define vcpus_clear(dst) __vcpus_clear(&(dst), MAX_VIRT_CPUS)
   16.17 +static inline void __vcpus_clear(vcpumask_t *dstp, int nbits)
   16.18 +{
   16.19 +    bitmap_zero(dstp->bits, nbits);
   16.20 +}
    16.21 +/* No static inline type checking - see Subtlety (1) in cpumask.h. */
   16.22 +#define vcpu_isset(vcpu, vcpumask) test_bit((vcpu), (vcpumask).bits)
   16.23 +
   16.24 +#define first_vcpu(src) __first_vcpu(&(src), MAX_VIRT_CPUS)
   16.25 +static inline int __first_vcpu(const vcpumask_t *srcp, int nbits)
   16.26 +{
   16.27 +    return min_t(int, nbits, find_first_bit(srcp->bits, nbits));
   16.28 +}
   16.29 +
   16.30 +#define next_vcpu(n, src) __next_vcpu((n), &(src), MAX_VIRT_CPUS)
   16.31 +static inline int __next_vcpu(int n, const vcpumask_t *srcp, int nbits)
   16.32 +{
   16.33 +    return min_t(int, nbits, find_next_bit(srcp->bits, nbits, n+1));
   16.34 +}
   16.35 +
   16.36 +#if MAX_VIRT_CPUS > 1
   16.37 +#define for_each_vcpu_mask(vcpu, mask)          \
   16.38 +    for ((vcpu) = first_vcpu(mask);             \
   16.39 +         (vcpu) < MAX_VIRT_CPUS;                \
   16.40 +         (vcpu) = next_vcpu((vcpu), (mask)))
    16.41 +#else /* MAX_VIRT_CPUS == 1 */
    16.42 +#define for_each_vcpu_mask(vcpu, mask) for ((vcpu) = 0; (vcpu) < 1; (vcpu)++)
    16.43 +#endif /* MAX_VIRT_CPUS */
   16.44 +
   16.45 +#define vcpumask_scnprintf(buf, len, src) \
   16.46 +        __vcpumask_scnprintf((buf), (len), &(src), MAX_VIRT_CPUS)
   16.47 +static inline int __vcpumask_scnprintf(char *buf, int len,
   16.48 +                                       const vcpumask_t *srcp, int nbits)
   16.49 +{
   16.50 +    return bitmap_scnprintf(buf, len, srcp->bits, nbits);
   16.51 +}
   16.52 +
   16.53 +#endif /* __XEN_VCPUMASK_H */
   16.54 +
   16.55 +/*
   16.56 + * Local variables:
   16.57 + * mode: C
   16.58 + * c-set-style: "BSD"
   16.59 + * c-basic-offset: 4
   16.60 + * tab-width: 4
   16.61 + * indent-tabs-mode: nil
   16.62 + * End:
   16.63 + */
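
A short usage sketch of the helpers above (illustrative only): clear a mask, mark one vcpu, then walk exactly the marked vcpus.

    /* Editorial sketch -- not part of the patch. */
    static void
    vcpumask_demo(struct vcpu* v)
    {
        vcpumask_t mask;
        int vcpu_id;
        char buf[64];

        vcpus_clear(mask);
        vcpu_set(v->vcpu_id, mask);

        for_each_vcpu_mask(vcpu_id, mask)
            printk("vcpu%d may cache the translation\n", vcpu_id);

        vcpumask_scnprintf(buf, sizeof(buf), mask);
    }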
    17.1 --- a/xen/include/asm-ia64/vhpt.h	Sat Oct 14 16:42:15 2006 -0600
    17.2 +++ b/xen/include/asm-ia64/vhpt.h	Sat Oct 14 17:42:00 2006 -0600
    17.3 @@ -18,6 +18,10 @@
    17.4  
    17.5  #ifndef __ASSEMBLY__
    17.6  #include <xen/percpu.h>
    17.7 +#include <asm/vcpumask.h>
    17.8 +
    17.9 +extern void domain_purge_swtc_entries(struct domain *d);
   17.10 +extern void domain_purge_swtc_entries_vcpu_dirty_mask(struct domain* d, vcpumask_t vcpu_dirty_mask);
   17.11  
   17.12  //
   17.13  // VHPT Long Format Entry (as recognized by hw)
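
The new dirty-mask variant pairs with a tracking entry's vcpu_dirty_mask: software translation-cache entries need purging only on the vcpus that may hold the mapping. An illustrative fragment only:

    /* Editorial sketch -- not part of the patch. */
    static void
    purge_swtc_for_entry_sketch(struct domain* d,
                                const struct tlb_track_entry* entry)
    {
        domain_purge_swtc_entries_vcpu_dirty_mask(d, entry->vcpu_dirty_mask);
    }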
    18.1 --- a/xen/include/public/arch-ia64.h	Sat Oct 14 16:42:15 2006 -0600
    18.2 +++ b/xen/include/public/arch-ia64.h	Sat Oct 14 17:42:00 2006 -0600
    18.3 @@ -358,6 +358,9 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte
    18.4  /* Internal only: memory attribute must be WC/UC/UCE.  */
    18.5  #define _ASSIGN_nocache                 1
    18.6  #define ASSIGN_nocache                  (1UL << _ASSIGN_nocache)
     18.7 +/* TLB tracking.  */
    18.8 +#define _ASSIGN_tlb_track               2
    18.9 +#define ASSIGN_tlb_track                (1UL << _ASSIGN_tlb_track)
   18.10  
   18.11  /* This structure has the same layout of struct ia64_boot_param, defined in
   18.12     <asm/system.h>.  It is redefined here to ease use.  */
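
ASSIGN_tlb_track extends the existing ASSIGN_* flag set, letting a mapping request opt a page into tracking. An illustrative fragment, assuming ASSIGN_writable from elsewhere in this header and leaving the consuming routine abstract:

    /* Editorial sketch -- not part of the patch.  A grant-map path might
     * request tracking by OR'ing the new flag into the assignment flags. */
    unsigned long flags = ASSIGN_writable | ASSIGN_tlb_track;
    /* ... 'flags' is then passed to the page-assignment routine,
     * which marks the p2m entry for tracking ... */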