ia64/xen-unstable

changeset 14594:96f167771979

xen: Make all performance counters per-cpu, avoiding the need to update
them with atomic (locked) ops.

Conversion here isn't complete in the sense that many places still use
the old per-CPU accessors (which are now redundant). Since the patch
is already rather big, I'd prefer replacing those in a subsequent
patch.

While doing this, I also converted x86's multicall macros to no longer
require inclusion of asm-offsets.h in the respective C file (on IA64
the use of asm-offsets.h in C sources seems more widespread, hence
there I rather used IA64_ prefixes for the otherwise conflicting
performance counter indices).

On x86, a few counter increments get moved a little, to avoid
duplicate counting of preempted hypercalls.

Also, a few counters are being added.

IA64 changes only compile-tested, hence somebody doing active IA64
work may want to have a close look at those changes.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author kfraser@localhost.localdomain
date Tue Mar 27 16:35:37 2007 +0100 (2007-03-27)
parents 3375391fb0c9
children ea0b50ca4999
files xen/arch/ia64/asm-offsets.c xen/arch/ia64/xen/hyperprivop.S xen/arch/ia64/xen/privop_stat.c xen/arch/ia64/xen/vhpt.c xen/arch/x86/mm.c xen/arch/x86/x86_32/asm-offsets.c xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_64/asm-offsets.c xen/arch/x86/x86_64/compat/entry.S xen/arch/x86/x86_64/entry.S xen/common/multicall.c xen/common/perfc.c xen/include/asm-ia64/linux-xen/asm/asmmacro.h xen/include/asm-ia64/perfc_defn.h xen/include/asm-ia64/privop_stat.h xen/include/asm-x86/multicall.h xen/include/asm-x86/perfc_defn.h xen/include/asm-x86/x86_32/asm_defns.h xen/include/asm-x86/x86_64/asm_defns.h xen/include/xen/perfc.h xen/include/xen/perfc_defn.h
line diff
     1.1 --- a/xen/arch/ia64/asm-offsets.c	Tue Mar 27 16:23:52 2007 +0100
     1.2 +++ b/xen/arch/ia64/asm-offsets.c	Tue Mar 27 16:35:37 2007 +0100
     1.3 @@ -223,10 +223,11 @@ void foo(void)
     1.4  
     1.5  #ifdef PERF_COUNTERS
     1.6  	BLANK();
     1.7 -	DEFINE(RECOVER_TO_PAGE_FAULT_PERFC_OFS, offsetof (struct perfcounter, recover_to_page_fault));
     1.8 -	DEFINE(RECOVER_TO_BREAK_FAULT_PERFC_OFS, offsetof (struct perfcounter, recover_to_break_fault));
     1.9 -	DEFINE(FAST_HYPERPRIVOP_PERFC_OFS, offsetof (struct perfcounter, fast_hyperprivop));
    1.10 -	DEFINE(FAST_REFLECT_PERFC_OFS, offsetof (struct perfcounter, fast_reflect));
    1.11 +	DEFINE(IA64_PERFC_recover_to_page_fault, PERFC_recover_to_page_fault);
    1.12 +	DEFINE(IA64_PERFC_recover_to_break_fault, PERFC_recover_to_break_fault);
    1.13 +	DEFINE(IA64_PERFC_fast_vhpt_translate, PERFC_fast_vhpt_translate);
    1.14 +	DEFINE(IA64_PERFC_fast_hyperprivop, PERFC_fast_hyperprivop);
    1.15 +	DEFINE(IA64_PERFC_fast_reflect, PERFC_fast_reflect);
    1.16  #endif
    1.17  
    1.18  	BLANK();
     2.1 --- a/xen/arch/ia64/xen/hyperprivop.S	Tue Mar 27 16:23:52 2007 +0100
     2.2 +++ b/xen/arch/ia64/xen/hyperprivop.S	Tue Mar 27 16:35:37 2007 +0100
     2.3 @@ -26,8 +26,7 @@
     2.4  # define FAST_HYPERPRIVOPS
     2.5  # ifdef PERF_COUNTERS
     2.6  #  define FAST_HYPERPRIVOP_CNT
     2.7 -#  define FAST_HYPERPRIVOP_PERFC(N) \
     2.8 -	(perfcounters + FAST_HYPERPRIVOP_PERFC_OFS + (4 * N))
     2.9 +#  define FAST_HYPERPRIVOP_PERFC(N) PERFC(fast_hyperprivop + N)
    2.10  #  define FAST_REFLECT_CNT
    2.11  # endif
    2.12  	
    2.13 @@ -364,7 +363,7 @@ GLOBAL_ENTRY(fast_tick_reflect)
    2.14  	mov rp=r29;;
    2.15  	mov cr.itm=r26;;	// ensure next tick
    2.16  #ifdef FAST_REFLECT_CNT
    2.17 -	movl r20=perfcounters+FAST_REFLECT_PERFC_OFS+((0x3000>>8)*4);;
    2.18 +	movl r20=PERFC(fast_reflect + (0x3000>>8));;
    2.19  	ld4 r21=[r20];;
    2.20  	adds r21=1,r21;;
    2.21  	st4 [r20]=r21;;
    2.22 @@ -597,7 +596,7 @@ END(fast_break_reflect)
    2.23  //	r31 == pr
    2.24  ENTRY(fast_reflect)
    2.25  #ifdef FAST_REFLECT_CNT
    2.26 -	movl r22=perfcounters+FAST_REFLECT_PERFC_OFS;
    2.27 +	movl r22=PERFC(fast_reflect);
    2.28  	shr r23=r20,8-2;;
    2.29  	add r22=r22,r23;;
    2.30  	ld4 r21=[r22];;
    2.31 @@ -938,7 +937,7 @@ 1:	// check the guest VHPT
    2.32  (p7)	br.cond.spnt.few page_not_present;;
    2.33  
    2.34  #ifdef FAST_REFLECT_CNT
    2.35 -	movl r21=perfcounter+FAST_VHPT_TRANSLATE_PERFC_OFS;;
    2.36 +	movl r21=PERFC(fast_vhpt_translate);;
    2.37  	ld4 r22=[r21];;
    2.38  	adds r22=1,r22;;
    2.39  	st4 [r21]=r22;;
    2.40 @@ -968,7 +967,7 @@ END(fast_tlb_miss_reflect)
    2.41  // we get here if fast_insert fails (e.g. due to metaphysical lookup)
    2.42  ENTRY(recover_and_page_fault)
    2.43  #ifdef PERF_COUNTERS
    2.44 -	movl r21=perfcounters + RECOVER_TO_PAGE_FAULT_PERFC_OFS;;
    2.45 +	movl r21=PERFC(recover_to_page_fault);;
    2.46  	ld4 r22=[r21];;
    2.47  	adds r22=1,r22;;
    2.48  	st4 [r21]=r22;;
    2.49 @@ -1832,7 +1831,7 @@ END(hyper_ptc_ga)
    2.50  // recovery block for hyper_itc metaphysical memory lookup
    2.51  ENTRY(recover_and_dispatch_break_fault)
    2.52  #ifdef PERF_COUNTERS
    2.53 -	movl r21=perfcounters + RECOVER_TO_BREAK_FAULT_PERFC_OFS;;
    2.54 +	movl r21=PERFC(recover_to_break_fault);;
    2.55  	ld4 r22=[r21];;
    2.56  	adds r22=1,r22;;
    2.57  	st4 [r21]=r22;;
     3.1 --- a/xen/arch/ia64/xen/privop_stat.c	Tue Mar 27 16:23:52 2007 +0100
     3.2 +++ b/xen/arch/ia64/xen/privop_stat.c	Tue Mar 27 16:35:37 2007 +0100
     3.3 @@ -10,48 +10,39 @@ struct privop_addr_count {
     3.4  	unsigned long addr[PRIVOP_COUNT_NADDRS];
     3.5  	unsigned int count[PRIVOP_COUNT_NADDRS];
     3.6  	unsigned int overflow;
     3.7 -	atomic_t *perfc_addr;
     3.8 -	atomic_t *perfc_count;
     3.9 -	atomic_t *perfc_overflow;
    3.10  };
    3.11  
    3.12 -#undef  PERFCOUNTER
    3.13 -#define PERFCOUNTER(var, name)
    3.14 +struct privop_addr_info {
    3.15 +	enum perfcounter perfc_addr;
    3.16 +	enum perfcounter perfc_count;
    3.17 +	enum perfcounter perfc_overflow;
    3.18 +};
    3.19  
    3.20 -#undef  PERFCOUNTER_CPU
    3.21 -#define PERFCOUNTER_CPU(var, name)
    3.22 -
    3.23 -#undef  PERFCOUNTER_ARRAY
    3.24 +#define PERFCOUNTER(var, name)
    3.25  #define PERFCOUNTER_ARRAY(var, name, size)
    3.26  
    3.27 -#undef  PERFSTATUS
    3.28  #define PERFSTATUS(var, name)
    3.29 -
    3.30 -#undef  PERFSTATUS_CPU
    3.31 -#define PERFSTATUS_CPU(var, name)
    3.32 -
    3.33 -#undef  PERFSTATUS_ARRAY
    3.34  #define PERFSTATUS_ARRAY(var, name, size)
    3.35  
    3.36 -#undef PERFPRIVOPADDR
    3.37  #define PERFPRIVOPADDR(name)                        \
    3.38      {                                               \
    3.39 -        { 0 }, { 0 }, 0,                            \
    3.40 -        perfcounters.privop_addr_##name##_addr,     \
    3.41 -        perfcounters.privop_addr_##name##_count,    \
    3.42 -        perfcounters.privop_addr_##name##_overflow  \
    3.43 +        PERFC_privop_addr_##name##_addr,            \
    3.44 +        PERFC_privop_addr_##name##_count,           \
    3.45 +        PERFC_privop_addr_##name##_overflow         \
    3.46      },
    3.47  
    3.48 -static struct privop_addr_count privop_addr_counter[] = {
    3.49 +static const struct privop_addr_info privop_addr_info[] = {
    3.50  #include <asm/perfc_defn.h>
    3.51  };
    3.52  
    3.53  #define PRIVOP_COUNT_NINSTS \
    3.54 -        (sizeof(privop_addr_counter) / sizeof(privop_addr_counter[0]))
    3.55 +        (sizeof(privop_addr_info) / sizeof(privop_addr_info[0]))
    3.56 +
    3.57 +static DEFINE_PER_CPU(struct privop_addr_count[PRIVOP_COUNT_NINSTS], privop_addr_counter);
    3.58  
    3.59  void privop_count_addr(unsigned long iip, enum privop_inst inst)
    3.60  {
    3.61 -	struct privop_addr_count *v = &privop_addr_counter[inst];
    3.62 +	struct privop_addr_count *v = this_cpu(privop_addr_counter) + inst;
    3.63  	int i;
    3.64  
    3.65  	if (inst >= PRIVOP_COUNT_NINSTS)
    3.66 @@ -72,31 +63,44 @@ void privop_count_addr(unsigned long iip
    3.67  
    3.68  void gather_privop_addrs(void)
    3.69  {
    3.70 -	int i, j;
    3.71 -	atomic_t *v;
    3.72 -	for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
    3.73 -		/* Note: addresses are truncated!  */
    3.74 -		v = privop_addr_counter[i].perfc_addr;
    3.75 -		for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
    3.76 -			atomic_set(&v[j], privop_addr_counter[i].addr[j]);
    3.77 +	unsigned int cpu;
    3.78 +
    3.79 +	for_each_cpu ( cpu ) {
    3.80 +		perfc_t *perfcounters = per_cpu(perfcounters, cpu);
    3.81 +		struct privop_addr_count *s = per_cpu(privop_addr_counter, cpu);
    3.82 +		int i, j;
    3.83 +
    3.84 +		for (i = 0; i < PRIVOP_COUNT_NINSTS; i++, s++) {
    3.85 +			perfc_t *d;
    3.86  
    3.87 -		v = privop_addr_counter[i].perfc_count;
    3.88 -		for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
    3.89 -			atomic_set(&v[j], privop_addr_counter[i].count[j]);
    3.90 +			/* Note: addresses are truncated!  */
    3.91 +			d = perfcounters + privop_addr_info[i].perfc_addr;
    3.92 +			for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
    3.93 +				d[j] = s->addr[j];
    3.94 +
    3.95 +			d = perfcounters + privop_addr_info[i].perfc_count;
    3.96 +			for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
    3.97 +				d[j] = s->count[j];
    3.98  		
    3.99 -		atomic_set(privop_addr_counter[i].perfc_overflow,
   3.100 -		           privop_addr_counter[i].overflow);
   3.101 +			perfcounters[privop_addr_info[i].perfc_overflow] =
   3.102 +				s->overflow;
   3.103 +		}
   3.104  	}
   3.105  }
   3.106  
   3.107  void reset_privop_addrs(void)
   3.108  {
   3.109 -	int i, j;
   3.110 -	for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
   3.111 -		struct privop_addr_count *v = &privop_addr_counter[i];
   3.112 -		for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
   3.113 -			v->addr[j] = v->count[j] = 0;
   3.114 -		v->overflow = 0;
   3.115 +	unsigned int cpu;
   3.116 +
   3.117 +	for_each_cpu ( cpu ) {
   3.118 +		struct privop_addr_count *v = per_cpu(privop_addr_counter, cpu);
   3.119 +		int i, j;
   3.120 +
   3.121 +		for (i = 0; i < PRIVOP_COUNT_NINSTS; i++, v++) {
   3.122 +			for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
   3.123 +				v->addr[j] = v->count[j] = 0;
   3.124 +			v->overflow = 0;
   3.125 +		}
   3.126  	}
   3.127  }
   3.128  #endif
     4.1 --- a/xen/arch/ia64/xen/vhpt.c	Tue Mar 27 16:23:52 2007 +0100
     4.2 +++ b/xen/arch/ia64/xen/vhpt.c	Tue Mar 27 16:35:37 2007 +0100
     4.3 @@ -512,7 +512,7 @@ void gather_vhpt_stats(void)
     4.4  		for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
     4.5  			if (!(v->ti_tag & INVALID_TI_TAG))
     4.6  				vhpt_valid++;
     4.7 -		perfc_seta(vhpt_valid_entries, cpu, vhpt_valid);
     4.8 +		per_cpu(perfcounters, cpu)[PERFC_vhpt_valid_entries] = vhpt_valid;
     4.9  	}
    4.10  }
    4.11  #endif
     5.1 --- a/xen/arch/x86/mm.c	Tue Mar 27 16:23:52 2007 +0100
     5.2 +++ b/xen/arch/x86/mm.c	Tue Mar 27 16:35:37 2007 +0100
     5.3 @@ -1969,6 +1969,8 @@ int do_mmuext_op(
     5.4          if ( unlikely(!guest_handle_is_null(pdone)) )
     5.5              (void)copy_from_guest(&done, pdone, 1);
     5.6      }
     5.7 +    else
     5.8 +        perfc_incr(calls_to_mmuext_op);
     5.9  
    5.10      if ( unlikely(!guest_handle_okay(uops, count)) )
    5.11      {
    5.12 @@ -2223,6 +2225,8 @@ int do_mmuext_op(
    5.13  
    5.14      UNLOCK_BIGLOCK(d);
    5.15  
    5.16 +    perfc_add(num_mmuext_ops, i);
    5.17 +
    5.18   out:
    5.19      /* Add incremental work we have done to the @done output parameter. */
    5.20      if ( unlikely(!guest_handle_is_null(pdone)) )
    5.21 @@ -2257,6 +2261,8 @@ int do_mmu_update(
    5.22          if ( unlikely(!guest_handle_is_null(pdone)) )
    5.23              (void)copy_from_guest(&done, pdone, 1);
    5.24      }
    5.25 +    else
    5.26 +        perfc_incr(calls_to_mmu_update);
    5.27  
    5.28      if ( unlikely(!guest_handle_okay(ureqs, count)) )
    5.29      {
    5.30 @@ -2273,9 +2279,6 @@ int do_mmu_update(
    5.31      domain_mmap_cache_init(&mapcache);
    5.32      domain_mmap_cache_init(&sh_mapcache);
    5.33  
    5.34 -    perfc_incrc(calls_to_mmu_update);
    5.35 -    perfc_addc(num_page_updates, count);
    5.36 -
    5.37      LOCK_BIGLOCK(d);
    5.38  
    5.39      for ( i = 0; i < count; i++ )
    5.40 @@ -2438,6 +2441,8 @@ int do_mmu_update(
    5.41      domain_mmap_cache_destroy(&mapcache);
    5.42      domain_mmap_cache_destroy(&sh_mapcache);
    5.43  
    5.44 +    perfc_add(num_page_updates, i);
    5.45 +
    5.46   out:
    5.47      /* Add incremental work we have done to the @done output parameter. */
    5.48      if ( unlikely(!guest_handle_is_null(pdone)) )
     6.1 --- a/xen/arch/x86/x86_32/asm-offsets.c	Tue Mar 27 16:23:52 2007 +0100
     6.2 +++ b/xen/arch/x86/x86_32/asm-offsets.c	Tue Mar 27 16:35:37 2007 +0100
     6.3 @@ -107,21 +107,11 @@ void __dummy__(void)
     6.4      BLANK();
     6.5  
     6.6  #if PERF_COUNTERS
     6.7 -    OFFSET(PERFC_hypercalls, struct perfcounter, hypercalls);
     6.8 -    OFFSET(PERFC_exceptions, struct perfcounter, exceptions);
     6.9 +    DEFINE(PERFC_hypercalls, PERFC_hypercalls);
    6.10 +    DEFINE(PERFC_exceptions, PERFC_exceptions);
    6.11      BLANK();
    6.12  #endif
    6.13  
    6.14 -    OFFSET(MULTICALL_op, struct multicall_entry, op);
    6.15 -    OFFSET(MULTICALL_arg0, struct multicall_entry, args[0]);
    6.16 -    OFFSET(MULTICALL_arg1, struct multicall_entry, args[1]);
    6.17 -    OFFSET(MULTICALL_arg2, struct multicall_entry, args[2]);
    6.18 -    OFFSET(MULTICALL_arg3, struct multicall_entry, args[3]);
    6.19 -    OFFSET(MULTICALL_arg4, struct multicall_entry, args[4]);
    6.20 -    OFFSET(MULTICALL_arg5, struct multicall_entry, args[5]);
    6.21 -    OFFSET(MULTICALL_result, struct multicall_entry, result);
    6.22 -    BLANK();
    6.23 -
    6.24      DEFINE(FIXMAP_apic_base, fix_to_virt(FIX_APIC_BASE));
    6.25      BLANK();
    6.26  
     7.1 --- a/xen/arch/x86/x86_32/entry.S	Tue Mar 27 16:23:52 2007 +0100
     7.2 +++ b/xen/arch/x86/x86_32/entry.S	Tue Mar 27 16:35:37 2007 +0100
     7.3 @@ -173,7 +173,7 @@ ENTRY(hypercall)
     7.4          GET_CURRENT(%ebx)
     7.5          cmpl  $NR_hypercalls,%eax
     7.6          jae   bad_hypercall
     7.7 -        PERFC_INCR(PERFC_hypercalls, %eax)
     7.8 +        PERFC_INCR(PERFC_hypercalls, %eax, %ebx)
     7.9  #ifndef NDEBUG
    7.10          /* Create shadow parameters and corrupt those not used by this call. */
    7.11          pushl %eax
    7.12 @@ -429,7 +429,7 @@ 1:      xorl  %eax,%eax
    7.13          movl  %esp,%edx
    7.14          pushl %edx                      # push the cpu_user_regs pointer
    7.15          GET_CURRENT(%ebx)
    7.16 -        PERFC_INCR(PERFC_exceptions, %eax)
    7.17 +        PERFC_INCR(PERFC_exceptions, %eax, %ebx)
    7.18          call  *exception_table(,%eax,4)
    7.19          addl  $4,%esp
    7.20          movl  UREGS_eflags(%esp),%eax
     8.1 --- a/xen/arch/x86/x86_64/asm-offsets.c	Tue Mar 27 16:23:52 2007 +0100
     8.2 +++ b/xen/arch/x86/x86_64/asm-offsets.c	Tue Mar 27 16:35:37 2007 +0100
     8.3 @@ -121,30 +121,8 @@ void __dummy__(void)
     8.4      BLANK();
     8.5  
     8.6  #if PERF_COUNTERS
     8.7 -    OFFSET(PERFC_hypercalls, struct perfcounter, hypercalls);
     8.8 -    OFFSET(PERFC_exceptions, struct perfcounter, exceptions);
     8.9 -    BLANK();
    8.10 -#endif
    8.11 -
    8.12 -    OFFSET(MULTICALL_op, struct multicall_entry, op);
    8.13 -    OFFSET(MULTICALL_arg0, struct multicall_entry, args[0]);
    8.14 -    OFFSET(MULTICALL_arg1, struct multicall_entry, args[1]);
    8.15 -    OFFSET(MULTICALL_arg2, struct multicall_entry, args[2]);
    8.16 -    OFFSET(MULTICALL_arg3, struct multicall_entry, args[3]);
    8.17 -    OFFSET(MULTICALL_arg4, struct multicall_entry, args[4]);
    8.18 -    OFFSET(MULTICALL_arg5, struct multicall_entry, args[5]);
    8.19 -    OFFSET(MULTICALL_result, struct multicall_entry, result);
    8.20 -    BLANK();
    8.21 -
    8.22 -#ifdef CONFIG_COMPAT
    8.23 -    OFFSET(COMPAT_MULTICALL_op, struct compat_multicall_entry, op);
    8.24 -    OFFSET(COMPAT_MULTICALL_arg0, struct compat_multicall_entry, args[0]);
    8.25 -    OFFSET(COMPAT_MULTICALL_arg1, struct compat_multicall_entry, args[1]);
    8.26 -    OFFSET(COMPAT_MULTICALL_arg2, struct compat_multicall_entry, args[2]);
    8.27 -    OFFSET(COMPAT_MULTICALL_arg3, struct compat_multicall_entry, args[3]);
    8.28 -    OFFSET(COMPAT_MULTICALL_arg4, struct compat_multicall_entry, args[4]);
    8.29 -    OFFSET(COMPAT_MULTICALL_arg5, struct compat_multicall_entry, args[5]);
    8.30 -    OFFSET(COMPAT_MULTICALL_result, struct compat_multicall_entry, result);
    8.31 +    DEFINE(PERFC_hypercalls, PERFC_hypercalls);
    8.32 +    DEFINE(PERFC_exceptions, PERFC_exceptions);
    8.33      BLANK();
    8.34  #endif
    8.35  
     9.1 --- a/xen/arch/x86/x86_64/compat/entry.S	Tue Mar 27 16:23:52 2007 +0100
     9.2 +++ b/xen/arch/x86/x86_64/compat/entry.S	Tue Mar 27 16:35:37 2007 +0100
     9.3 @@ -57,7 +57,7 @@ ENTRY(compat_hypercall)
     9.4          movl  UREGS_rbx(%rsp),%edi   /* Arg 1        */
     9.5  #endif
     9.6          leaq  compat_hypercall_table(%rip),%r10
     9.7 -        PERFC_INCR(PERFC_hypercalls, %rax)
     9.8 +        PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
     9.9          callq *(%r10,%rax,8)
    9.10  #ifndef NDEBUG
    9.11          /* Deliberately corrupt parameter regs used by this hypercall. */
    10.1 --- a/xen/arch/x86/x86_64/entry.S	Tue Mar 27 16:23:52 2007 +0100
    10.2 +++ b/xen/arch/x86/x86_64/entry.S	Tue Mar 27 16:35:37 2007 +0100
    10.3 @@ -147,7 +147,7 @@ ENTRY(syscall_enter)
    10.4          pushq UREGS_rip+8(%rsp)
    10.5  #endif
    10.6          leaq  hypercall_table(%rip),%r10
    10.7 -        PERFC_INCR(PERFC_hypercalls, %rax)
    10.8 +        PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
    10.9          callq *(%r10,%rax,8)
   10.10  #ifndef NDEBUG
   10.11          /* Deliberately corrupt parameter regs used by this hypercall. */
   10.12 @@ -396,7 +396,7 @@ 1:      movq  %rsp,%rdi
   10.13          movl  UREGS_entry_vector(%rsp),%eax
   10.14          leaq  exception_table(%rip),%rdx
   10.15          GET_CURRENT(%rbx)
   10.16 -        PERFC_INCR(PERFC_exceptions, %rax)
   10.17 +        PERFC_INCR(PERFC_exceptions, %rax, %rbx)
   10.18          callq *(%rdx,%rax,8)
   10.19          testb $3,UREGS_cs(%rsp)
   10.20          jz    restore_all_xen
    11.1 --- a/xen/common/multicall.c	Tue Mar 27 16:23:52 2007 +0100
    11.2 +++ b/xen/common/multicall.c	Tue Mar 27 16:35:37 2007 +0100
    11.3 @@ -10,6 +10,7 @@
    11.4  #include <xen/event.h>
    11.5  #include <xen/multicall.h>
    11.6  #include <xen/guest_access.h>
    11.7 +#include <xen/perfc.h>
    11.8  #include <asm/current.h>
    11.9  #include <asm/hardirq.h>
   11.10  
   11.11 @@ -69,14 +70,18 @@ do_multicall(
   11.12          guest_handle_add_offset(call_list, 1);
   11.13      }
   11.14  
   11.15 +    perfc_incr(calls_to_multicall);
   11.16 +    perfc_add(calls_from_multicall, nr_calls);
   11.17      mcs->flags = 0;
   11.18      return 0;
   11.19  
   11.20   fault:
   11.21 +    perfc_incr(calls_to_multicall);
   11.22      mcs->flags = 0;
   11.23      return -EFAULT;
   11.24  
   11.25   preempted:
   11.26 +    perfc_add(calls_from_multicall, i);
   11.27      mcs->flags = 0;
   11.28      return hypercall_create_continuation(
   11.29          __HYPERVISOR_multicall, "hi", call_list, nr_calls-i);
    12.1 --- a/xen/common/perfc.c	Tue Mar 27 16:23:52 2007 +0100
    12.2 +++ b/xen/common/perfc.c	Tue Mar 27 16:35:37 2007 +0100
    12.3 @@ -10,81 +10,98 @@
    12.4  #include <public/sysctl.h>
    12.5  #include <asm/perfc.h>
    12.6  
    12.7 -#undef  PERFCOUNTER
    12.8 -#undef  PERFCOUNTER_CPU
    12.9 -#undef  PERFCOUNTER_ARRAY
   12.10 -#undef  PERFSTATUS
   12.11 -#undef  PERFSTATUS_CPU
   12.12 -#undef  PERFSTATUS_ARRAY
   12.13  #define PERFCOUNTER( var, name )              { name, TYPE_SINGLE, 0 },
   12.14 -#define PERFCOUNTER_CPU( var, name )          { name, TYPE_CPU,    0 },
   12.15  #define PERFCOUNTER_ARRAY( var, name, size )  { name, TYPE_ARRAY,  size },
   12.16  #define PERFSTATUS( var, name )               { name, TYPE_S_SINGLE, 0 },
   12.17 -#define PERFSTATUS_CPU( var, name )           { name, TYPE_S_CPU,    0 },
   12.18  #define PERFSTATUS_ARRAY( var, name, size )   { name, TYPE_S_ARRAY,  size },
   12.19 -static struct {
   12.20 -    char *name;
   12.21 -    enum { TYPE_SINGLE, TYPE_CPU, TYPE_ARRAY,
   12.22 -           TYPE_S_SINGLE, TYPE_S_CPU, TYPE_S_ARRAY
   12.23 +static const struct {
   12.24 +    const char *name;
   12.25 +    enum { TYPE_SINGLE, TYPE_ARRAY,
   12.26 +           TYPE_S_SINGLE, TYPE_S_ARRAY
   12.27      } type;
   12.28 -    int nr_elements;
   12.29 +    unsigned int nr_elements;
   12.30  } perfc_info[] = {
   12.31  #include <xen/perfc_defn.h>
   12.32  };
   12.33  
   12.34  #define NR_PERFCTRS (sizeof(perfc_info) / sizeof(perfc_info[0]))
   12.35  
   12.36 -struct perfcounter perfcounters;
   12.37 +DEFINE_PER_CPU(perfc_t[NUM_PERFCOUNTERS], perfcounters);
   12.38  
   12.39  void perfc_printall(unsigned char key)
   12.40  {
   12.41 -    unsigned int i, j, sum;
   12.42 +    unsigned int i, j;
   12.43      s_time_t now = NOW();
   12.44 -    atomic_t *counters = (atomic_t *)&perfcounters;
   12.45  
   12.46      printk("Xen performance counters SHOW  (now = 0x%08X:%08X)\n",
   12.47             (u32)(now>>32), (u32)now);
   12.48  
   12.49 -    for ( i = 0; i < NR_PERFCTRS; i++ ) 
   12.50 +    for ( i = j = 0; i < NR_PERFCTRS; i++ )
   12.51      {
   12.52 +        unsigned int k, cpu;
   12.53 +        unsigned long long sum = 0;
   12.54 +
   12.55          printk("%-32s  ",  perfc_info[i].name);
   12.56          switch ( perfc_info[i].type )
   12.57          {
   12.58          case TYPE_SINGLE:
   12.59          case TYPE_S_SINGLE:
   12.60 -            printk("TOTAL[%10d]", atomic_read(&counters[0]));
   12.61 -            counters += 1;
   12.62 -            break;
   12.63 -        case TYPE_CPU:
   12.64 -        case TYPE_S_CPU:
   12.65 -            sum = 0;
   12.66 -            for_each_online_cpu ( j )
   12.67 -                sum += atomic_read(&counters[j]);
   12.68 -            printk("TOTAL[%10u]", sum);
   12.69 -            if (sum)
   12.70 +            for_each_online_cpu ( cpu )
   12.71 +                sum += per_cpu(perfcounters, cpu)[j];
   12.72 +            printk("TOTAL[%12Lu]", sum);
   12.73 +            if ( sum )
   12.74              {
   12.75 -                for_each_online_cpu ( j )
   12.76 -                    printk("  CPU%02d[%10d]", j, atomic_read(&counters[j]));
   12.77 +                k = 0;
   12.78 +                for_each_online_cpu ( cpu )
   12.79 +                {
   12.80 +                    if ( k > 0 && (k % 4) == 0 )
   12.81 +                        printk("\n%46s", "");
   12.82 +                    printk("  CPU%02u[%10"PRIperfc"u]", cpu, per_cpu(perfcounters, cpu)[j]);
   12.83 +                    ++k;
   12.84 +                }
   12.85              }
   12.86 -            counters += NR_CPUS;
   12.87 +            ++j;
   12.88              break;
   12.89          case TYPE_ARRAY:
   12.90          case TYPE_S_ARRAY:
   12.91 -            for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ )
   12.92 -                sum += atomic_read(&counters[j]);
   12.93 -            printk("TOTAL[%10u]", sum);
   12.94 -#ifdef PERF_ARRAYS
   12.95 +            for_each_online_cpu ( cpu )
   12.96 +            {
   12.97 +                perfc_t *counters = per_cpu(perfcounters, cpu) + j;
   12.98 +
   12.99 +                for ( k = 0; k < perfc_info[i].nr_elements; k++ )
  12.100 +                    sum += counters[k];
  12.101 +            }
  12.102 +            printk("TOTAL[%12Lu]", sum);
  12.103              if (sum)
  12.104              {
  12.105 -                for ( j = 0; j < perfc_info[i].nr_elements; j++ )
  12.106 +#ifdef PERF_ARRAYS
  12.107 +                for ( k = 0; k < perfc_info[i].nr_elements; k++ )
  12.108 +                {
  12.109 +                    sum = 0;
  12.110 +                    for_each_online_cpu ( cpu )
  12.111 +                        sum += per_cpu(perfcounters, cpu)[j + k];
  12.112 +                    if ( (k % 4) == 0 )
  12.113 +                        printk("\n%16s", "");
  12.114 +                    printk("  ARR%02u[%10Lu]", k, sum);
  12.115 +                }
  12.116 +#else
  12.117 +                k = 0;
  12.118 +                for_each_online_cpu ( cpu )
  12.119                  {
  12.120 -                    if ( (j % 4) == 0 )
  12.121 -                        printk("\n                 ");
  12.122 -                    printk("  ARR%02d[%10d]", j, atomic_read(&counters[j]));
  12.123 +                    perfc_t *counters = per_cpu(perfcounters, cpu) + j;
  12.124 +                    unsigned int n;
  12.125 +
  12.126 +                    sum = 0;
  12.127 +                    for ( n = 0; n < perfc_info[i].nr_elements; n++ )
  12.128 +                        sum += counters[n];
  12.129 +                    if ( k > 0 && (k % 4) == 0 )
  12.130 +                        printk("\n%46s", "");
  12.131 +                    printk("  CPU%02u[%10Lu]", cpu, sum);
  12.132 +                    ++k;
  12.133                  }
  12.134 +#endif
  12.135              }
  12.136 -#endif
  12.137 -            counters += j;
  12.138 +            j += perfc_info[i].nr_elements;
  12.139              break;
  12.140          }
  12.141          printk("\n");
  12.142 @@ -97,7 +114,6 @@ void perfc_reset(unsigned char key)
  12.143  {
  12.144      unsigned int i, j;
  12.145      s_time_t now = NOW();
  12.146 -    atomic_t *counters = (atomic_t *)&perfcounters;
  12.147  
  12.148      if ( key != '\0' )
  12.149          printk("Xen performance counters RESET (now = 0x%08X:%08X)\n",
  12.150 @@ -105,43 +121,39 @@ void perfc_reset(unsigned char key)
  12.151  
  12.152      /* leave STATUS counters alone -- don't reset */
  12.153  
  12.154 -    for ( i = 0; i < NR_PERFCTRS; i++ ) 
  12.155 +    for ( i = j = 0; i < NR_PERFCTRS; i++ )
  12.156      {
  12.157 +        unsigned int cpu;
  12.158 +
  12.159          switch ( perfc_info[i].type )
  12.160          {
  12.161          case TYPE_SINGLE:
  12.162 -            atomic_set(&counters[0],0);
  12.163 +            for_each_cpu ( cpu )
  12.164 +                per_cpu(perfcounters, cpu)[j] = 0;
  12.165          case TYPE_S_SINGLE:
  12.166 -            counters += 1;
  12.167 -            break;
  12.168 -        case TYPE_CPU:
  12.169 -            for ( j = 0; j < NR_CPUS; j++ )
  12.170 -                atomic_set(&counters[j],0);
  12.171 -        case TYPE_S_CPU:
  12.172 -            counters += NR_CPUS;
  12.173 +            ++j;
  12.174              break;
  12.175          case TYPE_ARRAY:
  12.176 -            for ( j = 0; j < perfc_info[i].nr_elements; j++ )
  12.177 -                atomic_set(&counters[j],0);
  12.178 +            for_each_cpu ( cpu )
  12.179 +                memset(per_cpu(perfcounters, cpu) + j, 0,
  12.180 +                       perfc_info[i].nr_elements * sizeof(perfc_t));
  12.181          case TYPE_S_ARRAY:
  12.182 -            counters += perfc_info[i].nr_elements;
  12.183 +            j += perfc_info[i].nr_elements;
  12.184              break;
  12.185          }
  12.186      }
  12.187  
  12.188 -    arch_perfc_reset ();
  12.189 +    arch_perfc_reset();
  12.190  }
  12.191  
  12.192  static xen_sysctl_perfc_desc_t perfc_d[NR_PERFCTRS];
  12.193  static xen_sysctl_perfc_val_t *perfc_vals;
  12.194 -static int               perfc_nbr_vals;
  12.195 +static unsigned int      perfc_nbr_vals;
  12.196  static int               perfc_init = 0;
  12.197  static int perfc_copy_info(XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc,
  12.198                             XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val)
  12.199  {
  12.200 -    unsigned int i, j;
  12.201 -    unsigned int v = 0;
  12.202 -    atomic_t *counters = (atomic_t *)&perfcounters;
  12.203 +    unsigned int i, j, v;
  12.204  
  12.205      /* We only copy the name and array-size information once. */
  12.206      if ( !perfc_init ) 
  12.207 @@ -154,11 +166,7 @@ static int perfc_copy_info(XEN_GUEST_HAN
  12.208              {
  12.209              case TYPE_SINGLE:
  12.210              case TYPE_S_SINGLE:
  12.211 -                perfc_d[i].nr_vals = 1;
  12.212 -                break;
  12.213 -            case TYPE_CPU:
  12.214 -            case TYPE_S_CPU:
  12.215 -                perfc_d[i].nr_vals = num_online_cpus();
  12.216 +                perfc_d[i].nr_vals = num_possible_cpus();
  12.217                  break;
  12.218              case TYPE_ARRAY:
  12.219              case TYPE_S_ARRAY:
  12.220 @@ -181,26 +189,31 @@ static int perfc_copy_info(XEN_GUEST_HAN
  12.221      arch_perfc_gather();
  12.222  
  12.223      /* We gather the counts together every time. */
  12.224 -    for ( i = 0; i < NR_PERFCTRS; i++ )
  12.225 +    for ( i = j = v = 0; i < NR_PERFCTRS; i++ )
  12.226      {
  12.227 +        unsigned int cpu;
  12.228 +
  12.229          switch ( perfc_info[i].type )
  12.230          {
  12.231          case TYPE_SINGLE:
  12.232          case TYPE_S_SINGLE:
  12.233 -            perfc_vals[v++] = atomic_read(&counters[0]);
  12.234 -            counters += 1;
  12.235 -            break;
  12.236 -        case TYPE_CPU:
  12.237 -        case TYPE_S_CPU:
  12.238 -            for ( j = 0; j < perfc_d[i].nr_vals; j++ )
  12.239 -                perfc_vals[v++] = atomic_read(&counters[j]);
  12.240 -            counters += NR_CPUS;
  12.241 +            for_each_cpu ( cpu )
  12.242 +                perfc_vals[v++] = per_cpu(perfcounters, cpu)[j];
  12.243 +            ++j;
  12.244              break;
  12.245          case TYPE_ARRAY:
  12.246          case TYPE_S_ARRAY:
  12.247 -            for ( j = 0; j < perfc_d[i].nr_vals; j++ )
  12.248 -                perfc_vals[v++] = atomic_read(&counters[j]);
  12.249 -            counters += perfc_info[i].nr_elements;
  12.250 +            memset(perfc_vals + v, 0, perfc_d[i].nr_vals * sizeof(*perfc_vals));
  12.251 +            for_each_cpu ( cpu )
  12.252 +            {
  12.253 +                perfc_t *counters = per_cpu(perfcounters, cpu) + j;
  12.254 +                unsigned int k;
  12.255 +
  12.256 +                for ( k = 0; k < perfc_d[i].nr_vals; k++ )
  12.257 +                    perfc_vals[v + k] += counters[k];
  12.258 +            }
  12.259 +            v += perfc_d[i].nr_vals;
  12.260 +            j += perfc_info[i].nr_elements;
  12.261              break;
  12.262          }
  12.263      }
  12.264 @@ -224,14 +237,12 @@ int perfc_control(xen_sysctl_perfc_op_t 
  12.265      switch ( pc->cmd )
  12.266      {
  12.267      case XEN_SYSCTL_PERFCOP_reset:
  12.268 -        perfc_copy_info(pc->desc, pc->val);
  12.269 +        rc = perfc_copy_info(pc->desc, pc->val);
  12.270          perfc_reset(0);
  12.271 -        rc = 0;
  12.272          break;
  12.273  
  12.274      case XEN_SYSCTL_PERFCOP_query:
  12.275 -        perfc_copy_info(pc->desc, pc->val);
  12.276 -        rc = 0;
  12.277 +        rc = perfc_copy_info(pc->desc, pc->val);
  12.278          break;
  12.279  
  12.280      default:
    13.1 --- a/xen/include/asm-ia64/linux-xen/asm/asmmacro.h	Tue Mar 27 16:23:52 2007 +0100
    13.2 +++ b/xen/include/asm-ia64/linux-xen/asm/asmmacro.h	Tue Mar 27 16:35:37 2007 +0100
    13.3 @@ -116,4 +116,8 @@ 2:{ .mib;						\
    13.4  # define dv_serialize_instruction
    13.5  #endif
    13.6  
    13.7 +#ifdef PERF_COUNTERS
    13.8 +#define PERFC(n) (THIS_CPU(perfcounters) + (IA64_PERFC_ ## n) * 4)
    13.9 +#endif
   13.10 +
   13.11  #endif /* _ASM_IA64_ASMMACRO_H */
    14.1 --- a/xen/include/asm-ia64/perfc_defn.h	Tue Mar 27 16:23:52 2007 +0100
    14.2 +++ b/xen/include/asm-ia64/perfc_defn.h	Tue Mar 27 16:35:37 2007 +0100
    14.3 @@ -84,7 +84,7 @@ PERFCOUNTER_ARRAY(slow_reflect,       "s
    14.4  PERFCOUNTER_ARRAY(fast_reflect,       "fast reflection", 0x80)
    14.5  
    14.6  PERFSTATUS(vhpt_nbr_entries,          "nbr of entries per VHPT")
    14.7 -PERFSTATUS_CPU(vhpt_valid_entries,    "nbr of valid entries in VHPT")
    14.8 +PERFSTATUS(vhpt_valid_entries,        "nbr of valid entries in VHPT")
    14.9  
   14.10  PERFCOUNTER_ARRAY(vmx_mmio_access,    "vmx_mmio_access", 8)
   14.11  PERFCOUNTER_CPU(vmx_pal_emul,         "vmx_pal_emul")
   14.12 @@ -106,6 +106,8 @@ PERFSTATUS(privop_addr_##name##_overflow
   14.13  
   14.14  PERFPRIVOPADDR(get_ifa)
   14.15  PERFPRIVOPADDR(thash)
   14.16 +
   14.17 +#undef PERFPRIVOPADDR
   14.18  #endif
   14.19  
   14.20  // vhpt.c
    15.1 --- a/xen/include/asm-ia64/privop_stat.h	Tue Mar 27 16:23:52 2007 +0100
    15.2 +++ b/xen/include/asm-ia64/privop_stat.h	Tue Mar 27 16:35:37 2007 +0100
    15.3 @@ -1,5 +1,5 @@
    15.4 -#ifndef _XEN_UA64_PRIVOP_STAT_H
    15.5 -#define _XEN_UA64_PRIVOP_STAT_H
    15.6 +#ifndef _XEN_IA64_PRIVOP_STAT_H
    15.7 +#define _XEN_IA64_PRIVOP_STAT_H
    15.8  #include <asm/config.h>
    15.9  #include <xen/types.h>
   15.10  #include <public/xen.h>
   15.11 @@ -9,31 +9,24 @@
   15.12  extern void gather_privop_addrs(void);
   15.13  extern void reset_privop_addrs(void);
   15.14  
   15.15 -#undef  PERFCOUNTER
   15.16  #define PERFCOUNTER(var, name)
   15.17 -
   15.18 -#undef  PERFCOUNTER_CPU
   15.19 -#define PERFCOUNTER_CPU(var, name)
   15.20 -
   15.21 -#undef  PERFCOUNTER_ARRAY
   15.22  #define PERFCOUNTER_ARRAY(var, name, size)
   15.23  
   15.24 -#undef  PERFSTATUS
   15.25  #define PERFSTATUS(var, name)
   15.26 -
   15.27 -#undef  PERFSTATUS_CPU
   15.28 -#define PERFSTATUS_CPU(var, name)
   15.29 -
   15.30 -#undef  PERFSTATUS_ARRAY
   15.31  #define PERFSTATUS_ARRAY(var, name, size)
   15.32  
   15.33 -#undef  PERFPRIVOPADDR
   15.34  #define PERFPRIVOPADDR(name) privop_inst_##name,
   15.35  
   15.36  enum privop_inst {
   15.37  #include <asm/perfc_defn.h>
   15.38  };
   15.39  
   15.40 +#undef PERFCOUNTER
   15.41 +#undef PERFCOUNTER_ARRAY
   15.42 +
   15.43 +#undef PERFSTATUS
   15.44 +#undef PERFSTATUS_ARRAY
   15.45 +
   15.46  #undef PERFPRIVOPADDR
   15.47  
   15.48  #define	PRIVOP_COUNT_ADDR(regs,inst) privop_count_addr(regs->cr_iip,inst)
   15.49 @@ -45,4 +38,4 @@ extern void privop_count_addr(unsigned l
   15.50  #define reset_privop_addrs() do {} while (0)
   15.51  #endif
   15.52  
   15.53 -#endif /* _XEN_UA64_PRIVOP_STAT_H */
   15.54 +#endif /* _XEN_IA64_PRIVOP_STAT_H */
    16.1 --- a/xen/include/asm-x86/multicall.h	Tue Mar 27 16:23:52 2007 +0100
    16.2 +++ b/xen/include/asm-x86/multicall.h	Tue Mar 27 16:35:37 2007 +0100
    16.3 @@ -6,84 +6,94 @@
    16.4  #define __ASM_X86_MULTICALL_H__
    16.5  
    16.6  #include <xen/errno.h>
    16.7 -#include <asm/asm_defns.h>
    16.8  
    16.9  #ifdef __x86_64__
   16.10  
   16.11  #define do_multicall_call(_call)                             \
   16.12      do {                                                     \
   16.13          __asm__ __volatile__ (                               \
   16.14 -            "    movq  "STR(MULTICALL_op)"(%0),%%rax; "      \
   16.15 +            "    movq  %c1(%0),%%rax; "                      \
   16.16 +            "    leaq  hypercall_table(%%rip),%%rdi; "       \
   16.17              "    cmpq  $("STR(NR_hypercalls)"),%%rax; "      \
   16.18              "    jae   2f; "                                 \
   16.19 -            "    leaq  hypercall_table(%%rip),%%rdi; "       \
   16.20 -            "    leaq  (%%rdi,%%rax,8),%%rax; "              \
   16.21 -            "    movq  "STR(MULTICALL_arg0)"(%0),%%rdi; "    \
   16.22 -            "    movq  "STR(MULTICALL_arg1)"(%0),%%rsi; "    \
   16.23 -            "    movq  "STR(MULTICALL_arg2)"(%0),%%rdx; "    \
   16.24 -            "    movq  "STR(MULTICALL_arg3)"(%0),%%rcx; "    \
   16.25 -            "    movq  "STR(MULTICALL_arg4)"(%0),%%r8; "     \
   16.26 -            "    callq *(%%rax); "                           \
   16.27 -            "1:  movq  %%rax,"STR(MULTICALL_result)"(%0)\n"  \
   16.28 +            "    movq  (%%rdi,%%rax,8),%%rax; "              \
   16.29 +            "    movq  %c2+0*%c3(%0),%%rdi; "                \
   16.30 +            "    movq  %c2+1*%c3(%0),%%rsi; "                \
   16.31 +            "    movq  %c2+2*%c3(%0),%%rdx; "                \
   16.32 +            "    movq  %c2+3*%c3(%0),%%rcx; "                \
   16.33 +            "    movq  %c2+4*%c3(%0),%%r8; "                 \
   16.34 +            "    callq *%%rax; "                             \
   16.35 +            "1:  movq  %%rax,%c4(%0)\n"                      \
   16.36              ".section .fixup,\"ax\"\n"                       \
   16.37              "2:  movq  $-"STR(ENOSYS)",%%rax\n"              \
   16.38              "    jmp   1b\n"                                 \
   16.39              ".previous\n"                                    \
   16.40 -            : : "b" (_call)                                  \
   16.41 +            :                                                \
   16.42 +            : "b" (_call),                                   \
   16.43 +              "i" (offsetof(__typeof__(*_call), op)),        \
   16.44 +              "i" (offsetof(__typeof__(*_call), args)),      \
   16.45 +              "i" (sizeof(*(_call)->args)),                  \
   16.46 +              "i" (offsetof(__typeof__(*_call), result))     \
   16.47                /* all the caller-saves registers */           \
   16.48              : "rax", "rcx", "rdx", "rsi", "rdi",             \
   16.49                "r8",  "r9",  "r10", "r11" );                  \
   16.50      } while ( 0 )
   16.51  
   16.52 -#define compat_multicall_call(_call)                              \
   16.53 -    do {                                                          \
   16.54 -        __asm__ __volatile__ (                                    \
   16.55 -            "    movl  "STR(COMPAT_MULTICALL_op)"(%0),%%eax; "    \
   16.56 -            "    leaq  compat_hypercall_table(%%rip),%%rdi; "     \
   16.57 -            "    cmpl  $("STR(NR_hypercalls)"),%%eax; "           \
   16.58 -            "    jae   2f; "                                      \
   16.59 -            "    movq  (%%rdi,%%rax,8),%%rax; "                   \
   16.60 -            "    movl  "STR(COMPAT_MULTICALL_arg0)"(%0),%%edi; "  \
   16.61 -            "    movl  "STR(COMPAT_MULTICALL_arg1)"(%0),%%esi; "  \
   16.62 -            "    movl  "STR(COMPAT_MULTICALL_arg2)"(%0),%%edx; "  \
   16.63 -            "    movl  "STR(COMPAT_MULTICALL_arg3)"(%0),%%ecx; "  \
   16.64 -            "    movl  "STR(COMPAT_MULTICALL_arg4)"(%0),%%r8d; "  \
   16.65 -            "    callq *%%rax; "                                  \
   16.66 -            "1:  movl  %%eax,"STR(COMPAT_MULTICALL_result)"(%0)\n"\
   16.67 -            ".section .fixup,\"ax\"\n"                            \
   16.68 -            "2:  movl  $-"STR(ENOSYS)",%%eax\n"                   \
   16.69 -            "    jmp   1b\n"                                      \
   16.70 -            ".previous\n"                                         \
   16.71 -            : : "b" (_call)                                       \
   16.72 -              /* all the caller-saves registers */                \
   16.73 -            : "rax", "rcx", "rdx", "rsi", "rdi",                  \
   16.74 -              "r8",  "r9",  "r10", "r11" );                       \
   16.75 -    } while ( 0 )
   16.76 +#define compat_multicall_call(_call)                         \
   16.77 +        __asm__ __volatile__ (                               \
   16.78 +            "    movl  %c1(%0),%%eax; "                      \
   16.79 +            "    leaq  compat_hypercall_table(%%rip),%%rdi; "\
   16.80 +            "    cmpl  $("STR(NR_hypercalls)"),%%eax; "      \
   16.81 +            "    jae   2f; "                                 \
   16.82 +            "    movq  (%%rdi,%%rax,8),%%rax; "              \
   16.83 +            "    movl  %c2+0*%c3(%0),%%edi; "                \
   16.84 +            "    movl  %c2+1*%c3(%0),%%esi; "                \
   16.85 +            "    movl  %c2+2*%c3(%0),%%edx; "                \
   16.86 +            "    movl  %c2+3*%c3(%0),%%ecx; "                \
   16.87 +            "    movl  %c2+4*%c3(%0),%%r8d; "                \
   16.88 +            "    callq *%%rax; "                             \
   16.89 +            "1:  movl  %%eax,%c4(%0)\n"                      \
   16.90 +            ".section .fixup,\"ax\"\n"                       \
   16.91 +            "2:  movl  $-"STR(ENOSYS)",%%eax\n"              \
   16.92 +            "    jmp   1b\n"                                 \
   16.93 +            ".previous\n"                                    \
   16.94 +            :                                                \
   16.95 +            : "b" (_call),                                   \
   16.96 +              "i" (offsetof(__typeof__(*_call), op)),        \
   16.97 +              "i" (offsetof(__typeof__(*_call), args)),      \
   16.98 +              "i" (sizeof(*(_call)->args)),                  \
   16.99 +              "i" (offsetof(__typeof__(*_call), result))     \
  16.100 +              /* all the caller-saves registers */           \
  16.101 +            : "rax", "rcx", "rdx", "rsi", "rdi",             \
  16.102 +              "r8",  "r9",  "r10", "r11" )                   \
  16.103  
  16.104  #else
  16.105  
  16.106  #define do_multicall_call(_call)                             \
  16.107 -    do {                                                     \
  16.108          __asm__ __volatile__ (                               \
  16.109 -            "    pushl "STR(MULTICALL_arg4)"(%0); "          \
  16.110 -            "    pushl "STR(MULTICALL_arg3)"(%0); "          \
  16.111 -            "    pushl "STR(MULTICALL_arg2)"(%0); "          \
  16.112 -            "    pushl "STR(MULTICALL_arg1)"(%0); "          \
  16.113 -            "    pushl "STR(MULTICALL_arg0)"(%0); "          \
  16.114 -            "    movl  "STR(MULTICALL_op)"(%0),%%eax; "      \
  16.115 +            "    movl  %c1(%0),%%eax; "                      \
  16.116 +            "    pushl %c2+4*%c3(%0); "                      \
  16.117 +            "    pushl %c2+3*%c3(%0); "                      \
  16.118 +            "    pushl %c2+2*%c3(%0); "                      \
  16.119 +            "    pushl %c2+1*%c3(%0); "                      \
  16.120 +            "    pushl %c2+0*%c3(%0); "                      \
  16.121              "    cmpl  $("STR(NR_hypercalls)"),%%eax; "      \
  16.122              "    jae   2f; "                                 \
  16.123              "    call  *hypercall_table(,%%eax,4); "         \
  16.124 -            "1:  movl  %%eax,"STR(MULTICALL_result)"(%0); "  \
  16.125 +            "1:  movl  %%eax,%c4(%0); "                      \
  16.126              "    addl  $20,%%esp\n"                          \
  16.127              ".section .fixup,\"ax\"\n"                       \
  16.128              "2:  movl  $-"STR(ENOSYS)",%%eax\n"              \
  16.129              "    jmp   1b\n"                                 \
  16.130              ".previous\n"                                    \
  16.131 -            : : "b" (_call)                                  \
  16.132 +            :                                                \
  16.133 +            : "bSD" (_call),                                 \
  16.134 +              "i" (offsetof(__typeof__(*_call), op)),        \
  16.135 +              "i" (offsetof(__typeof__(*_call), args)),      \
  16.136 +              "i" (sizeof(*(_call)->args)),                  \
  16.137 +              "i" (offsetof(__typeof__(*_call), result))     \
  16.138                /* all the caller-saves registers */           \
  16.139 -            : "eax", "ecx", "edx" );                         \
  16.140 -    } while ( 0 )
  16.141 +            : "eax", "ecx", "edx" )                          \
  16.142  
  16.143  #endif
  16.144  
    17.1 --- a/xen/include/asm-x86/perfc_defn.h	Tue Mar 27 16:23:52 2007 +0100
    17.2 +++ b/xen/include/asm-x86/perfc_defn.h	Tue Mar 27 16:35:37 2007 +0100
    17.3 @@ -18,9 +18,11 @@ PERFCOUNTER_CPU(apic_timer,             
    17.4  
    17.5  PERFCOUNTER_CPU(domain_page_tlb_flush,  "domain page tlb flushes")
    17.6  
    17.7 -PERFCOUNTER_CPU(calls_to_mmu_update,    "calls_to_mmu_update")
    17.8 -PERFCOUNTER_CPU(num_page_updates,       "num_page_updates")
    17.9 -PERFCOUNTER_CPU(calls_to_update_va,     "calls_to_update_va_map")
   17.10 +PERFCOUNTER(calls_to_mmuext_op,         "calls to mmuext_op")
   17.11 +PERFCOUNTER(num_mmuext_ops,             "mmuext ops")
   17.12 +PERFCOUNTER(calls_to_mmu_update,        "calls to mmu_update")
   17.13 +PERFCOUNTER(num_page_updates,           "page updates")
   17.14 +PERFCOUNTER(calls_to_update_va,         "calls to update_va_map")
   17.15  PERFCOUNTER_CPU(page_faults,            "page faults")
   17.16  PERFCOUNTER_CPU(copy_user_faults,       "copy_user faults")
   17.17  
    18.1 --- a/xen/include/asm-x86/x86_32/asm_defns.h	Tue Mar 27 16:23:52 2007 +0100
    18.2 +++ b/xen/include/asm-x86/x86_32/asm_defns.h	Tue Mar 27 16:35:37 2007 +0100
    18.3 @@ -1,6 +1,8 @@
    18.4  #ifndef __X86_32_ASM_DEFNS_H__
    18.5  #define __X86_32_ASM_DEFNS_H__
    18.6  
    18.7 +#include <asm/percpu.h>
    18.8 +
    18.9  #ifndef NDEBUG
   18.10  /* Indicate special exception stack frame by inverting the frame pointer. */
   18.11  #define SETUP_EXCEPTION_FRAME_POINTER           \
   18.12 @@ -47,10 +49,14 @@
   18.13          1:
   18.14  
   18.15  #ifdef PERF_COUNTERS
   18.16 -#define PERFC_INCR(_name,_idx)                          \
   18.17 -        lock incl perfcounters+_name(,_idx,4)
   18.18 +#define PERFC_INCR(_name,_idx,_cur)                     \
   18.19 +        pushl _cur;                                     \
   18.20 +        movl VCPU_processor(_cur),_cur;                 \
   18.21 +        shll $PERCPU_SHIFT,_cur;                        \
   18.22 +        incl per_cpu__perfcounters+_name*4(_cur,_idx,4);\
   18.23 +        popl _cur
   18.24  #else
   18.25 -#define PERFC_INCR(_name,_idx)
   18.26 +#define PERFC_INCR(_name,_idx,_cur)
   18.27  #endif
   18.28  
   18.29  #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
    19.1 --- a/xen/include/asm-x86/x86_64/asm_defns.h	Tue Mar 27 16:23:52 2007 +0100
    19.2 +++ b/xen/include/asm-x86/x86_64/asm_defns.h	Tue Mar 27 16:35:37 2007 +0100
    19.3 @@ -1,6 +1,8 @@
    19.4  #ifndef __X86_64_ASM_DEFNS_H__
    19.5  #define __X86_64_ASM_DEFNS_H__
    19.6  
    19.7 +#include <asm/percpu.h>
    19.8 +
    19.9  #ifndef NDEBUG
   19.10  /* Indicate special exception stack frame by inverting the frame pointer. */
   19.11  #define SETUP_EXCEPTION_FRAME_POINTER           \
   19.12 @@ -47,13 +49,18 @@
   19.13          popq  %rdi;
   19.14  
   19.15  #ifdef PERF_COUNTERS
   19.16 -#define PERFC_INCR(_name,_idx)                  \
   19.17 -    pushq %rdx;                                 \
   19.18 -    leaq perfcounters+_name(%rip),%rdx;         \
   19.19 -    lock incl (%rdx,_idx,4);                    \
   19.20 -    popq %rdx;
   19.21 +#define PERFC_INCR(_name,_idx,_cur)             \
   19.22 +        pushq _cur;                             \
   19.23 +        movslq VCPU_processor(_cur),_cur;       \
   19.24 +        pushq %rdx;                             \
   19.25 +        leaq per_cpu__perfcounters(%rip),%rdx;  \
   19.26 +        shlq $PERCPU_SHIFT,_cur;                \
   19.27 +        addq %rdx,_cur;                         \
   19.28 +        popq %rdx;                              \
   19.29 +        incl _name*4(_cur,_idx,4);              \
   19.30 +        popq _cur
   19.31  #else
   19.32 -#define PERFC_INCR(_name,_idx)
   19.33 +#define PERFC_INCR(_name,_idx,_cur)
   19.34  #endif
   19.35  
   19.36  /* Work around AMD erratum #88 */
    20.1 --- a/xen/include/xen/perfc.h	Tue Mar 27 16:23:52 2007 +0100
    20.2 +++ b/xen/include/xen/perfc.h	Tue Mar 27 16:35:37 2007 +0100
    20.3 @@ -6,102 +6,94 @@
    20.4  
    20.5  #include <xen/lib.h>
    20.6  #include <xen/smp.h>
    20.7 -#include <asm/atomic.h>
    20.8 +#include <xen/percpu.h>
    20.9  
   20.10  /* 
   20.11   * NOTE: new counters must be defined in perfc_defn.h
   20.12   * 
   20.13   * PERFCOUNTER (counter, string)              define a new performance counter
   20.14 - * PERFCOUNTER_CPU (counter, string, size)    define a counter per CPU
   20.15 - * PERFCOUNTER_ARRY (counter, string, size)   define an array of counters
   20.16 + * PERFCOUNTER_ARRAY (counter, string, size)  define an array of counters
   20.17   * 
   20.18   * unlike "COUNTERS", "STATUS" variables DO NOT RESET
   20.19   * PERFSTATUS (counter, string)               define a new performance stauts
   20.20 - * PERFSTATUS_CPU (counter, string, size)     define a status var per CPU
   20.21 - * PERFSTATUS_ARRY (counter, string, size)    define an array of status vars
   20.22 + * PERFSTATUS_ARRAY (counter, string, size)   define an array of status vars
   20.23   * 
   20.24   * unsigned long perfc_value  (counter)        get value of a counter  
   20.25 - * unsigned long perfc_valuec (counter)        get value of a per CPU counter
   20.26   * unsigned long perfc_valuea (counter, index) get value of an array counter
   20.27   * unsigned long perfc_set  (counter, val)     set value of a counter  
   20.28 - * unsigned long perfc_setc (counter, val)     set value of a per CPU counter
   20.29   * unsigned long perfc_seta (counter, index, val) set value of an array counter
   20.30   * void perfc_incr  (counter)                  increment a counter          
   20.31 - * void perfc_incrc (counter, index)           increment a per CPU counter   
   20.32 + * void perfc_decr  (counter)                  decrement a status
   20.33   * void perfc_incra (counter, index)           increment an array counter   
   20.34   * void perfc_add   (counter, value)           add a value to a counter     
   20.35 - * void perfc_addc  (counter, value)           add a value to a per CPU counter
   20.36   * void perfc_adda  (counter, index, value)    add a value to array counter 
   20.37   * void perfc_print (counter)                  print out the counter
   20.38   */
   20.39  
   20.40 -#define PERFCOUNTER( var, name ) \
   20.41 -  atomic_t var[1];
   20.42 -#define PERFCOUNTER_CPU( var, name ) \
   20.43 -  atomic_t var[NR_CPUS];
   20.44 -#define PERFCOUNTER_ARRAY( var, name, size ) \
   20.45 -  atomic_t var[size];
   20.46 -#define PERFSTATUS( var, name ) \
   20.47 -  atomic_t var[1];
   20.48 -#define PERFSTATUS_CPU( var, name ) \
   20.49 -  atomic_t var[NR_CPUS];
   20.50 -#define PERFSTATUS_ARRAY( var, name, size ) \
   20.51 -  atomic_t var[size];
   20.52 +#define PERFCOUNTER( name, descr ) \
   20.53 +  PERFC_ ## name,
   20.54 +#define PERFCOUNTER_ARRAY( name, descr, size ) \
   20.55 +  PERFC_ ## name,                              \
   20.56 +  PERFC_LAST_ ## name = PERFC_ ## name + (size) - sizeof(char[2 * !!(size) - 1]),
   20.57  
   20.58 -struct perfcounter {
   20.59 +#define PERFSTATUS       PERFCOUNTER
   20.60 +#define PERFSTATUS_ARRAY PERFCOUNTER_ARRAY
   20.61 +
   20.62 +/* Compatibility: This should go away once all users got converted. */
   20.63 +#define PERFCOUNTER_CPU PERFCOUNTER
   20.64 +
   20.65 +enum perfcounter {
   20.66  #include <xen/perfc_defn.h>
   20.67 +	NUM_PERFCOUNTERS
   20.68  };
   20.69  
   20.70 -extern struct perfcounter perfcounters;
   20.71 +#undef PERFCOUNTER
   20.72 +#undef PERFCOUNTER_ARRAY
   20.73 +#undef PERFSTATUS
   20.74 +#undef PERFSTATUS_ARRAY
   20.75  
   20.76 -#define perfc_value(x)    atomic_read(&perfcounters.x[0])
   20.77 -#define perfc_valuec(x)   atomic_read(&perfcounters.x[smp_processor_id()])
   20.78 +typedef unsigned perfc_t;
   20.79 +#define PRIperfc ""
   20.80 +
   20.81 +DECLARE_PER_CPU(perfc_t[NUM_PERFCOUNTERS], perfcounters);
   20.82 +
   20.83 +#define perfc_value(x)    this_cpu(perfcounters)[PERFC_ ## x]
   20.84  #define perfc_valuea(x,y)                                               \
   20.85 -    ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ?	\
   20.86 -	atomic_read(&perfcounters.x[y]) : 0 )
   20.87 -#define perfc_set(x,v)    atomic_set(&perfcounters.x[0], v)
   20.88 -#define perfc_setc(x,v)   atomic_set(&perfcounters.x[smp_processor_id()], v)
   20.89 +    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
   20.90 +	 this_cpu(perfcounters)[PERFC_ ## x + (y)] : 0 )
   20.91 +#define perfc_set(x,v)    (this_cpu(perfcounters)[PERFC_ ## x] = (v))
   20.92  #define perfc_seta(x,y,v)                                               \
   20.93 -    do {                                                                \
   20.94 -        if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
   20.95 -            atomic_set(&perfcounters.x[y], v);                          \
   20.96 -    } while ( 0 )
   20.97 -#define perfc_incr(x)     atomic_inc(&perfcounters.x[0])
   20.98 -#define perfc_decr(x)     atomic_dec(&perfcounters.x[0])
   20.99 -#define perfc_incrc(x)    atomic_inc(&perfcounters.x[smp_processor_id()])
  20.100 -#define perfc_decrc(x)    atomic_dec(&perfcounters.x[smp_processor_id()])
  20.101 +    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
  20.102 +	 this_cpu(perfcounters)[PERFC_ ## x + (y)] = (v) : (v) )
  20.103 +#define perfc_incr(x)     (++this_cpu(perfcounters)[PERFC_ ## x])
  20.104 +#define perfc_decr(x)     (--this_cpu(perfcounters)[PERFC_ ## x])
  20.105  #define perfc_incra(x,y)                                                \
  20.106 -    do {                                                                \
  20.107 -        if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
  20.108 -            atomic_inc(&perfcounters.x[y]);                             \
  20.109 -    } while ( 0 )
  20.110 -#define perfc_add(x,y)    atomic_add((y), &perfcounters.x[0])
  20.111 -#define perfc_addc(x,y)   atomic_add((y), &perfcounters.x[smp_processor_id()])
  20.112 -#define perfc_adda(x,y,z)                                               \
  20.113 -    do {                                                                \
  20.114 -        if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
  20.115 -            atomic_add((z), &perfcounters.x[y]);                        \
  20.116 -    } while ( 0 )
  20.117 +    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
  20.118 +	 ++this_cpu(perfcounters)[PERFC_ ## x + (y)] : 0 )
  20.119 +#define perfc_add(x,v)    (this_cpu(perfcounters)[PERFC_ ## x] += (v))
  20.120 +#define perfc_adda(x,y,v)                                               \
  20.121 +    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
  20.122 +	 this_cpu(perfcounters)[PERFC_ ## x + (y)] = (v) : (v) )
  20.123  
  20.124  /*
  20.125   * Histogram: special treatment for 0 and 1 count. After that equally spaced 
  20.126   * with last bucket taking the rest.
  20.127   */
  20.128  #ifdef PERF_ARRAYS
  20.129 -#define perfc_incr_histo(_x,_v,_n)                                          \
  20.130 -    do {                                                                    \
  20.131 -        if ( (_v) == 0 )                                                    \
  20.132 -            perfc_incra(_x, 0);                                             \
  20.133 -        else if ( (_v) == 1 )                                               \
  20.134 -            perfc_incra(_x, 1);                                             \
  20.135 -        else if ( (((_v)-2) / PERFC_ ## _n ## _BUCKET_SIZE) <               \
  20.136 -                  (PERFC_MAX_ ## _n - 3) )                                  \
  20.137 -            perfc_incra(_x, (((_v)-2) / PERFC_ ## _n ## _BUCKET_SIZE) + 2); \
  20.138 -        else                                                                \
  20.139 -            perfc_incra(_x, PERFC_MAX_ ## _n - 1);                          \
  20.140 +#define perfc_incr_histo(x,v)                                           \
  20.141 +    do {                                                                \
  20.142 +        if ( (v) == 0 )                                                 \
  20.143 +            perfc_incra(x, 0);                                          \
  20.144 +        else if ( (v) == 1 )                                            \
  20.145 +            perfc_incra(x, 1);                                          \
  20.146 +        else if ( (((v) - 2) / PERFC_ ## x ## _BUCKET_SIZE) <           \
  20.147 +                  (PERFC_LAST_ ## x - PERFC_ ## x - 2) )                \
  20.148 +            perfc_incra(x, (((v) - 2) / PERFC_ ## x ## _BUCKET_SIZE) + 2); \
  20.149 +        else                                                            \
  20.150 +            perfc_incra(x, PERFC_LAST_ ## x - PERFC_ ## x);             \
  20.151      } while ( 0 )
  20.152  #else
  20.153 -#define perfc_incr_histo(_x,_v,_n) ((void)0)
  20.154 +#define perfc_incr_histo(x,v) ((void)0)
  20.155  #endif
  20.156  
  20.157  struct xen_sysctl_perfc_op;
  20.158 @@ -110,22 +102,20 @@ int perfc_control(struct xen_sysctl_perf
  20.159  #else /* PERF_COUNTERS */
  20.160  
  20.161  #define perfc_value(x)    (0)
  20.162 -#define perfc_valuec(x)   (0)
  20.163  #define perfc_valuea(x,y) (0)
  20.164  #define perfc_set(x,v)    ((void)0)
  20.165 -#define perfc_setc(x,v)   ((void)0)
  20.166  #define perfc_seta(x,y,v) ((void)0)
  20.167  #define perfc_incr(x)     ((void)0)
  20.168  #define perfc_decr(x)     ((void)0)
  20.169 -#define perfc_incrc(x)    ((void)0)
  20.170 -#define perfc_decrc(x)    ((void)0)
  20.171  #define perfc_incra(x,y)  ((void)0)
  20.172  #define perfc_decra(x,y)  ((void)0)
  20.173  #define perfc_add(x,y)    ((void)0)
  20.174 -#define perfc_addc(x,y)   ((void)0)
  20.175  #define perfc_adda(x,y,z) ((void)0)
  20.176  #define perfc_incr_histo(x,y,z) ((void)0)
  20.177  
  20.178  #endif /* PERF_COUNTERS */
  20.179  
  20.180 +/* Compatibility: This should go away once all users got converted. */
  20.181 +#define perfc_incrc     perfc_incr
  20.182 +
  20.183  #endif /* __XEN_PERFC_H__ */
    21.1 --- a/xen/include/xen/perfc_defn.h	Tue Mar 27 16:23:52 2007 +0100
    21.2 +++ b/xen/include/xen/perfc_defn.h	Tue Mar 27 16:35:37 2007 +0100
    21.3 @@ -6,6 +6,9 @@
    21.4  
    21.5  PERFCOUNTER_ARRAY(hypercalls,           "hypercalls", NR_hypercalls)
    21.6  
    21.7 +PERFCOUNTER(calls_to_multicall,         "calls to multicall")
    21.8 +PERFCOUNTER(calls_from_multicall,       "calls from multicall")
    21.9 +
   21.10  PERFCOUNTER_CPU(irqs,                   "#interrupts")
   21.11  PERFCOUNTER_CPU(ipis,                   "#IPIs")
   21.12