ia64/xen-unstable

changeset 3963:cfee4c4a8ed6

bitkeeper revision 1.1242 (4225f56fwo6ym-RMTBheAeYhl10ATQ)

forward ported James Bulpin's performance counters tool

Signed-off-by: michael.fetterman@cl.cam.ac.uk
author rneugeba@wyvis.research.intel-research.net
date Wed Mar 02 17:18:39 2005 +0000 (2005-03-02)
parents a6914c2c15cf
children 24703bde489b
files .rootkeys tools/libxc/plan9a.out.h tools/libxc/xc.h tools/libxc/xc_misc.c tools/libxc/xc_plan9_build.c tools/misc/Makefile tools/misc/cpuperf/Makefile tools/misc/cpuperf/README.txt tools/misc/cpuperf/cpuperf.c tools/misc/cpuperf/cpuperf_perfcntr.h tools/misc/cpuperf/cpuperf_xeno.h tools/misc/cpuperf/module/Makefile tools/misc/cpuperf/module/perfcntr.c tools/misc/cpuperf/p4perf.h tools/misc/miniterm/Makefile tools/misc/miniterm/miniterm.c
line diff
     1.1 --- a/.rootkeys	Tue Mar 01 13:47:52 2005 +0000
     1.2 +++ b/.rootkeys	Wed Mar 02 17:18:39 2005 +0000
     1.3 @@ -586,6 +586,14 @@ 40e03332h5V611rRWURRLqb1Ekatxg tools/lib
     1.4  41a216cayFe2FQroFuzvNPw1AvNiqQ tools/libxutil/util.c
     1.5  41a216ca7mgVSnCBHPCLkGOIqPS1CQ tools/libxutil/util.h
     1.6  3f776bd2Xd-dUcPKlPN2vG89VGtfvQ tools/misc/Makefile
     1.7 +4225f56d7sa9aEARfjNeCVTMYDAmZA tools/misc/cpuperf/Makefile
     1.8 +4225f56dS5TGdKojmuBnrV3PzbE6Rg tools/misc/cpuperf/README.txt
     1.9 +4225f56dcodvBSPoWYS6kvwZCQhgzg tools/misc/cpuperf/cpuperf.c
    1.10 +4225f56dMjZK14EWd8K0gq4v5Diwjg tools/misc/cpuperf/cpuperf_perfcntr.h
    1.11 +4225f56d_XjSY1297IiH96qeqD4sCA tools/misc/cpuperf/cpuperf_xeno.h
    1.12 +4225f56dqlGC_UZ681F95mCgLbOeHQ tools/misc/cpuperf/module/Makefile
    1.13 +4225f56dnmms-VFr1MiDVG_dYoM7IQ tools/misc/cpuperf/module/perfcntr.c
    1.14 +4225f56dYhIGQRD_kKVJ6xQrkqO0YQ tools/misc/cpuperf/p4perf.h
    1.15  40ab2cfawIw8tsYo0dQKtp83h4qfTQ tools/misc/fakei386xen
    1.16  3f6dc136ZKOjd8PIqLbFBl_v-rnkGg tools/misc/miniterm/Makefile
    1.17  3f6dc140C8tAeBfroAF24VrmCS4v_w tools/misc/miniterm/README
     2.1 --- a/tools/libxc/xc.h	Tue Mar 01 13:47:52 2005 +0000
     2.2 +++ b/tools/libxc/xc.h	Wed Mar 02 17:18:39 2005 +0000
     2.3 @@ -370,6 +370,11 @@ int xc_perfc_control(int xc_handle,
     2.4                       u32 op,
     2.5                       xc_perfc_desc_t *desc);
     2.6  
     2.7 +/* read/write msr */
     2.8 +long long xc_msr_read(int xc_handle, int cpu_mask, int msr);
     2.9 +int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low,
    2.10 +                  unsigned int high);
    2.11 +
    2.12  /**
    2.13   * Memory maps a range within one domain to a local address range.  Mappings
    2.14   * should be unmapped with munmap and should follow the same rules as mmap
     3.1 --- a/tools/libxc/xc_misc.c	Tue Mar 01 13:47:52 2005 +0000
     3.2 +++ b/tools/libxc/xc_misc.c	Wed Mar 02 17:18:39 2005 +0000
     3.3 @@ -97,3 +97,36 @@ int xc_perfc_control(int xc_handle,
     3.4  
     3.5      return (rc == 0) ? dop.u.perfccontrol.nr_counters : rc;
     3.6  }
     3.7 +
     3.8 +long long xc_msr_read(int xc_handle, int cpu_mask, int msr)
     3.9 +{
    3.10 +    /* Read MSR 'msr' via a DOM0_MSR request; result is out2:out1. */
    3.11 +    dom0_op_t op;
    3.12 +
    3.13 +    op.cmd = DOM0_MSR;
    3.14 +    op.u.msr.write = 0;
    3.15 +    op.u.msr.msr = msr;
    3.16 +    op.u.msr.cpu_mask = cpu_mask;
    3.17 +    /* On failure out1/out2 may be unset: return 0, not stack garbage. */
    3.18 +    if ( do_dom0_op(xc_handle, &op) != 0 )
    3.19 +        return 0;
    3.20 +    return (((unsigned long long)op.u.msr.out2)<<32) | op.u.msr.out1 ;
    3.21 +}
    3.22 +
    3.23 +int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low,
    3.24 +                  unsigned int high)
    3.25 +{
    3.26 +    /*
    3.27 +     * Issue a DOM0_MSR write request: 'low' travels in in1, 'high' in in2.
    3.28 +     * The result of do_dom0_op() is handed back to the caller unchanged.
    3.29 +     */
    3.30 +    dom0_op_t request;
    3.31 +
    3.32 +    request.cmd = DOM0_MSR;
    3.33 +    request.u.msr.cpu_mask = cpu_mask;
    3.34 +    request.u.msr.msr = msr;
    3.35 +    request.u.msr.write = 1;
    3.36 +    request.u.msr.in1 = low;
    3.37 +    request.u.msr.in2 = high;
    3.38 +    return do_dom0_op(xc_handle, &request);
    3.39 +}
     4.1 --- a/tools/misc/Makefile	Tue Mar 01 13:47:52 2005 +0000
     4.2 +++ b/tools/misc/Makefile	Wed Mar 02 17:18:39 2005 +0000
     4.3 @@ -21,18 +21,21 @@ INSTALL_SBIN = netfix xm xend xensv xenp
     4.4  
     4.5  all: $(TARGETS)
     4.6  	$(MAKE) -C miniterm
     4.7 +	$(MAKE) -C cpuperf
     4.8  
     4.9  install: all
    4.10  	[ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin
    4.11  	[ -d $(DESTDIR)/usr/sbin ] || $(INSTALL_DIR) $(DESTDIR)/usr/sbin
    4.12  	$(INSTALL_PROG) $(INSTALL_BIN) $(DESTDIR)/usr/bin
    4.13  	$(INSTALL_PROG) $(INSTALL_SBIN) $(DESTDIR)/usr/sbin
    4.14 +	$(MAKE) -C cpuperf install
    4.15  #       No sense in installing miniterm on the Xen box.
    4.16  #	$(MAKE) -C miniterm install
    4.17  
    4.18  clean:
    4.19  	$(RM) *.o $(TARGETS) *~
    4.20  	$(MAKE) -C miniterm clean
    4.21 +	$(MAKE) -C cpuperf clean
    4.22  
    4.23  %.o: %.c $(HDRS) Makefile
    4.24  	$(CC) -c $(CFLAGS) -o $@ $<
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/tools/misc/cpuperf/Makefile	Wed Mar 02 17:18:39 2005 +0000
     5.3 @@ -0,0 +1,51 @@
     5.4 +#
     5.5 +# Make Performance counter tool
     5.6 +#
     5.7 +# $Id: Makefile,v 1.1 2003/10/13 16:49:44 jrb44 Exp $
     5.8 +#
     5.9 +# $Log: Makefile,v $
    5.10 +# Revision 1.1  2003/10/13 16:49:44  jrb44
    5.11 +# Initial revision
    5.12 +#
    5.13 +#
    5.14 +
    5.15 +INSTALL		= install
    5.16 +INSTALL_PROG	= $(INSTALL) -m0755
    5.17 +INSTALL_DIR	= $(INSTALL) -d -m0755
    5.18 +
    5.19 +# these are for Xen
    5.20 +XEN_ROOT=../../..
    5.21 +include $(XEN_ROOT)/tools/Rules.mk
    5.22 +
    5.23 +CC           = gcc
    5.24 +CFLAGS       = -Wall -O3 
    5.25 +
    5.26 +HDRS         = $(wildcard *.h)
    5.27 +SRCS         = $(wildcard *.c)
    5.28 +OBJS         = $(patsubst %.c,%.o,$(SRCS))
    5.29 +
    5.30 +TARGETS      = cpuperf-xen cpuperf-perfcntr
    5.31 +
    5.32 +INSTALL_BIN  = $(TARGETS)
    5.33 +
    5.34 +.PHONY: all clean install
    5.35 +all: $(TARGETS)
    5.36 +
    5.37 +clean:
    5.38 +	$(RM) *.o $(TARGETS)
    5.39 +
    5.40 +%: %.c $(HDRS) Makefile
    5.41 +	$(CC) $(CFLAGS) -o $@ $<
    5.42 +# Libraries go after the source file so the linker can resolve its symbols.
    5.43 +cpuperf-xen: cpuperf.c $(HDRS) Makefile
    5.44 +	$(CC) $(CFLAGS) -I $(XEN_LIBXC) -DXENO -o $@ $< -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -lxc -lxutil
    5.45 +
    5.46 +cpuperf-perfcntr: cpuperf.c $(HDRS) Makefile
    5.47 +	$(CC) $(CFLAGS) -DPERFCNTR -o $@ $<
    5.48 +
    5.49 +install: all
    5.50 +	[ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin
    5.51 +	$(INSTALL_PROG) $(INSTALL_BIN) $(DESTDIR)/usr/bin
    5.52 +
    5.53 +# End of $RCSfile: Makefile,v $
    5.54 +
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/tools/misc/cpuperf/README.txt	Wed Mar 02 17:18:39 2005 +0000
     6.3 @@ -0,0 +1,371 @@
     6.4 +Usage
     6.5 +=====
     6.6 +
     6.7 +Use either cpuperf-xen or cpuperf-perfcntr as appropriate to the system
     6.8 +in use.
     6.9 +
    6.10 +To write:
    6.11 +
    6.12 +    cpuperf -E <escr> -C <cccr> 
    6.13 +
    6.14 +        optional: all numbers in base 10 unless specified
    6.15 +
    6.16 +        -d             Debug mode
    6.17 +        -c <cpu>       CPU number
    6.18 +        -t <thread>    ESCR thread bits - default is 12 (Thread 0 all rings)
    6.19 +                         bit 0: Thread 1 in rings 1,2,3
    6.20 +                         bit 1: Thread 1 in ring 0
    6.21 +                         bit 2: Thread 0 in rings 1,2,3
    6.22 +                         bit 3: Thread 0 in ring 0
    6.23 +        -e <eventsel>  Event selection number
    6.24 +        -m <eventmask> Event mask bits
    6.25 +        -T <value>     ESCR tag value
    6.26 +        -k             Sets CCCR 'compare' bit
    6.27 +        -n             Sets CCCR 'complement' bit
    6.28 +        -g             Sets CCCR 'edge' bit
    6.29 +        -P <bit>       Set the specified bit in MSR_P4_PEBS_ENABLE
    6.30 +        -V <bit>       Set the specified bit in MSR_P4_PEBS_MATRIX_VERT
    6.31 +        (-V and -P may be used multiple times to set multiple bits.)
    6.32 +
    6.33 +To read:
    6.34 +
    6.35 +    cpuperf -r    
    6.36 +
    6.37 +        optional: all numbers in base 10 unless specified
    6.38 +    
    6.39 +        -c <cpu>       CPU number
    6.40 +
    6.41 +<cccr> values:
    6.42 +
    6.43 +    BPU_CCCR0
    6.44 +    BPU_CCCR1
    6.45 +    BPU_CCCR2
    6.46 +    BPU_CCCR3
    6.47 +    MS_CCCR0
    6.48 +    MS_CCCR1
    6.49 +    MS_CCCR2
    6.50 +    MS_CCCR3
    6.51 +    FLAME_CCCR0
    6.52 +    FLAME_CCCR1
    6.53 +    FLAME_CCCR2
    6.54 +    FLAME_CCCR3
    6.55 +    IQ_CCCR0
    6.56 +    IQ_CCCR1
    6.57 +    IQ_CCCR2
    6.58 +    IQ_CCCR3
    6.59 +    IQ_CCCR4
    6.60 +    IQ_CCCR5
    6.61 +    NONE - do not program any CCCR, used when setting up an ESCR for tagging
    6.62 +
    6.63 +<escr> values:
    6.64 +
    6.65 +    BSU_ESCR0
    6.66 +    BSU_ESCR1
    6.67 +    FSB_ESCR0
    6.68 +    FSB_ESCR1
    6.69 +    MOB_ESCR0
    6.70 +    MOB_ESCR1
    6.71 +    PMH_ESCR0
    6.72 +    PMH_ESCR1
    6.73 +    BPU_ESCR0
    6.74 +    BPU_ESCR1
    6.75 +    IS_ESCR0
    6.76 +    IS_ESCR1
    6.77 +    ITLB_ESCR0
    6.78 +    ITLB_ESCR1
    6.79 +    IX_ESCR0
    6.80 +    IX_ESCR1
    6.81 +    MS_ESCR0
    6.82 +    MS_ESCR1
    6.83 +    TBPU_ESCR0
    6.84 +    TBPU_ESCR1
    6.85 +    TC_ESCR0
    6.86 +    TC_ESCR1
    6.87 +    FIRM_ESCR0
    6.88 +    FIRM_ESCR1
    6.89 +    FLAME_ESCR0
    6.90 +    FLAME_ESCR1
    6.91 +    DAC_ESCR0
    6.92 +    DAC_ESCR1
    6.93 +    SAAT_ESCR0
    6.94 +    SAAT_ESCR1
    6.95 +    U2L_ESCR0
    6.96 +    U2L_ESCR1
    6.97 +    CRU_ESCR0
    6.98 +    CRU_ESCR1
    6.99 +    CRU_ESCR2
   6.100 +    CRU_ESCR3
   6.101 +    CRU_ESCR4
   6.102 +    CRU_ESCR5
   6.103 +    IQ_ESCR0
   6.104 +    IQ_ESCR1
   6.105 +    RAT_ESCR0
   6.106 +    RAT_ESCR1
   6.107 +    SSU_ESCR0
   6.108 +    SSU_ESCR1
   6.109 +    ALF_ESCR0
   6.110 +    ALF_ESCR1
   6.111 +
   6.112 +
   6.113 +Example configurations
   6.114 +======================
   6.115 +
   6.116 +Note that in most cases there is a choice of ESCRs and CCCRs for
   6.117 +each metric although not all combinations are allowed. Each ESCR and
   6.118 +counter/CCCR can be used only once.
   6.119 +
   6.120 +Mispredicted branches retired
   6.121 +=============================
   6.122 +
   6.123 +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 3 -m 1
   6.124 +cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 3 -m 1
   6.125 +cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 3 -m 1
   6.126 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 3 -m 1
   6.127 +cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 3 -m 1
   6.128 +cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 3 -m 1
   6.129 +
   6.130 +Tracecache misses
   6.131 +=================
   6.132 +
   6.133 +cpuperf -E BPU_ESCR0 -C BPU_CCCR0 -e 3 -m 1
   6.134 +cpuperf -E BPU_ESCR0 -C BPU_CCCR1 -e 3 -m 1
   6.135 +cpuperf -E BPU_ESCR1 -C BPU_CCCR2 -e 3 -m 1
   6.136 +cpuperf -E BPU_ESCR1 -C BPU_CCCR3 -e 3 -m 1
   6.137 +
   6.138 +I-TLB
   6.139 +=====
   6.140 +
   6.141 +cpuperf -E ITLB_ESCR0 -C BPU_CCCR0 -e 24 
   6.142 +cpuperf -E ITLB_ESCR0 -C BPU_CCCR1 -e 24 
   6.143 +cpuperf -E ITLB_ESCR1 -C BPU_CCCR2 -e 24 
   6.144 +cpuperf -E ITLB_ESCR1 -C BPU_CCCR3 -e 24 
   6.145 +
   6.146 + -m <n> : bit 0 count HITS, bit 1 MISSES, bit 2 uncacheable hit
   6.147 +
   6.148 + e.g. all ITLB misses -m 2
   6.149 +
   6.150 +Load replays
   6.151 +============
   6.152 +
   6.153 +cpuperf -E MOB_ESCR0 -C BPU_CCCR0 -e 3
   6.154 +cpuperf -E MOB_ESCR0 -C BPU_CCCR1 -e 3
   6.155 +cpuperf -E MOB_ESCR1 -C BPU_CCCR2 -e 3
   6.156 +cpuperf -E MOB_ESCR1 -C BPU_CCCR3 -e 3
   6.157 +
   6.158 + -m <n> : bit mask, replay due to...
   6.159 +           1: unknown store address
   6.160 +           3: unknown store data
   6.161 +           4: partially overlapped data access between LD/ST
   6.162 +           5: unaligned address between LD/ST
   6.163 +
   6.164 +Page walks
   6.165 +==========
   6.166 +
   6.167 +cpuperf -E PMH_ESCR0 -C BPU_CCCR0 -e 1
   6.168 +cpuperf -E PMH_ESCR0 -C BPU_CCCR1 -e 1
   6.169 +cpuperf -E PMH_ESCR1 -C BPU_CCCR2 -e 1
   6.170 +cpuperf -E PMH_ESCR1 -C BPU_CCCR3 -e 1
   6.171 +
   6.172 + -m <n> : bit 0 counts walks for a D-TLB miss, bit 1 for I-TLB miss
   6.173 +
   6.174 +L2/L3 cache accesses
   6.175 +====================
   6.176 +
   6.177 +cpuperf -E BSU_ESCR0 -C BPU_CCCR0 -e 12
   6.178 +cpuperf -E BSU_ESCR0 -C BPU_CCCR1 -e 12
   6.179 +cpuperf -E BSU_ESCR1 -C BPU_CCCR2 -e 12
   6.180 +cpuperf -E BSU_ESCR1 -C BPU_CCCR3 -e 12
   6.181 +
   6.182 + -m <n> : where the bit mask is:
   6.183 +           0: Read L2 HITS Shared
   6.184 +           1: Read L2 HITS Exclusive
   6.185 +           2: Read L2 HITS Modified
   6.186 +           3: Read L3 HITS Shared
   6.187 +           4: Read L3 HITS Exclusive
   6.188 +           5: Read L3 HITS Modified
   6.189 +           8: Read L2 MISS
   6.190 +           9: Read L3 MISS
   6.191 +          10: Write L2 MISS
   6.192 +
   6.193 +Front side bus activity
   6.194 +=======================
   6.195 +
   6.196 +cpuperf -E FSB_ESCR0 -C BPU_CCCR0 -e 23 -k -g
   6.197 +cpuperf -E FSB_ESCR0 -C BPU_CCCR1 -e 23 -k -g
   6.198 +cpuperf -E FSB_ESCR1 -C BPU_CCCR2 -e 23 -k -g
   6.199 +cpuperf -E FSB_ESCR1 -C BPU_CCCR3 -e 23 -k -g
   6.200 +
   6.201 + -m <n> : where the bit mask is for bus events:
   6.202 +           0: DRDY_DRV    Processor drives bus
   6.203 +           1: DRDY_OWN    Processor reads bus
   6.204 +           2: DRDY_OTHER  Data on bus not being sampled by processor
   6.205 +           3: DBSY_DRV    Processor reserves bus for driving
   6.206 +           4: DBSY_OWN    Other entity reserves bus for sending to processor
   6.207 +           5: DBSY_OTHER  Other entity reserves bus for sending elsewhere
   6.208 +
   6.209 + e.g. -m 3 to get cycles bus actually in use.
   6.210 +
   6.211 +Pipeline clear (entire)
   6.212 +=======================
   6.213 +
   6.214 +cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 2
   6.215 +cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 2
   6.216 +cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 2
   6.217 +cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 2
   6.218 +cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 2
   6.219 +cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 2
   6.220 +
   6.221 + -m <n> : bit mask:
   6.222 +           0: counts a portion of cycles while clear (use -g for edge trigger)
   6.223 +           1: counts each time machine clears for memory ordering issues
   6.224 +           2: counts each time machine clears for self modifying code
   6.225 +
   6.226 +Instructions retired
   6.227 +====================
   6.228 +
   6.229 +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2
   6.230 +cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 2
   6.231 +cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 2
   6.232 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 2
   6.233 +cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 2
   6.234 +cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 2
   6.235 +
   6.236 + -m <n> : bit mask:
   6.237 +           0: counts non-bogus, not tagged instructions
   6.238 +           1: counts non-bogus, tagged instructions
   6.239 +           2: counts bogus, not tagged instructions
   6.240 +           3: counts bogus, tagged instructions
   6.241 +
   6.242 + e.g. -m 3 to count legit retirements
   6.243 +
   6.244 +Uops retired
   6.245 +============
   6.246 +
   6.247 +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 1
   6.248 +cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 1
   6.249 +cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 1
   6.250 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 1
   6.251 +cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 1
   6.252 +cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 1
   6.253 +
   6.254 + -m <n> : bit mask:
   6.255 +           0: Non-bogus
   6.256 +           1: Bogus
   6.257 +
   6.258 +x87 FP uops
   6.259 +===========
   6.260 +
   6.261 +cpuperf -E FIRM_ESCR0 -C FLAME_CCCR0 -e 4 -m 32768
   6.262 +cpuperf -E FIRM_ESCR0 -C FLAME_CCCR1 -e 4 -m 32768
   6.263 +cpuperf -E FIRM_ESCR1 -C FLAME_CCCR2 -e 4 -m 32768
   6.264 +cpuperf -E FIRM_ESCR1 -C FLAME_CCCR3 -e 4 -m 32768
   6.265 +
   6.266 +Replay tagging mechanism
   6.267 +========================
   6.268 +
   6.269 +Counts retirement of uops tagged with the replay tagging mechanism
   6.270 +
   6.271 +cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 9
   6.272 +cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 9
   6.273 +cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 9
   6.274 +cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 9
   6.275 +cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 9
   6.276 +cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 9
   6.277 +
   6.278 + -m <n> : bit mask:
   6.279 +           0: Non-bogus (set this bit for all events listed below)
   6.280 +           1: Bogus
   6.281 +
   6.282 +Set replay tagging mechanism bits with -P and -V:
   6.283 +
   6.284 +  L1 cache load miss retired:      -P 0 -P 24 -P 25 -V 0
   6.285 +  L2 cache load miss retired:      -P 1 -P 24 -P 25 -V 0  (read manual)
   6.286 +  DTLB load miss retired:          -P 2 -P 24 -P 25 -V 0
   6.287 +  DTLB store miss retired:         -P 2 -P 24 -P 25 -V 1
   6.288 +  DTLB all miss retired:           -P 2 -P 24 -P 25 -V 0 -V 1
   6.289 +
   6.290 +e.g. to count all DTLB misses
   6.291 +
   6.292 + cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 9 -m 1 -P 2 -P 24 -P 25 -V 0 -V 1
   6.293 +
   6.294 +Front end event
   6.295 +===============
   6.296 +
   6.297 +To count tagged uops:
   6.298 +
   6.299 +cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 8
   6.300 +cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 8
   6.301 +cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 8
   6.302 +cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 8
   6.303 +cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 8
   6.304 +cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 8
   6.305 +
   6.306 + -m <n> : bit 0 for non-bogus uops, bit 1 for bogus uops
   6.307 +
   6.308 +Must have another ESCR programmed to tag uops as required
   6.309 +
   6.310 +cpuperf -E RAT_ESCR0 -C NONE -e 2
   6.311 +cpuperf -E RAT_ESCR1 -C NONE -e 2
   6.312 +
   6.313 + -m <n> : bit 1 for LOADs, bit 2 for STOREs
   6.314 +
   6.315 +An example set of counters
   6.316 +===========================
   6.317 +
   6.318 +# instructions retired
   6.319 +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2 -m 3
   6.320 +
   6.321 +# trace cache misses
   6.322 +cpuperf -E BPU_ESCR0 -C BPU_CCCR0 -e 3 -m 1
   6.323 +
   6.324 +# L1 D cache misses (load misses retired)
   6.325 +cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 9 -m 1 -P 0 -P 24 -P 25 -V 0
   6.326 +
   6.327 +# L2 misses (load and store)
   6.328 +cpuperf -E BSU_ESCR0 -C BPU_CCCR1 -e 12 -m 1280
   6.329 +
   6.330 +# I-TLB misses
   6.331 +cpuperf -E ITLB_ESCR1 -C BPU_CCCR2 -e 24 -m 2
   6.332 +
   6.333 +# D-TLB misses (as PT walks)
   6.334 +cpuperf -E PMH_ESCR1 -C BPU_CCCR3 -e 1 -m 1
   6.335 +
   6.336 +# Other 'bonus' counters would be:
   6.337 +#   number of loads executed - need both command lines
   6.338 +cpuperf -E RAT_ESCR0 -C NONE -e 2 -m 2
   6.339 +cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 8 -m 3
   6.340 +
   6.341 +#   number of mispredicted branches
   6.342 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 3 -m 1
   6.343 +
   6.344 +# x87 FP uOps
   6.345 +cpuperf -E FIRM_ESCR0 -C FLAME_CCCR0 -e 4 -m 32768
   6.346 +
   6.347 +The above has counter assignments
   6.348 +
   6.349 +0  Trace cache misses
   6.350 +1  L2 Misses
   6.351 +2  I-TLB misses
   6.352 +3  D-TLB misses
   6.353 +4  
   6.354 +5  
   6.355 +6  
   6.356 +7  
   6.357 +8  x87 FP uOps 
   6.358 +9  
   6.359 +10 
   6.360 +11 
   6.361 +12 Instructions retired
   6.362 +13 L1 D cache misses
   6.363 +14 Mispredicted branches
   6.364 +15 Loads executed
   6.365 +16 
   6.366 +17 
   6.367 +
   6.368 +Counting instructions retired on each logical CPU
   6.369 +=================================================
   6.370 +
   6.371 +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2 -m 3 -t 12
   6.372 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 2 -m 3 -t 3
   6.373 +
   6.374 +Cannot count mispred branches as well due to CRU_ESCR1 use.
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/tools/misc/cpuperf/cpuperf.c	Wed Mar 02 17:18:39 2005 +0000
     7.3 @@ -0,0 +1,301 @@
     7.4 +/*
     7.5 + * User mode program to program performance counters.
     7.6 + *
     7.7 + * JRB/IAP October 2003.
     7.8 + *
     7.9 + * $Id: cpuperf.c,v 1.2 2003/10/14 11:00:59 jrb44 Exp $
    7.10 + *
    7.11 + * $Log: cpuperf.c,v $
    7.12 + * Revision 1.2  2003/10/14 11:00:59  jrb44
    7.13 + * Added default CPU. Added NONE CCCR.
    7.14 + *
    7.15 + * Revision 1.1  2003/10/13 16:49:44  jrb44
    7.16 + * Initial revision
    7.17 + *
    7.18 + */
    7.19 +
    7.20 +#include <sys/types.h>
    7.21 +#include <sched.h>
    7.22 +#include <error.h>
    7.23 +#include <stdio.h>
    7.24 +#include <unistd.h>
    7.25 +#include <stdlib.h>
    7.26 +#include <string.h>
    7.27 +#include <errno.h>
    7.28 +
    7.29 +#include "p4perf.h"
    7.30 +
    7.31 +static inline void cpus_wrmsr(int cpu_mask,
    7.32 +                              int msr,
    7.33 +                              unsigned int low,
    7.34 +                              unsigned int high )
    7.35 +{
    7.36 +    fprintf(stderr, "No backend to write MSR 0x%x <= 0x%08x%08x on %08x\n",
    7.37 +            msr, high, low, cpu_mask);
    7.38 +}
    7.39 +
    7.40 +static inline unsigned long long cpus_rdmsr( int cpu_mask, int msr )
    7.41 +{
    7.42 +    fprintf(stderr, "No backend to read MSR 0x%x on %08x\n", msr, cpu_mask);
    7.43 +    return 0;
    7.44 +}
    7.45 +
    7.46 +#ifdef PERFCNTR
    7.47 +#include "cpuperf_perfcntr.h"
    7.48 +#define cpus_wrmsr perfcntr_wrmsr
    7.49 +#define cpus_rdmsr perfcntr_rdmsr
    7.50 +#endif
    7.51 +
    7.52 +#ifdef XENO
    7.53 +#include "cpuperf_xeno.h"
    7.54 +#define cpus_wrmsr dom0_wrmsr
    7.55 +#define cpus_rdmsr dom0_rdmsr
    7.56 +#endif
    7.57 +
    7.58 +struct macros {
    7.59 +    char         *name;
    7.60 +    unsigned long msr_addr;
    7.61 +    int           number;
    7.62 +};
    7.63 +
    7.64 +#define NO_CCCR 0xfffffffe
    7.65 +
    7.66 +struct macros msr[] = {
    7.67 +    {"BPU_COUNTER0", 0x300, 0},
    7.68 +    {"BPU_COUNTER1", 0x301, 1},
    7.69 +    {"BPU_COUNTER2", 0x302, 2},
    7.70 +    {"BPU_COUNTER3", 0x303, 3},
    7.71 +    {"MS_COUNTER0", 0x304, 4},
    7.72 +    {"MS_COUNTER1", 0x305, 5},
    7.73 +    {"MS_COUNTER2", 0x306, 6},
    7.74 +    {"MS_COUNTER3", 0x307, 7},
    7.75 +    {"FLAME_COUNTER0", 0x308, 8},
    7.76 +    {"FLAME_COUNTER1", 0x309, 9},
    7.77 +    {"FLAME_COUNTER2", 0x30a, 10},
    7.78 +    {"FLAME_COUNTER3", 0x30b, 11},
    7.79 +    {"IQ_COUNTER0", 0x30c, 12},
    7.80 +    {"IQ_COUNTER1", 0x30d, 13},
    7.81 +    {"IQ_COUNTER2", 0x30e, 14},
    7.82 +    {"IQ_COUNTER3", 0x30f, 15},
    7.83 +    {"IQ_COUNTER4", 0x310, 16},
    7.84 +    {"IQ_COUNTER5", 0x311, 17},
    7.85 +    {"BPU_CCCR0", 0x360, 0},
    7.86 +    {"BPU_CCCR1", 0x361, 1},
    7.87 +    {"BPU_CCCR2", 0x362, 2},
    7.88 +    {"BPU_CCCR3", 0x363, 3},
    7.89 +    {"MS_CCCR0", 0x364, 4},
    7.90 +    {"MS_CCCR1", 0x365, 5},
    7.91 +    {"MS_CCCR2", 0x366, 6},
    7.92 +    {"MS_CCCR3", 0x367, 7},
    7.93 +    {"FLAME_CCCR0", 0x368, 8},
    7.94 +    {"FLAME_CCCR1", 0x369, 9},
    7.95 +    {"FLAME_CCCR2", 0x36a, 10},
    7.96 +    {"FLAME_CCCR3", 0x36b, 11},
    7.97 +    {"IQ_CCCR0", 0x36c, 12},
    7.98 +    {"IQ_CCCR1", 0x36d, 13},
    7.99 +    {"IQ_CCCR2", 0x36e, 14},
   7.100 +    {"IQ_CCCR3", 0x36f, 15},
   7.101 +    {"IQ_CCCR4", 0x370, 16},
   7.102 +    {"IQ_CCCR5", 0x371, 17},
   7.103 +    {"BSU_ESCR0", 0x3a0, 7},
   7.104 +    {"BSU_ESCR1", 0x3a1, 7},
   7.105 +    {"FSB_ESCR0", 0x3a2, 6},
   7.106 +    {"FSB_ESCR1", 0x3a3, 6},
   7.107 +    {"MOB_ESCR0", 0x3aa, 2},
   7.108 +    {"MOB_ESCR1", 0x3ab, 2},
   7.109 +    {"PMH_ESCR0", 0x3ac, 4},
   7.110 +    {"PMH_ESCR1", 0x3ad, 4},
   7.111 +    {"BPU_ESCR0", 0x3b2, 0},
   7.112 +    {"BPU_ESCR1", 0x3b3, 0},
   7.113 +    {"IS_ESCR0", 0x3b4, 1},
   7.114 +    {"IS_ESCR1", 0x3b5, 1},
   7.115 +    {"ITLB_ESCR0", 0x3b6, 3},
   7.116 +    {"ITLB_ESCR1", 0x3b7, 3},
   7.117 +    {"IX_ESCR0", 0x3c8, 5},
   7.118 +    {"IX_ESCR1", 0x3c9, 5},
   7.119 +    {"MS_ESCR0", 0x3c0, 0},
   7.120 +    {"MS_ESCR1", 0x3c1, 0},
   7.121 +    {"TBPU_ESCR0", 0x3c2, 2},
   7.122 +    {"TBPU_ESCR1", 0x3c3, 2},
   7.123 +    {"TC_ESCR0", 0x3c4, 1},
   7.124 +    {"TC_ESCR1", 0x3c5, 1},
   7.125 +    {"FIRM_ESCR0", 0x3a4, 1},
   7.126 +    {"FIRM_ESCR1", 0x3a5, 1},
   7.127 +    {"FLAME_ESCR0", 0x3a6, 0},
   7.128 +    {"FLAME_ESCR1", 0x3a7, 0},
   7.129 +    {"DAC_ESCR0", 0x3a8, 5},
   7.130 +    {"DAC_ESCR1", 0x3a9, 5},
   7.131 +    {"SAAT_ESCR0", 0x3ae, 2},
   7.132 +    {"SAAT_ESCR1", 0x3af, 2},
   7.133 +    {"U2L_ESCR0", 0x3b0, 3},
   7.134 +    {"U2L_ESCR1", 0x3b1, 3},
   7.135 +    {"CRU_ESCR0", 0x3b8, 4},
   7.136 +    {"CRU_ESCR1", 0x3b9, 4},
   7.137 +    {"CRU_ESCR2", 0x3cc, 5},
   7.138 +    {"CRU_ESCR3", 0x3cd, 5},
   7.139 +    {"CRU_ESCR4", 0x3e0, 6},
   7.140 +    {"CRU_ESCR5", 0x3e1, 6},
   7.141 +    {"IQ_ESCR0", 0x3ba, 0},
   7.142 +    {"IQ_ESCR1", 0x3bb, 0},
   7.143 +    {"RAT_ESCR0", 0x3bc, 2},
   7.144 +    {"RAT_ESCR1", 0x3bd, 2},
   7.145 +    {"SSU_ESCR0", 0x3be, 3},
   7.146 +    {"SSU_ESCR1", 0x3bf, 3},
   7.147 +    {"ALF_ESCR0", 0x3ca, 1},
   7.148 +    {"ALF_ESCR1", 0x3cb, 1},
   7.149 +    {"PEBS_ENABLE", 0x3f1, 0},
   7.150 +    {"PEBS_MATRIX_VERT", 0x3f2, 0},
   7.151 +    {"NONE", NO_CCCR, 0},
   7.152 +    {NULL, 0, 0}
   7.153 +};
   7.154 +
   7.155 +struct macros *lookup_macro(char *str)
   7.156 +{
   7.157 +    /* Linear scan of msr[]; the table ends at the entry with a NULL name. */
   7.158 +    struct macros *entry;
   7.159 +
   7.160 +    for (entry = msr; entry->name != NULL; entry++) {
   7.161 +        if (!strcmp(entry->name, str))
   7.162 +            return entry;
   7.163 +    }
   7.164 +    /* No table entry carries that name. */
   7.165 +    return NULL;
   7.166 +}
   7.167 +
   7.168 +int main(int argc, char **argv)
   7.169 +{
   7.170 +    int c, t = 0xc, es = 0, em = 0, tv = 0, te = 0;
   7.171 +    unsigned int cpu_mask = 1;
   7.172 +    struct macros *escr = NULL, *cccr = NULL;
   7.173 +    unsigned long escr_val, cccr_val;
   7.174 +    int debug = 0;
   7.175 +    unsigned long pebs = 0, pebs_vert = 0;
   7.176 +    int pebs_x = 0, pebs_vert_x = 0;
   7.177 +    int read = 0;
   7.178 +    int compare = 0;
   7.179 +    int complement = 0;
   7.180 +    int edge = 0;
   7.181 +    /* Either dump the 18 counter MSRs (-r) or program one ESCR/CCCR pair. */
   7.182 +#ifdef XENO
   7.183 +    xen_init();
   7.184 +#endif
   7.185 +
   7.186 +    /* Parse options; -P and -V may repeat, each OR-ing in one more bit. */
   7.187 +    while ((c = getopt(argc, argv, "dc:t:e:m:T:E:C:P:V:rkng")) != -1) {
   7.188 +        switch((char)c) {
   7.189 +        case 'P':
   7.190 +            pebs |= 1UL << atoi(optarg);
   7.191 +            pebs_x = 1;
   7.192 +            break;
   7.193 +        case 'V':
   7.194 +            pebs_vert |= 1UL << atoi(optarg);
   7.195 +            pebs_vert_x = 1;
   7.196 +            break;
   7.197 +        case 'd':
   7.198 +            debug = 1;
   7.199 +            break;
   7.200 +        case 'c':
   7.201 +            {
   7.202 +                int cpu = atoi(optarg);
   7.203 +                cpu_mask  = (cpu == -1)?(~0U):(1U<<cpu);
   7.204 +            }
   7.205 +            break;
   7.206 +        case 't': // ESCR thread bits
   7.207 +            t = atoi(optarg);
   7.208 +            break;
   7.209 +        case 'e': // eventsel
   7.210 +            es = atoi(optarg);
   7.211 +            break;
   7.212 +        case 'm': // eventmask
   7.213 +            em = atoi(optarg);
   7.214 +            break;
   7.215 +        case 'T': // tag value
   7.216 +            tv = atoi(optarg);
   7.217 +            te = 1;
   7.218 +            break;
   7.219 +        case 'E':
   7.220 +            escr = lookup_macro(optarg);
   7.221 +            if (!escr) {
   7.222 +                fprintf(stderr, "Macro '%s' not found.\n", optarg);
   7.223 +                exit(1);
   7.224 +            }
   7.225 +            break;
   7.226 +        case 'C':
   7.227 +            cccr = lookup_macro(optarg);
   7.228 +            if (!cccr) {
   7.229 +                fprintf(stderr, "Macro '%s' not found.\n", optarg);
   7.230 +                exit(1);
   7.231 +            }
   7.232 +            break;
   7.233 +        case 'r':
   7.234 +            read = 1;
   7.235 +            break;
   7.236 +        case 'k':
   7.237 +            compare = 1;
   7.238 +            break;
   7.239 +        case 'n':
   7.240 +            complement = 1;
   7.241 +            break;
   7.242 +        case 'g':
   7.243 +            edge = 1;
   7.244 +            break;
   7.245 +        }
   7.246 +    }
   7.247 +    /* -r: print MSRs 0x300..0x311 once per pass while bit 0 of cpu_mask is set. */
   7.248 +    if (read) {
   7.249 +        while((cpu_mask&1)) {
   7.250 +            int i;
   7.251 +            for (i=0x300;i<0x312;i++) {
   7.252 +                printf("%010llx ",cpus_rdmsr( cpu_mask, i ) );
   7.253 +            }
   7.254 +            printf("\n");
   7.255 +            cpu_mask>>=1;
   7.256 +        }
   7.257 +        exit(0); /* a completed dump is not an error */
   7.258 +    }
   7.259 +
   7.260 +    if (!escr) {
   7.261 +        fprintf(stderr, "Need an ESCR.\n");
   7.262 +        exit(1);
   7.263 +    }
   7.264 +    if (!cccr) {
   7.265 +        fprintf(stderr, "Need a counter number.\n");
   7.266 +        exit(1);
   7.267 +    }
   7.268 +
   7.269 +    escr_val = P4_ESCR_THREADS(t) | P4_ESCR_EVNTSEL(es) |
   7.270 +        P4_ESCR_EVNTMASK(em) | P4_ESCR_TV(tv) | ((te)?P4_ESCR_TE:0);
   7.271 +    cccr_val = P4_CCCR_ENABLE | P4_CCCR_ESCR(escr->number) |
   7.272 +        ((compare)?P4_CCCR_COMPARE:0) |
   7.273 +        ((complement)?P4_CCCR_COMPLEMENT:0) |
   7.274 +        ((edge)?P4_CCCR_EDGE:0) |
   7.275 +        P4_CCCR_ACTIVE_THREAD(3)/*reserved*/;
   7.276 +
   7.277 +    if (debug) {
   7.278 +        fprintf(stderr, "ESCR 0x%lx <= 0x%08lx\n", escr->msr_addr, escr_val);
   7.279 +        if (cccr->msr_addr != NO_CCCR)
   7.280 +            fprintf(stderr, "CCCR 0x%lx <= 0x%08lx (%u)\n",
   7.281 +                    cccr->msr_addr, cccr_val, cccr->number);
   7.282 +        if (pebs_x)
   7.283 +            fprintf(stderr, "PEBS 0x%x <= 0x%08lx\n",
   7.284 +                    MSR_P4_PEBS_ENABLE, pebs);
   7.285 +        if (pebs_vert_x)
   7.286 +            fprintf(stderr, "PMV  0x%x <= 0x%08lx\n",
   7.287 +                    MSR_P4_PEBS_MATRIX_VERT, pebs_vert);
   7.288 +    }
   7.289 +    /* Write the ESCR first, then the CCCR (skipped when -C NONE was given). */
   7.290 +    cpus_wrmsr( cpu_mask, escr->msr_addr, escr_val, 0 );
   7.291 +    if (cccr->msr_addr != NO_CCCR)
   7.292 +        cpus_wrmsr( cpu_mask, cccr->msr_addr, cccr_val, 0 );
   7.293 +
   7.294 +    if (pebs_x)
   7.295 +        cpus_wrmsr( cpu_mask, MSR_P4_PEBS_ENABLE, pebs, 0 );
   7.296 +
   7.297 +    if (pebs_vert_x)
   7.298 +        cpus_wrmsr( cpu_mask, MSR_P4_PEBS_MATRIX_VERT, pebs_vert, 0 );
   7.299 +
   7.300 +    return 0;
   7.301 +}
   7.302 +
   7.303 +// End of $RCSfile: cpuperf.c,v $
   7.304 +
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/tools/misc/cpuperf/cpuperf_perfcntr.h	Wed Mar 02 17:18:39 2005 +0000
     8.3 @@ -0,0 +1,41 @@
     8.4 +/*
     8.5 + * Interface to JRB44's /proc/perfcntr interface.
     8.6 + *
     8.7 + * $Id: cpuperf_perfcntr.h,v 1.1 2003/10/13 16:49:44 jrb44 Exp $
     8.8 + *
     8.9 + * $Log: cpuperf_perfcntr.h,v $
    8.10 + * Revision 1.1  2003/10/13 16:49:44  jrb44
    8.11 + * Initial revision
    8.12 + *
    8.13 + */
    8.14 +
    8.15 +#define  PROC_PERFCNTR "/proc/perfcntr"
    8.16 +
    8.17 +static inline void perfcntr_wrmsr(int cpu_mask,
    8.18 +                                  int msr,
    8.19 +                                  unsigned int low,
    8.20 +                                  unsigned int high )
    8.21 +{ // Sends one "<cpu_mask> <msr> <value>" line (all hex) to PROC_PERFCNTR.
    8.22 +    FILE *fd;
    8.23 +    unsigned long long value = low | (((unsigned long long)high) << 32);
    8.24 +    // value is high:low as a single 64-bit quantity; the file is reopened per call.
    8.25 +    fd = fopen(PROC_PERFCNTR, "w");
    8.26 +    if (fd == NULL)
    8.27 +    {
    8.28 +        perror("open " PROC_PERFCNTR);
    8.29 +        exit(1); // the /proc file could not be opened: terminate the tool
    8.30 +    }
    8.31 +    // The same line is written to the /proc file and echoed to stderr.
    8.32 +    fprintf(fd, "%x %x %llx \n", cpu_mask, msr, value);
    8.33 +    fprintf(stderr, "%x %x %llx \n", cpu_mask, msr, value);
    8.34 +    fclose(fd);
    8.35 +}
    8.36 +
    8.37 +static inline unsigned long long perfcntr_rdmsr( int cpu_mask, int msr )
    8.38 +{
    8.39 +    fprintf(stderr, "WARNING: rdmsr not yet implemented for perfcntr.\n");
    8.40 +    return 0;
    8.41 +}
    8.42 +
    8.43 +// End of $RCSfile: cpuperf_perfcntr.h,v $
    8.44 +
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/tools/misc/cpuperf/cpuperf_xeno.h	Wed Mar 02 17:18:39 2005 +0000
     9.3 @@ -0,0 +1,38 @@
     9.4 +/*
     9.5 + * Interface to Xen MSR hypercalls.
     9.6 + * 
     9.7 + * $Id: cpuperf_xeno.h,v 1.1 2003/10/13 16:49:44 jrb44 Exp $
     9.8 + * 
     9.9 + * $Log: cpuperf_xeno.h,v $
    9.10 + * Revision 1.1  2003/10/13 16:49:44  jrb44
    9.11 + * Initial revision
    9.12 + *
    9.13 + */
    9.14 +
    9.15 +#include <xc.h>
    9.16 +
    9.17 +static int xc_handle;
    9.18 +
    9.19 +void xen_init()
    9.20 +{
    9.21 +    if ( (xc_handle = xc_interface_open()) == -1 )
    9.22 +    {
    9.23 +        fprintf(stderr, "Error opening xc interface: %d (%s)\n",
    9.24 +                errno, strerror(errno));
    9.25 +        exit(-1);
    9.26 +    }
    9.27 +
    9.28 +}
    9.29 +
    9.30 +void dom0_wrmsr( int cpu_mask, int msr, unsigned int low, unsigned int high )
    9.31 +{
    9.32 +    xc_msr_write (xc_handle, cpu_mask, msr, low, high);
    9.33 +}
    9.34 +
    9.35 +unsigned long long dom0_rdmsr( int cpu_mask, int msr )
    9.36 +{
    9.37 +    return xc_msr_read(xc_handle, cpu_mask, msr);
    9.38 +}
    9.39 +
    9.40 +// End of $RCSfile: cpuperf_xeno.h,v $
    9.41 +
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/tools/misc/cpuperf/module/Makefile	Wed Mar 02 17:18:39 2005 +0000
    10.3 @@ -0,0 +1,16 @@
    10.4 +#############################################################################
    10.5 +# (C) 2005 - Rolf Neugebauer - Intel Research Cambridge
    10.6 +#############################################################################
    10.7 +#
    10.8 +#        File: Makefile
    10.9 +#      Author: Rolf Neugebauer (rolf.neugebauer@intel.com)
   10.10 +#        Date: Mar 2005
   10.11 +# 
   10.12 +# Environment: 
   10.13 +#
   10.14 +
   10.15 +# invoke:
   10.16 +# make -C /lib/modules/`uname -r`/build SUBDIRS=`pwd` modules_install
   10.17 +
   10.18 +obj-m    := perfcntr.o
   10.19 +
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/tools/misc/cpuperf/module/perfcntr.c	Wed Mar 02 17:18:39 2005 +0000
    11.3 @@ -0,0 +1,730 @@
    11.4 +/*
    11.5 + * Linux loadable kernel module to use P4 performance counters.
    11.6 + *
    11.7 + * James Bulpin, Feb 2003.
    11.8 + *
    11.9 + * $Id$
   11.10 + *
   11.11 + * $Log$
   11.12 + */
   11.13 +
   11.14 +#define DRV_NAME        "perfcntr"
   11.15 +#define DRV_VERSION     "0.2"
   11.16 +#define DRV_RELDATE     "02 Jun 2004"
   11.17 +
   11.18 +
   11.19 +#include <linux/module.h>
   11.20 +#include <linux/kernel.h>
   11.21 +#include <linux/init.h>
   11.22 +#include <linux/types.h>
   11.23 +#include <linux/proc_fs.h>
   11.24 +#include <linux/seq_file.h>
   11.25 +
   11.26 +#include <asm/uaccess.h>
   11.27 +#include <asm/pgtable.h>
   11.28 +#include <asm/io.h>
   11.29 +#include <asm/processor.h>
   11.30 +
   11.31 +#define NOHT
   11.32 +
   11.33 +#include "../p4perf.h"
   11.34 +
   11.35 +#ifdef NOHT
   11.36 +# define CPUMASK 0x00000003
   11.37 +#else
   11.38 +# define CPUMASK 0x00000005
   11.39 +#endif
   11.40 +
   11.41 +/*****************************************************************************
   11.42 + * Module admin                                                              *
   11.43 + *****************************************************************************/
   11.44 +
   11.45 +MODULE_AUTHOR("James Bulpin <James.Bulpin@cl.cam.ac.uk>");
   11.46 +MODULE_DESCRIPTION("P4 Performance Counters access "
   11.47 +                   DRV_VERSION " " DRV_RELDATE);
   11.48 +MODULE_LICENSE("GPL");
   11.49 +
   11.50 +static char version[] __devinitdata =
   11.51 +DRV_NAME ": James Bulpin.\n";
   11.52 +
   11.53 +static unsigned char foobar[4];
   11.54 +
   11.55 +/* rpcc: get full 64-bit Pentium TSC value
   11.56 + */
   11.57 +static __inline__ unsigned long long int rpcc(void) 
   11.58 +{
   11.59 +    unsigned int __h, __l; // high and low 32-bit halves of the result
   11.60 +    __asm__ __volatile__ ("rdtsc" :"=a" (__l), "=d" (__h)); // "=a" -> __l, "=d" -> __h
   11.61 +    return (((unsigned long long)__h) << 32) + __l; // recombine as __h:__l
   11.62 +}
   11.63 +
   11.64 +/*****************************************************************************
   11.65 + * Display the counters                                                      *
   11.66 + *****************************************************************************/
   11.67 +
   11.68 +//#define processor cpu // post 2.4.16
   11.69 +
   11.70 +typedef union {
   11.71 +    struct {
   11.72 +        unsigned long lo;
   11.73 +        unsigned long hi;
   11.74 +    };
   11.75 +    unsigned long long cnt;
   11.76 +} cpu_perfcntr_t;
   11.77 +
   11.78 +typedef struct counters_t_struct {
   11.79 +    int                processor;
   11.80 +    unsigned long long tsc;
   11.81 +    cpu_perfcntr_t     counters[18];
   11.82 +} counters_t;
   11.83 +
   11.84 +typedef struct perfcntr_t_struct {
   11.85 +    unsigned long cpu_mask;
   11.86 +    counters_t    cpus[4]; // Actually for each cpu in system
   11.87 +} perfcntr_t;
   11.88 +
   11.89 +#ifdef HUMAN_READABLE
   11.90 +# define SHOW_COUNTER(c) rdmsr (c, l, h);\
   11.91 +    seq_printf(m, "0x%03x: 0x%08x%08x\n", c, h, l)
   11.92 +#else
   11.93 +# define SHOW_COUNTER(c) rdmsr (c, l, h);\
   11.94 +    seq_printf(m, " %llu", \
   11.95 +               (unsigned long long)h << 32 | (unsigned long long)l)
   11.96 +#endif
   11.97 +
   11.98 +#if 0
   11.99 +static unsigned long last_l = 0, last_h = 0, last_msr = 0;
  11.100 +static int last_cpu = 0;
  11.101 +#endif
  11.102 +
  11.103 +#define READ_COUNTER(_i, _msr) rdmsr((_msr), l, h); c->counters[_i].lo = l; \
  11.104 +    c->counters[_i].hi = h;
  11.105 +
  11.106 +static perfcntr_t perfcntrs;
  11.107 +
  11.108 +static void show_perfcntr_for(void *v)
  11.109 +{
  11.110 +    unsigned int l, h; // scratch halves filled by rdmsr inside READ_COUNTER
  11.111 +
  11.112 +    perfcntr_t *p = &perfcntrs;
  11.113 +    counters_t *c;
  11.114 +    // Snapshot this CPU's TSC and 18 counter MSRs into perfcntrs; v is unused.
  11.115 +    if (!((1 << smp_processor_id()) & p->cpu_mask))
  11.116 +        return; // this CPU's bit is not set in the mask: record nothing
  11.117 +
  11.118 +    c = &p->cpus[smp_processor_id()]; // slot is indexed by the CPU id
  11.119 +
  11.120 +    c->processor = smp_processor_id();
  11.121 +    c->tsc = rpcc();
  11.122 +
  11.123 +    READ_COUNTER(0,  MSR_P4_BPU_COUNTER0); // slots 0-3: BPU counters
  11.124 +    READ_COUNTER(1,  MSR_P4_BPU_COUNTER1);
  11.125 +    READ_COUNTER(2,  MSR_P4_BPU_COUNTER2);
  11.126 +    READ_COUNTER(3,  MSR_P4_BPU_COUNTER3);
  11.127 +
  11.128 +    READ_COUNTER(4,  MSR_P4_MS_COUNTER0); // slots 4-7: MS counters
  11.129 +    READ_COUNTER(5,  MSR_P4_MS_COUNTER1);
  11.130 +    READ_COUNTER(6,  MSR_P4_MS_COUNTER2);
  11.131 +    READ_COUNTER(7,  MSR_P4_MS_COUNTER3);
  11.132 +
  11.133 +    READ_COUNTER(8,  MSR_P4_FLAME_COUNTER0); // slots 8-11: FLAME counters
  11.134 +    READ_COUNTER(9,  MSR_P4_FLAME_COUNTER1);
  11.135 +    READ_COUNTER(10, MSR_P4_FLAME_COUNTER2);
  11.136 +    READ_COUNTER(11, MSR_P4_FLAME_COUNTER3);
  11.137 +
  11.138 +    READ_COUNTER(12, MSR_P4_IQ_COUNTER0); // slots 12-17: IQ counters
  11.139 +    READ_COUNTER(13, MSR_P4_IQ_COUNTER1);
  11.140 +    READ_COUNTER(14, MSR_P4_IQ_COUNTER2);
  11.141 +    READ_COUNTER(15, MSR_P4_IQ_COUNTER3);
  11.142 +    READ_COUNTER(16, MSR_P4_IQ_COUNTER4);
  11.143 +    READ_COUNTER(17, MSR_P4_IQ_COUNTER5);
  11.144 +
  11.145 +    return;    
  11.146 +}
  11.147 +
  11.148 +static int show_perfcntr(struct seq_file *m, void *v)
  11.149 +{
  11.150 +    int i, j;
  11.151 +    // seq_file show: one line per selected CPU: id, TSC, then the 18 counters.
  11.152 +    // Get each physical cpu to read counters
  11.153 +    perfcntrs.cpu_mask = CPUMASK;
  11.154 +    // Snapshot on the other CPUs via smp_call_function, then on this one.
  11.155 +    smp_call_function(show_perfcntr_for, NULL, 1, 1);
  11.156 +    show_perfcntr_for(NULL);
  11.157 +
  11.158 +    for (i = 0; i < 32; i++) {
  11.159 +        if (((1 << i) & perfcntrs.cpu_mask)) { // read the mask, do not re-assign it
  11.160 +            counters_t *c = &perfcntrs.cpus[i];
  11.161 +            seq_printf(m, "%u %llu", c->processor, c->tsc);
  11.162 +            for (j = 0; j < 18; j++) {
  11.163 +                seq_printf(m, " %llu", c->counters[j].cnt);
  11.164 +            }
  11.165 +            seq_printf(m, "\n");
  11.166 +        }
  11.167 +    }
  11.168 +
  11.169 +#if 0
  11.170 +    unsigned long long t;
  11.171 +    unsigned int l, h;
  11.172 +
  11.173 +    t = rpcc();
  11.174 +
  11.175 +
  11.176 +
  11.177 +#ifdef HUMAN_READABLE
  11.178 +    seq_printf(m,
  11.179 +               "show_perfcntr\nprocessor: %u\ntime: %llu\n"
  11.180 +               "last write: 0x%08lx%08lx -> 0x%lx (CPU%u)\n",
  11.181 +               smp_processor_id(),
  11.182 +               t,
  11.183 +               last_h,
  11.184 +               last_l,
  11.185 +               last_msr,
  11.186 +               last_cpu);
  11.187 +#else
  11.188 +    seq_printf(m, "%u %llu", smp_processor_id(), t);
  11.189 +#endif
  11.190 +
  11.191 +    SHOW_COUNTER(MSR_P4_BPU_COUNTER0);
  11.192 +    SHOW_COUNTER(MSR_P4_BPU_COUNTER1);
  11.193 +    SHOW_COUNTER(MSR_P4_BPU_COUNTER2);
  11.194 +    SHOW_COUNTER(MSR_P4_BPU_COUNTER3);
  11.195 +
  11.196 +    SHOW_COUNTER(MSR_P4_MS_COUNTER0);
  11.197 +    SHOW_COUNTER(MSR_P4_MS_COUNTER1);
  11.198 +    SHOW_COUNTER(MSR_P4_MS_COUNTER2);
  11.199 +    SHOW_COUNTER(MSR_P4_MS_COUNTER3);
  11.200 +
  11.201 +    SHOW_COUNTER(MSR_P4_FLAME_COUNTER0);
  11.202 +    SHOW_COUNTER(MSR_P4_FLAME_COUNTER1);
  11.203 +    SHOW_COUNTER(MSR_P4_FLAME_COUNTER2);
  11.204 +    SHOW_COUNTER(MSR_P4_FLAME_COUNTER3);
  11.205 +
  11.206 +    SHOW_COUNTER(MSR_P4_IQ_COUNTER0);
  11.207 +    SHOW_COUNTER(MSR_P4_IQ_COUNTER1);
  11.208 +    SHOW_COUNTER(MSR_P4_IQ_COUNTER2);
  11.209 +    SHOW_COUNTER(MSR_P4_IQ_COUNTER3);
  11.210 +    SHOW_COUNTER(MSR_P4_IQ_COUNTER4);
  11.211 +    SHOW_COUNTER(MSR_P4_IQ_COUNTER5);
  11.212 +
  11.213 +#ifndef HUMAN_READABLE
  11.214 +    seq_printf(m, "\n");
  11.215 +#endif
  11.216 +
  11.217 +#endif
  11.218 +
  11.219 +    return 0;
  11.220 +}
  11.221 +
  11.222 +/*****************************************************************************
  11.223 + * Show counter configuration                                                *
  11.224 + *****************************************************************************/
  11.225 +
  11.226 +typedef union {
  11.227 +    struct {
  11.228 +        unsigned long lo;
  11.229 +        unsigned long hi;
  11.230 +    };
  11.231 +    unsigned long long cnt;
  11.232 +} cpu_perfcfg_t;
  11.233 +
  11.234 +typedef struct configs_t_struct {
  11.235 +    int                processor;
  11.236 +    unsigned long long tsc;
  11.237 +    cpu_perfcfg_t      cccr[18];
  11.238 +    cpu_perfcfg_t      escr[0x42];
  11.239 +} configs_t;
  11.240 +
  11.241 +typedef struct perfcfg_t_struct {
  11.242 +    unsigned long cpu_mask;
  11.243 +    configs_t     cpus[4]; // Actually for each cpu in system
  11.244 +} perfcfg_t;
  11.245 +
  11.246 +static perfcfg_t perfcfgs;
  11.247 +
  11.248 +#define READ_CCCR(_i, _msr) rdmsr((_msr), l, h); c->cccr[_i].lo = l; \
  11.249 +    c->cccr[_i].hi = h;
  11.250 +#define READ_ESCR(_i, _msr) rdmsr((_msr), l, h); c->escr[_i].lo = l; \
  11.251 +    c->escr[_i].hi = h;
  11.252 +
  11.253 +static void show_perfcfg_for(void *v)
  11.254 +{
  11.255 +    unsigned int l, h;
  11.256 +
  11.257 +    perfcfg_t *p = &perfcfgs;
  11.258 +    configs_t *c;
  11.259 +
  11.260 +    if (!((1 << smp_processor_id()) & p->cpu_mask))
  11.261 +        return;
  11.262 +
  11.263 +    c = &p->cpus[smp_processor_id()];
  11.264 +
  11.265 +    c->processor = smp_processor_id();
  11.266 +    c->tsc = rpcc();
  11.267 +
  11.268 +    READ_CCCR(0,  MSR_P4_BPU_CCCR0);
  11.269 +    READ_CCCR(1,  MSR_P4_BPU_CCCR1);
  11.270 +    READ_CCCR(2,  MSR_P4_BPU_CCCR2);
  11.271 +    READ_CCCR(3,  MSR_P4_BPU_CCCR3);
  11.272 +
  11.273 +    READ_CCCR(4,  MSR_P4_MS_CCCR0);
  11.274 +    READ_CCCR(5,  MSR_P4_MS_CCCR1);
  11.275 +    READ_CCCR(6,  MSR_P4_MS_CCCR2);
  11.276 +    READ_CCCR(7,  MSR_P4_MS_CCCR3);
  11.277 +
  11.278 +    READ_CCCR(8,  MSR_P4_FLAME_CCCR0);
  11.279 +    READ_CCCR(9,  MSR_P4_FLAME_CCCR1);
  11.280 +    READ_CCCR(10, MSR_P4_FLAME_CCCR2);
  11.281 +    READ_CCCR(11, MSR_P4_FLAME_CCCR3);
  11.282 +
  11.283 +    READ_CCCR(12, MSR_P4_IQ_CCCR0);
  11.284 +    READ_CCCR(13, MSR_P4_IQ_CCCR1);
  11.285 +    READ_CCCR(14, MSR_P4_IQ_CCCR2);
  11.286 +    READ_CCCR(15, MSR_P4_IQ_CCCR3);
  11.287 +    READ_CCCR(16, MSR_P4_IQ_CCCR4);
  11.288 +    READ_CCCR(17, MSR_P4_IQ_CCCR5);
  11.289 +
  11.290 +    READ_ESCR(0x00, MSR_P4_BSU_ESCR0);
  11.291 +    READ_ESCR(0x02, MSR_P4_FSB_ESCR0);
  11.292 +    READ_ESCR(0x0a, MSR_P4_MOB_ESCR0);
  11.293 +    READ_ESCR(0x0c, MSR_P4_PMH_ESCR0);
  11.294 +    READ_ESCR(0x12, MSR_P4_BPU_ESCR0);
  11.295 +    READ_ESCR(0x14, MSR_P4_IS_ESCR0);
  11.296 +    READ_ESCR(0x16, MSR_P4_ITLB_ESCR0);
  11.297 +    READ_ESCR(0x28, MSR_P4_IX_ESCR0);
  11.298 +    READ_ESCR(0x01, MSR_P4_BSU_ESCR1);
  11.299 +    READ_ESCR(0x03, MSR_P4_FSB_ESCR1);
  11.300 +    READ_ESCR(0x0b, MSR_P4_MOB_ESCR1);
  11.301 +    READ_ESCR(0x0d, MSR_P4_PMH_ESCR1);
  11.302 +    READ_ESCR(0x13, MSR_P4_BPU_ESCR1);
  11.303 +    READ_ESCR(0x15, MSR_P4_IS_ESCR1);
  11.304 +    READ_ESCR(0x17, MSR_P4_ITLB_ESCR1);
  11.305 +    READ_ESCR(0x29, MSR_P4_IX_ESCR1);
  11.306 +    READ_ESCR(0x20, MSR_P4_MS_ESCR0);
  11.307 +    READ_ESCR(0x22, MSR_P4_TBPU_ESCR0);
  11.308 +    READ_ESCR(0x24, MSR_P4_TC_ESCR0);
  11.309 +    READ_ESCR(0x21, MSR_P4_MS_ESCR1);
  11.310 +    READ_ESCR(0x23, MSR_P4_TBPU_ESCR1);
  11.311 +    READ_ESCR(0x25, MSR_P4_TC_ESCR1);
  11.312 +    READ_ESCR(0x04, MSR_P4_FIRM_ESCR0);
  11.313 +    READ_ESCR(0x06, MSR_P4_FLAME_ESCR0);
  11.314 +    READ_ESCR(0x08, MSR_P4_DAC_ESCR0);
  11.315 +    READ_ESCR(0x0e, MSR_P4_SAAT_ESCR0);
  11.316 +    READ_ESCR(0x10, MSR_P4_U2L_ESCR0);
  11.317 +    READ_ESCR(0x05, MSR_P4_FIRM_ESCR1);
  11.318 +    READ_ESCR(0x07, MSR_P4_FLAME_ESCR1);
  11.319 +    READ_ESCR(0x09, MSR_P4_DAC_ESCR1);
  11.320 +    READ_ESCR(0x0f, MSR_P4_SAAT_ESCR1);
  11.321 +    READ_ESCR(0x11, MSR_P4_U2L_ESCR1);
  11.322 +    READ_ESCR(0x18, MSR_P4_CRU_ESCR0);
  11.323 +    READ_ESCR(0x2c, MSR_P4_CRU_ESCR2);
  11.324 +    READ_ESCR(0x40, MSR_P4_CRU_ESCR4);
  11.325 +    READ_ESCR(0x1a, MSR_P4_IQ_ESCR0);
  11.326 +    READ_ESCR(0x1c, MSR_P4_RAT_ESCR0);
  11.327 +    READ_ESCR(0x1e, MSR_P4_SSU_ESCR0);
  11.328 +    READ_ESCR(0x2a, MSR_P4_ALF_ESCR0);
  11.329 +    READ_ESCR(0x19, MSR_P4_CRU_ESCR1);
  11.330 +    READ_ESCR(0x2d, MSR_P4_CRU_ESCR3);
  11.331 +    READ_ESCR(0x41, MSR_P4_CRU_ESCR5);
  11.332 +    READ_ESCR(0x1b, MSR_P4_IQ_ESCR1);
  11.333 +    READ_ESCR(0x1d, MSR_P4_RAT_ESCR1);
  11.334 +    READ_ESCR(0x2b, MSR_P4_ALF_ESCR1);
  11.335 +
  11.336 +    return;    
  11.337 +}
  11.338 +
  11.339 +static char *escr_names[] = {
  11.340 +    "BSU_ESCR0",
  11.341 +    "BSU_ESCR1",
  11.342 +    "FSB_ESCR0",
  11.343 +    "FSB_ESCR1",
  11.344 +    "FIRM_ESCR0",
  11.345 +    "FIRM_ESCR1",
  11.346 +    "FLAME_ESCR0",
  11.347 +    "FLAME_ESCR1",
  11.348 +    "DAC_ESCR0",
  11.349 +    "DAC_ESCR1",
  11.350 +    "MOB_ESCR0",
  11.351 +    "MOB_ESCR1",
  11.352 +    "PMH_ESCR0",
  11.353 +    "PMH_ESCR1",
  11.354 +    "SAAT_ESCR0",
  11.355 +    "SAAT_ESCR1",
  11.356 +    "U2L_ESCR0",
  11.357 +    "U2L_ESCR1",
  11.358 +    "BPU_ESCR0",
  11.359 +    "BPU_ESCR1",
  11.360 +    "IS_ESCR0",
  11.361 +    "IS_ESCR1",
  11.362 +    "ITLB_ESCR0",
  11.363 +    "ITLB_ESCR1",
  11.364 +    "CRU_ESCR0",
  11.365 +    "CRU_ESCR1",
  11.366 +    "IQ_ESCR0",
  11.367 +    "IQ_ESCR1",
  11.368 +    "RAT_ESCR0",
  11.369 +    "RAT_ESCR1",
  11.370 +    "SSU_ESCR0",
  11.371 +    "SSU_ESCR1",
  11.372 +    "MS_ESCR0",
  11.373 +    "MS_ESCR1",
  11.374 +    "TBPU_ESCR0",
  11.375 +    "TBPU_ESCR1",
  11.376 +    "TC_ESCR0",
  11.377 +    "TC_ESCR1",
  11.378 +    "0x3c6",
  11.379 +    "0x3c7",
  11.380 +    "IX_ESCR0",
  11.381 +    "IX_ESCR1",
  11.382 +    "ALF_ESCR0",
  11.383 +    "ALF_ESCR1",
  11.384 +    "CRU_ESCR2",
  11.385 +    "CRU_ESCR3",
  11.386 +    "0x3ce",
  11.387 +    "0x3cf",
  11.388 +    "0x3d0",
  11.389 +    "0x3d1",
  11.390 +    "0x3d2",
  11.391 +    "0x3d3",
  11.392 +    "0x3d4",
  11.393 +    "0x3d5",
  11.394 +    "0x3d6",
  11.395 +    "0x3d7",
  11.396 +    "0x3d8",
  11.397 +    "0x3d9",
  11.398 +    "0x3da",
  11.399 +    "0x3db",
  11.400 +    "0x3dc",
  11.401 +    "0x3dd",
  11.402 +    "0x3de",
  11.403 +    "0x3df",
  11.404 +    "CRU_ESCR4",
  11.405 +    "CRU_ESCR5"
  11.406 +};
  11.407 +
  11.408 +static unsigned long escr_map_0[] = 
  11.409 +{MSR_P4_BPU_ESCR0, MSR_P4_IS_ESCR0,
  11.410 + MSR_P4_MOB_ESCR0, MSR_P4_ITLB_ESCR0,
  11.411 + MSR_P4_PMH_ESCR0, MSR_P4_IX_ESCR0,
  11.412 + MSR_P4_FSB_ESCR0, MSR_P4_BSU_ESCR0}; //BPU even
  11.413 +static unsigned long escr_map_1[] = 
  11.414 +    {MSR_P4_BPU_ESCR1, MSR_P4_IS_ESCR1,
  11.415 +     MSR_P4_MOB_ESCR1, MSR_P4_ITLB_ESCR1,
  11.416 +     MSR_P4_PMH_ESCR1, MSR_P4_IX_ESCR1,
  11.417 +     MSR_P4_FSB_ESCR1, MSR_P4_BSU_ESCR1}; //BPU odd
  11.418 +static unsigned long escr_map_2[] = 
  11.419 +    {MSR_P4_MS_ESCR0, MSR_P4_TC_ESCR0, MSR_P4_TBPU_ESCR0,
  11.420 +     0, 0, 0, 0, 0}; //MS even
  11.421 +static unsigned long escr_map_3[] = 
  11.422 +    {MSR_P4_MS_ESCR1, MSR_P4_TC_ESCR1, MSR_P4_TBPU_ESCR1,
  11.423 +     0, 0, 0, 0, 0}; //MS odd
  11.424 +static unsigned long escr_map_4[] = 
  11.425 +    {MSR_P4_FLAME_ESCR0, MSR_P4_FIRM_ESCR0, MSR_P4_SAAT_ESCR0,
  11.426 +     MSR_P4_U2L_ESCR0, 0, MSR_P4_DAC_ESCR0, 0, 0}; //FLAME even
  11.427 +static unsigned long escr_map_5[] = 
  11.428 +    {MSR_P4_FLAME_ESCR1, MSR_P4_FIRM_ESCR1, MSR_P4_SAAT_ESCR1,
  11.429 +     MSR_P4_U2L_ESCR1, 0, MSR_P4_DAC_ESCR1, 0, 0}; //FLAME odd
  11.430 +static unsigned long escr_map_6[] = 
  11.431 +    {MSR_P4_IQ_ESCR0, MSR_P4_ALF_ESCR0,
  11.432 +     MSR_P4_RAT_ESCR0, MSR_P4_SSU_ESCR0,
  11.433 +     MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR4, 0}; //IQ even
  11.434 +static unsigned long escr_map_7[] = 
  11.435 +    {MSR_P4_IQ_ESCR1, MSR_P4_ALF_ESCR1,
  11.436 +     MSR_P4_RAT_ESCR1, 0,
  11.437 +     MSR_P4_CRU_ESCR1, MSR_P4_CRU_ESCR3, MSR_P4_CRU_ESCR5, 0}; //IQ odd
  11.438 +
  11.439 +static unsigned long *escr_map[] = {
  11.440 +    escr_map_0,
  11.441 +    escr_map_1,
  11.442 +    escr_map_2,
  11.443 +    escr_map_3,
  11.444 +    escr_map_4,
  11.445 +    escr_map_5,
  11.446 +    escr_map_6,
  11.447 +    escr_map_7,
  11.448 +};
  11.449 +
  11.450 +unsigned long get_escr_msr(int c, int e)
  11.451 +{
  11.452 +    int index = -1;
  11.453 +
  11.454 +    // Get the ESCR MSR address from the counter number and the ESCR number.
  11.455 +    switch (c) {
  11.456 +    case P4_BPU_COUNTER0_NUMBER:
  11.457 +    case P4_BPU_COUNTER1_NUMBER:
  11.458 +	index = 0;
  11.459 +	break;
  11.460 +    case P4_BPU_COUNTER2_NUMBER:
  11.461 +    case P4_BPU_COUNTER3_NUMBER:	
  11.462 +	index = 1;
  11.463 +	break;
  11.464 +    case P4_MS_COUNTER0_NUMBER:
  11.465 +    case P4_MS_COUNTER1_NUMBER:
  11.466 +	index = 2; // probably !
  11.467 +	break;
  11.468 +    case P4_MS_COUNTER2_NUMBER:
  11.469 +    case P4_MS_COUNTER3_NUMBER:
  11.470 +	index = 3; // probably !
  11.471 +	break;
  11.472 +    case P4_FLAME_COUNTER0_NUMBER:
  11.473 +    case P4_FLAME_COUNTER1_NUMBER:
  11.474 +	index = 4; // probably !
  11.475 +	break;
  11.476 +    case P4_FLAME_COUNTER2_NUMBER:
  11.477 +    case P4_FLAME_COUNTER3_NUMBER:
  11.478 +	index = 5; // probably !
  11.479 +	break;
  11.480 +    case P4_IQ_COUNTER0_NUMBER:
  11.481 +    case P4_IQ_COUNTER1_NUMBER:
  11.482 +    case P4_IQ_COUNTER4_NUMBER:
  11.483 +	index = 6;
  11.484 +	break;
  11.485 +    case P4_IQ_COUNTER2_NUMBER:
  11.486 +    case P4_IQ_COUNTER3_NUMBER:
  11.487 +    case P4_IQ_COUNTER5_NUMBER:
  11.488 +	index = 7;
  11.489 +	break;
  11.490 +    }
  11.491 +
  11.492 +    if (index != -1) {
  11.493 +	return escr_map[index][e];
  11.494 +    }
  11.495 +
  11.496 +    return 0;
  11.497 +}
  11.498 +
  11.499 +static char null_string[] = ""; // returned when no ESCR name applies
  11.500 +static char *get_escr(int c, int e)
  11.501 +{
  11.502 +    unsigned long msr = get_escr_msr(c, e); // 0 when the pair maps to no ESCR
  11.503 +    // escr_names[] is indexed by (MSR address - 0x3a0) and covers 0x3a0..0x3e1.
  11.504 +    if ((msr >= 0x3a0) && (msr <= 0x3e1))
  11.505 +	return escr_names[(int)(msr - 0x3a0)];
  11.506 +    return null_string; // out of range, including msr == 0
  11.507 +}
  11.508 +
  11.509 +static int show_perfcfg(struct seq_file *m, void *v)
  11.510 +{
  11.511 +    int i, j;
  11.512 +    // seq_file show: for each CPU in the mask, print its 18 CCCRs decoded.
  11.513 +    // Get each physical cpu to read configs
  11.514 +    perfcfgs.cpu_mask = CPUMASK;
  11.515 +    // Snapshot on the other CPUs via smp_call_function, then on this one.
  11.516 +    smp_call_function(show_perfcfg_for, NULL, 1, 1);
  11.517 +    show_perfcfg_for(NULL);
  11.518 +
  11.519 +    for (i = 0; i < 32; i++) {
  11.520 +        if (((1 << i) & perfcfgs.cpu_mask)) { // read the mask, do not re-assign it
  11.521 +            configs_t *c = &perfcfgs.cpus[i];
  11.522 +            seq_printf(m, "----------------------------------------\n");
  11.523 +            seq_printf(m, "%u %llu\n", c->processor, c->tsc);
  11.524 +            for (j = 0; j < 18; j++) {
  11.525 +                seq_printf(m, "%08lx", c->cccr[j].lo);
  11.526 +                // ESCR lookup is keyed by counter j, not CPU i (assumes P4_*_NUMBER == cccr[] order; confirm in p4perf.h)
  11.527 +		if (!(c->cccr[j].lo & P4_CCCR_ENABLE))
  11.528 +		    seq_printf(m, " DISABLED");
  11.529 +		else {
  11.530 +		    unsigned long escr_msr =
  11.531 +			get_escr_msr(j, (int)((c->cccr[j].lo >> 13)&7));
  11.532 +		    seq_printf(m, " ESCR=%s",
  11.533 +			       get_escr(j, (int)((c->cccr[j].lo >> 13)&7)));
  11.534 +		    if ((escr_msr >= 0x3a0) && (escr_msr <= 0x3e1)) {
  11.535 +			unsigned long e = c->escr[(int)(escr_msr - 0x3a0)].lo;
  11.536 +			seq_printf(m, "(%08lx es=%lx mask=%lx", e,
  11.537 +				   (e >> 25) & 0x7f,
  11.538 +				   (e >> 9) & 0xffff);
  11.539 +			if ((e & P4_ESCR_T0_USR))
  11.540 +			    seq_printf(m, " T(0)USR");
  11.541 +			if ((e & P4_ESCR_T0_OS))
  11.542 +			    seq_printf(m, " T(0)OS");
  11.543 +			if ((e & P4_ESCR_T1_USR))
  11.544 +			    seq_printf(m, " T1USR");
  11.545 +			if ((e & P4_ESCR_T1_OS))
  11.546 +			    seq_printf(m, " T1OS");
  11.547 +			seq_printf(m, ")");
  11.548 +		    }
  11.549 +		    seq_printf(m, " AT=%u", (int)((c->cccr[j].lo >> 16)&3));
  11.550 +
  11.551 +		    if ((c->cccr[j].lo & P4_CCCR_OVF))
  11.552 +			seq_printf(m, " OVF");
  11.553 +		    if ((c->cccr[j].lo & P4_CCCR_CASCADE))
  11.554 +			seq_printf(m, " CASC");
  11.555 +		    if ((c->cccr[j].lo & P4_CCCR_FORCE_OVF))
  11.556 +			seq_printf(m, " F-OVF");
  11.557 +		    if ((c->cccr[j].lo & P4_CCCR_EDGE))
  11.558 +			seq_printf(m, " EDGE");
  11.559 +		    if ((c->cccr[j].lo & P4_CCCR_COMPLEMENT))
  11.560 +			seq_printf(m, " COMPL");
  11.561 +		    if ((c->cccr[j].lo & P4_CCCR_COMPARE))
  11.562 +			seq_printf(m, " CMP");
  11.563 +		    if ((c->cccr[j].lo & P4_CCCR_OVF_PMI_T0))
  11.564 +			seq_printf(m, " OVF_PMI(_T0)");
  11.565 +		    if ((c->cccr[j].lo & P4_CCCR_OVF_PMI_T1))
  11.566 +			seq_printf(m, " OVF_PMI_T1");
  11.567 +		}
  11.568 +		seq_printf(m, "\n");
  11.569 +            }
  11.570 +        }
  11.571 +    }
  11.572 +
  11.573 +    return 0;
  11.574 +}
  11.575 +
  11.576 +/*****************************************************************************
  11.577 + * Handle writes                                                             *
  11.578 + *****************************************************************************/
  11.579 +
  11.580 +static int set_msr_cpu_mask;
  11.581 +static unsigned long set_msr_addr;
  11.582 +static unsigned long set_msr_lo;
  11.583 +static unsigned long set_msr_hi;
  11.584 +
  11.585 +static void perfcntr_write_for(void *unused)
  11.586 +{ // Per-CPU callback: writes the staged set_msr_lo/set_msr_hi to set_msr_addr.
  11.587 +#ifdef NOHT // the mask filter is only compiled in when NOHT is defined
  11.588 +    if (((1 << smp_processor_id()) & set_msr_cpu_mask)) {
  11.589 +#endif
  11.590 +        //printk("perfcntr: wrmsr(%08lx, %08lx, %08lx)\n",
  11.591 +        //     set_msr_addr, set_msr_lo, set_msr_hi);
  11.592 +        wrmsr(set_msr_addr, set_msr_lo, set_msr_hi);
  11.593 +#ifdef NOHT
  11.594 +    }
  11.595 +#endif // without NOHT the wrmsr above runs on every CPU that calls this
  11.596 +}
  11.597 +
  11.598 +/*
  11.599 + * /proc write handler: parse "<cpu_mask> <msr> <value>" (all hex) and write
  11.600 + * <value> to MSR <msr> via perfcntr_write_for() on the CPUs it selects.
  11.601 + * Returns size on success, -EINVAL on bad input, -EFAULT on a bad buffer.
  11.602 + */
  11.603 +ssize_t perfcntr_write(struct file *f,
  11.604 +                       const  char *data,
  11.605 +                       size_t       size,
  11.606 +                       loff_t      *pos)
  11.607 +{
  11.608 +    char               buf[64]; // kernel copy of the user's command line
  11.609 +    char              *endp;
  11.610 +    unsigned long long v;
  11.611 +
  11.612 +    // data is a user-space pointer: copy it in and NUL-terminate it before
  11.613 +    // parsing, so the simple_strto*() calls cannot run past the input.
  11.614 +    if (size == 0 || size >= sizeof(buf))
  11.615 +        return -EINVAL;
  11.616 +    if (copy_from_user(buf, data, size))
  11.617 +        return -EFAULT;
  11.618 +    buf[size] = '\0';
  11.619 +
  11.620 +    set_msr_cpu_mask = (int)simple_strtoul(buf, &endp, 16);
  11.621 +    endp++; // skip past space
  11.622 +    if ((endp - buf) >= size)
  11.623 +        return -EINVAL;
  11.624 +
  11.625 +    set_msr_addr = simple_strtoul(endp, &endp, 16);
  11.626 +    endp++; // skip past space
  11.627 +    if ((endp - buf) >= size)
  11.628 +        return -EINVAL;
  11.629 +
  11.630 +    // 64-bit parse: simple_strtoul() would drop the high word on 32-bit.
  11.631 +    v = simple_strtoull(endp, &endp, 16);
  11.632 +    set_msr_lo = (unsigned long)(v & 0xffffffffULL);
  11.633 +    set_msr_hi = (unsigned long)(v >> 32);
  11.634 +
  11.635 +    // Run on the other CPUs via smp_call_function, then on this one.
  11.636 +    smp_call_function(perfcntr_write_for, NULL, 1, 1);
  11.637 +    perfcntr_write_for(NULL);
  11.638 +
  11.639 +    return size;
  11.640 +}
  11.641 +
  11.642 +/*****************************************************************************
  11.643 + * /proc stuff                                                               *
  11.644 + *****************************************************************************/
  11.645 +
  11.646 +static void *c_start(struct seq_file *m, loff_t *pos)
  11.647 +{
  11.648 +    //return *pos < NR_CPUS ? cpu_data + *pos : NULL;
  11.649 +    return *pos == 0 ? foobar : NULL;
  11.650 +}
  11.651 +
  11.652 +static void *c_next(struct seq_file *m, void *v, loff_t *pos)
  11.653 +{
  11.654 +    ++*pos;
  11.655 +    return c_start(m, pos);
  11.656 +}
  11.657 +
  11.658 +static void c_stop(struct seq_file *m, void *v)
  11.659 +{
  11.660 +}
  11.661 +
  11.662 +struct seq_operations perfcntr_op = {
  11.663 +    start:  c_start,
  11.664 +    next:   c_next,
  11.665 +    stop:   c_stop,
  11.666 +    show:   show_perfcntr,
  11.667 +};
  11.668 +
  11.669 +struct seq_operations perfcfg_op = {
  11.670 +    start:  c_start,
  11.671 +    next:   c_next,
  11.672 +    stop:   c_stop,
  11.673 +    show:   show_perfcfg,
  11.674 +};
  11.675 +
  11.676 +static int perfcntr_open(struct inode *inode, struct file *file)
  11.677 +{
  11.678 +    return seq_open(file, &perfcntr_op);
  11.679 +}
  11.680 +
  11.681 +static int perfcfg_open(struct inode *inode, struct file *file)
  11.682 +{
  11.683 +    return seq_open(file, &perfcfg_op);
  11.684 +}
  11.685 +
  11.686 +static struct file_operations proc_perfcntr_operations = {
  11.687 +    open:           perfcntr_open,
  11.688 +    read:           seq_read,
  11.689 +    write:          perfcntr_write,
  11.690 +    llseek:         seq_lseek,
  11.691 +    release:        seq_release,
  11.692 +};
  11.693 +
  11.694 +static struct file_operations proc_perfcfg_operations = {
  11.695 +    open:           perfcfg_open,
  11.696 +    read:           seq_read,
  11.697 +    write:          perfcntr_write,
  11.698 +    llseek:         seq_lseek,
  11.699 +    release:        seq_release,
  11.700 +};
  11.701 +
  11.702 +static void create_seq_entry(char *name, mode_t mode, struct file_operations *f)
  11.703 +{
  11.704 +    struct proc_dir_entry *entry;
  11.705 +    entry = create_proc_entry(name, mode, NULL);
  11.706 +    if (entry)
  11.707 +        entry->proc_fops = f;
  11.708 +}
  11.709 +
  11.710 +/*****************************************************************************
  11.711 + * Module init and cleanup                                                   *
  11.712 + *****************************************************************************/
  11.713 +
  11.714 +static int __init perfcntr_init(void)
  11.715 +{
  11.716 +    printk(version);
  11.717 +
  11.718 +    create_seq_entry("perfcntr", 0777, &proc_perfcntr_operations);
  11.719 +    create_seq_entry("perfcntr_config", 0777, &proc_perfcfg_operations);
  11.720 +
  11.721 +    return 0;
  11.722 +}
  11.723 +
  11.724 +static void __exit perfcntr_exit(void)
  11.725 +{
  11.726 +    remove_proc_entry("perfcntr", NULL);
  11.727 +    remove_proc_entry("perfcntr_config", NULL);
  11.728 +}
  11.729 +
  11.730 +module_init(perfcntr_init);
  11.731 +module_exit(perfcntr_exit);
  11.732 +
  11.733 +/* End of $RCSfile$ */
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/tools/misc/cpuperf/p4perf.h	Wed Mar 02 17:18:39 2005 +0000
    12.3 @@ -0,0 +1,382 @@
    12.4 +/*
    12.5 + * P4 Performance counter stuff.
    12.6 + *
    12.7 + * P4 Xeon with Hyperthreading has counters per physical package which can
    12.8 + * count events from either logical CPU. However, in many cases more than one
    12.9 + * ESCR and CCCR/counter can be used to count the same event. For instr or
   12.10 + * uops retired, use either ESCR0/IQ_CCCR0 or ESCR1/IQ_CCCR2.
   12.11 + *
   12.12 + * $Id: p4perf.h,v 1.2 2003/10/13 16:51:41 jrb44 Exp $
   12.13 + *
   12.14 + * $Log: p4perf.h,v $
   12.15 + * Revision 1.2  2003/10/13 16:51:41  jrb44
   12.16 + * *** empty log message ***
   12.17 + *
   12.18 + */
   12.19 +
   12.20 +#ifndef P4PERF_H
   12.21 +#define P4PERF_H
   12.22 +
   12.23 +#ifdef __KERNEL__
   12.24 +#include <asm/msr.h>
   12.25 +#endif
   12.26 +
   12.27 +/*****************************************************************************
   12.28 + * Performance counter configuration.                                        *
   12.29 + *****************************************************************************/
   12.30 +
   12.31 +#ifndef P6_EVNTSEL_OS
   12.32 +# define P6_EVNTSEL_OS     (1 << 17)
   12.33 +# define P6_EVNTSEL_USR    (1 << 16)
   12.34 +# define P6_EVNTSEL_E      (1 << 18)
   12.35 +# define P6_EVNTSEL_EN     (1 << 22)
   12.36 +#endif
   12.37 +#define P6_PERF_INST_RETIRED 0xc0
   12.38 +#define P6_PERF_UOPS_RETIRED 0xc2
   12.39 +
   12.40 +#define P4_ESCR_USR                    (1 << 2)
   12.41 +#define P4_ESCR_OS                     (1 << 3)
   12.42 +#define P4_ESCR_T0_USR                 (1 << 2) /* First logical CPU  */
   12.43 +#define P4_ESCR_T0_OS                  (1 << 3)
   12.44 +#define P4_ESCR_T1_USR                 (1 << 0) /* Second logical CPU */
   12.45 +#define P4_ESCR_T1_OS                  (1 << 1)
   12.46 +#define P4_ESCR_TE                     (1 << 4)
   12.47 +#define P4_ESCR_THREADS(t)             (t)
   12.48 +#define P4_ESCR_TV(tag)                (tag << 5)
   12.49 +#define P4_ESCR_EVNTSEL(e)             (e << 25)
   12.50 +#define P4_ESCR_EVNTMASK(e)            (e << 9)
   12.51 +
   12.52 +#define P4_ESCR_EVNTSEL_FRONT_END      0x08
   12.53 +#define P4_ESCR_EVNTSEL_EXECUTION      0x0c
   12.54 +#define P4_ESCR_EVNTSEL_REPLAY         0x09
   12.55 +#define P4_ESCR_EVNTSEL_INSTR_RETIRED  0x02
   12.56 +#define P4_ESCR_EVNTSEL_UOPS_RETIRED   0x01
   12.57 +#define P4_ESCR_EVNTSEL_UOP_TYPE       0x02
   12.58 +#define P4_ESCR_EVNTSEL_RET_MBR_TYPE   0x05
   12.59 +//#define P4_ESCR_EVNTSEL_RET_MBR_TYPE   0x04
   12.60 +
   12.61 +#define P4_ESCR_EVNTMASK_FE_NBOGUS     0x01   /* FE_*: presumably masks for EVNTSEL_FRONT_END -- confirm */
   12.62 +#define P4_ESCR_EVNTMASK_FE_BOGUS      0x02
   12.63 +
   12.64 +#define P4_ESCR_EVNTMASK_EXEC_NBOGUS0  0x01   /* EXEC_*: bits 0-3 are NBOGUS0..3 */
   12.65 +#define P4_ESCR_EVNTMASK_EXEC_NBOGUS1  0x02
   12.66 +#define P4_ESCR_EVNTMASK_EXEC_NBOGUS2  0x04
   12.67 +#define P4_ESCR_EVNTMASK_EXEC_NBOGUS3  0x08
   12.68 +#define P4_ESCR_EVNTMASK_EXEC_BOGUS0   0x10   /* EXEC_*: bits 4-7 are BOGUS0..3 */
   12.69 +#define P4_ESCR_EVNTMASK_EXEC_BOGUS1   0x20
   12.70 +#define P4_ESCR_EVNTMASK_EXEC_BOGUS2   0x40
   12.71 +#define P4_ESCR_EVNTMASK_EXEC_BOGUS3   0x80
   12.72 +
   12.73 +#define P4_ESCR_EVNTMASK_REPLAY_NBOGUS 0x01   /* REPLAY_*: presumably masks for EVNTSEL_REPLAY -- confirm */
   12.74 +#define P4_ESCR_EVNTMASK_REPLAY_BOGUS  0x02
   12.75 +
   12.76 +#define P4_ESCR_EVNTMASK_IRET_NB_NTAG  0x01   /* IRET_*: NB/B presumably non-bogus/bogus, */
   12.77 +#define P4_ESCR_EVNTMASK_IRET_NB_TAG   0x02   /* NTAG/TAG presumably not tagged/tagged -- confirm */
   12.78 +#define P4_ESCR_EVNTMASK_IRET_B_NTAG   0x04
   12.79 +#define P4_ESCR_EVNTMASK_IRET_B_TAG    0x08
   12.80 +
   12.81 +#define P4_ESCR_EVNTMASK_URET_NBOGUS   0x01   /* URET_*: presumably masks for EVNTSEL_UOPS_RETIRED -- confirm */
   12.82 +#define P4_ESCR_EVNTMASK_URET_BOGUS    0x02
   12.83 +
   12.84 +#define P4_ESCR_EVNTMASK_UOP_LOADS     0x02   /* UOP_*: bit 0 is not defined here */
   12.85 +#define P4_ESCR_EVNTMASK_UOP_STORES    0x04
   12.86 +
   12.87 +#define P4_ESCR_EVNTMASK_RMBRT_COND    0x02   /* RMBRT_*: presumably for EVNTSEL_RET_MBR_TYPE -- confirm */
   12.88 +#define P4_ESCR_EVNTMASK_RMBRT_CALL    0x04
   12.89 +#define P4_ESCR_EVNTMASK_RMBRT_RETURN  0x08
   12.90 +#define P4_ESCR_EVNTMASK_RMBRT_INDIR   0x10
   12.91 +
   12.92 +#define P4_ESCR_EVNTMASK_RBRT_COND     0x02   /* RBRT_*: same bit layout as RMBRT_* above */
   12.93 +#define P4_ESCR_EVNTMASK_RBRT_CALL     0x04
   12.94 +#define P4_ESCR_EVNTMASK_RBRT_RETURN   0x08
   12.95 +#define P4_ESCR_EVNTMASK_RBRT_INDIR    0x10
   12.96 +
   12.97 +//#define P4_ESCR_EVNTMASK_INSTR_RETIRED 0x01  /* Non bogus, not tagged */
   12.98 +//#define P4_ESCR_EVNTMASK_UOPS_RETIRED  0x01  /* Non bogus             */
   12.99 +
  12.100 +#define P4_CCCR_OVF                    (1U << 31)  /* unsigned: 1 << 31 overflows a signed int */
  12.101 +#define P4_CCCR_CASCADE                (1 << 30)
  12.102 +#define P4_CCCR_FORCE_OVF              (1 << 25)
  12.103 +#define P4_CCCR_EDGE                   (1 << 24)
  12.104 +#define P4_CCCR_COMPLEMENT             (1 << 19)
  12.105 +#define P4_CCCR_COMPARE                (1 << 18)
  12.106 +#define P4_CCCR_THRESHOLD(t)           ((t) << 20)     /* field at bit 20; argument parenthesised */
  12.107 +#define P4_CCCR_ENABLE                 (1 << 12)
  12.108 +#define P4_CCCR_ESCR(escr)             ((escr) << 13)  /* field at bit 13; argument parenthesised */
  12.109 +#define P4_CCCR_ACTIVE_THREAD(t)       ((t) << 16)     /* Set to binary 11 (3); cf. P4_CCCR_RESERVED */
  12.110 +#define P4_CCCR_OVF_PMI_T0             (1 << 26)
  12.111 +#define P4_CCCR_OVF_PMI_T1             (1 << 27)
  12.112 +#define P4_CCCR_RESERVED               (3 << 16)   /* same value as P4_CCCR_ACTIVE_THREAD(3) */
  12.113 +#define P4_CCCR_OVF_PMI                (1 << 26)   /* same bit as P4_CCCR_OVF_PMI_T0 */
  12.114 +
  12.115 +// BPU group: counter / CCCR / ESCR MSR addresses and per-ESCR numbers
  12.116 +#define MSR_P4_BPU_COUNTER0            0x300
  12.117 +#define MSR_P4_BPU_COUNTER1            0x301
  12.118 +#define MSR_P4_BPU_CCCR0               0x360   /* CCCRn address == COUNTERn address + 0x60 */
  12.119 +#define MSR_P4_BPU_CCCR1               0x361
  12.120 +
  12.121 +#define MSR_P4_BPU_COUNTER2            0x302
  12.122 +#define MSR_P4_BPU_COUNTER3            0x303
  12.123 +#define MSR_P4_BPU_CCCR2               0x362
  12.124 +#define MSR_P4_BPU_CCCR3               0x363
  12.125 +
  12.126 +#define MSR_P4_BSU_ESCR0               0x3a0   /* ESCR0 addresses are even; each ESCR1 below is ESCR0 + 1 */
  12.127 +#define MSR_P4_FSB_ESCR0               0x3a2
  12.128 +#define MSR_P4_MOB_ESCR0               0x3aa
  12.129 +#define MSR_P4_PMH_ESCR0               0x3ac
  12.130 +#define MSR_P4_BPU_ESCR0               0x3b2
  12.131 +#define MSR_P4_IS_ESCR0                0x3b4
  12.132 +#define MSR_P4_ITLB_ESCR0              0x3b6
  12.133 +#define MSR_P4_IX_ESCR0                0x3c8
  12.134 +
  12.135 +#define P4_BSU_ESCR0_NUMBER            7       /* presumably the value for P4_CCCR_ESCR() -- confirm */
  12.136 +#define P4_FSB_ESCR0_NUMBER            6
  12.137 +#define P4_MOB_ESCR0_NUMBER            2
  12.138 +#define P4_PMH_ESCR0_NUMBER            4
  12.139 +#define P4_BPU_ESCR0_NUMBER            0
  12.140 +#define P4_IS_ESCR0_NUMBER             1
  12.141 +#define P4_ITLB_ESCR0_NUMBER           3
  12.142 +#define P4_IX_ESCR0_NUMBER             5
  12.143 +
  12.144 +#define MSR_P4_BSU_ESCR1               0x3a1
  12.145 +#define MSR_P4_FSB_ESCR1               0x3a3
  12.146 +#define MSR_P4_MOB_ESCR1               0x3ab
  12.147 +#define MSR_P4_PMH_ESCR1               0x3ad
  12.148 +#define MSR_P4_BPU_ESCR1               0x3b3
  12.149 +#define MSR_P4_IS_ESCR1                0x3b5
  12.150 +#define MSR_P4_ITLB_ESCR1              0x3b7
  12.151 +#define MSR_P4_IX_ESCR1                0x3c9
  12.152 +
  12.153 +#define P4_BSU_ESCR1_NUMBER            7       /* each ESCR1 number equals its ESCR0 number */
  12.154 +#define P4_FSB_ESCR1_NUMBER            6
  12.155 +#define P4_MOB_ESCR1_NUMBER            2
  12.156 +#define P4_PMH_ESCR1_NUMBER            4
  12.157 +#define P4_BPU_ESCR1_NUMBER            0
  12.158 +#define P4_IS_ESCR1_NUMBER             1
  12.159 +#define P4_ITLB_ESCR1_NUMBER           3
  12.160 +#define P4_IX_ESCR1_NUMBER             5
  12.161 +
  12.162 +// MS group: counter / CCCR / ESCR MSR addresses and per-ESCR numbers
  12.163 +#define MSR_P4_MS_COUNTER0             0x304
  12.164 +#define MSR_P4_MS_COUNTER1             0x305
  12.165 +#define MSR_P4_MS_CCCR0                0x364   /* CCCRn address == COUNTERn address + 0x60 */
  12.166 +#define MSR_P4_MS_CCCR1                0x365
  12.167 +
  12.168 +#define MSR_P4_MS_COUNTER2             0x306
  12.169 +#define MSR_P4_MS_COUNTER3             0x307
  12.170 +#define MSR_P4_MS_CCCR2                0x366
  12.171 +#define MSR_P4_MS_CCCR3                0x367
  12.172 +
  12.173 +#define MSR_P4_MS_ESCR0                0x3c0   /* each ESCR1 below is ESCR0 + 1 */
  12.174 +#define MSR_P4_TBPU_ESCR0              0x3c2
  12.175 +#define MSR_P4_TC_ESCR0                0x3c4
  12.176 +
  12.177 +#define P4_MS_ESCR0_NUMBER             0       /* presumably the value for P4_CCCR_ESCR() -- confirm */
  12.178 +#define P4_TBPU_ESCR0_NUMBER           2
  12.179 +#define P4_TC_ESCR0_NUMBER             1
  12.180 +
  12.181 +#define MSR_P4_MS_ESCR1                0x3c1
  12.182 +#define MSR_P4_TBPU_ESCR1              0x3c3
  12.183 +#define MSR_P4_TC_ESCR1                0x3c5
  12.184 +
  12.185 +#define P4_MS_ESCR1_NUMBER             0       /* each ESCR1 number equals its ESCR0 number */
  12.186 +#define P4_TBPU_ESCR1_NUMBER           2
  12.187 +#define P4_TC_ESCR1_NUMBER             1
  12.188 +
  12.189 +// FLAME group: counter / CCCR / ESCR MSR addresses and per-ESCR numbers
  12.190 +#define MSR_P4_FLAME_COUNTER0          0x308
  12.191 +#define MSR_P4_FLAME_COUNTER1          0x309
  12.192 +#define MSR_P4_FLAME_CCCR0             0x368   /* CCCRn address == COUNTERn address + 0x60 */
  12.193 +#define MSR_P4_FLAME_CCCR1             0x369
  12.194 +
  12.195 +#define MSR_P4_FLAME_COUNTER2          0x30a
  12.196 +#define MSR_P4_FLAME_COUNTER3          0x30b
  12.197 +#define MSR_P4_FLAME_CCCR2             0x36a
  12.198 +#define MSR_P4_FLAME_CCCR3             0x36b
  12.199 +
  12.200 +#define MSR_P4_FIRM_ESCR0              0x3a4   /* each ESCR1 below is ESCR0 + 1 */
  12.201 +#define MSR_P4_FLAME_ESCR0             0x3a6
  12.202 +#define MSR_P4_DAC_ESCR0               0x3a8
  12.203 +#define MSR_P4_SAAT_ESCR0              0x3ae
  12.204 +#define MSR_P4_U2L_ESCR0               0x3b0
  12.205 +
  12.206 +#define P4_FIRM_ESCR0_NUMBER           1       /* presumably the value for P4_CCCR_ESCR() -- confirm */
  12.207 +#define P4_FLAME_ESCR0_NUMBER          0
  12.208 +#define P4_DAC_ESCR0_NUMBER            5
  12.209 +#define P4_SAAT_ESCR0_NUMBER           2
  12.210 +#define P4_U2L_ESCR0_NUMBER            3
  12.211 +
  12.212 +#define MSR_P4_FIRM_ESCR1              0x3a5
  12.213 +#define MSR_P4_FLAME_ESCR1             0x3a7
  12.214 +#define MSR_P4_DAC_ESCR1               0x3a9
  12.215 +#define MSR_P4_SAAT_ESCR1              0x3af
  12.216 +#define MSR_P4_U2L_ESCR1               0x3b1
  12.217 +
  12.218 +#define P4_FIRM_ESCR1_NUMBER           1       /* each ESCR1 number equals its ESCR0 number */
  12.219 +#define P4_FLAME_ESCR1_NUMBER          0
  12.220 +#define P4_DAC_ESCR1_NUMBER            5
  12.221 +#define P4_SAAT_ESCR1_NUMBER           2
  12.222 +#define P4_U2L_ESCR1_NUMBER            3
  12.223 +
  12.224 +// IQ group: counter / CCCR / ESCR MSR addresses and per-ESCR numbers (six counters)
  12.225 +#define MSR_P4_IQ_COUNTER0             0x30c
  12.226 +#define MSR_P4_IQ_COUNTER1             0x30d
  12.227 +#define MSR_P4_IQ_CCCR0                0x36c   /* CCCRn address == COUNTERn address + 0x60 */
  12.228 +#define MSR_P4_IQ_CCCR1                0x36d
  12.229 +
  12.230 +#define MSR_P4_IQ_COUNTER2             0x30e
  12.231 +#define MSR_P4_IQ_COUNTER3             0x30f
  12.232 +#define MSR_P4_IQ_CCCR2                0x36e
  12.233 +#define MSR_P4_IQ_CCCR3                0x36f
  12.234 +
  12.235 +#define MSR_P4_IQ_COUNTER4             0x310
  12.236 +#define MSR_P4_IQ_COUNTER5             0x311
  12.237 +#define MSR_P4_IQ_CCCR4                0x370
  12.238 +#define MSR_P4_IQ_CCCR5                0x371
  12.239 +
  12.240 +#define MSR_P4_CRU_ESCR0               0x3b8   /* even-numbered CRU ESCRs (0/2/4) are listed here, */
  12.241 +#define MSR_P4_CRU_ESCR2               0x3cc   /* odd-numbered ones (1/3/5) in the second list     */
  12.242 +#define MSR_P4_CRU_ESCR4               0x3e0
  12.243 +#define MSR_P4_IQ_ESCR0                0x3ba
  12.244 +#define MSR_P4_RAT_ESCR0               0x3bc
  12.245 +#define MSR_P4_SSU_ESCR0               0x3be   /* no SSU_ESCR1 is defined in this file */
  12.246 +#define MSR_P4_ALF_ESCR0               0x3ca
  12.247 +
  12.248 +#define P4_CRU_ESCR0_NUMBER            4       /* presumably the value for P4_CCCR_ESCR() -- confirm */
  12.249 +#define P4_CRU_ESCR2_NUMBER            5
  12.250 +#define P4_CRU_ESCR4_NUMBER            6
  12.251 +#define P4_IQ_ESCR0_NUMBER             0
  12.252 +#define P4_RAT_ESCR0_NUMBER            2
  12.253 +#define P4_SSU_ESCR0_NUMBER            3
  12.254 +#define P4_ALF_ESCR0_NUMBER            1
  12.255 +
  12.256 +#define MSR_P4_CRU_ESCR1               0x3b9   /* each address here is its even-numbered partner + 1 */
  12.257 +#define MSR_P4_CRU_ESCR3               0x3cd
  12.258 +#define MSR_P4_CRU_ESCR5               0x3e1
  12.259 +#define MSR_P4_IQ_ESCR1                0x3bb
  12.260 +#define MSR_P4_RAT_ESCR1               0x3bd
  12.261 +#define MSR_P4_ALF_ESCR1               0x3cb
  12.262 +
  12.263 +#define P4_CRU_ESCR1_NUMBER            4       /* numbers match the even-numbered partners above */
  12.264 +#define P4_CRU_ESCR3_NUMBER            5
  12.265 +#define P4_CRU_ESCR5_NUMBER            6
  12.266 +#define P4_IQ_ESCR1_NUMBER             0
  12.267 +#define P4_RAT_ESCR1_NUMBER            2
  12.268 +#define P4_ALF_ESCR1_NUMBER            1
  12.269 +
  12.270 +#define P4_BPU_COUNTER0_NUMBER         0    /* counter numbers 0..17; each equals the */
  12.271 +#define P4_BPU_COUNTER1_NUMBER         1    /* matching MSR_P4_*_COUNTERn address     */
  12.272 +#define P4_BPU_COUNTER2_NUMBER         2    /* minus 0x300                            */
  12.273 +#define P4_BPU_COUNTER3_NUMBER         3
  12.274 +
  12.275 +#define P4_MS_COUNTER0_NUMBER          4
  12.276 +#define P4_MS_COUNTER1_NUMBER          5
  12.277 +#define P4_MS_COUNTER2_NUMBER          6
  12.278 +#define P4_MS_COUNTER3_NUMBER          7
  12.279 +
  12.280 +#define P4_FLAME_COUNTER0_NUMBER       8
  12.281 +#define P4_FLAME_COUNTER1_NUMBER       9
  12.282 +#define P4_FLAME_COUNTER2_NUMBER       10
  12.283 +#define P4_FLAME_COUNTER3_NUMBER       11
  12.284 +
  12.285 +#define P4_IQ_COUNTER0_NUMBER          12
  12.286 +#define P4_IQ_COUNTER1_NUMBER          13
  12.287 +#define P4_IQ_COUNTER2_NUMBER          14
  12.288 +#define P4_IQ_COUNTER3_NUMBER          15
  12.289 +#define P4_IQ_COUNTER4_NUMBER          16
  12.290 +#define P4_IQ_COUNTER5_NUMBER          17
  12.291 +
  12.292 +/* PEBS: MSR addresses and bit definitions for PEBS_ENABLE / PEBS_MATRIX_VERT.
  12.293 + */
  12.294 +#define MSR_P4_PEBS_ENABLE             0x3F1
  12.295 +#define MSR_P4_PEBS_MATRIX_VERT        0x3F2
  12.296 +
  12.297 +#define P4_PEBS_ENABLE_MY_THR          (1 << 25)
  12.298 +#define P4_PEBS_ENABLE_OTH_THR         (1 << 26)
  12.299 +#define P4_PEBS_ENABLE                 (1 << 24)
  12.300 +#define P4_PEBS_BIT0                   (1 << 0)
  12.301 +#define P4_PEBS_BIT1                   (1 << 1)
  12.302 +#define P4_PEBS_BIT2                   (1 << 2)
  12.303 +
  12.304 +#define P4_PEBS_MATRIX_VERT_BIT0       (1 << 0)
  12.305 +#define P4_PEBS_MATRIX_VERT_BIT1       (1 << 1)
  12.306 +#define P4_PEBS_MATRIX_VERT_BIT2       (1 << 2)
  12.307 +
  12.308 +/* Replay tagging: one PEBS_* bit and one VERT_* bit pattern per tag name.
  12.309 + */
  12.310 +#define P4_REPLAY_TAGGING_PEBS_L1LMR   P4_PEBS_BIT0
  12.311 +#define P4_REPLAY_TAGGING_PEBS_L2LMR   P4_PEBS_BIT1
  12.312 +#define P4_REPLAY_TAGGING_PEBS_DTLMR   P4_PEBS_BIT2
  12.313 +#define P4_REPLAY_TAGGING_PEBS_DTSMR   P4_PEBS_BIT2
  12.314 +#define P4_REPLAY_TAGGING_PEBS_DTAMR   P4_PEBS_BIT2
  12.315 +
  12.316 +#define P4_REPLAY_TAGGING_VERT_L1LMR   P4_PEBS_MATRIX_VERT_BIT0
  12.317 +#define P4_REPLAY_TAGGING_VERT_L2LMR   P4_PEBS_MATRIX_VERT_BIT0
  12.318 +#define P4_REPLAY_TAGGING_VERT_DTLMR   P4_PEBS_MATRIX_VERT_BIT0
  12.319 +#define P4_REPLAY_TAGGING_VERT_DTSMR   P4_PEBS_MATRIX_VERT_BIT1
  12.320 +#define P4_REPLAY_TAGGING_VERT_DTAMR   (P4_PEBS_MATRIX_VERT_BIT0 | P4_PEBS_MATRIX_VERT_BIT1) /* parenthesised: safe next to <<, &, == */
  12.321 +
  12.322 +
  12.323 +
  12.324 +
  12.325 +/*****************************************************************************
  12.326 + * Event select codes and event masks.                                       *
  12.327 + *****************************************************************************/
  12.328 +
  12.329 +// x87_FP_uop
  12.330 +#define EVENT_SEL_x87_FP_uop                0x04
  12.331 +#define EVENT_MASK_x87_FP_uop_ALL           (1 << 15)
  12.332 +
  12.333 +// execution event (at retirement); same code as P4_ESCR_EVNTSEL_EXECUTION, no mask defined here
  12.334 +#define EVENT_SEL_execution_event           0x0C
  12.335 +
  12.336 +// scalar_SP_uop
  12.337 +#define EVENT_SEL_scalar_SP_uop             0x0a
  12.338 +#define EVENT_MASK_scalar_SP_uop_ALL        (1 << 15)
  12.339 +
  12.340 +// scalar_DP_uop
  12.341 +#define EVENT_SEL_scalar_DP_uop             0x0e
  12.342 +#define EVENT_MASK_scalar_DP_uop_ALL        (1 << 15)
  12.343 +
  12.344 +// Instruction retired (ALL == OR of the four P4_ESCR_EVNTMASK_IRET_* bits)
  12.345 +#define EVENT_SEL_instr_retired             0x02
  12.346 +#define EVENT_MASK_instr_retired_ALL        0x0f
  12.347 +
  12.348 +// uOps retired (ALL == P4_ESCR_EVNTMASK_URET_NBOGUS | P4_ESCR_EVNTMASK_URET_BOGUS)
  12.349 +#define EVENT_SEL_uops_retired              0x01
  12.350 +#define EVENT_MASK_uops_retired_ALL         0x03
  12.351 +
  12.352 +// L1 misses retired (replay event; ALL == REPLAY_NBOGUS | REPLAY_BOGUS)
  12.353 +#define EVENT_SEL_replay_event              0x09
  12.354 +#define EVENT_MASK_replay_event_ALL         0x03
  12.355 +
  12.356 +// Trace cache
  12.357 +#define EVENT_SEL_BPU_fetch_request         0x03
  12.358 +#define EVENT_MASK_BPU_fetch_request_TCMISS 0x01
  12.359 +
  12.360 +// Bus activity (FSB_data_activity): one mask bit per DRDY/DBSY x DRV/OWN/OTHER
  12.361 +#define EVENT_SEL_FSB_data_activity               0x17
  12.362 +#define EVENT_MASK_FSB_data_activity_DRDY_DRV     0x01
  12.363 +#define EVENT_MASK_FSB_data_activity_DRDY_OWN     0x02
  12.364 +#define EVENT_MASK_FSB_data_activity_DRDY_OOTHER  0x04  /* NOTE(review): "OOTHER" looks like a typo for "OTHER"; name kept for callers */
  12.365 +#define EVENT_MASK_FSB_data_activity_DBSY_DRV     0x08
  12.366 +#define EVENT_MASK_FSB_data_activity_DBSY_OWN     0x10
  12.367 +#define EVENT_MASK_FSB_data_activity_DBSY_OOTHER  0x20  /* NOTE(review): same "OOTHER" spelling as above */
  12.368 +
  12.369 +// Cache L2 / L3 references (BSQ_cache_reference)
  12.370 +#define EVENT_SEL_BSQ_cache_reference             0x0c
  12.371 +#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITS 0x001  /* bits 0-2: L2 read hits (HITS/HITE/HITM) */
  12.372 +#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITE 0x002
  12.373 +#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITM 0x004
  12.374 +
  12.375 +#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITS 0x008  /* bits 3-5: L3 read hits (HITS/HITE/HITM) */
  12.376 +#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITE 0x010
  12.377 +#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITM 0x020
  12.378 +
  12.379 +#define EVENT_MASK_BSQ_cache_reference_RD_L2_MISS 0x100  /* bits 8-10: misses; bits 6-7 are not defined here */
  12.380 +#define EVENT_MASK_BSQ_cache_reference_RD_L3_MISS 0x200
  12.381 +#define EVENT_MASK_BSQ_cache_reference_WR_L2_MISS 0x400
  12.382 +
  12.383 +#endif
  12.384 +
  12.385 +/* End of $RCSfile: p4perf.h,v $ */