direct-io.hg

changeset 3963:cfee4c4a8ed6

bitkeeper revision 1.1242 (4225f56fwo6ym-RMTBheAeYhl10ATQ)

forward ported James Bulpin's performance counters tool

Signed-off-by: michael.fetterman@cl.cam.ac.uk
author rneugeba@wyvis.research.intel-research.net
date Wed Mar 02 17:18:39 2005 +0000 (2005-03-02)
parents a6914c2c15cf
children 24703bde489b
files .rootkeys tools/libxc/plan9a.out.h tools/libxc/xc.h tools/libxc/xc_misc.c tools/libxc/xc_plan9_build.c tools/misc/Makefile tools/misc/cpuperf/Makefile tools/misc/cpuperf/README.txt tools/misc/cpuperf/cpuperf.c tools/misc/cpuperf/cpuperf_perfcntr.h tools/misc/cpuperf/cpuperf_xeno.h tools/misc/cpuperf/module/Makefile tools/misc/cpuperf/module/perfcntr.c tools/misc/cpuperf/p4perf.h tools/misc/miniterm/Makefile tools/misc/miniterm/miniterm.c
line diff
     1.1 --- a/.rootkeys	Tue Mar 01 13:47:52 2005 +0000
     1.2 +++ b/.rootkeys	Wed Mar 02 17:18:39 2005 +0000
     1.3 @@ -586,6 +586,14 @@ 40e03332h5V611rRWURRLqb1Ekatxg tools/lib
     1.4  41a216cayFe2FQroFuzvNPw1AvNiqQ tools/libxutil/util.c
     1.5  41a216ca7mgVSnCBHPCLkGOIqPS1CQ tools/libxutil/util.h
     1.6  3f776bd2Xd-dUcPKlPN2vG89VGtfvQ tools/misc/Makefile
     1.7 +4225f56d7sa9aEARfjNeCVTMYDAmZA tools/misc/cpuperf/Makefile
     1.8 +4225f56dS5TGdKojmuBnrV3PzbE6Rg tools/misc/cpuperf/README.txt
     1.9 +4225f56dcodvBSPoWYS6kvwZCQhgzg tools/misc/cpuperf/cpuperf.c
    1.10 +4225f56dMjZK14EWd8K0gq4v5Diwjg tools/misc/cpuperf/cpuperf_perfcntr.h
    1.11 +4225f56d_XjSY1297IiH96qeqD4sCA tools/misc/cpuperf/cpuperf_xeno.h
    1.12 +4225f56dqlGC_UZ681F95mCgLbOeHQ tools/misc/cpuperf/module/Makefile
    1.13 +4225f56dnmms-VFr1MiDVG_dYoM7IQ tools/misc/cpuperf/module/perfcntr.c
    1.14 +4225f56dYhIGQRD_kKVJ6xQrkqO0YQ tools/misc/cpuperf/p4perf.h
    1.15  40ab2cfawIw8tsYo0dQKtp83h4qfTQ tools/misc/fakei386xen
    1.16  3f6dc136ZKOjd8PIqLbFBl_v-rnkGg tools/misc/miniterm/Makefile
    1.17  3f6dc140C8tAeBfroAF24VrmCS4v_w tools/misc/miniterm/README
     3.1 --- a/tools/libxc/xc.h	Tue Mar 01 13:47:52 2005 +0000
     3.2 +++ b/tools/libxc/xc.h	Wed Mar 02 17:18:39 2005 +0000
     3.3 @@ -370,6 +370,11 @@ int xc_perfc_control(int xc_handle,
     3.4                       u32 op,
     3.5                       xc_perfc_desc_t *desc);
     3.6  
     3.7 +/* read/write msr */
     3.8 +long long xc_msr_read(int xc_handle, int cpu_mask, int msr);
     3.9 +int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low,
    3.10 +                  unsigned int high);
    3.11 +
    3.12  /**
    3.13   * Memory maps a range within one domain to a local address range.  Mappings
    3.14   * should be unmapped with munmap and should follow the same rules as mmap
     4.1 --- a/tools/libxc/xc_misc.c	Tue Mar 01 13:47:52 2005 +0000
     4.2 +++ b/tools/libxc/xc_misc.c	Wed Mar 02 17:18:39 2005 +0000
     4.3 @@ -97,3 +97,36 @@ int xc_perfc_control(int xc_handle,
     4.4  
     4.5      return (rc == 0) ? dop.u.perfccontrol.nr_counters : rc;
     4.6  }
     4.7 +/* Read MSR 'msr' on the CPUs in cpu_mask via DOM0_MSR: returns (out2 << 32) | out1, or the op's rc if it failed. */
     4.8 +long long xc_msr_read(int xc_handle, int cpu_mask, int msr)
     4.9 +{
    4.10 +    int rc;
    4.11 +    dom0_op_t op;
    4.12 +
    4.13 +    op.cmd = DOM0_MSR;
    4.14 +    op.u.msr.write = 0;      /* 0 selects a read; xc_msr_write sets 1 */
    4.15 +    op.u.msr.msr = msr;
    4.16 +    op.u.msr.cpu_mask = cpu_mask;
    4.17 +
    4.18 +    rc = do_dom0_op(xc_handle, &op);
    4.19 +    /* Same convention as xc_perfc_control: rc == 0 is success; otherwise hand rc back instead of a value built from out1/out2. */
    4.20 +    return (rc != 0) ? (long long)rc : (long long)((((unsigned long long)op.u.msr.out2)<<32) | op.u.msr.out1);
    4.21 +}
    4.22 +
    4.23 +int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low,
    4.24 +                  unsigned int high)
    4.25 +{
    4.26 +    int rc;    
    4.27 +    dom0_op_t op;
    4.28 +    
    4.29 +    op.cmd = DOM0_MSR;
    4.30 +    op.u.msr.write = 1;
    4.31 +    op.u.msr.msr = msr;
    4.32 +    op.u.msr.cpu_mask = cpu_mask;
    4.33 +    op.u.msr.in1 = low;
    4.34 +    op.u.msr.in2 = high;
    4.35 +
    4.36 +    rc = do_dom0_op(xc_handle, &op);
    4.37 +    
    4.38 +    return rc;
    4.39 +}
     6.1 --- a/tools/misc/Makefile	Tue Mar 01 13:47:52 2005 +0000
     6.2 +++ b/tools/misc/Makefile	Wed Mar 02 17:18:39 2005 +0000
     6.3 @@ -21,18 +21,21 @@ INSTALL_SBIN = netfix xm xend xensv xenp
     6.4  
     6.5  all: $(TARGETS)
     6.6  	$(MAKE) -C miniterm
     6.7 +	$(MAKE) -C cpuperf
     6.8  
     6.9  install: all
    6.10  	[ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin
    6.11  	[ -d $(DESTDIR)/usr/sbin ] || $(INSTALL_DIR) $(DESTDIR)/usr/sbin
    6.12  	$(INSTALL_PROG) $(INSTALL_BIN) $(DESTDIR)/usr/bin
    6.13  	$(INSTALL_PROG) $(INSTALL_SBIN) $(DESTDIR)/usr/sbin
    6.14 +	$(MAKE) -C cpuperf install
    6.15  #       No sense in installing miniterm on the Xen box.
    6.16  #	$(MAKE) -C miniterm install
    6.17  
    6.18  clean:
    6.19  	$(RM) *.o $(TARGETS) *~
    6.20  	$(MAKE) -C miniterm clean
    6.21 +	$(MAKE) -C cpuperf clean
    6.22  
    6.23  %.o: %.c $(HDRS) Makefile
    6.24  	$(CC) -c $(CFLAGS) -o $@ $<
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/tools/misc/cpuperf/Makefile	Wed Mar 02 17:18:39 2005 +0000
     7.3 @@ -0,0 +1,51 @@
     7.4 +#
     7.5 +# Make Performance counter tool
     7.6 +#
     7.7 +# $Id: Makefile,v 1.1 2003/10/13 16:49:44 jrb44 Exp $
     7.8 +#
     7.9 +# $Log: Makefile,v $
    7.10 +# Revision 1.1  2003/10/13 16:49:44  jrb44
    7.11 +# Initial revision
    7.12 +#
    7.13 +#
    7.14 +
    7.15 +INSTALL		= install
    7.16 +INSTALL_PROG	= $(INSTALL) -m0755
    7.17 +INSTALL_DIR	= $(INSTALL) -d -m0755
    7.18 +
    7.19 +# these are for Xen
    7.20 +XEN_ROOT=../../..
    7.21 +include $(XEN_ROOT)/tools/Rules.mk
    7.22 +
    7.23 +CC           = gcc
    7.24 +CFLAGS       = -Wall -O3 
    7.25 +
    7.26 +HDRS         = $(wildcard *.h)
    7.27 +SRCS         = $(wildcard *.c)
    7.28 +OBJS         = $(patsubst %.c,%.o,$(SRCS))
    7.29 +
    7.30 +TARGETS      = cpuperf-xen cpuperf-perfcntr
    7.31 +
    7.32 +INSTALL_BIN  = $(TARGETS)
    7.33 +
    7.34 +
    7.35 +all: $(TARGETS)
    7.36 +
    7.37 +clean:
    7.38 +	$(RM) *.o $(TARGETS)
    7.39 +
    7.40 +%: %.c $(HDRS) Makefile
    7.41 +	$(CC) $(CFLAGS) -o $@ $<
    7.42 +# Xen backend build of cpuperf.c; the -l options follow $< so the linker resolves its references against libxc/libxutil.
    7.43 +cpuperf-xen: cpuperf.c $(HDRS) Makefile
    7.44 +	$(CC) $(CFLAGS) -DXENO -I $(XEN_LIBXC) -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -o $@ $< -lxc -lxutil
    7.45 +
    7.46 +cpuperf-perfcntr: cpuperf.c $(HDRS) Makefile
    7.47 +	$(CC) $(CFLAGS) -DPERFCNTR -o $@ $<
    7.48 +
    7.49 +install: all
    7.50 +	$(INSTALL_PROG) $(INSTALL_BIN) $(DESTDIR)/usr/bin
    7.51 +
    7.52 +
    7.53 +# End of $RCSfile: Makefile,v $
    7.54 +
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/tools/misc/cpuperf/README.txt	Wed Mar 02 17:18:39 2005 +0000
     8.3 @@ -0,0 +1,371 @@
     8.4 +Usage
     8.5 +=====
     8.6 +
     8.7 +Use either cpuperf-xen or cpuperf-perfcntr as appropriate to the system
     8.8 +in use.
     8.9 +
    8.10 +To write:
    8.11 +
    8.12 +    cpuperf -E <escr> -C <cccr> 
    8.13 +
    8.14 +        optional: all numbers in base 10 unless specified
    8.15 +
    8.16 +        -d             Debug mode
    8.17 +        -c <cpu>       CPU number
    8.18 +        -t <thread>    ESCR thread bits - default is 12 (Thread 0 all rings)
    8.19 +                         bit 0: Thread 1 in rings 1,2,3
    8.20 +                         bit 1: Thread 1 in ring 0
    8.21 +                         bit 2: Thread 0 in rings 1,2,3
    8.22 +                         bit 3: Thread 0 in ring 0
    8.23 +        -e <eventsel>  Event selection number
    8.24 +        -m <eventmask> Event mask bits
    8.25 +        -T <value>     ESCR tag value
    8.26 +        -k             Sets CCCR 'compare' bit
    8.27 +        -n             Sets CCCR 'complement' bit
    8.28 +        -g             Sets CCCR 'edge' bit
    8.29 +        -P <bit>       Set the specified bit in MSR_P4_PEBS_ENABLE
    8.30 +        -V <bit>       Set the specified bit in MSR_P4_PEBS_MATRIX_VERT
    8.31 +        (-V and -P may be used multiple times to set multiple bits.)
    8.32 +
    8.33 +To read:
    8.34 +
    8.35 +    cpuperf -r    
    8.36 +
    8.37 +        optional: all numbers in base 10 unless specified
    8.38 +    
    8.39 +        -c <cpu>       CPU number
    8.40 +
    8.41 +<cccr> values:
    8.42 +
    8.43 +    BPU_CCCR0
    8.44 +    BPU_CCCR1
    8.45 +    BPU_CCCR2
    8.46 +    BPU_CCCR3
    8.47 +    MS_CCCR0
    8.48 +    MS_CCCR1
    8.49 +    MS_CCCR2
    8.50 +    MS_CCCR3
    8.51 +    FLAME_CCCR0
    8.52 +    FLAME_CCCR1
    8.53 +    FLAME_CCCR2
    8.54 +    FLAME_CCCR3
    8.55 +    IQ_CCCR0
    8.56 +    IQ_CCCR1
    8.57 +    IQ_CCCR2
    8.58 +    IQ_CCCR3
    8.59 +    IQ_CCCR4
    8.60 +    IQ_CCCR5
    8.61 +    NONE - do not program any CCCR, used when setting up an ESCR for tagging
    8.62 +
    8.63 +<escr> values:
    8.64 +
    8.65 +    BSU_ESCR0
    8.66 +    BSU_ESCR1
    8.67 +    FSB_ESCR0
    8.68 +    FSB_ESCR1
    8.69 +    MOB_ESCR0
    8.70 +    MOB_ESCR1
    8.71 +    PMH_ESCR0
    8.72 +    PMH_ESCR1
    8.73 +    BPU_ESCR0
    8.74 +    BPU_ESCR1
    8.75 +    IS_ESCR0
    8.76 +    IS_ESCR1
    8.77 +    ITLB_ESCR0
    8.78 +    ITLB_ESCR1
    8.79 +    IX_ESCR0
    8.80 +    IX_ESCR1
    8.81 +    MS_ESCR0
    8.82 +    MS_ESCR1
    8.83 +    TBPU_ESCR0
    8.84 +    TBPU_ESCR1
    8.85 +    TC_ESCR0
    8.86 +    TC_ESCR1
    8.87 +    FIRM_ESCR0
    8.88 +    FIRM_ESCR1
    8.89 +    FLAME_ESCR0
    8.90 +    FLAME_ESCR1
    8.91 +    DAC_ESCR0
    8.92 +    DAC_ESCR1
    8.93 +    SAAT_ESCR0
    8.94 +    SAAT_ESCR1
    8.95 +    U2L_ESCR0
    8.96 +    U2L_ESCR1
    8.97 +    CRU_ESCR0
    8.98 +    CRU_ESCR1
    8.99 +    CRU_ESCR2
   8.100 +    CRU_ESCR3
   8.101 +    CRU_ESCR4
   8.102 +    CRU_ESCR5
   8.103 +    IQ_ESCR0
   8.104 +    IQ_ESCR1
   8.105 +    RAT_ESCR0
   8.106 +    RAT_ESCR1
   8.107 +    SSU_ESCR0
   8.108 +    SSU_ESCR1
   8.109 +    ALF_ESCR0
   8.110 +    ALF_ESCR1
   8.111 +
   8.112 +
   8.113 +Example configurations
   8.114 +======================
   8.115 +
   8.116 +Note that in most cases there is a choice of ESCRs and CCCRs for
   8.117 +each metric although not all combinations are allowed. Each ESCR and
   8.118 +counter/CCCR can be used only once.
   8.119 +
   8.120 +Mispredicted branches retired
   8.121 +=============================
   8.122 +
   8.123 +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 3 -m 1
   8.124 +cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 3 -m 1
   8.125 +cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 3 -m 1
   8.126 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 3 -m 1
   8.127 +cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 3 -m 1
   8.128 +cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 3 -m 1
   8.129 +
   8.130 +Tracecache misses
   8.131 +=================
   8.132 +
   8.133 +cpuperf -E BPU_ESCR0 -C BPU_CCCR0 -e 3 -m 1
   8.134 +cpuperf -E BPU_ESCR0 -C BPU_CCCR1 -e 3 -m 1
   8.135 +cpuperf -E BPU_ESCR1 -C BPU_CCCR2 -e 3 -m 1
   8.136 +cpuperf -E BPU_ESCR1 -C BPU_CCCR3 -e 3 -m 1
   8.137 +
   8.138 +I-TLB
   8.139 +=====
   8.140 +
   8.141 +cpuperf -E ITLB_ESCR0 -C BPU_CCCR0 -e 24 
   8.142 +cpuperf -E ITLB_ESCR0 -C BPU_CCCR1 -e 24 
   8.143 +cpuperf -E ITLB_ESCR1 -C BPU_CCCR2 -e 24 
   8.144 +cpuperf -E ITLB_ESCR1 -C BPU_CCCR3 -e 24 
   8.145 +
   8.146 + -m <n> : bit 0 count HITS, bit 1 MISSES, bit 2 uncacheable hit
   8.147 +
   8.148 + e.g. all ITLB misses -m 2
   8.149 +
   8.150 +Load replays
   8.151 +============
   8.152 +
   8.153 +cpuperf -E MOB_ESCR0 -C BPU_CCCR0 -e 3
   8.154 +cpuperf -E MOB_ESCR0 -C BPU_CCCR1 -e 3
   8.155 +cpuperf -E MOB_ESCR1 -C BPU_CCCR2 -e 3
   8.156 +cpuperf -E MOB_ESCR1 -C BPU_CCCR3 -e 3
   8.157 +
   8.158 + -m <n> : bit mask, replay due to...
   8.159 +           1: unknown store address
   8.160 +           3: unknown store data
   8.161 +           4: partially overlapped data access between LD/ST
   8.162 +           5: unaligned address between LD/ST
   8.163 +
   8.164 +Page walks
   8.165 +==========
   8.166 +
   8.167 +cpuperf -E PMH_ESCR0 -C BPU_CCCR0 -e 1
   8.168 +cpuperf -E PMH_ESCR0 -C BPU_CCCR1 -e 1
   8.169 +cpuperf -E PMH_ESCR1 -C BPU_CCCR2 -e 1
   8.170 +cpuperf -E PMH_ESCR1 -C BPU_CCCR3 -e 1
   8.171 +
   8.172 + -m <n> : bit 0 counts walks for a D-TLB miss, bit 1 for I-TLB miss
   8.173 +
   8.174 +L2/L3 cache accesses
   8.175 +====================
   8.176 +
   8.177 +cpuperf -E BSU_ESCR0 -C BPU_CCCR0 -e 12
   8.178 +cpuperf -E BSU_ESCR0 -C BPU_CCCR1 -e 12
   8.179 +cpuperf -E BSU_ESCR1 -C BPU_CCCR2 -e 12
   8.180 +cpuperf -E BSU_ESCR1 -C BPU_CCCR3 -e 12
   8.181 +
   8.182 + -m <n> : where the bit mask is:
   8.183 +           0: Read L2 HITS Shared
   8.184 +           1: Read L2 HITS Exclusive
   8.185 +           2: Read L2 HITS Modified
   8.186 +           3: Read L3 HITS Shared
   8.187 +           4: Read L3 HITS Exclusive
   8.188 +           5: Read L3 HITS Modified
   8.189 +           8: Read L2 MISS
   8.190 +           9: Read L3 MISS
   8.191 +          10: Write L2 MISS
   8.192 +
   8.193 +Front side bus activity
   8.194 +=======================
   8.195 +
   8.196 +cpuperf -E FSB_ESCR0 -C BPU_CCCR0 -e 23 -k -g
   8.197 +cpuperf -E FSB_ESCR0 -C BPU_CCCR1 -e 23 -k -g
   8.198 +cpuperf -E FSB_ESCR1 -C BPU_CCCR2 -e 23 -k -g
   8.199 +cpuperf -E FSB_ESCR1 -C BPU_CCCR3 -e 23 -k -g
   8.200 +
   8.201 + -m <n> : where the bit mask is for bus events:
   8.202 +           0: DRDY_DRV    Processor drives bus
   8.203 +           1: DRDY_OWN    Processor reads bus
   8.204 +           2: DRDY_OTHER  Data on bus not being sampled by processor
   8.205 +           3: DBSY_DRV    Processor reserves bus for driving
   8.206 +           4: DBSY_OWN    Other entity reserves bus for sending to processor
   8.207 +           5: DBSY_OTHER  Other entity reserves bus for sending elsewhere
   8.208 +
   8.209 + e.g. -m 3 to get cycles bus actually in use.
   8.210 +
   8.211 +Pipeline clear (entire)
   8.212 +=======================
   8.213 +
   8.214 +cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 2
   8.215 +cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 2
   8.216 +cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 2
   8.217 +cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 2
   8.218 +cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 2
   8.219 +cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 2
   8.220 +
   8.221 + -m <n> : bit mask:
   8.222 +           0: counts a portion of cycles while clear (use -g for edge trigger)
   8.223 +           1: counts each time machine clears for memory ordering issues
   8.224 +           2: counts each time machine clears for self modifying code
   8.225 +
   8.226 +Instructions retired
   8.227 +====================
   8.228 +
   8.229 +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2
   8.230 +cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 2
   8.231 +cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 2
   8.232 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 2
   8.233 +cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 2
   8.234 +cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 2
   8.235 +
   8.236 + -m <n> : bit mask:
   8.237 +           0: counts non-bogus, not tagged instructions
   8.238 +           1: counts non-bogus, tagged instructions
   8.239 +           2: counts bogus, not tagged instructions
   8.240 +           3: counts bogus, tagged instructions
   8.241 +
   8.242 + e.g. -m 3 to count legit retirements
   8.243 +
   8.244 +Uops retired
   8.245 +============
   8.246 +
   8.247 +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 1
   8.248 +cpuperf -E CRU_ESCR0 -C IQ_CCCR1 -e 1
   8.249 +cpuperf -E CRU_ESCR0 -C IQ_CCCR4 -e 1
   8.250 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 1
   8.251 +cpuperf -E CRU_ESCR1 -C IQ_CCCR3 -e 1
   8.252 +cpuperf -E CRU_ESCR1 -C IQ_CCCR5 -e 1
   8.253 +
   8.254 + -m <n> : bit mask:
   8.255 +           0: Non-bogus
   8.256 +           1: Bogus
   8.257 +
   8.258 +x87 FP uops
   8.259 +===========
   8.260 +
   8.261 +cpuperf -E FIRM_ESCR0 -C FLAME_CCCR0 -e 4 -m 32768
   8.262 +cpuperf -E FIRM_ESCR0 -C FLAME_CCCR1 -e 4 -m 32768
   8.263 +cpuperf -E FIRM_ESCR1 -C FLAME_CCCR2 -e 4 -m 32768
   8.264 +cpuperf -E FIRM_ESCR1 -C FLAME_CCCR3 -e 4 -m 32768
   8.265 +
   8.266 +Replay tagging mechanism
   8.267 +========================
   8.268 +
   8.269 +Counts retirement of uops tagged with the replay tagging mechanism
   8.270 +
   8.271 +cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 9
   8.272 +cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 9
   8.273 +cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 9
   8.274 +cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 9
   8.275 +cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 9
   8.276 +cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 9
   8.277 +
   8.278 + -m <n> : bit mask:
   8.279 +           0: Non-bogus (set this bit for all events listed below)
   8.280 +           1: Bogus
   8.281 +
   8.282 +Set replay tagging mechanism bits with -P and -V:
   8.283 +
   8.284 +  L1 cache load miss retired:      -P 0 -P 24 -P 25 -V 0
   8.285 +  L2 cache load miss retired:      -P 1 -P 24 -P 25 -V 0  (read manual)
   8.286 +  DTLB load miss retired:          -P 2 -P 24 -P 25 -V 0
   8.287 +  DTLB store miss retired:         -P 2 -P 24 -P 25 -V 1
   8.288 +  DTLB all miss retired:           -P 2 -P 24 -P 25 -V 0 -V 1
   8.289 +
   8.290 +e.g. to count all DTLB misses
   8.291 +
   8.292 + cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 9 -m 1 -P 2 -P 24 -P 25 -V 0 -V 1
   8.293 +
   8.294 +Front end event
   8.295 +===============
   8.296 +
   8.297 +To count tagged uops:
   8.298 +
   8.299 +cpuperf -E CRU_ESCR2 -C IQ_CCCR0 -e 8
   8.300 +cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 8
   8.301 +cpuperf -E CRU_ESCR2 -C IQ_CCCR4 -e 8
   8.302 +cpuperf -E CRU_ESCR3 -C IQ_CCCR2 -e 8
   8.303 +cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 8
   8.304 +cpuperf -E CRU_ESCR3 -C IQ_CCCR5 -e 8
   8.305 +
   8.306 + -m <n> : bit 0 for non-bogus uops, bit 1 for bogus uops
   8.307 +
   8.308 +Must have another ESCR programmed to tag uops as required
   8.309 +
   8.310 +cpuperf -E RAT_ESCR0 -C NONE -e 2
   8.311 +cpuperf -E RAT_ESCR1 -C NONE -e 2
   8.312 +
   8.313 + -m <n> : bit 1 for LOADs, bit 2 for STOREs
   8.314 +
   8.315 +An example set of counters
   8.316 +===========================
   8.317 +
   8.318 +# instructions retired
   8.319 +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2 -m 3
   8.320 +
   8.321 +# trace cache misses
   8.322 +cpuperf -E BPU_ESCR0 -C BPU_CCCR0 -e 3 -m 1
   8.323 +
   8.324 +# L1 D cache misses (load misses retired)
   8.325 +cpuperf -E CRU_ESCR2 -C IQ_CCCR1 -e 9 -m 1 -P 0 -P 24 -P 25 -V 0
   8.326 +
   8.327 +# L2 misses (load and store)
   8.328 +cpuperf -E BSU_ESCR0 -C BPU_CCCR1 -e 12 -m 1280
   8.329 +
   8.330 +# I-TLB misses
   8.331 +cpuperf -E ITLB_ESCR1 -C BPU_CCCR2 -e 24 -m 2
   8.332 +
   8.333 +# D-TLB misses (as PT walks)
   8.334 +cpuperf -E PMH_ESCR1 -C BPU_CCCR3 -e 1 -m 1
   8.335 +
   8.336 +# Other 'bonus' counters would be:
   8.337 +#   number of loads executed - need both command lines
   8.338 +cpuperf -E RAT_ESCR0 -C NONE -e 2 -m 2
   8.339 +cpuperf -E CRU_ESCR3 -C IQ_CCCR3 -e 8 -m 3
   8.340 +
   8.341 +#   number of mispredicted branches
   8.342 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 3 -m 1
   8.343 +
   8.344 +# x87 FP uOps
   8.345 +cpuperf -E FIRM_ESCR0 -C FLAME_CCCR0 -e 4 -m 32768
   8.346 +
   8.347 +The above has counter assignments
   8.348 +
   8.349 +0  Trace cache misses
   8.350 +1  L2 Misses
   8.351 +2  I-TLB misses
   8.352 +3  D-TLB misses
   8.353 +4  
   8.354 +5  
   8.355 +6  
   8.356 +7  
   8.357 +8  x87 FP uOps 
   8.358 +9  
   8.359 +10 
   8.360 +11 
   8.361 +12 Instructions retired
   8.362 +13 L1 D cache misses
   8.363 +14 Mispredicted branches
   8.364 +15 Loads executed
   8.365 +16 
   8.366 +17 
   8.367 +
   8.368 +Counting instructions retired on each logical CPU
   8.369 +=================================================
   8.370 +
   8.371 +cpuperf -E CRU_ESCR0 -C IQ_CCCR0 -e 2 -m 3 -t 12
   8.372 +cpuperf -E CRU_ESCR1 -C IQ_CCCR2 -e 2 -m 3 -t 3
   8.373 +
   8.374 +Cannot count mispred branches as well due to CRU_ESCR1 use.
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/tools/misc/cpuperf/cpuperf.c	Wed Mar 02 17:18:39 2005 +0000
     9.3 @@ -0,0 +1,301 @@
     9.4 +/*
     9.5 + * User mode program to program performance counters.
     9.6 + *
     9.7 + * JRB/IAP October 2003.
     9.8 + *
     9.9 + * $Id: cpuperf.c,v 1.2 2003/10/14 11:00:59 jrb44 Exp $
    9.10 + *
    9.11 + * $Log: cpuperf.c,v $
    9.12 + * Revision 1.2  2003/10/14 11:00:59  jrb44
    9.13 + * Added default CPU. Added NONE CCCR.
    9.14 + *
    9.15 + * Revision 1.1  2003/10/13 16:49:44  jrb44
    9.16 + * Initial revision
    9.17 + *
    9.18 + */
    9.19 +
    9.20 +#include <sys/types.h>
    9.21 +#include <sched.h>
    9.22 +#include <error.h>
    9.23 +#include <stdio.h>
    9.24 +#include <unistd.h>
    9.25 +#include <stdlib.h>
    9.26 +#include <string.h>
    9.27 +#include <errno.h>
    9.28 +
    9.29 +#include "p4perf.h"
    9.30 +
    9.31 +static inline void cpus_wrmsr(int cpu_mask,
    9.32 +                              int msr,
    9.33 +                              unsigned int low,
    9.34 +                              unsigned int high )
    9.35 +{
    9.36 +    fprintf(stderr, "No backend to write MSR 0x%x <= 0x%08x%08x on %08x\n",
    9.37 +            msr, high, low, cpu_mask);
    9.38 +}
    9.39 +
    9.40 +static inline unsigned long long cpus_rdmsr( int cpu_mask, int msr )
    9.41 +{
    9.42 +    fprintf(stderr, "No backend to read MSR 0x%x on %08x\n", msr, cpu_mask);
    9.43 +    return 0;
    9.44 +}
    9.45 +
    9.46 +#ifdef PERFCNTR
    9.47 +#include "cpuperf_perfcntr.h"
    9.48 +#define cpus_wrmsr perfcntr_wrmsr
    9.49 +#define cpus_rdmsr perfcntr_rdmsr
    9.50 +#endif
    9.51 +
    9.52 +#ifdef XENO
    9.53 +#include "cpuperf_xeno.h"
    9.54 +#define cpus_wrmsr dom0_wrmsr
    9.55 +#define cpus_rdmsr dom0_rdmsr
    9.56 +#endif
    9.57 +
    9.58 +struct macros {
    9.59 +    char         *name;
    9.60 +    unsigned long msr_addr;
    9.61 +    int           number;
    9.62 +};
    9.63 +
    9.64 +#define NO_CCCR 0xfffffffe
    9.65 +
    9.66 +struct macros msr[] = {
    9.67 +    {"BPU_COUNTER0", 0x300, 0},
    9.68 +    {"BPU_COUNTER1", 0x301, 1},
    9.69 +    {"BPU_COUNTER2", 0x302, 2},
    9.70 +    {"BPU_COUNTER3", 0x303, 3},
    9.71 +    {"MS_COUNTER0", 0x304, 4},
    9.72 +    {"MS_COUNTER1", 0x305, 5},
    9.73 +    {"MS_COUNTER2", 0x306, 6},
    9.74 +    {"MS_COUNTER3", 0x307, 7},
    9.75 +    {"FLAME_COUNTER0", 0x308, 8},
    9.76 +    {"FLAME_COUNTER1", 0x309, 9},
    9.77 +    {"FLAME_COUNTER2", 0x30a, 10},
    9.78 +    {"FLAME_COUNTER3", 0x30b, 11},
    9.79 +    {"IQ_COUNTER0", 0x30c, 12},
    9.80 +    {"IQ_COUNTER1", 0x30d, 13},
    9.81 +    {"IQ_COUNTER2", 0x30e, 14},
    9.82 +    {"IQ_COUNTER3", 0x30f, 15},
    9.83 +    {"IQ_COUNTER4", 0x310, 16},
    9.84 +    {"IQ_COUNTER5", 0x311, 17},
    9.85 +    {"BPU_CCCR0", 0x360, 0},
    9.86 +    {"BPU_CCCR1", 0x361, 1},
    9.87 +    {"BPU_CCCR2", 0x362, 2},
    9.88 +    {"BPU_CCCR3", 0x363, 3},
    9.89 +    {"MS_CCCR0", 0x364, 4},
    9.90 +    {"MS_CCCR1", 0x365, 5},
    9.91 +    {"MS_CCCR2", 0x366, 6},
    9.92 +    {"MS_CCCR3", 0x367, 7},
    9.93 +    {"FLAME_CCCR0", 0x368, 8},
    9.94 +    {"FLAME_CCCR1", 0x369, 9},
    9.95 +    {"FLAME_CCCR2", 0x36a, 10},
    9.96 +    {"FLAME_CCCR3", 0x36b, 11},
    9.97 +    {"IQ_CCCR0", 0x36c, 12},
    9.98 +    {"IQ_CCCR1", 0x36d, 13},
    9.99 +    {"IQ_CCCR2", 0x36e, 14},
   9.100 +    {"IQ_CCCR3", 0x36f, 15},
   9.101 +    {"IQ_CCCR4", 0x370, 16},
   9.102 +    {"IQ_CCCR5", 0x371, 17},
   9.103 +    {"BSU_ESCR0", 0x3a0, 7},
   9.104 +    {"BSU_ESCR1", 0x3a1, 7},
   9.105 +    {"FSB_ESCR0", 0x3a2, 6},
   9.106 +    {"FSB_ESCR1", 0x3a3, 6},
   9.107 +    {"MOB_ESCR0", 0x3aa, 2},
   9.108 +    {"MOB_ESCR1", 0x3ab, 2},
   9.109 +    {"PMH_ESCR0", 0x3ac, 4},
   9.110 +    {"PMH_ESCR1", 0x3ad, 4},
   9.111 +    {"BPU_ESCR0", 0x3b2, 0},
   9.112 +    {"BPU_ESCR1", 0x3b3, 0},
   9.113 +    {"IS_ESCR0", 0x3b4, 1},
   9.114 +    {"IS_ESCR1", 0x3b5, 1},
   9.115 +    {"ITLB_ESCR0", 0x3b6, 3},
   9.116 +    {"ITLB_ESCR1", 0x3b7, 3},
   9.117 +    {"IX_ESCR0", 0x3c8, 5},
   9.118 +    {"IX_ESCR1", 0x3c9, 5},
   9.119 +    {"MS_ESCR0", 0x3c0, 0},
   9.120 +    {"MS_ESCR1", 0x3c1, 0},
   9.121 +    {"TBPU_ESCR0", 0x3c2, 2},
   9.122 +    {"TBPU_ESCR1", 0x3c3, 2},
   9.123 +    {"TC_ESCR0", 0x3c4, 1},
   9.124 +    {"TC_ESCR1", 0x3c5, 1},
   9.125 +    {"FIRM_ESCR0", 0x3a4, 1},
   9.126 +    {"FIRM_ESCR1", 0x3a5, 1},
   9.127 +    {"FLAME_ESCR0", 0x3a6, 0},
   9.128 +    {"FLAME_ESCR1", 0x3a7, 0},
   9.129 +    {"DAC_ESCR0", 0x3a8, 5},
   9.130 +    {"DAC_ESCR1", 0x3a9, 5},
   9.131 +    {"SAAT_ESCR0", 0x3ae, 2},
   9.132 +    {"SAAT_ESCR1", 0x3af, 2},
   9.133 +    {"U2L_ESCR0", 0x3b0, 3},
   9.134 +    {"U2L_ESCR1", 0x3b1, 3},
   9.135 +    {"CRU_ESCR0", 0x3b8, 4},
   9.136 +    {"CRU_ESCR1", 0x3b9, 4},
   9.137 +    {"CRU_ESCR2", 0x3cc, 5},
   9.138 +    {"CRU_ESCR3", 0x3cd, 5},
   9.139 +    {"CRU_ESCR4", 0x3e0, 6},
   9.140 +    {"CRU_ESCR5", 0x3e1, 6},
   9.141 +    {"IQ_ESCR0", 0x3ba, 0},
   9.142 +    {"IQ_ESCR1", 0x3bb, 0},
   9.143 +    {"RAT_ESCR0", 0x3bc, 2},
   9.144 +    {"RAT_ESCR1", 0x3bd, 2},
   9.145 +    {"SSU_ESCR0", 0x3be, 3},
   9.146 +    {"SSU_ESCR1", 0x3bf, 3},
   9.147 +    {"ALF_ESCR0", 0x3ca, 1},
   9.148 +    {"ALF_ESCR1", 0x3cb, 1},
   9.149 +    {"PEBS_ENABLE", 0x3f1, 0},
   9.150 +    {"PEBS_MATRIX_VERT", 0x3f2, 0},
   9.151 +    {"NONE", NO_CCCR, 0},
   9.152 +    {NULL, 0, 0}
   9.153 +};
   9.154 +
   9.155 +struct macros *lookup_macro(char *str)
   9.156 +{
   9.157 +    struct macros *m;
   9.158 +
   9.159 +    m = msr;
   9.160 +    while (m->name) {
   9.161 +        if (strcmp(m->name, str) == 0)
   9.162 +            return m;
   9.163 +        m++;
   9.164 +    }
   9.165 +    return NULL;
   9.166 +}
   9.167 +/* Parse options, then either dump MSRs 0x300-0x311 (-r) or program one ESCR/CCCR pair plus optional PEBS MSRs. */
   9.168 +int main(int argc, char **argv)
   9.169 +{
   9.170 +    int c, t = 0xc, es = 0, em = 0, tv = 0, te = 0;
   9.171 +    unsigned int cpu_mask = 1;      /* default: bit 0 only */
   9.172 +    struct macros *escr = NULL, *cccr = NULL;
   9.173 +    unsigned long escr_val, cccr_val;
   9.174 +    int debug = 0;
   9.175 +    unsigned long pebs = 0, pebs_vert = 0;
   9.176 +    int pebs_x = 0, pebs_vert_x = 0; /* set once -P / -V has been seen */
   9.177 +    int read = 0;
   9.178 +    int compare = 0;
   9.179 +    int complement = 0;
   9.180 +    int edge = 0;
   9.181 +
   9.182 +#ifdef XENO
   9.183 +    xen_init();
   9.184 +#endif
   9.185 +
   9.186 +    /* Collect options; -P and -V may repeat and OR further bits into their masks. */
   9.187 +    while ((c = getopt(argc, argv, "dc:t:e:m:T:E:C:P:V:rkng")) != -1) {
   9.188 +        switch((char)c) {
   9.189 +        case 'P':
   9.190 +            pebs |= 1UL << atoi(optarg); /* unsigned long shift: int 1 << 31 would overflow */
   9.191 +            pebs_x = 1;
   9.192 +            break;
   9.193 +        case 'V':
   9.194 +            pebs_vert |= 1UL << atoi(optarg); /* unsigned long shift, as for -P */
   9.195 +            pebs_vert_x = 1;
   9.196 +            break;
   9.197 +        case 'd':
   9.198 +            debug = 1;
   9.199 +            break;
   9.200 +        case 'c':
   9.201 +            {
   9.202 +                int cpu = atoi(optarg);
   9.203 +                cpu_mask  = (cpu == -1)?(~0U):(1U<<cpu); /* -1 sets every bit of the mask */
   9.204 +            }
   9.205 +            break;
   9.206 +        case 't': // ESCR thread bits
   9.207 +            t = atoi(optarg);
   9.208 +            break;
   9.209 +        case 'e': // eventsel
   9.210 +            es = atoi(optarg);
   9.211 +            break;
   9.212 +        case 'm': // eventmask
   9.213 +            em = atoi(optarg);
   9.214 +            break;
   9.215 +        case 'T': // tag value
   9.216 +            tv = atoi(optarg);
   9.217 +            te = 1;
   9.218 +            break;
   9.219 +        case 'E':
   9.220 +            escr = lookup_macro(optarg);
   9.221 +            if (!escr) {
   9.222 +                fprintf(stderr, "Macro '%s' not found.\n", optarg);
   9.223 +                exit(1);
   9.224 +            }
   9.225 +            break;
   9.226 +        case 'C':
   9.227 +            cccr = lookup_macro(optarg);
   9.228 +            if (!cccr) {
   9.229 +                fprintf(stderr, "Macro '%s' not found.\n", optarg);
   9.230 +                exit(1);
   9.231 +            }
   9.232 +            break;
   9.233 +        case 'r':
   9.234 +            read = 1;
   9.235 +            break;
   9.236 +        case 'k':
   9.237 +            compare = 1;
   9.238 +            break;
   9.239 +        case 'n':
   9.240 +            complement = 1;
   9.241 +            break;
   9.242 +        case 'g':
   9.243 +            edge = 1;
   9.244 +            break;
   9.245 +        }
   9.246 +    }
   9.247 +    /* NOTE(review): the loop below only runs while bit 0 of cpu_mask is set, so "-r -c N" with N > 0 prints nothing - confirm intent. */
   9.248 +    if (read) {
   9.249 +        while((cpu_mask&1)) {
   9.250 +            int i;
   9.251 +            for (i=0x300;i<0x312;i++) {
   9.252 +                printf("%010llx ",cpus_rdmsr( cpu_mask, i ) );
   9.253 +            }
   9.254 +            printf("\n");
   9.255 +            cpu_mask>>=1;
   9.256 +        }
   9.257 +        exit(0); /* the read completed: report success, not status 1 */
   9.258 +    }
   9.259 +
   9.260 +    if (!escr) {
   9.261 +        fprintf(stderr, "Need an ESCR.\n");
   9.262 +        exit(1);
   9.263 +    }
   9.264 +    if (!cccr) {
   9.265 +        fprintf(stderr, "Need a counter number.\n");
   9.266 +        exit(1);
   9.267 +    }
   9.268 +
   9.269 +    escr_val = P4_ESCR_THREADS(t) | P4_ESCR_EVNTSEL(es) |
   9.270 +        P4_ESCR_EVNTMASK(em) | P4_ESCR_TV(tv) | ((te)?P4_ESCR_TE:0);
   9.271 +    cccr_val = P4_CCCR_ENABLE | P4_CCCR_ESCR(escr->number) |
   9.272 +        ((compare)?P4_CCCR_COMPARE:0) |
   9.273 +        ((complement)?P4_CCCR_COMPLEMENT:0) |
   9.274 +        ((edge)?P4_CCCR_EDGE:0) |
   9.275 +        P4_CCCR_ACTIVE_THREAD(3)/*reserved*/;
   9.276 +
   9.277 +    if (debug) {
   9.278 +        fprintf(stderr, "ESCR 0x%lx <= 0x%08lx\n", escr->msr_addr, escr_val);
   9.279 +        if (cccr->msr_addr != NO_CCCR)
   9.280 +            fprintf(stderr, "CCCR 0x%lx <= 0x%08lx (%u)\n",
   9.281 +                    cccr->msr_addr, cccr_val, cccr->number);
   9.282 +        if (pebs_x)
   9.283 +            fprintf(stderr, "PEBS 0x%x <= 0x%08lx\n",
   9.284 +                    MSR_P4_PEBS_ENABLE, pebs);
   9.285 +        if (pebs_vert_x)
   9.286 +            fprintf(stderr, "PMV  0x%x <= 0x%08lx\n",
   9.287 +                    MSR_P4_PEBS_MATRIX_VERT, pebs_vert);
   9.288 +    }
   9.289 +
   9.290 +    cpus_wrmsr( cpu_mask, escr->msr_addr, escr_val, 0 );
   9.291 +    if (cccr->msr_addr != NO_CCCR) /* "NONE" was given: program the ESCR only */
   9.292 +        cpus_wrmsr( cpu_mask, cccr->msr_addr, cccr_val, 0 );
   9.293 +
   9.294 +    if (pebs_x)
   9.295 +        cpus_wrmsr( cpu_mask, MSR_P4_PEBS_ENABLE, pebs, 0 );
   9.296 +
   9.297 +    if (pebs_vert_x)
   9.298 +        cpus_wrmsr( cpu_mask, MSR_P4_PEBS_MATRIX_VERT, pebs_vert, 0 );
   9.299 +
   9.300 +    return 0;
   9.301 +}
   9.302 +
   9.303 +// End of $RCSfile: cpuperf.c,v $
   9.304 +
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/tools/misc/cpuperf/cpuperf_perfcntr.h	Wed Mar 02 17:18:39 2005 +0000
    10.3 @@ -0,0 +1,41 @@
    10.4 +/*
    10.5 + * Interface to JRB44's /proc/perfcntr interface.
    10.6 + *
    10.7 + * $Id: cpuperf_perfcntr.h,v 1.1 2003/10/13 16:49:44 jrb44 Exp $
    10.8 + *
    10.9 + * $Log: cpuperf_perfcntr.h,v $
   10.10 + * Revision 1.1  2003/10/13 16:49:44  jrb44
   10.11 + * Initial revision
   10.12 + *
   10.13 + */
   10.14 +
   10.15 +#define  PROC_PERFCNTR "/proc/perfcntr"
   10.16 +/* Send one MSR write request, as hex text "<cpu_mask> <msr> <value>", to PROC_PERFCNTR. */
   10.17 +static inline void perfcntr_wrmsr(int cpu_mask,
   10.18 +                                  int msr,
   10.19 +                                  unsigned int low,
   10.20 +                                  unsigned int high )
   10.21 +{
   10.22 +    FILE *fd;
   10.23 +    unsigned long long value = low | (((unsigned long long)high) << 32); /* high:low as one 64-bit value */
   10.24 +
   10.25 +    fd = fopen(PROC_PERFCNTR, "w");   /* opened and closed again on every call */
   10.26 +    if (fd == NULL)
   10.27 +    {
   10.28 +        perror("open " PROC_PERFCNTR);
   10.29 +        exit(1);                      /* terminates the program if the file cannot be opened */
   10.30 +    }
   10.31 +    /* The same request text is written to the /proc file and echoed to stderr. */
   10.32 +    fprintf(fd, "%x %x %llx \n", cpu_mask, msr, value);
   10.33 +    fprintf(stderr, "%x %x %llx \n", cpu_mask, msr, value);
   10.34 +    fclose(fd);                       /* NOTE(review): fprintf/fclose results are not checked */
   10.35 +}
   10.36 +/* Stub: MSR reads are not implemented for this backend; warns on stderr and returns 0. */
   10.37 +static inline unsigned long long perfcntr_rdmsr( int cpu_mask, int msr )
   10.38 +{
   10.39 +    fprintf(stderr, "WARNING: rdmsr not yet implemented for perfcntr.\n");
   10.40 +    return 0;  /* both arguments are ignored */
   10.41 +}
   10.42 +
   10.43 +// End of $RCSfile: cpuperf_perfcntr.h,v $
   10.44 +
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/tools/misc/cpuperf/cpuperf_xeno.h	Wed Mar 02 17:18:39 2005 +0000
    11.3 @@ -0,0 +1,38 @@
    11.4 +/*
    11.5 + * Interface to Xen MSR hypercalls.
    11.6 + * 
    11.7 + * $Id: cpuperf_xeno.h,v 1.1 2003/10/13 16:49:44 jrb44 Exp $
    11.8 + * 
    11.9 + * $Log: cpuperf_xeno.h,v $
   11.10 + * Revision 1.1  2003/10/13 16:49:44  jrb44
   11.11 + * Initial revision
   11.12 + *
   11.13 + */
   11.14 +
   11.15 +#include <xc.h>
   11.16 +
   11.17 +static int xc_handle;
   11.18 +/* Open the xc interface and keep the handle in xc_handle; exits the program on failure. */
   11.19 +void xen_init()
   11.20 +{
   11.21 +    if ( (xc_handle = xc_interface_open()) == -1 )  /* -1 is treated as failure */
   11.22 +    {
   11.23 +        fprintf(stderr, "Error opening xc interface: %d (%s)\n",
   11.24 +                errno, strerror(errno));
   11.25 +        exit(-1);
   11.26 +    }
   11.27 +
   11.28 +}
   11.29 +/* Forward an MSR write to xc_msr_write() using the handle stored by xen_init(). */
   11.30 +void dom0_wrmsr( int cpu_mask, int msr, unsigned int low, unsigned int high )
   11.31 +{
   11.32 +    xc_msr_write (xc_handle, cpu_mask, msr, low, high);  /* return value is not checked */
   11.33 +}
   11.34 +/* Forward an MSR read to xc_msr_read() using the handle stored by xen_init(); returns its result. */
   11.35 +unsigned long long dom0_rdmsr( int cpu_mask, int msr )
   11.36 +{
   11.37 +    return xc_msr_read(xc_handle, cpu_mask, msr);
   11.38 +}
   11.39 +
   11.40 +// End of $RCSfile: cpuperf_xeno.h,v $
   11.41 +
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/tools/misc/cpuperf/module/Makefile	Wed Mar 02 17:18:39 2005 +0000
    12.3 @@ -0,0 +1,16 @@
    12.4 +#############################################################################
    12.5 +# (C) 2005 - Rolf Neugebauer - Intel Research Cambridge
    12.6 +#############################################################################
    12.7 +#
    12.8 +#        File: Makefile
    12.9 +#      Author: Rolf Neugebauer (rolf.neugebauer@intel.com)
   12.10 +#        Date: Mar 2005
   12.11 +# 
   12.12 +# Environment: 
   12.13 +#
   12.14 +
   12.15 +# invoke:
   12.16 +# make -C /lib/modules/`uname -r`/build SUBDIRS=`pwd` modules_install
   12.17 +
   12.18 +obj-m    := perfcntr.o
   12.19 +
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/tools/misc/cpuperf/module/perfcntr.c	Wed Mar 02 17:18:39 2005 +0000
    13.3 @@ -0,0 +1,730 @@
    13.4 +/*
    13.5 + * Linux loadable kernel module to use P4 performance counters.
    13.6 + *
    13.7 + * James Bulpin, Feb 2003.
    13.8 + *
    13.9 + * $Id$
   13.10 + *
   13.11 + * $Log$
   13.12 + */
   13.13 +
   13.14 +#define DRV_NAME        "perfcntr"
   13.15 +#define DRV_VERSION     "0.2"
   13.16 +#define DRV_RELDATE     "02 Jun 2004"
   13.17 +
   13.18 +
   13.19 +#include <linux/module.h>
   13.20 +#include <linux/kernel.h>
   13.21 +#include <linux/init.h>
   13.22 +#include <linux/types.h>
   13.23 +#include <linux/proc_fs.h>
   13.24 +#include <linux/seq_file.h>
   13.25 +
   13.26 +#include <asm/uaccess.h>
   13.27 +#include <asm/pgtable.h>
   13.28 +#include <asm/io.h>
   13.29 +#include <asm/processor.h>
   13.30 +
   13.31 +#define NOHT
   13.32 +
   13.33 +#include "../p4perf.h"
   13.34 +
   13.35 +#ifdef NOHT
   13.36 +# define CPUMASK 0x00000003
   13.37 +#else
   13.38 +# define CPUMASK 0x00000005
   13.39 +#endif
   13.40 +
   13.41 +/*****************************************************************************
   13.42 + * Module admin                                                              *
   13.43 + *****************************************************************************/
   13.44 +
   13.45 +MODULE_AUTHOR("James Bulpin <James.Bulpin@cl.cam.ac.uk>");
   13.46 +MODULE_DESCRIPTION("P4 Performance Counters access "
   13.47 +                   DRV_VERSION " " DRV_RELDATE);
   13.48 +MODULE_LICENSE("GPL");
   13.49 +
   13.50 +static char version[] __devinitdata =
   13.51 +DRV_NAME ": James Bulpin.\n";
   13.52 +
   13.53 +static unsigned char foobar[4];
   13.54 +
   13.55 +/* rpcc: get full 64-bit Pentium TSC value
   13.56 + * (executes rdtsc and combines the two 32-bit register halves). */
   13.57 +static __inline__ unsigned long long int rpcc(void) 
   13.58 +{
   13.59 +    unsigned int __h, __l;
   13.60 +    __asm__ __volatile__ ("rdtsc" :"=a" (__l), "=d" (__h)); /* EAX -> __l, EDX -> __h */
   13.61 +    return (((unsigned long long)__h) << 32) + __l;         /* __h is the high word */
   13.62 +}
   13.63 +
   13.64 +/*****************************************************************************
   13.65 + * Display the counters                                                      *
   13.66 + *****************************************************************************/
   13.67 +
   13.68 +//#define processor cpu // post 2.4.16
   13.69 +/* One 64-bit counter value, also addressable as the two 32-bit halves that rdmsr fills in. */
   13.70 +typedef union {
   13.71 +    struct {
   13.72 +        unsigned int lo;        /* low half; fixed 32-bit so lo+hi exactly overlay cnt */
   13.73 +        unsigned int hi;        /* high half (with 'unsigned long' it falls outside cnt on LP64) */
   13.74 +    };
   13.75 +    unsigned long long cnt;     /* the same storage read back as a single value */
   13.76 +} cpu_perfcntr_t;
   13.77 +
   13.78 +typedef struct counters_t_struct {
   13.79 +    int                processor;
   13.80 +    unsigned long long tsc;
   13.81 +    cpu_perfcntr_t     counters[18];
   13.82 +} counters_t;
   13.83 +
   13.84 +typedef struct perfcntr_t_struct {
   13.85 +    unsigned long cpu_mask;
   13.86 +    counters_t    cpus[4]; // Actually for each cpu in system
   13.87 +} perfcntr_t;
   13.88 +
   13.89 +#ifdef HUMAN_READABLE
   13.90 +# define SHOW_COUNTER(c) rdmsr (c, l, h);\
   13.91 +    seq_printf(m, "0x%03x: 0x%08x%08x\n", c, h, l)
   13.92 +#else
   13.93 +# define SHOW_COUNTER(c) rdmsr (c, l, h);\
   13.94 +    seq_printf(m, " %llu", \
   13.95 +               (unsigned long long)h << 32 | (unsigned long long)l)
   13.96 +#endif
   13.97 +
   13.98 +#if 0
   13.99 +static unsigned long last_l = 0, last_h = 0, last_msr = 0;
  13.100 +static int last_cpu = 0;
  13.101 +#endif
  13.102 +
  13.103 +#define READ_COUNTER(_i, _msr) rdmsr((_msr), l, h); c->counters[_i].lo = l; \
  13.104 +    c->counters[_i].hi = h;
  13.105 +
  13.106 +static perfcntr_t perfcntrs;
  13.107 +
  13.108 +static void show_perfcntr_for(void *v)
  13.109 +{
  13.110 +    unsigned int l, h;
  13.111 +
  13.112 +    perfcntr_t *p = &perfcntrs;
  13.113 +    counters_t *c;
  13.114 +
  13.115 +    if (!((1 << smp_processor_id()) & p->cpu_mask))
  13.116 +        return;
  13.117 +
  13.118 +    c = &p->cpus[smp_processor_id()];
  13.119 +
  13.120 +    c->processor = smp_processor_id();
  13.121 +    c->tsc = rpcc();
  13.122 +
  13.123 +    READ_COUNTER(0,  MSR_P4_BPU_COUNTER0);
  13.124 +    READ_COUNTER(1,  MSR_P4_BPU_COUNTER1);
  13.125 +    READ_COUNTER(2,  MSR_P4_BPU_COUNTER2);
  13.126 +    READ_COUNTER(3,  MSR_P4_BPU_COUNTER3);
  13.127 +
  13.128 +    READ_COUNTER(4,  MSR_P4_MS_COUNTER0);
  13.129 +    READ_COUNTER(5,  MSR_P4_MS_COUNTER1);
  13.130 +    READ_COUNTER(6,  MSR_P4_MS_COUNTER2);
  13.131 +    READ_COUNTER(7,  MSR_P4_MS_COUNTER3);
  13.132 +
  13.133 +    READ_COUNTER(8,  MSR_P4_FLAME_COUNTER0);
  13.134 +    READ_COUNTER(9,  MSR_P4_FLAME_COUNTER1);
  13.135 +    READ_COUNTER(10, MSR_P4_FLAME_COUNTER2);
  13.136 +    READ_COUNTER(11, MSR_P4_FLAME_COUNTER3);
  13.137 +
  13.138 +    READ_COUNTER(12, MSR_P4_IQ_COUNTER0);
  13.139 +    READ_COUNTER(13, MSR_P4_IQ_COUNTER1);
  13.140 +    READ_COUNTER(14, MSR_P4_IQ_COUNTER2);
  13.141 +    READ_COUNTER(15, MSR_P4_IQ_COUNTER3);
  13.142 +    READ_COUNTER(16, MSR_P4_IQ_COUNTER4);
  13.143 +    READ_COUNTER(17, MSR_P4_IQ_COUNTER5);
  13.144 +
  13.145 +    return;    
  13.146 +}
  13.147 +
  13.148 +static int show_perfcntr(struct seq_file *m, void *v)
  13.149 +{
  13.150 +    int i, j;
  13.151 +
  13.152 +    // Get each physical cpu to read counters
  13.153 +    perfcntrs.cpu_mask = CPUMASK;
  13.154 +
  13.155 +    smp_call_function(show_perfcntr_for, NULL, 1, 1);
  13.156 +    show_perfcntr_for(NULL);
  13.157 +
  13.158 +    for (i = 0; i < 32; i++) {
  13.159 +        if (((1 << i) & (perfcntrs.cpu_mask = CPUMASK))) {
  13.160 +            counters_t *c = &perfcntrs.cpus[i];
  13.161 +            seq_printf(m, "%u %llu", c->processor, c->tsc);
  13.162 +            for (j = 0; j < 18; j++) {
  13.163 +                seq_printf(m, " %llu", c->counters[j].cnt);
  13.164 +            }
  13.165 +            seq_printf(m, "\n");
  13.166 +        }
  13.167 +    }
  13.168 +
  13.169 +#if 0
  13.170 +    unsigned long long t;
  13.171 +    unsigned int l, h;
  13.172 +
  13.173 +    t = rpcc();
  13.174 +
  13.175 +
  13.176 +
  13.177 +#ifdef HUMAN_READABLE
  13.178 +    seq_printf(m,
  13.179 +               "show_perfcntr\nprocessor: %u\ntime: %llu\n"
  13.180 +               "last write: 0x%08lx%08lx -> 0x%lx (CPU%u)\n",
  13.181 +               smp_processor_id(),
  13.182 +               t,
  13.183 +               last_h,
  13.184 +               last_l,
  13.185 +               last_msr,
  13.186 +               last_cpu);
  13.187 +#else
  13.188 +    seq_printf(m, "%u %llu", smp_processor_id(), t);
  13.189 +#endif
  13.190 +
  13.191 +    SHOW_COUNTER(MSR_P4_BPU_COUNTER0);
  13.192 +    SHOW_COUNTER(MSR_P4_BPU_COUNTER1);
  13.193 +    SHOW_COUNTER(MSR_P4_BPU_COUNTER2);
  13.194 +    SHOW_COUNTER(MSR_P4_BPU_COUNTER3);
  13.195 +
  13.196 +    SHOW_COUNTER(MSR_P4_MS_COUNTER0);
  13.197 +    SHOW_COUNTER(MSR_P4_MS_COUNTER1);
  13.198 +    SHOW_COUNTER(MSR_P4_MS_COUNTER2);
  13.199 +    SHOW_COUNTER(MSR_P4_MS_COUNTER3);
  13.200 +
  13.201 +    SHOW_COUNTER(MSR_P4_FLAME_COUNTER0);
  13.202 +    SHOW_COUNTER(MSR_P4_FLAME_COUNTER1);
  13.203 +    SHOW_COUNTER(MSR_P4_FLAME_COUNTER2);
  13.204 +    SHOW_COUNTER(MSR_P4_FLAME_COUNTER3);
  13.205 +
  13.206 +    SHOW_COUNTER(MSR_P4_IQ_COUNTER0);
  13.207 +    SHOW_COUNTER(MSR_P4_IQ_COUNTER1);
  13.208 +    SHOW_COUNTER(MSR_P4_IQ_COUNTER2);
  13.209 +    SHOW_COUNTER(MSR_P4_IQ_COUNTER3);
  13.210 +    SHOW_COUNTER(MSR_P4_IQ_COUNTER4);
  13.211 +    SHOW_COUNTER(MSR_P4_IQ_COUNTER5);
  13.212 +
  13.213 +#ifndef HUMAN_READBLE
  13.214 +    seq_printf(m, "\n");
  13.215 +#endif
  13.216 +
  13.217 +#endif
  13.218 +
  13.219 +    return 0;
  13.220 +}
  13.221 +
  13.222 +/*****************************************************************************
  13.223 + * Show counter configuration                                                *
  13.224 + *****************************************************************************/
  13.225 +
  13.226 +typedef union {
  13.227 +    struct {
  13.228 +        unsigned long lo;
  13.229 +        unsigned long hi;
  13.230 +    };
  13.231 +    unsigned long long cnt;
  13.232 +} cpu_perfcfg_t;
  13.233 +
  13.234 +typedef struct configs_t_struct {
  13.235 +    int                processor;
  13.236 +    unsigned long long tsc;
  13.237 +    cpu_perfcfg_t      cccr[18];
  13.238 +    cpu_perfcfg_t      escr[0x42];
  13.239 +} configs_t;
  13.240 +
  13.241 +typedef struct perfcfg_t_struct {
  13.242 +    unsigned long cpu_mask;
  13.243 +    configs_t     cpus[4]; // Actually for each cpu in system
  13.244 +} perfcfg_t;
  13.245 +
  13.246 +static perfcfg_t perfcfgs;
  13.247 +
  13.248 +#define READ_CCCR(_i, _msr) rdmsr((_msr), l, h); c->cccr[_i].lo = l; \
  13.249 +    c->cccr[_i].hi = h;
  13.250 +#define READ_ESCR(_i, _msr) rdmsr((_msr), l, h); c->escr[_i].lo = l; \
  13.251 +    c->escr[_i].hi = h;
  13.252 +
  13.253 +static void show_perfcfg_for(void *v)
  13.254 +{
  13.255 +    unsigned int l, h;
  13.256 +
  13.257 +    perfcfg_t *p = &perfcfgs;
  13.258 +    configs_t *c;
  13.259 +
  13.260 +    if (!((1 << smp_processor_id()) & p->cpu_mask))
  13.261 +        return;
  13.262 +
  13.263 +    c = &p->cpus[smp_processor_id()];
  13.264 +
  13.265 +    c->processor = smp_processor_id();
  13.266 +    c->tsc = rpcc();
  13.267 +
  13.268 +    READ_CCCR(0,  MSR_P4_BPU_CCCR0);
  13.269 +    READ_CCCR(1,  MSR_P4_BPU_CCCR1);
  13.270 +    READ_CCCR(2,  MSR_P4_BPU_CCCR2);
  13.271 +    READ_CCCR(3,  MSR_P4_BPU_CCCR3);
  13.272 +
  13.273 +    READ_CCCR(4,  MSR_P4_MS_CCCR0);
  13.274 +    READ_CCCR(5,  MSR_P4_MS_CCCR1);
  13.275 +    READ_CCCR(6,  MSR_P4_MS_CCCR2);
  13.276 +    READ_CCCR(7,  MSR_P4_MS_CCCR3);
  13.277 +
  13.278 +    READ_CCCR(8,  MSR_P4_FLAME_CCCR0);
  13.279 +    READ_CCCR(9,  MSR_P4_FLAME_CCCR1);
  13.280 +    READ_CCCR(10, MSR_P4_FLAME_CCCR2);
  13.281 +    READ_CCCR(11, MSR_P4_FLAME_CCCR3);
  13.282 +
  13.283 +    READ_CCCR(12, MSR_P4_IQ_CCCR0);
  13.284 +    READ_CCCR(13, MSR_P4_IQ_CCCR1);
  13.285 +    READ_CCCR(14, MSR_P4_IQ_CCCR2);
  13.286 +    READ_CCCR(15, MSR_P4_IQ_CCCR3);
  13.287 +    READ_CCCR(16, MSR_P4_IQ_CCCR4);
  13.288 +    READ_CCCR(17, MSR_P4_IQ_CCCR5);
  13.289 +
  13.290 +    READ_ESCR(0x00, MSR_P4_BSU_ESCR0);
  13.291 +    READ_ESCR(0x02, MSR_P4_FSB_ESCR0);
  13.292 +    READ_ESCR(0x0a, MSR_P4_MOB_ESCR0);
  13.293 +    READ_ESCR(0x0c, MSR_P4_PMH_ESCR0);
  13.294 +    READ_ESCR(0x12, MSR_P4_BPU_ESCR0);
  13.295 +    READ_ESCR(0x14, MSR_P4_IS_ESCR0);
  13.296 +    READ_ESCR(0x16, MSR_P4_ITLB_ESCR0);
  13.297 +    READ_ESCR(0x28, MSR_P4_IX_ESCR0);
  13.298 +    READ_ESCR(0x01, MSR_P4_BSU_ESCR1);
  13.299 +    READ_ESCR(0x03, MSR_P4_FSB_ESCR1);
  13.300 +    READ_ESCR(0x0b, MSR_P4_MOB_ESCR1);
  13.301 +    READ_ESCR(0x0d, MSR_P4_PMH_ESCR1);
  13.302 +    READ_ESCR(0x13, MSR_P4_BPU_ESCR1);
  13.303 +    READ_ESCR(0x15, MSR_P4_IS_ESCR1);
  13.304 +    READ_ESCR(0x17, MSR_P4_ITLB_ESCR1);
  13.305 +    READ_ESCR(0x29, MSR_P4_IX_ESCR1);
  13.306 +    READ_ESCR(0x20, MSR_P4_MS_ESCR0);
  13.307 +    READ_ESCR(0x22, MSR_P4_TBPU_ESCR0);
  13.308 +    READ_ESCR(0x24, MSR_P4_TC_ESCR0);
  13.309 +    READ_ESCR(0x21, MSR_P4_MS_ESCR1);
  13.310 +    READ_ESCR(0x23, MSR_P4_TBPU_ESCR1);
  13.311 +    READ_ESCR(0x25, MSR_P4_TC_ESCR1);
  13.312 +    READ_ESCR(0x04, MSR_P4_FIRM_ESCR0);
  13.313 +    READ_ESCR(0x06, MSR_P4_FLAME_ESCR0);
  13.314 +    READ_ESCR(0x08, MSR_P4_DAC_ESCR0);
  13.315 +    READ_ESCR(0x0e, MSR_P4_SAAT_ESCR0);
  13.316 +    READ_ESCR(0x10, MSR_P4_U2L_ESCR0);
  13.317 +    READ_ESCR(0x05, MSR_P4_FIRM_ESCR1);
  13.318 +    READ_ESCR(0x07, MSR_P4_FLAME_ESCR1);
  13.319 +    READ_ESCR(0x09, MSR_P4_DAC_ESCR1);
  13.320 +    READ_ESCR(0x0f, MSR_P4_SAAT_ESCR1);
  13.321 +    READ_ESCR(0x11, MSR_P4_U2L_ESCR1);
  13.322 +    READ_ESCR(0x18, MSR_P4_CRU_ESCR0);
  13.323 +    READ_ESCR(0x2c, MSR_P4_CRU_ESCR2);
  13.324 +    READ_ESCR(0x40, MSR_P4_CRU_ESCR4);
  13.325 +    READ_ESCR(0x1a, MSR_P4_IQ_ESCR0);
  13.326 +    READ_ESCR(0x1c, MSR_P4_RAT_ESCR0);
  13.327 +    READ_ESCR(0x1e, MSR_P4_SSU_ESCR0);
  13.328 +    READ_ESCR(0x2a, MSR_P4_ALF_ESCR0);
  13.329 +    READ_ESCR(0x19, MSR_P4_CRU_ESCR1);
  13.330 +    READ_ESCR(0x2d, MSR_P4_CRU_ESCR3);
  13.331 +    READ_ESCR(0x41, MSR_P4_CRU_ESCR5);
  13.332 +    READ_ESCR(0x1b, MSR_P4_IQ_ESCR1);
  13.333 +    READ_ESCR(0x1d, MSR_P4_RAT_ESCR1);
  13.334 +    READ_ESCR(0x2b, MSR_P4_ALF_ESCR1);
  13.335 +
  13.336 +    return;    
  13.337 +}
  13.338 +
  13.339 +static char *escr_names[] = {
  13.340 +    "BSU_ESCR0",
  13.341 +    "BSU_ESCR1",
  13.342 +    "FSB_ESCR0",
  13.343 +    "FSB_ESCR1",
  13.344 +    "FIRM_ESCR0",
  13.345 +    "FIRM_ESCR1",
  13.346 +    "FLAME_ESCR0",
  13.347 +    "FLAME_ESCR1",
  13.348 +    "DAC_ESCR0",
  13.349 +    "DAC_ESCR1",
  13.350 +    "MOB_ESCR0",
  13.351 +    "MOB_ESCR1",
  13.352 +    "PMH_ESCR0",
  13.353 +    "PMH_ESCR1",
  13.354 +    "SAAT_ESCR0",
  13.355 +    "SAAT_ESCR1",
  13.356 +    "U2L_ESCR0",
  13.357 +    "U2L_ESCR1",
  13.358 +    "BPU_ESCR0",
  13.359 +    "BPU_ESCR1",
  13.360 +    "IS_ESCR0",
  13.361 +    "IS_ESCR1",
  13.362 +    "ITLB_ESCR0",
  13.363 +    "ITLB_ESCR1",
  13.364 +    "CRU_ESCR0",
  13.365 +    "CRU_ESCR1",
  13.366 +    "IQ_ESCR0",
  13.367 +    "IQ_ESCR1",
  13.368 +    "RAT_ESCR0",
  13.369 +    "RAT_ESCR1",
  13.370 +    "SSU_ESCR0",
  13.371 +    "SSU_ESCR1",
  13.372 +    "MS_ESCR0",
  13.373 +    "MS_ESCR1",
  13.374 +    "TBPU_ESCR0",
  13.375 +    "TBPU_ESCR1",
  13.376 +    "TC_ESCR0",
  13.377 +    "TC_ESCR1",
  13.378 +    "0x3c6",
  13.379 +    "0x3c7",
  13.380 +    "IX_ESCR0",
  13.381 +    "IX_ESCR1",
  13.382 +    "ALF_ESCR0",
  13.383 +    "ALF_ESCR1",
  13.384 +    "CRU_ESCR2",
  13.385 +    "CRU_ESCR3",
  13.386 +    "0x3ce",
  13.387 +    "0x3cf",
  13.388 +    "0x3d0",
  13.389 +    "0x3d1",
  13.390 +    "0x3d2",
  13.391 +    "0x3d3",
  13.392 +    "0x3d4",
  13.393 +    "0x3d5",
  13.394 +    "0x3d6",
  13.395 +    "0x3d7",
  13.396 +    "0x3d8",
  13.397 +    "0x3d9",
  13.398 +    "0x3da",
  13.399 +    "0x3db",
  13.400 +    "0x3dc",
  13.401 +    "0x3dd",
  13.402 +    "0x3de",
  13.403 +    "0x3df",
  13.404 +    "CRU_ESCR4",
  13.405 +    "CRU_ESCR5"
  13.406 +};
  13.407 +
  13.408 +static unsigned long escr_map_0[] = 
  13.409 +{MSR_P4_BPU_ESCR0, MSR_P4_IS_ESCR0,
  13.410 + MSR_P4_MOB_ESCR0, MSR_P4_ITLB_ESCR0,
  13.411 + MSR_P4_PMH_ESCR0, MSR_P4_IX_ESCR0,
  13.412 + MSR_P4_FSB_ESCR0, MSR_P4_BSU_ESCR0}; //BPU even
  13.413 +static unsigned long escr_map_1[] = 
  13.414 +    {MSR_P4_BPU_ESCR1, MSR_P4_IS_ESCR1,
  13.415 +     MSR_P4_MOB_ESCR1, MSR_P4_ITLB_ESCR1,
  13.416 +     MSR_P4_PMH_ESCR1, MSR_P4_IX_ESCR1,
  13.417 +     MSR_P4_FSB_ESCR1, MSR_P4_BSU_ESCR1}; //BPU odd
  13.418 +static unsigned long escr_map_2[] = 
  13.419 +    {MSR_P4_MS_ESCR0, MSR_P4_TC_ESCR0, MSR_P4_TBPU_ESCR0,
  13.420 +     0, 0, 0, 0, 0}; //MS even
  13.421 +static unsigned long escr_map_3[] = 
  13.422 +    {MSR_P4_MS_ESCR1, MSR_P4_TC_ESCR1, MSR_P4_TBPU_ESCR1,
  13.423 +     0, 0, 0, 0, 0}; //MS odd
  13.424 +static unsigned long escr_map_4[] = 
  13.425 +    {MSR_P4_FLAME_ESCR0, MSR_P4_FIRM_ESCR0, MSR_P4_SAAT_ESCR0,
  13.426 +     MSR_P4_U2L_ESCR0, 0, MSR_P4_DAC_ESCR0, 0, 0}; //FLAME even
  13.427 +static unsigned long escr_map_5[] = 
  13.428 +    {MSR_P4_FLAME_ESCR1, MSR_P4_FIRM_ESCR1, MSR_P4_SAAT_ESCR1,
  13.429 +     MSR_P4_U2L_ESCR1, 0, MSR_P4_DAC_ESCR1, 0, 0}; //FLAME odd
  13.430 +static unsigned long escr_map_6[] = 
  13.431 +    {MSR_P4_IQ_ESCR0, MSR_P4_ALF_ESCR0,
  13.432 +     MSR_P4_RAT_ESCR0, MSR_P4_SSU_ESCR0,
  13.433 +     MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR4, 0}; //IQ even
  13.434 +static unsigned long escr_map_7[] = 
  13.435 +    {MSR_P4_IQ_ESCR1, MSR_P4_ALF_ESCR1,
  13.436 +     MSR_P4_RAT_ESCR1, 0,
  13.437 +     MSR_P4_CRU_ESCR1, MSR_P4_CRU_ESCR3, MSR_P4_CRU_ESCR5, 0}; //IQ odd
  13.438 +
  13.439 +static unsigned long *escr_map[] = {
  13.440 +    escr_map_0,
  13.441 +    escr_map_1,
  13.442 +    escr_map_2,
  13.443 +    escr_map_3,
  13.444 +    escr_map_4,
  13.445 +    escr_map_5,
  13.446 +    escr_map_6,
  13.447 +    escr_map_7,
  13.448 +};
  13.449 +
  13.450 +unsigned long get_escr_msr(int c, int e)
  13.451 +{
  13.452 +    int index = -1;
  13.453 +
  13.454 +    // Get the ESCR MSR address from the counter number and the ESCR number.
  13.455 +    switch (c) {
  13.456 +    case P4_BPU_COUNTER0_NUMBER:
  13.457 +    case P4_BPU_COUNTER1_NUMBER:
  13.458 +	index = 0;
  13.459 +	break;
  13.460 +    case P4_BPU_COUNTER2_NUMBER:
  13.461 +    case P4_BPU_COUNTER3_NUMBER:	
  13.462 +	index = 1;
  13.463 +	break;
  13.464 +    case P4_MS_COUNTER0_NUMBER:
  13.465 +    case P4_MS_COUNTER1_NUMBER:
  13.466 +	index = 2; // probably !
  13.467 +	break;
  13.468 +    case P4_MS_COUNTER2_NUMBER:
  13.469 +    case P4_MS_COUNTER3_NUMBER:
  13.470 +	index = 3; // probably !
  13.471 +	break;
  13.472 +    case P4_FLAME_COUNTER0_NUMBER:
  13.473 +    case P4_FLAME_COUNTER1_NUMBER:
  13.474 +	index = 4; // probably !
  13.475 +	break;
  13.476 +    case P4_FLAME_COUNTER2_NUMBER:
  13.477 +    case P4_FLAME_COUNTER3_NUMBER:
  13.478 +	index = 5; // probably !
  13.479 +	break;
  13.480 +    case P4_IQ_COUNTER0_NUMBER:
  13.481 +    case P4_IQ_COUNTER1_NUMBER:
  13.482 +    case P4_IQ_COUNTER4_NUMBER:
  13.483 +	index = 6;
  13.484 +	break;
  13.485 +    case P4_IQ_COUNTER2_NUMBER:
  13.486 +    case P4_IQ_COUNTER3_NUMBER:
  13.487 +    case P4_IQ_COUNTER5_NUMBER:
  13.488 +	index = 7;
  13.489 +	break;
  13.490 +    }
  13.491 +
  13.492 +    if (index != -1) {
  13.493 +	return escr_map[index][e];
  13.494 +    }
  13.495 +
  13.496 +    return 0;
  13.497 +}
  13.498 +
  13.499 +static char null_string[] = "";
  13.500 +static char *get_escr(int c, int e)
  13.501 +{
  13.502 +    unsigned long msr = get_escr_msr(c, e);
  13.503 +
  13.504 +    if ((msr >= 0x3a0) && (msr <= 0x3e1))
  13.505 +	return escr_names[(int)(msr - 0x3a0)];
  13.506 +    return null_string;
  13.507 +}
  13.508 +
  13.509 +static int show_perfcfg(struct seq_file *m, void *v)
  13.510 +{
  13.511 +    int i, j;
  13.512 +    // seq_file show(): per CPU in CPUMASK, dump all 18 CCCRs and the ESCR each selects.
  13.513 +    // Get each physical cpu to read configs
  13.514 +    perfcfgs.cpu_mask = CPUMASK;
  13.515 +
  13.516 +    smp_call_function(show_perfcfg_for, NULL, 1, 1);
  13.517 +    show_perfcfg_for(NULL);
  13.518 +
  13.519 +    for (i = 0; i < 32; i++) {
  13.520 +        if ((1UL << i) & perfcfgs.cpu_mask) {
  13.521 +            configs_t *c = &perfcfgs.cpus[i];
  13.522 +            seq_printf(m, "----------------------------------------\n");
  13.523 +            seq_printf(m, "%u %llu\n", c->processor, c->tsc);
  13.524 +            for (j = 0; j < 18; j++) {
  13.525 +                seq_printf(m, "%08lx", c->cccr[j].lo);
  13.526 +                // ESCR lookup is keyed by counter j (not CPU i); assumes P4_*_NUMBER == j - TODO confirm
  13.527 +		if (!(c->cccr[j].lo & P4_CCCR_ENABLE))
  13.528 +		    seq_printf(m, " DISABLED");
  13.529 +		else {
  13.530 +		    unsigned long escr_msr =
  13.531 +			get_escr_msr(j, (int)((c->cccr[j].lo >> 13)&7));
  13.532 +		    seq_printf(m, " ESCR=%s",
  13.533 +			       get_escr(j, (int)((c->cccr[j].lo >> 13)&7)));
  13.534 +		    if ((escr_msr >= 0x3a0) && (escr_msr <= 0x3e1)) {
  13.535 +			unsigned long e = c->escr[(int)(escr_msr - 0x3a0)].lo;
  13.536 +			seq_printf(m, "(%08lx es=%lx mask=%lx", e,
  13.537 +				   (e >> 25) & 0x7f,
  13.538 +				   (e >> 9) & 0xffff);
  13.539 +			if ((e & P4_ESCR_T0_USR))
  13.540 +			    seq_printf(m, " T(0)USR");
  13.541 +			if ((e & P4_ESCR_T0_OS))
  13.542 +			    seq_printf(m, " T(0)OS");
  13.543 +			if ((e & P4_ESCR_T1_USR))
  13.544 +			    seq_printf(m, " T1USR");
  13.545 +			if ((e & P4_ESCR_T1_OS))
  13.546 +			    seq_printf(m, " T1OS");
  13.547 +			seq_printf(m, ")");
  13.548 +		    }
  13.549 +		    seq_printf(m, " AT=%u", (int)((c->cccr[j].lo >> 16)&3));
  13.550 +
  13.551 +		    if ((c->cccr[j].lo & P4_CCCR_OVF))
  13.552 +			seq_printf(m, " OVF");
  13.553 +		    if ((c->cccr[j].lo & P4_CCCR_CASCADE))
  13.554 +			seq_printf(m, " CASC");
  13.555 +		    if ((c->cccr[j].lo & P4_CCCR_FORCE_OVF))
  13.556 +			seq_printf(m, " F-OVF");
  13.557 +		    if ((c->cccr[j].lo & P4_CCCR_EDGE))
  13.558 +			seq_printf(m, " EDGE");
  13.559 +		    if ((c->cccr[j].lo & P4_CCCR_COMPLEMENT))
  13.560 +			seq_printf(m, " COMPL");
  13.561 +		    if ((c->cccr[j].lo & P4_CCCR_COMPARE))
  13.562 +			seq_printf(m, " CMP");
  13.563 +		    if ((c->cccr[j].lo & P4_CCCR_OVF_PMI_T0))
  13.564 +			seq_printf(m, " OVF_PMI(_T0)");
  13.565 +		    if ((c->cccr[j].lo & P4_CCCR_OVF_PMI_T1))
  13.566 +			seq_printf(m, " OVF_PMI_T1");
  13.567 +		}
  13.568 +		seq_printf(m, "\n");
  13.569 +            }
  13.570 +        }
  13.571 +    }
  13.572 +
  13.573 +    return 0;
  13.574 +}
  13.575 +
  13.576 +/*****************************************************************************
  13.577 + * Handle writes                                                             *
  13.578 + *****************************************************************************/
  13.579 +
  13.580 +static int set_msr_cpu_mask;
  13.581 +static unsigned long set_msr_addr;
  13.582 +static unsigned long set_msr_lo;
  13.583 +static unsigned long set_msr_hi;
  13.584 +/* Per-CPU callback: performs wrmsr(set_msr_addr, set_msr_lo, set_msr_hi) on the CPU it runs on. */
  13.585 +static void perfcntr_write_for(void *unused)
  13.586 +{
  13.587 +#ifdef NOHT
  13.588 +    if (((1 << smp_processor_id()) & set_msr_cpu_mask)) { /* only CPUs whose bit is set in the mask */
  13.589 +#endif
  13.590 +        //printk("perfcntr: wrmsr(%08lx, %08lx, %08lx)\n",
  13.591 +        //     set_msr_addr, set_msr_lo, set_msr_hi);
  13.592 +        wrmsr(set_msr_addr, set_msr_lo, set_msr_hi);
  13.593 +#ifdef NOHT
  13.594 +    } /* without NOHT the mask test is compiled out and the write is unconditional */
  13.595 +#endif
  13.596 +}
  13.597 +
  13.598 +/*
  13.599 + * write() handler shared by both /proc entries.  Expects the text
  13.600 + * "<cpu_mask> <msr> <value>" (all hexadecimal), stores the parsed request
  13.601 + * in the set_msr_* globals and runs perfcntr_write_for() on every CPU.
  13.602 + * Returns size on success, -EINVAL on malformed or oversized input and
  13.603 + * -EFAULT if the user buffer cannot be read.
  13.604 + */
  13.605 +ssize_t perfcntr_write(struct file *f,
  13.606 +                       const  char *data,
  13.607 +                       size_t       size,
  13.608 +                       loff_t      *pos)
  13.609 +{
  13.610 +    char               kbuf[64];   /* kernel-side copy of the request */
  13.611 +    char              *endp;
  13.612 +    unsigned long long v;
  13.613 +
  13.614 +    // 'data' points into user space: copy it in and NUL-terminate it so
  13.615 +    // the parser never dereferences user memory or runs off the end.
  13.616 +    if (size == 0 || size >= sizeof(kbuf))
  13.617 +        return -EINVAL;
  13.618 +    if (copy_from_user(kbuf, data, size))
  13.619 +        return -EFAULT;
  13.620 +    kbuf[size] = '\0';
  13.621 +
  13.622 +    set_msr_cpu_mask = (int)simple_strtoul(kbuf, &endp, 16);
  13.623 +    endp++; // skip past space
  13.624 +    if ((endp - kbuf) >= size)
  13.625 +        return -EINVAL;
  13.626 +
  13.627 +    set_msr_addr = simple_strtoul(endp, &endp, 16);
  13.628 +    endp++; // skip past space
  13.629 +    if ((endp - kbuf) >= size)
  13.630 +        return -EINVAL;
  13.631 +
  13.632 +    // The value is 64 bits wide; simple_strtoul() only returns unsigned long.
  13.633 +    v = simple_strtoull(endp, &endp, 16);
  13.634 +    set_msr_lo = (unsigned long)(v & 0xffffffffULL);
  13.635 +    set_msr_hi = (unsigned long)(v >> 32);
  13.636 +
  13.637 +    smp_call_function(perfcntr_write_for, NULL, 1, 1);
  13.638 +    perfcntr_write_for(NULL);
  13.639 +    return size;
  13.640 +}
  13.641 +
  13.642 +/*****************************************************************************
  13.643 + * /proc stuff                                                               *
  13.644 + *****************************************************************************/
  13.645 +
  13.646 +static void *c_start(struct seq_file *m, loff_t *pos)
  13.647 +{
  13.648 +    //return *pos < NR_CPUS ? cpu_data + *pos : NULL;
  13.649 +    return *pos == 0 ? foobar : NULL;
  13.650 +}
  13.651 +
  13.652 +static void *c_next(struct seq_file *m, void *v, loff_t *pos)
  13.653 +{
  13.654 +    ++*pos;
  13.655 +    return c_start(m, pos);
  13.656 +}
  13.657 +
  13.658 +static void c_stop(struct seq_file *m, void *v)
  13.659 +{
  13.660 +}
  13.661 +
  13.662 +struct seq_operations perfcntr_op = {
  13.663 +    start:  c_start,
  13.664 +    next:   c_next,
  13.665 +    stop:   c_stop,
  13.666 +    show:   show_perfcntr,
  13.667 +};
  13.668 +
  13.669 +struct seq_operations perfcfg_op = {
  13.670 +    start:  c_start,
  13.671 +    next:   c_next,
  13.672 +    stop:   c_stop,
  13.673 +    show:   show_perfcfg,
  13.674 +};
  13.675 +
  13.676 +static int perfcntr_open(struct inode *inode, struct file *file)
  13.677 +{
  13.678 +    return seq_open(file, &perfcntr_op);
  13.679 +}
  13.680 +
  13.681 +static int perfcfg_open(struct inode *inode, struct file *file)
  13.682 +{
  13.683 +    return seq_open(file, &perfcfg_op);
  13.684 +}
  13.685 +
  13.686 +static struct file_operations proc_perfcntr_operations = {
  13.687 +    open:           perfcntr_open,
  13.688 +    read:           seq_read,
  13.689 +    write:          perfcntr_write,
  13.690 +    llseek:         seq_lseek,
  13.691 +    release:        seq_release,
  13.692 +};
  13.693 +
  13.694 +static struct file_operations proc_perfcfg_operations = {
  13.695 +    open:           perfcfg_open,
  13.696 +    read:           seq_read,
  13.697 +    write:          perfcntr_write,
  13.698 +    llseek:         seq_lseek,
  13.699 +    release:        seq_release,
  13.700 +};
  13.701 +/* Create proc entry 'name' with permissions 'mode' and attach file operations 'f' to it. */
  13.702 +static void create_seq_entry(char *name, mode_t mode, struct file_operations *f)
  13.703 +{
  13.704 +    struct proc_dir_entry *entry;
  13.705 +    entry = create_proc_entry(name, mode, NULL);
  13.706 +    if (entry)                 /* a failed creation is silently ignored */
  13.707 +        entry->proc_fops = f;
  13.708 +}
  13.709 +
  13.710 +/*****************************************************************************
  13.711 + * Module init and cleanup                                                   *
  13.712 + *****************************************************************************/
  13.713 +
  13.714 +static int __init perfcntr_init(void)
  13.715 +{
  13.716 +    printk(version);  // announce the driver at module load
  13.717 +    // Create both /proc entries.  Mode 0644, not 0777: a write performs
  13.718 +    // wrmsr on a caller-chosen MSR, so only the owner (root) may write.
  13.719 +    create_seq_entry("perfcntr", 0644, &proc_perfcntr_operations);
  13.720 +    create_seq_entry("perfcntr_config", 0644, &proc_perfcfg_operations);
  13.721 +    return 0;
  13.722 +}
  13.723 +
  13.724 +static void __exit perfcntr_exit(void)
  13.725 +{
  13.726 +    remove_proc_entry("perfcntr", NULL);
  13.727 +    remove_proc_entry("perfcntr_config", NULL);
  13.728 +}
  13.729 +
  13.730 +module_init(perfcntr_init);
  13.731 +module_exit(perfcntr_exit);
  13.732 +
  13.733 +/* End of $RCSfile$ */
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/tools/misc/cpuperf/p4perf.h	Wed Mar 02 17:18:39 2005 +0000
    14.3 @@ -0,0 +1,382 @@
    14.4 +/*
    14.5 + * P4 Performance counter stuff.
    14.6 + *
    14.7 + * P4 Xeon with Hyperthreading has counters per physical package which can
    14.8 + * count events from either logical CPU. However, in many cases more than
    14.9 + * one ESCR and CCCR/counter can be used to count the same event. For instr or
   14.10 + * uops retired, use either ESCR0/IQ_CCCR0 or ESCR1/IQ_CCCR2.
   14.11 + *
   14.12 + * $Id: p4perf.h,v 1.2 2003/10/13 16:51:41 jrb44 Exp $
   14.13 + *
   14.14 + * $Log: p4perf.h,v $
   14.15 + * Revision 1.2  2003/10/13 16:51:41  jrb44
   14.16 + * *** empty log message ***
   14.17 + *
   14.18 + */
   14.19 +
   14.20 +#ifndef P4PERF_H
   14.21 +#define P4PERF_H
   14.22 +
   14.23 +#ifdef __KERNEL__
   14.24 +#include <asm/msr.h>
   14.25 +#endif
   14.26 +
   14.27 +/*****************************************************************************
   14.28 + * Performance counter configuration.                                        *
   14.29 + *****************************************************************************/
   14.30 +
   14.31 +#ifndef P6_EVNTSEL_OS
   14.32 +# define P6_EVNTSEL_OS     (1 << 17)
   14.33 +# define P6_EVNTSEL_USR    (1 << 16)
   14.34 +# define P6_EVNTSEL_E      (1 << 18)
   14.35 +# define P6_EVNTSEL_EN     (1 << 22)
   14.36 +#endif
   14.37 +#define P6_PERF_INST_RETIRED 0xc0
   14.38 +#define P6_PERF_UOPS_RETIRED 0xc2
   14.39 +
   14.40 +#define P4_ESCR_USR                    (1 << 2)
   14.41 +#define P4_ESCR_OS                     (1 << 3)
   14.42 +#define P4_ESCR_T0_USR                 (1 << 2) /* First logical CPU  */
   14.43 +#define P4_ESCR_T0_OS                  (1 << 3)
   14.44 +#define P4_ESCR_T1_USR                 (1 << 0) /* Second logical CPU */
   14.45 +#define P4_ESCR_T1_OS                  (1 << 1)
   14.46 +#define P4_ESCR_TE                     (1 << 4)
   14.47 +#define P4_ESCR_THREADS(t)             (t)
   14.48 +#define P4_ESCR_TV(tag)                ((tag) << 5)  /* arguments parenthesized so   */
   14.49 +#define P4_ESCR_EVNTSEL(e)             ((e) << 25)   /* expressions such as (a | b)  */
   14.50 +#define P4_ESCR_EVNTMASK(e)            ((e) << 9)    /* are shifted as a whole       */
   14.51 +
   14.52 +#define P4_ESCR_EVNTSEL_FRONT_END      0x08 /* event-select codes; presumably arguments for P4_ESCR_EVNTSEL() - confirm */
   14.53 +#define P4_ESCR_EVNTSEL_EXECUTION      0x0c
   14.54 +#define P4_ESCR_EVNTSEL_REPLAY         0x09
   14.55 +#define P4_ESCR_EVNTSEL_INSTR_RETIRED  0x02
   14.56 +#define P4_ESCR_EVNTSEL_UOPS_RETIRED   0x01
   14.57 +#define P4_ESCR_EVNTSEL_UOP_TYPE       0x02 /* same code as INSTR_RETIRED above */
   14.58 +#define P4_ESCR_EVNTSEL_RET_MBR_TYPE   0x05
   14.59 +//#define P4_ESCR_EVNTSEL_RET_MBR_TYPE   0x04   (disabled alternative value for RET_MBR_TYPE)
   14.60 +
   14.61 +#define P4_ESCR_EVNTMASK_FE_NBOGUS     0x01 /* event-mask bits, one group per event; presumably arguments for P4_ESCR_EVNTMASK() - confirm */
   14.62 +#define P4_ESCR_EVNTMASK_FE_BOGUS      0x02
   14.63 +
   14.64 +#define P4_ESCR_EVNTMASK_EXEC_NBOGUS0  0x01 /* EXEC group: NBOGUS0-3 in bits 0-3, BOGUS0-3 in bits 4-7 */
   14.65 +#define P4_ESCR_EVNTMASK_EXEC_NBOGUS1  0x02
   14.66 +#define P4_ESCR_EVNTMASK_EXEC_NBOGUS2  0x04
   14.67 +#define P4_ESCR_EVNTMASK_EXEC_NBOGUS3  0x08
   14.68 +#define P4_ESCR_EVNTMASK_EXEC_BOGUS0   0x10
   14.69 +#define P4_ESCR_EVNTMASK_EXEC_BOGUS1   0x20
   14.70 +#define P4_ESCR_EVNTMASK_EXEC_BOGUS2   0x40
   14.71 +#define P4_ESCR_EVNTMASK_EXEC_BOGUS3   0x80
   14.72 +
   14.73 +#define P4_ESCR_EVNTMASK_REPLAY_NBOGUS 0x01
   14.74 +#define P4_ESCR_EVNTMASK_REPLAY_BOGUS  0x02
   14.75 +
   14.76 +#define P4_ESCR_EVNTMASK_IRET_NB_NTAG  0x01 /* IRET group: the four bits OR together to 0x0f */
   14.77 +#define P4_ESCR_EVNTMASK_IRET_NB_TAG   0x02
   14.78 +#define P4_ESCR_EVNTMASK_IRET_B_NTAG   0x04
   14.79 +#define P4_ESCR_EVNTMASK_IRET_B_TAG    0x08
   14.80 +
   14.81 +#define P4_ESCR_EVNTMASK_URET_NBOGUS   0x01
   14.82 +#define P4_ESCR_EVNTMASK_URET_BOGUS    0x02
   14.83 +
   14.84 +#define P4_ESCR_EVNTMASK_UOP_LOADS     0x02
   14.85 +#define P4_ESCR_EVNTMASK_UOP_STORES    0x04
   14.86 +
   14.87 +#define P4_ESCR_EVNTMASK_RMBRT_COND    0x02 /* RMBRT and RBRT groups carry identical bit values */
   14.88 +#define P4_ESCR_EVNTMASK_RMBRT_CALL    0x04
   14.89 +#define P4_ESCR_EVNTMASK_RMBRT_RETURN  0x08
   14.90 +#define P4_ESCR_EVNTMASK_RMBRT_INDIR   0x10
   14.91 +
   14.92 +#define P4_ESCR_EVNTMASK_RBRT_COND     0x02
   14.93 +#define P4_ESCR_EVNTMASK_RBRT_CALL     0x04
   14.94 +#define P4_ESCR_EVNTMASK_RBRT_RETURN   0x08
   14.95 +#define P4_ESCR_EVNTMASK_RBRT_INDIR    0x10
   14.96 +
   14.97 +//#define P4_ESCR_EVNTMASK_INSTR_RETIRED 0x01  /* Non bogus, not tagged */
   14.98 +//#define P4_ESCR_EVNTMASK_UOPS_RETIRED  0x01  /* Non bogus             */
   14.99 +
  14.100 +#define P4_CCCR_OVF                    (1U << 31) /* unsigned: 1 << 31 overflows int and sign-extends when widened */
  14.101 +#define P4_CCCR_CASCADE                (1 << 30)
  14.102 +#define P4_CCCR_FORCE_OVF              (1 << 25)
  14.103 +#define P4_CCCR_EDGE                   (1 << 24)
  14.104 +#define P4_CCCR_COMPLEMENT             (1 << 19)
  14.105 +#define P4_CCCR_COMPARE                (1 << 18)
  14.106 +#define P4_CCCR_THRESHOLD(t)           ((t) << 20) /* field builders parenthesise their argument so OR-ed values shift as a whole */
  14.107 +#define P4_CCCR_ENABLE                 (1 << 12)
  14.108 +#define P4_CCCR_ESCR(escr)             ((escr) << 13)
  14.109 +#define P4_CCCR_ACTIVE_THREAD(t)       ((t) << 16)   /* Set t to 3 (binary 11); same bits as P4_CCCR_RESERVED */
  14.110 +#define P4_CCCR_OVF_PMI_T0             (1 << 26)
  14.111 +#define P4_CCCR_OVF_PMI_T1             (1 << 27)
  14.112 +#define P4_CCCR_RESERVED               (3 << 16)
  14.113 +#define P4_CCCR_OVF_PMI                (1 << 26) /* same bit as P4_CCCR_OVF_PMI_T0 */
  14.114 +
  14.115 +// BPU group: counter MSRs 0x300-0x303, CCCR MSRs 0x360-0x363, then the group's ESCR MSRs and P4_*_NUMBER values
  14.116 +#define MSR_P4_BPU_COUNTER0            0x300
  14.117 +#define MSR_P4_BPU_COUNTER1            0x301
  14.118 +#define MSR_P4_BPU_CCCR0               0x360
  14.119 +#define MSR_P4_BPU_CCCR1               0x361
  14.120 +
  14.121 +#define MSR_P4_BPU_COUNTER2            0x302
  14.122 +#define MSR_P4_BPU_COUNTER3            0x303
  14.123 +#define MSR_P4_BPU_CCCR2               0x362
  14.124 +#define MSR_P4_BPU_CCCR3               0x363
  14.125 +
  14.126 +#define MSR_P4_BSU_ESCR0               0x3a0 /* ESCR0 MSR addresses (even); each ESCR1 below is the next address */
  14.127 +#define MSR_P4_FSB_ESCR0               0x3a2
  14.128 +#define MSR_P4_MOB_ESCR0               0x3aa
  14.129 +#define MSR_P4_PMH_ESCR0               0x3ac
  14.130 +#define MSR_P4_BPU_ESCR0               0x3b2
  14.131 +#define MSR_P4_IS_ESCR0                0x3b4
  14.132 +#define MSR_P4_ITLB_ESCR0              0x3b6
  14.133 +#define MSR_P4_IX_ESCR0                0x3c8
  14.134 +
  14.135 +#define P4_BSU_ESCR0_NUMBER            7 /* NOTE(review): presumably the value for P4_CCCR_ESCR() - confirm */
  14.136 +#define P4_FSB_ESCR0_NUMBER            6
  14.137 +#define P4_MOB_ESCR0_NUMBER            2
  14.138 +#define P4_PMH_ESCR0_NUMBER            4
  14.139 +#define P4_BPU_ESCR0_NUMBER            0
  14.140 +#define P4_IS_ESCR0_NUMBER             1
  14.141 +#define P4_ITLB_ESCR0_NUMBER           3
  14.142 +#define P4_IX_ESCR0_NUMBER             5
  14.143 +
  14.144 +#define MSR_P4_BSU_ESCR1               0x3a1
  14.145 +#define MSR_P4_FSB_ESCR1               0x3a3
  14.146 +#define MSR_P4_MOB_ESCR1               0x3ab
  14.147 +#define MSR_P4_PMH_ESCR1               0x3ad
  14.148 +#define MSR_P4_BPU_ESCR1               0x3b3
  14.149 +#define MSR_P4_IS_ESCR1                0x3b5
  14.150 +#define MSR_P4_ITLB_ESCR1              0x3b7
  14.151 +#define MSR_P4_IX_ESCR1                0x3c9
  14.152 +
  14.153 +#define P4_BSU_ESCR1_NUMBER            7 /* each ESCR1 number equals its ESCR0 counterpart above */
  14.154 +#define P4_FSB_ESCR1_NUMBER            6
  14.155 +#define P4_MOB_ESCR1_NUMBER            2
  14.156 +#define P4_PMH_ESCR1_NUMBER            4
  14.157 +#define P4_BPU_ESCR1_NUMBER            0
  14.158 +#define P4_IS_ESCR1_NUMBER             1
  14.159 +#define P4_ITLB_ESCR1_NUMBER           3
  14.160 +#define P4_IX_ESCR1_NUMBER             5
  14.161 +
  14.162 +// MS group: counter MSRs 0x304-0x307, CCCR MSRs 0x364-0x367, then the group's ESCR MSRs and P4_*_NUMBER values
  14.163 +#define MSR_P4_MS_COUNTER0             0x304
  14.164 +#define MSR_P4_MS_COUNTER1             0x305
  14.165 +#define MSR_P4_MS_CCCR0                0x364
  14.166 +#define MSR_P4_MS_CCCR1                0x365
  14.167 +
  14.168 +#define MSR_P4_MS_COUNTER2             0x306
  14.169 +#define MSR_P4_MS_COUNTER3             0x307
  14.170 +#define MSR_P4_MS_CCCR2                0x366
  14.171 +#define MSR_P4_MS_CCCR3                0x367
  14.172 +
  14.173 +#define MSR_P4_MS_ESCR0                0x3c0 /* ESCR0 MSR addresses (even); each ESCR1 below is the next address */
  14.174 +#define MSR_P4_TBPU_ESCR0              0x3c2
  14.175 +#define MSR_P4_TC_ESCR0                0x3c4
  14.176 +
  14.177 +#define P4_MS_ESCR0_NUMBER             0 /* NOTE(review): presumably the value for P4_CCCR_ESCR() - confirm */
  14.178 +#define P4_TBPU_ESCR0_NUMBER           2
  14.179 +#define P4_TC_ESCR0_NUMBER             1
  14.180 +
  14.181 +#define MSR_P4_MS_ESCR1                0x3c1
  14.182 +#define MSR_P4_TBPU_ESCR1              0x3c3
  14.183 +#define MSR_P4_TC_ESCR1                0x3c5
  14.184 +
  14.185 +#define P4_MS_ESCR1_NUMBER             0 /* each ESCR1 number equals its ESCR0 counterpart above */
  14.186 +#define P4_TBPU_ESCR1_NUMBER           2
  14.187 +#define P4_TC_ESCR1_NUMBER             1
  14.188 +
  14.189 +// FLAME group: counter MSRs 0x308-0x30b, CCCR MSRs 0x368-0x36b, then the group's ESCR MSRs and P4_*_NUMBER values
  14.190 +#define MSR_P4_FLAME_COUNTER0          0x308
  14.191 +#define MSR_P4_FLAME_COUNTER1          0x309
  14.192 +#define MSR_P4_FLAME_CCCR0             0x368
  14.193 +#define MSR_P4_FLAME_CCCR1             0x369
  14.194 +
  14.195 +#define MSR_P4_FLAME_COUNTER2          0x30a
  14.196 +#define MSR_P4_FLAME_COUNTER3          0x30b
  14.197 +#define MSR_P4_FLAME_CCCR2             0x36a
  14.198 +#define MSR_P4_FLAME_CCCR3             0x36b
  14.199 +
  14.200 +#define MSR_P4_FIRM_ESCR0              0x3a4 /* ESCR0 MSR addresses (even); each ESCR1 below is the next address */
  14.201 +#define MSR_P4_FLAME_ESCR0             0x3a6
  14.202 +#define MSR_P4_DAC_ESCR0               0x3a8
  14.203 +#define MSR_P4_SAAT_ESCR0              0x3ae
  14.204 +#define MSR_P4_U2L_ESCR0               0x3b0
  14.205 +
  14.206 +#define P4_FIRM_ESCR0_NUMBER           1 /* NOTE(review): presumably the value for P4_CCCR_ESCR() - confirm */
  14.207 +#define P4_FLAME_ESCR0_NUMBER          0
  14.208 +#define P4_DAC_ESCR0_NUMBER            5
  14.209 +#define P4_SAAT_ESCR0_NUMBER           2
  14.210 +#define P4_U2L_ESCR0_NUMBER            3
  14.211 +
  14.212 +#define MSR_P4_FIRM_ESCR1              0x3a5
  14.213 +#define MSR_P4_FLAME_ESCR1             0x3a7
  14.214 +#define MSR_P4_DAC_ESCR1               0x3a9
  14.215 +#define MSR_P4_SAAT_ESCR1              0x3af
  14.216 +#define MSR_P4_U2L_ESCR1               0x3b1
  14.217 +
  14.218 +#define P4_FIRM_ESCR1_NUMBER           1 /* each ESCR1 number equals its ESCR0 counterpart above */
  14.219 +#define P4_FLAME_ESCR1_NUMBER          0
  14.220 +#define P4_DAC_ESCR1_NUMBER            5
  14.221 +#define P4_SAAT_ESCR1_NUMBER           2
  14.222 +#define P4_U2L_ESCR1_NUMBER            3
  14.223 +
  14.224 +// IQ group: counter MSRs 0x30c-0x311, CCCR MSRs 0x36c-0x371, then the group's ESCR MSRs and P4_*_NUMBER values
  14.225 +#define MSR_P4_IQ_COUNTER0             0x30c
  14.226 +#define MSR_P4_IQ_COUNTER1             0x30d
  14.227 +#define MSR_P4_IQ_CCCR0                0x36c
  14.228 +#define MSR_P4_IQ_CCCR1                0x36d
  14.229 +
  14.230 +#define MSR_P4_IQ_COUNTER2             0x30e
  14.231 +#define MSR_P4_IQ_COUNTER3             0x30f
  14.232 +#define MSR_P4_IQ_CCCR2                0x36e
  14.233 +#define MSR_P4_IQ_CCCR3                0x36f
  14.234 +
  14.235 +#define MSR_P4_IQ_COUNTER4             0x310
  14.236 +#define MSR_P4_IQ_COUNTER5             0x311
  14.237 +#define MSR_P4_IQ_CCCR4                0x370
  14.238 +#define MSR_P4_IQ_CCCR5                0x371
  14.239 +
  14.240 +#define MSR_P4_CRU_ESCR0               0x3b8 /* even-numbered ESCR MSRs; CRU has three (0, 2, 4) */
  14.241 +#define MSR_P4_CRU_ESCR2               0x3cc
  14.242 +#define MSR_P4_CRU_ESCR4               0x3e0
  14.243 +#define MSR_P4_IQ_ESCR0                0x3ba
  14.244 +#define MSR_P4_RAT_ESCR0               0x3bc
  14.245 +#define MSR_P4_SSU_ESCR0               0x3be /* no SSU_ESCR1 is defined in this header */
  14.246 +#define MSR_P4_ALF_ESCR0               0x3ca
  14.247 +
  14.248 +#define P4_CRU_ESCR0_NUMBER            4 /* NOTE(review): presumably the value for P4_CCCR_ESCR() - confirm */
  14.249 +#define P4_CRU_ESCR2_NUMBER            5
  14.250 +#define P4_CRU_ESCR4_NUMBER            6
  14.251 +#define P4_IQ_ESCR0_NUMBER             0
  14.252 +#define P4_RAT_ESCR0_NUMBER            2
  14.253 +#define P4_SSU_ESCR0_NUMBER            3
  14.254 +#define P4_ALF_ESCR0_NUMBER            1
  14.255 +
  14.256 +#define MSR_P4_CRU_ESCR1               0x3b9 /* odd-numbered ESCR MSRs: address of the even partner + 1 */
  14.257 +#define MSR_P4_CRU_ESCR3               0x3cd
  14.258 +#define MSR_P4_CRU_ESCR5               0x3e1
  14.259 +#define MSR_P4_IQ_ESCR1                0x3bb
  14.260 +#define MSR_P4_RAT_ESCR1               0x3bd
  14.261 +#define MSR_P4_ALF_ESCR1               0x3cb
  14.262 +
  14.263 +#define P4_CRU_ESCR1_NUMBER            4 /* each odd ESCR number equals that of its even partner (1/0, 3/2, 5/4) */
  14.264 +#define P4_CRU_ESCR3_NUMBER            5
  14.265 +#define P4_CRU_ESCR5_NUMBER            6
  14.266 +#define P4_IQ_ESCR1_NUMBER             0
  14.267 +#define P4_RAT_ESCR1_NUMBER            2
  14.268 +#define P4_ALF_ESCR1_NUMBER            1
  14.269 +
  14.270 +#define P4_BPU_COUNTER0_NUMBER         0 /* counter index n: MSR_P4_*_COUNTERn is 0x300 + n and its CCCR is 0x360 + n in this header */
  14.271 +#define P4_BPU_COUNTER1_NUMBER         1
  14.272 +#define P4_BPU_COUNTER2_NUMBER         2
  14.273 +#define P4_BPU_COUNTER3_NUMBER         3
  14.274 +
  14.275 +#define P4_MS_COUNTER0_NUMBER          4
  14.276 +#define P4_MS_COUNTER1_NUMBER          5
  14.277 +#define P4_MS_COUNTER2_NUMBER          6
  14.278 +#define P4_MS_COUNTER3_NUMBER          7
  14.279 +
  14.280 +#define P4_FLAME_COUNTER0_NUMBER       8
  14.281 +#define P4_FLAME_COUNTER1_NUMBER       9
  14.282 +#define P4_FLAME_COUNTER2_NUMBER       10
  14.283 +#define P4_FLAME_COUNTER3_NUMBER       11
  14.284 +
  14.285 +#define P4_IQ_COUNTER0_NUMBER          12
  14.286 +#define P4_IQ_COUNTER1_NUMBER          13
  14.287 +#define P4_IQ_COUNTER2_NUMBER          14
  14.288 +#define P4_IQ_COUNTER3_NUMBER          15
  14.289 +#define P4_IQ_COUNTER4_NUMBER          16
  14.290 +#define P4_IQ_COUNTER5_NUMBER          17
  14.291 +
  14.292 +/* PEBS: MSR addresses and the bit definitions used with them.
  14.293 + */
  14.294 +#define MSR_P4_PEBS_ENABLE             0x3F1
  14.295 +#define MSR_P4_PEBS_MATRIX_VERT        0x3F2
  14.296 +
  14.297 +#define P4_PEBS_ENABLE_MY_THR          (1 << 25)
  14.298 +#define P4_PEBS_ENABLE_OTH_THR         (1 << 26)
  14.299 +#define P4_PEBS_ENABLE                 (1 << 24)
  14.300 +#define P4_PEBS_BIT0                   (1 << 0)
  14.301 +#define P4_PEBS_BIT1                   (1 << 1)
  14.302 +#define P4_PEBS_BIT2                   (1 << 2)
  14.303 +
  14.304 +#define P4_PEBS_MATRIX_VERT_BIT0       (1 << 0)
  14.305 +#define P4_PEBS_MATRIX_VERT_BIT1       (1 << 1)
  14.306 +#define P4_PEBS_MATRIX_VERT_BIT2       (1 << 2)
  14.307 +
  14.308 +/* Replay tagging: each tag has a *_PEBS_* value (from P4_PEBS_BITn) and a *_VERT_* value (from P4_PEBS_MATRIX_VERT_BITn).
  14.309 + */
  14.310 +#define P4_REPLAY_TAGGING_PEBS_L1LMR   P4_PEBS_BIT0
  14.311 +#define P4_REPLAY_TAGGING_PEBS_L2LMR   P4_PEBS_BIT1
  14.312 +#define P4_REPLAY_TAGGING_PEBS_DTLMR   P4_PEBS_BIT2
  14.313 +#define P4_REPLAY_TAGGING_PEBS_DTSMR   P4_PEBS_BIT2
  14.314 +#define P4_REPLAY_TAGGING_PEBS_DTAMR   P4_PEBS_BIT2
  14.315 +
  14.316 +#define P4_REPLAY_TAGGING_VERT_L1LMR   P4_PEBS_MATRIX_VERT_BIT0
  14.317 +#define P4_REPLAY_TAGGING_VERT_L2LMR   P4_PEBS_MATRIX_VERT_BIT0
  14.318 +#define P4_REPLAY_TAGGING_VERT_DTLMR   P4_PEBS_MATRIX_VERT_BIT0
  14.319 +#define P4_REPLAY_TAGGING_VERT_DTSMR   P4_PEBS_MATRIX_VERT_BIT1
  14.320 +#define P4_REPLAY_TAGGING_VERT_DTAMR   (P4_PEBS_MATRIX_VERT_BIT0 | P4_PEBS_MATRIX_VERT_BIT1) /* parenthesised so the OR survives use inside larger expressions */
  14.321 +
  14.322 +
  14.323 +
  14.324 +
  14.325 +/*****************************************************************************
  14.326 + * Event select codes and event masks.                                       *
  14.327 + *****************************************************************************/
  14.328 +
  14.329 +// x87_FP_uop
  14.330 +#define EVENT_SEL_x87_FP_uop                0x04
  14.331 +#define EVENT_MASK_x87_FP_uop_ALL           (1 << 15)
  14.332 +
  14.333 +// execution event (at retirement); same code as P4_ESCR_EVNTSEL_EXECUTION
  14.334 +#define EVENT_SEL_execution_event           0x0C
  14.335 +
  14.336 +// scalar_SP_uop
  14.337 +#define EVENT_SEL_scalar_SP_uop             0x0a
  14.338 +#define EVENT_MASK_scalar_SP_uop_ALL        (1 << 15)
  14.339 +
  14.340 +// scalar_DP_uop
  14.341 +#define EVENT_SEL_scalar_DP_uop             0x0e
  14.342 +#define EVENT_MASK_scalar_DP_uop_ALL        (1 << 15)
  14.343 +
  14.344 +// Instruction retired; the ALL mask equals the four P4_ESCR_EVNTMASK_IRET_* bits OR-ed together
  14.345 +#define EVENT_SEL_instr_retired             0x02
  14.346 +#define EVENT_MASK_instr_retired_ALL        0x0f
  14.347 +
  14.348 +// uOps retired; the ALL mask equals P4_ESCR_EVNTMASK_URET_NBOGUS | P4_ESCR_EVNTMASK_URET_BOGUS
  14.349 +#define EVENT_SEL_uops_retired              0x01
  14.350 +#define EVENT_MASK_uops_retired_ALL         0x03
  14.351 +
  14.352 +// L1 misses retired (replay event); the ALL mask equals P4_ESCR_EVNTMASK_REPLAY_NBOGUS | P4_ESCR_EVNTMASK_REPLAY_BOGUS
  14.353 +#define EVENT_SEL_replay_event              0x09
  14.354 +#define EVENT_MASK_replay_event_ALL         0x03
  14.355 +
  14.356 +// Trace cache
  14.357 +#define EVENT_SEL_BPU_fetch_request         0x03
  14.358 +#define EVENT_MASK_BPU_fetch_request_TCMISS 0x01
  14.359 +
  14.360 +// Bus activity
  14.361 +#define EVENT_SEL_FSB_data_activity               0x17
  14.362 +#define EVENT_MASK_FSB_data_activity_DRDY_DRV     0x01
  14.363 +#define EVENT_MASK_FSB_data_activity_DRDY_OWN     0x02
  14.364 +#define EVENT_MASK_FSB_data_activity_DRDY_OOTHER  0x04 /* NOTE(review): "OOTHER" is probably a typo for OTHER; spelling kept so existing users still compile */
  14.365 +#define EVENT_MASK_FSB_data_activity_DBSY_DRV     0x08
  14.366 +#define EVENT_MASK_FSB_data_activity_DBSY_OWN     0x10
  14.367 +#define EVENT_MASK_FSB_data_activity_DBSY_OOTHER  0x20 /* NOTE(review): same "OOTHER" spelling as above */
  14.368 +
  14.369 +// Cache L2 and L3 (BSQ cache reference); same select code as EVENT_SEL_execution_event
  14.370 +#define EVENT_SEL_BSQ_cache_reference             0x0c
  14.371 +#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITS 0x001
  14.372 +#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITE 0x002
  14.373 +#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITM 0x004
  14.374 +
  14.375 +#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITS 0x008
  14.376 +#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITE 0x010
  14.377 +#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITM 0x020
  14.378 +
  14.379 +#define EVENT_MASK_BSQ_cache_reference_RD_L2_MISS 0x100
  14.380 +#define EVENT_MASK_BSQ_cache_reference_RD_L3_MISS 0x200
  14.381 +#define EVENT_MASK_BSQ_cache_reference_WR_L2_MISS 0x400
  14.382 +
  14.383 +#endif
  14.384 +
  14.385 +/* End of $RCSfile: p4perf.h,v $ */