ia64/xen-unstable

changeset 9625:e049baa9055d

Add xenoprof support
Signed-off-by: Jose Renato Santos <jsantos@hpl.hp.com>
author ack@kneesa.uk.xensource.com
date Thu Apr 06 18:58:01 2006 +0100 (2006-04-06)
parents 388c59fefaa6
children b6c5920e5d99
files buildconfigs/linux-defconfig_xen0_x86_32 buildconfigs/linux-defconfig_xenU_x86_32 buildconfigs/linux-defconfig_xen_x86_32 linux-2.6-xen-sparse/arch/i386/Kconfig linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c linux-2.6-xen-sparse/arch/i386/oprofile/Makefile linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h patches/linux-2.6.16/xenoprof-generic.patch xen/arch/x86/Makefile xen/arch/x86/domain.c xen/arch/x86/oprofile/Makefile xen/arch/x86/oprofile/nmi_int.c xen/arch/x86/oprofile/op_counter.h xen/arch/x86/oprofile/op_model_athlon.c xen/arch/x86/oprofile/op_model_p4.c xen/arch/x86/oprofile/op_model_ppro.c xen/arch/x86/oprofile/op_x86_model.h xen/arch/x86/oprofile/xenoprof.c xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_64/entry.S xen/include/public/xen.h xen/include/public/xenoprof.h xen/include/xen/sched.h xen/include/xen/xenoprof.h
line diff
     1.1 --- a/buildconfigs/linux-defconfig_xen0_x86_32	Thu Apr 06 17:49:21 2006 +0100
     1.2 +++ b/buildconfigs/linux-defconfig_xen0_x86_32	Thu Apr 06 18:58:01 2006 +0100
     1.3 @@ -1231,6 +1231,7 @@ CONFIG_NLS_ISO8859_1=y
     1.4  #
     1.5  # Instrumentation Support
     1.6  #
     1.7 +# CONFIG_PROFILING is not set
     1.8  # CONFIG_KPROBES is not set
     1.9  
    1.10  #
     2.1 --- a/buildconfigs/linux-defconfig_xenU_x86_32	Thu Apr 06 17:49:21 2006 +0100
     2.2 +++ b/buildconfigs/linux-defconfig_xenU_x86_32	Thu Apr 06 18:58:01 2006 +0100
     2.3 @@ -779,6 +779,7 @@ CONFIG_NLS_ISO8859_1=y
     2.4  #
     2.5  # Instrumentation Support
     2.6  #
     2.7 +# CONFIG_PROFILING is not set
     2.8  # CONFIG_KPROBES is not set
     2.9  
    2.10  #
     3.1 --- a/buildconfigs/linux-defconfig_xen_x86_32	Thu Apr 06 17:49:21 2006 +0100
     3.2 +++ b/buildconfigs/linux-defconfig_xen_x86_32	Thu Apr 06 18:58:01 2006 +0100
     3.3 @@ -2892,6 +2892,7 @@ CONFIG_NLS_UTF8=m
     3.4  #
     3.5  # Instrumentation Support
     3.6  #
     3.7 +# CONFIG_PROFILING is not set
     3.8  # CONFIG_KPROBES is not set
     3.9  
    3.10  #
     4.1 --- a/linux-2.6-xen-sparse/arch/i386/Kconfig	Thu Apr 06 17:49:21 2006 +0100
     4.2 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig	Thu Apr 06 18:58:01 2006 +0100
     4.3 @@ -1116,9 +1116,7 @@ source "fs/Kconfig"
     4.4  menu "Instrumentation Support"
     4.5  	depends on EXPERIMENTAL
     4.6  
     4.7 -if !X86_XEN
     4.8  source "arch/i386/oprofile/Kconfig"
     4.9 -endif
    4.10  
    4.11  config KPROBES
    4.12  	bool "Kprobes (EXPERIMENTAL)"
     5.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Thu Apr 06 17:49:21 2006 +0100
     5.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Thu Apr 06 18:58:01 2006 +0100
     5.3 @@ -177,6 +177,32 @@ int touch_pte_range(struct mm_struct *mm
     5.4  
     5.5  EXPORT_SYMBOL(touch_pte_range);
     5.6  
     5.7 +void *vm_map_xen_pages (unsigned long maddr, int vm_size, pgprot_t prot)
     5.8 +{
     5.9 +	int error;
    5.10 +       
    5.11 +	struct vm_struct *vma;
    5.12 +	vma = get_vm_area (vm_size, VM_IOREMAP);
    5.13 +      
    5.14 +	if (vma == NULL) {
    5.15 +		printk ("ioremap.c,vm_map_xen_pages(): "
    5.16 +			"Failed to get VMA area\n");
    5.17 +		return NULL;
    5.18 +	}
    5.19 +
    5.20 +	error = direct_kernel_remap_pfn_range((unsigned long) vma->addr,
    5.21 +					      maddr >> PAGE_SHIFT, vm_size,
    5.22 +					      prot, DOMID_SELF );
    5.23 +	if (error == 0) {
    5.24 +		return vma->addr;
    5.25 +	} else {
    5.26 +		printk ("ioremap.c,vm_map_xen_pages(): "
    5.27 +			"Failed to map xen shared pages into kernel space\n");
    5.28 +		return NULL;
    5.29 +	}
    5.30 +}
    5.31 +EXPORT_SYMBOL(vm_map_xen_pages);
    5.32 +
    5.33  /*
    5.34   * Does @address reside within a non-highmem page that is local to this virtual
    5.35   * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/Makefile	Thu Apr 06 18:58:01 2006 +0100
     6.3 @@ -0,0 +1,16 @@
     6.4 +obj-$(CONFIG_OPROFILE) += oprofile.o
     6.5 +
     6.6 +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
     6.7 +		oprof.o cpu_buffer.o buffer_sync.o \
     6.8 +		event_buffer.o oprofile_files.o \
     6.9 +		oprofilefs.o oprofile_stats.o  \
    6.10 +		timer_int.o )
    6.11 +
    6.12 +ifdef CONFIG_X86_XEN
    6.13 +oprofile-y				:= $(DRIVER_OBJS) xenoprof.o
    6.14 +else 
    6.15 +oprofile-y				:= $(DRIVER_OBJS) init.o backtrace.o
    6.16 +oprofile-$(CONFIG_X86_LOCAL_APIC) 	+= nmi_int.o op_model_athlon.o \
    6.17 +					   op_model_ppro.o op_model_p4.o
    6.18 +oprofile-$(CONFIG_X86_IO_APIC)		+= nmi_timer_int.o
    6.19 +endif
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c	Thu Apr 06 18:58:01 2006 +0100
     7.3 @@ -0,0 +1,395 @@
     7.4 +/**
     7.5 + * @file xenoprof.c
     7.6 + *
     7.7 + * @remark Copyright 2002 OProfile authors
     7.8 + * @remark Read the file COPYING
     7.9 + *
    7.10 + * @author John Levon <levon@movementarian.org>
    7.11 + *
    7.12 + * Modified by Aravind Menon and Jose Renato Santos for Xen
    7.13 + * These modifications are:
    7.14 + * Copyright (C) 2005 Hewlett-Packard Co.
    7.15 + */
    7.16 +
    7.17 +#include <linux/init.h>
    7.18 +#include <linux/notifier.h>
    7.19 +#include <linux/smp.h>
    7.20 +#include <linux/oprofile.h>
    7.21 +#include <linux/sysdev.h>
    7.22 +#include <linux/slab.h>
    7.23 +#include <linux/interrupt.h>
    7.24 +#include <linux/vmalloc.h>
    7.25 +#include <asm/nmi.h>
    7.26 +#include <asm/msr.h>
    7.27 +#include <asm/apic.h>
    7.28 +#include <asm/pgtable.h>
    7.29 +#include <xen/evtchn.h>
    7.30 +#include "op_counter.h"
    7.31 +
    7.32 +#include <xen/interface/xen.h>
    7.33 +#include <xen/interface/xenoprof.h>
    7.34 +
    7.35 +static int xenoprof_start(void);
    7.36 +static void xenoprof_stop(void);
    7.37 +
    7.38 +void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot);
    7.39 +
    7.40 +static int xenoprof_enabled = 0;
    7.41 +static int num_events = 0;
    7.42 +static int is_primary = 0;
    7.43 +
    7.44 +/* sample buffers shared with Xen */
    7.45 +xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS];
    7.46 +/* Shared buffer area */
    7.47 +char * shared_buffer;
    7.48 +/* Number of buffers in shared area (one per VCPU) */
    7.49 +int nbuf;
    7.50 +/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */
    7.51 +int ovf_irq[NR_CPUS];
    7.52 +/* cpu model type string - copied from Xen memory space on XENOPROF_init command */
    7.53 +char cpu_type[XENOPROF_CPU_TYPE_SIZE];
    7.54 +
    7.55 +#ifdef CONFIG_PM
    7.56 +
    7.57 +static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
    7.58 +{
    7.59 +	if (xenoprof_enabled == 1)
    7.60 +		xenoprof_stop();
    7.61 +	return 0;
    7.62 +}
    7.63 +
    7.64 +
    7.65 +static int xenoprof_resume(struct sys_device * dev)
    7.66 +{
    7.67 +	if (xenoprof_enabled == 1)
    7.68 +		xenoprof_start();
    7.69 +	return 0;
    7.70 +}
    7.71 +
    7.72 +
    7.73 +static struct sysdev_class oprofile_sysclass = {
    7.74 +	set_kset_name("oprofile"),
    7.75 +	.resume		= xenoprof_resume,
    7.76 +	.suspend	= xenoprof_suspend
    7.77 +};
    7.78 +
    7.79 +
    7.80 +static struct sys_device device_oprofile = {
    7.81 +	.id	= 0,
    7.82 +	.cls	= &oprofile_sysclass,
    7.83 +};
    7.84 +
    7.85 +
    7.86 +static int __init init_driverfs(void)
    7.87 +{
    7.88 +	int error;
    7.89 +	if (!(error = sysdev_class_register(&oprofile_sysclass)))
    7.90 +		error = sysdev_register(&device_oprofile);
    7.91 +	return error;
    7.92 +}
    7.93 +
    7.94 +
    7.95 +static void __exit exit_driverfs(void)
    7.96 +{
    7.97 +	sysdev_unregister(&device_oprofile);
    7.98 +	sysdev_class_unregister(&oprofile_sysclass);
    7.99 +}
   7.100 +
   7.101 +#else
   7.102 +#define init_driverfs() do { } while (0)
   7.103 +#define exit_driverfs() do { } while (0)
   7.104 +#endif /* CONFIG_PM */
   7.105 +
   7.106 +unsigned long long oprofile_samples = 0;
   7.107 +
   7.108 +static irqreturn_t 
   7.109 +xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
   7.110 +{
   7.111 +	int head, tail, size;
   7.112 +	xenoprof_buf_t * buf;
   7.113 +	int cpu;
   7.114 +
   7.115 +	cpu = smp_processor_id();
   7.116 +	buf = xenoprof_buf[cpu];
   7.117 +
   7.118 +	head = buf->event_head;
   7.119 +	tail = buf->event_tail;
   7.120 +	size = buf->event_size;
   7.121 +
   7.122 +	if (tail > head) {
   7.123 +		while (tail < size) {
   7.124 +			oprofile_add_pc(buf->event_log[tail].eip,
   7.125 +					buf->event_log[tail].mode,
   7.126 +					buf->event_log[tail].event);
   7.127 +			oprofile_samples++;
   7.128 +			tail++;
   7.129 +		}
   7.130 +		tail = 0;
   7.131 +	}
   7.132 +	while (tail < head) {
   7.133 +		oprofile_add_pc(buf->event_log[tail].eip,
   7.134 +				buf->event_log[tail].mode,
   7.135 +				buf->event_log[tail].event);
   7.136 +		oprofile_samples++;
   7.137 +		tail++;
   7.138 +	}
   7.139 +
   7.140 +	buf->event_tail = tail;
   7.141 +
   7.142 +	return IRQ_HANDLED;
   7.143 +}
   7.144 +
   7.145 +
   7.146 +static void unbind_virq_cpu(void * info)
   7.147 +{
   7.148 +	int cpu = smp_processor_id();
   7.149 +	if (ovf_irq[cpu] >= 0) {
   7.150 +		unbind_from_irqhandler(ovf_irq[cpu], NULL);
   7.151 +		ovf_irq[cpu] = -1;
   7.152 +	}
   7.153 +}
   7.154 +
   7.155 +
   7.156 +static void unbind_virq(void)
   7.157 +{
   7.158 +	on_each_cpu(unbind_virq_cpu, NULL, 0, 1);
   7.159 +}
   7.160 +
   7.161 +
   7.162 +int bind_virq_error;
   7.163 +
   7.164 +static void bind_virq_cpu(void * info)
   7.165 +{
   7.166 +	int result;
   7.167 +	int cpu = smp_processor_id();
   7.168 +
   7.169 +	result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
   7.170 +					 cpu,
   7.171 +					 xenoprof_ovf_interrupt,
   7.172 +					 SA_INTERRUPT,
   7.173 +					 "xenoprof",
   7.174 +					 NULL);
   7.175 +
   7.176 +	if (result<0) {
   7.177 +		bind_virq_error = result;
   7.178 +		printk("xenoprof.c: binding VIRQ_XENOPROF to IRQ failed on CPU "
   7.179 +		       "%d\n", cpu);
   7.180 +	} else {
   7.181 +		ovf_irq[cpu] = result;
   7.182 +	}
   7.183 +}
   7.184 +
   7.185 +
   7.186 +static int bind_virq(void)
   7.187 +{
   7.188 +	bind_virq_error = 0;
   7.189 +	on_each_cpu(bind_virq_cpu, NULL, 0, 1);
   7.190 +	if (bind_virq_error) {
   7.191 +		unbind_virq();
   7.192 +		return bind_virq_error;
   7.193 +	} else {
   7.194 +		return 0;
   7.195 +	}
   7.196 +}
   7.197 +
   7.198 +
   7.199 +static int xenoprof_setup(void)
   7.200 +{
   7.201 +	int ret;
   7.202 +
   7.203 +	ret = bind_virq();
   7.204 +	if (ret)
   7.205 +		return ret;
   7.206 +
   7.207 +	if (is_primary) {
   7.208 +		ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters,
   7.209 +					     (unsigned long)NULL,
   7.210 +					     (unsigned long)NULL);
   7.211 +		if (ret)
   7.212 +			goto err;
   7.213 +
   7.214 +		ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events,
   7.215 +					     (unsigned long)&counter_config,
   7.216 +					     (unsigned long)num_events);
   7.217 +		if (ret)
   7.218 +			goto err;
   7.219 +	}
   7.220 +
   7.221 +	ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq,
   7.222 +				     (unsigned long)NULL,
   7.223 +				     (unsigned long)NULL);
   7.224 +	if (ret)
   7.225 +		goto err;
   7.226 +
   7.227 +	xenoprof_enabled = 1;
   7.228 +	return 0;
   7.229 + err:
   7.230 +	unbind_virq();
   7.231 +	return ret;
   7.232 +}
   7.233 +
   7.234 +
   7.235 +static void xenoprof_shutdown(void)
   7.236 +{
   7.237 +	xenoprof_enabled = 0;
   7.238 +
   7.239 +	HYPERVISOR_xenoprof_op(XENOPROF_disable_virq,
   7.240 +			       (unsigned long)NULL,
   7.241 +			       (unsigned long)NULL);
   7.242 +
   7.243 +	if (is_primary) {
   7.244 +		HYPERVISOR_xenoprof_op(XENOPROF_release_counters,
   7.245 +				       (unsigned long)NULL,
   7.246 +				       (unsigned long)NULL);
   7.247 +	}
   7.248 +
   7.249 +	unbind_virq();
   7.250 +}
   7.251 +
   7.252 +
   7.253 +static int xenoprof_start(void)
   7.254 +{
   7.255 +	int ret = 0;
   7.256 +
   7.257 +	if (is_primary)
   7.258 +		ret = HYPERVISOR_xenoprof_op(XENOPROF_start,
   7.259 +					     (unsigned long)NULL,
   7.260 +					     (unsigned long)NULL);
   7.261 +	return ret;
   7.262 +}
   7.263 +
   7.264 +
   7.265 +static void xenoprof_stop(void)
   7.266 +{
   7.267 +	if (is_primary)
   7.268 +		HYPERVISOR_xenoprof_op(XENOPROF_stop,
   7.269 +				       (unsigned long)NULL,
   7.270 +				       (unsigned long)NULL);
   7.271 +}
   7.272 +
   7.273 +
   7.274 +static int xenoprof_set_active(int * active_domains,
   7.275 +			  unsigned int adomains)
   7.276 +{
   7.277 +	int ret = 0;
   7.278 +	if (is_primary)
   7.279 +		ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active,
   7.280 +					     (unsigned long)active_domains,
   7.281 +					     (unsigned long)adomains);
   7.282 +	return ret;
   7.283 +}
   7.284 +
   7.285 +
   7.286 +struct op_counter_config counter_config[OP_MAX_COUNTER];
   7.287 +
   7.288 +static int xenoprof_create_files(struct super_block * sb, struct dentry * root)
   7.289 +{
   7.290 +	unsigned int i;
   7.291 +
   7.292 +	for (i = 0; i < num_events; ++i) {
   7.293 +		struct dentry * dir;
    7.294 +		char buf[4];
    7.295 + 
    7.296 +		snprintf(buf, sizeof(buf), "%d", i);
   7.297 +		dir = oprofilefs_mkdir(sb, root, buf);
   7.298 +		oprofilefs_create_ulong(sb, dir, "enabled",
   7.299 +					&counter_config[i].enabled);
   7.300 +		oprofilefs_create_ulong(sb, dir, "event",
   7.301 +					&counter_config[i].event);
   7.302 +		oprofilefs_create_ulong(sb, dir, "count",
   7.303 +					&counter_config[i].count);
   7.304 +		oprofilefs_create_ulong(sb, dir, "unit_mask",
   7.305 +					&counter_config[i].unit_mask);
   7.306 +		oprofilefs_create_ulong(sb, dir, "kernel",
   7.307 +					&counter_config[i].kernel);
   7.308 +		oprofilefs_create_ulong(sb, dir, "user",
   7.309 +					&counter_config[i].user);
   7.310 +	}
   7.311 +
   7.312 +	return 0;
   7.313 +}
   7.314 +
   7.315 +
   7.316 +struct oprofile_operations xenoprof_ops = {
   7.317 +	.create_files 	= xenoprof_create_files,
   7.318 +	.set_active	= xenoprof_set_active,
   7.319 +	.setup 		= xenoprof_setup,
   7.320 +	.shutdown	= xenoprof_shutdown,
   7.321 +	.start		= xenoprof_start,
   7.322 +	.stop		= xenoprof_stop
   7.323 +};
   7.324 +
   7.325 +
   7.326 +/* in order to get driverfs right */
   7.327 +static int using_xenoprof;
   7.328 +
   7.329 +int __init oprofile_arch_init(struct oprofile_operations * ops)
   7.330 +{
   7.331 +	xenoprof_init_result_t result;
   7.332 +	xenoprof_buf_t * buf;
   7.333 +	int max_samples = 16;
   7.334 +	int vm_size;
   7.335 +	int npages;
   7.336 +	int i;
   7.337 +
   7.338 +	int ret = HYPERVISOR_xenoprof_op(XENOPROF_init,
   7.339 +					 (unsigned long)max_samples,
   7.340 +					 (unsigned long)&result);
   7.341 +
   7.342 +	if (!ret) {
   7.343 +		pgprot_t prot = __pgprot(_KERNPG_TABLE);
   7.344 +
   7.345 +		num_events = result.num_events;
   7.346 +		is_primary = result.is_primary;
   7.347 +		nbuf = result.nbuf;
   7.348 +
   7.349 +		npages = (result.bufsize * nbuf - 1) / PAGE_SIZE + 1;
   7.350 +		vm_size = npages * PAGE_SIZE;
   7.351 +
   7.352 +		shared_buffer = (char *) vm_map_xen_pages(result.buf_maddr,
   7.353 +							  vm_size, prot);
   7.354 +		if (!shared_buffer) {
   7.355 +			ret = -ENOMEM;
   7.356 +			goto out;
   7.357 +		}
   7.358 +
   7.359 +		for (i=0; i< nbuf; i++) {
   7.360 +			buf = (xenoprof_buf_t*) 
   7.361 +				&shared_buffer[i * result.bufsize];
   7.362 +			BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
   7.363 +			xenoprof_buf[buf->vcpu_id] = buf;
   7.364 +		}
   7.365 +
   7.366 +		/*  cpu_type is detected by Xen */
   7.367 +		cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0;
   7.368 +		strncpy(cpu_type, result.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1);
   7.369 +		xenoprof_ops.cpu_type = cpu_type;
   7.370 +
   7.371 +		init_driverfs();
   7.372 +		using_xenoprof = 1;
   7.373 +		*ops = xenoprof_ops;
   7.374 +
   7.375 +		for (i=0; i<NR_CPUS; i++)
   7.376 +			ovf_irq[i] = -1;
   7.377 +	}
   7.378 + out:
   7.379 +	printk(KERN_INFO "oprofile_arch_init: ret %d, events %d, "
   7.380 +	       "is_primary %d\n", ret, num_events, is_primary);
   7.381 +	return ret;
   7.382 +}
   7.383 +
   7.384 +
   7.385 +void __exit oprofile_arch_exit(void)
   7.386 +{
   7.387 +	if (using_xenoprof)
   7.388 +		exit_driverfs();
   7.389 +
   7.390 +	if (shared_buffer) {
   7.391 +		vunmap(shared_buffer);
   7.392 +		shared_buffer = NULL;
   7.393 +	}
   7.394 +	if (is_primary)
   7.395 +		HYPERVISOR_xenoprof_op(XENOPROF_shutdown,
   7.396 +				       (unsigned long)NULL,
   7.397 +				       (unsigned long)NULL);
   7.398 +}
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile	Thu Apr 06 18:58:01 2006 +0100
     8.3 @@ -0,0 +1,22 @@
     8.4 +#
     8.5 +# oprofile for x86-64.
     8.6 +# Just reuse the one from i386. 
     8.7 +#
     8.8 +
     8.9 +obj-$(CONFIG_OPROFILE) += oprofile.o
    8.10 + 
    8.11 +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
    8.12 +	oprof.o cpu_buffer.o buffer_sync.o \
    8.13 +	event_buffer.o oprofile_files.o \
    8.14 +	oprofilefs.o oprofile_stats.o \
    8.15 +	timer_int.o )
    8.16 +
     8.17 +ifdef CONFIG_X86_XEN
    8.18 +OPROFILE-y := xenoprof.o
    8.19 +else
    8.20 +OPROFILE-y := init.o backtrace.o
    8.21 +OPROFILE-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o op_model_p4.o \
    8.22 +				     op_model_ppro.o
    8.23 +OPROFILE-$(CONFIG_X86_IO_APIC)    += nmi_timer_int.o 
    8.24 +endif
    8.25 +oprofile-y = $(DRIVER_OBJS) $(addprefix ../../i386/oprofile/, $(OPROFILE-y))
     9.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h	Thu Apr 06 17:49:21 2006 +0100
     9.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h	Thu Apr 06 18:58:01 2006 +0100
     9.3 @@ -336,6 +336,14 @@ HYPERVISOR_callback_op(
     9.4  	return _hypercall2(int, callback_op, cmd, arg);
     9.5  }
     9.6  
     9.7 +static inline int
     9.8 +HYPERVISOR_xenoprof_op(
     9.9 +	int op, unsigned long arg1, unsigned long arg2)
    9.10 +{
    9.11 +	return _hypercall3(int, xenoprof_op, op, arg1, arg2);
    9.12 +}
    9.13 +
    9.14 +
    9.15  #endif /* __HYPERCALL_H__ */
    9.16  
    9.17  /*
    10.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h	Thu Apr 06 17:49:21 2006 +0100
    10.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h	Thu Apr 06 18:58:01 2006 +0100
    10.3 @@ -337,6 +337,13 @@ HYPERVISOR_callback_op(
    10.4  	return _hypercall2(int, callback_op, cmd, arg);
    10.5  }
    10.6  
    10.7 +static inline int
    10.8 +HYPERVISOR_xenoprof_op(
    10.9 +	int op, unsigned long arg1, unsigned long arg2)
   10.10 +{
   10.11 +	return _hypercall3(int, xenoprof_op, op, arg1, arg2);
   10.12 +}
   10.13 +
   10.14  #endif /* __HYPERCALL_H__ */
   10.15  
   10.16  /*
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/patches/linux-2.6.16/xenoprof-generic.patch	Thu Apr 06 18:58:01 2006 +0100
    11.3 @@ -0,0 +1,384 @@
    11.4 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/buffer_sync.c ./drivers/oprofile/buffer_sync.c
    11.5 +--- ../pristine-linux-2.6.16/drivers/oprofile/buffer_sync.c	2006-03-20 05:53:29.000000000 +0000
    11.6 ++++ ./drivers/oprofile/buffer_sync.c	2006-04-03 15:53:05.000000000 +0100
    11.7 +@@ -6,6 +6,10 @@
    11.8 +  *
    11.9 +  * @author John Levon <levon@movementarian.org>
   11.10 +  *
   11.11 ++ * Modified by Aravind Menon for Xen
   11.12 ++ * These modifications are:
   11.13 ++ * Copyright (C) 2005 Hewlett-Packard Co.
   11.14 ++ *
   11.15 +  * This is the core of the buffer management. Each
   11.16 +  * CPU buffer is processed and entered into the
   11.17 +  * global event buffer. Such processing is necessary
   11.18 +@@ -275,15 +279,24 @@ static void add_cpu_switch(int i)
   11.19 + 	last_cookie = INVALID_COOKIE;
   11.20 + }
   11.21 + 
   11.22 +-static void add_kernel_ctx_switch(unsigned int in_kernel)
   11.23 ++static void add_cpu_mode_switch(unsigned int cpu_mode)
   11.24 + {
   11.25 + 	add_event_entry(ESCAPE_CODE);
   11.26 +-	if (in_kernel)
   11.27 +-		add_event_entry(KERNEL_ENTER_SWITCH_CODE); 
   11.28 +-	else
   11.29 +-		add_event_entry(KERNEL_EXIT_SWITCH_CODE); 
   11.30 ++	switch (cpu_mode) {
   11.31 ++	case CPU_MODE_USER:
   11.32 ++		add_event_entry(USER_ENTER_SWITCH_CODE);
   11.33 ++		break;
   11.34 ++	case CPU_MODE_KERNEL:
   11.35 ++		add_event_entry(KERNEL_ENTER_SWITCH_CODE);
   11.36 ++		break;
   11.37 ++	case CPU_MODE_XEN:
   11.38 ++		add_event_entry(XEN_ENTER_SWITCH_CODE);
   11.39 ++		break;
   11.40 ++	default:
   11.41 ++		break;
   11.42 ++	}
   11.43 + }
   11.44 +- 
   11.45 ++
   11.46 + static void
   11.47 + add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
   11.48 + {
   11.49 +@@ -348,9 +361,9 @@ static int add_us_sample(struct mm_struc
   11.50 +  * for later lookup from userspace.
   11.51 +  */
   11.52 + static int
   11.53 +-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
   11.54 ++add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
   11.55 + {
   11.56 +-	if (in_kernel) {
   11.57 ++	if (cpu_mode >= CPU_MODE_KERNEL) {
   11.58 + 		add_sample_entry(s->eip, s->event);
   11.59 + 		return 1;
   11.60 + 	} else if (mm) {
   11.61 +@@ -496,7 +509,7 @@ void sync_buffer(int cpu)
   11.62 + 	struct mm_struct *mm = NULL;
   11.63 + 	struct task_struct * new;
   11.64 + 	unsigned long cookie = 0;
   11.65 +-	int in_kernel = 1;
    11.66 ++	int cpu_mode = CPU_MODE_KERNEL;
   11.67 + 	unsigned int i;
   11.68 + 	sync_buffer_state state = sb_buffer_start;
   11.69 + 	unsigned long available;
   11.70 +@@ -513,12 +526,12 @@ void sync_buffer(int cpu)
   11.71 + 		struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
   11.72 +  
   11.73 + 		if (is_code(s->eip)) {
   11.74 +-			if (s->event <= CPU_IS_KERNEL) {
   11.75 ++			if (s->event <= CPU_MODE_XEN) {
   11.76 + 				/* kernel/userspace switch */
   11.77 +-				in_kernel = s->event;
   11.78 ++				cpu_mode = s->event;
   11.79 + 				if (state == sb_buffer_start)
   11.80 + 					state = sb_sample_start;
   11.81 +-				add_kernel_ctx_switch(s->event);
   11.82 ++				add_cpu_mode_switch(s->event);
   11.83 + 			} else if (s->event == CPU_TRACE_BEGIN) {
   11.84 + 				state = sb_bt_start;
   11.85 + 				add_trace_begin();
   11.86 +@@ -536,7 +549,7 @@ void sync_buffer(int cpu)
   11.87 + 			}
   11.88 + 		} else {
   11.89 + 			if (state >= sb_bt_start &&
   11.90 +-			    !add_sample(mm, s, in_kernel)) {
   11.91 ++			    !add_sample(mm, s, cpu_mode)) {
   11.92 + 				if (state == sb_bt_start) {
   11.93 + 					state = sb_bt_ignore;
   11.94 + 					atomic_inc(&oprofile_stats.bt_lost_no_mapping);
   11.95 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.c ./drivers/oprofile/cpu_buffer.c
   11.96 +--- ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.c	2006-03-20 05:53:29.000000000 +0000
   11.97 ++++ ./drivers/oprofile/cpu_buffer.c	2006-04-03 15:53:05.000000000 +0100
   11.98 +@@ -6,6 +6,10 @@
   11.99 +  *
  11.100 +  * @author John Levon <levon@movementarian.org>
  11.101 +  *
  11.102 ++ * Modified by Aravind Menon for Xen
  11.103 ++ * These modifications are:
  11.104 ++ * Copyright (C) 2005 Hewlett-Packard Co.
  11.105 ++ *
  11.106 +  * Each CPU has a local buffer that stores PC value/event
  11.107 +  * pairs. We also log context switches when we notice them.
  11.108 +  * Eventually each CPU's buffer is processed into the global
  11.109 +@@ -58,7 +62,7 @@ int alloc_cpu_buffers(void)
  11.110 + 			goto fail;
  11.111 +  
  11.112 + 		b->last_task = NULL;
  11.113 +-		b->last_is_kernel = -1;
  11.114 ++		b->last_cpu_mode = -1;
  11.115 + 		b->tracing = 0;
  11.116 + 		b->buffer_size = buffer_size;
  11.117 + 		b->tail_pos = 0;
  11.118 +@@ -114,7 +118,7 @@ void cpu_buffer_reset(struct oprofile_cp
  11.119 + 	 * collected will populate the buffer with proper
  11.120 + 	 * values to initialize the buffer
  11.121 + 	 */
  11.122 +-	cpu_buf->last_is_kernel = -1;
  11.123 ++	cpu_buf->last_cpu_mode = -1;
  11.124 + 	cpu_buf->last_task = NULL;
  11.125 + }
  11.126 + 
  11.127 +@@ -164,13 +168,13 @@ add_code(struct oprofile_cpu_buffer * bu
  11.128 +  * because of the head/tail separation of the writer and reader
  11.129 +  * of the CPU buffer.
  11.130 +  *
  11.131 +- * is_kernel is needed because on some architectures you cannot
  11.132 ++ * cpu_mode is needed because on some architectures you cannot
  11.133 +  * tell if you are in kernel or user space simply by looking at
  11.134 +- * pc. We tag this in the buffer by generating kernel enter/exit
  11.135 +- * events whenever is_kernel changes
  11.136 ++ * pc. We tag this in the buffer by generating kernel/user (and xen)
  11.137 ++ *  enter events whenever cpu_mode changes
  11.138 +  */
  11.139 + static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
  11.140 +-		      int is_kernel, unsigned long event)
  11.141 ++		      int cpu_mode, unsigned long event)
  11.142 + {
  11.143 + 	struct task_struct * task;
  11.144 + 
  11.145 +@@ -181,16 +185,16 @@ static int log_sample(struct oprofile_cp
  11.146 + 		return 0;
  11.147 + 	}
  11.148 + 
  11.149 +-	is_kernel = !!is_kernel;
  11.150 ++	WARN_ON(cpu_mode > CPU_MODE_XEN);
  11.151 + 
  11.152 + 	task = current;
  11.153 + 
  11.154 + 	/* notice a switch from user->kernel or vice versa */
  11.155 +-	if (cpu_buf->last_is_kernel != is_kernel) {
  11.156 +-		cpu_buf->last_is_kernel = is_kernel;
  11.157 +-		add_code(cpu_buf, is_kernel);
  11.158 ++	if (cpu_buf->last_cpu_mode != cpu_mode) {
  11.159 ++		cpu_buf->last_cpu_mode = cpu_mode;
  11.160 ++		add_code(cpu_buf, cpu_mode);
  11.161 + 	}
  11.162 +-
  11.163 ++	
  11.164 + 	/* notice a task switch */
  11.165 + 	if (cpu_buf->last_task != task) {
  11.166 + 		cpu_buf->last_task = task;
  11.167 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.h ./drivers/oprofile/cpu_buffer.h
  11.168 +--- ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.h	2006-03-20 05:53:29.000000000 +0000
  11.169 ++++ ./drivers/oprofile/cpu_buffer.h	2006-04-03 15:53:05.000000000 +0100
  11.170 +@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer {
  11.171 + 	volatile unsigned long tail_pos;
  11.172 + 	unsigned long buffer_size;
  11.173 + 	struct task_struct * last_task;
  11.174 +-	int last_is_kernel;
  11.175 ++	int last_cpu_mode;
  11.176 + 	int tracing;
  11.177 + 	struct op_sample * buffer;
  11.178 + 	unsigned long sample_received;
  11.179 +@@ -51,7 +51,9 @@ extern struct oprofile_cpu_buffer cpu_bu
  11.180 + void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
  11.181 + 
  11.182 + /* transient events for the CPU buffer -> event buffer */
  11.183 +-#define CPU_IS_KERNEL 1
  11.184 +-#define CPU_TRACE_BEGIN 2
  11.185 ++#define CPU_MODE_USER    0
  11.186 ++#define CPU_MODE_KERNEL  1
  11.187 ++#define CPU_MODE_XEN     2
  11.188 ++#define CPU_TRACE_BEGIN  3
  11.189 + 
  11.190 + #endif /* OPROFILE_CPU_BUFFER_H */
  11.191 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/event_buffer.h ./drivers/oprofile/event_buffer.h
  11.192 +--- ../pristine-linux-2.6.16/drivers/oprofile/event_buffer.h	2006-03-20 05:53:29.000000000 +0000
  11.193 ++++ ./drivers/oprofile/event_buffer.h	2006-04-03 15:53:05.000000000 +0100
  11.194 +@@ -29,11 +29,12 @@ void wake_up_buffer_waiter(void);
  11.195 + #define CPU_SWITCH_CODE 		2
  11.196 + #define COOKIE_SWITCH_CODE 		3
  11.197 + #define KERNEL_ENTER_SWITCH_CODE	4
  11.198 +-#define KERNEL_EXIT_SWITCH_CODE		5
  11.199 ++#define USER_ENTER_SWITCH_CODE		5
  11.200 + #define MODULE_LOADED_CODE		6
  11.201 + #define CTX_TGID_CODE			7
  11.202 + #define TRACE_BEGIN_CODE		8
  11.203 + #define TRACE_END_CODE			9
  11.204 ++#define XEN_ENTER_SWITCH_CODE		10
  11.205 +  
  11.206 + #define INVALID_COOKIE ~0UL
  11.207 + #define NO_COOKIE 0UL
  11.208 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprof.c ./drivers/oprofile/oprof.c
  11.209 +--- ../pristine-linux-2.6.16/drivers/oprofile/oprof.c	2006-03-20 05:53:29.000000000 +0000
  11.210 ++++ ./drivers/oprofile/oprof.c	2006-04-03 15:53:05.000000000 +0100
  11.211 +@@ -5,6 +5,10 @@
  11.212 +  * @remark Read the file COPYING
  11.213 +  *
  11.214 +  * @author John Levon <levon@movementarian.org>
  11.215 ++ *
  11.216 ++ * Modified by Aravind Menon for Xen
  11.217 ++ * These modifications are:
  11.218 ++ * Copyright (C) 2005 Hewlett-Packard Co.
  11.219 +  */
  11.220 + 
  11.221 + #include <linux/kernel.h>
  11.222 +@@ -19,7 +23,7 @@
  11.223 + #include "cpu_buffer.h"
  11.224 + #include "buffer_sync.h"
  11.225 + #include "oprofile_stats.h"
  11.226 +- 
  11.227 ++
  11.228 + struct oprofile_operations oprofile_ops;
  11.229 + 
  11.230 + unsigned long oprofile_started;
  11.231 +@@ -33,6 +37,17 @@ static DECLARE_MUTEX(start_sem);
  11.232 +  */
  11.233 + static int timer = 0;
  11.234 + 
  11.235 ++extern unsigned int adomains;
  11.236 ++extern int active_domains[MAX_OPROF_DOMAINS];
  11.237 ++
  11.238 ++int oprofile_set_active(void)
  11.239 ++{
  11.240 ++	if (oprofile_ops.set_active)
  11.241 ++		return oprofile_ops.set_active(active_domains, adomains);
  11.242 ++
  11.243 ++	return -EINVAL;
  11.244 ++}
  11.245 ++
  11.246 + int oprofile_setup(void)
  11.247 + {
  11.248 + 	int err;
  11.249 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprof.h ./drivers/oprofile/oprof.h
  11.250 +--- ../pristine-linux-2.6.16/drivers/oprofile/oprof.h	2006-03-20 05:53:29.000000000 +0000
  11.251 ++++ ./drivers/oprofile/oprof.h	2006-04-03 15:53:05.000000000 +0100
  11.252 +@@ -35,5 +35,7 @@ void oprofile_create_files(struct super_
  11.253 + void oprofile_timer_init(struct oprofile_operations * ops);
  11.254 + 
  11.255 + int oprofile_set_backtrace(unsigned long depth);
  11.256 ++
  11.257 ++int oprofile_set_active(void);
  11.258 +  
  11.259 + #endif /* OPROF_H */
  11.260 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprofile_files.c ./drivers/oprofile/oprofile_files.c
  11.261 +--- ../pristine-linux-2.6.16/drivers/oprofile/oprofile_files.c	2006-03-20 05:53:29.000000000 +0000
  11.262 ++++ ./drivers/oprofile/oprofile_files.c	2006-04-03 15:53:05.000000000 +0100
  11.263 +@@ -5,15 +5,21 @@
  11.264 +  * @remark Read the file COPYING
  11.265 +  *
  11.266 +  * @author John Levon <levon@movementarian.org>
  11.267 ++ *
  11.268 ++ * Modified by Aravind Menon for Xen
  11.269 ++ * These modifications are:
  11.270 ++ * Copyright (C) 2005 Hewlett-Packard Co.	
  11.271 +  */
  11.272 + 
  11.273 + #include <linux/fs.h>
  11.274 + #include <linux/oprofile.h>
  11.275 ++#include <asm/uaccess.h>
  11.276 ++#include <linux/ctype.h>
  11.277 + 
  11.278 + #include "event_buffer.h"
  11.279 + #include "oprofile_stats.h"
  11.280 + #include "oprof.h"
  11.281 +- 
  11.282 ++
  11.283 + unsigned long fs_buffer_size = 131072;
  11.284 + unsigned long fs_cpu_buffer_size = 8192;
  11.285 + unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
  11.286 +@@ -117,11 +123,79 @@ static ssize_t dump_write(struct file * 
  11.287 + static struct file_operations dump_fops = {
  11.288 + 	.write		= dump_write,
  11.289 + };
  11.290 +- 
  11.291 ++
  11.292 ++#define TMPBUFSIZE 512
  11.293 ++
  11.294 ++unsigned int adomains = 0;
  11.295 ++long active_domains[MAX_OPROF_DOMAINS];
  11.296 ++
  11.297 ++static ssize_t adomain_write(struct file * file, char const __user * buf, 
  11.298 ++			     size_t count, loff_t * offset)
  11.299 ++{
  11.300 ++	char tmpbuf[TMPBUFSIZE];
  11.301 ++	char * startp = tmpbuf;
  11.302 ++	char * endp = tmpbuf;
  11.303 ++	int i;
  11.304 ++	unsigned long val;
  11.305 ++	
  11.306 ++	if (*offset)
  11.307 ++		return -EINVAL;	
  11.308 ++	if (!count)
  11.309 ++		return 0;
  11.310 ++	if (count > TMPBUFSIZE - 1)
  11.311 ++		return -EINVAL;
  11.312 ++
  11.313 ++	memset(tmpbuf, 0x0, TMPBUFSIZE);
  11.314 ++
  11.315 ++	if (copy_from_user(tmpbuf, buf, count))
  11.316 ++		return -EFAULT;
  11.317 ++	
  11.318 ++	for (i = 0; i < MAX_OPROF_DOMAINS; i++)
  11.319 ++		active_domains[i] = -1;
  11.320 ++	adomains = 0;
  11.321 ++
  11.322 ++	while (1) {
  11.323 ++		val = simple_strtol(startp, &endp, 0);
  11.324 ++		if (endp == startp)
  11.325 ++			break;
  11.326 ++		while (ispunct(*endp))
  11.327 ++			endp++;
  11.328 ++		active_domains[adomains++] = val;
  11.329 ++		if (adomains >= MAX_OPROF_DOMAINS)
  11.330 ++			break;
  11.331 ++		startp = endp;
  11.332 ++	}
  11.333 ++	if (oprofile_set_active())
  11.334 ++		return -EINVAL; 
  11.335 ++	return count;
  11.336 ++}
  11.337 ++
  11.338 ++static ssize_t adomain_read(struct file * file, char __user * buf, 
  11.339 ++			    size_t count, loff_t * offset)
  11.340 ++{
  11.341 ++	char tmpbuf[TMPBUFSIZE];
  11.342 ++	size_t len = 0;
  11.343 ++	int i;
  11.344 ++	/* This is all screwed up if we run out of space */
  11.345 ++	for (i = 0; i < adomains; i++) 
  11.346 ++		len += snprintf(tmpbuf + len, TMPBUFSIZE - len, 
  11.347 ++				"%u ", (unsigned int)active_domains[i]);
  11.348 ++	len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "\n");
  11.349 ++	return simple_read_from_buffer((void __user *)buf, count, 
  11.350 ++				       offset, tmpbuf, len);
  11.351 ++}
  11.352 ++
  11.353 ++
  11.354 ++static struct file_operations active_domain_ops = {
  11.355 ++	.read		= adomain_read,
  11.356 ++	.write		= adomain_write,
  11.357 ++};
  11.358 ++
  11.359 + void oprofile_create_files(struct super_block * sb, struct dentry * root)
  11.360 + {
  11.361 + 	oprofilefs_create_file(sb, root, "enable", &enable_fops);
  11.362 + 	oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
  11.363 ++	oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
  11.364 + 	oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
  11.365 + 	oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
  11.366 + 	oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
  11.367 +diff -pruN ../pristine-linux-2.6.16/include/linux/oprofile.h ./include/linux/oprofile.h
  11.368 +--- ../pristine-linux-2.6.16/include/linux/oprofile.h	2006-03-20 05:53:29.000000000 +0000
  11.369 ++++ ./include/linux/oprofile.h	2006-04-03 15:53:05.000000000 +0100
  11.370 +@@ -16,6 +16,8 @@
  11.371 + #include <linux/types.h>
  11.372 + #include <linux/spinlock.h>
  11.373 + #include <asm/atomic.h>
  11.374 ++
  11.375 ++#include <xen/interface/xenoprof.h>
  11.376 +  
  11.377 + struct super_block;
  11.378 + struct dentry;
  11.379 +@@ -27,6 +29,8 @@ struct oprofile_operations {
  11.380 + 	/* create any necessary configuration files in the oprofile fs.
  11.381 + 	 * Optional. */
  11.382 + 	int (*create_files)(struct super_block * sb, struct dentry * root);
  11.383 ++	/* setup active domains with Xen */
  11.384 ++	int (*set_active)(int *active_domains, unsigned int adomains);
  11.385 + 	/* Do any necessary interrupt setup. Optional. */
  11.386 + 	int (*setup)(void);
  11.387 + 	/* Do any necessary interrupt shutdown. Optional. */
    12.1 --- a/xen/arch/x86/Makefile	Thu Apr 06 17:49:21 2006 +0100
    12.2 +++ b/xen/arch/x86/Makefile	Thu Apr 06 18:58:01 2006 +0100
    12.3 @@ -2,6 +2,7 @@ subdir-y += acpi
    12.4  subdir-y += cpu
    12.5  subdir-y += genapic
    12.6  subdir-y += hvm
    12.7 +subdir-y += oprofile
    12.8  
    12.9  subdir-$(x86_32) += x86_32
   12.10  subdir-$(x86_64) += x86_64
    13.1 --- a/xen/arch/x86/domain.c	Thu Apr 06 17:49:21 2006 +0100
    13.2 +++ b/xen/arch/x86/domain.c	Thu Apr 06 18:58:01 2006 +0100
    13.3 @@ -915,6 +915,8 @@ static void relinquish_memory(struct dom
    13.4      spin_unlock_recursive(&d->page_alloc_lock);
    13.5  }
    13.6  
    13.7 +extern void free_xenoprof_pages(struct domain *d);
    13.8 +
    13.9  void domain_relinquish_resources(struct domain *d)
   13.10  {
   13.11      struct vcpu *v;
   13.12 @@ -961,6 +963,10 @@ void domain_relinquish_resources(struct 
   13.13      /* Relinquish every page of memory. */
   13.14      relinquish_memory(d, &d->xenpage_list);
   13.15      relinquish_memory(d, &d->page_list);
   13.16 +
   13.17 +    /* Free page used by xen oprofile buffer */
   13.18 +    free_xenoprof_pages(d);
   13.19 +
   13.20  }
   13.21  
   13.22  void arch_dump_domain_info(struct domain *d)
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/xen/arch/x86/oprofile/Makefile	Thu Apr 06 18:58:01 2006 +0100
    14.3 @@ -0,0 +1,5 @@
    14.4 +obj-y += xenoprof.o
    14.5 +obj-y += nmi_int.o
    14.6 +obj-y += op_model_p4.o
    14.7 +obj-y += op_model_ppro.o
    14.8 +obj-y += op_model_athlon.o
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/xen/arch/x86/oprofile/nmi_int.c	Thu Apr 06 18:58:01 2006 +0100
    15.3 @@ -0,0 +1,399 @@
    15.4 +/**
    15.5 + * @file nmi_int.c
    15.6 + *
    15.7 + * @remark Copyright 2002 OProfile authors
    15.8 + * @remark Read the file COPYING
    15.9 + *
   15.10 + * @author John Levon <levon@movementarian.org>
   15.11 + *
   15.12 + * Modified for Xen: by Aravind Menon & Jose Renato Santos
   15.13 + *   These modifications are:
   15.14 + *   Copyright (C) 2005 Hewlett-Packard Co.
   15.15 + */
   15.16 +
   15.17 +#include <xen/event.h>
   15.18 +#include <xen/types.h>
   15.19 +#include <xen/errno.h>
   15.20 +#include <xen/init.h>
   15.21 +#include <public/xen.h>
   15.22 +#include <asm/nmi.h>
   15.23 +#include <asm/msr.h>
   15.24 +#include <asm/apic.h>
   15.25 +#include <asm/regs.h>
   15.26 +#include <asm/current.h>
   15.27 +#include <xen/delay.h>
   15.28 + 
   15.29 +#include "op_counter.h"
   15.30 +#include "op_x86_model.h"
   15.31 + 
   15.32 +static struct op_x86_model_spec const * model;
   15.33 +static struct op_msrs cpu_msrs[NR_CPUS];
   15.34 +static unsigned long saved_lvtpc[NR_CPUS];
   15.35 +
   15.36 +#define VIRQ_BITMASK_SIZE (MAX_OPROF_DOMAINS/32 + 1)
   15.37 +extern int active_domains[MAX_OPROF_DOMAINS];
   15.38 +extern unsigned int adomains;
   15.39 +extern struct domain *primary_profiler;
   15.40 +extern struct domain *adomain_ptrs[MAX_OPROF_DOMAINS];
   15.41 +extern unsigned long virq_ovf_pending[VIRQ_BITMASK_SIZE];
   15.42 +extern int is_active(struct domain *d);
   15.43 +extern int active_id(struct domain *d);
   15.44 +extern int is_profiled(struct domain *d);
   15.45 +
   15.46 +extern size_t strlcpy(char *dest, const char *src, size_t size);
   15.47 +
   15.48 +
   15.49 +int nmi_callback(struct cpu_user_regs *regs, int cpu)
   15.50 +{
   15.51 +	int xen_mode = 0;
   15.52 +	int ovf;
   15.53 +
   15.54 +	ovf = model->check_ctrs(cpu, &cpu_msrs[cpu], regs);
   15.55 +	xen_mode = ring_0(regs);
   15.56 +	if ( ovf )
   15.57 +	{
   15.58 +		if ( is_active(current->domain) )
   15.59 +		{
   15.60 +			if ( !xen_mode )
   15.61 +			{
   15.62 +				send_guest_vcpu_virq(current, VIRQ_XENOPROF);
   15.63 +			} 
   15.64 +		}
   15.65 +	}
   15.66 +	return 1;
   15.67 +}
   15.68 + 
   15.69 + 
   15.70 +static void nmi_cpu_save_registers(struct op_msrs * msrs)
   15.71 +{
   15.72 +	unsigned int const nr_ctrs = model->num_counters;
   15.73 +	unsigned int const nr_ctrls = model->num_controls; 
   15.74 +	struct op_msr * counters = msrs->counters;
   15.75 +	struct op_msr * controls = msrs->controls;
   15.76 +	unsigned int i;
   15.77 +
   15.78 +	for (i = 0; i < nr_ctrs; ++i) {
   15.79 +		rdmsr(counters[i].addr,
   15.80 +			counters[i].saved.low,
   15.81 +			counters[i].saved.high);
   15.82 +	}
   15.83 + 
   15.84 +	for (i = 0; i < nr_ctrls; ++i) {
   15.85 +		rdmsr(controls[i].addr,
   15.86 +			controls[i].saved.low,
   15.87 +			controls[i].saved.high);
   15.88 +	}
   15.89 +}
   15.90 +
   15.91 +
   15.92 +static void nmi_save_registers(void * dummy)
   15.93 +{
   15.94 +	int cpu = smp_processor_id();
   15.95 +	struct op_msrs * msrs = &cpu_msrs[cpu];
   15.96 +	model->fill_in_addresses(msrs);
   15.97 +	nmi_cpu_save_registers(msrs);
   15.98 +}
   15.99 +
  15.100 +
  15.101 +static void free_msrs(void)
  15.102 +{
  15.103 +	int i;
  15.104 +	for (i = 0; i < NR_CPUS; ++i) {
  15.105 +		xfree(cpu_msrs[i].counters);
  15.106 +		cpu_msrs[i].counters = NULL;
  15.107 +		xfree(cpu_msrs[i].controls);
  15.108 +		cpu_msrs[i].controls = NULL;
  15.109 +	}
  15.110 +}
  15.111 +
  15.112 +
  15.113 +static int allocate_msrs(void)
  15.114 +{
  15.115 +	int success = 1;
  15.116 +	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
  15.117 +	size_t counters_size = sizeof(struct op_msr) * model->num_counters;
  15.118 +
  15.119 +	int i;
  15.120 +	for (i = 0; i < NR_CPUS; ++i) {
  15.121 +		if (!test_bit(i, &cpu_online_map))
  15.122 +			continue;
  15.123 +
  15.124 +		cpu_msrs[i].counters = xmalloc_bytes(counters_size);
  15.125 +		if (!cpu_msrs[i].counters) {
  15.126 +			success = 0;
  15.127 +			break;
  15.128 +		}
  15.129 +		cpu_msrs[i].controls = xmalloc_bytes(controls_size);
  15.130 +		if (!cpu_msrs[i].controls) {
  15.131 +			success = 0;
  15.132 +			break;
  15.133 +		}
  15.134 +	}
  15.135 +
  15.136 +	if (!success)
  15.137 +		free_msrs();
  15.138 +
  15.139 +	return success;
  15.140 +}
  15.141 +
  15.142 +
  15.143 +static void nmi_cpu_setup(void * dummy)
  15.144 +{
  15.145 +	int cpu = smp_processor_id();
  15.146 +	struct op_msrs * msrs = &cpu_msrs[cpu];
  15.147 +	model->setup_ctrs(msrs);
  15.148 +}
  15.149 +
  15.150 +
  15.151 +int nmi_setup_events(void)
  15.152 +{
  15.153 +	on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
  15.154 +	return 0;
  15.155 +}
  15.156 +
  15.157 +int nmi_reserve_counters(void)
  15.158 +{
  15.159 +	if (!allocate_msrs())
  15.160 +		return -ENOMEM;
  15.161 +
  15.162 +	/* We walk a thin line between law and rape here.
  15.163 +	 * We need to be careful to install our NMI handler
  15.164 +	 * without actually triggering any NMIs as this will
  15.165 +	 * break the core code horrifically.
  15.166 +	 */
  15.167 +	if (reserve_lapic_nmi() < 0) {
  15.168 +		free_msrs();
  15.169 +		return -EBUSY;
  15.170 +	}
  15.171 +	/* We need to serialize save and setup for HT because the subset
  15.172 +	 * of msrs are distinct for save and setup operations
  15.173 +	 */
  15.174 +	on_each_cpu(nmi_save_registers, NULL, 0, 1);
  15.175 + 	return 0;
  15.176 +}
  15.177 +
  15.178 +int nmi_enable_virq(void)
  15.179 +{
  15.180 +	set_nmi_callback(nmi_callback);
  15.181 +	return 0;
  15.182 +}
  15.183 +
  15.184 +
  15.185 +void nmi_disable_virq(void)
  15.186 +{
  15.187 +	unset_nmi_callback();
  15.188 +} 
  15.189 +
  15.190 +
  15.191 +static void nmi_restore_registers(struct op_msrs * msrs)
  15.192 +{
  15.193 +	unsigned int const nr_ctrs = model->num_counters;
  15.194 +	unsigned int const nr_ctrls = model->num_controls; 
  15.195 +	struct op_msr * counters = msrs->counters;
  15.196 +	struct op_msr * controls = msrs->controls;
  15.197 +	unsigned int i;
  15.198 +
  15.199 +	for (i = 0; i < nr_ctrls; ++i) {
  15.200 +		wrmsr(controls[i].addr,
  15.201 +			controls[i].saved.low,
  15.202 +			controls[i].saved.high);
  15.203 +	}
  15.204 + 
  15.205 +	for (i = 0; i < nr_ctrs; ++i) {
  15.206 +		wrmsr(counters[i].addr,
  15.207 +			counters[i].saved.low,
  15.208 +			counters[i].saved.high);
  15.209 +	}
  15.210 +}
  15.211 + 
  15.212 +
  15.213 +static void nmi_cpu_shutdown(void * dummy)
  15.214 +{
  15.215 +	int cpu = smp_processor_id();
  15.216 +	struct op_msrs * msrs = &cpu_msrs[cpu];
  15.217 +	nmi_restore_registers(msrs);
  15.218 +}
  15.219 +
  15.220 + 
  15.221 +void nmi_release_counters(void)
  15.222 +{
  15.223 +	on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
  15.224 +	release_lapic_nmi();
  15.225 +	free_msrs();
  15.226 +}
  15.227 +
  15.228 + 
  15.229 +static void nmi_cpu_start(void * dummy)
  15.230 +{
  15.231 +	int cpu = smp_processor_id();
  15.232 +	struct op_msrs const * msrs = &cpu_msrs[cpu];
  15.233 +	saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
  15.234 +	apic_write(APIC_LVTPC, APIC_DM_NMI);
  15.235 +	model->start(msrs);
  15.236 +}
  15.237 + 
  15.238 +
  15.239 +int nmi_start(void)
  15.240 +{
  15.241 +	on_each_cpu(nmi_cpu_start, NULL, 0, 1);
  15.242 +	return 0;
  15.243 +}
  15.244 + 
  15.245 + 
  15.246 +static void nmi_cpu_stop(void * dummy)
  15.247 +{
  15.248 +	unsigned int v;
  15.249 +	int cpu = smp_processor_id();
  15.250 +	struct op_msrs const * msrs = &cpu_msrs[cpu];
  15.251 +	model->stop(msrs);
  15.252 +
  15.253 +	/* restoring APIC_LVTPC can trigger an apic error because the delivery
  15.254 +	 * mode and vector nr combination can be illegal. That's by design: on
  15.255 +	 * power on apic lvt contain a zero vector nr which are legal only for
  15.256 +	 * NMI delivery mode. So inhibit apic err before restoring lvtpc
  15.257 +	 */
  15.258 +	if ( !(apic_read(APIC_LVTPC) & APIC_DM_NMI)
  15.259 +	     || (apic_read(APIC_LVTPC) & APIC_LVT_MASKED) )
  15.260 +	{
  15.261 +		printk("nmi_stop: APIC not good %ul\n", apic_read(APIC_LVTPC));
  15.262 +		mdelay(5000);
  15.263 +	}
  15.264 +	v = apic_read(APIC_LVTERR);
  15.265 +	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
  15.266 +	apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
  15.267 +	apic_write(APIC_LVTERR, v);
  15.268 +}
  15.269 + 
  15.270 + 
  15.271 +void nmi_stop(void)
  15.272 +{
  15.273 +	on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
  15.274 +}
  15.275 +
  15.276 +
  15.277 +struct op_counter_config counter_config[OP_MAX_COUNTER];
  15.278 +
  15.279 +static int __init p4_init(char * cpu_type)
  15.280 +{ 
  15.281 +	__u8 cpu_model = current_cpu_data.x86_model;
  15.282 +
  15.283 +	if (cpu_model > 4)
  15.284 +		return 0;
  15.285 +
  15.286 +#ifndef CONFIG_SMP
  15.287 +	strncpy (cpu_type, "i386/p4", XENOPROF_CPU_TYPE_SIZE - 1);
  15.288 +	model = &op_p4_spec;
  15.289 +	return 1;
  15.290 +#else
  15.291 +	switch (smp_num_siblings) {
  15.292 +		case 1:
  15.293 +			strncpy (cpu_type, "i386/p4", 
  15.294 +				 XENOPROF_CPU_TYPE_SIZE - 1);
  15.295 +			model = &op_p4_spec;
  15.296 +			return 1;
  15.297 +
  15.298 +		case 2:
  15.299 +			strncpy (cpu_type, "i386/p4-ht", 
  15.300 +				 XENOPROF_CPU_TYPE_SIZE - 1);
  15.301 +			model = &op_p4_ht2_spec;
  15.302 +			return 1;
  15.303 +	}
  15.304 +#endif
  15.305 +	printk("Xenoprof ERROR: P4 HyperThreading detected with > 2 threads\n");
  15.306 +
  15.307 +	return 0;
  15.308 +}
  15.309 +
  15.310 +
  15.311 +static int __init ppro_init(char *cpu_type)
  15.312 +{
  15.313 +	__u8 cpu_model = current_cpu_data.x86_model;
  15.314 +
  15.315 +	if (cpu_model > 0xd)
  15.316 +		return 0;
  15.317 +
  15.318 +	if (cpu_model == 9) {
  15.319 +		strncpy (cpu_type, "i386/p6_mobile", XENOPROF_CPU_TYPE_SIZE - 1);
  15.320 +	} else if (cpu_model > 5) {
  15.321 +		strncpy (cpu_type, "i386/piii", XENOPROF_CPU_TYPE_SIZE - 1);
  15.322 +	} else if (cpu_model > 2) {
  15.323 +		strncpy (cpu_type, "i386/pii", XENOPROF_CPU_TYPE_SIZE - 1);
  15.324 +	} else {
  15.325 +		strncpy (cpu_type, "i386/ppro", XENOPROF_CPU_TYPE_SIZE - 1);
  15.326 +	}
  15.327 +
  15.328 +	model = &op_ppro_spec;
  15.329 +	return 1;
  15.330 +}
  15.331 +
  15.332 +int nmi_init(int *num_events, int *is_primary, char *cpu_type)
  15.333 +{
  15.334 +	__u8 vendor = current_cpu_data.x86_vendor;
  15.335 +	__u8 family = current_cpu_data.x86;
  15.336 +	int prim = 0;
  15.337 + 
  15.338 +	if (!cpu_has_apic)
  15.339 +		return -ENODEV;
  15.340 +
  15.341 +	if (primary_profiler == NULL) {
  15.342 +		/* For now, only dom0 can be the primary profiler */
  15.343 +		if (current->domain->domain_id == 0) {
  15.344 +			primary_profiler = current->domain;
  15.345 +			prim = 1;
  15.346 +		}
  15.347 +	}
  15.348 + 
  15.349 +	/* Make sure string is NULL terminated */
  15.350 +	cpu_type[XENOPROF_CPU_TYPE_SIZE - 1] = 0;
  15.351 +
  15.352 +	switch (vendor) {
  15.353 +		case X86_VENDOR_AMD:
  15.354 +			/* Needs to be at least an Athlon (or hammer in 32bit mode) */
  15.355 +
  15.356 +			switch (family) {
  15.357 +			default:
  15.358 +				return -ENODEV;
  15.359 +			case 6:
  15.360 +				model = &op_athlon_spec;
  15.361 +				strncpy (cpu_type, "i386/athlon", 
  15.362 +					 XENOPROF_CPU_TYPE_SIZE - 1);
  15.363 +				break;
  15.364 +			case 0xf:
  15.365 +				model = &op_athlon_spec;
  15.366 +				/* Actually it could be i386/hammer too, but give
  15.367 +				   user space an consistent name. */
  15.368 +				strncpy (cpu_type, "x86-64/hammer", 
  15.369 +					 XENOPROF_CPU_TYPE_SIZE - 1);
  15.370 +				break;
  15.371 +			}
  15.372 +			break;
  15.373 + 
  15.374 +		case X86_VENDOR_INTEL:
  15.375 +			switch (family) {
  15.376 +				/* Pentium IV */
  15.377 +				case 0xf:
  15.378 +					if (!p4_init(cpu_type))
  15.379 +						return -ENODEV;
  15.380 +					break;
  15.381 +
  15.382 +				/* A P6-class processor */
  15.383 +				case 6:
  15.384 +					if (!ppro_init(cpu_type))
  15.385 +						return -ENODEV;
  15.386 +					break;
  15.387 +
  15.388 +				default:
  15.389 +					return -ENODEV;
  15.390 +			}
  15.391 +			break;
  15.392 +
  15.393 +		default:
  15.394 +			return -ENODEV;
  15.395 +	}
  15.396 +
  15.397 +	*num_events = model->num_counters;
  15.398 +	*is_primary = prim;
  15.399 +
  15.400 +	return 0;
  15.401 +}
  15.402 +
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/xen/arch/x86/oprofile/op_counter.h	Thu Apr 06 18:58:01 2006 +0100
    16.3 @@ -0,0 +1,29 @@
    16.4 +/**
    16.5 + * @file op_counter.h
    16.6 + *
    16.7 + * @remark Copyright 2002 OProfile authors
    16.8 + * @remark Read the file COPYING
    16.9 + *
   16.10 + * @author John Levon
   16.11 + */
   16.12 + 
   16.13 +#ifndef OP_COUNTER_H
   16.14 +#define OP_COUNTER_H
   16.15 + 
   16.16 +#define OP_MAX_COUNTER 8
   16.17 + 
   16.18 +/* Per-perfctr configuration as set via
   16.19 + * oprofilefs.
   16.20 + */
   16.21 +struct op_counter_config {
   16.22 +        unsigned long count;
   16.23 +        unsigned long enabled;
   16.24 +        unsigned long event;
   16.25 +        unsigned long kernel;
   16.26 +        unsigned long user;
   16.27 +        unsigned long unit_mask;
   16.28 +};
   16.29 +
   16.30 +extern struct op_counter_config counter_config[];
   16.31 +
   16.32 +#endif /* OP_COUNTER_H */
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/xen/arch/x86/oprofile/op_model_athlon.c	Thu Apr 06 18:58:01 2006 +0100
    17.3 @@ -0,0 +1,168 @@
    17.4 +/**
    17.5 + * @file op_model_athlon.h
    17.6 + * athlon / K7 model-specific MSR operations
    17.7 + *
    17.8 + * @remark Copyright 2002 OProfile authors
    17.9 + * @remark Read the file COPYING
   17.10 + *
   17.11 + * @author John Levon
   17.12 + * @author Philippe Elie
   17.13 + * @author Graydon Hoare
   17.14 + */
   17.15 +
   17.16 +#include <xen/types.h>
   17.17 +#include <asm/msr.h>
   17.18 +#include <asm/io.h>
   17.19 +#include <asm/apic.h>
   17.20 +#include <asm/processor.h>
   17.21 +#include <xen/sched.h>
   17.22 +#include <asm/regs.h>
   17.23 +#include <asm/current.h>
   17.24 + 
   17.25 +#include "op_x86_model.h"
   17.26 +#include "op_counter.h"
   17.27 +
   17.28 +#define NUM_COUNTERS 4
   17.29 +#define NUM_CONTROLS 4
   17.30 +
   17.31 +#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
   17.32 +#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
   17.33 +#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
   17.34 +
   17.35 +#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
   17.36 +#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
   17.37 +#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
   17.38 +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
   17.39 +#define CTRL_CLEAR(x) (x &= (1<<21))
   17.40 +#define CTRL_SET_ENABLE(val) (val |= 1<<20)
   17.41 +#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
   17.42 +#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
   17.43 +#define CTRL_SET_UM(val, m) (val |= (m << 8))
   17.44 +#define CTRL_SET_EVENT(val, e) (val |= e)
   17.45 +
   17.46 +static unsigned long reset_value[NUM_COUNTERS];
   17.47 +
   17.48 +extern void xenoprof_log_event(struct vcpu *v, unsigned long eip,
   17.49 +			       int mode, int event);
   17.50 + 
   17.51 +static void athlon_fill_in_addresses(struct op_msrs * const msrs)
   17.52 +{
   17.53 +	msrs->counters[0].addr = MSR_K7_PERFCTR0;
   17.54 +	msrs->counters[1].addr = MSR_K7_PERFCTR1;
   17.55 +	msrs->counters[2].addr = MSR_K7_PERFCTR2;
   17.56 +	msrs->counters[3].addr = MSR_K7_PERFCTR3;
   17.57 +
   17.58 +	msrs->controls[0].addr = MSR_K7_EVNTSEL0;
   17.59 +	msrs->controls[1].addr = MSR_K7_EVNTSEL1;
   17.60 +	msrs->controls[2].addr = MSR_K7_EVNTSEL2;
   17.61 +	msrs->controls[3].addr = MSR_K7_EVNTSEL3;
   17.62 +}
   17.63 +
   17.64 + 
   17.65 +static void athlon_setup_ctrs(struct op_msrs const * const msrs)
   17.66 +{
   17.67 +	unsigned int low, high;
   17.68 +	int i;
   17.69 + 
   17.70 +	/* clear all counters */
   17.71 +	for (i = 0 ; i < NUM_CONTROLS; ++i) {
   17.72 +		CTRL_READ(low, high, msrs, i);
   17.73 +		CTRL_CLEAR(low);
   17.74 +		CTRL_WRITE(low, high, msrs, i);
   17.75 +	}
   17.76 +	
   17.77 +	/* avoid a false detection of ctr overflows in NMI handler */
   17.78 +	for (i = 0; i < NUM_COUNTERS; ++i) {
   17.79 +		CTR_WRITE(1, msrs, i);
   17.80 +	}
   17.81 +
   17.82 +	/* enable active counters */
   17.83 +	for (i = 0; i < NUM_COUNTERS; ++i) {
   17.84 +		if (counter_config[i].enabled) {
   17.85 +			reset_value[i] = counter_config[i].count;
   17.86 +
   17.87 +			CTR_WRITE(counter_config[i].count, msrs, i);
   17.88 +
   17.89 +			CTRL_READ(low, high, msrs, i);
   17.90 +			CTRL_CLEAR(low);
   17.91 +			CTRL_SET_ENABLE(low);
   17.92 +			CTRL_SET_USR(low, counter_config[i].user);
   17.93 +			CTRL_SET_KERN(low, counter_config[i].kernel);
   17.94 +			CTRL_SET_UM(low, counter_config[i].unit_mask);
   17.95 +			CTRL_SET_EVENT(low, counter_config[i].event);
   17.96 +			CTRL_WRITE(low, high, msrs, i);
   17.97 +		} else {
   17.98 +			reset_value[i] = 0;
   17.99 +		}
  17.100 +	}
  17.101 +}
  17.102 +
  17.103 + 
  17.104 +static int athlon_check_ctrs(unsigned int const cpu,
  17.105 +                             struct op_msrs const * const msrs,
  17.106 +                             struct cpu_user_regs * const regs)
  17.107 +
  17.108 +{
  17.109 +	unsigned int low, high;
  17.110 +	int i;
  17.111 +	int ovf = 0;
  17.112 +	unsigned long eip = regs->eip;
  17.113 +	int mode = 0;
  17.114 +
  17.115 +	if (guest_kernel_mode(current, regs))
  17.116 +		mode = 1;
  17.117 +	else if (ring_0(regs))
  17.118 +		mode = 2;
  17.119 +
  17.120 +	for (i = 0 ; i < NUM_COUNTERS; ++i) {
  17.121 +		CTR_READ(low, high, msrs, i);
  17.122 +		if (CTR_OVERFLOWED(low)) {
  17.123 +			xenoprof_log_event(current, eip, mode, i);
  17.124 +			CTR_WRITE(reset_value[i], msrs, i);
  17.125 +			ovf = 1;
  17.126 +		}
  17.127 +	}
  17.128 +
  17.129 +	/* See op_model_ppro.c */
  17.130 +	return ovf;
  17.131 +}
  17.132 +
  17.133 + 
  17.134 +static void athlon_start(struct op_msrs const * const msrs)
  17.135 +{
  17.136 +	unsigned int low, high;
  17.137 +	int i;
  17.138 +	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
  17.139 +		if (reset_value[i]) {
  17.140 +			CTRL_READ(low, high, msrs, i);
  17.141 +			CTRL_SET_ACTIVE(low);
  17.142 +			CTRL_WRITE(low, high, msrs, i);
  17.143 +		}
  17.144 +	}
  17.145 +}
  17.146 +
  17.147 +
  17.148 +static void athlon_stop(struct op_msrs const * const msrs)
  17.149 +{
  17.150 +	unsigned int low,high;
  17.151 +	int i;
  17.152 +
  17.153 +	/* Subtle: stop on all counters to avoid race with
  17.154 +	 * setting our pm callback */
  17.155 +	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
  17.156 +		CTRL_READ(low, high, msrs, i);
  17.157 +		CTRL_SET_INACTIVE(low);
  17.158 +		CTRL_WRITE(low, high, msrs, i);
  17.159 +	}
  17.160 +}
  17.161 +
  17.162 +
  17.163 +struct op_x86_model_spec const op_athlon_spec = {
  17.164 +	.num_counters = NUM_COUNTERS,
  17.165 +	.num_controls = NUM_CONTROLS,
  17.166 +	.fill_in_addresses = &athlon_fill_in_addresses,
  17.167 +	.setup_ctrs = &athlon_setup_ctrs,
  17.168 +	.check_ctrs = &athlon_check_ctrs,
  17.169 +	.start = &athlon_start,
  17.170 +	.stop = &athlon_stop
  17.171 +};
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/xen/arch/x86/oprofile/op_model_p4.c	Thu Apr 06 18:58:01 2006 +0100
    18.3 @@ -0,0 +1,739 @@
    18.4 +/**
    18.5 + * @file op_model_p4.c
    18.6 + * P4 model-specific MSR operations
    18.7 + *
    18.8 + * @remark Copyright 2002 OProfile authors
    18.9 + * @remark Read the file COPYING
   18.10 + *
   18.11 + * @author Graydon Hoare
   18.12 + */
   18.13 +
   18.14 +#include <xen/types.h>
   18.15 +#include <asm/msr.h>
   18.16 +#include <asm/io.h>
   18.17 +#include <asm/apic.h>
   18.18 +#include <asm/processor.h>
   18.19 +#include <xen/sched.h>
   18.20 +#include <asm/regs.h>
   18.21 +#include <asm/current.h>
   18.22 +
   18.23 +#include "op_x86_model.h"
   18.24 +#include "op_counter.h"
   18.25 +
   18.26 +#define NUM_EVENTS 39
   18.27 +
   18.28 +#define NUM_COUNTERS_NON_HT 8
   18.29 +#define NUM_ESCRS_NON_HT 45
   18.30 +#define NUM_CCCRS_NON_HT 18
   18.31 +#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
   18.32 +
   18.33 +#define NUM_COUNTERS_HT2 4
   18.34 +#define NUM_ESCRS_HT2 23
   18.35 +#define NUM_CCCRS_HT2 9
   18.36 +#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
   18.37 +
   18.38 +static unsigned int num_counters = NUM_COUNTERS_NON_HT;
   18.39 +
   18.40 +
   18.41 +/* this has to be checked dynamically since the
   18.42 +   hyper-threadedness of a chip is discovered at
   18.43 +   kernel boot-time. */
   18.44 +static inline void setup_num_counters(void)
   18.45 +{
   18.46 +#ifdef CONFIG_SMP
   18.47 +	if (smp_num_siblings == 2)
   18.48 +		num_counters = NUM_COUNTERS_HT2;
   18.49 +#endif
   18.50 +}
   18.51 +
   18.52 +static int inline addr_increment(void)
   18.53 +{
   18.54 +#ifdef CONFIG_SMP
   18.55 +	return smp_num_siblings == 2 ? 2 : 1;
   18.56 +#else
   18.57 +	return 1;
   18.58 +#endif
   18.59 +}
   18.60 +
   18.61 +
   18.62 +/* tables to simulate simplified hardware view of p4 registers */
   18.63 +struct p4_counter_binding {
   18.64 +	int virt_counter;
   18.65 +	int counter_address;
   18.66 +	int cccr_address;
   18.67 +};
   18.68 +
   18.69 +struct p4_event_binding {
   18.70 +	int escr_select;  /* value to put in CCCR */
   18.71 +	int event_select; /* value to put in ESCR */
   18.72 +	struct {
   18.73 +		int virt_counter; /* for this counter... */
   18.74 +		int escr_address; /* use this ESCR       */
   18.75 +	} bindings[2];
   18.76 +};
   18.77 +
   18.78 +/* nb: these CTR_* defines are a duplicate of defines in
   18.79 +   event/i386.p4*events. */
   18.80 +
   18.81 +
   18.82 +#define CTR_BPU_0      (1 << 0)
   18.83 +#define CTR_MS_0       (1 << 1)
   18.84 +#define CTR_FLAME_0    (1 << 2)
   18.85 +#define CTR_IQ_4       (1 << 3)
   18.86 +#define CTR_BPU_2      (1 << 4)
   18.87 +#define CTR_MS_2       (1 << 5)
   18.88 +#define CTR_FLAME_2    (1 << 6)
   18.89 +#define CTR_IQ_5       (1 << 7)
   18.90 +
   18.91 +static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
   18.92 +	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
   18.93 +	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
   18.94 +	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
   18.95 +	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
   18.96 +	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
   18.97 +	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
   18.98 +	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
   18.99 +	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
  18.100 +};
  18.101 +
  18.102 +#define NUM_UNUSED_CCCRS	NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
  18.103 +
  18.104 +/* All cccr we don't use. */
  18.105 +static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
  18.106 +	MSR_P4_BPU_CCCR1,	MSR_P4_BPU_CCCR3,
  18.107 +	MSR_P4_MS_CCCR1,	MSR_P4_MS_CCCR3,
  18.108 +	MSR_P4_FLAME_CCCR1,	MSR_P4_FLAME_CCCR3,
  18.109 +	MSR_P4_IQ_CCCR0,	MSR_P4_IQ_CCCR1,
  18.110 +	MSR_P4_IQ_CCCR2,	MSR_P4_IQ_CCCR3
  18.111 +};
  18.112 +
  18.113 +/* p4 event codes in libop/op_event.h are indices into this table. */
  18.114 +
  18.115 +static struct p4_event_binding p4_events[NUM_EVENTS] = {
  18.116 +	
  18.117 +	{ /* BRANCH_RETIRED */
  18.118 +		0x05, 0x06, 
  18.119 +		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
  18.120 +		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  18.121 +	},
  18.122 +	
  18.123 +	{ /* MISPRED_BRANCH_RETIRED */
  18.124 +		0x04, 0x03, 
  18.125 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
  18.126 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
  18.127 +	},
  18.128 +	
  18.129 +	{ /* TC_DELIVER_MODE */
  18.130 +		0x01, 0x01,
  18.131 +		{ { CTR_MS_0, MSR_P4_TC_ESCR0},  
  18.132 +		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
  18.133 +	},
  18.134 +	
  18.135 +	{ /* BPU_FETCH_REQUEST */
  18.136 +		0x00, 0x03, 
  18.137 +		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
  18.138 +		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
  18.139 +	},
  18.140 +
  18.141 +	{ /* ITLB_REFERENCE */
  18.142 +		0x03, 0x18,
  18.143 +		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
  18.144 +		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
  18.145 +	},
  18.146 +
  18.147 +	{ /* MEMORY_CANCEL */
  18.148 +		0x05, 0x02,
  18.149 +		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
  18.150 +		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
  18.151 +	},
  18.152 +
  18.153 +	{ /* MEMORY_COMPLETE */
  18.154 +		0x02, 0x08,
  18.155 +		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
  18.156 +		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
  18.157 +	},
  18.158 +
  18.159 +	{ /* LOAD_PORT_REPLAY */
  18.160 +		0x02, 0x04, 
  18.161 +		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
  18.162 +		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
  18.163 +	},
  18.164 +
  18.165 +	{ /* STORE_PORT_REPLAY */
  18.166 +		0x02, 0x05,
  18.167 +		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
  18.168 +		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
  18.169 +	},
  18.170 +
  18.171 +	{ /* MOB_LOAD_REPLAY */
  18.172 +		0x02, 0x03,
  18.173 +		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
  18.174 +		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
  18.175 +	},
  18.176 +
  18.177 +	{ /* PAGE_WALK_TYPE */
  18.178 +		0x04, 0x01,
  18.179 +		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
  18.180 +		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
  18.181 +	},
  18.182 +
  18.183 +	{ /* BSQ_CACHE_REFERENCE */
  18.184 +		0x07, 0x0c, 
  18.185 +		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
  18.186 +		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
  18.187 +	},
  18.188 +
  18.189 +	{ /* IOQ_ALLOCATION */
  18.190 +		0x06, 0x03, 
  18.191 +		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
  18.192 +		  { 0, 0 } }
  18.193 +	},
  18.194 +
  18.195 +	{ /* IOQ_ACTIVE_ENTRIES */
  18.196 +		0x06, 0x1a, 
  18.197 +		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
  18.198 +		  { 0, 0 } }
  18.199 +	},
  18.200 +
  18.201 +	{ /* FSB_DATA_ACTIVITY */
  18.202 +		0x06, 0x17, 
  18.203 +		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
  18.204 +		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
  18.205 +	},
  18.206 +
  18.207 +	{ /* BSQ_ALLOCATION */
  18.208 +		0x07, 0x05, 
  18.209 +		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
  18.210 +		  { 0, 0 } }
  18.211 +	},
  18.212 +
  18.213 +	{ /* BSQ_ACTIVE_ENTRIES */
  18.214 +		0x07, 0x06,
  18.215 +		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},  
  18.216 +		  { 0, 0 } }
  18.217 +	},
  18.218 +
  18.219 +	{ /* X87_ASSIST */
  18.220 +		0x05, 0x03, 
  18.221 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  18.222 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  18.223 +	},
  18.224 +
  18.225 +	{ /* SSE_INPUT_ASSIST */
  18.226 +		0x01, 0x34,
  18.227 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  18.228 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  18.229 +	},
  18.230 +  
  18.231 +	{ /* PACKED_SP_UOP */
  18.232 +		0x01, 0x08, 
  18.233 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  18.234 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  18.235 +	},
  18.236 +  
  18.237 +	{ /* PACKED_DP_UOP */
  18.238 +		0x01, 0x0c, 
  18.239 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  18.240 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  18.241 +	},
  18.242 +
  18.243 +	{ /* SCALAR_SP_UOP */
  18.244 +		0x01, 0x0a, 
  18.245 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  18.246 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  18.247 +	},
  18.248 +
  18.249 +	{ /* SCALAR_DP_UOP */
  18.250 +		0x01, 0x0e,
  18.251 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  18.252 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  18.253 +	},
  18.254 +
  18.255 +	{ /* 64BIT_MMX_UOP */
  18.256 +		0x01, 0x02, 
  18.257 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  18.258 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  18.259 +	},
  18.260 +  
  18.261 +	{ /* 128BIT_MMX_UOP */
  18.262 +		0x01, 0x1a, 
  18.263 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  18.264 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  18.265 +	},
  18.266 +
  18.267 +	{ /* X87_FP_UOP */
  18.268 +		0x01, 0x04, 
  18.269 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  18.270 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  18.271 +	},
  18.272 +  
  18.273 +	{ /* X87_SIMD_MOVES_UOP */
  18.274 +		0x01, 0x2e, 
  18.275 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  18.276 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  18.277 +	},
  18.278 +  
  18.279 +	{ /* MACHINE_CLEAR */
  18.280 +		0x05, 0x02, 
  18.281 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  18.282 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  18.283 +	},
  18.284 +
  18.285 +	{ /* GLOBAL_POWER_EVENTS */
  18.286 +		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
  18.287 +		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
  18.288 +		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
  18.289 +	},
  18.290 +  
  18.291 +	{ /* TC_MS_XFER */
  18.292 +		0x00, 0x05, 
  18.293 +		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
  18.294 +		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
  18.295 +	},
  18.296 +
  18.297 +	{ /* UOP_QUEUE_WRITES */
  18.298 +		0x00, 0x09,
  18.299 +		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
  18.300 +		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
  18.301 +	},
  18.302 +
  18.303 +	{ /* FRONT_END_EVENT */
  18.304 +		0x05, 0x08,
  18.305 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  18.306 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  18.307 +	},
  18.308 +
  18.309 +	{ /* EXECUTION_EVENT */
  18.310 +		0x05, 0x0c,
  18.311 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  18.312 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  18.313 +	},
  18.314 +
  18.315 +	{ /* REPLAY_EVENT */
  18.316 +		0x05, 0x09,
  18.317 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  18.318 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  18.319 +	},
  18.320 +
  18.321 +	{ /* INSTR_RETIRED */
  18.322 +		0x04, 0x02, 
  18.323 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
  18.324 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
  18.325 +	},
  18.326 +
  18.327 +	{ /* UOPS_RETIRED */
  18.328 +		0x04, 0x01,
  18.329 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
  18.330 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
  18.331 +	},
  18.332 +
  18.333 +	{ /* UOP_TYPE */    
  18.334 +		0x02, 0x02, 
  18.335 +		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
  18.336 +		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
  18.337 +	},
  18.338 +
  18.339 +	{ /* RETIRED_MISPRED_BRANCH_TYPE */
  18.340 +		0x02, 0x05, 
  18.341 +		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
  18.342 +		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
  18.343 +	},
  18.344 +
  18.345 +	{ /* RETIRED_BRANCH_TYPE */
  18.346 +		0x02, 0x04,
  18.347 +		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
  18.348 +		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
  18.349 +	}
  18.350 +};
  18.351 +
  18.352 +
  18.353 +#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
  18.354 +
  18.355 +#define ESCR_RESERVED_BITS 0x80000003
  18.356 +#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
  18.357 +#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
  18.358 +#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
  18.359 +#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
  18.360 +#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
  18.361 +#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
  18.362 +#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
  18.363 +#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
  18.364 +#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
  18.365 +
  18.366 +#define CCCR_RESERVED_BITS 0x38030FFF
  18.367 +#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
  18.368 +#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
  18.369 +#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
  18.370 +#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
  18.371 +#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
  18.372 +#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
  18.373 +#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
  18.374 +#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
  18.375 +#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
  18.376 +#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
  18.377 +#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
  18.378 +
  18.379 +#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
  18.380 +#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
  18.381 +#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
  18.382 +
  18.383 +
  18.384 +/* this assigns a "stagger" to the current CPU, which is used throughout
  18.385 +   the code in this module as an extra array offset, to select the "even"
  18.386 +   or "odd" part of all the divided resources. */
  18.387 +static unsigned int get_stagger(void)
  18.388 +{
  18.389 +#ifdef CONFIG_SMP
  18.390 +	int cpu = smp_processor_id();
  18.391 +	return (cpu != first_cpu(cpu_sibling_map[cpu]));
  18.392 +#endif	
  18.393 +	return 0;
  18.394 +}
  18.395 +
  18.396 +
  18.397 +/* finally, mediate access to a real hardware counter
  18.398 +   by passing a "virtual" counter number to this macro,
  18.399 +   along with your stagger setting. */
  18.400 +#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
  18.401 +
  18.402 +static unsigned long reset_value[NUM_COUNTERS_NON_HT];
  18.403 +
  18.404 +
  18.405 +static void p4_fill_in_addresses(struct op_msrs * const msrs)
  18.406 +{
  18.407 +	unsigned int i; 
  18.408 +	unsigned int addr, stag;
  18.409 +
  18.410 +	setup_num_counters();
  18.411 +	stag = get_stagger();
  18.412 +
  18.413 +	/* the counter registers we pay attention to */
  18.414 +	for (i = 0; i < num_counters; ++i) {
  18.415 +		msrs->counters[i].addr = 
  18.416 +			p4_counters[VIRT_CTR(stag, i)].counter_address;
  18.417 +	}
  18.418 +
  18.419 +	/* FIXME: bad feeling, we don't save the 10 counters we don't use. */
  18.420 +
  18.421 +	/* 18 CCCR registers */
  18.422 +	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
  18.423 +	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
  18.424 +		msrs->controls[i].addr = addr;
  18.425 +	}
  18.426 +	
  18.427 +	/* 43 ESCR registers in three or four discontiguous group */
  18.428 +	for (addr = MSR_P4_BSU_ESCR0 + stag;
  18.429 +	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
  18.430 +		msrs->controls[i].addr = addr;
  18.431 +	}
  18.432 +
  18.433 +	/* no IQ_ESCR0/1 on some models, we save a second time BSU_ESCR0/1
  18.434 +	 * to avoid special case in nmi_{save|restore}_registers() */
  18.435 +	if (boot_cpu_data.x86_model >= 0x3) {
  18.436 +		for (addr = MSR_P4_BSU_ESCR0 + stag;
  18.437 +		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
  18.438 +			msrs->controls[i].addr = addr;
  18.439 +		}
  18.440 +	} else {
  18.441 +		for (addr = MSR_P4_IQ_ESCR0 + stag;
  18.442 +		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
  18.443 +			msrs->controls[i].addr = addr;
  18.444 +		}
  18.445 +	}
  18.446 +
  18.447 +	for (addr = MSR_P4_RAT_ESCR0 + stag;
  18.448 +	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
  18.449 +		msrs->controls[i].addr = addr;
  18.450 +	}
  18.451 +	
  18.452 +	for (addr = MSR_P4_MS_ESCR0 + stag;
  18.453 +	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
  18.454 +		msrs->controls[i].addr = addr;
  18.455 +	}
  18.456 +	
  18.457 +	for (addr = MSR_P4_IX_ESCR0 + stag;
  18.458 +	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
  18.459 +		msrs->controls[i].addr = addr;
  18.460 +	}
  18.461 +
  18.462 +	/* there are 2 remaining non-contiguously located ESCRs */
  18.463 +
  18.464 +	if (num_counters == NUM_COUNTERS_NON_HT) {		
  18.465 +		/* standard non-HT CPUs handle both remaining ESCRs*/
  18.466 +		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
  18.467 +		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
  18.468 +
  18.469 +	} else if (stag == 0) {
  18.470 +		/* HT CPUs give the first remainder to the even thread, as
  18.471 +		   the 32nd control register */
  18.472 +		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
  18.473 +
  18.474 +	} else {
  18.475 +		/* and two copies of the second to the odd thread,
  18.476 +		   for the 22nd and 23rd control registers */
  18.477 +		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
  18.478 +		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
  18.479 +	}
  18.480 +}
  18.481 +
  18.482 +
  18.483 +static void pmc_setup_one_p4_counter(unsigned int ctr)
  18.484 +{
  18.485 +	int i;
  18.486 +	int const maxbind = 2;
  18.487 +	unsigned int cccr = 0;
  18.488 +	unsigned int escr = 0;
  18.489 +	unsigned int high = 0;
  18.490 +	unsigned int counter_bit;
  18.491 +	struct p4_event_binding *ev = NULL;
  18.492 +	unsigned int stag;
  18.493 +
  18.494 +	stag = get_stagger();
  18.495 +	
  18.496 +	/* convert from counter *number* to counter *bit* */
  18.497 +	counter_bit = 1 << VIRT_CTR(stag, ctr);
  18.498 +	
  18.499 +	/* find our event binding structure. */
  18.500 +	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
  18.501 +		printk(KERN_ERR 
  18.502 +		       "oprofile: P4 event code 0x%lx out of range\n", 
  18.503 +		       counter_config[ctr].event);
  18.504 +		return;
  18.505 +	}
  18.506 +	
  18.507 +	ev = &(p4_events[counter_config[ctr].event - 1]);
  18.508 +	
  18.509 +	for (i = 0; i < maxbind; i++) {
  18.510 +		if (ev->bindings[i].virt_counter & counter_bit) {
  18.511 +
  18.512 +			/* modify ESCR */
  18.513 +			ESCR_READ(escr, high, ev, i);
  18.514 +			ESCR_CLEAR(escr);
  18.515 +			if (stag == 0) {
  18.516 +				ESCR_SET_USR_0(escr, counter_config[ctr].user);
  18.517 +				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
  18.518 +			} else {
  18.519 +				ESCR_SET_USR_1(escr, counter_config[ctr].user);
  18.520 +				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
  18.521 +			}
  18.522 +			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
  18.523 +			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);			
  18.524 +			ESCR_WRITE(escr, high, ev, i);
  18.525 +		       
  18.526 +			/* modify CCCR */
  18.527 +			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
  18.528 +			CCCR_CLEAR(cccr);
  18.529 +			CCCR_SET_REQUIRED_BITS(cccr);
  18.530 +			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
  18.531 +			if (stag == 0) {
  18.532 +				CCCR_SET_PMI_OVF_0(cccr);
  18.533 +			} else {
  18.534 +				CCCR_SET_PMI_OVF_1(cccr);
  18.535 +			}
  18.536 +			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
  18.537 +			return;
  18.538 +		}
  18.539 +	}
  18.540 +
  18.541 +	printk(KERN_ERR 
  18.542 +	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
  18.543 +	       counter_config[ctr].event, stag, ctr);
  18.544 +}
  18.545 +
  18.546 +
  18.547 +static void p4_setup_ctrs(struct op_msrs const * const msrs)
  18.548 +{
  18.549 +	unsigned int i;
  18.550 +	unsigned int low, high;
  18.551 +	unsigned int addr;
  18.552 +	unsigned int stag;
  18.553 +
  18.554 +	stag = get_stagger();
  18.555 +
  18.556 +	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
  18.557 +	if (! MISC_PMC_ENABLED_P(low)) {
  18.558 +		printk(KERN_ERR "oprofile: P4 PMC not available\n");
  18.559 +		return;
  18.560 +	}
  18.561 +
  18.562 +	/* clear the cccrs we will use */
  18.563 +	for (i = 0 ; i < num_counters ; i++) {
  18.564 +		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
  18.565 +		CCCR_CLEAR(low);
  18.566 +		CCCR_SET_REQUIRED_BITS(low);
  18.567 +		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
  18.568 +	}
  18.569 +
  18.570 +	/* clear cccrs outside our concern */
  18.571 +	for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
  18.572 +		rdmsr(p4_unused_cccr[i], low, high);
  18.573 +		CCCR_CLEAR(low);
  18.574 +		CCCR_SET_REQUIRED_BITS(low);
  18.575 +		wrmsr(p4_unused_cccr[i], low, high);
  18.576 +	}
  18.577 +
  18.578 +	/* clear all escrs (including those outside our concern) */
  18.579 +	for (addr = MSR_P4_BSU_ESCR0 + stag;
  18.580 +	     addr <  MSR_P4_IQ_ESCR0; addr += addr_increment()) {
  18.581 +		wrmsr(addr, 0, 0);
  18.582 +	}
  18.583 +
  18.584 +	/* On older models clear also MSR_P4_IQ_ESCR0/1 */
  18.585 +	if (boot_cpu_data.x86_model < 0x3) {
  18.586 +		wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
  18.587 +		wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
  18.588 +	}
  18.589 +
  18.590 +	for (addr = MSR_P4_RAT_ESCR0 + stag;
  18.591 +	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
  18.592 +		wrmsr(addr, 0, 0);
  18.593 +	}
  18.594 +	
  18.595 +	for (addr = MSR_P4_MS_ESCR0 + stag;
  18.596 +	     addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ 
  18.597 +		wrmsr(addr, 0, 0);
  18.598 +	}
  18.599 +	
  18.600 +	for (addr = MSR_P4_IX_ESCR0 + stag;
  18.601 +	     addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ 
  18.602 +		wrmsr(addr, 0, 0);
  18.603 +	}
  18.604 +
  18.605 +	if (num_counters == NUM_COUNTERS_NON_HT) {		
  18.606 +		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
  18.607 +		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
  18.608 +	} else if (stag == 0) {
  18.609 +		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
  18.610 +	} else {
  18.611 +		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
  18.612 +	}		
  18.613 +	
  18.614 +	/* setup all counters */
  18.615 +	for (i = 0 ; i < num_counters ; ++i) {
  18.616 +		if (counter_config[i].enabled) {
  18.617 +			reset_value[i] = counter_config[i].count;
  18.618 +			pmc_setup_one_p4_counter(i);
  18.619 +			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
  18.620 +		} else {
  18.621 +			reset_value[i] = 0;
  18.622 +		}
  18.623 +	}
  18.624 +}
  18.625 +
  18.626 +
  18.627 +extern void xenoprof_log_event(struct vcpu *v, unsigned long eip,
  18.628 +			       int mode, int event);
  18.629 +
  18.630 +static int p4_check_ctrs(unsigned int const cpu,
  18.631 +                         struct op_msrs const * const msrs,
  18.632 +                         struct cpu_user_regs * const regs)
  18.633 +{
  18.634 +	unsigned long ctr, low, high, stag, real;
  18.635 +	int i;
  18.636 +	int ovf = 0;
  18.637 +	unsigned long eip = regs->eip;
  18.638 +	int mode = 0;
  18.639 +
  18.640 +	if (guest_kernel_mode(current, regs))
  18.641 +		mode = 1;
  18.642 +	else if (ring_0(regs))
  18.643 +		mode = 2;
  18.644 +
  18.645 +	stag = get_stagger();
  18.646 +
  18.647 +	for (i = 0; i < num_counters; ++i) {
  18.648 +		
  18.649 +		if (!reset_value[i]) 
  18.650 +			continue;
  18.651 +
  18.652 +		/* 
  18.653 +		 * there is some eccentricity in the hardware which
  18.654 +		 * requires that we perform 2 extra corrections:
  18.655 +		 *
  18.656 +		 * - check both the CCCR:OVF flag for overflow and the
  18.657 +		 *   counter high bit for un-flagged overflows.
  18.658 +		 *
  18.659 +		 * - write the counter back twice to ensure it gets
  18.660 +		 *   updated properly.
  18.661 +		 * 
  18.662 +		 * the former seems to be related to extra NMIs happening
  18.663 +		 * during the current NMI; the latter is reported as errata
  18.664 +		 * N15 in intel doc 249199-029, pentium 4 specification
  18.665 +		 * update, though their suggested work-around does not
  18.666 +		 * appear to solve the problem.
  18.667 +		 */
  18.668 +		
  18.669 +		real = VIRT_CTR(stag, i);
  18.670 +
  18.671 +		CCCR_READ(low, high, real);
  18.672 + 		CTR_READ(ctr, high, real);
  18.673 +		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
  18.674 +			xenoprof_log_event(current, eip, mode, i);
  18.675 + 			CTR_WRITE(reset_value[i], real);
  18.676 +			CCCR_CLEAR_OVF(low);
  18.677 +			CCCR_WRITE(low, high, real);
  18.678 + 			CTR_WRITE(reset_value[i], real);
  18.679 +			ovf = 1;
  18.680 +		}
  18.681 +	}
  18.682 +
  18.683 +	/* P4 quirk: you have to re-unmask the apic vector */
  18.684 +	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
  18.685 +
  18.686 +	return ovf;
  18.687 +}
  18.688 +
  18.689 +
  18.690 +static void p4_start(struct op_msrs const * const msrs)
  18.691 +{
  18.692 +	unsigned int low, high, stag;
  18.693 +	int i;
  18.694 +
  18.695 +	stag = get_stagger();
  18.696 +
  18.697 +	for (i = 0; i < num_counters; ++i) {
  18.698 +		if (!reset_value[i])
  18.699 +			continue;
  18.700 +		CCCR_READ(low, high, VIRT_CTR(stag, i));
  18.701 +		CCCR_SET_ENABLE(low);
  18.702 +		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
  18.703 +	}
  18.704 +}
  18.705 +
  18.706 +
  18.707 +static void p4_stop(struct op_msrs const * const msrs)
  18.708 +{
  18.709 +	unsigned int low, high, stag;
  18.710 +	int i;
  18.711 +
  18.712 +	stag = get_stagger();
  18.713 +
  18.714 +	for (i = 0; i < num_counters; ++i) {
  18.715 +		CCCR_READ(low, high, VIRT_CTR(stag, i));
  18.716 +		CCCR_SET_DISABLE(low);
  18.717 +		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
  18.718 +	}
  18.719 +}
  18.720 +
  18.721 +
  18.722 +#ifdef CONFIG_SMP
  18.723 +struct op_x86_model_spec const op_p4_ht2_spec = {
  18.724 +	.num_counters = NUM_COUNTERS_HT2,
  18.725 +	.num_controls = NUM_CONTROLS_HT2,
  18.726 +	.fill_in_addresses = &p4_fill_in_addresses,
  18.727 +	.setup_ctrs = &p4_setup_ctrs,
  18.728 +	.check_ctrs = &p4_check_ctrs,
  18.729 +	.start = &p4_start,
  18.730 +	.stop = &p4_stop
  18.731 +};
  18.732 +#endif
  18.733 +
  18.734 +struct op_x86_model_spec const op_p4_spec = {
  18.735 +	.num_counters = NUM_COUNTERS_NON_HT,
  18.736 +	.num_controls = NUM_CONTROLS_NON_HT,
  18.737 +	.fill_in_addresses = &p4_fill_in_addresses,
  18.738 +	.setup_ctrs = &p4_setup_ctrs,
  18.739 +	.check_ctrs = &p4_check_ctrs,
  18.740 +	.start = &p4_start,
  18.741 +	.stop = &p4_stop
  18.742 +};
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/xen/arch/x86/oprofile/op_model_ppro.c	Thu Apr 06 18:58:01 2006 +0100
    19.3 @@ -0,0 +1,153 @@
    19.4 +/**
    19.5 + * @file op_model_ppro.c
    19.6 + * pentium pro / P6 model-specific MSR operations
    19.7 + *
    19.8 + * @remark Copyright 2002 OProfile authors
    19.9 + * @remark Read the file COPYING
   19.10 + *
   19.11 + * @author John Levon
   19.12 + * @author Philippe Elie
   19.13 + * @author Graydon Hoare
   19.14 + */
   19.15 +
   19.16 +#include <xen/types.h>
   19.17 +#include <asm/msr.h>
   19.18 +#include <asm/io.h>
   19.19 +#include <asm/apic.h>
   19.20 +#include <asm/processor.h>
   19.21 +#include <xen/sched.h>
   19.22 +#include <asm/regs.h>
   19.23 +#include <asm/current.h>
   19.24 + 
   19.25 +#include "op_x86_model.h"
   19.26 +#include "op_counter.h"
   19.27 +
   19.28 +#define NUM_COUNTERS 2
   19.29 +#define NUM_CONTROLS 2
   19.30 +
   19.31 +#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
   19.32 +#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
   19.33 +#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
   19.34 +
   19.35 +#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
   19.36 +#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
   19.37 +#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
   19.38 +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
   19.39 +#define CTRL_CLEAR(x) (x &= (1<<21))
   19.40 +#define CTRL_SET_ENABLE(val) (val |= 1<<20)
   19.41 +#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
   19.42 +#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
   19.43 +#define CTRL_SET_UM(val, m) (val |= (m << 8))
   19.44 +#define CTRL_SET_EVENT(val, e) (val |= e)
   19.45 +
   19.46 +static unsigned long reset_value[NUM_COUNTERS];
   19.47 + 
   19.48 +static void ppro_fill_in_addresses(struct op_msrs * const msrs)
   19.49 +{
   19.50 +	msrs->counters[0].addr = MSR_P6_PERFCTR0;
   19.51 +	msrs->counters[1].addr = MSR_P6_PERFCTR1;
   19.52 +	
   19.53 +	msrs->controls[0].addr = MSR_P6_EVNTSEL0;
   19.54 +	msrs->controls[1].addr = MSR_P6_EVNTSEL1;
   19.55 +}
   19.56 +
   19.57 +
   19.58 +static void ppro_setup_ctrs(struct op_msrs const * const msrs)
   19.59 +{
   19.60 +	unsigned int low, high;
   19.61 +	int i;
   19.62 +
   19.63 +	/* clear all counters */
   19.64 +	for (i = 0 ; i < NUM_CONTROLS; ++i) {
   19.65 +		CTRL_READ(low, high, msrs, i);
   19.66 +		CTRL_CLEAR(low);
   19.67 +		CTRL_WRITE(low, high, msrs, i);
   19.68 +	}
   19.69 +	
   19.70 +	/* avoid a false detection of ctr overflows in NMI handler */
   19.71 +	for (i = 0; i < NUM_COUNTERS; ++i) {
   19.72 +		CTR_WRITE(1, msrs, i);
   19.73 +	}
   19.74 +
   19.75 +	/* enable active counters */
   19.76 +	for (i = 0; i < NUM_COUNTERS; ++i) {
   19.77 +		if (counter_config[i].enabled) {
   19.78 +			reset_value[i] = counter_config[i].count;
   19.79 +
   19.80 +			CTR_WRITE(counter_config[i].count, msrs, i);
   19.81 +
   19.82 +			CTRL_READ(low, high, msrs, i);
   19.83 +			CTRL_CLEAR(low);
   19.84 +			CTRL_SET_ENABLE(low);
   19.85 +			CTRL_SET_USR(low, counter_config[i].user);
   19.86 +			CTRL_SET_KERN(low, counter_config[i].kernel);
   19.87 +			CTRL_SET_UM(low, counter_config[i].unit_mask);
   19.88 +			CTRL_SET_EVENT(low, counter_config[i].event);
   19.89 +			CTRL_WRITE(low, high, msrs, i);
   19.90 +		}
   19.91 +	}
   19.92 +}
   19.93 +
   19.94 +
   19.95 +extern void xenoprof_log_event(struct vcpu *v, unsigned long eip,
   19.96 +			       int mode, int event);
   19.97 + 
   19.98 +static int ppro_check_ctrs(unsigned int const cpu,
   19.99 +                           struct op_msrs const * const msrs,
  19.100 +                           struct cpu_user_regs * const regs)
  19.101 +{
  19.102 +	unsigned int low, high;
  19.103 +	int i;
  19.104 +	int ovf = 0;
  19.105 +	unsigned long eip = regs->eip;
  19.106 +	int mode = 0;
  19.107 +
  19.108 +	if ( guest_kernel_mode(current, regs) ) 
  19.109 +		mode = 1;
  19.110 +	else if ( ring_0(regs) )
  19.111 +		mode = 2;
  19.112 + 
  19.113 +	for (i = 0 ; i < NUM_COUNTERS; ++i) {
  19.114 +		CTR_READ(low, high, msrs, i);
  19.115 +		if (CTR_OVERFLOWED(low)) {
  19.116 +			xenoprof_log_event(current, eip, mode, i);
  19.117 +			CTR_WRITE(reset_value[i], msrs, i);
  19.118 +			ovf = 1;
  19.119 +		}
  19.120 +	}
  19.121 +
  19.122 +	/* Only P6-based Pentium M needs to re-unmask the apic vector, but it
  19.123 +	 * doesn't hurt other P6 variants */
  19.124 +	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
  19.125 +
  19.126 +	return ovf;
  19.127 +}
  19.128 +
  19.129 + 
  19.130 +static void ppro_start(struct op_msrs const * const msrs)
  19.131 +{
  19.132 +	unsigned int low,high;
  19.133 +	CTRL_READ(low, high, msrs, 0);
  19.134 +	CTRL_SET_ACTIVE(low);
  19.135 +	CTRL_WRITE(low, high, msrs, 0);
  19.136 +}
  19.137 +
  19.138 +
  19.139 +static void ppro_stop(struct op_msrs const * const msrs)
  19.140 +{
  19.141 +	unsigned int low,high;
  19.142 +	CTRL_READ(low, high, msrs, 0);
  19.143 +	CTRL_SET_INACTIVE(low);
  19.144 +	CTRL_WRITE(low, high, msrs, 0);
  19.145 +}
  19.146 +
  19.147 +
  19.148 +struct op_x86_model_spec const op_ppro_spec = {
  19.149 +	.num_counters = NUM_COUNTERS,
  19.150 +	.num_controls = NUM_CONTROLS,
  19.151 +	.fill_in_addresses = &ppro_fill_in_addresses,
  19.152 +	.setup_ctrs = &ppro_setup_ctrs,
  19.153 +	.check_ctrs = &ppro_check_ctrs,
  19.154 +	.start = &ppro_start,
  19.155 +	.stop = &ppro_stop
  19.156 +};
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/xen/arch/x86/oprofile/op_x86_model.h	Thu Apr 06 18:58:01 2006 +0100
    20.3 @@ -0,0 +1,51 @@
    20.4 +/**
    20.5 + * @file op_x86_model.h
    20.6 + * interface to x86 model-specific MSR operations
    20.7 + *
    20.8 + * @remark Copyright 2002 OProfile authors
    20.9 + * @remark Read the file COPYING
   20.10 + *
   20.11 + * @author Graydon Hoare
   20.12 + */
   20.13 +
   20.14 +#ifndef OP_X86_MODEL_H
   20.15 +#define OP_X86_MODEL_H
   20.16 +
   20.17 +struct op_saved_msr {
   20.18 +	unsigned int high;
   20.19 +	unsigned int low;
   20.20 +};
   20.21 +
   20.22 +struct op_msr {
   20.23 +	unsigned long addr;
   20.24 +	struct op_saved_msr saved;
   20.25 +};
   20.26 +
   20.27 +struct op_msrs {
   20.28 +	struct op_msr * counters;
   20.29 +	struct op_msr * controls;
   20.30 +};
   20.31 +
   20.32 +struct pt_regs;
   20.33 +
   20.34 +/* The model vtable abstracts the differences between
   20.35 + * various x86 CPU model's perfctr support.
   20.36 + */
   20.37 +struct op_x86_model_spec {
   20.38 +	unsigned int const num_counters;
   20.39 +	unsigned int const num_controls;
   20.40 +	void (*fill_in_addresses)(struct op_msrs * const msrs);
   20.41 +	void (*setup_ctrs)(struct op_msrs const * const msrs);
   20.42 +	int (*check_ctrs)(unsigned int const cpu, 
   20.43 +			  struct op_msrs const * const msrs,
   20.44 +			  struct cpu_user_regs * const regs);
   20.45 +	void (*start)(struct op_msrs const * const msrs);
   20.46 +	void (*stop)(struct op_msrs const * const msrs);
   20.47 +};
   20.48 +
   20.49 +extern struct op_x86_model_spec const op_ppro_spec;
   20.50 +extern struct op_x86_model_spec const op_p4_spec;
   20.51 +extern struct op_x86_model_spec const op_p4_ht2_spec;
   20.52 +extern struct op_x86_model_spec const op_athlon_spec;
   20.53 +
   20.54 +#endif /* OP_X86_MODEL_H */
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/xen/arch/x86/oprofile/xenoprof.c	Thu Apr 06 18:58:01 2006 +0100
    21.3 @@ -0,0 +1,553 @@
    21.4 +/*
    21.5 + * Copyright (C) 2005 Hewlett-Packard Co.
    21.6 + * written by Aravind Menon & Jose Renato Santos
    21.7 + *            (email: xenoprof@groups.hp.com)
    21.8 + */
    21.9 +
   21.10 +#include <xen/sched.h>
   21.11 +#include <public/xenoprof.h>
   21.12 +
   21.13 +#include "op_counter.h"
   21.14 +
   21.15 +/* Limit amount of pages used for shared buffer (per domain) */
   21.16 +#define MAX_OPROF_SHARED_PAGES 32
   21.17 +
   21.18 +int active_domains[MAX_OPROF_DOMAINS];
   21.19 +int active_ready[MAX_OPROF_DOMAINS];
   21.20 +unsigned int adomains = 0;
   21.21 +unsigned int activated = 0;
   21.22 +struct domain *primary_profiler = NULL;
   21.23 +int xenoprof_state = XENOPROF_IDLE;
   21.24 +
   21.25 +u64 total_samples = 0;
   21.26 +u64 invalid_buffer_samples = 0;
   21.27 +u64 corrupted_buffer_samples = 0;
   21.28 +u64 lost_samples = 0;
   21.29 +u64 active_samples = 0;
   21.30 +u64 idle_samples = 0;
   21.31 +u64 others_samples = 0;
   21.32 +
   21.33 +
   21.34 +extern int nmi_init(int *num_events, int *is_primary, char *cpu_type);
   21.35 +extern int nmi_reserve_counters(void);
   21.36 +extern int nmi_setup_events(void);
   21.37 +extern int nmi_enable_virq(void);
   21.38 +extern int nmi_start(void);
   21.39 +extern void nmi_stop(void);
   21.40 +extern void nmi_disable_virq(void);
   21.41 +extern void nmi_release_counters(void);
   21.42 +
   21.43 +int is_active(struct domain *d)
   21.44 +{
   21.45 +    xenoprof_t *x = d->xenoprof;
   21.46 +    if ( x )
   21.47 +    {
   21.48 +        if ( x->domain_type == XENOPROF_DOMAIN_ACTIVE )
   21.49 +            return 1;
   21.50 +        else
   21.51 +            return 0;
   21.52 +    }
   21.53 +    else
   21.54 +        return 0;
   21.55 +}
   21.56 +
   21.57 +int is_profiled(struct domain *d)
   21.58 +{
   21.59 +    return is_active(d);
   21.60 +}
   21.61 +
   21.62 +static void xenoprof_reset_stat(void)
   21.63 +{
   21.64 +    total_samples = 0;
   21.65 +    invalid_buffer_samples = 0;
   21.66 +    corrupted_buffer_samples = 0;
   21.67 +    lost_samples = 0;
   21.68 +    active_samples = 0;
   21.69 +    idle_samples = 0;
   21.70 +    others_samples = 0;
   21.71 +
   21.72 +    return;
   21.73 +}
   21.74 +
   21.75 +static void xenoprof_reset_buf(struct domain *d)
   21.76 +{
   21.77 +    int j;
   21.78 +    xenoprof_buf_t *buf;
   21.79 +
   21.80 +    if ( !d->xenoprof )
   21.81 +    {
   21.82 +        printk("xenoprof_reset_buf: ERROR - Unexpected Xenoprof NULL pointer \n");
   21.83 +        return;
   21.84 +    }
   21.85 +
   21.86 +    for ( j=0; j<MAX_VIRT_CPUS; j++ )
   21.87 +    {
   21.88 +        buf = d->xenoprof->vcpu[j].buffer;
   21.89 +        if ( buf )
   21.90 +        {
   21.91 +            buf->event_head = 0;
   21.92 +            buf->event_tail = 0;
   21.93 +        }
   21.94 +    }
   21.95 +}
   21.96 +
   21.97 +int active_index(struct domain *d)
   21.98 +{
   21.99 +    int i;
  21.100 +    int id;
  21.101 +
  21.102 +    id = d->domain_id;
  21.103 +    for ( i=0; i<adomains; i++ )
  21.104 +        if ( active_domains[i] == id )
  21.105 +        {
  21.106 +            return i;
  21.107 +        }
  21.108 +    return -1;
  21.109 +}
  21.110 +
  21.111 +int set_active(struct domain *d)
  21.112 +{
  21.113 +    int ind;
  21.114 +    xenoprof_t *x;
  21.115 +
  21.116 +    ind = active_index(d);
  21.117 +    if ( ind <0 )
  21.118 +        return -EPERM;
  21.119 +
  21.120 +    x = d->xenoprof;
  21.121 +    if ( x )
  21.122 +    {
  21.123 +        x->domain_ready = 1;
  21.124 +        x->domain_type = XENOPROF_DOMAIN_ACTIVE;
  21.125 +        active_ready[ind] = 1;
  21.126 +        activated++;
  21.127 +        return 0;
  21.128 +    }
  21.129 +    else
  21.130 +        return -EPERM;
  21.131 +}
  21.132 +
  21.133 +int reset_active(struct domain *d)
  21.134 +{
  21.135 +    int ind;
  21.136 +    xenoprof_t *x;
  21.137 +
  21.138 +    ind = active_index(d);
  21.139 +    if ( ind <0 )
  21.140 +        return -EPERM;
  21.141 +
  21.142 +    x = d->xenoprof;
  21.143 +    if ( x )
  21.144 +    {
  21.145 +        x->domain_ready = 0;
  21.146 +        x->domain_type = XENOPROF_DOMAIN_IGNORED;
  21.147 +        active_ready[ind] = 0;
  21.148 +        activated--;
  21.149 +        if ( activated <= 0 )
  21.150 +            adomains = 0;
  21.151 +        return 0;
  21.152 +    }
  21.153 +    else
  21.154 +        return -EPERM;
  21.155 +}
  21.156 +
  21.157 +int set_active_domains(int num)
  21.158 +{
  21.159 +    int primary;
  21.160 +    int i;
  21.161 +    struct domain *d;
  21.162 +
  21.163 +    /* reset any existing active domains from previous runs */
  21.164 +    for ( i=0; i<adomains; i++ )
  21.165 +    {
  21.166 +        if ( active_ready[i] )
  21.167 +        {
  21.168 +            d = find_domain_by_id(active_domains[i]);
  21.169 +            if ( d )
  21.170 +            {
  21.171 +                reset_active(d);
  21.172 +                put_domain(d);
  21.173 +            }
  21.174 +        }
  21.175 +    }
  21.176 +
  21.177 +    adomains=num;
  21.178 +
  21.179 +    /* Add primary profiler to list of active domains if not there yet */
  21.180 +    primary = active_index(primary_profiler);
  21.181 +    if ( primary == -1 )
  21.182 +    {
  21.183 +        /* return if there is no space left on list */
  21.184 +        if ( num >= MAX_OPROF_DOMAINS )
  21.185 +            return -E2BIG;
  21.186 +        else
  21.187 +        {
  21.188 +            active_domains[num] = primary_profiler->domain_id;
  21.189 +            num++;
  21.190 +        }
  21.191 +    }
  21.192 +
  21.193 +    adomains = num;
  21.194 +    activated = 0;
  21.195 +
  21.196 +    for ( i=0; i<adomains; i++ )
  21.197 +    {
  21.198 +        active_ready[i] = 0;
  21.199 +    }
  21.200 +
  21.201 +    return 0;
  21.202 +}
  21.203 +
  21.204 +void xenoprof_log_event(struct vcpu *vcpu, unsigned long eip, int mode, int event)
  21.205 +{
  21.206 +    xenoprof_vcpu_t *v;
  21.207 +    xenoprof_buf_t *buf;
  21.208 +    int head;
  21.209 +    int tail;
  21.210 +    int size;
  21.211 +
  21.212 +
  21.213 +    total_samples++;
  21.214 +
  21.215 +    /* ignore samples of un-monitored domains */
  21.216 +    /* Count samples in idle separate from other unmonitored domains */
  21.217 +    if ( !is_profiled(vcpu->domain) )
  21.218 +    {
  21.219 +      others_samples++;
  21.220 +      return;
  21.221 +    }
  21.222 +
  21.223 +    v = &vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id];
  21.224 +
  21.225 +    /* Sanity check. Should never happen */ 
  21.226 +    if ( !v->buffer )
  21.227 +    {
  21.228 +        invalid_buffer_samples++;
  21.229 +        return;
  21.230 +    }
  21.231 +
  21.232 +    buf = vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id].buffer;
  21.233 +
  21.234 +    head = buf->event_head;
  21.235 +    tail = buf->event_tail;
  21.236 +    size = v->event_size;
  21.237 +
  21.238 +    /* make sure indexes in shared buffer are sane */
  21.239 +    if ( (head < 0) || (head >= size) || (tail < 0) || (tail >= size) )
  21.240 +    {
  21.241 +        corrupted_buffer_samples++;
  21.242 +        return;
  21.243 +    }
  21.244 +
  21.245 +    if ( (head == tail - 1) || (head == size - 1 && tail == 0) )
  21.246 +    {
  21.247 +        buf->lost_samples++;
  21.248 +        lost_samples++;
  21.249 +    }
  21.250 +    else
  21.251 +    {
  21.252 +        buf->event_log[head].eip = eip;
  21.253 +        buf->event_log[head].mode = mode;
  21.254 +        buf->event_log[head].event = event;
  21.255 +        head++;
  21.256 +        if ( head >= size )
  21.257 +            head = 0;
  21.258 +        buf->event_head = head;
  21.259 +        active_samples++;
  21.260 +        if ( mode == 0 )
  21.261 +            buf->user_samples++;
  21.262 +        else if ( mode == 1 )
  21.263 +            buf->kernel_samples++;
  21.264 +        else
  21.265 +            buf->xen_samples++;
  21.266 +    }
  21.267 +}
  21.268 +
  21.269 +char *alloc_xenoprof_buf(struct domain *d, int npages)
  21.270 +{
  21.271 +    char *rawbuf;
  21.272 +    int i, order;
  21.273 +
  21.274 +    /* allocate pages to store sample buffer shared with domain */
  21.275 +    order = get_order_from_pages(npages);
  21.276 +    rawbuf =  alloc_xenheap_pages(order);
  21.277 +    if( rawbuf == NULL )
  21.278 +    {
  21.279 +        printk("alloc_xenoprof_buf(): memory allocation failed\n");
  21.280 +        return 0;
  21.281 +    }
  21.282 +
  21.283 +    /* Share pages so that kernel can map it */
  21.284 +    for ( i=0; i<npages; i++ )
  21.285 +    {
  21.286 +        share_xen_page_with_guest(virt_to_page(rawbuf + i * PAGE_SIZE), 
  21.287 +				  d, XENSHARE_writable);
  21.288 +    }
  21.289 +
  21.290 +    return rawbuf;
  21.291 +}
  21.292 +
  21.293 +int alloc_xenoprof_struct(struct domain *d, int max_samples)
  21.294 +{
  21.295 +    struct vcpu *v;
  21.296 +    int nvcpu, npages, bufsize, max_bufsize;
  21.297 +    int i;
  21.298 +
  21.299 +    d->xenoprof = xmalloc(xenoprof_t);
  21.300 +
  21.301 +    if ( !d->xenoprof )
  21.302 +    {
  21.303 +        printk ("alloc_xenoprof_struct(): memory "
  21.304 +                "allocation (xmalloc) failed\n");
  21.305 +        return -ENOMEM;
  21.306 +    }
  21.307 +
  21.308 +    memset(d->xenoprof, 0, sizeof(*d->xenoprof));
  21.309 +
  21.310 +    nvcpu = 0;
  21.311 +    for_each_vcpu(d, v)
  21.312 +        nvcpu++;
  21.313 +
  21.314 +    /* reduce buffer size if necessary to limit pages allocated */
  21.315 +    bufsize = sizeof(xenoprof_buf_t) +
  21.316 +        (max_samples - 1) * sizeof(struct event_log);
  21.317 +    max_bufsize = (MAX_OPROF_SHARED_PAGES * PAGE_SIZE) / nvcpu;
  21.318 +    if ( bufsize > max_bufsize )
  21.319 +    {
  21.320 +        bufsize = max_bufsize;
  21.321 +        max_samples = ( (max_bufsize - sizeof(xenoprof_buf_t)) /
  21.322 +                        sizeof(struct event_log) ) + 1;
  21.323 +    }
  21.324 +
  21.325 +    npages = (nvcpu * bufsize - 1) / PAGE_SIZE + 1;
  21.326 +    d->xenoprof->rawbuf = alloc_xenoprof_buf(d, npages);
  21.327 +    if ( !d->xenoprof->rawbuf )
  21.328 +    {
  21.329 +        xfree(d->xenoprof);
  21.330 +        d->xenoprof = NULL;
  21.331 +        return -ENOMEM;
  21.332 +    }
  21.333 +
  21.334 +    d->xenoprof->npages = npages;
  21.335 +    d->xenoprof->nbuf = nvcpu;
  21.336 +    d->xenoprof->bufsize = bufsize;
  21.337 +    d->xenoprof->domain_ready = 0;
  21.338 +    d->xenoprof->domain_type = XENOPROF_DOMAIN_IGNORED;
  21.339 +
  21.340 +    /* Update buffer pointers for active vcpus */
  21.341 +    i=0;
  21.342 +    for_each_vcpu(d, v)
  21.343 +    {
  21.344 +        d->xenoprof->vcpu[v->vcpu_id].event_size = max_samples;
  21.345 +        d->xenoprof->vcpu[v->vcpu_id].buffer =
  21.346 +            (xenoprof_buf_t *)&d->xenoprof->rawbuf[i * bufsize];
  21.347 +        d->xenoprof->vcpu[v->vcpu_id].buffer->event_size = max_samples;
  21.348 +        d->xenoprof->vcpu[v->vcpu_id].buffer->vcpu_id = v->vcpu_id;
  21.349 +
  21.350 +        i++;
  21.351 +        /* in the unlikely case that the number of active vcpus changes */
  21.352 +        if ( i >= nvcpu )
  21.353 +            break;
  21.354 +    }
  21.355 +
  21.356 +    return 0;
  21.357 +}
  21.358 +
  21.359 +void free_xenoprof_pages(struct domain *d)
  21.360 +{
  21.361 +    xenoprof_t *x;
  21.362 +    int order;
  21.363 +
  21.364 +    x = d->xenoprof;
  21.365 +
  21.366 +    if ( x )
  21.367 +    {
  21.368 +        if ( x->rawbuf )
  21.369 +        {
  21.370 +            order = get_order_from_pages(x->npages);
  21.371 +            free_xenheap_pages(x->rawbuf, order);
  21.372 +        }
  21.373 +        xfree(x);
  21.374 +        d->xenoprof = NULL;
  21.375 +    }
  21.376 +}
  21.377 +
  21.378 +int xenoprof_init(int max_samples, xenoprof_init_result_t *init_result)
  21.379 +{
  21.380 +    xenoprof_init_result_t result;
  21.381 +    int is_primary, num_events;
  21.382 +    struct domain *d = current->domain;
  21.383 +    int ret;
  21.384 +
  21.385 +    ret = nmi_init(&num_events, &is_primary, result.cpu_type);
  21.386 +    if ( is_primary )
  21.387 +        primary_profiler = current->domain;
  21.388 +
  21.389 +    if ( ret < 0 )
  21.390 +        goto err;
  21.391 +
  21.392 +    /* we allocate xenoprof struct and buffers only at first time 
  21.393 +       xenoprof_init is called. Memory is then kept until domain is destroyed */
  21.394 +    if ( !d->xenoprof )
  21.395 +    {
  21.396 +        if ( (ret = alloc_xenoprof_struct(d, max_samples)) < 0 )
  21.397 +            goto err;
  21.398 +    }
  21.399 +
  21.400 +    xenoprof_reset_buf(d);
  21.401 +
  21.402 +    d->xenoprof->domain_type  = XENOPROF_DOMAIN_IGNORED;
  21.403 +    d->xenoprof->domain_ready = 0;
  21.404 +    d->xenoprof->is_primary = is_primary;
  21.405 +
  21.406 +    result.is_primary = is_primary;
  21.407 +    result.num_events = num_events;
  21.408 +    result.nbuf = d->xenoprof->nbuf;
  21.409 +    result.bufsize = d->xenoprof->bufsize;
  21.410 +    result.buf_maddr = __pa(d->xenoprof->rawbuf);
  21.411 +
  21.412 +    if ( copy_to_user((void *)init_result, (void *)&result, sizeof(result)) )
  21.413 +    {
  21.414 +        ret = -EFAULT;
  21.415 +        goto err;
  21.416 +    }
  21.417 +
  21.418 +    return ret;
  21.419 +
  21.420 + err:
  21.421 +    if ( primary_profiler == current->domain )
  21.422 +        primary_profiler = NULL;
  21.423 +    return ret;
  21.424 +}
  21.425 +
  21.426 +#define PRIV_OP(op) ( (op == XENOPROF_set_active) \
  21.427 +                   || (op == XENOPROF_reserve_counters) \
  21.428 +                   || (op == XENOPROF_setup_events) \
  21.429 +                   || (op == XENOPROF_start) \
  21.430 +                   || (op == XENOPROF_stop) \
  21.431 +                   || (op == XENOPROF_release_counters) \
  21.432 +                   || (op == XENOPROF_shutdown))
  21.433 +
  21.434 +int do_xenoprof_op(int op, unsigned long arg1, unsigned long arg2)
  21.435 +{
  21.436 +    int ret = 0;
  21.437 +
  21.438 +    if ( PRIV_OP(op) && current->domain != primary_profiler )
  21.439 +    {
  21.440 +        printk("xenoprof: dom %d denied privileged operation %d\n",
  21.441 +               current->domain->domain_id, op);
  21.442 +        return -EPERM;
  21.443 +    }
  21.444 +
  21.445 +    switch ( op )
  21.446 +    {
  21.447 +    case XENOPROF_init:
  21.448 +        ret = xenoprof_init((int)arg1, (xenoprof_init_result_t *)arg2);
  21.449 +        break;
  21.450 +
  21.451 +    case XENOPROF_set_active:
  21.452 +        if ( xenoprof_state != XENOPROF_IDLE )
  21.453 +            return -EPERM;
  21.454 +        if ( arg2 > MAX_OPROF_DOMAINS )
  21.455 +            return -E2BIG;
  21.456 +        if ( copy_from_user((void *)&active_domains, 
  21.457 +                            (void *)arg1, arg2*sizeof(int)) )
  21.458 +            return -EFAULT;
  21.459 +        ret = set_active_domains(arg2);
  21.460 +        break;
  21.461 +
  21.462 +    case XENOPROF_reserve_counters:
  21.463 +        if ( xenoprof_state != XENOPROF_IDLE )
  21.464 +            return -EPERM;
  21.465 +        ret = nmi_reserve_counters();
  21.466 +        if ( !ret )
  21.467 +            xenoprof_state = XENOPROF_COUNTERS_RESERVED;
  21.468 +        break;
  21.469 +
  21.470 +    case XENOPROF_setup_events:
  21.471 +        if ( xenoprof_state != XENOPROF_COUNTERS_RESERVED )
  21.472 +            return -EPERM;
  21.473 +        if ( adomains == 0 )
  21.474 +        {
  21.475 +            set_active_domains(0);
  21.476 +        }
  21.477 +
  21.478 +        if ( copy_from_user((void *)&counter_config, (void *)arg1, 
  21.479 +                            arg2 * sizeof(struct op_counter_config)) )
  21.480 +            return -EFAULT;
  21.481 +        ret = nmi_setup_events();
  21.482 +        if ( !ret )
  21.483 +            xenoprof_state = XENOPROF_READY;
  21.484 +        break;
  21.485 +
  21.486 +    case XENOPROF_enable_virq:
  21.487 +        if ( current->domain == primary_profiler )
  21.488 +        {
  21.489 +            nmi_enable_virq();
  21.490 +            xenoprof_reset_stat();
  21.491 +        }
  21.492 +        xenoprof_reset_buf(current->domain);
  21.493 +        ret = set_active(current->domain);
  21.494 +        break;
  21.495 +
  21.496 +    case XENOPROF_start:
  21.497 +        if ( (xenoprof_state == XENOPROF_READY) &&
  21.498 +             (activated == adomains) )
  21.499 +        {
  21.500 +            ret = nmi_start();
  21.501 +        }
  21.502 +        else 
  21.503 +            ret= -EPERM;
  21.504 +
  21.505 +        if ( !ret )
  21.506 +            xenoprof_state = XENOPROF_PROFILING;
  21.507 +        break;
  21.508 +
  21.509 +    case XENOPROF_stop:
  21.510 +        if ( xenoprof_state != XENOPROF_PROFILING )
  21.511 +            return -EPERM;
  21.512 +        nmi_stop();
  21.513 +        xenoprof_state = XENOPROF_READY;
  21.514 +        break;
  21.515 +
  21.516 +    case XENOPROF_disable_virq:
  21.517 +        if ( (xenoprof_state == XENOPROF_PROFILING) && 
  21.518 +             (is_active(current->domain)) )
  21.519 +            return -EPERM;
  21.520 +        ret = reset_active(current->domain);
  21.521 +        break;
  21.522 +
  21.523 +    case XENOPROF_release_counters:
  21.524 +        if ( (xenoprof_state == XENOPROF_COUNTERS_RESERVED) ||
  21.525 +             (xenoprof_state == XENOPROF_READY) )
  21.526 +        {
  21.527 +            xenoprof_state = XENOPROF_IDLE;
  21.528 +            nmi_release_counters();
  21.529 +            nmi_disable_virq();
  21.530 +        }
  21.531 +        else
  21.532 +            ret = -EPERM;
  21.533 +        break;
  21.534 +
  21.535 +    case XENOPROF_shutdown:
  21.536 +        if ( xenoprof_state == XENOPROF_IDLE )
  21.537 +        {
  21.538 +            activated = 0;
  21.539 +            adomains=0;
  21.540 +            primary_profiler = NULL;
  21.541 +            ret = 0;
  21.542 +        }
  21.543 +        else 
  21.544 +            ret = -EPERM;
  21.545 +        break;
  21.546 +
  21.547 +    default:
  21.548 +        ret = -EINVAL;
  21.549 +    }
  21.550 +
  21.551 +    if ( ret < 0 )
  21.552 +        printk("xenoprof: operation %d failed for dom %d (status : %d)\n",
  21.553 +               op, current->domain->domain_id, ret);
  21.554 +
  21.555 +    return ret;
  21.556 +}
    22.1 --- a/xen/arch/x86/x86_32/entry.S	Thu Apr 06 17:49:21 2006 +0100
    22.2 +++ b/xen/arch/x86/x86_32/entry.S	Thu Apr 06 18:58:01 2006 +0100
    22.3 @@ -645,6 +645,7 @@ ENTRY(hypercall_table)
    22.4          .long do_nmi_op
    22.5          .long do_arch_sched_op
    22.6          .long do_callback_op        /* 30 */
    22.7 +        .long do_xenoprof_op
    22.8          .rept NR_hypercalls-((.-hypercall_table)/4)
    22.9          .long do_ni_hypercall
   22.10          .endr
   22.11 @@ -681,6 +682,7 @@ ENTRY(hypercall_args_table)
   22.12          .byte 2 /* do_nmi_op            */
   22.13          .byte 2 /* do_arch_sched_op     */
   22.14          .byte 2 /* do_callback_op       */  /* 30 */
   22.15 +        .byte 3 /* do_xenoprof_op       */
   22.16          .rept NR_hypercalls-(.-hypercall_args_table)
   22.17          .byte 0 /* do_ni_hypercall      */
   22.18          .endr
    23.1 --- a/xen/arch/x86/x86_64/entry.S	Thu Apr 06 17:49:21 2006 +0100
    23.2 +++ b/xen/arch/x86/x86_64/entry.S	Thu Apr 06 18:58:01 2006 +0100
    23.3 @@ -553,6 +553,7 @@ ENTRY(hypercall_table)
    23.4          .quad do_nmi_op
    23.5          .quad do_arch_sched_op
    23.6          .quad do_callback_op        /* 30 */
    23.7 +        .quad do_xenoprof_op
    23.8          .rept NR_hypercalls-((.-hypercall_table)/8)
    23.9          .quad do_ni_hypercall
   23.10          .endr
   23.11 @@ -589,6 +590,7 @@ ENTRY(hypercall_args_table)
   23.12          .byte 2 /* do_nmi_op            */
   23.13          .byte 2 /* do_arch_sched_op     */
   23.14          .byte 2 /* do_callback_op       */  /* 30 */
   23.15 +        .byte 3 /* do_xenoprof_op       */
   23.16          .rept NR_hypercalls-(.-hypercall_args_table)
   23.17          .byte 0 /* do_ni_hypercall      */
   23.18          .endr
    24.1 --- a/xen/include/public/xen.h	Thu Apr 06 17:49:21 2006 +0100
    24.2 +++ b/xen/include/public/xen.h	Thu Apr 06 18:58:01 2006 +0100
    24.3 @@ -61,6 +61,7 @@
    24.4  #define __HYPERVISOR_nmi_op               28
    24.5  #define __HYPERVISOR_sched_op             29
    24.6  #define __HYPERVISOR_callback_op          30
    24.7 +#define __HYPERVISOR_xenoprof_op          31
    24.8  
    24.9  /* 
   24.10   * VIRTUAL INTERRUPTS
   24.11 @@ -77,7 +78,8 @@
   24.12  #define VIRQ_CONSOLE    2  /* G. (DOM0) Bytes received on emergency console. */
   24.13  #define VIRQ_DOM_EXC    3  /* G. (DOM0) Exceptional event for some domain.   */
   24.14  #define VIRQ_DEBUGGER   6  /* G. (DOM0) A domain has paused for debugging.   */
   24.15 -#define NR_VIRQS        8
   24.16 +#define VIRQ_XENOPROF   7  /* XenOprofile interrupt: new sample available */
   24.17 +#define NR_VIRQS        9
   24.18  
   24.19  /*
   24.20   * MMU-UPDATE REQUESTS
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/xen/include/public/xenoprof.h	Thu Apr 06 18:58:01 2006 +0100
    25.3 @@ -0,0 +1,72 @@
    25.4 +/******************************************************************************
    25.5 + * xenoprof.h
    25.6 + * 
    25.7 + * Interface for enabling system wide profiling based on hardware performance
    25.8 + * counters
    25.9 + * 
   25.10 + * Copyright (C) 2005 Hewlett-Packard Co.
   25.11 + * Written by Aravind Menon & Jose Renato Santos
   25.12 + */
   25.13 +
   25.14 +#ifndef __XEN_PUBLIC_XENOPROF_H__
   25.15 +#define __XEN_PUBLIC_XENOPROF_H__
   25.16 +
   25.17 +/*
    25.18 + * Commands to HYPERVISOR_xenoprof_op().
   25.19 + */
   25.20 +#define XENOPROF_init               0
   25.21 +#define XENOPROF_set_active         1
   25.22 +#define XENOPROF_reserve_counters   3
   25.23 +#define XENOPROF_setup_events       4
   25.24 +#define XENOPROF_enable_virq        5
   25.25 +#define XENOPROF_start              6
   25.26 +#define XENOPROF_stop               7
   25.27 +#define XENOPROF_disable_virq       8
   25.28 +#define XENOPROF_release_counters   9
   25.29 +#define XENOPROF_shutdown          10
   25.30 +
   25.31 +#define MAX_OPROF_EVENTS    32
   25.32 +#define MAX_OPROF_DOMAINS   25	
   25.33 +#define XENOPROF_CPU_TYPE_SIZE 64
   25.34 +
   25.35 +/* Xenoprof performance events (not Xen events) */
   25.36 +struct event_log {
   25.37 +    uint64_t eip;
   25.38 +    uint8_t mode;
   25.39 +    uint8_t event;
   25.40 +};
   25.41 +
   25.42 +/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */
   25.43 +typedef struct xenoprof_buf {
   25.44 +    uint32_t event_head;
   25.45 +    uint32_t event_tail;
   25.46 +    uint32_t event_size;
   25.47 +    uint32_t vcpu_id;
   25.48 +    uint64_t xen_samples;
   25.49 +    uint64_t kernel_samples;
   25.50 +    uint64_t user_samples;
   25.51 +    uint64_t lost_samples;
   25.52 +    struct event_log event_log[1];
   25.53 +} xenoprof_buf_t;
   25.54 +
   25.55 +typedef struct xenoprof_init_result {
   25.56 +    int32_t  num_events;
   25.57 +    int32_t  is_primary;
   25.58 +    int32_t  nbuf;
   25.59 +    int32_t  bufsize;
   25.60 +    uint64_t buf_maddr;
   25.61 +    char cpu_type[XENOPROF_CPU_TYPE_SIZE];
   25.62 +} xenoprof_init_result_t;
   25.63 +
   25.64 +
   25.65 +#endif /* __XEN_PUBLIC_XENOPROF_H__ */
   25.66 +
   25.67 +/*
   25.68 + * Local variables:
   25.69 + * mode: C
   25.70 + * c-set-style: "BSD"
   25.71 + * c-basic-offset: 4
   25.72 + * tab-width: 4
   25.73 + * indent-tabs-mode: nil
   25.74 + * End:
   25.75 + */
    26.1 --- a/xen/include/xen/sched.h	Thu Apr 06 17:49:21 2006 +0100
    26.2 +++ b/xen/include/xen/sched.h	Thu Apr 06 18:58:01 2006 +0100
    26.3 @@ -14,6 +14,7 @@
    26.4  #include <xen/grant_table.h>
    26.5  #include <xen/rangeset.h>
    26.6  #include <asm/domain.h>
    26.7 +#include <xen/xenoprof.h>
    26.8  
    26.9  extern unsigned long volatile jiffies;
   26.10  extern rwlock_t domlist_lock;
   26.11 @@ -155,6 +156,9 @@ struct domain
   26.12  
   26.13      /* Control-plane tools handle for this domain. */
   26.14      xen_domain_handle_t handle;
   26.15 +
   26.16 +    /* pointer to xenoprof data (oprofile support) */
   26.17 +    xenoprof_t *xenoprof;
   26.18  };
   26.19  
   26.20  struct domain_setup_info
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/xen/include/xen/xenoprof.h	Thu Apr 06 18:58:01 2006 +0100
    27.3 @@ -0,0 +1,40 @@
    27.4 +/******************************************************************************
    27.5 + * xenoprof.h
    27.6 + * 
    27.7 + * Xenoprof: Xenoprof enables performance profiling in Xen
    27.8 + * 
    27.9 + * Copyright (C) 2005 Hewlett-Packard Co.
   27.10 + * written by Aravind Menon & Jose Renato Santos
   27.11 + */
   27.12 +
   27.13 +#ifndef __XEN_XENOPROF_H__
   27.14 +#define __XEN_XENOPROF_H__
   27.15 +
   27.16 +#include <public/xenoprof.h>
   27.17 +
   27.18 +#define XENOPROF_DOMAIN_IGNORED    0
   27.19 +#define XENOPROF_DOMAIN_ACTIVE     1
   27.20 +
   27.21 +#define XENOPROF_IDLE              0
   27.22 +#define XENOPROF_COUNTERS_RESERVED 1
   27.23 +#define XENOPROF_READY             2
   27.24 +#define XENOPROF_PROFILING         3
   27.25 +
   27.26 +
   27.27 +typedef struct xenoprof_vcpu {
   27.28 +    int event_size;
   27.29 +    xenoprof_buf_t *buffer;
   27.30 +} xenoprof_vcpu_t;
   27.31 +
   27.32 +typedef struct xenoprof {
   27.33 +    char* rawbuf;
   27.34 +    int npages;
   27.35 +    int nbuf;
   27.36 +    int bufsize;
   27.37 +    int domain_type;
   27.38 +    int domain_ready;
   27.39 +    int is_primary;
   27.40 +    xenoprof_vcpu_t vcpu [MAX_VIRT_CPUS];
   27.41 +} xenoprof_t;
   27.42 +
    27.43 +#endif  /* __XEN_XENOPROF_H__ */