ia64/xen-unstable

changeset 9632:fb174770f426

Merged.
author emellor@leeni.uk.xensource.com
date Fri Apr 07 11:52:00 2006 +0100 (2006-04-07)
parents 9fcfdab04aa9 050ad9813cdb
children c5df04d6af17 591555bc4acb
files linux-2.6-xen-sparse/include/linux/irq.h linux-2.6-xen-sparse/kernel/irq/manage.c linux-2.6-xen-sparse/lib/Kconfig.debug
line diff
     1.1 --- a/buildconfigs/linux-defconfig_xen0_x86_32	Thu Apr 06 14:22:52 2006 +0100
     1.2 +++ b/buildconfigs/linux-defconfig_xen0_x86_32	Fri Apr 07 11:52:00 2006 +0100
     1.3 @@ -1231,6 +1231,7 @@ CONFIG_NLS_ISO8859_1=y
     1.4  #
     1.5  # Instrumentation Support
     1.6  #
     1.7 +# CONFIG_PROFILING is not set
     1.8  # CONFIG_KPROBES is not set
     1.9  
    1.10  #
     2.1 --- a/buildconfigs/linux-defconfig_xen0_x86_64	Thu Apr 06 14:22:52 2006 +0100
     2.2 +++ b/buildconfigs/linux-defconfig_xen0_x86_64	Fri Apr 07 11:52:00 2006 +0100
     2.3 @@ -1183,6 +1183,7 @@ CONFIG_DETECT_SOFTLOCKUP=y
     2.4  # CONFIG_DEBUG_SPINLOCK is not set
     2.5  # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
     2.6  # CONFIG_DEBUG_KOBJECT is not set
     2.7 +# CONFIG_DEBUG_INFO is not set
     2.8  # CONFIG_DEBUG_FS is not set
     2.9  # CONFIG_DEBUG_VM is not set
    2.10  CONFIG_FRAME_POINTER=y
     3.1 --- a/buildconfigs/linux-defconfig_xenU_x86_32	Thu Apr 06 14:22:52 2006 +0100
     3.2 +++ b/buildconfigs/linux-defconfig_xenU_x86_32	Fri Apr 07 11:52:00 2006 +0100
     3.3 @@ -779,6 +779,7 @@ CONFIG_NLS_ISO8859_1=y
     3.4  #
     3.5  # Instrumentation Support
     3.6  #
     3.7 +# CONFIG_PROFILING is not set
     3.8  # CONFIG_KPROBES is not set
     3.9  
    3.10  #
     4.1 --- a/buildconfigs/linux-defconfig_xenU_x86_64	Thu Apr 06 14:22:52 2006 +0100
     4.2 +++ b/buildconfigs/linux-defconfig_xenU_x86_64	Fri Apr 07 11:52:00 2006 +0100
     4.3 @@ -1080,6 +1080,7 @@ CONFIG_DETECT_SOFTLOCKUP=y
     4.4  # CONFIG_DEBUG_SPINLOCK is not set
     4.5  # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
     4.6  # CONFIG_DEBUG_KOBJECT is not set
     4.7 +# CONFIG_DEBUG_INFO is not set
     4.8  # CONFIG_DEBUG_FS is not set
     4.9  # CONFIG_DEBUG_VM is not set
    4.10  CONFIG_FRAME_POINTER=y
     5.1 --- a/buildconfigs/linux-defconfig_xen_x86_32	Thu Apr 06 14:22:52 2006 +0100
     5.2 +++ b/buildconfigs/linux-defconfig_xen_x86_32	Fri Apr 07 11:52:00 2006 +0100
     5.3 @@ -2892,6 +2892,7 @@ CONFIG_NLS_UTF8=m
     5.4  #
     5.5  # Instrumentation Support
     5.6  #
     5.7 +# CONFIG_PROFILING is not set
     5.8  # CONFIG_KPROBES is not set
     5.9  
    5.10  #
     6.1 --- a/buildconfigs/linux-defconfig_xen_x86_64	Thu Apr 06 14:22:52 2006 +0100
     6.2 +++ b/buildconfigs/linux-defconfig_xen_x86_64	Fri Apr 07 11:52:00 2006 +0100
     6.3 @@ -2587,6 +2587,7 @@ CONFIG_DETECT_SOFTLOCKUP=y
     6.4  # CONFIG_DEBUG_SPINLOCK is not set
     6.5  # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
     6.6  # CONFIG_DEBUG_KOBJECT is not set
     6.7 +# CONFIG_DEBUG_INFO is not set
     6.8  # CONFIG_DEBUG_FS is not set
     6.9  # CONFIG_DEBUG_VM is not set
    6.10  # CONFIG_FRAME_POINTER is not set
     7.1 --- a/docs/src/user.tex	Thu Apr 06 14:22:52 2006 +0100
     7.2 +++ b/docs/src/user.tex	Fri Apr 07 11:52:00 2006 +0100
     7.3 @@ -2052,7 +2052,7 @@ dev86 & The dev86 package provides an as
     7.4  
     7.5  If the dev86 package is not available on the x86\_64 distribution, you can install the i386 version of it. The dev86 rpm package for various distributions can be found at {\scriptsize {\tt http://www.rpmfind.net/linux/rpm2html/search.php?query=dev86\&submit=Search}} \\
     7.6  
     7.7 -LibVNCServer & The unmodified guest's VGA display, keyboard, and mouse are virtualized using the vncserver library provided by this package. You can get the sources of libvncserver from {\small {\tt http://sourceforge.net/projects/libvncserver}}. Build and install the sources on the build system to get the libvncserver library. The 0.8pre version of libvncserver is currently working well with Xen.\\
     7.8 +LibVNCServer & The unmodified guest's VGA display, keyboard, and mouse can be virtualized by the vncserver library. You can get the sources of libvncserver from {\small {\tt http://sourceforge.net/projects/libvncserver}}. Build and install the sources on the build system to get the libvncserver library. The 0.8 release suffers a significant performance degradation that the current sources in the CVS tree have fixed, so it is highly recommended that you download and install the latest CVS sources.\\
     7.9  
    7.10  SDL-devel, SDL & Simple DirectMedia Layer (SDL) is another way of virtualizing the unmodified guest console. It provides an X window for the guest console. 
    7.11  
    7.12 @@ -2077,6 +2077,8 @@ acpi & Enable VMX guest ACPI, default=0 
    7.13  
    7.14  apic & Enable VMX guest APIC, default=0 (disabled)\\
    7.15  
    7.16 +pae & Enable VMX guest PAE, default=0 (disabled)\\
    7.17 +
    7.18  vif     & Optionally defines MAC address and/or bridge for the network interfaces. Random MACs are assigned if not given. {\small {\tt type=ioemu}} means ioemu is used to virtualize the VMX NIC. If no type is specified, vbd is used, as with paravirtualized guests.\\
    7.19  
    7.20  disk & Defines the disk devices you want the domain to have access to, and what you want them accessible as. If using a physical device as the VMX guest's disk, each disk entry is of the form 
    7.21 @@ -2229,6 +2231,30 @@ Simply follow the usual method of creati
    7.22  
    7.23  In the default configuration, VNC is on and SDL is off. Therefore VNC windows will open when VMX guests are created. If you want to use SDL to create VMX guests, set {\small {\tt sdl=1}} in your VMX configuration file. You can also turn off VNC by setting {\small {\tt vnc=0}}.
    7.24   
     7.25 +\subsection{Using the mouse in a VNC window}
     7.26 +The default PS/2 mouse will not work properly in a VMX guest viewed through a VNC window. Summagraphics mouse emulation does work in this environment, and can be enabled by reconfiguring two services:
    7.27 +
     7.28 +{\small {\tt 1. General Purpose Mouse (GPM). The GPM daemon is configured in different ways in different Linux distributions. On a Red Hat distribution, this is accomplished by changing the file `/etc/sysconfig/mouse' to have the following:\\
    7.29 +MOUSETYPE="summa"\\
    7.30 +XMOUSETYPE="SUMMA"\\
    7.31 +DEVICE=/dev/ttyS0\\
    7.32 +\\
    7.33 +2. X11. For all Linux distributions, change the Mouse0 stanza in `/etc/X11/xorg.conf' to:\\
    7.34 +Section "InputDevice"\\
    7.35 +Identifier "Mouse0"\\
    7.36 +Driver "summa"\\
    7.37 +Option "Device" "/dev/ttyS0"\\
    7.38 +Option "InputFashion" "Tablet"\\
    7.39 +Option "Mode" "Absolute"\\
    7.40 +Option "Name" "EasyPen"\\
    7.41 +Option "Compatible" "True"\\
    7.42 +Option "Protocol" "Auto"\\
    7.43 +Option "SendCoreEvents" "on"\\
    7.44 +Option "Vendor" "GENIUS"\\
    7.45 +EndSection}}
    7.46 +
     7.47 +If the Summagraphics mouse isn't the default mouse, you can manually kill 'gpm' and restart it with the command "gpm -m /dev/ttyS0 -t summa". Note that Summagraphics emulation makes no sense in an SDL window and is therefore not available in that environment.
    7.48 +
    7.49  \subsection{Destroy VMX guests}
    7.50  VMX guests can be destroyed in the same way as can paravirtualized guests. We recommend that you type the command 
    7.51  
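
A hedged illustration tying together the VMX configuration options documented in the user.tex hunk above (acpi, apic, the new pae option, the vif type, and the vnc/sdl console switches). This fragment is not part of the changeset; all values are examples:

    # Example VMX guest configuration fragment (illustrative values)
    acpi = 0                  # VMX guest ACPI, default 0 (disabled)
    apic = 0                  # VMX guest APIC, default 0 (disabled)
    pae  = 0                  # VMX guest PAE, default 0 (disabled)
    vif  = [ 'type=ioemu' ]   # ioemu virtualizes the VMX NIC
    vnc  = 1                  # VNC console on (the default)
    sdl  = 0                  # SDL console off (the default)
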
     8.1 --- a/linux-2.6-xen-sparse/arch/i386/Kconfig	Thu Apr 06 14:22:52 2006 +0100
     8.2 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig	Fri Apr 07 11:52:00 2006 +0100
     8.3 @@ -1116,9 +1116,7 @@ source "fs/Kconfig"
     8.4  menu "Instrumentation Support"
     8.5  	depends on EXPERIMENTAL
     8.6  
     8.7 -if !X86_XEN
     8.8  source "arch/i386/oprofile/Kconfig"
     8.9 -endif
    8.10  
    8.11  config KPROBES
    8.12  	bool "Kprobes (EXPERIMENTAL)"
     9.1 --- a/linux-2.6-xen-sparse/arch/i386/Makefile	Thu Apr 06 14:22:52 2006 +0100
     9.2 +++ b/linux-2.6-xen-sparse/arch/i386/Makefile	Fri Apr 07 11:52:00 2006 +0100
     9.3 @@ -162,3 +162,4 @@ define archhelp
     9.4  endef
     9.5  
     9.6  CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf
     9.7 +CLEAN_FILES += vmlinuz vmlinux-stripped
    10.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c	Thu Apr 06 14:22:52 2006 +0100
    10.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c	Fri Apr 07 11:52:00 2006 +0100
    10.3 @@ -1317,6 +1317,11 @@ void __init setup_bootmem_allocator(void
    10.4  		}
    10.5  	}
    10.6  #endif
    10.7 +#ifdef CONFIG_KEXEC
    10.8 +	if (crashk_res.start != crashk_res.end)
    10.9 +		reserve_bootmem(crashk_res.start,
   10.10 +			crashk_res.end - crashk_res.start + 1);
   10.11 +#endif
   10.12  
   10.13  	if (!xen_feature(XENFEAT_auto_translated_physmap))
   10.14  		phys_to_machine_mapping =
   10.15 @@ -1436,11 +1441,6 @@ legacy_init_iomem_resources(struct resou
   10.16  		}
   10.17  	}
   10.18  #endif
   10.19 -#ifdef CONFIG_KEXEC
   10.20 -	if (crashk_res.start != crashk_res.end)
   10.21 -		reserve_bootmem(crashk_res.start,
   10.22 -			crashk_res.end - crashk_res.start + 1);
   10.23 -#endif
   10.24  }
   10.25  
   10.26  /*
    11.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Thu Apr 06 14:22:52 2006 +0100
    11.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Fri Apr 07 11:52:00 2006 +0100
    11.3 @@ -177,6 +177,32 @@ int touch_pte_range(struct mm_struct *mm
    11.4  
    11.5  EXPORT_SYMBOL(touch_pte_range);
    11.6  
    11.7 +void *vm_map_xen_pages (unsigned long maddr, int vm_size, pgprot_t prot)
    11.8 +{
    11.9 +	int error;
   11.10 +       
   11.11 +	struct vm_struct *vma;
   11.12 +	vma = get_vm_area (vm_size, VM_IOREMAP);
   11.13 +      
   11.14 +	if (vma == NULL) {
   11.15 +		printk ("ioremap.c,vm_map_xen_pages(): "
   11.16 +			"Failed to get VMA area\n");
   11.17 +		return NULL;
   11.18 +	}
   11.19 +
   11.20 +	error = direct_kernel_remap_pfn_range((unsigned long) vma->addr,
   11.21 +					      maddr >> PAGE_SHIFT, vm_size,
   11.22 +					      prot, DOMID_SELF );
   11.23 +	if (error == 0) {
   11.24 +		return vma->addr;
   11.25 +	} else {
   11.26 +		printk ("ioremap.c,vm_map_xen_pages(): "
   11.27 +			"Failed to map xen shared pages into kernel space\n");
   11.28 +		return NULL;
   11.29 +	}
   11.30 +}
   11.31 +EXPORT_SYMBOL(vm_map_xen_pages);
   11.32 +
   11.33  /*
   11.34   * Does @address reside within a non-highmem page that is local to this virtual
   11.35   * machine (i.e., not an I/O page, nor a memory page belonging to another VM).
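
A usage note for the newly exported helper above: a minimal sketch of a caller, mirroring how the xenoprof code later in this changeset maps its shared sample buffers with vm_map_xen_pages() and releases them with vunmap(). The variables maddr and npages are illustrative:

    /* Sketch: map npages of Xen-shared memory starting at machine
     * address maddr into kernel virtual space; unmap with vunmap().
     * Error handling is minimal for brevity. */
    pgprot_t prot = __pgprot(_KERNPG_TABLE);
    char *buf = vm_map_xen_pages(maddr, npages * PAGE_SIZE, prot);
    if (buf != NULL) {
            /* ... consume the shared pages ... */
            vunmap(buf);
    }
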
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/Makefile	Fri Apr 07 11:52:00 2006 +0100
    12.3 @@ -0,0 +1,16 @@
    12.4 +obj-$(CONFIG_OPROFILE) += oprofile.o
    12.5 +
    12.6 +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
    12.7 +		oprof.o cpu_buffer.o buffer_sync.o \
    12.8 +		event_buffer.o oprofile_files.o \
    12.9 +		oprofilefs.o oprofile_stats.o  \
   12.10 +		timer_int.o )
   12.11 +
   12.12 +ifdef CONFIG_XEN
   12.13 +oprofile-y				:= $(DRIVER_OBJS) xenoprof.o
   12.14 +else 
   12.15 +oprofile-y				:= $(DRIVER_OBJS) init.o backtrace.o
   12.16 +oprofile-$(CONFIG_X86_LOCAL_APIC) 	+= nmi_int.o op_model_athlon.o \
   12.17 +					   op_model_ppro.o op_model_p4.o
   12.18 +oprofile-$(CONFIG_X86_IO_APIC)		+= nmi_timer_int.o
   12.19 +endif
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c	Fri Apr 07 11:52:00 2006 +0100
    13.3 @@ -0,0 +1,395 @@
    13.4 +/**
    13.5 + * @file xenoprof.c
    13.6 + *
    13.7 + * @remark Copyright 2002 OProfile authors
    13.8 + * @remark Read the file COPYING
    13.9 + *
   13.10 + * @author John Levon <levon@movementarian.org>
   13.11 + *
   13.12 + * Modified by Aravind Menon and Jose Renato Santos for Xen
   13.13 + * These modifications are:
   13.14 + * Copyright (C) 2005 Hewlett-Packard Co.
   13.15 + */
   13.16 +
   13.17 +#include <linux/init.h>
   13.18 +#include <linux/notifier.h>
   13.19 +#include <linux/smp.h>
   13.20 +#include <linux/oprofile.h>
   13.21 +#include <linux/sysdev.h>
   13.22 +#include <linux/slab.h>
   13.23 +#include <linux/interrupt.h>
   13.24 +#include <linux/vmalloc.h>
   13.25 +#include <asm/nmi.h>
   13.26 +#include <asm/msr.h>
   13.27 +#include <asm/apic.h>
   13.28 +#include <asm/pgtable.h>
   13.29 +#include <xen/evtchn.h>
   13.30 +#include "op_counter.h"
   13.31 +
   13.32 +#include <xen/interface/xen.h>
   13.33 +#include <xen/interface/xenoprof.h>
   13.34 +
   13.35 +static int xenoprof_start(void);
   13.36 +static void xenoprof_stop(void);
   13.37 +
   13.38 +void * vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot);
   13.39 +
   13.40 +static int xenoprof_enabled = 0;
   13.41 +static int num_events = 0;
   13.42 +static int is_primary = 0;
   13.43 +
   13.44 +/* sample buffers shared with Xen */
   13.45 +xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS];
   13.46 +/* Shared buffer area */
   13.47 +char * shared_buffer;
   13.48 +/* Number of buffers in shared area (one per VCPU) */
   13.49 +int nbuf;
   13.50 +/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */
   13.51 +int ovf_irq[NR_CPUS];
   13.52 +/* cpu model type string - copied from Xen memory space on XENOPROF_init command */
   13.53 +char cpu_type[XENOPROF_CPU_TYPE_SIZE];
   13.54 +
   13.55 +#ifdef CONFIG_PM
   13.56 +
   13.57 +static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
   13.58 +{
   13.59 +	if (xenoprof_enabled == 1)
   13.60 +		xenoprof_stop();
   13.61 +	return 0;
   13.62 +}
   13.63 +
   13.64 +
   13.65 +static int xenoprof_resume(struct sys_device * dev)
   13.66 +{
   13.67 +	if (xenoprof_enabled == 1)
   13.68 +		xenoprof_start();
   13.69 +	return 0;
   13.70 +}
   13.71 +
   13.72 +
   13.73 +static struct sysdev_class oprofile_sysclass = {
   13.74 +	set_kset_name("oprofile"),
   13.75 +	.resume		= xenoprof_resume,
   13.76 +	.suspend	= xenoprof_suspend
   13.77 +};
   13.78 +
   13.79 +
   13.80 +static struct sys_device device_oprofile = {
   13.81 +	.id	= 0,
   13.82 +	.cls	= &oprofile_sysclass,
   13.83 +};
   13.84 +
   13.85 +
   13.86 +static int __init init_driverfs(void)
   13.87 +{
   13.88 +	int error;
   13.89 +	if (!(error = sysdev_class_register(&oprofile_sysclass)))
   13.90 +		error = sysdev_register(&device_oprofile);
   13.91 +	return error;
   13.92 +}
   13.93 +
   13.94 +
   13.95 +static void __exit exit_driverfs(void)
   13.96 +{
   13.97 +	sysdev_unregister(&device_oprofile);
   13.98 +	sysdev_class_unregister(&oprofile_sysclass);
   13.99 +}
  13.100 +
  13.101 +#else
  13.102 +#define init_driverfs() do { } while (0)
  13.103 +#define exit_driverfs() do { } while (0)
  13.104 +#endif /* CONFIG_PM */
  13.105 +
  13.106 +unsigned long long oprofile_samples = 0;
  13.107 +
  13.108 +static irqreturn_t 
  13.109 +xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
  13.110 +{
  13.111 +	int head, tail, size;
  13.112 +	xenoprof_buf_t * buf;
  13.113 +	int cpu;
  13.114 +
  13.115 +	cpu = smp_processor_id();
  13.116 +	buf = xenoprof_buf[cpu];
  13.117 +
  13.118 +	head = buf->event_head;
  13.119 +	tail = buf->event_tail;
  13.120 +	size = buf->event_size;
  13.121 +
  13.122 +	if (tail > head) {
  13.123 +		while (tail < size) {
  13.124 +			oprofile_add_pc(buf->event_log[tail].eip,
  13.125 +					buf->event_log[tail].mode,
  13.126 +					buf->event_log[tail].event);
  13.127 +			oprofile_samples++;
  13.128 +			tail++;
  13.129 +		}
  13.130 +		tail = 0;
  13.131 +	}
  13.132 +	while (tail < head) {
  13.133 +		oprofile_add_pc(buf->event_log[tail].eip,
  13.134 +				buf->event_log[tail].mode,
  13.135 +				buf->event_log[tail].event);
  13.136 +		oprofile_samples++;
  13.137 +		tail++;
  13.138 +	}
  13.139 +
  13.140 +	buf->event_tail = tail;
  13.141 +
  13.142 +	return IRQ_HANDLED;
  13.143 +}
  13.144 +
  13.145 +
  13.146 +static void unbind_virq_cpu(void * info)
  13.147 +{
  13.148 +	int cpu = smp_processor_id();
  13.149 +	if (ovf_irq[cpu] >= 0) {
  13.150 +		unbind_from_irqhandler(ovf_irq[cpu], NULL);
  13.151 +		ovf_irq[cpu] = -1;
  13.152 +	}
  13.153 +}
  13.154 +
  13.155 +
  13.156 +static void unbind_virq(void)
  13.157 +{
  13.158 +	on_each_cpu(unbind_virq_cpu, NULL, 0, 1);
  13.159 +}
  13.160 +
  13.161 +
  13.162 +int bind_virq_error;
  13.163 +
  13.164 +static void bind_virq_cpu(void * info)
  13.165 +{
  13.166 +	int result;
  13.167 +	int cpu = smp_processor_id();
  13.168 +
  13.169 +	result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
  13.170 +					 cpu,
  13.171 +					 xenoprof_ovf_interrupt,
  13.172 +					 SA_INTERRUPT,
  13.173 +					 "xenoprof",
  13.174 +					 NULL);
  13.175 +
  13.176 +	if (result<0) {
  13.177 +		bind_virq_error = result;
  13.178 +		printk("xenoprof.c: binding VIRQ_XENOPROF to IRQ failed on CPU "
  13.179 +		       "%d\n", cpu);
  13.180 +	} else {
  13.181 +		ovf_irq[cpu] = result;
  13.182 +	}
  13.183 +}
  13.184 +
  13.185 +
  13.186 +static int bind_virq(void)
  13.187 +{
  13.188 +	bind_virq_error = 0;
  13.189 +	on_each_cpu(bind_virq_cpu, NULL, 0, 1);
  13.190 +	if (bind_virq_error) {
  13.191 +		unbind_virq();
  13.192 +		return bind_virq_error;
  13.193 +	} else {
  13.194 +		return 0;
  13.195 +	}
  13.196 +}
  13.197 +
  13.198 +
  13.199 +static int xenoprof_setup(void)
  13.200 +{
  13.201 +	int ret;
  13.202 +
  13.203 +	ret = bind_virq();
  13.204 +	if (ret)
  13.205 +		return ret;
  13.206 +
  13.207 +	if (is_primary) {
  13.208 +		ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters,
  13.209 +					     (unsigned long)NULL,
  13.210 +					     (unsigned long)NULL);
  13.211 +		if (ret)
  13.212 +			goto err;
  13.213 +
  13.214 +		ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events,
  13.215 +					     (unsigned long)&counter_config,
  13.216 +					     (unsigned long)num_events);
  13.217 +		if (ret)
  13.218 +			goto err;
  13.219 +	}
  13.220 +
  13.221 +	ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq,
  13.222 +				     (unsigned long)NULL,
  13.223 +				     (unsigned long)NULL);
  13.224 +	if (ret)
  13.225 +		goto err;
  13.226 +
  13.227 +	xenoprof_enabled = 1;
  13.228 +	return 0;
  13.229 + err:
  13.230 +	unbind_virq();
  13.231 +	return ret;
  13.232 +}
  13.233 +
  13.234 +
  13.235 +static void xenoprof_shutdown(void)
  13.236 +{
  13.237 +	xenoprof_enabled = 0;
  13.238 +
  13.239 +	HYPERVISOR_xenoprof_op(XENOPROF_disable_virq,
  13.240 +			       (unsigned long)NULL,
  13.241 +			       (unsigned long)NULL);
  13.242 +
  13.243 +	if (is_primary) {
  13.244 +		HYPERVISOR_xenoprof_op(XENOPROF_release_counters,
  13.245 +				       (unsigned long)NULL,
  13.246 +				       (unsigned long)NULL);
  13.247 +	}
  13.248 +
  13.249 +	unbind_virq();
  13.250 +}
  13.251 +
  13.252 +
  13.253 +static int xenoprof_start(void)
  13.254 +{
  13.255 +	int ret = 0;
  13.256 +
  13.257 +	if (is_primary)
  13.258 +		ret = HYPERVISOR_xenoprof_op(XENOPROF_start,
  13.259 +					     (unsigned long)NULL,
  13.260 +					     (unsigned long)NULL);
  13.261 +	return ret;
  13.262 +}
  13.263 +
  13.264 +
  13.265 +static void xenoprof_stop(void)
  13.266 +{
  13.267 +	if (is_primary)
  13.268 +		HYPERVISOR_xenoprof_op(XENOPROF_stop,
  13.269 +				       (unsigned long)NULL,
  13.270 +				       (unsigned long)NULL);
  13.271 +}
  13.272 +
  13.273 +
  13.274 +static int xenoprof_set_active(int * active_domains,
  13.275 +			  unsigned int adomains)
  13.276 +{
  13.277 +	int ret = 0;
  13.278 +	if (is_primary)
  13.279 +		ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active,
  13.280 +					     (unsigned long)active_domains,
  13.281 +					     (unsigned long)adomains);
  13.282 +	return ret;
  13.283 +}
  13.284 +
  13.285 +
  13.286 +struct op_counter_config counter_config[OP_MAX_COUNTER];
  13.287 +
  13.288 +static int xenoprof_create_files(struct super_block * sb, struct dentry * root)
  13.289 +{
  13.290 +	unsigned int i;
  13.291 +
  13.292 +	for (i = 0; i < num_events; ++i) {
  13.293 +		struct dentry * dir;
  13.294 +		char buf[2];
  13.295 + 
  13.296 +		snprintf(buf, 2, "%d", i);
  13.297 +		dir = oprofilefs_mkdir(sb, root, buf);
  13.298 +		oprofilefs_create_ulong(sb, dir, "enabled",
  13.299 +					&counter_config[i].enabled);
  13.300 +		oprofilefs_create_ulong(sb, dir, "event",
  13.301 +					&counter_config[i].event);
  13.302 +		oprofilefs_create_ulong(sb, dir, "count",
  13.303 +					&counter_config[i].count);
  13.304 +		oprofilefs_create_ulong(sb, dir, "unit_mask",
  13.305 +					&counter_config[i].unit_mask);
  13.306 +		oprofilefs_create_ulong(sb, dir, "kernel",
  13.307 +					&counter_config[i].kernel);
  13.308 +		oprofilefs_create_ulong(sb, dir, "user",
  13.309 +					&counter_config[i].user);
  13.310 +	}
  13.311 +
  13.312 +	return 0;
  13.313 +}
  13.314 +
  13.315 +
  13.316 +struct oprofile_operations xenoprof_ops = {
  13.317 +	.create_files 	= xenoprof_create_files,
  13.318 +	.set_active	= xenoprof_set_active,
  13.319 +	.setup 		= xenoprof_setup,
  13.320 +	.shutdown	= xenoprof_shutdown,
  13.321 +	.start		= xenoprof_start,
  13.322 +	.stop		= xenoprof_stop
  13.323 +};
  13.324 +
  13.325 +
  13.326 +/* in order to get driverfs right */
  13.327 +static int using_xenoprof;
  13.328 +
  13.329 +int __init oprofile_arch_init(struct oprofile_operations * ops)
  13.330 +{
  13.331 +	xenoprof_init_result_t result;
  13.332 +	xenoprof_buf_t * buf;
  13.333 +	int max_samples = 16;
  13.334 +	int vm_size;
  13.335 +	int npages;
  13.336 +	int i;
  13.337 +
  13.338 +	int ret = HYPERVISOR_xenoprof_op(XENOPROF_init,
  13.339 +					 (unsigned long)max_samples,
  13.340 +					 (unsigned long)&result);
  13.341 +
  13.342 +	if (!ret) {
  13.343 +		pgprot_t prot = __pgprot(_KERNPG_TABLE);
  13.344 +
  13.345 +		num_events = result.num_events;
  13.346 +		is_primary = result.is_primary;
  13.347 +		nbuf = result.nbuf;
  13.348 +
  13.349 +		npages = (result.bufsize * nbuf - 1) / PAGE_SIZE + 1;
  13.350 +		vm_size = npages * PAGE_SIZE;
  13.351 +
  13.352 +		shared_buffer = (char *) vm_map_xen_pages(result.buf_maddr,
  13.353 +							  vm_size, prot);
  13.354 +		if (!shared_buffer) {
  13.355 +			ret = -ENOMEM;
  13.356 +			goto out;
  13.357 +		}
  13.358 +
  13.359 +		for (i=0; i< nbuf; i++) {
  13.360 +			buf = (xenoprof_buf_t*) 
  13.361 +				&shared_buffer[i * result.bufsize];
  13.362 +			BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
  13.363 +			xenoprof_buf[buf->vcpu_id] = buf;
  13.364 +		}
  13.365 +
  13.366 +		/*  cpu_type is detected by Xen */
  13.367 +		cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0;
  13.368 +		strncpy(cpu_type, result.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1);
  13.369 +		xenoprof_ops.cpu_type = cpu_type;
  13.370 +
  13.371 +		init_driverfs();
  13.372 +		using_xenoprof = 1;
  13.373 +		*ops = xenoprof_ops;
  13.374 +
  13.375 +		for (i=0; i<NR_CPUS; i++)
  13.376 +			ovf_irq[i] = -1;
  13.377 +	}
  13.378 + out:
  13.379 +	printk(KERN_INFO "oprofile_arch_init: ret %d, events %d, "
  13.380 +	       "is_primary %d\n", ret, num_events, is_primary);
  13.381 +	return ret;
  13.382 +}
  13.383 +
  13.384 +
  13.385 +void __exit oprofile_arch_exit(void)
  13.386 +{
  13.387 +	if (using_xenoprof)
  13.388 +		exit_driverfs();
  13.389 +
  13.390 +	if (shared_buffer) {
  13.391 +		vunmap(shared_buffer);
  13.392 +		shared_buffer = NULL;
  13.393 +	}
  13.394 +	if (is_primary)
  13.395 +		HYPERVISOR_xenoprof_op(XENOPROF_shutdown,
  13.396 +				       (unsigned long)NULL,
  13.397 +				       (unsigned long)NULL);
  13.398 +}
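
The overflow handler above drains a single-producer/single-consumer ring: Xen advances event_head as it logs samples, and the guest advances event_tail as it consumes them. A self-contained sketch of that wraparound drain, with illustrative names (drain_ring and consume are not part of the changeset):

    /* If the producer has wrapped past the end of the buffer
     * (tail > head), consume entries up to 'size' first, then restart
     * at index 0 and continue up to head.  Returns the new tail for
     * the consumer to publish back into the shared buffer. */
    static int drain_ring(int head, int tail, int size,
                          void (*consume)(int idx))
    {
            if (tail > head) {
                    for (; tail < size; tail++)
                            consume(tail);
                    tail = 0;
            }
            for (; tail < head; tail++)
                    consume(tail);
            return tail;
    }
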
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile	Fri Apr 07 11:52:00 2006 +0100
    14.3 @@ -0,0 +1,22 @@
    14.4 +#
    14.5 +# oprofile for x86-64.
    14.6 +# Just reuse the one from i386. 
    14.7 +#
    14.8 +
    14.9 +obj-$(CONFIG_OPROFILE) += oprofile.o
   14.10 + 
   14.11 +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
   14.12 +	oprof.o cpu_buffer.o buffer_sync.o \
   14.13 +	event_buffer.o oprofile_files.o \
   14.14 +	oprofilefs.o oprofile_stats.o \
   14.15 +	timer_int.o )
   14.16 +
   14.17 +ifdef CONFIG_XEN
   14.18 +OPROFILE-y := xenoprof.o
   14.19 +else
   14.20 +OPROFILE-y := init.o backtrace.o
   14.21 +OPROFILE-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o op_model_p4.o \
   14.22 +				     op_model_ppro.o
   14.23 +OPROFILE-$(CONFIG_X86_IO_APIC)    += nmi_timer_int.o 
   14.24 +endif
   14.25 +oprofile-y = $(DRIVER_OBJS) $(addprefix ../../i386/oprofile/, $(OPROFILE-y))
    15.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Thu Apr 06 14:22:52 2006 +0100
    15.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Fri Apr 07 11:52:00 2006 +0100
    15.3 @@ -215,52 +215,26 @@ static void print_stats(blkif_t *blkif)
    15.4  
    15.5  int blkif_schedule(void *arg)
    15.6  {
    15.7 -	blkif_t          *blkif = arg;
    15.8 +	blkif_t *blkif = arg;
    15.9  
   15.10  	blkif_get(blkif);
   15.11 +
   15.12  	if (debug_lvl)
   15.13  		printk(KERN_DEBUG "%s: started\n", current->comm);
   15.14 -	for (;;) {
   15.15 -		if (kthread_should_stop()) {
   15.16 -			/* asked to quit? */
   15.17 -			if (!atomic_read(&blkif->io_pending))
   15.18 -				break;
   15.19 -			if (debug_lvl)
   15.20 -				printk(KERN_DEBUG "%s: I/O pending, "
   15.21 -				       "delaying exit\n", current->comm);
   15.22 -		}
   15.23  
   15.24 -		if (!atomic_read(&blkif->io_pending)) {
   15.25 -			/* Wait for work to do. */
   15.26 -			wait_event_interruptible(
   15.27 -				blkif->wq,
   15.28 -				(atomic_read(&blkif->io_pending) ||
   15.29 -				 kthread_should_stop()));
   15.30 -		} else if (list_empty(&pending_free)) {
   15.31 -			/* Wait for pending_req becoming available. */
   15.32 -			wait_event_interruptible(
   15.33 -				pending_free_wq,
   15.34 -				!list_empty(&pending_free));
   15.35 -		}
   15.36 +	while (!kthread_should_stop()) {
   15.37 +		wait_event_interruptible(
   15.38 +			blkif->wq,
   15.39 +			blkif->waiting_reqs || kthread_should_stop());
   15.40 +		wait_event_interruptible(
   15.41 +			pending_free_wq,
   15.42 +			!list_empty(&pending_free) || kthread_should_stop());
   15.43  
   15.44 -		if (blkif->status != CONNECTED) {
   15.45 -			/* make sure we are connected */
   15.46 -			if (debug_lvl)
   15.47 -				printk(KERN_DEBUG "%s: not connected "
   15.48 -				       "(%d pending)\n",
   15.49 -				       current->comm,
   15.50 -				       atomic_read(&blkif->io_pending));
   15.51 -			wait_event_interruptible(
   15.52 -				blkif->wq,
   15.53 -				(blkif->status == CONNECTED ||
   15.54 -				 kthread_should_stop()));
   15.55 -			continue;
   15.56 -		}
   15.57 +		blkif->waiting_reqs = 0;
   15.58 +		smp_mb(); /* clear flag *before* checking for work */
   15.59  
   15.60 -		/* Schedule I/O */
   15.61 -		atomic_set(&blkif->io_pending, 0);
   15.62  		if (do_block_io_op(blkif))
   15.63 -			atomic_inc(&blkif->io_pending);
   15.64 +			blkif->waiting_reqs = 1;
   15.65  		unplug_queue(blkif);
   15.66  
   15.67  		if (log_stats && time_after(jiffies, blkif->st_print))
   15.68 @@ -271,8 +245,10 @@ int blkif_schedule(void *arg)
   15.69  		print_stats(blkif);
   15.70  	if (debug_lvl)
   15.71  		printk(KERN_DEBUG "%s: exiting\n", current->comm);
   15.72 +
   15.73  	blkif->xenblkd = NULL;
   15.74  	blkif_put(blkif);
   15.75 +
   15.76  	return 0;
   15.77  }
   15.78  
   15.79 @@ -311,12 +287,15 @@ static int end_block_io_op(struct bio *b
   15.80   * NOTIFICATION FROM GUEST OS.
   15.81   */
   15.82  
   15.83 +static void blkif_notify_work(blkif_t *blkif)
   15.84 +{
   15.85 +	blkif->waiting_reqs = 1;
   15.86 +	wake_up(&blkif->wq);
   15.87 +}
   15.88 +
   15.89  irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
   15.90  {
   15.91 -	blkif_t *blkif = dev_id;
   15.92 -
   15.93 -	atomic_inc(&blkif->io_pending);
   15.94 -	wake_up(&blkif->wq);
   15.95 +	blkif_notify_work(dev_id);
   15.96  	return IRQ_HANDLED;
   15.97  }
   15.98  
   15.99 @@ -536,10 +515,8 @@ static void make_response(blkif_t *blkif
  15.100  	}
  15.101  	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
  15.102  
  15.103 -	if (more_to_do) {
  15.104 -		atomic_inc(&blkif->io_pending);
  15.105 -		wake_up(&blkif->wq);
  15.106 -	}
  15.107 +	if (more_to_do)
  15.108 +		blkif_notify_work(blkif);
  15.109  	if (notify)
  15.110  		notify_remote_via_irq(blkif->irq);
  15.111  }
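
The rewritten blkif_schedule() above replaces the atomic io_pending counter and CONNECTED check with a single waiting_reqs flag. The ordering is the crux: the thread clears the flag and then issues smp_mb() before scanning the ring, so a producer that sets the flag and calls wake_up() (as blkif_notify_work() does) after the clear is still observed, either by the scan itself or by the next wait_event_interruptible(). A minimal sketch of the consumer side of this idiom, with illustrative names (process_pending_work is hypothetical):

    /* Producers do:  waiting_reqs = 1; wake_up(&wq); */
    while (!kthread_should_stop()) {
            wait_event_interruptible(wq,
                    waiting_reqs || kthread_should_stop());
            waiting_reqs = 0;
            smp_mb();   /* clear flag *before* checking for work */
            if (process_pending_work())   /* more left to do? */
                    waiting_reqs = 1;     /* re-arm and loop again */
    }
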
    16.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h	Thu Apr 06 14:22:52 2006 +0100
    16.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h	Fri Apr 07 11:52:00 2006 +0100
    16.3 @@ -72,7 +72,6 @@ typedef struct blkif_st {
    16.4  	/* Back pointer to the backend_info. */
    16.5  	struct backend_info *be; 
    16.6  	/* Private fields. */
    16.7 -	enum { DISCONNECTED, CONNECTED } status;
    16.8  #ifdef CONFIG_XEN_BLKDEV_TAP_BE
    16.9  	/* Is this a blktap frontend */
   16.10  	unsigned int     is_blktap;
   16.11 @@ -82,7 +81,7 @@ typedef struct blkif_st {
   16.12  
   16.13  	wait_queue_head_t   wq;
   16.14  	struct task_struct  *xenblkd;
   16.15 -	atomic_t            io_pending;
   16.16 +	unsigned int        waiting_reqs;
   16.17  	request_queue_t     *plug;
   16.18  
   16.19  	/* statistics */
   16.20 @@ -133,8 +132,6 @@ void blkif_xenbus_init(void);
   16.21  irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
   16.22  int blkif_schedule(void *arg);
   16.23  
   16.24 -void update_blkif_status(blkif_t *blkif); 
   16.25 -
   16.26  #endif /* __BLKIF__BACKEND__COMMON_H__ */
   16.27  
   16.28  /*
    17.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c	Thu Apr 06 14:22:52 2006 +0100
    17.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c	Fri Apr 07 11:52:00 2006 +0100
    17.3 @@ -45,7 +45,6 @@ blkif_t *alloc_blkif(domid_t domid)
    17.4  
    17.5  	memset(blkif, 0, sizeof(*blkif));
    17.6  	blkif->domid = domid;
    17.7 -	blkif->status = DISCONNECTED;
    17.8  	spin_lock_init(&blkif->blk_ring_lock);
    17.9  	atomic_set(&blkif->refcnt, 1);
   17.10  	init_waitqueue_head(&blkif->wq);
   17.11 @@ -138,9 +137,6 @@ int blkif_map(blkif_t *blkif, unsigned l
   17.12  	blkif->irq = bind_evtchn_to_irqhandler(
   17.13  		blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif);
   17.14  
   17.15 -	/* We're potentially connected now */
   17.16 -	update_blkif_status(blkif); 
   17.17 -
   17.18  	return 0;
   17.19  }
   17.20  
    18.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Thu Apr 06 14:22:52 2006 +0100
    18.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Fri Apr 07 11:52:00 2006 +0100
    18.3 @@ -17,7 +17,6 @@
    18.4      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    18.5  */
    18.6  
    18.7 -
    18.8  #include <stdarg.h>
    18.9  #include <linux/module.h>
   18.10  #include <linux/kthread.h>
   18.11 @@ -25,36 +24,52 @@
   18.12  #include "common.h"
   18.13  
   18.14  #undef DPRINTK
   18.15 -#define DPRINTK(fmt, args...) \
   18.16 -    pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
   18.17 -
   18.18 +#define DPRINTK(fmt, args...)				\
   18.19 +	pr_debug("blkback/xenbus (%s:%d) " fmt ".\n",	\
   18.20 +		 __FUNCTION__, __LINE__, ##args)
   18.21  
   18.22  struct backend_info
   18.23  {
   18.24  	struct xenbus_device *dev;
   18.25  	blkif_t *blkif;
   18.26  	struct xenbus_watch backend_watch;
   18.27 -
   18.28  	unsigned major;
   18.29  	unsigned minor;
   18.30  	char *mode;
   18.31  };
   18.32  
   18.33 -
   18.34 -static void maybe_connect(struct backend_info *);
   18.35  static void connect(struct backend_info *);
   18.36  static int connect_ring(struct backend_info *);
   18.37  static void backend_changed(struct xenbus_watch *, const char **,
   18.38  			    unsigned int);
   18.39  
   18.40  
   18.41 -void update_blkif_status(blkif_t *blkif)
   18.42 +static void update_blkif_status(blkif_t *blkif)
   18.43  { 
   18.44 -	if(blkif->irq && blkif->vbd.bdev) {
   18.45 -		blkif->status = CONNECTED; 
   18.46 -		(void)blkif_be_int(0, blkif, NULL); 
   18.47 +	int err;
   18.48 +
   18.49 +	/* Not ready to connect? */
   18.50 +	if (!blkif->irq || !blkif->vbd.bdev)
   18.51 +		return;
   18.52 +
   18.53 +	/* Already connected? */
   18.54 +	if (blkif->be->dev->state == XenbusStateConnected)
   18.55 +		return;
   18.56 +
   18.57 +	/* Attempt to connect: exit if we fail to. */
   18.58 +	connect(blkif->be);
   18.59 +	if (blkif->be->dev->state != XenbusStateConnected)
   18.60 +		return;
   18.61 +
   18.62 +	blkif->xenblkd = kthread_run(blkif_schedule, blkif,
   18.63 +				     "xvd %d %02x:%02x",
   18.64 +				     blkif->domid,
   18.65 +				     blkif->be->major, blkif->be->minor);
   18.66 +	if (IS_ERR(blkif->xenblkd)) {
   18.67 +		err = PTR_ERR(blkif->xenblkd);
   18.68 +		blkif->xenblkd = NULL;
   18.69 +		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
   18.70  	}
   18.71 -	maybe_connect(blkif->be); 
   18.72  }
   18.73  
   18.74  
   18.75 @@ -91,7 +106,6 @@ static int blkback_remove(struct xenbus_
   18.76  		be->backend_watch.node = NULL;
   18.77  	}
   18.78  	if (be->blkif) {
   18.79 -		be->blkif->status = DISCONNECTED; 
   18.80  		if (be->blkif->xenblkd)
   18.81  			kthread_stop(be->blkif->xenblkd);
   18.82  		blkif_put(be->blkif);
   18.83 @@ -185,8 +199,8 @@ static void backend_changed(struct xenbu
   18.84  		return;
   18.85  	}
   18.86  
   18.87 -	if (be->major && be->minor &&
   18.88 -	    (be->major != major || be->minor != minor)) {
   18.89 +	if ((be->major || be->minor) &&
   18.90 +	    ((be->major != major) || (be->minor != minor))) {
   18.91  		printk(KERN_WARNING
   18.92  		       "blkback: changing physical device (from %x:%x to "
   18.93  		       "%x:%x) not supported.\n", be->major, be->minor,
   18.94 @@ -220,17 +234,6 @@ static void backend_changed(struct xenbu
   18.95  			return;
   18.96  		}
   18.97  
   18.98 -		be->blkif->xenblkd = kthread_run(blkif_schedule, be->blkif,
   18.99 -						 "xvd %d %02x:%02x",
  18.100 -						 be->blkif->domid,
  18.101 -						 be->major, be->minor);
  18.102 -		if (IS_ERR(be->blkif->xenblkd)) {
  18.103 -			err = PTR_ERR(be->blkif->xenblkd);
  18.104 -			be->blkif->xenblkd = NULL;
  18.105 -			xenbus_dev_error(dev, err, "start xenblkd");
  18.106 -			return;
  18.107 -		}
  18.108 -
  18.109  		device_create_file(&dev->dev, &dev_attr_physical_device);
  18.110  		device_create_file(&dev->dev, &dev_attr_mode);
  18.111  
  18.112 @@ -290,14 +293,6 @@ static void frontend_changed(struct xenb
  18.113  /* ** Connection ** */
  18.114  
  18.115  
  18.116 -static void maybe_connect(struct backend_info *be)
  18.117 -{
  18.118 -	if ((be->major != 0 || be->minor != 0) &&
  18.119 -	    be->blkif->status == CONNECTED)
  18.120 -		connect(be);
  18.121 -}
  18.122 -
  18.123 -
  18.124  /**
  18.125   * Write the physical details regarding the block device to the store, and
  18.126   * switch to Connected state.
    19.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Thu Apr 06 14:22:52 2006 +0100
    19.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Fri Apr 07 11:52:00 2006 +0100
    19.3 @@ -85,6 +85,23 @@ void smp_resume(void);
    19.4  #define smp_resume()	((void)0)
    19.5  #endif
    19.6  
    19.7 +/* Ensure we run on the idle task page tables so that we will
    19.8 +   switch page tables before running user space. This is needed
    19.9 +   on architectures with separate kernel and user page tables
   19.10 +   because the user page table pointer is not saved/restored. */
   19.11 +static void switch_idle_mm(void)
   19.12 +{
   19.13 +	struct mm_struct *mm = current->active_mm;
   19.14 +
   19.15 +	if (mm == &init_mm)
   19.16 +		return;
   19.17 +
   19.18 +	atomic_inc(&init_mm.mm_count);
   19.19 +	switch_mm(mm, &init_mm, current);
   19.20 +	current->active_mm = &init_mm;
   19.21 +	mmdrop(mm);
   19.22 +}
   19.23 +
   19.24  static int __do_suspend(void *ignore)
   19.25  {
   19.26  	int i, j, k, fpp, err;
   19.27 @@ -164,6 +181,8 @@ static int __do_suspend(void *ignore)
   19.28  
   19.29  	time_resume();
   19.30  
   19.31 +	switch_idle_mm();
   19.32 +
   19.33  	__sti();
   19.34  
   19.35  	xencons_resume();
    20.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Thu Apr 06 14:22:52 2006 +0100
    20.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Fri Apr 07 11:52:00 2006 +0100
    20.3 @@ -301,9 +301,6 @@ static void net_rx_action(unsigned long 
    20.4  		netif   = netdev_priv(skb->dev);
    20.5  		size    = skb->tail - skb->data;
    20.6  
    20.7 -		/* Rederive the machine addresses. */
    20.8 -		new_mfn = mcl->args[1] >> PAGE_SHIFT;
    20.9 -		old_mfn = gop->mfn;
   20.10  		atomic_set(&(skb_shinfo(skb)->dataref), 1);
   20.11  		skb_shinfo(skb)->nr_frags = 0;
   20.12  		skb_shinfo(skb)->frag_list = NULL;
    21.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Thu Apr 06 14:22:52 2006 +0100
    21.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Fri Apr 07 11:52:00 2006 +0100
    21.3 @@ -993,8 +993,8 @@ static void network_connect(struct net_d
    21.4  	 * the RX ring because some of our pages are currently flipped out
    21.5  	 * so we can't just free the RX skbs.
    21.6  	 * NB2. Freelist index entries are always going to be less than
    21.7 -	 *  __PAGE_OFFSET, whereas pointers to skbs will always be equal or
    21.8 -	 * greater than __PAGE_OFFSET: we use this property to distinguish
    21.9 +	 *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
   21.10 +	 * greater than PAGE_OFFSET: we use this property to distinguish
   21.11  	 * them.
   21.12  	 */
   21.13  
   21.14 @@ -1005,7 +1005,7 @@ static void network_connect(struct net_d
   21.15  	 * interface has been down.
   21.16  	 */
   21.17  	for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
   21.18 -		if ((unsigned long)np->tx_skbs[i] < __PAGE_OFFSET)
   21.19 +		if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
   21.20  			continue;
   21.21  
   21.22  		skb = np->tx_skbs[i];
   21.23 @@ -1036,7 +1036,7 @@ static void network_connect(struct net_d
   21.24  
   21.25  	/* Rebuild the RX buffer freelist and the RX ring itself. */
   21.26  	for (requeue_idx = 0, i = 1; i <= NET_RX_RING_SIZE; i++) {
   21.27 -		if ((unsigned long)np->rx_skbs[i] < __PAGE_OFFSET)
   21.28 +		if ((unsigned long)np->rx_skbs[i] < PAGE_OFFSET)
   21.29  			continue;
   21.30  		gnttab_grant_foreign_transfer_ref(
   21.31  			np->grant_rx_ref[i], np->xbdev->otherend_id,
    22.1 --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Thu Apr 06 14:22:52 2006 +0100
    22.2 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Fri Apr 07 11:52:00 2006 +0100
    22.3 @@ -277,6 +277,7 @@ static int __init privcmd_init(void)
    22.4  	set_bit(__HYPERVISOR_mmu_update,       hypercall_permission_map);
    22.5  	set_bit(__HYPERVISOR_mmuext_op,        hypercall_permission_map);
    22.6  	set_bit(__HYPERVISOR_xen_version,      hypercall_permission_map);
    22.7 +	set_bit(__HYPERVISOR_sched_op,         hypercall_permission_map);
    22.8  
    22.9  	privcmd_intf = create_xen_proc_entry("privcmd", 0400);
   22.10  	if (privcmd_intf != NULL)
    23.1 --- a/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c	Thu Apr 06 14:22:52 2006 +0100
    23.2 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c	Fri Apr 07 11:52:00 2006 +0100
    23.3 @@ -65,14 +65,18 @@ static irqreturn_t tpmif_int(int irq,
    23.4                               void *tpm_priv,
    23.5                               struct pt_regs *ptregs);
    23.6  static void tpmif_rx_action(unsigned long unused);
    23.7 -static void tpmif_connect(struct tpm_private *tp, domid_t domid);
    23.8 +static int tpmif_connect(struct xenbus_device *dev,
    23.9 +                         struct tpm_private *tp,
   23.10 +                         domid_t domid);
   23.11  static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0);
   23.12 -static int tpm_allocate_buffers(struct tpm_private *tp);
   23.13 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp);
   23.14 +static void tpmif_free_tx_buffers(struct tpm_private *tp);
   23.15  static void tpmif_set_connected_state(struct tpm_private *tp,
   23.16                                        u8 newstate);
   23.17  static int tpm_xmit(struct tpm_private *tp,
   23.18                      const u8 * buf, size_t count, int userbuffer,
   23.19                      void *remember);
   23.20 +static void destroy_tpmring(struct tpm_private *tp);
   23.21  
   23.22  #define DPRINTK(fmt, args...) \
   23.23      pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
   23.24 @@ -81,6 +85,8 @@ static int tpm_xmit(struct tpm_private *
   23.25  #define WPRINTK(fmt, args...) \
   23.26      printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args)
   23.27  
   23.28 +#define GRANT_INVALID_REF	0
   23.29 +
   23.30  
   23.31  static inline int
   23.32  tx_buffer_copy(struct tx_buffer *txb, const u8 * src, int len,
   23.33 @@ -119,6 +125,14 @@ static inline struct tx_buffer *tx_buffe
   23.34  }
   23.35  
   23.36  
   23.37 +static inline void tx_buffer_free(struct tx_buffer *txb)
   23.38 +{
   23.39 +	if (txb) {
   23.40 +		free_page((long)txb->data);
   23.41 +		kfree(txb);
   23.42 +	}
   23.43 +}
   23.44 +
   23.45  /**************************************************************
   23.46   Utility function for the tpm_private structure
   23.47  **************************************************************/
   23.48 @@ -128,23 +142,29 @@ static inline void tpm_private_init(stru
   23.49  	init_waitqueue_head(&tp->wait_q);
   23.50  }
   23.51  
   23.52 +static inline void tpm_private_free(void)
   23.53 +{
   23.54 +	tpmif_free_tx_buffers(my_priv);
   23.55 +	kfree(my_priv);
   23.56 +	my_priv = NULL;
   23.57 +}
   23.58 +
   23.59  static struct tpm_private *tpm_private_get(void)
   23.60  {
   23.61 +	int err;
   23.62  	if (!my_priv) {
   23.63  		my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
   23.64  		if (my_priv) {
   23.65  			tpm_private_init(my_priv);
   23.66 +			err = tpmif_allocate_tx_buffers(my_priv);
   23.67 +			if (err < 0) {
   23.68 +				tpm_private_free();
   23.69 +			}
   23.70  		}
   23.71  	}
   23.72  	return my_priv;
   23.73  }
   23.74  
   23.75 -static inline void tpm_private_free(void)
   23.76 -{
   23.77 -	kfree(my_priv);
   23.78 -	my_priv = NULL;
   23.79 -}
   23.80 -
   23.81  /**************************************************************
   23.82  
   23.83   The interface to let the tpm plugin register its callback
   23.84 @@ -233,6 +253,8 @@ static int setup_tpmring(struct xenbus_d
   23.85  	tpmif_tx_interface_t *sring;
   23.86  	int err;
   23.87  
   23.88 +	tp->ring_ref = GRANT_INVALID_REF;
   23.89 +
   23.90  	sring = (void *)__get_free_page(GFP_KERNEL);
   23.91  	if (!sring) {
   23.92  		xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
   23.93 @@ -240,8 +262,6 @@ static int setup_tpmring(struct xenbus_d
   23.94  	}
   23.95  	tp->tx = sring;
   23.96  
   23.97 -	tpm_allocate_buffers(tp);
   23.98 -
   23.99  	err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx));
  23.100  	if (err < 0) {
  23.101  		free_page((unsigned long)sring);
  23.102 @@ -251,14 +271,13 @@ static int setup_tpmring(struct xenbus_d
  23.103  	}
  23.104  	tp->ring_ref = err;
  23.105  
  23.106 -	err = xenbus_alloc_evtchn(dev, &tp->evtchn);
  23.107 +	err = tpmif_connect(dev, tp, dev->otherend_id);
  23.108  	if (err)
  23.109  		goto fail;
  23.110  
  23.111 -	tpmif_connect(tp, dev->otherend_id);
  23.112 -
  23.113  	return 0;
  23.114  fail:
  23.115 +	destroy_tpmring(tp);
  23.116  	return err;
  23.117  }
  23.118  
  23.119 @@ -266,14 +285,17 @@ fail:
  23.120  static void destroy_tpmring(struct tpm_private *tp)
  23.121  {
  23.122  	tpmif_set_connected_state(tp, 0);
  23.123 -	if (tp->tx != NULL) {
  23.124 +
  23.125 +	if (tp->ring_ref != GRANT_INVALID_REF) {
  23.126  		gnttab_end_foreign_access(tp->ring_ref, 0,
  23.127  					  (unsigned long)tp->tx);
  23.128 +		tp->ring_ref = GRANT_INVALID_REF;
  23.129  		tp->tx = NULL;
  23.130  	}
  23.131  
  23.132  	if (tp->irq)
  23.133 -		unbind_from_irqhandler(tp->irq, NULL);
  23.134 +		unbind_from_irqhandler(tp->irq, tp);
  23.135 +
  23.136  	tp->evtchn = tp->irq = 0;
  23.137  }
  23.138  
  23.139 @@ -377,6 +399,9 @@ static int tpmfront_probe(struct xenbus_
  23.140  	int handle;
  23.141  	struct tpm_private *tp = tpm_private_get();
  23.142  
  23.143 +	if (!tp)
  23.144 +		return -ENOMEM;
  23.145 +
  23.146  	err = xenbus_scanf(XBT_NULL, dev->nodename,
  23.147  	                   "handle", "%i", &handle);
  23.148  	if (XENBUS_EXIST_ERR(err))
  23.149 @@ -402,15 +427,14 @@ static int tpmfront_probe(struct xenbus_
  23.150  
  23.151  static int tpmfront_remove(struct xenbus_device *dev)
  23.152  {
  23.153 -	struct tpm_private *tp = dev->data;
  23.154 +	struct tpm_private *tp = (struct tpm_private *)dev->data;
  23.155  	destroy_tpmring(tp);
  23.156  	return 0;
  23.157  }
  23.158  
  23.159 -static int
  23.160 -tpmfront_suspend(struct xenbus_device *dev)
  23.161 +static int tpmfront_suspend(struct xenbus_device *dev)
  23.162  {
  23.163 -	struct tpm_private *tp = dev->data;
  23.164 +	struct tpm_private *tp = (struct tpm_private *)dev->data;
  23.165  	u32 ctr;
  23.166  
  23.167  	/* lock, so no app can send */
  23.168 @@ -437,29 +461,35 @@ tpmfront_suspend(struct xenbus_device *d
  23.169  	return 0;
  23.170  }
  23.171  
  23.172 -static int
  23.173 -tpmfront_resume(struct xenbus_device *dev)
  23.174 +static int tpmfront_resume(struct xenbus_device *dev)
  23.175  {
  23.176 -	struct tpm_private *tp = dev->data;
  23.177 +	struct tpm_private *tp = (struct tpm_private *)dev->data;
  23.178 +	destroy_tpmring(tp);
  23.179  	return talk_to_backend(dev, tp);
  23.180  }
  23.181  
  23.182 -static void
  23.183 -tpmif_connect(struct tpm_private *tp, domid_t domid)
  23.184 +static int tpmif_connect(struct xenbus_device *dev,
  23.185 +                         struct tpm_private *tp,
  23.186 +                         domid_t domid)
  23.187  {
  23.188  	int err;
  23.189  
  23.190  	tp->backend_id = domid;
  23.191  
  23.192 +	err = xenbus_alloc_evtchn(dev, &tp->evtchn);
  23.193 +	if (err)
  23.194 +		return err;
  23.195 +
  23.196  	err = bind_evtchn_to_irqhandler(tp->evtchn,
  23.197  					tpmif_int, SA_SAMPLE_RANDOM, "tpmif",
  23.198  					tp);
  23.199  	if (err <= 0) {
  23.200  		WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err);
  23.201 -		return;
  23.202 +		return err;
  23.203  	}
  23.204  
  23.205  	tp->irq = err;
  23.206 +	return 0;
  23.207  }
  23.208  
  23.209  static struct xenbus_device_id tpmfront_ids[] = {
  23.210 @@ -488,19 +518,30 @@ static void __exit exit_tpm_xenbus(void)
  23.211  	xenbus_unregister_driver(&tpmfront);
  23.212  }
  23.213  
  23.214 -
  23.215 -static int
  23.216 -tpm_allocate_buffers(struct tpm_private *tp)
  23.217 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp)
  23.218  {
  23.219  	unsigned int i;
  23.220  
  23.221 -	for (i = 0; i < TPMIF_TX_RING_SIZE; i++)
  23.222 +	for (i = 0; i < TPMIF_TX_RING_SIZE; i++) {
  23.223  		tp->tx_buffers[i] = tx_buffer_alloc();
  23.224 -	return 1;
  23.225 +		if (!tp->tx_buffers[i]) {
  23.226 +			tpmif_free_tx_buffers(tp);
  23.227 +			return -ENOMEM;
  23.228 +		}
  23.229 +	}
  23.230 +	return 0;
  23.231  }
  23.232  
  23.233 -static void
  23.234 -tpmif_rx_action(unsigned long priv)
  23.235 +static void tpmif_free_tx_buffers(struct tpm_private *tp)
  23.236 +{
  23.237 +	unsigned int i;
  23.238 +
  23.239 +	for (i = 0; i < TPMIF_TX_RING_SIZE; i++) {
  23.240 +		tx_buffer_free(tp->tx_buffers[i]);
  23.241 +	}
  23.242 +}
  23.243 +
  23.244 +static void tpmif_rx_action(unsigned long priv)
  23.245  {
  23.246  	struct tpm_private *tp = (struct tpm_private *)priv;
  23.247  
  23.248 @@ -545,8 +586,7 @@ exit:
  23.249  }
  23.250  
  23.251  
  23.252 -static irqreturn_t
  23.253 -tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
  23.254 +static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
  23.255  {
  23.256  	struct tpm_private *tp = tpm_priv;
  23.257  	unsigned long flags;
  23.258 @@ -560,10 +600,9 @@ tpmif_int(int irq, void *tpm_priv, struc
  23.259  }
  23.260  
  23.261  
  23.262 -static int
  23.263 -tpm_xmit(struct tpm_private *tp,
  23.264 -         const u8 * buf, size_t count, int isuserbuffer,
  23.265 -         void *remember)
  23.266 +static int tpm_xmit(struct tpm_private *tp,
  23.267 +                    const u8 * buf, size_t count, int isuserbuffer,
  23.268 +                    void *remember)
  23.269  {
  23.270  	tpmif_tx_request_t *tx;
  23.271  	TPMIF_RING_IDX i;
  23.272 @@ -693,8 +732,7 @@ static void tpmif_set_connected_state(st
  23.273   * =================================================================
  23.274   */
  23.275  
  23.276 -static int __init
  23.277 -tpmif_init(void)
  23.278 +static int __init tpmif_init(void)
  23.279  {
  23.280  	IPRINTK("Initialising the vTPM driver.\n");
  23.281  	if ( gnttab_alloc_grant_references ( TPMIF_TX_RING_SIZE,
  23.282 @@ -709,8 +747,7 @@ tpmif_init(void)
  23.283  
  23.284  module_init(tpmif_init);
  23.285  
  23.286 -static void __exit
  23.287 -tpmif_exit(void)
  23.288 +static void __exit tpmif_exit(void)
  23.289  {
  23.290  	exit_tpm_xenbus();
  23.291  	gnttab_free_grant_references(gref_head);
    24.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h	Thu Apr 06 14:22:52 2006 +0100
    24.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h	Fri Apr 07 11:52:00 2006 +0100
    24.3 @@ -329,6 +329,21 @@ HYPERVISOR_nmi_op(
    24.4  	return _hypercall2(int, nmi_op, op, arg);
    24.5  }
    24.6  
    24.7 +static inline int
    24.8 +HYPERVISOR_callback_op(
    24.9 +	int cmd, void *arg)
   24.10 +{
   24.11 +	return _hypercall2(int, callback_op, cmd, arg);
   24.12 +}
   24.13 +
   24.14 +static inline int
   24.15 +HYPERVISOR_xenoprof_op(
   24.16 +	int op, unsigned long arg1, unsigned long arg2)
   24.17 +{
   24.18 +	return _hypercall3(int, xenoprof_op, op, arg1, arg2);
   24.19 +}
   24.20 +
   24.21 +
   24.22  #endif /* __HYPERCALL_H__ */
   24.23  
   24.24  /*
    25.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h	Thu Apr 06 14:22:52 2006 +0100
    25.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h	Fri Apr 07 11:52:00 2006 +0100
    25.3 @@ -6,6 +6,8 @@
    25.4   *	use of all of the static functions.
    25.5   **/
    25.6  
    25.7 +#include <xen/interface/callback.h>
    25.8 +
    25.9  static char * __init machine_specific_memory_setup(void)
   25.10  {
   25.11  	unsigned long max_pfn = xen_start_info->nr_pages;
   25.12 @@ -23,6 +25,14 @@ extern void nmi(void);
   25.13  static void __init machine_specific_arch_setup(void)
   25.14  {
   25.15  	struct xen_platform_parameters pp;
   25.16 +	struct callback_register event = {
   25.17 +		.type = CALLBACKTYPE_event,
   25.18 +		.address = { __KERNEL_CS, (unsigned long)hypervisor_callback },
   25.19 +	};
   25.20 +	struct callback_register failsafe = {
   25.21 +		.type = CALLBACKTYPE_failsafe,
   25.22 +		.address = { __KERNEL_CS, (unsigned long)failsafe_callback },
   25.23 +	};
   25.24  	struct xennmi_callback cb;
   25.25  
   25.26  	if (xen_feature(XENFEAT_auto_translated_physmap) &&
   25.27 @@ -32,9 +42,8 @@ static void __init machine_specific_arch
   25.28  		memset(empty_zero_page, 0, sizeof(empty_zero_page));
   25.29  	}
   25.30  
   25.31 -	HYPERVISOR_set_callbacks(
   25.32 -	    __KERNEL_CS, (unsigned long)hypervisor_callback,
   25.33 -	    __KERNEL_CS, (unsigned long)failsafe_callback);
   25.34 +	HYPERVISOR_callback_op(CALLBACKOP_register, &event);
   25.35 +	HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
   25.36  
   25.37  	cb.handler_address = (unsigned long)&nmi;
   25.38  	HYPERVISOR_nmi_op(XENNMI_register_callback, &cb);
    26.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h	Thu Apr 06 14:22:52 2006 +0100
    26.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h	Fri Apr 07 11:52:00 2006 +0100
    26.3 @@ -330,6 +330,20 @@ HYPERVISOR_nmi_op(
    26.4  	return _hypercall2(int, nmi_op, op, arg);
    26.5  }
    26.6  
    26.7 +static inline int
    26.8 +HYPERVISOR_callback_op(
    26.9 +	int cmd, void *arg)
   26.10 +{
   26.11 +	return _hypercall2(int, callback_op, cmd, arg);
   26.12 +}
   26.13 +
   26.14 +static inline int
   26.15 +HYPERVISOR_xenoprof_op(
   26.16 +	int op, unsigned long arg1, unsigned long arg2)
   26.17 +{
   26.18 +	return _hypercall3(int, xenoprof_op, op, arg1, arg2);
   26.19 +}
   26.20 +
   26.21  #endif /* __HYPERCALL_H__ */
   26.22  
   26.23  /*
    27.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h	Thu Apr 06 14:22:52 2006 +0100
    27.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h	Fri Apr 07 11:52:00 2006 +0100
    27.3 @@ -6,20 +6,33 @@
    27.4   *	use of all of the static functions.
    27.5   **/
    27.6  
    27.7 +#include <xen/interface/callback.h>
    27.8 +
    27.9  extern void hypervisor_callback(void);
   27.10  extern void failsafe_callback(void);
   27.11  extern void nmi(void);
   27.12  
   27.13  static void __init machine_specific_arch_setup(void)
   27.14  {
   27.15 +	struct callback_register event = {
   27.16 +		.type = CALLBACKTYPE_event,
   27.17 +		.address = (unsigned long) hypervisor_callback,
   27.18 +	};
   27.19 +	struct callback_register failsafe = {
   27.20 +		.type = CALLBACKTYPE_failsafe,
   27.21 +		.address = (unsigned long)failsafe_callback,
   27.22 +	};
   27.23 +	struct callback_register syscall = {
   27.24 +		.type = CALLBACKTYPE_syscall,
   27.25 +		.address = (unsigned long)system_call,
   27.26 +	};
   27.27  #ifdef CONFIG_X86_LOCAL_APIC
   27.28  	struct xennmi_callback cb;
   27.29  #endif
   27.30  
   27.31 -	HYPERVISOR_set_callbacks(
   27.32 -                (unsigned long) hypervisor_callback,
   27.33 -                (unsigned long) failsafe_callback,
   27.34 -                (unsigned long) system_call);
   27.35 +	HYPERVISOR_callback_op(CALLBACKOP_register, &event);
   27.36 +	HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
   27.37 +	HYPERVISOR_callback_op(CALLBACKOP_register, &syscall);
   27.38  
   27.39  #ifdef CONFIG_X86_LOCAL_APIC
   27.40  	cb.handler_address = (unsigned long)&nmi;
    28.1 --- a/linux-2.6-xen-sparse/include/linux/irq.h	Thu Apr 06 14:22:52 2006 +0100
    28.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.3 @@ -1,244 +0,0 @@
    28.4 -#ifndef __irq_h
    28.5 -#define __irq_h
    28.6 -
    28.7 -/*
    28.8 - * Please do not include this file in generic code.  There is currently
    28.9 - * no requirement for any architecture to implement anything held
   28.10 - * within this file.
   28.11 - *
   28.12 - * Thanks. --rmk
   28.13 - */
   28.14 -
   28.15 -#include <linux/config.h>
   28.16 -#include <linux/smp.h>
   28.17 -
   28.18 -#if !defined(CONFIG_S390)
   28.19 -
   28.20 -#include <linux/linkage.h>
   28.21 -#include <linux/cache.h>
   28.22 -#include <linux/spinlock.h>
   28.23 -#include <linux/cpumask.h>
   28.24 -
   28.25 -#include <asm/irq.h>
   28.26 -#include <asm/ptrace.h>
   28.27 -
   28.28 -/*
   28.29 - * IRQ line status.
   28.30 - */
   28.31 -#define IRQ_INPROGRESS	1	/* IRQ handler active - do not enter! */
   28.32 -#define IRQ_DISABLED	2	/* IRQ disabled - do not enter! */
   28.33 -#define IRQ_PENDING	4	/* IRQ pending - replay on enable */
   28.34 -#define IRQ_REPLAY	8	/* IRQ has been replayed but not acked yet */
   28.35 -#define IRQ_AUTODETECT	16	/* IRQ is being autodetected */
   28.36 -#define IRQ_WAITING	32	/* IRQ not yet seen - for autodetection */
   28.37 -#define IRQ_LEVEL	64	/* IRQ level triggered */
   28.38 -#define IRQ_MASKED	128	/* IRQ masked - shouldn't be seen again */
   28.39 -#if defined(ARCH_HAS_IRQ_PER_CPU)
   28.40 -# define IRQ_PER_CPU	256	/* IRQ is per CPU */
   28.41 -# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
   28.42 -#else
   28.43 -# define CHECK_IRQ_PER_CPU(var) 0
   28.44 -#endif
   28.45 -
   28.46 -/*
   28.47 - * Interrupt controller descriptor. This is all we need
   28.48 - * to describe about the low-level hardware. 
   28.49 - */
   28.50 -struct hw_interrupt_type {
   28.51 -	const char * typename;
   28.52 -	unsigned int (*startup)(unsigned int irq);
   28.53 -	void (*shutdown)(unsigned int irq);
   28.54 -	void (*enable)(unsigned int irq);
   28.55 -	void (*disable)(unsigned int irq);
   28.56 -	void (*ack)(unsigned int irq);
   28.57 -	void (*end)(unsigned int irq);
   28.58 -	void (*set_affinity)(unsigned int irq, cpumask_t dest);
   28.59 -	/* Currently used only by UML, might disappear one day.*/
   28.60 -#ifdef CONFIG_IRQ_RELEASE_METHOD
   28.61 -	void (*release)(unsigned int irq, void *dev_id);
   28.62 -#endif
   28.63 -};
   28.64 -
   28.65 -typedef struct hw_interrupt_type  hw_irq_controller;
   28.66 -
   28.67 -/*
   28.68 - * This is the "IRQ descriptor", which contains various information
   28.69 - * about the irq, including what kind of hardware handling it has,
   28.70 - * whether it is disabled etc etc.
   28.71 - *
   28.72 - * Pad this out to 32 bytes for cache and indexing reasons.
   28.73 - */
   28.74 -typedef struct irq_desc {
   28.75 -	hw_irq_controller *handler;
   28.76 -	void *handler_data;
   28.77 -	struct irqaction *action;	/* IRQ action list */
   28.78 -	unsigned int status;		/* IRQ status */
   28.79 -	unsigned int depth;		/* nested irq disables */
   28.80 -	unsigned int irq_count;		/* For detecting broken interrupts */
   28.81 -	unsigned int irqs_unhandled;
   28.82 -	spinlock_t lock;
   28.83 -#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
   28.84 -	unsigned int move_irq;		/* Flag need to re-target intr dest*/
   28.85 -#endif
   28.86 -} ____cacheline_aligned irq_desc_t;
   28.87 -
   28.88 -extern irq_desc_t irq_desc [NR_IRQS];
   28.89 -
   28.90 -/* Return a pointer to the irq descriptor for IRQ.  */
   28.91 -static inline irq_desc_t *
   28.92 -irq_descp (int irq)
   28.93 -{
   28.94 -	return irq_desc + irq;
   28.95 -}
   28.96 -
   28.97 -#include <asm/hw_irq.h> /* the arch dependent stuff */
   28.98 -
   28.99 -extern int setup_irq(unsigned int irq, struct irqaction * new);
  28.100 -#ifdef CONFIG_XEN
  28.101 -extern int teardown_irq(unsigned int irq, struct irqaction * old);
  28.102 -#endif
  28.103 -
  28.104 -#ifdef CONFIG_GENERIC_HARDIRQS
  28.105 -extern cpumask_t irq_affinity[NR_IRQS];
  28.106 -
  28.107 -#ifdef CONFIG_SMP
  28.108 -static inline void set_native_irq_info(int irq, cpumask_t mask)
  28.109 -{
  28.110 -	irq_affinity[irq] = mask;
  28.111 -}
  28.112 -#else
  28.113 -static inline void set_native_irq_info(int irq, cpumask_t mask)
  28.114 -{
  28.115 -}
  28.116 -#endif
  28.117 -
  28.118 -#ifdef CONFIG_SMP
  28.119 -
  28.120 -#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
  28.121 -extern cpumask_t pending_irq_cpumask[NR_IRQS];
  28.122 -
  28.123 -static inline void set_pending_irq(unsigned int irq, cpumask_t mask)
  28.124 -{
  28.125 -	irq_desc_t *desc = irq_desc + irq;
  28.126 -	unsigned long flags;
  28.127 -
  28.128 -	spin_lock_irqsave(&desc->lock, flags);
  28.129 -	desc->move_irq = 1;
  28.130 -	pending_irq_cpumask[irq] = mask;
  28.131 -	spin_unlock_irqrestore(&desc->lock, flags);
  28.132 -}
  28.133 -
  28.134 -static inline void
  28.135 -move_native_irq(int irq)
  28.136 -{
  28.137 -	cpumask_t tmp;
  28.138 -	irq_desc_t *desc = irq_descp(irq);
  28.139 -
  28.140 -	if (likely (!desc->move_irq))
  28.141 -		return;
  28.142 -
  28.143 -	desc->move_irq = 0;
  28.144 -
  28.145 -	if (likely(cpus_empty(pending_irq_cpumask[irq])))
  28.146 -		return;
  28.147 -
  28.148 -	if (!desc->handler->set_affinity)
  28.149 -		return;
  28.150 -
  28.151 -	/* note - we hold the desc->lock */
  28.152 -	cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
  28.153 -
  28.154 -	/*
  28.155 -	 * If there was a valid mask to work with, please
  28.156 -	 * do the disable, re-program, enable sequence.
   28.157 -	 * This is *not* particularly important for level-triggered
   28.158 -	 * interrupts, but in an edge-triggered case we might be
   28.159 -	 * setting the RTE while an active trigger is coming in,
   28.160 -	 * which could cause some IOAPICs to malfunction.
   28.161 -	 * Being paranoid, I guess!
  28.162 -	 */
  28.163 -	if (unlikely(!cpus_empty(tmp))) {
  28.164 -		desc->handler->disable(irq);
  28.165 -		desc->handler->set_affinity(irq,tmp);
  28.166 -		desc->handler->enable(irq);
  28.167 -	}
  28.168 -	cpus_clear(pending_irq_cpumask[irq]);
  28.169 -}
  28.170 -
  28.171 -#ifdef CONFIG_PCI_MSI
  28.172 -/*
  28.173 - * Wonder why these are dummies?
   28.174 - * For example, the set_ioapic_affinity_vector() calls the set_ioapic_affinity_irq()
   28.175 - * counterpart after translating the vector to irq info. We need to perform
   28.176 - * this operation on the real irq when we don't use vectors, i.e. when
   28.177 - * pci_use_vector() is false.
  28.178 - */
  28.179 -static inline void move_irq(int irq)
  28.180 -{
  28.181 -}
  28.182 -
  28.183 -static inline void set_irq_info(int irq, cpumask_t mask)
  28.184 -{
  28.185 -}
  28.186 -
  28.187 -#else // CONFIG_PCI_MSI
  28.188 -
  28.189 -static inline void move_irq(int irq)
  28.190 -{
  28.191 -	move_native_irq(irq);
  28.192 -}
  28.193 -
  28.194 -static inline void set_irq_info(int irq, cpumask_t mask)
  28.195 -{
  28.196 -	set_native_irq_info(irq, mask);
  28.197 -}
  28.198 -#endif // CONFIG_PCI_MSI
  28.199 -
  28.200 -#else	// CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE
  28.201 -
  28.202 -#define move_irq(x)
  28.203 -#define move_native_irq(x)
  28.204 -#define set_pending_irq(x,y)
  28.205 -static inline void set_irq_info(int irq, cpumask_t mask)
  28.206 -{
  28.207 -	set_native_irq_info(irq, mask);
  28.208 -}
  28.209 -
  28.210 -#endif // CONFIG_GENERIC_PENDING_IRQ
  28.211 -
  28.212 -#else // CONFIG_SMP
  28.213 -
  28.214 -#define move_irq(x)
  28.215 -#define move_native_irq(x)
  28.216 -
  28.217 -#endif // CONFIG_SMP
  28.218 -
  28.219 -extern int no_irq_affinity;
  28.220 -extern int noirqdebug_setup(char *str);
  28.221 -
  28.222 -extern fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
  28.223 -					struct irqaction *action);
  28.224 -extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs);
  28.225 -extern void note_interrupt(unsigned int irq, irq_desc_t *desc,
  28.226 -					int action_ret, struct pt_regs *regs);
  28.227 -extern int can_request_irq(unsigned int irq, unsigned long irqflags);
  28.228 -
  28.229 -extern void init_irq_proc(void);
  28.230 -
  28.231 -#ifdef CONFIG_AUTO_IRQ_AFFINITY
  28.232 -extern int select_smp_affinity(unsigned int irq);
  28.233 -#else
  28.234 -static inline int
  28.235 -select_smp_affinity(unsigned int irq)
  28.236 -{
  28.237 -	return 1;
  28.238 -}
  28.239 -#endif
  28.240 -
  28.241 -#endif
  28.242 -
  28.243 -extern hw_irq_controller no_irq_type;  /* needed in every arch ? */
  28.244 -
  28.245 -#endif
  28.246 -
  28.247 -#endif /* __irq_h */
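For orientation: the hw_interrupt_type removed above is the per-controller hook table that architecture code fills in, and no_irq_type at the bottom of the file is its canonical do-nothing instance. A minimal sketch of such a no-op controller, assuming only the structure definition shown above (the handler bodies are illustrative, not taken from this changeset):

    static unsigned int noop_startup(unsigned int irq) { return 0; }
    static void noop_handler(unsigned int irq) { }

    static struct hw_interrupt_type noop_irq_type = {
        .typename = "none",
        .startup  = noop_startup,
        .shutdown = noop_handler,
        .enable   = noop_handler,
        .disable  = noop_handler,
        .ack      = noop_handler,
        .end      = noop_handler,
        /* .set_affinity left NULL: this controller cannot retarget IRQs. */
    };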
    29.1 --- a/linux-2.6-xen-sparse/kernel/irq/manage.c	Thu Apr 06 14:22:52 2006 +0100
    29.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.3 @@ -1,425 +0,0 @@
    29.4 -/*
    29.5 - * linux/kernel/irq/manage.c
    29.6 - *
    29.7 - * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
    29.8 - *
    29.9 - * This file contains driver APIs to the irq subsystem.
   29.10 - */
   29.11 -
   29.12 -#include <linux/config.h>
   29.13 -#include <linux/irq.h>
   29.14 -#include <linux/module.h>
   29.15 -#include <linux/random.h>
   29.16 -#include <linux/interrupt.h>
   29.17 -
   29.18 -#include "internals.h"
   29.19 -
   29.20 -#ifdef CONFIG_SMP
   29.21 -
   29.22 -cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
   29.23 -
   29.24 -#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
   29.25 -cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
   29.26 -#endif
   29.27 -
   29.28 -/**
   29.29 - *	synchronize_irq - wait for pending IRQ handlers (on other CPUs)
   29.30 - *	@irq: interrupt number to wait for
   29.31 - *
   29.32 - *	This function waits for any pending IRQ handlers for this interrupt
   29.33 - *	to complete before returning. If you use this function while
    29.34 - *	holding a resource the IRQ handler may need, you will deadlock.
   29.35 - *
   29.36 - *	This function may be called - with care - from IRQ context.
   29.37 - */
   29.38 -void synchronize_irq(unsigned int irq)
   29.39 -{
   29.40 -	struct irq_desc *desc = irq_desc + irq;
   29.41 -
   29.42 -	if (irq >= NR_IRQS)
   29.43 -		return;
   29.44 -
   29.45 -	while (desc->status & IRQ_INPROGRESS)
   29.46 -		cpu_relax();
   29.47 -}
   29.48 -
   29.49 -EXPORT_SYMBOL(synchronize_irq);
   29.50 -
   29.51 -#endif
   29.52 -
   29.53 -/**
   29.54 - *	disable_irq_nosync - disable an irq without waiting
   29.55 - *	@irq: Interrupt to disable
   29.56 - *
   29.57 - *	Disable the selected interrupt line.  Disables and Enables are
   29.58 - *	nested.
   29.59 - *	Unlike disable_irq(), this function does not ensure existing
   29.60 - *	instances of the IRQ handler have completed before returning.
   29.61 - *
   29.62 - *	This function may be called from IRQ context.
   29.63 - */
   29.64 -void disable_irq_nosync(unsigned int irq)
   29.65 -{
   29.66 -	irq_desc_t *desc = irq_desc + irq;
   29.67 -	unsigned long flags;
   29.68 -
   29.69 -	if (irq >= NR_IRQS)
   29.70 -		return;
   29.71 -
   29.72 -	spin_lock_irqsave(&desc->lock, flags);
   29.73 -	if (!desc->depth++) {
   29.74 -		desc->status |= IRQ_DISABLED;
   29.75 -		desc->handler->disable(irq);
   29.76 -	}
   29.77 -	spin_unlock_irqrestore(&desc->lock, flags);
   29.78 -}
   29.79 -
   29.80 -EXPORT_SYMBOL(disable_irq_nosync);
   29.81 -
   29.82 -/**
   29.83 - *	disable_irq - disable an irq and wait for completion
   29.84 - *	@irq: Interrupt to disable
   29.85 - *
   29.86 - *	Disable the selected interrupt line.  Enables and Disables are
   29.87 - *	nested.
   29.88 - *	This function waits for any pending IRQ handlers for this interrupt
   29.89 - *	to complete before returning. If you use this function while
    29.90 - *	holding a resource the IRQ handler may need, you will deadlock.
   29.91 - *
   29.92 - *	This function may be called - with care - from IRQ context.
   29.93 - */
   29.94 -void disable_irq(unsigned int irq)
   29.95 -{
   29.96 -	irq_desc_t *desc = irq_desc + irq;
   29.97 -
   29.98 -	if (irq >= NR_IRQS)
   29.99 -		return;
  29.100 -
  29.101 -	disable_irq_nosync(irq);
  29.102 -	if (desc->action)
  29.103 -		synchronize_irq(irq);
  29.104 -}
  29.105 -
  29.106 -EXPORT_SYMBOL(disable_irq);
  29.107 -
  29.108 -/**
  29.109 - *	enable_irq - enable handling of an irq
  29.110 - *	@irq: Interrupt to enable
  29.111 - *
  29.112 - *	Undoes the effect of one call to disable_irq().  If this
  29.113 - *	matches the last disable, processing of interrupts on this
  29.114 - *	IRQ line is re-enabled.
  29.115 - *
  29.116 - *	This function may be called from IRQ context.
  29.117 - */
  29.118 -void enable_irq(unsigned int irq)
  29.119 -{
  29.120 -	irq_desc_t *desc = irq_desc + irq;
  29.121 -	unsigned long flags;
  29.122 -
  29.123 -	if (irq >= NR_IRQS)
  29.124 -		return;
  29.125 -
  29.126 -	spin_lock_irqsave(&desc->lock, flags);
  29.127 -	switch (desc->depth) {
  29.128 -	case 0:
  29.129 -		WARN_ON(1);
  29.130 -		break;
  29.131 -	case 1: {
  29.132 -		unsigned int status = desc->status & ~IRQ_DISABLED;
  29.133 -
  29.134 -		desc->status = status;
  29.135 -		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
  29.136 -			desc->status = status | IRQ_REPLAY;
  29.137 -			hw_resend_irq(desc->handler,irq);
  29.138 -		}
  29.139 -		desc->handler->enable(irq);
  29.140 -		/* fall-through */
  29.141 -	}
  29.142 -	default:
  29.143 -		desc->depth--;
  29.144 -	}
  29.145 -	spin_unlock_irqrestore(&desc->lock, flags);
  29.146 -}
  29.147 -
  29.148 -EXPORT_SYMBOL(enable_irq);
  29.149 -
  29.150 -/*
  29.151 - * Internal function that tells the architecture code whether a
  29.152 - * particular irq has been exclusively allocated or is available
  29.153 - * for driver use.
  29.154 - */
  29.155 -int can_request_irq(unsigned int irq, unsigned long irqflags)
  29.156 -{
  29.157 -	struct irqaction *action;
  29.158 -
  29.159 -	if (irq >= NR_IRQS)
  29.160 -		return 0;
  29.161 -
  29.162 -	action = irq_desc[irq].action;
  29.163 -	if (action)
  29.164 -		if (irqflags & action->flags & SA_SHIRQ)
  29.165 -			action = NULL;
  29.166 -
  29.167 -	return !action;
  29.168 -}
  29.169 -
  29.170 -/**
  29.171 - *	setup_irq - register an irqaction structure
  29.172 - *	@irq: Interrupt to register
  29.173 - *	@irqaction: The irqaction structure to be registered
  29.174 - *
  29.175 - *	Normally called by request_irq, this function can be used
  29.176 - *	directly to allocate special interrupts that are part of the
  29.177 - *	architecture.
  29.178 - */
  29.179 -int setup_irq(unsigned int irq, struct irqaction * new)
  29.180 -{
  29.181 -	struct irq_desc *desc = irq_desc + irq;
  29.182 -	struct irqaction *old, **p;
  29.183 -	unsigned long flags;
  29.184 -	int shared = 0;
  29.185 -
  29.186 -	if (irq >= NR_IRQS)
  29.187 -		return -EINVAL;
  29.188 -
  29.189 -	if (desc->handler == &no_irq_type)
  29.190 -		return -ENOSYS;
  29.191 -	/*
  29.192 -	 * Some drivers like serial.c use request_irq() heavily,
  29.193 -	 * so we have to be careful not to interfere with a
  29.194 -	 * running system.
  29.195 -	 */
  29.196 -	if (new->flags & SA_SAMPLE_RANDOM) {
  29.197 -		/*
   29.198 -		 * This function might sleep, so we want to call it first,
   29.199 -		 * outside of the atomic block.
   29.200 -		 * Yes, this might clear the entropy pool if an attempt is
   29.201 -		 * made to load the wrong driver without actually
   29.202 -		 * installing a new handler, but is that really a problem
   29.203 -		 * when only the sysadmin is able to do this?
  29.204 -		 */
  29.205 -		rand_initialize_irq(irq);
  29.206 -	}
  29.207 -
  29.208 -	/*
  29.209 -	 * The following block of code has to be executed atomically
  29.210 -	 */
  29.211 -	spin_lock_irqsave(&desc->lock,flags);
  29.212 -	p = &desc->action;
  29.213 -	if ((old = *p) != NULL) {
  29.214 -		/* Can't share interrupts unless both agree to */
  29.215 -		if (!(old->flags & new->flags & SA_SHIRQ)) {
  29.216 -			spin_unlock_irqrestore(&desc->lock,flags);
  29.217 -			return -EBUSY;
  29.218 -		}
  29.219 -
  29.220 -		/* add new interrupt at end of irq queue */
  29.221 -		do {
  29.222 -			p = &old->next;
  29.223 -			old = *p;
  29.224 -		} while (old);
  29.225 -		shared = 1;
  29.226 -	}
  29.227 -
  29.228 -	*p = new;
  29.229 -
  29.230 -	if (!shared) {
  29.231 -		desc->depth = 0;
  29.232 -		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT |
  29.233 -				  IRQ_WAITING | IRQ_INPROGRESS);
  29.234 -		if (desc->handler->startup)
  29.235 -			desc->handler->startup(irq);
  29.236 -		else
  29.237 -			desc->handler->enable(irq);
  29.238 -	}
  29.239 -	spin_unlock_irqrestore(&desc->lock,flags);
  29.240 -
  29.241 -	new->irq = irq;
  29.242 -	register_irq_proc(irq);
  29.243 -	new->dir = NULL;
  29.244 -	register_handler_proc(irq, new);
  29.245 -
  29.246 -	return 0;
  29.247 -}
  29.248 -
  29.249 -/*
  29.250 - *	teardown_irq - unregister an irqaction
  29.251 - *	@irq: Interrupt line being freed
  29.252 - *	@old: Pointer to the irqaction that is to be unregistered
  29.253 - *
  29.254 - *	This function is called by free_irq and does the actual
   29.255 - *	business of unregistering the handler. It exists as a
   29.256 - *	separate function to enable handlers to be unregistered
   29.257 - *	for irqactions that have been allocated statically at
  29.258 - *	boot time.
  29.259 - *
  29.260 - *	This function must not be called from interrupt context.
  29.261 - */
  29.262 -#ifndef CONFIG_XEN
  29.263 -static
  29.264 -#endif
  29.265 -int teardown_irq(unsigned int irq, struct irqaction * old)
  29.266 -{
  29.267 -	struct irq_desc *desc;
  29.268 -	struct irqaction **p;
  29.269 -	unsigned long flags;
  29.270 -
  29.271 -	if (irq >= NR_IRQS)
  29.272 -		return -ENOENT;
  29.273 -
  29.274 -	desc = irq_desc + irq;
  29.275 -	spin_lock_irqsave(&desc->lock,flags);
  29.276 -	p = &desc->action;
  29.277 -	for (;;) {
  29.278 -		struct irqaction * action = *p;
  29.279 -
  29.280 -		if (action) {
  29.281 -			struct irqaction **pp = p;
  29.282 -
  29.283 -			p = &action->next;
  29.284 -			if (action != old)
  29.285 -				continue;
  29.286 -
  29.287 -			/* Found it - now remove it from the list of entries */
  29.288 -			*pp = action->next;
  29.289 -
  29.290 -			/* Currently used only by UML, might disappear one day.*/
  29.291 -#ifdef CONFIG_IRQ_RELEASE_METHOD
  29.292 -			if (desc->handler->release)
  29.293 -				desc->handler->release(irq, dev_id);
  29.294 -#endif
  29.295 -
  29.296 -			if (!desc->action) {
  29.297 -				desc->status |= IRQ_DISABLED;
  29.298 -				if (desc->handler->shutdown)
  29.299 -					desc->handler->shutdown(irq);
  29.300 -				else
  29.301 -					desc->handler->disable(irq);
  29.302 -			}
  29.303 -			spin_unlock_irqrestore(&desc->lock,flags);
  29.304 -			unregister_handler_proc(irq, action);
  29.305 -
  29.306 -			/* Make sure it's not being used on another CPU */
  29.307 -			synchronize_irq(irq);
  29.308 -			return 0;
  29.309 -		}
  29.310 -		printk(KERN_ERR "Trying to teardown free IRQ%d\n",irq);
  29.311 -		spin_unlock_irqrestore(&desc->lock,flags);
  29.312 -		return -ENOENT;
  29.313 -	}
  29.314 -}
  29.315 -
  29.316 -/**
  29.317 - *	free_irq - free an interrupt
  29.318 - *	@irq: Interrupt line to free
  29.319 - *	@dev_id: Device identity to free
  29.320 - *
  29.321 - *	Remove an interrupt handler. The handler is removed and if the
  29.322 - *	interrupt line is no longer in use by any driver it is disabled.
  29.323 - *	On a shared IRQ the caller must ensure the interrupt is disabled
  29.324 - *	on the card it drives before calling this function. The function
  29.325 - *	does not return until any executing interrupts for this IRQ
  29.326 - *	have completed.
  29.327 - *
  29.328 - *	This function must not be called from interrupt context.
  29.329 - */
  29.330 -void free_irq(unsigned int irq, void *dev_id)
  29.331 -{
  29.332 -	struct irq_desc *desc;
  29.333 -	struct irqaction *action;
  29.334 -	unsigned long flags;
  29.335 -
  29.336 -	if (irq >= NR_IRQS)
  29.337 -		return;
  29.338 -
  29.339 -	desc = irq_desc + irq;
  29.340 -	spin_lock_irqsave(&desc->lock,flags);
  29.341 -	for (action = desc->action; action != NULL; action = action->next) {
  29.342 -		if (action->dev_id != dev_id)
  29.343 -			continue;
  29.344 -
  29.345 -		spin_unlock_irqrestore(&desc->lock,flags);
  29.346 -
  29.347 -		if (teardown_irq(irq, action) == 0)
  29.348 -			kfree(action);
  29.349 -		return;
  29.350 -	}
  29.351 -	printk(KERN_ERR "Trying to free free IRQ%d\n",irq);
  29.352 -	spin_unlock_irqrestore(&desc->lock,flags);
  29.353 -	return;
  29.354 -}
  29.355 -
  29.356 -EXPORT_SYMBOL(free_irq);
  29.357 -
  29.358 -/**
  29.359 - *	request_irq - allocate an interrupt line
  29.360 - *	@irq: Interrupt line to allocate
  29.361 - *	@handler: Function to be called when the IRQ occurs
  29.362 - *	@irqflags: Interrupt type flags
  29.363 - *	@devname: An ascii name for the claiming device
  29.364 - *	@dev_id: A cookie passed back to the handler function
  29.365 - *
  29.366 - *	This call allocates interrupt resources and enables the
  29.367 - *	interrupt line and IRQ handling. From the point this
  29.368 - *	call is made your handler function may be invoked. Since
  29.369 - *	your handler function must clear any interrupt the board
  29.370 - *	raises, you must take care both to initialise your hardware
  29.371 - *	and to set up the interrupt handler in the right order.
  29.372 - *
  29.373 - *	Dev_id must be globally unique. Normally the address of the
  29.374 - *	device data structure is used as the cookie. Since the handler
  29.375 - *	receives this value it makes sense to use it.
  29.376 - *
  29.377 - *	If your interrupt is shared you must pass a non NULL dev_id
  29.378 - *	as this is required when freeing the interrupt.
  29.379 - *
  29.380 - *	Flags:
  29.381 - *
  29.382 - *	SA_SHIRQ		Interrupt is shared
  29.383 - *	SA_INTERRUPT		Disable local interrupts while processing
  29.384 - *	SA_SAMPLE_RANDOM	The interrupt can be used for entropy
  29.385 - *
  29.386 - */
  29.387 -int request_irq(unsigned int irq,
  29.388 -		irqreturn_t (*handler)(int, void *, struct pt_regs *),
  29.389 -		unsigned long irqflags, const char * devname, void *dev_id)
  29.390 -{
  29.391 -	struct irqaction * action;
  29.392 -	int retval;
  29.393 -
  29.394 -	/*
  29.395 -	 * Sanity-check: shared interrupts must pass in a real dev-ID,
  29.396 -	 * otherwise we'll have trouble later trying to figure out
  29.397 -	 * which interrupt is which (messes up the interrupt freeing
  29.398 -	 * logic etc).
  29.399 -	 */
  29.400 -	if ((irqflags & SA_SHIRQ) && !dev_id)
  29.401 -		return -EINVAL;
  29.402 -	if (irq >= NR_IRQS)
  29.403 -		return -EINVAL;
  29.404 -	if (!handler)
  29.405 -		return -EINVAL;
  29.406 -
  29.407 -	action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
  29.408 -	if (!action)
  29.409 -		return -ENOMEM;
  29.410 -
  29.411 -	action->handler = handler;
  29.412 -	action->flags = irqflags;
  29.413 -	cpus_clear(action->mask);
  29.414 -	action->name = devname;
  29.415 -	action->next = NULL;
  29.416 -	action->dev_id = dev_id;
  29.417 -
  29.418 -	select_smp_affinity(irq);
  29.419 -
  29.420 -	retval = setup_irq(irq, action);
  29.421 -	if (retval)
  29.422 -		kfree(action);
  29.423 -
  29.424 -	return retval;
  29.425 -}
  29.426 -
  29.427 -EXPORT_SYMBOL(request_irq);
  29.428 -
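The kernel-doc removed above spells out the driver contract: a globally unique dev_id, a non-NULL dev_id for shared lines, and the 2.6.16-era three-argument handler signature. A minimal sketch of a driver following it (MY_IRQ, my_dev, and the handler body are hypothetical):

    #include <linux/interrupt.h>

    #define MY_IRQ 9	/* hypothetical interrupt line */

    static struct { int dummy; } my_dev;	/* unique cookie used as dev_id */

    static irqreturn_t my_handler(int irq, void *dev_id, struct pt_regs *regs)
    {
        /* Clear the interrupt condition on the board here. */
        return IRQ_HANDLED;
    }

    static int my_attach(void)
    {
        /* SA_SHIRQ: shared line, so dev_id must be non-NULL and unique. */
        return request_irq(MY_IRQ, my_handler, SA_SHIRQ, "mydev", &my_dev);
    }

    static void my_detach(void)
    {
        free_irq(MY_IRQ, &my_dev);	/* same dev_id passed to request_irq() */
    }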
    30.1 --- a/linux-2.6-xen-sparse/lib/Kconfig.debug	Thu Apr 06 14:22:52 2006 +0100
    30.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.3 @@ -1,224 +0,0 @@
    30.4 -
    30.5 -config PRINTK_TIME
    30.6 -	bool "Show timing information on printks"
    30.7 -	help
    30.8 -	  Selecting this option causes timing information to be
    30.9 -	  included in printk output.  This allows you to measure
   30.10 -	  the interval between kernel operations, including bootup
   30.11 -	  operations.  This is useful for identifying long delays
   30.12 -	  in kernel startup.
   30.13 -
   30.14 -
   30.15 -config MAGIC_SYSRQ
   30.16 -	bool "Magic SysRq key"
   30.17 -	depends on !UML
   30.18 -	help
   30.19 -	  If you say Y here, you will have some control over the system even
   30.20 -	  if the system crashes for example during kernel debugging (e.g., you
   30.21 -	  will be able to flush the buffer cache to disk, reboot the system
   30.22 -	  immediately or dump some status information). This is accomplished
   30.23 -	  by pressing various keys while holding SysRq (Alt+PrintScreen). It
   30.24 -	  also works on a serial console (on PC hardware at least), if you
   30.25 -	  send a BREAK and then within 5 seconds a command keypress. The
   30.26 -	  keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
   30.27 -	  unless you really know what this hack does.
   30.28 -
   30.29 -config DEBUG_KERNEL
   30.30 -	bool "Kernel debugging"
   30.31 -	help
   30.32 -	  Say Y here if you are developing drivers or trying to debug and
   30.33 -	  identify kernel problems.
   30.34 -
   30.35 -config LOG_BUF_SHIFT
   30.36 -	int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL
   30.37 -	range 12 21
   30.38 -	default 17 if S390
   30.39 -	default 16 if X86_NUMAQ || IA64
   30.40 -	default 15 if SMP
   30.41 -	default 14
   30.42 -	help
   30.43 -	  Select kernel log buffer size as a power of 2.
   30.44 -	  Defaults and Examples:
   30.45 -	  	     17 => 128 KB for S/390
   30.46 -		     16 => 64 KB for x86 NUMAQ or IA-64
   30.47 -	             15 => 32 KB for SMP
   30.48 -	             14 => 16 KB for uniprocessor
   30.49 -		     13 =>  8 KB
   30.50 -		     12 =>  4 KB
   30.51 -
   30.52 -config DETECT_SOFTLOCKUP
   30.53 -	bool "Detect Soft Lockups"
   30.54 -	depends on DEBUG_KERNEL
   30.55 -	default y
   30.56 -	help
   30.57 -	  Say Y here to enable the kernel to detect "soft lockups",
   30.58 -	  which are bugs that cause the kernel to loop in kernel
   30.59 -	  mode for more than 10 seconds, without giving other tasks a
   30.60 -	  chance to run.
   30.61 -
   30.62 -	  When a soft-lockup is detected, the kernel will print the
   30.63 -	  current stack trace (which you should report), but the
   30.64 -	  system will stay locked up. This feature has negligible
   30.65 -	  overhead.
   30.66 -
    30.67 -	  (Note that "hard lockups" are a separate type of bug that
   30.68 -	   can be detected via the NMI-watchdog, on platforms that
   30.69 -	   support it.)
   30.70 -
   30.71 -config SCHEDSTATS
   30.72 -	bool "Collect scheduler statistics"
   30.73 -	depends on DEBUG_KERNEL && PROC_FS
   30.74 -	help
   30.75 -	  If you say Y here, additional code will be inserted into the
   30.76 -	  scheduler and related routines to collect statistics about
   30.77 -	  scheduler behavior and provide them in /proc/schedstat.  These
    30.78 -	  stats may be useful for both tuning and debugging the scheduler.
   30.79 -	  If you aren't debugging the scheduler or trying to tune a specific
   30.80 -	  application, you can say N to avoid the very slight overhead
   30.81 -	  this adds.
   30.82 -
   30.83 -config DEBUG_SLAB
   30.84 -	bool "Debug memory allocations"
   30.85 -	depends on DEBUG_KERNEL && SLAB
   30.86 -	help
   30.87 -	  Say Y here to have the kernel do limited verification on memory
   30.88 -	  allocation as well as poisoning memory on free to catch use of freed
   30.89 -	  memory. This can make kmalloc/kfree-intensive workloads much slower.
   30.90 -
   30.91 -config DEBUG_PREEMPT
   30.92 -	bool "Debug preemptible kernel"
   30.93 -	depends on DEBUG_KERNEL && PREEMPT
   30.94 -	default y
   30.95 -	help
   30.96 -	  If you say Y here then the kernel will use a debug variant of the
   30.97 -	  commonly used smp_processor_id() function and will print warnings
   30.98 -	  if kernel code uses it in a preemption-unsafe way. Also, the kernel
   30.99 -	  will detect preemption count underflows.
  30.100 -
  30.101 -config DEBUG_MUTEXES
  30.102 -	bool "Mutex debugging, deadlock detection"
  30.103 -	default y
  30.104 -	depends on DEBUG_KERNEL
  30.105 -	help
  30.106 -	 This allows mutex semantics violations and mutex related deadlocks
  30.107 -	 (lockups) to be detected and reported automatically.
  30.108 -
  30.109 -config DEBUG_SPINLOCK
  30.110 -	bool "Spinlock debugging"
  30.111 -	depends on DEBUG_KERNEL
  30.112 -	help
  30.113 -	  Say Y here and build SMP to catch missing spinlock initialization
  30.114 -	  and certain other kinds of spinlock errors commonly made.  This is
  30.115 -	  best used in conjunction with the NMI watchdog so that spinlock
  30.116 -	  deadlocks are also debuggable.
  30.117 -
  30.118 -config DEBUG_SPINLOCK_SLEEP
  30.119 -	bool "Sleep-inside-spinlock checking"
  30.120 -	depends on DEBUG_KERNEL
  30.121 -	help
  30.122 -	  If you say Y here, various routines which may sleep will become very
  30.123 -	  noisy if they are called with a spinlock held.
  30.124 -
  30.125 -config DEBUG_KOBJECT
  30.126 -	bool "kobject debugging"
  30.127 -	depends on DEBUG_KERNEL
  30.128 -	help
  30.129 -	  If you say Y here, some extra kobject debugging messages will be sent
  30.130 -	  to the syslog. 
  30.131 -
  30.132 -config DEBUG_HIGHMEM
  30.133 -	bool "Highmem debugging"
  30.134 -	depends on DEBUG_KERNEL && HIGHMEM
  30.135 -	help
   30.136 -	  This option enables additional error checking for high memory systems.
  30.137 -	  Disable for production systems.
  30.138 -
  30.139 -config DEBUG_BUGVERBOSE
  30.140 -	bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED
  30.141 -	depends on BUG
  30.142 -	depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV
  30.143 -	default !EMBEDDED
  30.144 -	help
  30.145 -	  Say Y here to make BUG() panics output the file name and line number
  30.146 -	  of the BUG call as well as the EIP and oops trace.  This aids
  30.147 -	  debugging but costs about 70-100K of memory.
  30.148 -
  30.149 -config DEBUG_INFO
  30.150 -	bool "Compile the kernel with debug info"
  30.151 -	depends on DEBUG_KERNEL && !X86_64_XEN
  30.152 -	help
   30.153 -	  If you say Y here, the resulting kernel image will include
  30.154 -	  debugging info resulting in a larger kernel image.
  30.155 -	  Say Y here only if you plan to debug the kernel.
  30.156 -
  30.157 -	  If unsure, say N.
  30.158 -
  30.159 -config DEBUG_IOREMAP
  30.160 -	bool "Enable ioremap() debugging"
  30.161 -	depends on DEBUG_KERNEL && PARISC
  30.162 -	help
  30.163 -	  Enabling this option will cause the kernel to distinguish between
  30.164 -	  ioremapped and physical addresses.  It will print a backtrace (at
  30.165 -	  most one every 10 seconds), hopefully allowing you to see which
  30.166 -	  drivers need work.  Fixing all these problems is a prerequisite
  30.167 -	  for turning on USE_HPPA_IOREMAP.  The warnings are harmless;
  30.168 -	  the kernel has enough information to fix the broken drivers
  30.169 -	  automatically, but we'd like to make it more efficient by not
  30.170 -	  having to do that.
  30.171 -
  30.172 -config DEBUG_FS
  30.173 -	bool "Debug Filesystem"
  30.174 -	depends on DEBUG_KERNEL && SYSFS
  30.175 -	help
  30.176 -	  debugfs is a virtual file system that kernel developers use to put
  30.177 -	  debugging files into.  Enable this option to be able to read and
  30.178 -	  write to these files.
  30.179 -
  30.180 -	  If unsure, say N.
  30.181 -
  30.182 -config DEBUG_VM
  30.183 -	bool "Debug VM"
  30.184 -	depends on DEBUG_KERNEL
  30.185 -	help
  30.186 -	  Enable this to turn on extended checks in the virtual-memory system
   30.187 -	  that may impact performance.
  30.188 -
  30.189 -	  If unsure, say N.
  30.190 -
  30.191 -config FRAME_POINTER
  30.192 -	bool "Compile the kernel with frame pointers"
  30.193 -	depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML)
  30.194 -	default y if DEBUG_INFO && UML
  30.195 -	help
  30.196 -	  If you say Y here the resulting kernel image will be slightly larger
  30.197 -	  and slower, but it might give very useful debugging information on
  30.198 -	  some architectures or if you use external debuggers.
  30.199 -	  If you don't debug the kernel, you can say N.
  30.200 -
  30.201 -config FORCED_INLINING
  30.202 -	bool "Force gcc to inline functions marked 'inline'"
  30.203 -	depends on DEBUG_KERNEL
  30.204 -	default y
  30.205 -	help
  30.206 -	  This option determines if the kernel forces gcc to inline the functions
  30.207 -	  developers have marked 'inline'. Doing so takes away freedom from gcc to
  30.208 -	  do what it thinks is best, which is desirable for the gcc 3.x series of
   30.209 -	  compilers. The gcc 4.x series has a rewritten inlining algorithm, so
   30.210 -	  disabling this option will generate a smaller kernel there. Hopefully
   30.211 -	  that algorithm is good enough that letting gcc 4 make the decision can
   30.212 -	  become the default in the future; until then, this option is here to
   30.213 -	  test gcc for this.
  30.214 -
  30.215 -config RCU_TORTURE_TEST
  30.216 -	tristate "torture tests for RCU"
  30.217 -	depends on DEBUG_KERNEL
  30.218 -	default n
  30.219 -	help
  30.220 -	  This option provides a kernel module that runs torture tests
  30.221 -	  on the RCU infrastructure.  The kernel module may be built
  30.222 -	  after the fact on the running kernel to be tested, if desired.
  30.223 -
  30.224 -	  Say Y here if you want RCU torture tests to start automatically
  30.225 -	  at boot time (you probably don't).
  30.226 -	  Say M if you want the RCU torture tests to build as a module.
  30.227 -	  Say N if you are unsure.
    31.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.2 +++ b/patches/linux-2.6.16/xenoprof-generic.patch	Fri Apr 07 11:52:00 2006 +0100
    31.3 @@ -0,0 +1,384 @@
    31.4 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/buffer_sync.c ./drivers/oprofile/buffer_sync.c
    31.5 +--- ../pristine-linux-2.6.16/drivers/oprofile/buffer_sync.c	2006-03-20 05:53:29.000000000 +0000
    31.6 ++++ ./drivers/oprofile/buffer_sync.c	2006-04-03 15:53:05.000000000 +0100
    31.7 +@@ -6,6 +6,10 @@
    31.8 +  *
    31.9 +  * @author John Levon <levon@movementarian.org>
   31.10 +  *
   31.11 ++ * Modified by Aravind Menon for Xen
   31.12 ++ * These modifications are:
   31.13 ++ * Copyright (C) 2005 Hewlett-Packard Co.
   31.14 ++ *
   31.15 +  * This is the core of the buffer management. Each
   31.16 +  * CPU buffer is processed and entered into the
   31.17 +  * global event buffer. Such processing is necessary
   31.18 +@@ -275,15 +279,24 @@ static void add_cpu_switch(int i)
   31.19 + 	last_cookie = INVALID_COOKIE;
   31.20 + }
   31.21 + 
   31.22 +-static void add_kernel_ctx_switch(unsigned int in_kernel)
   31.23 ++static void add_cpu_mode_switch(unsigned int cpu_mode)
   31.24 + {
   31.25 + 	add_event_entry(ESCAPE_CODE);
   31.26 +-	if (in_kernel)
   31.27 +-		add_event_entry(KERNEL_ENTER_SWITCH_CODE); 
   31.28 +-	else
   31.29 +-		add_event_entry(KERNEL_EXIT_SWITCH_CODE); 
   31.30 ++	switch (cpu_mode) {
   31.31 ++	case CPU_MODE_USER:
   31.32 ++		add_event_entry(USER_ENTER_SWITCH_CODE);
   31.33 ++		break;
   31.34 ++	case CPU_MODE_KERNEL:
   31.35 ++		add_event_entry(KERNEL_ENTER_SWITCH_CODE);
   31.36 ++		break;
   31.37 ++	case CPU_MODE_XEN:
   31.38 ++		add_event_entry(XEN_ENTER_SWITCH_CODE);
   31.39 ++		break;
   31.40 ++	default:
   31.41 ++		break;
   31.42 ++	}
   31.43 + }
   31.44 +- 
   31.45 ++
   31.46 + static void
   31.47 + add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
   31.48 + {
   31.49 +@@ -348,9 +361,9 @@ static int add_us_sample(struct mm_struc
   31.50 +  * for later lookup from userspace.
   31.51 +  */
   31.52 + static int
   31.53 +-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
   31.54 ++add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
   31.55 + {
   31.56 +-	if (in_kernel) {
   31.57 ++	if (cpu_mode >= CPU_MODE_KERNEL) {
   31.58 + 		add_sample_entry(s->eip, s->event);
   31.59 + 		return 1;
   31.60 + 	} else if (mm) {
   31.61 +@@ -496,7 +509,7 @@ void sync_buffer(int cpu)
   31.62 + 	struct mm_struct *mm = NULL;
   31.63 + 	struct task_struct * new;
   31.64 + 	unsigned long cookie = 0;
   31.65 +-	int in_kernel = 1;
   31.66 ++	int cpu_mode = 1;
   31.67 + 	unsigned int i;
   31.68 + 	sync_buffer_state state = sb_buffer_start;
   31.69 + 	unsigned long available;
   31.70 +@@ -513,12 +526,12 @@ void sync_buffer(int cpu)
   31.71 + 		struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
   31.72 +  
   31.73 + 		if (is_code(s->eip)) {
   31.74 +-			if (s->event <= CPU_IS_KERNEL) {
   31.75 ++			if (s->event <= CPU_MODE_XEN) {
   31.76 + 				/* kernel/userspace switch */
   31.77 +-				in_kernel = s->event;
   31.78 ++				cpu_mode = s->event;
   31.79 + 				if (state == sb_buffer_start)
   31.80 + 					state = sb_sample_start;
   31.81 +-				add_kernel_ctx_switch(s->event);
   31.82 ++				add_cpu_mode_switch(s->event);
   31.83 + 			} else if (s->event == CPU_TRACE_BEGIN) {
   31.84 + 				state = sb_bt_start;
   31.85 + 				add_trace_begin();
   31.86 +@@ -536,7 +549,7 @@ void sync_buffer(int cpu)
   31.87 + 			}
   31.88 + 		} else {
   31.89 + 			if (state >= sb_bt_start &&
   31.90 +-			    !add_sample(mm, s, in_kernel)) {
   31.91 ++			    !add_sample(mm, s, cpu_mode)) {
   31.92 + 				if (state == sb_bt_start) {
   31.93 + 					state = sb_bt_ignore;
   31.94 + 					atomic_inc(&oprofile_stats.bt_lost_no_mapping);
   31.95 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.c ./drivers/oprofile/cpu_buffer.c
   31.96 +--- ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.c	2006-03-20 05:53:29.000000000 +0000
   31.97 ++++ ./drivers/oprofile/cpu_buffer.c	2006-04-03 15:53:05.000000000 +0100
   31.98 +@@ -6,6 +6,10 @@
   31.99 +  *
  31.100 +  * @author John Levon <levon@movementarian.org>
  31.101 +  *
  31.102 ++ * Modified by Aravind Menon for Xen
  31.103 ++ * These modifications are:
  31.104 ++ * Copyright (C) 2005 Hewlett-Packard Co.
  31.105 ++ *
  31.106 +  * Each CPU has a local buffer that stores PC value/event
  31.107 +  * pairs. We also log context switches when we notice them.
  31.108 +  * Eventually each CPU's buffer is processed into the global
  31.109 +@@ -58,7 +62,7 @@ int alloc_cpu_buffers(void)
  31.110 + 			goto fail;
  31.111 +  
  31.112 + 		b->last_task = NULL;
  31.113 +-		b->last_is_kernel = -1;
  31.114 ++		b->last_cpu_mode = -1;
  31.115 + 		b->tracing = 0;
  31.116 + 		b->buffer_size = buffer_size;
  31.117 + 		b->tail_pos = 0;
  31.118 +@@ -114,7 +118,7 @@ void cpu_buffer_reset(struct oprofile_cp
  31.119 + 	 * collected will populate the buffer with proper
  31.120 + 	 * values to initialize the buffer
  31.121 + 	 */
  31.122 +-	cpu_buf->last_is_kernel = -1;
  31.123 ++	cpu_buf->last_cpu_mode = -1;
  31.124 + 	cpu_buf->last_task = NULL;
  31.125 + }
  31.126 + 
  31.127 +@@ -164,13 +168,13 @@ add_code(struct oprofile_cpu_buffer * bu
  31.128 +  * because of the head/tail separation of the writer and reader
  31.129 +  * of the CPU buffer.
  31.130 +  *
  31.131 +- * is_kernel is needed because on some architectures you cannot
  31.132 ++ * cpu_mode is needed because on some architectures you cannot
  31.133 +  * tell if you are in kernel or user space simply by looking at
  31.134 +- * pc. We tag this in the buffer by generating kernel enter/exit
  31.135 +- * events whenever is_kernel changes
  31.136 ++ * pc. We tag this in the buffer by generating kernel/user (and xen)
  31.137 ++ *  enter events whenever cpu_mode changes
  31.138 +  */
  31.139 + static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
  31.140 +-		      int is_kernel, unsigned long event)
  31.141 ++		      int cpu_mode, unsigned long event)
  31.142 + {
  31.143 + 	struct task_struct * task;
  31.144 + 
  31.145 +@@ -181,16 +185,16 @@ static int log_sample(struct oprofile_cp
  31.146 + 		return 0;
  31.147 + 	}
  31.148 + 
  31.149 +-	is_kernel = !!is_kernel;
  31.150 ++	WARN_ON(cpu_mode > CPU_MODE_XEN);
  31.151 + 
  31.152 + 	task = current;
  31.153 + 
  31.154 + 	/* notice a switch from user->kernel or vice versa */
  31.155 +-	if (cpu_buf->last_is_kernel != is_kernel) {
  31.156 +-		cpu_buf->last_is_kernel = is_kernel;
  31.157 +-		add_code(cpu_buf, is_kernel);
  31.158 ++	if (cpu_buf->last_cpu_mode != cpu_mode) {
  31.159 ++		cpu_buf->last_cpu_mode = cpu_mode;
  31.160 ++		add_code(cpu_buf, cpu_mode);
  31.161 + 	}
  31.162 +-
  31.163 ++	
  31.164 + 	/* notice a task switch */
  31.165 + 	if (cpu_buf->last_task != task) {
  31.166 + 		cpu_buf->last_task = task;
  31.167 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.h ./drivers/oprofile/cpu_buffer.h
  31.168 +--- ../pristine-linux-2.6.16/drivers/oprofile/cpu_buffer.h	2006-03-20 05:53:29.000000000 +0000
  31.169 ++++ ./drivers/oprofile/cpu_buffer.h	2006-04-03 15:53:05.000000000 +0100
  31.170 +@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer {
  31.171 + 	volatile unsigned long tail_pos;
  31.172 + 	unsigned long buffer_size;
  31.173 + 	struct task_struct * last_task;
  31.174 +-	int last_is_kernel;
  31.175 ++	int last_cpu_mode;
  31.176 + 	int tracing;
  31.177 + 	struct op_sample * buffer;
  31.178 + 	unsigned long sample_received;
  31.179 +@@ -51,7 +51,9 @@ extern struct oprofile_cpu_buffer cpu_bu
  31.180 + void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
  31.181 + 
  31.182 + /* transient events for the CPU buffer -> event buffer */
  31.183 +-#define CPU_IS_KERNEL 1
  31.184 +-#define CPU_TRACE_BEGIN 2
  31.185 ++#define CPU_MODE_USER    0
  31.186 ++#define CPU_MODE_KERNEL  1
  31.187 ++#define CPU_MODE_XEN     2
  31.188 ++#define CPU_TRACE_BEGIN  3
  31.189 + 
  31.190 + #endif /* OPROFILE_CPU_BUFFER_H */
  31.191 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/event_buffer.h ./drivers/oprofile/event_buffer.h
  31.192 +--- ../pristine-linux-2.6.16/drivers/oprofile/event_buffer.h	2006-03-20 05:53:29.000000000 +0000
  31.193 ++++ ./drivers/oprofile/event_buffer.h	2006-04-03 15:53:05.000000000 +0100
  31.194 +@@ -29,11 +29,12 @@ void wake_up_buffer_waiter(void);
  31.195 + #define CPU_SWITCH_CODE 		2
  31.196 + #define COOKIE_SWITCH_CODE 		3
  31.197 + #define KERNEL_ENTER_SWITCH_CODE	4
  31.198 +-#define KERNEL_EXIT_SWITCH_CODE		5
  31.199 ++#define USER_ENTER_SWITCH_CODE		5
  31.200 + #define MODULE_LOADED_CODE		6
  31.201 + #define CTX_TGID_CODE			7
  31.202 + #define TRACE_BEGIN_CODE		8
  31.203 + #define TRACE_END_CODE			9
  31.204 ++#define XEN_ENTER_SWITCH_CODE		10
  31.205 +  
  31.206 + #define INVALID_COOKIE ~0UL
  31.207 + #define NO_COOKIE 0UL
  31.208 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprof.c ./drivers/oprofile/oprof.c
  31.209 +--- ../pristine-linux-2.6.16/drivers/oprofile/oprof.c	2006-03-20 05:53:29.000000000 +0000
  31.210 ++++ ./drivers/oprofile/oprof.c	2006-04-03 15:53:05.000000000 +0100
  31.211 +@@ -5,6 +5,10 @@
  31.212 +  * @remark Read the file COPYING
  31.213 +  *
  31.214 +  * @author John Levon <levon@movementarian.org>
  31.215 ++ *
  31.216 ++ * Modified by Aravind Menon for Xen
  31.217 ++ * These modifications are:
  31.218 ++ * Copyright (C) 2005 Hewlett-Packard Co.
  31.219 +  */
  31.220 + 
  31.221 + #include <linux/kernel.h>
  31.222 +@@ -19,7 +23,7 @@
  31.223 + #include "cpu_buffer.h"
  31.224 + #include "buffer_sync.h"
  31.225 + #include "oprofile_stats.h"
  31.226 +- 
  31.227 ++
  31.228 + struct oprofile_operations oprofile_ops;
  31.229 + 
  31.230 + unsigned long oprofile_started;
  31.231 +@@ -33,6 +37,17 @@ static DECLARE_MUTEX(start_sem);
  31.232 +  */
  31.233 + static int timer = 0;
  31.234 + 
  31.235 ++extern unsigned int adomains;
  31.236 ++extern int active_domains[MAX_OPROF_DOMAINS];
  31.237 ++
  31.238 ++int oprofile_set_active(void)
  31.239 ++{
  31.240 ++	if (oprofile_ops.set_active)
  31.241 ++		return oprofile_ops.set_active(active_domains, adomains);
  31.242 ++
  31.243 ++	return -EINVAL;
  31.244 ++}
  31.245 ++
  31.246 + int oprofile_setup(void)
  31.247 + {
  31.248 + 	int err;
  31.249 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprof.h ./drivers/oprofile/oprof.h
  31.250 +--- ../pristine-linux-2.6.16/drivers/oprofile/oprof.h	2006-03-20 05:53:29.000000000 +0000
  31.251 ++++ ./drivers/oprofile/oprof.h	2006-04-03 15:53:05.000000000 +0100
  31.252 +@@ -35,5 +35,7 @@ void oprofile_create_files(struct super_
  31.253 + void oprofile_timer_init(struct oprofile_operations * ops);
  31.254 + 
  31.255 + int oprofile_set_backtrace(unsigned long depth);
  31.256 ++
  31.257 ++int oprofile_set_active(void);
  31.258 +  
  31.259 + #endif /* OPROF_H */
  31.260 +diff -pruN ../pristine-linux-2.6.16/drivers/oprofile/oprofile_files.c ./drivers/oprofile/oprofile_files.c
  31.261 +--- ../pristine-linux-2.6.16/drivers/oprofile/oprofile_files.c	2006-03-20 05:53:29.000000000 +0000
  31.262 ++++ ./drivers/oprofile/oprofile_files.c	2006-04-03 15:53:05.000000000 +0100
  31.263 +@@ -5,15 +5,21 @@
  31.264 +  * @remark Read the file COPYING
  31.265 +  *
  31.266 +  * @author John Levon <levon@movementarian.org>
  31.267 ++ *
  31.268 ++ * Modified by Aravind Menon for Xen
  31.269 ++ * These modifications are:
  31.270 ++ * Copyright (C) 2005 Hewlett-Packard Co.	
  31.271 +  */
  31.272 + 
  31.273 + #include <linux/fs.h>
  31.274 + #include <linux/oprofile.h>
  31.275 ++#include <asm/uaccess.h>
  31.276 ++#include <linux/ctype.h>
  31.277 + 
  31.278 + #include "event_buffer.h"
  31.279 + #include "oprofile_stats.h"
  31.280 + #include "oprof.h"
  31.281 +- 
  31.282 ++
  31.283 + unsigned long fs_buffer_size = 131072;
  31.284 + unsigned long fs_cpu_buffer_size = 8192;
  31.285 + unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
  31.286 +@@ -117,11 +123,79 @@ static ssize_t dump_write(struct file * 
  31.287 + static struct file_operations dump_fops = {
  31.288 + 	.write		= dump_write,
  31.289 + };
  31.290 +- 
  31.291 ++
  31.292 ++#define TMPBUFSIZE 512
  31.293 ++
  31.294 ++unsigned int adomains = 0;
  31.295 ++long active_domains[MAX_OPROF_DOMAINS];
  31.296 ++
  31.297 ++static ssize_t adomain_write(struct file * file, char const __user * buf, 
  31.298 ++			     size_t count, loff_t * offset)
  31.299 ++{
  31.300 ++	char tmpbuf[TMPBUFSIZE];
  31.301 ++	char * startp = tmpbuf;
  31.302 ++	char * endp = tmpbuf;
  31.303 ++	int i;
  31.304 ++	unsigned long val;
  31.305 ++	
  31.306 ++	if (*offset)
  31.307 ++		return -EINVAL;	
  31.308 ++	if (!count)
  31.309 ++		return 0;
  31.310 ++	if (count > TMPBUFSIZE - 1)
  31.311 ++		return -EINVAL;
  31.312 ++
  31.313 ++	memset(tmpbuf, 0x0, TMPBUFSIZE);
  31.314 ++
  31.315 ++	if (copy_from_user(tmpbuf, buf, count))
  31.316 ++		return -EFAULT;
  31.317 ++	
  31.318 ++	for (i = 0; i < MAX_OPROF_DOMAINS; i++)
  31.319 ++		active_domains[i] = -1;
  31.320 ++	adomains = 0;
  31.321 ++
  31.322 ++	while (1) {
  31.323 ++		val = simple_strtol(startp, &endp, 0);
  31.324 ++		if (endp == startp)
  31.325 ++			break;
  31.326 ++		while (ispunct(*endp))
  31.327 ++			endp++;
  31.328 ++		active_domains[adomains++] = val;
  31.329 ++		if (adomains >= MAX_OPROF_DOMAINS)
  31.330 ++			break;
  31.331 ++		startp = endp;
  31.332 ++	}
  31.333 ++	if (oprofile_set_active())
  31.334 ++		return -EINVAL; 
  31.335 ++	return count;
  31.336 ++}
  31.337 ++
  31.338 ++static ssize_t adomain_read(struct file * file, char __user * buf, 
  31.339 ++			    size_t count, loff_t * offset)
  31.340 ++{
  31.341 ++	char tmpbuf[TMPBUFSIZE];
  31.342 ++	size_t len = 0;
  31.343 ++	int i;
  31.344 ++	/* This is all screwed up if we run out of space */
  31.345 ++	for (i = 0; i < adomains; i++) 
  31.346 ++		len += snprintf(tmpbuf + len, TMPBUFSIZE - len, 
  31.347 ++				"%u ", (unsigned int)active_domains[i]);
  31.348 ++	len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "\n");
  31.349 ++	return simple_read_from_buffer((void __user *)buf, count, 
  31.350 ++				       offset, tmpbuf, len);
  31.351 ++}
  31.352 ++
  31.353 ++
  31.354 ++static struct file_operations active_domain_ops = {
  31.355 ++	.read		= adomain_read,
  31.356 ++	.write		= adomain_write,
  31.357 ++};
  31.358 ++
  31.359 + void oprofile_create_files(struct super_block * sb, struct dentry * root)
  31.360 + {
  31.361 + 	oprofilefs_create_file(sb, root, "enable", &enable_fops);
  31.362 + 	oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
  31.363 ++	oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
  31.364 + 	oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
  31.365 + 	oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
  31.366 + 	oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
  31.367 +diff -pruN ../pristine-linux-2.6.16/include/linux/oprofile.h ./include/linux/oprofile.h
  31.368 +--- ../pristine-linux-2.6.16/include/linux/oprofile.h	2006-03-20 05:53:29.000000000 +0000
  31.369 ++++ ./include/linux/oprofile.h	2006-04-03 15:53:05.000000000 +0100
  31.370 +@@ -16,6 +16,8 @@
  31.371 + #include <linux/types.h>
  31.372 + #include <linux/spinlock.h>
  31.373 + #include <asm/atomic.h>
  31.374 ++
  31.375 ++#include <xen/interface/xenoprof.h>
  31.376 +  
  31.377 + struct super_block;
  31.378 + struct dentry;
  31.379 +@@ -27,6 +29,8 @@ struct oprofile_operations {
  31.380 + 	/* create any necessary configuration files in the oprofile fs.
  31.381 + 	 * Optional. */
  31.382 + 	int (*create_files)(struct super_block * sb, struct dentry * root);
  31.383 ++	/* setup active domains with Xen */
  31.384 ++	int (*set_active)(int *active_domains, unsigned int adomains);
  31.385 + 	/* Do any necessary interrupt setup. Optional. */
  31.386 + 	int (*setup)(void);
  31.387 + 	/* Do any necessary interrupt shutdown. Optional. */
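The event_buffer.h hunk in the patch above turns the old kernel enter/exit pair into three explicit enter-switch codes, with XEN_ENTER_SWITCH_CODE appended at 10 so the existing codes keep their values. A minimal sketch of the decoding this implies on the consumer side of the event buffer (the constants are copied from the patch; the surrounding read loop is an assumption):

    #define KERNEL_ENTER_SWITCH_CODE	4
    #define USER_ENTER_SWITCH_CODE	5
    #define XEN_ENTER_SWITCH_CODE	10

    /* Interpret the value following an ESCAPE_CODE entry. */
    static const char *decode_mode_switch(unsigned long code)
    {
        switch (code) {
        case USER_ENTER_SWITCH_CODE:	return "user";
        case KERNEL_ENTER_SWITCH_CODE:	return "kernel";
        case XEN_ENTER_SWITCH_CODE:	return "xen";
        default:			return "other escape code";
        }
    }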
    32.1 --- a/tools/examples/init.d/xend	Thu Apr 06 14:22:52 2006 +0100
    32.2 +++ b/tools/examples/init.d/xend	Fri Apr 07 11:52:00 2006 +0100
    32.3 @@ -7,7 +7,7 @@
    32.4  # chkconfig: 2345 98 01
    32.5  # description: Starts and stops the Xen control daemon.
    32.6  
    32.7 -if ! [ -e /proc/xen/privcmd ]; then
    32.8 +if ! grep -q "control_d" /proc/xen/capabilities ; then
    32.9  	exit 0
   32.10  fi
   32.11  
    33.1 --- a/tools/examples/vtpm-common.sh	Thu Apr 06 14:22:52 2006 +0100
    33.2 +++ b/tools/examples/vtpm-common.sh	Fri Apr 07 11:52:00 2006 +0100
    33.3 @@ -261,12 +261,6 @@ function vtpm_create_instance () {
    33.4  
    33.5  	if [ "$REASON" == "create" ]; then
    33.6  		vtpm_reset $instance
    33.7 -	elif [ "$REASON" == "resume" ]; then
    33.8 -		vtpm_setup $instance
    33.9 -	else
   33.10 -		#default case for 'now'
   33.11 -		#vtpm_reset $instance
   33.12 -		true
   33.13  	fi
   33.14  	xenstore_write $XENBUS_PATH/instance $instance
   33.15  }
    34.1 --- a/tools/ioemu/target-i386-dm/helper2.c	Thu Apr 06 14:22:52 2006 +0100
    34.2 +++ b/tools/ioemu/target-i386-dm/helper2.c	Fri Apr 07 11:52:00 2006 +0100
    34.3 @@ -409,12 +409,20 @@ int xc_handle;
    34.4  void
    34.5  destroy_hvm_domain(void)
    34.6  {
    34.7 -    extern FILE* logfile;
    34.8 -    char destroy_cmd[32];
    34.9 -
   34.10 -    sprintf(destroy_cmd, "xm destroy %d", domid);
   34.11 -    if (system(destroy_cmd) == -1)
   34.12 -        fprintf(logfile, "%s failed.!\n", destroy_cmd);
   34.13 +   int xcHandle;
   34.14 +   int sts;
   34.15 + 
   34.16 +   xcHandle = xc_interface_open();
   34.17 +   if (xcHandle < 0)
   34.18 +     fprintf(logfile, "Cannot acquire xenctrl handle\n");
   34.19 +   else {
   34.20 +     sts = xc_domain_shutdown(xcHandle, domid, SHUTDOWN_poweroff);
   34.21 +     if (sts != 0)
   34.22 +       fprintf(logfile, "? xc_domain_shutdown failed to issue poweroff, sts %d, errno %d\n", sts, errno);
   34.23 +     else
   34.24 +       fprintf(logfile, "Issued domain %d poweroff\n", domid);
   34.25 +     xc_interface_close(xcHandle);
   34.26 +   }
   34.27  }
   34.28  
   34.29  fd_set wakeup_rfds;
   34.30 @@ -480,13 +488,24 @@ int main_loop(void)
   34.31  
   34.32  static void qemu_hvm_reset(void *unused)
   34.33  {
   34.34 -    char cmd[64];
   34.35 +   int xcHandle;
   34.36 +   int sts;
   34.37  
   34.38 -    /* pause domain first, to avoid repeated reboot request*/
   34.39 -    xc_domain_pause(xc_handle, domid);
   34.40 +   /* pause domain first, to avoid repeated reboot request*/
   34.41 +   xc_domain_pause(xc_handle, domid);
   34.42  
   34.43 -    sprintf(cmd, "xm shutdown -R %d", domid);
   34.44 -    system(cmd);
   34.45 +   xcHandle = xc_interface_open();
   34.46 +   if (xcHandle < 0)
   34.47 +     fprintf(logfile, "Cannot acquire xenctrl handle\n");
   34.48 +   else {
   34.49 +     sts = xc_domain_shutdown(xcHandle, domid, SHUTDOWN_reboot);
   34.50 +     if (sts != 0)
   34.51 +       fprintf(logfile, "? xc_domain_shutdown failed to issue reboot, sts %d\n", sts);
   34.52 +     else
   34.53 +       fprintf(logfile, "Issued domain %d reboot\n", domid);
   34.54 +     xc_interface_close(xcHandle);
   34.55 +   }
   34.56 + 
   34.57  }
   34.58  
   34.59  CPUState * cpu_init()
    35.1 --- a/tools/ioemu/vl.c	Thu Apr 06 14:22:52 2006 +0100
    35.2 +++ b/tools/ioemu/vl.c	Fri Apr 07 11:52:00 2006 +0100
    35.3 @@ -2556,8 +2556,10 @@ static int set_mm_mapping(int xc_handle,
    35.4          return -1;
    35.5      }
    35.6  
    35.7 +#if 0 /* Generates lots of log file output - turn on for debugging */
    35.8      for (i = 0; i < nr_pages; i++)
    35.9          fprintf(stderr, "set_map result i %x result %lx\n", i, extent_start[i]);
   35.10 +#endif
   35.11  
   35.12      return 0;
   35.13  }
    36.1 --- a/tools/libxc/xc_domain.c	Thu Apr 06 14:22:52 2006 +0100
    36.2 +++ b/tools/libxc/xc_domain.c	Fri Apr 07 11:52:00 2006 +0100
    36.3 @@ -58,6 +58,35 @@ int xc_domain_destroy(int xc_handle,
    36.4      return do_dom0_op(xc_handle, &op);
    36.5  }
    36.6  
    36.7 +int xc_domain_shutdown(int xc_handle,
    36.8 +                       uint32_t domid,
    36.9 +                       int reason)
   36.10 +{
   36.11 +    int ret = -1;
   36.12 +    sched_remote_shutdown_t arg;
   36.13 +    DECLARE_HYPERCALL;
   36.14 +
   36.15 +    hypercall.op     = __HYPERVISOR_sched_op;
   36.16 +    hypercall.arg[0] = (unsigned long)SCHEDOP_remote_shutdown;
   36.17 +    hypercall.arg[1] = (unsigned long)&arg;
   36.18 +    arg.domain_id = domid;
   36.19 +    arg.reason = reason;
   36.20 +
   36.21 +    if ( mlock(&arg, sizeof(arg)) != 0 )
   36.22 +    {
   36.23 +        PERROR("Could not lock memory for Xen hypercall");
   36.24 +        goto out1;
   36.25 +    }
   36.26 +
   36.27 +    ret = do_xen_hypercall(xc_handle, &hypercall);
   36.28 +
   36.29 +    safe_munlock(&arg, sizeof(arg));
   36.30 +
   36.31 + out1:
   36.32 +    return ret;
   36.33 +}
   36.34 +
   36.35 +
   36.36  int xc_vcpu_setaffinity(int xc_handle,
   36.37                          uint32_t domid, 
   36.38                          int vcpu,
    37.1 --- a/tools/libxc/xc_linux_restore.c	Thu Apr 06 14:22:52 2006 +0100
    37.2 +++ b/tools/libxc/xc_linux_restore.c	Fri Apr 07 11:52:00 2006 +0100
    37.3 @@ -646,18 +646,14 @@ int xc_linux_restore(int xc_handle, int 
    37.4          goto out;
    37.5      }
    37.6  
    37.7 -    if ((pt_levels == 2) && ((pfn_type[pfn]&LTABTYPE_MASK) != L2TAB)) { 
    37.8 +    if ( (pfn_type[pfn] & LTABTYPE_MASK) != 
    37.9 +         ((unsigned long)pt_levels<<LTAB_SHIFT) ) {
   37.10          ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
   37.11 -            pfn, max_pfn, pfn_type[pfn], (unsigned long)L2TAB);
   37.12 +            pfn, max_pfn, pfn_type[pfn], 
   37.13 +            (unsigned long)pt_levels<<LTAB_SHIFT); 
   37.14          goto out;
   37.15      }
   37.16  
   37.17 -    if ((pt_levels == 3) && ((pfn_type[pfn]&LTABTYPE_MASK) != L3TAB)) { 
   37.18 -        ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
   37.19 -            pfn, max_pfn, pfn_type[pfn], (unsigned long)L3TAB);
   37.20 -        goto out;
   37.21 -    }
   37.22 -    
   37.23      ctxt.ctrlreg[3] = p2m[pfn] << PAGE_SHIFT;
   37.24  
   37.25      /* clear any pending events and the selector */
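The restore fix above folds two level-specific checks into one comparison; it works because the page-table type constants are assumed to encode the level directly, i.e. LnTAB == n << LTAB_SHIFT, so the expected type can be computed from pt_levels. A minimal standalone sketch of that equivalence (the shift value 28 is an assumption about the encoding, not taken from this changeset):

    #include <assert.h>

    #define LTAB_SHIFT 28	/* assumed */
    #define L2TAB (2UL << LTAB_SHIFT)
    #define L3TAB (3UL << LTAB_SHIFT)

    int main(void)
    {
        /* The single unified check matches each old per-level check. */
        assert(((unsigned long)2 << LTAB_SHIFT) == L2TAB);
        assert(((unsigned long)3 << LTAB_SHIFT) == L3TAB);
        return 0;
    }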
    38.1 --- a/tools/libxc/xenctrl.h	Thu Apr 06 14:22:52 2006 +0100
    38.2 +++ b/tools/libxc/xenctrl.h	Fri Apr 07 11:52:00 2006 +0100
    38.3 @@ -206,6 +206,21 @@ int xc_domain_unpause(int xc_handle,
    38.4  int xc_domain_destroy(int xc_handle, 
    38.5                        uint32_t domid);
    38.6  
    38.7 +/**
     38.8 + * This function will shut down a domain. This is intended for use in
    38.9 + * fully-virtualized domains where this operation is analogous to the
   38.10 + * sched_op operations in a paravirtualized domain. The caller is
   38.11 + * expected to give the reason for the shutdown.
   38.12 + *
   38.13 + * @parm xc_handle a handle to an open hypervisor interface
    38.14 + * @parm domid the domain id to shut down
   38.15 + * @parm reason is the reason (SHUTDOWN_xxx) for the shutdown
   38.16 + * @return 0 on success, -1 on failure
   38.17 + */
   38.18 +int xc_domain_shutdown(int xc_handle, 
   38.19 +                       uint32_t domid,
   38.20 +                       int reason);
   38.21 +
   38.22  int xc_vcpu_setaffinity(int xc_handle,
   38.23                          uint32_t domid,
   38.24                          int vcpu,
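A minimal sketch of a toolstack client using the new call, mirroring the destroy_hvm_domain() rewrite earlier in this changeset; SHUTDOWN_poweroff is the reason code the ioemu hunk passes and is assumed to come from Xen's public sched.h:

    #include <xenctrl.h>

    static int poweroff_domain(uint32_t domid)
    {
        int xc_handle, rc;

        xc_handle = xc_interface_open();
        if (xc_handle < 0)
            return -1;	/* cannot acquire a xenctrl handle */

        rc = xc_domain_shutdown(xc_handle, domid, SHUTDOWN_poweroff);
        xc_interface_close(xc_handle);
        return rc;	/* 0 on success, -1 on failure */
    }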
    39.1 --- a/tools/xm-test/tests/vtpm/02_vtpm-cat_pcrs.py	Thu Apr 06 14:22:52 2006 +0100
    39.2 +++ b/tools/xm-test/tests/vtpm/02_vtpm-cat_pcrs.py	Fri Apr 07 11:52:00 2006 +0100
    39.3 @@ -46,6 +46,7 @@ except ConsoleError, e:
    39.4      FAIL(str(e))
    39.5  
    39.6  if re.search("No such file",run["output"]):
    39.7 +    vtpm_cleanup(domName)
    39.8      FAIL("TPM frontend support not compiled into (domU?) kernel")
    39.9  
   39.10  console.closeConsole()
    40.1 --- a/tools/xm-test/tests/vtpm/03_vtpm-susp_res.py	Thu Apr 06 14:22:52 2006 +0100
    40.2 +++ b/tools/xm-test/tests/vtpm/03_vtpm-susp_res.py	Fri Apr 07 11:52:00 2006 +0100
    40.3 @@ -47,6 +47,7 @@ except ConsoleError, e:
    40.4      FAIL(str(e))
    40.5  
    40.6  if re.search("No such file",run["output"]):
    40.7 +    vtpm_cleanup(domName)
    40.8      FAIL("TPM frontend support not compiled into (domU?) kernel")
    40.9  
   40.10  console.closeConsole()
    41.1 --- a/xen/arch/x86/Makefile	Thu Apr 06 14:22:52 2006 +0100
    41.2 +++ b/xen/arch/x86/Makefile	Fri Apr 07 11:52:00 2006 +0100
    41.3 @@ -2,6 +2,7 @@ subdir-y += acpi
    41.4  subdir-y += cpu
    41.5  subdir-y += genapic
    41.6  subdir-y += hvm
    41.7 +subdir-y += oprofile
    41.8  
    41.9  subdir-$(x86_32) += x86_32
   41.10  subdir-$(x86_64) += x86_64
    42.1 --- a/xen/arch/x86/domain.c	Thu Apr 06 14:22:52 2006 +0100
    42.2 +++ b/xen/arch/x86/domain.c	Fri Apr 07 11:52:00 2006 +0100
    42.3 @@ -961,6 +961,10 @@ void domain_relinquish_resources(struct 
    42.4      /* Relinquish every page of memory. */
    42.5      relinquish_memory(d, &d->xenpage_list);
    42.6      relinquish_memory(d, &d->page_list);
    42.7 +
    42.8 +    /* Free pages used by the Xen oprofile buffer. */
    42.9 +    free_xenoprof_pages(d);
   42.10 +
   42.11  }
   42.12  
   42.13  void arch_dump_domain_info(struct domain *d)
    43.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    43.2 +++ b/xen/arch/x86/oprofile/Makefile	Fri Apr 07 11:52:00 2006 +0100
    43.3 @@ -0,0 +1,5 @@
    43.4 +obj-y += xenoprof.o
    43.5 +obj-y += nmi_int.o
    43.6 +obj-y += op_model_p4.o
    43.7 +obj-y += op_model_ppro.o
    43.8 +obj-y += op_model_athlon.o
    44.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    44.2 +++ b/xen/arch/x86/oprofile/nmi_int.c	Fri Apr 07 11:52:00 2006 +0100
    44.3 @@ -0,0 +1,391 @@
    44.4 +/**
    44.5 + * @file nmi_int.c
    44.6 + *
    44.7 + * @remark Copyright 2002 OProfile authors
    44.8 + * @remark Read the file COPYING
    44.9 + *
   44.10 + * @author John Levon <levon@movementarian.org>
   44.11 + *
   44.12 + * Modified for Xen: by Aravind Menon & Jose Renato Santos
   44.13 + *   These modifications are:
   44.14 + *   Copyright (C) 2005 Hewlett-Packard Co.
   44.15 + */
   44.16 +
   44.17 +#include <xen/event.h>
   44.18 +#include <xen/types.h>
   44.19 +#include <xen/errno.h>
   44.20 +#include <xen/init.h>
   44.21 +#include <public/xen.h>
   44.22 +#include <asm/nmi.h>
   44.23 +#include <asm/msr.h>
   44.24 +#include <asm/apic.h>
   44.25 +#include <asm/regs.h>
   44.26 +#include <asm/current.h>
   44.27 +#include <xen/delay.h>
   44.28 + 
   44.29 +#include "op_counter.h"
   44.30 +#include "op_x86_model.h"
   44.31 + 
   44.32 +static struct op_x86_model_spec const * model;
   44.33 +static struct op_msrs cpu_msrs[NR_CPUS];
   44.34 +static unsigned long saved_lvtpc[NR_CPUS];
   44.35 +
   44.36 +#define VIRQ_BITMASK_SIZE (MAX_OPROF_DOMAINS/32 + 1)
   44.37 +extern int active_domains[MAX_OPROF_DOMAINS];
   44.38 +extern unsigned int adomains;
   44.39 +extern struct domain *primary_profiler;
   44.40 +extern struct domain *adomain_ptrs[MAX_OPROF_DOMAINS];
   44.41 +extern unsigned long virq_ovf_pending[VIRQ_BITMASK_SIZE];
   44.42 +extern int is_active(struct domain *d);
   44.43 +extern int active_id(struct domain *d);
   44.44 +extern int is_profiled(struct domain *d);
   44.45 +
   44.46 +extern size_t strlcpy(char *dest, const char *src, size_t size);
   44.47 +
   44.48 +
   44.49 +int nmi_callback(struct cpu_user_regs *regs, int cpu)
   44.50 +{
   44.51 +	int xen_mode, ovf;
   44.52 +
   44.53 +	ovf = model->check_ctrs(cpu, &cpu_msrs[cpu], regs);
   44.54 +	xen_mode = ring_0(regs);
   44.55 +	if ( ovf && is_active(current->domain) && !xen_mode )
   44.56 +		send_guest_vcpu_virq(current, VIRQ_XENOPROF);
   44.57 +
   44.58 +	return 1;
   44.59 +}
   44.60 + 
   44.61 + 
   44.62 +static void nmi_cpu_save_registers(struct op_msrs *msrs)
   44.63 +{
   44.64 +	unsigned int const nr_ctrs = model->num_counters;
   44.65 +	unsigned int const nr_ctrls = model->num_controls; 
   44.66 +	struct op_msr *counters = msrs->counters;
   44.67 +	struct op_msr *controls = msrs->controls;
   44.68 +	unsigned int i;
   44.69 +
   44.70 +	for (i = 0; i < nr_ctrs; ++i) {
   44.71 +		rdmsr(counters[i].addr,
   44.72 +			counters[i].saved.low,
   44.73 +			counters[i].saved.high);
   44.74 +	}
   44.75 + 
   44.76 +	for (i = 0; i < nr_ctrls; ++i) {
   44.77 +		rdmsr(controls[i].addr,
   44.78 +			controls[i].saved.low,
   44.79 +			controls[i].saved.high);
   44.80 +	}
   44.81 +}
   44.82 +
   44.83 +
   44.84 +static void nmi_save_registers(void * dummy)
   44.85 +{
   44.86 +	int cpu = smp_processor_id();
   44.87 +	struct op_msrs * msrs = &cpu_msrs[cpu];
   44.88 +	model->fill_in_addresses(msrs);
   44.89 +	nmi_cpu_save_registers(msrs);
   44.90 +}
   44.91 +
   44.92 +
   44.93 +static void free_msrs(void)
   44.94 +{
   44.95 +	int i;
   44.96 +	for (i = 0; i < NR_CPUS; ++i) {
   44.97 +		xfree(cpu_msrs[i].counters);
   44.98 +		cpu_msrs[i].counters = NULL;
   44.99 +		xfree(cpu_msrs[i].controls);
  44.100 +		cpu_msrs[i].controls = NULL;
  44.101 +	}
  44.102 +}
  44.103 +
  44.104 +
  44.105 +static int allocate_msrs(void)
  44.106 +{
  44.107 +	int success = 1;
  44.108 +	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
  44.109 +	size_t counters_size = sizeof(struct op_msr) * model->num_counters;
  44.110 +
  44.111 +	int i;
  44.112 +	for (i = 0; i < NR_CPUS; ++i) {
  44.113 +		if (!test_bit(i, &cpu_online_map))
  44.114 +			continue;
  44.115 +
  44.116 +		cpu_msrs[i].counters = xmalloc_bytes(counters_size);
  44.117 +		if (!cpu_msrs[i].counters) {
  44.118 +			success = 0;
  44.119 +			break;
  44.120 +		}
  44.121 +		cpu_msrs[i].controls = xmalloc_bytes(controls_size);
  44.122 +		if (!cpu_msrs[i].controls) {
  44.123 +			success = 0;
  44.124 +			break;
  44.125 +		}
  44.126 +	}
  44.127 +
  44.128 +	if (!success)
  44.129 +		free_msrs();
  44.130 +
  44.131 +	return success;
  44.132 +}
  44.133 +
  44.134 +
  44.135 +static void nmi_cpu_setup(void * dummy)
  44.136 +{
  44.137 +	int cpu = smp_processor_id();
  44.138 +	struct op_msrs * msrs = &cpu_msrs[cpu];
  44.139 +	model->setup_ctrs(msrs);
  44.140 +}
  44.141 +
  44.142 +
  44.143 +int nmi_setup_events(void)
  44.144 +{
  44.145 +	on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
  44.146 +	return 0;
  44.147 +}
  44.148 +
  44.149 +int nmi_reserve_counters(void)
  44.150 +{
  44.151 +	if (!allocate_msrs())
  44.152 +		return -ENOMEM;
  44.153 +
  44.154 +	/* This is a delicate balancing act.
  44.155 +	 * We need to be careful to install our NMI handler
  44.156 +	 * without actually triggering any NMIs, as this will
  44.157 +	 * break the core code horrifically.
  44.158 +	 */
  44.159 +	if (reserve_lapic_nmi() < 0) {
  44.160 +		free_msrs();
  44.161 +		return -EBUSY;
  44.162 +	}
  44.163 +	/* We need to serialize save and setup for HT because the subsets
  44.164 +	 * of MSRs are distinct for the save and setup operations.
  44.165 +	 */
  44.166 +	on_each_cpu(nmi_save_registers, NULL, 0, 1);
  44.167 + 	return 0;
  44.168 +}
  44.169 +
  44.170 +int nmi_enable_virq(void)
  44.171 +{
  44.172 +	set_nmi_callback(nmi_callback);
  44.173 +	return 0;
  44.174 +}
  44.175 +
  44.176 +
  44.177 +void nmi_disable_virq(void)
  44.178 +{
  44.179 +	unset_nmi_callback();
  44.180 +} 
  44.181 +
  44.182 +
  44.183 +static void nmi_restore_registers(struct op_msrs * msrs)
  44.184 +{
  44.185 +	unsigned int const nr_ctrs = model->num_counters;
  44.186 +	unsigned int const nr_ctrls = model->num_controls; 
  44.187 +	struct op_msr * counters = msrs->counters;
  44.188 +	struct op_msr * controls = msrs->controls;
  44.189 +	unsigned int i;
  44.190 +
  44.191 +	for (i = 0; i < nr_ctrls; ++i) {
  44.192 +		wrmsr(controls[i].addr,
  44.193 +			controls[i].saved.low,
  44.194 +			controls[i].saved.high);
  44.195 +	}
  44.196 + 
  44.197 +	for (i = 0; i < nr_ctrs; ++i) {
  44.198 +		wrmsr(counters[i].addr,
  44.199 +			counters[i].saved.low,
  44.200 +			counters[i].saved.high);
  44.201 +	}
  44.202 +}
  44.203 + 
  44.204 +
  44.205 +static void nmi_cpu_shutdown(void * dummy)
  44.206 +{
  44.207 +	int cpu = smp_processor_id();
  44.208 +	struct op_msrs * msrs = &cpu_msrs[cpu];
  44.209 +	nmi_restore_registers(msrs);
  44.210 +}
  44.211 +
  44.212 + 
  44.213 +void nmi_release_counters(void)
  44.214 +{
  44.215 +	on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
  44.216 +	release_lapic_nmi();
  44.217 +	free_msrs();
  44.218 +}
  44.219 +
  44.220 + 
  44.221 +static void nmi_cpu_start(void * dummy)
  44.222 +{
  44.223 +	int cpu = smp_processor_id();
  44.224 +	struct op_msrs const * msrs = &cpu_msrs[cpu];
  44.225 +	saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
  44.226 +	apic_write(APIC_LVTPC, APIC_DM_NMI);
  44.227 +	model->start(msrs);
  44.228 +}
  44.229 + 
  44.230 +
  44.231 +int nmi_start(void)
  44.232 +{
  44.233 +	on_each_cpu(nmi_cpu_start, NULL, 0, 1);
  44.234 +	return 0;
  44.235 +}
  44.236 + 
  44.237 + 
  44.238 +static void nmi_cpu_stop(void * dummy)
  44.239 +{
  44.240 +	unsigned int v;
  44.241 +	int cpu = smp_processor_id();
  44.242 +	struct op_msrs const * msrs = &cpu_msrs[cpu];
  44.243 +	model->stop(msrs);
  44.244 +
  44.245 +	/* Restoring APIC_LVTPC can trigger an APIC error because the delivery
  44.246 +	 * mode and vector number combination can be illegal. That is by design:
  44.247 +	 * on power-on the APIC LVT contains a zero vector number, which is legal
  44.248 +	 * only for NMI delivery mode. So inhibit APIC errors before restoring LVTPC.
  44.249 +	 */
  44.250 +	if ( !(apic_read(APIC_LVTPC) & APIC_DM_NMI)
  44.251 +	     || (apic_read(APIC_LVTPC) & APIC_LVT_MASKED) )
  44.252 +	{
  44.253 +		printk("nmi_stop: APIC not good %u\n", apic_read(APIC_LVTPC));
  44.254 +		mdelay(5000);
  44.255 +	}
  44.256 +	v = apic_read(APIC_LVTERR);
  44.257 +	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
  44.258 +	apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
  44.259 +	apic_write(APIC_LVTERR, v);
  44.260 +}
  44.261 + 
  44.262 + 
  44.263 +void nmi_stop(void)
  44.264 +{
  44.265 +	on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
  44.266 +}
  44.267 +
  44.268 +
  44.269 +struct op_counter_config counter_config[OP_MAX_COUNTER];
  44.270 +
  44.271 +static int __init p4_init(char * cpu_type)
  44.272 +{ 
  44.273 +	__u8 cpu_model = current_cpu_data.x86_model;
  44.274 +
  44.275 +	if (cpu_model > 4)
  44.276 +		return 0;
  44.277 +
  44.278 +#ifndef CONFIG_SMP
  44.279 +	strncpy (cpu_type, "i386/p4", XENOPROF_CPU_TYPE_SIZE - 1);
  44.280 +	model = &op_p4_spec;
  44.281 +	return 1;
  44.282 +#else
  44.283 +	switch (smp_num_siblings) {
  44.284 +		case 1:
  44.285 +			strncpy (cpu_type, "i386/p4", 
  44.286 +				 XENOPROF_CPU_TYPE_SIZE - 1);
  44.287 +			model = &op_p4_spec;
  44.288 +			return 1;
  44.289 +
  44.290 +		case 2:
  44.291 +			strncpy (cpu_type, "i386/p4-ht", 
  44.292 +				 XENOPROF_CPU_TYPE_SIZE - 1);
  44.293 +			model = &op_p4_ht2_spec;
  44.294 +			return 1;
  44.295 +	}
  44.296 +#endif
  44.297 +	printk("Xenoprof ERROR: P4 HyperThreading detected with > 2 threads\n");
  44.298 +
  44.299 +	return 0;
  44.300 +}
  44.301 +
  44.302 +
  44.303 +static int __init ppro_init(char *cpu_type)
  44.304 +{
  44.305 +	__u8 cpu_model = current_cpu_data.x86_model;
  44.306 +
  44.307 +	if (cpu_model > 0xd)
  44.308 +		return 0;
  44.309 +
  44.310 +	if (cpu_model == 9) {
  44.311 +		strncpy (cpu_type, "i386/p6_mobile", XENOPROF_CPU_TYPE_SIZE - 1);
  44.312 +	} else if (cpu_model > 5) {
  44.313 +		strncpy (cpu_type, "i386/piii", XENOPROF_CPU_TYPE_SIZE - 1);
  44.314 +	} else if (cpu_model > 2) {
  44.315 +		strncpy (cpu_type, "i386/pii", XENOPROF_CPU_TYPE_SIZE - 1);
  44.316 +	} else {
  44.317 +		strncpy (cpu_type, "i386/ppro", XENOPROF_CPU_TYPE_SIZE - 1);
  44.318 +	}
  44.319 +
  44.320 +	model = &op_ppro_spec;
  44.321 +	return 1;
  44.322 +}
  44.323 +
  44.324 +int nmi_init(int *num_events, int *is_primary, char *cpu_type)
  44.325 +{
  44.326 +	__u8 vendor = current_cpu_data.x86_vendor;
  44.327 +	__u8 family = current_cpu_data.x86;
  44.328 +	int prim = 0;
  44.329 + 
  44.330 +	if (!cpu_has_apic)
  44.331 +		return -ENODEV;
  44.332 +
  44.333 +	if (primary_profiler == NULL) {
  44.334 +		/* For now, only dom0 can be the primary profiler */
  44.335 +		if (current->domain->domain_id == 0) {
  44.336 +			primary_profiler = current->domain;
  44.337 +			prim = 1;
  44.338 +		}
  44.339 +	}
  44.340 + 
  44.341 +	/* Make sure the string is NUL-terminated. */
  44.342 +	cpu_type[XENOPROF_CPU_TYPE_SIZE - 1] = 0;
  44.343 +
  44.344 +	switch (vendor) {
  44.345 +		case X86_VENDOR_AMD:
  44.346 +			/* Needs to be at least an Athlon (or hammer in 32bit mode) */
  44.347 +
  44.348 +			switch (family) {
  44.349 +			default:
  44.350 +				return -ENODEV;
  44.351 +			case 6:
  44.352 +				model = &op_athlon_spec;
  44.353 +				strncpy (cpu_type, "i386/athlon", 
  44.354 +					 XENOPROF_CPU_TYPE_SIZE - 1);
  44.355 +				break;
  44.356 +			case 0xf:
  44.357 +				model = &op_athlon_spec;
  44.358 +				/* Actually it could be i386/hammer too, but give
  44.359 +				   user space a consistent name. */
  44.360 +				strncpy (cpu_type, "x86-64/hammer", 
  44.361 +					 XENOPROF_CPU_TYPE_SIZE - 1);
  44.362 +				break;
  44.363 +			}
  44.364 +			break;
  44.365 + 
  44.366 +		case X86_VENDOR_INTEL:
  44.367 +			switch (family) {
  44.368 +				/* Pentium IV */
  44.369 +				case 0xf:
  44.370 +					if (!p4_init(cpu_type))
  44.371 +						return -ENODEV;
  44.372 +					break;
  44.373 +
  44.374 +				/* A P6-class processor */
  44.375 +				case 6:
  44.376 +					if (!ppro_init(cpu_type))
  44.377 +						return -ENODEV;
  44.378 +					break;
  44.379 +
  44.380 +				default:
  44.381 +					return -ENODEV;
  44.382 +			}
  44.383 +			break;
  44.384 +
  44.385 +		default:
  44.386 +			return -ENODEV;
  44.387 +	}
  44.388 +
  44.389 +	*num_events = model->num_counters;
  44.390 +	*is_primary = prim;
  44.391 +
  44.392 +	return 0;
  44.393 +}
  44.394 +
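
Taken together, these entry points form the per-session lifecycle the xenoprof hypercall layer (added in xenoprof.c below) is expected to drive. A sketch of the assumed ordering, with error handling elided:

    char cpu_type[XENOPROF_CPU_TYPE_SIZE];
    int num_events, is_primary;

    if ( nmi_init(&num_events, &is_primary, cpu_type) == 0 &&
         nmi_reserve_counters() == 0 )     /* allocate + save MSR state  */
    {
        nmi_setup_events();                /* program the event counters */
        nmi_enable_virq();                 /* install the NMI callback   */
        nmi_start();                       /* counters running; samples  */
                                           /* arrive via VIRQ_XENOPROF   */
        nmi_stop();
        nmi_disable_virq();
        nmi_release_counters();            /* restore MSRs, free buffers */
    }
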
    45.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    45.2 +++ b/xen/arch/x86/oprofile/op_counter.h	Fri Apr 07 11:52:00 2006 +0100
    45.3 @@ -0,0 +1,29 @@
    45.4 +/**
    45.5 + * @file op_counter.h
    45.6 + *
    45.7 + * @remark Copyright 2002 OProfile authors
    45.8 + * @remark Read the file COPYING
    45.9 + *
   45.10 + * @author John Levon
   45.11 + */
   45.12 + 
   45.13 +#ifndef OP_COUNTER_H
   45.14 +#define OP_COUNTER_H
   45.15 + 
   45.16 +#define OP_MAX_COUNTER 8
   45.17 + 
   45.18 +/* Per-perfctr configuration as set via
   45.19 + * oprofilefs.
   45.20 + */
   45.21 +struct op_counter_config {
   45.22 +        unsigned long count;
   45.23 +        unsigned long enabled;
   45.24 +        unsigned long event;
   45.25 +        unsigned long kernel;
   45.26 +        unsigned long user;
   45.27 +        unsigned long unit_mask;
   45.28 +};
   45.29 +
   45.30 +extern struct op_counter_config counter_config[];
   45.31 +
   45.32 +#endif /* OP_COUNTER_H */
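
For illustration, the profiling controller fills counter_config[] before setup_ctrs() runs. The event code and count below are hypothetical placeholders, not values defined by this patch:

    /* Hypothetical: arm counter 0 to take a sample every 100000
     * occurrences of model-specific event 0x3c, counting both user
     * and kernel mode. */
    counter_config[0].enabled   = 1;
    counter_config[0].event     = 0x3c;
    counter_config[0].count     = 100000;
    counter_config[0].kernel    = 1;
    counter_config[0].user      = 1;
    counter_config[0].unit_mask = 0;
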
    46.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    46.2 +++ b/xen/arch/x86/oprofile/op_model_athlon.c	Fri Apr 07 11:52:00 2006 +0100
    46.3 @@ -0,0 +1,168 @@
    46.4 +/**
    46.5 + * @file op_model_athlon.c
    46.6 + * Athlon / K7 model-specific MSR operations
    46.7 + *
    46.8 + * @remark Copyright 2002 OProfile authors
    46.9 + * @remark Read the file COPYING
   46.10 + *
   46.11 + * @author John Levon
   46.12 + * @author Philippe Elie
   46.13 + * @author Graydon Hoare
   46.14 + */
   46.15 +
   46.16 +#include <xen/types.h>
   46.17 +#include <asm/msr.h>
   46.18 +#include <asm/io.h>
   46.19 +#include <asm/apic.h>
   46.20 +#include <asm/processor.h>
   46.21 +#include <xen/sched.h>
   46.22 +#include <asm/regs.h>
   46.23 +#include <asm/current.h>
   46.24 + 
   46.25 +#include "op_x86_model.h"
   46.26 +#include "op_counter.h"
   46.27 +
   46.28 +#define NUM_COUNTERS 4
   46.29 +#define NUM_CONTROLS 4
   46.30 +
   46.31 +#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
   46.32 +#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
   46.33 +#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
   46.34 +
   46.35 +#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
   46.36 +#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
   46.37 +#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
   46.38 +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
   46.39 +#define CTRL_CLEAR(x) (x &= (1<<21))
   46.40 +#define CTRL_SET_ENABLE(val) (val |= 1<<20)
   46.41 +#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
   46.42 +#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
   46.43 +#define CTRL_SET_UM(val, m) (val |= (m << 8))
   46.44 +#define CTRL_SET_EVENT(val, e) (val |= e)
   46.45 +
   46.46 +static unsigned long reset_value[NUM_COUNTERS];
   46.47 +
   46.48 +extern void xenoprof_log_event(struct vcpu *v, unsigned long eip,
   46.49 +			       int mode, int event);
   46.50 + 
   46.51 +static void athlon_fill_in_addresses(struct op_msrs * const msrs)
   46.52 +{
   46.53 +	msrs->counters[0].addr = MSR_K7_PERFCTR0;
   46.54 +	msrs->counters[1].addr = MSR_K7_PERFCTR1;
   46.55 +	msrs->counters[2].addr = MSR_K7_PERFCTR2;
   46.56 +	msrs->counters[3].addr = MSR_K7_PERFCTR3;
   46.57 +
   46.58 +	msrs->controls[0].addr = MSR_K7_EVNTSEL0;
   46.59 +	msrs->controls[1].addr = MSR_K7_EVNTSEL1;
   46.60 +	msrs->controls[2].addr = MSR_K7_EVNTSEL2;
   46.61 +	msrs->controls[3].addr = MSR_K7_EVNTSEL3;
   46.62 +}
   46.63 +
   46.64 + 
   46.65 +static void athlon_setup_ctrs(struct op_msrs const * const msrs)
   46.66 +{
   46.67 +	unsigned int low, high;
   46.68 +	int i;
   46.69 + 
   46.70 +	/* clear all counters */
   46.71 +	for (i = 0 ; i < NUM_CONTROLS; ++i) {
   46.72 +		CTRL_READ(low, high, msrs, i);
   46.73 +		CTRL_CLEAR(low);
   46.74 +		CTRL_WRITE(low, high, msrs, i);
   46.75 +	}
   46.76 +	
   46.77 +	/* avoid a false detection of ctr overflows in NMI handler */
   46.78 +	for (i = 0; i < NUM_COUNTERS; ++i) {
   46.79 +		CTR_WRITE(1, msrs, i);
   46.80 +	}
   46.81 +
   46.82 +	/* enable active counters */
   46.83 +	for (i = 0; i < NUM_COUNTERS; ++i) {
   46.84 +		if (counter_config[i].enabled) {
   46.85 +			reset_value[i] = counter_config[i].count;
   46.86 +
   46.87 +			CTR_WRITE(counter_config[i].count, msrs, i);
   46.88 +
   46.89 +			CTRL_READ(low, high, msrs, i);
   46.90 +			CTRL_CLEAR(low);
   46.91 +			CTRL_SET_ENABLE(low);
   46.92 +			CTRL_SET_USR(low, counter_config[i].user);
   46.93 +			CTRL_SET_KERN(low, counter_config[i].kernel);
   46.94 +			CTRL_SET_UM(low, counter_config[i].unit_mask);
   46.95 +			CTRL_SET_EVENT(low, counter_config[i].event);
   46.96 +			CTRL_WRITE(low, high, msrs, i);
   46.97 +		} else {
   46.98 +			reset_value[i] = 0;
   46.99 +		}
  46.100 +	}
  46.101 +}
  46.102 +
  46.103 + 
  46.104 +static int athlon_check_ctrs(unsigned int const cpu,
  46.105 +                             struct op_msrs const * const msrs,
  46.106 +                             struct cpu_user_regs * const regs)
  46.108 +{
  46.109 +	unsigned int low, high;
  46.110 +	int i;
  46.111 +	int ovf = 0;
  46.112 +	unsigned long eip = regs->eip;
  46.113 +	int mode = 0;
  46.114 +
  46.115 +	if (guest_kernel_mode(current, regs))
  46.116 +		mode = 1;
  46.117 +	else if (ring_0(regs))
  46.118 +		mode = 2;
  46.119 +
  46.120 +	for (i = 0 ; i < NUM_COUNTERS; ++i) {
  46.121 +		CTR_READ(low, high, msrs, i);
  46.122 +		if (CTR_OVERFLOWED(low)) {
  46.123 +			xenoprof_log_event(current, eip, mode, i);
  46.124 +			CTR_WRITE(reset_value[i], msrs, i);
  46.125 +			ovf = 1;
  46.126 +		}
  46.127 +	}
  46.128 +
  46.129 +	/* See op_model_ppro.c */
  46.130 +	return ovf;
  46.131 +}
  46.132 +
  46.133 + 
  46.134 +static void athlon_start(struct op_msrs const * const msrs)
  46.135 +{
  46.136 +	unsigned int low, high;
  46.137 +	int i;
  46.138 +	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
  46.139 +		if (reset_value[i]) {
  46.140 +			CTRL_READ(low, high, msrs, i);
  46.141 +			CTRL_SET_ACTIVE(low);
  46.142 +			CTRL_WRITE(low, high, msrs, i);
  46.143 +		}
  46.144 +	}
  46.145 +}
  46.146 +
  46.147 +
  46.148 +static void athlon_stop(struct op_msrs const * const msrs)
  46.149 +{
  46.150 +	unsigned int low,high;
  46.151 +	int i;
  46.152 +
  46.153 +	/* Subtle: stop on all counters to avoid race with
  46.154 +	 * setting our pm callback */
  46.155 +	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
  46.156 +		CTRL_READ(low, high, msrs, i);
  46.157 +		CTRL_SET_INACTIVE(low);
  46.158 +		CTRL_WRITE(low, high, msrs, i);
  46.159 +	}
  46.160 +}
  46.161 +
  46.162 +
  46.163 +struct op_x86_model_spec const op_athlon_spec = {
  46.164 +	.num_counters = NUM_COUNTERS,
  46.165 +	.num_controls = NUM_CONTROLS,
  46.166 +	.fill_in_addresses = &athlon_fill_in_addresses,
  46.167 +	.setup_ctrs = &athlon_setup_ctrs,
  46.168 +	.check_ctrs = &athlon_check_ctrs,
  46.169 +	.start = &athlon_start,
  46.170 +	.stop = &athlon_stop
  46.171 +};
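
The reset convention behind CTR_WRITE/CTR_OVERFLOWED is worth spelling out: the perfctr counts upward, so writing -count arms it to wrap (raising the NMI) after exactly count events, and after a wrap bit 31 is clear, which is what CTR_OVERFLOWED tests. A sketch:

    /* Sketch of the negative-write trick used by CTR_WRITE. */
    unsigned int armed = -(unsigned int)100000;   /* == 0xfffe7960 */
    /* ...100000 events later the counter wraps past 0xffffffff to 0,
     * so bit 31 is clear and CTR_OVERFLOWED() reports the overflow;
     * athlon_check_ctrs() then writes -reset_value[i] to re-arm. */
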
    47.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    47.2 +++ b/xen/arch/x86/oprofile/op_model_p4.c	Fri Apr 07 11:52:00 2006 +0100
    47.3 @@ -0,0 +1,739 @@
    47.4 +/**
    47.5 + * @file op_model_p4.c
    47.6 + * P4 model-specific MSR operations
    47.7 + *
    47.8 + * @remark Copyright 2002 OProfile authors
    47.9 + * @remark Read the file COPYING
   47.10 + *
   47.11 + * @author Graydon Hoare
   47.12 + */
   47.13 +
   47.14 +#include <xen/types.h>
   47.15 +#include <asm/msr.h>
   47.16 +#include <asm/io.h>
   47.17 +#include <asm/apic.h>
   47.18 +#include <asm/processor.h>
   47.19 +#include <xen/sched.h>
   47.20 +#include <asm/regs.h>
   47.21 +#include <asm/current.h>
   47.22 +
   47.23 +#include "op_x86_model.h"
   47.24 +#include "op_counter.h"
   47.25 +
   47.26 +#define NUM_EVENTS 39
   47.27 +
   47.28 +#define NUM_COUNTERS_NON_HT 8
   47.29 +#define NUM_ESCRS_NON_HT 45
   47.30 +#define NUM_CCCRS_NON_HT 18
   47.31 +#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
   47.32 +
   47.33 +#define NUM_COUNTERS_HT2 4
   47.34 +#define NUM_ESCRS_HT2 23
   47.35 +#define NUM_CCCRS_HT2 9
   47.36 +#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
   47.37 +
   47.38 +static unsigned int num_counters = NUM_COUNTERS_NON_HT;
   47.39 +
   47.40 +
   47.41 +/* this has to be checked dynamically since the
   47.42 +   hyper-threadedness of a chip is discovered at
   47.43 +   kernel boot-time. */
   47.44 +static inline void setup_num_counters(void)
   47.45 +{
   47.46 +#ifdef CONFIG_SMP
   47.47 +	if (smp_num_siblings == 2)
   47.48 +		num_counters = NUM_COUNTERS_HT2;
   47.49 +#endif
   47.50 +}
   47.51 +
   47.52 +static inline int addr_increment(void)
   47.53 +{
   47.54 +#ifdef CONFIG_SMP
   47.55 +	return smp_num_siblings == 2 ? 2 : 1;
   47.56 +#else
   47.57 +	return 1;
   47.58 +#endif
   47.59 +}
   47.60 +
   47.61 +
   47.62 +/* tables to simulate simplified hardware view of p4 registers */
   47.63 +struct p4_counter_binding {
   47.64 +	int virt_counter;
   47.65 +	int counter_address;
   47.66 +	int cccr_address;
   47.67 +};
   47.68 +
   47.69 +struct p4_event_binding {
   47.70 +	int escr_select;  /* value to put in CCCR */
   47.71 +	int event_select; /* value to put in ESCR */
   47.72 +	struct {
   47.73 +		int virt_counter; /* for this counter... */
   47.74 +		int escr_address; /* use this ESCR       */
   47.75 +	} bindings[2];
   47.76 +};
   47.77 +
   47.78 +/* nb: these CTR_* defines are a duplicate of defines in
   47.79 +   event/i386.p4*events. */
   47.80 +
   47.81 +
   47.82 +#define CTR_BPU_0      (1 << 0)
   47.83 +#define CTR_MS_0       (1 << 1)
   47.84 +#define CTR_FLAME_0    (1 << 2)
   47.85 +#define CTR_IQ_4       (1 << 3)
   47.86 +#define CTR_BPU_2      (1 << 4)
   47.87 +#define CTR_MS_2       (1 << 5)
   47.88 +#define CTR_FLAME_2    (1 << 6)
   47.89 +#define CTR_IQ_5       (1 << 7)
   47.90 +
   47.91 +static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
   47.92 +	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
   47.93 +	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
   47.94 +	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
   47.95 +	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
   47.96 +	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
   47.97 +	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
   47.98 +	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
   47.99 +	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
  47.100 +};
  47.101 +
  47.102 +#define NUM_UNUSED_CCCRS	(NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
  47.103 +
  47.104 +/* All CCCRs we don't use. */
  47.105 +static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
  47.106 +	MSR_P4_BPU_CCCR1,	MSR_P4_BPU_CCCR3,
  47.107 +	MSR_P4_MS_CCCR1,	MSR_P4_MS_CCCR3,
  47.108 +	MSR_P4_FLAME_CCCR1,	MSR_P4_FLAME_CCCR3,
  47.109 +	MSR_P4_IQ_CCCR0,	MSR_P4_IQ_CCCR1,
  47.110 +	MSR_P4_IQ_CCCR2,	MSR_P4_IQ_CCCR3
  47.111 +};
  47.112 +
  47.113 +/* p4 event codes in libop/op_event.h are indices into this table. */
  47.114 +
  47.115 +static struct p4_event_binding p4_events[NUM_EVENTS] = {
  47.116 +	
  47.117 +	{ /* BRANCH_RETIRED */
  47.118 +		0x05, 0x06, 
  47.119 +		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
  47.120 +		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  47.121 +	},
  47.122 +	
  47.123 +	{ /* MISPRED_BRANCH_RETIRED */
  47.124 +		0x04, 0x03, 
  47.125 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
  47.126 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
  47.127 +	},
  47.128 +	
  47.129 +	{ /* TC_DELIVER_MODE */
  47.130 +		0x01, 0x01,
  47.131 +		{ { CTR_MS_0, MSR_P4_TC_ESCR0},  
  47.132 +		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
  47.133 +	},
  47.134 +	
  47.135 +	{ /* BPU_FETCH_REQUEST */
  47.136 +		0x00, 0x03, 
  47.137 +		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
  47.138 +		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
  47.139 +	},
  47.140 +
  47.141 +	{ /* ITLB_REFERENCE */
  47.142 +		0x03, 0x18,
  47.143 +		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
  47.144 +		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
  47.145 +	},
  47.146 +
  47.147 +	{ /* MEMORY_CANCEL */
  47.148 +		0x05, 0x02,
  47.149 +		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
  47.150 +		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
  47.151 +	},
  47.152 +
  47.153 +	{ /* MEMORY_COMPLETE */
  47.154 +		0x02, 0x08,
  47.155 +		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
  47.156 +		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
  47.157 +	},
  47.158 +
  47.159 +	{ /* LOAD_PORT_REPLAY */
  47.160 +		0x02, 0x04, 
  47.161 +		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
  47.162 +		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
  47.163 +	},
  47.164 +
  47.165 +	{ /* STORE_PORT_REPLAY */
  47.166 +		0x02, 0x05,
  47.167 +		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
  47.168 +		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
  47.169 +	},
  47.170 +
  47.171 +	{ /* MOB_LOAD_REPLAY */
  47.172 +		0x02, 0x03,
  47.173 +		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
  47.174 +		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
  47.175 +	},
  47.176 +
  47.177 +	{ /* PAGE_WALK_TYPE */
  47.178 +		0x04, 0x01,
  47.179 +		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
  47.180 +		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
  47.181 +	},
  47.182 +
  47.183 +	{ /* BSQ_CACHE_REFERENCE */
  47.184 +		0x07, 0x0c, 
  47.185 +		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
  47.186 +		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
  47.187 +	},
  47.188 +
  47.189 +	{ /* IOQ_ALLOCATION */
  47.190 +		0x06, 0x03, 
  47.191 +		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
  47.192 +		  { 0, 0 } }
  47.193 +	},
  47.194 +
  47.195 +	{ /* IOQ_ACTIVE_ENTRIES */
  47.196 +		0x06, 0x1a, 
  47.197 +		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
  47.198 +		  { 0, 0 } }
  47.199 +	},
  47.200 +
  47.201 +	{ /* FSB_DATA_ACTIVITY */
  47.202 +		0x06, 0x17, 
  47.203 +		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
  47.204 +		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
  47.205 +	},
  47.206 +
  47.207 +	{ /* BSQ_ALLOCATION */
  47.208 +		0x07, 0x05, 
  47.209 +		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
  47.210 +		  { 0, 0 } }
  47.211 +	},
  47.212 +
  47.213 +	{ /* BSQ_ACTIVE_ENTRIES */
  47.214 +		0x07, 0x06,
  47.215 +		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},  
  47.216 +		  { 0, 0 } }
  47.217 +	},
  47.218 +
  47.219 +	{ /* X87_ASSIST */
  47.220 +		0x05, 0x03, 
  47.221 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  47.222 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  47.223 +	},
  47.224 +
  47.225 +	{ /* SSE_INPUT_ASSIST */
  47.226 +		0x01, 0x34,
  47.227 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  47.228 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  47.229 +	},
  47.230 +  
  47.231 +	{ /* PACKED_SP_UOP */
  47.232 +		0x01, 0x08, 
  47.233 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  47.234 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  47.235 +	},
  47.236 +  
  47.237 +	{ /* PACKED_DP_UOP */
  47.238 +		0x01, 0x0c, 
  47.239 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  47.240 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  47.241 +	},
  47.242 +
  47.243 +	{ /* SCALAR_SP_UOP */
  47.244 +		0x01, 0x0a, 
  47.245 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  47.246 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  47.247 +	},
  47.248 +
  47.249 +	{ /* SCALAR_DP_UOP */
  47.250 +		0x01, 0x0e,
  47.251 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  47.252 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  47.253 +	},
  47.254 +
  47.255 +	{ /* 64BIT_MMX_UOP */
  47.256 +		0x01, 0x02, 
  47.257 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  47.258 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  47.259 +	},
  47.260 +  
  47.261 +	{ /* 128BIT_MMX_UOP */
  47.262 +		0x01, 0x1a, 
  47.263 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  47.264 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  47.265 +	},
  47.266 +
  47.267 +	{ /* X87_FP_UOP */
  47.268 +		0x01, 0x04, 
  47.269 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  47.270 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  47.271 +	},
  47.272 +  
  47.273 +	{ /* X87_SIMD_MOVES_UOP */
  47.274 +		0x01, 0x2e, 
  47.275 +		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
  47.276 +		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
  47.277 +	},
  47.278 +  
  47.279 +	{ /* MACHINE_CLEAR */
  47.280 +		0x05, 0x02, 
  47.281 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  47.282 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  47.283 +	},
  47.284 +
  47.285 +	{ /* GLOBAL_POWER_EVENTS */
  47.286 +		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
  47.287 +		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
  47.288 +		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
  47.289 +	},
  47.290 +  
  47.291 +	{ /* TC_MS_XFER */
  47.292 +		0x00, 0x05, 
  47.293 +		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
  47.294 +		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
  47.295 +	},
  47.296 +
  47.297 +	{ /* UOP_QUEUE_WRITES */
  47.298 +		0x00, 0x09,
  47.299 +		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
  47.300 +		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
  47.301 +	},
  47.302 +
  47.303 +	{ /* FRONT_END_EVENT */
  47.304 +		0x05, 0x08,
  47.305 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  47.306 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  47.307 +	},
  47.308 +
  47.309 +	{ /* EXECUTION_EVENT */
  47.310 +		0x05, 0x0c,
  47.311 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  47.312 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  47.313 +	},
  47.314 +
  47.315 +	{ /* REPLAY_EVENT */
  47.316 +		0x05, 0x09,
  47.317 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
  47.318 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
  47.319 +	},
  47.320 +
  47.321 +	{ /* INSTR_RETIRED */
  47.322 +		0x04, 0x02, 
  47.323 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
  47.324 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
  47.325 +	},
  47.326 +
  47.327 +	{ /* UOPS_RETIRED */
  47.328 +		0x04, 0x01,
  47.329 +		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
  47.330 +		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
  47.331 +	},
  47.332 +
  47.333 +	{ /* UOP_TYPE */    
  47.334 +		0x02, 0x02, 
  47.335 +		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
  47.336 +		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
  47.337 +	},
  47.338 +
  47.339 +	{ /* RETIRED_MISPRED_BRANCH_TYPE */
  47.340 +		0x02, 0x05, 
  47.341 +		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
  47.342 +		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
  47.343 +	},
  47.344 +
  47.345 +	{ /* RETIRED_BRANCH_TYPE */
  47.346 +		0x02, 0x04,
  47.347 +		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
  47.348 +		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
  47.349 +	}
  47.350 +};
  47.351 +
  47.352 +
  47.353 +#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
  47.354 +
  47.355 +#define ESCR_RESERVED_BITS 0x80000003
  47.356 +#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
  47.357 +#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
  47.358 +#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
  47.359 +#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
  47.360 +#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
  47.361 +#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
  47.362 +#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
  47.363 +#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
  47.364 +#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
  47.365 +
  47.366 +#define CCCR_RESERVED_BITS 0x38030FFF
  47.367 +#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
  47.368 +#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
  47.369 +#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
  47.370 +#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
  47.371 +#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
  47.372 +#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
  47.373 +#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
  47.374 +#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
  47.375 +#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
  47.376 +#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
  47.377 +#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
  47.378 +
  47.379 +#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
  47.380 +#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
  47.381 +#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
  47.382 +
  47.383 +
  47.384 +/* this assigns a "stagger" to the current CPU, which is used throughout
  47.385 +   the code in this module as an extra array offset, to select the "even"
  47.386 +   or "odd" part of all the divided resources. */
  47.387 +static unsigned int get_stagger(void)
  47.388 +{
  47.389 +#ifdef CONFIG_SMP
  47.390 +	int cpu = smp_processor_id();
  47.391 +	return (cpu != first_cpu(cpu_sibling_map[cpu]));
  47.392 +#endif	
  47.393 +	return 0;
  47.394 +}
  47.395 +
  47.396 +
  47.397 +/* finally, mediate access to a real hardware counter
  47.398 +   by passing a "virtual" counter number to this macro,
  47.399 +   along with your stagger setting. */
  47.400 +#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
  47.401 +
  47.402 +static unsigned long reset_value[NUM_COUNTERS_NON_HT];
  47.403 +
  47.404 +
  47.405 +static void p4_fill_in_addresses(struct op_msrs * const msrs)
  47.406 +{
  47.407 +	unsigned int i; 
  47.408 +	unsigned int addr, stag;
  47.409 +
  47.410 +	setup_num_counters();
  47.411 +	stag = get_stagger();
  47.412 +
  47.413 +	/* the counter registers we pay attention to */
  47.414 +	for (i = 0; i < num_counters; ++i) {
  47.415 +		msrs->counters[i].addr = 
  47.416 +			p4_counters[VIRT_CTR(stag, i)].counter_address;
  47.417 +	}
  47.418 +
  47.419 +	/* FIXME: bad feeling, we don't save the 10 counters we don't use. */
  47.420 +
  47.421 +	/* 18 CCCR registers */
  47.422 +	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
  47.423 +	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
  47.424 +		msrs->controls[i].addr = addr;
  47.425 +	}
  47.426 +	
  47.427 +	/* 43 ESCR registers in three or four discontiguous groups */
  47.428 +	for (addr = MSR_P4_BSU_ESCR0 + stag;
  47.429 +	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
  47.430 +		msrs->controls[i].addr = addr;
  47.431 +	}
  47.432 +
  47.433 +	/* no IQ_ESCR0/1 on some models, so we save BSU_ESCR0/1 a second time
  47.434 +	 * to avoid a special case in nmi_{save|restore}_registers() */
  47.435 +	if (boot_cpu_data.x86_model >= 0x3) {
  47.436 +		for (addr = MSR_P4_BSU_ESCR0 + stag;
  47.437 +		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
  47.438 +			msrs->controls[i].addr = addr;
  47.439 +		}
  47.440 +	} else {
  47.441 +		for (addr = MSR_P4_IQ_ESCR0 + stag;
  47.442 +		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
  47.443 +			msrs->controls[i].addr = addr;
  47.444 +		}
  47.445 +	}
  47.446 +
  47.447 +	for (addr = MSR_P4_RAT_ESCR0 + stag;
  47.448 +	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
  47.449 +		msrs->controls[i].addr = addr;
  47.450 +	}
  47.451 +	
  47.452 +	for (addr = MSR_P4_MS_ESCR0 + stag;
  47.453 +	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
  47.454 +		msrs->controls[i].addr = addr;
  47.455 +	}
  47.456 +	
  47.457 +	for (addr = MSR_P4_IX_ESCR0 + stag;
  47.458 +	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
  47.459 +		msrs->controls[i].addr = addr;
  47.460 +	}
  47.461 +
  47.462 +	/* there are 2 remaining non-contiguously located ESCRs */
  47.463 +
  47.464 +	if (num_counters == NUM_COUNTERS_NON_HT) {		
  47.465 +		/* standard non-HT CPUs handle both remaining ESCRs */
  47.466 +		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
  47.467 +		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
  47.468 +
  47.469 +	} else if (stag == 0) {
  47.470 +		/* HT CPUs give the first remainder to the even thread, as
  47.471 +		   the 32nd control register */
  47.472 +		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
  47.473 +
  47.474 +	} else {
  47.475 +		/* and two copies of the second to the odd thread,
  47.476 +		   for the 22nd and 23rd control registers */
  47.477 +		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
  47.478 +		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
  47.479 +	}
  47.480 +}
  47.481 +
  47.482 +
  47.483 +static void pmc_setup_one_p4_counter(unsigned int ctr)
  47.484 +{
  47.485 +	int i;
  47.486 +	int const maxbind = 2;
  47.487 +	unsigned int cccr = 0;
  47.488 +	unsigned int escr = 0;
  47.489 +	unsigned int high = 0;
  47.490 +	unsigned int counter_bit;
  47.491 +	struct p4_event_binding *ev = NULL;
  47.492 +	unsigned int stag;
  47.493 +
  47.494 +	stag = get_stagger();
  47.495 +	
  47.496 +	/* convert from counter *number* to counter *bit* */
  47.497 +	counter_bit = 1 << VIRT_CTR(stag, ctr);
  47.498 +	
  47.499 +	/* find our event binding structure. */
  47.500 +	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
  47.501 +		printk(KERN_ERR 
  47.502 +		       "oprofile: P4 event code 0x%lx out of range\n", 
  47.503 +		       counter_config[ctr].event);
  47.504 +		return;
  47.505 +	}
  47.506 +	
  47.507 +	ev = &(p4_events[counter_config[ctr].event - 1]);
  47.508 +	
  47.509 +	for (i = 0; i < maxbind; i++) {
  47.510 +		if (ev->bindings[i].virt_counter & counter_bit) {
  47.511 +
  47.512 +			/* modify ESCR */
  47.513 +			ESCR_READ(escr, high, ev, i);
  47.514 +			ESCR_CLEAR(escr);
  47.515 +			if (stag == 0) {
  47.516 +				ESCR_SET_USR_0(escr, counter_config[ctr].user);
  47.517 +				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
  47.518 +			} else {
  47.519 +				ESCR_SET_USR_1(escr, counter_config[ctr].user);
  47.520 +				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
  47.521 +			}
  47.522 +			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
  47.523 +			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);			
  47.524 +			ESCR_WRITE(escr, high, ev, i);
  47.525 +		       
  47.526 +			/* modify CCCR */
  47.527 +			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
  47.528 +			CCCR_CLEAR(cccr);
  47.529 +			CCCR_SET_REQUIRED_BITS(cccr);
  47.530 +			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
  47.531 +			if (stag == 0) {
  47.532 +				CCCR_SET_PMI_OVF_0(cccr);
  47.533 +			} else {
  47.534 +				CCCR_SET_PMI_OVF_1(cccr);
  47.535 +			}
  47.536 +			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
  47.537 +			return;
  47.538 +		}
  47.539 +	}
  47.540 +
  47.541 +	printk(KERN_ERR 
  47.542 +	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
  47.543 +	       counter_config[ctr].event, stag, ctr);
  47.544 +}
  47.545 +
  47.546 +
  47.547 +static void p4_setup_ctrs(struct op_msrs const * const msrs)
  47.548 +{
  47.549 +	unsigned int i;
  47.550 +	unsigned int low, high;
  47.551 +	unsigned int addr;
  47.552 +	unsigned int stag;
  47.553 +
  47.554 +	stag = get_stagger();
  47.555 +
  47.556 +	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
  47.557 +	if (! MISC_PMC_ENABLED_P(low)) {
  47.558 +		printk(KERN_ERR "oprofile: P4 PMC not available\n");
  47.559 +		return;
  47.560 +	}
  47.561 +
  47.562 +	/* clear the cccrs we will use */
  47.563 +	for (i = 0 ; i < num_counters ; i++) {
  47.564 +		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
  47.565 +		CCCR_CLEAR(low);
  47.566 +		CCCR_SET_REQUIRED_BITS(low);
  47.567 +		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
  47.568 +	}
  47.569 +
  47.570 +	/* clear cccrs outside our concern */
  47.571 +	for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
  47.572 +		rdmsr(p4_unused_cccr[i], low, high);
  47.573 +		CCCR_CLEAR(low);
  47.574 +		CCCR_SET_REQUIRED_BITS(low);
  47.575 +		wrmsr(p4_unused_cccr[i], low, high);
  47.576 +	}
  47.577 +
  47.578 +	/* clear all escrs (including those outside our concern) */
  47.579 +	for (addr = MSR_P4_BSU_ESCR0 + stag;
  47.580 +	     addr <  MSR_P4_IQ_ESCR0; addr += addr_increment()) {
  47.581 +		wrmsr(addr, 0, 0);
  47.582 +	}
  47.583 +
  47.584 +	/* On older models, also clear MSR_P4_IQ_ESCR0/1 */
  47.585 +	if (boot_cpu_data.x86_model < 0x3) {
  47.586 +		wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
  47.587 +		wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
  47.588 +	}
  47.589 +
  47.590 +	for (addr = MSR_P4_RAT_ESCR0 + stag;
  47.591 +	     addr <= MSR_P4_SSU_ESCR0; addr += addr_increment()) {
  47.592 +		wrmsr(addr, 0, 0);
  47.593 +	}
  47.594 +	
  47.595 +	for (addr = MSR_P4_MS_ESCR0 + stag;
  47.596 +	     addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ 
  47.597 +		wrmsr(addr, 0, 0);
  47.598 +	}
  47.599 +	
  47.600 +	for (addr = MSR_P4_IX_ESCR0 + stag;
  47.601 +	     addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ 
  47.602 +		wrmsr(addr, 0, 0);
  47.603 +	}
  47.604 +
  47.605 +	if (num_counters == NUM_COUNTERS_NON_HT) {		
  47.606 +		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
  47.607 +		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
  47.608 +	} else if (stag == 0) {
  47.609 +		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
  47.610 +	} else {
  47.611 +		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
  47.612 +	}		
  47.613 +	
  47.614 +	/* setup all counters */
  47.615 +	for (i = 0 ; i < num_counters ; ++i) {
  47.616 +		if (counter_config[i].enabled) {
  47.617 +			reset_value[i] = counter_config[i].count;
  47.618 +			pmc_setup_one_p4_counter(i);
  47.619 +			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
  47.620 +		} else {
  47.621 +			reset_value[i] = 0;
  47.622 +		}
  47.623 +	}
  47.624 +}
  47.625 +
  47.626 +
  47.627 +extern void xenoprof_log_event(struct vcpu *v, unsigned long eip,
  47.628 +			       int mode, int event);
  47.629 +
  47.630 +static int p4_check_ctrs(unsigned int const cpu,
  47.631 +                         struct op_msrs const * const msrs,
  47.632 +                         struct cpu_user_regs * const regs)
  47.633 +{
  47.634 +	unsigned long ctr, low, high, stag, real;
  47.635 +	int i;
  47.636 +	int ovf = 0;
  47.637 +	unsigned long eip = regs->eip;
  47.638 +	int mode = 0;
  47.639 +
  47.640 +	if (guest_kernel_mode(current, regs))
  47.641 +		mode = 1;
  47.642 +	else if (ring_0(regs))
  47.643 +		mode = 2;
  47.644 +
  47.645 +	stag = get_stagger();
  47.646 +
  47.647 +	for (i = 0; i < num_counters; ++i) {
  47.648 +		
  47.649 +		if (!reset_value[i]) 
  47.650 +			continue;
  47.651 +
  47.652 +		/* 
  47.653 +		 * there is some eccentricity in the hardware which
  47.654 +		 * requires that we perform 2 extra corrections:
  47.655 +		 *
  47.656 +		 * - check both the CCCR:OVF flag for overflow and the
  47.657 +		 *   counter high bit for un-flagged overflows.
  47.658 +		 *
  47.659 +		 * - write the counter back twice to ensure it gets
  47.660 +		 *   updated properly.
  47.661 +		 * 
  47.662 +		 * the former seems to be related to extra NMIs happening
  47.663 +		 * during the current NMI; the latter is reported as errata
  47.664 +		 * N15 in intel doc 249199-029, pentium 4 specification
  47.665 +		 * update, though their suggested work-around does not
  47.666 +		 * appear to solve the problem.
  47.667 +		 */
  47.668 +		
  47.669 +		real = VIRT_CTR(stag, i);
  47.670 +
  47.671 +		CCCR_READ(low, high, real);
  47.672 + 		CTR_READ(ctr, high, real);
  47.673 +		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
  47.674 +			xenoprof_log_event(current, eip, mode, i);
  47.675 + 			CTR_WRITE(reset_value[i], real);
  47.676 +			CCCR_CLEAR_OVF(low);
  47.677 +			CCCR_WRITE(low, high, real);
  47.678 + 			CTR_WRITE(reset_value[i], real);
  47.679 +			ovf = 1;
  47.680 +		}
  47.681 +	}
  47.682 +
  47.683 +	/* P4 quirk: you have to re-unmask the apic vector */
  47.684 +	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
  47.685 +
  47.686 +	return ovf;
  47.687 +}
  47.688 +
  47.689 +
  47.690 +static void p4_start(struct op_msrs const * const msrs)
  47.691 +{
  47.692 +	unsigned int low, high, stag;
  47.693 +	int i;
  47.694 +
  47.695 +	stag = get_stagger();
  47.696 +
  47.697 +	for (i = 0; i < num_counters; ++i) {
  47.698 +		if (!reset_value[i])
  47.699 +			continue;
  47.700 +		CCCR_READ(low, high, VIRT_CTR(stag, i));
  47.701 +		CCCR_SET_ENABLE(low);
  47.702 +		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
  47.703 +	}
  47.704 +}
  47.705 +
  47.706 +
  47.707 +static void p4_stop(struct op_msrs const * const msrs)
  47.708 +{
  47.709 +	unsigned int low, high, stag;
  47.710 +	int i;
  47.711 +
  47.712 +	stag = get_stagger();
  47.713 +
  47.714 +	for (i = 0; i < num_counters; ++i) {
  47.715 +		CCCR_READ(low, high, VIRT_CTR(stag, i));
  47.716 +		CCCR_SET_DISABLE(low);
  47.717 +		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
  47.718 +	}
  47.719 +}
  47.720 +
  47.721 +
  47.722 +#ifdef CONFIG_SMP
  47.723 +struct op_x86_model_spec const op_p4_ht2_spec = {
  47.724 +	.num_counters = NUM_COUNTERS_HT2,
  47.725 +	.num_controls = NUM_CONTROLS_HT2,
  47.726 +	.fill_in_addresses = &p4_fill_in_addresses,
  47.727 +	.setup_ctrs = &p4_setup_ctrs,
  47.728 +	.check_ctrs = &p4_check_ctrs,
  47.729 +	.start = &p4_start,
  47.730 +	.stop = &p4_stop
  47.731 +};
  47.732 +#endif
  47.733 +
  47.734 +struct op_x86_model_spec const op_p4_spec = {
  47.735 +	.num_counters = NUM_COUNTERS_NON_HT,
  47.736 +	.num_controls = NUM_CONTROLS_NON_HT,
  47.737 +	.fill_in_addresses = &p4_fill_in_addresses,
  47.738 +	.setup_ctrs = &p4_setup_ctrs,
  47.739 +	.check_ctrs = &p4_check_ctrs,
  47.740 +	.start = &p4_start,
  47.741 +	.stop = &p4_stop
  47.742 +};
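
The stagger/VIRT_CTR machinery is easiest to see with a worked example (a sketch, assuming the HT2 configuration where num_counters == 4):

    /* Even sibling: stag == 0, VIRT_CTR(0, i) == i      -> p4_counters[0..3]
     * Odd sibling:  stag == 1, VIRT_CTR(1, i) == i + 4  -> p4_counters[4..7]
     * Each hyperthread therefore owns a disjoint half of the eight
     * physical counter/CCCR pairs, while addr_increment() == 2 makes
     * the MSR walks skip the sibling's interleaved registers. */
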
    48.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    48.2 +++ b/xen/arch/x86/oprofile/op_model_ppro.c	Fri Apr 07 11:52:00 2006 +0100
    48.3 @@ -0,0 +1,153 @@
    48.4 +/**
    48.5 + * @file op_model_ppro.c
    48.6 + * Pentium Pro / P6 model-specific MSR operations
    48.7 + *
    48.8 + * @remark Copyright 2002 OProfile authors
    48.9 + * @remark Read the file COPYING
   48.10 + *
   48.11 + * @author John Levon
   48.12 + * @author Philippe Elie
   48.13 + * @author Graydon Hoare
   48.14 + */
   48.15 +
   48.16 +#include <xen/types.h>
   48.17 +#include <asm/msr.h>
   48.18 +#include <asm/io.h>
   48.19 +#include <asm/apic.h>
   48.20 +#include <asm/processor.h>
   48.21 +#include <xen/sched.h>
   48.22 +#include <asm/regs.h>
   48.23 +#include <asm/current.h>
   48.24 + 
   48.25 +#include "op_x86_model.h"
   48.26 +#include "op_counter.h"
   48.27 +
   48.28 +#define NUM_COUNTERS 2
   48.29 +#define NUM_CONTROLS 2
   48.30 +
   48.31 +#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
   48.32 +#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
   48.33 +#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
   48.34 +
   48.35 +#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
   48.36 +#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
   48.37 +#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
   48.38 +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
   48.39 +#define CTRL_CLEAR(x) (x &= (1<<21))
   48.40 +#define CTRL_SET_ENABLE(val) (val |= 1<<20)
   48.41 +#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
   48.42 +#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
   48.43 +#define CTRL_SET_UM(val, m) (val |= (m << 8))
   48.44 +#define CTRL_SET_EVENT(val, e) (val |= e)
   48.45 +
   48.46 +static unsigned long reset_value[NUM_COUNTERS];
   48.47 + 
   48.48 +static void ppro_fill_in_addresses(struct op_msrs * const msrs)
   48.49 +{
   48.50 +	msrs->counters[0].addr = MSR_P6_PERFCTR0;
   48.51 +	msrs->counters[1].addr = MSR_P6_PERFCTR1;
   48.52 +	
   48.53 +	msrs->controls[0].addr = MSR_P6_EVNTSEL0;
   48.54 +	msrs->controls[1].addr = MSR_P6_EVNTSEL1;
   48.55 +}
   48.56 +
   48.57 +
   48.58 +static void ppro_setup_ctrs(struct op_msrs const * const msrs)
   48.59 +{
   48.60 +	unsigned int low, high;
   48.61 +	int i;
   48.62 +
   48.63 +	/* clear all counters */
   48.64 +	for (i = 0 ; i < NUM_CONTROLS; ++i) {
   48.65 +		CTRL_READ(low, high, msrs, i);
   48.66 +		CTRL_CLEAR(low);
   48.67 +		CTRL_WRITE(low, high, msrs, i);
   48.68 +	}
   48.69 +	
   48.70 +	/* avoid a false detection of ctr overflows in NMI handler */
   48.71 +	for (i = 0; i < NUM_COUNTERS; ++i) {
   48.72 +		CTR_WRITE(1, msrs, i);
   48.73 +	}
   48.74 +
   48.75 +	/* enable active counters */
   48.76 +	for (i = 0; i < NUM_COUNTERS; ++i) {
   48.77 +		if (counter_config[i].enabled) {
   48.78 +			reset_value[i] = counter_config[i].count;
   48.79 +
   48.80 +			CTR_WRITE(counter_config[i].count, msrs, i);
   48.81 +
   48.82 +			CTRL_READ(low, high, msrs, i);
   48.83 +			CTRL_CLEAR(low);
   48.84 +			CTRL_SET_ENABLE(low);
   48.85 +			CTRL_SET_USR(low, counter_config[i].user);
   48.86 +			CTRL_SET_KERN(low, counter_config[i].kernel);
   48.87 +			CTRL_SET_UM(low, counter_config[i].unit_mask);
   48.88 +			CTRL_SET_EVENT(low, counter_config[i].event);
   48.89 +			CTRL_WRITE(low, high, msrs, i);
   48.90 +		}
   48.91 +	}
   48.92 +}
   48.93 +
   48.94 +
   48.95 +extern void xenoprof_log_event(struct vcpu *v, unsigned long eip,
   48.96 +			       int mode, int event);
   48.97 + 
   48.98 +static int ppro_check_ctrs(unsigned int const cpu,
   48.99 +                           struct op_msrs const * const msrs,
  48.100 +                           struct cpu_user_regs * const regs)
  48.101 +{
  48.102 +	unsigned int low, high;
  48.103 +	int i;
  48.104 +	int ovf = 0;
  48.105 +	unsigned long eip = regs->eip;
  48.106 +	int mode = 0;
  48.107 +
  48.108 +	if ( guest_kernel_mode(current, regs) ) 
  48.109 +		mode = 1;
  48.110 +	else if ( ring_0(regs) )
  48.111 +		mode = 2;
  48.112 + 
  48.113 +	for (i = 0 ; i < NUM_COUNTERS; ++i) {
  48.114 +		CTR_READ(low, high, msrs, i);
  48.115 +		if (CTR_OVERFLOWED(low)) {
  48.116 +			xenoprof_log_event(current, eip, mode, i);
  48.117 +			CTR_WRITE(reset_value[i], msrs, i);
  48.118 +			ovf = 1;
  48.119 +		}
  48.120 +	}
  48.121 +
  48.122 +	/* Only the P6-based Pentium M needs to re-unmask the APIC vector, but
  48.123 +	 * it doesn't hurt the other P6 variants. */
  48.124 +	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
  48.125 +
  48.126 +	return ovf;
  48.127 +}
  48.128 +
  48.129 + 
  48.130 +static void ppro_start(struct op_msrs const * const msrs)
  48.131 +{
  48.132 +	unsigned int low,high;
  48.133 +	CTRL_READ(low, high, msrs, 0);
  48.134 +	CTRL_SET_ACTIVE(low);
  48.135 +	CTRL_WRITE(low, high, msrs, 0);
  48.136 +}
  48.137 +
  48.138 +
  48.139 +static void ppro_stop(struct op_msrs const * const msrs)
  48.140 +{
  48.141 +	unsigned int low,high;
  48.142 +	CTRL_READ(low, high, msrs, 0);
  48.143 +	CTRL_SET_INACTIVE(low);
  48.144 +	CTRL_WRITE(low, high, msrs, 0);
  48.145 +}
  48.146 +
  48.147 +
  48.148 +struct op_x86_model_spec const op_ppro_spec = {
  48.149 +	.num_counters = NUM_COUNTERS,
  48.150 +	.num_controls = NUM_CONTROLS,
  48.151 +	.fill_in_addresses = &ppro_fill_in_addresses,
  48.152 +	.setup_ctrs = &ppro_setup_ctrs,
  48.153 +	.check_ctrs = &ppro_check_ctrs,
  48.154 +	.start = &ppro_start,
  48.155 +	.stop = &ppro_stop
  48.156 +};
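
Note that ppro_start()/ppro_stop() touch only control register 0. This relies on a P6-family property (an assumption stated here, not spelled out in the patch): the enable bit set by CTRL_SET_ACTIVE gates both counters.

    /* Toggling bit 22 in EVNTSEL0 starts/stops PERFCTR0 and PERFCTR1
     * together on P6-family CPUs, so per-counter writes are not needed. */
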
    49.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    49.2 +++ b/xen/arch/x86/oprofile/op_x86_model.h	Fri Apr 07 11:52:00 2006 +0100
    49.3 @@ -0,0 +1,51 @@
    49.4 +/**
    49.5 + * @file op_x86_model.h
    49.6 + * interface to x86 model-specific MSR operations
    49.7 + *
    49.8 + * @remark Copyright 2002 OProfile authors
    49.9 + * @remark Read the file COPYING
   49.10 + *
   49.11 + * @author Graydon Hoare
   49.12 + */
   49.13 +
   49.14 +#ifndef OP_X86_MODEL_H
   49.15 +#define OP_X86_MODEL_H
   49.16 +
   49.17 +struct op_saved_msr {
   49.18 +	unsigned int high;
   49.19 +	unsigned int low;
   49.20 +};
   49.21 +
   49.22 +struct op_msr {
   49.23 +	unsigned long addr;
   49.24 +	struct op_saved_msr saved;
   49.25 +};
   49.26 +
   49.27 +struct op_msrs {
   49.28 +	struct op_msr * counters;
   49.29 +	struct op_msr * controls;
   49.30 +};
   49.31 +
   49.32 +struct pt_regs;
   49.33 +
   49.34 +/* The model vtable abstracts the differences between
   49.35 + * various x86 CPU models' perfctr support.
   49.36 + */
   49.37 +struct op_x86_model_spec {
   49.38 +	unsigned int const num_counters;
   49.39 +	unsigned int const num_controls;
   49.40 +	void (*fill_in_addresses)(struct op_msrs * const msrs);
   49.41 +	void (*setup_ctrs)(struct op_msrs const * const msrs);
   49.42 +	int (*check_ctrs)(unsigned int const cpu, 
   49.43 +			  struct op_msrs const * const msrs,
   49.44 +			  struct cpu_user_regs * const regs);
   49.45 +	void (*start)(struct op_msrs const * const msrs);
   49.46 +	void (*stop)(struct op_msrs const * const msrs);
   49.47 +};
   49.48 +
   49.49 +extern struct op_x86_model_spec const op_ppro_spec;
   49.50 +extern struct op_x86_model_spec const op_p4_spec;
   49.51 +extern struct op_x86_model_spec const op_p4_ht2_spec;
   49.52 +extern struct op_x86_model_spec const op_athlon_spec;
   49.53 +
   49.54 +#endif /* OP_X86_MODEL_H */
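
nmi_int.c consumes this vtable: nmi_init() points its file-scope `model` at one of the specs, and every hot-path operation is then an indirect call. A minimal sketch of the dispatch pattern, mirroring nmi_cpu_setup()/nmi_cpu_start():

    static const struct op_x86_model_spec *model;  /* chosen by nmi_init() */

    static void profile_this_cpu(struct op_msrs *msrs)
    {
        model->fill_in_addresses(msrs);  /* which counter/control MSRs */
        model->setup_ctrs(msrs);         /* program events and counts  */
        model->start(msrs);              /* let the counters run       */
    }
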
    50.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    50.2 +++ b/xen/arch/x86/oprofile/xenoprof.c	Fri Apr 07 11:52:00 2006 +0100
    50.3 @@ -0,0 +1,528 @@
    50.4 +/*
    50.5 + * Copyright (C) 2005 Hewlett-Packard Co.
    50.6 + * written by Aravind Menon & Jose Renato Santos
    50.7 + *            (email: xenoprof@groups.hp.com)
    50.8 + */
    50.9 +
   50.10 +#include <xen/sched.h>
   50.11 +#include <public/xenoprof.h>
   50.12 +
   50.13 +#include "op_counter.h"
   50.14 +
   50.15 +/* Limit the number of pages used for the shared buffer (per domain). */
   50.16 +#define MAX_OPROF_SHARED_PAGES 32
   50.17 +
   50.18 +int active_domains[MAX_OPROF_DOMAINS];
   50.19 +int active_ready[MAX_OPROF_DOMAINS];
   50.20 +unsigned int adomains;
   50.21 +unsigned int activated;
   50.22 +struct domain *primary_profiler;
   50.23 +int xenoprof_state = XENOPROF_IDLE;
   50.24 +
   50.25 +u64 total_samples;
   50.26 +u64 invalid_buffer_samples;
   50.27 +u64 corrupted_buffer_samples;
   50.28 +u64 lost_samples;
   50.29 +u64 active_samples;
   50.30 +u64 idle_samples;
   50.31 +u64 others_samples;
   50.32 +
   50.33 +
   50.34 +extern int nmi_init(int *num_events, int *is_primary, char *cpu_type);
   50.35 +extern int nmi_reserve_counters(void);
   50.36 +extern int nmi_setup_events(void);
   50.37 +extern int nmi_enable_virq(void);
   50.38 +extern int nmi_start(void);
   50.39 +extern void nmi_stop(void);
   50.40 +extern void nmi_disable_virq(void);
   50.41 +extern void nmi_release_counters(void);
   50.42 +
   50.43 +int is_active(struct domain *d)
   50.44 +{
   50.45 +    struct xenoprof *x = d->xenoprof;
   50.46 +    return ((x != NULL) && (x->domain_type == XENOPROF_DOMAIN_ACTIVE));
   50.47 +}
   50.48 +
   50.49 +int is_profiled(struct domain *d)
   50.50 +{
   50.51 +    return is_active(d);
   50.52 +}
   50.53 +
   50.54 +static void xenoprof_reset_stat(void)
   50.55 +{
   50.56 +    total_samples = 0;
   50.57 +    invalid_buffer_samples = 0;
   50.58 +    corrupted_buffer_samples = 0;
   50.59 +    lost_samples = 0;
   50.60 +    active_samples = 0;
   50.61 +    idle_samples = 0;
   50.62 +    others_samples = 0;
   50.63 +}
   50.64 +
   50.65 +static void xenoprof_reset_buf(struct domain *d)
   50.66 +{
   50.67 +    int j;
   50.68 +    struct xenoprof_buf *buf;
   50.69 +
   50.70 +    if ( d->xenoprof == NULL )
   50.71 +    {
   50.72 +        printk("xenoprof_reset_buf: ERROR - Unexpected "
    50.73 +               "Xenoprof NULL pointer\n");
   50.74 +        return;
   50.75 +    }
   50.76 +
   50.77 +    for ( j = 0; j < MAX_VIRT_CPUS; j++ )
   50.78 +    {
   50.79 +        buf = d->xenoprof->vcpu[j].buffer;
   50.80 +        if ( buf != NULL )
   50.81 +        {
   50.82 +            buf->event_head = 0;
   50.83 +            buf->event_tail = 0;
   50.84 +        }
   50.85 +    }
   50.86 +}
   50.87 +
   50.88 +int active_index(struct domain *d)
   50.89 +{
   50.90 +    int i, id = d->domain_id;
   50.91 +
   50.92 +    for ( i = 0; i < adomains; i++ )
   50.93 +        if ( active_domains[i] == id )
   50.94 +            return i;
   50.95 +
   50.96 +    return -1;
   50.97 +}
   50.98 +
   50.99 +int set_active(struct domain *d)
  50.100 +{
  50.101 +    int ind;
  50.102 +    struct xenoprof *x;
  50.103 +
  50.104 +    ind = active_index(d);
  50.105 +    if ( ind < 0 )
  50.106 +        return -EPERM;
  50.107 +
  50.108 +    x = d->xenoprof;
  50.109 +    if ( x == NULL )
  50.110 +        return -EPERM;
  50.111 +
  50.112 +    x->domain_ready = 1;
  50.113 +    x->domain_type = XENOPROF_DOMAIN_ACTIVE;
  50.114 +    active_ready[ind] = 1;
  50.115 +    activated++;
  50.116 +
  50.117 +    return 0;
  50.118 +}
  50.119 +
  50.120 +int reset_active(struct domain *d)
  50.121 +{
  50.122 +    int ind;
  50.123 +    struct xenoprof *x;
  50.124 +
  50.125 +    ind = active_index(d);
  50.126 +    if ( ind < 0 )
  50.127 +        return -EPERM;
  50.128 +
  50.129 +    x = d->xenoprof;
  50.130 +    if ( x == NULL )
  50.131 +        return -EPERM;
  50.132 +
  50.133 +    x->domain_ready = 0;
  50.134 +    x->domain_type = XENOPROF_DOMAIN_IGNORED;
  50.135 +    active_ready[ind] = 0;
  50.136 +    activated--;
  50.137 +    if ( activated <= 0 )
  50.138 +        adomains = 0;
  50.139 +
  50.140 +    return 0;
  50.141 +}
  50.142 +
  50.143 +int set_active_domains(int num)
  50.144 +{
  50.145 +    int primary;
  50.146 +    int i;
  50.147 +    struct domain *d;
  50.148 +
  50.149 +    /* Reset any existing active domains from previous runs. */
  50.150 +    for ( i = 0; i < adomains; i++ )
  50.151 +    {
  50.152 +        if ( active_ready[i] )
  50.153 +        {
  50.154 +            d = find_domain_by_id(active_domains[i]);
  50.155 +            if ( d != NULL )
  50.156 +            {
  50.157 +                reset_active(d);
  50.158 +                put_domain(d);
  50.159 +            }
  50.160 +        }
  50.161 +    }
  50.162 +
  50.163 +    adomains = num;
  50.164 +
   50.165 +    /* Add the primary profiler to the active list if not already there. */
  50.166 +    primary = active_index(primary_profiler);
  50.167 +    if ( primary == -1 )
  50.168 +    {
   50.169 +        /* Return if there is no space left on the list. */
  50.170 +        if ( num >= MAX_OPROF_DOMAINS )
  50.171 +            return -E2BIG;
  50.172 +        active_domains[num] = primary_profiler->domain_id;
  50.173 +        num++;
  50.174 +    }
  50.175 +
  50.176 +    adomains = num;
  50.177 +    activated = 0;
  50.178 +
  50.179 +    for ( i = 0; i < adomains; i++ )
  50.180 +        active_ready[i] = 0;
  50.181 +
  50.182 +    return 0;
  50.183 +}
  50.184 +
  50.185 +void xenoprof_log_event(
  50.186 +    struct vcpu *vcpu, unsigned long eip, int mode, int event)
  50.187 +{
  50.188 +    struct xenoprof_vcpu *v;
  50.189 +    struct xenoprof_buf *buf;
  50.190 +    int head;
  50.191 +    int tail;
  50.192 +    int size;
  50.193 +
  50.194 +
  50.195 +    total_samples++;
  50.196 +
   50.197 +    /* Ignore samples from unmonitored domains. Samples taken in the */
   50.198 +    /* idle domain are counted separately from other unmonitored ones. */
  50.199 +    if ( !is_profiled(vcpu->domain) )
  50.200 +    {
  50.201 +        others_samples++;
  50.202 +        return;
  50.203 +    }
  50.204 +
  50.205 +    v = &vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id];
  50.206 +
   50.207 +    /* Sanity check; this should never happen. */
  50.208 +    if ( v->buffer == NULL )
  50.209 +    {
  50.210 +        invalid_buffer_samples++;
  50.211 +        return;
  50.212 +    }
  50.213 +
  50.214 +    buf = vcpu->domain->xenoprof->vcpu[vcpu->vcpu_id].buffer;
  50.215 +
  50.216 +    head = buf->event_head;
  50.217 +    tail = buf->event_tail;
  50.218 +    size = v->event_size;
  50.219 +
  50.220 +    /* make sure indexes in shared buffer are sane */
  50.221 +    if ( (head < 0) || (head >= size) || (tail < 0) || (tail >= size) )
  50.222 +    {
  50.223 +        corrupted_buffer_samples++;
  50.224 +        return;
  50.225 +    }
  50.226 +
  50.227 +    if ( (head == tail - 1) || (head == size - 1 && tail == 0) )
  50.228 +    {
  50.229 +        buf->lost_samples++;
  50.230 +        lost_samples++;
  50.231 +    }
  50.232 +    else
  50.233 +    {
  50.234 +        buf->event_log[head].eip = eip;
  50.235 +        buf->event_log[head].mode = mode;
  50.236 +        buf->event_log[head].event = event;
  50.237 +        head++;
  50.238 +        if ( head >= size )
  50.239 +            head = 0;
  50.240 +        buf->event_head = head;
  50.241 +        active_samples++;
  50.242 +        if ( mode == 0 )
  50.243 +            buf->user_samples++;
  50.244 +        else if ( mode == 1 )
  50.245 +            buf->kernel_samples++;
  50.246 +        else
  50.247 +            buf->xen_samples++;
  50.248 +    }
  50.249 +}
  50.250 +
  50.251 +char *alloc_xenoprof_buf(struct domain *d, int npages)
  50.252 +{
  50.253 +    char *rawbuf;
  50.254 +    int i, order;
  50.255 +
   50.256 +    /* Allocate pages to hold the sample buffer shared with the domain. */
  50.257 +    order  = get_order_from_pages(npages);
  50.258 +    rawbuf = alloc_xenheap_pages(order);
  50.259 +    if ( rawbuf == NULL )
  50.260 +    {
  50.261 +        printk("alloc_xenoprof_buf(): memory allocation failed\n");
   50.262 +        return NULL;
  50.263 +    }
  50.264 +
   50.265 +    /* Share the pages so that the guest kernel can map them. */
  50.266 +    for ( i = 0; i < npages; i++ )
  50.267 +        share_xen_page_with_guest(
  50.268 +            virt_to_page(rawbuf + i * PAGE_SIZE), 
  50.269 +            d, XENSHARE_writable);
  50.270 +
  50.271 +    return rawbuf;
  50.272 +}
  50.273 +
  50.274 +int alloc_xenoprof_struct(struct domain *d, int max_samples)
  50.275 +{
  50.276 +    struct vcpu *v;
  50.277 +    int nvcpu, npages, bufsize, max_bufsize;
  50.278 +    int i;
  50.279 +
  50.280 +    d->xenoprof = xmalloc(struct xenoprof);
  50.281 +
  50.282 +    if ( d->xenoprof == NULL )
  50.283 +    {
   50.284 +        printk("alloc_xenoprof_struct(): memory "
  50.285 +                "allocation (xmalloc) failed\n");
  50.286 +        return -ENOMEM;
  50.287 +    }
  50.288 +
  50.289 +    memset(d->xenoprof, 0, sizeof(*d->xenoprof));
  50.290 +
  50.291 +    nvcpu = 0;
  50.292 +    for_each_vcpu ( d, v )
  50.293 +        nvcpu++;
  50.294 +
  50.295 +    /* reduce buffer size if necessary to limit pages allocated */
  50.296 +    bufsize = sizeof(struct xenoprof_buf) +
  50.297 +        (max_samples - 1) * sizeof(struct event_log);
  50.298 +    max_bufsize = (MAX_OPROF_SHARED_PAGES * PAGE_SIZE) / nvcpu;
  50.299 +    if ( bufsize > max_bufsize )
  50.300 +    {
  50.301 +        bufsize = max_bufsize;
  50.302 +        max_samples = ( (max_bufsize - sizeof(struct xenoprof_buf)) /
  50.303 +                        sizeof(struct event_log) ) + 1;
  50.304 +    }
  50.305 +
  50.306 +    npages = (nvcpu * bufsize - 1) / PAGE_SIZE + 1;
  50.307 +    d->xenoprof->rawbuf = alloc_xenoprof_buf(d, npages);
  50.308 +    if ( d->xenoprof->rawbuf == NULL )
  50.309 +    {
  50.310 +        xfree(d->xenoprof);
  50.311 +        d->xenoprof = NULL;
  50.312 +        return -ENOMEM;
  50.313 +    }
  50.314 +
  50.315 +    d->xenoprof->npages = npages;
  50.316 +    d->xenoprof->nbuf = nvcpu;
  50.317 +    d->xenoprof->bufsize = bufsize;
  50.318 +    d->xenoprof->domain_ready = 0;
  50.319 +    d->xenoprof->domain_type = XENOPROF_DOMAIN_IGNORED;
  50.320 +
  50.321 +    /* Update buffer pointers for active vcpus */
  50.322 +    i = 0;
  50.323 +    for_each_vcpu ( d, v )
  50.324 +    {
  50.325 +        d->xenoprof->vcpu[v->vcpu_id].event_size = max_samples;
  50.326 +        d->xenoprof->vcpu[v->vcpu_id].buffer =
  50.327 +            (struct xenoprof_buf *)&d->xenoprof->rawbuf[i * bufsize];
  50.328 +        d->xenoprof->vcpu[v->vcpu_id].buffer->event_size = max_samples;
  50.329 +        d->xenoprof->vcpu[v->vcpu_id].buffer->vcpu_id = v->vcpu_id;
  50.330 +
  50.331 +        i++;
  50.332 +        /* in the unlikely case that the number of active vcpus changes */
  50.333 +        if ( i >= nvcpu )
  50.334 +            break;
  50.335 +    }
  50.336 +
  50.337 +    return 0;
  50.338 +}
  50.339 +
  50.340 +void free_xenoprof_pages(struct domain *d)
  50.341 +{
  50.342 +    struct xenoprof *x;
  50.343 +    int order;
  50.344 +
  50.345 +    x = d->xenoprof;
  50.346 +    if ( x == NULL )
  50.347 +        return;
  50.348 +
  50.349 +    if ( x->rawbuf != NULL )
  50.350 +    {
  50.351 +        order = get_order_from_pages(x->npages);
  50.352 +        free_xenheap_pages(x->rawbuf, order);
  50.353 +    }
  50.354 +
  50.355 +    xfree(x);
  50.356 +    d->xenoprof = NULL;
  50.357 +}
  50.358 +
  50.359 +int xenoprof_init(int max_samples, xenoprof_init_result_t *init_result)
  50.360 +{
  50.361 +    xenoprof_init_result_t result;
  50.362 +    int is_primary, num_events;
  50.363 +    struct domain *d = current->domain;
  50.364 +    int ret;
  50.365 +
  50.366 +    ret = nmi_init(&num_events, &is_primary, result.cpu_type);
  50.367 +    if ( is_primary )
  50.368 +        primary_profiler = current->domain;
  50.369 +
  50.370 +    if ( ret < 0 )
  50.371 +        goto err;
  50.372 +
  50.373 +    /*
   50.374 +     * We allocate the xenoprof struct and buffers only the first time
   50.375 +     * xenoprof_init is called; the memory is kept until the domain is destroyed.
  50.376 +     */
  50.377 +    if ( (d->xenoprof == NULL) &&
  50.378 +         ((ret = alloc_xenoprof_struct(d, max_samples)) < 0) )
  50.379 +        goto err;
  50.380 +
  50.381 +    xenoprof_reset_buf(d);
  50.382 +
  50.383 +    d->xenoprof->domain_type  = XENOPROF_DOMAIN_IGNORED;
  50.384 +    d->xenoprof->domain_ready = 0;
  50.385 +    d->xenoprof->is_primary = is_primary;
  50.386 +
  50.387 +    result.is_primary = is_primary;
  50.388 +    result.num_events = num_events;
  50.389 +    result.nbuf = d->xenoprof->nbuf;
  50.390 +    result.bufsize = d->xenoprof->bufsize;
  50.391 +    result.buf_maddr = __pa(d->xenoprof->rawbuf);
  50.392 +
  50.393 +    if ( copy_to_user((void *)init_result, (void *)&result, sizeof(result)) )
  50.394 +    {
  50.395 +        ret = -EFAULT;
  50.396 +        goto err;
  50.397 +    }
  50.398 +
  50.399 +    return ret;
  50.400 +
  50.401 + err:
  50.402 +    if ( primary_profiler == current->domain )
  50.403 +        primary_profiler = NULL;
  50.404 +    return ret;
  50.405 +}
  50.406 +
  50.407 +#define PRIV_OP(op) ( (op == XENOPROF_set_active)       \
  50.408 +                   || (op == XENOPROF_reserve_counters) \
  50.409 +                   || (op == XENOPROF_setup_events)     \
  50.410 +                   || (op == XENOPROF_start)            \
  50.411 +                   || (op == XENOPROF_stop)             \
  50.412 +                   || (op == XENOPROF_release_counters) \
  50.413 +                   || (op == XENOPROF_shutdown))
  50.414 +
  50.415 +int do_xenoprof_op(int op, unsigned long arg1, unsigned long arg2)
  50.416 +{
  50.417 +    int ret = 0;
  50.418 +
  50.419 +    if ( PRIV_OP(op) && (current->domain != primary_profiler) )
  50.420 +    {
  50.421 +        printk("xenoprof: dom %d denied privileged operation %d\n",
  50.422 +               current->domain->domain_id, op);
  50.423 +        return -EPERM;
  50.424 +    }
  50.425 +
  50.426 +    switch ( op )
  50.427 +    {
  50.428 +    case XENOPROF_init:
  50.429 +        ret = xenoprof_init((int)arg1, (xenoprof_init_result_t *)arg2);
  50.430 +        break;
  50.431 +
  50.432 +    case XENOPROF_set_active:
  50.433 +        if ( xenoprof_state != XENOPROF_IDLE )
  50.434 +            return -EPERM;
  50.435 +        if ( arg2 > MAX_OPROF_DOMAINS )
  50.436 +            return -E2BIG;
  50.437 +        if ( copy_from_user((void *)&active_domains, 
  50.438 +                            (void *)arg1, arg2*sizeof(int)) )
  50.439 +            return -EFAULT;
  50.440 +        ret = set_active_domains(arg2);
  50.441 +        break;
  50.442 +
  50.443 +    case XENOPROF_reserve_counters:
  50.444 +        if ( xenoprof_state != XENOPROF_IDLE )
  50.445 +            return -EPERM;
  50.446 +        ret = nmi_reserve_counters();
  50.447 +        if ( !ret )
  50.448 +            xenoprof_state = XENOPROF_COUNTERS_RESERVED;
  50.449 +        break;
  50.450 +
  50.451 +    case XENOPROF_setup_events:
  50.452 +        if ( xenoprof_state != XENOPROF_COUNTERS_RESERVED )
  50.453 +            return -EPERM;
  50.454 +        if ( adomains == 0 )
  50.455 +            set_active_domains(0);
  50.456 +
  50.457 +        if ( copy_from_user((void *)&counter_config, (void *)arg1, 
  50.458 +                            arg2 * sizeof(struct op_counter_config)) )
  50.459 +            return -EFAULT;
  50.460 +        ret = nmi_setup_events();
  50.461 +        if ( !ret )
  50.462 +            xenoprof_state = XENOPROF_READY;
  50.463 +        break;
  50.464 +
  50.465 +    case XENOPROF_enable_virq:
  50.466 +        if ( current->domain == primary_profiler )
  50.467 +        {
  50.468 +            nmi_enable_virq();
  50.469 +            xenoprof_reset_stat();
  50.470 +        }
  50.471 +        xenoprof_reset_buf(current->domain);
  50.472 +        ret = set_active(current->domain);
  50.473 +        break;
  50.474 +
  50.475 +    case XENOPROF_start:
  50.476 +        ret = -EPERM;
  50.477 +        if ( (xenoprof_state == XENOPROF_READY) &&
  50.478 +             (activated == adomains) )
  50.479 +            ret = nmi_start();
  50.480 +
  50.481 +        if ( ret == 0 )
  50.482 +            xenoprof_state = XENOPROF_PROFILING;
  50.483 +        break;
  50.484 +
  50.485 +    case XENOPROF_stop:
  50.486 +        if ( xenoprof_state != XENOPROF_PROFILING )
  50.487 +            return -EPERM;
  50.488 +        nmi_stop();
  50.489 +        xenoprof_state = XENOPROF_READY;
  50.490 +        break;
  50.491 +
  50.492 +    case XENOPROF_disable_virq:
  50.493 +        if ( (xenoprof_state == XENOPROF_PROFILING) && 
  50.494 +             (is_active(current->domain)) )
  50.495 +            return -EPERM;
  50.496 +        ret = reset_active(current->domain);
  50.497 +        break;
  50.498 +
  50.499 +    case XENOPROF_release_counters:
  50.500 +        ret = -EPERM;
  50.501 +        if ( (xenoprof_state == XENOPROF_COUNTERS_RESERVED) ||
  50.502 +             (xenoprof_state == XENOPROF_READY) )
  50.503 +        {
  50.504 +            xenoprof_state = XENOPROF_IDLE;
  50.505 +            nmi_release_counters();
  50.506 +            nmi_disable_virq();
  50.507 +            ret = 0;
  50.508 +        }
  50.509 +        break;
  50.510 +
  50.511 +    case XENOPROF_shutdown:
  50.512 +        ret = -EPERM;
  50.513 +        if ( xenoprof_state == XENOPROF_IDLE )
  50.514 +        {
  50.515 +            activated = 0;
   50.516 +            adomains = 0;
  50.517 +            primary_profiler = NULL;
  50.518 +            ret = 0;
  50.519 +        }
  50.520 +        break;
  50.521 +
  50.522 +    default:
  50.523 +        ret = -EINVAL;
  50.524 +    }
  50.525 +
  50.526 +    if ( ret < 0 )
   50.527 +        printk("xenoprof: operation %d failed for dom %d (status: %d)\n",
  50.528 +               op, current->domain->domain_id, ret);
  50.529 +
  50.530 +    return ret;
  50.531 +}
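
The shared buffer is a single-producer/single-consumer ring: Xen advances
event_head in xenoprof_log_event() above, while the guest advances
event_tail. A minimal consumer sketch, with handle_sample() standing in for
guest-defined processing and memory barriers elided:

    extern void handle_sample(uint64_t eip, uint8_t mode, uint8_t event);

    static void drain_samples(xenoprof_buf_t *buf)
    {
        uint32_t tail = buf->event_tail;

        while ( tail != buf->event_head )
        {
            struct event_log *s = &buf->event_log[tail];
            handle_sample(s->eip, s->mode, s->event);
            if ( ++tail >= buf->event_size )
                tail = 0;
        }
        buf->event_tail = tail; /* frees the consumed slots for the producer */
    }
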
    51.1 --- a/xen/arch/x86/shutdown.c	Thu Apr 06 14:22:52 2006 +0100
    51.2 +++ b/xen/arch/x86/shutdown.c	Fri Apr 07 11:52:00 2006 +0100
    51.3 @@ -44,7 +44,7 @@ static inline void kb_wait(void)
    51.4  void __attribute__((noreturn)) __machine_halt(void *unused)
    51.5  {
    51.6      for ( ; ; )
    51.7 -        safe_halt();
    51.8 +        __asm__ __volatile__ ( "hlt" );
    51.9  }
   51.10  
   51.11  void machine_halt(void)
    52.1 --- a/xen/arch/x86/traps.c	Thu Apr 06 14:22:52 2006 +0100
    52.2 +++ b/xen/arch/x86/traps.c	Fri Apr 07 11:52:00 2006 +0100
    52.3 @@ -32,6 +32,7 @@
    52.4  #include <xen/errno.h>
    52.5  #include <xen/mm.h>
    52.6  #include <xen/console.h>
    52.7 +#include <xen/reboot.h>
    52.8  #include <asm/regs.h>
    52.9  #include <xen/delay.h>
   52.10  #include <xen/event.h>
   52.11 @@ -318,8 +319,7 @@ asmlinkage void fatal_trap(int trapnr, s
   52.12      console_force_lock();
   52.13  
   52.14      /* Wait for manual reset. */
   52.15 -    for ( ; ; )
   52.16 -        __asm__ __volatile__ ( "hlt" );
   52.17 +    machine_halt();
   52.18  }
   52.19  
   52.20  static inline int do_trap(int trapnr, char *str,
    53.1 --- a/xen/arch/x86/x86_32/entry.S	Thu Apr 06 14:22:52 2006 +0100
    53.2 +++ b/xen/arch/x86/x86_32/entry.S	Fri Apr 07 11:52:00 2006 +0100
    53.3 @@ -119,7 +119,7 @@ FIX1:   SET_XEN_SEGMENTS(a)
    53.4          movl  $DBLFLT1,%eax
    53.5          pushl %eax                     # EIP
    53.6          pushl %esi                     # error_code/entry_vector
    53.7 -        jmp   error_code
    53.8 +        jmp   handle_exception
    53.9  DBLFLT1:GET_CURRENT(%ebx)
   53.10          jmp   test_all_events
   53.11  failsafe_callback:
   53.12 @@ -381,14 +381,6 @@ domain_crash_synchronous:
   53.13          jmp   __domain_crash_synchronous
   53.14  
   53.15          ALIGN
   53.16 -process_guest_exception_and_events:
   53.17 -        leal VCPU_trap_bounce(%ebx),%edx
   53.18 -        testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%edx)
   53.19 -        jz   test_all_events
   53.20 -        call create_bounce_frame
   53.21 -        jmp  test_all_events
   53.22 -
   53.23 -        ALIGN
   53.24  ENTRY(ret_from_intr)
   53.25          GET_CURRENT(%ebx)
   53.26          movl  UREGS_eflags(%esp),%eax
   53.27 @@ -400,7 +392,7 @@ ENTRY(ret_from_intr)
   53.28  ENTRY(divide_error)
   53.29  	pushl $TRAP_divide_error<<16
   53.30  	ALIGN
   53.31 -error_code:
   53.32 +handle_exception:
   53.33          FIXUP_RING0_GUEST_STACK
   53.34          SAVE_ALL_NOSEGREGS(a)
   53.35          SET_XEN_SEGMENTS(a)
   53.36 @@ -419,7 +411,11 @@ error_code:
   53.37          movb  UREGS_cs(%esp),%al
   53.38          testl $(3|X86_EFLAGS_VM),%eax
   53.39  	jz    restore_all_xen
   53.40 -        jmp   process_guest_exception_and_events
   53.41 +        leal  VCPU_trap_bounce(%ebx),%edx
   53.42 +        testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%edx)
   53.43 +        jz    test_all_events
   53.44 +        call  create_bounce_frame
   53.45 +        jmp   test_all_events
   53.46  
   53.47  exception_with_ints_disabled:
   53.48          movl  UREGS_eflags(%esp),%eax
   53.49 @@ -452,71 +448,71 @@ FATAL_exception_with_ints_disabled:
   53.50                                          
   53.51  ENTRY(coprocessor_error)
   53.52  	pushl $TRAP_copro_error<<16
   53.53 -	jmp error_code
   53.54 +	jmp   handle_exception
   53.55  
   53.56  ENTRY(simd_coprocessor_error)
   53.57  	pushl $TRAP_simd_error<<16
   53.58 -	jmp error_code
   53.59 +	jmp   handle_exception
   53.60  
   53.61  ENTRY(device_not_available)
   53.62  	pushl $TRAP_no_device<<16
   53.63 -        jmp   error_code
   53.64 +        jmp   handle_exception
   53.65  
   53.66  ENTRY(debug)
   53.67  	pushl $TRAP_debug<<16
   53.68 -	jmp error_code
   53.69 +	jmp   handle_exception
   53.70  
   53.71  ENTRY(int3)
   53.72  	pushl $TRAP_int3<<16
   53.73 -	jmp error_code
   53.74 +	jmp   handle_exception
   53.75  
   53.76  ENTRY(overflow)
   53.77  	pushl $TRAP_overflow<<16
   53.78 -	jmp error_code
   53.79 +	jmp   handle_exception
   53.80  
   53.81  ENTRY(bounds)
   53.82  	pushl $TRAP_bounds<<16
   53.83 -	jmp error_code
   53.84 +	jmp   handle_exception
   53.85  
   53.86  ENTRY(invalid_op)
   53.87  	pushl $TRAP_invalid_op<<16
   53.88 -	jmp error_code
   53.89 +	jmp   handle_exception
   53.90  
   53.91  ENTRY(coprocessor_segment_overrun)
   53.92  	pushl $TRAP_copro_seg<<16
   53.93 -	jmp error_code
   53.94 +	jmp   handle_exception
   53.95  
   53.96  ENTRY(invalid_TSS)
   53.97 -        movw $TRAP_invalid_tss,2(%esp)
   53.98 -	jmp error_code
   53.99 +        movw  $TRAP_invalid_tss,2(%esp)
  53.100 +	jmp   handle_exception
  53.101  
  53.102  ENTRY(segment_not_present)
  53.103 -        movw $TRAP_no_segment,2(%esp)
  53.104 -	jmp error_code
  53.105 +        movw  $TRAP_no_segment,2(%esp)
  53.106 +	jmp   handle_exception
  53.107  
  53.108  ENTRY(stack_segment)
  53.109 -        movw $TRAP_stack_error,2(%esp)
  53.110 -	jmp error_code
  53.111 +        movw  $TRAP_stack_error,2(%esp)
  53.112 +	jmp   handle_exception
  53.113  
  53.114  ENTRY(general_protection)
  53.115 -        movw $TRAP_gp_fault,2(%esp)
  53.116 -	jmp error_code
  53.117 +        movw  $TRAP_gp_fault,2(%esp)
  53.118 +	jmp   handle_exception
  53.119  
  53.120  ENTRY(alignment_check)
  53.121 -        movw $TRAP_alignment_check,2(%esp)
  53.122 -	jmp error_code
  53.123 +        movw  $TRAP_alignment_check,2(%esp)
  53.124 +	jmp   handle_exception
  53.125  
  53.126  ENTRY(page_fault)
  53.127 -        movw $TRAP_page_fault,2(%esp)
  53.128 -	jmp error_code
  53.129 +        movw  $TRAP_page_fault,2(%esp)
  53.130 +	jmp   handle_exception
  53.131  
  53.132  ENTRY(machine_check)
  53.133          pushl $TRAP_machine_check<<16
  53.134 -	jmp error_code
  53.135 +	jmp   handle_exception
  53.136  
  53.137  ENTRY(spurious_interrupt_bug)
  53.138          pushl $TRAP_spurious_int<<16
  53.139 -	jmp error_code
  53.140 +	jmp   handle_exception
  53.141  
  53.142  ENTRY(nmi)
  53.143  #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
  53.144 @@ -648,6 +644,8 @@ ENTRY(hypercall_table)
  53.145          .long do_acm_op
  53.146          .long do_nmi_op
  53.147          .long do_arch_sched_op
  53.148 +        .long do_callback_op        /* 30 */
  53.149 +        .long do_xenoprof_op
  53.150          .rept NR_hypercalls-((.-hypercall_table)/4)
  53.151          .long do_ni_hypercall
  53.152          .endr
  53.153 @@ -683,6 +681,8 @@ ENTRY(hypercall_args_table)
  53.154          .byte 1 /* do_acm_op            */
  53.155          .byte 2 /* do_nmi_op            */
  53.156          .byte 2 /* do_arch_sched_op     */
  53.157 +        .byte 2 /* do_callback_op       */  /* 30 */
  53.158 +        .byte 3 /* do_xenoprof_op       */
  53.159          .rept NR_hypercalls-(.-hypercall_args_table)
  53.160          .byte 0 /* do_ni_hypercall      */
  53.161          .endr
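
Guest-side wrappers matching the two new table entries would look roughly as
follows; this is a sketch assuming the _hypercallN() macros provided by the
linux-2.6-xen-sparse headers:

    static inline long HYPERVISOR_callback_op(int cmd, void *arg)
    {
        return _hypercall2(long, callback_op, cmd, arg);
    }

    static inline long HYPERVISOR_xenoprof_op(int op, unsigned long arg1,
                                              unsigned long arg2)
    {
        return _hypercall3(long, xenoprof_op, op, arg1, arg2);
    }
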
    54.1 --- a/xen/arch/x86/x86_32/traps.c	Thu Apr 06 14:22:52 2006 +0100
    54.2 +++ b/xen/arch/x86/x86_32/traps.c	Fri Apr 07 11:52:00 2006 +0100
    54.3 @@ -9,11 +9,14 @@
    54.4  #include <xen/mm.h>
    54.5  #include <xen/irq.h>
    54.6  #include <xen/symbols.h>
    54.7 +#include <xen/reboot.h>
    54.8  #include <asm/current.h>
    54.9  #include <asm/flushtlb.h>
   54.10  #include <asm/hvm/hvm.h>
   54.11  #include <asm/hvm/support.h>
   54.12  
   54.13 +#include <public/callback.h>
   54.14 +
   54.15  /* All CPUs have their own IDT to allow int80 direct trap. */
   54.16  idt_entry_t *idt_tables[NR_CPUS] = { 0 };
   54.17  
   54.18 @@ -178,8 +181,7 @@ asmlinkage void do_double_fault(void)
   54.19      console_force_lock();
   54.20  
   54.21      /* Wait for manual reset. */
   54.22 -    for ( ; ; )
   54.23 -        __asm__ __volatile__ ( "hlt" );
   54.24 +    machine_halt();
   54.25  }
   54.26  
   54.27  unsigned long do_iret(void)
   54.28 @@ -315,20 +317,102 @@ void init_int80_direct_trap(struct vcpu 
   54.29          set_int80_direct_trap(v);
   54.30  }
   54.31  
   54.32 +static long register_guest_callback(struct callback_register *reg)
   54.33 +{
   54.34 +    long ret = 0;
   54.35 +    struct vcpu *v = current;
   54.36 +
   54.37 +    fixup_guest_code_selector(reg->address.cs);
   54.38 +
   54.39 +    switch ( reg->type )
   54.40 +    {
   54.41 +    case CALLBACKTYPE_event:
   54.42 +        v->arch.guest_context.event_callback_cs     = reg->address.cs;
   54.43 +        v->arch.guest_context.event_callback_eip    = reg->address.eip;
   54.44 +        break;
   54.45 +
   54.46 +    case CALLBACKTYPE_failsafe:
   54.47 +        v->arch.guest_context.failsafe_callback_cs  = reg->address.cs;
   54.48 +        v->arch.guest_context.failsafe_callback_eip = reg->address.eip;
   54.49 +        break;
   54.50 +
   54.51 +    default:
   54.52 +        ret = -EINVAL;
   54.53 +        break;
   54.54 +    }
   54.55 +
   54.56 +    return ret;
   54.57 +}
   54.58 +
   54.59 +static long unregister_guest_callback(struct callback_unregister *unreg)
   54.60 +{
   54.61 +    long ret;
   54.62 +
   54.63 +    switch ( unreg->type )
   54.64 +    {
   54.65 +    default:
   54.66 +        ret = -EINVAL;
   54.67 +        break;
   54.68 +    }
   54.69 +
   54.70 +    return ret;
   54.71 +}
   54.72 +
   54.73 +
   54.74 +long do_callback_op(int cmd, GUEST_HANDLE(void) arg)
   54.75 +{
   54.76 +    long ret;
   54.77 +
   54.78 +    switch ( cmd )
   54.79 +    {
   54.80 +    case CALLBACKOP_register:
   54.81 +    {
   54.82 +        struct callback_register reg;
   54.83 +
   54.84 +        ret = -EFAULT;
   54.85 +        if ( copy_from_guest(&reg, arg, 1) )
   54.86 +            break;
   54.87 +
   54.88 +        ret = register_guest_callback(&reg);
   54.89 +    }
   54.90 +    break;
   54.91 +
   54.92 +    case CALLBACKOP_unregister:
   54.93 +    {
   54.94 +        struct callback_unregister unreg;
   54.95 +
   54.96 +        ret = -EFAULT;
   54.97 +        if ( copy_from_guest(&unreg, arg, 1) )
   54.98 +            break;
   54.99 +
  54.100 +        ret = unregister_guest_callback(&unreg);
  54.101 +    }
  54.102 +    break;
  54.103 +
  54.104 +    default:
  54.105 +        ret = -EINVAL;
  54.106 +        break;
  54.107 +    }
  54.108 +
  54.109 +    return ret;
  54.110 +}
  54.111 +
  54.112  long do_set_callbacks(unsigned long event_selector,
  54.113                        unsigned long event_address,
  54.114                        unsigned long failsafe_selector,
  54.115                        unsigned long failsafe_address)
  54.116  {
  54.117 -    struct vcpu *d = current;
  54.118 +    struct callback_register event = {
  54.119 +        .type = CALLBACKTYPE_event,
  54.120 +        .address = { event_selector, event_address },
  54.121 +    };
  54.122 +    struct callback_register failsafe = {
  54.123 +        .type = CALLBACKTYPE_failsafe,
  54.124 +        .address = { failsafe_selector, failsafe_address },
  54.125 +    };
  54.126  
  54.127 -    fixup_guest_code_selector(event_selector);
  54.128 -    fixup_guest_code_selector(failsafe_selector);
  54.129 -
  54.130 -    d->arch.guest_context.event_callback_cs     = event_selector;
  54.131 -    d->arch.guest_context.event_callback_eip    = event_address;
  54.132 -    d->arch.guest_context.failsafe_callback_cs  = failsafe_selector;
  54.133 -    d->arch.guest_context.failsafe_callback_eip = failsafe_address;
  54.134 +    register_guest_callback(&event);
  54.135 +    register_guest_callback(&failsafe);
  54.136  
  54.137      return 0;
  54.138  }
    55.1 --- a/xen/arch/x86/x86_64/entry.S	Thu Apr 06 14:22:52 2006 +0100
    55.2 +++ b/xen/arch/x86/x86_64/entry.S	Fri Apr 07 11:52:00 2006 +0100
    55.3 @@ -68,7 +68,7 @@ FIX1:   popq  -15*8-8(%rsp)            #
    55.4          leaq  DBLFLT1(%rip),%rax
    55.5          pushq %rax                     # RIP
    55.6          pushq %rsi                     # error_code/entry_vector
    55.7 -        jmp   error_code
    55.8 +        jmp   handle_exception
    55.9  DBLFLT1:GET_CURRENT(%rbx)
   55.10          jmp   test_all_events
   55.11  failsafe_callback:
   55.12 @@ -320,15 +320,6 @@ domain_crash_synchronous:
   55.13          jmp  __domain_crash_synchronous
   55.14  
   55.15          ALIGN
   55.16 -/* %rbx: struct vcpu */
   55.17 -process_guest_exception_and_events:
   55.18 -        leaq  VCPU_trap_bounce(%rbx),%rdx
   55.19 -        testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
   55.20 -        jz    test_all_events
   55.21 -        call  create_bounce_frame
   55.22 -        jmp   test_all_events
   55.23 -
   55.24 -        ALIGN
   55.25  /* No special register assumptions. */
   55.26  ENTRY(ret_from_intr)
   55.27          GET_CURRENT(%rbx)
   55.28 @@ -338,7 +329,7 @@ ENTRY(ret_from_intr)
   55.29  
   55.30          ALIGN
   55.31  /* No special register assumptions. */
   55.32 -error_code:
   55.33 +handle_exception:
   55.34          SAVE_ALL
   55.35          testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp)
   55.36          jz    exception_with_ints_disabled
   55.37 @@ -351,7 +342,11 @@ error_code:
   55.38          callq *(%rdx,%rax,8)
   55.39          testb $3,UREGS_cs(%rsp)
   55.40          jz    restore_all_xen
   55.41 -        jmp   process_guest_exception_and_events
   55.42 +        leaq  VCPU_trap_bounce(%rbx),%rdx
   55.43 +        testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
   55.44 +        jz    test_all_events
   55.45 +        call  create_bounce_frame
   55.46 +        jmp   test_all_events
   55.47  
   55.48  /* No special register assumptions. */
   55.49  exception_with_ints_disabled:
   55.50 @@ -384,90 +379,90 @@ FATAL_exception_with_ints_disabled:
   55.51  ENTRY(divide_error)
   55.52          pushq $0
   55.53          movl  $TRAP_divide_error,4(%rsp)
   55.54 -        jmp   error_code
   55.55 +        jmp   handle_exception
   55.56  
   55.57  ENTRY(coprocessor_error)
   55.58          pushq $0
   55.59          movl  $TRAP_copro_error,4(%rsp)
   55.60 -	jmp   error_code
   55.61 +	jmp   handle_exception
   55.62  
   55.63  ENTRY(simd_coprocessor_error)
   55.64          pushq $0
   55.65          movl  $TRAP_simd_error,4(%rsp)
   55.66 -	jmp error_code
   55.67 +	jmp   handle_exception
   55.68  
   55.69  ENTRY(device_not_available)
   55.70          pushq $0
   55.71          movl  $TRAP_no_device,4(%rsp)
   55.72 -        jmp   error_code
   55.73 +        jmp   handle_exception
   55.74  
   55.75  ENTRY(debug)
   55.76          pushq $0
   55.77          movl  $TRAP_debug,4(%rsp)
   55.78 -	jmp   error_code
   55.79 +	jmp   handle_exception
   55.80  
   55.81  ENTRY(int3)
   55.82          pushq $0
   55.83  	movl  $TRAP_int3,4(%rsp)
   55.84 -	jmp   error_code
   55.85 +	jmp   handle_exception
   55.86  
   55.87  ENTRY(overflow)
   55.88          pushq $0
   55.89  	movl  $TRAP_overflow,4(%rsp)
   55.90 -	jmp   error_code
   55.91 +	jmp   handle_exception
   55.92  
   55.93  ENTRY(bounds)
   55.94          pushq $0
   55.95  	movl  $TRAP_bounds,4(%rsp)
   55.96 -	jmp   error_code
   55.97 +	jmp   handle_exception
   55.98  
   55.99  ENTRY(invalid_op)
  55.100          pushq $0
  55.101  	movl  $TRAP_invalid_op,4(%rsp)
  55.102 -	jmp   error_code
  55.103 +	jmp   handle_exception
  55.104  
  55.105  ENTRY(coprocessor_segment_overrun)
  55.106          pushq $0
  55.107  	movl  $TRAP_copro_seg,4(%rsp)
  55.108 -	jmp   error_code
  55.109 +	jmp   handle_exception
  55.110  
  55.111  ENTRY(invalid_TSS)
  55.112          movl  $TRAP_invalid_tss,4(%rsp)
  55.113 -	jmp   error_code
  55.114 +	jmp   handle_exception
  55.115  
  55.116  ENTRY(segment_not_present)
  55.117          movl  $TRAP_no_segment,4(%rsp)
  55.118 -	jmp   error_code
  55.119 +	jmp   handle_exception
  55.120  
  55.121  ENTRY(stack_segment)
  55.122          movl  $TRAP_stack_error,4(%rsp)
  55.123 -	jmp   error_code
  55.124 +	jmp   handle_exception
  55.125  
  55.126  ENTRY(general_protection)
  55.127          movl  $TRAP_gp_fault,4(%rsp)
  55.128 -	jmp   error_code
  55.129 +	jmp   handle_exception
  55.130  
  55.131  ENTRY(alignment_check)
  55.132          movl  $TRAP_alignment_check,4(%rsp)
  55.133 -	jmp   error_code
  55.134 +	jmp   handle_exception
  55.135  
  55.136  ENTRY(page_fault)
  55.137          movl  $TRAP_page_fault,4(%rsp)
  55.138 -	jmp   error_code
  55.139 +	jmp   handle_exception
  55.140  
  55.141  ENTRY(machine_check)
  55.142          pushq $0
  55.143          movl  $TRAP_machine_check,4(%rsp)
  55.144 -	jmp   error_code
  55.145 +	jmp   handle_exception
  55.146  
  55.147  ENTRY(spurious_interrupt_bug)
  55.148          pushq $0
  55.149          movl  $TRAP_spurious_int,4(%rsp)
  55.150 -	jmp   error_code
  55.151 +	jmp   handle_exception
  55.152  
  55.153  ENTRY(double_fault)
  55.154          movl  $TRAP_double_fault,4(%rsp)
  55.155 -        jmp   error_code
  55.156 +        jmp   handle_exception
  55.157  
  55.158  ENTRY(nmi)
  55.159          pushq $0
  55.160 @@ -557,6 +552,8 @@ ENTRY(hypercall_table)
  55.161          .quad do_acm_op
  55.162          .quad do_nmi_op
  55.163          .quad do_arch_sched_op
  55.164 +        .quad do_callback_op        /* 30 */
  55.165 +        .quad do_xenoprof_op
  55.166          .rept NR_hypercalls-((.-hypercall_table)/8)
  55.167          .quad do_ni_hypercall
  55.168          .endr
  55.169 @@ -592,6 +589,8 @@ ENTRY(hypercall_args_table)
  55.170          .byte 1 /* do_acm_op            */
  55.171          .byte 2 /* do_nmi_op            */
  55.172          .byte 2 /* do_arch_sched_op     */
  55.173 +        .byte 2 /* do_callback_op       */  /* 30 */
  55.174 +        .byte 3 /* do_xenoprof_op       */
  55.175          .rept NR_hypercalls-(.-hypercall_args_table)
  55.176          .byte 0 /* do_ni_hypercall      */
  55.177          .endr
    56.1 --- a/xen/arch/x86/x86_64/traps.c	Thu Apr 06 14:22:52 2006 +0100
    56.2 +++ b/xen/arch/x86/x86_64/traps.c	Fri Apr 07 11:52:00 2006 +0100
    56.3 @@ -10,6 +10,7 @@
    56.4  #include <xen/symbols.h>
    56.5  #include <xen/console.h>
    56.6  #include <xen/sched.h>
    56.7 +#include <xen/reboot.h>
    56.8  #include <asm/current.h>
    56.9  #include <asm/flushtlb.h>
   56.10  #include <asm/msr.h>
   56.11 @@ -17,6 +18,8 @@
   56.12  #include <asm/hvm/hvm.h>
   56.13  #include <asm/hvm/support.h>
   56.14  
   56.15 +#include <public/callback.h>
   56.16 +
   56.17  void show_registers(struct cpu_user_regs *regs)
   56.18  {
   56.19      struct cpu_user_regs fault_regs = *regs;
   56.20 @@ -164,8 +167,7 @@ asmlinkage void do_double_fault(struct c
   56.21      console_force_lock();
   56.22  
   56.23      /* Wait for manual reset. */
   56.24 -    for ( ; ; )
   56.25 -        __asm__ __volatile__ ( "hlt" );
   56.26 +    machine_halt();
   56.27  }
   56.28  
   56.29  void toggle_guest_mode(struct vcpu *v)
   56.30 @@ -184,13 +186,19 @@ unsigned long do_iret(void)
   56.31  
   56.32      if ( unlikely(copy_from_user(&iret_saved, (void *)regs->rsp,
   56.33                                   sizeof(iret_saved))) )
   56.34 +    {
   56.35 +        DPRINTK("Fault while reading IRET context from guest stack\n");
   56.36          domain_crash_synchronous();
   56.37 +    }
   56.38  
   56.39      /* Returning to user mode? */
   56.40      if ( (iret_saved.cs & 3) == 3 )
   56.41      {
   56.42          if ( unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
   56.43 -            return -EFAULT;
   56.44 +        {
   56.45 +            DPRINTK("Guest switching to user mode with no user page tables\n");
   56.46 +            domain_crash_synchronous();
   56.47 +        }
   56.48          toggle_guest_mode(v);
   56.49      }
   56.50  
   56.51 @@ -312,15 +320,106 @@ void __init percpu_traps_init(void)
   56.52      wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
   56.53  }
   56.54  
   56.55 +static long register_guest_callback(struct callback_register *reg)
   56.56 +{
   56.57 +    long ret = 0;
   56.58 +    struct vcpu *v = current;
   56.59 +
   56.60 +    switch ( reg->type )
   56.61 +    {
   56.62 +    case CALLBACKTYPE_event:
   56.63 +        v->arch.guest_context.event_callback_eip    = reg->address;
   56.64 +        break;
   56.65 +
   56.66 +    case CALLBACKTYPE_failsafe:
   56.67 +        v->arch.guest_context.failsafe_callback_eip = reg->address;
   56.68 +        break;
   56.69 +
   56.70 +    case CALLBACKTYPE_syscall:
   56.71 +        v->arch.guest_context.syscall_callback_eip  = reg->address;
   56.72 +        break;
   56.73 +
   56.74 +    default:
   56.75 +        ret = -EINVAL;
   56.76 +        break;
   56.77 +    }
   56.78 +
   56.79 +    return ret;
   56.80 +}
   56.81 +
   56.82 +static long unregister_guest_callback(struct callback_unregister *unreg)
   56.83 +{
   56.84 +    long ret;
   56.85 +
   56.86 +    switch ( unreg->type )
   56.87 +    {
   56.88 +    default:
   56.89 +        ret = -EINVAL;
   56.90 +        break;
   56.91 +    }
   56.92 +
   56.93 +    return ret;
   56.94 +}
   56.95 +
   56.96 +
   56.97 +long do_callback_op(int cmd, GUEST_HANDLE(void) arg)
   56.98 +{
   56.99 +    long ret;
  56.100 +
  56.101 +    switch ( cmd )
  56.102 +    {
  56.103 +    case CALLBACKOP_register:
  56.104 +    {
  56.105 +        struct callback_register reg;
  56.106 +
  56.107 +        ret = -EFAULT;
  56.108 +        if ( copy_from_guest(&reg, arg, 1) )
  56.109 +            break;
  56.110 +
  56.111 +        ret = register_guest_callback(&reg);
  56.112 +    }
  56.113 +    break;
  56.114 +
  56.115 +    case CALLBACKOP_unregister:
  56.116 +    {
  56.117 +        struct callback_unregister unreg;
  56.118 +
  56.119 +        ret = -EFAULT;
  56.120 +        if ( copy_from_guest(&unreg, arg, 1) )
  56.121 +            break;
  56.122 +
  56.123 +        ret = unregister_guest_callback(&unreg);
  56.124 +    }
  56.125 +    break;
  56.126 +
  56.127 +    default:
  56.128 +        ret = -EINVAL;
  56.129 +        break;
  56.130 +    }
  56.131 +
  56.132 +    return ret;
  56.133 +}
  56.134 +
  56.135  long do_set_callbacks(unsigned long event_address,
  56.136                        unsigned long failsafe_address,
  56.137                        unsigned long syscall_address)
  56.138  {
  56.139 -    struct vcpu *d = current;
  56.140 +    struct callback_register event = {
  56.141 +        .type = CALLBACKTYPE_event,
  56.142 +        .address = event_address,
  56.143 +    };
  56.144 +    struct callback_register failsafe = {
  56.145 +        .type = CALLBACKTYPE_failsafe,
  56.146 +        .address = failsafe_address,
  56.147 +    };
  56.148 +    struct callback_register syscall = {
  56.149 +        .type = CALLBACKTYPE_syscall,
  56.150 +        .address = syscall_address,
  56.151 +    };
  56.152  
  56.153 -    d->arch.guest_context.event_callback_eip    = event_address;
  56.154 -    d->arch.guest_context.failsafe_callback_eip = failsafe_address;
  56.155 -    d->arch.guest_context.syscall_callback_eip  = syscall_address;
  56.156 +    register_guest_callback(&event);
  56.157 +    register_guest_callback(&failsafe);
  56.158 +    register_guest_callback(&syscall);
  56.159  
  56.160      return 0;
  56.161  }
    57.1 --- a/xen/common/event_channel.c	Thu Apr 06 14:22:52 2006 +0100
    57.2 +++ b/xen/common/event_channel.c	Fri Apr 07 11:52:00 2006 +0100
    57.3 @@ -57,6 +57,7 @@ static int virq_is_global(int virq)
    57.4      {
    57.5      case VIRQ_TIMER:
    57.6      case VIRQ_DEBUG:
    57.7 +    case VIRQ_XENOPROF:
    57.8          rc = 0;
    57.9          break;
   57.10      default:
    58.1 --- a/xen/common/schedule.c	Thu Apr 06 14:22:52 2006 +0100
    58.2 +++ b/xen/common/schedule.c	Fri Apr 07 11:52:00 2006 +0100
    58.3 @@ -413,6 +413,30 @@ long do_sched_op(int cmd, GUEST_HANDLE(v
    58.4          break;
    58.5      }
    58.6  
    58.7 +    case SCHEDOP_remote_shutdown:
    58.8 +    {
    58.9 +        struct domain *d;
   58.10 +        struct sched_remote_shutdown sched_remote_shutdown;
   58.11 +
   58.12 +        if ( !IS_PRIV(current->domain) )
   58.13 +            return -EPERM;
   58.14 +
   58.15 +        ret = -EFAULT;
   58.16 +        if ( copy_from_guest(&sched_remote_shutdown, arg, 1) )
   58.17 +            break;
   58.18 +
   58.19 +        ret = -ESRCH;
   58.20 +        d = find_domain_by_id(sched_remote_shutdown.domain_id);
   58.21 +        if ( d == NULL )
   58.22 +            break;
   58.23 +
   58.24 +        domain_shutdown(d, (u8)sched_remote_shutdown.reason);
   58.25 +        put_domain(d);
   58.26 +        ret = 0;
   58.27 +
   58.28 +        break;
   58.29 +    }
   58.30 +
   58.31      default:
   58.32          ret = -ENOSYS;
   58.33      }
    59.1 --- a/xen/drivers/char/console.c	Thu Apr 06 14:22:52 2006 +0100
    59.2 +++ b/xen/drivers/char/console.c	Fri Apr 07 11:52:00 2006 +0100
    59.3 @@ -520,6 +520,7 @@ void console_force_unlock(void)
    59.4  {
    59.5      console_lock = SPIN_LOCK_UNLOCKED;
    59.6      serial_force_unlock(sercon_handle);
    59.7 +    console_start_sync();
    59.8  }
    59.9  
   59.10  void console_force_lock(void)
    60.1 --- a/xen/include/public/arch-x86_32.h	Thu Apr 06 14:22:52 2006 +0100
    60.2 +++ b/xen/include/public/arch-x86_32.h	Fri Apr 07 11:52:00 2006 +0100
    60.3 @@ -168,6 +168,11 @@ typedef struct {
    60.4      unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
    60.5  } arch_vcpu_info_t;
    60.6  
    60.7 +typedef struct {
    60.8 +    unsigned long cs;
    60.9 +    unsigned long eip;
   60.10 +} xen_callback_t;
   60.11 +
   60.12  #endif /* !__ASSEMBLY__ */
   60.13  
   60.14  /*
    61.1 --- a/xen/include/public/arch-x86_64.h	Thu Apr 06 14:22:52 2006 +0100
    61.2 +++ b/xen/include/public/arch-x86_64.h	Fri Apr 07 11:52:00 2006 +0100
    61.3 @@ -244,6 +244,8 @@ typedef struct {
    61.4      unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
    61.5  } arch_vcpu_info_t;
    61.6  
    61.7 +typedef unsigned long xen_callback_t;
    61.8 +
    61.9  #endif /* !__ASSEMBLY__ */
   61.10  
   61.11  /*
    62.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    62.2 +++ b/xen/include/public/callback.h	Fri Apr 07 11:52:00 2006 +0100
    62.3 @@ -0,0 +1,57 @@
    62.4 +/******************************************************************************
    62.5 + * callback.h
    62.6 + *
    62.7 + * Register guest OS callbacks with Xen.
    62.8 + *
    62.9 + * Copyright (c) 2006, Ian Campbell
   62.10 + */
   62.11 +
   62.12 +#ifndef __XEN_PUBLIC_CALLBACK_H__
   62.13 +#define __XEN_PUBLIC_CALLBACK_H__
   62.14 +
   62.15 +#include "xen.h"
   62.16 +
   62.17 +/*
   62.18 + * Prototype for this hypercall is:
   62.19 + *   long callback_op(int cmd, void *extra_args)
   62.20 + * @cmd        == CALLBACKOP_??? (callback operation).
   62.21 + * @extra_args == Operation-specific extra arguments (NULL if none).
   62.22 + */
   62.23 +
   62.24 +#define CALLBACKTYPE_event                 0
   62.25 +#define CALLBACKTYPE_failsafe              1
   62.26 +#define CALLBACKTYPE_syscall               2 /* x86_64 only */
   62.27 +
   62.28 +/*
   62.29 + * Register a callback.
   62.30 + */
   62.31 +#define CALLBACKOP_register                0
   62.32 +typedef struct callback_register {
   62.33 +     int type;
   62.34 +     xen_callback_t address;
   62.35 +} callback_register_t;
   62.36 +DEFINE_GUEST_HANDLE(callback_register_t);
   62.37 +
   62.38 +/*
   62.39 + * Unregister a callback.
   62.40 + *
   62.41 + * Not all callbacks can be unregistered. -EINVAL will be returned if
   62.42 + * you attempt to unregister such a callback.
   62.43 + */
   62.44 +#define CALLBACKOP_unregister              1
   62.45 +typedef struct callback_unregister {
   62.46 +     int type;
   62.47 +} callback_unregister_t;
   62.48 +DEFINE_GUEST_HANDLE(callback_unregister_t);
   62.49 +
   62.50 +#endif /* __XEN_PUBLIC_CALLBACK_H__ */
   62.51 +
   62.52 +/*
   62.53 + * Local variables:
   62.54 + * mode: C
   62.55 + * c-set-style: "BSD"
   62.56 + * c-basic-offset: 4
   62.57 + * tab-width: 4
   62.58 + * indent-tabs-mode: nil
   62.59 + * End:
   62.60 + */
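
A sketch of a guest registering its event callback through this interface
(x86_32 form, where xen_callback_t is a cs:eip pair). __KERNEL_CS and
hypervisor_callback are assumed guest symbols, and HYPERVISOR_callback_op is
the wrapper sketched after the entry.S hunk above:

    static void init_event_callback(void)
    {
        struct callback_register event = {
            .type    = CALLBACKTYPE_event,
            .address = { __KERNEL_CS, (unsigned long)hypervisor_callback },
        };

        /* Older hypervisors lack this hypercall; a real guest would fall
         * back to the legacy set_callbacks interface on failure. */
        BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event) != 0);
    }
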
    63.1 --- a/xen/include/public/dom0_ops.h	Thu Apr 06 14:22:52 2006 +0100
    63.2 +++ b/xen/include/public/dom0_ops.h	Fri Apr 07 11:52:00 2006 +0100
    63.3 @@ -140,15 +140,16 @@ typedef struct dom0_settime {
    63.4  DEFINE_GUEST_HANDLE(dom0_settime_t);
    63.5  
    63.6  #define DOM0_GETPAGEFRAMEINFO 18
    63.7 +#define LTAB_SHIFT 28
    63.8  #define NOTAB 0         /* normal page */
    63.9 -#define L1TAB (1<<28)
   63.10 -#define L2TAB (2<<28)
   63.11 -#define L3TAB (3<<28)
   63.12 -#define L4TAB (4<<28)
   63.13 +#define L1TAB (1<<LTAB_SHIFT)
   63.14 +#define L2TAB (2<<LTAB_SHIFT)
   63.15 +#define L3TAB (3<<LTAB_SHIFT)
   63.16 +#define L4TAB (4<<LTAB_SHIFT)
   63.17  #define LPINTAB  (1<<31)
   63.18 -#define XTAB  (0xf<<28) /* invalid page */
   63.19 +#define XTAB  (0xf<<LTAB_SHIFT) /* invalid page */
   63.20  #define LTAB_MASK XTAB
   63.21 -#define LTABTYPE_MASK (0x7<<28)
   63.22 +#define LTABTYPE_MASK (0x7<<LTAB_SHIFT)
   63.23  
   63.24  typedef struct dom0_getpageframeinfo {
   63.25      /* IN variables. */
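
With the shift factored out, classifying a returned type field reads
naturally. Note that the XTAB test must use LTAB_MASK, since XTAB sets a bit
that LTABTYPE_MASK does not cover:

    /* Sketch: decoding a dom0_getpageframeinfo type value. */
    static const char *frame_kind(unsigned long type)
    {
        if ( (type & LTAB_MASK) == XTAB )
            return "invalid page";
        switch ( type & LTABTYPE_MASK )
        {
        case L1TAB: return "L1 page table";
        case L2TAB: return "L2 page table";
        case L3TAB: return "L3 page table";
        case L4TAB: return "L4 page table";
        default:    return "normal page"; /* NOTAB */
        }
    }
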
    64.1 --- a/xen/include/public/sched.h	Thu Apr 06 14:22:52 2006 +0100
    64.2 +++ b/xen/include/public/sched.h	Fri Apr 07 11:52:00 2006 +0100
    64.3 @@ -65,6 +65,19 @@ typedef struct sched_poll {
    64.4  DEFINE_GUEST_HANDLE(sched_poll_t);
    64.5  
    64.6  /*
     64.7 + * Request a shutdown of another domain. Its main use is to interpret
     64.8 + * shutdown requests and reasons on behalf of fully-virtualized domains.
     64.9 + * A para-virtualized domain may use SCHEDOP_shutdown directly.
   64.10 + * @arg == pointer to sched_remote_shutdown structure.
   64.11 + */
   64.12 +#define SCHEDOP_remote_shutdown        4
   64.13 +typedef struct sched_remote_shutdown {
   64.14 +    domid_t domain_id;         /* Remote domain ID */
   64.15 +    unsigned int reason;       /* SHUTDOWN_xxx reason */
   64.16 +} sched_remote_shutdown_t;
   64.17 +DEFINE_GUEST_HANDLE(sched_remote_shutdown_t);
   64.18 +
   64.19 +/*
   64.20   * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
   64.21   * software to determine the appropriate action. For the most part, Xen does
   64.22   * not care about the shutdown code.
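
A privileged tool would invoke the new sub-op as below; HYPERVISOR_sched_op
is the standard guest wrapper, and SHUTDOWN_poweroff is one of the SHUTDOWN_*
reason codes that follow in this header:

    /* Sketch: ask Xen to shut down another domain (privileged callers only). */
    static int remote_poweroff(domid_t domid)
    {
        struct sched_remote_shutdown arg = {
            .domain_id = domid,
            .reason    = SHUTDOWN_poweroff,
        };
        return HYPERVISOR_sched_op(SCHEDOP_remote_shutdown, &arg);
    }
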
    65.1 --- a/xen/include/public/xen.h	Thu Apr 06 14:22:52 2006 +0100
    65.2 +++ b/xen/include/public/xen.h	Fri Apr 07 11:52:00 2006 +0100
    65.3 @@ -60,6 +60,8 @@
    65.4  #define __HYPERVISOR_acm_op               27
    65.5  #define __HYPERVISOR_nmi_op               28
    65.6  #define __HYPERVISOR_sched_op             29
    65.7 +#define __HYPERVISOR_callback_op          30
    65.8 +#define __HYPERVISOR_xenoprof_op          31
    65.9  
   65.10  /* 
   65.11   * VIRTUAL INTERRUPTS
   65.12 @@ -76,6 +78,7 @@
   65.13  #define VIRQ_CONSOLE    2  /* G. (DOM0) Bytes received on emergency console. */
   65.14  #define VIRQ_DOM_EXC    3  /* G. (DOM0) Exceptional event for some domain.   */
   65.15  #define VIRQ_DEBUGGER   6  /* G. (DOM0) A domain has paused for debugging.   */
   65.16 +#define VIRQ_XENOPROF   7  /* V. XenOprofile interrupt: new sample available */
   65.17  #define NR_VIRQS        8
   65.18  
   65.19  /*
    66.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    66.2 +++ b/xen/include/public/xenoprof.h	Fri Apr 07 11:52:00 2006 +0100
    66.3 @@ -0,0 +1,83 @@
    66.4 +/******************************************************************************
    66.5 + * xenoprof.h
    66.6 + * 
     66.7 + * Interface for enabling system-wide profiling based on hardware performance
    66.8 + * counters
    66.9 + * 
   66.10 + * Copyright (C) 2005 Hewlett-Packard Co.
   66.11 + * Written by Aravind Menon & Jose Renato Santos
   66.12 + */
   66.13 +
   66.14 +#ifndef __XEN_PUBLIC_XENOPROF_H__
   66.15 +#define __XEN_PUBLIC_XENOPROF_H__
   66.16 +
   66.17 +/*
    66.18 + * Commands to HYPERVISOR_xenoprof_op().
   66.19 + */
   66.20 +#define XENOPROF_init               0
   66.21 +#define XENOPROF_set_active         1
   66.22 +#define XENOPROF_reserve_counters   3
   66.23 +#define XENOPROF_setup_events       4
   66.24 +#define XENOPROF_enable_virq        5
   66.25 +#define XENOPROF_start              6
   66.26 +#define XENOPROF_stop               7
   66.27 +#define XENOPROF_disable_virq       8
   66.28 +#define XENOPROF_release_counters   9
   66.29 +#define XENOPROF_shutdown          10
   66.30 +
   66.31 +#define MAX_OPROF_EVENTS    32
    66.32 +#define MAX_OPROF_DOMAINS   25
   66.33 +#define XENOPROF_CPU_TYPE_SIZE 64
   66.34 +
   66.35 +/* Xenoprof performance events (not Xen events) */
   66.36 +struct event_log {
   66.37 +    uint64_t eip;
   66.38 +    uint8_t mode;
   66.39 +    uint8_t event;
   66.40 +};
   66.41 +
   66.42 +/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */
   66.43 +typedef struct xenoprof_buf {
   66.44 +    uint32_t event_head;
   66.45 +    uint32_t event_tail;
   66.46 +    uint32_t event_size;
   66.47 +    uint32_t vcpu_id;
   66.48 +    uint64_t xen_samples;
   66.49 +    uint64_t kernel_samples;
   66.50 +    uint64_t user_samples;
   66.51 +    uint64_t lost_samples;
   66.52 +    struct event_log event_log[1];
   66.53 +} xenoprof_buf_t;
   66.54 +DEFINE_GUEST_HANDLE(xenoprof_buf_t);
   66.55 +
   66.56 +typedef struct xenoprof_init_result {
   66.57 +    int32_t  num_events;
   66.58 +    int32_t  is_primary;
   66.59 +    int32_t  nbuf;
   66.60 +    int32_t  bufsize;
   66.61 +    uint64_t buf_maddr;
   66.62 +    char cpu_type[XENOPROF_CPU_TYPE_SIZE];
   66.63 +} xenoprof_init_result_t;
   66.64 +DEFINE_GUEST_HANDLE(xenoprof_init_result_t);
   66.65 +
   66.66 +typedef struct xenoprof_counter_config {
   66.67 +    unsigned long count;
   66.68 +    unsigned long enabled;
   66.69 +    unsigned long event;
   66.70 +    unsigned long kernel;
   66.71 +    unsigned long user;
   66.72 +    unsigned long unit_mask;
   66.73 +} xenoprof_counter_config_t;
   66.74 +DEFINE_GUEST_HANDLE(xenoprof_counter_config_t);
   66.75 +
   66.76 +#endif /* __XEN_PUBLIC_XENOPROF_H__ */
   66.77 +
   66.78 +/*
   66.79 + * Local variables:
   66.80 + * mode: C
   66.81 + * c-set-style: "BSD"
   66.82 + * c-basic-offset: 4
   66.83 + * tab-width: 4
   66.84 + * indent-tabs-mode: nil
   66.85 + * End:
   66.86 + */
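
Xen lays the per-VCPU buffers back to back in one shared region (see
alloc_xenoprof_struct() above), so a guest that has mapped buf_maddr can
locate each buffer by a fixed stride. A sketch, where rawbuf is the guest's
mapping of the shared region:

    /* Sketch: buffer i, for 0 <= i < r->nbuf, starts at offset i*bufsize. */
    static xenoprof_buf_t *vcpu_buffer(char *rawbuf,
                                       xenoprof_init_result_t *r, int i)
    {
        return (xenoprof_buf_t *)(rawbuf + i * r->bufsize);
    }
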
    67.1 --- a/xen/include/xen/sched.h	Thu Apr 06 14:22:52 2006 +0100
    67.2 +++ b/xen/include/xen/sched.h	Fri Apr 07 11:52:00 2006 +0100
    67.3 @@ -14,6 +14,7 @@
    67.4  #include <xen/grant_table.h>
    67.5  #include <xen/rangeset.h>
    67.6  #include <asm/domain.h>
    67.7 +#include <xen/xenoprof.h>
    67.8  
    67.9  extern unsigned long volatile jiffies;
   67.10  extern rwlock_t domlist_lock;
   67.11 @@ -155,6 +156,9 @@ struct domain
   67.12  
   67.13      /* Control-plane tools handle for this domain. */
   67.14      xen_domain_handle_t handle;
   67.15 +
   67.16 +    /* OProfile support. */
   67.17 +    struct xenoprof *xenoprof;
   67.18  };
   67.19  
   67.20  struct domain_setup_info
    68.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    68.2 +++ b/xen/include/xen/xenoprof.h	Fri Apr 07 11:52:00 2006 +0100
    68.3 @@ -0,0 +1,42 @@
    68.4 +/******************************************************************************
    68.5 + * xenoprof.h
    68.6 + * 
     68.7 + * Xenoprof: performance profiling support in Xen.
    68.8 + * 
    68.9 + * Copyright (C) 2005 Hewlett-Packard Co.
   68.10 + * written by Aravind Menon & Jose Renato Santos
   68.11 + */
   68.12 +
   68.13 +#ifndef __XEN_XENOPROF_H__
   68.14 +#define __XEN_XENOPROF_H__
   68.15 +
   68.16 +#include <public/xenoprof.h>
   68.17 +
   68.18 +#define XENOPROF_DOMAIN_IGNORED    0
   68.19 +#define XENOPROF_DOMAIN_ACTIVE     1
   68.20 +
   68.21 +#define XENOPROF_IDLE              0
   68.22 +#define XENOPROF_COUNTERS_RESERVED 1
   68.23 +#define XENOPROF_READY             2
   68.24 +#define XENOPROF_PROFILING         3
   68.25 +
   68.26 +struct xenoprof_vcpu {
   68.27 +    int event_size;
   68.28 +    struct xenoprof_buf *buffer;
   68.29 +};
   68.30 +
   68.31 +struct xenoprof {
   68.32 +    char* rawbuf;
   68.33 +    int npages;
   68.34 +    int nbuf;
   68.35 +    int bufsize;
   68.36 +    int domain_type;
   68.37 +    int domain_ready;
   68.38 +    int is_primary;
    68.39 +    struct xenoprof_vcpu vcpu[MAX_VIRT_CPUS];
   68.40 +};
   68.41 +
   68.42 +struct domain;
   68.43 +void free_xenoprof_pages(struct domain *d);
   68.44 +
    68.45 +#endif  /* __XEN_XENOPROF_H__ */
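
The XENOPROF_* values above form the state machine enforced by
do_xenoprof_op(): IDLE -> COUNTERS_RESERVED -> READY -> PROFILING and back.
A sketch of the primary profiler's expected call sequence, using the
HYPERVISOR_xenoprof_op wrapper assumed earlier (error handling elided):

    HYPERVISOR_xenoprof_op(XENOPROF_init, max_samples, (unsigned long)&result);
    HYPERVISOR_xenoprof_op(XENOPROF_set_active, (unsigned long)doms, ndoms);
    HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, 0, 0); /* -> COUNTERS_RESERVED */
    HYPERVISOR_xenoprof_op(XENOPROF_setup_events, (unsigned long)cfg, nctrs); /* -> READY */
    HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, 0, 0); /* marks the domain active */
    HYPERVISOR_xenoprof_op(XENOPROF_start, 0, 0);       /* -> PROFILING */
    /* ... drain the shared buffers on VIRQ_XENOPROF notifications ... */
    HYPERVISOR_xenoprof_op(XENOPROF_stop, 0, 0);             /* -> READY */
    HYPERVISOR_xenoprof_op(XENOPROF_release_counters, 0, 0); /* -> IDLE */
    HYPERVISOR_xenoprof_op(XENOPROF_shutdown, 0, 0);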