ia64/xen-unstable

changeset 12624:6cfe32a69ac6

[IA64] import oprofile, perfmon related files from linux to xen.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>

Updated to 2.6.16.33

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
author awilliam@xenbuild.aw
date Tue Nov 28 11:15:35 2006 -0700 (2006-11-28)
parents 4f4d358aa5e4
children da51aee40456
files xen/arch/ia64/linux-xen/README.origin xen/arch/ia64/linux-xen/perfmon.c xen/arch/ia64/linux-xen/perfmon_default_smpl.c xen/arch/ia64/linux-xen/perfmon_generic.h xen/arch/ia64/linux-xen/perfmon_itanium.h xen/arch/ia64/linux-xen/perfmon_mckinley.h xen/arch/ia64/linux-xen/perfmon_montecito.h xen/arch/ia64/linux/README.origin xen/arch/ia64/linux/carta_random.S xen/include/asm-ia64/linux-xen/asm/README.origin xen/include/asm-ia64/linux-xen/asm/perfmon.h xen/include/asm-ia64/linux-xen/asm/perfmon_default_smpl.h xen/include/asm-ia64/linux-xen/linux/README.origin xen/include/asm-ia64/linux-xen/linux/oprofile.h
line diff
     1.1 --- a/xen/arch/ia64/linux-xen/README.origin	Tue Nov 28 10:37:36 2006 -0700
     1.2 +++ b/xen/arch/ia64/linux-xen/README.origin	Tue Nov 28 11:15:35 2006 -0700
     1.3 @@ -33,3 +33,11 @@ unwind_i.h		-> linux/arch/ia64/kernel/un
     1.4  
     1.5  # The files below are from Linux-2.6.16
     1.6  iosapic.c		-> linux/arch/ia64/kernel/iosapic.c
     1.7 +
     1.8 +# The files below are from Linux-2.6.16.33
     1.9 +perfmon.c		-> linux/arch/ia64/kernel/perfmon.c
    1.10 +perfmon_default_smpl.c	-> linux/arch/ia64/kernel/perfmon_default_smpl.c
    1.11 +perfmon_generic.h	-> linux/arch/ia64/kernel/perfmon_generic.h
    1.12 +perfmon_itanium.h	-> linux/arch/ia64/kernel/perfmon_itanium.h
    1.13 +perfmon_mckinley.h	-> linux/arch/ia64/kernel/perfmon_mckinley.h
    1.14 +perfmon_montecito.h	-> linux/arch/ia64/kernel/perfmon_montecito.h
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/xen/arch/ia64/linux-xen/perfmon.c	Tue Nov 28 11:15:35 2006 -0700
     2.3 @@ -0,0 +1,6852 @@
     2.4 +/*
     2.5 + * This file implements the perfmon-2 subsystem which is used
     2.6 + * to program the IA-64 Performance Monitoring Unit (PMU).
     2.7 + *
     2.8 + * The initial version of perfmon.c was written by
     2.9 + * Ganesh Venkitachalam, IBM Corp.
    2.10 + *
    2.11 + * Then it was modified for perfmon-1.x by Stephane Eranian and
    2.12 + * David Mosberger, Hewlett Packard Co.
    2.13 + *
    2.14 + * Version Perfmon-2.x is a rewrite of perfmon-1.x
    2.15 + * by Stephane Eranian, Hewlett Packard Co.
    2.16 + *
    2.17 + * Copyright (C) 1999-2005  Hewlett Packard Co
    2.18 + *               Stephane Eranian <eranian@hpl.hp.com>
    2.19 + *               David Mosberger-Tang <davidm@hpl.hp.com>
    2.20 + *
    2.21 + * More information about perfmon available at:
    2.22 + * 	http://www.hpl.hp.com/research/linux/perfmon
    2.23 + */
    2.24 +
    2.25 +#include <linux/config.h>
    2.26 +#include <linux/module.h>
    2.27 +#include <linux/kernel.h>
    2.28 +#include <linux/sched.h>
    2.29 +#include <linux/interrupt.h>
    2.30 +#include <linux/smp_lock.h>
    2.31 +#include <linux/proc_fs.h>
    2.32 +#include <linux/seq_file.h>
    2.33 +#include <linux/init.h>
    2.34 +#include <linux/vmalloc.h>
    2.35 +#include <linux/mm.h>
    2.36 +#include <linux/sysctl.h>
    2.37 +#include <linux/list.h>
    2.38 +#include <linux/file.h>
    2.39 +#include <linux/poll.h>
    2.40 +#include <linux/vfs.h>
    2.41 +#include <linux/pagemap.h>
    2.42 +#include <linux/mount.h>
    2.43 +#include <linux/bitops.h>
    2.44 +#include <linux/capability.h>
    2.45 +#include <linux/rcupdate.h>
    2.46 +#include <linux/completion.h>
    2.47 +
    2.48 +#include <asm/errno.h>
    2.49 +#include <asm/intrinsics.h>
    2.50 +#include <asm/page.h>
    2.51 +#include <asm/perfmon.h>
    2.52 +#include <asm/processor.h>
    2.53 +#include <asm/signal.h>
    2.54 +#include <asm/system.h>
    2.55 +#include <asm/uaccess.h>
    2.56 +#include <asm/delay.h>
    2.57 +
    2.58 +#ifdef CONFIG_PERFMON
    2.59 +/*
    2.60 + * perfmon context state
    2.61 + */
    2.62 +#define PFM_CTX_UNLOADED	1	/* context is not loaded onto any task */
    2.63 +#define PFM_CTX_LOADED		2	/* context is loaded onto a task */
    2.64 +#define PFM_CTX_MASKED		3	/* context is loaded but monitoring is masked due to overflow */
    2.65 +#define PFM_CTX_ZOMBIE		4	/* owner of the context is closing it */
    2.66 +
    2.67 +#define PFM_INVALID_ACTIVATION	(~0UL)
    2.68 +
    2.69 +/*
    2.70 + * depth of message queue
    2.71 + */
    2.72 +#define PFM_MAX_MSGS		32
    2.73 +#define PFM_CTXQ_EMPTY(g)	((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
    2.74 +
    2.75 +/*
    2.76 + * type of a PMU register (bitmask).
    2.77 + * bitmask structure:
    2.78 + * 	bit0   : register implemented
    2.79 + * 	bit1   : end marker
    2.80 + * 	bit2-3 : reserved
    2.81 + * 	bit4   : pmc has pmc.pm
    2.82 + * 	bit5   : pmc controls a counter (has pmc.oi), pmd is used as counter
    2.83 + * 	bit6-7 : register type
    2.84 + * 	bit8-31: reserved
    2.85 + */
    2.86 +#define PFM_REG_NOTIMPL		0x0 /* not implemented at all */
    2.87 +#define PFM_REG_IMPL		0x1 /* register implemented */
    2.88 +#define PFM_REG_END		0x2 /* end marker */
    2.89 +#define PFM_REG_MONITOR		(0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
    2.90 +#define PFM_REG_COUNTING	(0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
    2.91 +#define PFM_REG_CONTROL		(0x4<<4|PFM_REG_IMPL) /* PMU control register */
    2.92 +#define	PFM_REG_CONFIG		(0x8<<4|PFM_REG_IMPL) /* configuration register */
    2.93 +#define PFM_REG_BUFFER	 	(0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
    2.94 +
    2.95 +#define PMC_IS_LAST(i)	(pmu_conf->pmc_desc[i].type & PFM_REG_END)
    2.96 +#define PMD_IS_LAST(i)	(pmu_conf->pmd_desc[i].type & PFM_REG_END)
    2.97 +
    2.98 +#define PMC_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_pmds[i].flags &  PFM_REGFL_OVFL_NOTIFY)
    2.99 +
   2.100 +/* i assumed unsigned */
   2.101 +#define PMC_IS_IMPL(i)	  (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
   2.102 +#define PMD_IS_IMPL(i)	  (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))
   2.103 +
   2.104 +/* XXX: these assume that register i is implemented */
   2.105 +#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
   2.106 +#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
   2.107 +#define PMC_IS_MONITOR(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR)  == PFM_REG_MONITOR)
   2.108 +#define PMC_IS_CONTROL(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL)  == PFM_REG_CONTROL)
   2.109 +
   2.110 +#define PMC_DFL_VAL(i)     pmu_conf->pmc_desc[i].default_value
   2.111 +#define PMC_RSVD_MASK(i)   pmu_conf->pmc_desc[i].reserved_mask
   2.112 +#define PMD_PMD_DEP(i)	   pmu_conf->pmd_desc[i].dep_pmd[0]
   2.113 +#define PMC_PMD_DEP(i)	   pmu_conf->pmc_desc[i].dep_pmd[0]
   2.114 +
   2.115 +#define PFM_NUM_IBRS	  IA64_NUM_DBG_REGS
   2.116 +#define PFM_NUM_DBRS	  IA64_NUM_DBG_REGS
   2.117 +
   2.118 +#define CTX_OVFL_NOBLOCK(c)	((c)->ctx_fl_block == 0)
   2.119 +#define CTX_HAS_SMPL(c)		((c)->ctx_fl_is_sampling)
   2.120 +#define PFM_CTX_TASK(h)		(h)->ctx_task
   2.121 +
   2.122 +#define PMU_PMC_OI		5 /* position of pmc.oi bit */
   2.123 +
   2.124 +/* XXX: does not support more than 64 PMDs */
   2.125 +#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
   2.126 +#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
   2.127 +
   2.128 +#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)
   2.129 +
   2.130 +#define CTX_USED_IBR(ctx,n) 	(ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
   2.131 +#define CTX_USED_DBR(ctx,n) 	(ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
   2.132 +#define CTX_USES_DBREGS(ctx)	(((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
   2.133 +#define PFM_CODE_RR	0	/* requesting code range restriction */
    2.134 +#define PFM_DATA_RR	1	/* requesting data range restriction */
   2.135 +
   2.136 +#define PFM_CPUINFO_CLEAR(v)	pfm_get_cpu_var(pfm_syst_info) &= ~(v)
   2.137 +#define PFM_CPUINFO_SET(v)	pfm_get_cpu_var(pfm_syst_info) |= (v)
   2.138 +#define PFM_CPUINFO_GET()	pfm_get_cpu_var(pfm_syst_info)
   2.139 +
   2.140 +#define RDEP(x)	(1UL<<(x))
   2.141 +
   2.142 +/*
   2.143 + * context protection macros
   2.144 + * in SMP:
   2.145 + * 	- we need to protect against CPU concurrency (spin_lock)
   2.146 + * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
   2.147 + * in UP:
   2.148 + * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
   2.149 + *
   2.150 + * spin_lock_irqsave()/spin_lock_irqrestore():
   2.151 + * 	in SMP: local_irq_disable + spin_lock
   2.152 + * 	in UP : local_irq_disable
   2.153 + *
    2.154 + * spin_lock()/spin_unlock():
   2.155 + * 	in UP : removed automatically
   2.156 + * 	in SMP: protect against context accesses from other CPU. interrupts
   2.157 + * 	        are not masked. This is useful for the PMU interrupt handler
   2.158 + * 	        because we know we will not get PMU concurrency in that code.
   2.159 + */
   2.160 +#define PROTECT_CTX(c, f) \
   2.161 +	do {  \
   2.162 +		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
   2.163 +		spin_lock_irqsave(&(c)->ctx_lock, f); \
   2.164 +		DPRINT(("spinlocked ctx %p  by [%d]\n", c, current->pid)); \
   2.165 +	} while(0)
   2.166 +
   2.167 +#define UNPROTECT_CTX(c, f) \
   2.168 +	do { \
   2.169 +		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
   2.170 +		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
   2.171 +	} while(0)
   2.172 +
   2.173 +#define PROTECT_CTX_NOPRINT(c, f) \
   2.174 +	do {  \
   2.175 +		spin_lock_irqsave(&(c)->ctx_lock, f); \
   2.176 +	} while(0)
   2.177 +
   2.178 +
   2.179 +#define UNPROTECT_CTX_NOPRINT(c, f) \
   2.180 +	do { \
   2.181 +		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
   2.182 +	} while(0)
   2.183 +
   2.184 +
   2.185 +#define PROTECT_CTX_NOIRQ(c) \
   2.186 +	do {  \
   2.187 +		spin_lock(&(c)->ctx_lock); \
   2.188 +	} while(0)
   2.189 +
   2.190 +#define UNPROTECT_CTX_NOIRQ(c) \
   2.191 +	do { \
   2.192 +		spin_unlock(&(c)->ctx_lock); \
   2.193 +	} while(0)
   2.194 +
   2.195 +
   2.196 +#ifdef CONFIG_SMP
   2.197 +
   2.198 +#define GET_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)
   2.199 +#define INC_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)++
   2.200 +#define SET_ACTIVATION(c)	(c)->ctx_last_activation = GET_ACTIVATION()
   2.201 +
   2.202 +#else /* !CONFIG_SMP */
   2.203 +#define SET_ACTIVATION(t) 	do {} while(0)
   2.204 +#define GET_ACTIVATION(t) 	do {} while(0)
   2.205 +#define INC_ACTIVATION(t) 	do {} while(0)
   2.206 +#endif /* CONFIG_SMP */
   2.207 +
   2.208 +#define SET_PMU_OWNER(t, c)	do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
   2.209 +#define GET_PMU_OWNER()		pfm_get_cpu_var(pmu_owner)
   2.210 +#define GET_PMU_CTX()		pfm_get_cpu_var(pmu_ctx)
   2.211 +
   2.212 +#define LOCK_PFS(g)	    	spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
   2.213 +#define UNLOCK_PFS(g)	    	spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)
   2.214 +
   2.215 +#define PFM_REG_RETFLAG_SET(flags, val)	do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
   2.216 +
   2.217 +/*
   2.218 + * cmp0 must be the value of pmc0
   2.219 + */
   2.220 +#define PMC0_HAS_OVFL(cmp0)  (cmp0 & ~0x1UL)
   2.221 +
   2.222 +#define PFMFS_MAGIC 0xa0b4d889
   2.223 +
   2.224 +/*
   2.225 + * debugging
   2.226 + */
   2.227 +#define PFM_DEBUGGING 1
   2.228 +#ifdef PFM_DEBUGGING
   2.229 +#define DPRINT(a) \
   2.230 +	do { \
   2.231 +		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
   2.232 +	} while (0)
   2.233 +
   2.234 +#define DPRINT_ovfl(a) \
   2.235 +	do { \
   2.236 +		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
   2.237 +	} while (0)
   2.238 +#endif
   2.239 +
   2.240 +/*
   2.241 + * 64-bit software counter structure
   2.242 + *
   2.243 + * the next_reset_type is applied to the next call to pfm_reset_regs()
   2.244 + */
   2.245 +typedef struct {
   2.246 +	unsigned long	val;		/* virtual 64bit counter value */
   2.247 +	unsigned long	lval;		/* last reset value */
   2.248 +	unsigned long	long_reset;	/* reset value on sampling overflow */
   2.249 +	unsigned long	short_reset;    /* reset value on overflow */
   2.250 +	unsigned long	reset_pmds[4];  /* which other pmds to reset when this counter overflows */
    2.251 +	unsigned long	smpl_pmds[4];   /* which pmds are accessed when this counter overflows */
   2.252 +	unsigned long	seed;		/* seed for random-number generator */
   2.253 +	unsigned long	mask;		/* mask for random-number generator */
   2.254 +	unsigned int 	flags;		/* notify/do not notify */
   2.255 +	unsigned long	eventid;	/* overflow event identifier */
   2.256 +} pfm_counter_t;
   2.257 +
   2.258 +/*
   2.259 + * context flags
   2.260 + */
   2.261 +typedef struct {
    2.262 +	unsigned int block:1;		/* when 1, task will be blocked on user notifications */
   2.263 +	unsigned int system:1;		/* do system wide monitoring */
   2.264 +	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
   2.265 +	unsigned int is_sampling:1;	/* true if using a custom format */
   2.266 +	unsigned int excl_idle:1;	/* exclude idle task in system wide session */
   2.267 +	unsigned int going_zombie:1;	/* context is zombie (MASKED+blocking) */
   2.268 +	unsigned int trap_reason:2;	/* reason for going into pfm_handle_work() */
   2.269 +	unsigned int no_msg:1;		/* no message sent on overflow */
   2.270 +	unsigned int can_restart:1;	/* allowed to issue a PFM_RESTART */
   2.271 +	unsigned int reserved:22;
   2.272 +} pfm_context_flags_t;
   2.273 +
   2.274 +#define PFM_TRAP_REASON_NONE		0x0	/* default value */
   2.275 +#define PFM_TRAP_REASON_BLOCK		0x1	/* we need to block on overflow */
   2.276 +#define PFM_TRAP_REASON_RESET		0x2	/* we need to reset PMDs */
   2.277 +
   2.278 +
   2.279 +/*
   2.280 + * perfmon context: encapsulates all the state of a monitoring session
   2.281 + */
   2.282 +
   2.283 +typedef struct pfm_context {
   2.284 +	spinlock_t		ctx_lock;		/* context protection */
   2.285 +
   2.286 +	pfm_context_flags_t	ctx_flags;		/* bitmask of flags  (block reason incl.) */
   2.287 +	unsigned int		ctx_state;		/* state: active/inactive (no bitfield) */
   2.288 +
   2.289 +	struct task_struct 	*ctx_task;		/* task to which context is attached */
   2.290 +
   2.291 +	unsigned long		ctx_ovfl_regs[4];	/* which registers overflowed (notification) */
   2.292 +
   2.293 +	struct completion	ctx_restart_done;  	/* use for blocking notification mode */
   2.294 +
   2.295 +	unsigned long		ctx_used_pmds[4];	/* bitmask of PMD used            */
   2.296 +	unsigned long		ctx_all_pmds[4];	/* bitmask of all accessible PMDs */
   2.297 +	unsigned long		ctx_reload_pmds[4];	/* bitmask of force reload PMD on ctxsw in */
   2.298 +
   2.299 +	unsigned long		ctx_all_pmcs[4];	/* bitmask of all accessible PMCs */
   2.300 +	unsigned long		ctx_reload_pmcs[4];	/* bitmask of force reload PMC on ctxsw in */
   2.301 +	unsigned long		ctx_used_monitors[4];	/* bitmask of monitor PMC being used */
   2.302 +
   2.303 +	unsigned long		ctx_pmcs[IA64_NUM_PMC_REGS];	/*  saved copies of PMC values */
   2.304 +
   2.305 +	unsigned int		ctx_used_ibrs[1];		/* bitmask of used IBR (speedup ctxsw in) */
   2.306 +	unsigned int		ctx_used_dbrs[1];		/* bitmask of used DBR (speedup ctxsw in) */
   2.307 +	unsigned long		ctx_dbrs[IA64_NUM_DBG_REGS];	/* DBR values (cache) when not loaded */
   2.308 +	unsigned long		ctx_ibrs[IA64_NUM_DBG_REGS];	/* IBR values (cache) when not loaded */
   2.309 +
   2.310 +	pfm_counter_t		ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
   2.311 +
   2.312 +	u64			ctx_saved_psr_up;	/* only contains psr.up value */
   2.313 +
   2.314 +	unsigned long		ctx_last_activation;	/* context last activation number for last_cpu */
   2.315 +	unsigned int		ctx_last_cpu;		/* CPU id of current or last CPU used (SMP only) */
   2.316 +	unsigned int		ctx_cpu;		/* cpu to which perfmon is applied (system wide) */
   2.317 +
    2.318 +	int			ctx_fd;			/* file descriptor used by this context */
   2.319 +	pfm_ovfl_arg_t		ctx_ovfl_arg;		/* argument to custom buffer format handler */
   2.320 +
   2.321 +	pfm_buffer_fmt_t	*ctx_buf_fmt;		/* buffer format callbacks */
   2.322 +	void			*ctx_smpl_hdr;		/* points to sampling buffer header kernel vaddr */
   2.323 +	unsigned long		ctx_smpl_size;		/* size of sampling buffer */
   2.324 +	void			*ctx_smpl_vaddr;	/* user level virtual address of smpl buffer */
   2.325 +
   2.326 +	wait_queue_head_t 	ctx_msgq_wait;
   2.327 +	pfm_msg_t		ctx_msgq[PFM_MAX_MSGS];
   2.328 +	int			ctx_msgq_head;
   2.329 +	int			ctx_msgq_tail;
   2.330 +	struct fasync_struct	*ctx_async_queue;
   2.331 +
   2.332 +	wait_queue_head_t 	ctx_zombieq;		/* termination cleanup wait queue */
   2.333 +} pfm_context_t;
   2.334 +
   2.335 +/*
   2.336 + * magic number used to verify that structure is really
   2.337 + * a perfmon context
   2.338 + */
   2.339 +#define PFM_IS_FILE(f)		((f)->f_op == &pfm_file_ops)
   2.340 +
   2.341 +#define PFM_GET_CTX(t)	 	((pfm_context_t *)(t)->thread.pfm_context)
   2.342 +
   2.343 +#ifdef CONFIG_SMP
   2.344 +#define SET_LAST_CPU(ctx, v)	(ctx)->ctx_last_cpu = (v)
   2.345 +#define GET_LAST_CPU(ctx)	(ctx)->ctx_last_cpu
   2.346 +#else
   2.347 +#define SET_LAST_CPU(ctx, v)	do {} while(0)
   2.348 +#define GET_LAST_CPU(ctx)	do {} while(0)
   2.349 +#endif
   2.350 +
   2.351 +
   2.352 +#define ctx_fl_block		ctx_flags.block
   2.353 +#define ctx_fl_system		ctx_flags.system
   2.354 +#define ctx_fl_using_dbreg	ctx_flags.using_dbreg
   2.355 +#define ctx_fl_is_sampling	ctx_flags.is_sampling
   2.356 +#define ctx_fl_excl_idle	ctx_flags.excl_idle
   2.357 +#define ctx_fl_going_zombie	ctx_flags.going_zombie
   2.358 +#define ctx_fl_trap_reason	ctx_flags.trap_reason
   2.359 +#define ctx_fl_no_msg		ctx_flags.no_msg
   2.360 +#define ctx_fl_can_restart	ctx_flags.can_restart
   2.361 +
   2.362 +#define PFM_SET_WORK_PENDING(t, v)	do { (t)->thread.pfm_needs_checking = v; } while(0);
   2.363 +#define PFM_GET_WORK_PENDING(t)		(t)->thread.pfm_needs_checking
   2.364 +
   2.365 +/*
   2.366 + * global information about all sessions
   2.367 + * mostly used to synchronize between system wide and per-process
   2.368 + */
   2.369 +typedef struct {
   2.370 +	spinlock_t		pfs_lock;		   /* lock the structure */
   2.371 +
   2.372 +	unsigned int		pfs_task_sessions;	   /* number of per task sessions */
   2.373 +	unsigned int		pfs_sys_sessions;	   /* number of per system wide sessions */
   2.374 +	unsigned int		pfs_sys_use_dbregs;	   /* incremented when a system wide session uses debug regs */
   2.375 +	unsigned int		pfs_ptrace_use_dbregs;	   /* incremented when a process uses debug regs */
   2.376 +	struct task_struct	*pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
   2.377 +} pfm_session_t;
   2.378 +
   2.379 +/*
   2.380 + * information about a PMC or PMD.
   2.381 + * dep_pmd[]: a bitmask of dependent PMD registers
   2.382 + * dep_pmc[]: a bitmask of dependent PMC registers
   2.383 + */
   2.384 +typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
   2.385 +typedef struct {
   2.386 +	unsigned int		type;
   2.387 +	int			pm_pos;
   2.388 +	unsigned long		default_value;	/* power-on default value */
   2.389 +	unsigned long		reserved_mask;	/* bitmask of reserved bits */
   2.390 +	pfm_reg_check_t		read_check;
   2.391 +	pfm_reg_check_t		write_check;
   2.392 +	unsigned long		dep_pmd[4];
   2.393 +	unsigned long		dep_pmc[4];
   2.394 +} pfm_reg_desc_t;
   2.395 +
   2.396 +/* assume cnum is a valid monitor */
   2.397 +#define PMC_PM(cnum, val)	(((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)
   2.398 +
   2.399 +/*
   2.400 + * This structure is initialized at boot time and contains
   2.401 + * a description of the PMU main characteristics.
   2.402 + *
   2.403 + * If the probe function is defined, detection is based
   2.404 + * on its return value: 
   2.405 + * 	- 0 means recognized PMU
   2.406 + * 	- anything else means not supported
   2.407 + * When the probe function is not defined, then the pmu_family field
   2.408 + * is used and it must match the host CPU family such that:
   2.409 + * 	- cpu->family & config->pmu_family != 0
   2.410 + */
   2.411 +typedef struct {
   2.412 +	unsigned long  ovfl_val;	/* overflow value for counters */
   2.413 +
   2.414 +	pfm_reg_desc_t *pmc_desc;	/* detailed PMC register dependencies descriptions */
   2.415 +	pfm_reg_desc_t *pmd_desc;	/* detailed PMD register dependencies descriptions */
   2.416 +
   2.417 +	unsigned int   num_pmcs;	/* number of PMCS: computed at init time */
   2.418 +	unsigned int   num_pmds;	/* number of PMDS: computed at init time */
   2.419 +	unsigned long  impl_pmcs[4];	/* bitmask of implemented PMCS */
   2.420 +	unsigned long  impl_pmds[4];	/* bitmask of implemented PMDS */
   2.421 +
   2.422 +	char	      *pmu_name;	/* PMU family name */
   2.423 +	unsigned int  pmu_family;	/* cpuid family pattern used to identify pmu */
   2.424 +	unsigned int  flags;		/* pmu specific flags */
   2.425 +	unsigned int  num_ibrs;		/* number of IBRS: computed at init time */
   2.426 +	unsigned int  num_dbrs;		/* number of DBRS: computed at init time */
   2.427 +	unsigned int  num_counters;	/* PMC/PMD counting pairs : computed at init time */
   2.428 +	int           (*probe)(void);   /* customized probe routine */
   2.429 +	unsigned int  use_rr_dbregs:1;	/* set if debug registers used for range restriction */
   2.430 +} pmu_config_t;
   2.431 +/*
   2.432 + * PMU specific flags
   2.433 + */
   2.434 +#define PFM_PMU_IRQ_RESEND	1	/* PMU needs explicit IRQ resend */
   2.435 +
   2.436 +/*
   2.437 + * debug register related type definitions
   2.438 + */
   2.439 +typedef struct {
   2.440 +	unsigned long ibr_mask:56;
   2.441 +	unsigned long ibr_plm:4;
   2.442 +	unsigned long ibr_ig:3;
   2.443 +	unsigned long ibr_x:1;
   2.444 +} ibr_mask_reg_t;
   2.445 +
   2.446 +typedef struct {
   2.447 +	unsigned long dbr_mask:56;
   2.448 +	unsigned long dbr_plm:4;
   2.449 +	unsigned long dbr_ig:2;
   2.450 +	unsigned long dbr_w:1;
   2.451 +	unsigned long dbr_r:1;
   2.452 +} dbr_mask_reg_t;
   2.453 +
   2.454 +typedef union {
   2.455 +	unsigned long  val;
   2.456 +	ibr_mask_reg_t ibr;
   2.457 +	dbr_mask_reg_t dbr;
   2.458 +} dbreg_t;
   2.459 +
   2.460 +
   2.461 +/*
   2.462 + * perfmon command descriptions
   2.463 + */
   2.464 +typedef struct {
   2.465 +	int		(*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
   2.466 +	char		*cmd_name;
   2.467 +	int		cmd_flags;
   2.468 +	unsigned int	cmd_narg;
   2.469 +	size_t		cmd_argsize;
   2.470 +	int		(*cmd_getsize)(void *arg, size_t *sz);
   2.471 +} pfm_cmd_desc_t;
   2.472 +
   2.473 +#define PFM_CMD_FD		0x01	/* command requires a file descriptor */
   2.474 +#define PFM_CMD_ARG_READ	0x02	/* command must read argument(s) */
   2.475 +#define PFM_CMD_ARG_RW		0x04	/* command must read/write argument(s) */
   2.476 +#define PFM_CMD_STOP		0x08	/* command does not work on zombie context */
   2.477 +
   2.478 +
   2.479 +#define PFM_CMD_NAME(cmd)	pfm_cmd_tab[(cmd)].cmd_name
   2.480 +#define PFM_CMD_READ_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
   2.481 +#define PFM_CMD_RW_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
   2.482 +#define PFM_CMD_USE_FD(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
   2.483 +#define PFM_CMD_STOPPED(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)
   2.484 +
   2.485 +#define PFM_CMD_ARG_MANY	-1 /* cannot be zero */
   2.486 +
   2.487 +typedef struct {
   2.488 +	unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
   2.489 +	unsigned long pfm_replay_ovfl_intr_count;	/* keep track of replayed ovfl interrupts */
   2.490 +	unsigned long pfm_ovfl_intr_count; 		/* keep track of ovfl interrupts */
   2.491 +	unsigned long pfm_ovfl_intr_cycles;		/* cycles spent processing ovfl interrupts */
   2.492 +	unsigned long pfm_ovfl_intr_cycles_min;		/* min cycles spent processing ovfl interrupts */
   2.493 +	unsigned long pfm_ovfl_intr_cycles_max;		/* max cycles spent processing ovfl interrupts */
   2.494 +	unsigned long pfm_smpl_handler_calls;
   2.495 +	unsigned long pfm_smpl_handler_cycles;
   2.496 +	char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
   2.497 +} pfm_stats_t;
   2.498 +
   2.499 +/*
   2.500 + * perfmon internal variables
   2.501 + */
   2.502 +static pfm_stats_t		pfm_stats[NR_CPUS];
   2.503 +static pfm_session_t		pfm_sessions;	/* global sessions information */
   2.504 +
   2.505 +static DEFINE_SPINLOCK(pfm_alt_install_check);
   2.506 +static pfm_intr_handler_desc_t  *pfm_alt_intr_handler;
   2.507 +
   2.508 +static struct proc_dir_entry 	*perfmon_dir;
   2.509 +static pfm_uuid_t		pfm_null_uuid = {0,};
   2.510 +
   2.511 +static spinlock_t		pfm_buffer_fmt_lock;
   2.512 +static LIST_HEAD(pfm_buffer_fmt_list);
   2.513 +
   2.514 +static pmu_config_t		*pmu_conf;
   2.515 +
   2.516 +/* sysctl() controls */
   2.517 +pfm_sysctl_t pfm_sysctl;
   2.518 +EXPORT_SYMBOL(pfm_sysctl);
   2.519 +
   2.520 +static ctl_table pfm_ctl_table[]={
   2.521 +	{1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
   2.522 +	{2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
   2.523 +	{3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
   2.524 +	{4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
   2.525 +	{ 0, },
   2.526 +};
   2.527 +static ctl_table pfm_sysctl_dir[] = {
   2.528 +	{1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
   2.529 + 	{0,},
   2.530 +};
   2.531 +static ctl_table pfm_sysctl_root[] = {
   2.532 +	{1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
   2.533 + 	{0,},
   2.534 +};
   2.535 +static struct ctl_table_header *pfm_sysctl_header;
   2.536 +
   2.537 +static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
   2.538 +static int pfm_flush(struct file *filp);
   2.539 +
   2.540 +#define pfm_get_cpu_var(v)		__ia64_per_cpu_var(v)
   2.541 +#define pfm_get_cpu_data(a,b)		per_cpu(a, b)
   2.542 +
   2.543 +static inline void
   2.544 +pfm_put_task(struct task_struct *task)
   2.545 +{
   2.546 +	if (task != current) put_task_struct(task);
   2.547 +}
   2.548 +
   2.549 +static inline void
   2.550 +pfm_set_task_notify(struct task_struct *task)
   2.551 +{
   2.552 +	struct thread_info *info;
   2.553 +
   2.554 +	info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
   2.555 +	set_bit(TIF_NOTIFY_RESUME, &info->flags);
   2.556 +}
   2.557 +
   2.558 +static inline void
   2.559 +pfm_clear_task_notify(void)
   2.560 +{
   2.561 +	clear_thread_flag(TIF_NOTIFY_RESUME);
   2.562 +}
   2.563 +
   2.564 +static inline void
   2.565 +pfm_reserve_page(unsigned long a)
   2.566 +{
   2.567 +	SetPageReserved(vmalloc_to_page((void *)a));
   2.568 +}
   2.569 +static inline void
   2.570 +pfm_unreserve_page(unsigned long a)
   2.571 +{
   2.572 +	ClearPageReserved(vmalloc_to_page((void*)a));
   2.573 +}
   2.574 +
   2.575 +static inline unsigned long
   2.576 +pfm_protect_ctx_ctxsw(pfm_context_t *x)
   2.577 +{
   2.578 +	spin_lock(&(x)->ctx_lock);
   2.579 +	return 0UL;
   2.580 +}
   2.581 +
   2.582 +static inline void
   2.583 +pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
   2.584 +{
   2.585 +	spin_unlock(&(x)->ctx_lock);
   2.586 +}
   2.587 +
   2.588 +static inline unsigned int
   2.589 +pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
   2.590 +{
   2.591 +	return do_munmap(mm, addr, len);
   2.592 +}
   2.593 +
   2.594 +static inline unsigned long 
   2.595 +pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
   2.596 +{
   2.597 +	return get_unmapped_area(file, addr, len, pgoff, flags);
   2.598 +}
   2.599 +
   2.600 +
   2.601 +static struct super_block *
   2.602 +pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
   2.603 +{
   2.604 +	return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC);
   2.605 +}
   2.606 +
   2.607 +static struct file_system_type pfm_fs_type = {
   2.608 +	.name     = "pfmfs",
   2.609 +	.get_sb   = pfmfs_get_sb,
   2.610 +	.kill_sb  = kill_anon_super,
   2.611 +};
   2.612 +
   2.613 +DEFINE_PER_CPU(unsigned long, pfm_syst_info);
   2.614 +DEFINE_PER_CPU(struct task_struct *, pmu_owner);
   2.615 +DEFINE_PER_CPU(pfm_context_t  *, pmu_ctx);
   2.616 +DEFINE_PER_CPU(unsigned long, pmu_activation_number);
   2.617 +EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info);
   2.618 +
   2.619 +
   2.620 +/* forward declaration */
   2.621 +static struct file_operations pfm_file_ops;
   2.622 +
   2.623 +/*
   2.624 + * forward declarations
   2.625 + */
   2.626 +#ifndef CONFIG_SMP
   2.627 +static void pfm_lazy_save_regs (struct task_struct *ta);
   2.628 +#endif
   2.629 +
   2.630 +void dump_pmu_state(const char *);
   2.631 +static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
   2.632 +
   2.633 +#include "perfmon_itanium.h"
   2.634 +#include "perfmon_mckinley.h"
   2.635 +#include "perfmon_montecito.h"
   2.636 +#include "perfmon_generic.h"
   2.637 +
   2.638 +static pmu_config_t *pmu_confs[]={
   2.639 +	&pmu_conf_mont,
   2.640 +	&pmu_conf_mck,
   2.641 +	&pmu_conf_ita,
   2.642 +	&pmu_conf_gen, /* must be last */
   2.643 +	NULL
   2.644 +};
   2.645 +
   2.646 +
   2.647 +static int pfm_end_notify_user(pfm_context_t *ctx);
   2.648 +
   2.649 +static inline void
   2.650 +pfm_clear_psr_pp(void)
   2.651 +{
   2.652 +	ia64_rsm(IA64_PSR_PP);
   2.653 +	ia64_srlz_i();
   2.654 +}
   2.655 +
   2.656 +static inline void
   2.657 +pfm_set_psr_pp(void)
   2.658 +{
   2.659 +	ia64_ssm(IA64_PSR_PP);
   2.660 +	ia64_srlz_i();
   2.661 +}
   2.662 +
   2.663 +static inline void
   2.664 +pfm_clear_psr_up(void)
   2.665 +{
   2.666 +	ia64_rsm(IA64_PSR_UP);
   2.667 +	ia64_srlz_i();
   2.668 +}
   2.669 +
   2.670 +static inline void
   2.671 +pfm_set_psr_up(void)
   2.672 +{
   2.673 +	ia64_ssm(IA64_PSR_UP);
   2.674 +	ia64_srlz_i();
   2.675 +}
   2.676 +
   2.677 +static inline unsigned long
   2.678 +pfm_get_psr(void)
   2.679 +{
   2.680 +	unsigned long tmp;
   2.681 +	tmp = ia64_getreg(_IA64_REG_PSR);
   2.682 +	ia64_srlz_i();
   2.683 +	return tmp;
   2.684 +}
   2.685 +
   2.686 +static inline void
   2.687 +pfm_set_psr_l(unsigned long val)
   2.688 +{
   2.689 +	ia64_setreg(_IA64_REG_PSR_L, val);
   2.690 +	ia64_srlz_i();
   2.691 +}
   2.692 +
   2.693 +static inline void
   2.694 +pfm_freeze_pmu(void)
   2.695 +{
   2.696 +	ia64_set_pmc(0,1UL);
   2.697 +	ia64_srlz_d();
   2.698 +}
   2.699 +
   2.700 +static inline void
   2.701 +pfm_unfreeze_pmu(void)
   2.702 +{
   2.703 +	ia64_set_pmc(0,0UL);
   2.704 +	ia64_srlz_d();
   2.705 +}
   2.706 +
   2.707 +static inline void
   2.708 +pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
   2.709 +{
   2.710 +	int i;
   2.711 +
   2.712 +	for (i=0; i < nibrs; i++) {
   2.713 +		ia64_set_ibr(i, ibrs[i]);
   2.714 +		ia64_dv_serialize_instruction();
   2.715 +	}
   2.716 +	ia64_srlz_i();
   2.717 +}
   2.718 +
   2.719 +static inline void
   2.720 +pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
   2.721 +{
   2.722 +	int i;
   2.723 +
   2.724 +	for (i=0; i < ndbrs; i++) {
   2.725 +		ia64_set_dbr(i, dbrs[i]);
   2.726 +		ia64_dv_serialize_data();
   2.727 +	}
   2.728 +	ia64_srlz_d();
   2.729 +}
   2.730 +
   2.731 +/*
   2.732 + * PMD[i] must be a counter. no check is made
   2.733 + */
   2.734 +static inline unsigned long
   2.735 +pfm_read_soft_counter(pfm_context_t *ctx, int i)
   2.736 +{
   2.737 +	return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val);
   2.738 +}
   2.739 +
   2.740 +/*
   2.741 + * PMD[i] must be a counter. no check is made
   2.742 + */
   2.743 +static inline void
   2.744 +pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
   2.745 +{
   2.746 +	unsigned long ovfl_val = pmu_conf->ovfl_val;
   2.747 +
   2.748 +	ctx->ctx_pmds[i].val = val  & ~ovfl_val;
   2.749 +	/*
    2.750 +	 * writing to the unimplemented part is ignored, so we do not need to
   2.751 +	 * mask off top part
   2.752 +	 */
   2.753 +	ia64_set_pmd(i, val & ovfl_val);
   2.754 +}
   2.755 +
   2.756 +static pfm_msg_t *
   2.757 +pfm_get_new_msg(pfm_context_t *ctx)
   2.758 +{
   2.759 +	int idx, next;
   2.760 +
   2.761 +	next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;
   2.762 +
   2.763 +	DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
   2.764 +	if (next == ctx->ctx_msgq_head) return NULL;
   2.765 +
   2.766 + 	idx = 	ctx->ctx_msgq_tail;
   2.767 +	ctx->ctx_msgq_tail = next;
   2.768 +
   2.769 +	DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));
   2.770 +
   2.771 +	return ctx->ctx_msgq+idx;
   2.772 +}
   2.773 +
   2.774 +static pfm_msg_t *
   2.775 +pfm_get_next_msg(pfm_context_t *ctx)
   2.776 +{
   2.777 +	pfm_msg_t *msg;
   2.778 +
   2.779 +	DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
   2.780 +
   2.781 +	if (PFM_CTXQ_EMPTY(ctx)) return NULL;
   2.782 +
   2.783 +	/*
   2.784 +	 * get oldest message
   2.785 +	 */
   2.786 +	msg = ctx->ctx_msgq+ctx->ctx_msgq_head;
   2.787 +
   2.788 +	/*
   2.789 +	 * and move forward
   2.790 +	 */
   2.791 +	ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS;
   2.792 +
   2.793 +	DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type));
   2.794 +
   2.795 +	return msg;
   2.796 +}
   2.797 +
   2.798 +static void
   2.799 +pfm_reset_msgq(pfm_context_t *ctx)
   2.800 +{
   2.801 +	ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
   2.802 +	DPRINT(("ctx=%p msgq reset\n", ctx));
   2.803 +}
   2.804 +
   2.805 +static void *
   2.806 +pfm_rvmalloc(unsigned long size)
   2.807 +{
   2.808 +	void *mem;
   2.809 +	unsigned long addr;
   2.810 +
   2.811 +	size = PAGE_ALIGN(size);
   2.812 +	mem  = vmalloc(size);
   2.813 +	if (mem) {
   2.814 +		//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
   2.815 +		memset(mem, 0, size);
   2.816 +		addr = (unsigned long)mem;
   2.817 +		while (size > 0) {
   2.818 +			pfm_reserve_page(addr);
   2.819 +			addr+=PAGE_SIZE;
   2.820 +			size-=PAGE_SIZE;
   2.821 +		}
   2.822 +	}
   2.823 +	return mem;
   2.824 +}
   2.825 +
   2.826 +static void
   2.827 +pfm_rvfree(void *mem, unsigned long size)
   2.828 +{
   2.829 +	unsigned long addr;
   2.830 +
   2.831 +	if (mem) {
   2.832 +		DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
   2.833 +		addr = (unsigned long) mem;
   2.834 +		while ((long) size > 0) {
   2.835 +			pfm_unreserve_page(addr);
   2.836 +			addr+=PAGE_SIZE;
   2.837 +			size-=PAGE_SIZE;
   2.838 +		}
   2.839 +		vfree(mem);
   2.840 +	}
   2.841 +	return;
   2.842 +}
   2.843 +
   2.844 +static pfm_context_t *
   2.845 +pfm_context_alloc(void)
   2.846 +{
   2.847 +	pfm_context_t *ctx;
   2.848 +
   2.849 +	/* 
   2.850 +	 * allocate context descriptor 
   2.851 +	 * must be able to free with interrupts disabled
   2.852 +	 */
   2.853 +	ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
   2.854 +	if (ctx) {
   2.855 +		memset(ctx, 0, sizeof(pfm_context_t));
   2.856 +		DPRINT(("alloc ctx @%p\n", ctx));
   2.857 +	}
   2.858 +	return ctx;
   2.859 +}
   2.860 +
   2.861 +static void
   2.862 +pfm_context_free(pfm_context_t *ctx)
   2.863 +{
   2.864 +	if (ctx) {
   2.865 +		DPRINT(("free ctx @%p\n", ctx));
   2.866 +		kfree(ctx);
   2.867 +	}
   2.868 +}
   2.869 +
   2.870 +static void
   2.871 +pfm_mask_monitoring(struct task_struct *task)
   2.872 +{
   2.873 +	pfm_context_t *ctx = PFM_GET_CTX(task);
   2.874 +	struct thread_struct *th = &task->thread;
   2.875 +	unsigned long mask, val, ovfl_mask;
   2.876 +	int i;
   2.877 +
   2.878 +	DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
   2.879 +
   2.880 +	ovfl_mask = pmu_conf->ovfl_val;
   2.881 +	/*
   2.882 +	 * monitoring can only be masked as a result of a valid
   2.883 +	 * counter overflow. In UP, it means that the PMU still
   2.884 +	 * has an owner. Note that the owner can be different
   2.885 +	 * from the current task. However the PMU state belongs
   2.886 +	 * to the owner.
   2.887 +	 * In SMP, a valid overflow only happens when task is
   2.888 +	 * current. Therefore if we come here, we know that
   2.889 +	 * the PMU state belongs to the current task, therefore
   2.890 +	 * we can access the live registers.
   2.891 +	 *
   2.892 +	 * So in both cases, the live register contains the owner's
   2.893 +	 * state. We can ONLY touch the PMU registers and NOT the PSR.
   2.894 +	 *
   2.895 +	 * As a consequence to this call, the thread->pmds[] array
   2.896 +	 * contains stale information which must be ignored
   2.897 +	 * when context is reloaded AND monitoring is active (see
   2.898 +	 * pfm_restart).
   2.899 +	 */
   2.900 +	mask = ctx->ctx_used_pmds[0];
   2.901 +	for (i = 0; mask; i++, mask>>=1) {
   2.902 +		/* skip non used pmds */
   2.903 +		if ((mask & 0x1) == 0) continue;
   2.904 +		val = ia64_get_pmd(i);
   2.905 +
   2.906 +		if (PMD_IS_COUNTING(i)) {
   2.907 +			/*
   2.908 +		 	 * we rebuild the full 64 bit value of the counter
   2.909 +		 	 */
   2.910 +			ctx->ctx_pmds[i].val += (val & ovfl_mask);
   2.911 +		} else {
   2.912 +			ctx->ctx_pmds[i].val = val;
   2.913 +		}
   2.914 +		DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
   2.915 +			i,
   2.916 +			ctx->ctx_pmds[i].val,
   2.917 +			val & ovfl_mask));
   2.918 +	}
   2.919 +	/*
   2.920 +	 * mask monitoring by setting the privilege level to 0
   2.921 +	 * we cannot use psr.pp/psr.up for this, it is controlled by
   2.922 +	 * the user
   2.923 +	 *
   2.924 +	 * if task is current, modify actual registers, otherwise modify
   2.925 +	 * thread save state, i.e., what will be restored in pfm_load_regs()
   2.926 +	 */
   2.927 +	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
   2.928 +	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
   2.929 +		if ((mask & 0x1) == 0UL) continue;
   2.930 +		ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
   2.931 +		th->pmcs[i] &= ~0xfUL;
   2.932 +		DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
   2.933 +	}
   2.934 +	/*
   2.935 +	 * make all of this visible
   2.936 +	 */
   2.937 +	ia64_srlz_d();
   2.938 +}
   2.939 +
   2.940 +/*
   2.941 + * must always be done with task == current
   2.942 + *
   2.943 + * context must be in MASKED state when calling
   2.944 + */
   2.945 +static void
   2.946 +pfm_restore_monitoring(struct task_struct *task)
   2.947 +{
   2.948 +	pfm_context_t *ctx = PFM_GET_CTX(task);
   2.949 +	struct thread_struct *th = &task->thread;
   2.950 +	unsigned long mask, ovfl_mask;
   2.951 +	unsigned long psr, val;
   2.952 +	int i, is_system;
   2.953 +
   2.954 +	is_system = ctx->ctx_fl_system;
   2.955 +	ovfl_mask = pmu_conf->ovfl_val;
   2.956 +
   2.957 +	if (task != current) {
   2.958 +		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
   2.959 +		return;
   2.960 +	}
   2.961 +	if (ctx->ctx_state != PFM_CTX_MASKED) {
   2.962 +		printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
   2.963 +			task->pid, current->pid, ctx->ctx_state);
   2.964 +		return;
   2.965 +	}
   2.966 +	psr = pfm_get_psr();
   2.967 +	/*
   2.968 +	 * monitoring is masked via the PMC.
   2.969 +	 * As we restore their value, we do not want each counter to
   2.970 +	 * restart right away. We stop monitoring using the PSR,
   2.971 +	 * restore the PMC (and PMD) and then re-establish the psr
   2.972 +	 * as it was. Note that there can be no pending overflow at
   2.973 +	 * this point, because monitoring was MASKED.
   2.974 +	 *
   2.975 +	 * system-wide session are pinned and self-monitoring
   2.976 +	 */
   2.977 +	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
   2.978 +		/* disable dcr pp */
   2.979 +		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
   2.980 +		pfm_clear_psr_pp();
   2.981 +	} else {
   2.982 +		pfm_clear_psr_up();
   2.983 +	}
   2.984 +	/*
   2.985 +	 * first, we restore the PMD
   2.986 +	 */
   2.987 +	mask = ctx->ctx_used_pmds[0];
   2.988 +	for (i = 0; mask; i++, mask>>=1) {
   2.989 +		/* skip non used pmds */
   2.990 +		if ((mask & 0x1) == 0) continue;
   2.991 +
   2.992 +		if (PMD_IS_COUNTING(i)) {
   2.993 +			/*
   2.994 +			 * we split the 64bit value according to
   2.995 +			 * counter width
   2.996 +			 */
   2.997 +			val = ctx->ctx_pmds[i].val & ovfl_mask;
   2.998 +			ctx->ctx_pmds[i].val &= ~ovfl_mask;
   2.999 +		} else {
  2.1000 +			val = ctx->ctx_pmds[i].val;
  2.1001 +		}
  2.1002 +		ia64_set_pmd(i, val);
  2.1003 +
  2.1004 +		DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
  2.1005 +			i,
  2.1006 +			ctx->ctx_pmds[i].val,
  2.1007 +			val));
  2.1008 +	}
  2.1009 +	/*
  2.1010 +	 * restore the PMCs
  2.1011 +	 */
  2.1012 +	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
  2.1013 +	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
  2.1014 +		if ((mask & 0x1) == 0UL) continue;
  2.1015 +		th->pmcs[i] = ctx->ctx_pmcs[i];
  2.1016 +		ia64_set_pmc(i, th->pmcs[i]);
  2.1017 +		DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
  2.1018 +	}
  2.1019 +	ia64_srlz_d();
  2.1020 +
  2.1021 +	/*
  2.1022 +	 * must restore DBR/IBR because could be modified while masked
  2.1023 +	 * XXX: need to optimize 
  2.1024 +	 */
  2.1025 +	if (ctx->ctx_fl_using_dbreg) {
  2.1026 +		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
  2.1027 +		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
  2.1028 +	}
  2.1029 +
  2.1030 +	/*
  2.1031 +	 * now restore PSR
  2.1032 +	 */
  2.1033 +	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
  2.1034 +		/* enable dcr pp */
  2.1035 +		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
  2.1036 +		ia64_srlz_i();
  2.1037 +	}
  2.1038 +	pfm_set_psr_l(psr);
  2.1039 +}
  2.1040 +
  2.1041 +static inline void
  2.1042 +pfm_save_pmds(unsigned long *pmds, unsigned long mask)
  2.1043 +{
  2.1044 +	int i;
  2.1045 +
  2.1046 +	ia64_srlz_d();
  2.1047 +
  2.1048 +	for (i=0; mask; i++, mask>>=1) {
  2.1049 +		if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
  2.1050 +	}
  2.1051 +}
  2.1052 +
  2.1053 +/*
   2.1054 + * reload from thread state (used for ctxsw only)
  2.1055 + */
  2.1056 +static inline void
  2.1057 +pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
  2.1058 +{
  2.1059 +	int i;
  2.1060 +	unsigned long val, ovfl_val = pmu_conf->ovfl_val;
  2.1061 +
  2.1062 +	for (i=0; mask; i++, mask>>=1) {
  2.1063 +		if ((mask & 0x1) == 0) continue;
  2.1064 +		val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
  2.1065 +		ia64_set_pmd(i, val);
  2.1066 +	}
  2.1067 +	ia64_srlz_d();
  2.1068 +}
  2.1069 +
  2.1070 +/*
  2.1071 + * propagate PMD from context to thread-state
  2.1072 + */
  2.1073 +static inline void
  2.1074 +pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
  2.1075 +{
  2.1076 +	struct thread_struct *thread = &task->thread;
  2.1077 +	unsigned long ovfl_val = pmu_conf->ovfl_val;
  2.1078 +	unsigned long mask = ctx->ctx_all_pmds[0];
  2.1079 +	unsigned long val;
  2.1080 +	int i;
  2.1081 +
  2.1082 +	DPRINT(("mask=0x%lx\n", mask));
  2.1083 +
  2.1084 +	for (i=0; mask; i++, mask>>=1) {
  2.1085 +
  2.1086 +		val = ctx->ctx_pmds[i].val;
  2.1087 +
  2.1088 +		/*
  2.1089 +		 * We break up the 64 bit value into 2 pieces
  2.1090 +		 * the lower bits go to the machine state in the
  2.1091 +		 * thread (will be reloaded on ctxsw in).
  2.1092 +		 * The upper part stays in the soft-counter.
  2.1093 +		 */
  2.1094 +		if (PMD_IS_COUNTING(i)) {
  2.1095 +			ctx->ctx_pmds[i].val = val & ~ovfl_val;
  2.1096 +			 val &= ovfl_val;
  2.1097 +		}
  2.1098 +		thread->pmds[i] = val;
  2.1099 +
  2.1100 +		DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
  2.1101 +			i,
  2.1102 +			thread->pmds[i],
  2.1103 +			ctx->ctx_pmds[i].val));
  2.1104 +	}
  2.1105 +}
  2.1106 +
  2.1107 +/*
  2.1108 + * propagate PMC from context to thread-state
  2.1109 + */
  2.1110 +static inline void
  2.1111 +pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
  2.1112 +{
  2.1113 +	struct thread_struct *thread = &task->thread;
  2.1114 +	unsigned long mask = ctx->ctx_all_pmcs[0];
  2.1115 +	int i;
  2.1116 +
  2.1117 +	DPRINT(("mask=0x%lx\n", mask));
  2.1118 +
  2.1119 +	for (i=0; mask; i++, mask>>=1) {
  2.1120 +		/* masking 0 with ovfl_val yields 0 */
  2.1121 +		thread->pmcs[i] = ctx->ctx_pmcs[i];
  2.1122 +		DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i]));
  2.1123 +	}
  2.1124 +}
  2.1125 +
  2.1126 +
  2.1127 +
  2.1128 +static inline void
  2.1129 +pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
  2.1130 +{
  2.1131 +	int i;
  2.1132 +
  2.1133 +	for (i=0; mask; i++, mask>>=1) {
  2.1134 +		if ((mask & 0x1) == 0) continue;
  2.1135 +		ia64_set_pmc(i, pmcs[i]);
  2.1136 +	}
  2.1137 +	ia64_srlz_d();
  2.1138 +}
  2.1139 +
  2.1140 +static inline int
  2.1141 +pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b)
  2.1142 +{
  2.1143 +	return memcmp(a, b, sizeof(pfm_uuid_t));
  2.1144 +}
  2.1145 +
  2.1146 +static inline int
  2.1147 +pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs)
  2.1148 +{
  2.1149 +	int ret = 0;
  2.1150 +	if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs);
  2.1151 +	return ret;
  2.1152 +}
  2.1153 +
  2.1154 +static inline int
  2.1155 +pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size)
  2.1156 +{
  2.1157 +	int ret = 0;
  2.1158 +	if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size);
  2.1159 +	return ret;
  2.1160 +}
  2.1161 +
  2.1162 +
  2.1163 +static inline int
  2.1164 +pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags,
  2.1165 +		     int cpu, void *arg)
  2.1166 +{
  2.1167 +	int ret = 0;
  2.1168 +	if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg);
  2.1169 +	return ret;
  2.1170 +}
  2.1171 +
  2.1172 +static inline int
  2.1173 +pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags,
  2.1174 +		     int cpu, void *arg)
  2.1175 +{
  2.1176 +	int ret = 0;
  2.1177 +	if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg);
  2.1178 +	return ret;
  2.1179 +}
  2.1180 +
  2.1181 +static inline int
  2.1182 +pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
  2.1183 +{
  2.1184 +	int ret = 0;
  2.1185 +	if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs);
  2.1186 +	return ret;
  2.1187 +}
  2.1188 +
  2.1189 +static inline int
  2.1190 +pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
  2.1191 +{
  2.1192 +	int ret = 0;
  2.1193 +	if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs);
  2.1194 +	return ret;
  2.1195 +}
  2.1196 +
  2.1197 +static pfm_buffer_fmt_t *
  2.1198 +__pfm_find_buffer_fmt(pfm_uuid_t uuid)
  2.1199 +{
  2.1200 +	struct list_head * pos;
  2.1201 +	pfm_buffer_fmt_t * entry;
  2.1202 +
  2.1203 +	list_for_each(pos, &pfm_buffer_fmt_list) {
  2.1204 +		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
  2.1205 +		if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0)
  2.1206 +			return entry;
  2.1207 +	}
  2.1208 +	return NULL;
  2.1209 +}
  2.1210 + 
  2.1211 +/*
  2.1212 + * find a buffer format based on its uuid
  2.1213 + */
  2.1214 +static pfm_buffer_fmt_t *
  2.1215 +pfm_find_buffer_fmt(pfm_uuid_t uuid)
  2.1216 +{
  2.1217 +	pfm_buffer_fmt_t * fmt;
  2.1218 +	spin_lock(&pfm_buffer_fmt_lock);
  2.1219 +	fmt = __pfm_find_buffer_fmt(uuid);
  2.1220 +	spin_unlock(&pfm_buffer_fmt_lock);
  2.1221 +	return fmt;
  2.1222 +}
  2.1223 + 
  2.1224 +int
  2.1225 +pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
  2.1226 +{
  2.1227 +	int ret = 0;
  2.1228 +
  2.1229 +	/* some sanity checks */
  2.1230 +	if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL;
  2.1231 +
  2.1232 +	/* we need at least a handler */
  2.1233 +	if (fmt->fmt_handler == NULL) return -EINVAL;
  2.1234 +
  2.1235 +	/*
   2.1236 +	 * XXX: need to check validity of fmt_arg_size
  2.1237 +	 */
  2.1238 +
  2.1239 +	spin_lock(&pfm_buffer_fmt_lock);
  2.1240 +
  2.1241 +	if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) {
  2.1242 +		printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
  2.1243 +		ret = -EBUSY;
  2.1244 +		goto out;
  2.1245 +	} 
  2.1246 +	list_add(&fmt->fmt_list, &pfm_buffer_fmt_list);
  2.1247 +	printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name);
  2.1248 +
  2.1249 +out:
  2.1250 +	spin_unlock(&pfm_buffer_fmt_lock);
  2.1251 + 	return ret;
  2.1252 +}
  2.1253 +EXPORT_SYMBOL(pfm_register_buffer_fmt);
  2.1254 +
  2.1255 +int
  2.1256 +pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
  2.1257 +{
  2.1258 +	pfm_buffer_fmt_t *fmt;
  2.1259 +	int ret = 0;
  2.1260 +
  2.1261 +	spin_lock(&pfm_buffer_fmt_lock);
  2.1262 +
  2.1263 +	fmt = __pfm_find_buffer_fmt(uuid);
  2.1264 +	if (!fmt) {
  2.1265 +		printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
  2.1266 +		ret = -EINVAL;
  2.1267 +		goto out;
  2.1268 +	}
  2.1269 +	list_del_init(&fmt->fmt_list);
  2.1270 +	printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name);
  2.1271 +
  2.1272 +out:
  2.1273 +	spin_unlock(&pfm_buffer_fmt_lock);
  2.1274 +	return ret;
  2.1275 +
  2.1276 +}
  2.1277 +EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
  2.1278 +
  2.1279 +extern void update_pal_halt_status(int);
  2.1280 +
  2.1281 +static int
  2.1282 +pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
  2.1283 +{
  2.1284 +	unsigned long flags;
  2.1285 +	/*
   2.1286 +	 * validity checks on cpu_mask have been done upstream
  2.1287 +	 */
  2.1288 +	LOCK_PFS(flags);
  2.1289 +
  2.1290 +	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
  2.1291 +		pfm_sessions.pfs_sys_sessions,
  2.1292 +		pfm_sessions.pfs_task_sessions,
  2.1293 +		pfm_sessions.pfs_sys_use_dbregs,
  2.1294 +		is_syswide,
  2.1295 +		cpu));
  2.1296 +
  2.1297 +	if (is_syswide) {
  2.1298 +		/*
  2.1299 +		 * cannot mix system wide and per-task sessions
  2.1300 +		 */
  2.1301 +		if (pfm_sessions.pfs_task_sessions > 0UL) {
  2.1302 +			DPRINT(("system wide not possible, %u conflicting task_sessions\n",
  2.1303 +			  	pfm_sessions.pfs_task_sessions));
  2.1304 +			goto abort;
  2.1305 +		}
  2.1306 +
  2.1307 +		if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict;
  2.1308 +
  2.1309 +		DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id()));
  2.1310 +
  2.1311 +		pfm_sessions.pfs_sys_session[cpu] = task;
  2.1312 +
  2.1313 +		pfm_sessions.pfs_sys_sessions++ ;
  2.1314 +
  2.1315 +	} else {
  2.1316 +		if (pfm_sessions.pfs_sys_sessions) goto abort;
  2.1317 +		pfm_sessions.pfs_task_sessions++;
  2.1318 +	}
  2.1319 +
  2.1320 +	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
  2.1321 +		pfm_sessions.pfs_sys_sessions,
  2.1322 +		pfm_sessions.pfs_task_sessions,
  2.1323 +		pfm_sessions.pfs_sys_use_dbregs,
  2.1324 +		is_syswide,
  2.1325 +		cpu));
  2.1326 +
  2.1327 +	/*
  2.1328 +	 * disable default_idle() to go to PAL_HALT
  2.1329 +	 */
  2.1330 +	update_pal_halt_status(0);
  2.1331 +
  2.1332 +	UNLOCK_PFS(flags);
  2.1333 +
  2.1334 +	return 0;
  2.1335 +
  2.1336 +error_conflict:
  2.1337 +	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
  2.1338 +  		pfm_sessions.pfs_sys_session[cpu]->pid,
  2.1339 +		cpu));
  2.1340 +abort:
  2.1341 +	UNLOCK_PFS(flags);
  2.1342 +
  2.1343 +	return -EBUSY;
  2.1344 +
  2.1345 +}
  2.1346 +
  2.1347 +static int
  2.1348 +pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
  2.1349 +{
  2.1350 +	unsigned long flags;
  2.1351 +	/*
   2.1352 +	 * validity checks on cpu_mask have been done upstream
  2.1353 +	 */
  2.1354 +	LOCK_PFS(flags);
  2.1355 +
  2.1356 +	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
  2.1357 +		pfm_sessions.pfs_sys_sessions,
  2.1358 +		pfm_sessions.pfs_task_sessions,
  2.1359 +		pfm_sessions.pfs_sys_use_dbregs,
  2.1360 +		is_syswide,
  2.1361 +		cpu));
  2.1362 +
  2.1363 +
  2.1364 +	if (is_syswide) {
  2.1365 +		pfm_sessions.pfs_sys_session[cpu] = NULL;
  2.1366 +		/*
  2.1367 +		 * would not work with perfmon+more than one bit in cpu_mask
  2.1368 +		 */
  2.1369 +		if (ctx && ctx->ctx_fl_using_dbreg) {
  2.1370 +			if (pfm_sessions.pfs_sys_use_dbregs == 0) {
  2.1371 +				printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx);
  2.1372 +			} else {
  2.1373 +				pfm_sessions.pfs_sys_use_dbregs--;
  2.1374 +			}
  2.1375 +		}
  2.1376 +		pfm_sessions.pfs_sys_sessions--;
  2.1377 +	} else {
  2.1378 +		pfm_sessions.pfs_task_sessions--;
  2.1379 +	}
  2.1380 +	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
  2.1381 +		pfm_sessions.pfs_sys_sessions,
  2.1382 +		pfm_sessions.pfs_task_sessions,
  2.1383 +		pfm_sessions.pfs_sys_use_dbregs,
  2.1384 +		is_syswide,
  2.1385 +		cpu));
  2.1386 +
  2.1387 +	/*
  2.1388 +	 * if possible, enable default_idle() to go into PAL_HALT
  2.1389 +	 */
  2.1390 +	if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0)
  2.1391 +		update_pal_halt_status(1);
  2.1392 +
  2.1393 +	UNLOCK_PFS(flags);
  2.1394 +
  2.1395 +	return 0;
  2.1396 +}
  2.1397 +
  2.1398 +/*
  2.1399 + * removes virtual mapping of the sampling buffer.
   2.1400 + * IMPORTANT: cannot be called with interrupts disabled, e.g. inside
  2.1401 + * a PROTECT_CTX() section.
  2.1402 + */
  2.1403 +static int
  2.1404 +pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size)
  2.1405 +{
  2.1406 +	int r;
  2.1407 +
  2.1408 +	/* sanity checks */
  2.1409 +	if (task->mm == NULL || size == 0UL || vaddr == NULL) {
  2.1410 +		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
  2.1411 +		return -EINVAL;
  2.1412 +	}
  2.1413 +
  2.1414 +	DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size));
  2.1415 +
  2.1416 +	/*
  2.1417 +	 * does the actual unmapping
  2.1418 +	 */
  2.1419 +	down_write(&task->mm->mmap_sem);
  2.1420 +
  2.1421 +	DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size));
  2.1422 +
  2.1423 +	r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0);
  2.1424 +
  2.1425 +	up_write(&task->mm->mmap_sem);
  2.1426 +	if (r !=0) {
  2.1427 +		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
  2.1428 +	}
  2.1429 +
  2.1430 +	DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));
  2.1431 +
  2.1432 +	return 0;
  2.1433 +}
  2.1434 +
  2.1435 +/*
  2.1436 + * free actual physical storage used by sampling buffer
  2.1437 + */
  2.1438 +#if 0
  2.1439 +static int
  2.1440 +pfm_free_smpl_buffer(pfm_context_t *ctx)
  2.1441 +{
  2.1442 +	pfm_buffer_fmt_t *fmt;
  2.1443 +
  2.1444 +	if (ctx->ctx_smpl_hdr == NULL) goto invalid_free;
  2.1445 +
  2.1446 +	/*
  2.1447 +	 * we won't use the buffer format anymore
  2.1448 +	 */
  2.1449 +	fmt = ctx->ctx_buf_fmt;
  2.1450 +
  2.1451 +	DPRINT(("sampling buffer @%p size %lu vaddr=%p\n",
  2.1452 +		ctx->ctx_smpl_hdr,
  2.1453 +		ctx->ctx_smpl_size,
  2.1454 +		ctx->ctx_smpl_vaddr));
  2.1455 +
  2.1456 +	pfm_buf_fmt_exit(fmt, current, NULL, NULL);
  2.1457 +
  2.1458 +	/*
  2.1459 +	 * free the buffer
  2.1460 +	 */
  2.1461 +	pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size);
  2.1462 +
  2.1463 +	ctx->ctx_smpl_hdr  = NULL;
  2.1464 +	ctx->ctx_smpl_size = 0UL;
  2.1465 +
  2.1466 +	return 0;
  2.1467 +
  2.1468 +invalid_free:
  2.1469 +	printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
  2.1470 +	return -EINVAL;
  2.1471 +}
  2.1472 +#endif
  2.1473 +
  2.1474 +static inline void
  2.1475 +pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
  2.1476 +{
  2.1477 +	if (fmt == NULL) return;
  2.1478 +
  2.1479 +	pfm_buf_fmt_exit(fmt, current, NULL, NULL);
  2.1480 +
  2.1481 +}
  2.1482 +
  2.1483 +/*
  2.1484 + * pfmfs should _never_ be mounted by userland - too much of security hassle,
  2.1485 + * no real gain from having the whole whorehouse mounted. So we don't need
  2.1486 + * any operations on the root directory. However, we need a non-trivial
  2.1487 + * d_name - pfm: will go nicely and kill the special-casing in procfs.
  2.1488 + */
  2.1489 +static struct vfsmount *pfmfs_mnt;
  2.1490 +
  2.1491 +static int __init
  2.1492 +init_pfm_fs(void)
  2.1493 +{
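          +	/*
          +	 * register the pfm pseudo filesystem and create an internal,
          +	 * kernel-only mount for the anonymous context files; the
          +	 * registration is undone if the mount fails.
          +	 */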
  2.1494 +	int err = register_filesystem(&pfm_fs_type);
  2.1495 +	if (!err) {
  2.1496 +		pfmfs_mnt = kern_mount(&pfm_fs_type);
  2.1497 +		err = PTR_ERR(pfmfs_mnt);
  2.1498 +		if (IS_ERR(pfmfs_mnt))
  2.1499 +			unregister_filesystem(&pfm_fs_type);
  2.1500 +		else
  2.1501 +			err = 0;
  2.1502 +	}
  2.1503 +	return err;
  2.1504 +}
  2.1505 +
  2.1506 +static void __exit
  2.1507 +exit_pfm_fs(void)
  2.1508 +{
  2.1509 +	unregister_filesystem(&pfm_fs_type);
  2.1510 +	mntput(pfmfs_mnt);
  2.1511 +}
  2.1512 +
  2.1513 +static ssize_t
  2.1514 +pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
  2.1515 +{
  2.1516 +	pfm_context_t *ctx;
  2.1517 +	pfm_msg_t *msg;
  2.1518 +	ssize_t ret;
  2.1519 +	unsigned long flags;
  2.1520 +  	DECLARE_WAITQUEUE(wait, current);
  2.1521 +	if (PFM_IS_FILE(filp) == 0) {
   2.1522 +		printk(KERN_ERR "perfmon: pfm_read: bad magic [%d]\n", current->pid);
  2.1523 +		return -EINVAL;
  2.1524 +	}
  2.1525 +
  2.1526 +	ctx = (pfm_context_t *)filp->private_data;
  2.1527 +	if (ctx == NULL) {
  2.1528 +		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
  2.1529 +		return -EINVAL;
  2.1530 +	}
  2.1531 +
  2.1532 +	/*
  2.1533 +	 * check even when there is no message
  2.1534 +	 */
  2.1535 +	if (size < sizeof(pfm_msg_t)) {
  2.1536 +		DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
  2.1537 +		return -EINVAL;
  2.1538 +	}
  2.1539 +
  2.1540 +	PROTECT_CTX(ctx, flags);
  2.1541 +
  2.1542 +  	/*
  2.1543 +	 * put ourselves on the wait queue
  2.1544 +	 */
  2.1545 +  	add_wait_queue(&ctx->ctx_msgq_wait, &wait);
  2.1546 +
  2.1547 +
  2.1548 +  	for(;;) {
  2.1549 +		/*
  2.1550 +		 * check wait queue
  2.1551 +		 */
  2.1552 +
  2.1553 +  		set_current_state(TASK_INTERRUPTIBLE);
  2.1554 +
  2.1555 +		DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
  2.1556 +
  2.1557 +		ret = 0;
  2.1558 +		if(PFM_CTXQ_EMPTY(ctx) == 0) break;
  2.1559 +
  2.1560 +		UNPROTECT_CTX(ctx, flags);
  2.1561 +
  2.1562 +		/*
  2.1563 +		 * check non-blocking read
  2.1564 +		 */
  2.1565 +      		ret = -EAGAIN;
  2.1566 +		if(filp->f_flags & O_NONBLOCK) break;
  2.1567 +
  2.1568 +		/*
  2.1569 +		 * check pending signals
  2.1570 +		 */
  2.1571 +		if(signal_pending(current)) {
  2.1572 +			ret = -EINTR;
  2.1573 +			break;
  2.1574 +		}
  2.1575 +      		/*
  2.1576 +		 * no message, so wait
  2.1577 +		 */
  2.1578 +      		schedule();
  2.1579 +
  2.1580 +		PROTECT_CTX(ctx, flags);
  2.1581 +	}
  2.1582 +	DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
  2.1583 +  	set_current_state(TASK_RUNNING);
  2.1584 +	remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
  2.1585 +
  2.1586 +	if (ret < 0) goto abort;
  2.1587 +
  2.1588 +	ret = -EINVAL;
  2.1589 +	msg = pfm_get_next_msg(ctx);
  2.1590 +	if (msg == NULL) {
  2.1591 +		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
  2.1592 +		goto abort_locked;
  2.1593 +	}
  2.1594 +
  2.1595 +	DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
  2.1596 +
  2.1597 +	ret = -EFAULT;
  2.1598 +  	if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);
  2.1599 +
  2.1600 +abort_locked:
  2.1601 +	UNPROTECT_CTX(ctx, flags);
  2.1602 +abort:
  2.1603 +	return ret;
  2.1604 +}
  2.1605 +
  2.1606 +static ssize_t
  2.1607 +pfm_write(struct file *file, const char __user *ubuf,
  2.1608 +			  size_t size, loff_t *ppos)
  2.1609 +{
  2.1610 +	DPRINT(("pfm_write called\n"));
  2.1611 +	return -EINVAL;
  2.1612 +}
  2.1613 +
  2.1614 +static unsigned int
  2.1615 +pfm_poll(struct file *filp, poll_table * wait)
  2.1616 +{
  2.1617 +	pfm_context_t *ctx;
  2.1618 +	unsigned long flags;
  2.1619 +	unsigned int mask = 0;
  2.1620 +
  2.1621 +	if (PFM_IS_FILE(filp) == 0) {
  2.1622 +		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
  2.1623 +		return 0;
  2.1624 +	}
  2.1625 +
  2.1626 +	ctx = (pfm_context_t *)filp->private_data;
  2.1627 +	if (ctx == NULL) {
  2.1628 +		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
  2.1629 +		return 0;
  2.1630 +	}
  2.1631 +
  2.1632 +
  2.1633 +	DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd));
  2.1634 +
  2.1635 +	poll_wait(filp, &ctx->ctx_msgq_wait, wait);
  2.1636 +
  2.1637 +	PROTECT_CTX(ctx, flags);
  2.1638 +
  2.1639 +	if (PFM_CTXQ_EMPTY(ctx) == 0)
  2.1640 +		mask =  POLLIN | POLLRDNORM;
  2.1641 +
  2.1642 +	UNPROTECT_CTX(ctx, flags);
  2.1643 +
  2.1644 +	DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask));
  2.1645 +
  2.1646 +	return mask;
  2.1647 +}
  2.1648 +
  2.1649 +static int
  2.1650 +pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
  2.1651 +{
  2.1652 +	DPRINT(("pfm_ioctl called\n"));
  2.1653 +	return -EINVAL;
  2.1654 +}
  2.1655 +
  2.1656 +/*
  2.1657 + * interrupt cannot be masked when coming here
  2.1658 + */
  2.1659 +static inline int
  2.1660 +pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
  2.1661 +{
  2.1662 +	int ret;
  2.1663 +
  2.1664 +	ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
  2.1665 +
  2.1666 +	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
  2.1667 +		current->pid,
  2.1668 +		fd,
  2.1669 +		on,
  2.1670 +		ctx->ctx_async_queue, ret));
  2.1671 +
  2.1672 +	return ret;
  2.1673 +}
  2.1674 +
  2.1675 +static int
  2.1676 +pfm_fasync(int fd, struct file *filp, int on)
  2.1677 +{
  2.1678 +	pfm_context_t *ctx;
  2.1679 +	int ret;
  2.1680 +
  2.1681 +	if (PFM_IS_FILE(filp) == 0) {
  2.1682 +		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
  2.1683 +		return -EBADF;
  2.1684 +	}
  2.1685 +
  2.1686 +	ctx = (pfm_context_t *)filp->private_data;
  2.1687 +	if (ctx == NULL) {
  2.1688 +		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
  2.1689 +		return -EBADF;
  2.1690 +	}
  2.1691 +	/*
   2.1692 +	 * we cannot mask interrupts during this call because it
   2.1693 +	 * may go to sleep if memory is not readily available.
   2.1694 +	 *
   2.1695 +	 * We are protected from the context disappearing by the get_fd()/put_fd()
  2.1696 +	 * done in caller. Serialization of this function is ensured by caller.
  2.1697 +	 */
  2.1698 +	ret = pfm_do_fasync(fd, filp, ctx, on);
  2.1699 +
  2.1700 +
  2.1701 +	DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
  2.1702 +		fd,
  2.1703 +		on,
  2.1704 +		ctx->ctx_async_queue, ret));
  2.1705 +
  2.1706 +	return ret;
  2.1707 +}
  2.1708 +
  2.1709 +#ifdef CONFIG_SMP
  2.1710 +/*
  2.1711 + * this function is exclusively called from pfm_close().
  2.1712 + * The context is not protected at that time, nor are interrupts
  2.1713 + * on the remote CPU. That's necessary to avoid deadlocks.
  2.1714 + */
  2.1715 +static void
  2.1716 +pfm_syswide_force_stop(void *info)
  2.1717 +{
  2.1718 +	pfm_context_t   *ctx = (pfm_context_t *)info;
  2.1719 +	struct pt_regs *regs = task_pt_regs(current);
  2.1720 +	struct task_struct *owner;
  2.1721 +	unsigned long flags;
  2.1722 +	int ret;
  2.1723 +
  2.1724 +	if (ctx->ctx_cpu != smp_processor_id()) {
  2.1725 +		printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d  but on CPU%d\n",
  2.1726 +			ctx->ctx_cpu,
  2.1727 +			smp_processor_id());
  2.1728 +		return;
  2.1729 +	}
  2.1730 +	owner = GET_PMU_OWNER();
  2.1731 +	if (owner != ctx->ctx_task) {
  2.1732 +		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
  2.1733 +			smp_processor_id(),
  2.1734 +			owner->pid, ctx->ctx_task->pid);
  2.1735 +		return;
  2.1736 +	}
  2.1737 +	if (GET_PMU_CTX() != ctx) {
  2.1738 +		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n",
  2.1739 +			smp_processor_id(),
  2.1740 +			GET_PMU_CTX(), ctx);
  2.1741 +		return;
  2.1742 +	}
  2.1743 +
  2.1744 +	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid));	
  2.1745 +	/*
  2.1746 +	 * the context is already protected in pfm_close(), we simply
  2.1747 +	 * need to mask interrupts to avoid a PMU interrupt race on
  2.1748 +	 * this CPU
  2.1749 +	 */
  2.1750 +	local_irq_save(flags);
  2.1751 +
  2.1752 +	ret = pfm_context_unload(ctx, NULL, 0, regs);
  2.1753 +	if (ret) {
  2.1754 +		DPRINT(("context_unload returned %d\n", ret));
  2.1755 +	}
  2.1756 +
  2.1757 +	/*
  2.1758 +	 * unmask interrupts, PMU interrupts are now spurious here
  2.1759 +	 */
  2.1760 +	local_irq_restore(flags);
  2.1761 +}
  2.1762 +
  2.1763 +static void
  2.1764 +pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
  2.1765 +{
  2.1766 +	int ret;
  2.1767 +
  2.1768 +	DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
  2.1769 +	ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1);
  2.1770 +	DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret));
  2.1771 +}
  2.1772 +#endif /* CONFIG_SMP */
  2.1773 +
  2.1774 +/*
  2.1775 + * called for each close(). Partially free resources.
  2.1776 + * When caller is self-monitoring, the context is unloaded.
  2.1777 + */
  2.1778 +static int
  2.1779 +pfm_flush(struct file *filp)
  2.1780 +{
  2.1781 +	pfm_context_t *ctx;
  2.1782 +	struct task_struct *task;
  2.1783 +	struct pt_regs *regs;
  2.1784 +	unsigned long flags;
  2.1785 +	unsigned long smpl_buf_size = 0UL;
  2.1786 +	void *smpl_buf_vaddr = NULL;
  2.1787 +	int state, is_system;
  2.1788 +
  2.1789 +	if (PFM_IS_FILE(filp) == 0) {
  2.1790 +		DPRINT(("bad magic for\n"));
  2.1791 +		return -EBADF;
  2.1792 +	}
  2.1793 +
  2.1794 +	ctx = (pfm_context_t *)filp->private_data;
  2.1795 +	if (ctx == NULL) {
  2.1796 +		printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid);
  2.1797 +		return -EBADF;
  2.1798 +	}
  2.1799 +
  2.1800 +	/*
  2.1801 +	 * remove our file from the async queue, if we use this mode.
  2.1802 +	 * This can be done without the context being protected. We come
   2.1803 +	 * here when the context has become unreachable by other tasks.
  2.1804 +	 *
  2.1805 +	 * We may still have active monitoring at this point and we may
  2.1806 +	 * end up in pfm_overflow_handler(). However, fasync_helper()
  2.1807 +	 * operates with interrupts disabled and it cleans up the
  2.1808 +	 * queue. If the PMU handler is called prior to entering
  2.1809 +	 * fasync_helper() then it will send a signal. If it is
  2.1810 +	 * invoked after, it will find an empty queue and no
   2.1811 +	 * signal will be sent. In both cases, we are safe
  2.1812 +	 */
  2.1813 +	if (filp->f_flags & FASYNC) {
  2.1814 +		DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue));
  2.1815 +		pfm_do_fasync (-1, filp, ctx, 0);
  2.1816 +	}
  2.1817 +
  2.1818 +	PROTECT_CTX(ctx, flags);
  2.1819 +
  2.1820 +	state     = ctx->ctx_state;
  2.1821 +	is_system = ctx->ctx_fl_system;
  2.1822 +
  2.1823 +	task = PFM_CTX_TASK(ctx);
  2.1824 +	regs = task_pt_regs(task);
  2.1825 +
  2.1826 +	DPRINT(("ctx_state=%d is_current=%d\n",
  2.1827 +		state,
  2.1828 +		task == current ? 1 : 0));
  2.1829 +
  2.1830 +	/*
  2.1831 +	 * if state == UNLOADED, then task is NULL
  2.1832 +	 */
  2.1833 +
  2.1834 +	/*
  2.1835 +	 * we must stop and unload because we are losing access to the context.
  2.1836 +	 */
  2.1837 +	if (task == current) {
  2.1838 +#ifdef CONFIG_SMP
  2.1839 +		/*
  2.1840 +		 * the task IS the owner but it migrated to another CPU: that's bad
  2.1841 +		 * but we must handle this cleanly. Unfortunately, the kernel does
  2.1842 +		 * not provide a mechanism to block migration (while the context is loaded).
  2.1843 +		 *
  2.1844 +		 * We need to release the resource on the ORIGINAL cpu.
  2.1845 +		 */
  2.1846 +		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
  2.1847 +
  2.1848 +			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
  2.1849 +			/*
  2.1850 +			 * keep context protected but unmask interrupt for IPI
  2.1851 +			 */
  2.1852 +			local_irq_restore(flags);
  2.1853 +
  2.1854 +			pfm_syswide_cleanup_other_cpu(ctx);
  2.1855 +
  2.1856 +			/*
  2.1857 +			 * restore interrupt masking
  2.1858 +			 */
  2.1859 +			local_irq_save(flags);
  2.1860 +
  2.1861 +			/*
  2.1862 +			 * context is unloaded at this point
  2.1863 +			 */
  2.1864 +		} else
  2.1865 +#endif /* CONFIG_SMP */
  2.1866 +		{
  2.1867 +
  2.1868 +			DPRINT(("forcing unload\n"));
  2.1869 +			/*
  2.1870 +		 	* stop and unload, returning with state UNLOADED
  2.1871 +		 	* and session unreserved.
  2.1872 +		 	*/
  2.1873 +			pfm_context_unload(ctx, NULL, 0, regs);
  2.1874 +
  2.1875 +			DPRINT(("ctx_state=%d\n", ctx->ctx_state));
  2.1876 +		}
  2.1877 +	}
  2.1878 +
  2.1879 +	/*
  2.1880 +	 * remove virtual mapping, if any, for the calling task.
  2.1881 +	 * cannot reset ctx field until last user is calling close().
  2.1882 +	 *
  2.1883 +	 * ctx_smpl_vaddr must never be cleared because it is needed
  2.1884 +	 * by every task with access to the context
  2.1885 +	 *
  2.1886 +	 * When called from do_exit(), the mm context is gone already, therefore
  2.1887 +	 * mm is NULL, i.e., the VMA is already gone  and we do not have to
  2.1888 +	 * do anything here
  2.1889 +	 */
  2.1890 +	if (ctx->ctx_smpl_vaddr && current->mm) {
  2.1891 +		smpl_buf_vaddr = ctx->ctx_smpl_vaddr;
  2.1892 +		smpl_buf_size  = ctx->ctx_smpl_size;
  2.1893 +	}
  2.1894 +
  2.1895 +	UNPROTECT_CTX(ctx, flags);
  2.1896 +
  2.1897 +	/*
  2.1898 +	 * if there was a mapping, then we systematically remove it
  2.1899 +	 * at this point. Cannot be done inside critical section
  2.1900 +	 * because some VM function reenables interrupts.
  2.1901 +	 *
  2.1902 +	 */
  2.1903 +	if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size);
  2.1904 +
  2.1905 +	return 0;
  2.1906 +}
  2.1907 +/*
  2.1908 + * called either on explicit close() or from exit_files(). 
  2.1909 + * Only the LAST user of the file gets to this point, i.e., it is
  2.1910 + * called only ONCE.
  2.1911 + *
  2.1912 + * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero 
   2.1913 + * (fput()), i.e., last task to access the file. Nobody else can access the
  2.1914 + * file at this point.
  2.1915 + *
  2.1916 + * When called from exit_files(), the VMA has been freed because exit_mm()
  2.1917 + * is executed before exit_files().
  2.1918 + *
  2.1919 + * When called from exit_files(), the current task is not yet ZOMBIE but we
  2.1920 + * flush the PMU state to the context. 
  2.1921 + */
  2.1922 +static int
  2.1923 +pfm_close(struct inode *inode, struct file *filp)
  2.1924 +{
  2.1925 +	pfm_context_t *ctx;
  2.1926 +	struct task_struct *task;
  2.1927 +	struct pt_regs *regs;
  2.1928 +  	DECLARE_WAITQUEUE(wait, current);
  2.1929 +	unsigned long flags;
  2.1930 +	unsigned long smpl_buf_size = 0UL;
  2.1931 +	void *smpl_buf_addr = NULL;
  2.1932 +	int free_possible = 1;
  2.1933 +	int state, is_system;
  2.1934 +
  2.1935 +	DPRINT(("pfm_close called private=%p\n", filp->private_data));
  2.1936 +
  2.1937 +	if (PFM_IS_FILE(filp) == 0) {
  2.1938 +		DPRINT(("bad magic\n"));
  2.1939 +		return -EBADF;
  2.1940 +	}
  2.1941 +	
  2.1942 +	ctx = (pfm_context_t *)filp->private_data;
  2.1943 +	if (ctx == NULL) {
  2.1944 +		printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid);
  2.1945 +		return -EBADF;
  2.1946 +	}
  2.1947 +
  2.1948 +	PROTECT_CTX(ctx, flags);
  2.1949 +
  2.1950 +	state     = ctx->ctx_state;
  2.1951 +	is_system = ctx->ctx_fl_system;
  2.1952 +
  2.1953 +	task = PFM_CTX_TASK(ctx);
  2.1954 +	regs = task_pt_regs(task);
  2.1955 +
  2.1956 +	DPRINT(("ctx_state=%d is_current=%d\n", 
  2.1957 +		state,
  2.1958 +		task == current ? 1 : 0));
  2.1959 +
  2.1960 +	/*
  2.1961 +	 * if task == current, then pfm_flush() unloaded the context
  2.1962 +	 */
  2.1963 +	if (state == PFM_CTX_UNLOADED) goto doit;
  2.1964 +
  2.1965 +	/*
  2.1966 +	 * context is loaded/masked and task != current, we need to
  2.1967 +	 * either force an unload or go zombie
  2.1968 +	 */
  2.1969 +
  2.1970 +	/*
  2.1971 +	 * The task is currently blocked or will block after an overflow.
  2.1972 +	 * we must force it to wakeup to get out of the
  2.1973 +	 * MASKED state and transition to the unloaded state by itself.
  2.1974 +	 *
  2.1975 +	 * This situation is only possible for per-task mode
  2.1976 +	 */
  2.1977 +	if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) {
  2.1978 +
  2.1979 +		/*
  2.1980 +		 * set a "partial" zombie state to be checked
  2.1981 +		 * upon return from down() in pfm_handle_work().
  2.1982 +		 *
  2.1983 +		 * We cannot use the ZOMBIE state, because it is checked
  2.1984 +		 * by pfm_load_regs() which is called upon wakeup from down().
  2.1985 +		 * In such case, it would free the context and then we would
  2.1986 +		 * return to pfm_handle_work() which would access the
  2.1987 +		 * stale context. Instead, we set a flag invisible to pfm_load_regs()
  2.1988 +		 * but visible to pfm_handle_work().
  2.1989 +		 *
  2.1990 +		 * For some window of time, we have a zombie context with
  2.1991 +		 * ctx_state = MASKED  and not ZOMBIE
  2.1992 +		 */
  2.1993 +		ctx->ctx_fl_going_zombie = 1;
  2.1994 +
  2.1995 +		/*
  2.1996 +		 * force task to wake up from MASKED state
  2.1997 +		 */
  2.1998 +		complete(&ctx->ctx_restart_done);
  2.1999 +
  2.2000 +		DPRINT(("waking up ctx_state=%d\n", state));
  2.2001 +
  2.2002 +		/*
  2.2003 +		 * put ourself to sleep waiting for the other
  2.2004 +		 * task to report completion
  2.2005 +		 *
  2.2006 +		 * the context is protected by mutex, therefore there
  2.2007 +		 * is no risk of being notified of completion before
   2.2008 +		 * being actually on the waitq.
  2.2009 +		 */
  2.2010 +  		set_current_state(TASK_INTERRUPTIBLE);
  2.2011 +  		add_wait_queue(&ctx->ctx_zombieq, &wait);
  2.2012 +
  2.2013 +		UNPROTECT_CTX(ctx, flags);
  2.2014 +
  2.2015 +		/*
  2.2016 +		 * XXX: check for signals :
  2.2017 +		 * 	- ok for explicit close
  2.2018 +		 * 	- not ok when coming from exit_files()
  2.2019 +		 */
  2.2020 +      		schedule();
  2.2021 +
  2.2022 +
  2.2023 +		PROTECT_CTX(ctx, flags);
  2.2024 +
  2.2025 +
  2.2026 +		remove_wait_queue(&ctx->ctx_zombieq, &wait);
  2.2027 +  		set_current_state(TASK_RUNNING);
  2.2028 +
  2.2029 +		/*
  2.2030 +		 * context is unloaded at this point
  2.2031 +		 */
  2.2032 +		DPRINT(("after zombie wakeup ctx_state=%d for\n", state));
  2.2033 +	}
  2.2034 +	else if (task != current) {
  2.2035 +#ifdef CONFIG_SMP
  2.2036 +		/*
  2.2037 +	 	 * switch context to zombie state
  2.2038 +	 	 */
  2.2039 +		ctx->ctx_state = PFM_CTX_ZOMBIE;
  2.2040 +
  2.2041 +		DPRINT(("zombie ctx for [%d]\n", task->pid));
  2.2042 +		/*
  2.2043 +		 * cannot free the context on the spot. deferred until
  2.2044 +		 * the task notices the ZOMBIE state
  2.2045 +		 */
  2.2046 +		free_possible = 0;
  2.2047 +#else
  2.2048 +		pfm_context_unload(ctx, NULL, 0, regs);
  2.2049 +#endif
  2.2050 +	}
  2.2051 +
  2.2052 +doit:
  2.2053 +	/* reload state, may have changed during  opening of critical section */
  2.2054 +	state = ctx->ctx_state;
  2.2055 +
  2.2056 +	/*
  2.2057 +	 * the context is still attached to a task (possibly current)
  2.2058 +	 * we cannot destroy it right now
  2.2059 +	 */
  2.2060 +
  2.2061 +	/*
  2.2062 +	 * we must free the sampling buffer right here because
  2.2063 +	 * we cannot rely on it being cleaned up later by the
  2.2064 +	 * monitored task. It is not possible to free vmalloc'ed
  2.2065 +	 * memory in pfm_load_regs(). Instead, we remove the buffer
   2.2066 +	 * now. Should there be a subsequent PMU overflow originally
   2.2067 +	 * meant for sampling, it will be converted to spurious
   2.2068 +	 * and that's fine because the monitoring tool is gone anyway.
  2.2069 +	 */
  2.2070 +	if (ctx->ctx_smpl_hdr) {
  2.2071 +		smpl_buf_addr = ctx->ctx_smpl_hdr;
  2.2072 +		smpl_buf_size = ctx->ctx_smpl_size;
  2.2073 +		/* no more sampling */
  2.2074 +		ctx->ctx_smpl_hdr = NULL;
  2.2075 +		ctx->ctx_fl_is_sampling = 0;
  2.2076 +	}
  2.2077 +
  2.2078 +	DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n",
  2.2079 +		state,
  2.2080 +		free_possible,
  2.2081 +		smpl_buf_addr,
  2.2082 +		smpl_buf_size));
  2.2083 +
  2.2084 +	if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt);
  2.2085 +
  2.2086 +	/*
   2.2087 +	 * UNLOADED means that the session has already been unreserved.
  2.2088 +	 */
  2.2089 +	if (state == PFM_CTX_ZOMBIE) {
  2.2090 +		pfm_unreserve_session(ctx, ctx->ctx_fl_system , ctx->ctx_cpu);
  2.2091 +	}
  2.2092 +
  2.2093 +	/*
  2.2094 +	 * disconnect file descriptor from context must be done
  2.2095 +	 * before we unlock.
  2.2096 +	 */
  2.2097 +	filp->private_data = NULL;
  2.2098 +
  2.2099 +	/*
   2.2100 +	 * if we free on the spot, the context is now completely unreachable
   2.2101 +	 * from the caller's side. The monitored task side is also cut, so we
  2.2102 +	 * can freely cut.
  2.2103 +	 *
  2.2104 +	 * If we have a deferred free, only the caller side is disconnected.
  2.2105 +	 */
  2.2106 +	UNPROTECT_CTX(ctx, flags);
  2.2107 +
  2.2108 +	/*
  2.2109 +	 * All memory free operations (especially for vmalloc'ed memory)
  2.2110 +	 * MUST be done with interrupts ENABLED.
  2.2111 +	 */
  2.2112 +	if (smpl_buf_addr)  pfm_rvfree(smpl_buf_addr, smpl_buf_size);
  2.2113 +
  2.2114 +	/*
  2.2115 +	 * return the memory used by the context
  2.2116 +	 */
  2.2117 +	if (free_possible) pfm_context_free(ctx);
  2.2118 +
  2.2119 +	return 0;
  2.2120 +}
  2.2121 +
  2.2122 +static int
  2.2123 +pfm_no_open(struct inode *irrelevant, struct file *dontcare)
  2.2124 +{
  2.2125 +	DPRINT(("pfm_no_open called\n"));
  2.2126 +	return -ENXIO;
  2.2127 +}
  2.2128 +
  2.2129 +
  2.2130 +
  2.2131 +static struct file_operations pfm_file_ops = {
  2.2132 +	.llseek   = no_llseek,
  2.2133 +	.read     = pfm_read,
  2.2134 +	.write    = pfm_write,
  2.2135 +	.poll     = pfm_poll,
  2.2136 +	.ioctl    = pfm_ioctl,
  2.2137 +	.open     = pfm_no_open,	/* special open code to disallow open via /proc */
  2.2138 +	.fasync   = pfm_fasync,
  2.2139 +	.release  = pfm_close,
  2.2140 +	.flush	  = pfm_flush
  2.2141 +};
  2.2142 +
  2.2143 +static int
  2.2144 +pfmfs_delete_dentry(struct dentry *dentry)
  2.2145 +{
  2.2146 +	return 1;
  2.2147 +}
  2.2148 +
  2.2149 +static struct dentry_operations pfmfs_dentry_operations = {
  2.2150 +	.d_delete = pfmfs_delete_dentry,
  2.2151 +};
  2.2152 +
  2.2153 +
  2.2154 +static int
  2.2155 +pfm_alloc_fd(struct file **cfile)
  2.2156 +{
  2.2157 +	int fd, ret = 0;
  2.2158 +	struct file *file = NULL;
  2.2159 +	struct inode * inode;
  2.2160 +	char name[32];
  2.2161 +	struct qstr this;
  2.2162 +
  2.2163 +	fd = get_unused_fd();
  2.2164 +	if (fd < 0) return -ENFILE;
  2.2165 +
  2.2166 +	ret = -ENFILE;
  2.2167 +
  2.2168 +	file = get_empty_filp();
  2.2169 +	if (!file) goto out;
  2.2170 +
  2.2171 +	/*
  2.2172 +	 * allocate a new inode
  2.2173 +	 */
  2.2174 +	inode = new_inode(pfmfs_mnt->mnt_sb);
  2.2175 +	if (!inode) goto out;
  2.2176 +
  2.2177 +	DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode));
  2.2178 +
  2.2179 +	inode->i_mode = S_IFCHR|S_IRUGO;
  2.2180 +	inode->i_uid  = current->fsuid;
  2.2181 +	inode->i_gid  = current->fsgid;
  2.2182 +
  2.2183 +	sprintf(name, "[%lu]", inode->i_ino);
  2.2184 +	this.name = name;
  2.2185 +	this.len  = strlen(name);
  2.2186 +	this.hash = inode->i_ino;
  2.2187 +
  2.2188 +	ret = -ENOMEM;
  2.2189 +
  2.2190 +	/*
  2.2191 +	 * allocate a new dcache entry
  2.2192 +	 */
  2.2193 +	file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
  2.2194 +	if (!file->f_dentry) goto out;
  2.2195 +
  2.2196 +	file->f_dentry->d_op = &pfmfs_dentry_operations;
  2.2197 +
  2.2198 +	d_add(file->f_dentry, inode);
  2.2199 +	file->f_vfsmnt = mntget(pfmfs_mnt);
  2.2200 +	file->f_mapping = inode->i_mapping;
  2.2201 +
  2.2202 +	file->f_op    = &pfm_file_ops;
  2.2203 +	file->f_mode  = FMODE_READ;
  2.2204 +	file->f_flags = O_RDONLY;
  2.2205 +	file->f_pos   = 0;
  2.2206 +
  2.2207 +	/*
  2.2208 +	 * may have to delay until context is attached?
  2.2209 +	 */
  2.2210 +	fd_install(fd, file);
  2.2211 +
  2.2212 +	/*
  2.2213 +	 * the file structure we will use
  2.2214 +	 */
  2.2215 +	*cfile = file;
  2.2216 +
  2.2217 +	return fd;
  2.2218 +out:
  2.2219 +	if (file) put_filp(file);
  2.2220 +	put_unused_fd(fd);
  2.2221 +	return ret;
  2.2222 +}
  2.2223 +
  2.2224 +static void
  2.2225 +pfm_free_fd(int fd, struct file *file)
  2.2226 +{
  2.2227 +	struct files_struct *files = current->files;
  2.2228 +	struct fdtable *fdt;
  2.2229 +
  2.2230 +	/* 
   2.2231 +	 * there is no fd_uninstall(), so we do it here
  2.2232 +	 */
  2.2233 +	spin_lock(&files->file_lock);
  2.2234 +	fdt = files_fdtable(files);
  2.2235 +	rcu_assign_pointer(fdt->fd[fd], NULL);
  2.2236 +	spin_unlock(&files->file_lock);
  2.2237 +
  2.2238 +	if (file)
  2.2239 +		put_filp(file);
  2.2240 +	put_unused_fd(fd);
  2.2241 +}
  2.2242 +
  2.2243 +static int
  2.2244 +pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size)
  2.2245 +{
  2.2246 +	DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));
  2.2247 +
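          +	/*
          +	 * the buffer comes from pfm_rvmalloc() and is physically
          +	 * non-contiguous: translate each page to its physical frame
          +	 * and map it read-only into the user vma, one page at a time.
          +	 */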
  2.2248 +	while (size > 0) {
  2.2249 +		unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT;
  2.2250 +
  2.2251 +
  2.2252 +		if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY))
  2.2253 +			return -ENOMEM;
  2.2254 +
  2.2255 +		addr  += PAGE_SIZE;
  2.2256 +		buf   += PAGE_SIZE;
  2.2257 +		size  -= PAGE_SIZE;
  2.2258 +	}
  2.2259 +	return 0;
  2.2260 +}
  2.2261 +
  2.2262 +/*
   2.2263 + * allocates a sampling buffer and remaps it into the user address space of the task
  2.2264 + */
  2.2265 +static int
  2.2266 +pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
  2.2267 +{
  2.2268 +	struct mm_struct *mm = task->mm;
  2.2269 +	struct vm_area_struct *vma = NULL;
  2.2270 +	unsigned long size;
  2.2271 +	void *smpl_buf;
  2.2272 +
  2.2273 +
  2.2274 +	/*
  2.2275 +	 * the fixed header + requested size and align to page boundary
  2.2276 +	 */
  2.2277 +	size = PAGE_ALIGN(rsize);
  2.2278 +
  2.2279 +	DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size));
  2.2280 +
  2.2281 +	/*
  2.2282 +	 * check requested size to avoid Denial-of-service attacks
  2.2283 +	 * XXX: may have to refine this test
  2.2284 +	 * Check against address space limit.
  2.2285 +	 *
  2.2286 +	 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur)
  2.2287 +	 * 	return -ENOMEM;
  2.2288 +	 */
  2.2289 +	if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
  2.2290 +		return -ENOMEM;
  2.2291 +
  2.2292 +	/*
  2.2293 +	 * We do the easy to undo allocations first.
  2.2294 + 	 *
   2.2295 +	 * pfm_rvmalloc() clears the buffer, so there is no leak
  2.2296 +	 */
  2.2297 +	smpl_buf = pfm_rvmalloc(size);
  2.2298 +	if (smpl_buf == NULL) {
  2.2299 +		DPRINT(("Can't allocate sampling buffer\n"));
  2.2300 +		return -ENOMEM;
  2.2301 +	}
  2.2302 +
  2.2303 +	DPRINT(("smpl_buf @%p\n", smpl_buf));
  2.2304 +
  2.2305 +	/* allocate vma */
  2.2306 +	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
  2.2307 +	if (!vma) {
  2.2308 +		DPRINT(("Cannot allocate vma\n"));
  2.2309 +		goto error_kmem;
  2.2310 +	}
  2.2311 +	memset(vma, 0, sizeof(*vma));
  2.2312 +
  2.2313 +	/*
  2.2314 +	 * partially initialize the vma for the sampling buffer
  2.2315 +	 */
  2.2316 +	vma->vm_mm	     = mm;
  2.2317 +	vma->vm_flags	     = VM_READ| VM_MAYREAD |VM_RESERVED;
  2.2318 +	vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */
  2.2319 +
  2.2320 +	/*
  2.2321 +	 * Now we have everything we need and we can initialize
  2.2322 +	 * and connect all the data structures
  2.2323 +	 */
  2.2324 +
  2.2325 +	ctx->ctx_smpl_hdr   = smpl_buf;
  2.2326 +	ctx->ctx_smpl_size  = size; /* aligned size */
  2.2327 +
  2.2328 +	/*
  2.2329 +	 * Let's do the difficult operations next.
  2.2330 +	 *
  2.2331 +	 * now we atomically find some area in the address space and
  2.2332 +	 * remap the buffer in it.
  2.2333 +	 */
  2.2334 +	down_write(&task->mm->mmap_sem);
  2.2335 +
  2.2336 +	/* find some free area in address space, must have mmap sem held */
  2.2337 +	vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0);
  2.2338 +	if (vma->vm_start == 0UL) {
  2.2339 +		DPRINT(("Cannot find unmapped area for size %ld\n", size));
  2.2340 +		up_write(&task->mm->mmap_sem);
  2.2341 +		goto error;
  2.2342 +	}
  2.2343 +	vma->vm_end = vma->vm_start + size;
  2.2344 +	vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
  2.2345 +
  2.2346 +	DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start));
  2.2347 +
  2.2348 +	/* can only be applied to current task, need to have the mm semaphore held when called */
  2.2349 +	if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) {
  2.2350 +		DPRINT(("Can't remap buffer\n"));
  2.2351 +		up_write(&task->mm->mmap_sem);
  2.2352 +		goto error;
  2.2353 +	}
  2.2354 +
  2.2355 +	/*
  2.2356 +	 * now insert the vma in the vm list for the process, must be
  2.2357 +	 * done with mmap lock held
  2.2358 +	 */
  2.2359 +	insert_vm_struct(mm, vma);
  2.2360 +
  2.2361 +	mm->total_vm  += size >> PAGE_SHIFT;
  2.2362 +	vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
  2.2363 +							vma_pages(vma));
  2.2364 +	up_write(&task->mm->mmap_sem);
  2.2365 +
  2.2366 +	/*
  2.2367 +	 * keep track of user level virtual address
  2.2368 +	 */
  2.2369 +	ctx->ctx_smpl_vaddr = (void *)vma->vm_start;
  2.2370 +	*(unsigned long *)user_vaddr = vma->vm_start;
  2.2371 +
  2.2372 +	return 0;
  2.2373 +
  2.2374 +error:
  2.2375 +	kmem_cache_free(vm_area_cachep, vma);
  2.2376 +error_kmem:
  2.2377 +	pfm_rvfree(smpl_buf, size);
  2.2378 +
  2.2379 +	return -ENOMEM;
  2.2380 +}
  2.2381 +
  2.2382 +/*
  2.2383 + * XXX: do something better here
  2.2384 + */
  2.2385 +static int
  2.2386 +pfm_bad_permissions(struct task_struct *task)
  2.2387 +{
  2.2388 +	/* inspired by ptrace_attach() */
  2.2389 +	DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n",
  2.2390 +		current->uid,
  2.2391 +		current->gid,
  2.2392 +		task->euid,
  2.2393 +		task->suid,
  2.2394 +		task->uid,
  2.2395 +		task->egid,
  2.2396 +		task->sgid));
  2.2397 +
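          +	/*
          +	 * refuse unless the caller's uid/gid match all of the target's
          +	 * real, effective and saved ids, or the caller has CAP_SYS_PTRACE.
          +	 */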
  2.2398 +	return ((current->uid != task->euid)
  2.2399 +	    || (current->uid != task->suid)
  2.2400 +	    || (current->uid != task->uid)
  2.2401 +	    || (current->gid != task->egid)
  2.2402 +	    || (current->gid != task->sgid)
  2.2403 +	    || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE);
  2.2404 +}
  2.2405 +
  2.2406 +static int
  2.2407 +pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx)
  2.2408 +{
  2.2409 +	int ctx_flags;
  2.2410 +
  2.2411 +	/* valid signal */
  2.2412 +
  2.2413 +	ctx_flags = pfx->ctx_flags;
  2.2414 +
  2.2415 +	if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
  2.2416 +
  2.2417 +		/*
  2.2418 +		 * cannot block in this mode
  2.2419 +		 */
  2.2420 +		if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
  2.2421 +			DPRINT(("cannot use blocking mode when in system wide monitoring\n"));
  2.2422 +			return -EINVAL;
  2.2423 +		}
  2.2424 +	} else {
  2.2425 +	}
  2.2426 +	/* probably more to add here */
  2.2427 +
  2.2428 +	return 0;
  2.2429 +}
  2.2430 +
  2.2431 +static int
  2.2432 +pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int ctx_flags,
  2.2433 +		     unsigned int cpu, pfarg_context_t *arg)
  2.2434 +{
  2.2435 +	pfm_buffer_fmt_t *fmt = NULL;
  2.2436 +	unsigned long size = 0UL;
  2.2437 +	void *uaddr = NULL;
  2.2438 +	void *fmt_arg = NULL;
  2.2439 +	int ret = 0;
  2.2440 +#define PFM_CTXARG_BUF_ARG(a)	(pfm_buffer_fmt_t *)(a+1)
  2.2441 +
  2.2442 +	/* invoke and lock buffer format, if found */
  2.2443 +	fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
  2.2444 +	if (fmt == NULL) {
  2.2445 +		DPRINT(("[%d] cannot find buffer format\n", task->pid));
  2.2446 +		return -EINVAL;
  2.2447 +	}
  2.2448 +
  2.2449 +	/*
  2.2450 +	 * buffer argument MUST be contiguous to pfarg_context_t
  2.2451 +	 */
  2.2452 +	if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg);
  2.2453 +
  2.2454 +	ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);
  2.2455 +
  2.2456 +	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret));
  2.2457 +
  2.2458 +	if (ret) goto error;
  2.2459 +
  2.2460 +	/* link buffer format and context */
  2.2461 +	ctx->ctx_buf_fmt = fmt;
  2.2462 +
  2.2463 +	/*
  2.2464 +	 * check if buffer format wants to use perfmon buffer allocation/mapping service
  2.2465 +	 */
  2.2466 +	ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size);
  2.2467 +	if (ret) goto error;
  2.2468 +
  2.2469 +	if (size) {
  2.2470 +		/*
  2.2471 +		 * buffer is always remapped into the caller's address space
  2.2472 +		 */
  2.2473 +		ret = pfm_smpl_buffer_alloc(current, ctx, size, &uaddr);
  2.2474 +		if (ret) goto error;
  2.2475 +
  2.2476 +		/* keep track of user address of buffer */
  2.2477 +		arg->ctx_smpl_vaddr = uaddr;
  2.2478 +	}
  2.2479 +	ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg);
  2.2480 +
  2.2481 +error:
  2.2482 +	return ret;
  2.2483 +}
  2.2484 +
  2.2485 +static void
  2.2486 +pfm_reset_pmu_state(pfm_context_t *ctx)
  2.2487 +{
  2.2488 +	int i;
  2.2489 +
  2.2490 +	/*
  2.2491 +	 * install reset values for PMC.
  2.2492 +	 */
  2.2493 +	for (i=1; PMC_IS_LAST(i) == 0; i++) {
  2.2494 +		if (PMC_IS_IMPL(i) == 0) continue;
  2.2495 +		ctx->ctx_pmcs[i] = PMC_DFL_VAL(i);
  2.2496 +		DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i]));
  2.2497 +	}
  2.2498 +	/*
   2.2499 +	 * PMD registers are set to 0UL when the context is memset()
  2.2500 +	 */
  2.2501 +
  2.2502 +	/*
   2.2503 +	 * On context switch restore, we must restore ALL pmc and ALL pmd even
  2.2504 +	 * when they are not actively used by the task. In UP, the incoming process
  2.2505 +	 * may otherwise pick up left over PMC, PMD state from the previous process.
  2.2506 +	 * As opposed to PMD, stale PMC can cause harm to the incoming
  2.2507 +	 * process because they may change what is being measured.
  2.2508 +	 * Therefore, we must systematically reinstall the entire
  2.2509 +	 * PMC state. In SMP, the same thing is possible on the
   2.2510 +	 * same CPU but also between 2 CPUs.
  2.2511 +	 *
  2.2512 +	 * The problem with PMD is information leaking especially
  2.2513 +	 * to user level when psr.sp=0
  2.2514 +	 *
  2.2515 +	 * There is unfortunately no easy way to avoid this problem
  2.2516 +	 * on either UP or SMP. This definitively slows down the
  2.2517 +	 * pfm_load_regs() function.
  2.2518 +	 */
  2.2519 +
  2.2520 +	 /*
  2.2521 +	  * bitmask of all PMCs accessible to this context
  2.2522 +	  *
  2.2523 +	  * PMC0 is treated differently.
  2.2524 +	  */
  2.2525 +	ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1;
  2.2526 +
  2.2527 +	/*
   2.2528 +	 * bitmask of all PMDs that are accessible to this context
  2.2529 +	 */
  2.2530 +	ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0];
  2.2531 +
  2.2532 +	DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0]));
  2.2533 +
  2.2534 +	/*
  2.2535 +	 * useful in case of re-enable after disable
  2.2536 +	 */
  2.2537 +	ctx->ctx_used_ibrs[0] = 0UL;
  2.2538 +	ctx->ctx_used_dbrs[0] = 0UL;
  2.2539 +}
  2.2540 +
  2.2541 +static int
  2.2542 +pfm_ctx_getsize(void *arg, size_t *sz)
  2.2543 +{
  2.2544 +	pfarg_context_t *req = (pfarg_context_t *)arg;
  2.2545 +	pfm_buffer_fmt_t *fmt;
  2.2546 +
  2.2547 +	*sz = 0;
  2.2548 +
  2.2549 +	if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0;
  2.2550 +
  2.2551 +	fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id);
  2.2552 +	if (fmt == NULL) {
  2.2553 +		DPRINT(("cannot find buffer format\n"));
  2.2554 +		return -EINVAL;
  2.2555 +	}
  2.2556 +	/* get just enough to copy in user parameters */
  2.2557 +	*sz = fmt->fmt_arg_size;
  2.2558 +	DPRINT(("arg_size=%lu\n", *sz));
  2.2559 +
  2.2560 +	return 0;
  2.2561 +}
  2.2562 +
  2.2563 +
  2.2564 +
  2.2565 +/*
  2.2566 + * cannot attach if :
  2.2567 + * 	- kernel task
  2.2568 + * 	- task not owned by caller
  2.2569 + * 	- task incompatible with context mode
  2.2570 + */
  2.2571 +static int
  2.2572 +pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
  2.2573 +{
  2.2574 +	/*
   2.2575 +	 * no kernel task or task not owned by caller
  2.2576 +	 */
  2.2577 +	if (task->mm == NULL) {
   2.2578 +		DPRINT(("task [%d] has no memory context (kernel thread)\n", task->pid));
  2.2579 +		return -EPERM;
  2.2580 +	}
  2.2581 +	if (pfm_bad_permissions(task)) {
  2.2582 +		DPRINT(("no permission to attach to  [%d]\n", task->pid));
  2.2583 +		return -EPERM;
  2.2584 +	}
  2.2585 +	/*
  2.2586 +	 * cannot block in self-monitoring mode
  2.2587 +	 */
  2.2588 +	if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
  2.2589 +		DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid));
  2.2590 +		return -EINVAL;
  2.2591 +	}
  2.2592 +
  2.2593 +	if (task->exit_state == EXIT_ZOMBIE) {
  2.2594 +		DPRINT(("cannot attach to  zombie task [%d]\n", task->pid));
  2.2595 +		return -EBUSY;
  2.2596 +	}
  2.2597 +
  2.2598 +	/*
  2.2599 +	 * always ok for self
  2.2600 +	 */
  2.2601 +	if (task == current) return 0;
  2.2602 +
  2.2603 +	if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
  2.2604 +		DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state));
  2.2605 +		return -EBUSY;
  2.2606 +	}
  2.2607 +	/*
  2.2608 +	 * make sure the task is off any CPU
  2.2609 +	 */
  2.2610 +	wait_task_inactive(task);
  2.2611 +
  2.2612 +	/* more to come... */
  2.2613 +
  2.2614 +	return 0;
  2.2615 +}
  2.2616 +
  2.2617 +static int
  2.2618 +pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task)
  2.2619 +{
  2.2620 +	struct task_struct *p = current;
  2.2621 +	int ret;
  2.2622 +
  2.2623 +	/* XXX: need to add more checks here */
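          +	/* never attach to pid 0 (idle) or pid 1 (init) */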
  2.2624 +	if (pid < 2) return -EPERM;
  2.2625 +
  2.2626 +	if (pid != current->pid) {
  2.2627 +
  2.2628 +		read_lock(&tasklist_lock);
  2.2629 +
  2.2630 +		p = find_task_by_pid(pid);
  2.2631 +
  2.2632 +		/* make sure task cannot go away while we operate on it */
  2.2633 +		if (p) get_task_struct(p);
  2.2634 +
  2.2635 +		read_unlock(&tasklist_lock);
  2.2636 +
  2.2637 +		if (p == NULL) return -ESRCH;
  2.2638 +	}
  2.2639 +
  2.2640 +	ret = pfm_task_incompatible(ctx, p);
  2.2641 +	if (ret == 0) {
  2.2642 +		*task = p;
  2.2643 +	} else if (p != current) {
  2.2644 +		pfm_put_task(p);
  2.2645 +	}
  2.2646 +	return ret;
  2.2647 +}
  2.2648 +
  2.2649 +
  2.2650 +
  2.2651 +static int
  2.2652 +pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.2653 +{
  2.2654 +	pfarg_context_t *req = (pfarg_context_t *)arg;
  2.2655 +	struct file *filp;
  2.2656 +	int ctx_flags;
  2.2657 +	int ret;
  2.2658 +
  2.2659 +	/* let's check the arguments first */
  2.2660 +	ret = pfarg_is_sane(current, req);
  2.2661 +	if (ret < 0) return ret;
  2.2662 +
  2.2663 +	ctx_flags = req->ctx_flags;
  2.2664 +
  2.2665 +	ret = -ENOMEM;
  2.2666 +
  2.2667 +	ctx = pfm_context_alloc();
  2.2668 +	if (!ctx) goto error;
  2.2669 +
  2.2670 +	ret = pfm_alloc_fd(&filp);
  2.2671 +	if (ret < 0) goto error_file;
  2.2672 +
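          +	/* remember the descriptor in the context and report it back to the caller */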
  2.2673 +	req->ctx_fd = ctx->ctx_fd = ret;
  2.2674 +
  2.2675 +	/*
  2.2676 +	 * attach context to file
  2.2677 +	 */
  2.2678 +	filp->private_data = ctx;
  2.2679 +
  2.2680 +	/*
  2.2681 +	 * does the user want to sample?
  2.2682 +	 */
  2.2683 +	if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) {
  2.2684 +		ret = pfm_setup_buffer_fmt(current, ctx, ctx_flags, 0, req);
  2.2685 +		if (ret) goto buffer_error;
  2.2686 +	}
  2.2687 +
  2.2688 +	/*
  2.2689 +	 * init context protection lock
  2.2690 +	 */
  2.2691 +	spin_lock_init(&ctx->ctx_lock);
  2.2692 +
  2.2693 +	/*
  2.2694 +	 * context is unloaded
  2.2695 +	 */
  2.2696 +	ctx->ctx_state = PFM_CTX_UNLOADED;
  2.2697 +
  2.2698 +	/*
  2.2699 +	 * initialization of context's flags
  2.2700 +	 */
  2.2701 +	ctx->ctx_fl_block       = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
  2.2702 +	ctx->ctx_fl_system      = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
  2.2703 +	ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */
  2.2704 +	ctx->ctx_fl_no_msg      = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
  2.2705 +	/*
  2.2706 +	 * will move to set properties
  2.2707 +	 * ctx->ctx_fl_excl_idle   = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
  2.2708 +	 */
  2.2709 +
  2.2710 +	/*
   2.2711 +	 * init restart completion (starts out not completed, i.e. "locked")
  2.2712 +	 */
  2.2713 +	init_completion(&ctx->ctx_restart_done);
  2.2714 +
  2.2715 +	/*
  2.2716 +	 * activation is used in SMP only
  2.2717 +	 */
  2.2718 +	ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
  2.2719 +	SET_LAST_CPU(ctx, -1);
  2.2720 +
  2.2721 +	/*
  2.2722 +	 * initialize notification message queue
  2.2723 +	 */
  2.2724 +	ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
  2.2725 +	init_waitqueue_head(&ctx->ctx_msgq_wait);
  2.2726 +	init_waitqueue_head(&ctx->ctx_zombieq);
  2.2727 +
  2.2728 +	DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n",
  2.2729 +		ctx,
  2.2730 +		ctx_flags,
  2.2731 +		ctx->ctx_fl_system,
  2.2732 +		ctx->ctx_fl_block,
  2.2733 +		ctx->ctx_fl_excl_idle,
  2.2734 +		ctx->ctx_fl_no_msg,
  2.2735 +		ctx->ctx_fd));
  2.2736 +
  2.2737 +	/*
  2.2738 +	 * initialize soft PMU state
  2.2739 +	 */
  2.2740 +	pfm_reset_pmu_state(ctx);
  2.2741 +
  2.2742 +	return 0;
  2.2743 +
  2.2744 +buffer_error:
  2.2745 +	pfm_free_fd(ctx->ctx_fd, filp);
  2.2746 +
  2.2747 +	if (ctx->ctx_buf_fmt) {
  2.2748 +		pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs);
  2.2749 +	}
  2.2750 +error_file:
  2.2751 +	pfm_context_free(ctx);
  2.2752 +
  2.2753 +error:
  2.2754 +	return ret;
  2.2755 +}
  2.2756 +
  2.2757 +static inline unsigned long
  2.2758 +pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
  2.2759 +{
  2.2760 +	unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset;
  2.2761 +	unsigned long new_seed, old_seed = reg->seed, mask = reg->mask;
  2.2762 +	extern unsigned long carta_random32 (unsigned long seed);
  2.2763 +
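          +	/*
          +	 * optionally randomize the reset value by subtracting a masked
          +	 * pseudo-random offset; carta_random32() yields 32 bits, so two
          +	 * calls are combined when the mask extends beyond 32 bits.
          +	 */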
  2.2764 +	if (reg->flags & PFM_REGFL_RANDOM) {
  2.2765 +		new_seed = carta_random32(old_seed);
  2.2766 +		val -= (old_seed & mask);	/* counter values are negative numbers! */
  2.2767 +		if ((mask >> 32) != 0)
  2.2768 +			/* construct a full 64-bit random value: */
  2.2769 +			new_seed |= carta_random32(old_seed >> 32) << 32;
  2.2770 +		reg->seed = new_seed;
  2.2771 +	}
  2.2772 +	reg->lval = val;
  2.2773 +	return val;
  2.2774 +}
  2.2775 +
  2.2776 +static void
  2.2777 +pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
  2.2778 +{
  2.2779 +	unsigned long mask = ovfl_regs[0];
  2.2780 +	unsigned long reset_others = 0UL;
  2.2781 +	unsigned long val;
  2.2782 +	int i;
  2.2783 +
  2.2784 +	/*
  2.2785 +	 * now restore reset value on sampling overflowed counters
  2.2786 +	 */
  2.2787 +	mask >>= PMU_FIRST_COUNTER;
  2.2788 +	for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
  2.2789 +
  2.2790 +		if ((mask & 0x1UL) == 0UL) continue;
  2.2791 +
  2.2792 +		ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset);
  2.2793 +		reset_others        |= ctx->ctx_pmds[i].reset_pmds[0];
  2.2794 +
  2.2795 +		DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val));
  2.2796 +	}
  2.2797 +
  2.2798 +	/*
  2.2799 +	 * Now take care of resetting the other registers
  2.2800 +	 */
  2.2801 +	for(i = 0; reset_others; i++, reset_others >>= 1) {
  2.2802 +
  2.2803 +		if ((reset_others & 0x1) == 0) continue;
  2.2804 +
  2.2805 +		ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);
  2.2806 +
  2.2807 +		DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
  2.2808 +			  is_long_reset ? "long" : "short", i, val));
  2.2809 +	}
  2.2810 +}
  2.2811 +
  2.2812 +static void
  2.2813 +pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
  2.2814 +{
  2.2815 +	unsigned long mask = ovfl_regs[0];
  2.2816 +	unsigned long reset_others = 0UL;
  2.2817 +	unsigned long val;
  2.2818 +	int i;
  2.2819 +
  2.2820 +	DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset));
  2.2821 +
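          +	/*
          +	 * when the context is MASKED, monitoring is stopped, so only the
          +	 * software copies of the counters are refreshed here.
          +	 */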
  2.2822 +	if (ctx->ctx_state == PFM_CTX_MASKED) {
  2.2823 +		pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset);
  2.2824 +		return;
  2.2825 +	}
  2.2826 +
  2.2827 +	/*
  2.2828 +	 * now restore reset value on sampling overflowed counters
  2.2829 +	 */
  2.2830 +	mask >>= PMU_FIRST_COUNTER;
  2.2831 +	for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
  2.2832 +
  2.2833 +		if ((mask & 0x1UL) == 0UL) continue;
  2.2834 +
  2.2835 +		val           = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset);
  2.2836 +		reset_others |= ctx->ctx_pmds[i].reset_pmds[0];
  2.2837 +
  2.2838 +		DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val));
  2.2839 +
  2.2840 +		pfm_write_soft_counter(ctx, i, val);
  2.2841 +	}
  2.2842 +
  2.2843 +	/*
  2.2844 +	 * Now take care of resetting the other registers
  2.2845 +	 */
  2.2846 +	for(i = 0; reset_others; i++, reset_others >>= 1) {
  2.2847 +
  2.2848 +		if ((reset_others & 0x1) == 0) continue;
  2.2849 +
  2.2850 +		val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);
  2.2851 +
  2.2852 +		if (PMD_IS_COUNTING(i)) {
  2.2853 +			pfm_write_soft_counter(ctx, i, val);
  2.2854 +		} else {
  2.2855 +			ia64_set_pmd(i, val);
  2.2856 +		}
  2.2857 +		DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
  2.2858 +			  is_long_reset ? "long" : "short", i, val));
  2.2859 +	}
  2.2860 +	ia64_srlz_d();
  2.2861 +}
  2.2862 +
  2.2863 +static int
  2.2864 +pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.2865 +{
  2.2866 +	struct thread_struct *thread = NULL;
  2.2867 +	struct task_struct *task;
  2.2868 +	pfarg_reg_t *req = (pfarg_reg_t *)arg;
  2.2869 +	unsigned long value, pmc_pm;
  2.2870 +	unsigned long smpl_pmds, reset_pmds, impl_pmds;
  2.2871 +	unsigned int cnum, reg_flags, flags, pmc_type;
  2.2872 +	int i, can_access_pmu = 0, is_loaded, is_system, expert_mode;
  2.2873 +	int is_monitor, is_counting, state;
  2.2874 +	int ret = -EINVAL;
  2.2875 +	pfm_reg_check_t	wr_func;
  2.2876 +#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z))
  2.2877 +
  2.2878 +	state     = ctx->ctx_state;
  2.2879 +	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
  2.2880 +	is_system = ctx->ctx_fl_system;
  2.2881 +	task      = ctx->ctx_task;
  2.2882 +	impl_pmds = pmu_conf->impl_pmds[0];
  2.2883 +
  2.2884 +	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
  2.2885 +
  2.2886 +	if (is_loaded) {
  2.2887 +		thread = &task->thread;
  2.2888 +		/*
  2.2889 +		 * In system wide and when the context is loaded, access can only happen
  2.2890 +		 * when the caller is running on the CPU being monitored by the session.
  2.2891 +		 * It does not have to be the owner (ctx_task) of the context per se.
  2.2892 +		 */
  2.2893 +		if (is_system && ctx->ctx_cpu != smp_processor_id()) {
  2.2894 +			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
  2.2895 +			return -EBUSY;
  2.2896 +		}
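          +		/*
          +		 * the hardware registers can be touched directly when the caller
          +		 * owns the PMU or when this is the monitored CPU of a
          +		 * system-wide session.
          +		 */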
  2.2897 +		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
  2.2898 +	}
  2.2899 +	expert_mode = pfm_sysctl.expert_mode; 
  2.2900 +
  2.2901 +	for (i = 0; i < count; i++, req++) {
  2.2902 +
  2.2903 +		cnum       = req->reg_num;
  2.2904 +		reg_flags  = req->reg_flags;
  2.2905 +		value      = req->reg_value;
  2.2906 +		smpl_pmds  = req->reg_smpl_pmds[0];
  2.2907 +		reset_pmds = req->reg_reset_pmds[0];
  2.2908 +		flags      = 0;
  2.2909 +
  2.2910 +
  2.2911 +		if (cnum >= PMU_MAX_PMCS) {
  2.2912 +			DPRINT(("pmc%u is invalid\n", cnum));
  2.2913 +			goto error;
  2.2914 +		}
  2.2915 +
  2.2916 +		pmc_type   = pmu_conf->pmc_desc[cnum].type;
  2.2917 +		pmc_pm     = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1;
  2.2918 +		is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0;
  2.2919 +		is_monitor  = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0;
  2.2920 +
  2.2921 +		/*
  2.2922 +		 * we reject all non implemented PMC as well
  2.2923 +		 * as attempts to modify PMC[0-3] which are used
  2.2924 +		 * as status registers by the PMU
  2.2925 +		 */
  2.2926 +		if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) {
  2.2927 +			DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type));
  2.2928 +			goto error;
  2.2929 +		}
  2.2930 +		wr_func = pmu_conf->pmc_desc[cnum].write_check;
  2.2931 +		/*
  2.2932 +		 * If the PMC is a monitor, then if the value is not the default:
  2.2933 +		 * 	- system-wide session: PMCx.pm=1 (privileged monitor)
  2.2934 +		 * 	- per-task           : PMCx.pm=0 (user monitor)
  2.2935 +		 */
  2.2936 +		if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) {
  2.2937 +			DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n",
  2.2938 +				cnum,
  2.2939 +				pmc_pm,
  2.2940 +				is_system));
  2.2941 +			goto error;
  2.2942 +		}
  2.2943 +
  2.2944 +		if (is_counting) {
  2.2945 +			/*
  2.2946 +		 	 * enforce generation of overflow interrupt. Necessary on all
  2.2947 +		 	 * CPUs.
  2.2948 +		 	 */
  2.2949 +			value |= 1 << PMU_PMC_OI;
  2.2950 +
  2.2951 +			if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
  2.2952 +				flags |= PFM_REGFL_OVFL_NOTIFY;
  2.2953 +			}
  2.2954 +
  2.2955 +			if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
  2.2956 +
  2.2957 +			/* verify validity of smpl_pmds */
  2.2958 +			if ((smpl_pmds & impl_pmds) != smpl_pmds) {
  2.2959 +				DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum));
  2.2960 +				goto error;
  2.2961 +			}
  2.2962 +
  2.2963 +			/* verify validity of reset_pmds */
  2.2964 +			if ((reset_pmds & impl_pmds) != reset_pmds) {
  2.2965 +				DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
  2.2966 +				goto error;
  2.2967 +			}
  2.2968 +		} else {
  2.2969 +			if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
  2.2970 +				DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum));
  2.2971 +				goto error;
  2.2972 +			}
  2.2973 +			/* eventid on non-counting monitors are ignored */
  2.2974 +		}
  2.2975 +
  2.2976 +		/*
  2.2977 +		 * execute write checker, if any
  2.2978 +		 */
  2.2979 +		if (likely(expert_mode == 0 && wr_func)) {
  2.2980 +			ret = (*wr_func)(task, ctx, cnum, &value, regs);
  2.2981 +			if (ret) goto error;
  2.2982 +			ret = -EINVAL;
  2.2983 +		}
  2.2984 +
  2.2985 +		/*
  2.2986 +		 * no error on this register
  2.2987 +		 */
  2.2988 +		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
  2.2989 +
  2.2990 +		/*
  2.2991 +		 * Now we commit the changes to the software state
  2.2992 +		 */
  2.2993 +
  2.2994 +		/*
  2.2995 +		 * update overflow information
  2.2996 +		 */
  2.2997 +		if (is_counting) {
  2.2998 +			/*
  2.2999 +		 	 * full flag update each time a register is programmed
  2.3000 +		 	 */
  2.3001 +			ctx->ctx_pmds[cnum].flags = flags;
  2.3002 +
  2.3003 +			ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds;
  2.3004 +			ctx->ctx_pmds[cnum].smpl_pmds[0]  = smpl_pmds;
  2.3005 +			ctx->ctx_pmds[cnum].eventid       = req->reg_smpl_eventid;
  2.3006 +
  2.3007 +			/*
  2.3008 +			 * Mark all PMDS to be accessed as used.
  2.3009 +			 *
  2.3010 +			 * We do not keep track of PMC because we have to
  2.3011 +			 * systematically restore ALL of them.
  2.3012 +			 *
  2.3013 +			 * We do not update the used_monitors mask, because
   2.3014 +			 * if we have not programmed them, then they will be in
   2.3015 +			 * a quiescent state, therefore we will not need to
   2.3016 +			 * mask/restore them when the context is MASKED.
  2.3017 +			 */
  2.3018 +			CTX_USED_PMD(ctx, reset_pmds);
  2.3019 +			CTX_USED_PMD(ctx, smpl_pmds);
  2.3020 +			/*
  2.3021 +		 	 * make sure we do not try to reset on
  2.3022 +		 	 * restart because we have established new values
  2.3023 +		 	 */
  2.3024 +			if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum;
  2.3025 +		}
  2.3026 +		/*
  2.3027 +		 * Needed in case the user does not initialize the equivalent
  2.3028 +		 * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no
  2.3029 +		 * possible leak here.
  2.3030 +		 */
  2.3031 +		CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]);
  2.3032 +
  2.3033 +		/*
  2.3034 +		 * keep track of the monitor PMC that we are using.
  2.3035 +		 * we save the value of the pmc in ctx_pmcs[] and if
  2.3036 +		 * the monitoring is not stopped for the context we also
  2.3037 +		 * place it in the saved state area so that it will be
  2.3038 +		 * picked up later by the context switch code.
  2.3039 +		 *
  2.3040 +		 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
  2.3041 +		 *
  2.3042 +		 * The value in thread->pmcs[] may be modified on overflow, i.e.,  when
  2.3043 +		 * monitoring needs to be stopped.
  2.3044 +		 */
  2.3045 +		if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum);
  2.3046 +
  2.3047 +		/*
  2.3048 +		 * update context state
  2.3049 +		 */
  2.3050 +		ctx->ctx_pmcs[cnum] = value;
  2.3051 +
  2.3052 +		if (is_loaded) {
  2.3053 +			/*
  2.3054 +			 * write thread state
  2.3055 +			 */
  2.3056 +			if (is_system == 0) thread->pmcs[cnum] = value;
  2.3057 +
  2.3058 +			/*
  2.3059 +			 * write hardware register if we can
  2.3060 +			 */
  2.3061 +			if (can_access_pmu) {
  2.3062 +				ia64_set_pmc(cnum, value);
  2.3063 +			}
  2.3064 +#ifdef CONFIG_SMP
  2.3065 +			else {
  2.3066 +				/*
  2.3067 +				 * per-task SMP only here
  2.3068 +				 *
  2.3069 +			 	 * we are guaranteed that the task is not running on the other CPU,
  2.3070 +			 	 * we indicate that this PMD will need to be reloaded if the task
  2.3071 +			 	 * is rescheduled on the CPU it ran last on.
  2.3072 +			 	 */
  2.3073 +				ctx->ctx_reload_pmcs[0] |= 1UL << cnum;
  2.3074 +			}
  2.3075 +#endif
  2.3076 +		}
  2.3077 +
  2.3078 +		DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n",
  2.3079 +			  cnum,
  2.3080 +			  value,
  2.3081 +			  is_loaded,
  2.3082 +			  can_access_pmu,
  2.3083 +			  flags,
  2.3084 +			  ctx->ctx_all_pmcs[0],
  2.3085 +			  ctx->ctx_used_pmds[0],
  2.3086 +			  ctx->ctx_pmds[cnum].eventid,
  2.3087 +			  smpl_pmds,
  2.3088 +			  reset_pmds,
  2.3089 +			  ctx->ctx_reload_pmcs[0],
  2.3090 +			  ctx->ctx_used_monitors[0],
  2.3091 +			  ctx->ctx_ovfl_regs[0]));
  2.3092 +	}
  2.3093 +
  2.3094 +	/*
  2.3095 +	 * make sure the changes are visible
  2.3096 +	 */
  2.3097 +	if (can_access_pmu) ia64_srlz_d();
  2.3098 +
  2.3099 +	return 0;
  2.3100 +error:
  2.3101 +	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
  2.3102 +	return ret;
  2.3103 +}
  2.3104 +
  2.3105 +static int
  2.3106 +pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.3107 +{
  2.3108 +	struct thread_struct *thread = NULL;
  2.3109 +	struct task_struct *task;
  2.3110 +	pfarg_reg_t *req = (pfarg_reg_t *)arg;
  2.3111 +	unsigned long value, hw_value, ovfl_mask;
  2.3112 +	unsigned int cnum;
  2.3113 +	int i, can_access_pmu = 0, state;
  2.3114 +	int is_counting, is_loaded, is_system, expert_mode;
  2.3115 +	int ret = -EINVAL;
  2.3116 +	pfm_reg_check_t wr_func;
  2.3117 +
  2.3118 +
  2.3119 +	state     = ctx->ctx_state;
  2.3120 +	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
  2.3121 +	is_system = ctx->ctx_fl_system;
  2.3122 +	ovfl_mask = pmu_conf->ovfl_val;
  2.3123 +	task      = ctx->ctx_task;
  2.3124 +
  2.3125 +	if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL;
  2.3126 +
  2.3127 +	/*
  2.3128 +	 * on both UP and SMP, we can only write to the PMC when the task is
  2.3129 +	 * the owner of the local PMU.
  2.3130 +	 */
  2.3131 +	if (likely(is_loaded)) {
  2.3132 +		thread = &task->thread;
  2.3133 +		/*
  2.3134 +		 * In system wide and when the context is loaded, access can only happen
  2.3135 +		 * when the caller is running on the CPU being monitored by the session.
  2.3136 +		 * It does not have to be the owner (ctx_task) of the context per se.
  2.3137 +		 */
  2.3138 +		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
  2.3139 +			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
  2.3140 +			return -EBUSY;
  2.3141 +		}
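          +		/*
          +		 * the hardware registers can only be touched directly when the
          +		 * caller owns the PMU (self-monitoring) or, for a system-wide
          +		 * session, when it runs on the monitored CPU (checked above)
          +		 */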
  2.3142 +		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
  2.3143 +	}
  2.3144 +	expert_mode = pfm_sysctl.expert_mode; 
  2.3145 +
  2.3146 +	for (i = 0; i < count; i++, req++) {
  2.3147 +
  2.3148 +		cnum  = req->reg_num;
  2.3149 +		value = req->reg_value;
  2.3150 +
  2.3151 +		if (!PMD_IS_IMPL(cnum)) {
  2.3152 +			DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum));
  2.3153 +			goto abort_mission;
  2.3154 +		}
  2.3155 +		is_counting = PMD_IS_COUNTING(cnum);
  2.3156 +		wr_func     = pmu_conf->pmd_desc[cnum].write_check;
  2.3157 +
  2.3158 +		/*
  2.3159 +		 * execute write checker, if any
  2.3160 +		 */
  2.3161 +		if (unlikely(expert_mode == 0 && wr_func)) {
  2.3162 +			unsigned long v = value;
  2.3163 +
  2.3164 +			ret = (*wr_func)(task, ctx, cnum, &v, regs);
  2.3165 +			if (ret) goto abort_mission;
  2.3166 +
  2.3167 +			value = v;
  2.3168 +			ret   = -EINVAL;
  2.3169 +		}
  2.3170 +
  2.3171 +		/*
  2.3172 +		 * no error on this register
  2.3173 +		 */
  2.3174 +		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
  2.3175 +
  2.3176 +		/*
  2.3177 +		 * now commit changes to software state
  2.3178 +		 */
  2.3179 +		hw_value = value;
  2.3180 +
  2.3181 +		/*
  2.3182 +		 * update virtualized (64bits) counter
  2.3183 +		 */
  2.3184 +		if (is_counting) {
  2.3185 +			/*
  2.3186 +			 * write context state
  2.3187 +			 */
  2.3188 +			ctx->ctx_pmds[cnum].lval = value;
  2.3189 +
  2.3190 +			/*
   2.3191 +			 * when the context is loaded we use the split value
  2.3192 +			 */
  2.3193 +			if (is_loaded) {
  2.3194 +				hw_value = value &  ovfl_mask;
  2.3195 +				value    = value & ~ovfl_mask;
  2.3196 +			}
  2.3197 +		}
  2.3198 +		/*
  2.3199 +		 * update reset values (not just for counters)
  2.3200 +		 */
  2.3201 +		ctx->ctx_pmds[cnum].long_reset  = req->reg_long_reset;
  2.3202 +		ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset;
  2.3203 +
  2.3204 +		/*
  2.3205 +		 * update randomization parameters (not just for counters)
  2.3206 +		 */
  2.3207 +		ctx->ctx_pmds[cnum].seed = req->reg_random_seed;
  2.3208 +		ctx->ctx_pmds[cnum].mask = req->reg_random_mask;
  2.3209 +
  2.3210 +		/*
  2.3211 +		 * update context value
  2.3212 +		 */
  2.3213 +		ctx->ctx_pmds[cnum].val  = value;
  2.3214 +
  2.3215 +		/*
  2.3216 +		 * Keep track of what we use
  2.3217 +		 *
  2.3218 +		 * We do not keep track of PMC because we have to
  2.3219 +		 * systematically restore ALL of them.
  2.3220 +		 */
  2.3221 +		CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum));
  2.3222 +
  2.3223 +		/*
  2.3224 +		 * mark this PMD register used as well
  2.3225 +		 */
  2.3226 +		CTX_USED_PMD(ctx, RDEP(cnum));
  2.3227 +
  2.3228 +		/*
  2.3229 +		 * make sure we do not try to reset on
  2.3230 +		 * restart because we have established new values
  2.3231 +		 */
  2.3232 +		if (is_counting && state == PFM_CTX_MASKED) {
  2.3233 +			ctx->ctx_ovfl_regs[0] &= ~1UL << cnum;
  2.3234 +		}
  2.3235 +
  2.3236 +		if (is_loaded) {
  2.3237 +			/*
  2.3238 +		 	 * write thread state
  2.3239 +		 	 */
  2.3240 +			if (is_system == 0) thread->pmds[cnum] = hw_value;
  2.3241 +
  2.3242 +			/*
  2.3243 +			 * write hardware register if we can
  2.3244 +			 */
  2.3245 +			if (can_access_pmu) {
  2.3246 +				ia64_set_pmd(cnum, hw_value);
  2.3247 +			} else {
  2.3248 +#ifdef CONFIG_SMP
  2.3249 +				/*
   2.3250 +				 * we are guaranteed that the task is not running on another CPU,
   2.3251 +				 * so we indicate that this PMD will need to be reloaded if the task
   2.3252 +				 * is rescheduled on the CPU it last ran on.
  2.3253 +			 	 */
  2.3254 +				ctx->ctx_reload_pmds[0] |= 1UL << cnum;
  2.3255 +#endif
  2.3256 +			}
  2.3257 +		}
  2.3258 +
  2.3259 +		DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx  short_reset=0x%lx "
  2.3260 +			  "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n",
  2.3261 +			cnum,
  2.3262 +			value,
  2.3263 +			is_loaded,
  2.3264 +			can_access_pmu,
  2.3265 +			hw_value,
  2.3266 +			ctx->ctx_pmds[cnum].val,
  2.3267 +			ctx->ctx_pmds[cnum].short_reset,
  2.3268 +			ctx->ctx_pmds[cnum].long_reset,
  2.3269 +			PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N',
  2.3270 +			ctx->ctx_pmds[cnum].seed,
  2.3271 +			ctx->ctx_pmds[cnum].mask,
  2.3272 +			ctx->ctx_used_pmds[0],
  2.3273 +			ctx->ctx_pmds[cnum].reset_pmds[0],
  2.3274 +			ctx->ctx_reload_pmds[0],
  2.3275 +			ctx->ctx_all_pmds[0],
  2.3276 +			ctx->ctx_ovfl_regs[0]));
  2.3277 +	}
  2.3278 +
  2.3279 +	/*
  2.3280 +	 * make changes visible
  2.3281 +	 */
  2.3282 +	if (can_access_pmu) ia64_srlz_d();
  2.3283 +
  2.3284 +	return 0;
  2.3285 +
  2.3286 +abort_mission:
  2.3287 +	/*
  2.3288 +	 * for now, we have only one possibility for error
  2.3289 +	 */
  2.3290 +	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
  2.3291 +	return ret;
  2.3292 +}
  2.3293 +
  2.3294 +/*
   2.3295 + * By way of PROTECT_CTX(), interrupts are masked while we are in this function.
   2.3296 + * Therefore we know we do not have to worry about the PMU overflow interrupt. If an
   2.3297 + * interrupt is delivered during the call, it will be kept pending until we leave, making
   2.3298 + * it appear as if it had been generated at the UNPROTECT_CTX(). At least we are
   2.3299 + * guaranteed to return consistent data to the user, it may simply be stale. It is not
   2.3300 + * trivial to handle the overflow while inside the call because we may end up in
   2.3301 + * some module's sampling buffer code, causing deadlocks.
  2.3302 + */
  2.3303 +static int
  2.3304 +pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.3305 +{
  2.3306 +	struct thread_struct *thread = NULL;
  2.3307 +	struct task_struct *task;
  2.3308 +	unsigned long val = 0UL, lval, ovfl_mask, sval;
  2.3309 +	pfarg_reg_t *req = (pfarg_reg_t *)arg;
  2.3310 +	unsigned int cnum, reg_flags = 0;
  2.3311 +	int i, can_access_pmu = 0, state;
  2.3312 +	int is_loaded, is_system, is_counting, expert_mode;
  2.3313 +	int ret = -EINVAL;
  2.3314 +	pfm_reg_check_t rd_func;
  2.3315 +
  2.3316 +	/*
  2.3317 +	 * access is possible when loaded only for
  2.3318 +	 * self-monitoring tasks or in UP mode
  2.3319 +	 */
  2.3320 +
  2.3321 +	state     = ctx->ctx_state;
  2.3322 +	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
  2.3323 +	is_system = ctx->ctx_fl_system;
  2.3324 +	ovfl_mask = pmu_conf->ovfl_val;
  2.3325 +	task      = ctx->ctx_task;
  2.3326 +
  2.3327 +	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
  2.3328 +
  2.3329 +	if (likely(is_loaded)) {
  2.3330 +		thread = &task->thread;
  2.3331 +		/*
  2.3332 +		 * In system wide and when the context is loaded, access can only happen
  2.3333 +		 * when the caller is running on the CPU being monitored by the session.
  2.3334 +		 * It does not have to be the owner (ctx_task) of the context per se.
  2.3335 +		 */
  2.3336 +		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
  2.3337 +			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
  2.3338 +			return -EBUSY;
  2.3339 +		}
  2.3340 +		/*
   2.3341 +		 * when not self-monitoring, this can only be true in UP mode
  2.3342 +		 */
  2.3343 +		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
  2.3344 +
  2.3345 +		if (can_access_pmu) ia64_srlz_d();
  2.3346 +	}
  2.3347 +	expert_mode = pfm_sysctl.expert_mode; 
  2.3348 +
  2.3349 +	DPRINT(("ld=%d apmu=%d ctx_state=%d\n",
  2.3350 +		is_loaded,
  2.3351 +		can_access_pmu,
  2.3352 +		state));
  2.3353 +
  2.3354 +	/*
  2.3355 +	 * on both UP and SMP, we can only read the PMD from the hardware register when
  2.3356 +	 * the task is the owner of the local PMU.
  2.3357 +	 */
  2.3358 +
  2.3359 +	for (i = 0; i < count; i++, req++) {
  2.3360 +
  2.3361 +		cnum        = req->reg_num;
  2.3362 +		reg_flags   = req->reg_flags;
  2.3363 +
  2.3364 +		if (unlikely(!PMD_IS_IMPL(cnum))) goto error;
  2.3365 +		/*
   2.3366 +		 * we can only read the registers that we use. That includes
   2.3367 +		 * the ones we explicitly initialize AND the ones we want included
   2.3368 +		 * in the sampling buffer (smpl_regs).
   2.3369 +		 *
   2.3370 +		 * Having this restriction allows optimizations in the ctxsw routine
   2.3371 +		 * without compromising security (no leaks)
  2.3372 +		 */
  2.3373 +		if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error;
  2.3374 +
  2.3375 +		sval        = ctx->ctx_pmds[cnum].val;
  2.3376 +		lval        = ctx->ctx_pmds[cnum].lval;
  2.3377 +		is_counting = PMD_IS_COUNTING(cnum);
  2.3378 +
  2.3379 +		/*
  2.3380 +		 * If the task is not the current one, then we check if the
  2.3381 +		 * PMU state is still in the local live register due to lazy ctxsw.
  2.3382 +		 * If true, then we read directly from the registers.
  2.3383 +		 */
  2.3384 +		if (can_access_pmu){
  2.3385 +			val = ia64_get_pmd(cnum);
  2.3386 +		} else {
  2.3387 +			/*
  2.3388 +			 * context has been saved
  2.3389 +			 * if context is zombie, then task does not exist anymore.
  2.3390 +			 * In this case, we use the full value saved in the context (pfm_flush_regs()).
  2.3391 +			 */
  2.3392 +			val = is_loaded ? thread->pmds[cnum] : 0UL;
  2.3393 +		}
  2.3394 +		rd_func = pmu_conf->pmd_desc[cnum].read_check;
  2.3395 +
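          +		/*
          +		 * for counting monitors, rebuild the full 64-bit virtual value:
          +		 * the hardware register only holds the low ovfl_mask bits, the
          +		 * upper bits are accumulated in software in ctx_pmds[].val (sval)
          +		 */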
  2.3396 +		if (is_counting) {
  2.3397 +			/*
  2.3398 +			 * XXX: need to check for overflow when loaded
  2.3399 +			 */
  2.3400 +			val &= ovfl_mask;
  2.3401 +			val += sval;
  2.3402 +		}
  2.3403 +
  2.3404 +		/*
  2.3405 +		 * execute read checker, if any
  2.3406 +		 */
  2.3407 +		if (unlikely(expert_mode == 0 && rd_func)) {
  2.3408 +			unsigned long v = val;
  2.3409 +			ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs);
  2.3410 +			if (ret) goto error;
  2.3411 +			val = v;
  2.3412 +			ret = -EINVAL;
  2.3413 +		}
  2.3414 +
  2.3415 +		PFM_REG_RETFLAG_SET(reg_flags, 0);
  2.3416 +
  2.3417 +		DPRINT(("pmd[%u]=0x%lx\n", cnum, val));
  2.3418 +
  2.3419 +		/*
  2.3420 +		 * update register return value, abort all if problem during copy.
  2.3421 +		 * we only modify the reg_flags field. no check mode is fine because
  2.3422 +		 * access has been verified upfront in sys_perfmonctl().
  2.3423 +		 */
  2.3424 +		req->reg_value            = val;
  2.3425 +		req->reg_flags            = reg_flags;
  2.3426 +		req->reg_last_reset_val   = lval;
  2.3427 +	}
  2.3428 +
  2.3429 +	return 0;
  2.3430 +
  2.3431 +error:
  2.3432 +	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
  2.3433 +	return ret;
  2.3434 +}
  2.3435 +
  2.3436 +int
  2.3437 +pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
  2.3438 +{
  2.3439 +	pfm_context_t *ctx;
  2.3440 +
  2.3441 +	if (req == NULL) return -EINVAL;
  2.3442 +
  2.3443 + 	ctx = GET_PMU_CTX();
  2.3444 +
  2.3445 +	if (ctx == NULL) return -EINVAL;
  2.3446 +
  2.3447 +	/*
  2.3448 +	 * for now limit to current task, which is enough when calling
  2.3449 +	 * from overflow handler
  2.3450 +	 */
  2.3451 +	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
  2.3452 +
  2.3453 +	return pfm_write_pmcs(ctx, req, nreq, regs);
  2.3454 +}
  2.3455 +EXPORT_SYMBOL(pfm_mod_write_pmcs);
  2.3456 +
  2.3457 +int
  2.3458 +pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
  2.3459 +{
  2.3460 +	pfm_context_t *ctx;
  2.3461 +
  2.3462 +	if (req == NULL) return -EINVAL;
  2.3463 +
  2.3464 + 	ctx = GET_PMU_CTX();
  2.3465 +
  2.3466 +	if (ctx == NULL) return -EINVAL;
  2.3467 +
  2.3468 +	/*
  2.3469 +	 * for now limit to current task, which is enough when calling
  2.3470 +	 * from overflow handler
  2.3471 +	 */
  2.3472 +	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
  2.3473 +
  2.3474 +	return pfm_read_pmds(ctx, req, nreq, regs);
  2.3475 +}
  2.3476 +EXPORT_SYMBOL(pfm_mod_read_pmds);
  2.3477 +
  2.3478 +/*
   2.3479 + * Only call this function when a process is trying to
  2.3480 + * write the debug registers (reading is always allowed)
  2.3481 + */
  2.3482 +int
  2.3483 +pfm_use_debug_registers(struct task_struct *task)
  2.3484 +{
  2.3485 +	pfm_context_t *ctx = task->thread.pfm_context;
  2.3486 +	unsigned long flags;
  2.3487 +	int ret = 0;
  2.3488 +
  2.3489 +	if (pmu_conf->use_rr_dbregs == 0) return 0;
  2.3490 +
  2.3491 +	DPRINT(("called for [%d]\n", task->pid));
  2.3492 +
  2.3493 +	/*
  2.3494 +	 * do it only once
  2.3495 +	 */
  2.3496 +	if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0;
  2.3497 +
  2.3498 +	/*
  2.3499 +	 * Even on SMP, we do not need to use an atomic here because
  2.3500 +	 * the only way in is via ptrace() and this is possible only when the
  2.3501 +	 * process is stopped. Even in the case where the ctxsw out is not totally
  2.3502 +	 * completed by the time we come here, there is no way the 'stopped' process
  2.3503 +	 * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine.
  2.3504 +	 * So this is always safe.
  2.3505 +	 */
  2.3506 +	if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1;
  2.3507 +
  2.3508 +	LOCK_PFS(flags);
  2.3509 +
  2.3510 +	/*
  2.3511 +	 * We cannot allow setting breakpoints when system wide monitoring
  2.3512 +	 * sessions are using the debug registers.
  2.3513 +	 */
   2.3514 +	if (pfm_sessions.pfs_sys_use_dbregs > 0)
  2.3515 +		ret = -1;
  2.3516 +	else
  2.3517 +		pfm_sessions.pfs_ptrace_use_dbregs++;
  2.3518 +
  2.3519 +	DPRINT(("ptrace_use_dbregs=%u  sys_use_dbregs=%u by [%d] ret = %d\n",
  2.3520 +		  pfm_sessions.pfs_ptrace_use_dbregs,
  2.3521 +		  pfm_sessions.pfs_sys_use_dbregs,
  2.3522 +		  task->pid, ret));
  2.3523 +
  2.3524 +	UNLOCK_PFS(flags);
  2.3525 +
  2.3526 +	return ret;
  2.3527 +}
  2.3528 +
  2.3529 +/*
  2.3530 + * This function is called for every task that exits with the
  2.3531 + * IA64_THREAD_DBG_VALID set. This indicates a task which was
  2.3532 + * able to use the debug registers for debugging purposes via
  2.3533 + * ptrace(). Therefore we know it was not using them for
   2.3534 + * performance monitoring, so we only decrement the number
   2.3535 + * of "ptraced" debug register users to keep the count up to date.
  2.3536 + */
  2.3537 +int
  2.3538 +pfm_release_debug_registers(struct task_struct *task)
  2.3539 +{
  2.3540 +	unsigned long flags;
  2.3541 +	int ret;
  2.3542 +
  2.3543 +	if (pmu_conf->use_rr_dbregs == 0) return 0;
  2.3544 +
  2.3545 +	LOCK_PFS(flags);
  2.3546 +	if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
  2.3547 +		printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid);
  2.3548 +		ret = -1;
  2.3549 +	}  else {
  2.3550 +		pfm_sessions.pfs_ptrace_use_dbregs--;
  2.3551 +		ret = 0;
  2.3552 +	}
  2.3553 +	UNLOCK_PFS(flags);
  2.3554 +
  2.3555 +	return ret;
  2.3556 +}
  2.3557 +
  2.3558 +static int
  2.3559 +pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.3560 +{
  2.3561 +	struct task_struct *task;
  2.3562 +	pfm_buffer_fmt_t *fmt;
  2.3563 +	pfm_ovfl_ctrl_t rst_ctrl;
  2.3564 +	int state, is_system;
  2.3565 +	int ret = 0;
  2.3566 +
  2.3567 +	state     = ctx->ctx_state;
  2.3568 +	fmt       = ctx->ctx_buf_fmt;
  2.3569 +	is_system = ctx->ctx_fl_system;
  2.3570 +	task      = PFM_CTX_TASK(ctx);
  2.3571 +
  2.3572 +	switch(state) {
  2.3573 +		case PFM_CTX_MASKED:
  2.3574 +			break;
  2.3575 +		case PFM_CTX_LOADED: 
  2.3576 +			if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break;
  2.3577 +			/* fall through */
  2.3578 +		case PFM_CTX_UNLOADED:
  2.3579 +		case PFM_CTX_ZOMBIE:
  2.3580 +			DPRINT(("invalid state=%d\n", state));
  2.3581 +			return -EBUSY;
  2.3582 +		default:
  2.3583 +			DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state));
  2.3584 +			return -EINVAL;
  2.3585 +	}
  2.3586 +
  2.3587 +	/*
  2.3588 + 	 * In system wide and when the context is loaded, access can only happen
  2.3589 + 	 * when the caller is running on the CPU being monitored by the session.
  2.3590 + 	 * It does not have to be the owner (ctx_task) of the context per se.
  2.3591 + 	 */
  2.3592 +	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
  2.3593 +		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
  2.3594 +		return -EBUSY;
  2.3595 +	}
  2.3596 +
  2.3597 +	/* sanity check */
  2.3598 +	if (unlikely(task == NULL)) {
  2.3599 +		printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid);
  2.3600 +		return -EINVAL;
  2.3601 +	}
  2.3602 +
  2.3603 +	if (task == current || is_system) {
  2.3604 +
  2.3605 +		fmt = ctx->ctx_buf_fmt;
  2.3606 +
  2.3607 +		DPRINT(("restarting self %d ovfl=0x%lx\n",
  2.3608 +			task->pid,
  2.3609 +			ctx->ctx_ovfl_regs[0]));
  2.3610 +
  2.3611 +		if (CTX_HAS_SMPL(ctx)) {
  2.3612 +
  2.3613 +			prefetch(ctx->ctx_smpl_hdr);
  2.3614 +
  2.3615 +			rst_ctrl.bits.mask_monitoring = 0;
  2.3616 +			rst_ctrl.bits.reset_ovfl_pmds = 0;
  2.3617 +
  2.3618 +			if (state == PFM_CTX_LOADED)
  2.3619 +				ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
  2.3620 +			else
  2.3621 +				ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
  2.3622 +		} else {
  2.3623 +			rst_ctrl.bits.mask_monitoring = 0;
  2.3624 +			rst_ctrl.bits.reset_ovfl_pmds = 1;
  2.3625 +		}
  2.3626 +
  2.3627 +		if (ret == 0) {
  2.3628 +			if (rst_ctrl.bits.reset_ovfl_pmds)
  2.3629 +				pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
  2.3630 +
  2.3631 +			if (rst_ctrl.bits.mask_monitoring == 0) {
  2.3632 +				DPRINT(("resuming monitoring for [%d]\n", task->pid));
  2.3633 +
  2.3634 +				if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
  2.3635 +			} else {
  2.3636 +				DPRINT(("keeping monitoring stopped for [%d]\n", task->pid));
  2.3637 +
   2.3638 +				/* cannot use pfm_stop_monitoring(task, regs); */
  2.3639 +			}
  2.3640 +		}
  2.3641 +		/*
  2.3642 +		 * clear overflowed PMD mask to remove any stale information
  2.3643 +		 */
  2.3644 +		ctx->ctx_ovfl_regs[0] = 0UL;
  2.3645 +
  2.3646 +		/*
  2.3647 +		 * back to LOADED state
  2.3648 +		 */
  2.3649 +		ctx->ctx_state = PFM_CTX_LOADED;
  2.3650 +
  2.3651 +		/*
  2.3652 +		 * XXX: not really useful for self monitoring
  2.3653 +		 */
  2.3654 +		ctx->ctx_fl_can_restart = 0;
  2.3655 +
  2.3656 +		return 0;
  2.3657 +	}
  2.3658 +
  2.3659 +	/* 
  2.3660 +	 * restart another task
  2.3661 +	 */
  2.3662 +
  2.3663 +	/*
  2.3664 +	 * When PFM_CTX_MASKED, we cannot issue a restart before the previous 
  2.3665 +	 * one is seen by the task.
  2.3666 +	 */
  2.3667 +	if (state == PFM_CTX_MASKED) {
  2.3668 +		if (ctx->ctx_fl_can_restart == 0) return -EINVAL;
  2.3669 +		/*
  2.3670 +		 * will prevent subsequent restart before this one is
   2.3671 +		 * seen by the other task
  2.3672 +		 */
  2.3673 +		ctx->ctx_fl_can_restart = 0;
  2.3674 +	}
  2.3675 +
  2.3676 +	/*
   2.3677 +	 * if blocking, then post the semaphore if PFM_CTX_MASKED, i.e.
  2.3678 +	 * the task is blocked or on its way to block. That's the normal
  2.3679 +	 * restart path. If the monitoring is not masked, then the task
  2.3680 +	 * can be actively monitoring and we cannot directly intervene.
  2.3681 +	 * Therefore we use the trap mechanism to catch the task and
  2.3682 +	 * force it to reset the buffer/reset PMDs.
  2.3683 +	 *
  2.3684 +	 * if non-blocking, then we ensure that the task will go into
  2.3685 +	 * pfm_handle_work() before returning to user mode.
  2.3686 +	 *
   2.3687 +	 * We cannot explicitly reset another task, it MUST always
   2.3688 +	 * be done by the task itself. This works for system-wide because
   2.3689 +	 * the tool that is controlling the session is logically doing
  2.3690 +	 * "self-monitoring".
  2.3691 +	 */
  2.3692 +	if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
  2.3693 +		DPRINT(("unblocking [%d] \n", task->pid));
  2.3694 +		complete(&ctx->ctx_restart_done);
  2.3695 +	} else {
  2.3696 +		DPRINT(("[%d] armed exit trap\n", task->pid));
  2.3697 +
  2.3698 +		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
  2.3699 +
  2.3700 +		PFM_SET_WORK_PENDING(task, 1);
  2.3701 +
  2.3702 +		pfm_set_task_notify(task);
  2.3703 +
  2.3704 +		/*
  2.3705 +		 * XXX: send reschedule if task runs on another CPU
  2.3706 +		 */
  2.3707 +	}
  2.3708 +	return 0;
  2.3709 +}
  2.3710 +
  2.3711 +static int
  2.3712 +pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.3713 +{
  2.3714 +	unsigned int m = *(unsigned int *)arg;
  2.3715 +
  2.3716 +	pfm_sysctl.debug = m == 0 ? 0 : 1;
  2.3717 +
  2.3718 +	printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off");
  2.3719 +
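          +	/*
          +	 * turning debugging off also clears the statistics and resets the
          +	 * per-CPU minimum overflow-interrupt cycle trackers to their
          +	 * initial value
          +	 */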
  2.3720 +	if (m == 0) {
  2.3721 +		memset(pfm_stats, 0, sizeof(pfm_stats));
  2.3722 +		for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL;
  2.3723 +	}
  2.3724 +	return 0;
  2.3725 +}
  2.3726 +
  2.3727 +/*
  2.3728 + * arg can be NULL and count can be zero for this function
  2.3729 + */
  2.3730 +static int
  2.3731 +pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.3732 +{
  2.3733 +	struct thread_struct *thread = NULL;
  2.3734 +	struct task_struct *task;
  2.3735 +	pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg;
  2.3736 +	unsigned long flags;
  2.3737 +	dbreg_t dbreg;
  2.3738 +	unsigned int rnum;
  2.3739 +	int first_time;
  2.3740 +	int ret = 0, state;
  2.3741 +	int i, can_access_pmu = 0;
  2.3742 +	int is_system, is_loaded;
  2.3743 +
  2.3744 +	if (pmu_conf->use_rr_dbregs == 0) return -EINVAL;
  2.3745 +
  2.3746 +	state     = ctx->ctx_state;
  2.3747 +	is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
  2.3748 +	is_system = ctx->ctx_fl_system;
  2.3749 +	task      = ctx->ctx_task;
  2.3750 +
  2.3751 +	if (state == PFM_CTX_ZOMBIE) return -EINVAL;
  2.3752 +
  2.3753 +	/*
  2.3754 +	 * on both UP and SMP, we can only write to the PMC when the task is
  2.3755 +	 * the owner of the local PMU.
  2.3756 +	 */
  2.3757 +	if (is_loaded) {
  2.3758 +		thread = &task->thread;
  2.3759 +		/*
  2.3760 +		 * In system wide and when the context is loaded, access can only happen
  2.3761 +		 * when the caller is running on the CPU being monitored by the session.
  2.3762 +		 * It does not have to be the owner (ctx_task) of the context per se.
  2.3763 +		 */
  2.3764 +		if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
  2.3765 +			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
  2.3766 +			return -EBUSY;
  2.3767 +		}
  2.3768 +		can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
  2.3769 +	}
  2.3770 +
  2.3771 +	/*
  2.3772 +	 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w
  2.3773 +	 * ensuring that no real breakpoint can be installed via this call.
  2.3774 +	 *
  2.3775 +	 * IMPORTANT: regs can be NULL in this function
  2.3776 +	 */
  2.3777 +
  2.3778 +	first_time = ctx->ctx_fl_using_dbreg == 0;
  2.3779 +
  2.3780 +	/*
  2.3781 +	 * don't bother if we are loaded and task is being debugged
  2.3782 +	 */
  2.3783 +	if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
  2.3784 +		DPRINT(("debug registers already in use for [%d]\n", task->pid));
  2.3785 +		return -EBUSY;
  2.3786 +	}
  2.3787 +
  2.3788 +	/*
  2.3789 +	 * check for debug registers in system wide mode
  2.3790 +	 *
   2.3791 +	 * Even though a check is done in pfm_context_load(),
  2.3792 +	 * we must repeat it here, in case the registers are
  2.3793 +	 * written after the context is loaded
  2.3794 +	 */
  2.3795 +	if (is_loaded) {
  2.3796 +		LOCK_PFS(flags);
  2.3797 +
  2.3798 +		if (first_time && is_system) {
  2.3799 +			if (pfm_sessions.pfs_ptrace_use_dbregs)
  2.3800 +				ret = -EBUSY;
  2.3801 +			else
  2.3802 +				pfm_sessions.pfs_sys_use_dbregs++;
  2.3803 +		}
  2.3804 +		UNLOCK_PFS(flags);
  2.3805 +	}
  2.3806 +
  2.3807 +	if (ret != 0) return ret;
  2.3808 +
  2.3809 +	/*
  2.3810 +	 * mark ourself as user of the debug registers for
  2.3811 +	 * perfmon purposes.
  2.3812 +	 */
  2.3813 +	ctx->ctx_fl_using_dbreg = 1;
  2.3814 +
  2.3815 +	/*
  2.3816 + 	 * clear hardware registers to make sure we don't
  2.3817 + 	 * pick up stale state.
  2.3818 +	 *
  2.3819 +	 * for a system wide session, we do not use
  2.3820 +	 * thread.dbr, thread.ibr because this process
  2.3821 +	 * never leaves the current CPU and the state
  2.3822 +	 * is shared by all processes running on it
  2.3823 + 	 */
  2.3824 +	if (first_time && can_access_pmu) {
  2.3825 +		DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid));
  2.3826 +		for (i=0; i < pmu_conf->num_ibrs; i++) {
  2.3827 +			ia64_set_ibr(i, 0UL);
  2.3828 +			ia64_dv_serialize_instruction();
  2.3829 +		}
  2.3830 +		ia64_srlz_i();
  2.3831 +		for (i=0; i < pmu_conf->num_dbrs; i++) {
  2.3832 +			ia64_set_dbr(i, 0UL);
  2.3833 +			ia64_dv_serialize_data();
  2.3834 +		}
  2.3835 +		ia64_srlz_d();
  2.3836 +	}
  2.3837 +
  2.3838 +	/*
  2.3839 +	 * Now install the values into the registers
  2.3840 +	 */
  2.3841 +	for (i = 0; i < count; i++, req++) {
  2.3842 +
  2.3843 +		rnum      = req->dbreg_num;
  2.3844 +		dbreg.val = req->dbreg_value;
  2.3845 +
  2.3846 +		ret = -EINVAL;
  2.3847 +
  2.3848 +		if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) {
  2.3849 +			DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
  2.3850 +				  rnum, dbreg.val, mode, i, count));
  2.3851 +
  2.3852 +			goto abort_mission;
  2.3853 +		}
  2.3854 +
  2.3855 +		/*
   2.3856 +		 * make sure we do not install an enabled breakpoint
  2.3857 +		 */
  2.3858 +		if (rnum & 0x1) {
  2.3859 +			if (mode == PFM_CODE_RR)
  2.3860 +				dbreg.ibr.ibr_x = 0;
  2.3861 +			else
  2.3862 +				dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
  2.3863 +		}
  2.3864 +
  2.3865 +		PFM_REG_RETFLAG_SET(req->dbreg_flags, 0);
  2.3866 +
  2.3867 +		/*
   2.3868 +		 * Debug registers, just like PMCs, can only be modified
   2.3869 +		 * by a kernel call. Moreover, perfmon() accesses to those
   2.3870 +		 * registers are centralized in this routine. The hardware
  2.3871 +		 * does not modify the value of these registers, therefore,
  2.3872 +		 * if we save them as they are written, we can avoid having
  2.3873 +		 * to save them on context switch out. This is made possible
  2.3874 +		 * by the fact that when perfmon uses debug registers, ptrace()
  2.3875 +		 * won't be able to modify them concurrently.
  2.3876 +		 */
  2.3877 +		if (mode == PFM_CODE_RR) {
  2.3878 +			CTX_USED_IBR(ctx, rnum);
  2.3879 +
  2.3880 +			if (can_access_pmu) {
  2.3881 +				ia64_set_ibr(rnum, dbreg.val);
  2.3882 +				ia64_dv_serialize_instruction();
  2.3883 +			}
  2.3884 +
  2.3885 +			ctx->ctx_ibrs[rnum] = dbreg.val;
  2.3886 +
  2.3887 +			DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n",
  2.3888 +				rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu));
  2.3889 +		} else {
  2.3890 +			CTX_USED_DBR(ctx, rnum);
  2.3891 +
  2.3892 +			if (can_access_pmu) {
  2.3893 +				ia64_set_dbr(rnum, dbreg.val);
  2.3894 +				ia64_dv_serialize_data();
  2.3895 +			}
  2.3896 +			ctx->ctx_dbrs[rnum] = dbreg.val;
  2.3897 +
  2.3898 +			DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n",
  2.3899 +				rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu));
  2.3900 +		}
  2.3901 +	}
  2.3902 +
  2.3903 +	return 0;
  2.3904 +
  2.3905 +abort_mission:
  2.3906 +	/*
  2.3907 +	 * in case it was our first attempt, we undo the global modifications
  2.3908 +	 */
  2.3909 +	if (first_time) {
  2.3910 +		LOCK_PFS(flags);
  2.3911 +		if (ctx->ctx_fl_system) {
  2.3912 +			pfm_sessions.pfs_sys_use_dbregs--;
  2.3913 +		}
  2.3914 +		UNLOCK_PFS(flags);
  2.3915 +		ctx->ctx_fl_using_dbreg = 0;
  2.3916 +	}
  2.3917 +	/*
  2.3918 +	 * install error return flag
  2.3919 +	 */
  2.3920 +	PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL);
  2.3921 +
  2.3922 +	return ret;
  2.3923 +}
  2.3924 +
  2.3925 +static int
  2.3926 +pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.3927 +{
  2.3928 +	return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs);
  2.3929 +}
  2.3930 +
  2.3931 +static int
  2.3932 +pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.3933 +{
  2.3934 +	return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs);
  2.3935 +}
  2.3936 +
  2.3937 +int
  2.3938 +pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
  2.3939 +{
  2.3940 +	pfm_context_t *ctx;
  2.3941 +
  2.3942 +	if (req == NULL) return -EINVAL;
  2.3943 +
  2.3944 + 	ctx = GET_PMU_CTX();
  2.3945 +
  2.3946 +	if (ctx == NULL) return -EINVAL;
  2.3947 +
  2.3948 +	/*
  2.3949 +	 * for now limit to current task, which is enough when calling
  2.3950 +	 * from overflow handler
  2.3951 +	 */
  2.3952 +	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
  2.3953 +
  2.3954 +	return pfm_write_ibrs(ctx, req, nreq, regs);
  2.3955 +}
  2.3956 +EXPORT_SYMBOL(pfm_mod_write_ibrs);
  2.3957 +
  2.3958 +int
  2.3959 +pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
  2.3960 +{
  2.3961 +	pfm_context_t *ctx;
  2.3962 +
  2.3963 +	if (req == NULL) return -EINVAL;
  2.3964 +
  2.3965 + 	ctx = GET_PMU_CTX();
  2.3966 +
  2.3967 +	if (ctx == NULL) return -EINVAL;
  2.3968 +
  2.3969 +	/*
  2.3970 +	 * for now limit to current task, which is enough when calling
  2.3971 +	 * from overflow handler
  2.3972 +	 */
  2.3973 +	if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
  2.3974 +
  2.3975 +	return pfm_write_dbrs(ctx, req, nreq, regs);
  2.3976 +}
  2.3977 +EXPORT_SYMBOL(pfm_mod_write_dbrs);
  2.3978 +
  2.3979 +
  2.3980 +static int
  2.3981 +pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.3982 +{
  2.3983 +	pfarg_features_t *req = (pfarg_features_t *)arg;
  2.3984 +
  2.3985 +	req->ft_version = PFM_VERSION;
  2.3986 +	return 0;
  2.3987 +}
  2.3988 +
  2.3989 +static int
  2.3990 +pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.3991 +{
  2.3992 +	struct pt_regs *tregs;
  2.3993 +	struct task_struct *task = PFM_CTX_TASK(ctx);
  2.3994 +	int state, is_system;
  2.3995 +
  2.3996 +	state     = ctx->ctx_state;
  2.3997 +	is_system = ctx->ctx_fl_system;
  2.3998 +
  2.3999 +	/*
  2.4000 +	 * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE)
  2.4001 +	 */
  2.4002 +	if (state == PFM_CTX_UNLOADED) return -EINVAL;
  2.4003 +
  2.4004 +	/*
  2.4005 + 	 * In system wide and when the context is loaded, access can only happen
  2.4006 + 	 * when the caller is running on the CPU being monitored by the session.
  2.4007 + 	 * It does not have to be the owner (ctx_task) of the context per se.
  2.4008 + 	 */
  2.4009 +	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
  2.4010 +		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
  2.4011 +		return -EBUSY;
  2.4012 +	}
  2.4013 +	DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
  2.4014 +		PFM_CTX_TASK(ctx)->pid,
  2.4015 +		state,
  2.4016 +		is_system));
  2.4017 +	/*
  2.4018 +	 * in system mode, we need to update the PMU directly
  2.4019 +	 * and the user level state of the caller, which may not
  2.4020 +	 * necessarily be the creator of the context.
  2.4021 +	 */
  2.4022 +	if (is_system) {
  2.4023 +		/*
  2.4024 +		 * Update local PMU first
  2.4025 +		 *
  2.4026 +		 * disable dcr pp
  2.4027 +		 */
  2.4028 +		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
  2.4029 +		ia64_srlz_i();
  2.4030 +
  2.4031 +		/*
  2.4032 +		 * update local cpuinfo
  2.4033 +		 */
  2.4034 +		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
  2.4035 +
  2.4036 +		/*
  2.4037 +		 * stop monitoring, does srlz.i
  2.4038 +		 */
  2.4039 +		pfm_clear_psr_pp();
  2.4040 +
  2.4041 +		/*
  2.4042 +		 * stop monitoring in the caller
  2.4043 +		 */
  2.4044 +		ia64_psr(regs)->pp = 0;
  2.4045 +
  2.4046 +		return 0;
  2.4047 +	}
  2.4048 +	/*
  2.4049 +	 * per-task mode
  2.4050 +	 */
  2.4051 +
  2.4052 +	if (task == current) {
  2.4053 +		/* stop monitoring  at kernel level */
  2.4054 +		pfm_clear_psr_up();
  2.4055 +
  2.4056 +		/*
  2.4057 +	 	 * stop monitoring at the user level
  2.4058 +	 	 */
  2.4059 +		ia64_psr(regs)->up = 0;
  2.4060 +	} else {
  2.4061 +		tregs = task_pt_regs(task);
  2.4062 +
  2.4063 +		/*
  2.4064 +	 	 * stop monitoring at the user level
  2.4065 +	 	 */
  2.4066 +		ia64_psr(tregs)->up = 0;
  2.4067 +
  2.4068 +		/*
  2.4069 +		 * monitoring disabled in kernel at next reschedule
  2.4070 +		 */
  2.4071 +		ctx->ctx_saved_psr_up = 0;
  2.4072 +		DPRINT(("task=[%d]\n", task->pid));
  2.4073 +	}
  2.4074 +	return 0;
  2.4075 +}
  2.4076 +
  2.4077 +
  2.4078 +static int
  2.4079 +pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.4080 +{
  2.4081 +	struct pt_regs *tregs;
  2.4082 +	int state, is_system;
  2.4083 +
  2.4084 +	state     = ctx->ctx_state;
  2.4085 +	is_system = ctx->ctx_fl_system;
  2.4086 +
  2.4087 +	if (state != PFM_CTX_LOADED) return -EINVAL;
  2.4088 +
  2.4089 +	/*
  2.4090 + 	 * In system wide and when the context is loaded, access can only happen
  2.4091 + 	 * when the caller is running on the CPU being monitored by the session.
  2.4092 + 	 * It does not have to be the owner (ctx_task) of the context per se.
  2.4093 + 	 */
  2.4094 +	if (is_system && ctx->ctx_cpu != smp_processor_id()) {
  2.4095 +		DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
  2.4096 +		return -EBUSY;
  2.4097 +	}
  2.4098 +
  2.4099 +	/*
  2.4100 +	 * in system mode, we need to update the PMU directly
  2.4101 +	 * and the user level state of the caller, which may not
  2.4102 +	 * necessarily be the creator of the context.
  2.4103 +	 */
  2.4104 +	if (is_system) {
  2.4105 +
  2.4106 +		/*
  2.4107 +		 * set user level psr.pp for the caller
  2.4108 +		 */
  2.4109 +		ia64_psr(regs)->pp = 1;
  2.4110 +
  2.4111 +		/*
  2.4112 +		 * now update the local PMU and cpuinfo
  2.4113 +		 */
  2.4114 +		PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);
  2.4115 +
  2.4116 +		/*
  2.4117 +		 * start monitoring at kernel level
  2.4118 +		 */
  2.4119 +		pfm_set_psr_pp();
  2.4120 +
  2.4121 +		/* enable dcr pp */
  2.4122 +		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
  2.4123 +		ia64_srlz_i();
  2.4124 +
  2.4125 +		return 0;
  2.4126 +	}
  2.4127 +
  2.4128 +	/*
  2.4129 +	 * per-process mode
  2.4130 +	 */
  2.4131 +
  2.4132 +	if (ctx->ctx_task == current) {
  2.4133 +
  2.4134 +		/* start monitoring at kernel level */
  2.4135 +		pfm_set_psr_up();
  2.4136 +
  2.4137 +		/*
  2.4138 +		 * activate monitoring at user level
  2.4139 +		 */
  2.4140 +		ia64_psr(regs)->up = 1;
  2.4141 +
  2.4142 +	} else {
  2.4143 +		tregs = task_pt_regs(ctx->ctx_task);
  2.4144 +
  2.4145 +		/*
  2.4146 +		 * start monitoring at the kernel level the next
  2.4147 +		 * time the task is scheduled
  2.4148 +		 */
  2.4149 +		ctx->ctx_saved_psr_up = IA64_PSR_UP;
  2.4150 +
  2.4151 +		/*
  2.4152 +		 * activate monitoring at user level
  2.4153 +		 */
  2.4154 +		ia64_psr(tregs)->up = 1;
  2.4155 +	}
  2.4156 +	return 0;
  2.4157 +}
  2.4158 +
  2.4159 +static int
  2.4160 +pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.4161 +{
  2.4162 +	pfarg_reg_t *req = (pfarg_reg_t *)arg;
  2.4163 +	unsigned int cnum;
  2.4164 +	int i;
  2.4165 +	int ret = -EINVAL;
  2.4166 +
  2.4167 +	for (i = 0; i < count; i++, req++) {
  2.4168 +
  2.4169 +		cnum = req->reg_num;
  2.4170 +
  2.4171 +		if (!PMC_IS_IMPL(cnum)) goto abort_mission;
  2.4172 +
  2.4173 +		req->reg_value = PMC_DFL_VAL(cnum);
  2.4174 +
  2.4175 +		PFM_REG_RETFLAG_SET(req->reg_flags, 0);
  2.4176 +
  2.4177 +		DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value));
  2.4178 +	}
  2.4179 +	return 0;
  2.4180 +
  2.4181 +abort_mission:
  2.4182 +	PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
  2.4183 +	return ret;
  2.4184 +}
  2.4185 +
  2.4186 +static int
  2.4187 +pfm_check_task_exist(pfm_context_t *ctx)
  2.4188 +{
  2.4189 +	struct task_struct *g, *t;
  2.4190 +	int ret = -ESRCH;
  2.4191 +
  2.4192 +	read_lock(&tasklist_lock);
  2.4193 +
  2.4194 +	do_each_thread (g, t) {
  2.4195 +		if (t->thread.pfm_context == ctx) {
  2.4196 +			ret = 0;
  2.4197 +			break;
  2.4198 +		}
  2.4199 +	} while_each_thread (g, t);
  2.4200 +
  2.4201 +	read_unlock(&tasklist_lock);
  2.4202 +
  2.4203 +	DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));
  2.4204 +
  2.4205 +	return ret;
  2.4206 +}
  2.4207 +
  2.4208 +static int
  2.4209 +pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.4210 +{
  2.4211 +	struct task_struct *task;
  2.4212 +	struct thread_struct *thread;
  2.4213 +	struct pfm_context_t *old;
  2.4214 +	unsigned long flags;
  2.4215 +#ifndef CONFIG_SMP
  2.4216 +	struct task_struct *owner_task = NULL;
  2.4217 +#endif
  2.4218 +	pfarg_load_t *req = (pfarg_load_t *)arg;
  2.4219 +	unsigned long *pmcs_source, *pmds_source;
  2.4220 +	int the_cpu;
  2.4221 +	int ret = 0;
  2.4222 +	int state, is_system, set_dbregs = 0;
  2.4223 +
  2.4224 +	state     = ctx->ctx_state;
  2.4225 +	is_system = ctx->ctx_fl_system;
  2.4226 +	/*
   2.4227 +	 * can only load from the UNLOADED state
  2.4228 +	 */
  2.4229 +	if (state != PFM_CTX_UNLOADED) {
  2.4230 +		DPRINT(("cannot load to [%d], invalid ctx_state=%d\n",
  2.4231 +			req->load_pid,
  2.4232 +			ctx->ctx_state));
  2.4233 +		return -EBUSY;
  2.4234 +	}
  2.4235 +
  2.4236 +	DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg));
  2.4237 +
  2.4238 +	if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) {
  2.4239 +		DPRINT(("cannot use blocking mode on self\n"));
  2.4240 +		return -EINVAL;
  2.4241 +	}
  2.4242 +
  2.4243 +	ret = pfm_get_task(ctx, req->load_pid, &task);
  2.4244 +	if (ret) {
  2.4245 +		DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret));
  2.4246 +		return ret;
  2.4247 +	}
  2.4248 +
  2.4249 +	ret = -EINVAL;
  2.4250 +
  2.4251 +	/*
  2.4252 +	 * system wide is self monitoring only
  2.4253 +	 */
  2.4254 +	if (is_system && task != current) {
  2.4255 +		DPRINT(("system wide is self monitoring only load_pid=%d\n",
  2.4256 +			req->load_pid));
  2.4257 +		goto error;
  2.4258 +	}
  2.4259 +
  2.4260 +	thread = &task->thread;
  2.4261 +
  2.4262 +	ret = 0;
  2.4263 +	/*
  2.4264 +	 * cannot load a context which is using range restrictions,
  2.4265 +	 * into a task that is being debugged.
  2.4266 +	 */
  2.4267 +	if (ctx->ctx_fl_using_dbreg) {
  2.4268 +		if (thread->flags & IA64_THREAD_DBG_VALID) {
  2.4269 +			ret = -EBUSY;
  2.4270 +			DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid));
  2.4271 +			goto error;
  2.4272 +		}
  2.4273 +		LOCK_PFS(flags);
  2.4274 +
  2.4275 +		if (is_system) {
  2.4276 +			if (pfm_sessions.pfs_ptrace_use_dbregs) {
  2.4277 +				DPRINT(("cannot load [%d] dbregs in use\n", task->pid));
  2.4278 +				ret = -EBUSY;
  2.4279 +			} else {
  2.4280 +				pfm_sessions.pfs_sys_use_dbregs++;
  2.4281 +				DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs));
  2.4282 +				set_dbregs = 1;
  2.4283 +			}
  2.4284 +		}
  2.4285 +
  2.4286 +		UNLOCK_PFS(flags);
  2.4287 +
  2.4288 +		if (ret) goto error;
  2.4289 +	}
  2.4290 +
  2.4291 +	/*
  2.4292 +	 * SMP system-wide monitoring implies self-monitoring.
  2.4293 +	 *
  2.4294 +	 * The programming model expects the task to
  2.4295 +	 * be pinned on a CPU throughout the session.
  2.4296 +	 * Here we take note of the current CPU at the
  2.4297 +	 * time the context is loaded. No call from
  2.4298 +	 * another CPU will be allowed.
  2.4299 +	 *
   2.4300 +	 * The pinning via sched_setaffinity()
  2.4301 +	 * must be done by the calling task prior
  2.4302 +	 * to this call.
  2.4303 +	 *
  2.4304 +	 * systemwide: keep track of CPU this session is supposed to run on
  2.4305 +	 */
  2.4306 +	the_cpu = ctx->ctx_cpu = smp_processor_id();
  2.4307 +
  2.4308 +	ret = -EBUSY;
  2.4309 +	/*
  2.4310 +	 * now reserve the session
  2.4311 +	 */
  2.4312 +	ret = pfm_reserve_session(current, is_system, the_cpu);
  2.4313 +	if (ret) goto error;
  2.4314 +
  2.4315 +	/*
  2.4316 +	 * task is necessarily stopped at this point.
  2.4317 +	 *
  2.4318 +	 * If the previous context was zombie, then it got removed in
  2.4319 +	 * pfm_save_regs(). Therefore we should not see it here.
  2.4320 +	 * If we see a context, then this is an active context
  2.4321 +	 *
  2.4322 +	 * XXX: needs to be atomic
  2.4323 +	 */
  2.4324 +	DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n",
  2.4325 +		thread->pfm_context, ctx));
  2.4326 +
  2.4327 +	ret = -EBUSY;
  2.4328 +	old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *));
  2.4329 +	if (old != NULL) {
  2.4330 +		DPRINT(("load_pid [%d] already has a context\n", req->load_pid));
  2.4331 +		goto error_unres;
  2.4332 +	}
  2.4333 +
  2.4334 +	pfm_reset_msgq(ctx);
  2.4335 +
  2.4336 +	ctx->ctx_state = PFM_CTX_LOADED;
  2.4337 +
  2.4338 +	/*
  2.4339 +	 * link context to task
  2.4340 +	 */
  2.4341 +	ctx->ctx_task = task;
  2.4342 +
  2.4343 +	if (is_system) {
  2.4344 +		/*
  2.4345 +		 * we load as stopped
  2.4346 +		 */
  2.4347 +		PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE);
  2.4348 +		PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
  2.4349 +
  2.4350 +		if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE);
  2.4351 +	} else {
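          +		/*
          +		 * per-task mode: mark the thread as carrying valid PMU state
          +		 * so it is saved/restored across context switches (cleared
          +		 * again in pfm_context_unload())
          +		 */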
  2.4352 +		thread->flags |= IA64_THREAD_PM_VALID;
  2.4353 +	}
  2.4354 +
  2.4355 +	/*
  2.4356 +	 * propagate into thread-state
  2.4357 +	 */
  2.4358 +	pfm_copy_pmds(task, ctx);
  2.4359 +	pfm_copy_pmcs(task, ctx);
  2.4360 +
  2.4361 +	pmcs_source = thread->pmcs;
  2.4362 +	pmds_source = thread->pmds;
  2.4363 +
  2.4364 +	/*
  2.4365 +	 * always the case for system-wide
  2.4366 +	 */
  2.4367 +	if (task == current) {
  2.4368 +
  2.4369 +		if (is_system == 0) {
  2.4370 +
  2.4371 +			/* allow user level control */
  2.4372 +			ia64_psr(regs)->sp = 0;
  2.4373 +			DPRINT(("clearing psr.sp for [%d]\n", task->pid));
  2.4374 +
  2.4375 +			SET_LAST_CPU(ctx, smp_processor_id());
  2.4376 +			INC_ACTIVATION();
  2.4377 +			SET_ACTIVATION(ctx);
  2.4378 +#ifndef CONFIG_SMP
  2.4379 +			/*
  2.4380 +			 * push the other task out, if any
  2.4381 +			 */
  2.4382 +			owner_task = GET_PMU_OWNER();
  2.4383 +			if (owner_task) pfm_lazy_save_regs(owner_task);
  2.4384 +#endif
  2.4385 +		}
  2.4386 +		/*
  2.4387 +		 * load all PMD from ctx to PMU (as opposed to thread state)
  2.4388 +		 * restore all PMC from ctx to PMU
  2.4389 +		 */
  2.4390 +		pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]);
  2.4391 +		pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]);
  2.4392 +
  2.4393 +		ctx->ctx_reload_pmcs[0] = 0UL;
  2.4394 +		ctx->ctx_reload_pmds[0] = 0UL;
  2.4395 +
  2.4396 +		/*
  2.4397 +		 * guaranteed safe by earlier check against DBG_VALID
  2.4398 +		 */
  2.4399 +		if (ctx->ctx_fl_using_dbreg) {
  2.4400 +			pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
  2.4401 +			pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
  2.4402 +		}
  2.4403 +		/*
  2.4404 +		 * set new ownership
  2.4405 +		 */
  2.4406 +		SET_PMU_OWNER(task, ctx);
  2.4407 +
  2.4408 +		DPRINT(("context loaded on PMU for [%d]\n", task->pid));
  2.4409 +	} else {
  2.4410 +		/*
  2.4411 +		 * when not current, task MUST be stopped, so this is safe
  2.4412 +		 */
  2.4413 +		regs = task_pt_regs(task);
  2.4414 +
  2.4415 +		/* force a full reload */
  2.4416 +		ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
  2.4417 +		SET_LAST_CPU(ctx, -1);
  2.4418 +
  2.4419 +		/* initial saved psr (stopped) */
  2.4420 +		ctx->ctx_saved_psr_up = 0UL;
  2.4421 +		ia64_psr(regs)->up = ia64_psr(regs)->pp = 0;
  2.4422 +	}
  2.4423 +
  2.4424 +	ret = 0;
  2.4425 +
  2.4426 +error_unres:
  2.4427 +	if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu);
  2.4428 +error:
  2.4429 +	/*
  2.4430 +	 * we must undo the dbregs setting (for system-wide)
  2.4431 +	 */
  2.4432 +	if (ret && set_dbregs) {
  2.4433 +		LOCK_PFS(flags);
  2.4434 +		pfm_sessions.pfs_sys_use_dbregs--;
  2.4435 +		UNLOCK_PFS(flags);
  2.4436 +	}
  2.4437 +	/*
  2.4438 +	 * release task, there is now a link with the context
  2.4439 +	 */
  2.4440 +	if (is_system == 0 && task != current) {
  2.4441 +		pfm_put_task(task);
  2.4442 +
  2.4443 +		if (ret == 0) {
  2.4444 +			ret = pfm_check_task_exist(ctx);
  2.4445 +			if (ret) {
  2.4446 +				ctx->ctx_state = PFM_CTX_UNLOADED;
  2.4447 +				ctx->ctx_task  = NULL;
  2.4448 +			}
  2.4449 +		}
  2.4450 +	}
  2.4451 +	return ret;
  2.4452 +}
  2.4453 +
  2.4454 +/*
  2.4455 + * in this function, we do not need to increase the use count
  2.4456 + * for the task via get_task_struct(), because we hold the
  2.4457 + * context lock. If the task were to disappear while having
  2.4458 + * a context attached, it would go through pfm_exit_thread()
  2.4459 + * which also grabs the context lock  and would therefore be blocked
  2.4460 + * until we are here.
  2.4461 + */
  2.4462 +static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx);
  2.4463 +
  2.4464 +static int
  2.4465 +pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
  2.4466 +{
  2.4467 +	struct task_struct *task = PFM_CTX_TASK(ctx);
  2.4468 +	struct pt_regs *tregs;
  2.4469 +	int prev_state, is_system;
  2.4470 +	int ret;
  2.4471 +
  2.4472 +	DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1));
  2.4473 +
  2.4474 +	prev_state = ctx->ctx_state;
  2.4475 +	is_system  = ctx->ctx_fl_system;
  2.4476 +
  2.4477 +	/*
  2.4478 +	 * unload only when necessary
  2.4479 +	 */
  2.4480 +	if (prev_state == PFM_CTX_UNLOADED) {
  2.4481 +		DPRINT(("ctx_state=%d, nothing to do\n", prev_state));
  2.4482 +		return 0;
  2.4483 +	}
  2.4484 +
  2.4485 +	/*
  2.4486 +	 * clear psr and dcr bits
  2.4487 +	 */
  2.4488 +	ret = pfm_stop(ctx, NULL, 0, regs);
  2.4489 +	if (ret) return ret;
  2.4490 +
  2.4491 +	ctx->ctx_state = PFM_CTX_UNLOADED;
  2.4492 +
  2.4493 +	/*
  2.4494 +	 * in system mode, we need to update the PMU directly
  2.4495 +	 * and the user level state of the caller, which may not
  2.4496 +	 * necessarily be the creator of the context.
  2.4497 +	 */
  2.4498 +	if (is_system) {
  2.4499 +
  2.4500 +		/*
  2.4501 +		 * Update cpuinfo
  2.4502 +		 *
  2.4503 +		 * local PMU is taken care of in pfm_stop()
  2.4504 +		 */
  2.4505 +		PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
  2.4506 +		PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);
  2.4507 +
  2.4508 +		/*
  2.4509 +		 * save PMDs in context
  2.4510 +		 * release ownership
  2.4511 +		 */
  2.4512 +		pfm_flush_pmds(current, ctx);
  2.4513 +
  2.4514 +		/*
  2.4515 +		 * at this point we are done with the PMU
  2.4516 +		 * so we can unreserve the resource.
  2.4517 +		 */
  2.4518 +		if (prev_state != PFM_CTX_ZOMBIE) 
  2.4519 +			pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu);
  2.4520 +
  2.4521 +		/*
  2.4522 +		 * disconnect context from task
  2.4523 +		 */
  2.4524 +		task->thread.pfm_context = NULL;
  2.4525 +		/*
  2.4526 +		 * disconnect task from context
  2.4527 +		 */
  2.4528 +		ctx->ctx_task = NULL;
  2.4529 +
  2.4530 +		/*
  2.4531 +		 * There is nothing more to cleanup here.
  2.4532 +		 */
  2.4533 +		return 0;
  2.4534 +	}
  2.4535 +
  2.4536 +	/*
  2.4537 +	 * per-task mode
  2.4538 +	 */
  2.4539 +	tregs = task == current ? regs : task_pt_regs(task);
  2.4540 +
  2.4541 +	if (task == current) {
  2.4542 +		/*
  2.4543 +		 * cancel user level control
  2.4544 +		 */
  2.4545 +		ia64_psr(regs)->sp = 1;
  2.4546 +
  2.4547 +		DPRINT(("setting psr.sp for [%d]\n", task->pid));
  2.4548 +	}
  2.4549 +	/*
  2.4550 +	 * save PMDs to context
  2.4551 +	 * release ownership
  2.4552 +	 */
  2.4553 +	pfm_flush_pmds(task, ctx);
  2.4554 +
  2.4555 +	/*
  2.4556 +	 * at this point we are done with the PMU
  2.4557 +	 * so we can unreserve the resource.
  2.4558 +	 *
  2.4559 +	 * when state was ZOMBIE, we have already unreserved.
  2.4560 +	 */
  2.4561 +	if (prev_state != PFM_CTX_ZOMBIE) 
  2.4562 +		pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu);
  2.4563 +
  2.4564 +	/*
  2.4565 +	 * reset activation counter and psr
  2.4566 +	 */
  2.4567 +	ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
  2.4568 +	SET_LAST_CPU(ctx, -1);
  2.4569 +
  2.4570 +	/*
  2.4571 +	 * PMU state will not be restored
  2.4572 +	 */
  2.4573 +	task->thread.flags &= ~IA64_THREAD_PM_VALID;
  2.4574 +
  2.4575 +	/*
  2.4576 +	 * break links between context and task
  2.4577 +	 */
  2.4578 +	task->thread.pfm_context  = NULL;
  2.4579 +	ctx->ctx_task             = NULL;
  2.4580 +
  2.4581 +	PFM_SET_WORK_PENDING(task, 0);
  2.4582 +
  2.4583 +	ctx->ctx_fl_trap_reason  = PFM_TRAP_REASON_NONE;
  2.4584 +	ctx->ctx_fl_can_restart  = 0;
  2.4585 +	ctx->ctx_fl_going_zombie = 0;
  2.4586 +
  2.4587 +	DPRINT(("disconnected [%d] from context\n", task->pid));
  2.4588 +
  2.4589 +	return 0;
  2.4590 +}
  2.4591 +
  2.4592 +
  2.4593 +/*
  2.4594 + * called only from exit_thread(): task == current
  2.4595 + * we come here only if current has a context attached (loaded or masked)
  2.4596 + */
  2.4597 +void
  2.4598 +pfm_exit_thread(struct task_struct *task)
  2.4599 +{
  2.4600 +	pfm_context_t *ctx;
  2.4601 +	unsigned long flags;
  2.4602 +	struct pt_regs *regs = task_pt_regs(task);
  2.4603 +	int ret, state;
  2.4604 +	int free_ok = 0;
  2.4605 +
  2.4606 +	ctx = PFM_GET_CTX(task);
  2.4607 +
  2.4608 +	PROTECT_CTX(ctx, flags);
  2.4609 +
  2.4610 +	DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid));
  2.4611 +
  2.4612 +	state = ctx->ctx_state;
  2.4613 +	switch(state) {
  2.4614 +		case PFM_CTX_UNLOADED:
  2.4615 +			/*
   2.4616 +	 		 * only comes to this function if pfm_context is not NULL, i.e., cannot
  2.4617 +			 * be in unloaded state
  2.4618 +	 		 */
  2.4619 +			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid);
  2.4620 +			break;
  2.4621 +		case PFM_CTX_LOADED:
  2.4622 +		case PFM_CTX_MASKED:
  2.4623 +			ret = pfm_context_unload(ctx, NULL, 0, regs);
  2.4624 +			if (ret) {
  2.4625 +				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
  2.4626 +			}
  2.4627 +			DPRINT(("ctx unloaded for current state was %d\n", state));
  2.4628 +
  2.4629 +			pfm_end_notify_user(ctx);
  2.4630 +			break;
  2.4631 +		case PFM_CTX_ZOMBIE:
  2.4632 +			ret = pfm_context_unload(ctx, NULL, 0, regs);
  2.4633 +			if (ret) {
  2.4634 +				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
  2.4635 +			}
  2.4636 +			free_ok = 1;
  2.4637 +			break;
  2.4638 +		default:
  2.4639 +			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state);
  2.4640 +			break;
  2.4641 +	}
  2.4642 +	UNPROTECT_CTX(ctx, flags);
  2.4643 +
  2.4644 +	{ u64 psr = pfm_get_psr();
  2.4645 +	  BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
  2.4646 +	  BUG_ON(GET_PMU_OWNER());
  2.4647 +	  BUG_ON(ia64_psr(regs)->up);
  2.4648 +	  BUG_ON(ia64_psr(regs)->pp);
  2.4649 +	}
  2.4650 +
  2.4651 +	/*
  2.4652 +	 * All memory free operations (especially for vmalloc'ed memory)
  2.4653 +	 * MUST be done with interrupts ENABLED.
  2.4654 +	 */
  2.4655 +	if (free_ok) pfm_context_free(ctx);
  2.4656 +}
  2.4657 +
  2.4658 +/*
   2.4659 + * functions MUST be listed in the increasing order of their index (see perfmon.h)
  2.4660 + */
  2.4661 +#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz }
  2.4662 +#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL }
  2.4663 +#define PFM_CMD_PCLRWS	(PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP)
  2.4664 +#define PFM_CMD_PCLRW	(PFM_CMD_FD|PFM_CMD_ARG_RW)
  2.4665 +#define PFM_CMD_NONE	{ NULL, "no-cmd", 0, 0, 0, NULL}
  2.4666 +
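          +/*
          + * each entry: handler, printable command name, command flags, argument
          + * count, per-argument size, and an optional callback used to compute the
          + * size of variable-length arguments
          + */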
  2.4667 +static pfm_cmd_desc_t pfm_cmd_tab[]={
  2.4668 +/* 0  */PFM_CMD_NONE,
  2.4669 +/* 1  */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
  2.4670 +/* 2  */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
  2.4671 +/* 3  */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
  2.4672 +/* 4  */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS),
  2.4673 +/* 5  */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS),
  2.4674 +/* 6  */PFM_CMD_NONE,
  2.4675 +/* 7  */PFM_CMD_NONE,
  2.4676 +/* 8  */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize),
  2.4677 +/* 9  */PFM_CMD_NONE,
  2.4678 +/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW),
  2.4679 +/* 11 */PFM_CMD_NONE,
  2.4680 +/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL),
  2.4681 +/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL),
  2.4682 +/* 14 */PFM_CMD_NONE,
  2.4683 +/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
  2.4684 +/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL),
  2.4685 +/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS),
  2.4686 +/* 18 */PFM_CMD_NONE,
  2.4687 +/* 19 */PFM_CMD_NONE,
  2.4688 +/* 20 */PFM_CMD_NONE,
  2.4689 +/* 21 */PFM_CMD_NONE,
  2.4690 +/* 22 */PFM_CMD_NONE,
  2.4691 +/* 23 */PFM_CMD_NONE,
  2.4692 +/* 24 */PFM_CMD_NONE,
  2.4693 +/* 25 */PFM_CMD_NONE,
  2.4694 +/* 26 */PFM_CMD_NONE,
  2.4695 +/* 27 */PFM_CMD_NONE,
  2.4696 +/* 28 */PFM_CMD_NONE,
  2.4697 +/* 29 */PFM_CMD_NONE,
  2.4698 +/* 30 */PFM_CMD_NONE,
  2.4699 +/* 31 */PFM_CMD_NONE,
  2.4700 +/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL),
  2.4701 +/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL)
  2.4702 +};
  2.4703 +#define PFM_CMD_COUNT	(sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
  2.4704 +
  2.4705 +static int
  2.4706 +pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
  2.4707 +{
  2.4708 +	struct task_struct *task;
  2.4709 +	int state, old_state;
  2.4710 +
  2.4711 +recheck:
  2.4712 +	state = ctx->ctx_state;
  2.4713 +	task  = ctx->ctx_task;
  2.4714 +
  2.4715 +	if (task == NULL) {
  2.4716 +		DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state));
  2.4717 +		return 0;
  2.4718 +	}
  2.4719 +
  2.4720 +	DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
  2.4721 +		ctx->ctx_fd,
  2.4722 +		state,
  2.4723 +		task->pid,
  2.4724 +		task->state, PFM_CMD_STOPPED(cmd)));
  2.4725 +
  2.4726 +	/*
  2.4727 +	 * self-monitoring always ok.
  2.4728 +	 *
  2.4729 +	 * for system-wide the caller can either be the creator of the
   2.4730 +	 * context (the one to which the context is attached) OR
  2.4731 +	 * a task running on the same CPU as the session.
  2.4732 +	 */
  2.4733 +	if (task == current || ctx->ctx_fl_system) return 0;
  2.4734 +
  2.4735 +	/*
  2.4736 +	 * we are monitoring another thread
  2.4737 +	 */
  2.4738 +	switch(state) {
  2.4739 +		case PFM_CTX_UNLOADED:
  2.4740 +			/*
  2.4741 +			 * if context is UNLOADED we are safe to go
  2.4742 +			 */
  2.4743 +			return 0;
  2.4744 +		case PFM_CTX_ZOMBIE:
  2.4745 +			/*
  2.4746 +			 * no command can operate on a zombie context
  2.4747 +			 */
  2.4748 +			DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
  2.4749 +			return -EINVAL;
  2.4750 +		case PFM_CTX_MASKED:
  2.4751 +			/*
  2.4752 +			 * PMU state has been saved to software even though
  2.4753 +			 * the thread may still be running.
  2.4754 +			 */
  2.4755 +			if (cmd != PFM_UNLOAD_CONTEXT) return 0;
  2.4756 +	}
  2.4757 +
  2.4758 +	/*
  2.4759 +	 * context is LOADED or MASKED. Some commands may need to have 
  2.4760 +	 * the task stopped.
  2.4761 +	 *
  2.4762 +	 * We could lift this restriction for UP but it would mean that
  2.4763 +	 * the user has no guarantee the task would not run between
  2.4764 +	 * two successive calls to perfmonctl(). That's probably OK.
  2.4765 +	 * If this user wants to ensure the task does not run, then
  2.4766 +	 * the task must be stopped.
  2.4767 +	 */
  2.4768 +	if (PFM_CMD_STOPPED(cmd)) {
  2.4769 +		if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
  2.4770 +			DPRINT(("[%d] task not in stopped state\n", task->pid));
  2.4771 +			return -EBUSY;
  2.4772 +		}
  2.4773 +		/*
  2.4774 +		 * task is now stopped, wait for ctxsw out
  2.4775 +		 *
  2.4776 +		 * This is an interesting point in the code.
  2.4777 +		 * We need to unprotect the context because
   2.4778 +		 * the pfm_save_regs() routine needs to grab
   2.4779 +		 * the same lock. There is danger in doing
  2.4780 +		 * this because it leaves a window open for
  2.4781 +		 * another task to get access to the context
  2.4782 +		 * and possibly change its state. The one thing
  2.4783 +		 * that is not possible is for the context to disappear
  2.4784 +		 * because we are protected by the VFS layer, i.e.,
  2.4785 +		 * get_fd()/put_fd().
  2.4786 +		 */
  2.4787 +		old_state = state;
  2.4788 +
  2.4789 +		UNPROTECT_CTX(ctx, flags);
  2.4790 +
  2.4791 +		wait_task_inactive(task);
  2.4792 +
  2.4793 +		PROTECT_CTX(ctx, flags);
  2.4794 +
  2.4795 +		/*
  2.4796 +		 * we must recheck to verify if state has changed
  2.4797 +		 */
  2.4798 +		if (ctx->ctx_state != old_state) {
  2.4799 +			DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state));
  2.4800 +			goto recheck;
  2.4801 +		}
  2.4802 +	}
  2.4803 +	return 0;
  2.4804 +}
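+
+/*
+ * Example of the recheck loop above: issuing PFM_WRITE_PMCS on a context
+ * attached to another (loaded) task requires that task to be in
+ * TASK_STOPPED/TASK_TRACED; the context lock is dropped around
+ * wait_task_inactive(), so if ctx_state changed in that window the checks
+ * are redone from the start.
+ */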
  2.4805 +
  2.4806 +/*
  2.4807 + * system-call entry point (must return long)
  2.4808 + */
  2.4809 +asmlinkage long
  2.4810 +sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
  2.4811 +{
  2.4812 +	struct file *file = NULL;
  2.4813 +	pfm_context_t *ctx = NULL;
  2.4814 +	unsigned long flags = 0UL;
  2.4815 +	void *args_k = NULL;
  2.4816 +	long ret; /* will expand int return types */
  2.4817 +	size_t base_sz, sz, xtra_sz = 0;
  2.4818 +	int narg, completed_args = 0, call_made = 0, cmd_flags;
  2.4819 +	int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
  2.4820 +	int (*getsize)(void *arg, size_t *sz);
  2.4821 +#define PFM_MAX_ARGSIZE	4096
  2.4822 +
  2.4823 +	/*
  2.4824 +	 * reject any call if perfmon was disabled at initialization
  2.4825 +	 */
  2.4826 +	if (unlikely(pmu_conf == NULL)) return -ENOSYS;
  2.4827 +
  2.4828 +	if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) {
  2.4829 +		DPRINT(("invalid cmd=%d\n", cmd));
  2.4830 +		return -EINVAL;
  2.4831 +	}
  2.4832 +
  2.4833 +	func      = pfm_cmd_tab[cmd].cmd_func;
  2.4834 +	narg      = pfm_cmd_tab[cmd].cmd_narg;
  2.4835 +	base_sz   = pfm_cmd_tab[cmd].cmd_argsize;
  2.4836 +	getsize   = pfm_cmd_tab[cmd].cmd_getsize;
  2.4837 +	cmd_flags = pfm_cmd_tab[cmd].cmd_flags;
  2.4838 +
  2.4839 +	if (unlikely(func == NULL)) {
  2.4840 +		DPRINT(("invalid cmd=%d\n", cmd));
  2.4841 +		return -EINVAL;
  2.4842 +	}
  2.4843 +
  2.4844 +	DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n",
  2.4845 +		PFM_CMD_NAME(cmd),
  2.4846 +		cmd,
  2.4847 +		narg,
  2.4848 +		base_sz,
  2.4849 +		count));
  2.4850 +
  2.4851 +	/*
  2.4852 +	 * check if number of arguments matches what the command expects
  2.4853 +	 */
  2.4854 +	if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count)))
  2.4855 +		return -EINVAL;
  2.4856 +
  2.4857 +restart_args:
  2.4858 +	sz = xtra_sz + base_sz*count;
  2.4859 +	/*
  2.4860 +	 * limit abuse to min page size
  2.4861 +	 */
  2.4862 +	if (unlikely(sz > PFM_MAX_ARGSIZE)) {
  2.4863 +		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz);
  2.4864 +		return -E2BIG;
  2.4865 +	}
  2.4866 +
  2.4867 +	/*
  2.4868 +	 * allocate default-sized argument buffer
  2.4869 +	 */
  2.4870 +	if (likely(count && args_k == NULL)) {
  2.4871 +		args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL);
  2.4872 +		if (args_k == NULL) return -ENOMEM;
  2.4873 +	}
  2.4874 +
  2.4875 +	ret = -EFAULT;
  2.4876 +
  2.4877 +	/*
  2.4878 +	 * copy arguments
  2.4879 +	 *
  2.4880 +	 * assume sz = 0 for command without parameters
  2.4881 +	 */
  2.4882 +	if (sz && copy_from_user(args_k, arg, sz)) {
  2.4883 +		DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg));
  2.4884 +		goto error_args;
  2.4885 +	}
  2.4886 +
  2.4887 +	/*
  2.4888 +	 * check if command supports extra parameters
  2.4889 +	 */
  2.4890 +	if (completed_args == 0 && getsize) {
  2.4891 +		/*
  2.4892 +		 * get extra parameters size (based on main argument)
  2.4893 +		 */
  2.4894 +		ret = (*getsize)(args_k, &xtra_sz);
  2.4895 +		if (ret) goto error_args;
  2.4896 +
  2.4897 +		completed_args = 1;
  2.4898 +
  2.4899 +		DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz));
  2.4900 +
  2.4901 +		/* retry if necessary */
  2.4902 +		if (likely(xtra_sz)) goto restart_args;
  2.4903 +	}
  2.4904 +
  2.4905 +	if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd;
  2.4906 +
  2.4907 +	ret = -EBADF;
  2.4908 +
  2.4909 +	file = fget(fd);
  2.4910 +	if (unlikely(file == NULL)) {
  2.4911 +		DPRINT(("invalid fd %d\n", fd));
  2.4912 +		goto error_args;
  2.4913 +	}
  2.4914 +	if (unlikely(PFM_IS_FILE(file) == 0)) {
  2.4915 +		DPRINT(("fd %d not related to perfmon\n", fd));
  2.4916 +		goto error_args;
  2.4917 +	}
  2.4918 +
  2.4919 +	ctx = (pfm_context_t *)file->private_data;
  2.4920 +	if (unlikely(ctx == NULL)) {
  2.4921 +		DPRINT(("no context for fd %d\n", fd));
  2.4922 +		goto error_args;
  2.4923 +	}
  2.4924 +	prefetch(&ctx->ctx_state);
  2.4925 +
  2.4926 +	PROTECT_CTX(ctx, flags);
  2.4927 +
  2.4928 +	/*
  2.4929 +	 * check task is stopped
  2.4930 +	 */
  2.4931 +	ret = pfm_check_task_state(ctx, cmd, flags);
  2.4932 +	if (unlikely(ret)) goto abort_locked;
  2.4933 +
  2.4934 +skip_fd:
  2.4935 +	ret = (*func)(ctx, args_k, count, task_pt_regs(current));
  2.4936 +
  2.4937 +	call_made = 1;
  2.4938 +
  2.4939 +abort_locked:
  2.4940 +	if (likely(ctx)) {
  2.4941 +		DPRINT(("context unlocked\n"));
  2.4942 +		UNPROTECT_CTX(ctx, flags);
  2.4943 +	}
  2.4944 +
  2.4945 +	/* copy argument back to user, if needed */
  2.4946 +	if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
  2.4947 +
  2.4948 +error_args:
  2.4949 +	if (file)
  2.4950 +		fput(file);
  2.4951 +
  2.4952 +	kfree(args_k);
  2.4953 +
  2.4954 +	DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
  2.4955 +
  2.4956 +	return ret;
  2.4957 +}
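+
+/*
+ * Illustrative user-level sequence (sketch only; assumes the PFM_* command
+ * macros from <asm/perfmon.h> and the perfmonctl() syscall wrapper, neither
+ * of which is defined in this file):
+ *
+ *	pfarg_context_t c;
+ *	memset(&c, 0, sizeof(c));
+ *	perfmonctl(0, PFM_CREATE_CONTEXT, &c, 1);	(c.ctx_fd returns the new fd)
+ *	perfmonctl(c.ctx_fd, PFM_WRITE_PMCS, pmcs, npmcs);
+ *	perfmonctl(c.ctx_fd, PFM_START, NULL, 0);
+ *
+ * Each call lands in sys_perfmonctl() above: the command selects a
+ * pfm_cmd_tab[] slot, the argument block is copied in (twice when a
+ * cmd_getsize callback reports extra payload), and commands flagged
+ * PFM_CMD_FD have the fd resolved to its pfm_context_t before dispatch.
+ */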
  2.4958 +
  2.4959 +static void
  2.4960 +pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs)
  2.4961 +{
  2.4962 +	pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt;
  2.4963 +	pfm_ovfl_ctrl_t rst_ctrl;
  2.4964 +	int state;
  2.4965 +	int ret = 0;
  2.4966 +
  2.4967 +	state = ctx->ctx_state;
  2.4968 +	/*
  2.4969 +	 * Unlock sampling buffer and reset index atomically
  2.4970 +	 * XXX: not really needed when blocking
  2.4971 +	 */
  2.4972 +	if (CTX_HAS_SMPL(ctx)) {
  2.4973 +
  2.4974 +		rst_ctrl.bits.mask_monitoring = 0;
  2.4975 +		rst_ctrl.bits.reset_ovfl_pmds = 0;
  2.4976 +
  2.4977 +		if (state == PFM_CTX_LOADED)
  2.4978 +			ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
  2.4979 +		else
  2.4980 +			ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
  2.4981 +	} else {
  2.4982 +		rst_ctrl.bits.mask_monitoring = 0;
  2.4983 +		rst_ctrl.bits.reset_ovfl_pmds = 1;
  2.4984 +	}
  2.4985 +
  2.4986 +	if (ret == 0) {
  2.4987 +		if (rst_ctrl.bits.reset_ovfl_pmds) {
  2.4988 +			pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET);
  2.4989 +		}
  2.4990 +		if (rst_ctrl.bits.mask_monitoring == 0) {
  2.4991 +			DPRINT(("resuming monitoring\n"));
  2.4992 +			if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current);
  2.4993 +		} else {
  2.4994 +			DPRINT(("stopping monitoring\n"));
  2.4995 +			//pfm_stop_monitoring(current, regs);
  2.4996 +		}
  2.4997 +		ctx->ctx_state = PFM_CTX_LOADED;
  2.4998 +	}
  2.4999 +}
  2.5000 +
  2.5001 +/*
  2.5002 + * context MUST BE LOCKED when calling
  2.5003 + * can only be called for current
  2.5004 + */
  2.5005 +static void
  2.5006 +pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
  2.5007 +{
  2.5008 +	int ret;
  2.5009 +
  2.5010 +	DPRINT(("entering for [%d]\n", current->pid));
  2.5011 +
  2.5012 +	ret = pfm_context_unload(ctx, NULL, 0, regs);
  2.5013 +	if (ret) {
   2.5014 +		printk(KERN_ERR "pfm_context_force_terminate: [%d] unload failed with %d\n", current->pid, ret);
  2.5015 +	}
  2.5016 +
  2.5017 +	/*
  2.5018 +	 * and wakeup controlling task, indicating we are now disconnected
  2.5019 +	 */
  2.5020 +	wake_up_interruptible(&ctx->ctx_zombieq);
  2.5021 +
  2.5022 +	/*
  2.5023 +	 * given that context is still locked, the controlling
  2.5024 +	 * task will only get access when we return from
  2.5025 +	 * pfm_handle_work().
  2.5026 +	 */
  2.5027 +}
  2.5028 +
  2.5029 +static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);
  2.5030 + /*
  2.5031 +  * pfm_handle_work() can be called with interrupts enabled
   2.5032 +  * (TIF_NEED_RESCHED) or disabled. The wait_for_completion_interruptible()
  2.5033 +  * call may sleep, therefore we must re-enable interrupts
  2.5034 +  * to avoid deadlocks. It is safe to do so because this function
  2.5035 +  * is called ONLY when returning to user level (PUStk=1), in which case
  2.5036 +  * there is no risk of kernel stack overflow due to deep
  2.5037 +  * interrupt nesting.
  2.5038 +  */
  2.5039 +void
  2.5040 +pfm_handle_work(void)
  2.5041 +{
  2.5042 +	pfm_context_t *ctx;
  2.5043 +	struct pt_regs *regs;
  2.5044 +	unsigned long flags, dummy_flags;
  2.5045 +	unsigned long ovfl_regs;
  2.5046 +	unsigned int reason;
  2.5047 +	int ret;
  2.5048 +
  2.5049 +	ctx = PFM_GET_CTX(current);
  2.5050 +	if (ctx == NULL) {
  2.5051 +		printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
  2.5052 +		return;
  2.5053 +	}
  2.5054 +
  2.5055 +	PROTECT_CTX(ctx, flags);
  2.5056 +
  2.5057 +	PFM_SET_WORK_PENDING(current, 0);
  2.5058 +
  2.5059 +	pfm_clear_task_notify();
  2.5060 +
  2.5061 +	regs = task_pt_regs(current);
  2.5062 +
  2.5063 +	/*
  2.5064 +	 * extract reason for being here and clear
  2.5065 +	 */
  2.5066 +	reason = ctx->ctx_fl_trap_reason;
  2.5067 +	ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
  2.5068 +	ovfl_regs = ctx->ctx_ovfl_regs[0];
  2.5069 +
  2.5070 +	DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state));
  2.5071 +
  2.5072 +	/*
  2.5073 +	 * must be done before we check for simple-reset mode
  2.5074 +	 */
  2.5075 +	if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie;
  2.5076 +
  2.5077 +
  2.5078 +	//if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
  2.5079 +	if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;
  2.5080 +
  2.5081 +	/*
  2.5082 +	 * restore interrupt mask to what it was on entry.
   2.5083 +	 * Could be enabled/disabled.
  2.5084 +	 */
  2.5085 +	UNPROTECT_CTX(ctx, flags);
  2.5086 +
  2.5087 +	/*
   2.5088 +	 * force interrupt enable because of wait_for_completion_interruptible()
  2.5089 +	 */
  2.5090 +	local_irq_enable();
  2.5091 +
  2.5092 +	DPRINT(("before block sleeping\n"));
  2.5093 +
  2.5094 +	/*
  2.5095 +	 * may go through without blocking on SMP systems
   2.5096 +	 * if restart has been received already by the time we block here
  2.5097 +	 */
  2.5098 +	ret = wait_for_completion_interruptible(&ctx->ctx_restart_done);
  2.5099 +
  2.5100 +	DPRINT(("after block sleeping ret=%d\n", ret));
  2.5101 +
  2.5102 +	/*
  2.5103 +	 * lock context and mask interrupts again
  2.5104 +	 * We save flags into a dummy because we may have
  2.5105 +	 * altered interrupts mask compared to entry in this
  2.5106 +	 * function.
  2.5107 +	 */
  2.5108 +	PROTECT_CTX(ctx, dummy_flags);
  2.5109 +
  2.5110 +	/*
  2.5111 +	 * we need to read the ovfl_regs only after wake-up
  2.5112 +	 * because we may have had pfm_write_pmds() in between
   2.5113 +	 * and that can change PMD values and therefore
  2.5114 +	 * ovfl_regs is reset for these new PMD values.
  2.5115 +	 */
  2.5116 +	ovfl_regs = ctx->ctx_ovfl_regs[0];
  2.5117 +
  2.5118 +	if (ctx->ctx_fl_going_zombie) {
  2.5119 +do_zombie:
  2.5120 +		DPRINT(("context is zombie, bailing out\n"));
  2.5121 +		pfm_context_force_terminate(ctx, regs);
  2.5122 +		goto nothing_to_do;
  2.5123 +	}
  2.5124 +	/*
   2.5125 +	 * in case the wait above was interrupted we don't restart anything
  2.5126 +	 */
  2.5127 +	if (ret < 0) goto nothing_to_do;
  2.5128 +
  2.5129 +skip_blocking:
  2.5130 +	pfm_resume_after_ovfl(ctx, ovfl_regs, regs);
  2.5131 +	ctx->ctx_ovfl_regs[0] = 0UL;
  2.5132 +
  2.5133 +nothing_to_do:
  2.5134 +	/*
  2.5135 +	 * restore flags as they were upon entry
  2.5136 +	 */
  2.5137 +	UNPROTECT_CTX(ctx, flags);
  2.5138 +}
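+
+/*
+ * Blocking-path example: on an overflow with block_task set, the monitored
+ * task comes through pfm_handle_work() on its way back to user level and
+ * sleeps on ctx_restart_done; a PFM_RESTART issued on the context fd
+ * completes it, after which the PMDs recorded in ctx_ovfl_regs[0] are reset
+ * and monitoring resumes via pfm_resume_after_ovfl().
+ */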
  2.5139 +
  2.5140 +static int
  2.5141 +pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg)
  2.5142 +{
  2.5143 +	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
  2.5144 +		DPRINT(("ignoring overflow notification, owner is zombie\n"));
  2.5145 +		return 0;
  2.5146 +	}
  2.5147 +
  2.5148 +	DPRINT(("waking up somebody\n"));
  2.5149 +
  2.5150 +	if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait);
  2.5151 +
  2.5152 +	/*
  2.5153 +	 * safe, we are not in intr handler, nor in ctxsw when
  2.5154 +	 * we come here
  2.5155 +	 */
  2.5156 +	kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN);
  2.5157 +
  2.5158 +	return 0;
  2.5159 +}
  2.5160 +
  2.5161 +static int
  2.5162 +pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds)
  2.5163 +{
  2.5164 +	pfm_msg_t *msg = NULL;
  2.5165 +
  2.5166 +	if (ctx->ctx_fl_no_msg == 0) {
  2.5167 +		msg = pfm_get_new_msg(ctx);
  2.5168 +		if (msg == NULL) {
  2.5169 +			printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n");
  2.5170 +			return -1;
  2.5171 +		}
  2.5172 +
  2.5173 +		msg->pfm_ovfl_msg.msg_type         = PFM_MSG_OVFL;
  2.5174 +		msg->pfm_ovfl_msg.msg_ctx_fd       = ctx->ctx_fd;
  2.5175 +		msg->pfm_ovfl_msg.msg_active_set   = 0;
  2.5176 +		msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds;
  2.5177 +		msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL;
  2.5178 +		msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL;
  2.5179 +		msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL;
  2.5180 +		msg->pfm_ovfl_msg.msg_tstamp       = 0UL;
  2.5181 +	}
  2.5182 +
  2.5183 +	DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n",
  2.5184 +		msg,
  2.5185 +		ctx->ctx_fl_no_msg,
  2.5186 +		ctx->ctx_fd,
  2.5187 +		ovfl_pmds));
  2.5188 +
  2.5189 +	return pfm_notify_user(ctx, msg);
  2.5190 +}
  2.5191 +
  2.5192 +static int
  2.5193 +pfm_end_notify_user(pfm_context_t *ctx)
  2.5194 +{
  2.5195 +	pfm_msg_t *msg;
  2.5196 +
  2.5197 +	msg = pfm_get_new_msg(ctx);
  2.5198 +	if (msg == NULL) {
  2.5199 +		printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n");
  2.5200 +		return -1;
  2.5201 +	}
  2.5202 +	/* no leak */
  2.5203 +	memset(msg, 0, sizeof(*msg));
  2.5204 +
  2.5205 +	msg->pfm_end_msg.msg_type    = PFM_MSG_END;
  2.5206 +	msg->pfm_end_msg.msg_ctx_fd  = ctx->ctx_fd;
  2.5207 +	msg->pfm_ovfl_msg.msg_tstamp = 0UL;
  2.5208 +
  2.5209 +	DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n",
  2.5210 +		msg,
  2.5211 +		ctx->ctx_fl_no_msg,
  2.5212 +		ctx->ctx_fd));
  2.5213 +
  2.5214 +	return pfm_notify_user(ctx, msg);
  2.5215 +}
  2.5216 +
  2.5217 +/*
  2.5218 + * main overflow processing routine.
   2.5219 + * it can be called from the interrupt path or explicitly during the context switch code
  2.5220 + */
  2.5221 +static void
  2.5222 +pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
  2.5223 +{
  2.5224 +	pfm_ovfl_arg_t *ovfl_arg;
  2.5225 +	unsigned long mask;
  2.5226 +	unsigned long old_val, ovfl_val, new_val;
  2.5227 +	unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds;
  2.5228 +	unsigned long tstamp;
  2.5229 +	pfm_ovfl_ctrl_t	ovfl_ctrl;
  2.5230 +	unsigned int i, has_smpl;
  2.5231 +	int must_notify = 0;
  2.5232 +
  2.5233 +	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring;
  2.5234 +
  2.5235 +	/*
  2.5236 +	 * sanity test. Should never happen
  2.5237 +	 */
  2.5238 +	if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check;
  2.5239 +
  2.5240 +	tstamp   = ia64_get_itc();
  2.5241 +	mask     = pmc0 >> PMU_FIRST_COUNTER;
  2.5242 +	ovfl_val = pmu_conf->ovfl_val;
  2.5243 +	has_smpl = CTX_HAS_SMPL(ctx);
  2.5244 +
  2.5245 +	DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
  2.5246 +		     "used_pmds=0x%lx\n",
  2.5247 +			pmc0,
  2.5248 +			task ? task->pid: -1,
  2.5249 +			(regs ? regs->cr_iip : 0),
  2.5250 +			CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
  2.5251 +			ctx->ctx_used_pmds[0]));
  2.5252 +
  2.5253 +
  2.5254 +	/*
  2.5255 +	 * first we update the virtual counters
  2.5256 +	 * assume there was a prior ia64_srlz_d() issued
  2.5257 +	 */
  2.5258 +	for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {
  2.5259 +
  2.5260 +		/* skip pmd which did not overflow */
  2.5261 +		if ((mask & 0x1) == 0) continue;
  2.5262 +
  2.5263 +		/*
  2.5264 +		 * Note that the pmd is not necessarily 0 at this point as qualified events
  2.5265 +		 * may have happened before the PMU was frozen. The residual count is not
  2.5266 +		 * taken into consideration here but will be with any read of the pmd via
  2.5267 +		 * pfm_read_pmds().
  2.5268 +		 */
  2.5269 +		old_val              = new_val = ctx->ctx_pmds[i].val;
  2.5270 +		new_val             += 1 + ovfl_val;
  2.5271 +		ctx->ctx_pmds[i].val = new_val;
  2.5272 +
  2.5273 +		/*
  2.5274 +		 * check for overflow condition
  2.5275 +		 */
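+		/*
+		 * Worked example (assuming, say, 47-bit hardware counters, i.e.
+		 * ovfl_val == 2^47 - 1): each hardware wrap adds 1 + ovfl_val
+		 * == 2^47 to the 64-bit software value, so a 64-bit overflow
+		 * of that software value shows up as new_val wrapping back
+		 * below old_val, which is exactly the test below.
+		 */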
  2.5276 +		if (likely(old_val > new_val)) {
  2.5277 +			ovfl_pmds |= 1UL << i;
  2.5278 +			if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i;
  2.5279 +		}
  2.5280 +
  2.5281 +		DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n",
  2.5282 +			i,
  2.5283 +			new_val,
  2.5284 +			old_val,
  2.5285 +			ia64_get_pmd(i) & ovfl_val,
  2.5286 +			ovfl_pmds,
  2.5287 +			ovfl_notify));
  2.5288 +	}
  2.5289 +
  2.5290 +	/*
  2.5291 +	 * there was no 64-bit overflow, nothing else to do
  2.5292 +	 */
  2.5293 +	if (ovfl_pmds == 0UL) return;
  2.5294 +
  2.5295 +	/* 
  2.5296 +	 * reset all control bits
  2.5297 +	 */
  2.5298 +	ovfl_ctrl.val = 0;
  2.5299 +	reset_pmds    = 0UL;
  2.5300 +
  2.5301 +	/*
  2.5302 +	 * if a sampling format module exists, then we "cache" the overflow by 
  2.5303 +	 * calling the module's handler() routine.
  2.5304 +	 */
  2.5305 +	if (has_smpl) {
  2.5306 +		unsigned long start_cycles, end_cycles;
  2.5307 +		unsigned long pmd_mask;
  2.5308 +		int j, k, ret = 0;
  2.5309 +		int this_cpu = smp_processor_id();
  2.5310 +
  2.5311 +		pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER;
  2.5312 +		ovfl_arg = &ctx->ctx_ovfl_arg;
  2.5313 +
  2.5314 +		prefetch(ctx->ctx_smpl_hdr);
  2.5315 +
  2.5316 +		for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) {
  2.5317 +
  2.5318 +			mask = 1UL << i;
  2.5319 +
  2.5320 +			if ((pmd_mask & 0x1) == 0) continue;
  2.5321 +
  2.5322 +			ovfl_arg->ovfl_pmd      = (unsigned char )i;
  2.5323 +			ovfl_arg->ovfl_notify   = ovfl_notify & mask ? 1 : 0;
  2.5324 +			ovfl_arg->active_set    = 0;
  2.5325 +			ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */
  2.5326 +			ovfl_arg->smpl_pmds[0]  = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0];
  2.5327 +
  2.5328 +			ovfl_arg->pmd_value      = ctx->ctx_pmds[i].val;
  2.5329 +			ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval;
  2.5330 +			ovfl_arg->pmd_eventid    = ctx->ctx_pmds[i].eventid;
  2.5331 +
  2.5332 +			/*
  2.5333 +		 	 * copy values of pmds of interest. Sampling format may copy them
  2.5334 +		 	 * into sampling buffer.
  2.5335 +		 	 */
  2.5336 +			if (smpl_pmds) {
  2.5337 +				for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) {
  2.5338 +					if ((smpl_pmds & 0x1) == 0) continue;
  2.5339 +					ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ?  pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
  2.5340 +					DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1]));
  2.5341 +				}
  2.5342 +			}
  2.5343 +
  2.5344 +			pfm_stats[this_cpu].pfm_smpl_handler_calls++;
  2.5345 +
  2.5346 +			start_cycles = ia64_get_itc();
  2.5347 +
  2.5348 +			/*
  2.5349 +		 	 * call custom buffer format record (handler) routine
  2.5350 +		 	 */
  2.5351 +			ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp);
  2.5352 +
  2.5353 +			end_cycles = ia64_get_itc();
  2.5354 +
  2.5355 +			/*
  2.5356 +			 * For those controls, we take the union because they have
  2.5357 +			 * an all or nothing behavior.
  2.5358 +			 */
  2.5359 +			ovfl_ctrl.bits.notify_user     |= ovfl_arg->ovfl_ctrl.bits.notify_user;
  2.5360 +			ovfl_ctrl.bits.block_task      |= ovfl_arg->ovfl_ctrl.bits.block_task;
  2.5361 +			ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring;
  2.5362 +			/*
  2.5363 +			 * build the bitmask of pmds to reset now
  2.5364 +			 */
  2.5365 +			if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;
  2.5366 +
  2.5367 +			pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles;
  2.5368 +		}
  2.5369 +		/*
  2.5370 +		 * when the module cannot handle the rest of the overflows, we abort right here
  2.5371 +		 */
  2.5372 +		if (ret && pmd_mask) {
  2.5373 +			DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n",
  2.5374 +				pmd_mask<<PMU_FIRST_COUNTER));
  2.5375 +		}
  2.5376 +		/*
  2.5377 +		 * remove the pmds we reset now from the set of pmds to reset in pfm_restart()
  2.5378 +		 */
  2.5379 +		ovfl_pmds &= ~reset_pmds;
  2.5380 +	} else {
  2.5381 +		/*
  2.5382 +		 * when no sampling module is used, then the default
  2.5383 +		 * is to notify on overflow if requested by user
  2.5384 +		 */
  2.5385 +		ovfl_ctrl.bits.notify_user     = ovfl_notify ? 1 : 0;
  2.5386 +		ovfl_ctrl.bits.block_task      = ovfl_notify ? 1 : 0;
  2.5387 +		ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */
  2.5388 +		ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1;
  2.5389 +		/*
  2.5390 +		 * if needed, we reset all overflowed pmds
  2.5391 +		 */
  2.5392 +		if (ovfl_notify == 0) reset_pmds = ovfl_pmds;
  2.5393 +	}
  2.5394 +
  2.5395 +	DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds));
  2.5396 +
  2.5397 +	/*
  2.5398 +	 * reset the requested PMD registers using the short reset values
  2.5399 +	 */
  2.5400 +	if (reset_pmds) {
  2.5401 +		unsigned long bm = reset_pmds;
  2.5402 +		pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET);
  2.5403 +	}
  2.5404 +
  2.5405 +	if (ovfl_notify && ovfl_ctrl.bits.notify_user) {
  2.5406 +		/*
  2.5407 +		 * keep track of what to reset when unblocking
  2.5408 +		 */
  2.5409 +		ctx->ctx_ovfl_regs[0] = ovfl_pmds;
  2.5410 +
  2.5411 +		/*
  2.5412 +		 * check for blocking context 
  2.5413 +		 */
  2.5414 +		if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) {
  2.5415 +
  2.5416 +			ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK;
  2.5417 +
  2.5418 +			/*
  2.5419 +			 * set the perfmon specific checking pending work for the task
  2.5420 +			 */
  2.5421 +			PFM_SET_WORK_PENDING(task, 1);
  2.5422 +
  2.5423 +			/*
  2.5424 +			 * when coming from ctxsw, current still points to the
  2.5425 +			 * previous task, therefore we must work with task and not current.
  2.5426 +			 */
  2.5427 +			pfm_set_task_notify(task);
  2.5428 +		}
  2.5429 +		/*
   2.5430 +		 * defer until state is changed (shortens the spin window). The context is locked
   2.5431 +		 * anyway, so the signal receiver would just spin for nothing.
  2.5432 +		 */
  2.5433 +		must_notify = 1;
  2.5434 +	}
  2.5435 +
  2.5436 +	DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
  2.5437 +			GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1,
  2.5438 +			PFM_GET_WORK_PENDING(task),
  2.5439 +			ctx->ctx_fl_trap_reason,
  2.5440 +			ovfl_pmds,
  2.5441 +			ovfl_notify,
  2.5442 +			ovfl_ctrl.bits.mask_monitoring ? 1 : 0));
  2.5443 +	/*
  2.5444 +	 * in case monitoring must be stopped, we toggle the psr bits
  2.5445 +	 */
  2.5446 +	if (ovfl_ctrl.bits.mask_monitoring) {
  2.5447 +		pfm_mask_monitoring(task);
  2.5448 +		ctx->ctx_state = PFM_CTX_MASKED;
  2.5449 +		ctx->ctx_fl_can_restart = 1;
  2.5450 +	}
  2.5451 +
  2.5452 +	/*
  2.5453 +	 * send notification now
  2.5454 +	 */
  2.5455 +	if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify);
  2.5456 +
  2.5457 +	return;
  2.5458 +
  2.5459 +sanity_check:
  2.5460 +	printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
  2.5461 +			smp_processor_id(),
  2.5462 +			task ? task->pid : -1,
  2.5463 +			pmc0);
  2.5464 +	return;
  2.5465 +
  2.5466 +stop_monitoring:
  2.5467 +	/*
  2.5468 +	 * in SMP, zombie context is never restored but reclaimed in pfm_load_regs().
  2.5469 +	 * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can
   2.5470 +	 * come here as zombie only if the task is the current task, in which case we
   2.5471 +	 * can access the PMU hardware directly.
  2.5472 +	 *
  2.5473 +	 * Note that zombies do have PM_VALID set. So here we do the minimal.
  2.5474 +	 *
  2.5475 +	 * In case the context was zombified it could not be reclaimed at the time
  2.5476 +	 * the monitoring program exited. At this point, the PMU reservation has been
   2.5477 +	 * returned, the sampling buffer has been freed. We must convert this call
  2.5478 +	 * into a spurious interrupt. However, we must also avoid infinite overflows
  2.5479 +	 * by stopping monitoring for this task. We can only come here for a per-task
  2.5480 +	 * context. All we need to do is to stop monitoring using the psr bits which
   2.5481 +	 * are always task private. By re-enabling secure monitoring, we ensure that
  2.5482 +	 * the monitored task will not be able to re-activate monitoring.
  2.5483 +	 * The task will eventually be context switched out, at which point the context
  2.5484 +	 * will be reclaimed (that includes releasing ownership of the PMU).
  2.5485 +	 *
   2.5486 +	 * So there might be a window of time where the number of per-task sessions is zero
   2.5487 +	 * yet the PMU might have an owner and get at most one overflow interrupt for a zombie
   2.5488 +	 * context. This is safe because if a per-task session comes in, it will push this one
   2.5489 +	 * out and, by virtue of pfm_save_regs(), this one will disappear. If a system-wide
   2.5490 +	 * session is forced onto that CPU, given that we use task pinning, pfm_save_regs() will
  2.5491 +	 * also push our zombie context out.
  2.5492 +	 *
  2.5493 +	 * Overall pretty hairy stuff....
  2.5494 +	 */
  2.5495 +	DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1));
  2.5496 +	pfm_clear_psr_up();
  2.5497 +	ia64_psr(regs)->up = 0;
  2.5498 +	ia64_psr(regs)->sp = 1;
  2.5499 +	return;
  2.5500 +}
  2.5501 +
  2.5502 +static int
  2.5503 +pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
  2.5504 +{
  2.5505 +	struct task_struct *task;
  2.5506 +	pfm_context_t *ctx;
  2.5507 +	unsigned long flags;
  2.5508 +	u64 pmc0;
  2.5509 +	int this_cpu = smp_processor_id();
  2.5510 +	int retval = 0;
  2.5511 +
  2.5512 +	pfm_stats[this_cpu].pfm_ovfl_intr_count++;
  2.5513 +
  2.5514 +	/*
  2.5515 +	 * srlz.d done before arriving here
  2.5516 +	 */
  2.5517 +	pmc0 = ia64_get_pmc(0);
  2.5518 +
  2.5519 +	task = GET_PMU_OWNER();
  2.5520 +	ctx  = GET_PMU_CTX();
  2.5521 +
  2.5522 +	/*
  2.5523 +	 * if we have some pending bits set
  2.5524 +	 * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1
  2.5525 +	 */
  2.5526 +	if (PMC0_HAS_OVFL(pmc0) && task) {
  2.5527 +		/*
  2.5528 +		 * we assume that pmc0.fr is always set here
  2.5529 +		 */
  2.5530 +
  2.5531 +		/* sanity check */
  2.5532 +		if (!ctx) goto report_spurious1;
  2.5533 +
  2.5534 +		if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) 
  2.5535 +			goto report_spurious2;
  2.5536 +
  2.5537 +		PROTECT_CTX_NOPRINT(ctx, flags);
  2.5538 +
  2.5539 +		pfm_overflow_handler(task, ctx, pmc0, regs);
  2.5540 +
  2.5541 +		UNPROTECT_CTX_NOPRINT(ctx, flags);
  2.5542 +
  2.5543 +	} else {
  2.5544 +		pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++;
  2.5545 +		retval = -1;
  2.5546 +	}
  2.5547 +	/*
  2.5548 +	 * keep it unfrozen at all times
  2.5549 +	 */
  2.5550 +	pfm_unfreeze_pmu();
  2.5551 +
  2.5552 +	return retval;
  2.5553 +
  2.5554 +report_spurious1:
  2.5555 +	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
  2.5556 +		this_cpu, task->pid);
  2.5557 +	pfm_unfreeze_pmu();
  2.5558 +	return -1;
  2.5559 +report_spurious2:
  2.5560 +	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 
  2.5561 +		this_cpu, 
  2.5562 +		task->pid);
  2.5563 +	pfm_unfreeze_pmu();
  2.5564 +	return -1;
  2.5565 +}
  2.5566 +
  2.5567 +static irqreturn_t
  2.5568 +pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
  2.5569 +{
  2.5570 +	unsigned long start_cycles, total_cycles;
  2.5571 +	unsigned long min, max;
  2.5572 +	int this_cpu;
  2.5573 +	int ret;
  2.5574 +
  2.5575 +	this_cpu = get_cpu();
  2.5576 +	if (likely(!pfm_alt_intr_handler)) {
  2.5577 +		min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
  2.5578 +		max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
  2.5579 +
  2.5580 +		start_cycles = ia64_get_itc();
  2.5581 +
  2.5582 +		ret = pfm_do_interrupt_handler(irq, arg, regs);
  2.5583 +
  2.5584 +		total_cycles = ia64_get_itc();
  2.5585 +
  2.5586 +		/*
  2.5587 +		 * don't measure spurious interrupts
  2.5588 +		 */
  2.5589 +		if (likely(ret == 0)) {
  2.5590 +			total_cycles -= start_cycles;
  2.5591 +
  2.5592 +			if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
  2.5593 +			if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
  2.5594 +
  2.5595 +			pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
  2.5596 +		}
  2.5597 +	}
  2.5598 +	else {
  2.5599 +		(*pfm_alt_intr_handler->handler)(irq, arg, regs);
  2.5600 +	}
  2.5601 +
  2.5602 +	put_cpu_no_resched();
  2.5603 +	return IRQ_HANDLED;
  2.5604 +}
  2.5605 +
  2.5606 +/*
  2.5607 + * /proc/perfmon interface, for debug only
  2.5608 + */
  2.5609 +
  2.5610 +#define PFM_PROC_SHOW_HEADER	((void *)NR_CPUS+1)
  2.5611 +
  2.5612 +static void *
  2.5613 +pfm_proc_start(struct seq_file *m, loff_t *pos)
  2.5614 +{
  2.5615 +	if (*pos == 0) {
  2.5616 +		return PFM_PROC_SHOW_HEADER;
  2.5617 +	}
  2.5618 +
  2.5619 +	while (*pos <= NR_CPUS) {
  2.5620 +		if (cpu_online(*pos - 1)) {
  2.5621 +			return (void *)*pos;
  2.5622 +		}
  2.5623 +		++*pos;
  2.5624 +	}
  2.5625 +	return NULL;
  2.5626 +}
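+
+/*
+ * Iteration example: with two online CPUs the sequence produced for
+ * /proc/perfmon is *pos == 0 -> PFM_PROC_SHOW_HEADER (global state),
+ * *pos == 1 -> CPU0 and *pos == 2 -> CPU1 (pfm_proc_show() computes
+ * cpu = (long)v - 1), and any position beyond NR_CPUS ends the walk
+ * with NULL.
+ */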
  2.5627 +
  2.5628 +static void *
  2.5629 +pfm_proc_next(struct seq_file *m, void *v, loff_t *pos)
  2.5630 +{
  2.5631 +	++*pos;
  2.5632 +	return pfm_proc_start(m, pos);
  2.5633 +}
  2.5634 +
  2.5635 +static void
  2.5636 +pfm_proc_stop(struct seq_file *m, void *v)
  2.5637 +{
  2.5638 +}
  2.5639 +
  2.5640 +static void
  2.5641 +pfm_proc_show_header(struct seq_file *m)
  2.5642 +{
  2.5643 +	struct list_head * pos;
  2.5644 +	pfm_buffer_fmt_t * entry;
  2.5645 +	unsigned long flags;
  2.5646 +
  2.5647 + 	seq_printf(m,
  2.5648 +		"perfmon version           : %u.%u\n"
  2.5649 +		"model                     : %s\n"
  2.5650 +		"fastctxsw                 : %s\n"
  2.5651 +		"expert mode               : %s\n"
  2.5652 +		"ovfl_mask                 : 0x%lx\n"
  2.5653 +		"PMU flags                 : 0x%x\n",
  2.5654 +		PFM_VERSION_MAJ, PFM_VERSION_MIN,
  2.5655 +		pmu_conf->pmu_name,
  2.5656 +		pfm_sysctl.fastctxsw > 0 ? "Yes": "No",
  2.5657 +		pfm_sysctl.expert_mode > 0 ? "Yes": "No",
  2.5658 +		pmu_conf->ovfl_val,
  2.5659 +		pmu_conf->flags);
  2.5660 +
  2.5661 +  	LOCK_PFS(flags);
  2.5662 +
  2.5663 + 	seq_printf(m,
  2.5664 + 		"proc_sessions             : %u\n"
  2.5665 + 		"sys_sessions              : %u\n"
  2.5666 + 		"sys_use_dbregs            : %u\n"
  2.5667 + 		"ptrace_use_dbregs         : %u\n",
  2.5668 + 		pfm_sessions.pfs_task_sessions,
  2.5669 + 		pfm_sessions.pfs_sys_sessions,
  2.5670 + 		pfm_sessions.pfs_sys_use_dbregs,
  2.5671 + 		pfm_sessions.pfs_ptrace_use_dbregs);
  2.5672 +
  2.5673 +  	UNLOCK_PFS(flags);
  2.5674 +
  2.5675 +	spin_lock(&pfm_buffer_fmt_lock);
  2.5676 +
  2.5677 +	list_for_each(pos, &pfm_buffer_fmt_list) {
  2.5678 +		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
  2.5679 +		seq_printf(m, "format                    : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n",
  2.5680 +			entry->fmt_uuid[0],
  2.5681 +			entry->fmt_uuid[1],
  2.5682 +			entry->fmt_uuid[2],
  2.5683 +			entry->fmt_uuid[3],
  2.5684 +			entry->fmt_uuid[4],
  2.5685 +			entry->fmt_uuid[5],
  2.5686 +			entry->fmt_uuid[6],
  2.5687 +			entry->fmt_uuid[7],
  2.5688 +			entry->fmt_uuid[8],
  2.5689 +			entry->fmt_uuid[9],
  2.5690 +			entry->fmt_uuid[10],
  2.5691 +			entry->fmt_uuid[11],
  2.5692 +			entry->fmt_uuid[12],
  2.5693 +			entry->fmt_uuid[13],
  2.5694 +			entry->fmt_uuid[14],
  2.5695 +			entry->fmt_uuid[15],
  2.5696 +			entry->fmt_name);
  2.5697 +	}
  2.5698 +	spin_unlock(&pfm_buffer_fmt_lock);
  2.5699 +
  2.5700 +}
  2.5701 +
  2.5702 +static int
  2.5703 +pfm_proc_show(struct seq_file *m, void *v)
  2.5704 +{
  2.5705 +	unsigned long psr;
  2.5706 +	unsigned int i;
  2.5707 +	int cpu;
  2.5708 +
  2.5709 +	if (v == PFM_PROC_SHOW_HEADER) {
  2.5710 +		pfm_proc_show_header(m);
  2.5711 +		return 0;
  2.5712 +	}
  2.5713 +
  2.5714 +	/* show info for CPU (v - 1) */
  2.5715 +
  2.5716 +	cpu = (long)v - 1;
  2.5717 +	seq_printf(m,
  2.5718 +		"CPU%-2d overflow intrs      : %lu\n"
  2.5719 +		"CPU%-2d overflow cycles     : %lu\n"
  2.5720 +		"CPU%-2d overflow min        : %lu\n"
  2.5721 +		"CPU%-2d overflow max        : %lu\n"
  2.5722 +		"CPU%-2d smpl handler calls  : %lu\n"
  2.5723 +		"CPU%-2d smpl handler cycles : %lu\n"
  2.5724 +		"CPU%-2d spurious intrs      : %lu\n"
  2.5725 +		"CPU%-2d replay   intrs      : %lu\n"
  2.5726 +		"CPU%-2d syst_wide           : %d\n"
  2.5727 +		"CPU%-2d dcr_pp              : %d\n"
  2.5728 +		"CPU%-2d exclude idle        : %d\n"
  2.5729 +		"CPU%-2d owner               : %d\n"
  2.5730 +		"CPU%-2d context             : %p\n"
  2.5731 +		"CPU%-2d activations         : %lu\n",
  2.5732 +		cpu, pfm_stats[cpu].pfm_ovfl_intr_count,
  2.5733 +		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles,
  2.5734 +		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min,
  2.5735 +		cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max,
  2.5736 +		cpu, pfm_stats[cpu].pfm_smpl_handler_calls,
  2.5737 +		cpu, pfm_stats[cpu].pfm_smpl_handler_cycles,
  2.5738 +		cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count,
  2.5739 +		cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count,
  2.5740 +		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0,
  2.5741 +		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0,
  2.5742 +		cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0,
  2.5743 +		cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1,
  2.5744 +		cpu, pfm_get_cpu_data(pmu_ctx, cpu),
  2.5745 +		cpu, pfm_get_cpu_data(pmu_activation_number, cpu));
  2.5746 +
  2.5747 +	if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) {
  2.5748 +
  2.5749 +		psr = pfm_get_psr();
  2.5750 +
  2.5751 +		ia64_srlz_d();
  2.5752 +
  2.5753 +		seq_printf(m, 
  2.5754 +			"CPU%-2d psr                 : 0x%lx\n"
  2.5755 +			"CPU%-2d pmc0                : 0x%lx\n", 
  2.5756 +			cpu, psr,
  2.5757 +			cpu, ia64_get_pmc(0));
  2.5758 +
  2.5759 +		for (i=0; PMC_IS_LAST(i) == 0;  i++) {
  2.5760 +			if (PMC_IS_COUNTING(i) == 0) continue;
  2.5761 +   			seq_printf(m, 
  2.5762 +				"CPU%-2d pmc%u                : 0x%lx\n"
  2.5763 +   				"CPU%-2d pmd%u                : 0x%lx\n", 
  2.5764 +				cpu, i, ia64_get_pmc(i),
  2.5765 +				cpu, i, ia64_get_pmd(i));
  2.5766 +  		}
  2.5767 +	}
  2.5768 +	return 0;
  2.5769 +}
  2.5770 +
  2.5771 +struct seq_operations pfm_seq_ops = {
  2.5772 +	.start =	pfm_proc_start,
  2.5773 + 	.next =		pfm_proc_next,
  2.5774 + 	.stop =		pfm_proc_stop,
  2.5775 + 	.show =		pfm_proc_show
  2.5776 +};
  2.5777 +
  2.5778 +static int
  2.5779 +pfm_proc_open(struct inode *inode, struct file *file)
  2.5780 +{
  2.5781 +	return seq_open(file, &pfm_seq_ops);
  2.5782 +}
  2.5783 +
  2.5784 +
  2.5785 +/*
  2.5786 + * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens
  2.5787 + * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
  2.5788 + * is active or inactive based on mode. We must rely on the value in
  2.5789 + * local_cpu_data->pfm_syst_info
  2.5790 + */
  2.5791 +void
  2.5792 +pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
  2.5793 +{
  2.5794 +	struct pt_regs *regs;
  2.5795 +	unsigned long dcr;
  2.5796 +	unsigned long dcr_pp;
  2.5797 +
  2.5798 +	dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;
  2.5799 +
  2.5800 +	/*
  2.5801 +	 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0
  2.5802 +	 * on every CPU, so we can rely on the pid to identify the idle task.
  2.5803 +	 */
  2.5804 +	if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
  2.5805 +		regs = task_pt_regs(task);
  2.5806 +		ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
  2.5807 +		return;
  2.5808 +	}
  2.5809 +	/*
  2.5810 +	 * if monitoring has started
  2.5811 +	 */
  2.5812 +	if (dcr_pp) {
  2.5813 +		dcr = ia64_getreg(_IA64_REG_CR_DCR);
  2.5814 +		/*
  2.5815 +		 * context switching in?
  2.5816 +		 */
  2.5817 +		if (is_ctxswin) {
  2.5818 +			/* mask monitoring for the idle task */
  2.5819 +			ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
  2.5820 +			pfm_clear_psr_pp();
  2.5821 +			ia64_srlz_i();
  2.5822 +			return;
  2.5823 +		}
  2.5824 +		/*
  2.5825 +		 * context switching out
  2.5826 +		 * restore monitoring for next task
  2.5827 +		 *
  2.5828 +		 * Due to inlining this odd if-then-else construction generates
  2.5829 +		 * better code.
  2.5830 +		 */
  2.5831 +		ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP);
  2.5832 +		pfm_set_psr_pp();
  2.5833 +		ia64_srlz_i();
  2.5834 +	}
  2.5835 +}
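+
+/*
+ * Idle-exclusion example: with PFM_CPUINFO_EXCL_IDLE set and monitoring
+ * started (dcr_pp), switching the idle task (pid 0) in clears DCR.pp and
+ * psr.pp so system-wide counts exclude idle cycles; switching it out
+ * restores both bits for the next task.
+ */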
  2.5836 +
  2.5837 +#ifdef CONFIG_SMP
  2.5838 +
  2.5839 +static void
  2.5840 +pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
  2.5841 +{
  2.5842 +	struct task_struct *task = ctx->ctx_task;
  2.5843 +
  2.5844 +	ia64_psr(regs)->up = 0;
  2.5845 +	ia64_psr(regs)->sp = 1;
  2.5846 +
  2.5847 +	if (GET_PMU_OWNER() == task) {
  2.5848 +		DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
  2.5849 +		SET_PMU_OWNER(NULL, NULL);
  2.5850 +	}
  2.5851 +
  2.5852 +	/*
  2.5853 +	 * disconnect the task from the context and vice-versa
  2.5854 +	 */
  2.5855 +	PFM_SET_WORK_PENDING(task, 0);
  2.5856 +
  2.5857 +	task->thread.pfm_context  = NULL;
  2.5858 +	task->thread.flags       &= ~IA64_THREAD_PM_VALID;
  2.5859 +
  2.5860 +	DPRINT(("force cleanup for [%d]\n",  task->pid));
  2.5861 +}
  2.5862 +
  2.5863 +
  2.5864 +/*
  2.5865 + * in 2.6, interrupts are masked when we come here and the runqueue lock is held
  2.5866 + */
  2.5867 +void
  2.5868 +pfm_save_regs(struct task_struct *task)
  2.5869 +{
  2.5870 +	pfm_context_t *ctx;
  2.5871 +	struct thread_struct *t;
  2.5872 +	unsigned long flags;
  2.5873 +	u64 psr;
  2.5874 +
  2.5875 +
  2.5876 +	ctx = PFM_GET_CTX(task);
  2.5877 +	if (ctx == NULL) return;
  2.5878 +	t = &task->thread;
  2.5879 +
  2.5880 +	/*
  2.5881 + 	 * we always come here with interrupts ALREADY disabled by
  2.5882 + 	 * the scheduler. So we simply need to protect against concurrent
  2.5883 +	 * access, not CPU concurrency.
  2.5884 +	 */
  2.5885 +	flags = pfm_protect_ctx_ctxsw(ctx);
  2.5886 +
  2.5887 +	if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
  2.5888 +		struct pt_regs *regs = task_pt_regs(task);
  2.5889 +
  2.5890 +		pfm_clear_psr_up();
  2.5891 +
  2.5892 +		pfm_force_cleanup(ctx, regs);
  2.5893 +
  2.5894 +		BUG_ON(ctx->ctx_smpl_hdr);
  2.5895 +
  2.5896 +		pfm_unprotect_ctx_ctxsw(ctx, flags);
  2.5897 +
  2.5898 +		pfm_context_free(ctx);
  2.5899 +		return;
  2.5900 +	}
  2.5901 +
  2.5902 +	/*
  2.5903 +	 * save current PSR: needed because we modify it
  2.5904 +	 */
  2.5905 +	ia64_srlz_d();
  2.5906 +	psr = pfm_get_psr();
  2.5907 +
  2.5908 +	BUG_ON(psr & (IA64_PSR_I));
  2.5909 +
  2.5910 +	/*
  2.5911 +	 * stop monitoring:
  2.5912 +	 * This is the last instruction which may generate an overflow
  2.5913 +	 *
   2.5914 +	 * We do not need to set psr.sp because it is irrelevant in the kernel.
  2.5915 +	 * It will be restored from ipsr when going back to user level
  2.5916 +	 */
  2.5917 +	pfm_clear_psr_up();
  2.5918 +
  2.5919 +	/*
  2.5920 +	 * keep a copy of psr.up (for reload)
  2.5921 +	 */
  2.5922 +	ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
  2.5923 +
  2.5924 +	/*
  2.5925 +	 * release ownership of this PMU.
  2.5926 +	 * PM interrupts are masked, so nothing
  2.5927 +	 * can happen.
  2.5928 +	 */
  2.5929 +	SET_PMU_OWNER(NULL, NULL);
  2.5930 +
  2.5931 +	/*
  2.5932 +	 * we systematically save the PMD as we have no
   2.5933 +	 * guarantee we will be scheduled on that same
  2.5934 +	 * CPU again.
  2.5935 +	 */
  2.5936 +	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
  2.5937 +
  2.5938 +	/*
  2.5939 +	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
  2.5940 +	 * we will need it on the restore path to check
  2.5941 +	 * for pending overflow.
  2.5942 +	 */
  2.5943 +	t->pmcs[0] = ia64_get_pmc(0);
  2.5944 +
  2.5945 +	/*
  2.5946 +	 * unfreeze PMU if had pending overflows
  2.5947 +	 */
  2.5948 +	if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
  2.5949 +
  2.5950 +	/*
  2.5951 +	 * finally, allow context access.
  2.5952 +	 * interrupts will still be masked after this call.
  2.5953 +	 */
  2.5954 +	pfm_unprotect_ctx_ctxsw(ctx, flags);
  2.5955 +}
  2.5956 +
  2.5957 +#else /* !CONFIG_SMP */
  2.5958 +void
  2.5959 +pfm_save_regs(struct task_struct *task)
  2.5960 +{
  2.5961 +	pfm_context_t *ctx;
  2.5962 +	u64 psr;
  2.5963 +
  2.5964 +	ctx = PFM_GET_CTX(task);
  2.5965 +	if (ctx == NULL) return;
  2.5966 +
  2.5967 +	/*
  2.5968 +	 * save current PSR: needed because we modify it
  2.5969 +	 */
  2.5970 +	psr = pfm_get_psr();
  2.5971 +
  2.5972 +	BUG_ON(psr & (IA64_PSR_I));
  2.5973 +
  2.5974 +	/*
  2.5975 +	 * stop monitoring:
  2.5976 +	 * This is the last instruction which may generate an overflow
  2.5977 +	 *
   2.5978 +	 * We do not need to set psr.sp because it is irrelevant in the kernel.
  2.5979 +	 * It will be restored from ipsr when going back to user level
  2.5980 +	 */
  2.5981 +	pfm_clear_psr_up();
  2.5982 +
  2.5983 +	/*
  2.5984 +	 * keep a copy of psr.up (for reload)
  2.5985 +	 */
  2.5986 +	ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
  2.5987 +}
  2.5988 +
  2.5989 +static void
  2.5990 +pfm_lazy_save_regs (struct task_struct *task)
  2.5991 +{
  2.5992 +	pfm_context_t *ctx;
  2.5993 +	struct thread_struct *t;
  2.5994 +	unsigned long flags;
  2.5995 +
  2.5996 +	{ u64 psr  = pfm_get_psr();
  2.5997 +	  BUG_ON(psr & IA64_PSR_UP);
  2.5998 +	}
  2.5999 +
  2.6000 +	ctx = PFM_GET_CTX(task);
  2.6001 +	t   = &task->thread;
  2.6002 +
  2.6003 +	/*
  2.6004 +	 * we need to mask PMU overflow here to
  2.6005 +	 * make sure that we maintain pmc0 until
  2.6006 +	 * we save it. overflow interrupts are
  2.6007 +	 * treated as spurious if there is no
  2.6008 +	 * owner.
  2.6009 +	 *
  2.6010 +	 * XXX: I don't think this is necessary
  2.6011 +	 */
  2.6012 +	PROTECT_CTX(ctx,flags);
  2.6013 +
  2.6014 +	/*
  2.6015 +	 * release ownership of this PMU.
  2.6016 +	 * must be done before we save the registers.
  2.6017 +	 *
  2.6018 +	 * after this call any PMU interrupt is treated
  2.6019 +	 * as spurious.
  2.6020 +	 */
  2.6021 +	SET_PMU_OWNER(NULL, NULL);
  2.6022 +
  2.6023 +	/*
  2.6024 +	 * save all the pmds we use
  2.6025 +	 */
  2.6026 +	pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
  2.6027 +
  2.6028 +	/*
  2.6029 +	 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
   2.6030 +	 * it is needed to check for pending overflow
  2.6031 +	 * on the restore path
  2.6032 +	 */
  2.6033 +	t->pmcs[0] = ia64_get_pmc(0);
  2.6034 +
  2.6035 +	/*
  2.6036 +	 * unfreeze PMU if had pending overflows
  2.6037 +	 */
  2.6038 +	if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
  2.6039 +
  2.6040 +	/*
   2.6041 +	 * we can now unmask PMU interrupts; they will
  2.6042 +	 * be treated as purely spurious and we will not
  2.6043 +	 * lose any information
  2.6044 +	 */
  2.6045 +	UNPROTECT_CTX(ctx,flags);
  2.6046 +}
  2.6047 +#endif /* CONFIG_SMP */
  2.6048 +
  2.6049 +#ifdef CONFIG_SMP
  2.6050 +/*
  2.6051 + * in 2.6, interrupts are masked when we come here and the runqueue lock is held
  2.6052 + */
  2.6053 +void
  2.6054 +pfm_load_regs (struct task_struct *task)
  2.6055 +{
  2.6056 +	pfm_context_t *ctx;
  2.6057 +	struct thread_struct *t;
  2.6058 +	unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
  2.6059 +	unsigned long flags;
  2.6060 +	u64 psr, psr_up;
  2.6061 +	int need_irq_resend;
  2.6062 +
  2.6063 +	ctx = PFM_GET_CTX(task);
  2.6064 +	if (unlikely(ctx == NULL)) return;
  2.6065 +
  2.6066 +	BUG_ON(GET_PMU_OWNER());
  2.6067 +
  2.6068 +	t     = &task->thread;
  2.6069 +	/*
  2.6070 +	 * possible on unload
  2.6071 +	 */
  2.6072 +	if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return;
  2.6073 +
  2.6074 +	/*
  2.6075 + 	 * we always come here with interrupts ALREADY disabled by
  2.6076 + 	 * the scheduler. So we simply need to protect against concurrent
  2.6077 +	 * access, not CPU concurrency.
  2.6078 +	 */
  2.6079 +	flags = pfm_protect_ctx_ctxsw(ctx);
  2.6080 +	psr   = pfm_get_psr();
  2.6081 +
  2.6082 +	need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
  2.6083 +
  2.6084 +	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
  2.6085 +	BUG_ON(psr & IA64_PSR_I);
  2.6086 +
  2.6087 +	if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) {
  2.6088 +		struct pt_regs *regs = task_pt_regs(task);
  2.6089 +
  2.6090 +		BUG_ON(ctx->ctx_smpl_hdr);
  2.6091 +
  2.6092 +		pfm_force_cleanup(ctx, regs);
  2.6093 +
  2.6094 +		pfm_unprotect_ctx_ctxsw(ctx, flags);
  2.6095 +
  2.6096 +		/*
  2.6097 +		 * this one (kmalloc'ed) is fine with interrupts disabled
  2.6098 +		 */
  2.6099 +		pfm_context_free(ctx);
  2.6100 +
  2.6101 +		return;
  2.6102 +	}
  2.6103 +
  2.6104 +	/*
  2.6105 +	 * we restore ALL the debug registers to avoid picking up
  2.6106 +	 * stale state.
  2.6107 +	 */
  2.6108 +	if (ctx->ctx_fl_using_dbreg) {
  2.6109 +		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
  2.6110 +		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
  2.6111 +	}
  2.6112 +	/*
  2.6113 +	 * retrieve saved psr.up
  2.6114 +	 */
  2.6115 +	psr_up = ctx->ctx_saved_psr_up;
  2.6116 +
  2.6117 +	/*
  2.6118 +	 * if we were the last user of the PMU on that CPU,
  2.6119 +	 * then nothing to do except restore psr
  2.6120 +	 */
  2.6121 +	if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) {
  2.6122 +
  2.6123 +		/*
  2.6124 +		 * retrieve partial reload masks (due to user modifications)
  2.6125 +		 */
  2.6126 +		pmc_mask = ctx->ctx_reload_pmcs[0];
  2.6127 +		pmd_mask = ctx->ctx_reload_pmds[0];
  2.6128 +
  2.6129 +	} else {
  2.6130 +		/*
  2.6131 +	 	 * To avoid leaking information to the user level when psr.sp=0,
  2.6132 +	 	 * we must reload ALL implemented pmds (even the ones we don't use).
  2.6133 +	 	 * In the kernel we only allow PFM_READ_PMDS on registers which
  2.6134 +	 	 * we initialized or requested (sampling) so there is no risk there.
  2.6135 +	 	 */
  2.6136 +		pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
  2.6137 +
  2.6138 +		/*
  2.6139 +	 	 * ALL accessible PMCs are systematically reloaded, unused registers
  2.6140 +	 	 * get their default (from pfm_reset_pmu_state()) values to avoid picking
  2.6141 +	 	 * up stale configuration.
  2.6142 +	 	 *
  2.6143 +	 	 * PMC0 is never in the mask. It is always restored separately.
  2.6144 +	 	 */
  2.6145 +		pmc_mask = ctx->ctx_all_pmcs[0];
  2.6146 +	}
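+	/*
+	 * Example of the fast path above: if this task was the last PMU user
+	 * on this CPU and GET_ACTIVATION() still matches ctx_last_activation,
+	 * only registers modified while the task was switched out
+	 * (ctx_reload_pmcs[0]/ctx_reload_pmds[0]) need rewriting; otherwise
+	 * all implemented registers are reloaded to avoid picking up another
+	 * context's stale values.
+	 */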
  2.6147 +	/*
  2.6148 +	 * when context is MASKED, we will restore PMC with plm=0
  2.6149 +	 * and PMD with stale information, but that's ok, nothing
  2.6150 +	 * will be captured.
  2.6151 +	 *
  2.6152 +	 * XXX: optimize here
  2.6153 +	 */
  2.6154 +	if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask);
  2.6155 +	if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask);
  2.6156 +
  2.6157 +	/*
  2.6158 +	 * check for pending overflow at the time the state
  2.6159 +	 * was saved.
  2.6160 +	 */
  2.6161 +	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
  2.6162 +		/*
  2.6163 +		 * reload pmc0 with the overflow information
  2.6164 +		 * On McKinley PMU, this will trigger a PMU interrupt
  2.6165 +		 */
  2.6166 +		ia64_set_pmc(0, t->pmcs[0]);
  2.6167 +		ia64_srlz_d();
  2.6168 +		t->pmcs[0] = 0UL;
  2.6169 +
  2.6170 +		/*
  2.6171 +		 * will replay the PMU interrupt
  2.6172 +		 */
  2.6173 +		if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR);
  2.6174 +
  2.6175 +		pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
  2.6176 +	}
  2.6177 +
  2.6178 +	/*
  2.6179 +	 * we just did a reload, so we reset the partial reload fields
  2.6180 +	 */
  2.6181 +	ctx->ctx_reload_pmcs[0] = 0UL;
  2.6182 +	ctx->ctx_reload_pmds[0] = 0UL;
  2.6183 +
  2.6184 +	SET_LAST_CPU(ctx, smp_processor_id());
  2.6185 +
  2.6186 +	/*
  2.6187 +	 * dump activation value for this PMU
  2.6188 +	 */
  2.6189 +	INC_ACTIVATION();
  2.6190 +	/*
  2.6191 +	 * record current activation for this context
  2.6192 +	 */
  2.6193 +	SET_ACTIVATION(ctx);
  2.6194 +
  2.6195 +	/*
  2.6196 +	 * establish new ownership. 
  2.6197 +	 */
  2.6198 +	SET_PMU_OWNER(task, ctx);
  2.6199 +
  2.6200 +	/*
  2.6201 +	 * restore the psr.up bit. measurement
  2.6202 +	 * is active again.
  2.6203 +	 * no PMU interrupt can happen at this point
  2.6204 +	 * because we still have interrupts disabled.
  2.6205 +	 */
  2.6206 +	if (likely(psr_up)) pfm_set_psr_up();
  2.6207 +
  2.6208 +	/*
  2.6209 +	 * allow concurrent access to context
  2.6210 +	 */
  2.6211 +	pfm_unprotect_ctx_ctxsw(ctx, flags);
  2.6212 +}
  2.6213 +#else /*  !CONFIG_SMP */
  2.6214 +/*
  2.6215 + * reload PMU state for UP kernels
  2.6216 + * in 2.5 we come here with interrupts disabled
  2.6217 + */
  2.6218 +void
  2.6219 +pfm_load_regs (struct task_struct *task)
  2.6220 +{
  2.6221 +	struct thread_struct *t;
  2.6222 +	pfm_context_t *ctx;
  2.6223 +	struct task_struct *owner;
  2.6224 +	unsigned long pmd_mask, pmc_mask;
  2.6225 +	u64 psr, psr_up;
  2.6226 +	int need_irq_resend;
  2.6227 +
  2.6228 +	owner = GET_PMU_OWNER();
  2.6229 +	ctx   = PFM_GET_CTX(task);
  2.6230 +	t     = &task->thread;
  2.6231 +	psr   = pfm_get_psr();
  2.6232 +
  2.6233 +	BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
  2.6234 +	BUG_ON(psr & IA64_PSR_I);
  2.6235 +
  2.6236 +	/*
  2.6237 +	 * we restore ALL the debug registers to avoid picking up
  2.6238 +	 * stale state.
  2.6239 +	 *
  2.6240 +	 * This must be done even when the task is still the owner
  2.6241 +	 * as the registers may have been modified via ptrace()
  2.6242 +	 * (not perfmon) by the previous task.
  2.6243 +	 */
  2.6244 +	if (ctx->ctx_fl_using_dbreg) {
  2.6245 +		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
  2.6246 +		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
  2.6247 +	}
  2.6248 +
  2.6249 +	/*
   2.6250 +	 * retrieve saved psr.up
  2.6251 +	 */
  2.6252 +	psr_up = ctx->ctx_saved_psr_up;
  2.6253 +	need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
  2.6254 +
  2.6255 +	/*
  2.6256 +	 * short path, our state is still there, just
  2.6257 +	 * need to restore psr and we go
  2.6258 +	 *
  2.6259 +	 * we do not touch either PMC nor PMD. the psr is not touched
   2.6260 +	 * by the overflow_handler. So we are safe w.r.t. interrupt
  2.6261 +	 * concurrency even without interrupt masking.
  2.6262 +	 */
  2.6263 +	if (likely(owner == task)) {
  2.6264 +		if (likely(psr_up)) pfm_set_psr_up();
  2.6265 +		return;
  2.6266 +	}
  2.6267 +
  2.6268 +	/*
  2.6269 +	 * someone else is still using the PMU, first push it out and
  2.6270 +	 * then we'll be able to install our stuff !
  2.6271 +	 *
  2.6272 +	 * Upon return, there will be no owner for the current PMU
  2.6273 +	 */
  2.6274 +	if (owner) pfm_lazy_save_regs(owner);
  2.6275 +
  2.6276 +	/*
  2.6277 +	 * To avoid leaking information to the user level when psr.sp=0,
  2.6278 +	 * we must reload ALL implemented pmds (even the ones we don't use).
  2.6279 +	 * In the kernel we only allow PFM_READ_PMDS on registers which
  2.6280 +	 * we initialized or requested (sampling) so there is no risk there.
  2.6281 +	 */
  2.6282 +	pmd_mask = pfm_sysctl.fastctxsw ?  ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
  2.6283 +
  2.6284 +	/*
  2.6285 +	 * ALL accessible PMCs are systematically reloaded, unused registers
  2.6286 +	 * get their default (from pfm_reset_pmu_state()) values to avoid picking
  2.6287 +	 * up stale configuration.
  2.6288 +	 *
  2.6289 +	 * PMC0 is never in the mask. It is always restored separately
  2.6290 +	 */
  2.6291 +	pmc_mask = ctx->ctx_all_pmcs[0];
  2.6292 +
  2.6293 +	pfm_restore_pmds(t->pmds, pmd_mask);
  2.6294 +	pfm_restore_pmcs(t->pmcs, pmc_mask);
  2.6295 +
  2.6296 +	/*
  2.6297 +	 * check for pending overflow at the time the state
  2.6298 +	 * was saved.
  2.6299 +	 */
  2.6300 +	if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
  2.6301 +		/*
  2.6302 +		 * reload pmc0 with the overflow information
  2.6303 +		 * On McKinley PMU, this will trigger a PMU interrupt
  2.6304 +		 */
  2.6305 +		ia64_set_pmc(0, t->pmcs[0]);
  2.6306 +		ia64_srlz_d();
  2.6307 +
  2.6308 +		t->pmcs[0] = 0UL;
  2.6309 +
  2.6310 +		/*
  2.6311 +		 * will replay the PMU interrupt
  2.6312 +		 */
  2.6313 +		if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR);
  2.6314 +
  2.6315 +		pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
  2.6316 +	}
  2.6317 +
  2.6318 +	/*
  2.6319 +	 * establish new ownership. 
  2.6320 +	 */
  2.6321 +	SET_PMU_OWNER(task, ctx);
  2.6322 +
  2.6323 +	/*
  2.6324 +	 * restore the psr.up bit. measurement
  2.6325 +	 * is active again.
  2.6326 +	 * no PMU interrupt can happen at this point
  2.6327 +	 * because we still have interrupts disabled.
  2.6328 +	 */
  2.6329 +	if (likely(psr_up)) pfm_set_psr_up();
  2.6330 +}
  2.6331 +#endif /* CONFIG_SMP */
  2.6332 +
  2.6333 +/*
  2.6334 + * this function assumes monitoring is stopped
  2.6335 + */
  2.6336 +static void
  2.6337 +pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
  2.6338 +{
  2.6339 +	u64 pmc0;
  2.6340 +	unsigned long mask2, val, pmd_val, ovfl_val;
  2.6341 +	int i, can_access_pmu = 0;
  2.6342 +	int is_self;
  2.6343 +
  2.6344 +	/*
  2.6345 +	 * is the caller the task being monitored (or which initiated the
  2.6346 +	 * session for system wide measurements)
  2.6347 +	 */
  2.6348 +	is_self = ctx->ctx_task == task ? 1 : 0;
  2.6349 +
  2.6350 +	/*
   2.6351 +	 * can access PMU if task is the owner of the PMU state on the current CPU
  2.6352 +	 * or if we are running on the CPU bound to the context in system-wide mode
  2.6353 +	 * (that is not necessarily the task the context is attached to in this mode).
  2.6354 +	 * In system-wide we always have can_access_pmu true because a task running on an
  2.6355 +	 * invalid processor is flagged earlier in the call stack (see pfm_stop).
  2.6356 +	 */
  2.6357 +	can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id());
  2.6358 +	if (can_access_pmu) {
  2.6359 +		/*
  2.6360 +		 * Mark the PMU as not owned
  2.6361 +		 * This will cause the interrupt handler to do nothing in case an overflow
  2.6362 +		 * interrupt was in-flight
  2.6363 +		 * This also guarantees that pmc0 will contain the final state
  2.6364 +		 * It virtually gives us full control on overflow processing from that point
  2.6365 +		 * on.
  2.6366 +		 */
  2.6367 +		SET_PMU_OWNER(NULL, NULL);
  2.6368 +		DPRINT(("releasing ownership\n"));
  2.6369 +
  2.6370 +		/*
  2.6371 +		 * read current overflow status:
  2.6372 +		 *
  2.6373 +		 * we are guaranteed to read the final stable state
  2.6374 +		 */
  2.6375 +		ia64_srlz_d();
  2.6376 +		pmc0 = ia64_get_pmc(0); /* slow */
  2.6377 +
  2.6378 +		/*
  2.6379 +		 * reset freeze bit, overflow status information destroyed
  2.6380 +		 */
  2.6381 +		pfm_unfreeze_pmu();
  2.6382 +	} else {
  2.6383 +		pmc0 = task->thread.pmcs[0];
  2.6384 +		/*
  2.6385 +		 * clear whatever overflow status bits there were
  2.6386 +		 */
  2.6387 +		task->thread.pmcs[0] = 0;
  2.6388 +	}
  2.6389 +	ovfl_val = pmu_conf->ovfl_val;
  2.6390 +	/*
  2.6391 +	 * we save all the used pmds
  2.6392 +	 * we take care of overflows for counting PMDs
  2.6393 +	 *
  2.6394 +	 * XXX: sampling situation is not taken into account here
  2.6395 +	 */
  2.6396 +	mask2 = ctx->ctx_used_pmds[0];
  2.6397 +
  2.6398 +	DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2));
  2.6399 +
  2.6400 +	for (i = 0; mask2; i++, mask2>>=1) {
  2.6401 +
  2.6402 +		/* skip unused pmds */
  2.6403 +		if ((mask2 & 0x1) == 0) continue;
  2.6404 +
  2.6405 +		/*
  2.6406 +		 * can access PMU is always true in system-wide mode
  2.6407 +		 */
  2.6408 +		val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i];
  2.6409 +
  2.6410 +		if (PMD_IS_COUNTING(i)) {
  2.6411 +			DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
  2.6412 +				task->pid,
  2.6413 +				i,
  2.6414 +				ctx->ctx_pmds[i].val,
  2.6415 +				val & ovfl_val));
  2.6416 +
  2.6417 +			/*
  2.6418 +			 * we rebuild the full 64 bit value of the counter
  2.6419 +			 */
  2.6420 +			val = ctx->ctx_pmds[i].val + (val & ovfl_val);
  2.6421 +
  2.6422 +			/*
  2.6423 +			 * now everything is in ctx_pmds[] and we need
  2.6424 +			 * to clear the saved context from save_regs() such that
  2.6425 +			 * pfm_read_pmds() gets the correct value
  2.6426 +			 */
  2.6427 +			pmd_val = 0UL;
  2.6428 +
  2.6429 +			/*
  2.6430 +			 * take care of overflow inline
  2.6431 +			 */
  2.6432 +			if (pmc0 & (1UL << i)) {
  2.6433 +				val += 1 + ovfl_val;
  2.6434 +				DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i));
  2.6435 +			}
  2.6436 +		}
  2.6437 +
  2.6438 +		DPRINT(("[%d] ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task->pid, i, val, pmd_val));
  2.6439 +
  2.6440 +		if (is_self) task->thread.pmds[i] = pmd_val;
  2.6441 +
  2.6442 +		ctx->ctx_pmds[i].val = val;
  2.6443 +	}
  2.6444 +}
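A minimal standalone sketch (illustrative only, not from this changeset; the 47-bit ovfl_val is taken from the Itanium 2 table later in this patch, the other values are invented) of the counter reconstruction performed by pfm_flush_pmds(): the full 64-bit software counter is the saved context value plus the low hardware bits, with one extra 2^47 folded in when pmc0 flags a pending overflow.

	#include <stdio.h>
	#include <stdint.h>
	#include <inttypes.h>

	int main(void)
	{
		/* 47-bit hardware counter, matching the Itanium 2 ovfl_val = 2^47 - 1 */
		const uint64_t ovfl_val = (UINT64_C(1) << 47) - 1;

		uint64_t ctx_val = UINT64_C(3) << 47;  /* software-accumulated high part (invented)    */
		uint64_t hw_pmd  = UINT64_C(0x1234);   /* current hardware PMD low bits (invented)     */
		int      ovfl    = 1;                  /* pretend pmc0 flagged this counter as wrapped */

		/* rebuild the full 64-bit value from context + hardware, as the flush loop does */
		uint64_t val = ctx_val + (hw_pmd & ovfl_val);

		/* account for one overflow that happened after the last save: add 2^47 */
		if (ovfl)
			val += 1 + ovfl_val;

		printf("full counter = 0x%" PRIx64 "\n", val);
		return 0;
	}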
  2.6445 +
  2.6446 +static struct irqaction perfmon_irqaction = {
  2.6447 +	.handler = pfm_interrupt_handler,
  2.6448 +	.flags   = SA_INTERRUPT,
  2.6449 +	.name    = "perfmon"
  2.6450 +};
  2.6451 +
  2.6452 +static void
  2.6453 +pfm_alt_save_pmu_state(void *data)
  2.6454 +{
  2.6455 +	struct pt_regs *regs;
  2.6456 +
  2.6457 +	regs = task_pt_regs(current);
  2.6458 +
  2.6459 +	DPRINT(("called\n"));
  2.6460 +
  2.6461 +	/*
  2.6462 +	 * should not be necessary but
  2.6463 +	 * let's take no risk
  2.6464 +	 */
  2.6465 +	pfm_clear_psr_up();
  2.6466 +	pfm_clear_psr_pp();
  2.6467 +	ia64_psr(regs)->pp = 0;
  2.6468 +
  2.6469 +	/*
  2.6470 +	 * This call is required
  2.6471 +	 * May cause a spurious interrupt on some processors
  2.6472 +	 */
  2.6473 +	pfm_freeze_pmu();
  2.6474 +
  2.6475 +	ia64_srlz_d();
  2.6476 +}
  2.6477 +
  2.6478 +void
  2.6479 +pfm_alt_restore_pmu_state(void *data)
  2.6480 +{
  2.6481 +	struct pt_regs *regs;
  2.6482 +
  2.6483 +	regs = task_pt_regs(current);
  2.6484 +
  2.6485 +	DPRINT(("called\n"));
  2.6486 +
  2.6487 +	/*
  2.6488 +	 * put PMU back in state expected
  2.6489 +	 * by perfmon
  2.6490 +	 */
  2.6491 +	pfm_clear_psr_up();
  2.6492 +	pfm_clear_psr_pp();
  2.6493 +	ia64_psr(regs)->pp = 0;
  2.6494 +
  2.6495 +	/*
  2.6496 +	 * perfmon runs with PMU unfrozen at all times
  2.6497 +	 */
  2.6498 +	pfm_unfreeze_pmu();
  2.6499 +
  2.6500 +	ia64_srlz_d();
  2.6501 +}
  2.6502 +
  2.6503 +int
  2.6504 +pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
  2.6505 +{
  2.6506 +	int ret, i;
  2.6507 +	int reserve_cpu;
  2.6508 +
  2.6509 +	/* some sanity checks */
  2.6510 +	if (hdl == NULL || hdl->handler == NULL) return -EINVAL;
  2.6511 +
  2.6512 +	/* do the easy test first */
  2.6513 +	if (pfm_alt_intr_handler) return -EBUSY;
  2.6514 +
  2.6515 +	/* one at a time in the install or remove, just fail the others */
  2.6516 +	if (!spin_trylock(&pfm_alt_install_check)) {
  2.6517 +		return -EBUSY;
  2.6518 +	}
  2.6519 +
  2.6520 +	/* reserve our session */
  2.6521 +	for_each_online_cpu(reserve_cpu) {
  2.6522 +		ret = pfm_reserve_session(NULL, 1, reserve_cpu);
  2.6523 +		if (ret) goto cleanup_reserve;
  2.6524 +	}
  2.6525 +
  2.6526 +	/* save the current system wide pmu states */
  2.6527 +	ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1);
  2.6528 +	if (ret) {
  2.6529 +		DPRINT(("on_each_cpu() failed: %d\n", ret));
  2.6530 +		goto cleanup_reserve;
  2.6531 +	}
  2.6532 +
  2.6533 +	/* officially change to the alternate interrupt handler */
  2.6534 +	pfm_alt_intr_handler = hdl;
  2.6535 +
  2.6536 +	spin_unlock(&pfm_alt_install_check);
  2.6537 +
  2.6538 +	return 0;
  2.6539 +
  2.6540 +cleanup_reserve:
  2.6541 +	for_each_online_cpu(i) {
  2.6542 +		/* don't unreserve more than we reserved */
  2.6543 +		if (i >= reserve_cpu) break;
  2.6544 +
  2.6545 +		pfm_unreserve_session(NULL, 1, i);
  2.6546 +	}
  2.6547 +
  2.6548 +	spin_unlock(&pfm_alt_install_check);
  2.6549 +
  2.6550 +	return ret;
  2.6551 +}
  2.6552 +EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt);
  2.6553 +
  2.6554 +int
  2.6555 +pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
  2.6556 +{
  2.6557 +	int i;
  2.6558 +	int ret;
  2.6559 +
  2.6560 +	if (hdl == NULL) return -EINVAL;
  2.6561 +
  2.6562 +	/* cannot remove someone else's handler! */
  2.6563 +	if (pfm_alt_intr_handler != hdl) return -EINVAL;
  2.6564 +
  2.6565 +	/* one at a time in the install or remove, just fail the others */
  2.6566 +	if (!spin_trylock(&pfm_alt_install_check)) {
  2.6567 +		return -EBUSY;
  2.6568 +	}
  2.6569 +
  2.6570 +	pfm_alt_intr_handler = NULL;
  2.6571 +
  2.6572 +	ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1);
  2.6573 +	if (ret) {
  2.6574 +		DPRINT(("on_each_cpu() failed: %d\n", ret));
  2.6575 +	}
  2.6576 +
  2.6577 +	for_each_online_cpu(i) {
  2.6578 +		pfm_unreserve_session(NULL, 1, i);
  2.6579 +	}
  2.6580 +
  2.6581 +	spin_unlock(&pfm_alt_install_check);
  2.6582 +
  2.6583 +	return 0;
  2.6584 +}
  2.6585 +EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt);
  2.6586 +
  2.6587 +/*
  2.6588 + * perfmon initialization routine, called from the initcall() table
  2.6589 + */
  2.6590 +static int init_pfm_fs(void);
  2.6591 +
  2.6592 +static int __init
  2.6593 +pfm_probe_pmu(void)
  2.6594 +{
  2.6595 +	pmu_config_t **p;
  2.6596 +	int family;
  2.6597 +
  2.6598 +	family = local_cpu_data->family;
  2.6599 +	p      = pmu_confs;
  2.6600 +
  2.6601 +	while(*p) {
  2.6602 +		if ((*p)->probe) {
  2.6603 +			if ((*p)->probe() == 0) goto found;
  2.6604 +		} else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) {
  2.6605 +			goto found;
  2.6606 +		}
  2.6607 +		p++;
  2.6608 +	}
  2.6609 +	return -1;
  2.6610 +found:
  2.6611 +	pmu_conf = *p;
  2.6612 +	return 0;
  2.6613 +}
  2.6614 +
  2.6615 +static struct file_operations pfm_proc_fops = {
  2.6616 +	.open		= pfm_proc_open,
  2.6617 +	.read		= seq_read,
  2.6618 +	.llseek		= seq_lseek,
  2.6619 +	.release	= seq_release,
  2.6620 +};
  2.6621 +
  2.6622 +int __init
  2.6623 +pfm_init(void)
  2.6624 +{
  2.6625 +	unsigned int n, n_counters, i;
  2.6626 +
  2.6627 +	printk("perfmon: version %u.%u IRQ %u\n",
  2.6628 +		PFM_VERSION_MAJ,
  2.6629 +		PFM_VERSION_MIN,
  2.6630 +		IA64_PERFMON_VECTOR);
  2.6631 +
  2.6632 +	if (pfm_probe_pmu()) {
  2.6633 +		printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n", 
  2.6634 +				local_cpu_data->family);
  2.6635 +		return -ENODEV;
  2.6636 +	}
  2.6637 +
  2.6638 +	/*
  2.6639 +	 * compute the number of implemented PMD/PMC from the
  2.6640 +	 * description tables
  2.6641 +	 */
  2.6642 +	n = 0;
  2.6643 +	for (i=0; PMC_IS_LAST(i) == 0;  i++) {
  2.6644 +		if (PMC_IS_IMPL(i) == 0) continue;
  2.6645 +		pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63);
  2.6646 +		n++;
  2.6647 +	}
  2.6648 +	pmu_conf->num_pmcs = n;
  2.6649 +
  2.6650 +	n = 0; n_counters = 0;
  2.6651 +	for (i=0; PMD_IS_LAST(i) == 0;  i++) {
  2.6652 +		if (PMD_IS_IMPL(i) == 0) continue;
  2.6653 +		pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63);
  2.6654 +		n++;
  2.6655 +		if (PMD_IS_COUNTING(i)) n_counters++;
  2.6656 +	}
  2.6657 +	pmu_conf->num_pmds      = n;
  2.6658 +	pmu_conf->num_counters  = n_counters;
  2.6659 +
  2.6660 +	/*
  2.6661 +	 * sanity checks on the number of debug registers
  2.6662 +	 */
  2.6663 +	if (pmu_conf->use_rr_dbregs) {
  2.6664 +		if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) {
  2.6665 +			printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs);
  2.6666 +			pmu_conf = NULL;
  2.6667 +			return -1;
  2.6668 +		}
  2.6669 +		if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) {
  2.6670 +			printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_ibrs);
  2.6671 +			pmu_conf = NULL;
  2.6672 +			return -1;
  2.6673 +		}
  2.6674 +	}
  2.6675 +
  2.6676 +	printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n",
  2.6677 +	       pmu_conf->pmu_name,
  2.6678 +	       pmu_conf->num_pmcs,
  2.6679 +	       pmu_conf->num_pmds,
  2.6680 +	       pmu_conf->num_counters,
  2.6681 +	       ffz(pmu_conf->ovfl_val));
  2.6682 +
  2.6683 +	/* sanity check */
  2.6684 +	if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) {
  2.6685 +		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
  2.6686 +		pmu_conf = NULL;
  2.6687 +		return -1;
  2.6688 +	}
  2.6689 +
  2.6690 +	/*
  2.6691 +	 * create /proc/perfmon (mostly for debugging purposes)
  2.6692 +	 */
  2.6693 + 	perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL);
  2.6694 +	if (perfmon_dir == NULL) {
  2.6695 +		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
  2.6696 +		pmu_conf = NULL;
  2.6697 +		return -1;
  2.6698 +	}
  2.6699 +  	/*
  2.6700 + 	 * install customized file operations for /proc/perfmon entry
  2.6701 + 	 */
  2.6702 + 	perfmon_dir->proc_fops = &pfm_proc_fops;
  2.6703 +
  2.6704 +	/*
  2.6705 +	 * create /proc/sys/kernel/perfmon (for debugging purposes)
  2.6706 +	 */
  2.6707 +	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);
  2.6708 +
  2.6709 +	/*
  2.6710 +	 * initialize all our spinlocks
  2.6711 +	 */
  2.6712 +	spin_lock_init(&pfm_sessions.pfs_lock);
  2.6713 +	spin_lock_init(&pfm_buffer_fmt_lock);
  2.6714 +
  2.6715 +	init_pfm_fs();
  2.6716 +
  2.6717 +	for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL;
  2.6718 +
  2.6719 +	return 0;
  2.6720 +}
  2.6721 +
  2.6722 +__initcall(pfm_init);
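The impl_pmcs/impl_pmds bitmaps filled in by pfm_init() are arrays of 64-bit words indexed as word i>>6, bit i&63. A minimal standalone sketch of that indexing with an invented register number (illustrative only, not from this changeset):

	#include <stdio.h>
	#include <stdint.h>
	#include <inttypes.h>

	int main(void)
	{
		uint64_t impl[4] = { 0 };   /* bitmap words, in the spirit of impl_pmcs[]/impl_pmds[]  */
		unsigned int i = 70;        /* invented register number, purely for illustration       */

		/* same indexing as pfm_init(): word i>>6, bit i&63 */
		impl[i >> 6] |= UINT64_C(1) << (i & 63);

		printf("register %u -> word %u, bit %u, word value 0x%" PRIx64 "\n",
		       i, i >> 6, i & 63, impl[i >> 6]);
		return 0;
	}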
  2.6723 +
  2.6724 +/*
  2.6725 + * this function is called before pfm_init()
  2.6726 + */
  2.6727 +void
  2.6728 +pfm_init_percpu (void)
  2.6729 +{
  2.6730 +	/*
  2.6731 +	 * make sure no measurement is active
  2.6732 +	 * (may inherit programmed PMCs from EFI).
  2.6733 +	 */
  2.6734 +	pfm_clear_psr_pp();
  2.6735 +	pfm_clear_psr_up();
  2.6736 +
  2.6737 +	/*
  2.6738 +	 * we run with the PMU not frozen at all times
  2.6739 +	 */
  2.6740 +	pfm_unfreeze_pmu();
  2.6741 +
  2.6742 +	if (smp_processor_id() == 0)
  2.6743 +		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
  2.6744 +
  2.6745 +	ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
  2.6746 +	ia64_srlz_d();
  2.6747 +}
  2.6748 +
  2.6749 +/*
  2.6750 + * used for debug purposes only
  2.6751 + */
  2.6752 +void
  2.6753 +dump_pmu_state(const char *from)
  2.6754 +{
  2.6755 +	struct task_struct *task;
  2.6756 +	struct thread_struct *t;
  2.6757 +	struct pt_regs *regs;
  2.6758 +	pfm_context_t *ctx;
  2.6759 +	unsigned long psr, dcr, info, flags;
  2.6760 +	int i, this_cpu;
  2.6761 +
  2.6762 +	local_irq_save(flags);
  2.6763 +
  2.6764 +	this_cpu = smp_processor_id();
  2.6765 +	regs     = task_pt_regs(current);
  2.6766 +	info     = PFM_CPUINFO_GET();
  2.6767 +	dcr      = ia64_getreg(_IA64_REG_CR_DCR);
  2.6768 +
  2.6769 +	if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) {
  2.6770 +		local_irq_restore(flags);
  2.6771 +		return;
  2.6772 +	}
  2.6773 +
  2.6774 +	printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", 
  2.6775 +		this_cpu, 
  2.6776 +		from, 
  2.6777 +		current->pid, 
  2.6778 +		regs->cr_iip,
  2.6779 +		current->comm);
  2.6780 +
  2.6781 +	task = GET_PMU_OWNER();
  2.6782 +	ctx  = GET_PMU_CTX();
  2.6783 +
  2.6784 +	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx);
  2.6785 +
  2.6786 +	psr = pfm_get_psr();
  2.6787 +
  2.6788 +	printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n", 
  2.6789 +		this_cpu,
  2.6790 +		ia64_get_pmc(0),
  2.6791 +		psr & IA64_PSR_PP ? 1 : 0,
  2.6792 +		psr & IA64_PSR_UP ? 1 : 0,
  2.6793 +		dcr & IA64_DCR_PP ? 1 : 0,
  2.6794 +		info,
  2.6795 +		ia64_psr(regs)->up,
  2.6796 +		ia64_psr(regs)->pp);
  2.6797 +
  2.6798 +	ia64_psr(regs)->up = 0;
  2.6799 +	ia64_psr(regs)->pp = 0;
  2.6800 +
  2.6801 +	t = &current->thread;
  2.6802 +
  2.6803 +	for (i=1; PMC_IS_LAST(i) == 0; i++) {
  2.6804 +		if (PMC_IS_IMPL(i) == 0) continue;
  2.6805 +		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]);
  2.6806 +	}
  2.6807 +
  2.6808 +	for (i=1; PMD_IS_LAST(i) == 0; i++) {
  2.6809 +		if (PMD_IS_IMPL(i) == 0) continue;
  2.6810 +		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]);
  2.6811 +	}
  2.6812 +
  2.6813 +	if (ctx) {
  2.6814 +		printk("->CPU%d ctx_state=%d vaddr=%p addr=%p fd=%d ctx_task=[%d] saved_psr_up=0x%lx\n",
  2.6815 +				this_cpu,
  2.6816 +				ctx->ctx_state,
  2.6817 +				ctx->ctx_smpl_vaddr,
  2.6818 +				ctx->ctx_smpl_hdr,
  2.6819 +				ctx->ctx_msgq_head,
  2.6820 +				ctx->ctx_msgq_tail,
  2.6821 +				ctx->ctx_saved_psr_up);
  2.6822 +	}
  2.6823 +	local_irq_restore(flags);
  2.6824 +}
  2.6825 +
  2.6826 +/*
  2.6827 + * called from process.c:copy_thread(). task is new child.
  2.6828 + */
  2.6829 +void
  2.6830 +pfm_inherit(struct task_struct *task, struct pt_regs *regs)
  2.6831 +{
  2.6832 +	struct thread_struct *thread;
  2.6833 +
  2.6834 +	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid));
  2.6835 +
  2.6836 +	thread = &task->thread;
  2.6837 +
  2.6838 +	/*
  2.6839 +	 * cut links inherited from parent (current)
  2.6840 +	 */
  2.6841 +	thread->pfm_context = NULL;
  2.6842 +
  2.6843 +	PFM_SET_WORK_PENDING(task, 0);
  2.6844 +
  2.6845 +	/*
  2.6846 +	 * the psr bits are already set properly in copy_threads()
  2.6847 +	 */
  2.6848 +}
  2.6849 +#else  /* !CONFIG_PERFMON */
  2.6850 +asmlinkage long
  2.6851 +sys_perfmonctl (int fd, int cmd, void *arg, int count)
  2.6852 +{
  2.6853 +	return -ENOSYS;
  2.6854 +}
  2.6855 +#endif /* CONFIG_PERFMON */
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/xen/arch/ia64/linux-xen/perfmon_default_smpl.c	Tue Nov 28 11:15:35 2006 -0700
     3.3 @@ -0,0 +1,297 @@
     3.4 +/*
     3.5 + * Copyright (C) 2002-2003 Hewlett-Packard Co
     3.6 + *               Stephane Eranian <eranian@hpl.hp.com>
     3.7 + *
     3.8 + * This file implements the default sampling buffer format
     3.9 + * for the Linux/ia64 perfmon-2 subsystem.
    3.10 + */
    3.11 +#include <linux/kernel.h>
    3.12 +#include <linux/types.h>
    3.13 +#include <linux/module.h>
    3.14 +#include <linux/config.h>
    3.15 +#include <linux/init.h>
    3.16 +#include <asm/delay.h>
    3.17 +#include <linux/smp.h>
    3.18 +
    3.19 +#include <asm/perfmon.h>
    3.20 +#include <asm/perfmon_default_smpl.h>
    3.21 +
    3.22 +MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
    3.23 +MODULE_DESCRIPTION("perfmon default sampling format");
    3.24 +MODULE_LICENSE("GPL");
    3.25 +
    3.26 +#define DEFAULT_DEBUG 1
    3.27 +
    3.28 +#ifdef DEFAULT_DEBUG
    3.29 +#define DPRINT(a) \
    3.30 +	do { \
    3.31 +		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
    3.32 +	} while (0)
    3.33 +
    3.34 +#define DPRINT_ovfl(a) \
    3.35 +	do { \
    3.36 +		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
    3.37 +	} while (0)
    3.38 +
    3.39 +#else
    3.40 +#define DPRINT(a)
    3.41 +#define DPRINT_ovfl(a)
    3.42 +#endif
    3.43 +
    3.44 +static int
    3.45 +default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data)
    3.46 +{
    3.47 +	pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data;
    3.48 +	int ret = 0;
    3.49 +
    3.50 +	if (data == NULL) {
    3.51 +		DPRINT(("[%d] no argument passed\n", task->pid));
    3.52 +		return -EINVAL;
    3.53 +	}
    3.54 +
    3.55 +	DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu));
    3.56 +
    3.57 +	/*
    3.58 +	 * must hold at least the buffer header + one minimally sized entry
    3.59 +	 */
    3.60 +	if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL;
    3.61 +
    3.62 +	DPRINT(("buf_size=%lu\n", arg->buf_size));
    3.63 +
    3.64 +	return ret;
    3.65 +}
    3.66 +
    3.67 +static int
    3.68 +default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size)
    3.69 +{
    3.70 +	pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
    3.71 +
    3.72 +	/*
    3.73 +	 * size has been validated in default_validate
    3.74 +	 */
    3.75 +	*size = arg->buf_size;
    3.76 +
    3.77 +	return 0;
    3.78 +}
    3.79 +
    3.80 +static int
    3.81 +default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data)
    3.82 +{
    3.83 +	pfm_default_smpl_hdr_t *hdr;
    3.84 +	pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
    3.85 +
    3.86 +	hdr = (pfm_default_smpl_hdr_t *)buf;
    3.87 +
    3.88 +	hdr->hdr_version      = PFM_DEFAULT_SMPL_VERSION;
    3.89 +	hdr->hdr_buf_size     = arg->buf_size;
    3.90 +	hdr->hdr_cur_offs     = sizeof(*hdr);
    3.91 +	hdr->hdr_overflows    = 0UL;
    3.92 +	hdr->hdr_count        = 0UL;
    3.93 +
    3.94 +	DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
    3.95 +		task->pid,
    3.96 +		buf,
    3.97 +		hdr->hdr_buf_size,
    3.98 +		sizeof(*hdr),
    3.99 +		hdr->hdr_version,
   3.100 +		hdr->hdr_cur_offs));
   3.101 +
   3.102 +	return 0;
   3.103 +}
   3.104 +
   3.105 +static int
   3.106 +default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp)
   3.107 +{
   3.108 +	pfm_default_smpl_hdr_t *hdr;
   3.109 +	pfm_default_smpl_entry_t *ent;
   3.110 +	void *cur, *last;
   3.111 +	unsigned long *e, entry_size;
   3.112 +	unsigned int npmds, i;
   3.113 +	unsigned char ovfl_pmd;
   3.114 +	unsigned char ovfl_notify;
   3.115 +
   3.116 +	if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) {
   3.117 +		DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg));
   3.118 +		return -EINVAL;
   3.119 +	}
   3.120 +
   3.121 +	hdr         = (pfm_default_smpl_hdr_t *)buf;
   3.122 +	cur         = buf+hdr->hdr_cur_offs;
   3.123 +	last        = buf+hdr->hdr_buf_size;
   3.124 +	ovfl_pmd    = arg->ovfl_pmd;
   3.125 +	ovfl_notify = arg->ovfl_notify;
   3.126 +
   3.127 +	/*
   3.128 +	 * precheck for sanity
   3.129 +	 */
   3.130 +	if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
   3.131 +
   3.132 +	npmds = hweight64(arg->smpl_pmds[0]);
   3.133 +
   3.134 +	ent = (pfm_default_smpl_entry_t *)cur;
   3.135 +
   3.136 +	prefetch(arg->smpl_pmds_values);
   3.137 +
   3.138 +	entry_size = sizeof(*ent) + (npmds << 3);
   3.139 +
   3.140 +	/* position for first pmd */
   3.141 +	e = (unsigned long *)(ent+1);
   3.142 +
   3.143 +	hdr->hdr_count++;
   3.144 +
   3.145 +	DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n",
   3.146 +			task->pid,
   3.147 +			hdr->hdr_count,
   3.148 +			cur, last,
   3.149 +			last-cur,
   3.150 +			ovfl_pmd,
   3.151 +			ovfl_notify, npmds));
   3.152 +
   3.153 +	/*
   3.154 +	 * current = task running at the time of the overflow.
   3.155 +	 *
   3.156 +	 * per-task mode:
   3.157 +	 * 	- this is usually the task being monitored.
   3.158 +	 * 	  Under certain conditions, it might be a different task
   3.159 +	 *
   3.160 +	 * system-wide:
   3.161 +	 * 	- this is not necessarily the task controlling the session
   3.162 +	 */
   3.163 +	ent->pid            = current->pid;
   3.164 +	ent->ovfl_pmd  	    = ovfl_pmd;
   3.165 +	ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val;
   3.166 +
   3.167 +	/*
   3.168 +	 * where did the fault happen (includes slot number)
   3.169 +	 */
   3.170 +	ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3);
   3.171 +
   3.172 +	ent->tstamp    = stamp;
   3.173 +	ent->cpu       = smp_processor_id();
   3.174 +	ent->set       = arg->active_set;
   3.175 +	ent->tgid      = current->tgid;
   3.176 +
   3.177 +	/*
   3.178 +	 * selectively store PMDs in increasing index number
   3.179 +	 */
   3.180 +	if (npmds) {
   3.181 +		unsigned long *val = arg->smpl_pmds_values;
   3.182 +		for(i=0; i < npmds; i++) {
   3.183 +			*e++ = *val++;
   3.184 +		}
   3.185 +	}
   3.186 +
   3.187 +	/*
   3.188 +	 * update position for next entry
   3.189 +	 */
   3.190 +	hdr->hdr_cur_offs += entry_size;
   3.191 +	cur               += entry_size;
   3.192 +
   3.193 +	/*
   3.194 +	 * post check to avoid losing the last sample
   3.195 +	 */
   3.196 +	if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
   3.197 +
   3.198 +	/*
   3.199 +	 * keep same ovfl_pmds, ovfl_notify
   3.200 +	 */
   3.201 +	arg->ovfl_ctrl.bits.notify_user     = 0;
   3.202 +	arg->ovfl_ctrl.bits.block_task      = 0;
   3.203 +	arg->ovfl_ctrl.bits.mask_monitoring = 0;
   3.204 +	arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */
   3.205 +
   3.206 +	return 0;
   3.207 +full:
   3.208 +	DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify));
   3.209 +
   3.210 +	/*
   3.211 +	 * increment the number of buffer overflows.
   3.212 +	 * important to detect duplicate sets of samples.
   3.213 +	 */
   3.214 +	hdr->hdr_overflows++;
   3.215 +
   3.216 +	/*
   3.217 +	 * if no notification requested, then we saturate the buffer
   3.218 +	 */
   3.219 +	if (ovfl_notify == 0) {
   3.220 +		arg->ovfl_ctrl.bits.notify_user     = 0;
   3.221 +		arg->ovfl_ctrl.bits.block_task      = 0;
   3.222 +		arg->ovfl_ctrl.bits.mask_monitoring = 1;
   3.223 +		arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0;
   3.224 +	} else {
   3.225 +		arg->ovfl_ctrl.bits.notify_user     = 1;
   3.226 +		arg->ovfl_ctrl.bits.block_task      = 1; /* ignored for non-blocking context */
   3.227 +		arg->ovfl_ctrl.bits.mask_monitoring = 1;
   3.228 +		arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */
   3.229 +	}
   3.230 +	return -1; /* we are full, sorry */
   3.231 +}
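The size arithmetic in default_handler() can be shown in isolation: each sample occupies the fixed entry header plus one 8-byte word per recorded PMD, i.e. sizeof(*ent) + (npmds << 3), and hdr_cur_offs advances by that amount. A minimal standalone sketch follows (illustrative only, not from this changeset, with a simplified hypothetical entry layout rather than the real pfm_default_smpl_entry_t):

	#include <stdio.h>
	#include <stdint.h>

	/* simplified, hypothetical stand-in for pfm_default_smpl_entry_t */
	struct sample_entry {
		int      pid;
		uint64_t ip;
		uint64_t tstamp;
	};

	int main(void)
	{
		/* record pmd4, pmd5 and pmd7 with every sample (invented selection) */
		uint64_t smpl_pmds = (UINT64_C(1) << 4) | (UINT64_C(1) << 5) | (UINT64_C(1) << 7);

		/* population count of the selection mask; hweight64() in the kernel */
		unsigned int npmds = 0;
		for (uint64_t m = smpl_pmds; m != 0; m >>= 1)
			npmds += (unsigned int)(m & 1);

		/* one fixed header plus one 8-byte slot per recorded PMD */
		size_t entry_size = sizeof(struct sample_entry) + (npmds << 3);

		size_t cur_offs = 64;       /* pretend the buffer header occupies 64 bytes */
		cur_offs += entry_size;     /* the handler advances hdr_cur_offs like this */

		printf("npmds=%u entry_size=%zu next offset=%zu\n",
		       npmds, entry_size, cur_offs);
		return 0;
	}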
   3.232 +
   3.233 +static int
   3.234 +default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
   3.235 +{
   3.236 +	pfm_default_smpl_hdr_t *hdr;
   3.237 +
   3.238 +	hdr = (pfm_default_smpl_hdr_t *)buf;
   3.239 +
   3.240 +	hdr->hdr_count    = 0UL;
   3.241 +	hdr->hdr_cur_offs = sizeof(*hdr);
   3.242 +
   3.243 +	ctrl->bits.mask_monitoring = 0;
   3.244 +	ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */
   3.245 +
   3.246 +	return 0;
   3.247 +}
   3.248 +
   3.249 +static int
   3.250 +default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
   3.251 +{
   3.252 +	DPRINT(("[%d] exit(%p)\n", task->pid, buf));
   3.253 +	return 0;
   3.254 +}
   3.255 +
   3.256 +static pfm_buffer_fmt_t default_fmt={
   3.257 + 	.fmt_name 	    = "default_format",
   3.258 + 	.fmt_uuid	    = PFM_DEFAULT_SMPL_UUID,
   3.259 + 	.fmt_arg_size	    = sizeof(pfm_default_smpl_arg_t),
   3.260 + 	.fmt_validate	    = default_validate,
   3.261 + 	.fmt_getsize	    = default_get_size,
   3.262 + 	.fmt_init	    = default_init,
   3.263 + 	.fmt_handler	    = default_handler,
   3.264 + 	.fmt_restart	    = default_restart,
   3.265 + 	.fmt_restart_active = default_restart,
   3.266 + 	.fmt_exit	    = default_exit,
   3.267 +};
   3.268 +
   3.269 +static int __init
   3.270 +pfm_default_smpl_init_module(void)
   3.271 +{
   3.272 +	int ret;
   3.273 +
   3.274 +	ret = pfm_register_buffer_fmt(&default_fmt);
   3.275 +	if (ret == 0) {
   3.276 +		printk("perfmon_default_smpl: %s v%u.%u registered\n",
   3.277 +			default_fmt.fmt_name,
   3.278 +			PFM_DEFAULT_SMPL_VERSION_MAJ,
   3.279 +			PFM_DEFAULT_SMPL_VERSION_MIN);
   3.280 +	} else {
   3.281 +		printk("perfmon_default_smpl: %s cannot register ret=%d\n",
   3.282 +			default_fmt.fmt_name,
   3.283 +			ret);
   3.284 +	}
   3.285 +
   3.286 +	return ret;
   3.287 +}
   3.288 +
   3.289 +static void __exit
   3.290 +pfm_default_smpl_cleanup_module(void)
   3.291 +{
   3.292 +	int ret;
   3.293 +	ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid);
   3.294 +
   3.295 +	printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret);
   3.296 +}
   3.297 +
   3.298 +module_init(pfm_default_smpl_init_module);
   3.299 +module_exit(pfm_default_smpl_cleanup_module);
   3.300 +
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/xen/arch/ia64/linux-xen/perfmon_generic.h	Tue Nov 28 11:15:35 2006 -0700
     4.3 @@ -0,0 +1,45 @@
     4.4 +/*
     4.5 + * This file contains the generic PMU register description tables
     4.6 + * and pmc checker used by perfmon.c.
     4.7 + *
     4.8 + * Copyright (C) 2002-2003  Hewlett Packard Co
     4.9 + *               Stephane Eranian <eranian@hpl.hp.com>
    4.10 + */
    4.11 +
    4.12 +static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={
    4.13 +/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    4.14 +/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    4.15 +/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    4.16 +/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    4.17 +/* pmc4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    4.18 +/* pmc5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    4.19 +/* pmc6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    4.20 +/* pmc7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    4.21 +	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
    4.22 +};
    4.23 +
    4.24 +static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={
    4.25 +/* pmd0  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
    4.26 +/* pmd1  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
    4.27 +/* pmd2  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
    4.28 +/* pmd3  */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
    4.29 +/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
    4.30 +/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
    4.31 +/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
    4.32 +/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
    4.33 +	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
    4.34 +};
    4.35 +
    4.36 +/*
    4.37 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
    4.38 + */
    4.39 +static pmu_config_t pmu_conf_gen={
    4.40 +	.pmu_name   = "Generic",
    4.41 +	.pmu_family = 0xff, /* any */
    4.42 +	.ovfl_val   = (1UL << 32) - 1,
    4.43 +	.num_ibrs   = 0, /* does not use */
    4.44 +	.num_dbrs   = 0, /* does not use */
    4.45 +	.pmd_desc   = pfm_gen_pmd_desc,
    4.46 +	.pmc_desc   = pfm_gen_pmc_desc
    4.47 +};
    4.48 +
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/xen/arch/ia64/linux-xen/perfmon_itanium.h	Tue Nov 28 11:15:35 2006 -0700
     5.3 @@ -0,0 +1,115 @@
     5.4 +/*
     5.5 + * This file contains the Itanium PMU register description tables
     5.6 + * and pmc checker used by perfmon.c.
     5.7 + *
     5.8 + * Copyright (C) 2002-2003  Hewlett Packard Co
     5.9 + *               Stephane Eranian <eranian@hpl.hp.com>
    5.10 + */
    5.11 +static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
    5.12 +
    5.13 +static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={
    5.14 +/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.15 +/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.16 +/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.17 +/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.18 +/* pmc4  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.19 +/* pmc5  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.20 +/* pmc6  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.21 +/* pmc7  */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.22 +/* pmc8  */ { PFM_REG_CONFIG  , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.23 +/* pmc9  */ { PFM_REG_CONFIG  , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.24 +/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.25 +/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.26 +/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.27 +/* pmc13 */ { PFM_REG_CONFIG  , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    5.28 +	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
    5.29 +};
    5.30 +
    5.31 +static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={
    5.32 +/* pmd0  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
    5.33 +/* pmd1  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
    5.34 +/* pmd2  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
    5.35 +/* pmd3  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
    5.36 +/* pmd4  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
    5.37 +/* pmd5  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
    5.38 +/* pmd6  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
    5.39 +/* pmd7  */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
    5.40 +/* pmd8  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    5.41 +/* pmd9  */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    5.42 +/* pmd10 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    5.43 +/* pmd11 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    5.44 +/* pmd12 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    5.45 +/* pmd13 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    5.46 +/* pmd14 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    5.47 +/* pmd15 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    5.48 +/* pmd16 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    5.49 +/* pmd17 */ { PFM_REG_BUFFER  , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
    5.50 +	    { PFM_REG_END     , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
    5.51 +};
    5.52 +
    5.53 +static int
    5.54 +pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
    5.55 +{
    5.56 +	int ret;
    5.57 +	int is_loaded;
    5.58 +
    5.59 +	/* sanity check */
    5.60 +	if (ctx == NULL) return -EINVAL;
    5.61 +
    5.62 +	is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
    5.63 +
    5.64 +	/*
    5.65 +	 * we must clear the (instruction) debug registers if pmc13.ta bit is cleared
    5.66 +	 * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
    5.67 +	 */
    5.68 +	if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) {
    5.69 +
    5.70 +		DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val));
    5.71 +
    5.72 +		/* don't mix debug with perfmon */
    5.73 +		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
    5.74 +
    5.75 +		/*
    5.76 +		 * a count of 0 will mark the debug registers as in use and also
    5.77 +		 * ensure that they are properly cleared.
    5.78 +		 */
    5.79 +		ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs);
    5.80 +		if (ret) return ret;
    5.81 +	}
    5.82 +
    5.83 +	/*
    5.84 +	 * we must clear the (data) debug registers if pmc11.pt bit is cleared
    5.85 +	 * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
    5.86 +	 */
    5.87 +	if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) {
    5.88 +
    5.89 +		DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val));
    5.90 +
    5.91 +		/* don't mix debug with perfmon */
    5.92 +		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
    5.93 +
    5.94 +		/*
    5.95 +		 * a count of 0 will mark the debug registers as in use and also
    5.96 +		 * ensure that they are properly cleared.
    5.97 +		 */
    5.98 +		ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs);
    5.99 +		if (ret) return ret;
   5.100 +	}
   5.101 +	return 0;
   5.102 +}
   5.103 +
   5.104 +/*
   5.105 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
   5.106 + */
   5.107 +static pmu_config_t pmu_conf_ita={
   5.108 +	.pmu_name      = "Itanium",
   5.109 +	.pmu_family    = 0x7,
   5.110 +	.ovfl_val      = (1UL << 32) - 1,
   5.111 +	.pmd_desc      = pfm_ita_pmd_desc,
   5.112 +	.pmc_desc      = pfm_ita_pmc_desc,
   5.113 +	.num_ibrs      = 8,
   5.114 +	.num_dbrs      = 8,
   5.115 +	.use_rr_dbregs = 1, /* debug registers are used for range restrictions */
   5.116 +};
   5.117 +
   5.118 +
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/xen/arch/ia64/linux-xen/perfmon_mckinley.h	Tue Nov 28 11:15:35 2006 -0700
     6.3 @@ -0,0 +1,187 @@
     6.4 +/*
     6.5 + * This file contains the McKinley PMU register description tables
     6.6 + * and pmc checker used by perfmon.c.
     6.7 + *
     6.8 + * Copyright (C) 2002-2003  Hewlett Packard Co
     6.9 + *               Stephane Eranian <eranian@hpl.hp.com>
    6.10 + */
    6.11 +static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
    6.12 +
    6.13 +static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={
    6.14 +/* pmc0  */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.15 +/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.16 +/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.17 +/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.18 +/* pmc4  */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.19 +/* pmc5  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.20 +/* pmc6  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.21 +/* pmc7  */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL,  pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.22 +/* pmc8  */ { PFM_REG_CONFIG  , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.23 +/* pmc9  */ { PFM_REG_CONFIG  , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.24 +/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.25 +/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL,  pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.26 +/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL,  pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.27 +/* pmc13 */ { PFM_REG_CONFIG  , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.28 +/* pmc14 */ { PFM_REG_CONFIG  , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.29 +/* pmc15 */ { PFM_REG_CONFIG  , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
    6.30 +	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
    6.31 +};
    6.32 +
    6.33 +static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={
    6.34 +/* pmd0  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
    6.35 +/* pmd1  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
    6.36 +/* pmd2  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
    6.37 +/* pmd3  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
    6.38 +/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
    6.39 +/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
    6.40 +/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
    6.41 +/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
    6.42 +/* pmd8  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    6.43 +/* pmd9  */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    6.44 +/* pmd10 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    6.45 +/* pmd11 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    6.46 +/* pmd12 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    6.47 +/* pmd13 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    6.48 +/* pmd14 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    6.49 +/* pmd15 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    6.50 +/* pmd16 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
    6.51 +/* pmd17 */ { PFM_REG_BUFFER  , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
    6.52 +	    { PFM_REG_END     , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
    6.53 +};
    6.54 +
    6.55 +/*
    6.56 + * PMC reserved fields must have their power-up values preserved
    6.57 + */
    6.58 +static int
    6.59 +pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
    6.60 +{
    6.61 +	unsigned long tmp1, tmp2, ival = *val;
    6.62 +
    6.63 +	/* remove reserved areas from user value */
    6.64 +	tmp1 = ival & PMC_RSVD_MASK(cnum);
    6.65 +
    6.66 +	/* get reserved fields values */
    6.67 +	tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
    6.68 +
    6.69 +	*val = tmp1 | tmp2;
    6.70 +
    6.71 +	DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
    6.72 +		  cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
    6.73 +	return 0;
    6.74 +}
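pfm_mck_reserved() boils down to a two-mask combine: caller bits are kept only where the mask allows, and the remaining bits come from the register's power-up default. A minimal standalone sketch with invented mask and default values (illustrative only, not taken from the PMC description tables above, nor part of this changeset):

	#include <stdio.h>
	#include <stdint.h>
	#include <inttypes.h>

	int main(void)
	{
		/* invented values for illustration only */
		uint64_t rsvd_mask = UINT64_C(0x00000000fffffff0); /* bits the caller may modify       */
		uint64_t dfl_val   = UINT64_C(0x0000000000000003); /* power-up default of the register */
		uint64_t user_val  = UINT64_C(0xffffffffffffffff); /* value handed in from user space  */

		/* keep caller bits only where allowed, take everything else from the default,
		   the same two-mask merge that pfm_mck_reserved() performs */
		uint64_t merged = (user_val & rsvd_mask) | (dfl_val & ~rsvd_mask);

		printf("merged pmc value = 0x%016" PRIx64 "\n", merged);
		return 0;
	}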
    6.75 +
    6.76 +/*
    6.77 + * task can be NULL if the context is unloaded
    6.78 + */
    6.79 +static int
    6.80 +pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
    6.81 +{
    6.82 +	int ret = 0, check_case1 = 0;
    6.83 +	unsigned long val8 = 0, val14 = 0, val13 = 0;
    6.84 +	int is_loaded;
    6.85 +
    6.86 +	/* first preserve the reserved fields */
    6.87 +	pfm_mck_reserved(cnum, val, regs);
    6.88 +
    6.89 +	/* sanity check */
    6.90 +	if (ctx == NULL) return -EINVAL;
    6.91 +
    6.92 +	is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
    6.93 +
    6.94 +	/*
    6.95 +	 * we must clear the debug registers if pmc13 has a value which enable
    6.96 +	 * memory pipeline event constraints. In this case we need to clear the
    6.97 +	 * debug registers if they have not yet been accessed. This is required
    6.98 +	 * to avoid picking stale state.
    6.99 +	 * PMC13 is "active" if:
   6.100 +	 * 	one of the pmc13.cfg_dbrpXX fields is different from 0x3
   6.101 +	 * AND
   6.102 +	 * 	the corresponding pmc13.ena_dbrpXX bit is set.
   6.103 +	 */
   6.104 +	DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded));
   6.105 +
   6.106 +	if (cnum == 13 && is_loaded
   6.107 +	    && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
   6.108 +
   6.109 +		DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val));
   6.110 +
   6.111 +		/* don't mix debug with perfmon */
   6.112 +		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
   6.113 +
   6.114 +		/*
   6.115 +		 * a count of 0 will mark the debug registers as in use and also
   6.116 +		 * ensure that they are properly cleared.
   6.117 +		 */
   6.118 +		ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
   6.119 +		if (ret) return ret;
   6.120 +	}
   6.121 +	/*
   6.122 +	 * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled
   6.123 +	 * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
   6.124 +	 */
   6.125 +	if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) {
   6.126 +
   6.127 +		DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val));
   6.128 +
   6.129 +		/* don't mix debug with perfmon */
   6.130 +		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
   6.131 +
   6.132 +		/*
   6.133 +		 * a count of 0 will mark the debug registers as in use and also
   6.134 +		 * ensure that they are properly cleared.
   6.135 +		 */
   6.136 +		ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
   6.137 +		if (ret) return ret;
   6.138 +
   6.139 +	}
   6.140 +
   6.141 +	switch(cnum) {
   6.142 +		case  4: *val |= 1UL << 23; /* force power enable bit */
   6.143 +			 break;
   6.144 +		case  8: val8 = *val;
   6.145 +			 val13 = ctx->ctx_pmcs[13];
   6.146 +			 val14 = ctx->ctx_pmcs[14];
   6.147 +			 check_case1 = 1;
   6.148 +			 break;
   6.149 +		case 13: val8  = ctx->ctx_pmcs[8];
   6.150 +			 val13 = *val;
   6.151 +			 val14 = ctx->ctx_pmcs[14];
   6.152 +			 check_case1 = 1;
   6.153 +			 break;
   6.154 +		case 14: val8  = ctx->ctx_pmcs[8];
   6.155 +			 val13 = ctx->ctx_pmcs[13];
   6.156 +			 val14 = *val;
   6.157 +			 check_case1 = 1;
   6.158 +			 break;
   6.159 +	}
   6.160 +	/* check illegal configuration which can produce inconsistencies in tagging
   6.161 +	 * i-side events in L1D and L2 caches
   6.162 +	 */
   6.163 +	if (check_case1) {
   6.164 +		ret =   ((val13 >> 45) & 0xf) == 0
   6.165 +		   && ((val8 & 0x1) == 0)
   6.166 +		   && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
   6.167 +		       ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
   6.168 +
   6.169 +		if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n"));
   6.170 +	}
   6.171 +
   6.172 +	return ret ? -EINVAL : 0;
   6.173 +}
   6.174 +
   6.175 +/*
   6.176 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
   6.177 + */
   6.178 +static pmu_config_t pmu_conf_mck={
   6.179 +	.pmu_name      = "Itanium 2",
   6.180 +	.pmu_family    = 0x1f,
   6.181 +	.flags	       = PFM_PMU_IRQ_RESEND,
   6.182 +	.ovfl_val      = (1UL << 47) - 1,
   6.183 +	.pmd_desc      = pfm_mck_pmd_desc,
   6.184 +	.pmc_desc      = pfm_mck_pmc_desc,
   6.185 +	.num_ibrs       = 8,
   6.186 +	.num_dbrs       = 8,
   6.187 +	.use_rr_dbregs = 1 /* debug registers are used for range restrictions */
   6.188 +};
   6.189 +
   6.190 +
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/xen/arch/ia64/linux-xen/perfmon_montecito.h	Tue Nov 28 11:15:35 2006 -0700
     7.3 @@ -0,0 +1,269 @@
     7.4 +/*
     7.5 + * This file contains the Montecito PMU register description tables
     7.6 + * and pmc checker used by perfmon.c.
     7.7 + *
     7.8 + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
     7.9 + *               Contributed by Stephane Eranian <eranian@hpl.hp.com>
    7.10 + */
    7.11 +static int pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
    7.12 +
    7.13 +#define RDEP_MONT_ETB	(RDEP(38)|RDEP(39)|RDEP(48)|RDEP(49)|RDEP(50)|RDEP(51)|RDEP(52)|RDEP(53)|RDEP(54)|\
    7.14 +			 RDEP(55)|RDEP(56)|RDEP(57)|RDEP(58)|RDEP(59)|RDEP(60)|RDEP(61)|RDEP(62)|RDEP(63))
    7.15 +#define RDEP_MONT_DEAR  (RDEP(32)|RDEP(33)|RDEP(36))
    7.16 +#define RDEP_MONT_IEAR  (RDEP(34)|RDEP(35))
    7.17 +
    7.18 +static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={
    7.19 +/* pmc0  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
    7.20 +/* pmc1  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
    7.21 +/* pmc2  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
    7.22 +/* pmc3  */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
    7.23 +/* pmc4  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(4),0, 0, 0}, {0,0, 0, 0}},
    7.24 +/* pmc5  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(5),0, 0, 0}, {0,0, 0, 0}},
    7.25 +/* pmc6  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(6),0, 0, 0}, {0,0, 0, 0}},
    7.26 +/* pmc7  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(7),0, 0, 0}, {0,0, 0, 0}},
    7.27 +/* pmc8  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(8),0, 0, 0}, {0,0, 0, 0}},
    7.28 +/* pmc9  */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(9),0, 0, 0}, {0,0, 0, 0}},
    7.29 +/* pmc10 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(10),0, 0, 0}, {0,0, 0, 0}},
    7.30 +/* pmc11 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(11),0, 0, 0}, {0,0, 0, 0}},
    7.31 +/* pmc12 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(12),0, 0, 0}, {0,0, 0, 0}},
    7.32 +/* pmc13 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(13),0, 0, 0}, {0,0, 0, 0}},
    7.33 +/* pmc14 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(14),0, 0, 0}, {0,0, 0, 0}},
    7.34 +/* pmc15 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(15),0, 0, 0}, {0,0, 0, 0}},
    7.35 +/* pmc16 */ { PFM_REG_NOTIMPL, },
    7.36 +/* pmc17 */ { PFM_REG_NOTIMPL, },
    7.37 +/* pmc18 */ { PFM_REG_NOTIMPL, },
    7.38 +/* pmc19 */ { PFM_REG_NOTIMPL, },
    7.39 +/* pmc20 */ { PFM_REG_NOTIMPL, },
    7.40 +/* pmc21 */ { PFM_REG_NOTIMPL, },
    7.41 +/* pmc22 */ { PFM_REG_NOTIMPL, },
    7.42 +/* pmc23 */ { PFM_REG_NOTIMPL, },
    7.43 +/* pmc24 */ { PFM_REG_NOTIMPL, },
    7.44 +/* pmc25 */ { PFM_REG_NOTIMPL, },
    7.45 +/* pmc26 */ { PFM_REG_NOTIMPL, },
    7.46 +/* pmc27 */ { PFM_REG_NOTIMPL, },
    7.47 +/* pmc28 */ { PFM_REG_NOTIMPL, },
    7.48 +/* pmc29 */ { PFM_REG_NOTIMPL, },
    7.49 +/* pmc30 */ { PFM_REG_NOTIMPL, },
    7.50 +/* pmc31 */ { PFM_REG_NOTIMPL, },
    7.51 +/* pmc32 */ { PFM_REG_CONFIG,  0, 0x30f01ffffffffff, 0x30f01ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
    7.52 +/* pmc33 */ { PFM_REG_CONFIG,  0, 0x0,  0x1ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
    7.53 +/* pmc34 */ { PFM_REG_CONFIG,  0, 0xf01ffffffffff, 0xf01ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
    7.54 +/* pmc35 */ { PFM_REG_CONFIG,  0, 0x0,  0x1ffffffffff, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
    7.55 +/* pmc36 */ { PFM_REG_CONFIG,  0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
    7.56 +/* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}},
    7.57 +/* pmc38 */ { PFM_REG_CONFIG,  0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
    7.58 +/* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
    7.59 +/* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}},
    7.60 +/* pmc41 */ { PFM_REG_CONFIG,  0, 0x00002078fefefefe, 0x1e00018181818, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
    7.61 +/* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
    7.62 +	    { PFM_REG_END    , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
    7.63 +};
    7.64 +
    7.65 +static pfm_reg_desc_t pfm_mont_pmd_desc[PMU_MAX_PMDS]={
    7.66 +/* pmd0  */ { PFM_REG_NOTIMPL, }, 
    7.67 +/* pmd1  */ { PFM_REG_NOTIMPL, },
    7.68 +/* pmd2  */ { PFM_REG_NOTIMPL, },
    7.69 +/* pmd3  */ { PFM_REG_NOTIMPL, },
    7.70 +/* pmd4  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(4),0, 0, 0}},
    7.71 +/* pmd5  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(5),0, 0, 0}},
    7.72 +/* pmd6  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(6),0, 0, 0}},
    7.73 +/* pmd7  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(7),0, 0, 0}},
    7.74 +/* pmd8  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(8),0, 0, 0}}, 
    7.75 +/* pmd9  */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(9),0, 0, 0}},
    7.76 +/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(10),0, 0, 0}},
    7.77 +/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(11),0, 0, 0}},
    7.78 +/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(12),0, 0, 0}},
    7.79 +/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(13),0, 0, 0}},
    7.80 +/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(14),0, 0, 0}},
    7.81 +/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(15),0, 0, 0}},
    7.82 +/* pmd16 */ { PFM_REG_NOTIMPL, },
    7.83 +/* pmd17 */ { PFM_REG_NOTIMPL, },
    7.84 +/* pmd18 */ { PFM_REG_NOTIMPL, },
    7.85 +/* pmd19 */ { PFM_REG_NOTIMPL, },
    7.86 +/* pmd20 */ { PFM_REG_NOTIMPL, },
    7.87 +/* pmd21 */ { PFM_REG_NOTIMPL, },
    7.88 +/* pmd22 */ { PFM_REG_NOTIMPL, },
    7.89 +/* pmd23 */ { PFM_REG_NOTIMPL, },
    7.90 +/* pmd24 */ { PFM_REG_NOTIMPL, },
    7.91 +/* pmd25 */ { PFM_REG_NOTIMPL, },
    7.92 +/* pmd26 */ { PFM_REG_NOTIMPL, },
    7.93 +/* pmd27 */ { PFM_REG_NOTIMPL, },
    7.94 +/* pmd28 */ { PFM_REG_NOTIMPL, },
    7.95 +/* pmd29 */ { PFM_REG_NOTIMPL, },
    7.96 +/* pmd30 */ { PFM_REG_NOTIMPL, },
    7.97 +/* pmd31 */ { PFM_REG_NOTIMPL, },
    7.98 +/* pmd32 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(33)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
    7.99 +/* pmd33 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
   7.100 +/* pmd34 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(35),0, 0, 0}, {RDEP(37),0, 0, 0}},
   7.101 +/* pmd35 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(34),0, 0, 0}, {RDEP(37),0, 0, 0}},
   7.102 +/* pmd36 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(33),0, 0, 0}, {RDEP(40),0, 0, 0}},
   7.103 +/* pmd37 */ { PFM_REG_NOTIMPL, },
   7.104 +/* pmd38 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.105 +/* pmd39 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.106 +/* pmd40 */ { PFM_REG_NOTIMPL, },
   7.107 +/* pmd41 */ { PFM_REG_NOTIMPL, },
   7.108 +/* pmd42 */ { PFM_REG_NOTIMPL, },
   7.109 +/* pmd43 */ { PFM_REG_NOTIMPL, },
   7.110 +/* pmd44 */ { PFM_REG_NOTIMPL, },
   7.111 +/* pmd45 */ { PFM_REG_NOTIMPL, },
   7.112 +/* pmd46 */ { PFM_REG_NOTIMPL, },
   7.113 +/* pmd47 */ { PFM_REG_NOTIMPL, },
   7.114 +/* pmd48 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.115 +/* pmd49 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.116 +/* pmd50 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.117 +/* pmd51 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.118 +/* pmd52 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.119 +/* pmd53 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.120 +/* pmd54 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.121 +/* pmd55 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.122 +/* pmd56 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.123 +/* pmd57 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.124 +/* pmd58 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.125 +/* pmd59 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.126 +/* pmd60 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.127 +/* pmd61 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.128 +/* pmd62 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.129 +/* pmd63 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
   7.130 +	    { PFM_REG_END   , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
   7.131 +};
   7.132 +
   7.133 +/*
   7.134 + * PMC reserved fields must have their power-up values preserved
   7.135 + */
   7.136 +static int
   7.137 +pfm_mont_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
   7.138 +{
   7.139 +	unsigned long tmp1, tmp2, ival = *val;
   7.140 +
   7.141 +	/* remove reserved areas from user value */
   7.142 +	tmp1 = ival & PMC_RSVD_MASK(cnum);
   7.143 +
   7.144 +	/* get reserved fields values */
   7.145 +	tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
   7.146 +
   7.147 +	*val = tmp1 | tmp2;
   7.148 +
   7.149 +	DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
   7.150 +		  cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
   7.151 +	return 0;
   7.152 +}
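
The merge performed by pfm_mont_reserved() above can be illustrated with a tiny standalone sketch; the mask and default value below are made up for illustration and are not taken from any real Montecito PMC description:

	/* Standalone illustration of the reserved-field merge above.
	 * rsvd_mask marks the user-writable bits; everything else keeps
	 * its power-up default. */
	#include <stdio.h>

	int main(void)
	{
		unsigned long rsvd_mask = 0x0000ffffUL;  /* hypothetical writable bits  */
		unsigned long dfl_val   = 0xabcd0000UL;  /* hypothetical power-up value */
		unsigned long user_val  = 0x12345678UL;  /* value supplied by caller    */
		unsigned long merged;

		merged = (user_val & rsvd_mask) | (dfl_val & ~rsvd_mask);
		printf("merged=0x%lx\n", merged);        /* prints merged=0xabcd5678    */
		return 0;
	}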
   7.153 +
   7.154 +/*
   7.155 + * task can be NULL if the context is unloaded
   7.156 + */
   7.157 +static int
   7.158 +pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
   7.159 +{
   7.160 +	int ret = 0;
   7.161 +	unsigned long val32 = 0, val38 = 0, val41 = 0;
   7.162 +	unsigned long tmpval;
   7.163 +	int check_case1 = 0;
   7.164 +	int is_loaded;
   7.165 +
   7.166 +	/* first preserve the reserved fields */
   7.167 +	pfm_mont_reserved(cnum, val, regs);
   7.168 +
   7.169 +	tmpval = *val;
   7.170 +
   7.171 +	/* sanity check */
   7.172 +	if (ctx == NULL) return -EINVAL;
   7.173 +
   7.174 +	is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
   7.175 +
   7.176 +	/*
    7.177 +	 * we must clear the debug registers if pmc41 has a value which enables
    7.178 +	 * memory pipeline event constraints. In this case we need to clear the
    7.179 +	 * debug registers if they have not yet been accessed. This is required
    7.180 +	 * to avoid picking up stale state.
    7.181 +	 * PMC41 is "active" if:
    7.182 +	 * 	one of the pmc41.cfg_dtagXX fields is different from 0x3
    7.183 +	 * AND
    7.184 +	 * 	the corresponding pmc41.en_dbrpXX is set.
   7.185 +	 * AND
   7.186 +	 *	ctx_fl_using_dbreg == 0  (i.e., dbr not yet used)
   7.187 +	 */
   7.188 +	DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded));
   7.189 +
   7.190 +	if (cnum == 41 && is_loaded 
   7.191 +	    && (tmpval & 0x1e00000000000) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
   7.192 +
   7.193 +		DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval));
   7.194 +
   7.195 +		/* don't mix debug with perfmon */
   7.196 +		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
   7.197 +
   7.198 +		/*
    7.199 +		 * a count of 0 will mark the debug registers as in use and also
    7.200 +		 * ensure that they are properly cleared.
   7.201 +		 */
   7.202 +		ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
   7.203 +		if (ret) return ret;
   7.204 +	}
   7.205 +	/*
   7.206 +	 * we must clear the (instruction) debug registers if:
   7.207 +	 * 	pmc38.ig_ibrpX is 0 (enabled)
   7.208 +	 * AND
   7.209 +	 *	ctx_fl_using_dbreg == 0  (i.e., dbr not yet used)
   7.210 +	 */
   7.211 +	if (cnum == 38 && is_loaded && ((tmpval & 0x492UL) != 0x492UL) && ctx->ctx_fl_using_dbreg == 0) {
   7.212 +
   7.213 +		DPRINT(("pmc38=0x%lx has active pmc38 settings, clearing ibr\n", tmpval));
   7.214 +
   7.215 +		/* don't mix debug with perfmon */
   7.216 +		if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
   7.217 +
   7.218 +		/*
   7.219 +		 * a count of 0 will mark the debug registers as in use and also
   7.220 +		 * ensure that they are properly cleared.
   7.221 +		 */
   7.222 +		ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
   7.223 +		if (ret) return ret;
   7.224 +
   7.225 +	}
   7.226 +	switch(cnum) {
   7.227 +		case  32: val32 = *val;
   7.228 +			  val38 = ctx->ctx_pmcs[38];
   7.229 +			  val41 = ctx->ctx_pmcs[41];
   7.230 +			  check_case1 = 1;
   7.231 +			  break;
   7.232 +		case  38: val38 = *val;
   7.233 +			  val32 = ctx->ctx_pmcs[32];
   7.234 +			  val41 = ctx->ctx_pmcs[41];
   7.235 +			  check_case1 = 1;
   7.236 +			  break;
   7.237 +		case  41: val41 = *val;
   7.238 +			  val32 = ctx->ctx_pmcs[32];
   7.239 +			  val38 = ctx->ctx_pmcs[38];
   7.240 +			  check_case1 = 1;
   7.241 +			  break;
   7.242 +	}
    7.243 +	/* check for an illegal configuration which can produce inconsistencies in
    7.244 +	 * tagging i-side events in the L1D and L2 caches
   7.245 +	 */
   7.246 +	if (check_case1) {
   7.247 +		ret =   (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0)
   7.248 +		     && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0)
   7.249 +		     ||  (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0));
   7.250 +		if (ret) {
   7.251 +			DPRINT(("invalid config pmc38=0x%lx pmc41=0x%lx pmc32=0x%lx\n", val38, val41, val32));
   7.252 +			return -EINVAL;
   7.253 +		}
   7.254 +	}
   7.255 +	*val = tmpval;
   7.256 +	return 0;
   7.257 +}
   7.258 +
   7.259 +/*
   7.260 + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
   7.261 + */
   7.262 +static pmu_config_t pmu_conf_mont={
   7.263 +	.pmu_name        = "Montecito",
   7.264 +	.pmu_family      = 0x20,
   7.265 +	.flags           = PFM_PMU_IRQ_RESEND,
   7.266 +	.ovfl_val        = (1UL << 47) - 1,
   7.267 +	.pmd_desc        = pfm_mont_pmd_desc,
   7.268 +	.pmc_desc        = pfm_mont_pmc_desc,
   7.269 +	.num_ibrs        = 8,
   7.270 +	.num_dbrs        = 8,
    7.271 +	.use_rr_dbregs   = 1 /* debug registers are used for range restrictions */
   7.272 +};
     8.1 --- a/xen/arch/ia64/linux/README.origin	Tue Nov 28 10:37:36 2006 -0700
     8.2 +++ b/xen/arch/ia64/linux/README.origin	Tue Nov 28 11:15:35 2006 -0700
     8.3 @@ -24,3 +24,6 @@ idiv64.S		-> linux/arch/ia64/lib/idiv64.
     8.4  memcpy_mck.S		-> linux/arch/ia64/lib/memcpy_mck.S
     8.5  memset.S		-> linux/arch/ia64/lib/memset.S
     8.6  strlen.S		-> linux/arch/ia64/lib/strlen.S
     8.7 +
     8.8 +# The files below are from Linux-2.6.16.33
     8.9 +carta_random.S		-> linux/arch/ia64/lib/carta_random.S
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/xen/arch/ia64/linux/carta_random.S	Tue Nov 28 11:15:35 2006 -0700
     9.3 @@ -0,0 +1,54 @@
     9.4 +/*
     9.5 + * Fast, simple, yet decent quality random number generator based on
     9.6 + * a paper by David G. Carta ("Two Fast Implementations of the
     9.7 + * `Minimal Standard' Random Number Generator," Communications of the
     9.8 + * ACM, January, 1990).
     9.9 + *
    9.10 + * Copyright (C) 2002 Hewlett-Packard Co
    9.11 + *	David Mosberger-Tang <davidm@hpl.hp.com>
    9.12 + */
    9.13 +
    9.14 +#include <asm/asmmacro.h>
    9.15 +
    9.16 +#define a	r2
    9.17 +#define m	r3
    9.18 +#define lo	r8
    9.19 +#define hi	r9
    9.20 +#define t0	r16
    9.21 +#define t1	r17
    9.22 +#define	seed	r32
    9.23 +
    9.24 +GLOBAL_ENTRY(carta_random32)
    9.25 +	movl	a = (16807 << 16) | 16807
    9.26 +	;;
    9.27 +	pmpyshr2.u t0 = a, seed, 0
    9.28 +	pmpyshr2.u t1 = a, seed, 16
    9.29 +	;;
    9.30 +	unpack2.l t0 = t1, t0
    9.31 +	dep	m = -1, r0, 0, 31
    9.32 +	;;
    9.33 +	zxt4	lo = t0
    9.34 +	shr.u	hi = t0, 32
    9.35 +	;;
    9.36 +	dep	t0 = 0, hi, 15, 49	// t0 = (hi & 0x7fff)
    9.37 +	;;
    9.38 +	shl	t0 = t0, 16		// t0 = (hi & 0x7fff) << 16
    9.39 +	shr	t1 = hi, 15		// t1 = (hi >> 15)
    9.40 +	;;
    9.41 +	add	lo = lo, t0
    9.42 +	;;
    9.43 +	cmp.gtu	p6, p0 = lo, m
    9.44 +	;;
    9.45 +(p6)	and	lo = lo, m
    9.46 +	;;
    9.47 +(p6)	add	lo = 1, lo
    9.48 +	;;
    9.49 +	add	lo = lo, t1
    9.50 +	;;
    9.51 +	cmp.gtu p6, p0 = lo, m
    9.52 +	;;
    9.53 +(p6)	and	lo = lo, m
    9.54 +	;;
    9.55 +(p6)	add	lo = 1, lo
    9.56 +	br.ret.sptk.many rp
    9.57 +END(carta_random32)
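
For readers who do not follow ia64 assembly, the recurrence computed above is the "minimal standard" Lehmer generator x' = 16807 * x mod (2^31 - 1), with Carta's fold of the high product bits replacing the division. The following is a rough C sketch, not a bit-exact model of the code above (which also copes with full 32-bit seeds):

	#include <stdio.h>
	#include <stdint.h>

	/* x' = 16807 * x mod (2^31 - 1), using Carta's fold instead of a divide. */
	static uint32_t carta_random32_sketch(uint32_t seed)
	{
		uint64_t p  = (uint64_t)16807 * seed;        /* at most a 46-bit product     */
		uint32_t lo = (uint32_t)(p & 0x7fffffffUL);  /* low 31 bits                  */
		uint32_t hi = (uint32_t)(p >> 31);           /* high bits, each worth 2^31   */
		uint32_t r  = lo + hi;                       /* 2^31 == 1 (mod 2^31 - 1)     */

		if (r > 0x7fffffffU)                         /* at most one fold is needed   */
			r -= 0x7fffffffU;
		return r;
	}

	int main(void)
	{
		uint32_t x = 1;
		int i;

		for (i = 0; i < 3; i++) {
			x = carta_random32_sketch(x);
			printf("%u\n", x);   /* 16807, 282475249, 1622650073 */
		}
		return 0;
	}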
    10.1 --- a/xen/include/asm-ia64/linux-xen/asm/README.origin	Tue Nov 28 10:37:36 2006 -0700
    10.2 +++ b/xen/include/asm-ia64/linux-xen/asm/README.origin	Tue Nov 28 11:15:35 2006 -0700
    10.3 @@ -30,3 +30,7 @@ types.h			-> linux/include/asm-ia64/type
    10.4  
    10.5  # The files below are from Linux-2.6.16
    10.6  iosapic.h		-> linux/include/asm-ia64/iosapic.h
    10.7 +
    10.8 +# The files below are from Linux-2.6.16.33
    10.9 +perfmon.h		-> linux/include/asm-ia64/perfmon.h
   10.10 +perfmon_default_smpl.h	-> linux/include/asm-ia64/perfmon_default_smpl.h
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/xen/include/asm-ia64/linux-xen/asm/perfmon.h	Tue Nov 28 11:15:35 2006 -0700
    11.3 @@ -0,0 +1,279 @@
    11.4 +/*
    11.5 + * Copyright (C) 2001-2003 Hewlett-Packard Co
    11.6 + *               Stephane Eranian <eranian@hpl.hp.com>
    11.7 + */
    11.8 +
    11.9 +#ifndef _ASM_IA64_PERFMON_H
   11.10 +#define _ASM_IA64_PERFMON_H
   11.11 +
   11.12 +/*
    11.13 + * perfmon commands supported on all CPU models
   11.14 + */
   11.15 +#define PFM_WRITE_PMCS		0x01
   11.16 +#define PFM_WRITE_PMDS		0x02
   11.17 +#define PFM_READ_PMDS		0x03
   11.18 +#define PFM_STOP		0x04
   11.19 +#define PFM_START		0x05
   11.20 +#define PFM_ENABLE		0x06 /* obsolete */
   11.21 +#define PFM_DISABLE		0x07 /* obsolete */
   11.22 +#define PFM_CREATE_CONTEXT	0x08
   11.23 +#define PFM_DESTROY_CONTEXT	0x09 /* obsolete use close() */
   11.24 +#define PFM_RESTART		0x0a
   11.25 +#define PFM_PROTECT_CONTEXT	0x0b /* obsolete */
   11.26 +#define PFM_GET_FEATURES	0x0c
   11.27 +#define PFM_DEBUG		0x0d
   11.28 +#define PFM_UNPROTECT_CONTEXT	0x0e /* obsolete */
   11.29 +#define PFM_GET_PMC_RESET_VAL	0x0f
   11.30 +#define PFM_LOAD_CONTEXT	0x10
   11.31 +#define PFM_UNLOAD_CONTEXT	0x11
   11.32 +
   11.33 +/*
   11.34 + * PMU model specific commands (may not be supported on all PMU models)
   11.35 + */
   11.36 +#define PFM_WRITE_IBRS		0x20
   11.37 +#define PFM_WRITE_DBRS		0x21
   11.38 +
   11.39 +/*
   11.40 + * context flags
   11.41 + */
   11.42 +#define PFM_FL_NOTIFY_BLOCK    	 0x01	/* block task on user level notifications */
   11.43 +#define PFM_FL_SYSTEM_WIDE	 0x02	/* create a system wide context */
   11.44 +#define PFM_FL_OVFL_NO_MSG	 0x80   /* do not post overflow/end messages for notification */
   11.45 +
   11.46 +/*
   11.47 + * event set flags
   11.48 + */
   11.49 +#define PFM_SETFL_EXCL_IDLE      0x01   /* exclude idle task (syswide only) XXX: DO NOT USE YET */
   11.50 +
   11.51 +/*
   11.52 + * PMC flags
   11.53 + */
   11.54 +#define PFM_REGFL_OVFL_NOTIFY	0x1	/* send notification on overflow */
   11.55 +#define PFM_REGFL_RANDOM	0x2	/* randomize sampling interval   */
   11.56 +
   11.57 +/*
   11.58 + * PMD/PMC/IBR/DBR return flags (ignored on input)
   11.59 + *
   11.60 + * Those flags are used on output and must be checked in case EAGAIN is returned
   11.61 + * by any of the calls using a pfarg_reg_t or pfarg_dbreg_t structure.
   11.62 + */
   11.63 +#define PFM_REG_RETFL_NOTAVAIL	(1UL<<31) /* set if register is implemented but not available */
   11.64 +#define PFM_REG_RETFL_EINVAL	(1UL<<30) /* set if register entry is invalid */
   11.65 +#define PFM_REG_RETFL_MASK	(PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL)
   11.66 +
   11.67 +#define PFM_REG_HAS_ERROR(flag)	(((flag) & PFM_REG_RETFL_MASK) != 0)
   11.68 +
   11.69 +typedef unsigned char pfm_uuid_t[16];	/* custom sampling buffer identifier type */
   11.70 +
   11.71 +/*
   11.72 + * Request structure used to define a context
   11.73 + */
   11.74 +typedef struct {
   11.75 +	pfm_uuid_t     ctx_smpl_buf_id;	 /* which buffer format to use (if needed) */
   11.76 +	unsigned long  ctx_flags;	 /* noblock/block */
   11.77 +	unsigned short ctx_nextra_sets;	 /* number of extra event sets (you always get 1) */
   11.78 +	unsigned short ctx_reserved1;	 /* for future use */
   11.79 +	int	       ctx_fd;		 /* return arg: unique identification for context */
    11.80 +	void	       *ctx_smpl_vaddr;	 /* return arg: virtual address of sampling buffer, if used */
   11.81 +	unsigned long  ctx_reserved2[11];/* for future use */
   11.82 +} pfarg_context_t;
   11.83 +
   11.84 +/*
   11.85 + * Request structure used to write/read a PMC or PMD
   11.86 + */
   11.87 +typedef struct {
   11.88 +	unsigned int	reg_num;	   /* which register */
   11.89 +	unsigned short	reg_set;	   /* event set for this register */
   11.90 +	unsigned short	reg_reserved1;	   /* for future use */
   11.91 +
   11.92 +	unsigned long	reg_value;	   /* initial pmc/pmd value */
   11.93 +	unsigned long	reg_flags;	   /* input: pmc/pmd flags, return: reg error */
   11.94 +
   11.95 +	unsigned long	reg_long_reset;	   /* reset after buffer overflow notification */
   11.96 +	unsigned long	reg_short_reset;   /* reset after counter overflow */
   11.97 +
   11.98 +	unsigned long	reg_reset_pmds[4]; /* which other counters to reset on overflow */
   11.99 +	unsigned long	reg_random_seed;   /* seed value when randomization is used */
  11.100 +	unsigned long	reg_random_mask;   /* bitmask used to limit random value */
  11.101 +	unsigned long   reg_last_reset_val;/* return: PMD last reset value */
  11.102 +
  11.103 +	unsigned long	reg_smpl_pmds[4];  /* which pmds are accessed when PMC overflows */
  11.104 +	unsigned long	reg_smpl_eventid;  /* opaque sampling event identifier */
  11.105 +
  11.106 +	unsigned long   reg_reserved2[3];   /* for future use */
  11.107 +} pfarg_reg_t;
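
As the PFM_REG_RETFL comment above notes, reg_flags carries per-register error information back to the caller. A hedged sketch of how user code might scan a pfarg_reg_t array after a failed request follows; the helper name and the printing are illustrative only:

	/* Illustrative helper: report which pfarg_reg_t entries perfmon
	 * flagged after a failed PFM_WRITE_PMCS/PFM_WRITE_PMDS request. */
	#include <stdio.h>

	static void report_reg_errors(const pfarg_reg_t *req, int nreq)
	{
		int i;

		for (i = 0; i < nreq; i++) {
			if (PFM_REG_HAS_ERROR(req[i].reg_flags))
				fprintf(stderr, "reg %u rejected, flags=0x%lx\n",
					req[i].reg_num, req[i].reg_flags);
		}
	}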
  11.108 +
  11.109 +typedef struct {
  11.110 +	unsigned int	dbreg_num;		/* which debug register */
  11.111 +	unsigned short	dbreg_set;		/* event set for this register */
  11.112 +	unsigned short	dbreg_reserved1;	/* for future use */
  11.113 +	unsigned long	dbreg_value;		/* value for debug register */
  11.114 +	unsigned long	dbreg_flags;		/* return: dbreg error */
  11.115 +	unsigned long	dbreg_reserved2[1];	/* for future use */
  11.116 +} pfarg_dbreg_t;
  11.117 +
  11.118 +typedef struct {
  11.119 +	unsigned int	ft_version;	/* perfmon: major [16-31], minor [0-15] */
  11.120 +	unsigned int	ft_reserved;	/* reserved for future use */
  11.121 +	unsigned long	reserved[4];	/* for future use */
  11.122 +} pfarg_features_t;
  11.123 +
  11.124 +typedef struct {
  11.125 +	pid_t		load_pid;	   /* process to load the context into */
  11.126 +	unsigned short	load_set;	   /* first event set to load */
  11.127 +	unsigned short	load_reserved1;	   /* for future use */
  11.128 +	unsigned long	load_reserved2[3]; /* for future use */
  11.129 +} pfarg_load_t;
  11.130 +
  11.131 +typedef struct {
  11.132 +	int		msg_type;		/* generic message header */
  11.133 +	int		msg_ctx_fd;		/* generic message header */
  11.134 +	unsigned long	msg_ovfl_pmds[4];	/* which PMDs overflowed */
  11.135 +	unsigned short  msg_active_set;		/* active set at the time of overflow */
  11.136 +	unsigned short  msg_reserved1;		/* for future use */
  11.137 +	unsigned int    msg_reserved2;		/* for future use */
  11.138 +	unsigned long	msg_tstamp;		/* for perf tuning/debug */
  11.139 +} pfm_ovfl_msg_t;
  11.140 +
  11.141 +typedef struct {
  11.142 +	int		msg_type;		/* generic message header */
  11.143 +	int		msg_ctx_fd;		/* generic message header */
  11.144 +	unsigned long	msg_tstamp;		/* for perf tuning */
  11.145 +} pfm_end_msg_t;
  11.146 +
  11.147 +typedef struct {
  11.148 +	int		msg_type;		/* type of the message */
  11.149 +	int		msg_ctx_fd;		/* unique identifier for the context */
  11.150 +	unsigned long	msg_tstamp;		/* for perf tuning */
  11.151 +} pfm_gen_msg_t;
  11.152 +
  11.153 +#define PFM_MSG_OVFL	1	/* an overflow happened */
  11.154 +#define PFM_MSG_END	2	/* task to which context was attached ended */
  11.155 +
  11.156 +typedef union {
  11.157 +	pfm_ovfl_msg_t	pfm_ovfl_msg;
  11.158 +	pfm_end_msg_t	pfm_end_msg;
  11.159 +	pfm_gen_msg_t	pfm_gen_msg;
  11.160 +} pfm_msg_t;
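
The structures above are the building blocks of the perfmon-2 user interface. A hedged sketch of the usual user-level sequence for self-monitoring one counter is shown below; it assumes a userspace perfmonctl() wrapper and <unistd.h>, omits all error handling, and uses PMC/PMD 4 with a dummy event encoding purely for illustration:

	static void self_monitor_sketch(void)
	{
		pfarg_context_t ctx  = { .ctx_flags = 0 };
		pfarg_reg_t     pc   = { .reg_num = 4, .reg_value = 0 /* event encoding */ };
		pfarg_reg_t     pd   = { .reg_num = 4, .reg_value = 0 };
		pfarg_load_t    load = { .load_pid = getpid() };

		perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1);    /* fd returned in ctx.ctx_fd  */
		perfmonctl(ctx.ctx_fd, PFM_WRITE_PMCS, &pc, 1);
		perfmonctl(ctx.ctx_fd, PFM_WRITE_PMDS, &pd, 1);
		perfmonctl(ctx.ctx_fd, PFM_LOAD_CONTEXT, &load, 1);
		perfmonctl(ctx.ctx_fd, PFM_START, NULL, 0);
		/* ... run the code to be measured ... */
		perfmonctl(ctx.ctx_fd, PFM_STOP, NULL, 0);
		perfmonctl(ctx.ctx_fd, PFM_READ_PMDS, &pd, 1); /* pd.reg_value is the count  */
	}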
  11.161 +
  11.162 +/*
  11.163 + * Define the version numbers for both perfmon as a whole and the sampling buffer format.
  11.164 + */
  11.165 +#define PFM_VERSION_MAJ		 2U
  11.166 +#define PFM_VERSION_MIN		 0U
  11.167 +#define PFM_VERSION		 (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff))
  11.168 +#define PFM_VERSION_MAJOR(x)	 (((x)>>16) & 0xffff)
  11.169 +#define PFM_VERSION_MINOR(x)	 ((x) & 0xffff)
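
A consumer that issues PFM_GET_FEATURES gets the kernel's version back in this packed major/minor form. A minimal compatibility check, assuming (as perfmon user code commonly does) that only the major number has to match, might look like:

	/* Hedged sketch: accept any kernel whose perfmon major version
	 * matches ours; minor revisions are assumed to stay compatible. */
	static int pfm_version_compatible(unsigned int ft_version)
	{
		return PFM_VERSION_MAJOR(ft_version) == PFM_VERSION_MAJ;
	}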
  11.170 +
  11.171 +
  11.172 +/*
  11.173 + * miscellaneous architected definitions
  11.174 + */
  11.175 +#define PMU_FIRST_COUNTER	4	/* first counting monitor (PMC/PMD) */
  11.176 +#define PMU_MAX_PMCS		256	/* maximum architected number of PMC registers */
  11.177 +#define PMU_MAX_PMDS		256	/* maximum architected number of PMD registers */
  11.178 +
  11.179 +#ifdef __KERNEL__
  11.180 +
  11.181 +extern long perfmonctl(int fd, int cmd, void *arg, int narg);
  11.182 +
  11.183 +typedef struct {
  11.184 +	void (*handler)(int irq, void *arg, struct pt_regs *regs);
  11.185 +} pfm_intr_handler_desc_t;
  11.186 +
  11.187 +extern void pfm_save_regs (struct task_struct *);
  11.188 +extern void pfm_load_regs (struct task_struct *);
  11.189 +
  11.190 +extern void pfm_exit_thread(struct task_struct *);
  11.191 +extern int  pfm_use_debug_registers(struct task_struct *);
  11.192 +extern int  pfm_release_debug_registers(struct task_struct *);
  11.193 +extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin);
  11.194 +extern void pfm_inherit(struct task_struct *task, struct pt_regs *regs);
  11.195 +extern void pfm_init_percpu(void);
  11.196 +extern void pfm_handle_work(void);
  11.197 +extern int  pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
  11.198 +extern int  pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
  11.199 +
  11.200 +
  11.201 +
  11.202 +/*
  11.203 + * Reset PMD register flags
  11.204 + */
  11.205 +#define PFM_PMD_SHORT_RESET	0
  11.206 +#define PFM_PMD_LONG_RESET	1
  11.207 +
  11.208 +typedef union {
  11.209 +	unsigned int val;
  11.210 +	struct {
  11.211 +		unsigned int notify_user:1;	/* notify user program of overflow */
  11.212 +		unsigned int reset_ovfl_pmds:1;	/* reset overflowed PMDs */
  11.213 +		unsigned int block_task:1;	/* block monitored task on kernel exit */
  11.214 +		unsigned int mask_monitoring:1; /* mask monitors via PMCx.plm */
  11.215 +		unsigned int reserved:28;	/* for future use */
  11.216 +	} bits;
  11.217 +} pfm_ovfl_ctrl_t;
  11.218 +
  11.219 +typedef struct {
  11.220 +	unsigned char	ovfl_pmd;			/* index of overflowed PMD  */
  11.221 +	unsigned char   ovfl_notify;			/* =1 if monitor requested overflow notification */
  11.222 +	unsigned short  active_set;			/* event set active at the time of the overflow */
  11.223 +	pfm_ovfl_ctrl_t ovfl_ctrl;			/* return: perfmon controls to set by handler */
  11.224 +
   11.225 +	unsigned long   pmd_last_reset;			/* last reset value of the PMD */
  11.226 +	unsigned long	smpl_pmds[4];			/* bitmask of other PMD of interest on overflow */
  11.227 +	unsigned long   smpl_pmds_values[PMU_MAX_PMDS]; /* values for the other PMDs of interest */
  11.228 +	unsigned long   pmd_value;			/* current 64-bit value of the PMD */
  11.229 +	unsigned long	pmd_eventid;			/* eventid associated with PMD */
  11.230 +} pfm_ovfl_arg_t;
  11.231 +
  11.232 +
  11.233 +typedef struct {
  11.234 +	char		*fmt_name;
  11.235 +	pfm_uuid_t	fmt_uuid;
  11.236 +	size_t		fmt_arg_size;
  11.237 +	unsigned long	fmt_flags;
  11.238 +
  11.239 +	int		(*fmt_validate)(struct task_struct *task, unsigned int flags, int cpu, void *arg);
  11.240 +	int		(*fmt_getsize)(struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size);
  11.241 +	int 		(*fmt_init)(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *arg);
  11.242 +	int		(*fmt_handler)(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp);
  11.243 +	int		(*fmt_restart)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
  11.244 +	int		(*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
  11.245 +	int		(*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs);
  11.246 +
  11.247 +	struct list_head fmt_list;
  11.248 +} pfm_buffer_fmt_t;
  11.249 +
  11.250 +extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt);
  11.251 +extern int pfm_unregister_buffer_fmt(pfm_uuid_t uuid);
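
A hedged sketch of how a custom sampling format would describe itself and register with perfmon follows; the UUID, name, and trivial handler are placeholders (a real format appends a record to its buffer on each overflow):

	static int count_fmt_handler(struct task_struct *task, void *buf,
				     pfm_ovfl_arg_t *arg, struct pt_regs *regs,
				     unsigned long stamp)
	{
		(*(unsigned long *)buf)++;                  /* just count overflows */
		arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1;    /* ask perfmon to reset the PMD */
		return 0;
	}

	static pfm_buffer_fmt_t count_fmt = {
		.fmt_name    = "count-only",
		.fmt_uuid    = { 0x01, },   /* must be a globally unique 16-byte id in real use */
		.fmt_handler = count_fmt_handler,
	};

	static int __init count_fmt_init(void)
	{
		return pfm_register_buffer_fmt(&count_fmt);
	}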
  11.252 +
  11.253 +/*
  11.254 + * perfmon interface exported to modules
  11.255 + */
  11.256 +extern int pfm_mod_read_pmds(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
  11.257 +extern int pfm_mod_write_pmcs(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
  11.258 +extern int pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
  11.259 +extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
  11.260 +
  11.261 +/*
   11.262 + * describes the content of the local_cpu_data->pfm_syst_info field
  11.263 + */
  11.264 +#define PFM_CPUINFO_SYST_WIDE	0x1	/* if set a system wide session exists */
  11.265 +#define PFM_CPUINFO_DCR_PP	0x2	/* if set the system wide session has started */
  11.266 +#define PFM_CPUINFO_EXCL_IDLE	0x4	/* the system wide session excludes the idle task */
  11.267 +
  11.268 +/*
  11.269 + * sysctl control structure. visible to sampling formats
  11.270 + */
  11.271 +typedef struct {
  11.272 +	int	debug;		/* turn on/off debugging via syslog */
  11.273 +	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
   11.274 +	int	fastctxsw;	/* turn on/off fast (insecure) ctxsw */
  11.275 +	int	expert_mode;	/* turn on/off value checking */
  11.276 +} pfm_sysctl_t;
  11.277 +extern pfm_sysctl_t pfm_sysctl;
  11.278 +
  11.279 +
  11.280 +#endif /* __KERNEL__ */
  11.281 +
  11.282 +#endif /* _ASM_IA64_PERFMON_H */
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/xen/include/asm-ia64/linux-xen/asm/perfmon_default_smpl.h	Tue Nov 28 11:15:35 2006 -0700
    12.3 @@ -0,0 +1,83 @@
    12.4 +/*
    12.5 + * Copyright (C) 2002-2003 Hewlett-Packard Co
    12.6 + *               Stephane Eranian <eranian@hpl.hp.com>
    12.7 + *
    12.8 + * This file implements the default sampling buffer format
    12.9 + * for Linux/ia64 perfmon subsystem.
   12.10 + */
   12.11 +#ifndef __PERFMON_DEFAULT_SMPL_H__
   12.12 +#define __PERFMON_DEFAULT_SMPL_H__ 1
   12.13 +
   12.14 +#define PFM_DEFAULT_SMPL_UUID { \
   12.15 +		0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82, 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97}
   12.16 +
   12.17 +/*
   12.18 + * format specific parameters (passed at context creation)
   12.19 + */
   12.20 +typedef struct {
   12.21 +	unsigned long buf_size;		/* size of the buffer in bytes */
   12.22 +	unsigned int  flags;		/* buffer specific flags */
   12.23 +	unsigned int  res1;		/* for future use */
   12.24 +	unsigned long reserved[2];	/* for future use */
   12.25 +} pfm_default_smpl_arg_t;
   12.26 +
   12.27 +/*
   12.28 + * combined context+format specific structure. Can be passed
   12.29 + * to PFM_CONTEXT_CREATE
   12.30 + */
   12.31 +typedef struct {
   12.32 +	pfarg_context_t		ctx_arg;
   12.33 +	pfm_default_smpl_arg_t	buf_arg;
   12.34 +} pfm_default_smpl_ctx_arg_t;
   12.35 +
   12.36 +/*
   12.37 + * This header is at the beginning of the sampling buffer returned to the user.
   12.38 + * It is directly followed by the first record.
   12.39 + */
   12.40 +typedef struct {
   12.41 +	unsigned long	hdr_count;		/* how many valid entries */
   12.42 +	unsigned long	hdr_cur_offs;		/* current offset from top of buffer */
   12.43 +	unsigned long	hdr_reserved2;		/* reserved for future use */
   12.44 +
   12.45 +	unsigned long	hdr_overflows;		/* how many times the buffer overflowed */
   12.46 +	unsigned long   hdr_buf_size;		/* how many bytes in the buffer */
   12.47 +
   12.48 +	unsigned int	hdr_version;		/* contains perfmon version (smpl format diffs) */
   12.49 +	unsigned int	hdr_reserved1;		/* for future use */
   12.50 +	unsigned long	hdr_reserved[10];	/* for future use */
   12.51 +} pfm_default_smpl_hdr_t;
   12.52 +
   12.53 +/*
   12.54 + * Entry header in the sampling buffer.  The header is directly followed
    12.55 + * by the values of the PMD registers of interest saved in increasing
   12.56 + * index order: PMD4, PMD5, and so on. How many PMDs are present depends 
   12.57 + * on how the session was programmed.
   12.58 + *
   12.59 + * In the case where multiple counters overflow at the same time, multiple
   12.60 + * entries are written consecutively.
   12.61 + *
    12.62 + * The last_reset_val member indicates the initial value of the overflowed PMD.
   12.63 + */
   12.64 +typedef struct {
   12.65 +        int             pid;                    /* thread id (for NPTL, this is gettid()) */
   12.66 +        unsigned char   reserved1[3];           /* reserved for future use */
   12.67 +        unsigned char   ovfl_pmd;               /* index of overflowed PMD */
   12.68 +
   12.69 +        unsigned long   last_reset_val;         /* initial value of overflowed PMD */
    12.70 +        unsigned long   ip;                     /* where the overflow interrupt happened */
   12.71 +        unsigned long   tstamp;                 /* ar.itc when entering perfmon intr. handler */
   12.72 +
    12.73 +        unsigned short  cpu;                    /* cpu on which the overflow occurred */
    12.74 +        unsigned short  set;                    /* event set active when the overflow occurred */
   12.75 +        int    		tgid;              	/* thread group id (for NPTL, this is getpid()) */
   12.76 +} pfm_default_smpl_entry_t;
   12.77 +
    12.78 +#define PFM_DEFAULT_MAX_PMDS		64 /* how many pmds the data structures support (bits in an unsigned long) */
   12.79 +#define PFM_DEFAULT_MAX_ENTRY_SIZE	(sizeof(pfm_default_smpl_entry_t)+(sizeof(unsigned long)*PFM_DEFAULT_MAX_PMDS))
   12.80 +#define PFM_DEFAULT_SMPL_MIN_BUF_SIZE	(sizeof(pfm_default_smpl_hdr_t)+PFM_DEFAULT_MAX_ENTRY_SIZE)
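
Given the layout described above (a header, then variable-length entries, each followed by its PMD values), a consumer of the default format could walk the buffer as in the hedged sketch below; 'npmds' is how many PMD values follow each entry, which the consumer knows from the reg_smpl_pmds bitmask it programmed (an assumption here):

	static void walk_default_smpl(pfm_default_smpl_hdr_t *hdr, unsigned int npmds)
	{
		pfm_default_smpl_entry_t *ent = (pfm_default_smpl_entry_t *)(hdr + 1);
		unsigned long i;

		for (i = 0; i < hdr->hdr_count; i++) {
			unsigned long *pmds = (unsigned long *)(ent + 1);

			/* ent->ip, ent->ovfl_pmd and pmds[0..npmds-1] are valid here */
			ent = (pfm_default_smpl_entry_t *)(pmds + npmds);
		}
	}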
   12.81 +
   12.82 +#define PFM_DEFAULT_SMPL_VERSION_MAJ	2U
   12.83 +#define PFM_DEFAULT_SMPL_VERSION_MIN	0U
   12.84 +#define PFM_DEFAULT_SMPL_VERSION	(((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|(PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff))
   12.85 +
   12.86 +#endif /* __PERFMON_DEFAULT_SMPL_H__ */
    13.1 --- a/xen/include/asm-ia64/linux-xen/linux/README.origin	Tue Nov 28 10:37:36 2006 -0700
    13.2 +++ b/xen/include/asm-ia64/linux-xen/linux/README.origin	Tue Nov 28 11:15:35 2006 -0700
    13.3 @@ -9,3 +9,6 @@ efi.h			-> linux/include/linux/efi.h
    13.4  gfp.h	 		-> linux/include/linux/gfp.h
    13.5  hardirq.h 		-> linux/include/linux/hardirq.h
    13.6  interrupt.h 		-> linux/include/linux/interrupt.h
    13.7 +
    13.8 +# The files below are from Linux-2.6.16.33
    13.9 +oprofile.h		-> linux/include/linux/oprofile.h
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/xen/include/asm-ia64/linux-xen/linux/oprofile.h	Tue Nov 28 11:15:35 2006 -0700
    14.3 @@ -0,0 +1,119 @@
    14.4 +/**
    14.5 + * @file oprofile.h
    14.6 + *
    14.7 + * API for machine-specific interrupts to interface
    14.8 + * to oprofile.
    14.9 + *
   14.10 + * @remark Copyright 2002 OProfile authors
   14.11 + * @remark Read the file COPYING
   14.12 + *
   14.13 + * @author John Levon <levon@movementarian.org>
   14.14 + */
   14.15 +
   14.16 +#ifndef OPROFILE_H
   14.17 +#define OPROFILE_H
   14.18 +
   14.19 +#include <linux/types.h>
   14.20 +#include <linux/spinlock.h>
   14.21 +#include <asm/atomic.h>
   14.22 + 
   14.23 +struct super_block;
   14.24 +struct dentry;
   14.25 +struct file_operations;
   14.26 +struct pt_regs;
   14.27 + 
   14.28 +/* Operations structure to be filled in */
   14.29 +struct oprofile_operations {
   14.30 +	/* create any necessary configuration files in the oprofile fs.
   14.31 +	 * Optional. */
   14.32 +	int (*create_files)(struct super_block * sb, struct dentry * root);
   14.33 +	/* Do any necessary interrupt setup. Optional. */
   14.34 +	int (*setup)(void);
   14.35 +	/* Do any necessary interrupt shutdown. Optional. */
   14.36 +	void (*shutdown)(void);
   14.37 +	/* Start delivering interrupts. */
   14.38 +	int (*start)(void);
   14.39 +	/* Stop delivering interrupts. */
   14.40 +	void (*stop)(void);
   14.41 +	/* Initiate a stack backtrace. Optional. */
   14.42 +	void (*backtrace)(struct pt_regs * const regs, unsigned int depth);
   14.43 +	/* CPU identification string. */
   14.44 +	char * cpu_type;
   14.45 +};
   14.46 +
   14.47 +/**
   14.48 + * One-time initialisation. *ops must be set to a filled-in
   14.49 + * operations structure. This is called even in timer interrupt
   14.50 + * mode so an arch can set a backtrace callback.
   14.51 + *
   14.52 + * If an error occurs, the fields should be left untouched.
   14.53 + */
   14.54 +int oprofile_arch_init(struct oprofile_operations * ops);
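
A minimal sketch of what an architecture might install is shown below; the xenoprof_* helpers and the cpu_type string are placeholders assumed to exist elsewhere, not functions defined in this changeset:

	int oprofile_arch_init(struct oprofile_operations *ops)
	{
		ops->setup    = xenoprof_setup;     /* hypothetical helpers */
		ops->start    = xenoprof_start;
		ops->stop     = xenoprof_stop;
		ops->shutdown = xenoprof_shutdown;
		ops->cpu_type = "ia64/itanium2";
		return 0;
	}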
   14.55 + 
   14.56 +/**
   14.57 + * One-time exit/cleanup for the arch.
   14.58 + */
   14.59 +void oprofile_arch_exit(void);
   14.60 +
   14.61 +/**
   14.62 + * Add a sample. This may be called from any context. Pass
   14.63 + * smp_processor_id() as cpu.
   14.64 + */
   14.65 +void oprofile_add_sample(struct pt_regs * const regs, unsigned long event);
   14.66 +
   14.67 +/* Use this instead when the PC value is not from the regs. Doesn't
   14.68 + * backtrace. */
   14.69 +void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event);
   14.70 +
   14.71 +/* add a backtrace entry, to be called from the ->backtrace callback */
   14.72 +void oprofile_add_trace(unsigned long eip);
   14.73 +
   14.74 +
   14.75 +/**
   14.76 + * Create a file of the given name as a child of the given root, with
   14.77 + * the specified file operations.
   14.78 + */
   14.79 +int oprofilefs_create_file(struct super_block * sb, struct dentry * root,
   14.80 +	char const * name, struct file_operations * fops);
   14.81 +
   14.82 +int oprofilefs_create_file_perm(struct super_block * sb, struct dentry * root,
   14.83 +	char const * name, struct file_operations * fops, int perm);
   14.84 + 
   14.85 +/** Create a file for read/write access to an unsigned long. */
   14.86 +int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root,
   14.87 +	char const * name, ulong * val);
   14.88 + 
   14.89 +/** Create a file for read-only access to an unsigned long. */
   14.90 +int oprofilefs_create_ro_ulong(struct super_block * sb, struct dentry * root,
   14.91 +	char const * name, ulong * val);
   14.92 + 
   14.93 +/** Create a file for read-only access to an atomic_t. */
   14.94 +int oprofilefs_create_ro_atomic(struct super_block * sb, struct dentry * root,
   14.95 +	char const * name, atomic_t * val);
   14.96 + 
   14.97 +/** create a directory */
   14.98 +struct dentry * oprofilefs_mkdir(struct super_block * sb, struct dentry * root,
   14.99 +	char const * name);
  14.100 +
  14.101 +/**
  14.102 + * Write the given asciz string to the given user buffer @buf, updating *offset
  14.103 + * appropriately. Returns bytes written or -EFAULT.
  14.104 + */
  14.105 +ssize_t oprofilefs_str_to_user(char const * str, char __user * buf, size_t count, loff_t * offset);
  14.106 +
  14.107 +/**
  14.108 + * Convert an unsigned long value into ASCII and copy it to the user buffer @buf,
  14.109 + * updating *offset appropriately. Returns bytes written or -EFAULT.
  14.110 + */
  14.111 +ssize_t oprofilefs_ulong_to_user(unsigned long val, char __user * buf, size_t count, loff_t * offset);
  14.112 +
  14.113 +/**
  14.114 + * Read an ASCII string for a number from a userspace buffer and fill *val on success.
  14.115 + * Returns 0 on success, < 0 on error.
  14.116 + */
  14.117 +int oprofilefs_ulong_from_user(unsigned long * val, char const __user * buf, size_t count);
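
Taken together, these helpers are typically used from an arch's create_files() callback. A hedged sketch exposing a private counter as a read-only file follows; the names are illustrative, and for a plain unsigned long the oprofilefs_create_ro_ulong() helper above achieves the same with less code:

	static unsigned long my_counter;    /* illustrative state only */

	static ssize_t my_counter_read(struct file *file, char __user *buf,
				       size_t count, loff_t *offset)
	{
		return oprofilefs_ulong_to_user(my_counter, buf, count, offset);
	}

	static struct file_operations my_counter_fops = {
		.read = my_counter_read,
	};

	static int my_create_files(struct super_block *sb, struct dentry *root)
	{
		return oprofilefs_create_file(sb, root, "my_counter", &my_counter_fops);
	}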
  14.118 +
  14.119 +/** lock for read/write safety */
  14.120 +extern spinlock_t oprofilefs_lock;
  14.121 + 
  14.122 +#endif /* OPROFILE_H */