ia64/linux-2.6.18-xen.hg

view drivers/oprofile/cpu_buffer.c @ 893:f994bfe9b93b

linux/blktap2: reduce TLB flush scope

c/s 885 added very coarse TLB flushing. Since these flushes always
follow single page updates, single page flushes (when available) are
sufficient.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 04 10:32:57 2009 +0100 (2009-06-04)
parents 132f24200f4c
line source
/**
 * @file cpu_buffer.c
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 *
 * Modified by Aravind Menon for Xen
 * These modifications are:
 * Copyright (C) 2005 Hewlett-Packard Co.
 *
 * Each CPU has a local buffer that stores PC value/event
 * pairs. We also log context switches when we notice them.
 * Eventually each CPU's buffer is processed into the global
 * event buffer by sync_buffer().
 *
 * We use a local buffer for two reasons: an NMI or similar
 * interrupt cannot synchronise, and high sampling rates
 * would lead to catastrophic global synchronisation if
 * a global buffer was used.
 */
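/*
 * Editorial note on the slot format used throughout this file: each
 * slot in a CPU buffer is a struct op_sample, essentially an
 * { eip, event } pair (see cpu_buffer.h).  Special records are marked
 * by storing ESCAPE_CODE in the eip field, with the event field then
 * carrying the kind of record (cpu mode switch, task switch, trace
 * begin, domain switch).
 */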
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>

#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprof.h"

struct oprofile_cpu_buffer cpu_buffer[NR_CPUS] __cacheline_aligned;

static void wq_sync_buffer(void *);

#define DEFAULT_TIMER_EXPIRE (HZ / 10)
static int work_enabled;
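/*
 * current_domain tracks which Xen domain the samples now being logged
 * belong to.  COORDINATOR_DOMAIN stands for the domain coordinating
 * the profiling session (normally dom0, where the oprofile daemon
 * runs); as log_sample() below shows, task switch records are only
 * emitted while logging for the coordinator domain.
 */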
static int32_t current_domain = COORDINATOR_DOMAIN;
void free_cpu_buffers(void)
{
	int i;

	for_each_online_cpu(i)
		vfree(cpu_buffer[i].buffer);
}

int alloc_cpu_buffers(void)
{
	int i;

	unsigned long buffer_size = fs_cpu_buffer_size;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer * b = &cpu_buffer[i];

		b->buffer = vmalloc_node(sizeof(struct op_sample) * buffer_size,
					 cpu_to_node(i));
		if (!b->buffer)
			goto fail;

		b->last_task = NULL;
		b->last_cpu_mode = -1;
		b->tracing = 0;
		b->buffer_size = buffer_size;
		b->tail_pos = 0;
		b->head_pos = 0;
		b->sample_received = 0;
		b->sample_lost_overflow = 0;
		b->cpu = i;
		INIT_WORK(&b->work, wq_sync_buffer, b);
	}
	return 0;

fail:
	free_cpu_buffers();
	return -ENOMEM;
}
void start_cpu_work(void)
{
	int i;

	work_enabled = 1;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer * b = &cpu_buffer[i];

		/*
		 * Spread the work by 1 jiffy per cpu so they don't all
		 * fire at once.
		 */
		schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
	}
}
void end_cpu_work(void)
{
	int i;

	work_enabled = 0;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer * b = &cpu_buffer[i];

		cancel_delayed_work(&b->work);
	}

	flush_scheduled_work();
}

/* Resets the cpu buffer to a sane state. */
void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf)
{
	/* reset these to invalid values; the next sample
	 * collected will populate the buffer with proper
	 * values to initialize the buffer
	 */
	cpu_buf->last_cpu_mode = -1;
	cpu_buf->last_task = NULL;
}

/* compute number of available slots in cpu_buffer queue */
static unsigned long nr_available_slots(struct oprofile_cpu_buffer const * b)
{
	unsigned long head = b->head_pos;
	unsigned long tail = b->tail_pos;

	if (tail > head)
		return (tail - head) - 1;

	return tail + (b->buffer_size - head) - 1;
}
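/*
 * Illustrative example of the slot arithmetic above: with
 * buffer_size = 8, head_pos = 6 and tail_pos = 2, the writer may still
 * use slots 6, 7 and 0, so nr_available_slots() returns
 * 2 + (8 - 6) - 1 = 3.  One slot is always kept free so that
 * head == tail unambiguously means "buffer empty".
 */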
static void increment_head(struct oprofile_cpu_buffer * b)
{
	unsigned long new_head = b->head_pos + 1;

	/* Ensure anything written to the slot before we
	 * increment is visible */
	wmb();

	if (new_head < b->buffer_size)
		b->head_pos = new_head;
	else
		b->head_pos = 0;
}

static inline void
add_sample(struct oprofile_cpu_buffer * cpu_buf,
	   unsigned long pc, unsigned long event)
{
	struct op_sample * entry = &cpu_buf->buffer[cpu_buf->head_pos];
	entry->eip = pc;
	entry->event = event;
	increment_head(cpu_buf);
}

static inline void
add_code(struct oprofile_cpu_buffer * buffer, unsigned long value)
{
	add_sample(buffer, ESCAPE_CODE, value);
}

/* This must be safe from any context. It's safe writing here
 * because of the head/tail separation of the writer and reader
 * of the CPU buffer.
 *
 * cpu_mode is needed because on some architectures you cannot
 * tell if you are in kernel or user space simply by looking at
 * pc. We tag this in the buffer by generating kernel/user (and xen)
 * enter events whenever cpu_mode changes
 */
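/*
 * Illustrative buffer contents for one sample that also crosses a
 * user->kernel boundary and a task switch (the worst case, hence the
 * check for 3 free slots below):
 *	{ ESCAPE_CODE, cpu_mode }		cpu mode change
 *	{ ESCAPE_CODE, (unsigned long)task }	task switch
 *	{ pc, event }				the sample itself
 */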
static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
		      int cpu_mode, unsigned long event)
{
	struct task_struct * task;

	cpu_buf->sample_received++;

	if (nr_available_slots(cpu_buf) < 3) {
		cpu_buf->sample_lost_overflow++;
		return 0;
	}

	task = current;

	/* notice a switch from user->kernel or vice versa */
	if (cpu_buf->last_cpu_mode != cpu_mode) {
		cpu_buf->last_cpu_mode = cpu_mode;
		add_code(cpu_buf, cpu_mode);
	}

	/* notice a task switch */
	/* if not processing other domain samples */
	if ((cpu_buf->last_task != task) &&
	    (current_domain == COORDINATOR_DOMAIN)) {
		cpu_buf->last_task = task;
		add_code(cpu_buf, (unsigned long)task);
	}

	add_sample(cpu_buf, pc, event);
	return 1;
}

static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf)
{
	if (nr_available_slots(cpu_buf) < 4) {
		cpu_buf->sample_lost_overflow++;
		return 0;
	}

	add_code(cpu_buf, CPU_TRACE_BEGIN);
	cpu_buf->tracing = 1;
	return 1;
}

static void oprofile_end_trace(struct oprofile_cpu_buffer * cpu_buf)
{
	cpu_buf->tracing = 0;
}
void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
			     unsigned long event, int is_kernel)
{
	struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];

	if (!backtrace_depth) {
		log_sample(cpu_buf, pc, is_kernel, event);
		return;
	}

	if (!oprofile_begin_trace(cpu_buf))
		return;

	/* if log_sample() fails we can't backtrace since we lost the source
	 * of this event */
	if (log_sample(cpu_buf, pc, is_kernel, event))
		oprofile_ops.backtrace(regs, backtrace_depth);
	oprofile_end_trace(cpu_buf);
}
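/*
 * With backtrace_depth set, a successful call above leaves roughly this
 * in the buffer: { ESCAPE_CODE, CPU_TRACE_BEGIN }, the record(s) from
 * log_sample(), then one { caller_pc, 0 } entry per frame that
 * oprofile_ops.backtrace() reports via oprofile_add_trace() below.
 */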
void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
{
	int is_kernel = !user_mode(regs);
	unsigned long pc = profile_pc(regs);

	oprofile_add_ext_sample(pc, regs, event, is_kernel);
}
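/*
 * Typical caller (architecture specific, sketched here only for
 * illustration): a PMU overflow/NMI handler walks its counters and does
 * something like
 *
 *	for (i = 0; i < num_counters; ++i)
 *		if (counter_overflowed(i)) {
 *			oprofile_add_sample(regs, i);
 *			reset_counter(i);
 *		}
 *
 * where num_counters, counter_overflowed() and reset_counter() stand in
 * for the real model-specific helpers; the pc and user/kernel mode are
 * derived from regs as shown above.
 */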
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
	struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
	log_sample(cpu_buf, pc, is_kernel, event);
}

void oprofile_add_trace(unsigned long pc)
{
	struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];

	if (!cpu_buf->tracing)
		return;

	if (nr_available_slots(cpu_buf) < 1) {
		cpu_buf->tracing = 0;
		cpu_buf->sample_lost_overflow++;
		return;
	}

	/* broken frame can give an eip with the same value as an escape code,
	 * abort the trace if we get it */
	if (pc == ESCAPE_CODE) {
		cpu_buf->tracing = 0;
		cpu_buf->backtrace_aborted++;
		return;
	}

	add_sample(cpu_buf, pc, 0);
}

int oprofile_add_domain_switch(int32_t domain_id)
{
	struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];

	/* should have space for switching into and out of domain
	   (2 slots each) plus one sample and one cpu mode switch */
	if (((nr_available_slots(cpu_buf) < 6) &&
	     (domain_id != COORDINATOR_DOMAIN)) ||
	    (nr_available_slots(cpu_buf) < 2))
		return 0;

	add_code(cpu_buf, CPU_DOMAIN_SWITCH);
	add_sample(cpu_buf, domain_id, 0);

	current_domain = domain_id;

	return 1;
}

/*
 * This serves to avoid cpu buffer overflow, and makes sure
 * the task mortuary progresses
 *
 * By using schedule_delayed_work_on and then schedule_delayed_work
 * we guarantee this will stay on the correct cpu
 */
static void wq_sync_buffer(void * data)
{
	struct oprofile_cpu_buffer * b = data;
	if (b->cpu != smp_processor_id()) {
		printk("WQ on CPU%d, prefer CPU%d\n",
		       smp_processor_id(), b->cpu);
	}
	sync_buffer(b->cpu);

	/* don't re-add the work if we're shutting down */
	if (work_enabled)
		schedule_delayed_work(&b->work, DEFAULT_TIMER_EXPIRE);
}