ia64/xen-unstable

xen/common/trace.c @ 19107:696351cde9a4

Allow memflags to be specified to alloc_xenheap_pages().

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
Author:   Keir Fraser <keir.fraser@citrix.com>
Date:     Wed Jan 28 16:58:41 2009 +0000 (2009-01-28)
Parents:  74621a2add54
Children: 71af89e70fee

/******************************************************************************
 * common/trace.c
 *
 * Xen Trace Buffer
 *
 * Copyright (C) 2004 by Intel Research Cambridge
 *
 * Authors: Mark Williamson, mark.a.williamson@intel.com
 *          Rob Gardner, rob.gardner@hp.com
 * Date:    October 2005
 *
 * Copyright (C) 2005 Bin Ren
 *
 * The trace buffer code is designed to allow debugging traces of Xen to be
 * generated on UP / SMP machines.  Each trace entry is timestamped so that
 * it's possible to reconstruct a chronological record of trace events.
 */
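
/*
 * Typical usage, for illustration only (the wrapper macros live in
 * <xen/trace.h> and the event codes in <public/trace.h>), is roughly:
 *
 *     TRACE_2D(TRC_SCHED_SLEEP, d->domain_id, v->vcpu_id);
 *
 * The TRACE_xD wrappers pack their data words into a u32 array and hand it
 * to __trace_var(), defined below, which copies a timestamped record into
 * this CPU's trace buffer for the xentrace tool to consume.
 */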

#include <xen/config.h>
#include <asm/types.h>
#include <asm/io.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/smp.h>
#include <xen/trace.h>
#include <xen/errno.h>
#include <xen/event.h>
#include <xen/softirq.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/percpu.h>
#include <asm/atomic.h>
#include <public/sysctl.h>

#ifdef CONFIG_COMPAT
#include <compat/trace.h>
#define xen_t_buf t_buf
CHECK_t_buf;
#undef xen_t_buf
#define TB_COMPAT IS_COMPAT(dom0)
#else
#define compat_t_rec t_rec
#define TB_COMPAT 0
#endif

/* opt_tbuf_size: trace buffer size (in pages) */
static unsigned int opt_tbuf_size = 0;
integer_param("tbuf_size", opt_tbuf_size);

/* Pointers to the meta-data objects for all system trace buffers */
static DEFINE_PER_CPU(struct t_buf *, t_bufs);
static DEFINE_PER_CPU(unsigned char *, t_data);
static int data_size;

/* High water mark for trace buffers; */
/* Send virtual interrupt when buffer level reaches this point */
static int t_buf_highwater;

/* Number of records lost due to per-CPU trace buffer being full. */
static DEFINE_PER_CPU(unsigned long, lost_records);
static DEFINE_PER_CPU(unsigned long, lost_records_first_tsc);

/* a flag recording whether initialization has been done */
/* or more properly, if the tbuf subsystem is enabled right now */
int tb_init_done __read_mostly;

/* which CPUs tracing is enabled on */
static cpumask_t tb_cpu_mask = CPU_MASK_ALL;

/* which tracing events are enabled */
static u32 tb_event_mask = TRC_ALL;
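
/*
 * Buffer layout (as set up by alloc_trace_bufs() below): each online CPU
 * gets opt_tbuf_size contiguous pages, beginning with a struct t_buf
 * header holding the producer/consumer counters and followed by data_size
 * bytes of record space.  All pages are shared writable with privileged
 * guests so that xentrace in dom0 can map and drain them.
 */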

/**
 * alloc_trace_bufs - performs initialization of the per-cpu trace buffers.
 *
 * This function is called at start of day in order to initialize the per-cpu
 * trace buffers. The trace buffers are then available for debugging use, via
 * the %TRACE_xD macros exported in <xen/trace.h>.
 *
 * This function may also be called later when enabling trace buffers
 * via the SET_SIZE hypercall.
 */
static int alloc_trace_bufs(void)
{
    int           i, order;
    unsigned long nr_pages;
    char         *rawbuf;
    struct t_buf *buf;

    if ( opt_tbuf_size == 0 )
        return -EINVAL;

    nr_pages = num_online_cpus() * opt_tbuf_size;
    order    = get_order_from_pages(nr_pages);
    data_size = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));

    if ( (rawbuf = alloc_xenheap_pages(order, 0)) == NULL )
    {
        printk("Xen trace buffers: memory allocation failed\n");
        opt_tbuf_size = 0;
        return -EINVAL;
    }

    /* Share pages so that xentrace can map them. */
    for ( i = 0; i < nr_pages; i++ )
        share_xen_page_with_privileged_guests(
            virt_to_page(rawbuf) + i, XENSHARE_writable);

    for_each_online_cpu ( i )
    {
        buf = per_cpu(t_bufs, i) = (struct t_buf *)
            &rawbuf[i*opt_tbuf_size*PAGE_SIZE];
        buf->cons = buf->prod = 0;
        per_cpu(t_data, i) = (unsigned char *)(buf + 1);
    }

    t_buf_highwater = data_size >> 1; /* 50% high water */

    return 0;
}

/**
 * tb_set_size - handle the logic involved with dynamically
 * allocating and deallocating tbufs
 *
 * This function is called when the SET_SIZE hypercall is done.
 */
static int tb_set_size(int size)
{
    /*
     * Setting size is a one-shot operation. It can be done either at
     * boot time or via control tools, but not by both. Once buffers
     * are created they cannot be destroyed.
     */
    if ( (opt_tbuf_size != 0) || (size <= 0) )
    {
        gdprintk(XENLOG_INFO, "tb_set_size from %d to %d not implemented\n",
                 opt_tbuf_size, size);
        return -EINVAL;
    }

    opt_tbuf_size = size;
    if ( alloc_trace_bufs() != 0 )
        return -EINVAL;

    printk("Xen trace buffers: initialized\n");
    return 0;
}
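
/*
 * Event filtering: an event code carries a class field (at TRC_CLS_SHIFT)
 * and a subclass field (at TRC_SUBCLS_SHIFT).  A record is emitted only if
 * the event's bits, its class bits and its subclass bits all overlap with
 * tb_event_mask, and the current CPU is set in tb_cpu_mask.  The same
 * checks appear in both trace_will_trace_event() and __trace_var().
 */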

int trace_will_trace_event(u32 event)
{
    if ( !tb_init_done )
        return 0;

    /*
     * Copied from __trace_var()
     */
    if ( (tb_event_mask & event) == 0 )
        return 0;

    /* match class */
    if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
        return 0;

    /* then match subclass */
    if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
                & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
        return 0;

    if ( !cpu_isset(smp_processor_id(), tb_cpu_mask) )
        return 0;

    return 1;
}

/**
 * init_trace_bufs - performs initialization of the per-cpu trace buffers.
 *
 * This function is called at start of day in order to initialize the per-cpu
 * trace buffers. The trace buffers are then available for debugging use, via
 * the %TRACE_xD macros exported in <xen/trace.h>.
 */
void __init init_trace_bufs(void)
{
    if ( opt_tbuf_size == 0 )
    {
        printk("Xen trace buffers: disabled\n");
        return;
    }

    if ( alloc_trace_bufs() == 0 )
    {
        printk("Xen trace buffers: initialised\n");
        wmb(); /* above must be visible before tb_init_done flag set */
        tb_init_done = 1;
    }
}

/**
 * tb_control - sysctl operations on trace buffers.
 * @tbc: a pointer to a xen_sysctl_tbuf_op_t to be filled out
 */
int tb_control(xen_sysctl_tbuf_op_t *tbc)
{
    static DEFINE_SPINLOCK(lock);
    int rc = 0;

    spin_lock(&lock);

    switch ( tbc->cmd )
    {
    case XEN_SYSCTL_TBUFOP_get_info:
        tbc->evt_mask   = tb_event_mask;
        tbc->buffer_mfn = opt_tbuf_size ? virt_to_mfn(per_cpu(t_bufs, 0)) : 0;
        tbc->size       = opt_tbuf_size * PAGE_SIZE;
        break;
    case XEN_SYSCTL_TBUFOP_set_cpu_mask:
        xenctl_cpumap_to_cpumask(&tb_cpu_mask, &tbc->cpu_mask);
        break;
    case XEN_SYSCTL_TBUFOP_set_evt_mask:
        tb_event_mask = tbc->evt_mask;
        break;
    case XEN_SYSCTL_TBUFOP_set_size:
        rc = !tb_init_done ? tb_set_size(tbc->size) : -EINVAL;
        break;
    case XEN_SYSCTL_TBUFOP_enable:
        /* Enable trace buffers. Check buffers are already allocated. */
        if ( opt_tbuf_size == 0 )
            rc = -EINVAL;
        else
            tb_init_done = 1;
        break;
    case XEN_SYSCTL_TBUFOP_disable:
        /*
         * Disable trace buffers. Just stops new records from being written,
         * does not deallocate any memory.
         */
        tb_init_done = 0;
        break;
    default:
        rc = -EINVAL;
        break;
    }

    spin_unlock(&lock);

    return rc;
}
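
/*
 * Ring arithmetic: buf->prod and buf->cons are byte counts that run over
 * the range [0, 2*data_size) rather than [0, data_size), so that an empty
 * buffer (prod == cons) can be distinguished from a full one (difference
 * of data_size).  The actual offset into the data area is the counter
 * modulo data_size, as computed by next_record().  For example, with
 * data_size = 100, prod = 130 and cons = 40 there are 90 unconsumed bytes
 * and the next record is written at offset 30.
 */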

static inline int calc_rec_size(int cycles, int extra)
{
    int rec_size;
    rec_size = 4;
    if ( cycles )
        rec_size += 8;
    rec_size += extra;
    return rec_size;
}

static inline int calc_unconsumed_bytes(struct t_buf *buf)
{
    int x = buf->prod - buf->cons;
    if ( x < 0 )
        x += 2*data_size;

    ASSERT(x >= 0);
    ASSERT(x <= data_size);

    return x;
}

static inline int calc_bytes_to_wrap(struct t_buf *buf)
{
    int x = data_size - buf->prod;
    if ( x <= 0 )
        x += data_size;

    ASSERT(x > 0);
    ASSERT(x <= data_size);

    return x;
}

static inline int calc_bytes_avail(struct t_buf *buf)
{
    return data_size - calc_unconsumed_bytes(buf);
}

static inline struct t_rec *
next_record(struct t_buf *buf)
{
    int x = buf->prod;
    if ( x >= data_size )
        x -= data_size;

    ASSERT(x >= 0);
    ASSERT(x < data_size);

    return (struct t_rec *)&this_cpu(t_data)[x];
}
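
/*
 * Record layout, matching calc_rec_size(): a 4-byte header (event code,
 * extra-word count and the cycles_included flag), an optional 8-byte TSC
 * timestamp, then up to TRACE_EXTRA_MAX words of caller-supplied data.
 * __insert_record() fills the record in place and only advances buf->prod
 * after the wmb(), so the consumer never sees a partially written record.
 */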

static inline int __insert_record(struct t_buf *buf,
                                  unsigned long event,
                                  int extra,
                                  int cycles,
                                  int rec_size,
                                  unsigned char *extra_data)
{
    struct t_rec *rec;
    unsigned char *dst;
    unsigned long extra_word = extra/sizeof(u32);
    int local_rec_size = calc_rec_size(cycles, extra);
    uint32_t next;

    BUG_ON(local_rec_size != rec_size);
    BUG_ON(extra & 3);

    /* Double-check once more that we have enough space.
     * Don't bugcheck here, in case the userland tool is doing
     * something stupid. */
    if ( calc_bytes_avail(buf) < rec_size )
    {
        printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n",
               __func__,
               calc_bytes_avail(buf),
               data_size, buf->prod, buf->cons, data_size, rec_size);
        return 0;
    }
    rmb();

    rec = next_record(buf);
    rec->event = event;
    rec->extra_u32 = extra_word;
    dst = (unsigned char *)rec->u.nocycles.extra_u32;
    if ( (rec->cycles_included = cycles) != 0 )
    {
        u64 tsc = (u64)get_cycles();
        rec->u.cycles.cycles_lo = (uint32_t)tsc;
        rec->u.cycles.cycles_hi = (uint32_t)(tsc >> 32);
        dst = (unsigned char *)rec->u.cycles.extra_u32;
    }

    if ( extra_data && extra )
        memcpy(dst, extra_data, extra);

    wmb();

    next = buf->prod + rec_size;
    if ( next >= 2*data_size )
        next -= 2*data_size;
    ASSERT(next >= 0);
    ASSERT(next < 2*data_size);
    buf->prod = next;

    return rec_size;
}
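
/*
 * Wrap records exist because a real record is never split across the end
 * of the data area: when the next record would not fit before the wrap
 * point, insert_wrap_record() below pads the remaining space with a single
 * TRC_TRACE_WRAP_BUFFER record (adding a TSC word if needed to keep the
 * extra data within TRACE_EXTRA_MAX), and writing then continues from
 * offset 0.
 */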

static inline int insert_wrap_record(struct t_buf *buf, int size)
{
    int space_left = calc_bytes_to_wrap(buf);
    unsigned long extra_space = space_left - sizeof(u32);
    int cycles = 0;

    BUG_ON(space_left > size);

    /* We may need to add cycles to take up enough space... */
    if ( (extra_space/sizeof(u32)) > TRACE_EXTRA_MAX )
    {
        cycles = 1;
        extra_space -= sizeof(u64);
        ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
    }

    return __insert_record(buf,
                           TRC_TRACE_WRAP_BUFFER,
                           extra_space,
                           cycles,
                           space_left,
                           NULL);
}

#define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */

static inline int insert_lost_records(struct t_buf *buf)
{
    struct {
        u32 lost_records;
        u32 did:16, vid:16;
        u64 first_tsc;
    } __attribute__((packed)) ed;

    ed.vid = current->vcpu_id;
    ed.did = current->domain->domain_id;
    ed.lost_records = this_cpu(lost_records);
    ed.first_tsc = this_cpu(lost_records_first_tsc);

    this_cpu(lost_records) = 0;

    return __insert_record(buf,
                           TRC_LOST_RECORDS,
                           sizeof(ed),
                           1 /* cycles */,
                           LOST_REC_SIZE,
                           (unsigned char *)&ed);
}

/*
 * Notification is performed in a tasklet to avoid deadlocks with contexts
 * which __trace_var() may be called from (e.g., scheduler critical regions).
 */
static void trace_notify_dom0(unsigned long unused)
{
    send_guest_global_virq(dom0, VIRQ_TBUF);
}
static DECLARE_TASKLET(trace_notify_dom0_tasklet, trace_notify_dom0, 0);
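
/*
 * __trace_var() below works in two passes: a dry run first adds up the
 * space needed for any pending lost-records record, any wrap padding and
 * the record itself; only if the total fits in the free space is anything
 * written.  Otherwise the record is dropped and this CPU's lost_records
 * count (reported later via insert_lost_records()) is incremented.
 */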

/**
 * __trace_var - Enters a trace tuple into the trace buffer for the current CPU.
 * @event: the event type being logged
 * @cycles: nonzero to include a TSC timestamp in the record
 * @extra: size, in bytes, of the additional data for the event
 * @extra_data: pointer to the additional data
 *
 * Logs a trace record into the appropriate buffer.  Does nothing if the
 * trace buffers are not yet initialised or the event is filtered out.
 */
void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
{
    struct t_buf *buf;
    unsigned long flags, bytes_to_tail, bytes_to_wrap;
    int rec_size, total_size;
    int extra_word;
    int started_below_highwater;

    if ( !tb_init_done )
        return;

    /* Convert byte count into word count, rounding up */
    extra_word = (extra / sizeof(u32));
    if ( (extra % sizeof(u32)) != 0 )
        extra_word++;

    ASSERT(extra_word <= TRACE_EXTRA_MAX);
    extra_word = min_t(int, extra_word, TRACE_EXTRA_MAX);

    /* Round size up to nearest word */
    extra = extra_word * sizeof(u32);

    if ( (tb_event_mask & event) == 0 )
        return;

    /* match class */
    if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
        return;

    /* then match subclass */
    if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
                & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
        return;

    if ( !cpu_isset(smp_processor_id(), tb_cpu_mask) )
        return;

    /* Read tb_init_done /before/ t_bufs. */
    rmb();

    buf = this_cpu(t_bufs);

    local_irq_save(flags);

    started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);

    /* Calculate the record size */
    rec_size = calc_rec_size(cycles, extra);

    /* How many bytes are available in the buffer? */
    bytes_to_tail = calc_bytes_avail(buf);

    /* How many bytes until the next wrap-around? */
    bytes_to_wrap = calc_bytes_to_wrap(buf);

    /*
     * Calculate expected total size to commit this record by
     * doing a dry-run.
     */
    total_size = 0;

    /* First, check to see if we need to include a lost_record.
     */
    if ( this_cpu(lost_records) )
    {
        if ( LOST_REC_SIZE > bytes_to_wrap )
        {
            total_size += bytes_to_wrap;
            bytes_to_wrap = data_size;
        }
        total_size += LOST_REC_SIZE;
        bytes_to_wrap -= LOST_REC_SIZE;

        /* LOST_REC might line up perfectly with the buffer wrap */
        if ( bytes_to_wrap == 0 )
            bytes_to_wrap = data_size;
    }

    if ( rec_size > bytes_to_wrap )
    {
        total_size += bytes_to_wrap;
    }
    total_size += rec_size;

    /* Do we have enough space for everything? */
    if ( total_size > bytes_to_tail )
    {
        if ( ++this_cpu(lost_records) == 1 )
            this_cpu(lost_records_first_tsc) = (u64)get_cycles();
        local_irq_restore(flags);
        return;
    }

    /*
     * Now, actually write information
     */
    bytes_to_wrap = calc_bytes_to_wrap(buf);

    if ( this_cpu(lost_records) )
    {
        if ( LOST_REC_SIZE > bytes_to_wrap )
        {
            insert_wrap_record(buf, LOST_REC_SIZE);
            bytes_to_wrap = data_size;
        }
        insert_lost_records(buf);
        bytes_to_wrap -= LOST_REC_SIZE;

        /* LOST_REC might line up perfectly with the buffer wrap */
        if ( bytes_to_wrap == 0 )
            bytes_to_wrap = data_size;
    }

    if ( rec_size > bytes_to_wrap )
        insert_wrap_record(buf, rec_size);

    /* Write the original record */
    __insert_record(buf, event, extra, cycles, rec_size, extra_data);

    local_irq_restore(flags);

    /* Notify trace buffer consumer that we've crossed the high water mark. */
    if ( started_below_highwater &&
         (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
        tasklet_schedule(&trace_notify_dom0_tasklet);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */