ia64/xen-unstable
changeset 15954:7ed576909132
[xen][tracing] Introduce variable-size trace records
This patch introduces variable-size trace records. Each record consists of
a 32-bit "header", an optional 64-bit cycle count, and up to seven more
32-bit words. The header is packed with the following information:
bits 0-27: the trace event.
bits 28-30: the number of 32-bit "extra" words in the record.
bit 31: whether the record includes a 64-bit TSC.
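As an illustration (not part of the patch), everything a consumer needs,
including the total record size, can be recovered from the header word alone.
The TRC_HD_* macros below are the ones this patch adds to
xen/include/public/trace.h; rec_size() is a hypothetical helper:

/* Sketch: decoding the packed header word. */
#include <stdint.h>
#include <stdio.h>

#define TRACE_EXTRA_MAX    7
#define TRACE_EXTRA_SHIFT  28
#define TRC_HD_TO_EVENT(x) ((x) & 0x0fffffff)
#define TRC_HD_CYCLE_FLAG  (1UL << 31)
#define TRC_HD_INCLUDES_CYCLE_COUNT(x) (!!((x) & TRC_HD_CYCLE_FLAG))
#define TRC_HD_EXTRA(x)    (((x) >> TRACE_EXTRA_SHIFT) & TRACE_EXTRA_MAX)

/* Total record size in bytes: 4-byte header, optional 8-byte tsc,
 * plus the extra words. */
static unsigned rec_size(uint32_t header)
{
    return 4 + (TRC_HD_INCLUDES_CYCLE_COUNT(header) ? 8 : 0)
             + 4 * TRC_HD_EXTRA(header);
}

int main(void)
{
    /* A lost-records header: event TRC_GEN + 1, one extra word, no tsc. */
    uint32_t hdr = 0x0001f001 | (1u << TRACE_EXTRA_SHIFT);

    printf("event %#x, extra %u, cycles %d, size %u bytes\n",
           TRC_HD_TO_EVENT(hdr), TRC_HD_EXTRA(hdr),
           TRC_HD_INCLUDES_CYCLE_COUNT(hdr), rec_size(hdr));
    return 0;
}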
This patch standardizes behavior with respect to 32- and 64-bit hypervisors and dom0s.
Note that this patch requires a new version of the xentrace daemon running in
dom0. Instead of prepending the cpu to every record as it writes it, the new
daemon inserts a "cpu change" record into the trace file that records the cpu
and the number of bytes it is about to write.
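For illustration only (this mirrors the struct built in write_buffer() in the
diff below; the struct name here is hypothetical), the "cpu change" record is
an ordinary variable-size record with no cycle count and two extra words:

/* Sketch of the "cpu change" record written by the new daemon. */
#include <stdint.h>

#define TRACE_EXTRA_SHIFT    28
#define TRC_TRACE_CPU_CHANGE 0x0001f003   /* TRC_GEN + 3 */

struct cpu_change_rec {
    uint32_t header;       /* TRC_TRACE_CPU_CHANGE | (2 << TRACE_EXTRA_SHIFT) */
    uint32_t cpu;          /* source trace buffer CPU */
    uint32_t byte_count;   /* bytes of raw records that follow */
};

static struct cpu_change_rec make_cpu_change(uint32_t cpu, uint32_t bytes)
{
    struct cpu_change_rec r = {
        .header     = TRC_TRACE_CPU_CHANGE | (2u << TRACE_EXTRA_SHIFT),
        .cpu        = cpu,
        .byte_count = bytes,
    };
    return r;
}

int main(void)
{
    struct cpu_change_rec r = make_cpu_change(2, 4096);
    return r.byte_count == 4096 ? 0 : 1;
}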
Signed-off-by: George Dunlap <gdunlap@xensource.com>
author      George Dunlap <gdunlap@xensource.com>
date        Fri Sep 21 15:26:07 2007 +0100 (2007-09-21)
parents     517432b9f8b5
children    305a8dbc264c
files       tools/xentrace/xentrace.c xen/common/trace.c xen/include/public/trace.h xen/include/xen/trace.h
--- a/tools/xentrace/xentrace.c	Fri Sep 21 16:52:17 2007 +0100
+++ b/tools/xentrace/xentrace.c	Fri Sep 21 15:26:07 2007 +0100
@@ -22,6 +22,7 @@
 #include <signal.h>
 #include <inttypes.h>
 #include <string.h>
+#include <assert.h>
 
 #include <xen/xen.h>
 #include <xen/trace.h>
@@ -83,24 +84,58 @@ struct timespec millis_to_timespec(unsig
 }
 
 /**
- * write_rec - output a trace record in binary format
+ * write_buffer - write a section of the trace buffer
  * @cpu - source buffer CPU ID
- * @rec - trace record to output
+ * @start
+ * @size - size of write (may be less than total window size)
+ * @total_size - total size of the window (0 on 2nd write of wrapped windows)
 * @out - output stream
 *
- * Outputs the trace record to a filestream, prepending the CPU ID of the
- * source trace buffer.
+ * Outputs the trace buffer to a filestream, prepending the CPU and size
+ * of the buffer write.
 */
-void write_rec(unsigned int cpu, struct t_rec *rec, FILE *out)
+void write_buffer(unsigned int cpu, unsigned char *start, int size,
+                  int total_size, int outfd)
 {
     size_t written = 0;
-    written += fwrite(&cpu, sizeof(cpu), 1, out);
-    written += fwrite(rec, sizeof(*rec), 1, out);
-    if ( written != 2 )
+
+    /* Write a CPU_BUF record on each buffer "window" written.  Wrapped
+     * windows may involve two writes, so only write the record on the
+     * first write. */
+    if(total_size)
     {
-        PERROR("Failed to write trace record");
-        exit(EXIT_FAILURE);
+        struct {
+            uint32_t header;
+            struct {
+                unsigned cpu;
+                unsigned byte_count;
+            } extra;
+        } rec;
+
+        rec.header = TRC_TRACE_CPU_CHANGE
+            | ((sizeof(rec.extra)/sizeof(uint32_t)) << TRACE_EXTRA_SHIFT);
+        rec.extra.cpu = cpu;
+        rec.extra.byte_count = total_size;
+
+        written = write(outfd, &rec, sizeof(rec));
+
+        if(written!=sizeof(rec)) {
+            fprintf(stderr, "Cannot write cpu change (write returned %d)\n",
+                    written);
+            goto fail;
+        }
     }
+
+    written = write(outfd, start, size);
+    if ( written != size ) {
+        fprintf(stderr, "Write failed! (size %d, returned %d)\n",
+                size, written);
+        goto fail;
+    }
+    return;
+ fail:
+    PERROR("Failed to write trace data");
+    exit(EXIT_FAILURE);
 }
 
 static void get_tbufs(unsigned long *mfn, unsigned long *size)
@@ -233,12 +268,12 @@ struct t_buf **init_bufs_ptrs(void *bufs
 * mapped in user space. Note that the trace buffer metadata contains machine
 * pointers - the array returned allows more convenient access to them.
 */
-struct t_rec **init_rec_ptrs(struct t_buf **meta, unsigned int num)
+unsigned char **init_rec_ptrs(struct t_buf **meta, unsigned int num)
 {
     int i;
-    struct t_rec **data;
+    unsigned char **data;
 
-    data = calloc(num, sizeof(struct t_rec *));
+    data = calloc(num, sizeof(unsigned char *));
     if ( data == NULL )
     {
         PERROR("Failed to allocate memory for data pointers\n");
@@ -246,7 +281,7 @@ struct t_rec **init_rec_ptrs(struct t_bu
     }
 
     for ( i = 0; i < num; i++ )
-        data[i] = (struct t_rec *)(meta[i] + 1);
+        data[i] = (unsigned char *)(meta[i] + 1);
 
     return data;
 }
@@ -281,19 +316,19 @@ unsigned int get_num_cpus(void)
 * monitor_tbufs - monitor the contents of tbufs and output to a file
 * @logfile: the FILE * representing the file to log to
 */
-int monitor_tbufs(FILE *logfile)
+int monitor_tbufs(int outfd)
 {
     int i;
 
     void *tbufs_mapped;        /* pointer to where the tbufs are mapped    */
     struct t_buf **meta;       /* pointers to the trace buffer metadata    */
-    struct t_rec **data;       /* pointers to the trace buffer data areas
+    unsigned char **data;      /* pointers to the trace buffer data areas
                                 * where they are mapped into user space.   */
     unsigned long tbufs_mfn;   /* mfn of the tbufs                         */
     unsigned int  num;         /* number of trace buffers / logical CPUS   */
     unsigned long size;        /* size of a single trace buffer            */
 
-    int size_in_recs;
+    unsigned long data_size;
 
     /* get number of logical CPUs (and therefore number of trace buffers) */
    num = get_num_cpus();
@@ -302,7 +337,7 @@ int monitor_tbufs(FILE *logfile)
    get_tbufs(&tbufs_mfn, &size);
    tbufs_mapped = map_tbufs(tbufs_mfn, num, size);
 
-    size_in_recs = (size - sizeof(struct t_buf)) / sizeof(struct t_rec);
+    data_size = (size - sizeof(struct t_buf));
 
    /* build arrays of convenience ptrs */
    meta = init_bufs_ptrs(tbufs_mapped, num, size);
@@ -317,13 +352,48 @@ int monitor_tbufs(FILE *logfile)
    {
        for ( i = 0; (i < num) && !interrupted; i++ )
        {
-            while ( meta[i]->cons != meta[i]->prod )
+            unsigned long start_offset, end_offset, window_size, cons, prod;
+            rmb(); /* read prod, then read item. */
+
+            /* Read window information only once. */
+            cons = meta[i]->cons;
+            prod = meta[i]->prod;
+
+            if(cons == prod)
+                continue;
+
+            assert(prod > cons);
+
+            window_size = prod - cons;
+            start_offset = cons % data_size;
+            end_offset = prod % data_size;
+
+            if(end_offset > start_offset)
            {
-                rmb(); /* read prod, then read item. */
-                write_rec(i, data[i] + meta[i]->cons % size_in_recs, logfile);
-                mb(); /* read item, then update cons. */
-                meta[i]->cons++;
+                /* If window does not wrap, write in one big chunk */
+                write_buffer(i, data[i]+start_offset,
+                             window_size,
+                             window_size,
+                             outfd);
            }
+            else
+            {
+                /* If wrapped, write in two chunks:
+                 * - first, start to the end of the buffer
+                 * - second, start of buffer to end of window
+                 */
+                write_buffer(i, data[i]+start_offset,
+                             data_size - start_offset,
+                             window_size,
+                             outfd);
+                write_buffer(i, data[i],
+                             end_offset,
+                             0,
+                             outfd);
+            }
+
+            mb(); /* read buffer, then update cons. */
+            meta[i]->cons = meta[i]->prod;
        }
 
        nanosleep(&opts.poll_sleep, NULL);
@@ -333,7 +403,7 @@ int monitor_tbufs(FILE *logfile)
    free(meta);
    free(data);
    /* don't need to munmap - cleanup is automatic */
-    fclose(logfile);
+    close(outfd);
 
    return 0;
 }
@@ -503,7 +573,6 @@ const char *argp_program_bug_address = "
 int main(int argc, char **argv)
 {
    int outfd = 1, ret;
-    FILE *logfile;
    struct sigaction act;
 
    opts.outfile = 0;
@@ -537,8 +606,6 @@ int main(int argc, char **argv)
        exit(EXIT_FAILURE);
    }
 
-    logfile = fdopen(outfd, "w");
-
    /* ensure that if we get a signal, we'll do cleanup, then exit */
    act.sa_handler = close_handler;
    act.sa_flags = 0;
@@ -547,7 +614,16 @@ int main(int argc, char **argv)
    sigaction(SIGTERM, &act, NULL);
    sigaction(SIGINT,  &act, NULL);
 
-    ret = monitor_tbufs(logfile);
+    ret = monitor_tbufs(outfd);
 
    return ret;
 }
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
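A hedged sketch of the matching reader side (not part of this changeset):
with the daemon change above, every window in the output file starts with a
cpu-change record, so a stand-alone consumer can demultiplex per-CPU data
like this, assuming a little-endian trace file:

/* Hypothetical consumer for the new file format: each window is a
 * cpu-change record (header, cpu, byte_count) followed by byte_count
 * bytes of raw trace records for that CPU. */
#include <stdint.h>
#include <stdio.h>

#define TRC_HD_TO_EVENT(x)   ((x) & 0x0fffffff)
#define TRC_TRACE_CPU_CHANGE 0x0001f003   /* TRC_GEN + 3 */

int main(int argc, char **argv)
{
    FILE *f;
    uint32_t rec[3];   /* header, cpu, byte_count */

    if ( argc != 2 || (f = fopen(argv[1], "rb")) == NULL )
    {
        fprintf(stderr, "usage: %s <tracefile>\n", argv[0]);
        return 1;
    }

    while ( fread(rec, sizeof(uint32_t), 3, f) == 3 )
    {
        if ( TRC_HD_TO_EVENT(rec[0]) != TRC_TRACE_CPU_CHANGE )
        {
            fprintf(stderr, "stream out of sync\n");
            return 1;
        }
        printf("cpu %u: %u bytes\n", rec[1], rec[2]);
        /* Skip the raw records; a real parser would walk them using
         * each record's header word. */
        if ( fseek(f, rec[2], SEEK_CUR) != 0 )
            break;
    }
    fclose(f);
    return 0;
}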
--- a/xen/common/trace.c	Fri Sep 21 16:52:17 2007 +0100
+++ b/xen/common/trace.c	Fri Sep 21 15:26:07 2007 +0100
@@ -43,19 +43,14 @@ CHECK_t_buf;
 #define TB_COMPAT 0
 #endif
 
-typedef union {
-    struct t_rec *nat;
-    struct compat_t_rec *cmp;
-} t_rec_u;
-
 /* opt_tbuf_size: trace buffer size (in pages) */
 static unsigned int opt_tbuf_size = 0;
 integer_param("tbuf_size", opt_tbuf_size);
 
 /* Pointers to the meta-data objects for all system trace buffers */
 static DEFINE_PER_CPU(struct t_buf *, t_bufs);
-static DEFINE_PER_CPU(t_rec_u, t_recs);
-static int nr_recs;
+static DEFINE_PER_CPU(unsigned char *, t_data);
+static int data_size;
 
 /* High water mark for trace buffers; */
 /* Send virtual interrupt when buffer level reaches this point */
@@ -102,8 +97,7 @@ static int alloc_trace_bufs(void)
 
     nr_pages = num_online_cpus() * opt_tbuf_size;
     order    = get_order_from_pages(nr_pages);
-    nr_recs  = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf)) /
-        (!TB_COMPAT ? sizeof(struct t_rec) : sizeof(struct compat_t_rec));
+    data_size = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));
 
     if ( (rawbuf = alloc_xenheap_pages(order)) == NULL )
     {
@@ -122,10 +116,10 @@ static int alloc_trace_bufs(void)
         buf = per_cpu(t_bufs, i) = (struct t_buf *)
             &rawbuf[i*opt_tbuf_size*PAGE_SIZE];
         buf->cons = buf->prod = 0;
-        per_cpu(t_recs, i).nat = (struct t_rec *)(buf + 1);
+        per_cpu(t_data, i) = (unsigned char *)(buf + 1);
     }
 
-    t_buf_highwater = nr_recs >> 1; /* 50% high water */
+    t_buf_highwater = data_size >> 1; /* 50% high water */
     open_softirq(TRACE_SOFTIRQ, trace_notify_guest);
 
     return 0;
@@ -235,6 +229,129 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
     return rc;
 }
 
+static inline int calc_rec_size(int cycles, int extra)
+{
+    int rec_size;
+    rec_size = 4;
+    if(cycles)
+        rec_size += 8;
+    rec_size += extra;
+    return rec_size;
+}
+
+static inline int calc_bytes_to_wrap(struct t_buf *buf)
+{
+    return data_size - (buf->prod % data_size);
+}
+
+static inline unsigned calc_bytes_avail(struct t_buf *buf)
+{
+    return data_size - (buf->prod - buf->cons);
+}
+
+static inline int __insert_record(struct t_buf *buf,
+                                  unsigned long event,
+                                  int extra,
+                                  int cycles,
+                                  int rec_size,
+                                  unsigned char * extra_data)
+{
+    struct t_rec *rec;
+    unsigned char * dst;
+    unsigned long extra_word = extra/sizeof(u32);
+    int local_rec_size = calc_rec_size(cycles, extra);
+
+    BUG_ON(local_rec_size != rec_size);
+
+    /* Double-check once more that we have enough space.
+     * Don't bugcheck here, in case the userland tool is doing
+     * something stupid. */
+    if(calc_bytes_avail(buf) < rec_size )
+    {
+        printk("%s: %u bytes left (%u - (%u - %u)) recsize %u.\n",
+               __func__,
+               data_size - (buf->prod - buf->cons),
+               data_size,
+               buf->prod, buf->cons, rec_size);
+        return 0;
+    }
+    rmb();
+
+    rec = (struct t_rec *)&this_cpu(t_data)[buf->prod % data_size];
+    rec->header = event;
+    rec->header |= extra_word << TRACE_EXTRA_SHIFT;
+    if(cycles)
+    {
+        u64 tsc = (u64)get_cycles();
+
+        rec->header |= TRC_HD_CYCLE_FLAG;
+        rec->u.cycles.cycles_lo = tsc & ((((u64)1)<<32)-1);
+        rec->u.cycles.cycles_hi = tsc >> 32;
+        dst = rec->u.cycles.data;
+    }
+    else
+        dst = rec->u.nocycles.data;
+
+    if(extra_data && extra)
+        memcpy(dst, extra_data, extra);
+
+    wmb();
+    buf->prod+=rec_size;
+
+    return rec_size;
+}
+
+static inline int insert_wrap_record(struct t_buf *buf, int size)
+{
+    int space_left = calc_bytes_to_wrap(buf);
+    unsigned long extra_space = space_left - sizeof(u32);
+    int cycles=0;
+
+    if(space_left > size)
+        printk("%s: space_left %d, size %d!\n",
+               __func__, space_left, size);
+
+    BUG_ON(space_left > size);
+
+    /* We may need to add cycles to take up enough space... */
+    if((extra_space/sizeof(u32)) > TRACE_EXTRA_MAX)
+    {
+        cycles = 1;
+        extra_space -= sizeof(u64);
+
+        ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
+    }
+
+
+    return __insert_record(buf,
+                           TRC_TRACE_WRAP_BUFFER,
+                           extra_space,
+                           cycles,
+                           space_left,
+                           NULL);
+}
+
+#define LOST_REC_SIZE 8
+
+static inline int insert_lost_records(struct t_buf *buf)
+{
+    struct {
+        u32 lost_records;
+    } ed;
+
+    ed.lost_records = this_cpu(lost_records);
+
+    this_cpu(lost_records) = 0;
+
+    return __insert_record(buf,
+                           TRC_LOST_RECORDS,
+                           sizeof(ed),
+                           0 /* !cycles */,
+                           LOST_REC_SIZE,
+                           (unsigned char *)&ed);
+}
+
+
 /**
 * trace - Enters a trace tuple into the trace buffer for the current CPU.
 * @event: the event type being logged
@@ -244,14 +361,32 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
 * failure, otherwise 0. Failure occurs only if the trace buffers are not yet
 * initialised.
 */
-void trace(u32 event, unsigned long d1, unsigned long d2,
-           unsigned long d3, unsigned long d4, unsigned long d5)
+void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
 {
     struct t_buf *buf;
-    t_rec_u rec;
-    unsigned long flags;
+    unsigned long flags, bytes_to_tail, bytes_to_wrap;
+    int rec_size, total_size;
+    int extra_word;
+    int started_below_highwater;
+
+    if(!tb_init_done)
+        return;
+
+    /* Convert byte count into word count, rounding up */
+    extra_word = (extra / sizeof(u32));
+    if((extra % sizeof(u32)) != 0)
+        extra_word++;
 
-    BUG_ON(!tb_init_done);
+#if !NDEBUG
+    ASSERT(extra_word<=TRACE_EXTRA_MAX);
+#else
+    /* Not worth crashing a production system over */
+    if(extra_word > TRACE_EXTRA_MAX)
+        extra_word = TRACE_EXTRA_MAX;
+#endif
+
+    /* Round size up to nearest word */
+    extra = extra_word * sizeof(u32);
 
     if ( (tb_event_mask & event) == 0 )
         return;
@@ -275,75 +410,127 @@ void trace(u32 event, unsigned long d1,
 
     local_irq_save(flags);
 
-    /* Check if space for two records (we write two if there are lost recs). */
-    if ( (buf->prod - buf->cons) >= (nr_recs - 1) )
+    started_below_highwater = ( (buf->prod - buf->cons) < t_buf_highwater );
+
+    /* Calculate the record size */
+    rec_size = calc_rec_size(cycles, extra);
+
+    /* How many bytes are available in the buffer? */
+    bytes_to_tail = calc_bytes_avail(buf);
+
+    /* How many bytes until the next wrap-around? */
+    bytes_to_wrap = calc_bytes_to_wrap(buf);
+
+    /*
+     * Calculate expected total size to commit this record by
+     * doing a dry-run.
+     */
+    total_size = 0;
+
+    /* First, check to see if we need to include a lost_record.
+     *
+     * calc_bytes_to_wrap() involves integer division, which we'd like to
+     * avoid if we can. So do the math, check it in debug versions, and
+     * do a final check always if we happen to write a record.
+     */
+    if(this_cpu(lost_records))
+    {
+        if(LOST_REC_SIZE > bytes_to_wrap)
+        {
+            total_size += bytes_to_wrap;
+            bytes_to_wrap = data_size;
+        }
+        else
+        {
+            bytes_to_wrap -= LOST_REC_SIZE;
+            if(bytes_to_wrap == 0)
+                bytes_to_wrap == data_size;
+        }
+        total_size += LOST_REC_SIZE;
+    }
+
+    ASSERT(bytes_to_wrap == calc_bytes_to_wrap(buf));
+
+    if(rec_size > bytes_to_wrap)
+    {
+        total_size += bytes_to_wrap;
+        bytes_to_wrap = data_size;
+    }
+    else
+    {
+        bytes_to_wrap -= rec_size;
+    }
+
+    total_size += rec_size;
+
+    /* Do we have enough space for everything? */
+    if(total_size > bytes_to_tail)
     {
         this_cpu(lost_records)++;
         local_irq_restore(flags);
         return;
     }
 
-    if ( unlikely(this_cpu(lost_records) != 0) )
+    /*
+     * Now, actually write information
+     */
+    bytes_to_wrap = calc_bytes_to_wrap(buf);
+
+    if(this_cpu(lost_records))
     {
-        if ( !TB_COMPAT )
+        if(LOST_REC_SIZE > bytes_to_wrap)
         {
-            rec.nat = &this_cpu(t_recs).nat[buf->prod % nr_recs];
-            memset(rec.nat, 0, sizeof(*rec.nat));
-            rec.nat->cycles = (u64)get_cycles();
-            rec.nat->event = TRC_LOST_RECORDS;
-            rec.nat->data[0] = this_cpu(lost_records);
-            this_cpu(lost_records) = 0;
-        }
+            insert_wrap_record(buf, LOST_REC_SIZE);
+            bytes_to_wrap = data_size;
+        }
         else
         {
-            rec.cmp = &this_cpu(t_recs).cmp[buf->prod % nr_recs];
-            memset(rec.cmp, 0, sizeof(*rec.cmp));
-            rec.cmp->cycles = (u64)get_cycles();
-            rec.cmp->event = TRC_LOST_RECORDS;
-            rec.cmp->data[0] = this_cpu(lost_records);
-            this_cpu(lost_records) = 0;
+            bytes_to_wrap -= LOST_REC_SIZE;
+            /* LOST_REC might line up perfectly with the buffer wrap */
+            if(bytes_to_wrap == 0)
+                bytes_to_wrap = data_size;
        }
-
-        wmb();
-        buf->prod++;
+        insert_lost_records(buf);
    }
 
-    if ( !TB_COMPAT )
-    {
-        rec.nat = &this_cpu(t_recs).nat[buf->prod % nr_recs];
-        rec.nat->cycles = (u64)get_cycles();
-        rec.nat->event = event;
-        rec.nat->data[0] = d1;
-        rec.nat->data[1] = d2;
-        rec.nat->data[2] = d3;
-        rec.nat->data[3] = d4;
-        rec.nat->data[4] = d5;
-    }
-    else
+    ASSERT(bytes_to_wrap == calc_bytes_to_wrap(buf));
+
+    if(rec_size > bytes_to_wrap)
    {
-        rec.cmp = &this_cpu(t_recs).cmp[buf->prod % nr_recs];
-        rec.cmp->cycles = (u64)get_cycles();
-        rec.cmp->event = event;
-        rec.cmp->data[0] = d1;
-        rec.cmp->data[1] = d2;
-        rec.cmp->data[2] = d3;
-        rec.cmp->data[3] = d4;
-        rec.cmp->data[4] = d5;
-    }
+        insert_wrap_record(buf, rec_size);
+    }
 
-    wmb();
-    buf->prod++;
+    /* Write the original record */
+    __insert_record(buf, event, extra, cycles, rec_size, extra_data);
 
     local_irq_restore(flags);
 
     /*
-     * Notify trace buffer consumer that we've reached the high water mark.
+     * Notify trace buffer consumer that we've crossed the high water mark.
     *
     */
-    if ( (buf->prod - buf->cons) == t_buf_highwater )
+    if ( started_below_highwater
+         && ( (buf->prod - buf->cons) > t_buf_highwater ) )
        raise_softirq(TRACE_SOFTIRQ);
 }
 
+
+void __trace_fixed(u32 event, unsigned long d1, unsigned long d2,
+           unsigned long d3, unsigned long d4, unsigned long d5)
+{
+    u32 extra_data[5];
+
+    /* In a 64-bit hypervisor, this will truncate to 32 bits. */
+    extra_data[0]=d1;
+    extra_data[1]=d2;
+    extra_data[2]=d3;
+    extra_data[3]=d4;
+    extra_data[4]=d5;
+
+    __trace_var(event, 1/* include cycles */, sizeof(*extra_data)*5,
+                (unsigned char *)extra_data);
+}
+
 /*
 * Local variables:
 * mode: C
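The producer above never lets a record straddle the end of the data area: if
the next record would not fit before the wrap point, it first pads with a
TRC_TRACE_WRAP_BUFFER record. A toy model of the arithmetic (standalone, with
made-up data_size, prod, and cons values; prod and cons are free-running byte
counters exactly as in the patch):

/* Toy model of the byte-oriented ring accounting used above. */
#include <assert.h>
#include <stdio.h>

static const unsigned data_size = 4096;   /* assumed data-area size */

static unsigned calc_bytes_to_wrap(unsigned prod)
{
    return data_size - (prod % data_size);
}

static unsigned calc_bytes_avail(unsigned prod, unsigned cons)
{
    return data_size - (prod - cons);
}

int main(void)
{
    unsigned prod = 4088, cons = 100;   /* free-running byte offsets */
    unsigned rec_size = 12;             /* header + two extra words */

    assert(calc_bytes_to_wrap(prod) == 8);
    assert(calc_bytes_avail(prod, cons) == 108);

    /* A 12-byte record does not fit in the 8 bytes left before the wrap
     * point, so the producer first emits an 8-byte wrap record (header
     * plus one filler word); the real record then starts at offset 0. */
    if ( rec_size > calc_bytes_to_wrap(prod) )
        prod += calc_bytes_to_wrap(prod);   /* the wrap record */
    prod += rec_size;                       /* the real record */

    printf("new prod %u, offset in buffer %u\n", prod, prod % data_size);
    return 0;
}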
--- a/xen/include/public/trace.h	Fri Sep 21 16:52:17 2007 +0100
+++ b/xen/include/public/trace.h	Fri Sep 21 15:26:07 2007 +0100
@@ -26,14 +26,21 @@
 #ifndef __XEN_PUBLIC_TRACE_H__
 #define __XEN_PUBLIC_TRACE_H__
 
+#define TRACE_EXTRA_MAX    7
+#define TRACE_EXTRA_SHIFT 28
+
 /* Trace classes */
 #define TRC_CLS_SHIFT 16
-#define TRC_GEN     0x0001f000    /* General trace            */
-#define TRC_SCHED   0x0002f000    /* Xen Scheduler trace      */
-#define TRC_DOM0OP  0x0004f000    /* Xen DOM0 operation trace */
-#define TRC_HVM     0x0008f000    /* Xen HVM trace            */
-#define TRC_MEM     0x0010f000    /* Xen memory trace         */
-#define TRC_ALL     0xfffff000
+#define TRC_GEN      0x0001f000    /* General trace            */
+#define TRC_SCHED    0x0002f000    /* Xen Scheduler trace      */
+#define TRC_DOM0OP   0x0004f000    /* Xen DOM0 operation trace */
+#define TRC_HVM      0x0008f000    /* Xen HVM trace            */
+#define TRC_MEM      0x0010f000    /* Xen memory trace         */
+#define TRC_ALL      0x0ffff000
+#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
+#define TRC_HD_CYCLE_FLAG (1UL<<31)
+#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) )
+#define TRC_HD_EXTRA(x)    (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX)
 
 /* Trace subclasses */
 #define TRC_SUBCLS_SHIFT 12
@@ -44,6 +51,8 @@
 
 /* Trace events per class */
 #define TRC_LOST_RECORDS        (TRC_GEN + 1)
+#define TRC_TRACE_WRAP_BUFFER   (TRC_GEN + 2)
+#define TRC_TRACE_CPU_CHANGE    (TRC_GEN + 3)
 
 #define TRC_SCHED_DOM_ADD       (TRC_SCHED + 1)
 #define TRC_SCHED_DOM_REM       (TRC_SCHED + 2)
@@ -92,9 +101,19 @@
 
 /* This structure represents a single trace buffer record. */
 struct t_rec {
-    uint64_t cycles;          /* cycle counter timestamp */
-    uint32_t event;           /* event ID                */
-    unsigned long data[5];    /* event data items        */
+    uint32_t header;          /* 31   : Cycles included?
+                                 30-28: # of extra words
+                                 27- 0: event ID */
+    union {
+        struct {
+            uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */
+            unsigned char data[28];        /* event data items        */
+        } cycles;
+        struct {
+            unsigned char data[28];        /* event data items        */
+        } nocycles;
+    }u;
+
 };
 
 /*
@@ -102,9 +121,9 @@ struct t_rec {
 * field, indexes into an array of struct t_rec's.
 */
 struct t_buf {
-    uint32_t cons;      /* Next item to be consumed by control tools. */
-    uint32_t prod;      /* Next item to be produced by Xen.           */
-    /* 'nr_recs' records follow immediately after the meta-data header.    */
+    uint32_t cons;   /* Offset of next item to be consumed by control tools. */
+    uint32_t prod;   /* Offset of next item to be produced by Xen.           */
+    /* Records follow immediately after the meta-data header.    */
 };
 
 #endif /* __XEN_PUBLIC_TRACE_H__ */
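Because records are now variable-size, a consumer walks a window by reading
each header and advancing accordingly. A minimal sketch (the bit layout
matches the new struct t_rec; walk() and the demo buffer are assumptions for
illustration, with the raw data already copied out as the daemon does):

/* Sketch: walking variable-size records in a window of raw trace data. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TRACE_EXTRA_MAX    7
#define TRACE_EXTRA_SHIFT  28
#define TRC_HD_TO_EVENT(x) ((x) & 0x0fffffff)
#define TRC_HD_CYCLE_FLAG  (1UL << 31)
#define TRC_HD_INCLUDES_CYCLE_COUNT(x) (!!((x) & TRC_HD_CYCLE_FLAG))
#define TRC_HD_EXTRA(x)    (((x) >> TRACE_EXTRA_SHIFT) & TRACE_EXTRA_MAX)

static void walk(const unsigned char *buf, size_t len)
{
    size_t off = 0;

    while ( off + sizeof(uint32_t) <= len )
    {
        uint32_t hdr;
        uint64_t tsc = 0;
        unsigned words;

        memcpy(&hdr, buf + off, sizeof(hdr));
        words = 1 + (TRC_HD_INCLUDES_CYCLE_COUNT(hdr) ? 2 : 0)
                  + TRC_HD_EXTRA(hdr);

        if ( TRC_HD_INCLUDES_CYCLE_COUNT(hdr) )
        {
            uint32_t lo, hi;
            memcpy(&lo, buf + off + 4, 4);
            memcpy(&hi, buf + off + 8, 4);
            tsc = ((uint64_t)hi << 32) | lo;   /* reassemble cycles_hi/lo */
        }

        printf("event %#x, %u extra words, tsc %llu\n",
               TRC_HD_TO_EVENT(hdr), TRC_HD_EXTRA(hdr),
               (unsigned long long)tsc);
        off += words * sizeof(uint32_t);
    }
}

int main(void)
{
    /* One fabricated 8-byte record: event TRC_GEN + 1, one extra word. */
    uint32_t demo[2] = { 0x0001f001 | (1u << TRACE_EXTRA_SHIFT), 42 };
    walk((const unsigned char *)demo, sizeof(demo));
    return 0;
}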
--- a/xen/include/xen/trace.h	Fri Sep 21 16:52:17 2007 +0100
+++ b/xen/include/xen/trace.h	Fri Sep 21 15:26:07 2007 +0100
@@ -33,27 +33,74 @@ void init_trace_bufs(void);
 /* used to retrieve the physical address of the trace buffers */
 int tb_control(struct xen_sysctl_tbuf_op *tbc);
 
-void trace(u32 event, unsigned long d1, unsigned long d2,
+void __trace_fixed(u32 event, unsigned long d1, unsigned long d2,
            unsigned long d3, unsigned long d4, unsigned long d5);
+void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
+
+static inline void trace_fixed(u32 event, unsigned long d1,
+                               unsigned long d2, unsigned long d3,
+                               unsigned long d4, unsigned long d5)
+{
+    if( unlikely(tb_init_done) )
+        __trace_fixed(event, d1, d2, d3, d4, d5);
+}
+
+static inline void trace_var(u32 event, int cycles, int extra,
+                             unsigned char *extra_data)
+{
+    if( unlikely(tb_init_done) )
+        __trace_var(event, cycles, extra, extra_data);
+}
 
-/* Avoids troubling the caller with casting their arguments to a trace macro */
-#define trace_do_casts(e,d1,d2,d3,d4,d5) \
-    do { \
-        if ( unlikely(tb_init_done) ) \
-            trace(e, \
-                  (unsigned long)(d1), \
-                  (unsigned long)(d2), \
-                  (unsigned long)(d3), \
-                  (unsigned long)(d4), \
-                  (unsigned long)(d5)); \
+/* Convenience macros for calling the trace function. */
+#define TRACE_0D(_e)                                            \
+    do {                                                        \
+        trace_var(_e, 1, 0, NULL);                              \
+    } while ( 0 )
+
+#define TRACE_1D(_e,_d)                                         \
+    do {                                                        \
+        u32 _d1;                                                \
+        _d1 = _d;                                               \
+        trace_var(_e, 1, sizeof(_d1), (unsigned char *)&_d1);   \
+    } while ( 0 )
+
+#define TRACE_2D(_e,d1,d2)                                      \
+    do {                                                        \
+        u32 _d[2];                                              \
+        _d[0]=d1;                                               \
+        _d[1]=d2;                                               \
+        trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d);   \
+    } while ( 0 )
+
+#define TRACE_3D(_e,d1,d2,d3)                                   \
+    do {                                                        \
+        u32 _d[3];                                              \
+        _d[0]=d1;                                               \
+        _d[1]=d2;                                               \
+        _d[2]=d3;                                               \
+        trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d);   \
+    } while ( 0 )
+
+#define TRACE_4D(_e,d1,d2,d3,d4)                                \
+    do {                                                        \
+        u32 _d[4];                                              \
+        _d[0]=d1;                                               \
+        _d[1]=d2;                                               \
+        _d[2]=d3;                                               \
+        _d[3]=d4;                                               \
+        trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d);   \
+    } while ( 0 )
+
+#define TRACE_5D(_e,d1,d2,d3,d4,d5)                             \
+    do {                                                        \
+        u32 _d[5];                                              \
+        _d[0]=d1;                                               \
+        _d[1]=d2;                                               \
+        _d[2]=d3;                                               \
+        _d[3]=d4;                                               \
+        _d[4]=d5;                                               \
+        trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d);   \
     } while ( 0 )
 
-/* Convenience macros for calling the trace function. */
-#define TRACE_0D(event)                trace_do_casts(event,0, 0, 0, 0, 0 )
-#define TRACE_1D(event,d)              trace_do_casts(event,d, 0, 0, 0, 0 )
-#define TRACE_2D(event,d1,d2)          trace_do_casts(event,d1,d2,0, 0, 0 )
-#define TRACE_3D(event,d1,d2,d3)       trace_do_casts(event,d1,d2,d3,0, 0 )
-#define TRACE_4D(event,d1,d2,d3,d4)    trace_do_casts(event,d1,d2,d3,d4,0 )
-#define TRACE_5D(event,d1,d2,d3,d4,d5) trace_do_casts(event,d1,d2,d3,d4,d5)
-
 #endif /* __XEN_TRACE_H__ */
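To see what a call site now hands to the variable-size path, here is a
standalone toy harness (not Xen code: trace_var() is stubbed with a printf,
and the TRACE_2D body is copied from the macro added above):

/* Toy harness: shows the arguments the new TRACE_2D macro builds. */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

static void trace_var(u32 event, int cycles, int extra,
                      unsigned char *extra_data)
{
    u32 *d = (u32 *)extra_data;
    int i;

    printf("event %#x, cycles %d, %d payload bytes:", event, cycles, extra);
    for ( i = 0; i < extra / 4; i++ )
        printf(" %u", d[i]);
    printf("\n");
}

#define TRACE_2D(_e,d1,d2)                                      \
    do {                                                        \
        u32 _d[2];                                              \
        _d[0]=d1;                                               \
        _d[1]=d2;                                               \
        trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d);   \
    } while ( 0 )

int main(void)
{
    /* TRC_SCHED_DOM_ADD is (TRC_SCHED + 1) == 0x0002f001 */
    TRACE_2D(0x0002f001, 7, 0);
    return 0;
}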