ia64/xen-unstable

changeset 15954:7ed576909132

[xen][tracing] Introduce variable-size trace records

This patch introduces variable-size trace records. Each record consists of
a 32-bit "header", an optional cycle count, and up to seven more 32-bit words.

The header is packed with the following information:
bits 0-27: The trace event.
bits 28-30: The number of 32-bit "extra" words in the record
bit 31: Does the trace include a 64-bit tsc?

This patch standardizes behavior with respect to 32- and 64-bit hypervisors and dom0s.

Note that this patch requires a new version of the xentrace daemon running in
dom0. The new daemon, instead of pre-pending the cpu to every record as it
writes it, inserts a "cpu change" record into the trace file that records the
cpu and the number of records it's about to write.

Signed-off-by: George Dunlap <gdunlap@xensource.com>
author George Dunlap <gdunlap@xensource.com>
date Fri Sep 21 15:26:07 2007 +0100 (2007-09-21)
parents 517432b9f8b5
children 305a8dbc264c
files tools/xentrace/xentrace.c xen/common/trace.c xen/include/public/trace.h xen/include/xen/trace.h
line diff
     1.1 --- a/tools/xentrace/xentrace.c	Fri Sep 21 16:52:17 2007 +0100
     1.2 +++ b/tools/xentrace/xentrace.c	Fri Sep 21 15:26:07 2007 +0100
     1.3 @@ -22,6 +22,7 @@
     1.4  #include <signal.h>
     1.5  #include <inttypes.h>
     1.6  #include <string.h>
     1.7 +#include <assert.h>
     1.8  
     1.9  #include <xen/xen.h>
    1.10  #include <xen/trace.h>
    1.11 @@ -83,24 +84,58 @@ struct timespec millis_to_timespec(unsig
    1.12  }
    1.13  
    1.14  /**
    1.15 - * write_rec - output a trace record in binary format
    1.16 + * write_buffer - write a section of the trace buffer
    1.17   * @cpu      - source buffer CPU ID
    1.18 - * @rec      - trace record to output
    1.19 + * @start
    1.20 + * @size     - size of write (may be less than total window size)
    1.21 + * @total_size - total size of the window (0 on 2nd write of wrapped windows)
    1.22   * @out      - output stream
    1.23   *
    1.24 - * Outputs the trace record to a filestream, prepending the CPU ID of the
    1.25 - * source trace buffer.
    1.26 + * Outputs the trace buffer to a filestream, prepending the CPU and size
    1.27 + * of the buffer write.
    1.28   */
    1.29 -void write_rec(unsigned int cpu, struct t_rec *rec, FILE *out)
    1.30 +void write_buffer(unsigned int cpu, unsigned char *start, int size,
    1.31 +               int total_size, int outfd)
    1.32  {
    1.33      size_t written = 0;
    1.34 -    written += fwrite(&cpu, sizeof(cpu), 1, out);
    1.35 -    written += fwrite(rec, sizeof(*rec), 1, out);
    1.36 -    if ( written != 2 )
    1.37 +    
    1.38 +    /* Write a CPU_BUF record on each buffer "window" written.  Wrapped
    1.39 +     * windows may involve two writes, so only write the record on the
    1.40 +     * first write. */
    1.41 +    if(total_size)
    1.42      {
    1.43 -        PERROR("Failed to write trace record");
    1.44 -        exit(EXIT_FAILURE);
    1.45 +        struct {
    1.46 +            uint32_t header;
    1.47 +            struct {
    1.48 +                unsigned cpu;
    1.49 +                unsigned byte_count;
    1.50 +            } extra;
    1.51 +        } rec;
    1.52 +
    1.53 +        rec.header = TRC_TRACE_CPU_CHANGE
    1.54 +            | ((sizeof(rec.extra)/sizeof(uint32_t)) << TRACE_EXTRA_SHIFT);
    1.55 +        rec.extra.cpu = cpu;
    1.56 +        rec.extra.byte_count = total_size;
    1.57 +
    1.58 +        written = write(outfd, &rec, sizeof(rec));
    1.59 +
    1.60 +        if(written!=sizeof(rec)) {
    1.61 +            fprintf(stderr, "Cannot write cpu change (write returned %d)\n",
    1.62 +                    written);
    1.63 +            goto fail;
    1.64 +        }
    1.65      }
    1.66 +
    1.67 +    written = write(outfd, start, size);
    1.68 +    if ( written != size ) {
    1.69 +        fprintf(stderr, "Write failed! (size %d, returned %d)\n",
    1.70 +                size, written);
    1.71 +        goto fail;
    1.72 +    }
    1.73 +    return;
    1.74 + fail:
    1.75 +    PERROR("Failed to write trace data");
    1.76 +    exit(EXIT_FAILURE);
    1.77  }
    1.78  
    1.79  static void get_tbufs(unsigned long *mfn, unsigned long *size)
    1.80 @@ -233,12 +268,12 @@ struct t_buf **init_bufs_ptrs(void *bufs
    1.81   * mapped in user space.  Note that the trace buffer metadata contains machine
    1.82   * pointers - the array returned allows more convenient access to them.
    1.83   */
    1.84 -struct t_rec **init_rec_ptrs(struct t_buf **meta, unsigned int num)
    1.85 +unsigned char **init_rec_ptrs(struct t_buf **meta, unsigned int num)
    1.86  {
    1.87      int i;
    1.88 -    struct t_rec **data;
    1.89 +    unsigned char **data;
    1.90      
    1.91 -    data = calloc(num, sizeof(struct t_rec *));
    1.92 +    data = calloc(num, sizeof(unsigned char *));
    1.93      if ( data == NULL )
    1.94      {
    1.95          PERROR("Failed to allocate memory for data pointers\n");
    1.96 @@ -246,7 +281,7 @@ struct t_rec **init_rec_ptrs(struct t_bu
    1.97      }
    1.98  
    1.99      for ( i = 0; i < num; i++ )
   1.100 -        data[i] = (struct t_rec *)(meta[i] + 1);
   1.101 +        data[i] = (unsigned char *)(meta[i] + 1);
   1.102  
   1.103      return data;
   1.104  }
   1.105 @@ -281,19 +316,19 @@ unsigned int get_num_cpus(void)
   1.106   * monitor_tbufs - monitor the contents of tbufs and output to a file
   1.107   * @logfile:       the FILE * representing the file to log to
   1.108   */
   1.109 -int monitor_tbufs(FILE *logfile)
   1.110 +int monitor_tbufs(int outfd)
   1.111  {
   1.112      int i;
   1.113  
   1.114      void *tbufs_mapped;          /* pointer to where the tbufs are mapped    */
   1.115      struct t_buf **meta;         /* pointers to the trace buffer metadata    */
   1.116 -    struct t_rec **data;         /* pointers to the trace buffer data areas
   1.117 +    unsigned char **data;        /* pointers to the trace buffer data areas
   1.118                                    * where they are mapped into user space.   */
   1.119      unsigned long tbufs_mfn;     /* mfn of the tbufs                         */
   1.120      unsigned int  num;           /* number of trace buffers / logical CPUS   */
   1.121      unsigned long size;          /* size of a single trace buffer            */
   1.122  
   1.123 -    int size_in_recs;
   1.124 +    unsigned long data_size;
   1.125  
   1.126      /* get number of logical CPUs (and therefore number of trace buffers) */
   1.127      num = get_num_cpus();
   1.128 @@ -302,7 +337,7 @@ int monitor_tbufs(FILE *logfile)
   1.129      get_tbufs(&tbufs_mfn, &size);
   1.130      tbufs_mapped = map_tbufs(tbufs_mfn, num, size);
   1.131  
   1.132 -    size_in_recs = (size - sizeof(struct t_buf)) / sizeof(struct t_rec);
   1.133 +    data_size = (size - sizeof(struct t_buf));
   1.134  
   1.135      /* build arrays of convenience ptrs */
   1.136      meta  = init_bufs_ptrs(tbufs_mapped, num, size);
   1.137 @@ -317,13 +352,48 @@ int monitor_tbufs(FILE *logfile)
   1.138      {
   1.139          for ( i = 0; (i < num) && !interrupted; i++ )
   1.140          {
   1.141 -            while ( meta[i]->cons != meta[i]->prod )
   1.142 +            unsigned long start_offset, end_offset, window_size, cons, prod;
   1.143 +            rmb(); /* read prod, then read item. */
   1.144 +                
   1.145 +            /* Read window information only once. */
   1.146 +            cons = meta[i]->cons;
   1.147 +            prod = meta[i]->prod;
   1.148 +            
   1.149 +            if(cons == prod)
   1.150 +                continue;
   1.151 +           
   1.152 +            assert(prod > cons);
   1.153 +
   1.154 +            window_size = prod - cons;
   1.155 +            start_offset = cons % data_size;
   1.156 +            end_offset = prod % data_size;
   1.157 +
   1.158 +            if(end_offset > start_offset)
   1.159              {
   1.160 -                rmb(); /* read prod, then read item. */
   1.161 -                write_rec(i, data[i] + meta[i]->cons % size_in_recs, logfile);
   1.162 -                mb(); /* read item, then update cons. */
   1.163 -                meta[i]->cons++;
   1.164 +                /* If window does not wrap, write in one big chunk */
   1.165 +                write_buffer(i, data[i]+start_offset,
   1.166 +                             window_size,
   1.167 +                             window_size,
   1.168 +                             outfd);
   1.169              }
   1.170 +            else
   1.171 +            {
   1.172 +                /* If wrapped, write in two chunks:
   1.173 +                 * - first, start to the end of the buffer
   1.174 +                 * - second, start of buffer to end of window
   1.175 +                 */
   1.176 +                write_buffer(i, data[i]+start_offset,
   1.177 +                             data_size - start_offset,
   1.178 +                             window_size,
   1.179 +                             outfd);
   1.180 +                write_buffer(i, data[i],
   1.181 +                             end_offset,
   1.182 +                             0,
   1.183 +                             outfd);
   1.184 +            }
   1.185 +
   1.186 +            mb(); /* read buffer, then update cons. */
   1.187 +            meta[i]->cons = meta[i]->prod;
   1.188          }
   1.189  
   1.190          nanosleep(&opts.poll_sleep, NULL);
   1.191 @@ -333,7 +403,7 @@ int monitor_tbufs(FILE *logfile)
   1.192      free(meta);
   1.193      free(data);
   1.194      /* don't need to munmap - cleanup is automatic */
   1.195 -    fclose(logfile);
   1.196 +    close(outfd);
   1.197  
   1.198      return 0;
   1.199  }
   1.200 @@ -503,7 +573,6 @@ const char *argp_program_bug_address = "
   1.201  int main(int argc, char **argv)
   1.202  {
   1.203      int outfd = 1, ret;
   1.204 -    FILE *logfile;
   1.205      struct sigaction act;
   1.206  
   1.207      opts.outfile = 0;
   1.208 @@ -537,8 +606,6 @@ int main(int argc, char **argv)
   1.209          exit(EXIT_FAILURE);
   1.210      }
   1.211  
   1.212 -    logfile = fdopen(outfd, "w");
   1.213 -    
   1.214      /* ensure that if we get a signal, we'll do cleanup, then exit */
   1.215      act.sa_handler = close_handler;
   1.216      act.sa_flags = 0;
   1.217 @@ -547,7 +614,16 @@ int main(int argc, char **argv)
   1.218      sigaction(SIGTERM, &act, NULL);
   1.219      sigaction(SIGINT,  &act, NULL);
   1.220  
   1.221 -    ret = monitor_tbufs(logfile);
   1.222 +    ret = monitor_tbufs(outfd);
   1.223  
   1.224      return ret;
   1.225  }
   1.226 +/*
   1.227 + * Local variables:
   1.228 + * mode: C
   1.229 + * c-set-style: "BSD"
   1.230 + * c-basic-offset: 4
   1.231 + * tab-width: 4
   1.232 + * indent-tabs-mode: nil
   1.233 + * End:
   1.234 + */
     2.1 --- a/xen/common/trace.c	Fri Sep 21 16:52:17 2007 +0100
     2.2 +++ b/xen/common/trace.c	Fri Sep 21 15:26:07 2007 +0100
     2.3 @@ -43,19 +43,14 @@ CHECK_t_buf;
     2.4  #define TB_COMPAT 0
     2.5  #endif
     2.6  
     2.7 -typedef union {
     2.8 -	struct t_rec *nat;
     2.9 -	struct compat_t_rec *cmp;
    2.10 -} t_rec_u;
    2.11 -
    2.12  /* opt_tbuf_size: trace buffer size (in pages) */
    2.13  static unsigned int opt_tbuf_size = 0;
    2.14  integer_param("tbuf_size", opt_tbuf_size);
    2.15  
    2.16  /* Pointers to the meta-data objects for all system trace buffers */
    2.17  static DEFINE_PER_CPU(struct t_buf *, t_bufs);
    2.18 -static DEFINE_PER_CPU(t_rec_u, t_recs);
    2.19 -static int nr_recs;
    2.20 +static DEFINE_PER_CPU(unsigned char *, t_data);
    2.21 +static int data_size;
    2.22  
    2.23  /* High water mark for trace buffers; */
    2.24  /* Send virtual interrupt when buffer level reaches this point */
    2.25 @@ -102,8 +97,7 @@ static int alloc_trace_bufs(void)
    2.26  
    2.27      nr_pages = num_online_cpus() * opt_tbuf_size;
    2.28      order    = get_order_from_pages(nr_pages);
    2.29 -    nr_recs  = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf)) /
    2.30 -        (!TB_COMPAT ? sizeof(struct t_rec) : sizeof(struct compat_t_rec));
    2.31 +    data_size  = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));
    2.32      
    2.33      if ( (rawbuf = alloc_xenheap_pages(order)) == NULL )
    2.34      {
    2.35 @@ -122,10 +116,10 @@ static int alloc_trace_bufs(void)
    2.36          buf = per_cpu(t_bufs, i) = (struct t_buf *)
    2.37              &rawbuf[i*opt_tbuf_size*PAGE_SIZE];
    2.38          buf->cons = buf->prod = 0;
    2.39 -        per_cpu(t_recs, i).nat = (struct t_rec *)(buf + 1);
    2.40 +        per_cpu(t_data, i) = (unsigned char *)(buf + 1);
    2.41      }
    2.42  
    2.43 -    t_buf_highwater = nr_recs >> 1; /* 50% high water */
    2.44 +    t_buf_highwater = data_size >> 1; /* 50% high water */
    2.45      open_softirq(TRACE_SOFTIRQ, trace_notify_guest);
    2.46  
    2.47      return 0;
    2.48 @@ -235,6 +229,129 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
    2.49      return rc;
    2.50  }
    2.51  
    2.52 +static inline int calc_rec_size(int cycles, int extra) 
    2.53 +{
    2.54 +    int rec_size;
    2.55 +    rec_size = 4;
    2.56 +    if(cycles)
    2.57 +        rec_size += 8;
    2.58 +    rec_size += extra;
    2.59 +    return rec_size;
    2.60 +}
    2.61 +
    2.62 +static inline int calc_bytes_to_wrap(struct t_buf *buf)
    2.63 +{
    2.64 +    return data_size - (buf->prod % data_size);
    2.65 +}
    2.66 +
    2.67 +static inline unsigned calc_bytes_avail(struct t_buf *buf)
    2.68 +{
    2.69 +    return data_size - (buf->prod - buf->cons);
    2.70 +}
    2.71 +
    2.72 +static inline int __insert_record(struct t_buf *buf,
    2.73 +                                  unsigned long event,
    2.74 +                                  int extra,
    2.75 +                                  int cycles,
    2.76 +                                  int rec_size,
    2.77 +                                  unsigned char * extra_data) 
    2.78 +{
    2.79 +    struct t_rec *rec;
    2.80 +    unsigned char * dst;
    2.81 +    unsigned long extra_word = extra/sizeof(u32);
    2.82 +    int local_rec_size = calc_rec_size(cycles, extra);
    2.83 +
    2.84 +    BUG_ON(local_rec_size != rec_size);
    2.85 +
    2.86 +    /* Double-check once more that we have enough space.
    2.87 +     * Don't bugcheck here, in case the userland tool is doing
    2.88 +     * something stupid. */
    2.89 +    if(calc_bytes_avail(buf) < rec_size )
    2.90 +    {
    2.91 +        printk("%s: %u bytes left (%u - (%u - %u)) recsize %u.\n",
    2.92 +               __func__,
    2.93 +               data_size - (buf->prod - buf->cons),
    2.94 +               data_size,
    2.95 +               buf->prod, buf->cons, rec_size);
    2.96 +        return 0;
    2.97 +    }
    2.98 +    rmb();
    2.99 +
   2.100 +    rec = (struct t_rec *)&this_cpu(t_data)[buf->prod % data_size];
   2.101 +    rec->header = event;
   2.102 +    rec->header |= extra_word << TRACE_EXTRA_SHIFT;
   2.103 +    if(cycles) 
   2.104 +    {
   2.105 +        u64 tsc = (u64)get_cycles();
   2.106 +
   2.107 +        rec->header |= TRC_HD_CYCLE_FLAG;
   2.108 +        rec->u.cycles.cycles_lo = tsc & ((((u64)1)<<32)-1);
   2.109 +        rec->u.cycles.cycles_hi = tsc >> 32;
   2.110 +        dst = rec->u.cycles.data;
   2.111 +    } 
   2.112 +    else
   2.113 +        dst = rec->u.nocycles.data;
   2.114 +
   2.115 +    if(extra_data && extra)
   2.116 +        memcpy(dst, extra_data, extra);
   2.117 +
   2.118 +    wmb();
   2.119 +    buf->prod+=rec_size;
   2.120 +
   2.121 +    return rec_size;
   2.122 +}
   2.123 +
   2.124 +static inline int insert_wrap_record(struct t_buf *buf, int size)
   2.125 +{
   2.126 +    int space_left = calc_bytes_to_wrap(buf);
   2.127 +    unsigned long extra_space = space_left - sizeof(u32);
   2.128 +    int cycles=0;
   2.129 +
   2.130 +    if(space_left > size)
   2.131 +        printk("%s: space_left %d, size %d!\n",
   2.132 +               __func__, space_left, size);
   2.133 +
   2.134 +    BUG_ON(space_left > size);
   2.135 +
   2.136 +    /* We may need to add cycles to take up enough space... */
   2.137 +    if((extra_space/sizeof(u32)) > TRACE_EXTRA_MAX)
   2.138 +    {
   2.139 +        cycles = 1;
   2.140 +        extra_space -= sizeof(u64);
   2.141 +        
   2.142 +        ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
   2.143 +    }
   2.144 +
   2.145 +
   2.146 +    return __insert_record(buf,
   2.147 +                    TRC_TRACE_WRAP_BUFFER,
   2.148 +                    extra_space,
   2.149 +                    cycles,
   2.150 +                    space_left,
   2.151 +                    NULL);
   2.152 +}
   2.153 +
   2.154 +#define LOST_REC_SIZE 8
   2.155 +
   2.156 +static inline int insert_lost_records(struct t_buf *buf)
   2.157 +{
   2.158 +    struct {
   2.159 +        u32 lost_records;
   2.160 +    } ed;
   2.161 +
   2.162 +    ed.lost_records = this_cpu(lost_records);
   2.163 +
   2.164 +    this_cpu(lost_records) = 0;
   2.165 +
   2.166 +    return __insert_record(buf,
   2.167 +                           TRC_LOST_RECORDS,
   2.168 +                           sizeof(ed),
   2.169 +                           0 /* !cycles */,
   2.170 +                           LOST_REC_SIZE,
   2.171 +                           (unsigned char *)&ed);
   2.172 +}
   2.173 +
   2.174 +
   2.175  /**
   2.176   * trace - Enters a trace tuple into the trace buffer for the current CPU.
   2.177   * @event: the event type being logged
   2.178 @@ -244,14 +361,32 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
   2.179   * failure, otherwise 0.  Failure occurs only if the trace buffers are not yet
   2.180   * initialised.
   2.181   */
   2.182 -void trace(u32 event, unsigned long d1, unsigned long d2,
   2.183 -           unsigned long d3, unsigned long d4, unsigned long d5)
   2.184 +void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
   2.185  {
   2.186      struct t_buf *buf;
   2.187 -    t_rec_u rec;
   2.188 -    unsigned long flags;
   2.189 +    unsigned long flags, bytes_to_tail, bytes_to_wrap;
   2.190 +    int rec_size, total_size;
   2.191 +    int extra_word;
   2.192 +    int started_below_highwater;
   2.193 +
   2.194 +    if(!tb_init_done)
   2.195 +        return;
   2.196 +
   2.197 +    /* Convert byte count into word count, rounding up */
   2.198 +    extra_word = (extra / sizeof(u32));
   2.199 +    if((extra % sizeof(u32)) != 0)
   2.200 +        extra_word++;
   2.201      
   2.202 -    BUG_ON(!tb_init_done);
   2.203 +#if !NDEBUG
   2.204 +    ASSERT(extra_word<=TRACE_EXTRA_MAX);
   2.205 +#else
   2.206 +    /* Not worth crashing a production system over */
   2.207 +    if(extra_word > TRACE_EXTRA_MAX)
   2.208 +        extra_word = TRACE_EXTRA_MAX;
   2.209 +#endif
   2.210 +
   2.211 +    /* Round size up to nearest word */
   2.212 +    extra = extra_word * sizeof(u32);
   2.213  
   2.214      if ( (tb_event_mask & event) == 0 )
   2.215          return;
   2.216 @@ -275,75 +410,127 @@ void trace(u32 event, unsigned long d1, 
   2.217  
   2.218      local_irq_save(flags);
   2.219  
   2.220 -    /* Check if space for two records (we write two if there are lost recs). */
   2.221 -    if ( (buf->prod - buf->cons) >= (nr_recs - 1) )
   2.222 +    started_below_highwater = ( (buf->prod - buf->cons) < t_buf_highwater );
   2.223 +
   2.224 +    /* Calculate the record size */
   2.225 +    rec_size = calc_rec_size(cycles, extra);
   2.226 + 
   2.227 +    /* How many bytes are available in the buffer? */
   2.228 +    bytes_to_tail = calc_bytes_avail(buf);
   2.229 +    
   2.230 +    /* How many bytes until the next wrap-around? */
   2.231 +    bytes_to_wrap = calc_bytes_to_wrap(buf);
   2.232 +    
   2.233 +    /* 
   2.234 +     * Calculate expected total size to commit this record by
   2.235 +     * doing a dry-run.
   2.236 +     */
   2.237 +    total_size = 0;
   2.238 +
   2.239 +    /* First, check to see if we need to include a lost_record.
   2.240 +     *
   2.241 +     * calc_bytes_to_wrap() involves integer division, which we'd like to
   2.242 +     * avoid if we can.  So do the math, check it in debug versions, and
   2.243 +     * do a final check always if we happen to write a record.
   2.244 +     */
   2.245 +    if(this_cpu(lost_records))
   2.246 +    {
   2.247 +        if(LOST_REC_SIZE > bytes_to_wrap)
   2.248 +        {
   2.249 +            total_size += bytes_to_wrap;
   2.250 +            bytes_to_wrap = data_size;
   2.251 +        } 
   2.252 +        else
   2.253 +        {
   2.254 +            bytes_to_wrap -= LOST_REC_SIZE;
   2.255 +            if(bytes_to_wrap == 0)
   2.256 +                bytes_to_wrap == data_size;
   2.257 +        }
   2.258 +        total_size += LOST_REC_SIZE;
   2.259 +    }
   2.260 +
   2.261 +    ASSERT(bytes_to_wrap == calc_bytes_to_wrap(buf));
   2.262 +
   2.263 +    if(rec_size > bytes_to_wrap)
   2.264 +    {
   2.265 +        total_size += bytes_to_wrap;
   2.266 +        bytes_to_wrap = data_size;
   2.267 +    } 
   2.268 +    else
   2.269 +    {
   2.270 +        bytes_to_wrap -= rec_size;
   2.271 +    }
   2.272 +
   2.273 +    total_size += rec_size;
   2.274 +
   2.275 +    /* Do we have enough space for everything? */
   2.276 +    if(total_size > bytes_to_tail) 
   2.277      {
   2.278          this_cpu(lost_records)++;
   2.279          local_irq_restore(flags);
   2.280          return;
   2.281      }
   2.282  
   2.283 -    if ( unlikely(this_cpu(lost_records) != 0) )
   2.284 +    /*
   2.285 +     * Now, actually write information 
   2.286 +     */
   2.287 +    bytes_to_wrap = calc_bytes_to_wrap(buf);
   2.288 +
   2.289 +    if(this_cpu(lost_records))
   2.290      {
   2.291 -        if ( !TB_COMPAT )
   2.292 +        if(LOST_REC_SIZE > bytes_to_wrap)
   2.293          {
   2.294 -            rec.nat = &this_cpu(t_recs).nat[buf->prod % nr_recs];
   2.295 -            memset(rec.nat, 0, sizeof(*rec.nat));
   2.296 -            rec.nat->cycles  = (u64)get_cycles();
   2.297 -            rec.nat->event   = TRC_LOST_RECORDS;
   2.298 -            rec.nat->data[0] = this_cpu(lost_records);
   2.299 -            this_cpu(lost_records) = 0;
   2.300 -        }
   2.301 +            insert_wrap_record(buf, LOST_REC_SIZE);
   2.302 +            bytes_to_wrap = data_size;
   2.303 +        } 
   2.304          else
   2.305          {
   2.306 -            rec.cmp = &this_cpu(t_recs).cmp[buf->prod % nr_recs];
   2.307 -            memset(rec.cmp, 0, sizeof(*rec.cmp));
   2.308 -            rec.cmp->cycles  = (u64)get_cycles();
   2.309 -            rec.cmp->event   = TRC_LOST_RECORDS;
   2.310 -            rec.cmp->data[0] = this_cpu(lost_records);
   2.311 -            this_cpu(lost_records) = 0;
   2.312 +            bytes_to_wrap -= LOST_REC_SIZE;
   2.313 +            /* LOST_REC might line up perfectly with the buffer wrap */
   2.314 +            if(bytes_to_wrap == 0)
   2.315 +                bytes_to_wrap = data_size;
   2.316          }
   2.317 -
   2.318 -        wmb();
   2.319 -        buf->prod++;
   2.320 +        insert_lost_records(buf);
   2.321      }
   2.322  
   2.323 -    if ( !TB_COMPAT )
   2.324 -    {
   2.325 -        rec.nat = &this_cpu(t_recs).nat[buf->prod % nr_recs];
   2.326 -        rec.nat->cycles  = (u64)get_cycles();
   2.327 -        rec.nat->event   = event;
   2.328 -        rec.nat->data[0] = d1;
   2.329 -        rec.nat->data[1] = d2;
   2.330 -        rec.nat->data[2] = d3;
   2.331 -        rec.nat->data[3] = d4;
   2.332 -        rec.nat->data[4] = d5;
   2.333 -    }
   2.334 -    else
   2.335 +    ASSERT(bytes_to_wrap == calc_bytes_to_wrap(buf));
   2.336 +
   2.337 +    if(rec_size > bytes_to_wrap)
   2.338      {
   2.339 -        rec.cmp = &this_cpu(t_recs).cmp[buf->prod % nr_recs];
   2.340 -        rec.cmp->cycles  = (u64)get_cycles();
   2.341 -        rec.cmp->event   = event;
   2.342 -        rec.cmp->data[0] = d1;
   2.343 -        rec.cmp->data[1] = d2;
   2.344 -        rec.cmp->data[2] = d3;
   2.345 -        rec.cmp->data[3] = d4;
   2.346 -        rec.cmp->data[4] = d5;
   2.347 -    }
   2.348 +        insert_wrap_record(buf, rec_size);
   2.349 +    } 
   2.350  
   2.351 -    wmb();
   2.352 -    buf->prod++;
   2.353 +    /* Write the original record */
   2.354 +    __insert_record(buf, event, extra, cycles, rec_size, extra_data);
   2.355  
   2.356      local_irq_restore(flags);
   2.357  
   2.358      /*
   2.359 -     * Notify trace buffer consumer that we've reached the high water mark.
   2.360 +     * Notify trace buffer consumer that we've crossed the high water mark.
   2.361       *
   2.362       */
   2.363 -    if ( (buf->prod - buf->cons) == t_buf_highwater )
   2.364 +    if ( started_below_highwater
   2.365 +         && ( (buf->prod - buf->cons) > t_buf_highwater ) )
   2.366          raise_softirq(TRACE_SOFTIRQ);
   2.367  }
   2.368  
   2.369 +
   2.370 +void __trace_fixed(u32 event, unsigned long d1, unsigned long d2,
   2.371 +           unsigned long d3, unsigned long d4, unsigned long d5)
   2.372 +{
   2.373 +    u32 extra_data[5];
   2.374 +    
   2.375 +    /* In a 64-bit hypervisor, this will truncate to 32 bits. */
   2.376 +    extra_data[0]=d1;
   2.377 +    extra_data[1]=d2;
   2.378 +    extra_data[2]=d3;
   2.379 +    extra_data[3]=d4;
   2.380 +    extra_data[4]=d5;
   2.381 +
   2.382 +    __trace_var(event, 1/* include cycles */, sizeof(*extra_data)*5,
   2.383 +              (unsigned char *)extra_data);
   2.384 +}
   2.385 +
   2.386  /*
   2.387   * Local variables:
   2.388   * mode: C
     3.1 --- a/xen/include/public/trace.h	Fri Sep 21 16:52:17 2007 +0100
     3.2 +++ b/xen/include/public/trace.h	Fri Sep 21 15:26:07 2007 +0100
     3.3 @@ -26,14 +26,21 @@
     3.4  #ifndef __XEN_PUBLIC_TRACE_H__
     3.5  #define __XEN_PUBLIC_TRACE_H__
     3.6  
     3.7 +#define TRACE_EXTRA_MAX    7
     3.8 +#define TRACE_EXTRA_SHIFT 28
     3.9 +
    3.10  /* Trace classes */
    3.11  #define TRC_CLS_SHIFT 16
    3.12 -#define TRC_GEN     0x0001f000    /* General trace            */
    3.13 -#define TRC_SCHED   0x0002f000    /* Xen Scheduler trace      */
    3.14 -#define TRC_DOM0OP  0x0004f000    /* Xen DOM0 operation trace */
    3.15 -#define TRC_HVM     0x0008f000    /* Xen HVM trace            */
    3.16 -#define TRC_MEM     0x0010f000    /* Xen memory trace         */
    3.17 -#define TRC_ALL     0xfffff000
    3.18 +#define TRC_GEN      0x0001f000    /* General trace            */
    3.19 +#define TRC_SCHED    0x0002f000    /* Xen Scheduler trace      */
    3.20 +#define TRC_DOM0OP   0x0004f000    /* Xen DOM0 operation trace */
    3.21 +#define TRC_HVM      0x0008f000    /* Xen HVM trace            */
    3.22 +#define TRC_MEM      0x0010f000    /* Xen memory trace         */
    3.23 +#define TRC_ALL      0x0ffff000
    3.24 +#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
    3.25 +#define TRC_HD_CYCLE_FLAG (1UL<<31)
    3.26 +#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) )
    3.27 +#define TRC_HD_EXTRA(x)    (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX)
    3.28  
    3.29  /* Trace subclasses */
    3.30  #define TRC_SUBCLS_SHIFT 12
    3.31 @@ -44,6 +51,8 @@
    3.32  
    3.33  /* Trace events per class */
    3.34  #define TRC_LOST_RECORDS        (TRC_GEN + 1)
    3.35 +#define TRC_TRACE_WRAP_BUFFER  (TRC_GEN + 2)
    3.36 +#define TRC_TRACE_CPU_CHANGE    (TRC_GEN + 3)
    3.37  
    3.38  #define TRC_SCHED_DOM_ADD       (TRC_SCHED +  1)
    3.39  #define TRC_SCHED_DOM_REM       (TRC_SCHED +  2)
    3.40 @@ -92,9 +101,19 @@
    3.41  
    3.42  /* This structure represents a single trace buffer record. */
    3.43  struct t_rec {
    3.44 -    uint64_t cycles;          /* cycle counter timestamp */
    3.45 -    uint32_t event;           /* event ID                */
    3.46 -    unsigned long data[5];    /* event data items        */
    3.47 +    uint32_t header;           /* 31   : Cycles included?
    3.48 +                                  30-28: # of extra words
    3.49 +                                  27- 0: event ID                */
    3.50 +    union {
    3.51 +        struct {
    3.52 +            uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */
    3.53 +            unsigned char data[28];        /* event data items        */
    3.54 +        } cycles;
    3.55 +        struct {
    3.56 +            unsigned char data[28];        /* event data items        */
    3.57 +        } nocycles;
    3.58 +    }u;
    3.59 +        
    3.60  };
    3.61  
    3.62  /*
    3.63 @@ -102,9 +121,9 @@ struct t_rec {
    3.64   * field, indexes into an array of struct t_rec's.
    3.65   */
    3.66  struct t_buf {
    3.67 -    uint32_t cons;      /* Next item to be consumed by control tools. */
    3.68 -    uint32_t prod;      /* Next item to be produced by Xen.           */
    3.69 -    /* 'nr_recs' records follow immediately after the meta-data header.    */
    3.70 +    uint32_t cons;   /* Offset of next item to be consumed by control tools. */
    3.71 +    uint32_t prod;   /* Offset of next item to be produced by Xen.           */
    3.72 +    /*  Records follow immediately after the meta-data header.    */
    3.73  };
    3.74  
    3.75  #endif /* __XEN_PUBLIC_TRACE_H__ */
     4.1 --- a/xen/include/xen/trace.h	Fri Sep 21 16:52:17 2007 +0100
     4.2 +++ b/xen/include/xen/trace.h	Fri Sep 21 15:26:07 2007 +0100
     4.3 @@ -33,27 +33,74 @@ void init_trace_bufs(void);
     4.4  /* used to retrieve the physical address of the trace buffers */
     4.5  int tb_control(struct xen_sysctl_tbuf_op *tbc);
     4.6  
     4.7 -void trace(u32 event, unsigned long d1, unsigned long d2,
     4.8 +void __trace_fixed(u32 event, unsigned long d1, unsigned long d2,
     4.9             unsigned long d3, unsigned long d4, unsigned long d5);
    4.10 +void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
    4.11 +
    4.12 +static inline void trace_fixed(u32 event, unsigned long d1,
    4.13 +                               unsigned long d2, unsigned long d3,
    4.14 +                               unsigned long d4, unsigned long d5)
    4.15 +{
    4.16 +    if( unlikely(tb_init_done) )
    4.17 +        __trace_fixed(event, d1, d2, d3, d4, d5);
    4.18 +}
    4.19 +
    4.20 +static inline void trace_var(u32 event, int cycles, int extra,
    4.21 +                               unsigned char *extra_data)
    4.22 +{
    4.23 +    if( unlikely(tb_init_done) )
    4.24 +        __trace_var(event, cycles, extra, extra_data);
    4.25 +}
    4.26  
    4.27 -/* Avoids troubling the caller with casting their arguments to a trace macro */
    4.28 -#define trace_do_casts(e,d1,d2,d3,d4,d5) \
    4.29 -    do {                                 \
    4.30 -        if ( unlikely(tb_init_done) )    \
    4.31 -            trace(e,                     \
    4.32 -                  (unsigned long)(d1),   \
    4.33 -                  (unsigned long)(d2),   \
    4.34 -                  (unsigned long)(d3),   \
    4.35 -                  (unsigned long)(d4),   \
    4.36 -                  (unsigned long)(d5));  \
    4.37 +/* Convenience macros for calling the trace function. */
    4.38 +#define TRACE_0D(_e)                            \
    4.39 +    do {                                        \
    4.40 +        trace_var(_e, 1, 0, NULL);              \
    4.41 +    } while ( 0 )
    4.42 +  
    4.43 +#define TRACE_1D(_e,_d)                                         \
    4.44 +    do {                                                        \
    4.45 +        u32 _d1;                                                \
    4.46 +        _d1 = _d;                                               \
    4.47 +        trace_var(_e, 1, sizeof(_d1), (unsigned char *)&_d1);	\
    4.48 +    } while ( 0 )
    4.49 + 
    4.50 +#define TRACE_2D(_e,d1,d2)                                      \
    4.51 +    do {							\
    4.52 +        u32 _d[2];						\
    4.53 +        _d[0]=d1;						\
    4.54 +        _d[1]=d2;						\
    4.55 +        trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d);	\
    4.56 +    } while ( 0 )
    4.57 + 
    4.58 +#define TRACE_3D(_e,d1,d2,d3)                                   \
    4.59 +    do {                                                        \
    4.60 +        u32 _d[3];                                              \
    4.61 +        _d[0]=d1;                                               \
    4.62 +        _d[1]=d2;                                               \
    4.63 +        _d[2]=d3;                                               \
    4.64 +        trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d);	\
    4.65 +    } while ( 0 )
    4.66 + 
    4.67 +#define TRACE_4D(_e,d1,d2,d3,d4)                                \
    4.68 +    do {                                                        \
    4.69 +        u32 _d[4];                                              \
    4.70 +        _d[0]=d1;                                               \
    4.71 +        _d[1]=d2;                                               \
    4.72 +        _d[2]=d3;                                               \
    4.73 +        _d[3]=d4;                                               \
    4.74 +        trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d);	\
    4.75 +    } while ( 0 )
    4.76 + 
    4.77 +#define TRACE_5D(_e,d1,d2,d3,d4,d5)                             \
    4.78 +    do {							\
    4.79 +        u32 _d[5];						\
    4.80 +        _d[0]=d1;						\
    4.81 +        _d[1]=d2;						\
    4.82 +        _d[2]=d3;						\
    4.83 +        _d[3]=d4;						\
    4.84 +        _d[4]=d5;						\
    4.85 +        trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d);	\
    4.86      } while ( 0 )
    4.87  
    4.88 -/* Convenience macros for calling the trace function. */
    4.89 -#define TRACE_0D(event)                trace_do_casts(event,0, 0, 0, 0, 0 )
    4.90 -#define TRACE_1D(event,d)              trace_do_casts(event,d, 0, 0, 0, 0 )
    4.91 -#define TRACE_2D(event,d1,d2)          trace_do_casts(event,d1,d2,0, 0, 0 )
    4.92 -#define TRACE_3D(event,d1,d2,d3)       trace_do_casts(event,d1,d2,d3,0, 0 )
    4.93 -#define TRACE_4D(event,d1,d2,d3,d4)    trace_do_casts(event,d1,d2,d3,d4,0 )
    4.94 -#define TRACE_5D(event,d1,d2,d3,d4,d5) trace_do_casts(event,d1,d2,d3,d4,d5)
    4.95 -
    4.96  #endif /* __XEN_TRACE_H__ */