ia64/xen-unstable

changeset 17014:98e9d5d4b309

xentrace: Allow xentrace to handle >4G of trace data.
It previously failed an assertion when it hit 4GB.

Also, because the trace buffer is not a power of 2 in size,
using modulo arithmetic to address the buffer does not work
when the index wraps around 2^32.

This patch fixes both issues, and as a side effect, removes all
integer division from the hypervisor side of the trace mechanism.
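
For illustration (not part of the original patch; X=1000 is a hypothetical
stand-in for the real buffer size): a uint32_t byte counter addressed with
"% X" jumps to the wrong offset when the counter wraps at 2^32, because X
does not divide 2^32 unless X is a power of 2.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint32_t X = 1000;          /* not a power of 2 */
        uint32_t prod = UINT32_MAX - 3;   /* counter just below 2^32 */
        uint32_t before = prod % X;       /* offset 292 */

        prod += 8;                        /* uint32_t wraps: prod is now 4 */
        printf("got %u, wanted %u\n", prod % X, (before + 8) % X);  /* 4 vs 300 */
        return 0;
    }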

Signed-off-by: Michael A Fetterman <Michael.Fetterman@cl.cam.ac.uk>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Feb 11 09:46:21 2008 +0000 (2008-02-11)
parents 7d03c0b07504
children 7a415bce11c9
files tools/xentrace/xentrace.c xen/common/trace.c xen/include/public/trace.h
--- a/tools/xentrace/xentrace.c	Mon Feb 11 09:45:36 2008 +0000
+++ b/tools/xentrace/xentrace.c	Mon Feb 11 09:46:21 2008 +0000
@@ -362,9 +362,18 @@ int monitor_tbufs(int outfd)
             if ( cons == prod )
                 continue;
 
-            assert(prod > cons);
+            assert(cons < 2*data_size);
+            assert(prod < 2*data_size);
 
-            window_size = prod - cons;
+            // NB: if (prod<cons), then (prod-cons)%data_size will not yield
+            // the correct answer because data_size is not a power of 2.
+            if ( prod < cons )
+                window_size = (prod + 2*data_size) - cons;
+            else
+                window_size = prod - cons;
+            assert(window_size > 0);
+            assert(window_size <= data_size);
+
             start_offset = cons % data_size;
             end_offset = prod % data_size;
 
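
To see the new window computation above in isolation, here is a
consumer-side sketch (hypothetical harness, not part of the patch;
data_size=1000 is a stand-in): cons/prod in [0, 2*data_size) map to one
or two contiguous chunks of the data area.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    static const uint32_t data_size = 1000;   /* not a power of 2 */

    static void emit_windows(uint32_t cons, uint32_t prod)
    {
        uint32_t window_size = (prod < cons) ? (prod + 2*data_size) - cons
                                             : prod - cons;
        uint32_t start_offset = cons % data_size;  /* safe: cons < 2*data_size */
        uint32_t end_offset   = prod % data_size;

        assert(window_size > 0 && window_size <= data_size);

        if ( start_offset < end_offset )
            printf("copy [%u, %u)\n", start_offset, end_offset);
        else  /* the window wraps the end of the data area: two chunks */
            printf("copy [%u, %u) then [0, %u)\n",
                   start_offset, data_size, end_offset);
    }

    int main(void)
    {
        emit_windows(1900, 50);   /* copy [900, 1000) then [0, 50) */
        emit_windows(100, 250);   /* copy [100, 250) */
        return 0;
    }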
--- a/xen/common/trace.c	Mon Feb 11 09:45:36 2008 +0000
+++ b/xen/common/trace.c	Mon Feb 11 09:46:21 2008 +0000
@@ -239,14 +239,46 @@ static inline int calc_rec_size(int cycl
     return rec_size;
 }
 
+static inline int calc_unconsumed_bytes(struct t_buf *buf)
+{
+    int x = buf->prod - buf->cons;
+    if ( x < 0 )
+        x += 2*data_size;
+
+    ASSERT(x >= 0);
+    ASSERT(x <= data_size);
+
+    return x;
+}
+
 static inline int calc_bytes_to_wrap(struct t_buf *buf)
 {
-    return data_size - (buf->prod % data_size);
+    int x = data_size - buf->prod;
+    if ( x <= 0 )
+        x += data_size;
+
+    ASSERT(x > 0);
+    ASSERT(x <= data_size);
+
+    return x;
 }
 
-static inline unsigned calc_bytes_avail(struct t_buf *buf)
+static inline int calc_bytes_avail(struct t_buf *buf)
 {
-    return data_size - (buf->prod - buf->cons);
+    return data_size - calc_unconsumed_bytes(buf);
+}
+
+static inline struct t_rec *
+next_record(struct t_buf *buf)
+{
+    int x = buf->prod;
+    if ( x >= data_size )
+        x -= data_size;
+
+    ASSERT(x >= 0);
+    ASSERT(x < data_size);
+
+    return (struct t_rec *)&this_cpu(t_data)[x];
 }
 
 static inline int __insert_record(struct t_buf *buf,
@@ -260,24 +292,25 @@ static inline int __insert_record(struct
     unsigned char *dst;
     unsigned long extra_word = extra/sizeof(u32);
     int local_rec_size = calc_rec_size(cycles, extra);
+    uint32_t next;
 
     BUG_ON(local_rec_size != rec_size);
+    BUG_ON(extra & 3);
 
     /* Double-check once more that we have enough space.
      * Don't bugcheck here, in case the userland tool is doing
      * something stupid. */
     if ( calc_bytes_avail(buf) < rec_size )
     {
-        printk("%s: %u bytes left (%u - (%u - %u)) recsize %u.\n",
+        printk("%s: %u bytes left (%u - ((%u - %u) %% %u)) recsize %u.\n",
                __func__,
-               data_size - (buf->prod - buf->cons),
-               data_size,
-               buf->prod, buf->cons, rec_size);
+               calc_bytes_avail(buf),
+               data_size, buf->prod, buf->cons, data_size, rec_size);
         return 0;
     }
     rmb();
 
-    rec = (struct t_rec *)&this_cpu(t_data)[buf->prod % data_size];
+    rec = next_record(buf);
     rec->event = event;
     rec->extra_u32 = extra_word;
     dst = (unsigned char *)rec->u.nocycles.extra_u32;
@@ -293,7 +326,13 @@ static inline int __insert_record(struct
         memcpy(dst, extra_data, extra);
 
     wmb();
-    buf->prod += rec_size;
+
+    next = buf->prod + rec_size;
+    if ( next >= 2*data_size )
+        next -= 2*data_size;
+    ASSERT(next >= 0);
+    ASSERT(next < 2*data_size);
+    buf->prod = next;
 
     return rec_size;
 }
@@ -395,7 +434,7 @@ void __trace_var(u32 event, int cycles, 
 
     local_irq_save(flags);
 
-    started_below_highwater = ((buf->prod - buf->cons) < t_buf_highwater);
+    started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
 
     /* Calculate the record size */
     rec_size = calc_rec_size(cycles, extra);
@@ -413,10 +452,6 @@ void __trace_var(u32 event, int cycles, 
     total_size = 0;
 
     /* First, check to see if we need to include a lost_record.
-     *
-     * calc_bytes_to_wrap() involves integer division, which we'd like to
-     * avoid if we can.  So do the math, check it in debug versions, and
-     * do a final check always if we happen to write a record.
      */
     if ( this_cpu(lost_records) )
     {
@@ -477,7 +512,7 @@ void __trace_var(u32 event, int cycles, 
 
     /* Notify trace buffer consumer that we've crossed the high water mark. */
     if ( started_below_highwater &&
-         ((buf->prod - buf->cons) >= t_buf_highwater) )
+         (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
         raise_softirq(TRACE_SOFTIRQ);
 }
 
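
The helpers above replace every "% data_size" with compares and
subtractions. A standalone sketch of the same arithmetic (hypothetical
harness, not part of the patch; data_size=1000 is a stand-in):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    static const uint32_t data_size = 1000;

    /* Mirrors calc_unconsumed_bytes(): bytes between cons and prod when
     * both indexes live in [0, 2*data_size). */
    static uint32_t unconsumed(uint32_t prod, uint32_t cons)
    {
        uint32_t x = (prod >= cons) ? prod - cons
                                    : prod + 2*data_size - cons;
        assert(x <= data_size);
        return x;
    }

    /* Mirrors the buf->prod update in __insert_record(): advance an
     * index, wrapping at 2*data_size rather than at 2^32. */
    static uint32_t advance(uint32_t idx, uint32_t len)
    {
        uint32_t next = idx + len;
        if ( next >= 2*data_size )
            next -= 2*data_size;
        return next;
    }

    /* Mirrors next_record(): reduce an index to a data-area offset. */
    static uint32_t to_offset(uint32_t idx)
    {
        return (idx >= data_size) ? idx - data_size : idx;
    }

    int main(void)
    {
        uint32_t cons = 2*data_size - 100;   /* consumer near the 2*X wrap */
        uint32_t prod = advance(cons, 150);  /* producer wraps past 2*X */

        printf("unconsumed=%u offset=%u\n",
               unconsumed(prod, cons), to_offset(prod));  /* 150, 50 */
        return 0;
    }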
--- a/xen/include/public/trace.h	Mon Feb 11 09:45:36 2008 +0000
+++ b/xen/include/public/trace.h	Mon Feb 11 09:46:21 2008 +0000
@@ -141,6 +141,14 @@ struct t_rec {
  * field, indexes into an array of struct t_rec's.
  */
 struct t_buf {
+    /* Assume the data buffer size is X.  X is generally not a power of 2.
+     * CONS and PROD are incremented modulo (2*X):
+     *     0 <= cons < 2*X
+     *     0 <= prod < 2*X
+     * This is done because addition modulo X breaks when a 32-bit index
+     * wraps at 2^32 and X is not a power of 2: the wrapped counter reads
+     * 0, yet (((2^32 - 1) % X) + 1) % X == (2^32) % X != 0.
+     */
     uint32_t cons;   /* Offset of next item to be consumed by control tools. */
     uint32_t prod;   /* Offset of next item to be produced by Xen.           */
     /*  Records follow immediately after the meta-data header.    */
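
To see why the comment above singles out powers of 2 (a hypothetical
check, not part of the patch): a uint32_t counter that wraps to 0 keeps
addressing the buffer correctly under "% X" only if 2^32 % X == 0, i.e.
only if X is a power of 2.

    #include <stdint.h>
    #include <stdio.h>

    /* The offset error introduced by a uint32_t wrap: the true value of
     * 2^32 mod X, which "% X" on the wrapped counter reads as 0. */
    static uint32_t wrap_error(uint32_t X)
    {
        return (uint32_t)((1ULL << 32) % X);
    }

    int main(void)
    {
        printf("X=4096: error %u\n", wrap_error(4096));  /* 0: safe */
        printf("X=1000: error %u\n", wrap_error(1000));  /* 296: broken */
        return 0;
    }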