ia64/xen-unstable

changeset 1019:83b414c7559c

bitkeeper revision 1.659 (3fe9c2e8WATFQ64aNLeVQC9FQJ17Ig)

Many files:
I/O ring indexes are no longer stored modulo the ring size; they are now free-running counters that wrap modulo the machine word size (see the illustrative sketch after the file list below).
author kaf24@scramble.cl.cam.ac.uk
date Wed Dec 24 16:46:32 2003 +0000 (2003-12-24)
parents 80553bc5d3e8
children d8941770d7a7
files xen/common/keyhandler.c xen/drivers/block/xen_block.c xen/include/hypervisor-ifs/block.h xen/include/hypervisor-ifs/network.h xen/include/xeno/sched.h xen/include/xeno/vif.h xen/net/dev.c xenolinux-2.4.23-sparse/arch/xeno/drivers/block/xl_block.c xenolinux-2.4.23-sparse/arch/xeno/drivers/network/network.c
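
The change summarized above applies one convention to every block and network ring in the interface: producer and consumer indexes are no longer kept in the range [0, RING_SIZE) but run freely as unsigned integers, wrapping only at the word size, and are masked down to an array slot at the point of use. "Empty" becomes simply prod == cons and "full" becomes (prod - cons) == RING_SIZE, so the reserved slot of the old stored-modulo scheme (the "!= 1" tests removed below) is no longer needed. A minimal standalone sketch of the idiom follows, reusing the BLK_RING_SIZE, BLK_RING_IDX and MASK_BLK_IDX names that this changeset adds to block.h; the toy produce/consume loop is illustrative only and not part of the patch.

/*
 * Illustrative sketch only (not from the patch).  Free-running ring
 * indexes: the counters are never reduced modulo the ring size; they are
 * plain unsigned ints and wrap naturally at 2^32.  The ring size must be
 * a power of two so that masking yields the array slot.
 */
#include <stdio.h>

#define BLK_RING_SIZE 64                         /* as in block.h           */
typedef unsigned int BLK_RING_IDX;               /* wraps at the word size  */
#define MASK_BLK_IDX(_i) ((_i) & (BLK_RING_SIZE - 1))

int main(void)
{
    BLK_RING_IDX req_prod  = 0;   /* producer index: free running          */
    BLK_RING_IDX resp_cons = 0;   /* consumer index: free running          */
    int ring[BLK_RING_SIZE];

    /*
     * Fill the ring.  'Full' is (prod - cons) == BLK_RING_SIZE; the old
     * stored-modulo scheme had to stop one entry short to tell full from
     * empty.
     */
    while ( (req_prod - resp_cons) != BLK_RING_SIZE )
        ring[MASK_BLK_IDX(req_prod++)] = 1;      /* mask only when indexing */

    /* Drain the ring.  'Empty' is simply prod == cons. */
    while ( resp_cons != req_prod )
        ring[MASK_BLK_IDX(resp_cons++)] = 0;

    /* Unsigned wrap-around keeps (prod - cons) correct indefinitely. */
    printf("prod=0x%08x cons=0x%08x in-flight=%u\n",
           req_prod, resp_cons, req_prod - resp_cons);
    return 0;
}

In the real rings the producer also issues wmb() before publishing its new index (see make_response() and make_tx_response() below), so a consumer never observes an index advance before the corresponding entry contents are visible.
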
line diff
     1.1 --- a/xen/common/keyhandler.c	Sun Dec 21 14:20:06 2003 +0000
     1.2 +++ b/xen/common/keyhandler.c	Wed Dec 24 16:46:32 2003 +0000
     1.3 @@ -109,17 +109,6 @@ void do_task_queues(u_char key, void *de
     1.4          {
     1.5  	    printk("Guest: events = %08lx, events_mask = %08lx\n", 
     1.6  		   s->events, s->events_mask); 
     1.7 -	  
     1.8 -	    if ( (v = find_vif_by_id((p->domain)<<VIF_DOMAIN_SHIFT)) != NULL )
     1.9 -            {
    1.10 -                printk("rx_prod=%d ,rx_cons=%d, tx_prod=%d, tx_cons=%d\n",
    1.11 -                       v->rx_prod, v->rx_cons, v->tx_prod, v->tx_cons );
    1.12 -                printk("rx_req_cons=%d, rx_resp_prod=%d, "
    1.13 -                       "tx_req_cons=%d, tx_resp_prod=%d\n", 
    1.14 -                       v->rx_req_cons, v->rx_resp_prod, 
    1.15 -                       v->tx_req_cons, v->tx_resp_prod);
    1.16 -                put_vif(v);
    1.17 -	    }
    1.18  	    printk("Notifying guest...\n"); 
    1.19  	    set_bit(_EVENT_DEBUG, &s->events); 
    1.20  	}
     2.1 --- a/xen/drivers/block/xen_block.c	Sun Dec 21 14:20:06 2003 +0000
     2.2 +++ b/xen/drivers/block/xen_block.c	Wed Dec 24 16:46:32 2003 +0000
     2.3 @@ -1,4 +1,4 @@
     2.4 -/*
     2.5 +/******************************************************************************
     2.6   * xen_block.c
     2.7   *
     2.8   * process incoming block io requests from guestos's.
     2.9 @@ -21,36 +21,38 @@
    2.10  #include <xeno/slab.h>
    2.11  
    2.12  /*
    2.13 - * These are rather arbitrary. They are fairly large because adjacent
    2.14 - * requests pulled from a communication ring are quite likely to end
    2.15 - * up being part of the same scatter/gather request at the disc.
    2.16 + * These are rather arbitrary. They are fairly large because adjacent requests
    2.17 + * pulled from a communication ring are quite likely to end up being part of
    2.18 + * the same scatter/gather request at the disc.
    2.19   * 
    2.20   * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
    2.21   * This will increase the chances of being able to write whole tracks.
    2.22 - * '64' should be enough to keep us competitive with Linux.
    2.23 + * 64 should be enough to keep us competitive with Linux.
    2.24   */
    2.25  #define MAX_PENDING_REQS 64
    2.26  #define BATCH_PER_DOMAIN 16
    2.27  
    2.28  /*
    2.29 - * Each outstanding request which we've passed to the lower device layers
    2.30 - * has a 'pending_req' allocated to it. Each buffer_head that completes
    2.31 - * decrements the pendcnt towards zero. When it hits zero, the specified
    2.32 - * domain has a response queued for it, with the saved 'id' passed back.
    2.33 + * Each outstanding request that we've passed to the lower device layers has a 
    2.34 + * 'pending_req' allocated to it. Each buffer_head that completes decrements 
    2.35 + * the pendcnt towards zero. When it hits zero, the specified domain has a 
    2.36 + * response queued for it, with the saved 'id' passed back.
    2.37   * 
    2.38 - * We can't allocate pending_req's in order, since they may complete out
    2.39 - * of order. We therefore maintain an allocation ring. This ring also 
    2.40 - * indicates when enough work has been passed down -- at that point the
    2.41 - * allocation ring will be empty.
    2.42 + * We can't allocate pending_req's in order, since they may complete out of 
    2.43 + * order. We therefore maintain an allocation ring. This ring also indicates 
    2.44 + * when enough work has been passed down -- at that point the allocation ring 
    2.45 + * will be empty.
    2.46   */
    2.47  static pending_req_t pending_reqs[MAX_PENDING_REQS];
    2.48  static unsigned char pending_ring[MAX_PENDING_REQS];
    2.49 -static unsigned int pending_prod, pending_cons;
    2.50  static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
    2.51 -#define PENDREQ_IDX_INC(_i) ((_i) = ((_i)+1) & (MAX_PENDING_REQS-1))
    2.52 +/* NB. We use a different index type to differentiate from shared blk rings. */
    2.53 +typedef unsigned int PEND_RING_IDX;
    2.54 +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
    2.55 +static PEND_RING_IDX pending_prod, pending_cons;
    2.56 +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
    2.57  
    2.58  static kmem_cache_t *buffer_head_cachep;
    2.59 -static atomic_t nr_pending;
    2.60  
    2.61  static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned;
    2.62  
    2.63 @@ -64,8 +66,8 @@ static void unlock_buffer(unsigned long 
    2.64  
    2.65  static void io_schedule(unsigned long unused);
    2.66  static int do_block_io_op_domain(struct task_struct *p, int max_to_do);
    2.67 -static void dispatch_rw_block_io(struct task_struct *p, int index);
    2.68 -static void dispatch_debug_block_io(struct task_struct *p, int index);
    2.69 +static void dispatch_rw_block_io(struct task_struct *p, 
    2.70 +                                 blk_ring_req_entry_t *req);
    2.71  static void make_response(struct task_struct *p, unsigned long id, 
    2.72                            unsigned short op, unsigned long st);
    2.73  
    2.74 @@ -122,7 +124,7 @@ static void io_schedule(unsigned long un
    2.75      struct list_head *ent;
    2.76  
    2.77      /* Queue up a batch of requests. */
    2.78 -    while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) &&
    2.79 +    while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
    2.80              !list_empty(&io_schedule_list) )
    2.81      {
    2.82          ent = io_schedule_list.next;
    2.83 @@ -147,11 +149,9 @@ static void maybe_trigger_io_schedule(vo
    2.84       */
    2.85      smp_mb();
    2.86  
    2.87 -    if ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS/2)) &&
    2.88 +    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
    2.89           !list_empty(&io_schedule_list) )
    2.90 -    {
    2.91          tasklet_schedule(&io_schedule_tasklet);
    2.92 -    }
    2.93  }
    2.94  
    2.95  
    2.96 @@ -185,10 +185,10 @@ static void end_block_io_op_softirq(stru
    2.97                            pending_req->operation, pending_req->status);
    2.98              put_task_struct(pending_req->domain);
    2.99              spin_lock(&pend_prod_lock);
   2.100 -            pending_ring[pending_prod] = pending_req - pending_reqs;
   2.101 -            PENDREQ_IDX_INC(pending_prod);
   2.102 +            pending_ring[MASK_PEND_IDX(pending_prod)] = 
   2.103 +                pending_req - pending_reqs;
   2.104 +            pending_prod++;
   2.105              spin_unlock(&pend_prod_lock);
   2.106 -            atomic_dec(&nr_pending);
   2.107              maybe_trigger_io_schedule();
   2.108          }
   2.109          
   2.110 @@ -227,7 +227,7 @@ long do_block_io_op(block_io_op_t *u_blo
   2.111      block_io_op_t op; 
   2.112      struct task_struct *p = current;
   2.113  
   2.114 -    if ( copy_from_user(&op, u_block_io_op, sizeof(op)) )
   2.115 +    if ( unlikely(copy_from_user(&op, u_block_io_op, sizeof(op)) != 0) )
   2.116          return -EFAULT;
   2.117  
   2.118      switch ( op.cmd )
   2.119 @@ -285,18 +285,16 @@ long do_block_io_op(block_io_op_t *u_blo
   2.120  
   2.121      case BLOCK_IO_OP_VBD_PROBE: 
   2.122  	/* query VBD information for self or others (or all) */
   2.123 -	ret = vbd_probe(&op.u.probe_params); 
   2.124 -	if(ret == 0)
   2.125 +	if ( (ret = vbd_probe(&op.u.probe_params)) == 0 )
   2.126  	    copy_to_user(u_block_io_op, &op, sizeof(op)); 
   2.127  	break; 
   2.128  
   2.129      case BLOCK_IO_OP_VBD_INFO: 
   2.130  	/* query information about a particular VBD */
   2.131 -	ret = vbd_info(&op.u.info_params); 
   2.132 -	if(ret == 0)
   2.133 +        if ( (ret = vbd_info(&op.u.info_params)) == 0 ) 
   2.134  	    copy_to_user(u_block_io_op, &op, sizeof(op)); 
   2.135  	break; 
   2.136 -
   2.137 +        
   2.138      default: 
   2.139  	ret = -ENOSYS; 
   2.140      } 
   2.141 @@ -369,33 +367,27 @@ static void unlock_buffer(unsigned long 
   2.142  static int do_block_io_op_domain(struct task_struct *p, int max_to_do)
   2.143  {
   2.144      blk_ring_t *blk_ring = p->blk_ring_base;
   2.145 -    int i, more_to_do = 0;
   2.146 +    blk_ring_req_entry_t *req;
   2.147 +    BLK_RING_IDX i;
   2.148 +    int more_to_do = 0;
   2.149  
   2.150 -    /*
   2.151 -     * Take items off the comms ring, taking care not to catch up
   2.152 -     * with the response-producer index.
   2.153 -     */
   2.154 +    /* Take items off the comms ring, taking care not to overflow. */
   2.155      for ( i = p->blk_req_cons; 
   2.156 -	  (i != blk_ring->req_prod) &&
   2.157 -              (((p->blk_resp_prod-i) & (BLK_RING_SIZE-1)) != 1); 
   2.158 -	  i = BLK_RING_INC(i) ) 
   2.159 +	  (i != blk_ring->req_prod) && ((i-p->blk_resp_prod) != BLK_RING_SIZE);
   2.160 +          i++ )
   2.161      {
   2.162 -        if ( (max_to_do-- == 0) || 
   2.163 -             (atomic_read(&nr_pending) == MAX_PENDING_REQS) )
   2.164 +        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
   2.165          {
   2.166              more_to_do = 1;
   2.167              break;
   2.168          }
   2.169          
   2.170 -	switch ( blk_ring->ring[i].req.operation )
   2.171 +        req = &blk_ring->ring[MASK_BLK_IDX(i)].req;
   2.172 +	switch ( req->operation )
   2.173          {
   2.174  	case XEN_BLOCK_READ:
   2.175  	case XEN_BLOCK_WRITE:
   2.176 -	    dispatch_rw_block_io(p, i);
   2.177 -	    break;
   2.178 -
   2.179 -	case XEN_BLOCK_DEBUG:
   2.180 -	    dispatch_debug_block_io(p, i);
   2.181 +	    dispatch_rw_block_io(p, req);
   2.182  	    break;
   2.183  
   2.184  	default:
   2.185 @@ -411,16 +403,10 @@ static int do_block_io_op_domain(struct 
   2.186      return more_to_do;
   2.187  }
   2.188  
   2.189 -static void dispatch_debug_block_io(struct task_struct *p, int index)
   2.190 -{
   2.191 -    DPRINTK("dispatch_debug_block_io: unimplemented\n"); 
   2.192 -}
   2.193 -
   2.194 -static void dispatch_rw_block_io(struct task_struct *p, int index)
   2.195 +static void dispatch_rw_block_io(struct task_struct *p, 
   2.196 +                                 blk_ring_req_entry_t *req)
   2.197  {
   2.198      extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
   2.199 -    blk_ring_t *blk_ring = p->blk_ring_base;
   2.200 -    blk_ring_req_entry_t *req = &blk_ring->ring[index].req;
   2.201      struct buffer_head *bh;
   2.202      int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ;
   2.203      unsigned short nr_sects;
   2.204 @@ -479,9 +465,9 @@ static void dispatch_rw_block_io(struct 
   2.205              }
   2.206  
   2.207  	    /*
   2.208 -             * XXX Clear any 'partition' info in device. This works because IDE
   2.209 -             * ignores the partition bits anyway. Only SCSI needs this hack,
   2.210 -             * and it has four bits to clear.
   2.211 +             * Clear any 'partition' bits in the device id. This works because
   2.212 +             * IDE ignores the partition bits anyway. Only SCSI needs this
   2.213 +             * hack, and we know that always requires the four LSBs cleared.
   2.214               */
   2.215  	    phys_seg[nr_psegs].dev = req->device & 0xFFF0;
   2.216              new_segs = 1;
   2.217 @@ -506,9 +492,7 @@ static void dispatch_rw_block_io(struct 
   2.218  	}
   2.219      }
   2.220  
   2.221 -    atomic_inc(&nr_pending);
   2.222 -    pending_req = pending_reqs + pending_ring[pending_cons];
   2.223 -    PENDREQ_IDX_INC(pending_cons);
   2.224 +    pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]];
   2.225      pending_req->domain    = p;
   2.226      pending_req->id        = req->id;
   2.227      pending_req->operation = operation;
   2.228 @@ -563,17 +547,16 @@ static void make_response(struct task_st
   2.229  			  unsigned short op, unsigned long st)
   2.230  {
   2.231      unsigned long cpu_mask;
   2.232 -    int position;
   2.233 -    blk_ring_t *blk_ring;
   2.234 +    blk_ring_resp_entry_t *resp;
   2.235  
   2.236      /* Place on the response ring for the relevant domain. */ 
   2.237      spin_lock(&p->blk_ring_lock);
   2.238 -    blk_ring = p->blk_ring_base;
   2.239 -    position = p->blk_resp_prod;
   2.240 -    blk_ring->ring[position].resp.id        = id;
   2.241 -    blk_ring->ring[position].resp.operation = op;
   2.242 -    blk_ring->ring[position].resp.status    = st;
   2.243 -    p->blk_resp_prod = blk_ring->resp_prod = BLK_RING_INC(position);
   2.244 +    resp = &p->blk_ring_base->ring[MASK_BLK_IDX(p->blk_resp_prod)].resp;
   2.245 +    resp->id        = id;
   2.246 +    resp->operation = op;
   2.247 +    resp->status    = st;
   2.248 +    wmb();
   2.249 +    p->blk_ring_base->resp_prod = ++p->blk_resp_prod;
   2.250      spin_unlock(&p->blk_ring_lock);
   2.251      
   2.252      /* Kick the relevant domain. */
   2.253 @@ -585,11 +568,12 @@ static void dump_blockq(u_char key, void
   2.254  {
   2.255      unsigned long flags;
   2.256      struct task_struct *p;
   2.257 -    blk_ring_t *blk_ring ;
   2.258 +    blk_ring_t *blk_ring;
   2.259      int i;
   2.260  
   2.261 -    printk("Dumping block queue stats: nr_pending = %d (prod=%d,cons=%d)\n",
   2.262 -           atomic_read(&nr_pending), pending_prod, pending_cons);
   2.263 +    printk("Dumping block queue stats: nr_pending = %d"
   2.264 +           " (prod=0x%08x,cons=0x%08x)\n",
   2.265 +           NR_PENDING_REQS, pending_prod, pending_cons);
   2.266  
   2.267      read_lock_irqsave(&tasklist_lock, flags);
   2.268      p = &idle0_task;
   2.269 @@ -599,7 +583,8 @@ static void dump_blockq(u_char key, void
   2.270              printk("Domain: %d\n", p->domain);
   2.271              blk_ring = p->blk_ring_base;
   2.272              
   2.273 -            printk("  req_prod:%d, req_cons:%d resp_prod:%d/%d on_list=%d\n",
   2.274 +            printk("  req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/"
   2.275 +                   "0x%08x on_list=%d\n",
   2.276                     blk_ring->req_prod, p->blk_req_cons,
   2.277                     blk_ring->resp_prod, p->blk_resp_prod,
   2.278                     __on_blkdev_list(p));
   2.279 @@ -621,7 +606,9 @@ static void dump_blockq(u_char key, void
   2.280  /* Start-of-day initialisation for a new domain. */
   2.281  void init_blkdev_info(struct task_struct *p)
   2.282  {
   2.283 -    if ( sizeof(*p->blk_ring_base) > PAGE_SIZE ) BUG();
   2.284 +    if ( unlikely(sizeof(*p->blk_ring_base) > PAGE_SIZE) )
   2.285 +        BUG();
   2.286 +
   2.287      p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
   2.288      clear_page(p->blk_ring_base);
   2.289      SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p);
   2.290 @@ -655,8 +642,8 @@ void initialize_block_io ()
   2.291  {
   2.292      int i;
   2.293  
   2.294 -    atomic_set(&nr_pending, 0);
   2.295 -    pending_prod = pending_cons = 0;
   2.296 +    pending_cons = 0;
   2.297 +    pending_prod = MAX_PENDING_REQS;
   2.298      memset(pending_reqs, 0, sizeof(pending_reqs));
   2.299      for ( i = 0; i < MAX_PENDING_REQS; i++ )
   2.300          pending_ring[i] = i;
     3.1 --- a/xen/include/hypervisor-ifs/block.h	Sun Dec 21 14:20:06 2003 +0000
     3.2 +++ b/xen/include/hypervisor-ifs/block.h	Wed Dec 24 16:46:32 2003 +0000
     3.3 @@ -24,7 +24,6 @@
     3.4  
     3.5  /* NB. Ring size must be small enough for sizeof(blk_ring_t) <= PAGE_SIZE. */
     3.6  #define BLK_RING_SIZE        64
     3.7 -#define BLK_RING_INC(_i)     (((_i)+1) & (BLK_RING_SIZE-1))
     3.8  
     3.9  /*
    3.10   * Maximum scatter/gather segments per request.
    3.11 @@ -50,10 +49,23 @@ typedef struct blk_ring_resp_entry
    3.12      unsigned long   status;               /* cuurently boolean good/bad   */
    3.13  } blk_ring_resp_entry_t;
    3.14  
    3.15 +/*
    3.16 + * We use a special capitalised type name because it is _essential_ that all 
    3.17 + * arithmetic on indexes is done on an integer type of the correct size.
    3.18 + */
    3.19 +typedef unsigned int BLK_RING_IDX;
    3.20 +
    3.21 +/*
    3.22 + * Ring indexes are 'free running'. That is, they are not stored modulo the
    3.23 + * size of the ring buffer. The following macro converts a free-running counter
    3.24 + * into a value that can directly index a ring-buffer array.
    3.25 + */
    3.26 +#define MASK_BLK_IDX(_i) ((_i)&(BLK_RING_SIZE-1))
    3.27 +
    3.28  typedef struct blk_ring_st 
    3.29  {
    3.30 -    unsigned int req_prod;  /* Request producer. Updated by guest OS. */
    3.31 -    unsigned int resp_prod; /* Response producer. Updated by Xen.     */
    3.32 +    BLK_RING_IDX req_prod;  /* Request producer. Updated by guest OS. */
    3.33 +    BLK_RING_IDX resp_prod; /* Response producer. Updated by Xen.     */
    3.34      union {
    3.35          blk_ring_req_entry_t  req;
    3.36          blk_ring_resp_entry_t resp;
     4.1 --- a/xen/include/hypervisor-ifs/network.h	Sun Dec 21 14:20:06 2003 +0000
     4.2 +++ b/xen/include/hypervisor-ifs/network.h	Wed Dec 24 16:46:32 2003 +0000
     4.3 @@ -85,6 +85,20 @@ typedef struct net_ring_st
     4.4      rx_entry_t rx_ring[RX_RING_SIZE];
     4.5  } net_ring_t;
     4.6  
     4.7 +/*
     4.8 + * We use a special capitalised type name because it is _essential_ that all 
     4.9 + * arithmetic on indexes is done on an integer type of the correct size.
    4.10 + */
    4.11 +typedef unsigned int NET_RING_IDX;
    4.12 +
    4.13 +/*
    4.14 + * Ring indexes are 'free running'. That is, they are not stored modulo the
    4.15 + * size of the ring buffer. The following macros convert a free-running counter
    4.16 + * into a value that can directly index a ring-buffer array.
    4.17 + */
    4.18 +#define MASK_NET_RX_IDX(_i) ((_i)&(RX_RING_SIZE-1))
    4.19 +#define MASK_NET_TX_IDX(_i) ((_i)&(TX_RING_SIZE-1))
    4.20 +
    4.21  typedef struct net_idx_st
    4.22  {
    4.23      /*
    4.24 @@ -93,8 +107,8 @@ typedef struct net_idx_st
    4.25       * Guest OS places empty buffers into ring at rx_req_prod.
    4.26       * Guest OS receives EVENT_NET when rx_rssp_prod passes rx_event.
    4.27       */
    4.28 -    unsigned int tx_req_prod, tx_resp_prod, tx_event;
    4.29 -    unsigned int rx_req_prod, rx_resp_prod, rx_event;
    4.30 +    NET_RING_IDX tx_req_prod, tx_resp_prod, tx_event;
    4.31 +    NET_RING_IDX rx_req_prod, rx_resp_prod, rx_event;
    4.32  } net_idx_t;
    4.33  
    4.34  /*
     5.1 --- a/xen/include/xeno/sched.h	Sun Dec 21 14:20:06 2003 +0000
     5.2 +++ b/xen/include/xeno/sched.h	Wed Dec 24 16:46:32 2003 +0000
     5.3 @@ -122,8 +122,8 @@ struct task_struct
     5.4  
     5.5      /* Block I/O */
     5.6      blk_ring_t *blk_ring_base;
     5.7 -    unsigned int blk_req_cons;  /* request consumer */
     5.8 -    unsigned int blk_resp_prod; /* (private version of) response producer */
     5.9 +    BLK_RING_IDX blk_req_cons;  /* request consumer */
    5.10 +    BLK_RING_IDX blk_resp_prod; /* (private version of) response producer */
    5.11      struct list_head blkdev_list;
    5.12      spinlock_t blk_ring_lock;
    5.13      vbd_t *vbdtab[VBD_HTAB_SZ];   /* mapping from 16-bit vdevices to vbds */
     6.1 --- a/xen/include/xeno/vif.h	Sun Dec 21 14:20:06 2003 +0000
     6.2 +++ b/xen/include/xeno/vif.h	Wed Dec 24 16:46:32 2003 +0000
     6.3 @@ -54,17 +54,17 @@ typedef struct net_vif_st {
     6.4  
     6.5      /* The private rings and indexes. */
     6.6      rx_shadow_entry_t rx_shadow_ring[RX_RING_SIZE];
     6.7 -    unsigned int rx_prod;  /* More buffers for filling go here. */
     6.8 -    unsigned int rx_cons;  /* Next buffer to fill is here. */
     6.9 +    NET_RING_IDX rx_prod;  /* More buffers for filling go here. */
    6.10 +    NET_RING_IDX rx_cons;  /* Next buffer to fill is here. */
    6.11      tx_shadow_entry_t tx_shadow_ring[TX_RING_SIZE];
    6.12 -    unsigned int tx_prod;  /* More packets for sending go here. */
    6.13 -    unsigned int tx_cons;  /* Next packet to send is here. */
    6.14 +    NET_RING_IDX tx_prod;  /* More packets for sending go here. */
    6.15 +    NET_RING_IDX tx_cons;  /* Next packet to send is here. */
    6.16  
    6.17      /* Private indexes into shared ring. */
    6.18 -    unsigned int rx_req_cons;
    6.19 -    unsigned int rx_resp_prod; /* private version of shared variable */
    6.20 -    unsigned int tx_req_cons;
    6.21 -    unsigned int tx_resp_prod; /* private version of shared variable */
    6.22 +    NET_RING_IDX rx_req_cons;
    6.23 +    NET_RING_IDX rx_resp_prod; /* private version of shared variable */
    6.24 +    NET_RING_IDX tx_req_cons;
    6.25 +    NET_RING_IDX tx_resp_prod; /* private version of shared variable */
    6.26  
    6.27      /* Usage accounting */
    6.28      long long total_bytes_sent;
     7.1 --- a/xen/net/dev.c	Sun Dec 21 14:20:06 2003 +0000
     7.2 +++ b/xen/net/dev.c	Wed Dec 24 16:46:32 2003 +0000
     7.3 @@ -39,11 +39,6 @@
     7.4  #define rtnl_lock() ((void)0)
     7.5  #define rtnl_unlock() ((void)0)
     7.6  
     7.7 -#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
     7.8 -#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
     7.9 -#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
    7.10 -#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
    7.11 -
    7.12  struct skb_completion_queues skb_queue[NR_CPUS] __cacheline_aligned;
    7.13  
    7.14  static int get_tx_bufs(net_vif_t *vif);
    7.15 @@ -495,7 +490,6 @@ void deliver_packet(struct sk_buff *skb,
    7.16      rx_shadow_entry_t *rx;
    7.17      unsigned long *ptep, pte; 
    7.18      struct pfn_info *old_page, *new_page, *pte_page;
    7.19 -    unsigned int i; 
    7.20      unsigned short size;
    7.21      unsigned char  offset, status = RING_STATUS_OK;
    7.22      struct task_struct *p = vif->domain;
    7.23 @@ -506,14 +500,13 @@ void deliver_packet(struct sk_buff *skb,
    7.24  
    7.25      spin_lock(&vif->rx_lock);
    7.26  
    7.27 -    if ( (i = vif->rx_cons) == vif->rx_prod )
    7.28 +    if ( unlikely(vif->rx_cons == vif->rx_prod) )
    7.29      {
    7.30          spin_unlock(&vif->rx_lock);
    7.31          perfc_incr(net_rx_capacity_drop);
    7.32          return;
    7.33      }
    7.34 -    rx = &vif->rx_shadow_ring[i];
    7.35 -    vif->rx_cons = RX_RING_INC(i);
    7.36 +    rx = &vif->rx_shadow_ring[MASK_NET_RX_IDX(vif->rx_cons++)];
    7.37  
    7.38      size   = (unsigned short)skb->len;
    7.39      offset = (unsigned char)((unsigned long)skb->data & ~PAGE_MASK);
    7.40 @@ -739,7 +732,7 @@ static void net_tx_action(unsigned long 
    7.41  
    7.42          add_to_net_schedule_list_tail(vif);
    7.43  
    7.44 -        if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL )
    7.45 +        if ( unlikely((skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL) )
    7.46          {
    7.47              printk("Out of memory in net_tx_action()!\n");
    7.48              add_to_net_schedule_list_tail(vif);
    7.49 @@ -748,8 +741,7 @@ static void net_tx_action(unsigned long 
    7.50          }
    7.51          
    7.52          /* Pick an entry from the transmit queue. */
    7.53 -        tx = &vif->tx_shadow_ring[vif->tx_cons];
    7.54 -        vif->tx_cons = TX_RING_INC(vif->tx_cons);
    7.55 +        tx = &vif->tx_shadow_ring[MASK_NET_TX_IDX(vif->tx_cons++)];
    7.56  
    7.57          skb->destructor = tx_skb_release;
    7.58  
    7.59 @@ -776,7 +768,7 @@ static void net_tx_action(unsigned long 
    7.60          vif->total_bytes_sent += tx->size;
    7.61  
    7.62          /* Is the NIC crap? */
    7.63 -        if ( !(dev->features & NETIF_F_SG) )
    7.64 +        if ( unlikely(!(dev->features & NETIF_F_SG)) )
    7.65          {
    7.66              nskb = skb_copy(skb, GFP_KERNEL);
    7.67              kfree_skb(skb);
    7.68 @@ -784,7 +776,7 @@ static void net_tx_action(unsigned long 
    7.69          }
    7.70  
    7.71          /* Transmit should always work, or the queue would be stopped. */
    7.72 -        if ( dev->hard_start_xmit(skb, dev) != 0 )
    7.73 +        if ( unlikely(dev->hard_start_xmit(skb, dev) != 0) )
    7.74          {
    7.75              printk("Weird failure in hard_start_xmit!\n");
    7.76              kfree_skb(skb);
    7.77 @@ -1849,7 +1841,9 @@ static int get_tx_bufs(net_vif_t *vif)
    7.78      unsigned short      protocol;
    7.79      struct sk_buff     *skb;
    7.80      tx_req_entry_t      tx;
    7.81 -    int                 i, j, ret = 0;
    7.82 +    tx_shadow_entry_t  *stx;
    7.83 +    NET_RING_IDX        i, j;
    7.84 +    int                 ret = 0;
    7.85  
    7.86      if ( vif->tx_req_cons == shared_idxs->tx_req_prod )
    7.87          return 0;
    7.88 @@ -1870,13 +1864,14 @@ static int get_tx_bufs(net_vif_t *vif)
    7.89   again:
    7.90      for ( i = vif->tx_req_cons; 
    7.91            (i != shared_idxs->tx_req_prod) && 
    7.92 -              (((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1); 
    7.93 -          i = TX_RING_INC(i) )
    7.94 +              ((i-vif->tx_resp_prod) != TX_RING_SIZE);
    7.95 +          i++ )
    7.96      {
    7.97 -        tx     = shared_rings->tx_ring[i].req;
    7.98 +        tx     = shared_rings->tx_ring[MASK_NET_TX_IDX(i)].req;
    7.99          target = VIF_DROP;
   7.100  
   7.101 -        if ( (tx.size <= PKT_PROT_LEN) || (tx.size > ETH_FRAME_LEN) )
   7.102 +        if ( unlikely(tx.size <= PKT_PROT_LEN) || 
   7.103 +             unlikely(tx.size > ETH_FRAME_LEN) )
   7.104          {
   7.105              DPRINTK("Bad packet size: %d\n", tx.size);
   7.106              make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
   7.107 @@ -1940,7 +1935,7 @@ static int get_tx_bufs(net_vif_t *vif)
   7.108          if ( VIF_LOCAL(target) )
   7.109          {
   7.110              /* Local delivery */
   7.111 -            if ( (skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL )
   7.112 +            if ( unlikely((skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL) )
   7.113              {
   7.114                  make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
   7.115                  put_vif(target);
   7.116 @@ -1972,20 +1967,21 @@ static int get_tx_bufs(net_vif_t *vif)
   7.117          }
   7.118          else if ( (target == VIF_PHYS) || IS_PRIV(p) )
   7.119          {
   7.120 -            vif->tx_shadow_ring[j].id     = tx.id;
   7.121 -            vif->tx_shadow_ring[j].size   = tx.size;
   7.122 -            vif->tx_shadow_ring[j].header = 
   7.123 -                kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
   7.124 -            if ( vif->tx_shadow_ring[j].header == NULL )
   7.125 +            stx = &vif->tx_shadow_ring[MASK_NET_TX_IDX(j)];
   7.126 +            stx->id     = tx.id;
   7.127 +            stx->size   = tx.size;
   7.128 +            stx->header = kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
   7.129 +            if ( unlikely(stx->header == NULL) )
   7.130              { 
   7.131                  make_tx_response(vif, tx.id, RING_STATUS_OK);
   7.132                  goto cleanup_and_continue;
   7.133              }
   7.134  
   7.135 -            memcpy(vif->tx_shadow_ring[j].header, g_data, PKT_PROT_LEN);
   7.136 -            vif->tx_shadow_ring[j].payload = tx.addr + PKT_PROT_LEN;
   7.137 +            memcpy(stx->header, g_data, PKT_PROT_LEN);
   7.138 +            stx->payload = tx.addr + PKT_PROT_LEN;
   7.139 +
   7.140 +            j++;
   7.141              buf_page = NULL; /* hand off our page reference */
   7.142 -            j = TX_RING_INC(j);
   7.143          }
   7.144          else
   7.145          {
   7.146 @@ -2023,8 +2019,9 @@ static void get_rx_bufs(net_vif_t *vif)
   7.147      struct task_struct *p = vif->domain;
   7.148      net_ring_t *shared_rings = vif->shared_rings;
   7.149      net_idx_t *shared_idxs = vif->shared_idxs;
   7.150 -    unsigned int i, j;
   7.151 +    NET_RING_IDX i, j;
   7.152      rx_req_entry_t rx;
   7.153 +    rx_shadow_entry_t *srx;
   7.154      unsigned long  pte_pfn, buf_pfn;
   7.155      struct pfn_info *pte_page, *buf_page;
   7.156      unsigned long *ptep, pte;
   7.157 @@ -2039,10 +2036,10 @@ static void get_rx_bufs(net_vif_t *vif)
   7.158      j = vif->rx_prod;
   7.159      for ( i = vif->rx_req_cons; 
   7.160            (i != shared_idxs->rx_req_prod) && 
   7.161 -              (((vif->rx_resp_prod-i) & (RX_RING_SIZE-1)) != 1); 
   7.162 -          i = RX_RING_INC(i) )
   7.163 +              ((i-vif->rx_resp_prod) != RX_RING_SIZE);
   7.164 +          i++ )
   7.165      {
   7.166 -        rx = shared_rings->rx_ring[i].req;
   7.167 +        rx = shared_rings->rx_ring[MASK_NET_RX_IDX(i)].req;
   7.168  
   7.169          pte_pfn  = rx.addr >> PAGE_SHIFT;
   7.170          pte_page = &frame_table[pte_pfn];
   7.171 @@ -2116,10 +2113,10 @@ static void get_rx_bufs(net_vif_t *vif)
   7.172          list_del(&buf_page->list);
   7.173          spin_unlock(&p->page_list_lock);
   7.174  
   7.175 -        vif->rx_shadow_ring[j].id      = rx.id;
   7.176 -        vif->rx_shadow_ring[j].pte_ptr = rx.addr;
   7.177 -        vif->rx_shadow_ring[j].buf_pfn = buf_pfn;
   7.178 -        j = RX_RING_INC(j);
   7.179 +        srx = &vif->rx_shadow_ring[MASK_NET_RX_IDX(j++)];
   7.180 +        srx->id      = rx.id;
   7.181 +        srx->pte_ptr = rx.addr;
   7.182 +        srx->buf_pfn = buf_pfn;
   7.183              
   7.184      rx_unmap_and_continue:
   7.185          unmap_domain_mem(ptep);
   7.186 @@ -2153,7 +2150,7 @@ static long get_bufs_from_vif(net_vif_t 
   7.187  
   7.188  long flush_bufs_for_vif(net_vif_t *vif)
   7.189  {
   7.190 -    int i;
   7.191 +    NET_RING_IDX i;
   7.192      unsigned long *ptep, pte;
   7.193      struct pfn_info *page;
   7.194      struct task_struct *p = vif->domain;
   7.195 @@ -2164,17 +2161,17 @@ long flush_bufs_for_vif(net_vif_t *vif)
   7.196      /* Return any outstanding receive buffers to the guest OS. */
   7.197      spin_lock(&vif->rx_lock);
   7.198      for ( i = vif->rx_req_cons; 
   7.199 -          (i != shared_idxs->rx_req_prod) && 
   7.200 -              (((vif->rx_resp_prod-i) & (RX_RING_SIZE-1)) != 1); 
   7.201 -          i = RX_RING_INC(i) )
   7.202 +          (i != shared_idxs->rx_req_prod) &&
   7.203 +              ((i-vif->rx_resp_prod) != RX_RING_SIZE);
   7.204 +          i++ );
   7.205      {
   7.206 -        make_rx_response(vif, shared_rings->rx_ring[i].req.id, 0,
   7.207 -                         RING_STATUS_DROPPED, 0);
   7.208 +        make_rx_response(vif, shared_rings->rx_ring[MASK_NET_RX_IDX(i)].req.id,
   7.209 +                         0, RING_STATUS_DROPPED, 0);
   7.210      }
   7.211      vif->rx_req_cons = i;
   7.212 -    for ( i = vif->rx_cons; i != vif->rx_prod; i = RX_RING_INC(i) )
   7.213 +    for ( i = vif->rx_cons; i != vif->rx_prod; i++ )
   7.214      {
   7.215 -        rx = &vif->rx_shadow_ring[i];
   7.216 +        rx = &vif->rx_shadow_ring[MASK_NET_RX_IDX(i)];
   7.217  
   7.218          /* Give the buffer page back to the domain. */
   7.219          page = &frame_table[rx->buf_pfn];
   7.220 @@ -2209,12 +2206,12 @@ long flush_bufs_for_vif(net_vif_t *vif)
   7.221       */
   7.222      spin_lock(&vif->tx_lock);
   7.223      for ( i = vif->tx_req_cons; 
   7.224 -          (i != shared_idxs->tx_req_prod) && 
   7.225 -              (((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1); 
   7.226 -          i = TX_RING_INC(i) )
   7.227 +          (i != shared_idxs->tx_req_prod) &&
   7.228 +              ((i-vif->tx_resp_prod) != TX_RING_SIZE);
   7.229 +          i++ )
   7.230      {
   7.231 -        make_tx_response(vif, shared_rings->tx_ring[i].req.id, 
   7.232 -                           RING_STATUS_DROPPED);
   7.233 +        make_tx_response(vif, shared_rings->tx_ring[MASK_NET_TX_IDX(i)].req.id,
   7.234 +                         RING_STATUS_DROPPED);
   7.235      }
   7.236      vif->tx_req_cons = i;
   7.237      spin_unlock(&vif->tx_lock);
   7.238 @@ -2237,11 +2234,11 @@ long do_net_io_op(netop_t *uop)
   7.239  
   7.240      perfc_incr(net_hypercalls);
   7.241  
   7.242 -    if ( copy_from_user(&op, uop, sizeof(op)) )
   7.243 +    if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
   7.244          return -EFAULT;
   7.245  
   7.246 -    if ( (op.vif >= MAX_DOMAIN_VIFS) || 
   7.247 -         ((vif = current->net_vif_list[op.vif]) == NULL) )
   7.248 +    if ( unlikely(op.vif >= MAX_DOMAIN_VIFS) || 
   7.249 +         unlikely((vif = current->net_vif_list[op.vif]) == NULL) )
   7.250          return -EINVAL;
   7.251  
   7.252      switch ( op.cmd )
   7.253 @@ -2292,18 +2289,17 @@ static void make_tx_response(net_vif_t  
   7.254                               unsigned short id, 
   7.255                               unsigned char  st)
   7.256  {
   7.257 -    unsigned int pos;
   7.258 +    NET_RING_IDX i = vif->tx_resp_prod;
   7.259      tx_resp_entry_t *resp;
   7.260  
   7.261 -    /* Place on the response ring for the relevant domain. */ 
   7.262 -    pos  = vif->tx_resp_prod;
   7.263 -    resp = &vif->shared_rings->tx_ring[pos].resp;
   7.264 +    resp = &vif->shared_rings->tx_ring[MASK_NET_TX_IDX(i)].resp;
   7.265      resp->id     = id;
   7.266      resp->status = st;
   7.267 -    pos = TX_RING_INC(pos);
   7.268 -    vif->tx_resp_prod = vif->shared_idxs->tx_resp_prod = pos;
   7.269 +    wmb();
   7.270 +    vif->shared_idxs->tx_resp_prod = vif->tx_resp_prod = ++i;
   7.271 +
   7.272      smp_mb(); /* Update producer before checking event threshold. */
   7.273 -    if ( pos == vif->shared_idxs->tx_event )
   7.274 +    if ( i == vif->shared_idxs->tx_event )
   7.275      {
   7.276          unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET);
   7.277          guest_event_notify(cpu_mask);    
   7.278 @@ -2317,20 +2313,19 @@ static void make_rx_response(net_vif_t  
   7.279                               unsigned char  st,
   7.280                               unsigned char  off)
   7.281  {
   7.282 -    unsigned int pos;
   7.283 +    NET_RING_IDX i = vif->rx_resp_prod;
   7.284      rx_resp_entry_t *resp;
   7.285  
   7.286 -    /* Place on the response ring for the relevant domain. */ 
   7.287 -    pos  = vif->rx_resp_prod;
   7.288 -    resp = &vif->shared_rings->rx_ring[pos].resp;
   7.289 +    resp = &vif->shared_rings->rx_ring[MASK_NET_RX_IDX(i)].resp;
   7.290      resp->id     = id;
   7.291      resp->size   = size;
   7.292      resp->status = st;
   7.293      resp->offset = off;
   7.294 -    pos = RX_RING_INC(pos);
   7.295 -    vif->rx_resp_prod = vif->shared_idxs->rx_resp_prod = pos;
   7.296 +    wmb();
   7.297 +    vif->shared_idxs->rx_resp_prod = vif->rx_resp_prod = ++i;
   7.298 +
   7.299      smp_mb(); /* Update producer before checking event threshold. */
   7.300 -    if ( pos == vif->shared_idxs->rx_event )
   7.301 +    if ( i == vif->shared_idxs->rx_event )
   7.302      {
   7.303          unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET);
   7.304          guest_event_notify(cpu_mask);    
     8.1 --- a/xenolinux-2.4.23-sparse/arch/xeno/drivers/block/xl_block.c	Sun Dec 21 14:20:06 2003 +0000
     8.2 +++ b/xenolinux-2.4.23-sparse/arch/xeno/drivers/block/xl_block.c	Wed Dec 24 16:46:32 2003 +0000
     8.3 @@ -20,14 +20,14 @@ typedef unsigned char byte; /* from linu
     8.4  static unsigned int state = STATE_SUSPENDED;
     8.5  
     8.6  static blk_ring_t *blk_ring;
     8.7 -static unsigned int resp_cons; /* Response consumer for comms ring. */
     8.8 -static unsigned int req_prod;  /* Private request producer.         */
     8.9 +static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
    8.10 +static BLK_RING_IDX req_prod;  /* Private request producer.         */
    8.11  
    8.12  #define XDI_MAX 64 
    8.13  static xen_disk_info_t xlblk_disk_info; /* information about our disks/VBDs */
    8.14  
    8.15  /* We plug the I/O ring if the driver is suspended or if the ring is full. */
    8.16 -#define RING_PLUGGED ((BLK_RING_INC(req_prod) == resp_cons) || \
    8.17 +#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
    8.18                        (state != STATE_ACTIVE))
    8.19  
    8.20  /*
    8.21 @@ -260,10 +260,12 @@ static int hypervisor_request(unsigned l
    8.22      blk_ring_req_entry_t *req;
    8.23      struct buffer_head *bh;
    8.24  
    8.25 -    if ( nr_sectors >= (1<<9) ) BUG();
    8.26 -    if ( (buffer_ma & ((1<<9)-1)) != 0 ) BUG();
    8.27 +    if ( unlikely(nr_sectors >= (1<<9)) )
    8.28 +        BUG();
    8.29 +    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
    8.30 +        BUG();
    8.31  
    8.32 -    if ( state == STATE_CLOSED )
    8.33 +    if ( unlikely(state == STATE_CLOSED) )
    8.34          return 1;
    8.35  
    8.36      switch ( operation )
    8.37 @@ -273,16 +275,18 @@ static int hypervisor_request(unsigned l
    8.38      case XEN_BLOCK_WRITE:
    8.39          gd = get_gendisk(device); 
    8.40  
    8.41 -        /* Update the sector_number we'll pass down as appropriate; note 
    8.42 -           that we could sanity check that resulting sector will be in 
    8.43 -           this partition, but this will happen in xen anyhow */
    8.44 +        /*
    8.45 +         * Update the sector_number we'll pass down as appropriate; note that
    8.46 +         * we could sanity check that resulting sector will be in this
    8.47 +         * partition, but this will happen in xen anyhow.
    8.48 +         */
    8.49          sector_number += gd->part[MINOR(device)].start_sect;
    8.50  
    8.51          if ( (sg_operation == operation) &&
    8.52               (sg_dev == device) &&
    8.53               (sg_next_sect == sector_number) )
    8.54          {
    8.55 -            req = &blk_ring->ring[(req_prod-1)&(BLK_RING_SIZE-1)].req;
    8.56 +            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
    8.57              bh = (struct buffer_head *)id;
    8.58              bh->b_reqnext = (struct buffer_head *)req->id;
    8.59              req->id = id;
    8.60 @@ -310,14 +314,14 @@ static int hypervisor_request(unsigned l
    8.61      }
    8.62  
    8.63      /* Fill out a communications ring structure. */
    8.64 -    req = &blk_ring->ring[req_prod].req;
    8.65 +    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
    8.66      req->id            = id;
    8.67      req->operation     = operation;
    8.68      req->sector_number = sector_number;
    8.69      req->device        = device; 
    8.70      req->nr_segments   = 1;
    8.71      req->buffer_and_sects[0] = buffer_ma | nr_sectors;
    8.72 -    req_prod = BLK_RING_INC(req_prod);
    8.73 +    req_prod++;
    8.74  
    8.75      return 0;
    8.76  }
    8.77 @@ -345,8 +349,9 @@ void do_xlblk_request(request_queue_t *r
    8.78                  req->current_nr_sectors, req->nr_sectors, req->bh);
    8.79  
    8.80          rw = req->cmd;
    8.81 -        if ( rw == READA ) rw = READ;
    8.82 -        if ((rw != READ) && (rw != WRITE))
    8.83 +        if ( rw == READA )
    8.84 +            rw = READ;
    8.85 +        if ( unlikely((rw != READ) && (rw != WRITE)) )
    8.86              panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
    8.87  
    8.88          req->errors = 0;
    8.89 @@ -362,13 +367,13 @@ void do_xlblk_request(request_queue_t *r
    8.90                  (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
    8.91                  bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
    8.92  
    8.93 -            if(full) { 
    8.94 -
    8.95 +            if ( full )
    8.96 +            { 
    8.97                  bh->b_reqnext = next_bh;
    8.98                  pending_queues[nr_pending++] = rq;
    8.99 -                if ( nr_pending >= MAX_PENDING ) BUG();
   8.100 +                if ( unlikely(nr_pending >= MAX_PENDING) )
   8.101 +                    BUG();
   8.102                  goto out; 
   8.103 -
   8.104              }
   8.105  
   8.106              queued++;
   8.107 @@ -390,7 +395,8 @@ void do_xlblk_request(request_queue_t *r
   8.108              else
   8.109              {
   8.110                  /* That was the last buffer head. Finalise the request. */
   8.111 -                if ( end_that_request_first(req, 1, "XenBlk") ) BUG();
   8.112 +                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
   8.113 +                    BUG();
   8.114                  blkdev_dequeue_request(req);
   8.115                  end_that_request_last(req);
   8.116              }
   8.117 @@ -406,40 +412,34 @@ static void kick_pending_request_queues(
   8.118  {
   8.119      /* We kick pending request queues if the ring is reasonably empty. */
   8.120      if ( (nr_pending != 0) && 
   8.121 -         (((req_prod - resp_cons) & (BLK_RING_SIZE - 1)) < 
   8.122 -          (BLK_RING_SIZE >> 1)) )
   8.123 +         ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
   8.124      {
   8.125          /* Attempt to drain the queue, but bail if the ring becomes full. */
   8.126 -        while ( nr_pending != 0 )
   8.127 -        {
   8.128 +        while ( (nr_pending != 0) && !RING_PLUGGED )
   8.129              do_xlblk_request(pending_queues[--nr_pending]);
   8.130 -            if ( RING_PLUGGED ) break;
   8.131 -        }
   8.132      }
   8.133  }
   8.134  
   8.135  
   8.136  static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
   8.137  {
   8.138 -    int i; 
   8.139 +    BLK_RING_IDX i; 
   8.140      unsigned long flags; 
   8.141      struct buffer_head *bh, *next_bh;
   8.142  
   8.143 -    if ( state == STATE_CLOSED )
   8.144 +    if ( unlikely(state == STATE_CLOSED) )
   8.145          return;
   8.146      
   8.147      spin_lock_irqsave(&io_request_lock, flags);     
   8.148  
   8.149 -    for ( i  = resp_cons;
   8.150 -          i != blk_ring->resp_prod;
   8.151 -          i  = BLK_RING_INC(i) )
   8.152 +    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
   8.153      {
   8.154 -        blk_ring_resp_entry_t *bret = &blk_ring->ring[i].resp;
   8.155 -        switch (bret->operation)
   8.156 +        blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
   8.157 +        switch ( bret->operation )
   8.158          {
   8.159          case XEN_BLOCK_READ:
   8.160          case XEN_BLOCK_WRITE:
   8.161 -            if ( bret->status )
   8.162 +            if ( unlikely(bret->status != 0) )
   8.163                  DPRINTK("Bad return from blkdev data request: %lx\n",
   8.164                          bret->status);
   8.165              for ( bh = (struct buffer_head *)bret->id; 
     9.1 --- a/xenolinux-2.4.23-sparse/arch/xeno/drivers/network/network.c	Sun Dec 21 14:20:06 2003 +0000
     9.2 +++ b/xenolinux-2.4.23-sparse/arch/xeno/drivers/network/network.c	Wed Dec 24 16:46:32 2003 +0000
     9.3 @@ -27,14 +27,6 @@
     9.4  
     9.5  #define NET_IRQ _EVENT_NET
     9.6  
     9.7 -#define TX_MAX_ENTRIES (TX_RING_SIZE - 2)
     9.8 -#define RX_MAX_ENTRIES (RX_RING_SIZE - 2)
     9.9 -
    9.10 -#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
    9.11 -#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
    9.12 -#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
    9.13 -#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
    9.14 -
    9.15  #define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
    9.16  
    9.17  static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
    9.18 @@ -50,12 +42,11 @@ struct net_private
    9.19      struct net_device *dev;
    9.20  
    9.21      struct net_device_stats stats;
    9.22 -    atomic_t tx_entries;
    9.23 -    unsigned int rx_resp_cons, tx_resp_cons, tx_full;
    9.24 -    unsigned int net_ring_fixmap_idx;
    9.25 -    net_ring_t *net_ring;
    9.26 -    net_idx_t  *net_idx;
    9.27 -    spinlock_t tx_lock;
    9.28 +    NET_RING_IDX rx_resp_cons, tx_resp_cons;
    9.29 +    unsigned int net_ring_fixmap_idx, tx_full;
    9.30 +    net_ring_t  *net_ring;
    9.31 +    net_idx_t   *net_idx;
    9.32 +    spinlock_t   tx_lock;
    9.33      unsigned int idx; /* Domain-specific index of this VIF. */
    9.34  
    9.35      unsigned int rx_bufs_to_notify;
    9.36 @@ -80,7 +71,7 @@ struct net_private
    9.37  #define GET_ID_FROM_FREELIST(_list)                \
    9.38   ({ unsigned long _id = (unsigned long)(_list)[0]; \
    9.39      (_list)[0]  = (_list)[_id];                    \
    9.40 -    _id; })
    9.41 +    (unsigned short)_id; })
    9.42  
    9.43  
    9.44  static void _dbg_network_int(struct net_device *dev)
    9.45 @@ -90,14 +81,15 @@ static void _dbg_network_int(struct net_
    9.46      if ( np->state == STATE_CLOSED )
    9.47          return;
    9.48      
    9.49 -    printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_resp_cons = %d,"
    9.50 -           " tx_req_prod = %d, tx_resp_prod = %d, tx_event = %d, state=%d\n",
    9.51 -           np->tx_full, atomic_read(&np->tx_entries), np->tx_resp_cons, 
    9.52 +    printk(KERN_ALERT "tx_full = %d, tx_resp_cons = 0x%08x,"
    9.53 +           " tx_req_prod = 0x%08x, tx_resp_prod = 0x%08x,"
    9.54 +           " tx_event = 0x%08x, state=%d\n",
    9.55 +           np->tx_full, np->tx_resp_cons, 
    9.56             np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, 
    9.57             np->net_idx->tx_event,
    9.58             test_bit(__LINK_STATE_XOFF, &dev->state));
    9.59 -    printk(KERN_ALERT "rx_resp_cons = %d,"
    9.60 -           " rx_req_prod = %d, rx_resp_prod = %d, rx_event = %d\n",
    9.61 +    printk(KERN_ALERT "rx_resp_cons = 0x%08x,"
    9.62 +           " rx_req_prod = 0x%08x, rx_resp_prod = 0x%08x, rx_event = 0x%08x\n",
    9.63             np->rx_resp_cons, np->net_idx->rx_req_prod,
    9.64             np->net_idx->rx_resp_prod, np->net_idx->rx_event);
    9.65  }
    9.66 @@ -149,7 +141,6 @@ static int network_open(struct net_devic
    9.67      np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
    9.68      memset(&np->stats, 0, sizeof(np->stats));
    9.69      spin_lock_init(&np->tx_lock);
    9.70 -    atomic_set(&np->tx_entries, 0);
    9.71      memset(np->net_ring, 0, sizeof(*np->net_ring));
    9.72      memset(np->net_idx, 0, sizeof(*np->net_idx));
    9.73  
    9.74 @@ -174,33 +165,40 @@ static int network_open(struct net_devic
    9.75  
    9.76  static void network_tx_buf_gc(struct net_device *dev)
    9.77  {
    9.78 -    unsigned int i;
    9.79 +    NET_RING_IDX i, prod;
    9.80 +    unsigned short id;
    9.81      struct net_private *np = dev->priv;
    9.82      struct sk_buff *skb;
    9.83 -    unsigned int prod;
    9.84      tx_entry_t *tx_ring = np->net_ring->tx_ring;
    9.85  
    9.86      do {
    9.87          prod = np->net_idx->tx_resp_prod;
    9.88  
    9.89 -        for ( i = np->tx_resp_cons; i != prod; i = TX_RING_INC(i) )
    9.90 +        for ( i = np->tx_resp_cons; i != prod; i++ )
    9.91          {
    9.92 -            skb = np->tx_skbs[tx_ring[i].resp.id];
    9.93 -            ADD_ID_TO_FREELIST(np->tx_skbs, tx_ring[i].resp.id);
    9.94 +            id  = tx_ring[MASK_NET_TX_IDX(i)].resp.id;
    9.95 +            skb = np->tx_skbs[id];
    9.96 +            ADD_ID_TO_FREELIST(np->tx_skbs, id);
    9.97              dev_kfree_skb_any(skb);
    9.98 -            atomic_dec(&np->tx_entries);
    9.99          }
   9.100          
   9.101          np->tx_resp_cons = prod;
   9.102          
   9.103 -        /* Set a new event, then check for race with update of tx_cons. */
   9.104 -        np->net_idx->tx_event =
   9.105 -            TX_RING_ADD(prod, (atomic_read(&np->tx_entries)>>1) + 1);
   9.106 +        /*
   9.107 +         * Set a new event, then check for race with update of tx_cons. Note
   9.108 +         * that it is essential to schedule a callback, no matter how few
   9.109 +         * buffers are pending. Even if there is space in the transmit ring,
   9.110 +         * higher layers may be blocked because too much data is outstanding:
   9.111 +         * in such cases notification from Xen is likely to be the only kick
   9.112 +         * that we'll get.
   9.113 +         */
   9.114 +        np->net_idx->tx_event = 
   9.115 +            prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1;
   9.116          mb();
   9.117      }
   9.118      while ( prod != np->net_idx->tx_resp_prod );
   9.119  
   9.120 -    if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
   9.121 +    if ( np->tx_full && ((np->net_idx->tx_req_prod - prod) < TX_RING_SIZE) )
   9.122      {
   9.123          np->tx_full = 0;
   9.124          if ( np->state == STATE_ACTIVE )
   9.125 @@ -221,19 +219,21 @@ static inline pte_t *get_ppte(void *addr
   9.126  
   9.127  static void network_alloc_rx_buffers(struct net_device *dev)
   9.128  {
   9.129 -    unsigned int i, id;
   9.130 +    unsigned short id;
   9.131      struct net_private *np = dev->priv;
   9.132      struct sk_buff *skb;
   9.133 -    unsigned int end = RX_RING_ADD(np->rx_resp_cons, RX_MAX_ENTRIES);    
   9.134      netop_t netop;
   9.135 +    NET_RING_IDX i = np->net_idx->rx_req_prod;
   9.136  
   9.137 -    if ( ((i = np->net_idx->rx_req_prod) == end) ||
   9.138 -         (np->state != STATE_ACTIVE) )
   9.139 +    if ( unlikely((i - np->rx_resp_cons) == RX_RING_SIZE) || 
   9.140 +         unlikely(np->state != STATE_ACTIVE) )
   9.141          return;
   9.142  
   9.143      do {
   9.144          skb = dev_alloc_skb(RX_BUF_SIZE);
   9.145 -        if ( skb == NULL ) break;
   9.146 +        if ( unlikely(skb == NULL) )
   9.147 +            break;
   9.148 +
   9.149          skb->dev = dev;
   9.150  
   9.151          if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) )
   9.152 @@ -242,13 +242,13 @@ static void network_alloc_rx_buffers(str
   9.153          id = GET_ID_FROM_FREELIST(np->rx_skbs);
   9.154          np->rx_skbs[id] = skb;
   9.155  
   9.156 -        np->net_ring->rx_ring[i].req.id   = (unsigned short)id;
   9.157 -        np->net_ring->rx_ring[i].req.addr = 
   9.158 +        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id   = id;
   9.159 +        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = 
   9.160              virt_to_machine(get_ppte(skb->head));
   9.161  
   9.162          np->rx_bufs_to_notify++;
   9.163      }
   9.164 -    while ( (i = RX_RING_INC(i)) != end );
   9.165 +    while ( (++i - np->rx_resp_cons) != RX_RING_SIZE );
   9.166  
   9.167      /*
   9.168       * We may have allocated buffers which have entries outstanding in the page
   9.169 @@ -257,10 +257,10 @@ static void network_alloc_rx_buffers(str
   9.170      flush_page_update_queue();
   9.171  
   9.172      np->net_idx->rx_req_prod = i;
   9.173 -    np->net_idx->rx_event    = RX_RING_INC(np->rx_resp_cons);
   9.174 +    np->net_idx->rx_event    = np->rx_resp_cons + 1;
   9.175          
   9.176      /* Batch Xen notifications. */
   9.177 -    if ( np->rx_bufs_to_notify > (RX_MAX_ENTRIES/4) )
   9.178 +    if ( np->rx_bufs_to_notify > (RX_RING_SIZE/4) )
   9.179      {
   9.180          netop.cmd = NETOP_PUSH_BUFFERS;
   9.181          netop.vif = np->idx;
   9.182 @@ -272,21 +272,25 @@ static void network_alloc_rx_buffers(str
   9.183  
   9.184  static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
   9.185  {
   9.186 -    unsigned int i, id;
   9.187 +    unsigned short id;
   9.188      struct net_private *np = (struct net_private *)dev->priv;
   9.189 +    tx_req_entry_t *tx;
   9.190      netop_t netop;
   9.191 +    NET_RING_IDX i;
   9.192  
   9.193 -    if ( np->tx_full )
   9.194 +    if ( unlikely(np->tx_full) )
   9.195      {
   9.196          printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
   9.197          netif_stop_queue(dev);
   9.198          return -ENOBUFS;
   9.199      }
   9.200  
   9.201 -    if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE )
   9.202 +    if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
   9.203 +                  PAGE_SIZE) )
   9.204      {
   9.205          struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
   9.206 -        if ( new_skb == NULL ) return 1;
   9.207 +        if ( unlikely(new_skb == NULL) )
   9.208 +            return 1;
   9.209          skb_put(new_skb, skb->len);
   9.210          memcpy(new_skb->data, skb->data, skb->len);
   9.211          dev_kfree_skb(skb);
   9.212 @@ -300,16 +304,18 @@ static int network_start_xmit(struct sk_
   9.213      id = GET_ID_FROM_FREELIST(np->tx_skbs);
   9.214      np->tx_skbs[id] = skb;
   9.215  
   9.216 -    np->net_ring->tx_ring[i].req.id   = (unsigned short)id;
   9.217 -    np->net_ring->tx_ring[i].req.addr =
   9.218 -        phys_to_machine(virt_to_phys(skb->data));
   9.219 -    np->net_ring->tx_ring[i].req.size = skb->len;
   9.220 -    np->net_idx->tx_req_prod = TX_RING_INC(i);
   9.221 -    atomic_inc(&np->tx_entries);
   9.222 +    tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req;
   9.223 +
   9.224 +    tx->id   = id;
   9.225 +    tx->addr = phys_to_machine(virt_to_phys(skb->data));
   9.226 +    tx->size = skb->len;
   9.227 +
   9.228 +    wmb();
   9.229 +    np->net_idx->tx_req_prod = i + 1;
   9.230  
   9.231      network_tx_buf_gc(dev);
   9.232  
   9.233 -    if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES )
   9.234 +    if ( (i - np->tx_resp_cons) == TX_RING_SIZE )
   9.235      {
   9.236          np->tx_full = 1;
   9.237          netif_stop_queue(dev);
   9.238 @@ -336,12 +342,12 @@ static int network_start_xmit(struct sk_
   9.239  static inline void _network_interrupt(struct net_device *dev)
   9.240  {
   9.241      struct net_private *np = dev->priv;
   9.242 -    unsigned int i;
   9.243      unsigned long flags;
   9.244      struct sk_buff *skb;
   9.245      rx_resp_entry_t *rx;
   9.246 +    NET_RING_IDX i;
   9.247  
   9.248 -    if ( np->state == STATE_CLOSED )
   9.249 +    if ( unlikely(np->state == STATE_CLOSED) )
   9.250          return;
   9.251      
   9.252      spin_lock_irqsave(&np->tx_lock, flags);
   9.253 @@ -349,16 +355,14 @@ static inline void _network_interrupt(st
   9.254      spin_unlock_irqrestore(&np->tx_lock, flags);
   9.255  
   9.256   again:
   9.257 -    for ( i  = np->rx_resp_cons; 
   9.258 -          i != np->net_idx->rx_resp_prod; 
   9.259 -          i  = RX_RING_INC(i) )
   9.260 +    for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ )
   9.261      {
   9.262 -        rx  = &np->net_ring->rx_ring[i].resp;
   9.263 +        rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp;
   9.264  
   9.265          skb = np->rx_skbs[rx->id];
   9.266          ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
   9.267  
   9.268 -        if ( rx->status != RING_STATUS_OK )
   9.269 +        if ( unlikely(rx->status != RING_STATUS_OK) )
   9.270          {
   9.271              /* Gate this error. We get a (valid) slew of them on suspend. */
   9.272              if ( np->state == STATE_ACTIVE )
   9.273 @@ -396,7 +400,8 @@ static inline void _network_interrupt(st
   9.274      
   9.275      /* Deal with hypervisor racing our resetting of rx_event. */
   9.276      mb();
   9.277 -    if ( np->net_idx->rx_resp_prod != i ) goto again;
   9.278 +    if ( np->net_idx->rx_resp_prod != i )
   9.279 +        goto again;
   9.280  }
   9.281  
   9.282
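
A related idiom from the xen_block.c hunk above is worth noting: the atomic nr_pending counter is replaced by a free-running ring of free slot numbers. pending_ring[] holds indexes into pending_reqs[], pending_prod starts at MAX_PENDING_REQS so that every slot is initially free, allocation consumes at pending_cons, completion produces at pending_prod, and NR_PENDING_REQS (defined as MAX_PENDING_REQS - pending_prod + pending_cons) counts the requests currently in flight. The standalone sketch below reuses the names from the patch; the alloc_slot()/free_slot() wrappers and main() are illustrative only and not part of the changeset.

/*
 * Illustrative sketch only (not from the patch).  A free list kept as a
 * free-running ring of slot numbers, as xen_block.c now does for its
 * pending-request table.
 */
#include <assert.h>
#include <stdio.h>

#define MAX_PENDING_REQS 64                       /* power of two            */
typedef unsigned int PEND_RING_IDX;
#define MASK_PEND_IDX(_i)  ((_i) & (MAX_PENDING_REQS - 1))
#define NR_PENDING_REQS    (MAX_PENDING_REQS - pending_prod + pending_cons)

static unsigned char  pending_ring[MAX_PENDING_REQS];  /* free slot numbers */
static PEND_RING_IDX  pending_prod, pending_cons;

static void init_pending_ring(void)
{
    int i;
    pending_cons = 0;
    pending_prod = MAX_PENDING_REQS;              /* all slots start free    */
    for ( i = 0; i < MAX_PENDING_REQS; i++ )
        pending_ring[i] = i;
}

/* Hand out a free slot.  The real code checks NR_PENDING_REQS first. */
static unsigned char alloc_slot(void)
{
    assert(NR_PENDING_REQS < MAX_PENDING_REQS);
    return pending_ring[MASK_PEND_IDX(pending_cons++)];
}

/* Return a completed slot.  The real code holds pend_prod_lock here,
 * because completions can race on different CPUs. */
static void free_slot(unsigned char slot)
{
    pending_ring[MASK_PEND_IDX(pending_prod)] = slot;
    pending_prod++;
}

int main(void)
{
    unsigned char a, b;
    init_pending_ring();
    a = alloc_slot();
    b = alloc_slot();
    printf("in flight after two allocations: %u\n", NR_PENDING_REQS);
    free_slot(a);
    free_slot(b);
    printf("in flight after both complete:   %u\n", NR_PENDING_REQS);
    return 0;
}
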