ia64/xen-unstable
changeset 1019:83b414c7559c
bitkeeper revision 1.659 (3fe9c2e8WATFQ64aNLeVQC9FQJ17Ig)
Many files:
I/O ring indexes are now not stored modulo ring size, but are free running mod word size.
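For readers skimming the diff below, here is a minimal, self-contained sketch of the convention this changeset adopts (all names in the sketch are illustrative, not Xen's). Producer and consumer indices are plain unsigned counters that only ever increment; occupancy is their difference; a power-of-two mask turns a counter into an array slot. Because "full" (prod - cons == size) and "empty" (prod == cons) are now distinguishable without reserving a slot, every ring entry becomes usable.

```c
#include <assert.h>

#define RING_SIZE 64                          /* must be a power of two */
#define MASK_RING_IDX(_i) ((_i) & (RING_SIZE-1))

typedef unsigned int RING_IDX;                /* free running: wraps mod 2^32 */

struct ring {
    int      slot[RING_SIZE];
    RING_IDX prod, cons;                      /* never stored modulo RING_SIZE */
};

static unsigned int ring_occupancy(const struct ring *r)
{
    return r->prod - r->cons;                 /* valid even after the counters wrap */
}

static int ring_put(struct ring *r, int v)
{
    if ( ring_occupancy(r) == RING_SIZE )     /* full: all slots usable */
        return -1;
    r->slot[MASK_RING_IDX(r->prod++)] = v;
    return 0;
}

static int ring_get(struct ring *r, int *v)
{
    if ( r->prod == r->cons )                 /* empty */
        return -1;
    *v = r->slot[MASK_RING_IDX(r->cons++)];
    return 0;
}

int main(void)
{
    struct ring r = { .prod = 0, .cons = 0 };
    for ( int i = 0; i < RING_SIZE; i++ )
        assert(ring_put(&r, i) == 0);         /* the old scheme topped out at 63 */
    assert(ring_put(&r, 99) == -1);
    int v;
    assert(ring_get(&r, &v) == 0 && v == 0);
    return 0;
}
```

This is why the diff below deletes the *_RING_INC/*_RING_ADD helpers and replaces near-full tests of the form ((resp_prod - i) & (SIZE-1)) != 1 with the simpler (i - resp_prod) != SIZE.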
| author | kaf24@scramble.cl.cam.ac.uk |
| --- | --- |
| date | Wed Dec 24 16:46:32 2003 +0000 (2003-12-24) |
| parents | 80553bc5d3e8 |
| children | d8941770d7a7 |
| files | xen/common/keyhandler.c xen/drivers/block/xen_block.c xen/include/hypervisor-ifs/block.h xen/include/hypervisor-ifs/network.h xen/include/xeno/sched.h xen/include/xeno/vif.h xen/net/dev.c xenolinux-2.4.23-sparse/arch/xeno/drivers/block/xl_block.c xenolinux-2.4.23-sparse/arch/xeno/drivers/network/network.c |
line diff
--- a/xen/common/keyhandler.c Sun Dec 21 14:20:06 2003 +0000
+++ b/xen/common/keyhandler.c Wed Dec 24 16:46:32 2003 +0000
@@ -109,17 +109,6 @@ void do_task_queues(u_char key, void *de
         {
             printk("Guest: events = %08lx, events_mask = %08lx\n",
                    s->events, s->events_mask);
-
-            if ( (v = find_vif_by_id((p->domain)<<VIF_DOMAIN_SHIFT)) != NULL )
-            {
-                printk("rx_prod=%d ,rx_cons=%d, tx_prod=%d, tx_cons=%d\n",
-                       v->rx_prod, v->rx_cons, v->tx_prod, v->tx_cons );
-                printk("rx_req_cons=%d, rx_resp_prod=%d, "
-                       "tx_req_cons=%d, tx_resp_prod=%d\n",
-                       v->rx_req_cons, v->rx_resp_prod,
-                       v->tx_req_cons, v->tx_resp_prod);
-                put_vif(v);
-            }
             printk("Notifying guest...\n");
             set_bit(_EVENT_DEBUG, &s->events);
         }
2.1 --- a/xen/drivers/block/xen_block.c Sun Dec 21 14:20:06 2003 +0000 2.2 +++ b/xen/drivers/block/xen_block.c Wed Dec 24 16:46:32 2003 +0000 2.3 @@ -1,4 +1,4 @@ 2.4 -/* 2.5 +/****************************************************************************** 2.6 * xen_block.c 2.7 * 2.8 * process incoming block io requests from guestos's. 2.9 @@ -21,36 +21,38 @@ 2.10 #include <xeno/slab.h> 2.11 2.12 /* 2.13 - * These are rather arbitrary. They are fairly large because adjacent 2.14 - * requests pulled from a communication ring are quite likely to end 2.15 - * up being part of the same scatter/gather request at the disc. 2.16 + * These are rather arbitrary. They are fairly large because adjacent requests 2.17 + * pulled from a communication ring are quite likely to end up being part of 2.18 + * the same scatter/gather request at the disc. 2.19 * 2.20 * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW ** 2.21 * This will increase the chances of being able to write whole tracks. 2.22 - * '64' should be enough to keep us competitive with Linux. 2.23 + * 64 should be enough to keep us competitive with Linux. 2.24 */ 2.25 #define MAX_PENDING_REQS 64 2.26 #define BATCH_PER_DOMAIN 16 2.27 2.28 /* 2.29 - * Each outstanding request which we've passed to the lower device layers 2.30 - * has a 'pending_req' allocated to it. Each buffer_head that completes 2.31 - * decrements the pendcnt towards zero. When it hits zero, the specified 2.32 - * domain has a response queued for it, with the saved 'id' passed back. 2.33 + * Each outstanding request that we've passed to the lower device layers has a 2.34 + * 'pending_req' allocated to it. Each buffer_head that completes decrements 2.35 + * the pendcnt towards zero. When it hits zero, the specified domain has a 2.36 + * response queued for it, with the saved 'id' passed back. 2.37 * 2.38 - * We can't allocate pending_req's in order, since they may complete out 2.39 - * of order. We therefore maintain an allocation ring. This ring also 2.40 - * indicates when enough work has been passed down -- at that point the 2.41 - * allocation ring will be empty. 2.42 + * We can't allocate pending_req's in order, since they may complete out of 2.43 + * order. We therefore maintain an allocation ring. This ring also indicates 2.44 + * when enough work has been passed down -- at that point the allocation ring 2.45 + * will be empty. 2.46 */ 2.47 static pending_req_t pending_reqs[MAX_PENDING_REQS]; 2.48 static unsigned char pending_ring[MAX_PENDING_REQS]; 2.49 -static unsigned int pending_prod, pending_cons; 2.50 static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED; 2.51 -#define PENDREQ_IDX_INC(_i) ((_i) = ((_i)+1) & (MAX_PENDING_REQS-1)) 2.52 +/* NB. We use a different index type to differentiate from shared blk rings. 
*/ 2.53 +typedef unsigned int PEND_RING_IDX; 2.54 +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) 2.55 +static PEND_RING_IDX pending_prod, pending_cons; 2.56 +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) 2.57 2.58 static kmem_cache_t *buffer_head_cachep; 2.59 -static atomic_t nr_pending; 2.60 2.61 static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned; 2.62 2.63 @@ -64,8 +66,8 @@ static void unlock_buffer(unsigned long 2.64 2.65 static void io_schedule(unsigned long unused); 2.66 static int do_block_io_op_domain(struct task_struct *p, int max_to_do); 2.67 -static void dispatch_rw_block_io(struct task_struct *p, int index); 2.68 -static void dispatch_debug_block_io(struct task_struct *p, int index); 2.69 +static void dispatch_rw_block_io(struct task_struct *p, 2.70 + blk_ring_req_entry_t *req); 2.71 static void make_response(struct task_struct *p, unsigned long id, 2.72 unsigned short op, unsigned long st); 2.73 2.74 @@ -122,7 +124,7 @@ static void io_schedule(unsigned long un 2.75 struct list_head *ent; 2.76 2.77 /* Queue up a batch of requests. */ 2.78 - while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) && 2.79 + while ( (NR_PENDING_REQS < MAX_PENDING_REQS) && 2.80 !list_empty(&io_schedule_list) ) 2.81 { 2.82 ent = io_schedule_list.next; 2.83 @@ -147,11 +149,9 @@ static void maybe_trigger_io_schedule(vo 2.84 */ 2.85 smp_mb(); 2.86 2.87 - if ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS/2)) && 2.88 + if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && 2.89 !list_empty(&io_schedule_list) ) 2.90 - { 2.91 tasklet_schedule(&io_schedule_tasklet); 2.92 - } 2.93 } 2.94 2.95 2.96 @@ -185,10 +185,10 @@ static void end_block_io_op_softirq(stru 2.97 pending_req->operation, pending_req->status); 2.98 put_task_struct(pending_req->domain); 2.99 spin_lock(&pend_prod_lock); 2.100 - pending_ring[pending_prod] = pending_req - pending_reqs; 2.101 - PENDREQ_IDX_INC(pending_prod); 2.102 + pending_ring[MASK_PEND_IDX(pending_prod)] = 2.103 + pending_req - pending_reqs; 2.104 + pending_prod++; 2.105 spin_unlock(&pend_prod_lock); 2.106 - atomic_dec(&nr_pending); 2.107 maybe_trigger_io_schedule(); 2.108 } 2.109 2.110 @@ -227,7 +227,7 @@ long do_block_io_op(block_io_op_t *u_blo 2.111 block_io_op_t op; 2.112 struct task_struct *p = current; 2.113 2.114 - if ( copy_from_user(&op, u_block_io_op, sizeof(op)) ) 2.115 + if ( unlikely(copy_from_user(&op, u_block_io_op, sizeof(op)) != 0) ) 2.116 return -EFAULT; 2.117 2.118 switch ( op.cmd ) 2.119 @@ -285,18 +285,16 @@ long do_block_io_op(block_io_op_t *u_blo 2.120 2.121 case BLOCK_IO_OP_VBD_PROBE: 2.122 /* query VBD information for self or others (or all) */ 2.123 - ret = vbd_probe(&op.u.probe_params); 2.124 - if(ret == 0) 2.125 + if ( (ret = vbd_probe(&op.u.probe_params)) == 0 ) 2.126 copy_to_user(u_block_io_op, &op, sizeof(op)); 2.127 break; 2.128 2.129 case BLOCK_IO_OP_VBD_INFO: 2.130 /* query information about a particular VBD */ 2.131 - ret = vbd_info(&op.u.info_params); 2.132 - if(ret == 0) 2.133 + if ( (ret = vbd_info(&op.u.info_params)) == 0 ) 2.134 copy_to_user(u_block_io_op, &op, sizeof(op)); 2.135 break; 2.136 - 2.137 + 2.138 default: 2.139 ret = -ENOSYS; 2.140 } 2.141 @@ -369,33 +367,27 @@ static void unlock_buffer(unsigned long 2.142 static int do_block_io_op_domain(struct task_struct *p, int max_to_do) 2.143 { 2.144 blk_ring_t *blk_ring = p->blk_ring_base; 2.145 - int i, more_to_do = 0; 2.146 + blk_ring_req_entry_t *req; 2.147 + BLK_RING_IDX i; 2.148 + int more_to_do = 0; 2.149 2.150 - /* 2.151 - * Take 
items off the comms ring, taking care not to catch up 2.152 - * with the response-producer index. 2.153 - */ 2.154 + /* Take items off the comms ring, taking care not to overflow. */ 2.155 for ( i = p->blk_req_cons; 2.156 - (i != blk_ring->req_prod) && 2.157 - (((p->blk_resp_prod-i) & (BLK_RING_SIZE-1)) != 1); 2.158 - i = BLK_RING_INC(i) ) 2.159 + (i != blk_ring->req_prod) && ((i-p->blk_resp_prod) != BLK_RING_SIZE); 2.160 + i++ ) 2.161 { 2.162 - if ( (max_to_do-- == 0) || 2.163 - (atomic_read(&nr_pending) == MAX_PENDING_REQS) ) 2.164 + if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) ) 2.165 { 2.166 more_to_do = 1; 2.167 break; 2.168 } 2.169 2.170 - switch ( blk_ring->ring[i].req.operation ) 2.171 + req = &blk_ring->ring[MASK_BLK_IDX(i)].req; 2.172 + switch ( req->operation ) 2.173 { 2.174 case XEN_BLOCK_READ: 2.175 case XEN_BLOCK_WRITE: 2.176 - dispatch_rw_block_io(p, i); 2.177 - break; 2.178 - 2.179 - case XEN_BLOCK_DEBUG: 2.180 - dispatch_debug_block_io(p, i); 2.181 + dispatch_rw_block_io(p, req); 2.182 break; 2.183 2.184 default: 2.185 @@ -411,16 +403,10 @@ static int do_block_io_op_domain(struct 2.186 return more_to_do; 2.187 } 2.188 2.189 -static void dispatch_debug_block_io(struct task_struct *p, int index) 2.190 -{ 2.191 - DPRINTK("dispatch_debug_block_io: unimplemented\n"); 2.192 -} 2.193 - 2.194 -static void dispatch_rw_block_io(struct task_struct *p, int index) 2.195 +static void dispatch_rw_block_io(struct task_struct *p, 2.196 + blk_ring_req_entry_t *req) 2.197 { 2.198 extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 2.199 - blk_ring_t *blk_ring = p->blk_ring_base; 2.200 - blk_ring_req_entry_t *req = &blk_ring->ring[index].req; 2.201 struct buffer_head *bh; 2.202 int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ; 2.203 unsigned short nr_sects; 2.204 @@ -479,9 +465,9 @@ static void dispatch_rw_block_io(struct 2.205 } 2.206 2.207 /* 2.208 - * XXX Clear any 'partition' info in device. This works because IDE 2.209 - * ignores the partition bits anyway. Only SCSI needs this hack, 2.210 - * and it has four bits to clear. 2.211 + * Clear any 'partition' bits in the device id. This works because 2.212 + * IDE ignores the partition bits anyway. Only SCSI needs this 2.213 + * hack, and we know that always requires the four LSBs cleared. 2.214 */ 2.215 phys_seg[nr_psegs].dev = req->device & 0xFFF0; 2.216 new_segs = 1; 2.217 @@ -506,9 +492,7 @@ static void dispatch_rw_block_io(struct 2.218 } 2.219 } 2.220 2.221 - atomic_inc(&nr_pending); 2.222 - pending_req = pending_reqs + pending_ring[pending_cons]; 2.223 - PENDREQ_IDX_INC(pending_cons); 2.224 + pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]]; 2.225 pending_req->domain = p; 2.226 pending_req->id = req->id; 2.227 pending_req->operation = operation; 2.228 @@ -563,17 +547,16 @@ static void make_response(struct task_st 2.229 unsigned short op, unsigned long st) 2.230 { 2.231 unsigned long cpu_mask; 2.232 - int position; 2.233 - blk_ring_t *blk_ring; 2.234 + blk_ring_resp_entry_t *resp; 2.235 2.236 /* Place on the response ring for the relevant domain. 
*/ 2.237 spin_lock(&p->blk_ring_lock); 2.238 - blk_ring = p->blk_ring_base; 2.239 - position = p->blk_resp_prod; 2.240 - blk_ring->ring[position].resp.id = id; 2.241 - blk_ring->ring[position].resp.operation = op; 2.242 - blk_ring->ring[position].resp.status = st; 2.243 - p->blk_resp_prod = blk_ring->resp_prod = BLK_RING_INC(position); 2.244 + resp = &p->blk_ring_base->ring[MASK_BLK_IDX(p->blk_resp_prod)].resp; 2.245 + resp->id = id; 2.246 + resp->operation = op; 2.247 + resp->status = st; 2.248 + wmb(); 2.249 + p->blk_ring_base->resp_prod = ++p->blk_resp_prod; 2.250 spin_unlock(&p->blk_ring_lock); 2.251 2.252 /* Kick the relevant domain. */ 2.253 @@ -585,11 +568,12 @@ static void dump_blockq(u_char key, void 2.254 { 2.255 unsigned long flags; 2.256 struct task_struct *p; 2.257 - blk_ring_t *blk_ring ; 2.258 + blk_ring_t *blk_ring; 2.259 int i; 2.260 2.261 - printk("Dumping block queue stats: nr_pending = %d (prod=%d,cons=%d)\n", 2.262 - atomic_read(&nr_pending), pending_prod, pending_cons); 2.263 + printk("Dumping block queue stats: nr_pending = %d" 2.264 + " (prod=0x%08x,cons=0x%08x)\n", 2.265 + NR_PENDING_REQS, pending_prod, pending_cons); 2.266 2.267 read_lock_irqsave(&tasklist_lock, flags); 2.268 p = &idle0_task; 2.269 @@ -599,7 +583,8 @@ static void dump_blockq(u_char key, void 2.270 printk("Domain: %d\n", p->domain); 2.271 blk_ring = p->blk_ring_base; 2.272 2.273 - printk(" req_prod:%d, req_cons:%d resp_prod:%d/%d on_list=%d\n", 2.274 + printk(" req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/" 2.275 + "0x%08x on_list=%d\n", 2.276 blk_ring->req_prod, p->blk_req_cons, 2.277 blk_ring->resp_prod, p->blk_resp_prod, 2.278 __on_blkdev_list(p)); 2.279 @@ -621,7 +606,9 @@ static void dump_blockq(u_char key, void 2.280 /* Start-of-day initialisation for a new domain. */ 2.281 void init_blkdev_info(struct task_struct *p) 2.282 { 2.283 - if ( sizeof(*p->blk_ring_base) > PAGE_SIZE ) BUG(); 2.284 + if ( unlikely(sizeof(*p->blk_ring_base) > PAGE_SIZE) ) 2.285 + BUG(); 2.286 + 2.287 p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL); 2.288 clear_page(p->blk_ring_base); 2.289 SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p); 2.290 @@ -655,8 +642,8 @@ void initialize_block_io () 2.291 { 2.292 int i; 2.293 2.294 - atomic_set(&nr_pending, 0); 2.295 - pending_prod = pending_cons = 0; 2.296 + pending_cons = 0; 2.297 + pending_prod = MAX_PENDING_REQS; 2.298 memset(pending_reqs, 0, sizeof(pending_reqs)); 2.299 for ( i = 0; i < MAX_PENDING_REQS; i++ ) 2.300 pending_ring[i] = i;
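The xen_block.c hunks above also rework the hypervisor's private allocation ring for pending requests. A hedged sketch of that idiom, with simplified types and without the real code's locking: the ring holds free request indices, pending_prod starts at MAX_PENDING_REQS so the ring begins full of free identifiers, and MAX_PENDING_REQS - pending_prod + pending_cons counts how many are currently in flight (which is what lets the patch drop the old atomic nr_pending counter).

```c
#include <assert.h>

#define MAX_PENDING_REQS 64                    /* power of two */
#define MASK_PEND_IDX(_i) ((_i) & (MAX_PENDING_REQS-1))

typedef unsigned int PEND_RING_IDX;

static unsigned char pending_ring[MAX_PENDING_REQS];  /* ring of free indices */
static PEND_RING_IDX pending_prod, pending_cons;

/* Requests currently handed out = capacity minus free entries in the ring. */
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

static void init_pending(void)
{
    pending_cons = 0;
    pending_prod = MAX_PENDING_REQS;           /* ring starts full of free ids */
    for ( int i = 0; i < MAX_PENDING_REQS; i++ )
        pending_ring[i] = (unsigned char)i;
}

static int alloc_pending(void)                 /* returns -1 if none free */
{
    if ( NR_PENDING_REQS == MAX_PENDING_REQS )
        return -1;
    return pending_ring[MASK_PEND_IDX(pending_cons++)];
}

static void free_pending(int id)               /* called when a request completes */
{
    pending_ring[MASK_PEND_IDX(pending_prod++)] = (unsigned char)id;
}

int main(void)
{
    init_pending();
    assert(NR_PENDING_REQS == 0);
    int id = alloc_pending();
    assert(id >= 0 && NR_PENDING_REQS == 1);
    free_pending(id);
    assert(NR_PENDING_REQS == 0);
    return 0;
}
```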
--- a/xen/include/hypervisor-ifs/block.h Sun Dec 21 14:20:06 2003 +0000
+++ b/xen/include/hypervisor-ifs/block.h Wed Dec 24 16:46:32 2003 +0000
@@ -24,7 +24,6 @@
 
 /* NB. Ring size must be small enough for sizeof(blk_ring_t) <= PAGE_SIZE. */
 #define BLK_RING_SIZE 64
-#define BLK_RING_INC(_i) (((_i)+1) & (BLK_RING_SIZE-1))
 
 /*
  * Maximum scatter/gather segments per request.
@@ -50,10 +49,23 @@ typedef struct blk_ring_resp_entry
     unsigned long status; /* cuurently boolean good/bad */
 } blk_ring_resp_entry_t;
 
+/*
+ * We use a special capitalised type name because it is _essential_ that all
+ * arithmetic on indexes is done on an integer type of the correct size.
+ */
+typedef unsigned int BLK_RING_IDX;
+
+/*
+ * Ring indexes are 'free running'. That is, they are not stored modulo the
+ * size of the ring buffer. The following macro converts a free-running counter
+ * into a value that can directly index a ring-buffer array.
+ */
+#define MASK_BLK_IDX(_i) ((_i)&(BLK_RING_SIZE-1))
+
 typedef struct blk_ring_st
 {
-    unsigned int req_prod;  /* Request producer. Updated by guest OS. */
-    unsigned int resp_prod; /* Response producer. Updated by Xen. */
+    BLK_RING_IDX req_prod;  /* Request producer. Updated by guest OS. */
+    BLK_RING_IDX resp_prod; /* Response producer. Updated by Xen. */
     union {
         blk_ring_req_entry_t req;
         blk_ring_resp_entry_t resp;
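The comment about a "special capitalised type name" matters because these counters eventually wrap around modulo 2^32. A small illustration (mine, not part of the changeset) of why producer/consumer differences remain correct across the wrap, provided both indices use the same fixed-width unsigned type:

```c
#include <assert.h>

typedef unsigned int BLK_RING_IDX;      /* 32 bits wide: wraps modulo 2^32 */

int main(void)
{
    /* The consumer sits just below the wrap point; the producer has wrapped. */
    BLK_RING_IDX resp_prod = 0xFFFFFFFEu;
    BLK_RING_IDX req_prod  = 0x00000003u;

    /* Modular arithmetic in the index type still gives the true distance. */
    assert(req_prod - resp_prod == 5);

    /* The same subtraction in a wider type would be badly wrong. */
    assert((long long)req_prod - (long long)resp_prod != 5);
    return 0;
}
```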
--- a/xen/include/hypervisor-ifs/network.h Sun Dec 21 14:20:06 2003 +0000
+++ b/xen/include/hypervisor-ifs/network.h Wed Dec 24 16:46:32 2003 +0000
@@ -85,6 +85,20 @@ typedef struct net_ring_st
     rx_entry_t rx_ring[RX_RING_SIZE];
 } net_ring_t;
 
+/*
+ * We use a special capitalised type name because it is _essential_ that all
+ * arithmetic on indexes is done on an integer type of the correct size.
+ */
+typedef unsigned int NET_RING_IDX;
+
+/*
+ * Ring indexes are 'free running'. That is, they are not stored modulo the
+ * size of the ring buffer. The following macros convert a free-running counter
+ * into a value that can directly index a ring-buffer array.
+ */
+#define MASK_NET_RX_IDX(_i) ((_i)&(RX_RING_SIZE-1))
+#define MASK_NET_TX_IDX(_i) ((_i)&(TX_RING_SIZE-1))
+
 typedef struct net_idx_st
 {
     /*
@@ -93,8 +107,8 @@ typedef struct net_idx_st
      * Guest OS places empty buffers into ring at rx_req_prod.
      * Guest OS receives EVENT_NET when rx_rssp_prod passes rx_event.
      */
-    unsigned int tx_req_prod, tx_resp_prod, tx_event;
-    unsigned int rx_req_prod, rx_resp_prod, rx_event;
+    NET_RING_IDX tx_req_prod, tx_resp_prod, tx_event;
+    NET_RING_IDX rx_req_prod, rx_resp_prod, rx_event;
 } net_idx_t;
 
 /*
--- a/xen/include/xeno/sched.h Sun Dec 21 14:20:06 2003 +0000
+++ b/xen/include/xeno/sched.h Wed Dec 24 16:46:32 2003 +0000
@@ -122,8 +122,8 @@ struct task_struct
 
     /* Block I/O */
     blk_ring_t *blk_ring_base;
-    unsigned int blk_req_cons;  /* request consumer */
-    unsigned int blk_resp_prod; /* (private version of) response producer */
+    BLK_RING_IDX blk_req_cons;  /* request consumer */
+    BLK_RING_IDX blk_resp_prod; /* (private version of) response producer */
     struct list_head blkdev_list;
     spinlock_t blk_ring_lock;
     vbd_t *vbdtab[VBD_HTAB_SZ]; /* mapping from 16-bit vdevices to vbds */
--- a/xen/include/xeno/vif.h Sun Dec 21 14:20:06 2003 +0000
+++ b/xen/include/xeno/vif.h Wed Dec 24 16:46:32 2003 +0000
@@ -54,17 +54,17 @@ typedef struct net_vif_st {
 
     /* The private rings and indexes. */
     rx_shadow_entry_t rx_shadow_ring[RX_RING_SIZE];
-    unsigned int rx_prod;  /* More buffers for filling go here. */
-    unsigned int rx_cons;  /* Next buffer to fill is here. */
+    NET_RING_IDX rx_prod;  /* More buffers for filling go here. */
+    NET_RING_IDX rx_cons;  /* Next buffer to fill is here. */
     tx_shadow_entry_t tx_shadow_ring[TX_RING_SIZE];
-    unsigned int tx_prod;  /* More packets for sending go here. */
-    unsigned int tx_cons;  /* Next packet to send is here. */
+    NET_RING_IDX tx_prod;  /* More packets for sending go here. */
+    NET_RING_IDX tx_cons;  /* Next packet to send is here. */
 
     /* Private indexes into shared ring. */
-    unsigned int rx_req_cons;
-    unsigned int rx_resp_prod; /* private version of shared variable */
-    unsigned int tx_req_cons;
-    unsigned int tx_resp_prod; /* private version of shared variable */
+    NET_RING_IDX rx_req_cons;
+    NET_RING_IDX rx_resp_prod; /* private version of shared variable */
+    NET_RING_IDX tx_req_cons;
+    NET_RING_IDX tx_resp_prod; /* private version of shared variable */
 
     /* Usage accounting */
     long long total_bytes_sent;
7.1 --- a/xen/net/dev.c Sun Dec 21 14:20:06 2003 +0000 7.2 +++ b/xen/net/dev.c Wed Dec 24 16:46:32 2003 +0000 7.3 @@ -39,11 +39,6 @@ 7.4 #define rtnl_lock() ((void)0) 7.5 #define rtnl_unlock() ((void)0) 7.6 7.7 -#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1)) 7.8 -#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1)) 7.9 -#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1)) 7.10 -#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1)) 7.11 - 7.12 struct skb_completion_queues skb_queue[NR_CPUS] __cacheline_aligned; 7.13 7.14 static int get_tx_bufs(net_vif_t *vif); 7.15 @@ -495,7 +490,6 @@ void deliver_packet(struct sk_buff *skb, 7.16 rx_shadow_entry_t *rx; 7.17 unsigned long *ptep, pte; 7.18 struct pfn_info *old_page, *new_page, *pte_page; 7.19 - unsigned int i; 7.20 unsigned short size; 7.21 unsigned char offset, status = RING_STATUS_OK; 7.22 struct task_struct *p = vif->domain; 7.23 @@ -506,14 +500,13 @@ void deliver_packet(struct sk_buff *skb, 7.24 7.25 spin_lock(&vif->rx_lock); 7.26 7.27 - if ( (i = vif->rx_cons) == vif->rx_prod ) 7.28 + if ( unlikely(vif->rx_cons == vif->rx_prod) ) 7.29 { 7.30 spin_unlock(&vif->rx_lock); 7.31 perfc_incr(net_rx_capacity_drop); 7.32 return; 7.33 } 7.34 - rx = &vif->rx_shadow_ring[i]; 7.35 - vif->rx_cons = RX_RING_INC(i); 7.36 + rx = &vif->rx_shadow_ring[MASK_NET_RX_IDX(vif->rx_cons++)]; 7.37 7.38 size = (unsigned short)skb->len; 7.39 offset = (unsigned char)((unsigned long)skb->data & ~PAGE_MASK); 7.40 @@ -739,7 +732,7 @@ static void net_tx_action(unsigned long 7.41 7.42 add_to_net_schedule_list_tail(vif); 7.43 7.44 - if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL ) 7.45 + if ( unlikely((skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL) ) 7.46 { 7.47 printk("Out of memory in net_tx_action()!\n"); 7.48 add_to_net_schedule_list_tail(vif); 7.49 @@ -748,8 +741,7 @@ static void net_tx_action(unsigned long 7.50 } 7.51 7.52 /* Pick an entry from the transmit queue. */ 7.53 - tx = &vif->tx_shadow_ring[vif->tx_cons]; 7.54 - vif->tx_cons = TX_RING_INC(vif->tx_cons); 7.55 + tx = &vif->tx_shadow_ring[MASK_NET_TX_IDX(vif->tx_cons++)]; 7.56 7.57 skb->destructor = tx_skb_release; 7.58 7.59 @@ -776,7 +768,7 @@ static void net_tx_action(unsigned long 7.60 vif->total_bytes_sent += tx->size; 7.61 7.62 /* Is the NIC crap? */ 7.63 - if ( !(dev->features & NETIF_F_SG) ) 7.64 + if ( unlikely(!(dev->features & NETIF_F_SG)) ) 7.65 { 7.66 nskb = skb_copy(skb, GFP_KERNEL); 7.67 kfree_skb(skb); 7.68 @@ -784,7 +776,7 @@ static void net_tx_action(unsigned long 7.69 } 7.70 7.71 /* Transmit should always work, or the queue would be stopped. 
*/ 7.72 - if ( dev->hard_start_xmit(skb, dev) != 0 ) 7.73 + if ( unlikely(dev->hard_start_xmit(skb, dev) != 0) ) 7.74 { 7.75 printk("Weird failure in hard_start_xmit!\n"); 7.76 kfree_skb(skb); 7.77 @@ -1849,7 +1841,9 @@ static int get_tx_bufs(net_vif_t *vif) 7.78 unsigned short protocol; 7.79 struct sk_buff *skb; 7.80 tx_req_entry_t tx; 7.81 - int i, j, ret = 0; 7.82 + tx_shadow_entry_t *stx; 7.83 + NET_RING_IDX i, j; 7.84 + int ret = 0; 7.85 7.86 if ( vif->tx_req_cons == shared_idxs->tx_req_prod ) 7.87 return 0; 7.88 @@ -1870,13 +1864,14 @@ static int get_tx_bufs(net_vif_t *vif) 7.89 again: 7.90 for ( i = vif->tx_req_cons; 7.91 (i != shared_idxs->tx_req_prod) && 7.92 - (((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1); 7.93 - i = TX_RING_INC(i) ) 7.94 + ((i-vif->tx_resp_prod) != TX_RING_SIZE); 7.95 + i++ ) 7.96 { 7.97 - tx = shared_rings->tx_ring[i].req; 7.98 + tx = shared_rings->tx_ring[MASK_NET_TX_IDX(i)].req; 7.99 target = VIF_DROP; 7.100 7.101 - if ( (tx.size <= PKT_PROT_LEN) || (tx.size > ETH_FRAME_LEN) ) 7.102 + if ( unlikely(tx.size <= PKT_PROT_LEN) || 7.103 + unlikely(tx.size > ETH_FRAME_LEN) ) 7.104 { 7.105 DPRINTK("Bad packet size: %d\n", tx.size); 7.106 make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE); 7.107 @@ -1940,7 +1935,7 @@ static int get_tx_bufs(net_vif_t *vif) 7.108 if ( VIF_LOCAL(target) ) 7.109 { 7.110 /* Local delivery */ 7.111 - if ( (skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL ) 7.112 + if ( unlikely((skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL) ) 7.113 { 7.114 make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE); 7.115 put_vif(target); 7.116 @@ -1972,20 +1967,21 @@ static int get_tx_bufs(net_vif_t *vif) 7.117 } 7.118 else if ( (target == VIF_PHYS) || IS_PRIV(p) ) 7.119 { 7.120 - vif->tx_shadow_ring[j].id = tx.id; 7.121 - vif->tx_shadow_ring[j].size = tx.size; 7.122 - vif->tx_shadow_ring[j].header = 7.123 - kmem_cache_alloc(net_header_cachep, GFP_KERNEL); 7.124 - if ( vif->tx_shadow_ring[j].header == NULL ) 7.125 + stx = &vif->tx_shadow_ring[MASK_NET_TX_IDX(j)]; 7.126 + stx->id = tx.id; 7.127 + stx->size = tx.size; 7.128 + stx->header = kmem_cache_alloc(net_header_cachep, GFP_KERNEL); 7.129 + if ( unlikely(stx->header == NULL) ) 7.130 { 7.131 make_tx_response(vif, tx.id, RING_STATUS_OK); 7.132 goto cleanup_and_continue; 7.133 } 7.134 7.135 - memcpy(vif->tx_shadow_ring[j].header, g_data, PKT_PROT_LEN); 7.136 - vif->tx_shadow_ring[j].payload = tx.addr + PKT_PROT_LEN; 7.137 + memcpy(stx->header, g_data, PKT_PROT_LEN); 7.138 + stx->payload = tx.addr + PKT_PROT_LEN; 7.139 + 7.140 + j++; 7.141 buf_page = NULL; /* hand off our page reference */ 7.142 - j = TX_RING_INC(j); 7.143 } 7.144 else 7.145 { 7.146 @@ -2023,8 +2019,9 @@ static void get_rx_bufs(net_vif_t *vif) 7.147 struct task_struct *p = vif->domain; 7.148 net_ring_t *shared_rings = vif->shared_rings; 7.149 net_idx_t *shared_idxs = vif->shared_idxs; 7.150 - unsigned int i, j; 7.151 + NET_RING_IDX i, j; 7.152 rx_req_entry_t rx; 7.153 + rx_shadow_entry_t *srx; 7.154 unsigned long pte_pfn, buf_pfn; 7.155 struct pfn_info *pte_page, *buf_page; 7.156 unsigned long *ptep, pte; 7.157 @@ -2039,10 +2036,10 @@ static void get_rx_bufs(net_vif_t *vif) 7.158 j = vif->rx_prod; 7.159 for ( i = vif->rx_req_cons; 7.160 (i != shared_idxs->rx_req_prod) && 7.161 - (((vif->rx_resp_prod-i) & (RX_RING_SIZE-1)) != 1); 7.162 - i = RX_RING_INC(i) ) 7.163 + ((i-vif->rx_resp_prod) != RX_RING_SIZE); 7.164 + i++ ) 7.165 { 7.166 - rx = shared_rings->rx_ring[i].req; 7.167 + rx = shared_rings->rx_ring[MASK_NET_RX_IDX(i)].req; 7.168 7.169 
pte_pfn = rx.addr >> PAGE_SHIFT; 7.170 pte_page = &frame_table[pte_pfn]; 7.171 @@ -2116,10 +2113,10 @@ static void get_rx_bufs(net_vif_t *vif) 7.172 list_del(&buf_page->list); 7.173 spin_unlock(&p->page_list_lock); 7.174 7.175 - vif->rx_shadow_ring[j].id = rx.id; 7.176 - vif->rx_shadow_ring[j].pte_ptr = rx.addr; 7.177 - vif->rx_shadow_ring[j].buf_pfn = buf_pfn; 7.178 - j = RX_RING_INC(j); 7.179 + srx = &vif->rx_shadow_ring[MASK_NET_RX_IDX(j++)]; 7.180 + srx->id = rx.id; 7.181 + srx->pte_ptr = rx.addr; 7.182 + srx->buf_pfn = buf_pfn; 7.183 7.184 rx_unmap_and_continue: 7.185 unmap_domain_mem(ptep); 7.186 @@ -2153,7 +2150,7 @@ static long get_bufs_from_vif(net_vif_t 7.187 7.188 long flush_bufs_for_vif(net_vif_t *vif) 7.189 { 7.190 - int i; 7.191 + NET_RING_IDX i; 7.192 unsigned long *ptep, pte; 7.193 struct pfn_info *page; 7.194 struct task_struct *p = vif->domain; 7.195 @@ -2164,17 +2161,17 @@ long flush_bufs_for_vif(net_vif_t *vif) 7.196 /* Return any outstanding receive buffers to the guest OS. */ 7.197 spin_lock(&vif->rx_lock); 7.198 for ( i = vif->rx_req_cons; 7.199 - (i != shared_idxs->rx_req_prod) && 7.200 - (((vif->rx_resp_prod-i) & (RX_RING_SIZE-1)) != 1); 7.201 - i = RX_RING_INC(i) ) 7.202 + (i != shared_idxs->rx_req_prod) && 7.203 + ((i-vif->rx_resp_prod) != RX_RING_SIZE); 7.204 + i++ ); 7.205 { 7.206 - make_rx_response(vif, shared_rings->rx_ring[i].req.id, 0, 7.207 - RING_STATUS_DROPPED, 0); 7.208 + make_rx_response(vif, shared_rings->rx_ring[MASK_NET_RX_IDX(i)].req.id, 7.209 + 0, RING_STATUS_DROPPED, 0); 7.210 } 7.211 vif->rx_req_cons = i; 7.212 - for ( i = vif->rx_cons; i != vif->rx_prod; i = RX_RING_INC(i) ) 7.213 + for ( i = vif->rx_cons; i != vif->rx_prod; i++ ) 7.214 { 7.215 - rx = &vif->rx_shadow_ring[i]; 7.216 + rx = &vif->rx_shadow_ring[MASK_NET_RX_IDX(i)]; 7.217 7.218 /* Give the buffer page back to the domain. */ 7.219 page = &frame_table[rx->buf_pfn]; 7.220 @@ -2209,12 +2206,12 @@ long flush_bufs_for_vif(net_vif_t *vif) 7.221 */ 7.222 spin_lock(&vif->tx_lock); 7.223 for ( i = vif->tx_req_cons; 7.224 - (i != shared_idxs->tx_req_prod) && 7.225 - (((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1); 7.226 - i = TX_RING_INC(i) ) 7.227 + (i != shared_idxs->tx_req_prod) && 7.228 + ((i-vif->tx_resp_prod) != TX_RING_SIZE); 7.229 + i++ ) 7.230 { 7.231 - make_tx_response(vif, shared_rings->tx_ring[i].req.id, 7.232 - RING_STATUS_DROPPED); 7.233 + make_tx_response(vif, shared_rings->tx_ring[MASK_NET_TX_IDX(i)].req.id, 7.234 + RING_STATUS_DROPPED); 7.235 } 7.236 vif->tx_req_cons = i; 7.237 spin_unlock(&vif->tx_lock); 7.238 @@ -2237,11 +2234,11 @@ long do_net_io_op(netop_t *uop) 7.239 7.240 perfc_incr(net_hypercalls); 7.241 7.242 - if ( copy_from_user(&op, uop, sizeof(op)) ) 7.243 + if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) ) 7.244 return -EFAULT; 7.245 7.246 - if ( (op.vif >= MAX_DOMAIN_VIFS) || 7.247 - ((vif = current->net_vif_list[op.vif]) == NULL) ) 7.248 + if ( unlikely(op.vif >= MAX_DOMAIN_VIFS) || 7.249 + unlikely((vif = current->net_vif_list[op.vif]) == NULL) ) 7.250 return -EINVAL; 7.251 7.252 switch ( op.cmd ) 7.253 @@ -2292,18 +2289,17 @@ static void make_tx_response(net_vif_t 7.254 unsigned short id, 7.255 unsigned char st) 7.256 { 7.257 - unsigned int pos; 7.258 + NET_RING_IDX i = vif->tx_resp_prod; 7.259 tx_resp_entry_t *resp; 7.260 7.261 - /* Place on the response ring for the relevant domain. 
*/ 7.262 - pos = vif->tx_resp_prod; 7.263 - resp = &vif->shared_rings->tx_ring[pos].resp; 7.264 + resp = &vif->shared_rings->tx_ring[MASK_NET_TX_IDX(i)].resp; 7.265 resp->id = id; 7.266 resp->status = st; 7.267 - pos = TX_RING_INC(pos); 7.268 - vif->tx_resp_prod = vif->shared_idxs->tx_resp_prod = pos; 7.269 + wmb(); 7.270 + vif->shared_idxs->tx_resp_prod = vif->tx_resp_prod = ++i; 7.271 + 7.272 smp_mb(); /* Update producer before checking event threshold. */ 7.273 - if ( pos == vif->shared_idxs->tx_event ) 7.274 + if ( i == vif->shared_idxs->tx_event ) 7.275 { 7.276 unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET); 7.277 guest_event_notify(cpu_mask); 7.278 @@ -2317,20 +2313,19 @@ static void make_rx_response(net_vif_t 7.279 unsigned char st, 7.280 unsigned char off) 7.281 { 7.282 - unsigned int pos; 7.283 + NET_RING_IDX i = vif->rx_resp_prod; 7.284 rx_resp_entry_t *resp; 7.285 7.286 - /* Place on the response ring for the relevant domain. */ 7.287 - pos = vif->rx_resp_prod; 7.288 - resp = &vif->shared_rings->rx_ring[pos].resp; 7.289 + resp = &vif->shared_rings->rx_ring[MASK_NET_RX_IDX(i)].resp; 7.290 resp->id = id; 7.291 resp->size = size; 7.292 resp->status = st; 7.293 resp->offset = off; 7.294 - pos = RX_RING_INC(pos); 7.295 - vif->rx_resp_prod = vif->shared_idxs->rx_resp_prod = pos; 7.296 + wmb(); 7.297 + vif->shared_idxs->rx_resp_prod = vif->rx_resp_prod = ++i; 7.298 + 7.299 smp_mb(); /* Update producer before checking event threshold. */ 7.300 - if ( pos == vif->shared_idxs->rx_event ) 7.301 + if ( i == vif->shared_idxs->rx_event ) 7.302 { 7.303 unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET); 7.304 guest_event_notify(cpu_mask);
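Several hunks above (make_response in xen_block.c, make_tx_response and make_rx_response in dev.c) also add a wmb() between filling in a response slot and publishing the new producer index. A rough sketch of that ordering contract follows; it uses a GCC builtin as a stand-in for Xen's wmb()/mb() macros, so treat it as an approximation rather than the drivers' actual code.

```c
#define RING_SIZE 64
#define MASK_IDX(_i) ((_i) & (RING_SIZE-1))

typedef unsigned int RING_IDX;

struct resp { unsigned long id, status; };

struct shared_ring {
    volatile RING_IDX resp_prod;               /* written by the producer only */
    struct resp       ring[RING_SIZE];
};

/* Producer: fill the slot first, then make it visible. */
static void produce_response(struct shared_ring *s, RING_IDX *priv_prod,
                             unsigned long id, unsigned long status)
{
    struct resp *r = &s->ring[MASK_IDX(*priv_prod)];
    r->id     = id;
    r->status = status;
    __sync_synchronize();                      /* stands in for wmb() */
    s->resp_prod = ++(*priv_prod);             /* publish after the data */
}

/* Consumer: snapshot the producer index, then read entries up to it. */
static void consume_responses(struct shared_ring *s, RING_IDX *priv_cons)
{
    RING_IDX prod = s->resp_prod;
    __sync_synchronize();                      /* stands in for rmb() */
    for ( RING_IDX i = *priv_cons; i != prod; i++ )
    {
        struct resp *r = &s->ring[MASK_IDX(i)];
        (void)r->id; (void)r->status;          /* handle the response here */
    }
    *priv_cons = prod;
}

int main(void)
{
    static struct shared_ring s;               /* stands in for the shared page */
    RING_IDX priv_prod = 0, priv_cons = 0;
    produce_response(&s, &priv_prod, 1, 0);
    consume_responses(&s, &priv_cons);
    return (priv_cons == 1) ? 0 : 1;
}
```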
8.1 --- a/xenolinux-2.4.23-sparse/arch/xeno/drivers/block/xl_block.c Sun Dec 21 14:20:06 2003 +0000 8.2 +++ b/xenolinux-2.4.23-sparse/arch/xeno/drivers/block/xl_block.c Wed Dec 24 16:46:32 2003 +0000 8.3 @@ -20,14 +20,14 @@ typedef unsigned char byte; /* from linu 8.4 static unsigned int state = STATE_SUSPENDED; 8.5 8.6 static blk_ring_t *blk_ring; 8.7 -static unsigned int resp_cons; /* Response consumer for comms ring. */ 8.8 -static unsigned int req_prod; /* Private request producer. */ 8.9 +static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */ 8.10 +static BLK_RING_IDX req_prod; /* Private request producer. */ 8.11 8.12 #define XDI_MAX 64 8.13 static xen_disk_info_t xlblk_disk_info; /* information about our disks/VBDs */ 8.14 8.15 /* We plug the I/O ring if the driver is suspended or if the ring is full. */ 8.16 -#define RING_PLUGGED ((BLK_RING_INC(req_prod) == resp_cons) || \ 8.17 +#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \ 8.18 (state != STATE_ACTIVE)) 8.19 8.20 /* 8.21 @@ -260,10 +260,12 @@ static int hypervisor_request(unsigned l 8.22 blk_ring_req_entry_t *req; 8.23 struct buffer_head *bh; 8.24 8.25 - if ( nr_sectors >= (1<<9) ) BUG(); 8.26 - if ( (buffer_ma & ((1<<9)-1)) != 0 ) BUG(); 8.27 + if ( unlikely(nr_sectors >= (1<<9)) ) 8.28 + BUG(); 8.29 + if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) 8.30 + BUG(); 8.31 8.32 - if ( state == STATE_CLOSED ) 8.33 + if ( unlikely(state == STATE_CLOSED) ) 8.34 return 1; 8.35 8.36 switch ( operation ) 8.37 @@ -273,16 +275,18 @@ static int hypervisor_request(unsigned l 8.38 case XEN_BLOCK_WRITE: 8.39 gd = get_gendisk(device); 8.40 8.41 - /* Update the sector_number we'll pass down as appropriate; note 8.42 - that we could sanity check that resulting sector will be in 8.43 - this partition, but this will happen in xen anyhow */ 8.44 + /* 8.45 + * Update the sector_number we'll pass down as appropriate; note that 8.46 + * we could sanity check that resulting sector will be in this 8.47 + * partition, but this will happen in xen anyhow. 8.48 + */ 8.49 sector_number += gd->part[MINOR(device)].start_sect; 8.50 8.51 if ( (sg_operation == operation) && 8.52 (sg_dev == device) && 8.53 (sg_next_sect == sector_number) ) 8.54 { 8.55 - req = &blk_ring->ring[(req_prod-1)&(BLK_RING_SIZE-1)].req; 8.56 + req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req; 8.57 bh = (struct buffer_head *)id; 8.58 bh->b_reqnext = (struct buffer_head *)req->id; 8.59 req->id = id; 8.60 @@ -310,14 +314,14 @@ static int hypervisor_request(unsigned l 8.61 } 8.62 8.63 /* Fill out a communications ring structure. */ 8.64 - req = &blk_ring->ring[req_prod].req; 8.65 + req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req; 8.66 req->id = id; 8.67 req->operation = operation; 8.68 req->sector_number = sector_number; 8.69 req->device = device; 8.70 req->nr_segments = 1; 8.71 req->buffer_and_sects[0] = buffer_ma | nr_sectors; 8.72 - req_prod = BLK_RING_INC(req_prod); 8.73 + req_prod++; 8.74 8.75 return 0; 8.76 } 8.77 @@ -345,8 +349,9 @@ void do_xlblk_request(request_queue_t *r 8.78 req->current_nr_sectors, req->nr_sectors, req->bh); 8.79 8.80 rw = req->cmd; 8.81 - if ( rw == READA ) rw = READ; 8.82 - if ((rw != READ) && (rw != WRITE)) 8.83 + if ( rw == READA ) 8.84 + rw = READ; 8.85 + if ( unlikely((rw != READ) && (rw != WRITE)) ) 8.86 panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); 8.87 8.88 req->errors = 0; 8.89 @@ -362,13 +367,13 @@ void do_xlblk_request(request_queue_t *r 8.90 (rw == READ) ? 
XEN_BLOCK_READ : XEN_BLOCK_WRITE, 8.91 bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); 8.92 8.93 - if(full) { 8.94 - 8.95 + if ( full ) 8.96 + { 8.97 bh->b_reqnext = next_bh; 8.98 pending_queues[nr_pending++] = rq; 8.99 - if ( nr_pending >= MAX_PENDING ) BUG(); 8.100 + if ( unlikely(nr_pending >= MAX_PENDING) ) 8.101 + BUG(); 8.102 goto out; 8.103 - 8.104 } 8.105 8.106 queued++; 8.107 @@ -390,7 +395,8 @@ void do_xlblk_request(request_queue_t *r 8.108 else 8.109 { 8.110 /* That was the last buffer head. Finalise the request. */ 8.111 - if ( end_that_request_first(req, 1, "XenBlk") ) BUG(); 8.112 + if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) 8.113 + BUG(); 8.114 blkdev_dequeue_request(req); 8.115 end_that_request_last(req); 8.116 } 8.117 @@ -406,40 +412,34 @@ static void kick_pending_request_queues( 8.118 { 8.119 /* We kick pending request queues if the ring is reasonably empty. */ 8.120 if ( (nr_pending != 0) && 8.121 - (((req_prod - resp_cons) & (BLK_RING_SIZE - 1)) < 8.122 - (BLK_RING_SIZE >> 1)) ) 8.123 + ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) ) 8.124 { 8.125 /* Attempt to drain the queue, but bail if the ring becomes full. */ 8.126 - while ( nr_pending != 0 ) 8.127 - { 8.128 + while ( (nr_pending != 0) && !RING_PLUGGED ) 8.129 do_xlblk_request(pending_queues[--nr_pending]); 8.130 - if ( RING_PLUGGED ) break; 8.131 - } 8.132 } 8.133 } 8.134 8.135 8.136 static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) 8.137 { 8.138 - int i; 8.139 + BLK_RING_IDX i; 8.140 unsigned long flags; 8.141 struct buffer_head *bh, *next_bh; 8.142 8.143 - if ( state == STATE_CLOSED ) 8.144 + if ( unlikely(state == STATE_CLOSED) ) 8.145 return; 8.146 8.147 spin_lock_irqsave(&io_request_lock, flags); 8.148 8.149 - for ( i = resp_cons; 8.150 - i != blk_ring->resp_prod; 8.151 - i = BLK_RING_INC(i) ) 8.152 + for ( i = resp_cons; i != blk_ring->resp_prod; i++ ) 8.153 { 8.154 - blk_ring_resp_entry_t *bret = &blk_ring->ring[i].resp; 8.155 - switch (bret->operation) 8.156 + blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp; 8.157 + switch ( bret->operation ) 8.158 { 8.159 case XEN_BLOCK_READ: 8.160 case XEN_BLOCK_WRITE: 8.161 - if ( bret->status ) 8.162 + if ( unlikely(bret->status != 0) ) 8.163 DPRINTK("Bad return from blkdev data request: %lx\n", 8.164 bret->status); 8.165 for ( bh = (struct buffer_head *)bret->id;
9.1 --- a/xenolinux-2.4.23-sparse/arch/xeno/drivers/network/network.c Sun Dec 21 14:20:06 2003 +0000 9.2 +++ b/xenolinux-2.4.23-sparse/arch/xeno/drivers/network/network.c Wed Dec 24 16:46:32 2003 +0000 9.3 @@ -27,14 +27,6 @@ 9.4 9.5 #define NET_IRQ _EVENT_NET 9.6 9.7 -#define TX_MAX_ENTRIES (TX_RING_SIZE - 2) 9.8 -#define RX_MAX_ENTRIES (RX_RING_SIZE - 2) 9.9 - 9.10 -#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1)) 9.11 -#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1)) 9.12 -#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1)) 9.13 -#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1)) 9.14 - 9.15 #define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */ 9.16 9.17 static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs); 9.18 @@ -50,12 +42,11 @@ struct net_private 9.19 struct net_device *dev; 9.20 9.21 struct net_device_stats stats; 9.22 - atomic_t tx_entries; 9.23 - unsigned int rx_resp_cons, tx_resp_cons, tx_full; 9.24 - unsigned int net_ring_fixmap_idx; 9.25 - net_ring_t *net_ring; 9.26 - net_idx_t *net_idx; 9.27 - spinlock_t tx_lock; 9.28 + NET_RING_IDX rx_resp_cons, tx_resp_cons; 9.29 + unsigned int net_ring_fixmap_idx, tx_full; 9.30 + net_ring_t *net_ring; 9.31 + net_idx_t *net_idx; 9.32 + spinlock_t tx_lock; 9.33 unsigned int idx; /* Domain-specific index of this VIF. */ 9.34 9.35 unsigned int rx_bufs_to_notify; 9.36 @@ -80,7 +71,7 @@ struct net_private 9.37 #define GET_ID_FROM_FREELIST(_list) \ 9.38 ({ unsigned long _id = (unsigned long)(_list)[0]; \ 9.39 (_list)[0] = (_list)[_id]; \ 9.40 - _id; }) 9.41 + (unsigned short)_id; }) 9.42 9.43 9.44 static void _dbg_network_int(struct net_device *dev) 9.45 @@ -90,14 +81,15 @@ static void _dbg_network_int(struct net_ 9.46 if ( np->state == STATE_CLOSED ) 9.47 return; 9.48 9.49 - printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_resp_cons = %d," 9.50 - " tx_req_prod = %d, tx_resp_prod = %d, tx_event = %d, state=%d\n", 9.51 - np->tx_full, atomic_read(&np->tx_entries), np->tx_resp_cons, 9.52 + printk(KERN_ALERT "tx_full = %d, tx_resp_cons = 0x%08x," 9.53 + " tx_req_prod = 0x%08x, tx_resp_prod = 0x%08x," 9.54 + " tx_event = 0x%08x, state=%d\n", 9.55 + np->tx_full, np->tx_resp_cons, 9.56 np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, 9.57 np->net_idx->tx_event, 9.58 test_bit(__LINK_STATE_XOFF, &dev->state)); 9.59 - printk(KERN_ALERT "rx_resp_cons = %d," 9.60 - " rx_req_prod = %d, rx_resp_prod = %d, rx_event = %d\n", 9.61 + printk(KERN_ALERT "rx_resp_cons = 0x%08x," 9.62 + " rx_req_prod = 0x%08x, rx_resp_prod = 0x%08x, rx_event = 0x%08x\n", 9.63 np->rx_resp_cons, np->net_idx->rx_req_prod, 9.64 np->net_idx->rx_resp_prod, np->net_idx->rx_event); 9.65 } 9.66 @@ -149,7 +141,6 @@ static int network_open(struct net_devic 9.67 np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0; 9.68 memset(&np->stats, 0, sizeof(np->stats)); 9.69 spin_lock_init(&np->tx_lock); 9.70 - atomic_set(&np->tx_entries, 0); 9.71 memset(np->net_ring, 0, sizeof(*np->net_ring)); 9.72 memset(np->net_idx, 0, sizeof(*np->net_idx)); 9.73 9.74 @@ -174,33 +165,40 @@ static int network_open(struct net_devic 9.75 9.76 static void network_tx_buf_gc(struct net_device *dev) 9.77 { 9.78 - unsigned int i; 9.79 + NET_RING_IDX i, prod; 9.80 + unsigned short id; 9.81 struct net_private *np = dev->priv; 9.82 struct sk_buff *skb; 9.83 - unsigned int prod; 9.84 tx_entry_t *tx_ring = np->net_ring->tx_ring; 9.85 9.86 do { 9.87 prod = np->net_idx->tx_resp_prod; 9.88 9.89 - for ( i = np->tx_resp_cons; i != prod; i = TX_RING_INC(i) ) 
9.90 + for ( i = np->tx_resp_cons; i != prod; i++ ) 9.91 { 9.92 - skb = np->tx_skbs[tx_ring[i].resp.id]; 9.93 - ADD_ID_TO_FREELIST(np->tx_skbs, tx_ring[i].resp.id); 9.94 + id = tx_ring[MASK_NET_TX_IDX(i)].resp.id; 9.95 + skb = np->tx_skbs[id]; 9.96 + ADD_ID_TO_FREELIST(np->tx_skbs, id); 9.97 dev_kfree_skb_any(skb); 9.98 - atomic_dec(&np->tx_entries); 9.99 } 9.100 9.101 np->tx_resp_cons = prod; 9.102 9.103 - /* Set a new event, then check for race with update of tx_cons. */ 9.104 - np->net_idx->tx_event = 9.105 - TX_RING_ADD(prod, (atomic_read(&np->tx_entries)>>1) + 1); 9.106 + /* 9.107 + * Set a new event, then check for race with update of tx_cons. Note 9.108 + * that it is essential to schedule a callback, no matter how few 9.109 + * buffers are pending. Even if there is space in the transmit ring, 9.110 + * higher layers may be blocked because too much data is outstanding: 9.111 + * in such cases notification from Xen is likely to be the only kick 9.112 + * that we'll get. 9.113 + */ 9.114 + np->net_idx->tx_event = 9.115 + prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1; 9.116 mb(); 9.117 } 9.118 while ( prod != np->net_idx->tx_resp_prod ); 9.119 9.120 - if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) ) 9.121 + if ( np->tx_full && ((np->net_idx->tx_req_prod - prod) < TX_RING_SIZE) ) 9.122 { 9.123 np->tx_full = 0; 9.124 if ( np->state == STATE_ACTIVE ) 9.125 @@ -221,19 +219,21 @@ static inline pte_t *get_ppte(void *addr 9.126 9.127 static void network_alloc_rx_buffers(struct net_device *dev) 9.128 { 9.129 - unsigned int i, id; 9.130 + unsigned short id; 9.131 struct net_private *np = dev->priv; 9.132 struct sk_buff *skb; 9.133 - unsigned int end = RX_RING_ADD(np->rx_resp_cons, RX_MAX_ENTRIES); 9.134 netop_t netop; 9.135 + NET_RING_IDX i = np->net_idx->rx_req_prod; 9.136 9.137 - if ( ((i = np->net_idx->rx_req_prod) == end) || 9.138 - (np->state != STATE_ACTIVE) ) 9.139 + if ( unlikely((i - np->rx_resp_cons) == RX_RING_SIZE) || 9.140 + unlikely(np->state != STATE_ACTIVE) ) 9.141 return; 9.142 9.143 do { 9.144 skb = dev_alloc_skb(RX_BUF_SIZE); 9.145 - if ( skb == NULL ) break; 9.146 + if ( unlikely(skb == NULL) ) 9.147 + break; 9.148 + 9.149 skb->dev = dev; 9.150 9.151 if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) ) 9.152 @@ -242,13 +242,13 @@ static void network_alloc_rx_buffers(str 9.153 id = GET_ID_FROM_FREELIST(np->rx_skbs); 9.154 np->rx_skbs[id] = skb; 9.155 9.156 - np->net_ring->rx_ring[i].req.id = (unsigned short)id; 9.157 - np->net_ring->rx_ring[i].req.addr = 9.158 + np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id = id; 9.159 + np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = 9.160 virt_to_machine(get_ppte(skb->head)); 9.161 9.162 np->rx_bufs_to_notify++; 9.163 } 9.164 - while ( (i = RX_RING_INC(i)) != end ); 9.165 + while ( (++i - np->rx_resp_cons) != RX_RING_SIZE ); 9.166 9.167 /* 9.168 * We may have allocated buffers which have entries outstanding in the page 9.169 @@ -257,10 +257,10 @@ static void network_alloc_rx_buffers(str 9.170 flush_page_update_queue(); 9.171 9.172 np->net_idx->rx_req_prod = i; 9.173 - np->net_idx->rx_event = RX_RING_INC(np->rx_resp_cons); 9.174 + np->net_idx->rx_event = np->rx_resp_cons + 1; 9.175 9.176 /* Batch Xen notifications. 
*/ 9.177 - if ( np->rx_bufs_to_notify > (RX_MAX_ENTRIES/4) ) 9.178 + if ( np->rx_bufs_to_notify > (RX_RING_SIZE/4) ) 9.179 { 9.180 netop.cmd = NETOP_PUSH_BUFFERS; 9.181 netop.vif = np->idx; 9.182 @@ -272,21 +272,25 @@ static void network_alloc_rx_buffers(str 9.183 9.184 static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) 9.185 { 9.186 - unsigned int i, id; 9.187 + unsigned short id; 9.188 struct net_private *np = (struct net_private *)dev->priv; 9.189 + tx_req_entry_t *tx; 9.190 netop_t netop; 9.191 + NET_RING_IDX i; 9.192 9.193 - if ( np->tx_full ) 9.194 + if ( unlikely(np->tx_full) ) 9.195 { 9.196 printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name); 9.197 netif_stop_queue(dev); 9.198 return -ENOBUFS; 9.199 } 9.200 9.201 - if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE ) 9.202 + if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= 9.203 + PAGE_SIZE) ) 9.204 { 9.205 struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE); 9.206 - if ( new_skb == NULL ) return 1; 9.207 + if ( unlikely(new_skb == NULL) ) 9.208 + return 1; 9.209 skb_put(new_skb, skb->len); 9.210 memcpy(new_skb->data, skb->data, skb->len); 9.211 dev_kfree_skb(skb); 9.212 @@ -300,16 +304,18 @@ static int network_start_xmit(struct sk_ 9.213 id = GET_ID_FROM_FREELIST(np->tx_skbs); 9.214 np->tx_skbs[id] = skb; 9.215 9.216 - np->net_ring->tx_ring[i].req.id = (unsigned short)id; 9.217 - np->net_ring->tx_ring[i].req.addr = 9.218 - phys_to_machine(virt_to_phys(skb->data)); 9.219 - np->net_ring->tx_ring[i].req.size = skb->len; 9.220 - np->net_idx->tx_req_prod = TX_RING_INC(i); 9.221 - atomic_inc(&np->tx_entries); 9.222 + tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req; 9.223 + 9.224 + tx->id = id; 9.225 + tx->addr = phys_to_machine(virt_to_phys(skb->data)); 9.226 + tx->size = skb->len; 9.227 + 9.228 + wmb(); 9.229 + np->net_idx->tx_req_prod = i + 1; 9.230 9.231 network_tx_buf_gc(dev); 9.232 9.233 - if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES ) 9.234 + if ( (i - np->tx_resp_cons) == TX_RING_SIZE ) 9.235 { 9.236 np->tx_full = 1; 9.237 netif_stop_queue(dev); 9.238 @@ -336,12 +342,12 @@ static int network_start_xmit(struct sk_ 9.239 static inline void _network_interrupt(struct net_device *dev) 9.240 { 9.241 struct net_private *np = dev->priv; 9.242 - unsigned int i; 9.243 unsigned long flags; 9.244 struct sk_buff *skb; 9.245 rx_resp_entry_t *rx; 9.246 + NET_RING_IDX i; 9.247 9.248 - if ( np->state == STATE_CLOSED ) 9.249 + if ( unlikely(np->state == STATE_CLOSED) ) 9.250 return; 9.251 9.252 spin_lock_irqsave(&np->tx_lock, flags); 9.253 @@ -349,16 +355,14 @@ static inline void _network_interrupt(st 9.254 spin_unlock_irqrestore(&np->tx_lock, flags); 9.255 9.256 again: 9.257 - for ( i = np->rx_resp_cons; 9.258 - i != np->net_idx->rx_resp_prod; 9.259 - i = RX_RING_INC(i) ) 9.260 + for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ ) 9.261 { 9.262 - rx = &np->net_ring->rx_ring[i].resp; 9.263 + rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp; 9.264 9.265 skb = np->rx_skbs[rx->id]; 9.266 ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); 9.267 9.268 - if ( rx->status != RING_STATUS_OK ) 9.269 + if ( unlikely(rx->status != RING_STATUS_OK) ) 9.270 { 9.271 /* Gate this error. We get a (valid) slew of them on suspend. */ 9.272 if ( np->state == STATE_ACTIVE ) 9.273 @@ -396,7 +400,8 @@ static inline void _network_interrupt(st 9.274 9.275 /* Deal with hypervisor racing our resetting of rx_event. 
*/ 9.276 mb(); 9.277 - if ( np->net_idx->rx_resp_prod != i ) goto again; 9.278 + if ( np->net_idx->rx_resp_prod != i ) 9.279 + goto again; 9.280 } 9.281 9.282
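One last detail worth calling out in the network.c hunk above: the new tx_event value, prod + ((tx_req_prod - prod) >> 1) + 1, asks Xen for a notification once roughly half of the transmit requests still outstanding have been acknowledged, and always after at least one more response. A tiny standalone illustration of that arithmetic (invented names, not the driver's own):

```c
#include <stdio.h>

typedef unsigned int NET_RING_IDX;

/* Next event threshold, given the current response and request producers. */
static NET_RING_IDX next_tx_event(NET_RING_IDX resp_prod, NET_RING_IDX req_prod)
{
    return resp_prod + ((req_prod - resp_prod) >> 1) + 1;
}

int main(void)
{
    /* 32 requests outstanding: fire after 17 more responses arrive. */
    printf("%u\n", next_tx_event(100, 132) - 100);
    /* Nothing outstanding: still ask for a kick on the very next response. */
    printf("%u\n", next_tx_event(200, 200) - 200);
    return 0;
}
```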