direct-io.hg
changeset 2235:10b75f2911b6
bitkeeper revision 1.1159.1.59 (411d7b028SfpgCIy6erydbpaxfxBQA)
blkfront.c : cope with request reordering across migrations
author | iap10@labyrinth.cl.cam.ac.uk
date | Sat Aug 14 02:37:54 2004 +0000 (2004-08-14)
parents | 480807ffa67f
children | 49739c6ac967 e14eabd8803b 6abde822c82f
files | linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c
line diff
--- a/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c	Fri Aug 13 18:17:02 2004 +0000
+++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c	Sat Aug 14 02:37:54 2004 +0000
@@ -39,10 +39,9 @@ static blkif_ring_t *blk_ring;
 static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
 static BLKIF_RING_IDX req_prod;  /* Private request producer.         */
 
-static blkif_ring_t *blk_ring_rec; /* Private copy of requests, used for
-                                    * recovery.  Responses not stored here. */
-static BLKIF_RING_IDX resp_cons_rec; /* Copy of response consumer, used for
-                                      * recovery */
+unsigned long rec_ring_free;
+blkif_request_t rec_ring[BLKIF_RING_SIZE];
+
 static int recovery = 0;           /* "Recovery in progress" flag.  Protected
                                     * by the blkif_io_lock */
 
@@ -71,6 +70,27 @@ static void kick_pending_request_queues(
 
 int __init xlblk_init(void);
 
+
+static inline int GET_ID_FROM_FREELIST( void )
+{
+    unsigned long free = rec_ring_free;
+
+    if(free>BLKIF_RING_SIZE) BUG();
+
+    rec_ring_free = rec_ring[free].id;
+
+    rec_ring[free].id = 0x0fffffee; // debug
+
+    return free;
+}
+
+static inline void ADD_ID_TO_FREELIST( unsigned long id )
+{
+    rec_ring[id].id = rec_ring_free;
+    rec_ring_free = id;
+}
+
+
 /**************************  KERNEL VERSION 2.6  **************************/
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
@@ -213,6 +233,7 @@ static int blkif_queue_request(struct re
     struct bio *bio;
     struct bio_vec *bvec;
     int idx, s;
+    unsigned long id;
     unsigned int fsect, lsect;
 
     if (unlikely(blkif_state != BLKIF_STATE_CONNECTED))
@@ -220,7 +241,12 @@ static int blkif_queue_request(struct re
 
     /* Fill out a communications ring structure. */
     ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
-    ring_req->id = (unsigned long)req;
+    id = GET_ID_FROM_FREELIST();
+    rec_ring[id].id = (unsigned long) req;
+
+//printk("r: %d req %p (%ld)\n",req_prod,req,id);
+
+    ring_req->id = id;
     ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
         BLKIF_OP_READ;
     ring_req->sector_number = (blkif_sector_t)req->sector;
@@ -248,11 +274,7 @@ static int blkif_queue_request(struct re
     req_prod++;
 
     /* Keep a private copy so we can reissue requests when recovering. */
-    translate_req_to_pfn(
-        &blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req,
-        ring_req);
-
-    blk_ring_rec->req_prod++;
+    translate_req_to_pfn( &rec_ring[id], ring_req);
 
     return 0;
 }
@@ -319,7 +341,16 @@ static irqreturn_t blkif_int(int irq, vo
 
     for ( i = resp_cons; i != rp; i++ )
     {
+        unsigned long id;
         bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
+
+        id = bret->id;
+        req = (struct request *)rec_ring[id].id;
+
+//printk("i: %d req %p (%ld)\n",i,req,id);
+
+        ADD_ID_TO_FREELIST(id); // overwrites req
+
         switch ( bret->operation )
         {
        case BLKIF_OP_READ:
@@ -327,13 +358,14 @@ static irqreturn_t blkif_int(int irq, vo
             if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
                 DPRINTK("Bad return from blkdev data request: %x\n",
                         bret->status);
-            req = (struct request *)bret->id;
+
             if ( unlikely(end_that_request_first
                           (req,
                            (bret->status == BLKIF_RSP_OKAY),
                            req->hard_nr_sectors)) )
                 BUG();
             end_that_request_last(req);
+
             break;
         case BLKIF_OP_PROBE:
             memcpy(&blkif_control_rsp, bret, sizeof(*bret));
@@ -345,7 +377,6 @@ static irqreturn_t blkif_int(int irq, vo
     }
 
     resp_cons = i;
-    resp_cons_rec = i;
 
     if ( (xlbd_blk_queue != NULL) &&
          test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags) )
@@ -618,6 +649,7 @@ static int blkif_queue_request(unsigned
                               kdev_t device)
 {
     unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer));
+    unsigned long xid;
     struct gendisk *gd;
     blkif_request_t *req;
     struct buffer_head *bh;
@@ -663,10 +695,15 @@ static int blkif_queue_request(unsigned
          (sg_dev == device) &&
         (sg_next_sect == sector_number) )
     {
+
        req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod-1)].req;
        bh = (struct buffer_head *)id;
-        bh->b_reqnext = (struct buffer_head *)req->id;
-        req->id = id;
+
+        bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id;
+
+
+        rec_ring[req->id].id = id;
+
        req->frame_and_sects[req->nr_segments] =
             buffer_ma | (fsect<<3) | lsect;
        if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
@@ -675,8 +712,7 @@ static int blkif_queue_request(unsigned
             DISABLE_SCATTERGATHER();
 
         /* Update the copy of the request in the recovery ring. */
-        translate_req_to_pfn(&blk_ring_rec->ring[
-            MASK_BLKIF_IDX(blk_ring_rec->req_prod - 1)].req, req);
+        translate_req_to_pfn(&rec_ring[req->id], req );
 
         return 0;
     }
@@ -698,18 +734,25 @@ static int blkif_queue_request(unsigned
 
     /* Fill out a communications ring structure. */
     req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
-    req->id            = id;
+
+    xid = GET_ID_FROM_FREELIST();
+    rec_ring[xid].id = id;
+
+    req->id            = xid;
     req->operation     = operation;
     req->sector_number = (blkif_sector_t)sector_number;
     req->device        = device;
     req->nr_segments   = 1;
     req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
+
+//printk("N: %d req %p (%ld)\n",req_prod,rec_ring[xid].id,xid);
+
     req_prod++;
 
     /* Keep a private copy so we can reissue requests when recovering. */
-    translate_req_to_pfn(&blk_ring_rec->ring[
-        MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req, req);
-    blk_ring_rec->req_prod++;
+    translate_req_to_pfn(&rec_ring[xid], req );
+
+
 
     return 0;
 }
@@ -816,7 +859,16 @@ static void blkif_int(int irq, void *dev
 
     for ( i = resp_cons; i != rp; i++ )
     {
+        unsigned long id;
         blkif_response_t *bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
+
+        id = bret->id;
+        bh = (struct buffer_head *)rec_ring[id].id;
+
+//printk("i: %d req %p (%ld)\n",i,bh,id);
+
+        ADD_ID_TO_FREELIST(id);
+
         switch ( bret->operation )
         {
         case BLKIF_OP_READ:
@@ -824,14 +876,13 @@ static void blkif_int(int irq, void *dev
             if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
                 DPRINTK("Bad return from blkdev data request: %lx\n",
                         bret->status);
-            for ( bh = (struct buffer_head *)bret->id;
-                  bh != NULL;
-                  bh = next_bh )
+            for ( ; bh != NULL; bh = next_bh )
             {
                 next_bh = bh->b_reqnext;
                 bh->b_reqnext = NULL;
                 bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY);
             }
+
             break;
         case BLKIF_OP_PROBE:
             memcpy(&blkif_control_rsp, bret, sizeof(*bret));
@@ -843,7 +894,6 @@ static void blkif_int(int irq, void *dev
     }
 
     resp_cons = i;
-    resp_cons_rec = i;
 
     kick_pending_request_queues();
 
@@ -859,8 +909,12 @@ static inline void translate_req_to_pfn(
                                          blkif_request_t *req)
 {
     int i;
-
-    *xreq = *req;
+
+    xreq->operation = req->operation;
+    xreq->nr_segments = req->nr_segments;
+    xreq->device = req->device;
+    // preserve id
+    xreq->sector_number = req->sector_number;
 
     for ( i = 0; i < req->nr_segments; i++ )
     {
@@ -868,6 +922,7 @@ static inline void translate_req_to_pfn(
             (machine_to_phys_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] <<
              PAGE_SHIFT);
     }
+
 }
 
 static inline void translate_req_to_mfn(blkif_request_t *xreq,
@@ -875,7 +930,11 @@ static inline void translate_req_to_mfn(
 {
     int i;
 
-    *xreq = *req;
+    xreq->operation = req->operation;
+    xreq->nr_segments = req->nr_segments;
+    xreq->device = req->device;
+    xreq->id = req->id;   // copy id (unlike above)
+    xreq->sector_number = req->sector_number;
 
     for ( i = 0; i < req->nr_segments; i++ )
     {
@@ -885,6 +944,8 @@ static inline void translate_req_to_mfn(
     }
 }
 
+
+
 static inline void flush_requests(void)
 {
     DISABLE_SCATTERGATHER();
@@ -908,7 +969,7 @@ static void kick_pending_request_queues(
 
 void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
 {
-    unsigned long flags;
+    unsigned long flags, id;
 
  retry:
     while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
@@ -927,8 +988,13 @@ void blkif_control_send(blkif_request_t
     DISABLE_SCATTERGATHER();
     blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req = *req;
 
-    translate_req_to_pfn(&blk_ring_rec->ring[
-        MASK_BLKIF_IDX(blk_ring_rec->req_prod++)].req,req);
+    id = GET_ID_FROM_FREELIST();
+    blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req.id = id;
+    rec_ring[id].id = (unsigned long) req;
+
+//printk("c: %d req %p (%ld)\n",req_prod,req,id);
+
+    translate_req_to_pfn( &rec_ring[id], req );
 
     req_prod++;
     flush_requests();
@@ -1017,36 +1083,42 @@ static void blkif_status_change(blkif_fe
 
     if ( recovery )
     {
-        int i,j;
+        int i;
+
+        /* Hmm, requests might be re-ordered when we re-issue them.
+           This will need to be fixed once we have barriers */
+
+        // req_prod = 0; : already is zero
 
-        /*
-         * Shouldn't need the io_request_lock here - the device is plugged
-         * and the recovery flag prevents the interrupt handler changing
-         * anything.
-         */
+        // stage 1 : find active and move to safety
+        for ( i=0; i <BLKIF_RING_SIZE; i++ )
+        {
+            if ( rec_ring[i].id >= PAGE_OFFSET )
+            {
+                translate_req_to_mfn(
+                    &blk_ring->ring[req_prod].req, &rec_ring[i] );
 
-        /* Reissue requests from the private block ring. */
-        for ( i = 0;
-              resp_cons_rec < blk_ring_rec->req_prod;
-              resp_cons_rec++, i++ )
-        {
-            translate_req_to_mfn(
-                &blk_ring->ring[i].req,
-                &blk_ring_rec->ring[MASK_BLKIF_IDX(resp_cons_rec)].req);
-        }
+                req_prod++;
+            }
+        }
 
-        /* Reset the private block ring to match the new ring. */
-        for( j = 0; j < i; j++ )
-        {
-            translate_req_to_pfn(
-                &blk_ring_rec->ring[j].req,
-                &blk_ring->ring[j].req);
-        }
+printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod);
+
+        // stage 2 : set up shadow list
+        for ( i=0; i<req_prod; i++ )
+        {
+            rec_ring[i].id = blk_ring->ring[i].req.id;
+            blk_ring->ring[i].req.id = i;
+            translate_req_to_pfn( &rec_ring[i], &blk_ring->ring[i].req );
+        }
 
-        resp_cons_rec = 0;
+        // stage 3 : set up free list
+        for ( ; i < BLKIF_RING_SIZE; i++ )
+            rec_ring[i].id = i+1;
+        rec_ring_free = req_prod;
+        rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
 
         /* blk_ring->req_prod will be set when we flush_requests().*/
-        blk_ring_rec->req_prod = req_prod = i;
         wmb();
 
         /* Switch off recovery mode, using a memory barrier to ensure that
@@ -1057,6 +1129,9 @@ static void blkif_status_change(blkif_fe
 
         /* Kicks things back into life. */
         flush_requests();
+
+
+
     }
     else
     {
@@ -1114,15 +1189,20 @@ int __init xlblk_init(void)
 {
     ctrl_msg_t cmsg;
     blkif_fe_driver_status_changed_t st;
-
+    int i;
+
     if ( (start_info.flags & SIF_INITDOMAIN)
         || (start_info.flags & SIF_BLK_BE_DOMAIN) )
         return 0;
 
     printk(KERN_INFO "Initialising Xen virtual block device\n");
 
-    blk_ring_rec = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
-    memset(blk_ring_rec, 0, sizeof(*blk_ring_rec));
+    rec_ring_free = 0;
+    for (i=0; i<BLKIF_RING_SIZE; i++)
+    {
+        rec_ring[i].id = i+1;
+    }
+    rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
 
     (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
                                     CALLBACK_IN_BLOCKING_CONTEXT);
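
The core mechanism this changeset introduces is the rec_ring shadow array: the id carried in each shared-ring slot is now a small index into a driver-private table, and unused table entries are chained into a free list through their own id fields. A response can therefore be matched back to its originating request or buffer_head chain no matter what order the backend completes requests in after a migration. Below is a minimal, self-contained user-space sketch of that index/free-list pattern; the shadow_t type, the shadow_* helper names and RING_SIZE are hypothetical stand-ins rather than the driver's real identifiers, and the sentinel constants simply echo the ones used in the patch.

#include <assert.h>
#include <stdio.h>

#define RING_SIZE 64

/* Hypothetical shadow entry: holds either the owner's cookie (when the
 * slot is in use) or the index of the next free slot (when it is free). */
typedef struct { unsigned long id; } shadow_t;

static shadow_t shadow[RING_SIZE];
static unsigned long shadow_free;

/* Chain every slot into the free list; the tail gets a sentinel. */
static void shadow_init(void)
{
    unsigned long i;
    for (i = 0; i < RING_SIZE; i++)
        shadow[i].id = i + 1;
    shadow[RING_SIZE - 1].id = 0x0fffffff;   /* sentinel, never dereferenced */
    shadow_free = 0;
}

/* Pop a slot index off the free list and store the caller's cookie in it. */
static unsigned long shadow_get(unsigned long cookie)
{
    unsigned long slot = shadow_free;
    assert(slot < RING_SIZE);            /* free list must not be exhausted */
    shadow_free = shadow[slot].id;       /* unlink the head slot            */
    shadow[slot].id = cookie;            /* remember the real owner         */
    return slot;                         /* this value travels in the ring  */
}

/* Look up the cookie for a completed slot and push the slot back. */
static unsigned long shadow_put(unsigned long slot)
{
    unsigned long cookie = shadow[slot].id;
    shadow[slot].id = shadow_free;       /* relink at the head of the list  */
    shadow_free = slot;
    return cookie;
}

int main(void)
{
    shadow_init();
    unsigned long a = shadow_get(0x1000), b = shadow_get(0x2000);
    /* Responses may arrive in any order; each still maps back correctly. */
    printf("%lx %lx\n", shadow_put(b), shadow_put(a));  /* prints: 2000 1000 */
    return 0;
}

Keeping the handle in the shared ring as a small table index rather than a raw kernel pointer is also what makes the three-stage recovery walk above possible: the driver can scan rec_ring for live entries (the patch uses id >= PAGE_OFFSET as the "in use" test), repack them at the front of the new shared ring, and rebuild the free list behind them.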
--- a/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c	Fri Aug 13 18:17:02 2004 +0000
+++ b/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c	Sat Aug 14 02:37:54 2004 +0000
@@ -592,12 +592,16 @@ static void network_connect(struct net_d
     wmb();
     np->rx->req_prod = requeue_idx;
 
+printk(KERN_ALERT"Netfront recovered tx=%d rxfree=%d\n",
+       np->tx->req_prod,np->rx->req_prod);
+
     /* Step 3: All public and private state should now be sane.  Get
      * ready to start sending and receiving packets and give the driver
      * domain a kick because we've probably just requeued some
      * packets.
      */
     np->backend_state = BEST_CONNECTED;
+    wmb();
     notify_via_evtchn(status->evtchn);
     network_tx_buf_gc(dev);
 
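
The one functional change in the netfront hunk is the wmb() inserted between setting np->backend_state = BEST_CONNECTED and notify_via_evtchn(): the republished state must be visible before the backend is kicked, otherwise the backend can act on the notification while still seeing stale state. Below is a small user-space sketch of that publish-then-notify ordering; write_barrier, kick_backend and reconnect are hypothetical stand-ins for the kernel's wmb(), notify_via_evtchn() and network_connect(), and the C11 release fence merely illustrates the ordering requirement that wmb() enforces in the real driver.

#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical frontend state that the backend (conceptually) observes. */
static int backend_state;            /* 0 = disconnected, 1 = connected   */
static unsigned int rx_req_prod;     /* requeued receive-buffer producer  */

/* Stand-in for the kernel's wmb(): order earlier stores before later ones. */
static inline void write_barrier(void)
{
    atomic_thread_fence(memory_order_release);
}

/* Stand-in for notify_via_evtchn(): in the real driver this is an
 * event-channel kick into the backend domain; here it is just a stub. */
static void kick_backend(int evtchn)
{
    printf("kick evtchn %d\n", evtchn);
}

static void reconnect(int evtchn, unsigned int requeued)
{
    rx_req_prod = requeued;          /* republish private/ring state       */
    backend_state = 1;               /* mark ourselves connected           */
    write_barrier();                 /* state must be visible before...    */
    kick_backend(evtchn);            /* ...the backend is notified         */
}

int main(void)
{
    reconnect(3, 16);
    printf("state=%d rx_req_prod=%u\n", backend_state, rx_req_prod);
    return 0;
}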