ia64/xen-unstable

changeset 2235:10b75f2911b6

bitkeeper revision 1.1159.1.59 (411d7b028SfpgCIy6erydbpaxfxBQA)

blkfront.c : cope with request reordering across migrations
author iap10@labyrinth.cl.cam.ac.uk
date Sat Aug 14 02:37:54 2004 +0000 (2004-08-14)
parents 480807ffa67f
children 49739c6ac967 e14eabd8803b 6abde822c82f
files linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c
line diff
     1.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c	Fri Aug 13 18:17:02 2004 +0000
     1.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c	Sat Aug 14 02:37:54 2004 +0000
     1.3 @@ -39,10 +39,9 @@ static blkif_ring_t *blk_ring;
     1.4  static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
     1.5  static BLKIF_RING_IDX req_prod;  /* Private request producer.         */
     1.6  
     1.7 -static blkif_ring_t *blk_ring_rec; /* Private copy of requests, used for
     1.8 -                                    * recovery.  Responses not stored here. */
     1.9 -static BLKIF_RING_IDX resp_cons_rec; /* Copy of response consumer, used for
    1.10 -                                      * recovery */
    1.11 +unsigned long rec_ring_free;
    1.12 +blkif_request_t rec_ring[BLKIF_RING_SIZE];
    1.13 +
    1.14  static int recovery = 0;           /* "Recovery in progress" flag.  Protected
    1.15                                      * by the blkif_io_lock */
    1.16  
    1.17 @@ -71,6 +70,27 @@ static void kick_pending_request_queues(
    1.18  
    1.19  int __init xlblk_init(void);
    1.20  
    1.21 +
    1.22 +static inline int GET_ID_FROM_FREELIST( void )
    1.23 +{
    1.24 +    unsigned long free = rec_ring_free;
    1.25 +
    1.26 +    if(free>BLKIF_RING_SIZE) BUG();
    1.27 +
    1.28 +    rec_ring_free = rec_ring[free].id;
    1.29 +
    1.30 +    rec_ring[free].id = 0x0fffffee; // debug
    1.31 +
    1.32 +    return free;
    1.33 +}
    1.34 +
    1.35 +static inline void ADD_ID_TO_FREELIST( unsigned long id )
    1.36 +{
    1.37 +    rec_ring[id].id = rec_ring_free;
    1.38 +    rec_ring_free = id;
    1.39 +}
    1.40 +
    1.41 +
    1.42  /**************************  KERNEL VERSION 2.6  **************************/
    1.43  
    1.44  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
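
The pair of helpers above thread a free list through the otherwise-unused id fields of rec_ring[]: each free slot's id holds the index of the next free slot, and rec_ring_free names the head. Below is a minimal standalone sketch of the same idiom, with hypothetical names and an arbitrary size; the sentinel and poison values mirror the patch.

    #define RING_SIZE 64                     /* stand-in for BLKIF_RING_SIZE */

    struct entry { unsigned long id; };      /* stand-in for blkif_request_t */

    static struct entry ring[RING_SIZE];
    static unsigned long ring_free;          /* index of the first free slot */

    static void freelist_init(void)
    {
        unsigned long i;
        for (i = 0; i < RING_SIZE; i++)
            ring[i].id = i + 1;              /* each free slot links to the next */
        ring[RING_SIZE - 1].id = 0x0fffffff; /* end-of-list sentinel */
        ring_free = 0;
    }

    static unsigned long get_id(void)        /* cf. GET_ID_FROM_FREELIST() */
    {
        unsigned long free = ring_free;
        ring_free = ring[free].id;           /* pop: head advances to the next slot */
        ring[free].id = 0x0fffffee;          /* poison, to catch stale readers */
        return free;
    }

    static void put_id(unsigned long id)     /* cf. ADD_ID_TO_FREELIST() */
    {
        ring[id].id = ring_free;             /* push: old head becomes our next */
        ring_free = id;
    }

Allocation and release are O(1) with no extra storage, and while a slot is allocated its id field is free to carry the owner's cookie -- exactly how the patch uses it, storing a struct request or buffer_head pointer there.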
    1.45 @@ -213,6 +233,7 @@ static int blkif_queue_request(struct re
    1.46      struct bio *bio;
    1.47      struct bio_vec *bvec;
    1.48      int idx, s;
    1.49 +    unsigned long id;
    1.50      unsigned int fsect, lsect;
    1.51  
    1.52      if (unlikely(blkif_state != BLKIF_STATE_CONNECTED))
    1.53 @@ -220,7 +241,12 @@ static int blkif_queue_request(struct re
    1.54  
    1.55      /* Fill out a communications ring structure. */
    1.56      ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
    1.57 -    ring_req->id = (unsigned long)req;
    1.58 +    id = GET_ID_FROM_FREELIST();
    1.59 +    rec_ring[id].id = (unsigned long) req;
    1.60 +
    1.61 +//printk("r: %d req %p (%ld)\n",req_prod,req,id);
    1.62 +
    1.63 +    ring_req->id = id;
    1.64      ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
    1.65          BLKIF_OP_READ;
    1.66      ring_req->sector_number = (blkif_sector_t)req->sector;
    1.67 @@ -248,11 +274,7 @@ static int blkif_queue_request(struct re
    1.68      req_prod++;
    1.69  
    1.70      /* Keep a private copy so we can reissue requests when recovering. */
    1.71 -    translate_req_to_pfn(
    1.72 -        &blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req,
    1.73 -        ring_req);
    1.74 -
    1.75 -    blk_ring_rec->req_prod++;
    1.76 +    translate_req_to_pfn( &rec_ring[id], ring_req);
    1.77  
    1.78      return 0;
    1.79  }
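
The net effect of the two hunks above: the id written into the shared ring is no longer the struct request pointer itself but a small index into rec_ring[], and the pointer moves into the shadow slot that the index names. Because the backend merely echoes id back, responses can now be matched to requests in any completion order; the old scheme, which shadowed requests positionally in a private ring, implicitly assumed responses came back in issue order -- the assumption that broke across migration. Condensed, the queueing side is:

    unsigned long id = GET_ID_FROM_FREELIST();      /* claim a shadow slot        */
    rec_ring[id].id  = (unsigned long)req;          /* stash the kernel pointer   */
    ring_req->id     = id;                          /* the backend sees the index */
    translate_req_to_pfn(&rec_ring[id], ring_req);  /* migration-safe shadow copy */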
    1.80 @@ -319,7 +341,16 @@ static irqreturn_t blkif_int(int irq, vo
    1.81  
    1.82      for ( i = resp_cons; i != rp; i++ )
    1.83      {
    1.84 +	unsigned long id;
    1.85          bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
    1.86 +
    1.87 +	id = bret->id;
    1.88 +	req = (struct request *)rec_ring[id].id;
    1.89 +
    1.90 +//printk("i: %d req %p (%ld)\n",i,req,id);
    1.91 +
    1.92 +	ADD_ID_TO_FREELIST(id);  // overwrites req
    1.93 +
    1.94          switch ( bret->operation )
    1.95          {
    1.96          case BLKIF_OP_READ:
    1.97 @@ -327,13 +358,14 @@ static irqreturn_t blkif_int(int irq, vo
    1.98              if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
    1.99                  DPRINTK("Bad return from blkdev data request: %x\n",
   1.100                          bret->status);
   1.101 -            req = (struct request *)bret->id;
   1.102 +	    
   1.103              if ( unlikely(end_that_request_first
   1.104                            (req, 
   1.105                             (bret->status == BLKIF_RSP_OKAY),
   1.106                             req->hard_nr_sectors)) )
   1.107                  BUG();
   1.108              end_that_request_last(req);
   1.109 +
   1.110              break;
   1.111          case BLKIF_OP_PROBE:
   1.112              memcpy(&blkif_control_rsp, bret, sizeof(*bret));
   1.113 @@ -345,7 +377,6 @@ static irqreturn_t blkif_int(int irq, vo
   1.114      }
   1.115      
   1.116      resp_cons = i;
   1.117 -    resp_cons_rec = i;
   1.118  
   1.119      if ( (xlbd_blk_queue != NULL) &&
   1.120           test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags) )
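
The response side mirrors this: the echoed id names the shadow slot, the slot yields the kernel pointer, and the slot is recycled at once. The ordering matters: rec_ring[id].id must be read before ADD_ID_TO_FREELIST(), because releasing the slot overwrites that field with the free-list link (hence the "overwrites req" comment). A toy demonstration of the out-of-order tolerance this buys, reusing the hypothetical helpers sketched earlier:

    #include <assert.h>

    int main(void)
    {
        char a, b, c;                     /* stand-ins for struct request */
        unsigned long ia, ib, ic;

        freelist_init();
        ia = get_id(); ring[ia].id = (unsigned long)&a;
        ib = get_id(); ring[ib].id = (unsigned long)&b;
        ic = get_id(); ring[ic].id = (unsigned long)&c;

        /* Complete b, then a, then c: any order recovers the right pointer,
         * as long as each pointer is read before its slot is released. */
        assert((char *)ring[ib].id == &b); put_id(ib);
        assert((char *)ring[ia].id == &a); put_id(ia);
        assert((char *)ring[ic].id == &c); put_id(ic);
        return 0;
    }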
   1.121 @@ -618,6 +649,7 @@ static int blkif_queue_request(unsigned 
   1.122                                 kdev_t          device)
   1.123  {
   1.124      unsigned long       buffer_ma = phys_to_machine(virt_to_phys(buffer)); 
   1.125 +    unsigned long       xid;
   1.126      struct gendisk     *gd;
   1.127      blkif_request_t    *req;
   1.128      struct buffer_head *bh;
   1.129 @@ -663,10 +695,15 @@ static int blkif_queue_request(unsigned 
   1.130               (sg_dev == device) &&
   1.131               (sg_next_sect == sector_number) )
   1.132          {
   1.133 +
   1.134              req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod-1)].req;
   1.135              bh = (struct buffer_head *)id;
   1.136 -            bh->b_reqnext = (struct buffer_head *)req->id;
   1.137 -            req->id = id;
   1.138 +	    
   1.139 +            bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id;
   1.140 +	    
   1.141 +
   1.142 +	    rec_ring[req->id].id = id;
   1.143 +
   1.144              req->frame_and_sects[req->nr_segments] = 
   1.145                  buffer_ma | (fsect<<3) | lsect;
   1.146              if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
   1.147 @@ -675,8 +712,7 @@ static int blkif_queue_request(unsigned 
   1.148                  DISABLE_SCATTERGATHER();
   1.149  
   1.150              /* Update the copy of the request in the recovery ring. */
   1.151 -            translate_req_to_pfn(&blk_ring_rec->ring[
   1.152 -                MASK_BLKIF_IDX(blk_ring_rec->req_prod - 1)].req, req);
   1.153 +            translate_req_to_pfn(&rec_ring[req->id], req );
   1.154  
   1.155              return 0;
   1.156          }
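
In the 2.4 scatter-gather merge path the indirection has a knock-on effect: the chain of pending buffer_heads used to hang off the ring's req->id, but that field now carries the ring index, so the chain is threaded through the shadow slot instead -- rec_ring[req->id].id plays the role req->id played before:

    bh->b_reqnext        = (struct buffer_head *)rec_ring[req->id].id;  /* old chain head */
    rec_ring[req->id].id = id;                                          /* bh is new head */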
   1.157 @@ -698,18 +734,25 @@ static int blkif_queue_request(unsigned 
   1.158  
   1.159      /* Fill out a communications ring structure. */
   1.160      req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
   1.161 -    req->id            = id;
   1.162 +
   1.163 +    xid = GET_ID_FROM_FREELIST();
   1.164 +    rec_ring[xid].id = id;
   1.165 +
   1.166 +    req->id            = xid;
   1.167      req->operation     = operation;
   1.168      req->sector_number = (blkif_sector_t)sector_number;
   1.169      req->device        = device; 
   1.170      req->nr_segments   = 1;
   1.171      req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
   1.172 +
   1.173 +//printk("N: %d req %p (%ld)\n",req_prod,rec_ring[xid].id,xid);
   1.174 +
   1.175      req_prod++;
   1.176  
   1.177      /* Keep a private copy so we can reissue requests when recovering. */    
   1.178 -    translate_req_to_pfn(&blk_ring_rec->ring[
   1.179 -        MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req, req);
   1.180 -    blk_ring_rec->req_prod++;
   1.181 +    translate_req_to_pfn(&rec_ring[xid], req );
   1.182 +
   1.183 +
   1.184  
   1.185      return 0;
   1.186  }
   1.187 @@ -816,7 +859,16 @@ static void blkif_int(int irq, void *dev
   1.188  
   1.189      for ( i = resp_cons; i != rp; i++ )
   1.190      {
   1.191 +	unsigned long id;
   1.192          blkif_response_t *bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
   1.193 +
   1.194 +	id = bret->id;
   1.195 +	bh = (struct buffer_head *)rec_ring[id].id; 
   1.196 +
   1.197 +//printk("i: %d req %p (%ld)\n",i,bh,id);
   1.198 +
   1.199 +	ADD_ID_TO_FREELIST(id);
   1.200 +
   1.201          switch ( bret->operation )
   1.202          {
   1.203          case BLKIF_OP_READ:
   1.204 @@ -824,14 +876,13 @@ static void blkif_int(int irq, void *dev
   1.205              if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
   1.206                  DPRINTK("Bad return from blkdev data request: %lx\n",
   1.207                          bret->status);
   1.208 -            for ( bh = (struct buffer_head *)bret->id; 
   1.209 -                  bh != NULL; 
   1.210 -                  bh = next_bh )
   1.211 +            for ( ; bh != NULL; bh = next_bh )
   1.212              {
   1.213                  next_bh = bh->b_reqnext;
   1.214                  bh->b_reqnext = NULL;
   1.215                  bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY);
   1.216              }
   1.217 +
   1.218              break;
   1.219          case BLKIF_OP_PROBE:
   1.220              memcpy(&blkif_control_rsp, bret, sizeof(*bret));
   1.221 @@ -843,7 +894,6 @@ static void blkif_int(int irq, void *dev
   1.222      }
   1.223      
   1.224      resp_cons = i;
   1.225 -    resp_cons_rec = i;
   1.226  
   1.227      kick_pending_request_queues();
   1.228  
   1.229 @@ -859,8 +909,12 @@ static inline void translate_req_to_pfn(
   1.230                                          blkif_request_t *req)
   1.231  {
   1.232      int i;
   1.233 -    
   1.234 -    *xreq = *req; 
   1.235 +
   1.236 +    xreq->operation     = req->operation;
   1.237 +    xreq->nr_segments   = req->nr_segments;
   1.238 +    xreq->device        = req->device;
   1.239 +    // preserve id
   1.240 +    xreq->sector_number = req->sector_number;
   1.241  
   1.242      for ( i = 0; i < req->nr_segments; i++ )
   1.243      {
   1.244 @@ -868,6 +922,7 @@ static inline void translate_req_to_pfn(
   1.245              (machine_to_phys_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] <<
   1.246               PAGE_SHIFT);
   1.247      }
   1.248 +    
   1.249  }
   1.250  
   1.251  static inline void translate_req_to_mfn(blkif_request_t *xreq,
   1.252 @@ -875,7 +930,11 @@ static inline void translate_req_to_mfn(
   1.253  {
   1.254      int i;
   1.255  
   1.256 -    *xreq = *req;
   1.257 +    xreq->operation     = req->operation;
   1.258 +    xreq->nr_segments   = req->nr_segments;
   1.259 +    xreq->device        = req->device;
   1.260 +    xreq->id            = req->id;   // copy id (unlike above)
   1.261 +    xreq->sector_number = req->sector_number;
   1.262  
   1.263      for ( i = 0; i < req->nr_segments; i++ )
   1.264      {
   1.265 @@ -885,6 +944,8 @@ static inline void translate_req_to_mfn(
   1.266      }
   1.267  }
   1.268  
   1.269 +
   1.270 +
   1.271  static inline void flush_requests(void)
   1.272  {
   1.273      DISABLE_SCATTERGATHER();
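
The two translate helpers exist because machine frame numbers (MFNs) are only meaningful on the host a domain currently runs on, while pseudo-physical frame numbers (PFNs) survive migration: the shadow copy is kept in PFN space (translate_req_to_pfn) and converted back to the new host's MFNs at recovery time (translate_req_to_mfn). Replacing the old "*xreq = *req" with field-by-field copies lets the PFN direction skip the id field -- in the shadow slot it holds the kernel pointer and must not be clobbered -- while the MFN direction copies id, as its comment notes. A sketch of the per-segment frame swap, assuming PAGE_SHIFT is 12, treating the mapping table as a plain array, and assuming the fsect/lsect bits packed below the frame (per "buffer_ma | (fsect<<3) | lsect" earlier) are carried over unchanged:

    unsigned long swap_frame(unsigned long frame_and_sects,
                             const unsigned long *frame_map)  /* m2p or p2m table */
    {
        unsigned long sect_bits = frame_and_sects & ((1UL << 12) - 1);  /* keep fsect/lsect */
        unsigned long new_frame = frame_map[frame_and_sects >> 12];     /* remap the frame  */
        return (new_frame << 12) | sect_bits;
    }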
   1.274 @@ -908,7 +969,7 @@ static void kick_pending_request_queues(
   1.275  
   1.276  void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
   1.277  {
   1.278 -    unsigned long flags;
   1.279 +    unsigned long flags, id;
   1.280  
   1.281   retry:
   1.282      while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
   1.283 @@ -927,8 +988,13 @@ void blkif_control_send(blkif_request_t 
   1.284      DISABLE_SCATTERGATHER();
   1.285      blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req = *req;    
   1.286  
   1.287 -    translate_req_to_pfn(&blk_ring_rec->ring[
   1.288 -	MASK_BLKIF_IDX(blk_ring_rec->req_prod++)].req,req);
   1.289 +    id = GET_ID_FROM_FREELIST();
   1.290 +    blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req.id = id;
   1.291 +    rec_ring[id].id = (unsigned long) req;
   1.292 +
   1.293 +//printk("c: %d req %p (%ld)\n",req_prod,req,id);
   1.294 +
   1.295 +    translate_req_to_pfn( &rec_ring[id], req );
   1.296  
   1.297      req_prod++;
   1.298      flush_requests();
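
blkif_control_send() joins the same scheme by copying the caller's request into the ring wholesale and then rewriting only the id field with a freshly allocated index; the caller's pointer goes into the shadow slot so the interrupt handler can release the index exactly as it does for ordinary I/O. Condensed:

    blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req = *req;   /* copy, id included */
    id = GET_ID_FROM_FREELIST();
    blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req.id = id;  /* then re-key it    */
    rec_ring[id].id = (unsigned long)req;                  /* remember caller   */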
   1.299 @@ -1017,36 +1083,42 @@ static void blkif_status_change(blkif_fe
   1.300  
   1.301          if ( recovery )
   1.302          {
   1.303 -            int i,j;
   1.304 +	    int i;
   1.305 +
   1.306 +	    /* Hmm, requests might be re-ordered when we re-issue them.
   1.307 +	       This will need to be fixed once we have barriers */
   1.308 +
   1.309 +	    // req_prod = 0;   : already is zero
   1.310  
   1.311 -            /*
   1.312 -             * Shouldn't need the io_request_lock here - the device is plugged
   1.313 -             * and the recovery flag prevents the interrupt handler changing
   1.314 -             * anything.
   1.315 -             */
   1.316 +	    // stage 1 : find active and move to safety
   1.317 +	    for ( i=0; i <BLKIF_RING_SIZE; i++ )
   1.318 +	    {
   1.319 +		if ( rec_ring[i].id >= PAGE_OFFSET )
   1.320 +		{
   1.321 +		    translate_req_to_mfn(
   1.322 +			&blk_ring->ring[req_prod].req, &rec_ring[i] );
   1.323  
   1.324 -            /* Reissue requests from the private block ring. */
   1.325 -            for ( i = 0;
   1.326 -                  resp_cons_rec < blk_ring_rec->req_prod;
   1.327 -                  resp_cons_rec++, i++ )
   1.328 -            {                
   1.329 -                translate_req_to_mfn(
   1.330 -                    &blk_ring->ring[i].req,
   1.331 -                    &blk_ring_rec->ring[MASK_BLKIF_IDX(resp_cons_rec)].req);
   1.332 -            }
   1.333 +		    req_prod++;
   1.334 +		}
   1.335 +	    }
   1.336  
   1.337 -            /* Reset the private block ring to match the new ring. */
   1.338 -            for( j = 0; j < i; j++ )
   1.339 -            {
   1.340 -                translate_req_to_pfn(
   1.341 -                    &blk_ring_rec->ring[j].req,
   1.342 -                    &blk_ring->ring[j].req);
   1.343 -            }
   1.344 +printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod);
   1.345 +	    
   1.346 +	    // stage 2 : set up shadow list
   1.347 +	    for ( i=0; i<req_prod; i++ )
   1.348 +	    {
   1.349 +		rec_ring[i].id = blk_ring->ring[i].req.id;		
   1.350 +		blk_ring->ring[i].req.id = i;
   1.351 +		translate_req_to_pfn( &rec_ring[i], &blk_ring->ring[i].req );
   1.352 +	    }
   1.353  
   1.354 -            resp_cons_rec = 0;
   1.355 +	    // stage 3 : set up free list
   1.356 +	    for ( ; i < BLKIF_RING_SIZE; i++ )
   1.357 +		rec_ring[i].id = i+1;
   1.358 +	    rec_ring_free = req_prod;
   1.359 +	    rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
   1.360  
   1.361              /* blk_ring->req_prod will be set when we flush_requests().*/
   1.362 -            blk_ring_rec->req_prod = req_prod = i;
   1.363              wmb();
   1.364  
   1.365              /* Switch off recovery mode, using a memory barrier to ensure that
   1.366 @@ -1057,6 +1129,9 @@ static void blkif_status_change(blkif_fe
   1.367  
   1.368              /* Kicks things back into life. */
   1.369              flush_requests();
   1.370 +
   1.371 +
   1.372 +
   1.373          }
   1.374          else
   1.375          {
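
Recovery after migration now runs in three stages over the shadow array instead of replaying a private ring. Stage 1 walks all BLKIF_RING_SIZE slots and picks out the live ones: a slot whose id is >= PAGE_OFFSET holds a kernel pointer (an in-flight request), while free slots hold small free-list indices, so the comparison doubles as an "is allocated" test. Live requests are compacted to the front of the fresh ring -- which is why the comment warns that requests may be re-ordered until barriers exist. Stage 2 re-keys each re-issued request to its new slot index, and stage 3 threads every slot after req_prod back onto the free list. In outline:

    for (i = 0; i < BLKIF_RING_SIZE; i++)          /* stage 1: re-issue live slots   */
        if (rec_ring[i].id >= PAGE_OFFSET)         /* kernel pointer => in flight    */
            translate_req_to_mfn(&blk_ring->ring[req_prod++].req, &rec_ring[i]);

    for (i = 0; i < req_prod; i++) {               /* stage 2: re-key the shadow     */
        rec_ring[i].id = blk_ring->ring[i].req.id; /* pointer moves back to shadow   */
        blk_ring->ring[i].req.id = i;              /* ring now carries the new index */
        translate_req_to_pfn(&rec_ring[i], &blk_ring->ring[i].req);
    }

    for (; i < BLKIF_RING_SIZE; i++)               /* stage 3: rebuild the free list */
        rec_ring[i].id = i + 1;
    rec_ring_free = req_prod;
    rec_ring[BLKIF_RING_SIZE - 1].id = 0x0fffffff;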
   1.376 @@ -1114,15 +1189,20 @@ int __init xlblk_init(void)
   1.377  {
   1.378      ctrl_msg_t                       cmsg;
   1.379      blkif_fe_driver_status_changed_t st;
   1.380 -
   1.381 +    int i;
   1.382 +    
   1.383      if ( (start_info.flags & SIF_INITDOMAIN) 
   1.384           || (start_info.flags & SIF_BLK_BE_DOMAIN) )
   1.385          return 0;
   1.386  
   1.387      printk(KERN_INFO "Initialising Xen virtual block device\n");
   1.388  
   1.389 -    blk_ring_rec = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
   1.390 -    memset(blk_ring_rec, 0, sizeof(*blk_ring_rec));
   1.391 +    rec_ring_free = 0;
   1.392 +    for (i=0; i<BLKIF_RING_SIZE; i++)
   1.393 +    {
   1.394 +	rec_ring[i].id = i+1;
   1.395 +    }
   1.396 +    rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
   1.397  
   1.398      (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
   1.399                                      CALLBACK_IN_BLOCKING_CONTEXT);
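
Boot-time setup in xlblk_init() is just stage 3 of the recovery path applied to an empty array: every slot is linked to its successor and the last slot gets the 0x0fffffff sentinel. If the list were ever exhausted, the sentinel would become rec_ring_free and fail the free > BLKIF_RING_SIZE sanity check in GET_ID_FROM_FREELIST(), tripping BUG() rather than indexing off the end of the array.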
     2.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c	Fri Aug 13 18:17:02 2004 +0000
     2.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c	Sat Aug 14 02:37:54 2004 +0000
     2.3 @@ -592,12 +592,16 @@ static void network_connect(struct net_d
     2.4      wmb();                
     2.5      np->rx->req_prod = requeue_idx;
     2.6  
     2.7 +printk(KERN_ALERT"Netfront recovered tx=%d rxfree=%d\n",
     2.8 +       np->tx->req_prod,np->rx->req_prod);
     2.9 +
    2.10      /* Step 3: All public and private state should now be sane.  Get
    2.11       * ready to start sending and receiving packets and give the driver
    2.12       * domain a kick because we've probably just requeued some
    2.13       * packets.
    2.14       */
    2.15      np->backend_state = BEST_CONNECTED;
    2.16 +    wmb();
    2.17      notify_via_evtchn(status->evtchn);  
    2.18      network_tx_buf_gc(dev);
    2.19
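
The netfront change is a one-line memory-barrier fix in the matching recovery path: the added wmb() ensures the requeued ring contents and the BEST_CONNECTED state are globally visible before the event-channel notification invites the backend (and this end's own interrupt path) to look at them. This is the usual publish-then-notify pattern; schematically, with hypothetical names:

    ring->req_prod = requeue_idx;   /* publish the requeued requests       */
    state          = CONNECTED;     /* publish the new connection state    */
    wmb();                          /* order the stores above ...          */
    notify_peer(evtchn);            /* ... before the peer is told to read */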