ia64/xen-unstable

changeset 223:81ac00c13385

bitkeeper revision 1.81 (3e561fbbcTcsPP9qTjC_BVR3zYpKjw)

Many files:
Block-device layer all fixed up. Serialisation removed. Should fly! :-)
author kaf24@labyrinth.cl.cam.ac.uk
date Fri Feb 21 12:46:51 2003 +0000 (2003-02-21)
parents 7d86ebeca986
children b05b1796e256
files xen-2.4.16/drivers/block/ll_rw_blk.c xen-2.4.16/drivers/block/xen_block.c xen-2.4.16/include/hypervisor-ifs/block.h xen-2.4.16/include/xeno/blkdev.h xen-2.4.16/include/xeno/sched.h xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c
line diff
     1.1 --- a/xen-2.4.16/drivers/block/ll_rw_blk.c	Thu Feb 20 23:52:36 2003 +0000
     1.2 +++ b/xen-2.4.16/drivers/block/ll_rw_blk.c	Fri Feb 21 12:46:51 2003 +0000
     1.3 @@ -5,7 +5,6 @@
     1.4   * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
     1.5   * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
     1.6   * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
     1.7 - * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> -  July2000
     1.8   */
     1.9  
    1.10  /*
    1.11 @@ -40,14 +39,6 @@
    1.12  #define DPRINTK(_f, _a...) ((void)0)
    1.13  #endif
    1.14  
    1.15 -/* XXX SMH: temporarily we just dive at xen_block completion handler */
    1.16 -extern void end_block_io_op(struct buffer_head *bh); 
    1.17 -
    1.18 -static void end_buffer_dummy(struct buffer_head *bh, int uptodate)
    1.19 -{
    1.20 -  /* do nothing */
    1.21 -}
    1.22 -
    1.23  /* This will die as all synchronous stuff is coming to an end */
    1.24  #define complete(_r) panic("completion.h stuff may be needed...")
    1.25  
    1.26 @@ -1036,8 +1027,6 @@ out:
    1.27  	return 0;
    1.28  end_io:
    1.29  	bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
    1.30 -	/* XXX SMH: do we need this every time? */
    1.31 -	end_block_io_op(bh);
    1.32  	return 0;
    1.33  }
    1.34  
    1.35 @@ -1107,8 +1096,6 @@ void generic_make_request (int rw, struc
    1.36  
    1.37  			/* Yecch again */
    1.38  			bh->b_end_io(bh, 0);
    1.39 -			/* XXX SMH */ 
    1.40 -			end_block_io_op(bh);
    1.41  			return;
    1.42  		}
    1.43  	}
    1.44 @@ -1238,7 +1225,6 @@ void ll_rw_block(int rw, int nr, struct 
    1.45  
    1.46  		/* We have the buffer lock */
    1.47  		atomic_inc(&bh->b_count);
    1.48 -		bh->b_end_io = end_buffer_dummy;
    1.49  
    1.50  		switch(rw) {
    1.51  		case WRITE:
    1.52 @@ -1258,8 +1244,6 @@ void ll_rw_block(int rw, int nr, struct 
    1.53  			BUG();
    1.54  	end_io:
    1.55  			bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
    1.56 -			/* XXX SMH */
    1.57 -			end_block_io_op(bh);
    1.58  			continue;
    1.59  		}
    1.60  
    1.61 @@ -1313,7 +1297,6 @@ int end_that_request_first (struct reque
    1.62  		req->bh = bh->b_reqnext;
    1.63  		bh->b_reqnext = NULL;
    1.64  		bh->b_end_io(bh, uptodate);
    1.65 -		end_block_io_op(bh);
    1.66  		if ((bh = req->bh) != NULL) {
    1.67  			req->hard_sector += nsect;
    1.68  			req->hard_nr_sectors -= nsect;
     2.1 --- a/xen-2.4.16/drivers/block/xen_block.c	Thu Feb 20 23:52:36 2003 +0000
     2.2 +++ b/xen-2.4.16/drivers/block/xen_block.c	Fri Feb 21 12:46:51 2003 +0000
     2.3 @@ -23,24 +23,26 @@
     2.4  #define DPRINTK(_f, _a...) ((void)0)
     2.5  #endif
     2.6  
     2.7 -typedef struct blk_request
     2.8 -{
     2.9 -    struct buffer_head *bh;
    2.10 -    void               *id;
    2.11 -    struct task_struct *domain;
    2.12 -} blk_request_t;
    2.13 -
    2.14 +/*
    2.15 + * These are rather arbitrary. They are fairly large because adjacent
    2.16 + * requests pulled from a communication ring are quite likely to end
    2.17 + * up being part of the same scatter/gather request at the disc.
    2.18 + * It might be a good idea to add scatter/gather support explicitly to
    2.19 + * the scatter/gather ring (eg. each request has an array of N pointers);
    2.20 + * then these values would better reflect real costs at the disc.
    2.21 + */
    2.22  #define MAX_PENDING_REQS 32
    2.23  #define BATCH_PER_DOMAIN 8
    2.24  
    2.25 -static kmem_cache_t *blk_request_cachep;
    2.26 +static kmem_cache_t *buffer_head_cachep;
    2.27  static atomic_t nr_pending;
    2.28  
    2.29  static void io_schedule(unsigned long unused);
    2.30 -static int do_block_io_op_domain(struct task_struct* task, int max_to_do);
    2.31 +static int do_block_io_op_domain(struct task_struct *p, int max_to_do);
    2.32  static void dispatch_rw_block_io(struct task_struct *p, int index);
    2.33  static void dispatch_probe_block_io(struct task_struct *p, int index);
    2.34  static void dispatch_debug_block_io(struct task_struct *p, int index);
    2.35 +static void make_response(struct task_struct *p, void *id, unsigned long st);
    2.36  
    2.37  
    2.38  /******************************************************************
    2.39 @@ -104,6 +106,7 @@ static void io_schedule(unsigned long un
    2.40      struct task_struct *p;
    2.41      struct list_head *ent;
    2.42  
    2.43 +    /* Queue up a batch of requests. */
    2.44      while ( (atomic_read(&nr_pending) < MAX_PENDING_REQS) &&
    2.45              !list_empty(&io_schedule_list) )
    2.46      {
    2.47 @@ -113,10 +116,20 @@ static void io_schedule(unsigned long un
    2.48          if ( do_block_io_op_domain(p, BATCH_PER_DOMAIN) )
    2.49              add_to_blkdev_list_tail(p);
    2.50      }
    2.51 +
    2.52 +    /* Push the batch through to disc. */
    2.53 +    run_task_queue(&tq_disk);
    2.54  }
    2.55  
    2.56  static void maybe_trigger_io_schedule(void)
    2.57  {
    2.58 +    /*
    2.59 +     * Needed so that two processes, who together make the following predicate
    2.60 +     * true, don't both read stale values and evaluate the predicate
    2.61 +     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
    2.62 +     */
    2.63 +    smp_mb();
    2.64 +
    2.65      if ( (atomic_read(&nr_pending) < (MAX_PENDING_REQS/2)) &&
    2.66           !list_empty(&io_schedule_list) )
    2.67      {
    2.68 @@ -127,51 +140,17 @@ static void maybe_trigger_io_schedule(vo
    2.69  
    2.70  
    2.71  /******************************************************************
    2.72 - * COMPLETION CALLBACK -- XXX Hook properly into bh->b_end_io
    2.73 + * COMPLETION CALLBACK -- Called as bh->b_end_io()
    2.74   */
    2.75  
    2.76 -void end_block_io_op(struct buffer_head * bh)
    2.77 +static void end_block_io_op(struct buffer_head *bh, int uptodate)
    2.78  {
    2.79 -    unsigned long cpu_mask;
    2.80 -    blk_request_t *blk_request = NULL;
    2.81 -    unsigned long flags;
    2.82 -    struct task_struct *p;
    2.83 -    int position = 0;
    2.84 -    blk_ring_t *blk_ring;
    2.85 -
    2.86 -    DPRINTK("XEN end_block_io_op, bh: %p\n", bh);
    2.87 -    
    2.88 -    if ( (blk_request = (blk_request_t *)bh->b_xen_request) == NULL ) 
    2.89 -        goto bad_interrupt;
    2.90 -
    2.91      atomic_dec(&nr_pending);
    2.92 -    
    2.93 -    p = blk_request->domain;
    2.94 +    make_response(bh->b_xen_domain, bh->b_xen_id, uptodate ? 0 : 1);
    2.95  
    2.96 -    /* Place on the response ring for the relevant domain. */ 
    2.97 -    spin_lock_irqsave(&p->blk_ring_lock, flags);
    2.98 -    blk_ring = p->blk_ring_base;
    2.99 -    position = blk_ring->resp_prod;
   2.100 -    blk_ring->resp_ring[position].id     = blk_request->id;
   2.101 -    blk_ring->resp_ring[position].status = 0;
   2.102 -    blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
   2.103 -    spin_unlock_irqrestore(&p->blk_ring_lock, flags);
   2.104 -    
   2.105 -    /* Kick the relevant domain. */
   2.106 -    cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
   2.107 -    guest_event_notify(cpu_mask); 
   2.108 -
   2.109 -    /* Free state associated with this request. */
   2.110 -    if ( blk_request->bh ) 
   2.111 -        kfree(blk_request->bh);     
   2.112 -    kmem_cache_free(blk_request_cachep, blk_request);
   2.113 +    kmem_cache_free(buffer_head_cachep, bh);
   2.114  
   2.115      maybe_trigger_io_schedule();
   2.116 -
   2.117 -    return;
   2.118 -
   2.119 - bad_interrupt:
   2.120 -    panic("Block IO interrupt received for unknown buffer [%p]\n", bh);
   2.121  }
   2.122  
   2.123  
   2.124 @@ -193,55 +172,43 @@ long do_block_io_op(void)
   2.125   * DOWNWARD CALLS -- These interface with the block-device layer proper.
   2.126   */
   2.127  
   2.128 -static int do_block_io_op_domain(struct task_struct* task, int max_to_do)
   2.129 +static int do_block_io_op_domain(struct task_struct* p, int max_to_do)
   2.130  {
   2.131 -    blk_ring_t *blk_ring = task->blk_ring_base;
   2.132 -    int loop, more_to_do = 0;
   2.133 -    int resp_ring_ents = 
   2.134 -        (blk_ring->resp_prod - blk_ring->resp_cons) & (BLK_RESP_RING_SIZE - 1);
   2.135 -
   2.136 -    DPRINTK("XEN do_block_io_op %d %d\n",
   2.137 -            blk_ring->req_cons, blk_ring->req_prod);
   2.138 +    blk_ring_t *blk_ring = p->blk_ring_base;
   2.139 +    int i, more_to_do = 0;
   2.140  
   2.141 -    for ( loop = blk_ring->req_cons; 
   2.142 -	  loop != blk_ring->req_prod; 
   2.143 -	  loop = BLK_REQ_RING_INC(loop) ) 
   2.144 +    for ( i = p->blk_req_cons; 
   2.145 +	  i != blk_ring->req_prod; 
   2.146 +	  i = BLK_RING_INC(i) ) 
   2.147      {
   2.148 -        /*
   2.149 -         * Bail if we've reached the batch allowance for thsi interface,
   2.150 -         * or if we risk producing enough responses to overflow the
   2.151 -         * communication ring.
   2.152 -         */
   2.153 -        if ( (max_to_do-- == 0) ||
   2.154 -             ((atomic_read(&nr_pending) + resp_ring_ents) >
   2.155 -              BLK_RESP_RING_MAX_ENTRIES) )
   2.156 +        if ( max_to_do-- == 0 )
   2.157          {
   2.158              more_to_do = 1;
   2.159              break;
   2.160          }
   2.161          
   2.162 -	switch ( blk_ring->req_ring[loop].operation )
   2.163 +	switch ( blk_ring->ring[i].req.operation )
   2.164          {
   2.165  	case XEN_BLOCK_READ:
   2.166  	case XEN_BLOCK_WRITE:
   2.167 -	    dispatch_rw_block_io(task, loop);
   2.168 +	    dispatch_rw_block_io(p, i);
   2.169  	    break;
   2.170  
   2.171  	case XEN_BLOCK_PROBE:
   2.172 -	    dispatch_probe_block_io(task, loop);
   2.173 +	    dispatch_probe_block_io(p, i);
   2.174  	    break;
   2.175  
   2.176  	case XEN_BLOCK_DEBUG:
   2.177 -	    dispatch_debug_block_io(task, loop);
   2.178 +	    dispatch_debug_block_io(p, i);
   2.179  	    break;
   2.180  
   2.181  	default:
   2.182  	    panic("error: unknown block io operation [%d]\n",
   2.183 -                  blk_ring->req_ring[loop].operation);
   2.184 +                  blk_ring->ring[i].req.operation);
   2.185  	}
   2.186      }
   2.187  
   2.188 -    blk_ring->req_cons = loop;
   2.189 +    p->blk_req_cons = i;
   2.190      return more_to_do;
   2.191  }
   2.192  
   2.193 @@ -255,20 +222,11 @@ static void dispatch_probe_block_io(stru
   2.194      extern void ide_probe_devices(xen_disk_info_t *xdi);
   2.195      blk_ring_t *blk_ring = p->blk_ring_base;
   2.196      xen_disk_info_t *xdi;
   2.197 -    unsigned long flags, cpu_mask;
   2.198 -    
   2.199 -    xdi = phys_to_virt((unsigned long)blk_ring->req_ring[index].buffer);
   2.200 -    
   2.201 +
   2.202 +    xdi = phys_to_virt((unsigned long)blk_ring->ring[index].req.buffer);    
   2.203      ide_probe_devices(xdi);
   2.204  
   2.205 -    spin_lock_irqsave(&p->blk_ring_lock, flags);
   2.206 -    blk_ring->resp_ring[blk_ring->resp_prod].id = blk_ring->req_ring[index].id;
   2.207 -    blk_ring->resp_ring[blk_ring->resp_prod].status = 0;
   2.208 -    blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
   2.209 -    spin_unlock_irqrestore(&p->blk_ring_lock, flags);
   2.210 -
   2.211 -    cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
   2.212 -    guest_event_notify(cpu_mask); 
   2.213 +    make_response(p, blk_ring->ring[index].req.id, 0);
   2.214  }
   2.215  
   2.216  static void dispatch_rw_block_io(struct task_struct *p, int index)
   2.217 @@ -276,49 +234,45 @@ static void dispatch_rw_block_io(struct 
   2.218      extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
   2.219      blk_ring_t *blk_ring = p->blk_ring_base;
   2.220      struct buffer_head *bh;
   2.221 -    struct request_queue *rq;
   2.222      int operation;
   2.223 -    blk_request_t *blk_request;
   2.224      
   2.225      /*
   2.226       * check to make sure that the block request seems at least
   2.227       * a bit legitimate
   2.228       */
   2.229 -    if ( (blk_ring->req_ring[index].block_size & (0x200 - 1)) != 0 )
   2.230 +    if ( (blk_ring->ring[index].req.block_size & (0x200 - 1)) != 0 )
   2.231  	panic("error: dodgy block size: %d\n", 
   2.232 -              blk_ring->req_ring[index].block_size);
   2.233 +              blk_ring->ring[index].req.block_size);
   2.234      
   2.235 -    if ( blk_ring->req_ring[index].buffer == NULL )
   2.236 +    if ( blk_ring->ring[index].req.buffer == NULL )
   2.237  	panic("xen_block: bogus buffer from guestOS\n"); 
   2.238  
   2.239 -    DPRINTK("req_cons: %d  req_prod %d  index: %d, op: %s, pri: %s\n",
   2.240 -            blk_ring->req_cons, blk_ring->req_prod, index, 
   2.241 -            (blk_ring->req_ring[index].operation == XEN_BLOCK_READ ? 
   2.242 -             "read" : "write"), 
   2.243 -            (blk_ring->req_ring[index].priority == XEN_BLOCK_SYNC ? 
   2.244 -             "sync" : "async"));
   2.245 +    DPRINTK("req_cons: %d  req_prod %d  index: %d, op: %s\n",
   2.246 +            p->blk_req_cons, blk_ring->req_prod, index, 
   2.247 +            (blk_ring->ring[index].req.operation == XEN_BLOCK_READ ? 
   2.248 +             "read" : "write"));
   2.249  
   2.250      atomic_inc(&nr_pending);
   2.251 -    blk_request = kmem_cache_alloc(blk_request_cachep, GFP_ATOMIC);
   2.252 -
   2.253 -    /* we'll be doing this frequently, would a cache be appropriate? */
   2.254 -    bh = (struct buffer_head *) kmalloc(sizeof(struct buffer_head), 
   2.255 -					GFP_KERNEL);
   2.256 +    bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
   2.257      if ( bh == NULL ) panic("bh is null\n");
   2.258  
   2.259      /* set just the important bits of the buffer header */
   2.260      memset (bh, 0, sizeof (struct buffer_head));
   2.261      
   2.262 -    bh->b_blocknr       = blk_ring->req_ring[index].block_number;
   2.263 -    bh->b_size          = blk_ring->req_ring[index].block_size; 
   2.264 -    bh->b_dev           = blk_ring->req_ring[index].device; 
   2.265 -    bh->b_rsector       = blk_ring->req_ring[index].sector_number;
   2.266 +    bh->b_blocknr       = blk_ring->ring[index].req.block_number;
   2.267 +    bh->b_size          = blk_ring->ring[index].req.block_size; 
   2.268 +    bh->b_dev           = blk_ring->ring[index].req.device; 
   2.269 +    bh->b_rsector       = blk_ring->ring[index].req.sector_number;
   2.270      bh->b_data          = phys_to_virt((unsigned long)
   2.271 -				       blk_ring->req_ring[index].buffer);
   2.272 +				       blk_ring->ring[index].req.buffer);
   2.273      bh->b_count.counter = 1;
   2.274 -    bh->b_xen_request   = (void *)blk_request;  
   2.275 -    
   2.276 -    if ( blk_ring->req_ring[index].operation == XEN_BLOCK_WRITE )
   2.277 +    bh->b_end_io        = end_block_io_op;
   2.278 +
   2.279 +    /* Save meta data about request. */
   2.280 +    bh->b_xen_domain    = p;
   2.281 +    bh->b_xen_id        = blk_ring->ring[index].req.id;
   2.282 +
   2.283 +    if ( blk_ring->ring[index].req.operation == XEN_BLOCK_WRITE )
   2.284      {
   2.285  	bh->b_state = ((1 << BH_JBD) | (1 << BH_Mapped) | (1 << BH_Req) |
   2.286  		       (1 << BH_Dirty) | (1 << BH_Uptodate));
   2.287 @@ -330,15 +284,8 @@ static void dispatch_rw_block_io(struct 
   2.288  	operation = READ;
   2.289      }
   2.290  
   2.291 -    /* save meta data about request */
   2.292 -    blk_request->id     = blk_ring->req_ring[index].id;
   2.293 -    blk_request->bh     = bh;
   2.294 -    blk_request->domain = p; 
   2.295 -    
   2.296 -    /* dispatch single block request */
   2.297 -    ll_rw_block(operation, 1, &bh);       /* linux top half */
   2.298 -    rq = blk_get_queue(bh->b_rdev);                         
   2.299 -    generic_unplug_device(rq);            /* linux bottom half */
   2.300 +    /* Dispatch a single request. We'll flush it to disc later. */
   2.301 +    ll_rw_block(operation, 1, &bh);
   2.302  }
   2.303  
   2.304  
   2.305 @@ -347,6 +294,26 @@ static void dispatch_rw_block_io(struct 
   2.306   * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
   2.307   */
   2.308  
   2.309 +static void make_response(struct task_struct *p, void *id, unsigned long st)
   2.310 +{
   2.311 +    unsigned long cpu_mask, flags;
   2.312 +    int position;
   2.313 +    blk_ring_t *blk_ring;
   2.314 +
   2.315 +    /* Place on the response ring for the relevant domain. */ 
   2.316 +    spin_lock_irqsave(&p->blk_ring_lock, flags);
   2.317 +    blk_ring = p->blk_ring_base;
   2.318 +    position = blk_ring->resp_prod;
   2.319 +    blk_ring->ring[position].resp.id     = id;
   2.320 +    blk_ring->ring[position].resp.status = st;
   2.321 +    blk_ring->resp_prod = BLK_RING_INC(position);
   2.322 +    spin_unlock_irqrestore(&p->blk_ring_lock, flags);
   2.323 +    
   2.324 +    /* Kick the relevant domain. */
   2.325 +    cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
   2.326 +    guest_event_notify(cpu_mask); 
   2.327 +}
   2.328 +
   2.329  static void dump_blockq(u_char key, void *dev_id, struct pt_regs *regs) 
   2.330  {
   2.331      printk("Dumping block queue stats: nr_pending = %d\n",
   2.332 @@ -378,12 +345,9 @@ void initialize_block_io ()
   2.333      spin_lock_init(&io_schedule_list_lock);
   2.334      INIT_LIST_HEAD(&io_schedule_list);
   2.335  
   2.336 -    blk_request_cachep = kmem_cache_create(
   2.337 -        "blk_request_cache", sizeof(blk_request_t),
   2.338 +    buffer_head_cachep = kmem_cache_create(
   2.339 +        "buffer_head_cache", sizeof(struct buffer_head),
   2.340          0, SLAB_HWCACHE_ALIGN, NULL, NULL);
   2.341      
   2.342      add_key_handler('b', dump_blockq, "dump xen ide blkdev stats");     
   2.343  }
   2.344 -
   2.345 -
   2.346 -
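
The shape of the new completion path in xen_block.c is worth spelling out: dispatch_rw_block_io() now allocates the buffer_head itself from a slab cache, records the owning domain and the guest-supplied id directly in the bh, and points b_end_io at end_block_io_op(); when the disc finishes, end_block_io_op() forwards the id and status to make_response() and frees the bh. Below is a minimal, self-contained sketch of that flow, with stand-in types, plain malloc/free instead of the slab cache, no locking, and a printf where make_response() would fill the response ring and kick the guest; the nr_pending accounting and I/O-scheduler kick are omitted. It is an illustration only, not the hypervisor code.

    /* Minimal model of the new completion path in xen_block.c.
     * Stand-in types only: no slab cache, no spinlocks, no response ring,
     * no nr_pending accounting or io_schedule kick. */
    #include <stdio.h>
    #include <stdlib.h>

    struct buffer_head;
    typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);

    struct buffer_head {
        void        *b_xen_domain;   /* owning domain (task_struct * in Xen) */
        void        *b_xen_id;       /* guest-supplied request id            */
        bh_end_io_t *b_end_io;       /* completion callback                  */
    };

    /* Stand-in for make_response(): the real one takes p->blk_ring_lock,
     * writes {id, status} at resp_prod, advances it with BLK_RING_INC(),
     * and notifies the guest via _EVENT_BLK_RESP. */
    static void make_response(void *domain, void *id, unsigned long status)
    {
        printf("response for domain %p: id=%p status=%lu\n", domain, id, status);
    }

    /* Mirrors end_block_io_op(): report completion, then free the bh. */
    static void end_block_io_op(struct buffer_head *bh, int uptodate)
    {
        make_response(bh->b_xen_domain, bh->b_xen_id, uptodate ? 0 : 1);
        free(bh);                    /* kmem_cache_free() in the real code */
    }

    int main(void)
    {
        static int dummy_domain, dummy_id;

        /* The relevant part of dispatch_rw_block_io(). */
        struct buffer_head *bh = calloc(1, sizeof(*bh));
        if (bh == NULL)
            return 1;
        bh->b_xen_domain = &dummy_domain;
        bh->b_xen_id     = &dummy_id;
        bh->b_end_io     = end_block_io_op;

        /* In Xen, ll_rw_block() queues the bh and run_task_queue(&tq_disk)
         * pushes the batch to the disc, which later invokes b_end_io; here
         * we simply call it directly with uptodate = 1. */
        bh->b_end_io(bh, 1);
        return 0;
    }
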
     3.1 --- a/xen-2.4.16/include/hypervisor-ifs/block.h	Thu Feb 20 23:52:36 2003 +0000
     3.2 +++ b/xen-2.4.16/include/hypervisor-ifs/block.h	Fri Feb 21 12:46:51 2003 +0000
     3.3 @@ -21,26 +21,14 @@
     3.4  #define XEN_BLOCK_PROBE 8      /* determine io configuration from hypervisor */
     3.5  #define XEN_BLOCK_DEBUG 16                                          /* debug */
     3.6  
     3.7 -#define XEN_BLOCK_SYNC  2
     3.8 -#define XEN_BLOCK_ASYNC 3
     3.9 -
    3.10 -#define XEN_BLOCK_MAX_DOMAINS 32  /* NOTE: FIX THIS. VALUE SHOULD COME FROM? */
    3.11 -
    3.12 -#define BLK_REQ_RING_SIZE  64
    3.13 -#define BLK_RESP_RING_SIZE 64
    3.14 -
    3.15 -#define BLK_REQ_RING_MAX_ENTRIES  (BLK_REQ_RING_SIZE - 2)
    3.16 -#define BLK_RESP_RING_MAX_ENTRIES (BLK_RESP_RING_SIZE - 2)
    3.17 -
    3.18 -#define BLK_REQ_RING_INC(_i)     (((_i)+1) & (BLK_REQ_RING_SIZE-1))
    3.19 -#define BLK_RESP_RING_INC(_i)    (((_i)+1) & (BLK_RESP_RING_SIZE-1))
    3.20 -#define BLK_REQ_RING_ADD(_i,_j)  (((_i)+(_j)) & (BLK_REQ_RING_SIZE-1))
    3.21 -#define BLK_RESP_RING_ADD(_i,_j) (((_i)+(_j)) & (BLK_RESP_RING_SIZE-1))
    3.22 +#define BLK_RING_SIZE        128
    3.23 +#define BLK_RING_MAX_ENTRIES (BLK_RING_SIZE - 2)
    3.24 +#define BLK_RING_INC(_i)     (((_i)+1) & (BLK_RING_SIZE-1))
    3.25 +#define BLK_RING_ADD(_i,_j)  (((_i)+(_j)) & (BLK_RING_SIZE-1))
    3.26  
    3.27  typedef struct blk_ring_req_entry 
    3.28  {
    3.29      void *          id;                /* for guest os use */
    3.30 -    int             priority;          /* SYNC or ASYNC for now */
    3.31      int             operation;         /* XEN_BLOCK_READ or XEN_BLOCK_WRITE */
    3.32      char *          buffer;
    3.33      unsigned long   block_number;      /* block number */
    3.34 @@ -57,10 +45,12 @@ typedef struct blk_ring_resp_entry
    3.35  
    3.36  typedef struct blk_ring_st 
    3.37  {
    3.38 -  unsigned int      req_prod, req_cons;
    3.39 -  unsigned int      resp_prod, resp_cons;
    3.40 -  blk_ring_req_entry_t  req_ring[BLK_REQ_RING_SIZE];
    3.41 -  blk_ring_resp_entry_t resp_ring[BLK_RESP_RING_SIZE];
    3.42 +    unsigned int req_prod;  /* Request producer. Updated by guest OS. */
    3.43 +    unsigned int resp_prod; /* Response producer. Updated by Xen.     */
    3.44 +    union {
    3.45 +        blk_ring_req_entry_t  req;
    3.46 +        blk_ring_resp_entry_t resp;
    3.47 +    } ring[BLK_RING_SIZE];
    3.48  } blk_ring_t;
    3.49  
    3.50  #define MAX_XEN_DISK_COUNT 100
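
The two fixed-size rings collapse into one array of request/response unions, and only the producer indices stay in shared memory: the guest advances req_prod, Xen advances resp_prod, and each side keeps its consumer index privately (blk_req_cons in the task_struct, the static resp_cons in xl_block.c). The toy program below sketches that discipline using the macros above; the entry types are simplified stand-ins and both "sides" run in one process, so it only illustrates the index arithmetic, not the real shared-page setup or event signalling.

    /* Toy model of the unified ring: shared producer indices, private
     * consumers. Entry types are simplified stand-ins. */
    #include <stdio.h>

    #define BLK_RING_SIZE    128
    #define BLK_RING_INC(_i) (((_i) + 1) & (BLK_RING_SIZE - 1))

    typedef struct { void *id; int operation; }        req_t;
    typedef struct { void *id; unsigned long status; } resp_t;

    typedef struct {
        unsigned int req_prod;   /* advanced only by the guest OS */
        unsigned int resp_prod;  /* advanced only by Xen          */
        union { req_t req; resp_t resp; } ring[BLK_RING_SIZE];
    } blk_ring_t;

    static blk_ring_t shared;         /* stands in for the shared ring page   */
    static unsigned int blk_req_cons; /* Xen-private (task_struct in reality) */
    static unsigned int resp_cons;    /* guest-private (static in xl_block.c) */

    /* Guest side: queue a request unless the ring is full. */
    static int guest_queue_request(void *id)
    {
        if (BLK_RING_INC(shared.req_prod) == resp_cons)
            return 1;                            /* full: caller retries later */
        shared.ring[shared.req_prod].req.id = id;
        shared.req_prod = BLK_RING_INC(shared.req_prod);
        return 0;
    }

    /* Xen side: consume outstanding requests, posting a response for each
     * (requests and responses share the same slots). */
    static void xen_process_requests(void)
    {
        unsigned int i;
        for (i = blk_req_cons; i != shared.req_prod; i = BLK_RING_INC(i)) {
            void *id = shared.ring[i].req.id;
            shared.ring[shared.resp_prod].resp.id     = id;
            shared.ring[shared.resp_prod].resp.status = 0;
            shared.resp_prod = BLK_RING_INC(shared.resp_prod);
        }
        blk_req_cons = i;
    }

    int main(void)
    {
        static int a, b;
        guest_queue_request(&a);
        guest_queue_request(&b);
        xen_process_requests();

        /* Guest side: walk the responses, as xlblk_response_int() does. */
        for (; resp_cons != shared.resp_prod; resp_cons = BLK_RING_INC(resp_cons))
            printf("guest sees response id=%p status=%lu\n",
                   shared.ring[resp_cons].resp.id,
                   shared.ring[resp_cons].resp.status);
        return 0;
    }
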
     4.1 --- a/xen-2.4.16/include/xeno/blkdev.h	Thu Feb 20 23:52:36 2003 +0000
     4.2 +++ b/xen-2.4.16/include/xeno/blkdev.h	Fri Feb 21 12:46:51 2003 +0000
     4.3 @@ -62,8 +62,8 @@ enum bh_state_bits {
     4.4                           * for private allocation by other entities
     4.5                           */
     4.6  };
     4.7 +
     4.8  struct buffer_head {
     4.9 -        struct buffer_head *b_next;     /* Hash queue list */
    4.10          unsigned long b_blocknr;        /* block number */
    4.11          unsigned short b_size;          /* block size */
    4.12          unsigned short b_list;          /* List that this buffer appears */
    4.13 @@ -72,24 +72,18 @@ struct buffer_head {
    4.14          atomic_t b_count;               /* users using this block */
    4.15          kdev_t b_rdev;                  /* Real device */
    4.16          unsigned long b_state;          /* buffer state bitmap (see above) */
    4.17 -        unsigned long b_flushtime;      /* Time when (dirty) buffer should be written */
    4.18  
    4.19 -        struct buffer_head *b_next_free;/* lru/free list linkage */
    4.20 -        struct buffer_head *b_prev_free;/* doubly linked list of buffers */
    4.21 -        struct buffer_head *b_this_page;/* circular list of buffers in one page */
    4.22          struct buffer_head *b_reqnext;  /* request queue */
    4.23  
    4.24 -        struct buffer_head **b_pprev;   /* doubly linked list of hash-queue */
    4.25          char * b_data;                  /* pointer to data block */
    4.26          struct pfn_info *b_page;            /* the page this bh is mapped to */
    4.27 -        void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */
    4.28 -        void *b_private;                /* reserved for b_end_io */
    4.29 +        void (*b_end_io)(struct buffer_head *bh, int uptodate);
    4.30  
    4.31          unsigned long b_rsector;        /* Real buffer location on disk */
    4.32  
    4.33 -        struct inode *       b_inode;
    4.34 -        struct list_head     b_inode_buffers;   /* doubly linked list of inode dirty buffers */
    4.35 -        void *b_xen_request;                        /* xen request structure */
    4.36 +        /* Both used by b_end_io function in xen_block.c */
    4.37 +        void *b_xen_domain;
    4.38 +        void *b_xen_id;
    4.39  };
    4.40  
    4.41  typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
    4.42 @@ -127,15 +121,9 @@ static inline void mark_buffer_clean(str
    4.43  
    4.44  static inline void buffer_IO_error(struct buffer_head * bh)
    4.45  {
    4.46 -    extern void end_block_io_op(struct buffer_head *bh);
    4.47 -
    4.48      mark_buffer_clean(bh);
    4.49 -    /*
    4.50 -     * b_end_io has to clear the BH_Uptodate bitflag in the error case!
    4.51 -     */
    4.52 +    /* b_end_io has to clear the BH_Uptodate bitflag in the error case! */
    4.53      bh->b_end_io(bh, 0);
    4.54 -    /* XXX KAF */
    4.55 -    end_block_io_op(bh);
    4.56  }
    4.57  
    4.58  /**** XXX END OF BUFFER_HEAD STUFF XXXX ****/
     5.1 --- a/xen-2.4.16/include/xeno/sched.h	Thu Feb 20 23:52:36 2003 +0000
     5.2 +++ b/xen-2.4.16/include/xeno/sched.h	Fri Feb 21 12:46:51 2003 +0000
     5.3 @@ -76,6 +76,7 @@ struct task_struct {
     5.4  
     5.5      /* Block I/O */
     5.6      blk_ring_t *blk_ring_base;
     5.7 +    unsigned int blk_req_cons; /* request consumer */
     5.8      struct list_head blkdev_list;
     5.9      spinlock_t blk_ring_lock;
    5.10  
     6.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c	Thu Feb 20 23:52:36 2003 +0000
     6.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c	Fri Feb 21 12:46:51 2003 +0000
     6.3 @@ -1,3 +1,10 @@
     6.4 +/******************************************************************************
     6.5 + * xl_block.c
     6.6 + * 
     6.7 + * Xenolinux virtual block-device driver.
     6.8 + * 
     6.9 + */
    6.10 +
    6.11  #include <linux/config.h>
    6.12  #include <linux/module.h>
    6.13  
    6.14 @@ -49,6 +56,7 @@ static int xlblk_max_sectors[XLBLK_MAX];
    6.15  #endif
    6.16  
    6.17  static blk_ring_t *blk_ring;
    6.18 +static unsigned int resp_cons; /* Response consumer for comms ring. */
    6.19  static xen_disk_info_t xen_disk_info;
    6.20  
    6.21  int hypervisor_request(void *         id,
    6.22 @@ -56,8 +64,7 @@ int hypervisor_request(void *         id
    6.23                         char *         buffer,
    6.24                         unsigned long  block_number,
    6.25                         unsigned short block_size,
    6.26 -                       kdev_t         device,
    6.27 -                       int            mode);
    6.28 +                       kdev_t         device);
    6.29  
    6.30  
    6.31  /* ------------------------------------------------------------------------
    6.32 @@ -160,29 +167,29 @@ static int xenolinux_block_revalidate(kd
    6.33   * block_number:  block to read
    6.34   * block_size:  size of each block
    6.35   * device:  ide/hda is 768 or 0x300
    6.36 - * mode: XEN_BLOCK_SYNC or XEN_BLOCK_ASYNC.  async requests
    6.37 - *   will queue until a sync request is issued.
    6.38   */
    6.39  int hypervisor_request(void *         id,
    6.40                         int            operation,
    6.41                         char *         buffer,
    6.42                         unsigned long  block_number,
    6.43                         unsigned short block_size,
    6.44 -                       kdev_t         device,
    6.45 -                       int            mode)
    6.46 +                       kdev_t         device)
    6.47  {
    6.48      int position;
    6.49 -    void *buffer_pa, *buffer_ma; 
    6.50 +    void *buffer_ma; 
    6.51      kdev_t phys_device = (kdev_t) 0;
    6.52      unsigned long sector_number = 0;
    6.53      struct gendisk *gd;     
    6.54  
    6.55 -    /* Bail if there's no room in the request communication ring. */
    6.56 -    if ( BLK_REQ_RING_INC(blk_ring->req_prod) == blk_ring->req_cons )
    6.57 +    /*
    6.58 +     * Bail if there's no room in the request communication ring. This may be 
    6.59 +     * because we have a whole bunch of outstanding responses to process. No 
    6.60 +     * matter, as the response handler will kick the request queue.
    6.61 +     */
    6.62 +    if ( BLK_RING_INC(blk_ring->req_prod) == resp_cons )
    6.63          return 1;
    6.64  
    6.65 -    buffer_pa = (void *)virt_to_phys(buffer); 
    6.66 -    buffer_ma = (void *)phys_to_machine((unsigned long)buffer_pa); 
    6.67 +    buffer_ma = (void *)phys_to_machine(virt_to_phys(buffer)); 
    6.68  
    6.69      switch ( operation )
    6.70      {
    6.71 @@ -209,18 +216,15 @@ int hypervisor_request(void *         id
    6.72  
    6.73      /* Fill out a communications ring structure & trap to the hypervisor */
    6.74      position = blk_ring->req_prod;
    6.75 -    blk_ring->req_ring[position].id            = id;
    6.76 -    blk_ring->req_ring[position].priority      = mode;
    6.77 -    blk_ring->req_ring[position].operation     = operation;
    6.78 -    blk_ring->req_ring[position].buffer        = buffer_ma;
    6.79 -    blk_ring->req_ring[position].block_number  = block_number;
    6.80 -    blk_ring->req_ring[position].block_size    = block_size;
    6.81 -    blk_ring->req_ring[position].device        = phys_device;
    6.82 -    blk_ring->req_ring[position].sector_number = sector_number;
    6.83 +    blk_ring->ring[position].req.id            = id;
    6.84 +    blk_ring->ring[position].req.operation     = operation;
    6.85 +    blk_ring->ring[position].req.buffer        = buffer_ma;
    6.86 +    blk_ring->ring[position].req.block_number  = block_number;
    6.87 +    blk_ring->ring[position].req.block_size    = block_size;
    6.88 +    blk_ring->ring[position].req.device        = phys_device;
    6.89 +    blk_ring->ring[position].req.sector_number = sector_number;
    6.90  
    6.91 -    blk_ring->req_prod = BLK_REQ_RING_INC(blk_ring->req_prod);
    6.92 -
    6.93 -    if ( mode == XEN_BLOCK_SYNC ) HYPERVISOR_block_io_op();
    6.94 +    blk_ring->req_prod = BLK_RING_INC(position);
    6.95  
    6.96      return 0;
    6.97  }
    6.98 @@ -258,8 +262,7 @@ static void do_xlblk_request (request_qu
    6.99  	{
   6.100              full = hypervisor_request(
   6.101                  bh, (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
   6.102 -                bh->b_data, bh->b_rsector, bh->b_size, 
   6.103 -                bh->b_dev, XEN_BLOCK_ASYNC);
   6.104 +                bh->b_data, bh->b_rsector, bh->b_size, bh->b_dev);
   6.105              
   6.106              if ( full ) goto out;
   6.107  
   6.108 @@ -313,15 +316,15 @@ static void xlblk_response_int(int irq, 
   6.109      
   6.110      spin_lock_irqsave(&io_request_lock, flags);	    
   6.111  
   6.112 -    for ( i = blk_ring->resp_cons;
   6.113 +    for ( i  = resp_cons;
   6.114  	  i != blk_ring->resp_prod;
   6.115 -	  i = BLK_RESP_RING_INC(i) )
   6.116 +	  i  = BLK_RING_INC(i) )
   6.117      {
   6.118 -	blk_ring_resp_entry_t *bret = &blk_ring->resp_ring[i];
   6.119 +	blk_ring_resp_entry_t *bret = &blk_ring->ring[i].resp;
   6.120          if ( (bh = bret->id) != NULL ) bh->b_end_io(bh, 1);
   6.121      }
   6.122      
   6.123 -    blk_ring->resp_cons = i;
   6.124 +    resp_cons = i;
   6.125  
   6.126      /* KAF: We can push work down at this point. We have the lock. */
   6.127      do_xlblk_request(BLK_DEFAULT_QUEUE(MAJOR_NR));
   6.128 @@ -336,9 +339,7 @@ int __init xlblk_init(void)
   6.129  
   6.130      /* This mapping was created early at boot time. */
   6.131      blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
   6.132 -
   6.133 -    blk_ring->req_prod = blk_ring->req_cons = 0;
   6.134 -    blk_ring->resp_prod = blk_ring->resp_cons = 0;
   6.135 +    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = 0;
   6.136      
   6.137      error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int, 0, 
   6.138  			"xlblk-response", NULL);
   6.139 @@ -351,8 +352,9 @@ int __init xlblk_init(void)
   6.140      xen_disk_info.count = 0;
   6.141  
   6.142      if ( hypervisor_request(NULL, XEN_BLOCK_PROBE, (char *) &xen_disk_info,
   6.143 -                            0, 0, (kdev_t) 0, XEN_BLOCK_SYNC) )
   6.144 +                            0, 0, (kdev_t) 0) )
   6.145          BUG();
   6.146 +    HYPERVISOR_block_io_op();
   6.147      while ( blk_ring->resp_prod != 1 ) barrier();
   6.148      for ( i = 0; i < xen_disk_info.count; i++ )
   6.149      { 
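
With the SYNC/ASYNC mode gone, hypervisor_request() never traps on its own: callers queue as many requests as the ring will take and then issue a single HYPERVISOR_block_io_op() for the whole batch, which is what the probe in xlblk_init() and the test driver below now do explicitly. A small usage sketch of that pattern follows; both calls are stubbed out, and the request arguments are trimmed to just an id, purely so the fragment stands alone.

    /* Usage sketch of the new batching discipline. Stubs only: the real
     * hypervisor_request() takes the full (id, operation, buffer, block
     * number, block size, device) argument list and fills a ring slot. */
    #include <stdio.h>

    static int hypervisor_request(void *id)
    {
        printf("queued request %p (no trap)\n", id);
        return 0;                        /* 0 = queued, 1 = ring full */
    }

    static void HYPERVISOR_block_io_op(void)
    {
        printf("single trap: hypervisor processes the whole batch\n");
    }

    int main(void)
    {
        static char ids[4];
        int i;

        /* Queue a batch of requests without trapping per request... */
        for (i = 0; i < 4; i++)
            if (hypervisor_request(&ids[i]))
                break;                   /* ring full: wait for responses */

        /* ...then kick the hypervisor once for the whole batch. */
        HYPERVISOR_block_io_op();
        return 0;
    }
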
     7.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c	Thu Feb 20 23:52:36 2003 +0000
     7.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block_test.c	Fri Feb 21 12:46:51 2003 +0000
     7.3 @@ -15,6 +15,7 @@
     7.4  #include <linux/slab.h>
     7.5  #include <linux/string.h>
     7.6  #include <linux/errno.h>
     7.7 +#include <linux/delay.h>
     7.8  
     7.9  #include <asm/hypervisor-ifs/block.h>
    7.10  #include <asm/hypervisor-ifs/hypervisor-if.h>
    7.11 @@ -127,7 +128,6 @@ int proc_write_bdt(struct file *file, co
    7.12    int  block_number = 0;
    7.13    int  block_size = 0;
    7.14    int  device = 0;
    7.15 -  int  mode;
    7.16  
    7.17    if (copy_from_user(local, buffer, count))
    7.18    {
    7.19 @@ -158,16 +158,6 @@ int proc_write_bdt(struct file *file, co
    7.20      return -EINVAL;
    7.21    }
    7.22  
    7.23 -  if (opcode == 'r' || opcode == 'w' ||
    7.24 -      opcode == 'd' || opcode == 'D')
    7.25 -  {
    7.26 -    mode = XEN_BLOCK_SYNC;
    7.27 -  }
    7.28 -  else /* (opcode == 'R' || opcode == 'W') */
    7.29 -  {
    7.30 -    mode = XEN_BLOCK_ASYNC;
    7.31 -  }
    7.32 -
    7.33    if (data)
    7.34    {
    7.35      kfree(data);
    7.36 @@ -187,7 +177,9 @@ int proc_write_bdt(struct file *file, co
    7.37    /* submit request */
    7.38    hypervisor_request(0, meta.operation, meta.buffer, 
    7.39  		     meta.block_number, meta.block_size,
    7.40 -		     meta.device, mode);
    7.41 +		     meta.device);
    7.42 +  HYPERVISOR_block_io_op();
    7.43 +  mdelay(1000); /* should wait for a proper acknowledgement/response. */
    7.44  
    7.45    kfree(local);
    7.46    return count;