ia64/xen-unstable

changeset 222:7d86ebeca986

bitkeeper revision 1.80 (3e556a44pqm9KRyptkYpEsBf7mp3Bg)

xl_block.c, xen_block.c:
Big rewrite of Xenolinux virtual blkdev driver. Currently hangs after raw 'dd' writes :-(
author    kaf24@labyrinth.cl.cam.ac.uk
date      Thu Feb 20 23:52:36 2003 +0000 (2003-02-20)
parents   a9945f054eb9
children  81ac00c13385
files     xen-2.4.16/drivers/block/xen_block.c xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c
line diff
     1.1 --- a/xen-2.4.16/drivers/block/xen_block.c	Thu Feb 20 20:17:41 2003 +0000
     1.2 +++ b/xen-2.4.16/drivers/block/xen_block.c	Thu Feb 20 23:52:36 2003 +0000
     1.3 @@ -17,8 +17,11 @@
     1.4  #include <xeno/keyhandler.h>
     1.5  #include <xeno/interrupt.h>
     1.6  
     1.7 -#define XEN_BLK_DEBUG 0
     1.8 -#define XEN_BLK_DEBUG_LEVEL KERN_ALERT
     1.9 +#if 0
    1.10 +#define DPRINTK(_f, _a...) printk( _f , ## _a )
    1.11 +#else
    1.12 +#define DPRINTK(_f, _a...) ((void)0)
    1.13 +#endif
    1.14  
    1.15  typedef struct blk_request
    1.16  {
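
The hunk above replaces the run-time XEN_BLK_DEBUG check with a DPRINTK macro that the preprocessor removes entirely when disabled, so quiet builds pay nothing for the debug statements. A minimal userspace sketch of the idiom, with a DEBUG switch standing in for editing the #if 0 by hand (the GCC "_a..." named-variadic form is the same one the patch uses):

    #include <stdio.h>

    #define DEBUG 0                     /* flip to 1 for debug output */

    #if DEBUG
    #define DPRINTK(_f, _a...) printf( _f , ## _a )
    #else
    #define DPRINTK(_f, _a...) ((void)0)
    #endif

    int main(void)
    {
        /* Compiles to nothing at all when DEBUG is 0. */
        DPRINTK("end_block_io_op, bh: %p\n", (void *)0);
        return 0;
    }
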
    1.17 @@ -136,11 +139,9 @@ void end_block_io_op(struct buffer_head 
    1.18      int position = 0;
    1.19      blk_ring_t *blk_ring;
    1.20  
    1.21 -    if ( XEN_BLK_DEBUG )  
    1.22 -	printk(XEN_BLK_DEBUG_LEVEL "XEN end_block_io_op,  bh: %lx\n",
    1.23 -	       (unsigned long)bh);
    1.24 +    DPRINTK("XEN end_block_io_op, bh: %p\n", bh);
    1.25      
    1.26 -    if ( (blk_request = (blk_request_t *)bh->b_xen_request) == NULL) 
    1.27 +    if ( (blk_request = (blk_request_t *)bh->b_xen_request) == NULL ) 
    1.28          goto bad_interrupt;
    1.29  
    1.30      atomic_dec(&nr_pending);
    1.31 @@ -170,11 +171,7 @@ void end_block_io_op(struct buffer_head 
    1.32      return;
    1.33  
    1.34   bad_interrupt:
    1.35 -    printk (KERN_ALERT
    1.36 -            "   block io interrupt received for unknown buffer [0x%lx]\n",
    1.37 -            (unsigned long) bh);
    1.38 -    BUG();
    1.39 -    return;
    1.40 +    panic("Block IO interrupt received for unknown buffer [%p]\n", bh);
    1.41  }
    1.42  
    1.43  
    1.44 @@ -200,22 +197,30 @@ static int do_block_io_op_domain(struct 
    1.45  {
    1.46      blk_ring_t *blk_ring = task->blk_ring_base;
    1.47      int loop, more_to_do = 0;
    1.48 +    int resp_ring_ents = 
    1.49 +        (blk_ring->resp_prod - blk_ring->resp_cons) & (BLK_RESP_RING_SIZE - 1);
    1.50  
    1.51 -    if (XEN_BLK_DEBUG)  
    1.52 -	printk(XEN_BLK_DEBUG_LEVEL "XEN do_block_io_op %d %d\n",
    1.53 -	       blk_ring->req_cons, blk_ring->req_prod);
    1.54 +    DPRINTK("XEN do_block_io_op %d %d\n",
    1.55 +            blk_ring->req_cons, blk_ring->req_prod);
    1.56  
    1.57      for ( loop = blk_ring->req_cons; 
    1.58  	  loop != blk_ring->req_prod; 
    1.59  	  loop = BLK_REQ_RING_INC(loop) ) 
    1.60      {
    1.61 -        if ( max_to_do-- == 0 )
    1.62 +        /*
     1.63 +         * Bail if we've reached the batch allowance for this interface,
    1.64 +         * or if we risk producing enough responses to overflow the
    1.65 +         * communication ring.
    1.66 +         */
    1.67 +        if ( (max_to_do-- == 0) ||
    1.68 +             ((atomic_read(&nr_pending) + resp_ring_ents) >
    1.69 +              BLK_RESP_RING_MAX_ENTRIES) )
    1.70          {
    1.71              more_to_do = 1;
    1.72              break;
    1.73          }
    1.74          
    1.75 -	switch (blk_ring->req_ring[loop].operation)
    1.76 +	switch ( blk_ring->req_ring[loop].operation )
    1.77          {
    1.78  	case XEN_BLOCK_READ:
    1.79  	case XEN_BLOCK_WRITE:
    1.80 @@ -231,9 +236,8 @@ static int do_block_io_op_domain(struct 
    1.81  	    break;
    1.82  
    1.83  	default:
    1.84 -	    printk (KERN_ALERT "error: unknown block io operation [%d]\n",
    1.85 -		    blk_ring->req_ring[loop].operation);
    1.86 -	    BUG();
    1.87 +	    panic("error: unknown block io operation [%d]\n",
    1.88 +                  blk_ring->req_ring[loop].operation);
    1.89  	}
    1.90      }
    1.91  
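
The overflow guard added above works because the response ring's size is a power of two and its indices advance monotonically modulo that size: producer minus consumer, masked with size minus one, gives the occupancy even across wrap-around. A small self-contained sketch (RING_SIZE is an assumed stand-in for BLK_RESP_RING_SIZE):

    #include <assert.h>

    #define RING_SIZE 64                /* must be a power of two */

    static unsigned int ring_ents(unsigned int prod, unsigned int cons)
    {
        return (prod - cons) & (RING_SIZE - 1);
    }

    int main(void)
    {
        assert(ring_ents(5, 5) == 0);   /* empty ring */
        assert(ring_ents(7, 5) == 2);   /* two outstanding responses */
        assert(ring_ents(2, 60) == 6);  /* the mask handles the wrap */
        return 0;
    }
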
    1.92 @@ -243,7 +247,7 @@ static int do_block_io_op_domain(struct 
    1.93  
    1.94  static void dispatch_debug_block_io(struct task_struct *p, int index)
    1.95  {
    1.96 -    printk (KERN_ALERT "dispatch_debug_block_io: UNIMPL\n"); 
    1.97 +    DPRINTK("dispatch_debug_block_io: unimplemented\n"); 
    1.98  }
    1.99  
   1.100  static void dispatch_probe_block_io(struct task_struct *p, int index)
   1.101 @@ -281,26 +285,18 @@ static void dispatch_rw_block_io(struct 
   1.102       * a bit legitimate
   1.103       */
   1.104      if ( (blk_ring->req_ring[index].block_size & (0x200 - 1)) != 0 )
   1.105 -    {
   1.106 -	printk(KERN_ALERT "    error: dodgy block size: %d\n", 
   1.107 -	       blk_ring->req_ring[index].block_size);
   1.108 -	BUG();
   1.109 -    }
   1.110 +	panic("error: dodgy block size: %d\n", 
   1.111 +              blk_ring->req_ring[index].block_size);
   1.112      
   1.113      if ( blk_ring->req_ring[index].buffer == NULL )
   1.114 -    { 
   1.115 -	printk(KERN_ALERT "xen_block: bogus buffer from guestOS\n"); 
   1.116 -	BUG();
   1.117 -    }
   1.118 +	panic("xen_block: bogus buffer from guestOS\n"); 
   1.119  
   1.120 -    if (XEN_BLK_DEBUG)
   1.121 -	printk(XEN_BLK_DEBUG_LEVEL "    req_cons: %d  req_prod %d  index: %d "
   1.122 -	       "op: %s, pri: %s\n", blk_ring->req_cons, blk_ring->req_prod, 
   1.123 -	       index, 
   1.124 -	       (blk_ring->req_ring[index].operation == XEN_BLOCK_READ ? 
   1.125 -		"read" : "write"), 
   1.126 -	       (blk_ring->req_ring[index].priority == XEN_BLOCK_SYNC ? 
   1.127 -		"sync" : "async"));
   1.128 +    DPRINTK("req_cons: %d  req_prod %d  index: %d, op: %s, pri: %s\n",
   1.129 +            blk_ring->req_cons, blk_ring->req_prod, index, 
   1.130 +            (blk_ring->req_ring[index].operation == XEN_BLOCK_READ ? 
   1.131 +             "read" : "write"), 
   1.132 +            (blk_ring->req_ring[index].priority == XEN_BLOCK_SYNC ? 
   1.133 +             "sync" : "async"));
   1.134  
   1.135      atomic_inc(&nr_pending);
   1.136      blk_request = kmem_cache_alloc(blk_request_cachep, GFP_ATOMIC);
   1.137 @@ -308,11 +304,7 @@ static void dispatch_rw_block_io(struct 
   1.138      /* we'll be doing this frequently, would a cache be appropriate? */
   1.139      bh = (struct buffer_head *) kmalloc(sizeof(struct buffer_head), 
   1.140  					GFP_KERNEL);
   1.141 -    if ( bh == NULL )
   1.142 -    {
   1.143 -	printk(KERN_ALERT "ERROR: bh is null\n");
   1.144 -	BUG();
   1.145 -    }
   1.146 +    if ( bh == NULL ) panic("bh is null\n");
   1.147  
   1.148      /* set just the important bits of the buffer header */
   1.149      memset (bh, 0, sizeof (struct buffer_head));
   1.150 @@ -326,7 +318,7 @@ static void dispatch_rw_block_io(struct 
   1.151      bh->b_count.counter = 1;
   1.152      bh->b_xen_request   = (void *)blk_request;  
   1.153      
   1.154 -    if (blk_ring->req_ring[index].operation == XEN_BLOCK_WRITE)
   1.155 +    if ( blk_ring->req_ring[index].operation == XEN_BLOCK_WRITE )
   1.156      {
   1.157  	bh->b_state = ((1 << BH_JBD) | (1 << BH_Mapped) | (1 << BH_Req) |
   1.158  		       (1 << BH_Dirty) | (1 << BH_Uptodate));
     2.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c	Thu Feb 20 20:17:41 2003 +0000
     2.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c	Thu Feb 20 23:52:36 2003 +0000
     2.3 @@ -8,24 +8,20 @@
     2.4  #include <linux/errno.h>
     2.5  
     2.6  #include <linux/fs.h>
     2.7 -#include <linux/hdreg.h>                               /* HDIO_GETGEO, et al */
     2.8 +#include <linux/hdreg.h>
     2.9  #include <linux/blkdev.h>
    2.10  #include <linux/major.h>
    2.11  
    2.12 -/* NOTE: this is drive independent, so no inclusion of ide.h */
    2.13 -
    2.14  #include <asm/hypervisor-ifs/block.h>
    2.15  #include <asm/hypervisor-ifs/hypervisor-if.h>
    2.16  #include <asm/io.h>
    2.17 -#include <asm/uaccess.h>                                       /* put_user() */
    2.18 -
    2.19 -#define MAJOR_NR XLBLK_MAJOR   /* force defns in blk.h, must preceed include */
    2.20 -static int xlblk_major = XLBLK_MAJOR;
    2.21 +#include <asm/uaccess.h>
    2.22  
    2.23 -#include <linux/blk.h>           /* must come after definition of MAJOR_NR!! */
    2.24 +#define MAJOR_NR XLBLK_MAJOR   /* force defns in blk.h, must precede include */
    2.25 +static int xlblk_major = XLBLK_MAJOR;
    2.26 +#include <linux/blk.h>
    2.27  
    2.28 -/* instead of including linux/ide.h to pick up the definitiong of byte
    2.29 - * (and consequently screwing up blk.h, we'll just copy the definition */
    2.30 +/* Copied from linux/ide.h */
    2.31  typedef unsigned char	byte; 
    2.32  
    2.33  void xlblk_ide_register_disk(int, unsigned long);
    2.34 @@ -44,40 +40,24 @@ static int xlblk_max_sectors[XLBLK_MAX];
    2.35  
    2.36  #define DEBUG_IRQ    _EVENT_DEBUG 
    2.37  
    2.38 -typedef struct xlblk_device
    2.39 -{
    2.40 -  struct buffer_head *bh;
    2.41 -  unsigned int tx_count;                  /* number of used slots in tx ring */
    2.42 -} xlblk_device_t;
    2.43 -
    2.44 -xlblk_device_t xlblk_device;
    2.45 -
    2.46 -#define XLBLK_DEBUG       0
    2.47 -#define XLBLK_DEBUG_IOCTL 0
    2.48 -
    2.49 -/* Our per-request identifier is a discriminated union, using LSB. */
    2.50 -#define BH_TO_ID(_bh)   ((void *)(_bh))
    2.51 -#define REQ_TO_ID(_req) ((void *)((unsigned long)(_req) | 1))
    2.52 -#define ID_IS_REQ(_id)  ((int)(_id) & 1)
    2.53 -#define ID_TO_BH(_id)   ((struct buffer_head *)(_id))
    2.54 -#define ID_TO_REQ(_id)  ((struct request *)((unsigned long)(_id) & ~1))
    2.55 +#if 0
    2.56 +#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
    2.57 +#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
    2.58 +#else
    2.59 +#define DPRINTK(_f, _a...) ((void)0)
    2.60 +#define DPRINTK_IOCTL(_f, _a...) ((void)0)
    2.61 +#endif
    2.62  
    2.63  static blk_ring_t *blk_ring;
    2.64 -
    2.65 -/* 
    2.66 - * disk management
    2.67 - */
    2.68 -
    2.69 -xen_disk_info_t xen_disk_info;
    2.70 +static xen_disk_info_t xen_disk_info;
    2.71  
    2.72 -/* some declarations */
    2.73 -void hypervisor_request(void *         id,
    2.74 -			int            operation,
    2.75 -			char *         buffer,
    2.76 -			unsigned long  block_number,
    2.77 -			unsigned short block_size,
    2.78 -			kdev_t         device,
    2.79 -			int            mode);
    2.80 +int hypervisor_request(void *         id,
    2.81 +                       int            operation,
    2.82 +                       char *         buffer,
    2.83 +                       unsigned long  block_number,
    2.84 +                       unsigned short block_size,
    2.85 +                       kdev_t         device,
    2.86 +                       int            mode);
    2.87  
    2.88  
    2.89  /* ------------------------------------------------------------------------
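
The macros deleted in the hunk above implemented the driver's old per-request identifier: a discriminated union packed into a single pointer, using the low bit as the type tag, which is safe because buffer heads and requests are at least word-aligned. A standalone sketch of that tagging trick, with dummy aligned structures standing in for the real kernel types:

    #include <assert.h>

    struct buffer_head { long dummy; }; /* aligned stand-ins for the */
    struct request     { long dummy; }; /* real Linux 2.4 structures */

    #define BH_TO_ID(_bh)   ((void *)(_bh))
    #define REQ_TO_ID(_req) ((void *)((unsigned long)(_req) | 1))
    #define ID_IS_REQ(_id)  (((unsigned long)(_id)) & 1)
    #define ID_TO_BH(_id)   ((struct buffer_head *)(_id))
    #define ID_TO_REQ(_id)  ((struct request *)((unsigned long)(_id) & ~1UL))

    int main(void)
    {
        struct buffer_head bh;
        struct request     req;
        assert(!ID_IS_REQ(BH_TO_ID(&bh)));
        assert(ID_IS_REQ(REQ_TO_ID(&req)));
        assert(ID_TO_BH(BH_TO_ID(&bh))    == &bh);
        assert(ID_TO_REQ(REQ_TO_ID(&req)) == &req);
        return 0;
    }

The rewrite can retire the scheme because every response now identifies a buffer head directly; whole-request completion moves into do_xlblk_request itself.
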
    2.90 @@ -85,17 +65,13 @@ void hypervisor_request(void *         i
    2.91  
    2.92  static int xenolinux_block_open(struct inode *inode, struct file *filep)
    2.93  {
    2.94 -    if (XLBLK_DEBUG)
    2.95 -	printk (KERN_ALERT "xenolinux_block_open\n"); 
    2.96 -
    2.97 +    DPRINTK("xenolinux_block_open\n"); 
    2.98      return 0;
    2.99  }
   2.100  
   2.101  static int xenolinux_block_release(struct inode *inode, struct file *filep)
   2.102  {
   2.103 -    if (XLBLK_DEBUG)
   2.104 -	printk (KERN_ALERT "xenolinux_block_release\n");
   2.105 -
   2.106 +    DPRINTK("xenolinux_block_release\n");
   2.107      return 0;
   2.108  }
   2.109  
   2.110 @@ -105,8 +81,7 @@ static int xenolinux_block_ioctl(struct 
   2.111      int minor_dev;
   2.112      struct hd_geometry *geo = (struct hd_geometry *)argument;
   2.113  
   2.114 -    if (XLBLK_DEBUG_IOCTL)
   2.115 -	printk (KERN_ALERT "xenolinux_block_ioctl\n"); 
   2.116 +    DPRINTK("xenolinux_block_ioctl\n"); 
   2.117  
   2.118      /* check permissions */
   2.119      if (!capable(CAP_SYS_ADMIN)) return -EPERM;
   2.120 @@ -114,36 +89,28 @@ static int xenolinux_block_ioctl(struct 
   2.121      minor_dev = MINOR(inode->i_rdev);
   2.122      if (minor_dev >= XLBLK_MAX)  return -ENODEV;
   2.123      
   2.124 -    if (XLBLK_DEBUG_IOCTL)
   2.125 -	printk (KERN_ALERT "   command: 0x%x, argument: 0x%lx, minor: 0x%x\n",
   2.126 -		command, (long) argument, minor_dev); 
   2.127 +    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, minor: 0x%x\n",
   2.128 +                  command, (long) argument, minor_dev); 
   2.129    
   2.130 -    switch (command) {
   2.131 -
   2.132 +    switch (command)
   2.133 +    {
   2.134      case BLKGETSIZE:
   2.135 -	if (XLBLK_DEBUG_IOCTL) 
   2.136 -	    printk (KERN_ALERT
   2.137 -		    "   BLKGETSIZE: %x %lx\n", BLKGETSIZE, 
   2.138 -		    (long) xen_disk_info.disks[0].capacity); 
   2.139 +        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, 
   2.140 +                      (long) xen_disk_info.disks[0].capacity); 
   2.141  	return put_user(xen_disk_info.disks[0].capacity, 
   2.142  			(unsigned long *) argument);
   2.143  
   2.144      case BLKRRPART:
   2.145 -	if (XLBLK_DEBUG_IOCTL)
   2.146 -	    printk (KERN_ALERT "   BLKRRPART: %x\n", BLKRRPART); 
   2.147 +        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART); 
   2.148  	break;
   2.149  
   2.150      case BLKSSZGET:
   2.151 -	if (XLBLK_DEBUG_IOCTL)
   2.152 -	    printk (KERN_ALERT "   BLKSSZGET: %x 0x%x\n", BLKSSZGET,
   2.153 -		    xlblk_hardsect_size[minor_dev]);
   2.154 +        DPRINTK_IOCTL("   BLKSSZGET: %x 0x%x\n", BLKSSZGET,
   2.155 +                      xlblk_hardsect_size[minor_dev]);
   2.156  	return xlblk_hardsect_size[minor_dev]; 
   2.157  
   2.158      case HDIO_GETGEO:
   2.159 -
   2.160 -	if (XLBLK_DEBUG_IOCTL)
   2.161 -	    printk (KERN_ALERT "   HDIO_GETGEO: %x\n", HDIO_GETGEO);
   2.162 -
   2.163 +        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
   2.164  	if (!argument) return -EINVAL;
   2.165  	if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
   2.166  	if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
   2.167 @@ -152,10 +119,7 @@ static int xenolinux_block_ioctl(struct 
   2.168  	return 0;
   2.169  
   2.170      case HDIO_GETGEO_BIG: 
   2.171 -
   2.172 -	if (XLBLK_DEBUG_IOCTL) 
   2.173 -	    printk (KERN_ALERT "   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
   2.174 -
   2.175 +        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
   2.176  	if (!argument) return -EINVAL;
   2.177  	if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
   2.178  	if (put_user(0xff,  (byte *)&geo->heads))   return -EFAULT;
   2.179 @@ -165,8 +129,7 @@ static int xenolinux_block_ioctl(struct 
   2.180  	return 0;
   2.181  
   2.182      default:
   2.183 -	if (XLBLK_DEBUG_IOCTL) 
   2.184 -	    printk (KERN_ALERT "   eh? unknown ioctl\n");
   2.185 +        DPRINTK_IOCTL("   eh? unknown ioctl\n");
   2.186  	break;
   2.187      }
   2.188      
   2.189 @@ -175,15 +138,13 @@ static int xenolinux_block_ioctl(struct 
   2.190  
   2.191  static int xenolinux_block_check(kdev_t dev)
   2.192  {
   2.193 -    if (XLBLK_DEBUG) 
   2.194 -      printk (KERN_ALERT "xenolinux_block_check\n");
   2.195 +    DPRINTK("xenolinux_block_check\n");
   2.196      return 0;
   2.197  }
   2.198  
   2.199  static int xenolinux_block_revalidate(kdev_t dev)
   2.200  {
   2.201 -    if (XLBLK_DEBUG) 
   2.202 -	printk (KERN_ALERT "xenolinux_block_revalidate\n"); 
   2.203 +    DPRINTK("xenolinux_block_revalidate\n"); 
   2.204      return 0;
   2.205  }
   2.206  
   2.207 @@ -202,14 +163,13 @@ static int xenolinux_block_revalidate(kd
   2.208   * mode: XEN_BLOCK_SYNC or XEN_BLOCK_ASYNC.  async requests
   2.209   *   will queue until a sync request is issued.
   2.210   */
   2.211 -
   2.212 -void hypervisor_request(void *         id,
   2.213 -			int            operation,
   2.214 -			char *         buffer,
   2.215 -			unsigned long  block_number,
   2.216 -			unsigned short block_size,
   2.217 -			kdev_t         device,
   2.218 -			int            mode)
   2.219 +int hypervisor_request(void *         id,
   2.220 +                       int            operation,
   2.221 +                       char *         buffer,
   2.222 +                       unsigned long  block_number,
   2.223 +                       unsigned short block_size,
   2.224 +                       kdev_t         device,
   2.225 +                       int            mode)
   2.226  {
   2.227      int position;
   2.228      void *buffer_pa, *buffer_ma; 
   2.229 @@ -217,43 +177,36 @@ void hypervisor_request(void *         i
   2.230      unsigned long sector_number = 0;
   2.231      struct gendisk *gd;     
   2.232  
   2.233 +    /* Bail if there's no room in the request communication ring. */
   2.234 +    if ( BLK_REQ_RING_INC(blk_ring->req_prod) == blk_ring->req_cons )
   2.235 +        return 1;
   2.236 +
   2.237      buffer_pa = (void *)virt_to_phys(buffer); 
   2.238      buffer_ma = (void *)phys_to_machine((unsigned long)buffer_pa); 
   2.239  
   2.240 -    if (operation == XEN_BLOCK_PROBE) {
   2.241 +    switch ( operation )
   2.242 +    {
   2.243 +    case XEN_BLOCK_PROBE:
   2.244  	phys_device = (kdev_t) 0;
   2.245  	sector_number = 0;
   2.246 -
   2.247 -    } else if (operation == XEN_BLOCK_READ || operation == XEN_BLOCK_WRITE) {
   2.248 +        break;
   2.249  
   2.250 -	/*
   2.251 -	 * map logial major device to the physical device number 
   2.252 -	 *           XLBLK_MAJOR -> IDE0_MAJOR  (123 -> 3)
   2.253 -	 */
   2.254 -	if (MAJOR(device) == XLBLK_MAJOR) 
   2.255 -	    phys_device = MKDEV(IDE0_MAJOR, 0);
   2.256 -	else {
   2.257 -	    printk (KERN_ALERT "error: xl_block::hypervisor_request: "
   2.258 -		    "unknown device [0x%x]\n", device);
   2.259 -	    BUG();
   2.260 -	}
   2.261 -
   2.262 -	/*
   2.263 -	 * compute real buffer location on disk (from ll_rw_block.c::submit_bh)
   2.264 -	 */
   2.265 +    case XEN_BLOCK_READ:
   2.266 +    case XEN_BLOCK_WRITE:
   2.267 +	if ( MAJOR(device) != XLBLK_MAJOR ) 
   2.268 +	    panic("error: xl_block::hypervisor_request: "
   2.269 +                  "unknown device [0x%x]\n", device);
   2.270 +        phys_device = MKDEV(IDE0_MAJOR, 0);
   2.271 +	/* Compute real buffer location on disk */
   2.272  	sector_number = block_number;
   2.273 -
   2.274  	if ( (gd = (struct gendisk *)xen_disk_info.disks[0].gendisk) != NULL )
   2.275  	    sector_number += gd->part[MINOR(device)&IDE_PARTN_MASK].start_sect;
   2.276 +        break;
   2.277 +
   2.278 +    default:
   2.279 +        panic("unknown op %d\n", operation);
   2.280      }
   2.281  
   2.282 -
   2.283 -    if (BLK_REQ_RING_INC(blk_ring->req_prod) == blk_ring->req_cons) {
   2.284 -	printk (KERN_ALERT "hypervisor_request: req_cons: %d, req_prod:%d",
   2.285 -		blk_ring->req_cons, blk_ring->req_prod);
   2.286 -	BUG(); 
   2.287 -    }
   2.288 -    
   2.289      /* Fill out a communications ring structure & trap to the hypervisor */
   2.290      position = blk_ring->req_prod;
   2.291      blk_ring->req_ring[position].id            = id;
   2.292 @@ -267,112 +220,60 @@ void hypervisor_request(void *         i
   2.293  
   2.294      blk_ring->req_prod = BLK_REQ_RING_INC(blk_ring->req_prod);
   2.295  
   2.296 -    switch ( mode )
   2.297 -    { 
   2.298 -    case XEN_BLOCK_SYNC:  
   2.299 -	/* trap into hypervisor */
   2.300 -	HYPERVISOR_block_io_op();
   2.301 -	break; 
   2.302 +    if ( mode == XEN_BLOCK_SYNC ) HYPERVISOR_block_io_op();
   2.303  
   2.304 -    case XEN_BLOCK_ASYNC:
   2.305 -	/* for now, do nothing.  the request will go in the ring and
   2.306 -	   the next sync request will trigger the hypervisor to act */
   2.307 -	printk("Oh dear-- ASYNC xen block of doom!\n"); 
   2.308 -	break; 
   2.309 -
   2.310 -    default: 
   2.311 -	/* ummm, unknown mode. */
   2.312 -	printk("xl_block thingy: unknown mode %d\n", mode); 
   2.313 -	BUG();
   2.314 -    }
   2.315 -
   2.316 -    return;
   2.317 +    return 0;
   2.318  }
   2.319  
   2.320  
   2.321  /*
   2.322   * do_xlblk_request
   2.323 - *
   2.324 - * read a block; request is in a request queue
   2.325 - *
   2.326 - * TO DO: should probably release the io_request_lock and then re-acquire
   2.327 - *        (see LDD p. 338)
   2.328 + *  read a block; request is in a request queue
   2.329   */
   2.330  static void do_xlblk_request (request_queue_t *rq)
   2.331  {
   2.332      struct request *req;
   2.333      struct buffer_head *bh;
   2.334 -    unsigned long offset;
   2.335 -    unsigned long length;
   2.336 -    int rw, nsect;
   2.337 +    int rw, nsect, full, queued = 0;
   2.338      
   2.339 -    if ( XLBLK_DEBUG )
   2.340 -	printk (KERN_ALERT "xlblk.c::do_xlblk_request for '%s'\n", 
   2.341 -		DEVICE_NAME); 
   2.342 +    DPRINTK("xlblk.c::do_xlblk_request for '%s'\n", DEVICE_NAME); 
   2.343  
   2.344 -    /*
   2.345 -     * XXXXXXX KAF: This is really inefficient!!!!
   2.346 -     * 
   2.347 -     * What we really want is a scatter/gather interface, where each 
   2.348 -     * request maps onto one scatter/gather descriptor.
   2.349 -     * 
   2.350 -     * We then don't need to worry about buffer_heads getting serviced out
   2.351 -     * of order (because we get one reponse when an entire request is done).
   2.352 -     * 
   2.353 -     * We should look at SCSI code to see how to queue multiple requests
   2.354 -     * at once. Quite likely we'll have to take charge of the requests and
   2.355 -     * peel them off of the request_queue.
   2.356 -     * 
   2.357 -     * This is all for another day :-)
   2.358 -     * 
   2.359 -     * Just bear in mind that we'd like the following to be a loop!
   2.360 -     */
   2.361 -    /* while*/ if ( !QUEUE_EMPTY )
   2.362 +    while ( !rq->plugged && !QUEUE_EMPTY )
   2.363      {
   2.364 -	req = CURRENT;
   2.365 -	if ( rq->plugged || (req == NULL) ) return; 
   2.366 +	if ( (req = CURRENT) == NULL ) goto out;
   2.367  		
   2.368 -	if (XLBLK_DEBUG) 
   2.369 -	    printk (KERN_ALERT
   2.370 -		    "do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
   2.371 -		    req, req->cmd, req->sector,
   2.372 -		    req->current_nr_sectors, req->nr_sectors, req->bh);
   2.373 -	
   2.374 -	
   2.375 +        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
   2.376 +                req, req->cmd, req->sector,
   2.377 +                req->current_nr_sectors, req->nr_sectors, req->bh);
   2.378 +
   2.379 +        rw = req->cmd;
   2.380 +        if ( rw == READA ) rw = READ;
   2.381 +        if ((rw != READ) && (rw != WRITE))
   2.382 +            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
   2.383 +
   2.384  	req->errors = 0;
   2.385  
   2.386          bh = req->bh;
   2.387 -        /*
   2.388 -         * XXX KAF: I get read errors if I turn the following into a loop.
   2.389 -         * Why doesn't it work? I should even be able to handle out-of-order
   2.390 -         * responses... :-(
   2.391 -         */
   2.392 -        /* while */ if ( bh != NULL )
   2.393 +        while ( bh != NULL )
   2.394  	{
   2.395 -	    offset = bh->b_rsector << 9;
   2.396 -	    length = bh->b_size;
   2.397 -	    
   2.398 -	    rw = req->cmd;
   2.399 -	    if (rw == READA)  rw= READ;
   2.400 -	    if ((rw != READ) && (rw != WRITE)) {
   2.401 -		printk (KERN_ALERT
   2.402 -			"XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
   2.403 -		BUG();
   2.404 -	    }
   2.405 +            full = hypervisor_request(
   2.406 +                bh, (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
   2.407 +                bh->b_data, bh->b_rsector, bh->b_size, 
   2.408 +                bh->b_dev, XEN_BLOCK_ASYNC);
   2.409 +            
   2.410 +            if ( full ) goto out;
   2.411 +
   2.412 +            queued++;
   2.413  
   2.414 -            if ( bh->b_reqnext != NULL )
   2.415 +            /* Dequeue the buffer head from the request. */
   2.416 +            nsect = bh->b_size >> 9;
   2.417 +            req->bh = bh->b_reqnext;
   2.418 +            bh->b_reqnext = NULL;
   2.419 +            bh = req->bh;
   2.420 +            
   2.421 +            if ( bh != NULL )
   2.422              {
   2.423 -                hypervisor_request(
   2.424 -                    BH_TO_ID(bh),
   2.425 -                    rw == READ ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
   2.426 -                    bh->b_data, bh->b_rsector, bh->b_size, 
   2.427 -                    bh->b_dev, XEN_BLOCK_SYNC);
   2.428 -
   2.429 -                /* From ll_rw_blk.c:end_that_request_first(). */
   2.430 -                nsect = bh->b_size >> 9;
   2.431 -                req->bh = bh->b_reqnext;
   2.432 -                bh->b_reqnext = NULL;
   2.433 -                bh = req->bh;
   2.434 +                /* There's another buffer head to do. Update the request. */
   2.435                  req->hard_sector += nsect;
   2.436                  req->hard_nr_sectors -= nsect;
   2.437                  req->sector = req->hard_sector;
   2.438 @@ -382,15 +283,16 @@ static void do_xlblk_request (request_qu
   2.439              }
   2.440              else
   2.441              {
   2.442 -                hypervisor_request(
   2.443 -                    REQ_TO_ID(req),
   2.444 -                    rw == READ ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
   2.445 -                    bh->b_data, bh->b_rsector, bh->b_size, 
   2.446 -                    bh->b_dev, XEN_BLOCK_SYNC);
   2.447 -                bh = NULL;
   2.448 +                /* That was the last buffer head. Finalise the request. */
   2.449 +                if ( end_that_request_first(req, 1, "XenBlk") ) BUG();
   2.450 +                blkdev_dequeue_request(req);
   2.451 +                end_that_request_last(req);
   2.452              }
   2.453          }
   2.454      }
   2.455 +
   2.456 + out:
   2.457 +    if ( queued != 0 ) HYPERVISOR_block_io_op();
   2.458  }
   2.459  
   2.460  
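
The large hunk above changes the submission discipline: hypervisor_request now fails fast (returning 1) when the shared ring is full, do_xlblk_request dequeues each buffer head as soon as it has been queued as an ASYNC request, and a single trap at the out: label kicks Xen for the whole batch. A condensed sketch of that producer pattern, with simplified stand-in names throughout:

    #define RING_SIZE 64
    #define RING_INC(_i) (((_i) + 1) & (RING_SIZE - 1))

    struct req_ring { unsigned int prod, cons; void *slot[RING_SIZE]; };

    static void notify(void) { /* stands in for HYPERVISOR_block_io_op() */ }

    /* Like the new hypervisor_request(): returns 1 if the ring is full. */
    static int try_enqueue(struct req_ring *r, void *req)
    {
        if ( RING_INC(r->prod) == r->cons )
            return 1;                   /* full: one slot is kept free */
        r->slot[r->prod] = req;
        r->prod = RING_INC(r->prod);
        return 0;
    }

    /* Like the new do_xlblk_request(): queue what fits, kick Xen once. */
    static void submit_batch(struct req_ring *r, void **reqs, int n)
    {
        int i, queued = 0;
        for ( i = 0; i < n; i++ )
        {
            if ( try_enqueue(r, reqs[i]) )
                break;                  /* ring full; caller retries later */
            queued++;
        }
        if ( queued != 0 )
            notify();
    }
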
   2.461 @@ -405,42 +307,23 @@ static struct block_device_operations xe
   2.462  
   2.463  static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
   2.464  {
   2.465 -    struct request *req;
   2.466 -    int loop;
   2.467 +    int i;
   2.468      unsigned long flags; 
   2.469      struct buffer_head *bh;
   2.470      
   2.471      spin_lock_irqsave(&io_request_lock, flags);	    
   2.472  
   2.473 -    for ( loop = blk_ring->resp_cons;
   2.474 -	  loop != blk_ring->resp_prod;
   2.475 -	  loop = BLK_RESP_RING_INC(loop) )
   2.476 +    for ( i = blk_ring->resp_cons;
   2.477 +	  i != blk_ring->resp_prod;
   2.478 +	  i = BLK_RESP_RING_INC(i) )
   2.479      {
   2.480 -	blk_ring_resp_entry_t *bret = &blk_ring->resp_ring[loop];
   2.481 -
   2.482 -        if ( bret->id == NULL ) continue; /* probes have NULL id */
   2.483 -
   2.484 -	if ( ID_IS_REQ(bret->id) )
   2.485 -        {
   2.486 -            req = ID_TO_REQ(bret->id);
   2.487 -            if ( end_that_request_first(req, 1, "XenBlk") ) BUG();
   2.488 -            blkdev_dequeue_request(req);
   2.489 -            end_that_request_last(req);
   2.490 -	}
   2.491 -        else
   2.492 -        {
   2.493 -            bh = ID_TO_BH(bret->id);
   2.494 -            bh->b_end_io(bh, 1);
   2.495 -        }
   2.496 +	blk_ring_resp_entry_t *bret = &blk_ring->resp_ring[i];
   2.497 +        if ( (bh = bret->id) != NULL ) bh->b_end_io(bh, 1);
   2.498      }
   2.499      
   2.500 -    blk_ring->resp_cons = loop;
   2.501 +    blk_ring->resp_cons = i;
   2.502  
   2.503 -    /*
   2.504 -     * KAF: I believe this is safe. It also appears to be necessary, if
   2.505 -     * we left any data outstanding when welast exited do_xlblk_request.
   2.506 -     * Otherwise we just hang...
   2.507 -     */
   2.508 +    /* KAF: We can push work down at this point. We have the lock. */
   2.509      do_xlblk_request(BLK_DEFAULT_QUEUE(MAJOR_NR));
   2.510      
   2.511      spin_unlock_irqrestore(&io_request_lock, flags);
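
The handler above is the matching consumer: it walks the response ring from resp_cons to resp_prod, completes each buffer head (probe responses carry a NULL id and are skipped), publishes the new consumer index, and re-enters do_xlblk_request to push down work that earlier stalled on a full request ring. In sketch form, again with stand-in names:

    #include <stddef.h>

    #define RING_SIZE 64
    #define RING_INC(_i) (((_i) + 1) & (RING_SIZE - 1))

    struct resp { void *id; };
    struct resp_ring { unsigned int prod, cons; struct resp ring[RING_SIZE]; };

    static void drain_responses(struct resp_ring *r, void (*complete)(void *))
    {
        unsigned int i;
        for ( i = r->cons; i != r->prod; i = RING_INC(i) )
        {
            if ( r->ring[i].id != NULL )   /* probe responses have NULL ids */
                complete(r->ring[i].id);
        }
        r->cons = i;                    /* frees the slots for the producer */
    }
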
   2.512 @@ -449,7 +332,7 @@ static void xlblk_response_int(int irq, 
   2.513  
   2.514  int __init xlblk_init(void)
   2.515  {
   2.516 -    int loop, error, result;
   2.517 +    int i, error, result;
   2.518  
   2.519      /* This mapping was created early at boot time. */
   2.520      blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
   2.521 @@ -458,7 +341,7 @@ int __init xlblk_init(void)
   2.522      blk_ring->resp_prod = blk_ring->resp_cons = 0;
   2.523      
   2.524      error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int, 0, 
   2.525 -			"xlblk-response", &xlblk_device);
   2.526 +			"xlblk-response", NULL);
   2.527      if (error) {
   2.528  	printk(KERN_ALERT "Could not allocate receive interrupt\n");
   2.529  	goto fail;
   2.530 @@ -467,14 +350,15 @@ int __init xlblk_init(void)
   2.531      memset (&xen_disk_info, 0, sizeof(xen_disk_info));
   2.532      xen_disk_info.count = 0;
   2.533  
   2.534 -    hypervisor_request(NULL, XEN_BLOCK_PROBE, (char *) &xen_disk_info,
   2.535 -		       0, 0, (kdev_t) 0, XEN_BLOCK_SYNC);
   2.536 +    if ( hypervisor_request(NULL, XEN_BLOCK_PROBE, (char *) &xen_disk_info,
   2.537 +                            0, 0, (kdev_t) 0, XEN_BLOCK_SYNC) )
   2.538 +        BUG();
   2.539      while ( blk_ring->resp_prod != 1 ) barrier();
   2.540 -    for ( loop = 0; loop < xen_disk_info.count; loop++ )
   2.541 +    for ( i = 0; i < xen_disk_info.count; i++ )
   2.542      { 
   2.543  	printk (KERN_ALERT "  %2d: type: %d, capacity: %ld\n",
   2.544 -		loop, xen_disk_info.disks[loop].type, 
   2.545 -		xen_disk_info.disks[loop].capacity);
   2.546 +		i, xen_disk_info.disks[i].type, 
   2.547 +		xen_disk_info.disks[i].capacity);
   2.548      }
   2.549      
   2.550      SET_MODULE_OWNER(&xenolinux_block_fops);
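
The probe in xlblk_init is issued synchronously, and the init path then spins on resp_prod until the single response arrives; barrier() is a compiler barrier that forces the flag to be re-read from memory on every iteration. A userspace sketch of the same polling idiom, with a volatile access standing in for the kernel's barrier():

    /* Set by the (hypothetical) response handler once the probe completes. */
    static unsigned int resp_prod;

    static void wait_for_probe_response(void)
    {
        /* The volatile cast forces a fresh load on every pass, which is
         * what barrier() guarantees in the original code. */
        while ( *(volatile unsigned int *)&resp_prod != 1 )
            ; /* spin */
    }
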
   2.551 @@ -486,11 +370,11 @@ int __init xlblk_init(void)
   2.552      }
   2.553  
   2.554      /* initialize global arrays in drivers/block/ll_rw_block.c */
   2.555 -    for (loop = 0; loop < XLBLK_MAX; loop++) {
   2.556 -	xlblk_blk_size[loop]      = xen_disk_info.disks[0].capacity;
   2.557 -	xlblk_blksize_size[loop]  = 512;
   2.558 -	xlblk_hardsect_size[loop] = 512;
   2.559 -	xlblk_max_sectors[loop]   = 128;
   2.560 +    for (i = 0; i < XLBLK_MAX; i++) {
   2.561 +	xlblk_blk_size[i]      = xen_disk_info.disks[0].capacity;
   2.562 +	xlblk_blksize_size[i]  = 512;
   2.563 +	xlblk_hardsect_size[i] = 512;
   2.564 +	xlblk_max_sectors[i]   = 128;
   2.565      }
   2.566      xlblk_read_ahead  = 8; 
   2.567  
   2.568 @@ -502,15 +386,11 @@ int __init xlblk_init(void)
   2.569  
   2.570      blk_init_queue(BLK_DEFAULT_QUEUE(xlblk_major), do_xlblk_request);
   2.571  
   2.572 -#if 0 /* KAF: We now do the default thing and leave requests on the queue. */
   2.573      /*
   2.574 -     * XXX KAF (again): see big XXX comment above. As per SCSI code, we'll
   2.575 -     * probably add this in so that we can peel off multiple outstanding
   2.576 -     * requests from teh request queue, giving us easy access to the 
   2.577 -     * real head that still has work to be sent down to Xen.
   2.578 +     * Turn off barking 'headactive' mode. We dequeue buffer heads as
   2.579 +     * soon as we pass them down to Xen.
   2.580       */
   2.581      blk_queue_headactive(BLK_DEFAULT_QUEUE(xlblk_major), 0);
   2.582 -#endif
   2.583  
   2.584      xlblk_ide_register_disk(0, xen_disk_info.disks[0].capacity);
   2.585