ia64/xen-unstable

changeset 13999:3c827d68fa87

[TAPDISK] add tapdisk support for image chaining

Enables tapdisk to chain an arbitrary number of VDIs (virtual disk images),
propagating reads of holes (unallocated regions) in a child image to its
parent image. Introduces two new functions in the tapdisk interface,
td_has_parent and td_get_parent, to facilitate this. Modifies the QCoW plugin
to take advantage of these changes, thus providing support for arbitrarily
long chains of QCoW images.
author Jake Wires <jwires@xensource.com>
date Fri Feb 16 20:31:27 2007 -0800 (2007-02-16)
parents 32a059913591
children 31e65c4ba739
files tools/blktap/drivers/Makefile tools/blktap/drivers/block-aio.c tools/blktap/drivers/block-qcow.c tools/blktap/drivers/block-ram.c tools/blktap/drivers/block-sync.c tools/blktap/drivers/block-vmdk.c tools/blktap/drivers/img2qcow.c tools/blktap/drivers/qcow2raw.c tools/blktap/drivers/tapdisk.c tools/blktap/drivers/tapdisk.h tools/blktap/lib/blktaplib.h tools/blktap/lib/xs_api.c
line diff
     1.1 --- a/tools/blktap/drivers/Makefile	Fri Feb 16 16:34:28 2007 +0000
     1.2 +++ b/tools/blktap/drivers/Makefile	Fri Feb 16 20:31:27 2007 -0800
     1.3 @@ -5,7 +5,7 @@ INCLUDES += -I.. -I../lib
     1.4  
     1.5  IBIN         = blktapctrl tapdisk
     1.6  QCOW_UTIL    = img2qcow qcow2raw qcow-create
     1.7 -INST_DIR  = /usr/sbin
     1.8 +INST_DIR     = /usr/sbin
     1.9  LIBAIO_DIR   = ../../libaio/src
    1.10  
    1.11  CFLAGS   += -Werror
    1.12 @@ -17,7 +17,7 @@ CFLAGS   += -D_GNU_SOURCE
    1.13  
    1.14  # Get gcc to generate the dependencies for us.
    1.15  CFLAGS   += -Wp,-MD,.$(@F).d
    1.16 -DEPS     = .*.d
    1.17 +DEPS      = .*.d
    1.18  
    1.19  THREADLIB := -lpthread -lz
    1.20  LIBS      := -L. -L.. -L../lib
    1.21 @@ -29,10 +29,10 @@ LIBS      += -L$(XEN_XENSTORE) -lxenstor
    1.22  
    1.23  AIOLIBS   := $(LIBAIO_DIR)/libaio.a
    1.24  
    1.25 -BLK-OBJS  := block-aio.o 
    1.26 -BLK-OBJS  += block-sync.o 
    1.27 +BLK-OBJS  := block-aio.o
    1.28 +BLK-OBJS  += block-sync.o
    1.29  BLK-OBJS  += block-vmdk.o
    1.30 -BLK-OBJS  += block-ram.o 
    1.31 +BLK-OBJS  += block-ram.o
    1.32  BLK-OBJS  += block-qcow.o
    1.33  BLK-OBJS  += aes.o
    1.34  
    1.35 @@ -52,13 +52,13 @@ tapdisk: $(BLK-OBJS) tapdisk.c
    1.36  qcow-util: img2qcow qcow2raw qcow-create
    1.37  
    1.38  img2qcow qcow2raw qcow-create: %: $(BLK-OBJS)
    1.39 -	$(CC) $(CFLAGS) -o $* $(BLK-OBJS) $*.c $(AIOLIBS)  $(LIBS)
    1.40 +	$(CC) $(CFLAGS) -o $* $(BLK-OBJS) $*.c $(AIOLIBS) $(LIBS)
    1.41  
    1.42  install: all
    1.43 -	$(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(DESTDIR)$(INST_DIR)
    1.44 +	$(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(VHD_UTIL) $(DESTDIR)$(INST_DIR)
    1.45  
    1.46  clean:
    1.47 -	rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL)
    1.48 +	rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL) $(VHD_UTIL)
    1.49  
    1.50  .PHONY: clean install
    1.51  
     2.1 --- a/tools/blktap/drivers/block-aio.c	Fri Feb 16 16:34:28 2007 +0000
     2.2 +++ b/tools/blktap/drivers/block-aio.c	Fri Feb 16 20:31:27 2007 -0800
     2.3 @@ -58,6 +58,7 @@ struct pending_aio {
     2.4  	td_callback_t cb;
     2.5  	int id;
     2.6  	void *private;
     2.7 +	uint64_t lsec;
     2.8  };
     2.9  
    2.10  struct tdaio_state {
    2.11 @@ -139,12 +140,23 @@ static int get_image_info(struct td_stat
    2.12  	return 0;
    2.13  }
    2.14  
    2.15 +static inline void init_fds(struct disk_driver *dd)
    2.16 +{
    2.17 +	int i;
    2.18 +	struct tdaio_state *prv = (struct tdaio_state *)dd->private;
    2.19 +
    2.20 +	for(i = 0; i < MAX_IOFD; i++) 
    2.21 +		dd->io_fd[i] = 0;
    2.22 +
    2.23 +	dd->io_fd[0] = prv->poll_fd;
    2.24 +}
    2.25 +
    2.26  /* Open the disk file and initialize aio state. */
    2.27 -int tdaio_open (struct td_state *s, const char *name)
    2.28 +int tdaio_open (struct disk_driver *dd, const char *name)
    2.29  {
    2.30  	int i, fd, ret = 0;
    2.31 -	struct tdaio_state *prv = (struct tdaio_state *)s->private;
    2.32 -	s->private = prv;
    2.33 +	struct td_state    *s   = dd->td_state;
    2.34 +	struct tdaio_state *prv = (struct tdaio_state *)dd->private;
    2.35  
    2.36  	DPRINTF("block-aio open('%s')", name);
    2.37  	/* Initialize AIO */
    2.38 @@ -194,47 +206,21 @@ int tdaio_open (struct td_state *s, cons
    2.39  
    2.40          prv->fd = fd;
    2.41  
    2.42 +	init_fds(dd);
    2.43  	ret = get_image_info(s, fd);
    2.44 +
    2.45  done:
    2.46  	return ret;	
    2.47  }
    2.48  
    2.49 -int tdaio_queue_read(struct td_state *s, uint64_t sector,
    2.50 -			       int nb_sectors, char *buf, td_callback_t cb,
    2.51 -			       int id, void *private)
    2.52 +int tdaio_queue_read(struct disk_driver *dd, uint64_t sector,
    2.53 +		     int nb_sectors, char *buf, td_callback_t cb,
    2.54 +		     int id, void *private)
    2.55  {
    2.56  	struct   iocb *io;
    2.57  	struct   pending_aio *pio;
    2.58 -	struct   tdaio_state *prv = (struct tdaio_state *)s->private;
    2.59 -	int      size    = nb_sectors * s->sector_size;
    2.60 -	uint64_t offset  = sector * (uint64_t)s->sector_size;
    2.61 -	long     ioidx;
    2.62 -	
    2.63 -	if (prv->iocb_free_count == 0)
    2.64 -		return -ENOMEM;
    2.65 -	io = prv->iocb_free[--prv->iocb_free_count];
    2.66 -	
    2.67 -	ioidx = IOCB_IDX(prv, io);
    2.68 -	pio = &prv->pending_aio[ioidx];
    2.69 -	pio->cb = cb;
    2.70 -	pio->id = id;
    2.71 -	pio->private = private;
    2.72 -	
    2.73 -	io_prep_pread(io, prv->fd, buf, size, offset);
    2.74 -	io->data = (void *)ioidx;
    2.75 -	
    2.76 -	prv->iocb_queue[prv->iocb_queued++] = io;
    2.77 -	
    2.78 -	return 0;
    2.79 -}
    2.80 -			
    2.81 -int tdaio_queue_write(struct td_state *s, uint64_t sector,
    2.82 -			       int nb_sectors, char *buf, td_callback_t cb,
    2.83 -			       int id, void *private)
    2.84 -{
    2.85 -	struct   iocb *io;
    2.86 -	struct   pending_aio *pio;
    2.87 -	struct   tdaio_state *prv = (struct tdaio_state *)s->private;
    2.88 +	struct   td_state    *s   = dd->td_state;
    2.89 +	struct   tdaio_state *prv = (struct tdaio_state *)dd->private;
    2.90  	int      size    = nb_sectors * s->sector_size;
    2.91  	uint64_t offset  = sector * (uint64_t)s->sector_size;
    2.92  	long     ioidx;
    2.93 @@ -248,19 +234,51 @@ int tdaio_queue_write(struct td_state *s
    2.94  	pio->cb = cb;
    2.95  	pio->id = id;
    2.96  	pio->private = private;
    2.97 +	pio->lsec = sector;
    2.98 +	
    2.99 +	io_prep_pread(io, prv->fd, buf, size, offset);
   2.100 +	io->data = (void *)ioidx;
   2.101 +	
   2.102 +	prv->iocb_queue[prv->iocb_queued++] = io;
   2.103 +
   2.104 +	return 0;
   2.105 +}
   2.106 +			
   2.107 +int tdaio_queue_write(struct disk_driver *dd, uint64_t sector,
   2.108 +		      int nb_sectors, char *buf, td_callback_t cb,
   2.109 +		      int id, void *private)
   2.110 +{
   2.111 +	struct   iocb *io;
   2.112 +	struct   pending_aio *pio;
   2.113 +	struct   td_state    *s   = dd->td_state;
   2.114 +	struct   tdaio_state *prv = (struct tdaio_state *)dd->private;
   2.115 +	int      size    = nb_sectors * s->sector_size;
   2.116 +	uint64_t offset  = sector * (uint64_t)s->sector_size;
   2.117 +	long     ioidx;
   2.118 +	
   2.119 +	if (prv->iocb_free_count == 0)
   2.120 +		return -ENOMEM;
   2.121 +	io = prv->iocb_free[--prv->iocb_free_count];
   2.122 +	
   2.123 +	ioidx = IOCB_IDX(prv, io);
   2.124 +	pio = &prv->pending_aio[ioidx];
   2.125 +	pio->cb = cb;
   2.126 +	pio->id = id;
   2.127 +	pio->private = private;
   2.128 +	pio->lsec = sector;
   2.129  	
   2.130  	io_prep_pwrite(io, prv->fd, buf, size, offset);
   2.131  	io->data = (void *)ioidx;
   2.132  	
   2.133  	prv->iocb_queue[prv->iocb_queued++] = io;
   2.134 -	
   2.135 +
   2.136  	return 0;
   2.137  }
   2.138  			
   2.139 -int tdaio_submit(struct td_state *s)
   2.140 +int tdaio_submit(struct disk_driver *dd)
   2.141  {
   2.142  	int ret;
   2.143 -	struct   tdaio_state *prv = (struct tdaio_state *)s->private;
   2.144 +	struct tdaio_state *prv = (struct tdaio_state *)dd->private;
   2.145  
   2.146  	ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
   2.147  	
   2.148 @@ -269,38 +287,24 @@ int tdaio_submit(struct td_state *s)
   2.149  	/* Success case: */
   2.150  	prv->iocb_queued = 0;
   2.151  	
   2.152 -	return ret;
   2.153 +	return 0;
   2.154  }
   2.155  
   2.156 -int *tdaio_get_fd(struct td_state *s)
   2.157 +int tdaio_close(struct disk_driver *dd)
   2.158  {
   2.159 -	struct tdaio_state *prv = (struct tdaio_state *)s->private;
   2.160 -	int *fds, i;
   2.161 -
   2.162 -	fds = malloc(sizeof(int) * MAX_IOFD);
   2.163 -	/*initialise the FD array*/
   2.164 -	for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
   2.165 -
   2.166 -	fds[0] = prv->poll_fd;
   2.167 -
   2.168 -	return fds;	
   2.169 -}
   2.170 -
   2.171 -int tdaio_close(struct td_state *s)
   2.172 -{
   2.173 -	struct tdaio_state *prv = (struct tdaio_state *)s->private;
   2.174 +	struct tdaio_state *prv = (struct tdaio_state *)dd->private;
   2.175  	
   2.176  	io_destroy(prv->aio_ctx);
   2.177  	close(prv->fd);
   2.178 -	
   2.179 +
   2.180  	return 0;
   2.181  }
   2.182  
   2.183 -int tdaio_do_callbacks(struct td_state *s, int sid)
   2.184 +int tdaio_do_callbacks(struct disk_driver *dd, int sid)
   2.185  {
   2.186  	int ret, i, rsp = 0;
   2.187  	struct io_event *ep;
   2.188 -	struct tdaio_state *prv = (struct tdaio_state *)s->private;
   2.189 +	struct tdaio_state *prv = (struct tdaio_state *)dd->private;
   2.190  
   2.191  	/* Non-blocking test for completed io. */
   2.192  	ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
   2.193 @@ -311,22 +315,34 @@ int tdaio_do_callbacks(struct td_state *
   2.194  		struct pending_aio *pio;
   2.195  		
   2.196  		pio = &prv->pending_aio[(long)io->data];
   2.197 -		rsp += pio->cb(s, ep->res == io->u.c.nbytes ? 0 : 1,
   2.198 +		rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1,
   2.199 +			       pio->lsec, io->u.c.nbytes >> 9, 
   2.200  			       pio->id, pio->private);
   2.201  
   2.202  		prv->iocb_free[prv->iocb_free_count++] = io;
   2.203  	}
   2.204  	return rsp;
   2.205  }
   2.206 -	
   2.207 +
   2.208 +int tdaio_has_parent(struct disk_driver *dd)
   2.209 +{
   2.210 +	return 0;
   2.211 +}
   2.212 +
   2.213 +int tdaio_get_parent(struct disk_driver *dd, struct disk_driver *parent)
   2.214 +{
   2.215 +	return -EINVAL;
   2.216 +}
   2.217 +
   2.218  struct tap_disk tapdisk_aio = {
   2.219 -	"tapdisk_aio",
   2.220 -	sizeof(struct tdaio_state),
   2.221 -	tdaio_open,
   2.222 -	tdaio_queue_read,
   2.223 -	tdaio_queue_write,
   2.224 -	tdaio_submit,
   2.225 -	tdaio_get_fd,
   2.226 -	tdaio_close,
   2.227 -	tdaio_do_callbacks,
   2.228 +	.disk_type          = "tapdisk_aio",
   2.229 +	.private_data_size  = sizeof(struct tdaio_state),
   2.230 +	.td_open            = tdaio_open,
   2.231 +	.td_queue_read      = tdaio_queue_read,
   2.232 +	.td_queue_write     = tdaio_queue_write,
   2.233 +	.td_submit          = tdaio_submit,
   2.234 +	.td_has_parent      = tdaio_has_parent,
   2.235 +	.td_get_parent      = tdaio_get_parent,
   2.236 +	.td_close           = tdaio_close,
   2.237 +	.td_do_callbacks    = tdaio_do_callbacks,
   2.238  };
     3.1 --- a/tools/blktap/drivers/block-qcow.c	Fri Feb 16 16:34:28 2007 +0000
     3.2 +++ b/tools/blktap/drivers/block-qcow.c	Fri Feb 16 20:31:27 2007 -0800
     3.3 @@ -55,7 +55,6 @@
     3.4  
     3.5  /******AIO DEFINES******/
     3.6  #define REQUEST_ASYNC_FD 1
     3.7 -#define MAX_QCOW_IDS  0xFFFF
     3.8  #define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
     3.9  
    3.10  struct pending_aio {
    3.11 @@ -65,7 +64,6 @@ struct pending_aio {
    3.12  	int nb_sectors;
    3.13  	char *buf;
    3.14  	uint64_t sector;
    3.15 -	int qcow_idx;
    3.16  };
    3.17  
    3.18  #define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
    3.19 @@ -115,9 +113,9 @@ typedef struct QCowHeader_ext {
    3.20  struct tdqcow_state {
    3.21          int fd;                        /*Main Qcow file descriptor */
    3.22  	uint64_t fd_end;               /*Store a local record of file length */
    3.23 -	int bfd;                       /*Backing file descriptor*/
    3.24  	char *name;                    /*Record of the filename*/
    3.25 -	int poll_pipe[2];              /*dummy fd for polling on */
    3.26 +	uint32_t backing_file_size;
    3.27 +	uint64_t backing_file_offset;
    3.28  	int encrypted;                 /*File contents are encrypted or plain*/
    3.29  	int cluster_bits;              /*Determines length of cluster as 
    3.30  					*indicated by file hdr*/
    3.31 @@ -149,7 +147,6 @@ struct tdqcow_state {
    3.32  	AES_KEY aes_decrypt_key;       /*AES key*/
    3.33          /* libaio state */
    3.34          io_context_t       aio_ctx;
    3.35 -	int		   nr_reqs [MAX_QCOW_IDS];
    3.36          struct iocb        iocb_list  [MAX_AIO_REQS];
    3.37          struct iocb       *iocb_free  [MAX_AIO_REQS];
    3.38          struct pending_aio pending_aio[MAX_AIO_REQS];
    3.39 @@ -162,10 +159,11 @@ struct tdqcow_state {
    3.40  
    3.41  static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
    3.42  
    3.43 -static int init_aio_state(struct td_state *bs)
    3.44 +static int init_aio_state(struct disk_driver *dd)
    3.45  {
    3.46          int i;
    3.47 -	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
    3.48 +	struct td_state     *bs = dd->td_state;
    3.49 +	struct tdqcow_state  *s = (struct tdqcow_state *)dd->private;
    3.50          long     ioidx;
    3.51  
    3.52          /*Initialize Locking bitmap*/
    3.53 @@ -202,8 +200,7 @@ static int init_aio_state(struct td_stat
    3.54  
    3.55          for (i=0;i<MAX_AIO_REQS;i++)
    3.56                  s->iocb_free[i] = &s->iocb_list[i];
    3.57 -	for (i=0;i<MAX_QCOW_IDS;i++)
    3.58 -		s->nr_reqs[i] = 0;
    3.59 +
    3.60          DPRINTF("AIO state initialised\n");
    3.61  
    3.62          return 0;
    3.63 @@ -238,7 +235,10 @@ static uint32_t gen_cksum(char *ptr, int
    3.64  
    3.65  	if(!md) return 0;
    3.66  
    3.67 -	if (MD5((unsigned char *)ptr, len, md) != md) return 0;
    3.68 +	if (MD5((unsigned char *)ptr, len, md) != md) {
    3.69 +		free(md);
    3.70 +		return 0;
    3.71 +	}
    3.72  
    3.73  	memcpy(&ret, md, sizeof(uint32_t));
    3.74  	free(md);
    3.75 @@ -247,26 +247,42 @@ static uint32_t gen_cksum(char *ptr, int
    3.76  
    3.77  static int get_filesize(char *filename, uint64_t *size, struct stat *st)
    3.78  {
    3.79 -	int blockfd;
    3.80 +	int fd;
    3.81 +	QCowHeader header;
    3.82  
    3.83  	/*Set to the backing file size*/
    3.84 +	fd = open(filename, O_RDONLY);
    3.85 +	if (fd < 0)
    3.86 +		return -1;
    3.87 +	if (read(fd, &header, sizeof(header)) < sizeof(header)) {
    3.88 +		close(fd);
    3.89 +		return -1;
    3.90 +	}
    3.91 +	close(fd);
    3.92 +	
    3.93 +	be32_to_cpus(&header.magic);
    3.94 +	be64_to_cpus(&header.size);
    3.95 +	if (header.magic == QCOW_MAGIC) {
    3.96 +		*size = header.size >> SECTOR_SHIFT;
    3.97 +		return 0;
    3.98 +	}
    3.99 +
   3.100  	if(S_ISBLK(st->st_mode)) {
   3.101 -		blockfd = open(filename, O_RDONLY);
   3.102 -		if (blockfd < 0)
   3.103 +		fd = open(filename, O_RDONLY);
   3.104 +		if (fd < 0)
   3.105  			return -1;
   3.106 -		if (ioctl(blockfd,BLKGETSIZE,size)!=0) {
   3.107 +		if (ioctl(fd,BLKGETSIZE,size)!=0) {
   3.108  			printf("Unable to get Block device size\n");
   3.109 -			close(blockfd);
   3.110 +			close(fd);
   3.111  			return -1;
   3.112  		}
   3.113 -		close(blockfd);
   3.114 +		close(fd);
   3.115  	} else *size = (st->st_size >> SECTOR_SHIFT);	
   3.116  	return 0;
   3.117  }
   3.118  
   3.119 -static int qcow_set_key(struct td_state *bs, const char *key)
   3.120 +static int qcow_set_key(struct tdqcow_state *s, const char *key)
   3.121  {
   3.122 -	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
   3.123  	uint8_t keybuf[16];
   3.124  	int len, i;
   3.125  	
   3.126 @@ -306,10 +322,9 @@ static int qcow_set_key(struct td_state 
   3.127  	return 0;
   3.128  }
   3.129  
   3.130 -static int async_read(struct tdqcow_state *s, int fd, int size, 
   3.131 -		     uint64_t offset,
   3.132 -		     char *buf, td_callback_t cb,
   3.133 -		     int id, uint64_t sector, int qcow_idx, void *private)
   3.134 +static int async_read(struct tdqcow_state *s, int size, 
   3.135 +		      uint64_t offset, char *buf, td_callback_t cb,
   3.136 +		      int id, uint64_t sector, void *private)
   3.137  {
   3.138          struct   iocb *io;
   3.139          struct   pending_aio *pio;
   3.140 @@ -325,9 +340,8 @@ static int async_read(struct tdqcow_stat
   3.141  	pio->nb_sectors = size/512;
   3.142  	pio->buf = buf;
   3.143  	pio->sector = sector;
   3.144 -	pio->qcow_idx = qcow_idx;
   3.145  
   3.146 -        io_prep_pread(io, fd, buf, size, offset);
   3.147 +        io_prep_pread(io, s->fd, buf, size, offset);
   3.148          io->data = (void *)ioidx;
   3.149  
   3.150          s->iocb_queue[s->iocb_queued++] = io;
   3.151 @@ -335,10 +349,9 @@ static int async_read(struct tdqcow_stat
   3.152          return 1;
   3.153  }
   3.154  
   3.155 -static int async_write(struct tdqcow_state *s, int fd, int size, 
   3.156 -		     uint64_t offset,
   3.157 -		     char *buf, td_callback_t cb,
   3.158 -		      int id, uint64_t sector, int qcow_idx, void *private)
   3.159 +static int async_write(struct tdqcow_state *s, int size,
   3.160 +		       uint64_t offset, char *buf, td_callback_t cb,
   3.161 +		       int id, uint64_t sector, void *private)
   3.162  {
   3.163          struct   iocb *io;
   3.164          struct   pending_aio *pio;
   3.165 @@ -354,9 +367,8 @@ static int async_write(struct tdqcow_sta
   3.166  	pio->nb_sectors = size/512;
   3.167  	pio->buf = buf;
   3.168  	pio->sector = sector;
   3.169 -	pio->qcow_idx = qcow_idx;
   3.170  
   3.171 -        io_prep_pwrite(io, fd, buf, size, offset);
   3.172 +        io_prep_pwrite(io, s->fd, buf, size, offset);
   3.173          io->data = (void *)ioidx;
   3.174  
   3.175          s->iocb_queue[s->iocb_queued++] = io;
   3.176 @@ -383,17 +395,6 @@ static void aio_unlock(struct tdqcow_sta
   3.177  	return;
   3.178  }
   3.179  
   3.180 -/*TODO - Use a freelist*/
   3.181 -static int get_free_idx(struct tdqcow_state *s)
   3.182 -{
   3.183 -	int i;
   3.184 -	
   3.185 -	for(i = 0; i < MAX_QCOW_IDS; i++) {
   3.186 -		if(s->nr_reqs[i] == 0) return i;
   3.187 -	}
   3.188 -	return -1;
   3.189 -}
   3.190 -
   3.191  /* 
   3.192   * The crypt function is compatible with the linux cryptoloop
   3.193   * algorithm for < 4 GB images. NOTE: out_buf == in_buf is
   3.194 @@ -425,23 +426,23 @@ static int qtruncate(int fd, off_t lengt
   3.195  {
   3.196  	int ret, i; 
   3.197  	int current = 0, rem = 0;
   3.198 -	int sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
   3.199 +	uint64_t sectors;
   3.200  	struct stat st;
   3.201 -	char buf[DEFAULT_SECTOR_SIZE];
   3.202 +	char *buf;
   3.203  
   3.204  	/* If length is greater than the current file len
   3.205  	 * we synchronously write zeroes to the end of the 
   3.206  	 * file, otherwise we truncate the length down
   3.207  	 */
   3.208 -	memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
   3.209  	ret = fstat(fd, &st);
   3.210 -	if (ret == -1)
   3.211 +	if (ret == -1) 
   3.212  		return -1;
   3.213  	if (S_ISBLK(st.st_mode))
   3.214  		return 0;
   3.215 -
   3.216 +	
   3.217 +	sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
   3.218  	current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
   3.219 -	rem = st.st_size % DEFAULT_SECTOR_SIZE;
   3.220 +	rem     = st.st_size % DEFAULT_SECTOR_SIZE;
   3.221  
   3.222  	/* If we are extending this file, we write zeros to the end --
   3.223  	 * this tries to ensure that the extents allocated wind up being
   3.224 @@ -449,28 +450,40 @@ static int qtruncate(int fd, off_t lengt
   3.225  	 */
   3.226  	if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) {
   3.227  		/*We are extending the file*/
   3.228 +		if ((ret = posix_memalign((void **)&buf, 
   3.229 +					  512, DEFAULT_SECTOR_SIZE))) {
   3.230 +			DPRINTF("posix_memalign failed: %d\n", ret);
   3.231 +			return -1;
   3.232 +		}
   3.233 +		memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
   3.234  		if (lseek(fd, 0, SEEK_END)==-1) {
   3.235 -			fprintf(stderr, 
   3.236 -				"Lseek EOF failed (%d), internal error\n",
   3.237 +			DPRINTF("Lseek EOF failed (%d), internal error\n",
   3.238  				errno);
   3.239 +			free(buf);
   3.240  			return -1;
   3.241  		}
   3.242  		if (rem) {
   3.243  			ret = write(fd, buf, rem);
   3.244 -			if (ret != rem)
   3.245 +			if (ret != rem) {
   3.246 +				DPRINTF("write failed: ret = %d, err = %s\n",
   3.247 +					ret, strerror(errno));
   3.248 +				free(buf);
   3.249  				return -1;
   3.250 +			}
   3.251  		}
   3.252  		for (i = current; i < sectors; i++ ) {
   3.253  			ret = write(fd, buf, DEFAULT_SECTOR_SIZE);
   3.254 -			if (ret != DEFAULT_SECTOR_SIZE)
   3.255 +			if (ret != DEFAULT_SECTOR_SIZE) {
   3.256 +				DPRINTF("write failed: ret = %d, err = %s\n",
   3.257 +					ret, strerror(errno));
   3.258 +				free(buf);
   3.259  				return -1;
   3.260 +			}
   3.261  		}
   3.262 -		
   3.263 +		free(buf);
   3.264  	} else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE))
   3.265 -		if (ftruncate(fd, sectors * DEFAULT_SECTOR_SIZE)==-1) {
   3.266 -			fprintf(stderr,
   3.267 -				"Ftruncate failed (%d), internal error\n",
   3.268 -                                errno);
   3.269 +		if (ftruncate(fd, (off_t)sectors * DEFAULT_SECTOR_SIZE)==-1) {
   3.270 +			DPRINTF("Ftruncate failed (%s)\n", strerror(errno));
   3.271  			return -1;
   3.272  		}
   3.273  	return 0;
   3.274 @@ -490,12 +503,11 @@ static int qtruncate(int fd, off_t lengt
   3.275   *
   3.276   * return 0 if not allocated.
   3.277   */
   3.278 -static uint64_t get_cluster_offset(struct td_state *bs,
   3.279 +static uint64_t get_cluster_offset(struct tdqcow_state *s,
   3.280                                     uint64_t offset, int allocate,
   3.281                                     int compressed_size,
   3.282                                     int n_start, int n_end)
   3.283  {
   3.284 -	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
   3.285  	int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
   3.286  	char *tmp_ptr, *tmp_ptr2, *l2_ptr, *l1_ptr;
   3.287  	uint64_t l2_offset, *l2_table, cluster_offset, tmp;
   3.288 @@ -550,8 +562,10 @@ static uint64_t get_cluster_offset(struc
   3.289  		 * entry is written before blocks.
   3.290  		 */
   3.291  		lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET);
   3.292 -		if (write(s->fd, tmp_ptr, 4096) != 4096)
   3.293 +		if (write(s->fd, tmp_ptr, 4096) != 4096) {
   3.294 +			free(tmp_ptr);
   3.295  		 	return 0;
   3.296 +		}
   3.297  		free(tmp_ptr);
   3.298  
   3.299  		new_l2_table = 1;
   3.300 @@ -716,9 +730,10 @@ found:
   3.301  	return cluster_offset;
   3.302  }
   3.303  
   3.304 -static void init_cluster_cache(struct td_state *bs)
   3.305 +static void init_cluster_cache(struct disk_driver *dd)
   3.306  {
   3.307 -	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
   3.308 +	struct td_state     *bs = dd->td_state;
   3.309 +	struct tdqcow_state *s  = (struct tdqcow_state *)dd->private;
   3.310  	uint32_t count = 0;
   3.311  	int i, cluster_entries;
   3.312  
   3.313 @@ -727,22 +742,20 @@ static void init_cluster_cache(struct td
   3.314  		cluster_entries, s->cluster_size);
   3.315  
   3.316  	for (i = 0; i < bs->size; i += cluster_entries) {
   3.317 -		if (get_cluster_offset(bs, i << 9, 0, 0, 0, 1)) count++;
   3.318 +		if (get_cluster_offset(s, i << 9, 0, 0, 0, 1)) count++;
   3.319  		if (count >= L2_CACHE_SIZE) return;
   3.320  	}
   3.321  	DPRINTF("Finished cluster initialisation, added %d entries\n", count);
   3.322  	return;
   3.323  }
   3.324  
   3.325 -static int qcow_is_allocated(struct td_state *bs, int64_t sector_num, 
   3.326 +static int qcow_is_allocated(struct tdqcow_state *s, int64_t sector_num,
   3.327                               int nb_sectors, int *pnum)
   3.328  {
   3.329 -	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
   3.330 -
   3.331  	int index_in_cluster, n;
   3.332  	uint64_t cluster_offset;
   3.333  
   3.334 -	cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
   3.335 +	cluster_offset = get_cluster_offset(s, sector_num << 9, 0, 0, 0, 0);
   3.336  	index_in_cluster = sector_num & (s->cluster_sectors - 1);
   3.337  	n = s->cluster_sectors - index_in_cluster;
   3.338  	if (n > nb_sectors)
   3.339 @@ -800,11 +813,23 @@ static int decompress_cluster(struct tdq
   3.340  	return 0;
   3.341  }
   3.342  
   3.343 +static inline void init_fds(struct disk_driver *dd)
   3.344 +{
   3.345 +	int i;
   3.346 +	struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
   3.347 +
   3.348 +	for(i = 0; i < MAX_IOFD; i++) 
   3.349 +		dd->io_fd[i] = 0;
   3.350 +
   3.351 +	dd->io_fd[0] = s->poll_fd;
   3.352 +}
   3.353 +
   3.354  /* Open the disk file and initialize qcow state. */
   3.355 -int tdqcow_open (struct td_state *bs, const char *name)
   3.356 +int tdqcow_open (struct disk_driver *dd, const char *name)
   3.357  {
   3.358  	int fd, len, i, shift, ret, size, l1_table_size;
   3.359 -	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
   3.360 +	struct td_state     *bs = dd->td_state;
   3.361 +	struct tdqcow_state *s  = (struct tdqcow_state *)dd->private;
   3.362  	char *buf;
   3.363  	QCowHeader *header;
   3.364  	QCowHeader_ext *exthdr;
   3.365 @@ -812,10 +837,6 @@ int tdqcow_open (struct td_state *bs, co
   3.366  	uint64_t final_cluster = 0;
   3.367  
   3.368   	DPRINTF("QCOW: Opening %s\n",name);
   3.369 -	/* set up a pipe so that we can hand back a poll fd that won't fire.*/
   3.370 -	ret = pipe(s->poll_pipe);
   3.371 -	if (ret != 0)
   3.372 -		return (0 - errno);
   3.373  
   3.374  	fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE);
   3.375  	if (fd < 0) {
   3.376 @@ -826,7 +847,7 @@ int tdqcow_open (struct td_state *bs, co
   3.377  	s->fd = fd;
   3.378  	asprintf(&s->name,"%s", name);
   3.379  
   3.380 -	ASSERT(sizeof(header) < 512);
   3.381 +	ASSERT(sizeof(QCowHeader) + sizeof(QCowHeader_ext) < 512);
   3.382  
   3.383  	ret = posix_memalign((void **)&buf, 512, 512);
   3.384  	if (ret != 0) goto fail;
   3.385 @@ -861,7 +882,9 @@ int tdqcow_open (struct td_state *bs, co
   3.386  	s->cluster_alloc = s->l2_size;
   3.387  	bs->size = header->size / 512;
   3.388  	s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
   3.389 -	
   3.390 +	s->backing_file_offset = header->backing_file_offset;
   3.391 +	s->backing_file_size   = header->backing_file_size;
   3.392 +
   3.393  	/* read the level 1 table */
   3.394  	shift = s->cluster_bits + s->l2_bits;
   3.395  	s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
   3.396 @@ -887,7 +910,7 @@ int tdqcow_open (struct td_state *bs, co
   3.397  	if (read(fd, s->l1_table, l1_table_size) != l1_table_size)
   3.398  		goto fail;
   3.399  
   3.400 -	for(i = 0;i < s->l1_size; i++) {
   3.401 +	for(i = 0; i < s->l1_size; i++) {
   3.402  		//be64_to_cpus(&s->l1_table[i]);
   3.403  		//DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]);
   3.404  		if (s->l1_table[i] > final_cluster)
   3.405 @@ -907,41 +930,15 @@ int tdqcow_open (struct td_state *bs, co
   3.406  	if(ret != 0) goto fail;
   3.407  	s->cluster_cache_offset = -1;
   3.408  
   3.409 -	/* read the backing file name */
   3.410 -	s->bfd = -1;
   3.411 -	if (header->backing_file_offset != 0) {
   3.412 -		DPRINTF("Reading backing file data\n");
   3.413 -		len = header->backing_file_size;
   3.414 -		if (len > 1023)
   3.415 -			len = 1023;
   3.416 -
   3.417 -                /*TODO - Fix read size for O_DIRECT and use original fd!*/
   3.418 -		fd = open(name, O_RDONLY | O_LARGEFILE);
   3.419 -
   3.420 -		lseek(fd, header->backing_file_offset, SEEK_SET);
   3.421 -		if (read(fd, bs->backing_file, len) != len)
   3.422 -			goto fail;
   3.423 -		bs->backing_file[len] = '\0';
   3.424 -		close(fd);
   3.425 -		/***********************************/
   3.426 -
   3.427 -		/*Open backing file*/
   3.428 -		fd = open(bs->backing_file, O_RDONLY | O_DIRECT | O_LARGEFILE);
   3.429 -		if (fd < 0) {
   3.430 -			DPRINTF("Unable to open backing file: %s\n",
   3.431 -				bs->backing_file);
   3.432 -			goto fail;
   3.433 -		}
   3.434 -		s->bfd = fd;
   3.435 +	if (s->backing_file_offset != 0)
   3.436  		s->cluster_alloc = 1; /*Cannot use pre-alloc*/
   3.437 -	}
   3.438  
   3.439          bs->sector_size = 512;
   3.440          bs->info = 0;
   3.441  	
   3.442  	/*Detect min_cluster_alloc*/
   3.443  	s->min_cluster_alloc = 1; /*Default*/
   3.444 -	if (s->bfd == -1 && (s->l1_table_offset % 4096 == 0) ) {
   3.445 +	if (s->backing_file_offset == 0 && s->l1_table_offset % 4096 == 0) {
   3.446  		/*We test to see if the xen magic # exists*/
   3.447  		exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));
   3.448  		be32_to_cpus(&exthdr->xmagic);
   3.449 @@ -962,10 +959,11 @@ int tdqcow_open (struct td_state *bs, co
   3.450  	}
   3.451  
   3.452   end_xenhdr:
   3.453 -	if (init_aio_state(bs)!=0) {
   3.454 +	if (init_aio_state(dd)!=0) {
   3.455  		DPRINTF("Unable to initialise AIO state\n");
   3.456  		goto fail;
   3.457  	}
   3.458 +	init_fds(dd);
   3.459  	s->fd_end = (final_cluster == 0 ? (s->l1_table_offset + l1_table_size) : 
   3.460  				(final_cluster + s->cluster_size));
   3.461  
   3.462 @@ -981,213 +979,145 @@ fail:
   3.463  	return -1;
   3.464  }
   3.465  
   3.466 - int tdqcow_queue_read(struct td_state *bs, uint64_t sector,
   3.467 -			       int nb_sectors, char *buf, td_callback_t cb,
   3.468 -			       int id, void *private)
   3.469 +int tdqcow_queue_read(struct disk_driver *dd, uint64_t sector,
   3.470 +		      int nb_sectors, char *buf, td_callback_t cb,
   3.471 +		      int id, void *private)
   3.472  {
   3.473 -	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
   3.474 -	int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0;
   3.475 -	uint64_t cluster_offset;
   3.476 +	struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
   3.477 +	int ret = 0, index_in_cluster, n, i, rsp = 0;
   3.478 +	uint64_t cluster_offset, sec, nr_secs;
   3.479 +
   3.480 +	sec     = sector;
   3.481 +	nr_secs = nb_sectors;
   3.482  
   3.483  	/*Check we can get a lock*/
   3.484 -	for (i = 0; i < nb_sectors; i++)
   3.485 -		if (!aio_can_lock(s, sector + i)) {
   3.486 -			DPRINTF("AIO_CAN_LOCK failed [%llu]\n", 
   3.487 -				(long long) sector + i);
   3.488 -			return -EBUSY;
   3.489 -		}
   3.490 -	
   3.491 +	for (i = 0; i < nb_sectors; i++) 
   3.492 +		if (!aio_can_lock(s, sector + i)) 
   3.493 +			return cb(dd, -EBUSY, sector, nb_sectors, id, private);
   3.494 +
   3.495  	/*We store a local record of the request*/
   3.496 -	qcow_idx = get_free_idx(s);
   3.497  	while (nb_sectors > 0) {
   3.498  		cluster_offset = 
   3.499 -			get_cluster_offset(bs, sector << 9, 0, 0, 0, 0);
   3.500 +			get_cluster_offset(s, sector << 9, 0, 0, 0, 0);
   3.501  		index_in_cluster = sector & (s->cluster_sectors - 1);
   3.502  		n = s->cluster_sectors - index_in_cluster;
   3.503  		if (n > nb_sectors)
   3.504  			n = nb_sectors;
   3.505  
   3.506 -		if (s->iocb_free_count == 0 || !aio_lock(s, sector)) {
   3.507 -			DPRINTF("AIO_LOCK or iocb_free_count (%d) failed" 
   3.508 -				"[%llu]\n", s->iocb_free_count, 
   3.509 -				(long long) sector);
   3.510 -			return -ENOMEM;
   3.511 -		}
   3.512 +		if (s->iocb_free_count == 0 || !aio_lock(s, sector)) 
   3.513 +			return cb(dd, -EBUSY, sector, nb_sectors, id, private);
   3.514  		
   3.515 -		if (!cluster_offset && (s->bfd > 0)) {
   3.516 -			s->nr_reqs[qcow_idx]++;
   3.517 -			asubmit += async_read(s, s->bfd, n * 512, sector << 9, 
   3.518 -					      buf, cb, id, sector, 
   3.519 -					      qcow_idx, private);
   3.520 -		} else if(!cluster_offset) {
   3.521 -			memset(buf, 0, 512 * n);
   3.522 +		if(!cluster_offset) {
   3.523  			aio_unlock(s, sector);
   3.524 +			ret = cb(dd, BLK_NOT_ALLOCATED, 
   3.525 +				 sector, n, id, private);
   3.526 +			if (ret == -EBUSY) {
   3.527 +				/* mark remainder of request
   3.528 +				 * as busy and try again later */
   3.529 +				return cb(dd, -EBUSY, sector + n,
   3.530 +					  nb_sectors - n, id, private);
   3.531 +			} else rsp += ret;
   3.532  		} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
   3.533 +			aio_unlock(s, sector);
   3.534  			if (decompress_cluster(s, cluster_offset) < 0) {
   3.535 -				ret = -1;
   3.536 +				rsp += cb(dd, -EIO, sector, 
   3.537 +					  nb_sectors, id, private);
   3.538  				goto done;
   3.539  			}
   3.540  			memcpy(buf, s->cluster_cache + index_in_cluster * 512, 
   3.541  			       512 * n);
   3.542 -		} else {			
   3.543 -			s->nr_reqs[qcow_idx]++;
   3.544 -			asubmit += async_read(s, s->fd, n * 512, 
   3.545 -					      (cluster_offset + 
   3.546 -					       index_in_cluster * 512), 
   3.547 -					      buf, cb, id, sector, 
   3.548 -					      qcow_idx, private);
   3.549 +			rsp += cb(dd, 0, sector, n, id, private);
   3.550 +		} else {
   3.551 +			async_read(s, n * 512, 
   3.552 +				   (cluster_offset + index_in_cluster * 512),
   3.553 +				   buf, cb, id, sector, private);
   3.554  		}
   3.555  		nb_sectors -= n;
   3.556  		sector += n;
   3.557  		buf += n * 512;
   3.558  	}
   3.559  done:
   3.560 -        /*Callback if no async requests outstanding*/
   3.561 -        if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private);
   3.562 -
   3.563 -	return 0;
   3.564 +	return rsp;
   3.565  }
   3.566  
   3.567 - int tdqcow_queue_write(struct td_state *bs, uint64_t sector,
   3.568 -			       int nb_sectors, char *buf, td_callback_t cb,
   3.569 -			       int id, void *private)
   3.570 +int tdqcow_queue_write(struct disk_driver *dd, uint64_t sector,
   3.571 +		       int nb_sectors, char *buf, td_callback_t cb,
   3.572 +		       int id, void *private)
   3.573  {
   3.574 -	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
   3.575 -	int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0;
   3.576 -	uint64_t cluster_offset;
   3.577 +	struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
   3.578 +	int ret = 0, index_in_cluster, n, i;
   3.579 +	uint64_t cluster_offset, sec, nr_secs;
   3.580 +
   3.581 +	sec     = sector;
   3.582 +	nr_secs = nb_sectors;
   3.583  
   3.584  	/*Check we can get a lock*/
   3.585  	for (i = 0; i < nb_sectors; i++)
   3.586 -		if (!aio_can_lock(s, sector + i))  {
   3.587 -			DPRINTF("AIO_CAN_LOCK failed [%llu]\n", 
   3.588 -				(long long) (sector + i));
   3.589 -			return -EBUSY;
   3.590 -		}
   3.591 +		if (!aio_can_lock(s, sector + i))  
   3.592 +			return cb(dd, -EBUSY, sector, nb_sectors, id, private);
   3.593  		   
   3.594  	/*We store a local record of the request*/
   3.595 -	qcow_idx = get_free_idx(s);	
   3.596  	while (nb_sectors > 0) {
   3.597  		index_in_cluster = sector & (s->cluster_sectors - 1);
   3.598  		n = s->cluster_sectors - index_in_cluster;
   3.599  		if (n > nb_sectors)
   3.600  			n = nb_sectors;
   3.601  
   3.602 -		if (s->iocb_free_count == 0 || !aio_lock(s, sector)){
   3.603 -			DPRINTF("AIO_LOCK or iocb_free_count (%d) failed" 
   3.604 -				"[%llu]\n", s->iocb_free_count, 
   3.605 -				(long long) sector);
   3.606 -			return -ENOMEM;
   3.607 +		if (s->iocb_free_count == 0 || !aio_lock(s, sector))
   3.608 +			return cb(dd, -EBUSY, sector, nb_sectors, id, private);
   3.609 +
   3.610 +		cluster_offset = get_cluster_offset(s, sector << 9, 1, 0,
   3.611 +						    index_in_cluster, 
   3.612 +						    index_in_cluster+n);
   3.613 +		if (!cluster_offset) {
   3.614 +			DPRINTF("Ooops, no write cluster offset!\n");
   3.615 +			return cb(dd, -EIO, sector, nb_sectors, id, private);
   3.616  		}
   3.617  
   3.618 -		if (!IS_ZERO(buf,n * 512)) {
   3.619 -
   3.620 -			cluster_offset = get_cluster_offset(bs, sector << 9, 
   3.621 -							    1, 0, 
   3.622 -							    index_in_cluster, 
   3.623 -							    index_in_cluster+n
   3.624 -				);
   3.625 -			if (!cluster_offset) {
   3.626 -				DPRINTF("Ooops, no write cluster offset!\n");
   3.627 -				ret = -1;
   3.628 -				goto done;
   3.629 -			}
   3.630 -
   3.631 -			if (s->crypt_method) {
   3.632 -				encrypt_sectors(s, sector, s->cluster_data, 
   3.633 -						(unsigned char *)buf, n, 1,
   3.634 -						&s->aes_encrypt_key);
   3.635 -				s->nr_reqs[qcow_idx]++;
   3.636 -				asubmit += async_write(s, s->fd, n * 512, 
   3.637 -						       (cluster_offset + 
   3.638 -							index_in_cluster*512), 
   3.639 -						       (char *)s->cluster_data,
   3.640 -						       cb, id, sector, 
   3.641 -						       qcow_idx, private);
   3.642 -			} else {
   3.643 -				s->nr_reqs[qcow_idx]++;
   3.644 -				asubmit += async_write(s, s->fd, n * 512, 
   3.645 -						       (cluster_offset + 
   3.646 -							index_in_cluster*512),
   3.647 -						       buf, cb, id, sector, 
   3.648 -						       qcow_idx, private);
   3.649 -			}
   3.650 +		if (s->crypt_method) {
   3.651 +			encrypt_sectors(s, sector, s->cluster_data, 
   3.652 +					(unsigned char *)buf, n, 1,
   3.653 +					&s->aes_encrypt_key);
   3.654 +			async_write(s, n * 512, 
   3.655 +				    (cluster_offset + index_in_cluster*512),
   3.656 +				    (char *)s->cluster_data, cb, id, sector, 
   3.657 +				    private);
   3.658  		} else {
   3.659 -			/*Write data contains zeros, but we must check to see 
   3.660 -			  if cluster already allocated*/
   3.661 -			cluster_offset = get_cluster_offset(bs, sector << 9, 
   3.662 -							    0, 0, 
   3.663 -							    index_in_cluster, 
   3.664 -							    index_in_cluster+n
   3.665 -				);	
   3.666 -			if(cluster_offset) {
   3.667 -				if (s->crypt_method) {
   3.668 -					encrypt_sectors(s, sector, 
   3.669 -							s->cluster_data, 
   3.670 -							(unsigned char *)buf, 
   3.671 -							n, 1,
   3.672 -							&s->aes_encrypt_key);
   3.673 -					s->nr_reqs[qcow_idx]++;
   3.674 -					asubmit += async_write(s, s->fd, 
   3.675 -							       n * 512, 
   3.676 -							       (cluster_offset+
   3.677 -								index_in_cluster * 512), 
   3.678 -							       (char *)s->cluster_data, cb, id, sector, 
   3.679 -							       qcow_idx, private);
   3.680 -				} else {
   3.681 -					s->nr_reqs[qcow_idx]++;
   3.682 -					asubmit += async_write(s, s->fd, n*512,
   3.683 -							       cluster_offset + index_in_cluster * 512, 
   3.684 -							       buf, cb, id, sector, 
   3.685 -							       qcow_idx, private);
   3.686 -				}
   3.687 -			}
   3.688 -			else aio_unlock(s, sector);
   3.689 +			async_write(s, n * 512, 
   3.690 +				    (cluster_offset + index_in_cluster*512),
   3.691 +				    buf, cb, id, sector, private);
   3.692  		}
   3.693 +		
   3.694  		nb_sectors -= n;
   3.695  		sector += n;
   3.696  		buf += n * 512;
   3.697  	}
   3.698  	s->cluster_cache_offset = -1; /* disable compressed cache */
   3.699  
   3.700 -done:
   3.701 -	/*Callback if no async requests outstanding*/
   3.702 -        if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private);
   3.703 -
   3.704  	return 0;
   3.705  }
   3.706   		
   3.707 -int tdqcow_submit(struct td_state *bs)
   3.708 +int tdqcow_submit(struct disk_driver *dd)
   3.709  {
   3.710          int ret;
   3.711 -        struct   tdqcow_state *prv = (struct tdqcow_state *)bs->private;
   3.712 +        struct   tdqcow_state *prv = (struct tdqcow_state *)dd->private;
   3.713  
   3.714 -        ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
   3.715 +	if (!prv->iocb_queued)
   3.716 +		return 0;
   3.717 +
   3.718 +	ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
   3.719  
   3.720          /* XXX: TODO: Handle error conditions here. */
   3.721  
   3.722          /* Success case: */
   3.723          prv->iocb_queued = 0;
   3.724  
   3.725 -        return ret;
   3.726 +        return 0;
   3.727  }
   3.728  
   3.729 -
   3.730 -int *tdqcow_get_fd(struct td_state *bs)
   3.731 +int tdqcow_close(struct disk_driver *dd)
   3.732  {
   3.733 -	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
   3.734 -	int *fds, i;
   3.735 -
   3.736 -	fds = malloc(sizeof(int) * MAX_IOFD);
   3.737 -	/*initialise the FD array*/
   3.738 -	for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
   3.739 -
   3.740 -	fds[0] = s->poll_fd;
   3.741 -	return fds;
   3.742 -}
   3.743 -
   3.744 -int tdqcow_close(struct td_state *bs)
   3.745 -{
   3.746 -	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
   3.747 +	struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
   3.748  	uint32_t cksum, out;
   3.749  	int fd, offset;
   3.750  
   3.751 @@ -1203,6 +1133,7 @@ int tdqcow_close(struct td_state *bs)
   3.752  		close(fd);
   3.753  	}
   3.754  
   3.755 +	io_destroy(s->aio_ctx);
   3.756  	free(s->name);
   3.757  	free(s->l1_table);
   3.758  	free(s->l2_cache);
   3.759 @@ -1212,11 +1143,11 @@ int tdqcow_close(struct td_state *bs)
   3.760  	return 0;
   3.761  }
   3.762  
   3.763 -int tdqcow_do_callbacks(struct td_state *s, int sid)
   3.764 +int tdqcow_do_callbacks(struct disk_driver *dd, int sid)
   3.765  {
   3.766          int ret, i, rsp = 0,*ptr;
   3.767          struct io_event *ep;
   3.768 -        struct tdqcow_state *prv = (struct tdqcow_state *)s->private;
   3.769 +        struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
   3.770  
   3.771          if (sid > MAX_IOFD) return 1;
   3.772  	
   3.773 @@ -1224,25 +1155,24 @@ int tdqcow_do_callbacks(struct td_state 
   3.774          ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
   3.775                             NULL);
   3.776  
   3.777 -        for (ep=prv->aio_events, i = ret; i-->0; ep++) {
   3.778 +        for (ep = prv->aio_events, i = ret; i-- > 0; ep++) {
   3.779                  struct iocb        *io  = ep->obj;
   3.780                  struct pending_aio *pio;
   3.781  
   3.782                  pio = &prv->pending_aio[(long)io->data];
   3.783  
   3.784  		aio_unlock(prv, pio->sector);
   3.785 -		if (pio->id >= 0) {
   3.786 -			if (prv->crypt_method)
   3.787 -				encrypt_sectors(prv, pio->sector, 
   3.788 -						(unsigned char *)pio->buf, 
   3.789 -						(unsigned char *)pio->buf, 
   3.790 -						pio->nb_sectors, 0, 
   3.791 -						&prv->aes_decrypt_key);
   3.792 -			prv->nr_reqs[pio->qcow_idx]--;
   3.793 -			if (prv->nr_reqs[pio->qcow_idx] == 0) 
   3.794 -			        rsp += pio->cb(s, ep->res == io->u.c.nbytes ? 0 : 1, pio->id, 
   3.795 -					       pio->private);
   3.796 -		} else if (pio->id == -2) free(pio->buf);
   3.797 +
   3.798 +		if (prv->crypt_method)
   3.799 +			encrypt_sectors(prv, pio->sector, 
   3.800 +					(unsigned char *)pio->buf, 
   3.801 +					(unsigned char *)pio->buf, 
   3.802 +					pio->nb_sectors, 0, 
   3.803 +					&prv->aes_decrypt_key);
   3.804 +
   3.805 +		rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1, 
   3.806 +			       pio->sector, pio->nb_sectors,
   3.807 +			       pio->id, pio->private);
   3.808  
   3.809                  prv->iocb_free[prv->iocb_free_count++] = io;
   3.810          }
   3.811 @@ -1250,7 +1180,7 @@ int tdqcow_do_callbacks(struct td_state 
   3.812  }
   3.813  
   3.814  int qcow_create(const char *filename, uint64_t total_size,
   3.815 -                      const char *backing_file, int sparse)
   3.816 +		const char *backing_file, int sparse)
   3.817  {
   3.818  	int fd, header_size, backing_filename_len, l1_size, i;
   3.819  	int shift, length, adjust, flags = 0, ret = 0;
   3.820 @@ -1391,9 +1321,8 @@ int qcow_create(const char *filename, ui
   3.821  	return 0;
   3.822  }
   3.823  
   3.824 -int qcow_make_empty(struct td_state *bs)
   3.825 +int qcow_make_empty(struct tdqcow_state *s)
   3.826  {
   3.827 -	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
   3.828  	uint32_t l1_length = s->l1_size * sizeof(uint64_t);
   3.829  
   3.830  	memset(s->l1_table, 0, l1_length);
   3.831 @@ -1412,19 +1341,16 @@ int qcow_make_empty(struct td_state *bs)
   3.832  	return 0;
   3.833  }
   3.834  
   3.835 -int qcow_get_cluster_size(struct td_state *bs)
   3.836 +int qcow_get_cluster_size(struct tdqcow_state *s)
   3.837  {
   3.838 -	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
   3.839 -
   3.840  	return s->cluster_size;
   3.841  }
   3.842  
   3.843  /* XXX: put compressed sectors first, then all the cluster aligned
   3.844     tables to avoid losing bytes in alignment */
   3.845 -int qcow_compress_cluster(struct td_state *bs, int64_t sector_num, 
   3.846 +int qcow_compress_cluster(struct tdqcow_state *s, int64_t sector_num, 
   3.847                            const uint8_t *buf)
   3.848  {
   3.849 -	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
   3.850  	z_stream strm;
   3.851  	int ret, out_len;
   3.852  	uint8_t *out_buf;
   3.853 @@ -1463,7 +1389,7 @@ int qcow_compress_cluster(struct td_stat
   3.854  		/* could not compress: write normal cluster */
   3.855  		//tdqcow_queue_write(bs, sector_num, buf, s->cluster_sectors);
   3.856  	} else {
   3.857 -		cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, 
   3.858 +		cluster_offset = get_cluster_offset(s, sector_num << 9, 2, 
   3.859                                              out_len, 0, 0);
   3.860  		cluster_offset &= s->cluster_offset_mask;
   3.861  		lseek(s->fd, cluster_offset, SEEK_SET);
   3.862 @@ -1477,15 +1403,54 @@ int qcow_compress_cluster(struct td_stat
   3.863  	return 0;
   3.864  }
   3.865  
   3.866 +int tdqcow_has_parent(struct disk_driver *dd)
   3.867 +{
   3.868 +	struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
   3.869 +	return (s->backing_file_offset ? 1 : 0);
   3.870 +}
   3.871 +
   3.872 +int tdqcow_get_parent(struct disk_driver *cdd, struct disk_driver *pdd)
   3.873 +{
   3.874 +	off_t off;
   3.875 +	char *buf, *filename;
   3.876 +	int len, secs, ret = -1;
   3.877 +	struct tdqcow_state *child  = (struct tdqcow_state *)cdd->private;
   3.878 +
   3.879 +	if (!child->backing_file_offset)
   3.880 +		return -1;
   3.881 +
   3.882 +	/* read the backing file name */
   3.883 +	len  = child->backing_file_size;
   3.884 +	off  = child->backing_file_offset - (child->backing_file_offset % 512);
   3.885 +	secs = (len + (child->backing_file_offset - off) + 511) >> 9;
   3.886 +
   3.887 +	if (posix_memalign((void **)&buf, 512, secs << 9)) 
   3.888 +		return -1;
   3.889 +
   3.890 +	if (lseek(child->fd, off, SEEK_SET) == (off_t)-1)
   3.891 +		goto out;
   3.892 +
   3.893 +	if (read(child->fd, buf, secs << 9) != secs << 9)
   3.894 +		goto out;
   3.895 +	filename      = buf + (child->backing_file_offset - off);
   3.896 +	filename[len] = '\0';
   3.897 +
   3.898 +	/*Open backing file*/
   3.899 +	ret = tdqcow_open(pdd, filename);
   3.900 + out:
   3.901 +	free(buf);
   3.902 +	return ret;
   3.903 +}
   3.904 +
   3.905  struct tap_disk tapdisk_qcow = {
   3.906 -	"tapdisk_qcow",
   3.907 -	sizeof(struct tdqcow_state),
   3.908 -	tdqcow_open,
   3.909 -	tdqcow_queue_read,
   3.910 -	tdqcow_queue_write,
   3.911 -	tdqcow_submit,
   3.912 -	tdqcow_get_fd,
   3.913 -	tdqcow_close,
   3.914 -	tdqcow_do_callbacks,
   3.915 +	.disk_type           = "tapdisk_qcow",
   3.916 +	.private_data_size   = sizeof(struct tdqcow_state),
   3.917 +	.td_open             = tdqcow_open,
   3.918 +	.td_queue_read       = tdqcow_queue_read,
   3.919 +	.td_queue_write      = tdqcow_queue_write,
   3.920 +	.td_submit           = tdqcow_submit,
   3.921 +	.td_has_parent       = tdqcow_has_parent,
   3.922 +	.td_get_parent       = tdqcow_get_parent,
   3.923 +	.td_close            = tdqcow_close,
   3.924 +	.td_do_callbacks     = tdqcow_do_callbacks,
   3.925  };
   3.926 -
     4.1 --- a/tools/blktap/drivers/block-ram.c	Fri Feb 16 16:34:28 2007 +0000
     4.2 +++ b/tools/blktap/drivers/block-ram.c	Fri Feb 16 20:31:27 2007 -0800
     4.3 @@ -123,14 +123,25 @@ static int get_image_info(struct td_stat
     4.4  	return 0;
     4.5  }
     4.6  
     4.7 +static inline void init_fds(struct disk_driver *dd)
     4.8 +{
     4.9 +        int i;
    4.10 +	struct tdram_state *prv = (struct tdram_state *)dd->private;
    4.11 +
    4.12 +        for(i =0 ; i < MAX_IOFD; i++)
    4.13 +		dd->io_fd[i] = 0;
    4.14 +
    4.15 +        dd->io_fd[0] = prv->poll_pipe[0];
    4.16 +}
    4.17 +
    4.18  /* Open the disk file and initialize ram state. */
    4.19 -int tdram_open (struct td_state *s, const char *name)
    4.20 +int tdram_open (struct disk_driver *dd, const char *name)
    4.21  {
    4.22 +	char *p;
    4.23 +	uint64_t size;
    4.24  	int i, fd, ret = 0, count = 0;
    4.25 -	struct tdram_state *prv = (struct tdram_state *)s->private;
    4.26 -	uint64_t size;
    4.27 -	char *p;
    4.28 -	s->private = prv;
    4.29 +	struct td_state    *s     = dd->td_state;
    4.30 +	struct tdram_state *prv   = (struct tdram_state *)dd->private;
    4.31  
    4.32  	connections++;
    4.33  	
    4.34 @@ -209,88 +220,80 @@ int tdram_open (struct td_state *s, cons
    4.35  		ret = 0;
    4.36  	} 
    4.37  
    4.38 +	init_fds(dd);
    4.39  done:
    4.40  	return ret;
    4.41  }
    4.42  
    4.43 - int tdram_queue_read(struct td_state *s, uint64_t sector,
    4.44 -			       int nb_sectors, char *buf, td_callback_t cb,
    4.45 -			       int id, void *private)
    4.46 + int tdram_queue_read(struct disk_driver *dd, uint64_t sector,
    4.47 +		      int nb_sectors, char *buf, td_callback_t cb,
    4.48 +		      int id, void *private)
    4.49  {
    4.50 -	struct tdram_state *prv = (struct tdram_state *)s->private;
    4.51 +	struct td_state    *s   = dd->td_state;
    4.52 +	struct tdram_state *prv = (struct tdram_state *)dd->private;
    4.53  	int      size    = nb_sectors * s->sector_size;
    4.54  	uint64_t offset  = sector * (uint64_t)s->sector_size;
    4.55 -	int ret;
    4.56  
    4.57  	memcpy(buf, img + offset, size);
    4.58 -	ret = size;
    4.59  
    4.60 -	cb(s, (ret < 0) ? ret: 0, id, private);
    4.61 -
    4.62 -	return ret;
    4.63 +	return cb(dd, 0, sector, nb_sectors, id, private);
    4.64  }
    4.65  
    4.66 - int tdram_queue_write(struct td_state *s, uint64_t sector,
    4.67 -			       int nb_sectors, char *buf, td_callback_t cb,
    4.68 -			       int id, void *private)
    4.69 +int tdram_queue_write(struct disk_driver *dd, uint64_t sector,
    4.70 +		      int nb_sectors, char *buf, td_callback_t cb,
    4.71 +		      int id, void *private)
    4.72  {
    4.73 -	struct tdram_state *prv = (struct tdram_state *)s->private;
    4.74 +	struct td_state    *s   = dd->td_state;
    4.75 +	struct tdram_state *prv = (struct tdram_state *)dd->private;
    4.76  	int      size    = nb_sectors * s->sector_size;
    4.77  	uint64_t offset  = sector * (uint64_t)s->sector_size;
    4.78 -	int ret;
    4.79  	
    4.80 -	/*We assume that write access is controlled at a higher level for multiple disks*/
    4.81 +	/* We assume that write access is controlled
    4.82 +	 * at a higher level for multiple disks */
    4.83  	memcpy(img + offset, buf, size);
    4.84 -	ret = size;
    4.85  
    4.86 -	cb(s, (ret < 0) ? ret : 0, id, private);
    4.87 -
    4.88 -	return ret;
    4.89 +	return cb(dd, 0, sector, nb_sectors, id, private);
    4.90  }
    4.91   		
    4.92 -int tdram_submit(struct td_state *s)
    4.93 +int tdram_submit(struct disk_driver *dd)
    4.94  {
    4.95  	return 0;	
    4.96  }
    4.97  
    4.98 -
    4.99 -int *tdram_get_fd(struct td_state *s)
   4.100 +int tdram_close(struct disk_driver *dd)
   4.101  {
   4.102 -	struct tdram_state *prv = (struct tdram_state *)s->private;
   4.103 -        int *fds, i;
   4.104 -
   4.105 -        fds = malloc(sizeof(int) * MAX_IOFD);
   4.106 -        /*initialise the FD array*/
   4.107 -        for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
   4.108 -
   4.109 -        fds[0] = prv->poll_pipe[0];
   4.110 -        return fds;	
   4.111 -}
   4.112 -
   4.113 -int tdram_close(struct td_state *s)
   4.114 -{
   4.115 -	struct tdram_state *prv = (struct tdram_state *)s->private;
   4.116 +	struct tdram_state *prv = (struct tdram_state *)dd->private;
   4.117  	
   4.118  	connections--;
   4.119  	
   4.120  	return 0;
   4.121  }
   4.122  
   4.123 -int tdram_do_callbacks(struct td_state *s, int sid)
   4.124 +int tdram_do_callbacks(struct disk_driver *dd, int sid)
   4.125  {
   4.126  	/* always ask for a kick */
   4.127  	return 1;
   4.128  }
   4.129  
   4.130 +int tdram_has_parent(struct disk_driver *dd)
   4.131 +{
   4.132 +	return 0;
   4.133 +}
   4.134 +
   4.135 +int tdram_get_parent(struct disk_driver *dd, struct disk_driver *parent)
   4.136 +{
   4.137 +	return -EINVAL;
   4.138 +}
   4.139 +
   4.140  struct tap_disk tapdisk_ram = {
   4.141 -	"tapdisk_ram",
   4.142 -	sizeof(struct tdram_state),
   4.143 -	tdram_open,
   4.144 -	tdram_queue_read,
   4.145 -	tdram_queue_write,
   4.146 -	tdram_submit,
   4.147 -	tdram_get_fd,
   4.148 -	tdram_close,
   4.149 -	tdram_do_callbacks,
   4.150 +	.disk_type          = "tapdisk_ram",
   4.151 +	.private_data_size  = sizeof(struct tdram_state),
   4.152 +	.td_open            = tdram_open,
   4.153 +	.td_queue_read      = tdram_queue_read,
   4.154 +	.td_queue_write     = tdram_queue_write,
   4.155 +	.td_submit          = tdram_submit,
   4.156 +	.td_has_parent      = tdram_has_parent,
   4.157 +	.td_get_parent      = tdram_get_parent,
   4.158 +	.td_close           = tdram_close,
   4.159 +	.td_do_callbacks    = tdram_do_callbacks,
   4.160  };
   4.161 -
     5.1 --- a/tools/blktap/drivers/block-sync.c	Fri Feb 16 16:34:28 2007 +0000
     5.2 +++ b/tools/blktap/drivers/block-sync.c	Fri Feb 16 20:31:27 2007 -0800
     5.3 @@ -106,12 +106,23 @@ static int get_image_info(struct td_stat
     5.4  	return 0;
     5.5  }
     5.6  
     5.7 +static inline void init_fds(struct disk_driver *dd)
     5.8 +{
     5.9 +	int i;
    5.10 +	struct tdsync_state *prv = (struct tdsync_state *)dd->private;
    5.11 +	
    5.12 +	for(i = 0; i < MAX_IOFD; i++)
    5.13 +		dd->io_fd[i] = 0;
    5.14 +
    5.15 +	dd->io_fd[0] = prv->poll_pipe[0];
    5.16 +}
    5.17 +
    5.18  /* Open the disk file and initialize aio state. */
    5.19 -int tdsync_open (struct td_state *s, const char *name)
    5.20 +int tdsync_open (struct disk_driver *dd, const char *name)
    5.21  {
    5.22  	int i, fd, ret = 0;
    5.23 -	struct tdsync_state *prv = (struct tdsync_state *)s->private;
    5.24 -	s->private = prv;
    5.25 +	struct td_state     *s   = dd->td_state;
    5.26 +	struct tdsync_state *prv = (struct tdsync_state *)dd->private;
    5.27  	
    5.28  	/* set up a pipe so that we can hand back a poll fd that won't fire.*/
    5.29  	ret = pipe(prv->poll_pipe);
    5.30 @@ -138,16 +149,18 @@ int tdsync_open (struct td_state *s, con
    5.31  
    5.32          prv->fd = fd;
    5.33  
    5.34 +	init_fds(dd);
    5.35  	ret = get_image_info(s, fd);
    5.36  done:
    5.37  	return ret;	
    5.38  }
    5.39  
    5.40 - int tdsync_queue_read(struct td_state *s, uint64_t sector,
    5.41 + int tdsync_queue_read(struct disk_driver *dd, uint64_t sector,
    5.42  			       int nb_sectors, char *buf, td_callback_t cb,
    5.43  			       int id, void *private)
    5.44  {
    5.45 -	struct tdsync_state *prv = (struct tdsync_state *)s->private;
    5.46 +	struct td_state     *s   = dd->td_state;
    5.47 +	struct tdsync_state *prv = (struct tdsync_state *)dd->private;
    5.48  	int      size    = nb_sectors * s->sector_size;
    5.49  	uint64_t offset  = sector * (uint64_t)s->sector_size;
    5.50  	int ret;
    5.51 @@ -162,16 +175,15 @@ done:
    5.52  		} 
    5.53  	} else ret = 0 - errno;
    5.54  		
    5.55 -	cb(s, (ret < 0) ? ret: 0, id, private);
    5.56 -	
    5.57 -	return 1;
    5.58 +	return cb(dd, (ret < 0) ? ret: 0, sector, nb_sectors, id, private);
    5.59  }
    5.60  
    5.61 - int tdsync_queue_write(struct td_state *s, uint64_t sector,
    5.62 + int tdsync_queue_write(struct disk_driver *dd, uint64_t sector,
    5.63  			       int nb_sectors, char *buf, td_callback_t cb,
    5.64  			       int id, void *private)
    5.65  {
    5.66 -	struct tdsync_state *prv = (struct tdsync_state *)s->private;
    5.67 +	struct td_state     *s   = dd->td_state;
    5.68 +	struct tdsync_state *prv = (struct tdsync_state *)dd->private;
    5.69  	int      size    = nb_sectors * s->sector_size;
    5.70  	uint64_t offset  = sector * (uint64_t)s->sector_size;
    5.71  	int ret = 0;
    5.72 @@ -186,34 +198,17 @@ done:
    5.73  		}
    5.74  	} else ret = 0 - errno;
    5.75  		
    5.76 -	cb(s, (ret < 0) ? ret : 0, id, private);
    5.77 -	
    5.78 -	return 1;
    5.79 +	return cb(dd, (ret < 0) ? ret : 0, sector, nb_sectors, id, private);
    5.80  }
    5.81   		
    5.82 -int tdsync_submit(struct td_state *s)
    5.83 +int tdsync_submit(struct disk_driver *dd)
    5.84  {
    5.85  	return 0;	
    5.86  }
    5.87  
    5.88 -
    5.89 -int *tdsync_get_fd(struct td_state *s)
    5.90 +int tdsync_close(struct disk_driver *dd)
    5.91  {
    5.92 -	struct tdsync_state *prv = (struct tdsync_state *)s->private;
    5.93 -	
    5.94 -	int *fds, i;
    5.95 -
    5.96 -	fds = malloc(sizeof(int) * MAX_IOFD);
    5.97 -	/*initialise the FD array*/
    5.98 -	for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
    5.99 -
   5.100 -	fds[0] = prv->poll_pipe[0];
   5.101 -	return fds;
   5.102 -}
   5.103 -
   5.104 -int tdsync_close(struct td_state *s)
   5.105 -{
   5.106 -	struct tdsync_state *prv = (struct tdsync_state *)s->private;
   5.107 +	struct tdsync_state *prv = (struct tdsync_state *)dd->private;
   5.108  	
   5.109  	close(prv->fd);
   5.110  	close(prv->poll_pipe[0]);
   5.111 @@ -222,21 +217,31 @@ int tdsync_close(struct td_state *s)
   5.112  	return 0;
   5.113  }
   5.114  
   5.115 -int tdsync_do_callbacks(struct td_state *s, int sid)
   5.116 +int tdsync_do_callbacks(struct disk_driver *dd, int sid)
   5.117  {
   5.118  	/* always ask for a kick */
   5.119  	return 1;
   5.120  }
   5.121  
   5.122 +int tdsync_has_parent(struct disk_driver *dd)
   5.123 +{
   5.124 +	return 0;
   5.125 +}
   5.126 +
   5.127 +int tdsync_get_parent(struct disk_driver *dd, struct disk_driver *parent)
   5.128 +{
   5.129 +	return -EINVAL;
   5.130 +}
   5.131 +
   5.132  struct tap_disk tapdisk_sync = {
   5.133 -	"tapdisk_sync",
   5.134 -	sizeof(struct tdsync_state),
   5.135 -	tdsync_open,
   5.136 -	tdsync_queue_read,
   5.137 -	tdsync_queue_write,
   5.138 -	tdsync_submit,
   5.139 -	tdsync_get_fd,
   5.140 -	tdsync_close,
   5.141 -	tdsync_do_callbacks,
   5.142 +	.disk_type           = "tapdisk_sync",
   5.143 +	.private_data_size   = sizeof(struct tdsync_state),
   5.144 +	.td_open             = tdsync_open,
   5.145 +	.td_queue_read       = tdsync_queue_read,
   5.146 +	.td_queue_write      = tdsync_queue_write,
   5.147 +	.td_submit           = tdsync_submit,
   5.148 +	.td_has_parent       = tdsync_has_parent,
   5.149 +	.td_get_parent       = tdsync_get_parent,
   5.150 +	.td_close            = tdsync_close,
   5.151 +	.td_do_callbacks     = tdsync_do_callbacks,
   5.152  };
   5.153 -
     6.1 --- a/tools/blktap/drivers/block-vmdk.c	Fri Feb 16 16:34:28 2007 +0000
     6.2 +++ b/tools/blktap/drivers/block-vmdk.c	Fri Feb 16 20:31:27 2007 -0800
     6.3 @@ -107,14 +107,25 @@ struct tdvmdk_state {
     6.4      	unsigned int cluster_sectors;
     6.5  };
     6.6  
     6.7 +static inline void init_fds(struct disk_driver *dd)
     6.8 +{
     6.9 +        int i;
    6.10 +	struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
    6.11 +
    6.12 +        for (i = 0; i < MAX_IOFD; i++)
    6.13 +		dd->io_fd[i] = 0;
    6.14 +
    6.15 +        dd->io_fd[0] = prv->poll_pipe[0];
    6.16 +}
    6.17  
    6.18  /* Open the disk file and initialize aio state. */
    6.19 -static int tdvmdk_open (struct td_state *s, const char *name)
    6.20 +static int tdvmdk_open (struct disk_driver *dd, const char *name)
    6.21  {
    6.22  	int ret, fd;
    6.23      	int l1_size, i;
    6.24      	uint32_t magic;
    6.25 -	struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
    6.26 +	struct td_state     *s   = dd->td_state;
    6.27 +	struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
    6.28  
    6.29  	/* set up a pipe so that we can hand back a poll fd that won't fire.*/
    6.30  	ret = pipe(prv->poll_pipe);
    6.31 @@ -206,6 +217,7 @@ static int tdvmdk_open (struct td_state 
    6.32      	if (!prv->l2_cache)
    6.33          	goto fail;
    6.34      	prv->fd = fd;
    6.35 +	init_fds(dd);
    6.36  	DPRINTF("VMDK File opened successfully\n");
    6.37      	return 0;
    6.38  	
    6.39 @@ -218,10 +230,9 @@ fail:
    6.40  	return -1;
    6.41  }
    6.42  
    6.43 -static uint64_t get_cluster_offset(struct td_state *s, 
    6.44 +static uint64_t get_cluster_offset(struct tdvmdk_state *prv, 
    6.45                                     uint64_t offset, int allocate)
    6.46  {
    6.47 -	struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
    6.48      	unsigned int l1_index, l2_offset, l2_index;
    6.49      	int min_index, i, j;
    6.50      	uint32_t min_count, *l2_table, tmp;
    6.51 @@ -291,16 +302,17 @@ static uint64_t get_cluster_offset(struc
    6.52      	return cluster_offset;
    6.53  }
    6.54  
    6.55 -static int tdvmdk_queue_read(struct td_state *s, uint64_t sector,
    6.56 +static int tdvmdk_queue_read(struct disk_driver *dd, uint64_t sector,
    6.57  			       int nb_sectors, char *buf, td_callback_t cb,
    6.58  			       int id, void *private)
    6.59  {
    6.60 -	struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
    6.61 +	struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
    6.62      	int index_in_cluster, n;
    6.63      	uint64_t cluster_offset;
    6.64      	int ret = 0;
    6.65 +
    6.66      	while (nb_sectors > 0) {
    6.67 -        	cluster_offset = get_cluster_offset(s, sector << 9, 0);
    6.68 +        	cluster_offset = get_cluster_offset(prv, sector << 9, 0);
    6.69          	index_in_cluster = sector % prv->cluster_sectors;
    6.70          	n = prv->cluster_sectors - index_in_cluster;
    6.71          	if (n > nb_sectors)
    6.72 @@ -321,27 +333,24 @@ static int tdvmdk_queue_read(struct td_s
    6.73          	buf += n * 512;
    6.74      	}
    6.75  done:
    6.76 -	cb(s, ret == -1 ? -1 : 0, id, private);
    6.77 -	
    6.78 -	return 1;
    6.79 +	return cb(dd, ret == -1 ? -1 : 0, sector, nb_sectors, id, private);
    6.80  }
    6.81  
    6.82 -static  int tdvmdk_queue_write(struct td_state *s, uint64_t sector,
    6.83 +static  int tdvmdk_queue_write(struct disk_driver *dd, uint64_t sector,
    6.84  			       int nb_sectors, char *buf, td_callback_t cb,
    6.85  			       int id, void *private)
    6.86  {
    6.87 -	struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
    6.88 +	struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
    6.89      	int index_in_cluster, n;
    6.90      	uint64_t cluster_offset;
    6.91      	int ret = 0;
    6.92 -    	
    6.93  
    6.94      	while (nb_sectors > 0) {
    6.95          	index_in_cluster = sector & (prv->cluster_sectors - 1);
    6.96          	n = prv->cluster_sectors - index_in_cluster;
    6.97          	if (n > nb_sectors)
    6.98              		n = nb_sectors;
    6.99 -        	cluster_offset = get_cluster_offset(s, sector << 9, 1);
   6.100 +        	cluster_offset = get_cluster_offset(prv, sector << 9, 1);
   6.101          	if (!cluster_offset) {
   6.102              		ret = -1;
   6.103              		goto done;
   6.104 @@ -358,33 +367,17 @@ static  int tdvmdk_queue_write(struct td
   6.105          	buf += n * 512;
   6.106      	}
   6.107  done:
   6.108 -	cb(s, ret == -1 ? -1 : 0, id, private);
   6.109 -	
   6.110 -	return 1;
   6.111 +	return cb(dd, ret == -1 ? -1 : 0, sector, nb_sectors, id, private);
   6.112  }
   6.113   		
   6.114 -static int tdvmdk_submit(struct td_state *s)
   6.115 +static int tdvmdk_submit(struct disk_driver *dd)
   6.116  {
   6.117  	return 0;	
   6.118  }
   6.119  
   6.120 -
   6.121 -static int *tdvmdk_get_fd(struct td_state *s)
   6.122 +static int tdvmdk_close(struct disk_driver *dd)
   6.123  {
   6.124 -	struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
   6.125 -        int *fds, i;
   6.126 -
   6.127 -        fds = malloc(sizeof(int) * MAX_IOFD);
   6.128 -        /*initialise the FD array*/
   6.129 -        for (i=0;i<MAX_IOFD;i++) fds[i] = 0;
   6.130 -
   6.131 -        fds[0] = prv->poll_pipe[0];
   6.132 -        return fds;
   6.133 -}
   6.134 -
   6.135 -static int tdvmdk_close(struct td_state *s)
   6.136 -{
   6.137 -	struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
   6.138 +	struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
   6.139  	
   6.140      	safer_free(prv->l1_table);
   6.141      	safer_free(prv->l1_backup_table);
   6.142 @@ -395,21 +388,31 @@ static int tdvmdk_close(struct td_state 
   6.143  	return 0;
   6.144  }
   6.145  
   6.146 -static int tdvmdk_do_callbacks(struct td_state *s, int sid)
   6.147 +static int tdvmdk_do_callbacks(struct disk_driver *dd, int sid)
   6.148  {
   6.149  	/* always ask for a kick */
   6.150  	return 1;
   6.151  }
   6.152  
   6.153 +static int tdvmdk_has_parent(struct disk_driver *dd)
   6.154 +{
   6.155 +	return 0;
   6.156 +}
   6.157 +
   6.158 +static int tdvmdk_get_parent(struct disk_driver *dd, struct disk_driver *parent)
   6.159 +{
   6.160 +	return -EINVAL;
   6.161 +}
   6.162 +
   6.163  struct tap_disk tapdisk_vmdk = {
   6.164 -	"tapdisk_vmdk",
   6.165 -	sizeof(struct tdvmdk_state),
   6.166 -	tdvmdk_open,
   6.167 -	tdvmdk_queue_read,
   6.168 -	tdvmdk_queue_write,
   6.169 -	tdvmdk_submit,
   6.170 -	tdvmdk_get_fd,
   6.171 -	tdvmdk_close,
   6.172 -	tdvmdk_do_callbacks,
   6.173 +	.disk_type           = "tapdisk_vmdk",
   6.174 +	.private_data_size   = sizeof(struct tdvmdk_state),
   6.175 +	.td_open             = tdvmdk_open,
   6.176 +	.td_queue_read       = tdvmdk_queue_read,
   6.177 +	.td_queue_write      = tdvmdk_queue_write,
   6.178 +	.td_submit           = tdvmdk_submit,
   6.179 +	.td_has_parent       = tdvmdk_has_parent,
   6.180 +	.td_get_parent       = tdvmdk_get_parent,
   6.181 +	.td_close            = tdvmdk_close,
   6.182 +	.td_do_callbacks     = tdvmdk_do_callbacks,
   6.183  };
   6.184 -
     7.1 --- a/tools/blktap/drivers/img2qcow.c	Fri Feb 16 16:34:28 2007 +0000
     7.2 +++ b/tools/blktap/drivers/img2qcow.c	Fri Feb 16 20:31:27 2007 -0800
     7.3 @@ -147,7 +147,8 @@ static int get_image_info(struct td_stat
     7.4  	return 0;
     7.5  }
     7.6  
     7.7 -static int send_responses(struct td_state *s, int res, int idx, void *private)
     7.8 +static int send_responses(struct disk_driver *dd, int res, uint64_t sec, 
     7.9 +			  int nr_secs, int idx, void *private)
    7.10  {
    7.11  	if (res < 0) DFPRINTF("AIO FAILURE: res [%d]!\n",res);
    7.12  	
    7.13 @@ -159,7 +160,7 @@ static int send_responses(struct td_stat
    7.14  
    7.15  int main(int argc, char *argv[])
    7.16  {
    7.17 -	struct tap_disk *drv;
    7.18 +	struct disk_driver dd;
    7.19  	struct td_state *s;
    7.20  	int ret = -1, fd, len;
    7.21  	fd_set readfds;
    7.22 @@ -195,16 +196,17 @@ int main(int argc, char *argv[])
    7.23  	} else DFPRINTF("Qcow file created: size %llu sectors\n",
    7.24  			(long long unsigned)s->size);
    7.25  	
    7.26 -	drv = &tapdisk_qcow;
    7.27 -	s->private = malloc(drv->private_data_size);
    7.28 +	dd.td_state = s;
    7.29 +	dd.drv      = &tapdisk_qcow;
    7.30 +	dd.private  = malloc(dd.drv->private_data_size);
    7.31  
    7.32          /*Open qcow file*/
    7.33 -        if (drv->td_open(s, argv[1])!=0) {
    7.34 +        if (dd.drv->td_open(&dd, argv[1])!=0) {
    7.35  		DFPRINTF("Unable to open Qcow file [%s]\n",argv[1]);
    7.36  		exit(-1);
    7.37  	}
    7.38  
    7.39 -	io_fd = drv->td_get_fd(s);
    7.40 +	io_fd = dd.io_fd;
    7.41  
    7.42  	/*Initialise the output string*/
    7.43  	memset(output,0x20,25);
    7.44 @@ -245,9 +247,9 @@ int main(int argc, char *argv[])
    7.45  				len = (len >> 9) << 9;
    7.46  			}
    7.47  
    7.48 -			ret = drv->td_queue_write(s, i >> 9,
    7.49 -						  len >> 9, buf, 
    7.50 -						  send_responses, 0, buf);
    7.51 +			ret = dd.drv->td_queue_write(&dd, i >> 9,
    7.52 +						     len >> 9, buf, 
    7.53 +						     send_responses, 0, buf);
    7.54  				
    7.55  			if (!ret) submit_events++;
    7.56  				
    7.57 @@ -261,7 +263,7 @@ int main(int argc, char *argv[])
    7.58  			debug_output(i,s->size << 9);
    7.59  			
    7.60  			if ((submit_events % 10 == 0) || complete) 
    7.61 -				drv->td_submit(s);
    7.62 +				dd.drv->td_submit(&dd);
    7.63  			timeout.tv_usec = 0;
    7.64  			
    7.65  		} else {
    7.66 @@ -275,14 +277,14 @@ int main(int argc, char *argv[])
    7.67                  ret = select(maxfds + 1, &readfds, (fd_set *) 0,
    7.68                               (fd_set *) 0, &timeout);
    7.69  			     
    7.70 -		if (ret > 0) drv->td_do_callbacks(s, 0);
    7.71 +		if (ret > 0) dd.drv->td_do_callbacks(&dd, 0);
    7.72  		if (complete && (returned_events == submit_events)) 
    7.73  			running = 0;
    7.74  	}
    7.75  	memcpy(output+prev+1,"=",1);
    7.76  	DFPRINTF("\r%s     100%%\nTRANSFER COMPLETE\n\n", output);
    7.77 -        drv->td_close(s);
    7.78 -        free(s->private);
    7.79 +        dd.drv->td_close(&dd);
    7.80 +        free(dd.private);
    7.81          free(s);
    7.82  		
    7.83  	return 0;
     8.1 --- a/tools/blktap/drivers/qcow2raw.c	Fri Feb 16 16:34:28 2007 +0000
     8.2 +++ b/tools/blktap/drivers/qcow2raw.c	Fri Feb 16 20:31:27 2007 -0800
     8.3 @@ -55,8 +55,7 @@ static int read_complete = 0, write_comp
     8.4  static int returned_read_events = 0, returned_write_events = 0;
     8.5  static int submit_events = 0;
     8.6  static uint32_t read_idx = 0, write_idx = 0;
     8.7 -struct tap_disk *drv1, *drv2;
     8.8 -struct td_state *sqcow, *saio;
     8.9 +struct disk_driver ddqcow, ddaio;
    8.10  static uint64_t prev = 0, written = 0;
    8.11  static char output[25];
    8.12  
    8.13 @@ -100,7 +99,8 @@ static inline void LOCAL_FD_SET(fd_set *
    8.14  	return;
    8.15  }
    8.16  
    8.17 -static int send_write_responses(struct td_state *s, int res, int idx, void *private)
    8.18 +static int send_write_responses(struct disk_driver *dd, int res, uint64_t sec,
    8.19 +				int nr_secs, int idx, void *private)
    8.20  {
    8.21  	if (res < 0) {
    8.22  		DFPRINTF("AIO FAILURE: res [%d]!\n",res);
    8.23 @@ -112,12 +112,13 @@ static int send_write_responses(struct t
    8.24  	if (complete && (returned_write_events == submit_events)) 
    8.25  		write_complete = 1;
    8.26  
    8.27 -	debug_output(written, s->size << 9);
    8.28 +	debug_output(written, dd->td_state->size << 9);
    8.29  	free(private);
    8.30  	return 0;
    8.31  }
    8.32  
    8.33 -static int send_read_responses(struct td_state *s, int res, int idx, void *private)
    8.34 +static int send_read_responses(struct disk_driver *dd, int res, uint64_t sec,
    8.35 +			       int nr_secs, int idx, void *private)
    8.36  {
    8.37  	int ret;
    8.38  
    8.39 @@ -128,8 +129,8 @@ static int send_read_responses(struct td
    8.40  	if (complete && (returned_read_events == submit_events)) 
    8.41  		read_complete = 1;
    8.42  	
    8.43 -	ret = drv2->td_queue_write(saio, idx, BLOCK_PROCESSSZ>>9, private, 
    8.44 -				   send_write_responses, idx, private);
    8.45 +	ret = ddaio.drv->td_queue_write(&ddaio, idx, BLOCK_PROCESSSZ>>9, private, 
    8.46 +					send_write_responses, idx, private);
    8.47  	if (ret != 0) {
    8.48  		DFPRINTF("ERROR in submitting queue write!\n");
    8.49  		return 0;
    8.50 @@ -137,7 +138,7 @@ static int send_read_responses(struct td
    8.51  
    8.52  	if ( (complete && returned_read_events == submit_events) || 
    8.53  	     (returned_read_events % 10 == 0) ) {
    8.54 -		drv2->td_submit(saio);
    8.55 +		ddaio.drv->td_submit(&ddaio);
    8.56  	}
    8.57  
    8.58  	return 0;
    8.59 @@ -161,20 +162,20 @@ int main(int argc, char *argv[])
    8.60  		exit(-1);
    8.61  	}
    8.62  
    8.63 -	sqcow = malloc(sizeof(struct td_state));
    8.64 -	saio  = malloc(sizeof(struct td_state));
    8.65 +	ddqcow.td_state = malloc(sizeof(struct td_state));
    8.66 +	ddaio.td_state  = malloc(sizeof(struct td_state));
    8.67  	
    8.68  	/*Open qcow source file*/	
    8.69 -	drv1 = &tapdisk_qcow;
    8.70 -	sqcow->private = malloc(drv1->private_data_size);
    8.71 +	ddqcow.drv = &tapdisk_qcow;
    8.72 +	ddqcow.private = malloc(ddqcow.drv->private_data_size);
    8.73  
    8.74 -        if (drv1->td_open(sqcow, argv[2])!=0) {
    8.75 +        if (ddqcow.drv->td_open(&ddqcow, argv[2])!=0) {
    8.76  		DFPRINTF("Unable to open Qcow file [%s]\n",argv[2]);
    8.77  		exit(-1);
    8.78  	} else DFPRINTF("QCOW file opened, size %llu\n",
    8.79 -		      (long long unsigned)sqcow->size);
    8.80 +		      (long long unsigned)ddqcow.td_state->size);
    8.81  
    8.82 -	qcowio_fd = drv1->td_get_fd(sqcow);
    8.83 +	qcowio_fd = ddqcow.io_fd;
    8.84  
    8.85          /*Setup aio destination file*/
    8.86  	ret = stat(argv[1],&finfo);
    8.87 @@ -191,12 +192,12 @@ int main(int argc, char *argv[])
    8.88  				       argv[1], 0 - errno);
    8.89  				exit(-1);
    8.90  			}
    8.91 -			if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) {
    8.92 +			if (ftruncate(fd, (off_t)ddqcow.td_state->size<<9) < 0) {
    8.93  				DFPRINTF("Unable to create file "
    8.94  					"[%s] of size %llu (errno %d). "
    8.95  					 "Exiting...\n",
    8.96  					argv[1], 
    8.97 -					(long long unsigned)sqcow->size<<9, 
    8.98 +					(long long unsigned)ddqcow.td_state->size<<9, 
    8.99  					0 - errno);
   8.100  				close(fd);
   8.101  				exit(-1);
   8.102 @@ -238,43 +239,43 @@ int main(int argc, char *argv[])
   8.103  				close(fd);
   8.104  				exit(-1);
   8.105  			}
   8.106 -			if (size < sqcow->size<<9) {
   8.107 +			if (size < ddqcow.td_state->size<<9) {
   8.108  				DFPRINTF("ERROR: Not enough space on device "
   8.109  					"%s (%lu bytes available, %llu bytes required\n",
   8.110  					argv[1], size, 
   8.111 -					(long long unsigned)sqcow->size<<9);
   8.112 +					(long long unsigned)ddqcow.td_state->size<<9);
   8.113  				close(fd);
   8.114  				exit(-1);				
   8.115  			}
   8.116  		} else {
   8.117 -			if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) {
   8.118 +			if (ftruncate(fd, (off_t)ddqcow.td_state->size<<9) < 0) {
   8.119  				DFPRINTF("Unable to create file "
   8.120  					"[%s] of size %llu (errno %d). "
   8.121  					 "Exiting...\n",
   8.122  					argv[1], 
   8.123 -					(long long unsigned)sqcow->size<<9, 
   8.124 +					(long long unsigned)ddqcow.td_state->size<<9, 
   8.125  					 0 - errno);
   8.126  				close(fd);
   8.127  				exit(-1);
   8.128  			} else DFPRINTF("File [%s] truncated to length %llu "
   8.129  					"(%llu)\n", 
   8.130  				       argv[1], 
   8.131 -				       (long long unsigned)sqcow->size<<9, 
   8.132 -				       (long long unsigned)sqcow->size);
   8.133 +				       (long long unsigned)ddqcow.td_state->size<<9, 
   8.134 +				       (long long unsigned)ddqcow.td_state->size);
   8.135  		}
   8.136  		close(fd);
   8.137  	}
   8.138  
   8.139  	/*Open aio destination file*/	
   8.140 -	drv2 = &tapdisk_aio;
   8.141 -	saio->private = malloc(drv2->private_data_size);
   8.142 +	ddaio.drv = &tapdisk_aio;
   8.143 +	ddaio.private = malloc(ddaio.drv->private_data_size);
   8.144  
   8.145 -        if (drv2->td_open(saio, argv[1])!=0) {
   8.146 +        if (ddaio.drv->td_open(&ddaio, argv[1])!=0) {
   8.147  		DFPRINTF("Unable to open Qcow file [%s]\n", argv[1]);
   8.148  		exit(-1);
   8.149  	}
   8.150  
   8.151 -	aio_fd = drv2->td_get_fd(saio);
   8.152 +	aio_fd = ddaio.io_fd;
   8.153  
   8.154  	/*Initialise the output string*/
   8.155  	memset(output,0x20,25);
   8.156 @@ -298,9 +299,9 @@ int main(int argc, char *argv[])
   8.157  			}
   8.158  		
   8.159  			/*Attempt to read 4k sized blocks*/
   8.160 -			ret = drv1->td_queue_read(sqcow, i>>9,
   8.161 -						  BLOCK_PROCESSSZ>>9, buf, 
   8.162 -						  send_read_responses, i>>9, buf);
   8.163 +			ret = ddqcow.drv->td_queue_read(&ddqcow, i>>9,
   8.164 +							BLOCK_PROCESSSZ>>9, buf, 
   8.165 +							send_read_responses, i>>9, buf);
   8.166  
   8.167  			if (ret < 0) {
   8.168  				DFPRINTF("UNABLE TO READ block [%llu]\n",
   8.169 @@ -311,12 +312,12 @@ int main(int argc, char *argv[])
   8.170  				submit_events++;
   8.171  			}
   8.172  
   8.173 -			if (i >= sqcow->size<<9) {
   8.174 +			if (i >= ddqcow.td_state->size<<9) {
   8.175  				complete = 1;
   8.176  			}
   8.177  			
   8.178  			if ((submit_events % 10 == 0) || complete) 
   8.179 -				drv1->td_submit(sqcow);
   8.180 +				ddqcow.drv->td_submit(&ddqcow);
   8.181  			timeout.tv_usec = 0;
   8.182  			
   8.183  		} else {
   8.184 @@ -332,9 +333,9 @@ int main(int argc, char *argv[])
   8.185  			     
   8.186  		if (ret > 0) {
   8.187  			if (FD_ISSET(qcowio_fd[0], &readfds)) 
   8.188 -				drv1->td_do_callbacks(sqcow, 0);
   8.189 +				ddqcow.drv->td_do_callbacks(&ddqcow, 0);
   8.190  			if (FD_ISSET(aio_fd[0], &readfds)) 
   8.191 -				drv2->td_do_callbacks(saio, 0);
   8.192 +				ddaio.drv->td_do_callbacks(&ddaio, 0);
   8.193  		}
   8.194  		if (complete && (returned_write_events == submit_events)) 
   8.195  			running = 0;
     9.1 --- a/tools/blktap/drivers/tapdisk.c	Fri Feb 16 16:34:28 2007 +0000
     9.2 +++ b/tools/blktap/drivers/tapdisk.c	Fri Feb 16 20:31:27 2007 -0800
     9.3 @@ -48,6 +48,12 @@ static pid_t process;
     9.4  int connected_disks = 0;
     9.5  fd_list_entry_t *fd_start = NULL;
     9.6  
     9.7 +int do_cow_read(struct disk_driver *dd, blkif_request_t *req, 
     9.8 +		int sidx, uint64_t sector, int nr_secs);
     9.9 +
    9.10 +#define td_for_each_disk(tds, drv) \
    9.11 +        for (drv = tds->disks; drv != NULL; drv = drv->next)
    9.12 +
    9.13  void usage(void) 
    9.14  {
    9.15  	fprintf(stderr, "blktap-utils: v1.0.0\n");
    9.16 @@ -78,10 +84,17 @@ void daemonize(void)
    9.17  static void unmap_disk(struct td_state *s)
    9.18  {
    9.19  	tapdev_info_t *info = s->ring_info;
    9.20 -	struct tap_disk *drv = s->drv;
    9.21 +	struct disk_driver *dd, *tmp;
    9.22  	fd_list_entry_t *entry;
    9.23  
    9.24 -	drv->td_close(s);
    9.25 +	dd = s->disks;
    9.26 +	while (dd) {
    9.27 +		tmp = dd->next;
    9.28 +		dd->drv->td_close(dd);
    9.29 +		free(dd->private);
    9.30 +		free(dd);
    9.31 +		dd = tmp;
    9.32 +	}
    9.33  
    9.34  	if (info != NULL && info->mem > 0)
    9.35  	        munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE);
    9.36 @@ -96,7 +109,6 @@ static void unmap_disk(struct td_state *
    9.37  	free(s->fd_entry);
    9.38  	free(s->blkif);
    9.39  	free(s->ring_info);
    9.40 -        free(s->private);
    9.41  	free(s);
    9.42  
    9.43  	return;
    9.44 @@ -113,16 +125,19 @@ void sig_handler(int sig)
    9.45  static inline int LOCAL_FD_SET(fd_set *readfds)
    9.46  {
    9.47  	fd_list_entry_t *ptr;
    9.48 +	struct disk_driver *dd;
    9.49  
    9.50  	ptr = fd_start;
    9.51  	while (ptr != NULL) {
    9.52  		if (ptr->tap_fd) {
    9.53  			FD_SET(ptr->tap_fd, readfds);
    9.54 -			if (ptr->io_fd[READ]) 
    9.55 -				FD_SET(ptr->io_fd[READ], readfds);
    9.56 -			maxfds = (ptr->io_fd[READ] > maxfds ? 
    9.57 -					ptr->io_fd[READ]: maxfds);
    9.58 -			maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd: maxfds);
    9.59 +			td_for_each_disk(ptr->s, dd) {
    9.60 +				if (dd->io_fd[READ]) 
    9.61 +					FD_SET(dd->io_fd[READ], readfds);
    9.62 +				maxfds = (dd->io_fd[READ] > maxfds ? 
    9.63 +					  dd->io_fd[READ] : maxfds);
    9.64 +			}
    9.65 +			maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd : maxfds);
    9.66  		}
    9.67  		ptr = ptr->next;
    9.68  	}
    9.69 @@ -130,8 +145,7 @@ static inline int LOCAL_FD_SET(fd_set *r
    9.70  	return 0;
    9.71  }
    9.72  
    9.73 -static inline fd_list_entry_t *add_fd_entry(
    9.74 -	int tap_fd, int io_fd[MAX_IOFD], struct td_state *s)
    9.75 +static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state *s)
    9.76  {
    9.77  	fd_list_entry_t **pprev, *entry;
    9.78  	int i;
    9.79 @@ -139,12 +153,10 @@ static inline fd_list_entry_t *add_fd_en
    9.80  	DPRINTF("Adding fd_list_entry\n");
    9.81  
    9.82  	/*Add to linked list*/
    9.83 -	s->fd_entry = entry = malloc(sizeof(fd_list_entry_t));
    9.84 +	s->fd_entry   = entry = malloc(sizeof(fd_list_entry_t));
    9.85  	entry->tap_fd = tap_fd;
    9.86 -	for (i = 0; i < MAX_IOFD; i++)
    9.87 -		entry->io_fd[i] = io_fd[i];
    9.88 -	entry->s = s;
    9.89 -	entry->next = NULL;
    9.90 +	entry->s      = s;
    9.91 +	entry->next   = NULL;
    9.92  
    9.93  	pprev = &fd_start;
    9.94  	while (*pprev != NULL)
    9.95 @@ -171,7 +183,7 @@ static inline struct td_state *get_state
    9.96  static struct tap_disk *get_driver(int drivertype)
    9.97  {
    9.98  	/* blktapctrl has passed us the driver type */
    9.99 -	
   9.100 +
   9.101  	return dtypes[drivertype]->drv;
   9.102  }
   9.103  
   9.104 @@ -183,14 +195,36 @@ static struct td_state *state_init(void)
   9.105  
   9.106  	s = malloc(sizeof(struct td_state));
   9.107  	blkif = s->blkif = malloc(sizeof(blkif_t));
   9.108 -	s->ring_info = malloc(sizeof(tapdev_info_t));
   9.109 +	s->ring_info = calloc(1, sizeof(tapdev_info_t));
   9.110  
   9.111 -	for (i = 0; i < MAX_REQUESTS; i++)
   9.112 -		blkif->pending_list[i].count = 0;
   9.113 +	for (i = 0; i < MAX_REQUESTS; i++) {
   9.114 +		blkif->pending_list[i].secs_pending = 0;
   9.115 +		blkif->pending_list[i].submitting = 0;
   9.116 +	}
   9.117  
   9.118  	return s;
   9.119  }
   9.120  
   9.121 +static struct disk_driver *disk_init(struct td_state *s, struct tap_disk *drv)
   9.122 +{
   9.123 +	struct disk_driver *dd;
   9.124 +
   9.125 +	dd = calloc(1, sizeof(struct disk_driver));
   9.126 +	if (!dd)
   9.127 +		return NULL;
   9.128 +	
   9.129 +	dd->private = malloc(drv->private_data_size);
   9.130 +	if (!dd->private) {
   9.131 +		free(dd);
   9.132 +		return NULL;
   9.133 +	}
   9.134 +
   9.135 +	dd->drv      = drv;
   9.136 +	dd->td_state = s;
   9.137 +
   9.138 +	return dd;
   9.139 +}
   9.140 +
   9.141  static int map_new_dev(struct td_state *s, int minor)
   9.142  {
   9.143  	int tap_fd;
   9.144 @@ -246,6 +280,51 @@ static int map_new_dev(struct td_state *
   9.145  	return -1;
   9.146  }
   9.147  
   9.148 +static int open_disk(struct td_state *s, struct disk_driver *dd, char *path)
   9.149 +{
   9.150 +	int err;
   9.151 +	struct disk_driver *d = dd;
   9.152 +
   9.153 +	err = dd->drv->td_open(dd, path);
   9.154 +	if (err)
   9.155 +		return err;
   9.156 +
   9.157 +	/* load backing files as necessary */
   9.158 +	while (d->drv->td_has_parent(d)) {
   9.159 +		struct disk_driver *new;
   9.160 +		
   9.161 +		new = calloc(1, sizeof(struct disk_driver));
   9.162 +		if (!new)
   9.163 +			goto fail;
   9.164 +		new->drv      = d->drv;
   9.165 +		new->td_state = s;
   9.166 +		new->private  = malloc(new->drv->private_data_size);
   9.167 +		if (!new->private) {
   9.168 +			free(new);
   9.169 +			goto fail;
   9.170 +		}
   9.171 +		
   9.172 +		err = d->drv->td_get_parent(d, new);
   9.173 +		if (err)
   9.174 +			goto fail;
   9.175 +
   9.176 +		d = d->next = new;
   9.177 +	}
   9.178 +
   9.179 +	return 0;
   9.180 +
   9.181 + fail:
   9.182 +	DPRINTF("failed opening disk\n");
   9.183 +	while (dd) {
   9.184 +		d = dd->next;
   9.185 +		dd->drv->td_close(dd);
   9.186 +		free(dd->private);
   9.187 +		free(dd);
   9.188 +		dd = d;
   9.189 +	}
   9.190 +	return err;
   9.191 +}
   9.192 +
   9.193  static int read_msg(char *buf)
   9.194  {
   9.195  	int length, len, msglen, tap_fd, *io_fd;
   9.196 @@ -255,6 +334,7 @@ static int read_msg(char *buf)
   9.197  	msg_newdev_t *msg_dev;
   9.198  	msg_pid_t *msg_pid;
   9.199  	struct tap_disk *drv;
   9.200 +	struct disk_driver *dd;
   9.201  	int ret = -1;
   9.202  	struct td_state *s = NULL;
   9.203  	fd_list_entry_t *entry;
   9.204 @@ -289,20 +369,20 @@ static int read_msg(char *buf)
   9.205  			if (s == NULL)
   9.206  				goto params_done;
   9.207  
   9.208 -			s->drv = drv;
   9.209 -			s->private = malloc(drv->private_data_size);
   9.210 -			if (s->private == NULL) {
   9.211 +			s->disks = dd = disk_init(s, drv);
   9.212 +			if (!dd) {
   9.213  				free(s);
   9.214  				goto params_done;
   9.215  			}
   9.216  
   9.217  			/*Open file*/
   9.218 -			ret = drv->td_open(s, path);
   9.219 -			io_fd = drv->td_get_fd(s);
   9.220 +			ret = open_disk(s, dd, path);
   9.221 +			if (ret)
   9.222 +				goto params_done;
   9.223  
   9.224 -			entry = add_fd_entry(0, io_fd, s);
   9.225 +			entry = add_fd_entry(0, s);
   9.226  			entry->cookie = msg->cookie;
   9.227 -			DPRINTF("Entered cookie %d\n",entry->cookie);
   9.228 +			DPRINTF("Entered cookie %d\n", entry->cookie);
   9.229  			
   9.230  			memset(buf, 0x00, MSG_SIZE); 
   9.231  			
   9.232 @@ -323,13 +403,12 @@ static int read_msg(char *buf)
   9.233  			free(path);
   9.234  			return 1;
   9.235  			
   9.236 -			
   9.237 -			
   9.238  		case CTLMSG_NEWDEV:
   9.239  			msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t));
   9.240  
   9.241  			s = get_state(msg->cookie);
   9.242 -			DPRINTF("Retrieving state, cookie %d.....[%s]\n",msg->cookie, (s == NULL ? "FAIL":"OK"));
   9.243 +			DPRINTF("Retrieving state, cookie %d.....[%s]\n",
   9.244 +				msg->cookie, (s == NULL ? "FAIL":"OK"));
   9.245  			if (s != NULL) {
   9.246  				ret = ((map_new_dev(s, msg_dev->devnum) 
   9.247  					== msg_dev->devnum ? 0: -1));
   9.248 @@ -397,49 +476,75 @@ static inline void kick_responses(struct
   9.249  	}
   9.250  }
   9.251  
   9.252 -void io_done(struct td_state *s, int sid)
   9.253 +void io_done(struct disk_driver *dd, int sid)
   9.254  {
   9.255 -	struct tap_disk *drv = s->drv;
   9.256 +	struct tap_disk *drv = dd->drv;
   9.257  
   9.258  	if (!run) return; /*We have received signal to close*/
   9.259  
   9.260 -	if (drv->td_do_callbacks(s, sid) > 0) kick_responses(s);
   9.261 +	if (drv->td_do_callbacks(dd, sid) > 0) kick_responses(dd->td_state);
   9.262  
   9.263  	return;
   9.264  }
   9.265  
   9.266 -int send_responses(struct td_state *s, int res, int idx, void *private)
   9.267 +static inline uint64_t
   9.268 +segment_start(blkif_request_t *req, int sidx)
   9.269  {
   9.270 +	int i;
   9.271 +	uint64_t start = req->sector_number;
   9.272 +
   9.273 +	for (i = 0; i < sidx; i++) 
   9.274 +		start += (req->seg[i].last_sect - req->seg[i].first_sect + 1);
   9.275 +
   9.276 +	return start;
   9.277 +}
   9.278 +
   9.279 +uint64_t sends, responds;
   9.280 +int send_responses(struct disk_driver *dd, int res, 
   9.281 +		   uint64_t sector, int nr_secs, int idx, void *private)
   9.282 +{
   9.283 +	pending_req_t   *preq;
   9.284  	blkif_request_t *req;
   9.285  	int responses_queued = 0;
   9.286 +	struct td_state *s = dd->td_state;
   9.287  	blkif_t *blkif = s->blkif;
   9.288 +	int sidx = (int)private, secs_done = nr_secs;
   9.289  
   9.290 -	req   = &blkif->pending_list[idx].req;
   9.291 -			
   9.292 -	if ( (idx > MAX_REQUESTS-1) || 
   9.293 -	    (blkif->pending_list[idx].count == 0) )
   9.294 +	if ( (idx > MAX_REQUESTS-1) )
   9.295  	{
   9.296  		DPRINTF("invalid index returned(%u)!\n", idx);
   9.297  		return 0;
   9.298  	}
   9.299 -	
   9.300 -	if (res != 0) {
   9.301 -	        blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
   9.302 +	preq = &blkif->pending_list[idx];
   9.303 +	req  = &preq->req;
   9.304 +
   9.305 +	if (res == BLK_NOT_ALLOCATED) {
   9.306 +		res = do_cow_read(dd, req, sidx, sector, nr_secs);
   9.307 +		if (res >= 0) {
   9.308 +			secs_done = res;
   9.309 +			res = 0;
   9.310 +		} else
   9.311 +			secs_done = 0;
   9.312  	}
   9.313  
   9.314 -	blkif->pending_list[idx].count--;
   9.315 +	preq->secs_pending -= secs_done;
   9.316 +
   9.317 +	if (res == -EBUSY && preq->submitting) 
   9.318 +		return -EBUSY;  /* propagate -EBUSY back to higher layers */
   9.319 +	if (res) 
   9.320 +		preq->status = BLKIF_RSP_ERROR;
   9.321  	
   9.322 -	if (blkif->pending_list[idx].count == 0) 
   9.323 +	if (!preq->submitting && preq->secs_pending == 0) 
   9.324  	{
   9.325  		blkif_request_t tmp;
   9.326  		blkif_response_t *rsp;
   9.327 -		
   9.328 -		tmp = blkif->pending_list[idx].req;
   9.329 +
   9.330 +		tmp = preq->req;
   9.331  		rsp = (blkif_response_t *)req;
   9.332  		
   9.333  		rsp->id = tmp.id;
   9.334  		rsp->operation = tmp.operation;
   9.335 -		rsp->status = blkif->pending_list[idx].status;
   9.336 +		rsp->status = preq->status;
   9.337  		
   9.338  		write_rsp_to_ring(s, rsp);
   9.339  		responses_queued++;
   9.340 @@ -447,15 +552,51 @@ int send_responses(struct td_state *s, i
   9.341  	return responses_queued;
   9.342  }
   9.343  
   9.344 +int do_cow_read(struct disk_driver *dd, blkif_request_t *req, 
   9.345 +		int sidx, uint64_t sector, int nr_secs)
   9.346 +{
   9.347 +	char *page;
   9.348 +	int ret, early;
   9.349 +	uint64_t seg_start, seg_end;
   9.350 +	struct td_state  *s = dd->td_state;
   9.351 +	tapdev_info_t *info = s->ring_info;
   9.352 +	struct disk_driver *parent = dd->next;
   9.353 +	
   9.354 +	seg_start = segment_start(req, sidx);
   9.355 +	seg_end   = seg_start + req->seg[sidx].last_sect + 1;
   9.356 +	
   9.357 +	ASSERT(sector >= seg_start && sector + nr_secs <= seg_end);
   9.358 +
   9.359 +	page  = (char *)MMAP_VADDR(info->vstart, 
   9.360 +				   (unsigned long)req->id, sidx);
   9.361 +	page += (req->seg[sidx].first_sect << SECTOR_SHIFT);
   9.362 +	page += ((sector - seg_start) << SECTOR_SHIFT);
   9.363 +
   9.364 +	if (!parent) {
   9.365 +		memset(page, 0, nr_secs << SECTOR_SHIFT);
   9.366 +		return nr_secs;
   9.367 +	}
   9.368 +
   9.369 +	/* reissue request to backing file */
   9.370 +	ret = parent->drv->td_queue_read(parent, sector, nr_secs,
   9.371 +					 page, send_responses, 
   9.372 +					 req->id, (void *)sidx);
   9.373 +	if (ret > 0)
   9.374 +		parent->early += ret;
   9.375 +
   9.376 +	return ((ret >= 0) ? 0 : ret);
   9.377 +}
   9.378 +
   9.379  static void get_io_request(struct td_state *s)
   9.380  {
   9.381 -	RING_IDX          rp, rc, j, i, ret;
   9.382 +	RING_IDX          rp, rc, j, i;
   9.383  	blkif_request_t  *req;
   9.384 -	int idx, nsects;
   9.385 +	int idx, nsects, ret;
   9.386  	uint64_t sector_nr;
   9.387  	char *page;
   9.388  	int early = 0; /* count early completions */
   9.389 -	struct tap_disk *drv = s->drv;
   9.390 +	struct disk_driver *dd = s->disks;
   9.391 +	struct tap_disk *drv   = dd->drv;
   9.392  	blkif_t *blkif = s->blkif;
   9.393  	tapdev_info_t *info = s->ring_info;
   9.394  	int page_size = getpagesize();
   9.395 @@ -466,23 +607,33 @@ static void get_io_request(struct td_sta
   9.396  	rmb();
   9.397  	for (j = info->fe_ring.req_cons; j != rp; j++)
   9.398  	{
   9.399 -		int done = 0; 
   9.400 +		int done = 0, start_seg = 0; 
   9.401  
   9.402  		req = NULL;
   9.403  		req = RING_GET_REQUEST(&info->fe_ring, j);
   9.404  		++info->fe_ring.req_cons;
   9.405  		
   9.406  		if (req == NULL) continue;
   9.407 -		
   9.408 +
   9.409  		idx = req->id;
   9.410 -		ASSERT(blkif->pending_list[idx].count == 0);
   9.411 -		memcpy(&blkif->pending_list[idx].req, req, sizeof(*req));
   9.412 -		blkif->pending_list[idx].status = BLKIF_RSP_OKAY;
   9.413 -		blkif->pending_list[idx].count = req->nr_segments;
   9.414  
   9.415 -		sector_nr = req->sector_number;
   9.416 +		if (info->busy.req) {
   9.417 +			/* continue where we left off last time */
   9.418 +			ASSERT(info->busy.req == req);
   9.419 +			start_seg = info->busy.seg_idx;
   9.420 +			sector_nr = segment_start(req, start_seg);
   9.421 +			info->busy.seg_idx = 0;
   9.422 +			info->busy.req     = NULL;
   9.423 +		} else {
   9.424 +			ASSERT(blkif->pending_list[idx].secs_pending == 0);
   9.425 +			memcpy(&blkif->pending_list[idx].req, 
   9.426 +			       req, sizeof(*req));
   9.427 +			blkif->pending_list[idx].status = BLKIF_RSP_OKAY;
   9.428 +			blkif->pending_list[idx].submitting = 1;
   9.429 +			sector_nr = req->sector_number;
   9.430 +		}
   9.431  
   9.432 -		for (i = 0; i < req->nr_segments; i++) {
   9.433 +		for (i = start_seg; i < req->nr_segments; i++) {
   9.434  			nsects = req->seg[i].last_sect - 
   9.435  				 req->seg[i].first_sect + 1;
   9.436  	
   9.437 @@ -508,31 +659,37 @@ static void get_io_request(struct td_sta
   9.438  					(long long unsigned) sector_nr);
   9.439  				continue;
   9.440  			}
   9.441 -			
   9.442 +
   9.443 +			blkif->pending_list[idx].secs_pending += nsects;
   9.444 +
   9.445  			switch (req->operation) 
   9.446  			{
   9.447  			case BLKIF_OP_WRITE:
   9.448 -				ret = drv->td_queue_write(s, sector_nr,
   9.449 -						nsects, page, send_responses, 
   9.450 -						idx, NULL);
   9.451 -				if (ret > 0) early += ret;
   9.452 +				ret = drv->td_queue_write(dd, sector_nr,
   9.453 +							  nsects, page, 
   9.454 +							  send_responses,
   9.455 +							  idx, (void *)i);
   9.456 +				if (ret > 0) dd->early += ret;
   9.457  				else if (ret == -EBUSY) {
   9.458 -					/*
   9.459 -					 * TODO: Sector is locked         *
   9.460 -					 * Need to put req back on queue  *
   9.461 -					 */
   9.462 +					/* put req back on queue */
   9.463 +					--info->fe_ring.req_cons;
   9.464 +					info->busy.req     = req;
   9.465 +					info->busy.seg_idx = i;
   9.466 +					goto out;
   9.467  				}
   9.468  				break;
   9.469  			case BLKIF_OP_READ:
   9.470 -				ret = drv->td_queue_read(s, sector_nr,
   9.471 -						nsects, page, send_responses, 
   9.472 -						idx, NULL);
   9.473 -				if (ret > 0) early += ret;
   9.474 +				ret = drv->td_queue_read(dd, sector_nr,
   9.475 +							 nsects, page, 
   9.476 +							 send_responses,
   9.477 +							 idx, (void *)i);
   9.478 +				if (ret > 0) dd->early += ret;
   9.479  				else if (ret == -EBUSY) {
   9.480 -					/*
   9.481 -					 * TODO: Sector is locked         *
   9.482 -					 * Need to put req back on queue  *
   9.483 -					 */
   9.484 +					/* put req back on queue */
   9.485 +					--info->fe_ring.req_cons;
   9.486 +					info->busy.req     = req;
   9.487 +					info->busy.seg_idx = i;
   9.488 +					goto out;
   9.489  				}
   9.490  				break;
   9.491  			default:
   9.492 @@ -541,14 +698,22 @@ static void get_io_request(struct td_sta
   9.493  			}
   9.494  			sector_nr += nsects;
   9.495  		}
   9.496 +		blkif->pending_list[idx].submitting = 0;
   9.497 +		/* force write_rsp_to_ring for synchronous case */
   9.498 +		if (blkif->pending_list[idx].secs_pending == 0)
   9.499 +			dd->early += send_responses(dd, 0, 0, 0, idx, (void *)0);
   9.500  	}
   9.501  
   9.502 + out:
   9.503  	/*Batch done*/
   9.504 -	drv->td_submit(s);
   9.505 -	
   9.506 -	if (early > 0) 
   9.507 -		io_done(s,10);
   9.508 -		
   9.509 +	td_for_each_disk(s, dd) {
   9.510 +		dd->early += dd->drv->td_submit(dd);
   9.511 +		if (dd->early > 0) {
   9.512 +			io_done(dd, 10);
   9.513 +			dd->early = 0;
   9.514 +		}
   9.515 +	}
   9.516 +
   9.517  	return;
   9.518  }
   9.519  
   9.520 @@ -558,10 +723,9 @@ int main(int argc, char *argv[])
   9.521  	char *p, *buf;
   9.522  	fd_set readfds, writefds;	
   9.523  	fd_list_entry_t *ptr;
   9.524 -	struct tap_disk *drv;
   9.525  	struct td_state *s;
   9.526  	char openlogbuf[128];
   9.527 -	
   9.528 +
   9.529  	if (argc != 3) usage();
   9.530  
   9.531  	daemonize();
   9.532 @@ -573,12 +737,12 @@ int main(int argc, char *argv[])
   9.533  	signal (SIGINT, sig_handler);
   9.534  
   9.535  	/*Open the control channel*/
   9.536 -	fds[READ] = open(argv[1],O_RDWR|O_NONBLOCK);
   9.537 +	fds[READ]  = open(argv[1],O_RDWR|O_NONBLOCK);
   9.538  	fds[WRITE] = open(argv[2],O_RDWR|O_NONBLOCK);
   9.539  
   9.540  	if ( (fds[READ] < 0) || (fds[WRITE] < 0) ) 
   9.541  	{
   9.542 -		DPRINTF("FD open failed [%d,%d]\n",fds[READ], fds[WRITE]);
   9.543 +		DPRINTF("FD open failed [%d,%d]\n", fds[READ], fds[WRITE]);
   9.544  		exit(-1);
   9.545  	}
   9.546  
   9.547 @@ -608,11 +772,22 @@ int main(int argc, char *argv[])
   9.548  		{
   9.549  			ptr = fd_start;
   9.550  			while (ptr != NULL) {
   9.551 -				if (FD_ISSET(ptr->tap_fd, &readfds)) 
   9.552 +				int progress_made = 0;
   9.553 +				struct disk_driver *dd;
   9.554 +				tapdev_info_t *info = ptr->s->ring_info;
   9.555 +
   9.556 +				td_for_each_disk(ptr->s, dd) {
   9.557 +					if (dd->io_fd[READ] &&
   9.558 +					    FD_ISSET(dd->io_fd[READ], 
   9.559 +						     &readfds)) {
   9.560 +						io_done(dd, READ);
   9.561 +						progress_made = 1;
   9.562 +					}
   9.563 +				}
   9.564 +
   9.565 +				if (FD_ISSET(ptr->tap_fd, &readfds) ||
   9.566 +				    (info->busy.req && progress_made))
   9.567  					get_io_request(ptr->s);
   9.568 -				if (ptr->io_fd[READ] && 
   9.569 -						FD_ISSET(ptr->io_fd[READ], &readfds)) 
   9.570 -					io_done(ptr->s, READ);
   9.571  
   9.572  				ptr = ptr->next;
   9.573  			}
   9.574 @@ -628,11 +803,8 @@ int main(int argc, char *argv[])
   9.575  	ptr = fd_start;
   9.576  	while (ptr != NULL) {
   9.577  		s = ptr->s;
   9.578 -		drv = s->drv;
   9.579  
   9.580  		unmap_disk(s);
   9.581 -		drv->td_close(s);
   9.582 -		free(s->private);
   9.583  		free(s->blkif);
   9.584  		free(s->ring_info);
   9.585  		free(s);
    10.1 --- a/tools/blktap/drivers/tapdisk.h	Fri Feb 16 16:34:28 2007 +0000
    10.2 +++ b/tools/blktap/drivers/tapdisk.h	Fri Feb 16 20:31:27 2007 -0800
    10.3 @@ -43,6 +43,9 @@
    10.4   *   - The fd used for poll is an otherwise unused pipe, which allows poll to 
    10.5   *     be safely called without ever returning anything.
    10.6   * 
    10.7 + * NOTE: tapdisk uses the number of sectors submitted per request as a 
    10.8 + * ref count.  Plugins must use the callback function to communicate the
    10.9 + * completion--or error--of every sector submitted to them.
   10.10   */
   10.11  
   10.12  #ifndef TAPDISK_H_
   10.13 @@ -65,39 +68,55 @@
   10.14  #define SECTOR_SHIFT             9
   10.15  #define DEFAULT_SECTOR_SIZE    512
   10.16  
   10.17 +#define MAX_IOFD                 2
   10.18 +
   10.19 +#define BLK_NOT_ALLOCATED       99
   10.20 +
   10.21 +struct td_state;
   10.22 +struct tap_disk;
   10.23 +
   10.24 +struct disk_driver {	/* one driver instance per image; instances are chained through 'next' */
   10.25 +	int early;	/* running total of positive td_queue_read/td_queue_write returns plus td_submit/send_responses returns; get_io_request calls io_done() and zeroes it when > 0 */
   10.26 +	void *private;	/* driver-private state -- presumably private_data_size bytes; confirm at the allocation site */
   10.27 +	int io_fd[MAX_IOFD];	/* main() calls io_done() when io_fd[READ] is set in readfds; a value of 0 is treated as "no fd" */
   10.28 +	struct tap_disk *drv;	/* operations table through which the td_* calls are dispatched (e.g. dd->drv->td_submit) */
   10.29 +	struct td_state *td_state;	/* presumably the virtual disk this driver belongs to -- confirm */
   10.30 +	struct disk_driver *next;	/* next driver in the list -- presumably the parent image in the chain; confirm against td_for_each_disk */
   10.31 +};
   10.32 +
   10.33  /* This structure represents the state of an active virtual disk.           */
   10.34  struct td_state {
   10.35 -	void *private;
   10.36 -	void *drv;
   10.37 +	struct disk_driver *disks;
   10.38  	void *blkif;
   10.39  	void *image;
   10.40  	void *ring_info;
   10.41  	void *fd_entry;
   10.42 -	char backing_file[1024]; /*Used by differencing disks, e.g. qcow*/
   10.43  	unsigned long      sector_size;
   10.44  	unsigned long long size;
   10.45  	unsigned int       info;
   10.46  };
   10.47  
   10.48  /* Prototype of the callback to activate as requests complete.              */
   10.49 -typedef int (*td_callback_t)(struct td_state *s, int res, int id, void *prv);
   10.50 +typedef int (*td_callback_t)(struct disk_driver *dd, int res, uint64_t sector,
   10.51 +			     int nb_sectors, int id, void *private);
   10.52  
   10.53  /* Structure describing the interface to a virtual disk implementation.     */
   10.54  /* See note at the top of this file describing this interface.              */
   10.55  struct tap_disk {
   10.56  	const char *disk_type;
   10.57  	int private_data_size;
   10.58 -	int (*td_open)        (struct td_state *s, const char *name);
   10.59 -	int (*td_queue_read)  (struct td_state *s, uint64_t sector,
   10.60 -			       int nb_sectors, char *buf, td_callback_t cb,
   10.61 +	int (*td_open)        (struct disk_driver *dd, const char *name);
   10.62 +	int (*td_queue_read)  (struct disk_driver *dd, uint64_t sector,
   10.63 +			       int nb_sectors, char *buf, td_callback_t cb, 
   10.64  			       int id, void *prv);
   10.65 -	int (*td_queue_write) (struct td_state *s, uint64_t sector,
   10.66 -			       int nb_sectors, char *buf, td_callback_t cb,
   10.67 +	int (*td_queue_write) (struct disk_driver *dd, uint64_t sector,
   10.68 +			       int nb_sectors, char *buf, td_callback_t cb, 
   10.69  			       int id, void *prv);
   10.70 -	int (*td_submit)      (struct td_state *s);
   10.71 -	int *(*td_get_fd)      (struct td_state *s);
   10.72 -	int (*td_close)       (struct td_state *s);
   10.73 -	int (*td_do_callbacks)(struct td_state *s, int sid);
   10.74 +	int (*td_submit)      (struct disk_driver *dd);
   10.75 +	int (*td_has_parent)  (struct disk_driver *dd);
   10.76 +	int (*td_get_parent)  (struct disk_driver *dd, struct disk_driver *p);
   10.77 +	int (*td_close)       (struct disk_driver *dd);
   10.78 +	int (*td_do_callbacks)(struct disk_driver *dd, int sid);
   10.79  };
   10.80  
   10.81  typedef struct disk_info {
   10.82 @@ -119,14 +138,13 @@ extern struct tap_disk tapdisk_vmdk;
   10.83  extern struct tap_disk tapdisk_ram;
   10.84  extern struct tap_disk tapdisk_qcow;
   10.85  
   10.86 -#define MAX_DISK_TYPES  20
   10.87 -#define MAX_IOFD        2
   10.88 +#define MAX_DISK_TYPES     20
   10.89  
   10.90 -#define DISK_TYPE_AIO   0
   10.91 -#define DISK_TYPE_SYNC  1
   10.92 -#define DISK_TYPE_VMDK  2
   10.93 -#define DISK_TYPE_RAM   3
   10.94 -#define DISK_TYPE_QCOW  4
   10.95 +#define DISK_TYPE_AIO      0
   10.96 +#define DISK_TYPE_SYNC     1
   10.97 +#define DISK_TYPE_VMDK     2
   10.98 +#define DISK_TYPE_RAM      3
   10.99 +#define DISK_TYPE_QCOW     4
  10.100  
  10.101  
  10.102  /*Define Individual Disk Parameters here */
  10.103 @@ -197,12 +215,10 @@ typedef struct driver_list_entry {
  10.104  typedef struct fd_list_entry {
  10.105  	int cookie;
  10.106  	int  tap_fd;
  10.107 -	int  io_fd[MAX_IOFD];
  10.108  	struct td_state *s;
  10.109  	struct fd_list_entry **pprev, *next;
  10.110  } fd_list_entry_t;
  10.111  
  10.112  int qcow_create(const char *filename, uint64_t total_size,
  10.113  		const char *backing_file, int flags);
  10.114 -
  10.115  #endif /*TAPDISK_H_*/
    11.1 --- a/tools/blktap/lib/blktaplib.h	Fri Feb 16 16:34:28 2007 +0000
    11.2 +++ b/tools/blktap/lib/blktaplib.h	Fri Feb 16 20:31:27 2007 -0800
    11.3 @@ -91,8 +91,9 @@ struct blkif;
    11.4  
    11.5  typedef struct {
    11.6  	blkif_request_t  req;
    11.7 -	struct blkif         *blkif;
    11.8 -	int              count;
    11.9 +	struct blkif    *blkif;
   11.10 +	int              submitting;
   11.11 +	int              secs_pending;
   11.12          int16_t          status;
   11.13  } pending_req_t;
   11.14  
   11.15 @@ -116,7 +117,7 @@ typedef struct blkif {
   11.16  	
   11.17  	void *prv;  /* device-specific data */
   11.18  	void *info; /*Image parameter passing */
   11.19 -	pending_req_t    pending_list[MAX_REQUESTS];
   11.20 +	pending_req_t pending_list[MAX_REQUESTS];
   11.21  	int devnum;
   11.22  	int fds[2];
   11.23  	int be_id;
   11.24 @@ -141,6 +142,11 @@ int blkif_init(blkif_t *blkif, long int 
   11.25  void free_blkif(blkif_t *blkif);
   11.26  void __init_blkif(void);
   11.27  
   11.28 +typedef struct busy_state {	/* remembers a request whose queueing was interrupted by -EBUSY */
   11.29 +	int seg_idx;	/* segment index at which get_io_request stopped when the driver returned -EBUSY */
   11.30 +	blkif_request_t *req;	/* the interrupted request; when non-NULL and I/O progress was made, main() calls get_io_request again */
   11.31 +} busy_state_t;
   11.32 +
   11.33  typedef struct tapdev_info {
   11.34  	int fd;
   11.35  	char *mem;
   11.36 @@ -148,6 +154,7 @@ typedef struct tapdev_info {
   11.37  	blkif_back_ring_t  fe_ring;
   11.38  	unsigned long vstart;
   11.39  	blkif_t *blkif;
   11.40 +	busy_state_t busy;
   11.41  } tapdev_info_t;
   11.42  
   11.43  typedef struct domid_translate {
    12.1 --- a/tools/blktap/lib/xs_api.c	Fri Feb 16 16:34:28 2007 +0000
    12.2 +++ b/tools/blktap/lib/xs_api.c	Fri Feb 16 20:31:27 2007 -0800
    12.3 @@ -311,8 +311,8 @@ int unregister_xenbus_watch(struct xs_ha
    12.4  	}
    12.5  
    12.6  	if (!xs_unwatch(h, watch->node, token))
    12.7 -		DPRINTF("XENBUS Failed to release watch %s: %i\n",
    12.8 -			watch->node, er);
    12.9 +		DPRINTF("XENBUS Failed to release watch %s\n",
   12.10 +			watch->node);
   12.11  
   12.12  	list_del(&watch->list);
   12.13  	
   12.14 @@ -351,9 +351,9 @@ int xs_fire_next_watch(struct xs_handle 
   12.15  	
   12.16  	node  = res[XS_WATCH_PATH];
   12.17  	token = res[XS_WATCH_TOKEN];
   12.18 -	
   12.19 +
   12.20  	w = find_watch(token);
   12.21 -	if (w)
   12.22 +	if (w) 
   12.23  		w->callback(h, w, node);
   12.24  
   12.25  	free(res);