ia64/xen-unstable

changeset 17111:89ee92328720

blktap: Remove some code duplication

With my qcow2 patch to blktap I contributed a bit to the mess, so here
is an attempt to clean up and remove at least some of the code
duplication in the functions dealing with AIO operations.
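
For illustration only (an editor's sketch, not part of the changeset):
with the helpers consolidated in tapaio.c, a driver's AIO plumbing
reduces to roughly the following. The "my_*" names are hypothetical;
the tap_aio_* signatures and MAX_AIO_REQS are taken from the patched
sources below, and 512-byte sectors are assumed.

    #include "tapaio.h"    /* new consolidated AIO helpers */

    struct my_state {
            int fd;                 /* backing file descriptor */
            tap_aio_context_t aio;  /* replaces per-driver iocb lists/queues */
    };

    static int my_open(struct my_state *prv, uint64_t sectors)
    {
            /* One call now sets up the iocb free list, request queue,
             * event array and sector-lock bitmap. */
            return tap_aio_init(&prv->aio, sectors, MAX_AIO_REQS);
    }

    static int my_queue_read(struct my_state *prv, uint64_t sector,
                             int nb_sectors, char *buf, td_callback_t cb,
                             int id, void *private)
    {
            /* Queues a pread iocb; returns -ENOMEM when no iocb is free. */
            return tap_aio_read(&prv->aio, prv->fd, nb_sectors * 512,
                                sector * 512, buf, cb, id, sector, private);
    }

    static int my_submit(struct my_state *prv)
    {
            /* Flushes everything queued since the last submit. */
            return tap_aio_submit(&prv->aio);
    }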

Signed-off-by: Kevin Wolf <kwolf@suse.de>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Feb 25 09:12:20 2008 +0000 (2008-02-25)
parents cd06c0c1bfff
children 44ec075760b6
files tools/blktap/drivers/block-aio.c tools/blktap/drivers/block-qcow.c tools/blktap/drivers/block-qcow2.c tools/blktap/drivers/tapaio.c tools/blktap/drivers/tapaio.h
line diff
     1.1 --- a/tools/blktap/drivers/block-aio.c	Mon Feb 25 09:09:01 2008 +0000
     1.2 +++ b/tools/blktap/drivers/block-aio.c	Mon Feb 25 09:12:20 2008 +0000
     1.3 @@ -52,28 +52,11 @@
     1.4  #define O_LARGEFILE	0
     1.5  #endif
     1.6  
     1.7 -struct pending_aio {
     1.8 -	td_callback_t cb;
     1.9 -	int id;
    1.10 -	void *private;
    1.11 -	uint64_t lsec;
    1.12 -};
    1.13 -
    1.14  struct tdaio_state {
    1.15  	int fd;
    1.16 -	
    1.17 -	/* libaio state */
    1.18 -	tap_aio_context_t  aio_ctx;
    1.19 -	struct iocb        iocb_list  [MAX_AIO_REQS];
    1.20 -	struct iocb       *iocb_free  [MAX_AIO_REQS];
    1.21 -	struct pending_aio pending_aio[MAX_AIO_REQS];
    1.22 -	int                iocb_free_count;
    1.23 -	struct iocb       *iocb_queue[MAX_AIO_REQS];
    1.24 -	int                iocb_queued;
    1.25 -	struct io_event    aio_events[MAX_AIO_REQS];
    1.26 +	tap_aio_context_t aio;
    1.27  };
    1.28  
    1.29 -#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
    1.30  
    1.31  /*Get Image size, secsize*/
    1.32  static int get_image_info(struct td_state *s, int fd)
    1.33 @@ -131,7 +114,7 @@ static inline void init_fds(struct disk_
    1.34  	for(i = 0; i < MAX_IOFD; i++) 
    1.35  		dd->io_fd[i] = 0;
    1.36  
    1.37 -	dd->io_fd[0] = prv->aio_ctx.pollfd;
    1.38 +	dd->io_fd[0] = prv->aio.aio_ctx.pollfd;
    1.39  }
    1.40  
    1.41  /* Open the disk file and initialize aio state. */
    1.42 @@ -142,27 +125,11 @@ int tdaio_open (struct disk_driver *dd, 
    1.43  	struct tdaio_state *prv = (struct tdaio_state *)dd->private;
    1.44  
    1.45  	DPRINTF("block-aio open('%s')", name);
    1.46 -	/* Initialize AIO */
    1.47 -	prv->iocb_free_count = MAX_AIO_REQS;
    1.48 -	prv->iocb_queued     = 0;
    1.49  
    1.50 -	ret = tap_aio_setup(&prv->aio_ctx, prv->aio_events, MAX_AIO_REQS);
    1.51 -	if (ret < 0) {
    1.52 -                if (ret == -EAGAIN) {
    1.53 -                        DPRINTF("Couldn't setup AIO context.  If you are "
    1.54 -                                "trying to concurrently use a large number "
    1.55 -                                "of blktap-based disks, you may need to "
    1.56 -                                "increase the system-wide aio request limit. "
    1.57 -                                "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
    1.58 -                                "aio-max-nr')\n");
    1.59 -                } else {
    1.60 -                        DPRINTF("Couldn't setup AIO context.\n");
    1.61 -                }
    1.62 -		goto done;
    1.63 -	}
    1.64 -
    1.65 -	for (i=0;i<MAX_AIO_REQS;i++)
    1.66 -		prv->iocb_free[i] = &prv->iocb_list[i];
    1.67 +	/* Initialize AIO */
    1.68 +	ret = tap_aio_init(&prv->aio, 0, MAX_AIO_REQS);
    1.69 +	if (ret != 0)
    1.70 +		return ret;
    1.71  
    1.72  	/* Open the file */
    1.73  	o_flags = O_DIRECT | O_LARGEFILE | 
    1.74 @@ -198,87 +165,40 @@ int tdaio_queue_read(struct disk_driver 
    1.75  		     int nb_sectors, char *buf, td_callback_t cb,
    1.76  		     int id, void *private)
    1.77  {
    1.78 -	struct   iocb *io;
    1.79 -	struct   pending_aio *pio;
    1.80  	struct   td_state    *s   = dd->td_state;
    1.81  	struct   tdaio_state *prv = (struct tdaio_state *)dd->private;
    1.82  	int      size    = nb_sectors * s->sector_size;
    1.83  	uint64_t offset  = sector * (uint64_t)s->sector_size;
    1.84 -	long     ioidx;
    1.85 -	
    1.86 -	if (prv->iocb_free_count == 0)
    1.87 -		return -ENOMEM;
    1.88 -	io = prv->iocb_free[--prv->iocb_free_count];
    1.89 -	
    1.90 -	ioidx = IOCB_IDX(prv, io);
    1.91 -	pio = &prv->pending_aio[ioidx];
    1.92 -	pio->cb = cb;
    1.93 -	pio->id = id;
    1.94 -	pio->private = private;
    1.95 -	pio->lsec = sector;
    1.96 -	
    1.97 -	io_prep_pread(io, prv->fd, buf, size, offset);
    1.98 -	io->data = (void *)ioidx;
    1.99 -	
   1.100 -	prv->iocb_queue[prv->iocb_queued++] = io;
   1.101  
   1.102 -	return 0;
   1.103 +	return tap_aio_read(&prv->aio, prv->fd, size, offset, buf, 
   1.104 +		cb, id, sector, private);
   1.105  }
   1.106  			
   1.107  int tdaio_queue_write(struct disk_driver *dd, uint64_t sector,
   1.108  		      int nb_sectors, char *buf, td_callback_t cb,
   1.109  		      int id, void *private)
   1.110  {
   1.111 -	struct   iocb *io;
   1.112 -	struct   pending_aio *pio;
   1.113  	struct   td_state    *s   = dd->td_state;
   1.114  	struct   tdaio_state *prv = (struct tdaio_state *)dd->private;
   1.115  	int      size    = nb_sectors * s->sector_size;
   1.116  	uint64_t offset  = sector * (uint64_t)s->sector_size;
   1.117 -	long     ioidx;
   1.118 -	
   1.119 -	if (prv->iocb_free_count == 0)
   1.120 -		return -ENOMEM;
   1.121 -	io = prv->iocb_free[--prv->iocb_free_count];
   1.122 -	
   1.123 -	ioidx = IOCB_IDX(prv, io);
   1.124 -	pio = &prv->pending_aio[ioidx];
   1.125 -	pio->cb = cb;
   1.126 -	pio->id = id;
   1.127 -	pio->private = private;
   1.128 -	pio->lsec = sector;
   1.129 -	
   1.130 -	io_prep_pwrite(io, prv->fd, buf, size, offset);
   1.131 -	io->data = (void *)ioidx;
   1.132 -	
   1.133 -	prv->iocb_queue[prv->iocb_queued++] = io;
   1.134  
   1.135 -	return 0;
   1.136 +	return tap_aio_write(&prv->aio, prv->fd, size, offset, buf,
   1.137 +		cb, id, sector, private);
   1.138 +}
   1.139 +
   1.140 +int tdaio_submit(struct disk_driver *dd)
   1.141 +{
   1.142 +	struct tdaio_state *prv = (struct tdaio_state *)dd->private;
   1.143 +
   1.144 +	return tap_aio_submit(&prv->aio);
   1.145  }
   1.146  			
   1.147 -int tdaio_submit(struct disk_driver *dd)
   1.148 -{
   1.149 -	int ret;
   1.150 -	struct tdaio_state *prv = (struct tdaio_state *)dd->private;
   1.151 -
   1.152 -	if (!prv->iocb_queued)
   1.153 -		return 0;
   1.154 -
   1.155 -	ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue);
   1.156 -	
   1.157 -	/* XXX: TODO: Handle error conditions here. */
   1.158 -	
   1.159 -	/* Success case: */
   1.160 -	prv->iocb_queued = 0;
   1.161 -	
   1.162 -	return 0;
   1.163 -}
   1.164 -
   1.165  int tdaio_close(struct disk_driver *dd)
   1.166  {
   1.167  	struct tdaio_state *prv = (struct tdaio_state *)dd->private;
   1.168  	
   1.169 -	io_destroy(prv->aio_ctx.aio_ctx);
   1.170 +	io_destroy(prv->aio.aio_ctx.aio_ctx);
   1.171  	close(prv->fd);
   1.172  
   1.173  	return 0;
   1.174 @@ -290,26 +210,26 @@ int tdaio_do_callbacks(struct disk_drive
   1.175  	struct io_event *ep;
   1.176  	struct tdaio_state *prv = (struct tdaio_state *)dd->private;
   1.177  
   1.178 -	nr_events = tap_aio_get_events(&prv->aio_ctx);
   1.179 +	nr_events = tap_aio_get_events(&prv->aio.aio_ctx);
   1.180  repeat:
   1.181 -	for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
   1.182 +	for (ep = prv->aio.aio_events, i = nr_events; i-- > 0; ep++) {
   1.183  		struct iocb        *io  = ep->obj;
   1.184  		struct pending_aio *pio;
   1.185  		
   1.186 -		pio = &prv->pending_aio[(long)io->data];
   1.187 +		pio = &prv->aio.pending_aio[(long)io->data];
   1.188  		rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1,
   1.189 -			       pio->lsec, io->u.c.nbytes >> 9, 
   1.190 +			       pio->sector, io->u.c.nbytes >> 9, 
   1.191  			       pio->id, pio->private);
   1.192  
   1.193 -		prv->iocb_free[prv->iocb_free_count++] = io;
   1.194 +		prv->aio.iocb_free[prv->aio.iocb_free_count++] = io;
   1.195  	}
   1.196  
   1.197  	if (nr_events) {
   1.198 -		nr_events = tap_aio_more_events(&prv->aio_ctx);
   1.199 +		nr_events = tap_aio_more_events(&prv->aio.aio_ctx);
   1.200  		goto repeat;
   1.201  	}
   1.202  
   1.203 -	tap_aio_continue(&prv->aio_ctx);
   1.204 +	tap_aio_continue(&prv->aio.aio_ctx);
   1.205  
   1.206  	return rsp;
   1.207  }
     2.1 --- a/tools/blktap/drivers/block-qcow.c	Mon Feb 25 09:09:01 2008 +0000
     2.2 +++ b/tools/blktap/drivers/block-qcow.c	Mon Feb 25 09:12:20 2008 +0000
     2.3 @@ -59,15 +59,7 @@
     2.4          (l + (s - 1)) - ((l + (s - 1)) % s)); \
     2.5  })
     2.6  
     2.7 -struct pending_aio {
     2.8 -        td_callback_t cb;
     2.9 -        int id;
    2.10 -        void *private;
    2.11 -	int nb_sectors;
    2.12 -	char *buf;
    2.13 -	uint64_t sector;
    2.14 -};
    2.15 -
    2.16 +#undef IOCB_IDX
    2.17  #define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
    2.18  
    2.19  #define ZERO_TEST(_b) (_b | 0x00)
    2.20 @@ -140,110 +132,19 @@ struct tdqcow_state {
    2.21  	uint32_t l2_cache_counts[L2_CACHE_SIZE];      /*Cache access record*/
    2.22  	uint8_t *cluster_cache;          
    2.23  	uint8_t *cluster_data;
    2.24 -	uint8_t *sector_lock;          /*Locking bitmap for AIO reads/writes*/
    2.25  	uint64_t cluster_cache_offset; /**/
    2.26  	uint32_t crypt_method;         /*current crypt method, 0 if no 
    2.27  					*key yet */
    2.28  	uint32_t crypt_method_header;  /**/
    2.29  	AES_KEY aes_encrypt_key;       /*AES key*/
    2.30  	AES_KEY aes_decrypt_key;       /*AES key*/
    2.31 -        /* libaio state */
    2.32 -        tap_aio_context_t   aio_ctx;
    2.33 -        int                 max_aio_reqs;
    2.34 -        struct iocb        *iocb_list;
    2.35 -        struct iocb       **iocb_free;
    2.36 -        struct pending_aio *pending_aio;
    2.37 -        int                 iocb_free_count;
    2.38 -        struct iocb       **iocb_queue;
    2.39 -        int                 iocb_queued;
    2.40 -        struct io_event    *aio_events;
    2.41 +        
    2.42 +	/* libaio state */
    2.43 +	tap_aio_context_t	aio;
    2.44  };
    2.45  
    2.46  static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
    2.47  
    2.48 -static void free_aio_state(struct disk_driver *dd)
    2.49 -{
    2.50 -        struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
    2.51 -
    2.52 -        if (s->sector_lock)
    2.53 -                free(s->sector_lock);
    2.54 -        if (s->iocb_list)
    2.55 -                free(s->iocb_list);
    2.56 -        if (s->pending_aio)
    2.57 -                free(s->pending_aio);
    2.58 -        if (s->aio_events)
    2.59 -                free(s->aio_events);
    2.60 -        if (s->iocb_free)
    2.61 -                free(s->iocb_free);
    2.62 -        if (s->iocb_queue)
    2.63 -                free(s->iocb_queue);
    2.64 -}
    2.65 -
    2.66 -static int init_aio_state(struct disk_driver *dd)
    2.67 -{
    2.68 -	int i, ret;
    2.69 -	struct td_state     *bs = dd->td_state;
    2.70 -	struct tdqcow_state  *s = (struct tdqcow_state *)dd->private;
    2.71 -        long     ioidx;
    2.72 -
    2.73 -        s->iocb_list = NULL;
    2.74 -        s->pending_aio = NULL;
    2.75 -        s->aio_events = NULL;
    2.76 -        s->iocb_free = NULL;
    2.77 -        s->iocb_queue = NULL;
    2.78 -
    2.79 -        /*Initialize Locking bitmap*/
    2.80 -	s->sector_lock = calloc(1, bs->size);
    2.81 -	
    2.82 -	if (!s->sector_lock) {
    2.83 -		DPRINTF("Failed to allocate sector lock\n");
    2.84 -		goto fail;
    2.85 -	}
    2.86 -
    2.87 -        /* A segment (i.e. a page) can span multiple clusters */
    2.88 -        s->max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
    2.89 -            MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
    2.90 -
    2.91 -        /* Initialize AIO */
    2.92 -        s->iocb_free_count = s->max_aio_reqs;
    2.93 -        s->iocb_queued     = 0;
    2.94 -
    2.95 -        if (!(s->iocb_list = malloc(sizeof(struct iocb) * s->max_aio_reqs)) ||
    2.96 -            !(s->pending_aio = malloc(sizeof(struct pending_aio) * s->max_aio_reqs)) ||
    2.97 -            !(s->aio_events = malloc(sizeof(struct io_event) * s->max_aio_reqs)) ||
    2.98 -            !(s->iocb_free = malloc(sizeof(struct iocb *) * s->max_aio_reqs)) ||
    2.99 -            !(s->iocb_queue = malloc(sizeof(struct iocb *) * s->max_aio_reqs))) {
   2.100 -                DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
   2.101 -                        s->max_aio_reqs);
   2.102 -                goto fail;
   2.103 -        }
   2.104 -
   2.105 -	ret = tap_aio_setup(&s->aio_ctx, s->aio_events, s->max_aio_reqs);
   2.106 -	if (ret < 0) {
   2.107 -                if (ret == -EAGAIN) {
   2.108 -                        DPRINTF("Couldn't setup AIO context.  If you are "
   2.109 -                                "trying to concurrently use a large number "
   2.110 -                                "of blktap-based disks, you may need to "
   2.111 -                                "increase the system-wide aio request limit. "
   2.112 -                                "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
   2.113 -                                "aio-max-nr')\n");
   2.114 -                } else {
   2.115 -                        DPRINTF("Couldn't setup AIO context.\n");
   2.116 -                }
   2.117 -		goto fail;
   2.118 -	}
   2.119 -
   2.120 -        for (i=0;i<s->max_aio_reqs;i++)
   2.121 -                s->iocb_free[i] = &s->iocb_list[i];
   2.122 -
   2.123 -        DPRINTF("AIO state initialised\n");
   2.124 -
   2.125 -        return 0;
   2.126 -
   2.127 - fail:
   2.128 -	return -1;
   2.129 -}
   2.130 -
   2.131  static uint32_t gen_cksum(char *ptr, int len)
   2.132  {
   2.133  	unsigned char *md;
   2.134 @@ -339,79 +240,6 @@ static int qcow_set_key(struct tdqcow_st
   2.135  	return 0;
   2.136  }
   2.137  
   2.138 -static int async_read(struct tdqcow_state *s, int size, 
   2.139 -		      uint64_t offset, char *buf, td_callback_t cb,
   2.140 -		      int id, uint64_t sector, void *private)
   2.141 -{
   2.142 -        struct   iocb *io;
   2.143 -        struct   pending_aio *pio;
   2.144 -	long     ioidx;
   2.145 -
   2.146 -        io = s->iocb_free[--s->iocb_free_count];
   2.147 -
   2.148 -        ioidx = IOCB_IDX(s, io);
   2.149 -        pio = &s->pending_aio[ioidx];
   2.150 -        pio->cb = cb;
   2.151 -        pio->id = id;
   2.152 -        pio->private = private;
   2.153 -	pio->nb_sectors = size/512;
   2.154 -	pio->buf = buf;
   2.155 -	pio->sector = sector;
   2.156 -
   2.157 -        io_prep_pread(io, s->fd, buf, size, offset);
   2.158 -        io->data = (void *)ioidx;
   2.159 -
   2.160 -        s->iocb_queue[s->iocb_queued++] = io;
   2.161 -
   2.162 -        return 1;
   2.163 -}
   2.164 -
   2.165 -static int async_write(struct tdqcow_state *s, int size,
   2.166 -		       uint64_t offset, char *buf, td_callback_t cb,
   2.167 -		       int id, uint64_t sector, void *private)
   2.168 -{
   2.169 -        struct   iocb *io;
   2.170 -        struct   pending_aio *pio;
   2.171 -	long     ioidx;
   2.172 -
   2.173 -        io = s->iocb_free[--s->iocb_free_count];
   2.174 -
   2.175 -        ioidx = IOCB_IDX(s, io);
   2.176 -        pio = &s->pending_aio[ioidx];
   2.177 -        pio->cb = cb;
   2.178 -        pio->id = id;
   2.179 -        pio->private = private;
   2.180 -	pio->nb_sectors = size/512;
   2.181 -	pio->buf = buf;
   2.182 -	pio->sector = sector;
   2.183 -
   2.184 -        io_prep_pwrite(io, s->fd, buf, size, offset);
   2.185 -        io->data = (void *)ioidx;
   2.186 -
   2.187 -        s->iocb_queue[s->iocb_queued++] = io;
   2.188 -
   2.189 -        return 1;
   2.190 -}
   2.191 -
   2.192 -/*TODO: Fix sector span!*/
   2.193 -static int aio_can_lock(struct tdqcow_state *s, uint64_t sector)
   2.194 -{
   2.195 -	return (s->sector_lock[sector] ? 0 : 1);
   2.196 -}
   2.197 -
   2.198 -static int aio_lock(struct tdqcow_state *s, uint64_t sector)
   2.199 -{
   2.200 -	return ++s->sector_lock[sector];
   2.201 -}
   2.202 -
   2.203 -static void aio_unlock(struct tdqcow_state *s, uint64_t sector)
   2.204 -{
   2.205 -	if (!s->sector_lock[sector]) return;
   2.206 -
   2.207 -	--s->sector_lock[sector];
   2.208 -	return;
   2.209 -}
   2.210 -
   2.211  /* 
   2.212   * The crypt function is compatible with the linux cryptoloop
   2.213   * algorithm for < 4 GB images. NOTE: out_buf == in_buf is
   2.214 @@ -841,13 +669,14 @@ static inline void init_fds(struct disk_
   2.215  	for(i = 0; i < MAX_IOFD; i++) 
   2.216  		dd->io_fd[i] = 0;
   2.217  
   2.218 -	dd->io_fd[0] = s->aio_ctx.pollfd;
   2.219 +	dd->io_fd[0] = s->aio.aio_ctx.pollfd;
   2.220  }
   2.221  
   2.222  /* Open the disk file and initialize qcow state. */
   2.223  int tdqcow_open (struct disk_driver *dd, const char *name, td_flag_t flags)
   2.224  {
   2.225  	int fd, len, i, shift, ret, size, l1_table_size, o_flags;
   2.226 +	int max_aio_reqs;
   2.227  	struct td_state     *bs = dd->td_state;
   2.228  	struct tdqcow_state *s  = (struct tdqcow_state *)dd->private;
   2.229  	char *buf;
   2.230 @@ -996,9 +825,14 @@ int tdqcow_open (struct disk_driver *dd,
   2.231  	}
   2.232  
   2.233   end_xenhdr:
   2.234 -	if (init_aio_state(dd)!=0) {
   2.235 + 	
   2.236 +	/* A segment (i.e. a page) can span multiple clusters */
   2.237 +	max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
   2.238 +		MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
   2.239 +
   2.240 +	if (tap_aio_init(&s->aio, bs->size, max_aio_reqs)!=0) {
   2.241  		DPRINTF("Unable to initialise AIO state\n");
   2.242 -                free_aio_state(dd);
   2.243 +                tap_aio_free(&s->aio);
   2.244  		goto fail;
   2.245  	}
   2.246  	init_fds(dd);
   2.247 @@ -1015,7 +849,7 @@ int tdqcow_open (struct disk_driver *dd,
   2.248  	
   2.249  fail:
   2.250  	DPRINTF("QCOW Open failed\n");
   2.251 -	free_aio_state(dd);
   2.252 +	tap_aio_free(&s->aio);
   2.253  	free(s->l1_table);
   2.254  	free(s->l2_cache);
   2.255  	free(s->cluster_cache);
   2.256 @@ -1037,7 +871,7 @@ int tdqcow_queue_read(struct disk_driver
   2.257  
   2.258  	/*Check we can get a lock*/
   2.259  	for (i = 0; i < nb_sectors; i++) 
   2.260 -		if (!aio_can_lock(s, sector + i)) 
   2.261 +		if (!tap_aio_can_lock(&s->aio, sector + i)) 
   2.262  			return cb(dd, -EBUSY, sector, nb_sectors, id, private);
   2.263  
   2.264  	/*We store a local record of the request*/
   2.265 @@ -1049,11 +883,11 @@ int tdqcow_queue_read(struct disk_driver
   2.266  		if (n > nb_sectors)
   2.267  			n = nb_sectors;
   2.268  
   2.269 -		if (s->iocb_free_count == 0 || !aio_lock(s, sector)) 
   2.270 +		if (s->aio.iocb_free_count == 0 || !tap_aio_lock(&s->aio, sector)) 
   2.271  			return cb(dd, -EBUSY, sector, nb_sectors, id, private);
   2.272  		
   2.273  		if(!cluster_offset) {
   2.274 -			aio_unlock(s, sector);
   2.275 +			tap_aio_unlock(&s->aio, sector);
   2.276  			ret = cb(dd, BLK_NOT_ALLOCATED, 
   2.277  				 sector, n, id, private);
   2.278  			if (ret == -EBUSY) {
   2.279 @@ -1064,7 +898,7 @@ int tdqcow_queue_read(struct disk_driver
   2.280  			} else
   2.281  				rsp += ret;
   2.282  		} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
   2.283 -			aio_unlock(s, sector);
   2.284 +			tap_aio_unlock(&s->aio, sector);
   2.285  			if (decompress_cluster(s, cluster_offset) < 0) {
   2.286  				rsp += cb(dd, -EIO, sector, 
   2.287  					  nb_sectors, id, private);
   2.288 @@ -1074,7 +908,7 @@ int tdqcow_queue_read(struct disk_driver
   2.289  			       512 * n);
   2.290  			rsp += cb(dd, 0, sector, n, id, private);
   2.291  		} else {
   2.292 -			async_read(s, n * 512, 
   2.293 +			tap_aio_read(&s->aio, s->fd, n * 512, 
   2.294  				   (cluster_offset + index_in_cluster * 512),
   2.295  				   buf, cb, id, sector, private);
   2.296  		}
   2.297 @@ -1099,7 +933,7 @@ int tdqcow_queue_write(struct disk_drive
   2.298  
   2.299  	/*Check we can get a lock*/
   2.300  	for (i = 0; i < nb_sectors; i++)
   2.301 -		if (!aio_can_lock(s, sector + i))  
   2.302 +		if (!tap_aio_can_lock(&s->aio, sector + i))  
   2.303  			return cb(dd, -EBUSY, sector, nb_sectors, id, private);
   2.304  		   
   2.305  	/*We store a local record of the request*/
   2.306 @@ -1109,7 +943,7 @@ int tdqcow_queue_write(struct disk_drive
   2.307  		if (n > nb_sectors)
   2.308  			n = nb_sectors;
   2.309  
   2.310 -		if (s->iocb_free_count == 0 || !aio_lock(s, sector))
   2.311 +		if (s->aio.iocb_free_count == 0 || !tap_aio_lock(&s->aio, sector))
   2.312  			return cb(dd, -EBUSY, sector, nb_sectors, id, private);
   2.313  
   2.314  		cluster_offset = get_cluster_offset(s, sector << 9, 1, 0,
   2.315 @@ -1117,7 +951,7 @@ int tdqcow_queue_write(struct disk_drive
   2.316  						    index_in_cluster+n);
   2.317  		if (!cluster_offset) {
   2.318  			DPRINTF("Ooops, no write cluster offset!\n");
   2.319 -			aio_unlock(s, sector);
   2.320 +			tap_aio_unlock(&s->aio, sector);
   2.321  			return cb(dd, -EIO, sector, nb_sectors, id, private);
   2.322  		}
   2.323  
   2.324 @@ -1125,12 +959,12 @@ int tdqcow_queue_write(struct disk_drive
   2.325  			encrypt_sectors(s, sector, s->cluster_data, 
   2.326  					(unsigned char *)buf, n, 1,
   2.327  					&s->aes_encrypt_key);
   2.328 -			async_write(s, n * 512, 
   2.329 +			tap_aio_write(&s->aio, s->fd, n * 512, 
   2.330  				    (cluster_offset + index_in_cluster*512),
   2.331  				    (char *)s->cluster_data, cb, id, sector, 
   2.332  				    private);
   2.333  		} else {
   2.334 -			async_write(s, n * 512, 
   2.335 +			tap_aio_write(&s->aio, s->fd, n * 512, 
   2.336  				    (cluster_offset + index_in_cluster*512),
   2.337  				    buf, cb, id, sector, private);
   2.338  		}
   2.339 @@ -1146,20 +980,9 @@ int tdqcow_queue_write(struct disk_drive
   2.340   		
   2.341  int tdqcow_submit(struct disk_driver *dd)
   2.342  {
   2.343 -        int ret;
   2.344 -        struct   tdqcow_state *prv = (struct tdqcow_state *)dd->private;
   2.345 -
   2.346 -	if (!prv->iocb_queued)
   2.347 -		return 0;
   2.348 +        struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;
   2.349  
   2.350 -	ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue);
   2.351 -
   2.352 -        /* XXX: TODO: Handle error conditions here. */
   2.353 -
   2.354 -        /* Success case: */
   2.355 -        prv->iocb_queued = 0;
   2.356 -
   2.357 -        return 0;
   2.358 +	return tap_aio_submit(&prv->aio);
   2.359  }
   2.360  
   2.361  int tdqcow_close(struct disk_driver *dd)
   2.362 @@ -1180,7 +1003,7 @@ int tdqcow_close(struct disk_driver *dd)
   2.363  		close(fd);
   2.364  	}
   2.365  
   2.366 -	io_destroy(s->aio_ctx.aio_ctx);
   2.367 +	io_destroy(s->aio.aio_ctx.aio_ctx);
   2.368  	free(s->name);
   2.369  	free(s->l1_table);
   2.370  	free(s->l2_cache);
   2.371 @@ -1198,15 +1021,15 @@ int tdqcow_do_callbacks(struct disk_driv
   2.372  
   2.373          if (sid > MAX_IOFD) return 1;
   2.374  
   2.375 -        nr_events = tap_aio_get_events(&prv->aio_ctx);
   2.376 +        nr_events = tap_aio_get_events(&prv->aio.aio_ctx);
   2.377  repeat:
   2.378 -        for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
   2.379 +        for (ep = prv->aio.aio_events, i = nr_events; i-- > 0; ep++) {
   2.380                  struct iocb        *io  = ep->obj;
   2.381                  struct pending_aio *pio;
   2.382  
   2.383 -                pio = &prv->pending_aio[(long)io->data];
   2.384 +                pio = &prv->aio.pending_aio[(long)io->data];
   2.385  
   2.386 -		aio_unlock(prv, pio->sector);
   2.387 +		tap_aio_unlock(&prv->aio, pio->sector);
   2.388  
   2.389  		if (prv->crypt_method)
   2.390  			encrypt_sectors(prv, pio->sector, 
   2.391 @@ -1219,15 +1042,15 @@ repeat:
   2.392  			       pio->sector, pio->nb_sectors,
   2.393  			       pio->id, pio->private);
   2.394  
   2.395 -                prv->iocb_free[prv->iocb_free_count++] = io;
   2.396 +                prv->aio.iocb_free[prv->aio.iocb_free_count++] = io;
   2.397          }
   2.398  
   2.399          if (nr_events) {
   2.400 -                nr_events = tap_aio_more_events(&prv->aio_ctx);
   2.401 +                nr_events = tap_aio_more_events(&prv->aio.aio_ctx);
   2.402                  goto repeat;
   2.403          }
   2.404  
   2.405 -        tap_aio_continue(&prv->aio_ctx);
   2.406 +        tap_aio_continue(&prv->aio.aio_ctx);
   2.407  
   2.408          return rsp;
   2.409  }
     3.1 --- a/tools/blktap/drivers/block-qcow2.c	Mon Feb 25 09:09:01 2008 +0000
     3.2 +++ b/tools/blktap/drivers/block-qcow2.c	Mon Feb 25 09:12:20 2008 +0000
     3.3 @@ -145,20 +145,7 @@ typedef struct BDRVQcowState {
     3.4  
     3.5  	int64_t total_sectors;
     3.6  
     3.7 -
     3.8 -	struct {
     3.9 -		tap_aio_context_t    aio_ctx;
    3.10 -		int                  max_aio_reqs;
    3.11 -		struct iocb         *iocb_list;
    3.12 -		struct iocb        **iocb_free;
    3.13 -		struct pending_aio  *pending_aio;
    3.14 -		int                  iocb_free_count;
    3.15 -		struct iocb        **iocb_queue;
    3.16 -		int	             iocb_queued;
    3.17 -		struct io_event     *aio_events;
    3.18 -		
    3.19 -		uint8_t *sector_lock;		   /*Locking bitmap for AIO reads/writes*/
    3.20 -	} async;
    3.21 +	tap_aio_context_t async;
    3.22  
    3.23  	/* Original qemu variables */
    3.24  	int cluster_bits;
    3.25 @@ -222,9 +209,6 @@ static void free_clusters(struct disk_dr
    3.26  static void check_refcounts(struct disk_driver *bs);
    3.27  #endif
    3.28  
    3.29 -static int init_aio_state(struct disk_driver *bs);
    3.30 -static void free_aio_state(struct disk_driver *bs);
    3.31 -
    3.32  static int qcow_sync_read(struct disk_driver *dd, uint64_t sector,
    3.33  		int nb_sectors, char *buf, td_callback_t cb,
    3.34  		int id, void *prv);
    3.35 @@ -309,7 +293,7 @@ static int qcow_probe(const uint8_t *buf
    3.36  static int qcow_open(struct disk_driver *bs, const char *filename, td_flag_t flags)
    3.37  {
    3.38  	BDRVQcowState *s = bs->private;
    3.39 -	int len, i, shift, ret;
    3.40 +	int len, i, shift, ret, max_aio_reqs;
    3.41  	QCowHeader header;
    3.42  
    3.43  	int fd, o_flags;
    3.44 @@ -475,9 +459,14 @@ static int qcow_open(struct disk_driver 
    3.45  
    3.46  #ifdef USE_AIO
    3.47  	/* Initialize AIO */
    3.48 -	if (init_aio_state(bs)!=0) {
    3.49 +
    3.50 +	/* A segment (i.e. a page) can span multiple clusters */
    3.51 +	max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
    3.52 +		MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
    3.53 +
    3.54 +	if (tap_aio_init(&s->async, bs->td_state->size, max_aio_reqs)) {
    3.55  		DPRINTF("Unable to initialise AIO state\n");
    3.56 -		free_aio_state(bs);
    3.57 +		tap_aio_free(&s->async);
    3.58  		goto fail;
    3.59  	}
    3.60  
    3.61 @@ -496,7 +485,7 @@ static int qcow_open(struct disk_driver 
    3.62  	DPRINTF("qcow_open failed\n");
    3.63  
    3.64  #ifdef USE_AIO	
    3.65 -	free_aio_state(bs);
    3.66 +	tap_aio_free(&s->async);
    3.67  #endif
    3.68  
    3.69  	qcow_free_snapshots(bs);
    3.70 @@ -1070,200 +1059,6 @@ static int qcow_write(struct disk_driver
    3.71  #ifdef USE_AIO
    3.72  
    3.73  /*
    3.74 - * General AIO helper functions
    3.75 - */
    3.76 -
    3.77 -#define IOCB_IDX(_s, _io) ((_io) - (_s)->async.iocb_list)
    3.78 -
    3.79 -struct pending_aio {
    3.80 -	td_callback_t cb;
    3.81 -	int id;
    3.82 -	void *private;
    3.83 -	int nb_sectors;
    3.84 -	char *buf;
    3.85 -	uint64_t sector;
    3.86 -};
    3.87 -
    3.88 -
    3.89 -static int init_aio_state(struct disk_driver *dd)
    3.90 -{
    3.91 -	int i, ret;
    3.92 -	struct td_state *bs = dd->td_state;
    3.93 -	struct BDRVQcowState *s = (struct BDRVQcowState*) dd->private;
    3.94 -	long ioidx;
    3.95 -
    3.96 -	s->async.iocb_list = NULL;
    3.97 -	s->async.pending_aio = NULL;
    3.98 -	s->async.aio_events = NULL;
    3.99 -	s->async.iocb_free = NULL;
   3.100 -	s->async.iocb_queue = NULL;
   3.101 -
   3.102 -	/*Initialize Locking bitmap*/
   3.103 -	s->async.sector_lock = calloc(1, bs->size);
   3.104 -		
   3.105 -	if (!s->async.sector_lock) {
   3.106 -		DPRINTF("Failed to allocate sector lock\n");
   3.107 -		goto fail;
   3.108 -	}
   3.109 -
   3.110 -	/* A segment (i.e. a page) can span multiple clusters */
   3.111 -	s->async.max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
   3.112 -		MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
   3.113 -
   3.114 -	/* Initialize AIO */
   3.115 -	s->async.iocb_free_count = s->async.max_aio_reqs;
   3.116 -	s->async.iocb_queued	 = 0;
   3.117 -
   3.118 -	if (!(s->async.iocb_list = malloc(sizeof(struct iocb) * s->async.max_aio_reqs)) ||
   3.119 -		!(s->async.pending_aio = malloc(sizeof(struct pending_aio) * s->async.max_aio_reqs)) ||
   3.120 -		!(s->async.aio_events = malloc(sizeof(struct io_event) * s->async.max_aio_reqs)) ||
   3.121 -		!(s->async.iocb_free = malloc(sizeof(struct iocb *) * s->async.max_aio_reqs)) ||
   3.122 -		!(s->async.iocb_queue = malloc(sizeof(struct iocb *) * s->async.max_aio_reqs))) 
   3.123 -	{
   3.124 -		DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
   3.125 -				s->async.max_aio_reqs);
   3.126 -		goto fail;
   3.127 -	}
   3.128 -
   3.129 -	ret = tap_aio_setup(&s->async.aio_ctx, s->async.aio_events, s->async.max_aio_reqs);
   3.130 -	if (ret < 0) {
   3.131 -		if (ret == -EAGAIN) {
   3.132 -			DPRINTF("Couldn't setup AIO context.  If you are "
   3.133 -				"trying to concurrently use a large number "
   3.134 -				"of blktap-based disks, you may need to "
   3.135 -				"increase the system-wide aio request limit. "
   3.136 -				"(e.g. 'echo echo 1048576 > /proc/sys/fs/"
   3.137 -				"aio-max-nr')\n");
   3.138 -		} else {
   3.139 -			DPRINTF("Couldn't setup AIO context.\n");
   3.140 -		}
   3.141 -		goto fail;
   3.142 -	}
   3.143 -
   3.144 -	for (i=0;i<s->async.max_aio_reqs;i++)
   3.145 -			s->async.iocb_free[i] = &s->async.iocb_list[i];
   3.146 -
   3.147 -	DPRINTF("AIO state initialised\n");
   3.148 -
   3.149 -	return 0;
   3.150 -
   3.151 -fail:
   3.152 -	return -1;
   3.153 -}
   3.154 -
   3.155 -static void free_aio_state(struct disk_driver *dd)
   3.156 -{
   3.157 -	struct BDRVQcowState *s = (struct BDRVQcowState*) dd->private;
   3.158 -
   3.159 -	if (s->async.sector_lock)
   3.160 -		free(s->async.sector_lock);
   3.161 -	if (s->async.iocb_list)
   3.162 -		free(s->async.iocb_list);
   3.163 -	if (s->async.pending_aio)
   3.164 -		free(s->async.pending_aio);
   3.165 -	if (s->async.aio_events)
   3.166 -		free(s->async.aio_events);
   3.167 -	if (s->async.iocb_free)
   3.168 -		free(s->async.iocb_free);
   3.169 -	if (s->async.iocb_queue)
   3.170 -		free(s->async.iocb_queue);
   3.171 -}
   3.172 -
   3.173 -static int async_read(struct BDRVQcowState *s, int size, 
   3.174 -		uint64_t offset, char *buf, td_callback_t cb,
   3.175 -		int id, uint64_t sector, void *private)
   3.176 -{
   3.177 -	struct	 iocb *io;
   3.178 -	struct	 pending_aio *pio;
   3.179 -	long	 ioidx;
   3.180 -
   3.181 -	io = s->async.iocb_free[--s->async.iocb_free_count];
   3.182 -
   3.183 -	ioidx = IOCB_IDX(s, io);
   3.184 -	pio = &s->async.pending_aio[ioidx];
   3.185 -	pio->cb = cb;
   3.186 -	pio->id = id;
   3.187 -	pio->private = private;
   3.188 -	pio->nb_sectors = size/512;
   3.189 -	pio->buf = buf;
   3.190 -	pio->sector = sector;
   3.191 -
   3.192 -	io_prep_pread(io, s->fd, buf, size, offset);
   3.193 -	io->data = (void *)ioidx;
   3.194 -
   3.195 -	s->async.iocb_queue[s->async.iocb_queued++] = io;
   3.196 -
   3.197 -	return 1;
   3.198 -}
   3.199 -
   3.200 -static int async_write(struct BDRVQcowState *s, int size,
   3.201 -		uint64_t offset, char *buf, td_callback_t cb,
   3.202 -		int id, uint64_t sector, void *private)
   3.203 -{
   3.204 -	struct	 iocb *io;
   3.205 -	struct	 pending_aio *pio;
   3.206 -	long	 ioidx;
   3.207 -
   3.208 -	io = s->async.iocb_free[--s->async.iocb_free_count];
   3.209 -
   3.210 -	ioidx = IOCB_IDX(s, io);
   3.211 -	pio = &s->async.pending_aio[ioidx];
   3.212 -	pio->cb = cb;
   3.213 -	pio->id = id;
   3.214 -	pio->private = private;
   3.215 -	pio->nb_sectors = size/512;
   3.216 -	pio->buf = buf;
   3.217 -	pio->sector = sector;
   3.218 -
   3.219 -	io_prep_pwrite(io, s->fd, buf, size, offset);
   3.220 -	io->data = (void *)ioidx;
   3.221 -
   3.222 -	s->async.iocb_queue[s->async.iocb_queued++] = io;
   3.223 -
   3.224 -	return 1;
   3.225 -}
   3.226 -
   3.227 -static int async_submit(struct disk_driver *dd)
   3.228 -{
   3.229 -	int ret;
   3.230 -	struct BDRVQcowState *prv = (struct BDRVQcowState*) dd->private;
   3.231 -
   3.232 -	if (!prv->async.iocb_queued)
   3.233 -		return 0;
   3.234 -
   3.235 -	ret = io_submit(prv->async.aio_ctx.aio_ctx, prv->async.iocb_queued, prv->async.iocb_queue);
   3.236 -
   3.237 -	/* XXX: TODO: Handle error conditions here. */
   3.238 -
   3.239 -	/* Success case: */
   3.240 -	prv->async.iocb_queued = 0;
   3.241 -
   3.242 -	return 0;
   3.243 -}
   3.244 -
   3.245 -/*TODO: Fix sector span!*/
   3.246 -static int aio_can_lock(struct BDRVQcowState *s, uint64_t sector)
   3.247 -{
   3.248 -	return (s->async.sector_lock[sector] ? 0 : 1);
   3.249 -}
   3.250 -
   3.251 -static int aio_lock(struct BDRVQcowState *s, uint64_t sector)
   3.252 -{
   3.253 -	return ++s->async.sector_lock[sector];
   3.254 -}
   3.255 -
   3.256 -static void aio_unlock(struct BDRVQcowState *s, uint64_t sector)
   3.257 -{
   3.258 -	if (!s->async.sector_lock[sector]) return;
   3.259 -
   3.260 -	--s->async.sector_lock[sector];
   3.261 -	return;
   3.262 -}
   3.263 -
   3.264 -
   3.265 -
   3.266 -
   3.267 -/*
   3.268   * QCOW2 specific AIO functions
   3.269   */
   3.270  
   3.271 @@ -1278,7 +1073,7 @@ static int qcow_queue_read(struct disk_d
   3.272  
   3.273  	/*Check we can get a lock*/
   3.274  	for (i = 0; i < nb_sectors; i++) 
   3.275 -		if (!aio_can_lock(s, sector + i)) 
   3.276 +		if (!tap_aio_can_lock(&s->async, sector + i)) 
   3.277  			return cb(bs, -EBUSY, sector, nb_sectors, id, private);
   3.278  
   3.279  	while (nb_sectors > 0) {
   3.280 @@ -1290,13 +1085,13 @@ static int qcow_queue_read(struct disk_d
   3.281  		if (n > nb_sectors)
   3.282  			n = nb_sectors;
   3.283  
   3.284 -		if (s->async.iocb_free_count == 0 || !aio_lock(s, sector)) 
   3.285 +		if (s->async.iocb_free_count == 0 || !tap_aio_lock(&s->async, sector)) 
   3.286  			return cb(bs, -EBUSY, sector, nb_sectors, id, private);
   3.287  
   3.288  		if (!cluster_offset) {
   3.289  
   3.290  			/* The requested sector is not allocated */
   3.291 -			aio_unlock(s, sector);
   3.292 +			tap_aio_unlock(&s->async, sector);
   3.293  			ret = cb(bs, BLK_NOT_ALLOCATED, 
   3.294  					sector, n, id, private);
   3.295  			if (ret == -EBUSY) {
   3.296 @@ -1311,7 +1106,7 @@ static int qcow_queue_read(struct disk_d
   3.297  		} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
   3.298  
   3.299  			/* sync read for compressed clusters */
   3.300 -			aio_unlock(s, sector);
   3.301 +			tap_aio_unlock(&s->async, sector);
   3.302  			if (decompress_cluster(s, cluster_offset) < 0) {
   3.303  				rsp += cb(bs, -EIO, sector, nb_sectors, id, private);
   3.304  				goto done;
   3.305 @@ -1323,7 +1118,7 @@ static int qcow_queue_read(struct disk_d
   3.306  		} else {
   3.307  
   3.308  			/* async read */
   3.309 -			async_read(s, n * 512, 
   3.310 +			tap_aio_read(&s->async, s->fd, n * 512, 
   3.311  					(cluster_offset + index_in_cluster * 512),
   3.312  					buf, cb, id, sector, private);
   3.313  		}
   3.314 @@ -1351,7 +1146,7 @@ static int qcow_queue_write(struct disk_
   3.315  	
   3.316  	/*Check we can get a lock*/
   3.317  	for (i = 0; i < nb_sectors; i++) 
   3.318 -		if (!aio_can_lock(s, sector + i)) 
   3.319 +		if (!tap_aio_can_lock(&s->async, sector + i)) 
   3.320  			return cb(bs, -EBUSY, sector, nb_sectors, id, private);
   3.321  
   3.322  
   3.323 @@ -1362,7 +1157,7 @@ static int qcow_queue_write(struct disk_
   3.324  		if (n > nb_sectors)
   3.325  			n = nb_sectors;
   3.326  
   3.327 -		if (s->async.iocb_free_count == 0 || !aio_lock(s, sector))
   3.328 +		if (s->async.iocb_free_count == 0 || !tap_aio_lock(&s->async, sector))
   3.329  			return cb(bs, -EBUSY, sector, nb_sectors, id, private);
   3.330  
   3.331  
   3.332 @@ -1372,14 +1167,14 @@ static int qcow_queue_write(struct disk_
   3.333  
   3.334  		if (!cluster_offset) {
   3.335  			DPRINTF("Ooops, no write cluster offset!\n");
   3.336 -			aio_unlock(s, sector);
   3.337 +			tap_aio_unlock(&s->async, sector);
   3.338  			return cb(bs, -EIO, sector, nb_sectors, id, private);
   3.339  		}
   3.340  
   3.341  
   3.342  		// TODO Encryption
   3.343  
   3.344 -		async_write(s, n * 512, 
   3.345 +		tap_aio_write(&s->async, s->fd, n * 512, 
   3.346  				(cluster_offset + index_in_cluster*512),
   3.347  				buf, cb, id, sector, private);
   3.348  
   3.349 @@ -1402,9 +1197,14 @@ static int qcow_queue_write(struct disk_
   3.350  static int qcow_close(struct disk_driver *bs)
   3.351  {
   3.352  	BDRVQcowState *s = bs->private;
   3.353 -		
   3.354 +	
   3.355 +#ifdef USE_AIO	
   3.356 +	io_destroy(s->async.aio_ctx.aio_ctx);
   3.357 +	tap_aio_free(&s->async);
   3.358 +#else		
   3.359  	close(s->poll_pipe[0]);
   3.360 -		close(s->poll_pipe[1]);
   3.361 +	close(s->poll_pipe[1]);
   3.362 +#endif		
   3.363  
   3.364  	qemu_free(s->l1_table);
   3.365  	qemu_free(s->l2_cache);
   3.366 @@ -1606,23 +1406,10 @@ static int qcow_write_compressed(struct 
   3.367  
   3.368  static int qcow_submit(struct disk_driver *bs)
   3.369  {
   3.370 -	int ret;
   3.371 -	struct	 BDRVQcowState *prv = (struct BDRVQcowState*)bs->private;
   3.372 -
   3.373 -
   3.374 -	fsync(prv->fd);
   3.375 +	struct BDRVQcowState *s = (struct BDRVQcowState*) bs->private;
   3.376  
   3.377 -	if (!prv->async.iocb_queued)
   3.378 -		return 0;
   3.379 -
   3.380 -	ret = io_submit(prv->async.aio_ctx.aio_ctx, prv->async.iocb_queued, prv->async.iocb_queue);
   3.381 -
   3.382 -	/* XXX: TODO: Handle error conditions here. */
   3.383 -
   3.384 -	/* Success case: */
   3.385 -	prv->async.iocb_queued = 0;
   3.386 -
   3.387 -	return 0;
   3.388 +	fsync(s->fd);
   3.389 +	return tap_aio_submit(&s->async);
   3.390  }
   3.391  
   3.392  
   3.393 @@ -2246,7 +2033,7 @@ repeat:
   3.394  
   3.395  		pio = &prv->async.pending_aio[(long)io->data];
   3.396  
   3.397 -		aio_unlock(prv, pio->sector);
   3.398 +		tap_aio_unlock(&prv->async, pio->sector);
   3.399  
   3.400  		if (prv->crypt_method)
   3.401  			encrypt_sectors(prv, pio->sector, 
     4.1 --- a/tools/blktap/drivers/tapaio.c	Mon Feb 25 09:09:01 2008 +0000
     4.2 +++ b/tools/blktap/drivers/tapaio.c	Mon Feb 25 09:12:20 2008 +0000
     4.3 @@ -32,6 +32,7 @@
     4.4  #include <unistd.h>
     4.5  #include <errno.h>
     4.6  #include <string.h>
     4.7 +#include <stdlib.h>
     4.8  
     4.9  /**
    4.10   * We used a kernel patch to return an fd associated with the AIO context
    4.11 @@ -62,7 +63,7 @@
    4.12  static void *
    4.13  tap_aio_completion_thread(void *arg)
    4.14  {
    4.15 -	tap_aio_context_t *ctx = (tap_aio_context_t *) arg;
    4.16 +	tap_aio_internal_context_t *ctx = (tap_aio_internal_context_t *) arg;
    4.17  	int command;
    4.18  	int nr_events;
    4.19  	int rc;
    4.20 @@ -84,7 +85,7 @@ tap_aio_completion_thread(void *arg)
    4.21  }
    4.22  
    4.23  void
    4.24 -tap_aio_continue(tap_aio_context_t *ctx)
    4.25 +tap_aio_continue(tap_aio_internal_context_t *ctx)
    4.26  {
    4.27          int cmd = 0;
    4.28  
    4.29 @@ -95,8 +96,8 @@ tap_aio_continue(tap_aio_context_t *ctx)
    4.30                  DPRINTF("Cannot write to command pipe\n");
    4.31  }
    4.32  
    4.33 -int
    4.34 -tap_aio_setup(tap_aio_context_t *ctx,
    4.35 +static int
    4.36 +tap_aio_setup(tap_aio_internal_context_t *ctx,
    4.37                struct io_event *aio_events,
    4.38                int max_aio_events)
    4.39  {
    4.40 @@ -144,7 +145,7 @@ tap_aio_setup(tap_aio_context_t *ctx,
    4.41  }
    4.42  
    4.43  int
    4.44 -tap_aio_get_events(tap_aio_context_t *ctx)
    4.45 +tap_aio_get_events(tap_aio_internal_context_t *ctx)
    4.46  {
    4.47          int nr_events = 0;
    4.48  
    4.49 @@ -171,10 +172,185 @@ tap_aio_get_events(tap_aio_context_t *ct
    4.50          return nr_events;
    4.51  }
    4.52  
    4.53 -int tap_aio_more_events(tap_aio_context_t *ctx)
    4.54 +int tap_aio_more_events(tap_aio_internal_context_t *ctx)
    4.55  {
    4.56          return io_getevents(ctx->aio_ctx, 0,
    4.57                              ctx->max_aio_events, ctx->aio_events, NULL);
    4.58  }
    4.59  
    4.60 +int tap_aio_init(tap_aio_context_t *ctx, uint64_t sectors,
    4.61 +		int max_aio_reqs)
    4.62 +{
    4.63 +	int i, ret;
    4.64 +	long ioidx;
    4.65  
    4.66 +	ctx->iocb_list = NULL;
    4.67 +	ctx->pending_aio = NULL;
    4.68 +	ctx->aio_events = NULL;
    4.69 +	ctx->iocb_free = NULL;
    4.70 +	ctx->iocb_queue = NULL;
    4.71 +
    4.72 +	/*Initialize Locking bitmap*/
    4.73 +	ctx->sector_lock = calloc(1, sectors);
    4.74 +		
    4.75 +	if (!ctx->sector_lock) {
    4.76 +		DPRINTF("Failed to allocate sector lock\n");
    4.77 +		goto fail;
    4.78 +	}
    4.79 +
    4.80 +
    4.81 +	/* Initialize AIO */
    4.82 +	ctx->max_aio_reqs = max_aio_reqs;
    4.83 +	ctx->iocb_free_count = ctx->max_aio_reqs;
    4.84 +	ctx->iocb_queued	 = 0;
    4.85 +
    4.86 +	if (!(ctx->iocb_list = malloc(sizeof(struct iocb) * ctx->max_aio_reqs)) ||
    4.87 +		!(ctx->pending_aio = malloc(sizeof(struct pending_aio) * ctx->max_aio_reqs)) ||
    4.88 +		!(ctx->aio_events = malloc(sizeof(struct io_event) * ctx->max_aio_reqs)) ||
    4.89 +		!(ctx->iocb_free = malloc(sizeof(struct iocb *) * ctx->max_aio_reqs)) ||
    4.90 +		!(ctx->iocb_queue = malloc(sizeof(struct iocb *) * ctx->max_aio_reqs))) 
    4.91 +	{
    4.92 +		DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
    4.93 +				ctx->max_aio_reqs);
    4.94 +		goto fail;
    4.95 +	}
    4.96 +
    4.97 +	ret = tap_aio_setup(&ctx->aio_ctx, ctx->aio_events, ctx->max_aio_reqs);
    4.98 +	if (ret < 0) {
    4.99 +		if (ret == -EAGAIN) {
   4.100 +			DPRINTF("Couldn't setup AIO context.  If you are "
   4.101 +				"trying to concurrently use a large number "
   4.102 +				"of blktap-based disks, you may need to "
   4.103 +				"increase the system-wide aio request limit. "
    4.104 +				"(e.g. 'echo 1048576 > /proc/sys/fs/
   4.105 +				"aio-max-nr')\n");
   4.106 +		} else {
   4.107 +			DPRINTF("Couldn't setup AIO context.\n");
   4.108 +		}
   4.109 +		goto fail;
   4.110 +	}
   4.111 +
   4.112 +	for (i=0;i<ctx->max_aio_reqs;i++)
   4.113 +		ctx->iocb_free[i] = &ctx->iocb_list[i];
   4.114 +
   4.115 +	DPRINTF("AIO state initialised\n");
   4.116 +
   4.117 +	return 0;
   4.118 +
   4.119 +fail:
   4.120 +	return -1;
   4.121 +}
   4.122 +
   4.123 +void tap_aio_free(tap_aio_context_t *ctx)
   4.124 +{
   4.125 +	if (ctx->sector_lock)
   4.126 +		free(ctx->sector_lock);
   4.127 +	if (ctx->iocb_list)
   4.128 +		free(ctx->iocb_list);
   4.129 +	if (ctx->pending_aio)
   4.130 +		free(ctx->pending_aio);
   4.131 +	if (ctx->aio_events)
   4.132 +		free(ctx->aio_events);
   4.133 +	if (ctx->iocb_free)
   4.134 +		free(ctx->iocb_free);
   4.135 +	if (ctx->iocb_queue)
   4.136 +		free(ctx->iocb_queue);
   4.137 +}
   4.138 +
   4.139 +/*TODO: Fix sector span!*/
   4.140 +int tap_aio_can_lock(tap_aio_context_t *ctx, uint64_t sector)
   4.141 +{
   4.142 +	return (ctx->sector_lock[sector] ? 0 : 1);
   4.143 +}
   4.144 +
   4.145 +int tap_aio_lock(tap_aio_context_t *ctx, uint64_t sector)
   4.146 +{
   4.147 +	return ++ctx->sector_lock[sector];
   4.148 +}
   4.149 +
   4.150 +void tap_aio_unlock(tap_aio_context_t *ctx, uint64_t sector)
   4.151 +{
   4.152 +	if (!ctx->sector_lock[sector]) return;
   4.153 +
   4.154 +	--ctx->sector_lock[sector];
   4.155 +	return;
   4.156 +}
   4.157 +
   4.158 +
   4.159 +int tap_aio_read(tap_aio_context_t *ctx, int fd, int size, 
   4.160 +		uint64_t offset, char *buf, td_callback_t cb,
   4.161 +		int id, uint64_t sector, void *private)
   4.162 +{
   4.163 +	struct	 iocb *io;
   4.164 +	struct	 pending_aio *pio;
   4.165 +	long	 ioidx;
   4.166 +
   4.167 +	if (ctx->iocb_free_count == 0)
   4.168 +		return -ENOMEM;
   4.169 +
   4.170 +	io = ctx->iocb_free[--ctx->iocb_free_count];
   4.171 +
   4.172 +	ioidx = IOCB_IDX(ctx, io);
   4.173 +	pio = &ctx->pending_aio[ioidx];
   4.174 +	pio->cb = cb;
   4.175 +	pio->id = id;
   4.176 +	pio->private = private;
   4.177 +	pio->nb_sectors = size/512;
   4.178 +	pio->buf = buf;
   4.179 +	pio->sector = sector;
   4.180 +
   4.181 +	io_prep_pread(io, fd, buf, size, offset);
   4.182 +	io->data = (void *)ioidx;
   4.183 +
   4.184 +	ctx->iocb_queue[ctx->iocb_queued++] = io;
   4.185 +
   4.186 +	return 0;
   4.187 +}
   4.188 +
   4.189 +int tap_aio_write(tap_aio_context_t *ctx, int fd, int size,
   4.190 +		uint64_t offset, char *buf, td_callback_t cb,
   4.191 +		int id, uint64_t sector, void *private)
   4.192 +{
   4.193 +	struct	 iocb *io;
   4.194 +	struct	 pending_aio *pio;
   4.195 +	long	 ioidx;
   4.196 +
   4.197 +	if (ctx->iocb_free_count == 0)
   4.198 +		return -ENOMEM;
   4.199 +
   4.200 +	io = ctx->iocb_free[--ctx->iocb_free_count];
   4.201 +
   4.202 +	ioidx = IOCB_IDX(ctx, io);
   4.203 +	pio = &ctx->pending_aio[ioidx];
   4.204 +	pio->cb = cb;
   4.205 +	pio->id = id;
   4.206 +	pio->private = private;
   4.207 +	pio->nb_sectors = size/512;
   4.208 +	pio->buf = buf;
   4.209 +	pio->sector = sector;
   4.210 +
   4.211 +	io_prep_pwrite(io, fd, buf, size, offset);
   4.212 +	io->data = (void *)ioidx;
   4.213 +
   4.214 +	ctx->iocb_queue[ctx->iocb_queued++] = io;
   4.215 +
   4.216 +	return 0;
   4.217 +}
   4.218 +
   4.219 +int tap_aio_submit(tap_aio_context_t *ctx)
   4.220 +{
   4.221 +	int ret;
   4.222 +
   4.223 +	if (!ctx->iocb_queued)
   4.224 +		return 0;
   4.225 +
   4.226 +	ret = io_submit(ctx->aio_ctx.aio_ctx, ctx->iocb_queued, ctx->iocb_queue);
   4.227 +
   4.228 +	/* XXX: TODO: Handle error conditions here. */
   4.229 +
   4.230 +	/* Success case: */
   4.231 +	ctx->iocb_queued = 0;
   4.232 +
   4.233 +	return 0;
   4.234 +}
   4.235 +
     5.1 --- a/tools/blktap/drivers/tapaio.h	Mon Feb 25 09:09:01 2008 +0000
     5.2 +++ b/tools/blktap/drivers/tapaio.h	Mon Feb 25 09:12:20 2008 +0000
     5.3 @@ -32,8 +32,13 @@
     5.4  
     5.5  #include <pthread.h>
     5.6  #include <libaio.h>
     5.7 +#include <stdint.h>
     5.8  
     5.9 -struct tap_aio_context {
    5.10 +#include "tapdisk.h"
    5.11 +
    5.12 +#define IOCB_IDX(_ctx, _io) ((_io) - (_ctx)->iocb_list)
    5.13 +
    5.14 +struct tap_aio_internal_context {
    5.15          io_context_t     aio_ctx;
    5.16  
    5.17          struct io_event *aio_events;
    5.18 @@ -45,14 +50,59 @@ struct tap_aio_context {
    5.19          int              pollfd;
    5.20          unsigned int     poll_in_thread : 1;
    5.21  };
    5.22 +	
    5.23 +
    5.24 +typedef struct tap_aio_internal_context tap_aio_internal_context_t;
    5.25 +
    5.26 +
    5.27 +struct pending_aio {
    5.28 +	td_callback_t cb;
    5.29 +	int id;
    5.30 +	void *private;
    5.31 +	int nb_sectors;
    5.32 +	char *buf;
    5.33 +	uint64_t sector;
    5.34 +};
    5.35 +
    5.36 +	
    5.37 +struct tap_aio_context {
    5.38 +	tap_aio_internal_context_t    aio_ctx;
    5.39 +
    5.40 +	int                  max_aio_reqs;
    5.41 +	struct iocb         *iocb_list;
    5.42 +	struct iocb        **iocb_free;
    5.43 +	struct pending_aio  *pending_aio;
    5.44 +	int                  iocb_free_count;
    5.45 +	struct iocb        **iocb_queue;
    5.46 +	int	             iocb_queued;
    5.47 +	struct io_event     *aio_events;
    5.48 +
    5.49 +	/* Locking bitmap for AIO reads/writes */
    5.50 +	uint8_t *sector_lock;		   
    5.51 +};
    5.52  
    5.53  typedef struct tap_aio_context tap_aio_context_t;
    5.54  
    5.55 -int  tap_aio_setup      (tap_aio_context_t *ctx,
    5.56 -                         struct io_event *aio_events,
    5.57 -                         int max_aio_events);
    5.58 -void tap_aio_continue   (tap_aio_context_t *ctx);
    5.59 -int  tap_aio_get_events (tap_aio_context_t *ctx);
    5.60 -int  tap_aio_more_events(tap_aio_context_t *ctx);
    5.61 +void tap_aio_continue   (tap_aio_internal_context_t *ctx);
    5.62 +int  tap_aio_get_events (tap_aio_internal_context_t *ctx);
    5.63 +int  tap_aio_more_events(tap_aio_internal_context_t *ctx);
    5.64 +
    5.65 +
    5.66 +int tap_aio_init(tap_aio_context_t *ctx, uint64_t sectors,
    5.67 +		int max_aio_reqs);
    5.68 +void tap_aio_free(tap_aio_context_t *ctx);
    5.69 +
    5.70 +int tap_aio_can_lock(tap_aio_context_t *ctx, uint64_t sector);
    5.71 +int tap_aio_lock(tap_aio_context_t *ctx, uint64_t sector);
    5.72 +void tap_aio_unlock(tap_aio_context_t *ctx, uint64_t sector);
    5.73 +
    5.74 +
    5.75 +int tap_aio_read(tap_aio_context_t *ctx, int fd, int size, 
    5.76 +		uint64_t offset, char *buf, td_callback_t cb,
    5.77 +		int id, uint64_t sector, void *private);
    5.78 +int tap_aio_write(tap_aio_context_t *ctx, int fd, int size,
    5.79 +		uint64_t offset, char *buf, td_callback_t cb,
    5.80 +		int id, uint64_t sector, void *private);
    5.81 +int tap_aio_submit(tap_aio_context_t *ctx);
    5.82  
    5.83  #endif /* __TAPAIO_H__ */
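
An editor's closing note (not part of the changeset): the new lock
helpers implement a per-sector reference count, and the qcow drivers
use them with the discipline sketched below. "demo_queue_write" is a
hypothetical function; the tap_aio_* calls match the declarations in
the new tapaio.h, and 512-byte sectors are assumed.

    #include <errno.h>
    #include "tapaio.h"

    static int demo_queue_write(tap_aio_context_t *aio, int fd,
                                uint64_t sector, int nb_sectors, char *buf,
                                td_callback_t cb, int id, void *private)
    {
            int i;

            /* Refuse the whole request if any sector is already locked. */
            for (i = 0; i < nb_sectors; i++)
                    if (!tap_aio_can_lock(aio, sector + i))
                            return -EBUSY;

            /* Take the lock (a reference count) before queueing... */
            if (aio->iocb_free_count == 0 || !tap_aio_lock(aio, sector))
                    return -EBUSY;

            if (tap_aio_write(aio, fd, nb_sectors * 512, sector * 512,
                              buf, cb, id, sector, private) < 0) {
                    /* ...and drop it again if queueing fails. */
                    tap_aio_unlock(aio, sector);
                    return -EIO;
            }

            /* On completion, the driver's do_callbacks path calls
             * tap_aio_unlock() before invoking the stored callback. */
            return 0;
    }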