ia64/xen-unstable
changeset 13999:3c827d68fa87
[TAPDISK] add tapdisk support for image chaining
Enables tapdisk to chain an arbitrary number of VDIs, propagating reads of
holes in children to their parent images. Introduces two new functions to the
tapdisk interface to facilitate this. Modifies the QCoW plugin to take
advantage of these changes, thus providing support for arbitrarily long chains
of QCoW image types.
author | Jake Wires <jwires@xensource.com> |
---|---|
date | Fri Feb 16 20:31:27 2007 -0800 (2007-02-16) |
parents | 32a059913591 |
children | 31e65c4ba739 |
files | tools/blktap/drivers/Makefile tools/blktap/drivers/block-aio.c tools/blktap/drivers/block-qcow.c tools/blktap/drivers/block-ram.c tools/blktap/drivers/block-sync.c tools/blktap/drivers/block-vmdk.c tools/blktap/drivers/img2qcow.c tools/blktap/drivers/qcow2raw.c tools/blktap/drivers/tapdisk.c tools/blktap/drivers/tapdisk.h tools/blktap/lib/blktaplib.h tools/blktap/lib/xs_api.c |
line diff
1.1 --- a/tools/blktap/drivers/Makefile Fri Feb 16 16:34:28 2007 +0000 1.2 +++ b/tools/blktap/drivers/Makefile Fri Feb 16 20:31:27 2007 -0800 1.3 @@ -5,7 +5,7 @@ INCLUDES += -I.. -I../lib 1.4 1.5 IBIN = blktapctrl tapdisk 1.6 QCOW_UTIL = img2qcow qcow2raw qcow-create 1.7 -INST_DIR = /usr/sbin 1.8 +INST_DIR = /usr/sbin 1.9 LIBAIO_DIR = ../../libaio/src 1.10 1.11 CFLAGS += -Werror 1.12 @@ -17,7 +17,7 @@ CFLAGS += -D_GNU_SOURCE 1.13 1.14 # Get gcc to generate the dependencies for us. 1.15 CFLAGS += -Wp,-MD,.$(@F).d 1.16 -DEPS = .*.d 1.17 +DEPS = .*.d 1.18 1.19 THREADLIB := -lpthread -lz 1.20 LIBS := -L. -L.. -L../lib 1.21 @@ -29,10 +29,10 @@ LIBS += -L$(XEN_XENSTORE) -lxenstor 1.22 1.23 AIOLIBS := $(LIBAIO_DIR)/libaio.a 1.24 1.25 -BLK-OBJS := block-aio.o 1.26 -BLK-OBJS += block-sync.o 1.27 +BLK-OBJS := block-aio.o 1.28 +BLK-OBJS += block-sync.o 1.29 BLK-OBJS += block-vmdk.o 1.30 -BLK-OBJS += block-ram.o 1.31 +BLK-OBJS += block-ram.o 1.32 BLK-OBJS += block-qcow.o 1.33 BLK-OBJS += aes.o 1.34 1.35 @@ -52,13 +52,13 @@ tapdisk: $(BLK-OBJS) tapdisk.c 1.36 qcow-util: img2qcow qcow2raw qcow-create 1.37 1.38 img2qcow qcow2raw qcow-create: %: $(BLK-OBJS) 1.39 - $(CC) $(CFLAGS) -o $* $(BLK-OBJS) $*.c $(AIOLIBS) $(LIBS) 1.40 + $(CC) $(CFLAGS) -o $* $(BLK-OBJS) $*.c $(AIOLIBS) $(LIBS) 1.41 1.42 install: all 1.43 - $(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(DESTDIR)$(INST_DIR) 1.44 + $(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(VHD_UTIL) $(DESTDIR)$(INST_DIR) 1.45 1.46 clean: 1.47 - rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL) 1.48 + rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL) $(VHD_UTIL) 1.49 1.50 .PHONY: clean install 1.51
2.1 --- a/tools/blktap/drivers/block-aio.c Fri Feb 16 16:34:28 2007 +0000 2.2 +++ b/tools/blktap/drivers/block-aio.c Fri Feb 16 20:31:27 2007 -0800 2.3 @@ -58,6 +58,7 @@ struct pending_aio { 2.4 td_callback_t cb; 2.5 int id; 2.6 void *private; 2.7 + uint64_t lsec; 2.8 }; 2.9 2.10 struct tdaio_state { 2.11 @@ -139,12 +140,23 @@ static int get_image_info(struct td_stat 2.12 return 0; 2.13 } 2.14 2.15 +static inline void init_fds(struct disk_driver *dd) 2.16 +{ 2.17 + int i; 2.18 + struct tdaio_state *prv = (struct tdaio_state *)dd->private; 2.19 + 2.20 + for(i = 0; i < MAX_IOFD; i++) 2.21 + dd->io_fd[i] = 0; 2.22 + 2.23 + dd->io_fd[0] = prv->poll_fd; 2.24 +} 2.25 + 2.26 /* Open the disk file and initialize aio state. */ 2.27 -int tdaio_open (struct td_state *s, const char *name) 2.28 +int tdaio_open (struct disk_driver *dd, const char *name) 2.29 { 2.30 int i, fd, ret = 0; 2.31 - struct tdaio_state *prv = (struct tdaio_state *)s->private; 2.32 - s->private = prv; 2.33 + struct td_state *s = dd->td_state; 2.34 + struct tdaio_state *prv = (struct tdaio_state *)dd->private; 2.35 2.36 DPRINTF("block-aio open('%s')", name); 2.37 /* Initialize AIO */ 2.38 @@ -194,18 +206,21 @@ int tdaio_open (struct td_state *s, cons 2.39 2.40 prv->fd = fd; 2.41 2.42 + init_fds(dd); 2.43 ret = get_image_info(s, fd); 2.44 + 2.45 done: 2.46 return ret; 2.47 } 2.48 2.49 -int tdaio_queue_read(struct td_state *s, uint64_t sector, 2.50 - int nb_sectors, char *buf, td_callback_t cb, 2.51 - int id, void *private) 2.52 +int tdaio_queue_read(struct disk_driver *dd, uint64_t sector, 2.53 + int nb_sectors, char *buf, td_callback_t cb, 2.54 + int id, void *private) 2.55 { 2.56 struct iocb *io; 2.57 struct pending_aio *pio; 2.58 - struct tdaio_state *prv = (struct tdaio_state *)s->private; 2.59 + struct td_state *s = dd->td_state; 2.60 + struct tdaio_state *prv = (struct tdaio_state *)dd->private; 2.61 int size = nb_sectors * s->sector_size; 2.62 uint64_t offset = sector * (uint64_t)s->sector_size; 2.63 
long ioidx; 2.64 @@ -219,22 +234,24 @@ int tdaio_queue_read(struct td_state *s, 2.65 pio->cb = cb; 2.66 pio->id = id; 2.67 pio->private = private; 2.68 + pio->lsec = sector; 2.69 2.70 io_prep_pread(io, prv->fd, buf, size, offset); 2.71 io->data = (void *)ioidx; 2.72 2.73 prv->iocb_queue[prv->iocb_queued++] = io; 2.74 - 2.75 + 2.76 return 0; 2.77 } 2.78 2.79 -int tdaio_queue_write(struct td_state *s, uint64_t sector, 2.80 - int nb_sectors, char *buf, td_callback_t cb, 2.81 - int id, void *private) 2.82 +int tdaio_queue_write(struct disk_driver *dd, uint64_t sector, 2.83 + int nb_sectors, char *buf, td_callback_t cb, 2.84 + int id, void *private) 2.85 { 2.86 struct iocb *io; 2.87 struct pending_aio *pio; 2.88 - struct tdaio_state *prv = (struct tdaio_state *)s->private; 2.89 + struct td_state *s = dd->td_state; 2.90 + struct tdaio_state *prv = (struct tdaio_state *)dd->private; 2.91 int size = nb_sectors * s->sector_size; 2.92 uint64_t offset = sector * (uint64_t)s->sector_size; 2.93 long ioidx; 2.94 @@ -248,19 +265,20 @@ int tdaio_queue_write(struct td_state *s 2.95 pio->cb = cb; 2.96 pio->id = id; 2.97 pio->private = private; 2.98 + pio->lsec = sector; 2.99 2.100 io_prep_pwrite(io, prv->fd, buf, size, offset); 2.101 io->data = (void *)ioidx; 2.102 2.103 prv->iocb_queue[prv->iocb_queued++] = io; 2.104 - 2.105 + 2.106 return 0; 2.107 } 2.108 2.109 -int tdaio_submit(struct td_state *s) 2.110 +int tdaio_submit(struct disk_driver *dd) 2.111 { 2.112 int ret; 2.113 - struct tdaio_state *prv = (struct tdaio_state *)s->private; 2.114 + struct tdaio_state *prv = (struct tdaio_state *)dd->private; 2.115 2.116 ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue); 2.117 2.118 @@ -269,38 +287,24 @@ int tdaio_submit(struct td_state *s) 2.119 /* Success case: */ 2.120 prv->iocb_queued = 0; 2.121 2.122 - return ret; 2.123 + return 0; 2.124 } 2.125 2.126 -int *tdaio_get_fd(struct td_state *s) 2.127 +int tdaio_close(struct disk_driver *dd) 2.128 { 2.129 - struct 
tdaio_state *prv = (struct tdaio_state *)s->private; 2.130 - int *fds, i; 2.131 - 2.132 - fds = malloc(sizeof(int) * MAX_IOFD); 2.133 - /*initialise the FD array*/ 2.134 - for(i=0;i<MAX_IOFD;i++) fds[i] = 0; 2.135 - 2.136 - fds[0] = prv->poll_fd; 2.137 - 2.138 - return fds; 2.139 -} 2.140 - 2.141 -int tdaio_close(struct td_state *s) 2.142 -{ 2.143 - struct tdaio_state *prv = (struct tdaio_state *)s->private; 2.144 + struct tdaio_state *prv = (struct tdaio_state *)dd->private; 2.145 2.146 io_destroy(prv->aio_ctx); 2.147 close(prv->fd); 2.148 - 2.149 + 2.150 return 0; 2.151 } 2.152 2.153 -int tdaio_do_callbacks(struct td_state *s, int sid) 2.154 +int tdaio_do_callbacks(struct disk_driver *dd, int sid) 2.155 { 2.156 int ret, i, rsp = 0; 2.157 struct io_event *ep; 2.158 - struct tdaio_state *prv = (struct tdaio_state *)s->private; 2.159 + struct tdaio_state *prv = (struct tdaio_state *)dd->private; 2.160 2.161 /* Non-blocking test for completed io. */ 2.162 ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events, 2.163 @@ -311,22 +315,34 @@ int tdaio_do_callbacks(struct td_state * 2.164 struct pending_aio *pio; 2.165 2.166 pio = &prv->pending_aio[(long)io->data]; 2.167 - rsp += pio->cb(s, ep->res == io->u.c.nbytes ? 0 : 1, 2.168 + rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 
0 : 1, 2.169 + pio->lsec, io->u.c.nbytes >> 9, 2.170 pio->id, pio->private); 2.171 2.172 prv->iocb_free[prv->iocb_free_count++] = io; 2.173 } 2.174 return rsp; 2.175 } 2.176 - 2.177 + 2.178 +int tdaio_has_parent(struct disk_driver *dd) 2.179 +{ 2.180 + return 0; 2.181 +} 2.182 + 2.183 +int tdaio_get_parent(struct disk_driver *dd, struct disk_driver *parent) 2.184 +{ 2.185 + return -EINVAL; 2.186 +} 2.187 + 2.188 struct tap_disk tapdisk_aio = { 2.189 - "tapdisk_aio", 2.190 - sizeof(struct tdaio_state), 2.191 - tdaio_open, 2.192 - tdaio_queue_read, 2.193 - tdaio_queue_write, 2.194 - tdaio_submit, 2.195 - tdaio_get_fd, 2.196 - tdaio_close, 2.197 - tdaio_do_callbacks, 2.198 + .disk_type = "tapdisk_aio", 2.199 + .private_data_size = sizeof(struct tdaio_state), 2.200 + .td_open = tdaio_open, 2.201 + .td_queue_read = tdaio_queue_read, 2.202 + .td_queue_write = tdaio_queue_write, 2.203 + .td_submit = tdaio_submit, 2.204 + .td_has_parent = tdaio_has_parent, 2.205 + .td_get_parent = tdaio_get_parent, 2.206 + .td_close = tdaio_close, 2.207 + .td_do_callbacks = tdaio_do_callbacks, 2.208 };
3.1 --- a/tools/blktap/drivers/block-qcow.c Fri Feb 16 16:34:28 2007 +0000 3.2 +++ b/tools/blktap/drivers/block-qcow.c Fri Feb 16 20:31:27 2007 -0800 3.3 @@ -55,7 +55,6 @@ 3.4 3.5 /******AIO DEFINES******/ 3.6 #define REQUEST_ASYNC_FD 1 3.7 -#define MAX_QCOW_IDS 0xFFFF 3.8 #define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ) 3.9 3.10 struct pending_aio { 3.11 @@ -65,7 +64,6 @@ struct pending_aio { 3.12 int nb_sectors; 3.13 char *buf; 3.14 uint64_t sector; 3.15 - int qcow_idx; 3.16 }; 3.17 3.18 #define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list) 3.19 @@ -115,9 +113,9 @@ typedef struct QCowHeader_ext { 3.20 struct tdqcow_state { 3.21 int fd; /*Main Qcow file descriptor */ 3.22 uint64_t fd_end; /*Store a local record of file length */ 3.23 - int bfd; /*Backing file descriptor*/ 3.24 char *name; /*Record of the filename*/ 3.25 - int poll_pipe[2]; /*dummy fd for polling on */ 3.26 + uint32_t backing_file_size; 3.27 + uint64_t backing_file_offset; 3.28 int encrypted; /*File contents are encrypted or plain*/ 3.29 int cluster_bits; /*Determines length of cluster as 3.30 *indicated by file hdr*/ 3.31 @@ -149,7 +147,6 @@ struct tdqcow_state { 3.32 AES_KEY aes_decrypt_key; /*AES key*/ 3.33 /* libaio state */ 3.34 io_context_t aio_ctx; 3.35 - int nr_reqs [MAX_QCOW_IDS]; 3.36 struct iocb iocb_list [MAX_AIO_REQS]; 3.37 struct iocb *iocb_free [MAX_AIO_REQS]; 3.38 struct pending_aio pending_aio[MAX_AIO_REQS]; 3.39 @@ -162,10 +159,11 @@ struct tdqcow_state { 3.40 3.41 static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset); 3.42 3.43 -static int init_aio_state(struct td_state *bs) 3.44 +static int init_aio_state(struct disk_driver *dd) 3.45 { 3.46 int i; 3.47 - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; 3.48 + struct td_state *bs = dd->td_state; 3.49 + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; 3.50 long ioidx; 3.51 3.52 /*Initialize Locking bitmap*/ 3.53 @@ -202,8 +200,7 @@ static int init_aio_state(struct td_stat 
3.54 3.55 for (i=0;i<MAX_AIO_REQS;i++) 3.56 s->iocb_free[i] = &s->iocb_list[i]; 3.57 - for (i=0;i<MAX_QCOW_IDS;i++) 3.58 - s->nr_reqs[i] = 0; 3.59 + 3.60 DPRINTF("AIO state initialised\n"); 3.61 3.62 return 0; 3.63 @@ -238,7 +235,10 @@ static uint32_t gen_cksum(char *ptr, int 3.64 3.65 if(!md) return 0; 3.66 3.67 - if (MD5((unsigned char *)ptr, len, md) != md) return 0; 3.68 + if (MD5((unsigned char *)ptr, len, md) != md) { 3.69 + free(md); 3.70 + return 0; 3.71 + } 3.72 3.73 memcpy(&ret, md, sizeof(uint32_t)); 3.74 free(md); 3.75 @@ -247,26 +247,42 @@ static uint32_t gen_cksum(char *ptr, int 3.76 3.77 static int get_filesize(char *filename, uint64_t *size, struct stat *st) 3.78 { 3.79 - int blockfd; 3.80 + int fd; 3.81 + QCowHeader header; 3.82 3.83 /*Set to the backing file size*/ 3.84 + fd = open(filename, O_RDONLY); 3.85 + if (fd < 0) 3.86 + return -1; 3.87 + if (read(fd, &header, sizeof(header)) < sizeof(header)) { 3.88 + close(fd); 3.89 + return -1; 3.90 + } 3.91 + close(fd); 3.92 + 3.93 + be32_to_cpus(&header.magic); 3.94 + be64_to_cpus(&header.size); 3.95 + if (header.magic == QCOW_MAGIC) { 3.96 + *size = header.size >> SECTOR_SHIFT; 3.97 + return 0; 3.98 + } 3.99 + 3.100 if(S_ISBLK(st->st_mode)) { 3.101 - blockfd = open(filename, O_RDONLY); 3.102 - if (blockfd < 0) 3.103 + fd = open(filename, O_RDONLY); 3.104 + if (fd < 0) 3.105 return -1; 3.106 - if (ioctl(blockfd,BLKGETSIZE,size)!=0) { 3.107 + if (ioctl(fd,BLKGETSIZE,size)!=0) { 3.108 printf("Unable to get Block device size\n"); 3.109 - close(blockfd); 3.110 + close(fd); 3.111 return -1; 3.112 } 3.113 - close(blockfd); 3.114 + close(fd); 3.115 } else *size = (st->st_size >> SECTOR_SHIFT); 3.116 return 0; 3.117 } 3.118 3.119 -static int qcow_set_key(struct td_state *bs, const char *key) 3.120 +static int qcow_set_key(struct tdqcow_state *s, const char *key) 3.121 { 3.122 - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; 3.123 uint8_t keybuf[16]; 3.124 int len, i; 3.125 3.126 @@ -306,10 +322,9 
@@ static int qcow_set_key(struct td_state 3.127 return 0; 3.128 } 3.129 3.130 -static int async_read(struct tdqcow_state *s, int fd, int size, 3.131 - uint64_t offset, 3.132 - char *buf, td_callback_t cb, 3.133 - int id, uint64_t sector, int qcow_idx, void *private) 3.134 +static int async_read(struct tdqcow_state *s, int size, 3.135 + uint64_t offset, char *buf, td_callback_t cb, 3.136 + int id, uint64_t sector, void *private) 3.137 { 3.138 struct iocb *io; 3.139 struct pending_aio *pio; 3.140 @@ -325,9 +340,8 @@ static int async_read(struct tdqcow_stat 3.141 pio->nb_sectors = size/512; 3.142 pio->buf = buf; 3.143 pio->sector = sector; 3.144 - pio->qcow_idx = qcow_idx; 3.145 3.146 - io_prep_pread(io, fd, buf, size, offset); 3.147 + io_prep_pread(io, s->fd, buf, size, offset); 3.148 io->data = (void *)ioidx; 3.149 3.150 s->iocb_queue[s->iocb_queued++] = io; 3.151 @@ -335,10 +349,9 @@ static int async_read(struct tdqcow_stat 3.152 return 1; 3.153 } 3.154 3.155 -static int async_write(struct tdqcow_state *s, int fd, int size, 3.156 - uint64_t offset, 3.157 - char *buf, td_callback_t cb, 3.158 - int id, uint64_t sector, int qcow_idx, void *private) 3.159 +static int async_write(struct tdqcow_state *s, int size, 3.160 + uint64_t offset, char *buf, td_callback_t cb, 3.161 + int id, uint64_t sector, void *private) 3.162 { 3.163 struct iocb *io; 3.164 struct pending_aio *pio; 3.165 @@ -354,9 +367,8 @@ static int async_write(struct tdqcow_sta 3.166 pio->nb_sectors = size/512; 3.167 pio->buf = buf; 3.168 pio->sector = sector; 3.169 - pio->qcow_idx = qcow_idx; 3.170 3.171 - io_prep_pwrite(io, fd, buf, size, offset); 3.172 + io_prep_pwrite(io, s->fd, buf, size, offset); 3.173 io->data = (void *)ioidx; 3.174 3.175 s->iocb_queue[s->iocb_queued++] = io; 3.176 @@ -383,17 +395,6 @@ static void aio_unlock(struct tdqcow_sta 3.177 return; 3.178 } 3.179 3.180 -/*TODO - Use a freelist*/ 3.181 -static int get_free_idx(struct tdqcow_state *s) 3.182 -{ 3.183 - int i; 3.184 - 3.185 - 
for(i = 0; i < MAX_QCOW_IDS; i++) { 3.186 - if(s->nr_reqs[i] == 0) return i; 3.187 - } 3.188 - return -1; 3.189 -} 3.190 - 3.191 /* 3.192 * The crypt function is compatible with the linux cryptoloop 3.193 * algorithm for < 4 GB images. NOTE: out_buf == in_buf is 3.194 @@ -425,23 +426,23 @@ static int qtruncate(int fd, off_t lengt 3.195 { 3.196 int ret, i; 3.197 int current = 0, rem = 0; 3.198 - int sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE; 3.199 + uint64_t sectors; 3.200 struct stat st; 3.201 - char buf[DEFAULT_SECTOR_SIZE]; 3.202 + char *buf; 3.203 3.204 /* If length is greater than the current file len 3.205 * we synchronously write zeroes to the end of the 3.206 * file, otherwise we truncate the length down 3.207 */ 3.208 - memset(buf, 0x00, DEFAULT_SECTOR_SIZE); 3.209 ret = fstat(fd, &st); 3.210 - if (ret == -1) 3.211 + if (ret == -1) 3.212 return -1; 3.213 if (S_ISBLK(st.st_mode)) 3.214 return 0; 3.215 - 3.216 + 3.217 + sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE; 3.218 current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE; 3.219 - rem = st.st_size % DEFAULT_SECTOR_SIZE; 3.220 + rem = st.st_size % DEFAULT_SECTOR_SIZE; 3.221 3.222 /* If we are extending this file, we write zeros to the end -- 3.223 * this tries to ensure that the extents allocated wind up being 3.224 @@ -449,28 +450,40 @@ static int qtruncate(int fd, off_t lengt 3.225 */ 3.226 if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) { 3.227 /*We are extending the file*/ 3.228 + if ((ret = posix_memalign((void **)&buf, 3.229 + 512, DEFAULT_SECTOR_SIZE))) { 3.230 + DPRINTF("posix_memalign failed: %d\n", ret); 3.231 + return -1; 3.232 + } 3.233 + memset(buf, 0x00, DEFAULT_SECTOR_SIZE); 3.234 if (lseek(fd, 0, SEEK_END)==-1) { 3.235 - fprintf(stderr, 3.236 - "Lseek EOF failed (%d), internal error\n", 3.237 + DPRINTF("Lseek EOF failed (%d), internal error\n", 3.238 errno); 3.239 + free(buf); 3.240 return -1; 3.241 } 3.242 if (rem) { 3.243 ret = 
write(fd, buf, rem); 3.244 - if (ret != rem) 3.245 + if (ret != rem) { 3.246 + DPRINTF("write failed: ret = %d, err = %s\n", 3.247 + ret, strerror(errno)); 3.248 + free(buf); 3.249 return -1; 3.250 + } 3.251 } 3.252 for (i = current; i < sectors; i++ ) { 3.253 ret = write(fd, buf, DEFAULT_SECTOR_SIZE); 3.254 - if (ret != DEFAULT_SECTOR_SIZE) 3.255 + if (ret != DEFAULT_SECTOR_SIZE) { 3.256 + DPRINTF("write failed: ret = %d, err = %s\n", 3.257 + ret, strerror(errno)); 3.258 + free(buf); 3.259 return -1; 3.260 + } 3.261 } 3.262 - 3.263 + free(buf); 3.264 } else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE)) 3.265 - if (ftruncate(fd, sectors * DEFAULT_SECTOR_SIZE)==-1) { 3.266 - fprintf(stderr, 3.267 - "Ftruncate failed (%d), internal error\n", 3.268 - errno); 3.269 + if (ftruncate(fd, (off_t)sectors * DEFAULT_SECTOR_SIZE)==-1) { 3.270 + DPRINTF("Ftruncate failed (%s)\n", strerror(errno)); 3.271 return -1; 3.272 } 3.273 return 0; 3.274 @@ -490,12 +503,11 @@ static int qtruncate(int fd, off_t lengt 3.275 * 3.276 * return 0 if not allocated. 3.277 */ 3.278 -static uint64_t get_cluster_offset(struct td_state *bs, 3.279 +static uint64_t get_cluster_offset(struct tdqcow_state *s, 3.280 uint64_t offset, int allocate, 3.281 int compressed_size, 3.282 int n_start, int n_end) 3.283 { 3.284 - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; 3.285 int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector; 3.286 char *tmp_ptr, *tmp_ptr2, *l2_ptr, *l1_ptr; 3.287 uint64_t l2_offset, *l2_table, cluster_offset, tmp; 3.288 @@ -550,8 +562,10 @@ static uint64_t get_cluster_offset(struc 3.289 * entry is written before blocks. 
3.290 */ 3.291 lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET); 3.292 - if (write(s->fd, tmp_ptr, 4096) != 4096) 3.293 + if (write(s->fd, tmp_ptr, 4096) != 4096) { 3.294 + free(tmp_ptr); 3.295 return 0; 3.296 + } 3.297 free(tmp_ptr); 3.298 3.299 new_l2_table = 1; 3.300 @@ -716,9 +730,10 @@ found: 3.301 return cluster_offset; 3.302 } 3.303 3.304 -static void init_cluster_cache(struct td_state *bs) 3.305 +static void init_cluster_cache(struct disk_driver *dd) 3.306 { 3.307 - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; 3.308 + struct td_state *bs = dd->td_state; 3.309 + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; 3.310 uint32_t count = 0; 3.311 int i, cluster_entries; 3.312 3.313 @@ -727,22 +742,20 @@ static void init_cluster_cache(struct td 3.314 cluster_entries, s->cluster_size); 3.315 3.316 for (i = 0; i < bs->size; i += cluster_entries) { 3.317 - if (get_cluster_offset(bs, i << 9, 0, 0, 0, 1)) count++; 3.318 + if (get_cluster_offset(s, i << 9, 0, 0, 0, 1)) count++; 3.319 if (count >= L2_CACHE_SIZE) return; 3.320 } 3.321 DPRINTF("Finished cluster initialisation, added %d entries\n", count); 3.322 return; 3.323 } 3.324 3.325 -static int qcow_is_allocated(struct td_state *bs, int64_t sector_num, 3.326 +static int qcow_is_allocated(struct tdqcow_state *s, int64_t sector_num, 3.327 int nb_sectors, int *pnum) 3.328 { 3.329 - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; 3.330 - 3.331 int index_in_cluster, n; 3.332 uint64_t cluster_offset; 3.333 3.334 - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); 3.335 + cluster_offset = get_cluster_offset(s, sector_num << 9, 0, 0, 0, 0); 3.336 index_in_cluster = sector_num & (s->cluster_sectors - 1); 3.337 n = s->cluster_sectors - index_in_cluster; 3.338 if (n > nb_sectors) 3.339 @@ -800,11 +813,23 @@ static int decompress_cluster(struct tdq 3.340 return 0; 3.341 } 3.342 3.343 +static inline void init_fds(struct disk_driver *dd) 3.344 +{ 3.345 + 
int i; 3.346 + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; 3.347 + 3.348 + for(i = 0; i < MAX_IOFD; i++) 3.349 + dd->io_fd[i] = 0; 3.350 + 3.351 + dd->io_fd[0] = s->poll_fd; 3.352 +} 3.353 + 3.354 /* Open the disk file and initialize qcow state. */ 3.355 -int tdqcow_open (struct td_state *bs, const char *name) 3.356 +int tdqcow_open (struct disk_driver *dd, const char *name) 3.357 { 3.358 int fd, len, i, shift, ret, size, l1_table_size; 3.359 - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; 3.360 + struct td_state *bs = dd->td_state; 3.361 + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; 3.362 char *buf; 3.363 QCowHeader *header; 3.364 QCowHeader_ext *exthdr; 3.365 @@ -812,10 +837,6 @@ int tdqcow_open (struct td_state *bs, co 3.366 uint64_t final_cluster = 0; 3.367 3.368 DPRINTF("QCOW: Opening %s\n",name); 3.369 - /* set up a pipe so that we can hand back a poll fd that won't fire.*/ 3.370 - ret = pipe(s->poll_pipe); 3.371 - if (ret != 0) 3.372 - return (0 - errno); 3.373 3.374 fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE); 3.375 if (fd < 0) { 3.376 @@ -826,7 +847,7 @@ int tdqcow_open (struct td_state *bs, co 3.377 s->fd = fd; 3.378 asprintf(&s->name,"%s", name); 3.379 3.380 - ASSERT(sizeof(header) < 512); 3.381 + ASSERT(sizeof(QCowHeader) + sizeof(QCowHeader_ext) < 512); 3.382 3.383 ret = posix_memalign((void **)&buf, 512, 512); 3.384 if (ret != 0) goto fail; 3.385 @@ -861,7 +882,9 @@ int tdqcow_open (struct td_state *bs, co 3.386 s->cluster_alloc = s->l2_size; 3.387 bs->size = header->size / 512; 3.388 s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1; 3.389 - 3.390 + s->backing_file_offset = header->backing_file_offset; 3.391 + s->backing_file_size = header->backing_file_size; 3.392 + 3.393 /* read the level 1 table */ 3.394 shift = s->cluster_bits + s->l2_bits; 3.395 s->l1_size = (header->size + (1LL << shift) - 1) >> shift; 3.396 @@ -887,7 +910,7 @@ int tdqcow_open (struct td_state *bs, co 3.397 if 
(read(fd, s->l1_table, l1_table_size) != l1_table_size) 3.398 goto fail; 3.399 3.400 - for(i = 0;i < s->l1_size; i++) { 3.401 + for(i = 0; i < s->l1_size; i++) { 3.402 //be64_to_cpus(&s->l1_table[i]); 3.403 //DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]); 3.404 if (s->l1_table[i] > final_cluster) 3.405 @@ -907,41 +930,15 @@ int tdqcow_open (struct td_state *bs, co 3.406 if(ret != 0) goto fail; 3.407 s->cluster_cache_offset = -1; 3.408 3.409 - /* read the backing file name */ 3.410 - s->bfd = -1; 3.411 - if (header->backing_file_offset != 0) { 3.412 - DPRINTF("Reading backing file data\n"); 3.413 - len = header->backing_file_size; 3.414 - if (len > 1023) 3.415 - len = 1023; 3.416 - 3.417 - /*TODO - Fix read size for O_DIRECT and use original fd!*/ 3.418 - fd = open(name, O_RDONLY | O_LARGEFILE); 3.419 - 3.420 - lseek(fd, header->backing_file_offset, SEEK_SET); 3.421 - if (read(fd, bs->backing_file, len) != len) 3.422 - goto fail; 3.423 - bs->backing_file[len] = '\0'; 3.424 - close(fd); 3.425 - /***********************************/ 3.426 - 3.427 - /*Open backing file*/ 3.428 - fd = open(bs->backing_file, O_RDONLY | O_DIRECT | O_LARGEFILE); 3.429 - if (fd < 0) { 3.430 - DPRINTF("Unable to open backing file: %s\n", 3.431 - bs->backing_file); 3.432 - goto fail; 3.433 - } 3.434 - s->bfd = fd; 3.435 + if (s->backing_file_offset != 0) 3.436 s->cluster_alloc = 1; /*Cannot use pre-alloc*/ 3.437 - } 3.438 3.439 bs->sector_size = 512; 3.440 bs->info = 0; 3.441 3.442 /*Detect min_cluster_alloc*/ 3.443 s->min_cluster_alloc = 1; /*Default*/ 3.444 - if (s->bfd == -1 && (s->l1_table_offset % 4096 == 0) ) { 3.445 + if (s->backing_file_offset == 0 && s->l1_table_offset % 4096 == 0) { 3.446 /*We test to see if the xen magic # exists*/ 3.447 exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader)); 3.448 be32_to_cpus(&exthdr->xmagic); 3.449 @@ -962,10 +959,11 @@ int tdqcow_open (struct td_state *bs, co 3.450 } 3.451 3.452 end_xenhdr: 3.453 - if (init_aio_state(bs)!=0) { 3.454 + if 
(init_aio_state(dd)!=0) { 3.455 DPRINTF("Unable to initialise AIO state\n"); 3.456 goto fail; 3.457 } 3.458 + init_fds(dd); 3.459 s->fd_end = (final_cluster == 0 ? (s->l1_table_offset + l1_table_size) : 3.460 (final_cluster + s->cluster_size)); 3.461 3.462 @@ -981,213 +979,145 @@ fail: 3.463 return -1; 3.464 } 3.465 3.466 - int tdqcow_queue_read(struct td_state *bs, uint64_t sector, 3.467 - int nb_sectors, char *buf, td_callback_t cb, 3.468 - int id, void *private) 3.469 +int tdqcow_queue_read(struct disk_driver *dd, uint64_t sector, 3.470 + int nb_sectors, char *buf, td_callback_t cb, 3.471 + int id, void *private) 3.472 { 3.473 - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; 3.474 - int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0; 3.475 - uint64_t cluster_offset; 3.476 + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; 3.477 + int ret = 0, index_in_cluster, n, i, rsp = 0; 3.478 + uint64_t cluster_offset, sec, nr_secs; 3.479 + 3.480 + sec = sector; 3.481 + nr_secs = nb_sectors; 3.482 3.483 /*Check we can get a lock*/ 3.484 - for (i = 0; i < nb_sectors; i++) 3.485 - if (!aio_can_lock(s, sector + i)) { 3.486 - DPRINTF("AIO_CAN_LOCK failed [%llu]\n", 3.487 - (long long) sector + i); 3.488 - return -EBUSY; 3.489 - } 3.490 - 3.491 + for (i = 0; i < nb_sectors; i++) 3.492 + if (!aio_can_lock(s, sector + i)) 3.493 + return cb(dd, -EBUSY, sector, nb_sectors, id, private); 3.494 + 3.495 /*We store a local record of the request*/ 3.496 - qcow_idx = get_free_idx(s); 3.497 while (nb_sectors > 0) { 3.498 cluster_offset = 3.499 - get_cluster_offset(bs, sector << 9, 0, 0, 0, 0); 3.500 + get_cluster_offset(s, sector << 9, 0, 0, 0, 0); 3.501 index_in_cluster = sector & (s->cluster_sectors - 1); 3.502 n = s->cluster_sectors - index_in_cluster; 3.503 if (n > nb_sectors) 3.504 n = nb_sectors; 3.505 3.506 - if (s->iocb_free_count == 0 || !aio_lock(s, sector)) { 3.507 - DPRINTF("AIO_LOCK or iocb_free_count (%d) failed" 3.508 - "[%llu]\n", 
s->iocb_free_count, 3.509 - (long long) sector); 3.510 - return -ENOMEM; 3.511 - } 3.512 + if (s->iocb_free_count == 0 || !aio_lock(s, sector)) 3.513 + return cb(dd, -EBUSY, sector, nb_sectors, id, private); 3.514 3.515 - if (!cluster_offset && (s->bfd > 0)) { 3.516 - s->nr_reqs[qcow_idx]++; 3.517 - asubmit += async_read(s, s->bfd, n * 512, sector << 9, 3.518 - buf, cb, id, sector, 3.519 - qcow_idx, private); 3.520 - } else if(!cluster_offset) { 3.521 - memset(buf, 0, 512 * n); 3.522 + if(!cluster_offset) { 3.523 aio_unlock(s, sector); 3.524 + ret = cb(dd, BLK_NOT_ALLOCATED, 3.525 + sector, n, id, private); 3.526 + if (ret == -EBUSY) { 3.527 + /* mark remainder of request 3.528 + * as busy and try again later */ 3.529 + return cb(dd, -EBUSY, sector + n, 3.530 + nb_sectors - n, id, private); 3.531 + } else rsp += ret; 3.532 } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { 3.533 + aio_unlock(s, sector); 3.534 if (decompress_cluster(s, cluster_offset) < 0) { 3.535 - ret = -1; 3.536 + rsp += cb(dd, -EIO, sector, 3.537 + nb_sectors, id, private); 3.538 goto done; 3.539 } 3.540 memcpy(buf, s->cluster_cache + index_in_cluster * 512, 3.541 512 * n); 3.542 - } else { 3.543 - s->nr_reqs[qcow_idx]++; 3.544 - asubmit += async_read(s, s->fd, n * 512, 3.545 - (cluster_offset + 3.546 - index_in_cluster * 512), 3.547 - buf, cb, id, sector, 3.548 - qcow_idx, private); 3.549 + rsp += cb(dd, 0, sector, n, id, private); 3.550 + } else { 3.551 + async_read(s, n * 512, 3.552 + (cluster_offset + index_in_cluster * 512), 3.553 + buf, cb, id, sector, private); 3.554 } 3.555 nb_sectors -= n; 3.556 sector += n; 3.557 buf += n * 512; 3.558 } 3.559 done: 3.560 - /*Callback if no async requests outstanding*/ 3.561 - if (!asubmit) return cb(bs, ret == -1 ? 
-1 : 0, id, private); 3.562 - 3.563 - return 0; 3.564 + return rsp; 3.565 } 3.566 3.567 - int tdqcow_queue_write(struct td_state *bs, uint64_t sector, 3.568 - int nb_sectors, char *buf, td_callback_t cb, 3.569 - int id, void *private) 3.570 +int tdqcow_queue_write(struct disk_driver *dd, uint64_t sector, 3.571 + int nb_sectors, char *buf, td_callback_t cb, 3.572 + int id, void *private) 3.573 { 3.574 - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; 3.575 - int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0; 3.576 - uint64_t cluster_offset; 3.577 + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; 3.578 + int ret = 0, index_in_cluster, n, i; 3.579 + uint64_t cluster_offset, sec, nr_secs; 3.580 + 3.581 + sec = sector; 3.582 + nr_secs = nb_sectors; 3.583 3.584 /*Check we can get a lock*/ 3.585 for (i = 0; i < nb_sectors; i++) 3.586 - if (!aio_can_lock(s, sector + i)) { 3.587 - DPRINTF("AIO_CAN_LOCK failed [%llu]\n", 3.588 - (long long) (sector + i)); 3.589 - return -EBUSY; 3.590 - } 3.591 + if (!aio_can_lock(s, sector + i)) 3.592 + return cb(dd, -EBUSY, sector, nb_sectors, id, private); 3.593 3.594 /*We store a local record of the request*/ 3.595 - qcow_idx = get_free_idx(s); 3.596 while (nb_sectors > 0) { 3.597 index_in_cluster = sector & (s->cluster_sectors - 1); 3.598 n = s->cluster_sectors - index_in_cluster; 3.599 if (n > nb_sectors) 3.600 n = nb_sectors; 3.601 3.602 - if (s->iocb_free_count == 0 || !aio_lock(s, sector)){ 3.603 - DPRINTF("AIO_LOCK or iocb_free_count (%d) failed" 3.604 - "[%llu]\n", s->iocb_free_count, 3.605 - (long long) sector); 3.606 - return -ENOMEM; 3.607 + if (s->iocb_free_count == 0 || !aio_lock(s, sector)) 3.608 + return cb(dd, -EBUSY, sector, nb_sectors, id, private); 3.609 + 3.610 + cluster_offset = get_cluster_offset(s, sector << 9, 1, 0, 3.611 + index_in_cluster, 3.612 + index_in_cluster+n); 3.613 + if (!cluster_offset) { 3.614 + DPRINTF("Ooops, no write cluster offset!\n"); 3.615 + return cb(dd, -EIO, 
sector, nb_sectors, id, private); 3.616 } 3.617 3.618 - if (!IS_ZERO(buf,n * 512)) { 3.619 - 3.620 - cluster_offset = get_cluster_offset(bs, sector << 9, 3.621 - 1, 0, 3.622 - index_in_cluster, 3.623 - index_in_cluster+n 3.624 - ); 3.625 - if (!cluster_offset) { 3.626 - DPRINTF("Ooops, no write cluster offset!\n"); 3.627 - ret = -1; 3.628 - goto done; 3.629 - } 3.630 - 3.631 - if (s->crypt_method) { 3.632 - encrypt_sectors(s, sector, s->cluster_data, 3.633 - (unsigned char *)buf, n, 1, 3.634 - &s->aes_encrypt_key); 3.635 - s->nr_reqs[qcow_idx]++; 3.636 - asubmit += async_write(s, s->fd, n * 512, 3.637 - (cluster_offset + 3.638 - index_in_cluster*512), 3.639 - (char *)s->cluster_data, 3.640 - cb, id, sector, 3.641 - qcow_idx, private); 3.642 - } else { 3.643 - s->nr_reqs[qcow_idx]++; 3.644 - asubmit += async_write(s, s->fd, n * 512, 3.645 - (cluster_offset + 3.646 - index_in_cluster*512), 3.647 - buf, cb, id, sector, 3.648 - qcow_idx, private); 3.649 - } 3.650 + if (s->crypt_method) { 3.651 + encrypt_sectors(s, sector, s->cluster_data, 3.652 + (unsigned char *)buf, n, 1, 3.653 + &s->aes_encrypt_key); 3.654 + async_write(s, n * 512, 3.655 + (cluster_offset + index_in_cluster*512), 3.656 + (char *)s->cluster_data, cb, id, sector, 3.657 + private); 3.658 } else { 3.659 - /*Write data contains zeros, but we must check to see 3.660 - if cluster already allocated*/ 3.661 - cluster_offset = get_cluster_offset(bs, sector << 9, 3.662 - 0, 0, 3.663 - index_in_cluster, 3.664 - index_in_cluster+n 3.665 - ); 3.666 - if(cluster_offset) { 3.667 - if (s->crypt_method) { 3.668 - encrypt_sectors(s, sector, 3.669 - s->cluster_data, 3.670 - (unsigned char *)buf, 3.671 - n, 1, 3.672 - &s->aes_encrypt_key); 3.673 - s->nr_reqs[qcow_idx]++; 3.674 - asubmit += async_write(s, s->fd, 3.675 - n * 512, 3.676 - (cluster_offset+ 3.677 - index_in_cluster * 512), 3.678 - (char *)s->cluster_data, cb, id, sector, 3.679 - qcow_idx, private); 3.680 - } else { 3.681 - s->nr_reqs[qcow_idx]++; 3.682 - 
asubmit += async_write(s, s->fd, n*512, 3.683 - cluster_offset + index_in_cluster * 512, 3.684 - buf, cb, id, sector, 3.685 - qcow_idx, private); 3.686 - } 3.687 - } 3.688 - else aio_unlock(s, sector); 3.689 + async_write(s, n * 512, 3.690 + (cluster_offset + index_in_cluster*512), 3.691 + buf, cb, id, sector, private); 3.692 } 3.693 + 3.694 nb_sectors -= n; 3.695 sector += n; 3.696 buf += n * 512; 3.697 } 3.698 s->cluster_cache_offset = -1; /* disable compressed cache */ 3.699 3.700 -done: 3.701 - /*Callback if no async requests outstanding*/ 3.702 - if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private); 3.703 - 3.704 return 0; 3.705 } 3.706 3.707 -int tdqcow_submit(struct td_state *bs) 3.708 +int tdqcow_submit(struct disk_driver *dd) 3.709 { 3.710 int ret; 3.711 - struct tdqcow_state *prv = (struct tdqcow_state *)bs->private; 3.712 + struct tdqcow_state *prv = (struct tdqcow_state *)dd->private; 3.713 3.714 - ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue); 3.715 + if (!prv->iocb_queued) 3.716 + return 0; 3.717 + 3.718 + ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue); 3.719 3.720 /* XXX: TODO: Handle error conditions here. 
*/ 3.721 3.722 /* Success case: */ 3.723 prv->iocb_queued = 0; 3.724 3.725 - return ret; 3.726 + return 0; 3.727 } 3.728 3.729 - 3.730 -int *tdqcow_get_fd(struct td_state *bs) 3.731 +int tdqcow_close(struct disk_driver *dd) 3.732 { 3.733 - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; 3.734 - int *fds, i; 3.735 - 3.736 - fds = malloc(sizeof(int) * MAX_IOFD); 3.737 - /*initialise the FD array*/ 3.738 - for(i=0;i<MAX_IOFD;i++) fds[i] = 0; 3.739 - 3.740 - fds[0] = s->poll_fd; 3.741 - return fds; 3.742 -} 3.743 - 3.744 -int tdqcow_close(struct td_state *bs) 3.745 -{ 3.746 - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; 3.747 + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; 3.748 uint32_t cksum, out; 3.749 int fd, offset; 3.750 3.751 @@ -1203,6 +1133,7 @@ int tdqcow_close(struct td_state *bs) 3.752 close(fd); 3.753 } 3.754 3.755 + io_destroy(s->aio_ctx); 3.756 free(s->name); 3.757 free(s->l1_table); 3.758 free(s->l2_cache); 3.759 @@ -1212,11 +1143,11 @@ int tdqcow_close(struct td_state *bs) 3.760 return 0; 3.761 } 3.762 3.763 -int tdqcow_do_callbacks(struct td_state *s, int sid) 3.764 +int tdqcow_do_callbacks(struct disk_driver *dd, int sid) 3.765 { 3.766 int ret, i, rsp = 0,*ptr; 3.767 struct io_event *ep; 3.768 - struct tdqcow_state *prv = (struct tdqcow_state *)s->private; 3.769 + struct tdqcow_state *prv = (struct tdqcow_state *)dd->private; 3.770 3.771 if (sid > MAX_IOFD) return 1; 3.772 3.773 @@ -1224,25 +1155,24 @@ int tdqcow_do_callbacks(struct td_state 3.774 ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events, 3.775 NULL); 3.776 3.777 - for (ep=prv->aio_events, i = ret; i-->0; ep++) { 3.778 + for (ep = prv->aio_events, i = ret; i-- > 0; ep++) { 3.779 struct iocb *io = ep->obj; 3.780 struct pending_aio *pio; 3.781 3.782 pio = &prv->pending_aio[(long)io->data]; 3.783 3.784 aio_unlock(prv, pio->sector); 3.785 - if (pio->id >= 0) { 3.786 - if (prv->crypt_method) 3.787 - encrypt_sectors(prv, pio->sector, 
3.788 - (unsigned char *)pio->buf, 3.789 - (unsigned char *)pio->buf, 3.790 - pio->nb_sectors, 0, 3.791 - &prv->aes_decrypt_key); 3.792 - prv->nr_reqs[pio->qcow_idx]--; 3.793 - if (prv->nr_reqs[pio->qcow_idx] == 0) 3.794 - rsp += pio->cb(s, ep->res == io->u.c.nbytes ? 0 : 1, pio->id, 3.795 - pio->private); 3.796 - } else if (pio->id == -2) free(pio->buf); 3.797 + 3.798 + if (prv->crypt_method) 3.799 + encrypt_sectors(prv, pio->sector, 3.800 + (unsigned char *)pio->buf, 3.801 + (unsigned char *)pio->buf, 3.802 + pio->nb_sectors, 0, 3.803 + &prv->aes_decrypt_key); 3.804 + 3.805 + rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1, 3.806 + pio->sector, pio->nb_sectors, 3.807 + pio->id, pio->private); 3.808 3.809 prv->iocb_free[prv->iocb_free_count++] = io; 3.810 } 3.811 @@ -1250,7 +1180,7 @@ int tdqcow_do_callbacks(struct td_state 3.812 } 3.813 3.814 int qcow_create(const char *filename, uint64_t total_size, 3.815 - const char *backing_file, int sparse) 3.816 + const char *backing_file, int sparse) 3.817 { 3.818 int fd, header_size, backing_filename_len, l1_size, i; 3.819 int shift, length, adjust, flags = 0, ret = 0; 3.820 @@ -1391,9 +1321,8 @@ int qcow_create(const char *filename, ui 3.821 return 0; 3.822 } 3.823 3.824 -int qcow_make_empty(struct td_state *bs) 3.825 +int qcow_make_empty(struct tdqcow_state *s) 3.826 { 3.827 - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; 3.828 uint32_t l1_length = s->l1_size * sizeof(uint64_t); 3.829 3.830 memset(s->l1_table, 0, l1_length); 3.831 @@ -1412,19 +1341,16 @@ int qcow_make_empty(struct td_state *bs) 3.832 return 0; 3.833 } 3.834 3.835 -int qcow_get_cluster_size(struct td_state *bs) 3.836 +int qcow_get_cluster_size(struct tdqcow_state *s) 3.837 { 3.838 - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; 3.839 - 3.840 return s->cluster_size; 3.841 } 3.842 3.843 /* XXX: put compressed sectors first, then all the cluster aligned 3.844 tables to avoid losing bytes in alignment */ 3.845 -int 
qcow_compress_cluster(struct td_state *bs, int64_t sector_num, 3.846 +int qcow_compress_cluster(struct tdqcow_state *s, int64_t sector_num, 3.847 const uint8_t *buf) 3.848 { 3.849 - struct tdqcow_state *s = (struct tdqcow_state *)bs->private; 3.850 z_stream strm; 3.851 int ret, out_len; 3.852 uint8_t *out_buf; 3.853 @@ -1463,7 +1389,7 @@ int qcow_compress_cluster(struct td_stat 3.854 /* could not compress: write normal cluster */ 3.855 //tdqcow_queue_write(bs, sector_num, buf, s->cluster_sectors); 3.856 } else { 3.857 - cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, 3.858 + cluster_offset = get_cluster_offset(s, sector_num << 9, 2, 3.859 out_len, 0, 0); 3.860 cluster_offset &= s->cluster_offset_mask; 3.861 lseek(s->fd, cluster_offset, SEEK_SET); 3.862 @@ -1477,15 +1403,54 @@ int qcow_compress_cluster(struct td_stat 3.863 return 0; 3.864 } 3.865 3.866 +int tdqcow_has_parent(struct disk_driver *dd) 3.867 +{ 3.868 + struct tdqcow_state *s = (struct tdqcow_state *)dd->private; 3.869 + return (s->backing_file_offset ? 
1 : 0); 3.870 +} 3.871 + 3.872 +int tdqcow_get_parent(struct disk_driver *cdd, struct disk_driver *pdd) 3.873 +{ 3.874 + off_t off; 3.875 + char *buf, *filename; 3.876 + int len, secs, ret = -1; 3.877 + struct tdqcow_state *child = (struct tdqcow_state *)cdd->private; 3.878 + 3.879 + if (!child->backing_file_offset) 3.880 + return -1; 3.881 + 3.882 + /* read the backing file name */ 3.883 + len = child->backing_file_size; 3.884 + off = child->backing_file_offset - (child->backing_file_offset % 512); 3.885 + secs = (len + (child->backing_file_offset - off) + 511) >> 9; 3.886 + 3.887 + if (posix_memalign((void **)&buf, 512, secs << 9)) 3.888 + return -1; 3.889 + 3.890 + if (lseek(child->fd, off, SEEK_SET) == (off_t)-1) 3.891 + goto out; 3.892 + 3.893 + if (read(child->fd, buf, secs << 9) != secs << 9) 3.894 + goto out; 3.895 + filename = buf + (child->backing_file_offset - off); 3.896 + filename[len] = '\0'; 3.897 + 3.898 + /*Open backing file*/ 3.899 + ret = tdqcow_open(pdd, filename); 3.900 + out: 3.901 + free(buf); 3.902 + return ret; 3.903 +} 3.904 + 3.905 struct tap_disk tapdisk_qcow = { 3.906 - "tapdisk_qcow", 3.907 - sizeof(struct tdqcow_state), 3.908 - tdqcow_open, 3.909 - tdqcow_queue_read, 3.910 - tdqcow_queue_write, 3.911 - tdqcow_submit, 3.912 - tdqcow_get_fd, 3.913 - tdqcow_close, 3.914 - tdqcow_do_callbacks, 3.915 + .disk_type = "tapdisk_qcow", 3.916 + .private_data_size = sizeof(struct tdqcow_state), 3.917 + .td_open = tdqcow_open, 3.918 + .td_queue_read = tdqcow_queue_read, 3.919 + .td_queue_write = tdqcow_queue_write, 3.920 + .td_submit = tdqcow_submit, 3.921 + .td_has_parent = tdqcow_has_parent, 3.922 + .td_get_parent = tdqcow_get_parent, 3.923 + .td_close = tdqcow_close, 3.924 + .td_do_callbacks = tdqcow_do_callbacks, 3.925 }; 3.926 -
4.1 --- a/tools/blktap/drivers/block-ram.c Fri Feb 16 16:34:28 2007 +0000 4.2 +++ b/tools/blktap/drivers/block-ram.c Fri Feb 16 20:31:27 2007 -0800 4.3 @@ -123,14 +123,25 @@ static int get_image_info(struct td_stat 4.4 return 0; 4.5 } 4.6 4.7 +static inline void init_fds(struct disk_driver *dd) 4.8 +{ 4.9 + int i; 4.10 + struct tdram_state *prv = (struct tdram_state *)dd->private; 4.11 + 4.12 + for(i =0 ; i < MAX_IOFD; i++) 4.13 + dd->io_fd[i] = 0; 4.14 + 4.15 + dd->io_fd[0] = prv->poll_pipe[0]; 4.16 +} 4.17 + 4.18 /* Open the disk file and initialize ram state. */ 4.19 -int tdram_open (struct td_state *s, const char *name) 4.20 +int tdram_open (struct disk_driver *dd, const char *name) 4.21 { 4.22 + char *p; 4.23 + uint64_t size; 4.24 int i, fd, ret = 0, count = 0; 4.25 - struct tdram_state *prv = (struct tdram_state *)s->private; 4.26 - uint64_t size; 4.27 - char *p; 4.28 - s->private = prv; 4.29 + struct td_state *s = dd->td_state; 4.30 + struct tdram_state *prv = (struct tdram_state *)dd->private; 4.31 4.32 connections++; 4.33 4.34 @@ -209,88 +220,80 @@ int tdram_open (struct td_state *s, cons 4.35 ret = 0; 4.36 } 4.37 4.38 + init_fds(dd); 4.39 done: 4.40 return ret; 4.41 } 4.42 4.43 - int tdram_queue_read(struct td_state *s, uint64_t sector, 4.44 - int nb_sectors, char *buf, td_callback_t cb, 4.45 - int id, void *private) 4.46 + int tdram_queue_read(struct disk_driver *dd, uint64_t sector, 4.47 + int nb_sectors, char *buf, td_callback_t cb, 4.48 + int id, void *private) 4.49 { 4.50 - struct tdram_state *prv = (struct tdram_state *)s->private; 4.51 + struct td_state *s = dd->td_state; 4.52 + struct tdram_state *prv = (struct tdram_state *)dd->private; 4.53 int size = nb_sectors * s->sector_size; 4.54 uint64_t offset = sector * (uint64_t)s->sector_size; 4.55 - int ret; 4.56 4.57 memcpy(buf, img + offset, size); 4.58 - ret = size; 4.59 4.60 - cb(s, (ret < 0) ? 
ret: 0, id, private); 4.61 - 4.62 - return ret; 4.63 + return cb(dd, 0, sector, nb_sectors, id, private); 4.64 } 4.65 4.66 - int tdram_queue_write(struct td_state *s, uint64_t sector, 4.67 - int nb_sectors, char *buf, td_callback_t cb, 4.68 - int id, void *private) 4.69 +int tdram_queue_write(struct disk_driver *dd, uint64_t sector, 4.70 + int nb_sectors, char *buf, td_callback_t cb, 4.71 + int id, void *private) 4.72 { 4.73 - struct tdram_state *prv = (struct tdram_state *)s->private; 4.74 + struct td_state *s = dd->td_state; 4.75 + struct tdram_state *prv = (struct tdram_state *)dd->private; 4.76 int size = nb_sectors * s->sector_size; 4.77 uint64_t offset = sector * (uint64_t)s->sector_size; 4.78 - int ret; 4.79 4.80 - /*We assume that write access is controlled at a higher level for multiple disks*/ 4.81 + /* We assume that write access is controlled 4.82 + * at a higher level for multiple disks */ 4.83 memcpy(img + offset, buf, size); 4.84 - ret = size; 4.85 4.86 - cb(s, (ret < 0) ? 
ret : 0, id, private); 4.87 - 4.88 - return ret; 4.89 + return cb(dd, 0, sector, nb_sectors, id, private); 4.90 } 4.91 4.92 -int tdram_submit(struct td_state *s) 4.93 +int tdram_submit(struct disk_driver *dd) 4.94 { 4.95 return 0; 4.96 } 4.97 4.98 - 4.99 -int *tdram_get_fd(struct td_state *s) 4.100 +int tdram_close(struct disk_driver *dd) 4.101 { 4.102 - struct tdram_state *prv = (struct tdram_state *)s->private; 4.103 - int *fds, i; 4.104 - 4.105 - fds = malloc(sizeof(int) * MAX_IOFD); 4.106 - /*initialise the FD array*/ 4.107 - for(i=0;i<MAX_IOFD;i++) fds[i] = 0; 4.108 - 4.109 - fds[0] = prv->poll_pipe[0]; 4.110 - return fds; 4.111 -} 4.112 - 4.113 -int tdram_close(struct td_state *s) 4.114 -{ 4.115 - struct tdram_state *prv = (struct tdram_state *)s->private; 4.116 + struct tdram_state *prv = (struct tdram_state *)dd->private; 4.117 4.118 connections--; 4.119 4.120 return 0; 4.121 } 4.122 4.123 -int tdram_do_callbacks(struct td_state *s, int sid) 4.124 +int tdram_do_callbacks(struct disk_driver *dd, int sid) 4.125 { 4.126 /* always ask for a kick */ 4.127 return 1; 4.128 } 4.129 4.130 +int tdram_has_parent(struct disk_driver *dd) 4.131 +{ 4.132 + return 0; 4.133 +} 4.134 + 4.135 +int tdram_get_parent(struct disk_driver *dd, struct disk_driver *parent) 4.136 +{ 4.137 + return -EINVAL; 4.138 +} 4.139 + 4.140 struct tap_disk tapdisk_ram = { 4.141 - "tapdisk_ram", 4.142 - sizeof(struct tdram_state), 4.143 - tdram_open, 4.144 - tdram_queue_read, 4.145 - tdram_queue_write, 4.146 - tdram_submit, 4.147 - tdram_get_fd, 4.148 - tdram_close, 4.149 - tdram_do_callbacks, 4.150 + .disk_type = "tapdisk_ram", 4.151 + .private_data_size = sizeof(struct tdram_state), 4.152 + .td_open = tdram_open, 4.153 + .td_queue_read = tdram_queue_read, 4.154 + .td_queue_write = tdram_queue_write, 4.155 + .td_submit = tdram_submit, 4.156 + .td_has_parent = tdram_has_parent, 4.157 + .td_get_parent = tdram_get_parent, 4.158 + .td_close = tdram_close, 4.159 + .td_do_callbacks = 
tdram_do_callbacks, 4.160 }; 4.161 -
5.1 --- a/tools/blktap/drivers/block-sync.c Fri Feb 16 16:34:28 2007 +0000 5.2 +++ b/tools/blktap/drivers/block-sync.c Fri Feb 16 20:31:27 2007 -0800 5.3 @@ -106,12 +106,23 @@ static int get_image_info(struct td_stat 5.4 return 0; 5.5 } 5.6 5.7 +static inline void init_fds(struct disk_driver *dd) 5.8 +{ 5.9 + int i; 5.10 + struct tdsync_state *prv = (struct tdsync_state *)dd->private; 5.11 + 5.12 + for(i = 0; i < MAX_IOFD; i++) 5.13 + dd->io_fd[i] = 0; 5.14 + 5.15 + dd->io_fd[0] = prv->poll_pipe[0]; 5.16 +} 5.17 + 5.18 /* Open the disk file and initialize aio state. */ 5.19 -int tdsync_open (struct td_state *s, const char *name) 5.20 +int tdsync_open (struct disk_driver *dd, const char *name) 5.21 { 5.22 int i, fd, ret = 0; 5.23 - struct tdsync_state *prv = (struct tdsync_state *)s->private; 5.24 - s->private = prv; 5.25 + struct td_state *s = dd->td_state; 5.26 + struct tdsync_state *prv = (struct tdsync_state *)dd->private; 5.27 5.28 /* set up a pipe so that we can hand back a poll fd that won't fire.*/ 5.29 ret = pipe(prv->poll_pipe); 5.30 @@ -138,16 +149,18 @@ int tdsync_open (struct td_state *s, con 5.31 5.32 prv->fd = fd; 5.33 5.34 + init_fds(dd); 5.35 ret = get_image_info(s, fd); 5.36 done: 5.37 return ret; 5.38 } 5.39 5.40 - int tdsync_queue_read(struct td_state *s, uint64_t sector, 5.41 + int tdsync_queue_read(struct disk_driver *dd, uint64_t sector, 5.42 int nb_sectors, char *buf, td_callback_t cb, 5.43 int id, void *private) 5.44 { 5.45 - struct tdsync_state *prv = (struct tdsync_state *)s->private; 5.46 + struct td_state *s = dd->td_state; 5.47 + struct tdsync_state *prv = (struct tdsync_state *)dd->private; 5.48 int size = nb_sectors * s->sector_size; 5.49 uint64_t offset = sector * (uint64_t)s->sector_size; 5.50 int ret; 5.51 @@ -162,16 +175,15 @@ done: 5.52 } 5.53 } else ret = 0 - errno; 5.54 5.55 - cb(s, (ret < 0) ? ret: 0, id, private); 5.56 - 5.57 - return 1; 5.58 + return cb(dd, (ret < 0) ? 
ret: 0, sector, nb_sectors, id, private); 5.59 } 5.60 5.61 - int tdsync_queue_write(struct td_state *s, uint64_t sector, 5.62 + int tdsync_queue_write(struct disk_driver *dd, uint64_t sector, 5.63 int nb_sectors, char *buf, td_callback_t cb, 5.64 int id, void *private) 5.65 { 5.66 - struct tdsync_state *prv = (struct tdsync_state *)s->private; 5.67 + struct td_state *s = dd->td_state; 5.68 + struct tdsync_state *prv = (struct tdsync_state *)dd->private; 5.69 int size = nb_sectors * s->sector_size; 5.70 uint64_t offset = sector * (uint64_t)s->sector_size; 5.71 int ret = 0; 5.72 @@ -186,34 +198,17 @@ done: 5.73 } 5.74 } else ret = 0 - errno; 5.75 5.76 - cb(s, (ret < 0) ? ret : 0, id, private); 5.77 - 5.78 - return 1; 5.79 + return cb(dd, (ret < 0) ? ret : 0, sector, nb_sectors, id, private); 5.80 } 5.81 5.82 -int tdsync_submit(struct td_state *s) 5.83 +int tdsync_submit(struct disk_driver *dd) 5.84 { 5.85 return 0; 5.86 } 5.87 5.88 - 5.89 -int *tdsync_get_fd(struct td_state *s) 5.90 +int tdsync_close(struct disk_driver *dd) 5.91 { 5.92 - struct tdsync_state *prv = (struct tdsync_state *)s->private; 5.93 - 5.94 - int *fds, i; 5.95 - 5.96 - fds = malloc(sizeof(int) * MAX_IOFD); 5.97 - /*initialise the FD array*/ 5.98 - for(i=0;i<MAX_IOFD;i++) fds[i] = 0; 5.99 - 5.100 - fds[0] = prv->poll_pipe[0]; 5.101 - return fds; 5.102 -} 5.103 - 5.104 -int tdsync_close(struct td_state *s) 5.105 -{ 5.106 - struct tdsync_state *prv = (struct tdsync_state *)s->private; 5.107 + struct tdsync_state *prv = (struct tdsync_state *)dd->private; 5.108 5.109 close(prv->fd); 5.110 close(prv->poll_pipe[0]); 5.111 @@ -222,21 +217,31 @@ int tdsync_close(struct td_state *s) 5.112 return 0; 5.113 } 5.114 5.115 -int tdsync_do_callbacks(struct td_state *s, int sid) 5.116 +int tdsync_do_callbacks(struct disk_driver *dd, int sid) 5.117 { 5.118 /* always ask for a kick */ 5.119 return 1; 5.120 } 5.121 5.122 +int tdsync_has_parent(struct disk_driver *dd) 5.123 +{ 5.124 + return 0; 5.125 +} 5.126 + 5.127 
+int tdsync_get_parent(struct disk_driver *dd, struct disk_driver *parent) 5.128 +{ 5.129 + return -EINVAL; 5.130 +} 5.131 + 5.132 struct tap_disk tapdisk_sync = { 5.133 - "tapdisk_sync", 5.134 - sizeof(struct tdsync_state), 5.135 - tdsync_open, 5.136 - tdsync_queue_read, 5.137 - tdsync_queue_write, 5.138 - tdsync_submit, 5.139 - tdsync_get_fd, 5.140 - tdsync_close, 5.141 - tdsync_do_callbacks, 5.142 + .disk_type = "tapdisk_sync", 5.143 + .private_data_size = sizeof(struct tdsync_state), 5.144 + .td_open = tdsync_open, 5.145 + .td_queue_read = tdsync_queue_read, 5.146 + .td_queue_write = tdsync_queue_write, 5.147 + .td_submit = tdsync_submit, 5.148 + .td_has_parent = tdsync_has_parent, 5.149 + .td_get_parent = tdsync_get_parent, 5.150 + .td_close = tdsync_close, 5.151 + .td_do_callbacks = tdsync_do_callbacks, 5.152 }; 5.153 -
6.1 --- a/tools/blktap/drivers/block-vmdk.c Fri Feb 16 16:34:28 2007 +0000 6.2 +++ b/tools/blktap/drivers/block-vmdk.c Fri Feb 16 20:31:27 2007 -0800 6.3 @@ -107,14 +107,25 @@ struct tdvmdk_state { 6.4 unsigned int cluster_sectors; 6.5 }; 6.6 6.7 +static inline void init_fds(struct disk_driver *dd) 6.8 +{ 6.9 + int i; 6.10 + struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private; 6.11 + 6.12 + for (i = 0; i < MAX_IOFD; i++) 6.13 + dd->io_fd[i] = 0; 6.14 + 6.15 + dd->io_fd[0] = prv->poll_pipe[0]; 6.16 +} 6.17 6.18 /* Open the disk file and initialize aio state. */ 6.19 -static int tdvmdk_open (struct td_state *s, const char *name) 6.20 +static int tdvmdk_open (struct disk_driver *dd, const char *name) 6.21 { 6.22 int ret, fd; 6.23 int l1_size, i; 6.24 uint32_t magic; 6.25 - struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; 6.26 + struct td_state *s = dd->td_state; 6.27 + struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private; 6.28 6.29 /* set up a pipe so that we can hand back a poll fd that won't fire.*/ 6.30 ret = pipe(prv->poll_pipe); 6.31 @@ -206,6 +217,7 @@ static int tdvmdk_open (struct td_state 6.32 if (!prv->l2_cache) 6.33 goto fail; 6.34 prv->fd = fd; 6.35 + init_fds(dd); 6.36 DPRINTF("VMDK File opened successfully\n"); 6.37 return 0; 6.38 6.39 @@ -218,10 +230,9 @@ fail: 6.40 return -1; 6.41 } 6.42 6.43 -static uint64_t get_cluster_offset(struct td_state *s, 6.44 +static uint64_t get_cluster_offset(struct tdvmdk_state *prv, 6.45 uint64_t offset, int allocate) 6.46 { 6.47 - struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; 6.48 unsigned int l1_index, l2_offset, l2_index; 6.49 int min_index, i, j; 6.50 uint32_t min_count, *l2_table, tmp; 6.51 @@ -291,16 +302,17 @@ static uint64_t get_cluster_offset(struc 6.52 return cluster_offset; 6.53 } 6.54 6.55 -static int tdvmdk_queue_read(struct td_state *s, uint64_t sector, 6.56 +static int tdvmdk_queue_read(struct disk_driver *dd, uint64_t sector, 6.57 int nb_sectors, char *buf, 
td_callback_t cb, 6.58 int id, void *private) 6.59 { 6.60 - struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; 6.61 + struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private; 6.62 int index_in_cluster, n; 6.63 uint64_t cluster_offset; 6.64 int ret = 0; 6.65 + 6.66 while (nb_sectors > 0) { 6.67 - cluster_offset = get_cluster_offset(s, sector << 9, 0); 6.68 + cluster_offset = get_cluster_offset(prv, sector << 9, 0); 6.69 index_in_cluster = sector % prv->cluster_sectors; 6.70 n = prv->cluster_sectors - index_in_cluster; 6.71 if (n > nb_sectors) 6.72 @@ -321,27 +333,24 @@ static int tdvmdk_queue_read(struct td_s 6.73 buf += n * 512; 6.74 } 6.75 done: 6.76 - cb(s, ret == -1 ? -1 : 0, id, private); 6.77 - 6.78 - return 1; 6.79 + return cb(dd, ret == -1 ? -1 : 0, sector, nb_sectors, id, private); 6.80 } 6.81 6.82 -static int tdvmdk_queue_write(struct td_state *s, uint64_t sector, 6.83 +static int tdvmdk_queue_write(struct disk_driver *dd, uint64_t sector, 6.84 int nb_sectors, char *buf, td_callback_t cb, 6.85 int id, void *private) 6.86 { 6.87 - struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; 6.88 + struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private; 6.89 int index_in_cluster, n; 6.90 uint64_t cluster_offset; 6.91 int ret = 0; 6.92 - 6.93 6.94 while (nb_sectors > 0) { 6.95 index_in_cluster = sector & (prv->cluster_sectors - 1); 6.96 n = prv->cluster_sectors - index_in_cluster; 6.97 if (n > nb_sectors) 6.98 n = nb_sectors; 6.99 - cluster_offset = get_cluster_offset(s, sector << 9, 1); 6.100 + cluster_offset = get_cluster_offset(prv, sector << 9, 1); 6.101 if (!cluster_offset) { 6.102 ret = -1; 6.103 goto done; 6.104 @@ -358,33 +367,17 @@ static int tdvmdk_queue_write(struct td 6.105 buf += n * 512; 6.106 } 6.107 done: 6.108 - cb(s, ret == -1 ? -1 : 0, id, private); 6.109 - 6.110 - return 1; 6.111 + return cb(dd, ret == -1 ? 
-1 : 0, sector, nb_sectors, id, private); 6.112 } 6.113 6.114 -static int tdvmdk_submit(struct td_state *s) 6.115 +static int tdvmdk_submit(struct disk_driver *dd) 6.116 { 6.117 return 0; 6.118 } 6.119 6.120 - 6.121 -static int *tdvmdk_get_fd(struct td_state *s) 6.122 +static int tdvmdk_close(struct disk_driver *dd) 6.123 { 6.124 - struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; 6.125 - int *fds, i; 6.126 - 6.127 - fds = malloc(sizeof(int) * MAX_IOFD); 6.128 - /*initialise the FD array*/ 6.129 - for (i=0;i<MAX_IOFD;i++) fds[i] = 0; 6.130 - 6.131 - fds[0] = prv->poll_pipe[0]; 6.132 - return fds; 6.133 -} 6.134 - 6.135 -static int tdvmdk_close(struct td_state *s) 6.136 -{ 6.137 - struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private; 6.138 + struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private; 6.139 6.140 safer_free(prv->l1_table); 6.141 safer_free(prv->l1_backup_table); 6.142 @@ -395,21 +388,31 @@ static int tdvmdk_close(struct td_state 6.143 return 0; 6.144 } 6.145 6.146 -static int tdvmdk_do_callbacks(struct td_state *s, int sid) 6.147 +static int tdvmdk_do_callbacks(struct disk_driver *dd, int sid) 6.148 { 6.149 /* always ask for a kick */ 6.150 return 1; 6.151 } 6.152 6.153 +static int tdvmdk_has_parent(struct disk_driver *dd) 6.154 +{ 6.155 + return 0; 6.156 +} 6.157 + 6.158 +static int tdvmdk_get_parent(struct disk_driver *dd, struct disk_driver *parent) 6.159 +{ 6.160 + return -EINVAL; 6.161 +} 6.162 + 6.163 struct tap_disk tapdisk_vmdk = { 6.164 - "tapdisk_vmdk", 6.165 - sizeof(struct tdvmdk_state), 6.166 - tdvmdk_open, 6.167 - tdvmdk_queue_read, 6.168 - tdvmdk_queue_write, 6.169 - tdvmdk_submit, 6.170 - tdvmdk_get_fd, 6.171 - tdvmdk_close, 6.172 - tdvmdk_do_callbacks, 6.173 + .disk_type = "tapdisk_vmdk", 6.174 + .private_data_size = sizeof(struct tdvmdk_state), 6.175 + .td_open = tdvmdk_open, 6.176 + .td_queue_read = tdvmdk_queue_read, 6.177 + .td_queue_write = tdvmdk_queue_write, 6.178 + .td_submit = tdvmdk_submit, 6.179 + 
.td_has_parent = tdvmdk_has_parent, 6.180 + .td_get_parent = tdvmdk_get_parent, 6.181 + .td_close = tdvmdk_close, 6.182 + .td_do_callbacks = tdvmdk_do_callbacks, 6.183 }; 6.184 -
7.1 --- a/tools/blktap/drivers/img2qcow.c Fri Feb 16 16:34:28 2007 +0000 7.2 +++ b/tools/blktap/drivers/img2qcow.c Fri Feb 16 20:31:27 2007 -0800 7.3 @@ -147,7 +147,8 @@ static int get_image_info(struct td_stat 7.4 return 0; 7.5 } 7.6 7.7 -static int send_responses(struct td_state *s, int res, int idx, void *private) 7.8 +static int send_responses(struct disk_driver *dd, int res, uint64_t sec, 7.9 + int nr_secs, int idx, void *private) 7.10 { 7.11 if (res < 0) DFPRINTF("AIO FAILURE: res [%d]!\n",res); 7.12 7.13 @@ -159,7 +160,7 @@ static int send_responses(struct td_stat 7.14 7.15 int main(int argc, char *argv[]) 7.16 { 7.17 - struct tap_disk *drv; 7.18 + struct disk_driver dd; 7.19 struct td_state *s; 7.20 int ret = -1, fd, len; 7.21 fd_set readfds; 7.22 @@ -195,16 +196,17 @@ int main(int argc, char *argv[]) 7.23 } else DFPRINTF("Qcow file created: size %llu sectors\n", 7.24 (long long unsigned)s->size); 7.25 7.26 - drv = &tapdisk_qcow; 7.27 - s->private = malloc(drv->private_data_size); 7.28 + dd.td_state = s; 7.29 + dd.drv = &tapdisk_qcow; 7.30 + dd.private = malloc(dd.drv->private_data_size); 7.31 7.32 /*Open qcow file*/ 7.33 - if (drv->td_open(s, argv[1])!=0) { 7.34 + if (dd.drv->td_open(&dd, argv[1])!=0) { 7.35 DFPRINTF("Unable to open Qcow file [%s]\n",argv[1]); 7.36 exit(-1); 7.37 } 7.38 7.39 - io_fd = drv->td_get_fd(s); 7.40 + io_fd = dd.io_fd; 7.41 7.42 /*Initialise the output string*/ 7.43 memset(output,0x20,25); 7.44 @@ -245,9 +247,9 @@ int main(int argc, char *argv[]) 7.45 len = (len >> 9) << 9; 7.46 } 7.47 7.48 - ret = drv->td_queue_write(s, i >> 9, 7.49 - len >> 9, buf, 7.50 - send_responses, 0, buf); 7.51 + ret = dd.drv->td_queue_write(&dd, i >> 9, 7.52 + len >> 9, buf, 7.53 + send_responses, 0, buf); 7.54 7.55 if (!ret) submit_events++; 7.56 7.57 @@ -261,7 +263,7 @@ int main(int argc, char *argv[]) 7.58 debug_output(i,s->size << 9); 7.59 7.60 if ((submit_events % 10 == 0) || complete) 7.61 - drv->td_submit(s); 7.62 + dd.drv->td_submit(&dd); 7.63 
timeout.tv_usec = 0; 7.64 7.65 } else { 7.66 @@ -275,14 +277,14 @@ int main(int argc, char *argv[]) 7.67 ret = select(maxfds + 1, &readfds, (fd_set *) 0, 7.68 (fd_set *) 0, &timeout); 7.69 7.70 - if (ret > 0) drv->td_do_callbacks(s, 0); 7.71 + if (ret > 0) dd.drv->td_do_callbacks(&dd, 0); 7.72 if (complete && (returned_events == submit_events)) 7.73 running = 0; 7.74 } 7.75 memcpy(output+prev+1,"=",1); 7.76 DFPRINTF("\r%s 100%%\nTRANSFER COMPLETE\n\n", output); 7.77 - drv->td_close(s); 7.78 - free(s->private); 7.79 + dd.drv->td_close(&dd); 7.80 + free(dd.private); 7.81 free(s); 7.82 7.83 return 0;
8.1 --- a/tools/blktap/drivers/qcow2raw.c Fri Feb 16 16:34:28 2007 +0000 8.2 +++ b/tools/blktap/drivers/qcow2raw.c Fri Feb 16 20:31:27 2007 -0800 8.3 @@ -55,8 +55,7 @@ static int read_complete = 0, write_comp 8.4 static int returned_read_events = 0, returned_write_events = 0; 8.5 static int submit_events = 0; 8.6 static uint32_t read_idx = 0, write_idx = 0; 8.7 -struct tap_disk *drv1, *drv2; 8.8 -struct td_state *sqcow, *saio; 8.9 +struct disk_driver ddqcow, ddaio; 8.10 static uint64_t prev = 0, written = 0; 8.11 static char output[25]; 8.12 8.13 @@ -100,7 +99,8 @@ static inline void LOCAL_FD_SET(fd_set * 8.14 return; 8.15 } 8.16 8.17 -static int send_write_responses(struct td_state *s, int res, int idx, void *private) 8.18 +static int send_write_responses(struct disk_driver *dd, int res, uint64_t sec, 8.19 + int nr_secs, int idx, void *private) 8.20 { 8.21 if (res < 0) { 8.22 DFPRINTF("AIO FAILURE: res [%d]!\n",res); 8.23 @@ -112,12 +112,13 @@ static int send_write_responses(struct t 8.24 if (complete && (returned_write_events == submit_events)) 8.25 write_complete = 1; 8.26 8.27 - debug_output(written, s->size << 9); 8.28 + debug_output(written, dd->td_state->size << 9); 8.29 free(private); 8.30 return 0; 8.31 } 8.32 8.33 -static int send_read_responses(struct td_state *s, int res, int idx, void *private) 8.34 +static int send_read_responses(struct disk_driver *dd, int res, uint64_t sec, 8.35 + int nr_secs, int idx, void *private) 8.36 { 8.37 int ret; 8.38 8.39 @@ -128,8 +129,8 @@ static int send_read_responses(struct td 8.40 if (complete && (returned_read_events == submit_events)) 8.41 read_complete = 1; 8.42 8.43 - ret = drv2->td_queue_write(saio, idx, BLOCK_PROCESSSZ>>9, private, 8.44 - send_write_responses, idx, private); 8.45 + ret = ddaio.drv->td_queue_write(&ddaio, idx, BLOCK_PROCESSSZ>>9, private, 8.46 + send_write_responses, idx, private); 8.47 if (ret != 0) { 8.48 DFPRINTF("ERROR in submitting queue write!\n"); 8.49 return 0; 8.50 @@ -137,7 +138,7 @@ 
static int send_read_responses(struct td 8.51 8.52 if ( (complete && returned_read_events == submit_events) || 8.53 (returned_read_events % 10 == 0) ) { 8.54 - drv2->td_submit(saio); 8.55 + ddaio.drv->td_submit(&ddaio); 8.56 } 8.57 8.58 return 0; 8.59 @@ -161,20 +162,20 @@ int main(int argc, char *argv[]) 8.60 exit(-1); 8.61 } 8.62 8.63 - sqcow = malloc(sizeof(struct td_state)); 8.64 - saio = malloc(sizeof(struct td_state)); 8.65 + ddqcow.td_state = malloc(sizeof(struct td_state)); 8.66 + ddaio.td_state = malloc(sizeof(struct td_state)); 8.67 8.68 /*Open qcow source file*/ 8.69 - drv1 = &tapdisk_qcow; 8.70 - sqcow->private = malloc(drv1->private_data_size); 8.71 + ddqcow.drv = &tapdisk_qcow; 8.72 + ddqcow.private = malloc(ddqcow.drv->private_data_size); 8.73 8.74 - if (drv1->td_open(sqcow, argv[2])!=0) { 8.75 + if (ddqcow.drv->td_open(&ddqcow, argv[2])!=0) { 8.76 DFPRINTF("Unable to open Qcow file [%s]\n",argv[2]); 8.77 exit(-1); 8.78 } else DFPRINTF("QCOW file opened, size %llu\n", 8.79 - (long long unsigned)sqcow->size); 8.80 + (long long unsigned)ddqcow.td_state->size); 8.81 8.82 - qcowio_fd = drv1->td_get_fd(sqcow); 8.83 + qcowio_fd = ddqcow.io_fd; 8.84 8.85 /*Setup aio destination file*/ 8.86 ret = stat(argv[1],&finfo); 8.87 @@ -191,12 +192,12 @@ int main(int argc, char *argv[]) 8.88 argv[1], 0 - errno); 8.89 exit(-1); 8.90 } 8.91 - if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) { 8.92 + if (ftruncate(fd, (off_t)ddqcow.td_state->size<<9) < 0) { 8.93 DFPRINTF("Unable to create file " 8.94 "[%s] of size %llu (errno %d). 
" 8.95 "Exiting...\n", 8.96 argv[1], 8.97 - (long long unsigned)sqcow->size<<9, 8.98 + (long long unsigned)ddqcow.td_state->size<<9, 8.99 0 - errno); 8.100 close(fd); 8.101 exit(-1); 8.102 @@ -238,43 +239,43 @@ int main(int argc, char *argv[]) 8.103 close(fd); 8.104 exit(-1); 8.105 } 8.106 - if (size < sqcow->size<<9) { 8.107 + if (size < ddqcow.td_state->size<<9) { 8.108 DFPRINTF("ERROR: Not enough space on device " 8.109 "%s (%lu bytes available, %llu bytes required\n", 8.110 argv[1], size, 8.111 - (long long unsigned)sqcow->size<<9); 8.112 + (long long unsigned)ddqcow.td_state->size<<9); 8.113 close(fd); 8.114 exit(-1); 8.115 } 8.116 } else { 8.117 - if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) { 8.118 + if (ftruncate(fd, (off_t)ddqcow.td_state->size<<9) < 0) { 8.119 DFPRINTF("Unable to create file " 8.120 "[%s] of size %llu (errno %d). " 8.121 "Exiting...\n", 8.122 argv[1], 8.123 - (long long unsigned)sqcow->size<<9, 8.124 + (long long unsigned)ddqcow.td_state->size<<9, 8.125 0 - errno); 8.126 close(fd); 8.127 exit(-1); 8.128 } else DFPRINTF("File [%s] truncated to length %llu " 8.129 "(%llu)\n", 8.130 argv[1], 8.131 - (long long unsigned)sqcow->size<<9, 8.132 - (long long unsigned)sqcow->size); 8.133 + (long long unsigned)ddqcow.td_state->size<<9, 8.134 + (long long unsigned)ddqcow.td_state->size); 8.135 } 8.136 close(fd); 8.137 } 8.138 8.139 /*Open aio destination file*/ 8.140 - drv2 = &tapdisk_aio; 8.141 - saio->private = malloc(drv2->private_data_size); 8.142 + ddaio.drv = &tapdisk_aio; 8.143 + ddaio.private = malloc(ddaio.drv->private_data_size); 8.144 8.145 - if (drv2->td_open(saio, argv[1])!=0) { 8.146 + if (ddaio.drv->td_open(&ddaio, argv[1])!=0) { 8.147 DFPRINTF("Unable to open Qcow file [%s]\n", argv[1]); 8.148 exit(-1); 8.149 } 8.150 8.151 - aio_fd = drv2->td_get_fd(saio); 8.152 + aio_fd = ddaio.io_fd; 8.153 8.154 /*Initialise the output string*/ 8.155 memset(output,0x20,25); 8.156 @@ -298,9 +299,9 @@ int main(int argc, char *argv[]) 8.157 } 8.158 
8.159 /*Attempt to read 4k sized blocks*/ 8.160 - ret = drv1->td_queue_read(sqcow, i>>9, 8.161 - BLOCK_PROCESSSZ>>9, buf, 8.162 - send_read_responses, i>>9, buf); 8.163 + ret = ddqcow.drv->td_queue_read(&ddqcow, i>>9, 8.164 + BLOCK_PROCESSSZ>>9, buf, 8.165 + send_read_responses, i>>9, buf); 8.166 8.167 if (ret < 0) { 8.168 DFPRINTF("UNABLE TO READ block [%llu]\n", 8.169 @@ -311,12 +312,12 @@ int main(int argc, char *argv[]) 8.170 submit_events++; 8.171 } 8.172 8.173 - if (i >= sqcow->size<<9) { 8.174 + if (i >= ddqcow.td_state->size<<9) { 8.175 complete = 1; 8.176 } 8.177 8.178 if ((submit_events % 10 == 0) || complete) 8.179 - drv1->td_submit(sqcow); 8.180 + ddqcow.drv->td_submit(&ddqcow); 8.181 timeout.tv_usec = 0; 8.182 8.183 } else { 8.184 @@ -332,9 +333,9 @@ int main(int argc, char *argv[]) 8.185 8.186 if (ret > 0) { 8.187 if (FD_ISSET(qcowio_fd[0], &readfds)) 8.188 - drv1->td_do_callbacks(sqcow, 0); 8.189 + ddqcow.drv->td_do_callbacks(&ddqcow, 0); 8.190 if (FD_ISSET(aio_fd[0], &readfds)) 8.191 - drv2->td_do_callbacks(saio, 0); 8.192 + ddaio.drv->td_do_callbacks(&ddaio, 0); 8.193 } 8.194 if (complete && (returned_write_events == submit_events)) 8.195 running = 0;
9.1 --- a/tools/blktap/drivers/tapdisk.c Fri Feb 16 16:34:28 2007 +0000 9.2 +++ b/tools/blktap/drivers/tapdisk.c Fri Feb 16 20:31:27 2007 -0800 9.3 @@ -48,6 +48,12 @@ static pid_t process; 9.4 int connected_disks = 0; 9.5 fd_list_entry_t *fd_start = NULL; 9.6 9.7 +int do_cow_read(struct disk_driver *dd, blkif_request_t *req, 9.8 + int sidx, uint64_t sector, int nr_secs); 9.9 + 9.10 +#define td_for_each_disk(tds, drv) \ 9.11 + for (drv = tds->disks; drv != NULL; drv = drv->next) 9.12 + 9.13 void usage(void) 9.14 { 9.15 fprintf(stderr, "blktap-utils: v1.0.0\n"); 9.16 @@ -78,10 +84,17 @@ void daemonize(void) 9.17 static void unmap_disk(struct td_state *s) 9.18 { 9.19 tapdev_info_t *info = s->ring_info; 9.20 - struct tap_disk *drv = s->drv; 9.21 + struct disk_driver *dd, *tmp; 9.22 fd_list_entry_t *entry; 9.23 9.24 - drv->td_close(s); 9.25 + dd = s->disks; 9.26 + while (dd) { 9.27 + tmp = dd->next; 9.28 + dd->drv->td_close(dd); 9.29 + free(dd->private); 9.30 + free(dd); 9.31 + dd = tmp; 9.32 + } 9.33 9.34 if (info != NULL && info->mem > 0) 9.35 munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE); 9.36 @@ -96,7 +109,6 @@ static void unmap_disk(struct td_state * 9.37 free(s->fd_entry); 9.38 free(s->blkif); 9.39 free(s->ring_info); 9.40 - free(s->private); 9.41 free(s); 9.42 9.43 return; 9.44 @@ -113,16 +125,19 @@ void sig_handler(int sig) 9.45 static inline int LOCAL_FD_SET(fd_set *readfds) 9.46 { 9.47 fd_list_entry_t *ptr; 9.48 + struct disk_driver *dd; 9.49 9.50 ptr = fd_start; 9.51 while (ptr != NULL) { 9.52 if (ptr->tap_fd) { 9.53 FD_SET(ptr->tap_fd, readfds); 9.54 - if (ptr->io_fd[READ]) 9.55 - FD_SET(ptr->io_fd[READ], readfds); 9.56 - maxfds = (ptr->io_fd[READ] > maxfds ? 9.57 - ptr->io_fd[READ]: maxfds); 9.58 - maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd: maxfds); 9.59 + td_for_each_disk(ptr->s, dd) { 9.60 + if (dd->io_fd[READ]) 9.61 + FD_SET(dd->io_fd[READ], readfds); 9.62 + maxfds = (dd->io_fd[READ] > maxfds ? 
9.63 + dd->io_fd[READ] : maxfds); 9.64 + } 9.65 + maxfds = (ptr->tap_fd > maxfds ? ptr->tap_fd : maxfds); 9.66 } 9.67 ptr = ptr->next; 9.68 } 9.69 @@ -130,8 +145,7 @@ static inline int LOCAL_FD_SET(fd_set *r 9.70 return 0; 9.71 } 9.72 9.73 -static inline fd_list_entry_t *add_fd_entry( 9.74 - int tap_fd, int io_fd[MAX_IOFD], struct td_state *s) 9.75 +static inline fd_list_entry_t *add_fd_entry(int tap_fd, struct td_state *s) 9.76 { 9.77 fd_list_entry_t **pprev, *entry; 9.78 int i; 9.79 @@ -139,12 +153,10 @@ static inline fd_list_entry_t *add_fd_en 9.80 DPRINTF("Adding fd_list_entry\n"); 9.81 9.82 /*Add to linked list*/ 9.83 - s->fd_entry = entry = malloc(sizeof(fd_list_entry_t)); 9.84 + s->fd_entry = entry = malloc(sizeof(fd_list_entry_t)); 9.85 entry->tap_fd = tap_fd; 9.86 - for (i = 0; i < MAX_IOFD; i++) 9.87 - entry->io_fd[i] = io_fd[i]; 9.88 - entry->s = s; 9.89 - entry->next = NULL; 9.90 + entry->s = s; 9.91 + entry->next = NULL; 9.92 9.93 pprev = &fd_start; 9.94 while (*pprev != NULL) 9.95 @@ -171,7 +183,7 @@ static inline struct td_state *get_state 9.96 static struct tap_disk *get_driver(int drivertype) 9.97 { 9.98 /* blktapctrl has passed us the driver type */ 9.99 - 9.100 + 9.101 return dtypes[drivertype]->drv; 9.102 } 9.103 9.104 @@ -183,14 +195,36 @@ static struct td_state *state_init(void) 9.105 9.106 s = malloc(sizeof(struct td_state)); 9.107 blkif = s->blkif = malloc(sizeof(blkif_t)); 9.108 - s->ring_info = malloc(sizeof(tapdev_info_t)); 9.109 + s->ring_info = calloc(1, sizeof(tapdev_info_t)); 9.110 9.111 - for (i = 0; i < MAX_REQUESTS; i++) 9.112 - blkif->pending_list[i].count = 0; 9.113 + for (i = 0; i < MAX_REQUESTS; i++) { 9.114 + blkif->pending_list[i].secs_pending = 0; 9.115 + blkif->pending_list[i].submitting = 0; 9.116 + } 9.117 9.118 return s; 9.119 } 9.120 9.121 +static struct disk_driver *disk_init(struct td_state *s, struct tap_disk *drv) 9.122 +{ 9.123 + struct disk_driver *dd; 9.124 + 9.125 + dd = calloc(1, sizeof(struct disk_driver)); 
9.126 + if (!dd) 9.127 + return NULL; 9.128 + 9.129 + dd->private = malloc(drv->private_data_size); 9.130 + if (!dd->private) { 9.131 + free(dd); 9.132 + return NULL; 9.133 + } 9.134 + 9.135 + dd->drv = drv; 9.136 + dd->td_state = s; 9.137 + 9.138 + return dd; 9.139 +} 9.140 + 9.141 static int map_new_dev(struct td_state *s, int minor) 9.142 { 9.143 int tap_fd; 9.144 @@ -246,6 +280,51 @@ static int map_new_dev(struct td_state * 9.145 return -1; 9.146 } 9.147 9.148 +static int open_disk(struct td_state *s, struct disk_driver *dd, char *path) 9.149 +{ 9.150 + int err; 9.151 + struct disk_driver *d = dd; 9.152 + 9.153 + err = dd->drv->td_open(dd, path); 9.154 + if (err) 9.155 + return err; 9.156 + 9.157 + /* load backing files as necessary */ 9.158 + while (d->drv->td_has_parent(d)) { 9.159 + struct disk_driver *new; 9.160 + 9.161 + new = calloc(1, sizeof(struct disk_driver)); 9.162 + if (!new) 9.163 + goto fail; 9.164 + new->drv = d->drv; 9.165 + new->td_state = s; 9.166 + new->private = malloc(new->drv->private_data_size); 9.167 + if (!new->private) { 9.168 + free(new); 9.169 + goto fail; 9.170 + } 9.171 + 9.172 + err = d->drv->td_get_parent(d, new); 9.173 + if (err) 9.174 + goto fail; 9.175 + 9.176 + d = d->next = new; 9.177 + } 9.178 + 9.179 + return 0; 9.180 + 9.181 + fail: 9.182 + DPRINTF("failed opening disk\n"); 9.183 + while (dd) { 9.184 + d = dd->next; 9.185 + dd->drv->td_close(dd); 9.186 + free(dd->private); 9.187 + free(dd); 9.188 + dd = d; 9.189 + } 9.190 + return err; 9.191 +} 9.192 + 9.193 static int read_msg(char *buf) 9.194 { 9.195 int length, len, msglen, tap_fd, *io_fd; 9.196 @@ -255,6 +334,7 @@ static int read_msg(char *buf) 9.197 msg_newdev_t *msg_dev; 9.198 msg_pid_t *msg_pid; 9.199 struct tap_disk *drv; 9.200 + struct disk_driver *dd; 9.201 int ret = -1; 9.202 struct td_state *s = NULL; 9.203 fd_list_entry_t *entry; 9.204 @@ -289,20 +369,20 @@ static int read_msg(char *buf) 9.205 if (s == NULL) 9.206 goto params_done; 9.207 9.208 - s->drv = drv; 
9.209 - s->private = malloc(drv->private_data_size); 9.210 - if (s->private == NULL) { 9.211 + s->disks = dd = disk_init(s, drv); 9.212 + if (!dd) { 9.213 free(s); 9.214 goto params_done; 9.215 } 9.216 9.217 /*Open file*/ 9.218 - ret = drv->td_open(s, path); 9.219 - io_fd = drv->td_get_fd(s); 9.220 + ret = open_disk(s, dd, path); 9.221 + if (ret) 9.222 + goto params_done; 9.223 9.224 - entry = add_fd_entry(0, io_fd, s); 9.225 + entry = add_fd_entry(0, s); 9.226 entry->cookie = msg->cookie; 9.227 - DPRINTF("Entered cookie %d\n",entry->cookie); 9.228 + DPRINTF("Entered cookie %d\n", entry->cookie); 9.229 9.230 memset(buf, 0x00, MSG_SIZE); 9.231 9.232 @@ -323,13 +403,12 @@ static int read_msg(char *buf) 9.233 free(path); 9.234 return 1; 9.235 9.236 - 9.237 - 9.238 case CTLMSG_NEWDEV: 9.239 msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t)); 9.240 9.241 s = get_state(msg->cookie); 9.242 - DPRINTF("Retrieving state, cookie %d.....[%s]\n",msg->cookie, (s == NULL ? "FAIL":"OK")); 9.243 + DPRINTF("Retrieving state, cookie %d.....[%s]\n", 9.244 + msg->cookie, (s == NULL ? "FAIL":"OK")); 9.245 if (s != NULL) { 9.246 ret = ((map_new_dev(s, msg_dev->devnum) 9.247 == msg_dev->devnum ? 
0: -1)); 9.248 @@ -397,49 +476,75 @@ static inline void kick_responses(struct 9.249 } 9.250 } 9.251 9.252 -void io_done(struct td_state *s, int sid) 9.253 +void io_done(struct disk_driver *dd, int sid) 9.254 { 9.255 - struct tap_disk *drv = s->drv; 9.256 + struct tap_disk *drv = dd->drv; 9.257 9.258 if (!run) return; /*We have received signal to close*/ 9.259 9.260 - if (drv->td_do_callbacks(s, sid) > 0) kick_responses(s); 9.261 + if (drv->td_do_callbacks(dd, sid) > 0) kick_responses(dd->td_state); 9.262 9.263 return; 9.264 } 9.265 9.266 -int send_responses(struct td_state *s, int res, int idx, void *private) 9.267 +static inline uint64_t 9.268 +segment_start(blkif_request_t *req, int sidx) 9.269 { 9.270 + int i; 9.271 + uint64_t start = req->sector_number; 9.272 + 9.273 + for (i = 0; i < sidx; i++) 9.274 + start += (req->seg[i].last_sect - req->seg[i].first_sect + 1); 9.275 + 9.276 + return start; 9.277 +} 9.278 + 9.279 +uint64_t sends, responds; 9.280 +int send_responses(struct disk_driver *dd, int res, 9.281 + uint64_t sector, int nr_secs, int idx, void *private) 9.282 +{ 9.283 + pending_req_t *preq; 9.284 blkif_request_t *req; 9.285 int responses_queued = 0; 9.286 + struct td_state *s = dd->td_state; 9.287 blkif_t *blkif = s->blkif; 9.288 + int sidx = (int)private, secs_done = nr_secs; 9.289 9.290 - req = &blkif->pending_list[idx].req; 9.291 - 9.292 - if ( (idx > MAX_REQUESTS-1) || 9.293 - (blkif->pending_list[idx].count == 0) ) 9.294 + if ( (idx > MAX_REQUESTS-1) ) 9.295 { 9.296 DPRINTF("invalid index returned(%u)!\n", idx); 9.297 return 0; 9.298 } 9.299 - 9.300 - if (res != 0) { 9.301 - blkif->pending_list[idx].status = BLKIF_RSP_ERROR; 9.302 + preq = &blkif->pending_list[idx]; 9.303 + req = &preq->req; 9.304 + 9.305 + if (res == BLK_NOT_ALLOCATED) { 9.306 + res = do_cow_read(dd, req, sidx, sector, nr_secs); 9.307 + if (res >= 0) { 9.308 + secs_done = res; 9.309 + res = 0; 9.310 + } else 9.311 + secs_done = 0; 9.312 } 9.313 9.314 - 
blkif->pending_list[idx].count--; 9.315 + preq->secs_pending -= secs_done; 9.316 + 9.317 + if (res == -EBUSY && preq->submitting) 9.318 + return -EBUSY; /* propagate -EBUSY back to higher layers */ 9.319 + if (res) 9.320 + preq->status = BLKIF_RSP_ERROR; 9.321 9.322 - if (blkif->pending_list[idx].count == 0) 9.323 + if (!preq->submitting && preq->secs_pending == 0) 9.324 { 9.325 blkif_request_t tmp; 9.326 blkif_response_t *rsp; 9.327 - 9.328 - tmp = blkif->pending_list[idx].req; 9.329 + 9.330 + tmp = preq->req; 9.331 rsp = (blkif_response_t *)req; 9.332 9.333 rsp->id = tmp.id; 9.334 rsp->operation = tmp.operation; 9.335 - rsp->status = blkif->pending_list[idx].status; 9.336 + rsp->status = preq->status; 9.337 9.338 write_rsp_to_ring(s, rsp); 9.339 responses_queued++; 9.340 @@ -447,15 +552,51 @@ int send_responses(struct td_state *s, i 9.341 return responses_queued; 9.342 } 9.343 9.344 +int do_cow_read(struct disk_driver *dd, blkif_request_t *req, 9.345 + int sidx, uint64_t sector, int nr_secs) 9.346 +{ 9.347 + char *page; 9.348 + int ret, early; 9.349 + uint64_t seg_start, seg_end; 9.350 + struct td_state *s = dd->td_state; 9.351 + tapdev_info_t *info = s->ring_info; 9.352 + struct disk_driver *parent = dd->next; 9.353 + 9.354 + seg_start = segment_start(req, sidx); 9.355 + seg_end = seg_start + req->seg[sidx].last_sect + 1; 9.356 + 9.357 + ASSERT(sector >= seg_start && sector + nr_secs <= seg_end); 9.358 + 9.359 + page = (char *)MMAP_VADDR(info->vstart, 9.360 + (unsigned long)req->id, sidx); 9.361 + page += (req->seg[sidx].first_sect << SECTOR_SHIFT); 9.362 + page += ((sector - seg_start) << SECTOR_SHIFT); 9.363 + 9.364 + if (!parent) { 9.365 + memset(page, 0, nr_secs << SECTOR_SHIFT); 9.366 + return nr_secs; 9.367 + } 9.368 + 9.369 + /* reissue request to backing file */ 9.370 + ret = parent->drv->td_queue_read(parent, sector, nr_secs, 9.371 + page, send_responses, 9.372 + req->id, (void *)sidx); 9.373 + if (ret > 0) 9.374 + parent->early += ret; 9.375 + 9.376 + 
return ((ret >= 0) ? 0 : ret); 9.377 +} 9.378 + 9.379 static void get_io_request(struct td_state *s) 9.380 { 9.381 - RING_IDX rp, rc, j, i, ret; 9.382 + RING_IDX rp, rc, j, i; 9.383 blkif_request_t *req; 9.384 - int idx, nsects; 9.385 + int idx, nsects, ret; 9.386 uint64_t sector_nr; 9.387 char *page; 9.388 int early = 0; /* count early completions */ 9.389 - struct tap_disk *drv = s->drv; 9.390 + struct disk_driver *dd = s->disks; 9.391 + struct tap_disk *drv = dd->drv; 9.392 blkif_t *blkif = s->blkif; 9.393 tapdev_info_t *info = s->ring_info; 9.394 int page_size = getpagesize(); 9.395 @@ -466,23 +607,33 @@ static void get_io_request(struct td_sta 9.396 rmb(); 9.397 for (j = info->fe_ring.req_cons; j != rp; j++) 9.398 { 9.399 - int done = 0; 9.400 + int done = 0, start_seg = 0; 9.401 9.402 req = NULL; 9.403 req = RING_GET_REQUEST(&info->fe_ring, j); 9.404 ++info->fe_ring.req_cons; 9.405 9.406 if (req == NULL) continue; 9.407 - 9.408 + 9.409 idx = req->id; 9.410 - ASSERT(blkif->pending_list[idx].count == 0); 9.411 - memcpy(&blkif->pending_list[idx].req, req, sizeof(*req)); 9.412 - blkif->pending_list[idx].status = BLKIF_RSP_OKAY; 9.413 - blkif->pending_list[idx].count = req->nr_segments; 9.414 9.415 - sector_nr = req->sector_number; 9.416 + if (info->busy.req) { 9.417 + /* continue where we left off last time */ 9.418 + ASSERT(info->busy.req == req); 9.419 + start_seg = info->busy.seg_idx; 9.420 + sector_nr = segment_start(req, start_seg); 9.421 + info->busy.seg_idx = 0; 9.422 + info->busy.req = NULL; 9.423 + } else { 9.424 + ASSERT(blkif->pending_list[idx].secs_pending == 0); 9.425 + memcpy(&blkif->pending_list[idx].req, 9.426 + req, sizeof(*req)); 9.427 + blkif->pending_list[idx].status = BLKIF_RSP_OKAY; 9.428 + blkif->pending_list[idx].submitting = 1; 9.429 + sector_nr = req->sector_number; 9.430 + } 9.431 9.432 - for (i = 0; i < req->nr_segments; i++) { 9.433 + for (i = start_seg; i < req->nr_segments; i++) { 9.434 nsects = req->seg[i].last_sect - 9.435 
req->seg[i].first_sect + 1; 9.436 9.437 @@ -508,31 +659,37 @@ static void get_io_request(struct td_sta 9.438 (long long unsigned) sector_nr); 9.439 continue; 9.440 } 9.441 - 9.442 + 9.443 + blkif->pending_list[idx].secs_pending += nsects; 9.444 + 9.445 switch (req->operation) 9.446 { 9.447 case BLKIF_OP_WRITE: 9.448 - ret = drv->td_queue_write(s, sector_nr, 9.449 - nsects, page, send_responses, 9.450 - idx, NULL); 9.451 - if (ret > 0) early += ret; 9.452 + ret = drv->td_queue_write(dd, sector_nr, 9.453 + nsects, page, 9.454 + send_responses, 9.455 + idx, (void *)i); 9.456 + if (ret > 0) dd->early += ret; 9.457 else if (ret == -EBUSY) { 9.458 - /* 9.459 - * TODO: Sector is locked * 9.460 - * Need to put req back on queue * 9.461 - */ 9.462 + /* put req back on queue */ 9.463 + --info->fe_ring.req_cons; 9.464 + info->busy.req = req; 9.465 + info->busy.seg_idx = i; 9.466 + goto out; 9.467 } 9.468 break; 9.469 case BLKIF_OP_READ: 9.470 - ret = drv->td_queue_read(s, sector_nr, 9.471 - nsects, page, send_responses, 9.472 - idx, NULL); 9.473 - if (ret > 0) early += ret; 9.474 + ret = drv->td_queue_read(dd, sector_nr, 9.475 + nsects, page, 9.476 + send_responses, 9.477 + idx, (void *)i); 9.478 + if (ret > 0) dd->early += ret; 9.479 else if (ret == -EBUSY) { 9.480 - /* 9.481 - * TODO: Sector is locked * 9.482 - * Need to put req back on queue * 9.483 - */ 9.484 + /* put req back on queue */ 9.485 + --info->fe_ring.req_cons; 9.486 + info->busy.req = req; 9.487 + info->busy.seg_idx = i; 9.488 + goto out; 9.489 } 9.490 break; 9.491 default: 9.492 @@ -541,14 +698,22 @@ static void get_io_request(struct td_sta 9.493 } 9.494 sector_nr += nsects; 9.495 } 9.496 + blkif->pending_list[idx].submitting = 0; 9.497 + /* force write_rsp_to_ring for synchronous case */ 9.498 + if (blkif->pending_list[idx].secs_pending == 0) 9.499 + dd->early += send_responses(dd, 0, 0, 0, idx, (void *)0); 9.500 } 9.501 9.502 + out: 9.503 /*Batch done*/ 9.504 - drv->td_submit(s); 9.505 - 9.506 - if (early > 
0) 9.507 - io_done(s,10); 9.508 - 9.509 + td_for_each_disk(s, dd) { 9.510 + dd->early += dd->drv->td_submit(dd); 9.511 + if (dd->early > 0) { 9.512 + io_done(dd, 10); 9.513 + dd->early = 0; 9.514 + } 9.515 + } 9.516 + 9.517 return; 9.518 } 9.519 9.520 @@ -558,10 +723,9 @@ int main(int argc, char *argv[]) 9.521 char *p, *buf; 9.522 fd_set readfds, writefds; 9.523 fd_list_entry_t *ptr; 9.524 - struct tap_disk *drv; 9.525 struct td_state *s; 9.526 char openlogbuf[128]; 9.527 - 9.528 + 9.529 if (argc != 3) usage(); 9.530 9.531 daemonize(); 9.532 @@ -573,12 +737,12 @@ int main(int argc, char *argv[]) 9.533 signal (SIGINT, sig_handler); 9.534 9.535 /*Open the control channel*/ 9.536 - fds[READ] = open(argv[1],O_RDWR|O_NONBLOCK); 9.537 + fds[READ] = open(argv[1],O_RDWR|O_NONBLOCK); 9.538 fds[WRITE] = open(argv[2],O_RDWR|O_NONBLOCK); 9.539 9.540 if ( (fds[READ] < 0) || (fds[WRITE] < 0) ) 9.541 { 9.542 - DPRINTF("FD open failed [%d,%d]\n",fds[READ], fds[WRITE]); 9.543 + DPRINTF("FD open failed [%d,%d]\n", fds[READ], fds[WRITE]); 9.544 exit(-1); 9.545 } 9.546 9.547 @@ -608,11 +772,22 @@ int main(int argc, char *argv[]) 9.548 { 9.549 ptr = fd_start; 9.550 while (ptr != NULL) { 9.551 - if (FD_ISSET(ptr->tap_fd, &readfds)) 9.552 + int progress_made = 0; 9.553 + struct disk_driver *dd; 9.554 + tapdev_info_t *info = ptr->s->ring_info; 9.555 + 9.556 + td_for_each_disk(ptr->s, dd) { 9.557 + if (dd->io_fd[READ] && 9.558 + FD_ISSET(dd->io_fd[READ], 9.559 + &readfds)) { 9.560 + io_done(dd, READ); 9.561 + progress_made = 1; 9.562 + } 9.563 + } 9.564 + 9.565 + if (FD_ISSET(ptr->tap_fd, &readfds) || 9.566 + (info->busy.req && progress_made)) 9.567 get_io_request(ptr->s); 9.568 - if (ptr->io_fd[READ] && 9.569 - FD_ISSET(ptr->io_fd[READ], &readfds)) 9.570 - io_done(ptr->s, READ); 9.571 9.572 ptr = ptr->next; 9.573 } 9.574 @@ -628,11 +803,8 @@ int main(int argc, char *argv[]) 9.575 ptr = fd_start; 9.576 while (ptr != NULL) { 9.577 s = ptr->s; 9.578 - drv = s->drv; 9.579 9.580 unmap_disk(s); 
9.581 - drv->td_close(s); 9.582 - free(s->private); 9.583 free(s->blkif); 9.584 free(s->ring_info); 9.585 free(s);
10.1 --- a/tools/blktap/drivers/tapdisk.h Fri Feb 16 16:34:28 2007 +0000 10.2 +++ b/tools/blktap/drivers/tapdisk.h Fri Feb 16 20:31:27 2007 -0800 10.3 @@ -43,6 +43,9 @@ 10.4 * - The fd used for poll is an otherwise unused pipe, which allows poll to 10.5 * be safely called without ever returning anything. 10.6 * 10.7 + * NOTE: tapdisk uses the number of sectors submitted per request as a 10.8 + * ref count. Plugins must use the callback function to communicate the 10.9 + * completion--or error--of every sector submitted to them. 10.10 */ 10.11 10.12 #ifndef TAPDISK_H_ 10.13 @@ -65,39 +68,55 @@ 10.14 #define SECTOR_SHIFT 9 10.15 #define DEFAULT_SECTOR_SIZE 512 10.16 10.17 +#define MAX_IOFD 2 10.18 + 10.19 +#define BLK_NOT_ALLOCATED 99 10.20 + 10.21 +struct td_state; 10.22 +struct tap_disk; 10.23 + 10.24 +struct disk_driver { 10.25 + int early; 10.26 + void *private; 10.27 + int io_fd[MAX_IOFD]; 10.28 + struct tap_disk *drv; 10.29 + struct td_state *td_state; 10.30 + struct disk_driver *next; 10.31 +}; 10.32 + 10.33 /* This structure represents the state of an active virtual disk. */ 10.34 struct td_state { 10.35 - void *private; 10.36 - void *drv; 10.37 + struct disk_driver *disks; 10.38 void *blkif; 10.39 void *image; 10.40 void *ring_info; 10.41 void *fd_entry; 10.42 - char backing_file[1024]; /*Used by differencing disks, e.g. qcow*/ 10.43 unsigned long sector_size; 10.44 unsigned long long size; 10.45 unsigned int info; 10.46 }; 10.47 10.48 /* Prototype of the callback to activate as requests complete. */ 10.49 -typedef int (*td_callback_t)(struct td_state *s, int res, int id, void *prv); 10.50 +typedef int (*td_callback_t)(struct disk_driver *dd, int res, uint64_t sector, 10.51 + int nb_sectors, int id, void *private); 10.52 10.53 /* Structure describing the interface to a virtual disk implementation. */ 10.54 /* See note at the top of this file describing this interface. 
*/ 10.55 struct tap_disk { 10.56 const char *disk_type; 10.57 int private_data_size; 10.58 - int (*td_open) (struct td_state *s, const char *name); 10.59 - int (*td_queue_read) (struct td_state *s, uint64_t sector, 10.60 - int nb_sectors, char *buf, td_callback_t cb, 10.61 + int (*td_open) (struct disk_driver *dd, const char *name); 10.62 + int (*td_queue_read) (struct disk_driver *dd, uint64_t sector, 10.63 + int nb_sectors, char *buf, td_callback_t cb, 10.64 int id, void *prv); 10.65 - int (*td_queue_write) (struct td_state *s, uint64_t sector, 10.66 - int nb_sectors, char *buf, td_callback_t cb, 10.67 + int (*td_queue_write) (struct disk_driver *dd, uint64_t sector, 10.68 + int nb_sectors, char *buf, td_callback_t cb, 10.69 int id, void *prv); 10.70 - int (*td_submit) (struct td_state *s); 10.71 - int *(*td_get_fd) (struct td_state *s); 10.72 - int (*td_close) (struct td_state *s); 10.73 - int (*td_do_callbacks)(struct td_state *s, int sid); 10.74 + int (*td_submit) (struct disk_driver *dd); 10.75 + int (*td_has_parent) (struct disk_driver *dd); 10.76 + int (*td_get_parent) (struct disk_driver *dd, struct disk_driver *p); 10.77 + int (*td_close) (struct disk_driver *dd); 10.78 + int (*td_do_callbacks)(struct disk_driver *dd, int sid); 10.79 }; 10.80 10.81 typedef struct disk_info { 10.82 @@ -119,14 +138,13 @@ extern struct tap_disk tapdisk_vmdk; 10.83 extern struct tap_disk tapdisk_ram; 10.84 extern struct tap_disk tapdisk_qcow; 10.85 10.86 -#define MAX_DISK_TYPES 20 10.87 -#define MAX_IOFD 2 10.88 +#define MAX_DISK_TYPES 20 10.89 10.90 -#define DISK_TYPE_AIO 0 10.91 -#define DISK_TYPE_SYNC 1 10.92 -#define DISK_TYPE_VMDK 2 10.93 -#define DISK_TYPE_RAM 3 10.94 -#define DISK_TYPE_QCOW 4 10.95 +#define DISK_TYPE_AIO 0 10.96 +#define DISK_TYPE_SYNC 1 10.97 +#define DISK_TYPE_VMDK 2 10.98 +#define DISK_TYPE_RAM 3 10.99 +#define DISK_TYPE_QCOW 4 10.100 10.101 10.102 /*Define Individual Disk Parameters here */ 10.103 @@ -197,12 +215,10 @@ typedef struct 
driver_list_entry { 10.104 typedef struct fd_list_entry { 10.105 int cookie; 10.106 int tap_fd; 10.107 - int io_fd[MAX_IOFD]; 10.108 struct td_state *s; 10.109 struct fd_list_entry **pprev, *next; 10.110 } fd_list_entry_t; 10.111 10.112 int qcow_create(const char *filename, uint64_t total_size, 10.113 const char *backing_file, int flags); 10.114 - 10.115 #endif /*TAPDISK_H_*/
11.1 --- a/tools/blktap/lib/blktaplib.h Fri Feb 16 16:34:28 2007 +0000 11.2 +++ b/tools/blktap/lib/blktaplib.h Fri Feb 16 20:31:27 2007 -0800 11.3 @@ -91,8 +91,9 @@ struct blkif; 11.4 11.5 typedef struct { 11.6 blkif_request_t req; 11.7 - struct blkif *blkif; 11.8 - int count; 11.9 + struct blkif *blkif; 11.10 + int submitting; 11.11 + int secs_pending; 11.12 int16_t status; 11.13 } pending_req_t; 11.14 11.15 @@ -116,7 +117,7 @@ typedef struct blkif { 11.16 11.17 void *prv; /* device-specific data */ 11.18 void *info; /*Image parameter passing */ 11.19 - pending_req_t pending_list[MAX_REQUESTS]; 11.20 + pending_req_t pending_list[MAX_REQUESTS]; 11.21 int devnum; 11.22 int fds[2]; 11.23 int be_id; 11.24 @@ -141,6 +142,11 @@ int blkif_init(blkif_t *blkif, long int 11.25 void free_blkif(blkif_t *blkif); 11.26 void __init_blkif(void); 11.27 11.28 +typedef struct busy_state { 11.29 + int seg_idx; 11.30 + blkif_request_t *req; 11.31 +} busy_state_t; 11.32 + 11.33 typedef struct tapdev_info { 11.34 int fd; 11.35 char *mem; 11.36 @@ -148,6 +154,7 @@ typedef struct tapdev_info { 11.37 blkif_back_ring_t fe_ring; 11.38 unsigned long vstart; 11.39 blkif_t *blkif; 11.40 + busy_state_t busy; 11.41 } tapdev_info_t; 11.42 11.43 typedef struct domid_translate {
12.1 --- a/tools/blktap/lib/xs_api.c Fri Feb 16 16:34:28 2007 +0000 12.2 +++ b/tools/blktap/lib/xs_api.c Fri Feb 16 20:31:27 2007 -0800 12.3 @@ -311,8 +311,8 @@ int unregister_xenbus_watch(struct xs_ha 12.4 } 12.5 12.6 if (!xs_unwatch(h, watch->node, token)) 12.7 - DPRINTF("XENBUS Failed to release watch %s: %i\n", 12.8 - watch->node, er); 12.9 + DPRINTF("XENBUS Failed to release watch %s\n", 12.10 + watch->node); 12.11 12.12 list_del(&watch->list); 12.13 12.14 @@ -351,9 +351,9 @@ int xs_fire_next_watch(struct xs_handle 12.15 12.16 node = res[XS_WATCH_PATH]; 12.17 token = res[XS_WATCH_TOKEN]; 12.18 - 12.19 + 12.20 w = find_watch(token); 12.21 - if (w) 12.22 + if (w) 12.23 w->callback(h, w, node); 12.24 12.25 free(res);