ia64/xen-unstable
changeset 5042:45a82b5f9fbe
bitkeeper revision 1.1486 (428df949_zDBPhEuAHGHwY3GzlSsPQ)
Merge arcadians.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xeno.bk
into arcadians.cl.cam.ac.uk:/auto/anfs/nos1/akw27/xeno-clone/xeno.bk
Merge arcadians.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xeno.bk
into arcadians.cl.cam.ac.uk:/auto/anfs/nos1/akw27/xeno-clone/xeno.bk
author | akw27@arcadians.cl.cam.ac.uk |
---|---|
date | Fri May 20 14:50:49 2005 +0000 (2005-05-20) |
parents | e5ee1635b8ee 15e6c09e1f43 |
children | 88e25f4669ad |
files | .rootkeys tools/blktap/Makefile tools/blktap/blkaio.c tools/blktap/blkaiolib.c tools/blktap/blkaiolib.h tools/blktap/blkcow.c tools/blktap/blkcowgnbd.c tools/blktap/blkcowimg.c tools/blktap/blkcowlib.c tools/blktap/blkcowlib.h tools/blktap/blkdump.c tools/blktap/blkgnbd.c tools/blktap/blkgnbdlib.c tools/blktap/blkgnbdlib.h tools/blktap/blkimg.c tools/blktap/blkimglib.c tools/blktap/blkimglib.h tools/blktap/block-async.c tools/blktap/block-async.h tools/blktap/blockstore-tls.c tools/blktap/blockstore.c tools/blktap/libgnbd/Makefile tools/blktap/libgnbd/gnbdtest.c tools/blktap/libgnbd/libgnbd.c tools/blktap/libgnbd/libgnbd.h tools/blktap/parallax-threaded.c tools/blktap/parallax.c tools/blktap/radix.c tools/blktap/radix.h tools/blktap/requests-async.c tools/blktap/requests-async.h tools/blktap/vdi.c tools/blktap/vdi.h |
line diff
1.1 --- a/.rootkeys Fri May 20 14:20:31 2005 +0000 1.2 +++ b/.rootkeys Fri May 20 14:50:49 2005 +0000 1.3 @@ -477,38 +477,23 @@ 40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Rul 1.4 4209033eUwhDBJ_bxejiv5c6gjXS4A tools/blktap/Makefile 1.5 4209033ewLAHdhGrT_2jo3Gb_5bDcA tools/blktap/README 1.6 42277b02mYXxgijE7MFeUe9d8eldMw tools/blktap/README-PARALLAX 1.7 -4209033eX_Xw94wHaOCtnU9nOAtSJA tools/blktap/blkaio.c 1.8 -4209033egwf6LDxM2hbaqi9rRdZy4A tools/blktap/blkaiolib.c 1.9 -4209033f9yELLK85Ipo2oKjr3ickgQ tools/blktap/blkaiolib.h 1.10 -4209033fL9LcSI6LXrIp5O4axbUBLg tools/blktap/blkcow.c 1.11 -4209033fUDlFGZreIyZHdP7h7yfvuQ tools/blktap/blkcowgnbd.c 1.12 -4209033fCgZzLeMOwNBFmsp99x58ZQ tools/blktap/blkcowimg.c 1.13 -4209033frfXH6oOi9AvRz08PPAndNA tools/blktap/blkcowlib.c 1.14 -4209033fhFd_y2go9HgCF395A35xJg tools/blktap/blkcowlib.h 1.15 4209033fHgtGpb_K16_xC9CpkjNZLw tools/blktap/blkdump.c 1.16 -4209033fm61CZG1RyKDW75V-eTZ9fg tools/blktap/blkgnbd.c 1.17 -4209033fVfa-R6MFgGcmsQHTDna4PA tools/blktap/blkgnbdlib.c 1.18 -4209033fIgDQbaHwHStHhPEDTtbqsA tools/blktap/blkgnbdlib.h 1.19 -4209033figp5JRsKsXY8rw4keRumkg tools/blktap/blkimg.c 1.20 -42090340V-8HKGlr00SyJGsE5jXC3A tools/blktap/blkimglib.c 1.21 -42090340c7pQbh0Km8zLcEqPd_3zIg tools/blktap/blkimglib.h 1.22 42090340_mvZtozMjghPJO0qsjk4NQ tools/blktap/blkint.h 1.23 42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blktap/blktaplib.c 1.24 42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h 1.25 -423f270cAbkh2f-DHtT0hmCtFFXVXg tools/blktap/blockstore-tls.c 1.26 +428df8fdkg84W8yveE50EbkbTUZgjQ tools/blktap/block-async.c 1.27 +428df8feTrgGFZEBMA_dYijy9DNs1g tools/blktap/block-async.h 1.28 42277b02WrfP1meTDPv1M5swFq8oHQ tools/blktap/blockstore.c 1.29 42277b02P1C0FYj3gqwTZUD8sxKCug tools/blktap/blockstore.h 1.30 42371b8aL1JsxAXOd4bBhmZKDyjiJg tools/blktap/blockstored.c 1.31 42371b8aD_x3L9MKsXciMNqkuk58eQ tools/blktap/bstest.c 1.32 -42090340B3mDvcxvd9ehDHUkg46hvw tools/blktap/libgnbd/Makefile 1.33 -42090340ZWkc5Xhf9lpQmDON8HJXww 
tools/blktap/libgnbd/gnbdtest.c 1.34 -42090340ocMiUScJE3OpY7QNunvSbg tools/blktap/libgnbd/libgnbd.c 1.35 -42090340G5_F_EeVnPORKB0pTMGGhA tools/blktap/libgnbd/libgnbd.h 1.36 423f270cbEKiTMapKnCyqkuwGvgOMA tools/blktap/parallax-threaded.c 1.37 423f270cFdXryIcD7HTPUl_Dbk4DAQ tools/blktap/parallax-threaded.h 1.38 42277b03930x2TJT3PZlw6o0GERXpw tools/blktap/parallax.c 1.39 42277b03XQYq8bujXSz7JAZ8N7j_pA tools/blktap/radix.c 1.40 42277b03vZ4-jno_mgKmAcCW3ycRAg tools/blktap/radix.h 1.41 +428df8fe5RYONloDWVMkM-CfHfB1vA tools/blktap/requests-async.c 1.42 +428df8feWeKJ-9HJb5_rFqdm_xqErg tools/blktap/requests-async.h 1.43 42277b03U_wLHL-alMA0bfxGlqldXg tools/blktap/snaplog.c 1.44 42277b04Ryya-z662BEx8HnxNN0dGQ tools/blktap/snaplog.h 1.45 42277b04LxFjptgZ75Z98DUAso4Prg tools/blktap/vdi.c
2.1 --- a/tools/blktap/Makefile Fri May 20 14:20:31 2005 +0000 2.2 +++ b/tools/blktap/Makefile Fri May 20 14:50:49 2005 +0000 2.3 @@ -22,12 +22,12 @@ PLX_SRCS := 2.4 PLX_SRCS += vdi.c 2.5 PLX_SRCS += radix.c 2.6 PLX_SRCS += snaplog.c 2.7 +PLX_SRCS += blockstore.c 2.8 +PLX_SRCS += block-async.c 2.9 PLXT_SRCS := $(PLX_SRCS) 2.10 -#PLXT_SRCS += blockstore-tls.c 2.11 -PLXT_SRCS += blockstore.c 2.12 PLXT_SRCS += parallax-threaded.c 2.13 -PLX_SRCS += blockstore.c 2.14 VDI_SRCS := $(PLX_SRCS) 2.15 +PLX_SRCS += requests-async.c 2.16 PLX_SRCS += parallax.c 2.17 2.18 VDI_TOOLS := 2.19 @@ -55,10 +55,11 @@ CFLAGS += -Wp,-MD,.$(@F).d 2.20 DEPS = .*.d 2.21 2.22 OBJS = $(patsubst %.c,%.o,$(SRCS)) 2.23 +IBINS = blkdump parallax $(VDI_TOOLS) 2.24 2.25 LIB = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR) 2.26 2.27 -all: mk-symlinks blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd $(VDI_TOOLS) parallax parallax-threaded blockstored 2.28 +all: mk-symlinks blkdump $(VDI_TOOLS) parallax parallax-threaded blockstored 2.29 $(MAKE) $(LIB) 2.30 2.31 LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse) 2.32 @@ -77,10 +78,10 @@ install: all 2.33 $(INSTALL_DIR) -p $(DESTDIR)/usr/include 2.34 $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR) 2.35 $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include 2.36 - $(INSTALL_PROG) blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd $(DESTDIR)/$(BLKTAP_INSTALL_DIR) 2.37 + $(INSTALL_PROG) $(IBINS) $(DESTDIR)/$(BLKTAP_INSTALL_DIR) 2.38 2.39 clean: 2.40 - rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio $(VDI_TOOLS) parallax 2.41 + rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump $(VDI_TOOLS) parallax parallax-threaded 2.42 2.43 rpm: all 2.44 rm -rf staging 2.45 @@ -101,32 +102,11 @@ libblktap.so.$(MAJOR).$(MINOR): $(OBJS) 2.46 blkdump: $(LIB) 2.47 $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. 
-l blktap blkdump.c 2.48 2.49 -blkcowimg: $(LIB) blkcowimg.c blkcowlib.c blkimglib.c 2.50 - $(CC) $(CFLAGS) -o blkcowimg -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkcowimg.c blkimglib.c blkcowlib.c 2.51 - 2.52 -blkcow: $(LIB) blkcow.c blkcowlib.c 2.53 - $(CC) $(CFLAGS) -o blkcow -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkcow.c blkcowlib.c 2.54 - 2.55 -blkimg: $(LIB) blkimg.c blkimglib.c 2.56 - $(CC) $(CFLAGS) -o blkimg -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkimg.c blkimglib.c 2.57 - 2.58 -blkgnbd: $(LIB) blkgnbd.c blkgnbdlib.c 2.59 - $(CC) $(CFLAGS) -o blkgnbd -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkgnbd.c blkgnbdlib.c libgnbd/libgnbd.a 2.60 - 2.61 -blkcowgnbd: $(LIB) blkgnbd.c blkcowlib.c blkgnbdlib.c 2.62 - $(CC) $(CFLAGS) -o blkcowgnbd -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkcowgnbd.c blkgnbdlib.c blkcowlib.c libgnbd/libgnbd.a 2.63 - 2.64 -blkaio: $(LIB) blkaio.c blkaiolib.c 2.65 - $(CC) $(CFLAGS) -o blkaio -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkaio.c blkaiolib.c -laio -lpthread 2.66 - 2.67 parallax: $(LIB) $(PLX_SRCS) 2.68 - $(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap -lpthread $(PLX_SRCS) libgnbd/libgnbd.a 2.69 + $(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap -lpthread $(PLX_SRCS) 2.70 2.71 parallax-threaded: $(LIB) $(PLXT_SRCS) 2.72 - $(CC) $(CFLAGS) -o parallax-threaded -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lpthread -lblktap $(PLXT_SRCS) libgnbd/libgnbd.a 2.73 - 2.74 -vdi_test: $(LIB) $(VDI_SRCS) 2.75 - $(CC) $(CFLAGS) -g3 -o vdi_test -DVDI_STANDALONE -lpthread $(VDI_SRCS) 2.76 + $(CC) $(CFLAGS) -o parallax-threaded -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lpthread -lblktap $(PLXT_SRCS) 2.77 2.78 vdi_list: $(LIB) vdi_list.c $(VDI_SRCS) 2.79 $(CC) $(CFLAGS) -g3 -o vdi_list vdi_list.c -lpthread $(VDI_SRCS) 2.80 @@ -163,16 +143,3 @@ TAGS: 2.81 2.82 -include $(DEPS) 2.83 2.84 -#Random testing targets. To be removed eventually. 
2.85 - 2.86 -rdx_cmp: $(LIB) rdx_cmp.c $(VDI_SRCS) 2.87 - $(CC) $(CFLAGS) -g3 -o rdx_cmp rdx_cmp.c $(VDI_SRCS) 2.88 - 2.89 -bb-tls: $(LIB) blockstore-benchmark.c 2.90 - $(CC) $(CFLAGS) -o bb-tls blockstore-benchmark.c blockstore-tls.c -lpthread 2.91 - 2.92 -bb-trans: $(LIB) blockstore-benchmark.c 2.93 - $(CC) $(CFLAGS) -o bb-trans blockstore-benchmark.c blockstore.c -lpthread 2.94 - 2.95 -radix-test: $(LIB) radix.c blockstore.c 2.96 - $(CC) $(CFLAGS) -g3 -D RADIX_STANDALONE -o radix-test radix.c blockstore-threaded-trans.c
3.1 --- a/tools/blktap/blkaio.c Fri May 20 14:20:31 2005 +0000 3.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 3.3 @@ -1,19 +0,0 @@ 3.4 -/* blkaio.c 3.5 - * 3.6 - * libaio-backed disk. 3.7 - */ 3.8 - 3.9 -#include "blktaplib.h" 3.10 -#include "blkaiolib.h" 3.11 - 3.12 - 3.13 -int main(int argc, char *argv[]) 3.14 -{ 3.15 - aio_init(); 3.16 - 3.17 - blktap_register_ctrl_hook("aio_control", aio_control); 3.18 - blktap_register_request_hook("aio_request", aio_request); 3.19 - blktap_listen(); 3.20 - 3.21 - return 0; 3.22 -}
4.1 --- a/tools/blktap/blkaiolib.c Fri May 20 14:20:31 2005 +0000 4.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 4.3 @@ -1,489 +0,0 @@ 4.4 -/* blkaiolib.c 4.5 - * 4.6 - * file/device image-backed block device -- using linux libaio. 4.7 - * 4.8 - * (c) 2004 Andrew Warfield. 4.9 - * 4.10 - * Xend has been modified to use an amorfs:[fsid] disk tag. 4.11 - * This will show up as device type (maj:240,min:0) = 61440. 4.12 - * 4.13 - * The fsid is placed in the sec_start field of the disk extent. 4.14 - * 4.15 - * NOTE: This doesn't work. Grrr. 4.16 - */ 4.17 - 4.18 -#define _GNU_SOURCE 4.19 -#define __USE_LARGEFILE64 4.20 - 4.21 -#include <stdio.h> 4.22 -#include <stdlib.h> 4.23 -#include <fcntl.h> 4.24 -#include <string.h> 4.25 -#include <db.h> 4.26 -#include <sys/stat.h> 4.27 -#include <sys/types.h> 4.28 -#include <sys/poll.h> 4.29 -#include <unistd.h> 4.30 -#include <errno.h> 4.31 -#include <libaio.h> 4.32 -#include <pthread.h> 4.33 -#include <time.h> 4.34 -#include "blktaplib.h" 4.35 - 4.36 -//#define TMP_IMAGE_FILE_NAME "/dev/sda1" 4.37 -#define TMP_IMAGE_FILE_NAME "fc3.image" 4.38 - 4.39 -#define MAX_DOMS 1024 4.40 -#define MAX_IMGNAME_LEN 255 4.41 -#define AMORFS_DEV 61440 4.42 -#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */ 4.43 -#define MAX_SEGMENTS_PER_REQ 11 4.44 -#define SECTOR_SHIFT 9 4.45 -#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ) 4.46 - 4.47 -#if 1 4.48 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 4.49 -#else 4.50 -#define DPRINTF(_f, _a...) ((void)0) 4.51 -#endif 4.52 - 4.53 -#if 1 4.54 -#define ASSERT(_p) \ 4.55 - if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \ 4.56 - __LINE__, __FILE__); *(int*)0=0; } 4.57 -#else 4.58 -#define ASSERT(_p) ((void)0) 4.59 -#endif 4.60 - 4.61 -char dbg_page[4096]; 4.62 - 4.63 -typedef struct { 4.64 - /* These need to turn into an array/rbtree for multi-disk support. 
*/ 4.65 - int fd; 4.66 - u64 fsid; 4.67 - char imgname[MAX_IMGNAME_LEN]; 4.68 - blkif_vdev_t vdevice; 4.69 -} image_t; 4.70 - 4.71 -/* Note on pending_reqs: I assume all reqs are queued before they start to 4.72 - * get filled. so count of 0 is an unused record. 4.73 - */ 4.74 -typedef struct { 4.75 - blkif_request_t req; 4.76 - int count; 4.77 -} pending_req_t; 4.78 - 4.79 -static pending_req_t pending_list[MAX_REQUESTS]; 4.80 -image_t *images[MAX_DOMS]; 4.81 - 4.82 -static io_context_t ctx; 4.83 -static struct iocb *iocb_free[MAX_AIO_REQS]; 4.84 -static int iocb_free_count; 4.85 - 4.86 -/* ---[ Notification mecahnism ]--------------------------------------- */ 4.87 - 4.88 -enum { 4.89 - READ = 0, 4.90 - WRITE = 1 4.91 -}; 4.92 - 4.93 -static int aio_notify[2]; 4.94 -static volatile int aio_listening = 0; 4.95 - 4.96 -static struct io_event aio_events[MAX_AIO_REQS]; 4.97 -static int aio_event_count = 0; 4.98 - 4.99 -/* this is commented out in libaio.h for some reason. */ 4.100 -extern int io_queue_wait(io_context_t ctx, struct timespec *timeout); 4.101 - 4.102 -static void *notifier_thread(void *arg) 4.103 -{ 4.104 - int ret; 4.105 - int msg = 0x00feeb00; 4.106 - 4.107 - printf("Notifier thread started.\n"); 4.108 - for (;;) { 4.109 - //if ((aio_listening) && ((ret = io_queue_wait(ctx, 0)) == 0)) { 4.110 - if ((aio_listening) && 4.111 - ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0)) { 4.112 - aio_event_count = ret; 4.113 - printf("[Notifying! (%d)]\n", aio_event_count); 4.114 - aio_listening = 0; 4.115 - write(aio_notify[WRITE], &msg, sizeof(msg)); 4.116 - fsync(aio_notify[WRITE]); 4.117 - } else { 4.118 - if (aio_listening) 4.119 - printf("[io_queue_wait error! %d]\n", errno); 4.120 - usleep(1000); /* Not ready to read. 
*/ 4.121 - } 4.122 - } 4.123 -} 4.124 - 4.125 -/* -------------------------------------------------------------------- */ 4.126 - 4.127 -int aio_control(control_msg_t *msg) 4.128 -{ 4.129 - domid_t domid; 4.130 - DB *db; 4.131 - int ret; 4.132 - 4.133 - if (msg->type != CMSG_BLKIF_BE) 4.134 - { 4.135 - printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type); 4.136 - return 0; 4.137 - } 4.138 - 4.139 - switch(msg->subtype) 4.140 - { 4.141 - case CMSG_BLKIF_BE_CREATE: 4.142 - if ( msg->length != sizeof(blkif_be_create_t) ) 4.143 - goto parse_error; 4.144 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n", 4.145 - ((blkif_be_create_t *)msg->msg)->domid, 4.146 - ((blkif_be_create_t *)msg->msg)->blkif_handle); 4.147 - domid = ((blkif_be_create_t *)msg->msg)->domid; 4.148 - if (images[domid] != NULL) { 4.149 - printf("attempt to connect from an existing dom!\n"); 4.150 - return 0; 4.151 - } 4.152 - 4.153 - images[domid] = (image_t *)malloc(sizeof(image_t)); 4.154 - if (images[domid] == NULL) { 4.155 - printf("error allocating image record.\n"); 4.156 - return 0; 4.157 - } 4.158 - 4.159 - images[domid]->fd = -1; 4.160 - images[domid]->fsid = 0; 4.161 - 4.162 - printf("Image connected.\n"); 4.163 - break; 4.164 - 4.165 - case CMSG_BLKIF_BE_DESTROY: 4.166 - if ( msg->length != sizeof(blkif_be_destroy_t) ) 4.167 - goto parse_error; 4.168 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n", 4.169 - ((blkif_be_destroy_t *)msg->msg)->domid, 4.170 - ((blkif_be_destroy_t *)msg->msg)->blkif_handle); 4.171 - 4.172 - domid = ((blkif_be_destroy_t *)msg->msg)->domid; 4.173 - if (images[domid] != NULL) { 4.174 - if (images[domid]->fd != -1) 4.175 - close( images[domid]->fd ); 4.176 - free( images[domid] ); 4.177 - images[domid] = NULL; 4.178 - } 4.179 - break; 4.180 - case CMSG_BLKIF_BE_VBD_GROW: 4.181 - { 4.182 - blkif_be_vbd_grow_t *grow; 4.183 - 4.184 - if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) 4.185 - goto parse_error; 4.186 - 
printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n", 4.187 - ((blkif_be_vbd_grow_t *)msg->msg)->domid, 4.188 - ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle, 4.189 - ((blkif_be_vbd_grow_t *)msg->msg)->vdevice); 4.190 - printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n", 4.191 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start, 4.192 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length, 4.193 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.device); 4.194 - grow = (blkif_be_vbd_grow_t *)msg->msg; 4.195 - domid = grow->domid; 4.196 - if (images[domid] == NULL) { 4.197 - printf("VBD_GROW on unconnected domain!\n"); 4.198 - return 0; 4.199 - } 4.200 - 4.201 - if (grow->extent.device != AMORFS_DEV) { 4.202 - printf("VBD_GROW on non-amorfs device!\n"); 4.203 - return 0; 4.204 - } 4.205 - 4.206 - /* TODO: config support for arbitrary image files/modes. */ 4.207 - sprintf(images[domid]->imgname, TMP_IMAGE_FILE_NAME); 4.208 - 4.209 - images[domid]->fsid = grow->extent.sector_start; 4.210 - images[domid]->vdevice = grow->vdevice; 4.211 - images[domid]->fd = open(TMP_IMAGE_FILE_NAME, 4.212 - O_RDWR | O_DIRECT | O_LARGEFILE); 4.213 - if (images[domid]->fd < 0) { 4.214 - printf("Couldn't open image file! %d\n", errno); 4.215 - return 0; 4.216 - } 4.217 - 4.218 - printf("Image file opened. (%s)\n", images[domid]->imgname); 4.219 - break; 4.220 - } 4.221 - } 4.222 - return 0; 4.223 -parse_error: 4.224 - printf("Bad control message!\n"); 4.225 - return 0; 4.226 - 4.227 -create_failed: 4.228 - /* TODO: close the db ref. */ 4.229 - return 0; 4.230 -} 4.231 - 4.232 -int aio_request(blkif_request_t *req) 4.233 -{ 4.234 - int fd; 4.235 - u64 sector; 4.236 - char *spage, *dpage; 4.237 - int ret, i, idx; 4.238 - blkif_response_t *rsp; 4.239 - domid_t dom = ID_TO_DOM(req->id); 4.240 - 4.241 - if ((images[dom] == NULL) || (images[dom]->fd == -1)) { 4.242 - printf("Data request for unknown domain!!! 
%d\n", dom); 4.243 - rsp = (blkif_response_t *)req; 4.244 - rsp->id = req->id; 4.245 - rsp->operation = req->operation; 4.246 - rsp->status = BLKIF_RSP_ERROR; 4.247 - return BLKTAP_RESPOND; 4.248 - } 4.249 - 4.250 - fd = images[dom]->fd; 4.251 - 4.252 - switch (req->operation) 4.253 - { 4.254 - case BLKIF_OP_PROBE: 4.255 - { 4.256 - struct stat stat; 4.257 - vdisk_t *img_info; 4.258 - 4.259 - 4.260 - /* We expect one buffer only. */ 4.261 - if ( req->nr_segments != 1 ) 4.262 - goto err; 4.263 - 4.264 - /* Make sure the buffer is page-sized. */ 4.265 - if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) || 4.266 - (blkif_last_sect (req->frame_and_sects[0]) != 7) ) 4.267 - goto err; 4.268 - 4.269 - /* loop for multiple images would start here. */ 4.270 - 4.271 - ret = fstat(fd, &stat); 4.272 - if (ret != 0) { 4.273 - printf("Couldn't stat image in PROBE!\n"); 4.274 - goto err; 4.275 - } 4.276 - 4.277 - img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0); 4.278 - img_info[0].device = images[dom]->vdevice; 4.279 - img_info[0].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; 4.280 - img_info[0].capacity = (stat.st_size >> SECTOR_SHIFT); 4.281 - 4.282 - if (img_info[0].capacity == 0) 4.283 - img_info[0].capacity = ((u64)1 << 63); // xend does this too. 4.284 - 4.285 - DPRINTF("iPROBE! 
device: 0x%04x capacity: %llu\n", img_info[0].device, 4.286 - img_info[0].capacity); 4.287 - 4.288 - rsp = (blkif_response_t *)req; 4.289 - rsp->id = req->id; 4.290 - rsp->operation = BLKIF_OP_PROBE; 4.291 - rsp->status = 1; /* number of disks */ 4.292 - 4.293 - return BLKTAP_RESPOND; 4.294 - } 4.295 - case BLKIF_OP_WRITE: 4.296 - { 4.297 - unsigned long size; 4.298 - struct iocb *io; 4.299 - struct iocb *ioq[MAX_SEGMENTS_PER_REQ]; 4.300 - 4.301 - idx = ID_TO_IDX(req->id); 4.302 - ASSERT(pending_list[idx].count == 0); 4.303 - memcpy(&pending_list[idx].req, req, sizeof(*req)); 4.304 - pending_list[idx].count = req->nr_segments; 4.305 - 4.306 - for (i = 0; i < req->nr_segments; i++) { 4.307 - 4.308 - sector = req->sector_number + (8*i); 4.309 - 4.310 - size = blkif_last_sect (req->frame_and_sects[i]) - 4.311 - blkif_first_sect(req->frame_and_sects[i]) + 1; 4.312 - 4.313 - DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 4.314 - req->sector_number, sector, 4.315 - blkif_first_sect(req->frame_and_sects[i]), 4.316 - blkif_last_sect (req->frame_and_sects[i]), 4.317 - (long)(sector << SECTOR_SHIFT)); 4.318 - 4.319 - spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 4.320 - spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; 4.321 - 4.322 - /*convert size and sector to byte offsets */ 4.323 - size <<= SECTOR_SHIFT; 4.324 - sector <<= SECTOR_SHIFT; 4.325 - 4.326 - io = iocb_free[--iocb_free_count]; 4.327 - io_prep_pwrite(io, fd, spage, size, sector); 4.328 - io->data = (void *)idx; 4.329 - ioq[i] = io; 4.330 - } 4.331 - 4.332 - ret = io_submit(ctx, req->nr_segments, ioq); 4.333 - if (ret < 0) 4.334 - printf("BADNESS: io_submit error! 
(%d)\n", errno); 4.335 - 4.336 - pending_list[idx].count = req->nr_segments; 4.337 - 4.338 - return BLKTAP_STOLEN; 4.339 - 4.340 - } 4.341 - case BLKIF_OP_READ: 4.342 - { 4.343 - unsigned long size; 4.344 - struct iocb *io; 4.345 - struct iocb *ioq[MAX_SEGMENTS_PER_REQ]; 4.346 - 4.347 - idx = ID_TO_IDX(req->id); 4.348 - ASSERT(pending_list[idx].count == 0); 4.349 - memcpy(&pending_list[idx].req, req, sizeof(*req)); 4.350 - pending_list[idx].count = req->nr_segments; 4.351 - 4.352 - for (i = 0; i < req->nr_segments; i++) { 4.353 - 4.354 - sector = req->sector_number + (8*i); 4.355 - 4.356 - size = blkif_last_sect (req->frame_and_sects[i]) - 4.357 - blkif_first_sect(req->frame_and_sects[i]) + 1; 4.358 - 4.359 - dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 4.360 - dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; 4.361 - 4.362 - 4.363 - DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) " 4.364 - "pos: %15lu dpage: %p\n", 4.365 - req->sector_number, sector, 4.366 - blkif_first_sect(req->frame_and_sects[i]), 4.367 - blkif_last_sect (req->frame_and_sects[i]), 4.368 - (long)(sector << SECTOR_SHIFT), dpage); 4.369 - 4.370 - /*convert size and sector to byte offsets */ 4.371 - size <<= SECTOR_SHIFT; 4.372 - sector <<= SECTOR_SHIFT; 4.373 - 4.374 - io = iocb_free[--iocb_free_count]; 4.375 - 4.376 - io_prep_pread(io, fd, dpage, size, sector); 4.377 - io->data = (void *)idx; 4.378 - 4.379 - ioq[i] = io; 4.380 - } 4.381 - 4.382 - ret = io_submit(ctx, req->nr_segments, ioq); 4.383 - if (ret < 0) 4.384 - printf("BADNESS: io_submit error! 
(%d)\n", errno); 4.385 - 4.386 - 4.387 - return BLKTAP_STOLEN; 4.388 - 4.389 - } 4.390 - } 4.391 - 4.392 - printf("Unknown block operation!\n"); 4.393 -err: 4.394 - rsp = (blkif_response_t *)req; 4.395 - rsp->id = req->id; 4.396 - rsp->operation = req->operation; 4.397 - rsp->status = BLKIF_RSP_ERROR; 4.398 - return BLKTAP_RESPOND; 4.399 -} 4.400 - 4.401 - 4.402 -int aio_pollhook(int fd) 4.403 -{ 4.404 - struct io_event *ep; 4.405 - int n, ret, idx; 4.406 - blkif_request_t *req; 4.407 - blkif_response_t *rsp; 4.408 - 4.409 - DPRINTF("aio_hook(): \n"); 4.410 - 4.411 - for (ep = aio_events; aio_event_count-- > 0; ep++) { 4.412 - struct iocb *io = ep->obj; 4.413 - idx = (int) ep->data; 4.414 - 4.415 - if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){ 4.416 - printf("gnbd returned a bad cookie (%u)!\n", idx); 4.417 - break; 4.418 - } 4.419 - 4.420 - if ((int)ep->res < 0) printf("aio request error! (%d,%d)\n", 4.421 - (int)ep->res, (int)ep->res2); 4.422 - 4.423 - pending_list[idx].count--; 4.424 - iocb_free[iocb_free_count++] = io; 4.425 - 4.426 - if (pending_list[idx].count == 0) { 4.427 - blkif_request_t tmp = pending_list[idx].req; 4.428 - rsp = (blkif_response_t *)&pending_list[idx].req; 4.429 - rsp->id = tmp.id; 4.430 - rsp->operation = tmp.operation; 4.431 - rsp->status = BLKIF_RSP_OKAY; 4.432 - blktap_inject_response(rsp); 4.433 - } 4.434 - } 4.435 - 4.436 - printf("pollhook done!\n"); 4.437 - 4.438 - read(aio_notify[READ], &idx, sizeof(idx)); 4.439 - aio_listening = 1; 4.440 - 4.441 - return 0; 4.442 -} 4.443 - 4.444 -/* the image library terminates the request stream. _resp is a noop. 
*/ 4.445 -int aio_response(blkif_response_t *rsp) 4.446 -{ 4.447 - return BLKTAP_PASS; 4.448 -} 4.449 - 4.450 -void aio_init(void) 4.451 -{ 4.452 - int i, rc; 4.453 - pthread_t p; 4.454 - 4.455 - for (i = 0; i < MAX_DOMS; i++) 4.456 - images[i] = NULL; 4.457 - 4.458 - for (i = 0; i < MAX_REQUESTS; i++) 4.459 - pending_list[i].count = 0; 4.460 - 4.461 - memset(&ctx, 0, sizeof(ctx)); 4.462 - rc = io_queue_init(MAX_AIO_REQS, &ctx); 4.463 - if (rc != 0) { 4.464 - printf("queue_init failed! (%d)\n", rc); 4.465 - exit(0); 4.466 - } 4.467 - 4.468 - for (i=0; i<MAX_AIO_REQS; i++) { 4.469 - if (!(iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb)))) { 4.470 - printf("error allocating iocb array\n"); 4.471 - exit(0); 4.472 - } 4.473 - iocb_free_count = i; 4.474 - } 4.475 - 4.476 - rc = pipe(aio_notify); 4.477 - if (rc != 0) { 4.478 - printf("pipe failed! (%d)\n", errno); 4.479 - exit(0); 4.480 - } 4.481 - 4.482 - rc = pthread_create(&p, NULL, notifier_thread, NULL); 4.483 - if (rc != 0) { 4.484 - printf("pthread_create failed! (%d)\n", errno); 4.485 - exit(0); 4.486 - } 4.487 - 4.488 - aio_listening = 1; 4.489 - 4.490 - blktap_attach_poll(aio_notify[READ], POLLIN, aio_pollhook); 4.491 -} 4.492 -
5.1 --- a/tools/blktap/blkaiolib.h Fri May 20 14:20:31 2005 +0000 5.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 5.3 @@ -1,16 +0,0 @@ 5.4 -/* blkaiolib.h 5.5 - * 5.6 - * aio image-backed block device. 5.7 - * 5.8 - * (c) 2004 Andrew Warfield. 5.9 - * 5.10 - * Xend has been modified to use an amorfs:[fsid] disk tag. 5.11 - * This will show up as device type (maj:240,min:0) = 61440. 5.12 - * 5.13 - * The fsid is placed in the sec_start field of the disk extent. 5.14 - */ 5.15 - 5.16 -int aio_control(control_msg_t *msg); 5.17 -int aio_request(blkif_request_t *req); 5.18 -int aio_response(blkif_response_t *rsp); /* noop */ 5.19 -void aio_init(void);
6.1 --- a/tools/blktap/blkcow.c Fri May 20 14:20:31 2005 +0000 6.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 6.3 @@ -1,31 +0,0 @@ 6.4 -/* blkcow.c 6.5 - * 6.6 - * copy on write a block device. in a really inefficient way. 6.7 - * 6.8 - * (c) 2004 Andrew Warfield. 6.9 - * 6.10 - * This uses whatever backend the tap is attached to as the read-only 6.11 - * underlay -- for the moment. 6.12 - * 6.13 - * Xend has been modified to use an amorfs:[fsid] disk tag. 6.14 - * This will show up as device type (maj:240,min:0) = 61440. 6.15 - * 6.16 - * The fsid is placed in the sec_start field of the disk extent, 6.17 - * the cow plugin uses this to identify a unique overlay. 6.18 - */ 6.19 - 6.20 -#include "blktaplib.h" 6.21 -#include "blkcowlib.h" 6.22 - 6.23 - 6.24 -int main(int argc, char *argv[]) 6.25 -{ 6.26 - cow_init(); 6.27 - 6.28 - blktap_register_ctrl_hook("cow_control", cow_control); 6.29 - blktap_register_request_hook("cow_request", cow_request); 6.30 - blktap_register_response_hook("cow_response", cow_response); 6.31 - blktap_listen(); 6.32 - 6.33 - return 0; 6.34 -}
7.1 --- a/tools/blktap/blkcowgnbd.c Fri May 20 14:20:31 2005 +0000 7.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 7.3 @@ -1,24 +0,0 @@ 7.4 -/* blkcowgnbd.c 7.5 - * 7.6 - * gnbd-backed cow. 7.7 - */ 7.8 - 7.9 -#include "blktaplib.h" 7.10 -#include "blkcowlib.h" 7.11 -#include "blkgnbdlib.h" 7.12 - 7.13 - 7.14 -int main(int argc, char *argv[]) 7.15 -{ 7.16 - cow_init(); 7.17 - gnbd_init(); 7.18 - 7.19 - blktap_register_ctrl_hook("cow_control", cow_control); 7.20 - blktap_register_ctrl_hook("gnbd_control", gnbd_control); 7.21 - blktap_register_request_hook("cow_request", cow_request); 7.22 - blktap_register_request_hook("gnbd_request", gnbd_request); 7.23 - blktap_register_response_hook("cow_response", cow_response); 7.24 - blktap_listen(); 7.25 - 7.26 - return 0; 7.27 -}
8.1 --- a/tools/blktap/blkcowimg.c Fri May 20 14:20:31 2005 +0000 8.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 8.3 @@ -1,24 +0,0 @@ 8.4 -/* blkcowimg.c 8.5 - * 8.6 - * file-backed cow. 8.7 - */ 8.8 - 8.9 -#include "blktaplib.h" 8.10 -#include "blkcowlib.h" 8.11 -#include "blkimglib.h" 8.12 - 8.13 - 8.14 -int main(int argc, char *argv[]) 8.15 -{ 8.16 - cow_init(); 8.17 - image_init(); 8.18 - 8.19 - blktap_register_ctrl_hook("cow_control", cow_control); 8.20 - blktap_register_ctrl_hook("image_control", image_control); 8.21 - blktap_register_request_hook("cow_request", cow_request); 8.22 - blktap_register_request_hook("image_request", image_request); 8.23 - blktap_register_response_hook("cow_response", cow_response); 8.24 - blktap_listen(); 8.25 - 8.26 - return 0; 8.27 -}
9.1 --- a/tools/blktap/blkcowlib.c Fri May 20 14:20:31 2005 +0000 9.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 9.3 @@ -1,380 +0,0 @@ 9.4 -/* blkcowlib.c 9.5 - * 9.6 - * copy on write a block device. in a really inefficient way. 9.7 - * 9.8 - * (c) 2004 Andrew Warfield. 9.9 - * 9.10 - * This uses whatever backend the tap is attached to as the read-only 9.11 - * underlay -- for the moment. 9.12 - * 9.13 - * Xend has been modified to use an amorfs:[fsid] disk tag. 9.14 - * This will show up as device type (maj:240,min:0) = 61440. 9.15 - * 9.16 - * The fsid is placed in the sec_start field of the disk extent, 9.17 - * the cow plugin uses this to identify a unique overlay. 9.18 - */ 9.19 - 9.20 -#include <stdio.h> 9.21 -#include <stdlib.h> 9.22 -#include <string.h> 9.23 -#include <db.h> 9.24 -#include "blktaplib.h" 9.25 - 9.26 -#define MAX_DOMS 1024 9.27 -#define MAX_DBNAME_LEN 255 9.28 -#define AMORFS_DEV 61440 9.29 -#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */ 9.30 - 9.31 -#if 0 9.32 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 9.33 -#else 9.34 -#define DPRINTF(_f, _a...) 
((void)0) 9.35 -#endif 9.36 - 9.37 -/* Berkeley db has different params for open() after 4.1 */ 9.38 -#ifndef DB_VERSION_MAJOR 9.39 -# define DB_VERSION_MAJOR 1 9.40 -#endif /* DB_VERSION_MAJOR */ 9.41 -#ifndef DB_VERSION_MINOR 9.42 -# define DB_VERSION_MINOR 0 9.43 -#endif /* DB_VERSION_MINOR */ 9.44 - 9.45 -typedef struct { 9.46 - DB *db; 9.47 - u64 fsid; 9.48 - char dbname[MAX_DBNAME_LEN]; 9.49 -} cow_t; 9.50 - 9.51 -cow_t *cows[MAX_DOMS]; 9.52 -blkif_request_t *reread_list[MAX_REQUESTS]; 9.53 - 9.54 -int cow_control(control_msg_t *msg) 9.55 -{ 9.56 - domid_t domid; 9.57 - DB *db; 9.58 - int ret; 9.59 - 9.60 - if (msg->type != CMSG_BLKIF_BE) 9.61 - { 9.62 - printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type); 9.63 - return 0; 9.64 - } 9.65 - 9.66 - switch(msg->subtype) 9.67 - { 9.68 - case CMSG_BLKIF_BE_CREATE: 9.69 - if ( msg->length != sizeof(blkif_be_create_t) ) 9.70 - goto parse_error; 9.71 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n", 9.72 - ((blkif_be_create_t *)msg->msg)->domid, 9.73 - ((blkif_be_create_t *)msg->msg)->blkif_handle); 9.74 - domid = ((blkif_be_create_t *)msg->msg)->domid; 9.75 - if (cows[domid] != NULL) { 9.76 - printf("attempt to connect from an existing dom!\n"); 9.77 - return 0; 9.78 - } 9.79 - 9.80 - cows[domid] = (cow_t *)malloc(sizeof(cow_t)); 9.81 - if (cows[domid] == NULL) { 9.82 - printf("error allocating cow.\n"); 9.83 - return 0; 9.84 - } 9.85 - 9.86 - cows[domid]->db = NULL; 9.87 - cows[domid]->fsid = 0; 9.88 - 9.89 - printf("COW connected.\n"); 9.90 - break; 9.91 - 9.92 - case CMSG_BLKIF_BE_DESTROY: 9.93 - if ( msg->length != sizeof(blkif_be_destroy_t) ) 9.94 - goto parse_error; 9.95 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n", 9.96 - ((blkif_be_destroy_t *)msg->msg)->domid, 9.97 - ((blkif_be_destroy_t *)msg->msg)->blkif_handle); 9.98 - 9.99 - domid = ((blkif_be_destroy_t *)msg->msg)->domid; 9.100 - if (cows[domid] != NULL) { 9.101 - if (cows[domid]->db != NULL) 9.102 - 
cows[domid]->db->close(cows[domid]->db, 0); 9.103 - free(cows[domid]); 9.104 - cows[domid] = NULL; 9.105 - } 9.106 - break; 9.107 - case CMSG_BLKIF_BE_VBD_GROW: 9.108 - { 9.109 - blkif_be_vbd_grow_t *grow; 9.110 - 9.111 - if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) 9.112 - goto parse_error; 9.113 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n", 9.114 - ((blkif_be_vbd_grow_t *)msg->msg)->domid, 9.115 - ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle, 9.116 - ((blkif_be_vbd_grow_t *)msg->msg)->vdevice); 9.117 - printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n", 9.118 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start, 9.119 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length, 9.120 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.device); 9.121 - grow = (blkif_be_vbd_grow_t *)msg->msg; 9.122 - domid = grow->domid; 9.123 - if (cows[domid] == NULL) { 9.124 - printf("VBD_GROW on unconnected domain!\n"); 9.125 - return 0; 9.126 - } 9.127 - 9.128 - if (grow->extent.device != AMORFS_DEV) { 9.129 - printf("VBD_GROW on non-amorfs device!\n"); 9.130 - return 0; 9.131 - } 9.132 - 9.133 - sprintf(&cows[domid]->dbname[0], "%020llu.db", 9.134 - grow->extent.sector_start); 9.135 - 9.136 - cows[domid]->fsid = grow->extent.sector_start; 9.137 - 9.138 - if ((ret = db_create(&db, NULL, 0)) != 0) { 9.139 - fprintf(stderr, "db_create: %s\n", db_strerror(ret)); 9.140 - return 0; 9.141 - } 9.142 - 9.143 - 9.144 -#if DB_VERSION_MAJOR < 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 1) 9.145 - 9.146 - if ((ret = db->open( db, cows[domid]->dbname, NULL, DB_BTREE, 9.147 - DB_CREATE, 0664)) != 0) { 9.148 - 9.149 -#else /* DB_VERSION >= 4.1 */ 9.150 - 9.151 - if ((ret = db->open( db, NULL, cows[domid]->dbname, NULL, DB_BTREE, 9.152 - DB_CREATE, 0664)) != 0) { 9.153 - 9.154 -#endif /* DB_VERSION < 4.1 */ 9.155 - 9.156 - db->err(db, ret, "%s", cows[domid]->dbname); 9.157 - goto create_failed; 9.158 - } 9.159 - cows[domid]->db = db; 9.160 - 
printf("Overlay db opened. (%s)\n", cows[domid]->dbname); 9.161 - break; 9.162 - } 9.163 - } 9.164 - return 0; 9.165 -parse_error: 9.166 - printf("Bad control message!\n"); 9.167 - return 0; 9.168 - 9.169 -create_failed: 9.170 - /* TODO: close the db ref. */ 9.171 - return 0; 9.172 -} 9.173 - 9.174 -int cow_request(blkif_request_t *req) 9.175 -{ 9.176 - DB *db; 9.177 - DBT key, data; 9.178 - u64 sector; 9.179 - char *spage, *dpage; 9.180 - int ret, i, idx; 9.181 - blkif_response_t *rsp; 9.182 - domid_t dom = ID_TO_DOM(req->id); 9.183 - 9.184 - if ((cows[dom] == NULL) || (cows[dom]->db == NULL)) { 9.185 - printf("Data request for unknown domain!!! %d\n", dom); 9.186 - rsp = (blkif_response_t *)req; 9.187 - rsp->id = req->id; 9.188 - rsp->operation = req->operation; 9.189 - rsp->status = BLKIF_RSP_ERROR; 9.190 - return BLKTAP_RESPOND; 9.191 - } 9.192 - 9.193 - db = cows[dom]->db; 9.194 - 9.195 - switch (req->operation) 9.196 - { 9.197 - case BLKIF_OP_PROBE: 9.198 -/* debug -- delete */ 9.199 -idx = ID_TO_IDX(req->id); 9.200 -reread_list[idx] = (blkif_request_t *)malloc(sizeof(*req)); 9.201 -memcpy(reread_list[idx], req, sizeof(*req)); 9.202 - return BLKTAP_PASS; 9.203 - 9.204 - case BLKIF_OP_WRITE: 9.205 - for (i = 0; i < req->nr_segments; i++) { 9.206 - memset(&key, 0, sizeof(key)); 9.207 - memset(&data, 0, sizeof(data)); 9.208 - 9.209 - sector = req->sector_number + (8*i); 9.210 - key.data = &sector; 9.211 - key.size = sizeof(sector); 9.212 - 9.213 - spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 9.214 - data.data = spage; 9.215 - data.size = PAGE_SIZE; 9.216 - 9.217 - 9.218 - DPRINTF("cWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 9.219 - req->sector_number, sector, 9.220 - blkif_first_sect(req->frame_and_sects[i]), 9.221 - blkif_last_sect (req->frame_and_sects[i]), 9.222 - (long)(sector << 9)); 9.223 - 9.224 - if ((ret = db->put(db, NULL, &key, &data, 0)) == 0) 9.225 - DPRINTF("db: %lld: key stored.\n", *((u64 *)key.data)); 9.226 - else { 9.227 - 
db->err(db, ret, "DB->put"); 9.228 - goto err; 9.229 - } 9.230 - } 9.231 - 9.232 - rsp = (blkif_response_t *)req; 9.233 - rsp->id = req->id; 9.234 - rsp->operation = BLKIF_OP_WRITE; 9.235 - rsp->status = BLKIF_RSP_OKAY; 9.236 - 9.237 - return BLKTAP_RESPOND; 9.238 - 9.239 - case BLKIF_OP_READ: 9.240 - for (i = 0; i < req->nr_segments; i++) { 9.241 - memset(&key, 0, sizeof(key)); 9.242 - memset(&data, 0, sizeof(data)); 9.243 - 9.244 - sector = req->sector_number + (8*i); 9.245 - key.data = &sector; 9.246 - key.size = sizeof(sector); 9.247 - 9.248 - DPRINTF("cREAD: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 9.249 - req->sector_number, sector, 9.250 - blkif_first_sect(req->frame_and_sects[i]), 9.251 - blkif_last_sect (req->frame_and_sects[i]), 9.252 - (long)(sector << 9)); 9.253 - 9.254 - if ((ret = db->get(db, NULL, &key, &data, 0)) == 0) { 9.255 - DPRINTF("db: %llu: key retrieved (req).\n", 9.256 - *((u64 *)key.data)); 9.257 - 9.258 - dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 9.259 - spage = data.data; 9.260 - memcpy(dpage, spage, PAGE_SIZE); 9.261 - 9.262 - } else if (ret == DB_NOTFOUND) { 9.263 - idx = ID_TO_IDX(req->id); 9.264 - if (idx > MAX_REQUESTS) { 9.265 - printf("Bad index!\n"); 9.266 - goto err; 9.267 - } 9.268 - if (reread_list[idx] != NULL) { 9.269 - printf("Dupe index!\n"); 9.270 - goto err; 9.271 - } 9.272 - reread_list[idx] = (blkif_request_t *)malloc(sizeof(*req)); 9.273 - memcpy(reread_list[idx], req, sizeof(*req)); 9.274 - return BLKTAP_PASS; 9.275 - } else { 9.276 - db->err(db, ret, "DB->get"); 9.277 - goto err; 9.278 - } 9.279 - } 9.280 - 9.281 - 9.282 - rsp = (blkif_response_t *)req; 9.283 - rsp->id = req->id; 9.284 - rsp->operation = BLKIF_OP_READ; 9.285 - rsp->status = BLKIF_RSP_OKAY; 9.286 - return BLKTAP_RESPOND; 9.287 - } 9.288 - 9.289 - printf("Unknow block operation!\n"); 9.290 - return BLKTAP_PASS; 9.291 -err: 9.292 - rsp = (blkif_response_t *)req; 9.293 - rsp->id = req->id; 9.294 - rsp->operation = req->operation; 9.295 
- rsp->status = BLKIF_RSP_ERROR; 9.296 - return BLKTAP_RESPOND; 9.297 -} 9.298 - 9.299 -int cow_response(blkif_response_t *rsp) 9.300 -{ 9.301 - blkif_request_t *req; 9.302 - int i, ret; 9.303 - DB *db; 9.304 - DBT key, data; 9.305 - u64 sector; 9.306 - char *spage, *dpage; 9.307 - int idx = ID_TO_IDX(rsp->id); 9.308 - domid_t dom; 9.309 - 9.310 - /* don't touch erroring responses. */ 9.311 - if (rsp->status == BLKIF_RSP_ERROR) 9.312 - return BLKTAP_PASS; 9.313 - 9.314 - if ((rsp->operation == BLKIF_OP_READ) && (reread_list[idx] != NULL)) 9.315 - { 9.316 - req = reread_list[idx]; 9.317 - dom = ID_TO_DOM(req->id); 9.318 - 9.319 - if ((cows[dom] == NULL) || (cows[dom]->db == NULL)) { 9.320 - printf("Response from unknown domain!!! Very badness! %d\n", dom); 9.321 - return BLKTAP_PASS; 9.322 - } 9.323 - 9.324 - db = cows[dom]->db; 9.325 - 9.326 - for (i = 0; i < req->nr_segments; i++) { 9.327 - memset(&key, 0, sizeof(key)); 9.328 - memset(&data, 0, sizeof(data)); 9.329 - 9.330 - sector = req->sector_number + (8*i); 9.331 - key.data = &sector; 9.332 - key.size = sizeof(sector); 9.333 - 9.334 - if ((ret = db->get(db, NULL, &key, &data, 0)) == 0) { 9.335 - printf("db: %llu: key retrieved (rsp).\n", 9.336 - *((u64 *)key.data)); 9.337 - 9.338 - dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 9.339 - spage = data.data; 9.340 - memcpy(dpage, spage, PAGE_SIZE); 9.341 - 9.342 - } else if (ret == DB_NOTFOUND) { 9.343 - continue; /* We read this from disk. 
*/ 9.344 - } else { 9.345 - db->err(db, ret, "DB->get"); 9.346 - goto err; 9.347 - } 9.348 - } 9.349 - free(reread_list[idx]); 9.350 - reread_list[idx] = NULL; 9.351 - } 9.352 - 9.353 - if (rsp->operation == BLKIF_OP_PROBE) { 9.354 - 9.355 - vdisk_t *img_info; 9.356 - 9.357 - req = reread_list[idx]; 9.358 - img_info = (vdisk_t *)(char *)MMAP_VADDR(ID_TO_IDX(req->id), 0); 9.359 - for (i =0; i < rsp->status; i++) 9.360 - printf("PROBE (%d) device: 0x%04x capacity: %llu, info: 0x%04x\n", 9.361 - i, 9.362 - img_info[0].device, 9.363 - img_info[0].capacity, 9.364 - img_info[0].info); 9.365 - free(reread_list[idx]); 9.366 - reread_list[idx] = NULL; 9.367 - } 9.368 - 9.369 -err: 9.370 - return BLKTAP_PASS; 9.371 -} 9.372 - 9.373 -void cow_init(void) 9.374 -{ 9.375 - int i; 9.376 - 9.377 - for (i = 0; i < MAX_DOMS; i++) 9.378 - cows[i] = NULL; 9.379 - 9.380 - for (i = 0; i < MAX_REQUESTS; i++) 9.381 - reread_list[MAX_REQUESTS] = NULL; 9.382 -} 9.383 -
10.1 --- a/tools/blktap/blkcowlib.h Fri May 20 14:20:31 2005 +0000 10.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 10.3 @@ -1,14 +0,0 @@ 10.4 -/* blkcowlib.h 10.5 - * 10.6 - * copy on write a block device. in a really inefficient way. 10.7 - * 10.8 - * (c) 2004 Andrew Warfield. 10.9 - * 10.10 - * public interfaces to the CoW tap. 10.11 - * 10.12 - */ 10.13 - 10.14 -int cow_control (control_msg_t *msg); 10.15 -int cow_request (blkif_request_t *req); 10.16 -int cow_response (blkif_response_t *rsp); 10.17 -void cow_init (void);
11.1 --- a/tools/blktap/blkdump.c Fri May 20 14:20:31 2005 +0000 11.2 +++ b/tools/blktap/blkdump.c Fri May 20 14:50:49 2005 +0000 11.3 @@ -62,18 +62,6 @@ int control_print(control_msg_t *msg) 11.4 ((blkif_be_vbd_destroy_t *)msg->msg)->blkif_handle, 11.5 ((blkif_be_vbd_destroy_t *)msg->msg)->vdevice); 11.6 break; 11.7 - case CMSG_BLKIF_BE_VBD_GROW: 11.8 - if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) 11.9 - goto parse_error; 11.10 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n", 11.11 - ((blkif_be_vbd_grow_t *)msg->msg)->domid, 11.12 - ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle, 11.13 - ((blkif_be_vbd_grow_t *)msg->msg)->vdevice); 11.14 - printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n", 11.15 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start, 11.16 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length, 11.17 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.device); 11.18 - break; 11.19 default: 11.20 goto parse_error; 11.21 }
12.1 --- a/tools/blktap/blkgnbd.c Fri May 20 14:20:31 2005 +0000 12.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 12.3 @@ -1,19 +0,0 @@ 12.4 -/* blkgnbd.c 12.5 - * 12.6 - * gnbd-backed disk. 12.7 - */ 12.8 - 12.9 -#include "blktaplib.h" 12.10 -#include "blkgnbdlib.h" 12.11 - 12.12 - 12.13 -int main(int argc, char *argv[]) 12.14 -{ 12.15 - gnbd_init(); 12.16 - 12.17 - blktap_register_ctrl_hook("gnbd_control", gnbd_control); 12.18 - blktap_register_request_hook("gnbd_request", gnbd_request); 12.19 - blktap_listen(); 12.20 - 12.21 - return 0; 12.22 -}
13.1 --- a/tools/blktap/blkgnbdlib.c Fri May 20 14:20:31 2005 +0000 13.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 13.3 @@ -1,471 +0,0 @@ 13.4 -/* blkgnbdlib.c 13.5 - * 13.6 - * gnbd image-backed block device. 13.7 - * 13.8 - * (c) 2004 Andrew Warfield. 13.9 - * 13.10 - * Xend has been modified to use an amorfs:[fsid] disk tag. 13.11 - * This will show up as device type (maj:240,min:0) = 61440. 13.12 - * 13.13 - * The fsid is placed in the sec_start field of the disk extent. 13.14 - */ 13.15 - 13.16 -#include <stdio.h> 13.17 -#include <stdlib.h> 13.18 -#include <string.h> 13.19 -#include <db.h> 13.20 -#include <sys/stat.h> 13.21 -#include <sys/types.h> 13.22 -#include <unistd.h> 13.23 -#include <errno.h> 13.24 -#include <sys/poll.h> 13.25 -#include "blktaplib.h" 13.26 -#include "libgnbd/libgnbd.h" 13.27 - 13.28 -#define GNBD_SERVER "skirmish.cl.cam.ac.uk" 13.29 -#define GNBD_CLIENT "pengi-0.xeno.cl.cam.ac.uk" 13.30 -#define GNBD_MOUNT "fc2_akw27" 13.31 -#define GNBD_PORT 0x38e7 13.32 - 13.33 -#define MAX_DOMS 1024 13.34 -#define MAX_IMGNAME_LEN 255 13.35 -#define AMORFS_DEV 61440 13.36 -#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */ 13.37 -#define SECTOR_SHIFT 9 13.38 - 13.39 -#if 0 13.40 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 13.41 -#else 13.42 -#define DPRINTF(_f, _a...) ((void)0) 13.43 -#endif 13.44 - 13.45 -#if 1 13.46 -#define ASSERT(_p) \ 13.47 - if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \ 13.48 - __LINE__, __FILE__); *(int*)0=0; } 13.49 -#else 13.50 -#define ASSERT(_p) ((void)0) 13.51 -#endif 13.52 - 13.53 -#define GH_DISCONNECTED 0 13.54 -#define GH_PROBEWAITING 1 13.55 -#define GH_CONNECTED 2 13.56 - 13.57 -typedef struct { 13.58 - /* These need to turn into an array/rbtree for multi-disk support. */ 13.59 - struct gnbd_handle *gh; 13.60 - int gh_state; 13.61 - int probe_idx; /* This really needs cleaning up after hotos. 
*/ 13.62 - int fd; 13.63 - u64 fsid; 13.64 - char gnbdname[MAX_IMGNAME_LEN]; 13.65 - blkif_vdev_t vdevice; 13.66 -} gnbd_t; 13.67 - 13.68 -/* Note on pending_reqs: I assume all reqs are queued before they start to 13.69 - * get filled. so count of 0 is an unused record. 13.70 - */ 13.71 -typedef struct { 13.72 - blkif_request_t req; 13.73 - int count; 13.74 -} pending_req_t; 13.75 - 13.76 -static gnbd_t *gnbds[MAX_DOMS]; 13.77 -static pending_req_t pending_list[MAX_REQUESTS]; 13.78 -static int pending_count = 0; /* debugging */ 13.79 - 13.80 - 13.81 -gnbd_t *get_gnbd_by_fd(int fd) 13.82 -{ 13.83 - /* this is a linear scan for the moment. nees to be cleaned up for 13.84 - multi-disk support. */ 13.85 - 13.86 - int i; 13.87 - 13.88 - for (i=0; i< MAX_DOMS; i++) 13.89 - if ((gnbds[i] != NULL) && (gnbds[i]->fd == fd)) 13.90 - return gnbds[i]; 13.91 - 13.92 - return NULL; 13.93 -} 13.94 - 13.95 -int gnbd_pollhook(int fd); 13.96 - 13.97 -int gnbd_control(control_msg_t *msg) 13.98 -{ 13.99 - domid_t domid; 13.100 - DB *db; 13.101 - int ret; 13.102 - 13.103 - if (msg->type != CMSG_BLKIF_BE) 13.104 - { 13.105 - printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type); 13.106 - return 0; 13.107 - } 13.108 - 13.109 - switch(msg->subtype) 13.110 - { 13.111 - case CMSG_BLKIF_BE_CREATE: 13.112 - if ( msg->length != sizeof(blkif_be_create_t) ) 13.113 - goto parse_error; 13.114 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n", 13.115 - ((blkif_be_create_t *)msg->msg)->domid, 13.116 - ((blkif_be_create_t *)msg->msg)->blkif_handle); 13.117 - domid = ((blkif_be_create_t *)msg->msg)->domid; 13.118 - if (gnbds[domid] != NULL) { 13.119 - printf("attempt to connect from an existing dom!\n"); 13.120 - return 0; 13.121 - } 13.122 - 13.123 - gnbds[domid] = (gnbd_t *)malloc(sizeof(gnbd_t)); 13.124 - if (gnbds[domid] == NULL) { 13.125 - printf("error allocating gnbd record.\n"); 13.126 - return 0; 13.127 - } 13.128 - 13.129 - gnbds[domid]->gh = NULL; 13.130 - 
gnbds[domid]->fsid = 0; 13.131 - 13.132 - break; 13.133 - 13.134 - case CMSG_BLKIF_BE_DESTROY: 13.135 - if ( msg->length != sizeof(blkif_be_destroy_t) ) 13.136 - goto parse_error; 13.137 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n", 13.138 - ((blkif_be_destroy_t *)msg->msg)->domid, 13.139 - ((blkif_be_destroy_t *)msg->msg)->blkif_handle); 13.140 - 13.141 - domid = ((blkif_be_destroy_t *)msg->msg)->domid; 13.142 - if (gnbds[domid] != NULL) { 13.143 - if (gnbds[domid]->gh != NULL) { 13.144 - blktap_detach_poll(gnbds[domid]->fd); 13.145 - free(gnbds[domid]->gh); /* XXX: Need a gnbd close call! */; 13.146 - } 13.147 - free( gnbds[domid] ); 13.148 - gnbds[domid] = NULL; 13.149 - } 13.150 - break; 13.151 - case CMSG_BLKIF_BE_VBD_GROW: 13.152 - { 13.153 - blkif_be_vbd_grow_t *grow; 13.154 - 13.155 - if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) 13.156 - goto parse_error; 13.157 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n", 13.158 - ((blkif_be_vbd_grow_t *)msg->msg)->domid, 13.159 - ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle, 13.160 - ((blkif_be_vbd_grow_t *)msg->msg)->vdevice); 13.161 - printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n", 13.162 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start, 13.163 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length, 13.164 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.device); 13.165 - grow = (blkif_be_vbd_grow_t *)msg->msg; 13.166 - domid = grow->domid; 13.167 - if (gnbds[domid] == NULL) { 13.168 - printf("VBD_GROW on unconnected domain!\n"); 13.169 - return 0; 13.170 - } 13.171 - 13.172 - if (grow->extent.device != AMORFS_DEV) { 13.173 - printf("VBD_GROW on non-amorfs device!\n"); 13.174 - return 0; 13.175 - } 13.176 - 13.177 - /* TODO: config support for arbitrary gnbd files/modes. 
*/ 13.178 - sprintf(gnbds[domid]->gnbdname, GNBD_MOUNT); 13.179 - 13.180 - gnbds[domid]->fsid = grow->extent.sector_start; 13.181 - gnbds[domid]->vdevice = grow->vdevice; 13.182 - gnbds[domid]->gh_state = GH_DISCONNECTED; 13.183 - gnbds[domid]->gh = gnbd_setup(GNBD_SERVER, GNBD_PORT, 13.184 - gnbds[domid]->gnbdname, GNBD_CLIENT); 13.185 - if (gnbds[domid]->gh == NULL) { 13.186 - printf("Couldn't connect to gnbd mount!!\n"); 13.187 - return 0; 13.188 - } 13.189 - gnbds[domid]->fd = gnbd_fd(gnbds[domid]->gh); 13.190 - blktap_attach_poll(gnbds[domid]->fd, POLLIN, gnbd_pollhook); 13.191 - 13.192 - printf("gnbd mount connected. (%s)\n", gnbds[domid]->gnbdname); 13.193 - break; 13.194 - } 13.195 - } 13.196 - return 0; 13.197 -parse_error: 13.198 - printf("Bad control message!\n"); 13.199 - return 0; 13.200 - 13.201 -create_failed: 13.202 - /* TODO: close the db ref. */ 13.203 - return 0; 13.204 -} 13.205 - 13.206 -static int gnbd_blkif_probe(blkif_request_t *req, gnbd_t *gnbd) 13.207 -{ 13.208 - int fd; 13.209 - struct stat stat; 13.210 - vdisk_t *gnbd_info; 13.211 - blkif_response_t *rsp; 13.212 - 13.213 - /* We expect one buffer only. */ 13.214 - if ( req->nr_segments != 1 ) 13.215 - goto err; 13.216 - 13.217 - /* Make sure the buffer is page-sized. */ 13.218 - if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) || 13.219 - (blkif_last_sect (req->frame_and_sects[0]) != 7) ) 13.220 - goto err; 13.221 - 13.222 - /* loop for multiple gnbds would start here. */ 13.223 - 13.224 - gnbd_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0); 13.225 - gnbd_info[0].device = gnbd->vdevice; 13.226 - gnbd_info[0].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; 13.227 - gnbd_info[0].capacity = gnbd_sectors(gnbd->gh); 13.228 - 13.229 - printf("[SECTORS] %llu", gnbd_info[0].capacity); 13.230 - 13.231 - //if (gnbd_info[0].capacity == 0) 13.232 - // gnbd_info[0].capacity = ((u64)1 << 63); // xend does this too. 13.233 - 13.234 - DPRINTF("iPROBE! 
device: 0x%04x capacity: %llu\n", gnbd_info[0].device, 13.235 - gnbd_info[0].capacity); 13.236 - 13.237 - rsp = (blkif_response_t *)req; 13.238 - rsp->id = req->id; 13.239 - rsp->operation = BLKIF_OP_PROBE; 13.240 - rsp->status = 1; /* number of disks */ 13.241 - 13.242 - return BLKTAP_RESPOND; 13.243 -err: 13.244 - rsp = (blkif_response_t *)req; 13.245 - rsp->id = req->id; 13.246 - rsp->operation = req->operation; 13.247 - rsp->status = BLKIF_RSP_ERROR; 13.248 - return BLKTAP_RESPOND; 13.249 -} 13.250 - 13.251 -int gnbd_request(blkif_request_t *req) 13.252 -{ 13.253 - struct gnbd_handle *gh; 13.254 - u64 sector; 13.255 - char *spage, *dpage; 13.256 - int ret, i, idx; 13.257 - blkif_response_t *rsp; 13.258 - domid_t dom = ID_TO_DOM(req->id); 13.259 - 13.260 - if ((gnbds[dom] == NULL) || (gnbds[dom]->gh == NULL)) { 13.261 - printf("Data request for unknown domain!!! %d\n", dom); 13.262 - rsp = (blkif_response_t *)req; 13.263 - rsp->id = req->id; 13.264 - rsp->operation = req->operation; 13.265 - rsp->status = BLKIF_RSP_ERROR; 13.266 - return BLKTAP_RESPOND; 13.267 - } 13.268 - 13.269 - gh = gnbds[dom]->gh; 13.270 - 13.271 - switch (req->operation) 13.272 - { 13.273 - case BLKIF_OP_PROBE: 13.274 - { 13.275 - printf("PROBE!\n"); 13.276 - if ( gnbds[dom]->gh_state == GH_PROBEWAITING ) { 13.277 - printf("Already have a PROBE outstanding!\n"); 13.278 - goto err; 13.279 - } 13.280 - 13.281 - if ( gnbds[dom]->gh_state == GH_DISCONNECTED ) 13.282 - { 13.283 - /* need to defer until we are connected. 
*/ 13.284 - printf("Deferring PROBE!\n"); 13.285 - idx = ID_TO_IDX(req->id); 13.286 - memcpy(&pending_list[idx].req, req, sizeof(*req)); 13.287 - ASSERT(pending_list[idx].count == 0); 13.288 - pending_list[idx].count = 1; 13.289 - 13.290 - gnbds[dom]->probe_idx = idx; 13.291 - gnbds[dom]->gh_state = GH_PROBEWAITING; 13.292 - 13.293 - return BLKTAP_STOLEN; 13.294 - } 13.295 - 13.296 - 13.297 - return gnbd_blkif_probe(req, gnbds[dom]); 13.298 - } 13.299 - case BLKIF_OP_WRITE: 13.300 - { 13.301 - unsigned long size; 13.302 - 13.303 - idx = ID_TO_IDX(req->id); 13.304 - ASSERT(pending_list[idx].count == 0); 13.305 - memcpy(&pending_list[idx].req, req, sizeof(*req)); 13.306 - pending_list[idx].count = req->nr_segments; 13.307 - pending_count++; /* dbg */ 13.308 - 13.309 - for (i = 0; i < req->nr_segments; i++) { 13.310 - 13.311 - sector = req->sector_number + (8*i); 13.312 - 13.313 - size = blkif_last_sect (req->frame_and_sects[i]) - 13.314 - blkif_first_sect(req->frame_and_sects[i]) + 1; 13.315 - 13.316 - DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 13.317 - req->sector_number, sector, 13.318 - blkif_first_sect(req->frame_and_sects[i]), 13.319 - blkif_last_sect (req->frame_and_sects[i]), 13.320 - (long)(sector << SECTOR_SHIFT)); 13.321 - 13.322 - spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 13.323 - spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; 13.324 - 13.325 - ret = gnbd_write(gh, sector, size, spage, (unsigned long)idx); 13.326 - if (ret) { 13.327 - printf("gnbd error on WRITE\n"); 13.328 - goto err; 13.329 - } 13.330 - } 13.331 -//printf("[WR] < %lu\n", (unsigned long)idx); 13.332 - 13.333 - return BLKTAP_STOLEN; 13.334 - } 13.335 - case BLKIF_OP_READ: 13.336 - { 13.337 - unsigned long size; 13.338 - 13.339 - idx = ID_TO_IDX(req->id); 13.340 - ASSERT(pending_list[idx].count == 0); 13.341 - memcpy(&pending_list[idx].req, req, sizeof(*req)); 13.342 - pending_list[idx].count = req->nr_segments; 13.343 - 
pending_count++; /* dbg */ 13.344 - 13.345 - for (i = 0; i < req->nr_segments; i++) { 13.346 - 13.347 - sector = req->sector_number + (8*i); 13.348 - 13.349 - size = blkif_last_sect (req->frame_and_sects[i]) - 13.350 - blkif_first_sect(req->frame_and_sects[i]) + 1; 13.351 - 13.352 - DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 13.353 - req->sector_number, sector, 13.354 - blkif_first_sect(req->frame_and_sects[i]), 13.355 - blkif_last_sect (req->frame_and_sects[i]), 13.356 - (long)(sector << SECTOR_SHIFT)); 13.357 - 13.358 - dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 13.359 - dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; 13.360 - 13.361 - ret = gnbd_read(gh, sector, size, dpage, (unsigned long)idx); 13.362 - if (ret) { 13.363 - printf("gnbd error on READ\n"); 13.364 - goto err; 13.365 - } 13.366 - 13.367 - } 13.368 -//printf("[RD] < %lu\n", (unsigned long)idx); 13.369 - 13.370 - return BLKTAP_STOLEN; 13.371 - } 13.372 - } 13.373 - 13.374 - printf("Unknown block operation!\n"); 13.375 -err: 13.376 - rsp = (blkif_response_t *)req; 13.377 - rsp->id = req->id; 13.378 - rsp->operation = req->operation; 13.379 - rsp->status = BLKIF_RSP_ERROR; 13.380 - return BLKTAP_RESPOND; 13.381 -} 13.382 - 13.383 -/* the gnbd library terminates the request stream. _resp is a noop. */ 13.384 -int gnbd_response(blkif_response_t *rsp) 13.385 -{ 13.386 - return BLKTAP_PASS; 13.387 -} 13.388 - 13.389 -int gnbd_pollhook(int fd) 13.390 -{ 13.391 - int err; 13.392 - struct gnbd_handle *gh; 13.393 - blkif_request_t *req; 13.394 - blkif_response_t *rsp; 13.395 - unsigned long idx; 13.396 - 13.397 - gnbd_t *gnbd = get_gnbd_by_fd(fd); 13.398 - 13.399 - if (gnbd == NULL) { 13.400 - printf("GNBD badness: got poll hook on unknown device. 
(%d)\n", fd); 13.401 - return -1; 13.402 - } 13.403 - gh = gnbd->gh; 13.404 - err = gnbd_reply(gh); 13.405 - switch (err) { 13.406 - case GNBD_LOGIN_DONE: 13.407 - if (gnbd->gh_state == GH_PROBEWAITING) { 13.408 - req = (blkif_request_t *)&pending_list[gnbd->probe_idx].req; 13.409 - printf("[!] Sending deferred PROBE!\n"); 13.410 - gnbd_blkif_probe(req, gnbd); 13.411 - pending_list[gnbd->probe_idx].count = 0; 13.412 - rsp = (blkif_response_t *)req; 13.413 - blktap_inject_response(rsp); 13.414 - } 13.415 - gnbd->gh_state = GH_CONNECTED; 13.416 - printf("GNBD_LOGIN_DONE (%d)\n", fd); 13.417 - break; 13.418 - 13.419 - case GNBD_REQUEST_DONE: /* switch to idx */ 13.420 - idx = gnbd_finished_request(gh); 13.421 - req = (blkif_request_t *)&pending_list[idx].req; 13.422 - if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){ 13.423 - printf("gnbd returned a bad cookie (%lu)!\n", idx); 13.424 - break; 13.425 - } 13.426 - 13.427 - pending_list[idx].count--; 13.428 - 13.429 - if (pending_list[idx].count == 0) { 13.430 - blkif_request_t tmp = *req; 13.431 - pending_count--; /* dbg */ 13.432 - rsp = (blkif_response_t *)req; 13.433 - rsp->id = tmp.id; 13.434 - rsp->operation = tmp.operation; 13.435 - rsp->status = BLKIF_RSP_OKAY; 13.436 - blktap_inject_response(rsp); 13.437 -/* 13.438 -if (rsp->operation == BLKIF_OP_READ) { 13.439 -printf("[RD] > %lu (%d pndg)\n", (unsigned long)idx, pending_count); 13.440 -} else if (rsp->operation == BLKIF_OP_WRITE) { 13.441 -printf("[WR] > %lu (%d pndg)\n", (unsigned long)idx, pending_count); 13.442 -} else { 13.443 -printf("[??] 
> %lu (%d pndg)\n", (unsigned long)idx, pending_count); 13.444 -} 13.445 -*/ 13.446 - } 13.447 - break; 13.448 - 13.449 - case GNBD_CONTINUE: 13.450 - break; 13.451 - 13.452 - case 0: 13.453 - break; 13.454 - 13.455 - default: 13.456 - printf("gnbd_reply error"); 13.457 - break; 13.458 - } 13.459 - return 0; 13.460 -} 13.461 - 13.462 -void gnbd_init(void) 13.463 -{ 13.464 - int i; 13.465 - 13.466 - for (i = 0; i < MAX_DOMS; i++) 13.467 - gnbds[i] = NULL; 13.468 - 13.469 - for (i = 0; i < MAX_REQUESTS; i++) 13.470 - pending_list[i].count = 0; 13.471 - 13.472 - printf("GNBD image plugin initialized\n"); 13.473 -} 13.474 -
14.1 --- a/tools/blktap/blkgnbdlib.h Fri May 20 14:20:31 2005 +0000 14.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 14.3 @@ -1,16 +0,0 @@ 14.4 -/* blkgnbdlib.h 14.5 - * 14.6 - * gndb image-backed block device. 14.7 - * 14.8 - * (c) 2004 Andrew Warfield. 14.9 - * 14.10 - * Xend has been modified to use an amorfs:[fsid] disk tag. 14.11 - * This will show up as device type (maj:240,min:0) = 61440. 14.12 - * 14.13 - * The fsid is placed in the sec_start field of the disk extent. 14.14 - */ 14.15 - 14.16 -int gnbd_control(control_msg_t *msg); 14.17 -int gnbd_request(blkif_request_t *req); 14.18 -int gnbd_response(blkif_response_t *rsp); /* noop */ 14.19 -void gnbd_init(void);
15.1 --- a/tools/blktap/blkimg.c Fri May 20 14:20:31 2005 +0000 15.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 15.3 @@ -1,19 +0,0 @@ 15.4 -/* blkimg.c 15.5 - * 15.6 - * file-backed disk. 15.7 - */ 15.8 - 15.9 -#include "blktaplib.h" 15.10 -#include "blkimglib.h" 15.11 - 15.12 - 15.13 -int main(int argc, char *argv[]) 15.14 -{ 15.15 - image_init(); 15.16 - 15.17 - blktap_register_ctrl_hook("image_control", image_control); 15.18 - blktap_register_request_hook("image_request", image_request); 15.19 - blktap_listen(); 15.20 - 15.21 - return 0; 15.22 -}
16.1 --- a/tools/blktap/blkimglib.c Fri May 20 14:20:31 2005 +0000 16.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 16.3 @@ -1,325 +0,0 @@ 16.4 -/* blkimglib.c 16.5 - * 16.6 - * file image-backed block device. 16.7 - * 16.8 - * (c) 2004 Andrew Warfield. 16.9 - * 16.10 - * Xend has been modified to use an amorfs:[fsid] disk tag. 16.11 - * This will show up as device type (maj:240,min:0) = 61440. 16.12 - * 16.13 - * The fsid is placed in the sec_start field of the disk extent. 16.14 - */ 16.15 - 16.16 -#include <stdio.h> 16.17 -#include <stdlib.h> 16.18 -#include <string.h> 16.19 -#include <db.h> 16.20 -#include <sys/stat.h> 16.21 -#include <sys/types.h> 16.22 -#include <unistd.h> 16.23 -#include <errno.h> 16.24 -#include "blktaplib.h" 16.25 - 16.26 -//#define TMP_IMAGE_FILE_NAME "/dev/sda1" 16.27 -#define TMP_IMAGE_FILE_NAME "fc3.image" 16.28 - 16.29 -#define MAX_DOMS 1024 16.30 -#define MAX_IMGNAME_LEN 255 16.31 -#define AMORFS_DEV 61440 16.32 -#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */ 16.33 -#define SECTOR_SHIFT 9 16.34 - 16.35 -#if 0 16.36 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 16.37 -#else 16.38 -#define DPRINTF(_f, _a...) ((void)0) 16.39 -#endif 16.40 - 16.41 - 16.42 -typedef struct { 16.43 - /* These need to turn into an array/rbtree for multi-disk support. 
*/ 16.44 - FILE *img; 16.45 - u64 fsid; 16.46 - char imgname[MAX_IMGNAME_LEN]; 16.47 - blkif_vdev_t vdevice; 16.48 -} image_t; 16.49 - 16.50 -image_t *images[MAX_DOMS]; 16.51 -blkif_request_t *reread_list[MAX_REQUESTS]; 16.52 - 16.53 -int image_control(control_msg_t *msg) 16.54 -{ 16.55 - domid_t domid; 16.56 - DB *db; 16.57 - int ret; 16.58 - 16.59 - if (msg->type != CMSG_BLKIF_BE) 16.60 - { 16.61 - printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type); 16.62 - return 0; 16.63 - } 16.64 - 16.65 - switch(msg->subtype) 16.66 - { 16.67 - case CMSG_BLKIF_BE_CREATE: 16.68 - if ( msg->length != sizeof(blkif_be_create_t) ) 16.69 - goto parse_error; 16.70 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n", 16.71 - ((blkif_be_create_t *)msg->msg)->domid, 16.72 - ((blkif_be_create_t *)msg->msg)->blkif_handle); 16.73 - domid = ((blkif_be_create_t *)msg->msg)->domid; 16.74 - if (images[domid] != NULL) { 16.75 - printf("attempt to connect from an existing dom!\n"); 16.76 - return 0; 16.77 - } 16.78 - 16.79 - images[domid] = (image_t *)malloc(sizeof(image_t)); 16.80 - if (images[domid] == NULL) { 16.81 - printf("error allocating image record.\n"); 16.82 - return 0; 16.83 - } 16.84 - 16.85 - images[domid]->img = NULL; 16.86 - images[domid]->fsid = 0; 16.87 - 16.88 - printf("Image connected.\n"); 16.89 - break; 16.90 - 16.91 - case CMSG_BLKIF_BE_DESTROY: 16.92 - if ( msg->length != sizeof(blkif_be_destroy_t) ) 16.93 - goto parse_error; 16.94 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n", 16.95 - ((blkif_be_destroy_t *)msg->msg)->domid, 16.96 - ((blkif_be_destroy_t *)msg->msg)->blkif_handle); 16.97 - 16.98 - domid = ((blkif_be_destroy_t *)msg->msg)->domid; 16.99 - if (images[domid] != NULL) { 16.100 - if (images[domid]->img != NULL) 16.101 - fclose( images[domid]->img ); 16.102 - free( images[domid] ); 16.103 - images[domid] = NULL; 16.104 - } 16.105 - break; 16.106 - case CMSG_BLKIF_BE_VBD_GROW: 16.107 - { 16.108 - blkif_be_vbd_grow_t *grow; 
16.109 - 16.110 - if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) 16.111 - goto parse_error; 16.112 - printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n", 16.113 - ((blkif_be_vbd_grow_t *)msg->msg)->domid, 16.114 - ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle, 16.115 - ((blkif_be_vbd_grow_t *)msg->msg)->vdevice); 16.116 - printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n", 16.117 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start, 16.118 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length, 16.119 - ((blkif_be_vbd_grow_t *)msg->msg)->extent.device); 16.120 - grow = (blkif_be_vbd_grow_t *)msg->msg; 16.121 - domid = grow->domid; 16.122 - if (images[domid] == NULL) { 16.123 - printf("VBD_GROW on unconnected domain!\n"); 16.124 - return 0; 16.125 - } 16.126 - 16.127 - if (grow->extent.device != AMORFS_DEV) { 16.128 - printf("VBD_GROW on non-amorfs device!\n"); 16.129 - return 0; 16.130 - } 16.131 - 16.132 - /* TODO: config support for arbitrary image files/modes. */ 16.133 - sprintf(images[domid]->imgname, TMP_IMAGE_FILE_NAME); 16.134 - 16.135 - images[domid]->fsid = grow->extent.sector_start; 16.136 - images[domid]->vdevice = grow->vdevice; 16.137 - images[domid]->img = fopen64(TMP_IMAGE_FILE_NAME, "r+"); 16.138 - if (images[domid]->img == NULL) { 16.139 - printf("Couldn't open image file!\n"); 16.140 - return 0; 16.141 - } 16.142 - 16.143 - printf("Image file opened. (%s)\n", images[domid]->imgname); 16.144 - break; 16.145 - } 16.146 - } 16.147 - return 0; 16.148 -parse_error: 16.149 - printf("Bad control message!\n"); 16.150 - return 0; 16.151 - 16.152 -create_failed: 16.153 - /* TODO: close the db ref. 
*/ 16.154 - return 0; 16.155 -} 16.156 - 16.157 -int image_request(blkif_request_t *req) 16.158 -{ 16.159 - FILE *img; 16.160 - u64 sector; 16.161 - char *spage, *dpage; 16.162 - int ret, i, idx; 16.163 - blkif_response_t *rsp; 16.164 - domid_t dom = ID_TO_DOM(req->id); 16.165 - 16.166 - if ((images[dom] == NULL) || (images[dom]->img == NULL)) { 16.167 - printf("Data request for unknown domain!!! %d\n", dom); 16.168 - rsp = (blkif_response_t *)req; 16.169 - rsp->id = req->id; 16.170 - rsp->operation = req->operation; 16.171 - rsp->status = BLKIF_RSP_ERROR; 16.172 - return BLKTAP_RESPOND; 16.173 - } 16.174 - 16.175 - img = images[dom]->img; 16.176 - 16.177 - switch (req->operation) 16.178 - { 16.179 - case BLKIF_OP_PROBE: 16.180 - { 16.181 - int fd; 16.182 - struct stat stat; 16.183 - vdisk_t *img_info; 16.184 - 16.185 - 16.186 - /* We expect one buffer only. */ 16.187 - if ( req->nr_segments != 1 ) 16.188 - goto err; 16.189 - 16.190 - /* Make sure the buffer is page-sized. */ 16.191 - if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) || 16.192 - (blkif_last_sect (req->frame_and_sects[0]) != 7) ) 16.193 - goto err; 16.194 - 16.195 - /* loop for multiple images would start here. */ 16.196 - 16.197 - fd = fileno(img); 16.198 - if (fd == -1) { 16.199 - printf("Couldn't get image fd in PROBE!\n"); 16.200 - goto err; 16.201 - } 16.202 - 16.203 - ret = fstat(fd, &stat); 16.204 - if (ret != 0) { 16.205 - printf("Couldn't stat image in PROBE!\n"); 16.206 - goto err; 16.207 - } 16.208 - 16.209 - img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0); 16.210 - img_info[0].device = images[dom]->vdevice; 16.211 - img_info[0].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; 16.212 - img_info[0].capacity = (stat.st_size >> SECTOR_SHIFT); 16.213 - 16.214 - if (img_info[0].capacity == 0) 16.215 - img_info[0].capacity = ((u64)1 << 63); // xend does this too. 16.216 - 16.217 - DPRINTF("iPROBE! 
device: 0x%04x capacity: %llu\n", img_info[0].device, 16.218 - img_info[0].capacity); 16.219 - 16.220 - rsp = (blkif_response_t *)req; 16.221 - rsp->id = req->id; 16.222 - rsp->operation = BLKIF_OP_PROBE; 16.223 - rsp->status = 1; /* number of disks */ 16.224 - 16.225 - return BLKTAP_RESPOND; 16.226 - } 16.227 - case BLKIF_OP_WRITE: 16.228 - { 16.229 - unsigned long size; 16.230 - 16.231 - for (i = 0; i < req->nr_segments; i++) { 16.232 - 16.233 - sector = req->sector_number + (8*i); 16.234 - 16.235 - size = blkif_last_sect (req->frame_and_sects[i]) - 16.236 - blkif_first_sect(req->frame_and_sects[i]) + 1; 16.237 - 16.238 - ret = fseeko64(img, (off_t)(sector << SECTOR_SHIFT), SEEK_SET); 16.239 - if (ret != 0) { 16.240 - printf("fseek error on WRITE\n"); 16.241 - goto err; 16.242 - } 16.243 - 16.244 - DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 16.245 - req->sector_number, sector, 16.246 - blkif_first_sect(req->frame_and_sects[i]), 16.247 - blkif_last_sect (req->frame_and_sects[i]), 16.248 - (long)(sector << SECTOR_SHIFT)); 16.249 - 16.250 - spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 16.251 - spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; 16.252 - ret = fwrite(spage, size << SECTOR_SHIFT, 1, img); 16.253 - if (ret != 1) { 16.254 - printf("fwrite error on WRITE (%d)\n", errno); 16.255 - goto err; 16.256 - } 16.257 - } 16.258 - 16.259 - rsp = (blkif_response_t *)req; 16.260 - rsp->id = req->id; 16.261 - rsp->operation = BLKIF_OP_WRITE; 16.262 - rsp->status = BLKIF_RSP_OKAY; 16.263 - 16.264 - return BLKTAP_RESPOND; 16.265 - } 16.266 - case BLKIF_OP_READ: 16.267 - { 16.268 - unsigned long size; 16.269 - 16.270 - for (i = 0; i < req->nr_segments; i++) { 16.271 - 16.272 - sector = req->sector_number + (8*i); 16.273 - 16.274 - size = blkif_last_sect (req->frame_and_sects[i]) - 16.275 - blkif_first_sect(req->frame_and_sects[i]) + 1; 16.276 - 16.277 - ret = fseeko64(img, (off_t)(sector << SECTOR_SHIFT), SEEK_SET); 
16.278 - if (ret != 0) { 16.279 - printf("fseek error on READ\n"); 16.280 - goto err; 16.281 - } 16.282 - 16.283 - DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 16.284 - req->sector_number, sector, 16.285 - blkif_first_sect(req->frame_and_sects[i]), 16.286 - blkif_last_sect (req->frame_and_sects[i]), 16.287 - (long)(sector << SECTOR_SHIFT)); 16.288 - 16.289 - dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 16.290 - dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; 16.291 - ret = fread(dpage, size << SECTOR_SHIFT, 1, img); 16.292 - if (ret != 1) { 16.293 - printf("fread error on READ\n"); 16.294 - goto err; 16.295 - } 16.296 - } 16.297 - 16.298 - rsp = (blkif_response_t *)req; 16.299 - rsp->id = req->id; 16.300 - rsp->operation = BLKIF_OP_READ; 16.301 - rsp->status = BLKIF_RSP_OKAY; 16.302 - return BLKTAP_RESPOND; 16.303 - } 16.304 - } 16.305 - 16.306 - printf("Unknow block operation!\n"); 16.307 -err: 16.308 - rsp = (blkif_response_t *)req; 16.309 - rsp->id = req->id; 16.310 - rsp->operation = req->operation; 16.311 - rsp->status = BLKIF_RSP_ERROR; 16.312 - return BLKTAP_RESPOND; 16.313 -} 16.314 - 16.315 -/* the image library terminates the request stream. _resp is a noop. */ 16.316 -int image_response(blkif_response_t *rsp) 16.317 -{ 16.318 - return BLKTAP_PASS; 16.319 -} 16.320 - 16.321 -void image_init(void) 16.322 -{ 16.323 - int i; 16.324 - 16.325 - for (i = 0; i < MAX_DOMS; i++) 16.326 - images[i] = NULL; 16.327 -} 16.328 -
17.1 --- a/tools/blktap/blkimglib.h Fri May 20 14:20:31 2005 +0000 17.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 17.3 @@ -1,16 +0,0 @@ 17.4 -/* blkimglib.h 17.5 - * 17.6 - * file image-backed block device. 17.7 - * 17.8 - * (c) 2004 Andrew Warfield. 17.9 - * 17.10 - * Xend has been modified to use an amorfs:[fsid] disk tag. 17.11 - * This will show up as device type (maj:240,min:0) = 61440. 17.12 - * 17.13 - * The fsid is placed in the sec_start field of the disk extent. 17.14 - */ 17.15 - 17.16 -int image_control(control_msg_t *msg); 17.17 -int image_request(blkif_request_t *req); 17.18 -int image_response(blkif_response_t *rsp); /* noop */ 17.19 -void image_init(void);
18.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 18.2 +++ b/tools/blktap/block-async.c Fri May 20 14:50:49 2005 +0000 18.3 @@ -0,0 +1,404 @@ 18.4 +/* block-async.c 18.5 + * 18.6 + * Asynchronous block wrappers for parallax. 18.7 + */ 18.8 + 18.9 + 18.10 +#include <stdio.h> 18.11 +#include <stdlib.h> 18.12 +#include <string.h> 18.13 +#include <pthread.h> 18.14 +#include "block-async.h" 18.15 +#include "blockstore.h" 18.16 +#include "vdi.h" 18.17 + 18.18 + 18.19 +#if 0 18.20 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 18.21 +#else 18.22 +#define DPRINTF(_f, _a...) ((void)0) 18.23 +#endif 18.24 + 18.25 +/* We have a queue of outstanding I/O requests implemented as a 18.26 + * circular producer-consumer ring with free-running buffers. 18.27 + * to allow reordering, this ring indirects to indexes in an 18.28 + * ring of io_structs. 18.29 + * 18.30 + * the block_* calls may either add an entry to this ring and return, 18.31 + * or satisfy the request immediately and call the callback directly. 18.32 + * None of the io calls in parallax should be nested enough to worry 18.33 + * about stack problems with this approach. 
18.34 + */ 18.35 + 18.36 +struct read_args { 18.37 + u64 addr; 18.38 +}; 18.39 + 18.40 +struct write_args { 18.41 + u64 addr; 18.42 + char *block; 18.43 +}; 18.44 + 18.45 +struct alloc_args { 18.46 + char *block; 18.47 +}; 18.48 + 18.49 +struct pending_io_req { 18.50 + enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op; 18.51 + union { 18.52 + struct read_args r; 18.53 + struct write_args w; 18.54 + struct alloc_args a; 18.55 + } u; 18.56 + io_cb_t cb; 18.57 + void *param; 18.58 +}; 18.59 + 18.60 +void radix_lock_init(struct radix_lock *r) 18.61 +{ 18.62 + int i; 18.63 + 18.64 + pthread_mutex_init(&r->lock, NULL); 18.65 + for (i=0; i < 1024; i++) { 18.66 + r->lines[i] = 0; 18.67 + r->waiters[i] = NULL; 18.68 + r->state[i] = ANY; 18.69 + } 18.70 +} 18.71 + 18.72 +/* maximum outstanding I/O requests issued asynchronously */ 18.73 +/* must be a power of 2.*/ 18.74 +#define MAX_PENDING_IO 1024 //1024 18.75 + 18.76 +/* how many threads to concurrently issue I/O to the disk. */ 18.77 +#define IO_POOL_SIZE 10 //10 18.78 + 18.79 +static struct pending_io_req pending_io_reqs[MAX_PENDING_IO]; 18.80 +static int pending_io_list[MAX_PENDING_IO]; 18.81 +static unsigned long io_prod = 0, io_cons = 0, io_free = 0; 18.82 +#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1)) 18.83 +#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs) 18.84 +#define PENDING_IO_ENT(_x) \ 18.85 + (&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]]) 18.86 +#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod) 18.87 +#define CAN_CONSUME_PENDING_IO (io_cons != io_prod) 18.88 +static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER; 18.89 +static pthread_cond_t pending_io_cond = PTHREAD_COND_INITIALIZER; 18.90 + 18.91 +static void init_pending_io(void) 18.92 +{ 18.93 + int i; 18.94 + 18.95 + for (i=0; i<MAX_PENDING_IO; i++) 18.96 + pending_io_list[i] = i; 18.97 + 18.98 +} 18.99 + 18.100 +void block_read(u64 addr, io_cb_t cb, void *param) 18.101 +{ 18.102 + struct 
pending_io_req *req; 18.103 + 18.104 + pthread_mutex_lock(&pending_io_lock); 18.105 + assert(CAN_PRODUCE_PENDING_IO); 18.106 + 18.107 + req = PENDING_IO_ENT(io_prod++); 18.108 + DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req); 18.109 + req->op = IO_READ; 18.110 + req->u.r.addr = addr; 18.111 + req->cb = cb; 18.112 + req->param = param; 18.113 + 18.114 + pthread_cond_signal(&pending_io_cond); 18.115 + pthread_mutex_unlock(&pending_io_lock); 18.116 +} 18.117 + 18.118 + 18.119 +void block_write(u64 addr, char *block, io_cb_t cb, void *param) 18.120 +{ 18.121 + struct pending_io_req *req; 18.122 + 18.123 + pthread_mutex_lock(&pending_io_lock); 18.124 + assert(CAN_PRODUCE_PENDING_IO); 18.125 + 18.126 + req = PENDING_IO_ENT(io_prod++); 18.127 + DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req); 18.128 + req->op = IO_WRITE; 18.129 + req->u.w.addr = addr; 18.130 + req->u.w.block = block; 18.131 + req->cb = cb; 18.132 + req->param = param; 18.133 + 18.134 + pthread_cond_signal(&pending_io_cond); 18.135 + pthread_mutex_unlock(&pending_io_lock); 18.136 +} 18.137 + 18.138 + 18.139 +void block_alloc(char *block, io_cb_t cb, void *param) 18.140 +{ 18.141 + struct pending_io_req *req; 18.142 + 18.143 + pthread_mutex_lock(&pending_io_lock); 18.144 + assert(CAN_PRODUCE_PENDING_IO); 18.145 + 18.146 + req = PENDING_IO_ENT(io_prod++); 18.147 + req->op = IO_ALLOC; 18.148 + req->u.a.block = block; 18.149 + req->cb = cb; 18.150 + req->param = param; 18.151 + 18.152 + pthread_cond_signal(&pending_io_cond); 18.153 + pthread_mutex_unlock(&pending_io_lock); 18.154 +} 18.155 + 18.156 +void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param) 18.157 +{ 18.158 + struct io_ret ret; 18.159 + pthread_mutex_lock(&r->lock); 18.160 + 18.161 + if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) { 18.162 + r->lines[row]++; 18.163 + r->state[row] = READ; 18.164 + DPRINTF("RLOCK : %3d (row: %d)\n", r->lines[row], row); 18.165 + pthread_mutex_unlock(&r->lock); 18.166 + ret.type = 
IO_INT_T; 18.167 + ret.u.i = 0; 18.168 + cb(ret, param); 18.169 + } else { 18.170 + struct radix_wait **rwc; 18.171 + struct radix_wait *rw = 18.172 + (struct radix_wait *) malloc (sizeof(struct radix_wait)); 18.173 + DPRINTF("RLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row); 18.174 + rw->type = RLOCK; 18.175 + rw->param = param; 18.176 + rw->cb = cb; 18.177 + rw->next = NULL; 18.178 + /* append to waiters list. */ 18.179 + rwc = &r->waiters[row]; 18.180 + while (*rwc != NULL) rwc = &(*rwc)->next; 18.181 + *rwc = rw; 18.182 + pthread_mutex_unlock(&r->lock); 18.183 + return; 18.184 + } 18.185 +} 18.186 + 18.187 + 18.188 +void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param) 18.189 +{ 18.190 + struct io_ret ret; 18.191 + pthread_mutex_lock(&r->lock); 18.192 + 18.193 + /* the second check here is redundant -- just here for debugging now. */ 18.194 + if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) { 18.195 + r->state[row] = STOP; 18.196 + r->lines[row] = -1; 18.197 + DPRINTF("WLOCK : %3d (row: %d)\n", r->lines[row], row); 18.198 + pthread_mutex_unlock(&r->lock); 18.199 + ret.type = IO_INT_T; 18.200 + ret.u.i = 0; 18.201 + cb(ret, param); 18.202 + } else { 18.203 + struct radix_wait **rwc; 18.204 + struct radix_wait *rw = 18.205 + (struct radix_wait *) malloc (sizeof(struct radix_wait)); 18.206 + DPRINTF("WLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row); 18.207 + rw->type = WLOCK; 18.208 + rw->param = param; 18.209 + rw->cb = cb; 18.210 + rw->next = NULL; 18.211 + /* append to waiters list. */ 18.212 + rwc = &r->waiters[row]; 18.213 + while (*rwc != NULL) rwc = &(*rwc)->next; 18.214 + *rwc = rw; 18.215 + pthread_mutex_unlock(&r->lock); 18.216 + return; 18.217 + } 18.218 + 18.219 +} 18.220 + 18.221 +/* called with radix_lock locked and lock count of zero. 
*/ 18.222 +static void wake_waiters(struct radix_lock *r, int row) 18.223 +{ 18.224 + struct pending_io_req *req; 18.225 + struct radix_wait *rw; 18.226 + 18.227 + DPRINTF("prewake\n"); 18.228 + if (r->lines[row] != 0) return; 18.229 + if (r->waiters[row] == NULL) {DPRINTF("nowaiters!\n");return;} 18.230 + 18.231 + DPRINTF("wake\n"); 18.232 + if (r->waiters[row]->type == WLOCK) { 18.233 + rw = r->waiters[row]; 18.234 + pthread_mutex_lock(&pending_io_lock); 18.235 + assert(CAN_PRODUCE_PENDING_IO); 18.236 + 18.237 + req = PENDING_IO_ENT(io_prod++); 18.238 + DPRINTF("Produce (WWAKE) %lu (%p)\n", io_prod - 1, req); 18.239 + req->op = IO_WWAKE; 18.240 + req->cb = rw->cb; 18.241 + req->param = rw->param; 18.242 + r->lines[row] = -1; /* write lock the row. */ 18.243 + r->state[row] = STOP; 18.244 + r->waiters[row] = rw->next; 18.245 + free(rw); 18.246 + pthread_mutex_unlock(&pending_io_lock); 18.247 + } else /* RLOCK */ { 18.248 + while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) { 18.249 + rw = r->waiters[row]; 18.250 + pthread_mutex_lock(&pending_io_lock); 18.251 + assert(CAN_PRODUCE_PENDING_IO); 18.252 + 18.253 + req = PENDING_IO_ENT(io_prod++); 18.254 + DPRINTF("Produce (RWAKE) %lu (%p)\n", io_prod - 1, req); 18.255 + req->op = IO_RWAKE; 18.256 + req->cb = rw->cb; 18.257 + req->param = rw->param; 18.258 + r->lines[row]++; /* read lock the row. 
*/ 18.259 + r->state[row] = READ; 18.260 + r->waiters[row] = rw->next; 18.261 + free(rw); 18.262 + pthread_mutex_unlock(&pending_io_lock); 18.263 + } 18.264 + if (r->waiters[row] != NULL) /* There is a write queued still */ 18.265 + r->state[row] = STOP; 18.266 + } 18.267 + 18.268 + DPRINTF("wakedone\n"); 18.269 + DPRINTF("prod: %lu cons: %lu free: %lu\n", io_prod, io_cons, io_free); 18.270 + pthread_mutex_lock(&pending_io_lock); 18.271 + pthread_cond_signal(&pending_io_cond); 18.272 + pthread_mutex_unlock(&pending_io_lock); 18.273 +} 18.274 + 18.275 +void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param) 18.276 +{ 18.277 + struct io_ret ret; 18.278 + 18.279 + pthread_mutex_lock(&r->lock); 18.280 + assert(r->lines[row] > 0); /* try to catch misuse. */ 18.281 + r->lines[row]--; 18.282 + DPRINTF("RUNLOCK: %3d (row: %d)\n", r->lines[row], row); 18.283 + if (r->lines[row] == 0) { 18.284 + r->state[row] = ANY; 18.285 + wake_waiters(r, row); 18.286 + } 18.287 + pthread_mutex_unlock(&r->lock); 18.288 + cb(ret, param); 18.289 +} 18.290 + 18.291 +void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param) 18.292 +{ 18.293 + struct io_ret ret; 18.294 + 18.295 + pthread_mutex_lock(&r->lock); 18.296 + assert(r->lines[row] == -1); /* try to catch misuse. 
*/ 18.297 + r->lines[row] = 0; 18.298 + r->state[row] = ANY; 18.299 + DPRINTF("WUNLOCK: %3d (row: %d)\n", r->lines[row], row); 18.300 + wake_waiters(r, row); 18.301 + pthread_mutex_unlock(&r->lock); 18.302 + cb(ret, param); 18.303 +} 18.304 + 18.305 +/* consumer calls */ 18.306 +static void do_next_io_req(struct pending_io_req *req) 18.307 +{ 18.308 + struct io_ret ret; 18.309 + void *param; 18.310 + 18.311 + switch (req->op) { 18.312 + case IO_READ: 18.313 + ret.type = IO_BLOCK_T; 18.314 + ret.u.b = readblock(req->u.r.addr); 18.315 + break; 18.316 + case IO_WRITE: 18.317 + ret.type = IO_INT_T; 18.318 + ret.u.i = writeblock(req->u.w.addr, req->u.w.block); 18.319 + DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr); 18.320 + break; 18.321 + case IO_ALLOC: 18.322 + ret.type = IO_ADDR_T; 18.323 + ret.u.a = allocblock(req->u.a.block); 18.324 + break; 18.325 + case IO_RWAKE: 18.326 + DPRINTF("WAKE DEFERRED RLOCK!\n"); 18.327 + ret.type = IO_INT_T; 18.328 + ret.u.i = 0; 18.329 + break; 18.330 + case IO_WWAKE: 18.331 + DPRINTF("WAKE DEFERRED WLOCK!\n"); 18.332 + ret.type = IO_INT_T; 18.333 + ret.u.i = 0; 18.334 + break; 18.335 + default: 18.336 + DPRINTF("Unknown IO operation on pending list!\n"); 18.337 + return; 18.338 + } 18.339 + 18.340 + param = req->param; 18.341 + DPRINTF("freeing idx %d to slot %lu.\n", PENDING_IO_IDX(req), PENDING_IO_MASK(io_free)); 18.342 + pthread_mutex_lock(&pending_io_lock); 18.343 + pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req); 18.344 + DPRINTF(" : prod: %lu cons: %lu free: %lu\n", io_prod, io_cons, io_free); 18.345 + pthread_mutex_unlock(&pending_io_lock); 18.346 + 18.347 + assert(req->cb != NULL); 18.348 + req->cb(ret, param); 18.349 + 18.350 +} 18.351 + 18.352 +void *io_thread(void *param) 18.353 +{ 18.354 + int tid; 18.355 + struct pending_io_req *req; 18.356 + 18.357 + /* Set this thread's tid. 
*/ 18.358 + tid = *(int *)param; 18.359 + free(param); 18.360 + 18.361 + DPRINTF("IOT %2d started.\n", tid); 18.362 + 18.363 +start: 18.364 + pthread_mutex_lock(&pending_io_lock); 18.365 + while (io_prod == io_cons) { 18.366 + pthread_cond_wait(&pending_io_cond, &pending_io_lock); 18.367 + } 18.368 + 18.369 + if (io_prod == io_cons) { 18.370 + /* unnecessary wakeup. */ 18.371 + pthread_mutex_unlock(&pending_io_lock); 18.372 + goto start; 18.373 + } 18.374 + 18.375 + req = PENDING_IO_ENT(io_cons++); 18.376 + DPRINTF("IOT %2d has req %04d(%p).\n", tid, PENDING_IO_IDX(req), req); 18.377 + DPRINTF(" : prod: %lu cons: %lu free: %lu\n", io_prod, io_cons, io_free); 18.378 + pthread_mutex_unlock(&pending_io_lock); 18.379 + 18.380 + 18.381 + do_next_io_req(req); 18.382 + 18.383 + goto start; 18.384 + 18.385 +} 18.386 + 18.387 +static pthread_t io_pool[IO_POOL_SIZE]; 18.388 +void start_io_threads(void) 18.389 + 18.390 +{ 18.391 + int i, tid=0; 18.392 + 18.393 + for (i=0; i < IO_POOL_SIZE; i++) { 18.394 + int ret, *t; 18.395 + t = (int *)malloc(sizeof(int)); 18.396 + *t = tid++; 18.397 + ret = pthread_create(&io_pool[i], NULL, io_thread, t); 18.398 + if (ret != 0) printf("Error starting thread %d\n", i); 18.399 + } 18.400 + 18.401 +} 18.402 + 18.403 +void init_block_async(void) 18.404 +{ 18.405 + init_pending_io(); 18.406 + start_io_threads(); 18.407 +}
19.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 19.2 +++ b/tools/blktap/block-async.h Fri May 20 14:50:49 2005 +0000 19.3 @@ -0,0 +1,69 @@ 19.4 +/* block-async.h 19.5 + * 19.6 + * Asynchronous block wrappers for parallax. 19.7 + */ 19.8 + 19.9 +#ifndef _BLOCKASYNC_H_ 19.10 +#define _BLOCKASYNC_H_ 19.11 + 19.12 +#include <assert.h> 19.13 +#include <xc.h> 19.14 +#include "vdi.h" 19.15 + 19.16 +struct io_ret 19.17 +{ 19.18 + enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type; 19.19 + union { 19.20 + u64 a; 19.21 + char *b; 19.22 + int i; 19.23 + } u; 19.24 +}; 19.25 + 19.26 +typedef void (*io_cb_t)(struct io_ret r, void *param); 19.27 + 19.28 +/* per-vdi lock structures to make sure requests run in a safe order. */ 19.29 +struct radix_wait { 19.30 + enum {RLOCK, WLOCK} type; 19.31 + io_cb_t cb; 19.32 + void *param; 19.33 + struct radix_wait *next; 19.34 +}; 19.35 + 19.36 +struct radix_lock { 19.37 + pthread_mutex_t lock; 19.38 + int lines[1024]; 19.39 + struct radix_wait *waiters[1024]; 19.40 + enum {ANY, READ, STOP} state[1024]; 19.41 +}; 19.42 +void radix_lock_init(struct radix_lock *r); 19.43 + 19.44 +void block_read(u64 addr, io_cb_t cb, void *param); 19.45 +void block_write(u64 addr, char *block, io_cb_t cb, void *param); 19.46 +void block_alloc(char *block, io_cb_t cb, void *param); 19.47 +void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param); 19.48 +void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param); 19.49 +void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param); 19.50 +void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param); 19.51 +void init_block_async(void); 19.52 + 19.53 +static inline u64 IO_ADDR(struct io_ret r) 19.54 +{ 19.55 + assert(r.type == IO_ADDR_T); 19.56 + return r.u.a; 19.57 +} 19.58 + 19.59 +static inline char *IO_BLOCK(struct io_ret r) 19.60 +{ 19.61 + assert(r.type == IO_BLOCK_T); 19.62 + return r.u.b; 19.63 +} 19.64 + 19.65 +static inline int IO_INT(struct io_ret r) 
19.66 +{ 19.67 + assert(r.type == IO_INT_T); 19.68 + return r.u.i; 19.69 +} 19.70 + 19.71 + 19.72 +#endif //_BLOCKASYNC_H_
20.1 --- a/tools/blktap/blockstore-tls.c Fri May 20 14:20:31 2005 +0000 20.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 20.3 @@ -1,161 +0,0 @@ 20.4 -/************************************************************************** 20.5 - * 20.6 - * blockstore.c 20.7 - * 20.8 - * Simple block store interface 20.9 - * 20.10 - */ 20.11 - 20.12 -#include <fcntl.h> 20.13 -#include <unistd.h> 20.14 -#include <stdio.h> 20.15 -#include <stdlib.h> 20.16 -#include <string.h> 20.17 -#include <pthread.h> 20.18 -#include <sys/types.h> 20.19 -#include <sys/stat.h> 20.20 -#include "blockstore.h" 20.21 -#include "parallax-threaded.h" 20.22 - 20.23 -/*static int block_fp = -1;*/ 20.24 - 20.25 -static int fd_list[READ_POOL_SIZE+1]; 20.26 - 20.27 -/** 20.28 - * readblock: read a block from disk 20.29 - * @id: block id to read 20.30 - * 20.31 - * @return: pointer to block, NULL on error 20.32 - */ 20.33 - 20.34 -void *readblock(u64 id) 20.35 -{ 20.36 - void *block; 20.37 - int tid = (int)pthread_getspecific(tid_key); 20.38 - 20.39 - if (lseek64(fd_list[tid], ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { 20.40 - printf ("%Ld\n", (id - 1) * BLOCK_SIZE); 20.41 - perror("readblock lseek"); 20.42 - goto err; 20.43 - } 20.44 - if ((block = malloc(BLOCK_SIZE)) == NULL) { 20.45 - perror("readblock malloc"); 20.46 - goto err; 20.47 - } 20.48 - if (read(fd_list[tid], block, BLOCK_SIZE) != BLOCK_SIZE) { 20.49 - perror("readblock read"); 20.50 - free(block); 20.51 - goto err; 20.52 - } 20.53 - return block; 20.54 - 20.55 -err: 20.56 - return NULL; 20.57 -} 20.58 - 20.59 -/** 20.60 - * writeblock: write an existing block to disk 20.61 - * @id: block id 20.62 - * @block: pointer to block 20.63 - * 20.64 - * @return: zero on success, -1 on failure 20.65 - */ 20.66 -int writeblock(u64 id, void *block) 20.67 -{ 20.68 - int tid = (int)pthread_getspecific(tid_key); 20.69 - 20.70 - if (lseek64(fd_list[tid], ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { 20.71 - perror("writeblock lseek"); 20.72 - 
goto err; 20.73 - } 20.74 - if (write(fd_list[tid], block, BLOCK_SIZE) < 0) { 20.75 - perror("writeblock write"); 20.76 - goto err; 20.77 - } 20.78 - return 0; 20.79 - 20.80 -err: 20.81 - return -1; 20.82 -} 20.83 - 20.84 -/** 20.85 - * allocblock: write a new block to disk 20.86 - * @block: pointer to block 20.87 - * 20.88 - * @return: new id of block on disk 20.89 - */ 20.90 - 20.91 -u64 allocblock(void *block) 20.92 -{ 20.93 - u64 lb; 20.94 - off64_t pos; 20.95 - int tid = (int)pthread_getspecific(tid_key); 20.96 - 20.97 - pos = lseek64(fd_list[tid], 0, SEEK_END); 20.98 - if (pos == (off64_t)-1) { 20.99 - perror("allocblock lseek"); 20.100 - goto err; 20.101 - } 20.102 - if (pos % BLOCK_SIZE != 0) { 20.103 - fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE); 20.104 - goto err; 20.105 - } 20.106 - if (write(fd_list[tid], block, BLOCK_SIZE) != BLOCK_SIZE) { 20.107 - perror("allocblock write"); 20.108 - goto err; 20.109 - } 20.110 - lb = pos / BLOCK_SIZE + 1; 20.111 - 20.112 - return lb; 20.113 - 20.114 -err: 20.115 - return 0; 20.116 - 20.117 -} 20.118 - 20.119 - 20.120 -/** 20.121 - * newblock: get a new in-memory block set to zeros 20.122 - * 20.123 - * @return: pointer to new block, NULL on error 20.124 - */ 20.125 -void *newblock() 20.126 -{ 20.127 - void *block = malloc(BLOCK_SIZE); 20.128 - if (block == NULL) { 20.129 - perror("newblock"); 20.130 - return NULL; 20.131 - } 20.132 - memset(block, 0, BLOCK_SIZE); 20.133 - return block; 20.134 -} 20.135 - 20.136 - 20.137 -/** 20.138 - * freeblock: unallocate an in-memory block 20.139 - * @id: block id (zero if this is only in-memory) 20.140 - * @block: block to be freed 20.141 - */ 20.142 -void freeblock(void *block) 20.143 -{ 20.144 - if (block != NULL) 20.145 - free(block); 20.146 -} 20.147 - 20.148 - 20.149 -int __init_blockstore(void) 20.150 -{ 20.151 - int i; 20.152 - 20.153 - for (i=0; i<(READ_POOL_SIZE+1); i++) { 20.154 - 20.155 - fd_list[i] = open("blockstore.dat", 20.156 - O_RDWR | O_CREAT | 
O_LARGEFILE, 0644); 20.157 - 20.158 - if (fd_list[i] < 0) { 20.159 - perror("open"); 20.160 - return -1; 20.161 - } 20.162 - } 20.163 - return 0; 20.164 -}
21.1 --- a/tools/blktap/blockstore.c Fri May 20 14:20:31 2005 +0000 21.2 +++ b/tools/blktap/blockstore.c Fri May 20 14:50:49 2005 +0000 21.3 @@ -19,7 +19,7 @@ 21.4 #include <pthread.h> 21.5 #include "parallax-threaded.h" 21.6 21.7 -#define BLOCKSTORE_REMOTE 21.8 +//#define BLOCKSTORE_REMOTE 21.9 //#define BSDEBUG 21.10 21.11 #define RETRY_TIMEOUT 1000000 /* microseconds */ 21.12 @@ -942,7 +942,8 @@ u64 allocblock_hint(void *block, u64 hin 21.13 void *readblock(u64 id) { 21.14 void *block; 21.15 int block_fp; 21.16 - 21.17 + 21.18 +//printf("readblock(%llu)\n", id); 21.19 block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644); 21.20 21.21 if (block_fp < 0) { 21.22 @@ -1336,6 +1337,7 @@ int __init_blockstore(void) 21.23 void __exit_blockstore(void) 21.24 { 21.25 int i; 21.26 +#ifdef BLOCKSTORE_REMOTE 21.27 pthread_mutex_destroy(&ptmutex_recv); 21.28 pthread_mutex_destroy(&ptmutex_luid); 21.29 pthread_mutex_destroy(&ptmutex_queue); 21.30 @@ -1345,4 +1347,5 @@ void __exit_blockstore(void) 21.31 pthread_mutex_destroy(&(pool_thread[i].ptmutex)); 21.32 pthread_cond_destroy(&(pool_thread[i].ptcv)); 21.33 } 21.34 +#endif 21.35 }
22.1 --- a/tools/blktap/libgnbd/Makefile Fri May 20 14:20:31 2005 +0000 22.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 22.3 @@ -1,8 +0,0 @@ 22.4 - 22.5 -CFLAGS += -Wall -Werror -g 22.6 -LDFLAGS += -g 22.7 - 22.8 -libgnbd.a: libgnbd.o 22.9 - $(AR) r $@ $< 22.10 - 22.11 -gnbdtest: gnbdtest.o libgnbd.a
23.1 --- a/tools/blktap/libgnbd/gnbdtest.c Fri May 20 14:20:31 2005 +0000 23.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 23.3 @@ -1,90 +0,0 @@ 23.4 - 23.5 -#include <err.h> 23.6 -#include <stdint.h> 23.7 -#include <stdio.h> 23.8 -#include <stdlib.h> 23.9 -#include <string.h> 23.10 -#include <unistd.h> 23.11 - 23.12 -#include <sys/poll.h> 23.13 - 23.14 -#include "libgnbd.h" 23.15 - 23.16 -#define PRINTF(x) printf x 23.17 -#if 0 23.18 -#define DFPRINTF(x...) fprintf(stderr, ##x) 23.19 -#define DPRINTF(x) DFPRINTF x 23.20 -#else 23.21 -#define DPRINTF(x) 23.22 -#endif 23.23 - 23.24 -static unsigned char buf1[8 << 9]; 23.25 -static unsigned char buf2[8 << 9]; 23.26 -static unsigned char buf3[8 << 9]; 23.27 - 23.28 -int 23.29 -main(int argc, char **argv) 23.30 -{ 23.31 - struct gnbd_handle *gh; 23.32 - struct pollfd pfd[1]; 23.33 - int err, tout; 23.34 - 23.35 - gh = gnbd_setup("panik", 0x38e7, "cl349-nahant-beta2-root1", 23.36 - "arcadians.cl.cam.ac.uk"); 23.37 - if (gh == NULL) 23.38 - errx(1, "gnbd_setup"); 23.39 - 23.40 - memset(pfd, 0, sizeof(pfd)); 23.41 - pfd[0].fd = gnbd_fd(gh); 23.42 - pfd[0].events = POLLIN; 23.43 - 23.44 - while ((tout = poll(pfd, 1, 0)) >= 0) { 23.45 - if (tout == 0) 23.46 - continue; 23.47 - DPRINTF(("event\n")); 23.48 - if (pfd[0].revents) { 23.49 - err = gnbd_reply(gh); 23.50 - pfd[0].events = POLLIN; 23.51 - switch (err) { 23.52 - case GNBD_LOGIN_DONE: 23.53 - DPRINTF(("sectors: %08llu\n", 23.54 - gnbd_sectors(gh))); 23.55 - err = gnbd_read(gh, 8, 8, buf2, 1); 23.56 - if (err) 23.57 - warnx("gnbd_read"); 23.58 - err = gnbd_read(gh, 0, 8, buf1, 0); 23.59 - if (err) 23.60 - warnx("gnbd_read"); 23.61 - err = gnbd_read(gh, 16, 8, buf3, 2); 23.62 - if (err) 23.63 - warnx("gnbd_read"); 23.64 - break; 23.65 - case GNBD_REQUEST_DONE: 23.66 - DPRINTF(("request done %ld\n", 23.67 - gnbd_finished_request(gh))); 23.68 - if (0 && gnbd_finished_request(gh) == 0) { 23.69 - write(1, buf1, 8 << 9); 23.70 - err = gnbd_write(gh, 0, 8, buf1, 10); 23.71 - 
if (err) 23.72 - warnx("gnbd_write"); 23.73 - } 23.74 - break; 23.75 - case GNBD_CONTINUE: 23.76 - DPRINTF(("continue\n")); 23.77 - break; 23.78 - case 0: 23.79 - break; 23.80 - case GNBD_CONTINUE_WRITE: 23.81 - DPRINTF(("continue write\n")); 23.82 - pfd[0].events |= POLLOUT; 23.83 - break; 23.84 - default: 23.85 - warnx("gnbd_reply error"); 23.86 - break; 23.87 - } 23.88 - DPRINTF(("got gnbd reply\n")); 23.89 - } 23.90 - } 23.91 - 23.92 - return 0; 23.93 -}
24.1 --- a/tools/blktap/libgnbd/libgnbd.c Fri May 20 14:20:31 2005 +0000 24.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 24.3 @@ -1,647 +0,0 @@ 24.4 -/* libgnbd.c 24.5 - * 24.6 - * gnbd client library 24.7 - * 24.8 - * Copyright (c) 2005, Christian Limpach 24.9 - */ 24.10 - 24.11 -#include <byteswap.h> 24.12 -#include <endian.h> 24.13 -#include <err.h> 24.14 -#include <errno.h> 24.15 -#include <netdb.h> 24.16 -#include <stdlib.h> 24.17 -#include <string.h> 24.18 -#include <unistd.h> 24.19 - 24.20 -#include <sys/socket.h> 24.21 -#include <sys/time.h> 24.22 -#include <sys/types.h> 24.23 - 24.24 -#include <stdio.h> 24.25 - 24.26 -#include "libgnbd.h" 24.27 - 24.28 -#define PROTOCOL_VERSION 2 24.29 - 24.30 -#define EXTERN_KILL_GSERV_REQ 5 24.31 -#define EXTERN_LOGIN_REQ 6 24.32 - 24.33 -#define GNBD_REQUEST_MAGIC 0x37a07e00 24.34 -#define GNBD_KEEP_ALIVE_MAGIC 0x5b46d8c2 24.35 -#define GNBD_REPLY_MAGIC 0x41f09370 24.36 - 24.37 -enum { 24.38 - GNBD_CMD_READ = 0, 24.39 - GNBD_CMD_WRITE = 1, 24.40 - GNBD_CMD_DISC = 2, 24.41 - GNBD_CMD_PING = 3 24.42 -}; 24.43 - 24.44 -#if __BYTE_ORDER == __BIG_ENDIAN 24.45 -#define htonll(x) (x) 24.46 -#define ntohll(x) (x) 24.47 -#endif 24.48 -#if __BYTE_ORDER == __LITTLE_ENDIAN 24.49 -#define htonll(x) bswap_64(x) 24.50 -#define ntohll(x) bswap_64(x) 24.51 -#endif 24.52 - 24.53 -#define PRINTF(x) printf x 24.54 -#if 0 24.55 -#define DFPRINTF(x...) 
fprintf(stderr, ##x) 24.56 -#define DPRINTF(x) DFPRINTF x 24.57 -#else 24.58 -#define DPRINTF(x) 24.59 -#endif 24.60 - 24.61 -struct gnbd_request { 24.62 - struct gnbd_request *gr_next; 24.63 - unsigned char *gr_buf; 24.64 - ssize_t gr_size; 24.65 - ssize_t gr_done; 24.66 - unsigned long gr_cookie; 24.67 -}; 24.68 - 24.69 -struct gnbd_handle { 24.70 - int gh_fd; 24.71 - unsigned int gh_flags; 24.72 - uint64_t gh_sectors; 24.73 - char gh_devname[32]; 24.74 - char gh_nodename[65]; 24.75 - struct sockaddr_in gh_sin; 24.76 - struct gnbd_request *gh_outstanding_requests; 24.77 - struct gnbd_request **gh_outstanding_requests_last; 24.78 - struct gnbd_request *gh_incoming_request; 24.79 - unsigned long gh_finished_request; 24.80 -}; 24.81 -#define GHF_EXPECT_KILL_GSERV_REPLY 0x0001 24.82 -#define GHF_EXPECT_LOGIN_REPLY 0x0002 24.83 -#define GHF_INCOMING_REQUEST 0x0004 24.84 - 24.85 -struct device_req { 24.86 - char name[32]; 24.87 -}; 24.88 - 24.89 -struct node_req { 24.90 - char node_name[65]; 24.91 -}; 24.92 - 24.93 -struct login_req { 24.94 - uint64_t timestamp; 24.95 - uint16_t version; 24.96 - uint8_t pad[6]; 24.97 - char devname[32]; 24.98 -}; 24.99 - 24.100 -struct login_reply { 24.101 - uint64_t sectors; 24.102 - uint16_t version; 24.103 - uint8_t err; 24.104 - uint8_t pad[5]; 24.105 -}; 24.106 - 24.107 -struct gnbd_server_request { 24.108 - uint32_t magic; 24.109 - uint32_t type; 24.110 - char handle[8]; 24.111 - uint64_t from; 24.112 - uint32_t len; 24.113 -} __attribute__ ((packed)); 24.114 - 24.115 -struct gnbd_server_reply { 24.116 - uint32_t magic; 24.117 - uint32_t error; 24.118 - char handle[8]; 24.119 -} __attribute__ ((packed)); 24.120 - 24.121 -static int 24.122 -read_buf(int fd, void *buf, size_t count, size_t *read_count) 24.123 -{ 24.124 - int err; 24.125 - 24.126 - err = read(fd, buf, count); 24.127 - if (read_count) { 24.128 - if (err >= 0) 24.129 - *read_count = err; 24.130 - } else if (err != count) 24.131 - return EINTR; /* xxx */ 24.132 - 
return err < 0; 24.133 -} 24.134 - 24.135 -static int 24.136 -read_4(int fd, unsigned long *val) 24.137 -{ 24.138 - unsigned long buf; 24.139 - int err; 24.140 - 24.141 - err = read_buf(fd, &buf, sizeof(buf), NULL); 24.142 - if (err == 0) 24.143 - *val = ntohl(buf); 24.144 - return err; 24.145 -} 24.146 - 24.147 -static int 24.148 -write_buf(int fd, void *buf, size_t count) 24.149 -{ 24.150 - int err; 24.151 - 24.152 - err = write(fd, buf, count); 24.153 - return err < 0; 24.154 -} 24.155 - 24.156 -static int 24.157 -write_4(int fd, unsigned long val) 24.158 -{ 24.159 - unsigned long buf; 24.160 - int err; 24.161 - 24.162 - buf = htonl(val); 24.163 - err = write_buf(fd, &buf, sizeof(buf)); 24.164 - return err; 24.165 -} 24.166 - 24.167 - 24.168 -static int 24.169 -socket_connect(struct gnbd_handle *gh) 24.170 -{ 24.171 - int err; 24.172 - 24.173 - if (gh->gh_fd >= 0) 24.174 - return 0; 24.175 - 24.176 - gh->gh_fd = socket(PF_INET, SOCK_STREAM, 0); 24.177 - if (gh->gh_fd < 0) { 24.178 - warn("socket"); 24.179 - return gh->gh_fd; 24.180 - } 24.181 - 24.182 - err = connect(gh->gh_fd, (struct sockaddr *)&gh->gh_sin, 24.183 - sizeof(gh->gh_sin)); 24.184 - if (err) { 24.185 - warn("connect"); 24.186 - goto out; 24.187 - } 24.188 - 24.189 - return 0; 24.190 - out: 24.191 - close (gh->gh_fd); 24.192 - gh->gh_fd = -1; 24.193 - return err; 24.194 -} 24.195 - 24.196 -static int 24.197 -socket_shutdown(struct gnbd_handle *gh) 24.198 -{ 24.199 - 24.200 - close (gh->gh_fd); 24.201 - gh->gh_fd = -1; 24.202 - return 0; 24.203 -} 24.204 - 24.205 -static int 24.206 -find_request(struct gnbd_handle *gh, struct gnbd_request *gr) 24.207 -{ 24.208 - struct gnbd_request **tmp; 24.209 - 24.210 - for (tmp = &gh->gh_outstanding_requests; *tmp; 24.211 - tmp = &(*tmp)->gr_next) { 24.212 - if (*tmp == gr) { 24.213 - *tmp = (*tmp)->gr_next; 24.214 - if (*tmp == NULL) 24.215 - gh->gh_outstanding_requests_last = tmp; 24.216 - return 0; 24.217 - } 24.218 - } 24.219 - return ENOENT; 24.220 -} 
24.221 - 24.222 -static int 24.223 -kill_gserv(struct gnbd_handle *gh) 24.224 -{ 24.225 - struct device_req dr; 24.226 - struct node_req nr; 24.227 - int err; 24.228 - 24.229 - DPRINTF(("gnbd_kill_gserv\n")); 24.230 - err = socket_connect(gh); 24.231 - if (err) { 24.232 - warnx("socket_connect"); 24.233 - return err; 24.234 - } 24.235 - 24.236 - err = write_4(gh->gh_fd, EXTERN_KILL_GSERV_REQ); 24.237 - if (err) { 24.238 - warnx("send EXTERN_LOGIN_REQ failed"); 24.239 - goto out; 24.240 - } 24.241 - 24.242 - strncpy(dr.name, gh->gh_devname, sizeof(dr.name)); 24.243 - err = write_buf(gh->gh_fd, &dr, sizeof(dr)); 24.244 - if (err) { 24.245 - warnx("send device_req failed"); 24.246 - goto out; 24.247 - } 24.248 - 24.249 - strncpy(nr.node_name, gh->gh_nodename, sizeof(nr.node_name)); 24.250 - err = write_buf(gh->gh_fd, &nr, sizeof(nr)); 24.251 - if (err) { 24.252 - warnx("send node_req failed"); 24.253 - goto out; 24.254 - } 24.255 - 24.256 - gh->gh_flags |= GHF_EXPECT_KILL_GSERV_REPLY; 24.257 - DPRINTF(("gnbd_kill_gserv ok\n")); 24.258 - 24.259 - return 0; 24.260 - out: 24.261 - socket_shutdown(gh); 24.262 - return err; 24.263 -} 24.264 - 24.265 -static int 24.266 -login(struct gnbd_handle *gh) 24.267 -{ 24.268 - struct login_req lr; 24.269 - struct node_req nr; 24.270 - int err; 24.271 - uint64_t timestamp; 24.272 - struct timeval tv; 24.273 - 24.274 - DPRINTF(("gnbd_login\n")); 24.275 - err = socket_connect(gh); 24.276 - if (err) { 24.277 - warnx("socket_connect"); 24.278 - return err; 24.279 - } 24.280 - 24.281 - err = write_4(gh->gh_fd, EXTERN_LOGIN_REQ); 24.282 - if (err) { 24.283 - warnx("send EXTERN_LOGIN_REQ failed"); 24.284 - goto out; 24.285 - } 24.286 - 24.287 - err = gettimeofday(&tv, NULL); 24.288 - if (err) { 24.289 - warnx("gettimeofday"); 24.290 - goto out; 24.291 - } 24.292 - timestamp = (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec; 24.293 - 24.294 - lr.timestamp = htonll(timestamp); 24.295 - lr.version = htons(PROTOCOL_VERSION); 24.296 - 
strncpy(lr.devname, gh->gh_devname, sizeof(lr.devname)); 24.297 - err = write_buf(gh->gh_fd, &lr, sizeof(lr)); 24.298 - if (err) { 24.299 - warnx("send login_req failed"); 24.300 - goto out; 24.301 - } 24.302 - 24.303 - strncpy(nr.node_name, gh->gh_nodename, sizeof(nr.node_name)); 24.304 - err = write_buf(gh->gh_fd, &nr, sizeof(nr)); 24.305 - if (err) { 24.306 - warnx("send node_req failed"); 24.307 - goto out; 24.308 - } 24.309 - 24.310 - gh->gh_flags |= GHF_EXPECT_LOGIN_REPLY; 24.311 - 24.312 - DPRINTF(("gnbd_login ok\n")); 24.313 - return 0; 24.314 - out: 24.315 - socket_shutdown(gh); 24.316 - return err; 24.317 -} 24.318 - 24.319 -static int 24.320 -kill_gserv_reply(struct gnbd_handle *gh) 24.321 -{ 24.322 - unsigned long reply; 24.323 - int err; 24.324 - 24.325 - DPRINTF(("read gnbd_kill_gserv_reply\n")); 24.326 - err = read_4(gh->gh_fd, &reply); 24.327 - if (err) { 24.328 - warnx("read kill_gserv_reply failed"); 24.329 - return err; 24.330 - } 24.331 - 24.332 - if (reply && reply != ENODEV) { 24.333 - warnx("kill gserv failed: %s", strerror(reply)); 24.334 - return reply; 24.335 - } 24.336 - 24.337 - gh->gh_flags &= ~GHF_EXPECT_KILL_GSERV_REPLY; 24.338 - socket_shutdown(gh); 24.339 - 24.340 - err = login(gh); 24.341 - if (err) 24.342 - warnx("gnbd_login"); 24.343 - 24.344 - return err; 24.345 -} 24.346 - 24.347 -static int 24.348 -login_reply(struct gnbd_handle *gh) 24.349 -{ 24.350 - struct login_reply lr; 24.351 - int err; 24.352 - 24.353 - DPRINTF(("read gnbd_login_reply\n")); 24.354 - err = read_buf(gh->gh_fd, &lr, sizeof(lr), NULL); 24.355 - if (err) { 24.356 - warnx("read login_reply failed"); 24.357 - return err; 24.358 - } 24.359 - 24.360 - if (lr.err) { 24.361 - if (lr.version) { 24.362 - warnx("gnbd version mismatch %04x != %04x", 24.363 - PROTOCOL_VERSION, ntohs(lr.version)); 24.364 - return EINVAL; 24.365 - } 24.366 - warnx("login refused: %s", strerror(lr.err)); 24.367 - return lr.err; 24.368 - } 24.369 - gh->gh_sectors = ntohll(lr.sectors); 
24.370 - 24.371 - gh->gh_flags &= ~GHF_EXPECT_LOGIN_REPLY; 24.372 - 24.373 - return GNBD_LOGIN_DONE; 24.374 -} 24.375 - 24.376 -static int 24.377 -incoming_request(struct gnbd_handle *gh) 24.378 -{ 24.379 - struct gnbd_request *gr = gh->gh_incoming_request; 24.380 - ssize_t done; 24.381 - int err; 24.382 - 24.383 - DPRINTF(("incoming_request: done %d size %d\n", gr->gr_done, 24.384 - gr->gr_size)); 24.385 - err = read_buf(gh->gh_fd, gr->gr_buf + gr->gr_done, 24.386 - gr->gr_size - gr->gr_done, &done); 24.387 - if (err) 24.388 - goto out; 24.389 - 24.390 - DPRINTF(("incoming_request: got %d\n", done)); 24.391 - gr->gr_done += done; 24.392 - if (gr->gr_done == gr->gr_size) { 24.393 - gh->gh_flags &= ~GHF_INCOMING_REQUEST; 24.394 - gh->gh_finished_request = gr->gr_cookie; 24.395 - free(gr); 24.396 - return GNBD_REQUEST_DONE; 24.397 - } 24.398 - 24.399 - return GNBD_CONTINUE; 24.400 - 24.401 - out: 24.402 - gh->gh_flags &= ~GHF_INCOMING_REQUEST; 24.403 - gh->gh_finished_request = 0; 24.404 - free(gr); 24.405 - return err; 24.406 -} 24.407 - 24.408 - 24.409 - 24.410 -int 24.411 -gnbd_close(struct gnbd_handle *gh) 24.412 -{ 24.413 - int err; 24.414 - struct gnbd_request **tmp; 24.415 - 24.416 - for (tmp = &gh->gh_outstanding_requests; *tmp; tmp = &(*tmp)->gr_next) 24.417 - free(*tmp); 24.418 - 24.419 - if (gh->gh_flags & GHF_INCOMING_REQUEST) 24.420 - free(gh->gh_incoming_request); 24.421 - 24.422 - err = close(gh->gh_fd); 24.423 - if (err) 24.424 - warnx("close"); 24.425 - free(gh); 24.426 - 24.427 - return err; 24.428 -} 24.429 - 24.430 -int 24.431 -gnbd_fd(struct gnbd_handle *gh) 24.432 -{ 24.433 - return gh->gh_fd; 24.434 -} 24.435 - 24.436 -unsigned long 24.437 -gnbd_finished_request(struct gnbd_handle *gh) 24.438 -{ 24.439 - return gh->gh_finished_request; 24.440 -} 24.441 - 24.442 -int 24.443 -gnbd_read(struct gnbd_handle *gh, uint64_t sector, ssize_t count, 24.444 - unsigned char *buf, unsigned long cookie) 24.445 -{ 24.446 - struct gnbd_server_request gsr; 
24.447 - struct gnbd_request *gr; 24.448 - int err; 24.449 - 24.450 - gr = malloc(sizeof(struct gnbd_request)); 24.451 - if (gr == NULL) 24.452 - return ENOMEM; 24.453 - memset(gr, 0, sizeof(gr)); 24.454 - 24.455 - gr->gr_buf = buf; 24.456 - gr->gr_size = count << 9; 24.457 - gr->gr_done = 0; 24.458 - gr->gr_cookie = cookie; 24.459 - 24.460 - gsr.magic = htonl(GNBD_REQUEST_MAGIC); 24.461 - gsr.type = htonl(GNBD_CMD_READ); 24.462 - gsr.from = htonll(sector << 9); 24.463 - gsr.len = htonl(gr->gr_size); 24.464 - memset(gsr.handle, 0, sizeof(gsr.handle)); 24.465 - memcpy(gsr.handle, &gr, sizeof(gr)); 24.466 - 24.467 - err = write_buf(gh->gh_fd, &gsr, sizeof(gsr)); 24.468 - if (err) { 24.469 - warnx("write_buf"); 24.470 - goto out; 24.471 - } 24.472 - 24.473 - *gh->gh_outstanding_requests_last = gr; 24.474 - gh->gh_outstanding_requests_last = &gr->gr_next; 24.475 - 24.476 - return 0; 24.477 - 24.478 - out: 24.479 - free(gr); 24.480 - return err; 24.481 -} 24.482 - 24.483 -int 24.484 -gnbd_write(struct gnbd_handle *gh, uint64_t sector, ssize_t count, 24.485 - unsigned char *buf, unsigned long cookie) 24.486 -{ 24.487 - struct gnbd_server_request gsr; 24.488 - struct gnbd_request *gr; 24.489 - int err; 24.490 - 24.491 - gr = malloc(sizeof(struct gnbd_request)); 24.492 - if (gr == NULL) 24.493 - return ENOMEM; 24.494 - memset(gr, 0, sizeof(gr)); 24.495 - 24.496 - gr->gr_buf = buf; 24.497 - gr->gr_size = count << 9; 24.498 - gr->gr_done = 0; 24.499 - gr->gr_cookie = cookie; 24.500 - 24.501 - gsr.magic = htonl(GNBD_REQUEST_MAGIC); 24.502 - gsr.type = htonl(GNBD_CMD_WRITE); 24.503 - gsr.from = htonll(sector << 9); 24.504 - gsr.len = htonl(gr->gr_size); 24.505 - memset(gsr.handle, 0, sizeof(gsr.handle)); 24.506 - memcpy(gsr.handle, &gr, sizeof(gr)); 24.507 - 24.508 - err = write_buf(gh->gh_fd, &gsr, sizeof(gsr)); 24.509 - if (err) { 24.510 - warnx("write_buf"); 24.511 - goto out; 24.512 - } 24.513 - 24.514 - /* XXX handle non-blocking socket */ 24.515 - err = 
write_buf(gh->gh_fd, buf, gr->gr_size); 24.516 - if (err) { 24.517 - warnx("write_buf"); 24.518 - goto out; 24.519 - } 24.520 - gr->gr_done += gr->gr_size; 24.521 - 24.522 - *gh->gh_outstanding_requests_last = gr; 24.523 - gh->gh_outstanding_requests_last = &gr->gr_next; 24.524 - 24.525 - DPRINTF(("write done\n")); 24.526 - 24.527 - return 0; 24.528 - 24.529 - out: 24.530 - free(gr); 24.531 - return err; 24.532 -} 24.533 - 24.534 -int 24.535 -gnbd_reply(struct gnbd_handle *gh) 24.536 -{ 24.537 - struct gnbd_server_reply gsr; 24.538 - struct gnbd_request *gr; 24.539 - int err; 24.540 - 24.541 - DPRINTF(("gnbd_reply flags %x\n", gh->gh_flags)); 24.542 - if ((gh->gh_flags & GHF_EXPECT_KILL_GSERV_REPLY)) 24.543 - return kill_gserv_reply(gh); 24.544 - if ((gh->gh_flags & GHF_EXPECT_LOGIN_REPLY)) 24.545 - return login_reply(gh); 24.546 - if ((gh->gh_flags & GHF_INCOMING_REQUEST)) 24.547 - return incoming_request(gh); 24.548 - 24.549 - DPRINTF(("read response\n")); 24.550 - err = read_buf(gh->gh_fd, &gsr, sizeof(gsr), NULL); 24.551 - if (err) { 24.552 - warnx("read gnbd_reply failed"); 24.553 - return err; 24.554 - } 24.555 - 24.556 - if (ntohl(gsr.error)) { 24.557 - warnx("gnbd server reply error: %s", strerror(gsr.error)); 24.558 - return gsr.error; 24.559 - } 24.560 - 24.561 - switch (ntohl(gsr.magic)) { 24.562 - case GNBD_KEEP_ALIVE_MAGIC: 24.563 - DPRINTF(("read keep alive magic\n")); 24.564 - return GNBD_CONTINUE; 24.565 - case GNBD_REPLY_MAGIC: 24.566 - DPRINTF(("read reply magic\n")); 24.567 - memcpy(&gr, gsr.handle, sizeof(gr)); 24.568 - err = find_request(gh, gr); 24.569 - if (err) { 24.570 - warnx("unknown request"); 24.571 - return err; 24.572 - } 24.573 - if (gr->gr_done != gr->gr_size) { 24.574 - gh->gh_incoming_request = gr; 24.575 - gh->gh_flags |= GHF_INCOMING_REQUEST; 24.576 - return GNBD_CONTINUE; 24.577 - } else { 24.578 - gh->gh_finished_request = gr->gr_cookie; 24.579 - free(gr); 24.580 - return GNBD_REQUEST_DONE; 24.581 - } 24.582 - default: 24.583 
- break; 24.584 - } 24.585 - 24.586 - return GNBD_CONTINUE; 24.587 -} 24.588 - 24.589 -uint64_t 24.590 -gnbd_sectors(struct gnbd_handle *gh) 24.591 -{ 24.592 - 24.593 - return gh->gh_sectors; 24.594 -} 24.595 - 24.596 -struct gnbd_handle * 24.597 -gnbd_setup(char *server, unsigned int port, char *devname, char *nodename) 24.598 -{ 24.599 - struct gnbd_handle *gh; 24.600 - struct addrinfo *res, *ai; 24.601 - int err; 24.602 - 24.603 - gh = malloc(sizeof(struct gnbd_handle)); 24.604 - if (gh == NULL) 24.605 - return NULL; 24.606 - memset(gh, 0, sizeof(gh)); 24.607 - gh->gh_fd = -1; 24.608 - gh->gh_outstanding_requests_last = &gh->gh_outstanding_requests; 24.609 - 24.610 - strncpy(gh->gh_devname, devname, sizeof(gh->gh_devname)); 24.611 - strncpy(gh->gh_nodename, nodename, sizeof(gh->gh_nodename)); 24.612 - 24.613 - err = getaddrinfo(server, NULL, NULL, &res); 24.614 - if (err) { 24.615 - if (err != EAI_SYSTEM) 24.616 - warnx("getaddrinfo: %s", gai_strerror(err)); 24.617 - else 24.618 - warn("getaddrinfo: %s", gai_strerror(err)); 24.619 - goto out; 24.620 - } 24.621 - 24.622 - for (ai = res; ai; ai = ai->ai_next) { 24.623 - if (ai->ai_socktype != SOCK_STREAM) 24.624 - continue; 24.625 - if (ai->ai_family == AF_INET) 24.626 - break; 24.627 - } 24.628 - 24.629 - if (ai == NULL) 24.630 - goto out; 24.631 - 24.632 - gh->gh_sin.sin_family = ai->ai_family; 24.633 - gh->gh_sin.sin_port = htons(port); 24.634 - memcpy(&gh->gh_sin.sin_addr, 24.635 - &((struct sockaddr_in *)ai->ai_addr)->sin_addr, 24.636 - sizeof(gh->gh_sin.sin_addr)); 24.637 - 24.638 - err = kill_gserv(gh); 24.639 - if (err) { 24.640 - warnx("gnbd_kill_gserv"); 24.641 - goto out; 24.642 - } 24.643 - 24.644 - freeaddrinfo(res); 24.645 - return gh; 24.646 - out: 24.647 - free(gh); 24.648 - freeaddrinfo(res); 24.649 - return NULL; 24.650 -}
25.1 --- a/tools/blktap/libgnbd/libgnbd.h Fri May 20 14:20:31 2005 +0000 25.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 25.3 @@ -1,25 +0,0 @@ 25.4 -/* libgnbd.h 25.5 - * 25.6 - * gnbd client library 25.7 - * 25.8 - * Copyright (c) 2005, Christian Limpach 25.9 - */ 25.10 - 25.11 -#define GNBD_LOGIN_DONE 0x10001 25.12 -#define GNBD_REQUEST_DONE 0x10002 25.13 -#define GNBD_CONTINUE 0x10003 25.14 -#define GNBD_CONTINUE_WRITE 0x10004 25.15 - 25.16 -struct gnbd_handle; 25.17 -int gnbd_close(struct gnbd_handle *); 25.18 -int gnbd_fd(struct gnbd_handle *); 25.19 -unsigned long gnbd_finished_request(struct gnbd_handle *); 25.20 -int gnbd_kill_gserv(struct gnbd_handle *); 25.21 -int gnbd_login(struct gnbd_handle *); 25.22 -int gnbd_read(struct gnbd_handle *, uint64_t, ssize_t, unsigned char *, 25.23 - unsigned long); 25.24 -int gnbd_write(struct gnbd_handle *, uint64_t, ssize_t, unsigned char *, 25.25 - unsigned long); 25.26 -int gnbd_reply(struct gnbd_handle *); 25.27 -uint64_t gnbd_sectors(struct gnbd_handle *); 25.28 -struct gnbd_handle *gnbd_setup(char *, unsigned int, char *, char *);
26.1 --- a/tools/blktap/parallax-threaded.c Fri May 20 14:20:31 2005 +0000 26.2 +++ b/tools/blktap/parallax-threaded.c Fri May 20 14:50:49 2005 +0000 26.3 @@ -145,33 +145,33 @@ void blkif_destroy(blkif_be_destroy_t *d 26.4 destroy->status = BLKIF_BE_STATUS_OKAY; 26.5 } 26.6 26.7 -void vbd_grow(blkif_be_vbd_grow_t *grow) 26.8 +void vbd_create(blkif_be_vbd_create_t *create) 26.9 { 26.10 blkif_t *blkif; 26.11 vdi_t *vdi, **vdip; 26.12 - blkif_vdev_t vdevice = grow->vdevice; 26.13 + blkif_vdev_t vdevice = create->vdevice; 26.14 26.15 - DPRINTF("parallax (vbd_grow): grow=%p\n", grow); 26.16 + DPRINTF("parallax (vbd_create): create=%p\n", create); 26.17 26.18 - blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle); 26.19 + blkif = blkif_find_by_handle(create->domid, create->blkif_handle); 26.20 if ( blkif == NULL ) 26.21 { 26.22 - DPRINTF("vbd_grow attempted for non-existent blkif (%u,%u)\n", 26.23 - grow->domid, grow->blkif_handle); 26.24 - grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 26.25 + DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", 26.26 + create->domid, create->blkif_handle); 26.27 + create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 26.28 return; 26.29 } 26.30 26.31 /* VDI identifier is in grow->extent.sector_start */ 26.32 - DPRINTF("vbd_grow: grow->extent.sector_start (id) is %llx\n", 26.33 - grow->extent.sector_start); 26.34 + DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", 26.35 + (unsigned long)create->dev_handle); 26.36 26.37 - vdi = vdi_get(grow->extent.sector_start); 26.38 + vdi = vdi_get(create->dev_handle); 26.39 if (vdi == NULL) 26.40 { 26.41 - printf("parallax (vbd_grow): VDI %llx not found.\n", 26.42 - grow->extent.sector_start); 26.43 - grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; 26.44 + printf("parallax (vbd_create): VDI %lx not found.\n", 26.45 + (unsigned long)create->dev_handle); 26.46 + create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; 26.47 return; 26.48 } 26.49 26.50 @@ -183,7 +183,7 @@ void 
vbd_grow(blkif_be_vbd_grow_t *grow) 26.51 *vdip = vdi; 26.52 26.53 DPRINTF("vbd_grow: happy return!\n"); 26.54 - grow->status = BLKIF_BE_STATUS_OKAY; 26.55 + create->status = BLKIF_BE_STATUS_OKAY; 26.56 } 26.57 26.58 int parallax_control(control_msg_t *msg) 26.59 @@ -213,10 +213,10 @@ int parallax_control(control_msg_t *msg) 26.60 blkif_destroy((blkif_be_destroy_t *)msg->msg); 26.61 break; 26.62 26.63 - case CMSG_BLKIF_BE_VBD_GROW: 26.64 - if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) 26.65 + case CMSG_BLKIF_BE_VBD_CREATE: 26.66 + if ( msg->length != sizeof(blkif_be_vbd_create_t) ) 26.67 goto parse_error; 26.68 - vbd_grow((blkif_be_vbd_grow_t *)msg->msg); 26.69 + vbd_create((blkif_be_vbd_create_t *)msg->msg); 26.70 break; 26.71 } 26.72 return 0;
27.1 --- a/tools/blktap/parallax.c Fri May 20 14:20:31 2005 +0000 27.2 +++ b/tools/blktap/parallax.c Fri May 20 14:50:49 2005 +0000 27.3 @@ -10,11 +10,16 @@ 27.4 #include <stdio.h> 27.5 #include <stdlib.h> 27.6 #include <string.h> 27.7 +#include <pthread.h> 27.8 #include "blktaplib.h" 27.9 #include "blockstore.h" 27.10 #include "vdi.h" 27.11 +#include "block-async.h" 27.12 +#include "requests-async.h" 27.13 27.14 #define PARALLAX_DEV 61440 27.15 +#define SECTS_PER_NODE 8 27.16 + 27.17 27.18 #if 0 27.19 #define DPRINTF(_f, _a...) printf ( _f , ## _a ) 27.20 @@ -142,33 +147,33 @@ void blkif_destroy(blkif_be_destroy_t *d 27.21 destroy->status = BLKIF_BE_STATUS_OKAY; 27.22 } 27.23 27.24 -void vbd_grow(blkif_be_vbd_grow_t *grow) 27.25 +void vbd_create(blkif_be_vbd_create_t *create) 27.26 { 27.27 blkif_t *blkif; 27.28 vdi_t *vdi, **vdip; 27.29 - blkif_vdev_t vdevice = grow->vdevice; 27.30 + blkif_vdev_t vdevice = create->vdevice; 27.31 27.32 - DPRINTF("parallax (vbd_grow): grow=%p\n", grow); 27.33 + DPRINTF("parallax (vbd_create): create=%p\n", create); 27.34 27.35 - blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle); 27.36 + blkif = blkif_find_by_handle(create->domid, create->blkif_handle); 27.37 if ( blkif == NULL ) 27.38 { 27.39 - DPRINTF("vbd_grow attempted for non-existent blkif (%u,%u)\n", 27.40 - grow->domid, grow->blkif_handle); 27.41 - grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 27.42 + DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", 27.43 + create->domid, create->blkif_handle); 27.44 + create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 27.45 return; 27.46 } 27.47 27.48 /* VDI identifier is in grow->extent.sector_start */ 27.49 - DPRINTF("vbd_grow: grow->extent.sector_start (id) is %llx\n", 27.50 - grow->extent.sector_start); 27.51 + DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", 27.52 + (unsigned long)create->dev_handle); 27.53 27.54 - vdi = vdi_get(grow->extent.sector_start); 27.55 + vdi = 
vdi_get(create->dev_handle); 27.56 if (vdi == NULL) 27.57 { 27.58 - printf("parallax (vbd_grow): VDI %llx not found.\n", 27.59 - grow->extent.sector_start); 27.60 - grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; 27.61 + printf("parallax (vbd_create): VDI %lx not found.\n", 27.62 + (unsigned long)create->dev_handle); 27.63 + create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; 27.64 return; 27.65 } 27.66 27.67 @@ -180,7 +185,7 @@ void vbd_grow(blkif_be_vbd_grow_t *grow) 27.68 *vdip = vdi; 27.69 27.70 DPRINTF("vbd_grow: happy return!\n"); 27.71 - grow->status = BLKIF_BE_STATUS_OKAY; 27.72 + create->status = BLKIF_BE_STATUS_OKAY; 27.73 } 27.74 27.75 int parallax_control(control_msg_t *msg) 27.76 @@ -210,10 +215,10 @@ int parallax_control(control_msg_t *msg) 27.77 blkif_destroy((blkif_be_destroy_t *)msg->msg); 27.78 break; 27.79 27.80 - case CMSG_BLKIF_BE_VBD_GROW: 27.81 - if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) 27.82 + case CMSG_BLKIF_BE_VBD_CREATE: 27.83 + if ( msg->length != sizeof(blkif_be_vbd_create_t) ) 27.84 goto parse_error; 27.85 - vbd_grow((blkif_be_vbd_grow_t *)msg->msg); 27.86 + vbd_create((blkif_be_vbd_create_t *)msg->msg); 27.87 break; 27.88 } 27.89 return 0; 27.90 @@ -248,9 +253,9 @@ int parallax_probe(blkif_request_t *req, 27.91 img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0); 27.92 img_info[nr_vdis].device = vdi->vdevice; 27.93 img_info[nr_vdis].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; 27.94 - /* The -2 here accounts for the LSB in the radix tree */ 27.95 + /* The -1 here accounts for the LSB in the radix tree */ 27.96 img_info[nr_vdis].capacity = 27.97 - ((1LL << (VDI_HEIGHT-2)) >> SECTOR_SHIFT); 27.98 + ((1LL << (VDI_HEIGHT-1)) * SECTS_PER_NODE); 27.99 nr_vdis++; 27.100 vdi = vdi->next; 27.101 } 27.102 @@ -274,78 +279,122 @@ err: 27.103 return BLKTAP_RESPOND; 27.104 } 27.105 27.106 +typedef struct { 27.107 + blkif_request_t *req; 27.108 + int count; 27.109 + int error; 27.110 + pthread_mutex_t mutex; 27.111 +} pending_t; 27.112 + 27.113 
+#define MAX_REQUESTS 64 27.114 +pending_t pending_list[MAX_REQUESTS]; 27.115 + 27.116 +struct cb_param { 27.117 + pending_t *pent; 27.118 + int segment; 27.119 + u64 sector; 27.120 + u64 vblock; /* for debug printing -- can be removed. */ 27.121 +}; 27.122 + 27.123 +static void read_cb(struct io_ret r, void *in_param) 27.124 +{ 27.125 + struct cb_param *param = (struct cb_param *)in_param; 27.126 + pending_t *p = param->pent; 27.127 + int segment = param->segment; 27.128 + blkif_request_t *req = p->req; 27.129 + unsigned long size, offset, start; 27.130 + char *dpage, *spage; 27.131 + 27.132 + spage = IO_BLOCK(r); 27.133 + if (spage == NULL) { p->error++; goto finish; } 27.134 + dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), segment); 27.135 + 27.136 + /* Calculate read size and offset within the read block. */ 27.137 + 27.138 + offset = (param->sector << SECTOR_SHIFT) % BLOCK_SIZE; 27.139 + size = ( blkif_last_sect (req->frame_and_sects[segment]) - 27.140 + blkif_first_sect(req->frame_and_sects[segment]) + 1 27.141 + ) << SECTOR_SHIFT; 27.142 + start = blkif_first_sect(req->frame_and_sects[segment]) 27.143 + << SECTOR_SHIFT; 27.144 + 27.145 + DPRINTF("ParallaxRead: sect: %lld (%ld,%ld), " 27.146 + "vblock %llx, " 27.147 + "size %lx\n", 27.148 + param->sector, blkif_first_sect(p->req->frame_and_sects[segment]), 27.149 + blkif_last_sect (p->req->frame_and_sects[segment]), 27.150 + param->vblock, size); 27.151 + 27.152 + memcpy(dpage + start, spage + offset, size); 27.153 + freeblock(spage); 27.154 + 27.155 + /* Done the read. Now update the pending record. 
*/ 27.156 + finish: 27.157 + pthread_mutex_lock(&p->mutex); 27.158 + p->count--; 27.159 + 27.160 + if (p->count == 0) { 27.161 + blkif_response_t *rsp; 27.162 + 27.163 + rsp = (blkif_response_t *)req; 27.164 + rsp->id = req->id; 27.165 + rsp->operation = BLKIF_OP_READ; 27.166 + if (p->error == 0) { 27.167 + rsp->status = BLKIF_RSP_OKAY; 27.168 + } else { 27.169 + rsp->status = BLKIF_RSP_ERROR; 27.170 + } 27.171 + blktap_inject_response(rsp); 27.172 + } 27.173 + 27.174 + pthread_mutex_unlock(&p->mutex); 27.175 + 27.176 + free(param); /* TODO: replace with cached alloc/dealloc */ 27.177 +} 27.178 + 27.179 int parallax_read(blkif_request_t *req, blkif_t *blkif) 27.180 { 27.181 blkif_response_t *rsp; 27.182 - unsigned long size, offset, start; 27.183 - u64 sector; 27.184 u64 vblock, gblock; 27.185 vdi_t *vdi; 27.186 + u64 sector; 27.187 int i; 27.188 char *dpage, *spage; 27.189 + pending_t *pent; 27.190 27.191 vdi = blkif_get_vdi(blkif, req->device); 27.192 27.193 if ( vdi == NULL ) 27.194 goto err; 27.195 + 27.196 + pent = &pending_list[ID_TO_IDX(req->id)]; 27.197 + pent->count = req->nr_segments; 27.198 + pent->req = req; 27.199 + pthread_mutex_init(&pent->mutex, NULL); 27.200 27.201 for (i = 0; i < req->nr_segments; i++) { 27.202 - 27.203 - dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 27.204 - 27.205 - /* Round the requested segment to a block address. */ 27.206 - 27.207 - sector = req->sector_number + (8*i); 27.208 - vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT; 27.209 - 27.210 - /* Get that block from the store. */ 27.211 - 27.212 - gblock = vdi_lookup_block(vdi, vblock, NULL); 27.213 - 27.214 - /* Calculate read size and offset within the read block. 
*/ 27.215 - 27.216 - offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE; 27.217 - size = ( blkif_last_sect (req->frame_and_sects[i]) - 27.218 - blkif_first_sect(req->frame_and_sects[i]) + 1 27.219 - ) << SECTOR_SHIFT; 27.220 - start = blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; 27.221 - 27.222 - /* If the block does not exist in the store, return zeros. */ 27.223 - /* Otherwise, copy that region to the guest page. */ 27.224 - 27.225 - DPRINTF("ParallaxRead: sect: %lld (%ld,%ld), " 27.226 - "vblock %llx, gblock %llx, " 27.227 - "size %lx\n", 27.228 - sector, blkif_first_sect(req->frame_and_sects[i]), 27.229 - blkif_last_sect (req->frame_and_sects[i]), 27.230 - vblock, gblock, size); 27.231 - 27.232 - if ( gblock == 0 ) { 27.233 - 27.234 - memset(dpage + start, '\0', size); 27.235 - 27.236 - } else { 27.237 - 27.238 - spage = readblock(gblock); 27.239 - 27.240 - if (spage == NULL) { 27.241 - printf("Error reading gblock from store: %Ld\n", gblock); 27.242 - goto err; 27.243 - } 27.244 - 27.245 - memcpy(dpage + start, spage + offset, size); 27.246 - 27.247 - freeblock(spage); 27.248 - } 27.249 - 27.250 + pthread_t tid; 27.251 + int ret; 27.252 + struct cb_param *p; 27.253 + 27.254 + /* Round the requested segment to a block address. */ 27.255 + sector = req->sector_number + (8*i); 27.256 + vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT; 27.257 + 27.258 + /* TODO: Replace this call to malloc with a cached allocation */ 27.259 + p = (struct cb_param *)malloc(sizeof(struct cb_param)); 27.260 + p->pent = pent; 27.261 + p->sector = sector; 27.262 + p->segment = i; 27.263 + p->vblock = vblock; /* dbg */ 27.264 + 27.265 + /* Get that block from the store. 
*/ 27.266 + async_read(vdi, vblock, read_cb, (void *)p); 27.267 + 27.268 } 27.269 + 27.270 + return BLKTAP_STOLEN; 27.271 27.272 - rsp = (blkif_response_t *)req; 27.273 - rsp->id = req->id; 27.274 - rsp->operation = BLKIF_OP_READ; 27.275 - rsp->status = BLKIF_RSP_OKAY; 27.276 - 27.277 - return BLKTAP_RESPOND; 27.278 err: 27.279 rsp = (blkif_response_t *)req; 27.280 rsp->id = req->id; 27.281 @@ -355,6 +404,37 @@ err: 27.282 return BLKTAP_RESPOND; 27.283 } 27.284 27.285 +static void write_cb(struct io_ret r, void *in_param) 27.286 +{ 27.287 + struct cb_param *param = (struct cb_param *)in_param; 27.288 + pending_t *p = param->pent; 27.289 + blkif_request_t *req = p->req; 27.290 + 27.291 + /* catch errors from the block code. */ 27.292 + if (IO_INT(r) < 0) p->error++; 27.293 + 27.294 + pthread_mutex_lock(&p->mutex); 27.295 + p->count--; 27.296 + 27.297 + if (p->count == 0) { 27.298 + blkif_response_t *rsp; 27.299 + 27.300 + rsp = (blkif_response_t *)req; 27.301 + rsp->id = req->id; 27.302 + rsp->operation = BLKIF_OP_WRITE; 27.303 + if (p->error == 0) { 27.304 + rsp->status = BLKIF_RSP_OKAY; 27.305 + } else { 27.306 + rsp->status = BLKIF_RSP_ERROR; 27.307 + } 27.308 + blktap_inject_response(rsp); 27.309 + } 27.310 + 27.311 + pthread_mutex_unlock(&p->mutex); 27.312 + 27.313 + free(param); /* TODO: replace with cached alloc/dealloc */ 27.314 +} 27.315 + 27.316 int parallax_write(blkif_request_t *req, blkif_t *blkif) 27.317 { 27.318 blkif_response_t *rsp; 27.319 @@ -364,13 +444,20 @@ int parallax_write(blkif_request_t *req, 27.320 char *spage; 27.321 unsigned long size, offset, start; 27.322 vdi_t *vdi; 27.323 + pending_t *pent; 27.324 27.325 vdi = blkif_get_vdi(blkif, req->device); 27.326 27.327 if ( vdi == NULL ) 27.328 goto err; 27.329 + 27.330 + pent = &pending_list[ID_TO_IDX(req->id)]; 27.331 + pent->count = req->nr_segments; 27.332 + pent->req = req; 27.333 + pthread_mutex_init(&pent->mutex, NULL); 27.334 27.335 for (i = 0; i < req->nr_segments; i++) { 27.336 + 
struct cb_param *p; 27.337 27.338 spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 27.339 27.340 @@ -379,10 +466,6 @@ int parallax_write(blkif_request_t *req, 27.341 sector = req->sector_number + (8*i); 27.342 vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT; 27.343 27.344 - /* Get that block from the store. */ 27.345 - 27.346 - gblock = vdi_lookup_block(vdi, vblock, &writable); 27.347 - 27.348 /* Calculate read size and offset within the read block. */ 27.349 27.350 offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE; 27.351 @@ -405,27 +488,20 @@ int parallax_write(blkif_request_t *req, 27.352 printf("]\n] STRANGE WRITE!\n]\n"); 27.353 goto err; 27.354 } 27.355 - 27.356 - if (( gblock == 0 ) || ( writable == 0 )) { 27.357 - 27.358 - gblock = allocblock(spage); 27.359 - vdi_update_block(vdi, vblock, gblock); 27.360 - 27.361 - } else { 27.362 - 27.363 - /* write-in-place, no need to change mappings. */ 27.364 - writeblock(gblock, spage); 27.365 - 27.366 - } 27.367 - 27.368 + 27.369 + /* TODO: Replace this call to malloc with a cached allocation */ 27.370 + p = (struct cb_param *)malloc(sizeof(struct cb_param)); 27.371 + p->pent = pent; 27.372 + p->sector = sector; 27.373 + p->segment = i; 27.374 + p->vblock = vblock; /* dbg */ 27.375 + 27.376 + /* Issue the write to the store. 
*/ 27.377 + async_write(vdi, vblock, spage, write_cb, (void *)p); 27.378 } 27.379 27.380 - rsp = (blkif_response_t *)req; 27.381 - rsp->id = req->id; 27.382 - rsp->operation = BLKIF_OP_WRITE; 27.383 - rsp->status = BLKIF_RSP_OKAY; 27.384 + return BLKTAP_STOLEN; 27.385 27.386 - return BLKTAP_RESPOND; 27.387 err: 27.388 rsp = (blkif_response_t *)req; 27.389 rsp->id = req->id; 27.390 @@ -477,16 +553,19 @@ void __init_parallax(void) 27.391 } 27.392 27.393 27.394 + 27.395 int main(int argc, char *argv[]) 27.396 { 27.397 DPRINTF("parallax: starting.\n"); 27.398 __init_blockstore(); 27.399 DPRINTF("parallax: initialized blockstore...\n"); 27.400 + init_block_async(); 27.401 + DPRINTF("parallax: initialized async blocks...\n"); 27.402 __init_vdi(); 27.403 DPRINTF("parallax: initialized vdi registry etc...\n"); 27.404 __init_parallax(); 27.405 DPRINTF("parallax: initialized local stuff..\n"); 27.406 - 27.407 + 27.408 blktap_register_ctrl_hook("parallax_control", parallax_control); 27.409 blktap_register_request_hook("parallax_request", parallax_request); 27.410 DPRINTF("parallax: added ctrl + request hooks, starting listen...\n");
28.1 --- a/tools/blktap/radix.c Fri May 20 14:20:31 2005 +0000 28.2 +++ b/tools/blktap/radix.c Fri May 20 14:50:49 2005 +0000 28.3 @@ -25,18 +25,6 @@ 28.4 #define DEBUG 28.5 */ 28.6 28.7 -/* 28.8 -#define STAGED 28.9 -*/ 28.10 - 28.11 -#define ZERO 0LL 28.12 -#define ONE 1LL 28.13 -#define ONEMASK 0xffffffffffffffeLL 28.14 - 28.15 - 28.16 -typedef u64 *radix_tree_node; 28.17 - 28.18 - 28.19 /* Experimental radix cache. */ 28.20 28.21 static pthread_mutex_t rcache_mutex = PTHREAD_MUTEX_INITIALIZER; 28.22 @@ -276,7 +264,6 @@ radix_tree_node cloneblock(radix_tree_no 28.23 * 28.24 * @return: value on success, zero on error 28.25 */ 28.26 -#ifndef STAGED 28.27 28.28 u64 lookup(int height, u64 root, u64 key) { 28.29 radix_tree_node node; 28.30 @@ -318,92 +305,6 @@ u64 lookup(int height, u64 root, u64 key 28.31 return ZERO; 28.32 } 28.33 28.34 -#else /* STAGED */ 28.35 - 28.36 - 28.37 -/* non-recursive staged lookup, assume height is 35. */ 28.38 -u64 lookup(int height, u64 root, u64 key) { 28.39 - radix_tree_node node; 28.40 - u64 mask = ONE; 28.41 - 28.42 -printf("lookup!\n"); 28.43 - assert(key >> 35 == 0); 28.44 - 28.45 - /* the root block may be smaller to ensure all leaves are full */ 28.46 - height = 27; 28.47 - 28.48 - /* now carve off equal sized chunks at each step */ 28.49 - 28.50 - /* ROOT: (LEVEL 0) KEYLEN=35*/ 28.51 - if (getid(root) == ZERO) 28.52 - return ZERO; 28.53 - 28.54 - node = (radix_tree_node) readblock(getid(root)); 28.55 - if (node == NULL) 28.56 - return ZERO; 28.57 - 28.58 - root = node[(key >> height) & RADIX_TREE_MAP_MASK]; 28.59 - mask &= root; 28.60 - freeblock(node); 28.61 - 28.62 - if (height == 0) 28.63 - return ( root & ONEMASK ) | mask; 28.64 - 28.65 - height -= RADIX_TREE_MAP_SHIFT; /* == 18 */ 28.66 - 28.67 - /* LEVEL 1: KEYLEN=26*/ 28.68 - if (getid(root) == ZERO) 28.69 - return ZERO; 28.70 - 28.71 - node = (radix_tree_node) readblock(getid(root)); 28.72 - if (node == NULL) 28.73 - return ZERO; 28.74 - 28.75 - root = node[(key >> 
height) & RADIX_TREE_MAP_MASK]; 28.76 - mask &= root; 28.77 - freeblock(node); 28.78 - 28.79 - if (height == 0) 28.80 - return ( root & ONEMASK ) | mask; 28.81 - 28.82 - height -= RADIX_TREE_MAP_SHIFT; /* == 9 */ 28.83 - 28.84 - /* LEVEL 2: KEYLEN=17*/ 28.85 - if (getid(root) == ZERO) 28.86 - return ZERO; 28.87 - 28.88 - node = (radix_tree_node) readblock(getid(root)); 28.89 - if (node == NULL) 28.90 - return ZERO; 28.91 - 28.92 - root = node[(key >> height) & RADIX_TREE_MAP_MASK]; 28.93 - mask &= root; 28.94 - freeblock(node); 28.95 - 28.96 - if (height == 0) 28.97 - return ( root & ONEMASK ) | mask; 28.98 - 28.99 - height -= RADIX_TREE_MAP_SHIFT; /* == 0 */ 28.100 - 28.101 - /* LEVEL 3: KEYLEN=8*/ 28.102 - if (getid(root) == ZERO) 28.103 - return ZERO; 28.104 - 28.105 - node = (radix_tree_node) readblock(getid(root)); 28.106 - if (node == NULL) 28.107 - return ZERO; 28.108 - 28.109 - root = node[(key >> height) & RADIX_TREE_MAP_MASK]; 28.110 - mask &= root; 28.111 - freeblock(node); 28.112 - 28.113 - // if (height == 0) 28.114 - return ( root & ONEMASK ) | mask; 28.115 - 28.116 -} 28.117 - 28.118 -#endif 28.119 - 28.120 /* 28.121 * update: set a radix tree entry, doing copy-on-write as necessary 28.122 * @height: height in bits of the radix tree 28.123 @@ -414,9 +315,6 @@ printf("lookup!\n"); 28.124 * @returns: (possibly new) root id on success (with LSB=1), 0 on failure 28.125 */ 28.126 28.127 -#ifndef STAGED 28.128 - 28.129 - 28.130 u64 update(int height, u64 root, u64 key, u64 val) { 28.131 int offset; 28.132 u64 child; 28.133 @@ -487,320 +385,6 @@ u64 update(int height, u64 root, u64 key 28.134 return root; 28.135 } 28.136 28.137 - 28.138 -#else /* STAGED */ 28.139 - 28.140 -/* When update is called, state->next points to the thing to call after 28.141 - * update is finished. 
*/ 28.142 - 28.143 -struct cb_state_st; 28.144 - 28.145 -typedef struct { 28.146 - /* public stuff */ 28.147 - u64 val; 28.148 - u64 key; 28.149 - u64 result; 28.150 - 28.151 - /* internal state */ 28.152 - u64 root[4]; 28.153 - radix_tree_node node[4]; 28.154 - void (*next)(struct cb_state_st *); 28.155 - int err; 28.156 -} radix_update_t; 28.157 - 28.158 -typedef struct cb_state_st{ 28.159 - void (*next)(struct cb_state_st *); /* Next continuation. */ 28.160 - union { 28.161 - radix_update_t update; 28.162 - } radix; 28.163 -} cb_state_t; 28.164 - 28.165 -void s_readblock(cb_state_t *state, u64 id, void **ret) 28.166 -{ 28.167 - *ret = readblock(id); 28.168 - state->next(state); 28.169 -} 28.170 - 28.171 -void s_allocblock(cb_state_t *state, void *block, u64 *ret) 28.172 -{ 28.173 - *ret = allocblock(block); 28.174 - state->next(state); 28.175 -} 28.176 - 28.177 -void s_writeblock(cb_state_t *state, u64 id, void *block, int *ret) 28.178 -{ 28.179 - *ret = writeblock(id, block); 28.180 - state->next(state); 28.181 -} 28.182 - 28.183 -void cb_done(cb_state_t *state) 28.184 -{ 28.185 - printf("*** done ***\n"); 28.186 -} 28.187 - 28.188 -/* forward prototypes. 
*/ 28.189 -void up0(cb_state_t *state); 28.190 -void up1(cb_state_t *state); 28.191 -void up2(cb_state_t *state); 28.192 -void up3(cb_state_t *state); 28.193 -void up4(cb_state_t *state); 28.194 -void up5(cb_state_t *state); 28.195 -void up6(cb_state_t *state); 28.196 -void up7(cb_state_t *state); 28.197 -void up8(cb_state_t *state); 28.198 -void up9(cb_state_t *state); 28.199 -void up10(cb_state_t *state); 28.200 -void up11(cb_state_t *state); 28.201 -void up12(cb_state_t *state); 28.202 - 28.203 -u64 update(int height, u64 root, u64 key, u64 val) 28.204 -{ 28.205 - cb_state_t state; 28.206 - radix_update_t *u = &state.radix.update; 28.207 - 28.208 - u->val = val; 28.209 - u->key = key; 28.210 - u->root[0] = root; 28.211 - u->root[1] = u->root[2] = u->root[3] = ZERO; 28.212 - u->node[0] = u->node[1] = u->node[2] = u->node[3] = NULL; 28.213 - 28.214 - /* take a copy of the higher-scoped next continuation. */ 28.215 - u->next = state->next; 28.216 - 28.217 - /* update start state */ 28.218 - state->next = up0; 28.219 - 28.220 - for (;;) 28.221 - { 28.222 - state->next(state); 28.223 - if (state->next == NULL) 28.224 - break; 28.225 - } 28.226 - 28.227 - return u->result; 28.228 -} 28.229 - 28.230 -/* c0:*/ 28.231 -void up0(cb_state_t *state) { 28.232 - radix_update_t *u = &state->radix.update; 28.233 - 28.234 - state->next = up1; 28.235 - s_readblock(state, getid(u->root[0]), (void **)&(u->node[0])); 28.236 -} 28.237 - 28.238 -/* c1: */ 28.239 -void up1(cb_state_t *state) { 28.240 - radix_update_t *u = &state->radix.update; 28.241 - 28.242 - u->root[1] = u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK]; 28.243 - if (u->root[1] == ZERO) { 28.244 - u->node[1] = (radix_tree_node) newblock(); 28.245 - /* goto next continuation (c2)*/ up2(state);return; 28.246 - } else { 28.247 - state->next = up2; 28.248 - s_readblock(state, getid(u->root[1]), (void **)&(u->node[1])); 28.249 - } 28.250 -} 28.251 - 28.252 -/* c2: */ 28.253 -void up2(cb_state_t *state) { 28.254 - 
radix_update_t *u = &state->radix.update; 28.255 - 28.256 - if ((u->root[1] != ZERO) && (!iswritable(u->root[1]))) { 28.257 - /* need to clone this node */ 28.258 - radix_tree_node oldnode = u->node[1]; 28.259 - u->node[1] = cloneblock(u->node[1]); 28.260 - freeblock(oldnode); 28.261 - u->root[1] = ZERO; 28.262 - } 28.263 - u->root[2] = u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK]; 28.264 - if (u->root[2] == ZERO) { 28.265 - u->node[2] = (radix_tree_node) newblock(); 28.266 - /* goto next continuation (c3)*/ up3(state);return; 28.267 - } else { 28.268 - state->next = up3; 28.269 - s_readblock(state, getid(u->root[2]), (void **)&(u->node[2])); 28.270 - } 28.271 -} 28.272 - 28.273 -/* c3: */ 28.274 -void up3(cb_state_t *state) { 28.275 - radix_update_t *u = &state->radix.update; 28.276 - 28.277 - if ((u->root[2] != ZERO) && (!iswritable(u->root[2]))) { 28.278 - /* need to clone this node */ 28.279 - radix_tree_node oldnode = u->node[2]; 28.280 - u->node[2] = cloneblock(u->node[2]); 28.281 - freeblock(oldnode); 28.282 - u->root[2] = ZERO; 28.283 - } 28.284 - u->root[3] = u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK]; 28.285 - if (u->root[3] == ZERO) { 28.286 - u->node[3] = (radix_tree_node) newblock(); 28.287 - /* goto next continuation (c4)*/ up4(state);return; 28.288 - } else { 28.289 - state->next = up4; 28.290 - s_readblock(state, getid(u->root[3]), (void **)&(u->node[3])); 28.291 - } 28.292 -} 28.293 - 28.294 -/* c4: */ 28.295 -void up4(cb_state_t *state) { 28.296 - radix_update_t *u = &state->radix.update; 28.297 - 28.298 - if ((u->root[3] != ZERO) && (!iswritable(u->root[3]))) { 28.299 - /* need to clone this node */ 28.300 - radix_tree_node oldnode = u->node[3]; 28.301 - u->node[3] = cloneblock(u->node[3]); 28.302 - freeblock(oldnode); 28.303 - u->root[3] = ZERO; 28.304 - } 28.305 - 28.306 - if (u->node[3][u->key & RADIX_TREE_MAP_MASK] == u->val){ 28.307 - /* no change, so we already owned the child */ 28.308 - /* goto last continuation (c12) */ 
up12(state);return; 28.309 - } 28.310 - 28.311 - u->node[3][u->key & RADIX_TREE_MAP_MASK] = u->val; 28.312 - 28.313 - /* new/cloned blocks need to be saved */ 28.314 - if (u->root[3] == ZERO) { 28.315 - /* mark this as an owned block */ 28.316 - state->next = up5; 28.317 - s_allocblock(state, u->node[3], &u->root[3]); 28.318 - /* goto continuation (c5) */ return; 28.319 - } else { 28.320 - state->next = up6; 28.321 - s_writeblock(state, getid(u->root[3]), u->node[3], &u->err); 28.322 - /* goto continuation (c6) */ return; 28.323 - } 28.324 -} 28.325 - 28.326 -/* c5: */ 28.327 -void up5(cb_state_t *state) { 28.328 - radix_update_t *u = &state->radix.update; 28.329 - 28.330 - if (u->root[3]) 28.331 - u->root[3] = writable(u->root[3]); 28.332 - /* goto continuation (c6) */ up6(state);return; 28.333 -} 28.334 - 28.335 -/* c6: */ 28.336 -void up6(cb_state_t *state) { 28.337 - radix_update_t *u = &state->radix.update; 28.338 - 28.339 - if (u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK] == u->root[3]){ 28.340 - /* no change, so we already owned the child */ 28.341 - /* goto last continuation (c12) */ up12(state);return; 28.342 - } 28.343 - 28.344 - u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK] = u->root[3]; 28.345 - 28.346 - /* new/cloned blocks need to be saved */ 28.347 - if (u->root[2] == ZERO) { 28.348 - /* mark this as an owned block */ 28.349 - state->next = up7; 28.350 - s_allocblock(state, u->node[2], &u->root[2]); 28.351 - /* goto continuation (c7) */return; 28.352 - } else { 28.353 - state->next = up8; 28.354 - s_writeblock(state, getid(u->root[2]), u->node[2], &u->err); 28.355 - /* goto continuation (c8) */return; 28.356 - } 28.357 -} 28.358 - 28.359 -/* c7: */ 28.360 -void up7(cb_state_t *state) { 28.361 - radix_update_t *u = &state->radix.update; 28.362 - 28.363 - if (u->root[2]) 28.364 - u->root[2] = writable(u->root[2]); 28.365 - /* goto continuation (c8) */ up8(state);return; 28.366 -} 28.367 - 28.368 -/* c8: */ 28.369 -void up8(cb_state_t *state) { 28.370 
- radix_update_t *u = &state->radix.update; 28.371 - 28.372 - if (u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK] == u->root[2]){ 28.373 - /* no change, so we already owned the child */ 28.374 - /* goto last continuation (c12) */ up12(state);return; 28.375 - } 28.376 - 28.377 - u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK] = u->root[2]; 28.378 - 28.379 - /* new/cloned blocks need to be saved */ 28.380 - if (u->root[1] == ZERO) { 28.381 - /* mark this as an owned block */ 28.382 - state->next = up9; 28.383 - s_allocblock(state, u->node[1], &u->root[1]); 28.384 - /* goto continuation (c9) */return; 28.385 - } else { 28.386 - state->next = up10; 28.387 - s_writeblock(state, getid(u->root[1]), u->node[1], &u->err); 28.388 - /* goto continuation (c10) */return; 28.389 - } 28.390 -} 28.391 - 28.392 -/* c9: */ 28.393 -void up9(cb_state_t *state) { 28.394 - radix_update_t *u = &state->radix.update; 28.395 - 28.396 - if (u->root[1]) 28.397 - u->root[1] = writable(u->root[1]); 28.398 - /* goto continuation (c10) */ up10(state);return; 28.399 -} 28.400 - 28.401 -/* c10: */ 28.402 -void up10(cb_state_t *state) { 28.403 - radix_update_t *u = &state->radix.update; 28.404 - 28.405 - if (u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK] == u->root[1]){ 28.406 - /* no change, so we already owned the child */ 28.407 - /* goto last continuation (c12) */ up12(state);return; 28.408 - } 28.409 - 28.410 - u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK] = u->root[1]; 28.411 - 28.412 - /* new/cloned blocks need to be saved */ 28.413 - if (u->root[0] == ZERO) { 28.414 - /* mark this as an owned block */ 28.415 - state->next = up11; 28.416 - s_allocblock(state, u->node[0], &u->root[0]); 28.417 - /* goto continuation (c11) */ return; 28.418 - } else { 28.419 - state->next = up10; 28.420 - s_writeblock(state, getid(u->root[0]), u->node[0], &u->err); 28.421 - /* goto continuation (c12) */ return; 28.422 - } 28.423 -} 28.424 - 28.425 -/* c11: */ 28.426 -void up11(cb_state_t *state) { 28.427 - 
radix_update_t *u = &state->radix.update; 28.428 - 28.429 - if (u->root[0]) 28.430 - u->root[0] = writable(u->root[0]); 28.431 - /* goto continuation (c12) */ up12(state);return; 28.432 -} 28.433 - 28.434 -/* c12: */ 28.435 -void up12(cb_state_t *state) { 28.436 - radix_update_t *u = &state->radix.update; 28.437 - 28.438 - int i; 28.439 - for (i=0;i<4;i++) 28.440 - if(u->node[i] != NULL) freeblock(u->node[i]); 28.441 - 28.442 - u->result = u->root[0]; 28.443 - state->next = u->next; 28.444 - 28.445 - state->next(state);return; 28.446 -} 28.447 - 28.448 -#endif 28.449 - 28.450 - 28.451 /** 28.452 * snapshot: create a snapshot 28.453 * @root: old root node 28.454 @@ -840,7 +424,6 @@ int collapse(int height, u64 proot, u64 28.455 int i, numlinks, ret, total = 0; 28.456 radix_tree_node pnode, cnode; 28.457 28.458 -//printf("proot: %Ld\n", getid(proot)); 28.459 if (height == 0) { 28.460 height = -1; /* terminate recursion */ 28.461 } else {
29.1 --- a/tools/blktap/radix.h Fri May 20 14:20:31 2005 +0000 29.2 +++ b/tools/blktap/radix.h Fri May 20 14:50:49 2005 +0000 29.3 @@ -16,6 +16,16 @@ 29.4 #define putid(x) ((x)<<1) 29.5 #define writable(x) (((x)<<1)|1LL) 29.6 #define iswritable(x) ((x)&1LL) 29.7 +#define ZERO 0LL 29.8 +#define ONE 1LL 29.9 +#define ONEMASK 0xffffffffffffffeLL 29.10 + 29.11 +#define RADIX_TREE_MAP_SHIFT 9 29.12 +#define RADIX_TREE_MAP_MASK 0x1ff 29.13 +#define RADIX_TREE_MAP_ENTRIES 512 29.14 + 29.15 +typedef u64 *radix_tree_node; 29.16 + 29.17 29.18 /* 29.19 * main api
30.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 30.2 +++ b/tools/blktap/requests-async.c Fri May 20 14:50:49 2005 +0000 30.3 @@ -0,0 +1,629 @@ 30.4 +/* read.c 30.5 + * 30.6 + * asynchronous read experiment for parallax. 30.7 + */ 30.8 + 30.9 +#include <stdio.h> 30.10 +#include <stdlib.h> 30.11 +#include <string.h> 30.12 +#include <assert.h> 30.13 +#include <pthread.h> 30.14 +#include "requests-async.h" 30.15 +#include "vdi.h" 30.16 +#include "radix.h" 30.17 + 30.18 +#define L1_IDX(_a) (((_a) & 0x0000000007fc0000ULL) >> 18) 30.19 +#define L2_IDX(_a) (((_a) & 0x000000000003fe00ULL) >> 9) 30.20 +#define L3_IDX(_a) (((_a) & 0x00000000000001ffULL)) 30.21 + 30.22 + 30.23 + 30.24 +//#define STANDALONE 30.25 + 30.26 +#if 0 30.27 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 30.28 +#else 30.29 +#define DPRINTF(_f, _a...) ((void)0) 30.30 +#endif 30.31 + 30.32 + 30.33 +struct io_req { 30.34 + enum { IO_OP_READ, IO_OP_WRITE } op; 30.35 + u64 root; 30.36 + u64 vaddr; 30.37 + int state; 30.38 + io_cb_t cb; 30.39 + void *param; 30.40 + struct radix_lock *lock; 30.41 + 30.42 + /* internal stuff: */ 30.43 + struct io_ret retval;/* holds the return while we unlock. 
*/ 30.44 + char *block; /* the block to write */ 30.45 + radix_tree_node radix[3]; 30.46 + u64 radix_addr[3]; 30.47 +}; 30.48 + 30.49 +void clear_w_bits(radix_tree_node node) 30.50 +{ 30.51 + int i; 30.52 + for (i=0; i<RADIX_TREE_MAP_ENTRIES; i++) 30.53 + node[i] = node[i] & ONEMASK; 30.54 + return; 30.55 +} 30.56 + 30.57 +enum states { 30.58 + /* both */ 30.59 + READ_L1, 30.60 + READ_L2, 30.61 + READ_L3, 30.62 + 30.63 + /* read */ 30.64 + READ_LOCKED, 30.65 + READ_DATA, 30.66 + READ_UNLOCKED, 30.67 + RETURN_ZERO, 30.68 + 30.69 + /* write */ 30.70 + WRITE_LOCKED, 30.71 + WRITE_DATA, 30.72 + WRITE_UNLOCKED, 30.73 + 30.74 + /* L3 Zero Path */ 30.75 + ALLOC_DATA_L3z, 30.76 + WRITE_L3_L3z, 30.77 + 30.78 + /* L3 Fault Path */ 30.79 + ALLOC_DATA_L3f, 30.80 + WRITE_L3_L3f, 30.81 + 30.82 + /* L2 Zero Path */ 30.83 + ALLOC_DATA_L2z, 30.84 + WRITE_L2_L2z, 30.85 + ALLOC_L3_L2z, 30.86 + WRITE_L2_L3z, 30.87 + 30.88 + /* L2 Fault Path */ 30.89 + READ_L3_L2f, 30.90 + ALLOC_DATA_L2f, 30.91 + WRITE_L2_L2f, 30.92 + ALLOC_L3_L2f, 30.93 + WRITE_L2_L3f, 30.94 + 30.95 + /* L1 Zero Path */ 30.96 + ALLOC_DATA_L1z, 30.97 + ALLOC_L3_L1z, 30.98 + ALLOC_L2_L1z, 30.99 + WRITE_L1_L1z, 30.100 + 30.101 + /* L1 Fault Path */ 30.102 + READ_L2_L1f, 30.103 + READ_L3_L1f, 30.104 + ALLOC_DATA_L1f, 30.105 + ALLOC_L3_L1f, 30.106 + ALLOC_L2_L1f, 30.107 + WRITE_L1_L1f, 30.108 + 30.109 +}; 30.110 + 30.111 +enum radix_offsets { 30.112 + L1 = 0, 30.113 + L2 = 1, 30.114 + L3 = 2 30.115 +}; 30.116 + 30.117 + 30.118 +static void read_cb(struct io_ret ret, void *param); 30.119 +static void write_cb(struct io_ret ret, void *param); 30.120 + 30.121 + 30.122 +int async_read(vdi_t *vdi, u64 vaddr, io_cb_t cb, void *param) 30.123 +{ 30.124 + struct io_req *req; 30.125 + 30.126 + DPRINTF("async_read\n"); 30.127 + 30.128 + req = (struct io_req *)malloc(sizeof (struct io_req)); 30.129 + req->radix[0] = req->radix[1] = req->radix[2] = NULL; 30.130 + 30.131 + if (req == NULL) {perror("req was NULL in async_read"); 
return(-1); } 30.132 + 30.133 + req->op = IO_OP_READ; 30.134 + req->root = vdi->radix_root; 30.135 + req->lock = vdi->radix_lock; 30.136 + req->vaddr = vaddr; 30.137 + req->cb = cb; 30.138 + req->param = param; 30.139 + req->state = READ_LOCKED; 30.140 + 30.141 + block_rlock(req->lock, L1_IDX(vaddr), read_cb, req); 30.142 + 30.143 + return 0; 30.144 +} 30.145 + 30.146 + 30.147 +int async_write(vdi_t *vdi, u64 vaddr, char *block, 30.148 + io_cb_t cb, void *param) 30.149 +{ 30.150 + struct io_req *req; 30.151 + 30.152 + 30.153 + req = (struct io_req *)malloc(sizeof (struct io_req)); 30.154 + req->radix[0] = req->radix[1] = req->radix[2] = NULL; 30.155 + //DPRINTF("async_write\n"); 30.156 + 30.157 + if (req == NULL) {perror("req was NULL in async_write"); return(-1); } 30.158 + 30.159 + req->op = IO_OP_WRITE; 30.160 + req->root = vdi->radix_root; 30.161 + req->lock = vdi->radix_lock; 30.162 + req->vaddr = vaddr; 30.163 + req->block = block; 30.164 + req->cb = cb; 30.165 + req->param = param; 30.166 + req->radix_addr[L1] = getid(req->root); /* for consistency */ 30.167 + req->state = WRITE_LOCKED; 30.168 + 30.169 + block_wlock(req->lock, L1_IDX(vaddr), write_cb, req); 30.170 + 30.171 + 30.172 + return 0; 30.173 +} 30.174 + 30.175 +void read_cb(struct io_ret ret, void *param) 30.176 +{ 30.177 + struct io_req *req = (struct io_req *)param; 30.178 + radix_tree_node node; 30.179 + u64 idx; 30.180 + char *block; 30.181 + void *req_param; 30.182 + 30.183 + DPRINTF("read_cb\n"); 30.184 + /* get record */ 30.185 + switch(req->state) { 30.186 + 30.187 + case READ_LOCKED: 30.188 + 30.189 + DPRINTF("READ_LOCKED\n"); 30.190 + req->state = READ_L1; 30.191 + block_read(getid(req->root), read_cb, req); 30.192 + break; 30.193 + 30.194 + case READ_L1: /* block is the radix root */ 30.195 + 30.196 + DPRINTF("READ_L1\n"); 30.197 + block = IO_BLOCK(ret); 30.198 + if (block == NULL) goto fail; 30.199 + node = (radix_tree_node) block; 30.200 + idx = getid( node[L1_IDX(req->vaddr)] ); 30.201 
+ free(block); 30.202 + if ( idx == ZERO ) { 30.203 + req->state = RETURN_ZERO; 30.204 + block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req); 30.205 + } else { 30.206 + req->state = READ_L2; 30.207 + block_read(idx, read_cb, req); 30.208 + } 30.209 + break; 30.210 + 30.211 + case READ_L2: 30.212 + 30.213 + DPRINTF("READ_L2\n"); 30.214 + block = IO_BLOCK(ret); 30.215 + if (block == NULL) goto fail; 30.216 + node = (radix_tree_node) block; 30.217 + idx = getid( node[L2_IDX(req->vaddr)] ); 30.218 + free(block); 30.219 + if ( idx == ZERO ) { 30.220 + req->state = RETURN_ZERO; 30.221 + block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req); 30.222 + } else { 30.223 + req->state = READ_L3; 30.224 + block_read(idx, read_cb, req); 30.225 + } 30.226 + break; 30.227 + 30.228 + case READ_L3: 30.229 + 30.230 + DPRINTF("READ_L3\n"); 30.231 + block = IO_BLOCK(ret); 30.232 + if (block == NULL) goto fail; 30.233 + node = (radix_tree_node) block; 30.234 + idx = getid( node[L3_IDX(req->vaddr)] ); 30.235 + free(block); 30.236 + if ( idx == ZERO ) { 30.237 + req->state = RETURN_ZERO; 30.238 + block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req); 30.239 + } else { 30.240 + req->state = READ_DATA; 30.241 + block_read(idx, read_cb, req); 30.242 + } 30.243 + break; 30.244 + 30.245 + case READ_DATA: 30.246 + 30.247 + DPRINTF("READ_DATA\n"); 30.248 + if (IO_BLOCK(ret) == NULL) goto fail; 30.249 + req->retval = ret; 30.250 + req->state = READ_UNLOCKED; 30.251 + block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req); 30.252 + break; 30.253 + 30.254 + case READ_UNLOCKED: 30.255 + { 30.256 + struct io_ret r; 30.257 + io_cb_t cb; 30.258 + DPRINTF("READ_UNLOCKED\n"); 30.259 + req_param = req->param; 30.260 + r = req->retval; 30.261 + cb = req->cb; 30.262 + free(req); 30.263 + cb(r, req_param); 30.264 + break; 30.265 + } 30.266 + 30.267 + case RETURN_ZERO: 30.268 + { 30.269 + struct io_ret r; 30.270 + io_cb_t cb; 30.271 + DPRINTF("RETURN_ZERO\n"); 30.272 + req_param = 
req->param; 30.273 + cb = req->cb; 30.274 + free(req); 30.275 + r.type = IO_BLOCK_T; 30.276 + r.u.b = newblock(); 30.277 + cb(r, req_param); 30.278 + break; 30.279 + } 30.280 + 30.281 + default: 30.282 + DPRINTF("*** Write: Bad state! (%d) ***\n", req->state); 30.283 + goto fail; 30.284 + } 30.285 + 30.286 + return; 30.287 + 30.288 + fail: 30.289 + { 30.290 + struct io_ret r; 30.291 + io_cb_t cb; 30.292 + DPRINTF("asyn_read had a read error.\n"); 30.293 + req_param = req->param; 30.294 + r = ret; 30.295 + cb = req->cb; 30.296 + free(req); 30.297 + cb(r, req_param); 30.298 + } 30.299 + 30.300 + 30.301 +} 30.302 + 30.303 +void write_cb(struct io_ret r, void *param) 30.304 +{ 30.305 + struct io_req *req = (struct io_req *)param; 30.306 + radix_tree_node node; 30.307 + u64 a, addr; 30.308 + void *req_param; 30.309 + 30.310 + //DPRINTF("write_cb\n"); 30.311 + switch(req->state) { 30.312 + 30.313 + case WRITE_LOCKED: 30.314 + 30.315 + DPRINTF("WRITE_LOCKED (%llu)\n", L1_IDX(req->vaddr)); 30.316 + req->state = READ_L1; 30.317 + block_read(getid(req->root), write_cb, req); 30.318 + break; 30.319 + 30.320 + case READ_L1: /* block is the radix root */ 30.321 + 30.322 + DPRINTF("READ_L1\n"); 30.323 + node = (radix_tree_node) IO_BLOCK(r); 30.324 + if (node == NULL) goto fail; 30.325 + a = node[L1_IDX(req->vaddr)]; 30.326 + addr = getid(a); 30.327 + 30.328 + req->radix_addr[L2] = addr; 30.329 + req->radix[L1] = node; 30.330 + 30.331 + if ( addr == ZERO ) { 30.332 + /* L1 empty subtree: */ 30.333 + req->state = ALLOC_DATA_L1z; 30.334 + block_alloc( req->block, write_cb, req ); 30.335 + } else if ( !iswritable(a) ) { 30.336 + /* L1 fault: */ 30.337 + req->state = READ_L2_L1f; 30.338 + block_read( addr, write_cb, req ); 30.339 + } else { 30.340 + req->state = READ_L2; 30.341 + block_read( addr, write_cb, req ); 30.342 + } 30.343 + break; 30.344 + 30.345 + case READ_L2: 30.346 + 30.347 + DPRINTF("READ_L2\n"); 30.348 + node = (radix_tree_node) IO_BLOCK(r); 30.349 + if (node == NULL) 
goto fail; 30.350 + a = node[L2_IDX(req->vaddr)]; 30.351 + addr = getid(a); 30.352 + 30.353 + req->radix_addr[L3] = addr; 30.354 + req->radix[L2] = node; 30.355 + 30.356 + if ( addr == ZERO ) { 30.357 + /* L2 empty subtree: */ 30.358 + req->state = ALLOC_DATA_L2z; 30.359 + block_alloc( req->block, write_cb, req ); 30.360 + } else if ( !iswritable(a) ) { 30.361 + /* L2 fault: */ 30.362 + req->state = READ_L3_L2f; 30.363 + block_read( addr, write_cb, req ); 30.364 + } else { 30.365 + req->state = READ_L3; 30.366 + block_read( addr, write_cb, req ); 30.367 + } 30.368 + break; 30.369 + 30.370 + case READ_L3: 30.371 + 30.372 + DPRINTF("READ_L3\n"); 30.373 + node = (radix_tree_node) IO_BLOCK(r); 30.374 + if (node == NULL) goto fail; 30.375 + a = node[L3_IDX(req->vaddr)]; 30.376 + addr = getid(a); 30.377 + 30.378 + req->radix[L3] = node; 30.379 + 30.380 + if ( addr == ZERO ) { 30.381 + /* L3 fault: */ 30.382 + req->state = ALLOC_DATA_L3z; 30.383 + block_alloc( req->block, write_cb, req ); 30.384 + } else if ( !iswritable(a) ) { 30.385 + /* L3 fault: */ 30.386 + req->state = ALLOC_DATA_L3f; 30.387 + block_alloc( req->block, write_cb, req ); 30.388 + } else { 30.389 + req->state = WRITE_DATA; 30.390 + block_write( addr, req->block, write_cb, req ); 30.391 + } 30.392 + break; 30.393 + 30.394 + /* L3 Zero Path: */ 30.395 + 30.396 + case ALLOC_DATA_L3z: 30.397 + 30.398 + DPRINTF("ALLOC_DATA_L3z\n"); 30.399 + addr = IO_ADDR(r); 30.400 + a = writable(addr); 30.401 + req->radix[L3][L3_IDX(req->vaddr)] = a; 30.402 + req->state = WRITE_L3_L3z; 30.403 + block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req); 30.404 + break; 30.405 + 30.406 + /* L3 Fault Path: */ 30.407 + 30.408 + case ALLOC_DATA_L3f: 30.409 + 30.410 + DPRINTF("ALLOC_DATA_L3f\n"); 30.411 + addr = IO_ADDR(r); 30.412 + a = writable(addr); 30.413 + req->radix[L3][L3_IDX(req->vaddr)] = a; 30.414 + req->state = WRITE_L3_L3f; 30.415 + block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req); 
30.416 + break; 30.417 + 30.418 + /* L2 Zero Path: */ 30.419 + 30.420 + case ALLOC_DATA_L2z: 30.421 + 30.422 + DPRINTF("ALLOC_DATA_L2z\n"); 30.423 + addr = IO_ADDR(r); 30.424 + a = writable(addr); 30.425 + req->radix[L3] = newblock(); 30.426 + req->radix[L3][L3_IDX(req->vaddr)] = a; 30.427 + req->state = ALLOC_L3_L2z; 30.428 + block_alloc( (char*)req->radix[L3], write_cb, req ); 30.429 + break; 30.430 + 30.431 + case ALLOC_L3_L2z: 30.432 + 30.433 + DPRINTF("ALLOC_L3_L2z\n"); 30.434 + addr = IO_ADDR(r); 30.435 + a = writable(addr); 30.436 + req->radix[L2][L2_IDX(req->vaddr)] = a; 30.437 + req->state = WRITE_L2_L2z; 30.438 + block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req); 30.439 + break; 30.440 + 30.441 + /* L2 Fault Path: */ 30.442 + 30.443 + case READ_L3_L2f: 30.444 + 30.445 + DPRINTF("READ_L3_L2f\n"); 30.446 + node = (radix_tree_node) IO_BLOCK(r); 30.447 + clear_w_bits(node); 30.448 + if (node == NULL) goto fail; 30.449 + a = node[L2_IDX(req->vaddr)]; 30.450 + addr = getid(a); 30.451 + 30.452 + req->radix[L3] = node; 30.453 + req->state = ALLOC_DATA_L2f; 30.454 + block_alloc( req->block, write_cb, req ); 30.455 + break; 30.456 + 30.457 + case ALLOC_DATA_L2f: 30.458 + 30.459 + DPRINTF("ALLOC_DATA_L2f\n"); 30.460 + addr = IO_ADDR(r); 30.461 + a = writable(addr); 30.462 + req->radix[L3][L3_IDX(req->vaddr)] = a; 30.463 + req->state = ALLOC_L3_L2f; 30.464 + block_alloc( (char*)req->radix[L3], write_cb, req ); 30.465 + break; 30.466 + 30.467 + case ALLOC_L3_L2f: 30.468 + 30.469 + DPRINTF("ALLOC_L3_L2f\n"); 30.470 + addr = IO_ADDR(r); 30.471 + a = writable(addr); 30.472 + req->radix[L2][L2_IDX(req->vaddr)] = a; 30.473 + req->state = WRITE_L2_L2f; 30.474 + block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req); 30.475 + break; 30.476 + 30.477 + /* L1 Zero Path: */ 30.478 + 30.479 + case ALLOC_DATA_L1z: 30.480 + 30.481 + DPRINTF("ALLOC_DATA_L1z\n"); 30.482 + addr = IO_ADDR(r); 30.483 + a = writable(addr); 30.484 + req->radix[L3] = 
newblock(); 30.485 + req->radix[L3][L3_IDX(req->vaddr)] = a; 30.486 + req->state = ALLOC_L3_L1z; 30.487 + block_alloc( (char*)req->radix[L3], write_cb, req ); 30.488 + break; 30.489 + 30.490 + case ALLOC_L3_L1z: 30.491 + 30.492 + DPRINTF("ALLOC_L3_L1z\n"); 30.493 + addr = IO_ADDR(r); 30.494 + a = writable(addr); 30.495 + req->radix[L2] = newblock(); 30.496 + req->radix[L2][L2_IDX(req->vaddr)] = a; 30.497 + req->state = ALLOC_L2_L1z; 30.498 + block_alloc( (char*)req->radix[L2], write_cb, req ); 30.499 + break; 30.500 + 30.501 + case ALLOC_L2_L1z: 30.502 + 30.503 + DPRINTF("ALLOC_L2_L1z\n"); 30.504 + addr = IO_ADDR(r); 30.505 + a = writable(addr); 30.506 + req->radix[L1][L1_IDX(req->vaddr)] = a; 30.507 + req->state = WRITE_L1_L1z; 30.508 + block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req); 30.509 + break; 30.510 + 30.511 + /* L1 Fault Path: */ 30.512 + 30.513 + case READ_L2_L1f: 30.514 + 30.515 + DPRINTF("READ_L2_L1f\n"); 30.516 + node = (radix_tree_node) IO_BLOCK(r); 30.517 + clear_w_bits(node); 30.518 + if (node == NULL) goto fail; 30.519 + a = node[L2_IDX(req->vaddr)]; 30.520 + addr = getid(a); 30.521 + 30.522 + req->radix_addr[L3] = addr; 30.523 + req->radix[L2] = node; 30.524 + 30.525 + if (addr == ZERO) { 30.526 + /* nothing below L2, create an empty L3 and alloc data. */ 30.527 + /* (So skip READ_L3_L1f.) 
*/ 30.528 + req->radix[L3] = newblock(); 30.529 + req->state = ALLOC_DATA_L1f; 30.530 + block_alloc( req->block, write_cb, req ); 30.531 + } else { 30.532 + req->state = READ_L3_L1f; 30.533 + block_read( addr, write_cb, req ); 30.534 + } 30.535 + break; 30.536 + 30.537 + case READ_L3_L1f: 30.538 + 30.539 + DPRINTF("READ_L3_L1f\n"); 30.540 + node = (radix_tree_node) IO_BLOCK(r); 30.541 + clear_w_bits(node); 30.542 + if (node == NULL) goto fail; 30.543 + a = node[L2_IDX(req->vaddr)]; 30.544 + addr = getid(a); 30.545 + 30.546 + req->radix[L3] = node; 30.547 + req->state = ALLOC_DATA_L1f; 30.548 + block_alloc( req->block, write_cb, req ); 30.549 + break; 30.550 + 30.551 + case ALLOC_DATA_L1f: 30.552 + 30.553 + DPRINTF("ALLOC_DATA_L1f\n"); 30.554 + addr = IO_ADDR(r); 30.555 + a = writable(addr); 30.556 + req->radix[L3][L3_IDX(req->vaddr)] = a; 30.557 + req->state = ALLOC_L3_L1f; 30.558 + block_alloc( (char*)req->radix[L3], write_cb, req ); 30.559 + break; 30.560 + 30.561 + case ALLOC_L3_L1f: 30.562 + 30.563 + DPRINTF("ALLOC_L3_L1f\n"); 30.564 + addr = IO_ADDR(r); 30.565 + a = writable(addr); 30.566 + req->radix[L2][L2_IDX(req->vaddr)] = a; 30.567 + req->state = ALLOC_L2_L1f; 30.568 + block_alloc( (char*)req->radix[L2], write_cb, req ); 30.569 + break; 30.570 + 30.571 + case ALLOC_L2_L1f: 30.572 + 30.573 + DPRINTF("ALLOC_L2_L1f\n"); 30.574 + addr = IO_ADDR(r); 30.575 + a = writable(addr); 30.576 + req->radix[L1][L1_IDX(req->vaddr)] = a; 30.577 + req->state = WRITE_L1_L1f; 30.578 + block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req); 30.579 + break; 30.580 + 30.581 + case WRITE_DATA: 30.582 + case WRITE_L3_L3z: 30.583 + case WRITE_L3_L3f: 30.584 + case WRITE_L2_L2z: 30.585 + case WRITE_L2_L2f: 30.586 + case WRITE_L1_L1z: 30.587 + case WRITE_L1_L1f: 30.588 + { 30.589 + int i; 30.590 + DPRINTF("DONE\n"); 30.591 + /* free any saved node vals. 
*/ 30.592 + for (i=0; i<3; i++) 30.593 + if (req->radix[i] != 0) free(req->radix[i]); 30.594 + req->retval = r; 30.595 + req->state = WRITE_UNLOCKED; 30.596 + block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req); 30.597 + break; 30.598 + } 30.599 + case WRITE_UNLOCKED: 30.600 + { 30.601 + struct io_ret r; 30.602 + io_cb_t cb; 30.603 + DPRINTF("WRITE_UNLOCKED!\n"); 30.604 + req_param = req->param; 30.605 + r = req->retval; 30.606 + cb = req->cb; 30.607 + free(req); 30.608 + cb(r, req_param); 30.609 + break; 30.610 + } 30.611 + 30.612 + default: 30.613 + DPRINTF("*** Write: Bad state! (%d) ***\n", req->state); 30.614 + goto fail; 30.615 + } 30.616 + 30.617 + return; 30.618 + 30.619 + fail: 30.620 + { 30.621 + struct io_ret r; 30.622 + io_cb_t cb; 30.623 + DPRINTF("asyn_write had a read error mid-way.\n"); 30.624 + req_param = req->param; 30.625 + cb = req->cb; 30.626 + r.type = IO_INT_T; 30.627 + r.u.i = -1; 30.628 + free(req); 30.629 + cb(r, req_param); 30.630 + } 30.631 +} 30.632 +
31.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 31.2 +++ b/tools/blktap/requests-async.h Fri May 20 14:50:49 2005 +0000 31.3 @@ -0,0 +1,19 @@ 31.4 +#ifndef _REQUESTSASYNC_H_ 31.5 +#define _REQUESTSASYNC_H_ 31.6 + 31.7 +#include "block-async.h" 31.8 +#include "blockstore.h" /* for newblock etc. */ 31.9 + 31.10 +/* 31.11 +#define BLOCK_SIZE 4096 31.12 +#define ZERO 0ULL 31.13 +#define getid(x) (((x)>>1)&0x7fffffffffffffffLLU) 31.14 +#define iswritable(x) (((x) & 1LLU) != 0) 31.15 +#define writable(x) (((x) << 1) | 1LLU) 31.16 +#define readonly(x) ((u64)((x) << 1)) 31.17 +*/ 31.18 + 31.19 +int async_read (vdi_t *vdi, u64 vaddr, io_cb_t cb, void *param); 31.20 +int async_write(vdi_t *vdi, u64 vaddr, char *block, io_cb_t cb, void *param); 31.21 + 31.22 +#endif //_REQUESTSASYNC_H_
32.1 --- a/tools/blktap/vdi.c Fri May 20 14:20:31 2005 +0000 32.2 +++ b/tools/blktap/vdi.c Fri May 20 14:50:49 2005 +0000 32.3 @@ -11,14 +11,16 @@ 32.4 #include <fcntl.h> 32.5 #include <string.h> 32.6 #include <sys/time.h> 32.7 +#include <pthread.h> 32.8 #include "blockstore.h" 32.9 +#include "block-async.h" 32.10 #include "radix.h" 32.11 #include "vdi.h" 32.12 32.13 #define VDI_REG_BLOCK 2LL 32.14 #define VDI_RADIX_ROOT writable(3) 32.15 32.16 -#if 1 32.17 +#if 0 32.18 #define DPRINTF(_f, _a...) printf ( _f , ## _a ) 32.19 #else 32.20 #define DPRINTF(_f, _a...) ((void)0) 32.21 @@ -66,6 +68,7 @@ vdi_registry_t *get_vdi_registry(void) 32.22 return vdi_reg; 32.23 } 32.24 32.25 + 32.26 vdi_t *vdi_create(snap_id_t *parent_snap, char *name) 32.27 { 32.28 int ret; 32.29 @@ -106,12 +109,22 @@ vdi_t *vdi_create(snap_id_t *parent_snap 32.30 vdi->id = vdi_reg->nr_vdis++; 32.31 strncpy(vdi->name, name, VDI_NAME_SZ); 32.32 vdi->name[VDI_NAME_SZ] = '\0'; 32.33 + vdi->radix_lock = NULL; /* for tidiness */ 32.34 writeblock(vdi->block, (void *)vdi); 32.35 32.36 update(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi->id, vdi->block); 32.37 writeblock(VDI_REG_BLOCK, (void *)vdi_reg); 32.38 freeblock(vdi_reg); 32.39 32.40 + vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock)); 32.41 + if (vdi->radix_lock == NULL) 32.42 + { 32.43 + perror("couldn't malloc radix_lock for new vdi!"); 32.44 + freeblock(vdi); 32.45 + return NULL; 32.46 + } 32.47 + radix_lock_init(vdi->radix_lock); 32.48 + 32.49 return vdi; 32.50 } 32.51 32.52 @@ -126,6 +139,16 @@ vdi_t *vdi_get(u64 vdi_id) 32.53 return NULL; 32.54 32.55 vdi = (vdi_t *)readblock(vdi_blk); 32.56 + 32.57 + vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock)); 32.58 + if (vdi->radix_lock == NULL) 32.59 + { 32.60 + perror("couldn't malloc radix_lock for new vdi!"); 32.61 + freeblock(vdi); 32.62 + return NULL; 32.63 + } 32.64 + radix_lock_init(vdi->radix_lock); 32.65 + 32.66 return vdi; 32.67 } 32.68
33.1 --- a/tools/blktap/vdi.h Fri May 20 14:20:31 2005 +0000 33.2 +++ b/tools/blktap/vdi.h Fri May 20 14:50:49 2005 +0000 33.3 @@ -1,3 +1,5 @@ 33.4 +#ifndef _VDI_H_ 33.5 +#define _VDI_H_ 33.6 /************************************************************************** 33.7 * 33.8 * vdi.h 33.9 @@ -12,11 +14,12 @@ 33.10 #include "blktaplib.h" 33.11 #include "snaplog.h" 33.12 33.13 -#define VDI_HEIGHT 35 33.14 -#define VDI_REG_HEIGHT 35 /* why not? */ 33.15 +#define VDI_HEIGHT 27 /* Note that these are now hard-coded */ 33.16 +#define VDI_REG_HEIGHT 27 /* in the async lookup code */ 33.17 33.18 #define VDI_NAME_SZ 256 33.19 33.20 + 33.21 typedef struct vdi { 33.22 u64 id; /* unique vdi id -- used by the registry */ 33.23 u64 block; /* block where this vdi lives (also unique)*/ 33.24 @@ -24,6 +27,7 @@ typedef struct vdi { 33.25 snap_id_t snap; /* next snapshot slot for this VDI */ 33.26 struct vdi *next; /* used to hash-chain in blkif. */ 33.27 blkif_vdev_t vdevice; /* currently mounted as... */ 33.28 + struct radix_lock *radix_lock;/* per-line L1 RW lock for parallel reqs */ 33.29 char name[VDI_NAME_SZ];/* human readable vdi name */ 33.30 } vdi_t; 33.31 33.32 @@ -46,3 +50,5 @@ void vdi_snapshot(vdi_t *vdi); 33.33 33.34 33.35 #endif /* __VDI_H__ */ 33.36 + 33.37 +#endif //_VDI_H_