direct-io.hg

changeset 5041:15e6c09e1f43

bitkeeper revision 1.1473.1.1 (428df901D5uzzXaFBp8z6tkbP0gV0w)

- Finer-grained asynchronous dispatch in parallax daemon.
- Cleanups and cull of older code.
- Fixes to handle changes in block protocol.

Signed-off-by: andrew.warfield@cl.cam.ac.uk
author akw27@arcadians.cl.cam.ac.uk
date Fri May 20 14:49:37 2005 +0000 (2005-05-20)
parents 805a0bc42249
children 45a82b5f9fbe
files .rootkeys tools/blktap/Makefile tools/blktap/blkaio.c tools/blktap/blkaiolib.c tools/blktap/blkaiolib.h tools/blktap/blkcow.c tools/blktap/blkcowgnbd.c tools/blktap/blkcowimg.c tools/blktap/blkcowlib.c tools/blktap/blkcowlib.h tools/blktap/blkdump.c tools/blktap/blkgnbd.c tools/blktap/blkgnbdlib.c tools/blktap/blkgnbdlib.h tools/blktap/blkimg.c tools/blktap/blkimglib.c tools/blktap/blkimglib.h tools/blktap/block-async.c tools/blktap/block-async.h tools/blktap/blockstore-tls.c tools/blktap/blockstore.c tools/blktap/libgnbd/Makefile tools/blktap/libgnbd/gnbdtest.c tools/blktap/libgnbd/libgnbd.c tools/blktap/libgnbd/libgnbd.h tools/blktap/parallax-threaded.c tools/blktap/parallax.c tools/blktap/radix.c tools/blktap/radix.h tools/blktap/requests-async.c tools/blktap/requests-async.h tools/blktap/vdi.c tools/blktap/vdi.h
line diff
     1.1 --- a/.rootkeys	Thu May 19 21:14:26 2005 +0000
     1.2 +++ b/.rootkeys	Fri May 20 14:49:37 2005 +0000
     1.3 @@ -477,38 +477,23 @@ 40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Rul
     1.4  4209033eUwhDBJ_bxejiv5c6gjXS4A tools/blktap/Makefile
     1.5  4209033ewLAHdhGrT_2jo3Gb_5bDcA tools/blktap/README
     1.6  42277b02mYXxgijE7MFeUe9d8eldMw tools/blktap/README-PARALLAX
     1.7 -4209033eX_Xw94wHaOCtnU9nOAtSJA tools/blktap/blkaio.c
     1.8 -4209033egwf6LDxM2hbaqi9rRdZy4A tools/blktap/blkaiolib.c
     1.9 -4209033f9yELLK85Ipo2oKjr3ickgQ tools/blktap/blkaiolib.h
    1.10 -4209033fL9LcSI6LXrIp5O4axbUBLg tools/blktap/blkcow.c
    1.11 -4209033fUDlFGZreIyZHdP7h7yfvuQ tools/blktap/blkcowgnbd.c
    1.12 -4209033fCgZzLeMOwNBFmsp99x58ZQ tools/blktap/blkcowimg.c
    1.13 -4209033frfXH6oOi9AvRz08PPAndNA tools/blktap/blkcowlib.c
    1.14 -4209033fhFd_y2go9HgCF395A35xJg tools/blktap/blkcowlib.h
    1.15  4209033fHgtGpb_K16_xC9CpkjNZLw tools/blktap/blkdump.c
    1.16 -4209033fm61CZG1RyKDW75V-eTZ9fg tools/blktap/blkgnbd.c
    1.17 -4209033fVfa-R6MFgGcmsQHTDna4PA tools/blktap/blkgnbdlib.c
    1.18 -4209033fIgDQbaHwHStHhPEDTtbqsA tools/blktap/blkgnbdlib.h
    1.19 -4209033figp5JRsKsXY8rw4keRumkg tools/blktap/blkimg.c
    1.20 -42090340V-8HKGlr00SyJGsE5jXC3A tools/blktap/blkimglib.c
    1.21 -42090340c7pQbh0Km8zLcEqPd_3zIg tools/blktap/blkimglib.h
    1.22  42090340_mvZtozMjghPJO0qsjk4NQ tools/blktap/blkint.h
    1.23  42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blktap/blktaplib.c
    1.24  42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h
    1.25 -423f270cAbkh2f-DHtT0hmCtFFXVXg tools/blktap/blockstore-tls.c
    1.26 +428df8fdkg84W8yveE50EbkbTUZgjQ tools/blktap/block-async.c
    1.27 +428df8feTrgGFZEBMA_dYijy9DNs1g tools/blktap/block-async.h
    1.28  42277b02WrfP1meTDPv1M5swFq8oHQ tools/blktap/blockstore.c
    1.29  42277b02P1C0FYj3gqwTZUD8sxKCug tools/blktap/blockstore.h
    1.30  42371b8aL1JsxAXOd4bBhmZKDyjiJg tools/blktap/blockstored.c
    1.31  42371b8aD_x3L9MKsXciMNqkuk58eQ tools/blktap/bstest.c
    1.32 -42090340B3mDvcxvd9ehDHUkg46hvw tools/blktap/libgnbd/Makefile
    1.33 -42090340ZWkc5Xhf9lpQmDON8HJXww tools/blktap/libgnbd/gnbdtest.c
    1.34 -42090340ocMiUScJE3OpY7QNunvSbg tools/blktap/libgnbd/libgnbd.c
    1.35 -42090340G5_F_EeVnPORKB0pTMGGhA tools/blktap/libgnbd/libgnbd.h
    1.36  423f270cbEKiTMapKnCyqkuwGvgOMA tools/blktap/parallax-threaded.c
    1.37  423f270cFdXryIcD7HTPUl_Dbk4DAQ tools/blktap/parallax-threaded.h
    1.38  42277b03930x2TJT3PZlw6o0GERXpw tools/blktap/parallax.c
    1.39  42277b03XQYq8bujXSz7JAZ8N7j_pA tools/blktap/radix.c
    1.40  42277b03vZ4-jno_mgKmAcCW3ycRAg tools/blktap/radix.h
    1.41 +428df8fe5RYONloDWVMkM-CfHfB1vA tools/blktap/requests-async.c
    1.42 +428df8feWeKJ-9HJb5_rFqdm_xqErg tools/blktap/requests-async.h
    1.43  42277b03U_wLHL-alMA0bfxGlqldXg tools/blktap/snaplog.c
    1.44  42277b04Ryya-z662BEx8HnxNN0dGQ tools/blktap/snaplog.h
    1.45  42277b04LxFjptgZ75Z98DUAso4Prg tools/blktap/vdi.c
     2.1 --- a/tools/blktap/Makefile	Thu May 19 21:14:26 2005 +0000
     2.2 +++ b/tools/blktap/Makefile	Fri May 20 14:49:37 2005 +0000
     2.3 @@ -22,12 +22,12 @@ PLX_SRCS :=
     2.4  PLX_SRCS += vdi.c 
     2.5  PLX_SRCS += radix.c 
     2.6  PLX_SRCS += snaplog.c
     2.7 +PLX_SRCS += blockstore.c 
     2.8 +PLX_SRCS += block-async.c
     2.9  PLXT_SRCS := $(PLX_SRCS)
    2.10 -#PLXT_SRCS += blockstore-tls.c
    2.11 -PLXT_SRCS += blockstore.c
    2.12  PLXT_SRCS += parallax-threaded.c
    2.13 -PLX_SRCS += blockstore.c 
    2.14  VDI_SRCS := $(PLX_SRCS)
    2.15 +PLX_SRCS += requests-async.c
    2.16  PLX_SRCS += parallax.c
    2.17  
    2.18  VDI_TOOLS :=
    2.19 @@ -55,10 +55,11 @@ CFLAGS   += -Wp,-MD,.$(@F).d
    2.20  DEPS     = .*.d
    2.21  
    2.22  OBJS     = $(patsubst %.c,%.o,$(SRCS))
    2.23 +IBINS    = blkdump parallax $(VDI_TOOLS)
    2.24  
    2.25  LIB      = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
    2.26  
    2.27 -all: mk-symlinks blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd $(VDI_TOOLS) parallax parallax-threaded blockstored
    2.28 +all: mk-symlinks blkdump $(VDI_TOOLS) parallax parallax-threaded blockstored
    2.29  	$(MAKE) $(LIB)
    2.30  
    2.31  LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
    2.32 @@ -77,10 +78,10 @@ install: all
    2.33  	$(INSTALL_DIR) -p $(DESTDIR)/usr/include
    2.34  	$(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
    2.35  	$(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
    2.36 -	$(INSTALL_PROG) blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd $(DESTDIR)/$(BLKTAP_INSTALL_DIR)
    2.37 +	$(INSTALL_PROG) $(IBINS) $(DESTDIR)/$(BLKTAP_INSTALL_DIR)
    2.38  
    2.39  clean:
    2.40 -	rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio $(VDI_TOOLS) parallax
    2.41 +	rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump $(VDI_TOOLS) parallax parallax-threaded
    2.42  
    2.43  rpm: all
    2.44  	rm -rf staging
    2.45 @@ -101,32 +102,11 @@ libblktap.so.$(MAJOR).$(MINOR): $(OBJS)
    2.46  blkdump: $(LIB)
    2.47  	$(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkdump.c
    2.48  
    2.49 -blkcowimg: $(LIB) blkcowimg.c blkcowlib.c blkimglib.c 
    2.50 -	$(CC) $(CFLAGS) -o blkcowimg -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkcowimg.c blkimglib.c blkcowlib.c
    2.51 -
    2.52 -blkcow: $(LIB) blkcow.c blkcowlib.c
    2.53 -	$(CC) $(CFLAGS) -o blkcow -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkcow.c blkcowlib.c
    2.54 -
    2.55 -blkimg: $(LIB) blkimg.c blkimglib.c
    2.56 -	$(CC) $(CFLAGS) -o blkimg  -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkimg.c blkimglib.c
    2.57 -
    2.58 -blkgnbd: $(LIB) blkgnbd.c blkgnbdlib.c
    2.59 -	$(CC) $(CFLAGS) -o blkgnbd -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkgnbd.c blkgnbdlib.c libgnbd/libgnbd.a
    2.60 -
    2.61 -blkcowgnbd: $(LIB) blkgnbd.c blkcowlib.c blkgnbdlib.c
    2.62 -	$(CC) $(CFLAGS) -o blkcowgnbd -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkcowgnbd.c blkgnbdlib.c blkcowlib.c libgnbd/libgnbd.a
    2.63 -
    2.64 -blkaio: $(LIB) blkaio.c blkaiolib.c
    2.65 -	$(CC) $(CFLAGS) -o blkaio -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkaio.c blkaiolib.c -laio -lpthread
    2.66 -
    2.67  parallax: $(LIB) $(PLX_SRCS)
    2.68 -	$(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap -lpthread $(PLX_SRCS) libgnbd/libgnbd.a
    2.69 +	$(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap -lpthread $(PLX_SRCS) 
    2.70  
    2.71  parallax-threaded: $(LIB) $(PLXT_SRCS)
    2.72 -	$(CC) $(CFLAGS) -o parallax-threaded -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lpthread -lblktap $(PLXT_SRCS) libgnbd/libgnbd.a
    2.73 -
    2.74 -vdi_test: $(LIB) $(VDI_SRCS)
    2.75 -	$(CC) $(CFLAGS) -g3 -o vdi_test -DVDI_STANDALONE -lpthread $(VDI_SRCS)
    2.76 +	$(CC) $(CFLAGS) -o parallax-threaded -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lpthread -lblktap $(PLXT_SRCS)
    2.77  
    2.78  vdi_list: $(LIB) vdi_list.c $(VDI_SRCS)
    2.79  	$(CC) $(CFLAGS) -g3 -o vdi_list vdi_list.c -lpthread $(VDI_SRCS)
    2.80 @@ -163,16 +143,3 @@ TAGS:
    2.81  
    2.82  -include $(DEPS)
    2.83  
    2.84 -#Random testing targets.  To be removed eventually.
    2.85 -
    2.86 -rdx_cmp: $(LIB) rdx_cmp.c $(VDI_SRCS)
    2.87 -	$(CC) $(CFLAGS) -g3 -o rdx_cmp rdx_cmp.c $(VDI_SRCS)
    2.88 -
    2.89 -bb-tls: $(LIB) blockstore-benchmark.c
    2.90 -	$(CC) $(CFLAGS) -o bb-tls blockstore-benchmark.c blockstore-tls.c -lpthread
    2.91 -
    2.92 -bb-trans: $(LIB) blockstore-benchmark.c
    2.93 -	$(CC) $(CFLAGS) -o bb-trans blockstore-benchmark.c blockstore.c -lpthread
    2.94 -
    2.95 -radix-test: $(LIB) radix.c blockstore.c
    2.96 -	$(CC) $(CFLAGS) -g3 -D RADIX_STANDALONE -o radix-test radix.c blockstore-threaded-trans.c
     3.1 --- a/tools/blktap/blkaio.c	Thu May 19 21:14:26 2005 +0000
     3.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.3 @@ -1,19 +0,0 @@
     3.4 -/* blkaio.c
     3.5 - *
     3.6 - * libaio-backed disk.
     3.7 - */
     3.8 -
     3.9 -#include "blktaplib.h"
    3.10 -#include "blkaiolib.h"
    3.11 -
    3.12 -
    3.13 -int main(int argc, char *argv[])
    3.14 -{
    3.15 -    aio_init();
    3.16 -    
    3.17 -    blktap_register_ctrl_hook("aio_control", aio_control);
    3.18 -    blktap_register_request_hook("aio_request", aio_request);
    3.19 -    blktap_listen();
    3.20 -    
    3.21 -    return 0;
    3.22 -}
     4.1 --- a/tools/blktap/blkaiolib.c	Thu May 19 21:14:26 2005 +0000
     4.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.3 @@ -1,489 +0,0 @@
     4.4 -/* blkaiolib.c
     4.5 - *
     4.6 - * file/device image-backed block device -- using linux libaio.
     4.7 - * 
     4.8 - * (c) 2004 Andrew Warfield.
     4.9 - *
    4.10 - * Xend has been modified to use an amorfs:[fsid] disk tag.
    4.11 - * This will show up as device type (maj:240,min:0) = 61440.
    4.12 - *
    4.13 - * The fsid is placed in the sec_start field of the disk extent.
    4.14 - *
    4.15 - * NOTE: This doesn't work.  Grrr.
    4.16 - */
    4.17 -
    4.18 -#define _GNU_SOURCE
    4.19 -#define __USE_LARGEFILE64
    4.20 -
    4.21 -#include <stdio.h>
    4.22 -#include <stdlib.h>
    4.23 -#include <fcntl.h>
    4.24 -#include <string.h>
    4.25 -#include <db.h>       
    4.26 -#include <sys/stat.h>
    4.27 -#include <sys/types.h>
    4.28 -#include <sys/poll.h>
    4.29 -#include <unistd.h>
    4.30 -#include <errno.h>
    4.31 -#include <libaio.h>
    4.32 -#include <pthread.h>
    4.33 -#include <time.h>
    4.34 -#include "blktaplib.h"
    4.35 -
    4.36 -//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
    4.37 -#define TMP_IMAGE_FILE_NAME "fc3.image"
    4.38 -
    4.39 -#define MAX_DOMS              1024
    4.40 -#define MAX_IMGNAME_LEN        255
    4.41 -#define AMORFS_DEV           61440
    4.42 -#define MAX_REQUESTS            64 /* must be synced with the blkif drivers. */
    4.43 -#define MAX_SEGMENTS_PER_REQ    11
    4.44 -#define SECTOR_SHIFT             9
    4.45 -#define MAX_AIO_REQS   (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
    4.46 -                                                                                
    4.47 -#if 1
    4.48 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
    4.49 -#else
    4.50 -#define DPRINTF(_f, _a...) ((void)0)
    4.51 -#endif
    4.52 -           
    4.53 -#if 1                                                                        
    4.54 -#define ASSERT(_p) \
    4.55 -    if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
    4.56 -    __LINE__, __FILE__); *(int*)0=0; }
    4.57 -#else
    4.58 -#define ASSERT(_p) ((void)0)
    4.59 -#endif                                                                     
    4.60 -
    4.61 -char dbg_page[4096];
    4.62 -
    4.63 -typedef struct {
    4.64 -    /* These need to turn into an array/rbtree for multi-disk support. */
    4.65 -    int  fd;
    4.66 -    u64  fsid;
    4.67 -    char imgname[MAX_IMGNAME_LEN];
    4.68 -    blkif_vdev_t   vdevice;
    4.69 -} image_t;
    4.70 -
    4.71 -/* Note on pending_reqs: I assume all reqs are queued before they start to 
    4.72 - * get filled.  so count of 0 is an unused record.
    4.73 - */
    4.74 -typedef struct {
    4.75 -    blkif_request_t  req;
    4.76 -    int              count;
    4.77 -} pending_req_t;
    4.78 -
    4.79 -static pending_req_t    pending_list[MAX_REQUESTS];
    4.80 -image_t                *images[MAX_DOMS];
    4.81 -
    4.82 -static io_context_t  ctx;
    4.83 -static struct iocb  *iocb_free[MAX_AIO_REQS];
    4.84 -static int           iocb_free_count;
    4.85 -
    4.86 -/* ---[ Notification mecahnism ]--------------------------------------- */
    4.87 -
    4.88 -enum { 
    4.89 -    READ   = 0,
    4.90 -    WRITE  = 1
    4.91 -};
    4.92 -
    4.93 -static int aio_notify[2];
    4.94 -static volatile int aio_listening = 0;
    4.95 -
    4.96 -static struct io_event aio_events[MAX_AIO_REQS];
    4.97 -static int             aio_event_count = 0;
    4.98 -
    4.99 -/* this is commented out in libaio.h for some reason. */
   4.100 -extern int io_queue_wait(io_context_t ctx, struct timespec *timeout);
   4.101 -
   4.102 -static void *notifier_thread(void *arg)
   4.103 -{
   4.104 -    int ret; 
   4.105 -    int msg = 0x00feeb00;
   4.106 -    
   4.107 -    printf("Notifier thread started.\n");
   4.108 -    for (;;) {
   4.109 -        //if ((aio_listening) && ((ret = io_queue_wait(ctx, 0)) == 0)) {
   4.110 -        if ((aio_listening) && 
   4.111 -           ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0)) {
   4.112 -            aio_event_count = ret;
   4.113 -            printf("[Notifying! (%d)]\n", aio_event_count);
   4.114 -            aio_listening = 0;
   4.115 -            write(aio_notify[WRITE], &msg, sizeof(msg));
   4.116 -            fsync(aio_notify[WRITE]);
   4.117 -        } else {
   4.118 -            if (aio_listening)
   4.119 -                printf("[io_queue_wait error! %d]\n", errno);
   4.120 -            usleep(1000); /* Not ready to read. */
   4.121 -        }
   4.122 -    }
   4.123 -}
   4.124 -
   4.125 -/* -------------------------------------------------------------------- */
   4.126 -
   4.127 -int aio_control(control_msg_t *msg)
   4.128 -{
   4.129 -    domid_t  domid;
   4.130 -    DB      *db;
   4.131 -    int      ret;
   4.132 -    
   4.133 -    if (msg->type != CMSG_BLKIF_BE) 
   4.134 -    {
   4.135 -        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
   4.136 -        return 0;
   4.137 -    }
   4.138 -    
   4.139 -    switch(msg->subtype)
   4.140 -    {
   4.141 -    case CMSG_BLKIF_BE_CREATE:
   4.142 -        if ( msg->length != sizeof(blkif_be_create_t) )
   4.143 -            goto parse_error;
   4.144 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
   4.145 -                ((blkif_be_create_t *)msg->msg)->domid,
   4.146 -                ((blkif_be_create_t *)msg->msg)->blkif_handle);
   4.147 -        domid = ((blkif_be_create_t *)msg->msg)->domid;
   4.148 -        if (images[domid] != NULL) {
   4.149 -            printf("attempt to connect from an existing dom!\n");
   4.150 -            return 0;
   4.151 -        }
   4.152 -        
   4.153 -        images[domid] = (image_t *)malloc(sizeof(image_t));
   4.154 -        if (images[domid] == NULL) {
   4.155 -            printf("error allocating image record.\n");
   4.156 -            return 0;
   4.157 -        }
   4.158 -        
   4.159 -        images[domid]->fd  = -1;
   4.160 -        images[domid]->fsid = 0;
   4.161 -        
   4.162 -        printf("Image connected.\n");
   4.163 -        break;   
   4.164 -        
   4.165 -    case CMSG_BLKIF_BE_DESTROY:
   4.166 -        if ( msg->length != sizeof(blkif_be_destroy_t) )
   4.167 -            goto parse_error;
   4.168 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
   4.169 -                ((blkif_be_destroy_t *)msg->msg)->domid,
   4.170 -                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
   4.171 -        
   4.172 -        domid = ((blkif_be_destroy_t *)msg->msg)->domid;
   4.173 -        if (images[domid] != NULL) {
   4.174 -            if (images[domid]->fd != -1)
   4.175 -                close( images[domid]->fd );
   4.176 -            free( images[domid] );
   4.177 -            images[domid] = NULL;
   4.178 -        }
   4.179 -        break;  
   4.180 -    case CMSG_BLKIF_BE_VBD_GROW:
   4.181 -    {
   4.182 -        blkif_be_vbd_grow_t *grow;
   4.183 -        
   4.184 -        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
   4.185 -            goto parse_error;
   4.186 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
   4.187 -                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
   4.188 -                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
   4.189 -                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
   4.190 -        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
   4.191 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
   4.192 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
   4.193 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
   4.194 -        grow = (blkif_be_vbd_grow_t *)msg->msg;
   4.195 -        domid = grow->domid;
   4.196 -        if (images[domid] == NULL) {
   4.197 -            printf("VBD_GROW on unconnected domain!\n");
   4.198 -            return 0;
   4.199 -        }
   4.200 -        
   4.201 -        if (grow->extent.device != AMORFS_DEV) {
   4.202 -            printf("VBD_GROW on non-amorfs device!\n");
   4.203 -            return 0;
   4.204 -        }
   4.205 -        
   4.206 -        /* TODO: config support for arbitrary image files/modes. */
   4.207 -        sprintf(images[domid]->imgname, TMP_IMAGE_FILE_NAME);
   4.208 -        
   4.209 -        images[domid]->fsid   = grow->extent.sector_start;
   4.210 -        images[domid]->vdevice = grow->vdevice; 
   4.211 -        images[domid]->fd = open(TMP_IMAGE_FILE_NAME, 
   4.212 -                O_RDWR | O_DIRECT | O_LARGEFILE);
   4.213 -        if (images[domid]->fd < 0) {
   4.214 -            printf("Couldn't open image file! %d\n", errno);
   4.215 -            return 0;
   4.216 -        }
   4.217 -        
   4.218 -        printf("Image file opened. (%s)\n", images[domid]->imgname);
   4.219 -        break;
   4.220 -    }    
   4.221 -    }
   4.222 -    return 0;
   4.223 -parse_error:
   4.224 -    printf("Bad control message!\n");
   4.225 -    return 0;
   4.226 -    
   4.227 -create_failed:
   4.228 -    /* TODO: close the db ref. */
   4.229 -    return 0;
   4.230 -}    
   4.231 - 
   4.232 -int aio_request(blkif_request_t *req)
   4.233 -{
   4.234 -    int fd;
   4.235 -    u64 sector;
   4.236 -    char *spage, *dpage;
   4.237 -    int ret, i, idx;
   4.238 -    blkif_response_t *rsp;
   4.239 -    domid_t dom = ID_TO_DOM(req->id);
   4.240 -    
   4.241 -    if ((images[dom] == NULL) || (images[dom]->fd == -1)) {
   4.242 -        printf("Data request for unknown domain!!! %d\n", dom);
   4.243 -        rsp = (blkif_response_t *)req;
   4.244 -        rsp->id = req->id;
   4.245 -        rsp->operation = req->operation;
   4.246 -        rsp->status = BLKIF_RSP_ERROR;
   4.247 -        return BLKTAP_RESPOND;
   4.248 -    }
   4.249 -    
   4.250 -    fd = images[dom]->fd;
   4.251 -    
   4.252 -    switch (req->operation) 
   4.253 -    {
   4.254 -    case BLKIF_OP_PROBE:
   4.255 -    {
   4.256 -        struct stat stat;
   4.257 -        vdisk_t *img_info;
   4.258 -        
   4.259 -        
   4.260 -        /* We expect one buffer only. */
   4.261 -        if ( req->nr_segments != 1 )
   4.262 -            goto err;
   4.263 -                                                                                
   4.264 -        /* Make sure the buffer is page-sized. */
   4.265 -        if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
   4.266 -             (blkif_last_sect (req->frame_and_sects[0]) != 7) )
   4.267 -            goto err;
   4.268 -
   4.269 -        /* loop for multiple images would start here. */
   4.270 -        
   4.271 -        ret = fstat(fd, &stat);
   4.272 -        if (ret != 0) {
   4.273 -            printf("Couldn't stat image in PROBE!\n");
   4.274 -            goto err;
   4.275 -        }
   4.276 -        
   4.277 -        img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
   4.278 -        img_info[0].device   = images[dom]->vdevice;
   4.279 -        img_info[0].info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
   4.280 -        img_info[0].capacity = (stat.st_size >> SECTOR_SHIFT);
   4.281 -        
   4.282 -        if (img_info[0].capacity == 0)
   4.283 -            img_info[0].capacity = ((u64)1 << 63); // xend does this too.
   4.284 -        
   4.285 -        DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", img_info[0].device,
   4.286 -                img_info[0].capacity);
   4.287 -        
   4.288 -        rsp = (blkif_response_t *)req;
   4.289 -        rsp->id = req->id;
   4.290 -        rsp->operation = BLKIF_OP_PROBE;
   4.291 -        rsp->status = 1; /* number of disks */
   4.292 -        
   4.293 -        return  BLKTAP_RESPOND;
   4.294 -    }    
   4.295 -    case BLKIF_OP_WRITE:
   4.296 -    {
   4.297 -        unsigned long size;
   4.298 -        struct iocb *io;
   4.299 -        struct iocb *ioq[MAX_SEGMENTS_PER_REQ]; 
   4.300 -        
   4.301 -        idx = ID_TO_IDX(req->id);
   4.302 -        ASSERT(pending_list[idx].count == 0);
   4.303 -        memcpy(&pending_list[idx].req, req, sizeof(*req));
   4.304 -        pending_list[idx].count = req->nr_segments;
   4.305 -        
   4.306 -        for (i = 0; i < req->nr_segments; i++) {
   4.307 -            
   4.308 -            sector = req->sector_number + (8*i);
   4.309 -            
   4.310 -            size = blkif_last_sect (req->frame_and_sects[i]) -
   4.311 -                   blkif_first_sect(req->frame_and_sects[i]) + 1;
   4.312 -            
   4.313 -            DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
   4.314 -                    req->sector_number, sector, 
   4.315 -                    blkif_first_sect(req->frame_and_sects[i]),
   4.316 -                    blkif_last_sect (req->frame_and_sects[i]),
   4.317 -                    (long)(sector << SECTOR_SHIFT));
   4.318 -                        
   4.319 -            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
   4.320 -            spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
   4.321 -            
   4.322 -            /*convert size and sector to byte offsets */
   4.323 -            size   <<= SECTOR_SHIFT;
   4.324 -            sector <<= SECTOR_SHIFT;
   4.325 -            
   4.326 -            io = iocb_free[--iocb_free_count];
   4.327 -            io_prep_pwrite(io, fd, spage, size, sector);
   4.328 -            io->data = (void *)idx;
   4.329 -            ioq[i] = io;
   4.330 -        }
   4.331 -        
   4.332 -        ret = io_submit(ctx, req->nr_segments, ioq);
   4.333 -        if (ret < 0)
   4.334 -            printf("BADNESS: io_submit error! (%d)\n", errno);
   4.335 -        
   4.336 -        pending_list[idx].count = req->nr_segments;
   4.337 -        
   4.338 -        return BLKTAP_STOLEN;
   4.339 -        
   4.340 -    }
   4.341 -    case BLKIF_OP_READ:
   4.342 -    {
   4.343 -        unsigned long size;
   4.344 -        struct iocb *io;
   4.345 -        struct iocb *ioq[MAX_SEGMENTS_PER_REQ]; 
   4.346 -        
   4.347 -        idx = ID_TO_IDX(req->id);
   4.348 -        ASSERT(pending_list[idx].count == 0);
   4.349 -        memcpy(&pending_list[idx].req, req, sizeof(*req));
   4.350 -        pending_list[idx].count = req->nr_segments;
   4.351 -        
   4.352 -        for (i = 0; i < req->nr_segments; i++) {
   4.353 -            
   4.354 -            sector  = req->sector_number + (8*i);
   4.355 -            
   4.356 -            size = blkif_last_sect (req->frame_and_sects[i]) -
   4.357 -                   blkif_first_sect(req->frame_and_sects[i]) + 1;
   4.358 -            
   4.359 -            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
   4.360 -            dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
   4.361 -            
   4.362 -            
   4.363 -            DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
   4.364 -                    "pos: %15lu dpage: %p\n", 
   4.365 -                    req->sector_number, sector, 
   4.366 -                    blkif_first_sect(req->frame_and_sects[i]),
   4.367 -                    blkif_last_sect (req->frame_and_sects[i]),
   4.368 -                    (long)(sector << SECTOR_SHIFT), dpage);
   4.369 -            
   4.370 -            /*convert size and sector to byte offsets */
   4.371 -            size   <<= SECTOR_SHIFT;
   4.372 -            sector <<= SECTOR_SHIFT;
   4.373 -            
   4.374 -            io = iocb_free[--iocb_free_count];
   4.375 -            
   4.376 -            io_prep_pread(io, fd, dpage, size, sector);
   4.377 -            io->data = (void *)idx;
   4.378 -            
   4.379 -            ioq[i] = io;
   4.380 -        }
   4.381 -        
   4.382 -        ret = io_submit(ctx, req->nr_segments, ioq);
   4.383 -        if (ret < 0)
   4.384 -            printf("BADNESS: io_submit error! (%d)\n", errno);
   4.385 -        
   4.386 -        
   4.387 -        return BLKTAP_STOLEN;
   4.388 -        
   4.389 -    }
   4.390 -    }
   4.391 -    
   4.392 -    printf("Unknown block operation!\n");
   4.393 -err:
   4.394 -    rsp = (blkif_response_t *)req;
   4.395 -    rsp->id = req->id;
   4.396 -    rsp->operation = req->operation;
   4.397 -    rsp->status = BLKIF_RSP_ERROR;
   4.398 -    return BLKTAP_RESPOND;  
   4.399 -}
   4.400 -
   4.401 -
   4.402 -int aio_pollhook(int fd)
   4.403 -{
   4.404 -    struct io_event *ep;
   4.405 -    int n, ret, idx;
   4.406 -    blkif_request_t *req;
   4.407 -    blkif_response_t *rsp;
   4.408 -    
   4.409 -    DPRINTF("aio_hook(): \n");
   4.410 -    
   4.411 -    for (ep = aio_events; aio_event_count-- > 0; ep++) {
   4.412 -        struct iocb *io = ep->obj;
   4.413 -        idx = (int) ep->data;
   4.414 -        
   4.415 -        if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){
   4.416 -            printf("gnbd returned a bad cookie (%u)!\n", idx);
   4.417 -            break;
   4.418 -        }
   4.419 -        
   4.420 -        if ((int)ep->res < 0) printf("aio request error! (%d,%d)\n", 
   4.421 -            (int)ep->res, (int)ep->res2);
   4.422 -        
   4.423 -        pending_list[idx].count--;
   4.424 -        iocb_free[iocb_free_count++] = io;
   4.425 -        
   4.426 -        if (pending_list[idx].count == 0) {
   4.427 -            blkif_request_t tmp = pending_list[idx].req;
   4.428 -            rsp = (blkif_response_t *)&pending_list[idx].req;
   4.429 -            rsp->id = tmp.id;
   4.430 -            rsp->operation = tmp.operation;
   4.431 -            rsp->status = BLKIF_RSP_OKAY;
   4.432 -            blktap_inject_response(rsp);
   4.433 -        }
   4.434 -    }
   4.435 -    
   4.436 -    printf("pollhook done!\n");
   4.437 -    
   4.438 -    read(aio_notify[READ], &idx, sizeof(idx));
   4.439 -    aio_listening = 1;
   4.440 -    
   4.441 -    return 0;
   4.442 -}
   4.443 -
   4.444 -/* the image library terminates the request stream. _resp is a noop. */
   4.445 -int aio_response(blkif_response_t *rsp)
   4.446 -{   
   4.447 -    return BLKTAP_PASS;
   4.448 -}
   4.449 -
   4.450 -void aio_init(void)
   4.451 -{
   4.452 -    int i, rc;
   4.453 -    pthread_t p;
   4.454 -    
   4.455 -    for (i = 0; i < MAX_DOMS; i++)
   4.456 -        images[i] = NULL;
   4.457 -    
   4.458 -    for (i = 0; i < MAX_REQUESTS; i++)
   4.459 -        pending_list[i].count = 0; 
   4.460 -    
   4.461 -    memset(&ctx, 0, sizeof(ctx));
   4.462 -    rc = io_queue_init(MAX_AIO_REQS, &ctx);
   4.463 -    if (rc != 0) {
   4.464 -        printf("queue_init failed! (%d)\n", rc);
   4.465 -        exit(0);
   4.466 -    }
   4.467 -    
   4.468 -    for (i=0; i<MAX_AIO_REQS; i++) {
   4.469 -        if (!(iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb)))) {
   4.470 -            printf("error allocating iocb array\n");
   4.471 -            exit(0);
   4.472 -        }
   4.473 -        iocb_free_count = i;
   4.474 -    }
   4.475 -    
   4.476 -    rc = pipe(aio_notify);
   4.477 -    if (rc != 0) {
   4.478 -        printf("pipe failed! (%d)\n", errno);
   4.479 -        exit(0);
   4.480 -    }
   4.481 -    
   4.482 -    rc = pthread_create(&p, NULL, notifier_thread, NULL);
   4.483 -    if (rc != 0) {
   4.484 -        printf("pthread_create failed! (%d)\n", errno);
   4.485 -        exit(0);
   4.486 -    }
   4.487 -    
   4.488 -    aio_listening = 1;
   4.489 -    
   4.490 -    blktap_attach_poll(aio_notify[READ], POLLIN, aio_pollhook);
   4.491 -}
   4.492 -
     5.1 --- a/tools/blktap/blkaiolib.h	Thu May 19 21:14:26 2005 +0000
     5.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.3 @@ -1,16 +0,0 @@
     5.4 -/* blkaiolib.h
     5.5 - *
     5.6 - * aio image-backed block device.
     5.7 - * 
     5.8 - * (c) 2004 Andrew Warfield.
     5.9 - *
    5.10 - * Xend has been modified to use an amorfs:[fsid] disk tag.
    5.11 - * This will show up as device type (maj:240,min:0) = 61440.
    5.12 - *
    5.13 - * The fsid is placed in the sec_start field of the disk extent.
    5.14 - */
    5.15 -
    5.16 -int aio_control(control_msg_t *msg);
    5.17 -int aio_request(blkif_request_t *req);
    5.18 -int aio_response(blkif_response_t *rsp); /* noop */
    5.19 -void aio_init(void);
     6.1 --- a/tools/blktap/blkcow.c	Thu May 19 21:14:26 2005 +0000
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,31 +0,0 @@
     6.4 -/* blkcow.c
     6.5 - *
     6.6 - * copy on write a block device.  in a really inefficient way.
     6.7 - * 
     6.8 - * (c) 2004 Andrew Warfield.
     6.9 - *
    6.10 - * This uses whatever backend the tap is attached to as the read-only
    6.11 - * underlay -- for the moment.
    6.12 - *
    6.13 - * Xend has been modified to use an amorfs:[fsid] disk tag.
    6.14 - * This will show up as device type (maj:240,min:0) = 61440.
    6.15 - *
    6.16 - * The fsid is placed in the sec_start field of the disk extent,
    6.17 - * the cow plugin uses this to identify a unique overlay.
    6.18 - */
    6.19 -
    6.20 -#include "blktaplib.h"
    6.21 -#include "blkcowlib.h"
    6.22 -
    6.23 -
    6.24 -int main(int argc, char *argv[])
    6.25 -{
    6.26 -    cow_init();
    6.27 -    
    6.28 -    blktap_register_ctrl_hook("cow_control", cow_control);
    6.29 -    blktap_register_request_hook("cow_request", cow_request);
    6.30 -    blktap_register_response_hook("cow_response", cow_response);
    6.31 -    blktap_listen();
    6.32 -    
    6.33 -    return 0;
    6.34 -}
     7.1 --- a/tools/blktap/blkcowgnbd.c	Thu May 19 21:14:26 2005 +0000
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,24 +0,0 @@
     7.4 -/* blkcowgnbd.c
     7.5 - *
     7.6 - * gnbd-backed cow.
     7.7 - */
     7.8 -
     7.9 -#include "blktaplib.h"
    7.10 -#include "blkcowlib.h"
    7.11 -#include "blkgnbdlib.h"
    7.12 -
    7.13 -
    7.14 -int main(int argc, char *argv[])
    7.15 -{
    7.16 -    cow_init();
    7.17 -    gnbd_init();
    7.18 -    
    7.19 -    blktap_register_ctrl_hook("cow_control", cow_control);
    7.20 -    blktap_register_ctrl_hook("gnbd_control", gnbd_control);
    7.21 -    blktap_register_request_hook("cow_request", cow_request);
    7.22 -    blktap_register_request_hook("gnbd_request", gnbd_request);
    7.23 -    blktap_register_response_hook("cow_response", cow_response);
    7.24 -    blktap_listen();
    7.25 -    
    7.26 -    return 0;
    7.27 -}
     8.1 --- a/tools/blktap/blkcowimg.c	Thu May 19 21:14:26 2005 +0000
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,24 +0,0 @@
     8.4 -/* blkcowimg.c
     8.5 - *
     8.6 - * file-backed cow.
     8.7 - */
     8.8 -
     8.9 -#include "blktaplib.h"
    8.10 -#include "blkcowlib.h"
    8.11 -#include "blkimglib.h"
    8.12 -
    8.13 -
    8.14 -int main(int argc, char *argv[])
    8.15 -{
    8.16 -    cow_init();
    8.17 -    image_init();
    8.18 -    
    8.19 -    blktap_register_ctrl_hook("cow_control", cow_control);
    8.20 -    blktap_register_ctrl_hook("image_control", image_control);
    8.21 -    blktap_register_request_hook("cow_request", cow_request);
    8.22 -    blktap_register_request_hook("image_request", image_request);
    8.23 -    blktap_register_response_hook("cow_response", cow_response);
    8.24 -    blktap_listen();
    8.25 -    
    8.26 -    return 0;
    8.27 -}
     9.1 --- a/tools/blktap/blkcowlib.c	Thu May 19 21:14:26 2005 +0000
     9.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.3 @@ -1,380 +0,0 @@
     9.4 -/* blkcowlib.c
     9.5 - *
     9.6 - * copy on write a block device.  in a really inefficient way.
     9.7 - * 
     9.8 - * (c) 2004 Andrew Warfield.
     9.9 - *
    9.10 - * This uses whatever backend the tap is attached to as the read-only
    9.11 - * underlay -- for the moment.
    9.12 - *
    9.13 - * Xend has been modified to use an amorfs:[fsid] disk tag.
    9.14 - * This will show up as device type (maj:240,min:0) = 61440.
    9.15 - *
    9.16 - * The fsid is placed in the sec_start field of the disk extent,
    9.17 - * the cow plugin uses this to identify a unique overlay.
    9.18 - */
    9.19 -
    9.20 -#include <stdio.h>
    9.21 -#include <stdlib.h>
    9.22 -#include <string.h>
    9.23 -#include <db.h>
    9.24 -#include "blktaplib.h"
    9.25 -
    9.26 -#define MAX_DOMS        1024
    9.27 -#define MAX_DBNAME_LEN   255
    9.28 -#define AMORFS_DEV     61440
    9.29 -#define MAX_REQUESTS      64 /* must be synced with the blkif drivers. */
    9.30 -                                                                                
    9.31 -#if 0
    9.32 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
    9.33 -#else
    9.34 -#define DPRINTF(_f, _a...) ((void)0)
    9.35 -#endif
    9.36 -    
    9.37 -/* Berkeley db has different params for open() after 4.1 */
    9.38 -#ifndef DB_VERSION_MAJOR
    9.39 -# define DB_VERSION_MAJOR 1
    9.40 -#endif /* DB_VERSION_MAJOR */
    9.41 -#ifndef DB_VERSION_MINOR
    9.42 -# define DB_VERSION_MINOR 0
    9.43 -#endif /* DB_VERSION_MINOR */
    9.44 -
    9.45 -typedef struct {
    9.46 -    DB   *db;
    9.47 -    u64  fsid;
    9.48 -    char dbname[MAX_DBNAME_LEN];
    9.49 -} cow_t;
    9.50 -
    9.51 -cow_t           *cows[MAX_DOMS];
    9.52 -blkif_request_t *reread_list[MAX_REQUESTS];
    9.53 -
    9.54 -int cow_control(control_msg_t *msg)
    9.55 -{
    9.56 -    domid_t  domid;
    9.57 -    DB      *db;
    9.58 -    int      ret;
    9.59 -    
    9.60 -    if (msg->type != CMSG_BLKIF_BE) 
    9.61 -    {
    9.62 -        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
    9.63 -        return 0;
    9.64 -    }
    9.65 -    
    9.66 -    switch(msg->subtype)
    9.67 -    {
    9.68 -    case CMSG_BLKIF_BE_CREATE:
    9.69 -        if ( msg->length != sizeof(blkif_be_create_t) )
    9.70 -            goto parse_error;
    9.71 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
    9.72 -                ((blkif_be_create_t *)msg->msg)->domid,
    9.73 -                ((blkif_be_create_t *)msg->msg)->blkif_handle);
    9.74 -        domid = ((blkif_be_create_t *)msg->msg)->domid;
    9.75 -        if (cows[domid] != NULL) {
    9.76 -            printf("attempt to connect from an existing dom!\n");
    9.77 -            return 0;
    9.78 -        }
    9.79 -        
    9.80 -        cows[domid] = (cow_t *)malloc(sizeof(cow_t));
    9.81 -        if (cows[domid] == NULL) {
    9.82 -            printf("error allocating cow.\n");
    9.83 -            return 0;
    9.84 -        }
    9.85 -        
    9.86 -        cows[domid]->db   = NULL;
    9.87 -        cows[domid]->fsid = 0;
    9.88 -        
    9.89 -        printf("COW connected.\n");
    9.90 -        break;   
    9.91 -        
    9.92 -    case CMSG_BLKIF_BE_DESTROY:
    9.93 -        if ( msg->length != sizeof(blkif_be_destroy_t) )
    9.94 -            goto parse_error;
    9.95 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
    9.96 -                ((blkif_be_destroy_t *)msg->msg)->domid,
    9.97 -                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
    9.98 -        
    9.99 -        domid = ((blkif_be_destroy_t *)msg->msg)->domid;
   9.100 -        if (cows[domid] != NULL) {
   9.101 -            if (cows[domid]->db != NULL)
   9.102 -                cows[domid]->db->close(cows[domid]->db, 0);
   9.103 -            free(cows[domid]);
   9.104 -            cows[domid] = NULL;
   9.105 -        }
   9.106 -        break;  
   9.107 -    case CMSG_BLKIF_BE_VBD_GROW:
   9.108 -    {
   9.109 -        blkif_be_vbd_grow_t *grow;
   9.110 -        
   9.111 -        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
   9.112 -            goto parse_error;
   9.113 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
   9.114 -                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
   9.115 -                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
   9.116 -                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
   9.117 -        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
   9.118 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
   9.119 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
   9.120 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
   9.121 -        grow = (blkif_be_vbd_grow_t *)msg->msg;
   9.122 -        domid = grow->domid;
   9.123 -        if (cows[domid] == NULL) {
   9.124 -            printf("VBD_GROW on unconnected domain!\n");
   9.125 -            return 0;
   9.126 -        }
   9.127 -        
   9.128 -        if (grow->extent.device != AMORFS_DEV) {
   9.129 -            printf("VBD_GROW on non-amorfs device!\n");
   9.130 -            return 0;
   9.131 -        }
   9.132 -        
   9.133 -        sprintf(&cows[domid]->dbname[0], "%020llu.db",
   9.134 -                grow->extent.sector_start);
   9.135 -        
   9.136 -        cows[domid]->fsid = grow->extent.sector_start;
   9.137 -            
   9.138 -        if ((ret = db_create(&db, NULL, 0)) != 0) {
   9.139 -            fprintf(stderr, "db_create: %s\n", db_strerror(ret));
   9.140 -            return 0;
   9.141 -        }
   9.142 -        
   9.143 -        
   9.144 -#if DB_VERSION_MAJOR < 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 1)
   9.145 -
   9.146 -        if ((ret = db->open( db, cows[domid]->dbname, NULL, DB_BTREE, 
   9.147 -                DB_CREATE, 0664)) != 0) {
   9.148 -            
   9.149 -#else /* DB_VERSION >= 4.1 */
   9.150 -        
   9.151 -        if ((ret = db->open( db, NULL, cows[domid]->dbname, NULL, DB_BTREE, 
   9.152 -                DB_CREATE, 0664)) != 0) {
   9.153 -            
   9.154 -#endif /* DB_VERSION < 4.1 */
   9.155 -
   9.156 -            db->err(db, ret, "%s", cows[domid]->dbname);
   9.157 -            goto create_failed;
   9.158 -        }
   9.159 -        cows[domid]->db = db;
   9.160 -        printf("Overlay db opened. (%s)\n", cows[domid]->dbname);
   9.161 -        break;
   9.162 -    }    
   9.163 -    }
   9.164 -    return 0;
   9.165 -parse_error:
   9.166 -    printf("Bad control message!\n");
   9.167 -    return 0;
   9.168 -    
   9.169 -create_failed:
   9.170 -    /* TODO: close the db ref. */
   9.171 -    return 0;
   9.172 -}    
   9.173 - 
   9.174 -int cow_request(blkif_request_t *req)
   9.175 -{
   9.176 -    DB *db;
   9.177 -    DBT key, data;
   9.178 -    u64 sector;
   9.179 -    char *spage, *dpage;
   9.180 -    int ret, i, idx;
   9.181 -    blkif_response_t *rsp;
   9.182 -    domid_t dom = ID_TO_DOM(req->id);
   9.183 -    
   9.184 -    if ((cows[dom] == NULL) || (cows[dom]->db == NULL)) {
   9.185 -        printf("Data request for unknown domain!!! %d\n", dom);
   9.186 -        rsp = (blkif_response_t *)req;
   9.187 -        rsp->id = req->id;
   9.188 -        rsp->operation = req->operation;
   9.189 -        rsp->status = BLKIF_RSP_ERROR;
   9.190 -        return BLKTAP_RESPOND;
   9.191 -    }
   9.192 -    
   9.193 -    db = cows[dom]->db;
   9.194 -    
   9.195 -    switch (req->operation) 
   9.196 -    {
   9.197 -    case BLKIF_OP_PROBE:
   9.198 -/* debug -- delete */
   9.199 -idx = ID_TO_IDX(req->id);
   9.200 -reread_list[idx] = (blkif_request_t *)malloc(sizeof(*req));
   9.201 -memcpy(reread_list[idx], req, sizeof(*req));
   9.202 -        return  BLKTAP_PASS;
   9.203 -        
   9.204 -    case BLKIF_OP_WRITE:
   9.205 -        for (i = 0; i < req->nr_segments; i++) {
   9.206 -            memset(&key, 0, sizeof(key));
   9.207 -	    memset(&data, 0, sizeof(data));
   9.208 -            
   9.209 -            sector = req->sector_number + (8*i);
   9.210 -            key.data = &sector;
   9.211 -            key.size = sizeof(sector);
   9.212 -            
   9.213 -            spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
   9.214 -            data.data = spage;
   9.215 -            data.size = PAGE_SIZE;
   9.216 -            
   9.217 -            
   9.218 -            DPRINTF("cWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
   9.219 -                    req->sector_number, sector, 
   9.220 -                    blkif_first_sect(req->frame_and_sects[i]),
   9.221 -                    blkif_last_sect (req->frame_and_sects[i]),
   9.222 -                    (long)(sector << 9));
   9.223 -            
   9.224 -            if ((ret = db->put(db, NULL, &key, &data, 0)) == 0)
   9.225 -                DPRINTF("db: %lld: key stored.\n", *((u64 *)key.data));
   9.226 -            else {
   9.227 -                db->err(db, ret, "DB->put");
   9.228 -                goto err;
   9.229 -            }
   9.230 -        }
   9.231 -        
   9.232 -        rsp = (blkif_response_t *)req;
   9.233 -        rsp->id = req->id;
   9.234 -        rsp->operation = BLKIF_OP_WRITE;
   9.235 -        rsp->status = BLKIF_RSP_OKAY;
   9.236 -        
   9.237 -        return BLKTAP_RESPOND;
   9.238 -
   9.239 -    case BLKIF_OP_READ:
   9.240 -        for (i = 0; i < req->nr_segments; i++) {
   9.241 -            memset(&key, 0, sizeof(key));
   9.242 -	    memset(&data, 0, sizeof(data));
   9.243 -            
   9.244 -            sector = req->sector_number + (8*i);
   9.245 -            key.data = &sector;
   9.246 -            key.size = sizeof(sector);
   9.247 -            
   9.248 -            DPRINTF("cREAD: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
   9.249 -                    req->sector_number, sector, 
   9.250 -                    blkif_first_sect(req->frame_and_sects[i]),
   9.251 -                    blkif_last_sect (req->frame_and_sects[i]),
   9.252 -                    (long)(sector << 9));
   9.253 -
   9.254 -            if ((ret = db->get(db, NULL, &key, &data, 0)) == 0) {
   9.255 -                DPRINTF("db: %llu: key retrieved (req).\n",
   9.256 -                    *((u64 *)key.data));
   9.257 -                
   9.258 -                dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
   9.259 -                spage = data.data;
   9.260 -                memcpy(dpage, spage, PAGE_SIZE);
   9.261 -
   9.262 -            } else if (ret == DB_NOTFOUND) {
   9.263 -                idx = ID_TO_IDX(req->id);
   9.264 -                if (idx > MAX_REQUESTS) {
   9.265 -                    printf("Bad index!\n");
   9.266 -                    goto err;
   9.267 -                }
   9.268 -                if (reread_list[idx] != NULL) {
   9.269 -                    printf("Dupe index!\n");
   9.270 -                    goto err;
   9.271 -                }
   9.272 -                reread_list[idx] = (blkif_request_t *)malloc(sizeof(*req));
   9.273 -                memcpy(reread_list[idx], req, sizeof(*req));
   9.274 -                return BLKTAP_PASS;
   9.275 -            } else {
   9.276 -                db->err(db, ret, "DB->get");
   9.277 -                goto err;
   9.278 -            }
   9.279 -        }
   9.280 -
   9.281 -
   9.282 -        rsp = (blkif_response_t *)req;
   9.283 -        rsp->id = req->id;
   9.284 -        rsp->operation = BLKIF_OP_READ;
   9.285 -        rsp->status = BLKIF_RSP_OKAY;
   9.286 -        return BLKTAP_RESPOND;
   9.287 -    }
   9.288 -    
   9.289 -    printf("Unknow block operation!\n");
   9.290 -    return BLKTAP_PASS;
   9.291 -err:
   9.292 -    rsp = (blkif_response_t *)req;
   9.293 -    rsp->id = req->id;
   9.294 -    rsp->operation = req->operation;
   9.295 -    rsp->status = BLKIF_RSP_ERROR;
   9.296 -    return BLKTAP_RESPOND;  
   9.297 -}
   9.298 -
   9.299 -int cow_response(blkif_response_t *rsp)
   9.300 -{   
   9.301 -    blkif_request_t *req;
   9.302 -    int i, ret;
   9.303 -    DB *db;
   9.304 -    DBT key, data;
   9.305 -    u64 sector;
   9.306 -    char *spage, *dpage;
   9.307 -    int idx = ID_TO_IDX(rsp->id);
   9.308 -    domid_t dom;
   9.309 -    
   9.310 -    /* don't touch erroring responses. */
   9.311 -    if (rsp->status == BLKIF_RSP_ERROR)
   9.312 -        return BLKTAP_PASS;
   9.313 -    
   9.314 -    if ((rsp->operation == BLKIF_OP_READ) && (reread_list[idx] != NULL))
   9.315 -    {
   9.316 -        req = reread_list[idx];
   9.317 -        dom = ID_TO_DOM(req->id);
   9.318 -
   9.319 -        if ((cows[dom] == NULL) || (cows[dom]->db == NULL)) {
   9.320 -            printf("Response from unknown domain!!! Very badness! %d\n", dom);
   9.321 -            return BLKTAP_PASS;
   9.322 -        }
   9.323 -    
   9.324 -        db = cows[dom]->db;
   9.325 -        
   9.326 -        for (i = 0; i < req->nr_segments; i++) {
   9.327 -            memset(&key, 0, sizeof(key));
   9.328 -	    memset(&data, 0, sizeof(data));
   9.329 -            
   9.330 -            sector = req->sector_number + (8*i);
   9.331 -            key.data = &sector;
   9.332 -            key.size = sizeof(sector);
   9.333 -            
   9.334 -            if ((ret = db->get(db, NULL, &key, &data, 0)) == 0) {
   9.335 -                printf("db: %llu: key retrieved (rsp).\n",
   9.336 -                    *((u64 *)key.data));
   9.337 -                
   9.338 -                dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
   9.339 -                spage = data.data;
   9.340 -                memcpy(dpage, spage, PAGE_SIZE);
   9.341 -
   9.342 -            } else if (ret == DB_NOTFOUND) {
   9.343 -                continue; /* We read this from disk. */
   9.344 -            } else {
   9.345 -                db->err(db, ret, "DB->get");
   9.346 -                goto err;
   9.347 -            }
   9.348 -        }
   9.349 -        free(reread_list[idx]);
   9.350 -        reread_list[idx] = NULL;
   9.351 -    }
   9.352 -    
   9.353 -    if (rsp->operation == BLKIF_OP_PROBE) {
   9.354 -        
   9.355 -        vdisk_t *img_info;
   9.356 -        
   9.357 -        req = reread_list[idx];
   9.358 -        img_info = (vdisk_t *)(char *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
   9.359 -        for (i =0; i < rsp->status; i++) 
   9.360 -            printf("PROBE (%d) device: 0x%04x capacity: %llu, info: 0x%04x\n", 
   9.361 -                    i,
   9.362 -                    img_info[0].device,
   9.363 -                    img_info[0].capacity,
   9.364 -                    img_info[0].info);
   9.365 -        free(reread_list[idx]);
   9.366 -        reread_list[idx] = NULL;
   9.367 -    }
   9.368 -    
   9.369 -err:
   9.370 -    return BLKTAP_PASS;
   9.371 -}
   9.372 -
   9.373 -void cow_init(void)
   9.374 -{
   9.375 -    int i;
   9.376 -    
   9.377 -    for (i = 0; i < MAX_DOMS; i++)
   9.378 -        cows[i] = NULL;
   9.379 -    
   9.380 -    for (i = 0; i < MAX_REQUESTS; i++)
   9.381 -        reread_list[MAX_REQUESTS] = NULL;
   9.382 -}
   9.383 -
    10.1 --- a/tools/blktap/blkcowlib.h	Thu May 19 21:14:26 2005 +0000
    10.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.3 @@ -1,14 +0,0 @@
    10.4 -/* blkcowlib.h
    10.5 - *
    10.6 - * copy on write a block device.  in a really inefficient way.
    10.7 - * 
    10.8 - * (c) 2004 Andrew Warfield.
    10.9 - *
   10.10 - * public interfaces to the CoW tap.
   10.11 - *
   10.12 - */
   10.13 - 
   10.14 -int  cow_control  (control_msg_t *msg);
   10.15 -int  cow_request  (blkif_request_t *req);
   10.16 -int  cow_response (blkif_response_t *rsp);
   10.17 -void cow_init     (void);
    11.1 --- a/tools/blktap/blkdump.c	Thu May 19 21:14:26 2005 +0000
    11.2 +++ b/tools/blktap/blkdump.c	Fri May 20 14:49:37 2005 +0000
    11.3 @@ -62,18 +62,6 @@ int control_print(control_msg_t *msg)
    11.4                  ((blkif_be_vbd_destroy_t *)msg->msg)->blkif_handle,
    11.5                  ((blkif_be_vbd_destroy_t *)msg->msg)->vdevice);
    11.6          break;
    11.7 -    case CMSG_BLKIF_BE_VBD_GROW:
    11.8 -        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
    11.9 -            goto parse_error;
   11.10 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
   11.11 -                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
   11.12 -                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
   11.13 -                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
   11.14 -        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
   11.15 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
   11.16 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
   11.17 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
   11.18 -        break;
   11.19      default:
   11.20          goto parse_error;
   11.21      }
    12.1 --- a/tools/blktap/blkgnbd.c	Thu May 19 21:14:26 2005 +0000
    12.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.3 @@ -1,19 +0,0 @@
    12.4 -/* blkgnbd.c
    12.5 - *
    12.6 - * gnbd-backed disk.
    12.7 - */
    12.8 -
    12.9 -#include "blktaplib.h"
   12.10 -#include "blkgnbdlib.h"
   12.11 -
   12.12 -
   12.13 -int main(int argc, char *argv[])
   12.14 -{
   12.15 -    gnbd_init();
   12.16 -    
   12.17 -    blktap_register_ctrl_hook("gnbd_control", gnbd_control);
   12.18 -    blktap_register_request_hook("gnbd_request", gnbd_request);
   12.19 -    blktap_listen();
   12.20 -    
   12.21 -    return 0;
   12.22 -}
    13.1 --- a/tools/blktap/blkgnbdlib.c	Thu May 19 21:14:26 2005 +0000
    13.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.3 @@ -1,471 +0,0 @@
    13.4 -/* blkgnbdlib.c
    13.5 - *
    13.6 - * gnbd image-backed block device.
    13.7 - * 
    13.8 - * (c) 2004 Andrew Warfield.
    13.9 - *
   13.10 - * Xend has been modified to use an amorfs:[fsid] disk tag.
   13.11 - * This will show up as device type (maj:240,min:0) = 61440.
   13.12 - *
   13.13 - * The fsid is placed in the sec_start field of the disk extent.
   13.14 - */
   13.15 -
   13.16 -#include <stdio.h>
   13.17 -#include <stdlib.h>
   13.18 -#include <string.h>
   13.19 -#include <db.h>       
   13.20 -#include <sys/stat.h>
   13.21 -#include <sys/types.h>
   13.22 -#include <unistd.h>
   13.23 -#include <errno.h>
   13.24 -#include <sys/poll.h>
   13.25 -#include "blktaplib.h"
   13.26 -#include "libgnbd/libgnbd.h"
   13.27 -
   13.28 -#define GNBD_SERVER  "skirmish.cl.cam.ac.uk"
   13.29 -#define GNBD_CLIENT  "pengi-0.xeno.cl.cam.ac.uk"
   13.30 -#define GNBD_MOUNT   "fc2_akw27"
   13.31 -#define GNBD_PORT    0x38e7
   13.32 -
   13.33 -#define MAX_DOMS        1024
   13.34 -#define MAX_IMGNAME_LEN  255
   13.35 -#define AMORFS_DEV     61440
   13.36 -#define MAX_REQUESTS      64 /* must be synced with the blkif drivers. */
   13.37 -#define SECTOR_SHIFT       9
   13.38 -                                                                                
   13.39 -#if 0
   13.40 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   13.41 -#else
   13.42 -#define DPRINTF(_f, _a...) ((void)0)
   13.43 -#endif
   13.44 -        
   13.45 -#if 1                                                                        
   13.46 -#define ASSERT(_p) \
   13.47 -    if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
   13.48 -    __LINE__, __FILE__); *(int*)0=0; }
   13.49 -#else
   13.50 -#define ASSERT(_p) ((void)0)
   13.51 -#endif
   13.52 -
   13.53 -#define GH_DISCONNECTED 0
   13.54 -#define GH_PROBEWAITING 1
   13.55 -#define GH_CONNECTED    2
   13.56 -
   13.57 -typedef struct {
   13.58 -    /* These need to turn into an array/rbtree for multi-disk support. */
   13.59 -    struct gnbd_handle *gh;
   13.60 -    int          gh_state;
   13.61 -    int          probe_idx; /* This really needs cleaning up after hotos. */
   13.62 -    int          fd;
   13.63 -    u64          fsid;
   13.64 -    char         gnbdname[MAX_IMGNAME_LEN];
   13.65 -    blkif_vdev_t vdevice;
   13.66 -} gnbd_t;
   13.67 -
   13.68 -/* Note on pending_reqs: I assume all reqs are queued before they start to 
   13.69 - * get filled.  so count of 0 is an unused record.
   13.70 - */
   13.71 -typedef struct {
   13.72 -    blkif_request_t  req;
   13.73 -    int              count;
   13.74 -} pending_req_t;
   13.75 -
   13.76 -static gnbd_t          *gnbds[MAX_DOMS];
   13.77 -static pending_req_t    pending_list[MAX_REQUESTS];
   13.78 -static int              pending_count = 0; /* debugging */
   13.79 -
   13.80 -
   13.81 -gnbd_t *get_gnbd_by_fd(int fd)
   13.82 -{
   13.83 -    /* this is a linear scan for the moment.  nees to be cleaned up for
   13.84 -       multi-disk support. */
   13.85 -    
   13.86 -    int i;
   13.87 -    
   13.88 -    for (i=0; i< MAX_DOMS; i++) 
   13.89 -        if ((gnbds[i] != NULL) && (gnbds[i]->fd == fd))
   13.90 -            return gnbds[i];
   13.91 -    
   13.92 -    return NULL;
   13.93 -}
   13.94 -
   13.95 -int gnbd_pollhook(int fd);
   13.96 -
   13.97 -int gnbd_control(control_msg_t *msg)
   13.98 -{
   13.99 -    domid_t  domid;
  13.100 -    DB      *db;
  13.101 -    int      ret;
  13.102 -    
  13.103 -    if (msg->type != CMSG_BLKIF_BE) 
  13.104 -    {
  13.105 -        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
  13.106 -        return 0;
  13.107 -    }
  13.108 -    
  13.109 -    switch(msg->subtype)
  13.110 -    {
  13.111 -    case CMSG_BLKIF_BE_CREATE:
  13.112 -        if ( msg->length != sizeof(blkif_be_create_t) )
  13.113 -            goto parse_error;
  13.114 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
  13.115 -                ((blkif_be_create_t *)msg->msg)->domid,
  13.116 -                ((blkif_be_create_t *)msg->msg)->blkif_handle);
  13.117 -        domid = ((blkif_be_create_t *)msg->msg)->domid;
  13.118 -        if (gnbds[domid] != NULL) {
  13.119 -            printf("attempt to connect from an existing dom!\n");
  13.120 -            return 0;
  13.121 -        }
  13.122 -        
  13.123 -        gnbds[domid] = (gnbd_t *)malloc(sizeof(gnbd_t));
  13.124 -        if (gnbds[domid] == NULL) {
  13.125 -            printf("error allocating gnbd record.\n");
  13.126 -            return 0;
  13.127 -        }
  13.128 -        
  13.129 -        gnbds[domid]->gh  = NULL;
  13.130 -        gnbds[domid]->fsid = 0;
  13.131 -        
  13.132 -        break;   
  13.133 -        
  13.134 -    case CMSG_BLKIF_BE_DESTROY:
  13.135 -        if ( msg->length != sizeof(blkif_be_destroy_t) )
  13.136 -            goto parse_error;
  13.137 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
  13.138 -                ((blkif_be_destroy_t *)msg->msg)->domid,
  13.139 -                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
  13.140 -        
  13.141 -        domid = ((blkif_be_destroy_t *)msg->msg)->domid;
  13.142 -        if (gnbds[domid] != NULL) {
  13.143 -            if (gnbds[domid]->gh != NULL) {
  13.144 -                blktap_detach_poll(gnbds[domid]->fd);
  13.145 -                free(gnbds[domid]->gh); /* XXX: Need a gnbd close call! */;
  13.146 -            }
  13.147 -            free( gnbds[domid] );
  13.148 -            gnbds[domid] = NULL;
  13.149 -        }
  13.150 -        break;  
  13.151 -    case CMSG_BLKIF_BE_VBD_GROW:
  13.152 -    {
  13.153 -        blkif_be_vbd_grow_t *grow;
  13.154 -        
  13.155 -        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
  13.156 -            goto parse_error;
  13.157 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
  13.158 -                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
  13.159 -                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
  13.160 -                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
  13.161 -        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
  13.162 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
  13.163 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
  13.164 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
  13.165 -        grow = (blkif_be_vbd_grow_t *)msg->msg;
  13.166 -        domid = grow->domid;
  13.167 -        if (gnbds[domid] == NULL) {
  13.168 -            printf("VBD_GROW on unconnected domain!\n");
  13.169 -            return 0;
  13.170 -        }
  13.171 -        
  13.172 -        if (grow->extent.device != AMORFS_DEV) {
  13.173 -            printf("VBD_GROW on non-amorfs device!\n");
  13.174 -            return 0;
  13.175 -        }
  13.176 -        
  13.177 -        /* TODO: config support for arbitrary gnbd files/modes. */
  13.178 -        sprintf(gnbds[domid]->gnbdname, GNBD_MOUNT);
  13.179 -        
  13.180 -        gnbds[domid]->fsid   = grow->extent.sector_start;
  13.181 -        gnbds[domid]->vdevice = grow->vdevice; 
  13.182 -        gnbds[domid]->gh_state = GH_DISCONNECTED;
  13.183 -        gnbds[domid]->gh = gnbd_setup(GNBD_SERVER, GNBD_PORT, 
  13.184 -            gnbds[domid]->gnbdname, GNBD_CLIENT);
  13.185 -        if (gnbds[domid]->gh == NULL) { 
  13.186 -            printf("Couldn't connect to gnbd mount!!\n");
  13.187 -            return 0;
  13.188 -        }
  13.189 -        gnbds[domid]->fd = gnbd_fd(gnbds[domid]->gh);
  13.190 -        blktap_attach_poll(gnbds[domid]->fd, POLLIN, gnbd_pollhook);
  13.191 -        
  13.192 -        printf("gnbd mount connected. (%s)\n", gnbds[domid]->gnbdname);
  13.193 -        break;
  13.194 -    }    
  13.195 -    }
  13.196 -    return 0;
  13.197 -parse_error:
  13.198 -    printf("Bad control message!\n");
  13.199 -    return 0;
  13.200 -    
  13.201 -create_failed:
  13.202 -    /* TODO: close the db ref. */
  13.203 -    return 0;
  13.204 -}    
  13.205 - 
  13.206 -static int gnbd_blkif_probe(blkif_request_t *req, gnbd_t *gnbd)
  13.207 -{
  13.208 -    int fd;
  13.209 -    struct stat stat;
  13.210 -    vdisk_t *gnbd_info;
  13.211 -    blkif_response_t *rsp;
  13.212 -
  13.213 -    /* We expect one buffer only. */
  13.214 -    if ( req->nr_segments != 1 )
  13.215 -        goto err;
  13.216 -
  13.217 -    /* Make sure the buffer is page-sized. */
  13.218 -    if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
  13.219 -         (blkif_last_sect (req->frame_and_sects[0]) != 7) )
  13.220 -        goto err;
  13.221 -
  13.222 -    /* loop for multiple gnbds would start here. */
  13.223 -
  13.224 -    gnbd_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
  13.225 -    gnbd_info[0].device   = gnbd->vdevice;
  13.226 -    gnbd_info[0].info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
  13.227 -    gnbd_info[0].capacity = gnbd_sectors(gnbd->gh);
  13.228 -
  13.229 -    printf("[SECTORS] %llu", gnbd_info[0].capacity);
  13.230 -
  13.231 -    //if (gnbd_info[0].capacity == 0)
  13.232 -    //    gnbd_info[0].capacity = ((u64)1 << 63); // xend does this too.
  13.233 -
  13.234 -    DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", gnbd_info[0].device,
  13.235 -            gnbd_info[0].capacity);
  13.236 -
  13.237 -    rsp = (blkif_response_t *)req;
  13.238 -    rsp->id = req->id;
  13.239 -    rsp->operation = BLKIF_OP_PROBE;
  13.240 -    rsp->status = 1; /* number of disks */
  13.241 -
  13.242 -    return  BLKTAP_RESPOND;
  13.243 -err:
  13.244 -    rsp = (blkif_response_t *)req;
  13.245 -    rsp->id = req->id;
  13.246 -    rsp->operation = req->operation;
  13.247 -    rsp->status = BLKIF_RSP_ERROR;
  13.248 -    return BLKTAP_RESPOND;  
  13.249 -}
  13.250 -
  13.251 -int gnbd_request(blkif_request_t *req)
  13.252 -{
  13.253 -    struct gnbd_handle *gh;
  13.254 -    u64 sector;
  13.255 -    char *spage, *dpage;
  13.256 -    int ret, i, idx;
  13.257 -    blkif_response_t *rsp;
  13.258 -    domid_t dom = ID_TO_DOM(req->id);
  13.259 -    
  13.260 -    if ((gnbds[dom] == NULL) || (gnbds[dom]->gh == NULL)) {
  13.261 -        printf("Data request for unknown domain!!! %d\n", dom);
  13.262 -        rsp = (blkif_response_t *)req;
  13.263 -        rsp->id = req->id;
  13.264 -        rsp->operation = req->operation;
  13.265 -        rsp->status = BLKIF_RSP_ERROR;
  13.266 -        return BLKTAP_RESPOND;
  13.267 -    }
  13.268 -    
  13.269 -    gh = gnbds[dom]->gh;
  13.270 -    
  13.271 -    switch (req->operation) 
  13.272 -    {
  13.273 -    case BLKIF_OP_PROBE:
  13.274 -    {
  13.275 -        printf("PROBE!\n");
  13.276 -        if ( gnbds[dom]->gh_state == GH_PROBEWAITING ) {
  13.277 -            printf("Already have a PROBE outstanding!\n");
  13.278 -            goto err;
  13.279 -        }
  13.280 -        
  13.281 -        if ( gnbds[dom]->gh_state == GH_DISCONNECTED )
  13.282 -        {
  13.283 -            /* need to defer until we are connected. */
  13.284 -            printf("Deferring PROBE!\n");
  13.285 -            idx = ID_TO_IDX(req->id);
  13.286 -            memcpy(&pending_list[idx].req, req, sizeof(*req));
  13.287 -            ASSERT(pending_list[idx].count == 0);
  13.288 -            pending_list[idx].count = 1;
  13.289 -            
  13.290 -            gnbds[dom]->probe_idx = idx;
  13.291 -            gnbds[dom]->gh_state  = GH_PROBEWAITING;
  13.292 -
  13.293 -            return BLKTAP_STOLEN;
  13.294 -        }
  13.295 -            
  13.296 -        
  13.297 -        return gnbd_blkif_probe(req, gnbds[dom]);
  13.298 -    }    
  13.299 -    case BLKIF_OP_WRITE:
  13.300 -    {
  13.301 -        unsigned long size;
  13.302 -        
  13.303 -        idx = ID_TO_IDX(req->id);
  13.304 -        ASSERT(pending_list[idx].count == 0);
  13.305 -        memcpy(&pending_list[idx].req, req, sizeof(*req));
  13.306 -        pending_list[idx].count = req->nr_segments;
  13.307 -        pending_count++; /* dbg */
  13.308 -        
  13.309 -        for (i = 0; i < req->nr_segments; i++) {
  13.310 -            
  13.311 -            sector = req->sector_number + (8*i);
  13.312 -            
  13.313 -            size = blkif_last_sect (req->frame_and_sects[i]) -
  13.314 -                   blkif_first_sect(req->frame_and_sects[i]) + 1;
  13.315 -            
  13.316 -            DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
  13.317 -                    req->sector_number, sector, 
  13.318 -                    blkif_first_sect(req->frame_and_sects[i]),
  13.319 -                    blkif_last_sect (req->frame_and_sects[i]),
  13.320 -                    (long)(sector << SECTOR_SHIFT));
  13.321 -                        
  13.322 -            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  13.323 -            spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
  13.324 -            
  13.325 -            ret = gnbd_write(gh, sector, size, spage, (unsigned long)idx);
  13.326 -            if (ret) {
  13.327 -                printf("gnbd error on WRITE\n");
  13.328 -                goto err;
  13.329 -            }
  13.330 -        }
  13.331 -//printf("[WR] < %lu\n", (unsigned long)idx);
  13.332 -        
  13.333 -        return BLKTAP_STOLEN;
  13.334 -    }
  13.335 -    case BLKIF_OP_READ:
  13.336 -    {
  13.337 -        unsigned long size;
  13.338 -        
  13.339 -        idx = ID_TO_IDX(req->id);
  13.340 -        ASSERT(pending_list[idx].count == 0);
  13.341 -        memcpy(&pending_list[idx].req, req, sizeof(*req));
  13.342 -        pending_list[idx].count = req->nr_segments;
  13.343 -        pending_count++; /* dbg */
  13.344 -            
  13.345 -        for (i = 0; i < req->nr_segments; i++) {
  13.346 -            
  13.347 -            sector  = req->sector_number + (8*i);
  13.348 -            
  13.349 -            size = blkif_last_sect (req->frame_and_sects[i]) -
  13.350 -                   blkif_first_sect(req->frame_and_sects[i]) + 1;
  13.351 -            
  13.352 -            DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
  13.353 -                    req->sector_number, sector, 
  13.354 -                    blkif_first_sect(req->frame_and_sects[i]),
  13.355 -                    blkif_last_sect (req->frame_and_sects[i]),
  13.356 -                    (long)(sector << SECTOR_SHIFT));
  13.357 -            
  13.358 -            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  13.359 -            dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
  13.360 -            
  13.361 -            ret = gnbd_read(gh, sector, size, dpage, (unsigned long)idx);
  13.362 -            if (ret) {
  13.363 -                printf("gnbd error on READ\n");
  13.364 -                goto err;
  13.365 -            }
  13.366 -            
  13.367 -        }
  13.368 -//printf("[RD] < %lu\n", (unsigned long)idx);
  13.369 -        
  13.370 -        return BLKTAP_STOLEN;
  13.371 -    }
  13.372 -    }
  13.373 -    
  13.374 -    printf("Unknown block operation!\n");
  13.375 -err:
  13.376 -    rsp = (blkif_response_t *)req;
  13.377 -    rsp->id = req->id;
  13.378 -    rsp->operation = req->operation;
  13.379 -    rsp->status = BLKIF_RSP_ERROR;
  13.380 -    return BLKTAP_RESPOND;  
  13.381 -}
  13.382 -
  13.383 -/* the gnbd library terminates the request stream. _resp is a noop. */
  13.384 -int gnbd_response(blkif_response_t *rsp)
  13.385 -{   
  13.386 -    return BLKTAP_PASS;
  13.387 -}
  13.388 -
  13.389 -int gnbd_pollhook(int fd)
  13.390 -{
  13.391 -    int err;
  13.392 -    struct gnbd_handle *gh;
  13.393 -    blkif_request_t *req;
  13.394 -    blkif_response_t *rsp;
  13.395 -    unsigned long idx;
  13.396 -    
  13.397 -    gnbd_t *gnbd = get_gnbd_by_fd(fd);
  13.398 -    
  13.399 -    if (gnbd == NULL) {
  13.400 -        printf("GNBD badness: got poll hook on unknown device. (%d)\n", fd);
  13.401 -        return -1;
  13.402 -    }
  13.403 -    gh = gnbd->gh;
  13.404 -    err = gnbd_reply(gh);
  13.405 -    switch (err) {
  13.406 -    case GNBD_LOGIN_DONE:
  13.407 -        if (gnbd->gh_state == GH_PROBEWAITING) {
  13.408 -            req = (blkif_request_t *)&pending_list[gnbd->probe_idx].req;
  13.409 -            printf("[!] Sending deferred PROBE!\n");
  13.410 -            gnbd_blkif_probe(req, gnbd);
  13.411 -            pending_list[gnbd->probe_idx].count = 0;
  13.412 -            rsp = (blkif_response_t *)req;
  13.413 -            blktap_inject_response(rsp);
  13.414 -        }
  13.415 -        gnbd->gh_state = GH_CONNECTED;
  13.416 -        printf("GNBD_LOGIN_DONE (%d)\n", fd); 
  13.417 -        break;
  13.418 -
  13.419 -    case GNBD_REQUEST_DONE: /* switch to idx */
  13.420 -        idx = gnbd_finished_request(gh);
  13.421 -        req = (blkif_request_t *)&pending_list[idx].req;
  13.422 -        if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){
  13.423 -            printf("gnbd returned a bad cookie (%lu)!\n", idx);
  13.424 -            break;
  13.425 -        }
  13.426 -        
  13.427 -        pending_list[idx].count--;
  13.428 -        
  13.429 -        if (pending_list[idx].count == 0) {
  13.430 -            blkif_request_t tmp = *req;
  13.431 -            pending_count--; /* dbg */
  13.432 -            rsp = (blkif_response_t *)req;
  13.433 -            rsp->id = tmp.id;
  13.434 -            rsp->operation = tmp.operation;
  13.435 -            rsp->status = BLKIF_RSP_OKAY;
  13.436 -            blktap_inject_response(rsp);
  13.437 -/*
  13.438 -if (rsp->operation == BLKIF_OP_READ) {
  13.439 -printf("[RD] > %lu (%d pndg)\n", (unsigned long)idx, pending_count);
  13.440 -} else if (rsp->operation == BLKIF_OP_WRITE) {
  13.441 -printf("[WR] > %lu (%d pndg)\n", (unsigned long)idx, pending_count);
  13.442 -} else  {
  13.443 -printf("[??] > %lu (%d pndg)\n", (unsigned long)idx, pending_count);
  13.444 -}
  13.445 -*/
  13.446 -        }
  13.447 -        break;
  13.448 -        
  13.449 -    case GNBD_CONTINUE:
  13.450 -        break;
  13.451 -        
  13.452 -    case 0:
  13.453 -        break;
  13.454 -        
  13.455 -    default:
  13.456 -        printf("gnbd_reply error");
  13.457 -        break;
  13.458 -    }
  13.459 -    return 0;
  13.460 -}
  13.461 -
  13.462 -void gnbd_init(void)
  13.463 -{   
  13.464 -    int i;
  13.465 -    
  13.466 -    for (i = 0; i < MAX_DOMS; i++)
  13.467 -        gnbds[i] = NULL;
  13.468 -    
  13.469 -    for (i = 0; i < MAX_REQUESTS; i++)
  13.470 -        pending_list[i].count = 0; 
  13.471 -    
  13.472 -    printf("GNBD image plugin initialized\n");
  13.473 -}
  13.474 -
    14.1 --- a/tools/blktap/blkgnbdlib.h	Thu May 19 21:14:26 2005 +0000
    14.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.3 @@ -1,16 +0,0 @@
    14.4 -/* blkgnbdlib.h
    14.5 - *
    14.6 - * gndb image-backed block device.
    14.7 - * 
    14.8 - * (c) 2004 Andrew Warfield.
    14.9 - *
   14.10 - * Xend has been modified to use an amorfs:[fsid] disk tag.
   14.11 - * This will show up as device type (maj:240,min:0) = 61440.
   14.12 - *
   14.13 - * The fsid is placed in the sec_start field of the disk extent.
   14.14 - */
   14.15 -
   14.16 -int gnbd_control(control_msg_t *msg);
   14.17 -int gnbd_request(blkif_request_t *req);
   14.18 -int gnbd_response(blkif_response_t *rsp); /* noop */
   14.19 -void gnbd_init(void);
    15.1 --- a/tools/blktap/blkimg.c	Thu May 19 21:14:26 2005 +0000
    15.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.3 @@ -1,19 +0,0 @@
    15.4 -/* blkimg.c
    15.5 - *
    15.6 - * file-backed disk.
    15.7 - */
    15.8 -
    15.9 -#include "blktaplib.h"
   15.10 -#include "blkimglib.h"
   15.11 -
   15.12 -
   15.13 -int main(int argc, char *argv[])
   15.14 -{
   15.15 -    image_init();
   15.16 -    
   15.17 -    blktap_register_ctrl_hook("image_control", image_control);
   15.18 -    blktap_register_request_hook("image_request", image_request);
   15.19 -    blktap_listen();
   15.20 -    
   15.21 -    return 0;
   15.22 -}
    16.1 --- a/tools/blktap/blkimglib.c	Thu May 19 21:14:26 2005 +0000
    16.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.3 @@ -1,325 +0,0 @@
    16.4 -/* blkimglib.c
    16.5 - *
    16.6 - * file image-backed block device.
    16.7 - * 
    16.8 - * (c) 2004 Andrew Warfield.
    16.9 - *
   16.10 - * Xend has been modified to use an amorfs:[fsid] disk tag.
   16.11 - * This will show up as device type (maj:240,min:0) = 61440.
   16.12 - *
   16.13 - * The fsid is placed in the sec_start field of the disk extent.
   16.14 - */
   16.15 -
   16.16 -#include <stdio.h>
   16.17 -#include <stdlib.h>
   16.18 -#include <string.h>
   16.19 -#include <db.h>       
   16.20 -#include <sys/stat.h>
   16.21 -#include <sys/types.h>
   16.22 -#include <unistd.h>
   16.23 -#include <errno.h>
   16.24 -#include "blktaplib.h"
   16.25 -
   16.26 -//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
   16.27 -#define TMP_IMAGE_FILE_NAME "fc3.image"
   16.28 -
   16.29 -#define MAX_DOMS        1024
   16.30 -#define MAX_IMGNAME_LEN  255
   16.31 -#define AMORFS_DEV     61440
   16.32 -#define MAX_REQUESTS      64 /* must be synced with the blkif drivers. */
   16.33 -#define SECTOR_SHIFT       9
   16.34 -                                                                                
   16.35 -#if 0
   16.36 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   16.37 -#else
   16.38 -#define DPRINTF(_f, _a...) ((void)0)
   16.39 -#endif
   16.40 -                                                                                
   16.41 -
   16.42 -typedef struct {
   16.43 -    /* These need to turn into an array/rbtree for multi-disk support. */
   16.44 -    FILE *img;
   16.45 -    u64  fsid;
   16.46 -    char imgname[MAX_IMGNAME_LEN];
   16.47 -    blkif_vdev_t   vdevice;
   16.48 -} image_t;
   16.49 -
   16.50 -image_t         *images[MAX_DOMS];
   16.51 -blkif_request_t *reread_list[MAX_REQUESTS];
   16.52 -
   16.53 -int image_control(control_msg_t *msg)
   16.54 -{
   16.55 -    domid_t  domid;
   16.56 -    DB      *db;
   16.57 -    int      ret;
   16.58 -    
   16.59 -    if (msg->type != CMSG_BLKIF_BE) 
   16.60 -    {
   16.61 -        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
   16.62 -        return 0;
   16.63 -    }
   16.64 -    
   16.65 -    switch(msg->subtype)
   16.66 -    {
   16.67 -    case CMSG_BLKIF_BE_CREATE:
   16.68 -        if ( msg->length != sizeof(blkif_be_create_t) )
   16.69 -            goto parse_error;
   16.70 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
   16.71 -                ((blkif_be_create_t *)msg->msg)->domid,
   16.72 -                ((blkif_be_create_t *)msg->msg)->blkif_handle);
   16.73 -        domid = ((blkif_be_create_t *)msg->msg)->domid;
   16.74 -        if (images[domid] != NULL) {
   16.75 -            printf("attempt to connect from an existing dom!\n");
   16.76 -            return 0;
   16.77 -        }
   16.78 -        
   16.79 -        images[domid] = (image_t *)malloc(sizeof(image_t));
   16.80 -        if (images[domid] == NULL) {
   16.81 -            printf("error allocating image record.\n");
   16.82 -            return 0;
   16.83 -        }
   16.84 -        
   16.85 -        images[domid]->img  = NULL;
   16.86 -        images[domid]->fsid = 0;
   16.87 -        
   16.88 -        printf("Image connected.\n");
   16.89 -        break;   
   16.90 -        
   16.91 -    case CMSG_BLKIF_BE_DESTROY:
   16.92 -        if ( msg->length != sizeof(blkif_be_destroy_t) )
   16.93 -            goto parse_error;
   16.94 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
   16.95 -                ((blkif_be_destroy_t *)msg->msg)->domid,
   16.96 -                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
   16.97 -        
   16.98 -        domid = ((blkif_be_destroy_t *)msg->msg)->domid;
   16.99 -        if (images[domid] != NULL) {
  16.100 -            if (images[domid]->img != NULL)
  16.101 -                fclose( images[domid]->img );
  16.102 -            free( images[domid] );
  16.103 -            images[domid] = NULL;
  16.104 -        }
  16.105 -        break;  
  16.106 -    case CMSG_BLKIF_BE_VBD_GROW:
  16.107 -    {
  16.108 -        blkif_be_vbd_grow_t *grow;
  16.109 -        
  16.110 -        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
  16.111 -            goto parse_error;
  16.112 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
  16.113 -                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
  16.114 -                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
  16.115 -                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
  16.116 -        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
  16.117 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
  16.118 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
  16.119 -                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
  16.120 -        grow = (blkif_be_vbd_grow_t *)msg->msg;
  16.121 -        domid = grow->domid;
  16.122 -        if (images[domid] == NULL) {
  16.123 -            printf("VBD_GROW on unconnected domain!\n");
  16.124 -            return 0;
  16.125 -        }
  16.126 -        
  16.127 -        if (grow->extent.device != AMORFS_DEV) {
  16.128 -            printf("VBD_GROW on non-amorfs device!\n");
  16.129 -            return 0;
  16.130 -        }
  16.131 -        
  16.132 -        /* TODO: config support for arbitrary image files/modes. */
  16.133 -        sprintf(images[domid]->imgname, TMP_IMAGE_FILE_NAME);
  16.134 -        
  16.135 -        images[domid]->fsid   = grow->extent.sector_start;
  16.136 -        images[domid]->vdevice = grow->vdevice; 
  16.137 -        images[domid]->img = fopen64(TMP_IMAGE_FILE_NAME, "r+");
  16.138 -        if (images[domid]->img == NULL) { 
  16.139 -            printf("Couldn't open image file!\n");
  16.140 -            return 0;
  16.141 -        }
  16.142 -        
  16.143 -        printf("Image file opened. (%s)\n", images[domid]->imgname);
  16.144 -        break;
  16.145 -    }    
  16.146 -    }
  16.147 -    return 0;
  16.148 -parse_error:
  16.149 -    printf("Bad control message!\n");
  16.150 -    return 0;
  16.151 -    
  16.152 -create_failed:
  16.153 -    /* TODO: close the db ref. */
  16.154 -    return 0;
  16.155 -}    
  16.156 - 
  16.157 -int image_request(blkif_request_t *req)
  16.158 -{
  16.159 -    FILE *img;
  16.160 -    u64 sector;
  16.161 -    char *spage, *dpage;
  16.162 -    int ret, i, idx;
  16.163 -    blkif_response_t *rsp;
  16.164 -    domid_t dom = ID_TO_DOM(req->id);
  16.165 -    
  16.166 -    if ((images[dom] == NULL) || (images[dom]->img == NULL)) {
  16.167 -        printf("Data request for unknown domain!!! %d\n", dom);
  16.168 -        rsp = (blkif_response_t *)req;
  16.169 -        rsp->id = req->id;
  16.170 -        rsp->operation = req->operation;
  16.171 -        rsp->status = BLKIF_RSP_ERROR;
  16.172 -        return BLKTAP_RESPOND;
  16.173 -    }
  16.174 -    
  16.175 -    img = images[dom]->img;
  16.176 -    
  16.177 -    switch (req->operation) 
  16.178 -    {
  16.179 -    case BLKIF_OP_PROBE:
  16.180 -    {
  16.181 -        int fd;
  16.182 -        struct stat stat;
  16.183 -        vdisk_t *img_info;
  16.184 -        
  16.185 -        
  16.186 -        /* We expect one buffer only. */
  16.187 -        if ( req->nr_segments != 1 )
  16.188 -            goto err;
  16.189 -                                                                                
  16.190 -        /* Make sure the buffer is page-sized. */
  16.191 -        if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
  16.192 -             (blkif_last_sect (req->frame_and_sects[0]) != 7) )
  16.193 -            goto err;
  16.194 -
  16.195 -        /* loop for multiple images would start here. */
  16.196 -        
  16.197 -        fd = fileno(img);
  16.198 -        if (fd == -1) {
  16.199 -            printf("Couldn't get image fd in PROBE!\n");
  16.200 -            goto err;
  16.201 -        }
  16.202 -        
  16.203 -        ret = fstat(fd, &stat);
  16.204 -        if (ret != 0) {
  16.205 -            printf("Couldn't stat image in PROBE!\n");
  16.206 -            goto err;
  16.207 -        }
  16.208 -        
  16.209 -        img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
  16.210 -        img_info[0].device   = images[dom]->vdevice;
  16.211 -        img_info[0].info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
  16.212 -        img_info[0].capacity = (stat.st_size >> SECTOR_SHIFT);
  16.213 -        
  16.214 -        if (img_info[0].capacity == 0)
  16.215 -            img_info[0].capacity = ((u64)1 << 63); // xend does this too.
  16.216 -        
  16.217 -        DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", img_info[0].device,
  16.218 -                img_info[0].capacity);
  16.219 -        
  16.220 -        rsp = (blkif_response_t *)req;
  16.221 -        rsp->id = req->id;
  16.222 -        rsp->operation = BLKIF_OP_PROBE;
  16.223 -        rsp->status = 1; /* number of disks */
  16.224 -        
  16.225 -        return  BLKTAP_RESPOND;
  16.226 -    }    
  16.227 -    case BLKIF_OP_WRITE:
  16.228 -    {
  16.229 -        unsigned long size;
  16.230 -        
  16.231 -        for (i = 0; i < req->nr_segments; i++) {
  16.232 -            
  16.233 -            sector = req->sector_number + (8*i);
  16.234 -            
  16.235 -            size = blkif_last_sect (req->frame_and_sects[i]) -
  16.236 -                   blkif_first_sect(req->frame_and_sects[i]) + 1;
  16.237 -            
  16.238 -            ret = fseeko64(img, (off_t)(sector << SECTOR_SHIFT), SEEK_SET);
  16.239 -            if (ret != 0) {
  16.240 -                printf("fseek error on WRITE\n");
  16.241 -                goto err;
  16.242 -            }
  16.243 -            
  16.244 -            DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
  16.245 -                    req->sector_number, sector, 
  16.246 -                    blkif_first_sect(req->frame_and_sects[i]),
  16.247 -                    blkif_last_sect (req->frame_and_sects[i]),
  16.248 -                    (long)(sector << SECTOR_SHIFT));
  16.249 -                        
  16.250 -            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  16.251 -            spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
  16.252 -            ret = fwrite(spage, size << SECTOR_SHIFT, 1, img);
  16.253 -            if (ret != 1) {
  16.254 -                printf("fwrite error on WRITE (%d)\n", errno);
  16.255 -                goto err;
  16.256 -            }
  16.257 -        }
  16.258 -        
  16.259 -        rsp = (blkif_response_t *)req;
  16.260 -        rsp->id = req->id;
  16.261 -        rsp->operation = BLKIF_OP_WRITE;
  16.262 -        rsp->status = BLKIF_RSP_OKAY;
  16.263 -        
  16.264 -        return BLKTAP_RESPOND;
  16.265 -    }
  16.266 -    case BLKIF_OP_READ:
  16.267 -    {
  16.268 -        unsigned long size;
  16.269 -        
  16.270 -        for (i = 0; i < req->nr_segments; i++) {
  16.271 -            
  16.272 -            sector  = req->sector_number + (8*i);
  16.273 -            
  16.274 -            size = blkif_last_sect (req->frame_and_sects[i]) -
  16.275 -                   blkif_first_sect(req->frame_and_sects[i]) + 1;
  16.276 -            
  16.277 -            ret = fseeko64(img, (off_t)(sector << SECTOR_SHIFT), SEEK_SET);
  16.278 -            if (ret != 0) {
  16.279 -                printf("fseek error on READ\n");
  16.280 -                goto err;
  16.281 -            }
  16.282 -        
  16.283 -            DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
  16.284 -                    req->sector_number, sector, 
  16.285 -                    blkif_first_sect(req->frame_and_sects[i]),
  16.286 -                    blkif_last_sect (req->frame_and_sects[i]),
  16.287 -                    (long)(sector << SECTOR_SHIFT));
  16.288 -            
  16.289 -            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  16.290 -            dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
  16.291 -            ret = fread(dpage, size << SECTOR_SHIFT, 1, img);
  16.292 -            if (ret != 1) {
  16.293 -                printf("fread error on READ\n");
  16.294 -                goto err;
  16.295 -            }
  16.296 -        }
  16.297 -
  16.298 -        rsp = (blkif_response_t *)req;
  16.299 -        rsp->id = req->id;
  16.300 -        rsp->operation = BLKIF_OP_READ;
  16.301 -        rsp->status = BLKIF_RSP_OKAY;
  16.302 -        return BLKTAP_RESPOND;
  16.303 -    }
  16.304 -    }
  16.305 -    
  16.306 -    printf("Unknow block operation!\n");
  16.307 -err:
  16.308 -    rsp = (blkif_response_t *)req;
  16.309 -    rsp->id = req->id;
  16.310 -    rsp->operation = req->operation;
  16.311 -    rsp->status = BLKIF_RSP_ERROR;
  16.312 -    return BLKTAP_RESPOND;  
  16.313 -}
  16.314 -
  16.315 -/* the image library terminates the request stream. _resp is a noop. */
  16.316 -int image_response(blkif_response_t *rsp)
  16.317 -{   
  16.318 -    return BLKTAP_PASS;
  16.319 -}
  16.320 -
  16.321 -void image_init(void)
  16.322 -{
  16.323 -    int i;
  16.324 -    
  16.325 -    for (i = 0; i < MAX_DOMS; i++)
  16.326 -        images[i] = NULL;
  16.327 -}
  16.328 -
    17.1 --- a/tools/blktap/blkimglib.h	Thu May 19 21:14:26 2005 +0000
    17.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.3 @@ -1,16 +0,0 @@
    17.4 -/* blkimglib.h
    17.5 - *
    17.6 - * file image-backed block device.
    17.7 - * 
    17.8 - * (c) 2004 Andrew Warfield.
    17.9 - *
   17.10 - * Xend has been modified to use an amorfs:[fsid] disk tag.
   17.11 - * This will show up as device type (maj:240,min:0) = 61440.
   17.12 - *
   17.13 - * The fsid is placed in the sec_start field of the disk extent.
   17.14 - */
   17.15 -
   17.16 -int image_control(control_msg_t *msg);
   17.17 -int image_request(blkif_request_t *req);
   17.18 -int image_response(blkif_response_t *rsp); /* noop */
   17.19 -void image_init(void);
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/tools/blktap/block-async.c	Fri May 20 14:49:37 2005 +0000
    18.3 @@ -0,0 +1,404 @@
    18.4 +/* block-async.c
    18.5 + * 
    18.6 + * Asynchronous block wrappers for parallax.
    18.7 + */
    18.8 + 
    18.9 + 
   18.10 +#include <stdio.h>
   18.11 +#include <stdlib.h>
   18.12 +#include <string.h>
   18.13 +#include <pthread.h>
   18.14 +#include "block-async.h"
   18.15 +#include "blockstore.h"
   18.16 +#include "vdi.h"
   18.17 +
   18.18 +
   18.19 +#if 0
   18.20 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   18.21 +#else
   18.22 +#define DPRINTF(_f, _a...) ((void)0)
   18.23 +#endif
   18.24 +
   18.25 +/* We have a queue of outstanding I/O requests implemented as a 
   18.26 + * circular producer-consumer ring with free-running buffers.
   18.27 + * To allow reordering, this ring indirects to indexes in an
   18.28 + * array of pending_io_req structs.
   18.29 + * 
   18.30 + * the block_* calls may either add an entry to this ring and return, 
   18.31 + * or satisfy the request immediately and call the callback directly.
   18.32 + * None of the io calls in parallax should be nested enough to worry 
   18.33 + * about stack problems with this approach.
   18.34 + */
   18.35 +
   18.36 +struct read_args {
   18.37 +	u64 addr;
   18.38 +};
   18.39 +
   18.40 +struct write_args {
   18.41 +	u64   addr;
   18.42 +	char *block;
   18.43 +};
   18.44 +
   18.45 +struct alloc_args {
   18.46 +	char *block;
   18.47 +};
   18.48 + 
   18.49 +struct pending_io_req {
   18.50 +	enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op;
   18.51 +	union {
   18.52 +		struct read_args  r;
   18.53 +		struct write_args w;
   18.54 +		struct alloc_args a;
   18.55 +	} u;
   18.56 +	io_cb_t cb;
   18.57 +	void *param;
   18.58 +};
   18.59 +
   18.60 +void radix_lock_init(struct radix_lock *r)
   18.61 +{
   18.62 +	int i;
   18.63 +	
   18.64 +	pthread_mutex_init(&r->lock, NULL);
   18.65 +	for (i=0; i < 1024; i++) {
   18.66 +		r->lines[i] = 0;
   18.67 +		r->waiters[i] = NULL;
   18.68 +		r->state[i] = ANY;
   18.69 +	}
   18.70 +}
   18.71 +
   18.72 +/* maximum outstanding I/O requests issued asynchronously */
   18.73 +/* must be a power of 2.*/
   18.74 +#define MAX_PENDING_IO 1024 //1024
   18.75 +
   18.76 +/* how many threads to concurrently issue I/O to the disk. */
   18.77 +#define IO_POOL_SIZE   10 //10
   18.78 +
   18.79 +static struct pending_io_req pending_io_reqs[MAX_PENDING_IO];
   18.80 +static int pending_io_list[MAX_PENDING_IO];
   18.81 +static unsigned long io_prod = 0, io_cons = 0, io_free = 0;
   18.82 +#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1))
   18.83 +#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs)
   18.84 +#define PENDING_IO_ENT(_x) \
   18.85 +	(&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]])
   18.86 +#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod)
   18.87 +#define CAN_CONSUME_PENDING_IO (io_cons != io_prod)
   18.88 +static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER;
   18.89 +static pthread_cond_t  pending_io_cond = PTHREAD_COND_INITIALIZER;
   18.90 +
   18.91 +static void init_pending_io(void)
   18.92 +{
   18.93 +	int i;
   18.94 +	
   18.95 +	for (i=0; i<MAX_PENDING_IO; i++)
   18.96 +		pending_io_list[i] = i;
   18.97 +		
   18.98 +} 
   18.99 +
  18.100 +void block_read(u64 addr, io_cb_t cb, void *param)
  18.101 +{
  18.102 +	struct pending_io_req *req;
  18.103 +	
  18.104 +	pthread_mutex_lock(&pending_io_lock);
  18.105 +	assert(CAN_PRODUCE_PENDING_IO);
  18.106 +
  18.107 +	req = PENDING_IO_ENT(io_prod++);
  18.108 +	DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req);
  18.109 +	req->op = IO_READ;
  18.110 +	req->u.r.addr = addr;
  18.111 +	req->cb = cb;
  18.112 +	req->param = param;
  18.113 +	
  18.114 +    pthread_cond_signal(&pending_io_cond);
  18.115 +	pthread_mutex_unlock(&pending_io_lock);	
  18.116 +}
  18.117 +
  18.118 +
  18.119 +void block_write(u64 addr, char *block, io_cb_t cb, void *param)
  18.120 +{
  18.121 +	struct pending_io_req *req;
  18.122 +	
  18.123 +	pthread_mutex_lock(&pending_io_lock);
  18.124 +	assert(CAN_PRODUCE_PENDING_IO);
  18.125 +
  18.126 +	req = PENDING_IO_ENT(io_prod++);
  18.127 +	DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req);
  18.128 +	req->op = IO_WRITE;
  18.129 +	req->u.w.addr  = addr;
  18.130 +	req->u.w.block = block;
  18.131 +	req->cb = cb;
  18.132 +	req->param = param;
  18.133 +	
  18.134 +    pthread_cond_signal(&pending_io_cond);
  18.135 +	pthread_mutex_unlock(&pending_io_lock);	
  18.136 +}
  18.137 +
  18.138 +
  18.139 +void block_alloc(char *block, io_cb_t cb, void *param)
  18.140 +{
  18.141 +	struct pending_io_req *req;
  18.142 +	
  18.143 +	pthread_mutex_lock(&pending_io_lock);
  18.144 +	assert(CAN_PRODUCE_PENDING_IO);
  18.145 +
  18.146 +	req = PENDING_IO_ENT(io_prod++);
  18.147 +	req->op = IO_ALLOC;
  18.148 +	req->u.a.block = block;
  18.149 +	req->cb = cb;
  18.150 +	req->param = param;
  18.151 +	
  18.152 +    pthread_cond_signal(&pending_io_cond);
  18.153 +	pthread_mutex_unlock(&pending_io_lock);	
  18.154 +}
  18.155 +
  18.156 +void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
  18.157 +{
  18.158 +	struct io_ret ret;
  18.159 +	pthread_mutex_lock(&r->lock);
  18.160 +	
  18.161 +	if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) {
  18.162 +		r->lines[row]++;
  18.163 +		r->state[row] = READ;
  18.164 +		DPRINTF("RLOCK  : %3d (row: %d)\n", r->lines[row], row);
  18.165 +		pthread_mutex_unlock(&r->lock);
  18.166 +		ret.type = IO_INT_T;
  18.167 +		ret.u.i = 0;
  18.168 +		cb(ret, param);
  18.169 +	} else {
  18.170 +		struct radix_wait **rwc;
  18.171 +		struct radix_wait *rw = 
  18.172 +			(struct radix_wait *) malloc (sizeof(struct radix_wait));
  18.173 +		DPRINTF("RLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
  18.174 +		rw->type  = RLOCK;
  18.175 +		rw->param = param;
  18.176 +		rw->cb    = cb;
  18.177 +		rw->next  = NULL;
  18.178 +		/* append to waiters list. */
  18.179 +		rwc = &r->waiters[row];
  18.180 +		while (*rwc != NULL) rwc = &(*rwc)->next;
  18.181 +		*rwc = rw;
  18.182 +		pthread_mutex_unlock(&r->lock);
  18.183 +		return;
  18.184 +	}
  18.185 +}
  18.186 +
  18.187 +
  18.188 +void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
  18.189 +{
  18.190 +	struct io_ret ret;
  18.191 +	pthread_mutex_lock(&r->lock);
  18.192 +	
  18.193 +	/* the second check here is redundant -- just here for debugging now. */
  18.194 +	if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) {
  18.195 +		r->state[row] = STOP;
  18.196 +		r->lines[row] = -1;
  18.197 +		DPRINTF("WLOCK  : %3d (row: %d)\n", r->lines[row], row);
  18.198 +		pthread_mutex_unlock(&r->lock);
  18.199 +		ret.type = IO_INT_T;
  18.200 +		ret.u.i = 0;
  18.201 +		cb(ret, param);
  18.202 +	} else {
  18.203 +		struct radix_wait **rwc;
  18.204 +		struct radix_wait *rw = 
  18.205 +			(struct radix_wait *) malloc (sizeof(struct radix_wait));
  18.206 +		DPRINTF("WLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
  18.207 +		rw->type  = WLOCK;
  18.208 +		rw->param = param;
  18.209 +		rw->cb    = cb;
  18.210 +		rw->next  = NULL;
  18.211 +		/* append to waiters list. */
  18.212 +		rwc = &r->waiters[row];
  18.213 +		while (*rwc != NULL) rwc = &(*rwc)->next;
  18.214 +		*rwc = rw;
  18.215 +		pthread_mutex_unlock(&r->lock);
  18.216 +		return;
  18.217 +	}
  18.218 +	
  18.219 +}
  18.220 +
  18.221 +/* called with radix_lock locked and lock count of zero. */
  18.222 +static void wake_waiters(struct radix_lock *r, int row)
  18.223 +{
  18.224 +	struct pending_io_req *req;
  18.225 +	struct radix_wait *rw;
  18.226 +	
  18.227 +	DPRINTF("prewake\n");
  18.228 +	if (r->lines[row] != 0) return;
  18.229 +	if (r->waiters[row] == NULL) {DPRINTF("nowaiters!\n");return;} 
  18.230 +	
  18.231 +	DPRINTF("wake\n");
  18.232 +	if (r->waiters[row]->type == WLOCK) {
  18.233 +		rw = r->waiters[row];
  18.234 +		pthread_mutex_lock(&pending_io_lock);
  18.235 +		assert(CAN_PRODUCE_PENDING_IO);
  18.236 +
  18.237 +		req = PENDING_IO_ENT(io_prod++);
  18.238 +		DPRINTF("Produce (WWAKE) %lu (%p)\n", io_prod - 1, req);
  18.239 +		req->op    = IO_WWAKE;
  18.240 +		req->cb    = rw->cb;
  18.241 +		req->param = rw->param;
  18.242 +		r->lines[row] = -1; /* write lock the row. */
  18.243 +		r->state[row] = STOP;
  18.244 +		r->waiters[row] = rw->next;
  18.245 +		free(rw);
  18.246 +		pthread_mutex_unlock(&pending_io_lock);
  18.247 +	} else /* RLOCK */ {
  18.248 +		while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) {
  18.249 +			rw = r->waiters[row];
  18.250 +			pthread_mutex_lock(&pending_io_lock);
  18.251 +			assert(CAN_PRODUCE_PENDING_IO);
  18.252 +	
  18.253 +			req = PENDING_IO_ENT(io_prod++);
  18.254 +			DPRINTF("Produce (RWAKE) %lu (%p)\n", io_prod - 1, req);
  18.255 +			req->op    = IO_RWAKE;
  18.256 +			req->cb    = rw->cb;
  18.257 +			req->param = rw->param;
  18.258 +			r->lines[row]++; /* read lock the row. */
  18.259 +			r->state[row] = READ; 
  18.260 +			r->waiters[row] = rw->next;
  18.261 +			free(rw);
  18.262 +			pthread_mutex_unlock(&pending_io_lock);
  18.263 +		}
  18.264 +		if (r->waiters[row] != NULL) /* There is a write queued still */
  18.265 +			r->state[row] = STOP;
  18.266 +	}	
  18.267 +	
  18.268 +	DPRINTF("wakedone\n");
  18.269 +	DPRINTF("prod: %lu cons: %lu free: %lu\n", io_prod, io_cons, io_free);
  18.270 +	pthread_mutex_lock(&pending_io_lock);
  18.271 +    pthread_cond_signal(&pending_io_cond);
  18.272 +	pthread_mutex_unlock(&pending_io_lock);
  18.273 +}
  18.274 +
  18.275 +void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
  18.276 +{
  18.277 +	struct io_ret ret;
  18.278 +	
  18.279 +	pthread_mutex_lock(&r->lock);
  18.280 +	assert(r->lines[row] > 0); /* try to catch misuse. */
  18.281 +	r->lines[row]--;
  18.282 +	DPRINTF("RUNLOCK: %3d (row: %d)\n", r->lines[row], row);
  18.283 +	if (r->lines[row] == 0) {
  18.284 +		r->state[row] = ANY;
  18.285 +		wake_waiters(r, row);
  18.286 +	}
  18.287 +	pthread_mutex_unlock(&r->lock);
  18.288 +	cb(ret, param);
  18.289 +}
  18.290 +
  18.291 +void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
  18.292 +{
  18.293 +	struct io_ret ret;
  18.294 +	
  18.295 +	pthread_mutex_lock(&r->lock);
  18.296 +	assert(r->lines[row] == -1); /* try to catch misuse. */
  18.297 +	r->lines[row] = 0;
  18.298 +	r->state[row] = ANY;
  18.299 +	DPRINTF("WUNLOCK: %3d (row: %d)\n", r->lines[row], row);
  18.300 +	wake_waiters(r, row);
  18.301 +	pthread_mutex_unlock(&r->lock);
  18.302 +	cb(ret, param);
  18.303 +}
  18.304 +
  18.305 +/* consumer calls */
  18.306 +static void do_next_io_req(struct pending_io_req *req)
  18.307 +{
  18.308 +	struct io_ret          ret;
  18.309 +	void  *param;
  18.310 +	
  18.311 +	switch (req->op) {
  18.312 +	case IO_READ:
  18.313 +		ret.type = IO_BLOCK_T;
  18.314 +		ret.u.b  = readblock(req->u.r.addr);
  18.315 +		break;
  18.316 +	case IO_WRITE:
  18.317 +		ret.type = IO_INT_T;
  18.318 +		ret.u.i  = writeblock(req->u.w.addr, req->u.w.block);
  18.319 +		DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr);
  18.320 +		break;
  18.321 +	case IO_ALLOC:
  18.322 +		ret.type = IO_ADDR_T;
  18.323 +		ret.u.a  = allocblock(req->u.a.block);
  18.324 +		break;
  18.325 +	case IO_RWAKE:
  18.326 +		DPRINTF("WAKE DEFERRED RLOCK!\n");
  18.327 +		ret.type = IO_INT_T;
  18.328 +		ret.u.i  = 0;
  18.329 +		break;
  18.330 +	case IO_WWAKE:
  18.331 +		DPRINTF("WAKE DEFERRED WLOCK!\n");
  18.332 +		ret.type = IO_INT_T;
  18.333 +		ret.u.i  = 0;
  18.334 +		break;
  18.335 +	default:
  18.336 +		DPRINTF("Unknown IO operation on pending list!\n");
  18.337 +		return;
  18.338 +	}
  18.339 +	
  18.340 +	param = req->param;
  18.341 +	DPRINTF("freeing idx %d to slot %lu.\n", PENDING_IO_IDX(req), PENDING_IO_MASK(io_free));
  18.342 +	pthread_mutex_lock(&pending_io_lock);
  18.343 +	pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req);
  18.344 +	DPRINTF("       : prod: %lu cons: %lu free: %lu\n", io_prod, io_cons, io_free);
  18.345 +	pthread_mutex_unlock(&pending_io_lock);
  18.346 +	
  18.347 +	assert(req->cb != NULL);
  18.348 +	req->cb(ret, param);
  18.349 +		
  18.350 +}
  18.351 +
  18.352 +void *io_thread(void *param) 
  18.353 +{
  18.354 +	int tid;
  18.355 +	struct pending_io_req *req;
  18.356 +	
  18.357 +	/* Set this thread's tid. */
  18.358 +    tid = *(int *)param;
  18.359 +    free(param);
  18.360 +    
  18.361 +    DPRINTF("IOT %2d started.\n", tid);
  18.362 +    
  18.363 +start:
  18.364 +    pthread_mutex_lock(&pending_io_lock);
  18.365 +    while (io_prod == io_cons) {
  18.366 +        pthread_cond_wait(&pending_io_cond, &pending_io_lock);
  18.367 +    }
  18.368 +    
  18.369 +    if (io_prod == io_cons) {
  18.370 +        /* unnecessary wakeup. */
  18.371 +        pthread_mutex_unlock(&pending_io_lock);
  18.372 +        goto start;
  18.373 +    }
  18.374 +    
  18.375 +	req = PENDING_IO_ENT(io_cons++);
  18.376 +	DPRINTF("IOT %2d has req %04d(%p).\n", tid, PENDING_IO_IDX(req), req);
  18.377 +	DPRINTF("       : prod: %lu cons: %lu free: %lu\n", io_prod, io_cons, io_free);
  18.378 +	pthread_mutex_unlock(&pending_io_lock);
  18.379 +	
  18.380 +	
  18.381 +    do_next_io_req(req);
  18.382 +    
  18.383 +	goto start;
  18.384 +	
  18.385 +}
  18.386 +
  18.387 +static pthread_t io_pool[IO_POOL_SIZE];
  18.388 +void start_io_threads(void)
  18.389 +
  18.390 +{	
  18.391 +	int i, tid=0;
  18.392 +	
  18.393 +	 for (i=0; i < IO_POOL_SIZE; i++) {
  18.394 +        int ret, *t;
  18.395 +        t = (int *)malloc(sizeof(int));
  18.396 +        *t = tid++;
  18.397 +        ret = pthread_create(&io_pool[i], NULL, io_thread, t);
  18.398 +        if (ret != 0) printf("Error starting thread %d\n", i);
  18.399 +    }
  18.400 +	
  18.401 +}
  18.402 +
  18.403 +void init_block_async(void)
  18.404 +{
  18.405 +	init_pending_io();
  18.406 +	start_io_threads();
  18.407 +}
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/tools/blktap/block-async.h	Fri May 20 14:49:37 2005 +0000
    19.3 @@ -0,0 +1,69 @@
    19.4 +/* block-async.h
    19.5 + * 
    19.6 + * Asynchronous block wrappers for parallax.
    19.7 + */
    19.8 + 
    19.9 +#ifndef _BLOCKASYNC_H_
   19.10 +#define _BLOCKASYNC_H_
   19.11 +
   19.12 +#include <assert.h>
   19.13 +#include <xc.h>
   19.14 +#include "vdi.h"
   19.15 +
   19.16 +struct io_ret
   19.17 +{
   19.18 +	enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type;
   19.19 +	union {
   19.20 +		u64   a;
   19.21 +		char *b;
   19.22 +		int   i;
   19.23 +	} u;
   19.24 +};
   19.25 +
   19.26 +typedef void (*io_cb_t)(struct io_ret r, void *param);
   19.27 +
   19.28 +/* per-vdi lock structures to make sure requests run in a safe order. */
   19.29 +struct radix_wait {
   19.30 +	enum {RLOCK, WLOCK} type;
   19.31 +	io_cb_t  cb;
   19.32 +	void    *param;
   19.33 +	struct radix_wait *next;
   19.34 +};
   19.35 +
   19.36 +struct radix_lock {
   19.37 +	pthread_mutex_t lock;
   19.38 +	int                    lines[1024];
   19.39 +	struct radix_wait     *waiters[1024];
   19.40 +	enum {ANY, READ, STOP} state[1024];
   19.41 +};
   19.42 +void radix_lock_init(struct radix_lock *r);
   19.43 +
   19.44 +void block_read(u64 addr, io_cb_t cb, void *param);
   19.45 +void block_write(u64 addr, char *block, io_cb_t cb, void *param);
   19.46 +void block_alloc(char *block, io_cb_t cb, void *param);
   19.47 +void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
   19.48 +void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
   19.49 +void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
   19.50 +void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
   19.51 +void init_block_async(void);
   19.52 +
   19.53 +static inline u64 IO_ADDR(struct io_ret r)
   19.54 +{
   19.55 +	assert(r.type == IO_ADDR_T);
   19.56 +	return r.u.a;
   19.57 +}
   19.58 +
   19.59 +static inline char *IO_BLOCK(struct io_ret r)
   19.60 +{
   19.61 +	assert(r.type == IO_BLOCK_T);
   19.62 +	return r.u.b;
   19.63 +}
   19.64 +
   19.65 +static inline int IO_INT(struct io_ret r)
   19.66 +{
   19.67 +	assert(r.type == IO_INT_T);
   19.68 +	return r.u.i;
   19.69 +}
   19.70 +
   19.71 +
   19.72 +#endif //_BLOCKASYNC_H_
    20.1 --- a/tools/blktap/blockstore-tls.c	Thu May 19 21:14:26 2005 +0000
    20.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.3 @@ -1,161 +0,0 @@
    20.4 -/**************************************************************************
    20.5 - * 
    20.6 - * blockstore.c
    20.7 - *
    20.8 - * Simple block store interface
    20.9 - *
   20.10 - */
   20.11 - 
   20.12 -#include <fcntl.h>
   20.13 -#include <unistd.h>
   20.14 -#include <stdio.h>
   20.15 -#include <stdlib.h>
   20.16 -#include <string.h>
   20.17 -#include <pthread.h>
   20.18 -#include <sys/types.h>
   20.19 -#include <sys/stat.h>
   20.20 -#include "blockstore.h"
   20.21 -#include "parallax-threaded.h"
   20.22 -
   20.23 -/*static int block_fp = -1;*/
   20.24 - 
   20.25 -static int fd_list[READ_POOL_SIZE+1];
   20.26 - 
   20.27 -/**
   20.28 - * readblock: read a block from disk
   20.29 - *   @id: block id to read
   20.30 - *
   20.31 - *   @return: pointer to block, NULL on error
   20.32 - */
   20.33 -
   20.34 -void *readblock(u64 id) 
   20.35 -{
   20.36 -    void *block;
   20.37 -    int tid = (int)pthread_getspecific(tid_key);
   20.38 -    
   20.39 -    if (lseek64(fd_list[tid], ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
   20.40 -        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
   20.41 -        perror("readblock lseek");
   20.42 -        goto err;
   20.43 -    }
   20.44 -    if ((block = malloc(BLOCK_SIZE)) == NULL) {
   20.45 -        perror("readblock malloc");
   20.46 -        goto err;
   20.47 -    }
   20.48 -    if (read(fd_list[tid], block, BLOCK_SIZE) != BLOCK_SIZE) {
   20.49 -        perror("readblock read");
   20.50 -        free(block);
   20.51 -        goto err;
   20.52 -    }
   20.53 -    return block;
   20.54 -    
   20.55 -err:
   20.56 -    return NULL;
   20.57 -}
   20.58 -
   20.59 -/**
   20.60 - * writeblock: write an existing block to disk
   20.61 - *   @id: block id
   20.62 - *   @block: pointer to block
   20.63 - *
   20.64 - *   @return: zero on success, -1 on failure
   20.65 - */
   20.66 -int writeblock(u64 id, void *block) 
   20.67 -{
   20.68 -    int tid = (int)pthread_getspecific(tid_key);
   20.69 -    
   20.70 -    if (lseek64(fd_list[tid], ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
   20.71 -        perror("writeblock lseek");
   20.72 -        goto err;
   20.73 -    }
   20.74 -    if (write(fd_list[tid], block, BLOCK_SIZE) < 0) {
   20.75 -        perror("writeblock write");
   20.76 -        goto err;
   20.77 -    }
   20.78 -    return 0;
   20.79 -
   20.80 -err:
   20.81 -    return -1;
   20.82 -}
   20.83 -
   20.84 -/**
   20.85 - * allocblock: write a new block to disk
   20.86 - *   @block: pointer to block
   20.87 - *
   20.88 - *   @return: new id of block on disk
   20.89 - */
   20.90 -
   20.91 -u64 allocblock(void *block) 
   20.92 -{
   20.93 -    u64 lb;
   20.94 -    off64_t pos;
   20.95 -    int tid = (int)pthread_getspecific(tid_key);
   20.96 -
   20.97 -    pos = lseek64(fd_list[tid], 0, SEEK_END);
   20.98 -    if (pos == (off64_t)-1) {
   20.99 -        perror("allocblock lseek");
  20.100 -        goto err;
  20.101 -    }
  20.102 -    if (pos % BLOCK_SIZE != 0) {
  20.103 -        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
  20.104 -        goto err;
  20.105 -    }
  20.106 -    if (write(fd_list[tid], block, BLOCK_SIZE) != BLOCK_SIZE) {
  20.107 -        perror("allocblock write");
  20.108 -        goto err;
  20.109 -    }
  20.110 -    lb = pos / BLOCK_SIZE + 1;
  20.111 -    
  20.112 -    return lb;
  20.113 -    
  20.114 -err:
  20.115 -    return 0;
  20.116 -    
  20.117 -}
  20.118 -
  20.119 -
  20.120 -/**
  20.121 - * newblock: get a new in-memory block set to zeros
  20.122 - *
  20.123 - *   @return: pointer to new block, NULL on error
  20.124 - */
  20.125 -void *newblock() 
  20.126 -{
  20.127 -    void *block = malloc(BLOCK_SIZE);
  20.128 -    if (block == NULL) {
  20.129 -        perror("newblock");
  20.130 -        return NULL;
  20.131 -    }
  20.132 -    memset(block, 0, BLOCK_SIZE);
  20.133 -    return block;
  20.134 -}
  20.135 -
  20.136 -
  20.137 -/**
  20.138 - * freeblock: unallocate an in-memory block
  20.139 - *   @id: block id (zero if this is only in-memory)
  20.140 - *   @block: block to be freed
  20.141 - */
  20.142 -void freeblock(void *block) 
  20.143 -{
  20.144 -    if (block != NULL)
  20.145 -        free(block);
  20.146 -}
  20.147 -
  20.148 -
  20.149 -int __init_blockstore(void)
  20.150 -{
  20.151 -    int i;
  20.152 -    
  20.153 -    for (i=0; i<(READ_POOL_SIZE+1); i++) {
  20.154 -        
  20.155 -        fd_list[i] = open("blockstore.dat", 
  20.156 -                O_RDWR | O_CREAT | O_LARGEFILE, 0644);
  20.157 -
  20.158 -        if (fd_list[i] < 0) {
  20.159 -            perror("open");
  20.160 -            return -1;
  20.161 -        }
  20.162 -    }
  20.163 -    return 0;
  20.164 -}
    21.1 --- a/tools/blktap/blockstore.c	Thu May 19 21:14:26 2005 +0000
    21.2 +++ b/tools/blktap/blockstore.c	Fri May 20 14:49:37 2005 +0000
    21.3 @@ -19,7 +19,7 @@
    21.4  #include <pthread.h>
    21.5  #include "parallax-threaded.h"
    21.6  
    21.7 -#define BLOCKSTORE_REMOTE
    21.8 +//#define BLOCKSTORE_REMOTE
    21.9  //#define BSDEBUG
   21.10  
   21.11  #define RETRY_TIMEOUT 1000000 /* microseconds */
   21.12 @@ -942,7 +942,8 @@ u64 allocblock_hint(void *block, u64 hin
   21.13  void *readblock(u64 id) {
   21.14      void *block;
   21.15      int block_fp;
   21.16 -    
   21.17 +   
   21.18 +//printf("readblock(%llu)\n", id); 
   21.19      block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
   21.20  
   21.21      if (block_fp < 0) {
   21.22 @@ -1336,6 +1337,7 @@ int __init_blockstore(void)
   21.23  void __exit_blockstore(void)
   21.24  {
   21.25      int i;
   21.26 +#ifdef BLOCKSTORE_REMOTE
   21.27      pthread_mutex_destroy(&ptmutex_recv);
   21.28      pthread_mutex_destroy(&ptmutex_luid);
   21.29      pthread_mutex_destroy(&ptmutex_queue);
   21.30 @@ -1345,4 +1347,5 @@ void __exit_blockstore(void)
   21.31          pthread_mutex_destroy(&(pool_thread[i].ptmutex));
   21.32          pthread_cond_destroy(&(pool_thread[i].ptcv));
   21.33      }
   21.34 +#endif
   21.35  }
    22.1 --- a/tools/blktap/libgnbd/Makefile	Thu May 19 21:14:26 2005 +0000
    22.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.3 @@ -1,8 +0,0 @@
    22.4 -
    22.5 -CFLAGS += -Wall -Werror -g
    22.6 -LDFLAGS += -g
    22.7 -
    22.8 -libgnbd.a: libgnbd.o
    22.9 -	$(AR) r $@ $<
   22.10 -
   22.11 -gnbdtest: gnbdtest.o libgnbd.a
    23.1 --- a/tools/blktap/libgnbd/gnbdtest.c	Thu May 19 21:14:26 2005 +0000
    23.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.3 @@ -1,90 +0,0 @@
    23.4 -
    23.5 -#include <err.h>
    23.6 -#include <stdint.h>
    23.7 -#include <stdio.h>
    23.8 -#include <stdlib.h>
    23.9 -#include <string.h>
   23.10 -#include <unistd.h>
   23.11 -
   23.12 -#include <sys/poll.h>
   23.13 -
   23.14 -#include "libgnbd.h"
   23.15 -
   23.16 -#define PRINTF(x) printf x
   23.17 -#if 0
   23.18 -#define DFPRINTF(x...) fprintf(stderr, ##x)
   23.19 -#define DPRINTF(x) DFPRINTF x
   23.20 -#else
   23.21 -#define DPRINTF(x)
   23.22 -#endif
   23.23 -
   23.24 -static unsigned char buf1[8 << 9];
   23.25 -static unsigned char buf2[8 << 9];
   23.26 -static unsigned char buf3[8 << 9];
   23.27 -
   23.28 -int
   23.29 -main(int argc, char **argv)
   23.30 -{
   23.31 -	struct gnbd_handle *gh;
   23.32 -	struct pollfd pfd[1];
   23.33 -	int err, tout;
   23.34 -
   23.35 -	gh = gnbd_setup("panik", 0x38e7, "cl349-nahant-beta2-root1",
   23.36 -	    "arcadians.cl.cam.ac.uk");
   23.37 -	if (gh == NULL)
   23.38 -		errx(1, "gnbd_setup");
   23.39 -
   23.40 -	memset(pfd, 0, sizeof(pfd));
   23.41 -	pfd[0].fd = gnbd_fd(gh);
   23.42 -	pfd[0].events = POLLIN;
   23.43 -
   23.44 -	while ((tout = poll(pfd, 1, 0)) >= 0) {
   23.45 -		if (tout == 0)
   23.46 -			continue;
   23.47 -		DPRINTF(("event\n"));
   23.48 -		if (pfd[0].revents) {
   23.49 -			err = gnbd_reply(gh);
   23.50 -			pfd[0].events = POLLIN;
   23.51 -			switch (err) {
   23.52 -			case GNBD_LOGIN_DONE:
   23.53 -				DPRINTF(("sectors: %08llu\n",
   23.54 -					    gnbd_sectors(gh)));
   23.55 -				err = gnbd_read(gh, 8, 8, buf2, 1);
   23.56 -				if (err)
   23.57 -					warnx("gnbd_read");
   23.58 -				err = gnbd_read(gh, 0, 8, buf1, 0);
   23.59 -				if (err)
   23.60 -					warnx("gnbd_read");
   23.61 -				err = gnbd_read(gh, 16, 8, buf3, 2);
   23.62 -				if (err)
   23.63 -					warnx("gnbd_read");
   23.64 -				break;
   23.65 -			case GNBD_REQUEST_DONE:
   23.66 -				DPRINTF(("request done %ld\n",
   23.67 -					    gnbd_finished_request(gh)));
   23.68 -				if (0 && gnbd_finished_request(gh) == 0) {
   23.69 -					write(1, buf1, 8 << 9);
   23.70 -					err = gnbd_write(gh, 0, 8, buf1, 10);
   23.71 -					if (err)
   23.72 -						warnx("gnbd_write");
   23.73 -				}
   23.74 -				break;
   23.75 -			case GNBD_CONTINUE:
   23.76 -				DPRINTF(("continue\n"));
   23.77 -				break;
   23.78 -			case 0:
   23.79 -				break;
   23.80 -			case GNBD_CONTINUE_WRITE:
   23.81 -				DPRINTF(("continue write\n"));
   23.82 -				pfd[0].events |= POLLOUT;
   23.83 -				break;
   23.84 -			default:
   23.85 -				warnx("gnbd_reply error");
   23.86 -				break;
   23.87 -			}
   23.88 -			DPRINTF(("got gnbd reply\n"));
   23.89 -		}
   23.90 -	}
   23.91 -
   23.92 -	return 0;
   23.93 -}
    24.1 --- a/tools/blktap/libgnbd/libgnbd.c	Thu May 19 21:14:26 2005 +0000
    24.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.3 @@ -1,647 +0,0 @@
    24.4 -/* libgnbd.c
    24.5 - * 
    24.6 - * gnbd client library
    24.7 - *
    24.8 - * Copyright (c) 2005, Christian Limpach
    24.9 - */
   24.10 -  
   24.11 -#include <byteswap.h>
   24.12 -#include <endian.h>
   24.13 -#include <err.h>
   24.14 -#include <errno.h>
   24.15 -#include <netdb.h>
   24.16 -#include <stdlib.h>
   24.17 -#include <string.h>
   24.18 -#include <unistd.h>
   24.19 -
   24.20 -#include <sys/socket.h>
   24.21 -#include <sys/time.h>
   24.22 -#include <sys/types.h>
   24.23 -
   24.24 -#include <stdio.h>
   24.25 -
   24.26 -#include "libgnbd.h"
   24.27 -
   24.28 -#define	PROTOCOL_VERSION	2
   24.29 -
   24.30 -#define	EXTERN_KILL_GSERV_REQ	5
   24.31 -#define	EXTERN_LOGIN_REQ	6
   24.32 -
   24.33 -#define	GNBD_REQUEST_MAGIC	0x37a07e00
   24.34 -#define	GNBD_KEEP_ALIVE_MAGIC	0x5b46d8c2
   24.35 -#define	GNBD_REPLY_MAGIC	0x41f09370
   24.36 -
   24.37 -enum {
   24.38 -	GNBD_CMD_READ = 0,
   24.39 -	GNBD_CMD_WRITE = 1,
   24.40 -	GNBD_CMD_DISC = 2,
   24.41 -	GNBD_CMD_PING = 3
   24.42 -};
   24.43 -
   24.44 -#if __BYTE_ORDER == __BIG_ENDIAN
   24.45 -#define htonll(x) (x)
   24.46 -#define ntohll(x) (x)
   24.47 -#endif
   24.48 -#if __BYTE_ORDER == __LITTLE_ENDIAN
   24.49 -#define htonll(x) bswap_64(x)
   24.50 -#define ntohll(x) bswap_64(x)
   24.51 -#endif
   24.52 -
   24.53 -#define PRINTF(x) printf x
   24.54 -#if 0
   24.55 -#define DFPRINTF(x...) fprintf(stderr, ##x)
   24.56 -#define DPRINTF(x) DFPRINTF x
   24.57 -#else
   24.58 -#define DPRINTF(x)
   24.59 -#endif
   24.60 -
   24.61 -struct gnbd_request {
   24.62 -	struct gnbd_request	*gr_next;
   24.63 -	unsigned char		*gr_buf;
   24.64 -	ssize_t			gr_size;
   24.65 -	ssize_t			gr_done;
   24.66 -	unsigned long		gr_cookie;
   24.67 -};
   24.68 -
   24.69 -struct gnbd_handle {
   24.70 -	int			gh_fd;
   24.71 -	unsigned int		gh_flags;
   24.72 -	uint64_t		gh_sectors;
   24.73 -	char			gh_devname[32];
   24.74 -	char			gh_nodename[65];
   24.75 -	struct sockaddr_in	gh_sin;
   24.76 -	struct gnbd_request	*gh_outstanding_requests;
   24.77 -	struct gnbd_request	**gh_outstanding_requests_last;
   24.78 -	struct gnbd_request	*gh_incoming_request;
   24.79 -	unsigned long		gh_finished_request;
   24.80 -};
   24.81 -#define	GHF_EXPECT_KILL_GSERV_REPLY	0x0001
   24.82 -#define	GHF_EXPECT_LOGIN_REPLY		0x0002
   24.83 -#define	GHF_INCOMING_REQUEST		0x0004
   24.84 -
   24.85 -struct device_req {
   24.86 -	char		name[32];
   24.87 -};
   24.88 -
   24.89 -struct node_req {
   24.90 -	char		node_name[65];
   24.91 -};
   24.92 -
   24.93 -struct login_req {
   24.94 -        uint64_t	timestamp;
   24.95 -        uint16_t	version;
   24.96 -        uint8_t		pad[6];
   24.97 -        char		devname[32];
   24.98 -};
   24.99 -
  24.100 -struct login_reply {
  24.101 -        uint64_t	sectors;
  24.102 -        uint16_t	version;
  24.103 -        uint8_t		err;
  24.104 -        uint8_t		pad[5];
  24.105 -};
  24.106 -
  24.107 -struct gnbd_server_request {
  24.108 -	uint32_t	magic;
  24.109 -	uint32_t	type;
  24.110 -	char		handle[8];
  24.111 -	uint64_t	from;
  24.112 -	uint32_t	len;
  24.113 -} __attribute__ ((packed));
  24.114 -
  24.115 -struct gnbd_server_reply {
  24.116 -	uint32_t	magic;
  24.117 -	uint32_t	error;
  24.118 -	char		handle[8];
  24.119 -} __attribute__ ((packed));
  24.120 -
  24.121 -static int
  24.122 -read_buf(int fd, void *buf, size_t count, size_t *read_count)
  24.123 -{
  24.124 -	int err;
  24.125 -
  24.126 -	err = read(fd, buf, count);
  24.127 -	if (read_count) {
  24.128 -		if (err >= 0)
  24.129 -			*read_count = err;
  24.130 -	} else if (err != count)
  24.131 -		return EINTR;	/* xxx */
  24.132 -	return err < 0;
  24.133 -}
  24.134 -
  24.135 -static int
  24.136 -read_4(int fd, unsigned long *val)
  24.137 -{
  24.138 -	unsigned long buf;
  24.139 -	int err;
  24.140 -
  24.141 -	err = read_buf(fd, &buf, sizeof(buf), NULL);
  24.142 -	if (err == 0)
  24.143 -		*val = ntohl(buf);
  24.144 -	return err;
  24.145 -}
  24.146 -
  24.147 -static int
  24.148 -write_buf(int fd, void *buf, size_t count)
  24.149 -{
  24.150 -	int err;
  24.151 -
  24.152 -	err = write(fd, buf, count);
  24.153 -	return err < 0;
  24.154 -}
  24.155 -
  24.156 -static int
  24.157 -write_4(int fd, unsigned long val)
  24.158 -{
  24.159 -	unsigned long buf;
  24.160 -	int err;
  24.161 -
  24.162 -	buf = htonl(val);
  24.163 -	err = write_buf(fd, &buf, sizeof(buf));
  24.164 -	return err;
  24.165 -}
  24.166 -
  24.167 -
  24.168 -static int
  24.169 -socket_connect(struct gnbd_handle *gh)
  24.170 -{
  24.171 -	int err;
  24.172 -
  24.173 -	if (gh->gh_fd >= 0)
  24.174 -		return 0;
  24.175 -
  24.176 -	gh->gh_fd = socket(PF_INET, SOCK_STREAM, 0);
  24.177 -	if (gh->gh_fd < 0) {
  24.178 -		warn("socket");
  24.179 -		return gh->gh_fd;
  24.180 -	}
  24.181 -
  24.182 -	err = connect(gh->gh_fd, (struct sockaddr *)&gh->gh_sin,
  24.183 -	    sizeof(gh->gh_sin));
  24.184 -	if (err) {
  24.185 -		warn("connect");
  24.186 -		goto out;
  24.187 -	}
  24.188 -
  24.189 -	return 0;
  24.190 - out:
  24.191 -	close (gh->gh_fd);
  24.192 -	gh->gh_fd = -1;
  24.193 -	return err;
  24.194 -}
  24.195 -
  24.196 -static int
  24.197 -socket_shutdown(struct gnbd_handle *gh)
  24.198 -{
  24.199 -
  24.200 -	close (gh->gh_fd);
  24.201 -	gh->gh_fd = -1;
  24.202 -	return 0;
  24.203 -}
  24.204 -
  24.205 -static int
  24.206 -find_request(struct gnbd_handle *gh, struct gnbd_request *gr)
  24.207 -{
  24.208 -	struct gnbd_request **tmp;
  24.209 -
  24.210 -	for (tmp = &gh->gh_outstanding_requests; *tmp;
  24.211 -	     tmp = &(*tmp)->gr_next) {
  24.212 -		if (*tmp == gr) {
  24.213 -			*tmp = (*tmp)->gr_next;
  24.214 -			if (*tmp == NULL)
  24.215 -				gh->gh_outstanding_requests_last = tmp;
  24.216 -			return 0;
  24.217 -		}
  24.218 -	}
  24.219 -	return ENOENT;
  24.220 -}
  24.221 -
  24.222 -static int
  24.223 -kill_gserv(struct gnbd_handle *gh)
  24.224 -{
  24.225 -	struct device_req dr;
  24.226 -	struct node_req nr;
  24.227 -	int err;
  24.228 -
  24.229 -	DPRINTF(("gnbd_kill_gserv\n"));
  24.230 -	err = socket_connect(gh);
  24.231 -	if (err) {
  24.232 -		warnx("socket_connect");
  24.233 -		return err;
  24.234 -	}
  24.235 -
  24.236 -	err = write_4(gh->gh_fd, EXTERN_KILL_GSERV_REQ);
  24.237 -	if (err) {
  24.238 -		warnx("send EXTERN_LOGIN_REQ failed");
  24.239 -		goto out;
  24.240 -	}
  24.241 -
  24.242 -	strncpy(dr.name, gh->gh_devname, sizeof(dr.name));
  24.243 -	err = write_buf(gh->gh_fd, &dr, sizeof(dr));
  24.244 -	if (err) {
  24.245 -		warnx("send device_req failed");
  24.246 -		goto out;
  24.247 -	}
  24.248 -
  24.249 -	strncpy(nr.node_name, gh->gh_nodename, sizeof(nr.node_name));
  24.250 -	err = write_buf(gh->gh_fd, &nr, sizeof(nr));
  24.251 -	if (err) {
  24.252 -		warnx("send node_req failed");
  24.253 -		goto out;
  24.254 -	}
  24.255 -
  24.256 -	gh->gh_flags |= GHF_EXPECT_KILL_GSERV_REPLY;
  24.257 -	DPRINTF(("gnbd_kill_gserv ok\n"));
  24.258 -
  24.259 -	return 0;
  24.260 - out:
  24.261 -	socket_shutdown(gh);
  24.262 -	return err;
  24.263 -}
  24.264 -
  24.265 -static int
  24.266 -login(struct gnbd_handle *gh)
  24.267 -{
  24.268 -	struct login_req lr;
  24.269 -	struct node_req nr;
  24.270 -	int err;
  24.271 -	uint64_t timestamp;
  24.272 -	struct timeval tv;
  24.273 -
  24.274 -	DPRINTF(("gnbd_login\n"));
  24.275 -	err = socket_connect(gh);
  24.276 -	if (err) {
  24.277 -		warnx("socket_connect");
  24.278 -		return err;
  24.279 -	}
  24.280 -
  24.281 -	err = write_4(gh->gh_fd, EXTERN_LOGIN_REQ);
  24.282 -	if (err) {
  24.283 -		warnx("send EXTERN_LOGIN_REQ failed");
  24.284 -		goto out;
  24.285 -	}
  24.286 -
  24.287 -	err = gettimeofday(&tv, NULL);
  24.288 -	if (err) {
  24.289 -		warnx("gettimeofday");
  24.290 -		goto out;
  24.291 -	}
  24.292 -	timestamp = (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec;
  24.293 -
  24.294 -	lr.timestamp = htonll(timestamp);
  24.295 -	lr.version = htons(PROTOCOL_VERSION);
  24.296 -	strncpy(lr.devname, gh->gh_devname, sizeof(lr.devname));
  24.297 -	err = write_buf(gh->gh_fd, &lr, sizeof(lr));
  24.298 -	if (err) {
  24.299 -		warnx("send login_req failed");
  24.300 -		goto out;
  24.301 -	}
  24.302 -
  24.303 -	strncpy(nr.node_name, gh->gh_nodename, sizeof(nr.node_name));
  24.304 -	err = write_buf(gh->gh_fd, &nr, sizeof(nr));
  24.305 -	if (err) {
  24.306 -		warnx("send node_req failed");
  24.307 -		goto out;
  24.308 -	}
  24.309 -
  24.310 -	gh->gh_flags |= GHF_EXPECT_LOGIN_REPLY;
  24.311 -
  24.312 -	DPRINTF(("gnbd_login ok\n"));
  24.313 -	return 0;
  24.314 - out:
  24.315 -	socket_shutdown(gh);
  24.316 -	return err;
  24.317 -}
  24.318 -
  24.319 -static int
  24.320 -kill_gserv_reply(struct gnbd_handle *gh)
  24.321 -{
  24.322 -	unsigned long reply;
  24.323 -	int err;
  24.324 -
  24.325 -	DPRINTF(("read gnbd_kill_gserv_reply\n"));
  24.326 -	err = read_4(gh->gh_fd, &reply);
  24.327 -	if (err) {
  24.328 -		warnx("read kill_gserv_reply failed");
  24.329 -		return err;
  24.330 -	}
  24.331 -
  24.332 -	if (reply && reply != ENODEV) {
  24.333 -		warnx("kill gserv failed: %s", strerror(reply));
  24.334 -		return reply;
  24.335 -	}
  24.336 -
  24.337 -	gh->gh_flags &= ~GHF_EXPECT_KILL_GSERV_REPLY;
  24.338 -	socket_shutdown(gh);
  24.339 -
  24.340 -	err = login(gh);
  24.341 -	if (err)
  24.342 -		warnx("gnbd_login");
  24.343 -
  24.344 -	return err;
  24.345 -}
  24.346 -
  24.347 -static int
  24.348 -login_reply(struct gnbd_handle *gh)
  24.349 -{
  24.350 -	struct login_reply lr;
  24.351 -	int err;
  24.352 -
  24.353 -	DPRINTF(("read gnbd_login_reply\n"));
  24.354 -	err = read_buf(gh->gh_fd, &lr, sizeof(lr), NULL);
  24.355 -	if (err) {
  24.356 -		warnx("read login_reply failed");
  24.357 -		return err;
  24.358 -	}
  24.359 -
  24.360 -	if (lr.err) {
  24.361 -		if (lr.version) {
  24.362 -			warnx("gnbd version mismatch %04x != %04x",
  24.363 -			    PROTOCOL_VERSION, ntohs(lr.version));
  24.364 -			return EINVAL;
  24.365 -		}
  24.366 -		warnx("login refused: %s", strerror(lr.err));
  24.367 -		return lr.err;
  24.368 -	}
  24.369 -	gh->gh_sectors = ntohll(lr.sectors);
  24.370 -
  24.371 -	gh->gh_flags &= ~GHF_EXPECT_LOGIN_REPLY;
  24.372 -
  24.373 -	return GNBD_LOGIN_DONE;
  24.374 -}
  24.375 -
  24.376 -static int
  24.377 -incoming_request(struct gnbd_handle *gh)
  24.378 -{
  24.379 -	struct gnbd_request *gr = gh->gh_incoming_request;
  24.380 -	ssize_t done;
  24.381 -	int err;
  24.382 -
  24.383 -	DPRINTF(("incoming_request: done %d size %d\n", gr->gr_done,
  24.384 -		    gr->gr_size));
  24.385 -	err = read_buf(gh->gh_fd, gr->gr_buf + gr->gr_done,
  24.386 -	    gr->gr_size - gr->gr_done, &done);
  24.387 -	if (err)
  24.388 -		goto out;
  24.389 -
  24.390 -	DPRINTF(("incoming_request: got %d\n", done));
  24.391 -	gr->gr_done += done;
  24.392 -	if (gr->gr_done == gr->gr_size) {
  24.393 -		gh->gh_flags &= ~GHF_INCOMING_REQUEST;
  24.394 -		gh->gh_finished_request = gr->gr_cookie;
  24.395 -		free(gr);
  24.396 -		return GNBD_REQUEST_DONE;
  24.397 -	}
  24.398 -
  24.399 -	return GNBD_CONTINUE;
  24.400 -
  24.401 - out:
  24.402 -	gh->gh_flags &= ~GHF_INCOMING_REQUEST;
  24.403 -	gh->gh_finished_request = 0;
  24.404 -	free(gr);
  24.405 -	return err;
  24.406 -}
  24.407 -
  24.408 -
  24.409 -
  24.410 -int
  24.411 -gnbd_close(struct gnbd_handle *gh)
  24.412 -{
  24.413 -	int err;
  24.414 -	struct gnbd_request **tmp;
  24.415 -
  24.416 -	for (tmp = &gh->gh_outstanding_requests; *tmp; tmp = &(*tmp)->gr_next)
  24.417 -		free(*tmp);
  24.418 -
  24.419 -	if (gh->gh_flags & GHF_INCOMING_REQUEST)
  24.420 -		free(gh->gh_incoming_request);
  24.421 -
  24.422 -	err = close(gh->gh_fd);
  24.423 -	if (err)
  24.424 -		warnx("close");
  24.425 -	free(gh);
  24.426 -
  24.427 -	return err;
  24.428 -}
  24.429 -
  24.430 -int
  24.431 -gnbd_fd(struct gnbd_handle *gh)
  24.432 -{
  24.433 -	return gh->gh_fd;
  24.434 -}
  24.435 -
  24.436 -unsigned long
  24.437 -gnbd_finished_request(struct gnbd_handle *gh)
  24.438 -{
  24.439 -	return gh->gh_finished_request;
  24.440 -}
  24.441 -
  24.442 -int
  24.443 -gnbd_read(struct gnbd_handle *gh, uint64_t sector, ssize_t count,
  24.444 -    unsigned char *buf, unsigned long cookie)
  24.445 -{
  24.446 -	struct gnbd_server_request gsr;
  24.447 -	struct gnbd_request *gr;
  24.448 -	int err;
  24.449 -
  24.450 -	gr = malloc(sizeof(struct gnbd_request));
  24.451 -	if (gr == NULL)
  24.452 -		return ENOMEM;
  24.453 -	memset(gr, 0, sizeof(gr));
  24.454 -
  24.455 -	gr->gr_buf = buf;
  24.456 -	gr->gr_size = count << 9;
  24.457 -	gr->gr_done = 0;
  24.458 -	gr->gr_cookie = cookie;
  24.459 -
  24.460 -	gsr.magic = htonl(GNBD_REQUEST_MAGIC);
  24.461 -	gsr.type = htonl(GNBD_CMD_READ);
  24.462 -	gsr.from = htonll(sector << 9);
  24.463 -	gsr.len = htonl(gr->gr_size);
  24.464 -	memset(gsr.handle, 0, sizeof(gsr.handle));
  24.465 -	memcpy(gsr.handle, &gr, sizeof(gr));
  24.466 -
  24.467 -	err = write_buf(gh->gh_fd, &gsr, sizeof(gsr));
  24.468 -	if (err) {
  24.469 -		warnx("write_buf");
  24.470 -		goto out;
  24.471 -	}
  24.472 -
  24.473 -	*gh->gh_outstanding_requests_last = gr;
  24.474 -	gh->gh_outstanding_requests_last = &gr->gr_next;
  24.475 -
  24.476 -	return 0;
  24.477 -
  24.478 - out:
  24.479 -	free(gr);
  24.480 -	return err;
  24.481 -}
  24.482 -
  24.483 -int
  24.484 -gnbd_write(struct gnbd_handle *gh, uint64_t sector, ssize_t count,
  24.485 -    unsigned char *buf, unsigned long cookie)
  24.486 -{
  24.487 -	struct gnbd_server_request gsr;
  24.488 -	struct gnbd_request *gr;
  24.489 -	int err;
  24.490 -
  24.491 -	gr = malloc(sizeof(struct gnbd_request));
  24.492 -	if (gr == NULL)
  24.493 -		return ENOMEM;
  24.494 -	memset(gr, 0, sizeof(gr));
  24.495 -
  24.496 -	gr->gr_buf = buf;
  24.497 -	gr->gr_size = count << 9;
  24.498 -	gr->gr_done = 0;
  24.499 -	gr->gr_cookie = cookie;
  24.500 -
  24.501 -	gsr.magic = htonl(GNBD_REQUEST_MAGIC);
  24.502 -	gsr.type = htonl(GNBD_CMD_WRITE);
  24.503 -	gsr.from = htonll(sector << 9);
  24.504 -	gsr.len = htonl(gr->gr_size);
  24.505 -	memset(gsr.handle, 0, sizeof(gsr.handle));
  24.506 -	memcpy(gsr.handle, &gr, sizeof(gr));
  24.507 -
  24.508 -	err = write_buf(gh->gh_fd, &gsr, sizeof(gsr));
  24.509 -	if (err) {
  24.510 -		warnx("write_buf");
  24.511 -		goto out;
  24.512 -	}
  24.513 -
  24.514 -	/* XXX handle non-blocking socket */
  24.515 -	err = write_buf(gh->gh_fd, buf, gr->gr_size);
  24.516 -	if (err) {
  24.517 -		warnx("write_buf");
  24.518 -		goto out;
  24.519 -	}
  24.520 -	gr->gr_done += gr->gr_size;
  24.521 -
  24.522 -	*gh->gh_outstanding_requests_last = gr;
  24.523 -	gh->gh_outstanding_requests_last = &gr->gr_next;
  24.524 -
  24.525 -	DPRINTF(("write done\n"));
  24.526 -
  24.527 -	return 0;
  24.528 -
  24.529 - out:
  24.530 -	free(gr);
  24.531 -	return err;
  24.532 -}
  24.533 -
  24.534 -int
  24.535 -gnbd_reply(struct gnbd_handle *gh)
  24.536 -{
  24.537 -	struct gnbd_server_reply gsr;
  24.538 -	struct gnbd_request *gr;
  24.539 -	int err;
  24.540 -
  24.541 -	DPRINTF(("gnbd_reply flags %x\n", gh->gh_flags));
  24.542 -	if ((gh->gh_flags & GHF_EXPECT_KILL_GSERV_REPLY))
  24.543 -		return kill_gserv_reply(gh);
  24.544 -	if ((gh->gh_flags & GHF_EXPECT_LOGIN_REPLY))
  24.545 -		return login_reply(gh);
  24.546 -	if ((gh->gh_flags & GHF_INCOMING_REQUEST))
  24.547 -		return incoming_request(gh);
  24.548 -
  24.549 -	DPRINTF(("read response\n"));
  24.550 -	err = read_buf(gh->gh_fd, &gsr, sizeof(gsr), NULL);
  24.551 -	if (err) {
  24.552 -		warnx("read gnbd_reply failed");
  24.553 -		return err;
  24.554 -	}
  24.555 -
  24.556 -	if (ntohl(gsr.error)) {
  24.557 -		warnx("gnbd server reply error: %s", strerror(gsr.error));
  24.558 -		return gsr.error;
  24.559 -	}
  24.560 -
  24.561 -	switch (ntohl(gsr.magic)) {
  24.562 -	case GNBD_KEEP_ALIVE_MAGIC:
  24.563 -		DPRINTF(("read keep alive magic\n"));
  24.564 -		return GNBD_CONTINUE;
  24.565 -	case GNBD_REPLY_MAGIC:
  24.566 -		DPRINTF(("read reply magic\n"));
  24.567 -		memcpy(&gr, gsr.handle, sizeof(gr));
  24.568 -		err = find_request(gh, gr);
  24.569 -		if (err) {
  24.570 -			warnx("unknown request");
  24.571 -			return err;
  24.572 -		}
  24.573 -		if (gr->gr_done != gr->gr_size) {
  24.574 -			gh->gh_incoming_request = gr;
  24.575 -			gh->gh_flags |= GHF_INCOMING_REQUEST;
  24.576 -			return GNBD_CONTINUE;
  24.577 -		} else {
  24.578 -			gh->gh_finished_request = gr->gr_cookie;
  24.579 -			free(gr);
  24.580 -			return GNBD_REQUEST_DONE;
  24.581 -		}
  24.582 -	default:
  24.583 -		break;
  24.584 -	}
  24.585 -
  24.586 -	return GNBD_CONTINUE;
  24.587 -}
  24.588 -
  24.589 -uint64_t
  24.590 -gnbd_sectors(struct gnbd_handle *gh)
  24.591 -{
  24.592 -
  24.593 -	return gh->gh_sectors;
  24.594 -}
  24.595 -
  24.596 -struct gnbd_handle *
  24.597 -gnbd_setup(char *server, unsigned int port, char *devname, char *nodename)
  24.598 -{
  24.599 -	struct gnbd_handle *gh;
  24.600 -	struct addrinfo *res, *ai;
  24.601 -	int err;
  24.602 -
  24.603 -	gh = malloc(sizeof(struct gnbd_handle));
  24.604 -	if (gh == NULL)
  24.605 -		return NULL;
  24.606 -	memset(gh, 0, sizeof(gh));
  24.607 -	gh->gh_fd = -1;
  24.608 -	gh->gh_outstanding_requests_last = &gh->gh_outstanding_requests;
  24.609 -
  24.610 -	strncpy(gh->gh_devname, devname, sizeof(gh->gh_devname));
  24.611 -	strncpy(gh->gh_nodename, nodename, sizeof(gh->gh_nodename));
  24.612 -
  24.613 -	err = getaddrinfo(server, NULL, NULL, &res);
  24.614 -	if (err) {
  24.615 -		if (err != EAI_SYSTEM)
  24.616 -			warnx("getaddrinfo: %s", gai_strerror(err));
  24.617 -		else
  24.618 -			warn("getaddrinfo: %s", gai_strerror(err));
  24.619 -		goto out;
  24.620 -	}
  24.621 -
  24.622 -	for (ai = res; ai; ai = ai->ai_next) {
  24.623 -		if (ai->ai_socktype != SOCK_STREAM)
  24.624 -			continue;
  24.625 -		if (ai->ai_family == AF_INET)
  24.626 -			break;
  24.627 -	}
  24.628 -
  24.629 -	if (ai == NULL)
  24.630 -		goto out;
  24.631 -
  24.632 -	gh->gh_sin.sin_family = ai->ai_family;
  24.633 -	gh->gh_sin.sin_port = htons(port);
  24.634 -	memcpy(&gh->gh_sin.sin_addr,
  24.635 -	    &((struct sockaddr_in *)ai->ai_addr)->sin_addr,
  24.636 -	    sizeof(gh->gh_sin.sin_addr));
  24.637 -
  24.638 -	err = kill_gserv(gh);
  24.639 -	if (err) {
  24.640 -		warnx("gnbd_kill_gserv");
  24.641 -		goto out;
  24.642 -	}
  24.643 -
  24.644 -	freeaddrinfo(res);
  24.645 -	return gh;
  24.646 - out:
  24.647 -	free(gh);
  24.648 -	freeaddrinfo(res);
  24.649 -	return NULL;
  24.650 -}
    25.1 --- a/tools/blktap/libgnbd/libgnbd.h	Thu May 19 21:14:26 2005 +0000
    25.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.3 @@ -1,25 +0,0 @@
    25.4 -/* libgnbd.h
    25.5 - *
    25.6 - * gnbd client library
    25.7 - *
    25.8 - * Copyright (c) 2005, Christian Limpach
    25.9 - */
   25.10 -     
   25.11 -#define GNBD_LOGIN_DONE		0x10001
   25.12 -#define GNBD_REQUEST_DONE	0x10002
   25.13 -#define GNBD_CONTINUE		0x10003
   25.14 -#define GNBD_CONTINUE_WRITE	0x10004
   25.15 -
   25.16 -struct gnbd_handle;
   25.17 -int gnbd_close(struct gnbd_handle *);
   25.18 -int gnbd_fd(struct gnbd_handle *);
   25.19 -unsigned long gnbd_finished_request(struct gnbd_handle *);
   25.20 -int gnbd_kill_gserv(struct gnbd_handle *);
   25.21 -int gnbd_login(struct gnbd_handle *);
   25.22 -int gnbd_read(struct gnbd_handle *, uint64_t, ssize_t, unsigned char *,
   25.23 -    unsigned long);
   25.24 -int gnbd_write(struct gnbd_handle *, uint64_t, ssize_t, unsigned char *,
   25.25 -    unsigned long);
   25.26 -int gnbd_reply(struct gnbd_handle *);
   25.27 -uint64_t gnbd_sectors(struct gnbd_handle *);
   25.28 -struct gnbd_handle *gnbd_setup(char *, unsigned int, char *, char *);
    26.1 --- a/tools/blktap/parallax-threaded.c	Thu May 19 21:14:26 2005 +0000
    26.2 +++ b/tools/blktap/parallax-threaded.c	Fri May 20 14:49:37 2005 +0000
    26.3 @@ -145,33 +145,33 @@ void blkif_destroy(blkif_be_destroy_t *d
    26.4      destroy->status = BLKIF_BE_STATUS_OKAY;
    26.5  }
    26.6  
    26.7 -void vbd_grow(blkif_be_vbd_grow_t *grow) 
    26.8 +void vbd_create(blkif_be_vbd_create_t *create)
    26.9  {
   26.10      blkif_t            *blkif;
   26.11      vdi_t              *vdi, **vdip;
   26.12 -    blkif_vdev_t        vdevice = grow->vdevice;
   26.13 +    blkif_vdev_t        vdevice = create->vdevice;
   26.14  
   26.15 -    DPRINTF("parallax (vbd_grow): grow=%p\n", grow); 
   26.16 +    DPRINTF("parallax (vbd_create): create=%p\n", create); 
   26.17      
   26.18 -    blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
   26.19 +    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
   26.20      if ( blkif == NULL )
   26.21      {
   26.22 -        DPRINTF("vbd_grow attempted for non-existent blkif (%u,%u)\n", 
   26.23 -                grow->domid, grow->blkif_handle); 
   26.24 -        grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   26.25 +        DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", 
   26.26 +                create->domid, create->blkif_handle); 
   26.27 +        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   26.28          return;
   26.29      }
   26.30  
   26.31      /* VDI identifier is in grow->extent.sector_start */
   26.32 -    DPRINTF("vbd_grow: grow->extent.sector_start (id) is %llx\n", 
   26.33 -            grow->extent.sector_start);
   26.34 +    DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", 
   26.35 +            (unsigned long)create->dev_handle);
   26.36  
   26.37 -    vdi = vdi_get(grow->extent.sector_start);
   26.38 +    vdi = vdi_get(create->dev_handle);
   26.39      if (vdi == NULL)
   26.40      {
   26.41 -        printf("parallax (vbd_grow): VDI %llx not found.\n",
   26.42 -               grow->extent.sector_start);
   26.43 -        grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
   26.44 +        printf("parallax (vbd_create): VDI %lx not found.\n",
   26.45 +               (unsigned long)create->dev_handle);
   26.46 +        create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
   26.47          return;
   26.48      }
   26.49      
   26.50 @@ -183,7 +183,7 @@ void vbd_grow(blkif_be_vbd_grow_t *grow)
   26.51      *vdip = vdi;
   26.52      
   26.53      DPRINTF("vbd_grow: happy return!\n"); 
   26.54 -    grow->status = BLKIF_BE_STATUS_OKAY;
   26.55 +    create->status = BLKIF_BE_STATUS_OKAY;
   26.56  }
   26.57  
   26.58  int parallax_control(control_msg_t *msg)
   26.59 @@ -213,10 +213,10 @@ int parallax_control(control_msg_t *msg)
   26.60          blkif_destroy((blkif_be_destroy_t *)msg->msg);
   26.61          break;  
   26.62          
   26.63 -    case CMSG_BLKIF_BE_VBD_GROW:
   26.64 -        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
   26.65 +    case CMSG_BLKIF_BE_VBD_CREATE:
   26.66 +        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
   26.67              goto parse_error;
   26.68 -        vbd_grow((blkif_be_vbd_grow_t *)msg->msg);
   26.69 +        vbd_create((blkif_be_vbd_create_t *)msg->msg);
   26.70          break;
   26.71      }
   26.72      return 0;
    27.1 --- a/tools/blktap/parallax.c	Thu May 19 21:14:26 2005 +0000
    27.2 +++ b/tools/blktap/parallax.c	Fri May 20 14:49:37 2005 +0000
    27.3 @@ -10,11 +10,16 @@
    27.4  #include <stdio.h>
    27.5  #include <stdlib.h>
    27.6  #include <string.h>
    27.7 +#include <pthread.h>
    27.8  #include "blktaplib.h"
    27.9  #include "blockstore.h"
   27.10  #include "vdi.h"
   27.11 +#include "block-async.h"
   27.12 +#include "requests-async.h"
   27.13  
   27.14  #define PARALLAX_DEV     61440
   27.15 +#define SECTS_PER_NODE   8
   27.16 +
   27.17  
   27.18  #if 0
   27.19  #define DPRINTF(_f, _a...) printf ( _f , ## _a )
   27.20 @@ -142,33 +147,33 @@ void blkif_destroy(blkif_be_destroy_t *d
   27.21      destroy->status = BLKIF_BE_STATUS_OKAY;
   27.22  }
   27.23  
   27.24 -void vbd_grow(blkif_be_vbd_grow_t *grow) 
   27.25 +void vbd_create(blkif_be_vbd_create_t *create)
   27.26  {
   27.27      blkif_t            *blkif;
   27.28      vdi_t              *vdi, **vdip;
   27.29 -    blkif_vdev_t        vdevice = grow->vdevice;
   27.30 +    blkif_vdev_t        vdevice = create->vdevice;
   27.31  
   27.32 -    DPRINTF("parallax (vbd_grow): grow=%p\n", grow); 
   27.33 +    DPRINTF("parallax (vbd_create): create=%p\n", create); 
   27.34      
   27.35 -    blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
   27.36 +    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
   27.37      if ( blkif == NULL )
   27.38      {
   27.39 -        DPRINTF("vbd_grow attempted for non-existent blkif (%u,%u)\n", 
   27.40 -                grow->domid, grow->blkif_handle); 
   27.41 -        grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   27.42 +        DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", 
   27.43 +                create->domid, create->blkif_handle); 
   27.44 +        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   27.45          return;
   27.46      }
   27.47  
   27.48      /* VDI identifier is in grow->extent.sector_start */
   27.49 -    DPRINTF("vbd_grow: grow->extent.sector_start (id) is %llx\n", 
   27.50 -            grow->extent.sector_start);
   27.51 +    DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", 
   27.52 +            (unsigned long)create->dev_handle);
   27.53  
   27.54 -    vdi = vdi_get(grow->extent.sector_start);
   27.55 +    vdi = vdi_get(create->dev_handle);
   27.56      if (vdi == NULL)
   27.57      {
   27.58 -        printf("parallax (vbd_grow): VDI %llx not found.\n",
   27.59 -               grow->extent.sector_start);
   27.60 -        grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
   27.61 +        printf("parallax (vbd_create): VDI %lx not found.\n",
   27.62 +               (unsigned long)create->dev_handle);
   27.63 +        create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
   27.64          return;
   27.65      }
   27.66      
   27.67 @@ -180,7 +185,7 @@ void vbd_grow(blkif_be_vbd_grow_t *grow)
   27.68      *vdip = vdi;
   27.69      
   27.70      DPRINTF("vbd_grow: happy return!\n"); 
   27.71 -    grow->status = BLKIF_BE_STATUS_OKAY;
   27.72 +    create->status = BLKIF_BE_STATUS_OKAY;
   27.73  }
   27.74  
   27.75  int parallax_control(control_msg_t *msg)
   27.76 @@ -210,10 +215,10 @@ int parallax_control(control_msg_t *msg)
   27.77          blkif_destroy((blkif_be_destroy_t *)msg->msg);
   27.78          break;  
   27.79          
   27.80 -    case CMSG_BLKIF_BE_VBD_GROW:
   27.81 -        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
   27.82 +    case CMSG_BLKIF_BE_VBD_CREATE:
   27.83 +        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
   27.84              goto parse_error;
   27.85 -        vbd_grow((blkif_be_vbd_grow_t *)msg->msg);
   27.86 +        vbd_create((blkif_be_vbd_create_t *)msg->msg);
   27.87          break;
   27.88      }
   27.89      return 0;
   27.90 @@ -248,9 +253,9 @@ int parallax_probe(blkif_request_t *req,
   27.91              img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
   27.92              img_info[nr_vdis].device   = vdi->vdevice;
   27.93              img_info[nr_vdis].info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
   27.94 -            /* The -2 here accounts for the LSB in the radix tree */
   27.95 +            /* The -1 here accounts for the LSB in the radix tree */
   27.96              img_info[nr_vdis].capacity = 
   27.97 -                    ((1LL << (VDI_HEIGHT-2)) >> SECTOR_SHIFT);
   27.98 +                    ((1LL << (VDI_HEIGHT-1)) * SECTS_PER_NODE);
   27.99              nr_vdis++;
  27.100              vdi = vdi->next;
  27.101          }
  27.102 @@ -274,78 +279,122 @@ err:
  27.103      return BLKTAP_RESPOND;  
  27.104  }
  27.105  
  27.106 +typedef struct {
  27.107 +    blkif_request_t *req;
  27.108 +    int              count;
  27.109 +    int              error;
  27.110 +    pthread_mutex_t  mutex;
  27.111 +} pending_t;
  27.112 +
  27.113 +#define MAX_REQUESTS 64
  27.114 +pending_t pending_list[MAX_REQUESTS];
  27.115 +
  27.116 +struct cb_param {
  27.117 +	pending_t *pent;
  27.118 +	int       segment;
  27.119 +	u64       sector; 
  27.120 +	u64       vblock; /* for debug printing -- can be removed. */
  27.121 +};
  27.122 +
  27.123 +static void read_cb(struct io_ret r, void *in_param)
  27.124 +{
  27.125 +	struct cb_param *param = (struct cb_param *)in_param;
  27.126 +	pending_t *p = param->pent;
  27.127 +	int segment = param->segment;
  27.128 +	blkif_request_t *req = p->req;
  27.129 +    unsigned long size, offset, start;
  27.130 +	char *dpage, *spage;
  27.131 +	
  27.132 +	spage  = IO_BLOCK(r);
  27.133 +	if (spage == NULL) { p->error++; goto finish; }
  27.134 +	dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), segment);
  27.135 +    
  27.136 +    /* Calculate read size and offset within the read block. */
  27.137 +
  27.138 +    offset = (param->sector << SECTOR_SHIFT) % BLOCK_SIZE;
  27.139 +    size = ( blkif_last_sect (req->frame_and_sects[segment]) -
  27.140 +             blkif_first_sect(req->frame_and_sects[segment]) + 1
  27.141 +           ) << SECTOR_SHIFT;
  27.142 +    start = blkif_first_sect(req->frame_and_sects[segment]) 
  27.143 +            << SECTOR_SHIFT;
  27.144 +
  27.145 +    DPRINTF("ParallaxRead: sect: %lld (%ld,%ld),  "
  27.146 +            "vblock %llx, "
  27.147 +            "size %lx\n", 
  27.148 +            param->sector, blkif_first_sect(p->req->frame_and_sects[segment]),
  27.149 +            blkif_last_sect (p->req->frame_and_sects[segment]),
  27.150 +            param->vblock, size); 
  27.151 +
  27.152 +    memcpy(dpage + start, spage + offset, size);
  27.153 +    freeblock(spage);
  27.154 +    
  27.155 +    /* Done the read.  Now update the pending record. */
  27.156 + finish:
  27.157 +    pthread_mutex_lock(&p->mutex);
  27.158 +    p->count--;
  27.159 +    
  27.160 +	if (p->count == 0) {
  27.161 +    	blkif_response_t *rsp;
  27.162 +    	
  27.163 +        rsp = (blkif_response_t *)req;
  27.164 +        rsp->id = req->id;
  27.165 +        rsp->operation = BLKIF_OP_READ;
  27.166 +    	if (p->error == 0) {
  27.167 +	        rsp->status = BLKIF_RSP_OKAY;
  27.168 +    	} else {
  27.169 +    		rsp->status = BLKIF_RSP_ERROR;
  27.170 +    	}
  27.171 +        blktap_inject_response(rsp);       
  27.172 +    }
  27.173 +    
  27.174 +    pthread_mutex_unlock(&p->mutex);
  27.175 +	
  27.176 +	free(param); /* TODO: replace with cached alloc/dealloc */
  27.177 +}	
  27.178 +
  27.179  int parallax_read(blkif_request_t *req, blkif_t *blkif)
  27.180  {
  27.181      blkif_response_t *rsp;
  27.182 -    unsigned long size, offset, start;
  27.183 -    u64 sector;
  27.184      u64 vblock, gblock;
  27.185      vdi_t *vdi;
  27.186 +    u64 sector;
  27.187      int i;
  27.188      char *dpage, *spage;
  27.189 +    pending_t *pent;
  27.190  
  27.191      vdi = blkif_get_vdi(blkif, req->device);
  27.192      
  27.193      if ( vdi == NULL )
  27.194          goto err;
  27.195 +        
  27.196 +    pent = &pending_list[ID_TO_IDX(req->id)];
  27.197 +    pent->count = req->nr_segments;
  27.198 +    pent->req = req;
  27.199 +    pthread_mutex_init(&pent->mutex, NULL);
  27.200      
  27.201      for (i = 0; i < req->nr_segments; i++) {
  27.202 -            
  27.203 -        dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  27.204 -        
  27.205 -        /* Round the requested segment to a block address. */
  27.206 -        
  27.207 -        sector  = req->sector_number + (8*i);
  27.208 -        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
  27.209 -        
  27.210 -        /* Get that block from the store. */
  27.211 -        
  27.212 -        gblock = vdi_lookup_block(vdi, vblock, NULL);
  27.213 -        
  27.214 -        /* Calculate read size and offset within the read block. */
  27.215 -        
  27.216 -        offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
  27.217 -        size = ( blkif_last_sect (req->frame_and_sects[i]) -
  27.218 -                 blkif_first_sect(req->frame_and_sects[i]) + 1
  27.219 -               ) << SECTOR_SHIFT;
  27.220 -        start = blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
  27.221 -        
  27.222 -        /* If the block does not exist in the store, return zeros. */
  27.223 -        /* Otherwise, copy that region to the guest page.          */
  27.224 -        
  27.225 -        DPRINTF("ParallaxRead: sect: %lld (%ld,%ld),  "
  27.226 -                "vblock %llx, gblock %llx, "
  27.227 -                "size %lx\n", 
  27.228 -                sector, blkif_first_sect(req->frame_and_sects[i]),
  27.229 -                blkif_last_sect (req->frame_and_sects[i]),
  27.230 -                vblock, gblock, size); 
  27.231 -       
  27.232 -        if ( gblock == 0 ) {
  27.233 -           
  27.234 -            memset(dpage + start, '\0', size);
  27.235 -            
  27.236 -        } else {
  27.237 -            
  27.238 -            spage = readblock(gblock);
  27.239 -            
  27.240 -            if (spage == NULL) {
  27.241 -                printf("Error reading gblock from store: %Ld\n", gblock);
  27.242 -                goto err;
  27.243 -            }
  27.244 -            
  27.245 -            memcpy(dpage + start, spage + offset, size);
  27.246 -            
  27.247 -            freeblock(spage);
  27.248 -        }
  27.249 -        
  27.250 +        pthread_t tid;
  27.251 +        int ret;
  27.252 +        struct cb_param *p;
  27.253 +
  27.254 +	    /* Round the requested segment to a block address. */
  27.255 +	    sector  = req->sector_number + (8*i);
  27.256 +	    vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
  27.257 +
  27.258 +		/* TODO: Replace this call to malloc with a cached allocation */
  27.259 +		p = (struct cb_param *)malloc(sizeof(struct cb_param));
  27.260 +		p->pent = pent;
  27.261 +		p->sector = sector; 
  27.262 +		p->segment = i;     
  27.263 +		p->vblock = vblock; /* dbg */
  27.264 +		
  27.265 +	    /* Get that block from the store. */
  27.266 +	    async_read(vdi, vblock, read_cb, (void *)p);
  27.267 +
  27.268      }
  27.269 +    
  27.270 +    return BLKTAP_STOLEN;
  27.271  
  27.272 -    rsp = (blkif_response_t *)req;
  27.273 -    rsp->id = req->id;
  27.274 -    rsp->operation = BLKIF_OP_READ;
  27.275 -    rsp->status = BLKIF_RSP_OKAY;
  27.276 -
  27.277 -    return BLKTAP_RESPOND;
  27.278  err:
  27.279      rsp = (blkif_response_t *)req;
  27.280      rsp->id = req->id;
  27.281 @@ -355,6 +404,37 @@ err:
  27.282      return BLKTAP_RESPOND;  
  27.283  }
  27.284  
  27.285 +static void write_cb(struct io_ret r, void *in_param)
  27.286 +{
  27.287 +	struct cb_param *param = (struct cb_param *)in_param;
  27.288 +	pending_t *p = param->pent;
  27.289 +	blkif_request_t *req = p->req;
  27.290 +
  27.291 +	/* catch errors from the block code. */
  27.292 +	if (IO_INT(r) < 0) p->error++;
  27.293 +	
  27.294 +    pthread_mutex_lock(&p->mutex);
  27.295 +    p->count--;
  27.296 +    
  27.297 +	if (p->count == 0) {
  27.298 +    	blkif_response_t *rsp;
  27.299 +    	
  27.300 +        rsp = (blkif_response_t *)req;
  27.301 +        rsp->id = req->id;
  27.302 +        rsp->operation = BLKIF_OP_WRITE;
  27.303 +    	if (p->error == 0) {
  27.304 +	        rsp->status = BLKIF_RSP_OKAY;
  27.305 +    	} else {
  27.306 +    		rsp->status = BLKIF_RSP_ERROR;
  27.307 +    	}
  27.308 +        blktap_inject_response(rsp);       
  27.309 +    }
  27.310 +    
  27.311 +    pthread_mutex_unlock(&p->mutex);
  27.312 +	
  27.313 +	free(param); /* TODO: replace with cached alloc/dealloc */
  27.314 +}
  27.315 +
  27.316  int parallax_write(blkif_request_t *req, blkif_t *blkif)
  27.317  {
  27.318      blkif_response_t *rsp;
  27.319 @@ -364,13 +444,20 @@ int parallax_write(blkif_request_t *req,
  27.320      char *spage;
  27.321      unsigned long size, offset, start;
  27.322      vdi_t *vdi;
  27.323 +    pending_t *pent;
  27.324  
  27.325      vdi = blkif_get_vdi(blkif, req->device);
  27.326      
  27.327      if ( vdi == NULL )
  27.328          goto err;
  27.329 +        
  27.330 +    pent = &pending_list[ID_TO_IDX(req->id)];
  27.331 +    pent->count = req->nr_segments;
  27.332 +    pent->req = req;
  27.333 +    pthread_mutex_init(&pent->mutex, NULL);
  27.334      
  27.335      for (i = 0; i < req->nr_segments; i++) {
  27.336 +        struct cb_param *p;
  27.337              
  27.338          spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  27.339          
  27.340 @@ -379,10 +466,6 @@ int parallax_write(blkif_request_t *req,
  27.341          sector  = req->sector_number + (8*i);
  27.342          vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
  27.343          
  27.344 -        /* Get that block from the store. */
  27.345 -        
  27.346 -        gblock   = vdi_lookup_block(vdi, vblock, &writable);
  27.347 -        
  27.348          /* Calculate read size and offset within the read block. */
  27.349          
  27.350          offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
  27.351 @@ -405,27 +488,20 @@ int parallax_write(blkif_request_t *req,
  27.352              printf("]\n] STRANGE WRITE!\n]\n");
  27.353              goto err;
  27.354          }
  27.355 -
  27.356 -        if (( gblock == 0 ) || ( writable == 0 )) {
  27.357 -         
  27.358 -            gblock = allocblock(spage);
  27.359 -            vdi_update_block(vdi, vblock, gblock);
  27.360 -            
  27.361 -        } else {
  27.362 -            
  27.363 -            /* write-in-place, no need to change mappings. */
  27.364 -            writeblock(gblock, spage);
  27.365 -            
  27.366 -        }
  27.367 -
  27.368 +        
  27.369 + 		/* TODO: Replace this call to malloc with a cached allocation */
  27.370 +		p = (struct cb_param *)malloc(sizeof(struct cb_param));
  27.371 +		p->pent = pent;
  27.372 +		p->sector = sector; 
  27.373 +		p->segment = i;     
  27.374 +		p->vblock = vblock; /* dbg */
  27.375 +		
  27.376 +        /* Issue the write to the store. */
  27.377 +	    async_write(vdi, vblock, spage, write_cb, (void *)p);
  27.378      }
  27.379  
  27.380 -    rsp = (blkif_response_t *)req;
  27.381 -    rsp->id = req->id;
  27.382 -    rsp->operation = BLKIF_OP_WRITE;
  27.383 -    rsp->status = BLKIF_RSP_OKAY;
  27.384 +    return BLKTAP_STOLEN;
  27.385  
  27.386 -    return BLKTAP_RESPOND;
  27.387  err:
  27.388      rsp = (blkif_response_t *)req;
  27.389      rsp->id = req->id;
  27.390 @@ -477,16 +553,19 @@ void __init_parallax(void)
  27.391  }
  27.392  
  27.393  
  27.394 +
  27.395  int main(int argc, char *argv[])
  27.396  {
  27.397      DPRINTF("parallax: starting.\n"); 
  27.398      __init_blockstore();
  27.399      DPRINTF("parallax: initialized blockstore...\n"); 
  27.400 +	init_block_async();
  27.401 +    DPRINTF("parallax: initialized async blocks...\n"); 
  27.402      __init_vdi();
  27.403      DPRINTF("parallax: initialized vdi registry etc...\n"); 
  27.404      __init_parallax();
  27.405      DPRINTF("parallax: initialized local stuff..\n"); 
  27.406 -    
  27.407 +
  27.408      blktap_register_ctrl_hook("parallax_control", parallax_control);
  27.409      blktap_register_request_hook("parallax_request", parallax_request);
  27.410      DPRINTF("parallax: added ctrl + request hooks, starting listen...\n"); 
    28.1 --- a/tools/blktap/radix.c	Thu May 19 21:14:26 2005 +0000
    28.2 +++ b/tools/blktap/radix.c	Fri May 20 14:49:37 2005 +0000
    28.3 @@ -25,18 +25,6 @@
    28.4  #define DEBUG
    28.5  */
    28.6  
    28.7 -/*
    28.8 -#define STAGED
    28.9 -*/
   28.10 -
   28.11 -#define ZERO 0LL
   28.12 -#define ONE 1LL
   28.13 -#define ONEMASK 0xffffffffffffffeLL
   28.14 -
   28.15 -
   28.16 -typedef u64 *radix_tree_node;
   28.17 -
   28.18 -
   28.19  /* Experimental radix cache. */
   28.20  
   28.21  static  pthread_mutex_t rcache_mutex = PTHREAD_MUTEX_INITIALIZER;
   28.22 @@ -276,7 +264,6 @@ radix_tree_node cloneblock(radix_tree_no
   28.23   *
   28.24   *   @return: value on success, zero on error
   28.25   */
   28.26 -#ifndef STAGED
   28.27  
   28.28  u64 lookup(int height, u64 root, u64 key) {
   28.29      radix_tree_node node;
   28.30 @@ -318,92 +305,6 @@ u64 lookup(int height, u64 root, u64 key
   28.31      return ZERO;
   28.32  }
   28.33  
   28.34 -#else /* STAGED */
   28.35 -
   28.36 -
   28.37 -/* non-recursive staged lookup, assume height is 35. */
   28.38 -u64 lookup(int height, u64 root, u64 key) {
   28.39 -    radix_tree_node node;
   28.40 -    u64 mask = ONE;
   28.41 -
   28.42 -printf("lookup!\n");    
   28.43 -    assert(key >> 35 == 0);
   28.44 -
   28.45 -    /* the root block may be smaller to ensure all leaves are full */
   28.46 -    height = 27;
   28.47 -
   28.48 -    /* now carve off equal sized chunks at each step */
   28.49 -    
   28.50 -    /* ROOT: (LEVEL 0) KEYLEN=35*/
   28.51 -    if (getid(root) == ZERO)
   28.52 -        return ZERO;
   28.53 -
   28.54 -    node = (radix_tree_node) readblock(getid(root));
   28.55 -    if (node == NULL)
   28.56 -        return ZERO;
   28.57 -
   28.58 -    root = node[(key >> height) & RADIX_TREE_MAP_MASK];
   28.59 -    mask &= root;
   28.60 -    freeblock(node);
   28.61 -
   28.62 -    if (height == 0)
   28.63 -        return ( root & ONEMASK ) | mask;
   28.64 -
   28.65 -    height -= RADIX_TREE_MAP_SHIFT; /* == 18 */
   28.66 -
   28.67 -    /* LEVEL 1: KEYLEN=26*/
   28.68 -    if (getid(root) == ZERO)
   28.69 -        return ZERO;
   28.70 -
   28.71 -    node = (radix_tree_node) readblock(getid(root));
   28.72 -    if (node == NULL)
   28.73 -        return ZERO;
   28.74 -
   28.75 -    root = node[(key >> height) & RADIX_TREE_MAP_MASK];
   28.76 -    mask &= root;
   28.77 -    freeblock(node);
   28.78 -
   28.79 -    if (height == 0)
   28.80 -        return ( root & ONEMASK ) | mask;
   28.81 -
   28.82 -    height -= RADIX_TREE_MAP_SHIFT; /* == 9 */
   28.83 -    
   28.84 -    /* LEVEL 2: KEYLEN=17*/
   28.85 -    if (getid(root) == ZERO)
   28.86 -        return ZERO;
   28.87 -
   28.88 -    node = (radix_tree_node) readblock(getid(root));
   28.89 -    if (node == NULL)
   28.90 -        return ZERO;
   28.91 -
   28.92 -    root = node[(key >> height) & RADIX_TREE_MAP_MASK];
   28.93 -    mask &= root;
   28.94 -    freeblock(node);
   28.95 -
   28.96 -    if (height == 0)
   28.97 -        return ( root & ONEMASK ) | mask;
   28.98 -
   28.99 -    height -= RADIX_TREE_MAP_SHIFT; /* == 0 */
  28.100 -    
  28.101 -    /* LEVEL 3: KEYLEN=8*/
  28.102 -    if (getid(root) == ZERO)
  28.103 -        return ZERO;
  28.104 -
  28.105 -    node = (radix_tree_node) readblock(getid(root));
  28.106 -    if (node == NULL)
  28.107 -        return ZERO;
  28.108 -
  28.109 -    root = node[(key >> height) & RADIX_TREE_MAP_MASK];
  28.110 -    mask &= root;
  28.111 -    freeblock(node);
  28.112 -
  28.113 -    // if (height == 0)
  28.114 -        return ( root & ONEMASK ) | mask;
  28.115 -
  28.116 -}
  28.117 -
  28.118 -#endif
  28.119 -
  28.120  /*
  28.121   * update: set a radix tree entry, doing copy-on-write as necessary
  28.122   *   @height: height in bits of the radix tree
  28.123 @@ -414,9 +315,6 @@ printf("lookup!\n");
  28.124   *   @returns: (possibly new) root id on success (with LSB=1), 0 on failure
  28.125   */
  28.126  
  28.127 -#ifndef STAGED
  28.128 -
  28.129 -
  28.130  u64 update(int height, u64 root, u64 key, u64 val) {
  28.131      int offset;
  28.132      u64 child;
  28.133 @@ -487,320 +385,6 @@ u64 update(int height, u64 root, u64 key
  28.134      return root;
  28.135  }
  28.136  
  28.137 -
  28.138 -#else /* STAGED */
  28.139 -
  28.140 -/* When update is called, state->next points to the thing to call after 
  28.141 - * update is finished. */
  28.142 -
  28.143 -struct cb_state_st;
  28.144 -
  28.145 -typedef struct {
  28.146 -    /* public stuff */
  28.147 -    u64 val;
  28.148 -    u64 key;
  28.149 -    u64 result;
  28.150 -    
  28.151 -    /* internal state */
  28.152 -    u64 root[4];
  28.153 -    radix_tree_node node[4];
  28.154 -    void (*next)(struct cb_state_st *);
  28.155 -    int err;
  28.156 -} radix_update_t;
  28.157 -
  28.158 -typedef struct cb_state_st{
  28.159 -    void (*next)(struct cb_state_st *); /* Next continuation. */
  28.160 -    union {
  28.161 -        radix_update_t update;
  28.162 -    } radix;
  28.163 -} cb_state_t;
  28.164 -
  28.165 -void s_readblock(cb_state_t *state, u64 id, void **ret)
  28.166 -{
  28.167 -    *ret = readblock(id);
  28.168 -    state->next(state);
  28.169 -}
  28.170 -
  28.171 -void s_allocblock(cb_state_t *state, void *block, u64 *ret)
  28.172 -{
  28.173 -    *ret = allocblock(block);
  28.174 -    state->next(state);
  28.175 -}
  28.176 -        
  28.177 -void s_writeblock(cb_state_t *state, u64 id, void *block, int *ret)
  28.178 -{
  28.179 -    *ret = writeblock(id, block);
  28.180 -    state->next(state);
  28.181 -}
  28.182 -
  28.183 -void cb_done(cb_state_t *state)
  28.184 -{
  28.185 -    printf("*** done ***\n");
  28.186 -}
  28.187 -
  28.188 -/* forward prototypes. */
  28.189 -void up0(cb_state_t *state);
  28.190 -void up1(cb_state_t *state);
  28.191 -void up2(cb_state_t *state);
  28.192 -void up3(cb_state_t *state);
  28.193 -void up4(cb_state_t *state);
  28.194 -void up5(cb_state_t *state);
  28.195 -void up6(cb_state_t *state);
  28.196 -void up7(cb_state_t *state);
  28.197 -void up8(cb_state_t *state);
  28.198 -void up9(cb_state_t *state);
  28.199 -void up10(cb_state_t *state);
  28.200 -void up11(cb_state_t *state);
  28.201 -void up12(cb_state_t *state);
  28.202 -
  28.203 -u64 update(int height, u64 root, u64 key, u64 val)
  28.204 -{
  28.205 -    cb_state_t state;
  28.206 -    radix_update_t *u = &state.radix.update;
  28.207 -    
  28.208 -    u->val = val;
  28.209 -    u->key = key;
  28.210 -    u->root[0] = root;
  28.211 -    u->root[1] = u->root[2] = u->root[3] = ZERO;
  28.212 -    u->node[0] = u->node[1] = u->node[2] = u->node[3] = NULL;
  28.213 -    
  28.214 -    /* take a copy of the higher-scoped next continuation. */
  28.215 -    u->next = state->next;
  28.216 -    
  28.217 -    /* update start state */
  28.218 -    state->next = up0;
  28.219 -    
  28.220 -    for (;;)
  28.221 -    {
  28.222 -        state->next(state);
  28.223 -        if (state->next == NULL) 
  28.224 -            break;
  28.225 -    }
  28.226 -    
  28.227 -    return u->result;
  28.228 -}
  28.229 -
  28.230 -/* c0:*/
  28.231 -void up0(cb_state_t *state) {
  28.232 -    radix_update_t *u = &state->radix.update;
  28.233 -    
  28.234 -    state->next = up1;
  28.235 -    s_readblock(state, getid(u->root[0]), (void **)&(u->node[0]));
  28.236 -}
  28.237 -    
  28.238 -/* c1: */
  28.239 -void up1(cb_state_t *state) {
  28.240 -    radix_update_t *u = &state->radix.update;
  28.241 -    
  28.242 -    u->root[1] = u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK];
  28.243 -    if (u->root[1] == ZERO) {
  28.244 -        u->node[1] = (radix_tree_node) newblock();
  28.245 -        /* goto next continuation (c2)*/ up2(state);return;
  28.246 -    } else {
  28.247 -        state->next = up2;
  28.248 -        s_readblock(state, getid(u->root[1]), (void **)&(u->node[1]));
  28.249 -    }
  28.250 -}
  28.251 -
  28.252 -/* c2: */
  28.253 -void up2(cb_state_t *state) {
  28.254 -    radix_update_t *u = &state->radix.update;
  28.255 -    
  28.256 -    if ((u->root[1] != ZERO) && (!iswritable(u->root[1]))) {
  28.257 -        /* need to clone this node */
  28.258 -        radix_tree_node oldnode = u->node[1];
  28.259 -        u->node[1] = cloneblock(u->node[1]);
  28.260 -        freeblock(oldnode);
  28.261 -        u->root[1] = ZERO;
  28.262 -    }
  28.263 -    u->root[2] = u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK];
  28.264 -    if (u->root[2] == ZERO) {
  28.265 -        u->node[2] = (radix_tree_node) newblock();
  28.266 -        /* goto next continuation (c3)*/ up3(state);return;
  28.267 -    } else {
  28.268 -        state->next = up3;
  28.269 -        s_readblock(state, getid(u->root[2]), (void **)&(u->node[2]));
  28.270 -    }
  28.271 -}
  28.272 -    
  28.273 -/* c3: */
  28.274 -void up3(cb_state_t *state) {
  28.275 -    radix_update_t *u = &state->radix.update;
  28.276 -    
  28.277 -    if ((u->root[2] != ZERO) && (!iswritable(u->root[2]))) {
  28.278 -        /* need to clone this node */
  28.279 -        radix_tree_node oldnode = u->node[2];
  28.280 -        u->node[2] = cloneblock(u->node[2]);
  28.281 -        freeblock(oldnode);
  28.282 -        u->root[2] = ZERO;
  28.283 -    }
  28.284 -    u->root[3] = u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK];
  28.285 -    if (u->root[3] == ZERO) {
  28.286 -        u->node[3] = (radix_tree_node) newblock();
  28.287 -        /* goto next continuation (c4)*/ up4(state);return;
  28.288 -    } else {
  28.289 -        state->next = up4;
  28.290 -        s_readblock(state, getid(u->root[3]), (void **)&(u->node[3]));
  28.291 -    }
  28.292 -}
  28.293 -    
  28.294 -/* c4: */
  28.295 -void up4(cb_state_t *state) {
  28.296 -    radix_update_t *u = &state->radix.update;
  28.297 -    
  28.298 -    if ((u->root[3] != ZERO) && (!iswritable(u->root[3]))) {
  28.299 -        /* need to clone this node */
  28.300 -        radix_tree_node oldnode = u->node[3];
  28.301 -        u->node[3] = cloneblock(u->node[3]);
  28.302 -        freeblock(oldnode);
  28.303 -        u->root[3] = ZERO;
  28.304 -    }
  28.305 -    
  28.306 -    if (u->node[3][u->key & RADIX_TREE_MAP_MASK] == u->val){
  28.307 -        /* no change, so we already owned the child */
  28.308 -        /* goto last continuation (c12) */ up12(state);return;
  28.309 -    }
  28.310 -
  28.311 -    u->node[3][u->key & RADIX_TREE_MAP_MASK] = u->val;
  28.312 -
  28.313 -    /* new/cloned blocks need to be saved */
  28.314 -    if (u->root[3] == ZERO) {
  28.315 -        /* mark this as an owned block */
  28.316 -        state->next = up5;
  28.317 -        s_allocblock(state, u->node[3], &u->root[3]);
  28.318 -        /* goto continuation (c5) */ return;
  28.319 -    } else {
  28.320 -        state->next = up6;
  28.321 -        s_writeblock(state, getid(u->root[3]), u->node[3], &u->err);
  28.322 -        /* goto continuation (c6) */ return;
  28.323 -    }
  28.324 -}
  28.325 -
  28.326 -/* c5: */
  28.327 -void up5(cb_state_t *state) {
  28.328 -    radix_update_t *u = &state->radix.update;
  28.329 -    
  28.330 -    if (u->root[3])
  28.331 -        u->root[3] = writable(u->root[3]);
  28.332 -    /* goto continuation (c6) */ up6(state);return;
  28.333 -}
  28.334 -    
  28.335 -/* c6: */
  28.336 -void up6(cb_state_t *state) {
  28.337 -    radix_update_t *u = &state->radix.update;
  28.338 -    
  28.339 -    if (u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK] == u->root[3]){
  28.340 -        /* no change, so we already owned the child */
  28.341 -        /* goto last continuation (c12) */ up12(state);return;
  28.342 -    }
  28.343 -    
  28.344 -    u->node[2][u->key >> 9 & RADIX_TREE_MAP_MASK] = u->root[3];
  28.345 -
  28.346 -    /* new/cloned blocks need to be saved */
  28.347 -    if (u->root[2] == ZERO) {
  28.348 -        /* mark this as an owned block */
  28.349 -        state->next = up7;
  28.350 -        s_allocblock(state, u->node[2], &u->root[2]);
  28.351 -        /* goto continuation (c7) */return;
  28.352 -    } else {
  28.353 -        state->next = up8;
  28.354 -        s_writeblock(state, getid(u->root[2]), u->node[2], &u->err);
  28.355 -        /* goto continuation (c8) */return;
  28.356 -    }
  28.357 -}
  28.358 -
  28.359 -/* c7: */
  28.360 -void up7(cb_state_t *state) {
  28.361 -    radix_update_t *u = &state->radix.update;
  28.362 -    
  28.363 -    if (u->root[2])
  28.364 -        u->root[2] = writable(u->root[2]);
  28.365 -    /* goto continuation (c8) */ up8(state);return;
  28.366 -}
  28.367 -    
  28.368 -/* c8: */
  28.369 -void up8(cb_state_t *state) {
  28.370 -    radix_update_t *u = &state->radix.update;
  28.371 -    
  28.372 -    if (u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK] == u->root[2]){
  28.373 -        /* no change, so we already owned the child */
  28.374 -        /* goto last continuation (c12) */ up12(state);return;
  28.375 -    }
  28.376 -    
  28.377 -    u->node[1][u->key >> 18 & RADIX_TREE_MAP_MASK] = u->root[2];
  28.378 -
  28.379 -    /* new/cloned blocks need to be saved */
  28.380 -    if (u->root[1] == ZERO) {
  28.381 -        /* mark this as an owned block */
  28.382 -        state->next = up9;
  28.383 -        s_allocblock(state, u->node[1], &u->root[1]);
  28.384 -        /* goto continuation (c9) */return;
  28.385 -    } else {
  28.386 -        state->next = up10;
  28.387 -        s_writeblock(state, getid(u->root[1]), u->node[1], &u->err);
  28.388 -        /* goto continuation (c10) */return;
  28.389 -    }
  28.390 -}
  28.391 -
  28.392 -/* c9: */
  28.393 -void up9(cb_state_t *state) {
  28.394 -    radix_update_t *u = &state->radix.update;
  28.395 -    
  28.396 -    if (u->root[1])
  28.397 -        u->root[1] = writable(u->root[1]);
  28.398 -    /* goto continuation (c10) */ up10(state);return;
  28.399 -}
  28.400 -    
  28.401 -/* c10: */
  28.402 -void up10(cb_state_t *state) {
  28.403 -    radix_update_t *u = &state->radix.update;
  28.404 -    
  28.405 -    if (u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK] == u->root[1]){
  28.406 -        /* no change, so we already owned the child */
  28.407 -        /* goto last continuation (c12) */ up12(state);return;
  28.408 -    }
  28.409 -    
  28.410 -    u->node[0][u->key >> 27 & RADIX_TREE_MAP_MASK] = u->root[1];
  28.411 -
  28.412 -    /* new/cloned blocks need to be saved */
  28.413 -    if (u->root[0] == ZERO) {
  28.414 -        /* mark this as an owned block */
  28.415 -        state->next = up11;
  28.416 -        s_allocblock(state, u->node[0], &u->root[0]);
  28.417 -        /* goto continuation (c11) */ return;
  28.418 -    } else {
  28.419 -        state->next = up10;
  28.420 -        s_writeblock(state, getid(u->root[0]), u->node[0], &u->err);
  28.421 -        /* goto continuation (c12) */ return;
  28.422 -    }
  28.423 -}
  28.424 -
  28.425 -/* c11: */
  28.426 -void up11(cb_state_t *state) {
  28.427 -    radix_update_t *u = &state->radix.update;
  28.428 -    
  28.429 -    if (u->root[0])
  28.430 -        u->root[0] = writable(u->root[0]);
  28.431 -    /* goto continuation (c12) */ up12(state);return;
  28.432 -}
  28.433 -    
  28.434 -/* c12: */
  28.435 -void up12(cb_state_t *state) {
  28.436 -    radix_update_t *u = &state->radix.update;
  28.437 -    
  28.438 -    int i;
  28.439 -    for (i=0;i<4;i++)
  28.440 -        if(u->node[i] != NULL) freeblock(u->node[i]);
  28.441 -    
  28.442 -    u->result = u->root[0];
  28.443 -    state->next = u->next;
  28.444 -    
  28.445 -    state->next(state);return;
  28.446 -}
  28.447 -    
  28.448 -#endif
  28.449 -
  28.450 -
  28.451  /**
  28.452   * snapshot: create a snapshot
  28.453   *   @root: old root node
  28.454 @@ -840,7 +424,6 @@ int collapse(int height, u64 proot, u64 
  28.455      int i, numlinks, ret, total = 0;
  28.456      radix_tree_node pnode, cnode;
  28.457      
  28.458 -//printf("proot: %Ld\n", getid(proot));
  28.459      if (height == 0) {
  28.460          height = -1; /* terminate recursion */
  28.461      } else {        
    29.1 --- a/tools/blktap/radix.h	Thu May 19 21:14:26 2005 +0000
    29.2 +++ b/tools/blktap/radix.h	Fri May 20 14:49:37 2005 +0000
    29.3 @@ -16,6 +16,16 @@
    29.4  #define putid(x) ((x)<<1)
    29.5  #define writable(x) (((x)<<1)|1LL)
    29.6  #define iswritable(x) ((x)&1LL)
    29.7 +#define ZERO 0LL
    29.8 +#define ONE 1LL
    29.9 +#define ONEMASK (~ONE) /* every bit set except bit 0, the bit iswritable() tests */
   29.10 +
   29.11 +#define RADIX_TREE_MAP_SHIFT 9
   29.12 +#define RADIX_TREE_MAP_MASK 0x1ff
   29.13 +#define RADIX_TREE_MAP_ENTRIES 512
   29.14 +
   29.15 +typedef u64 *radix_tree_node;
   29.16 +
   29.17  
   29.18  /*
   29.19   * main api
    30.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.2 +++ b/tools/blktap/requests-async.c	Fri May 20 14:49:37 2005 +0000
    30.3 @@ -0,0 +1,629 @@
    30.4 +/* requests-async.c
    30.5 + *
    30.6 + * asynchronous read and write request experiment for parallax.
    30.7 + */
    30.8 +
    30.9 +#include <stdio.h>
   30.10 +#include <stdlib.h>
   30.11 +#include <string.h>
   30.12 +#include <assert.h>
   30.13 +#include <pthread.h>
   30.14 +#include "requests-async.h"
   30.15 +#include "vdi.h"
   30.16 +#include "radix.h"
   30.17 +
   30.18 +#define L1_IDX(_a) (((_a) & 0x0000000007fc0000ULL) >> 18)
   30.19 +#define L2_IDX(_a) (((_a) & 0x000000000003fe00ULL) >> 9)
   30.20 +#define L3_IDX(_a) (((_a) & 0x00000000000001ffULL))
   30.21 +
   30.22 +
   30.23 +
   30.24 +//#define STANDALONE
   30.25 +
   30.26 +#if 0
   30.27 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   30.28 +#else
   30.29 +#define DPRINTF(_f, _a...) ((void)0)
   30.30 +#endif
   30.31 +
   30.32 +
   30.33 +struct io_req { /* one in-flight request; allocated by async_read/async_write */
   30.34 +    enum { IO_OP_READ, IO_OP_WRITE } op;
   30.35 +    u64        root; /* copied from vdi->radix_root when the request is created */
   30.36 +    u64        vaddr; /* address being read or written; L1/L2/L3_IDX index with it */
   30.37 +    int        state; /* an enum states value: the step the next callback handles */
   30.38 +    io_cb_t    cb; /* caller's completion callback */
   30.39 +    void      *param; /* passed back to cb unchanged */
   30.40 +    struct radix_lock *lock; /* copied from vdi->radix_lock */
   30.41 +
   30.42 +    /* internal stuff: */
   30.43 +    struct io_ret    retval;/* holds the return while we unlock. */
   30.44 +    char            *block; /* the block to write */
   30.45 +    radix_tree_node  radix[3]; /* nodes kept by write_cb, indexed by L1/L2/L3 */
   30.46 +    u64              radix_addr[3]; /* block ids those nodes are written back to */
   30.47 +};
   30.48 +
   30.49 +void clear_w_bits(radix_tree_node node) 
   30.50 +{
   30.51 +    int i;
   30.52 +    /* Mask every entry with ONEMASK (drops the writable bit); a NULL node
   30.53 +     * is a no-op, because callers may pass an unchecked read result. */
   30.54 +    for (i=0; node != NULL && i<RADIX_TREE_MAP_ENTRIES; i++)
   30.55 +        node[i] = node[i] & ONEMASK;
   30.56 +}
   30.56 +
   30.57 +enum states { /* values of io_req.state; read_cb/write_cb switch on them */
   30.58 +    /* both */
   30.59 +    READ_L1,
   30.60 +    READ_L2,
   30.61 +    READ_L3,
   30.62 +
   30.63 +    /* read */
   30.64 +    READ_LOCKED,
   30.65 +    READ_DATA,
   30.66 +    READ_UNLOCKED,
   30.67 +    RETURN_ZERO,
   30.68 +
   30.69 +    /* write */
   30.70 +    WRITE_LOCKED,
   30.71 +    WRITE_DATA,
   30.72 +    WRITE_UNLOCKED,
   30.73 +    
   30.74 +    /* L3 Zero Path */
   30.75 +    ALLOC_DATA_L3z,
   30.76 +    WRITE_L3_L3z,
   30.77 +    
   30.78 +    /* L3 Fault Path */
   30.79 +    ALLOC_DATA_L3f,
   30.80 +    WRITE_L3_L3f,
   30.81 +    
   30.82 +    /* L2 Zero Path */
   30.83 +    ALLOC_DATA_L2z,
   30.84 +    WRITE_L2_L2z,
   30.85 +    ALLOC_L3_L2z,
   30.86 +    WRITE_L2_L3z, /* NOTE(review): never assigned or handled in the code shown here */
   30.87 +    
   30.88 +    /* L2 Fault Path */
   30.89 +    READ_L3_L2f,
   30.90 +    ALLOC_DATA_L2f,
   30.91 +    WRITE_L2_L2f,
   30.92 +    ALLOC_L3_L2f,
   30.93 +    WRITE_L2_L3f, /* NOTE(review): never assigned or handled in the code shown here */
   30.94 +
   30.95 +	/* L1 Zero Path */
   30.96 +    ALLOC_DATA_L1z,
   30.97 +    ALLOC_L3_L1z,
   30.98 +    ALLOC_L2_L1z,
   30.99 +    WRITE_L1_L1z,
  30.100 +
  30.101 +	/* L1 Fault Path */
  30.102 +	READ_L2_L1f,
  30.103 +	READ_L3_L1f,
  30.104 +    ALLOC_DATA_L1f,
  30.105 +    ALLOC_L3_L1f,
  30.106 +    ALLOC_L2_L1f,
  30.107 +    WRITE_L1_L1f,
  30.108 +    
  30.109 +};
  30.110 +
  30.111 +enum radix_offsets { /* indices into io_req.radix[] and io_req.radix_addr[] */
  30.112 +    L1 = 0, 
  30.113 +    L2 = 1,
  30.114 +    L3 = 2
  30.115 +};
  30.116 +
  30.117 +
  30.118 +static void read_cb(struct io_ret ret, void *param);
  30.119 +static void write_cb(struct io_ret ret, void *param);
  30.120 +
  30.121 +
  30.122 +int async_read(vdi_t *vdi, u64 vaddr, io_cb_t cb, void *param)
  30.123 +{
  30.124 +    /* Queue an asynchronous read of vaddr in vdi; cb(result, param) fires
  30.125 +     * when the request finishes.  Returns 0, or -1 if malloc fails. */
  30.126 +    struct io_req *req;
  30.127 +
  30.128 +    DPRINTF("async_read\n");
  30.129 +
  30.130 +    req = malloc(sizeof *req);
  30.131 +    if (req == NULL) { perror("req was NULL in async_read"); return -1; }
  30.132 +    req->radix[0] = req->radix[1] = req->radix[2] = NULL;
  30.133 +    req->op    = IO_OP_READ;
  30.134 +    req->root  = vdi->radix_root;
  30.135 +    req->lock  = vdi->radix_lock;
  30.136 +    req->vaddr = vaddr;
  30.137 +    req->cb    = cb;
  30.138 +    req->param = param;
  30.139 +    req->state = READ_LOCKED;
  30.140 +
  30.141 +    block_rlock(req->lock, L1_IDX(vaddr), read_cb, req);
  30.142 +
  30.143 +    return 0;
  30.144 +}
  30.145 +
  30.146 +
  30.147 +int   async_write(vdi_t *vdi, u64 vaddr, char *block, 
  30.148 +                  io_cb_t cb, void *param)
  30.149 +{
  30.150 +    /* Queue an asynchronous write of block at vaddr in vdi; cb(result, param)
  30.151 +     * fires when the request finishes.  Returns 0, or -1 if malloc fails. */
  30.152 +    struct io_req *req;
  30.153 +
  30.154 +    //DPRINTF("async_write\n");
  30.155 +    req = malloc(sizeof *req);
  30.156 +    if (req == NULL) { perror("req was NULL in async_write"); return -1; }
  30.157 +    req->radix[0] = req->radix[1] = req->radix[2] = NULL;
  30.158 +
  30.159 +    req->op    = IO_OP_WRITE;
  30.160 +    req->root  = vdi->radix_root;
  30.161 +    req->lock  = vdi->radix_lock;
  30.162 +    req->vaddr = vaddr;
  30.163 +    req->block = block; /* used by later callback stages; presumably must outlive them */
  30.164 +    req->cb    = cb;
  30.165 +    req->param = param;
  30.166 +    req->radix_addr[L1] = getid(req->root); /* for consistency */
  30.167 +    req->state = WRITE_LOCKED;
  30.168 +
  30.169 +    block_wlock(req->lock, L1_IDX(vaddr), write_cb, req);
  30.170 +
  30.171 +
  30.172 +    return 0;
  30.173 +}
  30.174 +
  30.175 +void read_cb(struct io_ret ret, void *param)
  30.176 +{
  30.177 +    /* Completion callback for every step of a read: param is the io_req, and
  30.178 +     * req->state names the step (lock -> L1 -> L2 -> L3 -> data -> unlock). */
  30.179 +    struct io_req *req = (struct io_req *)param;
  30.180 +    radix_tree_node node;
  30.181 +    u64 idx;
  30.182 +    char *block;
  30.183 +    void *req_param;
  30.184 +
  30.185 +    DPRINTF("read_cb\n");
  30.186 +    switch(req->state) {
  30.187 +    case READ_LOCKED:
  30.188 +
  30.189 +        DPRINTF("READ_LOCKED\n");
  30.190 +        req->state = READ_L1;
  30.191 +        block_read(getid(req->root), read_cb, req);
  30.192 +        break;
  30.193 +
  30.194 +    case READ_L1: /* block is the radix root */
  30.195 +
  30.196 +        DPRINTF("READ_L1\n");
  30.197 +        block = IO_BLOCK(ret);
  30.198 +        if (block == NULL) goto fail;
  30.199 +        node = (radix_tree_node) block;
  30.200 +        idx  = getid( node[L1_IDX(req->vaddr)] );
  30.201 +        free(block);
  30.202 +        if ( idx == ZERO ) {
  30.203 +            req->state = RETURN_ZERO;
  30.204 +            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
  30.205 +        } else {
  30.206 +            req->state = READ_L2;
  30.207 +            block_read(idx, read_cb, req);
  30.208 +        }
  30.209 +        break;
  30.210 +
  30.211 +    case READ_L2:
  30.212 +
  30.213 +        DPRINTF("READ_L2\n");
  30.214 +        block = IO_BLOCK(ret);
  30.215 +        if (block == NULL) goto fail;
  30.216 +        node = (radix_tree_node) block;
  30.217 +        idx  = getid( node[L2_IDX(req->vaddr)] );
  30.218 +        free(block);
  30.219 +        if ( idx == ZERO ) {
  30.220 +            req->state = RETURN_ZERO;
  30.221 +            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
  30.222 +        } else {
  30.223 +            req->state = READ_L3;
  30.224 +            block_read(idx, read_cb, req);
  30.225 +        }
  30.226 +        break;
  30.227 +
  30.228 +    case READ_L3:
  30.229 +
  30.230 +        DPRINTF("READ_L3\n");
  30.231 +        block = IO_BLOCK(ret);
  30.232 +        if (block == NULL) goto fail;
  30.233 +        node = (radix_tree_node) block;
  30.234 +        idx  = getid( node[L3_IDX(req->vaddr)] );
  30.235 +        free(block);
  30.236 +        if ( idx == ZERO )  {
  30.237 +            req->state = RETURN_ZERO;
  30.238 +            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
  30.239 +        } else {
  30.240 +            req->state = READ_DATA;
  30.241 +            block_read(idx, read_cb, req);
  30.242 +        }
  30.243 +        break;
  30.244 +
  30.245 +    case READ_DATA:
  30.246 +
  30.247 +        DPRINTF("READ_DATA\n");
  30.248 +        if (IO_BLOCK(ret) == NULL) goto fail;
  30.249 +        req->retval = ret;
  30.250 +        req->state = READ_UNLOCKED;
  30.251 +        block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
  30.252 +        break;
  30.253 +
  30.254 +    case READ_UNLOCKED:
  30.255 +    {
  30.256 +        struct io_ret r;
  30.257 +        io_cb_t cb;
  30.258 +        DPRINTF("READ_UNLOCKED\n");
  30.259 +        req_param = req->param;
  30.260 +        r         = req->retval;
  30.261 +        cb        = req->cb;
  30.262 +        free(req);
  30.263 +        cb(r, req_param);
  30.264 +        break;
  30.265 +    }
  30.266 +
  30.267 +    case RETURN_ZERO:
  30.268 +    {
  30.269 +        struct io_ret r;
  30.270 +        io_cb_t cb;
  30.271 +        DPRINTF("RETURN_ZERO\n");
  30.272 +        req_param = req->param;
  30.273 +        cb        = req->cb;
  30.274 +        free(req);
  30.275 +        r.type = IO_BLOCK_T;
  30.276 +        r.u.b = newblock();
  30.277 +        cb(r, req_param);
  30.278 +        break;
  30.279 +    }
  30.280 +
  30.281 +    default:
  30.282 +        DPRINTF("*** Read: Bad state! (%d) ***\n", req->state);
  30.283 +        goto fail;
  30.284 +    }
  30.285 +
  30.286 +    return;
  30.287 +
  30.288 + fail:
  30.289 +    /*
  30.290 +     * Every failing step still holds the read lock taken in async_read, so
  30.291 +     * release it before reporting: stash the failed result and let the
  30.292 +     * READ_UNLOCKED step free the request and invoke the caller's callback.
  30.293 +     */
  30.294 +    DPRINTF("async_read had a read error.\n");
  30.295 +    req->retval = ret;
  30.296 +    req->state  = READ_UNLOCKED;
  30.297 +    block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
  30.298 +    return;
  30.299 +
  30.300 +
  30.301 +}
  30.302 +
  30.303 +void write_cb(struct io_ret r, void *param)
  30.304 +{
  30.305 +    /* Completion callback for every step of a write: param is the io_req and
  30.306 +     * req->state names the step whose result r carries. */
  30.307 +    struct io_req *req = (struct io_req *)param;
  30.308 +    radix_tree_node node;
  30.309 +    u64 a, addr;
  30.310 +    void *req_param;
  30.311 +
  30.312 +    switch(req->state) {
  30.313 +    case WRITE_LOCKED:
  30.314 +
  30.315 +        DPRINTF("WRITE_LOCKED (%llu)\n", L1_IDX(req->vaddr));
  30.316 +        req->state = READ_L1;
  30.317 +        block_read(getid(req->root), write_cb, req);
  30.318 +        break;
  30.319 +
  30.320 +    case READ_L1: /* block is the radix root */
  30.321 +
  30.322 +        DPRINTF("READ_L1\n");
  30.323 +        node = (radix_tree_node) IO_BLOCK(r);
  30.324 +        if (node == NULL) goto fail;
  30.325 +        a    = node[L1_IDX(req->vaddr)];
  30.326 +        addr = getid(a);
  30.327 +
  30.328 +        req->radix_addr[L2] = addr;
  30.329 +        req->radix[L1] = node;
  30.330 +
  30.331 +        if ( addr == ZERO ) {
  30.332 +            /* L1 empty subtree: */
  30.333 +            req->state = ALLOC_DATA_L1z;
  30.334 +            block_alloc( req->block, write_cb, req );
  30.335 +        } else if ( !iswritable(a) ) {
  30.336 +            /* L1 fault: */
  30.337 +            req->state = READ_L2_L1f;
  30.338 +            block_read( addr, write_cb, req );
  30.339 +        } else {
  30.340 +            req->state = READ_L2;
  30.341 +            block_read( addr, write_cb, req );
  30.342 +        }
  30.343 +        break;
  30.344 +
  30.345 +    case READ_L2:
  30.346 +
  30.347 +        DPRINTF("READ_L2\n");
  30.348 +        node = (radix_tree_node) IO_BLOCK(r);
  30.349 +        if (node == NULL) goto fail;
  30.350 +        a    = node[L2_IDX(req->vaddr)];
  30.351 +        addr = getid(a);
  30.352 +
  30.353 +        req->radix_addr[L3] = addr;
  30.354 +        req->radix[L2] = node;
  30.355 +
  30.356 +        if ( addr == ZERO ) {
  30.357 +            /* L2 empty subtree: */
  30.358 +            req->state = ALLOC_DATA_L2z;
  30.359 +            block_alloc( req->block, write_cb, req );
  30.360 +        } else if ( !iswritable(a) ) {
  30.361 +            /* L2 fault: */
  30.362 +            req->state = READ_L3_L2f;
  30.363 +            block_read( addr, write_cb, req );
  30.364 +        } else {
  30.365 +            req->state = READ_L3;
  30.366 +            block_read( addr, write_cb, req );
  30.367 +        }
  30.368 +        break;
  30.369 +
  30.370 +    case READ_L3:
  30.371 +
  30.372 +        DPRINTF("READ_L3\n");
  30.373 +        node = (radix_tree_node) IO_BLOCK(r);
  30.374 +        if (node == NULL) goto fail;
  30.375 +        a    = node[L3_IDX(req->vaddr)];
  30.376 +        addr = getid(a);
  30.377 +
  30.378 +        req->radix[L3] = node;
  30.379 +
  30.380 +        if ( addr == ZERO ) {
  30.381 +            /* L3 empty entry: */
  30.382 +            req->state = ALLOC_DATA_L3z;
  30.383 +            block_alloc( req->block, write_cb, req );
  30.384 +        } else if ( !iswritable(a) ) {
  30.385 +            /* L3 fault: */
  30.386 +            req->state = ALLOC_DATA_L3f;
  30.387 +            block_alloc( req->block, write_cb, req );
  30.388 +        } else {
  30.389 +            req->state = WRITE_DATA;
  30.390 +            block_write( addr, req->block, write_cb, req );
  30.391 +        }
  30.392 +        break;
  30.393 +
  30.394 +    /* L3 Zero Path: */
  30.395 +
  30.396 +    case ALLOC_DATA_L3z:
  30.397 +
  30.398 +        DPRINTF("ALLOC_DATA_L3z\n");
  30.399 +        addr = IO_ADDR(r);
  30.400 +        a = writable(addr);
  30.401 +        req->radix[L3][L3_IDX(req->vaddr)] = a;
  30.402 +        req->state = WRITE_L3_L3z;
  30.403 +        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
  30.404 +        break;
  30.405 +
  30.406 +    /* L3 Fault Path: */
  30.407 +
  30.408 +    case ALLOC_DATA_L3f:
  30.409 +
  30.410 +        DPRINTF("ALLOC_DATA_L3f\n");
  30.411 +        addr = IO_ADDR(r);
  30.412 +        a = writable(addr);
  30.413 +        req->radix[L3][L3_IDX(req->vaddr)] = a;
  30.414 +        req->state = WRITE_L3_L3f;
  30.415 +        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
  30.416 +        break;
  30.417 +
  30.418 +    /* L2 Zero Path: */
  30.419 +
  30.420 +    case ALLOC_DATA_L2z:
  30.421 +
  30.422 +        DPRINTF("ALLOC_DATA_L2z\n");
  30.423 +        addr = IO_ADDR(r);
  30.424 +        a = writable(addr);
  30.425 +        req->radix[L3] = newblock();
  30.426 +        req->radix[L3][L3_IDX(req->vaddr)] = a;
  30.427 +        req->state = ALLOC_L3_L2z;
  30.428 +        block_alloc( (char*)req->radix[L3], write_cb, req );
  30.429 +        break;
  30.430 +
  30.431 +    case ALLOC_L3_L2z:
  30.432 +
  30.433 +        DPRINTF("ALLOC_L3_L2z\n");
  30.434 +        addr = IO_ADDR(r);
  30.435 +        a = writable(addr);
  30.436 +        req->radix[L2][L2_IDX(req->vaddr)] = a;
  30.437 +        req->state = WRITE_L2_L2z;
  30.438 +        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
  30.439 +        break;
  30.440 +
  30.441 +    /* L2 Fault Path: */
  30.442 +
  30.443 +    case READ_L3_L2f:
  30.444 +
  30.445 +        DPRINTF("READ_L3_L2f\n");
  30.446 +        node = (radix_tree_node) IO_BLOCK(r);
  30.447 +        if (node == NULL) goto fail;
  30.448 +        clear_w_bits(node);
  30.449 +        a    = node[L2_IDX(req->vaddr)];
  30.450 +        addr = getid(a);
  30.451 +
  30.452 +        req->radix[L3] = node;
  30.453 +        req->state = ALLOC_DATA_L2f;
  30.454 +        block_alloc( req->block, write_cb, req );
  30.455 +        break;
  30.456 +
  30.457 +    case ALLOC_DATA_L2f:
  30.458 +
  30.459 +        DPRINTF("ALLOC_DATA_L2f\n");
  30.460 +        addr = IO_ADDR(r);
  30.461 +        a = writable(addr);
  30.462 +        req->radix[L3][L3_IDX(req->vaddr)] = a;
  30.463 +        req->state = ALLOC_L3_L2f;
  30.464 +        block_alloc( (char*)req->radix[L3], write_cb, req );
  30.465 +        break;
  30.466 +
  30.467 +    case ALLOC_L3_L2f:
  30.468 +
  30.469 +        DPRINTF("ALLOC_L3_L2f\n");
  30.470 +        addr = IO_ADDR(r);
  30.471 +        a = writable(addr);
  30.472 +        req->radix[L2][L2_IDX(req->vaddr)] = a;
  30.473 +        req->state = WRITE_L2_L2f;
  30.474 +        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
  30.475 +        break;
  30.476 +
  30.477 +    /* L1 Zero Path: */
  30.478 +
  30.479 +    case ALLOC_DATA_L1z:
  30.480 +
  30.481 +        DPRINTF("ALLOC_DATA_L1z\n");
  30.482 +        addr = IO_ADDR(r);
  30.483 +        a = writable(addr);
  30.484 +        req->radix[L3] = newblock();
  30.485 +        req->radix[L3][L3_IDX(req->vaddr)] = a;
  30.486 +        req->state = ALLOC_L3_L1z;
  30.487 +        block_alloc( (char*)req->radix[L3], write_cb, req );
  30.488 +        break;
  30.489 +
  30.490 +    case ALLOC_L3_L1z:
  30.491 +
  30.492 +        DPRINTF("ALLOC_L3_L1z\n");
  30.493 +        addr = IO_ADDR(r);
  30.494 +        a = writable(addr);
  30.495 +        req->radix[L2] = newblock();
  30.496 +        req->radix[L2][L2_IDX(req->vaddr)] = a;
  30.497 +        req->state = ALLOC_L2_L1z;
  30.498 +        block_alloc( (char*)req->radix[L2], write_cb, req );
  30.499 +        break;
  30.500 +
  30.501 +    case ALLOC_L2_L1z:
  30.502 +
  30.503 +        DPRINTF("ALLOC_L2_L1z\n");
  30.504 +        addr = IO_ADDR(r);
  30.505 +        a = writable(addr);
  30.506 +        req->radix[L1][L1_IDX(req->vaddr)] = a;
  30.507 +        req->state = WRITE_L1_L1z;
  30.508 +        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
  30.509 +        break;
  30.510 +
  30.511 +    /* L1 Fault Path: */
  30.512 +
  30.513 +    case READ_L2_L1f:
  30.514 +
  30.515 +        DPRINTF("READ_L2_L1f\n");
  30.516 +        node = (radix_tree_node) IO_BLOCK(r);
  30.517 +        if (node == NULL) goto fail;
  30.518 +        clear_w_bits(node);
  30.519 +        a    = node[L2_IDX(req->vaddr)];
  30.520 +        addr = getid(a);
  30.521 +
  30.522 +        req->radix_addr[L3] = addr;
  30.523 +        req->radix[L2] = node;
  30.524 +
  30.525 +        if (addr == ZERO) {
  30.526 +            /* nothing below L2, create an empty L3 and alloc data. */
  30.527 +            /* (So skip READ_L3_L1f.) */
  30.528 +            req->radix[L3] = newblock();
  30.529 +            req->state = ALLOC_DATA_L1f;
  30.530 +            block_alloc( req->block, write_cb, req );
  30.531 +        } else {
  30.532 +            req->state = READ_L3_L1f;
  30.533 +            block_read( addr, write_cb, req );
  30.534 +        }
  30.535 +        break;
  30.536 +
  30.537 +    case READ_L3_L1f:
  30.538 +
  30.539 +        DPRINTF("READ_L3_L1f\n");
  30.540 +        node = (radix_tree_node) IO_BLOCK(r);
  30.541 +        if (node == NULL) goto fail;
  30.542 +        clear_w_bits(node);
  30.543 +        a    = node[L2_IDX(req->vaddr)];
  30.544 +        addr = getid(a);
  30.545 +
  30.546 +        req->radix[L3] = node;
  30.547 +        req->state = ALLOC_DATA_L1f;
  30.548 +        block_alloc( req->block, write_cb, req );
  30.549 +        break;
  30.550 +
  30.551 +    case ALLOC_DATA_L1f:
  30.552 +
  30.553 +        DPRINTF("ALLOC_DATA_L1f\n");
  30.554 +        addr = IO_ADDR(r);
  30.555 +        a = writable(addr);
  30.556 +        req->radix[L3][L3_IDX(req->vaddr)] = a;
  30.557 +        req->state = ALLOC_L3_L1f;
  30.558 +        block_alloc( (char*)req->radix[L3], write_cb, req );
  30.559 +        break;
  30.560 +
  30.561 +    case ALLOC_L3_L1f:
  30.562 +
  30.563 +        DPRINTF("ALLOC_L3_L1f\n");
  30.564 +        addr = IO_ADDR(r);
  30.565 +        a = writable(addr);
  30.566 +        req->radix[L2][L2_IDX(req->vaddr)] = a;
  30.567 +        req->state = ALLOC_L2_L1f;
  30.568 +        block_alloc( (char*)req->radix[L2], write_cb, req );
  30.569 +        break;
  30.570 +
  30.571 +    case ALLOC_L2_L1f:
  30.572 +
  30.573 +        DPRINTF("ALLOC_L2_L1f\n");
  30.574 +        addr = IO_ADDR(r);
  30.575 +        a = writable(addr);
  30.576 +        req->radix[L1][L1_IDX(req->vaddr)] = a;
  30.577 +        req->state = WRITE_L1_L1f;
  30.578 +        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
  30.579 +        break;
  30.580 +
  30.581 +    case WRITE_DATA:
  30.582 +    case WRITE_L3_L3z:
  30.583 +    case WRITE_L3_L3f:
  30.584 +    case WRITE_L2_L2z:
  30.585 +    case WRITE_L2_L2f:
  30.586 +    case WRITE_L1_L1z:
  30.587 +    case WRITE_L1_L1f:
  30.588 +    {
  30.589 +        int i;
  30.590 +        DPRINTF("DONE\n");
  30.591 +        /* free any saved node vals. */
  30.592 +        for (i=0; i<3; i++)
  30.593 +            if (req->radix[i] != 0) free(req->radix[i]);
  30.594 +        req->retval = r;
  30.595 +        req->state = WRITE_UNLOCKED;
  30.596 +        block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req);
  30.597 +        break;
  30.598 +    }
  30.599 +    case WRITE_UNLOCKED:
  30.600 +    {
  30.601 +        struct io_ret r;
  30.602 +        io_cb_t cb;
  30.603 +        DPRINTF("WRITE_UNLOCKED!\n");
  30.604 +        req_param = req->param;
  30.605 +        r         = req->retval;
  30.606 +        cb        = req->cb;
  30.607 +        free(req);
  30.608 +        cb(r, req_param);
  30.609 +        break;
  30.610 +    }
  30.611 +
  30.612 +    default:
  30.613 +        DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);
  30.614 +        goto fail;
  30.615 +    }
  30.616 +
  30.617 +    return;
  30.618 +
  30.619 + fail:
  30.620 +    {
  30.621 +        int i;
  30.622 +        DPRINTF("async_write had a read error mid-way.\n");
  30.623 +        /* Drop saved nodes and the write lock; WRITE_UNLOCKED reports the -1. */
  30.624 +        for (i=0; i<3; i++)
  30.625 +            free(req->radix[i]);
  30.626 +        req->retval.type = IO_INT_T;
  30.627 +        req->retval.u.i  = -1;
  30.628 +        req->state = WRITE_UNLOCKED;
  30.629 +        block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req);
  30.630 +    }
  30.631 +}
  30.632 +
    31.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.2 +++ b/tools/blktap/requests-async.h	Fri May 20 14:49:37 2005 +0000
    31.3 @@ -0,0 +1,19 @@
    31.4 +#ifndef _REQUESTSASYNC_H_
    31.5 +#define _REQUESTSASYNC_H_
    31.6 +
    31.7 +#include "block-async.h"
    31.8 +#include "blockstore.h" /* for newblock etc. */
    31.9 +
   31.10 +/*
   31.11 +#define BLOCK_SIZE 4096
   31.12 +#define ZERO 0ULL
   31.13 +#define getid(x) (((x)>>1)&0x7fffffffffffffffLLU)
   31.14 +#define iswritable(x) (((x) & 1LLU) != 0)
   31.15 +#define writable(x) (((x) << 1) | 1LLU)
   31.16 +#define readonly(x) ((u64)((x) << 1))
   31.17 +*/
   31.18 +
   31.19 +int async_read (vdi_t *vdi, u64 vaddr, io_cb_t cb, void *param);
   31.20 +int async_write(vdi_t *vdi, u64 vaddr, char *block, io_cb_t cb, void *param);
   31.21 +             
   31.22 +#endif //_REQUESTSASYNC_H_
    32.1 --- a/tools/blktap/vdi.c	Thu May 19 21:14:26 2005 +0000
    32.2 +++ b/tools/blktap/vdi.c	Fri May 20 14:49:37 2005 +0000
    32.3 @@ -11,14 +11,16 @@
    32.4  #include <fcntl.h>
    32.5  #include <string.h>
    32.6  #include <sys/time.h>
    32.7 +#include <pthread.h>
    32.8  #include "blockstore.h"
    32.9 +#include "block-async.h"
   32.10  #include "radix.h"
   32.11  #include "vdi.h"
   32.12                      
   32.13  #define VDI_REG_BLOCK   2LL
   32.14  #define VDI_RADIX_ROOT  writable(3)
   32.15                                                              
   32.16 -#if 1
   32.17 +#if 0
   32.18  #define DPRINTF(_f, _a...) printf ( _f , ## _a )
   32.19  #else
   32.20  #define DPRINTF(_f, _a...) ((void)0)
   32.21 @@ -66,6 +68,7 @@ vdi_registry_t *get_vdi_registry(void)
   32.22      return vdi_reg;
   32.23  }
   32.24  
   32.25 +
   32.26  vdi_t *vdi_create(snap_id_t *parent_snap, char *name)
   32.27  {
   32.28      int ret;
   32.29 @@ -106,12 +109,22 @@ vdi_t *vdi_create(snap_id_t *parent_snap
   32.30      vdi->id    = vdi_reg->nr_vdis++;
   32.31      strncpy(vdi->name, name, VDI_NAME_SZ);
   32.32      vdi->name[VDI_NAME_SZ] = '\0';
   32.33 +    vdi->radix_lock = NULL; /* for tidiness */
   32.34      writeblock(vdi->block, (void *)vdi);
   32.35      
   32.36      update(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi->id, vdi->block);
   32.37      writeblock(VDI_REG_BLOCK, (void *)vdi_reg);
   32.38      freeblock(vdi_reg);
   32.39      
   32.40 +    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
   32.41 +    if (vdi->radix_lock == NULL) 
   32.42 +    {
   32.43 +    	perror("couldn't malloc radix_lock for new vdi!");
   32.44 +    	freeblock(vdi);
   32.45 +    	return NULL;
   32.46 +    }
   32.47 +    radix_lock_init(vdi->radix_lock);
   32.48 +    
   32.49      return vdi;
   32.50  }
   32.51  
   32.52 @@ -126,6 +139,16 @@ vdi_t *vdi_get(u64 vdi_id)
   32.53          return NULL;
   32.54      
   32.55      vdi = (vdi_t *)readblock(vdi_blk);
   32.56 +    
   32.57 +    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
   32.58 +    if (vdi->radix_lock == NULL) 
   32.59 +    {
   32.60 +    	perror("couldn't malloc radix_lock for new vdi!");
   32.61 +    	freeblock(vdi);
   32.62 +    	return NULL;
   32.63 +    }
   32.64 +    radix_lock_init(vdi->radix_lock);
   32.65 +    
   32.66      return vdi;
   32.67  }
   32.68  
    33.1 --- a/tools/blktap/vdi.h	Thu May 19 21:14:26 2005 +0000
    33.2 +++ b/tools/blktap/vdi.h	Fri May 20 14:49:37 2005 +0000
    33.3 @@ -1,3 +1,5 @@
    33.4 +#ifndef _VDI_H_
    33.5 +#define _VDI_H_
    33.6  /**************************************************************************
    33.7   * 
    33.8   * vdi.h
    33.9 @@ -12,11 +14,12 @@
   33.10  #include "blktaplib.h"
   33.11  #include "snaplog.h"
   33.12  
   33.13 -#define VDI_HEIGHT     35
   33.14 -#define VDI_REG_HEIGHT 35 /* why not? */
   33.15 +#define VDI_HEIGHT     27 /* Note that these are now hard-coded */
   33.16 +#define VDI_REG_HEIGHT 27 /* in the async lookup code           */
   33.17  
   33.18  #define VDI_NAME_SZ 256
   33.19  
   33.20 +
   33.21  typedef struct vdi {
   33.22      u64         id;               /* unique vdi id -- used by the registry   */
   33.23      u64         block;            /* block where this vdi lives (also unique)*/
   33.24 @@ -24,6 +27,7 @@ typedef struct vdi {
   33.25      snap_id_t   snap;             /* next snapshot slot for this VDI         */
   33.26      struct vdi *next;             /* used to hash-chain in blkif.            */
   33.27      blkif_vdev_t vdevice;         /* currently mounted as...                 */
   33.28 +    struct radix_lock *radix_lock;/* per-line L1 RW lock for parallel reqs   */ 
   33.29      char        name[VDI_NAME_SZ];/* human readable vdi name                 */
   33.30  } vdi_t;
   33.31  
   33.32 @@ -46,3 +50,5 @@ void vdi_snapshot(vdi_t *vdi);
   33.33  
   33.34  
   33.35  #endif /* __VDI_H__ */
   33.36 +
   33.37 +#endif //_VDI_H_