ia64/xen-unstable

changeset 3738:d633a3d0f36c

bitkeeper revision 1.1159.1.551 (42090342LHDFQZTluOIKtDxiXyfkHA)

Initial checkin of blktap user-land tools. These are fairly experimental,
but a few people have asked to use them. This checkin also includes
Christian's gnbd client library code.

Signed-off-by: andrew.warfield@cl.cam.ac.uk
author akw27@labyrinth.cl.cam.ac.uk
date Tue Feb 08 18:21:54 2005 +0000 (2005-02-08)
parents b8f9a4e1627b
children e2b4ca470b91 2fcf1b2bcbcf
files .rootkeys tools/blktap/Makefile tools/blktap/README tools/blktap/blkaio.c tools/blktap/blkaiolib.c tools/blktap/blkaiolib.h tools/blktap/blkcow.c tools/blktap/blkcowgnbd.c tools/blktap/blkcowimg.c tools/blktap/blkcowlib.c tools/blktap/blkcowlib.h tools/blktap/blkdump.c tools/blktap/blkgnbd.c tools/blktap/blkgnbdlib.c tools/blktap/blkgnbdlib.h tools/blktap/blkimg.c tools/blktap/blkimglib.c tools/blktap/blkimglib.h tools/blktap/blkint.h tools/blktap/blktaplib.c tools/blktap/blktaplib.h tools/blktap/libgnbd/Makefile tools/blktap/libgnbd/gnbdtest.c tools/blktap/libgnbd/libgnbd.c tools/blktap/libgnbd/libgnbd.h tools/python/xen/xend/server/blkif.py xen/include/public/io/blkif.h
line diff
     1.1 --- a/.rootkeys	Tue Feb 08 18:03:51 2005 +0000
     1.2 +++ b/.rootkeys	Tue Feb 08 18:21:54 2005 +0000
     1.3 @@ -315,6 +315,30 @@ 413cb3b53nyOv1OIeDSsCXhBFDXvJA netbsd-2.
     1.4  413aa1d0oNP8HXLvfPuMe6cSroUfSA patches/linux-2.6.9/agpgart.patch
     1.5  3f776bd1Hy9rn69ntXBhPReUFw9IEA tools/Makefile
     1.6  40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Rules.mk
     1.7 +4209033eUwhDBJ_bxejiv5c6gjXS4A tools/blktap/Makefile
     1.8 +4209033ewLAHdhGrT_2jo3Gb_5bDcA tools/blktap/README
     1.9 +4209033eX_Xw94wHaOCtnU9nOAtSJA tools/blktap/blkaio.c
    1.10 +4209033egwf6LDxM2hbaqi9rRdZy4A tools/blktap/blkaiolib.c
    1.11 +4209033f9yELLK85Ipo2oKjr3ickgQ tools/blktap/blkaiolib.h
    1.12 +4209033fL9LcSI6LXrIp5O4axbUBLg tools/blktap/blkcow.c
    1.13 +4209033fUDlFGZreIyZHdP7h7yfvuQ tools/blktap/blkcowgnbd.c
    1.14 +4209033fCgZzLeMOwNBFmsp99x58ZQ tools/blktap/blkcowimg.c
    1.15 +4209033frfXH6oOi9AvRz08PPAndNA tools/blktap/blkcowlib.c
    1.16 +4209033fhFd_y2go9HgCF395A35xJg tools/blktap/blkcowlib.h
    1.17 +4209033fHgtGpb_K16_xC9CpkjNZLw tools/blktap/blkdump.c
    1.18 +4209033fm61CZG1RyKDW75V-eTZ9fg tools/blktap/blkgnbd.c
    1.19 +4209033fVfa-R6MFgGcmsQHTDna4PA tools/blktap/blkgnbdlib.c
    1.20 +4209033fIgDQbaHwHStHhPEDTtbqsA tools/blktap/blkgnbdlib.h
    1.21 +4209033figp5JRsKsXY8rw4keRumkg tools/blktap/blkimg.c
    1.22 +42090340V-8HKGlr00SyJGsE5jXC3A tools/blktap/blkimglib.c
    1.23 +42090340c7pQbh0Km8zLcEqPd_3zIg tools/blktap/blkimglib.h
    1.24 +42090340_mvZtozMjghPJO0qsjk4NQ tools/blktap/blkint.h
    1.25 +42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blktap/blktaplib.c
    1.26 +42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h
    1.27 +42090340B3mDvcxvd9ehDHUkg46hvw tools/blktap/libgnbd/Makefile
    1.28 +42090340ZWkc5Xhf9lpQmDON8HJXww tools/blktap/libgnbd/gnbdtest.c
    1.29 +42090340ocMiUScJE3OpY7QNunvSbg tools/blktap/libgnbd/libgnbd.c
    1.30 +42090340G5_F_EeVnPORKB0pTMGGhA tools/blktap/libgnbd/libgnbd.h
    1.31  4124b307nRyK3dhn1hAsvrY76NuV3g tools/check/Makefile
    1.32  4124b307vHLUWbfpemVefmaWDcdfag tools/check/README
    1.33  4124b307jt7T3CHysgl9LijNHSe1tA tools/check/check_brctl
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/tools/blktap/Makefile	Tue Feb 08 18:21:54 2005 +0000
     2.3 @@ -0,0 +1,100 @@
     2.4 +MAJOR    = 2.0
     2.5 +MINOR    = 0
     2.6 +SONAME   = libblktap.so.$(MAJOR)
     2.7 +
     2.8 +CC       = gcc
     2.9 +
    2.10 +XEN_ROOT = ../..
    2.11 +include $(XEN_ROOT)/tools/Rules.mk
    2.12 +
    2.13 +INCLUDES += 
    2.14 +
    2.15 +SRCS     :=
    2.16 +SRCS     += blktaplib.c
    2.17 +
    2.18 +CFLAGS   += -Wall
    2.19 +CFLAGS   += -Werror
    2.20 +CFLAGS   += -Wno-unused
    2.21 +#CFLAGS   += -O3
    2.22 +CFLAGS   += -g3
    2.23 +CFLAGS   += -fno-strict-aliasing
    2.24 +CFLAGS   += -I $(XEN_LIBXC)
    2.25 +CFLAGS   += -I $(XEN_LIBXUTIL)
    2.26 +CFLAGS   += $(INCLUDES) -I.
    2.27 +CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
    2.28 +# Get gcc to generate the dependencies for us.
    2.29 +CFLAGS   += -Wp,-MD,.$(@F).d
    2.30 +DEPS     = .*.d
    2.31 +
    2.32 +OBJS     = $(patsubst %.c,%.o,$(SRCS))
    2.33 +
    2.34 +LIB      = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
    2.35 +
    2.36 +all: mk-symlinks blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio
    2.37 +	$(MAKE) $(LIB)
    2.38 +
    2.39 +LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
    2.40 +mk-symlinks:
    2.41 +	[ -e xen/linux ] || mkdir -p xen/linux
    2.42 +	[ -e xen/io ]    || mkdir -p xen/io
    2.43 +	( cd xen >/dev/null ; \
    2.44 +	  ln -sf ../$(XEN_ROOT)/xen/include/public/*.h . )
    2.45 +	( cd xen/io >/dev/null ; \
    2.46 +	   ln -sf ../../$(XEN_ROOT)/xen/include/public/io/*.h . )
    2.47 +	( cd xen/linux >/dev/null ; \
    2.48 +	  ln -sf ../../$(LINUX_ROOT)/include/asm-xen/linux-public/*.h . )
    2.49 +
    2.50 +install: all
    2.51 +	mkdir -p $(prefix)/usr/lib
    2.52 +	mkdir -p $(prefix)/usr/include
    2.53 +	install -m0755 $(LIB) $(prefix)/usr/lib
    2.54 +	ln -sf libblktap.so.$(MAJOR).$(MINOR) \
    2.55 +                $(prefix)/usr/lib/libblktap.so.$(MAJOR)
    2.56 +	ln -sf libblktap.so.$(MAJOR) $(prefix)/usr/lib/libblktap.so
    2.57 +	install -m0644 blktaplib.h $(prefix)/usr/include
    2.58 +
    2.59 +clean:
    2.60 +	rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio
    2.61 +
    2.62 +rpm: all
    2.63 +	rm -rf staging
    2.64 +	mkdir staging
    2.65 +	mkdir staging/i386
    2.66 +	rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \
    2.67 +		--define "_rpmdir$$PWD/staging" -bb rpm.spec
    2.68 +	mv staging/i386/*.rpm .
    2.69 +	rm -rf staging
    2.70 +
    2.71 +libblktap.so:
    2.72 +	ln -sf libblktap.so.$(MAJOR) $@
    2.73 +libblktap.so.$(MAJOR):
    2.74 +	ln -sf libblktap.so.$(MAJOR).$(MINOR) $@
    2.75 +libblktap.so.$(MAJOR).$(MINOR): $(OBJS)
    2.76 +	$(CC) -Wl,-soname -Wl,$(SONAME) -shared -o $@ $^ -L../libxutil -lxutil -lz
    2.77 +
    2.78 +blkdump: $(LIB)
    2.79 +	$(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkdump.c
    2.80 +
    2.81 +blkcowimg: $(LIB) blkcowimg.c blkcowlib.c blkimglib.c 
    2.82 +	$(CC) $(CFLAGS) -o blkcowimg -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkcowimg.c blkimglib.c blkcowlib.c
    2.83 +
    2.84 +blkcow: $(LIB) blkcow.c blkcowlib.c
    2.85 +	$(CC) $(CFLAGS) -o blkcow -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkcow.c blkcowlib.c
    2.86 +
    2.87 +blkimg: $(LIB) blkimg.c blkimglib.c
    2.88 +	$(CC) $(CFLAGS) -o blkimg  -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkimg.c blkimglib.c
    2.89 +
    2.90 +blkgnbd: $(LIB) blkgnbd.c blkgnbdlib.c
    2.91 +	$(CC) $(CFLAGS) -o blkgnbd -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkgnbd.c blkgnbdlib.c libgnbd/libgnbd.a
    2.92 +
    2.93 +blkcowgnbd: $(LIB) blkgnbd.c blkcowlib.c blkgnbdlib.c
    2.94 +	$(CC) $(CFLAGS) -o blkcowgnbd -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkcowgnbd.c blkgnbdlib.c blkcowlib.c libgnbd/libgnbd.a
    2.95 +
    2.96 +blkaio: $(LIB) blkaio.c blkaiolib.c
    2.97 +	$(CC) $(CFLAGS) -o blkaio -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkaio.c blkaiolib.c -laio -lpthread
    2.98 +
    2.99 +.PHONY: TAGS clean install mk-symlinks rpm
   2.100 +TAGS:
   2.101 +	etags -t $(SRCS) *.h
   2.102 +
   2.103 +-include $(DEPS)
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/tools/blktap/README	Tue Feb 08 18:21:54 2005 +0000
     3.3 @@ -0,0 +1,149 @@
     3.4 +Block Tap User-level Interfaces
     3.5 +Andrew Warfield
     3.6 +andrew.warfield@cl.cam.ac.uk
     3.7 +February 8, 2005
     3.8 +
     3.9 +NOTE #1: The blktap is _experimental_ code.  It works for me.  Your
    3.10 +mileage may vary.  Don't use it for anything important.  Please. ;)
    3.11 +
    3.12 +NOTE #2: All of the interfaces here are likely to change.  This is all
    3.13 +early code, and I am checking it in because others want to play with
    3.14 +it.  If you use it for anything, please let me know!
    3.15 +
    3.16 +Overview:
    3.17 +---------
    3.18 +
    3.19 +This directory contains a library and set of example applications for
    3.20 +the block tap device.  The block tap hooks into the split block device
    3.21 +interfaces above Xen allowing them to be extended.  This extension can
    3.22 +be done in userspace with the help of a library.
    3.23 +
    3.24 +The tap can be installed either as an interposition domain in between
    3.25 +a frontend and backend driver pair, or as a terminating backend, in
    3.26 +which case it is responsible for serving all requests itself.
    3.27 +
    3.28 +There are two reasons that you might want to use the tap,
    3.29 +corresponding to these configurations:
    3.30 +
    3.31 + 1. To examine or modify a stream of block requests while they are
    3.32 +    in-flight (e.g. to encrypt data, or add data-driven watchpoints)
    3.33 +
    3.34 + 2. To prototype a new backend driver, serving requests from the tap
    3.35 +    rather than passing them along to the XenLinux blkback driver.
    3.36 +    (e.g. to forward block requests to a remote host)
    3.37 +
    3.38 +
    3.39 +Interface:
    3.40 +----------
    3.41 +
    3.42 +At the moment, the tap interface is similar in spirit to that of the
    3.43 +Linux netfilter.  Requests are messages from a client (frontend)
    3.44 +domain to a disk (backend) domain.  Responses are messages travelling
    3.45 +back, acknowledging the completion of a request.  The library allows
    3.46 +chains of functions to be attached to these events.  In addition,
    3.47 +hooks may be attached to handle control messages, which signify things
    3.48 +like connections from new domains.
    3.49 +
    3.50 +At present the control messages especially expose a lot of the
    3.51 +underlying driver interfaces.  This may change in the future in order
    3.52 +to simplify writing hooks.
    3.53 +
    3.54 +Here are the public interfaces:
    3.55 +
    3.56 +These allow hook functions to be chained:
    3.57 +
    3.58 + void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
    3.59 + void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
    3.60 + void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
    3.61 +
    3.62 +This allows a response to be injected, in the case where a request has
    3.63 +been removed using BLKTAP_STOLEN.
    3.64 +
    3.65 + void blktap_inject_response(blkif_response_t *);
    3.66 +
    3.67 +These let you add file descriptors and handlers to the main poll loop:
    3.68 +
    3.69 + int  blktap_attach_poll(int fd, short events, int (*func)(int));
    3.70 + void blktap_detach_poll(int fd);
    3.71 +
    3.72 +This starts the main poll loop:
    3.73 +
    3.74 + int  blktap_listen(void);
    3.75 +
    3.76 +Example:
    3.77 +--------
    3.78 +
    3.79 +blkimg.c uses an image on the local file system to serve requests to
    3.80 +a domain.  Here's what it looks like:
    3.81 +
    3.82 +---[blkimg.c]---
    3.83 +
    3.84 +/* blkimg.c
    3.85 + *
    3.86 + * file-backed disk.
    3.87 + */
    3.88 +
    3.89 +#include "blktaplib.h"
    3.90 +#include "blkimglib.h"
    3.91 +
    3.92 +
    3.93 +int main(int argc, char *argv[])
    3.94 +{
    3.95 +    image_init();
    3.96 +    
    3.97 +    blktap_register_ctrl_hook("image_control", image_control);
    3.98 +    blktap_register_request_hook("image_request", image_request);
    3.99 +    blktap_listen();
   3.100 +    
   3.101 +    return 0;
   3.102 +}
   3.103 +
   3.104 +----------------
   3.105 +
   3.106 +All of the real work is in blkimglib.c, but this illustrates the
   3.107 +actual tap interface well enough.  image_control() will be called with
   3.108 +all control messages.  image_request() handles requests.  As it reads
   3.109 +from an on-disk image file, no requests are ever passed on to a
   3.110 +backend, and so there will be no responses to process -- so there is
   3.111 +nothing registered as a response hook.
   3.112 +
   3.113 +Other examples:
   3.114 +---------------
   3.115 +
   3.116 +Here is a list of other examples in the directory:
   3.117 +
   3.118 +Things that terminate a block request stream:
   3.119 +
   3.120 +  blkimg    - Use an image file/device to serve requests
   3.121 +  blkgnbd   - Use a remote gnbd server to serve requests
   3.122 +  blkaio    - Use libaio... (DOES NOT WORK)
   3.123 +  
   3.124 +Things that don't:
   3.125 +
   3.126 +  blkdump   - Print in-flight requests.
   3.127 +  blkcow    - Really inefficient copy-on-write disks using libdb to store
   3.128 +              writes.
   3.129 +
   3.130 +There are examples of plugging these things together, for instance
   3.131 +blkcowgnbd is a read-only gnbd device with copy-on-write to a local
   3.132 +file.
   3.133 +
   3.134 +TODO:
   3.135 +-----
   3.136 +
   3.137 +- Make session tracking work.  At the moment these generally just handle a 
   3.138 +  single front-end client at a time.
   3.139 +
   3.140 +- Integrate with Xend.  Need to cleanly pass an image identifier in the connect
   3.141 +  message.
   3.142 +
   3.143 +- Make an asynchronous file-io terminator.  The libaio attempt is
   3.144 +  tragically stalled because mapped foreign pages make pfn_valid fail
   3.145 +  (they are VM_IO), and so cannot be passed to aio as targets.  A
   3.146 +  better solution may be to tear the disk interfaces out of the real
   3.147 +  backend and expose them somehow.
   3.148 +
   3.149 +- Make CoW suck less.
   3.150 +
   3.151 +- Do something more along the lines of dynamic linking for the
   3.152 +  plugins, so that they don't all need a new main().
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/tools/blktap/blkaio.c	Tue Feb 08 18:21:54 2005 +0000
     4.3 @@ -0,0 +1,19 @@
     4.4 +/* blkaio.c
     4.5 + *
     4.6 + * libaio-backed disk.
     4.7 + */
     4.8 +
     4.9 +#include "blktaplib.h"
    4.10 +#include "blkaiolib.h"
    4.11 +
    4.12 +
    4.13 +int main(int argc, char *argv[])
    4.14 +{
    4.15 +    aio_init();
    4.16 +    
    4.17 +    blktap_register_ctrl_hook("aio_control", aio_control);
    4.18 +    blktap_register_request_hook("aio_request", aio_request);
    4.19 +    blktap_listen();
    4.20 +    
    4.21 +    return 0;
    4.22 +}
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/tools/blktap/blkaiolib.c	Tue Feb 08 18:21:54 2005 +0000
     5.3 @@ -0,0 +1,489 @@
     5.4 +/* blkaiolib.c
     5.5 + *
     5.6 + * file/device image-backed block device -- using linux libaio.
     5.7 + * 
     5.8 + * (c) 2004 Andrew Warfield.
     5.9 + *
    5.10 + * Xend has been modified to use an amorfs:[fsid] disk tag.
    5.11 + * This will show up as device type (maj:240,min:0) = 61440.
    5.12 + *
    5.13 + * The fsid is placed in the sec_start field of the disk extent.
    5.14 + *
    5.15 + * NOTE: This doesn't work.  Grrr.
    5.16 + */
    5.17 +
    5.18 +#define _GNU_SOURCE
    5.19 +#define __USE_LARGEFILE64
    5.20 +
    5.21 +#include <stdio.h>
    5.22 +#include <stdlib.h>
    5.23 +#include <fcntl.h>
    5.24 +#include <string.h>
    5.25 +#include <db.h>       
    5.26 +#include <sys/stat.h>
    5.27 +#include <sys/types.h>
    5.28 +#include <sys/poll.h>
    5.29 +#include <unistd.h>
    5.30 +#include <errno.h>
    5.31 +#include <libaio.h>
    5.32 +#include <pthread.h>
    5.33 +#include <time.h>
    5.34 +#include "blktaplib.h"
    5.35 +
    5.36 +//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
    5.37 +#define TMP_IMAGE_FILE_NAME "fc3.image"
    5.38 +
    5.39 +#define MAX_DOMS              1024
    5.40 +#define MAX_IMGNAME_LEN        255
    5.41 +#define AMORFS_DEV           61440
    5.42 +#define MAX_REQUESTS            64 /* must be synced with the blkif drivers. */
    5.43 +#define MAX_SEGMENTS_PER_REQ    11
    5.44 +#define SECTOR_SHIFT             9
    5.45 +#define MAX_AIO_REQS   (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
    5.46 +                                                                                
    5.47 +#if 1
    5.48 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
    5.49 +#else
    5.50 +#define DPRINTF(_f, _a...) ((void)0)
    5.51 +#endif
    5.52 +           
    5.53 +#if 1                                                                        
    5.54 +#define ASSERT(_p) \
    5.55 +    if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
    5.56 +    __LINE__, __FILE__); *(int*)0=0; }
    5.57 +#else
    5.58 +#define ASSERT(_p) ((void)0)
    5.59 +#endif                                                                     
    5.60 +
    5.61 +char dbg_page[4096];
    5.62 +
    5.63 +typedef struct {
    5.64 +    /* These need to turn into an array/rbtree for multi-disk support. */
    5.65 +    int  fd;
    5.66 +    u64  fsid;
    5.67 +    char imgname[MAX_IMGNAME_LEN];
    5.68 +    blkif_vdev_t   vdevice;
    5.69 +} image_t;
    5.70 +
    5.71 +/* Note on pending_reqs: I assume all reqs are queued before they start to 
    5.72 + * get filled.  So a count of 0 marks an unused record.
    5.73 + */
    5.74 +typedef struct {
    5.75 +    blkif_request_t  req;
    5.76 +    int              count;
    5.77 +} pending_req_t;
    5.78 +
    5.79 +static pending_req_t    pending_list[MAX_REQUESTS];
    5.80 +image_t                *images[MAX_DOMS];
    5.81 +
    5.82 +static io_context_t  ctx;
    5.83 +static struct iocb  *iocb_free[MAX_AIO_REQS];
    5.84 +static int           iocb_free_count;
    5.85 +
    5.86 +/* ---[ Notification mechanism ]--------------------------------------- */
    5.87 +
    5.88 +enum { 
    5.89 +    READ   = 0,
    5.90 +    WRITE  = 1
    5.91 +};
    5.92 +
    5.93 +static int aio_notify[2];
    5.94 +static volatile int aio_listening = 0;
    5.95 +
    5.96 +static struct io_event aio_events[MAX_AIO_REQS];
    5.97 +static int             aio_event_count = 0;
    5.98 +
    5.99 +/* this is commented out in libaio.h for some reason. */
   5.100 +extern int io_queue_wait(io_context_t ctx, struct timespec *timeout);
   5.101 +
   5.102 +static void *notifier_thread(void *arg)
   5.103 +{
   5.104 +    int ret; 
   5.105 +    int msg = 0x00feeb00;
   5.106 +    
   5.107 +    printf("Notifier thread started.\n");
   5.108 +    for (;;) {
   5.109 +        //if ((aio_listening) && ((ret = io_queue_wait(ctx, 0)) == 0)) {
   5.110 +        if ((aio_listening) && 
   5.111 +           ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0)) {
   5.112 +            aio_event_count = ret;
   5.113 +            printf("[Notifying! (%d)]\n", aio_event_count);
   5.114 +            aio_listening = 0;
   5.115 +            write(aio_notify[WRITE], &msg, sizeof(msg));
   5.116 +            fsync(aio_notify[WRITE]);
   5.117 +        } else {
   5.118 +            if (aio_listening)
   5.119 +                printf("[io_queue_wait error! %d]\n", errno);
   5.120 +            usleep(1000); /* Not ready to read. */
   5.121 +        }
   5.122 +    }
   5.123 +}
   5.124 +
   5.125 +/* -------------------------------------------------------------------- */
   5.126 +
   5.127 +int aio_control(control_msg_t *msg)
   5.128 +{
   5.129 +    domid_t  domid;
   5.130 +    DB      *db;
   5.131 +    int      ret;
   5.132 +    
   5.133 +    if (msg->type != CMSG_BLKIF_BE) 
   5.134 +    {
   5.135 +        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
   5.136 +        return 0;
   5.137 +    }
   5.138 +    
   5.139 +    switch(msg->subtype)
   5.140 +    {
   5.141 +    case CMSG_BLKIF_BE_CREATE:
   5.142 +        if ( msg->length != sizeof(blkif_be_create_t) )
   5.143 +            goto parse_error;
   5.144 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
   5.145 +                ((blkif_be_create_t *)msg->msg)->domid,
   5.146 +                ((blkif_be_create_t *)msg->msg)->blkif_handle);
   5.147 +        domid = ((blkif_be_create_t *)msg->msg)->domid;
   5.148 +        if (images[domid] != NULL) {
   5.149 +            printf("attempt to connect from an existing dom!\n");
   5.150 +            return 0;
   5.151 +        }
   5.152 +        
   5.153 +        images[domid] = (image_t *)malloc(sizeof(image_t));
   5.154 +        if (images[domid] == NULL) {
   5.155 +            printf("error allocating image record.\n");
   5.156 +            return 0;
   5.157 +        }
   5.158 +        
   5.159 +        images[domid]->fd  = -1;
   5.160 +        images[domid]->fsid = 0;
   5.161 +        
   5.162 +        printf("Image connected.\n");
   5.163 +        break;   
   5.164 +        
   5.165 +    case CMSG_BLKIF_BE_DESTROY:
   5.166 +        if ( msg->length != sizeof(blkif_be_destroy_t) )
   5.167 +            goto parse_error;
   5.168 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
   5.169 +                ((blkif_be_destroy_t *)msg->msg)->domid,
   5.170 +                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
   5.171 +        
   5.172 +        domid = ((blkif_be_destroy_t *)msg->msg)->domid;
   5.173 +        if (images[domid] != NULL) {
   5.174 +            if (images[domid]->fd != -1)
   5.175 +                close( images[domid]->fd );
   5.176 +            free( images[domid] );
   5.177 +            images[domid] = NULL;
   5.178 +        }
   5.179 +        break;  
   5.180 +    case CMSG_BLKIF_BE_VBD_GROW:
   5.181 +    {
   5.182 +        blkif_be_vbd_grow_t *grow;
   5.183 +        
   5.184 +        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
   5.185 +            goto parse_error;
   5.186 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
   5.187 +                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
   5.188 +                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
   5.189 +                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
   5.190 +        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
   5.191 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
   5.192 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
   5.193 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
   5.194 +        grow = (blkif_be_vbd_grow_t *)msg->msg;
   5.195 +        domid = grow->domid;
   5.196 +        if (images[domid] == NULL) {
   5.197 +            printf("VBD_GROW on unconnected domain!\n");
   5.198 +            return 0;
   5.199 +        }
   5.200 +        
   5.201 +        if (grow->extent.device != AMORFS_DEV) {
   5.202 +            printf("VBD_GROW on non-amorfs device!\n");
   5.203 +            return 0;
   5.204 +        }
   5.205 +        
   5.206 +        /* TODO: config support for arbitrary image files/modes. */
   5.207 +        sprintf(images[domid]->imgname, TMP_IMAGE_FILE_NAME);
   5.208 +        
   5.209 +        images[domid]->fsid   = grow->extent.sector_start;
   5.210 +        images[domid]->vdevice = grow->vdevice; 
   5.211 +        images[domid]->fd = open(TMP_IMAGE_FILE_NAME, 
   5.212 +                O_RDWR | O_DIRECT | O_LARGEFILE);
   5.213 +        if (images[domid]->fd < 0) {
   5.214 +            printf("Couldn't open image file! %d\n", errno);
   5.215 +            return 0;
   5.216 +        }
   5.217 +        
   5.218 +        printf("Image file opened. (%s)\n", images[domid]->imgname);
   5.219 +        break;
   5.220 +    }    
   5.221 +    }
   5.222 +    return 0;
   5.223 +parse_error:
   5.224 +    printf("Bad control message!\n");
   5.225 +    return 0;
   5.226 +    
   5.227 +create_failed:
   5.228 +    /* TODO: close the db ref. */
   5.229 +    return 0;
   5.230 +}    
   5.231 + 
   5.232 +int aio_request(blkif_request_t *req)
   5.233 +{
   5.234 +    int fd;
   5.235 +    u64 sector;
   5.236 +    char *spage, *dpage;
   5.237 +    int ret, i, idx;
   5.238 +    blkif_response_t *rsp;
   5.239 +    domid_t dom = ID_TO_DOM(req->id);
   5.240 +    
   5.241 +    if ((images[dom] == NULL) || (images[dom]->fd == -1)) {
   5.242 +        printf("Data request for unknown domain!!! %d\n", dom);
   5.243 +        rsp = (blkif_response_t *)req;
   5.244 +        rsp->id = req->id;
   5.245 +        rsp->operation = req->operation;
   5.246 +        rsp->status = BLKIF_RSP_ERROR;
   5.247 +        return BLKTAP_RESPOND;
   5.248 +    }
   5.249 +    
   5.250 +    fd = images[dom]->fd;
   5.251 +    
   5.252 +    switch (req->operation) 
   5.253 +    {
   5.254 +    case BLKIF_OP_PROBE:
   5.255 +    {
   5.256 +        struct stat stat;
   5.257 +        vdisk_t *img_info;
   5.258 +        
   5.259 +        
   5.260 +        /* We expect one buffer only. */
   5.261 +        if ( req->nr_segments != 1 )
   5.262 +            goto err;
   5.263 +                                                                                
   5.264 +        /* Make sure the buffer is page-sized. */
   5.265 +        if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
   5.266 +             (blkif_last_sect (req->frame_and_sects[0]) != 7) )
   5.267 +            goto err;
   5.268 +
   5.269 +        /* loop for multiple images would start here. */
   5.270 +        
   5.271 +        ret = fstat(fd, &stat);
   5.272 +        if (ret != 0) {
   5.273 +            printf("Couldn't stat image in PROBE!\n");
   5.274 +            goto err;
   5.275 +        }
   5.276 +        
   5.277 +        img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
   5.278 +        img_info[0].device   = images[dom]->vdevice;
   5.279 +        img_info[0].info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
   5.280 +        img_info[0].capacity = (stat.st_size >> SECTOR_SHIFT);
   5.281 +        
   5.282 +        if (img_info[0].capacity == 0)
   5.283 +            img_info[0].capacity = ((u64)1 << 63); // xend does this too.
   5.284 +        
   5.285 +        DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", img_info[0].device,
   5.286 +                img_info[0].capacity);
   5.287 +        
   5.288 +        rsp = (blkif_response_t *)req;
   5.289 +        rsp->id = req->id;
   5.290 +        rsp->operation = BLKIF_OP_PROBE;
   5.291 +        rsp->status = 1; /* number of disks */
   5.292 +        
   5.293 +        return  BLKTAP_RESPOND;
   5.294 +    }    
   5.295 +    case BLKIF_OP_WRITE:
   5.296 +    {
   5.297 +        unsigned long size;
   5.298 +        struct iocb *io;
   5.299 +        struct iocb *ioq[MAX_SEGMENTS_PER_REQ]; 
   5.300 +        
   5.301 +        idx = ID_TO_IDX(req->id);
   5.302 +        ASSERT(pending_list[idx].count == 0);
   5.303 +        memcpy(&pending_list[idx].req, req, sizeof(*req));
   5.304 +        pending_list[idx].count = req->nr_segments;
   5.305 +        
   5.306 +        for (i = 0; i < req->nr_segments; i++) {
   5.307 +            
   5.308 +            sector = req->sector_number + (8*i);
   5.309 +            
   5.310 +            size = blkif_last_sect (req->frame_and_sects[i]) -
   5.311 +                   blkif_first_sect(req->frame_and_sects[i]) + 1;
   5.312 +            
   5.313 +            DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
   5.314 +                    req->sector_number, sector, 
   5.315 +                    blkif_first_sect(req->frame_and_sects[i]),
   5.316 +                    blkif_last_sect (req->frame_and_sects[i]),
   5.317 +                    (long)(sector << SECTOR_SHIFT));
   5.318 +                        
   5.319 +            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
   5.320 +            spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
   5.321 +            
   5.322 +            /*convert size and sector to byte offsets */
   5.323 +            size   <<= SECTOR_SHIFT;
   5.324 +            sector <<= SECTOR_SHIFT;
   5.325 +            
   5.326 +            io = iocb_free[--iocb_free_count];
   5.327 +            io_prep_pwrite(io, fd, spage, size, sector);
   5.328 +            io->data = (void *)idx;
   5.329 +            ioq[i] = io;
   5.330 +        }
   5.331 +        
   5.332 +        ret = io_submit(ctx, req->nr_segments, ioq);
   5.333 +        if (ret < 0)
   5.334 +            printf("BADNESS: io_submit error! (%d)\n", errno);
   5.335 +        
   5.336 +        pending_list[idx].count = req->nr_segments;
   5.337 +        
   5.338 +        return BLKTAP_STOLEN;
   5.339 +        
   5.340 +    }
   5.341 +    case BLKIF_OP_READ:
   5.342 +    {
   5.343 +        unsigned long size;
   5.344 +        struct iocb *io;
   5.345 +        struct iocb *ioq[MAX_SEGMENTS_PER_REQ]; 
   5.346 +        
   5.347 +        idx = ID_TO_IDX(req->id);
   5.348 +        ASSERT(pending_list[idx].count == 0);
   5.349 +        memcpy(&pending_list[idx].req, req, sizeof(*req));
   5.350 +        pending_list[idx].count = req->nr_segments;
   5.351 +        
   5.352 +        for (i = 0; i < req->nr_segments; i++) {
   5.353 +            
   5.354 +            sector  = req->sector_number + (8*i);
   5.355 +            
   5.356 +            size = blkif_last_sect (req->frame_and_sects[i]) -
   5.357 +                   blkif_first_sect(req->frame_and_sects[i]) + 1;
   5.358 +            
   5.359 +            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
   5.360 +            dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
   5.361 +            
   5.362 +            
   5.363 +            DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
   5.364 +                    "pos: %15lu dpage: %p\n", 
   5.365 +                    req->sector_number, sector, 
   5.366 +                    blkif_first_sect(req->frame_and_sects[i]),
   5.367 +                    blkif_last_sect (req->frame_and_sects[i]),
   5.368 +                    (long)(sector << SECTOR_SHIFT), dpage);
   5.369 +            
   5.370 +            /*convert size and sector to byte offsets */
   5.371 +            size   <<= SECTOR_SHIFT;
   5.372 +            sector <<= SECTOR_SHIFT;
   5.373 +            
   5.374 +            io = iocb_free[--iocb_free_count];
   5.375 +            
   5.376 +            io_prep_pread(io, fd, dpage, size, sector);
   5.377 +            io->data = (void *)idx;
   5.378 +            
   5.379 +            ioq[i] = io;
   5.380 +        }
   5.381 +        
   5.382 +        ret = io_submit(ctx, req->nr_segments, ioq);
   5.383 +        if (ret < 0)
   5.384 +            printf("BADNESS: io_submit error! (%d)\n", errno);
   5.385 +        
   5.386 +        
   5.387 +        return BLKTAP_STOLEN;
   5.388 +        
   5.389 +    }
   5.390 +    }
   5.391 +    
   5.392 +    printf("Unknown block operation!\n");
   5.393 +err:
   5.394 +    rsp = (blkif_response_t *)req;
   5.395 +    rsp->id = req->id;
   5.396 +    rsp->operation = req->operation;
   5.397 +    rsp->status = BLKIF_RSP_ERROR;
   5.398 +    return BLKTAP_RESPOND;  
   5.399 +}
   5.400 +
   5.401 +
   5.402 +int aio_pollhook(int fd)
   5.403 +{
   5.404 +    struct io_event *ep;
   5.405 +    blkif_response_t *rsp;
   5.406 +    int idx;
   5.407 +    
   5.408 +    /* Walk the entries counted by aio_event_count; fd itself is not used. */
   5.409 +    DPRINTF("aio_hook(): \n");
   5.410 +    
   5.411 +    for (ep = aio_events; aio_event_count-- > 0; ep++) {
   5.412 +        struct iocb *io = ep->obj;
   5.413 +        idx = (int)(long) ep->data; /* cookie: index into pending_list */
   5.414 +        
   5.415 +        if ((idx < 0) || (idx >= MAX_REQUESTS) || (pending_list[idx].count == 0)){
   5.416 +            printf("aio returned a bad cookie (%d)!\n", idx);
   5.417 +            break;
   5.418 +        }
   5.419 +        /* NOTE(review): a failed event is only logged; the reply is still OKAY. */
   5.420 +        if ((int)ep->res < 0) printf("aio request error! (%d,%d)\n", 
   5.421 +            (int)ep->res, (int)ep->res2);
   5.422 +        
   5.423 +        pending_list[idx].count--;
   5.424 +        iocb_free[iocb_free_count++] = io; /* iocb goes back on the free stack */
   5.425 +        
   5.426 +        if (pending_list[idx].count == 0) {
   5.427 +            blkif_request_t tmp = pending_list[idx].req; /* copy: rsp aliases req */
   5.428 +            rsp = (blkif_response_t *)&pending_list[idx].req;
   5.429 +            rsp->id = tmp.id;
   5.430 +            rsp->operation = tmp.operation;
   5.431 +            rsp->status = BLKIF_RSP_OKAY;
   5.432 +            blktap_inject_response(rsp);
   5.433 +        }
   5.434 +    }
   5.435 +    
   5.436 +    printf("pollhook done!\n");
   5.437 +    if (read(aio_notify[READ], &idx, sizeof(idx)) < 0) /* idx is scratch here */
   5.438 +        printf("aio notify read failed! (%d)\n", errno);
   5.439 +    aio_listening = 1;
   5.440 +    
   5.441 +    return 0;
   5.442 +}
   5.443 +
   5.444 +/* the image library terminates the request stream. _resp is a noop. */
   5.445 +int aio_response(blkif_response_t *rsp)
   5.446 +{   
   5.447 +    return BLKTAP_PASS;
   5.448 +}
   5.449 +
   5.450 +void aio_init(void)
   5.451 +{
   5.452 +    int i, rc;
   5.453 +    pthread_t p;
   5.454 +    /* Set-up failures below are fatal and exit with a non-zero status. */
   5.455 +    for (i = 0; i < MAX_DOMS; i++)
   5.456 +        images[i] = NULL;
   5.457 +    
   5.458 +    for (i = 0; i < MAX_REQUESTS; i++)
   5.459 +        pending_list[i].count = 0; 
   5.460 +    
   5.461 +    memset(&ctx, 0, sizeof(ctx));
   5.462 +    rc = io_queue_init(MAX_AIO_REQS, &ctx);
   5.463 +    if (rc != 0) {
   5.464 +        printf("queue_init failed! (%d)\n", rc);
   5.465 +        exit(1);
   5.466 +    }
   5.467 +    /* Fill the free stack; the count must include the slot just filled. */
   5.468 +    for (i=0; i<MAX_AIO_REQS; i++) {
   5.469 +        if (!(iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb)))) {
   5.470 +            printf("error allocating iocb array\n");
   5.471 +            exit(1);
   5.472 +        }
   5.473 +        iocb_free_count = i + 1;
   5.474 +    }
   5.475 +    
   5.476 +    rc = pipe(aio_notify);
   5.477 +    if (rc != 0) {
   5.478 +        printf("pipe failed! (%d)\n", errno);
   5.479 +        exit(1);
   5.480 +    }
   5.481 +    /* pthread_create reports failure through its return value, not errno. */
   5.482 +    rc = pthread_create(&p, NULL, notifier_thread, NULL);
   5.483 +    if (rc != 0) {
   5.484 +        printf("pthread_create failed! (%d)\n", rc);
   5.485 +        exit(1);
   5.486 +    }
   5.487 +    
   5.488 +    aio_listening = 1;
   5.489 +    
   5.490 +    blktap_attach_poll(aio_notify[READ], POLLIN, aio_pollhook);
   5.491 +}
   5.492 +
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/tools/blktap/blkaiolib.h	Tue Feb 08 18:21:54 2005 +0000
     6.3 @@ -0,0 +1,16 @@
     6.4 +/* blkaiolib.h
     6.5 + *
     6.6 + * aio image-backed block device.
     6.7 + * 
     6.8 + * (c) 2004 Andrew Warfield.
     6.9 + *
    6.10 + * Xend has been modified to use an amorfs:[fsid] disk tag.
    6.11 + * This will show up as device type (maj:240,min:0) = 61440.
    6.12 + *
    6.13 + * The fsid is placed in the sec_start field of the disk extent.
    6.14 + */
    6.15 +
    6.16 +int aio_control(control_msg_t *msg);
    6.17 +int aio_request(blkif_request_t *req);
    6.18 +int aio_response(blkif_response_t *rsp); /* noop */
    6.19 +void aio_init(void);
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/tools/blktap/blkcow.c	Tue Feb 08 18:21:54 2005 +0000
     7.3 @@ -0,0 +1,31 @@
     7.4 +/* blkcow.c
     7.5 + *
     7.6 + * copy on write a block device.  in a really inefficient way.
     7.7 + * 
     7.8 + * (c) 2004 Andrew Warfield.
     7.9 + *
    7.10 + * This uses whatever backend the tap is attached to as the read-only
    7.11 + * underlay -- for the moment.
    7.12 + *
    7.13 + * Xend has been modified to use an amorfs:[fsid] disk tag.
    7.14 + * This will show up as device type (maj:240,min:0) = 61440.
    7.15 + *
    7.16 + * The fsid is placed in the sec_start field of the disk extent,
    7.17 + * the cow plugin uses this to identify a unique overlay.
    7.18 + */
    7.19 +
    7.20 +#include "blktaplib.h"
    7.21 +#include "blkcowlib.h"
    7.22 +
    7.23 +
    7.24 +int main(int argc, char *argv[])
    7.25 +{
    7.26 +    cow_init();
    7.27 +    
    7.28 +    blktap_register_ctrl_hook("cow_control", cow_control);
    7.29 +    blktap_register_request_hook("cow_request", cow_request);
    7.30 +    blktap_register_response_hook("cow_response", cow_response);
    7.31 +    blktap_listen();
    7.32 +    
    7.33 +    return 0;
    7.34 +}
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/tools/blktap/blkcowgnbd.c	Tue Feb 08 18:21:54 2005 +0000
     8.3 @@ -0,0 +1,24 @@
     8.4 +/* blkcowgnbd.c
     8.5 + *
     8.6 + * gnbd-backed cow.
     8.7 + */
     8.8 +
     8.9 +#include "blktaplib.h"
    8.10 +#include "blkcowlib.h"
    8.11 +#include "blkgnbdlib.h"
    8.12 +
    8.13 +
    8.14 +int main(int argc, char *argv[])
    8.15 +{
    8.16 +    cow_init();
    8.17 +    gnbd_init();
    8.18 +    
    8.19 +    blktap_register_ctrl_hook("cow_control", cow_control);
    8.20 +    blktap_register_ctrl_hook("gnbd_control", gnbd_control);
    8.21 +    blktap_register_request_hook("cow_request", cow_request);
    8.22 +    blktap_register_request_hook("gnbd_request", gnbd_request);
    8.23 +    blktap_register_response_hook("cow_response", cow_response);
    8.24 +    blktap_listen();
    8.25 +    
    8.26 +    return 0;
    8.27 +}
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/tools/blktap/blkcowimg.c	Tue Feb 08 18:21:54 2005 +0000
     9.3 @@ -0,0 +1,24 @@
     9.4 +/* blkcowimg.c
     9.5 + *
     9.6 + * file-backed cow.
     9.7 + */
     9.8 +
     9.9 +#include "blktaplib.h"
    9.10 +#include "blkcowlib.h"
    9.11 +#include "blkimglib.h"
    9.12 +
    9.13 +
    9.14 +int main(int argc, char *argv[])
    9.15 +{
    9.16 +    cow_init();
    9.17 +    image_init();
    9.18 +    
    9.19 +    blktap_register_ctrl_hook("cow_control", cow_control);
    9.20 +    blktap_register_ctrl_hook("image_control", image_control);
    9.21 +    blktap_register_request_hook("cow_request", cow_request);
    9.22 +    blktap_register_request_hook("image_request", image_request);
    9.23 +    blktap_register_response_hook("cow_response", cow_response);
    9.24 +    blktap_listen();
    9.25 +    
    9.26 +    return 0;
    9.27 +}
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/tools/blktap/blkcowlib.c	Tue Feb 08 18:21:54 2005 +0000
    10.3 @@ -0,0 +1,380 @@
    10.4 +/* blkcowlib.c
    10.5 + *
    10.6 + * copy on write a block device.  in a really inefficient way.
    10.7 + * 
    10.8 + * (c) 2004 Andrew Warfield.
    10.9 + *
   10.10 + * This uses whatever backend the tap is attached to as the read-only
   10.11 + * underlay -- for the moment.
   10.12 + *
   10.13 + * Xend has been modified to use an amorfs:[fsid] disk tag.
   10.14 + * This will show up as device type (maj:240,min:0) = 61440.
   10.15 + *
   10.16 + * The fsid is placed in the sec_start field of the disk extent,
   10.17 + * the cow plugin uses this to identify a unique overlay.
   10.18 + */
   10.19 +
   10.20 +#include <stdio.h>
   10.21 +#include <stdlib.h>
   10.22 +#include <string.h>
   10.23 +#include <db.h>
   10.24 +#include "blktaplib.h"
   10.25 +
   10.26 +#define MAX_DOMS        1024
   10.27 +#define MAX_DBNAME_LEN   255
   10.28 +#define AMORFS_DEV     61440
   10.29 +#define MAX_REQUESTS      64 /* must be synced with the blkif drivers. */
   10.30 +                                                                                
   10.31 +#if 0
   10.32 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   10.33 +#else
   10.34 +#define DPRINTF(_f, _a...) ((void)0)
   10.35 +#endif
   10.36 +    
   10.37 +/* Berkeley db has different params for open() after 4.1 */
   10.38 +#ifndef DB_VERSION_MAJOR
   10.39 +# define DB_VERSION_MAJOR 1
   10.40 +#endif /* DB_VERSION_MAJOR */
   10.41 +#ifndef DB_VERSION_MINOR
   10.42 +# define DB_VERSION_MINOR 0
   10.43 +#endif /* DB_VERSION_MINOR */
   10.44 +
   10.45 +typedef struct {
   10.46 +    DB   *db;
   10.47 +    u64  fsid;
   10.48 +    char dbname[MAX_DBNAME_LEN];
   10.49 +} cow_t;
   10.50 +
   10.51 +cow_t           *cows[MAX_DOMS];
   10.52 +blkif_request_t *reread_list[MAX_REQUESTS];
   10.53 +
   10.54 +int cow_control(control_msg_t *msg)
   10.55 +{
   10.56 +    domid_t  domid;
   10.57 +    DB      *db;
   10.58 +    int      ret;
   10.59 +    /* NOTE(review): domid indexes cows[] below without a range check. */
   10.60 +    if (msg->type != CMSG_BLKIF_BE) 
   10.61 +    {
   10.62 +        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
   10.63 +        return 0;
   10.64 +    }
   10.65 +    /* Subtypes other than CREATE, DESTROY and VBD_GROW fall through to return 0. */
   10.66 +    switch(msg->subtype)
   10.67 +    {
   10.68 +    case CMSG_BLKIF_BE_CREATE:
   10.69 +        if ( msg->length != sizeof(blkif_be_create_t) )
   10.70 +            goto parse_error;
   10.71 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
   10.72 +                ((blkif_be_create_t *)msg->msg)->domid,
   10.73 +                ((blkif_be_create_t *)msg->msg)->blkif_handle);
   10.74 +        domid = ((blkif_be_create_t *)msg->msg)->domid;
   10.75 +        if (cows[domid] != NULL) {
   10.76 +            printf("attempt to connect from an existing dom!\n");
   10.77 +            return 0;
   10.78 +        }
   10.79 +        
   10.80 +        cows[domid] = (cow_t *)malloc(sizeof(cow_t));
   10.81 +        if (cows[domid] == NULL) {
   10.82 +            printf("error allocating cow.\n");
   10.83 +            return 0;
   10.84 +        }
   10.85 +        /* The overlay db is only opened later, by VBD_GROW. */
   10.86 +        cows[domid]->db   = NULL;
   10.87 +        cows[domid]->fsid = 0;
   10.88 +        
   10.89 +        printf("COW connected.\n");
   10.90 +        break;   
   10.91 +        
   10.92 +    case CMSG_BLKIF_BE_DESTROY:
   10.93 +        if ( msg->length != sizeof(blkif_be_destroy_t) )
   10.94 +            goto parse_error;
   10.95 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
   10.96 +                ((blkif_be_destroy_t *)msg->msg)->domid,
   10.97 +                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
   10.98 +        
   10.99 +        domid = ((blkif_be_destroy_t *)msg->msg)->domid;
  10.100 +        if (cows[domid] != NULL) {
  10.101 +            if (cows[domid]->db != NULL)
  10.102 +                cows[domid]->db->close(cows[domid]->db, 0);
  10.103 +            free(cows[domid]);
  10.104 +            cows[domid] = NULL;
  10.105 +        }
  10.106 +        break;  
  10.107 +    case CMSG_BLKIF_BE_VBD_GROW:
  10.108 +    {
  10.109 +        blkif_be_vbd_grow_t *grow;
  10.110 +        
  10.111 +        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
  10.112 +            goto parse_error;
  10.113 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
  10.114 +                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
  10.115 +                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
  10.116 +                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
  10.117 +        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
  10.118 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
  10.119 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
  10.120 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
  10.121 +        grow = (blkif_be_vbd_grow_t *)msg->msg;
  10.122 +        domid = grow->domid;
  10.123 +        if (cows[domid] == NULL) {
  10.124 +            printf("VBD_GROW on unconnected domain!\n");
  10.125 +            return 0;
  10.126 +        }
  10.127 +        
  10.128 +        if (grow->extent.device != AMORFS_DEV) {
  10.129 +            printf("VBD_GROW on non-amorfs device!\n");
  10.130 +            return 0;
  10.131 +        }
  10.132 +        /* The db file is named after the fsid carried in sector_start. */
  10.133 +        snprintf(cows[domid]->dbname, MAX_DBNAME_LEN, "%020llu.db",
  10.134 +                grow->extent.sector_start);
  10.135 +        
  10.136 +        cows[domid]->fsid = grow->extent.sector_start;
  10.137 +            
  10.138 +        if ((ret = db_create(&db, NULL, 0)) != 0) {
  10.139 +            fprintf(stderr, "db_create: %s\n", db_strerror(ret));
  10.140 +            return 0;
  10.141 +        }
  10.142 +        
  10.143 +        
  10.144 +#if DB_VERSION_MAJOR < 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 1)
  10.145 +
  10.146 +        if ((ret = db->open( db, cows[domid]->dbname, NULL, DB_BTREE, 
  10.147 +                DB_CREATE, 0664)) != 0) {
  10.148 +            
  10.149 +#else /* DB_VERSION >= 4.1 */
  10.150 +        
  10.151 +        if ((ret = db->open( db, NULL, cows[domid]->dbname, NULL, DB_BTREE, 
  10.152 +                DB_CREATE, 0664)) != 0) {
  10.153 +            
  10.154 +#endif /* DB_VERSION < 4.1 */
  10.155 +
  10.156 +            db->err(db, ret, "%s", cows[domid]->dbname);
  10.157 +            goto create_failed;
  10.158 +        }
  10.159 +        cows[domid]->db = db;
  10.160 +        printf("Overlay db opened. (%s)\n", cows[domid]->dbname);
  10.161 +        break;
  10.162 +    }    
  10.163 +    }
  10.164 +    return 0;
  10.165 +parse_error:
  10.166 +    printf("Bad control message!\n");
  10.167 +    return 0;
  10.168 +    
  10.169 +create_failed:
  10.170 +    db->close(db, 0); /* a handle whose open failed must still be closed */
  10.171 +    return 0;
  10.172 +}    
  10.173 + 
  10.174 +int cow_request(blkif_request_t *req)
  10.175 +{
  10.176 +    DB *db;
  10.177 +    DBT key, data;
  10.178 +    u64 sector;
  10.179 +    char *spage, *dpage;
  10.180 +    int ret, i, idx;
  10.181 +    blkif_response_t *rsp;
  10.182 +    domid_t dom = ID_TO_DOM(req->id);
  10.183 +    /* A domain without an open overlay db gets an error response at once. */
  10.184 +    if ((cows[dom] == NULL) || (cows[dom]->db == NULL)) {
  10.185 +        printf("Data request for unknown domain!!! %d\n", dom);
  10.186 +        rsp = (blkif_response_t *)req;
  10.187 +        rsp->id = req->id;
  10.188 +        rsp->operation = req->operation;
  10.189 +        rsp->status = BLKIF_RSP_ERROR;
  10.190 +        return BLKTAP_RESPOND;
  10.191 +    }
  10.192 +    
  10.193 +    db = cows[dom]->db;
  10.194 +    
  10.195 +    switch (req->operation) 
  10.196 +    {
  10.197 +    case BLKIF_OP_PROBE:
  10.198 +        /* debug -- delete: stash a copy so cow_response can print the result */
  10.199 +        idx = ID_TO_IDX(req->id);
  10.200 +        if (!(reread_list[idx] = malloc(sizeof(*req)))) goto err;
  10.201 +        memcpy(reread_list[idx], req, sizeof(*req));
  10.202 +        return  BLKTAP_PASS;
  10.203 +        
  10.204 +    case BLKIF_OP_WRITE:
  10.205 +        for (i = 0; i < req->nr_segments; i++) {
  10.206 +            memset(&key, 0, sizeof(key));
  10.207 +            memset(&data, 0, sizeof(data));
  10.208 +            /* Each segment is stored as one PAGE_SIZE record keyed by sector. */
  10.209 +            sector = req->sector_number + (8*i);
  10.210 +            key.data = &sector;
  10.211 +            key.size = sizeof(sector);
  10.212 +            
  10.213 +            spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  10.214 +            data.data = spage;
  10.215 +            data.size = PAGE_SIZE;
  10.216 +            
  10.217 +            
  10.218 +            DPRINTF("cWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
  10.219 +                    req->sector_number, sector, 
  10.220 +                    blkif_first_sect(req->frame_and_sects[i]),
  10.221 +                    blkif_last_sect (req->frame_and_sects[i]),
  10.222 +                    (long)(sector << 9));
  10.223 +            
  10.224 +            if ((ret = db->put(db, NULL, &key, &data, 0)) == 0)
  10.225 +                DPRINTF("db: %lld: key stored.\n", *((u64 *)key.data));
  10.226 +            else {
  10.227 +                db->err(db, ret, "DB->put");
  10.228 +                goto err;
  10.229 +            }
  10.230 +        }
  10.231 +        /* Writes are answered here rather than passed on. */
  10.232 +        rsp = (blkif_response_t *)req;
  10.233 +        rsp->id = req->id;
  10.234 +        rsp->operation = BLKIF_OP_WRITE;
  10.235 +        rsp->status = BLKIF_RSP_OKAY;
  10.236 +        
  10.237 +        return BLKTAP_RESPOND;
  10.238 +
  10.239 +    case BLKIF_OP_READ:
  10.240 +        for (i = 0; i < req->nr_segments; i++) {
  10.241 +            memset(&key, 0, sizeof(key));
  10.242 +            memset(&data, 0, sizeof(data));
  10.243 +            
  10.244 +            sector = req->sector_number + (8*i);
  10.245 +            key.data = &sector;
  10.246 +            key.size = sizeof(sector);
  10.247 +            
  10.248 +            DPRINTF("cREAD: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
  10.249 +                    req->sector_number, sector, 
  10.250 +                    blkif_first_sect(req->frame_and_sects[i]),
  10.251 +                    blkif_last_sect (req->frame_and_sects[i]),
  10.252 +                    (long)(sector << 9));
  10.253 +
  10.254 +            if ((ret = db->get(db, NULL, &key, &data, 0)) == 0) {
  10.255 +                DPRINTF("db: %llu: key retrieved (req).\n",
  10.256 +                    *((u64 *)key.data));
  10.257 +                
  10.258 +                dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  10.259 +                spage = data.data;
  10.260 +                memcpy(dpage, spage, PAGE_SIZE);
  10.261 +
  10.262 +            } else if (ret == DB_NOTFOUND) {
  10.263 +                idx = ID_TO_IDX(req->id);
  10.264 +                if (idx >= MAX_REQUESTS) {
  10.265 +                    printf("Bad index!\n");
  10.266 +                    goto err;
  10.267 +                }
  10.268 +                if (reread_list[idx] != NULL) {
  10.269 +                    printf("Dupe index!\n");
  10.270 +                    goto err;
  10.271 +                }
  10.272 +                if (!(reread_list[idx] = malloc(sizeof(*req)))) goto err;
  10.273 +                memcpy(reread_list[idx], req, sizeof(*req));
  10.274 +                return BLKTAP_PASS;
  10.275 +            } else {
  10.276 +                db->err(db, ret, "DB->get");
  10.277 +                goto err;
  10.278 +            }
  10.279 +        }
  10.280 +
  10.281 +        /* Every segment was found in the overlay, so answer directly. */
  10.282 +        rsp = (blkif_response_t *)req;
  10.283 +        rsp->id = req->id;
  10.284 +        rsp->operation = BLKIF_OP_READ;
  10.285 +        rsp->status = BLKIF_RSP_OKAY;
  10.286 +        return BLKTAP_RESPOND;
  10.287 +    }
  10.288 +    
  10.289 +    printf("Unknow block operation!\n");
  10.290 +    return BLKTAP_PASS;
  10.291 +err:
  10.292 +    rsp = (blkif_response_t *)req;
  10.293 +    rsp->id = req->id;
  10.294 +    rsp->operation = req->operation;
  10.295 +    rsp->status = BLKIF_RSP_ERROR;
  10.296 +    return BLKTAP_RESPOND;  
  10.297 +}
  10.298 +
  10.299 +int cow_response(blkif_response_t *rsp)
  10.300 +{   
  10.301 +    blkif_request_t *req;
  10.302 +    int i, ret;
  10.303 +    DB *db;
  10.304 +    DBT key, data;
  10.305 +    u64 sector;
  10.306 +    char *spage, *dpage;
  10.307 +    int idx = ID_TO_IDX(rsp->id);
  10.308 +    domid_t dom;
  10.309 +    /* NOTE(review): a request stashed in reread_list[idx] is not freed here. */
  10.310 +    /* don't touch erroring responses. */
  10.311 +    if (rsp->status == BLKIF_RSP_ERROR)
  10.312 +        return BLKTAP_PASS;
  10.313 +    
  10.314 +    if ((rsp->operation == BLKIF_OP_READ) && (reread_list[idx] != NULL))
  10.315 +    {
  10.316 +        req = reread_list[idx];
  10.317 +        dom = ID_TO_DOM(req->id);
  10.318 +
  10.319 +        if ((cows[dom] == NULL) || (cows[dom]->db == NULL)) {
  10.320 +            printf("Response from unknown domain!!! Very badness! %d\n", dom);
  10.321 +            return BLKTAP_PASS;
  10.322 +        }
  10.323 +    
  10.324 +        db = cows[dom]->db;
  10.325 +        /* Copy any segment present in the overlay over the returned data. */
  10.326 +        for (i = 0; i < req->nr_segments; i++) {
  10.327 +            memset(&key, 0, sizeof(key));
  10.328 +            memset(&data, 0, sizeof(data));
  10.329 +            
  10.330 +            sector = req->sector_number + (8*i);
  10.331 +            key.data = &sector;
  10.332 +            key.size = sizeof(sector);
  10.333 +            
  10.334 +            if ((ret = db->get(db, NULL, &key, &data, 0)) == 0) {
  10.335 +                printf("db: %llu: key retrieved (rsp).\n",
  10.336 +                    *((u64 *)key.data));
  10.337 +                
  10.338 +                dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  10.339 +                spage = data.data;
  10.340 +                memcpy(dpage, spage, PAGE_SIZE);
  10.341 +
  10.342 +            } else if (ret == DB_NOTFOUND) {
  10.343 +                continue; /* We read this from disk. */
  10.344 +            } else {
  10.345 +                db->err(db, ret, "DB->get");
  10.346 +                break; /* stop, but still release the stashed request below */
  10.347 +            }
  10.348 +        }
  10.349 +        free(reread_list[idx]);
  10.350 +        reread_list[idx] = NULL;
  10.351 +    }
  10.352 +    
  10.353 +    if ((rsp->operation == BLKIF_OP_PROBE) && (reread_list[idx] != NULL)) {
  10.354 +        
  10.355 +        vdisk_t *img_info;
  10.356 +        
  10.357 +        req = reread_list[idx];
  10.358 +        img_info = (vdisk_t *)(char *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
  10.359 +        for (i =0; i < rsp->status; i++) 
  10.360 +            printf("PROBE (%d) device: 0x%04x capacity: %llu, info: 0x%04x\n", 
  10.361 +                    i,
  10.362 +                    img_info[i].device,
  10.363 +                    img_info[i].capacity,
  10.364 +                    img_info[i].info);
  10.365 +        free(reread_list[idx]);
  10.366 +        reread_list[idx] = NULL;
  10.367 +    }
  10.368 +    
  10.369 +    /* rsp itself is never modified here; it is always passed on. */
  10.370 +    return BLKTAP_PASS;
  10.371 +}
  10.372 +
  10.373 +void cow_init(void)
  10.374 +{
  10.375 +    int i;
  10.376 +    /* Start with no overlay attached and no stashed request in any slot. */
  10.377 +    for (i = 0; i < MAX_DOMS; i++)
  10.378 +        cows[i] = NULL;
  10.379 +    
  10.380 +    for (i = 0; i < MAX_REQUESTS; i++)
  10.381 +        reread_list[i] = NULL;
  10.382 +}
  10.383 +
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/tools/blktap/blkcowlib.h	Tue Feb 08 18:21:54 2005 +0000
    11.3 @@ -0,0 +1,14 @@
    11.4 +/* blkcowlib.h
    11.5 + *
    11.6 + * copy on write a block device.  in a really inefficient way.
    11.7 + * 
    11.8 + * (c) 2004 Andrew Warfield.
    11.9 + *
   11.10 + * public interfaces to the CoW tap.
   11.11 + *
   11.12 + */
   11.13 + 
   11.14 +int  cow_control  (control_msg_t *msg);
   11.15 +int  cow_request  (blkif_request_t *req);
   11.16 +int  cow_response (blkif_response_t *rsp);
   11.17 +void cow_init     (void);
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/tools/blktap/blkdump.c	Tue Feb 08 18:21:54 2005 +0000
    12.3 @@ -0,0 +1,151 @@
    12.4 +/* blkdump.c
    12.5 + *
    12.6 + * show a running trace of block requests as they fly by.
    12.7 + * 
    12.8 + * (c) 2004 Andrew Warfield.
    12.9 + */
   12.10 + 
   12.11 +#include <stdio.h>
   12.12 +#include "blktaplib.h"
   12.13 + 
   12.14 +int control_print(control_msg_t *msg)
   12.15 +{
   12.16 +    if (msg->type != CMSG_BLKIF_BE) 
   12.17 +    {
   12.18 +        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
   12.19 +        return 0;
   12.20 +    }
   12.21 +    
   12.22 +    switch(msg->subtype)
   12.23 +    {
   12.24 +    case CMSG_BLKIF_BE_CREATE:
   12.25 +        if ( msg->length != sizeof(blkif_be_create_t) )
   12.26 +            goto parse_error;
   12.27 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
   12.28 +                ((blkif_be_create_t *)msg->msg)->domid,
   12.29 +                ((blkif_be_create_t *)msg->msg)->blkif_handle);
   12.30 +        break; 
   12.31 +    case CMSG_BLKIF_BE_DESTROY:
   12.32 +        if ( msg->length != sizeof(blkif_be_destroy_t) )
   12.33 +            goto parse_error;
   12.34 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
   12.35 +                ((blkif_be_destroy_t *)msg->msg)->domid,
   12.36 +                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
   12.37 +        break;   
   12.38 +    case CMSG_BLKIF_BE_CONNECT:
   12.39 +        if ( msg->length != sizeof(blkif_be_connect_t) )
   12.40 +            goto parse_error;
   12.41 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CONNECT(d:%d,h:%d)\n",
   12.42 +                ((blkif_be_connect_t *)msg->msg)->domid,
   12.43 +                ((blkif_be_connect_t *)msg->msg)->blkif_handle);
   12.44 +        break;        
   12.45 +    case CMSG_BLKIF_BE_DISCONNECT:
   12.46 +        if ( msg->length != sizeof(blkif_be_disconnect_t) )
   12.47 +            goto parse_error;
   12.48 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DISCONNECT(d:%d,h:%d)\n",
   12.49 +                ((blkif_be_disconnect_t *)msg->msg)->domid,
   12.50 +                ((blkif_be_disconnect_t *)msg->msg)->blkif_handle);
   12.51 +        break;     
   12.52 +    case CMSG_BLKIF_BE_VBD_CREATE:
   12.53 +        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
   12.54 +            goto parse_error;
   12.55 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_CREATE(d:%d,h:%d,v:%d)\n",
   12.56 +                ((blkif_be_vbd_create_t *)msg->msg)->domid,
   12.57 +                ((blkif_be_vbd_create_t *)msg->msg)->blkif_handle,
   12.58 +                ((blkif_be_vbd_create_t *)msg->msg)->vdevice);
   12.59 +        break;
   12.60 +    case CMSG_BLKIF_BE_VBD_DESTROY:
   12.61 +        if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
   12.62 +            goto parse_error;
   12.63 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_DESTROY(d:%d,h:%d,v:%d)\n",
   12.64 +                ((blkif_be_vbd_destroy_t *)msg->msg)->domid,
   12.65 +                ((blkif_be_vbd_destroy_t *)msg->msg)->blkif_handle,
   12.66 +                ((blkif_be_vbd_destroy_t *)msg->msg)->vdevice);
   12.67 +        break;
   12.68 +    case CMSG_BLKIF_BE_VBD_GROW:
   12.69 +        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
   12.70 +            goto parse_error;
   12.71 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
   12.72 +                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
   12.73 +                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
   12.74 +                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
   12.75 +        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
   12.76 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
   12.77 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
   12.78 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
   12.79 +        break;
   12.80 +    case CMSG_BLKIF_BE_VBD_SHRINK:
   12.81 +        if ( msg->length != sizeof(blkif_be_vbd_shrink_t) )
   12.82 +            goto parse_error;
   12.83 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_SHRINK(d:%d,h:%d,v:%d)\n",
   12.84 +                ((blkif_be_vbd_shrink_t *)msg->msg)->domid,
   12.85 +                ((blkif_be_vbd_shrink_t *)msg->msg)->blkif_handle,
   12.86 +                ((blkif_be_vbd_shrink_t *)msg->msg)->vdevice);
   12.87 +        break;
   12.88 +    default:
   12.89 +        goto parse_error;
   12.90 +    }
   12.91 +   
   12.92 +    return 0; 
   12.93 +      
   12.94 +parse_error:
   12.95 +    printf("[CONTROL_MSG] Bad message type or length!\n");
   12.96 +    return 0;
   12.97 +}
   12.98 + 
   12.99 +int request_print(blkif_request_t *req)
  12.100 +{
  12.101 +    int i;
  12.102 +    unsigned long fas;
  12.103 +    
  12.104 +    if ( req->operation == BLKIF_OP_PROBE ) {
  12.105 +        printf("[%2u:%2u<%s]\n", ID_TO_DOM(req->id), ID_TO_IDX(req->id),
  12.106 +                blkif_op_name[req->operation]);
  12.107 +        return BLKTAP_PASS;
  12.108 +    } else {
  12.109 +        printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", 
  12.110 +                ID_TO_DOM(req->id), ID_TO_IDX(req->id), 
  12.111 +                blkif_op_name[req->operation], 
  12.112 +                req->nr_segments, req->device, 
  12.113 +                req->sector_number);
  12.114 +        
  12.115 +        
  12.116 +        for (i=0; i < req->nr_segments; i++) {
  12.117 +            fas = req->frame_and_sects[i];
  12.118 +            printf("              (pf: 0x%8lx start: %lu stop: %lu)\n",
  12.119 +                    (fas & PAGE_MASK),
  12.120 +                    blkif_first_sect(fas),
  12.121 +                    blkif_last_sect(fas)
  12.122 +                    );
  12.123 +        }
  12.124 +            
  12.125 +    }
  12.126 +    
  12.127 +    return BLKTAP_PASS;
  12.128 +}
  12.129 +
  12.130 +int response_print(blkif_response_t *rsp)
  12.131 +{   
  12.132 +    if ( rsp->operation == BLKIF_OP_PROBE ) {
  12.133 +        printf("[%2u:%2u>%s]\n", ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id),
  12.134 +                blkif_op_name[rsp->operation]);
  12.135 +        return BLKTAP_PASS;
  12.136 +    } else {
  12.137 +        printf("[%2u:%2u>%5s] (status: %d)\n", 
  12.138 +                ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), 
  12.139 +                blkif_op_name[rsp->operation], 
  12.140 +                rsp->status);
  12.141 +            
  12.142 +    }
  12.143 +    return BLKTAP_PASS;
  12.144 +}
  12.145 +
  12.146 +int main(int argc, char *argv[])
  12.147 +{
  12.148 +    blktap_register_ctrl_hook("control_print", control_print);
  12.149 +    blktap_register_request_hook("request_print", request_print);
  12.150 +    blktap_register_response_hook("response_print", response_print);
  12.151 +    blktap_listen();
  12.152 +    
  12.153 +    return 0;
  12.154 +}
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/tools/blktap/blkgnbd.c	Tue Feb 08 18:21:54 2005 +0000
    13.3 @@ -0,0 +1,19 @@
    13.4 +/* blkgnbd.c
    13.5 + *
    13.6 + * gnbd-backed disk.
    13.7 + */
    13.8 +
    13.9 +#include "blktaplib.h"
   13.10 +#include "blkgnbdlib.h"
   13.11 +
   13.12 +
   13.13 +int main(int argc, char *argv[])
   13.14 +{
   13.15 +    gnbd_init();
   13.16 +    
   13.17 +    blktap_register_ctrl_hook("gnbd_control", gnbd_control);
   13.18 +    blktap_register_request_hook("gnbd_request", gnbd_request);
   13.19 +    blktap_listen();
   13.20 +    
   13.21 +    return 0;
   13.22 +}
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/tools/blktap/blkgnbdlib.c	Tue Feb 08 18:21:54 2005 +0000
    14.3 @@ -0,0 +1,471 @@
    14.4 +/* blkgnbdlib.c
    14.5 + *
    14.6 + * gnbd image-backed block device.
    14.7 + * 
    14.8 + * (c) 2004 Andrew Warfield.
    14.9 + *
   14.10 + * Xend has been modified to use an amorfs:[fsid] disk tag.
   14.11 + * This will show up as device type (maj:240,min:0) = 61440.
   14.12 + *
   14.13 + * The fsid is placed in the sec_start field of the disk extent.
   14.14 + */
   14.15 +
   14.16 +#include <stdio.h>
   14.17 +#include <stdlib.h>
   14.18 +#include <string.h>
   14.19 +#include <db.h>       
   14.20 +#include <sys/stat.h>
   14.21 +#include <sys/types.h>
   14.22 +#include <unistd.h>
   14.23 +#include <errno.h>
   14.24 +#include <sys/poll.h>
   14.25 +#include "blktaplib.h"
   14.26 +#include "libgnbd/libgnbd.h"
   14.27 +
   14.28 +#define GNBD_SERVER  "skirmish.cl.cam.ac.uk"
   14.29 +#define GNBD_CLIENT  "pengi-0.xeno.cl.cam.ac.uk"
   14.30 +#define GNBD_MOUNT   "fc2_akw27"
   14.31 +#define GNBD_PORT    0x38e7
   14.32 +
   14.33 +#define MAX_DOMS        1024
   14.34 +#define MAX_IMGNAME_LEN  255
   14.35 +#define AMORFS_DEV     61440
   14.36 +#define MAX_REQUESTS      64 /* must be synced with the blkif drivers. */
   14.37 +#define SECTOR_SHIFT       9
   14.38 +                                                                                
   14.39 +#if 0
   14.40 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   14.41 +#else
   14.42 +#define DPRINTF(_f, _a...) ((void)0)
   14.43 +#endif
   14.44 +        
   14.45 +#if 1                                                                        
   14.46 +#define ASSERT(_p) \
   14.47 +    if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
   14.48 +    __LINE__, __FILE__); *(int*)0=0; }
   14.49 +#else
   14.50 +#define ASSERT(_p) ((void)0)
   14.51 +#endif
   14.52 +
   14.53 +#define GH_DISCONNECTED 0
   14.54 +#define GH_PROBEWAITING 1
   14.55 +#define GH_CONNECTED    2
   14.56 +
   14.57 +typedef struct { /* Per-domain gnbd connection state; gnbds[] holds one pointer per domain id. */
   14.58 +    /* These need to turn into an array/rbtree for multi-disk support. */
   14.59 +    struct gnbd_handle *gh; /* handle returned by gnbd_setup(); NULL until VBD_GROW succeeds */
   14.60 +    int          gh_state; /* one of GH_DISCONNECTED / GH_PROBEWAITING / GH_CONNECTED */
   14.61 +    int          probe_idx; /* pending_list slot of a deferred PROBE. This really needs cleaning up after hotos. */
   14.62 +    int          fd; /* descriptor from gnbd_fd(gh); registered with blktap_attach_poll() */
   14.63 +    u64          fsid; /* copied from the VBD_GROW extent's sector_start */
   14.64 +    char         gnbdname[MAX_IMGNAME_LEN]; /* name passed to gnbd_setup(); currently always GNBD_MOUNT */
   14.65 +    blkif_vdev_t vdevice; /* virtual device id reported back in PROBE responses */
   14.66 +} gnbd_t;
   14.67 +
   14.68 +/* Note on pending_reqs: I assume all reqs are queued before they start to 
   14.69 + * get filled, so a count of 0 marks an unused record.
   14.70 + */
   14.71 +typedef struct {
   14.72 +    blkif_request_t  req; /* saved copy of the original request */
   14.73 +    int              count; /* gnbd operations still outstanding for this request; 0 = slot free */
   14.74 +} pending_req_t;
   14.75 +
   14.76 +static gnbd_t          *gnbds[MAX_DOMS];
   14.77 +static pending_req_t    pending_list[MAX_REQUESTS];
   14.78 +static int              pending_count = 0; /* debugging */
   14.79 +
   14.80 +
   14.81 +gnbd_t *get_gnbd_by_fd(int fd) /* Return the first gnbd record whose fd matches, or NULL if none does. */
   14.82 +{
   14.83 +    /* this is a linear scan for the moment.  needs to be cleaned up for
   14.84 +       multi-disk support. */
   14.85 +    
   14.86 +    int i;
   14.87 +    
   14.88 +    for (i=0; i< MAX_DOMS; i++) 
   14.89 +        if ((gnbds[i] != NULL) && (gnbds[i]->fd == fd)) /* skip empty slots */
   14.90 +            return gnbds[i];
   14.91 +    
   14.92 +    return NULL;
   14.93 +}
   14.94 +
   14.95 +int gnbd_pollhook(int fd);
   14.96 +
   14.97 +int gnbd_control(control_msg_t *msg) /* Control hook: handles BLKIF_BE CREATE / DESTROY / VBD_GROW; returns 0 on every path. */
   14.98 +{
   14.99 +    domid_t  domid;
  14.100 +    DB      *db; /* never referenced in this function */
  14.101 +    int      ret; /* never referenced in this function */
  14.102 +    
  14.103 +    if (msg->type != CMSG_BLKIF_BE) 
  14.104 +    {
  14.105 +        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
  14.106 +        return 0;
  14.107 +    }
  14.108 +    
  14.109 +    switch(msg->subtype) /* subtypes other than the three below fall through to return 0 */
  14.110 +    {
  14.111 +    case CMSG_BLKIF_BE_CREATE: /* allocate an empty record for the domain */
  14.112 +        if ( msg->length != sizeof(blkif_be_create_t) )
  14.113 +            goto parse_error;
  14.114 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
  14.115 +                ((blkif_be_create_t *)msg->msg)->domid,
  14.116 +                ((blkif_be_create_t *)msg->msg)->blkif_handle);
  14.117 +        domid = ((blkif_be_create_t *)msg->msg)->domid; /* NOTE(review): used to index gnbds[MAX_DOMS] with no range check */
  14.118 +        if (gnbds[domid] != NULL) {
  14.119 +            printf("attempt to connect from an existing dom!\n");
  14.120 +            return 0;
  14.121 +        }
  14.122 +        
  14.123 +        gnbds[domid] = (gnbd_t *)malloc(sizeof(gnbd_t));
  14.124 +        if (gnbds[domid] == NULL) {
  14.125 +            printf("error allocating gnbd record.\n");
  14.126 +            return 0;
  14.127 +        }
  14.128 +        
  14.129 +        gnbds[domid]->gh  = NULL; /* only gh and fsid are set here; the other fields stay unset until VBD_GROW */
  14.130 +        gnbds[domid]->fsid = 0;
  14.131 +        
  14.132 +        break;   
  14.133 +        
  14.134 +    case CMSG_BLKIF_BE_DESTROY: /* stop polling and release the domain's record */
  14.135 +        if ( msg->length != sizeof(blkif_be_destroy_t) )
  14.136 +            goto parse_error;
  14.137 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
  14.138 +                ((blkif_be_destroy_t *)msg->msg)->domid,
  14.139 +                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
  14.140 +        
  14.141 +        domid = ((blkif_be_destroy_t *)msg->msg)->domid; /* NOTE(review): same unchecked index as in CREATE */
  14.142 +        if (gnbds[domid] != NULL) {
  14.143 +            if (gnbds[domid]->gh != NULL) {
  14.144 +                blktap_detach_poll(gnbds[domid]->fd);
  14.145 +                free(gnbds[domid]->gh); /* XXX: Need a gnbd close call! */;
  14.146 +            }
  14.147 +            free( gnbds[domid] );
  14.148 +            gnbds[domid] = NULL; /* mark the slot free so CREATE can reuse it */
  14.149 +        }
  14.150 +        break;  
  14.151 +    case CMSG_BLKIF_BE_VBD_GROW: /* start the gnbd connection and hook its fd into the poll loop */
  14.152 +    {
  14.153 +        blkif_be_vbd_grow_t *grow;
  14.154 +        
  14.155 +        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
  14.156 +            goto parse_error;
  14.157 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
  14.158 +                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
  14.159 +                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
  14.160 +                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
  14.161 +        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
  14.162 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
  14.163 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
  14.164 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
  14.165 +        grow = (blkif_be_vbd_grow_t *)msg->msg;
  14.166 +        domid = grow->domid; /* NOTE(review): same unchecked index as in CREATE */
  14.167 +        if (gnbds[domid] == NULL) {
  14.168 +            printf("VBD_GROW on unconnected domain!\n");
  14.169 +            return 0;
  14.170 +        }
  14.171 +        
  14.172 +        if (grow->extent.device != AMORFS_DEV) { /* only the amorfs device number is accepted */
  14.173 +            printf("VBD_GROW on non-amorfs device!\n");
  14.174 +            return 0;
  14.175 +        }
  14.176 +        
  14.177 +        /* TODO: config support for arbitrary gnbd files/modes. */
  14.178 +        sprintf(gnbds[domid]->gnbdname, GNBD_MOUNT); /* GNBD_MOUNT is a fixed literal used as the format string */
  14.179 +        
  14.180 +        gnbds[domid]->fsid   = grow->extent.sector_start;
  14.181 +        gnbds[domid]->vdevice = grow->vdevice; 
  14.182 +        gnbds[domid]->gh_state = GH_DISCONNECTED; /* moves to GH_CONNECTED in gnbd_pollhook on GNBD_LOGIN_DONE */
  14.183 +        gnbds[domid]->gh = gnbd_setup(GNBD_SERVER, GNBD_PORT, 
  14.184 +            gnbds[domid]->gnbdname, GNBD_CLIENT);
  14.185 +        if (gnbds[domid]->gh == NULL) { /* the record stays allocated with gh == NULL */
  14.186 +            printf("Couldn't connect to gnbd mount!!\n");
  14.187 +            return 0;
  14.188 +        }
  14.189 +        gnbds[domid]->fd = gnbd_fd(gnbds[domid]->gh);
  14.190 +        blktap_attach_poll(gnbds[domid]->fd, POLLIN, gnbd_pollhook); /* replies are handled in gnbd_pollhook */
  14.191 +        
  14.192 +        printf("gnbd mount connected. (%s)\n", gnbds[domid]->gnbdname);
  14.193 +        break;
  14.194 +    }    
  14.195 +    }
  14.196 +    return 0;
  14.197 +parse_error: /* reached when msg->length does not match the expected payload size */
  14.198 +    printf("Bad control message!\n");
  14.199 +    return 0;
  14.200 +    
  14.201 +create_failed: /* NOTE(review): no goto in this function targets this label */
  14.202 +    /* TODO: close the db ref. */
  14.203 +    return 0;
  14.204 +}    
  14.205 + 
  14.206 +static int gnbd_blkif_probe(blkif_request_t *req, gnbd_t *gnbd) /* Answer a PROBE in place: fill one vdisk_t and turn *req into the response; always returns BLKTAP_RESPOND. */
  14.207 +{
  14.208 +    int fd; /* never referenced in this function */
  14.209 +    struct stat stat; /* never referenced in this function */
  14.210 +    vdisk_t *gnbd_info;
  14.211 +    blkif_response_t *rsp;
  14.212 +
  14.213 +    /* We expect one buffer only. */
  14.214 +    if ( req->nr_segments != 1 )
  14.215 +        goto err;
  14.216 +
  14.217 +    /* Make sure the buffer is page-sized (sectors 0 through 7 of the frame). */
  14.218 +    if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
  14.219 +         (blkif_last_sect (req->frame_and_sects[0]) != 7) )
  14.220 +        goto err;
  14.221 +
  14.222 +    /* loop for multiple gnbds would start here. */
  14.223 +
  14.224 +    gnbd_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0); /* the request's first mapped segment receives the disk list */
  14.225 +    gnbd_info[0].device   = gnbd->vdevice;
  14.226 +    gnbd_info[0].info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
  14.227 +    gnbd_info[0].capacity = gnbd_sectors(gnbd->gh);
  14.228 +
  14.229 +    printf("[SECTORS] %llu", gnbd_info[0].capacity); /* note: no trailing newline */
  14.230 +
  14.231 +    //if (gnbd_info[0].capacity == 0)
  14.232 +    //    gnbd_info[0].capacity = ((u64)1 << 63); // xend does this too.
  14.233 +
  14.234 +    DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", gnbd_info[0].device,
  14.235 +            gnbd_info[0].capacity);
  14.236 +
  14.237 +    rsp = (blkif_response_t *)req; /* the response is written over the request buffer */
  14.238 +    rsp->id = req->id;
  14.239 +    rsp->operation = BLKIF_OP_PROBE;
  14.240 +    rsp->status = 1; /* number of disks */
  14.241 +
  14.242 +    return  BLKTAP_RESPOND;
  14.243 +err: /* malformed probe: answer with BLKIF_RSP_ERROR */
  14.244 +    rsp = (blkif_response_t *)req;
  14.245 +    rsp->id = req->id;
  14.246 +    rsp->operation = req->operation;
  14.247 +    rsp->status = BLKIF_RSP_ERROR;
  14.248 +    return BLKTAP_RESPOND;  
  14.249 +}
  14.250 +
  14.251 +int gnbd_request(blkif_request_t *req) /* Request hook: returns BLKTAP_STOLEN when the request is queued or deferred, BLKTAP_RESPOND when *req has been rewritten as a response. */
  14.252 +{
  14.253 +    struct gnbd_handle *gh;
  14.254 +    u64 sector;
  14.255 +    char *spage, *dpage;
  14.256 +    int ret, i, idx;
  14.257 +    blkif_response_t *rsp;
  14.258 +    domid_t dom = ID_TO_DOM(req->id); /* NOTE(review): used to index gnbds[MAX_DOMS] with no range check */
  14.259 +    
  14.260 +    if ((gnbds[dom] == NULL) || (gnbds[dom]->gh == NULL)) { /* no record, or VBD_GROW has not set up a handle */
  14.261 +        printf("Data request for unknown domain!!! %d\n", dom);
  14.262 +        rsp = (blkif_response_t *)req;
  14.263 +        rsp->id = req->id;
  14.264 +        rsp->operation = req->operation;
  14.265 +        rsp->status = BLKIF_RSP_ERROR;
  14.266 +        return BLKTAP_RESPOND;
  14.267 +    }
  14.268 +    
  14.269 +    gh = gnbds[dom]->gh;
  14.270 +    
  14.271 +    switch (req->operation) 
  14.272 +    {
  14.273 +    case BLKIF_OP_PROBE:
  14.274 +    {
  14.275 +        printf("PROBE!\n");
  14.276 +        if ( gnbds[dom]->gh_state == GH_PROBEWAITING ) { /* only one deferred probe can be remembered (probe_idx) */
  14.277 +            printf("Already have a PROBE outstanding!\n");
  14.278 +            goto err;
  14.279 +        }
  14.280 +        
  14.281 +        if ( gnbds[dom]->gh_state == GH_DISCONNECTED )
  14.282 +        {
  14.283 +            /* need to defer until we are connected. */
  14.284 +            printf("Deferring PROBE!\n");
  14.285 +            idx = ID_TO_IDX(req->id); /* NOTE(review): idx is not range-checked against MAX_REQUESTS */
  14.286 +            memcpy(&pending_list[idx].req, req, sizeof(*req)); /* note: the slot is overwritten before the ASSERT below runs */
  14.287 +            ASSERT(pending_list[idx].count == 0);
  14.288 +            pending_list[idx].count = 1;
  14.289 +            
  14.290 +            gnbds[dom]->probe_idx = idx; /* gnbd_pollhook answers this slot on GNBD_LOGIN_DONE */
  14.291 +            gnbds[dom]->gh_state  = GH_PROBEWAITING;
  14.292 +
  14.293 +            return BLKTAP_STOLEN;
  14.294 +        }
  14.295 +            
  14.296 +        
  14.297 +        return gnbd_blkif_probe(req, gnbds[dom]); /* already connected: answer immediately */
  14.298 +    }    
  14.299 +    case BLKIF_OP_WRITE:
  14.300 +    {
  14.301 +        unsigned long size;
  14.302 +        
  14.303 +        idx = ID_TO_IDX(req->id); /* NOTE(review): idx is not range-checked against MAX_REQUESTS */
  14.304 +        ASSERT(pending_list[idx].count == 0);
  14.305 +        memcpy(&pending_list[idx].req, req, sizeof(*req));
  14.306 +        pending_list[idx].count = req->nr_segments; /* one gnbd completion is expected per segment */
  14.307 +        pending_count++; /* dbg */
  14.308 +        
  14.309 +        for (i = 0; i < req->nr_segments; i++) {
  14.310 +            
  14.311 +            sector = req->sector_number + (8*i); /* NOTE(review): stride of 8 assumes every earlier segment is a full 8 sectors */
  14.312 +            
  14.313 +            size = blkif_last_sect (req->frame_and_sects[i]) -
  14.314 +                   blkif_first_sect(req->frame_and_sects[i]) + 1; /* segment length in sectors */
  14.315 +            
  14.316 +            DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
  14.317 +                    req->sector_number, sector, 
  14.318 +                    blkif_first_sect(req->frame_and_sects[i]),
  14.319 +                    blkif_last_sect (req->frame_and_sects[i]),
  14.320 +                    (long)(sector << SECTOR_SHIFT));
  14.321 +                        
  14.322 +            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  14.323 +            spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; /* byte offset of the first sector within the page */
  14.324 +            
  14.325 +            ret = gnbd_write(gh, sector, size, spage, (unsigned long)idx); /* idx is the cookie read back via gnbd_finished_request() */
  14.326 +            if (ret) { /* NOTE(review): this path leaves pending_list[idx].count and pending_count as set above */
  14.327 +                printf("gnbd error on WRITE\n");
  14.328 +                goto err;
  14.329 +            }
  14.330 +        }
  14.331 +//printf("[WR] < %lu\n", (unsigned long)idx);
  14.332 +        
  14.333 +        return BLKTAP_STOLEN; /* the response is injected later by gnbd_pollhook */
  14.334 +    }
  14.335 +    case BLKIF_OP_READ:
  14.336 +    {
  14.337 +        unsigned long size;
  14.338 +        
  14.339 +        idx = ID_TO_IDX(req->id); /* NOTE(review): idx is not range-checked against MAX_REQUESTS */
  14.340 +        ASSERT(pending_list[idx].count == 0);
  14.341 +        memcpy(&pending_list[idx].req, req, sizeof(*req));
  14.342 +        pending_list[idx].count = req->nr_segments; /* one gnbd completion is expected per segment */
  14.343 +        pending_count++; /* dbg */
  14.344 +            
  14.345 +        for (i = 0; i < req->nr_segments; i++) {
  14.346 +            
  14.347 +            sector  = req->sector_number + (8*i); /* NOTE(review): stride of 8 assumes every earlier segment is a full 8 sectors */
  14.348 +            
  14.349 +            size = blkif_last_sect (req->frame_and_sects[i]) -
  14.350 +                   blkif_first_sect(req->frame_and_sects[i]) + 1; /* segment length in sectors */
  14.351 +            
  14.352 +            DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
  14.353 +                    req->sector_number, sector, 
  14.354 +                    blkif_first_sect(req->frame_and_sects[i]),
  14.355 +                    blkif_last_sect (req->frame_and_sects[i]),
  14.356 +                    (long)(sector << SECTOR_SHIFT));
  14.357 +            
  14.358 +            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  14.359 +            dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; /* byte offset of the first sector within the page */
  14.360 +            
  14.361 +            ret = gnbd_read(gh, sector, size, dpage, (unsigned long)idx); /* idx is the cookie read back via gnbd_finished_request() */
  14.362 +            if (ret) { /* NOTE(review): this path leaves pending_list[idx].count and pending_count as set above */
  14.363 +                printf("gnbd error on READ\n");
  14.364 +                goto err;
  14.365 +            }
  14.366 +            
  14.367 +        }
  14.368 +//printf("[RD] < %lu\n", (unsigned long)idx);
  14.369 +        
  14.370 +        return BLKTAP_STOLEN; /* the response is injected later by gnbd_pollhook */
  14.371 +    }
  14.372 +    }
  14.373 +    
  14.374 +    printf("Unknown block operation!\n");
  14.375 +err: /* rewrite the request buffer as an error response */
  14.376 +    rsp = (blkif_response_t *)req;
  14.377 +    rsp->id = req->id;
  14.378 +    rsp->operation = req->operation;
  14.379 +    rsp->status = BLKIF_RSP_ERROR;
  14.380 +    return BLKTAP_RESPOND;  
  14.381 +}
  14.382 +
  14.383 +/* The gnbd library terminates the request stream, so _resp is a no-op: rsp is ignored. */
  14.384 +int gnbd_response(blkif_response_t *rsp)
  14.385 +{   
  14.386 +    return BLKTAP_PASS; /* always BLKTAP_PASS */
  14.387 +}
  14.388 +
  14.389 +int gnbd_pollhook(int fd) /* Poll callback for a gnbd fd: processes one gnbd_reply() result; returns -1 if fd matches no record, else 0. */
  14.390 +{
  14.391 +    int err;
  14.392 +    struct gnbd_handle *gh;
  14.393 +    blkif_request_t *req;
  14.394 +    blkif_response_t *rsp;
  14.395 +    unsigned long idx;
  14.396 +    
  14.397 +    gnbd_t *gnbd = get_gnbd_by_fd(fd);
  14.398 +    
  14.399 +    if (gnbd == NULL) {
  14.400 +        printf("GNBD badness: got poll hook on unknown device. (%d)\n", fd);
  14.401 +        return -1;
  14.402 +    }
  14.403 +    gh = gnbd->gh;
  14.404 +    err = gnbd_reply(gh); /* despite the name, err carries the reply type switched on below */
  14.405 +    switch (err) {
  14.406 +    case GNBD_LOGIN_DONE: /* connection is up: answer any deferred PROBE, then mark connected */
  14.407 +        if (gnbd->gh_state == GH_PROBEWAITING) {
  14.408 +            req = (blkif_request_t *)&pending_list[gnbd->probe_idx].req;
  14.409 +            printf("[!] Sending deferred PROBE!\n");
  14.410 +            gnbd_blkif_probe(req, gnbd); /* rewrites the saved request into a response; return value is ignored */
  14.411 +            pending_list[gnbd->probe_idx].count = 0; /* free the slot */
  14.412 +            rsp = (blkif_response_t *)req;
  14.413 +            blktap_inject_response(rsp);
  14.414 +        }
  14.415 +        gnbd->gh_state = GH_CONNECTED;
  14.416 +        printf("GNBD_LOGIN_DONE (%d)\n", fd); 
  14.417 +        break;
  14.418 +
  14.419 +    case GNBD_REQUEST_DONE: /* switch to idx */
  14.420 +        idx = gnbd_finished_request(gh); /* cookie passed to gnbd_read()/gnbd_write(), i.e. the pending_list index */
  14.421 +        req = (blkif_request_t *)&pending_list[idx].req; /* NOTE(review): pointer is formed before idx is validated; it is only dereferenced after the check */
  14.422 +        if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){ /* out of range, or slot not in use */
  14.423 +            printf("gnbd returned a bad cookie (%lu)!\n", idx);
  14.424 +            break;
  14.425 +        }
  14.426 +        
  14.427 +        pending_list[idx].count--; /* one more segment of this request has completed */
  14.428 +        
  14.429 +        if (pending_list[idx].count == 0) { /* last segment done: respond to the frontend */
  14.430 +            blkif_request_t tmp = *req; /* copy first: rsp aliases req, so the writes below clobber it */
  14.431 +            pending_count--; /* dbg */
  14.432 +            rsp = (blkif_response_t *)req;
  14.433 +            rsp->id = tmp.id;
  14.434 +            rsp->operation = tmp.operation;
  14.435 +            rsp->status = BLKIF_RSP_OKAY;
  14.436 +            blktap_inject_response(rsp);
  14.437 +/*
  14.438 +if (rsp->operation == BLKIF_OP_READ) {
  14.439 +printf("[RD] > %lu (%d pndg)\n", (unsigned long)idx, pending_count);
  14.440 +} else if (rsp->operation == BLKIF_OP_WRITE) {
  14.441 +printf("[WR] > %lu (%d pndg)\n", (unsigned long)idx, pending_count);
  14.442 +} else  {
  14.443 +printf("[??] > %lu (%d pndg)\n", (unsigned long)idx, pending_count);
  14.444 +}
  14.445 +*/
  14.446 +        }
  14.447 +        break;
  14.448 +        
  14.449 +    case GNBD_CONTINUE: /* nothing to do */
  14.450 +        break;
  14.451 +        
  14.452 +    case 0: /* nothing to do */
  14.453 +        break;
  14.454 +        
  14.455 +    default: /* any other value is only logged (no trailing newline); the hook still returns 0 */
  14.456 +        printf("gnbd_reply error");
  14.457 +        break;
  14.458 +    }
  14.459 +    return 0;
  14.460 +}
  14.461 +
  14.462 +void gnbd_init(void) /* Reset plugin state: clear every gnbds[] slot and mark every pending_list entry unused. */
  14.463 +{   
  14.464 +    int i;
  14.465 +    
  14.466 +    for (i = 0; i < MAX_DOMS; i++)
  14.467 +        gnbds[i] = NULL; /* no domain connected */
  14.468 +    
  14.469 +    for (i = 0; i < MAX_REQUESTS; i++)
  14.470 +        pending_list[i].count = 0; /* count == 0 means the slot is free */
  14.471 +    
  14.472 +    printf("GNBD image plugin initialized\n");
  14.473 +}
  14.474 +
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/tools/blktap/blkgnbdlib.h	Tue Feb 08 18:21:54 2005 +0000
    15.3 @@ -0,0 +1,16 @@
    15.4 +/* blkgnbdlib.h
    15.5 + *
    15.6 + * gnbd image-backed block device.
    15.7 + * 
    15.8 + * (c) 2004 Andrew Warfield.
    15.9 + *
   15.10 + * Xend has been modified to use an amorfs:[fsid] disk tag.
   15.11 + * This will show up as device type (maj:240,min:0) = 61440.
   15.12 + *
   15.13 + * The fsid is placed in the sec_start field of the disk extent.
   15.14 + */
   15.15 +
   15.16 +int gnbd_control(control_msg_t *msg); /* control-message hook */
   15.17 +int gnbd_request(blkif_request_t *req); /* block-request hook */
   15.18 +int gnbd_response(blkif_response_t *rsp); /* noop */
   15.19 +void gnbd_init(void); /* call before registering the hooks above */
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/tools/blktap/blkimg.c	Tue Feb 08 18:21:54 2005 +0000
    16.3 @@ -0,0 +1,19 @@
    16.4 +/* blkimg.c
    16.5 + *
    16.6 + * file-backed disk.
    16.7 + */
    16.8 +
    16.9 +#include "blktaplib.h"
   16.10 +#include "blkimglib.h"
   16.11 +
   16.12 +
   16.13 +int main(int argc, char *argv[]) /* file-image tap entry point; argc/argv are not used. */
   16.14 +{
   16.15 +    image_init(); /* initialise plugin state before any hook is registered */
   16.16 +    
   16.17 +    blktap_register_ctrl_hook("image_control", image_control); /* control-message handler */
   16.18 +    blktap_register_request_hook("image_request", image_request); /* block-request handler */
   16.19 +    blktap_listen(); /* presumably blocks while servicing events -- confirm in blktaplib */
   16.20 +    
   16.21 +    return 0;
   16.22 +}
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/tools/blktap/blkimglib.c	Tue Feb 08 18:21:54 2005 +0000
    17.3 @@ -0,0 +1,325 @@
    17.4 +/* blkimglib.c
    17.5 + *
    17.6 + * file image-backed block device.
    17.7 + * 
    17.8 + * (c) 2004 Andrew Warfield.
    17.9 + *
   17.10 + * Xend has been modified to use an amorfs:[fsid] disk tag.
   17.11 + * This will show up as device type (maj:240,min:0) = 61440.
   17.12 + *
   17.13 + * The fsid is placed in the sec_start field of the disk extent.
   17.14 + */
   17.15 +
   17.16 +#include <stdio.h>
   17.17 +#include <stdlib.h>
   17.18 +#include <string.h>
   17.19 +#include <db.h>       
   17.20 +#include <sys/stat.h>
   17.21 +#include <sys/types.h>
   17.22 +#include <unistd.h>
   17.23 +#include <errno.h>
   17.24 +#include "blktaplib.h"
   17.25 +
   17.26 +//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
   17.27 +#define TMP_IMAGE_FILE_NAME "fc3.image"
   17.28 +
   17.29 +#define MAX_DOMS        1024
   17.30 +#define MAX_IMGNAME_LEN  255
   17.31 +#define AMORFS_DEV     61440
   17.32 +#define MAX_REQUESTS      64 /* must be synced with the blkif drivers. */
   17.33 +#define SECTOR_SHIFT       9
   17.34 +                                                                                
   17.35 +#if 0
   17.36 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   17.37 +#else
   17.38 +#define DPRINTF(_f, _a...) ((void)0)
   17.39 +#endif
   17.40 +                                                                                
   17.41 +
   17.42 +typedef struct { /* Per-domain image state; images[] holds one pointer per domain id. */
   17.43 +    /* These need to turn into an array/rbtree for multi-disk support. */
   17.44 +    FILE *img; /* backing file stream; NULL until VBD_GROW opens it */
   17.45 +    u64  fsid; /* copied from the VBD_GROW extent's sector_start */
   17.46 +    char imgname[MAX_IMGNAME_LEN]; /* file name; currently always TMP_IMAGE_FILE_NAME */
   17.47 +    blkif_vdev_t   vdevice; /* virtual device id reported back in PROBE responses */
   17.48 +} image_t;
   17.49 +
   17.50 +image_t         *images[MAX_DOMS];
   17.51 +blkif_request_t *reread_list[MAX_REQUESTS];
   17.52 +
   17.53 +int image_control(control_msg_t *msg) /* Control hook: handles BLKIF_BE CREATE / DESTROY / VBD_GROW; returns 0 on every path. */
   17.54 +{
   17.55 +    domid_t  domid;
   17.56 +    DB      *db; /* never referenced in this function */
   17.57 +    int      ret; /* never referenced in this function */
   17.58 +    
   17.59 +    if (msg->type != CMSG_BLKIF_BE) 
   17.60 +    {
   17.61 +        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
   17.62 +        return 0;
   17.63 +    }
   17.64 +    
   17.65 +    switch(msg->subtype) /* subtypes other than the three below fall through to return 0 */
   17.66 +    {
   17.67 +    case CMSG_BLKIF_BE_CREATE: /* allocate an empty record for the domain */
   17.68 +        if ( msg->length != sizeof(blkif_be_create_t) )
   17.69 +            goto parse_error;
   17.70 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
   17.71 +                ((blkif_be_create_t *)msg->msg)->domid,
   17.72 +                ((blkif_be_create_t *)msg->msg)->blkif_handle);
   17.73 +        domid = ((blkif_be_create_t *)msg->msg)->domid; /* NOTE(review): used to index images[MAX_DOMS] with no range check */
   17.74 +        if (images[domid] != NULL) {
   17.75 +            printf("attempt to connect from an existing dom!\n");
   17.76 +            return 0;
   17.77 +        }
   17.78 +        
   17.79 +        images[domid] = (image_t *)malloc(sizeof(image_t));
   17.80 +        if (images[domid] == NULL) {
   17.81 +            printf("error allocating image record.\n");
   17.82 +            return 0;
   17.83 +        }
   17.84 +        
   17.85 +        images[domid]->img  = NULL; /* only img and fsid are set here; imgname and vdevice are set by VBD_GROW */
   17.86 +        images[domid]->fsid = 0;
   17.87 +        
   17.88 +        printf("Image connected.\n");
   17.89 +        break;   
   17.90 +        
   17.91 +    case CMSG_BLKIF_BE_DESTROY: /* close the backing file, if open, and release the record */
   17.92 +        if ( msg->length != sizeof(blkif_be_destroy_t) )
   17.93 +            goto parse_error;
   17.94 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
   17.95 +                ((blkif_be_destroy_t *)msg->msg)->domid,
   17.96 +                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
   17.97 +        
   17.98 +        domid = ((blkif_be_destroy_t *)msg->msg)->domid; /* NOTE(review): same unchecked index as in CREATE */
   17.99 +        if (images[domid] != NULL) {
  17.100 +            if (images[domid]->img != NULL)
  17.101 +                fclose( images[domid]->img ); /* fclose result is not checked */
  17.102 +            free( images[domid] );
  17.103 +            images[domid] = NULL; /* mark the slot free so CREATE can reuse it */
  17.104 +        }
  17.105 +        break;  
  17.106 +    case CMSG_BLKIF_BE_VBD_GROW: /* open the backing image file for the domain */
  17.107 +    {
  17.108 +        blkif_be_vbd_grow_t *grow;
  17.109 +        
  17.110 +        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
  17.111 +            goto parse_error;
  17.112 +        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n",
  17.113 +                ((blkif_be_vbd_grow_t *)msg->msg)->domid,
  17.114 +                ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle,
  17.115 +                ((blkif_be_vbd_grow_t *)msg->msg)->vdevice);
  17.116 +        printf("              Extent: sec_start: %llu sec_len: %llu, dev: %d\n",
  17.117 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start,
  17.118 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length,
  17.119 +                ((blkif_be_vbd_grow_t *)msg->msg)->extent.device);
  17.120 +        grow = (blkif_be_vbd_grow_t *)msg->msg;
  17.121 +        domid = grow->domid; /* NOTE(review): same unchecked index as in CREATE */
  17.122 +        if (images[domid] == NULL) {
  17.123 +            printf("VBD_GROW on unconnected domain!\n");
  17.124 +            return 0;
  17.125 +        }
  17.126 +        
  17.127 +        if (grow->extent.device != AMORFS_DEV) { /* only the amorfs device number is accepted */
  17.128 +            printf("VBD_GROW on non-amorfs device!\n");
  17.129 +            return 0;
  17.130 +        }
  17.131 +        
  17.132 +        /* TODO: config support for arbitrary image files/modes. */
  17.133 +        sprintf(images[domid]->imgname, TMP_IMAGE_FILE_NAME); /* TMP_IMAGE_FILE_NAME is a fixed literal used as the format string */
  17.134 +        
  17.135 +        images[domid]->fsid   = grow->extent.sector_start;
  17.136 +        images[domid]->vdevice = grow->vdevice; 
  17.137 +        images[domid]->img = fopen64(TMP_IMAGE_FILE_NAME, "r+"); /* "r+": read/write, the file must already exist */
  17.138 +        if (images[domid]->img == NULL) { /* the record stays allocated with img == NULL */
  17.139 +            printf("Couldn't open image file!\n");
  17.140 +            return 0;
  17.141 +        }
  17.142 +        
  17.143 +        printf("Image file opened. (%s)\n", images[domid]->imgname);
  17.144 +        break;
  17.145 +    }    
  17.146 +    }
  17.147 +    return 0;
  17.148 +parse_error: /* reached when msg->length does not match the expected payload size */
  17.149 +    printf("Bad control message!\n");
  17.150 +    return 0;
  17.151 +    
  17.152 +create_failed: /* NOTE(review): no goto in this function targets this label */
  17.153 +    /* TODO: close the db ref. */
  17.154 +    return 0;
  17.155 +}    
  17.156 + 
  17.157 +int image_request(blkif_request_t *req) /* Request hook: services PROBE/WRITE/READ synchronously against the image file, rewrites *req as the response, and always returns BLKTAP_RESPOND. */
  17.158 +{
  17.159 +    FILE *img;
  17.160 +    u64 sector;
  17.161 +    char *spage, *dpage;
  17.162 +    int ret, i, idx; /* idx is never referenced in this function */
  17.163 +    blkif_response_t *rsp;
  17.164 +    domid_t dom = ID_TO_DOM(req->id); /* NOTE(review): used to index images[MAX_DOMS] with no range check */
  17.165 +    
  17.166 +    if ((images[dom] == NULL) || (images[dom]->img == NULL)) { /* no record, or VBD_GROW has not opened the file */
  17.167 +        printf("Data request for unknown domain!!! %d\n", dom);
  17.168 +        rsp = (blkif_response_t *)req;
  17.169 +        rsp->id = req->id;
  17.170 +        rsp->operation = req->operation;
  17.171 +        rsp->status = BLKIF_RSP_ERROR;
  17.172 +        return BLKTAP_RESPOND;
  17.173 +    }
  17.174 +    
  17.175 +    img = images[dom]->img;
  17.176 +    
  17.177 +    switch (req->operation) 
  17.178 +    {
  17.179 +    case BLKIF_OP_PROBE: /* report a single disk whose capacity is the file size in sectors */
  17.180 +    {
  17.181 +        int fd;
  17.182 +        struct stat stat;
  17.183 +        vdisk_t *img_info;
  17.184 +        
  17.185 +        
  17.186 +        /* We expect one buffer only. */
  17.187 +        if ( req->nr_segments != 1 )
  17.188 +            goto err;
  17.189 +                                                                                
  17.190 +        /* Make sure the buffer is page-sized (sectors 0 through 7 of the frame). */
  17.191 +        if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
  17.192 +             (blkif_last_sect (req->frame_and_sects[0]) != 7) )
  17.193 +            goto err;
  17.194 +
  17.195 +        /* loop for multiple images would start here. */
  17.196 +        
  17.197 +        fd = fileno(img);
  17.198 +        if (fd == -1) {
  17.199 +            printf("Couldn't get image fd in PROBE!\n");
  17.200 +            goto err;
  17.201 +        }
  17.202 +        
  17.203 +        ret = fstat(fd, &stat);
  17.204 +        if (ret != 0) {
  17.205 +            printf("Couldn't stat image in PROBE!\n");
  17.206 +            goto err;
  17.207 +        }
  17.208 +        
  17.209 +        img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0); /* the request's first mapped segment receives the disk list */
  17.210 +        img_info[0].device   = images[dom]->vdevice;
  17.211 +        img_info[0].info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
  17.212 +        img_info[0].capacity = (stat.st_size >> SECTOR_SHIFT); /* bytes -> 512-byte sectors, rounding down */
  17.213 +        
  17.214 +        if (img_info[0].capacity == 0) /* e.g. st_size below one sector; presumably also device nodes -- confirm */
  17.215 +            img_info[0].capacity = ((u64)1 << 63); // xend does this too.
  17.216 +        
  17.217 +        DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", img_info[0].device,
  17.218 +                img_info[0].capacity);
  17.219 +        
  17.220 +        rsp = (blkif_response_t *)req; /* the response is written over the request buffer */
  17.221 +        rsp->id = req->id;
  17.222 +        rsp->operation = BLKIF_OP_PROBE;
  17.223 +        rsp->status = 1; /* number of disks */
  17.224 +        
  17.225 +        return  BLKTAP_RESPOND;
  17.226 +    }    
  17.227 +    case BLKIF_OP_WRITE: /* copy each mapped segment into the image file */
  17.228 +    {
  17.229 +        unsigned long size;
  17.230 +        
  17.231 +        for (i = 0; i < req->nr_segments; i++) {
  17.232 +            
  17.233 +            sector = req->sector_number + (8*i); /* NOTE(review): stride of 8 assumes every earlier segment is a full 8 sectors */
  17.234 +            
  17.235 +            size = blkif_last_sect (req->frame_and_sects[i]) -
  17.236 +                   blkif_first_sect(req->frame_and_sects[i]) + 1; /* segment length in sectors */
  17.237 +            
  17.238 +            ret = fseeko64(img, (off_t)(sector << SECTOR_SHIFT), SEEK_SET); /* NOTE(review): the off_t cast may truncate where off_t is 32-bit -- confirm build flags */
  17.239 +            if (ret != 0) {
  17.240 +                printf("fseek error on WRITE\n");
  17.241 +                goto err;
  17.242 +            }
  17.243 +            
  17.244 +            DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
  17.245 +                    req->sector_number, sector, 
  17.246 +                    blkif_first_sect(req->frame_and_sects[i]),
  17.247 +                    blkif_last_sect (req->frame_and_sects[i]),
  17.248 +                    (long)(sector << SECTOR_SHIFT));
  17.249 +                        
  17.250 +            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  17.251 +            spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; /* byte offset of the first sector within the page */
  17.252 +            ret = fwrite(spage, size << SECTOR_SHIFT, 1, img); /* one item of size*512 bytes; no explicit flush follows */
  17.253 +            if (ret != 1) {
  17.254 +                printf("fwrite error on WRITE (%d)\n", errno);
  17.255 +                goto err;
  17.256 +            }
  17.257 +        }
  17.258 +        
  17.259 +        rsp = (blkif_response_t *)req;
  17.260 +        rsp->id = req->id;
  17.261 +        rsp->operation = BLKIF_OP_WRITE;
  17.262 +        rsp->status = BLKIF_RSP_OKAY;
  17.263 +        
  17.264 +        return BLKTAP_RESPOND;
  17.265 +    }
  17.266 +    case BLKIF_OP_READ: /* fill each mapped segment from the image file */
  17.267 +    {
  17.268 +        unsigned long size;
  17.269 +        
  17.270 +        for (i = 0; i < req->nr_segments; i++) {
  17.271 +            
  17.272 +            sector  = req->sector_number + (8*i); /* NOTE(review): stride of 8 assumes every earlier segment is a full 8 sectors */
  17.273 +            
  17.274 +            size = blkif_last_sect (req->frame_and_sects[i]) -
  17.275 +                   blkif_first_sect(req->frame_and_sects[i]) + 1; /* segment length in sectors */
  17.276 +            
  17.277 +            ret = fseeko64(img, (off_t)(sector << SECTOR_SHIFT), SEEK_SET); /* NOTE(review): the off_t cast may truncate where off_t is 32-bit -- confirm build flags */
  17.278 +            if (ret != 0) {
  17.279 +                printf("fseek error on READ\n");
  17.280 +                goto err;
  17.281 +            }
  17.282 +        
  17.283 +            DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", 
  17.284 +                    req->sector_number, sector, 
  17.285 +                    blkif_first_sect(req->frame_and_sects[i]),
  17.286 +                    blkif_last_sect (req->frame_and_sects[i]),
  17.287 +                    (long)(sector << SECTOR_SHIFT));
  17.288 +            
  17.289 +            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  17.290 +            dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; /* byte offset of the first sector within the page */
  17.291 +            ret = fread(dpage, size << SECTOR_SHIFT, 1, img); /* a short read (e.g. past end of file) is reported as an error */
  17.292 +            if (ret != 1) {
  17.293 +                printf("fread error on READ\n");
  17.294 +                goto err;
  17.295 +            }
  17.296 +        }
  17.297 +
  17.298 +        rsp = (blkif_response_t *)req;
  17.299 +        rsp->id = req->id;
  17.300 +        rsp->operation = BLKIF_OP_READ;
  17.301 +        rsp->status = BLKIF_RSP_OKAY;
  17.302 +        return BLKTAP_RESPOND;
  17.303 +    }
  17.304 +    }
  17.305 +    
  17.306 +    printf("Unknow block operation!\n");
  17.307 +err: /* rewrite the request buffer as an error response */
  17.308 +    rsp = (blkif_response_t *)req;
  17.309 +    rsp->id = req->id;
  17.310 +    rsp->operation = req->operation;
  17.311 +    rsp->status = BLKIF_RSP_ERROR;
  17.312 +    return BLKTAP_RESPOND;  
  17.313 +}
  17.314 +
  17.315 +/* The image library terminates the request stream, so the response hook is a no-op: rsp is ignored and BLKTAP_PASS is always returned. */
  17.316 +int image_response(blkif_response_t *rsp)
  17.317 +{   
  17.318 +    return BLKTAP_PASS;
  17.319 +}
  17.320 +
  17.321 +void image_init(void)
  17.322 +{
  17.323 +    int i;
  17.324 +    /* Reset all MAX_DOMS entries of images[] to NULL. */
  17.325 +    for (i = 0; i < MAX_DOMS; i++)
  17.326 +        images[i] = NULL;
  17.327 +}
  17.328 +
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/tools/blktap/blkimglib.h	Tue Feb 08 18:21:54 2005 +0000
    18.3 @@ -0,0 +1,16 @@
    18.4 +/* blkimglib.h
    18.5 + *
    18.6 + * file image-backed block device.
    18.7 + * 
    18.8 + * (c) 2004 Andrew Warfield.
    18.9 + *
   18.10 + * Xend has been modified to use an amorfs:[fsid] disk tag.
   18.11 + * This will show up as device type (maj:240,min:0) = 61440.
   18.12 + *
   18.13 + * The fsid is placed in the sec_start field of the disk extent.
   18.14 + */
   18.15 +
   18.16 +int image_control(control_msg_t *msg);      /* same signature as a blktap ctrl hook */
   18.17 +int image_request(blkif_request_t *req);    /* same signature as a blktap request hook */
   18.18 +int image_response(blkif_response_t *rsp); /* noop: always returns BLKTAP_PASS */
   18.19 +void image_init(void);                      /* sets every images[] slot to NULL */
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/tools/blktap/blkint.h	Tue Feb 08 18:21:54 2005 +0000
    19.3 @@ -0,0 +1,105 @@
    19.4 +/*
    19.5 + * blkint.h
    19.6 + * 
    19.7 + * Interfaces for the Xen block interposition driver.
    19.8 + * 
    19.9 + * (c) 2004, Andrew Warfield, University of Cambridge
   19.10 + * 
   19.11 + */
   19.12 +
   19.13 +#ifndef __BLKINT_H__
   19.14 +
   19.15 +//#include "blkif.h"
   19.16 +
   19.17 +
   19.18 +#if 0
   19.19 +/* Types of ring. */
   19.20 +#define BLKIF_REQ_RING_TYPE 1
   19.21 +#define BLKIF_RSP_RING_TYPE 2
   19.22 +
   19.23 +/* generic ring struct. */
   19.24 +typedef struct blkif_generic_ring_struct {
   19.25 +    int type;
   19.26 +} blkif_generic_ring_t;
   19.27 +
   19.28 +/* A requestor's view of a ring. */
   19.29 +typedef struct blkif_req_ring_struct {
   19.30 +
   19.31 +    int type;                    /* Will be BLKIF_REQ_RING_TYPE        */
   19.32 +    BLKIF_RING_IDX req_prod;     /* PRIVATE req_prod index             */
   19.33 +    BLKIF_RING_IDX rsp_cons;     /* Response consumer index            */
   19.34 +    blkif_ring_t *ring;          /* Pointer to shared ring struct      */
   19.35 +
   19.36 +} blkif_req_ring_t;
   19.37 +
   19.38 +#define BLKIF_REQ_RING_INIT { BLKIF_REQ_RING_TYPE, 0, 0, 0 }
   19.39 +
   19.40 +/* A responder's view of a ring. */
   19.41 +typedef struct blkif_rsp_ring_struct {
   19.42 +
   19.43 +    int type;                    /* Will be BLKIF_RSP_RING_TYPE        */
   19.44 +    BLKIF_RING_IDX rsp_prod;     /* PRIVATE rsp_prod index             */
   19.45 +    BLKIF_RING_IDX req_cons;     /* Request consumer index             */
   19.46 +    blkif_ring_t *ring;          /* Pointer to shared ring struct      */
   19.47 +
   19.48 +} blkif_rsp_ring_t;
   19.49 +
   19.50 +#define BLKIF_RSP_RING_INIT { BLKIF_RSP_RING_TYPE, 0, 0, 0 }
   19.51 +
   19.52 +#define RING(a) (blkif_generic_ring_t *)(a)
   19.53 +inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring);
   19.54 +#endif
   19.55 +
   19.56 +/* -------[ interposition -> character device interface ]------------- */
   19.57 +
   19.58 +/* /dev/xen/blktap resides at device number major=10, minor=202        */ 
   19.59 +#define BLKTAP_MINOR 202
   19.60 +
   19.61 +/* size of the extra VMA area to map in attached pages. */
   19.62 +#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
   19.63 +
   19.64 +/* blktap IOCTLs:                                                      */
   19.65 +#define BLKTAP_IOCTL_KICK_FE         1
   19.66 +#define BLKTAP_IOCTL_KICK_BE         2
   19.67 +#define BLKTAP_IOCTL_SETMODE         3
   19.68 +#define BLKTAP_IOCTL_PRINT_IDXS      100   
   19.69 +
   19.70 +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
   19.71 +#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
   19.72 +#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
   19.73 +#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
   19.74 +#define BLKTAP_MODE_COPY_FE          0x00000004
   19.75 +#define BLKTAP_MODE_COPY_BE          0x00000008
   19.76 +#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
   19.77 +#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
   19.78 +
   19.79 +#define BLKTAP_MODE_INTERPOSE \
   19.80 +           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
   19.81 +
   19.82 +#define BLKTAP_MODE_COPY_BOTH \
   19.83 +           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
   19.84 +
   19.85 +#define BLKTAP_MODE_COPY_BOTH_PAGES \
   19.86 +           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
   19.87 +/* Returns non-zero iff arg is one of the accepted mode words: passthrough, one or both intercept flags, or a COPY_* selection optionally combined with its matching *_PAGES flag(s). */
   19.88 +static inline int BLKTAP_MODE_VALID(unsigned long arg)
   19.89 +{
   19.90 +    return (
   19.91 +        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
   19.92 +        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
   19.93 +        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
   19.94 +        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
   19.95 +        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) || /* COPY_FE, with or without COPY_FE_PAGES */
   19.96 +        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) || /* COPY_BE, with or without COPY_BE_PAGES */
   19.97 +        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH ) /* both COPY flags, any subset of the PAGES flags */
   19.98 +        );
   19.99 +}
  19.100 +
  19.101 +
  19.102 +
  19.103 +
  19.104 +
  19.105 +
  19.106 +
  19.107 +#define __BLKINT_H__
  19.108 +#endif
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/tools/blktap/blktaplib.c	Tue Feb 08 18:21:54 2005 +0000
    20.3 @@ -0,0 +1,542 @@
    20.4 +/*
    20.5 + * blktaplib.c
    20.6 + * 
    20.7 + * userspace interface routines for the blktap driver.
    20.8 + *
    20.9 + * (c) 2004 Andrew Warfield.
   20.10 + */
   20.11 +
   20.12 +#include <stdio.h>
   20.13 +#include <stdlib.h>
   20.14 +#include <sys/mman.h>
   20.15 +#include <sys/user.h>
   20.16 +#include <err.h>
   20.17 +#include <errno.h>
   20.18 +#include <sys/types.h>
   20.19 +#include <linux/types.h>
   20.20 +#include <sys/stat.h>
   20.21 +#include <fcntl.h>
   20.22 +#include <signal.h>
   20.23 +#include <sys/poll.h>
   20.24 +#include <sys/ioctl.h>
   20.25 +#include <string.h>
   20.26 +#include <unistd.h>
   20.27 +                                                                     
   20.28 +
   20.29 +#define __COMPILING_BLKTAP_LIB
   20.30 +#include "blktaplib.h"
   20.31 +
   20.32 +#if 1
   20.33 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   20.34 +#else
   20.35 +#define DPRINTF(_f, _a...) ((void)0)
   20.36 +#endif
   20.37 +#define DEBUG_RING_IDXS 1
   20.38 +
   20.39 +#define POLLRDNORM     0x040 
   20.40 +
   20.41 +#define BLKTAP_IOCTL_KICK 1
   20.42 +
   20.43 +// this is in the header now
   20.44 +//DEFINE_RING_TYPES(blkif, blkif_request_t, blkif_response_t);
   20.45 +
   20.46 +void got_sig_bus();
   20.47 +void got_sig_int();
   20.48 +
   20.49 +
   20.50 +/* in kernel these are opposite, but we are a consumer now. */
   20.51 +blkif_back_ring_t  fe_ring; /* slightly counterintuitive ;) */
   20.52 +blkif_front_ring_t be_ring; 
   20.53 +ctrl_back_ring_t   ctrl_ring;
   20.54 +
   20.55 +
   20.56 +
   20.57 +unsigned long mmap_vstart = 0;
   20.58 +char *blktap_mem;
   20.59 +int fd = 0;
   20.60 +
   20.61 +#define BLKTAP_RING_PAGES       3 /* Ctrl, Back, Front */
   20.62 +/*#define BLKTAP_MMAP_PAGES       ((11 + 1) * 64)*/
   20.63 +#define BLKTAP_MMAP_PAGES \
   20.64 +    ((BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) * BLKIF_RING_SIZE)
   20.65 +#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + BLKTAP_MMAP_PAGES)
   20.66 +
   20.67 +
   20.68 +    
   20.69 +int bad_count = 0;   /* incremented by every bad() call */
   20.70 +void bad(void)       /* bumps bad_count and exits the process (status 0) once it exceeds 50 */
   20.71 +{
   20.72 +    bad_count ++;
   20.73 +    if (bad_count > 50) exit(0);
   20.74 +}
   20.75 +/*-----[ ID Manipulation from tap driver code ]--------------------------*/
   20.76 +
   20.77 +#define ACTIVE_RING_IDX unsigned short
   20.78 +/* Pack a front-end domain id (bits 16 and up) and a ring index (low 16 bits) into one request id. */
   20.79 +inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
   20.80 +{
   20.81 +    return ( ((unsigned long)fe_dom << 16) | idx ); /* widen first: shifting the int-promoted id overflows for ids >= 0x8000 */
   20.82 +}
   20.83 +
   20.84 +inline unsigned int ID_TO_IDX(unsigned long id) /* low 16 bits of id: the ring index packed by MAKE_ID */
   20.85 +{ 
   20.86 +        return ( id & 0x0000ffff );
   20.87 +}
   20.88 +
   20.89 +inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); } /* bits 16 and up of id, converted to domid_t */
   20.90 +/*
   20.91 +static int (*request_hook)(blkif_request_t *req) = NULL;
   20.92 +static int (*response_hook)(blkif_response_t *req) = NULL;
   20.93 +*/
   20.94 +
   20.95 +/*-----[ Request/Response hook chains.]----------------------------------*/
   20.96 +
   20.97 +#define HOOK_NAME_MAX 50
   20.98 +        
   20.99 +typedef struct ctrl_hook_st {          /* node of the control-message hook chain */
  20.100 +    char name[HOOK_NAME_MAX];          /* label printed by print_hooks() */
  20.101 +    int (*func)(control_msg_t *);      /* callback invoked for each control message */
  20.102 +    struct ctrl_hook_st *next;         /* next node, NULL at the tail */
  20.103 +} ctrl_hook_t;
  20.104 +        
  20.105 +typedef struct request_hook_st {       /* node of the request hook chain */
  20.106 +    char name[HOOK_NAME_MAX];
  20.107 +    int (*func)(blkif_request_t *);    /* returns BLKTAP_PASS, BLKTAP_RESPOND or BLKTAP_STOLEN */
  20.108 +    struct request_hook_st *next;
  20.109 +} request_hook_t;
  20.110 +
  20.111 +typedef struct response_hook_st {      /* node of the response hook chain */
  20.112 +    char name[HOOK_NAME_MAX];
  20.113 +    int (*func)(blkif_response_t *);   /* only BLKTAP_PASS is accepted by apply_rsp_hooks() */
  20.114 +    struct response_hook_st *next;
  20.115 +} response_hook_t;
  20.116 +
  20.117 +static ctrl_hook_t *ctrl_hook_chain = NULL;
  20.118 +static request_hook_t *request_hook_chain = NULL;
  20.119 +static response_hook_t *response_hook_chain = NULL;
  20.120 +/* Append a named control-message hook to the tail of ctrl_hook_chain; exits the process if allocation fails. */
  20.121 +void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *)) 
  20.122 +{
  20.123 +    ctrl_hook_t *ch_ent, **c;
  20.124 +    
  20.125 +    ch_ent = (ctrl_hook_t *)malloc(sizeof(ctrl_hook_t));
  20.126 +    if (!ch_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
  20.127 +    /* name is truncated to HOOK_NAME_MAX-1 characters and always NUL-terminated. */
  20.128 +    ch_ent->func  = ch;
  20.129 +    ch_ent->next = NULL;
  20.130 +    strncpy(ch_ent->name, name, HOOK_NAME_MAX);
  20.131 +    ch_ent->name[HOOK_NAME_MAX-1] = '\0';
  20.132 +    /* Walk to the terminating NULL link and hang the new entry there. */
  20.133 +    c = &ctrl_hook_chain;
  20.134 +    while (*c != NULL) {
  20.135 +        c = &(*c)->next;
  20.136 +    }
  20.137 +    *c = ch_ent;
  20.138 +}
  20.139 +/* Append a named request hook to the tail of request_hook_chain; exits the process if allocation fails. */
  20.140 +void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *)) 
  20.141 +{
  20.142 +    request_hook_t *rh_ent, **c;
  20.143 +    
  20.144 +    rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
  20.145 +    if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
  20.146 +    
  20.147 +    rh_ent->func  = rh;
  20.148 +    rh_ent->next = NULL;
  20.149 +    strncpy(rh_ent->name, name, HOOK_NAME_MAX);
  20.150 +    rh_ent->name[HOOK_NAME_MAX-1] = '\0'; /* strncpy does not terminate when name is truncated */
  20.151 +    c = &request_hook_chain;
  20.152 +    while (*c != NULL) {
  20.153 +        c = &(*c)->next;
  20.154 +    }
  20.155 +    *c = rh_ent;
  20.156 +}
  20.157 +/* Append a named response hook to the tail of response_hook_chain; exits the process if allocation fails. */
  20.158 +void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *)) 
  20.159 +{
  20.160 +    response_hook_t *rh_ent, **c;
  20.161 +    
  20.162 +    rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
  20.163 +    if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
  20.164 +    
  20.165 +    rh_ent->func  = rh;
  20.166 +    rh_ent->next = NULL;
  20.167 +    strncpy(rh_ent->name, name, HOOK_NAME_MAX);
  20.168 +    rh_ent->name[HOOK_NAME_MAX-1] = '\0'; /* strncpy does not terminate when name is truncated */
  20.169 +    c = &response_hook_chain;
  20.170 +    while (*c != NULL) {
  20.171 +        c = &(*c)->next;
  20.172 +    }
  20.173 +    *c = rh_ent;
  20.174 +}
  20.175 +/* Print the control, request and response hook chains to stdout: one line per entry with its function pointer and name, in chain order. */
  20.176 +void print_hooks(void)
  20.177 +{
  20.178 +    request_hook_t  *req_hook;
  20.179 +    response_hook_t *rsp_hook;
  20.180 +    ctrl_hook_t     *ctrl_hook;
  20.181 +    
  20.182 +    printf("Control Hooks:\n");
  20.183 +    ctrl_hook = ctrl_hook_chain;
  20.184 +    while (ctrl_hook != NULL)
  20.185 +    {
  20.186 +        printf("  [0x%p] %s\n", ctrl_hook->func, ctrl_hook->name);
  20.187 +        ctrl_hook = ctrl_hook->next;
  20.188 +    }
  20.189 +    
  20.190 +    printf("Request Hooks:\n");
  20.191 +    req_hook = request_hook_chain;
  20.192 +    while (req_hook != NULL)
  20.193 +    {
  20.194 +        printf("  [0x%p] %s\n", req_hook->func, req_hook->name);
  20.195 +        req_hook = req_hook->next;
  20.196 +    }
  20.197 +    
  20.198 +    printf("Response Hooks:\n");
  20.199 +    rsp_hook = response_hook_chain;
  20.200 +    while (rsp_hook != NULL)
  20.201 +    {
  20.202 +        printf("  [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
  20.203 +        rsp_hook = rsp_hook->next;
  20.204 +    }
  20.205 +}
  20.206 +        
  20.207 +/*-----[ Data to/from Backend (server) VM ]------------------------------*/
  20.208 +/* Copy *req into the be_ring slot at the private producer index, then advance that index. No ring-full check is made here; always returns 0. */
  20.209 +inline int write_req_to_be_ring(blkif_request_t *req)
  20.210 +{
  20.211 +    blkif_request_t *req_d;
  20.212 +
  20.213 +    //req_d = FRONT_RING_NEXT_EMPTY_REQUEST(&be_ring);
  20.214 +    req_d = RING_GET_REQUEST(BLKIF_RING, &be_ring, be_ring.req_prod_pvt);
  20.215 +    memcpy(req_d, req, sizeof(blkif_request_t));
  20.216 +    wmb(); /* barrier between filling the slot and bumping the private index */
  20.217 +    be_ring.req_prod_pvt++;
  20.218 +    
  20.219 +    return 0;
  20.220 +}
  20.221 +/* Copy *rsp into the fe_ring slot at the private response producer index, then advance that index. No ring-full check is made here; always returns 0. */
  20.222 +inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
  20.223 +{
  20.224 +    blkif_response_t *rsp_d;
  20.225 +
  20.226 +    //rsp_d = BACK_RING_NEXT_EMPTY_RESPONSE(&fe_ring);
  20.227 +    rsp_d = RING_GET_RESPONSE(BLKIF_RING, &fe_ring, fe_ring.rsp_prod_pvt);
  20.228 +    memcpy(rsp_d, rsp, sizeof(blkif_response_t));
  20.229 +    wmb(); /* barrier between filling the slot and bumping the private index */
  20.230 +    fe_ring.rsp_prod_pvt++;
  20.231 +
  20.232 +    return 0;
  20.233 +}
  20.234 +/* Call every registered response hook on rsp, in chain order. Any return value other than BLKTAP_PASS only prints a warning; the walk continues. */
  20.235 +static void apply_rsp_hooks(blkif_response_t *rsp)
  20.236 +{
  20.237 +    response_hook_t  *rsp_hook;
  20.238 +    
  20.239 +    rsp_hook = response_hook_chain;
  20.240 +    while (rsp_hook != NULL)
  20.241 +    {
  20.242 +        switch(rsp_hook->func(rsp))
  20.243 +        {
  20.244 +        case BLKTAP_PASS:
  20.245 +            break;
  20.246 +        default:
  20.247 +            printf("Only PASS is supported for resp hooks!\n");
  20.248 +        }
  20.249 +        rsp_hook = rsp_hook->next;
  20.250 +    }
  20.251 +}
  20.252 +/* Run the response hooks on rsp, queue it on fe_ring, push the ring and issue BLKTAP_IOCTL_KICK_FE on the tap fd. The ioctl result is not checked. */
  20.253 +void blktap_inject_response(blkif_response_t *rsp)
  20.254 +{
  20.255 +    apply_rsp_hooks(rsp);
  20.256 +    write_rsp_to_fe_ring(rsp);
  20.257 +    RING_PUSH_RESPONSES(BLKIF_RING, &fe_ring);
  20.258 +    ioctl(fd, BLKTAP_IOCTL_KICK_FE);
  20.259 +}
  20.260 +
  20.261 +/*-----[ Polling fd listeners ]------------------------------------------*/
  20.262 +
  20.263 +#define MAX_POLLFDS 64
  20.264 +
  20.265 +typedef struct {
  20.266 +    int (*func)(int fd);        /* callback run when the fd shows revents */
  20.267 +    struct pollfd *pfd;         /* entry in pfd[]; assigned by blktap_listen() when it builds the poll list */
  20.268 +    int fd;                     /* descriptor supplied to blktap_attach_poll() */
  20.269 +    short events;               /* poll() event mask supplied to blktap_attach_poll() */
  20.270 +    int active;                 /* non-zero while the slot is attached */
  20.271 +} pollhook_t;
  20.272 +
  20.273 +static struct pollfd  pfd[MAX_POLLFDS+1];        /* +1: blktap_listen() appends the tap fd after the hooks */
  20.274 +static pollhook_t     pollhooks[MAX_POLLFDS];
  20.275 +static unsigned int   ph_freelist[MAX_POLLFDS];  /* ring of free pollhooks[] indices */
  20.276 +static unsigned int   ph_cons, ph_prod;          /* attach consumes at ph_cons, detach produces at ph_prod */
  20.277 +#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons)) /* slots currently taken from the free list */
  20.278 +#define PH_IDX(x) (x % MAX_POLLFDS)
  20.279 +/* Register func to be called for fd when poll() reports activity matching events. Returns 0, or -1 when all MAX_POLLFDS slots are taken. */
  20.280 +int blktap_attach_poll(int fd, short events, int (*func)(int fd))
  20.281 +{
  20.282 +    pollhook_t *ph;
  20.283 +    
  20.284 +    if (nr_pollhooks() == MAX_POLLFDS) {
  20.285 +        printf("Too many pollhooks!\n");
  20.286 +        return -1;
  20.287 +    }
  20.288 +    /* Take the next free slot index from the free-list ring. */
  20.289 +    ph = &pollhooks[ph_freelist[PH_IDX(ph_cons++)]];
  20.290 +    /* NOTE(review): ph->pfd is not set here; blktap_listen() assigns it when it next builds the poll list. */
  20.291 +    ph->func        = func;
  20.292 +    ph->fd          = fd;
  20.293 +    ph->events      = events;
  20.294 +    ph->active      = 1;
  20.295 +    
  20.296 +    printf("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1, 
  20.297 +            nr_pollhooks());
  20.298 +    
  20.299 +    return 0;
  20.300 +}
  20.301 +/* Deactivate the first active poll hook registered for fd and return its slot to the free list. The status line is printed even when no hook matched. */
  20.302 +void blktap_detach_poll(int fd)
  20.303 +{
  20.304 +    int i;
  20.305 +    /* Match on the hook's own fd: ->pfd is only valid after blktap_listen() has built its poll list. */
  20.306 +    for (i=0; i<MAX_POLLFDS; i++)
  20.307 +        if ((pollhooks[i].active) && (pollhooks[i].fd == fd)) {
  20.308 +            ph_freelist[PH_IDX(ph_prod++)] = i;
  20.309 +            if ((pollhooks[i].pfd != NULL) && (pollhooks[i].pfd->fd == fd)) pollhooks[i].pfd->fd = -1;
  20.310 +            pollhooks[i].active = 0;
  20.311 +            break;
  20.312 +        }
  20.313 +        
  20.314 +    printf("Removed fd %d at ph index %d, now %d phs.\n", fd, i, 
  20.315 +            nr_pollhooks());
  20.316 +}
  20.317 +/* Mark every poll hook slot inactive and reset the free-list ring so that all MAX_POLLFDS slots are available. */
  20.318 +void pollhook_init(void)
  20.319 +{
  20.320 +    int i;
  20.321 +    
  20.322 +    for (i=0; i < MAX_POLLFDS; i++) {
  20.323 +        ph_freelist[i] = (i+1) % MAX_POLLFDS; /* free order is 1, 2, ..., MAX_POLLFDS-1, 0 */
  20.324 +        pollhooks[i].active = 0;
  20.325 +    }
  20.326 +    /* prod - cons == MAX_POLLFDS means the free list is full, i.e. nr_pollhooks() == 0. */
  20.327 +    ph_cons = 0;
  20.328 +    ph_prod = MAX_POLLFDS;
  20.329 +}
  20.330 +/* Declared with the GCC constructor attribute: prints a banner and initialises the poll hook free list. */
  20.331 +void __attribute__ ((constructor)) blktaplib_init(void)
  20.332 +{
  20.333 +    printf("[[ C O N S T R U C T O R ]]\n");
  20.334 +    pollhook_init();
  20.335 +}
  20.336 +
  20.337 +/*-----[ The main listen loop ]------------------------------------------*/
  20.338 +/* Main loop: opens /dev/blktap, maps the rings and data pages, selects interpose mode, then polls the tap fd plus all attached poll hooks forever, moving requests FE->BE through the request hooks and responses BE->FE through the response hooks. Returns 0 if open() or mmap() fails; otherwise it does not return. */
  20.339 +int blktap_listen(void)
  20.340 +{
  20.341 +    int               notify_be, notify_fe, tap_pfd;
  20.342 +    
  20.343 +    /* comms rings: */
  20.344 +    blkif_request_t  *req;
  20.345 +    blkif_response_t *rsp;
  20.346 +    control_msg_t    *msg;
  20.347 +    blkif_sring_t    *sring;
  20.348 +    ctrl_sring_t     *csring;
  20.349 +    RING_IDX          rp, i, pfd_count; 
  20.350 +    
  20.351 +    /* handler hooks: */
  20.352 +    request_hook_t   *req_hook;
  20.353 +    response_hook_t  *rsp_hook;
  20.354 +    ctrl_hook_t      *ctrl_hook;
  20.355 +    
  20.356 +    signal (SIGBUS, got_sig_bus);
  20.357 +    signal (SIGINT, got_sig_int);
  20.358 +    
  20.359 +    print_hooks();
  20.360 +    
  20.361 +    fd = open("/dev/blktap", O_RDWR);
  20.362 +    if (fd == -1) {
  20.363 +        printf("open failed! (%d)\n", errno);
  20.364 +        goto open_failed;
  20.365 +    }
  20.366 +    /* One mapping: BLKTAP_RING_PAGES ring pages followed by BLKTAP_MMAP_PAGES data pages. */
  20.367 +    blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, 
  20.368 +             PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  20.369 +    /* mmap() reports failure as MAP_FAILED; an (int) cast would truncate the pointer on 64-bit builds. */
  20.370 +    if (blktap_mem == MAP_FAILED) {
  20.371 +        printf("mmap failed! (%d)\n", errno);
  20.372 +        goto mmap_failed;
  20.373 +    }
  20.374 +
  20.375 +    /* assign the rings to the mapped memory */
  20.376 +    csring = (ctrl_sring_t *)blktap_mem;
  20.377 +    BACK_RING_INIT(CTRL_RING, &ctrl_ring, csring);
  20.378 +    
  20.379 +    sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
  20.380 +    FRONT_RING_INIT(BLKIF_RING, &be_ring, sring);
  20.381 +    
  20.382 +    sring = (blkif_sring_t *)((unsigned long)blktap_mem + (2 *PAGE_SIZE));
  20.383 +    BACK_RING_INIT(BLKIF_RING, &fe_ring, sring);
  20.384 +
  20.385 +    mmap_vstart = (unsigned long)blktap_mem + (BLKTAP_RING_PAGES << PAGE_SHIFT);
  20.386 +    
  20.387 +    printf("fe_ring mapped at: %p\n", fe_ring.sring);
  20.388 +    printf("be_ring mapped at: %p\n", be_ring.sring);
  20.389 +    /* INTERPOSE = INTERCEPT_FE | INTERCEPT_BE. */
  20.390 +    ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
  20.391 +
  20.392 +    while(1) {
  20.393 +        int ret;
  20.394 +        
  20.395 +        /* build the poll list */
  20.396 +        
  20.397 +        DPRINTF("Building poll list.\n");
  20.398 +        
  20.399 +        pfd_count = 0;
  20.400 +        for ( i=0; i < MAX_POLLFDS; i++ ) {
  20.401 +            pollhook_t *ph = &pollhooks[i];
  20.402 +            
  20.403 +            if (ph->active) {
  20.404 +                pfd[pfd_count].fd     = ph->fd;
  20.405 +                pfd[pfd_count].events = ph->events;
  20.406 +                ph->pfd               = &pfd[pfd_count];
  20.407 +                pfd_count++;
  20.408 +            }
  20.409 +        }
  20.410 +        /* The tap device itself always occupies the entry after the last hook. */
  20.411 +        tap_pfd = pfd_count;
  20.412 +        pfd[tap_pfd].fd = fd;
  20.413 +        pfd[tap_pfd].events = POLLIN;
  20.414 +
  20.415 +        DPRINTF("poll() %d fds.\n", pfd_count);
  20.416 +        /* 10 s timeout. ret must hold poll()'s return value, not the result of the == 0 comparison. */
  20.417 +        if ( (ret = poll(pfd, pfd_count+1, 10000)) == 0 ) {
  20.418 +            if (DEBUG_RING_IDXS)
  20.419 +                ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
  20.420 +            continue;
  20.421 +        }
  20.422 +
  20.423 +        DPRINTF("poll returned %d\n", ret);
  20.424 +        /* A hook attached since the list was built has no pollfd entry yet, so skip it this round. */
  20.425 +        for (i=0; i < MAX_POLLFDS; i++) {
  20.426 +            if ( (pollhooks[i].active ) && (pollhooks[i].pfd != NULL) && (pollhooks[i].pfd->revents ) )
  20.427 +                pollhooks[i].func(pollhooks[i].pfd->fd);
  20.428 +        }
  20.429 +        
  20.430 +        if (pfd[tap_pfd].revents) {
  20.431 +            
  20.432 +            /* empty the control ring */
  20.433 +            rp = ctrl_ring.sring->req_prod;
  20.434 +            rmb();
  20.435 +            for (i = ctrl_ring.req_cons; i != rp; i++) /* != like the fe/be loops below, so an index wrap is tolerated */
  20.436 +            {
  20.437 +                msg = RING_GET_REQUEST(CTRL_RING, &ctrl_ring, i);
  20.438 +
  20.439 +                ctrl_hook = ctrl_hook_chain;
  20.440 +                while (ctrl_hook != NULL)
  20.441 +                {
  20.442 +                    DPRINTF("CTRL_HOOK: %s\n", ctrl_hook->name);
  20.443 +                    /* We currently don't respond to ctrl messages. */
  20.444 +                    ctrl_hook->func(msg);
  20.445 +                    ctrl_hook = ctrl_hook->next;
  20.446 +                }
  20.447 +            }
  20.448 +            /* Using this as a unidirectional ring. */
  20.449 +            ctrl_ring.req_cons = ctrl_ring.rsp_prod_pvt = i;
  20.450 +            RING_PUSH_RESPONSES(CTRL_RING, &ctrl_ring);
  20.451 +            
  20.452 +            /* empty the fe_ring */
  20.453 +            notify_fe = 0;
  20.454 +            notify_be = RING_HAS_UNCONSUMED_REQUESTS(BLKIF_RING, &fe_ring);
  20.455 +            rp = fe_ring.sring->req_prod;
  20.456 +            rmb();
  20.457 +            for (i = fe_ring.req_cons; i != rp; i++)
  20.458 +            {
  20.459 +                int done = 0; /* stop forwarding this request */
  20.460 +
  20.461 +                req = RING_GET_REQUEST(BLKIF_RING, &fe_ring, i);
  20.462 +
  20.463 +                DPRINTF("copying an fe request\n");
  20.464 +
  20.465 +                req_hook = request_hook_chain;
  20.466 +                while (req_hook != NULL)
  20.467 +                {
  20.468 +                    DPRINTF("REQ_HOOK: %s\n", req_hook->name);
  20.469 +                    switch(req_hook->func(req))
  20.470 +                    {
  20.471 +                    case BLKTAP_RESPOND:
  20.472 +                        apply_rsp_hooks((blkif_response_t *)req);
  20.473 +                        write_rsp_to_fe_ring((blkif_response_t *)req);
  20.474 +                        notify_fe = 1;
  20.475 +                        done = 1;
  20.476 +                        break;
  20.477 +                    case BLKTAP_STOLEN:
  20.478 +                        done = 1;
  20.479 +                        break;
  20.480 +                    case BLKTAP_PASS:
  20.481 +                        break;
  20.482 +                    default:
  20.483 +                        printf("Unknown request hook return value!\n");
  20.484 +                    }
  20.485 +                    if (done) break;
  20.486 +                    req_hook = req_hook->next;
  20.487 +                }
  20.488 +                /* No hook answered or stole the request: forward it to the backend. */
  20.489 +                if (done == 0) write_req_to_be_ring(req);
  20.490 +
  20.491 +            }
  20.492 +            fe_ring.req_cons = i;
  20.493 +
  20.494 +            /* empty the be_ring */
  20.495 +            notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(BLKIF_RING, &be_ring);
  20.496 +            rp = be_ring.sring->rsp_prod;
  20.497 +            rmb();
  20.498 +            for (i = be_ring.rsp_cons; i != rp; i++)
  20.499 +            {
  20.500 +
  20.501 +                rsp = RING_GET_RESPONSE(BLKIF_RING, &be_ring, i);
  20.502 +
  20.503 +                DPRINTF("copying a be request\n");
  20.504 +
  20.505 +                apply_rsp_hooks(rsp);
  20.506 +                write_rsp_to_fe_ring(rsp);
  20.507 +            }
  20.508 +            be_ring.rsp_cons = i;
  20.509 +
  20.510 +            /* notify the domains */
  20.511 +
  20.512 +            if (notify_be) {
  20.513 +                DPRINTF("notifying be\n");
  20.514 +                RING_PUSH_REQUESTS(BLKIF_RING, &be_ring);
  20.515 +                ioctl(fd, BLKTAP_IOCTL_KICK_BE);
  20.516 +            }
  20.517 +
  20.518 +            if (notify_fe) {
  20.519 +                DPRINTF("notifying fe\n");
  20.520 +                RING_PUSH_RESPONSES(BLKIF_RING, &fe_ring);
  20.521 +                ioctl(fd, BLKTAP_IOCTL_KICK_FE);
  20.522 +            }
  20.523 +        }        
  20.524 +    }
  20.525 +
  20.526 +    /* Not reached while the loop above has no exit; unmaps the whole region that was mapped. */
  20.527 +    munmap(blktap_mem, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE);
  20.528 +
  20.529 + mmap_failed:
  20.530 +    close(fd);
  20.531 +
  20.532 + open_failed:
  20.533 +    return 0;
  20.534 +}
  20.535 +/* SIGBUS handler installed by blktap_listen(): prints a message and exits with status -1. */
  20.536 +void got_sig_bus() {
  20.537 +    printf("Attempted to access a page that isn't.\n");
  20.538 +    exit(-1);
  20.539 +}
  20.540 +/* SIGINT handler installed by blktap_listen(): switches the tap back to passthrough mode (when fd > 0) and exits with status 0. */
  20.541 +void got_sig_int() {
  20.542 +    printf("quitting -- returning to passthrough mode.\n");
  20.543 +    if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
  20.544 +    exit(0);
  20.545 +} 
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/tools/blktap/blktaplib.h	Tue Feb 08 18:21:54 2005 +0000
    21.3 @@ -0,0 +1,76 @@
    21.4 +/* blktaplib.h
    21.5 + *
    21.6 + * userland accessors to the block tap.
    21.7 + *
    21.8 + * for the moment this is rather simple.
    21.9 + */
   21.10 + 
   21.11 +#ifndef __BLKTAPLIB_H__
   21.12 +#define __BLKTAPLIB_H__
   21.13 +
   21.14 +#include <stdint.h>
   21.15 +
   21.16 +typedef uint8_t            u8;
   21.17 +typedef uint16_t           u16;
   21.18 +typedef uint32_t           u32;
   21.19 +typedef uint64_t           u64;
   21.20 +typedef int8_t             s8;
   21.21 +typedef int16_t            s16;
   21.22 +typedef int32_t            s32;
   21.23 +typedef int64_t            s64;
   21.24 +                                                                                
   21.25 +#if defined(__i386__)
   21.26 +#define rmb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" )
   21.27 +#define wmb() __asm__ __volatile__ ( "" : : : "memory" )
   21.28 +#else
   21.29 +#error "Define barriers"
   21.30 +#endif
   21.31 +    
   21.32 +#include <sys/user.h>
   21.33 +#include <xen/xen.h>
   21.34 +#include <xen/io/blkif.h>
   21.35 +#include <xen/io/ring.h>
   21.36 +#include <xen/io/domain_controller.h>
   21.37 +#include "blkint.h"
   21.38 +
   21.39 +#define BLKTAP_PASS     0 /* Keep passing this request as normal. */
   21.40 +#define BLKTAP_RESPOND  1 /* Request is now a reply.  Return it.  */
   21.41 +#define BLKTAP_STOLEN   2 /* Hook has stolen request.             */
   21.42 +
   21.43 +#define domid_t unsigned short
   21.44 +
   21.45 +inline unsigned int ID_TO_IDX(unsigned long id);
   21.46 +inline domid_t ID_TO_DOM(unsigned long id);
   21.47 +
   21.48 +void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));        /* append to the control hook chain */
   21.49 +void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));   /* append to the request hook chain */
   21.50 +void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *)); /* append to the response hook chain */
   21.51 +void blktap_inject_response(blkif_response_t *);                 /* run response hooks, queue on the FE ring, kick FE */
   21.52 +int  blktap_attach_poll(int fd, short events, int (*func)(int)); /* 0 on success, -1 when no slot is free */
   21.53 +void blktap_detach_poll(int fd);                                 /* release the hook registered for fd */
   21.54 +int  blktap_listen(void);                                        /* main loop; returns 0 only if open/mmap fails */
   21.55 +
   21.56 +/*-----[ Accessing attached data page mappings ]-------------------------*/
   21.57 +#define MMAP_PAGES_PER_REQUEST \
   21.58 +    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
   21.59 +#define MMAP_VADDR(_req,_seg)                        \
   21.60 +    (mmap_vstart +                                   \
   21.61 +     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
   21.62 +     ((_seg) * PAGE_SIZE))
   21.63 +
   21.64 +extern unsigned long mmap_vstart;
   21.65 +
   21.66 +
   21.67 +/*-----[ Defines that are only used by library clients ]-----------------*/
   21.68 +
   21.69 +#ifndef __COMPILING_BLKTAP_LIB
   21.70 +
   21.71 +static char *blkif_op_name[] = {
   21.72 +    [BLKIF_OP_READ]       = "READ",
   21.73 +    [BLKIF_OP_WRITE]      = "WRITE",
   21.74 +    [BLKIF_OP_PROBE]      = "PROBE",
   21.75 +};
   21.76 +
   21.77 +#endif /* __COMPILING_BLKTAP_LIB */
   21.78 +    
   21.79 +#endif /* __BLKTAPLIB_H__ */
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/tools/blktap/libgnbd/Makefile	Tue Feb 08 18:21:54 2005 +0000
    22.3 @@ -0,0 +1,8 @@
    22.4 +
    22.5 +CFLAGS += -Wall -Werror -g
    22.6 +LDFLAGS += -g
    22.7 +
    22.8 +libgnbd.a: libgnbd.o
    22.9 +	$(AR) r $@ $<
   22.10 +
   22.11 +gnbdtest: gnbdtest.o libgnbd.a
    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/tools/blktap/libgnbd/gnbdtest.c	Tue Feb 08 18:21:54 2005 +0000
    23.3 @@ -0,0 +1,90 @@
    23.4 +
    23.5 +#include <err.h>
    23.6 +#include <stdint.h>
    23.7 +#include <stdio.h>
    23.8 +#include <stdlib.h>
    23.9 +#include <string.h>
   23.10 +#include <unistd.h>
   23.11 +
   23.12 +#include <sys/poll.h>
   23.13 +
   23.14 +#include "libgnbd.h"
   23.15 +
   23.16 +#define PRINTF(x) printf x
   23.17 +#if 0
   23.18 +#define DFPRINTF(x...) fprintf(stderr, ##x)
   23.19 +#define DPRINTF(x) DFPRINTF x
   23.20 +#else
   23.21 +#define DPRINTF(x)
   23.22 +#endif
   23.23 +
   23.24 +static unsigned char buf1[8 << 9];
   23.25 +static unsigned char buf2[8 << 9];
   23.26 +static unsigned char buf3[8 << 9];
   23.27 +/* Test driver: connects with gnbd_setup(), then handles gnbd_reply() events on the connection fd; three reads are issued once login completes. */
   23.28 +int
   23.29 +main(int argc, char **argv)
   23.30 +{
   23.31 +	struct gnbd_handle *gh;
   23.32 +	struct pollfd pfd[1];
   23.33 +	int err, tout;
   23.34 +
   23.35 +	gh = gnbd_setup("panik", 0x38e7, "cl349-nahant-beta2-root1",
   23.36 +	    "arcadians.cl.cam.ac.uk");
   23.37 +	if (gh == NULL)
   23.38 +		errx(1, "gnbd_setup");
   23.39 +
   23.40 +	memset(pfd, 0, sizeof(pfd));
   23.41 +	pfd[0].fd = gnbd_fd(gh);
   23.42 +	pfd[0].events = POLLIN;
   23.43 +
   23.44 +	while ((tout = poll(pfd, 1, -1)) >= 0) {	/* block for events; a 0 timeout made this loop spin */
   23.45 +		if (tout == 0)
   23.46 +			continue;
   23.47 +		DPRINTF(("event\n"));
   23.48 +		if (pfd[0].revents) {
   23.49 +			err = gnbd_reply(gh);
   23.50 +			pfd[0].events = POLLIN;
   23.51 +			switch (err) {
   23.52 +			case GNBD_LOGIN_DONE:
   23.53 +				DPRINTF(("sectors: %08llu\n",
   23.54 +					    gnbd_sectors(gh)));
   23.55 +				err = gnbd_read(gh, 8, 8, buf2, 1);
   23.56 +				if (err)
   23.57 +					warnx("gnbd_read");
   23.58 +				err = gnbd_read(gh, 0, 8, buf1, 0);
   23.59 +				if (err)
   23.60 +					warnx("gnbd_read");
   23.61 +				err = gnbd_read(gh, 16, 8, buf3, 2);
   23.62 +				if (err)
   23.63 +					warnx("gnbd_read");
   23.64 +				break;
   23.65 +			case GNBD_REQUEST_DONE:
   23.66 +				DPRINTF(("request done %ld\n",
   23.67 +					    gnbd_finished_request(gh)));
   23.68 +				if (0 && gnbd_finished_request(gh) == 0) {
   23.69 +					write(1, buf1, 8 << 9);
   23.70 +					err = gnbd_write(gh, 0, 8, buf1, 10);
   23.71 +					if (err)
   23.72 +						warnx("gnbd_write");
   23.73 +				}
   23.74 +				break;
   23.75 +			case GNBD_CONTINUE:
   23.76 +				DPRINTF(("continue\n"));
   23.77 +				break;
   23.78 +			case 0:
   23.79 +				break;
   23.80 +			case GNBD_CONTINUE_WRITE:
   23.81 +				DPRINTF(("continue write\n"));
   23.82 +				pfd[0].events |= POLLOUT;
   23.83 +				break;
   23.84 +			default:
   23.85 +				warnx("gnbd_reply error");
   23.86 +				break;
   23.87 +			}
   23.88 +			DPRINTF(("got gnbd reply\n"));
   23.89 +		}
   23.90 +	}
   23.91 +
   23.92 +	return 0;
   23.93 +}
    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/tools/blktap/libgnbd/libgnbd.c	Tue Feb 08 18:21:54 2005 +0000
    24.3 @@ -0,0 +1,647 @@
    24.4 +/* libgnbd.c
    24.5 + * 
    24.6 + * gnbd client library
    24.7 + *
    24.8 + * Copyright (c) 2005, Christian Limpach
    24.9 + */
   24.10 +  
   24.11 +#include <byteswap.h>
   24.12 +#include <endian.h>
   24.13 +#include <err.h>
   24.14 +#include <errno.h>
   24.15 +#include <netdb.h>
   24.16 +#include <stdlib.h>
   24.17 +#include <string.h>
   24.18 +#include <unistd.h>
   24.19 +
   24.20 +#include <sys/socket.h>
   24.21 +#include <sys/time.h>
   24.22 +#include <sys/types.h>
   24.23 +
   24.24 +#include <stdio.h>
   24.25 +
   24.26 +#include "libgnbd.h"
   24.27 +
   24.28 +#define	PROTOCOL_VERSION	2
   24.29 +
   24.30 +#define	EXTERN_KILL_GSERV_REQ	5
   24.31 +#define	EXTERN_LOGIN_REQ	6
   24.32 +
   24.33 +#define	GNBD_REQUEST_MAGIC	0x37a07e00
   24.34 +#define	GNBD_KEEP_ALIVE_MAGIC	0x5b46d8c2
   24.35 +#define	GNBD_REPLY_MAGIC	0x41f09370
   24.36 +
   24.37 +enum {
   24.38 +	GNBD_CMD_READ = 0,
   24.39 +	GNBD_CMD_WRITE = 1,
   24.40 +	GNBD_CMD_DISC = 2,
   24.41 +	GNBD_CMD_PING = 3
   24.42 +};
   24.43 +
   24.44 +#if __BYTE_ORDER == __BIG_ENDIAN
   24.45 +#define htonll(x) (x)
   24.46 +#define ntohll(x) (x)
   24.47 +#endif
   24.48 +#if __BYTE_ORDER == __LITTLE_ENDIAN
   24.49 +#define htonll(x) bswap_64(x)
   24.50 +#define ntohll(x) bswap_64(x)
   24.51 +#endif
   24.52 +
   24.53 +#define PRINTF(x) printf x
   24.54 +#if 0
   24.55 +#define DFPRINTF(x...) fprintf(stderr, ##x)
   24.56 +#define DPRINTF(x) DFPRINTF x
   24.57 +#else
   24.58 +#define DPRINTF(x)
   24.59 +#endif
   24.60 +
   24.61 +struct gnbd_request {	/* one read or write issued through gnbd_read()/gnbd_write() */
   24.62 +	struct gnbd_request	*gr_next;	/* next entry on the handle's outstanding-request list */
   24.63 +	unsigned char		*gr_buf;	/* caller-supplied data buffer */
   24.64 +	ssize_t			gr_size;	/* total transfer size in bytes (sector count << 9) */
   24.65 +	ssize_t			gr_done;	/* bytes of gr_buf transferred so far */
   24.66 +	unsigned long		gr_cookie;	/* caller's tag, reported by gnbd_finished_request() */
   24.67 +};
   24.68 +
   24.69 +struct gnbd_handle {	/* client-side state for one server connection */
   24.70 +	int			gh_fd;	/* socket descriptor; -1 while not connected */
   24.71 +	unsigned int		gh_flags;	/* GHF_* bits; gnbd_reply() dispatches on them */
   24.72 +	uint64_t		gh_sectors;	/* sector count taken from the login reply */
   24.73 +	char			gh_devname[32];	/* device name sent in device_req and login_req */
   24.74 +	char			gh_nodename[65];	/* node name sent in node_req */
   24.75 +	struct sockaddr_in	gh_sin;	/* server address used by socket_connect() */
   24.76 +	struct gnbd_request	*gh_outstanding_requests;	/* head of the list of requests awaiting a reply */
   24.77 +	struct gnbd_request	**gh_outstanding_requests_last;	/* tail link: where the next request is appended */
   24.78 +	struct gnbd_request	*gh_incoming_request;	/* request whose payload is being read (GHF_INCOMING_REQUEST) */
   24.79 +	unsigned long		gh_finished_request;	/* cookie of the last finished request; 0 after a failed payload read */
   24.80 +};
   24.81 +#define	GHF_EXPECT_KILL_GSERV_REPLY	0x0001	/* kill-gserv request sent, reply not yet read */
   24.82 +#define	GHF_EXPECT_LOGIN_REPLY		0x0002	/* login request sent, reply not yet read */
   24.83 +#define	GHF_INCOMING_REQUEST		0x0004	/* payload for gh_incoming_request still being read */
   24.84 +
   24.85 +struct device_req {	/* written after EXTERN_KILL_GSERV_REQ by kill_gserv() */
   24.86 +	char		name[32];	/* device name, filled with strncpy() */
   24.87 +};
   24.88 +
   24.89 +struct node_req {	/* written last in both the kill-gserv and login exchanges */
   24.90 +	char		node_name[65];	/* client node name, filled with strncpy() */
   24.91 +};
   24.92 +
   24.93 +struct login_req {	/* written after EXTERN_LOGIN_REQ by login() */
   24.94 +        uint64_t	timestamp;	/* gettimeofday() in microseconds, passed through htonll() */
   24.95 +        uint16_t	version;	/* PROTOCOL_VERSION, passed through htons() */
   24.96 +        uint8_t		pad[6];
   24.97 +        char		devname[32];	/* device name, filled with strncpy() */
   24.98 +};
   24.99 +
  24.100 +struct login_reply {	/* read by login_reply() */
  24.101 +        uint64_t	sectors;	/* converted with ntohll() into gh_sectors */
  24.102 +        uint16_t	version;	/* non-zero together with err is reported as a version mismatch */
  24.103 +        uint8_t		err;	/* non-zero means the login failed */
  24.104 +        uint8_t		pad[5];
  24.105 +};
  24.106 +
  24.107 +struct gnbd_server_request {	/* request header written by gnbd_read()/gnbd_write() */
  24.108 +	uint32_t	magic;	/* GNBD_REQUEST_MAGIC, passed through htonl() */
  24.109 +	uint32_t	type;	/* GNBD_CMD_*, passed through htonl() */
  24.110 +	char		handle[8];	/* the client stores its struct gnbd_request pointer here */
  24.111 +	uint64_t	from;	/* byte offset (sector << 9), passed through htonll() */
  24.112 +	uint32_t	len;	/* byte count, passed through htonl() */
  24.113 +} __attribute__ ((packed));
  24.114 +
  24.115 +struct gnbd_server_reply {	/* reply header read by gnbd_reply() */
  24.116 +	uint32_t	magic;	/* gnbd_reply() switches on GNBD_REPLY_MAGIC / GNBD_KEEP_ALIVE_MAGIC */
  24.117 +	uint32_t	error;	/* non-zero after ntohl() is treated as a server error */
  24.118 +	char		handle[8];	/* copied back into a struct gnbd_request pointer and looked up */
  24.119 +} __attribute__ ((packed));
  24.120 +
  24.121 +static int
  24.122 +read_buf(int fd, void *buf, size_t count, size_t *read_count)
  24.123 +{
  24.124 +	int err;
  24.125 +
  24.126 +	err = read(fd, buf, count);
  24.127 +	if (read_count) {
  24.128 +		if (err >= 0)
  24.129 +			*read_count = err;
  24.130 +	} else if (err != count)
  24.131 +		return EINTR;	/* xxx */
  24.132 +	return err < 0;
  24.133 +}
  24.134 +
  24.135 +static int
  24.136 +read_4(int fd, unsigned long *val)
  24.137 +{
  24.138 +	unsigned long buf;
  24.139 +	int err;
  24.140 +
  24.141 +	err = read_buf(fd, &buf, sizeof(buf), NULL);
  24.142 +	if (err == 0)
  24.143 +		*val = ntohl(buf);
  24.144 +	return err;
  24.145 +}
  24.146 +
  24.147 +static int
  24.148 +write_buf(int fd, void *buf, size_t count)
  24.149 +{
  24.150 +	int err;
  24.151 +
  24.152 +	err = write(fd, buf, count);
  24.153 +	return err < 0;
  24.154 +}
  24.155 +
  24.156 +static int
  24.157 +write_4(int fd, unsigned long val)
  24.158 +{
  24.159 +	unsigned long buf;
  24.160 +	int err;
  24.161 +
  24.162 +	buf = htonl(val);
  24.163 +	err = write_buf(fd, &buf, sizeof(buf));
  24.164 +	return err;
  24.165 +}
  24.166 +
  24.167 +
  24.168 +static int
  24.169 +socket_connect(struct gnbd_handle *gh)
  24.170 +{
  24.171 +	int err;
  24.172 +
  24.173 +	if (gh->gh_fd >= 0)
  24.174 +		return 0;
  24.175 +
  24.176 +	gh->gh_fd = socket(PF_INET, SOCK_STREAM, 0);
  24.177 +	if (gh->gh_fd < 0) {
  24.178 +		warn("socket");
  24.179 +		return gh->gh_fd;
  24.180 +	}
  24.181 +
  24.182 +	err = connect(gh->gh_fd, (struct sockaddr *)&gh->gh_sin,
  24.183 +	    sizeof(gh->gh_sin));
  24.184 +	if (err) {
  24.185 +		warn("connect");
  24.186 +		goto out;
  24.187 +	}
  24.188 +
  24.189 +	return 0;
  24.190 + out:
  24.191 +	close (gh->gh_fd);
  24.192 +	gh->gh_fd = -1;
  24.193 +	return err;
  24.194 +}
  24.195 +
  24.196 +static int
  24.197 +socket_shutdown(struct gnbd_handle *gh)
  24.198 +{
  24.199 +
  24.200 +	close (gh->gh_fd);
  24.201 +	gh->gh_fd = -1;
  24.202 +	return 0;
  24.203 +}
  24.204 +
  24.205 +static int
  24.206 +find_request(struct gnbd_handle *gh, struct gnbd_request *gr)
  24.207 +{
  24.208 +	struct gnbd_request **tmp;
  24.209 +
  24.210 +	for (tmp = &gh->gh_outstanding_requests; *tmp;
  24.211 +	     tmp = &(*tmp)->gr_next) {
  24.212 +		if (*tmp == gr) {
  24.213 +			*tmp = (*tmp)->gr_next;
  24.214 +			if (*tmp == NULL)
  24.215 +				gh->gh_outstanding_requests_last = tmp;
  24.216 +			return 0;
  24.217 +		}
  24.218 +	}
  24.219 +	return ENOENT;
  24.220 +}
  24.221 +
  24.222 +static int
  24.223 +kill_gserv(struct gnbd_handle *gh)
  24.224 +{
  24.225 +	struct device_req dr;
  24.226 +	struct node_req nr;
  24.227 +	int err;
  24.228 +
  24.229 +	DPRINTF(("gnbd_kill_gserv\n"));
  24.230 +	err = socket_connect(gh);
  24.231 +	if (err) {
  24.232 +		warnx("socket_connect");
  24.233 +		return err;
  24.234 +	}
  24.235 +
  24.236 +	err = write_4(gh->gh_fd, EXTERN_KILL_GSERV_REQ);
  24.237 +	if (err) {
  24.238 +		warnx("send EXTERN_LOGIN_REQ failed");
  24.239 +		goto out;
  24.240 +	}
  24.241 +
  24.242 +	strncpy(dr.name, gh->gh_devname, sizeof(dr.name));
  24.243 +	err = write_buf(gh->gh_fd, &dr, sizeof(dr));
  24.244 +	if (err) {
  24.245 +		warnx("send device_req failed");
  24.246 +		goto out;
  24.247 +	}
  24.248 +
  24.249 +	strncpy(nr.node_name, gh->gh_nodename, sizeof(nr.node_name));
  24.250 +	err = write_buf(gh->gh_fd, &nr, sizeof(nr));
  24.251 +	if (err) {
  24.252 +		warnx("send node_req failed");
  24.253 +		goto out;
  24.254 +	}
  24.255 +
  24.256 +	gh->gh_flags |= GHF_EXPECT_KILL_GSERV_REPLY;
  24.257 +	DPRINTF(("gnbd_kill_gserv ok\n"));
  24.258 +
  24.259 +	return 0;
  24.260 + out:
  24.261 +	socket_shutdown(gh);
  24.262 +	return err;
  24.263 +}
  24.264 +
  24.265 +static int
  24.266 +login(struct gnbd_handle *gh)
  24.267 +{
  24.268 +	struct login_req lr;
  24.269 +	struct node_req nr;
  24.270 +	int err;
  24.271 +	uint64_t timestamp;
  24.272 +	struct timeval tv;
  24.273 +
  24.274 +	DPRINTF(("gnbd_login\n"));
  24.275 +	err = socket_connect(gh);
  24.276 +	if (err) {
  24.277 +		warnx("socket_connect");
  24.278 +		return err;
  24.279 +	}
  24.280 +
  24.281 +	err = write_4(gh->gh_fd, EXTERN_LOGIN_REQ);
  24.282 +	if (err) {
  24.283 +		warnx("send EXTERN_LOGIN_REQ failed");
  24.284 +		goto out;
  24.285 +	}
  24.286 +
  24.287 +	err = gettimeofday(&tv, NULL);
  24.288 +	if (err) {
  24.289 +		warnx("gettimeofday");
  24.290 +		goto out;
  24.291 +	}
  24.292 +	timestamp = (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec;
  24.293 +
  24.294 +	lr.timestamp = htonll(timestamp);
  24.295 +	lr.version = htons(PROTOCOL_VERSION);
  24.296 +	strncpy(lr.devname, gh->gh_devname, sizeof(lr.devname));
  24.297 +	err = write_buf(gh->gh_fd, &lr, sizeof(lr));
  24.298 +	if (err) {
  24.299 +		warnx("send login_req failed");
  24.300 +		goto out;
  24.301 +	}
  24.302 +
  24.303 +	strncpy(nr.node_name, gh->gh_nodename, sizeof(nr.node_name));
  24.304 +	err = write_buf(gh->gh_fd, &nr, sizeof(nr));
  24.305 +	if (err) {
  24.306 +		warnx("send node_req failed");
  24.307 +		goto out;
  24.308 +	}
  24.309 +
  24.310 +	gh->gh_flags |= GHF_EXPECT_LOGIN_REPLY;
  24.311 +
  24.312 +	DPRINTF(("gnbd_login ok\n"));
  24.313 +	return 0;
  24.314 + out:
  24.315 +	socket_shutdown(gh);
  24.316 +	return err;
  24.317 +}
  24.318 +
  24.319 +static int
  24.320 +kill_gserv_reply(struct gnbd_handle *gh)
  24.321 +{
  24.322 +	unsigned long reply;
  24.323 +	int err;
  24.324 +
  24.325 +	DPRINTF(("read gnbd_kill_gserv_reply\n"));
  24.326 +	err = read_4(gh->gh_fd, &reply);
  24.327 +	if (err) {
  24.328 +		warnx("read kill_gserv_reply failed");
  24.329 +		return err;
  24.330 +	}
  24.331 +
  24.332 +	if (reply && reply != ENODEV) {
  24.333 +		warnx("kill gserv failed: %s", strerror(reply));
  24.334 +		return reply;
  24.335 +	}
  24.336 +
  24.337 +	gh->gh_flags &= ~GHF_EXPECT_KILL_GSERV_REPLY;
  24.338 +	socket_shutdown(gh);
  24.339 +
  24.340 +	err = login(gh);
  24.341 +	if (err)
  24.342 +		warnx("gnbd_login");
  24.343 +
  24.344 +	return err;
  24.345 +}
  24.346 +
  24.347 +static int
  24.348 +login_reply(struct gnbd_handle *gh)
  24.349 +{
  24.350 +	struct login_reply lr;
  24.351 +	int err;
  24.352 +
  24.353 +	DPRINTF(("read gnbd_login_reply\n"));
  24.354 +	err = read_buf(gh->gh_fd, &lr, sizeof(lr), NULL);
  24.355 +	if (err) {
  24.356 +		warnx("read login_reply failed");
  24.357 +		return err;
  24.358 +	}
  24.359 +
  24.360 +	if (lr.err) {
  24.361 +		if (lr.version) {
  24.362 +			warnx("gnbd version mismatch %04x != %04x",
  24.363 +			    PROTOCOL_VERSION, ntohs(lr.version));
  24.364 +			return EINVAL;
  24.365 +		}
  24.366 +		warnx("login refused: %s", strerror(lr.err));
  24.367 +		return lr.err;
  24.368 +	}
  24.369 +	gh->gh_sectors = ntohll(lr.sectors);
  24.370 +
  24.371 +	gh->gh_flags &= ~GHF_EXPECT_LOGIN_REPLY;
  24.372 +
  24.373 +	return GNBD_LOGIN_DONE;
  24.374 +}
  24.375 +
  24.376 +static int
  24.377 +incoming_request(struct gnbd_handle *gh)
  24.378 +{
  24.379 +	struct gnbd_request *gr = gh->gh_incoming_request;
  24.380 +	ssize_t done;
  24.381 +	int err;
  24.382 +
  24.383 +	DPRINTF(("incoming_request: done %d size %d\n", gr->gr_done,
  24.384 +		    gr->gr_size));
  24.385 +	err = read_buf(gh->gh_fd, gr->gr_buf + gr->gr_done,
  24.386 +	    gr->gr_size - gr->gr_done, &done);
  24.387 +	if (err)
  24.388 +		goto out;
  24.389 +
  24.390 +	DPRINTF(("incoming_request: got %d\n", done));
  24.391 +	gr->gr_done += done;
  24.392 +	if (gr->gr_done == gr->gr_size) {
  24.393 +		gh->gh_flags &= ~GHF_INCOMING_REQUEST;
  24.394 +		gh->gh_finished_request = gr->gr_cookie;
  24.395 +		free(gr);
  24.396 +		return GNBD_REQUEST_DONE;
  24.397 +	}
  24.398 +
  24.399 +	return GNBD_CONTINUE;
  24.400 +
  24.401 + out:
  24.402 +	gh->gh_flags &= ~GHF_INCOMING_REQUEST;
  24.403 +	gh->gh_finished_request = 0;
  24.404 +	free(gr);
  24.405 +	return err;
  24.406 +}
  24.407 +
  24.408 +
  24.409 +
  24.410 +int
  24.411 +gnbd_close(struct gnbd_handle *gh)
  24.412 +{
  24.413 +	int err;
  24.414 +	struct gnbd_request **tmp;
  24.415 +
  24.416 +	for (tmp = &gh->gh_outstanding_requests; *tmp; tmp = &(*tmp)->gr_next)
  24.417 +		free(*tmp);
  24.418 +
  24.419 +	if (gh->gh_flags & GHF_INCOMING_REQUEST)
  24.420 +		free(gh->gh_incoming_request);
  24.421 +
  24.422 +	err = close(gh->gh_fd);
  24.423 +	if (err)
  24.424 +		warnx("close");
  24.425 +	free(gh);
  24.426 +
  24.427 +	return err;
  24.428 +}
  24.429 +
  24.430 +int
  24.431 +gnbd_fd(struct gnbd_handle *gh)
  24.432 +{
  24.433 +	return gh->gh_fd;
  24.434 +}
  24.435 +
  24.436 +unsigned long
  24.437 +gnbd_finished_request(struct gnbd_handle *gh)
  24.438 +{
  24.439 +	return gh->gh_finished_request;
  24.440 +}
  24.441 +
  24.442 +int
  24.443 +gnbd_read(struct gnbd_handle *gh, uint64_t sector, ssize_t count,
  24.444 +    unsigned char *buf, unsigned long cookie)
  24.445 +{
  24.446 +	struct gnbd_server_request gsr;
  24.447 +	struct gnbd_request *gr;
  24.448 +	int err;
  24.449 +
  24.450 +	gr = malloc(sizeof(struct gnbd_request));
  24.451 +	if (gr == NULL)
  24.452 +		return ENOMEM;
  24.453 +	memset(gr, 0, sizeof(gr));
  24.454 +
  24.455 +	gr->gr_buf = buf;
  24.456 +	gr->gr_size = count << 9;
  24.457 +	gr->gr_done = 0;
  24.458 +	gr->gr_cookie = cookie;
  24.459 +
  24.460 +	gsr.magic = htonl(GNBD_REQUEST_MAGIC);
  24.461 +	gsr.type = htonl(GNBD_CMD_READ);
  24.462 +	gsr.from = htonll(sector << 9);
  24.463 +	gsr.len = htonl(gr->gr_size);
  24.464 +	memset(gsr.handle, 0, sizeof(gsr.handle));
  24.465 +	memcpy(gsr.handle, &gr, sizeof(gr));
  24.466 +
  24.467 +	err = write_buf(gh->gh_fd, &gsr, sizeof(gsr));
  24.468 +	if (err) {
  24.469 +		warnx("write_buf");
  24.470 +		goto out;
  24.471 +	}
  24.472 +
  24.473 +	*gh->gh_outstanding_requests_last = gr;
  24.474 +	gh->gh_outstanding_requests_last = &gr->gr_next;
  24.475 +
  24.476 +	return 0;
  24.477 +
  24.478 + out:
  24.479 +	free(gr);
  24.480 +	return err;
  24.481 +}
  24.482 +
  24.483 +int
  24.484 +gnbd_write(struct gnbd_handle *gh, uint64_t sector, ssize_t count,
  24.485 +    unsigned char *buf, unsigned long cookie)
  24.486 +{
  24.487 +	struct gnbd_server_request gsr;
  24.488 +	struct gnbd_request *gr;
  24.489 +	int err;
  24.490 +
  24.491 +	gr = malloc(sizeof(struct gnbd_request));
  24.492 +	if (gr == NULL)
  24.493 +		return ENOMEM;
  24.494 +	memset(gr, 0, sizeof(gr));
  24.495 +
  24.496 +	gr->gr_buf = buf;
  24.497 +	gr->gr_size = count << 9;
  24.498 +	gr->gr_done = 0;
  24.499 +	gr->gr_cookie = cookie;
  24.500 +
  24.501 +	gsr.magic = htonl(GNBD_REQUEST_MAGIC);
  24.502 +	gsr.type = htonl(GNBD_CMD_WRITE);
  24.503 +	gsr.from = htonll(sector << 9);
  24.504 +	gsr.len = htonl(gr->gr_size);
  24.505 +	memset(gsr.handle, 0, sizeof(gsr.handle));
  24.506 +	memcpy(gsr.handle, &gr, sizeof(gr));
  24.507 +
  24.508 +	err = write_buf(gh->gh_fd, &gsr, sizeof(gsr));
  24.509 +	if (err) {
  24.510 +		warnx("write_buf");
  24.511 +		goto out;
  24.512 +	}
  24.513 +
  24.514 +	/* XXX handle non-blocking socket */
  24.515 +	err = write_buf(gh->gh_fd, buf, gr->gr_size);
  24.516 +	if (err) {
  24.517 +		warnx("write_buf");
  24.518 +		goto out;
  24.519 +	}
  24.520 +	gr->gr_done += gr->gr_size;
  24.521 +
  24.522 +	*gh->gh_outstanding_requests_last = gr;
  24.523 +	gh->gh_outstanding_requests_last = &gr->gr_next;
  24.524 +
  24.525 +	DPRINTF(("write done\n"));
  24.526 +
  24.527 +	return 0;
  24.528 +
  24.529 + out:
  24.530 +	free(gr);
  24.531 +	return err;
  24.532 +}
  24.533 +
  24.534 +int
  24.535 +gnbd_reply(struct gnbd_handle *gh)
  24.536 +{
  24.537 +	struct gnbd_server_reply gsr;
  24.538 +	struct gnbd_request *gr;
  24.539 +	int err;
  24.540 +
  24.541 +	DPRINTF(("gnbd_reply flags %x\n", gh->gh_flags));
  24.542 +	if ((gh->gh_flags & GHF_EXPECT_KILL_GSERV_REPLY))
  24.543 +		return kill_gserv_reply(gh);
  24.544 +	if ((gh->gh_flags & GHF_EXPECT_LOGIN_REPLY))
  24.545 +		return login_reply(gh);
  24.546 +	if ((gh->gh_flags & GHF_INCOMING_REQUEST))
  24.547 +		return incoming_request(gh);
  24.548 +
  24.549 +	DPRINTF(("read response\n"));
  24.550 +	err = read_buf(gh->gh_fd, &gsr, sizeof(gsr), NULL);
  24.551 +	if (err) {
  24.552 +		warnx("read gnbd_reply failed");
  24.553 +		return err;
  24.554 +	}
  24.555 +
  24.556 +	if (ntohl(gsr.error)) {
  24.557 +		warnx("gnbd server reply error: %s", strerror(gsr.error));
  24.558 +		return gsr.error;
  24.559 +	}
  24.560 +
  24.561 +	switch (ntohl(gsr.magic)) {
  24.562 +	case GNBD_KEEP_ALIVE_MAGIC:
  24.563 +		DPRINTF(("read keep alive magic\n"));
  24.564 +		return GNBD_CONTINUE;
  24.565 +	case GNBD_REPLY_MAGIC:
  24.566 +		DPRINTF(("read reply magic\n"));
  24.567 +		memcpy(&gr, gsr.handle, sizeof(gr));
  24.568 +		err = find_request(gh, gr);
  24.569 +		if (err) {
  24.570 +			warnx("unknown request");
  24.571 +			return err;
  24.572 +		}
  24.573 +		if (gr->gr_done != gr->gr_size) {
  24.574 +			gh->gh_incoming_request = gr;
  24.575 +			gh->gh_flags |= GHF_INCOMING_REQUEST;
  24.576 +			return GNBD_CONTINUE;
  24.577 +		} else {
  24.578 +			gh->gh_finished_request = gr->gr_cookie;
  24.579 +			free(gr);
  24.580 +			return GNBD_REQUEST_DONE;
  24.581 +		}
  24.582 +	default:
  24.583 +		break;
  24.584 +	}
  24.585 +
  24.586 +	return GNBD_CONTINUE;
  24.587 +}
  24.588 +
  24.589 +uint64_t
  24.590 +gnbd_sectors(struct gnbd_handle *gh)
  24.591 +{
  24.592 +
  24.593 +	return gh->gh_sectors;
  24.594 +}
  24.595 +
  24.596 +struct gnbd_handle *
  24.597 +gnbd_setup(char *server, unsigned int port, char *devname, char *nodename)
  24.598 +{
  24.599 +	struct gnbd_handle *gh;
  24.600 +	struct addrinfo *res, *ai;
  24.601 +	int err;
  24.602 +
  24.603 +	gh = malloc(sizeof(struct gnbd_handle));
  24.604 +	if (gh == NULL)
  24.605 +		return NULL;
  24.606 +	memset(gh, 0, sizeof(gh));
  24.607 +	gh->gh_fd = -1;
  24.608 +	gh->gh_outstanding_requests_last = &gh->gh_outstanding_requests;
  24.609 +
  24.610 +	strncpy(gh->gh_devname, devname, sizeof(gh->gh_devname));
  24.611 +	strncpy(gh->gh_nodename, nodename, sizeof(gh->gh_nodename));
  24.612 +
  24.613 +	err = getaddrinfo(server, NULL, NULL, &res);
  24.614 +	if (err) {
  24.615 +		if (err != EAI_SYSTEM)
  24.616 +			warnx("getaddrinfo: %s", gai_strerror(err));
  24.617 +		else
  24.618 +			warn("getaddrinfo: %s", gai_strerror(err));
  24.619 +		goto out;
  24.620 +	}
  24.621 +
  24.622 +	for (ai = res; ai; ai = ai->ai_next) {
  24.623 +		if (ai->ai_socktype != SOCK_STREAM)
  24.624 +			continue;
  24.625 +		if (ai->ai_family == AF_INET)
  24.626 +			break;
  24.627 +	}
  24.628 +
  24.629 +	if (ai == NULL)
  24.630 +		goto out;
  24.631 +
  24.632 +	gh->gh_sin.sin_family = ai->ai_family;
  24.633 +	gh->gh_sin.sin_port = htons(port);
  24.634 +	memcpy(&gh->gh_sin.sin_addr,
  24.635 +	    &((struct sockaddr_in *)ai->ai_addr)->sin_addr,
  24.636 +	    sizeof(gh->gh_sin.sin_addr));
  24.637 +
  24.638 +	err = kill_gserv(gh);
  24.639 +	if (err) {
  24.640 +		warnx("gnbd_kill_gserv");
  24.641 +		goto out;
  24.642 +	}
  24.643 +
  24.644 +	freeaddrinfo(res);
  24.645 +	return gh;
  24.646 + out:
  24.647 +	free(gh);
  24.648 +	freeaddrinfo(res);
  24.649 +	return NULL;
  24.650 +}
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/tools/blktap/libgnbd/libgnbd.h	Tue Feb 08 18:21:54 2005 +0000
    25.3 @@ -0,0 +1,25 @@
    25.4 +/* libgnbd.h
    25.5 + *
    25.6 + * gnbd client library
    25.7 + *
    25.8 + * Copyright (c) 2005, Christian Limpach
    25.9 + */
   25.10 +     
   25.11 +#define GNBD_LOGIN_DONE		0x10001	/* gnbd_reply(): login reply accepted, sector count stored */
   25.12 +#define GNBD_REQUEST_DONE	0x10002	/* gnbd_reply(): a request finished; see gnbd_finished_request() */
   25.13 +#define GNBD_CONTINUE		0x10003	/* gnbd_reply(): nothing finished by this call */
   25.14 +#define GNBD_CONTINUE_WRITE	0x10004	/* NOTE(review): not returned anywhere in libgnbd.c as shown */
   25.15 +
   25.16 +struct gnbd_handle;	/* opaque; defined in libgnbd.c */
   25.17 +int gnbd_close(struct gnbd_handle *);	/* frees pending requests and the handle, closes the socket */
   25.18 +int gnbd_fd(struct gnbd_handle *);	/* socket descriptor of the handle */
   25.19 +unsigned long gnbd_finished_request(struct gnbd_handle *);	/* cookie of the last finished request */
   25.20 +int gnbd_kill_gserv(struct gnbd_handle *);	/* NOTE(review): libgnbd.c as shown defines only a static kill_gserv() */
   25.21 +int gnbd_login(struct gnbd_handle *);	/* NOTE(review): libgnbd.c as shown defines only a static login() */
   25.22 +int gnbd_read(struct gnbd_handle *, uint64_t, ssize_t, unsigned char *,
   25.23 +    unsigned long);	/* args: handle, start sector, sector count, buffer, cookie */
   25.24 +int gnbd_write(struct gnbd_handle *, uint64_t, ssize_t, unsigned char *,
   25.25 +    unsigned long);	/* args: handle, start sector, sector count, buffer, cookie */
   25.26 +int gnbd_reply(struct gnbd_handle *);	/* reads the next server message; returns a GNBD_* code, 0, or an error */
   25.27 +uint64_t gnbd_sectors(struct gnbd_handle *);	/* sector count from the login reply */
   25.28 +struct gnbd_handle *gnbd_setup(char *, unsigned int, char *, char *);	/* args: server, port, device name, node name; NULL on failure */
    26.1 --- a/tools/python/xen/xend/server/blkif.py	Tue Feb 08 18:03:51 2005 +0000
    26.2 +++ b/tools/python/xen/xend/server/blkif.py	Tue Feb 08 18:21:54 2005 +0000
    26.3 @@ -363,6 +363,20 @@ class BlkDev(controller.SplitDev):
    26.4          Blkctl.block('unbind', self.type, self.node)
    26.5  
    26.6      def setNode(self, node):
    26.7 +    
    26.8 +        # NOTE: 
    26.9 +        # This clause is testing code for storage system experiments.
   26.10 +        # Add a new disk type that will just pass an opaque id in the
   26.11 +        # start_sector and use an experimental device type.
   26.12 +        # Please contact andrew.warfield@cl.cam.ac.uk with any concerns.
   26.13 +        if self.type == 'amorfs':
   26.14 +            self.node   = node
   26.15 +            self.device =  61440 # (240,0)
   26.16 +            self.start_sector = long(self.params)
   26.17 +            self.nr_sectors = long(0)
   26.18 +            return
   26.19 +        # done.
   26.20 +            
   26.21          mounted_mode = check_mounted(self, node)
   26.22          if not '!' in self.mode and mounted_mode:
   26.23              if mounted_mode is "w":
    27.1 --- a/xen/include/public/io/blkif.h	Tue Feb 08 18:03:51 2005 +0000
    27.2 +++ b/xen/include/public/io/blkif.h	Tue Feb 08 18:21:54 2005 +0000
    27.3 @@ -9,7 +9,7 @@
    27.4  #ifndef __XEN_PUBLIC_IO_BLKIF_H__
    27.5  #define __XEN_PUBLIC_IO_BLKIF_H__
    27.6  
    27.7 -#include <asm-xen/xen-public/io/ring.h>
    27.8 +#include "ring.h"
    27.9  
   27.10  #define blkif_vdev_t   u16
   27.11  #define blkif_sector_t u64