ia64/xen-unstable

changeset 10458:840f33e54054

Remove old blktap tools.

Signed-off-by: Andrew Warfield <andrew.warfield@cl.cam.ac.uk>
author akw@localhost.localdomain
date Fri Jun 16 18:45:45 2006 -0700 (2006-06-16)
parents 533bad7c0883
children 716e365377f5
files
line diff
     1.1 --- a/tools/blktap/Makefile	Fri Jun 16 18:19:40 2006 +0100
     1.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.3 @@ -1,94 +0,0 @@
     1.4 -MAJOR    = 3.0
     1.5 -MINOR    = 0
     1.6 -SONAME   = libblktap.so.$(MAJOR)
     1.7 -
     1.8 -XEN_ROOT = ../..
     1.9 -include $(XEN_ROOT)/tools/Rules.mk
    1.10 -
    1.11 -SUBDIRS :=
    1.12 -SUBDIRS += ublkback
    1.13 -#SUBDIRS += parallax
    1.14 -
    1.15 -BLKTAP_INSTALL_DIR = /usr/sbin
    1.16 -
    1.17 -INSTALL            = install
    1.18 -INSTALL_PROG       = $(INSTALL) -m0755
    1.19 -INSTALL_DIR        = $(INSTALL) -d -m0755
    1.20 -
    1.21 -INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
    1.22 -
    1.23 -LIBS     := -lpthread -lz
    1.24 -
    1.25 -SRCS     :=
    1.26 -SRCS     += blktaplib.c xenbus.c blkif.c
    1.27 -
    1.28 -CFLAGS   += -Werror
    1.29 -CFLAGS   += -Wno-unused
    1.30 -CFLAGS   += -fno-strict-aliasing
    1.31 -CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
    1.32 -# get asprintf():
    1.33 -CFLAGS   += -D _GNU_SOURCE
    1.34 -# Get gcc to generate the dependencies for us.
    1.35 -CFLAGS   += -Wp,-MD,.$(@F).d
    1.36 -CFLAGS   += $(INCLUDES) 
    1.37 -DEPS     = .*.d
    1.38 -
    1.39 -OBJS     = $(patsubst %.c,%.o,$(SRCS))
    1.40 -IBINS   :=
    1.41 -#IBINS   += blkdump
    1.42 -
    1.43 -LIB      = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
    1.44 -
    1.45 -.PHONY: all
    1.46 -all: mk-symlinks libblktap.so #blkdump
    1.47 -	@set -e; for subdir in $(SUBDIRS); do \
    1.48 -		$(MAKE) -C $$subdir $@;       \
    1.49 -	done
    1.50 -
    1.51 -.PHONY: install
    1.52 -install: all
    1.53 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
    1.54 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/include
    1.55 -	$(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
    1.56 -	$(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
    1.57 -	#$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR)
    1.58 -	@set -e; for subdir in $(SUBDIRS); do \
    1.59 -		$(MAKE) -C $$subdir $@;       \
    1.60 -	done
    1.61 -
    1.62 -.PHONY: clean
    1.63 -clean:
    1.64 -	rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump
    1.65 -	@set -e; for subdir in $(SUBDIRS); do \
    1.66 -		$(MAKE) -C $$subdir $@;       \
    1.67 -	done
    1.68 -
    1.69 -.PHONY: rpm
    1.70 -rpm: all
    1.71 -	rm -rf staging
    1.72 -	mkdir staging
    1.73 -	mkdir staging/i386
    1.74 -	rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \
    1.75 -		--define "_rpmdir$$PWD/staging" -bb rpm.spec
    1.76 -	mv staging/i386/*.rpm .
    1.77 -	rm -rf staging
    1.78 -
    1.79 -libblktap.so: $(OBJS) 
    1.80 -	$(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared         \
    1.81 -	      -L$(XEN_XENSTORE) -l xenstore                       \
    1.82 -	      -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
    1.83 -	ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
    1.84 -	ln -sf libblktap.so.$(MAJOR) $@
    1.85 -
    1.86 -blkdump: libblktap.so
    1.87 -	$(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. \
    1.88 -	      -l blktap blkdump.c
    1.89 -
    1.90 -.PHONY: TAGS clean install mk-symlinks rpm
    1.91 -
    1.92 -.PHONY: TAGS
    1.93 -TAGS:
    1.94 -	etags -t $(SRCS) *.h
    1.95 -
    1.96 --include $(DEPS)
    1.97 -
     2.1 --- a/tools/blktap/README	Fri Jun 16 18:19:40 2006 +0100
     2.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.3 @@ -1,149 +0,0 @@
     2.4 -Block Tap User-level Interfaces
     2.5 -Andrew Warfield
     2.6 -andrew.warfield@cl.cam.ac.uk
     2.7 -February 8, 2005
     2.8 -
     2.9 -NOTE #1: The blktap is _experimental_ code.  It works for me.  Your
    2.10 -mileage may vary.  Don't use it for anything important.  Please. ;)
    2.11 -
    2.12 -NOTE #2: All of the interfaces here are likely to change.  This is all
    2.13 -early code, and I am checking it in because others want to play with
    2.14 -it.  If you use it for anything, please let me know!
    2.15 -
    2.16 -Overview:
    2.17 ----------
    2.18 -
    2.19 -This directory contains a library and set of example applications for
    2.20 -the block tap device.  The block tap hooks into the split block device
    2.21 -interfaces above Xen allowing them to be extended.  This extension can
    2.22 -be done in userspace with the help of a library.
    2.23 -
    2.24 -The tap can be installed either as an interposition domain in between
    2.25 -a frontend and backend driver pair, or as a terminating backend, in
    2.26 -which case it is responsible for serving all requests itself.
    2.27 -
    2.28 -There are two reasons that you might want to use the tap,
    2.29 -corresponding to these configurations:
    2.30 -
    2.31 - 1. To examine or modify a stream of block requests while they are
    2.32 -    in-flight (e.g. to encrypt data, or add data-driven watchpoints)
    2.33 -
    2.34 - 2. To prototype a new backend driver, serving requests from the tap
    2.35 -    rather than passing them along to the XenLinux blkback driver.
    2.36 -    (e.g. to forward block requests to a remote host)
    2.37 -
    2.38 -
    2.39 -Interface:
    2.40 -----------
    2.41 -
    2.42 -At the moment, the tap interface is similar in spirit to that of the
    2.43 -Linux netfilter.  Requests are messages from a client (frontend)
    2.44 -domain to a disk (backend) domain.  Responses are messages travelling
    2.45 -back, acknowledging the completion of a request.  The library allows
    2.46 -chains of functions to be attached to these events.  In addition,
    2.47 -hooks may be attached to handle control messages, which signify things
    2.48 -like connections from new domains.
    2.49 -
    2.50 -At present the control messages especially expose a lot of the
    2.51 -underlying driver interfaces.  This may change in the future in order
    2.52 -to simplify writing hooks.
    2.53 -
    2.54 -Here are the public interfaces:
    2.55 -
    2.56 -These allow hook functions to be chained:
    2.57 -
    2.58 - void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
    2.59 - void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
    2.60 - void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
    2.61 -
    2.62 -This allows a response to be injected, in the case where a request has
    2.63 -been removed using BLKTAP_STOLEN.
    2.64 -
    2.65 - void blktap_inject_response(blkif_response_t *);
    2.66 -
    2.67 -These let you add file descriptors and handlers to the main poll loop:
    2.68 -
    2.69 - int  blktap_attach_poll(int fd, short events, int (*func)(int));
    2.70 - void blktap_detach_poll(int fd);
    2.71 -
    2.72 -This starts the main poll loop:
    2.73 -
    2.74 - int  blktap_listen(void);
    2.75 -
    2.76 -Example:
    2.77 ---------
    2.78 -
    2.79 -blkimg.c uses an image on the local file system to serve requests to
    2.80 -a domain.  Here's what it looks like:
    2.81 -
    2.82 ----[blkimg.c]---
    2.83 -
    2.84 -/* blkimg.c
    2.85 - *
    2.86 - * file-backed disk.
    2.87 - */
    2.88 -
    2.89 -#include "blktaplib.h"
    2.90 -#include "blkimglib.h"
    2.91 -
    2.92 -
    2.93 -int main(int argc, char *argv[])
    2.94 -{
    2.95 -    image_init();
    2.96 -    
    2.97 -    blktap_register_ctrl_hook("image_control", image_control);
    2.98 -    blktap_register_request_hook("image_request", image_request);
    2.99 -    blktap_listen();
   2.100 -    
   2.101 -    return 0;
   2.102 -}
   2.103 -
   2.104 -----------------
   2.105 -
   2.106 -All of the real work is in blkimglib.c, but this illustrates the
   2.107 -actual tap interface well enough.  image_control() will be called with
   2.108 -all control messages.  image_request() handles requests.  As it reads
   2.109 -from an on-disk image file, no requests are ever passed on to a
   2.110 -backend, and so there will be no responses to process -- so there is
   2.111 -nothing registered as a response hook.
   2.112 -
   2.113 -Other examples:
   2.114 ----------------
   2.115 -
   2.116 -Here is a list of other examples in the directory:
   2.117 -
   2.118 -Things that terminate a block request stream:
   2.119 -
   2.120 -  blkimg    - Use an image file/device to serve requests
   2.121 -  blkgnbd   - Use a remote gnbd server to serve requests
   2.122 -  blkaio    - Use libaio... (DOES NOT WORK)
   2.123 -  
   2.124 -Things that don't:
   2.125 -
   2.126 -  blkdump   - Print in-flight requests.
   2.127 -  blkcow    - Really inefficient copy-on-write disks using libdb to store
   2.128 -              writes.
   2.129 -
   2.130 -There are examples of plugging these things together, for instance
   2.131 -blkcowgnbd is a read-only gnbd device with copy-on-write to a local
   2.132 -file.
   2.133 -
   2.134 -TODO:
   2.135 ------
   2.136 -
   2.137 -- Make session tracking work.  At the moment these generally just handle a 
   2.138 -  single front-end client at a time.
   2.139 -
   2.140 -- Integrate with Xend.  Need to cleanly pass an image identifier in the connect
   2.141 -  message.
   2.142 -
   2.143 -- Make an asynchronous file-io terminator.  The libaio attempt is
   2.144 -  tragically stalled because mapped foreign pages make pfn_valid fail
   2.145 -  (they are VM_IO), and so cannot be passed to aio as targets.  A
   2.146 -  better solution may be to tear the disk interfaces out of the real
   2.147 -  backend and expose them somehow.
   2.148 -
   2.149 -- Make CoW suck less.
   2.150 -
   2.151 -- Do something more along the lines of dynamic linking for the
   2.152 -  plugins, so that they don't all need a new main().
     3.1 --- a/tools/blktap/README.sept05	Fri Jun 16 18:19:40 2006 +0100
     3.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.3 @@ -1,33 +0,0 @@
     3.4 -The blktap has been rewritten substantially based on the current
     3.5 -blkback driver.  I've removed passthrough support, as this is broken
     3.6 -by the move to grant tables and the lack of transitive grants.  A
     3.7 -blktap VM is now only capable of terminating block requests in
     3.8 -userspace.
     3.9 -
    3.10 -ublkback/ contains a _very_ initial cut at a user-level version of the block
    3.11 -backend driver.  It gives a working example of how the current tap
    3.12 -interfaces are used, in particular w.r.t. the vbd directories in
    3.13 -xenstore.
    3.14 -
    3.15 -parallax/ contains fairly recent parallax code.  This does not run on
    3.16 -the changed blktap interface, but should only be a couple of hours
    3.17 -work to get going again.
    3.18 -
    3.19 -All of the tricky bits are done, but there is plenty of cleaning to
    3.20 -do, and the top-level functionality is not here yet.  At the moment,
    3.21 -the daemon ignores the pdev requested by the tools and opens the file 
    3.22 -or device specified by TMP_IMAGE_FILE_NAME in ublkback.c.
    3.23 -
    3.24 -TODO:
    3.25 -1. Fix to allow pdev in the store to specify the device to open.
    3.26 -2. Add support (to tools as well) to mount arbitrary files...
    3.27 -   just write the filename to mount into the store, instead of pdev.
    3.28 -3. Reexamine blkif refcounting, it is almost certainly broken at the moment.
    3.29 -   - creating a blkif should take a reference.
    3.30 -   - each inflight request should take a reference on dequeue in blktaplib
    3.31 -   - sending responses should drop refs.
    3.32 -   - blkif should be implicitly freed when refcounts fall to 0.
    3.33 -4. Modify the parallax req/rsp code as per ublkback to use the new tap 
    3.34 -   interfaces. 
    3.35 -5. Write a front end that allows parallax and normal mounts to coexist
    3.36 -6. Allow blkback and blktap to run at the same time.
     4.1 --- a/tools/blktap/blkdump.c	Fri Jun 16 18:19:40 2006 +0100
     4.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.3 @@ -1,62 +0,0 @@
     4.4 -/* blkdump.c
     4.5 - *
     4.6 - * show a running trace of block requests as they fly by.
     4.7 - * 
     4.8 - * (c) 2004 Andrew Warfield.
     4.9 - */
    4.10 - 
    4.11 -#include <stdio.h>
    4.12 -#include "blktaplib.h"
    4.13 - 
    4.14 -int request_print(blkif_request_t *req)
    4.15 -{
    4.16 -    int i;
    4.17 -    
    4.18 -    if ( (req->operation == BLKIF_OP_READ) ||
    4.19 -         (req->operation == BLKIF_OP_WRITE) )
    4.20 -    {
    4.21 -        printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", 
    4.22 -                ID_TO_DOM(req->id), ID_TO_IDX(req->id), 
    4.23 -                blkif_op_name[req->operation], 
    4.24 -                req->nr_segments, req->handle, 
    4.25 -                req->sector_number);
    4.26 -        
    4.27 -        
    4.28 -        for (i=0; i < req->nr_segments; i++) {
    4.29 -            printf("              (gref: 0x%8x start: %u stop: %u)\n",
    4.30 -                   req->seg[i].gref,
    4.31 -                   req->seg[i].first_sect,
    4.32 -                   req->seg[i].last_sect);
    4.33 -        }
    4.34 -            
    4.35 -    } else {
    4.36 -        printf("Unknown request message type.\n");
    4.37 -    }
    4.38 -    
    4.39 -    return BLKTAP_PASS;
    4.40 -}
    4.41 -
    4.42 -int response_print(blkif_response_t *rsp)
    4.43 -{   
    4.44 -    if ( (rsp->operation == BLKIF_OP_READ) ||
    4.45 -         (rsp->operation == BLKIF_OP_WRITE) )
    4.46 -    {
    4.47 -        printf("[%2u:%2u>%5s] (status: %d)\n", 
    4.48 -                ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), 
    4.49 -                blkif_op_name[rsp->operation], 
    4.50 -                rsp->status);
    4.51 -            
    4.52 -    } else {
    4.53 -        printf("Unknown request message type.\n");
    4.54 -    }
    4.55 -    return BLKTAP_PASS;
    4.56 -}
    4.57 -
    4.58 -int main(int argc, char *argv[])
    4.59 -{
    4.60 -    blktap_register_request_hook("request_print", request_print);
    4.61 -    blktap_register_response_hook("response_print", response_print);
    4.62 -    blktap_listen();
    4.63 -    
    4.64 -    return 0;
    4.65 -}
     5.1 --- a/tools/blktap/blkif.c	Fri Jun 16 18:19:40 2006 +0100
     5.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.3 @@ -1,212 +0,0 @@
     5.4 -/*
     5.5 - * blkif.c
     5.6 - * 
     5.7 - * The blkif interface for blktap.  A blkif describes an in-use virtual disk.
     5.8 - */
     5.9 -
    5.10 -#include <stdio.h>
    5.11 -#include <stdlib.h>
    5.12 -#include <errno.h>
    5.13 -#include <string.h>
    5.14 -#include <err.h>
    5.15 -
    5.16 -#include "blktaplib.h"
    5.17 -
    5.18 -#if 1
    5.19 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
    5.20 -#else
    5.21 -#define DPRINTF(_f, _a...) ((void)0)
    5.22 -#endif
    5.23 -
    5.24 -#define BLKIF_HASHSZ 1024
    5.25 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
    5.26 -
    5.27 -static blkif_t      *blkif_hash[BLKIF_HASHSZ];
    5.28 -
    5.29 -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
    5.30 -{
    5.31 -    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
    5.32 -    while ( (blkif != NULL) && 
    5.33 -            ((blkif->domid != domid) || (blkif->handle != handle)) )
    5.34 -        blkif = blkif->hash_next;
    5.35 -    return blkif;
    5.36 -}
    5.37 -
    5.38 -blkif_t *alloc_blkif(domid_t domid)
    5.39 -{
    5.40 -    blkif_t *blkif;
    5.41 -
    5.42 -    blkif = (blkif_t *)malloc(sizeof(blkif_t));
    5.43 -    if (!blkif)
    5.44 -        return NULL;
    5.45 -
    5.46 -    memset(blkif, 0, sizeof(*blkif));
    5.47 -    blkif->domid = domid;
    5.48 -
    5.49 -    return blkif;
    5.50 -}
    5.51 -
    5.52 -static int (*new_blkif_hook)(blkif_t *blkif) = NULL;
    5.53 -void register_new_blkif_hook(int (*fn)(blkif_t *blkif))
    5.54 -{
    5.55 -    new_blkif_hook = fn;
    5.56 -}
    5.57 -
    5.58 -int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
    5.59 -               long int readonly)
    5.60 -{
    5.61 -    domid_t domid;
    5.62 -    blkif_t **pblkif;
    5.63 -    
    5.64 -    if (blkif == NULL)
    5.65 -        return -EINVAL;
    5.66 -
    5.67 -    domid = blkif->domid;
    5.68 -    blkif->handle   = handle;
    5.69 -    blkif->pdev     = pdev;
    5.70 -    blkif->readonly = readonly;
    5.71 -
    5.72 -    /*
    5.73 -     * Call out to the new_blkif_hook. The tap application should define this,
    5.74 -     * and it should return having set blkif->ops
    5.75 -     * 
    5.76 -     */
    5.77 -    if (new_blkif_hook == NULL)
    5.78 -    {
    5.79 -        warn("Probe detected a new blkif, but no new_blkif_hook!");
    5.80 -        return -1;
    5.81 -    }
    5.82 -    new_blkif_hook(blkif);
    5.83 -
    5.84 -    /* Now wire it in. */
    5.85 -    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
    5.86 -    while ( *pblkif != NULL )
    5.87 -    {
    5.88 -        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
    5.89 -        {
    5.90 -            DPRINTF("Could not create blkif: already exists\n");
    5.91 -            return -1;
    5.92 -        }
    5.93 -        pblkif = &(*pblkif)->hash_next;
    5.94 -    }
    5.95 -    blkif->hash_next = NULL;
    5.96 -    *pblkif = blkif;
    5.97 -
    5.98 -    return 0;
    5.99 -}
   5.100 -
   5.101 -void free_blkif(blkif_t *blkif)
   5.102 -{
   5.103 -    blkif_t **pblkif, *curs;
   5.104 -    
   5.105 -    pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)];
   5.106 -    while ( (curs = *pblkif) != NULL )
   5.107 -    {
   5.108 -        if ( blkif == curs )
   5.109 -        {
   5.110 -            *pblkif = curs->hash_next;
   5.111 -        }
   5.112 -        pblkif = &curs->hash_next;
   5.113 -    }
   5.114 -    free(blkif);
   5.115 -}
   5.116 -
   5.117 -void blkif_register_request_hook(blkif_t *blkif, char *name, 
   5.118 -                                 int (*rh)(blkif_t *, blkif_request_t *, int)) 
   5.119 -{
   5.120 -    request_hook_t *rh_ent, **c;
   5.121 -    
   5.122 -    rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
   5.123 -    if (!rh_ent) 
   5.124 -    {
   5.125 -        warn("couldn't allocate a new hook");
   5.126 -        return;
   5.127 -    }
   5.128 -    
   5.129 -    rh_ent->func  = rh;
   5.130 -    rh_ent->next = NULL;
   5.131 -    if (asprintf(&rh_ent->name, "%s", name) == -1)
   5.132 -    {
   5.133 -        free(rh_ent);
   5.134 -        warn("couldn't allocate a new hook name");
   5.135 -        return;
   5.136 -    }
   5.137 -    
   5.138 -    c = &blkif->request_hook_chain;
   5.139 -    while (*c != NULL) {
   5.140 -        c = &(*c)->next;
   5.141 -    }
   5.142 -    *c = rh_ent;
   5.143 -}
   5.144 -
   5.145 -void blkif_register_response_hook(blkif_t *blkif, char *name, 
   5.146 -                                  int (*rh)(blkif_t *, blkif_response_t *, int)) 
   5.147 -{
   5.148 -    response_hook_t *rh_ent, **c;
   5.149 -    
   5.150 -    rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
   5.151 -    if (!rh_ent) 
   5.152 -    { 
   5.153 -        warn("couldn't allocate a new hook");
   5.154 -        return;
   5.155 -    }
   5.156 -    
   5.157 -    rh_ent->func  = rh;
   5.158 -    rh_ent->next = NULL;
   5.159 -    if (asprintf(&rh_ent->name, "%s", name) == -1)
   5.160 -    {
   5.161 -        free(rh_ent);
   5.162 -        warn("couldn't allocate a new hook name");
   5.163 -        return;
   5.164 -    }
   5.165 -    
   5.166 -    c = &blkif->response_hook_chain;
   5.167 -    while (*c != NULL) {
   5.168 -        c = &(*c)->next;
   5.169 -    }
   5.170 -    *c = rh_ent;
   5.171 -}
   5.172 -
   5.173 -void blkif_print_hooks(blkif_t *blkif)
   5.174 -{
   5.175 -    request_hook_t  *req_hook;
   5.176 -    response_hook_t *rsp_hook;
   5.177 -    
   5.178 -    DPRINTF("Request Hooks:\n");
   5.179 -    req_hook = blkif->request_hook_chain;
   5.180 -    while (req_hook != NULL)
   5.181 -    {
   5.182 -        DPRINTF("  [0x%p] %s\n", req_hook->func, req_hook->name);
   5.183 -        req_hook = req_hook->next;
   5.184 -    }
   5.185 -    
   5.186 -    DPRINTF("Response Hooks:\n");
   5.187 -    rsp_hook = blkif->response_hook_chain;
   5.188 -    while (rsp_hook != NULL)
   5.189 -    {
   5.190 -        DPRINTF("  [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
   5.191 -        rsp_hook = rsp_hook->next;
   5.192 -    }
   5.193 -}
   5.194 -
   5.195 -
   5.196 -long int vbd_size(blkif_t *blkif)
   5.197 -{
   5.198 -    return 1000000000;
   5.199 -}
   5.200 -
   5.201 -long int vbd_secsize(blkif_t *blkif)
   5.202 -{
   5.203 -    return 512;
   5.204 -}
   5.205 -
   5.206 -unsigned vbd_info(blkif_t *blkif)
   5.207 -{
   5.208 -    return 0;
   5.209 -}
   5.210 -
   5.211 -
   5.212 -void __init_blkif(void)
   5.213 -{    
   5.214 -    memset(blkif_hash, 0, sizeof(blkif_hash));
   5.215 -}
     6.1 --- a/tools/blktap/blktaplib.c	Fri Jun 16 18:19:40 2006 +0100
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,453 +0,0 @@
     6.4 -/*
     6.5 - * blktaplib.c
     6.6 - * 
     6.7 - * userspace interface routines for the blktap driver.
     6.8 - *
     6.9 - * (threadsafe(r) version) 
    6.10 - *
    6.11 - * (c) 2004 Andrew Warfield.
    6.12 - */
    6.13 -
    6.14 -#include <stdio.h>
    6.15 -#include <stdlib.h>
    6.16 -#include <sys/mman.h>
    6.17 -#include <sys/user.h>
    6.18 -#include <err.h>
    6.19 -#include <errno.h>
    6.20 -#include <sys/types.h>
    6.21 -#include <linux/types.h>
    6.22 -#include <sys/stat.h>
    6.23 -#include <fcntl.h>
    6.24 -#include <signal.h>
    6.25 -#include <sys/poll.h>
    6.26 -#include <sys/ioctl.h>
    6.27 -#include <string.h>
    6.28 -#include <unistd.h>
    6.29 -#include <pthread.h>
    6.30 -#include <xs.h>
    6.31 -                                                                     
    6.32 -#define __COMPILING_BLKTAP_LIB
    6.33 -#include "blktaplib.h"
    6.34 -
    6.35 -#if 0
    6.36 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
    6.37 -#else
    6.38 -#define DPRINTF(_f, _a...) ((void)0)
    6.39 -#endif
    6.40 -#define DEBUG_RING_IDXS 0
    6.41 -
    6.42 -#define POLLRDNORM     0x040 
    6.43 -
    6.44 -#define BLKTAP_IOCTL_KICK 1
    6.45 -
    6.46 -
    6.47 -void got_sig_bus();
    6.48 -void got_sig_int();
    6.49 -
    6.50 -/* in kernel these are opposite, but we are a consumer now. */
    6.51 -blkif_back_ring_t  fe_ring; /* slightly counterintuitive ;) */
    6.52 -blkif_front_ring_t be_ring; 
    6.53 -
    6.54 -unsigned long mmap_vstart = 0;
    6.55 -char *blktap_mem;
    6.56 -int fd = 0;
    6.57 -
    6.58 -#define BLKTAP_RING_PAGES       1 /* Front */
    6.59 -#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES)
    6.60 -    
    6.61 -int bad_count = 0;
    6.62 -void bad(void)
    6.63 -{
    6.64 -    bad_count ++;
    6.65 -    if (bad_count > 50) exit(0);
    6.66 -}
    6.67 -/*-----[ ID Manipulation from tap driver code ]--------------------------*/
    6.68 -
    6.69 -#define ACTIVE_RING_IDX unsigned short
    6.70 -
    6.71 -inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
    6.72 -{
    6.73 -    return ( (fe_dom << 16) | idx );
    6.74 -}
    6.75 -
    6.76 -inline unsigned int ID_TO_IDX(unsigned long id) 
    6.77 -{ 
    6.78 -        return ( id & 0x0000ffff );
    6.79 -}
    6.80 -
    6.81 -inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
    6.82 -
    6.83 -static int (*request_hook)(blkif_request_t *req) = NULL;
    6.84 -static int (*response_hook)(blkif_response_t *req) = NULL;
    6.85 -        
    6.86 -/*-----[ Data to/from Backend (server) VM ]------------------------------*/
    6.87 -
    6.88 -/*
    6.89 -
    6.90 -inline int write_req_to_be_ring(blkif_request_t *req)
    6.91 -{
    6.92 -    blkif_request_t *req_d;
    6.93 -    static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
    6.94 -
    6.95 -    pthread_mutex_lock(&be_prod_mutex);
    6.96 -    req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
    6.97 -    memcpy(req_d, req, sizeof(blkif_request_t));
    6.98 -    wmb();
    6.99 -    be_ring.req_prod_pvt++;
   6.100 -    pthread_mutex_unlock(&be_prod_mutex);
   6.101 -    
   6.102 -    return 0;
   6.103 -}
   6.104 -*/
   6.105 -
   6.106 -inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
   6.107 -{
   6.108 -    blkif_response_t *rsp_d;
   6.109 -    static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
   6.110 -
   6.111 -    pthread_mutex_lock(&fe_prod_mutex);
   6.112 -    rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
   6.113 -    memcpy(rsp_d, rsp, sizeof(blkif_response_t));
   6.114 -    wmb();
   6.115 -    fe_ring.rsp_prod_pvt++;
   6.116 -    pthread_mutex_unlock(&fe_prod_mutex);
   6.117 -
   6.118 -    return 0;
   6.119 -}
   6.120 -
   6.121 -static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp)
   6.122 -{
   6.123 -    response_hook_t  *rsp_hook;
   6.124 -    
   6.125 -    rsp_hook = blkif->response_hook_chain;
   6.126 -    while (rsp_hook != NULL)
   6.127 -    {
   6.128 -        switch(rsp_hook->func(blkif, rsp, 1))
   6.129 -        {
   6.130 -        case BLKTAP_PASS:
   6.131 -            break;
   6.132 -        default:
   6.133 -            printf("Only PASS is supported for resp hooks!\n");
   6.134 -        }
   6.135 -        rsp_hook = rsp_hook->next;
   6.136 -    }
   6.137 -}
   6.138 -
   6.139 -
   6.140 -static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER;
   6.141 -
   6.142 -void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp)
   6.143 -{
   6.144 -    
   6.145 -    apply_rsp_hooks(blkif, rsp);
   6.146 -  
   6.147 -    write_rsp_to_fe_ring(rsp);
   6.148 -}
   6.149 -
   6.150 -void blktap_kick_responses(void)
   6.151 -{
   6.152 -    pthread_mutex_lock(&push_mutex);
   6.153 -    
   6.154 -    RING_PUSH_RESPONSES(&fe_ring);
   6.155 -    ioctl(fd, BLKTAP_IOCTL_KICK_FE);
   6.156 -    
   6.157 -    pthread_mutex_unlock(&push_mutex);
   6.158 -}
   6.159 -
   6.160 -/*-----[ Polling fd listeners ]------------------------------------------*/
   6.161 -
   6.162 -#define MAX_POLLFDS 64
   6.163 -
   6.164 -typedef struct {
   6.165 -    int (*func)(int fd);
   6.166 -    struct pollfd *pfd;
   6.167 -    int fd;
   6.168 -    short events;
   6.169 -    int active;
   6.170 -} pollhook_t;
   6.171 -
   6.172 -static struct pollfd  pfd[MAX_POLLFDS+2]; /* tap and store are extra */
   6.173 -static pollhook_t     pollhooks[MAX_POLLFDS];
   6.174 -static unsigned int   ph_freelist[MAX_POLLFDS];
   6.175 -static unsigned int   ph_cons, ph_prod;
   6.176 -#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons))
   6.177 -#define PH_IDX(x) (x % MAX_POLLFDS)
   6.178 -
   6.179 -int blktap_attach_poll(int fd, short events, int (*func)(int fd))
   6.180 -{
   6.181 -    pollhook_t *ph;
   6.182 -    
   6.183 -    if (nr_pollhooks() == MAX_POLLFDS) {
   6.184 -        printf("Too many pollhooks!\n");
   6.185 -        return -1;
   6.186 -    }
   6.187 -    
   6.188 -    ph = &pollhooks[ph_freelist[PH_IDX(ph_cons++)]];
   6.189 -    
   6.190 -    ph->func        = func;
   6.191 -    ph->fd          = fd;
   6.192 -    ph->events      = events;
   6.193 -    ph->active      = 1;
   6.194 -    
   6.195 -    DPRINTF("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1, 
   6.196 -            nr_pollhooks());
   6.197 -    
   6.198 -    return 0;
   6.199 -}
   6.200 -
   6.201 -void blktap_detach_poll(int fd)
   6.202 -{
   6.203 -    int i;
   6.204 -    
   6.205 -    for (i=0; i<MAX_POLLFDS; i++)
   6.206 -        if ((pollhooks[i].active) && (pollhooks[i].pfd->fd == fd)) {
   6.207 -            ph_freelist[PH_IDX(ph_prod++)] = i;
   6.208 -            pollhooks[i].pfd->fd = -1;
   6.209 -            pollhooks[i].active = 0;
   6.210 -            break;
   6.211 -        }
   6.212 -        
   6.213 -    DPRINTF("Removed fd %d at ph index %d, now %d phs.\n", fd, i, 
   6.214 -            nr_pollhooks());
   6.215 -}
   6.216 -
   6.217 -void pollhook_init(void)
   6.218 -{
   6.219 -    int i;
   6.220 -    
   6.221 -    for (i=0; i < MAX_POLLFDS; i++) {
   6.222 -        ph_freelist[i] = (i+1) % MAX_POLLFDS;
   6.223 -        pollhooks[i].active = 0;
   6.224 -    }
   6.225 -    
   6.226 -    ph_cons = 0;
   6.227 -    ph_prod = MAX_POLLFDS;
   6.228 -}
   6.229 -
   6.230 -void __attribute__ ((constructor)) blktaplib_init(void)
   6.231 -{
   6.232 -    pollhook_init();
   6.233 -}
   6.234 -
   6.235 -/*-----[ The main listen loop ]------------------------------------------*/
   6.236 -
   6.237 -int blktap_listen(void)
   6.238 -{
   6.239 -    int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret;
   6.240 -    struct xs_handle *h;
   6.241 -    blkif_t *blkif;
   6.242 -
   6.243 -    /* comms rings: */
   6.244 -    blkif_request_t  *req;
   6.245 -    blkif_response_t *rsp;
   6.246 -    blkif_sring_t    *sring;
   6.247 -    RING_IDX          rp, i, pfd_count; 
   6.248 -    
   6.249 -    /* pending rings */
   6.250 -    blkif_request_t req_pending[BLK_RING_SIZE];
   6.251 -    /* blkif_response_t rsp_pending[BLK_RING_SIZE] */;
   6.252 -    
   6.253 -    /* handler hooks: */
   6.254 -    request_hook_t   *req_hook;
   6.255 -    response_hook_t  *rsp_hook;
   6.256 -    
   6.257 -    signal (SIGBUS, got_sig_bus);
   6.258 -    signal (SIGINT, got_sig_int);
   6.259 -    
   6.260 -    __init_blkif();
   6.261 -
   6.262 -    fd = open("/dev/blktap", O_RDWR);
   6.263 -    if (fd == -1)
   6.264 -        err(-1, "open failed!");
   6.265 -
   6.266 -    blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, 
   6.267 -             PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
   6.268 -
   6.269 -    if ((int)blktap_mem == -1) 
   6.270 -        err(-1, "mmap failed!");
   6.271 -
   6.272 -    /* assign the rings to the mapped memory */
   6.273 -/*
   6.274 -    sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
   6.275 -    FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE);
   6.276 -*/  
   6.277 -    sring = (blkif_sring_t *)((unsigned long)blktap_mem);
   6.278 -    BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE);
   6.279 -
   6.280 -    mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT);
   6.281 -
   6.282 -
   6.283 -    /* Set up store connection and watch. */
   6.284 -    h = xs_daemon_open();
   6.285 -    if (h == NULL) 
   6.286 -        err(-1, "xs_daemon_open");
   6.287 -    
   6.288 -    ret = add_blockdevice_probe_watch(h, "Domain-0");
   6.289 -    if (ret != 0)
   6.290 -        err(0, "adding device probewatch");
   6.291 -    
   6.292 -    ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
   6.293 -
   6.294 -    while(1) {
   6.295 -        int ret;
   6.296 -        
   6.297 -        /* build the poll list */
   6.298 -        pfd_count = 0;
   6.299 -        for ( i=0; i < MAX_POLLFDS; i++ ) {
   6.300 -            pollhook_t *ph = &pollhooks[i];
   6.301 -            
   6.302 -            if (ph->active) {
   6.303 -                pfd[pfd_count].fd     = ph->fd;
   6.304 -                pfd[pfd_count].events = ph->events;
   6.305 -                ph->pfd               = &pfd[pfd_count];
   6.306 -                pfd_count++;
   6.307 -            }
   6.308 -        }
   6.309 -
   6.310 -        tap_pfd = pfd_count++;
   6.311 -        pfd[tap_pfd].fd = fd;
   6.312 -        pfd[tap_pfd].events = POLLIN;
   6.313 -
   6.314 -        store_pfd = pfd_count++;
   6.315 -        pfd[store_pfd].fd = xs_fileno(h);
   6.316 -        pfd[store_pfd].events = POLLIN;
   6.317 -        
   6.318 -        if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) {
   6.319 -            if (DEBUG_RING_IDXS)
   6.320 -                ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
   6.321 -            continue;
   6.322 -        }
   6.323 -
   6.324 -        for (i=0; i < MAX_POLLFDS; i++) {
   6.325 -            if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) )
   6.326 -                pollhooks[i].func(pollhooks[i].pfd->fd);
   6.327 -        }
   6.328 -        
   6.329 -        if (pfd[store_pfd].revents) {
   6.330 -            ret = xs_fire_next_watch(h);
   6.331 -        }
   6.332 -
   6.333 -        if (pfd[tap_pfd].revents) 
   6.334 -        {    
   6.335 -            /* empty the fe_ring */
   6.336 -            notify_fe = 0;
   6.337 -            notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
   6.338 -            rp = fe_ring.sring->req_prod;
   6.339 -            rmb();
   6.340 -            for (i = fe_ring.req_cons; i != rp; i++)
   6.341 -            {
   6.342 -                int done = 0; 
   6.343 -
   6.344 -                req = RING_GET_REQUEST(&fe_ring, i);
   6.345 -                memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
   6.346 -                req = &req_pending[ID_TO_IDX(req->id)];
   6.347 -
   6.348 -                blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle);
   6.349 -
   6.350 -                if (blkif != NULL)
   6.351 -                {
   6.352 -                    req_hook = blkif->request_hook_chain;
   6.353 -                    while (req_hook != NULL)
   6.354 -                    {
   6.355 -                        switch(req_hook->func(blkif, req, ((i+1) == rp)))
   6.356 -                        {
   6.357 -                        case BLKTAP_RESPOND:
   6.358 -                            apply_rsp_hooks(blkif, (blkif_response_t *)req);
   6.359 -                            write_rsp_to_fe_ring((blkif_response_t *)req);
   6.360 -                            notify_fe = 1;
   6.361 -                            done = 1;
   6.362 -                            break;
   6.363 -                        case BLKTAP_STOLEN:
   6.364 -                            done = 1;
   6.365 -                            break;
   6.366 -                        case BLKTAP_PASS:
   6.367 -                            break;
   6.368 -                        default:
   6.369 -                            printf("Unknown request hook return value!\n");
   6.370 -                        }
   6.371 -                        if (done) break;
   6.372 -                        req_hook = req_hook->next;
   6.373 -                    }
   6.374 -                }
   6.375 -
   6.376 -                if (done == 0) 
   6.377 -                {
   6.378 -                    /* this was:  */
   6.379 -                    /* write_req_to_be_ring(req); */
   6.380 -
   6.381 -                    unsigned long id = req->id;
   6.382 -                    unsigned short operation = req->operation;
   6.383 -                    printf("Unterminated request!\n");
   6.384 -                    rsp = (blkif_response_t *)req;
   6.385 -                    rsp->id = id;
   6.386 -                    rsp->operation = operation;
   6.387 -                    rsp->status = BLKIF_RSP_ERROR;
   6.388 -                    write_rsp_to_fe_ring(rsp);
   6.389 -                    notify_fe = 1;
   6.390 -                    done = 1;
   6.391 -                }
   6.392 -
   6.393 -            }
   6.394 -            fe_ring.req_cons = i;
   6.395 -
   6.396 -            /* empty the be_ring */
   6.397 -/*
   6.398 -            notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
   6.399 -            rp = be_ring.sring->rsp_prod;
   6.400 -            rmb();
   6.401 -            for (i = be_ring.rsp_cons; i != rp; i++)
   6.402 -            {
   6.403 -
   6.404 -                rsp = RING_GET_RESPONSE(&be_ring, i);
   6.405 -                memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp));
   6.406 -                rsp = &rsp_pending[ID_TO_IDX(rsp->id)];
   6.407 -
   6.408 -                DPRINTF("copying a be request\n");
   6.409 -
   6.410 -                apply_rsp_hooks(rsp);
   6.411 -                write_rsp_to_fe_ring(rsp);
   6.412 -            }
   6.413 -            be_ring.rsp_cons = i;
   6.414 -*/
   6.415 -            /* notify the domains */
   6.416 -/*
   6.417 -            if (notify_be) {
   6.418 -                DPRINTF("notifying be\n");
   6.419 -pthread_mutex_lock(&push_mutex);
   6.420 -                RING_PUSH_REQUESTS(&be_ring);
   6.421 -                ioctl(fd, BLKTAP_IOCTL_KICK_BE);
   6.422 -pthread_mutex_unlock(&push_mutex);
   6.423 -            }
   6.424 -*/
   6.425 -            if (notify_fe) {
   6.426 -                DPRINTF("notifying fe\n");
   6.427 -                pthread_mutex_lock(&push_mutex);
   6.428 -                RING_PUSH_RESPONSES(&fe_ring);
   6.429 -                ioctl(fd, BLKTAP_IOCTL_KICK_FE);
   6.430 -                pthread_mutex_unlock(&push_mutex);
   6.431 -            }
   6.432 -        }        
   6.433 -    }
   6.434 -
   6.435 -
   6.436 -    munmap(blktap_mem, PAGE_SIZE);
   6.437 -
   6.438 - mmap_failed:
   6.439 -    close(fd);
   6.440 -
   6.441 - open_failed:
   6.442 -    return 0;
   6.443 -}
   6.444 -
   6.445 -void got_sig_bus() {
   6.446 -    printf("Attempted to access a page that isn't.\n");
   6.447 -    exit(-1);
   6.448 -}
   6.449 -
   6.450 -void got_sig_int() {
   6.451 -    DPRINTF("quitting -- returning to passthrough mode.\n");
   6.452 -    if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
   6.453 -    close(fd);
   6.454 -    fd = 0;
   6.455 -    exit(0);
   6.456 -} 
     7.1 --- a/tools/blktap/blktaplib.h	Fri Jun 16 18:19:40 2006 +0100
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,171 +0,0 @@
     7.4 -/* blktaplib.h
     7.5 - *
     7.6 - * userland accessors to the block tap.
     7.7 - *
     7.8 - * Sept 2/05 -- I'm scaling this back to only support block remappings
     7.9 - * to user in a backend domain.  Passthrough and interposition can be readded
    7.10 - * once transitive grants are available.
    7.11 - */
    7.12 - 
    7.13 -#ifndef __BLKTAPLIB_H__
    7.14 -#define __BLKTAPLIB_H__
    7.15 -
    7.16 -#include <xenctrl.h>
    7.17 -#include <sys/user.h>
    7.18 -#include <xen/xen.h>
    7.19 -#include <xen/io/blkif.h>
    7.20 -#include <xen/io/ring.h>
    7.21 -#include <xen/io/domain_controller.h>
    7.22 -#include <xs.h>
    7.23 -
    7.24 -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
    7.25 -
    7.26 -/* /dev/xen/blktap resides at device number major=10, minor=202        */ 
    7.27 -#define BLKTAP_MINOR 202
    7.28 -
    7.29 -/* size of the extra VMA area to map in attached pages. */
    7.30 -#define BLKTAP_VMA_PAGES BLK_RING_SIZE
    7.31 -
    7.32 -/* blktap IOCTLs:                                                      */
    7.33 -#define BLKTAP_IOCTL_KICK_FE         1
    7.34 -#define BLKTAP_IOCTL_KICK_BE         2
    7.35 -#define BLKTAP_IOCTL_SETMODE         3
    7.36 -#define BLKTAP_IOCTL_PRINT_IDXS      100   
    7.37 -
    7.38 -/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
    7.39 -#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
    7.40 -#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
    7.41 -#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
    7.42 -#define BLKTAP_MODE_COPY_FE          0x00000004
    7.43 -#define BLKTAP_MODE_COPY_BE          0x00000008
    7.44 -#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
    7.45 -#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
    7.46 -
    7.47 -#define BLKTAP_MODE_INTERPOSE \
    7.48 -           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
    7.49 -
    7.50 -#define BLKTAP_MODE_COPY_BOTH \
    7.51 -           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
    7.52 -
    7.53 -#define BLKTAP_MODE_COPY_BOTH_PAGES \
    7.54 -           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
    7.55 -
    7.56 -static inline int BLKTAP_MODE_VALID(unsigned long arg)
    7.57 -{
    7.58 -    return (
    7.59 -        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
    7.60 -        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
    7.61 -        ( arg == BLKTAP_MODE_INTERPOSE    ) );
    7.62 -/*
    7.63 -    return (
    7.64 -        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
    7.65 -        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
    7.66 -        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
    7.67 -        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
    7.68 -        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
    7.69 -        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
    7.70 -        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
    7.71 -        );
    7.72 -*/
    7.73 -}
    7.74 -
    7.75 -/* Return values for handling messages in hooks. */
    7.76 -#define BLKTAP_PASS     0 /* Keep passing this request as normal. */
    7.77 -#define BLKTAP_RESPOND  1 /* Request is now a reply.  Return it.  */
    7.78 -#define BLKTAP_STOLEN   2 /* Hook has stolen request.             */
    7.79 -
    7.80 -//#define domid_t unsigned short
    7.81 -
    7.82 -inline unsigned int ID_TO_IDX(unsigned long id);
    7.83 -inline domid_t ID_TO_DOM(unsigned long id);
    7.84 -
    7.85 -int  blktap_attach_poll(int fd, short events, int (*func)(int));
    7.86 -void blktap_detach_poll(int fd);
    7.87 -int  blktap_listen(void);
    7.88 -
    7.89 -struct blkif;
    7.90 -
    7.91 -typedef struct request_hook_st {
    7.92 -    char *name;
    7.93 -    int (*func)(struct blkif *, blkif_request_t *, int);
    7.94 -    struct request_hook_st *next;
    7.95 -} request_hook_t;
    7.96 -
    7.97 -typedef struct response_hook_st {
    7.98 -    char *name;
    7.99 -    int (*func)(struct blkif *, blkif_response_t *, int);
   7.100 -    struct response_hook_st *next;
   7.101 -} response_hook_t;
   7.102 -
   7.103 -struct blkif_ops {
   7.104 -    long int (*get_size)(struct blkif *blkif);
   7.105 -    long int (*get_secsize)(struct blkif *blkif);
   7.106 -    unsigned (*get_info)(struct blkif *blkif);
   7.107 -};
   7.108 -
   7.109 -typedef struct blkif {
   7.110 -    domid_t domid;
   7.111 -    long int handle;
   7.112 -
   7.113 -    long int pdev;
   7.114 -    long int readonly;
   7.115 -
   7.116 -    enum { DISCONNECTED, CONNECTED } state;
   7.117 -
   7.118 -    struct blkif_ops *ops;
   7.119 -    request_hook_t *request_hook_chain;
   7.120 -    response_hook_t *response_hook_chain;
   7.121 -
   7.122 -    struct blkif *hash_next;
   7.123 -
   7.124 -    void *prv;  /* device-specific data */
   7.125 -} blkif_t;
   7.126 -
   7.127 -void register_new_blkif_hook(int (*fn)(blkif_t *blkif));
   7.128 -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
   7.129 -blkif_t *alloc_blkif(domid_t domid);
   7.130 -int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
   7.131 -               long int readonly);
   7.132 -void free_blkif(blkif_t *blkif);
   7.133 -void __init_blkif(void);
   7.134 -
   7.135 -
   7.136 -/* xenstore/xenbus: */
   7.137 -extern int add_blockdevice_probe_watch(struct xs_handle *h, 
   7.138 -                                       const char *domname);
   7.139 -int xs_fire_next_watch(struct xs_handle *h);
   7.140 -
   7.141 -
   7.142 -void blkif_print_hooks(blkif_t *blkif);
   7.143 -void blkif_register_request_hook(blkif_t *blkif, char *name, 
   7.144 -                             int (*rh)(blkif_t *, blkif_request_t *, int));
   7.145 -void blkif_register_response_hook(blkif_t *blkif, char *name, 
   7.146 -                             int (*rh)(blkif_t *, blkif_response_t *, int));
   7.147 -void blkif_inject_response(blkif_t *blkif, blkif_response_t *);
   7.148 -void blktap_kick_responses(void);
   7.149 -
   7.150 -/* this must match the underlying driver... */
   7.151 -#define MAX_PENDING_REQS 64
   7.152 -
   7.153 -/* Accessing attached data page mappings */
   7.154 -#define MMAP_PAGES                                              \
   7.155 -    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
   7.156 -#define MMAP_VADDR(_req,_seg)                                   \
   7.157 -    (mmap_vstart +                                              \
   7.158 -     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
   7.159 -     ((_seg) * PAGE_SIZE))
   7.160 -
   7.161 -extern unsigned long mmap_vstart;
   7.162 -
   7.163 -/* Defines that are only used by library clients */
   7.164 -
   7.165 -#ifndef __COMPILING_BLKTAP_LIB
   7.166 -
   7.167 -static char *blkif_op_name[] = {
   7.168 -    [BLKIF_OP_READ]       = "READ",
   7.169 -    [BLKIF_OP_WRITE]      = "WRITE",
   7.170 -};
   7.171 -
   7.172 -#endif /* __COMPILING_BLKTAP_LIB */
   7.173 -    
   7.174 -#endif /* __BLKTAPLIB_H__ */
     8.1 --- a/tools/blktap/list.h	Fri Jun 16 18:19:40 2006 +0100
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,55 +0,0 @@
     8.4 -/*
     8.5 - * list.h
     8.6 - * 
     8.7 - * This is a subset of linux's list.h intended to be used in user-space.
     8.8 - * 
     8.9 - */
    8.10 -
    8.11 -#ifndef __LIST_H__
    8.12 -#define __LIST_H__
    8.13 -
    8.14 -#define LIST_POISON1  ((void *) 0x00100100)
    8.15 -#define LIST_POISON2  ((void *) 0x00200200)
    8.16 -
    8.17 -struct list_head {
    8.18 -        struct list_head *next, *prev;
    8.19 -};
    8.20 - 
    8.21 -#define LIST_HEAD_INIT(name) { &(name), &(name) }
    8.22 - 
    8.23 -#define LIST_HEAD(name) \
    8.24 -        struct list_head name = LIST_HEAD_INIT(name)
    8.25 -
    8.26 -static inline void __list_add(struct list_head *new,
    8.27 -                              struct list_head *prev,
    8.28 -                              struct list_head *next)
    8.29 -{
    8.30 -        next->prev = new;
    8.31 -        new->next = next;
    8.32 -        new->prev = prev;
    8.33 -        prev->next = new;
    8.34 -}
    8.35 -
    8.36 -static inline void list_add(struct list_head *new, struct list_head *head)
    8.37 -{
    8.38 -        __list_add(new, head, head->next);
    8.39 -}
    8.40 -static inline void __list_del(struct list_head * prev, struct list_head * next)
    8.41 -{
    8.42 -        next->prev = prev;
    8.43 -        prev->next = next;
    8.44 -}
    8.45 -static inline void list_del(struct list_head *entry)
    8.46 -{
    8.47 -        __list_del(entry->prev, entry->next);
    8.48 -        entry->next = LIST_POISON1;
    8.49 -        entry->prev = LIST_POISON2;
    8.50 -}
    8.51 -#define list_entry(ptr, type, member)                                   \
    8.52 -        ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
    8.53 -#define list_for_each_entry(pos, head, member)                          \
    8.54 -        for (pos = list_entry((head)->next, typeof(*pos), member);      \
    8.55 -             &pos->member != (head);                                    \
    8.56 -             pos = list_entry(pos->member.next, typeof(*pos), member))
    8.57 -
    8.58 -#endif /* __LIST_H__ */
     9.1 --- a/tools/blktap/parallax/Makefile	Fri Jun 16 18:19:40 2006 +0100
     9.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.3 @@ -1,63 +0,0 @@
     9.4 -XEN_ROOT = ../../..
     9.5 -include $(XEN_ROOT)/tools/Rules.mk
     9.6 -
     9.7 -PARALLAX_INSTALL_DIR	= /usr/sbin
     9.8 -
     9.9 -INSTALL         = install
    9.10 -INSTALL_PROG    = $(INSTALL) -m0755
    9.11 -INSTALL_DIR     = $(INSTALL) -d -m0755
    9.12 -
    9.13 -INCLUDES += -I.. -I/usr/include -I $(XEN_LIBXC)
    9.14 -
    9.15 -LDFLAGS = -L.. -lpthread -lz -lblktap
    9.16 -
    9.17 -#PLX_SRCS := 
    9.18 -PLX_SRCS := vdi.c 
    9.19 -PLX_SRCS += radix.c 
    9.20 -PLX_SRCS += snaplog.c
    9.21 -PLX_SRCS += blockstore.c 
    9.22 -PLX_SRCS += block-async.c
    9.23 -PLX_SRCS += requests-async.c
    9.24 -VDI_SRCS := $(PLX_SRCS)
    9.25 -PLX_SRCS += parallax.c
    9.26 -
    9.27 -#VDI_TOOLS :=
    9.28 -VDI_TOOLS := vdi_create
    9.29 -VDI_TOOLS += vdi_list
    9.30 -VDI_TOOLS += vdi_snap
    9.31 -VDI_TOOLS += vdi_snap_list
    9.32 -VDI_TOOLS += vdi_snap_delete
    9.33 -VDI_TOOLS += vdi_fill
    9.34 -VDI_TOOLS += vdi_tree
    9.35 -VDI_TOOLS += vdi_validate
    9.36 -
    9.37 -CFLAGS   += -Werror
    9.38 -CFLAGS   += -Wno-unused
    9.39 -CFLAGS   += -fno-strict-aliasing
    9.40 -CFLAGS   += $(INCLUDES)
    9.41 -CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
    9.42 -# Get gcc to generate the dependencies for us.
    9.43 -CFLAGS   += -Wp,-MD,.$(@F).d
    9.44 -DEPS     = .*.d
    9.45 -
    9.46 -OBJS     = $(patsubst %.c,%.o,$(SRCS))
    9.47 -IBINS    = parallax $(VDI_TOOLS)
    9.48 -
    9.49 -.PHONY: all
    9.50 -all: $(VDI_TOOLS) parallax blockstored
    9.51 -
    9.52 -.PHONY: install
    9.53 -install: all
    9.54 -	$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR)
    9.55 -
    9.56 -.PHONY: clean
    9.57 -clean:
    9.58 -	rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax vdi_unittest
    9.59 -
    9.60 -parallax: $(PLX_SRCS)
    9.61 -	$(CC) $(CFLAGS) -o parallax -L.. $(LDFLAGS) $(PLX_SRCS)
    9.62 -
    9.63 -${VDI_TOOLS}: %: %.c $(VDI_SRCS)
    9.64 -	$(CC) $(CFLAGS) -o $@ $@.c $(LDFLAGS) $(VDI_SRCS)
    9.65 -
    9.66 --include $(DEPS)
    10.1 --- a/tools/blktap/parallax/README	Fri Jun 16 18:19:40 2006 +0100
    10.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.3 @@ -1,177 +0,0 @@
    10.4 -Parallax Quick Overview
    10.5 -March 3, 2005
    10.6 -
    10.7 -This is intended to provide a quick set of instructions to let you
    10.8 -guys play with the current parallax source.  In its current form, the
    10.9 -code will let you run an arbitrary number of VMs off of a single disk
   10.10 -image, doing copy-on-write as they make updates.  Each domain is
   10.11 -assigned a virtual disk image (VDI), which may be based on a snapshot
   10.12 -of an existing image.  All of the VDI and snapshot management should
   10.13 -currently work.
   10.14 -
   10.15 -The current implementation uses a single file as a blockstore for
   10.16 -_everything_; this will soon be replaced by the fancier backend code
   10.17 -and the local cache.  As it stands, Parallax will create
   10.18 -"blockstore.dat" in the directory that you run it from, and use
   10.19 -largefile support to make this grow to unfathomable girth.  So, you
   10.20 -probably want to run the daemon off of a local disk, with a lot of
   10.21 -free space.
   10.22 -
   10.23 -Here's how to get going:
   10.24 -
   10.25 -0. Setup:
   10.26 ----------
   10.27 -
   10.28 -Pick a local directory on a disk with lots of room.  You should be
   10.29 -running from a privileged domain (e.g. dom0) with the blocktap driver
   10.30 -configured in and the block backend NOT configured in.
   10.31 -
   10.32 -For convenience (for the moment) copy all of the vdi tools (vdi_*) and
   10.33 -the parallax daemon from tools/blktap/parallax into this directory.
   10.34 -
   10.35 -1. Populate the blockstore:
   10.36 ----------------------------
   10.37 -
   10.38 -First you need to put at least one image into the blockstore.  You
   10.39 -will need a disk image, either as a file or local partition.  My
   10.40 -general approach has been to
   10.41 -
   10.42 -(a) make a really big sparse file with 
   10.43 -
   10.44 -        dd if=/dev/zero of=./image bs=4K count=1 seek=[big value]
   10.45 -
   10.46 -(b) put a filesystem into it
   10.47 -
   10.48 -        mkfs.ext3 ./image
   10.49 -
   10.50 -(c) mount it using loopback
   10.51 -
   10.52 -        mkdir ./mnt
   10.53 -        mount -o loop ./image ./mnt
   10.54 -
   10.55 -(d) cd into it and untar one of the image files from srg-roots.
   10.56 -
   10.57 -        cd mnt
   10.58 -        tar ...
   10.59 -
   10.60 -NOTE: Beware if your system is FC3.  mkfs is not compatible with old
   10.61 -versions of fedora, and so you don't have much choice but to install
   10.62 -further fc3 images if you have used the fc3 version of mkfs.
   10.63 -
   10.64 -(e) unmount the image
   10.65 -
   10.66 -        cd ..
   10.67 -        umount mnt
   10.68 -
   10.69 -(f) now, create a new VDI to hold the image 
   10.70 -
   10.71 -        ./vdi_create "My new FC3 VDI"
   10.72 -
   10.73 -(g) get the id of the new VDI.
   10.74 -
   10.75 -        ./vdi_list
   10.76 -
   10.77 -        |      0                     My new FC3 VDI
   10.78 -
   10.79 -(0 is the VDI id... create a few more if you want.)
   10.80 -
   10.81 -(h) hoover your image into the new VDI.
   10.82 -
   10.83 -        ./vdi_fill 0 ./image
   10.84 -
   10.85 -This will pull the entire image into the blockstore and set up a
   10.86 -mapping tree for it for VDI 0.  Passing a device (i.e. /dev/sda3)
   10.87 -should also work, but vdi_fill has NO notion of sparseness yet, so you
   10.88 -are going to pump a block into the store for each block you read.
   10.89 -
   10.90 -vdi_fill will count up until it is done, and you should be ready to
   10.91 -go.  If you want to be anal, you can use vdi_validate to test the VDI
   10.92 -against the original image.
   10.93 -
   10.94 -2. Create some extra VDIs
   10.95 --------------------------
   10.96 -
   10.97 -VDIs are actually a list of snapshots, and each snapshot is a full
   10.98 -image of mappings.  So, to preserve an immutable copy of a current
   10.99 -VDI, do this:
  10.100 -
  10.101 -(a) Snapshot your new VDI.
  10.102 -
  10.103 -        ./vdi_snap 0
  10.104 -
  10.105 -Snapshotting writes the current radix root to the VDI's snapshot log,
  10.106 -and assigns it a new writable root.
  10.107 -
  10.108 -(b) look at the VDI's snapshot log.
  10.109 -
  10.110 -        ./vdi_snap_list 0
  10.111 -
  10.112 -        | 16   0      Thu Mar  3 19:27:48 2005 565111           31
  10.113 -
  10.114 -The first two columns constitute a snapshot id and represent the
  10.115 -(block, offset) of the snapshot record.  The Date tells you when the
  10.116 -snapshot was made, and 31 is the radix root node of the snapshot.
  10.117 -
  10.118 -(c) Create a new VDI, based on that snapshot, and look at the list.
  10.119 -
  10.120 -        ./vdi_create "FC3 - Copy 1" 16 0
  10.121 -        ./vdi_list
  10.122 -
  10.123 -        |      0                     My new FC3 VDI
  10.124 -        |      1                       FC3 - Copy 1
  10.125 -
  10.126 -NOTE: If you have Graphviz installed on your system, you can use
  10.127 -vdi_tree to generate a postscript of your current set of VDIs and
  10.128 -snapshots.
  10.129 -
  10.130 -
  10.131 -Create as many VDIs as you need for the VMs that you want to run.
  10.132 -
  10.133 -3. Boot some VMs:
  10.134 ------------------
  10.135 -
  10.136 -Parallax currently uses a hack in xend to pass the VDI id; you need to
  10.137 -modify the disk line of the VM config that is going to mount it.
  10.138 -
  10.139 -(a) set up your vm config, by using the following disk line:
  10.140 -
  10.141 -        disk = ['parallax:1,sda1,w,0' ]
  10.142 -
  10.143 -This example uses VDI 1 (from vdi_list above), presents it as sda1
  10.144 -(writable), and uses dom 0 as the backend.  If you were running the
  10.145 -daemon (and tap driver) in some domain other than 0, you would change
  10.146 -this last parameter.
  10.147 -
  10.148 -NOTE: You'll need to have reinstalled xend/tools prior to booting the vm, so that it knows what to do with "parallax:".
  10.149 -
  10.150 -(b) Run parallax in the backend domain.
  10.151 -
  10.152 -        ./parallax
  10.153 -
  10.154 -(c) create your new domain.
  10.155 -
  10.156 -        xm create ...
  10.157 -
  10.158 ----
  10.159 -
  10.160 -That's pretty much all there is to it at the moment.  Hope this is
  10.161 -clear enough to get you going.  Now, a few serious caveats that will
  10.162 -be sorted out in the almost immediate future:
  10.163 -
  10.164 -WARNINGS:
  10.165 ----------
  10.166 -
  10.167 -1. There is NO locking in the VDI tools at the moment, so I'd avoid
  10.168 -running them in parallel, or more importantly, running them while the
  10.169 -daemon is running.
  10.170 -
  10.171 -2. I doubt that xend will be very happy about restarting if you have
  10.172 -parallax-using domains.  So if it dies while there are active parallax
  10.173 -doms, you may need to reboot.
  10.174 -
  10.175 -3. I've turned off write-in-place.  So at the moment, EVERY block
  10.176 -write is a log append on the blockstore.  I've been having some probs
  10.177 -with the radix tree's marking of writable blocks after snapshots and
  10.178 -will sort this out very soon.
  10.179 -
  10.180 -
    11.1 --- a/tools/blktap/parallax/block-async.c	Fri Jun 16 18:19:40 2006 +0100
    11.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.3 @@ -1,393 +0,0 @@
    11.4 -/* block-async.c
    11.5 - * 
    11.6 - * Asynchronous block wrappers for parallax.
    11.7 - */
    11.8 - 
    11.9 - 
   11.10 -#include <stdio.h>
   11.11 -#include <stdlib.h>
   11.12 -#include <string.h>
   11.13 -#include <pthread.h>
   11.14 -#include "block-async.h"
   11.15 -#include "blockstore.h"
   11.16 -#include "vdi.h"
   11.17 -
   11.18 -
   11.19 -#if 0
   11.20 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   11.21 -#else
   11.22 -#define DPRINTF(_f, _a...) ((void)0)
   11.23 -#endif
   11.24 -
   11.25 -/* We have a queue of outstanding I/O requests implemented as a 
   11.26 - * circular producer-consumer ring with free-running buffers.
   11.27 - * To allow reordering, this ring indirects to indexes in a
   11.28 - * ring of io_structs.
   11.29 - *
   11.30 - * The block_* calls may either add an entry to this ring and return,
   11.31 - * or satisfy the request immediately and call the callback directly.
   11.32 - * None of the io calls in parallax should be nested enough to worry 
   11.33 - * about stack problems with this approach.
   11.34 - */
   11.35 -
   11.36 -struct read_args {
   11.37 -    uint64_t addr;
   11.38 -};
   11.39 -
   11.40 -struct write_args {
   11.41 -    uint64_t   addr;
   11.42 -    char *block;
   11.43 -};
   11.44 -
   11.45 -struct alloc_args {
   11.46 -    char *block;
   11.47 -};
   11.48 - 
   11.49 -struct pending_io_req {
   11.50 -    enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op;
   11.51 -    union {
   11.52 -        struct read_args  r;
   11.53 -        struct write_args w;
   11.54 -        struct alloc_args a;
   11.55 -    } u;
   11.56 -    io_cb_t cb;
   11.57 -    void *param;
   11.58 -};
   11.59 -
   11.60 -void radix_lock_init(struct radix_lock *r)
   11.61 -{
   11.62 -    int i;
   11.63 -    
   11.64 -    pthread_mutex_init(&r->lock, NULL);
   11.65 -    for (i=0; i < 1024; i++) {
   11.66 -        r->lines[i] = 0;
   11.67 -        r->waiters[i] = NULL;
   11.68 -        r->state[i] = ANY;
   11.69 -    }
   11.70 -}
   11.71 -
   11.72 -/* maximum outstanding I/O requests issued asynchronously */
   11.73 -/* must be a power of 2.*/
   11.74 -#define MAX_PENDING_IO 1024
   11.75 -
   11.76 -/* how many threads to concurrently issue I/O to the disk. */
   11.77 -#define IO_POOL_SIZE   10
   11.78 -
   11.79 -static struct pending_io_req pending_io_reqs[MAX_PENDING_IO];
   11.80 -static int pending_io_list[MAX_PENDING_IO];
   11.81 -static unsigned long io_prod = 0, io_cons = 0, io_free = 0;
   11.82 -#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1))
   11.83 -#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs)
   11.84 -#define PENDING_IO_ENT(_x) \
   11.85 -	(&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]])
   11.86 -#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod)
   11.87 -#define CAN_CONSUME_PENDING_IO (io_cons != io_prod)
   11.88 -static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER;
   11.89 -static pthread_cond_t  pending_io_cond = PTHREAD_COND_INITIALIZER;
   11.90 -
   11.91 -static void init_pending_io(void)
   11.92 -{
   11.93 -    int i;
   11.94 -	
   11.95 -    for (i=0; i<MAX_PENDING_IO; i++)
   11.96 -        pending_io_list[i] = i;
   11.97 -		
   11.98 -} 
   11.99 -
  11.100 -void block_read(uint64_t addr, io_cb_t cb, void *param)
  11.101 -{
  11.102 -    struct pending_io_req *req;
  11.103 -    
  11.104 -    pthread_mutex_lock(&pending_io_lock);
  11.105 -    assert(CAN_PRODUCE_PENDING_IO);
  11.106 -    
  11.107 -    req = PENDING_IO_ENT(io_prod++);
  11.108 -    DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req);
  11.109 -    req->op = IO_READ;
  11.110 -    req->u.r.addr = addr;
  11.111 -    req->cb = cb;
  11.112 -    req->param = param;
  11.113 -    
  11.114 -    pthread_cond_signal(&pending_io_cond);
  11.115 -    pthread_mutex_unlock(&pending_io_lock);	
  11.116 -}
  11.117 -
  11.118 -
  11.119 -void block_write(uint64_t addr, char *block, io_cb_t cb, void *param)
  11.120 -{
  11.121 -    struct pending_io_req *req;
  11.122 -    
  11.123 -    pthread_mutex_lock(&pending_io_lock);
  11.124 -    assert(CAN_PRODUCE_PENDING_IO);
  11.125 -    
  11.126 -    req = PENDING_IO_ENT(io_prod++);
  11.127 -    DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req);
  11.128 -    req->op = IO_WRITE;
  11.129 -    req->u.w.addr  = addr;
  11.130 -    req->u.w.block = block;
  11.131 -    req->cb = cb;
  11.132 -    req->param = param;
  11.133 -    
  11.134 -    pthread_cond_signal(&pending_io_cond);
  11.135 -    pthread_mutex_unlock(&pending_io_lock);	
  11.136 -}
  11.137 -
  11.138 -
  11.139 -void block_alloc(char *block, io_cb_t cb, void *param)
  11.140 -{
  11.141 -    struct pending_io_req *req;
  11.142 -	
  11.143 -    pthread_mutex_lock(&pending_io_lock);
  11.144 -    assert(CAN_PRODUCE_PENDING_IO);
  11.145 -    
  11.146 -    req = PENDING_IO_ENT(io_prod++);
  11.147 -    req->op = IO_ALLOC;
  11.148 -    req->u.a.block = block;
  11.149 -    req->cb = cb;
  11.150 -    req->param = param;
  11.151 -    
  11.152 -    pthread_cond_signal(&pending_io_cond);
  11.153 -    pthread_mutex_unlock(&pending_io_lock);	
  11.154 -}
  11.155 -
  11.156 -void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
  11.157 -{
  11.158 -    struct io_ret ret;
  11.159 -    pthread_mutex_lock(&r->lock);
  11.160 -    
  11.161 -    if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) {
  11.162 -        r->lines[row]++;
  11.163 -        r->state[row] = READ;
  11.164 -        DPRINTF("RLOCK  : %3d (row: %d)\n", r->lines[row], row);
  11.165 -        pthread_mutex_unlock(&r->lock);
  11.166 -        ret.type = IO_INT_T;
  11.167 -        ret.u.i = 0;
  11.168 -        cb(ret, param);
  11.169 -    } else {
  11.170 -        struct radix_wait **rwc;
  11.171 -        struct radix_wait *rw = 
  11.172 -            (struct radix_wait *) malloc (sizeof(struct radix_wait));
  11.173 -        DPRINTF("RLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
  11.174 -        rw->type  = RLOCK;
  11.175 -        rw->param = param;
  11.176 -        rw->cb    = cb;
  11.177 -        rw->next  = NULL;
  11.178 -        /* append to waiters list. */
  11.179 -        rwc = &r->waiters[row];
  11.180 -        while (*rwc != NULL) rwc = &(*rwc)->next;
  11.181 -        *rwc = rw;
  11.182 -        pthread_mutex_unlock(&r->lock);
  11.183 -        return;
  11.184 -    }
  11.185 -}
  11.186 -
  11.187 -
  11.188 -void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
  11.189 -{
  11.190 -    struct io_ret ret;
  11.191 -    pthread_mutex_lock(&r->lock);
  11.192 -    
  11.193 -    /* the second check here is redundant -- just here for debugging now. */
  11.194 -    if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) {
  11.195 -        r->state[row] = STOP;
  11.196 -        r->lines[row] = -1;
  11.197 -        DPRINTF("WLOCK  : %3d (row: %d)\n", r->lines[row], row);
  11.198 -        pthread_mutex_unlock(&r->lock);
  11.199 -        ret.type = IO_INT_T;
  11.200 -        ret.u.i = 0;
  11.201 -        cb(ret, param);
  11.202 -    } else {
  11.203 -        struct radix_wait **rwc;
  11.204 -        struct radix_wait *rw = 
  11.205 -            (struct radix_wait *) malloc (sizeof(struct radix_wait));
  11.206 -        DPRINTF("WLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
  11.207 -        rw->type  = WLOCK;
  11.208 -        rw->param = param;
  11.209 -        rw->cb    = cb;
  11.210 -        rw->next  = NULL;
  11.211 -        /* append to waiters list. */
  11.212 -        rwc = &r->waiters[row];
  11.213 -        while (*rwc != NULL) rwc = &(*rwc)->next;
  11.214 -        *rwc = rw;
  11.215 -        pthread_mutex_unlock(&r->lock);
  11.216 -        return;
  11.217 -    }
  11.218 -	
  11.219 -}
  11.220 -
  11.221 -/* called with radix_lock locked and lock count of zero. */
  11.222 -static void wake_waiters(struct radix_lock *r, int row)
  11.223 -{
  11.224 -    struct pending_io_req *req;
  11.225 -    struct radix_wait *rw;
  11.226 -    
  11.227 -    if (r->lines[row] != 0) return;
  11.228 -    if (r->waiters[row] == NULL) return; 
  11.229 -    
  11.230 -    if (r->waiters[row]->type == WLOCK) {
  11.231 -
  11.232 -        rw = r->waiters[row];
  11.233 -        pthread_mutex_lock(&pending_io_lock);
  11.234 -        assert(CAN_PRODUCE_PENDING_IO);
  11.235 -        
  11.236 -        req = PENDING_IO_ENT(io_prod++);
  11.237 -        req->op    = IO_WWAKE;
  11.238 -        req->cb    = rw->cb;
  11.239 -        req->param = rw->param;
  11.240 -        r->lines[row] = -1; /* write lock the row. */
  11.241 -        r->state[row] = STOP;
  11.242 -        r->waiters[row] = rw->next;
  11.243 -        free(rw);
  11.244 -        pthread_mutex_unlock(&pending_io_lock);
  11.245 -    
  11.246 -    } else /* RLOCK */ {
  11.247 -
  11.248 -        while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) {
  11.249 -            rw = r->waiters[row];
  11.250 -            pthread_mutex_lock(&pending_io_lock);
  11.251 -            assert(CAN_PRODUCE_PENDING_IO);
  11.252 -            
  11.253 -            req = PENDING_IO_ENT(io_prod++);
  11.254 -            req->op    = IO_RWAKE;
  11.255 -            req->cb    = rw->cb;
  11.256 -            req->param = rw->param;
  11.257 -            r->lines[row]++; /* read lock the row. */
  11.258 -            r->state[row] = READ; 
  11.259 -            r->waiters[row] = rw->next;
  11.260 -            free(rw);
  11.261 -            pthread_mutex_unlock(&pending_io_lock);
  11.262 -        }
  11.263 -
  11.264 -        if (r->waiters[row] != NULL) /* There is a write queued still */
  11.265 -            r->state[row] = STOP;
  11.266 -    }	
  11.267 -    
  11.268 -    pthread_mutex_lock(&pending_io_lock);
  11.269 -    pthread_cond_signal(&pending_io_cond);
  11.270 -    pthread_mutex_unlock(&pending_io_lock);
  11.271 -}
  11.272 -
  11.273 -void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
  11.274 -{
  11.275 -    struct io_ret ret;
  11.276 -	
  11.277 -    pthread_mutex_lock(&r->lock);
  11.278 -    assert(r->lines[row] > 0); /* try to catch misuse. */
  11.279 -    r->lines[row]--;
  11.280 -    if (r->lines[row] == 0) {
  11.281 -        r->state[row] = ANY;
  11.282 -        wake_waiters(r, row);
  11.283 -    }
  11.284 -    pthread_mutex_unlock(&r->lock);
  11.285 -    cb(ret, param);
  11.286 -}
  11.287 -
  11.288 -void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
  11.289 -{
  11.290 -    struct io_ret ret;
  11.291 -    
  11.292 -    pthread_mutex_lock(&r->lock);
  11.293 -    assert(r->lines[row] == -1); /* try to catch misuse. */
  11.294 -    r->lines[row] = 0;
  11.295 -    r->state[row] = ANY;
  11.296 -    wake_waiters(r, row);
  11.297 -    pthread_mutex_unlock(&r->lock);
  11.298 -    cb(ret, param);
  11.299 -}
  11.300 -
  11.301 -/* consumer calls */
  11.302 -static void do_next_io_req(struct pending_io_req *req)
  11.303 -{
  11.304 -    struct io_ret          ret;
  11.305 -    void  *param;
  11.306 -    
  11.307 -    switch (req->op) {
  11.308 -    case IO_READ:
  11.309 -        ret.type = IO_BLOCK_T;
  11.310 -        ret.u.b  = readblock(req->u.r.addr);
  11.311 -        break;
  11.312 -    case IO_WRITE:
  11.313 -        ret.type = IO_INT_T;
  11.314 -        ret.u.i  = writeblock(req->u.w.addr, req->u.w.block);
  11.315 -        DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr);
  11.316 -        break;
  11.317 -    case IO_ALLOC:
  11.318 -        ret.type = IO_ADDR_T;
  11.319 -        ret.u.a  = allocblock(req->u.a.block);
  11.320 -        break;
  11.321 -    case IO_RWAKE:
  11.322 -        DPRINTF("WAKE DEFERRED RLOCK!\n");
  11.323 -        ret.type = IO_INT_T;
  11.324 -        ret.u.i  = 0;
  11.325 -        break;
  11.326 -    case IO_WWAKE:
  11.327 -        DPRINTF("WAKE DEFERRED WLOCK!\n");
  11.328 -        ret.type = IO_INT_T;
  11.329 -        ret.u.i  = 0;
  11.330 -        break;
  11.331 -    default:
  11.332 -        DPRINTF("Unknown IO operation on pending list!\n");
  11.333 -        return;
  11.334 -    }
  11.335 -    
  11.336 -    param = req->param;
  11.337 -    pthread_mutex_lock(&pending_io_lock);
  11.338 -    pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req);
  11.339 -    pthread_mutex_unlock(&pending_io_lock);
  11.340 -	
  11.341 -    assert(req->cb != NULL);
  11.342 -    req->cb(ret, param);
  11.343 -    
  11.344 -}
  11.345 -
  11.346 -void *io_thread(void *param) 
  11.347 -{
  11.348 -    int tid;
  11.349 -    struct pending_io_req *req;
  11.350 -    
  11.351 -    /* Set this thread's tid. */
  11.352 -    tid = *(int *)param;
  11.353 -    free(param);
  11.354 -    
  11.355 -start:
  11.356 -    pthread_mutex_lock(&pending_io_lock);
  11.357 -    while (io_prod == io_cons) {
  11.358 -        pthread_cond_wait(&pending_io_cond, &pending_io_lock);
  11.359 -    }
  11.360 -    
  11.361 -    if (io_prod == io_cons) {
  11.362 -        /* unnecessary wakeup. */
  11.363 -        pthread_mutex_unlock(&pending_io_lock);
  11.364 -        goto start;
  11.365 -    }
  11.366 -    
  11.367 -    req = PENDING_IO_ENT(io_cons++);
  11.368 -    pthread_mutex_unlock(&pending_io_lock);
  11.369 -	
  11.370 -    do_next_io_req(req);
  11.371 -    
  11.372 -    goto start;
  11.373 -	
  11.374 -}
  11.375 -
  11.376 -static pthread_t io_pool[IO_POOL_SIZE];
  11.377 -void start_io_threads(void)
  11.378 -
  11.379 -{	
  11.380 -    int i, tid=0;
  11.381 -    
  11.382 -    for (i=0; i < IO_POOL_SIZE; i++) {
  11.383 -        int ret, *t;
  11.384 -        t = (int *)malloc(sizeof(int));
  11.385 -        *t = tid++;
  11.386 -        ret = pthread_create(&io_pool[i], NULL, io_thread, t);
  11.387 -        if (ret != 0) printf("Error starting thread %d\n", i);
  11.388 -    }
  11.389 -	
  11.390 -}
  11.391 -
  11.392 -void init_block_async(void)
  11.393 -{
  11.394 -    init_pending_io();
  11.395 -    start_io_threads();
  11.396 -}
    12.1 --- a/tools/blktap/parallax/block-async.h	Fri Jun 16 18:19:40 2006 +0100
    12.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.3 @@ -1,69 +0,0 @@
    12.4 -/* block-async.h
    12.5 - * 
    12.6 - * Asynchronous block wrappers for parallax.
    12.7 - */
    12.8 - 
    12.9 -#ifndef _BLOCKASYNC_H_
   12.10 -#define _BLOCKASYNC_H_
   12.11 -
   12.12 -#include <assert.h>
   12.13 -#include <xenctrl.h>
   12.14 -#include "vdi.h"
   12.15 -
   12.16 -struct io_ret
   12.17 -{
   12.18 -    enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type;
   12.19 -    union {
   12.20 -        uint64_t   a;
   12.21 -        char *b;
   12.22 -        int   i;
   12.23 -    } u;
   12.24 -};
   12.25 -
   12.26 -typedef void (*io_cb_t)(struct io_ret r, void *param);
   12.27 -
   12.28 -/* per-vdi lock structures to make sure requests run in a safe order. */
   12.29 -struct radix_wait {
   12.30 -    enum {RLOCK, WLOCK} type;
   12.31 -    io_cb_t  cb;
   12.32 -    void    *param;
   12.33 -    struct radix_wait *next;
   12.34 -};
   12.35 -
   12.36 -struct radix_lock {
   12.37 -    pthread_mutex_t lock;
   12.38 -    int                    lines[1024];
   12.39 -    struct radix_wait     *waiters[1024];
   12.40 -    enum {ANY, READ, STOP} state[1024];
   12.41 -};
   12.42 -void radix_lock_init(struct radix_lock *r);
   12.43 -
   12.44 -void block_read(uint64_t addr, io_cb_t cb, void *param);
   12.45 -void block_write(uint64_t addr, char *block, io_cb_t cb, void *param);
   12.46 -void block_alloc(char *block, io_cb_t cb, void *param);
   12.47 -void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
   12.48 -void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
   12.49 -void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
   12.50 -void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
   12.51 -void init_block_async(void);
   12.52 -
   12.53 -static inline uint64_t IO_ADDR(struct io_ret r)
   12.54 -{
   12.55 -    assert(r.type == IO_ADDR_T);
   12.56 -    return r.u.a;
   12.57 -}
   12.58 -
   12.59 -static inline char *IO_BLOCK(struct io_ret r)
   12.60 -{
   12.61 -    assert(r.type == IO_BLOCK_T);
   12.62 -    return r.u.b;
   12.63 -}
   12.64 -
   12.65 -static inline int IO_INT(struct io_ret r)
   12.66 -{
   12.67 -    assert(r.type == IO_INT_T);
   12.68 -    return r.u.i;
   12.69 -}
   12.70 -
   12.71 -
   12.72 -#endif //_BLOCKASYNC_H_
    13.1 --- a/tools/blktap/parallax/blockstore.c	Fri Jun 16 18:19:40 2006 +0100
    13.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.3 @@ -1,1348 +0,0 @@
    13.4 -/**************************************************************************
    13.5 - * 
    13.6 - * blockstore.c
    13.7 - *
    13.8 - * Simple block store interface
    13.9 - *
   13.10 - */
   13.11 - 
   13.12 -#include <fcntl.h>
   13.13 -#include <unistd.h>
   13.14 -#include <stdio.h>
   13.15 -#include <stdlib.h>
   13.16 -#include <string.h>
   13.17 -#include <sys/types.h>
   13.18 -#include <sys/stat.h>
   13.19 -#include <sys/time.h>
   13.20 -#include <stdarg.h>
   13.21 -#include "blockstore.h"
   13.22 -#include <pthread.h>
   13.23 -
   13.24 -//#define BLOCKSTORE_REMOTE
   13.25 -//#define BSDEBUG
   13.26 -
   13.27 -#define RETRY_TIMEOUT 1000000 /* microseconds */
   13.28 -
   13.29 -/*****************************************************************************
   13.30 - * Debugging
   13.31 - */
   13.32 -#ifdef BSDEBUG
   13.33 -void DB(char *format, ...)
   13.34 -{
   13.35 -    va_list args;
   13.36 -    fprintf(stderr, "[%05u] ", (int)pthread_getspecific(tid_key));
   13.37 -    va_start(args, format);
   13.38 -    vfprintf(stderr, format, args);
   13.39 -    va_end(args);
   13.40 -}
   13.41 -#else
   13.42 -#define DB(format, ...) (void)0
   13.43 -#endif
   13.44 -
   13.45 -#ifdef BLOCKSTORE_REMOTE
   13.46 -
   13.47 -#include <sys/socket.h>
   13.48 -#include <sys/ioctl.h>
   13.49 -#include <netinet/in.h>
   13.50 -#include <netdb.h>
   13.51 -
   13.52 -/*****************************************************************************
   13.53 - * Network state                                                             *
   13.54 - *****************************************************************************/
   13.55 -
   13.56 -/* The individual disk servers we talks to. These will be referenced by
   13.57 - * an integer index into bsservers[].
   13.58 - */
   13.59 -bsserver_t bsservers[MAX_SERVERS];
   13.60 -
   13.61 -/* The cluster map. This is indexed by an integer cluster number.
   13.62 - */
   13.63 -bscluster_t bsclusters[MAX_CLUSTERS];
   13.64 -
   13.65 -/* Local socket.
   13.66 - */
   13.67 -struct sockaddr_in sin_local;
   13.68 -int bssock = 0;
   13.69 -
   13.70 -/*****************************************************************************
   13.71 - * Notification                                                              *
   13.72 - *****************************************************************************/
   13.73 -
   13.74 -typedef struct pool_thread_t_struct {
   13.75 -    pthread_mutex_t ptmutex;
   13.76 -    pthread_cond_t ptcv;
   13.77 -    int newdata;
   13.78 -} pool_thread_t;
   13.79 -
   13.80 -pool_thread_t pool_thread[READ_POOL_SIZE+1];
   13.81 -
   13.82 -#define RECV_NOTIFY(tid) { \
   13.83 -    pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
   13.84 -    pool_thread[tid].newdata = 1; \
   13.85 -    DB("CV Waking %u", tid); \
   13.86 -    pthread_cond_signal(&(pool_thread[tid].ptcv)); \
   13.87 -    pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
   13.88 -#define RECV_AWAIT(tid) { \
   13.89 -    pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
   13.90 -    if (pool_thread[tid].newdata) { \
   13.91 -        pool_thread[tid].newdata = 0; \
   13.92 -        DB("CV Woken %u", tid); \
   13.93 -    } \
   13.94 -    else { \
   13.95 -        DB("CV Waiting %u", tid); \
   13.96 -        pthread_cond_wait(&(pool_thread[tid].ptcv), \
   13.97 -                          &(pool_thread[tid].ptmutex)); \
   13.98 -    } \
   13.99 -    pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
  13.100 -
  13.101 -/*****************************************************************************
  13.102 - * Message queue management                                                  *
  13.103 - *****************************************************************************/
  13.104 -
  13.105 -/* Protects the queue manipulation critcal regions.
  13.106 - */
  13.107 -pthread_mutex_t ptmutex_queue;
  13.108 -#define ENTER_QUEUE_CR pthread_mutex_lock(&ptmutex_queue)
  13.109 -#define LEAVE_QUEUE_CR pthread_mutex_unlock(&ptmutex_queue)
  13.110 -
  13.111 -pthread_mutex_t ptmutex_recv;
  13.112 -#define ENTER_RECV_CR pthread_mutex_lock(&ptmutex_recv)
  13.113 -#define LEAVE_RECV_CR pthread_mutex_unlock(&ptmutex_recv)
  13.114 -
  13.115 -/* A message queue entry. We allocate one of these for every request we send.
  13.116 - * Asynchronous reply reception also used one of these.
  13.117 - */
  13.118 -typedef struct bsq_t_struct {
  13.119 -    struct bsq_t_struct *prev;
  13.120 -    struct bsq_t_struct *next;
  13.121 -    int status;
  13.122 -    int server;
  13.123 -    int length;
  13.124 -    struct msghdr msghdr;
  13.125 -    struct iovec iov[2];
  13.126 -    int tid;
  13.127 -    struct timeval tv_sent;
  13.128 -    bshdr_t message;
  13.129 -    void *block;
  13.130 -} bsq_t;
  13.131 -
  13.132 -#define BSQ_STATUS_MATCHED 1
  13.133 -
  13.134 -pthread_mutex_t ptmutex_luid;
  13.135 -#define ENTER_LUID_CR pthread_mutex_lock(&ptmutex_luid)
  13.136 -#define LEAVE_LUID_CR pthread_mutex_unlock(&ptmutex_luid)
  13.137 -
  13.138 -static uint64_t luid_cnt = 0x1000ULL;
  13.139 -uint64_t new_luid(void) {
  13.140 -    uint64_t luid;
  13.141 -    ENTER_LUID_CR;
  13.142 -    luid = luid_cnt++;
  13.143 -    LEAVE_LUID_CR;
  13.144 -    return luid;
  13.145 -}
  13.146 -
  13.147 -/* Queue of outstanding requests.
  13.148 - */
  13.149 -bsq_t *bs_head = NULL;
  13.150 -bsq_t *bs_tail = NULL;
  13.151 -int bs_qlen = 0;
  13.152 -
  13.153 -/*
  13.154 - */
  13.155 -void queuedebug(char *msg) {
  13.156 -    bsq_t *q;
  13.157 -    ENTER_QUEUE_CR;
  13.158 -    fprintf(stderr, "Q: %s len=%u\n", msg, bs_qlen);
  13.159 -    for (q = bs_head; q; q = q->next) {
  13.160 -        fprintf(stderr, "  luid=%016llx server=%u\n",
  13.161 -                q->message.luid, q->server);
  13.162 -    }
  13.163 -    LEAVE_QUEUE_CR;
  13.164 -}
  13.165 -
  13.166 -int enqueue(bsq_t *qe) {
  13.167 -    ENTER_QUEUE_CR;
  13.168 -    qe->next = NULL;
  13.169 -    qe->prev = bs_tail;
  13.170 -    if (!bs_head)
  13.171 -        bs_head = qe;
  13.172 -    else
  13.173 -        bs_tail->next = qe;
  13.174 -    bs_tail = qe;
  13.175 -    bs_qlen++;
  13.176 -    LEAVE_QUEUE_CR;
  13.177 -#ifdef BSDEBUG
  13.178 -    queuedebug("enqueue");
  13.179 -#endif
  13.180 -    return 0;
  13.181 -}
  13.182 -
  13.183 -int dequeue(bsq_t *qe) {
  13.184 -    bsq_t *q;
  13.185 -    ENTER_QUEUE_CR;
  13.186 -    for (q = bs_head; q; q = q->next) {
  13.187 -        if (q == qe) {
  13.188 -            if (q->prev)
  13.189 -                q->prev->next = q->next;
  13.190 -            else 
  13.191 -                bs_head = q->next;
  13.192 -            if (q->next)
  13.193 -                q->next->prev = q->prev;
  13.194 -            else
  13.195 -                bs_tail = q->prev;
  13.196 -            bs_qlen--;
  13.197 -            goto found;
  13.198 -        }
  13.199 -    }
  13.200 -
  13.201 -    LEAVE_QUEUE_CR;
  13.202 -#ifdef BSDEBUG
  13.203 -    queuedebug("dequeue not found");
  13.204 -#endif
  13.205 -    return 0;
  13.206 -
  13.207 -    found:
  13.208 -    LEAVE_QUEUE_CR;
  13.209 -#ifdef BSDEBUG
  13.210 -    queuedebug("dequeue not found");
  13.211 -#endif
  13.212 -    return 1;
  13.213 -}
  13.214 -
  13.215 -bsq_t *queuesearch(bsq_t *qe) {
  13.216 -    bsq_t *q;
  13.217 -    ENTER_QUEUE_CR;
  13.218 -    for (q = bs_head; q; q = q->next) {
  13.219 -        if ((qe->server == q->server) &&
  13.220 -            (qe->message.operation == q->message.operation) &&
  13.221 -            (qe->message.luid == q->message.luid)) {
  13.222 -
  13.223 -            if ((q->message.operation == BSOP_READBLOCK) &&
  13.224 -                ((q->message.flags & BSOP_FLAG_ERROR) == 0)) {
  13.225 -                q->block = qe->block;
  13.226 -                qe->block = NULL;
  13.227 -            }
  13.228 -            q->length = qe->length;
  13.229 -            q->message.flags = qe->message.flags;
  13.230 -            q->message.id = qe->message.id;
  13.231 -            q->status |= BSQ_STATUS_MATCHED;
  13.232 -
  13.233 -            if (q->prev)
  13.234 -                q->prev->next = q->next;
  13.235 -            else 
  13.236 -                bs_head = q->next;
  13.237 -            if (q->next)
  13.238 -                q->next->prev = q->prev;
  13.239 -            else
  13.240 -                bs_tail = q->prev;
  13.241 -            q->next = NULL;
  13.242 -            q->prev = NULL;
  13.243 -            bs_qlen--;
  13.244 -            goto found;
  13.245 -        }
  13.246 -    }
  13.247 -
  13.248 -    LEAVE_QUEUE_CR;
  13.249 -#ifdef BSDEBUG
  13.250 -    queuedebug("queuesearch not found");
  13.251 -#endif
  13.252 -    return NULL;
  13.253 -
  13.254 -    found:
  13.255 -    LEAVE_QUEUE_CR;
  13.256 -#ifdef BSDEBUG
  13.257 -    queuedebug("queuesearch found");
  13.258 -#endif
  13.259 -    return q;
  13.260 -}
  13.261 -
  13.262 -/*****************************************************************************
  13.263 - * Network communication                                                     *
  13.264 - *****************************************************************************/
  13.265 -
  13.266 -int send_message(bsq_t *qe) {
  13.267 -    int rc;
  13.268 -
  13.269 -    qe->msghdr.msg_name = (void *)&(bsservers[qe->server].sin);
  13.270 -    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
  13.271 -    qe->msghdr.msg_iov = qe->iov;
  13.272 -    if (qe->block)
  13.273 -        qe->msghdr.msg_iovlen = 2;
  13.274 -    else
  13.275 -        qe->msghdr.msg_iovlen = 1;
  13.276 -    qe->msghdr.msg_control = NULL;
  13.277 -    qe->msghdr.msg_controllen = 0;
  13.278 -    qe->msghdr.msg_flags = 0;
  13.279 -
  13.280 -    qe->iov[0].iov_base = (void *)&(qe->message);
  13.281 -    qe->iov[0].iov_len = MSGBUFSIZE_ID;
  13.282 -
  13.283 -    if (qe->block) {
  13.284 -        qe->iov[1].iov_base = qe->block;
  13.285 -        qe->iov[1].iov_len = BLOCK_SIZE;
  13.286 -    }
  13.287 -
  13.288 -    qe->message.luid = new_luid();
  13.289 -
  13.290 -    qe->status = 0;
  13.291 -    qe->tid = (int)pthread_getspecific(tid_key);
  13.292 -    if (enqueue(qe) < 0) {
  13.293 -        fprintf(stderr, "Error enqueuing request.\n");
  13.294 -        return -1;
  13.295 -    }
  13.296 -
  13.297 -    gettimeofday(&(qe->tv_sent), NULL);
  13.298 -    DB("send_message to %d luid=%016llx\n", qe->server, qe->message.luid);
  13.299 -    rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
  13.300 -    //rc = sendto(bssock, (void *)&(qe->message), qe->length, 0,
  13.301 -    //           (struct sockaddr *)&(bsservers[qe->server].sin),
  13.302 -    //           sizeof(struct sockaddr_in));
  13.303 -    if (rc < 0)
  13.304 -        return rc;
  13.305 -
  13.306 -    return rc;
  13.307 -}
  13.308 -
  13.309 -int recv_message(bsq_t *qe) {
  13.310 -    struct sockaddr_in from;
  13.311 -    //int flen = sizeof(from);
  13.312 -    int rc;
  13.313 -
  13.314 -    qe->msghdr.msg_name = &from;
  13.315 -    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
  13.316 -    qe->msghdr.msg_iov = qe->iov;
  13.317 -    if (qe->block)
  13.318 -        qe->msghdr.msg_iovlen = 2;
  13.319 -    else
  13.320 -        qe->msghdr.msg_iovlen = 1;
  13.321 -    qe->msghdr.msg_control = NULL;
  13.322 -    qe->msghdr.msg_controllen = 0;
  13.323 -    qe->msghdr.msg_flags = 0;
  13.324 -
  13.325 -    qe->iov[0].iov_base = (void *)&(qe->message);
  13.326 -    qe->iov[0].iov_len = MSGBUFSIZE_ID;
  13.327 -    if (qe->block) {
  13.328 -        qe->iov[1].iov_base = qe->block;
  13.329 -        qe->iov[1].iov_len = BLOCK_SIZE;
  13.330 -    }
  13.331 -
  13.332 -    rc = recvmsg(bssock, &(qe->msghdr), 0);
  13.333 -
  13.334 -    //return recvfrom(bssock, (void *)&(qe->message), sizeof(bsmsg_t), 0,
  13.335 -    //               (struct sockaddr *)&from, &flen);
  13.336 -    return rc;
  13.337 -}
  13.338 -
  13.339 -int get_server_number(struct sockaddr_in *sin) {
  13.340 -    int i;
  13.341 -
  13.342 -#ifdef BSDEBUG2
  13.343 -    fprintf(stderr,
  13.344 -            "get_server_number(%u.%u.%u.%u/%u)\n",
  13.345 -            (unsigned int)sin->sin_addr.s_addr & 0xff,
  13.346 -            ((unsigned int)sin->sin_addr.s_addr >> 8) & 0xff,
  13.347 -            ((unsigned int)sin->sin_addr.s_addr >> 16) & 0xff,
  13.348 -            ((unsigned int)sin->sin_addr.s_addr >> 24) & 0xff,
  13.349 -            (unsigned int)sin->sin_port);
  13.350 -#endif
  13.351 -
  13.352 -    for (i = 0; i < MAX_SERVERS; i++) {
  13.353 -        if (bsservers[i].hostname) {
  13.354 -#ifdef BSDEBUG2
  13.355 -            fprintf(stderr,
  13.356 -                    "get_server_number check %u.%u.%u.%u/%u\n",
  13.357 -                    (unsigned int)bsservers[i].sin.sin_addr.s_addr&0xff,
  13.358 -                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 8)&0xff,
  13.359 -                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 16)&0xff,
  13.360 -                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 24)&0xff,
  13.361 -                    (unsigned int)bsservers[i].sin.sin_port);
  13.362 -#endif
  13.363 -            if ((sin->sin_family == bsservers[i].sin.sin_family) &&
  13.364 -                (sin->sin_port == bsservers[i].sin.sin_port) &&
  13.365 -                (memcmp((void *)&(sin->sin_addr),
  13.366 -                        (void *)&(bsservers[i].sin.sin_addr),
  13.367 -                        sizeof(struct in_addr)) == 0)) {
  13.368 -                return i;
  13.369 -            }
  13.370 -        }        
  13.371 -    }
  13.372 -
  13.373 -    return -1;
  13.374 -}
  13.375 -
  13.376 -void *rx_buffer = NULL;
  13.377 -bsq_t rx_qe;
  13.378 -bsq_t *recv_any(void) {
  13.379 -    struct sockaddr_in from;
  13.380 -    int rc;
  13.381 -    
  13.382 -    DB("ENTER recv_any\n");
  13.383 -
  13.384 -    rx_qe.msghdr.msg_name = &from;
  13.385 -    rx_qe.msghdr.msg_namelen = sizeof(struct sockaddr_in);
  13.386 -    rx_qe.msghdr.msg_iov = rx_qe.iov;
  13.387 -    if (!rx_buffer) {
  13.388 -        rx_buffer = malloc(BLOCK_SIZE);
  13.389 -        if (!rx_buffer) {
  13.390 -            perror("recv_any malloc");
  13.391 -            return NULL;
  13.392 -        }
  13.393 -    }
  13.394 -    rx_qe.block = rx_buffer;
  13.395 -    rx_buffer = NULL;
  13.396 -    rx_qe.msghdr.msg_iovlen = 2;
  13.397 -    rx_qe.msghdr.msg_control = NULL;
  13.398 -    rx_qe.msghdr.msg_controllen = 0;
  13.399 -    rx_qe.msghdr.msg_flags = 0;
  13.400 -    
  13.401 -    rx_qe.iov[0].iov_base = (void *)&(rx_qe.message);
  13.402 -    rx_qe.iov[0].iov_len = MSGBUFSIZE_ID;
  13.403 -    rx_qe.iov[1].iov_base = rx_qe.block;
  13.404 -    rx_qe.iov[1].iov_len = BLOCK_SIZE;
  13.405 -
  13.406 -    rc = recvmsg(bssock, &(rx_qe.msghdr), 0);
  13.407 -    if (rc < 0) {
  13.408 -        perror("recv_any");
  13.409 -        return NULL;
  13.410 -    }
  13.411 -
  13.412 -    rx_qe.length = rc;    
  13.413 -    rx_qe.server = get_server_number(&from);
  13.414 -
  13.415 -    DB("recv_any from %d luid=%016llx len=%u\n",
  13.416 -       rx_qe.server, rx_qe.message.luid, rx_qe.length);
  13.417 -
  13.418 -    return &rx_qe;
  13.419 -}
  13.420 -
  13.421 -void recv_recycle_buffer(bsq_t *q) {
  13.422 -    if (q->block) {
  13.423 -        rx_buffer = q->block;
  13.424 -        q->block = NULL;
  13.425 -    }
  13.426 -}
  13.427 -
  13.428 -// cycle through reading any incoming, searching for a match in the
  13.429 -// queue, until we have all we need.
  13.430 -int wait_recv(bsq_t **reqs, int numreqs) {
  13.431 -    bsq_t *q, *m;
  13.432 -    unsigned int x, i;
  13.433 -    int tid = (int)pthread_getspecific(tid_key);
  13.434 -
  13.435 -    DB("ENTER wait_recv %u\n", numreqs);
  13.436 -
  13.437 -    checkmatch:
  13.438 -    x = 0xffffffff;
  13.439 -    for (i = 0; i < numreqs; i++) {
  13.440 -        x &= reqs[i]->status;
  13.441 -    }
  13.442 -    if ((x & BSQ_STATUS_MATCHED)) {
  13.443 -        DB("LEAVE wait_recv\n");
  13.444 -        return numreqs;
  13.445 -    }
  13.446 -
  13.447 -    RECV_AWAIT(tid);
  13.448 -
  13.449 -    /*
  13.450 -    rxagain:
  13.451 -    ENTER_RECV_CR;
  13.452 -    q = recv_any();
  13.453 -    LEAVE_RECV_CR;
  13.454 -    if (!q)
  13.455 -        return -1;
  13.456 -
  13.457 -    m = queuesearch(q);
  13.458 -    recv_recycle_buffer(q);
  13.459 -    if (!m) {
  13.460 -        fprintf(stderr, "Unmatched RX\n");
  13.461 -        goto rxagain;
  13.462 -    }
  13.463 -    */
  13.464 -
  13.465 -    goto checkmatch;
  13.466 -
  13.467 -}
  13.468 -
  13.469 -/* retry
  13.470 - */
  13.471 -static int retry_count = 0;
  13.472 -int retry(bsq_t *qe)
  13.473 -{
  13.474 -    int rc;
  13.475 -    gettimeofday(&(qe->tv_sent), NULL);
  13.476 -    DB("retry to %d luid=%016llx\n", qe->server, qe->message.luid);
  13.477 -    retry_count++;
  13.478 -    rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
  13.479 -    if (rc < 0)
  13.480 -        return rc;
  13.481 -    return 0;
  13.482 -}
  13.483 -
  13.484 -/* queue runner
  13.485 - */
  13.486 -void *queue_runner(void *arg)
  13.487 -{
  13.488 -    for (;;) {
  13.489 -        struct timeval now;
  13.490 -        long long nowus, sus;
  13.491 -        bsq_t *q;
  13.492 -        int r;
  13.493 -
  13.494 -        sleep(1);
  13.495 -
  13.496 -        gettimeofday(&now, NULL);
  13.497 -        nowus = now.tv_usec + now.tv_sec * 1000000;
  13.498 -        ENTER_QUEUE_CR;
  13.499 -        r = retry_count;
  13.500 -        for (q = bs_head; q; q = q->next) {
  13.501 -            sus = q->tv_sent.tv_usec + q->tv_sent.tv_sec * 1000000;
  13.502 -            if ((nowus - sus) > RETRY_TIMEOUT) {
  13.503 -                if (retry(q) < 0) {
  13.504 -                    fprintf(stderr, "Error on sendmsg retry.\n");
  13.505 -                }
  13.506 -            }
  13.507 -        }
  13.508 -        if (r != retry_count) {
  13.509 -            fprintf(stderr, "RETRIES: %u %u\n", retry_count - r, retry_count);
  13.510 -        }
  13.511 -        LEAVE_QUEUE_CR;
  13.512 -    }
  13.513 -}
  13.514 -
  13.515 -/* receive loop
  13.516 - */
  13.517 -void *receive_loop(void *arg)
  13.518 -{
  13.519 -    bsq_t *q, *m;
  13.520 -
  13.521 -    for(;;) {
  13.522 -        q = recv_any();
  13.523 -        if (!q) {
  13.524 -            fprintf(stderr, "recv_any error\n");
  13.525 -        }
  13.526 -        else {
  13.527 -            m = queuesearch(q);
  13.528 -            recv_recycle_buffer(q);
  13.529 -            if (!m) {
  13.530 -                fprintf(stderr, "Unmatched RX\n");
  13.531 -            }
  13.532 -            else {
  13.533 -                DB("RX MATCH");
  13.534 -                RECV_NOTIFY(m->tid);
  13.535 -            }
  13.536 -        }
  13.537 -    }
  13.538 -}
  13.539 -pthread_t pthread_recv;
  13.540 -
  13.541 -/*****************************************************************************
  13.542 - * Reading                                                                   *
  13.543 - *****************************************************************************/
  13.544 -
  13.545 -void *readblock_indiv(int server, uint64_t id) {
  13.546 -    void *block;
  13.547 -    bsq_t *qe;
  13.548 -    int len, rc;
  13.549 -
  13.550 -    qe = (bsq_t *)malloc(sizeof(bsq_t));
  13.551 -    if (!qe) {
  13.552 -        perror("readblock qe malloc");
  13.553 -        return NULL;
  13.554 -    }
  13.555 -    qe->block = NULL;
  13.556 -    
  13.557 -    /*
  13.558 -    qe->block = malloc(BLOCK_SIZE);
  13.559 -    if (!qe->block) {
  13.560 -        perror("readblock qe malloc");
  13.561 -        free((void *)qe);
  13.562 -        return NULL;
  13.563 -    }
  13.564 -    */
  13.565 -
  13.566 -    qe->server = server;
  13.567 -
  13.568 -    qe->message.operation = BSOP_READBLOCK;
  13.569 -    qe->message.flags = 0;
  13.570 -    qe->message.id = id;
  13.571 -    qe->length = MSGBUFSIZE_ID;
  13.572 -
  13.573 -    if (send_message(qe) < 0) {
  13.574 -        perror("readblock sendto");
  13.575 -        goto err;
  13.576 -    }
  13.577 -    
  13.578 -    /*len = recv_message(qe);
  13.579 -    if (len < 0) {
  13.580 -        perror("readblock recv");
  13.581 -        goto err;
  13.582 -    }*/
  13.583 -
  13.584 -    rc = wait_recv(&qe, 1);
  13.585 -    if (rc < 0) {
  13.586 -        perror("readblock recv");
  13.587 -        goto err;
  13.588 -    }
  13.589 -
  13.590 -    if ((qe->message.flags & BSOP_FLAG_ERROR)) {
  13.591 -        fprintf(stderr, "readblock server error\n");
  13.592 -        goto err;
  13.593 -    }
  13.594 -    if (qe->length < MSGBUFSIZE_BLOCK) {
  13.595 -        fprintf(stderr, "readblock recv short (%u)\n", len);
  13.596 -        goto err;
  13.597 -    }
  13.598 -    /* if ((block = malloc(BLOCK_SIZE)) == NULL) {
  13.599 -        perror("readblock malloc");
  13.600 -        goto err;
  13.601 -    }
  13.602 -    memcpy(block, qe->message.block, BLOCK_SIZE);
  13.603 -    */    
  13.604 -    block = qe->block;
  13.605 -
  13.606 -    free((void *)qe);
  13.607 -    return block;
  13.608 -
  13.609 -    err:
  13.610 -    free(qe->block);
  13.611 -    free((void *)qe);
  13.612 -    return NULL;
  13.613 -}
  13.614 -
  13.615 -/**
  13.616 - * readblock: read a block from disk
  13.617 - *   @id: block id to read; packs a cluster map index and one id per
  13.618 - *        replica (see BSID_MAP and BSID_REPLICA0..2)
  13.619 - *   Only one replica is contacted per call.
  13.620 - *
  13.621 - *   @return: pointer to block, NULL on error
  13.622 - */
  13.623 -void *readblock(uint64_t id) {
  13.624 -    /* Replica used by the previous call. It is shared by every caller
  13.625 -     * and updated here without any locking. */
  13.626 -    static int i = CLUSTER_MAX_REPLICAS - 1;
  13.627 -    int map = (int)BSID_MAP(id);
  13.628 -    uint64_t xid;
  13.629 -    void *block = NULL;
  13.630 -
  13.631 -    /* special case for the "superblock" just use the first block on the
  13.632 -     * first replica. (extend to blocks < 6 for vdi bug)
  13.633 -     */
  13.634 -    if (id < 6) {
  13.635 -        block = readblock_indiv(bsclusters[map].servers[0], id);
  13.636 -        goto out;
  13.637 -    }
  13.638 -
  13.639 -    /* Rotate to the next replica, one step per call. */
  13.640 -    i = (i + 1 >= CLUSTER_MAX_REPLICAS) ? 0 : i + 1;
  13.641 -    switch (i) {
  13.642 -    case 0:  xid = BSID_REPLICA0(id); break;
  13.643 -    case 1:  xid = BSID_REPLICA1(id); break;
  13.644 -    case 2:  xid = BSID_REPLICA2(id); break;
  13.645 -    default: /* the id has no field for this replica: send nothing */
  13.646 -        fprintf(stderr, "readblock: no id for replica %d\n", i);
  13.647 -        goto out;
  13.648 -    }
  13.649 -
  13.650 -    block = readblock_indiv(bsclusters[map].servers[i], xid);
  13.651 -
  13.652 -    out:
  13.653 -#ifdef BSDEBUG
  13.654 -    if (block)
  13.655 -        fprintf(stderr, "READ:  %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
  13.656 -                (unsigned long long)id,
  13.657 -                (unsigned int)((unsigned char *)block)[0],
  13.658 -                (unsigned int)((unsigned char *)block)[1],
  13.659 -                (unsigned int)((unsigned char *)block)[2],
  13.660 -                (unsigned int)((unsigned char *)block)[3],
  13.661 -                (unsigned int)((unsigned char *)block)[4],
  13.662 -                (unsigned int)((unsigned char *)block)[5],
  13.663 -                (unsigned int)((unsigned char *)block)[6],
  13.664 -                (unsigned int)((unsigned char *)block)[7]);
  13.665 -    else
  13.666 -        fprintf(stderr, "READ:  %016llx NULL\n", (unsigned long long)id);
  13.667 -#endif
  13.668 -    return block;
  13.669 -}
  13.670 -
  13.671 -/*****************************************************************************
  13.672 - * Writing                                                                   *
  13.673 - *****************************************************************************/
  13.674 -
  13.675 -/* writeblock_indiv: build a BSOP_WRITEBLOCK request for one server and
  13.676 - * hand it to send_message().
  13.677 - *   @return: the request, which the caller must free, or NULL if it
  13.678 - *            could not be allocated or sent */
  13.679 -bsq_t *writeblock_indiv(int server, uint64_t id, void *block) {
  13.680 -    bsq_t *req = malloc(sizeof(*req));
  13.681 -
  13.682 -    if (req == NULL) {
  13.683 -        perror("writeblock qe malloc");
  13.684 -        return NULL;
  13.685 -    }
  13.686 -
  13.687 -    /* Header: write this block under the given id, no flags set. */
  13.688 -    req->message.operation = BSOP_WRITEBLOCK;
  13.689 -    req->message.flags = 0;
  13.690 -    req->message.id = id;
  13.691 -
  13.692 -    /* The payload is referenced, not copied, so block must stay valid
  13.693 -     * while the request is in use. */
  13.694 -    req->block = block;
  13.695 -    req->length = MSGBUFSIZE_BLOCK;
  13.696 -    req->server = server;
  13.697 -
  13.698 -    if (send_message(req) >= 0)
  13.699 -        return req;
  13.700 -
  13.701 -    perror("writeblock sendto");
  13.702 -    free(req);
  13.703 -    return NULL;
  13.704 -}
  13.705 -    
  13.706 -
  13.707 -/**
  13.708 - * writeblock: write an existing block to disk
  13.709 - *   @id: block id
  13.710 - *   @block: pointer to block
  13.711 - *
  13.712 - *   Ids below 6 are sent to the first replica only, under the id as
  13.713 - *   given. Any other id is split into its three per-replica ids and
  13.714 - *   written to every replica of its cluster. Either way the write only
  13.715 - *   succeeds if wait_recv() succeeds and no reply carries
  13.716 - *   BSOP_FLAG_ERROR.
  13.717 - *
  13.718 - *   @return: zero on success, -1 on failure
  13.719 - */
  13.720 -int writeblock(uint64_t id, void *block) {
  13.721 -
  13.722 -    int map = (int)BSID_MAP(id);
  13.723 -    int rep0 = bsclusters[map].servers[0];
  13.724 -    int rep1 = bsclusters[map].servers[1];
  13.725 -    int rep2 = bsclusters[map].servers[2];
  13.726 -    bsq_t *reqs[3];
  13.727 -    int nreqs = 3;   /* how many entries of reqs were sent */
  13.728 -    int rc;
  13.729 -    int n;
  13.730 -
  13.731 -    reqs[0] = reqs[1] = reqs[2] = NULL;
  13.732 -
  13.733 -#ifdef BSDEBUG
  13.734 -    fprintf(stderr,
  13.735 -            "WRITE: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
  13.736 -            (unsigned long long)id,
  13.737 -            (unsigned int)((unsigned char *)block)[0],
  13.738 -            (unsigned int)((unsigned char *)block)[1],
  13.739 -            (unsigned int)((unsigned char *)block)[2],
  13.740 -            (unsigned int)((unsigned char *)block)[3],
  13.741 -            (unsigned int)((unsigned char *)block)[4],
  13.742 -            (unsigned int)((unsigned char *)block)[5],
  13.743 -            (unsigned int)((unsigned char *)block)[6],
  13.744 -            (unsigned int)((unsigned char *)block)[7]);
  13.745 -#endif
  13.746 -
  13.747 -    /* special case for the "superblock" just use the first block on the
  13.748 -     * first replica. (extend to blocks < 6 for vdi bug)
  13.749 -     */
  13.750 -    if (id < 6) {
  13.751 -        nreqs = 1;
  13.752 -        reqs[0] = writeblock_indiv(rep0, id, block);
  13.753 -        if (!reqs[0])
  13.754 -            return -1;
  13.755 -    } else {
  13.756 -        /* one request per replica, each under its own per-replica id */
  13.757 -        reqs[0] = writeblock_indiv(rep0, BSID_REPLICA0(id), block);
  13.758 -        if (!reqs[0])
  13.759 -            goto err;
  13.760 -        reqs[1] = writeblock_indiv(rep1, BSID_REPLICA1(id), block);
  13.761 -        if (!reqs[1])
  13.762 -            goto err;
  13.763 -        reqs[2] = writeblock_indiv(rep2, BSID_REPLICA2(id), block);
  13.764 -        if (!reqs[2])
  13.765 -            goto err;
  13.766 -    }
  13.767 -
  13.768 -    /* wait_recv() is given the request array and how many to use */
  13.769 -    rc = wait_recv(reqs, nreqs);
  13.770 -    if (rc < 0) {
  13.771 -        perror("writeblock recv");
  13.772 -        goto err;
  13.773 -    }
  13.774 -    /* a reply can arrive and still report a server-side failure */
  13.775 -    for (n = 0; n < nreqs; n++) {
  13.776 -        if ((reqs[n]->message.flags & BSOP_FLAG_ERROR)) {
  13.777 -            fprintf(stderr, "writeblock server%d error\n", n);
  13.778 -            goto err;
  13.779 -        }
  13.780 -    }
  13.781 -
  13.782 -    /* Every reply is in, so the requests can go. This covers the
  13.783 -     * single-replica case too, which must not leak its request. */
  13.784 -    for (n = 0; n < nreqs; n++)
  13.785 -        free((void *)reqs[n]);
  13.786 -    return 0;
  13.787 -
  13.788 -    err:
  13.789 -    /* dequeue() each request that was created before freeing it;
  13.790 -     * slots that were never filled are still NULL. */
  13.791 -    for (n = 0; n < 3; n++) {
  13.792 -        if (reqs[n]) {
  13.793 -            dequeue(reqs[n]);
  13.794 -            free((void *)reqs[n]);
  13.795 -        }
  13.796 -    }
  13.797 -    return -1;
  13.798 -}
  13.799 -
  13.800 -/*****************************************************************************
  13.801 - * Allocation                                                                *
  13.802 - *****************************************************************************/
  13.803 -
  13.804 -/**
  13.805 - * allocblock: write a new block to disk with an allocation hint of 0
  13.806 - *   @block: pointer to block
  13.807 - *   @return: whatever allocblock_hint() returns for that hint
  13.808 - */
  13.809 -uint64_t allocblock(void *block) {
  13.810 -    const uint64_t default_hint = 0;
  13.811 -    return allocblock_hint(block, default_hint);
  13.812 -}
  13.813 -
  13.814 -/* allocblock_hint_indiv: build a BSOP_ALLOCBLOCK request for one server,
  13.815 - * carrying the hint in the id field, and hand it to send_message().
  13.816 - *   @return: the request (caller frees), NULL if allocation or send failed */
  13.817 -bsq_t *allocblock_hint_indiv(int server, void *block, uint64_t hint) {
  13.818 -    bsq_t *req = malloc(sizeof(*req));
  13.819 -
  13.820 -    if (req == NULL) {
  13.821 -        perror("allocblock_hint qe malloc");
  13.822 -        return NULL;
  13.823 -    }
  13.824 -
  13.825 -    /* Header: the hint travels where other operations put the block id. */
  13.826 -    req->message.operation = BSOP_ALLOCBLOCK;
  13.827 -    req->message.flags = 0;
  13.828 -    req->message.id = hint;
  13.829 -
  13.830 -    /* The payload is referenced, not copied, so block must stay valid
  13.831 -     * while the request is in use. */
  13.832 -    req->block = block;
  13.833 -    req->length = MSGBUFSIZE_BLOCK;
  13.834 -    req->server = server;
  13.835 -
  13.836 -    if (send_message(req) >= 0)
  13.837 -        return req;
  13.838 -
  13.839 -    perror("allocblock_hint sendto");
  13.840 -    free(req);
  13.841 -    return NULL;
  13.842 -}
  13.843 -
  13.844 -/**
  13.845 - * allocblock_hint: write a new block to disk
  13.846 - *   @block: pointer to block
  13.847 - *   @hint: allocation hint; used here as the cluster index and also
  13.848 - *          sent to each server in the request's id field
  13.849 - *
  13.850 - *   @return: new id of block on disk, 0 on failure
  13.851 - */
  13.852 -uint64_t allocblock_hint(void *block, uint64_t hint) {
  13.853 -    int map, rep0, rep1, rep2;
  13.854 -    bsq_t *reqs[3];
  13.855 -    int rc;
  13.856 -    int n;
  13.857 -    uint64_t id0, id1, id2;
  13.858 -
  13.859 -    reqs[0] = reqs[1] = reqs[2] = NULL;
  13.860 -
  13.861 -    DB("ENTER allocblock\n");
  13.862 -
  13.863 -    /* The hint indexes bsclusters and ends up in the 4-bit map field of
  13.864 -     * the returned id, so refuse anything past MAX_CLUSTERS before it is
  13.865 -     * narrowed to int. NOTE(review): assumes bsclusters has that size. */
  13.866 -    if (hint >= MAX_CLUSTERS) {
  13.867 -        fprintf(stderr, "allocblock bad hint\n");
  13.868 -        goto err;
  13.869 -    }
  13.870 -    map = (int)hint;
  13.871 -    rep0 = bsclusters[map].servers[0];
  13.872 -    rep1 = bsclusters[map].servers[1];
  13.873 -    rep2 = bsclusters[map].servers[2];
  13.874 -
  13.875 -    reqs[0] = allocblock_hint_indiv(rep0, block, hint);
  13.876 -    if (!reqs[0])
  13.877 -        goto err;
  13.878 -    reqs[1] = allocblock_hint_indiv(rep1, block, hint);
  13.879 -    if (!reqs[1])
  13.880 -        goto err;
  13.881 -    reqs[2] = allocblock_hint_indiv(rep2, block, hint);
  13.882 -    if (!reqs[2])
  13.883 -        goto err;
  13.884 -
  13.885 -    rc = wait_recv(reqs, 3);
  13.886 -    if (rc < 0) {
  13.887 -        perror("allocblock recv");
  13.888 -        goto err;
  13.889 -    }
  13.890 -    for (n = 0; n < 3; n++) {
  13.891 -        if ((reqs[n]->message.flags & BSOP_FLAG_ERROR)) {
  13.892 -            fprintf(stderr, "allocblock server%d error\n", n);
  13.893 -            goto err;
  13.894 -        }
  13.895 -    }
  13.896 -
  13.897 -    /* after wait_recv() the headers carry the per-replica ids */
  13.898 -    id0 = reqs[0]->message.id;
  13.899 -    id1 = reqs[1]->message.id;
  13.900 -    id2 = reqs[2]->message.id;
  13.901 -
  13.902 -#ifdef BSDEBUG
  13.903 -    fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
  13.904 -            (unsigned long long)BSID(map, id0, id1, id2),
  13.905 -            (unsigned int)((unsigned char *)block)[0],
  13.906 -            (unsigned int)((unsigned char *)block)[1],
  13.907 -            (unsigned int)((unsigned char *)block)[2],
  13.908 -            (unsigned int)((unsigned char *)block)[3],
  13.909 -            (unsigned int)((unsigned char *)block)[4],
  13.910 -            (unsigned int)((unsigned char *)block)[5],
  13.911 -            (unsigned int)((unsigned char *)block)[6],
  13.912 -            (unsigned int)((unsigned char *)block)[7]);
  13.913 -#endif
  13.914 -
  13.915 -    for (n = 0; n < 3; n++)
  13.916 -        free((void *)reqs[n]);
  13.917 -    return BSID(map, id0, id1, id2);
  13.918 -
  13.919 -    err:
  13.920 -    /* dequeue() and free whatever was created; unused slots are NULL */
  13.921 -    for (n = 0; n < 3; n++) {
  13.922 -        if (reqs[n]) {
  13.923 -            dequeue(reqs[n]);
  13.924 -            free((void *)reqs[n]);
  13.925 -        }
  13.926 -    }
  13.927 -    return 0;
  13.928 -}
  13.929 -
  13.930 -#else /* /BLOCKSTORE_REMOTE */
  13.931 -
  13.932 -/*****************************************************************************
  13.933 - * Local storage version                                                     *
  13.934 - *****************************************************************************/
  13.935 - 
  13.936 -/**
  13.937 - * readblock: read a block from disk
  13.938 - *   @id: block id to read; stored at offset (id - 1) * BLOCK_SIZE
  13.939 - *
  13.940 - *   @return: malloc'd block (the caller frees it), NULL on error
  13.941 - */
  13.942 -void *readblock(uint64_t id) {
  13.943 -    void *block;
  13.944 -    ssize_t got;
  13.945 -    int block_fp;
  13.946 -
  13.947 -    block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
  13.948 -    if (block_fp < 0) {
  13.949 -        perror("open");
  13.950 -        return NULL;
  13.951 -    }
  13.952 -    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
  13.953 -        /* perror first: the printf calls may overwrite errno */
  13.954 -        perror("readblock lseek");
  13.955 -        printf ("%lld ", (long long)id);
  13.956 -        printf ("%lld\n", (long long)((id - 1) * BLOCK_SIZE));
  13.957 -        goto err;
  13.958 -    }
  13.959 -    if ((block = malloc(BLOCK_SIZE)) == NULL) {
  13.960 -        perror("readblock malloc");
  13.961 -        goto err;
  13.962 -    }
  13.963 -    got = read(block_fp, block, BLOCK_SIZE);
  13.964 -    if (got != BLOCK_SIZE) {   /* errno is only set if read() failed */
  13.965 -        if (got < 0) perror("readblock read");
  13.966 -        else fprintf(stderr, "readblock read: short read\n");
  13.967 -        free(block);
  13.968 -        goto err;
  13.969 -    }
  13.970 -    close(block_fp);
  13.971 -    return block;
  13.972 -
  13.973 -err:
  13.974 -    close(block_fp);
  13.975 -    return NULL;
  13.976 -}
  13.977 -
  13.978 -/**
  13.979 - * writeblock: write an existing block to disk
  13.980 - *   @id: block id; the block lands at offset (id - 1) * BLOCK_SIZE
  13.981 - *   @block: pointer to BLOCK_SIZE bytes to store
  13.982 - *
  13.983 - *   @return: zero on success, -1 on failure
  13.984 - */
  13.985 -int writeblock(uint64_t id, void *block) {
  13.986 -    int rc = -1;
  13.987 -    int block_fp;
  13.988 -
  13.989 -    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
  13.990 -    if (block_fp < 0) {
  13.991 -        perror("open");
  13.992 -        return -1;
  13.993 -    }
  13.994 -
  13.995 -    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
  13.996 -        perror("writeblock lseek");
  13.997 -        goto out;
  13.998 -    }
  13.999 -    /* A short write is a failure too, the same test allocblock() uses:
 13.1000 -     * a partly written block must not be reported as stored. */
 13.1001 -    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
 13.1002 -        perror("writeblock write");
 13.1003 -        goto out;
 13.1004 -    }
 13.1005 -    rc = 0;
 13.1006 -
 13.1007 -out:
 13.1008 -    close(block_fp);
 13.1009 -    return rc;
 13.1010 -}
 13.1011 -
 13.1012 -/**
 13.1013 - * allocblock: append a new block to blockstore.dat
 13.1014 - *   @block: pointer to BLOCK_SIZE bytes to store
 13.1015 - *
 13.1016 - *   The file must already be a whole number of blocks long, otherwise
 13.1017 - *   nothing is written. Ids are 1-based: a block appended at offset
 13.1018 - *   pos gets id pos / BLOCK_SIZE + 1.
 13.1019 - *
 13.1020 - *   @return: new id of block on disk, 0 on failure
 13.1021 - */
 13.1022 -uint64_t allocblock(void *block) {
 13.1023 -    uint64_t new_id = 0;    /* stays 0 unless the write succeeds */
 13.1024 -    off64_t end;
 13.1025 -    int fd;
 13.1026 -
 13.1027 -    /* O_CREAT: a missing store is created empty rather than reported. */
 13.1028 -    fd = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
 13.1029 -    if (fd < 0) {
 13.1030 -        perror("open");
 13.1031 -        return 0;
 13.1032 -    }
 13.1033 -
 13.1034 -    /* Seeking to the end both measures the file and positions the write. */
 13.1035 -    end = lseek64(fd, 0, SEEK_END);
 13.1036 -    if (end == (off64_t)-1) {
 13.1037 -        perror("allocblock lseek");
 13.1038 -        goto done;
 13.1039 -    }
 13.1040 -    if (end % BLOCK_SIZE != 0) {
 13.1041 -        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
 13.1042 -        goto done;
 13.1043 -    }
 13.1044 -    if (write(fd, block, BLOCK_SIZE) != BLOCK_SIZE) {
 13.1045 -        perror("allocblock write");
 13.1046 -        goto done;
 13.1047 -    }
 13.1048 -    new_id = end / BLOCK_SIZE + 1;
 13.1049 -
 13.1050 -done:
 13.1051 -    close(fd);
 13.1052 -    return new_id;
 13.1053 -}
 13.1054 -
 13.1055 -/**
 13.1056 - * allocblock_hint: write a new block to disk
 13.1057 - *   @block: pointer to block
 13.1058 - *   @hint: allocation hint; ignored by the local store
 13.1059 - *   @return: new id of block on disk, as returned by allocblock()
 13.1060 - */
 13.1061 -uint64_t allocblock_hint(void *block, uint64_t hint) {
 13.1062 -    (void)hint;
 13.1063 -    return allocblock(block);
 13.1064 -}
 13.1065 -
 13.1066 -#endif /* BLOCKSTORE_REMOTE */
 13.1067 -
 13.1068 -/*****************************************************************************
 13.1069 - * Memory management                                                         *
 13.1070 - *****************************************************************************/
 13.1071 -
 13.1072 -/**
 13.1073 - * newblock: allocate one zero-filled, BLOCK_SIZE-byte block in memory
 13.1074 - *
 13.1075 - *   @return: the block, or NULL (after a perror) if allocation failed;
 13.1076 - *            release it with freeblock()
 13.1077 - */
 13.1078 -void *newblock(void) {
 13.1079 -    /* calloc hands back memory that is already zeroed */
 13.1080 -    void *zeroed = calloc(1, BLOCK_SIZE);
 13.1081 -
 13.1082 -    if (zeroed == NULL)
 13.1083 -        perror("newblock");
 13.1084 -    return zeroed;
 13.1085 -}
 13.1086 -
 13.1087 -
 13.1088 -/**
 13.1089 - * freeblock: release an in-memory block
 13.1090 - *   @block: block to be freed; NULL is accepted, as it is by free()
 13.1091 - *   Only the memory is released; nothing on disk is touched.
 13.1092 - */
 13.1093 -void freeblock(void *block) {
 13.1094 -    free(block);
 13.1095 -}
 13.1096 -
 13.1097 -/* new_freeblock: make an empty in-memory free-list block, or NULL if
 13.1098 - * newblock() fails. The caller releases it with freeblock(). */
 13.1099 -static freeblock_t *new_freeblock(void)
 13.1100 -{
 13.1101 -    freeblock_t *fresh = newblock();
 13.1102 -
 13.1103 -    if (fresh != NULL) {
 13.1104 -        /* tagged, with no successor and no ids recorded yet */
 13.1105 -        fresh->magic = FREEBLOCK_MAGIC;
 13.1106 -        fresh->next  = 0ULL;
 13.1107 -        fresh->count = 0ULL;
 13.1108 -        memset(fresh->list, 0, sizeof fresh->list);
 13.1109 -    }
 13.1110 -    return fresh;
 13.1111 -}
 13.1112 -
 13.1113 -/* releaseblock: add block id to the on-disk free list. A full current
 13.1114 - * free-list block is chained onto freelist_full and replaced by a new
 13.1115 - * one. Returns early if a block it needs cannot be read or created. */
 13.1116 -void releaseblock(uint64_t id)
 13.1117 -{
 13.1118 -    blockstore_super_t *bs_super;
 13.1119 -    freeblock_t *fl_current = NULL;
 13.1120 -    /* get superblock */
 13.1121 -    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
 13.1122 -    if (bs_super == NULL) return;
 13.1123 -
 13.1124 -    /* get freeblock_current */
 13.1125 -    if (bs_super->freelist_current == 0ULL) {
 13.1126 -        if ((fl_current = new_freeblock()) == NULL) goto out;
 13.1127 -        bs_super->freelist_current = allocblock(fl_current);
 13.1128 -        writeblock(BLOCKSTORE_SUPER, bs_super);
 13.1129 -    } else {
 13.1130 -        fl_current = readblock(bs_super->freelist_current);
 13.1131 -        if (fl_current == NULL) goto out;
 13.1132 -    }
 13.1133 -
 13.1134 -    /* if full, chain to superblock and allocate new current */
 13.1135 -    if (fl_current->count == FREEBLOCK_SIZE) {
 13.1136 -        fl_current->next = bs_super->freelist_full;
 13.1137 -        writeblock(bs_super->freelist_current, fl_current);
 13.1138 -        bs_super->freelist_full = bs_super->freelist_current;
 13.1139 -        freeblock(fl_current);
 13.1140 -        if ((fl_current = new_freeblock()) == NULL) goto out;
 13.1141 -        bs_super->freelist_current = allocblock(fl_current);
 13.1142 -        writeblock(BLOCKSTORE_SUPER, bs_super);
 13.1143 -    }
 13.1144 -
 13.1145 -    /* append id to current */
 13.1146 -    fl_current->list[fl_current->count++] = id;
 13.1147 -    writeblock(bs_super->freelist_current, fl_current);
 13.1148 -out:
 13.1149 -    freeblock(fl_current);
 13.1150 -    freeblock(bs_super);
 13.1151 -}
 13.1152 -
 13.1153 -/* freelist debug functions: */
 13.1154 -/* freelist_count: print the id count of the current free-list block and,
 13.1155 - * when full blocks exist, the total; print_each == 1 lists every id too. */
 13.1156 -void freelist_count(int print_each)
 13.1157 -{
 13.1158 -    blockstore_super_t *bs_super;
 13.1159 -    freeblock_t *fb;
 13.1160 -    uint64_t total = 0, next, i;
 13.1161 -
 13.1162 -    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
 13.1163 -    if (bs_super == NULL) return;
 13.1164 -
 13.1165 -    if (bs_super->freelist_current == 0ULL) {
 13.1166 -        printf("freelist is empty!\n");
 13.1167 -        goto out;
 13.1168 -    }
 13.1169 -
 13.1170 -    fb = readblock(bs_super->freelist_current);
 13.1171 -    if (fb == NULL) goto out;
 13.1172 -    printf("%lld entires on current.\n", (long long)fb->count);
 13.1173 -    total += fb->count;
 13.1174 -    if (print_each == 1)
 13.1175 -        for (i = 0; i < fb->count; i++)
 13.1176 -            printf("  %lld\n", (long long)fb->list[i]);
 13.1177 -    freeblock(fb);
 13.1178 -
 13.1179 -    if (bs_super->freelist_full == 0ULL) {
 13.1180 -        printf("freelist_full is empty!\n");
 13.1181 -        goto out;
 13.1182 -    }
 13.1183 -
 13.1184 -    /* follow the next links through the chain of full blocks */
 13.1185 -    for (next = bs_super->freelist_full; next != 0ULL; ) {
 13.1186 -        fb = readblock(next);
 13.1187 -        if (fb == NULL) goto out;
 13.1188 -        total += fb->count;
 13.1189 -        if (print_each == 1)
 13.1190 -            for (i = 0; i < fb->count; i++)
 13.1191 -                printf("  %lld\n", (long long)fb->list[i]);
 13.1192 -        next = fb->next;
 13.1193 -        freeblock(fb);
 13.1194 -    }
 13.1195 -    printf("Total of %lld ids on freelist.\n", (long long)total);
 13.1196 -
 13.1197 -out:
 13.1198 -    freeblock(bs_super);   /* the superblock copy is released on every path */
 13.1199 -}
 13.1200 -
 13.1201 -/*****************************************************************************
 13.1202 - * Initialisation                                                            *
 13.1203 - *****************************************************************************/
 13.1204 -
 13.1205 -/* __init_blockstore: set up the block store.
 13.1206 - * With BLOCKSTORE_REMOTE: initialise the mutexes and pool state, resolve
 13.1207 - * the servers, fill in the cluster map, bind the UDP socket and start the
 13.1208 - * receive_loop and queue_runner threads. Otherwise: open blockstore.dat,
 13.1209 - * writing a fresh superblock if it is empty or checking the magic if not.
 13.1210 - *   @return: 0 on success, -1 on failure; a superblock without the
 13.1211 - *            magic ends the process with exit(-1) instead.
 13.1212 - */
 13.1213 -int __init_blockstore(void)
 13.1214 -{
 13.1215 -#ifdef BLOCKSTORE_REMOTE
 13.1216 -    struct hostent *addr;
 13.1217 -    int i;
 13.1218 -
 13.1219 -    pthread_mutex_init(&ptmutex_queue, NULL);
 13.1220 -    pthread_mutex_init(&ptmutex_luid, NULL);
 13.1221 -    pthread_mutex_init(&ptmutex_recv, NULL);
 13.1222 -    /* NOTE(review): "<=" sets up READ_POOL_SIZE + 1 entries, matching the
 13.1223 -     * loop in __exit_blockstore(); confirm pool_thread is that large. */
 13.1224 -    for (i = 0; i <= READ_POOL_SIZE; i++) {
 13.1225 -        pool_thread[i].newdata = 0;
 13.1226 -        pthread_mutex_init(&(pool_thread[i].ptmutex), NULL);
 13.1227 -        pthread_cond_init(&(pool_thread[i].ptcv), NULL);
 13.1228 -    }
 13.1229 -
 13.1230 -    /* Server table; a NULL hostname marks a slot that is skipped below. */
 13.1231 -    bsservers[0].hostname = "firebug.cl.cam.ac.uk";
 13.1232 -    bsservers[1].hostname = "planb.cl.cam.ac.uk";
 13.1233 -    bsservers[2].hostname = "simcity.cl.cam.ac.uk";
 13.1234 -    bsservers[3].hostname = NULL/*"gunfighter.cl.cam.ac.uk"*/;
 13.1235 -    bsservers[4].hostname = NULL/*"galaxian.cl.cam.ac.uk"*/;
 13.1236 -    bsservers[5].hostname = NULL/*"firetrack.cl.cam.ac.uk"*/;
 13.1237 -    bsservers[6].hostname = NULL/*"funfair.cl.cam.ac.uk"*/;
 13.1238 -    bsservers[7].hostname = NULL/*"felix.cl.cam.ac.uk"*/;
 13.1239 -    bsservers[8].hostname = NULL;
 13.1240 -    bsservers[9].hostname = NULL;
 13.1241 -    bsservers[10].hostname = NULL;
 13.1242 -    bsservers[11].hostname = NULL;
 13.1243 -    bsservers[12].hostname = NULL;
 13.1244 -    bsservers[13].hostname = NULL;
 13.1245 -    bsservers[14].hostname = NULL;
 13.1246 -    bsservers[15].hostname = NULL;
 13.1247 -
 13.1248 -    for (i = 0; i < MAX_SERVERS; i++) {
 13.1249 -        if (!bsservers[i].hostname)
 13.1250 -            continue;
 13.1251 -        addr = gethostbyname(bsservers[i].hostname);
 13.1252 -        if (!addr) {
 13.1253 -            /* gethostbyname() fails via h_errno, so errno says nothing */
 13.1254 -            fprintf(stderr, "bad hostname: %s\n", bsservers[i].hostname);
 13.1255 -            return -1;
 13.1256 -        }
 13.1257 -        bsservers[i].sin.sin_family = addr->h_addrtype;
 13.1258 -        bsservers[i].sin.sin_port = htons(BLOCKSTORED_PORT);
 13.1259 -        bsservers[i].sin.sin_addr.s_addr =
 13.1260 -            ((struct in_addr *)(addr->h_addr))->s_addr;
 13.1261 -    }
 13.1262 -
 13.1263 -    /* Cluster map: cluster c uses servers c, c+1 and c+2, wrapping
 13.1264 -     * around after server 7.
 13.1265 -     */
 13.1266 -    for (i = 0; i < 8; i++) {
 13.1267 -        bsclusters[i].servers[0] = i;
 13.1268 -        bsclusters[i].servers[1] = (i + 1) % 8;
 13.1269 -        bsclusters[i].servers[2] = (i + 2) % 8;
 13.1270 -    }
 13.1271 -
 13.1272 -    /* Local socket set up
 13.1273 -     */
 13.1274 -    bssock = socket(AF_INET, SOCK_DGRAM, 0);
 13.1275 -    if (bssock < 0) {
 13.1276 -        perror("Bad socket");
 13.1277 -        return -1;
 13.1278 -    }
 13.1279 -    memset(&sin_local, 0, sizeof(sin_local));
 13.1280 -    sin_local.sin_family = AF_INET;
 13.1281 -    sin_local.sin_port = htons(BLOCKSTORED_PORT);
 13.1282 -    sin_local.sin_addr.s_addr = htonl(INADDR_ANY);
 13.1283 -    if (bind(bssock, (struct sockaddr *)&sin_local, sizeof(sin_local)) < 0) {
 13.1284 -        perror("bind");
 13.1285 -        close(bssock);
 13.1286 -        return -1;
 13.1287 -    }
 13.1288 -
 13.1289 -    /* NOTE(review): both calls store the thread id in pthread_recv, so
 13.1290 -     * the receive_loop id is overwritten; neither result is checked. */
 13.1291 -    pthread_create(&pthread_recv, NULL, receive_loop, NULL);
 13.1292 -    pthread_create(&pthread_recv, NULL, queue_runner, NULL);
 13.1293 -#else /* /BLOCKSTORE_REMOTE */
 13.1294 -    blockstore_super_t *bs_super;
 13.1295 -    int block_fp;
 13.1296 -
 13.1297 -    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
 13.1298 -    if (block_fp < 0) {
 13.1299 -        perror("open");
 13.1300 -        return -1;
 13.1301 -    }
 13.1302 -
 13.1303 -    if (lseek(block_fp, 0, SEEK_END) == 0) {
 13.1304 -        /* Empty store: the first block appended becomes the superblock. */
 13.1305 -        bs_super = newblock();
 13.1306 -        if (bs_super == NULL) {
 13.1307 -            close(block_fp);
 13.1308 -            return -1;
 13.1309 -        }
 13.1310 -        bs_super->magic            = BLOCKSTORE_MAGIC;
 13.1311 -        bs_super->freelist_full    = 0LL;
 13.1312 -        bs_super->freelist_current = 0LL;
 13.1313 -        /* allocblock() returns 0 when the block could not be written */
 13.1314 -        if (allocblock(bs_super) == 0) {
 13.1315 -            freeblock(bs_super);
 13.1316 -            close(block_fp);
 13.1317 -            return -1;
 13.1318 -        }
 13.1319 -        freeblock(bs_super);
 13.1320 -    } else {
 13.1321 -        bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
 13.1322 -        if (bs_super == NULL) {
 13.1323 -            close(block_fp);
 13.1324 -            return -1;
 13.1325 -        }
 13.1326 -        if (bs_super->magic != BLOCKSTORE_MAGIC) {
 13.1327 -            printf("BLOCKSTORE IS CORRUPT! (no magic in superblock!)\n");
 13.1328 -            exit(-1);
 13.1329 -        }
 13.1330 -        freeblock(bs_super);
 13.1331 -    }
 13.1332 -    close(block_fp);
 13.1333 -#endif /*  BLOCKSTORE_REMOTE */
 13.1334 -    return 0;
 13.1335 -}
 13.1336 -
 13.1337 -/* __exit_blockstore: destroy the mutexes and condition variables; the
 13.1338 - * socket is not closed here. Does nothing without BLOCKSTORE_REMOTE. */
 13.1339 -void __exit_blockstore(void)
 13.1340 -{
 13.1341 -#ifdef BLOCKSTORE_REMOTE
 13.1342 -    int slot;
 13.1343 -    pthread_mutex_destroy(&ptmutex_recv);
 13.1344 -    pthread_mutex_destroy(&ptmutex_luid);
 13.1345 -    pthread_mutex_destroy(&ptmutex_queue);
 13.1346 -    for (slot = 0; slot <= READ_POOL_SIZE; slot++) {
 13.1347 -        pthread_mutex_destroy(&(pool_thread[slot].ptmutex));
 13.1348 -        pthread_cond_destroy(&(pool_thread[slot].ptcv));
 13.1349 -    }
 13.1350 -#endif
 13.1351 -}
    14.1 --- a/tools/blktap/parallax/blockstore.h	Fri Jun 16 18:19:40 2006 +0100
    14.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.3 @@ -1,134 +0,0 @@
    14.4 -/**************************************************************************
    14.5 - * 
    14.6 - * blockstore.h
    14.7 - *
    14.8 - * Simple block store interface
    14.9 - *
   14.10 - */
   14.11 - 
   14.12 -#ifndef __BLOCKSTORE_H__
   14.13 -#define __BLOCKSTORE_H__
   14.14 -
   14.15 -#include <netinet/in.h>
   14.16 -#include <xenctrl.h>
   14.17 -
   14.18 -#define BLOCK_SIZE  4096                   /* bytes per block, 1 << BLOCK_SHIFT */
   14.19 -#define BLOCK_SHIFT   12                   /* log2 of BLOCK_SIZE */
   14.20 -#define BLOCK_MASK  0xfffffffffffff000LL   /* every bit set except the low BLOCK_SHIFT bits */
   14.21 -/* Fallback only: takes effect when SECTOR_SHIFT is not already defined. */
   14.22 -/* XXX SMH: where is the below supposed to be defined???? */
   14.23 -#ifndef SECTOR_SHIFT 
   14.24 -#define SECTOR_SHIFT   9 
   14.25 -#endif
   14.26 -
   14.27 -#define FREEBLOCK_SIZE  ((BLOCK_SIZE / sizeof(uint64_t)) - (3 * sizeof(uint64_t)))
   14.28 -#define FREEBLOCK_MAGIC 0x0fee0fee0fee0feeULL
   14.29 -/* Free-list block. NOTE(review): with 8-byte uint64_t FREEBLOCK_SIZE is 512 - 24 = 488, so the struct fills 3928 of 4096 bytes; value kept unchanged. */
   14.30 -typedef struct {
   14.31 -    uint64_t magic;                  /* FREEBLOCK_MAGIC */
   14.32 -    uint64_t next;                   /* id of the next full block, 0 at the end of the chain */
   14.33 -    uint64_t count;                  /* number of entries of list in use */
   14.34 -    uint64_t list[FREEBLOCK_SIZE];   /* released block ids */
   14.35 -} freeblock_t; 
   14.36 -
   14.37 -#define BLOCKSTORE_MAGIC 0xaaaaaaa00aaaaaaaULL   /* expected in blockstore_super_t.magic */
   14.38 -#define BLOCKSTORE_SUPER 1ULL                    /* block id of the superblock */
   14.39 -/* Superblock: the magic plus the heads of the two free-list chains. */
   14.40 -typedef struct {
   14.41 -    uint64_t magic;              /* BLOCKSTORE_MAGIC */
   14.42 -    uint64_t freelist_full;      /* id of the first full free-list block, 0 if none */
   14.43 -    uint64_t freelist_current;   /* id of the free-list block being filled, 0 if none */
   14.44 -} blockstore_super_t;
   14.45 -
   14.46 -extern void *newblock(void);   /* (void): an empty () would not be a prototype */
   14.47 -extern void *readblock(uint64_t id);               /* NULL on error */
   14.48 -extern uint64_t allocblock(void *block);           /* new block id, 0 on error */
   14.49 -extern uint64_t allocblock_hint(void *block, uint64_t hint);
   14.50 -extern int writeblock(uint64_t id, void *block);   /* 0 on success, -1 on error */
   14.51 -
   14.52 -/* Add this blockid to a freelist, to be recycled by the allocator. */
   14.53 -extern void releaseblock(uint64_t id);
   14.54 -
   14.55 -/* this is a memory free() operation for block-sized allocations */
   14.56 -extern void freeblock(void *block);
   14.57 -extern int __init_blockstore(void);                /* 0 on success, -1 on error */
   14.58 -
   14.59 -/* debug for freelist. */
   14.60 -void freelist_count(int print_each);
   14.61 -#define ALLOCFAIL (((uint64_t)(-1)))   /* NOTE(review): allocblock()/allocblock_hint() return 0 on failure, not this */
   14.62 -
   14.63 -/* Distribution: port and message layout shared with the block servers.
   14.64 - */
   14.65 -#define BLOCKSTORED_PORT 9346   /* passed through htons() for both local and server addresses */
   14.66 -
   14.67 -struct bshdr_t_struct {
   14.68 -    uint32_t            operation;   /* one of the BSOP_* codes */
   14.69 -    uint32_t            flags;       /* BSOP_FLAG_* bits */
   14.70 -    uint64_t            id;          /* block id; the hint for BSOP_ALLOCBLOCK requests */
   14.71 -    uint64_t            luid;        /* presumably a per-request unique id; confirm against its users */
   14.72 -} __attribute__ ((packed));
   14.73 -typedef struct bshdr_t_struct bshdr_t;
   14.74 -
   14.75 -struct bsmsg_t_struct {
   14.76 -    bshdr_t        hdr;                  /* header, packed */
   14.77 -    unsigned char  block[BLOCK_SIZE];    /* one block of payload */
   14.78 -} __attribute__ ((packed));
   14.79 -
   14.80 -typedef struct bsmsg_t_struct bsmsg_t;
   14.81 -/* Message lengths in bytes, each counted from the start of the header. */
   14.82 -#define MSGBUFSIZE_OP    sizeof(uint32_t)
   14.83 -#define MSGBUFSIZE_FLAGS (sizeof(uint32_t) + sizeof(uint32_t))
   14.84 -#define MSGBUFSIZE_ID    (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint64_t))
   14.85 -#define MSGBUFSIZE_BLOCK sizeof(bsmsg_t)
   14.86 -/* Values for the operation field. */
   14.87 -#define BSOP_READBLOCK  0x01
   14.88 -#define BSOP_WRITEBLOCK 0x02
   14.89 -#define BSOP_ALLOCBLOCK 0x03
   14.90 -#define BSOP_FREEBLOCK  0x04
   14.91 -/* Bit in the flags field; callers treat a reply carrying it as a failure. */
   14.92 -#define BSOP_FLAG_ERROR 0x01
   14.93 -/* NOTE(review): the two macros below are not used in this header; see their users for the meaning. */
   14.94 -#define BS_ALLOC_SKIP 10
   14.95 -#define BS_ALLOC_HACK
   14.96 -
   14.97 -/* Remote hosts and cluster map - XXX need to generalise
   14.98 - */
   14.99 -
  14.100 -/*
  14.101 -
  14.102 -  Interim ID format is
  14.103 -
  14.104 -  63 60 59                40 39                20 19                 0
  14.105 -  +----+--------------------+--------------------+--------------------+
  14.106 -  |map | replica 2          | replica 1          | replica 0          |
  14.107 -  +----+--------------------+--------------------+--------------------+
  14.108 -
  14.109 -  The map is an index into a table detailing which machines form the
  14.110 -  cluster.
  14.111 -  Each replica field is the id that is sent to that replica's server.
  14.112 - */
  14.113 -/* Field extractors for the layout above: three 20-bit ids and a 4-bit map. */
  14.114 -#define BSID_REPLICA0(_id) ((_id)&0xfffffULL)
  14.115 -#define BSID_REPLICA1(_id) (((_id)>>20)&0xfffffULL)
  14.116 -#define BSID_REPLICA2(_id) (((_id)>>40)&0xfffffULL)
  14.117 -#define BSID_MAP(_id)      (((_id)>>60)&0xfULL)
  14.118 -/* Compose an id. The _rep values are not masked here, so one wider than 20 bits spills into the next field. */
  14.119 -#define BSID(_map, _rep0, _rep1, _rep2) ((((uint64_t)(_map))<<60) | \
  14.120 -                                         (((uint64_t)(_rep2))<<40) | \
  14.121 -                                         (((uint64_t)(_rep1))<<20) | ((uint64_t)(_rep0)))
  14.122 -/* One block server: its hostname and the socket address resolved from it. */
  14.123 -typedef struct bsserver_t_struct {
  14.124 -    char              *hostname;   /* NULL for an unused slot */
  14.125 -    struct sockaddr_in sin;        /* family, port and address */
  14.126 -} bsserver_t;
  14.127 -
  14.128 -#define MAX_SERVERS 16             /* number of server slots */
  14.129 -/* One cluster: a server number per replica (presumably indices into the server table; confirm). */
  14.130 -#define CLUSTER_MAX_REPLICAS 3
  14.131 -typedef struct bscluster_t_struct {
  14.132 -    int servers[CLUSTER_MAX_REPLICAS];
  14.133 -} bscluster_t;
  14.134 -
  14.135 -#define MAX_CLUSTERS 16            /* the 4-bit map field can name 16 clusters */
  14.136 -
  14.137 -#endif /* __BLOCKSTORE_H__ */
    15.1 --- a/tools/blktap/parallax/blockstored.c	Fri Jun 16 18:19:40 2006 +0100
    15.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.3 @@ -1,275 +0,0 @@
    15.4 -/**************************************************************************
    15.5 - * 
    15.6 - * blockstored.c
    15.7 - *
    15.8 - * Block store daemon.
    15.9 - *
   15.10 - */
   15.11 -
   15.12 -#include <fcntl.h>
   15.13 -#include <unistd.h>
   15.14 -#include <stdio.h>
   15.15 -#include <stdlib.h>
   15.16 -#include <string.h>
   15.17 -#include <sys/types.h>
   15.18 -#include <sys/stat.h>
   15.19 -#include <sys/socket.h>
   15.20 -#include <sys/ioctl.h>
   15.21 -#include <netinet/in.h>
   15.22 -#include <errno.h>
   15.23 -#include "blockstore.h"
   15.24 -
   15.25 -//#define BSDEBUG
   15.26 -
   15.27 -int readblock_into(uint64_t id, void *block);
   15.28 -
   15.29 -int open_socket(uint16_t port) {
   15.30 -    
   15.31 -    struct sockaddr_in sn;
   15.32 -    int sock;
   15.33 -
   15.34 -    sock = socket(AF_INET, SOCK_DGRAM, 0);
   15.35 -    if (sock < 0) {
   15.36 -        perror("Bad socket");
   15.37 -        return -1;
   15.38 -    }
   15.39 -    memset(&sn, 0, sizeof(sn));
   15.40 -    sn.sin_family = AF_INET;
   15.41 -    sn.sin_port = htons(port);
   15.42 -    sn.sin_addr.s_addr = htonl(INADDR_ANY);
   15.43 -    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
   15.44 -        perror("bind");
   15.45 -        close(sock);
   15.46 -        return -1;
   15.47 -    }
   15.48 -
   15.49 -    return sock;
   15.50 -}
   15.51 -
   15.52 -static int block_fp = -1;
   15.53 -static int bssock = -1;
   15.54 -
   15.55 -int send_reply(struct sockaddr_in *peer, void *buffer, int len) {
   15.56 -
   15.57 -    int rc;
   15.58 -    
   15.59 -#ifdef BSDEBUG
   15.60 -    fprintf(stdout, "TX: %u bytes op=%u id=0x%llx\n",
   15.61 -            len, ((bsmsg_t *)buffer)->hdr.operation, ((bsmsg_t *)buffer)->hdr.id);
   15.62 -#endif
   15.63 -    rc = sendto(bssock, buffer, len, 0, (struct sockaddr *)peer, sizeof(*peer));
   15.64 -    if (rc < 0) {
   15.65 -        perror("send_reply");
   15.66 -        return 1;
   15.67 -    }
   15.68 -
   15.69 -
   15.70 -    return 0;
   15.71 -}
   15.72 -
   15.73 -static bsmsg_t msgbuf;
   15.74 -
   15.75 -void service_loop(void) {
   15.76 -
   15.77 -    for (;;) {
   15.78 -        int rc, len;
   15.79 -        struct sockaddr_in from;
   15.80 -        size_t slen = sizeof(from);
   15.81 -        uint64_t bid;
   15.82 -
   15.83 -        len = recvfrom(bssock, (void *)&msgbuf, sizeof(msgbuf), 0,
   15.84 -                       (struct sockaddr *)&from, &slen);
   15.85 -
   15.86 -        if (len < 0) {
   15.87 -            perror("recvfrom");
   15.88 -            continue;
   15.89 -        }
   15.90 -
   15.91 -        if (len < MSGBUFSIZE_OP) {
   15.92 -            fprintf(stderr, "Short packet.\n");
   15.93 -            continue;
   15.94 -        }
   15.95 -
   15.96 -#ifdef BSDEBUG
   15.97 -        fprintf(stdout, "RX: %u bytes op=%u id=0x%llx\n",
   15.98 -                len, msgbuf.hdr.operation, msgbuf.hdr.id);
   15.99 -#endif
  15.100 -
  15.101 -        switch (msgbuf.hdr.operation) {
  15.102 -        case BSOP_READBLOCK:
  15.103 -            if (len < MSGBUFSIZE_ID) {
  15.104 -                fprintf(stderr, "Short packet (readblock %u).\n", len);
  15.105 -                continue;
  15.106 -            }
  15.107 -            rc = readblock_into(msgbuf.hdr.id, msgbuf.block);
  15.108 -            if (rc < 0) {
  15.109 -                fprintf(stderr, "readblock error\n");
  15.110 -                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
  15.111 -                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  15.112 -                continue;
  15.113 -            }
  15.114 -            msgbuf.hdr.flags = 0;
  15.115 -            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_BLOCK);
  15.116 -            break;
  15.117 -        case BSOP_WRITEBLOCK:
  15.118 -            if (len < MSGBUFSIZE_BLOCK) {
  15.119 -                fprintf(stderr, "Short packet (writeblock %u).\n", len);
  15.120 -                continue;
  15.121 -            }
  15.122 -            rc = writeblock(msgbuf.hdr.id, msgbuf.block);
  15.123 -            if (rc < 0) {
  15.124 -                fprintf(stderr, "writeblock error\n");
  15.125 -                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
  15.126 -                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  15.127 -                continue;
  15.128 -            }
  15.129 -            msgbuf.hdr.flags = 0;
  15.130 -            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  15.131 -            break;
  15.132 -        case BSOP_ALLOCBLOCK:
  15.133 -            if (len < MSGBUFSIZE_BLOCK) {
  15.134 -                fprintf(stderr, "Short packet (allocblock %u).\n", len);
  15.135 -                continue;
  15.136 -            }
  15.137 -            bid = allocblock(msgbuf.block);
  15.138 -            if (bid == ALLOCFAIL) {
  15.139 -                fprintf(stderr, "allocblock error\n");
  15.140 -                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
  15.141 -                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  15.142 -                continue;
  15.143 -            }
  15.144 -            msgbuf.hdr.id = bid;
  15.145 -            msgbuf.hdr.flags = 0;
  15.146 -            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  15.147 -            break;
  15.148 -        }
  15.149 -
  15.150 -    }
  15.151 -}
  15.152 - 
  15.153 -/**
  15.154 - * readblock: read a block from disk
  15.155 - *   @id: block id to read
  15.156 - *   @block: pointer to buffer to receive block
  15.157 - *
  15.158 - *   @return: 0 if OK, other on error
  15.159 - */
  15.160 -
  15.161 -int readblock_into(uint64_t id, void *block) {
  15.162 -    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
  15.163 -        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
  15.164 -        perror("readblock lseek");
  15.165 -        return -1;
  15.166 -    }
  15.167 -    if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
  15.168 -        perror("readblock read");
  15.169 -        return -1;
  15.170 -    }
  15.171 -    return 0;
  15.172 -}
  15.173 -
  15.174 -/**
  15.175 - * writeblock: write an existing block to disk
  15.176 - *   @id: block id
  15.177 - *   @block: pointer to block
  15.178 - *
  15.179 - *   @return: zero on success, -1 on failure
  15.180 - */
  15.181 -int writeblock(uint64_t id, void *block) {
  15.182 -    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
  15.183 -        perror("writeblock lseek");
  15.184 -        return -1;
  15.185 -    }
  15.186 -    if (write(block_fp, block, BLOCK_SIZE) < 0) {
  15.187 -        perror("writeblock write");
  15.188 -        return -1;
  15.189 -    }
  15.190 -    return 0;
  15.191 -}
  15.192 -
  15.193 -/**
  15.194 - * allocblock: write a new block to disk
  15.195 - *   @block: pointer to block
  15.196 - *
  15.197 - *   @return: new id of block on disk
  15.198 - */
  15.199 -static uint64_t lastblock = 0;
  15.200 -
  15.201 -uint64_t allocblock(void *block) {
  15.202 -    uint64_t lb;
  15.203 -    off64_t pos;
  15.204 -
  15.205 -    retry:
  15.206 -    pos = lseek64(block_fp, 0, SEEK_END);
  15.207 -    if (pos == (off64_t)-1) {
  15.208 -        perror("allocblock lseek");
  15.209 -        return ALLOCFAIL;
  15.210 -    }
  15.211 -    if (pos % BLOCK_SIZE != 0) {
  15.212 -        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
  15.213 -        return ALLOCFAIL;
  15.214 -    }
  15.215 -    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
  15.216 -        perror("allocblock write");
  15.217 -        return ALLOCFAIL;
  15.218 -    }
  15.219 -    lb = pos / BLOCK_SIZE + 1;
  15.220 -
  15.221 -#ifdef BS_ALLOC_HACK
  15.222 -    if (lb < BS_ALLOC_SKIP)
  15.223 -        goto retry;
  15.224 -#endif
  15.225 -    
  15.226 -    if (lb <= lastblock)
  15.227 -        printf("[*** %Ld alredy allocated! ***]\n", lb);
  15.228 -    
  15.229 -    lastblock = lb;
  15.230 -    return lb;
  15.231 -}
  15.232 -
  15.233 -/**
  15.234 - * newblock: get a new in-memory block set to zeros
  15.235 - *
  15.236 - *   @return: pointer to new block, NULL on error
  15.237 - */
  15.238 -void *newblock(void) {
  15.239 -    void *block = malloc(BLOCK_SIZE);
  15.240 -    if (block == NULL) {
  15.241 -        perror("newblock");
  15.242 -        return NULL;
  15.243 -    }
  15.244 -    memset(block, 0, BLOCK_SIZE);
  15.245 -    return block;
  15.246 -}
  15.247 -
  15.248 -
  15.249 -/**
  15.250 - * freeblock: unallocate an in-memory block
  15.251 - *   @id: block id (zero if this is only in-memory)
  15.252 - *   @block: block to be freed
  15.253 - */
  15.254 -void freeblock(void *block) {
  15.255 -        free(block);
  15.256 -}
  15.257 -
  15.258 -
  15.259 -int main(int argc, char **argv)
  15.260 -{
  15.261 -    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
  15.262 -
  15.263 -    if (block_fp < 0) {
  15.264 -        perror("open");
  15.265 -        return -1;
  15.266 -    }
  15.267 -
  15.268 -    bssock = open_socket(BLOCKSTORED_PORT);
  15.269 -    if (bssock < 0) {
  15.270 -        return -1;
  15.271 -    }
  15.272 -
  15.273 -    service_loop();
  15.274 -    
  15.275 -    close(bssock);
  15.276 -
  15.277 -    return 0;
  15.278 -}
    16.1 --- a/tools/blktap/parallax/bstest.c	Fri Jun 16 18:19:40 2006 +0100
    16.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.3 @@ -1,191 +0,0 @@
    16.4 -/**************************************************************************
    16.5 - * 
    16.6 - * bstest.c
    16.7 - *
    16.8 - * Block store daemon test program.
    16.9 - *
   16.10 - * usage: bstest <host>|X {r|w|a} ID 
   16.11 - *
   16.12 - */
   16.13 -
   16.14 -#include <fcntl.h>
   16.15 -#include <unistd.h>
   16.16 -#include <stdio.h>
   16.17 -#include <stdlib.h>
   16.18 -#include <string.h>
   16.19 -#include <sys/types.h>
   16.20 -#include <sys/stat.h>
   16.21 -#include <sys/socket.h>
   16.22 -#include <sys/ioctl.h>
   16.23 -#include <netinet/in.h>
   16.24 -#include <netdb.h>
   16.25 -#include <errno.h>
   16.26 -#include "blockstore.h"
   16.27 -
   16.28 -int direct(char *host, uint32_t op, uint64_t id, int len) {
   16.29 -    struct sockaddr_in sn, peer;
   16.30 -    int sock;
   16.31 -    bsmsg_t msgbuf;
   16.32 -    int rc, slen;
   16.33 -    struct hostent *addr;
   16.34 -
   16.35 -    addr = gethostbyname(host);
   16.36 -    if (!addr) {
   16.37 -        perror("bad hostname");
   16.38 -        exit(1);
   16.39 -    }
   16.40 -    peer.sin_family = addr->h_addrtype;
   16.41 -    peer.sin_port = htons(BLOCKSTORED_PORT);
   16.42 -    peer.sin_addr.s_addr =  ((struct in_addr *)(addr->h_addr))->s_addr;
   16.43 -    fprintf(stderr, "Sending to: %u.%u.%u.%u\n",
   16.44 -            (unsigned int)(unsigned char)addr->h_addr[0],
   16.45 -            (unsigned int)(unsigned char)addr->h_addr[1],
   16.46 -            (unsigned int)(unsigned char)addr->h_addr[2],
   16.47 -            (unsigned int)(unsigned char)addr->h_addr[3]);
   16.48 -
   16.49 -    sock = socket(AF_INET, SOCK_DGRAM, 0);
   16.50 -    if (sock < 0) {
   16.51 -        perror("Bad socket");
   16.52 -        exit(1);
   16.53 -    }
   16.54 -    memset(&sn, 0, sizeof(sn));
   16.55 -    sn.sin_family = AF_INET;
   16.56 -    sn.sin_port = htons(BLOCKSTORED_PORT);
   16.57 -    sn.sin_addr.s_addr = htonl(INADDR_ANY);
   16.58 -    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
   16.59 -        perror("bind");
   16.60 -        close(sock);
   16.61 -        exit(1);
   16.62 -    }
   16.63 -
   16.64 -    memset((void *)&msgbuf, 0, sizeof(msgbuf));
   16.65 -    msgbuf.operation = op;
   16.66 -    msgbuf.id = id;
   16.67 -
   16.68 -    rc = sendto(sock, (void *)&msgbuf, len, 0,
   16.69 -                (struct sockaddr *)&peer, sizeof(peer));
   16.70 -    if (rc < 0) {
   16.71 -        perror("sendto");
   16.72 -        exit(1);
   16.73 -    }
   16.74 -
   16.75 -    slen = sizeof(peer);
   16.76 -    len = recvfrom(sock, (void *)&msgbuf, sizeof(msgbuf), 0,
   16.77 -                   (struct sockaddr *)&peer, &slen);
   16.78 -    if (len < 0) {
   16.79 -        perror("recvfrom");
   16.80 -        exit(1);
   16.81 -    }
   16.82 -
   16.83 -    printf("Reply %u bytes:\n", len);
   16.84 -    if (len >= MSGBUFSIZE_OP)
   16.85 -        printf("  operation: %u\n", msgbuf.operation);
   16.86 -    if (len >= MSGBUFSIZE_FLAGS)
   16.87 -        printf("  flags: 0x%x\n", msgbuf.flags);
   16.88 -    if (len >= MSGBUFSIZE_ID)
   16.89 -        printf("  id: %llu\n", msgbuf.id);
   16.90 -    if (len >= (MSGBUFSIZE_ID + 4))
   16.91 -        printf("  data: %02x %02x %02x %02x...\n",
   16.92 -               (unsigned int)msgbuf.block[0],
   16.93 -               (unsigned int)msgbuf.block[1],
   16.94 -               (unsigned int)msgbuf.block[2],
   16.95 -               (unsigned int)msgbuf.block[3]);
   16.96 -    
   16.97 -    if (sock > 0)
   16.98 -        close(sock);
   16.99 -   
  16.100 -    return 0;
  16.101 -}
  16.102 -
  16.103 -int main (int argc, char **argv) {
  16.104 -
  16.105 -    uint32_t op = 0;
  16.106 -    uint64_t id = 0;
  16.107 -    int len = 0, rc;
  16.108 -    void *block;
  16.109 -
  16.110 -    if (argc < 3) {
  16.111 -        fprintf(stderr, "usage: bstest <host>|X {r|w|a} ID\n");
  16.112 -        return 1;
  16.113 -    }
  16.114 -
  16.115 -    switch (argv[2][0]) {
  16.116 -    case 'r':
  16.117 -    case 'R':
  16.118 -        op = BSOP_READBLOCK;
  16.119 -        len = MSGBUFSIZE_ID;
  16.120 -        break;
  16.121 -    case 'w':
  16.122 -    case 'W':
  16.123 -        op = BSOP_WRITEBLOCK;
  16.124 -        len = MSGBUFSIZE_BLOCK;
  16.125 -        break;
  16.126 -    case 'a':
  16.127 -    case 'A':
  16.128 -        op = BSOP_ALLOCBLOCK;
  16.129 -        len = MSGBUFSIZE_BLOCK;
  16.130 -        break;
  16.131 -    default:
  16.132 -        fprintf(stderr, "Unknown action '%s'.\n", argv[2]);
  16.133 -        return 1;
  16.134 -    }
  16.135 -
  16.136 -    if (argc >= 4)
  16.137 -        id = atoll(argv[3]);
  16.138 -
  16.139 -    if (strcmp(argv[1], "X") == 0) {
  16.140 -        rc = __init_blockstore();
  16.141 -        if (rc < 0) {
  16.142 -            fprintf(stderr, "blockstore init failed.\n");
  16.143 -            return 1;
  16.144 -        }
  16.145 -        switch(op) {
  16.146 -        case BSOP_READBLOCK:
  16.147 -            block = readblock(id);
  16.148 -            if (block) {
  16.149 -                printf("data: %02x %02x %02x %02x...\n",
  16.150 -                       (unsigned int)((unsigned char*)block)[0],
  16.151 -                       (unsigned int)((unsigned char*)block)[1],
  16.152 -                       (unsigned int)((unsigned char*)block)[2],
  16.153 -                       (unsigned int)((unsigned char*)block)[3]);
  16.154 -            }
  16.155 -            break;
  16.156 -        case BSOP_WRITEBLOCK:
  16.157 -            block = malloc(BLOCK_SIZE);
  16.158 -            if (!block) {
  16.159 -                perror("bstest malloc");
  16.160 -                return 1;
  16.161 -            }
  16.162 -            memset(block, 0, BLOCK_SIZE);
  16.163 -            rc = writeblock(id, block);
  16.164 -            if (rc != 0) {
  16.165 -                printf("error\n");
  16.166 -            }
  16.167 -            else {
  16.168 -                printf("OK\n");
  16.169 -            }
  16.170 -            break;
  16.171 -        case BSOP_ALLOCBLOCK:
  16.172 -            block = malloc(BLOCK_SIZE);
  16.173 -            if (!block) {
  16.174 -                perror("bstest malloc");
  16.175 -                return 1;
  16.176 -            }
  16.177 -            memset(block, 0, BLOCK_SIZE);
  16.178 -            id = allocblock_hint(block, id);
  16.179 -            if (id == 0) {
  16.180 -                printf("error\n");
  16.181 -            }
  16.182 -            else {
  16.183 -                printf("ID: %llu\n", id);
  16.184 -            }
  16.185 -            break;
  16.186 -        }
  16.187 -    }
  16.188 -    else {
  16.189 -        direct(argv[1], op, id, len);
  16.190 -    }
  16.191 -
  16.192 -
  16.193 -    return 0;
  16.194 -}
    17.1 --- a/tools/blktap/parallax/parallax.c	Fri Jun 16 18:19:40 2006 +0100
    17.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.3 @@ -1,608 +0,0 @@
    17.4 -/**************************************************************************
    17.5 - * 
    17.6 - * parallax.c
    17.7 - *
    17.8 - * The Parallax Storage Server
    17.9 - *
   17.10 - */
   17.11 - 
   17.12 -
   17.13 -#include <stdio.h>
   17.14 -#include <stdlib.h>
   17.15 -#include <string.h>
   17.16 -#include <pthread.h>
   17.17 -#include "blktaplib.h"
   17.18 -#include "blockstore.h"
   17.19 -#include "vdi.h"
   17.20 -#include "block-async.h"
   17.21 -#include "requests-async.h"
   17.22 -
   17.23 -#define PARALLAX_DEV     61440
   17.24 -#define SECTS_PER_NODE   8
   17.25 -
   17.26 -
   17.27 -#if 0
   17.28 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   17.29 -#else
   17.30 -#define DPRINTF(_f, _a...) ((void)0)
   17.31 -#endif
   17.32 -
   17.33 -/* ------[ session records ]----------------------------------------------- */
   17.34 -
   17.35 -#define BLKIF_HASHSZ 1024
   17.36 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
   17.37 -
   17.38 -#define VDI_HASHSZ 16
   17.39 -#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1))
   17.40 -
   17.41 -typedef struct blkif {
   17.42 -    domid_t       domid;
   17.43 -    unsigned int  handle;
   17.44 -    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
   17.45 -    vdi_t        *vdi_hash[VDI_HASHSZ];
   17.46 -    struct blkif *hash_next;
   17.47 -} blkif_t;
   17.48 -
   17.49 -static blkif_t      *blkif_hash[BLKIF_HASHSZ];
   17.50 -
   17.51 -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
   17.52 -{
   17.53 -    if ( handle != 0 )
   17.54 -        printf("blktap/parallax don't currently support non-0 dev handles!\n");
   17.55 -    
   17.56 -    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
   17.57 -    while ( (blkif != NULL) && 
   17.58 -            ((blkif->domid != domid) || (blkif->handle != handle)) )
   17.59 -        blkif = blkif->hash_next;
   17.60 -    return blkif;
   17.61 -}
   17.62 -
   17.63 -vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device)
   17.64 -{
   17.65 -    vdi_t *vdi = blkif->vdi_hash[VDI_HASH(device)];
   17.66 -    
   17.67 -    while ((vdi != NULL) && (vdi->vdevice != device))
   17.68 -        vdi = vdi->next;
   17.69 -    
   17.70 -    return vdi;
   17.71 -}
   17.72 -
   17.73 -/* ------[ control message handling ]-------------------------------------- */
   17.74 -
   17.75 -void blkif_create(blkif_be_create_t *create)
   17.76 -{
   17.77 -    domid_t       domid  = create->domid;
   17.78 -    unsigned int  handle = create->blkif_handle;
   17.79 -    blkif_t     **pblkif, *blkif;
   17.80 -
   17.81 -    DPRINTF("parallax (blkif_create): create is %p\n", create); 
   17.82 -    
   17.83 -    if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL )
   17.84 -    {
   17.85 -        DPRINTF("Could not create blkif: out of memory\n");
   17.86 -        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
   17.87 -        return;
   17.88 -    }
   17.89 -
   17.90 -    memset(blkif, 0, sizeof(*blkif));
   17.91 -    blkif->domid  = domid;
   17.92 -    blkif->handle = handle;
   17.93 -    blkif->status = DISCONNECTED;
   17.94 -
   17.95 -    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
   17.96 -    while ( *pblkif != NULL )
   17.97 -    {
   17.98 -        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
   17.99 -        {
  17.100 -            DPRINTF("Could not create blkif: already exists (%d,%d)\n",
  17.101 -                domid, handle);
  17.102 -            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
  17.103 -            free(blkif);
  17.104 -            return;
  17.105 -        }
  17.106 -        pblkif = &(*pblkif)->hash_next;
  17.107 -    }
  17.108 -
  17.109 -    blkif->hash_next = *pblkif;
  17.110 -    *pblkif = blkif;
  17.111 -
  17.112 -    DPRINTF("Successfully created blkif\n");
  17.113 -    create->status = BLKIF_BE_STATUS_OKAY;
  17.114 -}
  17.115 -
  17.116 -void blkif_destroy(blkif_be_destroy_t *destroy)
  17.117 -{
  17.118 -    domid_t       domid  = destroy->domid;
  17.119 -    unsigned int  handle = destroy->blkif_handle;
  17.120 -    blkif_t     **pblkif, *blkif;
  17.121 -
  17.122 -    DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy); 
  17.123 -    
  17.124 -    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
  17.125 -    while ( (blkif = *pblkif) != NULL )
  17.126 -    {
  17.127 -        if ( (blkif->domid == domid) && (blkif->handle == handle) )
  17.128 -        {
  17.129 -            if ( blkif->status != DISCONNECTED )
  17.130 -                goto still_connected;
  17.131 -            goto destroy;
  17.132 -        }
  17.133 -        pblkif = &blkif->hash_next;
  17.134 -    }
  17.135 -
  17.136 -    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  17.137 -    return;
  17.138 -
  17.139 - still_connected:
  17.140 -    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
  17.141 -    return;
  17.142 -
  17.143 - destroy:
  17.144 -    *pblkif = blkif->hash_next;
  17.145 -    free(blkif);
  17.146 -    destroy->status = BLKIF_BE_STATUS_OKAY;
  17.147 -}
  17.148 -
  17.149 -void vbd_create(blkif_be_vbd_create_t *create)
  17.150 -{
  17.151 -    blkif_t            *blkif;
  17.152 -    vdi_t              *vdi, **vdip;
  17.153 -    blkif_vdev_t        vdevice = create->vdevice;
  17.154 -
  17.155 -    DPRINTF("parallax (vbd_create): create=%p\n", create); 
  17.156 -    
  17.157 -    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
  17.158 -    if ( blkif == NULL )
  17.159 -    {
  17.160 -        DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", 
  17.161 -                create->domid, create->blkif_handle); 
  17.162 -        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  17.163 -        return;
  17.164 -    }
  17.165 -
  17.166 -    /* VDI identifier is in grow->extent.sector_start */
  17.167 -    DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", 
  17.168 -            (unsigned long)create->dev_handle);
  17.169 -
  17.170 -    vdi = vdi_get(create->dev_handle);
  17.171 -    if (vdi == NULL)
  17.172 -    {
  17.173 -        printf("parallax (vbd_create): VDI %lx not found.\n",
  17.174 -               (unsigned long)create->dev_handle);
  17.175 -        create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
  17.176 -        return;
  17.177 -    }
  17.178 -    
  17.179 -    vdi->next = NULL;
  17.180 -    vdi->vdevice = vdevice;
  17.181 -    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
  17.182 -    while (*vdip != NULL)
  17.183 -        vdip = &(*vdip)->next;
  17.184 -    *vdip = vdi;
  17.185 -    
  17.186 -    DPRINTF("blkif_create succeeded\n"); 
  17.187 -    create->status = BLKIF_BE_STATUS_OKAY;
  17.188 -}
  17.189 -
  17.190 -void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
  17.191 -{
  17.192 -    blkif_t            *blkif;
  17.193 -    vdi_t              *vdi, **vdip;
  17.194 -    blkif_vdev_t        vdevice = destroy->vdevice;
  17.195 -    
  17.196 -    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
  17.197 -    if ( blkif == NULL )
  17.198 -    {
  17.199 -        DPRINTF("vbd_destroy attempted for non-existent blkif (%u,%u)\n", 
  17.200 -                destroy->domid, destroy->blkif_handle); 
  17.201 -        destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  17.202 -        return;
  17.203 -    }
  17.204 -
  17.205 -    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
  17.206 -    while ((*vdip != NULL) && ((*vdip)->vdevice != vdevice))
  17.207 -        vdip = &(*vdip)->next;
  17.208 -
  17.209 -    if (*vdip != NULL) 
  17.210 -    {
  17.211 -        vdi = *vdip;
  17.212 -        *vdip = vdi->next;
  17.213 -        vdi_put(vdi);
  17.214 -    }
  17.215 -        
  17.216 -}
  17.217 -
  17.218 -int parallax_control(control_msg_t *msg)
  17.219 -{
  17.220 -    domid_t  domid;
  17.221 -    int      ret;
  17.222 -
  17.223 -    DPRINTF("parallax_control: msg is %p\n", msg); 
  17.224 -    
  17.225 -    if (msg->type != CMSG_BLKIF_BE) 
  17.226 -    {
  17.227 -        printf("Unexpected control message (%d)\n", msg->type);
  17.228 -        return 0;
  17.229 -    }
  17.230 -    
  17.231 -    switch(msg->subtype)
  17.232 -    {
  17.233 -    case CMSG_BLKIF_BE_CREATE:
  17.234 -        if ( msg->length != sizeof(blkif_be_create_t) )
  17.235 -            goto parse_error;
  17.236 -        blkif_create((blkif_be_create_t *)msg->msg);
  17.237 -        break;   
  17.238 -        
  17.239 -    case CMSG_BLKIF_BE_DESTROY:
  17.240 -        if ( msg->length != sizeof(blkif_be_destroy_t) )
  17.241 -            goto parse_error;
  17.242 -        blkif_destroy((blkif_be_destroy_t *)msg->msg);
  17.243 -        break;  
  17.244 -        
  17.245 -    case CMSG_BLKIF_BE_VBD_CREATE:
  17.246 -        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
  17.247 -            goto parse_error;
  17.248 -        vbd_create((blkif_be_vbd_create_t *)msg->msg);
  17.249 -        break;
  17.250 -        
  17.251 -    case CMSG_BLKIF_BE_VBD_DESTROY:
  17.252 -        if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
  17.253 -            goto parse_error;
  17.254 -        vbd_destroy((blkif_be_vbd_destroy_t *)msg->msg);
  17.255 -        break;
  17.256 -
  17.257 -    case CMSG_BLKIF_BE_CONNECT:
  17.258 -    case CMSG_BLKIF_BE_DISCONNECT:
  17.259 -        /* we don't manage the device channel, the tap does. */
  17.260 -        break;
  17.261 -
  17.262 -    default:
  17.263 -        goto parse_error;
  17.264 -    }
  17.265 -    return 0;
  17.266 -parse_error:
  17.267 -    printf("Bad control message!\n");
  17.268 -    return 0;
  17.269 -    
  17.270 -}    
  17.271 -
  17.272 -int parallax_probe(blkif_request_t *req, blkif_t *blkif)
  17.273 -{
  17.274 -    blkif_response_t *rsp;
  17.275 -    vdisk_t *img_info;
  17.276 -    vdi_t *vdi;
  17.277 -    int i, nr_vdis = 0; 
  17.278 -
  17.279 -    DPRINTF("parallax_probe: req=%p, blkif=%p\n", req, blkif); 
  17.280 -
  17.281 -    /* We expect one buffer only. */
  17.282 -    if ( req->nr_segments != 1 )
  17.283 -      goto err;
  17.284 -
  17.285 -    /* Make sure the buffer is page-sized. */
  17.286 -    if ( (req->seg[0].first_sect != 0) || (req->seg[0].last_sect != 7) )
  17.287 -      goto err;
  17.288 -
  17.289 -    /* fill the list of devices */
  17.290 -    for (i=0; i<VDI_HASHSZ; i++) {
  17.291 -        vdi = blkif->vdi_hash[i];
  17.292 -        while (vdi) {
  17.293 -            img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
  17.294 -            img_info[nr_vdis].device   = vdi->vdevice;
  17.295 -            img_info[nr_vdis].info     = 0;
  17.296 -            /* The -1 here accounts for the LSB in the radix tree */
  17.297 -            img_info[nr_vdis].capacity = 
  17.298 -                    ((1LL << (VDI_HEIGHT-1)) * SECTS_PER_NODE);
  17.299 -            nr_vdis++;
  17.300 -            vdi = vdi->next;
  17.301 -        }
  17.302 -    }
  17.303 -
  17.304 -    
  17.305 -    rsp = (blkif_response_t *)req;
  17.306 -    rsp->id = req->id;
  17.307 -    rsp->operation = BLKIF_OP_PROBE;
  17.308 -    rsp->status = nr_vdis; /* number of disks */
  17.309 -
  17.310 -    DPRINTF("parallax_probe: send positive response (nr_vdis=%d)\n", nr_vdis);
  17.311 -    return  BLKTAP_RESPOND;
  17.312 -err:
  17.313 -    rsp = (blkif_response_t *)req;
  17.314 -    rsp->id = req->id;
  17.315 -    rsp->operation = BLKIF_OP_PROBE;
  17.316 -    rsp->status = BLKIF_RSP_ERROR;
  17.317 -    
  17.318 -    DPRINTF("parallax_probe: send error response\n"); 
  17.319 -    return BLKTAP_RESPOND;  
  17.320 -}
  17.321 -
  17.322 -typedef struct {
  17.323 -    blkif_request_t *req;
  17.324 -    int              count;
  17.325 -    int              error;
  17.326 -    pthread_mutex_t  mutex;
  17.327 -} pending_t;
  17.328 -
  17.329 -#define MAX_REQUESTS 64
  17.330 -pending_t pending_list[MAX_REQUESTS];
  17.331 -
  17.332 -struct cb_param {
  17.333 -    pending_t *pent;
  17.334 -    int       segment;
  17.335 -    uint64_t       sector; 
  17.336 -    uint64_t       vblock; /* for debug printing -- can be removed. */
  17.337 -};
  17.338 -
  17.339 -static void read_cb(struct io_ret r, void *in_param)
  17.340 -{
  17.341 -    struct cb_param *param = (struct cb_param *)in_param;
  17.342 -    pending_t *p = param->pent;
  17.343 -    int segment = param->segment;
  17.344 -    blkif_request_t *req = p->req;
  17.345 -    unsigned long size, offset, start;
  17.346 -    char *dpage, *spage;
  17.347 -	
  17.348 -    spage  = IO_BLOCK(r);
  17.349 -    if (spage == NULL) { p->error++; goto finish; }
  17.350 -    dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), segment);
  17.351 -    
  17.352 -    /* Calculate read size and offset within the read block. */
  17.353 -
  17.354 -    offset = (param->sector << SECTOR_SHIFT) % BLOCK_SIZE;
  17.355 -    size = (req->seg[segment].last_sect - req->seg[segment].first_sect + 1) <<
  17.356 -        SECTOR_SHIFT;
  17.357 -    start = req->seg[segment].first_sect << SECTOR_SHIFT;
  17.358 -
  17.359 -    DPRINTF("ParallaxRead: sect: %lld (%ld,%ld),  "
  17.360 -            "vblock %llx, "
  17.361 -            "size %lx\n", 
  17.362 -            param->sector,
  17.363 -            p->req->seg[segment].first_sect,
  17.364 -            p->req->seg[segment].last_sect,
  17.365 -            param->vblock, size); 
  17.366 -
  17.367 -    memcpy(dpage + start, spage + offset, size);
  17.368 -    freeblock(spage);
  17.369 -    
  17.370 -    /* Done the read.  Now update the pending record. */
  17.371 - finish:
  17.372 -    pthread_mutex_lock(&p->mutex);
  17.373 -    p->count--;
  17.374 -    
  17.375 -    if (p->count == 0) {
  17.376 -    	blkif_response_t *rsp;
  17.377 -    	
  17.378 -        rsp = (blkif_response_t *)req;
  17.379 -        rsp->id = req->id;
  17.380 -        rsp->operation = BLKIF_OP_READ;
  17.381 -    	if (p->error == 0) {
  17.382 -            rsp->status = BLKIF_RSP_OKAY;
  17.383 -    	} else {
  17.384 -            rsp->status = BLKIF_RSP_ERROR;
  17.385 -    	}
  17.386 -        blktap_inject_response(rsp);       
  17.387 -    }
  17.388 -    
  17.389 -    pthread_mutex_unlock(&p->mutex);
  17.390 -	
  17.391 -    free(param); /* TODO: replace with cached alloc/dealloc */
  17.392 -}	
   17.393 -/* parallax_read: issue one vdi_read() per request segment, handing read_cb and a freshly allocated cb_param to each call. Returns BLKTAP_STOLEN after all segments are issued, or BLKTAP_RESPOND with an error response built over req when no VDI is found. */
   17.394 -int parallax_read(blkif_request_t *req, blkif_t *blkif)
   17.395 -{
   17.396 -    blkif_response_t *rsp;
   17.397 -    uint64_t vblock, gblock;
   17.398 -    vdi_t *vdi;
   17.399 -    uint64_t sector;
   17.400 -    int i;
   17.401 -    char *dpage, *spage;
   17.402 -    pending_t *pent;
   17.403 -
   17.404 -    vdi = blkif_get_vdi(blkif, req->device);
   17.405 -    
   17.406 -    if ( vdi == NULL )
   17.407 -        goto err;
   17.408 -        
   17.409 -    pent = &pending_list[ID_TO_IDX(req->id)]; /* pending slot is selected by the request id */
   17.410 -    pent->count = req->nr_segments; /* one callback is expected per segment */
   17.411 -    pent->req = req;
   17.412 -    pthread_mutex_init(&pent->mutex, NULL);
   17.413 -    /* NOTE(review): pent->error is not reset here -- confirm it is cleared elsewhere before a slot is reused */
   17.414 -    for (i = 0; i < req->nr_segments; i++) { /* NOTE(review): with zero segments nothing is issued and no response is built here */
   17.415 -        pthread_t tid;
   17.416 -        int ret;
   17.417 -        struct cb_param *p;
   17.418 -        
   17.419 -        /* Round the requested segment to a block address. */
   17.420 -        sector  = req->sector_number + (8*i); /* presumably 8 sectors per block -- confirm against SECTOR_SHIFT/BLOCK_SHIFT */
   17.421 -        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
   17.422 -        
   17.423 -        /* TODO: Replace this call to malloc with a cached allocation */
   17.424 -        p = (struct cb_param *)malloc(sizeof(struct cb_param)); /* NOTE(review): result is used without a NULL check */
   17.425 -        p->pent = pent;
   17.426 -        p->sector = sector; 
   17.427 -        p->segment = i;     
   17.428 -        p->vblock = vblock; /* dbg */
   17.429 -        
   17.430 -        /* Get that block from the store. */
   17.431 -        vdi_read(vdi, vblock, read_cb, (void *)p);    
   17.432 -    }
   17.433 -    
   17.434 -    return BLKTAP_STOLEN;
   17.435 -
   17.436 -err:
   17.437 -    rsp = (blkif_response_t *)req; /* the response is written in place over the request */
   17.438 -    rsp->id = req->id;
   17.439 -    rsp->operation = BLKIF_OP_READ;
   17.440 -    rsp->status = BLKIF_RSP_ERROR;
   17.441 -    
   17.442 -    return BLKTAP_RESPOND;  
   17.443 -}
  17.444 -
  17.445 -static void write_cb(struct io_ret r, void *in_param)
  17.446 -{
  17.447 -    struct cb_param *param = (struct cb_param *)in_param;
  17.448 -    pending_t *p = param->pent;
  17.449 -    blkif_request_t *req = p->req;
  17.450 -    
  17.451 -    /* catch errors from the block code. */
  17.452 -    if (IO_INT(r) < 0) p->error++;
  17.453 -    
  17.454 -    pthread_mutex_lock(&p->mutex);
  17.455 -    p->count--;
  17.456 -    
  17.457 -    if (p->count == 0) {
  17.458 -    	blkif_response_t *rsp;
  17.459 -    	
  17.460 -        rsp = (blkif_response_t *)req;
  17.461 -        rsp->id = req->id;
  17.462 -        rsp->operation = BLKIF_OP_WRITE;
  17.463 -    	if (p->error == 0) {
  17.464 -            rsp->status = BLKIF_RSP_OKAY;
  17.465 -    	} else {
  17.466 -            rsp->status = BLKIF_RSP_ERROR;
  17.467 -    	}
  17.468 -        blktap_inject_response(rsp);       
  17.469 -    }
  17.470 -    
  17.471 -    pthread_mutex_unlock(&p->mutex);
  17.472 -	
  17.473 -    free(param); /* TODO: replace with cached alloc/dealloc */
  17.474 -}
   17.475 -/* parallax_write: issue one vdi_write() per request segment, handing write_cb and a freshly allocated cb_param to each call. Only whole, block-aligned segments are accepted. Returns BLKTAP_STOLEN after all segments are issued, or BLKTAP_RESPOND with an error response built over req. */
   17.476 -int parallax_write(blkif_request_t *req, blkif_t *blkif)
   17.477 -{
   17.478 -    blkif_response_t *rsp;
   17.479 -    uint64_t sector;
   17.480 -    int i, writable = 0;
   17.481 -    uint64_t vblock, gblock;
   17.482 -    char *spage;
   17.483 -    unsigned long size, offset, start;
   17.484 -    vdi_t *vdi;
   17.485 -    pending_t *pent;
   17.486 -
   17.487 -    vdi = blkif_get_vdi(blkif, req->device);
   17.488 -    
   17.489 -    if ( vdi == NULL )
   17.490 -        goto err;
   17.491 -        
   17.492 -    pent = &pending_list[ID_TO_IDX(req->id)]; /* pending slot is selected by the request id */
   17.493 -    pent->count = req->nr_segments; /* one callback is expected per segment */
   17.494 -    pent->req = req;
   17.495 -    pthread_mutex_init(&pent->mutex, NULL);
   17.496 -    /* NOTE(review): pent->error is not reset here -- confirm it is cleared elsewhere before a slot is reused */
   17.497 -    for (i = 0; i < req->nr_segments; i++) {
   17.498 -        struct cb_param *p;
   17.499 -        
   17.500 -        spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); /* source page for this segment */
   17.501 -        
   17.502 -        /* Round the requested segment to a block address. */
   17.503 -        
   17.504 -        sector  = req->sector_number + (8*i);
   17.505 -        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
   17.506 -        
   17.507 -        /* Calculate write size and offset within the target block. */
   17.508 -        
   17.509 -        offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
   17.510 -        size = (req->seg[i].last_sect - req->seg[i].first_sect + 1) <<
   17.511 -            SECTOR_SHIFT;
   17.512 -        start = req->seg[i].first_sect << SECTOR_SHIFT;
   17.513 -
   17.514 -        DPRINTF("ParallaxWrite: sect: %lld (%ld,%ld),  "
   17.515 -                "vblock %llx, gblock %llx, "
   17.516 -                "size %lx\n", 
   17.517 -                sector, 
   17.518 -                req->seg[i].first_sect, req->seg[i].last_sect,
   17.519 -                vblock, gblock, size); /* NOTE(review): gblock is never assigned in this function */
   17.520 -      
   17.521 -        /* XXX: For now we just freak out if they try to write a   */
   17.522 -        /* non block-sized, block-aligned page.                    */
   17.523 -        
   17.524 -        if ((offset != 0) || (size != BLOCK_SIZE) || (start != 0)) {
   17.525 -            printf("]\n] STRANGE WRITE!\n]\n");
   17.526 -            goto err; /* NOTE(review): writes for earlier segments may already have been issued -- confirm responding here is safe */
   17.527 -        }
   17.528 -        
   17.529 -        /* TODO: Replace this call to malloc with a cached allocation */
   17.530 -        p = (struct cb_param *)malloc(sizeof(struct cb_param)); /* NOTE(review): result is used without a NULL check */
   17.531 -        p->pent = pent;
   17.532 -        p->sector = sector; 
   17.533 -        p->segment = i;     
   17.534 -        p->vblock = vblock; /* dbg */
   17.535 -        
   17.536 -        /* Issue the write to the store. */
   17.537 -        vdi_write(vdi, vblock, spage, write_cb, (void *)p);
   17.538 -    }
   17.539 -
   17.540 -    return BLKTAP_STOLEN;
   17.541 -
   17.542 -err:
   17.543 -    rsp = (blkif_response_t *)req; /* the response is written in place over the request */
   17.544 -    rsp->id = req->id;
   17.545 -    rsp->operation = BLKIF_OP_WRITE;
   17.546 -    rsp->status = BLKIF_RSP_ERROR;
   17.547 -    
   17.548 -    return BLKTAP_RESPOND;  
   17.549 -}
   17.550 -/* parallax_request: request hook. Looks up the blkif for the domain encoded in req->id and dispatches on req->operation to the probe, read or write handler; on an unknown blkif or operation it builds an error response over req and returns BLKTAP_RESPOND. */
   17.551 -int parallax_request(blkif_request_t *req)
   17.552 -{
   17.553 -    blkif_response_t *rsp;
   17.554 -    domid_t  dom   = ID_TO_DOM(req->id);
   17.555 -    blkif_t *blkif = blkif_find_by_handle(dom, 0); /* handle is always 0 here */
   17.556 -    
   17.557 -    if (blkif == NULL)
   17.558 -        goto err;
   17.559 -    
   17.560 -    if ( req->operation == BLKIF_OP_PROBE ) {
   17.561 -        
   17.562 -        return parallax_probe(req, blkif);
   17.563 -        
   17.564 -    } else if ( req->operation == BLKIF_OP_READ ) {
   17.565 -        
   17.566 -        return parallax_read(req, blkif);
   17.567 -        
   17.568 -    } else if ( req->operation == BLKIF_OP_WRITE ) {
   17.569 -        
   17.570 -        return parallax_write(req, blkif);
   17.571 -        
   17.572 -    } else {
   17.573 -        printf("Unknown request message type!\n");
   17.574 -        /* Unknown operation */
   17.575 -        goto err;
   17.576 -    }
   17.577 -    
   17.578 -err:
   17.579 -    rsp = (blkif_response_t *)req; /* the response is written in place over the request */
   17.580 -    rsp->operation = req->operation;
   17.581 -    rsp->id = req->id;
   17.582 -    rsp->status = BLKIF_RSP_ERROR;
   17.583 -    return BLKTAP_RESPOND;  
   17.584 -}
   17.585 -/* __init_parallax: zero the whole blkif_hash table. */
   17.586 -void __init_parallax(void) 
   17.587 -{
   17.588 -    memset(blkif_hash, 0, sizeof(blkif_hash));
   17.589 -}
  17.590 -
  17.591 -
   17.592 -/* main: run the blockstore, async block, vdi and local initialisers in that order, register the control and request hooks, then call blktap_listen(). argc and argv are not used. */
   17.593 -int main(int argc, char *argv[])
   17.594 -{
   17.595 -    DPRINTF("parallax: starting.\n"); 
   17.596 -    __init_blockstore();
   17.597 -    DPRINTF("parallax: initialized blockstore...\n"); 
   17.598 -    init_block_async();
   17.599 -    DPRINTF("parallax: initialized async blocks...\n"); 
   17.600 -    __init_vdi();
   17.601 -    DPRINTF("parallax: initialized vdi registry etc...\n"); 
   17.602 -    __init_parallax();
   17.603 -    DPRINTF("parallax: initialized local stuff..\n"); 
   17.604 -
   17.605 -    blktap_register_ctrl_hook("parallax_control", parallax_control);
   17.606 -    blktap_register_request_hook("parallax_request", parallax_request);
   17.607 -    DPRINTF("parallax: added ctrl + request hooks, starting listen...\n"); 
   17.608 -    blktap_listen(); /* presumably does not return until the listener stops -- confirm */
   17.609 -    
   17.610 -    return 0;
   17.611 -}
    18.1 --- a/tools/blktap/parallax/radix.c	Fri Jun 16 18:19:40 2006 +0100
    18.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.3 @@ -1,631 +0,0 @@
    18.4 -/*
    18.5 - * Radix tree for mapping (up to) 63-bit virtual block IDs to
    18.6 - * 63-bit global block IDs
    18.7 - *
    18.8 - * Pointers within the tree set aside the least significant bit to indicate
     18.9 - * whether or not the target block is writable from this node.
   18.10 - *
   18.11 - * The block with ID 0 is assumed to be an empty block of all zeros
   18.12 - */
   18.13 -
   18.14 -#include <unistd.h>
   18.15 -#include <stdio.h>
   18.16 -#include <stdlib.h>
   18.17 -#include <assert.h>
   18.18 -#include <string.h>
   18.19 -#include <pthread.h>
   18.20 -#include "blockstore.h"
   18.21 -#include "radix.h"
   18.22 -
   18.23 -#define RADIX_TREE_MAP_SHIFT 9
   18.24 -#define RADIX_TREE_MAP_MASK 0x1ff
   18.25 -#define RADIX_TREE_MAP_ENTRIES 512
   18.26 -
   18.27 -/*
   18.28 -#define DEBUG
   18.29 -*/
   18.30 -
   18.31 -/* Experimental radix cache. */
   18.32 -
   18.33 -static  pthread_mutex_t rcache_mutex = PTHREAD_MUTEX_INITIALIZER;
   18.34 -static  int rcache_count = 0;
   18.35 -#define RCACHE_MAX 1024
   18.36 -
    18.37 -typedef struct rcache_st {
    18.38 -    radix_tree_node  *node;       /* cached copy of the block contents */
    18.39 -    uint64_t               id;    /* block id this entry caches */
    18.40 -    struct rcache_st *hash_next;  /* next entry in the same rcache[] bucket */
    18.41 -    struct rcache_st *cache_next; /* next entry towards rcache_tail */
    18.42 -    struct rcache_st *cache_prev; /* previous entry towards rcache_head */
    18.43 -} rcache_t;
   18.44 -
   18.45 -static rcache_t *rcache_head = NULL;
   18.46 -static rcache_t *rcache_tail = NULL;
   18.47 -
   18.48 -#define RCHASH_SIZE 512ULL
   18.49 -rcache_t *rcache[RCHASH_SIZE];
   18.50 -#define RCACHE_HASH(_id) ((_id) & (RCHASH_SIZE - 1))
    18.51 -/* __rcache_init: set every bucket of the rcache[] hash table to NULL. */
    18.52 -void __rcache_init(void)
    18.53 -{
    18.54 -    int i;
    18.55 -
    18.56 -    for (i=0; i<RCHASH_SIZE; i++)
    18.57 -        rcache[i] = NULL;
    18.58 -}
   18.59 -    
    18.60 -/* rcache_write: store a copy of node in the cache under id, holding rcache_mutex throughout. An existing entry is overwritten and moved to the list head; otherwise a new entry is added at the head, recycling the tail entry once rcache_count has reached RCACHE_MAX. */
    18.61 -void rcache_write(uint64_t id, radix_tree_node *node)
    18.62 -{
    18.63 -    rcache_t *r, *tmp, **curs;
    18.64 -    
    18.65 -    pthread_mutex_lock(&rcache_mutex);
    18.66 -    
    18.67 -    /* Is it already in the cache? */
    18.68 -    r = rcache[RCACHE_HASH(id)];
    18.69 -    
    18.70 -    for (;;) {
    18.71 -        if (r == NULL) 
    18.72 -            break;
    18.73 -        if (r->id == id) 
    18.74 -        {
    18.75 -            memcpy(r->node, node, BLOCK_SIZE); /* refresh the cached copy in place */
    18.76 -            
    18.77 -            /* bring to front. */
    18.78 -            if (r != rcache_head) {
    18.79 -                
    18.80 -                if (r == rcache_tail) {
    18.81 -                    if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
    18.82 -                    rcache_tail->cache_next = NULL;
    18.83 -                }
    18.84 -
    18.85 -                tmp = r->cache_next; /* unlink r from its current position */
    18.86 -                if (r->cache_next != NULL) r->cache_next->cache_prev 
    18.87 -                                                     = r->cache_prev;
    18.88 -                if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp;
    18.89 -
    18.90 -                r->cache_prev = NULL; /* relink r as the new head */
    18.91 -                r->cache_next = rcache_head;
    18.92 -                if (rcache_head != NULL) rcache_head->cache_prev = r;
    18.93 -                rcache_head = r;
    18.94 -            }
    18.95 -
    18.96 -//printf("Update (%Ld)\n", r->id);
    18.97 -            goto done;
    18.98 -        }
    18.99 -        r = r->hash_next;
   18.100 -    }
   18.101 -    
   18.102 -    if ( rcache_count == RCACHE_MAX ) 
   18.103 -    {
   18.104 -        /* Remove an entry */
   18.105 -        
   18.106 -        r = rcache_tail; /* the tail entry is recycled for the new id */
   18.107 -        if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
   18.108 -        rcache_tail->cache_next = NULL;
   18.109 -        freeblock(r->node);
   18.110 -        
   18.111 -        curs = &rcache[RCACHE_HASH(r->id)]; /* unlink the victim from its hash chain */
   18.112 -        while ((*curs) != r)
   18.113 -            curs = &(*curs)->hash_next;
   18.114 -        *curs = r->hash_next;
   18.115 -//printf("Evict (%Ld)\n", r->id);
   18.116 -        
   18.117 -    } else {
   18.118 -        
   18.119 -        r = (rcache_t *)malloc(sizeof(rcache_t)); /* NOTE(review): result is used without a NULL check */
   18.120 -        rcache_count++;
   18.121 -    }
   18.122 -    
   18.123 -    r->node = newblock(); /* NOTE(review): result is passed to memcpy without a NULL check */
   18.124 -    memcpy(r->node, node, BLOCK_SIZE);
   18.125 -    r->id = id;
   18.126 -    
   18.127 -    r->hash_next = rcache[RCACHE_HASH(id)]; /* push onto the front of the hash chain */
   18.128 -    rcache[RCACHE_HASH(id)] = r;
   18.129 -    
   18.130 -    r->cache_prev = NULL; /* and onto the head of the cache list */
   18.131 -    r->cache_next = rcache_head;
   18.132 -    if (rcache_head != NULL) rcache_head->cache_prev = r;
   18.133 -    rcache_head = r;
   18.134 -    if (rcache_tail == NULL) rcache_tail = r;
   18.135 -    
   18.136 -//printf("Added (%Ld, %p)\n", id, r->node);
   18.137 -done:
   18.138 -    pthread_mutex_unlock(&rcache_mutex);
   18.139 -}
   18.140 -/* rcache_read: look id up in the cache while holding rcache_mutex. On a hit the entry is moved to the list head and a copy made with newblock() and memcpy is returned; on a miss NULL is returned. */
   18.141 -radix_tree_node *rcache_read(uint64_t id)
   18.142 -{
   18.143 -    rcache_t *r, *tmp;
   18.144 -    radix_tree_node *node = NULL;
   18.145 -    
   18.146 -    pthread_mutex_lock(&rcache_mutex);
   18.147 -
   18.148 -    r = rcache[RCACHE_HASH(id)];
   18.149 -    
   18.150 -    for (;;) { /* walk the hash chain for this bucket */
   18.151 -        if (r == NULL) {
   18.152 -//printf("Miss (%Ld)\n", id);
   18.153 -            goto done;
   18.154 -        }
   18.155 -        if (r->id == id) break;
   18.156 -        r = r->hash_next;
   18.157 -    }
   18.158 -   
   18.159 -    /* bring to front. */
   18.160 -    if (r != rcache_head) 
   18.161 -    {
   18.162 -        if (r == rcache_tail) {
   18.163 -            if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
   18.164 -            rcache_tail->cache_next = NULL;
   18.165 -        }
   18.166 -        tmp = r->cache_next; /* unlink r from its current position */
   18.167 -        if (r->cache_next != NULL) r->cache_next->cache_prev = r->cache_prev;
   18.168 -        if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp;
   18.169 -
   18.170 -        r->cache_prev = NULL; /* relink r as the new head */
   18.171 -        r->cache_next = rcache_head;
   18.172 -        if (rcache_head != NULL) rcache_head->cache_prev = r;
   18.173 -        rcache_head = r;
   18.174 -    }
   18.175 -    
   18.176 -    node = newblock(); /* NOTE(review): result is passed to memcpy without a NULL check */
   18.177 -    memcpy(node, r->node, BLOCK_SIZE);
   18.178 -    
   18.179 -//printf("Hit (%Ld, %p)\n", id, r->node);
   18.180 -done:
   18.181 -    pthread_mutex_unlock(&rcache_mutex);
   18.182 -    
   18.183 -    return(node);
   18.184 -}
  18.185 -
   18.186 -/* rc_readblock: return the cached copy of block id if there is one; otherwise call readblock() and, when that succeeds, add the result to the cache before returning it. Returns NULL when readblock() returns NULL. */
   18.187 -void *rc_readblock(uint64_t id)
   18.188 -{
   18.189 -    void *ret;
   18.190 -    
   18.191 -    ret = (void *)rcache_read(id);
   18.192 -    
   18.193 -    if (ret != NULL) return ret;
   18.194 -    
   18.195 -    ret = readblock(id);
   18.196 -    
   18.197 -    if (ret != NULL)
   18.198 -        rcache_write(id, ret);
   18.199 -    
   18.200 -    return(ret);
   18.201 -}
   18.202 -/* rc_allocblock: call allocblock() on block and, unless it returns ZERO, cache the block under the returned id. Returns the value from allocblock(). */
   18.203 -uint64_t rc_allocblock(void *block)
   18.204 -{
   18.205 -    uint64_t ret;
   18.206 -    
   18.207 -    ret = allocblock(block);
   18.208 -    
   18.209 -    if (ret != ZERO)
   18.210 -        rcache_write(ret, block);
   18.211 -    
   18.212 -    return(ret);
   18.213 -}
  18.214 -
  18.215 -int rc_writeblock(uint64_t id, void *block)
  18.216 -{
  18.217 -    int ret;
  18.218 -    
  18.219 -    ret = writeblock(id, block);
  18.220 -    rcache_write(id, block);
  18.221 -    
  18.222 -    return(ret);
  18.223 -}
  18.224 -
  18.225 -
  18.226 -/*
  18.227 - * block device interface and other helper functions
  18.228 - * with these functions, block id is just a 63-bit number, with
  18.229 - * no special consideration for the LSB
  18.230 - */
  18.231 -radix_tree_node cloneblock(radix_tree_node block);
  18.232 -
  18.233 -/*
  18.234 - * main api
  18.235 - * with these functions, the LSB of root always indicates
  18.236 - * whether or not the block is writable, including the return
  18.237 - * values of update and snapshot
  18.238 - */
  18.239 -uint64_t lookup(int height, uint64_t root, uint64_t key);
  18.240 -uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val);
  18.241 -uint64_t snapshot(uint64_t root);
  18.242 -
   18.243 -/**
   18.244 - * cloneblock: clone an existing block in memory
   18.245 - *   @block: the old block; RADIX_TREE_MAP_ENTRIES entries are read from it
   18.246 - *
   18.247 - *   @return: new malloc()ed block with every entry ANDed with ONEMASK, or NULL if malloc fails
   18.248 - */
   18.249 -radix_tree_node cloneblock(radix_tree_node block) {
   18.250 -    radix_tree_node node = (radix_tree_node) malloc(BLOCK_SIZE);
   18.251 -    int i;
   18.252 -    if (node == NULL) {
   18.253 -        perror("cloneblock malloc");
   18.254 -        return NULL;
   18.255 -    }
   18.256 -    for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
   18.257 -        node[i] = block[i] & ONEMASK; /* strip the writable bit from each copied entry */
   18.258 -    return node;
   18.259 -}
  18.260 -
   18.261 -/**
   18.262 - * lookup: find a value given a key
   18.263 - *   @height: height in bits of the radix tree
   18.264 - *   @root: root node id, with set LSB indicating writable node
   18.265 - *   @key: key to lookup
   18.266 - *
   18.267 - *   @return: value on success (LSB set only if every entry followed had its LSB set), zero on error
   18.268 - */
   18.269 -
   18.270 -uint64_t lookup(int height, uint64_t root, uint64_t key) {
   18.271 -    radix_tree_node node;
   18.272 -    uint64_t mask = ONE; /* running AND of the LSBs of the entries followed */
   18.273 -    
   18.274 -    assert(key >> height == 0); /* key must fit in height bits */
   18.275 -
   18.276 -    /* the root block may be smaller to ensure all leaves are full */
   18.277 -    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
   18.278 -
   18.279 -    /* now carve off equal sized chunks at each step */
   18.280 -    for (;;) {
   18.281 -        uint64_t oldroot; /* assigned below but never read */
   18.282 -
   18.283 -#ifdef DEBUG
   18.284 -        printf("lookup: height=%3d root=%3Ld offset=%3d%s\n", height, root,
   18.285 -                (int) ((key >> height) & RADIX_TREE_MAP_MASK),
   18.286 -                (iswritable(root) ? "" : " (readonly)"));
   18.287 -#endif
   18.288 -        
   18.289 -        if (getid(root) == ZERO)
   18.290 -            return ZERO;
   18.291 -
   18.292 -        oldroot = root;
   18.293 -        node = (radix_tree_node) rc_readblock(getid(root));
   18.294 -        if (node == NULL)
   18.295 -            return ZERO;
   18.296 -
   18.297 -        root = node[(key >> height) & RADIX_TREE_MAP_MASK]; /* descend to the entry for this key slice */
   18.298 -        mask &= root;
   18.299 -        freeblock(node);
   18.300 -
   18.301 -        if (height == 0)
   18.302 -            return ( root & ONEMASK ) | mask;
   18.303 -
   18.304 -        height -= RADIX_TREE_MAP_SHIFT;
   18.305 -    }
   18.306 -
   18.307 -    return ZERO; /* not reached: the loop above only exits via return */
   18.308 -}
  18.309 -
  18.310 -/*
  18.311 - * update: set a radix tree entry, doing copy-on-write as necessary
  18.312 - *   @height: height in bits of the radix tree
  18.313 - *   @root: root node id, with set LSB indicating writable node
  18.314 - *   @key: key to set
  18.315 - *   @val: value to set, s.t. radix(key)=val
  18.316 - *
  18.317 - *   @returns: (possibly new) root id on success (with LSB=1), 0 on failure
  18.318 - */
  18.319 -
  18.320 -uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val) {
  18.321 -    int offset;
  18.322 -    uint64_t child;
  18.323 -    radix_tree_node node;
  18.324 -    
  18.325 -    /* base case--return val */
  18.326 -    if (height == 0)
  18.327 -        return val;
  18.328 -
  18.329 -    /* the root block may be smaller to ensure all leaves are full */
  18.330 -    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
  18.331 -    offset = (key >> height) & RADIX_TREE_MAP_MASK;
  18.332 -
  18.333 -#ifdef DEBUG
  18.334 -    printf("update: height=%3d root=%3Ld offset=%3d%s\n", height, root,
  18.335 -            offset, (iswritable(root)?"":" (clone)"));
  18.336 -#endif
  18.337 -
  18.338 -    /* load a block, or create a new one */
  18.339 -    if (root == ZERO) {
  18.340 -        node = (radix_tree_node) newblock();
  18.341 -    } else {
  18.342 -        node = (radix_tree_node) rc_readblock(getid(root));
  18.343 -
  18.344 -        if (!iswritable(root)) {
  18.345 -            /* need to clone this node */
  18.346 -            radix_tree_node oldnode = node;
  18.347 -            node = cloneblock(node);
  18.348 -            freeblock(oldnode);
  18.349 -            root = ZERO;
  18.350 -        }
  18.351 -    }
  18.352 -
  18.353 -    if (node == NULL) {
  18.354 -#ifdef DEBUG
  18.355 -        printf("update: node is null!\n");
  18.356 -#endif
  18.357 -        return ZERO;
  18.358 -    }
  18.359 -
  18.360 -    child = update(height, node[offset], key, val);
  18.361 -
  18.362 -    if (child == ZERO) {
  18.363 -        freeblock(node);
  18.364 -        return ZERO;
  18.365 -    } else if (child == node[offset]) {
  18.366 -        /* no change, so we already owned the child */
  18.367 -        assert(iswritable(root));
  18.368 -
  18.369 -        freeblock(node);
  18.370 -        return root;
  18.371 -    }
  18.372 -
  18.373 -    node[offset] = child;
  18.374 -
  18.375 -    /* new/cloned blocks need to be saved */
  18.376 -    if (root == ZERO) {
  18.377 -        /* mark this as an owned block */
  18.378 -        root = rc_allocblock(node);
  18.379 -        if (root)
  18.380 -            root = writable(root);
  18.381 -    } else if (rc_writeblock(getid(root), node) < 0) {
  18.382 -        freeblock(node);
  18.383 -        return ZERO;
  18.384 -    }
  18.385 -
  18.386 -    freeblock(node);
  18.387 -    return root;
  18.388 -}
  18.389 -
   18.390 -/**
   18.391 - * snapshot: create a snapshot
   18.392 - *   @root: old root node
   18.393 - *
   18.394 - *   @return: id of a newly allocated clone of the root block with its LSB set, 0 on error
   18.395 - */
   18.396 -uint64_t snapshot(uint64_t root) {
   18.397 -    radix_tree_node node, newnode;
   18.398 -
   18.399 -    if ((node = rc_readblock(getid(root))) == NULL)
   18.400 -        return ZERO;
   18.401 -
   18.402 -    newnode = cloneblock(node); /* the copy has the LSB cleared on every entry */
   18.403 -    freeblock(node);
   18.404 -    if (newnode == NULL)
   18.405 -        return ZERO;
   18.406 -    
   18.407 -    root = rc_allocblock(newnode); /* store the clone under a fresh id */
   18.408 -    freeblock(newnode);
   18.409 -
   18.410 -    if (root == ZERO)
   18.411 -        return ZERO;
   18.412 -    else
   18.413 -        return writable(root);
   18.414 -}
  18.415 -
  18.416 -/**
  18.417 - * collapse: collapse a parent onto a child.
  18.418 - * 
  18.419 - * NOTE: This assumes that parent and child really are, and further that
  18.420 - * there are no other children forked from this parent. (children of the
  18.421 - * child are okay...)
  18.422 - */
  18.423 -
  18.424 -int collapse(int height, uint64_t proot, uint64_t croot)
  18.425 -{
  18.426 -    int i, numlinks, ret, total = 0;
  18.427 -    radix_tree_node pnode, cnode;
  18.428 -    
  18.429 -    if (height == 0) {
  18.430 -        height = -1; /* terminate recursion */
  18.431 -    } else {        
  18.432 -        height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
  18.433 -    }
  18.434 -    numlinks = (1UL << RADIX_TREE_MAP_SHIFT);
  18.435 -
  18.436 -    /* Terminal cases: */
  18.437 -
  18.438 -    if ( (getid(proot) == ZERO) || (getid(croot) == ZERO) )
  18.439 -        return -1;
  18.440 -    
  18.441 -    /* get roots */
  18.442 -    if ((pnode = readblock(getid(proot))) == NULL)
  18.443 -        return -1;
  18.444 -    
  18.445 -    if ((cnode = readblock(getid(croot))) == NULL)
  18.446 -    {
  18.447 -        freeblock(pnode);
  18.448 -        return -1;
  18.449 -    }
  18.450 -    
  18.451 -    /* For each writable link in proot */
  18.452 -    for (i=0; i<numlinks; i++)
  18.453 -    {
  18.454 -        if ( pnode[i] == cnode[i] ) continue;
  18.455 -        
  18.456 -        /* collapse (next level) */
  18.457 -        /* if height != 0 and writable... */
  18.458 -        if (( height >= 0 ) && ( iswritable(pnode[i]) ) )
  18.459 -        {
  18.460 -            //printf("   %Ld is writable (i=%d).\n", getid(pnode[i]), i);
  18.461 -            ret = collapse(height, pnode[i], cnode[i]);
  18.462 -            if (ret == -1) 
  18.463 -            {
  18.464 -                total = -1;
  18.465 -            } else {
  18.466 -                total += ret;
  18.467 -            }
  18.468 -        }
  18.469 -    
  18.470 -        
  18.471 -    }
  18.472 -    
  18.473 -    /* if plink is writable, AND clink is writable -> free plink block */
  18.474 -    if ( ( iswritable(proot) ) && ( iswritable(croot) ) ) 
  18.475 -    {
  18.476 -        releaseblock(getid(proot));
  18.477 -        if (ret >=0) total++;
  18.478 -        //printf("   Delete %Ld\n", getid(proot));
  18.479 -    }
  18.480 -//printf("done : %Ld\n", getid(proot));
  18.481 -    return total;
  18.482 -
  18.483 -}
  18.484 -
  18.485 -
  18.486 -void print_root(uint64_t root, int height, FILE *dot_f)
  18.487 -{
  18.488 -    FILE *f;
  18.489 -    int i;
  18.490 -    radix_tree_node node;
  18.491 -    char *style[2] = { "", "style=bold,color=blue," };
  18.492 -    
  18.493 -    if (dot_f == NULL) {
  18.494 -        f = fopen("radix.dot", "w");
  18.495 -        if (f == NULL) {
  18.496 -            perror("print_root: open");
  18.497 -            return;
  18.498 -        }
  18.499 -
  18.500 -        /* write graph preamble */
  18.501 -        fprintf(f, "digraph G {\n");
  18.502 -
  18.503 -        /* add a node for this root. */
  18.504 -        fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
  18.505 -                getid(root), style[iswritable(root)], getid(root));
  18.506 -    }
  18.507 -    
  18.508 -    printf("print_root(%Ld)\n", getid(root));
  18.509 -    
  18.510 -    /* base case */
  18.511 -    if (height == 0) {
  18.512 -        /* add a node and edge for each child root */
  18.513 -        node = (radix_tree_node) readblock(getid(root));
  18.514 -        if (node == NULL)
  18.515 -            return;
  18.516 -        
  18.517 -        for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) {
  18.518 -            if (node[i] != ZERO) {
  18.519 -                fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
  18.520 -                        getid(node[i]), style[iswritable(node[i])], 
  18.521 -                        getid(node[i]));
  18.522 -                fprintf(f, "   n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), 
  18.523 -                        getid(node[i]), i);
  18.524 -            }
  18.525 -        }
  18.526 -        freeblock(node);
  18.527 -        return;
  18.528 -    }
  18.529 -
  18.530 -    /* the root block may be smaller to ensure all leaves are full */
  18.531 -    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
  18.532 -
  18.533 -    if (getid(root) == ZERO)
  18.534 -        return;
  18.535 -
  18.536 -    node = (radix_tree_node) readblock(getid(root));
  18.537 -    if (node == NULL)
  18.538 -        return;
  18.539 -
  18.540 -    /* add a node and edge for each child root */
  18.541 -    for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
  18.542 -        if (node[i] != ZERO) {
  18.543 -            fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
  18.544 -                    getid(node[i]), style[iswritable(node[i])], 
  18.545 -                    getid(node[i]));
  18.546 -
  18.547 -            print_root(node[i], height-RADIX_TREE_MAP_SHIFT, f);
  18.548 -            fprintf(f, "   n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), 
  18.549 -                    getid(node[i]), i);
  18.550 -        }
  18.551 -
  18.552 -    freeblock(node);
  18.553 -    
  18.554 -    /* write graph postamble */
  18.555 -    if (dot_f == NULL) {
  18.556 -        fprintf(f, "}\n");
  18.557 -        fclose(f);
  18.558 -    }
  18.559 -}
  18.560 -
  18.561 -#ifdef RADIX_STANDALONE
   18.562 -/* main (RADIX_STANDALONE only): interactive test driver. Initialises the blockstore, allocates one zeroed block, then reads commands from stdin until quit or end of input and runs the matching radix operation. Trees are treated as 34 bits high. */
   18.563 -int main(int argc, char **argv) {
   18.564 -    uint64_t key = ZERO, val = ZERO;
   18.565 -    uint64_t root = writable(2ULL);
   18.566 -    uint64_t p = ZERO, c = ZERO;
   18.567 -    int v;
   18.568 -    char buff[4096];
   18.569 -
   18.570 -    __init_blockstore();
   18.571 -    
   18.572 -    memset(buff, 0, 4096);
   18.573 -    /*fp = open("radix.dat", O_RDWR | O_CREAT, 0644);
   18.574 -
   18.575 -    if (fp < 3) {
   18.576 -        perror("open");
   18.577 -        return -1;
   18.578 -    }
   18.579 -    if (lseek(fp, 0, SEEK_END) == 0) {
   18.580 -        write(fp, buff, 4096);
   18.581 -    }*/
   18.582 -        
   18.583 -    allocblock(buff); /* buff is all zeros at this point; the returned id is ignored */
   18.584 -            
   18.585 -    printf("Recognized commands:\n"
   18.586 -           "Note: the LSB of a node number indicates if it is writable\n"
   18.587 -           "  root <node>               set root to <node>\n"
   18.588 -           "  snapshot                  take a snapshot of the root\n"
   18.589 -           "  set <key> <val>           set key=val\n"
   18.590 -           "  get <key>                 query key\n"
   18.591 -           "  c <proot> <croot>         collapse\n"
   18.592 -           "  pr                        print tree to dot\n"
   18.593 -           "  pf <1=verbose>            print freelist\n"
   18.594 -           "  quit\n"
   18.595 -           "\nroot = %Ld\n", root);
   18.596 -    for (;;) {
   18.597 -        //print_root(root, 34, NULL);
   18.598 -        //system("dot radix.dot -Tps -o radix.ps");
   18.599 -
   18.600 -        printf("> ");
   18.601 -        fflush(stdout);
   18.602 -        fgets(buff, 1024, stdin); /* NOTE(review): return value is not checked; only feof() ends the loop */
   18.603 -        if (feof(stdin))
   18.604 -            break;
   18.605 -        if (sscanf(buff, " root %Ld", &root) == 1) {
   18.606 -            printf("root set to %Ld\n", root);
   18.607 -        } else if (sscanf(buff, " set %Ld %Ld", &key, &val) == 2) {
   18.608 -            root = update(34, root, key, val);
   18.609 -            printf("root = %Ld\n", root);
   18.610 -        } else if (sscanf(buff, " c %Ld %Ld", &p, &c) == 2) {
   18.611 -            v = collapse(34, p, c);
   18.612 -            printf("reclaimed %d blocks.\n", v);
   18.613 -        } else if (sscanf(buff, " get %Ld", &key) == 1) {
   18.614 -            val = lookup(34, root, key);
   18.615 -            printf("value = %Ld\n", val);
   18.616 -        } else if (!strcmp(buff, "quit\n")) {
   18.617 -            break;
   18.618 -        } else if (!strcmp(buff, "snapshot\n")) {
   18.619 -            root = snapshot(root);
   18.620 -            printf("new root = %Ld\n", root);
   18.621 -        } else if (sscanf(buff, " pr %Ld", &root) == 1) { /* NOTE(review): needs a numeric argument and overwrites root, unlike the help text above */
   18.622 -            print_root(root, 34, NULL);
   18.623 -        } else if (sscanf(buff, " pf %d", &v) == 1) {
   18.624 -            freelist_count(v);
   18.625 -        } else if (!strcmp(buff, "pf\n")) {
   18.626 -            freelist_count(0);
   18.627 -        } else {
   18.628 -            printf("command not recognized\n");
   18.629 -        }
   18.630 -    }
   18.631 -    return 0;
   18.632 -}
  18.633 -
  18.634 -#endif
    19.1 --- a/tools/blktap/parallax/radix.h	Fri Jun 16 18:19:40 2006 +0100
    19.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.3 @@ -1,45 +0,0 @@
    19.4 -/*
    19.5 - * Radix tree for mapping (up to) 63-bit virtual block IDs to
    19.6 - * 63-bit global block IDs
    19.7 - *
    19.8 - * Pointers within the tree set aside the least significant bit to indicate
     19.9 - * whether or not the target block is writable from this node.
   19.10 - *
   19.11 - * The block with ID 0 is assumed to be an empty block of all zeros
   19.12 - */
   19.13 -
   19.14 -#ifndef __RADIX_H__
   19.15 -#define __RADIX_H__
   19.16 -
   19.17 -/* I don't really like exposing these, but... */
   19.18 -#define getid(x) (((x)>>1)&0x7fffffffffffffffLL)
   19.19 -#define putid(x) ((x)<<1)
   19.20 -#define writable(x) (((x)<<1)|1LL)
   19.21 -#define iswritable(x) ((x)&1LL)
   19.22 -#define ZERO 0LL
   19.23 -#define ONE 1LL
   19.24 -#define ONEMASK 0xffffffffffffffeLL
   19.25 -
   19.26 -#define RADIX_TREE_MAP_SHIFT 9
   19.27 -#define RADIX_TREE_MAP_MASK 0x1ff
   19.28 -#define RADIX_TREE_MAP_ENTRIES 512
   19.29 -
   19.30 -typedef uint64_t *radix_tree_node;
   19.31 -
   19.32 -
   19.33 -/*
   19.34 - * main api
   19.35 - * with these functions, the LSB of root always indicates
   19.36 - * whether or not the block is writable, including the return
   19.37 - * values of update and snapshot
   19.38 - */
   19.39 -uint64_t lookup(int height, uint64_t root, uint64_t key);
   19.40 -uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val);
   19.41 -uint64_t snapshot(uint64_t root);
   19.42 -int collapse(int height, uint64_t proot, uint64_t croot);
   19.43 -int isprivate(int height, uint64_t root, uint64_t key);
   19.44 -
   19.45 -
   19.46 -void __rcache_init(void);
   19.47 -
   19.48 -#endif /* __RADIX_H__ */
    20.1 --- a/tools/blktap/parallax/requests-async.c	Fri Jun 16 18:19:40 2006 +0100
    20.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.3 @@ -1,762 +0,0 @@
    20.4 -/* requests-async.c
    20.5 - *
    20.6 - * asynchronous request dispatcher for radix access in parallax.
    20.7 - */
    20.8 -
    20.9 -#include <stdio.h>
   20.10 -#include <stdlib.h>
   20.11 -#include <string.h>
   20.12 -#include <ctype.h>
   20.13 -#include <assert.h>
   20.14 -#include <pthread.h>
   20.15 -#include <err.h>
   20.16 -#include <zlib.h> /* for crc32() */
   20.17 -#include "requests-async.h"
   20.18 -#include "vdi.h"
   20.19 -#include "radix.h"
   20.20 -/* Per-level 9-bit slot indices taken from the (already doubled) vaddr. */
   20.21 -#define L1_IDX(_a) ((((uint64_t)(_a)) >> 18) & 0x1ffULL) /* bits 18-26 */
   20.22 -#define L2_IDX(_a) ((((uint64_t)(_a)) >>  9) & 0x1ffULL) /* bits  9-17 */
   20.23 -#define L3_IDX(_a) ((((uint64_t)(_a))      ) & 0x1ffULL) /* bits  0-8  */
   20.24 -
   20.25 -/* Debug tracing: change the 0 to 1 to route DPRINTF to printf. */
   20.26 -#if 0
   20.27 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   20.28 -#else
   20.29 -#define DPRINTF(_f, _a...) ((void)0)
   20.30 -#endif
   20.31 -/* Per-block metadata kept in the L3 slot right after the block's pointer slot. */
   20.32 -struct block_info {
   20.33 -    uint32_t        crc;    /* crc32 of the data block, computed in vdi_write() */
   20.34 -    uint32_t        unused; /* write path stores marker values here (0xdeadbeef, 101..107) */
   20.35 -};
   20.36 -/* One in-flight vdi_read()/vdi_write() request, threaded through read_cb/write_cb. */
   20.37 -struct io_req {
   20.38 -    enum { IO_OP_READ, IO_OP_WRITE } op;
   20.39 -    uint64_t        root;   /* radix root entry copied from vdi->radix_root */
   20.40 -    uint64_t        vaddr;  /* virtual block address, already shifted left by one */
   20.41 -    int        state;       /* current step, one of enum states */
   20.42 -    io_cb_t    cb;          /* caller's completion callback */
   20.43 -    void      *param;       /* opaque argument handed back to cb */
   20.44 -    struct radix_lock *lock; /* copied from vdi->radix_lock; locked by L1 index */
   20.45 -
   20.46 -    /* internal stuff: */
   20.47 -    struct io_ret     retval;/* holds the return while we unlock. */
   20.48 -    char             *block; /* the block to write */
   20.49 -    radix_tree_node   radix[3];    /* cached L1/L2/L3 nodes (write path), NULL if unset */
   20.50 -    uint64_t               radix_addr[3]; /* block ids of those nodes, indexed by L1/L2/L3 */
   20.51 -    struct block_info bi;    /* crc/marker read from, or to be stored in, the L3 node */
   20.52 -};
   20.53 -/* Clear the writable flag on every entry of a radix node; a NULL node is a no-op. */
   20.54 -void clear_w_bits(radix_tree_node node) 
   20.55 -{
   20.56 -    int i;
   20.57 -    if (node == NULL) return; /* write_cb may hand us the result of a failed read */
   20.58 -    for (i=0; i<RADIX_TREE_MAP_ENTRIES; i++)
   20.59 -        node[i] &= ONEMASK;
   20.60 -}
   20.61 -/* As clear_w_bits(), but only even slots: odd L3 slots hold block_info, not pointers. */
   20.62 -void clear_L3_w_bits(radix_tree_node node) 
   20.63 -{
   20.64 -    int i;
   20.65 -    if (node == NULL) return; /* write_cb may hand us the result of a failed read */
   20.66 -    for (i=0; i<RADIX_TREE_MAP_ENTRIES; i+=2)
   20.67 -        node[i] &= ONEMASK;
   20.68 -}
   20.69 -/* Values of io_req.state: the step that read_cb()/write_cb() handles next. */
   20.70 -enum states {
   20.71 -    /* both */
   20.72 -    READ_L1,
   20.73 -    READ_L2,
   20.74 -    READ_L3,
   20.75 -
   20.76 -    /* read */
   20.77 -    READ_LOCKED,    /* initial state set by vdi_read() */
   20.78 -    READ_DATA,
   20.79 -    READ_UNLOCKED,
   20.80 -    RETURN_ZERO,    /* a zero id was found: hand back a fresh newblock() */
   20.81 -
   20.82 -    /* write */
   20.83 -    WRITE_LOCKED,   /* initial state set by vdi_write() */
   20.84 -    WRITE_DATA,
   20.85 -    WRITE_L3,
   20.86 -    WRITE_UNLOCKED,
   20.87 -    
   20.88 -    /* L3 Zero Path */
   20.89 -    ALLOC_DATA_L3z,
   20.90 -    WRITE_L3_L3z,
   20.91 -    
   20.92 -    /* L3 Fault Path */
   20.93 -    ALLOC_DATA_L3f,
   20.94 -    WRITE_L3_L3f,
   20.95 -    
   20.96 -    /* L2 Zero Path */
   20.97 -    ALLOC_DATA_L2z,
   20.98 -    WRITE_L2_L2z,
   20.99 -    ALLOC_L3_L2z,
  20.100 -    WRITE_L2_L3z,   /* NOTE(review): never set or handled in this file's write_cb */
  20.101 -    
  20.102 -    /* L2 Fault Path */
  20.103 -    READ_L3_L2f,
  20.104 -    ALLOC_DATA_L2f,
  20.105 -    WRITE_L2_L2f,
  20.106 -    ALLOC_L3_L2f,
  20.107 -    WRITE_L2_L3f,   /* NOTE(review): never set or handled in this file's write_cb */
  20.108 -
  20.109 -    /* L1 Zero Path */
  20.110 -    ALLOC_DATA_L1z,
  20.111 -    ALLOC_L3_L1z,
  20.112 -    ALLOC_L2_L1z,
  20.113 -    WRITE_L1_L1z,
  20.114 -
  20.115 -    /* L1 Fault Path */
  20.116 -    READ_L2_L1f,
  20.117 -    READ_L3_L1f,
  20.118 -    ALLOC_DATA_L1f,
  20.119 -    ALLOC_L3_L1f,
  20.120 -    ALLOC_L2_L1f,
  20.121 -    WRITE_L1_L1f,
  20.122 -    
  20.123 -};
  20.124 -/* Indices into io_req.radix[] and io_req.radix_addr[], one per tree level. */
  20.125 -enum radix_offsets {
  20.126 -    L1 = 0, 
  20.127 -    L2 = 1,
  20.128 -    L3 = 2
  20.129 -};
  20.130 -
  20.131 -
  20.132 -static void read_cb(struct io_ret ret, void *param);
  20.133 -static void write_cb(struct io_ret ret, void *param);
  20.134 -/* Queue an asynchronous read of virtual block vaddr; cb(result, param) gets the outcome. */
  20.135 -int vdi_read(vdi_t *vdi, uint64_t vaddr, io_cb_t cb, void *param)
  20.136 -{
  20.137 -    struct io_req *request;
  20.138 -
  20.139 -    if (!VALID_VADDR(vaddr)) return ERR_BAD_VADDR;
  20.140 -    /* Every second slot in a bottom-level radix node stores crc32      */
  20.141 -    /* values etc., so the vaddr is doubled to land on the even slots.  */
  20.142 -    vaddr = vaddr << 1;
  20.143 -
  20.144 -    request = (struct io_req *)malloc(sizeof (*request));
  20.145 -    if (request == NULL) return ERR_NOMEM;
  20.146 -
  20.147 -    request->op    = IO_OP_READ;
  20.148 -    request->state = READ_LOCKED;
  20.149 -    request->vaddr = vaddr;
  20.150 -    request->root  = vdi->radix_root;
  20.151 -    request->lock  = vdi->radix_lock;
  20.152 -    request->cb    = cb;
  20.153 -    request->param = param;
  20.154 -    request->radix[0] = request->radix[1] = request->radix[2] = NULL;
  20.155 -
  20.156 -    /* read_cb() takes over once the read lock for this L1 slot is held. */
  20.157 -    block_rlock(request->lock, L1_IDX(vaddr), read_cb, request);
  20.158 -    return 0;
  20.159 -}
  20.160 -
  20.161 -/* Queue an asynchronous write of block at virtual block vaddr; cb gets the outcome. */
  20.162 -int   vdi_write(vdi_t *vdi, uint64_t vaddr, char *block, 
  20.163 -                io_cb_t cb, void *param)
  20.164 -{
  20.165 -    struct io_req *request;
  20.166 -    uint32_t checksum;
  20.167 -
  20.168 -    if (!VALID_VADDR(vaddr)) return ERR_BAD_VADDR;
  20.169 -    /* Every second slot in a bottom-level radix node stores crc32      */
  20.170 -    /* values etc., so the vaddr is doubled to land on the even slots.  */
  20.171 -    vaddr = vaddr << 1;
  20.172 -
  20.173 -    request = (struct io_req *)malloc(sizeof (*request));
  20.174 -    if (request == NULL) return ERR_NOMEM; 
  20.175 -
  20.176 -    /* Todo: add a pseudoheader to the block to include some location   */
  20.177 -    /* information in the CRC as well.                                  */
  20.178 -    checksum = (uint32_t) crc32(0L, Z_NULL, 0); 
  20.179 -    checksum = (uint32_t) crc32(checksum, block, BLOCK_SIZE); 
  20.180 -
  20.181 -    request->op     = IO_OP_WRITE;
  20.182 -    request->root   = vdi->radix_root;
  20.183 -    request->radix_addr[L1] = getid(request->root); /* for consistency */
  20.184 -    request->lock   = vdi->radix_lock; 
  20.185 -    request->vaddr  = vaddr;
  20.186 -    request->block  = block;
  20.187 -    request->bi.crc = checksum;
  20.188 -    request->bi.unused = 0xdeadbeef;
  20.189 -    request->radix[0] = request->radix[1] = request->radix[2] = NULL;
  20.190 -    request->cb     = cb;
  20.191 -    request->param  = param;
  20.192 -    request->state  = WRITE_LOCKED;
  20.193 -
  20.194 -    block_wlock(request->lock, L1_IDX(vaddr), write_cb, request);
  20.195 -    return 0;
  20.196 -}
  20.197 -/* Read-path state machine: each completed lock/read step re-enters here via param. */
  20.198 -static void read_cb(struct io_ret ret, void *param)
  20.199 -{
  20.200 -    struct io_req *req = (struct io_req *)param;
  20.201 -    radix_tree_node node;
  20.202 -    uint64_t idx;
  20.203 -    char *block;
  20.204 -    void *req_param;
  20.205 -
  20.206 -    DPRINTF("read_cb\n");
  20.207 -    /* get record */
  20.208 -    switch(req->state) {
  20.209 -
  20.210 -    case READ_LOCKED: 
  20.211 -
  20.212 -        DPRINTF("READ_LOCKED\n");
  20.213 -        req->state = READ_L1;
  20.214 -        block_read(getid(req->root), read_cb, req); 
  20.215 -        break;
  20.216 -
  20.217 -    case READ_L1: /* block is the radix root */
  20.218 -
  20.219 -        DPRINTF("READ_L1\n");
  20.220 -        block = IO_BLOCK(ret);
  20.221 -        if (block == NULL) goto fail;
  20.222 -        node = (radix_tree_node) block;
  20.223 -        idx  = getid( node[L1_IDX(req->vaddr)] );
  20.224 -        free(block);
  20.225 -        if ( idx == ZERO ) {
  20.226 -            req->state = RETURN_ZERO;
  20.227 -            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
  20.228 -        } else {
  20.229 -            req->state = READ_L2;
  20.230 -            block_read(idx, read_cb, req);
  20.231 -        }
  20.232 -        break;
  20.233 -
  20.234 -    case READ_L2:
  20.235 -
  20.236 -        DPRINTF("READ_L2\n");
  20.237 -        block = IO_BLOCK(ret);
  20.238 -        if (block == NULL) goto fail;
  20.239 -        node = (radix_tree_node) block;
  20.240 -        idx  = getid( node[L2_IDX(req->vaddr)] );
  20.241 -        free(block);
  20.242 -        if ( idx == ZERO ) {
  20.243 -            req->state = RETURN_ZERO;
  20.244 -            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
  20.245 -        } else {
  20.246 -            req->state = READ_L3;
  20.247 -            block_read(idx, read_cb, req);
  20.248 -        }
  20.249 -        break;
  20.250 -
  20.251 -    case READ_L3:
  20.252 -    {
  20.253 -        struct block_info *bi;
  20.254 -
  20.255 -        DPRINTF("READ_L3\n");
  20.256 -        block = IO_BLOCK(ret);
  20.257 -        if (block == NULL) goto fail;
  20.258 -        node = (radix_tree_node) block;
  20.259 -        idx  = getid( node[L3_IDX(req->vaddr)] );
  20.260 -        bi = (struct block_info *) &node[L3_IDX(req->vaddr) + 1]; /* metadata slot */
  20.261 -        req->bi = *bi; /* keep the stored crc; the node is freed below */
  20.262 -        free(block);
  20.263 -        if ( idx == ZERO )  {
  20.264 -            req->state = RETURN_ZERO;
  20.265 -            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
  20.266 -        } else {
  20.267 -            req->state = READ_DATA;
  20.268 -            block_read(idx, read_cb, req);
  20.269 -        }
  20.270 -        break;
  20.271 -    }
  20.272 -    case READ_DATA:
  20.273 -    {
  20.274 -        uint32_t crc;
  20.275 -
  20.276 -        DPRINTF("READ_DATA\n");
  20.277 -        block = IO_BLOCK(ret);
  20.278 -        if (block == NULL) goto fail;
  20.279 -
  20.280 -        /* crc check */
  20.281 -        crc = (uint32_t) crc32(0L, Z_NULL, 0); 
  20.282 -        crc = (uint32_t) crc32(crc, block, BLOCK_SIZE); 
  20.283 -        if (crc != req->bi.crc) {
  20.284 -            /* TODO: add a retry loop here.                          */
  20.285 -            /* Do this after the cache is added -- make sure to      */
  20.286 -            /* invalidate the bad page before reissuing the read.    */
  20.287 -            warn("Bad CRC on vaddr (%llu:%u)\n",
  20.288 -                 (unsigned long long)req->vaddr, (unsigned)req->bi.unused);
  20.289 -#ifdef PRINT_BADCRC_PAGES
  20.290 -            {
  20.291 -                int j;
  20.292 -                for (j=0; j<BLOCK_SIZE; j++) {
  20.293 -                    if (isprint((unsigned char)block[j])) {
  20.294 -                        printf("%c", block[j]);
  20.295 -                    } else {
  20.296 -                        printf(".");
  20.297 -                    }
  20.298 -                    if ((j % 64) == 0) printf("\n");
  20.299 -                }
  20.300 -            }
  20.301 -#endif /* PRINT_BADCRC_PAGES */
  20.302 -
  20.303 -            /* fast and loose for the moment. */
  20.304 -            /* goto fail;                     */
  20.305 -        }
  20.306 -
  20.307 -        req->retval = ret; /* deliver the data only after the lock is released */
  20.308 -        req->state = READ_UNLOCKED;
  20.309 -        block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
  20.310 -        break;
  20.311 -    }
  20.312 -    case READ_UNLOCKED:
  20.313 -    {
  20.314 -        struct io_ret r;
  20.315 -        io_cb_t cb;
  20.316 -        DPRINTF("READ_UNLOCKED\n");
  20.317 -        req_param = req->param;
  20.318 -        r         = req->retval;
  20.319 -        cb        = req->cb;
  20.320 -        free(req);
  20.321 -        cb(r, req_param);
  20.322 -        break;
  20.323 -    }
  20.324 -
  20.325 -    case RETURN_ZERO:
  20.326 -    {
  20.327 -        struct io_ret r;
  20.328 -        io_cb_t cb;
  20.329 -        DPRINTF("RETURN_ZERO\n");
  20.330 -        req_param = req->param;
  20.331 -        cb        = req->cb;
  20.332 -        free(req);
  20.333 -        r.type = IO_BLOCK_T;
  20.334 -        r.u.b = newblock();
  20.335 -        cb(r, req_param);
  20.336 -        break;
  20.337 -    }
  20.338 -
  20.339 -    default:
  20.340 -        DPRINTF("*** Read: Bad state! (%d) ***\n", req->state);
  20.341 -        goto fail;
  20.342 -    }
  20.343 - 
  20.344 -    return;
  20.345 -
  20.346 - fail:
  20.347 -    {
  20.348 -        /* NOTE(review): the read lock is not released on this path. */
  20.349 -        struct io_ret r;
  20.350 -        io_cb_t cb;
  20.351 -        DPRINTF("asyn_read had a read error.\n");
  20.352 -        req_param = req->param;
  20.353 -        r         = ret;
  20.354 -        cb        = req->cb;
  20.355 -        free(req);
  20.356 -        cb(r, req_param);
  20.357 -    }
  20.358 -
  20.359 -}
  20.360 -/* Write-path state machine: walks L1-L3, allocating/copying nodes as needed. */
  20.361 -static void write_cb(struct io_ret r, void *param)
  20.362 -{
  20.363 -    struct io_req *req = (struct io_req *)param;
  20.364 -    radix_tree_node node;
  20.365 -    uint64_t a, addr;
  20.366 -    void *req_param;
  20.367 -    struct block_info *bi;
  20.368 -
  20.369 -    switch(req->state) {
  20.370 -
  20.371 -    case WRITE_LOCKED:
  20.372 -
  20.373 -        DPRINTF("WRITE_LOCKED (%llu)\n", L1_IDX(req->vaddr));
  20.374 -        req->state = READ_L1;
  20.375 -        block_read(getid(req->root), write_cb, req); 
  20.376 -        break;
  20.377 -
  20.378 -    case READ_L1: /* block is the radix root */
  20.379 -
  20.380 -        DPRINTF("READ_L1\n");
  20.381 -        node = (radix_tree_node) IO_BLOCK(r);
  20.382 -        if (node == NULL) goto fail;
  20.383 -        a    = node[L1_IDX(req->vaddr)];
  20.384 -        addr = getid(a);
  20.385 -
  20.386 -        req->radix_addr[L2] = addr;
  20.387 -        req->radix[L1] = node;
  20.388 -
  20.389 -        if ( addr == ZERO ) {
  20.390 -            /* L1 empty subtree: */
  20.391 -            req->state = ALLOC_DATA_L1z;
  20.392 -            block_alloc( req->block, write_cb, req );
  20.393 -        } else if ( !iswritable(a) ) {
  20.394 -            /* L1 fault: */
  20.395 -            req->state = READ_L2_L1f;
  20.396 -            block_read( addr, write_cb, req );
  20.397 -        } else {
  20.398 -            req->state = READ_L2;
  20.399 -            block_read( addr, write_cb, req );
  20.400 -        }
  20.401 -        break;
  20.402 -
  20.403 -    case READ_L2:
  20.404 -
  20.405 -        DPRINTF("READ_L2\n");
  20.406 -        node = (radix_tree_node) IO_BLOCK(r);
  20.407 -        if (node == NULL) goto fail;
  20.408 -        a    = node[L2_IDX(req->vaddr)];
  20.409 -        addr = getid(a);
  20.410 -
  20.411 -        req->radix_addr[L3] = addr;
  20.412 -        req->radix[L2] = node;
  20.413 -
  20.414 -        if ( addr == ZERO ) {
  20.415 -            /* L2 empty subtree: */
  20.416 -            req->state = ALLOC_DATA_L2z;
  20.417 -            block_alloc( req->block, write_cb, req );
  20.418 -        } else if ( !iswritable(a) ) {
  20.419 -            /* L2 fault: */
  20.420 -            req->state = READ_L3_L2f;
  20.421 -            block_read( addr, write_cb, req );
  20.422 -        } else {
  20.423 -            req->state = READ_L3;
  20.424 -            block_read( addr, write_cb, req );
  20.425 -        }
  20.426 -        break;
  20.427 -
  20.428 -    case READ_L3:
  20.429 -
  20.430 -        DPRINTF("READ_L3\n");
  20.431 -        node = (radix_tree_node) IO_BLOCK(r);
  20.432 -        if (node == NULL) goto fail;
  20.433 -        a    = node[L3_IDX(req->vaddr)];
  20.434 -        addr = getid(a);
  20.435 -
  20.436 -        req->radix[L3] = node;
  20.437 -
  20.438 -        if ( addr == ZERO ) {
  20.439 -            /* L3 empty slot: */
  20.440 -            req->state = ALLOC_DATA_L3z;
  20.441 -            block_alloc( req->block, write_cb, req );
  20.442 -        } else if ( !iswritable(a) ) {
  20.443 -            /* L3 fault: */
  20.444 -            req->state = ALLOC_DATA_L3f;
  20.445 -            block_alloc( req->block, write_cb, req );
  20.446 -        } else {
  20.447 -            req->state = WRITE_DATA;
  20.448 -            block_write( addr, req->block, write_cb, req );
  20.449 -        }
  20.450 -        break;
  20.451 -
  20.452 -    case WRITE_DATA:
  20.453 -
  20.454 -        DPRINTF("WRITE_DATA\n");
  20.455 -        /* The L3 radix points to the correct block, we just need to  */
  20.456 -        /* update the crc.                                            */
  20.457 -        if (IO_INT(r) < 0) goto fail;
  20.458 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  20.459 -        req->bi.unused = 101;
  20.460 -        *bi = req->bi;
  20.461 -        req->state = WRITE_L3;
  20.462 -        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
  20.463 -        break;
  20.464 -
  20.465 -    /* L3 Zero Path: */
  20.466 -
  20.467 -    case ALLOC_DATA_L3z:
  20.468 -
  20.469 -        DPRINTF("ALLOC_DATA_L3z\n");
  20.470 -        addr = IO_ADDR(r);
  20.471 -        a = writable(addr);
  20.472 -        req->radix[L3][L3_IDX(req->vaddr)] = a;
  20.473 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  20.474 -        req->bi.unused = 102;
  20.475 -        *bi = req->bi;
  20.476 -        req->state = WRITE_L3_L3z;
  20.477 -        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
  20.478 -        break;
  20.479 -
  20.480 -    /* L3 Fault Path: */
  20.481 -
  20.482 -    case ALLOC_DATA_L3f:
  20.483 -
  20.484 -        DPRINTF("ALLOC_DATA_L3f\n");
  20.485 -        addr = IO_ADDR(r);
  20.486 -        a = writable(addr);
  20.487 -        req->radix[L3][L3_IDX(req->vaddr)] = a;
  20.488 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  20.489 -        req->bi.unused = 103;
  20.490 -        *bi = req->bi;
  20.491 -        req->state = WRITE_L3_L3f;
  20.492 -        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
  20.493 -        break;
  20.494 -
  20.495 -    /* L2 Zero Path: */
  20.496 -
  20.497 -    case ALLOC_DATA_L2z:
  20.498 -
  20.499 -        DPRINTF("ALLOC_DATA_L2z\n");
  20.500 -        addr = IO_ADDR(r);
  20.501 -        a = writable(addr);
  20.502 -        req->radix[L3] = newblock();
  20.503 -        req->radix[L3][L3_IDX(req->vaddr)] = a;
  20.504 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  20.505 -        req->bi.unused = 104;
  20.506 -        *bi = req->bi;
  20.507 -        req->state = ALLOC_L3_L2z;
  20.508 -        block_alloc( (char*)req->radix[L3], write_cb, req );
  20.509 -        break;
  20.510 -
  20.511 -    case ALLOC_L3_L2z:
  20.512 -
  20.513 -        DPRINTF("ALLOC_L3_L2z\n");
  20.514 -        addr = IO_ADDR(r);
  20.515 -        a = writable(addr);
  20.516 -        req->radix[L2][L2_IDX(req->vaddr)] = a;
  20.517 -        req->state = WRITE_L2_L2z;
  20.518 -        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
  20.519 -        break;
  20.520 -
  20.521 -    /* L2 Fault Path: */
  20.522 -
  20.523 -    case READ_L3_L2f:
  20.524 -
  20.525 -        DPRINTF("READ_L3_L2f\n");
  20.526 -        node = (radix_tree_node) IO_BLOCK(r);
  20.527 -        if (node == NULL) goto fail; /* must precede any use of node */
  20.528 -        clear_L3_w_bits(node);
  20.529 -        a    = node[L2_IDX(req->vaddr)];
  20.530 -        addr = getid(a);
  20.531 -
  20.532 -        req->radix[L3] = node;
  20.533 -        req->state = ALLOC_DATA_L2f;
  20.534 -        block_alloc( req->block, write_cb, req );
  20.535 -        break;
  20.536 -
  20.537 -    case ALLOC_DATA_L2f:
  20.538 -
  20.539 -        DPRINTF("ALLOC_DATA_L2f\n");
  20.540 -        addr = IO_ADDR(r);
  20.541 -        a = writable(addr);
  20.542 -        req->radix[L3][L3_IDX(req->vaddr)] = a;
  20.543 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  20.544 -        req->bi.unused = 105;
  20.545 -        *bi = req->bi;
  20.546 -        req->state = ALLOC_L3_L2f;
  20.547 -        block_alloc( (char*)req->radix[L3], write_cb, req );
  20.548 -        break;
  20.549 -
  20.550 -    case ALLOC_L3_L2f:
  20.551 -
  20.552 -        DPRINTF("ALLOC_L3_L2f\n");
  20.553 -        addr = IO_ADDR(r);
  20.554 -        a = writable(addr);
  20.555 -        req->radix[L2][L2_IDX(req->vaddr)] = a;
  20.556 -        req->state = WRITE_L2_L2f;
  20.557 -        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
  20.558 -        break;
  20.559 -
  20.560 -    /* L1 Zero Path: */
  20.561 -
  20.562 -    case ALLOC_DATA_L1z:
  20.563 -
  20.564 -        DPRINTF("ALLOC_DATA_L1z\n");
  20.565 -        addr = IO_ADDR(r);
  20.566 -        a = writable(addr);
  20.567 -        req->radix[L3] = newblock();
  20.568 -        req->radix[L3][L3_IDX(req->vaddr)] = a;
  20.569 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  20.570 -        req->bi.unused = 106;
  20.571 -        *bi = req->bi;
  20.572 -        req->state = ALLOC_L3_L1z;
  20.573 -        block_alloc( (char*)req->radix[L3], write_cb, req );
  20.574 -        break;
  20.575 -
  20.576 -    case ALLOC_L3_L1z:
  20.577 -
  20.578 -        DPRINTF("ALLOC_L3_L1z\n");
  20.579 -        addr = IO_ADDR(r);
  20.580 -        a = writable(addr);
  20.581 -        req->radix[L2] = newblock();
  20.582 -        req->radix[L2][L2_IDX(req->vaddr)] = a;
  20.583 -        req->state = ALLOC_L2_L1z;
  20.584 -        block_alloc( (char*)req->radix[L2], write_cb, req );
  20.585 -        break;
  20.586 -
  20.587 -    case ALLOC_L2_L1z:
  20.588 -
  20.589 -        DPRINTF("ALLOC_L2_L1z\n");
  20.590 -        addr = IO_ADDR(r);
  20.591 -        a = writable(addr);
  20.592 -        req->radix[L1][L1_IDX(req->vaddr)] = a;
  20.593 -        req->state = WRITE_L1_L1z;
  20.594 -        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
  20.595 -        break;
  20.596 -
  20.597 -    /* L1 Fault Path: */
  20.598 -
  20.599 -    case READ_L2_L1f:
  20.600 -
  20.601 -        DPRINTF("READ_L2_L1f\n");
  20.602 -        node = (radix_tree_node) IO_BLOCK(r);
  20.603 -        if (node == NULL) goto fail; /* must precede any use of node */
  20.604 -        clear_w_bits(node);
  20.605 -        a    = node[L2_IDX(req->vaddr)];
  20.606 -        addr = getid(a);
  20.607 -
  20.608 -        req->radix_addr[L3] = addr;
  20.609 -        req->radix[L2] = node;
  20.610 -
  20.611 -        if (addr == ZERO) {
  20.612 -            /* nothing below L2, create an empty L3 and alloc data. */
  20.613 -            /* (So skip READ_L3_L1f.) */
  20.614 -            req->radix[L3] = newblock();
  20.615 -            req->state = ALLOC_DATA_L1f;
  20.616 -            block_alloc( req->block, write_cb, req );
  20.617 -        } else {
  20.618 -            req->state = READ_L3_L1f;
  20.619 -            block_read( addr, write_cb, req );
  20.620 -        }
  20.621 -        break;
  20.622 -
  20.623 -    case READ_L3_L1f:
  20.624 -
  20.625 -        DPRINTF("READ_L3_L1f\n");
  20.626 -        node = (radix_tree_node) IO_BLOCK(r);
  20.627 -        if (node == NULL) goto fail; /* must precede any use of node */
  20.628 -        clear_L3_w_bits(node);
  20.629 -        a    = node[L2_IDX(req->vaddr)];
  20.630 -        addr = getid(a);
  20.631 -
  20.632 -        req->radix[L3] = node;
  20.633 -        req->state = ALLOC_DATA_L1f;
  20.634 -        block_alloc( req->block, write_cb, req );
  20.635 -        break;
  20.636 -
  20.637 -    case ALLOC_DATA_L1f:
  20.638 -
  20.639 -        DPRINTF("ALLOC_DATA_L1f\n");
  20.640 -        addr = IO_ADDR(r);
  20.641 -        a = writable(addr);
  20.642 -        req->radix[L3][L3_IDX(req->vaddr)] = a;
  20.643 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  20.644 -        req->bi.unused = 107;
  20.645 -        *bi = req->bi;
  20.646 -        req->state = ALLOC_L3_L1f;
  20.647 -        block_alloc( (char*)req->radix[L3], write_cb, req );
  20.648 -        break;
  20.649 -
  20.650 -    case ALLOC_L3_L1f:
  20.651 -
  20.652 -        DPRINTF("ALLOC_L3_L1f\n");
  20.653 -        addr = IO_ADDR(r);
  20.654 -        a = writable(addr);
  20.655 -        req->radix[L2][L2_IDX(req->vaddr)] = a;
  20.656 -        req->state = ALLOC_L2_L1f;
  20.657 -        block_alloc( (char*)req->radix[L2], write_cb, req );
  20.658 -        break;
  20.659 -
  20.660 -    case ALLOC_L2_L1f:
  20.661 -
  20.662 -        DPRINTF("ALLOC_L2_L1f\n");
  20.663 -        addr = IO_ADDR(r);
  20.664 -        a = writable(addr);
  20.665 -        req->radix[L1][L1_IDX(req->vaddr)] = a;
  20.666 -        req->state = WRITE_L1_L1f;
  20.667 -        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
  20.668 -        break;
  20.669 -
  20.670 -    case WRITE_L3:
  20.671 -    case WRITE_L3_L3z:
  20.672 -    case WRITE_L3_L3f:
  20.673 -    case WRITE_L2_L2z:
  20.674 -    case WRITE_L2_L2f:
  20.675 -    case WRITE_L1_L1z:
  20.676 -    case WRITE_L1_L1f:
  20.677 -    {
  20.678 -        int i;
  20.679 -        DPRINTF("DONE\n");
  20.680 -        /* free any saved node vals. */
  20.681 -        for (i=0; i<3; i++)
  20.682 -            if (req->radix[i] != 0) free(req->radix[i]);
  20.683 -        req->retval = r; /* result of the last write, delivered after unlock */
  20.684 -        req->state = WRITE_UNLOCKED;
  20.685 -        block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req);
  20.686 -        break;
  20.687 -    }
  20.688 -    case WRITE_UNLOCKED:
  20.689 -    {
  20.690 -        struct io_ret r;
  20.691 -        io_cb_t cb;
  20.692 -        DPRINTF("WRITE_UNLOCKED!\n");
  20.693 -        req_param = req->param;
  20.694 -        r         = req->retval;
  20.695 -        cb        = req->cb;
  20.696 -        free(req);
  20.697 -        cb(r, req_param);
  20.698 -        break;
  20.699 -    }
  20.700 -
  20.701 -    default:
  20.702 -        DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);
  20.703 -        goto fail;
  20.704 -    }
  20.705 -
  20.706 -    return;
  20.707 -
  20.708 - fail:
  20.709 -    {
  20.710 -        struct io_ret r;
  20.711 -        io_cb_t cb;
  20.712 -        int i;
  20.713 -
  20.714 -        DPRINTF("asyn_write had a read error mid-way.\n");
  20.715 -        req_param = req->param;
  20.716 -        cb        = req->cb;
  20.717 -        r.type = IO_INT_T;
  20.718 -        r.u.i  = -1;
  20.719 -        /* free any saved node vals. */
  20.720 -        for (i=0; i<3; i++)
  20.721 -            free(req->radix[i]);
  20.722 -        free(req);
  20.723 -        cb(r, req_param);
  20.724 -    }
  20.725 -}
  20.726 -/* Blocking form of vdi_read(); NULL if the request could not be issued. */
  20.727 -char *vdi_read_s(vdi_t *vdi, uint64_t vaddr)
  20.728 -{
  20.729 -    pthread_mutex_t done = PTHREAD_MUTEX_INITIALIZER;
  20.730 -    char *result = NULL;
  20.731 -    int rc;
  20.732 -
  20.733 -    /* GNU C nested function: stores into result and releases the waiter. */
  20.734 -    void reads_cb(struct io_ret r, void *param) 
  20.735 -    {
  20.736 -        result = IO_BLOCK(r);
  20.737 -        pthread_mutex_unlock((pthread_mutex_t *)param);
  20.738 -    }
  20.739 -
  20.740 -    pthread_mutex_lock(&done);
  20.741 -    rc = vdi_read(vdi, vaddr, reads_cb, &done);
  20.742 -    if (rc != 0) return result;
  20.743 -    pthread_mutex_lock(&done); /* blocks until reads_cb has unlocked it */
  20.744 -    return result;
  20.745 -}
  20.746 -
  20.747 -/* Blocking form of vdi_write(): the I/O result, or the ERR_* code if never issued. */
  20.748 -int vdi_write_s(vdi_t *vdi, uint64_t vaddr, char *block)
  20.749 -{
  20.750 -    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
  20.751 -    int ret, result;
  20.752 -
  20.753 -    void writes_cb(struct io_ret r, void *param) 
  20.754 -    {
  20.755 -        result = IO_INT(r);
  20.756 -        pthread_mutex_unlock((pthread_mutex_t *)param);
  20.757 -    }
  20.758 -    /* NOTE(review): m is used as a semaphore; confirm which thread runs the callback. */
  20.759 -    pthread_mutex_lock(&m);
  20.760 -    ret = vdi_write(vdi, vaddr, block, writes_cb, &m);
  20.761 -
  20.762 -    if (ret != 0) return ret;  /* callback never runs, so result would be garbage */
  20.763 -    pthread_mutex_lock(&m);    /* blocks until writes_cb has unlocked it */
  20.764 -    return result;
  20.765 -}
    21.1 --- a/tools/blktap/parallax/requests-async.h	Fri Jun 16 18:19:40 2006 +0100
    21.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.3 @@ -1,29 +0,0 @@
    21.4 -#ifndef _REQUESTSASYNC_H_
    21.5 -#define _REQUESTSASYNC_H_
    21.6 -
    21.7 -#include "block-async.h"
    21.8 -#include "blockstore.h" /* for newblock etc. */
    21.9 -
   21.10 -/* (disabled duplicates of definitions provided elsewhere, e.g. radix.h)
   21.11 -#define BLOCK_SIZE 4096
   21.12 -#define ZERO 0ULL
   21.13 -#define getid(x) (((x)>>1)&0x7fffffffffffffffLLU)
   21.14 -#define iswritable(x) (((x) & 1LLU) != 0)
   21.15 -#define writable(x) (((x) << 1) | 1LLU)
   21.16 -#define readonly(x) ((uint64_t)((x) << 1))
   21.17 -*/
   21.18 -
   21.19 -#define VADDR_MASK 0x0000000003ffffffLLU /* 26-bits = 256Gig */
   21.20 -#define VALID_VADDR(x) (((x) & VADDR_MASK) == (x))
   21.21 -/* asynchronous versions: return 0 once queued, else one of the ERR_* codes below */
   21.22 -int vdi_read (vdi_t *vdi, uint64_t vaddr, io_cb_t cb, void *param);
   21.23 -int vdi_write(vdi_t *vdi, uint64_t vaddr, char *block, io_cb_t cb, void *param);
   21.24 -             
   21.25 -/* synchronous versions: */
   21.26 -char *vdi_read_s (vdi_t *vdi, uint64_t vaddr);
   21.27 -int   vdi_write_s(vdi_t *vdi, uint64_t vaddr, char *block);
   21.28 -/* Parenthesised so the codes survive unary/binary operators at the use site. */
   21.29 -#define ERR_BAD_VADDR  (-1)
   21.30 -#define ERR_NOMEM      (-2)
   21.31 -
   21.32 -#endif /* _REQUESTSASYNC_H_ */
    22.1 --- a/tools/blktap/parallax/snaplog.c	Fri Jun 16 18:19:40 2006 +0100
    22.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.3 @@ -1,238 +0,0 @@
    22.4 -/**************************************************************************
    22.5 - * 
    22.6 - * snaplog.c
    22.7 - *
    22.8 - * Snapshot log on-disk data structure.
    22.9 - *
   22.10 - */
   22.11 - 
   22.12 - /* VDI histories are made from chains of snapshot logs.  These logs record 
   22.13 -  * the (radix) root and timestamp of individual snapshots.
   22.14 -  *
   22.15 -  * creation of a new VDI involves 'forking' a snapshot log, by creating a 
   22.16 -  * new, empty log (in a new VDI) and parenting it off of a record in an 
   22.17 -  * existing snapshot log.
   22.18 -  *
   22.19 -  * snapshot log blocks have at most one writer.
   22.20 -  */
   22.21 -
   22.22 -#include <stdio.h>
   22.23 -#include <stdlib.h>
   22.24 -#include <sys/time.h>
   22.25 -#include "blockstore.h"
   22.26 -#include "snaplog.h"
   22.27 -
   22.28 -
   22.29 -/* Read a block and return it only if it carries the snapshot-log magic. */
   22.30 -snap_block_t *snap_get_block(uint64_t block)
   22.31 -{
   22.32 -    snap_block_t *candidate = (snap_block_t *)readblock(block);
   22.33 -
   22.34 -    if ( (candidate != NULL) && (candidate->hdr.magic != SNAP_MAGIC) ) {
   22.35 -        /* Readable, but not a snapshot log block: discard it. */
   22.36 -        freeblock(candidate);
   22.37 -        candidate = NULL;
   22.38 -    }
   22.39 -
   22.40 -    /* NULL covers both a NULL from readblock() and the bad-magic case. */
   22.41 -    return candidate;
   22.42 -}
   22.43 -/* Copy the record named by id into *target; returns 0 on success, -1 otherwise. */
   22.44 -int snap_get_id(snap_id_t *id, snap_rec_t *target)
   22.45 -{
   22.46 -    snap_block_t *log;
   22.47 -    int status = -1;
   22.48 -
   22.49 -    if ( id == NULL )
   22.50 -        return status;
   22.51 -
   22.52 -    log = snap_get_block(id->block);
   22.53 -    if ( log == NULL )
   22.54 -        return status;
   22.55 -
   22.56 -    /* NOTE(review): an index equal to nr_entries is accepted here --   */
   22.57 -    /* confirm this is intended rather than an off-by-one.              */
   22.58 -    if ( id->index <= log->hdr.nr_entries ) {
   22.59 -        *target = log->snaps[id->index];
   22.60 -        status = 0;
   22.61 -    }
   22.62 -    freeblock(log);
   22.63 -    return status;
   22.64 -}
   22.65 -/* Allocate a new, empty snapshot log block; 0 on success, -1 on failure. */
   22.66 -int __snap_block_create(snap_id_t *parent_id, snap_id_t *fork_id,
   22.67 -                                  snap_id_t *new_id)
   22.68 -{
   22.69 -    snap_block_t *blk, *pblk;
   22.70 -
   22.71 -    /* parent_id may be NULL (no parent); fork_id is only read when       */
   22.72 -    /* parent_id is non-NULL, and new_id always receives the new id.      */
   22.73 -    blk = (snap_block_t *)newblock();
   22.74 -    if ( blk == NULL )
   22.75 -        return -1;
   22.76 -
   22.77 -    blk->hdr.magic  = SNAP_MAGIC;
   22.78 -    blk->hdr.nr_entries  = 0;
   22.79 -    blk->hdr.log_entries = 0;
   22.80 -    blk->hdr.immutable   = 0;
   22.81 -
   22.82 -    /* Parent and fork differ (snap_append overflow): carry on the log count. */
   22.83 -    if (   (parent_id  != NULL) 
   22.84 -        && (parent_id->block != fork_id->block) 
   22.85 -        && (parent_id->block != 0)) {
   22.86 -        pblk = snap_get_block(parent_id->block);
   22.87 -        if ( pblk == NULL ) {
   22.88 -            /* parent unreadable or not a snapshot log block */
   22.89 -            freeblock(blk);
   22.90 -            return -1;
   22.91 -        }
   22.92 -        blk->hdr.log_entries = pblk->hdr.log_entries;
   22.93 -        freeblock(pblk);
   22.94 -    }
   22.95 -
   22.96 -    if (parent_id != NULL) {
   22.97 -        blk->hdr.parent_block = *parent_id;
   22.98 -        blk->hdr.fork_block   = *fork_id;
   22.99 -    } else {
  22.100 -        blk->hdr.parent_block = null_snap_id;
  22.101 -        blk->hdr.fork_block   = null_snap_id;
  22.102 -    }
  22.103 -
  22.104 -    new_id->index = 0;
  22.105 -    new_id->block = allocblock(blk);
  22.106 -    freeblock(blk);
  22.107 -    return (new_id->block == 0) ? -1 : 0; /* block id 0 means allocblock failed */
  22.108 -}
  22.109 -
  22.110 -int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id)
  22.111 -{
  22.112 -    return __snap_block_create(parent_id, parent_id, new_id);
  22.113 -}
  22.114 -
  22.115 -int snap_append(snap_id_t *old_id, snap_rec_t *rec, snap_id_t *new_id)
  22.116 -{
  22.117 -    snap_id_t id = *old_id;
  22.118 -    snap_block_t *blk = snap_get_block(id.block);
  22.119 -    
  22.120 -    if ( rec->deleted == 1 ) {
  22.121 -        printf("Attempt to append a deleted snapshot!\n");
  22.122 -        return -1;
  22.123 -    }
  22.124 -    
  22.125 -    if ( blk->hdr.immutable != 0 ) {
  22.126 -        printf("Attempt to snap an immutable snap block!\n");
  22.127 -        return -1;
  22.128 -    }
  22.129 -    
  22.130 -    new_id->block = id.block;
  22.131 -    
  22.132 -    if (blk->hdr.nr_entries == SNAPS_PER_BLOCK) {
  22.133 -        int ret;
  22.134 -        
  22.135 -        id.index--; /* make id point to the last full record */
  22.136 -        
  22.137 -        ret = __snap_block_create(&id, &blk->hdr.fork_block, new_id);
  22.138 -        if ( ret != 0 ) {
  22.139 -            freeblock(blk);
  22.140 -            return -1;
  22.141 -        }
  22.142 -        
  22.143 -        blk->hdr.immutable = 1;
  22.144 -        writeblock(id.block, blk);
  22.145 -        freeblock(blk);
  22.146 -        blk = snap_get_block(new_id->block);
  22.147 -        id = *new_id;
  22.148 -    }
  22.149 -    
  22.150 -    blk->snaps[blk->hdr.nr_entries] = *rec;
  22.151 -    blk->hdr.nr_entries++;
  22.152 -    blk->hdr.log_entries++;
  22.153 -    new_id->index = blk->hdr.nr_entries;
  22.154 -    //printf("snap: %u %u\n", blk->hdr.nr_entries, blk->hdr.log_entries);
  22.155 -    writeblock(id.block, blk);
  22.156 -    freeblock(blk);
  22.157 -    return 0;
  22.158 -}
  22.159 -
  22.160 -int snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id)
  22.161 -{
  22.162 -    snap_block_t *p_blk, *c_blk, *blk;
  22.163 -    snap_rec_t   *p_rec, *c_rec;
  22.164 -    int ret = -1;
  22.165 -    
  22.166 -    p_blk = snap_get_block(p_id->block);
  22.167 -    
  22.168 -    if (p_blk == NULL) return(-1);
  22.169 -    
  22.170 -    if (c_id->block == p_id->block)
  22.171 -    {
  22.172 -        c_blk = p_blk;
  22.173 -    } else {
  22.174 -         c_blk = snap_get_block(c_id->block);
  22.175 -    }
  22.176 -    
  22.177 -    if (p_blk == NULL) {
  22.178 -        freeblock(p_blk);
  22.179 -        return(-1);
  22.180 -    }
  22.181 -     
  22.182 -    /* parent and child must not be deleted. */
  22.183 -    p_rec = &p_blk->snaps[p_id->index];
  22.184 -    c_rec = &c_blk->snaps[c_id->index];
  22.185 -    /*
  22.186 -    if ( (p_rec->deleted == 1) || (c_rec->deleted == 1) ) {
  22.187 -        printf("One of those snaps is already deleted.\n");
  22.188 -        goto done;
  22.189 -    }
  22.190 -    */
  22.191 -    /* first non-deleted thing in the log before child must be parent. */
  22.192 -    
  22.193 -    /* XXX todo: text the range here for delete (and eventually fork) bits) */
  22.194 -    /* for now, snaps must be consecutive, on the same log page: */
  22.195 -    
  22.196 -    if ((p_id->block != c_id->block) || (p_id->index != c_id->index-1))
  22.197 -    {
  22.198 -        printf("Deleting non-consecutive snaps is not done yet.\n");
  22.199 -        goto done;
  22.200 -    }
  22.201 -    
  22.202 -    /* mark parent as deleted XXX: may need to lock parent block here.*/
  22.203 -    p_rec->deleted = 1;
  22.204 -    writeblock(p_id->block, p_blk);
  22.205 -    
  22.206 -    /* delete the parent */
  22.207 -    printf("collapse(%Ld, %Ld)\n", p_rec->radix_root, c_rec->radix_root);
  22.208 -    ret = collapse(height, p_rec->radix_root, c_rec->radix_root);
  22.209 -    
  22.210 -    /* return the number of blocks reclaimed. */
  22.211 -    
  22.212 -done:
  22.213 -    if (c_blk != p_blk) freeblock(c_blk);
  22.214 -    freeblock(p_blk);
  22.215 -    
  22.216 -    return(ret);
  22.217 -}
  22.218 -
  22.219 -void snap_print_history(snap_id_t *snap_id)
  22.220 -{
  22.221 -    snap_id_t id = *snap_id;
  22.222 -    unsigned int idx = id.index;
  22.223 -    snap_block_t *new_blk, *blk = snap_get_block(id.block);
  22.224 -    
  22.225 -    while ( blk ) {
  22.226 -        printf("[Snap block %Ld]:\n", id.block);
  22.227 -        do {
  22.228 -            printf("   %03u: root: %Ld ts: %ld.%ld\n", idx, 
  22.229 -                    blk->snaps[idx].radix_root,
  22.230 -                    blk->snaps[idx].timestamp.tv_sec,
  22.231 -                    blk->snaps[idx].timestamp.tv_usec);
  22.232 -        } while (idx-- != 0);
  22.233 -        
  22.234 -        id = blk->hdr.parent_block;
  22.235 -        if (id.block != 0) {
  22.236 -            new_blk = snap_get_block(id.block);
  22.237 -        }
  22.238 -        freeblock(blk);
  22.239 -        blk = new_blk;
  22.240 -    }
  22.241 -}
    23.1 --- a/tools/blktap/parallax/snaplog.h	Fri Jun 16 18:19:40 2006 +0100
    23.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.3 @@ -1,61 +0,0 @@
    23.4 -/**************************************************************************
    23.5 - * 
    23.6 - * snaplog.h
    23.7 - *
    23.8 - * Snapshot log on-disk data structure.
    23.9 - *
   23.10 - */
   23.11 - 
   23.12 -#include "radix.h"
   23.13 -#include "blockstore.h"    /* for BLOCK_SIZE */
   23.14 - 
   23.15 -#ifndef __SNAPLOG_H__
   23.16 -#define __SNAPLOG_H__
   23.17 -
   23.18 -typedef struct snap_id {
   23.19 -    uint64_t            block;
   23.20 -    unsigned int   index;
   23.21 -} snap_id_t;
   23.22 -
   23.23 -typedef struct snap_rec {
   23.24 -    uint64_t            radix_root;
   23.25 -    struct timeval timestamp;
   23.26 -    /* flags: */
   23.27 -    unsigned       deleted:1;
   23.28 -} snap_rec_t;
   23.29 -
   23.30 -
   23.31 -int  snap_block_create(snap_id_t *parent_id, snap_id_t *new_id);
   23.32 -int  snap_append(snap_id_t *id, snap_rec_t *rec, snap_id_t *new_id);
   23.33 -int  snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id);
   23.34 -void snap_print_history(snap_id_t *snap_id);
   23.35 -int  snap_get_id(snap_id_t *id, snap_rec_t *target);
   23.36 -
   23.37 -
   23.38 -/* exported for vdi debugging */
   23.39 -#define SNAP_MAGIC 0xff00ff0aa0ff00ffLL
   23.40 -
   23.41 -static const snap_id_t null_snap_id = { 0, 0 }; 
   23.42 -
   23.43 -typedef struct snap_block_hdr {
   23.44 -    uint64_t            magic;
   23.45 -    snap_id_t      parent_block; /* parent block within this chain */
   23.46 -    snap_id_t      fork_block;   /* where this log was forked */
   23.47 -    unsigned       log_entries;  /* total entries since forking */
   23.48 -    unsigned short nr_entries;   /* entries in snaps[] */
   23.49 -    unsigned short immutable;    /* has this snap page become immutable? */
   23.50 -} snap_block_hdr_t;
   23.51 -
   23.52 -
   23.53 -#define SNAPS_PER_BLOCK \
   23.54 -    ((BLOCK_SIZE - sizeof(snap_block_hdr_t)) / sizeof(snap_rec_t))
   23.55 -
   23.56 -typedef struct snap_block {
   23.57 -    snap_block_hdr_t hdr;
   23.58 -    snap_rec_t       snaps[SNAPS_PER_BLOCK];
   23.59 -} snap_block_t;
   23.60 -    
   23.61 -
   23.62 -snap_block_t *snap_get_block(uint64_t block);
   23.63 -
   23.64 -#endif /* __SNAPLOG_H__ */
    24.1 --- a/tools/blktap/parallax/vdi.c	Fri Jun 16 18:19:40 2006 +0100
    24.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.3 @@ -1,367 +0,0 @@
    24.4 -/**************************************************************************
    24.5 - * 
    24.6 - * vdi.c
    24.7 - *
    24.8 - * Virtual Disk Image (VDI) Interfaces
    24.9 - *
   24.10 - */
   24.11 - 
   24.12 -#include <stdio.h>
   24.13 -#include <stdlib.h>
   24.14 -#include <fcntl.h>
   24.15 -#include <string.h>
   24.16 -#include <sys/time.h>
   24.17 -#include <pthread.h>
   24.18 -#include "blockstore.h"
   24.19 -#include "block-async.h"
   24.20 -#include "requests-async.h"
   24.21 -#include "radix.h"
   24.22 -#include "vdi.h"
   24.23 -                    
   24.24 -#define VDI_REG_BLOCK   2LL
   24.25 -#define VDI_RADIX_ROOT  writable(3)
   24.26 -                                                            
   24.27 -#if 0
   24.28 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   24.29 -#else
   24.30 -#define DPRINTF(_f, _a...) ((void)0)
   24.31 -#endif
   24.32 -
   24.33 -/* I haven't decided about this registry stuff, so this is just a really
   24.34 - * quick lash-up so that there is some way to track VDIs.
   24.35 - *
   24.36 - * (Most vdi access should be with a direct handle to the block, so this
   24.37 - *  registry is just for start-of-day lookup and other control operations.)
   24.38 - */
   24.39 -
   24.40 -vdi_registry_t *create_vdi_registry(void)
   24.41 -{
   24.42 -    vdi_registry_t *reg = (vdi_registry_t *)newblock();
   24.43 -    
   24.44 -    if (reg == NULL)
   24.45 -        return NULL;
   24.46 -    
   24.47 -    /* zero-fill the vdi radix root while we have an empty block. */
   24.48 -    writeblock(VDI_RADIX_ROOT, (void *)reg);
   24.49 -    
   24.50 -    
   24.51 -    DPRINTF("[vdi.c] Creating VDI registry!\n");
   24.52 -    reg->magic      = VDI_REG_MAGIC;
   24.53 -    reg->nr_vdis    = 0;
   24.54 -    
   24.55 -    writeblock(VDI_REG_BLOCK, (void *)reg);
   24.56 -    
   24.57 -    return reg;
   24.58 -}
   24.59 -    
   24.60 -vdi_registry_t *get_vdi_registry(void)
   24.61 -{
   24.62 -    vdi_registry_t *vdi_reg = (vdi_registry_t *)readblock(VDI_REG_BLOCK);
   24.63 -    
   24.64 -    if ( vdi_reg == NULL )
   24.65 -        vdi_reg = create_vdi_registry();
   24.66 -    
   24.67 -    if ( vdi_reg->magic != VDI_REG_MAGIC ) {
   24.68 -        freeblock(vdi_reg);
   24.69 -        return NULL;
   24.70 -    }
   24.71 -    
   24.72 -    return vdi_reg;
   24.73 -}
   24.74 -
   24.75 -
   24.76 -vdi_t *vdi_create(snap_id_t *parent_snap, char *name)
   24.77 -{
   24.78 -    int ret;
   24.79 -    vdi_t *vdi;
   24.80 -    vdi_registry_t *vdi_reg;
   24.81 -    snap_rec_t snap_rec;
   24.82 -    
   24.83 -    /* create a vdi struct */
   24.84 -    vdi = newblock();
   24.85 -    if (vdi == NULL) 
   24.86 -        return NULL;
   24.87 -    
   24.88 -    if ( snap_get_id(parent_snap, &snap_rec) == 0 ) {
   24.89 -        vdi->radix_root = snapshot(snap_rec.radix_root);
   24.90 -    } else {
   24.91 -        vdi->radix_root = allocblock((void *)vdi); /* vdi is just zeros here */
   24.92 -        vdi->radix_root = writable(vdi->radix_root); /* grr. */
   24.93 -    }
   24.94 -    
   24.95 -    /* create a snapshot log, and add it to the vdi struct */
   24.96 -    
   24.97 -    ret = snap_block_create(parent_snap, &vdi->snap);
   24.98 -    if ( ret != 0 ) {
   24.99 -        DPRINTF("Error getting snap block in vdi_create.\n");
  24.100 -        freeblock(vdi);
  24.101 -        return NULL;
  24.102 -    }
  24.103 -            
  24.104 -    /* append the vdi to the registry, fill block and id.             */
  24.105 -    /* implicit allocation means we have to write the vdi twice here. */
  24.106 -    vdi_reg    = get_vdi_registry();
  24.107 -    if ( vdi_reg == NULL ) {
  24.108 -        freeblock(vdi);
  24.109 -        return NULL;
  24.110 -    }
  24.111 -    
  24.112 -    vdi->block = allocblock((void *)vdi);
  24.113 -    vdi->id    = vdi_reg->nr_vdis++;
  24.114 -    strncpy(vdi->name, name, VDI_NAME_SZ);
  24.115 -    vdi->name[VDI_NAME_SZ] = '\0';
  24.116 -    vdi->radix_lock = NULL; /* for tidiness */
  24.117 -    writeblock(vdi->block, (void *)vdi);
  24.118 -    
  24.119 -    update(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi->id, vdi->block);
  24.120 -    writeblock(VDI_REG_BLOCK, (void *)vdi_reg);
  24.121 -    freeblock(vdi_reg);
  24.122 -    
  24.123 -    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
  24.124 -    if (vdi->radix_lock == NULL) 
  24.125 -    {
  24.126 -    	perror("couldn't malloc radix_lock for new vdi!");
  24.127 -    	freeblock(vdi);
  24.128 -    	return NULL;
  24.129 -    }
  24.130 -    radix_lock_init(vdi->radix_lock);
  24.131 -    
  24.132 -    return vdi;
  24.133 -}
  24.134 -
  24.135 -/* vdi_get and vdi_put currently act more like alloc/free -- they don't 
  24.136 - * do refcount-based allocation.  
  24.137 - */
  24.138 -vdi_t *vdi_get(uint64_t vdi_id)
  24.139 -{
  24.140 -    uint64_t vdi_blk;
  24.141 -    vdi_t *vdi;
  24.142 -    
  24.143 -    vdi_blk = lookup(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi_id);
  24.144 -    
  24.145 -    if ( vdi_blk == 0 )
  24.146 -        return NULL;
  24.147 -    
  24.148 -    vdi = (vdi_t *)readblock(vdi_blk);
  24.149 -    
  24.150 -    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
  24.151 -    if (vdi->radix_lock == NULL) 
  24.152 -    {
  24.153 -    	perror("couldn't malloc radix_lock for new vdi!");
  24.154 -    	freeblock(vdi);
  24.155 -    	return NULL;
  24.156 -    }
  24.157 -    radix_lock_init(vdi->radix_lock);
  24.158 -    
  24.159 -    return vdi;
  24.160 -}
  24.161 -
  24.162 -void vdi_put(vdi_t *vdi)
  24.163 -{
  24.164 -    free(vdi->radix_lock);
  24.165 -    freeblock(vdi);
  24.166 -}
  24.167 -
  24.168 -void vdi_snapshot(vdi_t *vdi)
  24.169 -{
  24.170 -    snap_rec_t rec;
  24.171 -    int ret;
  24.172 -    
  24.173 -    rec.radix_root = vdi->radix_root;
  24.174 -    gettimeofday(&rec.timestamp, NULL);
  24.175 -    rec.deleted = 0;
  24.176 -    
  24.177 -    vdi->radix_root = snapshot(vdi->radix_root);
  24.178 -    ret = snap_append(&vdi->snap, &rec, &vdi->snap);
  24.179 -    if ( ret != 0 ) {
  24.180 -        printf("snap_append returned failure\n");
  24.181 -        return;
  24.182 -    }
  24.183 -    writeblock(vdi->block, vdi);
  24.184 -}
  24.185 -    
  24.186 -int __init_vdi()
  24.187 -{
  24.188 -    /* sneak this in here for the moment. */
  24.189 -    __rcache_init();
  24.190 -    
  24.191 -    /* force the registry to be created if it doesn't exist. */
  24.192 -    vdi_registry_t *vdi_reg = get_vdi_registry();
  24.193 -    if (vdi_reg == NULL) {
  24.194 -        printf("[vdi.c] Couldn't get/create a VDI registry!\n");
  24.195 -        return -1;
  24.196 -    }
  24.197 -    freeblock(vdi_reg);
  24.198 -    
  24.199 -    
  24.200 -    return 0;
  24.201 -}
  24.202 -    
  24.203 -#ifdef VDI_STANDALONE
  24.204 -
  24.205 -#define TEST_VDIS      50
  24.206 -#define NR_ITERS    50000
  24.207 -#define FORK_POINTS   200
  24.208 -#define INIT_VDIS       3
  24.209 -#define INIT_SNAPS     40
  24.210 -
  24.211 -/* These must be of decreasing size: */
  24.212 -#define NEW_FORK       (RAND_MAX-(RAND_MAX/1000))
  24.213 -#define NEW_ROOT_VDI   (RAND_MAX-((RAND_MAX/1000)*2))
  24.214 -#define NEW_FORK_VDI   (RAND_MAX-((RAND_MAX/1000)*3))
  24.215 -
  24.216 -#define GRAPH_DOT_FILE "vdi.dot"
  24.217 -#define GRAPH_PS_FILE  "vdi.ps"
  24.218 -
  24.219 -
  24.220 -typedef struct sh_st {
  24.221 -    snap_id_t     id;
  24.222 -    struct sh_st *next;
  24.223 -} sh_t;
  24.224 -
  24.225 -#define SNAP_HASHSZ 1024
  24.226 -sh_t *node_hash[SNAP_HASHSZ];
  24.227 -#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
  24.228 -
  24.229 -#define SNAPID_EQUAL(_a,_b) \
  24.230 -    (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
  24.231 -int sh_check_and_add(snap_id_t *id)
  24.232 -{
  24.233 -    sh_t **s = &node_hash[SNAP_HASH(id)];
  24.234 -    
  24.235 -    while (*s != NULL) {
  24.236 -        if (SNAPID_EQUAL(&((*s)->id), id))
  24.237 -            return 1;
  24.238 -        *s = (*s)->next;
  24.239 -    }
  24.240 -    
  24.241 -    *s = (sh_t *)malloc(sizeof(sh_t));
  24.242 -    (*s)->id = *id;
  24.243 -    (*s)->next = NULL;
  24.244 -    
  24.245 -    return 0;
  24.246 -}
  24.247 -
  24.248 -int main(int argc, char *argv[])
  24.249 -{
  24.250 -    vdi_t *vdi_list[TEST_VDIS];
  24.251 -    snap_id_t id, fork_points[FORK_POINTS];
  24.252 -    int nr_vdis = 0, nr_forks = 0;
  24.253 -    int i, j, r;
  24.254 -    FILE *f;
  24.255 -    char name[VDI_NAME_SZ];
  24.256 -    
  24.257 -    __init_blockstore();
  24.258 -    __init_vdi();
  24.259 -    
  24.260 -    printf("[o] Generating seed VDIs. (%d VDIs)\n", INIT_VDIS);
  24.261 -    
  24.262 -    for (i=0; i<INIT_VDIS; i++) {
  24.263 -        r=rand();
  24.264 -        
  24.265 -        sprintf(name, "VDI Number %d", nr_vdis);
  24.266 -        vdi_list[i] = vdi_create(NULL, name);
  24.267 -        for (j=0; j<(r%INIT_SNAPS); j++)
  24.268 -            vdi_snapshot(vdi_list[i]);
  24.269 -        fork_points[i] = vdi_list[i]->snap;
  24.270 -        nr_vdis++;
  24.271 -        nr_forks++;
  24.272 -    }
  24.273 -    
  24.274 -    printf("[o] Running a random workload. (%d iterations)\n", NR_ITERS);
  24.275 -            
  24.276 -    for (i=0; i<NR_ITERS; i++) {
  24.277 -        r = rand();
  24.278 -        
  24.279 -        if ( r > NEW_FORK ) {
  24.280 -            if ( nr_forks > FORK_POINTS )
  24.281 -                continue;
  24.282 -            id = vdi_list[r%nr_vdis]->snap;
  24.283 -            if ( ( id.block == 0 ) || ( id.index == 0 ) )
  24.284 -                continue;
  24.285 -            id.index--;
  24.286 -            fork_points[nr_forks++] = id;
  24.287 -            
  24.288 -        } else if ( r > NEW_ROOT_VDI ) {
  24.289 -            
  24.290 -            if ( nr_vdis == TEST_VDIS )
  24.291 -                continue;
  24.292 -            
  24.293 -            sprintf(name, "VDI Number %d.", nr_vdis);
  24.294 -            vdi_list[nr_vdis++] = vdi_create(NULL, name);
  24.295 -            
  24.296 -        } else if ( r > NEW_FORK_VDI ) {
  24.297 -            
  24.298 -            if ( nr_vdis == TEST_VDIS )
  24.299 -                continue;
  24.300 -            
  24.301 -            sprintf(name, "VDI Number %d.", nr_vdis);
  24.302 -            vdi_list[nr_vdis++] = vdi_create(&fork_points[r%nr_forks], name);
  24.303 -            
  24.304 -        } else /* SNAPSHOT */ {
  24.305 -            
  24.306 -            vdi_snapshot(vdi_list[r%nr_vdis]);
  24.307 -            
  24.308 -        }
  24.309 -    }
  24.310 -    
  24.311 -    /* now dump it out to a dot file. */
  24.312 -    printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis);
  24.313 -    
  24.314 -    f = fopen(GRAPH_DOT_FILE, "w");
  24.315 -    
  24.316 -    /* write graph preamble */
  24.317 -    fprintf(f, "digraph G {\n");
  24.318 -    fprintf(f, "   rankdir=LR\n");
  24.319 -    
  24.320 -    for (i=0; i<nr_vdis; i++) {
  24.321 -        char oldnode[255];
  24.322 -        snap_block_t *blk;
  24.323 -        snap_id_t id = vdi_list[i]->snap;
  24.324 -        int nr_snaps, done=0;
  24.325 -        
  24.326 -        /* add a node for the id */
  24.327 -printf("vdi: %d\n", i);
  24.328 -        fprintf(f, "   n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", 
  24.329 -                id.block, id.index, vdi_list[i]->name,
  24.330 -                id.block, id.index);
  24.331 -        sprintf(oldnode, "n%Ld%d", id.block, id.index);
  24.332 -        
  24.333 -        while (id.block != 0) {
  24.334 -            blk = snap_get_block(id.block);
  24.335 -            nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index);
  24.336 -            id = blk->hdr.fork_block;
  24.337 -            
  24.338 -            done = sh_check_and_add(&id);
  24.339 -            
  24.340 -            /* add a node for the fork_id */
  24.341 -            if (!done) {
  24.342 -                fprintf(f, "   n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", 
  24.343 -                    id.block, id.index,
  24.344 -                    id.block, id.index);
  24.345 -            }
  24.346 -            
  24.347 -            /* add an edge between them */
  24.348 -            fprintf(f, "   n%Ld%d -> %s [label=\"%u snapshots\"]\n",
  24.349 -                    id.block, id.index, oldnode, nr_snaps);
  24.350 -            sprintf(oldnode, "n%Ld%d", id.block, id.index);
  24.351 -            freeblock(blk);
  24.352 -            
  24.353 -            if (done) break;
  24.354 -        }
  24.355 -    }
  24.356 -    
  24.357 -    /* write graph postamble */
  24.358 -    fprintf(f, "}\n");
  24.359 -    fclose(f);
  24.360 -    
  24.361 -    printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE);
  24.362 -    {
  24.363 -        char cmd[255];
  24.364 -        sprintf(cmd, "dot %s -Tps -o %s", GRAPH_DOT_FILE, GRAPH_PS_FILE);
  24.365 -        system(cmd);
  24.366 -    }
  24.367 -    return 0;
  24.368 -}
  24.369 -
  24.370 -#endif
    25.1 --- a/tools/blktap/parallax/vdi.h	Fri Jun 16 18:19:40 2006 +0100
    25.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.3 @@ -1,55 +0,0 @@
    25.4 -#ifndef _VDI_H_
    25.5 -#define _VDI_H_
    25.6 -/**************************************************************************
    25.7 - * 
    25.8 - * vdi.h
    25.9 - *
   25.10 - * Virtual Disk Image (VDI) Interfaces
   25.11 - *
   25.12 - */
   25.13 -
   25.14 -#ifndef __VDI_H__
   25.15 -#define __VDI_H__
   25.16 -
   25.17 -#include "blktaplib.h"
   25.18 -#include "snaplog.h"
   25.19 -
   25.20 -#define VDI_HEIGHT     27 /* Note that these are now hard-coded */
   25.21 -#define VDI_REG_HEIGHT 27 /* in the async lookup code           */
   25.22 -
   25.23 -#define VDI_NAME_SZ 256
   25.24 -
   25.25 -
   25.26 -typedef struct vdi {
   25.27 -    uint64_t         id;               /* unique vdi id -- used by the registry   */
   25.28 -    uint64_t         block;            /* block where this vdi lives (also unique)*/
   25.29 -    uint64_t         radix_root;       /* radix root node for block mappings      */
   25.30 -    snap_id_t   snap;             /* next snapshot slot for this VDI         */
   25.31 -    struct vdi *next;             /* used to hash-chain in blkif.            */
   25.32 -    blkif_vdev_t vdevice;         /* currently mounted as...                 */
   25.33 -    struct radix_lock *radix_lock;/* per-line L1 RW lock for parallel reqs   */
   25.34 -    char        name[VDI_NAME_SZ];/* human readable vdi name                 */
   25.35 -} vdi_t;
   25.36 -
   25.37 -#define VDI_REG_MAGIC   0xff00ff0bb0ff00ffLL
   25.38 -
   25.39 -typedef struct vdi_registry {
   25.40 -    uint64_t     magic;
   25.41 -    uint64_t     nr_vdis;
   25.42 -} vdi_registry_t;
   25.43 -
   25.44 -
   25.45 -int __init_vdi(void);
   25.46 -
   25.47 -vdi_t *vdi_get(uint64_t vdi_id);
   25.48 -void vdi_put(vdi_t *vdi);
   25.49 -vdi_registry_t *get_vdi_registry(void);
   25.50 -vdi_t *vdi_create(snap_id_t *parent_snap, char *name);
   25.51 -uint64_t vdi_lookup_block(vdi_t *vdi, uint64_t vdi_block, int *writable);
   25.52 -void vdi_update_block(vdi_t *vdi, uint64_t vdi_block, uint64_t g_block);
   25.53 -void vdi_snapshot(vdi_t *vdi);
   25.54 -
   25.55 -
   25.56 -#endif /* __VDI_H__ */
   25.57 -
   25.58 -#endif //_VDI_H_
    26.1 --- a/tools/blktap/parallax/vdi_create.c	Fri Jun 16 18:19:40 2006 +0100
    26.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.3 @@ -1,52 +0,0 @@
    26.4 -/**************************************************************************
    26.5 - * 
    26.6 - * vdi_create.c
    26.7 - *
    26.8 - * Create a new vdi.
    26.9 - *
   26.10 - */
   26.11 - 
   26.12 -#include <stdio.h>
   26.13 -#include <stdlib.h>
   26.14 -#include <string.h>
   26.15 -#include <sys/time.h>
   26.16 -#include "blockstore.h"
   26.17 -#include "radix.h"
   26.18 -#include "vdi.h"
   26.19 -
   26.20 -int main(int argc, char *argv[])
   26.21 -{
   26.22 -    vdi_t       *vdi;
   26.23 -    char         name[VDI_NAME_SZ] = "";
   26.24 -    snap_id_t    id;
   26.25 -    int          from_snap = 0;
   26.26 -    
   26.27 -    __init_blockstore();
   26.28 -    __init_vdi();
   26.29 -    
   26.30 -    if ( argc == 1 ) {
   26.31 -        printf("usage: %s <VDI Name> [<snap block> <snap idx>]\n", argv[0]);
   26.32 -        exit(-1);
   26.33 -    }
   26.34 -    
   26.35 -    strncpy( name, argv[1], VDI_NAME_SZ);
   26.36 -    name[VDI_NAME_SZ] = '\0';    
   26.37 -    
   26.38 -    if ( argc > 3 ) {
   26.39 -        id.block   = (uint64_t)          atoll(argv[2]);
   26.40 -        id.index   = (unsigned int) atol (argv[3]);
   26.41 -        from_snap  = 1;
   26.42 -    }
   26.43 -    
   26.44 -    vdi = vdi_create( from_snap ? &id : NULL, name);
   26.45 -    
   26.46 -    if ( vdi == NULL ) {
   26.47 -        printf("Failed to create VDI!\n");
   26.48 -        freeblock(vdi);
   26.49 -        exit(-1);
   26.50 -    }
   26.51 -    
   26.52 -    freeblock(vdi);
   26.53 -    
   26.54 -    return (0);
   26.55 -}
    27.1 --- a/tools/blktap/parallax/vdi_fill.c	Fri Jun 16 18:19:40 2006 +0100
    27.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.3 @@ -1,81 +0,0 @@
    27.4 -/**************************************************************************
    27.5 - * 
    27.6 - * vdi_fill.c
    27.7 - *
    27.8 - * Hoover a file or device into a vdi.
    27.9 - * You must first create the vdi with vdi_create.
   27.10 - *
   27.11 - */
   27.12 - 
   27.13 -#include <stdio.h>
   27.14 -#include <stdlib.h>
   27.15 -#include <string.h>
   27.16 -#include <sys/types.h>
   27.17 -#include <sys/stat.h>
   27.18 -#include <fcntl.h>
   27.19 -#include <unistd.h>
   27.20 -#include "blockstore.h"
   27.21 -#include "radix.h"
   27.22 -#include "requests-async.h"
   27.23 -#include "vdi.h"
   27.24 -
   27.25 -int main(int argc, char *argv[])
   27.26 -{
   27.27 -    vdi_t       *vdi;
   27.28 -    uint64_t          id;
   27.29 -    int          fd;
   27.30 -    struct stat  st;
   27.31 -    uint64_t          tot_size;
   27.32 -    char         spage[BLOCK_SIZE];
   27.33 -    char        *dpage;
   27.34 -    uint64_t          vblock = 0, count=0;
   27.35 -    
   27.36 -    __init_blockstore();
   27.37 -    init_block_async();
   27.38 -    __init_vdi();
   27.39 -    
   27.40 -    if ( argc < 3 ) {
   27.41 -        printf("usage: %s <VDI id> <filename>\n", argv[0]);
   27.42 -        exit(-1);
   27.43 -    }
   27.44 -        
   27.45 -    id = (uint64_t) atoll(argv[1]);
   27.46 -    
   27.47 -    vdi = vdi_get( id );
   27.48 -    
   27.49 -    if ( vdi == NULL ) {
   27.50 -        printf("Failed to retreive VDI %Ld!\n", id);
   27.51 -        exit(-1);
   27.52 -    }
   27.53 -    
   27.54 -    fd = open(argv[2], O_RDONLY | O_LARGEFILE);
   27.55 -    
   27.56 -    if (fd < 0) {
   27.57 -        printf("Couldn't open %s!\n", argv[2]);
   27.58 -        exit(-1);
   27.59 -    }
   27.60 -    
   27.61 -    if ( fstat(fd, &st) != 0 ) {
   27.62 -        printf("Couldn't stat %s!\n", argv[2]);
   27.63 -        exit(-1);
   27.64 -    }
   27.65 -    
   27.66 -    tot_size = (uint64_t) st.st_size;
   27.67 -    printf("Filling VDI %Ld with %Ld bytes.\n", id, tot_size);
   27.68 -    
   27.69 -    printf("%011Ld blocks total\n", tot_size / BLOCK_SIZE);    
   27.70 -    printf("           ");
   27.71 -    while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
   27.72 -        vdi_write_s(vdi, vblock, spage);
   27.73 -        
   27.74 -        vblock++;
   27.75 -        if ((vblock % 512) == 0)
   27.76 -        printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
   27.77 -        fflush(stdout);
   27.78 -    }
   27.79 -    printf("\n");
   27.80 -    
   27.81 -    freeblock(vdi);
   27.82 -    
   27.83 -    return (0);
   27.84 -}
    28.1 --- a/tools/blktap/parallax/vdi_list.c	Fri Jun 16 18:19:40 2006 +0100
    28.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.3 @@ -1,47 +0,0 @@
    28.4 -/**************************************************************************
    28.5 - * 
    28.6 - * vdi_list.c
    28.7 - *
    28.8 - * Print a list of VDIs on the block store.
    28.9 - *
   28.10 - */
   28.11 - 
   28.12 -#include <stdio.h>
   28.13 -#include <stdlib.h>
   28.14 -#include <string.h>
   28.15 -#include <sys/time.h>
   28.16 -#include "blockstore.h"
   28.17 -#include "radix.h"
   28.18 -#include "vdi.h"
   28.19 -
   28.20 -int main(int argc, char *argv[])
   28.21 -{
   28.22 -    vdi_registry_t *reg;
   28.23 -    vdi_t *vdi;
   28.24 -    int i;
   28.25 -    
   28.26 -    __init_blockstore();
   28.27 -    __init_vdi();
   28.28 -    
   28.29 -    reg = get_vdi_registry();
   28.30 -    
   28.31 -    if ( reg == NULL ) {
   28.32 -        printf("couldn't get VDI registry.\n");
   28.33 -        exit(-1);
   28.34 -    }
   28.35 -    
   28.36 -    for (i=0; i < reg->nr_vdis; i++) {
   28.37 -        vdi = vdi_get(i);
   28.38 -        
   28.39 -        if ( vdi != NULL ) {
   28.40 -            
   28.41 -            printf("%10Ld %60s\n", vdi->id, vdi->name);
   28.42 -            freeblock(vdi);
   28.43 -            
   28.44 -        }
   28.45 -    }
   28.46 -    
   28.47 -    freeblock(reg);
   28.48 -    
   28.49 -    return 0;
   28.50 -}
    29.1 --- a/tools/blktap/parallax/vdi_snap.c	Fri Jun 16 18:19:40 2006 +0100
    29.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.3 @@ -1,43 +0,0 @@
    29.4 -/**************************************************************************
    29.5 - * 
    29.6 - * vdi_snap.c
    29.7 - *
    29.8 - * Snapshot a vdi.
    29.9 - *
   29.10 - */
   29.11 - 
   29.12 -#include <stdio.h>
   29.13 -#include <stdlib.h>
   29.14 -#include <string.h>
   29.15 -#include <sys/time.h>
   29.16 -#include "blockstore.h"
   29.17 -#include "radix.h"
   29.18 -#include "vdi.h"
   29.19 -
   29.20 -int main(int argc, char *argv[])
   29.21 -{
   29.22 -    vdi_t  *vdi;
   29.23 -    uint64_t     id;
   29.24 -    
   29.25 -    __init_blockstore();
   29.26 -    __init_vdi();
   29.27 -    
   29.28 -    if ( argc == 1 ) {
   29.29 -        printf("usage: %s <VDI id>\n", argv[0]);
   29.30 -        exit(-1);
   29.31 -    }
   29.32 -    
   29.33 -    id = (uint64_t) atoll(argv[1]);
   29.34 -    
   29.35 -    vdi = vdi_get(id);
   29.36 -    
   29.37 -    if ( vdi == NULL ) {
   29.38 -        printf("couldn't find the requested VDI.\n");
   29.39 -        freeblock(vdi);
   29.40 -        exit(-1);
   29.41 -    }
   29.42 -    
   29.43 -    vdi_snapshot(vdi);
   29.44 -    
   29.45 -    return 0;
   29.46 -}
    30.1 --- a/tools/blktap/parallax/vdi_snap_delete.c	Fri Jun 16 18:19:40 2006 +0100
    30.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.3 @@ -1,48 +0,0 @@
    30.4 -/**************************************************************************
    30.5 - * 
    30.6 - * vdi_snap_delete.c
    30.7 - *
    30.8 - * Delete a snapshot.
    30.9 - *
   30.10 - * This is not finished:  right now it takes a snap n and calls 
   30.11 - * snap_collapse(n,n+1).
   30.12 - *
   30.13 - * TODO: support for non-consecutive, non-same-block snaps
   30.14 - *       Avoid forking probs.
   30.15 - *
   30.16 - */
   30.17 - 
   30.18 -#include <stdio.h>
   30.19 -#include <stdlib.h>
   30.20 -#include <string.h>
   30.21 -#include <sys/time.h>
   30.22 -#include "blockstore.h"
   30.23 -#include "snaplog.h"
   30.24 -#include "radix.h"
   30.25 -#include "vdi.h"
   30.26 -
   30.27 -int main(int argc, char *argv[])
   30.28 -{
   30.29 -    snap_id_t    id, c_id;
   30.30 -    int ret;
   30.31 -    
   30.32 -    __init_blockstore();
   30.33 -    __init_vdi();
   30.34 -    
   30.35 -    if ( argc != 3 ) {
   30.36 -        printf("usage: %s <snap block> <snap idx>\n", argv[0]);
   30.37 -        exit(-1);
   30.38 -    }
   30.39 -    
   30.40 -    id.block   = (uint64_t)          atoll(argv[1]);
   30.41 -    id.index   = (unsigned int) atol (argv[2]);
   30.42 -    
   30.43 -    c_id = id;
   30.44 -    c_id.index++;
   30.45 -    
   30.46 -    ret = snap_collapse(VDI_HEIGHT, &id, &c_id);
   30.47 -    
   30.48 -    printf("Freed %d blocks.\n", ret);
   30.49 -    
   30.50 -    return 0;
   30.51 -}
    31.1 --- a/tools/blktap/parallax/vdi_snap_list.c	Fri Jun 16 18:19:40 2006 +0100
    31.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.3 @@ -1,82 +0,0 @@
    31.4 -/**************************************************************************
    31.5 - * 
    31.6 - * vdi_snap_list.c
    31.7 - *
    31.8 - * Print a list of snapshots for the specified vdi.
    31.9 - *
   31.10 - */
   31.11 - 
   31.12 -#include <stdio.h>
   31.13 -#include <stdlib.h>
   31.14 -#include <string.h>
   31.15 -#include <time.h>
   31.16 -#include <sys/time.h>
   31.17 -#include "blockstore.h"
   31.18 -#include "radix.h"
   31.19 -#include "vdi.h"
   31.20 -
   31.21 -int main(int argc, char *argv[])
   31.22 -{
   31.23 -    vdi_t        *vdi;
   31.24 -    uint64_t           id;
   31.25 -    int           i, max_snaps = -1;
   31.26 -    snap_block_t *blk;
   31.27 -    snap_id_t     sid;
   31.28 -    char         *t;
   31.29 -    
   31.30 -    __init_blockstore();
   31.31 -    __init_vdi();
   31.32 -    
   31.33 -    if ( argc == 1 ) {
   31.34 -        printf("usage: %s <VDI id> [max snaps]\n", argv[0]);
   31.35 -        exit(-1);
   31.36 -    }
   31.37 -    
   31.38 -    id = (uint64_t) atoll(argv[1]);
   31.39 -    
   31.40 -    if ( argc > 2 ) {
   31.41 -        max_snaps = atoi(argv[2]);
   31.42 -    }
   31.43 -    
   31.44 -    vdi = vdi_get(id);
   31.45 -    
   31.46 -    if ( vdi == NULL ) {
   31.47 -        printf("couldn't find the requested VDI.\n");
   31.48 -        freeblock(vdi);
   31.49 -        exit(-1);
   31.50 -    }
   31.51 -    
   31.52 -    sid = vdi->snap;
   31.53 -    sid.index--;
   31.54 -    
   31.55 -    //printf("%8s%4s%21s %12s %1s\n", "Block", "idx", "timestamp", 
   31.56 -    //    "radix root", "d");
   31.57 -    printf("%8s%4s%37s %12s %1s\n", "Block", "idx", "timestamp", 
   31.58 -            "radix root", "d");
   31.59 -     
   31.60 -    while (sid.block != 0) {
   31.61 -        blk = snap_get_block(sid.block);
   31.62 -        for (i = sid.index; i >= 0; i--) {
   31.63 -            if ( max_snaps == 0  ) {
   31.64 -                freeblock(blk);
   31.65 -                goto done;
   31.66 -            }
   31.67 -            t = ctime(&blk->snaps[i].timestamp.tv_sec);
   31.68 -            t[strlen(t)-1] = '\0';
   31.69 -            //printf("%8Ld%4u%14lu.%06lu %12Ld %1s\n",
   31.70 -            printf("%8Ld%4u%30s %06lu %12Ld %1s\n",
   31.71 -                    sid.block, i, 
   31.72 -                    //blk->snaps[i].timestamp.tv_sec,
   31.73 -                    t,
   31.74 -                    blk->snaps[i].timestamp.tv_usec,
   31.75 -                    blk->snaps[i].radix_root,
   31.76 -                    blk->snaps[i].deleted ? "*" : " ");
   31.77 -            if ( max_snaps != -1 ) 
   31.78 -                max_snaps--;
   31.79 -        }
   31.80 -        sid = blk->hdr.parent_block;
   31.81 -        freeblock(blk);
   31.82 -    }
   31.83 -done:            
   31.84 -    return 0;
   31.85 -}
    32.1 --- a/tools/blktap/parallax/vdi_tree.c	Fri Jun 16 18:19:40 2006 +0100
    32.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.3 @@ -1,132 +0,0 @@
    32.4 -/**************************************************************************
    32.5 - * 
    32.6 - * vdi_tree.c
    32.7 - *
    32.8 - * Output current vdi tree to dot and postscript.
    32.9 - *
   32.10 - */
   32.11 - 
   32.12 -#include <stdio.h>
   32.13 -#include <stdlib.h>
   32.14 -#include <string.h>
   32.15 -#include <sys/time.h>
   32.16 -#include "blockstore.h"
   32.17 -#include "radix.h"
   32.18 -#include "vdi.h"
   32.19 -
   32.20 -#define GRAPH_DOT_FILE "vdi.dot"
   32.21 -#define GRAPH_PS_FILE  "vdi.ps"
   32.22 -
   32.23 -typedef struct sh_st {
   32.24 -    snap_id_t     id;
   32.25 -    struct sh_st *next;
   32.26 -} sh_t;
   32.27 -
   32.28 -#define SNAP_HASHSZ 1024
   32.29 -sh_t *node_hash[SNAP_HASHSZ];
   32.30 -#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
   32.31 -
   32.32 -#define SNAPID_EQUAL(_a,_b) \
   32.33 -    (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
   32.34 -int sh_check_and_add(snap_id_t *id)
   32.35 -{
   32.36 -    sh_t **s = &node_hash[SNAP_HASH(id)];
   32.37 -    
   32.38 -    while (*s != NULL) {
   32.39 -        if (SNAPID_EQUAL(&((*s)->id), id))
   32.40 -            return 1;
   32.41 -        *s = (*s)->next;
   32.42 -    }
   32.43 -    
   32.44 -    *s = (sh_t *)malloc(sizeof(sh_t));
   32.45 -    (*s)->id = *id;
   32.46 -    (*s)->next = NULL;
   32.47 -    
   32.48 -    return 0;
   32.49 -}
   32.50 -
   32.51 -int main(int argc, char *argv[])
   32.52 -{
   32.53 -    FILE *f;
   32.54 -    char dot_file[255] = GRAPH_DOT_FILE;
   32.55 -    char  ps_file[255] = GRAPH_PS_FILE;
   32.56 -    int nr_vdis = 0, nr_forks = 0;
   32.57 -    vdi_registry_t *reg;
   32.58 -    vdi_t *vdi;
   32.59 -    int i;
   32.60 -    
   32.61 -    __init_blockstore();
   32.62 -    __init_vdi();
   32.63 -    
   32.64 -    reg = get_vdi_registry();
   32.65 -    
   32.66 -    if ( reg == NULL ) {
   32.67 -        printf("couldn't get VDI registry.\n");
   32.68 -        exit(-1);
   32.69 -    }
   32.70 -    
   32.71 -    if ( argc > 1 ) {
   32.72 -        strncpy(ps_file, argv[1], 255);
   32.73 -        ps_file[255] = '\0';
   32.74 -    }
   32.75 -    
   32.76 -    /* now dump it out to a dot file. */
   32.77 -    printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis);
   32.78 -    
   32.79 -    f = fopen(dot_file, "w");
   32.80 -    
   32.81 -    /* write graph preamble */
   32.82 -    fprintf(f, "digraph G {\n");
   32.83 -    fprintf(f, "   rankdir=LR\n");
   32.84 -    
   32.85 -    for (i=0; i<reg->nr_vdis; i++) {
   32.86 -        char oldnode[255];
   32.87 -        snap_block_t *blk;
   32.88 -        snap_id_t id;
   32.89 -        int nr_snaps, done=0;
   32.90 -        
   32.91 -        vdi = vdi_get(i);
   32.92 -        id = vdi->snap;
   32.93 -        /* add a node for the id */
   32.94 -printf("vdi: %d\n", i);
   32.95 -        fprintf(f, "   n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", 
   32.96 -                id.block, id.index, vdi->name,
   32.97 -                id.block, id.index);
   32.98 -        sprintf(oldnode, "n%Ld%d", id.block, id.index);
   32.99 -        
  32.100 -        while (id.block != 0) {
  32.101 -            blk = snap_get_block(id.block);
  32.102 -            nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index);
  32.103 -            id = blk->hdr.fork_block;
  32.104 -            
  32.105 -            done = sh_check_and_add(&id);
  32.106 -            
  32.107 -            /* add a node for the fork_id */
  32.108 -            if (!done) {
  32.109 -                fprintf(f, "   n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", 
  32.110 -                    id.block, id.index,
  32.111 -                    id.block, id.index);
  32.112 -            }
  32.113 -            
  32.114 -            /* add an edge between them */
  32.115 -            fprintf(f, "   n%Ld%d -> %s [label=\"%u snapshots\"]\n",
  32.116 -                    id.block, id.index, oldnode, nr_snaps);
  32.117 -            sprintf(oldnode, "n%Ld%d", id.block, id.index);
  32.118 -            freeblock(blk);
  32.119 -            
  32.120 -            if (done) break;
  32.121 -        }
  32.122 -    }
  32.123 -    
  32.124 -    /* write graph postamble */
  32.125 -    fprintf(f, "}\n");
  32.126 -    fclose(f);
  32.127 -    
  32.128 -    printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE);
  32.129 -    {
  32.130 -        char cmd[255];
  32.131 -        sprintf(cmd, "dot %s -Tps -o %s", dot_file, ps_file);
  32.132 -        system(cmd);
  32.133 -    }
  32.134 -    return 0;
  32.135 -}
    33.1 --- a/tools/blktap/parallax/vdi_unittest.c	Fri Jun 16 18:19:40 2006 +0100
    33.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    33.3 @@ -1,184 +0,0 @@
    33.4 -/**************************************************************************
    33.5 - * 
    33.6 - * vdi_unittest.c
    33.7 - *
    33.8 - * Run a small test workload to ensure that data access through a vdi
    33.9 - * is (at least superficially) correct.
   33.10 - *
   33.11 - */
   33.12 - 
   33.13 -#include <stdio.h>
   33.14 -#include <stdlib.h>
   33.15 -#include <string.h>
   33.16 -#include <sys/types.h>
   33.17 -#include <sys/stat.h>
   33.18 -#include <fcntl.h>
   33.19 -#include <unistd.h>
   33.20 -#include "requests-async.h"
   33.21 -#include "blockstore.h"
   33.22 -#include "radix.h"
   33.23 -#include "vdi.h"
   33.24 -
   33.25 -#define TEST_PAGES  32
   33.26 -static char *zero_page;
   33.27 -static char pages[TEST_PAGES][BLOCK_SIZE];
   33.28 -static int next_page = 0;
   33.29 -
   33.30 -void fill_test_pages(void)
   33.31 -{
   33.32 -    int i, j;
   33.33 -    long *page;
   33.34 -
   33.35 -    for (i=0; i< TEST_PAGES; i++) {
   33.36 -        page = (unsigned long *)pages[i];
   33.37 -        for (j=0; j<(BLOCK_SIZE/4); j++) {
   33.38 -            page[j] = random();
   33.39 -        }
   33.40 -    }
   33.41 -
   33.42 -    zero_page = newblock();
   33.43 -}
   33.44 -
   33.45 -inline uint64_t make_vaddr(uint64_t L1, uint64_t L2, uint64_t L3)
   33.46 -{
   33.47 -    uint64_t ret = L1;
   33.48 -
   33.49 -    ret = (ret << 9) | L2;
   33.50 -    ret = (ret << 9) | L3;
   33.51 -
   33.52 -    return ret;
   33.53 -}
   33.54 -
   33.55 -void touch_block(vdi_t *vdi, uint64_t L1, uint64_t L2, uint64_t L3)
   33.56 -{
   33.57 -    uint64_t vaddr;
   33.58 -    char *page = pages[next_page++];
   33.59 -    char *rpage = NULL;
   33.60 -
   33.61 -    printf("TOUCH (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3);
   33.62 -
   33.63 -    vaddr = make_vaddr(L1, L2, L3);
   33.64 -    vdi_write_s(vdi, vaddr, page);
   33.65 -    rpage = vdi_read_s(vdi, vaddr);
   33.66 -
   33.67 -    if (rpage == NULL) 
   33.68 -    {
   33.69 -        printf( "read %Lu returned NULL\n", vaddr); 
   33.70 -        return; 
   33.71 -    }
   33.72 -
   33.73 -    if (memcmp(page, rpage, BLOCK_SIZE) != 0)
   33.74 -    {
   33.75 -        printf( "read %Lu returned a different page\n", vaddr);
   33.76 -        return;
   33.77 -    }
   33.78 -
   33.79 -    freeblock(rpage);
   33.80 -}
   33.81 -
   33.82 -void test_block(vdi_t *vdi, uint64_t L1, uint64_t L2, uint64_t L3, char *page)
   33.83 -{
   33.84 -    uint64_t vaddr;
   33.85 -    char *rpage = NULL;
   33.86 -
   33.87 -    printf("TEST  (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3);
   33.88 -
   33.89 -    vaddr = make_vaddr(L1, L2, L3);
   33.90 -    rpage = vdi_read_s(vdi, vaddr);
   33.91 -
   33.92 -    if (rpage == NULL) 
   33.93 -    {
   33.94 -        printf( "read %Lu returned NULL\n", vaddr); 
   33.95 -        return; 
   33.96 -    }
   33.97 -
   33.98 -    if (memcmp(page, rpage, BLOCK_SIZE) != 0)
   33.99 -    {
  33.100 -        printf( "read %Lu returned a different page\n", vaddr);
  33.101 -        return;
  33.102 -    }
  33.103 -
  33.104 -    freeblock(rpage);
  33.105 -}
  33.106 -
  33.107 -void coverage_test(vdi_t *vdi)
  33.108 -{
  33.109 -    uint64_t vaddr;
  33.110 -    int i, j, k;
  33.111 -
  33.112 -    /* Do a series of writes and reads to test all paths through the 
  33.113 -     * async radix code.  The radix request code will dump CRC warnings
  33.114 -     * if there are data problems here as well.
  33.115 -     */
  33.116 -
  33.117 -    /* L1 Zero */
  33.118 -    touch_block(vdi, 0, 0, 0);
  33.119 -
  33.120 -    /* L2 Zero */
  33.121 -    i = next_page;
  33.122 -    touch_block(vdi, 0, 1, 0);
  33.123 -
  33.124 -    /* L3 Zero */
  33.125 -    j = next_page;
  33.126 -    touch_block(vdi, 0, 0, 1);
  33.127 -    k = next_page;
  33.128 -    touch_block(vdi, 0, 1, 1);
  33.129 -
  33.130 -    /* Direct write */
  33.131 -    touch_block(vdi, 0, 0, 0);
  33.132 -
  33.133 -    vdi_snapshot(vdi);
  33.134 -
  33.135 -    /* L1 fault */
  33.136 -    touch_block(vdi, 0, 0, 0);
  33.137 -    /* test the read-only branches that should have been copied over. */
  33.138 -    test_block(vdi, 0, 1, 0, pages[i]);
  33.139 -    test_block(vdi, 0, 0, 1, pages[j]);
  33.140 -
  33.141 -    /* L2 fault */
  33.142 -    touch_block(vdi, 0, 1, 0);
  33.143 -    test_block(vdi, 0, 1, 1, pages[k]);
  33.144 -
  33.145 -    /* L3 fault */
  33.146 -    touch_block(vdi, 0, 0, 1);
  33.147 -    
  33.148 -    /* read - L1 zero */
  33.149 -    test_block(vdi, 1, 0, 0, zero_page);
  33.150 -    
  33.151 -    /* read - L2 zero */
  33.152 -    test_block(vdi, 0, 2, 0, zero_page);
  33.153 -
  33.154 -    /* read - L3 zero */
  33.155 -    test_block(vdi, 0, 0, 2, zero_page);
  33.156 -}
  33.157 -
  33.158 -int main(int argc, char *argv[])
  33.159 -{
  33.160 -    vdi_t       *vdi;
  33.161 -    uint64_t          id;
  33.162 -    int          fd;
  33.163 -    struct stat  st;
  33.164 -    uint64_t          tot_size;
  33.165 -    char         spage[BLOCK_SIZE];
  33.166 -    char        *dpage;
  33.167 -    uint64_t          vblock = 0, count=0;
  33.168 -    
  33.169 -    __init_blockstore();
  33.170 -    init_block_async();
  33.171 -    __init_vdi();
  33.172 -        
  33.173 -    vdi = vdi_create( NULL, "UNIT TEST VDI");
  33.174 -    
  33.175 -    if ( vdi == NULL ) {
  33.176 -        printf("Failed to create VDI!\n");
  33.177 -        freeblock(vdi);
  33.178 -        exit(-1);
  33.179 -    }
  33.180 -
  33.181 -    fill_test_pages();
  33.182 -    coverage_test(vdi);
  33.183 -    
  33.184 -    freeblock(vdi);
  33.185 -    
  33.186 -    return (0);
  33.187 -}
    34.1 --- a/tools/blktap/parallax/vdi_validate.c	Fri Jun 16 18:19:40 2006 +0100
    34.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.3 @@ -1,97 +0,0 @@
    34.4 -/**************************************************************************
    34.5 - * 
    34.6 - * vdi_validate.c
    34.7 - *
    34.8 - * Intended to sanity-check vm_fill and the underlying vdi code.
    34.9 - *
   34.10 - * Block-by-block compare of a vdi with a file/device on the disk.
   34.11 - *
   34.12 - */
   34.13 - 
   34.14 -#include <stdio.h>
   34.15 -#include <stdlib.h>
   34.16 -#include <string.h>
   34.17 -#include <sys/types.h>
   34.18 -#include <sys/stat.h>
   34.19 -#include <fcntl.h>
   34.20 -#include <unistd.h>
   34.21 -#include "blockstore.h"
   34.22 -#include "radix.h"
   34.23 -#include "vdi.h"
   34.24 -#include "requests-async.h"
   34.25 -
   34.26 -int main(int argc, char *argv[])
   34.27 -{
   34.28 -    vdi_t       *vdi;
   34.29 -    uint64_t          id;
   34.30 -    int          fd;
   34.31 -    struct stat  st;
   34.32 -    uint64_t          tot_size;
   34.33 -    char         spage[BLOCK_SIZE], *dpage;
   34.34 -    char        *vpage;
   34.35 -    uint64_t          vblock = 0, count=0;
   34.36 -    
   34.37 -    __init_blockstore();
   34.38 -    init_block_async();
   34.39 -    __init_vdi();
   34.40 -    
   34.41 -    if ( argc < 3 ) {
   34.42 -        printf("usage: %s <VDI id> <filename>\n", argv[0]);
   34.43 -        exit(-1);
   34.44 -    }
   34.45 -        
   34.46 -    id = (uint64_t) atoll(argv[1]);
   34.47 -    
   34.48 -    vdi = vdi_get( id );
   34.49 -    
   34.50 -    if ( vdi == NULL ) {
   34.51 -        printf("Failed to retreive VDI %Ld!\n", id);
   34.52 -        exit(-1);
   34.53 -    }
   34.54 -    
   34.55 -    fd = open(argv[2], O_RDONLY | O_LARGEFILE);
   34.56 -    
   34.57 -    if (fd < 0) {
   34.58 -        printf("Couldn't open %s!\n", argv[2]);
   34.59 -        exit(-1);
   34.60 -    }
   34.61 -    
   34.62 -    if ( fstat(fd, &st) != 0 ) {
   34.63 -        printf("Couldn't stat %s!\n", argv[2]);
   34.64 -        exit(-1);
   34.65 -    }
   34.66 -    
   34.67 -    tot_size = (uint64_t) st.st_size;
   34.68 -    printf("Testing VDI %Ld (%Ld bytes).\n", id, tot_size);
   34.69 -    
   34.70 -    printf("           ");
   34.71 -    while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
   34.72 -
   34.73 -        dpage = vdi_read_s(vdi, vblock);
   34.74 -
   34.75 -        if (dpage == NULL) {
   34.76 -            printf("\n\nfound an unmapped VDI block (%Ld)\n", vblock);
   34.77 -            exit(0);
   34.78 -        }
   34.79 -
   34.80 -        if (memcmp(spage, dpage, BLOCK_SIZE) != 0) {
   34.81 -            printf("\n\nblocks don't match! (%Ld)\n", vblock);
   34.82 -            exit(0);
   34.83 -        }
   34.84 -        
   34.85 -        freeblock(dpage);
   34.86 -        
   34.87 -        vblock++;
   34.88 -        if ((vblock % 1024) == 0) {
   34.89 -            printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
   34.90 -            fflush(stdout);
   34.91 -        }
   34.92 -    }
   34.93 -    printf("\n");
   34.94 -    
   34.95 -    printf("VDI %Ld looks good!\n", id);
   34.96 -    
   34.97 -    freeblock(vdi);
   34.98 -    
   34.99 -    return (0);
  34.100 -}
    35.1 --- a/tools/blktap/ublkback/Makefile	Fri Jun 16 18:19:40 2006 +0100
    35.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    35.3 @@ -1,41 +0,0 @@
    35.4 -
    35.5 -XEN_ROOT = ../../..
    35.6 -include $(XEN_ROOT)/tools/Rules.mk
    35.7 -
    35.8 -INCLUDES += -I..
    35.9 -
   35.10 -INSTALL            = install
   35.11 -INSTALL_PROG = $(INSTALL) -m0755
   35.12 -IBIN         = ublkback
   35.13 -INSTALL_DIR  = /usr/sbin
   35.14 -
   35.15 -CFLAGS   += -Werror
   35.16 -CFLAGS   += -Wno-unused
   35.17 -CFLAGS   += -fno-strict-aliasing
   35.18 -CFLAGS   += -I $(XEN_LIBXC)
   35.19 -CFLAGS   += $(INCLUDES) -I.
   35.20 -CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
   35.21 -# Get gcc to generate the dependencies for us.
   35.22 -CFLAGS   += -Wp,-MD,.$(@F).d
   35.23 -DEPS     = .*.d
   35.24 -
   35.25 -OBJS     = $(patsubst %.c,%.o,$(SRCS))
   35.26 -
   35.27 -.PHONY: all
   35.28 -all: $(IBIN)
   35.29 -
   35.30 -LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
   35.31 -
   35.32 -.PHONY: install
   35.33 -install:
   35.34 -	$(INSTALL_PROG) $(IBIN) $(DESTDIR)$(INSTALL_DIR)
   35.35 -
   35.36 -.PHONY: clean
   35.37 -clean:
   35.38 -	rm -rf *.o*~ $(DEPS) xen TAGS $(IBIN)
   35.39 -
   35.40 -ublkback: 
   35.41 -	$(CC) $(CFLAGS) -o ublkback -L$(XEN_LIBXC) -L. -L..  \
   35.42 -	      -lblktap -laio ublkback.c ublkbacklib.c -pg
   35.43 -
   35.44 --include $(DEPS)
    36.1 --- a/tools/blktap/ublkback/ublkback.c	Fri Jun 16 18:19:40 2006 +0100
    36.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.3 @@ -1,18 +0,0 @@
    36.4 -/* ublkback.c
    36.5 - *
    36.6 - * libaio-based userlevel backend.
    36.7 - */
    36.8 -
    36.9 -#include "blktaplib.h"
   36.10 -#include "ublkbacklib.h"
   36.11 -
   36.12 -
   36.13 -int main(int argc, char *argv[])
   36.14 -{
   36.15 -    ublkback_init();
   36.16 -    
   36.17 -    register_new_blkif_hook(ublkback_new_blkif);
   36.18 -    blktap_listen();
   36.19 -    
   36.20 -    return 0;
   36.21 -}
    37.1 --- a/tools/blktap/ublkback/ublkbacklib.c	Fri Jun 16 18:19:40 2006 +0100
    37.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    37.3 @@ -1,473 +0,0 @@
    37.4 -/* ublkbacklib.c
    37.5 - *
    37.6 - * file/device image-backed block device -- using linux libaio.
    37.7 - * 
    37.8 - * (c) 2004 Andrew Warfield.
    37.9 - *
   37.10 - * Xend has been modified to use an amorfs:[fsid] disk tag.
   37.11 - * This will show up as device type (maj:240,min:0) = 61440.
   37.12 - *
   37.13 - * The fsid is placed in the sec_start field of the disk extent.
   37.14 - *
   37.15 - * NOTE: This doesn't work.  Grrr.
   37.16 - */
   37.17 -
   37.18 -#define _GNU_SOURCE
   37.19 -#define __USE_LARGEFILE64
   37.20 -
   37.21 -#include <stdio.h>
   37.22 -#include <stdlib.h>
   37.23 -#include <fcntl.h>
   37.24 -#include <string.h>
   37.25 -#include <db.h>       
   37.26 -#include <sys/stat.h>
   37.27 -#include <sys/types.h>
   37.28 -#include <sys/poll.h>
   37.29 -#include <unistd.h>
   37.30 -#include <errno.h>
   37.31 -#include <libaio.h>
   37.32 -#include <pthread.h>
   37.33 -#include <time.h>
   37.34 -#include <err.h>
   37.35 -#include "blktaplib.h"
   37.36 -
   37.37 -/* XXXX:  */
   37.38 -/* Current code just mounts this file/device to any requests that come in. */
   37.39 -//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
   37.40 -#define TMP_IMAGE_FILE_NAME "fc3.image"
   37.41 -
   37.42 -#define MAX_REQUESTS            64 /* must be synced with the blkif drivers. */
   37.43 -#define MAX_SEGMENTS_PER_REQ    11
   37.44 -#define SECTOR_SHIFT             9
   37.45 -#define MAX_AIO_REQS   (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
   37.46 -
   37.47 -#if 0
   37.48 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   37.49 -#else
   37.50 -#define DPRINTF(_f, _a...) ((void)0)
   37.51 -#endif
   37.52 -           
   37.53 -#if 1                                                                        
   37.54 -#define ASSERT(_p) \
   37.55 -    if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
   37.56 -    __LINE__, __FILE__); *(int*)0=0; }
   37.57 -#else
   37.58 -#define ASSERT(_p) ((void)0)
   37.59 -#endif                                                                     
   37.60 -
   37.61 -/* Note on pending_reqs: I assume all reqs are queued before they start to 
   37.62 - * get filled.  so count of 0 is an unused record.
   37.63 - */
   37.64 -typedef struct {
   37.65 -    blkif_request_t  req;
   37.66 -    blkif_t         *blkif;
   37.67 -    int              count;
   37.68 -} pending_req_t;
   37.69 -
   37.70 -static pending_req_t    pending_list[MAX_REQUESTS];
   37.71 -static io_context_t  ctx;
   37.72 -static struct iocb  *iocb_free[MAX_AIO_REQS];
   37.73 -static int           iocb_free_count;
   37.74 -
   37.75 -/* ---[ Notification mecahnism ]--------------------------------------- */
   37.76 -
   37.77 -enum { 
   37.78 -    READ   = 0,
   37.79 -    WRITE  = 1
   37.80 -};
   37.81 -
   37.82 -static int aio_notify[2];
   37.83 -static volatile int aio_listening = 0;
   37.84 -static pthread_mutex_t notifier_sem = PTHREAD_MUTEX_INITIALIZER;
   37.85 -
   37.86 -static struct io_event aio_events[MAX_AIO_REQS];
   37.87 -static int             aio_event_count = 0;
   37.88 -
   37.89 -/* this is commented out in libaio.h for some reason. */
   37.90 -extern int io_queue_wait(io_context_t ctx, struct timespec *timeout);
   37.91 -
   37.92 -static void *notifier_thread(void *arg)
   37.93 -{
   37.94 -    int ret; 
   37.95 -    int msg = 0x00feeb00;
   37.96 -    
   37.97 -    DPRINTF("Notifier thread started.\n");
   37.98 -    for (;;) {
   37.99 -        pthread_mutex_lock(&notifier_sem);
  37.100 -        if ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0) {
  37.101 -            aio_event_count = ret;
  37.102 -            write(aio_notify[WRITE], &msg, sizeof(msg));
  37.103 -        } else {
  37.104 -                printf("[io_queue_wait error! %d]\n", errno);
  37.105 -                pthread_mutex_unlock(&notifier_sem);
  37.106 -        }
  37.107 -    }
  37.108 -}
  37.109 -
  37.110 -/* --- Talking to xenstore: ------------------------------------------- */
  37.111 -
  37.112 -int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done);
  37.113 -int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done);
  37.114 -
  37.115 -typedef struct image {
  37.116 -    /* These need to turn into an array/rbtree for multi-disk support. */
  37.117 -    int  fd;
  37.118 -    uint64_t  fsid;
  37.119 -    blkif_vdev_t   vdevice;
  37.120 -    long int size;
  37.121 -    long int secsize;
  37.122 -    long int info;
  37.123 -} image_t;
  37.124 -
  37.125 -long int ublkback_get_size(blkif_t *blkif)
  37.126 -{
  37.127 -    image_t *img = (image_t *)blkif->prv;
  37.128 -    return img->size;
  37.129 -}
  37.130 -
  37.131 -long int ublkback_get_secsize(blkif_t *blkif)
  37.132 -{
  37.133 -    image_t *img = (image_t *)blkif->prv;
  37.134 -    return img->secsize;
  37.135 -}
  37.136 -
  37.137 -unsigned ublkback_get_info(blkif_t *blkif)
  37.138 -{
  37.139 -    image_t *img = (image_t *)blkif->prv;
  37.140 -    return img->info;
  37.141 -}
  37.142 -
  37.143 -static struct blkif_ops ublkback_ops = {
  37.144 -    get_size:    ublkback_get_size,
  37.145 -    get_secsize: ublkback_get_secsize,
  37.146 -    get_info:    ublkback_get_info,
  37.147 -};
  37.148 -
  37.149 -int ublkback_new_blkif(blkif_t *blkif)
  37.150 -{
  37.151 -    image_t *image;
  37.152 -    struct stat stat;
  37.153 -    int ret;
  37.154 -
  37.155 -    image = (image_t *)malloc(sizeof(image_t));
  37.156 -    if (image == NULL) {
  37.157 -        printf("error allocating image record.\n");
  37.158 -        return -ENOMEM;
  37.159 -    }
  37.160 -
  37.161 -    /* Open it. */
  37.162 -    image->fd = open(TMP_IMAGE_FILE_NAME, 
  37.163 -                     O_RDWR | O_DIRECT | O_LARGEFILE);
  37.164 -
  37.165 -    if ((image->fd < 0) && (errno == EINVAL)) {
  37.166 -        /* Maybe O_DIRECT isn't supported. */
  37.167 -        warn("open() failed on '%s', trying again without O_DIRECT",
  37.168 -               TMP_IMAGE_FILE_NAME);
  37.169 -        image->fd = open(TMP_IMAGE_FILE_NAME, O_RDWR | O_LARGEFILE);
  37.170 -    }
  37.171 -
  37.172 -    if (image->fd < 0) {
  37.173 -        warn("Couldn't open image file!");
  37.174 -        free(image);
  37.175 -        return -EINVAL;
  37.176 -    }
  37.177 -
  37.178 -    /* Size it. */
  37.179 -    ret = fstat(image->fd, &stat);
  37.180 -    if (ret != 0) {
  37.181 -        printf("Couldn't stat image in PROBE!");
  37.182 -        return -EINVAL;
  37.183 -    }
  37.184 -    
  37.185 -    image->size = (stat.st_size >> SECTOR_SHIFT);
  37.186 -
  37.187 -    /* TODO: IOCTL to get size of raw device. */
  37.188 -/*
  37.189 -  ret = ioctl(img->fd, BLKGETSIZE, &blksize);
  37.190 -  if (ret != 0) {
  37.191 -  printf("Couldn't ioctl image in PROBE!\n");
  37.192 -  goto err;
  37.193 -  }
  37.194 -*/
  37.195 -    if (image->size == 0)
  37.196 -        image->size =((uint64_t) 16836057);
  37.197 -    image->secsize = 512;
  37.198 -    image->info = 0;
  37.199 -
  37.200 -    /* Register the hooks */
  37.201 -    blkif_register_request_hook(blkif, "Ublkback req.", ublkback_request);
  37.202 -    blkif_register_response_hook(blkif, "Ublkback resp.", ublkback_response);
  37.203 -
  37.204 -
  37.205 -    printf(">X<Created a new blkif! pdev was %ld, but you got %s\n", 
  37.206 -           blkif->pdev, TMP_IMAGE_FILE_NAME);
  37.207 -
  37.208 -    blkif->ops = &ublkback_ops;
  37.209 -    blkif->prv = (void *)image;
  37.210 -
  37.211 -    return 0;
  37.212 -}
  37.213 -
  37.214 -
  37.215 -/* --- Moving the bits: ----------------------------------------------- */
  37.216 -
  37.217 -static int batch_count = 0;
  37.218 -int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done)
  37.219 -{
  37.220 -    int fd;
  37.221 -    uint64_t sector;
  37.222 -    char *spage, *dpage;
  37.223 -    int ret, i, idx;
  37.224 -    blkif_response_t *rsp;
  37.225 -    domid_t dom = ID_TO_DOM(req->id);
  37.226 -    static struct iocb *ioq[MAX_SEGMENTS_PER_REQ*MAX_REQUESTS]; 
  37.227 -    static int io_idx = 0;
  37.228 -    struct iocb *io;
  37.229 -    image_t *img;
  37.230 -
  37.231 -    img = (image_t *)blkif->prv;
  37.232 -    fd = img->fd;
  37.233 -
  37.234 -    switch (req->operation) 
  37.235 -    {
  37.236 -    case BLKIF_OP_WRITE:
  37.237 -    {
  37.238 -        unsigned long size;
  37.239 -
  37.240 -        batch_count++;
  37.241 -
  37.242 -        idx = ID_TO_IDX(req->id);
  37.243 -        ASSERT(pending_list[idx].count == 0);
  37.244 -        memcpy(&pending_list[idx].req, req, sizeof(*req));
  37.245 -        pending_list[idx].count = req->nr_segments;
  37.246 -        pending_list[idx].blkif = blkif;
  37.247 -        
  37.248 -        for (i = 0; i < req->nr_segments; i++) {
  37.249 -            
  37.250 -            sector = req->sector_number + (8*i);
  37.251 -            
  37.252 -            size = req->seg[i].last_sect - req->seg[i].first_sect + 1;
  37.253 -            
  37.254 -            if (req->seg[i].first_sect != 0)
  37.255 -                DPRINTF("iWR: sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
  37.256 -                        "pos: %15lu\n",
  37.257 -                        req->sector_number, sector, 
  37.258 -                        req->seg[i].first_sect, req->seg[i].last_sect,
  37.259 -                        (long)(sector << SECTOR_SHIFT));
  37.260 -                        
  37.261 -            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  37.262 -            spage += req->seg[i].first_sect << SECTOR_SHIFT;
  37.263 -            
  37.264 -            /*convert size and sector to byte offsets */
  37.265 -            size   <<= SECTOR_SHIFT;
  37.266 -            sector <<= SECTOR_SHIFT;
  37.267 -            
  37.268 -            io = iocb_free[--iocb_free_count];
  37.269 -            io_prep_pwrite(io, fd, spage, size, sector);
  37.270 -            io->data = (void *)idx;
  37.271 -            //ioq[i] = io;
  37.272 -            ioq[io_idx++] = io;
  37.273 -        }
  37.274 -
  37.275 -        if (batch_done) {
  37.276 -            ret = io_submit(ctx, io_idx, ioq);
  37.277 -            batch_count = 0;
  37.278 -            if (ret < 0)
  37.279 -                printf("BADNESS: io_submit error! (%d)\n", errno);
  37.280 -            io_idx = 0;
  37.281 -        }
  37.282 -        
  37.283 -        return BLKTAP_STOLEN;
  37.284 -        
  37.285 -    }
  37.286 -    case BLKIF_OP_READ:
  37.287 -    {
  37.288 -        unsigned long size;
  37.289 -        
  37.290 -        batch_count++;
  37.291 -        idx = ID_TO_IDX(req->id);
  37.292 -        ASSERT(pending_list[idx].count == 0);
  37.293 -        memcpy(&pending_list[idx].req, req, sizeof(*req));
  37.294 -        pending_list[idx].count = req->nr_segments;
  37.295 -        pending_list[idx].blkif = blkif;
  37.296 -        
  37.297 -        for (i = 0; i < req->nr_segments; i++) {
  37.298 -            
  37.299 -            sector  = req->sector_number + (8*i);
  37.300 -            
  37.301 -            size = req->seg[i].last_sect - req->seg[i].first_sect + 1;
  37.302 -
  37.303 -            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  37.304 -            dpage += req->seg[i].first_sect << SECTOR_SHIFT;
  37.305 -            
  37.306 -            if (req->seg[i].first_sect != 0)
  37.307 -                DPRINTF("iRD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
  37.308 -                        "pos: %15lu dpage: %p\n", 
  37.309 -                        req->sector_number, sector, 
  37.310 -                        req->seg[i].first_sect, req->seg[i].last_sect,
  37.311 -                        (long)(sector << SECTOR_SHIFT), dpage);
  37.312 -            
  37.313 -            /*convert size and sector to byte offsets */
  37.314 -            size   <<= SECTOR_SHIFT;
  37.315 -            sector <<= SECTOR_SHIFT;
  37.316 -            
  37.317 -
  37.318 -            /*
  37.319 -             * NB: Looks like AIO now has non-page aligned support, this path 
  37.320 -             * can probably be removed...  Only really used for hunting
  37.321 -             * superblocks anyway... ;)
  37.322 -             */
  37.323 -            if ( ((unsigned long)dpage % PAGE_SIZE) != 0 ) {
  37.324 -                /* AIO to raw devices must be page aligned, so do this read
  37.325 -                 * synchronously.  The OS is probably just looking for 
  37.326 -                 * a superblock or something, so this won't hurt performance. 
  37.327 -                 */
  37.328 -                int ret;
  37.329 -
  37.330 -                printf("Slow path block read.\n");
  37.331 -                /* Question: do in-progress aio ops modify the file cursor? */
  37.332 -                ret = lseek(fd, sector, SEEK_SET);
  37.333 -                if (ret == (off_t)-1)
  37.334 -                    printf("lseek failed!\n");
  37.335 -                ret = read(fd, dpage, size);
  37.336 -                if (ret < 0)
  37.337 -                    printf("read problem (%d)\n", ret);
  37.338 -                printf("|\n|\n| read: %lld, %lu, %d\n|\n|\n", sector, size, ret);
  37.339 -
  37.340 -                /* not an async request any more... */
  37.341 -                pending_list[idx].count--;
  37.342 -
  37.343 -                rsp = (blkif_response_t *)req;
  37.344 -                rsp->id = req->id;
  37.345 -                rsp->operation = BLKIF_OP_READ;
  37.346 -                rsp->status = BLKIF_RSP_OKAY;
  37.347 -                return BLKTAP_RESPOND;  
  37.348 -                /* Doh -- need to flush aio if this is end-of-batch */
  37.349 -            }
  37.350 -
  37.351 -            io = iocb_free[--iocb_free_count];
  37.352 -            
  37.353 -            io_prep_pread(io, fd, dpage, size, sector);
  37.354 -            io->data = (void *)idx;
  37.355 -            
  37.356 -            ioq[io_idx++] = io;
  37.357 -            //ioq[i] = io;
  37.358 -        }
  37.359 -        
  37.360 -        if (batch_done) {
  37.361 -            ret = io_submit(ctx, io_idx, ioq);
  37.362 -            batch_count = 0;
  37.363 -            if (ret < 0)
  37.364 -                printf("BADNESS: io_submit error! (%d)\n", errno);
  37.365 -            io_idx = 0;
  37.366 -        }
  37.367 -        
  37.368 -        return BLKTAP_STOLEN;
  37.369 -        
  37.370 -    }
  37.371 -    }
  37.372 -    
  37.373 -    printf("Unknown block operation!\n");
  37.374 -err:
  37.375 -    rsp = (blkif_response_t *)req;
  37.376 -    rsp->id = req->id;
  37.377 -    rsp->operation = req->operation;
  37.378 -    rsp->status = BLKIF_RSP_ERROR;
  37.379 -    return BLKTAP_RESPOND;  
  37.380 -}
  37.381 -
  37.382 -
  37.383 -int ublkback_pollhook(int fd)
  37.384 -{
  37.385 -    struct io_event *ep;
  37.386 -    int n, ret, idx;
  37.387 -    blkif_request_t *req;
  37.388 -    blkif_response_t *rsp;
  37.389 -    int responses_queued = 0;
  37.390 -    int pages=0;
  37.391 -    
  37.392 -    for (ep = aio_events; aio_event_count-- > 0; ep++) {
  37.393 -        struct iocb *io = ep->obj;
  37.394 -        idx = (int) ep->data;
  37.395 -        
  37.396 -        if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){
  37.397 -            printf("invalid index returned(%u)!\n", idx);
  37.398 -            break;
  37.399 -        }
  37.400 -        
  37.401 -        if ((int)ep->res < 0) 
  37.402 -            printf("***\n***aio request error! (%d,%d)\n***\n", 
  37.403 -                   (int)ep->res, (int)ep->res2);
  37.404 -        
  37.405 -        pending_list[idx].count--;
  37.406 -        iocb_free[iocb_free_count++] = io;
  37.407 -        pages++;
  37.408 -
  37.409 -        if (pending_list[idx].count == 0) {
  37.410 -            blkif_request_t tmp = pending_list[idx].req;
  37.411 -            rsp = (blkif_response_t *)&pending_list[idx].req;
  37.412 -            rsp->id = tmp.id;
  37.413 -            rsp->operation = tmp.operation;
  37.414 -            rsp->status = BLKIF_RSP_OKAY;
  37.415 -            blkif_inject_response(pending_list[idx].blkif, rsp);
  37.416 -            responses_queued++;
  37.417 -        }
  37.418 -    }
  37.419 -
  37.420 -    if (responses_queued) {
  37.421 -        blktap_kick_responses();
  37.422 -    }
  37.423 -    
  37.424 -    read(aio_notify[READ], &idx, sizeof(idx));
  37.425 -    aio_listening = 1;
  37.426 -    pthread_mutex_unlock(&notifier_sem);
  37.427 -    
  37.428 -    return 0;
  37.429 -}
  37.430 -
  37.431 -/* the image library terminates the request stream. _resp is a noop. */
  37.432 -int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done)
  37.433 -{   
  37.434 -    return BLKTAP_PASS;
  37.435 -}
  37.436 -
  37.437 -void ublkback_init(void)
  37.438 -{
  37.439 -    int i, rc;
  37.440 -    pthread_t p;
  37.441 -    
  37.442 -    for (i = 0; i < MAX_REQUESTS; i++)
  37.443 -        pending_list[i].count = 0; 
  37.444 -    
  37.445 -    memset(&ctx, 0, sizeof(ctx));
  37.446 -    rc = io_queue_init(MAX_AIO_REQS, &ctx);
  37.447 -    if (rc != 0) {
  37.448 -        printf("queue_init failed! (%d)\n", rc);
  37.449 -        exit(0);
  37.450 -    }
  37.451 -    
  37.452 -    for (i=0; i<MAX_AIO_REQS; i++) {
  37.453 -        if (!(iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb)))) {
  37.454 -            printf("error allocating iocb array\n");
  37.455 -            exit(0);
  37.456 -        }
  37.457 -        iocb_free_count = i;
  37.458 -    }
  37.459 -    
  37.460 -    rc = pipe(aio_notify);
  37.461 -    if (rc != 0) {
  37.462 -        printf("pipe failed! (%d)\n", errno);
  37.463 -        exit(0);
  37.464 -    }
  37.465 -    
  37.466 -    rc = pthread_create(&p, NULL, notifier_thread, NULL);
  37.467 -    if (rc != 0) {
  37.468 -        printf("pthread_create failed! (%d)\n", errno);
  37.469 -        exit(0);
  37.470 -    }
  37.471 -    
  37.472 -    aio_listening = 1;
  37.473 -    
  37.474 -    blktap_attach_poll(aio_notify[READ], POLLIN, ublkback_pollhook);
  37.475 -}
  37.476 -
    38.1 --- a/tools/blktap/ublkback/ublkbacklib.h	Fri Jun 16 18:19:40 2006 +0100
    38.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.3 @@ -1,16 +0,0 @@
    38.4 -/* blkaiolib.h
    38.5 - *
    38.6 - * aio image-backed block device.
    38.7 - * 
    38.8 - * (c) 2004 Andrew Warfield.
    38.9 - *
   38.10 - * Xend has been modified to use an amorfs:[fsid] disk tag.
   38.11 - * This will show up as device type (maj:240,min:0) = 61440.
   38.12 - *
   38.13 - * The fsid is placed in the sec_start field of the disk extent.
   38.14 - */
   38.15 -
   38.16 -int  ublkback_request(blkif_request_t *req, int batch_done);
   38.17 -int  ublkback_response(blkif_response_t *rsp); /* noop */
   38.18 -int  ublkback_new_blkif(blkif_t *blkif);
   38.19 -void ublkback_init(void);
    39.1 --- a/tools/blktap/xenbus.c	Fri Jun 16 18:19:40 2006 +0100
    39.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    39.3 @@ -1,568 +0,0 @@
    39.4 -/*
    39.5 - * xenbus.c
    39.6 - * 
    39.7 - * xenbus interface to the blocktap.
    39.8 - * 
    39.9 - * this handles the top-half of integration with block devices through the
   39.10 - * store -- the tap driver negotiates the device channel etc, while the
   39.11 - * userland tap clinet needs to sort out the disk parameters etc.
   39.12 - * 
   39.13 - * A. Warfield 2005 Based primarily on the blkback and xenbus driver code.  
   39.14 - * Comments there apply here...
   39.15 - */
   39.16 -
   39.17 -#include <stdio.h>
   39.18 -#include <stdlib.h>
   39.19 -#include <string.h>
   39.20 -#include <err.h>
   39.21 -#include <stdarg.h>
   39.22 -#include <errno.h>
   39.23 -#include <xs.h>
   39.24 -#include <sys/types.h>
   39.25 -#include <sys/stat.h>
   39.26 -#include <fcntl.h>
   39.27 -#include <poll.h>
   39.28 -#include "blktaplib.h"
   39.29 -#include "list.h"
   39.30 -
   39.31 -#if 0
   39.32 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   39.33 -#else
   39.34 -#define DPRINTF(_f, _a...) ((void)0)
   39.35 -#endif
   39.36 -
   39.37 -/* --- Xenstore / Xenbus helpers ---------------------------------------- */
   39.38 -/*
   39.39 - * These should all be pulled out into the xenstore API.  I'm faulting commands
   39.40 - * in from the xenbus interface as i need them.
   39.41 - */
   39.42 -
   39.43 -
   39.44 -/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
   39.45 -int xs_gather(struct xs_handle *xs, const char *dir, ...)
   39.46 -{
   39.47 -    va_list ap;
   39.48 -    const char *name;
   39.49 -    char *path;
   39.50 -    int ret = 0;
   39.51 -    
   39.52 -    va_start(ap, dir);
   39.53 -    while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
   39.54 -        const char *fmt = va_arg(ap, char *);
   39.55 -        void *result = va_arg(ap, void *);
   39.56 -        char *p;
   39.57 -        
   39.58 -        if (asprintf(&path, "%s/%s", dir, name) == -1)
   39.59 -        {
   39.60 -            warn("allocation error in xs_gather!\n");
   39.61 -            ret = ENOMEM;
   39.62 -            break;
   39.63 -        }
   39.64 -        p = xs_read(xs, path, NULL);
   39.65 -        free(path);
   39.66 -        if (p == NULL) {
   39.67 -            ret = ENOENT;
   39.68 -            break;
   39.69 -        }
   39.70 -        if (fmt) {
   39.71 -            if (sscanf(p, fmt, result) == 0)
   39.72 -                ret = EINVAL;
   39.73 -            free(p);
   39.74 -        } else
   39.75 -            *(char **)result = p;
   39.76 -    }
   39.77 -    va_end(ap);
   39.78 -    return ret;
   39.79 -}
   39.80 -
   39.81 -/* Single printf and write: returns -errno or 0. */
   39.82 -int xs_printf(struct xs_handle *h, const char *dir, const char *node, 
   39.83 -                  const char *fmt, ...)
   39.84 -{
   39.85 -        char *buf, *path;
   39.86 -        va_list ap;
   39.87 -        int ret;
   39.88 - 
   39.89 -        va_start(ap, fmt);
   39.90 -        ret = vasprintf(&buf, fmt, ap);
   39.91 -        va_end(ap);
   39.92 - 
   39.93 -        asprintf(&path, "%s/%s", dir, node);
   39.94 -
   39.95 -        if ((path == NULL) || (buf == NULL))
   39.96 -            return 0;
   39.97 -
   39.98 -        ret = xs_write(h, path, buf, strlen(buf)+1);
   39.99 -
  39.100 -        free(buf);
  39.101 -        free(path);
  39.102 -
  39.103 -        return ret;
  39.104 -}
  39.105 -
  39.106 -
  39.107 -int xs_exists(struct xs_handle *h, const char *path)
  39.108 -{
  39.109 -    char **d;
  39.110 -    int num;
  39.111 -
  39.112 -    d = xs_directory(h, path, &num);
  39.113 -    if (d == NULL)
  39.114 -        return 0;
  39.115 -    free(d);
  39.116 -    return 1;
  39.117 -}
  39.118 -
  39.119 -
  39.120 -
  39.121 -/* This assumes that the domain name we are looking for is unique! */
  39.122 -char *get_dom_domid(struct xs_handle *h, const char *name)
  39.123 -{
  39.124 -    char **e, *val, *domid = NULL;
  39.125 -    int num, i, len;
  39.126 -    char *path;
  39.127 -
  39.128 -    e = xs_directory(h, "/local/domain", &num);
  39.129 -
  39.130 -    i=0;
  39.131 -    while (i < num) {
  39.132 -        asprintf(&path, "/local/domain/%s/name", e[i]);
  39.133 -        val = xs_read(h, path, &len);
  39.134 -        free(path);
  39.135 -        if (val == NULL)
  39.136 -            continue;
  39.137 -        if (strcmp(val, name) == 0) {
  39.138 -            /* match! */
  39.139 -            asprintf(&path, "/local/domain/%s/domid", e[i]);
  39.140 -            domid = xs_read(h, path, &len);
  39.141 -            free(val);
  39.142 -            free(path);
  39.143 -            break;
  39.144 -        }
  39.145 -        free(val);
  39.146 -        i++;
  39.147 -    }
  39.148 -
  39.149 -    free(e);
  39.150 -    return domid;
  39.151 -}
  39.152 -
  39.153 -static int strsep_len(const char *str, char c, unsigned int len)
  39.154 -{
  39.155 -    unsigned int i;
  39.156 -    
  39.157 -    for (i = 0; str[i]; i++)
  39.158 -        if (str[i] == c) {
  39.159 -            if (len == 0)
  39.160 -                return i;
  39.161 -            len--;
  39.162 -        }
  39.163 -    return (len == 0) ? i : -ERANGE;
  39.164 -}
  39.165 -
  39.166 -
  39.167 -/* xenbus watches: */     
  39.168 -/* Register callback to watch this node. */
  39.169 -struct xenbus_watch
  39.170 -{
  39.171 -        struct list_head list;
  39.172 -        char *node;
  39.173 -        void (*callback)(struct xs_handle *h, 
  39.174 -                         struct xenbus_watch *, 
  39.175 -                         const  char *node);
  39.176 -};
  39.177 -
  39.178 -static LIST_HEAD(watches);
  39.179 -
  39.180 -/* A little paranoia: we don't just trust token. */
  39.181 -static struct xenbus_watch *find_watch(const char *token)
  39.182 -{
  39.183 -    struct xenbus_watch *i, *cmp;
  39.184 -    
  39.185 -    cmp = (void *)strtoul(token, NULL, 16);
  39.186 -    
  39.187 -    list_for_each_entry(i, &watches, list)
  39.188 -        if (i == cmp)
  39.189 -            return i;
  39.190 -    return NULL;
  39.191 -}
  39.192 -
  39.193 -/* Register callback to watch this node. like xs_watch, return 0 on failure */
  39.194 -int register_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
  39.195 -{
  39.196 -    /* Pointer in ascii is the token. */
  39.197 -    char token[sizeof(watch) * 2 + 1];
  39.198 -    int er;
  39.199 -    
  39.200 -    sprintf(token, "%lX", (long)watch);
  39.201 -    if (find_watch(token)) 
  39.202 -    {
  39.203 -        warn("watch collision!");
  39.204 -        return -EINVAL;
  39.205 -    }
  39.206 -    
  39.207 -    er = xs_watch(h, watch->node, token);
  39.208 -    if (er != 0) {
  39.209 -        list_add(&watch->list, &watches);
  39.210 -    } 
  39.211 -        
  39.212 -    return er;
  39.213 -}
  39.214 -
  39.215 -int unregister_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
  39.216 -{
  39.217 -    char token[sizeof(watch) * 2 + 1];
  39.218 -    int er;
  39.219 -    
  39.220 -    sprintf(token, "%lX", (long)watch);
  39.221 -    if (!find_watch(token))
  39.222 -    {
  39.223 -        warn("no such watch!");
  39.224 -        return -EINVAL;
  39.225 -    }
  39.226 -    
  39.227 -    
  39.228 -    er = xs_unwatch(h, watch->node, token);
  39.229 -    list_del(&watch->list);
  39.230 -    
  39.231 -    if (er == 0)
  39.232 -        warn("XENBUS Failed to release watch %s: %i",
  39.233 -             watch->node, er);
  39.234 -    return 0;
  39.235 -}
  39.236 -
  39.237 -/* Re-register callbacks to all watches. */
  39.238 -void reregister_xenbus_watches(struct xs_handle *h)
  39.239 -{
  39.240 -    struct xenbus_watch *watch;
  39.241 -    char token[sizeof(watch) * 2 + 1];
  39.242 -    
  39.243 -    list_for_each_entry(watch, &watches, list) {
  39.244 -        sprintf(token, "%lX", (long)watch);
  39.245 -        xs_watch(h, watch->node, token);
  39.246 -    }
  39.247 -}
  39.248 -
  39.249 -/* based on watch_thread() */
  39.250 -int xs_fire_next_watch(struct xs_handle *h)
  39.251 -{
  39.252 -    char **res;
  39.253 -    char *token;
  39.254 -    char *node = NULL;
  39.255 -    struct xenbus_watch *w;
  39.256 -    int er;
  39.257 -    unsigned int num;
  39.258 -
  39.259 -    res = xs_read_watch(h, &num);
  39.260 -    if (res == NULL) 
  39.261 -        return -EAGAIN; /* in O_NONBLOCK, read_watch returns 0... */
  39.262 -
  39.263 -    node  = res[XS_WATCH_PATH];
  39.264 -    token = res[XS_WATCH_TOKEN];
  39.265 -
  39.266 -    w = find_watch(token);
  39.267 -    if (!w)
  39.268 -    {
  39.269 -        warn("unregistered watch fired");
  39.270 -        goto done;
  39.271 -    }
  39.272 -    w->callback(h, w, node);
  39.273 -
  39.274 - done:
  39.275 -    free(res);
  39.276 -    return 1;
  39.277 -}
  39.278 -
  39.279 -
  39.280 -
  39.281 -
  39.282 -/* ---------------------------------------------------------------------- */
  39.283 -
  39.284 -struct backend_info
  39.285 -{
  39.286 -    /* our communications channel */
  39.287 -    blkif_t *blkif;
  39.288 -    
  39.289 -    long int frontend_id;
  39.290 -    long int pdev;
  39.291 -    long int readonly;
  39.292 -    
  39.293 -    /* watch back end for changes */
  39.294 -    struct xenbus_watch backend_watch;
  39.295 -    char *backpath;
  39.296 -
  39.297 -    /* watch front end for changes */
  39.298 -    struct xenbus_watch watch;
  39.299 -    char *frontpath;
  39.300 -
  39.301 -    struct list_head list;
  39.302 -};
  39.303 -
  39.304 -static LIST_HEAD(belist);
  39.305 -
  39.306 -static struct backend_info *be_lookup_be(const char *bepath)
  39.307 -{
  39.308 -    struct backend_info *be;
  39.309 -
  39.310 -    list_for_each_entry(be, &belist, list)
  39.311 -        if (strcmp(bepath, be->backpath) == 0)
  39.312 -            return be;
  39.313 -    return (struct backend_info *)NULL;
  39.314 -}
  39.315 -
  39.316 -static int be_exists_be(const char *bepath)
  39.317 -{
  39.318 -    return ( be_lookup_be(bepath) != NULL );
  39.319 -}
  39.320 -
  39.321 -static struct backend_info *be_lookup_fe(const char *fepath)
  39.322 -{
  39.323 -    struct backend_info *be;
  39.324 -
  39.325 -    list_for_each_entry(be, &belist, list)
  39.326 -        if (strcmp(fepath, be->frontpath) == 0)
  39.327 -            return be;
  39.328 -    return (struct backend_info *)NULL;
  39.329 -}
  39.330 -
  39.331 -static int backend_remove(struct xs_handle *h, struct backend_info *be)
  39.332 -{
  39.333 -    /* Turn off watches. */
  39.334 -    if (be->watch.node)
  39.335 -        unregister_xenbus_watch(h, &be->watch);
  39.336 -    if (be->backend_watch.node)
  39.337 -        unregister_xenbus_watch(h, &be->backend_watch);
  39.338 -
  39.339 -    /* Unhook from be list. */
  39.340 -    list_del(&be->list);
  39.341 -
  39.342 -    /* Free everything else. */
  39.343 -    if (be->blkif)
  39.344 -        free_blkif(be->blkif);
  39.345 -    free(be->frontpath);
  39.346 -    free(be->backpath);
  39.347 -    free(be);
  39.348 -    return 0;
  39.349 -}
  39.350 -
  39.351 -static void frontend_changed(struct xs_handle *h, struct xenbus_watch *w, 
  39.352 -                     const char *fepath_im)
  39.353 -{
  39.354 -    struct backend_info *be;
  39.355 -    char *fepath = NULL;
  39.356 -    int er;
  39.357 -
  39.358 -    be = be_lookup_fe(w->node);
  39.359 -    if (be == NULL)
  39.360 -    {
  39.361 -        warn("frontend changed called for nonexistent backend! (%s)", fepath);
  39.362 -        goto fail;
  39.363 -    }
  39.364 -    
  39.365 -    /* If other end is gone, delete ourself. */
  39.366 -    if (w->node && !xs_exists(h, be->frontpath)) {
  39.367 -        DPRINTF("DELETING BE: %s\n", be->backpath);
  39.368 -        backend_remove(h, be);
  39.369 -        return;
  39.370 -    }
  39.371 -
  39.372 -    if (be->blkif == NULL || (be->blkif->state == CONNECTED))
  39.373 -        return;
  39.374 -
  39.375 -    /* Supply the information about the device the frontend needs */
  39.376 -    er = xs_transaction_start(h, be->backpath);
  39.377 -    if (er == 0) {
  39.378 -        warn("starting transaction");
  39.379 -        goto fail;
  39.380 -    }
  39.381 -    
  39.382 -    er = xs_printf(h, be->backpath, "sectors", "%lu",
  39.383 -			    be->blkif->ops->get_size(be->blkif));
  39.384 -    if (er == 0) {
  39.385 -        warn("writing sectors");
  39.386 -        goto fail;
  39.387 -    }
  39.388 -    
  39.389 -    er = xs_printf(h, be->backpath, "info", "%u",
  39.390 -			    be->blkif->ops->get_info(be->blkif));
  39.391 -    if (er == 0) {
  39.392 -        warn("writing info");
  39.393 -        goto fail;
  39.394 -    }
  39.395 -    
  39.396 -    er = xs_printf(h, be->backpath, "sector-size", "%lu",
  39.397 -			    be->blkif->ops->get_secsize(be->blkif));
  39.398 -    if (er == 0) {
  39.399 -        warn("writing sector-size");
  39.400 -        goto fail;
  39.401 -    }
  39.402 -
  39.403 -    be->blkif->state = CONNECTED;
  39.404 -
  39.405 -    xs_transaction_end(h, 0);
  39.406 -
  39.407 -    return;
  39.408 -
  39.409 - fail:
  39.410 -    free(fepath);
  39.411 -}
  39.412 -
  39.413 -
  39.414 -static void backend_changed(struct xs_handle *h, struct xenbus_watch *w, 
  39.415 -                     const char *bepath_im)
  39.416 -{
  39.417 -    struct backend_info *be;
  39.418 -    char *path = NULL, *p;
  39.419 -    int len, er;
  39.420 -    long int pdev = 0, handle;
  39.421 -
  39.422 -    be = be_lookup_be(w->node);
  39.423 -    if (be == NULL)
  39.424 -    {
  39.425 -        warn("backend changed called for nonexistent backend! (%s)", w->node);
  39.426 -        goto fail;
  39.427 -    }
  39.428 -    
  39.429 -    er = xs_gather(h, be->backpath, "physical-device", "%li", &pdev, NULL);
  39.430 -    if (er != 0) 
  39.431 -        goto fail;
  39.432 -
  39.433 -    if (be->pdev && be->pdev != pdev) {
  39.434 -        warn("changing physical-device not supported");
  39.435 -        goto fail;
  39.436 -    }
  39.437 -    be->pdev = pdev;
  39.438 -
  39.439 -    asprintf(&path, "%s/%s", w->node, "read-only");
  39.440 -    if (xs_exists(h, path))
  39.441 -        be->readonly = 1;
  39.442 -
  39.443 -    if (be->blkif == NULL) {
  39.444 -        /* Front end dir is a number, which is used as the handle. */
  39.445 -        p = strrchr(be->frontpath, '/') + 1;
  39.446 -        handle = strtoul(p, NULL, 0);
  39.447 -
  39.448 -        be->blkif = alloc_blkif(be->frontend_id);
  39.449 -        if (be->blkif == NULL) 
  39.450 -            goto fail;
  39.451 -
  39.452 -        er = blkif_init(be->blkif, handle, be->pdev, be->readonly);
  39.453 -        if (er) 
  39.454 -            goto fail;
  39.455 -
  39.456 -        DPRINTF("[BECHG]: ADDED A NEW BLKIF (%s)\n", w->node);
  39.457 -
  39.458 -        /* Pass in NULL node to skip exist test. */
  39.459 -        frontend_changed(h, &be->watch, NULL);
  39.460 -    }
  39.461 -
  39.462 - fail:
  39.463 -    free(path);
  39.464 -}
  39.465 -
  39.466 -static void blkback_probe(struct xs_handle *h, struct xenbus_watch *w, 
  39.467 -                         const char *bepath_im)
  39.468 -{
  39.469 -	struct backend_info *be = NULL;
  39.470 -	char *frontend = NULL, *bepath = NULL;
  39.471 -	int er, len;
  39.472 -
  39.473 -        bepath = strdup(bepath_im);
  39.474 -        if (!bepath)
  39.475 -            return;
  39.476 -        len = strsep_len(bepath, '/', 6);
  39.477 -        if (len < 0) 
  39.478 -            goto free_be;
  39.479 -        
  39.480 -        bepath[len] = '\0'; /*truncate the passed-in string with predjudice. */
  39.481 -
  39.482 -	be = malloc(sizeof(*be));
  39.483 -	if (!be) {
  39.484 -		warn("allocating backend structure");
  39.485 -		goto free_be;
  39.486 -	}
  39.487 -	memset(be, 0, sizeof(*be));
  39.488 -
  39.489 -	frontend = NULL;
  39.490 -	er = xs_gather(h, bepath,
  39.491 -                        "frontend-id", "%li", &be->frontend_id,
  39.492 -                        "frontend", NULL, &frontend,
  39.493 -                        NULL);
  39.494 -	if (er)
  39.495 -		goto free_be;
  39.496 -
  39.497 -	if (strlen(frontend) == 0 || !xs_exists(h, frontend)) {
  39.498 -            /* If we can't get a frontend path and a frontend-id,
  39.499 -             * then our bus-id is no longer valid and we need to
  39.500 -             * destroy the backend device.
  39.501 -             */
  39.502 -            DPRINTF("No frontend (%s)\n", frontend);
  39.503 -            goto free_be;
  39.504 -	}
  39.505 -
  39.506 -        /* Are we already tracking this device? */
  39.507 -        if (be_exists_be(bepath))
  39.508 -            goto free_be;
  39.509 -
  39.510 -        be->backpath = bepath;
  39.511 -	be->backend_watch.node = be->backpath;
  39.512 -	be->backend_watch.callback = backend_changed;
  39.513 -	er = register_xenbus_watch(h, &be->backend_watch);
  39.514 -	if (er == 0) {
  39.515 -		be->backend_watch.node = NULL;
  39.516 -		warn("error adding backend watch on %s", bepath);
  39.517 -		goto free_be;
  39.518 -	}
  39.519 -
  39.520 -	be->frontpath = frontend;
  39.521 -	be->watch.node = be->frontpath;
  39.522 -	be->watch.callback = frontend_changed;
  39.523 -	er = register_xenbus_watch(h, &be->watch);
  39.524 -	if (er == 0) {
  39.525 -		be->watch.node = NULL;
  39.526 -		warn("adding frontend watch on %s", be->frontpath);
  39.527 -		goto free_be;
  39.528 -	}
  39.529 -
  39.530 -        list_add(&be->list, &belist);
  39.531 -
  39.532 -        DPRINTF("[PROBE]: ADDED NEW DEVICE (%s)\n", bepath_im);
  39.533 -
  39.534 -	backend_changed(h, &be->backend_watch, bepath);
  39.535 -	return;
  39.536 -
  39.537 - free_be:
  39.538 -	if (be && (be->backend_watch.node))
  39.539 -            unregister_xenbus_watch(h, &be->backend_watch);
  39.540 -        free(frontend);
  39.541 -        free(bepath);
  39.542 -	free(be);
  39.543 -	return;
  39.544 -}
  39.545 -
  39.546 -
  39.547 -int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname)
  39.548 -{
  39.549 -    char *domid, *path;
  39.550 -    struct xenbus_watch *vbd_watch;
  39.551 -    int er;
  39.552 -
  39.553 -    domid = get_dom_domid(h, domname);
  39.554 -
  39.555 -    DPRINTF("%s: %s\n", domname, (domid != NULL) ? domid : "[ not found! ]");
  39.556 -
  39.557 -    asprintf(&path, "/local/domain/%s/backend/vbd", domid);
  39.558 -    if (path == NULL) 
  39.559 -        return -ENOMEM;
  39.560 -
  39.561 -    vbd_watch = (struct xenbus_watch *)malloc(sizeof(struct xenbus_watch));
  39.562 -    vbd_watch->node     = path;
  39.563 -    vbd_watch->callback = blkback_probe;
  39.564 -    er = register_xenbus_watch(h, vbd_watch);
  39.565 -    if (er == 0) {
  39.566 -        warn("Error adding vbd probe watch %s", path);
  39.567 -        return -EINVAL;
  39.568 -    }
  39.569 -
  39.570 -    return 0;
  39.571 -}