ia64/xen-unstable

changeset 3977:eb791201f4e2

bitkeeper revision 1.1236.7.1 (42277b06gMBo3oqaq35om--uwNImNg)

Initial checkin of parallax code.
See README-PARALLAX for details.

Signed-off-by: andrew.warfield@cl.cam.ac.uk
author akw27@arcadians.cl.cam.ac.uk
date Thu Mar 03 21:00:54 2005 +0000 (2005-03-03)
parents 4202d86eff9f
children 33aa920b497d
files .rootkeys tools/blktap/Makefile tools/blktap/README-PARALLAX tools/blktap/blktaplib.h tools/blktap/blockstore.c tools/blktap/blockstore.h tools/blktap/parallax.c tools/blktap/radix.c tools/blktap/radix.h tools/blktap/snaplog.c tools/blktap/snaplog.h tools/blktap/vdi.c tools/blktap/vdi.h tools/blktap/vdi_create.c tools/blktap/vdi_fill.c tools/blktap/vdi_list.c tools/blktap/vdi_snap.c tools/blktap/vdi_snap_list.c tools/blktap/vdi_tree.c tools/blktap/vdi_validate.c tools/python/xen/xend/server/blkif.py
line diff
     1.1 --- a/.rootkeys	Mon Feb 28 20:58:11 2005 +0000
     1.2 +++ b/.rootkeys	Thu Mar 03 21:00:54 2005 +0000
     1.3 @@ -317,6 +317,7 @@ 3f776bd1Hy9rn69ntXBhPReUFw9IEA tools/Mak
     1.4  40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Rules.mk
     1.5  4209033eUwhDBJ_bxejiv5c6gjXS4A tools/blktap/Makefile
     1.6  4209033ewLAHdhGrT_2jo3Gb_5bDcA tools/blktap/README
     1.7 +42277b02mYXxgijE7MFeUe9d8eldMw tools/blktap/README-PARALLAX
     1.8  4209033eX_Xw94wHaOCtnU9nOAtSJA tools/blktap/blkaio.c
     1.9  4209033egwf6LDxM2hbaqi9rRdZy4A tools/blktap/blkaiolib.c
    1.10  4209033f9yELLK85Ipo2oKjr3ickgQ tools/blktap/blkaiolib.h
    1.11 @@ -335,10 +336,26 @@ 42090340c7pQbh0Km8zLcEqPd_3zIg tools/blk
    1.12  42090340_mvZtozMjghPJO0qsjk4NQ tools/blktap/blkint.h
    1.13  42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blktap/blktaplib.c
    1.14  42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h
    1.15 +42277b02WrfP1meTDPv1M5swFq8oHQ tools/blktap/blockstore.c
    1.16 +42277b02P1C0FYj3gqwTZUD8sxKCug tools/blktap/blockstore.h
    1.17  42090340B3mDvcxvd9ehDHUkg46hvw tools/blktap/libgnbd/Makefile
    1.18  42090340ZWkc5Xhf9lpQmDON8HJXww tools/blktap/libgnbd/gnbdtest.c
    1.19  42090340ocMiUScJE3OpY7QNunvSbg tools/blktap/libgnbd/libgnbd.c
    1.20  42090340G5_F_EeVnPORKB0pTMGGhA tools/blktap/libgnbd/libgnbd.h
    1.21 +42277b03930x2TJT3PZlw6o0GERXpw tools/blktap/parallax.c
    1.22 +42277b03XQYq8bujXSz7JAZ8N7j_pA tools/blktap/radix.c
    1.23 +42277b03vZ4-jno_mgKmAcCW3ycRAg tools/blktap/radix.h
    1.24 +42277b03U_wLHL-alMA0bfxGlqldXg tools/blktap/snaplog.c
    1.25 +42277b04Ryya-z662BEx8HnxNN0dGQ tools/blktap/snaplog.h
    1.26 +42277b04LxFjptgZ75Z98DUAso4Prg tools/blktap/vdi.c
    1.27 +42277b04tt5QkIvs8She8CQqH5kwpg tools/blktap/vdi.h
    1.28 +42277b04zMAhB0_946sHQ_H2vwnt0Q tools/blktap/vdi_create.c
    1.29 +42277b04xB_iUmiSm6nKcy8OV8bckA tools/blktap/vdi_fill.c
    1.30 +42277b045CJGD_rKH-ZT_-0X4knhWA tools/blktap/vdi_list.c
    1.31 +42277b043ZKx0NJSbcgptQctQ5rerg tools/blktap/vdi_snap.c
    1.32 +42277b043Fjy5-H7LyBtUPyDlZFo6A tools/blktap/vdi_snap_list.c
    1.33 +42277b04vhqD6Lq3WmGbaESoAAKdhw tools/blktap/vdi_tree.c
    1.34 +42277b047H8fTVyUf75BWAjh6Zpsqg tools/blktap/vdi_validate.c
    1.35  4124b307nRyK3dhn1hAsvrY76NuV3g tools/check/Makefile
    1.36  4124b307vHLUWbfpemVefmaWDcdfag tools/check/README
    1.37  4124b307jt7T3CHysgl9LijNHSe1tA tools/check/check_brctl
     2.1 --- a/tools/blktap/Makefile	Mon Feb 28 20:58:11 2005 +0000
     2.2 +++ b/tools/blktap/Makefile	Thu Mar 03 21:00:54 2005 +0000
     2.3 @@ -7,11 +7,34 @@ CC       = gcc
     2.4  XEN_ROOT = ../..
     2.5  include $(XEN_ROOT)/tools/Rules.mk
     2.6  
     2.7 +BLKTAP_INSTALL_DIR	= /usr/sbin
     2.8 +
     2.9 +INSTALL         = install
    2.10 +INSTALL_PROG    = $(INSTALL) -m0755
    2.11 +INSTALL_DIR     = $(INSTALL) -d -m0755
    2.12 +
    2.13  INCLUDES += 
    2.14  
    2.15  SRCS     :=
    2.16  SRCS     += blktaplib.c
    2.17  
    2.18 +PLX_SRCS := 
    2.19 +PLX_SRCS += vdi.c 
    2.20 +PLX_SRCS += radix.c 
    2.21 +PLX_SRCS += blockstore.c 
    2.22 +PLX_SRCS += snaplog.c
    2.23 +VDI_SRCS := $(PLX_SRCS)
    2.24 +PLX_SRCS += parallax.c
    2.25 +
    2.26 +VDI_TOOLS :=
    2.27 +VDI_TOOLS += vdi_create
    2.28 +VDI_TOOLS += vdi_list
    2.29 +VDI_TOOLS += vdi_snap
    2.30 +VDI_TOOLS += vdi_snap_list
    2.31 +VDI_TOOLS += vdi_fill
    2.32 +VDI_TOOLS += vdi_tree
    2.33 +VDI_TOOLS += vdi_validate
    2.34 +
    2.35  CFLAGS   += -Wall
    2.36  CFLAGS   += -Werror
    2.37  CFLAGS   += -Wno-unused
    2.38 @@ -30,7 +53,7 @@ OBJS     = $(patsubst %.c,%.o,$(SRCS))
    2.39  
    2.40  LIB      = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
    2.41  
    2.42 -all: mk-symlinks blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio
    2.43 +all: mk-symlinks blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd $(VDI_TOOLS) parallax 
    2.44  	$(MAKE) $(LIB)
    2.45  
    2.46  LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
    2.47 @@ -45,16 +68,14 @@ mk-symlinks:
    2.48  	  ln -sf ../../$(LINUX_ROOT)/include/asm-xen/linux-public/*.h . )
    2.49  
    2.50  install: all
    2.51 -	mkdir -p $(prefix)/usr/lib
    2.52 -	mkdir -p $(prefix)/usr/include
    2.53 -	install -m0755 $(LIB) $(prefix)/usr/lib
    2.54 -	ln -sf libblktap.so.$(MAJOR).$(MINOR) \
    2.55 -                $(prefix)/usr/lib/libblktap.so.$(MAJOR)
    2.56 -	ln -sf libblktap.so.$(MAJOR) $(prefix)/usr/lib/libblktap.so
    2.57 -	install -m0644 blktaplib.h $(prefix)/usr/include
    2.58 +	$(INSTALL_DIR) -p $(DESTDIR)/usr/lib $(DESTDIR)/usr/include
    2.59 +	$(INSTALL_DIR) -p $(DESTDIR)/$(BLKTAP_INSTALL_DIR)
    2.60 +	$(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/lib
    2.61 +	$(INSTALL) -m0644 blktaplib.h $(DESTDIR)/usr/include  # header: data, not executable
    2.62 +	$(INSTALL_PROG) blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd $(DESTDIR)/$(BLKTAP_INSTALL_DIR)
    2.63  
    2.64  clean:
    2.65 -	rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio
    2.66 +	rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio $(VDI_TOOLS) parallax
    2.67  
    2.68  rpm: all
    2.69  	rm -rf staging
    2.70 @@ -93,6 +114,38 @@ blkcowgnbd: $(LIB) blkgnbd.c blkcowlib.c
    2.71  blkaio: $(LIB) blkaio.c blkaiolib.c
    2.72  	$(CC) $(CFLAGS) -o blkaio -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkaio.c blkaiolib.c -laio -lpthread
    2.73  
    2.74 +parallax: $(LIB) $(PLX_SRCS)
    2.75 +	$(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap $(PLX_SRCS) libgnbd/libgnbd.a
    2.76 +
    2.77 +vdi_test: $(LIB) $(VDI_SRCS)
    2.78 +	$(CC) $(CFLAGS) -g3 -o vdi_test -DVDI_STANDALONE $(VDI_SRCS)
    2.79 +
    2.80 +vdi_list: $(LIB) vdi_list.c $(VDI_SRCS)
    2.81 +	$(CC) $(CFLAGS) -g3 -o vdi_list vdi_list.c $(VDI_SRCS)
    2.82 +
    2.83 +vdi_create: $(LIB) vdi_create.c $(VDI_SRCS)
    2.84 +	$(CC) $(CFLAGS) -g3 -o vdi_create vdi_create.c $(VDI_SRCS)
    2.85 +
    2.86 +vdi_snap: $(LIB) vdi_snap.c $(VDI_SRCS)
    2.87 +	$(CC) $(CFLAGS) -g3 -o vdi_snap vdi_snap.c $(VDI_SRCS)
    2.88 +
    2.89 +vdi_snap_list: $(LIB) vdi_snap_list.c $(VDI_SRCS)
    2.90 +	$(CC) $(CFLAGS) -g3 -o vdi_snap_list vdi_snap_list.c $(VDI_SRCS)
    2.91 +
    2.92 +vdi_tree: $(LIB) vdi_tree.c $(VDI_SRCS)
    2.93 +	$(CC) $(CFLAGS) -g3 -o vdi_tree vdi_tree.c $(VDI_SRCS)
    2.94 +
    2.95 +vdi_fill: $(LIB) vdi_fill.c $(VDI_SRCS)
    2.96 +	$(CC) $(CFLAGS) -g3 -o vdi_fill vdi_fill.c $(VDI_SRCS)
    2.97 +
    2.98 +vdi_validate: $(LIB) vdi_validate.c $(VDI_SRCS)
    2.99 +	$(CC) $(CFLAGS) -g3 -o vdi_validate vdi_validate.c $(VDI_SRCS)
   2.100 +
   2.101 +
   2.102 +rdx_cmp: $(LIB) rdx_cmp.c $(VDI_SRCS)
   2.103 +	$(CC) $(CFLAGS) -g3 -o rdx_cmp rdx_cmp.c $(VDI_SRCS)
   2.104 +
   2.105 +
   2.106  .PHONY: TAGS clean install mk-symlinks rpm
   2.107  TAGS:
   2.108  	etags -t $(SRCS) *.h
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/tools/blktap/README-PARALLAX	Thu Mar 03 21:00:54 2005 +0000
     3.3 @@ -0,0 +1,177 @@
     3.4 +Parallax Quick Overview
     3.5 +March 3, 2005
     3.6 +
     3.7 +This is intended to provide a quick set of instructions to let you
     3.8 +guys play with the current parallax source.  In its current form, the
     3.9 +code will let you run an arbitrary number of VMs off of a single disk
    3.10 +image, doing copy-on-write as they make updates.  Each domain is
    3.11 +assigned a virtual disk image (VDI), which may be based on a snapshot
    3.12 +of an existing image.  All of the VDI and snapshot management should
    3.13 +currently work.
    3.14 +
    3.15 +The current implementation uses a single file as a blockstore for
    3.16 +_everything_; this will soon be replaced by the fancier backend code
    3.17 +and the local cache.  As it stands, Parallax will create
    3.18 +"blockstore.dat" in the directory that you run it from, and use
    3.19 +largefile support to make this grow to unfathomable girth.  So, you
    3.20 +probably want to run the daemon off of a local disk, with a lot of
    3.21 +free space.
    3.22 +
    3.23 +Here's how to get going:
    3.24 +
    3.25 +0. Setup:
    3.26 +---------
    3.27 +
    3.28 +Pick a local directory on a disk with lots of room.  You should be
    3.29 +running from a privileged domain (e.g. dom0) with the blocktap driver
    3.30 +configured in and the block backend NOT configured in.
    3.31 +
    3.32 +For convenience (for the moment) copy all of the vdi tools (vdi_*) and
    3.33 +the parallax daemon from tools/blktap into this directory.
    3.34 +
    3.35 +1. Populate the blockstore:
    3.36 +---------------------------
    3.37 +
    3.38 +First you need to put at least one image into the blockstore.  You
    3.39 +will need a disk image, either as a file or local partition.  My
    3.40 +general approach has been to
    3.41 +
    3.42 +(a) make a really big sparse file with 
    3.43 +
    3.44 +        dd if=/dev/zero of=./image bs=4K count=1 seek=[big value]
    3.45 +
    3.46 +(b) put a filesystem into it
    3.47 +
    3.48 +        mkfs.ext3 ./image
    3.49 +
    3.50 +(c) mount it using loopback
    3.51 +
    3.52 +        mkdir ./mnt
    3.53 +        mount -o loop ./image ./mnt
    3.54 +
    3.55 +(d) cd into it and untar one of the image files from srg-roots.
    3.56 +
    3.57 +        cd mnt
    3.58 +        tar ...
    3.59 +
    3.60 +NOTE: Beware if your system is FC3.  mkfs is not compatible with old
    3.61 +versions of fedora, and so you don't have much choice but to install
    3.62 +further fc3 images if you have used the fc3 version of mkfs.
    3.63 +
    3.64 +(e) unmount the image
    3.65 +
    3.66 +        cd ..
    3.67 +        umount mnt
    3.68 +
    3.69 +(f) now, create a new VDI to hold the image 
    3.70 +
    3.71 +        ./vdi_create "My new FC3 VDI"
    3.72 +
    3.73 +(g) get the id of the new VDI.
    3.74 +
    3.75 +        ./vdi_list
    3.76 +
    3.77 +        |      0                     My new FC3 VDI
    3.78 +
    3.79 +(0 is the VDI id... create a few more if you want.)
    3.80 +
    3.81 +(h) hoover your image into the new VDI.
    3.82 +
    3.83 +        ./vdi_fill 0 ./image
    3.84 +
    3.85 +This will pull the entire image into the blockstore and set up a
    3.86 +mapping tree for it for VDI 0.  Passing a device (i.e. /dev/sda3)
    3.87 +should also work, but vdi_fill has NO notion of sparseness yet, so you
    3.88 +are going to pump a block into the store for each block you read.
    3.89 +
    3.90 +vdi_fill will count up until it is done, and you should be ready to
    3.91 +go.  If you want to be anal, you can use vdi_validate to test the VDI
    3.92 +against the original image.
    3.93 +
    3.94 +2. Create some extra VDIs
    3.95 +-------------------------
    3.96 +
    3.97 +VDIs are actually a list of snapshots, and each snapshot is a full
    3.98 +image of mappings.  So, to preserve an immutable copy of a current
    3.99 +VDI, do this:
   3.100 +
   3.101 +(a) Snapshot your new VDI.
   3.102 +
   3.103 +        ./vdi_snap 0
   3.104 +
   3.105 +Snapshotting writes the current radix root to the VDI's snapshot log,
   3.106 +and assigns it a new writable root.
   3.107 +
   3.108 +(b) look at the VDI's snapshot log.
   3.109 +
   3.110 +        ./vdi_snap_list 0
   3.111 +
   3.112 +        | 16   0      Thu Mar  3 19:27:48 2005 565111           31
   3.113 +
   3.114 +The first two columns constitute a snapshot id and represent the
   3.115 +(block, offset) of the snapshot record.  The Date tells you when the
   3.116 +snapshot was made, and 31 is the radix root node of the snapshot.
   3.117 +
   3.118 +(c) Create a new VDI, based on that snapshot, and look at the list.
   3.119 +
   3.120 +        ./vdi_create "FC3 - Copy 1" 16 0
   3.121 +        ./vdi_list
   3.122 +
   3.123 +        |      0                     My new FC3 VDI
   3.124 +        |      1                       FC3 - Copy 1
   3.125 +
   3.126 +NOTE: If you have Graphviz installed on your system, you can use
   3.127 +vdi_tree to generate a postscript of your current set of VDIs and
   3.128 +snapshots.
   3.129 +
   3.130 +
   3.131 +Create as many VDIs as you need for the VMs that you want to run.
   3.132 +
   3.133 +3. Boot some VMs:
   3.134 +-----------------
   3.135 +
    3.136 +Parallax currently uses a hack in xend to pass the VDI id, so you need to
    3.137 +modify the disk line of the VM config that is going to mount it.
   3.138 +
   3.139 +(a) set up your vm config, by using the following disk line:
   3.140 +
   3.141 +        disk = ['parallax:1,sda1,w,0' ]
   3.142 +
   3.143 +This example uses VDI 1 (from vdi_list above), presents it as sda1
   3.144 +(writable), and uses dom 0 as the backend.  If you were running the
   3.145 +daemon (and tap driver) in some domain other than 0, you would change
   3.146 +this last parameter.
   3.147 +
   3.148 +NOTE: You'll need to have reinstalled xend/tools prior to booting the vm, so that it knows what to do with "parallax:".
   3.149 +
   3.150 +(b) Run parallax in the backend domain.
   3.151 +
   3.152 +        ./parallax
   3.153 +
   3.154 +(c) create your new domain.
   3.155 +
   3.156 +        xm create ...
   3.157 +
   3.158 +---
   3.159 +
   3.160 +That's pretty much all there is to it at the moment.  Hope this is
   3.161 +clear enough to get you going.  Now, a few serious caveats that will
   3.162 +be sorted out in the almost immediate future:
   3.163 +
   3.164 +WARNINGS:
   3.165 +---------
   3.166 +
   3.167 +1. There is NO locking in the VDI tools at the moment, so I'd avoid
   3.168 +running them in parallel, or more importantly, running them while the
   3.169 +daemon is running.
   3.170 +
   3.171 +2. I doubt that xend will be very happy about restarting if you have
   3.172 +parallax-using domains.  So if it dies while there are active parallax
   3.173 +doms, you may need to reboot.
   3.174 +
   3.175 +3. I've turned off write-in-place.  So at the moment, EVERY block
   3.176 +write is a log append on the blockstore.  I've been having some probs
   3.177 +with the radix tree's marking of writable blocks after snapshots and
   3.178 +will sort this out very soon.
   3.179 +
   3.180 +
     4.1 --- a/tools/blktap/blktaplib.h	Mon Feb 28 20:58:11 2005 +0000
     4.2 +++ b/tools/blktap/blktaplib.h	Thu Mar 03 21:00:54 2005 +0000
     4.3 @@ -8,6 +8,9 @@
     4.4  #ifndef __BLKTAPLIB_H__
     4.5  #define __BLKTAPLIB_H__
     4.6  
     4.7 +#ifndef __SHORT_INT_TYPES__
     4.8 +#define __SHORT_INT_TYPES__
     4.9 +
    4.10  #include <stdint.h>
    4.11  
    4.12  typedef uint8_t            u8;
    4.13 @@ -18,6 +21,8 @@ typedef int8_t             s8;
    4.14  typedef int16_t            s16;
    4.15  typedef int32_t            s32;
    4.16  typedef int64_t            s64;
    4.17 +
    4.18 +#endif /*  __SHORT_INT_TYPES__ */
    4.19                                                                                  
    4.20  #if defined(__i386__)
    4.21  #define rmb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" )
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/tools/blktap/blockstore.c	Thu Mar 03 21:00:54 2005 +0000
     5.3 @@ -0,0 +1,135 @@
     5.4 +/**************************************************************************
     5.5 + * 
     5.6 + * blockstore.c
     5.7 + *
     5.8 + * Simple block store interface
     5.9 + *
    5.10 + */
    5.11 + 
    5.12 +#include <fcntl.h>
    5.13 +#include <unistd.h>
    5.14 +#include <stdio.h>
    5.15 +#include <stdlib.h>
    5.16 +#include <string.h>
    5.17 +#include <sys/types.h>
    5.18 +#include <sys/stat.h>
    5.19 +#include "blockstore.h"
    5.20 +
    5.21 +static int block_fp = -1;
    5.22 + 
    5.23 +/**
    5.24 + * readblock: read a block from disk
    5.25 + *   @id: block id to read (1-based: stored at offset (id-1)*BLOCK_SIZE)
    5.26 + *
    5.27 + *   @return: pointer to a malloc'd block (caller frees), NULL on error
    5.28 + */
    5.29 +
    5.30 +void *readblock(u64 id) {
    5.31 +    void *block;
    5.32 +    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
    5.33 +        perror("readblock lseek"); /* first: printf may clobber errno */
    5.34 +        printf ("%lld\n", (long long)((id - 1) * BLOCK_SIZE));
    5.35 +        return NULL;
    5.36 +    }
    5.37 +    if ((block = malloc(BLOCK_SIZE)) == NULL) {
    5.38 +        perror("readblock malloc");
    5.39 +        return NULL;
    5.40 +    }
    5.41 +    if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
    5.42 +        perror("readblock read");
    5.43 +        free(block);
    5.44 +        return NULL;
    5.45 +    }
    5.46 +    return block;
    5.47 +}
    5.48 +
    5.49 +/**
    5.50 + * writeblock: write an existing block to disk
    5.51 + *   @id: block id (1-based: written at offset (id-1)*BLOCK_SIZE)
    5.52 + *   @block: pointer to block (BLOCK_SIZE bytes are written)
    5.53 + *
    5.54 + *   @return: zero on success, -1 on failure (a short write is a failure)
    5.55 + */
    5.56 +int writeblock(u64 id, void *block) {
    5.57 +    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
    5.58 +        perror("writeblock lseek");
    5.59 +        return -1;
    5.60 +    }
    5.61 +    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
    5.62 +        perror("writeblock write");
    5.63 +        return -1;
    5.64 +    }
    5.65 +    return 0;
    5.66 +}
    5.67 +
    5.68 +/**
    5.69 + * allocblock: append a new block to the end of the store
    5.70 + *   @block: pointer to block
    5.71 + *
    5.72 + *   @return: new (1-based) id of block on disk, 0 on error
    5.73 + */
    5.74 +static u64 lastblock = 0;
    5.75 +
    5.76 +u64 allocblock(void *block) {
    5.77 +    u64 lb;
    5.78 +    off64_t pos = lseek64(block_fp, 0, SEEK_END);
    5.79 +    if (pos == (off64_t)-1) {
    5.80 +        perror("allocblock lseek");
    5.81 +        return 0;
    5.82 +    }
    5.83 +    if (pos % BLOCK_SIZE != 0) {
    5.84 +        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
    5.85 +        return 0;
    5.86 +    }
    5.87 +    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
    5.88 +        perror("allocblock write");
    5.89 +        return 0;
    5.90 +    }
    5.91 +    lb = pos / BLOCK_SIZE + 1;
    5.92 +    
    5.93 +    if (lb <= lastblock)
    5.94 +        printf("[*** %llu already allocated! ***]\n", (unsigned long long)lb);
    5.95 +    
    5.96 +    lastblock = lb;
    5.97 +    return lb;
    5.98 +}
    5.99 +
   5.100 +
   5.101 +/**
   5.102 + * newblock: get a new in-memory block set to zeros
   5.103 + *
   5.104 + *   @return: pointer to new block, NULL on error
   5.105 + */
   5.106 +void *newblock() {
   5.107 +    void *block = malloc(BLOCK_SIZE);
   5.108 +    if (block == NULL) {
   5.109 +        perror("newblock");
   5.110 +        return NULL;
   5.111 +    }
   5.112 +    memset(block, 0, BLOCK_SIZE);
   5.113 +    return block;
   5.114 +}
   5.115 +
   5.116 +
    5.117 +/**
    5.118 + * freeblock: unallocate an in-memory block
    5.119 + *   @block: block to be freed; NULL is accepted and ignored
    5.120 + *   (takes no block id: only the in-memory copy is released)
    5.121 + */
    5.122 +void freeblock(void *block) {
    5.123 +    if (block != NULL)
    5.124 +        free(block);
    5.125 +}
   5.126 +
   5.127 +
   5.128 +int __init_blockstore(void)
   5.129 +{
   5.130 +    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
   5.131 +
   5.132 +    if (block_fp < 0) {
   5.133 +        perror("open");
   5.134 +        return -1;
   5.135 +    }
   5.136 +    
   5.137 +    return 0;
   5.138 +}
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/tools/blktap/blockstore.h	Thu Mar 03 21:00:54 2005 +0000
     6.3 @@ -0,0 +1,45 @@
     6.4 +/**************************************************************************
     6.5 + * 
     6.6 + * blockstore.h
     6.7 + *
     6.8 + * Simple block store interface
     6.9 + *
    6.10 + */
    6.11 + 
    6.12 +#ifndef __BLOCKSTORE_H__
    6.13 +#define __BLOCKSTORE_H__
    6.14 +
    6.15 +#ifndef __SHORT_INT_TYPES__
    6.16 +#define __SHORT_INT_TYPES__
    6.17 +
    6.18 +#include <stdint.h>
    6.19 +
    6.20 +typedef uint8_t            u8;
    6.21 +typedef uint16_t           u16;
    6.22 +typedef uint32_t           u32;
    6.23 +typedef uint64_t           u64;
    6.24 +typedef int8_t             s8;
    6.25 +typedef int16_t            s16;
    6.26 +typedef int32_t            s32;
    6.27 +typedef int64_t            s64;
    6.28 +                           
    6.29 +#endif /*  __SHORT_INT_TYPES__ */
    6.30 +
    6.31 +#define BLOCK_SIZE  4096
    6.32 +#define BLOCK_SHIFT   12
    6.33 +#define BLOCK_MASK  0xfffffffffffff000LL
    6.34 +
    6.35 +/* XXX SMH: where is the below supposed to be defined???? */
    6.36 +#ifndef SECTOR_SHIFT 
    6.37 +#define SECTOR_SHIFT   9 
    6.38 +#endif
    6.39 +
    6.40 +
    6.41 +extern void *newblock();
    6.42 +extern void *readblock(u64 id);
    6.43 +extern u64 allocblock(void *block);
    6.44 +extern int writeblock(u64 id, void *block);
    6.45 +extern void freeblock(void *block);
    6.46 +extern int __init_blockstore(void);
    6.47 +
    6.48 +#endif /* __BLOCKSTORE_H__ */
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/tools/blktap/parallax.c	Thu Mar 03 21:00:54 2005 +0000
     7.3 @@ -0,0 +1,498 @@
     7.4 +/**************************************************************************
     7.5 + * 
     7.6 + * parallax.c
     7.7 + *
     7.8 + * The Parallax Storage Server
     7.9 + *
    7.10 + */
    7.11 + 
    7.12 +
    7.13 +#include <stdio.h>
    7.14 +#include <stdlib.h>
    7.15 +#include <string.h>
    7.16 +#include "blktaplib.h"
    7.17 +#include "blockstore.h"
    7.18 +#include "vdi.h"
    7.19 +
    7.20 +#define PARALLAX_DEV     61440
    7.21 +
    7.22 +#if 1
    7.23 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
    7.24 +#else
    7.25 +#define DPRINTF(_f, _a...) ((void)0)
    7.26 +#endif
    7.27 +
    7.28 +/* ------[ session records ]----------------------------------------------- */
    7.29 +
    7.30 +#define BLKIF_HASHSZ 1024
    7.31 +#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
    7.32 +
    7.33 +#define VDI_HASHSZ 16
    7.34 +#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1))
    7.35 +
    7.36 +typedef struct blkif {
    7.37 +    domid_t       domid;
    7.38 +    unsigned int  handle;
    7.39 +    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
    7.40 +    vdi_t        *vdi_hash[VDI_HASHSZ];
    7.41 +    struct blkif *hash_next;
    7.42 +} blkif_t;
    7.43 +
    7.44 +static blkif_t      *blkif_hash[BLKIF_HASHSZ];
    7.45 +
    7.46 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
    7.47 +{
    7.48 +    if ( handle != 0 )
    7.49 +        printf("blktap/parallax don't currently support non-0 dev handles!\n");
    7.50 +    
    7.51 +    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
    7.52 +    while ( (blkif != NULL) && 
    7.53 +            ((blkif->domid != domid) || (blkif->handle != handle)) )
    7.54 +        blkif = blkif->hash_next;
    7.55 +    return blkif;
    7.56 +}
    7.57 +
    7.58 +vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device)
    7.59 +{
    7.60 +    vdi_t *vdi = blkif->vdi_hash[VDI_HASH(device)];
    7.61 +    
    7.62 +    while ((vdi != NULL) && (vdi->vdevice != device))
    7.63 +        vdi = vdi->next;
    7.64 +    
    7.65 +    return vdi;
    7.66 +}
    7.67 +
    7.68 +/* ------[ control message handling ]-------------------------------------- */
    7.69 +
    7.70 +void blkif_create(blkif_be_create_t *create)
    7.71 +{
    7.72 +    domid_t       domid  = create->domid;
    7.73 +    unsigned int  handle = create->blkif_handle;
    7.74 +    blkif_t     **pblkif, *blkif;
    7.75 +
    7.76 +    DPRINTF("parallax (blkif_create): create is %p\n", create); 
    7.77 +    
    7.78 +    if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL )
    7.79 +    {
    7.80 +        DPRINTF("Could not create blkif: out of memory\n");
    7.81 +        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
    7.82 +        return;
    7.83 +    }
    7.84 +
    7.85 +    memset(blkif, 0, sizeof(*blkif));
    7.86 +    blkif->domid  = domid;
    7.87 +    blkif->handle = handle;
    7.88 +    blkif->status = DISCONNECTED;
    7.89 +/*
    7.90 +    spin_lock_init(&blkif->vbd_lock);
    7.91 +    spin_lock_init(&blkif->blk_ring_lock);
    7.92 +    atomic_set(&blkif->refcnt, 0);
    7.93 +*/
    7.94 +    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
    7.95 +    while ( *pblkif != NULL )
    7.96 +    {
    7.97 +        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
    7.98 +        {
    7.99 +            DPRINTF("Could not create blkif: already exists\n");
   7.100 +            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
   7.101 +            free(blkif);
   7.102 +            return;
   7.103 +        }
   7.104 +        pblkif = &(*pblkif)->hash_next;
   7.105 +    }
   7.106 +
   7.107 +    blkif->hash_next = *pblkif;
   7.108 +    *pblkif = blkif;
   7.109 +
   7.110 +    DPRINTF("Successfully created blkif\n");
   7.111 +    create->status = BLKIF_BE_STATUS_OKAY;
   7.112 +}
   7.113 +
   7.114 +void blkif_destroy(blkif_be_destroy_t *destroy)
   7.115 +{
   7.116 +    domid_t       domid  = destroy->domid;
   7.117 +    unsigned int  handle = destroy->blkif_handle;
   7.118 +    blkif_t     **pblkif, *blkif;
   7.119 +
   7.120 +    DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy); 
   7.121 +    
   7.122 +    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
   7.123 +    while ( (blkif = *pblkif) != NULL )
   7.124 +    {
   7.125 +        if ( (blkif->domid == domid) && (blkif->handle == handle) )
   7.126 +        {
   7.127 +            if ( blkif->status != DISCONNECTED )
   7.128 +                goto still_connected;
   7.129 +            goto destroy;
   7.130 +        }
   7.131 +        pblkif = &blkif->hash_next;
   7.132 +    }
   7.133 +
   7.134 +    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   7.135 +    return;
   7.136 +
   7.137 + still_connected:
   7.138 +    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
   7.139 +    return;
   7.140 +
   7.141 + destroy:
   7.142 +    *pblkif = blkif->hash_next;
   7.143 +    /* destroy_all_vbds(blkif); */
   7.144 +    free(blkif);
   7.145 +    destroy->status = BLKIF_BE_STATUS_OKAY;
   7.146 +}
   7.147 +
   7.148 +void vbd_grow(blkif_be_vbd_grow_t *grow) 
   7.149 +{
   7.150 +    blkif_t            *blkif;
   7.151 +    vdi_t              *vdi, **vdip;
   7.152 +    blkif_vdev_t        vdevice = grow->vdevice;
   7.153 +
   7.154 +    DPRINTF("parallax (vbd_grow): grow=%p\n", grow); 
   7.155 +    
   7.156 +    blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
   7.157 +    if ( blkif == NULL )
   7.158 +    {
   7.159 +        DPRINTF("vbd_grow attempted for non-existent blkif (%u,%u)\n", 
   7.160 +                grow->domid, grow->blkif_handle); 
   7.161 +        grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   7.162 +        return;
   7.163 +    }
   7.164 +
   7.165 +    /* VDI identifier is in grow->extent.sector_start */
   7.166 +    DPRINTF("vbd_grow: grow->extent.sector_start (id) is %llx\n", 
   7.167 +            grow->extent.sector_start);
   7.168 +
   7.169 +    vdi = vdi_get(grow->extent.sector_start);
   7.170 +    if (vdi == NULL)
   7.171 +    {
   7.172 +        printf("parallax (vbd_grow): VDI %llx not found.\n",
   7.173 +               grow->extent.sector_start);
   7.174 +        grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
   7.175 +        return;
   7.176 +    }
   7.177 +    
   7.178 +    vdi->next = NULL;
   7.179 +    vdi->vdevice = vdevice;
   7.180 +    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
   7.181 +    while (*vdip != NULL)
   7.182 +        vdip = &(*vdip)->next;
   7.183 +    *vdip = vdi;
   7.184 +    
   7.185 +    DPRINTF("vbd_grow: happy return!\n"); 
   7.186 +    grow->status = BLKIF_BE_STATUS_OKAY;
   7.187 +}
   7.188 +
   7.189 +int parallax_control(control_msg_t *msg)
   7.190 +{
   7.191 +    domid_t  domid;
   7.192 +    int      ret;
   7.193 +
   7.194 +    DPRINTF("parallax_control: msg is %p\n", msg); 
   7.195 +    
   7.196 +    if (msg->type != CMSG_BLKIF_BE) 
   7.197 +    {
   7.198 +        printf("Unexpected control message (%d)\n", msg->type);
   7.199 +        return 0;
   7.200 +    }
   7.201 +    
   7.202 +    switch(msg->subtype)
   7.203 +    {
   7.204 +    case CMSG_BLKIF_BE_CREATE:
   7.205 +        if ( msg->length != sizeof(blkif_be_create_t) )
   7.206 +            goto parse_error;
   7.207 +        blkif_create((blkif_be_create_t *)msg->msg);
   7.208 +        break;   
   7.209 +        
   7.210 +    case CMSG_BLKIF_BE_DESTROY:
   7.211 +        if ( msg->length != sizeof(blkif_be_destroy_t) )
   7.212 +            goto parse_error;
   7.213 +        blkif_destroy((blkif_be_destroy_t *)msg->msg);
   7.214 +        break;  
   7.215 +        
   7.216 +    case CMSG_BLKIF_BE_VBD_GROW:
   7.217 +        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
   7.218 +            goto parse_error;
   7.219 +        vbd_grow((blkif_be_vbd_grow_t *)msg->msg);
   7.220 +        break;
   7.221 +    }
   7.222 +    return 0;
   7.223 +parse_error:
   7.224 +    printf("Bad control message!\n");
   7.225 +    return 0;
   7.226 +    
   7.227 +}    
   7.228 +
   7.229 +/*
   7.230 + * parallax_probe: answer a BLKIF_OP_PROBE request by listing the VDIs
   7.231 + * held in this interface's vdi_hash.  One vdisk_t is written per VDI into
   7.232 + * the request's single buffer page; the response status is the number of
   7.233 + * disks reported, or BLKIF_RSP_ERROR for a malformed probe.
   7.234 + * Always returns BLKTAP_RESPOND.
   7.235 + */
   7.236 +int parallax_probe(blkif_request_t *req, blkif_t *blkif)
   7.237 +{
   7.238 +    /* The reply is built in place, on top of the request slot. */
   7.239 +    blkif_response_t *rsp = (blkif_response_t *)req;
   7.240 +    vdisk_t *slot;
   7.241 +    vdi_t *cur;
   7.242 +    int bucket, count = 0;
   7.243 +
   7.244 +    DPRINTF("parallax_probe: req=%p, blkif=%p\n", req, blkif); 
   7.245 +
   7.246 +    /* A probe carries exactly one buffer, and it must be page-sized
   7.247 +     * (first sector 0, last sector 7); anything else is an error. */
   7.248 +    if ( (req->nr_segments != 1) ||
   7.249 +         (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
   7.250 +         (blkif_last_sect (req->frame_and_sects[0]) != 7) ) {
   7.251 +        rsp->id = req->id;
   7.252 +        rsp->operation = BLKIF_OP_PROBE;
   7.253 +        rsp->status = BLKIF_RSP_ERROR;
   7.254 +
   7.255 +        DPRINTF("parallax_probe: send error response\n"); 
   7.256 +        return BLKTAP_RESPOND;  
   7.257 +    }
   7.258 +
   7.259 +    /* Walk every hash chain, emitting one record per VDI found. */
   7.260 +    for (bucket = 0; bucket < VDI_HASHSZ; bucket++) {
   7.261 +        for (cur = blkif->vdi_hash[bucket]; cur != NULL; cur = cur->next) {
   7.262 +            slot = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0) + count;
   7.263 +            slot->device   = cur->vdevice;
   7.264 +            slot->info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
   7.265 +            /* The -2 here accounts for the LSB in the radix tree */
   7.266 +            slot->capacity = ((1LL << (VDI_HEIGHT-2)) >> SECTOR_SHIFT);
   7.267 +            count++;
   7.268 +        }
   7.269 +    }
   7.270 +
   7.271 +    /* Success: the status field carries the disk count. */
   7.272 +    rsp->id = req->id;
   7.273 +    rsp->operation = BLKIF_OP_PROBE;
   7.274 +    rsp->status = count; /* number of disks */
   7.275 +
   7.276 +    DPRINTF("parallax_probe: send positive response (nr_vdis=%d)\n", count);
   7.277 +    return  BLKTAP_RESPOND;
   7.278 +}
   7.279 +
   7.280 +/*
   7.281 + * parallax_read: service a BLKIF_OP_READ request against a VDI.
   7.282 + *
   7.283 + * Each segment is resolved through vdi_lookup_block() to a global block.
   7.284 + * Unmapped blocks (gblock == 0) read back as zeros; mapped blocks are
   7.285 + * fetched with readblock() and the requested sector range is copied into
   7.286 + * the guest page.  The reply is built in place over the request and is
   7.287 + * always returned with BLKTAP_RESPOND; its status is BLKIF_RSP_OKAY, or
   7.288 + * BLKIF_RSP_ERROR for an unknown device or a failed block read.
   7.289 + */
   7.290 +int parallax_read(blkif_request_t *req, blkif_t *blkif)
   7.291 +{
   7.292 +    blkif_response_t *rsp;
   7.293 +    unsigned long size, offset, start;
   7.294 +    u64 sector;
   7.295 +    u64 vblock, gblock;
   7.296 +    vdi_t *vdi;
   7.297 +    int i;
   7.298 +    char *dpage, *spage;
   7.299 +
   7.300 +    vdi = blkif_get_vdi(blkif, req->device);
   7.301 +    if ( vdi == NULL )
   7.302 +        goto err;
   7.303 +
   7.304 +    for (i = 0; i < req->nr_segments; i++) {
   7.305 +
   7.306 +        dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
   7.307 +
   7.308 +        /* Round the requested segment to a block address. */
   7.309 +        sector  = req->sector_number + (8*i);
   7.310 +        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
   7.311 +
   7.312 +        /* Get that block from the store. */
   7.313 +        gblock = vdi_lookup_block(vdi, vblock, NULL);
   7.314 +
   7.315 +        /* Calculate read size and offset within the read block. */
   7.316 +        offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
   7.317 +        size = ( blkif_last_sect (req->frame_and_sects[i]) -
   7.318 +                 blkif_first_sect(req->frame_and_sects[i]) + 1
   7.319 +               ) << SECTOR_SHIFT;
   7.320 +        start = blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
   7.321 +
   7.322 +        DPRINTF("ParallaxRead: sect: %lld (%ld,%ld),  "
   7.323 +                "vblock %llx, gblock %llx, "
   7.324 +                "size %lx\n", 
   7.325 +                sector, blkif_first_sect(req->frame_and_sects[i]),
   7.326 +                blkif_last_sect (req->frame_and_sects[i]),
   7.327 +                vblock, gblock, size); 
   7.328 +
   7.329 +        /* If the block does not exist in the store, return zeros. */
   7.330 +        /* Otherwise, copy that region to the guest page.          */
   7.331 +        if ( gblock == 0 ) {
   7.332 +            memset(dpage + start, '\0', size);
   7.333 +        } else {
   7.334 +            spage = readblock(gblock);
   7.335 +            if (spage == NULL) {
   7.336 +                printf("Error reading gblock from store: %Ld\n", gblock);
   7.337 +                goto err;
   7.338 +            }
   7.339 +            memcpy(dpage + start, spage + offset, size);
   7.340 +            freeblock(spage);
   7.341 +        }
   7.342 +    }
   7.343 +
   7.344 +    /* The reply must echo the operation being answered: this is a read. */
   7.345 +    rsp = (blkif_response_t *)req;
   7.346 +    rsp->id = req->id;
   7.347 +    rsp->operation = BLKIF_OP_READ;
   7.348 +    rsp->status = BLKIF_RSP_OKAY;
   7.349 +
   7.350 +    return BLKTAP_RESPOND;
   7.351 +err:
   7.352 +    /* Failure path: the same in-place reply, flagged as an error. */
   7.353 +    rsp = (blkif_response_t *)req;
   7.354 +    rsp->id = req->id;
   7.355 +    rsp->operation = BLKIF_OP_READ;
   7.356 +    rsp->status = BLKIF_RSP_ERROR;
   7.357 +
   7.358 +    return BLKTAP_RESPOND;  
   7.359 +}
   7.360 +
   7.361 +/*
   7.362 + * parallax_write: service a BLKIF_OP_WRITE request.  Only whole, aligned
   7.363 + * blocks are accepted; each is stored in a newly allocated block which
   7.364 + * vdi_update_block() then maps.  The reply is built over the request.
   7.365 + */
   7.366 +int parallax_write(blkif_request_t *req, blkif_t *blkif)
   7.367 +{
   7.368 +    blkif_response_t *rsp;
   7.369 +    u64 sector;
   7.370 +    int i, writable = 0;
   7.371 +    u64 vblock, gblock;
   7.372 +    char *spage;
   7.373 +    unsigned long size, offset, start;
   7.374 +    vdi_t *vdi;
   7.375 +
   7.376 +    vdi = blkif_get_vdi(blkif, req->device);
   7.377 +    if ( vdi == NULL )
   7.378 +        goto err;
   7.379 +
   7.380 +    for (i = 0; i < req->nr_segments; i++) {
   7.381 +        spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
   7.382 +        /* Round the requested segment to a block address. */
   7.383 +        sector  = req->sector_number + (8*i);
   7.384 +        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
   7.385 +        /* Get that block from the store. */
   7.386 +        gblock   = vdi_lookup_block(vdi, vblock, &writable);
   7.387 +        /* Calculate write size and offset within the block. */
   7.388 +        offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
   7.389 +        size = ( blkif_last_sect (req->frame_and_sects[i]) -
   7.390 +                 blkif_first_sect(req->frame_and_sects[i]) + 1
   7.391 +               ) << SECTOR_SHIFT;
   7.392 +        start = blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
   7.393 +/*       
   7.394 +if (( gblock != 0 ) && ( writable == 0 )) printf("*");
   7.395 +*/
   7.396 +        DPRINTF("ParallaxWrite: sect: %lld (%ld,%ld),  "
   7.397 +                "vblock %llx, gblock %llx, "
   7.398 +                "size %lx\n", 
   7.399 +                sector, blkif_first_sect(req->frame_and_sects[i]),
   7.400 +                blkif_last_sect (req->frame_and_sects[i]),
   7.401 +                vblock, gblock, size); 
   7.402 +        /* XXX: For now we just freak out if they try to write a   */
   7.403 +        /* non block-sized, block-aligned page.                    */
   7.404 +        if ((offset != 0) || (size != BLOCK_SIZE) || (start != 0)) {
   7.405 +            printf("]\n] STRANGE WRITE!\n]\n");
   7.406 +            goto err;
   7.407 +        }
   7.408 +/* Disable write-in-place till radix is sorted out.
   7.409 +        if (( gblock == 0 ) || ( writable == 0 )) {
   7.410 +*/          
   7.411 +            gblock = allocblock(spage);
   7.412 +            /* 0 means allocation failed; never map the zero block instead. */
   7.413 +            if ( gblock == 0 ) {
   7.414 +                printf("Error allocating block for vblock %Ld\n", vblock);
   7.415 +                goto err;
   7.416 +            }
   7.417 +            vdi_update_block(vdi, vblock, gblock);
   7.418 +#if 0            
   7.419 +        } else {
   7.420 +            
   7.421 +            /* write-in-place, no need to change mappings. */
   7.422 +            writeblock(gblock, spage);
   7.423 +            
   7.424 +        }
   7.425 +#endif
   7.426 +    }
   7.427 +
   7.428 +    rsp = (blkif_response_t *)req;
   7.429 +    rsp->id = req->id;
   7.430 +    rsp->operation = BLKIF_OP_WRITE;
   7.431 +    rsp->status = BLKIF_RSP_OKAY;
   7.432 +
   7.433 +    return BLKTAP_RESPOND;
   7.434 +err:
   7.435 +    rsp = (blkif_response_t *)req;
   7.436 +    rsp->id = req->id;
   7.437 +    rsp->operation = BLKIF_OP_WRITE;
   7.438 +    rsp->status = BLKIF_RSP_ERROR;
   7.439 +
   7.440 +    return BLKTAP_RESPOND;  
   7.441 +}
   7.442 +
   7.443 +/*
   7.444 + * parallax_request: the request hook that main() registers with blktap.
   7.445 + * Looks up the interface for the requesting domain and dispatches on the
   7.446 + * operation; an unknown domain or operation is answered in place with
   7.447 + * BLKIF_RSP_ERROR.
   7.448 + */
   7.449 +int parallax_request(blkif_request_t *req)
   7.450 +{
   7.451 +    blkif_response_t *reply;
   7.452 +    domid_t  dom   = ID_TO_DOM(req->id);
   7.453 +    blkif_t *blkif = blkif_find_by_handle(dom, 0);
   7.454 +
   7.455 +    //DPRINTF("parallax_request: req=%p, dom=%d, blkif=%p\n", req, dom, blkif); 
   7.456 +
   7.457 +    if (blkif != NULL) {
   7.458 +        switch ( req->operation ) {
   7.459 +        case BLKIF_OP_PROBE:
   7.460 +            return parallax_probe(req, blkif);
   7.461 +        case BLKIF_OP_READ:
   7.462 +            return parallax_read(req, blkif);
   7.463 +        case BLKIF_OP_WRITE:
   7.464 +            return parallax_write(req, blkif);
   7.465 +        default:
   7.466 +            /* Unknown operation */
   7.467 +            break;
   7.468 +        }
   7.469 +    }
   7.470 +
   7.471 +    /* No interface for this domain, or an operation we do not handle. */
   7.472 +    reply = (blkif_response_t *)req;
   7.473 +    reply->id = req->id;
   7.474 +    reply->operation = req->operation;
   7.475 +    reply->status = BLKIF_RSP_ERROR;
   7.476 +    return BLKTAP_RESPOND;  
   7.477 +}
   7.478 +
   7.479 +void __init_parallax(void) 
   7.480 +{
   7.481 +    memset(blkif_hash, 0, sizeof(blkif_hash)); /* zero the whole blkif_hash table */
   7.482 +}
   7.483 +
   7.484 +
   7.485 +int main(int argc, char *argv[])
   7.486 +{   /* Initialise blockstore, VDI registry and local state, in that order. */
   7.487 +    DPRINTF("parallax: starting.\n"); 
   7.488 +    __init_blockstore();
   7.489 +    DPRINTF("parallax: initialized blockstore...\n"); 
   7.490 +    __init_vdi();
   7.491 +    DPRINTF("parallax: initialized vdi registry etc...\n"); 
   7.492 +    __init_parallax();
   7.493 +    DPRINTF("parallax: initialized local stuff..\n"); 
   7.494 +    /* Register the control and request hooks, then call blktap_listen(). */
   7.495 +    blktap_register_ctrl_hook("parallax_control", parallax_control);
   7.496 +    blktap_register_request_hook("parallax_request", parallax_request);
   7.497 +    DPRINTF("parallax: added ctrl + request hooks, starting listen...\n"); 
   7.498 +    blktap_listen();
   7.499 +    
   7.500 +    return 0;
   7.501 +}
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/tools/blktap/radix.c	Thu Mar 03 21:00:54 2005 +0000
     8.3 @@ -0,0 +1,362 @@
     8.4 +/*
     8.5 + * Radix tree for mapping (up to) 63-bit virtual block IDs to
     8.6 + * 63-bit global block IDs
     8.7 + *
     8.8 + * Pointers within the tree set aside the least significant bit to indicate
     8.9 + * whether or not the target block is writable from this node.
    8.10 + *
    8.11 + * The block with ID 0 is assumed to be an empty block of all zeros
    8.12 + */
    8.13 +
    8.14 +#include <unistd.h>
    8.15 +#include <stdio.h>
    8.16 +#include <stdlib.h>
    8.17 +#include <assert.h>
    8.18 +#include <string.h>
    8.19 +#include "blockstore.h"
    8.20 +#include "radix.h"
    8.21 +
    8.22 +#define RADIX_TREE_MAP_SHIFT 9
    8.23 +#define RADIX_TREE_MAP_MASK 0x1ff
    8.24 +#define RADIX_TREE_MAP_ENTRIES 512
    8.25 +
    8.26 +/*
    8.27 +#define DEBUG
    8.28 +*/
    8.29 +
    8.30 +#define ZERO 0LL
    8.31 +#define ONE 1LL
    8.32 +#define ONEMASK (~(u64)ONE)   /* every bit except the LSB (the writable flag) */
    8.33 +
    8.34 +
    8.35 +typedef u64 *radix_tree_node;
    8.36 +
    8.37 +/*
    8.38 + * block device interface and other helper functions
    8.39 + * with these functions, block id is just a 63-bit number, with
    8.40 + * no special consideration for the LSB
    8.41 + */
    8.42 +radix_tree_node cloneblock(radix_tree_node block);
    8.43 +
    8.44 +/*
    8.45 + * main api
    8.46 + * with these functions, the LSB of root always indicates
    8.47 + * whether or not the block is writable, including the return
    8.48 + * values of update and snapshot
    8.49 + */
    8.50 +u64 lookup(int height, u64 root, u64 key);
    8.51 +u64 update(int height, u64 root, u64 key, u64 val);
    8.52 +u64 snapshot(u64 root);
    8.53 +
    8.54 +/**
    8.55 + * cloneblock: clone an existing block in memory
    8.56 + *   @block: the old block
    8.57 + *
    8.58 + *   @return: new block, with LSB cleared for every entry
    8.59 + */
    8.60 +radix_tree_node cloneblock(radix_tree_node block) {
    8.61 +    radix_tree_node copy = (radix_tree_node) malloc(BLOCK_SIZE);
    8.62 +    int slot;
    8.63 +    if (copy != NULL) {
    8.64 +        for (slot = RADIX_TREE_MAP_ENTRIES - 1; slot >= 0; slot--)
    8.65 +            copy[slot] = block[slot] & ONEMASK;  /* drop writable bit */
    8.66 +        return copy;
    8.67 +    }
    8.68 +    perror("cloneblock malloc");
    8.69 +    return NULL;
    8.70 +}
    8.71 +
    8.72 +/**
    8.73 + * lookup: find a value given a key
    8.74 + *   @height: height in bits of the radix tree
    8.75 + *   @root: root node id, with set LSB indicating writable node
    8.76 + *   @key: key to lookup
    8.77 + *
    8.78 + *   @return: value on success, zero on error
    8.79 + */
    8.80 +u64 lookup(int height, u64 root, u64 key) {
    8.81 +    radix_tree_node level;
    8.82 +    int shift;
    8.83 +
    8.84 +    assert(key >> height == 0);
    8.85 +
    8.86 +    /* the root block may be smaller to ensure all leaves are full */
    8.87 +    shift = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
    8.88 +
    8.89 +    /* descend one level per pass, using RADIX_TREE_MAP_SHIFT key bits each */
    8.90 +    for (;; shift -= RADIX_TREE_MAP_SHIFT) {
    8.91 +#ifdef DEBUG
    8.92 +        printf("lookup: height=%3d root=%3Ld offset=%3d%s\n", shift, root,
    8.93 +                (int) ((key >> shift) & RADIX_TREE_MAP_MASK),
    8.94 +                (iswritable(root) ? "" : " (readonly)"));
    8.95 +#endif
    8.96 +
    8.97 +        /* an entry with id zero means nothing is mapped below here */
    8.98 +        if (getid(root) == ZERO)
    8.99 +            break;
   8.100 +
   8.101 +        /* fetch this level's node from the blockstore */
   8.102 +        level = (radix_tree_node) readblock(getid(root));
   8.103 +        if (level == NULL)
   8.104 +            break;
   8.105 +
   8.106 +        root = level[(key >> shift) & RADIX_TREE_MAP_MASK];
   8.107 +        freeblock(level);
   8.108 +
   8.109 +        /* shift 0 is the last level: its entry is the result */
   8.110 +        if (shift == 0)
   8.111 +            return root;
   8.112 +    }
   8.113 +
   8.114 +    /* unmapped key, or the blockstore read failed */
   8.115 +    return ZERO;
   8.116 +}
   8.117 +
   8.118 +/*
   8.119 + * update: set a radix tree entry, doing copy-on-write as necessary
   8.120 + *   @height: height in bits of the radix tree
   8.121 + *   @root: root node id, with set LSB indicating writable node
   8.122 + *   @key: key to set
   8.123 + *   @val: value to set, s.t. radix(key)=val
   8.124 + *
   8.125 + *   @returns: (possibly new) root id on success (with LSB=1), 0 on failure
   8.126 + */
   8.127 +u64 update(int height, u64 root, u64 key, u64 val) {
   8.128 +    int offset;
   8.129 +    u64 child;
   8.130 +    radix_tree_node node;
   8.131 +    
   8.132 +    /* base case--return val */
   8.133 +    if (height == 0)
   8.134 +        return val;
   8.135 +
   8.136 +    /* the root block may be smaller to ensure all leaves are full */
   8.137 +    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
   8.138 +    offset = (key >> height) & RADIX_TREE_MAP_MASK;
   8.139 +
   8.140 +#ifdef DEBUG
   8.141 +    printf("update: height=%3d root=%3Ld offset=%3d%s\n", height, root,
   8.142 +            offset, (iswritable(root)?"":" (clone)"));
   8.143 +#endif
   8.144 +
   8.145 +    /* load a block, or create a new one */
   8.146 +    if (root == ZERO) {
   8.147 +        node = (radix_tree_node) newblock();
   8.148 +    } else {
   8.149 +        node = (radix_tree_node) readblock(getid(root));
   8.150 +        /* a failed read leaves node NULL: skip the clone and fail below */
   8.151 +        if (node != NULL && !iswritable(root)) {
   8.152 +            /* need to clone this node */
   8.153 +            radix_tree_node oldnode = node;
   8.154 +            node = cloneblock(node);
   8.155 +            freeblock(oldnode);
   8.156 +            root = ZERO;
   8.157 +        }
   8.158 +    }
   8.159 +
   8.160 +    if (node == NULL) {
   8.161 +#ifdef DEBUG
   8.162 +        printf("update: node is null!\n");
   8.163 +#endif
   8.164 +        return ZERO;
   8.165 +    }
   8.166 +
   8.167 +    child = update(height, node[offset], key, val);
   8.168 +
   8.169 +    if (child == ZERO) {
   8.170 +        freeblock(node);
   8.171 +        return ZERO;
   8.172 +    } else if (child == node[offset]) {
   8.173 +        /* no change, so we already owned the child */
   8.174 +        assert(iswritable(root));
   8.175 +
   8.176 +        freeblock(node);
   8.177 +        return root;
   8.178 +    }
   8.179 +
   8.180 +    node[offset] = child;
   8.181 +
   8.182 +    /* new/cloned blocks need to be saved */
   8.183 +    if (root == ZERO) {
   8.184 +        /* mark this as an owned block */
   8.185 +        root = allocblock(node);
   8.186 +        if (root)
   8.187 +            root = writable(root);
   8.188 +    } else if (writeblock(getid(root), node) < 0) {
   8.189 +        freeblock(node);
   8.190 +        return ZERO;
   8.191 +    }
   8.192 +
   8.193 +    freeblock(node);
   8.194 +    return root;
   8.195 +}
   8.196 +
   8.197 +/**
   8.198 + * snapshot: create a snapshot
   8.199 + *   @root: old root node
   8.200 + *
   8.201 + *   @return: new root node, 0 on error
   8.202 + */
   8.203 +u64 snapshot(u64 root) {
   8.204 +    radix_tree_node orig, copy;
   8.205 +    u64 newroot;
   8.206 +
   8.207 +    orig = readblock(getid(root));
   8.208 +    if (orig == NULL)
   8.209 +        return ZERO;
   8.210 +
   8.211 +    /* duplicate the root node; cloneblock clears each entry's LSB */
   8.212 +    copy = cloneblock(orig);
   8.213 +    freeblock(orig);
   8.214 +    if (copy == NULL)
   8.215 +        return ZERO;
   8.216 +
   8.217 +    /* store the copy as a new block, which becomes the new root */
   8.218 +    newroot = allocblock(copy);
   8.219 +    freeblock(copy);
   8.220 +    return (newroot == ZERO) ? ZERO : writable(newroot);
   8.221 +}
   8.222 +
   8.223 +/*
   8.224 + * print_root: dump the tree under @root to radix.dot in graphviz format.
   8.225 + *   @dot_f: NULL on the outermost call, which opens radix.dot and writes
   8.226 + *           the graph pre/postamble; recursive calls pass the open stream.
   8.227 + *   @val:   accumulated key prefix, only handed down to recursive calls.
   8.228 + *
   8.229 + * NOTE(review): the recursion passes height-RADIX_TREE_MAP_SHIFT, which is
   8.230 + * then rounded again, so levels may not line up with lookup() -- confirm.
   8.231 + */
   8.232 +void print_root(u64 root, int height, u64 val, FILE *dot_f)
   8.233 +{
   8.234 +    /* nested calls write to the caller's stream, not an unset local */
   8.235 +    FILE *f = dot_f;
   8.236 +    int i;
   8.237 +    radix_tree_node node;
   8.238 +    char *style[2] = { "", "style=bold,color=blue," };
   8.239 +
   8.240 +    if (dot_f == NULL) {
   8.241 +        f = fopen("radix.dot", "w");
   8.242 +        if (f == NULL) {
   8.243 +            perror("print_root: open");
   8.244 +            return;
   8.245 +        }
   8.246 +
   8.247 +        /* write graph preamble */
   8.248 +        fprintf(f, "digraph G {\n");
   8.249 +
   8.250 +        /* add a node for this root. */
   8.251 +        fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
   8.252 +                getid(root), style[iswritable(root)], getid(root));
   8.253 +    }
   8.254 +
   8.255 +    if (height == 0) {
   8.256 +        /* last level: add a node and edge per entry, without recursing */
   8.257 +        node = (radix_tree_node) readblock(getid(root));
   8.258 +        if (node != NULL) {
   8.259 +            for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) {
   8.260 +                if (node[i] != 0) {
   8.261 +                    fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
   8.262 +                            getid(node[i]), style[iswritable(node[i])], 
   8.263 +                            getid(node[i]));
   8.264 +                    fprintf(f, "   n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), 
   8.265 +                            getid(node[i]), i);
   8.266 +                }
   8.267 +            }
   8.268 +            freeblock(node);
   8.269 +        }
   8.270 +        goto done;
   8.271 +    }
   8.272 +
   8.273 +    /* the root block may be smaller to ensure all leaves are full */
   8.274 +    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
   8.275 +
   8.276 +    if (getid(root) == ZERO)
   8.277 +        goto done;
   8.278 +
   8.279 +    node = (radix_tree_node) readblock(getid(root));
   8.280 +    if (node == NULL)
   8.281 +        goto done;
   8.282 +
   8.283 +    /* add a node and edge for each child root */
   8.284 +    for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
   8.285 +        if (node[i] != 0) {
   8.286 +            fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
   8.287 +                    getid(node[i]), style[iswritable(node[i])], 
   8.288 +                    getid(node[i]));
   8.289 +            print_root(node[i], height-RADIX_TREE_MAP_SHIFT, 
   8.290 +                    val + (((u64)i)<<height), f);
   8.291 +            fprintf(f, "   n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), 
   8.292 +                    getid(node[i]), i);
   8.293 +        }
   8.294 +
   8.295 +    /* the node was only read; release it now the children are printed */
   8.296 +    freeblock(node);
   8.297 +
   8.298 +done:
   8.299 +    /* write graph postamble: only the call that opened the file closes it */
   8.300 +    if (dot_f == NULL) {
   8.301 +        fprintf(f, "}\n");
   8.302 +        fclose(f);
   8.303 +    }
   8.304 +}
   8.305 +
   8.306 +#ifdef RADIX_STANDALONE
   8.307 +
   8.308 +int main(int argc, char **argv) {
   8.309 +    u64 key = ZERO, val = ZERO;
   8.310 +    u64 root = writable(ONE);
   8.311 +    char buff[4096];
   8.312 +    /* interactive test driver: reads the commands listed below from stdin */
   8.313 +    __init_blockstore();
   8.314 +    
   8.315 +    memset(buff, 0, 4096);
   8.316 +    /*fp = open("radix.dat", O_RDWR | O_CREAT, 0644);
   8.317 +
   8.318 +    if (fp < 3) {
   8.319 +        perror("open");
   8.320 +        return -1;
   8.321 +    }
   8.322 +    if (lseek(fp, 0, SEEK_END) == 0) {
   8.323 +        write(fp, buff, 4096);
   8.324 +    }*/
   8.325 +           
   8.326 +    printf("Recognized commands:\n"
   8.327 +           "Note: the LSB of a node number indicates if it is writable\n"
   8.328 +           "  root <node>               set root to <node>\n"
   8.329 +           "  snapshot                  take a snapshot of the root\n"
   8.330 +           "  set <key> <val>           set key=val\n"
   8.331 +           "  get <key>                 query key\n"
   8.332 +           "  quit\n"
   8.333 +           "\nroot = %Ld\n", root);
   8.334 +    for (;;) {
   8.335 +        print_root(root, 34, 0, NULL);
   8.336 +        system("dot radix.dot -Tps -o radix.ps");
   8.337 +
   8.338 +        printf("> ");
   8.339 +        fflush(stdout);
   8.340 +        /* stop on EOF or a read error rather than re-parsing stale input */
   8.341 +        if (fgets(buff, sizeof(buff), stdin) == NULL)
   8.342 +            break;
   8.343 +        if (sscanf(buff, " root %Ld", &root) == 1) {
   8.344 +            printf("root set to %Ld\n", root);
   8.345 +        } else if (sscanf(buff, " set %Ld %Ld", &key, &val) == 2) {
   8.346 +            root = update(34, root, key, val);
   8.347 +            printf("root = %Ld\n", root);
   8.348 +        } else if (sscanf(buff, " get %Ld", &key) == 1) {
   8.349 +            val = lookup(34, root, key);  /* lookup() takes three arguments */
   8.350 +            printf("value = %Ld\n", val);
   8.351 +        } else if (!strcmp(buff, "quit\n")) {
   8.352 +            break;
   8.353 +        } else if (!strcmp(buff, "snapshot\n")) {
   8.354 +            root = snapshot(root);
   8.355 +            printf("new root = %Ld\n", root);
   8.356 +        } else if (sscanf(buff, " pr %Ld", &root) == 1) {
   8.357 +            print_root(root, 34, 0, NULL);
   8.358 +        } else {
   8.359 +            printf("command not recognized\n");
   8.360 +        }
   8.361 +    }
   8.362 +    return 0;
   8.363 +}
   8.364 +
   8.365 +#endif
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/tools/blktap/radix.h	Thu Mar 03 21:00:54 2005 +0000
     9.3 @@ -0,0 +1,31 @@
     9.4 +/*
     9.5 + * Radix tree for mapping (up to) 63-bit virtual block IDs to
     9.6 + * 63-bit global block IDs
     9.7 + *
     9.8 + * Pointers within the tree set aside the least significant bit to indicate
     9.9 + * whether or not the target block is writable from this node.
    9.10 + *
    9.11 + * The block with ID 0 is assumed to be an empty block of all zeros
    9.12 + */
    9.13 +
    9.14 +#ifndef __RADIX_H__
    9.15 +#define __RADIX_H__
    9.16 +
    9.17 +/* I don't really like exposing these, but... */
    9.18 +#define getid(x) (((x)>>1)&0x7fffffffffffffffLL)
    9.19 +#define putid(x) ((x)<<1)
    9.20 +#define writable(x) (((x)<<1)|1LL)
    9.21 +#define iswritable(x) ((x)&1LL)
    9.22 +
    9.23 +/*
    9.24 + * main api
    9.25 + * with these functions, the LSB of root always indicates
    9.26 + * whether or not the block is writable, including the return
    9.27 + * values of update and snapshot
    9.28 + */
    9.29 +u64 lookup(int height, u64 root, u64 key);
    9.30 +u64 update(int height, u64 root, u64 key, u64 val);
    9.31 +u64 snapshot(u64 root);
    9.32 +int isprivate(int height, u64 root, u64 key);
    9.33 +
    9.34 +#endif /* __RADIX_H__ */
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/tools/blktap/snaplog.c	Thu Mar 03 21:00:54 2005 +0000
    10.3 @@ -0,0 +1,173 @@
    10.4 +/**************************************************************************
    10.5 + * 
    10.6 + * snaplog.c
    10.7 + *
    10.8 + * Snapshot log on-disk data structure.
    10.9 + *
   10.10 + */
   10.11 + 
   10.12 + /* VDI histories are made from chains of snapshot logs.  These logs record 
   10.13 +  * the (radix) root and timestamp of individual snapshots.
   10.14 +  *
   10.15 +  * creation of a new VDI involves 'forking' a snapshot log, by creating a 
   10.16 +  * new, empty log (in a new VDI) and parenting it off of a record in an 
   10.17 +  * existing snapshot log.
   10.18 +  *
   10.19 +  * snapshot log blocks have at most one writer.
   10.20 +  */
   10.21 +
   10.22 +#include <stdio.h>
   10.23 +#include <stdlib.h>
   10.24 +#include <sys/time.h>
   10.25 +#include "blockstore.h"
   10.26 +#include "snaplog.h"
   10.27 +
   10.28 +
   10.29 +
   10.30 +/* snap_get_block: read a block and hand it back only if it is a snap block
   10.31 + * (hdr.magic == SNAP_MAGIC); any other block is freed and NULL returned. */
   10.32 +snap_block_t *snap_get_block(u64 block)
   10.33 +{
   10.34 +    snap_block_t *candidate = (snap_block_t *)readblock(block);
   10.35 +
   10.36 +    if ( (candidate != NULL) && (candidate->hdr.magic != SNAP_MAGIC) ) {
   10.37 +        freeblock(candidate);
   10.38 +        candidate = NULL;
   10.39 +    }
   10.40 +
   10.41 +    return candidate;
   10.42 +}
   10.43 +    
   10.44 +/* snap_get_id: copy the record named by @id into @target.
   10.45 + * Returns 0 on success, -1 if @id cannot be resolved to a record. */
   10.46 +int snap_get_id(snap_id_t *id, snap_rec_t *target)
   10.47 +{
   10.48 +    snap_block_t *blk;
   10.49 +    if ( (id == NULL) || (target == NULL) )
   10.50 +        return -1;
   10.51 +    blk = snap_get_block(id->block);
   10.52 +    if ( blk == NULL ) 
   10.53 +        return -1;
   10.54 +
   10.55 +    /* never index past snaps[], even when the block is completely full */
   10.56 +    if ( (id->index > blk->hdr.nr_entries) ||
   10.57 +         (id->index >= SNAPS_PER_BLOCK) ) {
   10.58 +        freeblock(blk);
   10.59 +        return -1;
   10.60 +    }
   10.61 +    *target = blk->snaps[id->index];
   10.62 +    freeblock(blk);
   10.63 +    return 0;
   10.64 +}
   10.65 +
   10.66 +/*
   10.67 + * __snap_block_create: build and store a new, empty snap block linked to
   10.68 + * @parent_id / @fork_id (both null ids when @parent_id is NULL); its id is
   10.69 + * written to @new_id with index 0.  Returns 0 on success, -1 on failure.
   10.70 + */
   10.71 +int __snap_block_create(snap_id_t *parent_id, snap_id_t *fork_id,
   10.72 +                                  snap_id_t *new_id)
   10.73 +{
   10.74 +    snap_block_t *blk, *pblk;
   10.75 +    /* NOTE: validating parent_id/fork_id with snap_get_id() is disabled. */
   10.76 +    blk = (snap_block_t *)newblock();
   10.77 +    if ( blk == NULL )
   10.78 +        return -1;
   10.79 +    blk->hdr.magic  = SNAP_MAGIC;
   10.80 +    blk->hdr.nr_entries  = 0;
   10.81 +    blk->hdr.log_entries = 0;
   10.82 +    blk->hdr.immutable   = 0;
   10.83 +
   10.84 +    /* a block that extends an existing chain inherits its entry count */
   10.85 +    if (   (parent_id  != NULL) 
   10.86 +        && (parent_id->block != fork_id->block) 
   10.87 +        && (parent_id->block != 0)) {
   10.88 +        pblk = snap_get_block(parent_id->block);
   10.89 +        if ( pblk == NULL ) {
   10.90 +            freeblock(blk);
   10.91 +            return -1;
   10.92 +        }
   10.93 +        blk->hdr.log_entries = pblk->hdr.log_entries;
   10.94 +        freeblock(pblk);
   10.95 +    }
   10.96 +    if (parent_id != NULL) {
   10.97 +        blk->hdr.parent_block = *parent_id;
   10.98 +        blk->hdr.fork_block   = *fork_id;
   10.99 +    } else {
  10.100 +        blk->hdr.parent_block = null_snap_id;
  10.101 +        blk->hdr.fork_block   = null_snap_id;
  10.102 +    }
  10.103 +    new_id->index = 0;
  10.104 +    new_id->block = allocblock(blk);
  10.105 +    freeblock(blk); /* release our in-memory copy, as radix.c does */
  10.106 +    return (new_id->block == 0) ? -1 : 0;
  10.107 +}
  10.108 +
  10.109 +int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id)
  10.110 +{   /* the parent record doubles as the fork point of the new block */
  10.111 +    return __snap_block_create(parent_id, parent_id, new_id);
  10.112 +}
  10.113 +
  10.114 +/* snap_append: add @rec to the log at @old_id (starting a new block if that
  10.115 + * one is full) and report the new id in @new_id.  0 on success, else -1. */
  10.116 +int snap_append(snap_id_t *old_id, snap_rec_t *rec, snap_id_t *new_id)
  10.117 +{
  10.118 +    snap_id_t id = *old_id;
  10.119 +    snap_block_t *blk = snap_get_block(id.block);
  10.120 +    int ret;
  10.121 +    if ( blk == NULL )
  10.122 +        return -1;
  10.123 +    if ( blk->hdr.immutable != 0 ) {
  10.124 +        printf("Attempt to snap an immutable snap block!\n");
  10.125 +        freeblock(blk);
  10.126 +        return -1;
  10.127 +    }
  10.128 +    new_id->block = id.block;
  10.129 +    if (blk->hdr.nr_entries == SNAPS_PER_BLOCK) {
  10.130 +        id.index--; /* make id point to the last full record */
  10.131 +        ret = __snap_block_create(&id, &blk->hdr.fork_block, new_id);
  10.132 +        if ( ret != 0 ) {
  10.133 +            freeblock(blk);
  10.134 +            return -1;
  10.135 +        }
  10.136 +        /* seal the full block, then continue in the newly created one */
  10.137 +        blk->hdr.immutable = 1;
  10.138 +        writeblock(id.block, blk);
  10.139 +        freeblock(blk);
  10.140 +        blk = snap_get_block(new_id->block);
  10.141 +        if ( blk == NULL )
  10.142 +            return -1;
  10.143 +        id = *new_id;
  10.144 +    }
  10.145 +    blk->snaps[blk->hdr.nr_entries] = *rec;
  10.146 +    blk->hdr.nr_entries++;
  10.147 +    blk->hdr.log_entries++;
  10.148 +    new_id->index = blk->hdr.nr_entries;
  10.149 +    ret = writeblock(id.block, blk);
  10.150 +    freeblock(blk);
  10.151 +    return (ret < 0) ? -1 : 0;
  10.152 +}
  10.153 +
  10.154 +void snap_print_history(snap_id_t *snap_id)
  10.155 +{
  10.156 +    snap_id_t id = *snap_id;
  10.157 +    unsigned int idx = id.index;
  10.158 +    snap_block_t *new_blk, *blk = snap_get_block(id.block);
  10.159 +    /* print from @snap_id back through each parent block, newest first */
  10.160 +    while ( blk ) {
  10.161 +        printf("[Snap block %Ld]:\n", id.block);
  10.162 +        do {
  10.163 +            printf("   %03u: root: %Ld ts: %ld.%ld\n", idx, 
  10.164 +                    blk->snaps[idx].radix_root,
  10.165 +                    blk->snaps[idx].timestamp.tv_sec,
  10.166 +                    blk->snaps[idx].timestamp.tv_usec);
  10.167 +        } while (idx-- != 0);
  10.168 +
  10.169 +        /* move to the parent block, restarting at the index it recorded */
  10.170 +        id = blk->hdr.parent_block;
  10.171 +        idx = id.index;
  10.172 +        new_blk = (id.block != 0) ? snap_get_block(id.block) : NULL;
  10.173 +        freeblock(blk);
  10.174 +        blk = new_blk;
  10.175 +    }
  10.176 +}
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/tools/blktap/snaplog.h	Thu Mar 03 21:00:54 2005 +0000
    11.3 @@ -0,0 +1,52 @@
    11.4 +/**************************************************************************
    11.5 + * 
    11.6 + * snaplog.h
    11.7 + *
    11.8 + * Snapshot log on-disk data structure.
    11.9 + *
   11.10 + */
   11.11 +#ifndef __SNAPLOG_H__
   11.12 +#define __SNAPLOG_H__
   11.13 +#include <sys/time.h>      /* for struct timeval */
   11.14 +#include "blockstore.h"    /* for BLOCK_SIZE */
   11.15 +
   11.16 +/* Names one slot in the snapshot log: a log block and an index into it. */
   11.17 +typedef struct snap_id {
   11.18 +    u64            block;
   11.19 +    unsigned int   index;
   11.20 +} snap_id_t;
   11.21 +
   11.22 +/* One log entry: a radix root together with the time it was recorded. */
   11.23 +typedef struct snap_rec {
   11.24 +    u64            radix_root;
   11.25 +    struct timeval timestamp;
   11.26 +} snap_rec_t;
   11.27 +
   11.28 +int  snap_block_create(snap_id_t *parent_id, snap_id_t *new_id);
   11.29 +int  snap_append(snap_id_t *id, snap_rec_t *rec, snap_id_t *new_id);
   11.30 +void snap_print_history(snap_id_t *snap_id);
   11.31 +int  snap_get_id(snap_id_t *id, snap_rec_t *target);
   11.32 +
   11.33 +/* exported for vdi debugging */
   11.34 +#define SNAP_MAGIC 0xff00ff0aa0ff00ffLL
   11.35 +static const snap_id_t null_snap_id = { 0, 0 }; 
   11.36 +
   11.37 +typedef struct snap_block_hdr {
   11.38 +    u64            magic;
   11.39 +    snap_id_t      parent_block; /* parent block within this chain */
   11.40 +    snap_id_t      fork_block;   /* where this log was forked */
   11.41 +    unsigned       log_entries;  /* total entries since forking */
   11.42 +    unsigned short nr_entries;   /* entries in snaps[] */
   11.43 +    unsigned short immutable;    /* has this snap page become immutable? */
   11.44 +} snap_block_hdr_t;
   11.45 +
   11.46 +#define SNAPS_PER_BLOCK \
   11.47 +    ((BLOCK_SIZE - sizeof(snap_block_hdr_t)) / sizeof(snap_rec_t))
   11.48 +
   11.49 +typedef struct snap_block {
   11.50 +    snap_block_hdr_t hdr;
   11.51 +    snap_rec_t       snaps[SNAPS_PER_BLOCK];
   11.52 +} snap_block_t;
   11.53 +
   11.54 +snap_block_t *snap_get_block(u64 block);
   11.55 +#endif /* __SNAPLOG_H__ */
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/tools/blktap/vdi.c	Thu Mar 03 21:00:54 2005 +0000
    12.3 @@ -0,0 +1,353 @@
    12.4 +/**************************************************************************
    12.5 + * 
    12.6 + * vdi.c
    12.7 + *
    12.8 + * Virtual Disk Image (VDI) Interfaces
    12.9 + *
   12.10 + */
   12.11 + 
   12.12 +#include <stdio.h>
   12.13 +#include <stdlib.h>
   12.14 +#include <fcntl.h>
   12.15 +#include <string.h>
   12.16 +#include <sys/time.h>
   12.17 +#include "blockstore.h"
   12.18 +#include "radix.h"
   12.19 +#include "vdi.h"
   12.20 +                    
   12.21 +#define VDI_REG_BLOCK   1LL
   12.22 +#define VDI_RADIX_ROOT  writable(2)
   12.23 +                                                            
   12.24 +#if 1
   12.25 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   12.26 +#else
   12.27 +#define DPRINTF(_f, _a...) ((void)0)
   12.28 +#endif
   12.29 +
   12.30 +/* I haven't decided about this registry stuff, so this is just a really
   12.31 + * quick lash-up so that there is some way to track VDIs.
   12.32 + *
   12.33 + * (Most vdi access should be with a direct handle to the block, so this
   12.34 + *  registry is just for start-of-day lookup and other control operations.)
   12.35 + */
   12.36 +
   12.37 +vdi_registry_t *create_vdi_registry(void)
   12.38 +{
   12.39 +    /* newblock() hands back an empty block that becomes the registry. */
   12.40 +    vdi_registry_t *registry = (vdi_registry_t *)newblock();
   12.41 +
   12.42 +    if (registry != NULL) {
   12.43 +        /* While the block is still empty, use it to blank the radix root. */
   12.44 +        writeblock(VDI_RADIX_ROOT, (void *)registry);
   12.45 +
   12.46 +        DPRINTF("[vdi.c] Creating VDI registry!\n");
   12.47 +        registry->nr_vdis = 0;
   12.48 +        registry->magic   = VDI_REG_MAGIC;
   12.49 +
   12.50 +        /* Store the initialised registry header in its fixed block. */
   12.51 +        writeblock(VDI_REG_BLOCK, (void *)registry);
   12.52 +    }
   12.53 +
   12.54 +    return registry;
   12.55 +}
   12.56 +    
   12.57 +vdi_registry_t *get_vdi_registry(void)
   12.58 +{
   12.59 +    /* Returns the registry block (release with freeblock()) or NULL. */
   12.60 +    vdi_registry_t *vdi_reg = (vdi_registry_t *)readblock(VDI_REG_BLOCK);
   12.61 +    if ( vdi_reg == NULL )
   12.62 +        vdi_reg = create_vdi_registry();
   12.63 +    if ( vdi_reg == NULL )   /* creation failed too: nothing to validate */
   12.64 +        return NULL;
   12.65 +    if ( vdi_reg->magic != VDI_REG_MAGIC ) {
   12.66 +        freeblock(vdi_reg);
   12.67 +        return NULL;
   12.68 +    }
   12.69 +    return vdi_reg;
   12.70 +}
   12.71 +
   12.72 +vdi_t *vdi_create(snap_id_t *parent_snap, char *name)
   12.73 +{
   12.74 +    int ret;
   12.75 +    vdi_t *vdi;
   12.76 +    vdi_registry_t *vdi_reg;
   12.77 +    snap_rec_t snap_rec;
   12.78 +    /* Builds and registers a new VDI, forked from parent_snap when that
   12.79 +     * snapshot resolves and empty otherwise.  Returns NULL on failure. */
   12.80 +    vdi = newblock();
   12.81 +    if (vdi == NULL) 
   12.82 +        return NULL;
   12.83 +
   12.84 +    if ( snap_get_id(parent_snap, &snap_rec) == 0 ) {
   12.85 +        vdi->radix_root = snapshot(snap_rec.radix_root);
   12.86 +    } else {
   12.87 +        vdi->radix_root = allocblock((void *)vdi); /* vdi is just zeros here */
   12.88 +        vdi->radix_root = writable(vdi->radix_root); /* grr. */
   12.89 +    }
   12.90 +
   12.91 +    /* create a snapshot log, and add it to the vdi struct */
   12.92 +
   12.93 +    ret = snap_block_create(parent_snap, &vdi->snap);
   12.94 +    if ( ret != 0 ) {
   12.95 +        DPRINTF("Error getting snap block in vdi_create.\n");
   12.96 +        freeblock(vdi);
   12.97 +        return NULL;
   12.98 +    }
   12.99 +
  12.100 +    /* append the vdi to the registry, fill block and id.             */
  12.101 +    /* implicit allocation means we have to write the vdi twice here. */
  12.102 +    vdi_reg    = get_vdi_registry();
  12.103 +    if ( vdi_reg == NULL ) {
  12.104 +        freeblock(vdi);
  12.105 +        return NULL;
  12.106 +    }
  12.107 +    /* name[] has VDI_NAME_SZ bytes: copy SZ-1 at most, terminate in-bounds */
  12.108 +    vdi->block = allocblock((void *)vdi);
  12.109 +    vdi->id    = vdi_reg->nr_vdis++;
  12.110 +    strncpy(vdi->name, name, VDI_NAME_SZ - 1);
  12.111 +    vdi->name[VDI_NAME_SZ - 1] = '\0';
  12.112 +    writeblock(vdi->block, (void *)vdi);
  12.113 +
  12.114 +    update(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi->id, vdi->block);
  12.115 +    writeblock(VDI_REG_BLOCK, (void *)vdi_reg);
  12.116 +    freeblock(vdi_reg);
  12.117 +
  12.118 +    return vdi;
  12.119 +}
  12.120 +
  12.121 +vdi_t *vdi_get(u64 vdi_id)
  12.122 +{
  12.123 +    u64 blk;
  12.124 +
  12.125 +    /* Resolve the id to a block number via the registry radix tree. */
  12.126 +    blk = lookup(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi_id);
  12.127 +
  12.128 +    /* A zero block number is reported to the caller as NULL. */
  12.129 +    if ( blk != 0 )
  12.130 +        return (vdi_t *)readblock(blk);
  12.131 +
  12.132 +    return NULL;
  12.133 +}
  12.134 +
  12.135 +u64 vdi_lookup_block(vdi_t *vdi, u64 vdi_block, int *writable)
  12.136 +{
  12.137 +    /* Entry stored in this VDI's radix tree for vdi_block. */
  12.138 +    u64 entry = lookup(VDI_HEIGHT, vdi->radix_root, vdi_block);
  12.139 +
  12.140 +    /* The writable flag is optional: callers may pass NULL. */
  12.141 +    if (writable != NULL)
  12.142 +        *writable = iswritable(entry);
  12.143 +    printf("lu: root: %11Ld, gblock: %11Ld, id: %11Ld, wr: %Ld\n",
  12.144 +            vdi->radix_root, entry, getid(entry), iswritable(entry));
  12.145 +    return getid(entry);
  12.146 +}
  12.147 +
  12.148 +void vdi_update_block(vdi_t *vdi, u64 vdi_block, u64 g_block)
  12.149 +{
  12.150 +    /* Mappings installed through this call are always marked writable. */
  12.151 +    u64 entry = writable(g_block);
  12.152 +
  12.153 +    /* update() yields the radix root to use from now on; keep it in the
  12.154 +     * vdi and write the vdi back to its own block. */
  12.155 +    vdi->radix_root = update(VDI_HEIGHT, vdi->radix_root, vdi_block, entry);
  12.156 +    writeblock(vdi->block, vdi);
  12.157 +}
  12.158 +
  12.159 +void vdi_snapshot(vdi_t *vdi)
  12.160 +{
  12.161 +    snap_rec_t entry;
  12.162 +
  12.163 +    /* Record the current root and the time before the root is replaced. */
  12.164 +    entry.radix_root = vdi->radix_root;
  12.165 +    gettimeofday(&entry.timestamp, NULL);
  12.166 +    vdi->radix_root = snapshot(vdi->radix_root);
  12.167 +
  12.168 +    /* Log the record; the vdi is only written back if that succeeded. */
  12.169 +    if ( snap_append(&vdi->snap, &entry, &vdi->snap) == 0 ) {
  12.170 +        writeblock(vdi->block, vdi);
  12.171 +    } else {
  12.172 +        printf("snap_append returned failure\n");
  12.173 +    }
  12.174 +}
  12.175 +    
  12.176 +int __init_vdi()
  12.177 +{
  12.178 +    /* Fetching the registry creates it if it does not exist yet. */
  12.179 +    vdi_registry_t *reg = get_vdi_registry();
  12.180 +
  12.181 +    if (reg != NULL) {
  12.182 +        freeblock(reg);
  12.183 +        return 0;
  12.184 +    }
  12.185 +    printf("[vdi.c] Couldn't get/create a VDI registry!\n");
  12.186 +    return -1;
  12.187 +}
  12.188 +    
  12.189 +#ifdef VDI_STANDALONE
  12.190 +
  12.191 +#define TEST_VDIS      50
  12.192 +#define NR_ITERS    50000
  12.193 +#define FORK_POINTS   200
  12.194 +#define INIT_VDIS       3
  12.195 +#define INIT_SNAPS     40
  12.196 +
  12.197 +/* These must be of decreasing size: */
  12.198 +#define NEW_FORK       (RAND_MAX-(RAND_MAX/1000))
  12.199 +#define NEW_ROOT_VDI   (RAND_MAX-((RAND_MAX/1000)*2))
  12.200 +#define NEW_FORK_VDI   (RAND_MAX-((RAND_MAX/1000)*3))
  12.201 +
  12.202 +#define GRAPH_DOT_FILE "vdi.dot"
  12.203 +#define GRAPH_PS_FILE  "vdi.ps"
  12.204 +
  12.205 +
  12.206 +typedef struct sh_st {
  12.207 +    snap_id_t     id;
  12.208 +    struct sh_st *next;
  12.209 +} sh_t;
  12.210 +
  12.211 +#define SNAP_HASHSZ 1024
  12.212 +sh_t *node_hash[SNAP_HASHSZ];
  12.213 +#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
  12.214 +
  12.215 +#define SNAPID_EQUAL(_a,_b) \
  12.216 +    (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
  12.217 +int sh_check_and_add(snap_id_t *id)
  12.218 +{
  12.219 +    /* Returns 1 if id is already recorded; otherwise records it, returns 0. */
  12.220 +    sh_t **s = &node_hash[SNAP_HASH(id)];
  12.221 +
  12.222 +    while (*s != NULL) {
  12.223 +        if (SNAPID_EQUAL(&((*s)->id), id))
  12.224 +            return 1;
  12.225 +        s = &(*s)->next;   /* move the cursor; never rewrite the chain links */
  12.226 +    }
  12.227 +    if ((*s = (sh_t *)malloc(sizeof(sh_t))) == NULL)
  12.228 +        return 0;          /* out of memory: reported as new, not recorded */
  12.229 +    (*s)->id = *id;
  12.230 +    (*s)->next = NULL;
  12.231 +    return 0;
  12.232 +}
  12.233 +
  12.234 +int main(int argc, char *argv[])
  12.235 +{
  12.236 +    vdi_t *vdi_list[TEST_VDIS];
  12.237 +    snap_id_t id, fork_points[FORK_POINTS];
  12.238 +    int nr_vdis = 0, nr_forks = 0;
  12.239 +    int i, j, r;
  12.240 +    FILE *f;
  12.241 +    char name[VDI_NAME_SZ];
  12.242 +    /* Standalone self-test: build a random VDI/snapshot history, graph it. */
  12.243 +    __init_blockstore();
  12.244 +    __init_vdi();
  12.245 +
  12.246 +    printf("[o] Generating seed VDIs. (%d VDIs)\n", INIT_VDIS);
  12.247 +
  12.248 +    for (i=0; i<INIT_VDIS; i++) {
  12.249 +        r=rand();
  12.250 +
  12.251 +        sprintf(name, "VDI Number %d", nr_vdis);
  12.252 +        vdi_list[i] = vdi_create(NULL, name);
  12.253 +        for (j=0; j<(r%INIT_SNAPS); j++)
  12.254 +            vdi_snapshot(vdi_list[i]);
  12.255 +        fork_points[i] = vdi_list[i]->snap;
  12.256 +        nr_vdis++;
  12.257 +        nr_forks++;
  12.258 +    }
  12.259 +
  12.260 +    printf("[o] Running a random workload. (%d iterations)\n", NR_ITERS);
  12.261 +
  12.262 +    for (i=0; i<NR_ITERS; i++) {
  12.263 +        r = rand();
  12.264 +
  12.265 +        if ( r > NEW_FORK ) {
  12.266 +            if ( nr_forks >= FORK_POINTS )   /* fork_points[] is full */
  12.267 +                continue;
  12.268 +            id = vdi_list[r%nr_vdis]->snap;
  12.269 +            if ( ( id.block == 0 ) || ( id.index == 0 ) )
  12.270 +                continue;
  12.271 +            id.index--;
  12.272 +            fork_points[nr_forks++] = id;
  12.273 +
  12.274 +        } else if ( r > NEW_ROOT_VDI ) {
  12.275 +
  12.276 +            if ( nr_vdis == TEST_VDIS )
  12.277 +                continue;
  12.278 +
  12.279 +            sprintf(name, "VDI Number %d.", nr_vdis);
  12.280 +            vdi_list[nr_vdis++] = vdi_create(NULL, name);
  12.281 +
  12.282 +        } else if ( r > NEW_FORK_VDI ) {
  12.283 +
  12.284 +            if ( nr_vdis == TEST_VDIS )
  12.285 +                continue;
  12.286 +
  12.287 +            sprintf(name, "VDI Number %d.", nr_vdis);
  12.288 +            vdi_list[nr_vdis++] = vdi_create(&fork_points[r%nr_forks], name);
  12.289 +
  12.290 +        } else /* SNAPSHOT */ {
  12.291 +
  12.292 +            vdi_snapshot(vdi_list[r%nr_vdis]);
  12.293 +
  12.294 +        }
  12.295 +    }
  12.296 +
  12.297 +    /* now dump it out to a dot file. */
  12.298 +    printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis);
  12.299 +    f = fopen(GRAPH_DOT_FILE, "w");
  12.300 +    if ( f == NULL ) {   /* no file to write the graph into */
  12.301 +        printf("Couldn't open %s!\n", GRAPH_DOT_FILE);
  12.302 +        return -1;
  12.303 +    }
  12.304 +    fprintf(f, "digraph G {\n");   /* graph preamble */
  12.305 +    fprintf(f, "   rankdir=LR\n");
  12.306 +    for (i=0; i<nr_vdis; i++) {
  12.307 +        char oldnode[255];
  12.308 +        snap_block_t *blk;
  12.309 +        snap_id_t id = vdi_list[i]->snap;
  12.310 +        int nr_snaps, done=0;
  12.311 +
  12.312 +        /* add a node for the id */
  12.313 +        printf("vdi: %d\n", i);
  12.314 +        fprintf(f, "   n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", 
  12.315 +                id.block, id.index, vdi_list[i]->name,
  12.316 +                id.block, id.index);
  12.317 +        sprintf(oldnode, "n%Ld%d", id.block, id.index);
  12.318 +
  12.319 +        while (id.block != 0) {
  12.320 +            blk = snap_get_block(id.block);
  12.321 +            nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index);
  12.322 +            id = blk->hdr.fork_block;
  12.323 +
  12.324 +            done = sh_check_and_add(&id);
  12.325 +
  12.326 +            /* add a node for the fork_id */
  12.327 +            if (!done) {
  12.328 +                fprintf(f, "   n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", 
  12.329 +                    id.block, id.index,
  12.330 +                    id.block, id.index);
  12.331 +            }
  12.332 +
  12.333 +            /* add an edge between them */
  12.334 +            fprintf(f, "   n%Ld%d -> %s [label=\"%u snapshots\"]\n",
  12.335 +                    id.block, id.index, oldnode, nr_snaps);
  12.336 +            sprintf(oldnode, "n%Ld%d", id.block, id.index);
  12.337 +            freeblock(blk);
  12.338 +
  12.339 +            if (done) break;
  12.340 +        }
  12.341 +    }
  12.342 +
  12.343 +    /* write graph postamble */
  12.344 +    fprintf(f, "}\n");
  12.345 +    fclose(f);
  12.346 +
  12.347 +    printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE);
  12.348 +    {
  12.349 +        char cmd[255];
  12.350 +        sprintf(cmd, "dot %s -Tps -o %s", GRAPH_DOT_FILE, GRAPH_PS_FILE);
  12.351 +        system(cmd);
  12.352 +    }
  12.353 +    return 0;
  12.354 +}
  12.355 +
  12.356 +#endif
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/tools/blktap/vdi.h	Thu Mar 03 21:00:54 2005 +0000
    13.3 @@ -0,0 +1,48 @@
    13.4 +/**************************************************************************
    13.5 + * 
    13.6 + * vdi.h
    13.7 + *
    13.8 + * Virtual Disk Image (VDI) Interfaces
    13.9 + *
   13.10 + */
   13.11 +
   13.12 +#ifndef __VDI_H__
   13.13 +#define __VDI_H__
   13.14 +
   13.15 +#include "blktaplib.h"
   13.16 +#include "snaplog.h"
   13.17 +
   13.18 +#define VDI_HEIGHT     35
   13.19 +#define VDI_REG_HEIGHT 35 /* why not? */
   13.20 +
   13.21 +#define VDI_NAME_SZ 256
   13.22 +
   13.23 +typedef struct vdi {
   13.24 +    u64         id;               /* unique vdi id -- used by the registry   */
   13.25 +    u64         block;            /* block where this vdi lives (also unique)*/
   13.26 +    u64         radix_root;       /* radix root node for block mappings      */
   13.27 +    snap_id_t   snap;             /* next snapshot slot for this VDI         */
   13.28 +    struct vdi *next;             /* used to hash-chain in blkif.            */
   13.29 +    blkif_vdev_t vdevice;         /* currently mounted as...                 */
   13.30 +    char        name[VDI_NAME_SZ];/* human readable vdi name                 */
   13.31 +} vdi_t;
   13.32 +
   13.33 +#define VDI_REG_MAGIC   0xff00ff0bb0ff00ffLL
   13.34 +
   13.35 +typedef struct vdi_registry {
   13.36 +    u64     magic;    /* must equal VDI_REG_MAGIC for the block to be accepted */
   13.37 +    u64     nr_vdis;  /* VDIs created so far; vdi_create uses it as the next id */
   13.38 +} vdi_registry_t;
   13.39 +
   13.40 +
   13.41 +int __init_vdi(void);
   13.42 +
   13.43 +vdi_t *vdi_get(u64 vdi_id);
   13.44 +vdi_registry_t *get_vdi_registry(void);
   13.45 +vdi_t *vdi_create(snap_id_t *parent_snap, char *name);
   13.46 +u64 vdi_lookup_block(vdi_t *vdi, u64 vdi_block, int *writable);
   13.47 +void vdi_update_block(vdi_t *vdi, u64 vdi_block, u64 g_block);
   13.48 +void vdi_snapshot(vdi_t *vdi);
   13.49 +
   13.50 +
   13.51 +#endif /* __VDI_H__ */
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/tools/blktap/vdi_create.c	Thu Mar 03 21:00:54 2005 +0000
    14.3 @@ -0,0 +1,52 @@
    14.4 +/**************************************************************************
    14.5 + * 
    14.6 + * vdi_create.c
    14.7 + *
    14.8 + * Create a new vdi.
    14.9 + *
   14.10 + */
   14.11 + 
   14.12 +#include <stdio.h>
   14.13 +#include <stdlib.h>
   14.14 +#include <string.h>
   14.15 +#include <sys/time.h>
   14.16 +#include "blockstore.h"
   14.17 +#include "radix.h"
   14.18 +#include "vdi.h"
   14.19 +
   14.20 +int main(int argc, char *argv[])
   14.21 +{
   14.22 +    vdi_t       *vdi;
   14.23 +    char         name[VDI_NAME_SZ] = "";
   14.24 +    snap_id_t    id;
   14.25 +    int          from_snap = 0;
   14.26 +
   14.27 +    __init_blockstore();
   14.28 +    __init_vdi();
   14.29 +
   14.30 +    if ( argc == 1 ) {
   14.31 +        printf("usage: %s <VDI Name> [<snap block> <snap idx>]\n", argv[0]);
   14.32 +        exit(-1);
   14.33 +    }
   14.34 +    /* name[] has VDI_NAME_SZ bytes: copy SZ-1 at most, terminate in-bounds */
   14.35 +    strncpy( name, argv[1], VDI_NAME_SZ - 1);
   14.36 +    name[VDI_NAME_SZ - 1] = '\0';
   14.37 +
   14.38 +    if ( argc > 3 ) {
   14.39 +        id.block   = (u64)          atoll(argv[2]);
   14.40 +        id.index   = (unsigned int) atol (argv[3]);
   14.41 +        from_snap  = 1;
   14.42 +    }
   14.43 +
   14.44 +    vdi = vdi_create( from_snap ? &id : NULL, name);
   14.45 +
   14.46 +    if ( vdi == NULL ) {
   14.47 +        /* vdi_create handed nothing back, so there is nothing to free */
   14.48 +        printf("Failed to create VDI!\n");
   14.49 +        exit(-1);
   14.50 +    }
   14.51 +
   14.52 +    freeblock(vdi);
   14.53 +
   14.54 +    return (0);
   14.55 +}
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/tools/blktap/vdi_fill.c	Thu Mar 03 21:00:54 2005 +0000
    15.3 @@ -0,0 +1,82 @@
    15.4 +/**************************************************************************
    15.5 + * 
    15.6 + * vdi_fill.c
    15.7 + *
    15.8 + * Hoover a file or device into a vdi.
    15.9 + * You must first create the vdi with vdi_create.
   15.10 + *
   15.11 + */
   15.12 + 
   15.13 +#include <stdio.h>
   15.14 +#include <stdlib.h>
   15.15 +#include <string.h>
   15.16 +#include <sys/types.h>
   15.17 +#include <sys/stat.h>
   15.18 +#include <fcntl.h>
   15.19 +#include <unistd.h>
   15.20 +#include "blockstore.h"
   15.21 +#include "radix.h"
   15.22 +#include "vdi.h"
   15.23 +
   15.24 +int main(int argc, char *argv[])
   15.25 +{
   15.26 +    vdi_t       *vdi;
   15.27 +    u64          id;
   15.28 +    int          fd;
   15.29 +    struct stat  st;
   15.30 +    u64          tot_size;
   15.31 +    char         spage[BLOCK_SIZE];
   15.32 +    ssize_t      count;      /* signed: read() reports errors as -1 */
   15.33 +    u64          vblock = 0;
   15.34 +    /* Copies <filename> into the VDI, one BLOCK_SIZE read per VDI block. */
   15.35 +    __init_blockstore();
   15.36 +    __init_vdi();
   15.37 +
   15.38 +    if ( argc < 3 ) {
   15.39 +        printf("usage: %s <VDI id> <filename>\n", argv[0]);
   15.40 +        exit(-1);
   15.41 +    }
   15.42 +
   15.43 +    id = (u64) atoll(argv[1]);
   15.44 +
   15.45 +    vdi = vdi_get( id );
   15.46 +
   15.47 +    if ( vdi == NULL ) {
   15.48 +        printf("Failed to retreive VDI %Ld!\n", id);
   15.49 +        exit(-1);
   15.50 +    }
   15.51 +
   15.52 +    fd = open(argv[2], O_RDONLY | O_LARGEFILE);
   15.53 +
   15.54 +    if (fd < 0) {
   15.55 +        printf("Couldn't open %s!\n", argv[2]);
   15.56 +        exit(-1);
   15.57 +    }
   15.58 +
   15.59 +    if ( fstat(fd, &st) != 0 ) {
   15.60 +        printf("Couldn't stat %s!\n", argv[2]);
   15.61 +        exit(-1);
   15.62 +    }
   15.63 +
   15.64 +    tot_size = (u64) st.st_size;
   15.65 +    printf("Filling VDI %Ld with %Ld bytes.\n", id, tot_size);
   15.66 +
   15.67 +    printf("%011Ld blocks total\n", tot_size / BLOCK_SIZE);    
   15.68 +    printf("           ");
   15.69 +    while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
   15.70 +        u64 gblock = allocblock(spage);
   15.71 +
   15.72 +        vdi_update_block(vdi, vblock, gblock);
   15.73 +        vblock++;
   15.74 +        if ((vblock % 512) == 0)
   15.75 +            printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
   15.76 +        fflush(stdout);
   15.77 +    }
   15.78 +    printf("\n");
   15.79 +    if ( count < 0 )   /* read() failed rather than reaching end of file */
   15.80 +        printf("Error reading %s!\n", argv[2]);
   15.81 +    close(fd);
   15.82 +    freeblock(vdi);
   15.83 +
   15.84 +    return ( count < 0 ) ? -1 : 0;
   15.85 +}
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/tools/blktap/vdi_list.c	Thu Mar 03 21:00:54 2005 +0000
    16.3 @@ -0,0 +1,47 @@
    16.4 +/**************************************************************************
    16.5 + * 
    16.6 + * vdi_list.c
    16.7 + *
    16.8 + * Print a list of VDIs on the block store.
    16.9 + *
   16.10 + */
   16.11 + 
   16.12 +#include <stdio.h>
   16.13 +#include <stdlib.h>
   16.14 +#include <string.h>
   16.15 +#include <sys/time.h>
   16.16 +#include "blockstore.h"
   16.17 +#include "radix.h"
   16.18 +#include "vdi.h"
   16.19 +
   16.20 +int main(int argc, char *argv[])
   16.21 +{
   16.22 +    vdi_registry_t *registry;
   16.23 +    vdi_t *entry;
   16.24 +    int idx = 0;
   16.25 +
   16.26 +    __init_blockstore();
   16.27 +    __init_vdi();
   16.28 +
   16.29 +    registry = get_vdi_registry();
   16.30 +    if ( registry == NULL ) {
   16.31 +        printf("couldn't get VDI registry.\n");
   16.32 +        exit(-1);
   16.33 +    }
   16.34 +
   16.35 +    /* Probe every id below nr_vdis and print the ones that resolve to a
   16.36 +     * VDI block; ids that do not resolve are silently skipped. */
   16.37 +    while ( idx < registry->nr_vdis ) {
   16.38 +        entry = vdi_get(idx++);
   16.39 +        if ( entry == NULL )
   16.40 +            continue;
   16.41 +
   16.42 +        printf("%10Ld %60s\n", entry->id, entry->name);
   16.43 +        freeblock(entry);
   16.44 +    }
   16.45 +
   16.46 +    /* Release the registry block before exiting. */
   16.47 +    freeblock(registry);
   16.48 +
   16.49 +    return 0;
   16.50 +}
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/tools/blktap/vdi_snap.c	Thu Mar 03 21:00:54 2005 +0000
    17.3 @@ -0,0 +1,43 @@
    17.4 +/**************************************************************************
    17.5 + * 
    17.6 + * vdi_snap.c
    17.7 + *
    17.8 + * Snapshot a vdi.
    17.9 + *
   17.10 + */
   17.11 + 
   17.12 +#include <stdio.h>
   17.13 +#include <stdlib.h>
   17.14 +#include <string.h>
   17.15 +#include <sys/time.h>
   17.16 +#include "blockstore.h"
   17.17 +#include "radix.h"
   17.18 +#include "vdi.h"
   17.19 +
   17.20 +int main(int argc, char *argv[])
   17.21 +{
   17.22 +    vdi_t  *vdi;
   17.23 +    u64     id;
   17.24 +
   17.25 +    __init_blockstore();
   17.26 +    __init_vdi();
   17.27 +
   17.28 +    if ( argc == 1 ) {
   17.29 +        printf("usage: %s <VDI id>\n", argv[0]);
   17.30 +        exit(-1);
   17.31 +    }
   17.32 +
   17.33 +    id = (u64) atoll(argv[1]);
   17.34 +
   17.35 +    vdi = vdi_get(id);
   17.36 +
   17.37 +    if ( vdi == NULL ) {
   17.38 +        /* vdi_get handed nothing back, so there is nothing to free */
   17.39 +        printf("couldn't find the requested VDI.\n");
   17.40 +        exit(-1);
   17.41 +    }
   17.42 +
   17.43 +    vdi_snapshot(vdi);
   17.44 +    freeblock(vdi);   /* release the block that vdi_get returned */
   17.45 +    return 0;
   17.46 +}
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/tools/blktap/vdi_snap_list.c	Thu Mar 03 21:00:54 2005 +0000
    18.3 @@ -0,0 +1,79 @@
    18.4 +/**************************************************************************
    18.5 + * 
    18.6 + * vdi_snap_list.c
    18.7 + *
    18.8 + * Print a list of snapshots for the specified vdi.
    18.9 + *
   18.10 + */
   18.11 + 
   18.12 +#include <stdio.h>
   18.13 +#include <stdlib.h>
   18.14 +#include <string.h>
   18.15 +#include <time.h>
   18.16 +#include <sys/time.h>
   18.17 +#include "blockstore.h"
   18.18 +#include "radix.h"
   18.19 +#include "vdi.h"
   18.20 +
   18.21 +int main(int argc, char *argv[])
   18.22 +{
   18.23 +    vdi_t        *vdi;
   18.24 +    u64           id;
   18.25 +    int           i, max_snaps = -1;
   18.26 +    snap_block_t *blk;
   18.27 +    snap_id_t     sid;
   18.28 +    char         *t;
   18.29 +
   18.30 +    __init_blockstore();
   18.31 +    __init_vdi();
   18.32 +
   18.33 +    if ( argc == 1 ) {
   18.34 +        printf("usage: %s <VDI id> [max snaps]\n", argv[0]);
   18.35 +        exit(-1);
   18.36 +    }
   18.37 +
   18.38 +    id = (u64) atoll(argv[1]);
   18.39 +
   18.40 +    if ( argc > 2 ) {
   18.41 +        max_snaps = atoi(argv[2]);
   18.42 +    }
   18.43 +
   18.44 +    vdi = vdi_get(id);
   18.45 +
   18.46 +    if ( vdi == NULL ) {
   18.47 +        /* vdi_get handed nothing back, so there is nothing to free */
   18.48 +        printf("couldn't find the requested VDI.\n");
   18.49 +        exit(-1);
   18.50 +    }
   18.51 +
   18.52 +    sid = vdi->snap;
   18.53 +    sid.index--;
   18.54 +    freeblock(vdi);   /* sid is a private copy; the vdi block is done with */
   18.55 +    /* columns: log block, index in block, timestamp, usec, radix root */
   18.56 +    printf("%6s%4s%37s %12s\n", "Block", "idx", "timestamp", "radix root");
   18.57 +
   18.58 +    while (sid.block != 0) {
   18.59 +        blk = snap_get_block(sid.block);
   18.60 +        if ( blk == NULL )   /* unreadable log block: stop the walk */
   18.61 +            break;
   18.62 +        for (i = sid.index; i >= 0; i--) {
   18.63 +            if ( max_snaps == 0  ) {
   18.64 +                freeblock(blk);
   18.65 +                goto done;
   18.66 +            }
   18.67 +            t = ctime(&blk->snaps[i].timestamp.tv_sec);
   18.68 +            t[strlen(t)-1] = '\0';
   18.69 +            printf("%6Ld%4u%30s %06lu %12Ld\n",
   18.70 +                    sid.block, i, t,
   18.71 +                    blk->snaps[i].timestamp.tv_usec,
   18.72 +                    blk->snaps[i].radix_root);
   18.73 +            if ( max_snaps != -1 ) 
   18.74 +                max_snaps--;
   18.75 +        }
   18.76 +        sid = blk->hdr.parent_block;
   18.77 +        freeblock(blk);
   18.78 +    }
   18.79 +
   18.80 +done:            
   18.81 +    return 0;
   18.82 +}
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/tools/blktap/vdi_tree.c	Thu Mar 03 21:00:54 2005 +0000
    19.3 @@ -0,0 +1,132 @@
    19.4 +/**************************************************************************
    19.5 + * 
    19.6 + * vdi_tree.c
    19.7 + *
    19.8 + * Output current vdi tree to dot and postscript.
    19.9 + *
   19.10 + */
   19.11 + 
   19.12 +#include <stdio.h>
   19.13 +#include <stdlib.h>
   19.14 +#include <string.h>
   19.15 +#include <sys/time.h>
   19.16 +#include "blockstore.h"
   19.17 +#include "radix.h"
   19.18 +#include "vdi.h"
   19.19 +
   19.20 +#define GRAPH_DOT_FILE "vdi.dot"
   19.21 +#define GRAPH_PS_FILE  "vdi.ps"
   19.22 +
   19.23 +typedef struct sh_st {
   19.24 +    snap_id_t     id;
   19.25 +    struct sh_st *next;
   19.26 +} sh_t;
   19.27 +
   19.28 +#define SNAP_HASHSZ 1024
   19.29 +sh_t *node_hash[SNAP_HASHSZ];
   19.30 +#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
   19.31 +
   19.32 +#define SNAPID_EQUAL(_a,_b) \
   19.33 +    (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
   19.34 +int sh_check_and_add(snap_id_t *id)
   19.35 +{
   19.36 +    /* Returns 1 if id is already recorded; otherwise records it, returns 0. */
   19.37 +    sh_t **s = &node_hash[SNAP_HASH(id)];
   19.38 +
   19.39 +    while (*s != NULL) {
   19.40 +        if (SNAPID_EQUAL(&((*s)->id), id))
   19.41 +            return 1;
   19.42 +        s = &(*s)->next;   /* move the cursor; never rewrite the chain links */
   19.43 +    }
   19.44 +    if ((*s = (sh_t *)malloc(sizeof(sh_t))) == NULL)
   19.45 +        return 0;          /* out of memory: reported as new, not recorded */
   19.46 +    (*s)->id = *id;
   19.47 +    (*s)->next = NULL;
   19.48 +    return 0;
   19.49 +}
   19.50 +
   19.51 +int main(int argc, char *argv[])
   19.52 +{
   19.53 +    FILE *f;
   19.54 +    char dot_file[255] = GRAPH_DOT_FILE;
   19.55 +    char  ps_file[255] = GRAPH_PS_FILE;
   19.56 +    /* argv[1], if given, replaces the postscript name; dot name is fixed. */
   19.57 +    vdi_registry_t *reg;
   19.58 +    vdi_t *vdi;
   19.59 +    int i;
   19.60 +
   19.61 +    __init_blockstore();
   19.62 +    __init_vdi();
   19.63 +
   19.64 +    reg = get_vdi_registry();
   19.65 +
   19.66 +    if ( reg == NULL ) {
   19.67 +        printf("couldn't get VDI registry.\n");
   19.68 +        exit(-1);
   19.69 +    }
   19.70 +
   19.71 +    if ( argc > 1 ) {
   19.72 +        strncpy(ps_file, argv[1], sizeof(ps_file) - 1);
   19.73 +        ps_file[sizeof(ps_file) - 1] = '\0';   /* last valid index */
   19.74 +    }
   19.75 +
   19.76 +    /* now dump it out to a dot file. */
   19.77 +    printf("[o] Dumping state to a dot graph. (%d VDIs)\n", (int)reg->nr_vdis);
   19.78 +    f = fopen(dot_file, "w");
   19.79 +    if ( f == NULL ) {   /* no file to write the graph into */
   19.80 +        printf("couldn't open %s for writing.\n", dot_file);
   19.81 +        exit(-1);
   19.82 +    }
   19.83 +    fprintf(f, "digraph G {\n");   /* graph preamble */
   19.84 +    fprintf(f, "   rankdir=LR\n");
   19.85 +    for (i=0; i<reg->nr_vdis; i++) {
   19.86 +        char oldnode[255];
   19.87 +        snap_block_t *blk;
   19.88 +        snap_id_t id;
   19.89 +        int nr_snaps, done=0;
   19.90 +        vdi = vdi_get(i);
   19.91 +        if ( vdi == NULL )   /* id does not resolve to a VDI block: skip */
   19.92 +            continue;
   19.93 +        id = vdi->snap;      /* add a node for the id */
   19.94 +        printf("vdi: %d\n", i);
   19.95 +        fprintf(f, "   n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", 
   19.96 +                id.block, id.index, vdi->name,
   19.97 +                id.block, id.index);
   19.98 +        sprintf(oldnode, "n%Ld%d", id.block, id.index);
   19.99 +        freeblock(vdi);      /* name and snap id have been consumed */
  19.100 +        while (id.block != 0) {
  19.101 +            blk = snap_get_block(id.block);
  19.102 +            if ( blk == NULL ) break;   /* unreadable log block: stop here */
  19.103 +            nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index);
  19.104 +            id = blk->hdr.fork_block;
  19.105 +            done = sh_check_and_add(&id);
  19.106 +
  19.107 +            /* add a node for the fork_id */
  19.108 +            if (!done) {
  19.109 +                fprintf(f, "   n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", 
  19.110 +                    id.block, id.index,
  19.111 +                    id.block, id.index);
  19.112 +            }
  19.113 +
  19.114 +            /* add an edge between them */
  19.115 +            fprintf(f, "   n%Ld%d -> %s [label=\"%u snapshots\"]\n",
  19.116 +                    id.block, id.index, oldnode, nr_snaps);
  19.117 +            sprintf(oldnode, "n%Ld%d", id.block, id.index);
  19.118 +            freeblock(blk);
  19.119 +
  19.120 +            if (done) break;
  19.121 +        }
  19.122 +    }
  19.123 +    freeblock(reg);
  19.124 +    /* write graph postamble */
  19.125 +    fprintf(f, "}\n");
  19.126 +    fclose(f);
  19.127 +
  19.128 +    printf("[o] Generating postscript graph. (%s)\n", ps_file);
  19.129 +    {
  19.130 +        char cmd[sizeof(dot_file) + sizeof(ps_file) + 32];
  19.131 +        snprintf(cmd, sizeof(cmd), "dot %s -Tps -o %s", dot_file, ps_file);
  19.132 +        system(cmd);
  19.133 +    }
  19.134 +    return 0;
  19.135 +}
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/tools/blktap/vdi_validate.c	Thu Mar 03 21:00:54 2005 +0000
    20.3 @@ -0,0 +1,96 @@
    20.4 +/**************************************************************************
    20.5 + * 
    20.6 + * vdi_validate.c
    20.7 + *
    20.8 + * Intended to sanity-check vm_fill and the underlying vdi code.
    20.9 + *
   20.10 + * Block-by-block compare of a vdi with a file/device on the disk.
   20.11 + *
   20.12 + */
   20.13 + 
   20.14 +#include <stdio.h>
   20.15 +#include <stdlib.h>
   20.16 +#include <string.h>
   20.17 +#include <sys/types.h>
   20.18 +#include <sys/stat.h>
   20.19 +#include <fcntl.h>
   20.20 +#include <unistd.h>
   20.21 +#include "blockstore.h"
   20.22 +#include "radix.h"
   20.23 +#include "vdi.h"
   20.24 +
   20.25 +int main(int argc, char *argv[])
   20.26 +{
   20.27 +    vdi_t       *vdi;
   20.28 +    u64          id;
   20.29 +    int          fd;
   20.30 +    struct stat  st;
   20.31 +    u64          tot_size;
   20.32 +    char         spage[BLOCK_SIZE], *dpage;
   20.33 +    ssize_t      count;      /* signed: read() reports errors as -1 */
   20.34 +    u64          vblock = 0;
   20.35 +    /* Compares <filename> with the VDI, one BLOCK_SIZE read per VDI block. */
   20.36 +    __init_blockstore();
   20.37 +    __init_vdi();
   20.38 +
   20.39 +    if ( argc < 3 ) {
   20.40 +        printf("usage: %s <VDI id> <filename>\n", argv[0]);
   20.41 +        exit(-1);
   20.42 +    }
   20.43 +
   20.44 +    id = (u64) atoll(argv[1]);
   20.45 +
   20.46 +    vdi = vdi_get( id );
   20.47 +
   20.48 +    if ( vdi == NULL ) {
   20.49 +        printf("Failed to retreive VDI %Ld!\n", id);
   20.50 +        exit(-1);
   20.51 +    }
   20.52 +
   20.53 +    fd = open(argv[2], O_RDONLY | O_LARGEFILE);
   20.54 +
   20.55 +    if (fd < 0) {
   20.56 +        printf("Couldn't open %s!\n", argv[2]);
   20.57 +        exit(-1);
   20.58 +    }
   20.59 +
   20.60 +    if ( fstat(fd, &st) != 0 ) {
   20.61 +        printf("Couldn't stat %s!\n", argv[2]);
   20.62 +        exit(-1);
   20.63 +    }
   20.64 +
   20.65 +    tot_size = (u64) st.st_size;
   20.66 +    printf("Testing VDI %Ld (%Ld bytes).\n", id, tot_size);
   20.67 +
   20.68 +    printf("           ");
   20.69 +    while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
   20.70 +        u64 gblock = vdi_lookup_block(vdi, vblock, NULL);
   20.71 +
   20.72 +        if (gblock == 0) {
   20.73 +            printf("\n\nfound an unmapped VDI block (%Ld)\n", vblock);
   20.74 +            exit(0);
   20.75 +        }
   20.76 +
   20.77 +        dpage = readblock(gblock);
   20.78 +        if (dpage == NULL) {   /* nothing to hand to memcmp() */
   20.79 +            printf("\n\ncouldn't read block %Ld (VDI block %Ld)\n", gblock, vblock);
   20.80 +            exit(-1);
   20.81 +        }
   20.82 +        if (memcmp(spage, dpage, BLOCK_SIZE) != 0) {
   20.83 +            printf("\n\nblocks don't match! (%Ld)\n", vblock);
   20.84 +            exit(0);
   20.85 +        }
   20.86 +        freeblock(dpage);
   20.87 +        vblock++;
   20.88 +        printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
   20.89 +        fflush(stdout);
   20.90 +    }
   20.91 +    printf("\n");
   20.92 +    if ( count < 0 ) {   /* read() failed rather than reaching end of file */
   20.93 +        printf("Error reading %s!\n", argv[2]);
   20.94 +        exit(-1);
   20.95 +    }
   20.96 +    printf("VDI %Ld looks good!\n", id);
   20.97 +    freeblock(vdi);
   20.98 +    return (0);
   20.99 +}
    21.1 --- a/tools/python/xen/xend/server/blkif.py	Mon Feb 28 20:58:11 2005 +0000
    21.2 +++ b/tools/python/xen/xend/server/blkif.py	Thu Mar 03 21:00:54 2005 +0000
    21.3 @@ -369,7 +369,7 @@ class BlkDev(controller.SplitDev):
    21.4          # Add a new disk type that will just pass an opaque id in the
    21.5          # start_sector and use an experimental device type.
    21.6          # Please contact andrew.warfield@cl.cam.ac.uk with any concerns.
    21.7 -        if self.type == 'amorfs':
    21.8 +        if self.type == 'parallax':
    21.9              self.node   = node
   21.10              self.device =  61440 # (240,0)
   21.11              self.start_sector = long(self.params)