direct-io.hg

changeset 4249:07d81708cc40

bitkeeper revision 1.1236.1.103 (423f2997yh5jfJbfPzgZOswMIKL-RA)

manual merge of James's stuff.
author akw27@arcadians.cl.cam.ac.uk
date Mon Mar 21 20:07:51 2005 +0000 (2005-03-21)
parents bc658811e45d e97b2a505ee8
children c378c32a9538
files .rootkeys tools/blktap/Makefile tools/blktap/blktaplib.c tools/blktap/blockstore-tls.c tools/blktap/blockstore.c tools/blktap/blockstore.h tools/blktap/parallax-threaded.c tools/blktap/parallax-threaded.h tools/blktap/parallax.c tools/blktap/radix.c tools/blktap/radix.h tools/blktap/snaplog.c tools/blktap/snaplog.h tools/blktap/vdi.c tools/blktap/vdi_snap_delete.c tools/blktap/vdi_snap_list.c
line diff
     1.1 --- a/.rootkeys	Mon Mar 21 18:05:36 2005 +0000
     1.2 +++ b/.rootkeys	Mon Mar 21 20:07:51 2005 +0000
     1.3 @@ -411,6 +411,7 @@ 42090340c7pQbh0Km8zLcEqPd_3zIg tools/blk
     1.4  42090340_mvZtozMjghPJO0qsjk4NQ tools/blktap/blkint.h
     1.5  42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blktap/blktaplib.c
     1.6  42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h
     1.7 +423f270cAbkh2f-DHtT0hmCtFFXVXg tools/blktap/blockstore-tls.c
     1.8  42277b02WrfP1meTDPv1M5swFq8oHQ tools/blktap/blockstore.c
     1.9  42277b02P1C0FYj3gqwTZUD8sxKCug tools/blktap/blockstore.h
    1.10  42371b8aL1JsxAXOd4bBhmZKDyjiJg tools/blktap/blockstored.c
    1.11 @@ -419,6 +420,8 @@ 42090340B3mDvcxvd9ehDHUkg46hvw tools/blk
    1.12  42090340ZWkc5Xhf9lpQmDON8HJXww tools/blktap/libgnbd/gnbdtest.c
    1.13  42090340ocMiUScJE3OpY7QNunvSbg tools/blktap/libgnbd/libgnbd.c
    1.14  42090340G5_F_EeVnPORKB0pTMGGhA tools/blktap/libgnbd/libgnbd.h
    1.15 +423f270cbEKiTMapKnCyqkuwGvgOMA tools/blktap/parallax-threaded.c
    1.16 +423f270cFdXryIcD7HTPUl_Dbk4DAQ tools/blktap/parallax-threaded.h
    1.17  42277b03930x2TJT3PZlw6o0GERXpw tools/blktap/parallax.c
    1.18  42277b03XQYq8bujXSz7JAZ8N7j_pA tools/blktap/radix.c
    1.19  42277b03vZ4-jno_mgKmAcCW3ycRAg tools/blktap/radix.h
    1.20 @@ -430,6 +433,7 @@ 42277b04zMAhB0_946sHQ_H2vwnt0Q tools/blk
    1.21  42277b04xB_iUmiSm6nKcy8OV8bckA tools/blktap/vdi_fill.c
    1.22  42277b045CJGD_rKH-ZT_-0X4knhWA tools/blktap/vdi_list.c
    1.23  42277b043ZKx0NJSbcgptQctQ5rerg tools/blktap/vdi_snap.c
    1.24 +423f270c_QDjGLQ_YdaOtyBM5n9BDg tools/blktap/vdi_snap_delete.c
    1.25  42277b043Fjy5-H7LyBtUPyDlZFo6A tools/blktap/vdi_snap_list.c
    1.26  42277b04vhqD6Lq3WmGbaESoAAKdhw tools/blktap/vdi_tree.c
    1.27  42277b047H8fTVyUf75BWAjh6Zpsqg tools/blktap/vdi_validate.c
     2.1 --- a/tools/blktap/Makefile	Mon Mar 21 18:05:36 2005 +0000
     2.2 +++ b/tools/blktap/Makefile	Mon Mar 21 20:07:51 2005 +0000
     2.3 @@ -21,8 +21,12 @@ SRCS     += blktaplib.c
     2.4  PLX_SRCS := 
     2.5  PLX_SRCS += vdi.c 
     2.6  PLX_SRCS += radix.c 
     2.7 +PLX_SRCS += snaplog.c
     2.8 +PLXT_SRCS := $(PLX_SRCS)
     2.9 +#PLXT_SRCS += blockstore-tls.c
    2.10 +PLXT_SRCS += blockstore.c
    2.11 +PLXT_SRCS += parallax-threaded.c
    2.12  PLX_SRCS += blockstore.c 
    2.13 -PLX_SRCS += snaplog.c
    2.14  VDI_SRCS := $(PLX_SRCS)
    2.15  PLX_SRCS += parallax.c
    2.16  
    2.17 @@ -31,6 +35,7 @@ VDI_TOOLS += vdi_create
    2.18  VDI_TOOLS += vdi_list
    2.19  VDI_TOOLS += vdi_snap
    2.20  VDI_TOOLS += vdi_snap_list
    2.21 +VDI_TOOLS += vdi_snap_delete
    2.22  VDI_TOOLS += vdi_fill
    2.23  VDI_TOOLS += vdi_tree
    2.24  VDI_TOOLS += vdi_validate
    2.25 @@ -91,7 +96,7 @@ libblktap.so:
    2.26  libblktap.so.$(MAJOR):
    2.27  	ln -sf libblktap.so.$(MAJOR).$(MINOR) $@
    2.28  libblktap.so.$(MAJOR).$(MINOR): $(OBJS)
    2.29 -	$(CC) -Wl,-soname -Wl,$(SONAME) -shared -o $@ $^ -L../libxutil -lxutil -lz
    2.30 +	$(CC) -Wl,-soname -Wl,$(SONAME) -shared -o $@ $^ -lpthread -L../libxutil -lxutil -lz
    2.31  
    2.32  blkdump: $(LIB)
    2.33  	$(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkdump.c
    2.34 @@ -117,6 +122,9 @@ blkaio: $(LIB) blkaio.c blkaiolib.c
    2.35  parallax: $(LIB) $(PLX_SRCS)
    2.36  	$(CC) $(CFLAGS) -o parallax -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap $(PLX_SRCS) libgnbd/libgnbd.a
    2.37  
    2.38 +parallax-threaded: $(LIB) $(PLXT_SRCS)
    2.39 +	$(CC) $(CFLAGS) -o parallax-threaded -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lpthread -lblktap $(PLXT_SRCS) libgnbd/libgnbd.a
    2.40 +
    2.41  vdi_test: $(LIB) $(VDI_SRCS)
    2.42  	$(CC) $(CFLAGS) -g3 -o vdi_test -DVDI_STANDALONE $(VDI_SRCS)
    2.43  
    2.44 @@ -132,6 +140,9 @@ vdi_snap: $(LIB) vdi_snap.c $(VDI_SRCS)
    2.45  vdi_snap_list: $(LIB) vdi_snap_list.c $(VDI_SRCS)
    2.46  	$(CC) $(CFLAGS) -g3 -o vdi_snap_list vdi_snap_list.c $(VDI_SRCS)
    2.47  
    2.48 +vdi_snap_delete: $(LIB) vdi_snap_delete.c $(VDI_SRCS)
    2.49 +	$(CC) $(CFLAGS) -g3 -o vdi_snap_delete vdi_snap_delete.c $(VDI_SRCS)
    2.50 +
    2.51  vdi_tree: $(LIB) vdi_tree.c $(VDI_SRCS)
    2.52  	$(CC) $(CFLAGS) -g3 -o vdi_tree vdi_tree.c $(VDI_SRCS)
    2.53  
    2.54 @@ -146,12 +157,22 @@ blockstored: blockstored.c
    2.55  bstest: bstest.c blockstore.c
    2.56  	$(CC) $(CFLAGS) -g3 -o bstest bstest.c blockstore.c
    2.57  
    2.58 -rdx_cmp: $(LIB) rdx_cmp.c $(VDI_SRCS)
    2.59 -	$(CC) $(CFLAGS) -g3 -o rdx_cmp rdx_cmp.c $(VDI_SRCS)
    2.60 -
    2.61 -
    2.62  .PHONY: TAGS clean install mk-symlinks rpm
    2.63  TAGS:
    2.64  	etags -t $(SRCS) *.h
    2.65  
    2.66  -include $(DEPS)
    2.67 +
    2.68 +#Random testing targets.  To be removed eventually.
    2.69 +
    2.70 +rdx_cmp: $(LIB) rdx_cmp.c $(VDI_SRCS)
    2.71 +	$(CC) $(CFLAGS) -g3 -o rdx_cmp rdx_cmp.c $(VDI_SRCS)
    2.72 +
    2.73 +bb-tls: $(LIB) blockstore-benchmark.c blockstore-tls.c   # both sources are compiled below
    2.74 +	$(CC) $(CFLAGS) -o bb-tls blockstore-benchmark.c blockstore-tls.c -lpthread
    2.75 +
    2.76 +bb-trans: $(LIB) blockstore-benchmark.c blockstore.c   # both sources are compiled below
    2.77 +	$(CC) $(CFLAGS) -o bb-trans blockstore-benchmark.c blockstore.c -lpthread
    2.78 +
    2.79 +radix-test: $(LIB) radix.c blockstore-threaded-trans.c
    2.80 +	$(CC) $(CFLAGS) -g3 -D RADIX_STANDALONE -o radix-test radix.c blockstore-threaded-trans.c
     3.1 --- a/tools/blktap/blktaplib.c	Mon Mar 21 18:05:36 2005 +0000
     3.2 +++ b/tools/blktap/blktaplib.c	Mon Mar 21 20:07:51 2005 +0000
     3.3 @@ -3,6 +3,8 @@
     3.4   * 
     3.5   * userspace interface routines for the blktap driver.
     3.6   *
     3.7 + * (threadsafe(r) version) 
     3.8 + *
     3.9   * (c) 2004 Andrew Warfield.
    3.10   */
    3.11  
    3.12 @@ -21,11 +23,13 @@
    3.13  #include <sys/ioctl.h>
    3.14  #include <string.h>
    3.15  #include <unistd.h>
    3.16 +#include <pthread.h>
    3.17 +
    3.18                                                                       
    3.19  #define __COMPILING_BLKTAP_LIB
    3.20  #include "blktaplib.h"
    3.21  
    3.22 -#if 1
    3.23 +#if 0
    3.24  #define DPRINTF(_f, _a...) printf ( _f , ## _a )
    3.25  #else
    3.26  #define DPRINTF(_f, _a...) ((void)0)
    3.27 @@ -194,15 +198,19 @@ void print_hooks(void)
    3.28          
    3.29  /*-----[ Data to/from Backend (server) VM ]------------------------------*/
    3.30  
    3.31 +
    3.32 +
    3.33  inline int write_req_to_be_ring(blkif_request_t *req)
    3.34  {
    3.35      blkif_request_t *req_d;
    3.36 +    static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
    3.37  
    3.38 -    //req_d = FRONT_RING_NEXT_EMPTY_REQUEST(&be_ring);
    3.39 +    pthread_mutex_lock(&be_prod_mutex);
    3.40      req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
    3.41      memcpy(req_d, req, sizeof(blkif_request_t));
    3.42      wmb();
    3.43      be_ring.req_prod_pvt++;
    3.44 +    pthread_mutex_unlock(&be_prod_mutex);
    3.45      
    3.46      return 0;
    3.47  }
    3.48 @@ -210,12 +218,14 @@ inline int write_req_to_be_ring(blkif_re
    3.49  inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
    3.50  {
    3.51      blkif_response_t *rsp_d;
    3.52 +    static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
    3.53  
    3.54 -    //rsp_d = BACK_RING_NEXT_EMPTY_RESPONSE(&fe_ring);
    3.55 +    pthread_mutex_lock(&fe_prod_mutex);
    3.56      rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
    3.57      memcpy(rsp_d, rsp, sizeof(blkif_response_t));
    3.58      wmb();
    3.59      fe_ring.rsp_prod_pvt++;
    3.60 +    pthread_mutex_unlock(&fe_prod_mutex);
    3.61  
    3.62      return 0;
    3.63  }
    3.64 @@ -336,6 +346,10 @@ int blktap_listen(void)
    3.65      ctrl_sring_t     *csring;
    3.66      RING_IDX          rp, i, pfd_count; 
    3.67      
    3.68 +    /* pending rings */
    3.69 +    blkif_request_t req_pending[BLKIF_RING_SIZE];
    3.70 +    blkif_response_t rsp_pending[BLKIF_RING_SIZE];
    3.71 +    
    3.72      /* handler hooks: */
    3.73      request_hook_t   *req_hook;
    3.74      response_hook_t  *rsp_hook;
    3.75 @@ -447,6 +461,8 @@ int blktap_listen(void)
    3.76                  int done = 0; /* stop forwarding this request */
    3.77  
    3.78                  req = RING_GET_REQUEST(&fe_ring, i);
    3.79 +                memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
    3.80 +                req = &req_pending[ID_TO_IDX(req->id)];
    3.81  
    3.82                  DPRINTF("copying an fe request\n");
    3.83  
    3.84 @@ -487,6 +503,8 @@ int blktap_listen(void)
    3.85              {
    3.86  
    3.87                  rsp = RING_GET_RESPONSE(&be_ring, i);
    3.88 +                memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp));
    3.89 +                rsp = &rsp_pending[ID_TO_IDX(rsp->id)];
    3.90  
    3.91                  DPRINTF("copying a be request\n");
    3.92  
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/tools/blktap/blockstore-tls.c	Mon Mar 21 20:07:51 2005 +0000
     4.3 @@ -0,0 +1,161 @@
     4.4 +/**************************************************************************
     4.5 + * 
     4.6 + * blockstore-tls.c
     4.7 + *
     4.8 + * Simple block store interface
     4.9 + *
    4.10 + */
    4.11 + 
    4.12 +#include <fcntl.h>
    4.13 +#include <unistd.h>
    4.14 +#include <stdio.h>
    4.15 +#include <stdlib.h>
    4.16 +#include <string.h>
    4.17 +#include <pthread.h>
    4.18 +#include <sys/types.h>
    4.19 +#include <sys/stat.h>
    4.20 +#include "blockstore.h"
    4.21 +#include "parallax-threaded.h"
    4.22 +
    4.23 +/*static int block_fp = -1;*/
    4.24 + 
    4.25 +static int fd_list[READ_POOL_SIZE+1];
    4.26 + 
    4.27 +/**
    4.28 + * readblock: read a block from disk
    4.29 + *   @id: block id to read
    4.30 + *
    4.31 + *   @return: pointer to block, NULL on error
    4.32 + */
    4.33 +
    4.34 +void *readblock(u64 id) 
    4.35 +{
    4.36 +    void *block;
    4.37 +    int tid = (int)pthread_getspecific(tid_key);
    4.38 +    
    4.39 +    if (lseek64(fd_list[tid], ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
    4.40 +        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
    4.41 +        perror("readblock lseek");
    4.42 +        goto err;
    4.43 +    }
    4.44 +    if ((block = malloc(BLOCK_SIZE)) == NULL) {
    4.45 +        perror("readblock malloc");
    4.46 +        goto err;
    4.47 +    }
    4.48 +    if (read(fd_list[tid], block, BLOCK_SIZE) != BLOCK_SIZE) {
    4.49 +        perror("readblock read");
    4.50 +        free(block);
    4.51 +        goto err;
    4.52 +    }
    4.53 +    return block;
    4.54 +    
    4.55 +err:
    4.56 +    return NULL;
    4.57 +}
    4.58 +
    4.59 +/**
    4.60 + * writeblock: write an existing block to disk
    4.61 + *   @id: block id
    4.62 + *   @block: pointer to block
    4.63 + *
    4.64 + *   @return: zero on success, -1 on failure
    4.65 + */
    4.66 +int writeblock(u64 id, void *block) 
    4.67 +{
    4.68 +    int tid = (int)pthread_getspecific(tid_key); /* index of this thread's descriptor in fd_list */
    4.69 +    
    4.70 +    if (lseek64(fd_list[tid], ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { /* ids are 1-based */
    4.71 +        perror("writeblock lseek");
    4.72 +        goto err;
    4.73 +    }
    4.74 +    if (write(fd_list[tid], block, BLOCK_SIZE) != BLOCK_SIZE) { /* a short write is a failure too, as in allocblock */
    4.75 +        perror("writeblock write");
    4.76 +        goto err;
    4.77 +    }
    4.78 +    return 0;
    4.79 +
    4.80 +err:
    4.81 +    return -1;
    4.82 +}
    4.83 +
    4.84 +/**
    4.85 + * allocblock: write a new block to disk
    4.86 + *   @block: pointer to block
    4.87 + *
    4.88 + *   @return: new id of block on disk
    4.89 + */
    4.90 +
    4.91 +u64 allocblock(void *block) 
    4.92 +{
    4.93 +    u64 lb;
    4.94 +    off64_t pos;
    4.95 +    int tid = (int)pthread_getspecific(tid_key);
    4.96 +
    4.97 +    pos = lseek64(fd_list[tid], 0, SEEK_END);
    4.98 +    if (pos == (off64_t)-1) {
    4.99 +        perror("allocblock lseek");
   4.100 +        goto err;
   4.101 +    }
   4.102 +    if (pos % BLOCK_SIZE != 0) {
   4.103 +        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
   4.104 +        goto err;
   4.105 +    }
   4.106 +    if (write(fd_list[tid], block, BLOCK_SIZE) != BLOCK_SIZE) {
   4.107 +        perror("allocblock write");
   4.108 +        goto err;
   4.109 +    }
   4.110 +    lb = pos / BLOCK_SIZE + 1;
   4.111 +    
   4.112 +    return lb;
   4.113 +    
   4.114 +err:
   4.115 +    return 0;
   4.116 +    
   4.117 +}
   4.118 +
   4.119 +
   4.120 +/**
   4.121 + * newblock: get a new in-memory block set to zeros
   4.122 + *
   4.123 + *   @return: pointer to new block, NULL on error
   4.124 + */
   4.125 +void *newblock() 
   4.126 +{
   4.127 +    void *block = malloc(BLOCK_SIZE);
   4.128 +    if (block == NULL) {
   4.129 +        perror("newblock");
   4.130 +        return NULL;
   4.131 +    }
   4.132 +    memset(block, 0, BLOCK_SIZE);
   4.133 +    return block;
   4.134 +}
   4.135 +
   4.136 +
   4.137 +/**
   4.138 + * freeblock: unallocate an in-memory block
   4.139 + *   (takes no block id: only the in-memory copy is released)
   4.140 + *   @block: block to be freed
   4.141 + */
   4.142 +void freeblock(void *block) 
   4.143 +{
   4.144 +    if (block != NULL)
   4.145 +        free(block);
   4.146 +}
   4.147 +
   4.148 +
   4.149 +int __init_blockstore(void)
   4.150 +{
   4.151 +    int i;
   4.152 +    
   4.153 +    for (i=0; i<(READ_POOL_SIZE+1); i++) {
   4.154 +        
   4.155 +        fd_list[i] = open("blockstore.dat", 
   4.156 +                O_RDWR | O_CREAT | O_LARGEFILE, 0644);
   4.157 +
   4.158 +        if (fd_list[i] < 0) {
   4.159 +            perror("open");
   4.160 +            return -1;
   4.161 +        }
   4.162 +    }
   4.163 +    return 0;
   4.164 +}
     5.1 --- a/tools/blktap/blockstore.c	Mon Mar 21 18:05:36 2005 +0000
     5.2 +++ b/tools/blktap/blockstore.c	Mon Mar 21 20:07:51 2005 +0000
     5.3 @@ -14,7 +14,6 @@
     5.4  #include <sys/types.h>
     5.5  #include <sys/stat.h>
     5.6  #include "blockstore.h"
     5.7 -
     5.8  #define BLOCKSTORE_REMOTE
     5.9  
    5.10  #ifdef BLOCKSTORE_REMOTE
    5.11 @@ -417,7 +416,7 @@ u64 allocblock_hint(void *block, u64 hin
    5.12  
    5.13  #else /* /BLOCKSTORE_REMOTE */
    5.14  
    5.15 -static int block_fp = -1;
    5.16 +#include "parallax-threaded.h"
    5.17   
    5.18  /**
    5.19   * readblock: read a block from disk
    5.20 @@ -428,21 +427,36 @@ static int block_fp = -1;
    5.21  
    5.22  void *readblock(u64 id) {
    5.23      void *block;
    5.24 +    int block_fp;
    5.25 +    
    5.26 +    block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
    5.27 +
    5.28 +    if (block_fp < 0) {
    5.29 +        perror("open");
    5.30 +        return NULL;
    5.31 +    }
    5.32 +    
    5.33      if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
    5.34 +        printf ("%Ld ", id);
    5.35          printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
    5.36          perror("readblock lseek");
    5.37 -        return NULL;
    5.38 +        goto err;
    5.39      }
    5.40      if ((block = malloc(BLOCK_SIZE)) == NULL) {
    5.41          perror("readblock malloc");
    5.42 -        return NULL;
    5.43 +        goto err;
    5.44      }
    5.45      if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
    5.46          perror("readblock read");
    5.47          free(block);
    5.48 -        return NULL;
    5.49 +        goto err;
    5.50      }
    5.51 +    close(block_fp);
    5.52      return block;
    5.53 +    
    5.54 +err:
    5.55 +    close(block_fp);
    5.56 +    return NULL;
    5.57  }
    5.58  
    5.59  /**
    5.60 @@ -453,15 +467,30 @@ void *readblock(u64 id) {
    5.61   *   @return: zero on success, -1 on failure
    5.62   */
    5.63  int writeblock(u64 id, void *block) {
    5.64 +    
    5.65 +    int block_fp;
    5.66 +    
    5.67 +    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
    5.68 +
    5.69 +    if (block_fp < 0) {
    5.70 +        perror("open");
    5.71 +        return -1;
    5.72 +    }
    5.73 +
    5.74      if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
    5.75          perror("writeblock lseek");
    5.76 -        return -1;
    5.77 +        goto err;
    5.78      }
    5.79      if (write(block_fp, block, BLOCK_SIZE) < 0) {
    5.80          perror("writeblock write");
    5.81 -        return -1;
    5.82 +        goto err;
    5.83      }
    5.84 +    close(block_fp);
    5.85      return 0;
    5.86 +
    5.87 +err:
    5.88 +    close(block_fp);
    5.89 +    return -1;
    5.90  }
    5.91  
    5.92  /**
    5.93 @@ -470,30 +499,41 @@ int writeblock(u64 id, void *block) {
    5.94   *
    5.95   *   @return: new id of block on disk
    5.96   */
    5.97 -static u64 lastblock = 0;
    5.98  
    5.99  u64 allocblock(void *block) {
   5.100      u64 lb;
   5.101 -    off64_t pos = lseek64(block_fp, 0, SEEK_END);
   5.102 +    off64_t pos;
   5.103 +    int block_fp;
   5.104 +    
   5.105 +    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
   5.106 +
   5.107 +    if (block_fp < 0) {
   5.108 +        perror("open");
   5.109 +        return 0;
   5.110 +    }
   5.111 +
   5.112 +    pos = lseek64(block_fp, 0, SEEK_END);
   5.113      if (pos == (off64_t)-1) {
   5.114          perror("allocblock lseek");
   5.115 -        return 0;
   5.116 +        goto err;
   5.117      }
   5.118      if (pos % BLOCK_SIZE != 0) {
   5.119          fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
   5.120 -        return 0;
   5.121 +        goto err;
   5.122      }
   5.123      if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
   5.124          perror("allocblock write");
   5.125 -        return 0;
   5.126 +        goto err;
   5.127      }
   5.128      lb = pos / BLOCK_SIZE + 1;
   5.129 +//printf("alloc(%Ld)\n", lb);
   5.130 +    close(block_fp);
   5.131 +    return lb;
   5.132      
   5.133 -    if (lb <= lastblock)
   5.134 -        printf("[*** %Ld alredy allocated! ***]\n", lb);
   5.135 +err:
   5.136 +    close(block_fp);
   5.137 +    return 0;
   5.138      
   5.139 -    lastblock = lb;
   5.140 -    return lb;
   5.141  }
   5.142  
   5.143  /**
   5.144 @@ -535,9 +575,117 @@ void freeblock(void *block) {
   5.145          free(block);
   5.146  }
   5.147  
   5.148 +static freeblock_t *new_freeblock(void)
   5.149 +{
   5.150 +    freeblock_t *fb;
   5.151 +    
   5.152 +    fb = newblock();
   5.153 +    
   5.154 +    if (fb == NULL) return NULL;
   5.155 +    
   5.156 +    fb->magic = FREEBLOCK_MAGIC;
   5.157 +    fb->next  = 0ULL;
   5.158 +    fb->count = 0ULL;
   5.159 +    memset(fb->list, 0, sizeof fb->list);
   5.160 +    
   5.161 +    return fb;
   5.162 +}
   5.163 +
   5.164 +void releaseblock(u64 id)
   5.165 +{
   5.166 +    blockstore_super_t *bs_super;
   5.167 +    freeblock_t *fl_current;
   5.168 +    
   5.169 +    /* get superblock */
   5.170 +    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
   5.171 +    
   5.172 +    /* get freeblock_current */
   5.173 +    if (bs_super->freelist_current == 0ULL) 
   5.174 +    {
   5.175 +        fl_current = new_freeblock();
   5.176 +        bs_super->freelist_current = allocblock(fl_current);
   5.177 +        writeblock(BLOCKSTORE_SUPER, bs_super);
   5.178 +    } else {
   5.179 +        fl_current = readblock(bs_super->freelist_current);
   5.180 +    }
   5.181 +    
   5.182 +    /* if full, chain to superblock and allocate new current */
   5.183 +    
   5.184 +    if (fl_current->count == FREEBLOCK_SIZE) {
   5.185 +        fl_current->next = bs_super->freelist_full;
   5.186 +        writeblock(bs_super->freelist_current, fl_current);
   5.187 +        bs_super->freelist_full = bs_super->freelist_current;
   5.188 +        freeblock(fl_current);
   5.189 +        fl_current = new_freeblock();
   5.190 +        bs_super->freelist_current = allocblock(fl_current);
   5.191 +        writeblock(BLOCKSTORE_SUPER, bs_super);
   5.192 +    }
   5.193 +    
   5.194 +    /* append id to current */
   5.195 +    fl_current->list[fl_current->count++] = id;
   5.196 +    writeblock(bs_super->freelist_current, fl_current);
   5.197 +    
   5.198 +    freeblock(fl_current);
   5.199 +    freeblock(bs_super);
   5.200 +    
   5.201 +    
   5.202 +}
   5.203 +
   5.204 +/* freelist debug functions: */
   5.205 +void freelist_count(int print_each)
   5.206 +{
   5.207 +    blockstore_super_t *bs_super;
   5.208 +    freeblock_t *fb;
   5.209 +    u64 total = 0, next;
   5.210 +    
   5.211 +    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
   5.212 +    
   5.213 +    if (bs_super->freelist_current == 0ULL) {
   5.214 +        printf("freelist is empty!\n");
   5.215 +        return;
   5.216 +    }
   5.217 +    
   5.218 +    fb = readblock(bs_super->freelist_current);
   5.219 +    printf("%Ld entires on current.\n", fb->count);
   5.220 +    total += fb->count;
   5.221 +    if (print_each == 1)
   5.222 +    {
   5.223 +        int i;
   5.224 +        for (i=0; i< fb->count; i++)
   5.225 +            printf("  %Ld\n", fb->list[i]);
   5.226 +    }
   5.227 +    
   5.228 +    freeblock(fb);
   5.229 +    
   5.230 +    if (bs_super->freelist_full == 0ULL) {
   5.231 +        printf("freelist_full is empty!\n");
   5.232 +        return;
   5.233 +    }
   5.234 +    
   5.235 +    next = bs_super->freelist_full;
   5.236 +    for (;;) {
   5.237 +        fb = readblock(next);
   5.238 +        total += fb->count;
   5.239 +        if (print_each == 1)
   5.240 +        {
   5.241 +            int i;
   5.242 +            for (i=0; i< fb->count; i++)
   5.243 +                printf("  %Ld\n", fb->list[i]);
   5.244 +        }
   5.245 +        next = fb->next;
   5.246 +        freeblock(fb);
   5.247 +        if (next == 0ULL) break;
   5.248 +    }
   5.249 +    printf("Total of %Ld ids on freelist.\n", total);
   5.250 +}
   5.251  
   5.252  int __init_blockstore(void)
   5.253  {
   5.254 +    int i;
   5.255 +    blockstore_super_t *bs_super;
   5.256 +    u64 ret;
   5.257 +    int block_fp;
   5.258 +    
   5.259  #ifdef BLOCKSTORE_REMOTE
   5.260      struct hostent *addr;
   5.261      int i;
   5.262 @@ -623,7 +771,30 @@ int __init_blockstore(void)
   5.263      if (block_fp < 0) {
   5.264          perror("open");
   5.265          return -1;
   5.266 +        exit(-1);
   5.267      }
   5.268 +    
   5.269 +    if (lseek(block_fp, 0, SEEK_END) == 0) {
   5.270 +        bs_super = newblock();
   5.271 +        bs_super->magic            = BLOCKSTORE_MAGIC;
   5.272 +        bs_super->freelist_full    = 0LL;
   5.273 +        bs_super->freelist_current = 0LL;
   5.274 +        
   5.275 +        ret = allocblock(bs_super);
   5.276 +        
   5.277 +        freeblock(bs_super);
   5.278 +    } else {
   5.279 +        bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
   5.280 +        if (bs_super->magic != BLOCKSTORE_MAGIC)
   5.281 +        {
   5.282 +            printf("BLOCKSTORE IS CORRUPT! (no magic in superblock!)\n");
   5.283 +            exit(-1);
   5.284 +        }
   5.285 +        freeblock(bs_super);
   5.286 +    }
   5.287 +        
   5.288 +    close(block_fp);
   5.289 +        
   5.290  #endif /*  BLOCKSTORE_REMOTE */   
   5.291      return 0;
   5.292  }
     6.1 --- a/tools/blktap/blockstore.h	Mon Mar 21 18:05:36 2005 +0000
     6.2 +++ b/tools/blktap/blockstore.h	Mon Mar 21 20:07:51 2005 +0000
     6.3 @@ -21,15 +21,40 @@
     6.4  #define SECTOR_SHIFT   9 
     6.5  #endif
     6.6  
     6.7 +#define FREEBLOCK_SIZE  ((BLOCK_SIZE / sizeof(u64)) - (3 * sizeof(u64))) /* entries in freeblock_t.list; parenthesized for safe expansion */
     6.8 +#define FREEBLOCK_MAGIC 0x0fee0fee0fee0feeULL
     6.9 +
    6.10 +typedef struct {
    6.11 +    u64 magic;                 /* FREEBLOCK_MAGIC */
    6.12 +    u64 next;                  /* id of the next full freeblock in the chain, 0 if none */
    6.13 +    u64 count;                 /* number of entries of list[] in use */
    6.14 +    u64 list[FREEBLOCK_SIZE];  /* released block ids */
    6.15 +} freeblock_t; 
    6.16 +
    6.17 +#define BLOCKSTORE_MAGIC 0xaaaaaaa00aaaaaaaULL
    6.18 +#define BLOCKSTORE_SUPER 1ULL
    6.19 +
    6.20 +typedef struct {
    6.21 +    u64 magic;
    6.22 +    u64 freelist_full;
    6.23 +    u64 freelist_current;
    6.24 +} blockstore_super_t;
    6.25  
    6.26  extern void *newblock();
    6.27  extern void *readblock(u64 id);
    6.28  extern u64 allocblock(void *block);
    6.29  extern u64 allocblock_hint(void *block, u64 hint);
    6.30  extern int writeblock(u64 id, void *block);
    6.31 +
    6.32 +/* Add this blockid to a freelist, to be recycled by the allocator. */
    6.33 +extern void releaseblock(u64 id);
    6.34 +
    6.35 +/* this is a memory free() operation for block-sized allocations */
    6.36  extern void freeblock(void *block);
    6.37  extern int __init_blockstore(void);
    6.38  
    6.39 +/* debug for freelist. */
    6.40 +void freelist_count(int print_each);
    6.41  #define ALLOCFAIL (((u64)(-1)))
    6.42  
    6.43  /* Distribution
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/tools/blktap/parallax-threaded.c	Mon Mar 21 20:07:51 2005 +0000
     7.3 @@ -0,0 +1,654 @@
     7.4 +/**************************************************************************
     7.5 + * 
     7.6 + * parallax-threaded.c
     7.7 + *
     7.8 + * The Parallax Storage Server
     7.9 + *
    7.10 + */
    7.11 + 
    7.12 +
    7.13 +#include <stdio.h>
    7.14 +#include <stdlib.h>
    7.15 +#include <string.h>
    7.16 +#include <pthread.h>
    7.17 +#include "blktaplib.h"
    7.18 +#include "blockstore.h"
    7.19 +#include "vdi.h"
    7.20 +#include "parallax-threaded.h"
    7.21 +
    7.22 +#define PARALLAX_DEV     61440
    7.23 +
    7.24 +
    7.25 +#if 0
    7.26 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
    7.27 +#else
    7.28 +#define DPRINTF(_f, _a...) ((void)0)
    7.29 +#endif
    7.30 +
    7.31 +/* ------[ session records ]----------------------------------------------- */
    7.32 +
    7.33 +#define BLKIF_HASHSZ 1024
    7.34 +#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
    7.35 +
    7.36 +#define VDI_HASHSZ 16
    7.37 +#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1))
    7.38 +
    7.39 +typedef struct blkif {
    7.40 +    domid_t       domid;
    7.41 +    unsigned int  handle;
    7.42 +    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
    7.43 +    vdi_t        *vdi_hash[VDI_HASHSZ];
    7.44 +    struct blkif *hash_next;
    7.45 +} blkif_t;
    7.46 +
    7.47 +static blkif_t      *blkif_hash[BLKIF_HASHSZ];
    7.48 +
    7.49 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
    7.50 +{
    7.51 +    if ( handle != 0 )
    7.52 +        printf("blktap/parallax don't currently support non-0 dev handles!\n");
    7.53 +    
    7.54 +    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
    7.55 +    while ( (blkif != NULL) && 
    7.56 +            ((blkif->domid != domid) || (blkif->handle != handle)) )
    7.57 +        blkif = blkif->hash_next;
    7.58 +    return blkif;
    7.59 +}
    7.60 +
    7.61 +vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device)
    7.62 +{
    7.63 +    vdi_t *vdi = blkif->vdi_hash[VDI_HASH(device)];
    7.64 +    
    7.65 +    while ((vdi != NULL) && (vdi->vdevice != device))
    7.66 +        vdi = vdi->next;
    7.67 +    
    7.68 +    return vdi;
    7.69 +}
    7.70 +
    7.71 +/* ------[ control message handling ]-------------------------------------- */
    7.72 +
    7.73 +void blkif_create(blkif_be_create_t *create)
    7.74 +{
    7.75 +    domid_t       domid  = create->domid;
    7.76 +    unsigned int  handle = create->blkif_handle;
    7.77 +    blkif_t     **pblkif, *blkif;
    7.78 +
    7.79 +    DPRINTF("parallax (blkif_create): create is %p\n", create); 
    7.80 +    
    7.81 +    if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL )
    7.82 +    {
    7.83 +        DPRINTF("Could not create blkif: out of memory\n");
    7.84 +        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
    7.85 +        return;
    7.86 +    }
    7.87 +
    7.88 +    memset(blkif, 0, sizeof(*blkif));
    7.89 +    blkif->domid  = domid;
    7.90 +    blkif->handle = handle;
    7.91 +    blkif->status = DISCONNECTED;
    7.92 +/*
    7.93 +    spin_lock_init(&blkif->vbd_lock);
    7.94 +    spin_lock_init(&blkif->blk_ring_lock);
    7.95 +    atomic_set(&blkif->refcnt, 0);
    7.96 +*/
    7.97 +    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
    7.98 +    while ( *pblkif != NULL )
    7.99 +    {
   7.100 +        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
   7.101 +        {
   7.102 +            DPRINTF("Could not create blkif: already exists\n");
   7.103 +            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
   7.104 +            free(blkif);
   7.105 +            return;
   7.106 +        }
   7.107 +        pblkif = &(*pblkif)->hash_next;
   7.108 +    }
   7.109 +
   7.110 +    blkif->hash_next = *pblkif;
   7.111 +    *pblkif = blkif;
   7.112 +
   7.113 +    DPRINTF("Successfully created blkif\n");
   7.114 +    create->status = BLKIF_BE_STATUS_OKAY;
   7.115 +}
   7.116 +
   7.117 +void blkif_destroy(blkif_be_destroy_t *destroy)
   7.118 +{
   7.119 +    domid_t       domid  = destroy->domid;
   7.120 +    unsigned int  handle = destroy->blkif_handle;
   7.121 +    blkif_t     **pblkif, *blkif;
   7.122 +
   7.123 +    DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy); 
   7.124 +    
   7.125 +    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
   7.126 +    while ( (blkif = *pblkif) != NULL )
   7.127 +    {
   7.128 +        if ( (blkif->domid == domid) && (blkif->handle == handle) )
   7.129 +        {
   7.130 +            if ( blkif->status != DISCONNECTED )
   7.131 +                goto still_connected;
   7.132 +            goto destroy;
   7.133 +        }
   7.134 +        pblkif = &blkif->hash_next;
   7.135 +    }
   7.136 +
   7.137 +    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   7.138 +    return;
   7.139 +
   7.140 + still_connected:
   7.141 +    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
   7.142 +    return;
   7.143 +
   7.144 + destroy:
   7.145 +    *pblkif = blkif->hash_next;
   7.146 +    /* destroy_all_vbds(blkif); */
   7.147 +    free(blkif);
   7.148 +    destroy->status = BLKIF_BE_STATUS_OKAY;
   7.149 +}
   7.150 +
   7.151 +void vbd_grow(blkif_be_vbd_grow_t *grow) 
   7.152 +{
   7.153 +    blkif_t            *blkif;
   7.154 +    vdi_t              *vdi, **vdip;
   7.155 +    blkif_vdev_t        vdevice = grow->vdevice;
   7.156 +    /* Attach a VDI to an existing blkif; the outcome is left in grow->status. */
   7.157 +    DPRINTF("parallax (vbd_grow): grow=%p\n", grow); 
   7.158 +    
   7.159 +    blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
   7.160 +    if ( blkif == NULL )
   7.161 +    {
   7.162 +        DPRINTF("vbd_grow attempted for non-existent blkif (%u,%u)\n", 
   7.163 +                grow->domid, grow->blkif_handle); 
   7.164 +        grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   7.165 +        return;
   7.166 +    }
   7.167 +
   7.168 +    /* VDI identifier is in grow->extent.sector_start */
   7.169 +    DPRINTF("vbd_grow: grow->extent.sector_start (id) is %llx\n", 
   7.170 +            grow->extent.sector_start);
   7.171 +
   7.172 +    vdi = vdi_get(grow->extent.sector_start);
   7.173 +    if (vdi == NULL)
   7.174 +    {
   7.175 +        printf("parallax (vbd_grow): VDI %llx not found.\n",
   7.176 +               grow->extent.sector_start);
   7.177 +        grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
   7.178 +        return;
   7.179 +    }
   7.180 +    /* Append the VDI at the tail of the chain for its vdevice hash bucket. */
   7.181 +    vdi->next = NULL;
   7.182 +    vdi->vdevice = vdevice;
   7.183 +    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
   7.184 +    while (*vdip != NULL)
   7.185 +        vdip = &(*vdip)->next;
   7.186 +    *vdip = vdi;
   7.187 +    /* NOTE(review): resetting vdi->next assumes vdi is not already on a chain -- confirm. */
   7.188 +    DPRINTF("vbd_grow: happy return!\n"); 
   7.189 +    grow->status = BLKIF_BE_STATUS_OKAY;
   7.190 +}
   7.191 +
   7.192 +int parallax_control(control_msg_t *msg)
   7.193 +{
   7.194 +    /* Control hook: hand a blkif backend message to the matching handler.
   7.195 +     * Handlers report through the message payload; this always returns 0. */
   7.196 +
   7.197 +    DPRINTF("parallax_control: msg is %p\n", msg); 
   7.198 +
   7.199 +    if (msg->type != CMSG_BLKIF_BE) 
   7.200 +    {
   7.201 +        printf("Unexpected control message (%d)\n", msg->type);
   7.202 +        return 0;
   7.203 +    }
   7.204 +
   7.205 +    switch(msg->subtype)
   7.206 +    {
   7.207 +    case CMSG_BLKIF_BE_CREATE:
   7.208 +        if ( msg->length != sizeof(blkif_be_create_t) )
   7.209 +            goto parse_error;
   7.210 +        blkif_create((blkif_be_create_t *)msg->msg);
   7.211 +        break;   
   7.212 +
   7.213 +    case CMSG_BLKIF_BE_DESTROY:
   7.214 +        if ( msg->length != sizeof(blkif_be_destroy_t) )
   7.215 +            goto parse_error;
   7.216 +        blkif_destroy((blkif_be_destroy_t *)msg->msg);
   7.217 +        break;  
   7.218 +
   7.219 +    case CMSG_BLKIF_BE_VBD_GROW:
   7.220 +        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
   7.221 +            goto parse_error;
   7.222 +        vbd_grow((blkif_be_vbd_grow_t *)msg->msg);
   7.223 +        break;
   7.224 +    default: break;    /* other subtypes are ignored, as before */
   7.225 +    }
   7.226 +    return 0;
   7.227 +parse_error:
   7.228 +    printf("Bad control message!\n");
   7.229 +    return 0;
   7.230 +}
   7.231 +
   7.232 +int parallax_probe(blkif_request_t *req, blkif_t *blkif)
   7.233 +{
   7.234 +    blkif_response_t *rsp;
   7.235 +    vdisk_t *img_info;
   7.236 +    vdi_t *vdi;
   7.237 +    int i, nr_vdis = 0; 
   7.238 +    /* The reply buffer is one 8-sector page; never write records past it. */
   7.239 +    const int max_vdis = (int)((8UL << SECTOR_SHIFT) / sizeof(vdisk_t));
   7.240 +
   7.241 +    DPRINTF("parallax_probe: req=%p, blkif=%p\n", req, blkif); 
   7.242 +
   7.243 +    /* We expect one buffer only. */
   7.244 +    if ( req->nr_segments != 1 )
   7.245 +      goto err;
   7.246 +
   7.247 +    /* Make sure the buffer is page-sized. */
   7.248 +    if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
   7.249 +       (blkif_last_sect (req->frame_and_sects[0]) != 7) )
   7.250 +      goto err;
   7.251 +
   7.252 +    /* fill the list of devices: one vdisk_t per VDI on this blkif */
   7.253 +    img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
   7.254 +    for (i=0; i<VDI_HASHSZ; i++) {
   7.255 +        for (vdi = blkif->vdi_hash[i]; vdi != NULL; vdi = vdi->next) {
   7.256 +            if (nr_vdis >= max_vdis)
   7.257 +                goto done;    /* buffer full: report what fits */
   7.258 +            img_info[nr_vdis].device   = vdi->vdevice;
   7.259 +            img_info[nr_vdis].info     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
   7.260 +            /* The -2 here accounts for the LSB in the radix tree */
   7.261 +            img_info[nr_vdis].capacity = 
   7.262 +                    ((1LL << (VDI_HEIGHT-2)) >> SECTOR_SHIFT);
   7.263 +            nr_vdis++;
   7.264 +        }
   7.265 +    }
   7.266 +done:
   7.267 +    rsp = (blkif_response_t *)req;
   7.268 +    rsp->id = req->id;
   7.269 +    rsp->operation = BLKIF_OP_PROBE;
   7.270 +    rsp->status = nr_vdis; /* number of disks */
   7.271 +    DPRINTF("parallax_probe: send positive response (nr_vdis=%d)\n", nr_vdis);
   7.272 +    return  BLKTAP_RESPOND;
   7.273 +err:
   7.274 +    rsp = (blkif_response_t *)req;
   7.275 +    rsp->id = req->id;
   7.276 +    rsp->operation = BLKIF_OP_PROBE;
   7.277 +    rsp->status = BLKIF_RSP_ERROR;
   7.278 +
   7.279 +    DPRINTF("parallax_probe: send error response\n"); 
   7.280 +    return BLKTAP_RESPOND;  
   7.281 +}
   7.282 +
   7.283 +typedef struct {
   7.284 +    blkif_request_t *req;      /* request this record tracks */
   7.285 +    int              count;    /* segments not yet handled by a worker */
   7.286 +    pthread_mutex_t  mutex;    /* taken around updates to count */
   7.287 +} pending_t;
   7.288 +/* One pending record per request slot; parallax_read indexes it by ID_TO_IDX(req->id). */
   7.289 +#define MAX_REQUESTS 64
   7.290 +pending_t pending_list[MAX_REQUESTS];
   7.291 +/* One queued segment read, as handed from parallax_read to read_segment. */
   7.292 +typedef struct  {
   7.293 +    vdi_t           *vdi;
   7.294 +    blkif_request_t *req;
   7.295 +    int              segment;
   7.296 +    pending_t       *pent;
   7.297 +} readseg_params_t;
   7.298 +/* Dispatch ring: dprod/dcons are free-running counters, masked with DISPATCH_MASK on use. */
   7.299 +#define DISPATCH_SIZE 1024UL
   7.300 +#define DISPATCH_MASK (DISPATCH_SIZE-1)
   7.301 +readseg_params_t dispatch_list[DISPATCH_SIZE];
   7.302 +unsigned long dprod = 0, dcons = 0;
   7.303 +pthread_mutex_t dispatch_mutex;    /* initialised in main() */
   7.304 +pthread_cond_t  dispatch_cond;     /* signalled once per queued segment */
   7.305 +
   7.306 +void *read_segment(void *param)
   7.307 +{
   7.308 +    /* Pool worker: take queued segment reads off dispatch_list, copy the
   7.309 +     * data into the guest page, and send the response once the last
   7.310 +     * segment of a request has been handled.  In the threaded build it
   7.311 +     * loops forever; with NOTHREADS it services one entry and returns. */
   7.312 +
   7.313 +    /* Sticky per-request failure flags, indexed like pending_list and
   7.314 +     * guarded by the matching pending_t mutex, so a failed segment makes
   7.315 +     * the final response an error even if another worker sends it. */
   7.316 +    static char seg_failed[MAX_REQUESTS];
   7.317 +    readseg_params_t *p;
   7.318 +    u64 vblock, gblock, sector;
   7.319 +    char *dpage, *spage;
   7.320 +    unsigned long size, start, offset;
   7.321 +    blkif_response_t *rsp;
   7.322 +    int tid, failed, slot;
   7.323 +
   7.324 +#ifdef NOTHREADS
   7.325 +#else
   7.326 +    /* Set this thread's tid. */
   7.327 +    tid = *(int *)param;
   7.328 +    free(param);
   7.329 +
   7.330 +    pthread_setspecific(tid_key, (void *)(long)tid);
   7.331 +
   7.332 +    printf("My tid is %d.\n", (int)(long)pthread_getspecific(tid_key));
   7.333 +start:
   7.334 +    /* Sleep until at least one entry is queued; the while loop already
   7.335 +     * copes with wakeups that find the queue empty. */
   7.336 +    pthread_mutex_lock(&dispatch_mutex);
   7.337 +    while (dprod == dcons)
   7.338 +        pthread_cond_wait(&dispatch_cond, &dispatch_mutex);
   7.339 +#endif
   7.340 +
   7.341 +    /* Claim the oldest queued entry. */
   7.342 +    p = &dispatch_list[dcons & DISPATCH_MASK];
   7.343 +    dcons++;
   7.344 +#ifdef NOTHREADS
   7.345 +#else
   7.346 +    pthread_mutex_unlock(&dispatch_mutex);
   7.347 +#endif
   7.348 +
   7.349 +    failed = 0;
   7.350 +    /* Same index parallax_read() uses for pending_list, so it is expected
   7.351 +     * to be below MAX_REQUESTS -- NOTE(review): confirm ID_TO_IDX()'s range. */
   7.352 +    slot   = ID_TO_IDX(p->req->id);
   7.353 +
   7.354 +    dpage  = (char *)MMAP_VADDR(ID_TO_IDX(p->req->id), p->segment);
   7.355 +
   7.356 +    /* Round the requested segment to a block address. */
   7.357 +
   7.358 +    sector  = p->req->sector_number + (8*p->segment);
   7.359 +    vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
   7.360 +
   7.361 +    /* Get that block from the store. */
   7.362 +
   7.363 +    gblock = vdi_lookup_block(p->vdi, vblock, NULL);
   7.364 +
   7.365 +    /* Calculate read size and offset within the read block. */
   7.366 +
   7.367 +    offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
   7.368 +    size = ( blkif_last_sect (p->req->frame_and_sects[p->segment]) -
   7.369 +             blkif_first_sect(p->req->frame_and_sects[p->segment]) + 1
   7.370 +           ) << SECTOR_SHIFT;
   7.371 +    start = blkif_first_sect(p->req->frame_and_sects[p->segment]) 
   7.372 +            << SECTOR_SHIFT;
   7.373 +
   7.374 +    DPRINTF("ParallaxRead: sect: %lld (%ld,%ld),  "
   7.375 +            "vblock %llx, gblock %llx, "
   7.376 +            "size %lx\n", 
   7.377 +            sector, blkif_first_sect(p->req->frame_and_sects[p->segment]),
   7.378 +            blkif_last_sect (p->req->frame_and_sects[p->segment]),
   7.379 +            vblock, gblock, size); 
   7.380 +
   7.381 +    /* If the block does not exist in the store, return zeros. */
   7.382 +    /* Otherwise, copy that region to the guest page.          */
   7.383 +
   7.384 +    if ( gblock == 0 ) {
   7.385 +
   7.386 +        memset(dpage + start, '\0', size);
   7.387 +
   7.388 +    } else {
   7.389 +
   7.390 +        spage = readblock(gblock);
   7.391 +
   7.392 +        if (spage == NULL) {
   7.393 +            /* Leave the guest page alone and fail the whole request. */
   7.394 +            printf("Error reading gblock from store: %Ld\n", gblock);
   7.395 +            printf("I am screwed!\n");
   7.396 +            failed = 1;
   7.397 +        } else {
   7.398 +            memcpy(dpage + start, spage + offset, size);
   7.399 +            freeblock(spage);
   7.400 +        }
   7.401 +    }
   7.402 +
   7.403 +
   7.404 +    /* Done the read.  Now update the pending record.  A failed segment
   7.405 +     * must be counted too, or the request would never be answered. */
   7.406 +
   7.407 +    pthread_mutex_lock(&p->pent->mutex);
   7.408 +    if (failed)
   7.409 +        seg_failed[slot] = 1;
   7.410 +    p->pent->count--;
   7.411 +
   7.412 +    if (p->pent->count == 0) {
   7.413 +        /* Last segment: build the response in place over the request. */
   7.414 +        rsp = (blkif_response_t *)p->req;
   7.415 +        rsp->id = p->req->id;
   7.416 +        rsp->operation = BLKIF_OP_READ;
   7.417 +        rsp->status = seg_failed[slot] ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
   7.418 +        seg_failed[slot] = 0;    /* ready for the slot's next request */
   7.419 +
   7.420 +        blktap_inject_response(rsp);       
   7.421 +    }
   7.422 +
   7.423 +    pthread_mutex_unlock(&p->pent->mutex);
   7.424 +
   7.425 +#ifdef NOTHREADS
   7.426 +    return NULL;
   7.427 +#else
   7.428 +    goto start;
   7.429 +#endif
   7.430 +}
   7.431 +
   7.432 +
   7.433 +int parallax_read(blkif_request_t *req, blkif_t *blkif)
   7.434 +{
   7.435 +    /* Queue one read per segment for the worker pool.  Returns
   7.436 +     * BLKTAP_STOLEN when the workers will send the response later, or
   7.437 +     * BLKTAP_RESPOND when the response has been written into req here. */
   7.438 +    blkif_response_t *rsp;
   7.439 +    vdi_t *vdi;
   7.440 +    int i, nseg;
   7.441 +    pending_t *pent;
   7.442 +    readseg_params_t *params;
   7.443 +
   7.444 +    vdi = blkif_get_vdi(blkif, req->device);
   7.445 +    if ( vdi == NULL )
   7.446 +        goto err;
   7.447 +
   7.448 +    /* Take the count now: a worker rewrites req as the response once the
   7.449 +     * last segment is done, which can happen before the loop below exits. */
   7.450 +    nseg = req->nr_segments;
   7.451 +
   7.452 +    /* With no segments nothing would ever be queued, so no worker would
   7.453 +     * answer; complete the empty read immediately instead. */
   7.454 +    if ( nseg == 0 ) {
   7.455 +        rsp = (blkif_response_t *)req;
   7.456 +        rsp->id = req->id;
   7.457 +        rsp->operation = BLKIF_OP_READ;
   7.458 +        rsp->status = BLKIF_RSP_OKAY;
   7.459 +        return BLKTAP_RESPOND;
   7.460 +    }
   7.461 +
   7.462 +    pent = &pending_list[ID_TO_IDX(req->id)];
   7.463 +    pent->count = nseg;
   7.464 +    pent->req = req;
   7.465 +    pthread_mutex_init(&pent->mutex, NULL);
   7.466 +
   7.467 +    for (i = 0; i < nseg; i++) {
   7.468 +        /* Fill the slot and publish it under the lock the workers hold
   7.469 +         * while they test dprod, then wake one of them. */
   7.470 +        pthread_mutex_lock(&dispatch_mutex);
   7.471 +        params = &dispatch_list[dprod & DISPATCH_MASK];
   7.472 +        params->pent = pent;
   7.473 +        params->vdi  = vdi;
   7.474 +        params->req  = req;
   7.475 +        params->segment = i;
   7.476 +        dprod++;
   7.477 +        pthread_cond_signal(&dispatch_cond);
   7.478 +        pthread_mutex_unlock(&dispatch_mutex);
   7.479 +#ifdef NOTHREADS
   7.480 +        read_segment(NULL);
   7.481 +#endif
   7.482 +    }
   7.483 +
   7.484 +    return BLKTAP_STOLEN;
   7.485 +
   7.486 +err:
   7.487 +    rsp = (blkif_response_t *)req;
   7.488 +    rsp->id = req->id;
   7.489 +    rsp->operation = BLKIF_OP_READ;
   7.490 +    rsp->status = BLKIF_RSP_ERROR;
   7.491 +    return BLKTAP_RESPOND;  }
   7.492 +
   7.493 +int parallax_write(blkif_request_t *req, blkif_t *blkif)
   7.494 +{
   7.495 +    blkif_response_t *rsp;
   7.496 +    u64 sector;
   7.497 +    int i, writable = 0;
   7.498 +    u64 vblock, gblock;
   7.499 +    char *spage;
   7.500 +    unsigned long size, offset, start;
   7.501 +    vdi_t *vdi;
   7.502 +    /* Write every segment of req synchronously, then answer in place over req. */
   7.503 +    vdi = blkif_get_vdi(blkif, req->device);
   7.504 +    
   7.505 +    if ( vdi == NULL )
   7.506 +        goto err;
   7.507 +    
   7.508 +    for (i = 0; i < req->nr_segments; i++) {
   7.509 +        /* Source page for segment i (presumably the mapped guest page -- confirm). */
   7.510 +        spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
   7.511 +        
   7.512 +        /* Round the requested segment to a block address. */
   7.513 +        
   7.514 +        sector  = req->sector_number + (8*i);
   7.515 +        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
   7.516 +        
   7.517 +        /* Get that block from the store. */
   7.518 +        
   7.519 +        gblock   = vdi_lookup_block(vdi, vblock, &writable);
   7.520 +        
   7.521 +        /* Calculate write size and offset within the block. */
   7.522 +        
   7.523 +        offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
   7.524 +        size = ( blkif_last_sect (req->frame_and_sects[i]) -
   7.525 +                 blkif_first_sect(req->frame_and_sects[i]) + 1
   7.526 +               ) << SECTOR_SHIFT;
   7.527 +        start = blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
   7.528 +
   7.529 +        DPRINTF("ParallaxWrite: sect: %lld (%ld,%ld),  "
   7.530 +                "vblock %llx, gblock %llx, "
   7.531 +                "size %lx\n", 
   7.532 +                sector, blkif_first_sect(req->frame_and_sects[i]),
   7.533 +                blkif_last_sect (req->frame_and_sects[i]),
   7.534 +                vblock, gblock, size); 
   7.535 +        
   7.536 +        /* XXX: For now we just freak out if they try to write a   */
   7.537 +        /* non block-sized, block-aligned page.                    */
   7.538 +        
   7.539 +        if ((offset != 0) || (size != BLOCK_SIZE) || (start != 0)) {
   7.540 +            printf("]\n] STRANGE WRITE!\n]\n");
   7.541 +            goto err;
   7.542 +        }
   7.543 +
   7.544 +        if (( gblock == 0 ) || ( writable == 0 )) {
   7.545 +            /* Nothing mapped, or mapping not writable: allocblock() the page and remap vblock. */
   7.546 +            gblock = allocblock(spage);
   7.547 +            vdi_update_block(vdi, vblock, gblock);
   7.548 +            /* NOTE(review): allocblock()/vdi_update_block()/writeblock() results are not checked. */
   7.549 +        } else {
   7.550 +            
   7.551 +            /* write-in-place, no need to change mappings. */
   7.552 +            writeblock(gblock, spage);
   7.553 +            
   7.554 +        }
   7.555 +
   7.556 +    }
   7.557 +
   7.558 +    rsp = (blkif_response_t *)req;
   7.559 +    rsp->id = req->id;
   7.560 +    rsp->operation = BLKIF_OP_WRITE;
   7.561 +    rsp->status = BLKIF_RSP_OKAY;
   7.562 +
   7.563 +    return BLKTAP_RESPOND;
   7.564 +err:
   7.565 +    rsp = (blkif_response_t *)req;
   7.566 +    rsp->id = req->id;
   7.567 +    rsp->operation = BLKIF_OP_WRITE;
   7.568 +    rsp->status = BLKIF_RSP_ERROR;
   7.569 +    
   7.570 +    return BLKTAP_RESPOND;  
   7.571 +}
   7.572 +
   7.573 +int parallax_request(blkif_request_t *req)
   7.574 +{
   7.575 +    blkif_response_t *reply;
   7.576 +    domid_t  owner = ID_TO_DOM(req->id);
   7.577 +    blkif_t *iface = blkif_find_by_handle(owner, 0);
   7.578 +
   7.579 +    //DPRINTF("parallax_request: req=%p, dom=%d, blkif=%p\n", req, dom, blkif); 
   7.580 +
   7.581 +    /* Route the request to its handler; the handler's verdict
   7.582 +     * (respond now or stolen) is passed straight back. */
   7.583 +    if (iface != NULL) {
   7.584 +        switch (req->operation) {
   7.585 +        case BLKIF_OP_PROBE:
   7.586 +            return parallax_probe(req, iface);
   7.587 +
   7.588 +        case BLKIF_OP_READ:
   7.589 +            return parallax_read(req, iface);
   7.590 +
   7.591 +        case BLKIF_OP_WRITE:
   7.592 +            return parallax_write(req, iface);
   7.593 +
   7.594 +        default:
   7.595 +            /* Unknown operation */
   7.596 +            break;
   7.597 +        }
   7.598 +    }
   7.599 +
   7.600 +    /* No such interface, or an operation we do not handle: turn the
   7.601 +     * request into an error response in place. */
   7.602 +    reply = (blkif_response_t *)req;
   7.603 +    reply->id = req->id;
   7.604 +    reply->operation = req->operation;
   7.605 +    reply->status = BLKIF_RSP_ERROR;
   7.606 +    return BLKTAP_RESPOND;  
   7.607 +}
   7.608 +
   7.609 +void __init_parallax(void) 
   7.610 +{   /* Zero the whole blkif hash table before any interface is created. */
   7.611 +    memset(&blkif_hash[0], 0, sizeof blkif_hash);
   7.612 +}
   7.613 +
   7.614 +
   7.615 +
   7.616 +int main(int argc, char *argv[])
   7.617 +{
   7.618 +    pthread_t read_pool[READ_POOL_SIZE];
   7.619 +    int i, tid=0;
   7.620 +    /* Bring up the subsystems, start the read workers, then serve blktap. */
   7.621 +    DPRINTF("parallax: starting.\n"); 
   7.622 +    __init_blockstore();
   7.623 +    DPRINTF("parallax: initialized blockstore...\n"); 
   7.624 +    __init_vdi();
   7.625 +    DPRINTF("parallax: initialized vdi registry etc...\n"); 
   7.626 +    __init_parallax();
   7.627 +    DPRINTF("parallax: initialized local stuff..\n"); 
   7.628 +
   7.629 +    pthread_mutex_init(&dispatch_mutex, NULL);
   7.630 +    pthread_cond_init(&dispatch_cond, NULL);
   7.631 +    pthread_key_create(&tid_key, NULL);
   7.632 +
   7.633 +#ifdef NOTHREADS
   7.634 +#else
   7.635 +    for (i=0; i < READ_POOL_SIZE; i++) {
   7.636 +        int ret, *t;
   7.637 +        /* The worker frees t; it stays ours only if creation fails. */
   7.638 +        t = (int *)malloc(sizeof(int));
   7.639 +        if (t == NULL) {
   7.640 +            printf("Error starting thread %d\n", i);
   7.641 +            continue;
   7.642 +        }
   7.643 +        *t = tid++;
   7.644 +        ret = pthread_create(&read_pool[i], NULL, read_segment, t);
   7.645 +        if (ret != 0) {
   7.646 +            printf("Error starting thread %d\n", i);
   7.647 +            free(t);
   7.648 +        }
   7.649 +    }
   7.650 +#endif
   7.651 +
   7.652 +    pthread_setspecific(tid_key, (void *)(long)tid);
   7.653 +    printf("*My tid is %d.\n", (int)(long)pthread_getspecific(tid_key));
   7.654 +    blktap_register_ctrl_hook("parallax_control", parallax_control);
   7.655 +    blktap_register_request_hook("parallax_request", parallax_request);
   7.656 +    DPRINTF("parallax: added ctrl + request hooks, starting listen...\n"); 
   7.657 +    blktap_listen(); return 0; }
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/tools/blktap/parallax-threaded.h	Mon Mar 21 20:07:51 2005 +0000
     8.3 @@ -0,0 +1,23 @@
     8.4 +/**************************************************************************
     8.5 + * 
     8.6 + * parallax-threaded.h
     8.7 + *
     8.8 + * a few thread-specific defines
     8.9 + *
    8.10 + */
    8.11 + 
    8.12 +#ifndef __PARALLAX_THREADED_H__
    8.13 +#define __PARALLAX_THREADED_H__
    8.14 +#include <pthread.h>    /* pthread_key_t */
    8.15 +#if 0
    8.16 +/* Turn off threading. */
    8.17 +#define NOTHREADS
    8.18 +#endif
    8.19 +/* Number of read worker threads started by main(). */
    8.20 +#define READ_POOL_SIZE 128
    8.21 +/* per-thread identifier.  NOTE(review): this defines the object in every
    8.22 + * file that includes the header; consider extern plus one definition. */
    8.23 +pthread_key_t tid_key;
    8.24 +
    8.25 +#endif /* __PARALLAX_THREADED_H__ */
    8.26 +
     9.1 --- a/tools/blktap/parallax.c	Mon Mar 21 18:05:36 2005 +0000
     9.2 +++ b/tools/blktap/parallax.c	Mon Mar 21 20:07:51 2005 +0000
     9.3 @@ -16,7 +16,7 @@
     9.4  
     9.5  #define PARALLAX_DEV     61440
     9.6  
     9.7 -#if 1
     9.8 +#if 0
     9.9  #define DPRINTF(_f, _a...) printf ( _f , ## _a )
    9.10  #else
    9.11  #define DPRINTF(_f, _a...) ((void)0)
    9.12 @@ -342,14 +342,14 @@ int parallax_read(blkif_request_t *req, 
    9.13  
    9.14      rsp = (blkif_response_t *)req;
    9.15      rsp->id = req->id;
    9.16 -    rsp->operation = BLKIF_OP_WRITE;
    9.17 +    rsp->operation = BLKIF_OP_READ;
    9.18      rsp->status = BLKIF_RSP_OKAY;
    9.19  
    9.20      return BLKTAP_RESPOND;
    9.21  err:
    9.22      rsp = (blkif_response_t *)req;
    9.23      rsp->id = req->id;
    9.24 -    rsp->operation = BLKIF_OP_WRITE;
    9.25 +    rsp->operation = BLKIF_OP_READ;
    9.26      rsp->status = BLKIF_RSP_ERROR;
    9.27      
    9.28      return BLKTAP_RESPOND;  
    10.1 --- a/tools/blktap/radix.c	Mon Mar 21 18:05:36 2005 +0000
    10.2 +++ b/tools/blktap/radix.c	Mon Mar 21 20:07:51 2005 +0000
    10.3 @@ -219,7 +219,78 @@ u64 snapshot(u64 root) {
    10.4          return writable(root);
    10.5  }
    10.6  
    10.7 -void print_root(u64 root, int height, u64 val, FILE *dot_f)
    10.8 +/**
    10.9 + * collapse: collapse a parent onto a child.
   10.10 + * Returns the number of blocks released, or -1 if any step failed.
   10.11 + * NOTE: This assumes that parent and child really are, and further that
   10.12 + * there are no other children forked from this parent. (children of the
   10.13 + * child are okay...)
   10.14 + */
   10.15 +
   10.16 +int collapse(int height, u64 proot, u64 croot)
   10.17 +{
   10.18 +    int i, numlinks, ret, total = 0, failed = 0;
   10.19 +    radix_tree_node pnode, cnode;
   10.20 +    
   10.21 +//printf("proot: %Ld\n", getid(proot));
   10.22 +    if (height == 0) {
   10.23 +        height = -1; /* terminate recursion */
   10.24 +    } else {        
   10.25 +        height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
   10.26 +    }
   10.27 +    numlinks = (1UL << RADIX_TREE_MAP_SHIFT);
   10.28 +
   10.29 +    /* Terminal cases: */
   10.30 +
   10.31 +    if ( (getid(proot) == ZERO) || (getid(croot) == ZERO) )
   10.32 +        return -1;
   10.33 +    
   10.34 +    /* get roots */
   10.35 +    if ((pnode = readblock(getid(proot))) == NULL)
   10.36 +        return -1;
   10.37 +    
   10.38 +    if ((cnode = readblock(getid(croot))) == NULL)
   10.39 +    {
   10.40 +        freeblock(pnode);
   10.41 +        return -1;
   10.42 +    }
   10.43 +    
   10.44 +    /* For each writable link in proot */
   10.45 +    for (i=0; i<numlinks; i++)
   10.46 +    {
   10.47 +        if ( pnode[i] == cnode[i] ) continue;
   10.48 +        
   10.49 +        /* collapse (next level) */
   10.50 +        /* if height != 0 and writable... */
   10.51 +        if (( height >= 0 ) && ( iswritable(pnode[i]) ) )
   10.52 +        {
   10.53 +            //printf("   %Ld is writable (i=%d).\n", getid(pnode[i]), i);
   10.54 +            ret = collapse(height, pnode[i], cnode[i]);
   10.55 +            if (ret == -1) 
   10.56 +                failed = 1;    /* remember the error, keep going */
   10.57 +            else
   10.58 +                total += ret;
   10.59 +        }
   10.60 +    }
   10.61 +
   10.62 +    /* Release the buffers obtained from readblock(). */
   10.63 +    freeblock(pnode);
   10.64 +    freeblock(cnode);
   10.65 +    
   10.66 +    /* if plink is writable, AND clink is writable -> free plink block */
   10.67 +    if ( ( iswritable(proot) ) && ( iswritable(croot) ) ) 
   10.68 +    {
   10.69 +        releaseblock(getid(proot));
   10.70 +        total++;
   10.71 +        //printf("   Delete %Ld\n", getid(proot));
   10.72 +    }
   10.73 +//printf("done : %Ld\n", getid(proot));
   10.74 +    return failed ? -1 : total;
   10.75 +
   10.76 +}
   10.77 +
   10.78 +
   10.79 +void print_root(u64 root, int height, FILE *dot_f)
   10.80  {
   10.81      FILE *f;
   10.82      int i;
   10.83 @@ -241,7 +312,9 @@ void print_root(u64 root, int height, u6
   10.84                  getid(root), style[iswritable(root)], getid(root));
   10.85      }
   10.86      
   10.87 -    /* base case--return val */
   10.88 +    printf("print_root(%Ld)\n", getid(root));
   10.89 +    
   10.90 +    /* base case */
   10.91      if (height == 0) {
   10.92          /* add a node and edge for each child root */
   10.93          node = (radix_tree_node) readblock(getid(root));
   10.94 @@ -249,7 +322,7 @@ void print_root(u64 root, int height, u6
   10.95              return;
   10.96          
   10.97          for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) {
   10.98 -            if (node[i] != 0) {
   10.99 +            if (node[i] != ZERO) {
  10.100                  fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
  10.101                          getid(node[i]), style[iswritable(node[i])], 
  10.102                          getid(node[i]));
  10.103 @@ -257,6 +330,7 @@ void print_root(u64 root, int height, u6
  10.104                          getid(node[i]), i);
  10.105              }
  10.106          }
  10.107 +        freeblock(node);
  10.108          return;
  10.109      }
  10.110  
  10.111 @@ -272,28 +346,17 @@ void print_root(u64 root, int height, u6
  10.112  
  10.113      /* add a node and edge for each child root */
  10.114      for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
  10.115 -        if (node[i] != 0) {
  10.116 +        if (node[i] != ZERO) {
  10.117              fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
  10.118                      getid(node[i]), style[iswritable(node[i])], 
  10.119                      getid(node[i]));
  10.120 -            print_root(node[i], height-RADIX_TREE_MAP_SHIFT, 
  10.121 -                    val + (((u64)i)<<height), f);
  10.122 +
  10.123 +            print_root(node[i], height-RADIX_TREE_MAP_SHIFT, f);
  10.124              fprintf(f, "   n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), 
  10.125                      getid(node[i]), i);
  10.126          }
  10.127 -        
  10.128 -        /*
  10.129 -        
  10.130 -        root = node[(key >> height) & RADIX_TREE_MAP_MASK];
  10.131 -        freeblock(state, getid(oldroot), node);
  10.132  
  10.133 -        if (height == 0)
  10.134 -            return root;
  10.135 -
  10.136 -        height -= RADIX_TREE_MAP_SHIFT;
  10.137 -        */
  10.138 -    //}
  10.139 -
  10.140 +    freeblock(node);
  10.141      
  10.142      /* write graph postamble */
  10.143      if (dot_f == NULL) {
  10.144 @@ -306,7 +369,9 @@ void print_root(u64 root, int height, u6
  10.145  
  10.146  int main(int argc, char **argv) {
  10.147      u64 key = ZERO, val = ZERO;
  10.148 -    u64 root = writable(ONE);
  10.149 +    u64 root = writable(2ULL);
  10.150 +    u64 p = ZERO, c = ZERO;
  10.151 +    int v;
  10.152      char buff[4096];
  10.153  
  10.154      __init_blockstore();
  10.155 @@ -321,18 +386,23 @@ int main(int argc, char **argv) {
  10.156      if (lseek(fp, 0, SEEK_END) == 0) {
  10.157          write(fp, buff, 4096);
  10.158      }*/
  10.159 -           
  10.160 +        
  10.161 +    allocblock(buff);
  10.162 +            
  10.163      printf("Recognized commands:\n"
  10.164             "Note: the LSB of a node number indicates if it is writable\n"
  10.165             "  root <node>               set root to <node>\n"
  10.166             "  snapshot                  take a snapshot of the root\n"
  10.167             "  set <key> <val>           set key=val\n"
  10.168             "  get <key>                 query key\n"
  10.169 +           "  c <proot> <croot>         collapse\n"
  10.170 +           "  pr                        print tree to dot\n"
  10.171 +           "  pf <1=verbose>            print freelist\n"
  10.172             "  quit\n"
  10.173             "\nroot = %Ld\n", root);
  10.174      for (;;) {
  10.175 -        print_root(root, 34, 0, NULL);
  10.176 -        system("dot radix.dot -Tps -o radix.ps");
  10.177 +        //print_root(root, 34, NULL);
  10.178 +        //system("dot radix.dot -Tps -o radix.ps");
  10.179  
  10.180          printf("> ");
  10.181          fflush(stdout);
  10.182 @@ -344,8 +414,11 @@ int main(int argc, char **argv) {
  10.183          } else if (sscanf(buff, " set %Ld %Ld", &key, &val) == 2) {
  10.184              root = update(34, root, key, val);
  10.185              printf("root = %Ld\n", root);
  10.186 +        } else if (sscanf(buff, " c %Ld %Ld", &p, &c) == 2) {
  10.187 +            v = collapse(34, p, c);
  10.188 +            printf("reclaimed %d blocks.\n", v);
  10.189          } else if (sscanf(buff, " get %Ld", &key) == 1) {
  10.190 -            val = lookup(34, root, key, NULL);
  10.191 +            val = lookup(34, root, key);
  10.192              printf("value = %Ld\n", val);
  10.193          } else if (!strcmp(buff, "quit\n")) {
  10.194              break;
  10.195 @@ -353,7 +426,11 @@ int main(int argc, char **argv) {
  10.196              root = snapshot(root);
  10.197              printf("new root = %Ld\n", root);
  10.198          } else if (sscanf(buff, " pr %Ld", &root) == 1) {
  10.199 -            print_root(root, 34, 0, NULL);
  10.200 +            print_root(root, 34, NULL);
  10.201 +        } else if (sscanf(buff, " pf %d", &v) == 1) {
  10.202 +            freelist_count(v);
  10.203 +        } else if (!strcmp(buff, "pf\n")) {
  10.204 +            freelist_count(0);
  10.205          } else {
  10.206              printf("command not recognized\n");
  10.207          }
    11.1 --- a/tools/blktap/radix.h	Mon Mar 21 18:05:36 2005 +0000
    11.2 +++ b/tools/blktap/radix.h	Mon Mar 21 20:07:51 2005 +0000
    11.3 @@ -26,6 +26,7 @@
    11.4  u64 lookup(int height, u64 root, u64 key);
    11.5  u64 update(int height, u64 root, u64 key, u64 val);
    11.6  u64 snapshot(u64 root);
    11.7 +int collapse(int height, u64 proot, u64 croot);
    11.8  int isprivate(int height, u64 root, u64 key);
    11.9  
   11.10  #endif /* __RADIX_H__ */
    12.1 --- a/tools/blktap/snaplog.c	Mon Mar 21 18:05:36 2005 +0000
    12.2 +++ b/tools/blktap/snaplog.c	Mon Mar 21 20:07:51 2005 +0000
    12.3 @@ -113,6 +113,11 @@ int snap_append(snap_id_t *old_id, snap_
    12.4      snap_id_t id = *old_id;
    12.5      snap_block_t *blk = snap_get_block(id.block);
    12.6      
    12.7 +    if ( rec->deleted == 1 ) {
    12.8 +        printf("Attempt to append a deleted snapshot!\n");
    12.9 +        return -1;
   12.10 +    }
   12.11 +    
   12.12      if ( blk->hdr.immutable != 0 ) {
   12.13          printf("Attempt to snap an immutable snap block!\n");
   12.14          return -1;
   12.15 @@ -148,6 +153,65 @@ int snap_append(snap_id_t *old_id, snap_
   12.16      return 0;
   12.17  }
   12.18  
   12.19 +int snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id)
   12.20 +{
   12.21 +    snap_block_t *p_blk, *c_blk;
   12.22 +    snap_rec_t   *p_rec, *c_rec;
   12.23 +    int ret = -1;
   12.24 +    /* Delete snapshot p_id by collapsing its radix tree onto c_id's. */
   12.25 +    p_blk = snap_get_block(p_id->block);
   12.26 +    
   12.27 +    if (p_blk == NULL) return(-1);
   12.28 +    
   12.29 +    if (c_id->block == p_id->block)
   12.30 +    {
   12.31 +        c_blk = p_blk;
   12.32 +    } else {
   12.33 +         c_blk = snap_get_block(c_id->block);
   12.34 +    }
   12.35 +    
   12.36 +    if (c_blk == NULL) {    /* the child's log block could not be read */
   12.37 +        freeblock(p_blk);
   12.38 +        return(-1);
   12.39 +    }
   12.40 +     
   12.41 +    /* parent and child must not be deleted. */
   12.42 +    p_rec = &p_blk->snaps[p_id->index];
   12.43 +    c_rec = &c_blk->snaps[c_id->index];
   12.44 +    /*
   12.45 +    if ( (p_rec->deleted == 1) || (c_rec->deleted == 1) ) {
   12.46 +        printf("One of those snaps is already deleted.\n");
   12.47 +        goto done;
   12.48 +    }
   12.49 +    */
   12.50 +    /* first non-deleted thing in the log before child must be parent. */
   12.51 +    
   12.52 +    /* XXX todo: test the range here for delete (and eventually fork) bits) */
   12.53 +    /* for now, snaps must be consecutive, on the same log page: */
   12.54 +    
   12.55 +    if ((p_id->block != c_id->block) || (p_id->index != c_id->index-1))
   12.56 +    {
   12.57 +        printf("Deleting non-consecutive snaps is not done yet.\n");
   12.58 +        goto done;
   12.59 +    }
   12.60 +    
   12.61 +    /* mark parent as deleted XXX: may need to lock parent block here.*/
   12.62 +    p_rec->deleted = 1;
   12.63 +    writeblock(p_id->block, p_blk);
   12.64 +    
   12.65 +    /* delete the parent */
   12.66 +    printf("collapse(%Ld, %Ld)\n", p_rec->radix_root, c_rec->radix_root);
   12.67 +    ret = collapse(height, p_rec->radix_root, c_rec->radix_root);
   12.68 +    
   12.69 +    /* return the number of blocks reclaimed. */
   12.70 +    
   12.71 +done:
   12.72 +    if (c_blk != p_blk) freeblock(c_blk);
   12.73 +    freeblock(p_blk);
   12.74 +    
   12.75 +    return(ret);
   12.76 +}
   12.77 +
   12.78  void snap_print_history(snap_id_t *snap_id)
   12.79  {
   12.80      snap_id_t id = *snap_id;
    13.1 --- a/tools/blktap/snaplog.h	Mon Mar 21 18:05:36 2005 +0000
    13.2 +++ b/tools/blktap/snaplog.h	Mon Mar 21 20:07:51 2005 +0000
    13.3 @@ -5,9 +5,13 @@
    13.4   * Snapshot log on-disk data structure.
    13.5   *
    13.6   */
    13.7 -
    13.8 + 
    13.9 +#include "radix.h"
   13.10  #include "blockstore.h"    /* for BLOCK_SIZE */
   13.11   
   13.12 +#ifndef __SNAPLOG_H__
   13.13 +#define __SNAPLOG_H__
   13.14 +
   13.15  typedef struct snap_id {
   13.16      u64            block;
   13.17      unsigned int   index;
   13.18 @@ -16,11 +20,14 @@ typedef struct snap_id {
   13.19  typedef struct snap_rec {
   13.20      u64            radix_root;
   13.21      struct timeval timestamp;
   13.22 +    /* flags: deleted is 1 once the snap has been marked deleted, else 0. */
   13.23 +    unsigned       deleted:1;
   13.24  } snap_rec_t;
   13.25  
   13.26  
   13.27  int  snap_block_create(snap_id_t *parent_id, snap_id_t *new_id);
   13.28  int  snap_append(snap_id_t *id, snap_rec_t *rec, snap_id_t *new_id);
   13.29 +int  snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id);
   13.30  void snap_print_history(snap_id_t *snap_id);
   13.31  int  snap_get_id(snap_id_t *id, snap_rec_t *target);
   13.32  
   13.33 @@ -50,3 +57,5 @@ typedef struct snap_block {
   13.34      
   13.35  
   13.36  snap_block_t *snap_get_block(u64 block);
   13.37 +
   13.38 +#endif /* __SNAPLOG_H__ */
    14.1 --- a/tools/blktap/vdi.c	Mon Mar 21 18:05:36 2005 +0000
    14.2 +++ b/tools/blktap/vdi.c	Mon Mar 21 20:07:51 2005 +0000
    14.3 @@ -15,8 +15,8 @@
    14.4  #include "radix.h"
    14.5  #include "vdi.h"
    14.6                      
    14.7 -#define VDI_REG_BLOCK   1LL
    14.8 -#define VDI_RADIX_ROOT  writable(2)
    14.9 +#define VDI_REG_BLOCK   2LL
   14.10 +#define VDI_RADIX_ROOT  writable(3)
   14.11                                                              
   14.12  #if 1
   14.13  #define DPRINTF(_f, _a...) printf ( _f , ## _a )
   14.14 @@ -158,6 +158,7 @@ void vdi_snapshot(vdi_t *vdi)
   14.15      
   14.16      rec.radix_root = vdi->radix_root;
   14.17      gettimeofday(&rec.timestamp, NULL);
   14.18 +    rec.deleted = 0;
   14.19      
   14.20      vdi->radix_root = snapshot(vdi->radix_root);
   14.21      ret = snap_append(&vdi->snap, &rec, &vdi->snap);
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/tools/blktap/vdi_snap_delete.c	Mon Mar 21 20:07:51 2005 +0000
    15.3 @@ -0,0 +1,48 @@
    15.4 +/**************************************************************************
    15.5 + * 
    15.6 + * vdi_snap_delete.c
    15.7 + *
    15.8 + * Delete a snapshot.
    15.9 + *
   15.10 + * This is not finished:  right now it takes a snap n and calls 
   15.11 + * snap_collapse(n,n+1).
   15.12 + *
   15.13 + * TODO: support for non-consecutive, non-same-block snaps
   15.14 + *       Avoid forking probs.
   15.15 + *
   15.16 + */
   15.17 + 
   15.18 +#include <stdio.h>
   15.19 +#include <stdlib.h>
   15.20 +#include <string.h>
   15.21 +#include <sys/time.h>
   15.22 +#include "blockstore.h"
   15.23 +#include "snaplog.h"
   15.24 +#include "radix.h"
   15.25 +#include "vdi.h"
   15.26 +
   15.27 +/* Collapse snap <block,idx> into <block,idx+1>; exits nonzero on failure. */
   15.28 +int main(int argc, char *argv[])
   15.29 +{
   15.30 +    snap_id_t    id, c_id;
   15.31 +    int ret;
   15.32 +    
   15.33 +    __init_blockstore();
   15.34 +    __init_vdi();
   15.35 +    
   15.36 +    if ( argc != 3 ) {
   15.37 +        fprintf(stderr, "usage: %s <snap block> <snap idx>\n", argv[0]);
   15.38 +        return EXIT_FAILURE;
   15.39 +    }
   15.40 +    id.block   = (u64)          atoll(argv[1]);
   15.41 +    id.index   = (unsigned int) atol (argv[2]);
   15.42 +    c_id       = id;
   15.43 +    c_id.index++;              /* child is the next entry on the same page */
   15.44 +    ret = snap_collapse(VDI_HEIGHT, &id, &c_id);
   15.45 +    if ( ret < 0 ) {           /* negative result: nothing was reclaimed */
   15.46 +        fprintf(stderr, "snap_collapse failed.\n");
   15.47 +        return EXIT_FAILURE;
   15.48 +    }
   15.49 +    printf("Freed %d blocks.\n", ret);
   15.50 +    return 0;
   15.51 +}
    16.1 --- a/tools/blktap/vdi_snap_list.c	Mon Mar 21 18:05:36 2005 +0000
    16.2 +++ b/tools/blktap/vdi_snap_list.c	Mon Mar 21 20:07:51 2005 +0000
    16.3 @@ -49,8 +49,10 @@ int main(int argc, char *argv[])
    16.4      sid = vdi->snap;
    16.5      sid.index--;
    16.6      
    16.7 -    //printf("%8s%4s%21s %12s\n", "Block", "idx", "timestamp", "radix root");
    16.8 -    printf("%8s%4s%37s %12s\n", "Block", "idx", "timestamp", "radix root");
    16.9 +    //printf("%8s%4s%21s %12s %1s\n", "Block", "idx", "timestamp", 
   16.10 +    //    "radix root", "d");
   16.11 +    printf("%8s%4s%37s %12s %1s\n", "Block", "idx", "timestamp", 
   16.12 +            "radix root", "d");
   16.13       
   16.14      while (sid.block != 0) {
   16.15          blk = snap_get_block(sid.block);
   16.16 @@ -61,13 +63,14 @@ int main(int argc, char *argv[])
   16.17              }
   16.18              t = ctime(&blk->snaps[i].timestamp.tv_sec);
   16.19              t[strlen(t)-1] = '\0';
   16.20 -            //printf("%8Ld%4u%14lu.%06lu %12Ld\n",
   16.21 -            printf("%8Ld%4u%30s %06lu %12Ld\n",
   16.22 +            //printf("%8Ld%4u%14lu.%06lu %12Ld %1s\n",
   16.23 +            printf("%8Ld%4u%30s %06lu %12Ld %1s\n",
   16.24                      sid.block, i, 
   16.25                      //blk->snaps[i].timestamp.tv_sec,
   16.26                      t,
   16.27                      blk->snaps[i].timestamp.tv_usec,
   16.28 -                    blk->snaps[i].radix_root);
   16.29 +                    blk->snaps[i].radix_root,
   16.30 +                    blk->snaps[i].deleted ? "*" : " ");
   16.31              if ( max_snaps != -1 ) 
   16.32                  max_snaps--;
   16.33          }