ia64/xen-unstable
changeset 10458:840f33e54054
Remove old blktap tools.
Signed-off-by: Andrew Warfield <andrew.warfield@cl.cam.ac.uk>
Signed-off-by: Andrew Warfield <andrew.warfield@cl.cam.ac.uk>
author | akw@localhost.localdomain |
---|---|
date | Fri Jun 16 18:45:45 2006 -0700 (2006-06-16) |
parents | 533bad7c0883 |
children | 716e365377f5 |
files |
line diff
1.1 --- a/tools/blktap/Makefile Fri Jun 16 18:19:40 2006 +0100 1.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 1.3 @@ -1,94 +0,0 @@ 1.4 -MAJOR = 3.0 1.5 -MINOR = 0 1.6 -SONAME = libblktap.so.$(MAJOR) 1.7 - 1.8 -XEN_ROOT = ../.. 1.9 -include $(XEN_ROOT)/tools/Rules.mk 1.10 - 1.11 -SUBDIRS := 1.12 -SUBDIRS += ublkback 1.13 -#SUBDIRS += parallax 1.14 - 1.15 -BLKTAP_INSTALL_DIR = /usr/sbin 1.16 - 1.17 -INSTALL = install 1.18 -INSTALL_PROG = $(INSTALL) -m0755 1.19 -INSTALL_DIR = $(INSTALL) -d -m0755 1.20 - 1.21 -INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE) 1.22 - 1.23 -LIBS := -lpthread -lz 1.24 - 1.25 -SRCS := 1.26 -SRCS += blktaplib.c xenbus.c blkif.c 1.27 - 1.28 -CFLAGS += -Werror 1.29 -CFLAGS += -Wno-unused 1.30 -CFLAGS += -fno-strict-aliasing 1.31 -CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE 1.32 -# get asprintf(): 1.33 -CFLAGS += -D _GNU_SOURCE 1.34 -# Get gcc to generate the dependencies for us. 1.35 -CFLAGS += -Wp,-MD,.$(@F).d 1.36 -CFLAGS += $(INCLUDES) 1.37 -DEPS = .*.d 1.38 - 1.39 -OBJS = $(patsubst %.c,%.o,$(SRCS)) 1.40 -IBINS := 1.41 -#IBINS += blkdump 1.42 - 1.43 -LIB = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR) 1.44 - 1.45 -.PHONY: all 1.46 -all: mk-symlinks libblktap.so #blkdump 1.47 - @set -e; for subdir in $(SUBDIRS); do \ 1.48 - $(MAKE) -C $$subdir $@; \ 1.49 - done 1.50 - 1.51 -.PHONY: install 1.52 -install: all 1.53 - $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR) 1.54 - $(INSTALL_DIR) -p $(DESTDIR)/usr/include 1.55 - $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR) 1.56 - $(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include 1.57 - #$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR) 1.58 - @set -e; for subdir in $(SUBDIRS); do \ 1.59 - $(MAKE) -C $$subdir $@; \ 1.60 - done 1.61 - 1.62 -.PHONY: clean 1.63 -clean: 1.64 - rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump 1.65 - @set -e; for subdir in $(SUBDIRS); do \ 1.66 - $(MAKE) -C $$subdir $@; \ 1.67 - done 1.68 - 1.69 -.PHONY: 
rpm 1.70 -rpm: all 1.71 - rm -rf staging 1.72 - mkdir staging 1.73 - mkdir staging/i386 1.74 - rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \ 1.75 - --define "_rpmdir$$PWD/staging" -bb rpm.spec 1.76 - mv staging/i386/*.rpm . 1.77 - rm -rf staging 1.78 - 1.79 -libblktap.so: $(OBJS) 1.80 - $(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared \ 1.81 - -L$(XEN_XENSTORE) -l xenstore \ 1.82 - -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS) 1.83 - ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR) 1.84 - ln -sf libblktap.so.$(MAJOR) $@ 1.85 - 1.86 -blkdump: libblktap.so 1.87 - $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. \ 1.88 - -l blktap blkdump.c 1.89 - 1.90 -.PHONY: TAGS clean install mk-symlinks rpm 1.91 - 1.92 -.PHONY: TAGS 1.93 -TAGS: 1.94 - etags -t $(SRCS) *.h 1.95 - 1.96 --include $(DEPS) 1.97 -
2.1 --- a/tools/blktap/README Fri Jun 16 18:19:40 2006 +0100 2.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 2.3 @@ -1,149 +0,0 @@ 2.4 -Block Tap User-level Interfaces 2.5 -Andrew Warfield 2.6 -andrew.warfield@cl.cam.ac.uk 2.7 -February 8, 2005 2.8 - 2.9 -NOTE #1: The blktap is _experimental_ code. It works for me. Your 2.10 -mileage may vary. Don't use it for anything important. Please. ;) 2.11 - 2.12 -NOTE #2: All of the interfaces here are likely to change. This is all 2.13 -early code, and I am checking it in because others want to play with 2.14 -it. If you use it for anything, please let me know! 2.15 - 2.16 -Overview: 2.17 ---------- 2.18 - 2.19 -This directory contains a library and set of example applications for 2.20 -the block tap device. The block tap hooks into the split block device 2.21 -interfaces above Xen allowing them to be extended. This extension can 2.22 -be done in userspace with the help of a library. 2.23 - 2.24 -The tap can be installed either as an interposition domain in between 2.25 -a frontend and backend driver pair, or as a terminating backend, in 2.26 -which case it is responsible for serving all requests itself. 2.27 - 2.28 -There are two reasons that you might want to use the tap, 2.29 -corresponding to these configurations: 2.30 - 2.31 - 1. To examine or modify a stream of block requests while they are 2.32 - in-flight (e.g. to encrypt data, or add data-driven watchpoints) 2.33 - 2.34 - 2. To prototype a new backend driver, serving requests from the tap 2.35 - rather than passing them along to the XenLinux blkback driver. 2.36 - (e.g. to forward block requests to a remote host) 2.37 - 2.38 - 2.39 -Interface: 2.40 ----------- 2.41 - 2.42 -At the moment, the tap interface is similar in spirit to that of the 2.43 -Linux netfilter. Requests are messages from a client (frontend) 2.44 -domain to a disk (backend) domain. Responses are messages travelling 2.45 -back, acknowledging the completion of a request. 
The library allows 2.46 -chains of functions to be attached to these events. In addition, 2.47 -hooks may be attached to handle control messages, which signify things 2.48 -like connections from new domains. 2.49 - 2.50 -At present the control messages especially expose a lot of the 2.51 -underlying driver interfaces. This may change in the future in order 2.52 -to simplify writing hooks. 2.53 - 2.54 -Here are the public interfaces: 2.55 - 2.56 -These allow hook functions to be chained: 2.57 - 2.58 - void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *)); 2.59 - void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *)); 2.60 - void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *)); 2.61 - 2.62 -This allows a response to be injected, in the case where a request has 2.63 -been removed using BLKTAP_STOLEN. 2.64 - 2.65 - void blktap_inject_response(blkif_response_t *); 2.66 - 2.67 -These let you add file descriptors and handlers to the main poll loop: 2.68 - 2.69 - int blktap_attach_poll(int fd, short events, int (*func)(int)); 2.70 - void blktap_detach_poll(int fd); 2.71 - 2.72 -This starts the main poll loop: 2.73 - 2.74 - int blktap_listen(void); 2.75 - 2.76 -Example: 2.77 --------- 2.78 - 2.79 -blkimg.c uses an image on the local file system to serve requests to 2.80 -a domain. Here's what it looks like: 2.81 - 2.82 ----[blkimg.c]--- 2.83 - 2.84 -/* blkimg.c 2.85 - * 2.86 - * file-backed disk. 2.87 - */ 2.88 - 2.89 -#include "blktaplib.h" 2.90 -#include "blkimglib.h" 2.91 - 2.92 - 2.93 -int main(int argc, char *argv[]) 2.94 -{ 2.95 - image_init(); 2.96 - 2.97 - blktap_register_ctrl_hook("image_control", image_control); 2.98 - blktap_register_request_hook("image_request", image_request); 2.99 - blktap_listen(); 2.100 - 2.101 - return 0; 2.102 -} 2.103 - 2.104 ----------------- 2.105 - 2.106 -All of the real work is in blkimglib.c, but this illustrates the 2.107 -actual tap interface well enough.
image_control() will be called with 2.108 -all control messages. image_request() handles requests. As it reads 2.109 -from an on-disk image file, no requests are ever passed on to a 2.110 -backend, and so there will be no responses to process -- so there is 2.111 -nothing registered as a response hook. 2.112 - 2.113 -Other examples: 2.114 ---------------- 2.115 - 2.116 -Here is a list of other examples in the directory: 2.117 - 2.118 -Things that terminate a block request stream: 2.119 - 2.120 - blkimg - Use an image file/device to serve requests 2.121 - blkgnbd - Use a remote gnbd server to serve requests 2.122 - blkaio - Use libaio... (DOES NOT WORK) 2.123 - 2.124 -Things that don't: 2.125 - 2.126 - blkdump - Print in-flight requests. 2.127 - blkcow - Really inefficient copy-on-write disks using libdb to store 2.128 - writes. 2.129 - 2.130 -There are examples of plugging these things together, for instance 2.131 -blkcowgnbd is a read-only gnbd device with copy-on-write to a local 2.132 -file. 2.133 - 2.134 -TODO: 2.135 ------ 2.136 - 2.137 -- Make session tracking work. At the moment these generally just handle a 2.138 - single front-end client at a time. 2.139 - 2.140 -- Integrate with Xend. Need to cleanly pass an image identifier in the connect 2.141 - message. 2.142 - 2.143 -- Make an asynchronous file-io terminator. The libaio attempt is 2.144 - tragically stalled because mapped foreign pages make pfn_valid fail 2.145 - (they are VM_IO), and so cannot be passed to aio as targets. A 2.146 - better solution may be to tear the disk interfaces out of the real 2.147 - backend and expose them somehow. 2.148 - 2.149 -- Make CoW suck less. 2.150 - 2.151 -- Do something more along the lines of dynamic linking for the 2.152 - plugins, so that they don't all need a new main().
3.1 --- a/tools/blktap/README.sept05 Fri Jun 16 18:19:40 2006 +0100 3.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 3.3 @@ -1,33 +0,0 @@ 3.4 -The blktap has been rewritten substantially based on the current 3.5 -blkback driver. I've removed passthrough support, as this is broken 3.6 -by the move to grant tables and the lack of transitive grants. A 3.7 -blktap VM is now only capable of terminating block requests in 3.8 -userspace. 3.9 - 3.10 -ublkback/ contains a _very_ initial cut at a user-level version of the block 3.11 -backend driver. It gives a working example of how the current tap 3.12 -interfaces are used, in particular w.r.t. the vbd directories in 3.13 -xenstore. 3.14 - 3.15 -parallax/ contains fairly recent parallax code. This does not run on 3.16 -the changed blktap interface, but should only be a couple of hours 3.17 -work to get going again. 3.18 - 3.19 -All of the tricky bits are done, but there is plenty of cleaning to 3.20 -do, and the top-level functionality is not here yet. At the moment, 3.21 -the daemon ignores the pdev requested by the tools and opens the file 3.22 -or device specified by TMP_IMAGE_FILE_NAME in ublkback.c. 3.23 - 3.24 -TODO: 3.25 -1. Fix to allow pdev in the store to specify the device to open. 3.26 -2. Add support (to tools as well) to mount arbitrary files... 3.27 - just write the filename to mount into the store, instead of pdev. 3.28 -3. Reexamine blkif refcounting; it is almost certainly broken at the moment. 3.29 - - creating a blkif should take a reference. 3.30 - - each inflight request should take a reference on dequeue in blktaplib 3.31 - - sending responses should drop refs. 3.32 - - blkif should be implicitly freed when refcounts fall to 0. 3.33 -4. Modify the parallax req/rsp code as per ublkback to use the new tap 3.34 - interfaces. 3.35 -5. Write a front end that allows parallax and normal mounts to coexist 3.36 -6. Allow blkback and blktap to run at the same time.
4.1 --- a/tools/blktap/blkdump.c Fri Jun 16 18:19:40 2006 +0100 4.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 4.3 @@ -1,62 +0,0 @@ 4.4 -/* blkdump.c 4.5 - * 4.6 - * show a running trace of block requests as they fly by. 4.7 - * 4.8 - * (c) 2004 Andrew Warfield. 4.9 - */ 4.10 - 4.11 -#include <stdio.h> 4.12 -#include "blktaplib.h" 4.13 - 4.14 -int request_print(blkif_request_t *req) 4.15 -{ 4.16 - int i; 4.17 - 4.18 - if ( (req->operation == BLKIF_OP_READ) || 4.19 - (req->operation == BLKIF_OP_WRITE) ) 4.20 - { 4.21 - printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", 4.22 - ID_TO_DOM(req->id), ID_TO_IDX(req->id), 4.23 - blkif_op_name[req->operation], 4.24 - req->nr_segments, req->handle, 4.25 - req->sector_number); 4.26 - 4.27 - 4.28 - for (i=0; i < req->nr_segments; i++) { 4.29 - printf(" (gref: 0x%8x start: %u stop: %u)\n", 4.30 - req->seg[i].gref, 4.31 - req->seg[i].first_sect, 4.32 - req->seg[i].last_sect); 4.33 - } 4.34 - 4.35 - } else { 4.36 - printf("Unknown request message type.\n"); 4.37 - } 4.38 - 4.39 - return BLKTAP_PASS; 4.40 -} 4.41 - 4.42 -int response_print(blkif_response_t *rsp) 4.43 -{ 4.44 - if ( (rsp->operation == BLKIF_OP_READ) || 4.45 - (rsp->operation == BLKIF_OP_WRITE) ) 4.46 - { 4.47 - printf("[%2u:%2u>%5s] (status: %d)\n", 4.48 - ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), 4.49 - blkif_op_name[rsp->operation], 4.50 - rsp->status); 4.51 - 4.52 - } else { 4.53 - printf("Unknown request message type.\n"); 4.54 - } 4.55 - return BLKTAP_PASS; 4.56 -} 4.57 - 4.58 -int main(int argc, char *argv[]) 4.59 -{ 4.60 - blktap_register_request_hook("request_print", request_print); 4.61 - blktap_register_response_hook("response_print", response_print); 4.62 - blktap_listen(); 4.63 - 4.64 - return 0; 4.65 -}
5.1 --- a/tools/blktap/blkif.c Fri Jun 16 18:19:40 2006 +0100 5.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 5.3 @@ -1,212 +0,0 @@ 5.4 -/* 5.5 - * blkif.c 5.6 - * 5.7 - * The blkif interface for blktap. A blkif describes an in-use virtual disk. 5.8 - */ 5.9 - 5.10 -#include <stdio.h> 5.11 -#include <stdlib.h> 5.12 -#include <errno.h> 5.13 -#include <string.h> 5.14 -#include <err.h> 5.15 - 5.16 -#include "blktaplib.h" 5.17 - 5.18 -#if 1 5.19 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 5.20 -#else 5.21 -#define DPRINTF(_f, _a...) ((void)0) 5.22 -#endif 5.23 - 5.24 -#define BLKIF_HASHSZ 1024 5.25 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) 5.26 - 5.27 -static blkif_t *blkif_hash[BLKIF_HASHSZ]; 5.28 - 5.29 -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) 5.30 -{ 5.31 - blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; 5.32 - while ( (blkif != NULL) && 5.33 - ((blkif->domid != domid) || (blkif->handle != handle)) ) 5.34 - blkif = blkif->hash_next; 5.35 - return blkif; 5.36 -} 5.37 - 5.38 -blkif_t *alloc_blkif(domid_t domid) 5.39 -{ 5.40 - blkif_t *blkif; 5.41 - 5.42 - blkif = (blkif_t *)malloc(sizeof(blkif_t)); 5.43 - if (!blkif) 5.44 - return NULL; 5.45 - 5.46 - memset(blkif, 0, sizeof(*blkif)); 5.47 - blkif->domid = domid; 5.48 - 5.49 - return blkif; 5.50 -} 5.51 - 5.52 -static int (*new_blkif_hook)(blkif_t *blkif) = NULL; 5.53 -void register_new_blkif_hook(int (*fn)(blkif_t *blkif)) 5.54 -{ 5.55 - new_blkif_hook = fn; 5.56 -} 5.57 - 5.58 -int blkif_init(blkif_t *blkif, long int handle, long int pdev, 5.59 - long int readonly) 5.60 -{ 5.61 - domid_t domid; 5.62 - blkif_t **pblkif; 5.63 - 5.64 - if (blkif == NULL) 5.65 - return -EINVAL; 5.66 - 5.67 - domid = blkif->domid; 5.68 - blkif->handle = handle; 5.69 - blkif->pdev = pdev; 5.70 - blkif->readonly = readonly; 5.71 - 5.72 - /* 5.73 - * Call out to the new_blkif_hook. 
The tap application should define this, 5.74 - * and it should return having set blkif->ops 5.75 - * 5.76 - */ 5.77 - if (new_blkif_hook == NULL) 5.78 - { 5.79 - warn("Probe detected a new blkif, but no new_blkif_hook!"); 5.80 - return -1; 5.81 - } 5.82 - new_blkif_hook(blkif); 5.83 - 5.84 - /* Now wire it in. */ 5.85 - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; 5.86 - while ( *pblkif != NULL ) 5.87 - { 5.88 - if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) 5.89 - { 5.90 - DPRINTF("Could not create blkif: already exists\n"); 5.91 - return -1; 5.92 - } 5.93 - pblkif = &(*pblkif)->hash_next; 5.94 - } 5.95 - blkif->hash_next = NULL; 5.96 - *pblkif = blkif; 5.97 - 5.98 - return 0; 5.99 -} 5.100 - 5.101 -void free_blkif(blkif_t *blkif) 5.102 -{ 5.103 - blkif_t **pblkif, *curs; 5.104 - 5.105 - pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)]; 5.106 - while ( (curs = *pblkif) != NULL ) 5.107 - { 5.108 - if ( blkif == curs ) 5.109 - { 5.110 - *pblkif = curs->hash_next; 5.111 - } 5.112 - pblkif = &curs->hash_next; 5.113 - } 5.114 - free(blkif); 5.115 -} 5.116 - 5.117 -void blkif_register_request_hook(blkif_t *blkif, char *name, 5.118 - int (*rh)(blkif_t *, blkif_request_t *, int)) 5.119 -{ 5.120 - request_hook_t *rh_ent, **c; 5.121 - 5.122 - rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t)); 5.123 - if (!rh_ent) 5.124 - { 5.125 - warn("couldn't allocate a new hook"); 5.126 - return; 5.127 - } 5.128 - 5.129 - rh_ent->func = rh; 5.130 - rh_ent->next = NULL; 5.131 - if (asprintf(&rh_ent->name, "%s", name) == -1) 5.132 - { 5.133 - free(rh_ent); 5.134 - warn("couldn't allocate a new hook name"); 5.135 - return; 5.136 - } 5.137 - 5.138 - c = &blkif->request_hook_chain; 5.139 - while (*c != NULL) { 5.140 - c = &(*c)->next; 5.141 - } 5.142 - *c = rh_ent; 5.143 -} 5.144 - 5.145 -void blkif_register_response_hook(blkif_t *blkif, char *name, 5.146 - int (*rh)(blkif_t *, blkif_response_t *, int)) 5.147 -{ 5.148 - response_hook_t *rh_ent, 
**c; 5.149 - 5.150 - rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t)); 5.151 - if (!rh_ent) 5.152 - { 5.153 - warn("couldn't allocate a new hook"); 5.154 - return; 5.155 - } 5.156 - 5.157 - rh_ent->func = rh; 5.158 - rh_ent->next = NULL; 5.159 - if (asprintf(&rh_ent->name, "%s", name) == -1) 5.160 - { 5.161 - free(rh_ent); 5.162 - warn("couldn't allocate a new hook name"); 5.163 - return; 5.164 - } 5.165 - 5.166 - c = &blkif->response_hook_chain; 5.167 - while (*c != NULL) { 5.168 - c = &(*c)->next; 5.169 - } 5.170 - *c = rh_ent; 5.171 -} 5.172 - 5.173 -void blkif_print_hooks(blkif_t *blkif) 5.174 -{ 5.175 - request_hook_t *req_hook; 5.176 - response_hook_t *rsp_hook; 5.177 - 5.178 - DPRINTF("Request Hooks:\n"); 5.179 - req_hook = blkif->request_hook_chain; 5.180 - while (req_hook != NULL) 5.181 - { 5.182 - DPRINTF(" [0x%p] %s\n", req_hook->func, req_hook->name); 5.183 - req_hook = req_hook->next; 5.184 - } 5.185 - 5.186 - DPRINTF("Response Hooks:\n"); 5.187 - rsp_hook = blkif->response_hook_chain; 5.188 - while (rsp_hook != NULL) 5.189 - { 5.190 - DPRINTF(" [0x%p] %s\n", rsp_hook->func, rsp_hook->name); 5.191 - rsp_hook = rsp_hook->next; 5.192 - } 5.193 -} 5.194 - 5.195 - 5.196 -long int vbd_size(blkif_t *blkif) 5.197 -{ 5.198 - return 1000000000; 5.199 -} 5.200 - 5.201 -long int vbd_secsize(blkif_t *blkif) 5.202 -{ 5.203 - return 512; 5.204 -} 5.205 - 5.206 -unsigned vbd_info(blkif_t *blkif) 5.207 -{ 5.208 - return 0; 5.209 -} 5.210 - 5.211 - 5.212 -void __init_blkif(void) 5.213 -{ 5.214 - memset(blkif_hash, 0, sizeof(blkif_hash)); 5.215 -}
6.1 --- a/tools/blktap/blktaplib.c Fri Jun 16 18:19:40 2006 +0100 6.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 6.3 @@ -1,453 +0,0 @@ 6.4 -/* 6.5 - * blktaplib.c 6.6 - * 6.7 - * userspace interface routines for the blktap driver. 6.8 - * 6.9 - * (threadsafe(r) version) 6.10 - * 6.11 - * (c) 2004 Andrew Warfield. 6.12 - */ 6.13 - 6.14 -#include <stdio.h> 6.15 -#include <stdlib.h> 6.16 -#include <sys/mman.h> 6.17 -#include <sys/user.h> 6.18 -#include <err.h> 6.19 -#include <errno.h> 6.20 -#include <sys/types.h> 6.21 -#include <linux/types.h> 6.22 -#include <sys/stat.h> 6.23 -#include <fcntl.h> 6.24 -#include <signal.h> 6.25 -#include <sys/poll.h> 6.26 -#include <sys/ioctl.h> 6.27 -#include <string.h> 6.28 -#include <unistd.h> 6.29 -#include <pthread.h> 6.30 -#include <xs.h> 6.31 - 6.32 -#define __COMPILING_BLKTAP_LIB 6.33 -#include "blktaplib.h" 6.34 - 6.35 -#if 0 6.36 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 6.37 -#else 6.38 -#define DPRINTF(_f, _a...) ((void)0) 6.39 -#endif 6.40 -#define DEBUG_RING_IDXS 0 6.41 - 6.42 -#define POLLRDNORM 0x040 6.43 - 6.44 -#define BLKTAP_IOCTL_KICK 1 6.45 - 6.46 - 6.47 -void got_sig_bus(); 6.48 -void got_sig_int(); 6.49 - 6.50 -/* in kernel these are opposite, but we are a consumer now. 
*/ 6.51 -blkif_back_ring_t fe_ring; /* slightly counterintuitive ;) */ 6.52 -blkif_front_ring_t be_ring; 6.53 - 6.54 -unsigned long mmap_vstart = 0; 6.55 -char *blktap_mem; 6.56 -int fd = 0; 6.57 - 6.58 -#define BLKTAP_RING_PAGES 1 /* Front */ 6.59 -#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES) 6.60 - 6.61 -int bad_count = 0; 6.62 -void bad(void) 6.63 -{ 6.64 - bad_count ++; 6.65 - if (bad_count > 50) exit(0); 6.66 -} 6.67 -/*-----[ ID Manipulation from tap driver code ]--------------------------*/ 6.68 - 6.69 -#define ACTIVE_RING_IDX unsigned short 6.70 - 6.71 -inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx) 6.72 -{ 6.73 - return ( (fe_dom << 16) | idx ); 6.74 -} 6.75 - 6.76 -inline unsigned int ID_TO_IDX(unsigned long id) 6.77 -{ 6.78 - return ( id & 0x0000ffff ); 6.79 -} 6.80 - 6.81 -inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); } 6.82 - 6.83 -static int (*request_hook)(blkif_request_t *req) = NULL; 6.84 -static int (*response_hook)(blkif_response_t *req) = NULL; 6.85 - 6.86 -/*-----[ Data to/from Backend (server) VM ]------------------------------*/ 6.87 - 6.88 -/* 6.89 - 6.90 -inline int write_req_to_be_ring(blkif_request_t *req) 6.91 -{ 6.92 - blkif_request_t *req_d; 6.93 - static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER; 6.94 - 6.95 - pthread_mutex_lock(&be_prod_mutex); 6.96 - req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt); 6.97 - memcpy(req_d, req, sizeof(blkif_request_t)); 6.98 - wmb(); 6.99 - be_ring.req_prod_pvt++; 6.100 - pthread_mutex_unlock(&be_prod_mutex); 6.101 - 6.102 - return 0; 6.103 -} 6.104 -*/ 6.105 - 6.106 -inline int write_rsp_to_fe_ring(blkif_response_t *rsp) 6.107 -{ 6.108 - blkif_response_t *rsp_d; 6.109 - static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER; 6.110 - 6.111 - pthread_mutex_lock(&fe_prod_mutex); 6.112 - rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt); 6.113 - memcpy(rsp_d, rsp, sizeof(blkif_response_t)); 6.114 - wmb(); 
6.115 - fe_ring.rsp_prod_pvt++; 6.116 - pthread_mutex_unlock(&fe_prod_mutex); 6.117 - 6.118 - return 0; 6.119 -} 6.120 - 6.121 -static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp) 6.122 -{ 6.123 - response_hook_t *rsp_hook; 6.124 - 6.125 - rsp_hook = blkif->response_hook_chain; 6.126 - while (rsp_hook != NULL) 6.127 - { 6.128 - switch(rsp_hook->func(blkif, rsp, 1)) 6.129 - { 6.130 - case BLKTAP_PASS: 6.131 - break; 6.132 - default: 6.133 - printf("Only PASS is supported for resp hooks!\n"); 6.134 - } 6.135 - rsp_hook = rsp_hook->next; 6.136 - } 6.137 -} 6.138 - 6.139 - 6.140 -static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER; 6.141 - 6.142 -void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp) 6.143 -{ 6.144 - 6.145 - apply_rsp_hooks(blkif, rsp); 6.146 - 6.147 - write_rsp_to_fe_ring(rsp); 6.148 -} 6.149 - 6.150 -void blktap_kick_responses(void) 6.151 -{ 6.152 - pthread_mutex_lock(&push_mutex); 6.153 - 6.154 - RING_PUSH_RESPONSES(&fe_ring); 6.155 - ioctl(fd, BLKTAP_IOCTL_KICK_FE); 6.156 - 6.157 - pthread_mutex_unlock(&push_mutex); 6.158 -} 6.159 - 6.160 -/*-----[ Polling fd listeners ]------------------------------------------*/ 6.161 - 6.162 -#define MAX_POLLFDS 64 6.163 - 6.164 -typedef struct { 6.165 - int (*func)(int fd); 6.166 - struct pollfd *pfd; 6.167 - int fd; 6.168 - short events; 6.169 - int active; 6.170 -} pollhook_t; 6.171 - 6.172 -static struct pollfd pfd[MAX_POLLFDS+2]; /* tap and store are extra */ 6.173 -static pollhook_t pollhooks[MAX_POLLFDS]; 6.174 -static unsigned int ph_freelist[MAX_POLLFDS]; 6.175 -static unsigned int ph_cons, ph_prod; 6.176 -#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons)) 6.177 -#define PH_IDX(x) (x % MAX_POLLFDS) 6.178 - 6.179 -int blktap_attach_poll(int fd, short events, int (*func)(int fd)) 6.180 -{ 6.181 - pollhook_t *ph; 6.182 - 6.183 - if (nr_pollhooks() == MAX_POLLFDS) { 6.184 - printf("Too many pollhooks!\n"); 6.185 - return -1; 6.186 - } 6.187 - 6.188 - ph = 
&pollhooks[ph_freelist[PH_IDX(ph_cons++)]]; 6.189 - 6.190 - ph->func = func; 6.191 - ph->fd = fd; 6.192 - ph->events = events; 6.193 - ph->active = 1; 6.194 - 6.195 - DPRINTF("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1, 6.196 - nr_pollhooks()); 6.197 - 6.198 - return 0; 6.199 -} 6.200 - 6.201 -void blktap_detach_poll(int fd) 6.202 -{ 6.203 - int i; 6.204 - 6.205 - for (i=0; i<MAX_POLLFDS; i++) 6.206 - if ((pollhooks[i].active) && (pollhooks[i].pfd->fd == fd)) { 6.207 - ph_freelist[PH_IDX(ph_prod++)] = i; 6.208 - pollhooks[i].pfd->fd = -1; 6.209 - pollhooks[i].active = 0; 6.210 - break; 6.211 - } 6.212 - 6.213 - DPRINTF("Removed fd %d at ph index %d, now %d phs.\n", fd, i, 6.214 - nr_pollhooks()); 6.215 -} 6.216 - 6.217 -void pollhook_init(void) 6.218 -{ 6.219 - int i; 6.220 - 6.221 - for (i=0; i < MAX_POLLFDS; i++) { 6.222 - ph_freelist[i] = (i+1) % MAX_POLLFDS; 6.223 - pollhooks[i].active = 0; 6.224 - } 6.225 - 6.226 - ph_cons = 0; 6.227 - ph_prod = MAX_POLLFDS; 6.228 -} 6.229 - 6.230 -void __attribute__ ((constructor)) blktaplib_init(void) 6.231 -{ 6.232 - pollhook_init(); 6.233 -} 6.234 - 6.235 -/*-----[ The main listen loop ]------------------------------------------*/ 6.236 - 6.237 -int blktap_listen(void) 6.238 -{ 6.239 - int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret; 6.240 - struct xs_handle *h; 6.241 - blkif_t *blkif; 6.242 - 6.243 - /* comms rings: */ 6.244 - blkif_request_t *req; 6.245 - blkif_response_t *rsp; 6.246 - blkif_sring_t *sring; 6.247 - RING_IDX rp, i, pfd_count; 6.248 - 6.249 - /* pending rings */ 6.250 - blkif_request_t req_pending[BLK_RING_SIZE]; 6.251 - /* blkif_response_t rsp_pending[BLK_RING_SIZE] */; 6.252 - 6.253 - /* handler hooks: */ 6.254 - request_hook_t *req_hook; 6.255 - response_hook_t *rsp_hook; 6.256 - 6.257 - signal (SIGBUS, got_sig_bus); 6.258 - signal (SIGINT, got_sig_int); 6.259 - 6.260 - __init_blkif(); 6.261 - 6.262 - fd = open("/dev/blktap", O_RDWR); 6.263 - if (fd == -1) 6.264 - err(-1, "open 
failed!"); 6.265 - 6.266 - blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, 6.267 - PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 6.268 - 6.269 - if ((int)blktap_mem == -1) 6.270 - err(-1, "mmap failed!"); 6.271 - 6.272 - /* assign the rings to the mapped memory */ 6.273 -/* 6.274 - sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE); 6.275 - FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE); 6.276 -*/ 6.277 - sring = (blkif_sring_t *)((unsigned long)blktap_mem); 6.278 - BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE); 6.279 - 6.280 - mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT); 6.281 - 6.282 - 6.283 - /* Set up store connection and watch. */ 6.284 - h = xs_daemon_open(); 6.285 - if (h == NULL) 6.286 - err(-1, "xs_daemon_open"); 6.287 - 6.288 - ret = add_blockdevice_probe_watch(h, "Domain-0"); 6.289 - if (ret != 0) 6.290 - err(0, "adding device probewatch"); 6.291 - 6.292 - ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE ); 6.293 - 6.294 - while(1) { 6.295 - int ret; 6.296 - 6.297 - /* build the poll list */ 6.298 - pfd_count = 0; 6.299 - for ( i=0; i < MAX_POLLFDS; i++ ) { 6.300 - pollhook_t *ph = &pollhooks[i]; 6.301 - 6.302 - if (ph->active) { 6.303 - pfd[pfd_count].fd = ph->fd; 6.304 - pfd[pfd_count].events = ph->events; 6.305 - ph->pfd = &pfd[pfd_count]; 6.306 - pfd_count++; 6.307 - } 6.308 - } 6.309 - 6.310 - tap_pfd = pfd_count++; 6.311 - pfd[tap_pfd].fd = fd; 6.312 - pfd[tap_pfd].events = POLLIN; 6.313 - 6.314 - store_pfd = pfd_count++; 6.315 - pfd[store_pfd].fd = xs_fileno(h); 6.316 - pfd[store_pfd].events = POLLIN; 6.317 - 6.318 - if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) { 6.319 - if (DEBUG_RING_IDXS) 6.320 - ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS); 6.321 - continue; 6.322 - } 6.323 - 6.324 - for (i=0; i < MAX_POLLFDS; i++) { 6.325 - if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) ) 6.326 - pollhooks[i].func(pollhooks[i].pfd->fd); 6.327 - } 6.328 - 6.329 - if (pfd[store_pfd].revents) { 6.330 - 
ret = xs_fire_next_watch(h); 6.331 - } 6.332 - 6.333 - if (pfd[tap_pfd].revents) 6.334 - { 6.335 - /* empty the fe_ring */ 6.336 - notify_fe = 0; 6.337 - notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring); 6.338 - rp = fe_ring.sring->req_prod; 6.339 - rmb(); 6.340 - for (i = fe_ring.req_cons; i != rp; i++) 6.341 - { 6.342 - int done = 0; 6.343 - 6.344 - req = RING_GET_REQUEST(&fe_ring, i); 6.345 - memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req)); 6.346 - req = &req_pending[ID_TO_IDX(req->id)]; 6.347 - 6.348 - blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle); 6.349 - 6.350 - if (blkif != NULL) 6.351 - { 6.352 - req_hook = blkif->request_hook_chain; 6.353 - while (req_hook != NULL) 6.354 - { 6.355 - switch(req_hook->func(blkif, req, ((i+1) == rp))) 6.356 - { 6.357 - case BLKTAP_RESPOND: 6.358 - apply_rsp_hooks(blkif, (blkif_response_t *)req); 6.359 - write_rsp_to_fe_ring((blkif_response_t *)req); 6.360 - notify_fe = 1; 6.361 - done = 1; 6.362 - break; 6.363 - case BLKTAP_STOLEN: 6.364 - done = 1; 6.365 - break; 6.366 - case BLKTAP_PASS: 6.367 - break; 6.368 - default: 6.369 - printf("Unknown request hook return value!\n"); 6.370 - } 6.371 - if (done) break; 6.372 - req_hook = req_hook->next; 6.373 - } 6.374 - } 6.375 - 6.376 - if (done == 0) 6.377 - { 6.378 - /* this was: */ 6.379 - /* write_req_to_be_ring(req); */ 6.380 - 6.381 - unsigned long id = req->id; 6.382 - unsigned short operation = req->operation; 6.383 - printf("Unterminated request!\n"); 6.384 - rsp = (blkif_response_t *)req; 6.385 - rsp->id = id; 6.386 - rsp->operation = operation; 6.387 - rsp->status = BLKIF_RSP_ERROR; 6.388 - write_rsp_to_fe_ring(rsp); 6.389 - notify_fe = 1; 6.390 - done = 1; 6.391 - } 6.392 - 6.393 - } 6.394 - fe_ring.req_cons = i; 6.395 - 6.396 - /* empty the be_ring */ 6.397 -/* 6.398 - notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring); 6.399 - rp = be_ring.sring->rsp_prod; 6.400 - rmb(); 6.401 - for (i = be_ring.rsp_cons; i != rp; i++) 6.402 - { 6.403 - 
6.404 - rsp = RING_GET_RESPONSE(&be_ring, i); 6.405 - memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp)); 6.406 - rsp = &rsp_pending[ID_TO_IDX(rsp->id)]; 6.407 - 6.408 - DPRINTF("copying a be request\n"); 6.409 - 6.410 - apply_rsp_hooks(rsp); 6.411 - write_rsp_to_fe_ring(rsp); 6.412 - } 6.413 - be_ring.rsp_cons = i; 6.414 -*/ 6.415 - /* notify the domains */ 6.416 -/* 6.417 - if (notify_be) { 6.418 - DPRINTF("notifying be\n"); 6.419 -pthread_mutex_lock(&push_mutex); 6.420 - RING_PUSH_REQUESTS(&be_ring); 6.421 - ioctl(fd, BLKTAP_IOCTL_KICK_BE); 6.422 -pthread_mutex_unlock(&push_mutex); 6.423 - } 6.424 -*/ 6.425 - if (notify_fe) { 6.426 - DPRINTF("notifying fe\n"); 6.427 - pthread_mutex_lock(&push_mutex); 6.428 - RING_PUSH_RESPONSES(&fe_ring); 6.429 - ioctl(fd, BLKTAP_IOCTL_KICK_FE); 6.430 - pthread_mutex_unlock(&push_mutex); 6.431 - } 6.432 - } 6.433 - } 6.434 - 6.435 - 6.436 - munmap(blktap_mem, PAGE_SIZE); 6.437 - 6.438 - mmap_failed: 6.439 - close(fd); 6.440 - 6.441 - open_failed: 6.442 - return 0; 6.443 -} 6.444 - 6.445 -void got_sig_bus() { 6.446 - printf("Attempted to access a page that isn't.\n"); 6.447 - exit(-1); 6.448 -} 6.449 - 6.450 -void got_sig_int() { 6.451 - DPRINTF("quitting -- returning to passthrough mode.\n"); 6.452 - if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH ); 6.453 - close(fd); 6.454 - fd = 0; 6.455 - exit(0); 6.456 -}
7.1 --- a/tools/blktap/blktaplib.h Fri Jun 16 18:19:40 2006 +0100 7.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 7.3 @@ -1,171 +0,0 @@ 7.4 -/* blktaplib.h 7.5 - * 7.6 - * userland accessors to the block tap. 7.7 - * 7.8 - * Sept 2/05 -- I'm scaling this back to only support block remappings 7.9 - * to user in a backend domain. Passthrough and interposition can be readded 7.10 - * once transitive grants are available. 7.11 - */ 7.12 - 7.13 -#ifndef __BLKTAPLIB_H__ 7.14 -#define __BLKTAPLIB_H__ 7.15 - 7.16 -#include <xenctrl.h> 7.17 -#include <sys/user.h> 7.18 -#include <xen/xen.h> 7.19 -#include <xen/io/blkif.h> 7.20 -#include <xen/io/ring.h> 7.21 -#include <xen/io/domain_controller.h> 7.22 -#include <xs.h> 7.23 - 7.24 -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) 7.25 - 7.26 -/* /dev/xen/blktap resides at device number major=10, minor=202 */ 7.27 -#define BLKTAP_MINOR 202 7.28 - 7.29 -/* size of the extra VMA area to map in attached pages. */ 7.30 -#define BLKTAP_VMA_PAGES BLK_RING_SIZE 7.31 - 7.32 -/* blktap IOCTLs: */ 7.33 -#define BLKTAP_IOCTL_KICK_FE 1 7.34 -#define BLKTAP_IOCTL_KICK_BE 2 7.35 -#define BLKTAP_IOCTL_SETMODE 3 7.36 -#define BLKTAP_IOCTL_PRINT_IDXS 100 7.37 - 7.38 -/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ 7.39 -#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ 7.40 -#define BLKTAP_MODE_INTERCEPT_FE 0x00000001 7.41 -#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 7.42 -#define BLKTAP_MODE_COPY_FE 0x00000004 7.43 -#define BLKTAP_MODE_COPY_BE 0x00000008 7.44 -#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010 7.45 -#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020 7.46 - 7.47 -#define BLKTAP_MODE_INTERPOSE \ 7.48 - (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE) 7.49 - 7.50 -#define BLKTAP_MODE_COPY_BOTH \ 7.51 - (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE) 7.52 - 7.53 -#define BLKTAP_MODE_COPY_BOTH_PAGES \ 7.54 - (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES) 7.55 - 7.56 -static inline int 
BLKTAP_MODE_VALID(unsigned long arg) 7.57 -{ 7.58 - return ( 7.59 - ( arg == BLKTAP_MODE_PASSTHROUGH ) || 7.60 - ( arg == BLKTAP_MODE_INTERCEPT_FE ) || 7.61 - ( arg == BLKTAP_MODE_INTERPOSE ) ); 7.62 -/* 7.63 - return ( 7.64 - ( arg == BLKTAP_MODE_PASSTHROUGH ) || 7.65 - ( arg == BLKTAP_MODE_INTERCEPT_FE ) || 7.66 - ( arg == BLKTAP_MODE_INTERCEPT_BE ) || 7.67 - ( arg == BLKTAP_MODE_INTERPOSE ) || 7.68 - ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) || 7.69 - ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) || 7.70 - ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH ) 7.71 - ); 7.72 -*/ 7.73 -} 7.74 - 7.75 -/* Return values for handling messages in hooks. */ 7.76 -#define BLKTAP_PASS 0 /* Keep passing this request as normal. */ 7.77 -#define BLKTAP_RESPOND 1 /* Request is now a reply. Return it. */ 7.78 -#define BLKTAP_STOLEN 2 /* Hook has stolen request. */ 7.79 - 7.80 -//#define domid_t unsigned short 7.81 - 7.82 -inline unsigned int ID_TO_IDX(unsigned long id); 7.83 -inline domid_t ID_TO_DOM(unsigned long id); 7.84 - 7.85 -int blktap_attach_poll(int fd, short events, int (*func)(int)); 7.86 -void blktap_detach_poll(int fd); 7.87 -int blktap_listen(void); 7.88 - 7.89 -struct blkif; 7.90 - 7.91 -typedef struct request_hook_st { 7.92 - char *name; 7.93 - int (*func)(struct blkif *, blkif_request_t *, int); 7.94 - struct request_hook_st *next; 7.95 -} request_hook_t; 7.96 - 7.97 -typedef struct response_hook_st { 7.98 - char *name; 7.99 - int (*func)(struct blkif *, blkif_response_t *, int); 7.100 - struct response_hook_st *next; 7.101 -} response_hook_t; 7.102 - 7.103 -struct blkif_ops { 7.104 - long int (*get_size)(struct blkif *blkif); 7.105 - long int (*get_secsize)(struct blkif *blkif); 7.106 - unsigned (*get_info)(struct blkif *blkif); 7.107 -}; 7.108 - 7.109 -typedef struct blkif { 7.110 - domid_t domid; 7.111 - long int handle; 7.112 - 7.113 - long int pdev; 7.114 - long int readonly; 7.115 - 7.116 - enum { 
DISCONNECTED, CONNECTED } state; 7.117 - 7.118 - struct blkif_ops *ops; 7.119 - request_hook_t *request_hook_chain; 7.120 - response_hook_t *response_hook_chain; 7.121 - 7.122 - struct blkif *hash_next; 7.123 - 7.124 - void *prv; /* device-specific data */ 7.125 -} blkif_t; 7.126 - 7.127 -void register_new_blkif_hook(int (*fn)(blkif_t *blkif)); 7.128 -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); 7.129 -blkif_t *alloc_blkif(domid_t domid); 7.130 -int blkif_init(blkif_t *blkif, long int handle, long int pdev, 7.131 - long int readonly); 7.132 -void free_blkif(blkif_t *blkif); 7.133 -void __init_blkif(void); 7.134 - 7.135 - 7.136 -/* xenstore/xenbus: */ 7.137 -extern int add_blockdevice_probe_watch(struct xs_handle *h, 7.138 - const char *domname); 7.139 -int xs_fire_next_watch(struct xs_handle *h); 7.140 - 7.141 - 7.142 -void blkif_print_hooks(blkif_t *blkif); 7.143 -void blkif_register_request_hook(blkif_t *blkif, char *name, 7.144 - int (*rh)(blkif_t *, blkif_request_t *, int)); 7.145 -void blkif_register_response_hook(blkif_t *blkif, char *name, 7.146 - int (*rh)(blkif_t *, blkif_response_t *, int)); 7.147 -void blkif_inject_response(blkif_t *blkif, blkif_response_t *); 7.148 -void blktap_kick_responses(void); 7.149 - 7.150 -/* this must match the underlying driver... 
*/ 7.151 -#define MAX_PENDING_REQS 64 7.152 - 7.153 -/* Accessing attached data page mappings */ 7.154 -#define MMAP_PAGES \ 7.155 - (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST) 7.156 -#define MMAP_VADDR(_req,_seg) \ 7.157 - (mmap_vstart + \ 7.158 - ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \ 7.159 - ((_seg) * PAGE_SIZE)) 7.160 - 7.161 -extern unsigned long mmap_vstart; 7.162 - 7.163 -/* Defines that are only used by library clients */ 7.164 - 7.165 -#ifndef __COMPILING_BLKTAP_LIB 7.166 - 7.167 -static char *blkif_op_name[] = { 7.168 - [BLKIF_OP_READ] = "READ", 7.169 - [BLKIF_OP_WRITE] = "WRITE", 7.170 -}; 7.171 - 7.172 -#endif /* __COMPILING_BLKTAP_LIB */ 7.173 - 7.174 -#endif /* __BLKTAPLIB_H__ */
/*
 * list.h
 *
 * This is a subset of linux's list.h intended to be used in user-space.
 *
 * Lists are circular and doubly linked.  A struct list_head is embedded in
 * each element, and a stand-alone struct list_head acts as the list head.
 */

#ifndef __LIST_H__
#define __LIST_H__

#include <stddef.h>     /* offsetof */

/* Values stored in a removed entry's links so that stale use is noticeable. */
#define LIST_POISON1  ((void *) 0x00100100)
#define LIST_POISON2  ((void *) 0x00200200)

struct list_head {
    struct list_head *next, *prev;
};

/* Initialiser for an empty list: both links point back at the head. */
#define LIST_HEAD_INIT(name) { &(name), &(name) }

/* Define and initialise an empty list head called `name`. */
#define LIST_HEAD(name) \
    struct list_head name = LIST_HEAD_INIT(name)

/* Link `new` between the two adjacent nodes `prev` and `next`. */
static inline void __list_add(struct list_head *new,
                              struct list_head *prev,
                              struct list_head *next)
{
    next->prev = new;
    new->next = next;
    new->prev = prev;
    prev->next = new;
}

/* Insert `new` immediately after `head` (i.e. at the front of the list). */
static inline void list_add(struct list_head *new, struct list_head *head)
{
    __list_add(new, head, head->next);
}

/* Make `prev` and `next` adjacent, unlinking whatever sat between them. */
static inline void __list_del(struct list_head *prev, struct list_head *next)
{
    next->prev = prev;
    prev->next = next;
}

/* Unlink `entry` from its list and poison its links. */
static inline void list_del(struct list_head *entry)
{
    __list_del(entry->prev, entry->next);
    entry->next = LIST_POISON1;
    entry->prev = LIST_POISON2;
}

/*
 * Recover a pointer to the enclosing `type` from a pointer to its embedded
 * list_head `member`.  offsetof() replaces the null-pointer arithmetic.
 */
#define list_entry(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

/*
 * Iterate `pos` over every element of the list at `head`; `member` is the
 * name of the list_head inside the element type.  __typeof__ is used
 * because plain `typeof` is not a keyword in strict ISO C modes.
 */
#define list_for_each_entry(pos, head, member)                              \
    for ((pos) = list_entry((head)->next, __typeof__(*(pos)), member);      \
         &(pos)->member != (head);                                          \
         (pos) = list_entry((pos)->member.next, __typeof__(*(pos)), member))

#endif /* __LIST_H__ */
# tools/blktap/parallax/Makefile
#
# Builds the parallax daemon and the vdi_* command-line tools, linking
# against libblktap from the parent directory (-L..).

XEN_ROOT = ../../..
include $(XEN_ROOT)/tools/Rules.mk

PARALLAX_INSTALL_DIR = /usr/sbin

INSTALL = install
INSTALL_PROG = $(INSTALL) -m0755
# NOTE(review): INSTALL_DIR is defined but not used by any rule in this file.
INSTALL_DIR = $(INSTALL) -d -m0755

INCLUDES += -I.. -I/usr/include -I $(XEN_LIBXC)

# Libraries are carried in LDFLAGS, so they appear wherever $(LDFLAGS) is
# placed on the link lines below.
LDFLAGS = -L.. -lpthread -lz -lblktap

#PLX_SRCS := 
PLX_SRCS := vdi.c
PLX_SRCS += radix.c
PLX_SRCS += snaplog.c
PLX_SRCS += blockstore.c
PLX_SRCS += block-async.c
PLX_SRCS += requests-async.c
# ':=' expands immediately, so VDI_SRCS captures the list *before* parallax.c
# is appended: the vdi_* tools are built from everything except parallax.c.
# Do not reorder these lines.
VDI_SRCS := $(PLX_SRCS)
PLX_SRCS += parallax.c

#VDI_TOOLS := 
VDI_TOOLS := vdi_create
VDI_TOOLS += vdi_list
VDI_TOOLS += vdi_snap
VDI_TOOLS += vdi_snap_list
VDI_TOOLS += vdi_snap_delete
VDI_TOOLS += vdi_fill
VDI_TOOLS += vdi_tree
VDI_TOOLS += vdi_validate

CFLAGS += -Werror
CFLAGS += -Wno-unused
CFLAGS += -fno-strict-aliasing
CFLAGS += $(INCLUDES)
CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
# Get gcc to generate the dependencies for us.
CFLAGS += -Wp,-MD,.$(@F).d
DEPS = .*.d

# NOTE(review): SRCS is not assigned anywhere in this file, so OBJS is empty
# unless Rules.mk defines SRCS; no rule below uses OBJS.
OBJS = $(patsubst %.c,%.o,$(SRCS))
IBINS = parallax $(VDI_TOOLS)

# NOTE(review): there is no explicit rule for `blockstored` in this file;
# presumably it is built by make's implicit %: %.c rule -- confirm.
.PHONY: all
all: $(VDI_TOOLS) parallax blockstored

# Installs parallax and the vdi_* tools; blockstored is not in IBINS.
.PHONY: install
install: all
	$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR)

.PHONY: clean
clean:
	rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax vdi_unittest

# The daemon is compiled and linked from its sources in a single step.
parallax: $(PLX_SRCS)
	$(CC) $(CFLAGS) -o parallax -L.. $(LDFLAGS) $(PLX_SRCS)

# Static pattern rule: each tool is built from its own .c plus the shared
# VDI_SRCS sources.
${VDI_TOOLS}: %: %.c $(VDI_SRCS)
	$(CC) $(CFLAGS) -o $@ $@.c $(LDFLAGS) $(VDI_SRCS)

-include $(DEPS)
10.1 --- a/tools/blktap/parallax/README Fri Jun 16 18:19:40 2006 +0100 10.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 10.3 @@ -1,177 +0,0 @@ 10.4 -Parallax Quick Overview 10.5 -March 3, 2005 10.6 - 10.7 -This is intended to provide a quick set of instructions to let you 10.8 -guys play with the current parallax source. In its current form, the 10.9 -code will let you run an arbitrary number of VMs off of a single disk 10.10 -image, doing copy-on-write as they make updates. Each domain is 10.11 -assigned a virtual disk image (VDI), which may be based on a snapshot 10.12 -of an existing image. All of the VDI and snapshot management should 10.13 -currently work. 10.14 - 10.15 -The current implementation uses a single file as a blockstore for 10.16 -_everything_; this will soon be replaced by the fancier backend code 10.17 -and the local cache. As it stands, Parallax will create 10.18 -"blockstore.dat" in the directory that you run it from, and use 10.19 -largefile support to make this grow to unfathomable girth. So, you 10.20 -probably want to run the daemon off of a local disk, with a lot of 10.21 -free space. 10.22 - 10.23 -Here's how to get going: 10.24 - 10.25 -0. Setup: 10.26 ---------- 10.27 - 10.28 -Pick a local directory on a disk with lots of room. You should be 10.29 -running from a privileged domain (e.g. dom0) with the blocktap 10.30 -configured in and block backend NOT. 10.31 - 10.32 -For convenience (for the moment) copy all of the vdi tools (vdi_*) and 10.33 -the parallax daemon from tools/blktap into this directory. 10.34 - 10.35 -1. Populate the blockstore: 10.36 ---------------------------- 10.37 - 10.38 -First you need to put at least one image into the blockstore. You 10.39 -will need a disk image, either as a file or local partition. 
My 10.40 -general approach has been to 10.41 - 10.42 -(a) make a really big sparse file with 10.43 - 10.44 - dd if=/dev/zero of=./image bs=4K count=1 seek=[big value] 10.45 - 10.46 -(b) put a filesystem into it 10.47 - 10.48 - mkfs.ext3 ./image 10.49 - 10.50 -(c) mount it using loopback 10.51 - 10.52 - mkdir ./mnt 10.53 - mount -o loop ./image ./mnt 10.54 - 10.55 -(d) cd into it and untar one of the image files from srg-roots. 10.56 - 10.57 - cd mnt 10.58 - tar ... 10.59 - 10.60 -NOTE: Beware if your system is FC3. mkfs is not compatible with old 10.61 -versions of fedora, and so you don't have much choice but to install 10.62 -further fc3 images if you have used the fc3 version of mkfs. 10.63 - 10.64 -(e) unmount the image 10.65 - 10.66 - cd .. 10.67 - umount mnt 10.68 - 10.69 -(f) now, create a new VDI to hold the image 10.70 - 10.71 - ./vdi_create "My new FC3 VDI" 10.72 - 10.73 -(g) get the id of the new VDI. 10.74 - 10.75 - ./vdi_list 10.76 - 10.77 - | 0 My new FC3 VDI 10.78 - 10.79 -(0 is the VDI id... create a few more if you want.) 10.80 - 10.81 -(h) hoover your image into the new VDI. 10.82 - 10.83 - ./vdi_fill 0 ./image 10.84 - 10.85 -This will pull the entire image into the blockstore and set up a 10.86 -mapping tree for it for VDI 0. Passing a device (i.e. /dev/sda3) 10.87 -should also work, but vdi_fill has NO notion of sparseness yet, so you 10.88 -are going to pump a block into the store for each block you read. 10.89 - 10.90 -vdi_fill will count up until it is done, and you should be ready to 10.91 -go. If you want to be anal, you can use vdi_validate to test the VDI 10.92 -against the original image. 10.93 - 10.94 -2. Create some extra VDIs 10.95 -------------------------- 10.96 - 10.97 -VDIs are actually a list of snapshots, and each snapshot is a full 10.98 -image of mappings. So, to preserve an immutable copy of a current 10.99 -VDI, do this: 10.100 - 10.101 -(a) Snapshot your new VDI. 
10.102 - 10.103 - ./vdi_snap 0 10.104 - 10.105 -Snapshotting writes the current radix root to the VDI's snapshot log, 10.106 -and assigns it a new writable root. 10.107 - 10.108 -(b) look at the VDI's snapshot log. 10.109 - 10.110 - ./vdi_snap_list 0 10.111 - 10.112 - | 16 0 Thu Mar 3 19:27:48 2005 565111 31 10.113 - 10.114 -The first two columns constitute a snapshot id and represent the 10.115 -(block, offset) of the snapshot record. The Date tells you when the 10.116 -snapshot was made, and 31 is the radix root node of the snapshot. 10.117 - 10.118 -(c) Create a new VDI, based on that snapshot, and look at the list. 10.119 - 10.120 - ./vdi_create "FC3 - Copy 1" 16 0 10.121 - ./vdi_list 10.122 - 10.123 - | 0 My new FC3 VDI 10.124 - | 1 FC3 - Copy 1 10.125 - 10.126 -NOTE: If you have Graphviz installed on your system, you can use 10.127 -vdi_tree to generate a postscript of your current set of VDIs and 10.128 -snapshots. 10.129 - 10.130 - 10.131 -Create as many VDIs as you need for the VMs that you want to run. 10.132 - 10.133 -3. Boot some VMs: 10.134 ------------------ 10.135 - 10.136 -Parallax currently uses a hack in xend to pass the VDI id, so you need to 10.137 -modify the disk line of the VM config that is going to mount it. 10.138 - 10.139 -(a) set up your vm config, by using the following disk line: 10.140 - 10.141 - disk = ['parallax:1,sda1,w,0' ] 10.142 - 10.143 -This example uses VDI 1 (from vdi_list above), presents it as sda1 10.144 -(writable), and uses dom 0 as the backend. If you were running the 10.145 -daemon (and tap driver) in some domain other than 0, you would change 10.146 -this last parameter. 10.147 - 10.148 -NOTE: You'll need to have reinstalled xend/tools prior to booting the vm, so that it knows what to do with "parallax:". 10.149 - 10.150 -(b) Run parallax in the backend domain. 10.151 - 10.152 - ./parallax 10.153 - 10.154 -(c) create your new domain. 10.155 - 10.156 - xm create ... 
10.157 - 10.158 ---- 10.159 - 10.160 -That's pretty much all there is to it at the moment. Hope this is 10.161 -clear enough to get you going. Now, a few serious caveats that will 10.162 -be sorted out in the almost immediate future: 10.163 - 10.164 -WARNINGS: 10.165 ---------- 10.166 - 10.167 -1. There is NO locking in the VDI tools at the moment, so I'd avoid 10.168 -running them in parallel, or more importantly, running them while the 10.169 -daemon is running. 10.170 - 10.171 -2. I doubt that xend will be very happy about restarting if you have 10.172 -parallax-using domains. So if it dies while there are active parallax 10.173 -doms, you may need to reboot. 10.174 - 10.175 -3. I've turned off write-in-place. So at the moment, EVERY block 10.176 -write is a log append on the blockstore. I've been having some problems 10.177 -with the radix tree's marking of writable blocks after snapshots and 10.178 -will sort this out very soon. 10.179 - 10.180 -
11.1 --- a/tools/blktap/parallax/block-async.c Fri Jun 16 18:19:40 2006 +0100 11.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 11.3 @@ -1,393 +0,0 @@ 11.4 -/* block-async.c 11.5 - * 11.6 - * Asynchronous block wrappers for parallax. 11.7 - */ 11.8 - 11.9 - 11.10 -#include <stdio.h> 11.11 -#include <stdlib.h> 11.12 -#include <string.h> 11.13 -#include <pthread.h> 11.14 -#include "block-async.h" 11.15 -#include "blockstore.h" 11.16 -#include "vdi.h" 11.17 - 11.18 - 11.19 -#if 0 11.20 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 11.21 -#else 11.22 -#define DPRINTF(_f, _a...) ((void)0) 11.23 -#endif 11.24 - 11.25 -/* We have a queue of outstanding I/O requests implemented as a 11.26 - * circular producer-consumer ring with free-running buffers. 11.27 - * to allow reordering, this ring indirects to indexes in an 11.28 - * ring of io_structs. 11.29 - * 11.30 - * the block_* calls may either add an entry to this ring and return, 11.31 - * or satisfy the request immediately and call the callback directly. 11.32 - * None of the io calls in parallax should be nested enough to worry 11.33 - * about stack problems with this approach. 
11.34 - */ 11.35 - 11.36 -struct read_args { 11.37 - uint64_t addr; 11.38 -}; 11.39 - 11.40 -struct write_args { 11.41 - uint64_t addr; 11.42 - char *block; 11.43 -}; 11.44 - 11.45 -struct alloc_args { 11.46 - char *block; 11.47 -}; 11.48 - 11.49 -struct pending_io_req { 11.50 - enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op; 11.51 - union { 11.52 - struct read_args r; 11.53 - struct write_args w; 11.54 - struct alloc_args a; 11.55 - } u; 11.56 - io_cb_t cb; 11.57 - void *param; 11.58 -}; 11.59 - 11.60 -void radix_lock_init(struct radix_lock *r) 11.61 -{ 11.62 - int i; 11.63 - 11.64 - pthread_mutex_init(&r->lock, NULL); 11.65 - for (i=0; i < 1024; i++) { 11.66 - r->lines[i] = 0; 11.67 - r->waiters[i] = NULL; 11.68 - r->state[i] = ANY; 11.69 - } 11.70 -} 11.71 - 11.72 -/* maximum outstanding I/O requests issued asynchronously */ 11.73 -/* must be a power of 2.*/ 11.74 -#define MAX_PENDING_IO 1024 11.75 - 11.76 -/* how many threads to concurrently issue I/O to the disk. */ 11.77 -#define IO_POOL_SIZE 10 11.78 - 11.79 -static struct pending_io_req pending_io_reqs[MAX_PENDING_IO]; 11.80 -static int pending_io_list[MAX_PENDING_IO]; 11.81 -static unsigned long io_prod = 0, io_cons = 0, io_free = 0; 11.82 -#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1)) 11.83 -#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs) 11.84 -#define PENDING_IO_ENT(_x) \ 11.85 - (&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]]) 11.86 -#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod) 11.87 -#define CAN_CONSUME_PENDING_IO (io_cons != io_prod) 11.88 -static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER; 11.89 -static pthread_cond_t pending_io_cond = PTHREAD_COND_INITIALIZER; 11.90 - 11.91 -static void init_pending_io(void) 11.92 -{ 11.93 - int i; 11.94 - 11.95 - for (i=0; i<MAX_PENDING_IO; i++) 11.96 - pending_io_list[i] = i; 11.97 - 11.98 -} 11.99 - 11.100 -void block_read(uint64_t addr, io_cb_t cb, void *param) 11.101 -{ 11.102 - 
struct pending_io_req *req; 11.103 - 11.104 - pthread_mutex_lock(&pending_io_lock); 11.105 - assert(CAN_PRODUCE_PENDING_IO); 11.106 - 11.107 - req = PENDING_IO_ENT(io_prod++); 11.108 - DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req); 11.109 - req->op = IO_READ; 11.110 - req->u.r.addr = addr; 11.111 - req->cb = cb; 11.112 - req->param = param; 11.113 - 11.114 - pthread_cond_signal(&pending_io_cond); 11.115 - pthread_mutex_unlock(&pending_io_lock); 11.116 -} 11.117 - 11.118 - 11.119 -void block_write(uint64_t addr, char *block, io_cb_t cb, void *param) 11.120 -{ 11.121 - struct pending_io_req *req; 11.122 - 11.123 - pthread_mutex_lock(&pending_io_lock); 11.124 - assert(CAN_PRODUCE_PENDING_IO); 11.125 - 11.126 - req = PENDING_IO_ENT(io_prod++); 11.127 - DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req); 11.128 - req->op = IO_WRITE; 11.129 - req->u.w.addr = addr; 11.130 - req->u.w.block = block; 11.131 - req->cb = cb; 11.132 - req->param = param; 11.133 - 11.134 - pthread_cond_signal(&pending_io_cond); 11.135 - pthread_mutex_unlock(&pending_io_lock); 11.136 -} 11.137 - 11.138 - 11.139 -void block_alloc(char *block, io_cb_t cb, void *param) 11.140 -{ 11.141 - struct pending_io_req *req; 11.142 - 11.143 - pthread_mutex_lock(&pending_io_lock); 11.144 - assert(CAN_PRODUCE_PENDING_IO); 11.145 - 11.146 - req = PENDING_IO_ENT(io_prod++); 11.147 - req->op = IO_ALLOC; 11.148 - req->u.a.block = block; 11.149 - req->cb = cb; 11.150 - req->param = param; 11.151 - 11.152 - pthread_cond_signal(&pending_io_cond); 11.153 - pthread_mutex_unlock(&pending_io_lock); 11.154 -} 11.155 - 11.156 -void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param) 11.157 -{ 11.158 - struct io_ret ret; 11.159 - pthread_mutex_lock(&r->lock); 11.160 - 11.161 - if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) { 11.162 - r->lines[row]++; 11.163 - r->state[row] = READ; 11.164 - DPRINTF("RLOCK : %3d (row: %d)\n", r->lines[row], row); 11.165 - pthread_mutex_unlock(&r->lock); 11.166 - 
ret.type = IO_INT_T; 11.167 - ret.u.i = 0; 11.168 - cb(ret, param); 11.169 - } else { 11.170 - struct radix_wait **rwc; 11.171 - struct radix_wait *rw = 11.172 - (struct radix_wait *) malloc (sizeof(struct radix_wait)); 11.173 - DPRINTF("RLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row); 11.174 - rw->type = RLOCK; 11.175 - rw->param = param; 11.176 - rw->cb = cb; 11.177 - rw->next = NULL; 11.178 - /* append to waiters list. */ 11.179 - rwc = &r->waiters[row]; 11.180 - while (*rwc != NULL) rwc = &(*rwc)->next; 11.181 - *rwc = rw; 11.182 - pthread_mutex_unlock(&r->lock); 11.183 - return; 11.184 - } 11.185 -} 11.186 - 11.187 - 11.188 -void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param) 11.189 -{ 11.190 - struct io_ret ret; 11.191 - pthread_mutex_lock(&r->lock); 11.192 - 11.193 - /* the second check here is redundant -- just here for debugging now. */ 11.194 - if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) { 11.195 - r->state[row] = STOP; 11.196 - r->lines[row] = -1; 11.197 - DPRINTF("WLOCK : %3d (row: %d)\n", r->lines[row], row); 11.198 - pthread_mutex_unlock(&r->lock); 11.199 - ret.type = IO_INT_T; 11.200 - ret.u.i = 0; 11.201 - cb(ret, param); 11.202 - } else { 11.203 - struct radix_wait **rwc; 11.204 - struct radix_wait *rw = 11.205 - (struct radix_wait *) malloc (sizeof(struct radix_wait)); 11.206 - DPRINTF("WLOCK : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row); 11.207 - rw->type = WLOCK; 11.208 - rw->param = param; 11.209 - rw->cb = cb; 11.210 - rw->next = NULL; 11.211 - /* append to waiters list. */ 11.212 - rwc = &r->waiters[row]; 11.213 - while (*rwc != NULL) rwc = &(*rwc)->next; 11.214 - *rwc = rw; 11.215 - pthread_mutex_unlock(&r->lock); 11.216 - return; 11.217 - } 11.218 - 11.219 -} 11.220 - 11.221 -/* called with radix_lock locked and lock count of zero. 
*/ 11.222 -static void wake_waiters(struct radix_lock *r, int row) 11.223 -{ 11.224 - struct pending_io_req *req; 11.225 - struct radix_wait *rw; 11.226 - 11.227 - if (r->lines[row] != 0) return; 11.228 - if (r->waiters[row] == NULL) return; 11.229 - 11.230 - if (r->waiters[row]->type == WLOCK) { 11.231 - 11.232 - rw = r->waiters[row]; 11.233 - pthread_mutex_lock(&pending_io_lock); 11.234 - assert(CAN_PRODUCE_PENDING_IO); 11.235 - 11.236 - req = PENDING_IO_ENT(io_prod++); 11.237 - req->op = IO_WWAKE; 11.238 - req->cb = rw->cb; 11.239 - req->param = rw->param; 11.240 - r->lines[row] = -1; /* write lock the row. */ 11.241 - r->state[row] = STOP; 11.242 - r->waiters[row] = rw->next; 11.243 - free(rw); 11.244 - pthread_mutex_unlock(&pending_io_lock); 11.245 - 11.246 - } else /* RLOCK */ { 11.247 - 11.248 - while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) { 11.249 - rw = r->waiters[row]; 11.250 - pthread_mutex_lock(&pending_io_lock); 11.251 - assert(CAN_PRODUCE_PENDING_IO); 11.252 - 11.253 - req = PENDING_IO_ENT(io_prod++); 11.254 - req->op = IO_RWAKE; 11.255 - req->cb = rw->cb; 11.256 - req->param = rw->param; 11.257 - r->lines[row]++; /* read lock the row. */ 11.258 - r->state[row] = READ; 11.259 - r->waiters[row] = rw->next; 11.260 - free(rw); 11.261 - pthread_mutex_unlock(&pending_io_lock); 11.262 - } 11.263 - 11.264 - if (r->waiters[row] != NULL) /* There is a write queued still */ 11.265 - r->state[row] = STOP; 11.266 - } 11.267 - 11.268 - pthread_mutex_lock(&pending_io_lock); 11.269 - pthread_cond_signal(&pending_io_cond); 11.270 - pthread_mutex_unlock(&pending_io_lock); 11.271 -} 11.272 - 11.273 -void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param) 11.274 -{ 11.275 - struct io_ret ret; 11.276 - 11.277 - pthread_mutex_lock(&r->lock); 11.278 - assert(r->lines[row] > 0); /* try to catch misuse. 
*/ 11.279 - r->lines[row]--; 11.280 - if (r->lines[row] == 0) { 11.281 - r->state[row] = ANY; 11.282 - wake_waiters(r, row); 11.283 - } 11.284 - pthread_mutex_unlock(&r->lock); 11.285 - cb(ret, param); 11.286 -} 11.287 - 11.288 -void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param) 11.289 -{ 11.290 - struct io_ret ret; 11.291 - 11.292 - pthread_mutex_lock(&r->lock); 11.293 - assert(r->lines[row] == -1); /* try to catch misuse. */ 11.294 - r->lines[row] = 0; 11.295 - r->state[row] = ANY; 11.296 - wake_waiters(r, row); 11.297 - pthread_mutex_unlock(&r->lock); 11.298 - cb(ret, param); 11.299 -} 11.300 - 11.301 -/* consumer calls */ 11.302 -static void do_next_io_req(struct pending_io_req *req) 11.303 -{ 11.304 - struct io_ret ret; 11.305 - void *param; 11.306 - 11.307 - switch (req->op) { 11.308 - case IO_READ: 11.309 - ret.type = IO_BLOCK_T; 11.310 - ret.u.b = readblock(req->u.r.addr); 11.311 - break; 11.312 - case IO_WRITE: 11.313 - ret.type = IO_INT_T; 11.314 - ret.u.i = writeblock(req->u.w.addr, req->u.w.block); 11.315 - DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr); 11.316 - break; 11.317 - case IO_ALLOC: 11.318 - ret.type = IO_ADDR_T; 11.319 - ret.u.a = allocblock(req->u.a.block); 11.320 - break; 11.321 - case IO_RWAKE: 11.322 - DPRINTF("WAKE DEFERRED RLOCK!\n"); 11.323 - ret.type = IO_INT_T; 11.324 - ret.u.i = 0; 11.325 - break; 11.326 - case IO_WWAKE: 11.327 - DPRINTF("WAKE DEFERRED WLOCK!\n"); 11.328 - ret.type = IO_INT_T; 11.329 - ret.u.i = 0; 11.330 - break; 11.331 - default: 11.332 - DPRINTF("Unknown IO operation on pending list!\n"); 11.333 - return; 11.334 - } 11.335 - 11.336 - param = req->param; 11.337 - pthread_mutex_lock(&pending_io_lock); 11.338 - pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req); 11.339 - pthread_mutex_unlock(&pending_io_lock); 11.340 - 11.341 - assert(req->cb != NULL); 11.342 - req->cb(ret, param); 11.343 - 11.344 -} 11.345 - 11.346 -void *io_thread(void *param) 11.347 
-{ 11.348 - int tid; 11.349 - struct pending_io_req *req; 11.350 - 11.351 - /* Set this thread's tid. */ 11.352 - tid = *(int *)param; 11.353 - free(param); 11.354 - 11.355 -start: 11.356 - pthread_mutex_lock(&pending_io_lock); 11.357 - while (io_prod == io_cons) { 11.358 - pthread_cond_wait(&pending_io_cond, &pending_io_lock); 11.359 - } 11.360 - 11.361 - if (io_prod == io_cons) { 11.362 - /* unnecessary wakeup. */ 11.363 - pthread_mutex_unlock(&pending_io_lock); 11.364 - goto start; 11.365 - } 11.366 - 11.367 - req = PENDING_IO_ENT(io_cons++); 11.368 - pthread_mutex_unlock(&pending_io_lock); 11.369 - 11.370 - do_next_io_req(req); 11.371 - 11.372 - goto start; 11.373 - 11.374 -} 11.375 - 11.376 -static pthread_t io_pool[IO_POOL_SIZE]; 11.377 -void start_io_threads(void) 11.378 - 11.379 -{ 11.380 - int i, tid=0; 11.381 - 11.382 - for (i=0; i < IO_POOL_SIZE; i++) { 11.383 - int ret, *t; 11.384 - t = (int *)malloc(sizeof(int)); 11.385 - *t = tid++; 11.386 - ret = pthread_create(&io_pool[i], NULL, io_thread, t); 11.387 - if (ret != 0) printf("Error starting thread %d\n", i); 11.388 - } 11.389 - 11.390 -} 11.391 - 11.392 -void init_block_async(void) 11.393 -{ 11.394 - init_pending_io(); 11.395 - start_io_threads(); 11.396 -}
12.1 --- a/tools/blktap/parallax/block-async.h Fri Jun 16 18:19:40 2006 +0100 12.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 12.3 @@ -1,69 +0,0 @@ 12.4 -/* block-async.h 12.5 - * 12.6 - * Asynchronous block wrappers for parallax. 12.7 - */ 12.8 - 12.9 -#ifndef _BLOCKASYNC_H_ 12.10 -#define _BLOCKASYNC_H_ 12.11 - 12.12 -#include <assert.h> 12.13 -#include <xenctrl.h> 12.14 -#include "vdi.h" 12.15 - 12.16 -struct io_ret 12.17 -{ 12.18 - enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type; 12.19 - union { 12.20 - uint64_t a; 12.21 - char *b; 12.22 - int i; 12.23 - } u; 12.24 -}; 12.25 - 12.26 -typedef void (*io_cb_t)(struct io_ret r, void *param); 12.27 - 12.28 -/* per-vdi lock structures to make sure requests run in a safe order. */ 12.29 -struct radix_wait { 12.30 - enum {RLOCK, WLOCK} type; 12.31 - io_cb_t cb; 12.32 - void *param; 12.33 - struct radix_wait *next; 12.34 -}; 12.35 - 12.36 -struct radix_lock { 12.37 - pthread_mutex_t lock; 12.38 - int lines[1024]; 12.39 - struct radix_wait *waiters[1024]; 12.40 - enum {ANY, READ, STOP} state[1024]; 12.41 -}; 12.42 -void radix_lock_init(struct radix_lock *r); 12.43 - 12.44 -void block_read(uint64_t addr, io_cb_t cb, void *param); 12.45 -void block_write(uint64_t addr, char *block, io_cb_t cb, void *param); 12.46 -void block_alloc(char *block, io_cb_t cb, void *param); 12.47 -void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param); 12.48 -void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param); 12.49 -void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param); 12.50 -void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param); 12.51 -void init_block_async(void); 12.52 - 12.53 -static inline uint64_t IO_ADDR(struct io_ret r) 12.54 -{ 12.55 - assert(r.type == IO_ADDR_T); 12.56 - return r.u.a; 12.57 -} 12.58 - 12.59 -static inline char *IO_BLOCK(struct io_ret r) 12.60 -{ 12.61 - assert(r.type == IO_BLOCK_T); 12.62 - return r.u.b; 12.63 -} 12.64 - 12.65 -static 
inline int IO_INT(struct io_ret r) 12.66 -{ 12.67 - assert(r.type == IO_INT_T); 12.68 - return r.u.i; 12.69 -} 12.70 - 12.71 - 12.72 -#endif //_BLOCKASYNC_H_
13.1 --- a/tools/blktap/parallax/blockstore.c Fri Jun 16 18:19:40 2006 +0100 13.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 13.3 @@ -1,1348 +0,0 @@ 13.4 -/************************************************************************** 13.5 - * 13.6 - * blockstore.c 13.7 - * 13.8 - * Simple block store interface 13.9 - * 13.10 - */ 13.11 - 13.12 -#include <fcntl.h> 13.13 -#include <unistd.h> 13.14 -#include <stdio.h> 13.15 -#include <stdlib.h> 13.16 -#include <string.h> 13.17 -#include <sys/types.h> 13.18 -#include <sys/stat.h> 13.19 -#include <sys/time.h> 13.20 -#include <stdarg.h> 13.21 -#include "blockstore.h" 13.22 -#include <pthread.h> 13.23 - 13.24 -//#define BLOCKSTORE_REMOTE 13.25 -//#define BSDEBUG 13.26 - 13.27 -#define RETRY_TIMEOUT 1000000 /* microseconds */ 13.28 - 13.29 -/***************************************************************************** 13.30 - * Debugging 13.31 - */ 13.32 -#ifdef BSDEBUG 13.33 -void DB(char *format, ...) 13.34 -{ 13.35 - va_list args; 13.36 - fprintf(stderr, "[%05u] ", (int)pthread_getspecific(tid_key)); 13.37 - va_start(args, format); 13.38 - vfprintf(stderr, format, args); 13.39 - va_end(args); 13.40 -} 13.41 -#else 13.42 -#define DB(format, ...) (void)0 13.43 -#endif 13.44 - 13.45 -#ifdef BLOCKSTORE_REMOTE 13.46 - 13.47 -#include <sys/socket.h> 13.48 -#include <sys/ioctl.h> 13.49 -#include <netinet/in.h> 13.50 -#include <netdb.h> 13.51 - 13.52 -/***************************************************************************** 13.53 - * Network state * 13.54 - *****************************************************************************/ 13.55 - 13.56 -/* The individual disk servers we talks to. These will be referenced by 13.57 - * an integer index into bsservers[]. 13.58 - */ 13.59 -bsserver_t bsservers[MAX_SERVERS]; 13.60 - 13.61 -/* The cluster map. This is indexed by an integer cluster number. 13.62 - */ 13.63 -bscluster_t bsclusters[MAX_CLUSTERS]; 13.64 - 13.65 -/* Local socket. 
13.66 - */ 13.67 -struct sockaddr_in sin_local; 13.68 -int bssock = 0; 13.69 - 13.70 -/***************************************************************************** 13.71 - * Notification * 13.72 - *****************************************************************************/ 13.73 - 13.74 -typedef struct pool_thread_t_struct { 13.75 - pthread_mutex_t ptmutex; 13.76 - pthread_cond_t ptcv; 13.77 - int newdata; 13.78 -} pool_thread_t; 13.79 - 13.80 -pool_thread_t pool_thread[READ_POOL_SIZE+1]; 13.81 - 13.82 -#define RECV_NOTIFY(tid) { \ 13.83 - pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \ 13.84 - pool_thread[tid].newdata = 1; \ 13.85 - DB("CV Waking %u", tid); \ 13.86 - pthread_cond_signal(&(pool_thread[tid].ptcv)); \ 13.87 - pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); } 13.88 -#define RECV_AWAIT(tid) { \ 13.89 - pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \ 13.90 - if (pool_thread[tid].newdata) { \ 13.91 - pool_thread[tid].newdata = 0; \ 13.92 - DB("CV Woken %u", tid); \ 13.93 - } \ 13.94 - else { \ 13.95 - DB("CV Waiting %u", tid); \ 13.96 - pthread_cond_wait(&(pool_thread[tid].ptcv), \ 13.97 - &(pool_thread[tid].ptmutex)); \ 13.98 - } \ 13.99 - pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); } 13.100 - 13.101 -/***************************************************************************** 13.102 - * Message queue management * 13.103 - *****************************************************************************/ 13.104 - 13.105 -/* Protects the queue manipulation critcal regions. 13.106 - */ 13.107 -pthread_mutex_t ptmutex_queue; 13.108 -#define ENTER_QUEUE_CR pthread_mutex_lock(&ptmutex_queue) 13.109 -#define LEAVE_QUEUE_CR pthread_mutex_unlock(&ptmutex_queue) 13.110 - 13.111 -pthread_mutex_t ptmutex_recv; 13.112 -#define ENTER_RECV_CR pthread_mutex_lock(&ptmutex_recv) 13.113 -#define LEAVE_RECV_CR pthread_mutex_unlock(&ptmutex_recv) 13.114 - 13.115 -/* A message queue entry. We allocate one of these for every request we send. 
13.116 - * Asynchronous reply reception also used one of these. 13.117 - */ 13.118 -typedef struct bsq_t_struct { 13.119 - struct bsq_t_struct *prev; 13.120 - struct bsq_t_struct *next; 13.121 - int status; 13.122 - int server; 13.123 - int length; 13.124 - struct msghdr msghdr; 13.125 - struct iovec iov[2]; 13.126 - int tid; 13.127 - struct timeval tv_sent; 13.128 - bshdr_t message; 13.129 - void *block; 13.130 -} bsq_t; 13.131 - 13.132 -#define BSQ_STATUS_MATCHED 1 13.133 - 13.134 -pthread_mutex_t ptmutex_luid; 13.135 -#define ENTER_LUID_CR pthread_mutex_lock(&ptmutex_luid) 13.136 -#define LEAVE_LUID_CR pthread_mutex_unlock(&ptmutex_luid) 13.137 - 13.138 -static uint64_t luid_cnt = 0x1000ULL; 13.139 -uint64_t new_luid(void) { 13.140 - uint64_t luid; 13.141 - ENTER_LUID_CR; 13.142 - luid = luid_cnt++; 13.143 - LEAVE_LUID_CR; 13.144 - return luid; 13.145 -} 13.146 - 13.147 -/* Queue of outstanding requests. 13.148 - */ 13.149 -bsq_t *bs_head = NULL; 13.150 -bsq_t *bs_tail = NULL; 13.151 -int bs_qlen = 0; 13.152 - 13.153 -/* 13.154 - */ 13.155 -void queuedebug(char *msg) { 13.156 - bsq_t *q; 13.157 - ENTER_QUEUE_CR; 13.158 - fprintf(stderr, "Q: %s len=%u\n", msg, bs_qlen); 13.159 - for (q = bs_head; q; q = q->next) { 13.160 - fprintf(stderr, " luid=%016llx server=%u\n", 13.161 - q->message.luid, q->server); 13.162 - } 13.163 - LEAVE_QUEUE_CR; 13.164 -} 13.165 - 13.166 -int enqueue(bsq_t *qe) { 13.167 - ENTER_QUEUE_CR; 13.168 - qe->next = NULL; 13.169 - qe->prev = bs_tail; 13.170 - if (!bs_head) 13.171 - bs_head = qe; 13.172 - else 13.173 - bs_tail->next = qe; 13.174 - bs_tail = qe; 13.175 - bs_qlen++; 13.176 - LEAVE_QUEUE_CR; 13.177 -#ifdef BSDEBUG 13.178 - queuedebug("enqueue"); 13.179 -#endif 13.180 - return 0; 13.181 -} 13.182 - 13.183 -int dequeue(bsq_t *qe) { 13.184 - bsq_t *q; 13.185 - ENTER_QUEUE_CR; 13.186 - for (q = bs_head; q; q = q->next) { 13.187 - if (q == qe) { 13.188 - if (q->prev) 13.189 - q->prev->next = q->next; 13.190 - else 13.191 - bs_head = 
q->next; 13.192 - if (q->next) 13.193 - q->next->prev = q->prev; 13.194 - else 13.195 - bs_tail = q->prev; 13.196 - bs_qlen--; 13.197 - goto found; 13.198 - } 13.199 - } 13.200 - 13.201 - LEAVE_QUEUE_CR; 13.202 -#ifdef BSDEBUG 13.203 - queuedebug("dequeue not found"); 13.204 -#endif 13.205 - return 0; 13.206 - 13.207 - found: 13.208 - LEAVE_QUEUE_CR; 13.209 -#ifdef BSDEBUG 13.210 - queuedebug("dequeue not found"); 13.211 -#endif 13.212 - return 1; 13.213 -} 13.214 - 13.215 -bsq_t *queuesearch(bsq_t *qe) { 13.216 - bsq_t *q; 13.217 - ENTER_QUEUE_CR; 13.218 - for (q = bs_head; q; q = q->next) { 13.219 - if ((qe->server == q->server) && 13.220 - (qe->message.operation == q->message.operation) && 13.221 - (qe->message.luid == q->message.luid)) { 13.222 - 13.223 - if ((q->message.operation == BSOP_READBLOCK) && 13.224 - ((q->message.flags & BSOP_FLAG_ERROR) == 0)) { 13.225 - q->block = qe->block; 13.226 - qe->block = NULL; 13.227 - } 13.228 - q->length = qe->length; 13.229 - q->message.flags = qe->message.flags; 13.230 - q->message.id = qe->message.id; 13.231 - q->status |= BSQ_STATUS_MATCHED; 13.232 - 13.233 - if (q->prev) 13.234 - q->prev->next = q->next; 13.235 - else 13.236 - bs_head = q->next; 13.237 - if (q->next) 13.238 - q->next->prev = q->prev; 13.239 - else 13.240 - bs_tail = q->prev; 13.241 - q->next = NULL; 13.242 - q->prev = NULL; 13.243 - bs_qlen--; 13.244 - goto found; 13.245 - } 13.246 - } 13.247 - 13.248 - LEAVE_QUEUE_CR; 13.249 -#ifdef BSDEBUG 13.250 - queuedebug("queuesearch not found"); 13.251 -#endif 13.252 - return NULL; 13.253 - 13.254 - found: 13.255 - LEAVE_QUEUE_CR; 13.256 -#ifdef BSDEBUG 13.257 - queuedebug("queuesearch found"); 13.258 -#endif 13.259 - return q; 13.260 -} 13.261 - 13.262 -/***************************************************************************** 13.263 - * Network communication * 13.264 - *****************************************************************************/ 13.265 - 13.266 -int send_message(bsq_t *qe) { 13.267 - 
int rc; 13.268 - 13.269 - qe->msghdr.msg_name = (void *)&(bsservers[qe->server].sin); 13.270 - qe->msghdr.msg_namelen = sizeof(struct sockaddr_in); 13.271 - qe->msghdr.msg_iov = qe->iov; 13.272 - if (qe->block) 13.273 - qe->msghdr.msg_iovlen = 2; 13.274 - else 13.275 - qe->msghdr.msg_iovlen = 1; 13.276 - qe->msghdr.msg_control = NULL; 13.277 - qe->msghdr.msg_controllen = 0; 13.278 - qe->msghdr.msg_flags = 0; 13.279 - 13.280 - qe->iov[0].iov_base = (void *)&(qe->message); 13.281 - qe->iov[0].iov_len = MSGBUFSIZE_ID; 13.282 - 13.283 - if (qe->block) { 13.284 - qe->iov[1].iov_base = qe->block; 13.285 - qe->iov[1].iov_len = BLOCK_SIZE; 13.286 - } 13.287 - 13.288 - qe->message.luid = new_luid(); 13.289 - 13.290 - qe->status = 0; 13.291 - qe->tid = (int)pthread_getspecific(tid_key); 13.292 - if (enqueue(qe) < 0) { 13.293 - fprintf(stderr, "Error enqueuing request.\n"); 13.294 - return -1; 13.295 - } 13.296 - 13.297 - gettimeofday(&(qe->tv_sent), NULL); 13.298 - DB("send_message to %d luid=%016llx\n", qe->server, qe->message.luid); 13.299 - rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT); 13.300 - //rc = sendto(bssock, (void *)&(qe->message), qe->length, 0, 13.301 - // (struct sockaddr *)&(bsservers[qe->server].sin), 13.302 - // sizeof(struct sockaddr_in)); 13.303 - if (rc < 0) 13.304 - return rc; 13.305 - 13.306 - return rc; 13.307 -} 13.308 - 13.309 -int recv_message(bsq_t *qe) { 13.310 - struct sockaddr_in from; 13.311 - //int flen = sizeof(from); 13.312 - int rc; 13.313 - 13.314 - qe->msghdr.msg_name = &from; 13.315 - qe->msghdr.msg_namelen = sizeof(struct sockaddr_in); 13.316 - qe->msghdr.msg_iov = qe->iov; 13.317 - if (qe->block) 13.318 - qe->msghdr.msg_iovlen = 2; 13.319 - else 13.320 - qe->msghdr.msg_iovlen = 1; 13.321 - qe->msghdr.msg_control = NULL; 13.322 - qe->msghdr.msg_controllen = 0; 13.323 - qe->msghdr.msg_flags = 0; 13.324 - 13.325 - qe->iov[0].iov_base = (void *)&(qe->message); 13.326 - qe->iov[0].iov_len = MSGBUFSIZE_ID; 13.327 - if (qe->block) { 
13.328 - qe->iov[1].iov_base = qe->block; 13.329 - qe->iov[1].iov_len = BLOCK_SIZE; 13.330 - } 13.331 - 13.332 - rc = recvmsg(bssock, &(qe->msghdr), 0); 13.333 - 13.334 - //return recvfrom(bssock, (void *)&(qe->message), sizeof(bsmsg_t), 0, 13.335 - // (struct sockaddr *)&from, &flen); 13.336 - return rc; 13.337 -} 13.338 - 13.339 -int get_server_number(struct sockaddr_in *sin) { 13.340 - int i; 13.341 - 13.342 -#ifdef BSDEBUG2 13.343 - fprintf(stderr, 13.344 - "get_server_number(%u.%u.%u.%u/%u)\n", 13.345 - (unsigned int)sin->sin_addr.s_addr & 0xff, 13.346 - ((unsigned int)sin->sin_addr.s_addr >> 8) & 0xff, 13.347 - ((unsigned int)sin->sin_addr.s_addr >> 16) & 0xff, 13.348 - ((unsigned int)sin->sin_addr.s_addr >> 24) & 0xff, 13.349 - (unsigned int)sin->sin_port); 13.350 -#endif 13.351 - 13.352 - for (i = 0; i < MAX_SERVERS; i++) { 13.353 - if (bsservers[i].hostname) { 13.354 -#ifdef BSDEBUG2 13.355 - fprintf(stderr, 13.356 - "get_server_number check %u.%u.%u.%u/%u\n", 13.357 - (unsigned int)bsservers[i].sin.sin_addr.s_addr&0xff, 13.358 - ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 8)&0xff, 13.359 - ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 16)&0xff, 13.360 - ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 24)&0xff, 13.361 - (unsigned int)bsservers[i].sin.sin_port); 13.362 -#endif 13.363 - if ((sin->sin_family == bsservers[i].sin.sin_family) && 13.364 - (sin->sin_port == bsservers[i].sin.sin_port) && 13.365 - (memcmp((void *)&(sin->sin_addr), 13.366 - (void *)&(bsservers[i].sin.sin_addr), 13.367 - sizeof(struct in_addr)) == 0)) { 13.368 - return i; 13.369 - } 13.370 - } 13.371 - } 13.372 - 13.373 - return -1; 13.374 -} 13.375 - 13.376 -void *rx_buffer = NULL; 13.377 -bsq_t rx_qe; 13.378 -bsq_t *recv_any(void) { 13.379 - struct sockaddr_in from; 13.380 - int rc; 13.381 - 13.382 - DB("ENTER recv_any\n"); 13.383 - 13.384 - rx_qe.msghdr.msg_name = &from; 13.385 - rx_qe.msghdr.msg_namelen = sizeof(struct sockaddr_in); 13.386 - rx_qe.msghdr.msg_iov = 
rx_qe.iov; 13.387 - if (!rx_buffer) { 13.388 - rx_buffer = malloc(BLOCK_SIZE); 13.389 - if (!rx_buffer) { 13.390 - perror("recv_any malloc"); 13.391 - return NULL; 13.392 - } 13.393 - } 13.394 - rx_qe.block = rx_buffer; 13.395 - rx_buffer = NULL; 13.396 - rx_qe.msghdr.msg_iovlen = 2; 13.397 - rx_qe.msghdr.msg_control = NULL; 13.398 - rx_qe.msghdr.msg_controllen = 0; 13.399 - rx_qe.msghdr.msg_flags = 0; 13.400 - 13.401 - rx_qe.iov[0].iov_base = (void *)&(rx_qe.message); 13.402 - rx_qe.iov[0].iov_len = MSGBUFSIZE_ID; 13.403 - rx_qe.iov[1].iov_base = rx_qe.block; 13.404 - rx_qe.iov[1].iov_len = BLOCK_SIZE; 13.405 - 13.406 - rc = recvmsg(bssock, &(rx_qe.msghdr), 0); 13.407 - if (rc < 0) { 13.408 - perror("recv_any"); 13.409 - return NULL; 13.410 - } 13.411 - 13.412 - rx_qe.length = rc; 13.413 - rx_qe.server = get_server_number(&from); 13.414 - 13.415 - DB("recv_any from %d luid=%016llx len=%u\n", 13.416 - rx_qe.server, rx_qe.message.luid, rx_qe.length); 13.417 - 13.418 - return &rx_qe; 13.419 -} 13.420 - 13.421 -void recv_recycle_buffer(bsq_t *q) { 13.422 - if (q->block) { 13.423 - rx_buffer = q->block; 13.424 - q->block = NULL; 13.425 - } 13.426 -} 13.427 - 13.428 -// cycle through reading any incoming, searching for a match in the 13.429 -// queue, until we have all we need. 
13.430 -int wait_recv(bsq_t **reqs, int numreqs) { 13.431 - bsq_t *q, *m; 13.432 - unsigned int x, i; 13.433 - int tid = (int)pthread_getspecific(tid_key); 13.434 - 13.435 - DB("ENTER wait_recv %u\n", numreqs); 13.436 - 13.437 - checkmatch: 13.438 - x = 0xffffffff; 13.439 - for (i = 0; i < numreqs; i++) { 13.440 - x &= reqs[i]->status; 13.441 - } 13.442 - if ((x & BSQ_STATUS_MATCHED)) { 13.443 - DB("LEAVE wait_recv\n"); 13.444 - return numreqs; 13.445 - } 13.446 - 13.447 - RECV_AWAIT(tid); 13.448 - 13.449 - /* 13.450 - rxagain: 13.451 - ENTER_RECV_CR; 13.452 - q = recv_any(); 13.453 - LEAVE_RECV_CR; 13.454 - if (!q) 13.455 - return -1; 13.456 - 13.457 - m = queuesearch(q); 13.458 - recv_recycle_buffer(q); 13.459 - if (!m) { 13.460 - fprintf(stderr, "Unmatched RX\n"); 13.461 - goto rxagain; 13.462 - } 13.463 - */ 13.464 - 13.465 - goto checkmatch; 13.466 - 13.467 -} 13.468 - 13.469 -/* retry 13.470 - */ 13.471 -static int retry_count = 0; 13.472 -int retry(bsq_t *qe) 13.473 -{ 13.474 - int rc; 13.475 - gettimeofday(&(qe->tv_sent), NULL); 13.476 - DB("retry to %d luid=%016llx\n", qe->server, qe->message.luid); 13.477 - retry_count++; 13.478 - rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT); 13.479 - if (rc < 0) 13.480 - return rc; 13.481 - return 0; 13.482 -} 13.483 - 13.484 -/* queue runner 13.485 - */ 13.486 -void *queue_runner(void *arg) 13.487 -{ 13.488 - for (;;) { 13.489 - struct timeval now; 13.490 - long long nowus, sus; 13.491 - bsq_t *q; 13.492 - int r; 13.493 - 13.494 - sleep(1); 13.495 - 13.496 - gettimeofday(&now, NULL); 13.497 - nowus = now.tv_usec + now.tv_sec * 1000000; 13.498 - ENTER_QUEUE_CR; 13.499 - r = retry_count; 13.500 - for (q = bs_head; q; q = q->next) { 13.501 - sus = q->tv_sent.tv_usec + q->tv_sent.tv_sec * 1000000; 13.502 - if ((nowus - sus) > RETRY_TIMEOUT) { 13.503 - if (retry(q) < 0) { 13.504 - fprintf(stderr, "Error on sendmsg retry.\n"); 13.505 - } 13.506 - } 13.507 - } 13.508 - if (r != retry_count) { 13.509 - fprintf(stderr, 
"RETRIES: %u %u\n", retry_count - r, retry_count); 13.510 - } 13.511 - LEAVE_QUEUE_CR; 13.512 - } 13.513 -} 13.514 - 13.515 -/* receive loop 13.516 - */ 13.517 -void *receive_loop(void *arg) 13.518 -{ 13.519 - bsq_t *q, *m; 13.520 - 13.521 - for(;;) { 13.522 - q = recv_any(); 13.523 - if (!q) { 13.524 - fprintf(stderr, "recv_any error\n"); 13.525 - } 13.526 - else { 13.527 - m = queuesearch(q); 13.528 - recv_recycle_buffer(q); 13.529 - if (!m) { 13.530 - fprintf(stderr, "Unmatched RX\n"); 13.531 - } 13.532 - else { 13.533 - DB("RX MATCH"); 13.534 - RECV_NOTIFY(m->tid); 13.535 - } 13.536 - } 13.537 - } 13.538 -} 13.539 -pthread_t pthread_recv; 13.540 - 13.541 -/***************************************************************************** 13.542 - * Reading * 13.543 - *****************************************************************************/ 13.544 - 13.545 -void *readblock_indiv(int server, uint64_t id) { 13.546 - void *block; 13.547 - bsq_t *qe; 13.548 - int len, rc; 13.549 - 13.550 - qe = (bsq_t *)malloc(sizeof(bsq_t)); 13.551 - if (!qe) { 13.552 - perror("readblock qe malloc"); 13.553 - return NULL; 13.554 - } 13.555 - qe->block = NULL; 13.556 - 13.557 - /* 13.558 - qe->block = malloc(BLOCK_SIZE); 13.559 - if (!qe->block) { 13.560 - perror("readblock qe malloc"); 13.561 - free((void *)qe); 13.562 - return NULL; 13.563 - } 13.564 - */ 13.565 - 13.566 - qe->server = server; 13.567 - 13.568 - qe->message.operation = BSOP_READBLOCK; 13.569 - qe->message.flags = 0; 13.570 - qe->message.id = id; 13.571 - qe->length = MSGBUFSIZE_ID; 13.572 - 13.573 - if (send_message(qe) < 0) { 13.574 - perror("readblock sendto"); 13.575 - goto err; 13.576 - } 13.577 - 13.578 - /*len = recv_message(qe); 13.579 - if (len < 0) { 13.580 - perror("readblock recv"); 13.581 - goto err; 13.582 - }*/ 13.583 - 13.584 - rc = wait_recv(&qe, 1); 13.585 - if (rc < 0) { 13.586 - perror("readblock recv"); 13.587 - goto err; 13.588 - } 13.589 - 13.590 - if ((qe->message.flags & BSOP_FLAG_ERROR)) { 
13.591 - fprintf(stderr, "readblock server error\n"); 13.592 - goto err; 13.593 - } 13.594 - if (qe->length < MSGBUFSIZE_BLOCK) { 13.595 - fprintf(stderr, "readblock recv short (%u)\n", len); 13.596 - goto err; 13.597 - } 13.598 - /* if ((block = malloc(BLOCK_SIZE)) == NULL) { 13.599 - perror("readblock malloc"); 13.600 - goto err; 13.601 - } 13.602 - memcpy(block, qe->message.block, BLOCK_SIZE); 13.603 - */ 13.604 - block = qe->block; 13.605 - 13.606 - free((void *)qe); 13.607 - return block; 13.608 - 13.609 - err: 13.610 - free(qe->block); 13.611 - free((void *)qe); 13.612 - return NULL; 13.613 -} 13.614 - 13.615 -/** 13.616 - * readblock: read a block from disk 13.617 - * @id: block id to read 13.618 - * 13.619 - * @return: pointer to block, NULL on error 13.620 - */ 13.621 -void *readblock(uint64_t id) { 13.622 - int map = (int)BSID_MAP(id); 13.623 - uint64_t xid; 13.624 - static int i = CLUSTER_MAX_REPLICAS - 1; 13.625 - void *block = NULL; 13.626 - 13.627 - /* special case for the "superblock" just use the first block on the 13.628 - * first replica. 
(extend to blocks < 6 for vdi bug) 13.629 - */ 13.630 - if (id < 6) { 13.631 - block = readblock_indiv(bsclusters[map].servers[0], id); 13.632 - goto out; 13.633 - } 13.634 - 13.635 - i++; 13.636 - if (i >= CLUSTER_MAX_REPLICAS) 13.637 - i = 0; 13.638 - switch (i) { 13.639 - case 0: 13.640 - xid = BSID_REPLICA0(id); 13.641 - break; 13.642 - case 1: 13.643 - xid = BSID_REPLICA1(id); 13.644 - break; 13.645 - case 2: 13.646 - xid = BSID_REPLICA2(id); 13.647 - break; 13.648 - } 13.649 - 13.650 - block = readblock_indiv(bsclusters[map].servers[i], xid); 13.651 - 13.652 - out: 13.653 -#ifdef BSDEBUG 13.654 - if (block) 13.655 - fprintf(stderr, "READ: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n", 13.656 - id, 13.657 - (unsigned int)((unsigned char *)block)[0], 13.658 - (unsigned int)((unsigned char *)block)[1], 13.659 - (unsigned int)((unsigned char *)block)[2], 13.660 - (unsigned int)((unsigned char *)block)[3], 13.661 - (unsigned int)((unsigned char *)block)[4], 13.662 - (unsigned int)((unsigned char *)block)[5], 13.663 - (unsigned int)((unsigned char *)block)[6], 13.664 - (unsigned int)((unsigned char *)block)[7]); 13.665 - else 13.666 - fprintf(stderr, "READ: %016llx NULL\n", id); 13.667 -#endif 13.668 - return block; 13.669 -} 13.670 - 13.671 -/***************************************************************************** 13.672 - * Writing * 13.673 - *****************************************************************************/ 13.674 - 13.675 -bsq_t *writeblock_indiv(int server, uint64_t id, void *block) { 13.676 - 13.677 - bsq_t *qe; 13.678 - int len; 13.679 - 13.680 - qe = (bsq_t *)malloc(sizeof(bsq_t)); 13.681 - if (!qe) { 13.682 - perror("writeblock qe malloc"); 13.683 - goto err; 13.684 - } 13.685 - qe->server = server; 13.686 - 13.687 - qe->message.operation = BSOP_WRITEBLOCK; 13.688 - qe->message.flags = 0; 13.689 - qe->message.id = id; 13.690 - //memcpy(qe->message.block, block, BLOCK_SIZE); 13.691 - qe->block = block; 13.692 - qe->length = 
MSGBUFSIZE_BLOCK; 13.693 - 13.694 - if (send_message(qe) < 0) { 13.695 - perror("writeblock sendto"); 13.696 - goto err; 13.697 - } 13.698 - 13.699 - return qe; 13.700 - 13.701 - err: 13.702 - free((void *)qe); 13.703 - return NULL; 13.704 -} 13.705 - 13.706 - 13.707 -/** 13.708 - * writeblock: write an existing block to disk 13.709 - * @id: block id 13.710 - * @block: pointer to block 13.711 - * 13.712 - * @return: zero on success, -1 on failure 13.713 - */ 13.714 -int writeblock(uint64_t id, void *block) { 13.715 - 13.716 - int map = (int)BSID_MAP(id); 13.717 - int rep0 = bsclusters[map].servers[0]; 13.718 - int rep1 = bsclusters[map].servers[1]; 13.719 - int rep2 = bsclusters[map].servers[2]; 13.720 - bsq_t *reqs[3]; 13.721 - int rc; 13.722 - 13.723 - reqs[0] = reqs[1] = reqs[2] = NULL; 13.724 - 13.725 -#ifdef BSDEBUG 13.726 - fprintf(stderr, 13.727 - "WRITE: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n", 13.728 - id, 13.729 - (unsigned int)((unsigned char *)block)[0], 13.730 - (unsigned int)((unsigned char *)block)[1], 13.731 - (unsigned int)((unsigned char *)block)[2], 13.732 - (unsigned int)((unsigned char *)block)[3], 13.733 - (unsigned int)((unsigned char *)block)[4], 13.734 - (unsigned int)((unsigned char *)block)[5], 13.735 - (unsigned int)((unsigned char *)block)[6], 13.736 - (unsigned int)((unsigned char *)block)[7]); 13.737 -#endif 13.738 - 13.739 - /* special case for the "superblock" just use the first block on the 13.740 - * first replica. 
(extend to blocks < 6 for vdi bug) 13.741 - */ 13.742 - if (id < 6) { 13.743 - reqs[0] = writeblock_indiv(rep0, id, block); 13.744 - if (!reqs[0]) 13.745 - return -1; 13.746 - rc = wait_recv(reqs, 1); 13.747 - return rc; 13.748 - } 13.749 - 13.750 - reqs[0] = writeblock_indiv(rep0, BSID_REPLICA0(id), block); 13.751 - if (!reqs[0]) 13.752 - goto err; 13.753 - reqs[1] = writeblock_indiv(rep1, BSID_REPLICA1(id), block); 13.754 - if (!reqs[1]) 13.755 - goto err; 13.756 - reqs[2] = writeblock_indiv(rep2, BSID_REPLICA2(id), block); 13.757 - if (!reqs[2]) 13.758 - goto err; 13.759 - 13.760 - rc = wait_recv(reqs, 3); 13.761 - if (rc < 0) { 13.762 - perror("writeblock recv"); 13.763 - goto err; 13.764 - } 13.765 - if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) { 13.766 - fprintf(stderr, "writeblock server0 error\n"); 13.767 - goto err; 13.768 - } 13.769 - if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) { 13.770 - fprintf(stderr, "writeblock server1 error\n"); 13.771 - goto err; 13.772 - } 13.773 - if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) { 13.774 - fprintf(stderr, "writeblock server2 error\n"); 13.775 - goto err; 13.776 - } 13.777 - 13.778 - 13.779 - free((void *)reqs[0]); 13.780 - free((void *)reqs[1]); 13.781 - free((void *)reqs[2]); 13.782 - return 0; 13.783 - 13.784 - err: 13.785 - if (reqs[0]) { 13.786 - dequeue(reqs[0]); 13.787 - free((void *)reqs[0]); 13.788 - } 13.789 - if (reqs[1]) { 13.790 - dequeue(reqs[1]); 13.791 - free((void *)reqs[1]); 13.792 - } 13.793 - if (reqs[2]) { 13.794 - dequeue(reqs[2]); 13.795 - free((void *)reqs[2]); 13.796 - } 13.797 - return -1; 13.798 -} 13.799 - 13.800 -/***************************************************************************** 13.801 - * Allocation * 13.802 - *****************************************************************************/ 13.803 - 13.804 -/** 13.805 - * allocblock: write a new block to disk 13.806 - * @block: pointer to block 13.807 - * 13.808 - * @return: new id of block on disk 13.809 - */ 13.810 
-uint64_t allocblock(void *block) { 13.811 - return allocblock_hint(block, 0); 13.812 -} 13.813 - 13.814 -bsq_t *allocblock_hint_indiv(int server, void *block, uint64_t hint) { 13.815 - bsq_t *qe; 13.816 - int len; 13.817 - 13.818 - qe = (bsq_t *)malloc(sizeof(bsq_t)); 13.819 - if (!qe) { 13.820 - perror("allocblock_hint qe malloc"); 13.821 - goto err; 13.822 - } 13.823 - qe->server = server; 13.824 - 13.825 - qe->message.operation = BSOP_ALLOCBLOCK; 13.826 - qe->message.flags = 0; 13.827 - qe->message.id = hint; 13.828 - //memcpy(qe->message.block, block, BLOCK_SIZE); 13.829 - qe->block = block; 13.830 - qe->length = MSGBUFSIZE_BLOCK; 13.831 - 13.832 - if (send_message(qe) < 0) { 13.833 - perror("allocblock_hint sendto"); 13.834 - goto err; 13.835 - } 13.836 - 13.837 - return qe; 13.838 - 13.839 - err: 13.840 - free((void *)qe); 13.841 - return NULL; 13.842 -} 13.843 - 13.844 -/** 13.845 - * allocblock_hint: write a new block to disk 13.846 - * @block: pointer to block 13.847 - * @hint: allocation hint 13.848 - * 13.849 - * @return: new id of block on disk 13.850 - */ 13.851 -uint64_t allocblock_hint(void *block, uint64_t hint) { 13.852 - int map = (int)hint; 13.853 - int rep0 = bsclusters[map].servers[0]; 13.854 - int rep1 = bsclusters[map].servers[1]; 13.855 - int rep2 = bsclusters[map].servers[2]; 13.856 - bsq_t *reqs[3]; 13.857 - int rc; 13.858 - uint64_t id0, id1, id2; 13.859 - 13.860 - reqs[0] = reqs[1] = reqs[2] = NULL; 13.861 - 13.862 - DB("ENTER allocblock\n"); 13.863 - 13.864 - reqs[0] = allocblock_hint_indiv(rep0, block, hint); 13.865 - if (!reqs[0]) 13.866 - goto err; 13.867 - reqs[1] = allocblock_hint_indiv(rep1, block, hint); 13.868 - if (!reqs[1]) 13.869 - goto err; 13.870 - reqs[2] = allocblock_hint_indiv(rep2, block, hint); 13.871 - if (!reqs[2]) 13.872 - goto err; 13.873 - 13.874 - rc = wait_recv(reqs, 3); 13.875 - if (rc < 0) { 13.876 - perror("allocblock recv"); 13.877 - goto err; 13.878 - } 13.879 - if ((reqs[0]->message.flags & 
BSOP_FLAG_ERROR)) { 13.880 - fprintf(stderr, "allocblock server0 error\n"); 13.881 - goto err; 13.882 - } 13.883 - if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) { 13.884 - fprintf(stderr, "allocblock server1 error\n"); 13.885 - goto err; 13.886 - } 13.887 - if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) { 13.888 - fprintf(stderr, "allocblock server2 error\n"); 13.889 - goto err; 13.890 - } 13.891 - 13.892 - id0 = reqs[0]->message.id; 13.893 - id1 = reqs[1]->message.id; 13.894 - id2 = reqs[2]->message.id; 13.895 - 13.896 -#ifdef BSDEBUG 13.897 - fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n", 13.898 - BSID(map, id0, id1, id2), 13.899 - (unsigned int)((unsigned char *)block)[0], 13.900 - (unsigned int)((unsigned char *)block)[1], 13.901 - (unsigned int)((unsigned char *)block)[2], 13.902 - (unsigned int)((unsigned char *)block)[3], 13.903 - (unsigned int)((unsigned char *)block)[4], 13.904 - (unsigned int)((unsigned char *)block)[5], 13.905 - (unsigned int)((unsigned char *)block)[6], 13.906 - (unsigned int)((unsigned char *)block)[7]); 13.907 -#endif 13.908 - 13.909 - free((void *)reqs[0]); 13.910 - free((void *)reqs[1]); 13.911 - free((void *)reqs[2]); 13.912 - return BSID(map, id0, id1, id2); 13.913 - 13.914 - err: 13.915 - if (reqs[0]) { 13.916 - dequeue(reqs[0]); 13.917 - free((void *)reqs[0]); 13.918 - } 13.919 - if (reqs[1]) { 13.920 - dequeue(reqs[1]); 13.921 - free((void *)reqs[1]); 13.922 - } 13.923 - if (reqs[2]) { 13.924 - dequeue(reqs[2]); 13.925 - free((void *)reqs[2]); 13.926 - } 13.927 - return 0; 13.928 -} 13.929 - 13.930 -#else /* /BLOCKSTORE_REMOTE */ 13.931 - 13.932 -/***************************************************************************** 13.933 - * Local storage version * 13.934 - *****************************************************************************/ 13.935 - 13.936 -/** 13.937 - * readblock: read a block from disk 13.938 - * @id: block id to read 13.939 - * 13.940 - * @return: pointer to block, NULL on 
error 13.941 - */ 13.942 - 13.943 -void *readblock(uint64_t id) { 13.944 - void *block; 13.945 - int block_fp; 13.946 - 13.947 -//printf("readblock(%llu)\n", id); 13.948 - block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644); 13.949 - 13.950 - if (block_fp < 0) { 13.951 - perror("open"); 13.952 - return NULL; 13.953 - } 13.954 - 13.955 - if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { 13.956 - printf ("%Ld ", id); 13.957 - printf ("%Ld\n", (id - 1) * BLOCK_SIZE); 13.958 - perror("readblock lseek"); 13.959 - goto err; 13.960 - } 13.961 - if ((block = malloc(BLOCK_SIZE)) == NULL) { 13.962 - perror("readblock malloc"); 13.963 - goto err; 13.964 - } 13.965 - if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) { 13.966 - perror("readblock read"); 13.967 - free(block); 13.968 - goto err; 13.969 - } 13.970 - close(block_fp); 13.971 - return block; 13.972 - 13.973 -err: 13.974 - close(block_fp); 13.975 - return NULL; 13.976 -} 13.977 - 13.978 -/** 13.979 - * writeblock: write an existing block to disk 13.980 - * @id: block id 13.981 - * @block: pointer to block 13.982 - * 13.983 - * @return: zero on success, -1 on failure 13.984 - */ 13.985 -int writeblock(uint64_t id, void *block) { 13.986 - 13.987 - int block_fp; 13.988 - 13.989 - block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644); 13.990 - 13.991 - if (block_fp < 0) { 13.992 - perror("open"); 13.993 - return -1; 13.994 - } 13.995 - 13.996 - if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { 13.997 - perror("writeblock lseek"); 13.998 - goto err; 13.999 - } 13.1000 - if (write(block_fp, block, BLOCK_SIZE) < 0) { 13.1001 - perror("writeblock write"); 13.1002 - goto err; 13.1003 - } 13.1004 - close(block_fp); 13.1005 - return 0; 13.1006 - 13.1007 -err: 13.1008 - close(block_fp); 13.1009 - return -1; 13.1010 -} 13.1011 - 13.1012 -/** 13.1013 - * allocblock: write a new block to disk 13.1014 - * @block: pointer to block 13.1015 - * 
13.1016 - * @return: new id of block on disk 13.1017 - */ 13.1018 - 13.1019 -uint64_t allocblock(void *block) { 13.1020 - uint64_t lb; 13.1021 - off64_t pos; 13.1022 - int block_fp; 13.1023 - 13.1024 - block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644); 13.1025 - 13.1026 - if (block_fp < 0) { 13.1027 - perror("open"); 13.1028 - return 0; 13.1029 - } 13.1030 - 13.1031 - pos = lseek64(block_fp, 0, SEEK_END); 13.1032 - if (pos == (off64_t)-1) { 13.1033 - perror("allocblock lseek"); 13.1034 - goto err; 13.1035 - } 13.1036 - if (pos % BLOCK_SIZE != 0) { 13.1037 - fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE); 13.1038 - goto err; 13.1039 - } 13.1040 - if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) { 13.1041 - perror("allocblock write"); 13.1042 - goto err; 13.1043 - } 13.1044 - lb = pos / BLOCK_SIZE + 1; 13.1045 -//printf("alloc(%Ld)\n", lb); 13.1046 - close(block_fp); 13.1047 - return lb; 13.1048 - 13.1049 -err: 13.1050 - close(block_fp); 13.1051 - return 0; 13.1052 - 13.1053 -} 13.1054 - 13.1055 -/** 13.1056 - * allocblock_hint: write a new block to disk 13.1057 - * @block: pointer to block 13.1058 - * @hint: allocation hint 13.1059 - * 13.1060 - * @return: new id of block on disk 13.1061 - */ 13.1062 -uint64_t allocblock_hint(void *block, uint64_t hint) { 13.1063 - return allocblock(block); 13.1064 -} 13.1065 - 13.1066 -#endif /* BLOCKSTORE_REMOTE */ 13.1067 - 13.1068 -/***************************************************************************** 13.1069 - * Memory management * 13.1070 - *****************************************************************************/ 13.1071 - 13.1072 -/** 13.1073 - * newblock: get a new in-memory block set to zeros 13.1074 - * 13.1075 - * @return: pointer to new block, NULL on error 13.1076 - */ 13.1077 -void *newblock(void) { 13.1078 - void *block = malloc(BLOCK_SIZE); 13.1079 - if (block == NULL) { 13.1080 - perror("newblock"); 13.1081 - return NULL; 13.1082 - } 13.1083 - memset(block, 0, 
BLOCK_SIZE); 13.1084 - return block; 13.1085 -} 13.1086 - 13.1087 - 13.1088 -/** 13.1089 - * freeblock: unallocate an in-memory block 13.1090 - * @id: block id (zero if this is only in-memory) 13.1091 - * @block: block to be freed 13.1092 - */ 13.1093 -void freeblock(void *block) { 13.1094 - free(block); 13.1095 -} 13.1096 - 13.1097 -static freeblock_t *new_freeblock(void) 13.1098 -{ 13.1099 - freeblock_t *fb; 13.1100 - 13.1101 - fb = newblock(); 13.1102 - 13.1103 - if (fb == NULL) return NULL; 13.1104 - 13.1105 - fb->magic = FREEBLOCK_MAGIC; 13.1106 - fb->next = 0ULL; 13.1107 - fb->count = 0ULL; 13.1108 - memset(fb->list, 0, sizeof fb->list); 13.1109 - 13.1110 - return fb; 13.1111 -} 13.1112 - 13.1113 -void releaseblock(uint64_t id) 13.1114 -{ 13.1115 - blockstore_super_t *bs_super; 13.1116 - freeblock_t *fl_current; 13.1117 - 13.1118 - /* get superblock */ 13.1119 - bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER); 13.1120 - 13.1121 - /* get freeblock_current */ 13.1122 - if (bs_super->freelist_current == 0ULL) 13.1123 - { 13.1124 - fl_current = new_freeblock(); 13.1125 - bs_super->freelist_current = allocblock(fl_current); 13.1126 - writeblock(BLOCKSTORE_SUPER, bs_super); 13.1127 - } else { 13.1128 - fl_current = readblock(bs_super->freelist_current); 13.1129 - } 13.1130 - 13.1131 - /* if full, chain to superblock and allocate new current */ 13.1132 - 13.1133 - if (fl_current->count == FREEBLOCK_SIZE) { 13.1134 - fl_current->next = bs_super->freelist_full; 13.1135 - writeblock(bs_super->freelist_current, fl_current); 13.1136 - bs_super->freelist_full = bs_super->freelist_current; 13.1137 - freeblock(fl_current); 13.1138 - fl_current = new_freeblock(); 13.1139 - bs_super->freelist_current = allocblock(fl_current); 13.1140 - writeblock(BLOCKSTORE_SUPER, bs_super); 13.1141 - } 13.1142 - 13.1143 - /* append id to current */ 13.1144 - fl_current->list[fl_current->count++] = id; 13.1145 - writeblock(bs_super->freelist_current, fl_current); 13.1146 - 
13.1147 - freeblock(fl_current); 13.1148 - freeblock(bs_super); 13.1149 - 13.1150 - 13.1151 -} 13.1152 - 13.1153 -/* freelist debug functions: */ 13.1154 -void freelist_count(int print_each) 13.1155 -{ 13.1156 - blockstore_super_t *bs_super; 13.1157 - freeblock_t *fb; 13.1158 - uint64_t total = 0, next; 13.1159 - 13.1160 - bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER); 13.1161 - 13.1162 - if (bs_super->freelist_current == 0ULL) { 13.1163 - printf("freelist is empty!\n"); 13.1164 - return; 13.1165 - } 13.1166 - 13.1167 - fb = readblock(bs_super->freelist_current); 13.1168 - printf("%Ld entires on current.\n", fb->count); 13.1169 - total += fb->count; 13.1170 - if (print_each == 1) 13.1171 - { 13.1172 - int i; 13.1173 - for (i=0; i< fb->count; i++) 13.1174 - printf(" %Ld\n", fb->list[i]); 13.1175 - } 13.1176 - 13.1177 - freeblock(fb); 13.1178 - 13.1179 - if (bs_super->freelist_full == 0ULL) { 13.1180 - printf("freelist_full is empty!\n"); 13.1181 - return; 13.1182 - } 13.1183 - 13.1184 - next = bs_super->freelist_full; 13.1185 - for (;;) { 13.1186 - fb = readblock(next); 13.1187 - total += fb->count; 13.1188 - if (print_each == 1) 13.1189 - { 13.1190 - int i; 13.1191 - for (i=0; i< fb->count; i++) 13.1192 - printf(" %Ld\n", fb->list[i]); 13.1193 - } 13.1194 - next = fb->next; 13.1195 - freeblock(fb); 13.1196 - if (next == 0ULL) break; 13.1197 - } 13.1198 - printf("Total of %Ld ids on freelist.\n", total); 13.1199 -} 13.1200 - 13.1201 -/***************************************************************************** 13.1202 - * Initialisation * 13.1203 - *****************************************************************************/ 13.1204 - 13.1205 -int __init_blockstore(void) 13.1206 -{ 13.1207 - int i; 13.1208 - blockstore_super_t *bs_super; 13.1209 - uint64_t ret; 13.1210 - int block_fp; 13.1211 - 13.1212 -#ifdef BLOCKSTORE_REMOTE 13.1213 - struct hostent *addr; 13.1214 - 13.1215 - pthread_mutex_init(&ptmutex_queue, NULL); 13.1216 - 
pthread_mutex_init(&ptmutex_luid, NULL); 13.1217 - pthread_mutex_init(&ptmutex_recv, NULL); 13.1218 - /*pthread_mutex_init(&ptmutex_notify, NULL);*/ 13.1219 - for (i = 0; i <= READ_POOL_SIZE; i++) { 13.1220 - pool_thread[i].newdata = 0; 13.1221 - pthread_mutex_init(&(pool_thread[i].ptmutex), NULL); 13.1222 - pthread_cond_init(&(pool_thread[i].ptcv), NULL); 13.1223 - } 13.1224 - 13.1225 - bsservers[0].hostname = "firebug.cl.cam.ac.uk"; 13.1226 - bsservers[1].hostname = "planb.cl.cam.ac.uk"; 13.1227 - bsservers[2].hostname = "simcity.cl.cam.ac.uk"; 13.1228 - bsservers[3].hostname = NULL/*"gunfighter.cl.cam.ac.uk"*/; 13.1229 - bsservers[4].hostname = NULL/*"galaxian.cl.cam.ac.uk"*/; 13.1230 - bsservers[5].hostname = NULL/*"firetrack.cl.cam.ac.uk"*/; 13.1231 - bsservers[6].hostname = NULL/*"funfair.cl.cam.ac.uk"*/; 13.1232 - bsservers[7].hostname = NULL/*"felix.cl.cam.ac.uk"*/; 13.1233 - bsservers[8].hostname = NULL; 13.1234 - bsservers[9].hostname = NULL; 13.1235 - bsservers[10].hostname = NULL; 13.1236 - bsservers[11].hostname = NULL; 13.1237 - bsservers[12].hostname = NULL; 13.1238 - bsservers[13].hostname = NULL; 13.1239 - bsservers[14].hostname = NULL; 13.1240 - bsservers[15].hostname = NULL; 13.1241 - 13.1242 - for (i = 0; i < MAX_SERVERS; i++) { 13.1243 - if (!bsservers[i].hostname) 13.1244 - continue; 13.1245 - addr = gethostbyname(bsservers[i].hostname); 13.1246 - if (!addr) { 13.1247 - perror("bad hostname"); 13.1248 - return -1; 13.1249 - } 13.1250 - bsservers[i].sin.sin_family = addr->h_addrtype; 13.1251 - bsservers[i].sin.sin_port = htons(BLOCKSTORED_PORT); 13.1252 - bsservers[i].sin.sin_addr.s_addr = 13.1253 - ((struct in_addr *)(addr->h_addr))->s_addr; 13.1254 - } 13.1255 - 13.1256 - /* Cluster map 13.1257 - */ 13.1258 - bsclusters[0].servers[0] = 0; 13.1259 - bsclusters[0].servers[1] = 1; 13.1260 - bsclusters[0].servers[2] = 2; 13.1261 - bsclusters[1].servers[0] = 1; 13.1262 - bsclusters[1].servers[1] = 2; 13.1263 - bsclusters[1].servers[2] = 3; 13.1264 
- bsclusters[2].servers[0] = 2; 13.1265 - bsclusters[2].servers[1] = 3; 13.1266 - bsclusters[2].servers[2] = 4; 13.1267 - bsclusters[3].servers[0] = 3; 13.1268 - bsclusters[3].servers[1] = 4; 13.1269 - bsclusters[3].servers[2] = 5; 13.1270 - bsclusters[4].servers[0] = 4; 13.1271 - bsclusters[4].servers[1] = 5; 13.1272 - bsclusters[4].servers[2] = 6; 13.1273 - bsclusters[5].servers[0] = 5; 13.1274 - bsclusters[5].servers[1] = 6; 13.1275 - bsclusters[5].servers[2] = 7; 13.1276 - bsclusters[6].servers[0] = 6; 13.1277 - bsclusters[6].servers[1] = 7; 13.1278 - bsclusters[6].servers[2] = 0; 13.1279 - bsclusters[7].servers[0] = 7; 13.1280 - bsclusters[7].servers[1] = 0; 13.1281 - bsclusters[7].servers[2] = 1; 13.1282 - 13.1283 - /* Local socket set up 13.1284 - */ 13.1285 - bssock = socket(AF_INET, SOCK_DGRAM, 0); 13.1286 - if (bssock < 0) { 13.1287 - perror("Bad socket"); 13.1288 - return -1; 13.1289 - } 13.1290 - memset(&sin_local, 0, sizeof(sin_local)); 13.1291 - sin_local.sin_family = AF_INET; 13.1292 - sin_local.sin_port = htons(BLOCKSTORED_PORT); 13.1293 - sin_local.sin_addr.s_addr = htonl(INADDR_ANY); 13.1294 - if (bind(bssock, (struct sockaddr *)&sin_local, sizeof(sin_local)) < 0) { 13.1295 - perror("bind"); 13.1296 - close(bssock); 13.1297 - return -1; 13.1298 - } 13.1299 - 13.1300 - pthread_create(&pthread_recv, NULL, receive_loop, NULL); 13.1301 - pthread_create(&pthread_recv, NULL, queue_runner, NULL); 13.1302 - 13.1303 -#else /* /BLOCKSTORE_REMOTE */ 13.1304 - block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644); 13.1305 - 13.1306 - if (block_fp < 0) { 13.1307 - perror("open"); 13.1308 - return -1; 13.1309 - exit(-1); 13.1310 - } 13.1311 - 13.1312 - if (lseek(block_fp, 0, SEEK_END) == 0) { 13.1313 - bs_super = newblock(); 13.1314 - bs_super->magic = BLOCKSTORE_MAGIC; 13.1315 - bs_super->freelist_full = 0LL; 13.1316 - bs_super->freelist_current = 0LL; 13.1317 - 13.1318 - ret = allocblock(bs_super); 13.1319 - 13.1320 - freeblock(bs_super); 
13.1321 - } else { 13.1322 - bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER); 13.1323 - if (bs_super->magic != BLOCKSTORE_MAGIC) 13.1324 - { 13.1325 - printf("BLOCKSTORE IS CORRUPT! (no magic in superblock!)\n"); 13.1326 - exit(-1); 13.1327 - } 13.1328 - freeblock(bs_super); 13.1329 - } 13.1330 - 13.1331 - close(block_fp); 13.1332 - 13.1333 -#endif /* BLOCKSTORE_REMOTE */ 13.1334 - return 0; 13.1335 -} 13.1336 - /* __exit_blockstore: tear down the synchronisation objects of the remote block store. When BLOCKSTORE_REMOTE is defined it destroys ptmutex_recv, ptmutex_luid and ptmutex_queue, then the mutex and condition variable of every pool_thread[i] for i = 0..READ_POOL_SIZE inclusive (the same inclusive range the init loop in __init_blockstore uses). Without BLOCKSTORE_REMOTE the function does nothing. Takes no arguments and returns nothing. NOTE(review): the receive/queue threads are not joined or cancelled here -- confirm callers stop them first. */ 13.1337 -void __exit_blockstore(void) 13.1338 -{ 13.1339 - int i; 13.1340 -#ifdef BLOCKSTORE_REMOTE 13.1341 - pthread_mutex_destroy(&ptmutex_recv); 13.1342 - pthread_mutex_destroy(&ptmutex_luid); 13.1343 - pthread_mutex_destroy(&ptmutex_queue); 13.1344 - /*pthread_mutex_destroy(&ptmutex_notify); 13.1345 - pthread_cond_destroy(&ptcv_notify);*/ 13.1346 - for (i = 0; i <= READ_POOL_SIZE; i++) { 13.1347 - pthread_mutex_destroy(&(pool_thread[i].ptmutex)); 13.1348 - pthread_cond_destroy(&(pool_thread[i].ptcv)); 13.1349 - } 13.1350 -#endif 13.1351 -}
14.1 --- a/tools/blktap/parallax/blockstore.h Fri Jun 16 18:19:40 2006 +0100 14.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 14.3 @@ -1,134 +0,0 @@ 14.4 -/************************************************************************** 14.5 - * 14.6 - * blockstore.h 14.7 - * 14.8 - * Simple block store interface 14.9 - * 14.10 - */ 14.11 - 14.12 -#ifndef __BLOCKSTORE_H__ 14.13 -#define __BLOCKSTORE_H__ 14.14 - 14.15 -#include <netinet/in.h> 14.16 -#include <xenctrl.h> 14.17 - 14.18 -#define BLOCK_SIZE 4096 14.19 -#define BLOCK_SHIFT 12 14.20 -#define BLOCK_MASK 0xfffffffffffff000LL 14.21 - 14.22 -/* XXX SMH: where is the below supposed to be defined???? */ 14.23 -#ifndef SECTOR_SHIFT 14.24 -#define SECTOR_SHIFT 9 14.25 -#endif 14.26 - 14.27 -#define FREEBLOCK_SIZE (BLOCK_SIZE / sizeof(uint64_t)) - (3 * sizeof(uint64_t)) 14.28 -#define FREEBLOCK_MAGIC 0x0fee0fee0fee0feeULL 14.29 - 14.30 -typedef struct { 14.31 - uint64_t magic; 14.32 - uint64_t next; 14.33 - uint64_t count; 14.34 - uint64_t list[FREEBLOCK_SIZE]; 14.35 -} freeblock_t; 14.36 - 14.37 -#define BLOCKSTORE_MAGIC 0xaaaaaaa00aaaaaaaULL 14.38 -#define BLOCKSTORE_SUPER 1ULL 14.39 - 14.40 -typedef struct { 14.41 - uint64_t magic; 14.42 - uint64_t freelist_full; 14.43 - uint64_t freelist_current; 14.44 -} blockstore_super_t; 14.45 - 14.46 -extern void *newblock(); 14.47 -extern void *readblock(uint64_t id); 14.48 -extern uint64_t allocblock(void *block); 14.49 -extern uint64_t allocblock_hint(void *block, uint64_t hint); 14.50 -extern int writeblock(uint64_t id, void *block); 14.51 - 14.52 -/* Add this blockid to a freelist, to be recycled by the allocator. */ 14.53 -extern void releaseblock(uint64_t id); 14.54 - 14.55 -/* this is a memory free() operation for block-sized allocations */ 14.56 -extern void freeblock(void *block); 14.57 -extern int __init_blockstore(void); 14.58 - 14.59 -/* debug for freelist. 
*/ 14.60 -void freelist_count(int print_each); 14.61 -#define ALLOCFAIL (((uint64_t)(-1))) 14.62 - 14.63 -/* Distribution 14.64 - */ 14.65 -#define BLOCKSTORED_PORT 9346 14.66 - 14.67 -struct bshdr_t_struct { 14.68 - uint32_t operation; 14.69 - uint32_t flags; 14.70 - uint64_t id; 14.71 - uint64_t luid; 14.72 -} __attribute__ ((packed)); 14.73 -typedef struct bshdr_t_struct bshdr_t; 14.74 - 14.75 -struct bsmsg_t_struct { 14.76 - bshdr_t hdr; 14.77 - unsigned char block[BLOCK_SIZE]; 14.78 -} __attribute__ ((packed)); 14.79 - 14.80 -typedef struct bsmsg_t_struct bsmsg_t; 14.81 - 14.82 -#define MSGBUFSIZE_OP sizeof(uint32_t) 14.83 -#define MSGBUFSIZE_FLAGS (sizeof(uint32_t) + sizeof(uint32_t)) 14.84 -#define MSGBUFSIZE_ID (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint64_t)) 14.85 -#define MSGBUFSIZE_BLOCK sizeof(bsmsg_t) 14.86 - 14.87 -#define BSOP_READBLOCK 0x01 14.88 -#define BSOP_WRITEBLOCK 0x02 14.89 -#define BSOP_ALLOCBLOCK 0x03 14.90 -#define BSOP_FREEBLOCK 0x04 14.91 - 14.92 -#define BSOP_FLAG_ERROR 0x01 14.93 - 14.94 -#define BS_ALLOC_SKIP 10 14.95 -#define BS_ALLOC_HACK 14.96 - 14.97 -/* Remote hosts and cluster map - XXX need to generalise 14.98 - */ 14.99 - 14.100 -/* 14.101 - 14.102 - Interim ID format is 14.103 - 14.104 - 63 60 59 40 39 20 19 0 14.105 - +----+--------------------+--------------------+--------------------+ 14.106 - |map | replica 2 | replica 1 | replica 0 | 14.107 - +----+--------------------+--------------------+--------------------+ 14.108 - 14.109 - The map is an index into a table detailing which machines form the 14.110 - cluster. 
14.111 - 14.112 - */ 14.113 - 14.114 -#define BSID_REPLICA0(_id) ((_id)&0xfffffULL) 14.115 -#define BSID_REPLICA1(_id) (((_id)>>20)&0xfffffULL) 14.116 -#define BSID_REPLICA2(_id) (((_id)>>40)&0xfffffULL) 14.117 -#define BSID_MAP(_id) (((_id)>>60)&0xfULL) 14.118 - 14.119 -#define BSID(_map, _rep0, _rep1, _rep2) ((((uint64_t)(_map))<<60) | \ 14.120 - (((uint64_t)(_rep2))<<40) | \ 14.121 - (((uint64_t)(_rep1))<<20) | ((uint64_t)(_rep0))) 14.122 - 14.123 -typedef struct bsserver_t_struct { 14.124 - char *hostname; 14.125 - struct sockaddr_in sin; 14.126 -} bsserver_t; 14.127 - 14.128 -#define MAX_SERVERS 16 14.129 - 14.130 -#define CLUSTER_MAX_REPLICAS 3 14.131 -typedef struct bscluster_t_struct { 14.132 - int servers[CLUSTER_MAX_REPLICAS]; 14.133 -} bscluster_t; 14.134 - 14.135 -#define MAX_CLUSTERS 16 14.136 - 14.137 -#endif /* __BLOCKSTORE_H__ */
15.1 --- a/tools/blktap/parallax/blockstored.c Fri Jun 16 18:19:40 2006 +0100 15.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 15.3 @@ -1,275 +0,0 @@ 15.4 -/************************************************************************** 15.5 - * 15.6 - * blockstored.c 15.7 - * 15.8 - * Block store daemon. 15.9 - * 15.10 - */ 15.11 - 15.12 -#include <fcntl.h> 15.13 -#include <unistd.h> 15.14 -#include <stdio.h> 15.15 -#include <stdlib.h> 15.16 -#include <string.h> 15.17 -#include <sys/types.h> 15.18 -#include <sys/stat.h> 15.19 -#include <sys/socket.h> 15.20 -#include <sys/ioctl.h> 15.21 -#include <netinet/in.h> 15.22 -#include <errno.h> 15.23 -#include "blockstore.h" 15.24 - 15.25 -//#define BSDEBUG 15.26 - 15.27 -int readblock_into(uint64_t id, void *block); 15.28 - 15.29 -int open_socket(uint16_t port) { 15.30 - 15.31 - struct sockaddr_in sn; 15.32 - int sock; 15.33 - 15.34 - sock = socket(AF_INET, SOCK_DGRAM, 0); 15.35 - if (sock < 0) { 15.36 - perror("Bad socket"); 15.37 - return -1; 15.38 - } 15.39 - memset(&sn, 0, sizeof(sn)); 15.40 - sn.sin_family = AF_INET; 15.41 - sn.sin_port = htons(port); 15.42 - sn.sin_addr.s_addr = htonl(INADDR_ANY); 15.43 - if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) { 15.44 - perror("bind"); 15.45 - close(sock); 15.46 - return -1; 15.47 - } 15.48 - 15.49 - return sock; 15.50 -} 15.51 - 15.52 -static int block_fp = -1; 15.53 -static int bssock = -1; 15.54 - 15.55 -int send_reply(struct sockaddr_in *peer, void *buffer, int len) { 15.56 - 15.57 - int rc; 15.58 - 15.59 -#ifdef BSDEBUG 15.60 - fprintf(stdout, "TX: %u bytes op=%u id=0x%llx\n", 15.61 - len, ((bsmsg_t *)buffer)->hdr.operation, ((bsmsg_t *)buffer)->hdr.id); 15.62 -#endif 15.63 - rc = sendto(bssock, buffer, len, 0, (struct sockaddr *)peer, sizeof(*peer)); 15.64 - if (rc < 0) { 15.65 - perror("send_reply"); 15.66 - return 1; 15.67 - } 15.68 - 15.69 - 15.70 - return 0; 15.71 -} 15.72 - 15.73 -static bsmsg_t msgbuf; 15.74 - 15.75 -void service_loop(void) { 15.76 - 15.77 - 
for (;;) { 15.78 - int rc, len; 15.79 - struct sockaddr_in from; 15.80 - size_t slen = sizeof(from); 15.81 - uint64_t bid; 15.82 - 15.83 - len = recvfrom(bssock, (void *)&msgbuf, sizeof(msgbuf), 0, 15.84 - (struct sockaddr *)&from, &slen); 15.85 - 15.86 - if (len < 0) { 15.87 - perror("recvfrom"); 15.88 - continue; 15.89 - } 15.90 - 15.91 - if (len < MSGBUFSIZE_OP) { 15.92 - fprintf(stderr, "Short packet.\n"); 15.93 - continue; 15.94 - } 15.95 - 15.96 -#ifdef BSDEBUG 15.97 - fprintf(stdout, "RX: %u bytes op=%u id=0x%llx\n", 15.98 - len, msgbuf.hdr.operation, msgbuf.hdr.id); 15.99 -#endif 15.100 - 15.101 - switch (msgbuf.hdr.operation) { 15.102 - case BSOP_READBLOCK: 15.103 - if (len < MSGBUFSIZE_ID) { 15.104 - fprintf(stderr, "Short packet (readblock %u).\n", len); 15.105 - continue; 15.106 - } 15.107 - rc = readblock_into(msgbuf.hdr.id, msgbuf.block); 15.108 - if (rc < 0) { 15.109 - fprintf(stderr, "readblock error\n"); 15.110 - msgbuf.hdr.flags = BSOP_FLAG_ERROR; 15.111 - send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); 15.112 - continue; 15.113 - } 15.114 - msgbuf.hdr.flags = 0; 15.115 - send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_BLOCK); 15.116 - break; 15.117 - case BSOP_WRITEBLOCK: 15.118 - if (len < MSGBUFSIZE_BLOCK) { 15.119 - fprintf(stderr, "Short packet (writeblock %u).\n", len); 15.120 - continue; 15.121 - } 15.122 - rc = writeblock(msgbuf.hdr.id, msgbuf.block); 15.123 - if (rc < 0) { 15.124 - fprintf(stderr, "writeblock error\n"); 15.125 - msgbuf.hdr.flags = BSOP_FLAG_ERROR; 15.126 - send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); 15.127 - continue; 15.128 - } 15.129 - msgbuf.hdr.flags = 0; 15.130 - send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); 15.131 - break; 15.132 - case BSOP_ALLOCBLOCK: 15.133 - if (len < MSGBUFSIZE_BLOCK) { 15.134 - fprintf(stderr, "Short packet (allocblock %u).\n", len); 15.135 - continue; 15.136 - } 15.137 - bid = allocblock(msgbuf.block); 15.138 - if (bid == ALLOCFAIL) { 15.139 - fprintf(stderr, "allocblock 
error\n"); 15.140 - msgbuf.hdr.flags = BSOP_FLAG_ERROR; 15.141 - send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); 15.142 - continue; 15.143 - } 15.144 - msgbuf.hdr.id = bid; 15.145 - msgbuf.hdr.flags = 0; 15.146 - send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID); 15.147 - break; 15.148 - } 15.149 - 15.150 - } 15.151 -} 15.152 - 15.153 -/** 15.154 - * readblock: read a block from disk 15.155 - * @id: block id to read 15.156 - * @block: pointer to buffer to receive block 15.157 - * 15.158 - * @return: 0 if OK, other on error 15.159 - */ 15.160 - 15.161 -int readblock_into(uint64_t id, void *block) { 15.162 - if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { 15.163 - printf ("%Ld\n", (id - 1) * BLOCK_SIZE); 15.164 - perror("readblock lseek"); 15.165 - return -1; 15.166 - } 15.167 - if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) { 15.168 - perror("readblock read"); 15.169 - return -1; 15.170 - } 15.171 - return 0; 15.172 -} 15.173 - 15.174 -/** 15.175 - * writeblock: write an existing block to disk 15.176 - * @id: block id 15.177 - * @block: pointer to block 15.178 - * 15.179 - * @return: zero on success, -1 on failure 15.180 - */ 15.181 -int writeblock(uint64_t id, void *block) { 15.182 - if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) { 15.183 - perror("writeblock lseek"); 15.184 - return -1; 15.185 - } 15.186 - if (write(block_fp, block, BLOCK_SIZE) < 0) { 15.187 - perror("writeblock write"); 15.188 - return -1; 15.189 - } 15.190 - return 0; 15.191 -} 15.192 - 15.193 -/** 15.194 - * allocblock: write a new block to disk 15.195 - * @block: pointer to block 15.196 - * 15.197 - * @return: new id of block on disk 15.198 - */ 15.199 -static uint64_t lastblock = 0; 15.200 - 15.201 -uint64_t allocblock(void *block) { 15.202 - uint64_t lb; 15.203 - off64_t pos; 15.204 - 15.205 - retry: 15.206 - pos = lseek64(block_fp, 0, SEEK_END); 15.207 - if (pos == (off64_t)-1) { 15.208 - perror("allocblock lseek"); 15.209 - return 
ALLOCFAIL; 15.210 - } 15.211 - if (pos % BLOCK_SIZE != 0) { 15.212 - fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE); 15.213 - return ALLOCFAIL; 15.214 - } 15.215 - if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) { 15.216 - perror("allocblock write"); 15.217 - return ALLOCFAIL; 15.218 - } 15.219 - lb = pos / BLOCK_SIZE + 1; 15.220 - 15.221 -#ifdef BS_ALLOC_HACK 15.222 - if (lb < BS_ALLOC_SKIP) 15.223 - goto retry; 15.224 -#endif 15.225 - 15.226 - if (lb <= lastblock) 15.227 - printf("[*** %Ld alredy allocated! ***]\n", lb); 15.228 - 15.229 - lastblock = lb; 15.230 - return lb; 15.231 -} 15.232 - 15.233 -/** 15.234 - * newblock: get a new in-memory block set to zeros 15.235 - * 15.236 - * @return: pointer to new block, NULL on error 15.237 - */ 15.238 -void *newblock(void) { 15.239 - void *block = malloc(BLOCK_SIZE); 15.240 - if (block == NULL) { 15.241 - perror("newblock"); 15.242 - return NULL; 15.243 - } 15.244 - memset(block, 0, BLOCK_SIZE); 15.245 - return block; 15.246 -} 15.247 - 15.248 - 15.249 -/** 15.250 - * freeblock: unallocate an in-memory block 15.251 - * @id: block id (zero if this is only in-memory) 15.252 - * @block: block to be freed 15.253 - */ 15.254 -void freeblock(void *block) { 15.255 - free(block); 15.256 -} 15.257 - 15.258 - 15.259 -int main(int argc, char **argv) 15.260 -{ 15.261 - block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644); 15.262 - 15.263 - if (block_fp < 0) { 15.264 - perror("open"); 15.265 - return -1; 15.266 - } 15.267 - 15.268 - bssock = open_socket(BLOCKSTORED_PORT); 15.269 - if (bssock < 0) { 15.270 - return -1; 15.271 - } 15.272 - 15.273 - service_loop(); 15.274 - 15.275 - close(bssock); 15.276 - 15.277 - return 0; 15.278 -}
16.1 --- a/tools/blktap/parallax/bstest.c Fri Jun 16 18:19:40 2006 +0100 16.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 16.3 @@ -1,191 +0,0 @@ 16.4 -/************************************************************************** 16.5 - * 16.6 - * bstest.c 16.7 - * 16.8 - * Block store daemon test program. 16.9 - * 16.10 - * usage: bstest <host>|X {r|w|a} ID 16.11 - * 16.12 - */ 16.13 - 16.14 -#include <fcntl.h> 16.15 -#include <unistd.h> 16.16 -#include <stdio.h> 16.17 -#include <stdlib.h> 16.18 -#include <string.h> 16.19 -#include <sys/types.h> 16.20 -#include <sys/stat.h> 16.21 -#include <sys/socket.h> 16.22 -#include <sys/ioctl.h> 16.23 -#include <netinet/in.h> 16.24 -#include <netdb.h> 16.25 -#include <errno.h> 16.26 -#include "blockstore.h" 16.27 - 16.28 -int direct(char *host, uint32_t op, uint64_t id, int len) { 16.29 - struct sockaddr_in sn, peer; 16.30 - int sock; 16.31 - bsmsg_t msgbuf; 16.32 - int rc, slen; 16.33 - struct hostent *addr; 16.34 - 16.35 - addr = gethostbyname(host); 16.36 - if (!addr) { 16.37 - perror("bad hostname"); 16.38 - exit(1); 16.39 - } 16.40 - peer.sin_family = addr->h_addrtype; 16.41 - peer.sin_port = htons(BLOCKSTORED_PORT); 16.42 - peer.sin_addr.s_addr = ((struct in_addr *)(addr->h_addr))->s_addr; 16.43 - fprintf(stderr, "Sending to: %u.%u.%u.%u\n", 16.44 - (unsigned int)(unsigned char)addr->h_addr[0], 16.45 - (unsigned int)(unsigned char)addr->h_addr[1], 16.46 - (unsigned int)(unsigned char)addr->h_addr[2], 16.47 - (unsigned int)(unsigned char)addr->h_addr[3]); 16.48 - 16.49 - sock = socket(AF_INET, SOCK_DGRAM, 0); 16.50 - if (sock < 0) { 16.51 - perror("Bad socket"); 16.52 - exit(1); 16.53 - } 16.54 - memset(&sn, 0, sizeof(sn)); 16.55 - sn.sin_family = AF_INET; 16.56 - sn.sin_port = htons(BLOCKSTORED_PORT); 16.57 - sn.sin_addr.s_addr = htonl(INADDR_ANY); 16.58 - if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) { 16.59 - perror("bind"); 16.60 - close(sock); 16.61 - exit(1); 16.62 - } 16.63 - 16.64 - memset((void 
*)&msgbuf, 0, sizeof(msgbuf)); 16.65 - msgbuf.operation = op; 16.66 - msgbuf.id = id; 16.67 - 16.68 - rc = sendto(sock, (void *)&msgbuf, len, 0, 16.69 - (struct sockaddr *)&peer, sizeof(peer)); 16.70 - if (rc < 0) { 16.71 - perror("sendto"); 16.72 - exit(1); 16.73 - } 16.74 - 16.75 - slen = sizeof(peer); 16.76 - len = recvfrom(sock, (void *)&msgbuf, sizeof(msgbuf), 0, 16.77 - (struct sockaddr *)&peer, &slen); 16.78 - if (len < 0) { 16.79 - perror("recvfrom"); 16.80 - exit(1); 16.81 - } 16.82 - 16.83 - printf("Reply %u bytes:\n", len); 16.84 - if (len >= MSGBUFSIZE_OP) 16.85 - printf(" operation: %u\n", msgbuf.operation); 16.86 - if (len >= MSGBUFSIZE_FLAGS) 16.87 - printf(" flags: 0x%x\n", msgbuf.flags); 16.88 - if (len >= MSGBUFSIZE_ID) 16.89 - printf(" id: %llu\n", msgbuf.id); 16.90 - if (len >= (MSGBUFSIZE_ID + 4)) 16.91 - printf(" data: %02x %02x %02x %02x...\n", 16.92 - (unsigned int)msgbuf.block[0], 16.93 - (unsigned int)msgbuf.block[1], 16.94 - (unsigned int)msgbuf.block[2], 16.95 - (unsigned int)msgbuf.block[3]); 16.96 - 16.97 - if (sock > 0) 16.98 - close(sock); 16.99 - 16.100 - return 0; 16.101 -} 16.102 - 16.103 -int main (int argc, char **argv) { 16.104 - 16.105 - uint32_t op = 0; 16.106 - uint64_t id = 0; 16.107 - int len = 0, rc; 16.108 - void *block; 16.109 - 16.110 - if (argc < 3) { 16.111 - fprintf(stderr, "usage: bstest <host>|X {r|w|a} ID\n"); 16.112 - return 1; 16.113 - } 16.114 - 16.115 - switch (argv[2][0]) { 16.116 - case 'r': 16.117 - case 'R': 16.118 - op = BSOP_READBLOCK; 16.119 - len = MSGBUFSIZE_ID; 16.120 - break; 16.121 - case 'w': 16.122 - case 'W': 16.123 - op = BSOP_WRITEBLOCK; 16.124 - len = MSGBUFSIZE_BLOCK; 16.125 - break; 16.126 - case 'a': 16.127 - case 'A': 16.128 - op = BSOP_ALLOCBLOCK; 16.129 - len = MSGBUFSIZE_BLOCK; 16.130 - break; 16.131 - default: 16.132 - fprintf(stderr, "Unknown action '%s'.\n", argv[2]); 16.133 - return 1; 16.134 - } 16.135 - 16.136 - if (argc >= 4) 16.137 - id = atoll(argv[3]); 16.138 - 16.139 - if 
(strcmp(argv[1], "X") == 0) { 16.140 - rc = __init_blockstore(); 16.141 - if (rc < 0) { 16.142 - fprintf(stderr, "blockstore init failed.\n"); 16.143 - return 1; 16.144 - } 16.145 - switch(op) { 16.146 - case BSOP_READBLOCK: 16.147 - block = readblock(id); 16.148 - if (block) { 16.149 - printf("data: %02x %02x %02x %02x...\n", 16.150 - (unsigned int)((unsigned char*)block)[0], 16.151 - (unsigned int)((unsigned char*)block)[1], 16.152 - (unsigned int)((unsigned char*)block)[2], 16.153 - (unsigned int)((unsigned char*)block)[3]); 16.154 - } 16.155 - break; 16.156 - case BSOP_WRITEBLOCK: 16.157 - block = malloc(BLOCK_SIZE); 16.158 - if (!block) { 16.159 - perror("bstest malloc"); 16.160 - return 1; 16.161 - } 16.162 - memset(block, 0, BLOCK_SIZE); 16.163 - rc = writeblock(id, block); 16.164 - if (rc != 0) { 16.165 - printf("error\n"); 16.166 - } 16.167 - else { 16.168 - printf("OK\n"); 16.169 - } 16.170 - break; 16.171 - case BSOP_ALLOCBLOCK: 16.172 - block = malloc(BLOCK_SIZE); 16.173 - if (!block) { 16.174 - perror("bstest malloc"); 16.175 - return 1; 16.176 - } 16.177 - memset(block, 0, BLOCK_SIZE); 16.178 - id = allocblock_hint(block, id); 16.179 - if (id == 0) { 16.180 - printf("error\n"); 16.181 - } 16.182 - else { 16.183 - printf("ID: %llu\n", id); 16.184 - } 16.185 - break; 16.186 - } 16.187 - } 16.188 - else { 16.189 - direct(argv[1], op, id, len); 16.190 - } 16.191 - 16.192 - 16.193 - return 0; 16.194 -}
17.1 --- a/tools/blktap/parallax/parallax.c Fri Jun 16 18:19:40 2006 +0100 17.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 17.3 @@ -1,608 +0,0 @@ 17.4 -/************************************************************************** 17.5 - * 17.6 - * parallax.c 17.7 - * 17.8 - * The Parallax Storage Server 17.9 - * 17.10 - */ 17.11 - 17.12 - 17.13 -#include <stdio.h> 17.14 -#include <stdlib.h> 17.15 -#include <string.h> 17.16 -#include <pthread.h> 17.17 -#include "blktaplib.h" 17.18 -#include "blockstore.h" 17.19 -#include "vdi.h" 17.20 -#include "block-async.h" 17.21 -#include "requests-async.h" 17.22 - 17.23 -#define PARALLAX_DEV 61440 17.24 -#define SECTS_PER_NODE 8 17.25 - 17.26 - 17.27 -#if 0 17.28 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 17.29 -#else 17.30 -#define DPRINTF(_f, _a...) ((void)0) 17.31 -#endif 17.32 - 17.33 -/* ------[ session records ]----------------------------------------------- */ 17.34 - 17.35 -#define BLKIF_HASHSZ 1024 17.36 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) 17.37 - 17.38 -#define VDI_HASHSZ 16 17.39 -#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1)) 17.40 - 17.41 -typedef struct blkif { 17.42 - domid_t domid; 17.43 - unsigned int handle; 17.44 - enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; 17.45 - vdi_t *vdi_hash[VDI_HASHSZ]; 17.46 - struct blkif *hash_next; 17.47 -} blkif_t; 17.48 - 17.49 -static blkif_t *blkif_hash[BLKIF_HASHSZ]; 17.50 - 17.51 -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) 17.52 -{ 17.53 - if ( handle != 0 ) 17.54 - printf("blktap/parallax don't currently support non-0 dev handles!\n"); 17.55 - 17.56 - blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; 17.57 - while ( (blkif != NULL) && 17.58 - ((blkif->domid != domid) || (blkif->handle != handle)) ) 17.59 - blkif = blkif->hash_next; 17.60 - return blkif; 17.61 -} 17.62 - 17.63 -vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device) 17.64 -{ 17.65 - vdi_t *vdi = 
blkif->vdi_hash[VDI_HASH(device)]; 17.66 - 17.67 - while ((vdi != NULL) && (vdi->vdevice != device)) 17.68 - vdi = vdi->next; 17.69 - 17.70 - return vdi; 17.71 -} 17.72 - 17.73 -/* ------[ control message handling ]-------------------------------------- */ 17.74 - 17.75 -void blkif_create(blkif_be_create_t *create) 17.76 -{ 17.77 - domid_t domid = create->domid; 17.78 - unsigned int handle = create->blkif_handle; 17.79 - blkif_t **pblkif, *blkif; 17.80 - 17.81 - DPRINTF("parallax (blkif_create): create is %p\n", create); 17.82 - 17.83 - if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL ) 17.84 - { 17.85 - DPRINTF("Could not create blkif: out of memory\n"); 17.86 - create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 17.87 - return; 17.88 - } 17.89 - 17.90 - memset(blkif, 0, sizeof(*blkif)); 17.91 - blkif->domid = domid; 17.92 - blkif->handle = handle; 17.93 - blkif->status = DISCONNECTED; 17.94 - 17.95 - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; 17.96 - while ( *pblkif != NULL ) 17.97 - { 17.98 - if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) 17.99 - { 17.100 - DPRINTF("Could not create blkif: already exists (%d,%d)\n", 17.101 - domid, handle); 17.102 - create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; 17.103 - free(blkif); 17.104 - return; 17.105 - } 17.106 - pblkif = &(*pblkif)->hash_next; 17.107 - } 17.108 - 17.109 - blkif->hash_next = *pblkif; 17.110 - *pblkif = blkif; 17.111 - 17.112 - DPRINTF("Successfully created blkif\n"); 17.113 - create->status = BLKIF_BE_STATUS_OKAY; 17.114 -} 17.115 - 17.116 -void blkif_destroy(blkif_be_destroy_t *destroy) 17.117 -{ 17.118 - domid_t domid = destroy->domid; 17.119 - unsigned int handle = destroy->blkif_handle; 17.120 - blkif_t **pblkif, *blkif; 17.121 - 17.122 - DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy); 17.123 - 17.124 - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; 17.125 - while ( (blkif = *pblkif) != NULL ) 17.126 - { 17.127 - if ( (blkif->domid == domid) && 
(blkif->handle == handle) ) 17.128 - { 17.129 - if ( blkif->status != DISCONNECTED ) 17.130 - goto still_connected; 17.131 - goto destroy; 17.132 - } 17.133 - pblkif = &blkif->hash_next; 17.134 - } 17.135 - 17.136 - destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 17.137 - return; 17.138 - 17.139 - still_connected: 17.140 - destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; 17.141 - return; 17.142 - 17.143 - destroy: 17.144 - *pblkif = blkif->hash_next; 17.145 - free(blkif); 17.146 - destroy->status = BLKIF_BE_STATUS_OKAY; 17.147 -} 17.148 - 17.149 -void vbd_create(blkif_be_vbd_create_t *create) 17.150 -{ 17.151 - blkif_t *blkif; 17.152 - vdi_t *vdi, **vdip; 17.153 - blkif_vdev_t vdevice = create->vdevice; 17.154 - 17.155 - DPRINTF("parallax (vbd_create): create=%p\n", create); 17.156 - 17.157 - blkif = blkif_find_by_handle(create->domid, create->blkif_handle); 17.158 - if ( blkif == NULL ) 17.159 - { 17.160 - DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", 17.161 - create->domid, create->blkif_handle); 17.162 - create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 17.163 - return; 17.164 - } 17.165 - 17.166 - /* VDI identifier is in grow->extent.sector_start */ 17.167 - DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", 17.168 - (unsigned long)create->dev_handle); 17.169 - 17.170 - vdi = vdi_get(create->dev_handle); 17.171 - if (vdi == NULL) 17.172 - { 17.173 - printf("parallax (vbd_create): VDI %lx not found.\n", 17.174 - (unsigned long)create->dev_handle); 17.175 - create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; 17.176 - return; 17.177 - } 17.178 - 17.179 - vdi->next = NULL; 17.180 - vdi->vdevice = vdevice; 17.181 - vdip = &blkif->vdi_hash[VDI_HASH(vdevice)]; 17.182 - while (*vdip != NULL) 17.183 - vdip = &(*vdip)->next; 17.184 - *vdip = vdi; 17.185 - 17.186 - DPRINTF("blkif_create succeeded\n"); 17.187 - create->status = BLKIF_BE_STATUS_OKAY; 17.188 -} 17.189 - 17.190 -void vbd_destroy(blkif_be_vbd_destroy_t *destroy) 17.191 -{ 
17.192 - blkif_t *blkif; 17.193 - vdi_t *vdi, **vdip; 17.194 - blkif_vdev_t vdevice = destroy->vdevice; 17.195 - 17.196 - blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle); 17.197 - if ( blkif == NULL ) 17.198 - { 17.199 - DPRINTF("vbd_destroy attempted for non-existent blkif (%u,%u)\n", 17.200 - destroy->domid, destroy->blkif_handle); 17.201 - destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 17.202 - return; 17.203 - } 17.204 - 17.205 - vdip = &blkif->vdi_hash[VDI_HASH(vdevice)]; 17.206 - while ((*vdip != NULL) && ((*vdip)->vdevice != vdevice)) 17.207 - vdip = &(*vdip)->next; 17.208 - 17.209 - if (*vdip != NULL) 17.210 - { 17.211 - vdi = *vdip; 17.212 - *vdip = vdi->next; 17.213 - vdi_put(vdi); 17.214 - } 17.215 - 17.216 -} 17.217 - 17.218 -int parallax_control(control_msg_t *msg) 17.219 -{ 17.220 - domid_t domid; 17.221 - int ret; 17.222 - 17.223 - DPRINTF("parallax_control: msg is %p\n", msg); 17.224 - 17.225 - if (msg->type != CMSG_BLKIF_BE) 17.226 - { 17.227 - printf("Unexpected control message (%d)\n", msg->type); 17.228 - return 0; 17.229 - } 17.230 - 17.231 - switch(msg->subtype) 17.232 - { 17.233 - case CMSG_BLKIF_BE_CREATE: 17.234 - if ( msg->length != sizeof(blkif_be_create_t) ) 17.235 - goto parse_error; 17.236 - blkif_create((blkif_be_create_t *)msg->msg); 17.237 - break; 17.238 - 17.239 - case CMSG_BLKIF_BE_DESTROY: 17.240 - if ( msg->length != sizeof(blkif_be_destroy_t) ) 17.241 - goto parse_error; 17.242 - blkif_destroy((blkif_be_destroy_t *)msg->msg); 17.243 - break; 17.244 - 17.245 - case CMSG_BLKIF_BE_VBD_CREATE: 17.246 - if ( msg->length != sizeof(blkif_be_vbd_create_t) ) 17.247 - goto parse_error; 17.248 - vbd_create((blkif_be_vbd_create_t *)msg->msg); 17.249 - break; 17.250 - 17.251 - case CMSG_BLKIF_BE_VBD_DESTROY: 17.252 - if ( msg->length != sizeof(blkif_be_vbd_destroy_t) ) 17.253 - goto parse_error; 17.254 - vbd_destroy((blkif_be_vbd_destroy_t *)msg->msg); 17.255 - break; 17.256 - 17.257 - case 
CMSG_BLKIF_BE_CONNECT: 17.258 - case CMSG_BLKIF_BE_DISCONNECT: 17.259 - /* we don't manage the device channel, the tap does. */ 17.260 - break; 17.261 - 17.262 - default: 17.263 - goto parse_error; 17.264 - } 17.265 - return 0; 17.266 -parse_error: 17.267 - printf("Bad control message!\n"); 17.268 - return 0; 17.269 - 17.270 -} 17.271 - 17.272 -int parallax_probe(blkif_request_t *req, blkif_t *blkif) 17.273 -{ 17.274 - blkif_response_t *rsp; 17.275 - vdisk_t *img_info; 17.276 - vdi_t *vdi; 17.277 - int i, nr_vdis = 0; 17.278 - 17.279 - DPRINTF("parallax_probe: req=%p, blkif=%p\n", req, blkif); 17.280 - 17.281 - /* We expect one buffer only. */ 17.282 - if ( req->nr_segments != 1 ) 17.283 - goto err; 17.284 - 17.285 - /* Make sure the buffer is page-sized. */ 17.286 - if ( (req->seg[0].first_sect != 0) || (req->seg[0].last_sect != 7) ) 17.287 - goto err; 17.288 - 17.289 - /* fill the list of devices */ 17.290 - for (i=0; i<VDI_HASHSZ; i++) { 17.291 - vdi = blkif->vdi_hash[i]; 17.292 - while (vdi) { 17.293 - img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0); 17.294 - img_info[nr_vdis].device = vdi->vdevice; 17.295 - img_info[nr_vdis].info = 0; 17.296 - /* The -1 here accounts for the LSB in the radix tree */ 17.297 - img_info[nr_vdis].capacity = 17.298 - ((1LL << (VDI_HEIGHT-1)) * SECTS_PER_NODE); 17.299 - nr_vdis++; 17.300 - vdi = vdi->next; 17.301 - } 17.302 - } 17.303 - 17.304 - 17.305 - rsp = (blkif_response_t *)req; 17.306 - rsp->id = req->id; 17.307 - rsp->operation = BLKIF_OP_PROBE; 17.308 - rsp->status = nr_vdis; /* number of disks */ 17.309 - 17.310 - DPRINTF("parallax_probe: send positive response (nr_vdis=%d)\n", nr_vdis); 17.311 - return BLKTAP_RESPOND; 17.312 -err: 17.313 - rsp = (blkif_response_t *)req; 17.314 - rsp->id = req->id; 17.315 - rsp->operation = BLKIF_OP_PROBE; 17.316 - rsp->status = BLKIF_RSP_ERROR; 17.317 - 17.318 - DPRINTF("parallax_probe: send error response\n"); 17.319 - return BLKTAP_RESPOND; 17.320 -} 17.321 - 17.322 -typedef 
struct { 17.323 - blkif_request_t *req; 17.324 - int count; 17.325 - int error; 17.326 - pthread_mutex_t mutex; 17.327 -} pending_t; 17.328 - 17.329 -#define MAX_REQUESTS 64 17.330 -pending_t pending_list[MAX_REQUESTS]; 17.331 - 17.332 -struct cb_param { 17.333 - pending_t *pent; 17.334 - int segment; 17.335 - uint64_t sector; 17.336 - uint64_t vblock; /* for debug printing -- can be removed. */ 17.337 -}; 17.338 - 17.339 -static void read_cb(struct io_ret r, void *in_param) 17.340 -{ 17.341 - struct cb_param *param = (struct cb_param *)in_param; 17.342 - pending_t *p = param->pent; 17.343 - int segment = param->segment; 17.344 - blkif_request_t *req = p->req; 17.345 - unsigned long size, offset, start; 17.346 - char *dpage, *spage; 17.347 - 17.348 - spage = IO_BLOCK(r); 17.349 - if (spage == NULL) { p->error++; goto finish; } 17.350 - dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), segment); 17.351 - 17.352 - /* Calculate read size and offset within the read block. */ 17.353 - 17.354 - offset = (param->sector << SECTOR_SHIFT) % BLOCK_SIZE; 17.355 - size = (req->seg[segment].last_sect - req->seg[segment].first_sect + 1) << 17.356 - SECTOR_SHIFT; 17.357 - start = req->seg[segment].first_sect << SECTOR_SHIFT; 17.358 - 17.359 - DPRINTF("ParallaxRead: sect: %lld (%ld,%ld), " 17.360 - "vblock %llx, " 17.361 - "size %lx\n", 17.362 - param->sector, 17.363 - p->req->seg[segment].first_sect, 17.364 - p->req->seg[segment].last_sect, 17.365 - param->vblock, size); 17.366 - 17.367 - memcpy(dpage + start, spage + offset, size); 17.368 - freeblock(spage); 17.369 - 17.370 - /* Done the read. Now update the pending record. 
*/ 17.371 - finish: 17.372 - pthread_mutex_lock(&p->mutex); 17.373 - p->count--; 17.374 - 17.375 - if (p->count == 0) { 17.376 - blkif_response_t *rsp; 17.377 - 17.378 - rsp = (blkif_response_t *)req; 17.379 - rsp->id = req->id; 17.380 - rsp->operation = BLKIF_OP_READ; 17.381 - if (p->error == 0) { 17.382 - rsp->status = BLKIF_RSP_OKAY; 17.383 - } else { 17.384 - rsp->status = BLKIF_RSP_ERROR; 17.385 - } 17.386 - blktap_inject_response(rsp); 17.387 - } 17.388 - 17.389 - pthread_mutex_unlock(&p->mutex); 17.390 - 17.391 - free(param); /* TODO: replace with cached alloc/dealloc */ 17.392 -} 17.393 - 17.394 -int parallax_read(blkif_request_t *req, blkif_t *blkif) 17.395 -{ 17.396 - blkif_response_t *rsp; 17.397 - uint64_t vblock, gblock; 17.398 - vdi_t *vdi; 17.399 - uint64_t sector; 17.400 - int i; 17.401 - char *dpage, *spage; 17.402 - pending_t *pent; 17.403 - 17.404 - vdi = blkif_get_vdi(blkif, req->device); 17.405 - 17.406 - if ( vdi == NULL ) 17.407 - goto err; 17.408 - 17.409 - pent = &pending_list[ID_TO_IDX(req->id)]; 17.410 - pent->count = req->nr_segments; 17.411 - pent->req = req; 17.412 - pthread_mutex_init(&pent->mutex, NULL); 17.413 - 17.414 - for (i = 0; i < req->nr_segments; i++) { 17.415 - pthread_t tid; 17.416 - int ret; 17.417 - struct cb_param *p; 17.418 - 17.419 - /* Round the requested segment to a block address. */ 17.420 - sector = req->sector_number + (8*i); 17.421 - vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT; 17.422 - 17.423 - /* TODO: Replace this call to malloc with a cached allocation */ 17.424 - p = (struct cb_param *)malloc(sizeof(struct cb_param)); 17.425 - p->pent = pent; 17.426 - p->sector = sector; 17.427 - p->segment = i; 17.428 - p->vblock = vblock; /* dbg */ 17.429 - 17.430 - /* Get that block from the store. 
*/ 17.431 - vdi_read(vdi, vblock, read_cb, (void *)p); 17.432 - } 17.433 - 17.434 - return BLKTAP_STOLEN; 17.435 - 17.436 -err: 17.437 - rsp = (blkif_response_t *)req; 17.438 - rsp->id = req->id; 17.439 - rsp->operation = BLKIF_OP_READ; 17.440 - rsp->status = BLKIF_RSP_ERROR; 17.441 - 17.442 - return BLKTAP_RESPOND; 17.443 -} 17.444 - 17.445 -static void write_cb(struct io_ret r, void *in_param) 17.446 -{ 17.447 - struct cb_param *param = (struct cb_param *)in_param; 17.448 - pending_t *p = param->pent; 17.449 - blkif_request_t *req = p->req; 17.450 - 17.451 - /* catch errors from the block code. */ 17.452 - if (IO_INT(r) < 0) p->error++; 17.453 - 17.454 - pthread_mutex_lock(&p->mutex); 17.455 - p->count--; 17.456 - 17.457 - if (p->count == 0) { 17.458 - blkif_response_t *rsp; 17.459 - 17.460 - rsp = (blkif_response_t *)req; 17.461 - rsp->id = req->id; 17.462 - rsp->operation = BLKIF_OP_WRITE; 17.463 - if (p->error == 0) { 17.464 - rsp->status = BLKIF_RSP_OKAY; 17.465 - } else { 17.466 - rsp->status = BLKIF_RSP_ERROR; 17.467 - } 17.468 - blktap_inject_response(rsp); 17.469 - } 17.470 - 17.471 - pthread_mutex_unlock(&p->mutex); 17.472 - 17.473 - free(param); /* TODO: replace with cached alloc/dealloc */ 17.474 -} 17.475 - 17.476 -int parallax_write(blkif_request_t *req, blkif_t *blkif) 17.477 -{ 17.478 - blkif_response_t *rsp; 17.479 - uint64_t sector; 17.480 - int i, writable = 0; 17.481 - uint64_t vblock, gblock; 17.482 - char *spage; 17.483 - unsigned long size, offset, start; 17.484 - vdi_t *vdi; 17.485 - pending_t *pent; 17.486 - 17.487 - vdi = blkif_get_vdi(blkif, req->device); 17.488 - 17.489 - if ( vdi == NULL ) 17.490 - goto err; 17.491 - 17.492 - pent = &pending_list[ID_TO_IDX(req->id)]; 17.493 - pent->count = req->nr_segments; 17.494 - pent->req = req; 17.495 - pthread_mutex_init(&pent->mutex, NULL); 17.496 - 17.497 - for (i = 0; i < req->nr_segments; i++) { 17.498 - struct cb_param *p; 17.499 - 17.500 - spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 
17.501 - 17.502 - /* Round the requested segment to a block address. */ 17.503 - 17.504 - sector = req->sector_number + (8*i); 17.505 - vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT; 17.506 - 17.507 - /* Calculate read size and offset within the read block. */ 17.508 - 17.509 - offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE; 17.510 - size = (req->seg[i].last_sect - req->seg[i].first_sect + 1) << 17.511 - SECTOR_SHIFT; 17.512 - start = req->seg[i].first_sect << SECTOR_SHIFT; 17.513 - 17.514 - DPRINTF("ParallaxWrite: sect: %lld (%ld,%ld), " 17.515 - "vblock %llx, gblock %llx, " 17.516 - "size %lx\n", 17.517 - sector, 17.518 - req->seg[i].first_sect, req->seg[i].last_sect, 17.519 - vblock, gblock, size); 17.520 - 17.521 - /* XXX: For now we just freak out if they try to write a */ 17.522 - /* non block-sized, block-aligned page. */ 17.523 - 17.524 - if ((offset != 0) || (size != BLOCK_SIZE) || (start != 0)) { 17.525 - printf("]\n] STRANGE WRITE!\n]\n"); 17.526 - goto err; 17.527 - } 17.528 - 17.529 - /* TODO: Replace this call to malloc with a cached allocation */ 17.530 - p = (struct cb_param *)malloc(sizeof(struct cb_param)); 17.531 - p->pent = pent; 17.532 - p->sector = sector; 17.533 - p->segment = i; 17.534 - p->vblock = vblock; /* dbg */ 17.535 - 17.536 - /* Issue the write to the store. 
*/ 17.537 - vdi_write(vdi, vblock, spage, write_cb, (void *)p); 17.538 - } 17.539 - 17.540 - return BLKTAP_STOLEN; 17.541 - 17.542 -err: 17.543 - rsp = (blkif_response_t *)req; 17.544 - rsp->id = req->id; 17.545 - rsp->operation = BLKIF_OP_WRITE; 17.546 - rsp->status = BLKIF_RSP_ERROR; 17.547 - 17.548 - return BLKTAP_RESPOND; 17.549 -} 17.550 - 17.551 -int parallax_request(blkif_request_t *req) 17.552 -{ 17.553 - blkif_response_t *rsp; 17.554 - domid_t dom = ID_TO_DOM(req->id); 17.555 - blkif_t *blkif = blkif_find_by_handle(dom, 0); 17.556 - 17.557 - if (blkif == NULL) 17.558 - goto err; 17.559 - 17.560 - if ( req->operation == BLKIF_OP_PROBE ) { 17.561 - 17.562 - return parallax_probe(req, blkif); 17.563 - 17.564 - } else if ( req->operation == BLKIF_OP_READ ) { 17.565 - 17.566 - return parallax_read(req, blkif); 17.567 - 17.568 - } else if ( req->operation == BLKIF_OP_WRITE ) { 17.569 - 17.570 - return parallax_write(req, blkif); 17.571 - 17.572 - } else { 17.573 - printf("Unknown request message type!\n"); 17.574 - /* Unknown operation */ 17.575 - goto err; 17.576 - } 17.577 - 17.578 -err: 17.579 - rsp = (blkif_response_t *)req; 17.580 - rsp->operation = req->operation; 17.581 - rsp->id = req->id; 17.582 - rsp->status = BLKIF_RSP_ERROR; 17.583 - return BLKTAP_RESPOND; 17.584 -} 17.585 - 17.586 -void __init_parallax(void) 17.587 -{ 17.588 - memset(blkif_hash, 0, sizeof(blkif_hash)); 17.589 -} 17.590 - 17.591 - 17.592 - 17.593 -int main(int argc, char *argv[]) 17.594 -{ 17.595 - DPRINTF("parallax: starting.\n"); 17.596 - __init_blockstore(); 17.597 - DPRINTF("parallax: initialized blockstore...\n"); 17.598 - init_block_async(); 17.599 - DPRINTF("parallax: initialized async blocks...\n"); 17.600 - __init_vdi(); 17.601 - DPRINTF("parallax: initialized vdi registry etc...\n"); 17.602 - __init_parallax(); 17.603 - DPRINTF("parallax: initialized local stuff..\n"); 17.604 - 17.605 - blktap_register_ctrl_hook("parallax_control", parallax_control); 17.606 - 
blktap_register_request_hook("parallax_request", parallax_request); 17.607 - DPRINTF("parallax: added ctrl + request hooks, starting listen...\n"); 17.608 - blktap_listen(); 17.609 - 17.610 - return 0; 17.611 -}
18.1 --- a/tools/blktap/parallax/radix.c Fri Jun 16 18:19:40 2006 +0100 18.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 18.3 @@ -1,631 +0,0 @@ 18.4 -/* 18.5 - * Radix tree for mapping (up to) 63-bit virtual block IDs to 18.6 - * 63-bit global block IDs 18.7 - * 18.8 - * Pointers within the tree set aside the least significant bit to indicate 18.9 - * whther or not the target block is writable from this node. 18.10 - * 18.11 - * The block with ID 0 is assumed to be an empty block of all zeros 18.12 - */ 18.13 - 18.14 -#include <unistd.h> 18.15 -#include <stdio.h> 18.16 -#include <stdlib.h> 18.17 -#include <assert.h> 18.18 -#include <string.h> 18.19 -#include <pthread.h> 18.20 -#include "blockstore.h" 18.21 -#include "radix.h" 18.22 - 18.23 -#define RADIX_TREE_MAP_SHIFT 9 18.24 -#define RADIX_TREE_MAP_MASK 0x1ff 18.25 -#define RADIX_TREE_MAP_ENTRIES 512 18.26 - 18.27 -/* 18.28 -#define DEBUG 18.29 -*/ 18.30 - 18.31 -/* Experimental radix cache. */ 18.32 - 18.33 -static pthread_mutex_t rcache_mutex = PTHREAD_MUTEX_INITIALIZER; 18.34 -static int rcache_count = 0; 18.35 -#define RCACHE_MAX 1024 18.36 - 18.37 -typedef struct rcache_st { 18.38 - radix_tree_node *node; 18.39 - uint64_t id; 18.40 - struct rcache_st *hash_next; 18.41 - struct rcache_st *cache_next; 18.42 - struct rcache_st *cache_prev; 18.43 -} rcache_t; 18.44 - 18.45 -static rcache_t *rcache_head = NULL; 18.46 -static rcache_t *rcache_tail = NULL; 18.47 - 18.48 -#define RCHASH_SIZE 512ULL 18.49 -rcache_t *rcache[RCHASH_SIZE]; 18.50 -#define RCACHE_HASH(_id) ((_id) & (RCHASH_SIZE - 1)) 18.51 - 18.52 -void __rcache_init(void) 18.53 -{ 18.54 - int i; 18.55 - 18.56 - for (i=0; i<RCHASH_SIZE; i++) 18.57 - rcache[i] = NULL; 18.58 -} 18.59 - 18.60 - 18.61 -void rcache_write(uint64_t id, radix_tree_node *node) 18.62 -{ 18.63 - rcache_t *r, *tmp, **curs; 18.64 - 18.65 - pthread_mutex_lock(&rcache_mutex); 18.66 - 18.67 - /* Is it already in the cache? 
*/ 18.68 - r = rcache[RCACHE_HASH(id)]; 18.69 - 18.70 - for (;;) { 18.71 - if (r == NULL) 18.72 - break; 18.73 - if (r->id == id) 18.74 - { 18.75 - memcpy(r->node, node, BLOCK_SIZE); 18.76 - 18.77 - /* bring to front. */ 18.78 - if (r != rcache_head) { 18.79 - 18.80 - if (r == rcache_tail) { 18.81 - if (r->cache_prev != NULL) rcache_tail = r->cache_prev; 18.82 - rcache_tail->cache_next = NULL; 18.83 - } 18.84 - 18.85 - tmp = r->cache_next; 18.86 - if (r->cache_next != NULL) r->cache_next->cache_prev 18.87 - = r->cache_prev; 18.88 - if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp; 18.89 - 18.90 - r->cache_prev = NULL; 18.91 - r->cache_next = rcache_head; 18.92 - if (rcache_head != NULL) rcache_head->cache_prev = r; 18.93 - rcache_head = r; 18.94 - } 18.95 - 18.96 -//printf("Update (%Ld)\n", r->id); 18.97 - goto done; 18.98 - } 18.99 - r = r->hash_next; 18.100 - } 18.101 - 18.102 - if ( rcache_count == RCACHE_MAX ) 18.103 - { 18.104 - /* Remove an entry */ 18.105 - 18.106 - r = rcache_tail; 18.107 - if (r->cache_prev != NULL) rcache_tail = r->cache_prev; 18.108 - rcache_tail->cache_next = NULL; 18.109 - freeblock(r->node); 18.110 - 18.111 - curs = &rcache[RCACHE_HASH(r->id)]; 18.112 - while ((*curs) != r) 18.113 - curs = &(*curs)->hash_next; 18.114 - *curs = r->hash_next; 18.115 -//printf("Evict (%Ld)\n", r->id); 18.116 - 18.117 - } else { 18.118 - 18.119 - r = (rcache_t *)malloc(sizeof(rcache_t)); 18.120 - rcache_count++; 18.121 - } 18.122 - 18.123 - r->node = newblock(); 18.124 - memcpy(r->node, node, BLOCK_SIZE); 18.125 - r->id = id; 18.126 - 18.127 - r->hash_next = rcache[RCACHE_HASH(id)]; 18.128 - rcache[RCACHE_HASH(id)] = r; 18.129 - 18.130 - r->cache_prev = NULL; 18.131 - r->cache_next = rcache_head; 18.132 - if (rcache_head != NULL) rcache_head->cache_prev = r; 18.133 - rcache_head = r; 18.134 - if (rcache_tail == NULL) rcache_tail = r; 18.135 - 18.136 -//printf("Added (%Ld, %p)\n", id, r->node); 18.137 -done: 18.138 - 
pthread_mutex_unlock(&rcache_mutex); 18.139 -} 18.140 - 18.141 -radix_tree_node *rcache_read(uint64_t id) 18.142 -{ 18.143 - rcache_t *r, *tmp; 18.144 - radix_tree_node *node = NULL; 18.145 - 18.146 - pthread_mutex_lock(&rcache_mutex); 18.147 - 18.148 - r = rcache[RCACHE_HASH(id)]; 18.149 - 18.150 - for (;;) { 18.151 - if (r == NULL) { 18.152 -//printf("Miss (%Ld)\n", id); 18.153 - goto done; 18.154 - } 18.155 - if (r->id == id) break; 18.156 - r = r->hash_next; 18.157 - } 18.158 - 18.159 - /* bring to front. */ 18.160 - if (r != rcache_head) 18.161 - { 18.162 - if (r == rcache_tail) { 18.163 - if (r->cache_prev != NULL) rcache_tail = r->cache_prev; 18.164 - rcache_tail->cache_next = NULL; 18.165 - } 18.166 - tmp = r->cache_next; 18.167 - if (r->cache_next != NULL) r->cache_next->cache_prev = r->cache_prev; 18.168 - if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp; 18.169 - 18.170 - r->cache_prev = NULL; 18.171 - r->cache_next = rcache_head; 18.172 - if (rcache_head != NULL) rcache_head->cache_prev = r; 18.173 - rcache_head = r; 18.174 - } 18.175 - 18.176 - node = newblock(); 18.177 - memcpy(node, r->node, BLOCK_SIZE); 18.178 - 18.179 -//printf("Hit (%Ld, %p)\n", id, r->node); 18.180 -done: 18.181 - pthread_mutex_unlock(&rcache_mutex); 18.182 - 18.183 - return(node); 18.184 -} 18.185 - 18.186 - 18.187 -void *rc_readblock(uint64_t id) 18.188 -{ 18.189 - void *ret; 18.190 - 18.191 - ret = (void *)rcache_read(id); 18.192 - 18.193 - if (ret != NULL) return ret; 18.194 - 18.195 - ret = readblock(id); 18.196 - 18.197 - if (ret != NULL) 18.198 - rcache_write(id, ret); 18.199 - 18.200 - return(ret); 18.201 -} 18.202 - 18.203 -uint64_t rc_allocblock(void *block) 18.204 -{ 18.205 - uint64_t ret; 18.206 - 18.207 - ret = allocblock(block); 18.208 - 18.209 - if (ret != ZERO) 18.210 - rcache_write(ret, block); 18.211 - 18.212 - return(ret); 18.213 -} 18.214 - 18.215 -int rc_writeblock(uint64_t id, void *block) 18.216 -{ 18.217 - int ret; 18.218 - 18.219 - ret = 
writeblock(id, block); 18.220 - rcache_write(id, block); 18.221 - 18.222 - return(ret); 18.223 -} 18.224 - 18.225 - 18.226 -/* 18.227 - * block device interface and other helper functions 18.228 - * with these functions, block id is just a 63-bit number, with 18.229 - * no special consideration for the LSB 18.230 - */ 18.231 -radix_tree_node cloneblock(radix_tree_node block); 18.232 - 18.233 -/* 18.234 - * main api 18.235 - * with these functions, the LSB of root always indicates 18.236 - * whether or not the block is writable, including the return 18.237 - * values of update and snapshot 18.238 - */ 18.239 -uint64_t lookup(int height, uint64_t root, uint64_t key); 18.240 -uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val); 18.241 -uint64_t snapshot(uint64_t root); 18.242 - 18.243 -/** 18.244 - * cloneblock: clone an existing block in memory 18.245 - * @block: the old block 18.246 - * 18.247 - * @return: new block, with LSB cleared for every entry 18.248 - */ 18.249 -radix_tree_node cloneblock(radix_tree_node block) { 18.250 - radix_tree_node node = (radix_tree_node) malloc(BLOCK_SIZE); 18.251 - int i; 18.252 - if (node == NULL) { 18.253 - perror("cloneblock malloc"); 18.254 - return NULL; 18.255 - } 18.256 - for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) 18.257 - node[i] = block[i] & ONEMASK; 18.258 - return node; 18.259 -} 18.260 - 18.261 -/** 18.262 - * lookup: find a value given a key 18.263 - * @height: height in bits of the radix tree 18.264 - * @root: root node id, with set LSB indicating writable node 18.265 - * @key: key to lookup 18.266 - * 18.267 - * @return: value on success, zero on error 18.268 - */ 18.269 - 18.270 -uint64_t lookup(int height, uint64_t root, uint64_t key) { 18.271 - radix_tree_node node; 18.272 - uint64_t mask = ONE; 18.273 - 18.274 - assert(key >> height == 0); 18.275 - 18.276 - /* the root block may be smaller to ensure all leaves are full */ 18.277 - height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * 
RADIX_TREE_MAP_SHIFT; 18.278 - 18.279 - /* now carve off equal sized chunks at each step */ 18.280 - for (;;) { 18.281 - uint64_t oldroot; 18.282 - 18.283 -#ifdef DEBUG 18.284 - printf("lookup: height=%3d root=%3Ld offset=%3d%s\n", height, root, 18.285 - (int) ((key >> height) & RADIX_TREE_MAP_MASK), 18.286 - (iswritable(root) ? "" : " (readonly)")); 18.287 -#endif 18.288 - 18.289 - if (getid(root) == ZERO) 18.290 - return ZERO; 18.291 - 18.292 - oldroot = root; 18.293 - node = (radix_tree_node) rc_readblock(getid(root)); 18.294 - if (node == NULL) 18.295 - return ZERO; 18.296 - 18.297 - root = node[(key >> height) & RADIX_TREE_MAP_MASK]; 18.298 - mask &= root; 18.299 - freeblock(node); 18.300 - 18.301 - if (height == 0) 18.302 - return ( root & ONEMASK ) | mask; 18.303 - 18.304 - height -= RADIX_TREE_MAP_SHIFT; 18.305 - } 18.306 - 18.307 - return ZERO; 18.308 -} 18.309 - 18.310 -/* 18.311 - * update: set a radix tree entry, doing copy-on-write as necessary 18.312 - * @height: height in bits of the radix tree 18.313 - * @root: root node id, with set LSB indicating writable node 18.314 - * @key: key to set 18.315 - * @val: value to set, s.t. 
radix(key)=val 18.316 - * 18.317 - * @returns: (possibly new) root id on success (with LSB=1), 0 on failure 18.318 - */ 18.319 - 18.320 -uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val) { 18.321 - int offset; 18.322 - uint64_t child; 18.323 - radix_tree_node node; 18.324 - 18.325 - /* base case--return val */ 18.326 - if (height == 0) 18.327 - return val; 18.328 - 18.329 - /* the root block may be smaller to ensure all leaves are full */ 18.330 - height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT; 18.331 - offset = (key >> height) & RADIX_TREE_MAP_MASK; 18.332 - 18.333 -#ifdef DEBUG 18.334 - printf("update: height=%3d root=%3Ld offset=%3d%s\n", height, root, 18.335 - offset, (iswritable(root)?"":" (clone)")); 18.336 -#endif 18.337 - 18.338 - /* load a block, or create a new one */ 18.339 - if (root == ZERO) { 18.340 - node = (radix_tree_node) newblock(); 18.341 - } else { 18.342 - node = (radix_tree_node) rc_readblock(getid(root)); 18.343 - 18.344 - if (!iswritable(root)) { 18.345 - /* need to clone this node */ 18.346 - radix_tree_node oldnode = node; 18.347 - node = cloneblock(node); 18.348 - freeblock(oldnode); 18.349 - root = ZERO; 18.350 - } 18.351 - } 18.352 - 18.353 - if (node == NULL) { 18.354 -#ifdef DEBUG 18.355 - printf("update: node is null!\n"); 18.356 -#endif 18.357 - return ZERO; 18.358 - } 18.359 - 18.360 - child = update(height, node[offset], key, val); 18.361 - 18.362 - if (child == ZERO) { 18.363 - freeblock(node); 18.364 - return ZERO; 18.365 - } else if (child == node[offset]) { 18.366 - /* no change, so we already owned the child */ 18.367 - assert(iswritable(root)); 18.368 - 18.369 - freeblock(node); 18.370 - return root; 18.371 - } 18.372 - 18.373 - node[offset] = child; 18.374 - 18.375 - /* new/cloned blocks need to be saved */ 18.376 - if (root == ZERO) { 18.377 - /* mark this as an owned block */ 18.378 - root = rc_allocblock(node); 18.379 - if (root) 18.380 - root = writable(root); 18.381 - } else 
if (rc_writeblock(getid(root), node) < 0) { 18.382 - freeblock(node); 18.383 - return ZERO; 18.384 - } 18.385 - 18.386 - freeblock(node); 18.387 - return root; 18.388 -} 18.389 - 18.390 -/** 18.391 - * snapshot: create a snapshot 18.392 - * @root: old root node 18.393 - * 18.394 - * @return: new root node, 0 on error 18.395 - */ 18.396 -uint64_t snapshot(uint64_t root) { 18.397 - radix_tree_node node, newnode; 18.398 - 18.399 - if ((node = rc_readblock(getid(root))) == NULL) 18.400 - return ZERO; 18.401 - 18.402 - newnode = cloneblock(node); 18.403 - freeblock(node); 18.404 - if (newnode == NULL) 18.405 - return ZERO; 18.406 - 18.407 - root = rc_allocblock(newnode); 18.408 - freeblock(newnode); 18.409 - 18.410 - if (root == ZERO) 18.411 - return ZERO; 18.412 - else 18.413 - return writable(root); 18.414 -} 18.415 - 18.416 -/** 18.417 - * collapse: collapse a parent onto a child. 18.418 - * 18.419 - * NOTE: This assumes that parent and child really are, and further that 18.420 - * there are no other children forked from this parent. (children of the 18.421 - * child are okay...) 
18.422 - */ 18.423 - 18.424 -int collapse(int height, uint64_t proot, uint64_t croot) 18.425 -{ 18.426 - int i, numlinks, ret, total = 0; 18.427 - radix_tree_node pnode, cnode; 18.428 - 18.429 - if (height == 0) { 18.430 - height = -1; /* terminate recursion */ 18.431 - } else { 18.432 - height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT; 18.433 - } 18.434 - numlinks = (1UL << RADIX_TREE_MAP_SHIFT); 18.435 - 18.436 - /* Terminal cases: */ 18.437 - 18.438 - if ( (getid(proot) == ZERO) || (getid(croot) == ZERO) ) 18.439 - return -1; 18.440 - 18.441 - /* get roots */ 18.442 - if ((pnode = readblock(getid(proot))) == NULL) 18.443 - return -1; 18.444 - 18.445 - if ((cnode = readblock(getid(croot))) == NULL) 18.446 - { 18.447 - freeblock(pnode); 18.448 - return -1; 18.449 - } 18.450 - 18.451 - /* For each writable link in proot */ 18.452 - for (i=0; i<numlinks; i++) 18.453 - { 18.454 - if ( pnode[i] == cnode[i] ) continue; 18.455 - 18.456 - /* collapse (next level) */ 18.457 - /* if height != 0 and writable... 
*/ 18.458 - if (( height >= 0 ) && ( iswritable(pnode[i]) ) ) 18.459 - { 18.460 - //printf(" %Ld is writable (i=%d).\n", getid(pnode[i]), i); 18.461 - ret = collapse(height, pnode[i], cnode[i]); 18.462 - if (ret == -1) 18.463 - { 18.464 - total = -1; 18.465 - } else { 18.466 - total += ret; 18.467 - } 18.468 - } 18.469 - 18.470 - 18.471 - } 18.472 - 18.473 - /* if plink is writable, AND clink is writable -> free plink block */ 18.474 - if ( ( iswritable(proot) ) && ( iswritable(croot) ) ) 18.475 - { 18.476 - releaseblock(getid(proot)); 18.477 - if (ret >=0) total++; 18.478 - //printf(" Delete %Ld\n", getid(proot)); 18.479 - } 18.480 -//printf("done : %Ld\n", getid(proot)); 18.481 - return total; 18.482 - 18.483 -} 18.484 - 18.485 - 18.486 -void print_root(uint64_t root, int height, FILE *dot_f) 18.487 -{ 18.488 - FILE *f; 18.489 - int i; 18.490 - radix_tree_node node; 18.491 - char *style[2] = { "", "style=bold,color=blue," }; 18.492 - 18.493 - if (dot_f == NULL) { 18.494 - f = fopen("radix.dot", "w"); 18.495 - if (f == NULL) { 18.496 - perror("print_root: open"); 18.497 - return; 18.498 - } 18.499 - 18.500 - /* write graph preamble */ 18.501 - fprintf(f, "digraph G {\n"); 18.502 - 18.503 - /* add a node for this root. 
*/ 18.504 - fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n", 18.505 - getid(root), style[iswritable(root)], getid(root)); 18.506 - } 18.507 - 18.508 - printf("print_root(%Ld)\n", getid(root)); 18.509 - 18.510 - /* base case */ 18.511 - if (height == 0) { 18.512 - /* add a node and edge for each child root */ 18.513 - node = (radix_tree_node) readblock(getid(root)); 18.514 - if (node == NULL) 18.515 - return; 18.516 - 18.517 - for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) { 18.518 - if (node[i] != ZERO) { 18.519 - fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n", 18.520 - getid(node[i]), style[iswritable(node[i])], 18.521 - getid(node[i])); 18.522 - fprintf(f, " n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), 18.523 - getid(node[i]), i); 18.524 - } 18.525 - } 18.526 - freeblock(node); 18.527 - return; 18.528 - } 18.529 - 18.530 - /* the root block may be smaller to ensure all leaves are full */ 18.531 - height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT; 18.532 - 18.533 - if (getid(root) == ZERO) 18.534 - return; 18.535 - 18.536 - node = (radix_tree_node) readblock(getid(root)); 18.537 - if (node == NULL) 18.538 - return; 18.539 - 18.540 - /* add a node and edge for each child root */ 18.541 - for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) 18.542 - if (node[i] != ZERO) { 18.543 - fprintf(f, " n%Ld [%sshape=box,label=\"%Ld\"];\n", 18.544 - getid(node[i]), style[iswritable(node[i])], 18.545 - getid(node[i])); 18.546 - 18.547 - print_root(node[i], height-RADIX_TREE_MAP_SHIFT, f); 18.548 - fprintf(f, " n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), 18.549 - getid(node[i]), i); 18.550 - } 18.551 - 18.552 - freeblock(node); 18.553 - 18.554 - /* write graph postamble */ 18.555 - if (dot_f == NULL) { 18.556 - fprintf(f, "}\n"); 18.557 - fclose(f); 18.558 - } 18.559 -} 18.560 - 18.561 -#ifdef RADIX_STANDALONE 18.562 - 18.563 -int main(int argc, char **argv) { 18.564 - uint64_t key = ZERO, val = ZERO; 18.565 - uint64_t root = writable(2ULL); 18.566 - uint64_t p 
= ZERO, c = ZERO; 18.567 - int v; 18.568 - char buff[4096]; 18.569 - 18.570 - __init_blockstore(); 18.571 - 18.572 - memset(buff, 0, 4096); 18.573 - /*fp = open("radix.dat", O_RDWR | O_CREAT, 0644); 18.574 - 18.575 - if (fp < 3) { 18.576 - perror("open"); 18.577 - return -1; 18.578 - } 18.579 - if (lseek(fp, 0, SEEK_END) == 0) { 18.580 - write(fp, buff, 4096); 18.581 - }*/ 18.582 - 18.583 - allocblock(buff); 18.584 - 18.585 - printf("Recognized commands:\n" 18.586 - "Note: the LSB of a node number indicates if it is writable\n" 18.587 - " root <node> set root to <node>\n" 18.588 - " snapshot take a snapshot of the root\n" 18.589 - " set <key> <val> set key=val\n" 18.590 - " get <key> query key\n" 18.591 - " c <proot> <croot> collapse\n" 18.592 - " pr print tree to dot\n" 18.593 - " pf <1=verbose> print freelist\n" 18.594 - " quit\n" 18.595 - "\nroot = %Ld\n", root); 18.596 - for (;;) { 18.597 - //print_root(root, 34, NULL); 18.598 - //system("dot radix.dot -Tps -o radix.ps"); 18.599 - 18.600 - printf("> "); 18.601 - fflush(stdout); 18.602 - fgets(buff, 1024, stdin); 18.603 - if (feof(stdin)) 18.604 - break; 18.605 - if (sscanf(buff, " root %Ld", &root) == 1) { 18.606 - printf("root set to %Ld\n", root); 18.607 - } else if (sscanf(buff, " set %Ld %Ld", &key, &val) == 2) { 18.608 - root = update(34, root, key, val); 18.609 - printf("root = %Ld\n", root); 18.610 - } else if (sscanf(buff, " c %Ld %Ld", &p, &c) == 2) { 18.611 - v = collapse(34, p, c); 18.612 - printf("reclaimed %d blocks.\n", v); 18.613 - } else if (sscanf(buff, " get %Ld", &key) == 1) { 18.614 - val = lookup(34, root, key); 18.615 - printf("value = %Ld\n", val); 18.616 - } else if (!strcmp(buff, "quit\n")) { 18.617 - break; 18.618 - } else if (!strcmp(buff, "snapshot\n")) { 18.619 - root = snapshot(root); 18.620 - printf("new root = %Ld\n", root); 18.621 - } else if (sscanf(buff, " pr %Ld", &root) == 1) { 18.622 - print_root(root, 34, NULL); 18.623 - } else if (sscanf(buff, " pf %d", &v) == 1) { 18.624 
- freelist_count(v); 18.625 - } else if (!strcmp(buff, "pf\n")) { 18.626 - freelist_count(0); 18.627 - } else { 18.628 - printf("command not recognized\n"); 18.629 - } 18.630 - } 18.631 - return 0; 18.632 -} 18.633 - 18.634 -#endif
19.1 --- a/tools/blktap/parallax/radix.h Fri Jun 16 18:19:40 2006 +0100 19.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 19.3 @@ -1,45 +0,0 @@ 19.4 -/* 19.5 - * Radix tree for mapping (up to) 63-bit virtual block IDs to 19.6 - * 63-bit global block IDs 19.7 - * 19.8 - * Pointers within the tree set aside the least significant bit to indicate 19.9 - * whther or not the target block is writable from this node. 19.10 - * 19.11 - * The block with ID 0 is assumed to be an empty block of all zeros 19.12 - */ 19.13 - 19.14 -#ifndef __RADIX_H__ 19.15 -#define __RADIX_H__ 19.16 - 19.17 -/* I don't really like exposing these, but... */ 19.18 -#define getid(x) (((x)>>1)&0x7fffffffffffffffLL) 19.19 -#define putid(x) ((x)<<1) 19.20 -#define writable(x) (((x)<<1)|1LL) 19.21 -#define iswritable(x) ((x)&1LL) 19.22 -#define ZERO 0LL 19.23 -#define ONE 1LL 19.24 -#define ONEMASK 0xffffffffffffffeLL 19.25 - 19.26 -#define RADIX_TREE_MAP_SHIFT 9 19.27 -#define RADIX_TREE_MAP_MASK 0x1ff 19.28 -#define RADIX_TREE_MAP_ENTRIES 512 19.29 - 19.30 -typedef uint64_t *radix_tree_node; 19.31 - 19.32 - 19.33 -/* 19.34 - * main api 19.35 - * with these functions, the LSB of root always indicates 19.36 - * whether or not the block is writable, including the return 19.37 - * values of update and snapshot 19.38 - */ 19.39 -uint64_t lookup(int height, uint64_t root, uint64_t key); 19.40 -uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val); 19.41 -uint64_t snapshot(uint64_t root); 19.42 -int collapse(int height, uint64_t proot, uint64_t croot); 19.43 -int isprivate(int height, uint64_t root, uint64_t key); 19.44 - 19.45 - 19.46 -void __rcache_init(void); 19.47 - 19.48 -#endif /* __RADIX_H__ */
20.1 --- a/tools/blktap/parallax/requests-async.c Fri Jun 16 18:19:40 2006 +0100 20.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 20.3 @@ -1,762 +0,0 @@ 20.4 -/* requests-async.c 20.5 - * 20.6 - * asynchronous request dispatcher for radix access in parallax. 20.7 - */ 20.8 - 20.9 -#include <stdio.h> 20.10 -#include <stdlib.h> 20.11 -#include <string.h> 20.12 -#include <ctype.h> 20.13 -#include <assert.h> 20.14 -#include <pthread.h> 20.15 -#include <err.h> 20.16 -#include <zlib.h> /* for crc32() */ 20.17 -#include "requests-async.h" 20.18 -#include "vdi.h" 20.19 -#include "radix.h" 20.20 - 20.21 -#define L1_IDX(_a) (((_a) & 0x0000000007fc0000ULL) >> 18) 20.22 -#define L2_IDX(_a) (((_a) & 0x000000000003fe00ULL) >> 9) 20.23 -#define L3_IDX(_a) (((_a) & 0x00000000000001ffULL)) 20.24 - 20.25 - 20.26 -#if 0 20.27 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 20.28 -#else 20.29 -#define DPRINTF(_f, _a...) ((void)0) 20.30 -#endif 20.31 - 20.32 -struct block_info { 20.33 - uint32_t crc; 20.34 - uint32_t unused; 20.35 -}; 20.36 - 20.37 -struct io_req { 20.38 - enum { IO_OP_READ, IO_OP_WRITE } op; 20.39 - uint64_t root; 20.40 - uint64_t vaddr; 20.41 - int state; 20.42 - io_cb_t cb; 20.43 - void *param; 20.44 - struct radix_lock *lock; 20.45 - 20.46 - /* internal stuff: */ 20.47 - struct io_ret retval;/* holds the return while we unlock. 
*/ 20.48 - char *block; /* the block to write */ 20.49 - radix_tree_node radix[3]; 20.50 - uint64_t radix_addr[3]; 20.51 - struct block_info bi; 20.52 -}; 20.53 - 20.54 -void clear_w_bits(radix_tree_node node) 20.55 -{ 20.56 - int i; 20.57 - for (i=0; i<RADIX_TREE_MAP_ENTRIES; i++) 20.58 - node[i] = node[i] & ONEMASK; 20.59 - return; 20.60 -} 20.61 - 20.62 -void clear_L3_w_bits(radix_tree_node node) 20.63 -{ 20.64 - int i; 20.65 - for (i=0; i<RADIX_TREE_MAP_ENTRIES; i+=2) 20.66 - node[i] = node[i] & ONEMASK; 20.67 - return; 20.68 -} 20.69 - 20.70 -enum states { 20.71 - /* both */ 20.72 - READ_L1, 20.73 - READ_L2, 20.74 - READ_L3, 20.75 - 20.76 - /* read */ 20.77 - READ_LOCKED, 20.78 - READ_DATA, 20.79 - READ_UNLOCKED, 20.80 - RETURN_ZERO, 20.81 - 20.82 - /* write */ 20.83 - WRITE_LOCKED, 20.84 - WRITE_DATA, 20.85 - WRITE_L3, 20.86 - WRITE_UNLOCKED, 20.87 - 20.88 - /* L3 Zero Path */ 20.89 - ALLOC_DATA_L3z, 20.90 - WRITE_L3_L3z, 20.91 - 20.92 - /* L3 Fault Path */ 20.93 - ALLOC_DATA_L3f, 20.94 - WRITE_L3_L3f, 20.95 - 20.96 - /* L2 Zero Path */ 20.97 - ALLOC_DATA_L2z, 20.98 - WRITE_L2_L2z, 20.99 - ALLOC_L3_L2z, 20.100 - WRITE_L2_L3z, 20.101 - 20.102 - /* L2 Fault Path */ 20.103 - READ_L3_L2f, 20.104 - ALLOC_DATA_L2f, 20.105 - WRITE_L2_L2f, 20.106 - ALLOC_L3_L2f, 20.107 - WRITE_L2_L3f, 20.108 - 20.109 - /* L1 Zero Path */ 20.110 - ALLOC_DATA_L1z, 20.111 - ALLOC_L3_L1z, 20.112 - ALLOC_L2_L1z, 20.113 - WRITE_L1_L1z, 20.114 - 20.115 - /* L1 Fault Path */ 20.116 - READ_L2_L1f, 20.117 - READ_L3_L1f, 20.118 - ALLOC_DATA_L1f, 20.119 - ALLOC_L3_L1f, 20.120 - ALLOC_L2_L1f, 20.121 - WRITE_L1_L1f, 20.122 - 20.123 -}; 20.124 - 20.125 -enum radix_offsets { 20.126 - L1 = 0, 20.127 - L2 = 1, 20.128 - L3 = 2 20.129 -}; 20.130 - 20.131 - 20.132 -static void read_cb(struct io_ret ret, void *param); 20.133 -static void write_cb(struct io_ret ret, void *param); 20.134 - 20.135 -int vdi_read(vdi_t *vdi, uint64_t vaddr, io_cb_t cb, void *param) 20.136 -{ 20.137 - struct io_req *req; 20.138 
- 20.139 - if (!VALID_VADDR(vaddr)) return ERR_BAD_VADDR; 20.140 - /* Every second line in the bottom-level radix tree is used to */ 20.141 - /* store crc32 values etc. We shift the vadder here to achied this. */ 20.142 - vaddr <<= 1; 20.143 - 20.144 - req = (struct io_req *)malloc(sizeof (struct io_req)); 20.145 - if (req == NULL) return ERR_NOMEM; 20.146 - 20.147 - req->radix[0] = req->radix[1] = req->radix[2] = NULL; 20.148 - req->op = IO_OP_READ; 20.149 - req->root = vdi->radix_root; 20.150 - req->lock = vdi->radix_lock; 20.151 - req->vaddr = vaddr; 20.152 - req->cb = cb; 20.153 - req->param = param; 20.154 - req->state = READ_LOCKED; 20.155 - 20.156 - block_rlock(req->lock, L1_IDX(vaddr), read_cb, req); 20.157 - 20.158 - return 0; 20.159 -} 20.160 - 20.161 - 20.162 -int vdi_write(vdi_t *vdi, uint64_t vaddr, char *block, 20.163 - io_cb_t cb, void *param) 20.164 -{ 20.165 - struct io_req *req; 20.166 - 20.167 - if (!VALID_VADDR(vaddr)) return ERR_BAD_VADDR; 20.168 - /* Every second line in the bottom-level radix tree is used to */ 20.169 - /* store crc32 values etc. We shift the vadder here to achied this. */ 20.170 - vaddr <<= 1; 20.171 - 20.172 - req = (struct io_req *)malloc(sizeof (struct io_req)); 20.173 - if (req == NULL) return ERR_NOMEM; 20.174 - 20.175 - req->radix[0] = req->radix[1] = req->radix[2] = NULL; 20.176 - req->op = IO_OP_WRITE; 20.177 - req->root = vdi->radix_root; 20.178 - req->lock = vdi->radix_lock; 20.179 - req->vaddr = vaddr; 20.180 - req->block = block; 20.181 - /* Todo: add a pseodoheader to the block to include some location */ 20.182 - /* information in the CRC as well. 
*/ 20.183 - req->bi.crc = (uint32_t) crc32(0L, Z_NULL, 0); 20.184 - req->bi.crc = (uint32_t) crc32(req->bi.crc, block, BLOCK_SIZE); 20.185 - req->bi.unused = 0xdeadbeef; 20.186 - 20.187 - req->cb = cb; 20.188 - req->param = param; 20.189 - req->radix_addr[L1] = getid(req->root); /* for consistency */ 20.190 - req->state = WRITE_LOCKED; 20.191 - 20.192 - block_wlock(req->lock, L1_IDX(vaddr), write_cb, req); 20.193 - 20.194 - 20.195 - return 0; 20.196 -} 20.197 - 20.198 -static void read_cb(struct io_ret ret, void *param) 20.199 -{ 20.200 - struct io_req *req = (struct io_req *)param; 20.201 - radix_tree_node node; 20.202 - uint64_t idx; 20.203 - char *block; 20.204 - void *req_param; 20.205 - 20.206 - DPRINTF("read_cb\n"); 20.207 - /* get record */ 20.208 - switch(req->state) { 20.209 - 20.210 - case READ_LOCKED: 20.211 - 20.212 - DPRINTF("READ_LOCKED\n"); 20.213 - req->state = READ_L1; 20.214 - block_read(getid(req->root), read_cb, req); 20.215 - break; 20.216 - 20.217 - case READ_L1: /* block is the radix root */ 20.218 - 20.219 - DPRINTF("READ_L1\n"); 20.220 - block = IO_BLOCK(ret); 20.221 - if (block == NULL) goto fail; 20.222 - node = (radix_tree_node) block; 20.223 - idx = getid( node[L1_IDX(req->vaddr)] ); 20.224 - free(block); 20.225 - if ( idx == ZERO ) { 20.226 - req->state = RETURN_ZERO; 20.227 - block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req); 20.228 - } else { 20.229 - req->state = READ_L2; 20.230 - block_read(idx, read_cb, req); 20.231 - } 20.232 - break; 20.233 - 20.234 - case READ_L2: 20.235 - 20.236 - DPRINTF("READ_L2\n"); 20.237 - block = IO_BLOCK(ret); 20.238 - if (block == NULL) goto fail; 20.239 - node = (radix_tree_node) block; 20.240 - idx = getid( node[L2_IDX(req->vaddr)] ); 20.241 - free(block); 20.242 - if ( idx == ZERO ) { 20.243 - req->state = RETURN_ZERO; 20.244 - block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req); 20.245 - } else { 20.246 - req->state = READ_L3; 20.247 - block_read(idx, read_cb, req); 20.248 - } 
20.249 - break; 20.250 - 20.251 - case READ_L3: 20.252 - { 20.253 - struct block_info *bi; 20.254 - 20.255 - DPRINTF("READ_L3\n"); 20.256 - block = IO_BLOCK(ret); 20.257 - if (block == NULL) goto fail; 20.258 - node = (radix_tree_node) block; 20.259 - idx = getid( node[L3_IDX(req->vaddr)] ); 20.260 - bi = (struct block_info *) &node[L3_IDX(req->vaddr) + 1]; 20.261 - req->bi = *bi; 20.262 - free(block); 20.263 - if ( idx == ZERO ) { 20.264 - req->state = RETURN_ZERO; 20.265 - block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req); 20.266 - } else { 20.267 - req->state = READ_DATA; 20.268 - block_read(idx, read_cb, req); 20.269 - } 20.270 - break; 20.271 - } 20.272 - case READ_DATA: 20.273 - { 20.274 - uint32_t crc; 20.275 - 20.276 - DPRINTF("READ_DATA\n"); 20.277 - block = IO_BLOCK(ret); 20.278 - if (block == NULL) goto fail; 20.279 - 20.280 - /* crc check */ 20.281 - crc = (uint32_t) crc32(0L, Z_NULL, 0); 20.282 - crc = (uint32_t) crc32(crc, block, BLOCK_SIZE); 20.283 - if (crc != req->bi.crc) { 20.284 - /* TODO: add a retry loop here. */ 20.285 - /* Do this after the cache is added -- make sure to */ 20.286 - /* invalidate the bad page before reissuing the read. */ 20.287 - 20.288 - warn("Bad CRC on vaddr (%Lu:%d)\n", req->vaddr, req->bi.unused); 20.289 -#ifdef PRINT_BADCRC_PAGES 20.290 - { 20.291 - int j; 20.292 - for (j=0; j<BLOCK_SIZE; j++) { 20.293 - if isprint(block[j]) { 20.294 - printf("%c", block[j]); 20.295 - } else { 20.296 - printf("."); 20.297 - } 20.298 - if ((j % 64) == 0) printf("\n"); 20.299 - } 20.300 - } 20.301 -#endif /* PRINT_BADCRC_PAGES */ 20.302 - 20.303 - /* fast and loose for the moment. 
*/ 20.304 - /* goto fail; */ 20.305 - } 20.306 - 20.307 - req->retval = ret; 20.308 - req->state = READ_UNLOCKED; 20.309 - block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req); 20.310 - break; 20.311 - } 20.312 - case READ_UNLOCKED: 20.313 - { 20.314 - struct io_ret r; 20.315 - io_cb_t cb; 20.316 - DPRINTF("READ_UNLOCKED\n"); 20.317 - req_param = req->param; 20.318 - r = req->retval; 20.319 - cb = req->cb; 20.320 - free(req); 20.321 - cb(r, req_param); 20.322 - break; 20.323 - } 20.324 - 20.325 - case RETURN_ZERO: 20.326 - { 20.327 - struct io_ret r; 20.328 - io_cb_t cb; 20.329 - DPRINTF("RETURN_ZERO\n"); 20.330 - req_param = req->param; 20.331 - cb = req->cb; 20.332 - free(req); 20.333 - r.type = IO_BLOCK_T; 20.334 - r.u.b = newblock(); 20.335 - cb(r, req_param); 20.336 - break; 20.337 - } 20.338 - 20.339 - default: 20.340 - DPRINTF("*** Write: Bad state! (%d) ***\n", req->state); 20.341 - goto fail; 20.342 - } 20.343 - 20.344 - return; 20.345 - 20.346 - fail: 20.347 - { 20.348 - struct io_ret r; 20.349 - io_cb_t cb; 20.350 - DPRINTF("asyn_read had a read error.\n"); 20.351 - req_param = req->param; 20.352 - r = ret; 20.353 - cb = req->cb; 20.354 - free(req); 20.355 - cb(r, req_param); 20.356 - } 20.357 - 20.358 - 20.359 -} 20.360 - 20.361 -static void write_cb(struct io_ret r, void *param) 20.362 -{ 20.363 - struct io_req *req = (struct io_req *)param; 20.364 - radix_tree_node node; 20.365 - uint64_t a, addr; 20.366 - void *req_param; 20.367 - struct block_info *bi; 20.368 - 20.369 - switch(req->state) { 20.370 - 20.371 - case WRITE_LOCKED: 20.372 - 20.373 - DPRINTF("WRITE_LOCKED (%llu)\n", L1_IDX(req->vaddr)); 20.374 - req->state = READ_L1; 20.375 - block_read(getid(req->root), write_cb, req); 20.376 - break; 20.377 - 20.378 - case READ_L1: /* block is the radix root */ 20.379 - 20.380 - DPRINTF("READ_L1\n"); 20.381 - node = (radix_tree_node) IO_BLOCK(r); 20.382 - if (node == NULL) goto fail; 20.383 - a = node[L1_IDX(req->vaddr)]; 20.384 - addr = getid(a); 
20.385 - 20.386 - req->radix_addr[L2] = addr; 20.387 - req->radix[L1] = node; 20.388 - 20.389 - if ( addr == ZERO ) { 20.390 - /* L1 empty subtree: */ 20.391 - req->state = ALLOC_DATA_L1z; 20.392 - block_alloc( req->block, write_cb, req ); 20.393 - } else if ( !iswritable(a) ) { 20.394 - /* L1 fault: */ 20.395 - req->state = READ_L2_L1f; 20.396 - block_read( addr, write_cb, req ); 20.397 - } else { 20.398 - req->state = READ_L2; 20.399 - block_read( addr, write_cb, req ); 20.400 - } 20.401 - break; 20.402 - 20.403 - case READ_L2: 20.404 - 20.405 - DPRINTF("READ_L2\n"); 20.406 - node = (radix_tree_node) IO_BLOCK(r); 20.407 - if (node == NULL) goto fail; 20.408 - a = node[L2_IDX(req->vaddr)]; 20.409 - addr = getid(a); 20.410 - 20.411 - req->radix_addr[L3] = addr; 20.412 - req->radix[L2] = node; 20.413 - 20.414 - if ( addr == ZERO ) { 20.415 - /* L2 empty subtree: */ 20.416 - req->state = ALLOC_DATA_L2z; 20.417 - block_alloc( req->block, write_cb, req ); 20.418 - } else if ( !iswritable(a) ) { 20.419 - /* L2 fault: */ 20.420 - req->state = READ_L3_L2f; 20.421 - block_read( addr, write_cb, req ); 20.422 - } else { 20.423 - req->state = READ_L3; 20.424 - block_read( addr, write_cb, req ); 20.425 - } 20.426 - break; 20.427 - 20.428 - case READ_L3: 20.429 - 20.430 - DPRINTF("READ_L3\n"); 20.431 - node = (radix_tree_node) IO_BLOCK(r); 20.432 - if (node == NULL) goto fail; 20.433 - a = node[L3_IDX(req->vaddr)]; 20.434 - addr = getid(a); 20.435 - 20.436 - req->radix[L3] = node; 20.437 - 20.438 - if ( addr == ZERO ) { 20.439 - /* L3 fault: */ 20.440 - req->state = ALLOC_DATA_L3z; 20.441 - block_alloc( req->block, write_cb, req ); 20.442 - } else if ( !iswritable(a) ) { 20.443 - /* L3 fault: */ 20.444 - req->state = ALLOC_DATA_L3f; 20.445 - block_alloc( req->block, write_cb, req ); 20.446 - } else { 20.447 - req->state = WRITE_DATA; 20.448 - block_write( addr, req->block, write_cb, req ); 20.449 - } 20.450 - break; 20.451 - 20.452 - case WRITE_DATA: 20.453 - 20.454 - 
DPRINTF("WRITE_DATA\n"); 20.455 - /* The L3 radix points to the correct block, we just need to */ 20.456 - /* update the crc. */ 20.457 - if (IO_INT(r) < 0) goto fail; 20.458 - bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; 20.459 - req->bi.unused = 101; 20.460 - *bi = req->bi; 20.461 - req->state = WRITE_L3; 20.462 - block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req); 20.463 - break; 20.464 - 20.465 - /* L3 Zero Path: */ 20.466 - 20.467 - case ALLOC_DATA_L3z: 20.468 - 20.469 - DPRINTF("ALLOC_DATA_L3z\n"); 20.470 - addr = IO_ADDR(r); 20.471 - a = writable(addr); 20.472 - req->radix[L3][L3_IDX(req->vaddr)] = a; 20.473 - bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; 20.474 - req->bi.unused = 102; 20.475 - *bi = req->bi; 20.476 - req->state = WRITE_L3_L3z; 20.477 - block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req); 20.478 - break; 20.479 - 20.480 - /* L3 Fault Path: */ 20.481 - 20.482 - case ALLOC_DATA_L3f: 20.483 - 20.484 - DPRINTF("ALLOC_DATA_L3f\n"); 20.485 - addr = IO_ADDR(r); 20.486 - a = writable(addr); 20.487 - req->radix[L3][L3_IDX(req->vaddr)] = a; 20.488 - bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; 20.489 - req->bi.unused = 103; 20.490 - *bi = req->bi; 20.491 - req->state = WRITE_L3_L3f; 20.492 - block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req); 20.493 - break; 20.494 - 20.495 - /* L2 Zero Path: */ 20.496 - 20.497 - case ALLOC_DATA_L2z: 20.498 - 20.499 - DPRINTF("ALLOC_DATA_L2z\n"); 20.500 - addr = IO_ADDR(r); 20.501 - a = writable(addr); 20.502 - req->radix[L3] = newblock(); 20.503 - req->radix[L3][L3_IDX(req->vaddr)] = a; 20.504 - bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; 20.505 - req->bi.unused = 104; 20.506 - *bi = req->bi; 20.507 - req->state = ALLOC_L3_L2z; 20.508 - block_alloc( (char*)req->radix[L3], write_cb, req ); 20.509 - break; 20.510 - 20.511 - case ALLOC_L3_L2z: 20.512 - 20.513 - 
DPRINTF("ALLOC_L3_L2z\n"); 20.514 - addr = IO_ADDR(r); 20.515 - a = writable(addr); 20.516 - req->radix[L2][L2_IDX(req->vaddr)] = a; 20.517 - req->state = WRITE_L2_L2z; 20.518 - block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req); 20.519 - break; 20.520 - 20.521 - /* L2 Fault Path: */ 20.522 - 20.523 - case READ_L3_L2f: 20.524 - 20.525 - DPRINTF("READ_L3_L2f\n"); 20.526 - node = (radix_tree_node) IO_BLOCK(r); 20.527 - clear_L3_w_bits(node); 20.528 - if (node == NULL) goto fail; 20.529 - a = node[L2_IDX(req->vaddr)]; 20.530 - addr = getid(a); 20.531 - 20.532 - req->radix[L3] = node; 20.533 - req->state = ALLOC_DATA_L2f; 20.534 - block_alloc( req->block, write_cb, req ); 20.535 - break; 20.536 - 20.537 - case ALLOC_DATA_L2f: 20.538 - 20.539 - DPRINTF("ALLOC_DATA_L2f\n"); 20.540 - addr = IO_ADDR(r); 20.541 - a = writable(addr); 20.542 - req->radix[L3][L3_IDX(req->vaddr)] = a; 20.543 - bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; 20.544 - req->bi.unused = 105; 20.545 - *bi = req->bi; 20.546 - req->state = ALLOC_L3_L2f; 20.547 - block_alloc( (char*)req->radix[L3], write_cb, req ); 20.548 - break; 20.549 - 20.550 - case ALLOC_L3_L2f: 20.551 - 20.552 - DPRINTF("ALLOC_L3_L2f\n"); 20.553 - addr = IO_ADDR(r); 20.554 - a = writable(addr); 20.555 - req->radix[L2][L2_IDX(req->vaddr)] = a; 20.556 - req->state = WRITE_L2_L2f; 20.557 - block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req); 20.558 - break; 20.559 - 20.560 - /* L1 Zero Path: */ 20.561 - 20.562 - case ALLOC_DATA_L1z: 20.563 - 20.564 - DPRINTF("ALLOC_DATA_L1z\n"); 20.565 - addr = IO_ADDR(r); 20.566 - a = writable(addr); 20.567 - req->radix[L3] = newblock(); 20.568 - req->radix[L3][L3_IDX(req->vaddr)] = a; 20.569 - bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; 20.570 - req->bi.unused = 106; 20.571 - *bi = req->bi; 20.572 - req->state = ALLOC_L3_L1z; 20.573 - block_alloc( (char*)req->radix[L3], write_cb, req ); 20.574 - break; 20.575 - 20.576 - 
case ALLOC_L3_L1z: 20.577 - 20.578 - DPRINTF("ALLOC_L3_L1z\n"); 20.579 - addr = IO_ADDR(r); 20.580 - a = writable(addr); 20.581 - req->radix[L2] = newblock(); 20.582 - req->radix[L2][L2_IDX(req->vaddr)] = a; 20.583 - req->state = ALLOC_L2_L1z; 20.584 - block_alloc( (char*)req->radix[L2], write_cb, req ); 20.585 - break; 20.586 - 20.587 - case ALLOC_L2_L1z: 20.588 - 20.589 - DPRINTF("ALLOC_L2_L1z\n"); 20.590 - addr = IO_ADDR(r); 20.591 - a = writable(addr); 20.592 - req->radix[L1][L1_IDX(req->vaddr)] = a; 20.593 - req->state = WRITE_L1_L1z; 20.594 - block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req); 20.595 - break; 20.596 - 20.597 - /* L1 Fault Path: */ 20.598 - 20.599 - case READ_L2_L1f: 20.600 - 20.601 - DPRINTF("READ_L2_L1f\n"); 20.602 - node = (radix_tree_node) IO_BLOCK(r); 20.603 - clear_w_bits(node); 20.604 - if (node == NULL) goto fail; 20.605 - a = node[L2_IDX(req->vaddr)]; 20.606 - addr = getid(a); 20.607 - 20.608 - req->radix_addr[L3] = addr; 20.609 - req->radix[L2] = node; 20.610 - 20.611 - if (addr == ZERO) { 20.612 - /* nothing below L2, create an empty L3 and alloc data. */ 20.613 - /* (So skip READ_L3_L1f.) 
*/ 20.614 - req->radix[L3] = newblock(); 20.615 - req->state = ALLOC_DATA_L1f; 20.616 - block_alloc( req->block, write_cb, req ); 20.617 - } else { 20.618 - req->state = READ_L3_L1f; 20.619 - block_read( addr, write_cb, req ); 20.620 - } 20.621 - break; 20.622 - 20.623 - case READ_L3_L1f: 20.624 - 20.625 - DPRINTF("READ_L3_L1f\n"); 20.626 - node = (radix_tree_node) IO_BLOCK(r); 20.627 - clear_L3_w_bits(node); 20.628 - if (node == NULL) goto fail; 20.629 - a = node[L2_IDX(req->vaddr)]; 20.630 - addr = getid(a); 20.631 - 20.632 - req->radix[L3] = node; 20.633 - req->state = ALLOC_DATA_L1f; 20.634 - block_alloc( req->block, write_cb, req ); 20.635 - break; 20.636 - 20.637 - case ALLOC_DATA_L1f: 20.638 - 20.639 - DPRINTF("ALLOC_DATA_L1f\n"); 20.640 - addr = IO_ADDR(r); 20.641 - a = writable(addr); 20.642 - req->radix[L3][L3_IDX(req->vaddr)] = a; 20.643 - bi = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1]; 20.644 - req->bi.unused = 107; 20.645 - *bi = req->bi; 20.646 - req->state = ALLOC_L3_L1f; 20.647 - block_alloc( (char*)req->radix[L3], write_cb, req ); 20.648 - break; 20.649 - 20.650 - case ALLOC_L3_L1f: 20.651 - 20.652 - DPRINTF("ALLOC_L3_L1f\n"); 20.653 - addr = IO_ADDR(r); 20.654 - a = writable(addr); 20.655 - req->radix[L2][L2_IDX(req->vaddr)] = a; 20.656 - req->state = ALLOC_L2_L1f; 20.657 - block_alloc( (char*)req->radix[L2], write_cb, req ); 20.658 - break; 20.659 - 20.660 - case ALLOC_L2_L1f: 20.661 - 20.662 - DPRINTF("ALLOC_L2_L1f\n"); 20.663 - addr = IO_ADDR(r); 20.664 - a = writable(addr); 20.665 - req->radix[L1][L1_IDX(req->vaddr)] = a; 20.666 - req->state = WRITE_L1_L1f; 20.667 - block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req); 20.668 - break; 20.669 - 20.670 - case WRITE_L3: 20.671 - case WRITE_L3_L3z: 20.672 - case WRITE_L3_L3f: 20.673 - case WRITE_L2_L2z: 20.674 - case WRITE_L2_L2f: 20.675 - case WRITE_L1_L1z: 20.676 - case WRITE_L1_L1f: 20.677 - { 20.678 - int i; 20.679 - DPRINTF("DONE\n"); 20.680 - /* free any 
saved node vals. */ 20.681 - for (i=0; i<3; i++) 20.682 - if (req->radix[i] != 0) free(req->radix[i]); 20.683 - req->retval = r; 20.684 - req->state = WRITE_UNLOCKED; 20.685 - block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req); 20.686 - break; 20.687 - } 20.688 - case WRITE_UNLOCKED: 20.689 - { 20.690 - struct io_ret r; 20.691 - io_cb_t cb; 20.692 - DPRINTF("WRITE_UNLOCKED!\n"); 20.693 - req_param = req->param; 20.694 - r = req->retval; 20.695 - cb = req->cb; 20.696 - free(req); 20.697 - cb(r, req_param); 20.698 - break; 20.699 - } 20.700 - 20.701 - default: 20.702 - DPRINTF("*** Write: Bad state! (%d) ***\n", req->state); 20.703 - goto fail; 20.704 - } 20.705 - 20.706 - return; 20.707 - 20.708 - fail: 20.709 - { 20.710 - struct io_ret r; 20.711 - io_cb_t cb; 20.712 - int i; 20.713 - 20.714 - DPRINTF("asyn_write had a read error mid-way.\n"); 20.715 - req_param = req->param; 20.716 - cb = req->cb; 20.717 - r.type = IO_INT_T; 20.718 - r.u.i = -1; 20.719 - /* free any saved node vals. 
*/ 20.720 - for (i=0; i<3; i++) 20.721 - free(req->radix[i]); 20.722 - free(req); 20.723 - cb(r, req_param); 20.724 - } 20.725 -} 20.726 - 20.727 -char *vdi_read_s(vdi_t *vdi, uint64_t vaddr) 20.728 -{ 20.729 - pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER; 20.730 - char *block = NULL; 20.731 - int ret; 20.732 - 20.733 - void reads_cb(struct io_ret r, void *param) 20.734 - { 20.735 - block = IO_BLOCK(r); 20.736 - pthread_mutex_unlock((pthread_mutex_t *)param); 20.737 - } 20.738 - 20.739 - pthread_mutex_lock(&m); 20.740 - ret = vdi_read(vdi, vaddr, reads_cb, &m); 20.741 - 20.742 - if (ret == 0) pthread_mutex_lock(&m); 20.743 - 20.744 - return block; 20.745 -} 20.746 - 20.747 - 20.748 -int vdi_write_s(vdi_t *vdi, uint64_t vaddr, char *block) 20.749 -{ 20.750 - pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER; 20.751 - int ret, result; 20.752 - 20.753 - void writes_cb(struct io_ret r, void *param) 20.754 - { 20.755 - result = IO_INT(r); 20.756 - pthread_mutex_unlock((pthread_mutex_t *)param); 20.757 - } 20.758 - 20.759 - pthread_mutex_lock(&m); 20.760 - ret = vdi_write(vdi, vaddr, block, writes_cb, &m); 20.761 - 20.762 - if (ret == 0) pthread_mutex_lock(&m); 20.763 - 20.764 - return result; 20.765 -}
21.1 --- a/tools/blktap/parallax/requests-async.h Fri Jun 16 18:19:40 2006 +0100 21.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 21.3 @@ -1,29 +0,0 @@ 21.4 -#ifndef _REQUESTSASYNC_H_ 21.5 -#define _REQUESTSASYNC_H_ 21.6 - 21.7 -#include "block-async.h" 21.8 -#include "blockstore.h" /* for newblock etc. */ 21.9 - 21.10 -/* 21.11 -#define BLOCK_SIZE 4096 21.12 -#define ZERO 0ULL 21.13 -#define getid(x) (((x)>>1)&0x7fffffffffffffffLLU) 21.14 -#define iswritable(x) (((x) & 1LLU) != 0) 21.15 -#define writable(x) (((x) << 1) | 1LLU) 21.16 -#define readonly(x) ((uint64_t)((x) << 1)) 21.17 -*/ 21.18 - 21.19 -#define VADDR_MASK 0x0000000003ffffffLLU /* 26-bits = 256Gig */ 21.20 -#define VALID_VADDR(x) (((x) & VADDR_MASK) == (x)) 21.21 - 21.22 -int vdi_read (vdi_t *vdi, uint64_t vaddr, io_cb_t cb, void *param); 21.23 -int vdi_write(vdi_t *vdi, uint64_t vaddr, char *block, io_cb_t cb, void *param); 21.24 - 21.25 -/* synchronous versions: */ 21.26 -char *vdi_read_s (vdi_t *vdi, uint64_t vaddr); 21.27 -int vdi_write_s(vdi_t *vdi, uint64_t vaddr, char *block); 21.28 - 21.29 -#define ERR_BAD_VADDR -1 21.30 -#define ERR_NOMEM -2 21.31 - 21.32 -#endif //_REQUESTSASYNC_H_
22.1 --- a/tools/blktap/parallax/snaplog.c Fri Jun 16 18:19:40 2006 +0100 22.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 22.3 @@ -1,238 +0,0 @@ 22.4 -/************************************************************************** 22.5 - * 22.6 - * snaplog.c 22.7 - * 22.8 - * Snapshot log on-disk data structure. 22.9 - * 22.10 - */ 22.11 - 22.12 - /* VDI histories are made from chains of snapshot logs. These logs record 22.13 - * the (radix) root and timestamp of individual snapshots. 22.14 - * 22.15 - * creation of a new VDI involves 'forking' a snapshot log, by creating a 22.16 - * new, empty log (in a new VDI) and parenting it off of a record in an 22.17 - * existing snapshot log. 22.18 - * 22.19 - * snapshot log blocks have at most one writer. 22.20 - */ 22.21 - 22.22 -#include <stdio.h> 22.23 -#include <stdlib.h> 22.24 -#include <sys/time.h> 22.25 -#include "blockstore.h" 22.26 -#include "snaplog.h" 22.27 - 22.28 - 22.29 - 22.30 -snap_block_t *snap_get_block(uint64_t block) 22.31 -{ 22.32 - snap_block_t *blk = (snap_block_t *)readblock(block); 22.33 - 22.34 - if ( blk == NULL) 22.35 - return NULL; 22.36 - if ( blk->hdr.magic != SNAP_MAGIC ) { 22.37 - freeblock(blk); 22.38 - return NULL; 22.39 - } 22.40 - 22.41 - return blk; 22.42 -} 22.43 - 22.44 -int snap_get_id(snap_id_t *id, snap_rec_t *target) 22.45 -{ 22.46 - snap_block_t *blk; 22.47 - 22.48 - if ( id == NULL ) 22.49 - return -1; 22.50 - 22.51 - blk = snap_get_block(id->block); 22.52 - 22.53 - if ( blk == NULL ) 22.54 - return -1; 22.55 - 22.56 - if ( id->index > blk->hdr.nr_entries ) { 22.57 - freeblock(blk); 22.58 - return -1; 22.59 - } 22.60 - 22.61 - *target = blk->snaps[id->index]; 22.62 - freeblock(blk); 22.63 - return 0; 22.64 -} 22.65 - 22.66 -int __snap_block_create(snap_id_t *parent_id, snap_id_t *fork_id, 22.67 - snap_id_t *new_id) 22.68 -{ 22.69 - snap_rec_t parent_rec, fork_rec; 22.70 - snap_block_t *blk, *pblk; 22.71 - /* 22.72 - if ( (parent_id != NULL) && (snap_get_id(parent_id, &parent_rec) 
!= 0) ) 22.73 - return -1; 22.74 - 22.75 - if ( (fork_id != NULL) && (snap_get_id(fork_id, &fork_rec) != 0) ) 22.76 - return -1; 22.77 -*/ 22.78 - blk = (snap_block_t *)newblock(); 22.79 - blk->hdr.magic = SNAP_MAGIC; 22.80 - blk->hdr.nr_entries = 0; 22.81 - blk->hdr.log_entries = 0; 22.82 - blk->hdr.immutable = 0; 22.83 - 22.84 - if ( (parent_id != NULL) 22.85 - && (parent_id->block != fork_id->block) 22.86 - && (parent_id->block != 0)) { 22.87 - 22.88 - pblk = snap_get_block(parent_id->block); 22.89 - blk->hdr.log_entries = pblk->hdr.log_entries; 22.90 - freeblock(pblk); 22.91 - } 22.92 - 22.93 - if (parent_id != NULL) { 22.94 - blk->hdr.parent_block = *parent_id; 22.95 - blk->hdr.fork_block = *fork_id; 22.96 - } else { 22.97 - blk->hdr.parent_block = null_snap_id; 22.98 - blk->hdr.fork_block = null_snap_id; 22.99 - } 22.100 - 22.101 - new_id->index = 0; 22.102 - new_id->block = allocblock(blk); 22.103 - freeblock(blk); 22.104 - if (new_id->block == 0) 22.105 - return -1; 22.106 - 22.107 - return 0; 22.108 -} 22.109 - 22.110 -int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id) 22.111 -{ 22.112 - return __snap_block_create(parent_id, parent_id, new_id); 22.113 -} 22.114 - 22.115 -int snap_append(snap_id_t *old_id, snap_rec_t *rec, snap_id_t *new_id) 22.116 -{ 22.117 - snap_id_t id = *old_id; 22.118 - snap_block_t *blk = snap_get_block(id.block); 22.119 - 22.120 - if ( rec->deleted == 1 ) { 22.121 - printf("Attempt to append a deleted snapshot!\n"); 22.122 - return -1; 22.123 - } 22.124 - 22.125 - if ( blk->hdr.immutable != 0 ) { 22.126 - printf("Attempt to snap an immutable snap block!\n"); 22.127 - return -1; 22.128 - } 22.129 - 22.130 - new_id->block = id.block; 22.131 - 22.132 - if (blk->hdr.nr_entries == SNAPS_PER_BLOCK) { 22.133 - int ret; 22.134 - 22.135 - id.index--; /* make id point to the last full record */ 22.136 - 22.137 - ret = __snap_block_create(&id, &blk->hdr.fork_block, new_id); 22.138 - if ( ret != 0 ) { 22.139 - freeblock(blk); 22.140 
- return -1; 22.141 - } 22.142 - 22.143 - blk->hdr.immutable = 1; 22.144 - writeblock(id.block, blk); 22.145 - freeblock(blk); 22.146 - blk = snap_get_block(new_id->block); 22.147 - id = *new_id; 22.148 - } 22.149 - 22.150 - blk->snaps[blk->hdr.nr_entries] = *rec; 22.151 - blk->hdr.nr_entries++; 22.152 - blk->hdr.log_entries++; 22.153 - new_id->index = blk->hdr.nr_entries; 22.154 - //printf("snap: %u %u\n", blk->hdr.nr_entries, blk->hdr.log_entries); 22.155 - writeblock(id.block, blk); 22.156 - freeblock(blk); 22.157 - return 0; 22.158 -} 22.159 - 22.160 -int snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id) 22.161 -{ 22.162 - snap_block_t *p_blk, *c_blk, *blk; 22.163 - snap_rec_t *p_rec, *c_rec; 22.164 - int ret = -1; 22.165 - 22.166 - p_blk = snap_get_block(p_id->block); 22.167 - 22.168 - if (p_blk == NULL) return(-1); 22.169 - 22.170 - if (c_id->block == p_id->block) 22.171 - { 22.172 - c_blk = p_blk; 22.173 - } else { 22.174 - c_blk = snap_get_block(c_id->block); 22.175 - } 22.176 - 22.177 - if (p_blk == NULL) { 22.178 - freeblock(p_blk); 22.179 - return(-1); 22.180 - } 22.181 - 22.182 - /* parent and child must not be deleted. */ 22.183 - p_rec = &p_blk->snaps[p_id->index]; 22.184 - c_rec = &c_blk->snaps[c_id->index]; 22.185 - /* 22.186 - if ( (p_rec->deleted == 1) || (c_rec->deleted == 1) ) { 22.187 - printf("One of those snaps is already deleted.\n"); 22.188 - goto done; 22.189 - } 22.190 - */ 22.191 - /* first non-deleted thing in the log before child must be parent. 
*/ 22.192 - 22.193 - /* XXX todo: text the range here for delete (and eventually fork) bits) */ 22.194 - /* for now, snaps must be consecutive, on the same log page: */ 22.195 - 22.196 - if ((p_id->block != c_id->block) || (p_id->index != c_id->index-1)) 22.197 - { 22.198 - printf("Deleting non-consecutive snaps is not done yet.\n"); 22.199 - goto done; 22.200 - } 22.201 - 22.202 - /* mark parent as deleted XXX: may need to lock parent block here.*/ 22.203 - p_rec->deleted = 1; 22.204 - writeblock(p_id->block, p_blk); 22.205 - 22.206 - /* delete the parent */ 22.207 - printf("collapse(%Ld, %Ld)\n", p_rec->radix_root, c_rec->radix_root); 22.208 - ret = collapse(height, p_rec->radix_root, c_rec->radix_root); 22.209 - 22.210 - /* return the number of blocks reclaimed. */ 22.211 - 22.212 -done: 22.213 - if (c_blk != p_blk) freeblock(c_blk); 22.214 - freeblock(p_blk); 22.215 - 22.216 - return(ret); 22.217 -} 22.218 - 22.219 -void snap_print_history(snap_id_t *snap_id) 22.220 -{ 22.221 - snap_id_t id = *snap_id; 22.222 - unsigned int idx = id.index; 22.223 - snap_block_t *new_blk, *blk = snap_get_block(id.block); 22.224 - 22.225 - while ( blk ) { 22.226 - printf("[Snap block %Ld]:\n", id.block); 22.227 - do { 22.228 - printf(" %03u: root: %Ld ts: %ld.%ld\n", idx, 22.229 - blk->snaps[idx].radix_root, 22.230 - blk->snaps[idx].timestamp.tv_sec, 22.231 - blk->snaps[idx].timestamp.tv_usec); 22.232 - } while (idx-- != 0); 22.233 - 22.234 - id = blk->hdr.parent_block; 22.235 - if (id.block != 0) { 22.236 - new_blk = snap_get_block(id.block); 22.237 - } 22.238 - freeblock(blk); 22.239 - blk = new_blk; 22.240 - } 22.241 -}
23.1 --- a/tools/blktap/parallax/snaplog.h Fri Jun 16 18:19:40 2006 +0100 23.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 23.3 @@ -1,61 +0,0 @@ 23.4 -/************************************************************************** 23.5 - * 23.6 - * snaplog.h 23.7 - * 23.8 - * Snapshot log on-disk data structure. 23.9 - * 23.10 - */ 23.11 - 23.12 -#include "radix.h" 23.13 -#include "blockstore.h" /* for BLOCK_SIZE */ 23.14 - 23.15 -#ifndef __SNAPLOG_H__ 23.16 -#define __SNAPLOG_H__ 23.17 - 23.18 -typedef struct snap_id { 23.19 - uint64_t block; 23.20 - unsigned int index; 23.21 -} snap_id_t; 23.22 - 23.23 -typedef struct snap_rec { 23.24 - uint64_t radix_root; 23.25 - struct timeval timestamp; 23.26 - /* flags: */ 23.27 - unsigned deleted:1; 23.28 -} snap_rec_t; 23.29 - 23.30 - 23.31 -int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id); 23.32 -int snap_append(snap_id_t *id, snap_rec_t *rec, snap_id_t *new_id); 23.33 -int snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id); 23.34 -void snap_print_history(snap_id_t *snap_id); 23.35 -int snap_get_id(snap_id_t *id, snap_rec_t *target); 23.36 - 23.37 - 23.38 -/* exported for vdi debugging */ 23.39 -#define SNAP_MAGIC 0xff00ff0aa0ff00ffLL 23.40 - 23.41 -static const snap_id_t null_snap_id = { 0, 0 }; 23.42 - 23.43 -typedef struct snap_block_hdr { 23.44 - uint64_t magic; 23.45 - snap_id_t parent_block; /* parent block within this chain */ 23.46 - snap_id_t fork_block; /* where this log was forked */ 23.47 - unsigned log_entries; /* total entries since forking */ 23.48 - unsigned short nr_entries; /* entries in snaps[] */ 23.49 - unsigned short immutable; /* has this snap page become immutable? 
*/ 23.50 -} snap_block_hdr_t; 23.51 - 23.52 - 23.53 -#define SNAPS_PER_BLOCK \ 23.54 - ((BLOCK_SIZE - sizeof(snap_block_hdr_t)) / sizeof(snap_rec_t)) 23.55 - 23.56 -typedef struct snap_block { 23.57 - snap_block_hdr_t hdr; 23.58 - snap_rec_t snaps[SNAPS_PER_BLOCK]; 23.59 -} snap_block_t; 23.60 - 23.61 - 23.62 -snap_block_t *snap_get_block(uint64_t block); 23.63 - 23.64 -#endif /* __SNAPLOG_H__ */
24.1 --- a/tools/blktap/parallax/vdi.c Fri Jun 16 18:19:40 2006 +0100 24.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 24.3 @@ -1,367 +0,0 @@ 24.4 -/************************************************************************** 24.5 - * 24.6 - * vdi.c 24.7 - * 24.8 - * Virtual Disk Image (VDI) Interfaces 24.9 - * 24.10 - */ 24.11 - 24.12 -#include <stdio.h> 24.13 -#include <stdlib.h> 24.14 -#include <fcntl.h> 24.15 -#include <string.h> 24.16 -#include <sys/time.h> 24.17 -#include <pthread.h> 24.18 -#include "blockstore.h" 24.19 -#include "block-async.h" 24.20 -#include "requests-async.h" 24.21 -#include "radix.h" 24.22 -#include "vdi.h" 24.23 - 24.24 -#define VDI_REG_BLOCK 2LL 24.25 -#define VDI_RADIX_ROOT writable(3) 24.26 - 24.27 -#if 0 24.28 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 24.29 -#else 24.30 -#define DPRINTF(_f, _a...) ((void)0) 24.31 -#endif 24.32 - 24.33 -/* I haven't decided about this registry stuff, so this is just a really 24.34 - * quick lash-up so that there is some way to track VDIs. 24.35 - * 24.36 - * (Most vdi access should be with a direct handle to the block, so this 24.37 - * registry is just for start-of-day lookup and other control operations.) 24.38 - */ 24.39 - 24.40 -vdi_registry_t *create_vdi_registry(void) 24.41 -{ 24.42 - vdi_registry_t *reg = (vdi_registry_t *)newblock(); 24.43 - 24.44 - if (reg == NULL) 24.45 - return NULL; 24.46 - 24.47 - /* zero-fill the vdi radix root while we have an empty block. 
*/ 24.48 - writeblock(VDI_RADIX_ROOT, (void *)reg); 24.49 - 24.50 - 24.51 - DPRINTF("[vdi.c] Creating VDI registry!\n"); 24.52 - reg->magic = VDI_REG_MAGIC; 24.53 - reg->nr_vdis = 0; 24.54 - 24.55 - writeblock(VDI_REG_BLOCK, (void *)reg); 24.56 - 24.57 - return reg; 24.58 -} 24.59 - 24.60 -vdi_registry_t *get_vdi_registry(void) 24.61 -{ 24.62 - vdi_registry_t *vdi_reg = (vdi_registry_t *)readblock(VDI_REG_BLOCK); 24.63 - 24.64 - if ( vdi_reg == NULL ) 24.65 - vdi_reg = create_vdi_registry(); 24.66 - 24.67 - if ( vdi_reg->magic != VDI_REG_MAGIC ) { 24.68 - freeblock(vdi_reg); 24.69 - return NULL; 24.70 - } 24.71 - 24.72 - return vdi_reg; 24.73 -} 24.74 - 24.75 - 24.76 -vdi_t *vdi_create(snap_id_t *parent_snap, char *name) 24.77 -{ 24.78 - int ret; 24.79 - vdi_t *vdi; 24.80 - vdi_registry_t *vdi_reg; 24.81 - snap_rec_t snap_rec; 24.82 - 24.83 - /* create a vdi struct */ 24.84 - vdi = newblock(); 24.85 - if (vdi == NULL) 24.86 - return NULL; 24.87 - 24.88 - if ( snap_get_id(parent_snap, &snap_rec) == 0 ) { 24.89 - vdi->radix_root = snapshot(snap_rec.radix_root); 24.90 - } else { 24.91 - vdi->radix_root = allocblock((void *)vdi); /* vdi is just zeros here */ 24.92 - vdi->radix_root = writable(vdi->radix_root); /* grr. */ 24.93 - } 24.94 - 24.95 - /* create a snapshot log, and add it to the vdi struct */ 24.96 - 24.97 - ret = snap_block_create(parent_snap, &vdi->snap); 24.98 - if ( ret != 0 ) { 24.99 - DPRINTF("Error getting snap block in vdi_create.\n"); 24.100 - freeblock(vdi); 24.101 - return NULL; 24.102 - } 24.103 - 24.104 - /* append the vdi to the registry, fill block and id. */ 24.105 - /* implicit allocation means we have to write the vdi twice here. 
*/ 24.106 - vdi_reg = get_vdi_registry(); 24.107 - if ( vdi_reg == NULL ) { 24.108 - freeblock(vdi); 24.109 - return NULL; 24.110 - } 24.111 - 24.112 - vdi->block = allocblock((void *)vdi); 24.113 - vdi->id = vdi_reg->nr_vdis++; 24.114 - strncpy(vdi->name, name, VDI_NAME_SZ); 24.115 - vdi->name[VDI_NAME_SZ] = '\0'; 24.116 - vdi->radix_lock = NULL; /* for tidiness */ 24.117 - writeblock(vdi->block, (void *)vdi); 24.118 - 24.119 - update(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi->id, vdi->block); 24.120 - writeblock(VDI_REG_BLOCK, (void *)vdi_reg); 24.121 - freeblock(vdi_reg); 24.122 - 24.123 - vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock)); 24.124 - if (vdi->radix_lock == NULL) 24.125 - { 24.126 - perror("couldn't malloc radix_lock for new vdi!"); 24.127 - freeblock(vdi); 24.128 - return NULL; 24.129 - } 24.130 - radix_lock_init(vdi->radix_lock); 24.131 - 24.132 - return vdi; 24.133 -} 24.134 - 24.135 -/* vdi_get and vdi_put currently act more like alloc/free -- they don't 24.136 - * do refcount-based allocation. 
24.137 - */ 24.138 -vdi_t *vdi_get(uint64_t vdi_id) 24.139 -{ 24.140 - uint64_t vdi_blk; 24.141 - vdi_t *vdi; 24.142 - 24.143 - vdi_blk = lookup(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi_id); 24.144 - 24.145 - if ( vdi_blk == 0 ) 24.146 - return NULL; 24.147 - 24.148 - vdi = (vdi_t *)readblock(vdi_blk); 24.149 - 24.150 - vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock)); 24.151 - if (vdi->radix_lock == NULL) 24.152 - { 24.153 - perror("couldn't malloc radix_lock for new vdi!"); 24.154 - freeblock(vdi); 24.155 - return NULL; 24.156 - } 24.157 - radix_lock_init(vdi->radix_lock); 24.158 - 24.159 - return vdi; 24.160 -} 24.161 - 24.162 -void vdi_put(vdi_t *vdi) 24.163 -{ 24.164 - free(vdi->radix_lock); 24.165 - freeblock(vdi); 24.166 -} 24.167 - 24.168 -void vdi_snapshot(vdi_t *vdi) 24.169 -{ 24.170 - snap_rec_t rec; 24.171 - int ret; 24.172 - 24.173 - rec.radix_root = vdi->radix_root; 24.174 - gettimeofday(&rec.timestamp, NULL); 24.175 - rec.deleted = 0; 24.176 - 24.177 - vdi->radix_root = snapshot(vdi->radix_root); 24.178 - ret = snap_append(&vdi->snap, &rec, &vdi->snap); 24.179 - if ( ret != 0 ) { 24.180 - printf("snap_append returned failure\n"); 24.181 - return; 24.182 - } 24.183 - writeblock(vdi->block, vdi); 24.184 -} 24.185 - 24.186 -int __init_vdi() 24.187 -{ 24.188 - /* sneak this in here for the moment. */ 24.189 - __rcache_init(); 24.190 - 24.191 - /* force the registry to be created if it doesn't exist. 
*/ 24.192 - vdi_registry_t *vdi_reg = get_vdi_registry(); 24.193 - if (vdi_reg == NULL) { 24.194 - printf("[vdi.c] Couldn't get/create a VDI registry!\n"); 24.195 - return -1; 24.196 - } 24.197 - freeblock(vdi_reg); 24.198 - 24.199 - 24.200 - return 0; 24.201 -} 24.202 - 24.203 -#ifdef VDI_STANDALONE 24.204 - 24.205 -#define TEST_VDIS 50 24.206 -#define NR_ITERS 50000 24.207 -#define FORK_POINTS 200 24.208 -#define INIT_VDIS 3 24.209 -#define INIT_SNAPS 40 24.210 - 24.211 -/* These must be of decreasing size: */ 24.212 -#define NEW_FORK (RAND_MAX-(RAND_MAX/1000)) 24.213 -#define NEW_ROOT_VDI (RAND_MAX-((RAND_MAX/1000)*2)) 24.214 -#define NEW_FORK_VDI (RAND_MAX-((RAND_MAX/1000)*3)) 24.215 - 24.216 -#define GRAPH_DOT_FILE "vdi.dot" 24.217 -#define GRAPH_PS_FILE "vdi.ps" 24.218 - 24.219 - 24.220 -typedef struct sh_st { 24.221 - snap_id_t id; 24.222 - struct sh_st *next; 24.223 -} sh_t; 24.224 - 24.225 -#define SNAP_HASHSZ 1024 24.226 -sh_t *node_hash[SNAP_HASHSZ]; 24.227 -#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ) 24.228 - 24.229 -#define SNAPID_EQUAL(_a,_b) \ 24.230 - (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index)) 24.231 -int sh_check_and_add(snap_id_t *id) 24.232 -{ 24.233 - sh_t **s = &node_hash[SNAP_HASH(id)]; 24.234 - 24.235 - while (*s != NULL) { 24.236 - if (SNAPID_EQUAL(&((*s)->id), id)) 24.237 - return 1; 24.238 - *s = (*s)->next; 24.239 - } 24.240 - 24.241 - *s = (sh_t *)malloc(sizeof(sh_t)); 24.242 - (*s)->id = *id; 24.243 - (*s)->next = NULL; 24.244 - 24.245 - return 0; 24.246 -} 24.247 - 24.248 -int main(int argc, char *argv[]) 24.249 -{ 24.250 - vdi_t *vdi_list[TEST_VDIS]; 24.251 - snap_id_t id, fork_points[FORK_POINTS]; 24.252 - int nr_vdis = 0, nr_forks = 0; 24.253 - int i, j, r; 24.254 - FILE *f; 24.255 - char name[VDI_NAME_SZ]; 24.256 - 24.257 - __init_blockstore(); 24.258 - __init_vdi(); 24.259 - 24.260 - printf("[o] Generating seed VDIs. 
(%d VDIs)\n", INIT_VDIS); 24.261 - 24.262 - for (i=0; i<INIT_VDIS; i++) { 24.263 - r=rand(); 24.264 - 24.265 - sprintf(name, "VDI Number %d", nr_vdis); 24.266 - vdi_list[i] = vdi_create(NULL, name); 24.267 - for (j=0; j<(r%INIT_SNAPS); j++) 24.268 - vdi_snapshot(vdi_list[i]); 24.269 - fork_points[i] = vdi_list[i]->snap; 24.270 - nr_vdis++; 24.271 - nr_forks++; 24.272 - } 24.273 - 24.274 - printf("[o] Running a random workload. (%d iterations)\n", NR_ITERS); 24.275 - 24.276 - for (i=0; i<NR_ITERS; i++) { 24.277 - r = rand(); 24.278 - 24.279 - if ( r > NEW_FORK ) { 24.280 - if ( nr_forks > FORK_POINTS ) 24.281 - continue; 24.282 - id = vdi_list[r%nr_vdis]->snap; 24.283 - if ( ( id.block == 0 ) || ( id.index == 0 ) ) 24.284 - continue; 24.285 - id.index--; 24.286 - fork_points[nr_forks++] = id; 24.287 - 24.288 - } else if ( r > NEW_ROOT_VDI ) { 24.289 - 24.290 - if ( nr_vdis == TEST_VDIS ) 24.291 - continue; 24.292 - 24.293 - sprintf(name, "VDI Number %d.", nr_vdis); 24.294 - vdi_list[nr_vdis++] = vdi_create(NULL, name); 24.295 - 24.296 - } else if ( r > NEW_FORK_VDI ) { 24.297 - 24.298 - if ( nr_vdis == TEST_VDIS ) 24.299 - continue; 24.300 - 24.301 - sprintf(name, "VDI Number %d.", nr_vdis); 24.302 - vdi_list[nr_vdis++] = vdi_create(&fork_points[r%nr_forks], name); 24.303 - 24.304 - } else /* SNAPSHOT */ { 24.305 - 24.306 - vdi_snapshot(vdi_list[r%nr_vdis]); 24.307 - 24.308 - } 24.309 - } 24.310 - 24.311 - /* now dump it out to a dot file. */ 24.312 - printf("[o] Dumping state to a dot graph. 
(%d VDIs)\n", nr_vdis); 24.313 - 24.314 - f = fopen(GRAPH_DOT_FILE, "w"); 24.315 - 24.316 - /* write graph preamble */ 24.317 - fprintf(f, "digraph G {\n"); 24.318 - fprintf(f, " rankdir=LR\n"); 24.319 - 24.320 - for (i=0; i<nr_vdis; i++) { 24.321 - char oldnode[255]; 24.322 - snap_block_t *blk; 24.323 - snap_id_t id = vdi_list[i]->snap; 24.324 - int nr_snaps, done=0; 24.325 - 24.326 - /* add a node for the id */ 24.327 -printf("vdi: %d\n", i); 24.328 - fprintf(f, " n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", 24.329 - id.block, id.index, vdi_list[i]->name, 24.330 - id.block, id.index); 24.331 - sprintf(oldnode, "n%Ld%d", id.block, id.index); 24.332 - 24.333 - while (id.block != 0) { 24.334 - blk = snap_get_block(id.block); 24.335 - nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index); 24.336 - id = blk->hdr.fork_block; 24.337 - 24.338 - done = sh_check_and_add(&id); 24.339 - 24.340 - /* add a node for the fork_id */ 24.341 - if (!done) { 24.342 - fprintf(f, " n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", 24.343 - id.block, id.index, 24.344 - id.block, id.index); 24.345 - } 24.346 - 24.347 - /* add an edge between them */ 24.348 - fprintf(f, " n%Ld%d -> %s [label=\"%u snapshots\"]\n", 24.349 - id.block, id.index, oldnode, nr_snaps); 24.350 - sprintf(oldnode, "n%Ld%d", id.block, id.index); 24.351 - freeblock(blk); 24.352 - 24.353 - if (done) break; 24.354 - } 24.355 - } 24.356 - 24.357 - /* write graph postamble */ 24.358 - fprintf(f, "}\n"); 24.359 - fclose(f); 24.360 - 24.361 - printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE); 24.362 - { 24.363 - char cmd[255]; 24.364 - sprintf(cmd, "dot %s -Tps -o %s", GRAPH_DOT_FILE, GRAPH_PS_FILE); 24.365 - system(cmd); 24.366 - } 24.367 - return 0; 24.368 -} 24.369 - 24.370 -#endif
25.1 --- a/tools/blktap/parallax/vdi.h Fri Jun 16 18:19:40 2006 +0100 25.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 25.3 @@ -1,55 +0,0 @@ 25.4 -#ifndef _VDI_H_ 25.5 -#define _VDI_H_ 25.6 -/************************************************************************** 25.7 - * 25.8 - * vdi.h 25.9 - * 25.10 - * Virtual Disk Image (VDI) Interfaces 25.11 - * 25.12 - */ 25.13 - 25.14 -#ifndef __VDI_H__ 25.15 -#define __VDI_H__ 25.16 - 25.17 -#include "blktaplib.h" 25.18 -#include "snaplog.h" 25.19 - 25.20 -#define VDI_HEIGHT 27 /* Note that these are now hard-coded */ 25.21 -#define VDI_REG_HEIGHT 27 /* in the async lookup code */ 25.22 - 25.23 -#define VDI_NAME_SZ 256 25.24 - 25.25 - 25.26 -typedef struct vdi { 25.27 - uint64_t id; /* unique vdi id -- used by the registry */ 25.28 - uint64_t block; /* block where this vdi lives (also unique)*/ 25.29 - uint64_t radix_root; /* radix root node for block mappings */ 25.30 - snap_id_t snap; /* next snapshot slot for this VDI */ 25.31 - struct vdi *next; /* used to hash-chain in blkif. */ 25.32 - blkif_vdev_t vdevice; /* currently mounted as... */ 25.33 - struct radix_lock *radix_lock;/* per-line L1 RW lock for parallel reqs */ 25.34 - char name[VDI_NAME_SZ];/* human readable vdi name */ 25.35 -} vdi_t; 25.36 - 25.37 -#define VDI_REG_MAGIC 0xff00ff0bb0ff00ffLL 25.38 - 25.39 -typedef struct vdi_registry { 25.40 - uint64_t magic; 25.41 - uint64_t nr_vdis; 25.42 -} vdi_registry_t; 25.43 - 25.44 - 25.45 -int __init_vdi(void); 25.46 - 25.47 -vdi_t *vdi_get(uint64_t vdi_id); 25.48 -void vdi_put(vdi_t *vdi); 25.49 -vdi_registry_t *get_vdi_registry(void); 25.50 -vdi_t *vdi_create(snap_id_t *parent_snap, char *name); 25.51 -uint64_t vdi_lookup_block(vdi_t *vdi, uint64_t vdi_block, int *writable); 25.52 -void vdi_update_block(vdi_t *vdi, uint64_t vdi_block, uint64_t g_block); 25.53 -void vdi_snapshot(vdi_t *vdi); 25.54 - 25.55 - 25.56 -#endif /* __VDI_H__ */ 25.57 - 25.58 -#endif //_VDI_H_
26.1 --- a/tools/blktap/parallax/vdi_create.c Fri Jun 16 18:19:40 2006 +0100 26.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 26.3 @@ -1,52 +0,0 @@ 26.4 -/************************************************************************** 26.5 - * 26.6 - * vdi_create.c 26.7 - * 26.8 - * Create a new vdi. 26.9 - * 26.10 - */ 26.11 - 26.12 -#include <stdio.h> 26.13 -#include <stdlib.h> 26.14 -#include <string.h> 26.15 -#include <sys/time.h> 26.16 -#include "blockstore.h" 26.17 -#include "radix.h" 26.18 -#include "vdi.h" 26.19 - 26.20 -int main(int argc, char *argv[]) 26.21 -{ 26.22 - vdi_t *vdi; 26.23 - char name[VDI_NAME_SZ] = ""; 26.24 - snap_id_t id; 26.25 - int from_snap = 0; 26.26 - 26.27 - __init_blockstore(); 26.28 - __init_vdi(); 26.29 - 26.30 - if ( argc == 1 ) { 26.31 - printf("usage: %s <VDI Name> [<snap block> <snap idx>]\n", argv[0]); 26.32 - exit(-1); 26.33 - } 26.34 - 26.35 - strncpy( name, argv[1], VDI_NAME_SZ); 26.36 - name[VDI_NAME_SZ] = '\0'; 26.37 - 26.38 - if ( argc > 3 ) { 26.39 - id.block = (uint64_t) atoll(argv[2]); 26.40 - id.index = (unsigned int) atol (argv[3]); 26.41 - from_snap = 1; 26.42 - } 26.43 - 26.44 - vdi = vdi_create( from_snap ? &id : NULL, name); 26.45 - 26.46 - if ( vdi == NULL ) { 26.47 - printf("Failed to create VDI!\n"); 26.48 - freeblock(vdi); 26.49 - exit(-1); 26.50 - } 26.51 - 26.52 - freeblock(vdi); 26.53 - 26.54 - return (0); 26.55 -}
27.1 --- a/tools/blktap/parallax/vdi_fill.c Fri Jun 16 18:19:40 2006 +0100 27.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 27.3 @@ -1,81 +0,0 @@ 27.4 -/************************************************************************** 27.5 - * 27.6 - * vdi_fill.c 27.7 - * 27.8 - * Hoover a file or device into a vdi. 27.9 - * You must first create the vdi with vdi_create. 27.10 - * 27.11 - */ 27.12 - 27.13 -#include <stdio.h> 27.14 -#include <stdlib.h> 27.15 -#include <string.h> 27.16 -#include <sys/types.h> 27.17 -#include <sys/stat.h> 27.18 -#include <fcntl.h> 27.19 -#include <unistd.h> 27.20 -#include "blockstore.h" 27.21 -#include "radix.h" 27.22 -#include "requests-async.h" 27.23 -#include "vdi.h" 27.24 - 27.25 -int main(int argc, char *argv[]) 27.26 -{ 27.27 - vdi_t *vdi; 27.28 - uint64_t id; 27.29 - int fd; 27.30 - struct stat st; 27.31 - uint64_t tot_size; 27.32 - char spage[BLOCK_SIZE]; 27.33 - char *dpage; 27.34 - uint64_t vblock = 0, count=0; 27.35 - 27.36 - __init_blockstore(); 27.37 - init_block_async(); 27.38 - __init_vdi(); 27.39 - 27.40 - if ( argc < 3 ) { 27.41 - printf("usage: %s <VDI id> <filename>\n", argv[0]); 27.42 - exit(-1); 27.43 - } 27.44 - 27.45 - id = (uint64_t) atoll(argv[1]); 27.46 - 27.47 - vdi = vdi_get( id ); 27.48 - 27.49 - if ( vdi == NULL ) { 27.50 - printf("Failed to retreive VDI %Ld!\n", id); 27.51 - exit(-1); 27.52 - } 27.53 - 27.54 - fd = open(argv[2], O_RDONLY | O_LARGEFILE); 27.55 - 27.56 - if (fd < 0) { 27.57 - printf("Couldn't open %s!\n", argv[2]); 27.58 - exit(-1); 27.59 - } 27.60 - 27.61 - if ( fstat(fd, &st) != 0 ) { 27.62 - printf("Couldn't stat %s!\n", argv[2]); 27.63 - exit(-1); 27.64 - } 27.65 - 27.66 - tot_size = (uint64_t) st.st_size; 27.67 - printf("Filling VDI %Ld with %Ld bytes.\n", id, tot_size); 27.68 - 27.69 - printf("%011Ld blocks total\n", tot_size / BLOCK_SIZE); 27.70 - printf(" "); 27.71 - while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) { 27.72 - vdi_write_s(vdi, vblock, spage); 27.73 - 27.74 - 
vblock++; 27.75 - if ((vblock % 512) == 0) 27.76 - printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock); 27.77 - fflush(stdout); 27.78 - } 27.79 - printf("\n"); 27.80 - 27.81 - freeblock(vdi); 27.82 - 27.83 - return (0); 27.84 -}
28.1 --- a/tools/blktap/parallax/vdi_list.c Fri Jun 16 18:19:40 2006 +0100 28.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 28.3 @@ -1,47 +0,0 @@ 28.4 -/************************************************************************** 28.5 - * 28.6 - * vdi_list.c 28.7 - * 28.8 - * Print a list of VDIs on the block store. 28.9 - * 28.10 - */ 28.11 - 28.12 -#include <stdio.h> 28.13 -#include <stdlib.h> 28.14 -#include <string.h> 28.15 -#include <sys/time.h> 28.16 -#include "blockstore.h" 28.17 -#include "radix.h" 28.18 -#include "vdi.h" 28.19 - 28.20 -int main(int argc, char *argv[]) 28.21 -{ 28.22 - vdi_registry_t *reg; 28.23 - vdi_t *vdi; 28.24 - int i; 28.25 - 28.26 - __init_blockstore(); 28.27 - __init_vdi(); 28.28 - 28.29 - reg = get_vdi_registry(); 28.30 - 28.31 - if ( reg == NULL ) { 28.32 - printf("couldn't get VDI registry.\n"); 28.33 - exit(-1); 28.34 - } 28.35 - 28.36 - for (i=0; i < reg->nr_vdis; i++) { 28.37 - vdi = vdi_get(i); 28.38 - 28.39 - if ( vdi != NULL ) { 28.40 - 28.41 - printf("%10Ld %60s\n", vdi->id, vdi->name); 28.42 - freeblock(vdi); 28.43 - 28.44 - } 28.45 - } 28.46 - 28.47 - freeblock(reg); 28.48 - 28.49 - return 0; 28.50 -}
29.1 --- a/tools/blktap/parallax/vdi_snap.c Fri Jun 16 18:19:40 2006 +0100 29.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 29.3 @@ -1,43 +0,0 @@ 29.4 -/************************************************************************** 29.5 - * 29.6 - * vdi_snap.c 29.7 - * 29.8 - * Snapshot a vdi. 29.9 - * 29.10 - */ 29.11 - 29.12 -#include <stdio.h> 29.13 -#include <stdlib.h> 29.14 -#include <string.h> 29.15 -#include <sys/time.h> 29.16 -#include "blockstore.h" 29.17 -#include "radix.h" 29.18 -#include "vdi.h" 29.19 - 29.20 -int main(int argc, char *argv[]) 29.21 -{ 29.22 - vdi_t *vdi; 29.23 - uint64_t id; 29.24 - 29.25 - __init_blockstore(); 29.26 - __init_vdi(); 29.27 - 29.28 - if ( argc == 1 ) { 29.29 - printf("usage: %s <VDI id>\n", argv[0]); 29.30 - exit(-1); 29.31 - } 29.32 - 29.33 - id = (uint64_t) atoll(argv[1]); 29.34 - 29.35 - vdi = vdi_get(id); 29.36 - 29.37 - if ( vdi == NULL ) { 29.38 - printf("couldn't find the requested VDI.\n"); 29.39 - freeblock(vdi); 29.40 - exit(-1); 29.41 - } 29.42 - 29.43 - vdi_snapshot(vdi); 29.44 - 29.45 - return 0; 29.46 -}
30.1 --- a/tools/blktap/parallax/vdi_snap_delete.c Fri Jun 16 18:19:40 2006 +0100 30.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 30.3 @@ -1,48 +0,0 @@ 30.4 -/************************************************************************** 30.5 - * 30.6 - * vdi_snap_delete.c 30.7 - * 30.8 - * Delete a snapshot. 30.9 - * 30.10 - * This is not finished: right now it takes a snap n and calls 30.11 - * snap_collapse(n,n+1). 30.12 - * 30.13 - * TODO: support for non-consecutive, non-same-block snaps 30.14 - * Avoid forking probs. 30.15 - * 30.16 - */ 30.17 - 30.18 -#include <stdio.h> 30.19 -#include <stdlib.h> 30.20 -#include <string.h> 30.21 -#include <sys/time.h> 30.22 -#include "blockstore.h" 30.23 -#include "snaplog.h" 30.24 -#include "radix.h" 30.25 -#include "vdi.h" 30.26 - 30.27 -int main(int argc, char *argv[]) 30.28 -{ 30.29 - snap_id_t id, c_id; 30.30 - int ret; 30.31 - 30.32 - __init_blockstore(); 30.33 - __init_vdi(); 30.34 - 30.35 - if ( argc != 3 ) { 30.36 - printf("usage: %s <snap block> <snap idx>\n", argv[0]); 30.37 - exit(-1); 30.38 - } 30.39 - 30.40 - id.block = (uint64_t) atoll(argv[1]); 30.41 - id.index = (unsigned int) atol (argv[2]); 30.42 - 30.43 - c_id = id; 30.44 - c_id.index++; 30.45 - 30.46 - ret = snap_collapse(VDI_HEIGHT, &id, &c_id); 30.47 - 30.48 - printf("Freed %d blocks.\n", ret); 30.49 - 30.50 - return 0; 30.51 -}
31.1 --- a/tools/blktap/parallax/vdi_snap_list.c Fri Jun 16 18:19:40 2006 +0100 31.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 31.3 @@ -1,82 +0,0 @@ 31.4 -/************************************************************************** 31.5 - * 31.6 - * vdi_snap_list.c 31.7 - * 31.8 - * Print a list of snapshots for the specified vdi. 31.9 - * 31.10 - */ 31.11 - 31.12 -#include <stdio.h> 31.13 -#include <stdlib.h> 31.14 -#include <string.h> 31.15 -#include <time.h> 31.16 -#include <sys/time.h> 31.17 -#include "blockstore.h" 31.18 -#include "radix.h" 31.19 -#include "vdi.h" 31.20 - 31.21 -int main(int argc, char *argv[]) 31.22 -{ 31.23 - vdi_t *vdi; 31.24 - uint64_t id; 31.25 - int i, max_snaps = -1; 31.26 - snap_block_t *blk; 31.27 - snap_id_t sid; 31.28 - char *t; 31.29 - 31.30 - __init_blockstore(); 31.31 - __init_vdi(); 31.32 - 31.33 - if ( argc == 1 ) { 31.34 - printf("usage: %s <VDI id> [max snaps]\n", argv[0]); 31.35 - exit(-1); 31.36 - } 31.37 - 31.38 - id = (uint64_t) atoll(argv[1]); 31.39 - 31.40 - if ( argc > 2 ) { 31.41 - max_snaps = atoi(argv[2]); 31.42 - } 31.43 - 31.44 - vdi = vdi_get(id); 31.45 - 31.46 - if ( vdi == NULL ) { 31.47 - printf("couldn't find the requested VDI.\n"); 31.48 - freeblock(vdi); 31.49 - exit(-1); 31.50 - } 31.51 - 31.52 - sid = vdi->snap; 31.53 - sid.index--; 31.54 - 31.55 - //printf("%8s%4s%21s %12s %1s\n", "Block", "idx", "timestamp", 31.56 - // "radix root", "d"); 31.57 - printf("%8s%4s%37s %12s %1s\n", "Block", "idx", "timestamp", 31.58 - "radix root", "d"); 31.59 - 31.60 - while (sid.block != 0) { 31.61 - blk = snap_get_block(sid.block); 31.62 - for (i = sid.index; i >= 0; i--) { 31.63 - if ( max_snaps == 0 ) { 31.64 - freeblock(blk); 31.65 - goto done; 31.66 - } 31.67 - t = ctime(&blk->snaps[i].timestamp.tv_sec); 31.68 - t[strlen(t)-1] = '\0'; 31.69 - //printf("%8Ld%4u%14lu.%06lu %12Ld %1s\n", 31.70 - printf("%8Ld%4u%30s %06lu %12Ld %1s\n", 31.71 - sid.block, i, 31.72 - //blk->snaps[i].timestamp.tv_sec, 31.73 - t, 31.74 - 
blk->snaps[i].timestamp.tv_usec, 31.75 - blk->snaps[i].radix_root, 31.76 - blk->snaps[i].deleted ? "*" : " "); 31.77 - if ( max_snaps != -1 ) 31.78 - max_snaps--; 31.79 - } 31.80 - sid = blk->hdr.parent_block; 31.81 - freeblock(blk); 31.82 - } 31.83 -done: 31.84 - return 0; 31.85 -}
32.1 --- a/tools/blktap/parallax/vdi_tree.c Fri Jun 16 18:19:40 2006 +0100 32.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 32.3 @@ -1,132 +0,0 @@ 32.4 -/************************************************************************** 32.5 - * 32.6 - * vdi_tree.c 32.7 - * 32.8 - * Output current vdi tree to dot and postscript. 32.9 - * 32.10 - */ 32.11 - 32.12 -#include <stdio.h> 32.13 -#include <stdlib.h> 32.14 -#include <string.h> 32.15 -#include <sys/time.h> 32.16 -#include "blockstore.h" 32.17 -#include "radix.h" 32.18 -#include "vdi.h" 32.19 - 32.20 -#define GRAPH_DOT_FILE "vdi.dot" 32.21 -#define GRAPH_PS_FILE "vdi.ps" 32.22 - 32.23 -typedef struct sh_st { 32.24 - snap_id_t id; 32.25 - struct sh_st *next; 32.26 -} sh_t; 32.27 - 32.28 -#define SNAP_HASHSZ 1024 32.29 -sh_t *node_hash[SNAP_HASHSZ]; 32.30 -#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ) 32.31 - 32.32 -#define SNAPID_EQUAL(_a,_b) \ 32.33 - (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index)) 32.34 -int sh_check_and_add(snap_id_t *id) 32.35 -{ 32.36 - sh_t **s = &node_hash[SNAP_HASH(id)]; 32.37 - 32.38 - while (*s != NULL) { 32.39 - if (SNAPID_EQUAL(&((*s)->id), id)) 32.40 - return 1; 32.41 - *s = (*s)->next; 32.42 - } 32.43 - 32.44 - *s = (sh_t *)malloc(sizeof(sh_t)); 32.45 - (*s)->id = *id; 32.46 - (*s)->next = NULL; 32.47 - 32.48 - return 0; 32.49 -} 32.50 - 32.51 -int main(int argc, char *argv[]) 32.52 -{ 32.53 - FILE *f; 32.54 - char dot_file[255] = GRAPH_DOT_FILE; 32.55 - char ps_file[255] = GRAPH_PS_FILE; 32.56 - int nr_vdis = 0, nr_forks = 0; 32.57 - vdi_registry_t *reg; 32.58 - vdi_t *vdi; 32.59 - int i; 32.60 - 32.61 - __init_blockstore(); 32.62 - __init_vdi(); 32.63 - 32.64 - reg = get_vdi_registry(); 32.65 - 32.66 - if ( reg == NULL ) { 32.67 - printf("couldn't get VDI registry.\n"); 32.68 - exit(-1); 32.69 - } 32.70 - 32.71 - if ( argc > 1 ) { 32.72 - strncpy(ps_file, argv[1], 255); 32.73 - ps_file[255] = '\0'; 32.74 - } 32.75 - 32.76 - /* now dump it out to a 
dot file. */ 32.77 - printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis); 32.78 - 32.79 - f = fopen(dot_file, "w"); 32.80 - 32.81 - /* write graph preamble */ 32.82 - fprintf(f, "digraph G {\n"); 32.83 - fprintf(f, " rankdir=LR\n"); 32.84 - 32.85 - for (i=0; i<reg->nr_vdis; i++) { 32.86 - char oldnode[255]; 32.87 - snap_block_t *blk; 32.88 - snap_id_t id; 32.89 - int nr_snaps, done=0; 32.90 - 32.91 - vdi = vdi_get(i); 32.92 - id = vdi->snap; 32.93 - /* add a node for the id */ 32.94 -printf("vdi: %d\n", i); 32.95 - fprintf(f, " n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", 32.96 - id.block, id.index, vdi->name, 32.97 - id.block, id.index); 32.98 - sprintf(oldnode, "n%Ld%d", id.block, id.index); 32.99 - 32.100 - while (id.block != 0) { 32.101 - blk = snap_get_block(id.block); 32.102 - nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index); 32.103 - id = blk->hdr.fork_block; 32.104 - 32.105 - done = sh_check_and_add(&id); 32.106 - 32.107 - /* add a node for the fork_id */ 32.108 - if (!done) { 32.109 - fprintf(f, " n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", 32.110 - id.block, id.index, 32.111 - id.block, id.index); 32.112 - } 32.113 - 32.114 - /* add an edge between them */ 32.115 - fprintf(f, " n%Ld%d -> %s [label=\"%u snapshots\"]\n", 32.116 - id.block, id.index, oldnode, nr_snaps); 32.117 - sprintf(oldnode, "n%Ld%d", id.block, id.index); 32.118 - freeblock(blk); 32.119 - 32.120 - if (done) break; 32.121 - } 32.122 - } 32.123 - 32.124 - /* write graph postamble */ 32.125 - fprintf(f, "}\n"); 32.126 - fclose(f); 32.127 - 32.128 - printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE); 32.129 - { 32.130 - char cmd[255]; 32.131 - sprintf(cmd, "dot %s -Tps -o %s", dot_file, ps_file); 32.132 - system(cmd); 32.133 - } 32.134 - return 0; 32.135 -}
33.1 --- a/tools/blktap/parallax/vdi_unittest.c Fri Jun 16 18:19:40 2006 +0100 33.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 33.3 @@ -1,184 +0,0 @@ 33.4 -/************************************************************************** 33.5 - * 33.6 - * vdi_unittest.c 33.7 - * 33.8 - * Run a small test workload to ensure that data access through a vdi 33.9 - * is (at least superficially) correct. 33.10 - * 33.11 - */ 33.12 - 33.13 -#include <stdio.h> 33.14 -#include <stdlib.h> 33.15 -#include <string.h> 33.16 -#include <sys/types.h> 33.17 -#include <sys/stat.h> 33.18 -#include <fcntl.h> 33.19 -#include <unistd.h> 33.20 -#include "requests-async.h" 33.21 -#include "blockstore.h" 33.22 -#include "radix.h" 33.23 -#include "vdi.h" 33.24 - 33.25 -#define TEST_PAGES 32 33.26 -static char *zero_page; 33.27 -static char pages[TEST_PAGES][BLOCK_SIZE]; 33.28 -static int next_page = 0; 33.29 - 33.30 -void fill_test_pages(void) 33.31 -{ 33.32 - int i, j; 33.33 - long *page; 33.34 - 33.35 - for (i=0; i< TEST_PAGES; i++) { 33.36 - page = (unsigned long *)pages[i]; 33.37 - for (j=0; j<(BLOCK_SIZE/4); j++) { 33.38 - page[j] = random(); 33.39 - } 33.40 - } 33.41 - 33.42 - zero_page = newblock(); 33.43 -} 33.44 - 33.45 -inline uint64_t make_vaddr(uint64_t L1, uint64_t L2, uint64_t L3) 33.46 -{ 33.47 - uint64_t ret = L1; 33.48 - 33.49 - ret = (ret << 9) | L2; 33.50 - ret = (ret << 9) | L3; 33.51 - 33.52 - return ret; 33.53 -} 33.54 - 33.55 -void touch_block(vdi_t *vdi, uint64_t L1, uint64_t L2, uint64_t L3) 33.56 -{ 33.57 - uint64_t vaddr; 33.58 - char *page = pages[next_page++]; 33.59 - char *rpage = NULL; 33.60 - 33.61 - printf("TOUCH (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3); 33.62 - 33.63 - vaddr = make_vaddr(L1, L2, L3); 33.64 - vdi_write_s(vdi, vaddr, page); 33.65 - rpage = vdi_read_s(vdi, vaddr); 33.66 - 33.67 - if (rpage == NULL) 33.68 - { 33.69 - printf( "read %Lu returned NULL\n", vaddr); 33.70 - return; 33.71 - } 33.72 - 33.73 - if (memcmp(page, rpage, BLOCK_SIZE) != 0) 33.74 - { 
33.75 - printf( "read %Lu returned a different page\n", vaddr); 33.76 - return; 33.77 - } 33.78 - 33.79 - freeblock(rpage); 33.80 -} 33.81 - 33.82 -void test_block(vdi_t *vdi, uint64_t L1, uint64_t L2, uint64_t L3, char *page) 33.83 -{ 33.84 - uint64_t vaddr; 33.85 - char *rpage = NULL; 33.86 - 33.87 - printf("TEST (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3); 33.88 - 33.89 - vaddr = make_vaddr(L1, L2, L3); 33.90 - rpage = vdi_read_s(vdi, vaddr); 33.91 - 33.92 - if (rpage == NULL) 33.93 - { 33.94 - printf( "read %Lu returned NULL\n", vaddr); 33.95 - return; 33.96 - } 33.97 - 33.98 - if (memcmp(page, rpage, BLOCK_SIZE) != 0) 33.99 - { 33.100 - printf( "read %Lu returned a different page\n", vaddr); 33.101 - return; 33.102 - } 33.103 - 33.104 - freeblock(rpage); 33.105 -} 33.106 - 33.107 -void coverage_test(vdi_t *vdi) 33.108 -{ 33.109 - uint64_t vaddr; 33.110 - int i, j, k; 33.111 - 33.112 - /* Do a series of writes and reads to test all paths through the 33.113 - * async radix code. The radix request code will dump CRC warnings 33.114 - * if there are data problems here as well. 33.115 - */ 33.116 - 33.117 - /* L1 Zero */ 33.118 - touch_block(vdi, 0, 0, 0); 33.119 - 33.120 - /* L2 Zero */ 33.121 - i = next_page; 33.122 - touch_block(vdi, 0, 1, 0); 33.123 - 33.124 - /* L3 Zero */ 33.125 - j = next_page; 33.126 - touch_block(vdi, 0, 0, 1); 33.127 - k = next_page; 33.128 - touch_block(vdi, 0, 1, 1); 33.129 - 33.130 - /* Direct write */ 33.131 - touch_block(vdi, 0, 0, 0); 33.132 - 33.133 - vdi_snapshot(vdi); 33.134 - 33.135 - /* L1 fault */ 33.136 - touch_block(vdi, 0, 0, 0); 33.137 - /* test the read-only branches that should have been copied over. 
*/ 33.138 - test_block(vdi, 0, 1, 0, pages[i]); 33.139 - test_block(vdi, 0, 0, 1, pages[j]); 33.140 - 33.141 - /* L2 fault */ 33.142 - touch_block(vdi, 0, 1, 0); 33.143 - test_block(vdi, 0, 1, 1, pages[k]); 33.144 - 33.145 - /* L3 fault */ 33.146 - touch_block(vdi, 0, 0, 1); 33.147 - 33.148 - /* read - L1 zero */ 33.149 - test_block(vdi, 1, 0, 0, zero_page); 33.150 - 33.151 - /* read - L2 zero */ 33.152 - test_block(vdi, 0, 2, 0, zero_page); 33.153 - 33.154 - /* read - L3 zero */ 33.155 - test_block(vdi, 0, 0, 2, zero_page); 33.156 -} 33.157 - 33.158 -int main(int argc, char *argv[]) 33.159 -{ 33.160 - vdi_t *vdi; 33.161 - uint64_t id; 33.162 - int fd; 33.163 - struct stat st; 33.164 - uint64_t tot_size; 33.165 - char spage[BLOCK_SIZE]; 33.166 - char *dpage; 33.167 - uint64_t vblock = 0, count=0; 33.168 - 33.169 - __init_blockstore(); 33.170 - init_block_async(); 33.171 - __init_vdi(); 33.172 - 33.173 - vdi = vdi_create( NULL, "UNIT TEST VDI"); 33.174 - 33.175 - if ( vdi == NULL ) { 33.176 - printf("Failed to create VDI!\n"); 33.177 - freeblock(vdi); 33.178 - exit(-1); 33.179 - } 33.180 - 33.181 - fill_test_pages(); 33.182 - coverage_test(vdi); 33.183 - 33.184 - freeblock(vdi); 33.185 - 33.186 - return (0); 33.187 -}
34.1 --- a/tools/blktap/parallax/vdi_validate.c Fri Jun 16 18:19:40 2006 +0100 34.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 34.3 @@ -1,97 +0,0 @@ 34.4 -/************************************************************************** 34.5 - * 34.6 - * vdi_validate.c 34.7 - * 34.8 - * Intended to sanity-check vm_fill and the underlying vdi code. 34.9 - * 34.10 - * Block-by-block compare of a vdi with a file/device on the disk. 34.11 - * 34.12 - */ 34.13 - 34.14 -#include <stdio.h> 34.15 -#include <stdlib.h> 34.16 -#include <string.h> 34.17 -#include <sys/types.h> 34.18 -#include <sys/stat.h> 34.19 -#include <fcntl.h> 34.20 -#include <unistd.h> 34.21 -#include "blockstore.h" 34.22 -#include "radix.h" 34.23 -#include "vdi.h" 34.24 -#include "requests-async.h" 34.25 - 34.26 -int main(int argc, char *argv[]) 34.27 -{ 34.28 - vdi_t *vdi; 34.29 - uint64_t id; 34.30 - int fd; 34.31 - struct stat st; 34.32 - uint64_t tot_size; 34.33 - char spage[BLOCK_SIZE], *dpage; 34.34 - char *vpage; 34.35 - uint64_t vblock = 0, count=0; 34.36 - 34.37 - __init_blockstore(); 34.38 - init_block_async(); 34.39 - __init_vdi(); 34.40 - 34.41 - if ( argc < 3 ) { 34.42 - printf("usage: %s <VDI id> <filename>\n", argv[0]); 34.43 - exit(-1); 34.44 - } 34.45 - 34.46 - id = (uint64_t) atoll(argv[1]); 34.47 - 34.48 - vdi = vdi_get( id ); 34.49 - 34.50 - if ( vdi == NULL ) { 34.51 - printf("Failed to retreive VDI %Ld!\n", id); 34.52 - exit(-1); 34.53 - } 34.54 - 34.55 - fd = open(argv[2], O_RDONLY | O_LARGEFILE); 34.56 - 34.57 - if (fd < 0) { 34.58 - printf("Couldn't open %s!\n", argv[2]); 34.59 - exit(-1); 34.60 - } 34.61 - 34.62 - if ( fstat(fd, &st) != 0 ) { 34.63 - printf("Couldn't stat %s!\n", argv[2]); 34.64 - exit(-1); 34.65 - } 34.66 - 34.67 - tot_size = (uint64_t) st.st_size; 34.68 - printf("Testing VDI %Ld (%Ld bytes).\n", id, tot_size); 34.69 - 34.70 - printf(" "); 34.71 - while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) { 34.72 - 34.73 - dpage = vdi_read_s(vdi, vblock); 34.74 - 34.75 
- if (dpage == NULL) { 34.76 - printf("\n\nfound an unmapped VDI block (%Ld)\n", vblock); 34.77 - exit(0); 34.78 - } 34.79 - 34.80 - if (memcmp(spage, dpage, BLOCK_SIZE) != 0) { 34.81 - printf("\n\nblocks don't match! (%Ld)\n", vblock); 34.82 - exit(0); 34.83 - } 34.84 - 34.85 - freeblock(dpage); 34.86 - 34.87 - vblock++; 34.88 - if ((vblock % 1024) == 0) { 34.89 - printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock); 34.90 - fflush(stdout); 34.91 - } 34.92 - } 34.93 - printf("\n"); 34.94 - 34.95 - printf("VDI %Ld looks good!\n", id); 34.96 - 34.97 - freeblock(vdi); 34.98 - 34.99 - return (0); 34.100 -}
# Builds and installs ublkback, the libaio-based userlevel block backend.

XEN_ROOT = ../../..
include $(XEN_ROOT)/tools/Rules.mk

INCLUDES += -I..

INSTALL            = install
INSTALL_PROG = $(INSTALL) -m0755
IBIN         = ublkback
INSTALL_DIR  = /usr/sbin

CFLAGS   += -Werror
CFLAGS   += -Wno-unused
CFLAGS   += -fno-strict-aliasing
CFLAGS   += -I $(XEN_LIBXC)
CFLAGS   += $(INCLUDES) -I.
CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
# Get gcc to generate the dependencies for us.
CFLAGS   += -Wp,-MD,.$(@F).d
DEPS     = .*.d

OBJS     = $(patsubst %.c,%.o,$(SRCS))

.PHONY: all
all: $(IBIN)

LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)

.PHONY: install
install:
	$(INSTALL_PROG) $(IBIN) $(DESTDIR)$(INSTALL_DIR)

.PHONY: clean
clean:
	# "*.o" and "*~" are separate patterns; "*.o*~" matched neither.
	rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN)

# List the sources as prerequisites so the binary is rebuilt when they
# change, and name the libraries after the sources that use them so the
# linker resolves their symbols.
ublkback: ublkback.c ublkbacklib.c
	$(CC) $(CFLAGS) -o ublkback -L$(XEN_LIBXC) -L. -L.. \
	      ublkback.c ublkbacklib.c -lblktap -laio -pg

-include $(DEPS)
36.1 --- a/tools/blktap/ublkback/ublkback.c Fri Jun 16 18:19:40 2006 +0100 36.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 36.3 @@ -1,18 +0,0 @@ 36.4 -/* ublkback.c 36.5 - * 36.6 - * libaio-based userlevel backend. 36.7 - */ 36.8 - 36.9 -#include "blktaplib.h" 36.10 -#include "ublkbacklib.h" 36.11 - 36.12 - 36.13 -int main(int argc, char *argv[]) 36.14 -{ 36.15 - ublkback_init(); 36.16 - 36.17 - register_new_blkif_hook(ublkback_new_blkif); 36.18 - blktap_listen(); 36.19 - 36.20 - return 0; 36.21 -}
37.1 --- a/tools/blktap/ublkback/ublkbacklib.c Fri Jun 16 18:19:40 2006 +0100 37.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 37.3 @@ -1,473 +0,0 @@ 37.4 -/* ublkbacklib.c 37.5 - * 37.6 - * file/device image-backed block device -- using linux libaio. 37.7 - * 37.8 - * (c) 2004 Andrew Warfield. 37.9 - * 37.10 - * Xend has been modified to use an amorfs:[fsid] disk tag. 37.11 - * This will show up as device type (maj:240,min:0) = 61440. 37.12 - * 37.13 - * The fsid is placed in the sec_start field of the disk extent. 37.14 - * 37.15 - * NOTE: This doesn't work. Grrr. 37.16 - */ 37.17 - 37.18 -#define _GNU_SOURCE 37.19 -#define __USE_LARGEFILE64 37.20 - 37.21 -#include <stdio.h> 37.22 -#include <stdlib.h> 37.23 -#include <fcntl.h> 37.24 -#include <string.h> 37.25 -#include <db.h> 37.26 -#include <sys/stat.h> 37.27 -#include <sys/types.h> 37.28 -#include <sys/poll.h> 37.29 -#include <unistd.h> 37.30 -#include <errno.h> 37.31 -#include <libaio.h> 37.32 -#include <pthread.h> 37.33 -#include <time.h> 37.34 -#include <err.h> 37.35 -#include "blktaplib.h" 37.36 - 37.37 -/* XXXX: */ 37.38 -/* Current code just mounts this file/device to any requests that come in. */ 37.39 -//#define TMP_IMAGE_FILE_NAME "/dev/sda1" 37.40 -#define TMP_IMAGE_FILE_NAME "fc3.image" 37.41 - 37.42 -#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */ 37.43 -#define MAX_SEGMENTS_PER_REQ 11 37.44 -#define SECTOR_SHIFT 9 37.45 -#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ) 37.46 - 37.47 -#if 0 37.48 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 37.49 -#else 37.50 -#define DPRINTF(_f, _a...) ((void)0) 37.51 -#endif 37.52 - 37.53 -#if 1 37.54 -#define ASSERT(_p) \ 37.55 - if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \ 37.56 - __LINE__, __FILE__); *(int*)0=0; } 37.57 -#else 37.58 -#define ASSERT(_p) ((void)0) 37.59 -#endif 37.60 - 37.61 -/* Note on pending_reqs: I assume all reqs are queued before they start to 37.62 - * get filled. 
so count of 0 is an unused record. 37.63 - */ 37.64 -typedef struct { 37.65 - blkif_request_t req; 37.66 - blkif_t *blkif; 37.67 - int count; 37.68 -} pending_req_t; 37.69 - 37.70 -static pending_req_t pending_list[MAX_REQUESTS]; 37.71 -static io_context_t ctx; 37.72 -static struct iocb *iocb_free[MAX_AIO_REQS]; 37.73 -static int iocb_free_count; 37.74 - 37.75 -/* ---[ Notification mecahnism ]--------------------------------------- */ 37.76 - 37.77 -enum { 37.78 - READ = 0, 37.79 - WRITE = 1 37.80 -}; 37.81 - 37.82 -static int aio_notify[2]; 37.83 -static volatile int aio_listening = 0; 37.84 -static pthread_mutex_t notifier_sem = PTHREAD_MUTEX_INITIALIZER; 37.85 - 37.86 -static struct io_event aio_events[MAX_AIO_REQS]; 37.87 -static int aio_event_count = 0; 37.88 - 37.89 -/* this is commented out in libaio.h for some reason. */ 37.90 -extern int io_queue_wait(io_context_t ctx, struct timespec *timeout); 37.91 - 37.92 -static void *notifier_thread(void *arg) 37.93 -{ 37.94 - int ret; 37.95 - int msg = 0x00feeb00; 37.96 - 37.97 - DPRINTF("Notifier thread started.\n"); 37.98 - for (;;) { 37.99 - pthread_mutex_lock(&notifier_sem); 37.100 - if ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0) { 37.101 - aio_event_count = ret; 37.102 - write(aio_notify[WRITE], &msg, sizeof(msg)); 37.103 - } else { 37.104 - printf("[io_queue_wait error! %d]\n", errno); 37.105 - pthread_mutex_unlock(&notifier_sem); 37.106 - } 37.107 - } 37.108 -} 37.109 - 37.110 -/* --- Talking to xenstore: ------------------------------------------- */ 37.111 - 37.112 -int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done); 37.113 -int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done); 37.114 - 37.115 -typedef struct image { 37.116 - /* These need to turn into an array/rbtree for multi-disk support. 
*/ 37.117 - int fd; 37.118 - uint64_t fsid; 37.119 - blkif_vdev_t vdevice; 37.120 - long int size; 37.121 - long int secsize; 37.122 - long int info; 37.123 -} image_t; 37.124 - 37.125 -long int ublkback_get_size(blkif_t *blkif) 37.126 -{ 37.127 - image_t *img = (image_t *)blkif->prv; 37.128 - return img->size; 37.129 -} 37.130 - 37.131 -long int ublkback_get_secsize(blkif_t *blkif) 37.132 -{ 37.133 - image_t *img = (image_t *)blkif->prv; 37.134 - return img->secsize; 37.135 -} 37.136 - 37.137 -unsigned ublkback_get_info(blkif_t *blkif) 37.138 -{ 37.139 - image_t *img = (image_t *)blkif->prv; 37.140 - return img->info; 37.141 -} 37.142 - 37.143 -static struct blkif_ops ublkback_ops = { 37.144 - get_size: ublkback_get_size, 37.145 - get_secsize: ublkback_get_secsize, 37.146 - get_info: ublkback_get_info, 37.147 -}; 37.148 - 37.149 -int ublkback_new_blkif(blkif_t *blkif) 37.150 -{ 37.151 - image_t *image; 37.152 - struct stat stat; 37.153 - int ret; 37.154 - 37.155 - image = (image_t *)malloc(sizeof(image_t)); 37.156 - if (image == NULL) { 37.157 - printf("error allocating image record.\n"); 37.158 - return -ENOMEM; 37.159 - } 37.160 - 37.161 - /* Open it. */ 37.162 - image->fd = open(TMP_IMAGE_FILE_NAME, 37.163 - O_RDWR | O_DIRECT | O_LARGEFILE); 37.164 - 37.165 - if ((image->fd < 0) && (errno == EINVAL)) { 37.166 - /* Maybe O_DIRECT isn't supported. */ 37.167 - warn("open() failed on '%s', trying again without O_DIRECT", 37.168 - TMP_IMAGE_FILE_NAME); 37.169 - image->fd = open(TMP_IMAGE_FILE_NAME, O_RDWR | O_LARGEFILE); 37.170 - } 37.171 - 37.172 - if (image->fd < 0) { 37.173 - warn("Couldn't open image file!"); 37.174 - free(image); 37.175 - return -EINVAL; 37.176 - } 37.177 - 37.178 - /* Size it. 
*/ 37.179 - ret = fstat(image->fd, &stat); 37.180 - if (ret != 0) { 37.181 - printf("Couldn't stat image in PROBE!"); 37.182 - return -EINVAL; 37.183 - } 37.184 - 37.185 - image->size = (stat.st_size >> SECTOR_SHIFT); 37.186 - 37.187 - /* TODO: IOCTL to get size of raw device. */ 37.188 -/* 37.189 - ret = ioctl(img->fd, BLKGETSIZE, &blksize); 37.190 - if (ret != 0) { 37.191 - printf("Couldn't ioctl image in PROBE!\n"); 37.192 - goto err; 37.193 - } 37.194 -*/ 37.195 - if (image->size == 0) 37.196 - image->size =((uint64_t) 16836057); 37.197 - image->secsize = 512; 37.198 - image->info = 0; 37.199 - 37.200 - /* Register the hooks */ 37.201 - blkif_register_request_hook(blkif, "Ublkback req.", ublkback_request); 37.202 - blkif_register_response_hook(blkif, "Ublkback resp.", ublkback_response); 37.203 - 37.204 - 37.205 - printf(">X<Created a new blkif! pdev was %ld, but you got %s\n", 37.206 - blkif->pdev, TMP_IMAGE_FILE_NAME); 37.207 - 37.208 - blkif->ops = &ublkback_ops; 37.209 - blkif->prv = (void *)image; 37.210 - 37.211 - return 0; 37.212 -} 37.213 - 37.214 - 37.215 -/* --- Moving the bits: ----------------------------------------------- */ 37.216 - 37.217 -static int batch_count = 0; 37.218 -int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done) 37.219 -{ 37.220 - int fd; 37.221 - uint64_t sector; 37.222 - char *spage, *dpage; 37.223 - int ret, i, idx; 37.224 - blkif_response_t *rsp; 37.225 - domid_t dom = ID_TO_DOM(req->id); 37.226 - static struct iocb *ioq[MAX_SEGMENTS_PER_REQ*MAX_REQUESTS]; 37.227 - static int io_idx = 0; 37.228 - struct iocb *io; 37.229 - image_t *img; 37.230 - 37.231 - img = (image_t *)blkif->prv; 37.232 - fd = img->fd; 37.233 - 37.234 - switch (req->operation) 37.235 - { 37.236 - case BLKIF_OP_WRITE: 37.237 - { 37.238 - unsigned long size; 37.239 - 37.240 - batch_count++; 37.241 - 37.242 - idx = ID_TO_IDX(req->id); 37.243 - ASSERT(pending_list[idx].count == 0); 37.244 - memcpy(&pending_list[idx].req, req, sizeof(*req)); 
37.245 - pending_list[idx].count = req->nr_segments; 37.246 - pending_list[idx].blkif = blkif; 37.247 - 37.248 - for (i = 0; i < req->nr_segments; i++) { 37.249 - 37.250 - sector = req->sector_number + (8*i); 37.251 - 37.252 - size = req->seg[i].last_sect - req->seg[i].first_sect + 1; 37.253 - 37.254 - if (req->seg[i].first_sect != 0) 37.255 - DPRINTF("iWR: sec_nr: %10llu sec: %10llu (%1lu,%1lu) " 37.256 - "pos: %15lu\n", 37.257 - req->sector_number, sector, 37.258 - req->seg[i].first_sect, req->seg[i].last_sect, 37.259 - (long)(sector << SECTOR_SHIFT)); 37.260 - 37.261 - spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 37.262 - spage += req->seg[i].first_sect << SECTOR_SHIFT; 37.263 - 37.264 - /*convert size and sector to byte offsets */ 37.265 - size <<= SECTOR_SHIFT; 37.266 - sector <<= SECTOR_SHIFT; 37.267 - 37.268 - io = iocb_free[--iocb_free_count]; 37.269 - io_prep_pwrite(io, fd, spage, size, sector); 37.270 - io->data = (void *)idx; 37.271 - //ioq[i] = io; 37.272 - ioq[io_idx++] = io; 37.273 - } 37.274 - 37.275 - if (batch_done) { 37.276 - ret = io_submit(ctx, io_idx, ioq); 37.277 - batch_count = 0; 37.278 - if (ret < 0) 37.279 - printf("BADNESS: io_submit error! 
(%d)\n", errno); 37.280 - io_idx = 0; 37.281 - } 37.282 - 37.283 - return BLKTAP_STOLEN; 37.284 - 37.285 - } 37.286 - case BLKIF_OP_READ: 37.287 - { 37.288 - unsigned long size; 37.289 - 37.290 - batch_count++; 37.291 - idx = ID_TO_IDX(req->id); 37.292 - ASSERT(pending_list[idx].count == 0); 37.293 - memcpy(&pending_list[idx].req, req, sizeof(*req)); 37.294 - pending_list[idx].count = req->nr_segments; 37.295 - pending_list[idx].blkif = blkif; 37.296 - 37.297 - for (i = 0; i < req->nr_segments; i++) { 37.298 - 37.299 - sector = req->sector_number + (8*i); 37.300 - 37.301 - size = req->seg[i].last_sect - req->seg[i].first_sect + 1; 37.302 - 37.303 - dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); 37.304 - dpage += req->seg[i].first_sect << SECTOR_SHIFT; 37.305 - 37.306 - if (req->seg[i].first_sect != 0) 37.307 - DPRINTF("iRD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) " 37.308 - "pos: %15lu dpage: %p\n", 37.309 - req->sector_number, sector, 37.310 - req->seg[i].first_sect, req->seg[i].last_sect, 37.311 - (long)(sector << SECTOR_SHIFT), dpage); 37.312 - 37.313 - /*convert size and sector to byte offsets */ 37.314 - size <<= SECTOR_SHIFT; 37.315 - sector <<= SECTOR_SHIFT; 37.316 - 37.317 - 37.318 - /* 37.319 - * NB: Looks like AIO now has non-page aligned support, this path 37.320 - * can probably be removed... Only really used for hunting 37.321 - * superblocks anyway... ;) 37.322 - */ 37.323 - if ( ((unsigned long)dpage % PAGE_SIZE) != 0 ) { 37.324 - /* AIO to raw devices must be page aligned, so do this read 37.325 - * synchronously. The OS is probably just looking for 37.326 - * a superblock or something, so this won't hurt performance. 37.327 - */ 37.328 - int ret; 37.329 - 37.330 - printf("Slow path block read.\n"); 37.331 - /* Question: do in-progress aio ops modify the file cursor? 
*/ 37.332 - ret = lseek(fd, sector, SEEK_SET); 37.333 - if (ret == (off_t)-1) 37.334 - printf("lseek failed!\n"); 37.335 - ret = read(fd, dpage, size); 37.336 - if (ret < 0) 37.337 - printf("read problem (%d)\n", ret); 37.338 - printf("|\n|\n| read: %lld, %lu, %d\n|\n|\n", sector, size, ret); 37.339 - 37.340 - /* not an async request any more... */ 37.341 - pending_list[idx].count--; 37.342 - 37.343 - rsp = (blkif_response_t *)req; 37.344 - rsp->id = req->id; 37.345 - rsp->operation = BLKIF_OP_READ; 37.346 - rsp->status = BLKIF_RSP_OKAY; 37.347 - return BLKTAP_RESPOND; 37.348 - /* Doh -- need to flush aio if this is end-of-batch */ 37.349 - } 37.350 - 37.351 - io = iocb_free[--iocb_free_count]; 37.352 - 37.353 - io_prep_pread(io, fd, dpage, size, sector); 37.354 - io->data = (void *)idx; 37.355 - 37.356 - ioq[io_idx++] = io; 37.357 - //ioq[i] = io; 37.358 - } 37.359 - 37.360 - if (batch_done) { 37.361 - ret = io_submit(ctx, io_idx, ioq); 37.362 - batch_count = 0; 37.363 - if (ret < 0) 37.364 - printf("BADNESS: io_submit error! 
(%d)\n", errno); 37.365 - io_idx = 0; 37.366 - } 37.367 - 37.368 - return BLKTAP_STOLEN; 37.369 - 37.370 - } 37.371 - } 37.372 - 37.373 - printf("Unknown block operation!\n"); 37.374 -err: 37.375 - rsp = (blkif_response_t *)req; 37.376 - rsp->id = req->id; 37.377 - rsp->operation = req->operation; 37.378 - rsp->status = BLKIF_RSP_ERROR; 37.379 - return BLKTAP_RESPOND; 37.380 -} 37.381 - 37.382 - 37.383 -int ublkback_pollhook(int fd) 37.384 -{ 37.385 - struct io_event *ep; 37.386 - int n, ret, idx; 37.387 - blkif_request_t *req; 37.388 - blkif_response_t *rsp; 37.389 - int responses_queued = 0; 37.390 - int pages=0; 37.391 - 37.392 - for (ep = aio_events; aio_event_count-- > 0; ep++) { 37.393 - struct iocb *io = ep->obj; 37.394 - idx = (int) ep->data; 37.395 - 37.396 - if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){ 37.397 - printf("invalid index returned(%u)!\n", idx); 37.398 - break; 37.399 - } 37.400 - 37.401 - if ((int)ep->res < 0) 37.402 - printf("***\n***aio request error! (%d,%d)\n***\n", 37.403 - (int)ep->res, (int)ep->res2); 37.404 - 37.405 - pending_list[idx].count--; 37.406 - iocb_free[iocb_free_count++] = io; 37.407 - pages++; 37.408 - 37.409 - if (pending_list[idx].count == 0) { 37.410 - blkif_request_t tmp = pending_list[idx].req; 37.411 - rsp = (blkif_response_t *)&pending_list[idx].req; 37.412 - rsp->id = tmp.id; 37.413 - rsp->operation = tmp.operation; 37.414 - rsp->status = BLKIF_RSP_OKAY; 37.415 - blkif_inject_response(pending_list[idx].blkif, rsp); 37.416 - responses_queued++; 37.417 - } 37.418 - } 37.419 - 37.420 - if (responses_queued) { 37.421 - blktap_kick_responses(); 37.422 - } 37.423 - 37.424 - read(aio_notify[READ], &idx, sizeof(idx)); 37.425 - aio_listening = 1; 37.426 - pthread_mutex_unlock(&notifier_sem); 37.427 - 37.428 - return 0; 37.429 -} 37.430 - 37.431 -/* the image library terminates the request stream. _resp is a noop. 
*/ 37.432 -int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done) 37.433 -{ 37.434 - return BLKTAP_PASS; 37.435 -} 37.436 - 37.437 -void ublkback_init(void) 37.438 -{ 37.439 - int i, rc; 37.440 - pthread_t p; 37.441 - 37.442 - for (i = 0; i < MAX_REQUESTS; i++) 37.443 - pending_list[i].count = 0; 37.444 - 37.445 - memset(&ctx, 0, sizeof(ctx)); 37.446 - rc = io_queue_init(MAX_AIO_REQS, &ctx); 37.447 - if (rc != 0) { 37.448 - printf("queue_init failed! (%d)\n", rc); 37.449 - exit(0); 37.450 - } 37.451 - 37.452 - for (i=0; i<MAX_AIO_REQS; i++) { 37.453 - if (!(iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb)))) { 37.454 - printf("error allocating iocb array\n"); 37.455 - exit(0); 37.456 - } 37.457 - iocb_free_count = i; 37.458 - } 37.459 - 37.460 - rc = pipe(aio_notify); 37.461 - if (rc != 0) { 37.462 - printf("pipe failed! (%d)\n", errno); 37.463 - exit(0); 37.464 - } 37.465 - 37.466 - rc = pthread_create(&p, NULL, notifier_thread, NULL); 37.467 - if (rc != 0) { 37.468 - printf("pthread_create failed! (%d)\n", errno); 37.469 - exit(0); 37.470 - } 37.471 - 37.472 - aio_listening = 1; 37.473 - 37.474 - blktap_attach_poll(aio_notify[READ], POLLIN, ublkback_pollhook); 37.475 -} 37.476 -
38.1 --- a/tools/blktap/ublkback/ublkbacklib.h Fri Jun 16 18:19:40 2006 +0100 38.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 38.3 @@ -1,16 +0,0 @@ 38.4 -/* blkaiolib.h 38.5 - * 38.6 - * aio image-backed block device. 38.7 - * 38.8 - * (c) 2004 Andrew Warfield. 38.9 - * 38.10 - * Xend has been modified to use an amorfs:[fsid] disk tag. 38.11 - * This will show up as device type (maj:240,min:0) = 61440. 38.12 - * 38.13 - * The fsid is placed in the sec_start field of the disk extent. 38.14 - */ 38.15 - 38.16 -int ublkback_request(blkif_request_t *req, int batch_done); 38.17 -int ublkback_response(blkif_response_t *rsp); /* noop */ 38.18 -int ublkback_new_blkif(blkif_t *blkif); 38.19 -void ublkback_init(void);
39.1 --- a/tools/blktap/xenbus.c Fri Jun 16 18:19:40 2006 +0100 39.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 39.3 @@ -1,568 +0,0 @@ 39.4 -/* 39.5 - * xenbus.c 39.6 - * 39.7 - * xenbus interface to the blocktap. 39.8 - * 39.9 - * this handles the top-half of integration with block devices through the 39.10 - * store -- the tap driver negotiates the device channel etc, while the 39.11 - * userland tap clinet needs to sort out the disk parameters etc. 39.12 - * 39.13 - * A. Warfield 2005 Based primarily on the blkback and xenbus driver code. 39.14 - * Comments there apply here... 39.15 - */ 39.16 - 39.17 -#include <stdio.h> 39.18 -#include <stdlib.h> 39.19 -#include <string.h> 39.20 -#include <err.h> 39.21 -#include <stdarg.h> 39.22 -#include <errno.h> 39.23 -#include <xs.h> 39.24 -#include <sys/types.h> 39.25 -#include <sys/stat.h> 39.26 -#include <fcntl.h> 39.27 -#include <poll.h> 39.28 -#include "blktaplib.h" 39.29 -#include "list.h" 39.30 - 39.31 -#if 0 39.32 -#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 39.33 -#else 39.34 -#define DPRINTF(_f, _a...) ((void)0) 39.35 -#endif 39.36 - 39.37 -/* --- Xenstore / Xenbus helpers ---------------------------------------- */ 39.38 -/* 39.39 - * These should all be pulled out into the xenstore API. I'm faulting commands 39.40 - * in from the xenbus interface as i need them. 39.41 - */ 39.42 - 39.43 - 39.44 -/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ 39.45 -int xs_gather(struct xs_handle *xs, const char *dir, ...) 
39.46 -{ 39.47 - va_list ap; 39.48 - const char *name; 39.49 - char *path; 39.50 - int ret = 0; 39.51 - 39.52 - va_start(ap, dir); 39.53 - while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { 39.54 - const char *fmt = va_arg(ap, char *); 39.55 - void *result = va_arg(ap, void *); 39.56 - char *p; 39.57 - 39.58 - if (asprintf(&path, "%s/%s", dir, name) == -1) 39.59 - { 39.60 - warn("allocation error in xs_gather!\n"); 39.61 - ret = ENOMEM; 39.62 - break; 39.63 - } 39.64 - p = xs_read(xs, path, NULL); 39.65 - free(path); 39.66 - if (p == NULL) { 39.67 - ret = ENOENT; 39.68 - break; 39.69 - } 39.70 - if (fmt) { 39.71 - if (sscanf(p, fmt, result) == 0) 39.72 - ret = EINVAL; 39.73 - free(p); 39.74 - } else 39.75 - *(char **)result = p; 39.76 - } 39.77 - va_end(ap); 39.78 - return ret; 39.79 -} 39.80 - 39.81 -/* Single printf and write: returns -errno or 0. */ 39.82 -int xs_printf(struct xs_handle *h, const char *dir, const char *node, 39.83 - const char *fmt, ...) 39.84 -{ 39.85 - char *buf, *path; 39.86 - va_list ap; 39.87 - int ret; 39.88 - 39.89 - va_start(ap, fmt); 39.90 - ret = vasprintf(&buf, fmt, ap); 39.91 - va_end(ap); 39.92 - 39.93 - asprintf(&path, "%s/%s", dir, node); 39.94 - 39.95 - if ((path == NULL) || (buf == NULL)) 39.96 - return 0; 39.97 - 39.98 - ret = xs_write(h, path, buf, strlen(buf)+1); 39.99 - 39.100 - free(buf); 39.101 - free(path); 39.102 - 39.103 - return ret; 39.104 -} 39.105 - 39.106 - 39.107 -int xs_exists(struct xs_handle *h, const char *path) 39.108 -{ 39.109 - char **d; 39.110 - int num; 39.111 - 39.112 - d = xs_directory(h, path, &num); 39.113 - if (d == NULL) 39.114 - return 0; 39.115 - free(d); 39.116 - return 1; 39.117 -} 39.118 - 39.119 - 39.120 - 39.121 -/* This assumes that the domain name we are looking for is unique! 
*/ 39.122 -char *get_dom_domid(struct xs_handle *h, const char *name) 39.123 -{ 39.124 - char **e, *val, *domid = NULL; 39.125 - int num, i, len; 39.126 - char *path; 39.127 - 39.128 - e = xs_directory(h, "/local/domain", &num); 39.129 - 39.130 - i=0; 39.131 - while (i < num) { 39.132 - asprintf(&path, "/local/domain/%s/name", e[i]); 39.133 - val = xs_read(h, path, &len); 39.134 - free(path); 39.135 - if (val == NULL) 39.136 - continue; 39.137 - if (strcmp(val, name) == 0) { 39.138 - /* match! */ 39.139 - asprintf(&path, "/local/domain/%s/domid", e[i]); 39.140 - domid = xs_read(h, path, &len); 39.141 - free(val); 39.142 - free(path); 39.143 - break; 39.144 - } 39.145 - free(val); 39.146 - i++; 39.147 - } 39.148 - 39.149 - free(e); 39.150 - return domid; 39.151 -} 39.152 - 39.153 -static int strsep_len(const char *str, char c, unsigned int len) 39.154 -{ 39.155 - unsigned int i; 39.156 - 39.157 - for (i = 0; str[i]; i++) 39.158 - if (str[i] == c) { 39.159 - if (len == 0) 39.160 - return i; 39.161 - len--; 39.162 - } 39.163 - return (len == 0) ? i : -ERANGE; 39.164 -} 39.165 - 39.166 - 39.167 -/* xenbus watches: */ 39.168 -/* Register callback to watch this node. */ 39.169 -struct xenbus_watch 39.170 -{ 39.171 - struct list_head list; 39.172 - char *node; 39.173 - void (*callback)(struct xs_handle *h, 39.174 - struct xenbus_watch *, 39.175 - const char *node); 39.176 -}; 39.177 - 39.178 -static LIST_HEAD(watches); 39.179 - 39.180 -/* A little paranoia: we don't just trust token. */ 39.181 -static struct xenbus_watch *find_watch(const char *token) 39.182 -{ 39.183 - struct xenbus_watch *i, *cmp; 39.184 - 39.185 - cmp = (void *)strtoul(token, NULL, 16); 39.186 - 39.187 - list_for_each_entry(i, &watches, list) 39.188 - if (i == cmp) 39.189 - return i; 39.190 - return NULL; 39.191 -} 39.192 - 39.193 -/* Register callback to watch this node. 
like xs_watch, return 0 on failure */ 39.194 -int register_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch) 39.195 -{ 39.196 - /* Pointer in ascii is the token. */ 39.197 - char token[sizeof(watch) * 2 + 1]; 39.198 - int er; 39.199 - 39.200 - sprintf(token, "%lX", (long)watch); 39.201 - if (find_watch(token)) 39.202 - { 39.203 - warn("watch collision!"); 39.204 - return -EINVAL; 39.205 - } 39.206 - 39.207 - er = xs_watch(h, watch->node, token); 39.208 - if (er != 0) { 39.209 - list_add(&watch->list, &watches); 39.210 - } 39.211 - 39.212 - return er; 39.213 -} 39.214 - 39.215 -int unregister_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch) 39.216 -{ 39.217 - char token[sizeof(watch) * 2 + 1]; 39.218 - int er; 39.219 - 39.220 - sprintf(token, "%lX", (long)watch); 39.221 - if (!find_watch(token)) 39.222 - { 39.223 - warn("no such watch!"); 39.224 - return -EINVAL; 39.225 - } 39.226 - 39.227 - 39.228 - er = xs_unwatch(h, watch->node, token); 39.229 - list_del(&watch->list); 39.230 - 39.231 - if (er == 0) 39.232 - warn("XENBUS Failed to release watch %s: %i", 39.233 - watch->node, er); 39.234 - return 0; 39.235 -} 39.236 - 39.237 -/* Re-register callbacks to all watches. */ 39.238 -void reregister_xenbus_watches(struct xs_handle *h) 39.239 -{ 39.240 - struct xenbus_watch *watch; 39.241 - char token[sizeof(watch) * 2 + 1]; 39.242 - 39.243 - list_for_each_entry(watch, &watches, list) { 39.244 - sprintf(token, "%lX", (long)watch); 39.245 - xs_watch(h, watch->node, token); 39.246 - } 39.247 -} 39.248 - 39.249 -/* based on watch_thread() */ 39.250 -int xs_fire_next_watch(struct xs_handle *h) 39.251 -{ 39.252 - char **res; 39.253 - char *token; 39.254 - char *node = NULL; 39.255 - struct xenbus_watch *w; 39.256 - int er; 39.257 - unsigned int num; 39.258 - 39.259 - res = xs_read_watch(h, &num); 39.260 - if (res == NULL) 39.261 - return -EAGAIN; /* in O_NONBLOCK, read_watch returns 0... 
*/ 39.262 - 39.263 - node = res[XS_WATCH_PATH]; 39.264 - token = res[XS_WATCH_TOKEN]; 39.265 - 39.266 - w = find_watch(token); 39.267 - if (!w) 39.268 - { 39.269 - warn("unregistered watch fired"); 39.270 - goto done; 39.271 - } 39.272 - w->callback(h, w, node); 39.273 - 39.274 - done: 39.275 - free(res); 39.276 - return 1; 39.277 -} 39.278 - 39.279 - 39.280 - 39.281 - 39.282 -/* ---------------------------------------------------------------------- */ 39.283 - 39.284 -struct backend_info 39.285 -{ 39.286 - /* our communications channel */ 39.287 - blkif_t *blkif; 39.288 - 39.289 - long int frontend_id; 39.290 - long int pdev; 39.291 - long int readonly; 39.292 - 39.293 - /* watch back end for changes */ 39.294 - struct xenbus_watch backend_watch; 39.295 - char *backpath; 39.296 - 39.297 - /* watch front end for changes */ 39.298 - struct xenbus_watch watch; 39.299 - char *frontpath; 39.300 - 39.301 - struct list_head list; 39.302 -}; 39.303 - 39.304 -static LIST_HEAD(belist); 39.305 - 39.306 -static struct backend_info *be_lookup_be(const char *bepath) 39.307 -{ 39.308 - struct backend_info *be; 39.309 - 39.310 - list_for_each_entry(be, &belist, list) 39.311 - if (strcmp(bepath, be->backpath) == 0) 39.312 - return be; 39.313 - return (struct backend_info *)NULL; 39.314 -} 39.315 - 39.316 -static int be_exists_be(const char *bepath) 39.317 -{ 39.318 - return ( be_lookup_be(bepath) != NULL ); 39.319 -} 39.320 - 39.321 -static struct backend_info *be_lookup_fe(const char *fepath) 39.322 -{ 39.323 - struct backend_info *be; 39.324 - 39.325 - list_for_each_entry(be, &belist, list) 39.326 - if (strcmp(fepath, be->frontpath) == 0) 39.327 - return be; 39.328 - return (struct backend_info *)NULL; 39.329 -} 39.330 - 39.331 -static int backend_remove(struct xs_handle *h, struct backend_info *be) 39.332 -{ 39.333 - /* Turn off watches. 
*/ 39.334 - if (be->watch.node) 39.335 - unregister_xenbus_watch(h, &be->watch); 39.336 - if (be->backend_watch.node) 39.337 - unregister_xenbus_watch(h, &be->backend_watch); 39.338 - 39.339 - /* Unhook from be list. */ 39.340 - list_del(&be->list); 39.341 - 39.342 - /* Free everything else. */ 39.343 - if (be->blkif) 39.344 - free_blkif(be->blkif); 39.345 - free(be->frontpath); 39.346 - free(be->backpath); 39.347 - free(be); 39.348 - return 0; 39.349 -} 39.350 - 39.351 -static void frontend_changed(struct xs_handle *h, struct xenbus_watch *w, 39.352 - const char *fepath_im) 39.353 -{ 39.354 - struct backend_info *be; 39.355 - char *fepath = NULL; 39.356 - int er; 39.357 - 39.358 - be = be_lookup_fe(w->node); 39.359 - if (be == NULL) 39.360 - { 39.361 - warn("frontend changed called for nonexistent backend! (%s)", fepath); 39.362 - goto fail; 39.363 - } 39.364 - 39.365 - /* If other end is gone, delete ourself. */ 39.366 - if (w->node && !xs_exists(h, be->frontpath)) { 39.367 - DPRINTF("DELETING BE: %s\n", be->backpath); 39.368 - backend_remove(h, be); 39.369 - return; 39.370 - } 39.371 - 39.372 - if (be->blkif == NULL || (be->blkif->state == CONNECTED)) 39.373 - return; 39.374 - 39.375 - /* Supply the information about the device the frontend needs */ 39.376 - er = xs_transaction_start(h, be->backpath); 39.377 - if (er == 0) { 39.378 - warn("starting transaction"); 39.379 - goto fail; 39.380 - } 39.381 - 39.382 - er = xs_printf(h, be->backpath, "sectors", "%lu", 39.383 - be->blkif->ops->get_size(be->blkif)); 39.384 - if (er == 0) { 39.385 - warn("writing sectors"); 39.386 - goto fail; 39.387 - } 39.388 - 39.389 - er = xs_printf(h, be->backpath, "info", "%u", 39.390 - be->blkif->ops->get_info(be->blkif)); 39.391 - if (er == 0) { 39.392 - warn("writing info"); 39.393 - goto fail; 39.394 - } 39.395 - 39.396 - er = xs_printf(h, be->backpath, "sector-size", "%lu", 39.397 - be->blkif->ops->get_secsize(be->blkif)); 39.398 - if (er == 0) { 39.399 - warn("writing 
sector-size"); 39.400 - goto fail; 39.401 - } 39.402 - 39.403 - be->blkif->state = CONNECTED; 39.404 - 39.405 - xs_transaction_end(h, 0); 39.406 - 39.407 - return; 39.408 - 39.409 - fail: 39.410 - free(fepath); 39.411 -} 39.412 - 39.413 - 39.414 -static void backend_changed(struct xs_handle *h, struct xenbus_watch *w, 39.415 - const char *bepath_im) 39.416 -{ 39.417 - struct backend_info *be; 39.418 - char *path = NULL, *p; 39.419 - int len, er; 39.420 - long int pdev = 0, handle; 39.421 - 39.422 - be = be_lookup_be(w->node); 39.423 - if (be == NULL) 39.424 - { 39.425 - warn("backend changed called for nonexistent backend! (%s)", w->node); 39.426 - goto fail; 39.427 - } 39.428 - 39.429 - er = xs_gather(h, be->backpath, "physical-device", "%li", &pdev, NULL); 39.430 - if (er != 0) 39.431 - goto fail; 39.432 - 39.433 - if (be->pdev && be->pdev != pdev) { 39.434 - warn("changing physical-device not supported"); 39.435 - goto fail; 39.436 - } 39.437 - be->pdev = pdev; 39.438 - 39.439 - asprintf(&path, "%s/%s", w->node, "read-only"); 39.440 - if (xs_exists(h, path)) 39.441 - be->readonly = 1; 39.442 - 39.443 - if (be->blkif == NULL) { 39.444 - /* Front end dir is a number, which is used as the handle. */ 39.445 - p = strrchr(be->frontpath, '/') + 1; 39.446 - handle = strtoul(p, NULL, 0); 39.447 - 39.448 - be->blkif = alloc_blkif(be->frontend_id); 39.449 - if (be->blkif == NULL) 39.450 - goto fail; 39.451 - 39.452 - er = blkif_init(be->blkif, handle, be->pdev, be->readonly); 39.453 - if (er) 39.454 - goto fail; 39.455 - 39.456 - DPRINTF("[BECHG]: ADDED A NEW BLKIF (%s)\n", w->node); 39.457 - 39.458 - /* Pass in NULL node to skip exist test. 
*/ 39.459 - frontend_changed(h, &be->watch, NULL); 39.460 - } 39.461 - 39.462 - fail: 39.463 - free(path); 39.464 -} 39.465 - 39.466 -static void blkback_probe(struct xs_handle *h, struct xenbus_watch *w, 39.467 - const char *bepath_im) 39.468 -{ 39.469 - struct backend_info *be = NULL; 39.470 - char *frontend = NULL, *bepath = NULL; 39.471 - int er, len; 39.472 - 39.473 - bepath = strdup(bepath_im); 39.474 - if (!bepath) 39.475 - return; 39.476 - len = strsep_len(bepath, '/', 6); 39.477 - if (len < 0) 39.478 - goto free_be; 39.479 - 39.480 - bepath[len] = '\0'; /*truncate the passed-in string with predjudice. */ 39.481 - 39.482 - be = malloc(sizeof(*be)); 39.483 - if (!be) { 39.484 - warn("allocating backend structure"); 39.485 - goto free_be; 39.486 - } 39.487 - memset(be, 0, sizeof(*be)); 39.488 - 39.489 - frontend = NULL; 39.490 - er = xs_gather(h, bepath, 39.491 - "frontend-id", "%li", &be->frontend_id, 39.492 - "frontend", NULL, &frontend, 39.493 - NULL); 39.494 - if (er) 39.495 - goto free_be; 39.496 - 39.497 - if (strlen(frontend) == 0 || !xs_exists(h, frontend)) { 39.498 - /* If we can't get a frontend path and a frontend-id, 39.499 - * then our bus-id is no longer valid and we need to 39.500 - * destroy the backend device. 39.501 - */ 39.502 - DPRINTF("No frontend (%s)\n", frontend); 39.503 - goto free_be; 39.504 - } 39.505 - 39.506 - /* Are we already tracking this device? 
*/ 39.507 - if (be_exists_be(bepath)) 39.508 - goto free_be; 39.509 - 39.510 - be->backpath = bepath; 39.511 - be->backend_watch.node = be->backpath; 39.512 - be->backend_watch.callback = backend_changed; 39.513 - er = register_xenbus_watch(h, &be->backend_watch); 39.514 - if (er == 0) { 39.515 - be->backend_watch.node = NULL; 39.516 - warn("error adding backend watch on %s", bepath); 39.517 - goto free_be; 39.518 - } 39.519 - 39.520 - be->frontpath = frontend; 39.521 - be->watch.node = be->frontpath; 39.522 - be->watch.callback = frontend_changed; 39.523 - er = register_xenbus_watch(h, &be->watch); 39.524 - if (er == 0) { 39.525 - be->watch.node = NULL; 39.526 - warn("adding frontend watch on %s", be->frontpath); 39.527 - goto free_be; 39.528 - } 39.529 - 39.530 - list_add(&be->list, &belist); 39.531 - 39.532 - DPRINTF("[PROBE]: ADDED NEW DEVICE (%s)\n", bepath_im); 39.533 - 39.534 - backend_changed(h, &be->backend_watch, bepath); 39.535 - return; 39.536 - 39.537 - free_be: 39.538 - if (be && (be->backend_watch.node)) 39.539 - unregister_xenbus_watch(h, &be->backend_watch); 39.540 - free(frontend); 39.541 - free(bepath); 39.542 - free(be); 39.543 - return; 39.544 -} 39.545 - 39.546 - 39.547 -int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname) 39.548 -{ 39.549 - char *domid, *path; 39.550 - struct xenbus_watch *vbd_watch; 39.551 - int er; 39.552 - 39.553 - domid = get_dom_domid(h, domname); 39.554 - 39.555 - DPRINTF("%s: %s\n", domname, (domid != NULL) ? domid : "[ not found! 
]"); 39.556 - 39.557 - asprintf(&path, "/local/domain/%s/backend/vbd", domid); 39.558 - if (path == NULL) 39.559 - return -ENOMEM; 39.560 - 39.561 - vbd_watch = (struct xenbus_watch *)malloc(sizeof(struct xenbus_watch)); 39.562 - vbd_watch->node = path; 39.563 - vbd_watch->callback = blkback_probe; 39.564 - er = register_xenbus_watch(h, vbd_watch); 39.565 - if (er == 0) { 39.566 - warn("Error adding vbd probe watch %s", path); 39.567 - return -EINVAL; 39.568 - } 39.569 - 39.570 - return 0; 39.571 -}