ia64/xen-unstable

changeset 19647:1c627434605e

blktap2: a completely rewritten blktap implementation

Benefits to blktap2 over the old version of blktap:

* Isolation from xenstore - Blktap devices are now created directly on
the linux dom0 command line, rather than being spawned in response
to XenStore events. This is handy for debugging, makes blktap
generally easier to work with, and is a step toward a generic
user-level block device implementation that is not Xen-specific.

* Improved tapdisk infrastructure: simpler request forwarding, new
request scheduler, request merging, more efficient use of AIO.

* Improved tapdisk error handling and memory management. No
allocations on the block data path, IO retry logic to protect
guests from
transient block device failures. This has been tested and is known
to work on weird environments such as NFS soft mounts.

* Pause and snapshot of live virtual disks (see xmsnap script).

* VHD support. The VHD code in this release has been rigorously
tested, and represents a very mature implementation of the VHD
image
format.

* No more duplication of mechanism with blkback. The blktap kernel
module has changed dramatically from the original blktap. Blkback
is now always used to talk to Xen guests, blktap just presents a
Linux gendisk that blkback can export. This is done while
preserving the zero-copy data path from domU to physical device.

These patches deprecate the old blktap code, which can hopefully be
removed from the tree completely at some point in the future.

Signed-off-by: Jake Wires <jake.wires@citrix.com>
Signed-off-by: Dutch Meyer <dmeyer@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:52:31 2009 +0100 (2009-05-26)
parents f210a633571c
children f0e2df69a8eb
files .hgignore tools/Makefile tools/blktap2/Makefile tools/blktap2/README tools/blktap2/daemon/Makefile tools/blktap2/daemon/lib/Makefile tools/blktap2/daemon/lib/xs_api.c tools/blktap2/daemon/lib/xs_api.h tools/blktap2/daemon/tapdisk-channel.c tools/blktap2/daemon/tapdisk-daemon.c tools/blktap2/daemon/tapdisk-dispatch-common.c tools/blktap2/daemon/tapdisk-dispatch.h tools/blktap2/drivers/Makefile tools/blktap2/drivers/aes.c tools/blktap2/drivers/aes.h tools/blktap2/drivers/atomicio.c tools/blktap2/drivers/blk.h tools/blktap2/drivers/blk_linux.c tools/blktap2/drivers/blktap2.h tools/blktap2/drivers/block-aio.c tools/blktap2/drivers/block-cache.c tools/blktap2/drivers/block-log.c tools/blktap2/drivers/block-qcow.c tools/blktap2/drivers/block-ram.c tools/blktap2/drivers/block-vhd.c tools/blktap2/drivers/bswap.h tools/blktap2/drivers/check_gcrypt tools/blktap2/drivers/disktypes.h tools/blktap2/drivers/img2qcow.c tools/blktap2/drivers/io-optimize.c tools/blktap2/drivers/io-optimize.h tools/blktap2/drivers/lock.c tools/blktap2/drivers/lock.h tools/blktap2/drivers/log.h tools/blktap2/drivers/profile.h tools/blktap2/drivers/qcow-create.c tools/blktap2/drivers/qcow.h tools/blktap2/drivers/qcow2raw.c tools/blktap2/drivers/scheduler.c tools/blktap2/drivers/scheduler.h tools/blktap2/drivers/tapdisk-client.c tools/blktap2/drivers/tapdisk-diff.c tools/blktap2/drivers/tapdisk-driver.c tools/blktap2/drivers/tapdisk-driver.h tools/blktap2/drivers/tapdisk-filter.c tools/blktap2/drivers/tapdisk-filter.h tools/blktap2/drivers/tapdisk-image.c tools/blktap2/drivers/tapdisk-image.h tools/blktap2/drivers/tapdisk-interface.c tools/blktap2/drivers/tapdisk-interface.h tools/blktap2/drivers/tapdisk-ipc.c tools/blktap2/drivers/tapdisk-ipc.h tools/blktap2/drivers/tapdisk-log.c tools/blktap2/drivers/tapdisk-log.h tools/blktap2/drivers/tapdisk-queue.c tools/blktap2/drivers/tapdisk-queue.h tools/blktap2/drivers/tapdisk-ring.c tools/blktap2/drivers/tapdisk-ring.h 
tools/blktap2/drivers/tapdisk-server.c tools/blktap2/drivers/tapdisk-server.h tools/blktap2/drivers/tapdisk-stream.c tools/blktap2/drivers/tapdisk-utils.c tools/blktap2/drivers/tapdisk-utils.h tools/blktap2/drivers/tapdisk-vbd.c tools/blktap2/drivers/tapdisk-vbd.h tools/blktap2/drivers/tapdisk.c tools/blktap2/drivers/tapdisk.h tools/blktap2/drivers/tapdisk2.c tools/blktap2/drivers/td.c tools/blktap2/drivers/xmsnap tools/blktap2/include/Makefile tools/blktap2/include/atomicio.h tools/blktap2/include/blktaplib.h tools/blktap2/include/libvhd-journal.h tools/blktap2/include/libvhd.h tools/blktap2/include/list.h tools/blktap2/include/lvm-util.h tools/blktap2/include/relative-path.h tools/blktap2/include/tapdisk-message.h tools/blktap2/include/vhd-util.h tools/blktap2/include/vhd.h tools/blktap2/lvm/Makefile tools/blktap2/lvm/lvm-util.c tools/blktap2/vhd/Makefile tools/blktap2/vhd/lib/Makefile tools/blktap2/vhd/lib/atomicio.c tools/blktap2/vhd/lib/libvhd-journal.c tools/blktap2/vhd/lib/libvhd.c tools/blktap2/vhd/lib/relative-path.c tools/blktap2/vhd/lib/vhd-util-check.c tools/blktap2/vhd/lib/vhd-util-coalesce.c tools/blktap2/vhd/lib/vhd-util-create.c tools/blktap2/vhd/lib/vhd-util-fill.c tools/blktap2/vhd/lib/vhd-util-modify.c tools/blktap2/vhd/lib/vhd-util-query.c tools/blktap2/vhd/lib/vhd-util-read.c tools/blktap2/vhd/lib/vhd-util-repair.c tools/blktap2/vhd/lib/vhd-util-resize.c tools/blktap2/vhd/lib/vhd-util-revert.c tools/blktap2/vhd/lib/vhd-util-scan.c tools/blktap2/vhd/lib/vhd-util-set-field.c tools/blktap2/vhd/lib/vhd-util-snapshot.c tools/blktap2/vhd/vhd-update.c tools/blktap2/vhd/vhd-util.c tools/check/check_uuid_devel tools/python/xen/xend/XendDomainInfo.py tools/python/xen/xend/server/BlktapController.py tools/python/xen/xend/server/DevController.py
line diff
     1.1 --- a/.hgignore	Tue May 26 11:05:04 2009 +0100
     1.2 +++ b/.hgignore	Tue May 26 11:52:31 2009 +0100
     1.3 @@ -103,7 +103,19 @@
     1.4  ^stubdom/lwip/
     1.5  ^stubdom/ioemu/
     1.6  ^tools/.*/build/lib.*/.*\.py$
     1.7 -^tools/blktap/Makefile\.smh$
     1.8 +^tools/blktap2/daemon/blktapctrl$
     1.9 +^tools/blktap2/drivers/img2qcow$
    1.10 +^tools/blktap2/drivers/lock-util$
    1.11 +^tools/blktap2/drivers/qcow-create$
    1.12 +^tools/blktap2/drivers/qcow2raw$
    1.13 +^tools/blktap2/drivers/tapdisk$
    1.14 +^tools/blktap2/drivers/tapdisk-client$
    1.15 +^tools/blktap2/drivers/tapdisk-diff$
    1.16 +^tools/blktap2/drivers/tapdisk-stream$
    1.17 +^tools/blktap2/drivers/tapdisk2$
    1.18 +^tools/blktap2/drivers/td-util$
    1.19 +^tools/blktap2/vhd/vhd-update$
    1.20 +^tools/blktap2/vhd/vhd-util$
    1.21  ^tools/blktap/drivers/blktapctrl$
    1.22  ^tools/blktap/drivers/img2qcow$
    1.23  ^tools/blktap/drivers/qcow-create$
     2.1 --- a/tools/Makefile	Tue May 26 11:05:04 2009 +0100
     2.2 +++ b/tools/Makefile	Tue May 26 11:52:31 2009 +0100
     2.3 @@ -22,6 +22,7 @@ SUBDIRS-$(VTPM_TOOLS) += vtpm
     2.4  SUBDIRS-y += xenstat
     2.5  SUBDIRS-$(CONFIG_Linux) += libaio
     2.6  SUBDIRS-$(CONFIG_Linux) += blktap
     2.7 +SUBDIRS-$(CONFIG_Linux) += blktap2
     2.8  SUBDIRS-y += libfsimage
     2.9  SUBDIRS-$(LIBXENAPI_BINDINGS) += libxen
    2.10  SUBDIRS-$(CONFIG_Linux) += fs-back
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/tools/blktap2/Makefile	Tue May 26 11:52:31 2009 +0100
     3.3 @@ -0,0 +1,34 @@
     3.4 +XEN_ROOT = ../..
     3.5 +include $(XEN_ROOT)/tools/Rules.mk
     3.6 +
     3.7 +CFLAGS  += $(CFLAGS_libxenctrl)
     3.8 +LDFLAGS += $(LDFLAGS_libxenctrl)
     3.9 +
    3.10 +SUBDIRS-y :=
    3.11 +SUBDIRS-y += include
    3.12 +SUBDIRS-y += lvm
    3.13 +SUBDIRS-y += vhd
    3.14 +SUBDIRS-y += drivers
    3.15 +SUBDIRS-y += daemon
    3.16 +
    3.17 +.PHONY: all
    3.18 +all: build
    3.19 +
    3.20 +.PHONY: build
    3.21 +build:
    3.22 +	@set -e; for subdir in $(SUBDIRS-y); do \
    3.23 +	$(MAKE) -C $$subdir all;       \
    3.24 +		done
    3.25 +
    3.26 +.PHONY: install
    3.27 +install:
    3.28 +	@set -e; for subdir in $(SUBDIRS-y); do \
    3.29 +		$(MAKE) -C $$subdir install; \
    3.30 +	done
    3.31 +
    3.32 +.PHONY: clean
    3.33 +clean:
    3.34 +	rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) TAGS
    3.35 +	@set -e; for subdir in $(SUBDIRS-y); do \
    3.36 +	$(MAKE) -C $$subdir clean;       \
    3.37 +		done
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/tools/blktap2/README	Tue May 26 11:52:31 2009 +0100
     4.3 @@ -0,0 +1,122 @@
     4.4 +Blktap Userspace Tools + Library
     4.5 +================================
     4.6 +
     4.7 +Andrew Warfield and Julian Chesterfield
     4.8 +16th June 2006
     4.9 +
    4.10 +{firstname.lastname}@cl.cam.ac.uk
    4.11 +
    4.12 +The blktap userspace toolkit provides a user-level disk I/O
    4.13 +interface. The blktap mechanism involves a kernel driver that acts
    4.14 +similarly to the existing Xen/Linux blkback driver, and a set of
    4.15 +associated user-level libraries.  Using these tools, blktap allows
    4.16 +virtual block devices presented to VMs to be implemented in userspace
    4.17 +and to be backed by raw partitions, files, network, etc.
    4.18 +
    4.19 +The key benefit of blktap is that it makes it easy and fast to write
    4.20 +arbitrary block backends, and that these user-level backends actually
    4.21 +perform very well.  Specifically:
    4.22 +
    4.23 +- Metadata disk formats such as Copy-on-Write, encrypted disks, sparse
    4.24 +  formats and other compression features can be easily implemented.
    4.25 +
    4.26 +- Accessing file-based images from userspace avoids problems related
    4.27 +  to flushing dirty pages which are present in the Linux loopback
    4.28 +  driver.  (Specifically, doing a large number of writes to an
    4.29 +  NFS-backed image doesn't result in the OOM killer going berserk.)
    4.30 +
    4.31 +- Per-disk handler processes enable easier userspace policing of block
    4.32 +  resources, and process-granularity QoS techniques (disk scheduling
    4.33 +  and related tools) may be trivially applied to block devices.
    4.34 +
    4.35 +- It's very easy to take advantage of userspace facilities such as
    4.36 +  networking libraries, compression utilities, peer-to-peer
    4.37 +  file-sharing systems and so on to build more complex block backends.
    4.38 +
    4.39 +- Crashes are contained -- incremental development/debugging is very
    4.40 +  fast.
    4.41 +
    4.42 +How it works (in one paragraph):
    4.43 +
    4.44 +Working in conjunction with the kernel blktap driver, all disk I/O
    4.45 +requests from VMs are passed to the userspace daemon (using a shared
    4.46 +memory interface) through a character device. Each active disk is
    4.47 +mapped to an individual device node, allowing per-disk processes to
    4.48 +implement individual block devices where desired.  The userspace
    4.49 +drivers are implemented using asynchronous (Linux libaio),
    4.50 +O_DIRECT-based calls to preserve the unbuffered, batched and
    4.51 +asynchronous request dispatch achieved with the existing blkback
    4.52 +code.  We provide a simple, asynchronous virtual disk interface that
    4.53 +makes it quite easy to add new disk implementations.
    4.54 +
    4.55 +As of June 2006 the currently supported disk formats are:
    4.56 +
    4.57 + - Raw Images (both on partitions and in image files)
    4.58 + - File-backed Qcow disks
    4.59 + - Standalone sparse Qcow disks
    4.60 + - Fast shareable RAM disk between VMs (requires some form of cluster-based 
    4.61 +   filesystem support e.g. OCFS2 in the guest kernel)
    4.62 + - Some VMDK images - your mileage may vary
    4.63 +
    4.64 +Raw and QCow images have asynchronous backends and so should perform
    4.65 +fairly well.  VMDK is based directly on the qemu vmdk driver, which is
    4.66 +synchronous (a.k.a. slow).
    4.67 +
    4.68 +Build and Installation Instructions
    4.69 +===================================
    4.70 +
    4.71 +Make sure to configure the blktap backend driver in your dom0 kernel.  It
    4.72 +will cooperate fine with the existing backend driver, so you can
    4.73 +experiment with tap disks without breaking existing VM configs.
    4.74 +
    4.75 +To build the tools separately, "make && make install" in 
    4.76 +tools/blktap2.
    4.77 +
    4.78 +
    4.79 +Using the Tools
    4.80 +===============
    4.81 +
    4.82 +Prepare the image for booting. For qcow files use the qcow utilities
    4.83 +installed earlier. e.g. qcow-create generates a blank standalone image
    4.84 +or a file-backed CoW image. img2qcow takes an existing image or
    4.85 +partition and creates a sparse, standalone qcow-based file.
    4.86 +
    4.87 +The userspace disk agent is configured to start automatically via xend
    4.88 +(alternatively you can start it manually => 'blktapctrl')
    4.89 +
    4.90 +Customise the VM config file to use the 'tap' handler, followed by the
    4.91 +driver type. e.g. for a raw image such as a file or partition:
    4.92 +
    4.93 +disk = ['tap:aio:<FILENAME>,sda1,w']
    4.94 +
    4.95 +e.g. for a qcow image:
    4.96 +
    4.97 +disk = ['tap:qcow:<FILENAME>,sda1,w']
    4.98 +
    4.99 +
   4.100 +Mounting images in Dom0 using the blktap driver
   4.101 +===============================================
   4.102 +Tap (and blkback) disks are also mountable in Dom0 without requiring an
   4.103 +active VM to attach. You will need to build a xenlinux Dom0 kernel that
   4.104 +includes the blkfront driver (e.g. the default 'make world' or 
   4.105 +'make kernels' build). Simply use the xm command-line tool to activate
   4.106 +the backend disks, and blkfront will generate a virtual block device that
   4.107 +can be accessed in the same way as a loop device or partition:
   4.108 +
   4.109 +e.g. for a raw image file <FILENAME> that would normally be mounted using
   4.110 +the loopback driver (such as 'mount -o loop <FILENAME> /mnt/disk'), do the
   4.111 +following:
   4.112 +
   4.113 +xm block-attach 0 tap:aio:<FILENAME> /dev/xvda1 w 0
   4.114 +mount /dev/xvda1 /mnt/disk        <--- don't use loop driver
   4.115 +
   4.116 +In this way, you can use any of the userspace device-type drivers built
   4.117 +with the blktap userspace toolkit to open and mount disks such as qcow
   4.118 +or vmdk images:
   4.119 +
   4.120 +xm block-attach 0 tap:qcow:<FILENAME> /dev/xvda1 w 0
   4.121 +mount /dev/xvda1 /mnt/disk
   4.122 +
   4.123 +
   4.124 +
   4.125 + 
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/tools/blktap2/daemon/Makefile	Tue May 26 11:52:31 2009 +0100
     5.3 @@ -0,0 +1,55 @@
     5.4 +XEN_ROOT=../../../
     5.5 +BLKTAP_ROOT := ..
     5.6 +include $(XEN_ROOT)/tools/Rules.mk
     5.7 +
     5.8 +IBIN          = blktapctrl
     5.9 +INST_DIR      = $(SBINDIR)
    5.10 +
    5.11 +LIBDIR        = lib
    5.12 +
    5.13 +LIBS         := -lxenstore
    5.14 +LIBS         += -Llib
    5.15 +LIBS         += -lblktap
    5.16 +LIBS         += -lxenctrl
    5.17 +
    5.18 +ifneq ($(USE_SYSTEM_LIBRARIES),y)
    5.19 +INCLUDES     += -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
    5.20 +LIBS         += -L $(XEN_LIBXC) -L $(XEN_XENSTORE)
    5.21 +endif
    5.22 +
    5.23 +OBJS         := tapdisk-dispatch-common.o
    5.24 +OBJS         += tapdisk-channel.o
    5.25 +
    5.26 +CFLAGS       += -Werror
    5.27 +CFLAGS       += -Wno-unused
    5.28 +CFLAGS       += -fno-strict-aliasing -fPIC
    5.29 +CFLAGS       += -Ilib -I../include -I../drivers -I../../include $(INCLUDES)
    5.30 +CFLAGS       += -D_GNU_SOURCE
    5.31 +CFLAGS       += -g
    5.32 +
    5.33 +# Get gcc to generate the dependencies for us.
    5.34 +CFLAGS       += -Wp,-MD,.$(@F).d
    5.35 +DEPS          = .*.d
    5.36 +
    5.37 +all: libblktap $(IBIN)
    5.38 +
    5.39 +blktapctrl: tapdisk-daemon.c $(OBJS)
    5.40 +	$(CC) $(CFLAGS) -o blktapctrl tapdisk-daemon.c $(LIBS) $(OBJS)
    5.41 +
    5.42 +libblktap:
    5.43 +	@set -e
    5.44 +	$(MAKE) -C $(LIBDIR) all
    5.45 +
    5.46 +install: all
    5.47 +	$(MAKE) -C $(LIBDIR) install
    5.48 +	$(INSTALL_DIR) -p $(DESTDIR)$(INST_DIR)
    5.49 +	$(INSTALL_PROG) $(IBIN) $(DESTDIR)$(INST_DIR)
    5.50 +
    5.51 +clean:
    5.52 +	$(MAKE) -C $(LIBDIR) clean
    5.53 +	rm -rf *.o *~ $(IBIN) $(DEPS) xen TAGS
    5.54 +
    5.55 +.PHONY: all clean install blktapctrl libblktap
    5.56 +
    5.57 +-include $(DEPS)
    5.58 +
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/tools/blktap2/daemon/lib/Makefile	Tue May 26 11:52:31 2009 +0100
     6.3 @@ -0,0 +1,69 @@
     6.4 +XEN_ROOT=../../../../
     6.5 +BLKTAP_ROOT := ../../
     6.6 +include $(XEN_ROOT)/tools/Rules.mk
     6.7 +
     6.8 +MAJOR    = 3.1
     6.9 +MINOR    = 0
    6.10 +SONAME   = libblktap.so.$(MAJOR)
    6.11 +
    6.12 +BLKTAP_INSTALL_DIR = /usr/sbin
    6.13 +
    6.14 +LIBS     := -lxenstore
    6.15 +
    6.16 +ifneq ($(USE_SYSTEM_LIBRARIES),y)
    6.17 +INCLUDES += -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
    6.18 +LIBS     += -L$(XEN_XENSTORE)
    6.19 +endif
    6.20 +
    6.21 +SRCS     :=
    6.22 +SRCS     += xs_api.c
    6.23 +CFLAGS   += -Werror
    6.24 +CFLAGS   += -Wno-unused
    6.25 +CFLAGS   += -fno-strict-aliasing -fPIC
    6.26 +# get asprintf():
    6.27 +CFLAGS   += -D _GNU_SOURCE
    6.28 +CFLAGS   += -g
    6.29 +CFLAGS   += -I../../include -I../../../include/ $(INCLUDES) 
    6.30 +
    6.31 +
    6.32 +# Get gcc to generate the dependencies for us.
    6.33 +CFLAGS  += -Wp,-MD,.$(@F).d
    6.34 +DEPS     = .*.d
    6.35 +
    6.36 +OBJS     = $(patsubst %.c,%.o,$(SRCS))
    6.37 +IBINS   :=
    6.38 +
    6.39 +LIB      = libblktap.a libblktap.so.$(MAJOR).$(MINOR)
    6.40 +
    6.41 +.PHONY: all
    6.42 +all: build
    6.43 +
    6.44 +.PHONY: build
    6.45 +build: libblktap.a
    6.46 +
    6.47 +.PHONY: libblktap
    6.48 +libblktap: libblktap.a
    6.49 +
    6.50 +install: all
    6.51 +	$(INSTALL_DIR) -p $(DESTDIR)$(LIBDIR)
    6.52 +	$(INSTALL_DATA) $(LIB) $(DESTDIR)$(LIBDIR)
    6.53 +	ln -sf libblktap.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)/libblktap.so.$(MAJOR)
    6.54 +	ln -sf libblktap.so.$(MAJOR) $(DESTDIR)$(LIBDIR)/libblktap.so
    6.55 +
    6.56 +clean:
    6.57 +	rm -rf *.a *.so* *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS
    6.58 +
    6.59 +libblktap.a: $(OBJS) 
    6.60 +	$(CC) $(CFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,$(SONAME) $(SHLIB_CFLAGS) \
    6.61 +	      -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
    6.62 +	ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
    6.63 +	ln -sf libblktap.so.$(MAJOR) libblktap.so
    6.64 +	$(AR) rc $@ libblktap.so
    6.65 +
    6.66 +.PHONY: TAGS all build clean install libblktap
    6.67 +
    6.68 +TAGS:
    6.69 +	etags -t $(SRCS) *.h
    6.70 +
    6.71 +-include $(DEPS)
    6.72 +
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/tools/blktap2/daemon/lib/xs_api.c	Tue May 26 11:52:31 2009 +0100
     7.3 @@ -0,0 +1,323 @@
     7.4 +/*
     7.5 + * xs_api.c
     7.6 + * 
     7.7 + * blocktap interface functions to xenstore
     7.8 + *
     7.9 + * (c) 2005 Andrew Warfield and Julian Chesterfield
    7.10 + *
    7.11 + *
    7.12 + * This program is free software; you can redistribute it and/or
    7.13 + * modify it under the terms of the GNU General Public License version 2
    7.14 + * as published by the Free Software Foundation; or, when distributed
    7.15 + * separately from the Linux kernel or incorporated into other
    7.16 + * software packages, subject to the following license:
    7.17 + *
    7.18 + * Permission is hereby granted, free of charge, to any person obtaining a copy
    7.19 + * of this source file (the "Software"), to deal in the Software without
    7.20 + * restriction, including without limitation the rights to use, copy, modify,
    7.21 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
    7.22 + * and to permit persons to whom the Software is furnished to do so, subject to
    7.23 + * the following conditions:
    7.24 + *
    7.25 + * The above copyright notice and this permission notice shall be included in
    7.26 + * all copies or substantial portions of the Software.
    7.27 + *
    7.28 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    7.29 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    7.30 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    7.31 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    7.32 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    7.33 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
    7.34 + * IN THE SOFTWARE.
    7.35 + *
    7.36 + */
    7.37 +
    7.38 +#include <time.h>
    7.39 +#include <stdio.h>
    7.40 +#include <errno.h>
    7.41 +#include <stdlib.h>
    7.42 +#include <string.h>
    7.43 +#include <stdarg.h>
    7.44 +#include <xs.h>
    7.45 +
    7.46 +#include "xs_api.h"
    7.47 +#include "blktaplib.h"
    7.48 +
    7.49 +#define DOMNAME "Domain-0"
    7.50 +#define BASE_DEV_VAL 2048
    7.51 +
    7.52 +static LIST_HEAD(watches);
    7.53 +
    7.54 +int
    7.55 +xs_gather(struct xs_handle *xs, const char *dir, ...)
    7.56 +{
    7.57 +	va_list ap;
    7.58 +	const char *name;
    7.59 +	char *path, **e;
    7.60 +	int ret = 0, num,i;
    7.61 +	unsigned int len;
    7.62 +	xs_transaction_t xth;
    7.63 +
    7.64 +again:
    7.65 +	if ((xth = xs_transaction_start(xs)) == XBT_NULL) {
    7.66 +		DPRINTF("unable to start xs trasanction\n");
    7.67 +		ret = ENOMEM;
    7.68 +		return ret;
    7.69 +	}
    7.70 +
    7.71 +	va_start(ap, dir);
    7.72 +	while ((ret == 0) && (name = va_arg(ap, char *)) != NULL) {
    7.73 +		char *p;
    7.74 +		const char *fmt = va_arg(ap, char *);
    7.75 +		void *result = va_arg(ap, void *);
    7.76 +		
    7.77 +		if (asprintf(&path, "%s/%s", dir, name) == -1) {
    7.78 +			EPRINTF("allocation error in xs_gather!\n");
    7.79 +			ret = ENOMEM;
    7.80 +			break;
    7.81 +		}
    7.82 +
    7.83 +		p = xs_read(xs, xth, path, &len);
    7.84 +		free(path);
    7.85 +
    7.86 +		if (!p) {
    7.87 +			ret = ENOENT;
    7.88 +			break;
    7.89 +		}
    7.90 +
    7.91 +		if (fmt) {
    7.92 +			if (sscanf(p, fmt, result) == 0)
    7.93 +				ret = EINVAL;
    7.94 +			free(p);
    7.95 +		} else
    7.96 +			*(char **)result = p;
    7.97 +	}
    7.98 +
    7.99 +	va_end(ap);
   7.100 +
   7.101 +	if (!xs_transaction_end(xs, xth, ret)) {
   7.102 +		if (ret == 0 && errno == EAGAIN)
   7.103 +			goto again;
   7.104 +		else
   7.105 +			ret = errno;
   7.106 +	}
   7.107 +
   7.108 +	return ret;
   7.109 +}
   7.110 +
   7.111 +/* Single printf and write: returns 0 on allocation failure, else the result of xs_write(). */
   7.112 +int
   7.113 +xs_printf(struct xs_handle *h, const char *dir,
   7.114 +	  const char *node, const char *fmt, ...)
   7.115 +{
   7.116 +	int ret;
   7.117 +	va_list ap;
   7.118 +	char *buf, *path;
   7.119 +
   7.120 +	va_start(ap, fmt);
   7.121 +	ret = vasprintf(&buf, fmt, ap);
   7.122 +	va_end(ap);
   7.123 +
   7.124 +	if (ret == -1)
   7.125 +		return 0;
   7.126 +
   7.127 +	ret = asprintf(&path, "%s/%s", dir, node);
   7.128 +	if (ret == -1) {
   7.129 +		free(buf);
   7.130 +		return 0;
   7.131 +	}
   7.132 +
   7.133 +	ret = xs_write(h, XBT_NULL, path, buf, strlen(buf)+1);
   7.134 +
   7.135 +	free(buf);
   7.136 +	free(path);
   7.137 +
   7.138 +	return ret;
   7.139 +}
   7.140 +
   7.141 +int
   7.142 +xs_exists(struct xs_handle *h, const char *path)
   7.143 +{
   7.144 +	char **d;
   7.145 +	unsigned int num;
   7.146 +	xs_transaction_t xth;
   7.147 +
   7.148 +	if ((xth = xs_transaction_start(h)) == XBT_NULL) {
   7.149 +		EPRINTF("unable to start xs trasanction\n");
   7.150 +		return 0;
   7.151 +	}
   7.152 +
   7.153 +	d = xs_directory(h, xth, path, &num);
   7.154 +	xs_transaction_end(h, xth, 0);
   7.155 +	if (!d)
   7.156 +		return 0;
   7.157 +
   7.158 +	free(d);
   7.159 +	return 1;
   7.160 +}
   7.161 +
   7.162 +
   7.163 +
   7.164 +/**
   7.165 + * This assumes that the domain name we are looking for is unique. 
   7.166 + * The name searched for is DOMNAME ("Domain-0").
   7.167 + */
   7.168 +char *
   7.169 +get_dom_domid(struct xs_handle *h)
   7.170 +{
   7.171 +	int i;
   7.172 +	xs_transaction_t xth;
   7.173 +	unsigned int num, len;
   7.174 +	char *val, *path, *domid, **e;
   7.175 +
   7.176 +	e     = NULL;
   7.177 +	domid = NULL;
   7.178 +
   7.179 +	if ((xth = xs_transaction_start(h)) == XBT_NULL) {
   7.180 +		EPRINTF("unable to start xs trasanction\n");
   7.181 +		return NULL;
   7.182 +	}
   7.183 +
   7.184 +	e = xs_directory(h, xth, "/local/domain", &num);
   7.185 +	if (e == NULL)
   7.186 +		goto done;
   7.187 +
   7.188 +	for (i = 0; (i < num) && (domid == NULL); i++) {
   7.189 +		if (asprintf(&path, "/local/domain/%s/name", e[i]) == -1)
   7.190 +			break;
   7.191 +
   7.192 +		val = xs_read(h, xth, path, &len);
   7.193 +		free(path);
   7.194 +		if (val == NULL)
   7.195 +			continue;
   7.196 +
   7.197 +		if (strcmp(val, DOMNAME) == 0) {
   7.198 +			/* match! */
   7.199 +			if (asprintf(&path, 
   7.200 +				     "/local/domain/%s/domid", e[i]) == -1) {
   7.201 +				free(val);
   7.202 +				break;
   7.203 +			}
   7.204 +			domid = xs_read(h, xth, path, &len);
   7.205 +			free(path);
   7.206 +		}
   7.207 +		free(val);
   7.208 +	}
   7.209 +
   7.210 + done:
   7.211 +	xs_transaction_end(h, xth, 0);
   7.212 +	free(e);
   7.213 +	return domid;
   7.214 +}
   7.215 +
   7.216 +/*
   7.217 + * a little paranoia: we don't just trust token
   7.218 + */
   7.219 +static struct xenbus_watch *find_watch(const char *token)
   7.220 +{
   7.221 +	int ret;
   7.222 +	long nonce;
   7.223 +	unsigned long addr;
   7.224 +	struct xenbus_watch *i, *cmp;
   7.225 +
   7.226 +	ret = sscanf(token, "%lX:%lX", &addr, &nonce);
   7.227 +	if (ret != 2) {
   7.228 +		EPRINTF("invalid watch token %s\n", token);
   7.229 +		return NULL;
   7.230 +	}
   7.231 +
   7.232 +	cmp = (struct xenbus_watch *)addr;
   7.233 +	list_for_each_entry(i, &watches, list)
   7.234 +		if (i == cmp && i->nonce == nonce)
   7.235 +			return i;
   7.236 +
   7.237 +	return NULL;
   7.238 +}
   7.239 +
   7.240 +/*
   7.241 + * Register callback to watch this node;
   7.242 + * unlike xs_watch, returns 0 on success and -EINVAL on failure
   7.243 + */
   7.244 +int register_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
   7.245 +{
   7.246 +	/* Pointer in ascii is the token. */
   7.247 +	char token[(sizeof(watch) + sizeof(long)) * 2 + 2];
   7.248 +
   7.249 +	/* 1-second granularity should suffice here */
   7.250 +	watch->nonce = time(NULL);
   7.251 +
   7.252 +	sprintf(token, "%lX:%lX", (long)watch, watch->nonce);
   7.253 +	if (find_watch(token)) {
   7.254 +		EPRINTF("watch collision!\n");
   7.255 +		return -EINVAL;
   7.256 +	}
   7.257 +
   7.258 +	if (!xs_watch(h, watch->node, token)) {
   7.259 +		EPRINTF("unable to set watch!\n");
   7.260 +		return -EINVAL;
   7.261 +	}
   7.262 +
   7.263 +	list_add(&watch->list, &watches);
   7.264 +
   7.265 +	return 0;
   7.266 +}
   7.267 +
   7.268 +int unregister_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
   7.269 +{
   7.270 +	char token[(sizeof(watch) + sizeof(long)) * 2 + 2];
   7.271 +
   7.272 +	sprintf(token, "%lX:%lX", (long)watch, watch->nonce);
   7.273 +	if (!find_watch(token)) {
   7.274 +		EPRINTF("no such watch!\n");
   7.275 +		return -EINVAL;
   7.276 +	}
   7.277 +
   7.278 +	if (!xs_unwatch(h, watch->node, token))
   7.279 +		EPRINTF("XENBUS Failed to release watch %s\n", watch->node);
   7.280 +
   7.281 +	list_del(&watch->list);
   7.282 +
   7.283 +	return 0;
   7.284 +}
   7.285 +
   7.286 +/*
   7.287 + * re-register callbacks to all watches
   7.288 + */
   7.289 +void reregister_xenbus_watches(struct xs_handle *h)
   7.290 +{
   7.291 +	struct xenbus_watch *watch;
   7.292 +	char token[(sizeof(watch) + sizeof(long)) * 2 + 2];
   7.293 +
   7.294 +	list_for_each_entry(watch, &watches, list) {
   7.295 +		sprintf(token, "%lX:%lX", (long)watch, watch->nonce);
   7.296 +		xs_watch(h, watch->node, token);
   7.297 +	}
   7.298 +}
   7.299 +
   7.300 +/*
   7.301 + * based on watch_thread() 
   7.302 + */
   7.303 +int xs_fire_next_watch(struct xs_handle *h)
   7.304 +{
   7.305 +	unsigned int num;
   7.306 +	struct xenbus_watch *w;
   7.307 +	char **res, *token, *node = NULL;
   7.308 +
   7.309 +	res = xs_read_watch(h, &num);
   7.310 +	if (res == NULL) 
   7.311 +		return -EAGAIN; /* in O_NONBLOCK, read_watch returns 0... */
   7.312 +
   7.313 +	node  = res[XS_WATCH_PATH];
   7.314 +	token = res[XS_WATCH_TOKEN];
   7.315 +	DPRINTF("got watch %s on %s\n", token, node);
   7.316 +
   7.317 +	w = find_watch(token);
   7.318 +	if (w) 
   7.319 +		w->callback(h, w, node);
   7.320 +
   7.321 +	DPRINTF("handled watch %s on %s\n", token, node);
   7.322 +
   7.323 +	free(res);
   7.324 +
   7.325 +	return 1;
   7.326 +}
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/tools/blktap2/daemon/lib/xs_api.h	Tue May 26 11:52:31 2009 +0100
     8.3 @@ -0,0 +1,62 @@
     8.4 +/*
     8.5 + * xs_api.h
     8.6 + *
     8.7 + * (c) 2005 Andrew Warfield and Julian Chesterfield
     8.8 + *
     8.9 + *
    8.10 + * This program is free software; you can redistribute it and/or
    8.11 + * modify it under the terms of the GNU General Public License version 2
    8.12 + * as published by the Free Software Foundation; or, when distributed
    8.13 + * separately from the Linux kernel or incorporated into other
    8.14 + * software packages, subject to the following license:
    8.15 + *
    8.16 + * Permission is hereby granted, free of charge, to any person obtaining a copy
    8.17 + * of this source file (the "Software"), to deal in the Software without
    8.18 + * restriction, including without limitation the rights to use, copy, modify,
    8.19 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
    8.20 + * and to permit persons to whom the Software is furnished to do so, subject to
    8.21 + * the following conditions:
    8.22 + *
    8.23 + * The above copyright notice and this permission notice shall be included in
    8.24 + * all copies or substantial portions of the Software.
    8.25 + *
    8.26 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    8.27 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    8.28 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    8.29 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    8.30 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    8.31 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
    8.32 + * IN THE SOFTWARE.
    8.33 + */
    8.34 +
    8.35 +#ifndef _XS_API_H_
    8.36 +#define _XS_API_H_
    8.37 +
    8.38 +#include <xs.h>
    8.39 +
    8.40 +#include "list.h"
    8.41 +
    8.42 +struct xenbus_watch
    8.43 +{
    8.44 +        struct list_head  list;
    8.45 +        char             *node;
    8.46 +	void             *data;
    8.47 +	long              nonce;
    8.48 +        void (*callback) (struct xs_handle *h, 
    8.49 +			  struct xenbus_watch *, 
    8.50 +			  const  char *node);
    8.51 +};
    8.52 +
    8.53 +int xs_gather(struct xs_handle *xs, const char *dir, ...);
    8.54 +int xs_printf(struct xs_handle *h, const char *dir, const char *node, 
    8.55 +	      const char *fmt, ...) __attribute__((format(printf, 4, 5)));
    8.56 +int xs_exists(struct xs_handle *h, const char *path);
    8.57 +char *get_dom_domid(struct xs_handle *h);
    8.58 +int convert_dev_name_to_num(char *name);
    8.59 +
    8.60 +int register_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch);
    8.61 +int unregister_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch);
    8.62 +void reregister_xenbus_watches(struct xs_handle *h);
    8.63 +int xs_fire_next_watch(struct xs_handle *h);
    8.64 +
    8.65 +#endif
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/tools/blktap2/daemon/tapdisk-channel.c	Tue May 26 11:52:31 2009 +0100
     9.3 @@ -0,0 +1,1367 @@
     9.4 +/* Copyright (c) 2008, XenSource Inc.
     9.5 + * All rights reserved.
     9.6 + *
     9.7 + * Redistribution and use in source and binary forms, with or without
     9.8 + * modification, are permitted provided that the following conditions are met:
     9.9 + *     * Redistributions of source code must retain the above copyright
    9.10 + *       notice, this list of conditions and the following disclaimer.
    9.11 + *     * Redistributions in binary form must reproduce the above copyright
    9.12 + *       notice, this list of conditions and the following disclaimer in the
    9.13 + *       documentation and/or other materials provided with the distribution.
    9.14 + *     * Neither the name of XenSource Inc. nor the names of its contributors
    9.15 + *       may be used to endorse or promote products derived from this software
    9.16 + *       without specific prior written permission.
    9.17 + *
    9.18 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    9.19 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    9.20 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    9.21 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
    9.22 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    9.23 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    9.24 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    9.25 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    9.26 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    9.27 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    9.28 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    9.29 +*/
    9.30 +#include <stdio.h>
    9.31 +#include <errno.h>
    9.32 +#include <fcntl.h>
    9.33 +#include <stdlib.h>
    9.34 +#include <unistd.h>
    9.35 +#include <string.h>
    9.36 +#include <stdarg.h>
    9.37 +#include <sys/wait.h>
    9.38 +#include <sys/ioctl.h>
    9.39 +#include <sys/resource.h>
    9.40 +
    9.41 +#include <xs.h>
    9.42 +#include "disktypes.h"
    9.43 +#include "tapdisk-dispatch.h"
    9.44 +
    9.45 +#define TAPDISK_CHANNEL_IDLE          1
    9.46 +#define TAPDISK_CHANNEL_WAIT_PID      2
    9.47 +#define TAPDISK_CHANNEL_WAIT_OPEN     3
    9.48 +#define TAPDISK_CHANNEL_WAIT_PAUSE    4
    9.49 +#define TAPDISK_CHANNEL_WAIT_RESUME   5
    9.50 +#define TAPDISK_CHANNEL_WAIT_CLOSE    6
    9.51 +#define TAPDISK_CHANNEL_CLOSED        7
    9.52 +
    9.53 +static void tapdisk_channel_error(tapdisk_channel_t *,
    9.54 +				  const char *fmt, ...)
    9.55 +  __attribute__((format(printf, 2, 3)));
    9.56 +static void tapdisk_channel_fatal(tapdisk_channel_t *,
    9.57 +				  const char *fmt, ...)
    9.58 +  __attribute__((format(printf, 2, 3)));
    9.59 +static int tapdisk_channel_parse_params(tapdisk_channel_t *);
    9.60 +static void tapdisk_channel_pause_event(struct xs_handle *,
    9.61 +					struct xenbus_watch *,
    9.62 +					const char *);
    9.63 +
    9.64 +static int
    9.65 +tapdisk_channel_check_uuid(tapdisk_channel_t *channel)
    9.66 +{
    9.67 +	uint32_t uuid;
    9.68 +	char *uuid_str;
    9.69 +
    9.70 +	uuid_str = xs_read(channel->xsh, XBT_NULL, channel->uuid_str, NULL);
    9.71 +	if (!uuid_str)
    9.72 +		return -errno;
    9.73 +
    9.74 +	uuid = strtoul(uuid_str, NULL, 10);
    9.75 +	free(uuid_str);
    9.76 +
    9.77 +	if (uuid != channel->cookie)
    9.78 +		return -EINVAL;
    9.79 +
    9.80 +	return 0;
    9.81 +}
    9.82 +
    9.83 +static inline int
    9.84 +tapdisk_channel_validate_watch(tapdisk_channel_t *channel, const char *path)
    9.85 +{
    9.86 +	int err, len;
    9.87 +
    9.88 +	len = strsep_len(path, '/', 7);
    9.89 +	if (len < 0)
    9.90 +		return -EINVAL;
    9.91 +
    9.92 +	err = tapdisk_channel_check_uuid(channel);
    9.93 +	if (err)
    9.94 +		return err;
    9.95 +
    9.96 +	if (!xs_exists(channel->xsh, path))
    9.97 +		return -ENOENT;
    9.98 +
    9.99 +	return 0;
   9.100 +}
   9.101 +
   9.102 +static inline int
   9.103 +tapdisk_channel_validate_message(tapdisk_channel_t *channel,
   9.104 +				 tapdisk_message_t *message)
   9.105 +{
   9.106 +	switch (message->type) {
   9.107 +	case TAPDISK_MESSAGE_PID_RSP:
   9.108 +		if (channel->state != TAPDISK_CHANNEL_WAIT_PID)
   9.109 +			return -EINVAL;
   9.110 +		break;
   9.111 +
   9.112 +	case TAPDISK_MESSAGE_OPEN_RSP:
   9.113 +		if (channel->state != TAPDISK_CHANNEL_WAIT_OPEN)
   9.114 +			return -EINVAL;
   9.115 +		break;
   9.116 +
   9.117 +	case TAPDISK_MESSAGE_PAUSE_RSP:
   9.118 +		if (channel->state != TAPDISK_CHANNEL_WAIT_PAUSE)
   9.119 +			return -EINVAL;
   9.120 +		break;
   9.121 +
   9.122 +	case TAPDISK_MESSAGE_RESUME_RSP:
   9.123 +		if (channel->state != TAPDISK_CHANNEL_WAIT_RESUME)
   9.124 +			return -EINVAL;
   9.125 +		break;
   9.126 +
   9.127 +	case TAPDISK_MESSAGE_CLOSE_RSP:
   9.128 +		if (channel->state != TAPDISK_CHANNEL_WAIT_CLOSE)
   9.129 +			return -EINVAL;
   9.130 +		break;
   9.131 +
   9.132 +	case TAPDISK_MESSAGE_RUNTIME_ERROR:
   9.133 +		/*
   9.134 +		 * runtime errors can be received at any time
   9.135 +		 * and should not affect the state machine
   9.136 +		 */
   9.137 +		return 0;
   9.138 +	}
   9.139 +
   9.140 +	channel->state = TAPDISK_CHANNEL_IDLE;
   9.141 +	return 0;
   9.142 +}
   9.143 +
   9.144 +static int
   9.145 +tapdisk_channel_send_message(tapdisk_channel_t *channel,
   9.146 +			     tapdisk_message_t *message, int timeout)
   9.147 +{
   9.148 +	fd_set writefds;
   9.149 +	struct timeval tv;
   9.150 +	int ret, len, offset;
   9.151 +
   9.152 +	tv.tv_sec  = timeout;
   9.153 +	tv.tv_usec = 0;
   9.154 +	offset     = 0;
   9.155 +	len        = sizeof(tapdisk_message_t);
   9.156 +
   9.157 +	DPRINTF("%s: sending '%s' message to %d:%d\n",
   9.158 +		channel->path, tapdisk_message_name(message->type),
   9.159 +		channel->channel_id, channel->cookie);
   9.160 +
   9.161 +	if (channel->state != TAPDISK_CHANNEL_IDLE &&
   9.162 +	    message->type  != TAPDISK_MESSAGE_CLOSE)
   9.163 +		EPRINTF("%s: writing message to non-idle channel (%d)\n",
   9.164 +			channel->path, channel->state);
   9.165 +
   9.166 +	while (offset < len) {
   9.167 +		FD_ZERO(&writefds);
   9.168 +		FD_SET(channel->write_fd, &writefds);
   9.169 +
   9.170 +		/* we don't bother reinitializing tv. at worst, it will wait a
   9.171 +		 * bit more time than expected. offset counts bytes, so the
   9.172 +		 * write below advances a byte pointer, not a message pointer. */
   9.173 +		ret = select(channel->write_fd + 1,
   9.174 +			     NULL, &writefds, NULL, &tv);
   9.175 +		if (ret == -1)
   9.176 +			break;
   9.177 +		else if (FD_ISSET(channel->write_fd, &writefds)) {
   9.178 +			ret = write(channel->write_fd,
   9.179 +				    (char *)message + offset, len - offset);
   9.180 +			if (ret <= 0)
   9.181 +				break;
   9.182 +			offset += ret;
   9.183 +		} else
   9.184 +			break;
   9.185 +	}
   9.186 +
   9.187 +	if (offset != len) {
   9.188 +		EPRINTF("%s: error writing '%s' message to %d:%d\n",
   9.189 +			channel->path, tapdisk_message_name(message->type),
   9.190 +			channel->channel_id, channel->cookie);
   9.191 +		return -EIO;
   9.192 +	}
   9.193 +
   9.194 +	switch (message->type) {
   9.195 +	case TAPDISK_MESSAGE_PID:
   9.196 +		channel->state = TAPDISK_CHANNEL_WAIT_PID;
   9.197 +		break;
   9.198 +
   9.199 +	case TAPDISK_MESSAGE_OPEN:
   9.200 +		channel->state = TAPDISK_CHANNEL_WAIT_OPEN;
   9.201 +		break;
   9.202 +
   9.203 +	case TAPDISK_MESSAGE_PAUSE:
   9.204 +		channel->state = TAPDISK_CHANNEL_WAIT_PAUSE;
   9.205 +		break;
   9.206 +
   9.207 +	case TAPDISK_MESSAGE_RESUME:
   9.208 +		channel->state = TAPDISK_CHANNEL_WAIT_RESUME;
   9.209 +		break;
   9.210 +
   9.211 +	case TAPDISK_MESSAGE_CLOSE:
   9.212 +		channel->state = TAPDISK_CHANNEL_WAIT_CLOSE;
   9.213 +		break;
   9.214 +
   9.215 +	default:
   9.216 +		EPRINTF("%s: unrecognized message type %d\n",
   9.217 +			channel->path, message->type);
   9.218 +	}
   9.219 +
   9.220 +	return 0;
   9.221 +}
   9.222 +
   9.223 +static void
   9.224 +__tapdisk_channel_error(tapdisk_channel_t *channel,
   9.225 +			const char *fmt, va_list ap)
   9.226 +{
   9.227 +	int err;
   9.228 +	char *dir, *buf, *message;
   9.229 +
   9.230 +	err = vasprintf(&buf, fmt, ap);
   9.231 +	if (err == -1) {
   9.232 +		EPRINTF("failed to allocate error message\n");
   9.233 +		buf = NULL;
   9.234 +	}
   9.235 +
   9.236 +	if (buf)
   9.237 +		message = buf;
   9.238 +	else
   9.239 +		message = "tapdisk error";
   9.240 +
   9.241 +	EPRINTF("%s: %s\n", channel->path, message);
   9.242 +
   9.243 +	err = asprintf(&dir, "%s/tapdisk-error", channel->path);
   9.244 +	if (err == -1) {
   9.245 +		EPRINTF("%s: failed to write %s\n", __func__, message);
   9.246 +		dir = NULL;
   9.247 +		goto out;
   9.248 +	}
   9.249 +
   9.250 +	xs_write(channel->xsh, XBT_NULL, dir, message, strlen(message));
   9.251 +
   9.252 +out:
   9.253 +	free(dir);
   9.254 +	free(buf);
   9.255 +}
   9.256 +
   9.257 +static void
   9.258 +tapdisk_channel_error(tapdisk_channel_t *channel, const char *fmt, ...)
   9.259 +{
   9.260 +	va_list ap;
   9.261 +
   9.262 +	va_start(ap, fmt);
   9.263 +	__tapdisk_channel_error(channel, fmt, ap);
   9.264 +	va_end(ap);
   9.265 +}
   9.266 +
   9.267 +static void
   9.268 +tapdisk_channel_fatal(tapdisk_channel_t *channel, const char *fmt, ...)
   9.269 +{
   9.270 +	va_list ap;
   9.271 +
   9.272 +	va_start(ap, fmt);
   9.273 +	__tapdisk_channel_error(channel, fmt, ap);
   9.274 +	va_end(ap);
   9.275 +
   9.276 +	tapdisk_channel_close(channel);
   9.277 +}
   9.278 +
   9.279 +static int
   9.280 +tapdisk_channel_connect_backdev(tapdisk_channel_t *channel)
   9.281 +{
   9.282 +	int err, major, minor;
   9.283 +	char *s, *path, *devname;
   9.284 +
   9.285 +	s       = NULL;
   9.286 +	path    = NULL;
   9.287 +	devname = NULL;
   9.288 +
   9.289 +	err = ioctl(channel->blktap_fd,
   9.290 +		    BLKTAP_IOCTL_BACKDEV_SETUP, channel->minor);
   9.291 +	if (err) {
   9.292 +		err = -errno;
   9.293 +		goto fail;
   9.294 +	}
   9.295 +
   9.296 +	err = asprintf(&path, "%s/backdev-node", channel->path);
   9.297 +	if (err == -1) {
   9.298 +		path = NULL;
   9.299 +		err  = -ENOMEM;
   9.300 +		goto fail;
   9.301 +	}
   9.302 +
   9.303 +	s = xs_read(channel->xsh, XBT_NULL, path, NULL);
   9.304 +	if (!s) {
   9.305 +		err = -errno;
   9.306 +		goto fail;
   9.307 +	}
   9.308 +
   9.309 +	err = sscanf(s, "%d:%d", &major, &minor);
   9.310 +	if (err != 2) {
   9.311 +		err = -EINVAL;
   9.312 +		goto fail;
   9.313 +	}
   9.314 +
   9.315 +	err = asprintf(&devname,"%s/%s%d",
   9.316 +		       BLKTAP_DEV_DIR, BACKDEV_NAME, minor);
   9.317 +	if (err == -1) {
   9.318 +		devname = NULL;
   9.319 +		err = -ENOMEM;
   9.320 +		goto fail;
   9.321 +	}
   9.322 +
   9.323 +	err = make_blktap_device(devname, major, minor, S_IFBLK | 0600);
   9.324 +	if (err)
   9.325 +		goto fail;
   9.326 +
   9.327 +	free(path);
   9.328 +	err = asprintf(&path, "%s/backdev-path", channel->path);
   9.329 +	if (err == -1) {
   9.330 +		path = NULL;
   9.331 +		err  = -ENOMEM;
   9.332 +		goto fail;
   9.333 +	}
   9.334 +
   9.335 +	err = xs_write(channel->xsh, XBT_NULL, path, devname, strlen(devname));
   9.336 +	if (err == 0) {
   9.337 +		err = -errno;
   9.338 +		goto fail;
   9.339 +	}
   9.340 +
   9.341 +	err = 0;
   9.342 + out:
   9.343 +	free(devname);
   9.344 +	free(path);
   9.345 +	free(s);
   9.346 +	return err;
   9.347 +
   9.348 + fail:
   9.349 +	EPRINTF("backdev setup failed [%d]\n", err);
   9.350 +	goto out;
   9.351 +}
   9.352 +
   9.353 +static int
   9.354 +tapdisk_channel_complete_connection(tapdisk_channel_t *channel)
   9.355 +{
   9.356 +	int err;
   9.357 +	char *path;
   9.358 +
   9.359 +	if (!xs_printf(channel->xsh, channel->path,
   9.360 +		       "sectors", "%llu", channel->image.size)) {
   9.361 +		EPRINTF("ERROR: Failed writing sectors");
   9.362 +		return -errno;
   9.363 +	}
   9.364 +
   9.365 +	if (!xs_printf(channel->xsh, channel->path,
   9.366 +		       "sector-size", "%lu", channel->image.secsize)) {
   9.367 +		EPRINTF("ERROR: Failed writing sector-size");
   9.368 +		return -errno;
   9.369 +	}
   9.370 +
   9.371 +	if (!xs_printf(channel->xsh, channel->path,
   9.372 +		       "info", "%u", channel->image.info)) {
   9.373 +		EPRINTF("ERROR: Failed writing info");
   9.374 +		return -errno;
   9.375 +	}
   9.376 +
   9.377 +	err = tapdisk_channel_connect_backdev(channel);
   9.378 +	if (err)
   9.379 +		goto clean;
   9.380 +
   9.381 +	channel->connected = 1;
   9.382 +	return 0;
   9.383 +
   9.384 + clean:
   9.385 +	if (asprintf(&path, "%s/info", channel->path) == -1)
   9.386 +		return err;
   9.387 +
   9.388 +	if (!xs_rm(channel->xsh, XBT_NULL, path))
   9.389 +		goto clean_out;
   9.390 +
   9.391 +	free(path);
   9.392 +	if (asprintf(&path, "%s/sector-size", channel->path) == -1)
   9.393 +		return err;
   9.394 +
   9.395 +	if (!xs_rm(channel->xsh, XBT_NULL, path))
   9.396 +		goto clean_out;
   9.397 +
   9.398 +	free(path);
   9.399 +	if (asprintf(&path, "%s/sectors", channel->path) == -1)
   9.400 +		return err;
   9.401 +
   9.402 +	xs_rm(channel->xsh, XBT_NULL, path);
   9.403 +
   9.404 + clean_out:
   9.405 +	free(path);
   9.406 +	return err;
   9.407 +}
   9.408 +
   9.409 +static int
   9.410 +tapdisk_channel_send_open_request(tapdisk_channel_t *channel)
   9.411 +{
   9.412 +	int len;
   9.413 +	tapdisk_message_t message;
   9.414 +
   9.415 +	memset(&message, 0, sizeof(tapdisk_message_t));
   9.416 +
   9.417 +	len = strlen(channel->vdi_path);
   9.418 +
   9.419 +	message.type              = TAPDISK_MESSAGE_OPEN;
   9.420 +	message.cookie            = channel->cookie;
   9.421 +	message.drivertype        = channel->drivertype;
   9.422 +	message.u.params.storage  = channel->storage;
   9.423 +	message.u.params.devnum   = channel->minor;
   9.424 +	message.u.params.domid    = channel->domid;
   9.425 +	message.u.params.path_len = len;
   9.426 +	strncpy(message.u.params.path, channel->vdi_path, len);
   9.427 +
   9.428 +	if (channel->mode == 'r')
   9.429 +		message.u.params.flags |= TAPDISK_MESSAGE_FLAG_RDONLY;
   9.430 +	if (channel->shared)
   9.431 +		message.u.params.flags |= TAPDISK_MESSAGE_FLAG_SHARED;
   9.432 +
   9.433 +	/* TODO: clean this up */
   9.434 +	if (xs_exists(channel->xsh, "/local/domain/0/tapdisk/add-cache"))
   9.435 +		message.u.params.flags |= TAPDISK_MESSAGE_FLAG_ADD_CACHE;
   9.436 +	if (xs_exists(channel->xsh, "/local/domain/0/tapdisk/log-dirty"))
   9.437 +		message.u.params.flags |= TAPDISK_MESSAGE_FLAG_LOG_DIRTY;
   9.438 +
   9.439 +	return tapdisk_channel_send_message(channel, &message, 2);
   9.440 +}
   9.441 +
   9.442 +static int
   9.443 +tapdisk_channel_receive_open_response(tapdisk_channel_t *channel,
   9.444 +				      tapdisk_message_t *message)
   9.445 +{
   9.446 +	int err;
   9.447 +
   9.448 +	channel->image.size    = message->u.image.sectors;
   9.449 +	channel->image.secsize = message->u.image.sector_size;
   9.450 +	channel->image.info    = message->u.image.info;
   9.451 +
   9.452 +	err = tapdisk_channel_complete_connection(channel);
   9.453 +	if (err)
   9.454 +		goto fail;
   9.455 +
   9.456 +	/* did we receive a pause request before the connection completed? */
   9.457 +	if (channel->pause_needed) {
   9.458 +		DPRINTF("%s: deferred pause request\n", channel->path);
   9.459 +		tapdisk_channel_pause_event(channel->xsh,
   9.460 +					    &channel->pause_watch,
   9.461 +					    channel->pause_str);
   9.462 +		channel->pause_needed = 0;
   9.463 +	}
   9.464 +
   9.465 +	return 0;
   9.466 +
   9.467 +fail:
   9.468 +	tapdisk_channel_fatal(channel,
   9.469 +			      "failure completing connection: %d", err);
   9.470 +	return err;
   9.471 +}
   9.472 +
   9.473 +static int
   9.474 +tapdisk_channel_send_shutdown_request(tapdisk_channel_t *channel)
   9.475 +{
   9.476 +	tapdisk_message_t message;
   9.477 +
   9.478 +	memset(&message, 0, sizeof(tapdisk_message_t));
   9.479 +
   9.480 +	message.type       = TAPDISK_MESSAGE_CLOSE;
   9.481 +	message.drivertype = channel->drivertype;
   9.482 +	message.cookie     = channel->cookie;
   9.483 +
   9.484 +	return tapdisk_channel_send_message(channel, &message, 2);
   9.485 +}
   9.486 +
   9.487 +static int
   9.488 +tapdisk_channel_receive_shutdown_response(tapdisk_channel_t *channel,
   9.489 +					  tapdisk_message_t *message)
   9.490 +{
   9.491 +	channel->open  = 0;
   9.492 +	channel->state = TAPDISK_CHANNEL_CLOSED;
   9.493 +	tapdisk_channel_close(channel);
   9.494 +	return 0;
   9.495 +}
   9.496 +
   9.497 +static int
   9.498 +tapdisk_channel_receive_runtime_error(tapdisk_channel_t *channel,
   9.499 +				      tapdisk_message_t *message)
   9.500 +{
   9.501 +	tapdisk_channel_error(channel,
   9.502 +			      "runtime error: %s", message->u.string.text);
   9.503 +	return 0;
   9.504 +}
   9.505 +
   9.506 +static int
   9.507 +tapdisk_channel_send_pid_request(tapdisk_channel_t *channel)
   9.508 +{
   9.509 +	int err;
   9.510 +	tapdisk_message_t message;
   9.511 +
   9.512 +	memset(&message, 0, sizeof(tapdisk_message_t));
   9.513 +
   9.514 +	message.type       = TAPDISK_MESSAGE_PID;
   9.515 +	message.drivertype = channel->drivertype;
   9.516 +	message.cookie     = channel->cookie;
   9.517 +
   9.518 +	err = tapdisk_channel_send_message(channel, &message, 2);
   9.519 +
   9.520 +	if (!err)
   9.521 +		channel->open = 1;
   9.522 +
   9.523 +	return err;
   9.524 +}
   9.525 +
   9.526 +static int
   9.527 +tapdisk_channel_receive_pid_response(tapdisk_channel_t *channel,
   9.528 +				     tapdisk_message_t *message)
   9.529 +{
   9.530 +	int err;
   9.531 +
   9.532 +	channel->tapdisk_pid = message->u.tapdisk_pid;
   9.533 +
   9.534 +	DPRINTF("%s: tapdisk pid: %d\n", channel->path, channel->tapdisk_pid);
   9.535 +
   9.536 +	err = setpriority(PRIO_PROCESS, channel->tapdisk_pid, PRIO_SPECIAL_IO);
   9.537 +	if (err) {
   9.538 +		tapdisk_channel_fatal(channel,
   9.539 +				      "setting tapdisk priority: %d", err);
   9.540 +		return err;
   9.541 +	}
   9.542 +
   9.543 +	err = tapdisk_channel_send_open_request(channel);
   9.544 +	if (err) {
   9.545 +		tapdisk_channel_fatal(channel,
   9.546 +				      "sending open request: %d", err);
   9.547 +		return err;
   9.548 +	}
   9.549 +
   9.550 +	return 0;
   9.551 +}
   9.552 +
   9.553 +static int
   9.554 +tapdisk_channel_send_pause_request(tapdisk_channel_t *channel)
   9.555 +{
   9.556 +	tapdisk_message_t message;
   9.557 +
   9.558 +	memset(&message, 0, sizeof(tapdisk_message_t));
   9.559 +
   9.560 +	DPRINTF("pausing %s\n", channel->path);
   9.561 +
   9.562 +	message.type       = TAPDISK_MESSAGE_PAUSE;
   9.563 +	message.drivertype = channel->drivertype;
   9.564 +	message.cookie     = channel->cookie;
   9.565 +
   9.566 +	return tapdisk_channel_send_message(channel, &message, 2);
   9.567 +}
   9.568 +
   9.569 +static int
   9.570 +tapdisk_channel_receive_pause_response(tapdisk_channel_t *channel,
   9.571 +				       tapdisk_message_t *message)
   9.572 +{
   9.573 +	int err;
   9.574 +
   9.575 +	if (!xs_write(channel->xsh, XBT_NULL,
   9.576 +		      channel->pause_done_str, "", strlen(""))) {
   9.577 +		err = -errno;
   9.578 +		goto fail;
   9.579 +	}
   9.580 +
   9.581 +	return 0;
   9.582 +
   9.583 +fail:
   9.584 +	tapdisk_channel_fatal(channel, /* no '\n': text goes to xenstore */
   9.585 +			      "failure receiving pause response: %d", err);
   9.586 +	return err;
   9.587 +}
   9.588 +
   9.589 +static int
   9.590 +tapdisk_channel_send_resume_request(tapdisk_channel_t *channel)
   9.591 +{
   9.592 +	int len;
   9.593 +	tapdisk_message_t message;
   9.594 +
   9.595 +	memset(&message, 0, sizeof(tapdisk_message_t));
   9.596 +
   9.597 +	len = strlen(channel->vdi_path);
   9.598 +
   9.599 +	DPRINTF("resuming %s\n", channel->path);
   9.600 +
   9.601 +	message.type              = TAPDISK_MESSAGE_RESUME;
   9.602 +	message.drivertype        = channel->drivertype;
   9.603 +	message.cookie            = channel->cookie;
   9.604 +	message.u.params.path_len = len;
   9.605 +	strncpy(message.u.params.path, channel->vdi_path, len);
   9.606 +
   9.607 +	return tapdisk_channel_send_message(channel, &message, 2);
   9.608 +}
   9.609 +
   9.610 +static int
   9.611 +tapdisk_channel_receive_resume_response(tapdisk_channel_t *channel,
   9.612 +					tapdisk_message_t *message)
   9.613 +{
   9.614 +	int err;
   9.615 +
   9.616 +	if (!xs_rm(channel->xsh, XBT_NULL, channel->pause_done_str)) {
   9.617 +		err = -errno;
   9.618 +		goto fail;
   9.619 +	}
   9.620 +
   9.621 +	return 0;
   9.622 +
   9.623 +fail:
   9.624 +	tapdisk_channel_fatal(channel,
   9.625 +			      "failure receiving resume response: %d", err);
   9.626 +	return err;
   9.627 +}
   9.628 +
   9.629 +static void
   9.630 +tapdisk_channel_shutdown_event(struct xs_handle *xsh,
   9.631 +			       struct xenbus_watch *watch, const char *path)
   9.632 +{
   9.633 +	int err;
   9.634 +	tapdisk_channel_t *channel;
   9.635 +
   9.636 +	channel = watch->data;
   9.637 +
   9.638 +	DPRINTF("%s: got watch on %s\n", channel->path, path);
   9.639 +
   9.640 +	if (!xs_exists(channel->xsh, channel->path)) {
   9.641 +		tapdisk_channel_close(channel);
   9.642 +		return;
   9.643 +	}
   9.644 +
   9.645 +	err = tapdisk_channel_validate_watch(channel, path);
   9.646 +	if (err) {
   9.647 +		if (err == -EINVAL)
   9.648 +			tapdisk_channel_fatal(channel, "bad shutdown watch");
   9.649 +		return;
   9.650 +	}
   9.651 +
   9.652 +	tapdisk_channel_send_shutdown_request(channel);
   9.653 +}
   9.654 +
   9.655 +static void
   9.656 +tapdisk_channel_pause_event(struct xs_handle *xsh,
   9.657 +			    struct xenbus_watch *watch, const char *path)
   9.658 +{
   9.659 +	int err, paused;
   9.660 +	tapdisk_channel_t *channel;
   9.661 +
   9.662 +	channel = watch->data;
   9.663 +
   9.664 +	DPRINTF("%s: got watch on %s\n", channel->path, path);
   9.665 +
   9.666 +	if (!xs_exists(channel->xsh, channel->path)) {
   9.667 +		tapdisk_channel_close(channel);
   9.668 +		return;
   9.669 +	}
   9.670 +
   9.671 +	/* NB: The VBD is essentially considered ready since the
   9.672 +	 * backend hotplug event occurred, which is just after
   9.673 +	 * start-tapdisk, not after watch registration. We start
   9.674 +	 * testing xenstore keys with the very first shot, but defer
   9.675 +	 * until after connection completion. */
   9.676 +
   9.677 +	err = tapdisk_channel_validate_watch(channel, path);
   9.678 +	if (err) {
   9.679 +		if (err == -EINVAL)
   9.680 +			tapdisk_channel_fatal(channel, "bad pause watch");
   9.681 +
   9.682 +		if (err != -ENOENT)
   9.683 +			return;
   9.684 +
   9.685 +		err = 0;
   9.686 +	}
   9.687 +
   9.688 +	paused  = xs_exists(xsh, channel->pause_done_str);
   9.689 +
   9.690 +	if (xs_exists(xsh, channel->pause_str)) {
   9.691 +		/*
   9.692 +		 * Duplicate requests are a protocol violation, but
   9.693 +		 * impossible to identify if watch registration and an
   9.694 +		 * actual pause request may fire separately in close
   9.695 +		 * succession. Warn, but do not signal an error.
   9.696 +		 */
   9.697 +		int pausing = channel->state == TAPDISK_CHANNEL_WAIT_PAUSE;
   9.698 +		if (pausing || paused) {
   9.699 +			DPRINTF("Ignoring pause event for %s vbd %s\n",
   9.700 +				pausing ? "pausing" : "paused", channel->path);
   9.701 +			goto out;
   9.702 +		}
   9.703 +
   9.704 +		/* defer if tapdisk is not ready yet */
   9.705 +		if (!channel->connected) {
   9.706 +			DPRINTF("%s: deferring pause request\n", path);
   9.707 +			channel->pause_needed = 1;
   9.708 +			goto out;
   9.709 +		}
   9.710 +
   9.711 +		err = tapdisk_channel_send_pause_request(channel);
   9.712 +
   9.713 +	} else if (xs_exists(xsh, channel->pause_done_str)) {
   9.714 +		free(channel->params);
   9.715 +		channel->params   = NULL;
   9.716 +		channel->vdi_path = NULL;
   9.717 +
   9.718 +		err = xs_gather(channel->xsh, channel->path,
   9.719 +				"params", NULL, &channel->params, NULL);
   9.720 +		if (err) {
   9.721 +			EPRINTF("failure re-reading params: %d\n", err);
   9.722 +			channel->params = NULL;
   9.723 +			goto out;
   9.724 +		}
   9.725 +
   9.726 +		err = tapdisk_channel_parse_params(channel);
   9.727 +		if (err)
   9.728 +			goto out;
   9.729 +
   9.730 +		err = tapdisk_channel_send_resume_request(channel);
   9.731 +		if (err)
   9.732 +			goto out;
   9.733 +	}
   9.734 +
   9.735 +	err = 0;
   9.736 +
   9.737 +out:
   9.738 +	if (err)
   9.739 +		tapdisk_channel_error(channel, "pause event failed: %d", err);
   9.740 +}
   9.741 +
   9.742 +static int
   9.743 +tapdisk_channel_open_control_socket(char *devname)
   9.744 +{
   9.745 +	int err, fd;
   9.746 +	fd_set socks;
   9.747 +	struct timeval timeout;
   9.748 +
   9.749 +	err = mkdir(BLKTAP_CTRL_DIR, 0755);
   9.750 +	if (err == -1 && errno != EEXIST) {
   9.751 +		EPRINTF("Failure creating %s directory: %d\n",
   9.752 +			BLKTAP_CTRL_DIR, errno);
   9.753 +		return -errno;
   9.754 +	}
   9.755 +
   9.756 +	err = mkfifo(devname, S_IRWXU | S_IRWXG | S_IRWXO);
   9.757 +	if (err) {
   9.758 +		if (errno == EEXIST) {
   9.759 +			/*
   9.760 +			 * Remove fifo since it may have data from
   9.761 +			 * its previous use --- earlier invocation
   9.762 +			 * of tapdisk may not have read all messages.
   9.763 +			 */
   9.764 +			err = unlink(devname);
   9.765 +			if (err) {
   9.766 +				EPRINTF("ERROR: unlink(%s) failed (%d)\n",
   9.767 +					devname, errno);
   9.768 +				return -errno;
   9.769 +			}
   9.770 +
   9.771 +			err = mkfifo(devname, S_IRWXU | S_IRWXG | S_IRWXO);
   9.772 +		}
   9.773 +
   9.774 +		if (err) {
   9.775 +			EPRINTF("ERROR: pipe failed (%d)\n", errno);
   9.776 +			return -errno;
   9.777 +		}
   9.778 +	}
   9.779 +
   9.780 +	fd = open(devname, O_RDWR | O_NONBLOCK);
   9.781 +	if (fd == -1) {
   9.782 +		EPRINTF("Failed to open %s\n", devname);
   9.783 +		return -errno;
   9.784 +	}
   9.785 +
   9.786 +	return fd;
   9.787 +}
   9.788 +
   9.789 +static int
   9.790 +tapdisk_channel_get_device_number(tapdisk_channel_t *channel)
   9.791 +{
   9.792 +	char *devname;
   9.793 +	domid_translate_t tr;
   9.794 +	int major, minor, err;
   9.795 +
   9.796 +	tr.domid = channel->domid;
   9.797 +        tr.busid = channel->busid;
   9.798 +
   9.799 +	minor = ioctl(channel->blktap_fd, BLKTAP_IOCTL_NEWINTF, tr);
   9.800 +	if (minor <= 0 || minor > MAX_TAP_DEV) {
   9.801 +		EPRINTF("invalid dev id: %d\n", minor);
   9.802 +		return -EINVAL;
   9.803 +	}
   9.804 +
   9.805 +	major = ioctl(channel->blktap_fd, BLKTAP_IOCTL_MAJOR, minor);
   9.806 +	if (major < 0) {
   9.807 +		EPRINTF("invalid major id: %d\n", major);
   9.808 +		return -EINVAL;
   9.809 +	}
   9.810 +
   9.811 +	err = asprintf(&devname, "%s/%s%d",
   9.812 +		       BLKTAP_DEV_DIR, BLKTAP_DEV_NAME, minor);
   9.813 +	if (err == -1) {
   9.814 +		EPRINTF("get_new_dev: malloc failed\n");
   9.815 +		return -ENOMEM;
   9.816 +	}
   9.817 +
   9.818 +	err = make_blktap_device(devname, major, minor, S_IFCHR | 0600);
   9.819 +	free(devname);
   9.820 +
   9.821 +	if (err)
   9.822 +		return err;
   9.823 +
   9.824 +	DPRINTF("Received device id %d and major %d, "
   9.825 +		"sent domid %d and be_id %d\n",
   9.826 +		minor, major, tr.domid, tr.busid);
   9.827 +
   9.828 +	channel->major = major;
   9.829 +	channel->minor = minor;
   9.830 +
   9.831 +	return 0;
   9.832 +}
   9.833 +
   9.834 +static int
   9.835 +tapdisk_channel_start_process(tapdisk_channel_t *channel,
   9.836 +			      char *write_dev, char *read_dev)
   9.837 +{
   9.838 +	pid_t child;
   9.839 +	char *argv[] = { "tapdisk", write_dev, read_dev, NULL };
   9.840 +
   9.841 +	if ((child = fork()) == -1)
   9.842 +		return -errno;
   9.843 +
   9.844 +	if (!child) {
   9.845 +		int i;
   9.846 +		for (i = 0 ; i < sysconf(_SC_OPEN_MAX) ; i++)
   9.847 +			if (i != STDIN_FILENO &&
   9.848 +			    i != STDOUT_FILENO &&
   9.849 +			    i != STDERR_FILENO)
   9.850 +				close(i);
   9.851 +
   9.852 +		execvp("tapdisk", argv);
   9.853 +		_exit(1);
   9.854 +	} else {
   9.855 +		pid_t got;
   9.856 +		do {
   9.857 +			got = waitpid(child, NULL, 0);
   9.858 +		} while (got == -1 && errno == EINTR); /* retry only if interrupted */
   9.859 +	}
   9.860 +	return 0;
   9.861 +}
   9.862 +
   9.863 +static int
   9.864 +tapdisk_channel_launch_tapdisk(tapdisk_channel_t *channel)
   9.865 +{
   9.866 +	int err;
   9.867 +	char *read_dev, *write_dev;
   9.868 +
   9.869 +	read_dev          = NULL;
   9.870 +	write_dev         = NULL;
   9.871 +	channel->read_fd  = -1;
   9.872 +	channel->write_fd = -1;
   9.873 +
   9.874 +	err = tapdisk_channel_get_device_number(channel);
   9.875 +	if (err)
   9.876 +		return err;
   9.877 +
   9.878 +	err = asprintf(&write_dev,
   9.879 +		       "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR, channel->minor);
   9.880 +	if (err == -1) {
   9.881 +		err = -ENOMEM;
   9.882 +		write_dev = NULL;
   9.883 +		goto fail;
   9.884 +	}
   9.885 +
   9.886 +	err = asprintf(&read_dev,
   9.887 +		       "%s/tapctrlread%d", BLKTAP_CTRL_DIR, channel->minor);
   9.888 +	if (err == -1) {
   9.889 +		err = -ENOMEM;
   9.890 +		read_dev = NULL;
   9.891 +		goto fail;
   9.892 +	}
   9.893 +
   9.894 +	channel->write_fd = tapdisk_channel_open_control_socket(write_dev);
   9.895 +	if (channel->write_fd < 0) {
   9.896 +		err = channel->write_fd;
   9.897 +		channel->write_fd = -1;
   9.898 +		goto fail;
   9.899 +	}
   9.900 +
   9.901 +	channel->read_fd = tapdisk_channel_open_control_socket(read_dev);
   9.902 +	if (channel->read_fd < 0) {
   9.903 +		err = channel->read_fd;
   9.904 +		channel->read_fd = -1;
   9.905 +		goto fail;
   9.906 +	}
   9.907 +
   9.908 +	err = tapdisk_channel_start_process(channel, write_dev, read_dev);
   9.909 +	if (err)
   9.910 +		goto fail;
   9.911 +
   9.912 +	channel->open       = 1;
   9.913 +	channel->channel_id = channel->write_fd;
   9.914 +
   9.915 +	free(read_dev);
   9.916 +	free(write_dev);
   9.917 +
   9.918 +	DPRINTF("process launched, channel = %d:%d\n",
   9.919 +		channel->channel_id, channel->cookie);
   9.920 +
   9.921 +	return tapdisk_channel_send_pid_request(channel);
   9.922 +
   9.923 +fail:
   9.924 +	free(read_dev);
   9.925 +	free(write_dev);
   9.926 +	if (channel->read_fd != -1)
   9.927 +		close(channel->read_fd);
   9.928 +	if (channel->write_fd != -1)
   9.929 +		close(channel->write_fd);
   9.930 +	return err;
   9.931 +}
   9.932 +
   9.933 +static int
   9.934 +tapdisk_channel_connect(tapdisk_channel_t *channel)
   9.935 +{
   9.936 +	int err;
   9.937 +
   9.938 +	tapdisk_daemon_find_channel(channel);
   9.939 +
   9.940 +	if (!channel->tapdisk_pid)
   9.941 +		return tapdisk_channel_launch_tapdisk(channel);
   9.942 +
   9.943 +	DPRINTF("%s: process exists: %d, channel = %d:%d\n",
   9.944 +		channel->path, channel->tapdisk_pid,
   9.945 +		channel->channel_id, channel->cookie);
   9.946 +
   9.947 +	err = tapdisk_channel_get_device_number(channel);
   9.948 +	if (err)
   9.949 +		return err;
   9.950 +
   9.951 +	return tapdisk_channel_send_pid_request(channel);
   9.952 +}
   9.953 +
   9.954 +static int
   9.955 +tapdisk_channel_init(tapdisk_channel_t *channel)
   9.956 +{
   9.957 +	int err;
   9.958 +
   9.959 +	channel->uuid_str          = NULL;
   9.960 +	channel->pause_str         = NULL;
   9.961 +	channel->pause_done_str    = NULL;
   9.962 +	channel->shutdown_str      = NULL;
   9.963 +	channel->share_tapdisk_str = NULL;
   9.964 +
   9.965 +	err = asprintf(&channel->uuid_str,
   9.966 +		       "%s/tapdisk-uuid", channel->path);
   9.967 +	if (err == -1) {
   9.968 +		channel->uuid_str = NULL;
   9.969 +		goto fail;
   9.970 +	}
   9.971 +
   9.972 +	err = asprintf(&channel->pause_str, "%s/pause", channel->path);
   9.973 +	if (err == -1) {
   9.974 +		channel->pause_str = NULL;
   9.975 +		goto fail;
   9.976 +	}
   9.977 +
   9.978 +	err = asprintf(&channel->pause_done_str,
   9.979 +		       "%s/pause-done", channel->path);
   9.980 +	if (err == -1) {
   9.981 +		channel->pause_done_str = NULL;
   9.982 +		goto fail;
   9.983 +	}
   9.984 +
   9.985 +	err = asprintf(&channel->shutdown_str,
   9.986 +		       "%s/shutdown-tapdisk", channel->path);
   9.987 +	if (err == -1) {
   9.988 +		channel->shutdown_str = NULL;
   9.989 +		goto fail;
   9.990 +	}
   9.991 +
   9.992 +	channel->share_tapdisk_str = "/local/domain/0/tapdisk/share-tapdisks";
   9.993 +
   9.994 +	return 0;
   9.995 +
   9.996 +fail:
   9.997 +	free(channel->uuid_str);
   9.998 +	free(channel->pause_str);
   9.999 +	free(channel->pause_done_str);
  9.1000 +	free(channel->shutdown_str);
  9.1001 +	channel->uuid_str          = NULL;
  9.1002 +	channel->pause_str         = NULL;
  9.1003 +	channel->pause_done_str    = NULL;
  9.1004 +	channel->shutdown_str      = NULL;
  9.1005 +	channel->share_tapdisk_str = NULL;
  9.1006 +	return -ENOMEM;
  9.1007 +}
  9.1008 +
  9.1009 +static int
  9.1010 +tapdisk_channel_set_watches(tapdisk_channel_t *channel)
  9.1011 +{
  9.1012 +	int err;
  9.1013 +
  9.1014 +	/* watch for pause events */
  9.1015 +	channel->pause_watch.node            = channel->pause_str;
  9.1016 +	channel->pause_watch.callback        = tapdisk_channel_pause_event;
  9.1017 +	channel->pause_watch.data            = channel;
  9.1018 +	err = register_xenbus_watch(channel->xsh, &channel->pause_watch);
  9.1019 +	if (err) {
  9.1020 +		channel->pause_watch.node    = NULL;
  9.1021 +		goto fail;
  9.1022 +	}
  9.1023 +
  9.1024 +	/* watch for shutdown events */
  9.1025 +	channel->shutdown_watch.node         = channel->shutdown_str;
  9.1026 +	channel->shutdown_watch.callback     = tapdisk_channel_shutdown_event;
  9.1027 +	channel->shutdown_watch.data         = channel;
  9.1028 +	err = register_xenbus_watch(channel->xsh, &channel->shutdown_watch);
  9.1029 +	if (err) {
  9.1030 +		channel->shutdown_watch.node = NULL;
  9.1031 +		goto fail;
  9.1032 +	}
  9.1033 +
  9.1034 +	return 0;
  9.1035 +
  9.1036 +fail:
  9.1037 +	if (channel->pause_watch.node) {
  9.1038 +		unregister_xenbus_watch(channel->xsh, &channel->pause_watch);
  9.1039 +		channel->pause_watch.node    = NULL;
  9.1040 +	}
  9.1041 +	if (channel->shutdown_watch.node) {
  9.1042 +		unregister_xenbus_watch(channel->xsh, &channel->shutdown_watch);
  9.1043 +		channel->shutdown_watch.node = NULL;
  9.1044 +	}
  9.1045 +	return err;
  9.1046 +}
  9.1047 +
  9.1048 +static void
  9.1049 +tapdisk_channel_get_storage_type(tapdisk_channel_t *channel)
  9.1050 +{
  9.1051 +	int err, type;
  9.1052 +	unsigned int len;
  9.1053 +	char *path, *stype;
  9.1054 +
  9.1055 +	channel->storage = TAPDISK_STORAGE_TYPE_DEFAULT;
  9.1056 +
  9.1057 +	err = asprintf(&path, "%s/sm-data/storage-type", channel->path);
  9.1058 +	if (err == -1)
  9.1059 +		return;
  9.1060 +
  9.1061 +	stype = xs_read(channel->xsh, XBT_NULL, path, &len);
  9.1062 +	if (!stype)
  9.1063 +		goto out;
  9.1064 +	else if (!strcmp(stype, "nfs"))
  9.1065 +		channel->storage = TAPDISK_STORAGE_TYPE_NFS;
  9.1066 +	else if (!strcmp(stype, "ext"))
  9.1067 +		channel->storage = TAPDISK_STORAGE_TYPE_EXT;
  9.1068 +	else if (!strcmp(stype, "lvm"))
  9.1069 +		channel->storage = TAPDISK_STORAGE_TYPE_LVM;
  9.1070 +
  9.1071 +out:
  9.1072 +	free(path);
  9.1073 +	free(stype);
  9.1074 +}
  9.1075 +
  9.1076 +static int
  9.1077 +tapdisk_channel_get_busid(tapdisk_channel_t *channel)
  9.1078 +{
  9.1079 +	int len, end;
  9.1080 +	const char *ptr;
  9.1081 +	char *tptr, num[10];
  9.1082 +
  9.1083 +	len = strsep_len(channel->path, '/', 6);
  9.1084 +	end = strlen(channel->path);
  9.1085 +	if(len < 0 || end < 0) {
  9.1086 +		EPRINTF("invalid path: %s\n", channel->path);
  9.1087 +		return -EINVAL;
  9.1088 +	}
  9.1089 +	
  9.1090 +	ptr = channel->path + len + 1;
  9.1091 +	strncpy(num, ptr, end - len);
  9.1092 +	tptr = num + (end - (len + 1));
  9.1093 +	*tptr = '\0';
  9.1094 +
  9.1095 +	channel->busid = atoi(num);
  9.1096 +	return 0;
  9.1097 +}
  9.1098 +
  9.1099 +static int
  9.1100 +tapdisk_channel_parse_params(tapdisk_channel_t *channel)
  9.1101 +{
  9.1102 +	int i, size, err;
  9.1103 +	unsigned int len;
  9.1104 +	char *ptr, *path, handle[10];
  9.1105 +	char *vdi_type;
  9.1106 +	char *vtype;
  9.1107 +
  9.1108 +	path = channel->params;
  9.1109 +	size = sizeof(dtypes) / sizeof(disk_info_t *);
  9.1110 +
  9.1111 +	if (strlen(path) + 1 >= TAPDISK_MESSAGE_MAX_PATH_LENGTH)
  9.1112 +		goto fail;
  9.1113 +
  9.1114 +	ptr = strchr(path, ':');
  9.1115 +	if (!ptr)
  9.1116 +		goto fail;
  9.1117 +
  9.1118 +	channel->vdi_path = ptr + 1;
  9.1119 +	memcpy(handle, path, (ptr - path));
  9.1120 +	ptr  = handle + (ptr - path);
  9.1121 +	*ptr = '\0';
  9.1122 +
  9.1123 +	err = asprintf(&vdi_type, "%s/sm-data/vdi-type", channel->path);
  9.1124 +	if (err == -1)
  9.1125 +		goto fail;
  9.1126 +
  9.1127 +	if (xs_exists(channel->xsh, vdi_type)) {
  9.1128 +		vtype = xs_read(channel->xsh, XBT_NULL, vdi_type, &len);
  9.1129 +		free(vdi_type);
  9.1130 +		if (!vtype)
  9.1131 +			goto fail;
  9.1132 +		if (len >= sizeof(handle) - 1) {
  9.1133 +			free(vtype);
  9.1134 +			goto fail;
  9.1135 +		}
  9.1136 +		sprintf(handle, "%s", vtype);
  9.1137 +		free(vtype);
  9.1138 +	}
  9.1139 +
  9.1140 +	for (i = 0; i < size; i++) {
  9.1141 +		if (strncmp(handle, dtypes[i]->handle, (ptr - path)))
  9.1142 +			continue;
  9.1143 +
  9.1144 +		if (dtypes[i]->idnum == -1)
  9.1145 +			goto fail;
  9.1146 +
  9.1147 +		channel->drivertype = dtypes[i]->idnum;
  9.1148 +		return 0;
  9.1149 +	}
  9.1150 +
  9.1151 +fail:
  9.1152 +	EPRINTF("%s: invalid blktap params: %s\n",
  9.1153 +		channel->path, channel->params);
  9.1154 +	channel->vdi_path = NULL;
  9.1155 +	return -EINVAL;
  9.1156 +}
  9.1157 +
  9.1158 +static int
  9.1159 +tapdisk_channel_gather_info(tapdisk_channel_t *channel)
  9.1160 +{
  9.1161 +	int err;
  9.1162 +
  9.1163 +	err = xs_gather(channel->xsh, channel->path,
  9.1164 +			"frontend", NULL, &channel->frontpath,
  9.1165 +			"frontend-id", "%li", &channel->domid,
  9.1166 +			"params", NULL, &channel->params,
  9.1167 +			"mode", "%c", &channel->mode, NULL);
  9.1168 +	if (err) {
  9.1169 +		EPRINTF("could not find device info: %d\n", err);
  9.1170 +		return err;
  9.1171 +	}
  9.1172 +
  9.1173 +	err = tapdisk_channel_parse_params(channel);
  9.1174 +	if (err)
  9.1175 +		return err;
  9.1176 +
  9.1177 +	err = tapdisk_channel_get_busid(channel);
  9.1178 +	if (err)
  9.1179 +		return err;
  9.1180 +
  9.1181 +	tapdisk_channel_get_storage_type(channel);
  9.1182 +
  9.1183 +	return 0;
  9.1184 +}
  9.1185 +
  9.1186 +static int
  9.1187 +tapdisk_channel_verify_start_request(tapdisk_channel_t *channel)
  9.1188 +{
  9.1189 +	char *path;
  9.1190 +	unsigned int err;
  9.1191 +
  9.1192 +	err = asprintf(&path, "%s/start-tapdisk", channel->path);
  9.1193 +	if (err == -1)
  9.1194 +		goto mem_fail;
  9.1195 +
  9.1196 +	if (!xs_exists(channel->xsh, path))
  9.1197 +		goto fail;
  9.1198 +
  9.1199 +	free(path);
  9.1200 +	err = asprintf(&path, "%s/shutdown-request", channel->path);
  9.1201 +	if (err == -1)
  9.1202 +		goto mem_fail;
  9.1203 +
  9.1204 +	if (xs_exists(channel->xsh, path))
  9.1205 +		goto fail;
  9.1206 +
  9.1207 +	if (xs_exists(channel->xsh, channel->shutdown_str))
  9.1208 +		goto fail;
  9.1209 +
  9.1210 +	free(path);
  9.1211 +	err = asprintf(&path, "%s/shutdown-done", channel->path);
  9.1212 +	if (err == -1)
  9.1213 +		goto mem_fail;
  9.1214 +
  9.1215 +	if (xs_exists(channel->xsh, path))
  9.1216 +		goto fail;
  9.1217 +
  9.1218 +	free(path);
  9.1219 +
  9.1220 +	return 0;
  9.1221 +
  9.1222 +fail:
  9.1223 +	free(path);
  9.1224 +	EPRINTF("%s:%s: invalid start request\n", __func__, channel->path);
  9.1225 +	return -EINVAL;
  9.1226 +
  9.1227 +mem_fail:
  9.1228 +	EPRINTF("%s:%s: out of memory\n", __func__, channel->path);
  9.1229 +	return -ENOMEM;
  9.1230 +}
  9.1231 +
  9.1232 +void
  9.1233 +tapdisk_channel_close(tapdisk_channel_t *channel)
  9.1234 +{
  9.1235 +	if (channel->channel_id)
  9.1236 +		DPRINTF("%s: closing channel %d:%d\n",
  9.1237 +			channel->path, channel->channel_id, channel->cookie);
  9.1238 +
  9.1239 +	if (channel->open)
  9.1240 +		tapdisk_channel_send_shutdown_request(channel);
  9.1241 +
  9.1242 +	if (channel->pause_watch.node) {
  9.1243 +		unregister_xenbus_watch(channel->xsh, &channel->pause_watch);
  9.1244 +		channel->pause_watch.node = NULL;
  9.1245 +	}
  9.1246 +
  9.1247 +	if (channel->shutdown_watch.node) {
  9.1248 +		unregister_xenbus_watch(channel->xsh, &channel->shutdown_watch);
  9.1249 +		channel->shutdown_watch.node = NULL;
  9.1250 +	}
  9.1251 +
  9.1252 +	tapdisk_daemon_close_channel(channel);
  9.1253 +
  9.1254 +	free(channel->params);
  9.1255 +	free(channel->frontpath);
  9.1256 +	free(channel->shutdown_str);
  9.1257 +	free(channel->pause_done_str);
  9.1258 +	free(channel->pause_str);
  9.1259 +	free(channel->uuid_str);
  9.1260 +	free(channel->path);
  9.1261 +	free(channel);
  9.1262 +}
  9.1263 +
  9.1264 +int
  9.1265 +tapdisk_channel_open(tapdisk_channel_t **_channel,
  9.1266 +		     char *path, struct xs_handle *xsh,
  9.1267 +		     int blktap_fd, uint16_t cookie)
  9.1268 +{
  9.1269 +	int err;
  9.1270 +	char *msg;
  9.1271 +	tapdisk_channel_t *channel;
  9.1272 +
  9.1273 +	msg       = NULL;
  9.1274 +	*_channel = NULL;
  9.1275 +
  9.1276 +	channel = calloc(1, sizeof(tapdisk_channel_t));
  9.1277 +	if (!channel)
  9.1278 +		return -ENOMEM;
  9.1279 +
  9.1280 +	channel->xsh       = xsh;
  9.1281 +	channel->blktap_fd = blktap_fd;
  9.1282 +	channel->cookie    = cookie;
  9.1283 +	channel->state     = TAPDISK_CHANNEL_IDLE;
  9.1284 +
  9.1285 +	INIT_LIST_HEAD(&channel->list);
  9.1286 +
  9.1287 +	channel->path = strdup(path);
  9.1288 +	if (!channel->path) {
  9.1289 +		err = -ENOMEM;
  9.1290 +		goto fail;
  9.1291 +	}
  9.1292 +
  9.1293 +	err = tapdisk_channel_init(channel);
  9.1294 +	if (err) {
  9.1295 +		msg = "allocating device";
  9.1296 +		goto fail;
  9.1297 +	}
  9.1298 +
  9.1299 +	err = tapdisk_channel_check_uuid(channel);
  9.1300 +	if (err) {
  9.1301 +		msg = "checking uuid";
  9.1302 +		goto fail;
  9.1303 +	}
  9.1304 +
  9.1305 +	err = tapdisk_channel_gather_info(channel);
  9.1306 +	if (err) {
  9.1307 +		msg = "gathering parameters";
  9.1308 +		goto fail;
  9.1309 +	}
  9.1310 +
  9.1311 +	err = tapdisk_channel_verify_start_request(channel);
  9.1312 +	if (err) {
  9.1313 +		msg = "invalid start request";
  9.1314 +		goto fail;
  9.1315 +	}
  9.1316 +
  9.1317 +	err = tapdisk_channel_set_watches(channel);
  9.1318 +	if (err) {
  9.1319 +		msg = "registering xenstore watches";
  9.1320 +		goto fail;
  9.1321 +	}
  9.1322 +
  9.1323 +	err = tapdisk_channel_connect(channel);
  9.1324 +	if (err) {
  9.1325 +		msg = "connecting to tapdisk";
  9.1326 +		goto fail;
  9.1327 +	}
  9.1328 +
  9.1329 +	*_channel = channel;
  9.1330 +	return 0;
  9.1331 +
  9.1332 +fail:
  9.1333 +	tapdisk_channel_fatal(channel, "%s: %d", (msg ? : "failure"), err);
  9.1334 +	return err;
  9.1335 +}
  9.1336 +
  9.1337 +int
  9.1338 +tapdisk_channel_receive_message(tapdisk_channel_t *c, tapdisk_message_t *m)
  9.1339 +{
  9.1340 +	int err;
  9.1341 +
  9.1342 +	err = tapdisk_channel_validate_message(c, m);
  9.1343 +	if (err)
  9.1344 +		goto fail;
  9.1345 +
  9.1346 +	switch (m->type) {
  9.1347 +	case TAPDISK_MESSAGE_PID_RSP:
  9.1348 +		return tapdisk_channel_receive_pid_response(c, m);
  9.1349 +
  9.1350 +	case TAPDISK_MESSAGE_OPEN_RSP:
  9.1351 +		return tapdisk_channel_receive_open_response(c, m);
  9.1352 +
  9.1353 +	case TAPDISK_MESSAGE_PAUSE_RSP:
  9.1354 +		return tapdisk_channel_receive_pause_response(c, m);
  9.1355 +
  9.1356 +	case TAPDISK_MESSAGE_RESUME_RSP:
  9.1357 +		return tapdisk_channel_receive_resume_response(c, m);
  9.1358 +
  9.1359 +	case TAPDISK_MESSAGE_CLOSE_RSP:
  9.1360 +		return tapdisk_channel_receive_shutdown_response(c, m);
  9.1361 +
  9.1362 +	case TAPDISK_MESSAGE_RUNTIME_ERROR:
  9.1363 +		return tapdisk_channel_receive_runtime_error(c, m);
  9.1364 +	}
  9.1365 +
  9.1366 +fail:
  9.1367 +	tapdisk_channel_fatal(c, "received unexpected message %s in state %d",
  9.1368 +			      tapdisk_message_name(m->type), c->state);
  9.1369 +	return -EINVAL;
  9.1370 +}
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/tools/blktap2/daemon/tapdisk-daemon.c	Tue May 26 11:52:31 2009 +0100
    10.3 @@ -0,0 +1,599 @@
    10.4 +/* Copyright (c) 2008, XenSource Inc.
    10.5 + * All rights reserved.
    10.6 + *
    10.7 + * Redistribution and use in source and binary forms, with or without
    10.8 + * modification, are permitted provided that the following conditions are met:
    10.9 + *     * Redistributions of source code must retain the above copyright
   10.10 + *       notice, this list of conditions and the following disclaimer.
   10.11 + *     * Redistributions in binary form must reproduce the above copyright
   10.12 + *       notice, this list of conditions and the following disclaimer in the
   10.13 + *       documentation and/or other materials provided with the distribution.
   10.14 + *     * Neither the name of XenSource Inc. nor the names of its contributors
   10.15 + *       may be used to endorse or promote products derived from this software
   10.16 + *       without specific prior written permission.
   10.17 + *
   10.18 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   10.19 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   10.20 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   10.21 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   10.22 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   10.23 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   10.24 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   10.25 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   10.26 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   10.27 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   10.28 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   10.29 +*/
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/resource.h>

#include <xs.h>
#include "disktypes.h"
#include "tapdisk-dispatch.h"
   10.41 +
/* token of the temporary xenstore watch used while waiting for the domid */
#define TAPDISK_DAEMON_DOMID_WATCH   "domid-watch"
/* pid file, also locked to detect an already running daemon */
#define TAPDISK_DAEMON_PIDFILE       "/var/run/blktapctrl.pid"

/* State of the control daemon; a single static instance exists below. */
typedef struct tapdisk_daemon {
	/* xenstore path being watched: "/local/domain/<domid>/backend/tap" */
	char                         *node;
	/* descriptor of the blktap control device opened at init */
	int                           blktap_fd;
	/* next cookie handed to a new channel; post-incremented per probe */
	uint16_t                      cookie;

	/* xenstore connection obtained from xs_daemon_open() */
	struct xs_handle             *xsh;
	/* list of open tapdisk_channel_t, linked through their 'list' member */
	struct list_head              channels;
	/* watch on 'node'; its callback is tapdisk_daemon_probe() */
	struct xenbus_watch           watch;
} tapdisk_daemon_t;

static tapdisk_daemon_t tapdisk_daemon;

/* iterate over every open channel; safe against removal of 'c' */
#define tapdisk_daemon_for_each_channel(c, tmp) \
	list_for_each_entry_safe(c, tmp, &tapdisk_daemon.channels, list)

/* note: evaluates each argument more than once */
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
   10.61 +
   10.62 +static void
   10.63 +tapdisk_daemon_print_drivers(void)
   10.64 +{
   10.65 +	int i, size;
   10.66 +
   10.67 +	DPRINTF("blktap-daemon: v1.0.2\n");
   10.68 +
   10.69 +	size = sizeof(dtypes) / sizeof(disk_info_t *);
   10.70 +	for (i = 0; i < size; i++)
   10.71 +		DPRINTF("Found driver: [%s]\n", dtypes[i]->name);
   10.72 +}
   10.73 +
   10.74 +static int
   10.75 +tapdisk_daemon_write_pidfile(long pid)
   10.76 +{
   10.77 +	char buf[100];
   10.78 +	int len, fd, flags, err;
   10.79 +
   10.80 +	fd = open(TAPDISK_DAEMON_PIDFILE, O_RDWR | O_CREAT, 0600);
   10.81 +	if (fd == -1) {
   10.82 +		EPRINTF("Opening pid file failed (%d)\n", errno);
   10.83 +		return -errno;
   10.84 +	}
   10.85 +
   10.86 +	/* We exit silently if daemon already running */
   10.87 +	err = lockf(fd, F_TLOCK, 0);
   10.88 +	if (err == -1)
   10.89 +		exit(0);
   10.90 +
   10.91 +	/* Set FD_CLOEXEC, so that tapdisk doesn't get this file descriptor */
   10.92 +	flags = fcntl(fd, F_GETFD);
   10.93 +	if (flags == -1) {
   10.94 +		EPRINTF("F_GETFD failed (%d)\n", errno);
   10.95 +		return -errno;
   10.96 +	}
   10.97 +
   10.98 +	flags |= FD_CLOEXEC;
   10.99 +	err = fcntl(fd, F_SETFD, flags);
  10.100 +	if (err == -1) {
  10.101 +		EPRINTF("F_SETFD failed (%d)\n", errno);
  10.102 +		return -errno;
  10.103 +	}
  10.104 +
  10.105 +	len = sprintf(buf, "%ld\n", pid);
  10.106 +	err = write(fd, buf, len);
  10.107 +	if (err != len) {
  10.108 +		EPRINTF("Writing pid file failed (%d)\n", errno);
  10.109 +		return -errno;
  10.110 +	}
  10.111 +
  10.112 +	return 0;
  10.113 +}
  10.114 +
  10.115 +static int
  10.116 +tapdisk_daemon_init(void)
  10.117 +{
  10.118 +	char *devname;
  10.119 +	int i, err, blktap_major;
  10.120 +
  10.121 +	memset(&tapdisk_daemon, 0, sizeof(tapdisk_daemon_t));
  10.122 +
  10.123 +	err = asprintf(&devname, "%s/%s0", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME);
  10.124 +	if (err == -1) {
  10.125 +		devname = NULL;
  10.126 +		err = -ENOMEM;
  10.127 +		goto fail;
  10.128 +	}
  10.129 +
  10.130 +	err = xc_find_device_number("blktap0");
  10.131 +	if (err < 0)
  10.132 +		goto fail;
  10.133 +
  10.134 +	blktap_major = major(err);
  10.135 +	err = make_blktap_device(devname, blktap_major, 0, S_IFCHR | 0600);
  10.136 +	if (err)
  10.137 +		goto fail;
  10.138 +
  10.139 +	tapdisk_daemon.blktap_fd = open(devname, O_RDWR);
  10.140 +	if (tapdisk_daemon.blktap_fd == -1) {
  10.141 +		err = -errno;
  10.142 +		EPRINTF("blktap0 open failed\n");
  10.143 +		goto fail;
  10.144 +	}
  10.145 +
  10.146 +	for (i = 0; i < 2; i++) {
  10.147 +		tapdisk_daemon.xsh = xs_daemon_open();
  10.148 +		if (!tapdisk_daemon.xsh) {
  10.149 +			EPRINTF("xs_daemon_open failed -- is xenstore running?\n");
  10.150 +			sleep(2);
  10.151 +		} else
  10.152 +			break;
  10.153 +	}
  10.154 +
  10.155 +	if (!tapdisk_daemon.xsh) {
  10.156 +		err = -ENOSYS;
  10.157 +		goto fail;
  10.158 +	}
  10.159 +
  10.160 +	INIT_LIST_HEAD(&tapdisk_daemon.channels);
  10.161 +
  10.162 +	free(devname);
  10.163 +	return 0;
  10.164 +
  10.165 +fail:
  10.166 +	if (tapdisk_daemon.blktap_fd > 0)
  10.167 +		close(tapdisk_daemon.blktap_fd);
  10.168 +	free(devname);
  10.169 +	memset(&tapdisk_daemon, 0, sizeof(tapdisk_daemon_t));
  10.170 +	EPRINTF("%s: %d\n", __func__, err);
  10.171 +
  10.172 +	return err;
  10.173 +}
  10.174 +
  10.175 +static int
  10.176 +tapdisk_daemon_set_node(void)
  10.177 +{
  10.178 +	int err;
  10.179 +	char *domid;
  10.180 +
  10.181 +	domid = get_dom_domid(tapdisk_daemon.xsh);
  10.182 +	if (!domid)
  10.183 +		return -EAGAIN;
  10.184 +
  10.185 +	err = asprintf(&tapdisk_daemon.node,
  10.186 +		       "/local/domain/%s/backend/tap", domid);
  10.187 +	if (err == -1) {
  10.188 +		tapdisk_daemon.node = NULL;
  10.189 +		err = -ENOMEM;
  10.190 +		goto out;
  10.191 +	}
  10.192 +
  10.193 +	err = 0;
  10.194 +
  10.195 +out:
  10.196 +	free(domid);
  10.197 +	return err;
  10.198 +}
  10.199 +
  10.200 +static int
  10.201 +tapdisk_daemon_get_domid(void)
  10.202 +{
  10.203 +	int err;
  10.204 +	unsigned int num;
  10.205 +	char **res, *node, *token, *domid;
  10.206 +
  10.207 +	res = xs_read_watch(tapdisk_daemon.xsh, &num);
  10.208 +	if (!res)
  10.209 +		return -EAGAIN;
  10.210 +
  10.211 +	err   = 0;
  10.212 +	node  = res[XS_WATCH_PATH];
  10.213 +	token = res[XS_WATCH_TOKEN];
  10.214 +
  10.215 +	if (strcmp(token, TAPDISK_DAEMON_DOMID_WATCH)) {
  10.216 +		err = -EINVAL;
  10.217 +		goto out;
  10.218 +	}
  10.219 +
  10.220 +	err = tapdisk_daemon_set_node();
  10.221 +
  10.222 +out:
  10.223 +	free(res);
  10.224 +	return err;
  10.225 +}
  10.226 +
  10.227 +static int
  10.228 +tapdisk_daemon_wait_for_domid(void)
  10.229 +{
  10.230 +	int err;
  10.231 +	char *domid;
  10.232 +	fd_set readfds;
  10.233 +
  10.234 +	err = tapdisk_daemon_set_node();
  10.235 +	if (!err)
  10.236 +		return 0;
  10.237 +
  10.238 +	if (!xs_watch(tapdisk_daemon.xsh, "/local/domain",
  10.239 +		      TAPDISK_DAEMON_DOMID_WATCH)) {
  10.240 +		EPRINTF("unable to set domain id watch\n");
  10.241 +		return -EINVAL;
  10.242 +	}
  10.243 +
  10.244 +	do {
  10.245 +		FD_ZERO(&readfds);
  10.246 +		FD_SET(xs_fileno(tapdisk_daemon.xsh), &readfds);
  10.247 +
  10.248 +		select(xs_fileno(tapdisk_daemon.xsh) + 1,
  10.249 +		       &readfds, NULL, NULL, NULL);
  10.250 +
  10.251 +		if (FD_ISSET(xs_fileno(tapdisk_daemon.xsh), &readfds))
  10.252 +			err = tapdisk_daemon_get_domid();
  10.253 +		else
  10.254 +			err = -EAGAIN;
  10.255 +	} while (err == -EAGAIN);
  10.256 +
  10.257 +	xs_unwatch(tapdisk_daemon.xsh,
  10.258 +		   "/local/domain", TAPDISK_DAEMON_DOMID_WATCH);
  10.259 +	return err;
  10.260 +}
  10.261 +
  10.262 +static inline int
  10.263 +tapdisk_daemon_new_vbd_event(const char *node)
  10.264 +{
  10.265 +	return (!strcmp(node, "start-tapdisk"));
  10.266 +}
  10.267 +
  10.268 +static int
  10.269 +tapdisk_daemon_write_uuid(char *path, uint32_t uuid)
  10.270 +{
  10.271 +	int err;
  10.272 +	char *cpath, uuid_str[12];
  10.273 +
  10.274 +	snprintf(uuid_str, sizeof(uuid_str), "%u", uuid);
  10.275 +
  10.276 +	err = asprintf(&cpath, "%s/tapdisk-uuid", path);
  10.277 +	if (err == -1)
  10.278 +		return -ENOMEM;
  10.279 +
  10.280 +	err = xs_write(tapdisk_daemon.xsh, XBT_NULL,
  10.281 +		       cpath, uuid_str, strlen(uuid_str));
  10.282 +	free(cpath);
  10.283 +
  10.284 +	return (err ? 0 : -errno);
  10.285 +}
  10.286 +
  10.287 +static void
  10.288 +tapdisk_daemon_probe(struct xs_handle *xsh,
  10.289 +		     struct xenbus_watch *watch, const char *path)
  10.290 +{
  10.291 +	char *cpath;
  10.292 +	int len, err;
  10.293 +	uint32_t cookie;
  10.294 +	const char *node;
  10.295 +	tapdisk_channel_t *channel;
  10.296 +
  10.297 +	len = strsep_len(path, '/', 7);
  10.298 +	if (len < 0)
  10.299 +		return;
  10.300 +
  10.301 +	node = path + len + 1;
  10.302 +
  10.303 +	if (!tapdisk_daemon_new_vbd_event(node))
  10.304 +		return;
  10.305 +
  10.306 +	if (!xs_exists(xsh, path))
  10.307 +		return;
  10.308 +
  10.309 +	cpath = strdup(path);
  10.310 +	if (!cpath) {
  10.311 +		EPRINTF("failed to allocate control path for %s\n", path);
  10.312 +		return;
  10.313 +	}
  10.314 +	cpath[len] = '\0';
  10.315 +
  10.316 +	cookie = tapdisk_daemon.cookie++;
  10.317 +	err    = tapdisk_daemon_write_uuid(cpath, cookie);
  10.318 +	if (err)
  10.319 +		goto out;
  10.320 +
  10.321 +	DPRINTF("%s: got watch on %s, uuid = %u\n", __func__, path, cookie);
  10.322 +
  10.323 +	err = tapdisk_channel_open(&channel, cpath,
  10.324 +				   tapdisk_daemon.xsh,
  10.325 +				   tapdisk_daemon.blktap_fd,
  10.326 +				   cookie);
  10.327 +	if (!err)
  10.328 +		list_add(&channel->list, &tapdisk_daemon.channels);
  10.329 +	else
  10.330 +		EPRINTF("failed to open tapdisk channel for %s: %d\n",
  10.331 +			path, err);
  10.332 +
  10.333 +out:
  10.334 +	free(cpath);
  10.335 +}
  10.336 +
  10.337 +static int
  10.338 +tapdisk_daemon_start(void)
  10.339 +{
  10.340 +	int err;
  10.341 +
  10.342 +	err = tapdisk_daemon_wait_for_domid();
  10.343 +	if (err)
  10.344 +		return err;
  10.345 +
  10.346 +	tapdisk_daemon.watch.node     = tapdisk_daemon.node;
  10.347 +	tapdisk_daemon.watch.callback = tapdisk_daemon_probe;
  10.348 +
  10.349 +	err = register_xenbus_watch(tapdisk_daemon.xsh, &tapdisk_daemon.watch);
  10.350 +	if (err)
  10.351 +		goto fail;
  10.352 +
  10.353 +	ioctl(tapdisk_daemon.blktap_fd,
  10.354 +	      BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE);
  10.355 +	ioctl(tapdisk_daemon.blktap_fd, BLKTAP_IOCTL_SENDPID, getpid());
  10.356 +
  10.357 +	return 0;
  10.358 +
  10.359 +fail:
  10.360 +	free(tapdisk_daemon.node);
  10.361 +	tapdisk_daemon.node       = NULL;
  10.362 +	tapdisk_daemon.watch.node = NULL;
  10.363 +	EPRINTF("%s: %d\n", __func__, err);
  10.364 +	return err;
  10.365 +}
  10.366 +
  10.367 +static int
  10.368 +tapdisk_daemon_stop(void)
  10.369 +{
  10.370 +	unregister_xenbus_watch(tapdisk_daemon.xsh, &tapdisk_daemon.watch);
  10.371 +
  10.372 +	ioctl(tapdisk_daemon.blktap_fd,
  10.373 +	      BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH);
  10.374 +	close(tapdisk_daemon.blktap_fd);
  10.375 +
  10.376 +	return 0;
  10.377 +}
  10.378 +
  10.379 +static void
  10.380 +tapdisk_daemon_free(void)
  10.381 +{
  10.382 +	free(tapdisk_daemon.node);
  10.383 +	xs_daemon_close(tapdisk_daemon.xsh);
  10.384 +	memset(&tapdisk_daemon, 0, sizeof(tapdisk_daemon_t));
  10.385 +}
  10.386 +
  10.387 +static int
  10.388 +tapdisk_daemon_read_message(int fd, tapdisk_message_t *message, int timeout)
  10.389 +{
  10.390 +	fd_set readfds;
  10.391 +	struct timeval tv;
  10.392 +	int ret, len, offset;
  10.393 +
  10.394 +	tv.tv_sec  = timeout;
  10.395 +	tv.tv_usec = 0;
  10.396 +	offset     = 0;
  10.397 +	len        = sizeof(tapdisk_message_t);
  10.398 +
  10.399 +	memset(message, 0, sizeof(tapdisk_message_t));
  10.400 +
  10.401 +	while (offset < len) {
  10.402 +		FD_ZERO(&readfds);
  10.403 +		FD_SET(fd, &readfds);
  10.404 +
  10.405 +		/* we don't bother reinitializing tv. at worst, it will wait a
  10.406 +		 * bit more time than expected. */
  10.407 +
  10.408 +		ret = select(fd + 1, &readfds, NULL, NULL, &tv);
  10.409 +		if (ret == -1)
  10.410 +			break;
  10.411 +		else if (FD_ISSET(fd, &readfds)) {
  10.412 +			ret = read(fd, message + offset, len - offset);
  10.413 +			if (ret <= 0)
  10.414 +				break;
  10.415 +			offset += ret;
  10.416 +		} else
  10.417 +			break;
  10.418 +	}
  10.419 +
  10.420 +	return (offset == len ? 0 : -EIO);
  10.421 +}
  10.422 +
  10.423 +static int
  10.424 +tapdisk_daemon_receive_message(int fd)
  10.425 +{
  10.426 +	int err;
  10.427 +	tapdisk_message_t m;
  10.428 +	tapdisk_channel_t *c, *tmp;
  10.429 +
  10.430 +	err = tapdisk_daemon_read_message(fd, &m, 2);
  10.431 +	if (err) {
  10.432 +		EPRINTF("failed reading message on %d: %d\n", fd, err);
  10.433 +		return err;
  10.434 +	}
  10.435 +
  10.436 +	tapdisk_daemon_for_each_channel(c, tmp)
  10.437 +		if (c->cookie == m.cookie && c->read_fd == fd) {
  10.438 +			DPRINTF("got '%s' message from %d:%d\n",
  10.439 +				tapdisk_message_name(m.type),
  10.440 +				c->channel_id, c->cookie);
  10.441 +
  10.442 +			return tapdisk_channel_receive_message(c, &m);
  10.443 +		}
  10.444 +
  10.445 +	EPRINTF("unrecognized message on %d: '%s' (uuid = %u)\n",
  10.446 +		fd, tapdisk_message_name(m.type), m.cookie);
  10.447 +
  10.448 +	return -EINVAL;
  10.449 +}
  10.450 +
  10.451 +static int
  10.452 +tapdisk_daemon_set_fds(fd_set *readfds)
  10.453 +{
  10.454 +	int max, fd;
  10.455 +	tapdisk_channel_t *channel, *tmp;
  10.456 +
  10.457 +	max = xs_fileno(tapdisk_daemon.xsh);
  10.458 +
  10.459 +	FD_ZERO(readfds);
  10.460 +	FD_SET(max, readfds);
  10.461 +
  10.462 +	tapdisk_daemon_for_each_channel(channel, tmp) {
  10.463 +		fd  = channel->read_fd;
  10.464 +		max = MAX(fd, max);
  10.465 +		FD_SET(fd, readfds);
  10.466 +	}
  10.467 +
  10.468 +	return max;
  10.469 +}
  10.470 +
  10.471 +static int
  10.472 +tapdisk_daemon_check_fds(fd_set *readfds)
  10.473 +{
  10.474 +	int err;
  10.475 +	tapdisk_channel_t *channel, *tmp;
  10.476 +
  10.477 +	if (FD_ISSET(xs_fileno(tapdisk_daemon.xsh), readfds))
  10.478 +		xs_fire_next_watch(tapdisk_daemon.xsh);
  10.479 +
  10.480 +	tapdisk_daemon_for_each_channel(channel, tmp)
  10.481 +		if (FD_ISSET(channel->read_fd, readfds))
  10.482 +			return tapdisk_daemon_receive_message(channel->read_fd);
  10.483 +
  10.484 +	return 0;
  10.485 +}
  10.486 +
  10.487 +static int
  10.488 +tapdisk_daemon_run(void)
  10.489 +{
  10.490 +	int err, max;
  10.491 +	fd_set readfds;
  10.492 +
  10.493 +	while (1) {
  10.494 +		max = tapdisk_daemon_set_fds(&readfds);
  10.495 +
  10.496 +		err = select(max + 1, &readfds, NULL, NULL, NULL);
  10.497 +		if (err < 0)
  10.498 +			continue;
  10.499 +
  10.500 +		err = tapdisk_daemon_check_fds(&readfds);
  10.501 +	}
  10.502 +
  10.503 +	return err;
  10.504 +}
  10.505 +
  10.506 +void
  10.507 +tapdisk_daemon_find_channel(tapdisk_channel_t *channel)
  10.508 +{
  10.509 +	tapdisk_channel_t *c, *tmp;
  10.510 +
  10.511 +	channel->read_fd     = 0;
  10.512 +	channel->write_fd    = 0;
  10.513 +	channel->tapdisk_pid = 0;
  10.514 +
  10.515 +	/* do we want multiple vbds per tapdisk? */
  10.516 +	if (!xs_exists(tapdisk_daemon.xsh, channel->share_tapdisk_str)) {
  10.517 +		channel->shared = 0;
  10.518 +		return;
  10.519 +	}
  10.520 +
  10.521 +	channel->shared = 1;
  10.522 +
  10.523 +	/* check if we already have a process started */
  10.524 +	tapdisk_daemon_for_each_channel(c, tmp)
  10.525 +		if (c->drivertype == channel->drivertype) {
  10.526 +			channel->write_fd    = c->write_fd;
  10.527 +			channel->read_fd     = c->read_fd;
  10.528 +			channel->channel_id  = c->channel_id;
  10.529 +			channel->tapdisk_pid = c->tapdisk_pid;
  10.530 +			return;
  10.531 +		}
  10.532 +}
  10.533 +
  10.534 +void
  10.535 +tapdisk_daemon_close_channel(tapdisk_channel_t *channel)
  10.536 +{
  10.537 +	tapdisk_channel_t *c, *tmp;
  10.538 +
  10.539 +	list_del(&channel->list);
  10.540 +
  10.541 +	tapdisk_daemon_for_each_channel(c, tmp)
  10.542 +		if (c->channel_id == channel->channel_id)
  10.543 +			return;
  10.544 +
  10.545 +	close(channel->read_fd);
  10.546 +	close(channel->write_fd);
  10.547 +}
  10.548 +
  10.549 +int
  10.550 +main(int argc, char *argv[])
  10.551 +{
  10.552 +	int err;
  10.553 +	char buf[128];
  10.554 +
  10.555 +	if (daemon(0, 0)) {
  10.556 +	  EPRINTF("daemon() failed (%d)\n", errno);
  10.557 +	  return -errno;
  10.558 +	}
  10.559 +
  10.560 +#define CORE_DUMP
  10.561 +#if defined(CORE_DUMP)
  10.562 +#include <sys/resource.h>
  10.563 +	{
  10.564 +		/* set up core-dumps*/
  10.565 +		struct rlimit rlim;
  10.566 +		rlim.rlim_cur = RLIM_INFINITY;
  10.567 +		rlim.rlim_max = RLIM_INFINITY;
  10.568 +		if (setrlimit(RLIMIT_CORE, &rlim) < 0)
  10.569 +			EPRINTF("setrlimit failed: %d\n", errno);
  10.570 +	}
  10.571 +#endif
  10.572 +
  10.573 +	snprintf(buf, sizeof(buf), "BLKTAP-DAEMON[%d]", getpid());
  10.574 +	openlog(buf, LOG_CONS | LOG_ODELAY, LOG_DAEMON);
  10.575 +
  10.576 +	err = tapdisk_daemon_write_pidfile(getpid());
  10.577 +	if (err)
  10.578 +		goto out;
  10.579 +
  10.580 +	tapdisk_daemon_print_drivers();
  10.581 +
  10.582 +	err = tapdisk_daemon_init();
  10.583 +	if (err)
  10.584 +		goto out;
  10.585 +
  10.586 +	err = tapdisk_daemon_start();
  10.587 +	if (err)
  10.588 +		goto out;
  10.589 +
  10.590 +	tapdisk_daemon_run();
  10.591 +
  10.592 +	tapdisk_daemon_stop();
  10.593 +	tapdisk_daemon_free();
  10.594 +
  10.595 +	err = 0;
  10.596 +
  10.597 +out:
  10.598 +	if (err)
  10.599 +		EPRINTF("failed to start %s: %d\n", argv[0], err);
  10.600 +	closelog();
  10.601 +	return err;
  10.602 +}
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/tools/blktap2/daemon/tapdisk-dispatch-common.c	Tue May 26 11:52:31 2009 +0100
    11.3 @@ -0,0 +1,94 @@
    11.4 +/*
    11.5 + * (c) 2005 Andrew Warfield and Julian Chesterfield
    11.6 + *
    11.7 + * This program is free software; you can redistribute it and/or
    11.8 + * modify it under the terms of the GNU General Public License version 2
    11.9 + * as published by the Free Software Foundation; or, when distributed
   11.10 + * separately from the Linux kernel or incorporated into other
   11.11 + * software packages, subject to the following license:
   11.12 + *
   11.13 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   11.14 + * of this source file (the "Software"), to deal in the Software without
   11.15 + * restriction, including without limitation the rights to use, copy, modify,
   11.16 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   11.17 + * and to permit persons to whom the Software is furnished to do so, subject to
   11.18 + * the following conditions:
   11.19 + *
   11.20 + * The above copyright notice and this permission notice shall be included in
   11.21 + * all copies or substantial portions of the Software.
   11.22 + *
   11.23 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   11.24 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   11.25 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   11.26 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   11.27 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   11.28 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   11.29 + * IN THE SOFTWARE.
   11.30 + */
   11.31 +#include <errno.h>
   11.32 +#include <fcntl.h>
   11.33 +#include <unistd.h>
   11.34 +#include <sys/stat.h>
   11.35 +#include <sys/types.h>
   11.36 +
   11.37 +#include "tapdisk-dispatch.h"
   11.38 +/* Return the offset of the (len+1)-th occurrence of c in str; strlen(str) if c occurs exactly len times; otherwise -ERANGE. */
   11.39 +int
   11.40 +strsep_len(const char *str, char c, unsigned int len)
   11.41 +{
   11.42 +	unsigned int i;
   11.43 +	
   11.44 +	for (i = 0; str[i]; i++)
   11.45 +		if (str[i] == c) {
   11.46 +			if (len == 0)	/* nothing left to skip: this occurrence is the answer */
   11.47 +				return i;
   11.48 +			len--;	/* skip this occurrence */
   11.49 +		}
   11.50 +
   11.51 +	return (len == 0) ? i : -ERANGE;	/* i == strlen(str) here; fewer than len occurrences is an error */
   11.52 +}
   11.53 +/* Replace any existing node at devname with a fresh mknod(devname, perm, major:minor); returns 0 or a negative errno. */
   11.54 +int
   11.55 +make_blktap_device(char *devname, int major, int minor, int perm)
   11.56 +{
   11.57 +	int err;
   11.58 +
   11.59 +	err = unlink(devname) ? -errno : 0;	/* capture errno now: logging below may overwrite it */
   11.60 +	if (err && err != -ENOENT) {	/* a missing node is fine, there is nothing to replace */
   11.61 +		EPRINTF("unlink %s failed: %d\n", devname, -err);
   11.62 +		return err;
   11.63 +	}
   11.64 +
   11.65 +	/* Need to create device */
   11.66 +	err = mkdir(BLKTAP_DEV_DIR, 0755) ? -errno : 0;	/* same capture-before-log rule */
   11.67 +	if (err && err != -EEXIST) {	/* an already existing directory is fine */
   11.68 +		EPRINTF("Failed to create %s directory\n", BLKTAP_DEV_DIR);
   11.69 +		return err;
   11.70 +	}
   11.71 +
   11.72 +	err = mknod(devname, perm, makedev(major, minor));	/* perm is the full mknod mode, so it must carry the file-type bits */
   11.73 +	if (err) {
   11.74 +		int ret = -errno;	/* saved before the diagnostics below touch errno */
   11.75 +		struct stat st;
   11.76 +
   11.77 +		EPRINTF("mknod %s failed: %d\n", devname, ret);
   11.78 +
   11.79 +		err = lstat(devname, &st);	/* diagnostics only: report what, if anything, sits at devname */
   11.80 +		if (err) {
   11.81 +			DPRINTF("lstat %s failed: %d\n", devname, -errno);
   11.82 +			err = access(devname, F_OK);
   11.83 +			if (err)
   11.84 +				DPRINTF("access %s failed: %d\n", devname, -errno);
   11.85 +			else
   11.86 +				DPRINTF("access %s succeeded\n", devname);
   11.87 +		} else
   11.88 +			DPRINTF("lstat %s: %u:%u\n", devname,
   11.89 +				(unsigned int)st.st_rdev >> 8,	/* NOTE(review): assumes the old 8-bit major/minor split */
   11.90 +				(unsigned int)st.st_rdev & 0xff);
   11.91 +
   11.92 +		return ret;
   11.93 +	}
   11.94 +
   11.95 +	DPRINTF("Created %s device\n", devname);
   11.96 +	return 0;
   11.97 +}
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/tools/blktap2/daemon/tapdisk-dispatch.h	Tue May 26 11:52:31 2009 +0100
    12.3 @@ -0,0 +1,95 @@
    12.4 +/* Copyright (c) 2008, XenSource Inc.
    12.5 + * All rights reserved.
    12.6 + *
    12.7 + * Redistribution and use in source and binary forms, with or without
    12.8 + * modification, are permitted provided that the following conditions are met:
    12.9 + *     * Redistributions of source code must retain the above copyright
   12.10 + *       notice, this list of conditions and the following disclaimer.
   12.11 + *     * Redistributions in binary form must reproduce the above copyright
   12.12 + *       notice, this list of conditions and the following disclaimer in the
   12.13 + *       documentation and/or other materials provided with the distribution.
   12.14 + *     * Neither the name of XenSource Inc. nor the names of its contributors
   12.15 + *       may be used to endorse or promote products derived from this software
   12.16 + *       without specific prior written permission.
   12.17 + *
   12.18 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   12.19 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   12.20 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   12.21 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   12.22 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   12.23 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   12.24 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   12.25 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   12.26 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   12.27 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   12.28 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   12.29 +*/
   12.30 +#ifndef _TAPDISK_DISPATCH_H_
   12.31 +#define _TAPDISK_DISPATCH_H_
   12.32 +
   12.33 +#include "xs_api.h"
   12.34 +#include "blktaplib.h"
   12.35 +#include "tapdisk-message.h"
   12.36 +/* Bookkeeping for one tapdisk channel. This header includes neither <stdint.h> nor <sys/types.h>, so uint16_t and pid_t must come from the headers included above. */
   12.37 +struct tapdisk_channel {
   12.38 +	int                       state;
   12.39 +
   12.40 +	int                       read_fd;	/* closed by the daemon together with write_fd */
   12.41 +	int                       write_fd;
   12.42 +	int                       blktap_fd;	/* presumably the blktap_fd given to tapdisk_channel_open() -- TODO confirm */
   12.43 +	int                       channel_id;	/* the daemon compares this to match channels */
   12.44 +
   12.45 +	char                      mode;
   12.46 +	char                      shared;
   12.47 +	char                      open;
   12.48 +	unsigned int              domid;
   12.49 +	unsigned int              busid;
   12.50 +	unsigned int              major;
   12.51 +	unsigned int              minor;
   12.52 +	unsigned int              storage;
   12.53 +	unsigned int              drivertype;
   12.54 +	uint16_t                  cookie;	/* presumably the cookie given to tapdisk_channel_open() -- TODO confirm */
   12.55 +	pid_t                     tapdisk_pid;
   12.56 +
   12.57 +	/*
   12.58 +	 * special accounting needed to handle pause
   12.59 +	 * requests received before tapdisk process is ready
   12.60 +	 */
   12.61 +	char                      connected;
   12.62 +	char                      pause_needed;
   12.63 +
   12.64 +	char                     *path;
   12.65 +	char                     *frontpath;
   12.66 +	char                     *params;
   12.67 +	char                     *vdi_path;
   12.68 +	char                     *uuid_str;
   12.69 +	char                     *pause_str;
   12.70 +	char                     *pause_done_str;
   12.71 +	char                     *shutdown_str;
   12.72 +	char                     *share_tapdisk_str;
   12.73 +
   12.74 +	image_t                   image;
   12.75 +
   12.76 +	struct list_head          list;	/* presumably links this channel into the daemon's channel list -- TODO confirm */
   12.77 +	struct xenbus_watch       pause_watch;
   12.78 +	struct xenbus_watch       shutdown_watch;
   12.79 +
   12.80 +	struct xs_handle         *xsh;	/* presumably the xs_handle given to tapdisk_channel_open() -- TODO confirm */
   12.81 +};
   12.82 +
   12.83 +typedef struct tapdisk_channel tapdisk_channel_t;
   12.84 +
   12.85 +int strsep_len(const char *str, char c, unsigned int len);
   12.86 +int make_blktap_device(char *devname, int major, int minor, int perm);
   12.87 +
   12.88 +int tapdisk_channel_open(tapdisk_channel_t **,
   12.89 +			 char *node, struct xs_handle *,
   12.90 +			 int blktap_fd, uint16_t cookie);
   12.91 +void tapdisk_channel_close(tapdisk_channel_t *);
   12.92 +
   12.93 +void tapdisk_daemon_find_channel(tapdisk_channel_t *);
   12.94 +void tapdisk_daemon_close_channel(tapdisk_channel_t *);
   12.95 +
   12.96 +int tapdisk_channel_receive_message(tapdisk_channel_t *, tapdisk_message_t *);
   12.97 +
   12.98 +#endif
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/tools/blktap2/drivers/Makefile	Tue May 26 11:52:31 2009 +0100
    13.3 @@ -0,0 +1,105 @@
    13.4 +XEN_ROOT=../../../
    13.5 +BLKTAP_ROOT= ..
    13.6 +include $(XEN_ROOT)/tools/Rules.mk
    13.7 +
    13.8 +LIBVHDDIR  = $(BLKTAP_ROOT)/vhd/lib
    13.9 +
   13.10 +IBIN       = tapdisk tapdisk2 td-util tapdisk-client tapdisk-stream tapdisk-diff
   13.11 +QCOW_UTIL  = img2qcow qcow-create qcow2raw
   13.12 +LOCK_UTIL  = lock-util
   13.13 +INST_DIR   = $(SBINDIR)
   13.14 +
   13.15 +CFLAGS    += -Werror -g -O0
   13.16 +CFLAGS    += -Wno-unused
   13.17 +CFLAGS    += -fno-strict-aliasing
   13.18 +CFLAGS    += -I../lib -I../../libxc
   13.19 +CFLAGS    += -I../include -I../../include
   13.20 +CFLAGS    += -I $(LIBAIO_DIR)
   13.21 +CFLAGS    += -D_GNU_SOURCE
   13.22 +CFLAGS    += -DUSE_NFS_LOCKS
   13.23 +
   13.24 +ifeq ($(CONFIG_X86_64),y)
   13.25 +CFLAGS            += -fPIC
   13.26 +endif
   13.27 +
   13.28 +LIBS      += -lrt -lz
   13.29 +
   13.30 +ifeq ($(shell . ./check_gcrypt $(CC)),yes)
   13.31 +CFLAGS += -DUSE_GCRYPT
   13.32 +CRYPT_LIB += -lgcrypt
   13.33 +else
   13.34 +CRYPT_LIB += -lcrypto
   13.35 +$(warning === libgcrypt not installed: falling back to libcrypto ===)
   13.36 +endif
   13.37 +
   13.38 +LDFLAGS_img := $(CRYPT_LIB) -lpthread -lz
   13.39 +
   13.40 +tapdisk tapdisk2 td-util tapdisk-stream tapdisk-diff $(QCOW_UTIL): LIBS += -L$(LIBVHDDIR) -lvhd -luuid
   13.41 +
   13.42 +LIBAIO_DIR = $(XEN_ROOT)/tools/libaio/src
   13.43 +tapdisk tapdisk2 tapdisk-stream tapdisk-diff $(QCOW_UTIL): AIOLIBS := $(LIBAIO_DIR)/libaio.a
   13.44 +tapdisk tapdisk-client tapdisk-stream tapdisk-diff $(QCOW_UTIL): CFLAGS  += -I$(LIBAIO_DIR) -I$(XEN_LIBXC)
   13.45 +
   13.46 +ifeq ($(VHD_STATIC),y)
   13.47 +td-util: CFLAGS += -static
   13.48 +endif
   13.49 +
   13.50 +TAP-OBJS-y  := scheduler.o
   13.51 +TAP-OBJS-y  += tapdisk-ipc.o
   13.52 +TAP-OBJS-y  += tapdisk-vbd.o
   13.53 +TAP-OBJS-y  += tapdisk-image.o
   13.54 +TAP-OBJS-y  += tapdisk-driver.o
   13.55 +TAP-OBJS-y  += tapdisk-interface.o
   13.56 +TAP-OBJS-y  += tapdisk-server.o
   13.57 +TAP-OBJS-y  += tapdisk-queue.o
   13.58 +TAP-OBJS-y  += tapdisk-filter.o
   13.59 +TAP-OBJS-y  += tapdisk-log.o
   13.60 +TAP-OBJS-y  += tapdisk-utils.o
   13.61 +TAP-OBJS-y  += io-optimize.o
   13.62 +TAP-OBJS-y  += lock.o
   13.63 +TAP-OBJS-$(CONFIG_Linux)  += blk_linux.o
   13.64 +
   13.65 +MISC-OBJS-y := atomicio.o
   13.66 +
   13.67 +BLK-OBJS-y  := block-aio.o
   13.68 +BLK-OBJS-y  += block-ram.o
   13.69 +BLK-OBJS-y  += block-cache.o
   13.70 +BLK-OBJS-y  += block-vhd.o
   13.71 +BLK-OBJS-y  += block-log.o
   13.72 +BLK-OBJS-y  += block-qcow.o
   13.73 +BLK-OBJS-y  += aes.o
   13.74 +
   13.75 +all: $(IBIN) lock-util qcow-util
   13.76 +
   13.77 +tapdisk: $(TAP-OBJS-y) $(BLK-OBJS-y) $(MISC-OBJS-y) tapdisk.c
   13.78 +	$(CC) $(CFLAGS) -o $@ $^ $(LIBS) $(AIOLIBS)  $(LDFLAGS_img)
   13.79 +
   13.80 +tapdisk2: $(TAP-OBJS-y) $(BLK-OBJS-y) $(MISC-OBJS-y) tapdisk2.c
   13.81 +	$(CC) $(CFLAGS) -o $@ $^ $(LIBS) $(AIOLIBS) $(LDFLAGS_img)
   13.82 +
   13.83 +tapdisk-client: tapdisk-client.o
   13.84 +	$(CC) $(CFLAGS) -o $@ $^ $(LIBS)  $(LDFLAGS_img)
   13.85 +
   13.86 +tapdisk-stream tapdisk-diff: %: %.o $(TAP-OBJS-y) $(BLK-OBJS-y)
   13.87 +	$(CC) $(CFLAGS) -o $@ $^ $(LIBS) $(AIOLIBS)  $(LDFLAGS_img)
   13.88 +
   13.89 +td-util: td.o tapdisk-utils.o tapdisk-log.o
   13.90 +	$(CC) $(CFLAGS) -o $@ $^ $(LIBS)  $(LDFLAGS_img)
   13.91 +
   13.92 +lock-util: lock.c
   13.93 +	$(CC) $(CFLAGS) -DUTIL -o lock-util lock.c $(LIBS)
   13.94 +
   13.95 +.PHONY: qcow-util
   13.96 +qcow-util: img2qcow qcow2raw qcow-create
   13.97 +
   13.98 +img2qcow qcow2raw qcow-create: %: %.o $(TAP-OBJS-y) $(BLK-OBJS-y)
   13.99 +	$(CC) $(CFLAGS) -o $@ $^ $(LIBS) $(AIOLIBS) $(LDFLAGS_img)
  13.100 +
  13.101 +install: all
  13.102 +	$(INSTALL_DIR) -p $(DESTDIR)$(INST_DIR)
  13.103 +	$(INSTALL_PROG) $(IBIN) $(LOCK_UTIL) $(QCOW_UTIL) $(DESTDIR)$(INST_DIR)
  13.104 +
  13.105 +clean:
  13.106 +	rm -rf *.o *~ xen TAGS $(IBIN) $(LIB) $(LOCK_UTIL) $(QCOW_UTIL)
  13.107 +
  13.108 +.PHONY: all clean install # none of these names a file, so a stray file called "all" must not skip the build
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/tools/blktap2/drivers/aes.c	Tue May 26 11:52:31 2009 +0100
    14.3 @@ -0,0 +1,1319 @@
    14.4 +/**
    14.5 + * 
    14.6 + * aes.c - integrated in QEMU by Fabrice Bellard from the OpenSSL project.
    14.7 + */
    14.8 +/*
    14.9 + * rijndael-alg-fst.c
   14.10 + *
   14.11 + * @version 3.0 (December 2000)
   14.12 + *
   14.13 + * Optimised ANSI C code for the Rijndael cipher (now AES)
   14.14 + *
   14.15 + * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
   14.16 + * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
   14.17 + * @author Paulo Barreto <paulo.barreto@terra.com.br>
   14.18 + *
   14.19 + * This code is hereby placed in the public domain.
   14.20 + *
   14.21 + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
   14.22 + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   14.23 + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   14.24 + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
   14.25 + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   14.26 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   14.27 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
   14.28 + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
   14.29 + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
   14.30 + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
   14.31 + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   14.32 + */
   14.33 +//#include "vl.h"
   14.34 +#include <inttypes.h>
   14.35 +#include <string.h>
   14.36 +#include "aes.h"
   14.37 +
   14.38 +//#define NDEBUG
   14.39 +#include <assert.h>
   14.40 +
   14.41 +typedef uint32_t u32;
   14.42 +typedef uint16_t u16;
   14.43 +typedef uint8_t u8;
   14.44 +
   14.45 +#define MAXKC   (256/32)
   14.46 +#define MAXKB   (256/8)
   14.47 +#define MAXNR   14
   14.48 +
   14.49 +/* This controls loop-unrolling in aes_core.c */
   14.50 +#undef FULL_UNROLL
   14.51 +# define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] <<  8) ^ ((u32)(pt)[3]))
   14.52 +# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >>  8); (ct)[3] = (u8)(st); }
   14.53 +
   14.54 +/*
   14.55 +Te0[x] = S [x].[02, 01, 01, 03];
   14.56 +Te1[x] = S [x].[03, 02, 01, 01];
   14.57 +Te2[x] = S [x].[01, 03, 02, 01];
   14.58 +Te3[x] = S [x].[01, 01, 03, 02];
   14.59 +Te4[x] = S [x].[01, 01, 01, 01];
   14.60 +
   14.61 +Td0[x] = Si[x].[0e, 09, 0d, 0b];
   14.62 +Td1[x] = Si[x].[0b, 0e, 09, 0d];
   14.63 +Td2[x] = Si[x].[0d, 0b, 0e, 09];
   14.64 +Td3[x] = Si[x].[09, 0d, 0b, 0e];
   14.65 +Td4[x] = Si[x].[01, 01, 01, 01];
   14.66 +*/
   14.67 +
   14.68 +static const u32 Te0[256] = {
   14.69 +    0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
   14.70 +    0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
   14.71 +    0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
   14.72 +    0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
   14.73 +    0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
   14.74 +    0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
   14.75 +    0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
   14.76 +    0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
   14.77 +    0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
   14.78 +    0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
   14.79 +    0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
   14.80 +    0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
   14.81 +    0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
   14.82 +    0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
   14.83 +    0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
   14.84 +    0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
   14.85 +    0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
   14.86 +    0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
   14.87 +    0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
   14.88 +    0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
   14.89 +    0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
   14.90 +    0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
   14.91 +    0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
   14.92 +    0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
   14.93 +    0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
   14.94 +    0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
   14.95 +    0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
   14.96 +    0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
   14.97 +    0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
   14.98 +    0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
   14.99 +    0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
  14.100 +    0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
  14.101 +    0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
  14.102 +    0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
  14.103 +    0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
  14.104 +    0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
  14.105 +    0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
  14.106 +    0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
  14.107 +    0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
  14.108 +    0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
  14.109 +    0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
  14.110 +    0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
  14.111 +    0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
  14.112 +    0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
  14.113 +    0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
  14.114 +    0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
  14.115 +    0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
  14.116 +    0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
  14.117 +    0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
  14.118 +    0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
  14.119 +    0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
  14.120 +    0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
  14.121 +    0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
  14.122 +    0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
  14.123 +    0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
  14.124 +    0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
  14.125 +    0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
  14.126 +    0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
  14.127 +    0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
  14.128 +    0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
  14.129 +    0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
  14.130 +    0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
  14.131 +    0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
  14.132 +    0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
  14.133 +};
  14.134 +static const u32 Te1[256] = {
  14.135 +    0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
  14.136 +    0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
  14.137 +    0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
  14.138 +    0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
  14.139 +    0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
  14.140 +    0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
  14.141 +    0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
  14.142 +    0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
  14.143 +    0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
  14.144 +    0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
  14.145 +    0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
  14.146 +    0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
  14.147 +    0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
  14.148 +    0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
  14.149 +    0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
  14.150 +    0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
  14.151 +    0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
  14.152 +    0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
  14.153 +    0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
  14.154 +    0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
  14.155 +    0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
  14.156 +    0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
  14.157 +    0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
  14.158 +    0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
  14.159 +    0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
  14.160 +    0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
  14.161 +    0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
  14.162 +    0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
  14.163 +    0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
  14.164 +    0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
  14.165 +    0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
  14.166 +    0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
  14.167 +    0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
  14.168 +    0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
  14.169 +    0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
  14.170 +    0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
  14.171 +    0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
  14.172 +    0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
  14.173 +    0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
  14.174 +    0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
  14.175 +    0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
  14.176 +    0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
  14.177 +    0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
  14.178 +    0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
  14.179 +    0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
  14.180 +    0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
  14.181 +    0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
  14.182 +    0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
  14.183 +    0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
  14.184 +    0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
  14.185 +    0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
  14.186 +    0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
  14.187 +    0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
  14.188 +    0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
  14.189 +    0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
  14.190 +    0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
  14.191 +    0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
  14.192 +    0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
  14.193 +    0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
  14.194 +    0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
  14.195 +    0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
  14.196 +    0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
  14.197 +    0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
  14.198 +    0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
  14.199 +};
  14.200 +static const u32 Te2[256] = {
  14.201 +    0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
  14.202 +    0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
  14.203 +    0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
  14.204 +    0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
  14.205 +    0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
  14.206 +    0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
  14.207 +    0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
  14.208 +    0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
  14.209 +    0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
  14.210 +    0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
  14.211 +    0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
  14.212 +    0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
  14.213 +    0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
  14.214 +    0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
  14.215 +    0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
  14.216 +    0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
  14.217 +    0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
  14.218 +    0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
  14.219 +    0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
  14.220 +    0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
  14.221 +    0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
  14.222 +    0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
  14.223 +    0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
  14.224 +    0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
  14.225 +    0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
  14.226 +    0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
  14.227 +    0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
  14.228 +    0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
  14.229 +    0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
  14.230 +    0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
  14.231 +    0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
  14.232 +    0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
  14.233 +    0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
  14.234 +    0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
  14.235 +    0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
  14.236 +    0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
  14.237 +    0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
  14.238 +    0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
  14.239 +    0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
  14.240 +    0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
  14.241 +    0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
  14.242 +    0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
  14.243 +    0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
  14.244 +    0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
  14.245 +    0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
  14.246 +    0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
  14.247 +    0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
  14.248 +    0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
  14.249 +    0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
  14.250 +    0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
  14.251 +    0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
  14.252 +    0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
  14.253 +    0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
  14.254 +    0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
  14.255 +    0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
  14.256 +    0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
  14.257 +    0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
  14.258 +    0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
  14.259 +    0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
  14.260 +    0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
  14.261 +    0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
  14.262 +    0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
  14.263 +    0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
  14.264 +    0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
  14.265 +};
  14.266 +static const u32 Te3[256] = {
  14.267 +
  14.268 +    0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
  14.269 +    0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
  14.270 +    0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
  14.271 +    0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
  14.272 +    0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
  14.273 +    0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
  14.274 +    0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
  14.275 +    0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
  14.276 +    0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
  14.277 +    0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
  14.278 +    0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
  14.279 +    0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
  14.280 +    0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
  14.281 +    0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
  14.282 +    0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
  14.283 +    0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
  14.284 +    0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
  14.285 +    0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
  14.286 +    0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
  14.287 +    0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
  14.288 +    0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
  14.289 +    0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
  14.290 +    0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
  14.291 +    0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
  14.292 +    0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
  14.293 +    0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
  14.294 +    0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
  14.295 +    0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
  14.296 +    0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
  14.297 +    0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
  14.298 +    0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
  14.299 +    0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
  14.300 +    0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
  14.301 +    0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
  14.302 +    0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
  14.303 +    0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
  14.304 +    0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
  14.305 +    0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
  14.306 +    0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
  14.307 +    0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
  14.308 +    0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
  14.309 +    0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
  14.310 +    0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
  14.311 +    0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
  14.312 +    0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
  14.313 +    0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
  14.314 +    0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
  14.315 +    0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
  14.316 +    0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
  14.317 +    0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
  14.318 +    0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
  14.319 +    0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
  14.320 +    0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
  14.321 +    0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
  14.322 +    0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
  14.323 +    0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
  14.324 +    0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
  14.325 +    0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
  14.326 +    0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
  14.327 +    0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
  14.328 +    0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
  14.329 +    0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
  14.330 +    0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
  14.331 +    0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
  14.332 +};
  14.333 +static const u32 Te4[256] = { /* Te4[x] = S[x] replicated into all four bytes (S = AES S-box); callers mask out the byte lane they need */
  14.334 +    0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
  14.335 +    0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
  14.336 +    0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
  14.337 +    0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
  14.338 +    0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
  14.339 +    0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
  14.340 +    0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
  14.341 +    0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
  14.342 +    0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
  14.343 +    0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
  14.344 +    0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
  14.345 +    0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
  14.346 +    0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
  14.347 +    0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
  14.348 +    0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
  14.349 +    0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
  14.350 +    0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
  14.351 +    0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
  14.352 +    0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
  14.353 +    0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
  14.354 +    0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
  14.355 +    0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
  14.356 +    0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
  14.357 +    0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
  14.358 +    0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
  14.359 +    0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
  14.360 +    0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
  14.361 +    0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
  14.362 +    0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
  14.363 +    0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
  14.364 +    0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
  14.365 +    0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
  14.366 +    0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
  14.367 +    0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
  14.368 +    0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
  14.369 +    0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
  14.370 +    0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
  14.371 +    0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
  14.372 +    0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
  14.373 +    0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
  14.374 +    0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
  14.375 +    0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
  14.376 +    0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
  14.377 +    0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
  14.378 +    0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
  14.379 +    0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
  14.380 +    0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
  14.381 +    0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
  14.382 +    0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
  14.383 +    0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
  14.384 +    0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
  14.385 +    0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
  14.386 +    0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
  14.387 +    0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
  14.388 +    0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
  14.389 +    0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
  14.390 +    0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
  14.391 +    0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
  14.392 +    0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
  14.393 +    0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
  14.394 +    0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
  14.395 +    0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
  14.396 +    0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
  14.397 +    0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
  14.398 +};
  14.399 +static const u32 Td0[256] = { /* Td0[x] = Si[x].[0e, 09, 0d, 0b] (Si = inverse AES S-box): inverse S-box combined with the inverse MixColumn multipliers */
  14.400 +    0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
  14.401 +    0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
  14.402 +    0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
  14.403 +    0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
  14.404 +    0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
  14.405 +    0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
  14.406 +    0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
  14.407 +    0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
  14.408 +    0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
  14.409 +    0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
  14.410 +    0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
  14.411 +    0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
  14.412 +    0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
  14.413 +    0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
  14.414 +    0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
  14.415 +    0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
  14.416 +    0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
  14.417 +    0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
  14.418 +    0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
  14.419 +    0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
  14.420 +    0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
  14.421 +    0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
  14.422 +    0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
  14.423 +    0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
  14.424 +    0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
  14.425 +    0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
  14.426 +    0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
  14.427 +    0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
  14.428 +    0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
  14.429 +    0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
  14.430 +    0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
  14.431 +    0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
  14.432 +    0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
  14.433 +    0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
  14.434 +    0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
  14.435 +    0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
  14.436 +    0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
  14.437 +    0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
  14.438 +    0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
  14.439 +    0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
  14.440 +    0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
  14.441 +    0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
  14.442 +    0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
  14.443 +    0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
  14.444 +    0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
  14.445 +    0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
  14.446 +    0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
  14.447 +    0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
  14.448 +    0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
  14.449 +    0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
  14.450 +    0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
  14.451 +    0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
  14.452 +    0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
  14.453 +    0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
  14.454 +    0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
  14.455 +    0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
  14.456 +    0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
  14.457 +    0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
  14.458 +    0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
  14.459 +    0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
  14.460 +    0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
  14.461 +    0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
  14.462 +    0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
  14.463 +    0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
  14.464 +};
  14.465 +static const u32 Td1[256] = { /* Td1[x] = Si[x].[0b, 0e, 09, 0d]: the Td0[x] word rotated right by 8 bits */
  14.466 +    0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
  14.467 +    0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
  14.468 +    0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
  14.469 +    0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
  14.470 +    0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
  14.471 +    0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
  14.472 +    0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
  14.473 +    0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
  14.474 +    0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
  14.475 +    0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
  14.476 +    0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
  14.477 +    0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
  14.478 +    0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
  14.479 +    0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
  14.480 +    0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
  14.481 +    0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
  14.482 +    0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
  14.483 +    0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
  14.484 +    0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
  14.485 +    0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
  14.486 +    0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
  14.487 +    0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
  14.488 +    0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
  14.489 +    0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
  14.490 +    0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
  14.491 +    0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
  14.492 +    0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
  14.493 +    0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
  14.494 +    0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
  14.495 +    0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
  14.496 +    0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
  14.497 +    0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
  14.498 +    0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
  14.499 +    0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
  14.500 +    0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
  14.501 +    0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
  14.502 +    0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
  14.503 +    0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
  14.504 +    0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
  14.505 +    0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
  14.506 +    0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
  14.507 +    0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
  14.508 +    0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
  14.509 +    0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
  14.510 +    0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
  14.511 +    0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
  14.512 +    0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
  14.513 +    0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
  14.514 +    0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
  14.515 +    0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
  14.516 +    0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
  14.517 +    0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
  14.518 +    0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
  14.519 +    0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
  14.520 +    0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
  14.521 +    0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
  14.522 +    0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
  14.523 +    0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
  14.524 +    0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
  14.525 +    0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
  14.526 +    0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
  14.527 +    0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
  14.528 +    0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
  14.529 +    0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
  14.530 +};
  14.531 +static const u32 Td2[256] = { /* Td2[x] = Si[x].[0d, 0b, 0e, 09]: the Td0[x] word rotated right by 16 bits */
  14.532 +    0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
  14.533 +    0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
  14.534 +    0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
  14.535 +    0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
  14.536 +    0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
  14.537 +    0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
  14.538 +    0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
  14.539 +    0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
  14.540 +    0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
  14.541 +    0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
  14.542 +    0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
  14.543 +    0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
  14.544 +    0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
  14.545 +    0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
  14.546 +    0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
  14.547 +    0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
  14.548 +    0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
  14.549 +    0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
  14.550 +    0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
  14.551 +    0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
  14.552 +
  14.553 +    0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
  14.554 +    0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
  14.555 +    0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
  14.556 +    0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
  14.557 +    0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
  14.558 +    0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
  14.559 +    0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
  14.560 +    0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
  14.561 +    0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
  14.562 +    0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
  14.563 +    0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
  14.564 +    0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
  14.565 +    0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
  14.566 +    0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
  14.567 +    0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
  14.568 +    0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
  14.569 +    0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
  14.570 +    0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
  14.571 +    0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
  14.572 +    0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
  14.573 +    0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
  14.574 +    0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
  14.575 +    0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
  14.576 +    0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
  14.577 +    0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
  14.578 +    0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
  14.579 +    0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
  14.580 +    0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
  14.581 +    0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
  14.582 +    0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
  14.583 +    0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
  14.584 +    0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
  14.585 +    0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
  14.586 +    0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
  14.587 +    0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
  14.588 +    0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
  14.589 +    0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
  14.590 +    0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
  14.591 +    0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
  14.592 +    0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
  14.593 +    0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
  14.594 +    0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
  14.595 +    0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
  14.596 +    0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
  14.597 +};
  14.598 +static const u32 Td3[256] = { /* Td3[x] = Si[x].[09, 0d, 0b, 0e]: the Td0[x] word rotated right by 24 bits */
  14.599 +    0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
  14.600 +    0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
  14.601 +    0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
  14.602 +    0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
  14.603 +    0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
  14.604 +    0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
  14.605 +    0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
  14.606 +    0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
  14.607 +    0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
  14.608 +    0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
  14.609 +    0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
  14.610 +    0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
  14.611 +    0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
  14.612 +    0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
  14.613 +    0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
  14.614 +    0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
  14.615 +    0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
  14.616 +    0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
  14.617 +    0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
  14.618 +    0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
  14.619 +    0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
  14.620 +    0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
  14.621 +    0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
  14.622 +    0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
  14.623 +    0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
  14.624 +    0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
  14.625 +    0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
  14.626 +    0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
  14.627 +    0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
  14.628 +    0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
  14.629 +    0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
  14.630 +    0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
  14.631 +    0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
  14.632 +    0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
  14.633 +    0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
  14.634 +    0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
  14.635 +    0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
  14.636 +    0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
  14.637 +    0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
  14.638 +    0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
  14.639 +    0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
  14.640 +    0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
  14.641 +    0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
  14.642 +    0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
  14.643 +    0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
  14.644 +    0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
  14.645 +    0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
  14.646 +    0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
  14.647 +    0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
  14.648 +    0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
  14.649 +    0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
  14.650 +    0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
  14.651 +    0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
  14.652 +    0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
  14.653 +    0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
  14.654 +    0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
  14.655 +    0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
  14.656 +    0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
  14.657 +    0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
  14.658 +    0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
  14.659 +    0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
  14.660 +    0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
  14.661 +    0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
  14.662 +    0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
  14.663 +};
  14.664 +static const u32 Td4[256] = { /* Td4[x] = Si[x] replicated into all four bytes (Si = inverse AES S-box) */
  14.665 +    0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
  14.666 +    0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
  14.667 +    0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
  14.668 +    0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
  14.669 +    0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
  14.670 +    0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
  14.671 +    0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
  14.672 +    0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
  14.673 +    0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
  14.674 +    0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
  14.675 +    0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
  14.676 +    0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
  14.677 +    0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
  14.678 +    0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
  14.679 +    0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
  14.680 +    0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
  14.681 +    0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
  14.682 +    0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
  14.683 +    0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
  14.684 +    0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
  14.685 +    0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
  14.686 +    0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
  14.687 +    0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
  14.688 +    0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
  14.689 +    0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
  14.690 +    0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
  14.691 +    0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
  14.692 +    0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
  14.693 +    0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
  14.694 +    0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
  14.695 +    0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
  14.696 +    0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
  14.697 +    0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
  14.698 +    0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
  14.699 +    0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
  14.700 +    0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
  14.701 +    0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
  14.702 +    0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
  14.703 +    0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
  14.704 +    0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
  14.705 +    0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
  14.706 +    0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
  14.707 +    0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
  14.708 +    0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
  14.709 +    0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
  14.710 +    0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
  14.711 +    0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
  14.712 +    0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
  14.713 +    0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
  14.714 +    0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
  14.715 +    0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
  14.716 +    0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
  14.717 +    0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
  14.718 +    0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
  14.719 +    0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
  14.720 +    0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
  14.721 +    0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
  14.722 +    0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
  14.723 +    0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
  14.724 +    0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
  14.725 +    0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
  14.726 +    0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
  14.727 +    0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
  14.728 +    0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
  14.729 +};
  14.730 +static const u32 rcon[] = { /* key-schedule round constants, held in the most significant byte; indexed by the loop counter in AES_set_encrypt_key */
  14.731 +	0x01000000, 0x02000000, 0x04000000, 0x08000000,
  14.732 +	0x10000000, 0x20000000, 0x40000000, 0x80000000,
  14.733 +	0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
  14.734 +};
  14.735 +
  14.736 +/**
  14.737 + * Expand the cipher key into the encryption key schedule: sets key->rounds to 10/12/14 for 128/192/256-bit keys and writes 4*(rounds+1) round-key words to key->rd_key.
  14.738 + * Returns 0 on success, -1 if userKey or key is NULL, -2 if bits is not 128, 192 or 256. */
  14.739 +int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
  14.740 +			AES_KEY *key) {
  14.741 +
  14.742 +	u32 *rk;
  14.743 +   	int i = 0; /* index of the next rcon[] entry to use */
  14.744 +	u32 temp;
  14.745 +
  14.746 +	if (!userKey || !key)
  14.747 +		return -1;
  14.748 +	if (bits != 128 && bits != 192 && bits != 256)
  14.749 +		return -2;
  14.750 +
  14.751 +	rk = key->rd_key;
  14.752 +
  14.753 +	if (bits==128)
  14.754 +		key->rounds = 10;
  14.755 +	else if (bits==192)
  14.756 +		key->rounds = 12;
  14.757 +	else
  14.758 +		key->rounds = 14;
  14.759 +
  14.760 +	rk[0] = GETU32(userKey     ); /* the first words of the schedule are the key itself (GETU32 is defined outside this block; presumably a big-endian 32-bit load -- confirm) */
  14.761 +	rk[1] = GETU32(userKey +  4);
  14.762 +	rk[2] = GETU32(userKey +  8);
  14.763 +	rk[3] = GETU32(userKey + 12);
  14.764 +	if (bits == 128) {
  14.765 +		while (1) { /* each pass derives the next 4 words from the previous 4 */
  14.766 +			temp  = rk[3];
  14.767 +			rk[4] = rk[0] ^ /* temp's bytes rotated left by one position, each substituted through Te4, then XORed with rcon[i] */
  14.768 +				(Te4[(temp >> 16) & 0xff] & 0xff000000) ^
  14.769 +				(Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
  14.770 +				(Te4[(temp      ) & 0xff] & 0x0000ff00) ^
  14.771 +				(Te4[(temp >> 24)       ] & 0x000000ff) ^
  14.772 +				rcon[i];
  14.773 +			rk[5] = rk[1] ^ rk[4];
  14.774 +			rk[6] = rk[2] ^ rk[5];
  14.775 +			rk[7] = rk[3] ^ rk[6];
  14.776 +			if (++i == 10) { /* 10 passes: 4 + 10*4 = 44 words = 4*(10+1) */
  14.777 +				return 0;
  14.778 +			}
  14.779 +			rk += 4;
  14.780 +		}
  14.781 +	}
  14.782 +	rk[4] = GETU32(userKey + 16); /* 192- and 256-bit keys supply two more key words */
  14.783 +	rk[5] = GETU32(userKey + 20);
  14.784 +	if (bits == 192) {
  14.785 +		while (1) { /* each full pass derives the next 6 words from the previous 6 */
  14.786 +			temp = rk[ 5];
  14.787 +			rk[ 6] = rk[ 0] ^
  14.788 +				(Te4[(temp >> 16) & 0xff] & 0xff000000) ^
  14.789 +				(Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
  14.790 +				(Te4[(temp      ) & 0xff] & 0x0000ff00) ^
  14.791 +				(Te4[(temp >> 24)       ] & 0x000000ff) ^
  14.792 +				rcon[i];
  14.793 +			rk[ 7] = rk[ 1] ^ rk[ 6];
  14.794 +			rk[ 8] = rk[ 2] ^ rk[ 7];
  14.795 +			rk[ 9] = rk[ 3] ^ rk[ 8];
  14.796 +			if (++i == 8) { /* the 8th pass stops after 4 words: 6 + 7*6 + 4 = 52 = 4*(12+1) */
  14.797 +				return 0;
  14.798 +			}
  14.799 +			rk[10] = rk[ 4] ^ rk[ 9];
  14.800 +			rk[11] = rk[ 5] ^ rk[10];
  14.801 +			rk += 6;
  14.802 +		}
  14.803 +	}
  14.804 +	rk[6] = GETU32(userKey + 24); /* 256-bit keys supply the last two key words */
  14.805 +	rk[7] = GETU32(userKey + 28);
  14.806 +	if (bits == 256) { /* always true here: bits was validated above and the 128/192 cases have already returned */
  14.807 +		while (1) { /* each full pass derives the next 8 words from the previous 8 */
  14.808 +			temp = rk[ 7];
  14.809 +			rk[ 8] = rk[ 0] ^
  14.810 +				(Te4[(temp >> 16) & 0xff] & 0xff000000) ^
  14.811 +				(Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
  14.812 +				(Te4[(temp      ) & 0xff] & 0x0000ff00) ^
  14.813 +				(Te4[(temp >> 24)       ] & 0x000000ff) ^
  14.814 +				rcon[i];
  14.815 +			rk[ 9] = rk[ 1] ^ rk[ 8];
  14.816 +			rk[10] = rk[ 2] ^ rk[ 9];
  14.817 +			rk[11] = rk[ 3] ^ rk[10];
  14.818 +			if (++i == 7) { /* the 7th pass stops after 4 words: 8 + 6*8 + 4 = 60 = 4*(14+1) */
  14.819 +				return 0;
  14.820 +			}
  14.821 +			temp = rk[11];
  14.822 +			rk[12] = rk[ 4] ^ /* second half of the pass: Te4 substitution only, with no byte rotation and no rcon */
  14.823 +				(Te4[(temp >> 24)       ] & 0xff000000) ^
  14.824 +				(Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
  14.825 +				(Te4[(temp >>  8) & 0xff] & 0x0000ff00) ^
  14.826 +				(Te4[(temp      ) & 0xff] & 0x000000ff);
  14.827 +			rk[13] = rk[ 5] ^ rk[12];
  14.828 +			rk[14] = rk[ 6] ^ rk[13];
  14.829 +			rk[15] = rk[ 7] ^ rk[14];
  14.830 +
  14.831 +			rk += 8;
  14.832 +        	}
  14.833 +	}
  14.834 +	return 0; /* not reached for a validated bits value: every loop above returns from inside */
  14.835 +}
  14.836 +
  14.837 +/**
  14.838 + * Expand the cipher key into the decryption key schedule: builds the encryption schedule, reverses the order of its 4-word round keys, then applies the inverse MixColumn transform to every round key except the first and the last.
  14.839 + * Returns 0 on success, or the negative status from AES_set_encrypt_key (-1 for a NULL argument, -2 for an unsupported bits value). */
  14.840 +int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
  14.841 +			 AES_KEY *key) {
  14.842 +
  14.843 +        u32 *rk;
  14.844 +	int i, j, status;
  14.845 +	u32 temp;
  14.846 +
  14.847 +	/* first, start with an encryption schedule */
  14.848 +	status = AES_set_encrypt_key(userKey, bits, key);
  14.849 +	if (status < 0)
  14.850 +		return status;
  14.851 +
  14.852 +	rk = key->rd_key;
  14.853 +
  14.854 +	/* invert the order of the round keys: */
  14.855 +	for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) { /* i walks up from round key 0, j walks down from round key `rounds`; swap 4 words at a time until they meet */
  14.856 +		temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
  14.857 +		temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
  14.858 +		temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
  14.859 +		temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
  14.860 +	}
  14.861 +	/* apply the inverse MixColumn transform to all round keys but the first and the last: */
  14.862 +	for (i = 1; i < (key->rounds); i++) {
  14.863 +		rk += 4; /* advance to round key i; round key 0 is skipped because rk starts at key->rd_key */
  14.864 +		rk[0] =
  14.865 +			Td0[Te4[(rk[0] >> 24)       ] & 0xff] ^ /* Te4[b] & 0xff is S[b]; indexing TdN with it cancels TdN's built-in inverse S-box, leaving only the inverse MixColumn multiply of byte b */
  14.866 +			Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
  14.867 +			Td2[Te4[(rk[0] >>  8) & 0xff] & 0xff] ^
  14.868 +			Td3[Te4[(rk[0]      ) & 0xff] & 0xff];
  14.869 +		rk[1] =
  14.870 +			Td0[Te4[(rk[1] >> 24)       ] & 0xff] ^
  14.871 +			Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
  14.872 +			Td2[Te4[(rk[1] >>  8) & 0xff] & 0xff] ^
  14.873 +			Td3[Te4[(rk[1]      ) & 0xff] & 0xff];
  14.874 +		rk[2] =
  14.875 +			Td0[Te4[(rk[2] >> 24)       ] & 0xff] ^
  14.876 +			Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
  14.877 +			Td2[Te4[(rk[2] >>  8) & 0xff] & 0xff] ^
  14.878 +			Td3[Te4[(rk[2]      ) & 0xff] & 0xff];
  14.879 +		rk[3] =
  14.880 +			Td0[Te4[(rk[3] >> 24)       ] & 0xff] ^
  14.881 +			Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
  14.882 +			Td2[Te4[(rk[3] >>  8) & 0xff] & 0xff] ^
  14.883 +			Td3[Te4[(rk[3]      ) & 0xff] & 0xff];
  14.884 +	}
  14.885 +	return 0;
  14.886 +}
  14.887 +
  14.888 +#ifndef AES_ASM
  14.889 +/*
  14.890 + * Encrypt a single block: four 32-bit words are loaded from in (offsets 0, 4, 8, 12), run through key->rounds rounds using key->rd_key, and stored to out.
  14.891 + * in and out can overlap (all four input words are loaded into s0..s3 before the first store to out)
  14.892 + */
  14.893 +void AES_encrypt(const unsigned char *in, unsigned char *out,
  14.894 +		 const AES_KEY *key) {
  14.895 +
  14.896 +	const u32 *rk;
  14.897 +	u32 s0, s1, s2, s3, t0, t1, t2, t3;
  14.898 +#ifndef FULL_UNROLL
  14.899 +	int r; /* loop iterations left; only the looped variant needs it */
  14.900 +#endif /* ?FULL_UNROLL */
  14.901 +
  14.902 +	assert(in && out && key);
  14.903 +	rk = key->rd_key;
  14.904 +
  14.905 +	/*
  14.906 +	 * map byte array block to cipher state
  14.907 +	 * and add initial round key:
  14.908 +	 */
  14.909 +	s0 = GETU32(in     ) ^ rk[0];
  14.910 +	s1 = GETU32(in +  4) ^ rk[1];
  14.911 +	s2 = GETU32(in +  8) ^ rk[2];
  14.912 +	s3 = GETU32(in + 12) ^ rk[3];
  14.913 +#ifdef FULL_UNROLL
  14.914 +	/* round 1: */
  14.915 +   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
  14.916 +   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
  14.917 +   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
  14.918 +   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
  14.919 +   	/* round 2: */
  14.920 +   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
  14.921 +   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
  14.922 +   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
  14.923 +   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
  14.924 +	/* round 3: */
  14.925 +   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
  14.926 +   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
  14.927 +   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
  14.928 +   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
  14.929 +   	/* round 4: */
  14.930 +   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
  14.931 +   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
  14.932 +   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
  14.933 +   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
  14.934 +	/* round 5: */
  14.935 +   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
  14.936 +   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
  14.937 +   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
  14.938 +   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
  14.939 +   	/* round 6: */
  14.940 +   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
  14.941 +   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
  14.942 +   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
  14.943 +   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
  14.944 +	/* round 7: */
  14.945 +   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
  14.946 +   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
  14.947 +   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
  14.948 +   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
  14.949 +   	/* round 8: */
  14.950 +   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
  14.951 +   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
  14.952 +   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
  14.953 +   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
  14.954 +	/* round 9: */
  14.955 +   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
  14.956 +   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
  14.957 +   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
  14.958 +   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
  14.959 +    if (key->rounds > 10) { /* more than 10 rounds: run two further full rounds */
  14.960 +        /* round 10: */
  14.961 +        s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
  14.962 +        s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
  14.963 +        s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
  14.964 +        s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
  14.965 +        /* round 11: */
  14.966 +        t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
  14.967 +        t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
  14.968 +        t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
  14.969 +        t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
  14.970 +        if (key->rounds > 12) { /* more than 12 rounds: run two further full rounds */
  14.971 +            /* round 12: */
  14.972 +            s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
  14.973 +            s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
  14.974 +            s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
  14.975 +            s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
  14.976 +            /* round 13: */
  14.977 +            t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
  14.978 +            t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
  14.979 +            t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
  14.980 +            t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
  14.981 +        }
  14.982 +    }
  14.983 +    rk += key->rounds << 2; /* 4 words per round: rk now points at the final round key */
  14.984 +#else  /* !FULL_UNROLL */
  14.985 +    /*
  14.986 +     * Nr - 1 full rounds:
  14.987 +     */
  14.988 +    r = key->rounds >> 1; /* each loop iteration performs up to two rounds */
  14.989 +    for (;;) {
  14.990 +        t0 =
  14.991 +            Te0[(s0 >> 24)       ] ^
  14.992 +            Te1[(s1 >> 16) & 0xff] ^
  14.993 +            Te2[(s2 >>  8) & 0xff] ^
  14.994 +            Te3[(s3      ) & 0xff] ^
  14.995 +            rk[4];
  14.996 +        t1 =
  14.997 +            Te0[(s1 >> 24)       ] ^
  14.998 +            Te1[(s2 >> 16) & 0xff] ^
  14.999 +            Te2[(s3 >>  8) & 0xff] ^
 14.1000 +            Te3[(s0      ) & 0xff] ^
 14.1001 +            rk[5];
 14.1002 +        t2 =
 14.1003 +            Te0[(s2 >> 24)       ] ^
 14.1004 +            Te1[(s3 >> 16) & 0xff] ^
 14.1005 +            Te2[(s0 >>  8) & 0xff] ^
 14.1006 +            Te3[(s1      ) & 0xff] ^
 14.1007 +            rk[6];
 14.1008 +        t3 =
 14.1009 +            Te0[(s3 >> 24)       ] ^
 14.1010 +            Te1[(s0 >> 16) & 0xff] ^
 14.1011 +            Te2[(s1 >>  8) & 0xff] ^
 14.1012 +            Te3[(s2      ) & 0xff] ^
 14.1013 +            rk[7];
 14.1014 +
 14.1015 +        rk += 8; /* step past the two round keys this iteration uses */
 14.1016 +        if (--r == 0) { /* last iteration: stop after the first half so the state stays in t0..t3 */
 14.1017 +            break;
 14.1018 +        }
 14.1019 +
 14.1020 +        s0 =
 14.1021 +            Te0[(t0 >> 24)       ] ^
 14.1022 +            Te1[(t1 >> 16) & 0xff] ^
 14.1023 +            Te2[(t2 >>  8) & 0xff] ^
 14.1024 +            Te3[(t3      ) & 0xff] ^
 14.1025 +            rk[0];
 14.1026 +        s1 =
 14.1027 +            Te0[(t1 >> 24)       ] ^
 14.1028 +            Te1[(t2 >> 16) & 0xff] ^
 14.1029 +            Te2[(t3 >>  8) & 0xff] ^
 14.1030 +            Te3[(t0      ) & 0xff] ^
 14.1031 +            rk[1];
 14.1032 +        s2 =
 14.1033 +            Te0[(t2 >> 24)       ] ^
 14.1034 +            Te1[(t3 >> 16) & 0xff] ^
 14.1035 +            Te2[(t0 >>  8) & 0xff] ^
 14.1036 +            Te3[(t1      ) & 0xff] ^
 14.1037 +            rk[2];
 14.1038 +        s3 =
 14.1039 +            Te0[(t3 >> 24)       ] ^
 14.1040 +            Te1[(t0 >> 16) & 0xff] ^
 14.1041 +            Te2[(t1 >>  8) & 0xff] ^
 14.1042 +            Te3[(t2      ) & 0xff] ^
 14.1043 +            rk[3];
 14.1044 +    }
 14.1045 +#endif /* ?FULL_UNROLL */
 14.1046 +    /*
 14.1047 +	 * apply last round and
 14.1048 +	 * map cipher state to byte array block:
 14.1049 +	 */
 14.1050 +	s0 =
 14.1051 +		(Te4[(t0 >> 24)       ] & 0xff000000) ^
 14.1052 +		(Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
 14.1053 +		(Te4[(t2 >>  8) & 0xff] & 0x0000ff00) ^
 14.1054 +		(Te4[(t3      ) & 0xff] & 0x000000ff) ^
 14.1055 +		rk[0];
 14.1056 +	PUTU32(out     , s0);
 14.1057 +	s1 =
 14.1058 +		(Te4[(t1 >> 24)       ] & 0xff000000) ^
 14.1059 +		(Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
 14.1060 +		(Te4[(t3 >>  8) & 0xff] & 0x0000ff00) ^
 14.1061 +		(Te4[(t0      ) & 0xff] & 0x000000ff) ^
 14.1062 +		rk[1];
 14.1063 +	PUTU32(out +  4, s1);
 14.1064 +	s2 =
 14.1065 +		(Te4[(t2 >> 24)       ] & 0xff000000) ^
 14.1066 +		(Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
 14.1067 +		(Te4[(t0 >>  8) & 0xff] & 0x0000ff00) ^
 14.1068 +		(Te4[(t1      ) & 0xff] & 0x000000ff) ^
 14.1069 +		rk[2];
 14.1070 +	PUTU32(out +  8, s2);
 14.1071 +	s3 =
 14.1072 +		(Te4[(t3 >> 24)       ] & 0xff000000) ^
 14.1073 +		(Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
 14.1074 +		(Te4[(t1 >>  8) & 0xff] & 0x0000ff00) ^
 14.1075 +		(Te4[(t2      ) & 0xff] & 0x000000ff) ^
 14.1076 +		rk[3];
 14.1077 +	PUTU32(out + 12, s3);
 14.1078 +}
 14.1079 +
 14.1080 +/*
 14.1081 + * Decrypt a single block: four 32-bit words are loaded from in (offsets 0, 4, 8, 12), run through key->rounds rounds using key->rd_key read from index 0 upward, and stored to out. NOTE(review): key presumably must come from AES_set_decrypt_key - confirm.
 14.1082 + * in and out can overlap (all four input words are loaded into s0..s3 before the first store to out)
 14.1083 + */
 14.1084 +void AES_decrypt(const unsigned char *in, unsigned char *out,
 14.1085 +		 const AES_KEY *key) {
 14.1086 +
 14.1087 +	const u32 *rk;
 14.1088 +	u32 s0, s1, s2, s3, t0, t1, t2, t3;
 14.1089 +#ifndef FULL_UNROLL
 14.1090 +	int r; /* loop iterations left; only the looped variant needs it */
 14.1091 +#endif /* ?FULL_UNROLL */
 14.1092 +
 14.1093 +	assert(in && out && key);
 14.1094 +	rk = key->rd_key;
 14.1095 +
 14.1096 +	/*
 14.1097 +	 * map byte array block to cipher state
 14.1098 +	 * and add initial round key:
 14.1099 +	 */
 14.1100 +    s0 = GETU32(in     ) ^ rk[0];
 14.1101 +    s1 = GETU32(in +  4) ^ rk[1];
 14.1102 +    s2 = GETU32(in +  8) ^ rk[2];
 14.1103 +    s3 = GETU32(in + 12) ^ rk[3];
 14.1104 +#ifdef FULL_UNROLL
 14.1105 +    /* round 1: */
 14.1106 +    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
 14.1107 +    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
 14.1108 +    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
 14.1109 +    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
 14.1110 +    /* round 2: */
 14.1111 +    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
 14.1112 +    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
 14.1113 +    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
 14.1114 +    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
 14.1115 +    /* round 3: */
 14.1116 +    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
 14.1117 +    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
 14.1118 +    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
 14.1119 +    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
 14.1120 +    /* round 4: */
 14.1121 +    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
 14.1122 +    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
 14.1123 +    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
 14.1124 +    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
 14.1125 +    /* round 5: */
 14.1126 +    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
 14.1127 +    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
 14.1128 +    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
 14.1129 +    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
 14.1130 +    /* round 6: */
 14.1131 +    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
 14.1132 +    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
 14.1133 +    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
 14.1134 +    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
 14.1135 +    /* round 7: */
 14.1136 +    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
 14.1137 +    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
 14.1138 +    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
 14.1139 +    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
 14.1140 +    /* round 8: */
 14.1141 +    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
 14.1142 +    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
 14.1143 +    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
 14.1144 +    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
 14.1145 +    /* round 9: */
 14.1146 +    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
 14.1147 +    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
 14.1148 +    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
 14.1149 +    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
 14.1150 +    if (key->rounds > 10) { /* more than 10 rounds: run two further full rounds */
 14.1151 +        /* round 10: */
 14.1152 +        s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
 14.1153 +        s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
 14.1154 +        s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
 14.1155 +        s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
 14.1156 +        /* round 11: */
 14.1157 +        t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
 14.1158 +        t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
 14.1159 +        t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
 14.1160 +        t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
 14.1161 +        if (key->rounds > 12) { /* more than 12 rounds: run two further full rounds */
 14.1162 +            /* round 12: */
 14.1163 +            s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
 14.1164 +            s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
 14.1165 +            s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
 14.1166 +            s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
 14.1167 +            /* round 13: */
 14.1168 +            t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
 14.1169 +            t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
 14.1170 +            t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
 14.1171 +            t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
 14.1172 +        }
 14.1173 +    }
 14.1174 +	rk += key->rounds << 2; /* 4 words per round: rk now points at the final round key */
 14.1175 +#else  /* !FULL_UNROLL */
 14.1176 +    /*
 14.1177 +     * Nr - 1 full rounds:
 14.1178 +     */
 14.1179 +    r = key->rounds >> 1; /* each loop iteration performs up to two rounds */
 14.1180 +    for (;;) {
 14.1181 +        t0 =
 14.1182 +            Td0[(s0 >> 24)       ] ^
 14.1183 +            Td1[(s3 >> 16) & 0xff] ^
 14.1184 +            Td2[(s2 >>  8) & 0xff] ^
 14.1185 +            Td3[(s1      ) & 0xff] ^
 14.1186 +            rk[4];
 14.1187 +        t1 =
 14.1188 +            Td0[(s1 >> 24)       ] ^
 14.1189 +            Td1[(s0 >> 16) & 0xff] ^
 14.1190 +            Td2[(s3 >>  8) & 0xff] ^
 14.1191 +            Td3[(s2      ) & 0xff] ^
 14.1192 +            rk[5];
 14.1193 +        t2 =
 14.1194 +            Td0[(s2 >> 24)       ] ^
 14.1195 +            Td1[(s1 >> 16) & 0xff] ^
 14.1196 +            Td2[(s0 >>  8) & 0xff] ^
 14.1197 +            Td3[(s3      ) & 0xff] ^
 14.1198 +            rk[6];
 14.1199 +        t3 =
 14.1200 +            Td0[(s3 >> 24)       ] ^
 14.1201 +            Td1[(s2 >> 16) & 0xff] ^
 14.1202 +            Td2[(s1 >>  8) & 0xff] ^
 14.1203 +            Td3[(s0      ) & 0xff] ^
 14.1204 +            rk[7];
 14.1205 +
 14.1206 +        rk += 8; /* step past the two round keys this iteration uses */
 14.1207 +        if (--r == 0) { /* last iteration: stop after the first half so the state stays in t0..t3 */
 14.1208 +            break;
 14.1209 +        }
 14.1210 +
 14.1211 +        s0 =
 14.1212 +            Td0[(t0 >> 24)       ] ^
 14.1213 +            Td1[(t3 >> 16) & 0xff] ^
 14.1214 +            Td2[(t2 >>  8) & 0xff] ^
 14.1215 +            Td3[(t1      ) & 0xff] ^
 14.1216 +            rk[0];
 14.1217 +        s1 =
 14.1218 +            Td0[(t1 >> 24)       ] ^
 14.1219 +            Td1[(t0 >> 16) & 0xff] ^
 14.1220 +            Td2[(t3 >>  8) & 0xff] ^
 14.1221 +            Td3[(t2      ) & 0xff] ^
 14.1222 +            rk[1];
 14.1223 +        s2 =
 14.1224 +            Td0[(t2 >> 24)       ] ^
 14.1225 +            Td1[(t1 >> 16) & 0xff] ^
 14.1226 +            Td2[(t0 >>  8) & 0xff] ^
 14.1227 +            Td3[(t3      ) & 0xff] ^
 14.1228 +            rk[2];
 14.1229 +        s3 =
 14.1230 +            Td0[(t3 >> 24)       ] ^
 14.1231 +            Td1[(t2 >> 16) & 0xff] ^
 14.1232 +            Td2[(t1 >>  8) & 0xff] ^
 14.1233 +            Td3[(t0      ) & 0xff] ^
 14.1234 +            rk[3];
 14.1235 +    }
 14.1236 +#endif /* ?FULL_UNROLL */
 14.1237 +    /*
 14.1238 +	 * apply last round and
 14.1239 +	 * map cipher state to byte array block:
 14.1240 +	 */
 14.1241 +   	s0 =
 14.1242 +   		(Td4[(t0 >> 24)       ] & 0xff000000) ^
 14.1243 +   		(Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
 14.1244 +   		(Td4[(t2 >>  8) & 0xff] & 0x0000ff00) ^
 14.1245 +   		(Td4[(t1      ) & 0xff] & 0x000000ff) ^
 14.1246 +   		rk[0];
 14.1247 +	PUTU32(out     , s0);
 14.1248 +   	s1 =
 14.1249 +   		(Td4[(t1 >> 24)       ] & 0xff000000) ^
 14.1250 +   		(Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
 14.1251 +   		(Td4[(t3 >>  8) & 0xff] & 0x0000ff00) ^
 14.1252 +   		(Td4[(t2      ) & 0xff] & 0x000000ff) ^
 14.1253 +   		rk[1];
 14.1254 +	PUTU32(out +  4, s1);
 14.1255 +   	s2 =
 14.1256 +   		(Td4[(t2 >> 24)       ] & 0xff000000) ^
 14.1257 +   		(Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
 14.1258 +   		(Td4[(t0 >>  8) & 0xff] & 0x0000ff00) ^
 14.1259 +   		(Td4[(t3      ) & 0xff] & 0x000000ff) ^
 14.1260 +   		rk[2];
 14.1261 +	PUTU32(out +  8, s2);
 14.1262 +   	s3 =
 14.1263 +   		(Td4[(t3 >> 24)       ] & 0xff000000) ^
 14.1264 +   		(Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
 14.1265 +   		(Td4[(t1 >>  8) & 0xff] & 0x0000ff00) ^
 14.1266 +   		(Td4[(t0      ) & 0xff] & 0x000000ff) ^
 14.1267 +   		rk[3];
 14.1268 +	PUTU32(out + 12, s3);
 14.1269 +}
 14.1270 +
 14.1271 +#endif /* AES_ASM */
 14.1272 +
 14.1273 +void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
 14.1274 +		     const unsigned long length, const AES_KEY *key,
 14.1275 +		     unsigned char *ivec, const int enc)
 14.1276 +{
 14.1277 +	/* CBC over in[0..length): enc != 0 encrypts, enc == 0 decrypts. ivec is read as the chaining value and left holding the last ciphertext block. in and out may be the same buffer. */
 14.1278 +	unsigned long n;
 14.1279 +	unsigned long len = length;
 14.1280 +	unsigned char tmp[AES_BLOCK_SIZE], dec[AES_BLOCK_SIZE]; /* tmp: XOR scratch / saved ciphertext; dec: decrypted tail block */
 14.1281 +
 14.1282 +	assert(in && out && key && ivec);
 14.1283 +
 14.1284 +	if (enc) {
 14.1285 +		while (len >= AES_BLOCK_SIZE) {
 14.1286 +			for(n=0; n < AES_BLOCK_SIZE; ++n)
 14.1287 +				tmp[n] = in[n] ^ ivec[n];
 14.1288 +			AES_encrypt(tmp, out, key);
 14.1289 +			memcpy(ivec, out, AES_BLOCK_SIZE); /* ciphertext chains into the next block */
 14.1290 +			len -= AES_BLOCK_SIZE;
 14.1291 +			in += AES_BLOCK_SIZE;
 14.1292 +			out += AES_BLOCK_SIZE;
 14.1293 +		}
 14.1294 +		if (len) { /* trailing partial block: missing plaintext bytes are treated as zero */
 14.1295 +			for(n=0; n < len; ++n)
 14.1296 +				tmp[n] = in[n] ^ ivec[n];
 14.1297 +			for(n=len; n < AES_BLOCK_SIZE; ++n)
 14.1298 +				tmp[n] = ivec[n];
 14.1299 +			AES_encrypt(tmp, tmp, key);
 14.1300 +			memcpy(out, tmp, AES_BLOCK_SIZE); /* a whole block is written, so out needs room past length */
 14.1301 +			memcpy(ivec, tmp, AES_BLOCK_SIZE);
 14.1302 +		}
 14.1303 +	} else {
 14.1304 +		while (len >= AES_BLOCK_SIZE) {
 14.1305 +			memcpy(tmp, in, AES_BLOCK_SIZE); /* keep the ciphertext: out may alias in */
 14.1306 +			AES_decrypt(in, out, key);
 14.1307 +			for(n=0; n < AES_BLOCK_SIZE; ++n)
 14.1308 +				out[n] ^= ivec[n];
 14.1309 +			memcpy(ivec, tmp, AES_BLOCK_SIZE);
 14.1310 +			len -= AES_BLOCK_SIZE;
 14.1311 +			in += AES_BLOCK_SIZE;
 14.1312 +			out += AES_BLOCK_SIZE;
 14.1313 +		}
 14.1314 +		if (len) { /* trailing partial block: a whole ciphertext block is read, len bytes are written */
 14.1315 +			memcpy(tmp, in, AES_BLOCK_SIZE); /* keep the ciphertext: out may alias in */
 14.1316 +			AES_decrypt(tmp, dec, key); /* decrypt into dec so tmp still holds the ciphertext */
 14.1317 +			for(n=0; n < len; ++n)
 14.1318 +				out[n] = dec[n] ^ ivec[n];
 14.1319 +			memcpy(ivec, tmp, AES_BLOCK_SIZE); /* chain on the ciphertext, as the full-block path does (was: decrypted data) */
 14.1320 +		}
 14.1321 +	}
 14.1322 +}
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/tools/blktap2/drivers/aes.h	Tue May 26 11:52:31 2009 +0100
    15.3 @@ -0,0 +1,28 @@
    15.4 +#ifndef QEMU_AES_H
    15.5 +#define QEMU_AES_H
    15.6 +
    15.7 +#include <stdint.h>
    15.8 +
    15.9 +#define AES_MAXNR 14 /* largest supported round count; sizes rd_key below */
   15.10 +#define AES_BLOCK_SIZE 16 /* cipher block size in bytes */
   15.11 +
   15.12 +struct aes_key_st {
   15.13 +    uint32_t rd_key[4 *(AES_MAXNR + 1)]; /* round keys, 4 words each, one more key than rounds */
   15.14 +    int rounds; /* round count for this key; AES_encrypt/AES_decrypt branch on > 10 and > 12 */
   15.15 +};
   15.16 +typedef struct aes_key_st AES_KEY;
   15.17 +
   15.18 +int AES_set_encrypt_key(const unsigned char *userKey, const int bits, /* fills *key; a negative return is treated as failure by AES_set_decrypt_key */
   15.19 +	AES_KEY *key);
   15.20 +int AES_set_decrypt_key(const unsigned char *userKey, const int bits, /* returns 0 on success, else the negative status from AES_set_encrypt_key */
   15.21 +	AES_KEY *key);
   15.22 +
   15.23 +void AES_encrypt(const unsigned char *in, unsigned char *out, /* one block; in and out may overlap */
   15.24 +	const AES_KEY *key);
   15.25 +void AES_decrypt(const unsigned char *in, unsigned char *out, /* one block; in and out may overlap */
   15.26 +	const AES_KEY *key);
   15.27 +void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, /* CBC over length bytes; ivec is updated in place */
   15.28 +		     const unsigned long length, const AES_KEY *key,
   15.29 +		     unsigned char *ivec, const int enc); /* enc != 0 encrypts, enc == 0 decrypts */
   15.30 +
   15.31 +#endif
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/tools/blktap2/drivers/atomicio.c	Tue May 26 11:52:31 2009 +0100
    16.3 @@ -0,0 +1,61 @@
    16.4 +/*
    16.5 + * Copyright (c) 2005 Anil Madhavapeddy. All rights reserved.
    16.6 + * Copyright (c) 1995,1999 Theo de Raadt.  All rights reserved.
    16.7 + * All rights reserved.
    16.8 + *
    16.9 + * Redistribution and use in source and binary forms, with or without
   16.10 + * modification, are permitted provided that the following conditions
   16.11 + * are met:
   16.12 + * 1. Redistributions of source code must retain the above copyright
   16.13 + *    notice, this list of conditions and the following disclaimer.
   16.14 + * 2. Redistributions in binary form must reproduce the above copyright
   16.15 + *    notice, this list of conditions and the following disclaimer in the
   16.16 + *    documentation and/or other materials provided with the distribution.
   16.17 + *
   16.18 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   16.19 + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   16.20 + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   16.21 + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   16.22 + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   16.23 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   16.24 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   16.25 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   16.26 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   16.27 + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   16.28 + */
   16.29 +
   16.30 +#include <stdlib.h>
   16.31 +#include <errno.h>
   16.32 +#include "atomicio.h"
   16.33 +
   16.34 +/*
   16.35 + * Call f (read, or a write cast to this signature - "vwrite") on fd until all n bytes of _s are transferred. Returns n on success, the byte count so far with errno = EPIPE if f reports end-of-file, or 0 if f fails with any error other than EINTR/EAGAIN.
   16.36 + */
   16.37 +size_t
   16.38 +atomicio(
   16.39 +	ssize_t (*f) (int, void *, size_t),
   16.40 +	int fd,
   16.41 +	void *_s,
   16.42 +	size_t n)
   16.43 +{
   16.44 +	char *s = _s;
   16.45 +	size_t pos = 0;	/* bytes transferred so far */
   16.46 +	ssize_t res;
   16.47 +
   16.48 +	while (n > pos) {
   16.49 +		res = (f) (fd, s + pos, n - pos);
   16.50 +		switch (res) {
   16.51 +		case -1:
   16.52 +			if (errno == EINTR || errno == EAGAIN)
   16.53 +				continue;	/* transient: retry from the same offset */
   16.54 +			return 0;	/* hard error: partial progress is not reported; errno is left as f set it */
   16.55 +		case 0:
   16.56 +			errno = EPIPE;	/* end-of-file before n bytes arrived */
   16.57 +			return pos;
   16.58 +		default:
   16.59 +			pos += (size_t)res;
   16.60 +		}
   16.61 +	}
   16.62 +	return (pos);
   16.63 +}
   16.64 +
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/tools/blktap2/drivers/blk.h	Tue May 26 11:52:31 2009 +0100
    17.3 @@ -0,0 +1,30 @@
    17.4 +/* 
    17.5 + * Copyright (c) 2008, XenSource Inc.
    17.6 + * All rights reserved.
    17.7 + *
    17.8 + * Redistribution and use in source and binary forms, with or without
    17.9 + * modification, are permitted provided that the following conditions are met:
   17.10 + *     * Redistributions of source code must retain the above copyright
   17.11 + *       notice, this list of conditions and the following disclaimer.
   17.12 + *     * Redistributions in binary form must reproduce the above copyright
   17.13 + *       notice, this list of conditions and the following disclaimer in the
   17.14 + *       documentation and/or other materials provided with the distribution.
   17.15 + *     * Neither the name of XenSource Inc. nor the names of its contributors
   17.16 + *       may be used to endorse or promote products derived from this software
   17.17 + *       without specific prior written permission.
   17.18 + *
   17.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   17.20 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   17.21 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   17.22 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   17.23 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   17.24 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   17.25 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   17.26 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   17.27 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   17.28 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   17.29 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   17.30 +*/
   17.31 +
   17.32 +int blk_getimagesize(int fd, uint64_t *size); /* Linux implementation (blk_linux.c): stores the BLKGETSIZE result in *size; returns 0, or -EINVAL if the ioctl fails. NOTE(review): this header has no include guard and does not include <stdint.h>, so includers must provide uint64_t first. */
   17.33 +int blk_getsectorsize(int fd, uint64_t *sector_size); /* Linux implementation (blk_linux.c): stores the BLKSSZGET result, or DEFAULT_SECTOR_SIZE as a fallback, in *sector_size; always returns 0. */
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/tools/blktap2/drivers/blk_linux.c	Tue May 26 11:52:31 2009 +0100
    18.3 @@ -0,0 +1,43 @@
    18.4 +#include <inttypes.h>
    18.5 +#include <sys/ioctl.h>
    18.6 +#include <linux/fs.h>
    18.7 +#include <linux/errno.h>
    18.8 +#include "tapdisk.h"
    18.9 +#include "blk.h"
   18.10 +/* Query the size of block device fd with BLKGETSIZE and store it in *size; returns 0 on success, or -EINVAL (with *size == 0) if the ioctl fails. */
   18.11 +int blk_getimagesize(int fd, uint64_t *size)
   18.12 +{
   18.13 +	unsigned long sectors = 0;	/* BLKGETSIZE stores an unsigned long, which is not uint64_t on 32-bit targets */
   18.14 +
   18.15 +	*size = 0;
   18.16 +	if (ioctl(fd, BLKGETSIZE, &sectors)) {
   18.17 +		DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image");
   18.18 +		return -EINVAL;
   18.19 +	}
   18.20 +
   18.21 +	*size = sectors;	/* widen only after the kernel has written the correctly sized object */
   18.22 +	return 0;
   18.23 +}
   18.24 +/* Store the sector size of block device fd in *sector_size, using DEFAULT_SECTOR_SIZE when BLKSSZGET is unavailable or fails; always returns 0. */
   18.25 +int blk_getsectorsize(int fd, uint64_t *sector_size)
   18.26 +{
   18.27 +#if defined(BLKSSZGET)
   18.28 +	int ssz = DEFAULT_SECTOR_SIZE;	/* BLKSSZGET stores an int, so it must not be pointed at the uint64_t */
   18.29 +
   18.30 +	*sector_size = DEFAULT_SECTOR_SIZE;
   18.31 +	if (ioctl(fd, BLKSSZGET, &ssz)) {
   18.32 +		DPRINTF("ERR: BLKSSZGET failed. Falling back to use default sector size");
   18.33 +		ssz = DEFAULT_SECTOR_SIZE;
   18.34 +	}
   18.35 +	*sector_size = (uint64_t)ssz;	/* widen only after the kernel has written the correctly sized object */
   18.36 +
   18.37 +	if (*sector_size != DEFAULT_SECTOR_SIZE)
   18.38 +		DPRINTF("Note: sector size is %"PRIu64" (not %u)\n",
   18.39 +			*sector_size, DEFAULT_SECTOR_SIZE);
   18.40 +#else
   18.41 +	*sector_size = DEFAULT_SECTOR_SIZE;
   18.42 +#endif
   18.43 +
   18.44 +	return 0;
   18.45 +}
   18.46 +
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/tools/blktap2/drivers/blktap2.h	Tue May 26 11:52:31 2009 +0100
    19.3 @@ -0,0 +1,66 @@
    19.4 +/*
    19.5 + * Copyright (c) 2008, XenSource Inc.
    19.6 + * All rights reserved.
    19.7 + *
    19.8 + * Redistribution and use in source and binary forms, with or without
    19.9 + * modification, are permitted provided that the following conditions are met:
   19.10 + *     * Redistributions of source code must retain the above copyright
   19.11 + *       notice, this list of conditions and the following disclaimer.
   19.12 + *     * Redistributions in binary form must reproduce the above copyright
   19.13 + *       notice, this list of conditions and the following disclaimer in the
   19.14 + *       documentation and/or other materials provided with the distribution.
   19.15 + *     * Neither the name of XenSource Inc. nor the names of its contributors
   19.16 + *       may be used to endorse or promote products derived from this software
   19.17 + *       without specific prior written permission.
   19.18 + *
   19.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   19.20 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   19.21 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   19.22 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   19.23 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   19.24 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   19.25 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   19.26 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   19.27 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   19.28 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   19.29 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   19.30 + */
   19.31 +#ifndef _BLKTAP_2_H_
   19.32 +#define _BLKTAP_2_H_
   19.33 +
   19.34 +#define MISC_MAJOR_NUMBER              10
   19.35 +
   19.36 +#define BLKTAP2_MAX_MESSAGE_LEN        256
   19.37 +
   19.38 +#define BLKTAP2_RING_MESSAGE_PAUSE     1
   19.39 +#define BLKTAP2_RING_MESSAGE_RESUME    2
   19.40 +#define BLKTAP2_RING_MESSAGE_CLOSE     3
   19.41 +
   19.42 +#define BLKTAP2_IOCTL_KICK_FE          1
   19.43 +#define BLKTAP2_IOCTL_ALLOC_TAP        200
   19.44 +#define BLKTAP2_IOCTL_FREE_TAP         201
   19.45 +#define BLKTAP2_IOCTL_CREATE_DEVICE    202
   19.46 +#define BLKTAP2_IOCTL_SET_PARAMS       203
   19.47 +#define BLKTAP2_IOCTL_PAUSE            204
   19.48 +#define BLKTAP2_IOCTL_REOPEN           205
   19.49 +#define BLKTAP2_IOCTL_RESUME           206
   19.50 +
   19.51 +#define BLKTAP2_CONTROL_NAME           "blktap-control"
   19.52 +#define BLKTAP2_DIRECTORY              "/dev/xen/blktap-2"
   19.53 +#define BLKTAP2_CONTROL_DEVICE         BLKTAP2_DIRECTORY"/control"
   19.54 +#define BLKTAP2_RING_DEVICE            BLKTAP2_DIRECTORY"/blktap"
   19.55 +#define BLKTAP2_IO_DEVICE              BLKTAP2_DIRECTORY"/tapdev"
   19.56 +
   19.57 +struct blktap2_handle {
   19.58 +	unsigned int                   ring;
   19.59 +	unsigned int                   device;
   19.60 +	unsigned int                   minor;
   19.61 +};
   19.62 +
   19.63 +struct blktap2_params {
   19.64 +	char                           name[BLKTAP2_MAX_MESSAGE_LEN];
   19.65 +	unsigned long long             capacity;
   19.66 +	unsigned long                  sector_size;
   19.67 +};
   19.68 +
   19.69 +#endif
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/tools/blktap2/drivers/block-aio.c	Tue May 26 11:52:31 2009 +0100
    20.3 @@ -0,0 +1,272 @@
    20.4 +/* 
    20.5 + * Copyright (c) 2007, XenSource Inc.
    20.6 + * All rights reserved.
    20.7 + *
    20.8 + * Redistribution and use in source and binary forms, with or without
    20.9 + * modification, are permitted provided that the following conditions are met:
   20.10 + *     * Redistributions of source code must retain the above copyright
   20.11 + *       notice, this list of conditions and the following disclaimer.
   20.12 + *     * Redistributions in binary form must reproduce the above copyright
   20.13 + *       notice, this list of conditions and the following disclaimer in the
   20.14 + *       documentation and/or other materials provided with the distribution.
   20.15 + *     * Neither the name of XenSource Inc. nor the names of its contributors
   20.16 + *       may be used to endorse or promote products derived from this software
   20.17 + *       without specific prior written permission.
   20.18 + *
   20.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   20.20 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   20.21 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   20.22 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   20.23 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   20.24 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   20.25 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   20.26 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   20.27 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   20.28 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   20.29 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   20.30 + */
   20.31 +
   20.32 +
   20.33 +#include <errno.h>
   20.34 +#include <libaio.h>
   20.35 +#include <fcntl.h>
   20.36 +#include <stdio.h>
   20.37 +#include <stdlib.h>
   20.38 +#include <unistd.h>
   20.39 +#include <sys/statvfs.h>
   20.40 +#include <sys/stat.h>
   20.41 +#include <sys/ioctl.h>
   20.42 +#include <linux/fs.h>
   20.43 +
   20.44 +#include "tapdisk.h"
   20.45 +#include "tapdisk-driver.h"
   20.46 +#include "tapdisk-interface.h"
   20.47 +
   20.48 +#define MAX_AIO_REQS         TAPDISK_DATA_REQUESTS
   20.49 +
   20.50 +struct tdaio_state;
   20.51 +
   20.52 +struct aio_request {
   20.53 +	td_request_t         treq;
   20.54 +	struct tiocb         tiocb;
   20.55 +	struct tdaio_state  *state;
   20.56 +};
   20.57 +
   20.58 +struct tdaio_state {
   20.59 +	int                  fd;
   20.60 +	td_driver_t         *driver;
   20.61 +
   20.62 +	int                  aio_free_count;	
   20.63 +	struct aio_request   aio_requests[MAX_AIO_REQS];
   20.64 +	struct aio_request  *aio_free_list[MAX_AIO_REQS];
   20.65 +};
   20.66 +
   20.67 +/* Fill in info->size, info->sector_size and info->info for the open image fd; returns 0 on success, -EINVAL on failure. */
   20.68 +static int tdaio_get_image_info(int fd, td_disk_info_t *info)
   20.69 +{
   20.70 +	int ret;
   20.71 +	long size;
   20.72 +	unsigned long total_size;
   20.73 +	struct statvfs statBuf;
   20.74 +	struct stat stat;
   20.75 +
   20.76 +	ret = fstat(fd, &stat);
   20.77 +	if (ret != 0) {
   20.78 +		DPRINTF("ERROR: fstat failed, Couldn't stat image");
   20.79 +		return -EINVAL;
   20.80 +	}
   20.81 +
   20.82 +	if (S_ISBLK(stat.st_mode)) {
   20.83 +		/*Accessing block device directly*/
   20.84 +		info->size = 0;
   20.85 +		if (ioctl(fd,BLKGETSIZE,&total_size)!=0) {
   20.86 +			DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image");
   20.87 +			return -EINVAL;
   20.88 +		}
   20.89 +		info->size = total_size; /* BLKGETSIZE fills an unsigned long */
   20.90 +		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
   20.91 +			"sector_shift [%llu]\n",
   20.92 +			(long long unsigned)(info->size << SECTOR_SHIFT),
   20.93 +			(long long unsigned)info->size);
   20.94 +
   20.95 +		/*Get the sector size*/
   20.96 +#if defined(BLKSSZGET)
   20.97 +		{
   20.98 +			int arg;
   20.99 +			info->sector_size = DEFAULT_SECTOR_SIZE;
  20.100 +			if (ioctl(fd, BLKSSZGET, &arg) == 0) /* BLKSSZGET fills an int */
  20.101 +				info->sector_size = arg;
  20.102 +			if (info->sector_size != DEFAULT_SECTOR_SIZE)
  20.103 +				DPRINTF("Note: sector size is %ld (not %d)\n",
  20.104 +					info->sector_size, DEFAULT_SECTOR_SIZE);
  20.105 +		}
  20.106 +#else
  20.107 +		info->sector_size = DEFAULT_SECTOR_SIZE;
  20.108 +#endif
  20.109 +
  20.110 +	} else {
  20.111 +		/*Local file? try fstat instead*/
  20.112 +		info->size = (stat.st_size >> SECTOR_SHIFT);
  20.113 +		info->sector_size = DEFAULT_SECTOR_SIZE;
  20.114 +		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
  20.115 +			"sector_shift [%llu]\n",
  20.116 +			(long long unsigned)(info->size << SECTOR_SHIFT),
  20.117 +			(long long unsigned)info->size);
  20.118 +	}
  20.119 +
  20.120 +	if (info->size == 0) {		
  20.121 +		info->size =((uint64_t) 16836057);
  20.122 +		info->sector_size = DEFAULT_SECTOR_SIZE;
  20.123 +	}
  20.124 +	info->info = 0;
  20.125 +
  20.126 +	return 0;
  20.127 +}
  20.128 +
  20.129 +/* Open image `name` (with O_DIRECT when supported), reset the aio free list and fill driver->info; returns 0 or a negative error code. */
  20.130 +int tdaio_open(td_driver_t *driver, const char *name, td_flag_t flags)
  20.131 +{
  20.132 +	int i, fd, ret, o_flags;
  20.133 +	struct tdaio_state *prv;
  20.134 +
  20.135 +	ret = 0;
  20.136 +	prv = (struct tdaio_state *)driver->data;
  20.137 +
  20.138 +	DPRINTF("block-aio open('%s')", name);
  20.139 +
  20.140 +	memset(prv, 0, sizeof(struct tdaio_state));
  20.141 +
  20.142 +	prv->aio_free_count = MAX_AIO_REQS;
  20.143 +	for (i = 0; i < MAX_AIO_REQS; i++)
  20.144 +		prv->aio_free_list[i] = &prv->aio_requests[i];
  20.145 +
  20.146 +	/* Open the file */
  20.147 +	o_flags = O_DIRECT | O_LARGEFILE | 
  20.148 +		((flags & TD_OPEN_RDONLY) ? O_RDONLY : O_RDWR);
  20.149 +        fd = open(name, o_flags);
  20.150 +
  20.151 +        if ( (fd == -1) && (errno == EINVAL) ) {
  20.152 +
  20.153 +                /* Maybe O_DIRECT isn't supported. */
  20.154 +		o_flags &= ~O_DIRECT;
  20.155 +                fd = open(name, o_flags);
  20.156 +                if (fd != -1) DPRINTF("WARNING: Accessing image without "
  20.157 +                                     "O_DIRECT! (%s)\n", name);
  20.158 +
  20.159 +        } else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name);
  20.160 +	
  20.161 +        if (fd == -1) {
  20.162 +        	ret = 0 - errno; /* capture before DPRINTF can clobber errno */
  20.163 +		DPRINTF("Unable to open [%s] (%d)!\n", name, ret);
  20.164 +        	goto done;
  20.165 +        }
  20.166 +
  20.167 +	ret = tdaio_get_image_info(fd, &driver->info);
  20.168 +	if (ret) {
  20.169 +		close(fd);
  20.170 +		goto done;
  20.171 +	}
  20.172 +
  20.173 +        prv->fd = fd;
  20.174 +
  20.175 +done:
  20.176 +	return ret;	
  20.177 +}
  20.178 +
  20.179 +void tdaio_complete(void *arg, struct tiocb *tiocb, int err)
  20.180 +{
  20.181 +	struct aio_request *aio = (struct aio_request *)arg;
  20.182 +	struct tdaio_state *prv = aio->state;
  20.183 +
  20.184 +	td_complete_request(aio->treq, err);
  20.185 +	prv->aio_free_list[prv->aio_free_count++] = aio;
  20.186 +}
  20.187 +
  20.188 +void tdaio_queue_read(td_driver_t *driver, td_request_t treq)
  20.189 +{
  20.190 +	int size;
  20.191 +	uint64_t offset;
  20.192 +	struct aio_request *aio;
  20.193 +	struct tdaio_state *prv;
  20.194 +
  20.195 +	prv    = (struct tdaio_state *)driver->data;
  20.196 +	size   = treq.secs * driver->info.sector_size;
  20.197 +	offset = treq.sec  * (uint64_t)driver->info.sector_size;
  20.198 +
  20.199 +	if (prv->aio_free_count == 0)
  20.200 +		goto fail;
  20.201 +
  20.202 +	aio        = prv->aio_free_list[--prv->aio_free_count];
  20.203 +	aio->treq  = treq;
  20.204 +	aio->state = prv;
  20.205 +
  20.206 +	td_prep_read(&aio->tiocb, prv->fd, treq.buf,
  20.207 +		     size, offset, tdaio_complete, aio);
  20.208 +	td_queue_tiocb(driver, &aio->tiocb);
  20.209 +
  20.210 +	return;
  20.211 +
  20.212 +fail:
  20.213 +	td_complete_request(treq, -EBUSY);
  20.214 +}
  20.215 +
  20.216 +void tdaio_queue_write(td_driver_t *driver, td_request_t treq)
  20.217 +{
  20.218 +	int size;
  20.219 +	uint64_t offset;
  20.220 +	struct aio_request *aio;
  20.221 +	struct tdaio_state *prv;
  20.222 +
  20.223 +	prv     = (struct tdaio_state *)driver->data;
  20.224 +	size    = treq.secs * driver->info.sector_size;
  20.225 +	offset  = treq.sec  * (uint64_t)driver->info.sector_size;
  20.226 +
  20.227 +	if (prv->aio_free_count == 0)
  20.228 +		goto fail;
  20.229 +
  20.230 +	aio        = prv->aio_free_list[--prv->aio_free_count];
  20.231 +	aio->treq  = treq;
  20.232 +	aio->state = prv;
  20.233 +
  20.234 +	td_prep_write(&aio->tiocb, prv->fd, treq.buf,
  20.235 +		      size, offset, tdaio_complete, aio);
  20.236 +	td_queue_tiocb(driver, &aio->tiocb);
  20.237 +
  20.238 +	return;
  20.239 +
  20.240 +fail:
  20.241 +	td_complete_request(treq, -EBUSY);
  20.242 +}
  20.243 +
  20.244 +int tdaio_close(td_driver_t *driver)
  20.245 +{
  20.246 +	struct tdaio_state *prv = (struct tdaio_state *)driver->data;
  20.247 +	
  20.248 +	close(prv->fd);
  20.249 +
  20.250 +	return 0;
  20.251 +}
  20.252 +
  20.253 +int tdaio_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
  20.254 +{
  20.255 +	return TD_NO_PARENT;
  20.256 +}
  20.257 +
  20.258 +int tdaio_validate_parent(td_driver_t *driver,
  20.259 +			  td_driver_t *pdriver, td_flag_t flags)
  20.260 +{
  20.261 +	return -EINVAL;
  20.262 +}
  20.263 +
  20.264 +struct tap_disk tapdisk_aio = {
  20.265 +	.disk_type          = "tapdisk_aio",
  20.266 +	.flags              = 0,
  20.267 +	.private_data_size  = sizeof(struct tdaio_state),
  20.268 +	.td_open            = tdaio_open,
  20.269 +	.td_close           = tdaio_close,
  20.270 +	.td_queue_read      = tdaio_queue_read,
  20.271 +	.td_queue_write     = tdaio_queue_write,
  20.272 +	.td_get_parent_id   = tdaio_get_parent_id,
  20.273 +	.td_validate_parent = tdaio_validate_parent,
  20.274 +	.td_debug           = NULL,
  20.275 +};
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/tools/blktap2/drivers/block-cache.c	Tue May 26 11:52:31 2009 +0100
    21.3 @@ -0,0 +1,787 @@
    21.4 +/* 
    21.5 + * Copyright (c) 2008, XenSource Inc.
    21.6 + * All rights reserved.
    21.7 + *
    21.8 + * Redistribution and use in source and binary forms, with or without
    21.9 + * modification, are permitted provided that the following conditions are met:
   21.10 + *     * Redistributions of source code must retain the above copyright
   21.11 + *       notice, this list of conditions and the following disclaimer.
   21.12 + *     * Redistributions in binary form must reproduce the above copyright
   21.13 + *       notice, this list of conditions and the following disclaimer in the
   21.14 + *       documentation and/or other materials provided with the distribution.
   21.15 + *     * Neither the name of XenSource Inc. nor the names of its contributors
   21.16 + *       may be used to endorse or promote products derived from this software
   21.17 + *       without specific prior written permission.
   21.18 + *
   21.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   21.20 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   21.21 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   21.22 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   21.23 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   21.24 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   21.25 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   21.26 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   21.27 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   21.28 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   21.29 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   21.30 + */
   21.31 +#include <errno.h>
   21.32 +#include <fcntl.h>
   21.33 +#include <unistd.h>
   21.34 +#include <stdlib.h>
   21.35 +#include <sys/mman.h>
   21.36 +
   21.37 +#include "tapdisk.h"
   21.38 +#include "tapdisk-utils.h"
   21.39 +#include "tapdisk-driver.h"
   21.40 +#include "tapdisk-server.h"
   21.41 +#include "tapdisk-interface.h"
   21.42 +
   21.43 +#ifdef DEBUG
   21.44 +#define DBG(_f, _a...) tlog_write(TLOG_DBG, _f, ##_a)
   21.45 +#else
   21.46 +#define DBG(_f, _a...) ((void)0)
   21.47 +#endif
   21.48 +
   21.49 +#define WARN(_f, _a...) tlog_write(TLOG_WARN, _f, ##_a)
   21.50 +
   21.51 +#define RADIX_TREE_PAGE_SHIFT           12 /* 4K pages */
   21.52 +#define RADIX_TREE_PAGE_SIZE            (1 << RADIX_TREE_PAGE_SHIFT)
   21.53 +
   21.54 +#define RADIX_TREE_NODE_SHIFT           9 /* 512B nodes */
   21.55 +#define RADIX_TREE_NODE_SIZE            (1 << RADIX_TREE_NODE_SHIFT)
   21.56 +#define RADIX_TREE_NODE_MASK            (RADIX_TREE_NODE_SIZE - 1)
   21.57 +
   21.58 +#define BLOCK_CACHE_NODES_PER_PAGE      (1 << (RADIX_TREE_PAGE_SHIFT - RADIX_TREE_NODE_SHIFT))
   21.59 +
   21.60 +#define BLOCK_CACHE_MAX_SIZE            (10 << 20) /* 10MB cache */
   21.61 +#define BLOCK_CACHE_REQUESTS            (TAPDISK_DATA_REQUESTS << 3)
   21.62 +#define BLOCK_CACHE_PAGE_IDLETIME       60
   21.63 +
   21.64 +typedef struct radix_tree               radix_tree_t;
   21.65 +typedef struct radix_tree_node          radix_tree_node_t;
   21.66 +typedef struct radix_tree_link          radix_tree_link_t;
   21.67 +typedef struct radix_tree_leaf          radix_tree_leaf_t;
   21.68 +typedef struct radix_tree_page          radix_tree_page_t;
   21.69 +
   21.70 +typedef struct block_cache              block_cache_t;
   21.71 +typedef struct block_cache_request      block_cache_request_t;
   21.72 +typedef struct block_cache_stats        block_cache_stats_t;
   21.73 +
   21.74 +struct radix_tree_page {
   21.75 +	char                           *buf;
   21.76 +	size_t                          size;
   21.77 +	uint64_t                        sec;
   21.78 +	radix_tree_link_t              *owners[BLOCK_CACHE_NODES_PER_PAGE];
   21.79 +};
   21.80 +
   21.81 +struct radix_tree_leaf {
   21.82 +	radix_tree_page_t              *page;
   21.83 +	char                           *buf;
   21.84 +};
   21.85 +
   21.86 +struct radix_tree_link {
   21.87 +	uint32_t                        time;
   21.88 +	union {
   21.89 +		radix_tree_node_t      *next;
   21.90 +		radix_tree_leaf_t       leaf;
   21.91 +	} u;
   21.92 +};
   21.93 +
   21.94 +struct radix_tree_node {
   21.95 +	int                             height;
   21.96 +	radix_tree_link_t               links[RADIX_TREE_NODE_SIZE];
   21.97 +};
   21.98 +
   21.99 +struct radix_tree {
  21.100 +	int                             height;
  21.101 +	uint64_t                        size;
  21.102 +	uint32_t                        nodes;
  21.103 +	radix_tree_node_t              *root;
  21.104 +
  21.105 +	block_cache_t                  *cache;
  21.106 +};
  21.107 +
  21.108 +struct block_cache_request {
  21.109 +	int                             err;
  21.110 +	char                           *buf;
  21.111 +	uint64_t                        secs;
  21.112 +	td_request_t                    treq;
  21.113 +	block_cache_t                  *cache;
  21.114 +};
  21.115 +
  21.116 +struct block_cache_stats {
  21.117 +	uint64_t                        reads;
  21.118 +	uint64_t                        hits;
  21.119 +	uint64_t                        misses;
  21.120 +	uint64_t                        prunes;
  21.121 +};
  21.122 +
  21.123 +struct block_cache {
  21.124 +	int                             ptype;
  21.125 +	char                           *name;
  21.126 +
  21.127 +	uint64_t                        sectors;
  21.128 +
  21.129 +	block_cache_request_t           requests[BLOCK_CACHE_REQUESTS];
  21.130 +	block_cache_request_t          *request_free_list[BLOCK_CACHE_REQUESTS];
  21.131 +	int                             requests_free;
  21.132 +
  21.133 +	event_id_t                      timeout_id;
  21.134 +
  21.135 +	radix_tree_t                    tree;
  21.136 +
  21.137 +	block_cache_stats_t             stats;
  21.138 +};
  21.139 +
  21.140 +static inline uint64_t
  21.141 +radix_tree_calculate_size(int height)
  21.142 +{
  21.143 +	return (uint64_t)RADIX_TREE_NODE_SIZE <<
  21.144 +	  (height * RADIX_TREE_NODE_SHIFT);
  21.145 +}
  21.146 +
  21.147 +static inline int
  21.148 +radix_tree_calculate_height(uint64_t sectors)
  21.149 +{
  21.150 +	int height;
  21.151 +	uint64_t tree_size;
  21.152 +
  21.153 +	height = 1;  /* always allocate root node */
  21.154 +	tree_size = radix_tree_calculate_size(height);
  21.155 +	while (sectors > tree_size)
  21.156 +		tree_size = radix_tree_calculate_size(++height);
  21.157 +
  21.158 +	return height;
  21.159 +}
  21.160 +
  21.161 +static inline int
  21.162 +radix_tree_index(radix_tree_node_t *node, uint64_t sector)
  21.163 +{
  21.164 +	return ((sector >> (node->height * RADIX_TREE_NODE_SHIFT)) &
  21.165 +		RADIX_TREE_NODE_MASK);
  21.166 +}
  21.167 +
  21.168 +static inline int
  21.169 +radix_tree_node_contains_leaves(radix_tree_t *tree, radix_tree_node_t *node)
  21.170 +{
  21.171 +	return (node->height == 0);
  21.172 +}
  21.173 +
  21.174 +static inline int
  21.175 +radix_tree_node_is_root(radix_tree_t *tree, radix_tree_node_t *node)
  21.176 +{
  21.177 +	return (node->height == tree->height);
  21.178 +}
  21.179 +
  21.180 +static inline uint64_t
  21.181 +radix_tree_size(radix_tree_t *tree)
  21.182 +{
  21.183 +	return tree->size + tree->nodes * sizeof(radix_tree_node_t);
  21.184 +}
  21.185 +
  21.186 +static inline void
  21.187 +radix_tree_clear_link(radix_tree_link_t *link)
  21.188 +{
  21.189 +	if (link)
  21.190 +		memset(link, 0, sizeof(radix_tree_link_t));
  21.191 +}
  21.192 +
  21.193 +static inline radix_tree_node_t *
  21.194 +radix_tree_allocate_node(radix_tree_t *tree, int height)
  21.195 +{
  21.196 +	radix_tree_node_t *node;
  21.197 +
  21.198 +	node = calloc(1, sizeof(radix_tree_node_t));
  21.199 +	if (!node)
  21.200 +		return NULL;
  21.201 +
  21.202 +	node->height = height;
  21.203 +	tree->nodes++;
  21.204 +
  21.205 +	return node;
  21.206 +}
  21.207 +
  21.208 +static inline radix_tree_node_t *
  21.209 +radix_tree_allocate_child_node(radix_tree_t *tree, radix_tree_node_t *parent)
  21.210 +{
  21.211 +	return radix_tree_allocate_node(tree, parent->height - 1);
  21.212 +}
  21.213 +
  21.214 +void
  21.215 +radix_tree_free_node(radix_tree_t *tree, radix_tree_node_t *node)
  21.216 +{
  21.217 +	if (!node)
  21.218 +		return;
  21.219 +
  21.220 +	free(node);
  21.221 +	tree->nodes--;
  21.222 +}
  21.223 +
  21.224 +static inline radix_tree_page_t *
  21.225 +radix_tree_allocate_page(radix_tree_t *tree,
  21.226 +			 char *buf, uint64_t sec, size_t size)
  21.227 +{
  21.228 +	radix_tree_page_t *page;
  21.229 +
  21.230 +	page = calloc(1, sizeof(radix_tree_page_t));
  21.231 +	if (!page)
  21.232 +		return NULL;
  21.233 +
  21.234 +	page->buf   = buf;
  21.235 +	page->sec   = sec;
  21.236 +	page->size  = size;
  21.237 +	tree->size += size;
  21.238 +
  21.239 +	return page;
  21.240 +}
  21.241 +
  21.242 +static inline void
  21.243 +radix_tree_free_page(radix_tree_t *tree, radix_tree_page_t *page)
  21.244 +{
  21.245 +	int i;
  21.246 +
  21.247 +	for (i = 0; i < page->size >> RADIX_TREE_NODE_SHIFT; i++)
  21.248 +		DBG("%s: ejecting sector 0x%llx\n",
  21.249 +		    tree->cache->name, page->sec + i);
  21.250 +
  21.251 +	tree->cache->stats.prunes += (page->size >> RADIX_TREE_NODE_SHIFT);
  21.252 +	tree->size -= page->size;
  21.253 +	free(page->buf);
  21.254 +	free(page);
  21.255 +}
  21.256 +
  21.257 +/*
  21.258 + * remove a leaf and the shared radix_tree_page_t containing its buffer.
  21.259 + * leaves are deleted, nodes are not; gc will reap the nodes later.
  21.260 + */
  21.261 +static void
  21.262 +radix_tree_remove_page(radix_tree_t *tree, radix_tree_page_t *page)
  21.263 +{
  21.264 +	int i;
  21.265 +
  21.266 +	if (!page)
  21.267 +		return;
  21.268 +
  21.269 +	for (i = 0; i < BLOCK_CACHE_NODES_PER_PAGE; i++)
  21.270 +		radix_tree_clear_link(page->owners[i]);
  21.271 +
  21.272 +	radix_tree_free_page(tree, page);
  21.273 +}
  21.274 +
  21.275 +static void
  21.276 +radix_tree_insert_leaf(radix_tree_t *tree, radix_tree_link_t *link,
  21.277 +		       radix_tree_page_t *page, off_t off)
  21.278 +{
  21.279 +	int i;
  21.280 +
  21.281 +	if (off + RADIX_TREE_NODE_SIZE > page->size)
  21.282 +		return;
  21.283 +
  21.284 +	for (i = 0; i < BLOCK_CACHE_NODES_PER_PAGE; i++) {
  21.285 +		if (page->owners[i])
  21.286 +			continue;
  21.287 +
  21.288 +		page->owners[i]   = link;
  21.289 +		link->u.leaf.page = page;
  21.290 +		link->u.leaf.buf  = page->buf + off;
  21.291 +
  21.292 +		break;
  21.293 +	}
  21.294 +}
  21.295 +
  21.296 +static char *
  21.297 +radix_tree_find_leaf(radix_tree_t *tree, uint64_t sector)
  21.298 +{
  21.299 +	int idx;
  21.300 +	struct timeval now;
  21.301 +	radix_tree_link_t *link;
  21.302 +	radix_tree_node_t *node;
  21.303 +
  21.304 +	node = tree->root;
  21.305 +	gettimeofday(&now, NULL);
  21.306 +
  21.307 +	do {
  21.308 +		idx        = radix_tree_index(node, sector);
  21.309 +		link       = node->links + idx;
  21.310 +		link->time = now.tv_sec;
  21.311 +
  21.312 +		if (radix_tree_node_contains_leaves(tree, node))
  21.313 +			return link->u.leaf.buf;
  21.314 +
  21.315 +		if (!link->u.next)
  21.316 +			return NULL;
  21.317 +
  21.318 +		node = link->u.next;
  21.319 +	} while (1);
  21.320 +}
  21.321 +
  21.322 +static char *
  21.323 +radix_tree_add_leaf(radix_tree_t *tree, uint64_t sector,
  21.324 +		    radix_tree_page_t *page, off_t off)
  21.325 +{
  21.326 +	int idx;
  21.327 +	struct timeval now;
  21.328 +	radix_tree_link_t *link;
  21.329 +	radix_tree_node_t *node;
  21.330 +
  21.331 +	node = tree->root;
  21.332 +	gettimeofday(&now, NULL);
  21.333 +
  21.334 +	do {
  21.335 +		idx        = radix_tree_index(node, sector);
  21.336 +		link       = node->links + idx;
  21.337 +		link->time = now.tv_sec;
  21.338 +
  21.339 +		if (radix_tree_node_contains_leaves(tree, node)) {
  21.340 +			radix_tree_remove_page(tree, link->u.leaf.page);
  21.341 +			radix_tree_insert_leaf(tree, link, page, off);
  21.342 +			return link->u.leaf.buf;
  21.343 +		}
  21.344 +
  21.345 +		if (!link->u.next) {
  21.346 +			link->u.next = radix_tree_allocate_child_node(tree,
  21.347 +								      node);
  21.348 +			if (!link->u.next)
  21.349 +				return NULL;
  21.350 +		}
  21.351 +
  21.352 +		node = link->u.next;
  21.353 +	} while (1);
  21.354 +}
  21.355 +
  21.356 +static int
  21.357 +radix_tree_add_leaves(radix_tree_t *tree, char *buf,
  21.358 +		      uint64_t sector, uint64_t sectors)
  21.359 +{
  21.360 +	int i;
  21.361 +	radix_tree_page_t *page;
  21.362 +
  21.363 +	page = radix_tree_allocate_page(tree, buf, sector,
  21.364 +					sectors << RADIX_TREE_NODE_SHIFT);
  21.365 +	if (!page)
  21.366 +		return -ENOMEM;
  21.367 +
  21.368 +	for (i = 0; i < sectors; i++)
  21.369 +		if (!radix_tree_add_leaf(tree, sector + i, 
  21.370 +					 page, (i << RADIX_TREE_NODE_SHIFT)))
  21.371 +			goto fail;
  21.372 +
  21.373 +	return 0;
  21.374 +
  21.375 +fail:
  21.376 +	page->buf = NULL;
  21.377 +	radix_tree_remove_page(tree, page);
  21.378 +	return -ENOMEM;
  21.379 +}
  21.380 +
  21.381 +static void
  21.382 +radix_tree_delete_branch(radix_tree_t *tree, radix_tree_node_t *node)
  21.383 +{
  21.384 +	int i;
  21.385 +	radix_tree_link_t *link;
  21.386 +
  21.387 +	if (!node)
  21.388 +		return;
  21.389 +
  21.390 +	for (i = 0; i < RADIX_TREE_NODE_SIZE; i++) {
  21.391 +		link = node->links + i;
  21.392 +
  21.393 +		if (radix_tree_node_contains_leaves(tree, node))
  21.394 +			radix_tree_remove_page(tree, link->u.leaf.page);
  21.395 +		else
  21.396 +			radix_tree_delete_branch(tree, link->u.next);
  21.397 +
  21.398 +		radix_tree_clear_link(link);
  21.399 +	}
  21.400 +
  21.401 +	radix_tree_free_node(tree, node);
  21.402 +}
  21.403 +
  21.404 +static inline void
  21.405 +radix_tree_destroy(radix_tree_t *tree)
  21.406 +{
  21.407 +	radix_tree_delete_branch(tree, tree->root);
  21.408 +	tree->root = NULL;
  21.409 +}
  21.410 +
  21.411 +/*
  21.412 + * returns 1 if @node is empty after pruning, 0 otherwise
  21.413 + */
  21.414 +static int
  21.415 +radix_tree_prune_branch(radix_tree_t *tree,
  21.416 +			radix_tree_node_t *node, uint32_t now)
  21.417 +{
  21.418 +	int i, empty;
  21.419 +	radix_tree_link_t *link;
  21.420 +
  21.421 +	empty = 1;
  21.422 +	if (!node)
  21.423 +		return empty;
  21.424 +
  21.425 +	for (i = 0; i < RADIX_TREE_NODE_SIZE; i++) {
  21.426 +		link = node->links + i;
  21.427 +
  21.428 +		if (now - link->time < BLOCK_CACHE_PAGE_IDLETIME) {
  21.429 +			if (radix_tree_node_contains_leaves(tree, node)) {
  21.430 +				empty = 0;
  21.431 +				continue;
  21.432 +			}
  21.433 +
  21.434 +			if (radix_tree_prune_branch(tree, link->u.next, now))
  21.435 +				radix_tree_clear_link(link);
  21.436 +			else
  21.437 +				empty = 0;
  21.438 +
  21.439 +			continue;
  21.440 +		}
  21.441 +
  21.442 +		if (radix_tree_node_contains_leaves(tree, node))
  21.443 +			radix_tree_remove_page(tree, link->u.leaf.page);
  21.444 +		else
  21.445 +			radix_tree_delete_branch(tree, link->u.next);
  21.446 +
  21.447 +		radix_tree_clear_link(link);
  21.448 +	}
  21.449 +
  21.450 +	if (empty && !radix_tree_node_is_root(tree, node))
  21.451 +		radix_tree_free_node(tree, node);
  21.452 +
  21.453 +	return empty;
  21.454 +}
  21.455 +
  21.456 +/*
  21.457 + * walk tree and free any node that has been idle for too long
  21.458 + */
  21.459 +static void
  21.460 +radix_tree_prune(radix_tree_t *tree)
  21.461 +{
  21.462 +	struct timeval now;
  21.463 +
  21.464 +	if (!tree->root)
  21.465 +		return;
  21.466 +
  21.467 +	DPRINTF("tree %s has %"PRIu64" bytes\n",
  21.468 +		tree->cache->name, tree->size);
  21.469 +
  21.470 +	gettimeofday(&now, NULL);
  21.471 +	radix_tree_prune_branch(tree, tree->root, now.tv_sec);
  21.472 +
  21.473 +	DPRINTF("tree %s now has %"PRIu64" bytes\n",
  21.474 +		tree->cache->name, tree->size);
  21.475 +}
  21.476 +
  21.477 +static inline int
  21.478 +radix_tree_initialize(radix_tree_t *tree, uint64_t sectors)
  21.479 +{
  21.480 +	tree->height = radix_tree_calculate_height(sectors);
  21.481 +	tree->root   = radix_tree_allocate_node(tree, tree->height);
  21.482 +	if (!tree->root)
  21.483 +		return -ENOMEM;
  21.484 +
  21.485 +	return 0;
  21.486 +}
  21.487 +
  21.488 +static inline void
  21.489 +radix_tree_free(radix_tree_t *tree)
  21.490 +{
  21.491 +	radix_tree_destroy(tree);
  21.492 +}
  21.493 +
  21.494 +static void
  21.495 +block_cache_prune_event(event_id_t id, char mode, void *private)
  21.496 +{
  21.497 +	radix_tree_t *tree;
  21.498 +	block_cache_t *cache;
  21.499 +
  21.500 +	cache = (block_cache_t *)private;
  21.501 +	tree  = &cache->tree;
  21.502 +
  21.503 +	radix_tree_prune(tree);
  21.504 +}
  21.505 +
  21.506 +static inline block_cache_request_t *
  21.507 +block_cache_get_request(block_cache_t *cache)
  21.508 +{
  21.509 +	if (!cache->requests_free)
  21.510 +		return NULL;
  21.511 +
  21.512 +	return cache->request_free_list[--cache->requests_free];
  21.513 +}
  21.514 +
  21.515 +static inline void
  21.516 +block_cache_put_request(block_cache_t *cache, block_cache_request_t *breq)
  21.517 +{
  21.518 +	memset(breq, 0, sizeof(block_cache_request_t));
  21.519 +	cache->request_free_list[cache->requests_free++] = breq;
  21.520 +}
  21.521 +
  21.522 +static int
  21.523 +block_cache_open(td_driver_t *driver, const char *name, td_flag_t flags)
  21.524 +{
  21.525 +	int i, err;
  21.526 +	radix_tree_t *tree;
  21.527 +	block_cache_t *cache;
  21.528 +
  21.529 +	if (!td_flag_test(flags, TD_OPEN_RDONLY))
  21.530 +		return -EINVAL; /* only read-only opens are accepted */
  21.531 +
  21.532 +	if (driver->info.sector_size != RADIX_TREE_NODE_SIZE)
  21.533 +		return -EINVAL; /* sector size must equal RADIX_TREE_NODE_SIZE */
  21.534 +
  21.535 +	cache = (block_cache_t *)driver->data;
  21.536 +	err   = tapdisk_namedup(&cache->name, (char *)name);
  21.537 +	if (err)
  21.538 +		return -ENOMEM;
  21.539 +
  21.540 +	cache->sectors = driver->info.size;
  21.541 +
  21.542 +	tree = &cache->tree;
  21.543 +	err  = radix_tree_initialize(tree, cache->sectors);
  21.544 +	if (err)
  21.545 +		goto fail;
  21.546 +
  21.547 +	tree->cache = cache;
  21.548 +	cache->requests_free = BLOCK_CACHE_REQUESTS;
  21.549 +	for (i = 0; i < BLOCK_CACHE_REQUESTS; i++)
  21.550 +		cache->request_free_list[i] = cache->requests + i;
  21.551 +
  21.552 +	cache->timeout_id = tapdisk_server_register_event(SCHEDULER_POLL_TIMEOUT,
  21.553 +							  -1, /* dummy fd */
  21.554 +							  BLOCK_CACHE_PAGE_IDLETIME << 1,
  21.555 +							  block_cache_prune_event,
  21.556 +							  cache);
  21.557 +	if ((err = cache->timeout_id) < 0) /* propagate the failure; err was 0 here */
  21.558 +		goto fail;
  21.559 +
  21.560 +	DPRINTF("opening cache for %s, sectors: %"PRIu64", "
  21.561 +		"tree: %p, height: %d\n",
  21.562 +		cache->name, cache->sectors, tree, tree->height);
  21.563 +
  21.564 +	if (mlockall(MCL_CURRENT | MCL_FUTURE))
  21.565 +		DPRINTF("mlockall failed: %d\n", -errno);
  21.566 +
  21.567 +	return 0;
  21.568 +
  21.569 +fail:
  21.570 +	free(cache->name);
  21.571 +	radix_tree_free(&cache->tree);
  21.572 +	return err;
  21.573 +}
  21.574 +
  21.575 +/*
  21.576 + * Close the cache behind @driver: unregister the prune event, free the
  21.577 + * radix tree and the cache name.  Always returns 0.
  21.578 + */
  21.579 +static int
  21.580 +block_cache_close(td_driver_t *driver)
  21.581 +{
  21.582 +	block_cache_t *bc = (block_cache_t *)driver->data;
  21.583 +
  21.584 +	DPRINTF("closing cache for %s\n", bc->name);
  21.585 +
  21.586 +	tapdisk_server_unregister_event(bc->timeout_id);
  21.587 +	radix_tree_free(&bc->tree);
  21.588 +	free(bc->name);
  21.589 +
  21.590 +	return 0;
  21.591 +}
  21.592 +
  21.593 +static inline uint64_t
  21.594 +block_cache_hash(block_cache_t *cache, char *buf)
  21.595 +{
  21.596 +	int i, n;
  21.597 +	uint64_t cksm, *data;
  21.598 +
  21.599 +	return 0;
  21.600 +
  21.601 +	cksm = 0;
  21.602 +	data = (uint64_t *)buf;
  21.603 +	n    = RADIX_TREE_NODE_SIZE / sizeof(uint64_t);
  21.604 +
  21.605 +	for (i = 0; i < n; i++)
  21.606 +		cksm += data[i];
  21.607 +
  21.608 +	return ~cksm;
  21.609 +}
  21.610 +
  21.611 +/*
  21.612 + * Cache hit: copy every cached node in @iov into the request buffer at
  21.613 + * its sector offset, then complete @treq with status 0.
  21.614 + */
  21.615 +static void
  21.616 +block_cache_hit(block_cache_t *cache, td_request_t treq, char *iov[])
  21.617 +{
  21.618 +	int sec = 0;
  21.619 +
  21.620 +	cache->stats.hits += treq.secs;
  21.621 +
  21.622 +	while (sec < treq.secs) {
  21.623 +		off_t dst;
  21.624 +
  21.625 +		DBG("%s: block cache hit: sec 0x%08llx, hash: 0x%08llx\n",
  21.626 +		    cache->name, treq.sec + sec,
  21.627 +		    block_cache_hash(cache, iov[sec]));
  21.628 +
         +		dst = sec << RADIX_TREE_NODE_SHIFT;
         +		memcpy(treq.buf + dst, iov[sec], RADIX_TREE_NODE_SIZE);
         +		sec++;
         +	}
         +
         +	td_complete_request(treq, 0);
         +}
  21.629 +
  21.630 +/*
  21.631 + * Completion callback installed by block_cache_miss() on the clone.
  21.632 + *
  21.633 + * Subtracts the sectors completed by @clone and keeps the first error
  21.634 + * seen.  Once no sectors remain: on success the staged data is copied
  21.635 + * into the original request's buffer and the staging buffer is offered
  21.636 + * to the radix tree (freed if radix_tree_add_leaves() returns nonzero);
  21.637 + * on error the staging buffer is freed.  The original request is then
  21.638 + * completed and the tracking slot is returned.
  21.639 + */
  21.640 +static void
  21.641 +block_cache_populate_cache(td_request_t clone, int err)
  21.642 +{
  21.643 +	int n;
  21.644 +	block_cache_request_t *breq = (block_cache_request_t *)clone.cb_data;
  21.645 +	block_cache_t *cache = breq->cache;
  21.646 +
  21.647 +	breq->secs -= clone.secs;
  21.648 +	if (!breq->err)
  21.649 +		breq->err = err;
  21.650 +
  21.651 +	/* sectors of the original request are still outstanding */
  21.652 +	if (breq->secs)
  21.653 +		return;
  21.654 +
  21.655 +	if (!breq->err) {
  21.656 +		for (n = 0; n < breq->treq.secs; n++) {
  21.657 +			off_t off = n << RADIX_TREE_NODE_SHIFT;
  21.658 +			DBG("%s: populating sec 0x%08llx\n",
  21.659 +			    cache->name, breq->treq.sec + n);
  21.660 +			memcpy(breq->treq.buf + off,
  21.661 +			       breq->buf + off, RADIX_TREE_NODE_SIZE);
  21.662 +		}
  21.663 +
  21.664 +		if (radix_tree_add_leaves(&cache->tree, breq->buf,
  21.665 +					  breq->treq.sec, breq->treq.secs))
  21.666 +			free(breq->buf);
  21.667 +	} else {
         +		free(breq->buf);
         +	}
         +
         +	td_complete_request(breq->treq, breq->err);
         +	block_cache_put_request(cache, breq);
         +}
  21.668 +
  21.669 +/*
  21.670 + * Cache miss: the request is always forwarded.
  21.671 + *
  21.672 + * If the tree would stay below BLOCK_CACHE_MAX_SIZE and both a tracking
  21.673 + * slot and an aligned staging buffer can be obtained, the forwarded
  21.674 + * clone reads into the staging buffer and completes through
  21.675 + * block_cache_populate_cache().  Otherwise @treq is forwarded unchanged.
  21.676 + */
  21.677 +static void
  21.678 +block_cache_miss(block_cache_t *cache, td_request_t treq)
  21.679 +{
  21.680 +	char *staging;
  21.681 +	td_request_t fwd = treq;
  21.682 +	block_cache_request_t *slot;
  21.683 +	size_t bytes = treq.secs << RADIX_TREE_NODE_SHIFT;
  21.684 +
  21.685 +	DBG("%s: block cache miss: sec 0x%08llx\n", cache->name, treq.sec);
  21.686 +
  21.687 +	cache->stats.misses += treq.secs;
  21.688 +
  21.689 +	if (radix_tree_size(&cache->tree) + bytes < BLOCK_CACHE_MAX_SIZE &&
  21.690 +	    (slot = block_cache_get_request(cache)) != NULL) {
  21.691 +		if (posix_memalign((void **)&staging,
  21.692 +				   RADIX_TREE_NODE_SIZE, bytes)) {
  21.693 +			/* no buffer: give the slot back, forward as is */
  21.694 +			block_cache_put_request(cache, slot);
  21.695 +		} else {
  21.696 +			slot->treq  = treq;
  21.697 +			slot->secs  = treq.secs;
  21.698 +			slot->err   = 0;
  21.699 +			slot->buf   = staging;
  21.700 +			slot->cache = cache;
  21.701 +
  21.702 +			fwd.buf     = staging;
  21.703 +			fwd.cb      = block_cache_populate_cache;
  21.704 +			fwd.cb_data = slot;
  21.705 +		}
  21.706 +	}
  21.707 +
  21.708 +	td_forward_request(fwd);
  21.709 +}
  21.711 +
  21.712 +/*
  21.713 + * Read entry point.  Requests longer than BLOCK_CACHE_NODES_PER_PAGE
  21.714 + * sectors are forwarded without consulting the cache.  Otherwise every
  21.715 + * sector is looked up in the radix tree; the request is a hit only if
  21.716 + * all sectors are found, and a miss as soon as one is absent.
  21.717 + */
  21.718 +static void
  21.719 +block_cache_queue_read(td_driver_t *driver, td_request_t treq)
  21.720 +{
  21.721 +	int i;
  21.722 +	radix_tree_t *tree;
  21.723 +	block_cache_t *cache;
  21.724 +	char *iov[BLOCK_CACHE_NODES_PER_PAGE];
  21.725 +
  21.726 +	cache = (block_cache_t *)driver->data;
  21.727 +	tree  = &cache->tree;
  21.728 +
  21.729 +	cache->stats.reads += treq.secs;
  21.730 +
  21.731 +	/* ISO C forbids "return expr;" in a void function, so each
  21.732 +	 * hand-off is a plain call followed by a bare return */
  21.733 +	if (treq.secs > BLOCK_CACHE_NODES_PER_PAGE) {
  21.734 +		td_forward_request(treq);
  21.735 +		return;
         +	}
         +
         +	for (i = 0; i < treq.secs; i++) {
         +		iov[i] = radix_tree_find_leaf(tree, treq.sec + i);
         +		if (!iov[i]) {
         +			block_cache_miss(cache, treq);
         +			return;
         +		}
         +	}
         +
         +	block_cache_hit(cache, treq, iov);
         +}
  21.736 +
  21.737 +/* Writes are never accepted: @treq is completed with -EPERM. */
  21.738 +static void
  21.739 +block_cache_queue_write(td_driver_t *driver, td_request_t treq)
  21.740 +{
  21.741 +	const int status = -EPERM;
         +
         +	td_complete_request(treq, status);
         +}
  21.742 +
  21.743 +/* No parent id is reported: always returns -EINVAL, @id is untouched. */
  21.744 +static int
  21.745 +block_cache_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
  21.746 +{
  21.747 +	int rc = -EINVAL;
         +
         +	return rc;
         +}
  21.748 +
  21.749 +/*
  21.750 + * Accept @pdriver as parent only if its state has TD_DRIVER_RDONLY set
  21.751 + * and its name equals @driver's name.  Returns 0 if acceptable and
  21.752 + * -EINVAL otherwise.  @flags is not consulted.
  21.753 + */
  21.754 +static int
  21.755 +block_cache_validate_parent(td_driver_t *driver,
  21.756 +			    td_driver_t *pdriver, td_flag_t flags)
  21.757 +{
  21.758 +	if (!td_flag_test(pdriver->state, TD_DRIVER_RDONLY))
  21.759 +		return -EINVAL;
  21.760 +
  21.761 +	/* the unused "cache" local of the previous version is gone */
  21.762 +	if (strcmp(driver->name, pdriver->name))
  21.763 +		return -EINVAL;
         +
         +	return 0;
         +}
  21.764 +
  21.765 +/* Report the cache name and its read/hit/miss/prune counters via WARN(). */
  21.766 +static void
  21.767 +block_cache_debug(td_driver_t *driver)
  21.768 +{
  21.769 +	block_cache_t *cache = (block_cache_t *)driver->data;
  21.770 +	block_cache_stats_t *st = &cache->stats;
  21.771 +
  21.772 +	WARN("BLOCK CACHE %s\n", cache->name);
  21.773 +	WARN("reads: %"PRIu64", hits: %"PRIu64", misses: %"PRIu64", prunes: %"PRIu64"\n",
  21.774 +	     st->reads, st->hits, st->misses, st->prunes);
  21.775 +}
  21.778 +
  21.779 +struct tap_disk tapdisk_block_cache = { /* driver entry points for the block cache */
  21.780 +	.disk_type                  = "tapdisk_block_cache",
  21.781 +	.flags                      = 0,
  21.782 +	.private_data_size          = sizeof(block_cache_t), /* driver->data is used as a block_cache_t */
  21.783 +	.td_open                    = block_cache_open, /* rejects anything but TD_OPEN_RDONLY */
  21.784 +	.td_close                   = block_cache_close,
  21.785 +	.td_queue_read              = block_cache_queue_read, /* served from the cache or forwarded */
  21.786 +	.td_queue_write             = block_cache_queue_write, /* always completes with -EPERM */
  21.787 +	.td_get_parent_id           = block_cache_get_parent_id, /* always -EINVAL */
  21.788 +	.td_validate_parent         = block_cache_validate_parent,
  21.789 +	.td_debug                   = block_cache_debug, /* logs the hit/miss counters */
  21.790 +};
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/tools/blktap2/drivers/block-log.c	Tue May 26 11:52:31 2009 +0100
    22.3 @@ -0,0 +1,688 @@
    22.4 +/* 
    22.5 + * Copyright (c) 2008, XenSource Inc.
    22.6 + * All rights reserved.
    22.7 + *
    22.8 + * Redistribution and use in source and binary forms, with or without
    22.9 + * modification, are permitted provided that the following conditions are met:
   22.10 + *     * Redistributions of source code must retain the above copyright
   22.11 + *       notice, this list of conditions and the following disclaimer.
   22.12 + *     * Redistributions in binary form must reproduce the above copyright
   22.13 + *       notice, this list of conditions and the following disclaimer in the
   22.14 + *       documentation and/or other materials provided with the distribution.
   22.15 + *     * Neither the name of XenSource Inc. nor the names of its contributors
   22.16 + *       may be used to endorse or promote products derived from this software
   22.17 + *       without specific prior written permission.
   22.18 + *
   22.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22.20 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   22.21 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   22.22 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   22.23 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   22.24 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   22.25 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   22.26 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   22.27 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   22.28 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   22.29 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   22.30 + */
   22.31 +
   22.32 +/* Driver to sit on top of another disk and log writes, in order
   22.33 + * to synchronize two distinct disks
   22.34 + *
   22.35 + * On receipt of a control request it can export a list of dirty
   22.36 + * sectors in the following format:
   22.37 + * struct writerange {
   22.38 + *   u64 sector;
   22.39 + *   u32 count;
   22.40 + * }
   22.41 + * terminated by { 0, 0 }
   22.42 + */
   22.43 +
   22.44 +#include <errno.h>
   22.45 +#include <stdio.h>
   22.46 +#include <fcntl.h>
   22.47 +#include <unistd.h>
   22.48 +#include <stdlib.h>
   22.49 +#include <sys/mman.h>
   22.50 +#include <sys/socket.h>
   22.51 +#include <sys/un.h>
   22.52 +
   22.53 +#include "log.h"
   22.54 +#include "tapdisk.h"
   22.55 +#include "tapdisk-server.h"
   22.56 +#include "tapdisk-driver.h"
   22.57 +#include "tapdisk-interface.h"
   22.58 +
   22.59 +#define MAX_CONNECTIONS 1
   22.60 +
   22.61 +typedef struct poll_fd { /* a socket together with its registered event */
   22.62 +  int          fd; /* descriptor from socket() or accept() */
   22.63 +  event_id_t   id; /* id returned by tapdisk_server_register_event() */
   22.64 +} poll_fd_t;
   22.65 +
   22.66 +struct tdlog_state { /* per-disk state of the write-logging driver */
   22.67 +  uint64_t     size; /* sector count, copied from driver->info.size */
   22.68 +
   22.69 +  void*        writelog; /* dirty bitmap, one bit per sector */
   22.70 +
   22.71 +  char*        ctlpath; /* control socket path built by ctl_makepath() */
   22.72 +  poll_fd_t    ctl; /* listening control socket */
   22.73 +
   22.74 +  int          connected; /* number of accepted control connections */
   22.75 +  poll_fd_t    connections[MAX_CONNECTIONS]; /* accepted control connections */
   22.76 +
   22.77 +  char*        shmpath; /* name passed to shm_open() */
   22.78 +  void*        shm; /* SHMSIZE-byte mapping of that shm object */
   22.79 +
   22.80 +  log_sring_t* sring; /* shared ring at sringstart(shm) */
   22.81 +  log_back_ring_t bring; /* back ring initialised over sring */
   22.82 +};
   22.83 +
   22.84 +#define BDPRINTF(_f, _a...) syslog (LOG_DEBUG, "log: " _f "\n", ## _a) /* debug line; adds "log: " prefix and newline */
   22.85 +
   22.86 +#define BWPRINTF(_f, _a...) syslog (LOG_WARNING, "log: " _f "\n", ## _a) /* warning line; adds "log: " prefix and newline */
   22.87 +
   22.88 +static void ctl_accept(event_id_t, char, void *); /* event callback for the listening socket */
   22.89 +static void ctl_request(event_id_t, char, void *); /* event callback for an accepted connection */
   22.90 +
   22.91 +/* -- write log -- */
   22.92 +
   22.93 +/* large flat bitmaps don't scale particularly well either in size or scan
   22.94 + * time, but they'll do for now */
   22.95 +#define BITS_PER_LONG (sizeof(unsigned long) * 8) /* assumes 8-bit bytes */
   22.96 +#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) /* longs needed for bits, rounded up */
   22.97 +
   22.98 +#define BITMAP_ENTRY(_nr, _bmap) ((unsigned long*)(_bmap))[(_nr)/BITS_PER_LONG] /* the long that holds bit _nr (an lvalue) */
   22.99 +#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) /* position of bit _nr inside that long */
  22.100 +
  22.101 +/* Return 1 if bit @nr of bitmap @bmap is set, 0 otherwise.  @nr is
  22.102 + * 64 bits wide because callers index by sector number, which an int
  22.103 + * parameter silently truncated on large disks. */
  22.104 +static inline int test_bit(uint64_t nr, void* bmap)
         +{
         +  return (BITMAP_ENTRY(nr, bmap) >> BITMAP_SHIFT(nr)) & 1;
         +}
  22.105 +
  22.106 +/* Clear bit @nr of bitmap @bmap.  @nr is 64 bits wide because callers
  22.107 + * index by sector number, which an int parameter silently truncated
  22.108 + * on large disks. */
  22.109 +static inline void clear_bit(uint64_t nr, void* bmap)
         +{
         +  BITMAP_ENTRY(nr, bmap) &= ~(1UL << BITMAP_SHIFT(nr));
         +}
  22.110 +
  22.111 +/* Set bit @nr of bitmap @bmap.  @nr is 64 bits wide because callers
  22.112 + * index by sector number, which an int parameter silently truncated
  22.113 + * on large disks. */
  22.114 +static inline void set_bit(uint64_t nr, void* bmap)
         +{
         +  BITMAP_ENTRY(nr, bmap) |= (1UL << BITMAP_SHIFT(nr));
         +}
  22.115 +
  22.116 +/* Bytes needed for a bitmap of @sz bits, rounded up to whole unsigned
  22.117 + * longs because the bit helpers access the map one long at a time.
  22.118 + * The previous "sz >> 3" dropped a trailing partial byte and could end
  22.119 + * in the middle of a long; the int return type also overflowed for
         + * very large disks, hence the wider (assignment-compatible) type. */
         +static inline uint64_t bitmap_size(uint64_t sz)
         +{
         +  return (uint64_t)BITS_TO_LONGS(sz) * sizeof(unsigned long);
         +}
  22.120 +
  22.121 +/* Allocate the zero-filled dirty bitmap sized for s->size sectors.
  22.122 + * Returns 0 on success, -1 if the allocation fails. */
  22.123 +static int writelog_create(struct tdlog_state *s)
  22.124 +{
  22.125 +  uint64_t nbytes = bitmap_size(s->size);
  22.126 +
  22.127 +  BDPRINTF("allocating %"PRIu64" bytes for dirty bitmap", nbytes);
  22.128 +
  22.129 +  s->writelog = calloc(nbytes, 1);
  22.130 +  if (s->writelog)
  22.131 +    return 0;
  22.132 +
  22.133 +  BWPRINTF("could not allocate dirty bitmap of size %"PRIu64, nbytes);
  22.134 +  return -1;
  22.135 +}
  22.136 +
  22.137 +/* Release the dirty bitmap and reset the pointer, so a second call
  22.138 + * cannot free it twice.  Always returns 0. */
  22.139 +static int writelog_free(struct tdlog_state *s)
  22.140 +{
  22.141 +  free(s->writelog); /* free(NULL) is a no-op, no guard needed */
  22.142 +  s->writelog = NULL;
  22.143 +
         +  return 0;
         +}
  22.144 +
  22.145 +/* Mark @count sectors starting at @sector as dirty.
  22.146 + *
  22.147 + * Sectors at or beyond s->size are never marked, so a bad request
  22.148 + * cannot write past the bitmap; the in-range part is still recorded.
  22.149 + * Returns 0 if the whole range was marked (or @count <= 0), -1 if any
  22.150 + * part of it lay beyond the end of the disk. */
  22.151 +static int writelog_set(struct tdlog_state* s, uint64_t sector, int count)
  22.152 +{
  22.153 +  uint64_t i, n;
         +
         +  if (count <= 0)
         +    return 0;
         +
         +  if (sector >= s->size)
         +    return -1;
         +
         +  n = (uint64_t)count;
         +  if (n > s->size - sector)
         +    n = s->size - sector;
         +
         +  for (i = 0; i < n; i++)
         +    set_bit(sector + i, s->writelog);
         +
         +  return n == (uint64_t)count ? 0 : -1;
         +}
  22.154 +
  22.155 +/* Clear the dirty bits of sectors [start, end).  If end is 0, or lies
  22.156 + * beyond the disk, clear to end of disk.  Always returns 0.
  22.157 + *
  22.158 + * end is exclusive, so the trailing partial word is cleared with --end
  22.159 + * (end-- used to clear the bit after the range).  Both loops stop when
  22.160 + * the bounds meet, so a range inside one word no longer wraps
  22.161 + * end - start into a huge memset.  The memset offset is in bytes. */
  22.162 +int writelog_clear(struct tdlog_state* s, uint64_t start, uint64_t end)
  22.163 +{
  22.164 +  if (!end || end > s->size)
  22.165 +    end = s->size;
  22.166 +
  22.167 +  /* leading bits up to the next word boundary */
  22.168 +  while (start < end && BITMAP_SHIFT(start))
  22.169 +    clear_bit(start++, s->writelog);
  22.170 +  /* trailing bits back to the previous word boundary */
         +  while (end > start && BITMAP_SHIFT(end))
         +    clear_bit(--end, s->writelog);
         +
         +  /* whole words in between; both bounds are word aligned here */
         +  if (end > start)
         +    memset((char*)s->writelog + (start >> 3), 0, (end - start) >> 3);
         +
         +  return 0;
         +}
  22.171 +
  22.172 +/* Write the dirty extents of the bitmap into the shared memory region
  22.173 + * as consecutive { sector, count } records, followed by a { 0, 0 }
  22.174 + * record.  Returns the sector at which the scan stopped: s->size after
  22.175 + * a full scan, or earlier if the region filled up (in which case no
  22.176 + * terminating record is written). */
  22.177 +static uint64_t writelog_export(struct tdlog_state* s)
  22.178 +{
  22.179 +  struct disk_range* out = s->shm;
  22.180 +  uint64_t sec = 0;
  22.181 +
  22.182 +  BDPRINTF("sector count: %"PRIu64, s->size);
  22.183 +
  22.184 +  while (sec < s->size) {
  22.185 +    if (!test_bit(sec, s->writelog)) {
  22.186 +      sec++;
  22.187 +      continue;
  22.188 +    }
  22.189 +
  22.190 +    /* start of a dirty run: extend it while the bits stay set */
  22.191 +    out->sector = sec;
  22.192 +    out->count = 1;
  22.193 +    for (sec++; sec < s->size && test_bit(sec, s->writelog); sec++)
  22.194 +      out->count++;
  22.195 +
  22.196 +    BDPRINTF("export: dirty extent %"PRIu64":%u",
  22.197 +	       out->sector, out->count);
  22.198 +    out++;
  22.199 +
  22.200 +    /* out of space in shared memory region */
  22.201 +    if ((void*)out >= bmend(s->shm)) {
  22.202 +      BDPRINTF("out of space in shm region at sector %"PRIu64, sec);
  22.203 +      return sec;
  22.204 +    }
  22.205 +  }
  22.206 +
  22.207 +  /* NULL-terminate range list */
  22.208 +  out->sector = 0;
  22.209 +  out->count = 0;
  22.210 +
         +  return sec;
         +}
  22.211 +
  22.212 +/* -- communication channel -- */
  22.213 +
  22.214 +/* Replace the filesystem-special characters ':' and '/' by '_' in up
  22.215 + * to @len bytes of @path, stopping early at the terminating NUL.  The
  22.216 + * index is a size_t so it compares cleanly against @len. */
  22.217 +static inline void path_escape(char* path, size_t len) {
  22.218 +  size_t i;
  22.219 +
  22.220 +  for (i = 0; i < len && path[i]; i++)
  22.221 +    if (strchr(":/", path[i]))
         +      path[i] = '_';
         +}
  22.222 +
  22.223 +/* Build BLKTAP_CTRL_DIR "/log_<file>.<ext>", where <file> is @name from
  22.224 + * its last '/' onward with ':' and '/' replaced by '_'.  Returns an
  22.225 + * allocated string, or NULL if @name has no '/' or allocation fails. */
  22.226 +static char* ctl_makepath(const char* name, const char* ext)
  22.227 +{
  22.228 +  char* path;
  22.229 +  char* base = strrchr(name, '/');
  22.230 +
  22.231 +  if (base == NULL) {
  22.232 +    BWPRINTF("invalid name %s\n", name);
  22.233 +    return NULL;
  22.234 +  }
  22.235 +
  22.236 +  if (asprintf(&path, BLKTAP_CTRL_DIR "/log_%s.%s", base, ext) < 0) {
  22.237 +    BWPRINTF("could not allocate path");
  22.238 +    return NULL;
  22.239 +  }
  22.240 +
  22.241 +  /* escape only <file>: skip the directory and the 5 bytes of "/log_" */
  22.242 +  path_escape(path + strlen(BLKTAP_CTRL_DIR) + 5, strlen(base));
         +
         +  return path;
         +}
  22.243 +
  22.244 +/* Create and map the shared memory object "/log_<name>.wlog" (':' and
  22.245 + * '/' in <name> replaced by '_'), SHMSIZE bytes long.
  22.246 + *
  22.247 + * On success s->shmpath and s->shm are set and 0 is returned.  On
  22.248 + * failure both are NULL, -1 is returned, and an object that was
  22.249 + * already created is unlinked again instead of being left behind. */
  22.250 +static int shmem_open(struct tdlog_state* s, const char* name)
  22.251 +{
  22.252 +  int fd;
  22.253 +
  22.254 +  /* device name -> path */
  22.255 +  if (asprintf(&s->shmpath, "/log_%s.wlog", name) < 0) {
  22.256 +    BWPRINTF("could not allocate shm path");
  22.257 +    s->shmpath = NULL; /* undefined after a failed asprintf() */
  22.258 +    return -1;
  22.259 +  }
  22.260 +
  22.261 +  path_escape(s->shmpath + 5, strlen(name));
  22.262 +
  22.263 +  if ((fd = shm_open(s->shmpath, O_CREAT|O_RDWR, 0750)) < 0) {
  22.264 +    BWPRINTF("could not open shared memory file %s: %s", s->shmpath,
  22.265 +	     strerror(errno));
  22.266 +    goto err;
  22.267 +  }
  22.268 +  if (ftruncate(fd, SHMSIZE) < 0) {
  22.269 +    BWPRINTF("error truncating shmem to size %u", SHMSIZE);
  22.270 +    close(fd);
  22.271 +    goto err_unlink;
  22.272 +  }
  22.273 +
  22.274 +  s->shm = mmap(NULL, SHMSIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
  22.275 +  if (s->shm == MAP_FAILED) {
  22.276 +    /* log before close() so errno still belongs to mmap() */
  22.277 +    BWPRINTF("could not mmap write log shm: %s", strerror(errno));
  22.278 +    close(fd);
  22.279 +    goto err_unlink;
  22.280 +  }
         +  close(fd);
         +  return 0;
         +
         +  err_unlink:
         +  shm_unlink(s->shmpath);
         +  err:
         +  s->shm = NULL;
         +  free(s->shmpath);
         +  s->shmpath = NULL;
         +  return -1;
         +}
  22.281 +
  22.282 +/* Unmap the shared memory region, unlink the shm object and free its
  22.283 + * name (the name used to be leaked).  Does nothing for parts that were
  22.284 + * never opened.  Always returns 0. */
  22.285 +static int shmem_close(struct tdlog_state* s)
  22.286 +{
  22.287 +  if (s->shm) {
  22.288 +    munmap(s->shm, SHMSIZE);
  22.289 +    s->shm = NULL;
  22.290 +  }
  22.291 +
  22.292 +  if (s->shmpath) {
  22.293 +    shm_unlink(s->shmpath);
  22.294 +    free(s->shmpath); /* allocated by asprintf() in shmem_open() */
  22.295 +    s->shmpath = NULL;
         +  }
         +
         +  return 0;
         +}
  22.296 +
  22.297 +/* control socket */
  22.298 +
  22.299 +/* Create the control socket for disk @name: build its path, bind a
  22.300 + * listening AF_UNIX stream socket to it (removing a stale socket file
  22.301 + * first) and register ctl_accept() for incoming connections.
  22.302 + *
  22.303 + * Returns 0 on success.  On failure returns -1 with s->ctlpath NULL
  22.304 + * and s->ctl.fd == -1, so a later ctl_close() touches nothing. */
  22.305 +static int ctl_open(struct tdlog_state* s, const char* name)
  22.306 +{
  22.307 +  struct sockaddr_un saddr;
  22.308 +  size_t len;
  22.309 +
  22.310 +  s->ctl.fd = -1;
  22.311 +
  22.312 +  if (!(s->ctlpath = ctl_makepath(name, "ctl")))
  22.313 +    return -1;
  22.314 +
  22.315 +  /* sun_path is a small fixed array; refuse rather than overflow it */
  22.316 +  len = strlen(s->ctlpath);
  22.317 +  if (len >= sizeof(saddr.sun_path)) {
  22.318 +    BWPRINTF("control socket path %s is too long", s->ctlpath);
  22.319 +    goto err;
  22.320 +  }
  22.321 +
  22.322 +  if ((s->ctl.fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
  22.323 +    BWPRINTF("error opening control socket: %s", strerror(errno));
  22.324 +    goto err;
  22.325 +  }
  22.326 +
  22.327 +  memset(&saddr, 0, sizeof(saddr));
  22.328 +  saddr.sun_family = AF_UNIX;
  22.329 +  memcpy(saddr.sun_path, s->ctlpath, len);
  22.330 +  if (unlink(s->ctlpath) && errno != ENOENT) {
  22.331 +    BWPRINTF("error unlinking old socket path %s: %s", s->ctlpath,
  22.332 +	     strerror(errno));
  22.333 +    goto err_sock;
  22.334 +  }
  22.335 +
  22.336 +  if (bind(s->ctl.fd, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) {
  22.337 +    BWPRINTF("error binding control socket to %s: %s", s->ctlpath,
  22.338 +	     strerror(errno));
  22.339 +    goto err_sock;
  22.340 +  }
  22.341 +
  22.342 +  if (listen(s->ctl.fd, 1) < 0) {
  22.343 +    BWPRINTF("error listening on control socket: %s", strerror(errno));
  22.344 +    goto err_sock;
  22.345 +  }
  22.346 +
  22.347 +  s->ctl.id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
  22.348 +					    s->ctl.fd, 0, ctl_accept, s);
         +  if (s->ctl.id < 0) {
         +    /* the id is negative here; strerror() wants a positive errno.
         +     * NOTE(review): assumes the id is a negated errno - confirm */
         +    BWPRINTF("error register event handler: %s", strerror(-s->ctl.id));
         +    goto err_sock;
         +  }
         +
         +  return 0;
         +
         +  err_sock:
         +  close(s->ctl.fd);
         +  s->ctl.fd = -1;
         +  err:
         +  free(s->ctlpath);
         +  s->ctlpath = NULL;
         +
         +  return -1;
         +}
  22.349 +
  22.350 +/* Shut down the control channel: close every accepted connection, then
  22.351 + * the listening socket, remove the socket file and reset the ring.
  22.352 + * Always returns 0.
  22.353 + *
  22.354 + * Live connections occupy connections[0 .. connected-1], so the count
  22.355 + * is decremented before it is used as an index; the previous code
  22.356 + * indexed connections[connected], one past the last live entry. */
  22.357 +static int ctl_close(struct tdlog_state* s)
  22.358 +{
  22.359 +  while (s->connected > 0) {
  22.360 +    poll_fd_t* conn = &s->connections[--s->connected];
  22.361 +
  22.362 +    tapdisk_server_unregister_event(conn->id);
  22.363 +    close(conn->fd);
  22.364 +    conn->fd = -1;
  22.365 +    conn->id = 0;
  22.366 +  }
  22.367 +
  22.368 +  if (s->ctl.fd >= 0) {
  22.369 +    tapdisk_server_unregister_event(s->ctl.id);
  22.370 +    close(s->ctl.fd);
  22.371 +    s->ctl.fd = -1;
  22.372 +    s->ctl.id = 0;
  22.373 +  }
  22.374 +
  22.375 +  if (s->ctlpath) {
  22.376 +    unlink(s->ctlpath);
  22.377 +    free(s->ctlpath);
  22.378 +    s->ctlpath = NULL;
  22.379 +  }
  22.380 +
  22.381 +  /* XXX this must be fixed once requests are actually in flight */
         +  /* could just drain the existing ring here first */
         +  if (s->sring) {
         +    SHARED_RING_INIT(s->sring);
         +    BACK_RING_INIT(&s->bring, s->sring, SRINGSIZE);
         +  }
         +
         +  return 0;
         +}
  22.382 +
  22.383 +/* Walk the list of open connections and close the one whose descriptor
  22.384 + * is @fd.  Returns 0 if it was found and closed, -1 otherwise.
  22.385 + *
  22.386 + * Only connections[0 .. connected-1] are live, so the scan stops there
  22.387 + * (it used to read one entry further).  The last live entry is moved
  22.388 + * into the freed slot to keep that range contiguous. */
  22.389 +static int ctl_close_sock(struct tdlog_state* s, int fd)
  22.390 +{
  22.391 +  int i;
  22.392 +
  22.393 +  for (i = 0; i < s->connected; i++) {
  22.394 +    if (s->connections[i].fd != fd)
  22.395 +      continue;
  22.396 +
  22.397 +    tapdisk_server_unregister_event(s->connections[i].id);
  22.398 +    close(s->connections[i].fd);
  22.399 +
  22.400 +    s->connected--;
  22.401 +    s->connections[i] = s->connections[s->connected];
         +    s->connections[s->connected].fd = -1;
         +    s->connections[s->connected].id = 0;
         +    return 0;
         +  }
         +
         +  BWPRINTF("requested to close unknown socket %d", fd);
         +  return -1;
         +}
  22.402 +
  22.403 +/* Event callback for the listening control socket: accept one pending
  22.404 + * connection and register ctl_request() as its read handler.  The new
  22.405 + * connection is closed at once if all MAX_CONNECTIONS slots are taken
  22.406 + * or the handler cannot be registered.  @id and @mode are unused. */
  22.407 +static void ctl_accept(event_id_t id, char mode, void *private)
  22.408 +{
  22.409 +  struct tdlog_state* s = (struct tdlog_state *)private;
  22.410 +  int fd;
  22.411 +  event_id_t cid;
  22.412 +
  22.413 +  if ((fd = accept(s->ctl.fd, NULL, NULL)) < 0) {
  22.414 +    BWPRINTF("error accepting control connection: %s", strerror(errno));
  22.415 +    return;
  22.416 +  }
  22.417 +
  22.418 +  /* tied to the array size, so the store below always stays inside
  22.419 +   * connections[] (same as before while MAX_CONNECTIONS is 1) */
  22.420 +  if (s->connected >= MAX_CONNECTIONS) {
  22.421 +    BWPRINTF("control session in progress, closing new connection");
  22.422 +    close(fd);
  22.423 +    return;
  22.424 +  }
  22.425 +
  22.426 +  cid = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
  22.427 +				      fd, 0, ctl_request, s);
  22.428 +  if (cid < 0) {
  22.429 +    /* cid is negative here; strerror() wants a positive errno.
  22.430 +     * NOTE(review): assumes cid is a negated errno - confirm */
  22.431 +    BWPRINTF("error registering connection event handler: %s", strerror(-cid));
         +    close(fd);
         +    return;
         +  }
         +
         +  s->connections[s->connected].fd = fd;
         +  s->connections[s->connected].id = cid;
         +  s->connected++;
         +}
  22.432 +
  22.433 +/* response format: 4 bytes shmsize, 0-terminated path
  22.434 + *
  22.435 + * Sends a fixed CTLRSPLEN_SHMP-byte reply on @fd.  The reply buffer is
  22.436 + * zeroed first, so the bytes after the path no longer carry
  22.437 + * uninitialised stack contents to the peer.  Returns 0 on success and
  22.438 + * -1 if the write fails. */
  22.439 +static int ctl_get_shmpath(struct tdlog_state* s, int fd)
  22.440 +{
  22.441 +  char msg[CTLRSPLEN_SHMP + 1];
  22.442 +  uint32_t sz;
  22.443 +  int rc;
  22.444 +
  22.445 +  BDPRINTF("ctl: sending shared memory parameters (size: %u, path: %s)",
  22.446 +	   SHMSIZE, s->shmpath);
  22.447 +
  22.448 +  /* TMP: sanity-check shm */
  22.449 +  sz = 0xdeadbeef;
  22.450 +  memcpy(s->shm, &sz, sizeof(sz));
  22.451 +
  22.452 +  memset(msg, 0, sizeof(msg));
  22.453 +  sz = SHMSIZE;
  22.454 +  memcpy(msg, &sz, sizeof(sz));
  22.455 +  snprintf(msg + sizeof(sz), sizeof(msg) - sizeof(sz), "%s", s->shmpath);
  22.456 +  if ((rc = write(fd, msg, CTLRSPLEN_SHMP)) < 0) {
         +    BWPRINTF("error writing shmpath: %s", strerror(errno));
         +    return -1;
         +  }
         +
         +  return 0;
         +}
  22.457 +
  22.458 +/* Export the dirty extents to shared memory without clearing them,
  22.459 + * then acknowledge with "done" on @fd.  Returns 0 on success and -1 if
  22.460 + * the acknowledgement cannot be written. */
  22.461 +static int ctl_peek_writes(struct tdlog_state* s, int fd)
  22.462 +{
  22.463 +  BDPRINTF("ctl: peeking bitmap");
  22.464 +
  22.465 +  writelog_export(s);
  22.466 +
  22.467 +  if (write(fd, "done", CTLRSPLEN_PEEK) >= 0)
  22.468 +    return 0;
  22.469 +
  22.470 +  BWPRINTF("error writing peek ack: %s", strerror(errno));
  22.471 +  return -1;
  22.472 +}
  22.473 +
  22.474 +/* Clear the whole dirty bitmap, then acknowledge with "done" on @fd.
  22.475 + * Returns 0 on success and -1 if the acknowledgement cannot be
  22.476 + * written. */
  22.477 +static int ctl_clear_writes(struct tdlog_state* s, int fd)
  22.478 +{
  22.479 +  BDPRINTF("ctl: clearing bitmap");
  22.480 +
  22.481 +  writelog_clear(s, 0, 0);
  22.482 +
  22.483 +  if (write(fd, "done", CTLRSPLEN_CLEAR) >= 0)
  22.484 +    return 0;
  22.485 +
  22.486 +  BWPRINTF("error writing clear ack: %s", strerror(errno));
  22.487 +  return -1;
  22.488 +}
  22.489 +
  22.490 +/* get dirty bitmap and clear it atomically
  22.491 + *
  22.492 + * The export and the clear run back to back within this one call; the
  22.493 + * request is then acknowledged with "done" on @fd.  Returns 0 on
  22.494 + * success and -1 if the acknowledgement cannot be written. */
  22.495 +static int ctl_get_writes(struct tdlog_state* s, int fd)
  22.496 +{
  22.497 +  BDPRINTF("ctl: getting bitmap");
  22.498 +
  22.499 +  writelog_export(s);
  22.500 +  writelog_clear(s, 0, 0);
  22.501 +
  22.502 +  if (write(fd, "done", CTLRSPLEN_GET) >= 0)
  22.503 +    return 0;
  22.504 +
  22.505 +  BWPRINTF("error writing get ack: %s", strerror(errno));
  22.506 +  return -1;
         +}
  22.507 +
  22.508 +/* get requests from ring
  22.509 + *
  22.510 + * Test implementation: every pending request is consumed and echoed
  22.511 + * back as a response carrying the same sector and count; the responses
  22.512 + * are pushed and a LOGCMD_KICK message is written to @fd.  Returns 0
  22.513 + * on success and -1 if the message cannot be written in full. */
  22.514 +static int ctl_kick(struct tdlog_state* s, int fd)
  22.515 +{
  22.516 +  RING_IDX reqstart, reqend;
  22.517 +  log_request_t req;
  22.518 +  log_response_t rsp;
  22.519 +  struct log_ctlmsg msg;
  22.520 +  int rc;
  22.521 +
  22.522 +  reqstart = s->bring.req_cons;
  22.523 +  reqend = s->sring->req_prod;
  22.524 +
  22.525 +  BDPRINTF("ctl: ring kicked (start = %u, end = %u)", reqstart, reqend);
  22.526 +
  22.527 +  /* rsp is copied whole into shared memory; zero it so bytes not
  22.528 +   * assigned below do not carry stack contents */
  22.529 +  memset(&rsp, 0, sizeof(rsp));
  22.530 +
  22.531 +  while (reqstart != reqend) {
  22.532 +    /* XXX actually submit these! */
  22.533 +    memcpy(&req, RING_GET_REQUEST(&s->bring, reqstart), sizeof(req));
  22.534 +    BDPRINTF("ctl: read request %"PRIu64":%u", req.sector, req.count);
  22.535 +    s->bring.req_cons = ++reqstart;
  22.536 +
  22.537 +    rsp.sector = req.sector;
  22.538 +    rsp.count = req.count;
  22.539 +    memcpy(RING_GET_RESPONSE(&s->bring, s->bring.rsp_prod_pvt), &rsp,
  22.540 +	   sizeof(rsp));
  22.541 +    s->bring.rsp_prod_pvt++;
  22.542 +  }
  22.543 +
  22.544 +  RING_PUSH_RESPONSES(&s->bring);
  22.545 +  memset(&msg, 0, sizeof(msg));
  22.546 +  memcpy(msg.msg, LOGCMD_KICK, 4);
  22.547 +  if ((rc = write(fd, &msg, sizeof(msg))) < 0) {
  22.548 +    BWPRINTF("error sending notify: %s", strerror(errno));
  22.549 +    return -1;
  22.550 +  } else if ((size_t)rc < sizeof(msg)) {
         +    /* sizeof yields size_t, which is printed with %zu */
         +    BWPRINTF("short notify write (%d/%zu)", rc, sizeof(msg));
         +    return -1;
         +  }
         +
         +  return 0;
         +}
  22.551 +
  22.552 +/* Dispatch one control message: the first 4 bytes of msg->msg select
  22.553 + * the handler, whose result is returned.  Unknown commands are logged
  22.554 + * and yield -1. */
  22.555 +static int ctl_do_request(struct tdlog_state* s, int fd, struct log_ctlmsg* msg)
  22.556 +{
  22.557 +  const struct {
  22.558 +    const char* cmd;
  22.559 +    int (*handler)(struct tdlog_state*, int);
  22.560 +  } dispatch[] = {
  22.561 +    { LOGCMD_SHMP,  ctl_get_shmpath  },
  22.562 +    { LOGCMD_PEEK,  ctl_peek_writes  },
  22.563 +    { LOGCMD_CLEAR, ctl_clear_writes },
  22.564 +    { LOGCMD_GET,   ctl_get_writes   },
  22.565 +    { LOGCMD_KICK,  ctl_kick         },
  22.566 +  };
  22.567 +  size_t n;
  22.568 +
         +  for (n = 0; n < sizeof(dispatch) / sizeof(dispatch[0]); n++)
         +    if (!strncmp(msg->msg, dispatch[n].cmd, 4))
         +      return dispatch[n].handler(s, fd);
         +
         +  BWPRINTF("unknown control request %.4s", msg->msg);
         +  return -1;
         +}
  22.569 +
  22.570 +/* Return the descriptor of the accepted connection registered under
  22.571 + * event @id, or -1 (after logging a warning) if there is none. */
  22.572 +static inline int ctl_find_connection(struct tdlog_state *s, event_id_t id)
  22.573 +{
  22.574 +  int slot = 0;
  22.575 +
  22.576 +  while (slot < s->connected) {
  22.577 +    if (s->connections[slot].id == id)
  22.578 +      return s->connections[slot].fd;
  22.579 +    slot++;
  22.580 +  }
         +
         +  BWPRINTF("unrecognized event callback id %d", id);
         +  return -1;
         +}
  22.581 +
  22.582 +/* Event callback for an accepted control connection: read one
  22.583 + * struct log_ctlmsg from it and dispatch it.  A read error or EOF
  22.584 + * closes the connection; a short read is logged and ignored.  @mode is
  22.585 + * unused. */
  22.586 +static void ctl_request(event_id_t id, char mode, void *private)
  22.587 +{
  22.588 +  struct tdlog_state* s = (struct tdlog_state*)private;
  22.589 +  struct log_ctlmsg msg;
  22.590 +  int rc, fd;
  22.591 +
  22.592 +  fd = ctl_find_connection(s, id);
  22.593 +  if (fd == -1)
  22.594 +    return;
  22.595 +
  22.596 +  if ((rc = read(fd, &msg, sizeof(msg))) < 0) {
  22.597 +    BWPRINTF("error reading from ctl socket %d, closing: %s", fd,
  22.598 +	     strerror(errno));
  22.599 +    ctl_close_sock(s, fd);
  22.600 +    return;
  22.601 +  } else if (rc == 0) {
  22.602 +    BDPRINTF("ctl_request: EOF, closing socket");
  22.603 +    ctl_close_sock(s, fd);
  22.604 +    return;
  22.605 +  } else if ((size_t)rc < sizeof(msg)) {
  22.606 +    /* sizeof yields size_t, which is printed with %zu */
  22.607 +    BWPRINTF("short request received (%d/%zu bytes), ignoring", rc,
  22.608 +	     sizeof(msg));
         +    return;
         +  }
         +
         +  ctl_do_request(s, fd, &msg);
         +}
  22.609 +
  22.610 +/* -- interface -- */
  22.611 +
  22.612 +static int tdlog_close(td_driver_t*);
  22.613 +
  22.614 +/* Open the logging driver: allocate the dirty bitmap, create the
  22.615 + * shared memory region and the control socket, then initialise the
  22.616 + * request ring inside the shared region.  Returns 0 on success; on
  22.617 + * failure everything set up so far is torn down through tdlog_close()
  22.618 + * and the failing step's code is returned.  @flags is not consulted. */
  22.619 +static int tdlog_open(td_driver_t* driver, const char* name, td_flag_t flags)
  22.620 +{
  22.621 +  struct tdlog_state* s = (struct tdlog_state*)driver->data;
  22.622 +  int rc, i;
  22.623 +
  22.624 +  memset(s, 0, sizeof(*s));
  22.625 +
  22.626 +  /* 0 is a valid descriptor: mark every socket as closed, otherwise
  22.627 +   * the tdlog_close() on an early error path below would close fd 0
  22.628 +   * and unregister event 0 */
  22.629 +  s->ctl.fd = -1;
  22.630 +  for (i = 0; i < MAX_CONNECTIONS; i++)
  22.631 +    s->connections[i].fd = -1;
  22.632 +
  22.633 +  s->size = driver->info.size;
  22.634 +
  22.635 +  if ((rc = writelog_create(s)) ||
  22.636 +      (rc = shmem_open(s, name)) ||
  22.637 +      (rc = ctl_open(s, name))) {
  22.638 +    tdlog_close(driver);
  22.639 +    return rc;
  22.640 +  }
  22.641 +
  22.642 +  s->sring = (log_sring_t*)sringstart(s->shm);
  22.643 +  SHARED_RING_INIT(s->sring);
         +  BACK_RING_INIT(&s->bring, s->sring, SRINGSIZE);
         +
         +  BDPRINTF("opened ctl socket");
         +
         +  return 0;
         +}
  22.644 +
  22.645 +/* Tear the driver down in reverse order of setup: control channel,
  22.646 + * shared memory, dirty bitmap.  Always returns 0. */
  22.647 +static int tdlog_close(td_driver_t* driver)
  22.648 +{
  22.649 +  struct tdlog_state* state = driver->data;
  22.650 +
  22.651 +  ctl_close(state);
  22.652 +  shmem_close(state);
  22.653 +  writelog_free(state);
  22.654 +
         +  return 0;
         +}
  22.655 +
  22.656 +/* Reads are not logged: @treq is forwarded untouched. */
  22.657 +static void tdlog_queue_read(td_driver_t* driver, td_request_t treq)
  22.658 +{
  22.659 +  (void)driver;
         +
         +  td_forward_request(treq);
         +}
  22.660 +
  22.661 +/* Record the sectors touched by @treq in the dirty bitmap, then
  22.662 + * forward the write.  (The unused local "rc" has been removed.) */
  22.663 +static void tdlog_queue_write(td_driver_t* driver, td_request_t treq)
  22.664 +{
  22.665 +  struct tdlog_state* s = (struct tdlog_state*)driver->data;
  22.666 +
  22.667 +  writelog_set(s, treq.sec, treq.secs);
  22.668 +  td_forward_request(treq);
         +}
  22.669 +
  22.670 +/* No parent id is reported: always returns -EINVAL, @id is untouched. */
  22.671 +static int tdlog_get_parent_id(td_driver_t* driver, td_disk_id_t* id)
  22.672 +{
  22.673 +  int rc = -EINVAL;
         +
         +  return rc;
         +}
  22.674 +
  22.675 +/* Any parent is accepted: always returns 0, no argument is examined. */
  22.676 +static int tdlog_validate_parent(td_driver_t *driver,
  22.677 +				 td_driver_t *parent, td_flag_t flags)
  22.678 +{
  22.679 +  const int accepted = 0;
         +
         +  return accepted;
         +}
  22.680 +
  22.681 +struct tap_disk tapdisk_log = { /* driver entry points for the write log */
  22.682 +  .disk_type          = "tapdisk_log",
  22.683 +  .private_data_size  = sizeof(struct tdlog_state), /* driver->data is used as a struct tdlog_state */
  22.684 +  .flags              = 0,
  22.685 +  .td_open            = tdlog_open,
  22.686 +  .td_close           = tdlog_close,
  22.687 +  .td_queue_read      = tdlog_queue_read, /* forwards the request unchanged */
  22.688 +  .td_queue_write     = tdlog_queue_write, /* marks the sectors dirty, then forwards */
  22.689 +  .td_get_parent_id   = tdlog_get_parent_id, /* always -EINVAL */
  22.690 +  .td_validate_parent = tdlog_validate_parent, /* always 0 */
  22.691 +};
    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/tools/blktap2/drivers/block-qcow.c	Tue May 26 11:52:31 2009 +0100
    23.3 @@ -0,0 +1,1517 @@
    23.4 +/* block-qcow.c
    23.5 + *
    23.6 + * Asynchronous Qemu copy-on-write disk implementation.
    23.7 + * Code based on the Qemu implementation
    23.8 + * (see copyright notice below)
    23.9 + *
   23.10 + * (c) 2006 Andrew Warfield and Julian Chesterfield
   23.11 + *
   23.12 + */
   23.13 +
   23.14 +/*
   23.15 + * Block driver for the QCOW format
   23.16 + * 
   23.17 + * Copyright (c) 2004 Fabrice Bellard
   23.18 + * 
   23.19 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   23.20 + * of this software and associated documentation files(the "Software"), to deal
   23.21 + * in the Software without restriction, including without limitation the rights
   23.22 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   23.23 + * copies of the Software, and to permit persons to whom the Software is
   23.24 + * furnished to do so, subject to the following conditions:
   23.25 + */
   23.26 +
   23.27 +#include <errno.h>
   23.28 +#include <fcntl.h>
   23.29 +#include <stdio.h>
   23.30 +#include <stdlib.h>
   23.31 +#include <unistd.h>
   23.32 +#include <sys/statvfs.h>
   23.33 +#include <sys/stat.h>
   23.34 +#include <sys/ioctl.h>
   23.35 +#include <linux/fs.h>
   23.36 +#include <string.h>
   23.37 +#include <zlib.h>
   23.38 +#include <inttypes.h>
   23.39 +#include <libaio.h>
   23.40 +#include <openssl/md5.h>
   23.41 +#include "bswap.h"
   23.42 +#include "aes.h"
   23.43 +
   23.44 +#include "tapdisk.h"
   23.45 +#include "tapdisk-driver.h"
   23.46 +#include "tapdisk-interface.h"
   23.47 +#include "qcow.h"
   23.48 +#include "blk.h"
   23.49 +#include "atomicio.h"
   23.50 +
   23.51 +/* *BSD has no O_LARGEFILE */
   23.52 +#ifndef O_LARGEFILE
   23.53 +#define O_LARGEFILE     0
   23.54 +#endif
   23.55 +
   23.56 +#if 1
   23.57 +#define ASSERT(_p) \
   23.58 +    if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
   23.59 +    __LINE__, __FILE__); *(int*)0=0; }
   23.60 +#else
   23.61 +#define ASSERT(_p) ((void)0)
   23.62 +#endif
   23.63 +
         +/*
         + * Descriptor for one pending asynchronous I/O.
         + * NOTE(review): no code in the visible part of this file uses this struct;
         + * the per-field notes are inferred from the field names only -- confirm
         + * before relying on them.
         + */
   23.64 +struct pending_aio {
   23.65 +        td_callback_t cb;	/* presumably the completion callback */
   23.66 +        int id;	/* presumably the request identifier */
   23.67 +        void *private;	/* presumably opaque data handed back to cb */
   23.68 +	int nb_sectors;	/* presumably the transfer length, in sectors */
   23.69 +	char *buf;	/* presumably the data buffer */
   23.70 +	uint64_t sector;	/* presumably the starting sector */
   23.71 +};
   23.72 +
   23.73 +#undef IOCB_IDX
   23.74 +#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
   23.75 +
   23.76 +#define ZERO_TEST(_b) (_b | 0x00)
   23.77 +
         +/*
         + * One slot of the driver's AIO request pool.  async_read()/async_write()
         + * pop a slot from aio_free_list and tdqcow_complete() pushes it back.
         + */
   23.78 +struct qcow_request {
   23.79 +	td_request_t         treq;	/* copy of the request; handed to td_complete_request() */
   23.80 +	struct tiocb         tiocb;	/* filled by td_prep_read()/td_prep_write() and queued */
   23.81 +	struct tdqcow_state  *state;	/* owning state; completion uses it to reach the free list */
   23.82 +};
   23.83 +
   23.84 +static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
   23.85 +
   23.86 +#ifdef USE_GCRYPT
   23.87 +
   23.88 +#include <gcrypt.h>
   23.89 +
         +/*
         + * Checksum of 'len' bytes at 'ptr': the first 32 bits of the buffer's MD5
         + * digest, taken in memory order (libgcrypt variant).
         + */
   23.90 +uint32_t gen_cksum(char *ptr, int len)
   23.91 +{
   23.93 +  uint32_t md[4];
   23.94 +
   23.95 +  /* Generate checksum */
   23.96 +  gcry_md_hash_buffer(GCRY_MD_MD5, md, ptr, len);
   23.97 +
   23.98 +  return md[0];
   23.99 +}
  23.100 +
  23.101 +#else /* use libcrypto */
  23.102 +
  23.103 +#include <openssl/md5.h>
  23.104 +
         +/*
         + * Checksum of 'len' bytes at 'ptr': the first 32 bits of the buffer's MD5
         + * digest, taken in memory order (libcrypto variant).  Returns 0 if MD5()
         + * does not hand back the digest buffer.
         + *
         + * The digest lives on the stack: it is only MD5_DIGEST_LENGTH bytes, and a
         + * heap allocation here could fail and silently turn into a checksum of 0.
         + */
  23.105 +uint32_t gen_cksum(char *ptr, int len)
  23.106 +{
  23.108 +  unsigned char md[MD5_DIGEST_LENGTH];
  23.109 +  uint32_t ret;
  23.110 +
  23.114 +  /* Generate checksum */
  23.115 +  if (MD5((unsigned char *)ptr, len, md) != md)
  23.116 +    return 0;
  23.117 +
  23.118 +  memcpy(&ret, md, sizeof(uint32_t));
  23.121 +  return ret;
  23.122 +}
  23.123 +
  23.124 +#endif
  23.125 +
  23.126 +
  23.127 +static void free_aio_state(struct tdqcow_state* s)
  23.128 +{
  23.129 +	free(s->aio_requests);
  23.130 +	free(s->aio_free_list);
  23.131 +}
  23.132 +
  23.133 +static int init_aio_state(td_driver_t *driver)
  23.134 +{
  23.135 +	int i, ret;
  23.136 +	td_disk_info_t *bs = &(driver->info);
  23.137 +	struct tdqcow_state   *s  = (struct tdqcow_state *)driver->data;
  23.138 +	
  23.139 +        // A segment (i.e. a page) can span multiple clusters
  23.140 +        s->max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
  23.141 +	  MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
  23.142 +
  23.143 +	s->aio_free_count = s->max_aio_reqs;
  23.144 +
  23.145 +	if (!(s->aio_requests  = calloc(s->max_aio_reqs, sizeof(struct qcow_request))) || 
  23.146 +	    !(s->aio_free_list = calloc(s->max_aio_reqs, sizeof(struct qcow_request)))) {
  23.147 +	    DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
  23.148 +		    s->max_aio_reqs);
  23.149 +	    goto fail;
  23.150 +	}
  23.151 +
  23.152 +	for (i = 0; i < s->max_aio_reqs; i++)
  23.153 +		s->aio_free_list[i] = &s->aio_requests[i];
  23.154 +
  23.155 +        DPRINTF("AIO state initialised\n");
  23.156 +
  23.157 +        return 0;
  23.158 + fail:
  23.159 +	return -1;
  23.160 +}
  23.161 +
  23.162 +int get_filesize(char *filename, uint64_t *size, struct stat *st)
  23.163 +{
  23.164 +	int fd;
  23.165 +	QCowHeader header;
  23.166 +
  23.167 +	/*Set to the backing file size*/
  23.168 +	fd = open(filename, O_RDONLY);
  23.169 +	if (fd < 0)
  23.170 +		return -1;
  23.171 +	if (read(fd, &header, sizeof(header)) < sizeof(header)) {
  23.172 +		close(fd);
  23.173 +		return -1;
  23.174 +	}
  23.175 +	close(fd);
  23.176 +	
  23.177 +	be32_to_cpus(&header.magic);
  23.178 +	be64_to_cpus(&header.size);
  23.179 +	if (header.magic == QCOW_MAGIC) {
  23.180 +		*size = header.size >> SECTOR_SHIFT;
  23.181 +		return 0;
  23.182 +	}
  23.183 +
  23.184 +	if(S_ISBLK(st->st_mode)) {
  23.185 +		fd = open(filename, O_RDONLY);
  23.186 +		if (fd < 0)
  23.187 +			return -1;
  23.188 +		if (blk_getimagesize(fd, size) != 0) {
  23.189 +			printf("Unable to get Block device size\n");
  23.190 +			close(fd);
  23.191 +			return -1;
  23.192 +		}
  23.193 +		close(fd);
  23.194 +	} else *size = (st->st_size >> SECTOR_SHIFT);	
  23.195 +	return 0;
  23.196 +}
  23.197 +
  23.198 +static int qcow_set_key(struct tdqcow_state *s, const char *key)
  23.199 +{
  23.200 +	uint8_t keybuf[16];
  23.201 +	int len, i;
  23.202 +	
  23.203 +	memset(keybuf, 0, 16);
  23.204 +	len = strlen(key);
  23.205 +	if (len > 16)
  23.206 +		len = 16;
  23.207 +	/* XXX: we could compress the chars to 7 bits to increase
  23.208 +	   entropy */
  23.209 +	for (i = 0; i < len; i++) {
  23.210 +		keybuf[i] = key[i];
  23.211 +	}
  23.212 +	s->crypt_method = s->crypt_method_header;
  23.213 +	
  23.214 +	if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
  23.215 +		return -1;
  23.216 +	if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
  23.217 +		return -1;
  23.218 +#if 0
  23.219 +	/* test */
  23.220 +	{
  23.221 +		uint8_t in[16];
  23.222 +		uint8_t out[16];
  23.223 +		uint8_t tmp[16];
  23.224 +		for (i=0; i<16; i++)
  23.225 +			in[i] = i;
  23.226 +		AES_encrypt(in, tmp, &s->aes_encrypt_key);
  23.227 +		AES_decrypt(tmp, out, &s->aes_decrypt_key);
  23.228 +		for (i = 0; i < 16; i++)
  23.229 +			DPRINTF(" %02x", tmp[i]);
  23.230 +		DPRINTF("\n");
  23.231 +		for (i = 0; i < 16; i++)
  23.232 +			DPRINTF(" %02x", out[i]);
  23.233 +		DPRINTF("\n");
  23.234 +	}
  23.235 +#endif
  23.236 +	return 0;
  23.237 +}
  23.238 +
  23.239 +void tdqcow_complete(void *arg, struct tiocb *tiocb, int err)
  23.240 +{
  23.241 +	struct qcow_request *aio = (struct qcow_request *)arg;
  23.242 +	struct tdqcow_state *s = aio->state;
  23.243 +
  23.244 +	td_complete_request(aio->treq, err);
  23.245 +
  23.246 +	s->aio_free_list[s->aio_free_count++] = aio;
  23.247 +}
  23.248 +
  23.249 +static void async_read(td_driver_t *driver, td_request_t treq)
  23.250 +{
  23.251 +	int size;
  23.252 +	uint64_t offset;
  23.253 +	struct qcow_request *aio;
  23.254 +	struct tdqcow_state *prv;
  23.255 +
  23.256 +	prv    = (struct tdqcow_state *)driver->data;
  23.257 +	size   = treq.secs * driver->info.sector_size;
  23.258 +	offset = treq.sec  * (uint64_t)driver->info.sector_size;
  23.259 +
  23.260 +	if (prv->aio_free_count == 0)
  23.261 +		goto fail;
  23.262 +
  23.263 +	aio        = prv->aio_free_list[--prv->aio_free_count];
  23.264 +	aio->treq  = treq;
  23.265 +	aio->state = prv;
  23.266 +
  23.267 +	td_prep_read(&aio->tiocb, prv->fd, treq.buf,
  23.268 +		     size, offset, tdqcow_complete, aio);
  23.269 +	td_queue_tiocb(driver, &aio->tiocb);
  23.270 +
  23.271 +	return;
  23.272 +
  23.273 +fail:
  23.274 +	td_complete_request(treq, -EBUSY);
  23.275 +}
  23.276 +
  23.277 +static void async_write(td_driver_t *driver, td_request_t treq)
  23.278 +{
  23.279 +	int size;
  23.280 +	uint64_t offset;
  23.281 +	struct qcow_request *aio;
  23.282 +	struct tdqcow_state *prv;
  23.283 +
  23.284 +	prv     = (struct tdqcow_state *)driver->data;
  23.285 +	size    = treq.secs * driver->info.sector_size;
  23.286 +	offset  = treq.sec  * (uint64_t)driver->info.sector_size;
  23.287 +
  23.288 +	if (prv->aio_free_count == 0)
  23.289 +		goto fail;
  23.290 +
  23.291 +	aio        = prv->aio_free_list[--prv->aio_free_count];
  23.292 +	aio->treq  = treq;
  23.293 +	aio->state = prv;
  23.294 +
  23.295 +	td_prep_write(&aio->tiocb, prv->fd, treq.buf,
  23.296 +		      size, offset, tdqcow_complete, aio);
  23.297 +	td_queue_tiocb(driver, &aio->tiocb);
  23.298 +
  23.299 +	return;
  23.300 +
  23.301 +fail:
  23.302 +	td_complete_request(treq, -EBUSY);
  23.303 +}
  23.304 +
  23.305 +/*
  23.306 + * Run AES-CBC over 'nb_sectors' consecutive 512-byte sectors, encrypting
  23.307 + * when 'enc' is non-zero and decrypting otherwise.  Each sector gets its
  23.308 + * own IV: the little-endian sector number in the low 8 bytes, zero in the
  23.309 + * high 8 bytes.  The scheme is compatible with the linux cryptoloop
  23.310 + * algorithm for < 4 GB images.  out_buf == in_buf is supported.
  23.311 + */
  23.312 +static void encrypt_sectors(struct tdqcow_state *s, int64_t sector_num,
  23.313 +                            uint8_t *out_buf, const uint8_t *in_buf,
  23.314 +                            int nb_sectors, int enc,
  23.315 +                            const AES_KEY *key)
  23.316 +{
  23.317 +	union {
  23.318 +		uint64_t ll[2];
  23.319 +		uint8_t b[16];
  23.320 +	} ivec;
  23.321 +	int n;
  23.322 +
  23.323 +	for (n = 0; n < nb_sectors; n++) {
  23.324 +		const uint8_t *src = in_buf  + (size_t)n * 512;
  23.325 +		uint8_t       *dst = out_buf + (size_t)n * 512;
  23.326 +
  23.327 +		ivec.ll[0] = cpu_to_le64(sector_num + n);
  23.328 +		ivec.ll[1] = 0;
  23.329 +		AES_cbc_encrypt(src, dst, 512, key, ivec.b, enc);
  23.330 +	}
         +}
  23.331 +
  23.332 +int qtruncate(int fd, off_t length, int sparse)
  23.333 +{
  23.334 +	int ret, i; 
  23.335 +	int current = 0, rem = 0;
  23.336 +	uint64_t sectors;
  23.337 +	struct stat st;
  23.338 +	char *buf;
  23.339 +
  23.340 +	/* If length is greater than the current file len
  23.341 +	 * we synchronously write zeroes to the end of the 
  23.342 +	 * file, otherwise we truncate the length down
  23.343 +	 */
  23.344 +	ret = fstat(fd, &st);
  23.345 +	if (ret == -1) 
  23.346 +		return -1;
  23.347 +	if (S_ISBLK(st.st_mode))
  23.348 +		return 0;
  23.349 +
  23.350 +	sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
  23.351 +	current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
  23.352 +	rem     = st.st_size % DEFAULT_SECTOR_SIZE;
  23.353 +
  23.354 +	/* If we are extending this file, we write zeros to the end --
  23.355 +	 * this tries to ensure that the extents allocated wind up being
  23.356 +	 * contiguous on disk.
  23.357 +	 */
  23.358 +	if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) {
  23.359 +		/*We are extending the file*/
  23.360 +		if ((ret = posix_memalign((void **)&buf, 
  23.361 +					  512, DEFAULT_SECTOR_SIZE))) {
  23.362 +			DPRINTF("posix_memalign failed: %d\n", ret);
  23.363 +			return -1;
  23.364 +		}
  23.365 +		memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
  23.366 +		if (lseek(fd, 0, SEEK_END)==-1) {
  23.367 +			DPRINTF("Lseek EOF failed (%d), internal error\n",
  23.368 +				errno);
  23.369 +			free(buf);
  23.370 +			return -1;
  23.371 +		}
  23.372 +		if (rem) {
  23.373 +			ret = write(fd, buf, rem);
  23.374 +			if (ret != rem) {
  23.375 +				DPRINTF("write failed: ret = %d, err = %s\n",
  23.376 +					ret, strerror(errno));
  23.377 +				free(buf);
  23.378 +				return -1;
  23.379 +			}
  23.380 +		}
  23.381 +		for (i = current; i < sectors; i++ ) {
  23.382 +			ret = write(fd, buf, DEFAULT_SECTOR_SIZE);
  23.383 +			if (ret != DEFAULT_SECTOR_SIZE) {
  23.384 +				DPRINTF("write failed: ret = %d, err = %s\n",
  23.385 +					ret, strerror(errno));
  23.386 +				free(buf);
  23.387 +				return -1;
  23.388 +			}
  23.389 +		}
  23.390 +		free(buf);
  23.391 +	} else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE))
  23.392 +		if (ftruncate(fd, (off_t)sectors * DEFAULT_SECTOR_SIZE)==-1) {
  23.393 +			DPRINTF("Ftruncate failed (%s)\n", strerror(errno));
  23.394 +			return -1;
  23.395 +		}
  23.396 +	return 0;
  23.397 +}
  23.398 +
  23.399 +/* 'allocate' is:
  23.400 + *
  23.401 + * 0 to not allocate.
  23.402 + *
  23.403 + * 1 to allocate a normal cluster (for sector indexes 'n_start' to
  23.404 + * 'n_end')
  23.405 + *
  23.406 + * 2 to allocate a compressed cluster of size
  23.407 + * 'compressed_size'. 'compressed_size' must be > 0 and <
  23.408 + * cluster_size 
  23.409 + *
  23.410 + * return 0 if not allocated.
  23.411 + */
  23.412 +static uint64_t get_cluster_offset(struct tdqcow_state *s,
  23.413 +                                   uint64_t offset, int allocate,
  23.414 +                                   int compressed_size,
  23.415 +                                   int n_start, int n_end)
  23.416 +{
  23.417 +	int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
  23.418 +	char *tmp_ptr2, *l2_ptr, *l1_ptr;
  23.419 +	uint64_t *tmp_ptr;
  23.420 +	uint64_t l2_offset, *l2_table, cluster_offset, tmp;
  23.421 +	uint32_t min_count;
  23.422 +	int new_l2_table;
  23.423 +
  23.424 +	/*Check L1 table for the extent offset*/
  23.425 +	l1_index = offset >> (s->l2_bits + s->cluster_bits);
  23.426 +	l2_offset = s->l1_table[l1_index];
  23.427 +	new_l2_table = 0;
  23.428 +	if (!l2_offset) {
  23.429 +		if (!allocate)
  23.430 +			return 0;
  23.431 +		/* 
  23.432 +		 * allocating a new l2 entry + extent 
  23.433 +		 * at the end of the file, we must also
  23.434 +		 * update the L1 entry safely.
  23.435 +		 */
  23.436 +		l2_offset = s->fd_end;
  23.437 +
  23.438 +		/* round to cluster size */
  23.439 +		l2_offset = (l2_offset + s->cluster_size - 1) 
  23.440 +			& ~(s->cluster_size - 1);
  23.441 +
  23.442 +		/* update the L1 entry */
  23.443 +		s->l1_table[l1_index] = l2_offset;
  23.444 +		
  23.445 +		/*Truncate file for L2 table 
  23.446 +		 *(initialised to zero in case we crash)*/
  23.447 +		if (qtruncate(s->fd, 
  23.448 +			      l2_offset + (s->l2_size * sizeof(uint64_t)),
  23.449 +			      s->sparse) != 0) {
  23.450 +			DPRINTF("ERROR truncating file\n");
  23.451 +			return 0;
  23.452 +		}
  23.453 +		s->fd_end = l2_offset + (s->l2_size * sizeof(uint64_t));
  23.454 +
  23.455 +		/*Update the L1 table entry on disk
  23.456 +                 * (for O_DIRECT we write 4KByte blocks)*/
  23.457 +		l1_sector = (l1_index * sizeof(uint64_t)) >> 12;
  23.458 +		l1_ptr = (char *)s->l1_table + (l1_sector << 12);
  23.459 +
  23.460 +		if (posix_memalign((void **)&tmp_ptr, 4096, 4096) != 0) {
  23.461 +			DPRINTF("ERROR allocating memory for L1 table\n");
  23.462 +		}
  23.463 +		memcpy(tmp_ptr, l1_ptr, 4096);
  23.464 +
  23.465 +		/* Convert block to write to big endian */
  23.466 +		for(i = 0; i < 4096 / sizeof(uint64_t); i++) {
  23.467 +			cpu_to_be64s(&tmp_ptr[i]);
  23.468 +		}
  23.469 +
  23.470 +		/*
  23.471 +		 * Issue non-asynchronous L1 write.
  23.472 +		 * For safety, we must ensure that
  23.473 +		 * entry is written before blocks.
  23.474 +		 */
  23.475 +		lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET);
  23.476 +		if (write(s->fd, tmp_ptr, 4096) != 4096) {
  23.477 +			free(tmp_ptr);
  23.478 +		 	return 0;
  23.479 +		}
  23.480 +		free(tmp_ptr);
  23.481 +
  23.482 +		new_l2_table = 1;
  23.483 +		goto cache_miss;
  23.484 +	} else if (s->min_cluster_alloc == s->l2_size) {
  23.485 +		/*Fast-track the request*/
  23.486 +		cluster_offset = l2_offset + (s->l2_size * sizeof(uint64_t));
  23.487 +		l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
  23.488 +		return cluster_offset + (l2_index * s->cluster_size);
  23.489 +	}
  23.490 +
  23.491 +	/*Check to see if L2 entry is already cached*/
  23.492 +	for (i = 0; i < L2_CACHE_SIZE; i++) {
  23.493 +		if (l2_offset == s->l2_cache_offsets[i]) {
  23.494 +			/* increment the hit count */
  23.495 +			if (++s->l2_cache_counts[i] == 0xffffffff) {
  23.496 +				for (j = 0; j < L2_CACHE_SIZE; j++) {
  23.497 +					s->l2_cache_counts[j] >>= 1;
  23.498 +				}
  23.499 +			}
  23.500 +			l2_table = s->l2_cache + (i << s->l2_bits);
  23.501 +			goto found;
  23.502 +		}
  23.503 +	}
  23.504 +
  23.505 +cache_miss:
  23.506 +	/* not found: load a new entry in the least used one */
  23.507 +	min_index = 0;
  23.508 +	min_count = 0xffffffff;
  23.509 +	for (i = 0; i < L2_CACHE_SIZE; i++) {
  23.510 +		if (s->l2_cache_counts[i] < min_count) {
  23.511 +			min_count = s->l2_cache_counts[i];
  23.512 +			min_index = i;
  23.513 +		}
  23.514 +	}
  23.515 +	l2_table = s->l2_cache + (min_index << s->l2_bits);
  23.516 +
  23.517 +	/*If extent pre-allocated, read table from disk, 
  23.518 +	 *otherwise write new table to disk*/
  23.519 +	if (new_l2_table) {
  23.520 +		/*Should we allocate the whole extent? Adjustable parameter.*/
  23.521 +		if (s->cluster_alloc == s->l2_size) {
  23.522 +			cluster_offset = l2_offset + 
  23.523 +				(s->l2_size * sizeof(uint64_t));
  23.524 +			cluster_offset = (cluster_offset + s->cluster_size - 1)
  23.525 +				& ~(s->cluster_size - 1);
  23.526 +			if (qtruncate(s->fd, cluster_offset + 
  23.527 +				  (s->cluster_size * s->l2_size), 
  23.528 +				      s->sparse) != 0) {
  23.529 +				DPRINTF("ERROR truncating file\n");
  23.530 +				return 0;
  23.531 +			}
  23.532 +			s->fd_end = cluster_offset + 
  23.533 +				(s->cluster_size * s->l2_size);
  23.534 +			for (i = 0; i < s->l2_size; i++) {
  23.535 +				l2_table[i] = cpu_to_be64(cluster_offset + 
  23.536 +							  (i*s->cluster_size));
  23.537 +			}  
  23.538 +		} else memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
  23.539 +
  23.540 +		lseek(s->fd, l2_offset, SEEK_SET);
  23.541 +		if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
  23.542 +		   s->l2_size * sizeof(uint64_t))
  23.543 +			return 0;
  23.544 +	} else {
  23.545 +		lseek(s->fd, l2_offset, SEEK_SET);
  23.546 +		if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) != 
  23.547 +		    s->l2_size * sizeof(uint64_t))
  23.548 +			return 0;
  23.549 +	}
  23.550 +	
  23.551 +	/*Update the cache entries*/ 
  23.552 +	s->l2_cache_offsets[min_index] = l2_offset;
  23.553 +	s->l2_cache_counts[min_index] = 1;
  23.554 +
  23.555 +found:
  23.556 +	/*The extent is split into 's->l2_size' blocks of 
  23.557 +	 *size 's->cluster_size'*/
  23.558 +	l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
  23.559 +	cluster_offset = be64_to_cpu(l2_table[l2_index]);
  23.560 +
  23.561 +	if (!cluster_offset || 
  23.562 +	    ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1) ) {
  23.563 +		if (!allocate)
  23.564 +			return 0;
  23.565 +		
  23.566 +		if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
  23.567 +		    (n_end - n_start) < s->cluster_sectors) {
  23.568 +			/* cluster is already allocated but compressed, we must
  23.569 +			   decompress it in the case it is not completely
  23.570 +			   overwritten */
  23.571 +			if (decompress_cluster(s, cluster_offset) < 0)
  23.572 +				return 0;
  23.573 +			cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
  23.574 +			cluster_offset = (cluster_offset + s->cluster_size - 1)
  23.575 +				& ~(s->cluster_size - 1);
  23.576 +			/* write the cluster content - not asynchronous */
  23.577 +			lseek(s->fd, cluster_offset, SEEK_SET);
  23.578 +			if (write(s->fd, s->cluster_cache, s->cluster_size) != 
  23.579 +			    s->cluster_size)
  23.580 +			    return -1;
  23.581 +		} else {
  23.582 +			/* allocate a new cluster */
  23.583 +			cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
  23.584 +			if (allocate == 1) {
  23.585 +				/* round to cluster size */
  23.586 +				cluster_offset = 
  23.587 +					(cluster_offset + s->cluster_size - 1) 
  23.588 +					& ~(s->cluster_size - 1);
  23.589 +				if (qtruncate(s->fd, cluster_offset + 
  23.590 +					      s->cluster_size, s->sparse)!=0) {
  23.591 +					DPRINTF("ERROR truncating file\n");
  23.592 +					return 0;
  23.593 +				}
  23.594 +				s->fd_end = (cluster_offset + s->cluster_size);
  23.595 +				/* if encrypted, we must initialize the cluster
  23.596 +				   content which won't be written */
  23.597 +				if (s->crypt_method && 
  23.598 +				    (n_end - n_start) < s->cluster_sectors) {
  23.599 +					uint64_t start_sect;
  23.600 +					start_sect = (offset & 
  23.601 +						      ~(s->cluster_size - 1)) 
  23.602 +							      >> 9;
  23.603 +					memset(s->cluster_data + 512, 
  23.604 +					       0xaa, 512);
  23.605 +					for (i = 0; i < s->cluster_sectors;i++)
  23.606 +					{
  23.607 +						if (i < n_start || i >= n_end) 
  23.608 +						{
  23.609 +							encrypt_sectors(s, start_sect + i, 
  23.610 +									s->cluster_data, 
  23.611 +									s->cluster_data + 512, 1, 1,
  23.612 +									&s->aes_encrypt_key);
  23.613 +							lseek(s->fd, cluster_offset + i * 512, SEEK_SET);
  23.614 +							if (write(s->fd, s->cluster_data, 512) != 512)
  23.615 +								return -1;
  23.616 +						}
  23.617 +					}
  23.618 +				}
  23.619 +			} else {
  23.620 +				cluster_offset |= QCOW_OFLAG_COMPRESSED | 
  23.621 +					(uint64_t)compressed_size 
  23.622 +						<< (63 - s->cluster_bits);
  23.623 +			}
  23.624 +		}
  23.625 +		/* update L2 table */
  23.626 +		tmp = cpu_to_be64(cluster_offset);
  23.627 +		l2_table[l2_index] = tmp;
  23.628 +
  23.629 +		/*For IO_DIRECT we write 4KByte blocks*/
  23.630 +		l2_sector = (l2_index * sizeof(uint64_t)) >> 12;
  23.631 +		l2_ptr = (char *)l2_table + (l2_sector << 12);
  23.632 +		
  23.633 +		if (posix_memalign((void **)&tmp_ptr2, 4096, 4096) != 0) {
  23.634 +			DPRINTF("ERROR allocating memory for L1 table\n");
  23.635 +		}
  23.636 +		memcpy(tmp_ptr2, l2_ptr, 4096);
  23.637 +		lseek(s->fd, l2_offset + (l2_sector << 12), SEEK_SET);
  23.638 +		if (write(s->fd, tmp_ptr2, 4096) != 4096) {
  23.639 +			free(tmp_ptr2);
  23.640 +			return -1;
  23.641 +		}
  23.642 +		free(tmp_ptr2);
  23.643 +	}
  23.644 +	return cluster_offset;
  23.645 +}
  23.646 +
  23.647 +static int qcow_is_allocated(struct tdqcow_state *s, int64_t sector_num,
  23.648 +                             int nb_sectors, int *pnum)
  23.649 +{
  23.650 +	int index_in_cluster, n;
  23.651 +	uint64_t cluster_offset;
  23.652 +
  23.653 +	cluster_offset = get_cluster_offset(s, sector_num << 9, 0, 0, 0, 0);
  23.654 +	index_in_cluster = sector_num & (s->cluster_sectors - 1);
  23.655 +	n = s->cluster_sectors - index_in_cluster;
  23.656 +	if (n > nb_sectors)
  23.657 +		n = nb_sectors;
  23.658 +	*pnum = n;
  23.659 +	return (cluster_offset != 0);
  23.660 +}
  23.661 +
  23.662 +static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
  23.663 +                             const uint8_t *buf, int buf_size)
  23.664 +{
  23.665 +	z_stream strm1, *strm = &strm1;
  23.666 +	int ret, out_len;
  23.667 +	
  23.668 +	memset(strm, 0, sizeof(*strm));
  23.669 +	
  23.670 +	strm->next_in = (uint8_t *)buf;
  23.671 +	strm->avail_in = buf_size;
  23.672 +	strm->next_out = out_buf;
  23.673 +	strm->avail_out = out_buf_size;
  23.674 +	
  23.675 +	ret = inflateInit2(strm, -12);
  23.676 +	if (ret != Z_OK)
  23.677 +		return -1;
  23.678 +	ret = inflate(strm, Z_FINISH);
  23.679 +	out_len = strm->next_out - out_buf;
  23.680 +	if ( (ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
  23.681 +	    (out_len != out_buf_size) ) {
  23.682 +		inflateEnd(strm);
  23.683 +		return -1;
  23.684 +	}
  23.685 +	inflateEnd(strm);
  23.686 +	return 0;
  23.687 +}
  23.688 +                              
  23.689 +static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset)
  23.690 +{
  23.691 +	int ret, csize;
  23.692 +	uint64_t coffset;
  23.693 +
  23.694 +	coffset = cluster_offset & s->cluster_offset_mask;
  23.695 +	if (s->cluster_cache_offset != coffset) {
  23.696 +		csize = cluster_offset >> (63 - s->cluster_bits);
  23.697 +		csize &= (s->cluster_size - 1);
  23.698 +		lseek(s->fd, coffset, SEEK_SET);
  23.699 +		ret = read(s->fd, s->cluster_data, csize);
  23.700 +		if (ret != csize) 
  23.701 +			return -1;
  23.702 +		if (decompress_buffer(s->cluster_cache, s->cluster_size,
  23.703 +				      s->cluster_data, csize) < 0) {
  23.704 +			return -1;
  23.705 +		}
  23.706 +		s->cluster_cache_offset = coffset;
  23.707 +	}
  23.708 +	return 0;
  23.709 +}
  23.710 +
  23.711 +static int
  23.712 +tdqcow_read_header(int fd, QCowHeader *header)
  23.713 +{
  23.714 +	int err;
  23.715 +	char *buf;
  23.716 +	struct stat st;
  23.717 +	size_t size, expected;
  23.718 +
  23.719 +	memset(header, 0, sizeof(*header));
  23.720 +
  23.721 +	err = fstat(fd, &st);
  23.722 +	if (err)
  23.723 +		return -errno;
  23.724 +
  23.725 +	err = lseek(fd, 0, SEEK_SET);
  23.726 +	if (err == (off_t)-1)
  23.727 +		return -errno;
  23.728 +
  23.729 +	size = (sizeof(*header) + 511) & ~511;
  23.730 +	err = posix_memalign((void **)&buf, 512, size);
  23.731 +	if (err)
  23.732 +		return err;
  23.733 +
  23.734 +	expected = size;
  23.735 +	if (st.st_size < size)
  23.736 +		expected = st.st_size;
  23.737 +
  23.738 +	errno = 0;
  23.739 +	err = read(fd, buf, size);
  23.740 +	if (err != expected) {
  23.741 +		err = (errno ? -errno : -EIO);
  23.742 +		goto out;
  23.743 +	}
  23.744 +
  23.745 +	memcpy(header, buf, sizeof(*header));
  23.746 +	be32_to_cpus(&header->magic);
  23.747 +	be32_to_cpus(&header->version);
  23.748 +	be64_to_cpus(&header->backing_file_offset);
  23.749 +	be32_to_cpus(&header->backing_file_size);
  23.750 +	be32_to_cpus(&header->mtime);
  23.751 +	be64_to_cpus(&header->size);
  23.752 +	be32_to_cpus(&header->crypt_method);
  23.753 +	be64_to_cpus(&header->l1_table_offset);
  23.754 +
  23.755 +	err = 0;
  23.756 +
  23.757 +out:
  23.758 +	free(buf);
  23.759 +	return err;
  23.760 +}
  23.761 +
  23.762 +static int
  23.763 +tdqcow_load_l1_table(struct tdqcow_state *s, QCowHeader *header)
  23.764 +{
  23.765 +	char *buf;
  23.766 +	struct stat st;
  23.767 +	size_t expected;
  23.768 +	int i, err, shift;
  23.769 +	QCowHeader_ext *exthdr;
  23.770 +	uint32_t l1_table_bytes, l1_table_block, l1_table_size;
  23.771 +
  23.772 +	buf         = NULL;
  23.773 +	s->l1_table = NULL;
  23.774 +
  23.775 +	shift = s->cluster_bits + s->l2_bits;
  23.776 +
  23.777 +	s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
  23.778 +	s->l1_table_offset = header->l1_table_offset;
  23.779 +
  23.780 +	s->min_cluster_alloc = 1; /* default */
  23.781 +
  23.782 +	l1_table_bytes = s->l1_size * sizeof(uint64_t);
  23.783 +	l1_table_size  = (l1_table_bytes + 4095) & ~4095;
  23.784 +	l1_table_block = (l1_table_bytes + s->l1_table_offset + 4095) & ~4095;
  23.785 +
  23.786 +	DPRINTF("L1 Table offset detected: %"PRIu64", size %d (%d)\n",
  23.787 +		(uint64_t)s->l1_table_offset,
  23.788 +		(int) (s->l1_size * sizeof(uint64_t)), 
  23.789 +		l1_table_size);
  23.790 +
  23.791 +	err = fstat(s->fd, &st);
  23.792 +	if (err) {
  23.793 +		err = -errno;
  23.794 +		goto out;
  23.795 +	}
  23.796 +
  23.797 +	err = lseek(s->fd, 0, SEEK_SET);
  23.798 +	if (err == (off_t)-1) {
  23.799 +		err = -errno;
  23.800 +		goto out;
  23.801 +	}
  23.802 +
  23.803 +	err = posix_memalign((void **)&buf, 512, l1_table_block);
  23.804 +	if (err) {
  23.805 +		buf = NULL;
  23.806 +		goto out;
  23.807 +	}
  23.808 +
  23.809 +	err = posix_memalign((void **)&s->l1_table, 4096, l1_table_size);
  23.810 +	if (err) {
  23.811 +		s->l1_table = NULL;
  23.812 +		goto out;
  23.813 +	}
  23.814 +
  23.815 +	memset(buf, 0, l1_table_block);
  23.816 +	memset(s->l1_table, 0, l1_table_size);
  23.817 +
  23.818 +	expected = l1_table_block;
  23.819 +	if (st.st_size < l1_table_block)
  23.820 +		expected = st.st_size;
  23.821 +
  23.822 +	errno = 0;
  23.823 +	err = read(s->fd, buf, l1_table_block);
  23.824 +	if (err != expected) {
  23.825 +		err = (errno ? -errno : -EIO);
  23.826 +		goto out;
  23.827 +	}
  23.828 +
  23.829 +	memcpy(s->l1_table, buf + s->l1_table_offset, l1_table_size);
  23.830 +	exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));
  23.831 +
  23.832 +	/* check for xen extended header */
  23.833 +	if (s->l1_table_offset % 4096 == 0 &&
  23.834 +	    be32_to_cpu(exthdr->xmagic) == XEN_MAGIC) {
  23.835 +		uint32_t flags = be32_to_cpu(exthdr->flags);
  23.836 +		uint32_t cksum = be32_to_cpu(exthdr->cksum);
  23.837 +
  23.838 +		/*
  23.839 +		 * Try to detect old tapdisk images. They have to be fixed
  23.840 +		 * because they use big endian rather than native endian for
  23.841 +		 * the L1 table.  After this block, the l1 table will
  23.842 +		 * definitely be in BIG endian.
  23.843 +		 */
  23.844 +		if (!(flags & EXTHDR_L1_BIG_ENDIAN)) {
  23.845 +			DPRINTF("qcow: converting to big endian L1 table\n");
  23.846 +
  23.847 +			/* convert to big endian */
  23.848 +			for (i = 0; i < s->l1_size; i++)
  23.849 +				cpu_to_be64s(&s->l1_table[i]);
  23.850 +
  23.851 +			flags |= EXTHDR_L1_BIG_ENDIAN;
  23.852 +			exthdr->flags = cpu_to_be32(flags);
  23.853 +
  23.854 +			memcpy(buf + s->l1_table_offset,
  23.855 +			       s->l1_table, l1_table_size);
  23.856 +			
  23.857 +			err = lseek(s->fd, 0, SEEK_SET);
  23.858 +			if (err == (off_t)-1) {
  23.859 +				err = -errno;
  23.860 +				goto out;
  23.861 +			}
  23.862 +
  23.863 +			err = atomicio(vwrite, s->fd, buf, l1_table_block);
  23.864 +			if (err != l1_table_block) {
  23.865 +				err = -errno;
  23.866 +				goto out;
  23.867 +			}
  23.868 +		}
  23.869 +
  23.870 +		/* check the L1 table checksum */
  23.871 +		if (cksum != gen_cksum((char *)s->l1_table,
  23.872 +				       s->l1_size * sizeof(uint64_t)))
  23.873 +			DPRINTF("qcow: bad L1 checksum\n");
  23.874 +		else {
  23.875 +			s->extended = 1;
  23.876 +			s->sparse = (be32_to_cpu(exthdr->flags) & SPARSE_FILE);
  23.877 +			s->min_cluster_alloc =
  23.878 +				be32_to_cpu(exthdr->min_cluster_alloc);
  23.879 +		}
  23.880 +	}
  23.881 +
  23.882 +	/* convert L1 table to native endian for operation */
  23.883 +	for (i = 0; i < s->l1_size; i++)
  23.884 +		be64_to_cpus(&s->l1_table[i]);
  23.885 +
  23.886 +	err = 0;
  23.887 +
  23.888 +out:
  23.889 +	if (err) {
  23.890 +		free(buf);
  23.891 +		free(s->l1_table);
  23.892 +		s->l1_table = NULL;
  23.893 +	}
  23.894 +	return err;
  23.895 +}
  23.896 +
  23.897 +/* Open the disk file and initialize qcow state.
         + *
         + * Opens 'name' with O_DIRECT (read-only when flags == TD_OPEN_RDONLY,
         + * read-write otherwise), validates the QCOW header, loads the L1 table,
         + * allocates the L2 cache and the cluster buffers, sets up the AIO request
         + * pool and works out the current end of the file.  Only version
         + * QCOW_VERSION images are accepted.
         + *
         + * Returns 0 on success and -1 on failure.  On failure everything
         + * allocated here is released, the pointers are reset to NULL and the
         + * file is closed.
         + */
  23.898 +int tdqcow_open (td_driver_t *driver, const char *name, td_flag_t flags)
  23.899 +{
  23.900 +	int fd, i, ret, size, o_flags;
  23.901 +	td_disk_info_t *bs = &(driver->info);
  23.902 +	struct tdqcow_state   *s  = (struct tdqcow_state *)driver->data;
  23.903 +	QCowHeader header;
  23.904 +	uint64_t final_cluster = 0;
  23.905 +
  23.906 + 	DPRINTF("QCOW: Opening %s\n", name);
  23.907 +
  23.908 +	o_flags = O_DIRECT | O_LARGEFILE | 
  23.909 +		((flags == TD_OPEN_RDONLY) ? O_RDONLY : O_RDWR);
  23.910 +	fd = open(name, o_flags);
  23.911 +	if (fd < 0) {
  23.912 +		DPRINTF("Unable to open %s (%d)\n", name, -errno);
  23.913 +		return -1;
  23.914 +	}
  23.915 +
         +	/*
         +	 * The failure path frees every one of these, so none of them may
         +	 * still hold an uninitialised value when an early check fails.
         +	 */
         +	s->l1_table      = NULL;
         +	s->l2_cache      = NULL;
         +	s->cluster_cache = NULL;
         +	s->cluster_data  = NULL;
         +	s->aio_requests  = NULL;
         +	s->aio_free_list = NULL;
         +
  23.916 +	s->fd = fd;
  23.917 +	s->name = strdup(name);
  23.918 +	if (!s->name)
  23.919 +		goto fail;
  23.920 +
  23.921 +	if (tdqcow_read_header(fd, &header))
  23.922 +		goto fail;
  23.923 +
  23.924 +	if (header.magic != QCOW_MAGIC)
  23.925 +		goto fail;
  23.926 +
  23.927 +	switch (header.version) {
  23.928 +	case QCOW_VERSION:
  23.929 +		break;
  23.930 +	case 2:
  23.931 +	  //TODO: Port qcow2 to new blktap framework.
  23.932 +	  //		close(fd);
  23.933 +	  //		dd->drv = &tapdisk_qcow2;
  23.934 +	  //		return dd->drv->td_open(dd, name, flags);
  23.935 +	  goto fail;
  23.936 +	default:
  23.937 +		goto fail;
  23.938 +	}
  23.939 +
  23.940 +	if (header.size <= 1 || header.cluster_bits < 9)
  23.941 +		goto fail;
  23.942 +	if (header.crypt_method > QCOW_CRYPT_AES)
  23.943 +		goto fail;
  23.944 +	s->crypt_method_header = header.crypt_method;
  23.945 +	if (s->crypt_method_header)
  23.946 +		s->encrypted = 1;
  23.947 +	s->cluster_bits = header.cluster_bits;
  23.948 +	s->cluster_size = 1 << s->cluster_bits;
  23.949 +	s->cluster_sectors = 1 << (s->cluster_bits - 9);
  23.950 +	s->l2_bits = header.l2_bits;
  23.951 +	s->l2_size = 1 << s->l2_bits;
  23.952 +	s->cluster_alloc = s->l2_size;
  23.953 +	bs->size = header.size / 512;
  23.954 +	s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
  23.955 +	s->backing_file_offset = header.backing_file_offset;
  23.956 +	s->backing_file_size   = header.backing_file_size;
  23.957 +
  23.958 +	/* allocate and load l1 table */
  23.959 +	if (tdqcow_load_l1_table(s, &header))
  23.960 +		goto fail;
  23.961 +
  23.962 +	/* alloc L2 cache */
  23.963 +	size = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t);
  23.964 +	ret = posix_memalign((void **)&s->l2_cache, 4096, size);
  23.965 +	if(ret != 0) {
         +		s->l2_cache = NULL;
         +		goto fail;
         +	}
  23.966 +
  23.967 +	size = s->cluster_size;
  23.968 +	ret = posix_memalign((void **)&s->cluster_cache, 4096, size);
  23.969 +	if(ret != 0) {
         +		s->cluster_cache = NULL;
         +		goto fail;
         +	}
  23.970 +
  23.971 +	ret = posix_memalign((void **)&s->cluster_data, 4096, size);
  23.972 +	if(ret != 0) {
         +		s->cluster_data = NULL;
         +		goto fail;
         +	}
  23.973 +	s->cluster_cache_offset = -1;
  23.974 +
  23.975 +	if (s->backing_file_offset != 0)
  23.976 +		s->cluster_alloc = 1; /*Cannot use pre-alloc*/
  23.977 +
  23.978 +        bs->sector_size = 512;
  23.979 +        bs->info = 0;
  23.980 +
  23.981 +	for(i = 0; i < s->l1_size; i++)
  23.982 +		if (s->l1_table[i] > final_cluster)
  23.983 +			final_cluster = s->l1_table[i];
  23.984 +
  23.985 +	if (init_aio_state(driver)!=0) {
  23.986 +	  DPRINTF("Unable to initialise AIO state\n");
         +	  /* the failure path releases the AIO state; freeing it here as
         +	   * well would free the same memory twice */
  23.988 +	  goto fail;
  23.989 +	}
  23.990 +
  23.991 +	if (!final_cluster)
  23.992 +		s->fd_end = s->l1_table_offset +
  23.993 +			((s->l1_size * sizeof(uint64_t) + 4095) & ~4095);
  23.994 +	else {
  23.995 +		s->fd_end = lseek64(fd, 0, SEEK_END);
  23.996 +		if (s->fd_end == (off64_t)-1)
  23.997 +			goto fail;
  23.998 +	}
  23.999 +
 23.1000 +	return 0;
 23.1001 +	
 23.1002 +fail:
 23.1003 +	DPRINTF("QCOW Open failed\n");
 23.1004 +
 23.1005 +	free_aio_state(s);
         +	s->aio_requests  = NULL;
         +	s->aio_free_list = NULL;
 23.1006 +	free(s->l1_table);
         +	s->l1_table = NULL;
 23.1007 +	free(s->l2_cache);
         +	s->l2_cache = NULL;
 23.1008 +	free(s->cluster_cache);
         +	s->cluster_cache = NULL;
 23.1009 +	free(s->cluster_data);
         +	s->cluster_data = NULL;
         +	free(s->name);
         +	s->name = NULL;
 23.1010 +	close(fd);
 23.1011 +	return -1;
 23.1012 +}
 23.1013 +
 23.1014 +void tdqcow_queue_read(td_driver_t *driver, td_request_t treq)
 23.1015 +{
 23.1016 +	struct tdqcow_state   *s  = (struct tdqcow_state *)driver->data;
 23.1017 +	int ret = 0, index_in_cluster, n, i;
 23.1018 +	uint64_t cluster_offset, sector, nb_sectors;
 23.1019 +	struct qcow_prv* prv;
 23.1020 +	td_request_t clone = treq;
 23.1021 +	char* buf = treq.buf;
 23.1022 +
 23.1023 +	sector     = treq.sec;
 23.1024 +	nb_sectors = treq.secs;
 23.1025 +
 23.1026 +	/*We store a local record of the request*/
 23.1027 +	while (nb_sectors > 0) {
 23.1028 +		cluster_offset = 
 23.1029 +			get_cluster_offset(s, sector << 9, 0, 0, 0, 0);
 23.1030 +		index_in_cluster = sector & (s->cluster_sectors - 1);
 23.1031 +		n = s->cluster_sectors - index_in_cluster;
 23.1032 +		if (n > nb_sectors)
 23.1033 +			n = nb_sectors;
 23.1034 +
 23.1035 +		if (s->aio_free_count == 0) {
 23.1036 +			td_complete_request(treq, -EBUSY);
 23.1037 +			return;
 23.1038 +		}
 23.1039 +		
 23.1040 +		if(!cluster_offset) {
 23.1041 +			treq.buf  = buf;
 23.1042 +			treq.sec  = sector;
 23.1043 +			treq.secs = n;
 23.1044 +			td_forward_request(treq);
 23.1045 +
 23.1046 +		} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
 23.1047 +			if (decompress_cluster(s, cluster_offset) < 0) {
 23.1048 +				td_complete_request(treq, -EIO);
 23.1049 +				goto done;
 23.1050 +			}
 23.1051 +			memcpy(buf, s->cluster_cache + index_in_cluster * 512, 
 23.1052 +			       512 * n);
 23.1053 +			
 23.1054 +			treq.buf  = buf;
 23.1055 +			treq.sec  = sector;
 23.1056 +			treq.secs = n;
 23.1057 +			td_complete_request(treq, 0);
 23.1058 +		} else {
 23.1059 +		  clone.buf  = buf;
 23.1060 +		  clone.sec  = (cluster_offset>>9)+index_in_cluster;
 23.1061 +		  clone.secs = n;
 23.1062 +		  async_read(driver, clone);
 23.1063 +		}
 23.1064 +		nb_sectors -= n;
 23.1065 +		sector += n;
 23.1066 +		buf += n * 512;
 23.1067 +	}
 23.1068 +done:
 23.1069 +	return;
 23.1070 +}
 23.1071 +
 23.1072 +void tdqcow_queue_write(td_driver_t *driver, td_request_t treq)
 23.1073 +{
 23.1074 +	struct tdqcow_state   *s  = (struct tdqcow_state *)driver->data;
 23.1075 +	int ret = 0, index_in_cluster, n, i;
 23.1076 +	uint64_t cluster_offset, sector, nb_sectors;
 23.1077 +	td_callback_t cb;
 23.1078 +	struct qcow_prv* prv;
 23.1079 +	char* buf = treq.buf;
 23.1080 +	td_request_t clone=treq;
 23.1081 +
 23.1082 +	sector     = treq.sec;
 23.1083 +	nb_sectors = treq.secs;
 23.1084 +		   
 23.1085 +	/*We store a local record of the request*/
 23.1086 +	while (nb_sectors > 0) {
 23.1087 +		index_in_cluster = sector & (s->cluster_sectors - 1);
 23.1088 +		n = s->cluster_sectors - index_in_cluster;
 23.1089 +		if (n > nb_sectors)
 23.1090 +			n = nb_sectors;
 23.1091 +
 23.1092 +		if (s->aio_free_count == 0) {
 23.1093 +			td_complete_request(treq, -EBUSY);
 23.1094 +			return;
 23.1095 +		}
 23.1096 +
 23.1097 +		cluster_offset = get_cluster_offset(s, sector << 9, 1, 0,
 23.1098 +						    index_in_cluster, 
 23.1099 +						    index_in_cluster+n);
 23.1100 +		if (!cluster_offset) {
 23.1101 +			DPRINTF("Ooops, no write cluster offset!\n");
 23.1102 +			td_complete_request(treq, -EIO);
 23.1103 +			return;
 23.1104 +		}
 23.1105 +
 23.1106 +		if (s->crypt_method) {
 23.1107 +			encrypt_sectors(s, sector, s->cluster_data, 
 23.1108 +					(unsigned char *)buf, n, 1,
 23.1109 +					&s->aes_encrypt_key);
 23.1110 +
 23.1111 +			clone.buf  = buf;
 23.1112 +			clone.sec  = (cluster_offset>>9) + index_in_cluster;
 23.1113 +			clone.secs = n;
 23.1114 +			async_write(driver, clone);
 23.1115 +		} else {
 23.1116 +		  clone.buf  = buf;
 23.1117 +		  clone.sec  = (cluster_offset>>9) + index_in_cluster;
 23.1118 +		  clone.secs = n;
 23.1119 +
 23.1120 +		  async_write(driver, clone);
 23.1121 +		}
 23.1122 +		
 23.1123 +		nb_sectors -= n;
 23.1124 +		sector += n;
 23.1125 +		buf += n * 512;
 23.1126 +	}
 23.1127 +	s->cluster_cache_offset = -1; /* disable compressed cache */
 23.1128 +
 23.1129 +	return;
 23.1130 +}
 23.1131 +
 23.1132 +static int
 23.1133 +tdqcow_update_checksum(struct tdqcow_state *s)
 23.1134 +{
 23.1135 +	int i, fd, err;
 23.1136 +	uint32_t offset, cksum, out;
 23.1137 +
 23.1138 +	if (!s->extended)
 23.1139 +		return 0;
 23.1140 +
 23.1141 +	fd = open(s->name, O_WRONLY | O_LARGEFILE); /* open without O_DIRECT */
 23.1142 +	if (fd == -1) {
 23.1143 +		err = errno;
 23.1144 +		goto out;
 23.1145 +	}
 23.1146 +
 23.1147 +	offset = sizeof(QCowHeader) + offsetof(QCowHeader_ext, cksum);
 23.1148 +	if (lseek(fd, offset, SEEK_SET) == (off_t)-1) {
 23.1149 +		err = errno;
 23.1150 +		goto out;
 23.1151 +	}
 23.1152 +
 23.1153 +	/* convert to big endian for checksum */
 23.1154 +	for (i = 0; i < s->l1_size; i++)
 23.1155 +		cpu_to_be64s(&s->l1_table[i]);
 23.1156 +
 23.1157 +	cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t));
 23.1158 +
 23.1159 +	/* and back again... */
 23.1160 +	for (i = 0; i < s->l1_size; i++)
 23.1161 +		be64_to_cpus(&s->l1_table[i]);
 23.1162 +
 23.1163 +	DPRINTF("Writing cksum: %d", cksum);
 23.1164 +
 23.1165 +	out = cpu_to_be32(cksum);
 23.1166 +	if (write(fd, &out, sizeof(out)) != sizeof(out)) {
 23.1167 +		err = errno;
 23.1168 +		goto out;
 23.1169 +	}
 23.1170 +
 23.1171 +	err = 0;
 23.1172 +
 23.1173 +out:
 23.1174 +	if (err)
 23.1175 +		DPRINTF("failed to update checksum: %d\n", err);
 23.1176 +	if (fd != -1)
 23.1177 +		close(fd);
 23.1178 +	return err;
 23.1179 +}
 23.1180 + 		
 23.1181 +int tdqcow_close(td_driver_t *driver)
 23.1182 +{
 23.1183 +	struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
 23.1184 +
 23.1185 +	/*Update the hdr cksum*/
 23.1186 +	tdqcow_update_checksum(s);
 23.1187 +
 23.1188 +	free_aio_state(s);
 23.1189 +	free(s->name);
 23.1190 +	free(s->l1_table);
 23.1191 +	free(s->l2_cache);
 23.1192 +	free(s->cluster_cache);
 23.1193 +	free(s->cluster_data);
 23.1194 +	close(s->fd);	
 23.1195 +	return 0;
 23.1196 +}
 23.1197 +
 23.1198 +int qcow_create(const char *filename, uint64_t total_size,
 23.1199 +		const char *backing_file, int sparse)
 23.1200 +{
 23.1201 +	int fd, header_size, backing_filename_len, l1_size, i;
 23.1202 +	int shift, length, adjust, flags = 0, ret = 0;
 23.1203 +	QCowHeader header;
 23.1204 +	QCowHeader_ext exthdr;
 23.1205 +	char backing_filename[PATH_MAX], *ptr;
 23.1206 +	uint64_t tmp, size, total_length;
 23.1207 +	struct stat st;
 23.1208 +
 23.1209 +	DPRINTF("Qcow_create: size %"PRIu64"\n",total_size);
 23.1210 +
 23.1211 +	fd = open(filename, 
 23.1212 +		  O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
 23.1213 +		  0644);
 23.1214 +	if (fd < 0)
 23.1215 +		return -1;
 23.1216 +
 23.1217 +	memset(&header, 0, sizeof(header));
 23.1218 +	header.magic = cpu_to_be32(QCOW_MAGIC);
 23.1219 +	header.version = cpu_to_be32(QCOW_VERSION);
 23.1220 +
 23.1221 +	/*Create extended header fields*/
 23.1222 +	exthdr.xmagic = cpu_to_be32(XEN_MAGIC);
 23.1223 +
 23.1224 +	header_size = sizeof(header) + sizeof(QCowHeader_ext);
 23.1225 +	backing_filename_len = 0;
 23.1226 +	size = (total_size >> SECTOR_SHIFT);
 23.1227 +	if (backing_file) {
 23.1228 +		if (strcmp(backing_file, "fat:")) {
 23.1229 +			const char *p;
 23.1230 +			/* XXX: this is a hack: we do not attempt to 
 23.1231 +			 *check for URL like syntax */
 23.1232 +			p = strchr(backing_file, ':');
 23.1233 +			if (p && (p - backing_file) >= 2) {
 23.1234 +				/* URL like but exclude "c:" like filenames */
 23.1235 +				strncpy(backing_filename, backing_file,
 23.1236 +					sizeof(backing_filename));
 23.1237 +			} else {
 23.1238 +				if (realpath(backing_file, backing_filename) == NULL ||
 23.1239 +				    stat(backing_filename, &st) != 0) {
 23.1240 +					return -1;
 23.1241 +				}
 23.1242 +			}
 23.1243 +			header.backing_file_offset = cpu_to_be64(header_size);
 23.1244 +			backing_filename_len = strlen(backing_filename);
 23.1245 +			header.backing_file_size = cpu_to_be32(
 23.1246 +				backing_filename_len);
 23.1247 +			header_size += backing_filename_len;
 23.1248 +			
 23.1249 +			/*Set to the backing file size*/
 23.1250 +			if(get_filesize(backing_filename, &size, &st)) {
 23.1251 +				return -1;
 23.1252 +			}
 23.1253 +			DPRINTF("Backing file size detected: %"PRId64" sectors" 
 23.1254 +				"(total %"PRId64" [%"PRId64" MB])\n", 
 23.1255 +				size, 
 23.1256 +				(uint64_t)(size << SECTOR_SHIFT), 
 23.1257 +				(uint64_t)(size >> 11));
 23.1258 +		} else {
 23.1259 +			backing_file = NULL;
 23.1260 +			DPRINTF("Setting file size: %"PRId64" (total %"PRId64")\n", 
 23.1261 +				total_size, 
 23.1262 +				(uint64_t) (total_size << SECTOR_SHIFT));
 23.1263 +		}
 23.1264 +		header.mtime = cpu_to_be32(st.st_mtime);
 23.1265 +		header.cluster_bits = 9; /* 512 byte cluster to avoid copying
 23.1266 +					    unmodifyed sectors */
 23.1267 +		header.l2_bits = 12; /* 32 KB L2 tables */
 23.1268 +		exthdr.min_cluster_alloc = cpu_to_be32(1);
 23.1269 +	} else {
 23.1270 +		DPRINTF("Setting file size: %"PRId64" sectors" 
 23.1271 +			"(total %"PRId64" [%"PRId64" MB])\n", 
 23.1272 +			size, 
 23.1273 +			(uint64_t) (size << SECTOR_SHIFT), 
 23.1274 +			(uint64_t) (size >> 11));
 23.1275 +		header.cluster_bits = 12; /* 4 KB clusters */
 23.1276 +		header.l2_bits = 9; /* 4 KB L2 tables */
 23.1277 +		exthdr.min_cluster_alloc = cpu_to_be32(1 << 9);
 23.1278 +	}
 23.1279 +	/*Set the header size value*/
 23.1280 +	header.size = cpu_to_be64(size * 512);
 23.1281 +	
 23.1282 +	header_size = (header_size + 7) & ~7;
 23.1283 +	if (header_size % 4096 > 0) {
 23.1284 +		header_size = ((header_size >> 12) + 1) << 12;
 23.1285 +	}
 23.1286 +
 23.1287 +	shift = header.cluster_bits + header.l2_bits;
 23.1288 +	l1_size = ((size * 512) + (1LL << shift) - 1) >> shift;
 23.1289 +
 23.1290 +	header.l1_table_offset = cpu_to_be64(header_size);
 23.1291 +	DPRINTF("L1 Table offset: %d, size %d\n",
 23.1292 +		header_size,
 23.1293 +		(int)(l1_size * sizeof(uint64_t)));
 23.1294 +	header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
 23.1295 +
 23.1296 +	ptr = calloc(1, l1_size * sizeof(uint64_t));
 23.1297 +	exthdr.cksum = cpu_to_be32(gen_cksum(ptr, l1_size * sizeof(uint64_t)));
 23.1298 +	printf("Created cksum: %d\n",exthdr.cksum);
 23.1299 +	free(ptr);
 23.1300 +
 23.1301 +	/*adjust file length to system page size boundary*/
 23.1302 +	length = ROUNDUP(header_size + (l1_size * sizeof(uint64_t)),
 23.1303 +		getpagesize());
 23.1304 +	if (qtruncate(fd, length, 0)!=0) {
 23.1305 +		DPRINTF("ERROR truncating file\n");
 23.1306 +		return -1;
 23.1307 +	}
 23.1308 +
 23.1309 +	if (sparse == 0) {
 23.1310 +		/*Filesize is length+l1_size*(1 << s->l2_bits)+(size*512)*/
 23.1311 +		total_length = length + (l1_size * (1 << 9)) + (size * 512);
 23.1312 +		if (qtruncate(fd, total_length, 0)!=0) {
 23.1313 +                        DPRINTF("ERROR truncating file\n");
 23.1314 +                        return -1;
 23.1315 +		}
 23.1316 +		printf("File truncated to length %"PRIu64"\n",total_length);
 23.1317 +	} else
 23.1318 +		flags = SPARSE_FILE;
 23.1319 +
 23.1320 +	flags |= EXTHDR_L1_BIG_ENDIAN;
 23.1321 +	exthdr.flags = cpu_to_be32(flags);
 23.1322 +	
 23.1323 +	/* write all the data */
 23.1324 +	lseek(fd, 0, SEEK_SET);
 23.1325 +	ret += write(fd, &header, sizeof(header));
 23.1326 +	ret += write(fd, &exthdr, sizeof(exthdr));
 23.1327 +	if (backing_file)
 23.1328 +		ret += write(fd, backing_filename, backing_filename_len);
 23.1329 +
 23.1330 +	lseek(fd, header_size, SEEK_SET);
 23.1331 +	tmp = 0;
 23.1332 +	for (i = 0;i < l1_size; i++) {
 23.1333 +		ret += write(fd, &tmp, sizeof(tmp));
 23.1334 +	}
 23.1335 +
 23.1336 +	close(fd);
 23.1337 +
 23.1338 +	return 0;
 23.1339 +}
 23.1340 +
 23.1341 +static int qcow_make_empty(struct tdqcow_state *s)
 23.1342 +{
 23.1343 +	uint32_t l1_length = s->l1_size * sizeof(uint64_t);
 23.1344 +
 23.1345 +	memset(s->l1_table, 0, l1_length);
 23.1346 +	lseek(s->fd, s->l1_table_offset, SEEK_SET);
 23.1347 +	if (write(s->fd, s->l1_table, l1_length) < 0)
 23.1348 +		return -1;
 23.1349 +	if (qtruncate(s->fd, s->l1_table_offset + l1_length, s->sparse)!=0) {
 23.1350 +		DPRINTF("ERROR truncating file\n");
 23.1351 +		return -1;
 23.1352 +	}
 23.1353 +
 23.1354 +	memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
 23.1355 +	memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
 23.1356 +	memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
 23.1357 +
 23.1358 +	return 0;
 23.1359 +}
 23.1360 +
 23.1361 +static int qcow_get_cluster_size(struct tdqcow_state *s)
 23.1362 +{
 23.1363 +	return s->cluster_size;
 23.1364 +}
 23.1365 +
 23.1366 +/* XXX: put compressed sectors first, then all the cluster aligned
 23.1367 +   tables to avoid losing bytes in alignment */
 23.1368 +static int qcow_compress_cluster(struct tdqcow_state *s, int64_t sector_num, 
 23.1369 +                          const uint8_t *buf)
 23.1370 +{
 23.1371 +	z_stream strm;
 23.1372 +	int ret, out_len;
 23.1373 +	uint8_t *out_buf;
 23.1374 +	uint64_t cluster_offset;
 23.1375 +
 23.1376 +	out_buf = malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
 23.1377 +	if (!out_buf)
 23.1378 +		return -1;
 23.1379 +
 23.1380 +	/* best compression, small window, no zlib header */
 23.1381 +	memset(&strm, 0, sizeof(strm));
 23.1382 +	ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
 23.1383 +			   Z_DEFLATED, -12, 
 23.1384 +			   9, Z_DEFAULT_STRATEGY);
 23.1385 +	if (ret != 0) {
 23.1386 +		free(out_buf);
 23.1387 +		return -1;
 23.1388 +	}
 23.1389 +
 23.1390 +	strm.avail_in = s->cluster_size;
 23.1391 +	strm.next_in = (uint8_t *)buf;
 23.1392 +	strm.avail_out = s->cluster_size;
 23.1393 +	strm.next_out = out_buf;
 23.1394 +
 23.1395 +	ret = deflate(&strm, Z_FINISH);
 23.1396 +	if (ret != Z_STREAM_END && ret != Z_OK) {
 23.1397 +		free(out_buf);
 23.1398 +		deflateEnd(&strm);
 23.1399 +		return -1;
 23.1400 +	}
 23.1401 +	out_len = strm.next_out - out_buf;
 23.1402 +
 23.1403 +	deflateEnd(&strm);
 23.1404 +
 23.1405 +	if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
 23.1406 +		/* could not compress: write normal cluster */
 23.1407 +		//tdqcow_queue_write(bs, sector_num, buf, s->cluster_sectors);
 23.1408 +	} else {
 23.1409 +		cluster_offset = get_cluster_offset(s, sector_num << 9, 2, 
 23.1410 +                                            out_len, 0, 0);
 23.1411 +		cluster_offset &= s->cluster_offset_mask;
 23.1412 +		lseek(s->fd, cluster_offset, SEEK_SET);
 23.1413 +		if (write(s->fd, out_buf, out_len) != out_len) {
 23.1414 +			free(out_buf);
 23.1415 +			return -1;
 23.1416 +		}
 23.1417 +	}
 23.1418 +	
 23.1419 +	free(out_buf);
 23.1420 +	return 0;
 23.1421 +}
 23.1422 +
 23.1423 +static int
 23.1424 +tdqcow_get_image_type(const char *file, int *type)
 23.1425 +{
 23.1426 +	int fd;
 23.1427 +	size_t size;
 23.1428 +	QCowHeader header;
 23.1429 +
 23.1430 +	fd = open(file, O_RDONLY);
 23.1431 +	if (fd == -1)
 23.1432 +		return -errno;
 23.1433 +
 23.1434 +	size = read(fd, &header, sizeof(header));
 23.1435 +	close(fd);
 23.1436 +	if (size != sizeof(header))
 23.1437 +		return (errno ? -errno : -EIO);
 23.1438 +
 23.1439 +	be32_to_cpus(&header.magic);
 23.1440 +	if (header.magic == QCOW_MAGIC)
 23.1441 +		*type = DISK_TYPE_QCOW;
 23.1442 +	else
 23.1443 +		*type = DISK_TYPE_AIO;
 23.1444 +
 23.1445 +	return 0;
 23.1446 +}
 23.1447 +
 23.1448 +int tdqcow_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
 23.1449 +{
 23.1450 +	off_t off;
 23.1451 +	char *buf, *filename;
 23.1452 +	int len, secs, type, err = -EINVAL;
 23.1453 +	struct tdqcow_state *child  = (struct tdqcow_state *)driver->data;
 23.1454 +
 23.1455 +	if (!child->backing_file_offset)
 23.1456 +		return TD_NO_PARENT;
 23.1457 +
 23.1458 +	/* read the backing file name */
 23.1459 +	len  = child->backing_file_size;
 23.1460 +	off  = child->backing_file_offset - (child->backing_file_offset % 512);
 23.1461 +	secs = (len + (child->backing_file_offset - off) + 511) >> 9;
 23.1462 +
 23.1463 +	if (posix_memalign((void **)&buf, 512, secs << 9)) 
 23.1464 +		return -1;
 23.1465 +
 23.1466 +	if (lseek(child->fd, off, SEEK_SET) == (off_t)-1)
 23.1467 +		goto out;
 23.1468 +
 23.1469 +	if (read(child->fd, buf, secs << 9) != secs << 9)
 23.1470 +		goto out;
 23.1471 +	filename       = buf + (child->backing_file_offset - off);
 23.1472 +	filename[len]  = '\0';
 23.1473 +
 23.1474 +	if (tdqcow_get_image_type(filename, &type))
 23.1475 +		goto out;
 23.1476 +
 23.1477 +	id->name       = strdup(filename);
 23.1478 +	id->drivertype = type;
 23.1479 +	err            = 0;
 23.1480 + out:
 23.1481 +	free(buf);
 23.1482 +	return err;
 23.1483 +}
 23.1484 +
 23.1485 +int tdqcow_validate_parent(td_driver_t *driver,
 23.1486 +			  td_driver_t *pdriver, td_flag_t flags)
 23.1487 +{
 23.1488 +	struct stat stats;
 23.1489 +	uint64_t psize, csize;
 23.1490 +	struct tdqcow_state *c = (struct tdqcow_state *)driver->data;
 23.1491 +	struct tdqcow_state *p = (struct tdqcow_state *)pdriver->data;
 23.1492 +	
 23.1493 +	if (stat(p->name, &stats))
 23.1494 +		return -EINVAL;
 23.1495 +	if (get_filesize(p->name, &psize, &stats))
 23.1496 +		return -EINVAL;
 23.1497 +
 23.1498 +	if (stat(c->name, &stats))
 23.1499 +		return -EINVAL;
 23.1500 +	if (get_filesize(c->name, &csize, &stats))
 23.1501 +		return -EINVAL;
 23.1502 +
 23.1503 +	if (csize != psize)
 23.1504 +		return -EINVAL;
 23.1505 +
 23.1506 +	return 0;
 23.1507 +}
 23.1508 +
 23.1509 +struct tap_disk tapdisk_qcow = {
 23.1510 +	.disk_type           = "tapdisk_qcow",
 23.1511 +	.flags              = 0,
 23.1512 +	.private_data_size   = sizeof(struct tdqcow_state),
 23.1513 +	.td_open             = tdqcow_open,
 23.1514 +	.td_close            = tdqcow_close,
 23.1515 +	.td_queue_read       = tdqcow_queue_read,
 23.1516 +	.td_queue_write      = tdqcow_queue_write,
 23.1517 +	.td_get_parent_id    = tdqcow_get_parent_id,
 23.1518 +	.td_validate_parent  = tdqcow_validate_parent,
 23.1519 +	.td_debug           = NULL,
 23.1520 +};
    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/tools/blktap2/drivers/block-ram.c	Tue May 26 11:52:31 2009 +0100
    24.3 @@ -0,0 +1,269 @@
    24.4 +/* 
    24.5 + * Copyright (c) 2007, XenSource Inc.
    24.6 + * All rights reserved.
    24.7 + *
    24.8 + * Redistribution and use in source and binary forms, with or without
    24.9 + * modification, are permitted provided that the following conditions are met:
   24.10 + *     * Redistributions of source code must retain the above copyright
   24.11 + *       notice, this list of conditions and the following disclaimer.
   24.12 + *     * Redistributions in binary form must reproduce the above copyright
   24.13 + *       notice, this list of conditions and the following disclaimer in the
   24.14 + *       documentation and/or other materials provided with the distribution.
   24.15 + *     * Neither the name of XenSource Inc. nor the names of its contributors
   24.16 + *       may be used to endorse or promote products derived from this software
   24.17 + *       without specific prior written permission.
   24.18 + *
   24.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24.20 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24.21 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   24.22 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   24.23 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   24.24 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   24.25 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   24.26 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   24.27 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   24.28 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   24.29 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   24.30 + */
   24.31 +
   24.32 +#include <errno.h>
   24.33 +#include <fcntl.h>
   24.34 +#include <stdio.h>
   24.35 +#include <stdlib.h>
   24.36 +#include <unistd.h>
   24.37 +#include <sys/statvfs.h>
   24.38 +#include <sys/stat.h>
   24.39 +#include <sys/ioctl.h>
   24.40 +#include <linux/fs.h>
   24.41 +#include <string.h>
   24.42 +
   24.43 +#include "tapdisk.h"
   24.44 +#include "tapdisk-driver.h"
   24.45 +#include "tapdisk-interface.h"
   24.46 +
   24.47 +char *img;
   24.48 +long int   disksector_size;
   24.49 +long int   disksize;
   24.50 +long int   diskinfo;
   24.51 +static int connections = 0;
   24.52 +
   24.53 +struct tdram_state {
   24.54 +        int fd;
   24.55 +};
   24.56 +
   24.57 +/*Get Image size, secsize*/
   24.58 +static int get_image_info(int fd, td_disk_info_t *info)
   24.59 +{
   24.60 +	int ret;
   24.61 +	long size;
   24.62 +	unsigned long total_size;
   24.63 +	struct statvfs statBuf;
   24.64 +	struct stat stat;
   24.65 +
   24.66 +	ret = fstat(fd, &stat);
   24.67 +	if (ret != 0) {
   24.68 +		DPRINTF("ERROR: fstat failed, Couldn't stat image");
   24.69 +		return -EINVAL;
   24.70 +	}
   24.71 +
   24.72 +	if (S_ISBLK(stat.st_mode)) {
   24.73 +		/*Accessing block device directly*/
   24.74 +		info->size = 0;
   24.75 +		if (ioctl(fd,BLKGETSIZE,&info->size)!=0) {
   24.76 +			DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image");
   24.77 +			return -EINVAL;
   24.78 +		}
   24.79 +
   24.80 +		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
   24.81 +			"sector_shift [%llu]\n",
   24.82 +			(long long unsigned)(info->size << SECTOR_SHIFT),
   24.83 +			(long long unsigned)info->size);
   24.84 +
   24.85 +		/*Get the sector size*/
   24.86 +#if defined(BLKSSZGET)
   24.87 +		{
   24.88 +			int arg;
   24.89 +			info->sector_size = DEFAULT_SECTOR_SIZE;
   24.90 +			ioctl(fd, BLKSSZGET, &info->sector_size);
   24.91 +			
   24.92 +			if (info->sector_size != DEFAULT_SECTOR_SIZE)
   24.93 +				DPRINTF("Note: sector size is %ld (not %d)\n",
   24.94 +					info->sector_size, DEFAULT_SECTOR_SIZE);
   24.95 +		}
   24.96 +#else
   24.97 +		info->sector_size = DEFAULT_SECTOR_SIZE;
   24.98 +#endif
   24.99 +
  24.100 +	} else {
  24.101 +		/*Local file? try fstat instead*/
  24.102 +		info->size = (stat.st_size >> SECTOR_SHIFT);
  24.103 +		info->sector_size = DEFAULT_SECTOR_SIZE;
  24.104 +		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
  24.105 +			"sector_shift [%llu]\n",
  24.106 +			(long long unsigned)(info->size << SECTOR_SHIFT),
  24.107 +			(long long unsigned)info->size);
  24.108 +	}
  24.109 +
  24.110 +	if (info->size == 0) {		
  24.111 +		info->size =((uint64_t) MAX_RAMDISK_SIZE);
  24.112 +		info->sector_size = DEFAULT_SECTOR_SIZE;
  24.113 +	}
  24.114 +	info->info = 0;
  24.115 +
  24.116 +        /*Store variables locally*/
  24.117 +	disksector_size = info->sector_size;
  24.118 +	disksize        = info->size;
  24.119 +	diskinfo        = info->info;
  24.120 +	DPRINTF("Image sector_size: \n\t[%lu]\n",
  24.121 +		info->sector_size);
  24.122 +
  24.123 +	return 0;
  24.124 +}
  24.125 +
  24.126 +/* Open the disk file and initialize ram state. */
  24.127 +int tdram_open (td_driver_t *driver, const char *name, td_flag_t flags)
  24.128 +{
  24.129 +	char *p;
  24.130 +	uint64_t size;
  24.131 +	int i, fd, ret = 0, count = 0, o_flags;
  24.132 +	struct tdram_state *prv = (struct tdram_state *)driver->data;
  24.133 +
  24.134 +	connections++;
  24.135 +
  24.136 +	if (connections > 1) {
  24.137 +		driver->info.sector_size = disksector_size;
  24.138 +		driver->info.size        = disksize;
  24.139 +		driver->info.info        = diskinfo; 
  24.140 +		DPRINTF("Image already open, returning parameters:\n");
  24.141 +		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
  24.142 +			"sector_shift [%llu]\n",
  24.143 +			(long long unsigned)(driver->info.size << SECTOR_SHIFT),
  24.144 +			(long long unsigned)driver->info.size);
  24.145 +		DPRINTF("Image sector_size: \n\t[%lu]\n",
  24.146 +			driver->info.sector_size);
  24.147 +
  24.148 +		prv->fd = -1;
  24.149 +		goto done;
  24.150 +	}
  24.151 +
  24.152 +	/* Open the file */
  24.153 +	o_flags = O_DIRECT | O_LARGEFILE | 
  24.154 +		((flags == TD_OPEN_RDONLY) ? O_RDONLY : O_RDWR);
  24.155 +        fd = open(name, o_flags);
  24.156 +
  24.157 +        if ((fd == -1) && (errno == EINVAL)) {
  24.158 +
  24.159 +                /* Maybe O_DIRECT isn't supported. */
  24.160 +		o_flags &= ~O_DIRECT;
  24.161 +                fd = open(name, o_flags);
  24.162 +                if (fd != -1) DPRINTF("WARNING: Accessing image without"
  24.163 +                                     "O_DIRECT! (%s)\n", name);
  24.164 +
  24.165 +        } else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name);
  24.166 +	
  24.167 +        if (fd == -1) {
  24.168 +		DPRINTF("Unable to open [%s]!\n",name);
  24.169 +        	ret = 0 - errno;
  24.170 +        	goto done;
  24.171 +        }
  24.172 +
  24.173 +        prv->fd = fd;
  24.174 +
  24.175 +	ret = get_image_info(fd, &driver->info);
  24.176 +	size = MAX_RAMDISK_SIZE;
  24.177 +
  24.178 +	if (driver->info.size > size) {
  24.179 +		DPRINTF("Disk exceeds limit, must be less than [%d]MB",
  24.180 +			(MAX_RAMDISK_SIZE<<SECTOR_SHIFT)>>20);
  24.181 +		return -ENOMEM;
  24.182 +	}
  24.183 +
  24.184 +	/*Read the image into memory*/
  24.185 +	if (posix_memalign((void **)&img, 
  24.186 +			   DEFAULT_SECTOR_SIZE,
  24.187 +			   driver->info.size << SECTOR_SHIFT)) {
  24.188 +		DPRINTF("Mem malloc failed\n");
  24.189 +		return -errno;
  24.190 +	}
  24.191 +	p = img;
  24.192 +	DPRINTF("Reading %llu bytes.......",
  24.193 +		(long long unsigned)driver->info.size << SECTOR_SHIFT);
  24.194 +
  24.195 +	for (i = 0; i < driver->info.size; i++) {
  24.196 +		ret = read(prv->fd, p, driver->info.sector_size);
  24.197 +		if (ret != driver->info.sector_size) {
  24.198 +			DPRINTF("ret = %d, errno = %d\n", ret, errno);
  24.199 +			ret = 0 - errno;
  24.200 +			break;
  24.201 +		} else {
  24.202 +			count += ret;
  24.203 +			p = img + count;
  24.204 +		}
  24.205 +	}
  24.206 +	DPRINTF("[%d]\n",count);
  24.207 +	if (count != driver->info.size << SECTOR_SHIFT) {
  24.208 +		ret = -1;
  24.209 +	} else {
  24.210 +		ret = 0;
  24.211 +	}
  24.212 +
  24.213 +done:
  24.214 +	return ret;
  24.215 +}
  24.216 +
  24.217 +void tdram_queue_read(td_driver_t *driver, td_request_t treq)
  24.218 +{
  24.219 +	struct tdram_state *prv = (struct tdram_state *)driver->data;
  24.220 +	int      size    = treq.secs * driver->info.sector_size;
  24.221 +	uint64_t offset  = treq.sec * (uint64_t)driver->info.sector_size;
  24.222 +
  24.223 +	memcpy(treq.buf, img + offset, size);
  24.224 +
  24.225 +	td_complete_request(treq, 0);
  24.226 +}
  24.227 +
  24.228 +void tdram_queue_write(td_driver_t *driver, td_request_t treq)
  24.229 +{
  24.230 +	struct tdram_state *prv = (struct tdram_state *)driver->data;
  24.231 +	int      size    = treq.secs * driver->info.sector_size;
  24.232 +	uint64_t offset  = treq.sec * (uint64_t)driver->info.sector_size;
  24.233 +	
  24.234 +	/* We assume that write access is controlled
  24.235 +	 * at a higher level for multiple disks */
  24.236 +	memcpy(img + offset, treq.buf, size);
  24.237 +
  24.238 +	td_complete_request(treq, 0);
  24.239 +}
  24.240 +
  24.241 +int tdram_close(td_driver_t *driver)
  24.242 +{
  24.243 +	struct tdram_state *prv = (struct tdram_state *)driver->data;
  24.244 +	
  24.245 +	connections--;
  24.246 +	
  24.247 +	return 0;
  24.248 +}
  24.249 +
  24.250 +int tdram_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
  24.251 +{
  24.252 +	return TD_NO_PARENT;
  24.253 +}
  24.254 +
  24.255 +int tdram_validate_parent(td_driver_t *driver,
  24.256 +			  td_driver_t *pdriver, td_flag_t flags)
  24.257 +{
  24.258 +	return -EINVAL;
  24.259 +}
  24.260 +
  24.261 +struct tap_disk tapdisk_ram = {
  24.262 +	.disk_type          = "tapdisk_ram",
  24.263 +	.flags              = 0,
  24.264 +	.private_data_size  = sizeof(struct tdram_state),
  24.265 +	.td_open            = tdram_open,
  24.266 +	.td_close           = tdram_close,
  24.267 +	.td_queue_read      = tdram_queue_read,
  24.268 +	.td_queue_write     = tdram_queue_write,
  24.269 +	.td_get_parent_id   = tdram_get_parent_id,
  24.270 +	.td_validate_parent = tdram_validate_parent,
  24.271 +	.td_debug           = NULL,
  24.272 +};
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/tools/blktap2/drivers/block-vhd.c	Tue May 26 11:52:31 2009 +0100
    25.3 @@ -0,0 +1,2321 @@
    25.4 +/* 
    25.5 + * Copyright (c) 2008, XenSource Inc.
    25.6 + * All rights reserved.
    25.7 + *
    25.8 + * Redistribution and use in source and binary forms, with or without
    25.9 + * modification, are permitted provided that the following conditions are met:
   25.10 + *     * Redistributions of source code must retain the above copyright
   25.11 + *       notice, this list of conditions and the following disclaimer.
   25.12 + *     * Redistributions in binary form must reproduce the above copyright
   25.13 + *       notice, this list of conditions and the following disclaimer in the
   25.14 + *       documentation and/or other materials provided with the distribution.
   25.15 + *     * Neither the name of XenSource Inc. nor the names of its contributors
   25.16 + *       may be used to endorse or promote products derived from this software
   25.17 + *       without specific prior written permission.
   25.18 + *
   25.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   25.20 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25.21 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   25.22 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   25.23 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   25.24 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   25.25 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   25.26 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   25.27 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   25.28 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   25.29 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25.30 + *
   25.31 + * A note on write transactions:
   25.32 + * Writes that require updating the BAT or bitmaps cannot be signaled
   25.33 + * as complete until all updates have reached disk.  Transactions are
   25.34 + * used to ensure proper ordering in these cases.  The two types of
   25.35 + * transactions are as follows:
   25.36 + *   - Bitmap updates only: data writes that require updates to the same
   25.37 + *     bitmap are grouped in a transaction.  Only after all data writes
   25.38 + *     in a transaction complete does the bitmap write commence.  Only
   25.39 + *     after the bitmap write finishes are the data writes signalled as
   25.40 + *     complete.
   25.41 + *   - BAT and bitmap updates: data writes are grouped in transactions
   25.42 + *     as above, but a special extra write is included in the transaction,
   25.43 + *     which zeros out the newly allocated bitmap on disk.  When the data
   25.44 + *     writes and the zero-bitmap write complete, the BAT and bitmap writes
   25.45 + *     are started in parallel.  The transaction is completed only after both
   25.46 + *     the BAT and bitmap writes successfully return.
   25.47 + */
   25.48 +
   25.49 +#include <errno.h>
   25.50 +#include <fcntl.h>
   25.51 +#include <stdio.h>
   25.52 +#include <stdlib.h>
   25.53 +#include <unistd.h>
   25.54 +#include <sys/stat.h>
   25.55 +#include <sys/ioctl.h>
   25.56 +#include <uuid/uuid.h> /* For whatever reason, Linux packages this in */
   25.57 +                       /* e2fsprogs-devel.                            */
   25.58 +#include <string.h>    /* for memset.                                 */
   25.59 +#include <libaio.h>
   25.60 +#include <sys/mman.h>
   25.61 +
   25.62 +#include "libvhd.h"
   25.63 +#include "tapdisk.h"
   25.64 +#include "tapdisk-driver.h"
   25.65 +#include "tapdisk-interface.h"
   25.66 +/* Sectors per block of the VHD opened last (set in __vhd_open); used by debug traces. */
   25.67 +unsigned int SPB;
   25.68 +/* Build switches: DEBUGGING selects the DBG/ERR/TRACE backend, ASSERTING enables ASSERT. */
   25.69 +#define DEBUGGING   2
   25.70 +#define ASSERTING   1
   25.71 +#define MICROSOFT_COMPAT
   25.72 +/* Maximum number of vhd_read_batmap() attempts made by vhd_initialize_bat(). */
   25.73 +#define VHD_BATMAP_MAX_RETRIES 10
   25.74 +/* Log the request counters, allocated data requests and pending BAT-write block of s. */
   25.75 +#define __TRACE(s)							\
   25.76 +	do {								\
   25.77 +		DBG(TLOG_DBG, "%s: QUEUED: %" PRIu64 ", COMPLETED: %"	\
   25.78 +		    PRIu64", RETURNED: %" PRIu64 ", DATA_ALLOCATED: "	\
   25.79 +		    "%lu, BBLK: 0x%04x\n",				\
   25.80 +		    s->vhd.file, s->queued, s->completed, s->returned,	\
   25.81 +		    VHD_REQS_DATA - s->vreq_free_count,			\
   25.82 +		    s->bat.pbw_blk);					\
   25.83 +	} while(0)
   25.84 +
   25.85 +#define __ASSERT(_p)							\
   25.86 +	if (!(_p)) {							\
   25.87 +		DPRINTF("%s:%d: FAILED ASSERTION: '%s'\n",		\
   25.88 +			__FILE__, __LINE__, #_p);			\
   25.89 +		DBG(TLOG_WARN, "%s:%d: FAILED ASSERTION: '%s'\n",	\
   25.90 +		    __FILE__, __LINE__, #_p);				\
   25.91 +		tlog_flush();						\
   25.92 +		*(int*)0 = 0;						\
   25.93 +	}
   25.94 +/* DEBUGGING == 1 logs through DPRINTF, == 2 through tlog; any other value compiles logging out. */
   25.95 +#if (DEBUGGING == 1)
   25.96 +  #define DBG(level, _f, _a...)      DPRINTF(_f, ##_a)
   25.97 +  #define ERR(err, _f, _a...)        DPRINTF("ERROR: %d: " _f, err, ##_a)
   25.98 +  #define TRACE(s)                   ((void)0)
   25.99 +#elif (DEBUGGING == 2)
  25.100 +  #define DBG(level, _f, _a...)      tlog_write(level, _f, ##_a)
  25.101 +  #define ERR(err, _f, _a...)	     tlog_error(err, _f, ##_a)
  25.102 +  #define TRACE(s)                   __TRACE(s)
  25.103 +#else
  25.104 +  #define DBG(level, _f, _a...)      ((void)0)
  25.105 +  #define ERR(err, _f, _a...)        ((void)0)
  25.106 +  #define TRACE(s)                   ((void)0)
  25.107 +#endif
  25.108 +/* ASSERT is __ASSERT when ASSERTING == 1 and a no-op otherwise. */
  25.109 +#if (ASSERTING == 1)
  25.110 +  #define ASSERT(_p)                 __ASSERT(_p)
  25.111 +#else
  25.112 +  #define ASSERT(_p)                 ((void)0)
  25.113 +#endif
  25.114 +
  25.115 +/******VHD DEFINES******/
  25.116 +#define VHD_CACHE_SIZE               32 /* slots in the block-bitmap cache */
  25.117 +/* Request pool sizing; VHD_REQS_DATA is the length of vhd_state.vreq_list. */
  25.118 +#define VHD_REQS_DATA                TAPDISK_DATA_REQUESTS
  25.119 +#define VHD_REQS_META                (VHD_CACHE_SIZE + 2)
  25.120 +#define VHD_REQS_TOTAL               (VHD_REQS_DATA + VHD_REQS_META)
  25.121 +/* Request operation codes (presumably the values of vhd_request.op -- confirm at the use sites). */
  25.122 +#define VHD_OP_BAT_WRITE             0
  25.123 +#define VHD_OP_DATA_READ             1
  25.124 +#define VHD_OP_DATA_WRITE            2
  25.125 +#define VHD_OP_BITMAP_READ           3
  25.126 +#define VHD_OP_BITMAP_WRITE          4
  25.127 +#define VHD_OP_ZERO_BM_WRITE         5
  25.128 +/* VHD_BM_* state codes; NOTE(review): their users are not in this part of the file. */
  25.129 +#define VHD_BM_BAT_LOCKED            0
  25.130 +#define VHD_BM_BAT_CLEAR             1
  25.131 +#define VHD_BM_BIT_CLEAR             2
  25.132 +#define VHD_BM_BIT_SET               3
  25.133 +#define VHD_BM_NOT_CACHED            4
  25.134 +#define VHD_BM_READ_PENDING          5
  25.135 +/* Open flags, kept in vhd_state.flags. */
  25.136 +#define VHD_FLAG_OPEN_RDONLY         1
  25.137 +#define VHD_FLAG_OPEN_NO_CACHE       2
  25.138 +#define VHD_FLAG_OPEN_QUIET          4
  25.139 +#define VHD_FLAG_OPEN_STRICT         8
  25.140 +#define VHD_FLAG_OPEN_QUERY          16
  25.141 +#define VHD_FLAG_OPEN_PREALLOCATE    32
  25.142 +/* Bits of vhd_bat_state.status (see lock_bat()). */
  25.143 +#define VHD_FLAG_BAT_LOCKED          1
  25.144 +#define VHD_FLAG_BAT_WRITE_STARTED   2
  25.145 +/* Bits of vhd_bitmap.status (see lock_bitmap(), bitmap_in_use()). */
  25.146 +#define VHD_FLAG_BM_UPDATE_BAT       1
  25.147 +#define VHD_FLAG_BM_WRITE_PENDING    2
  25.148 +#define VHD_FLAG_BM_READ_PENDING     4
  25.149 +#define VHD_FLAG_BM_LOCKED           8
  25.150 +/* Per-request flags (presumably stored in vhd_request.flags -- confirm). */
  25.151 +#define VHD_FLAG_REQ_UPDATE_BAT      1
  25.152 +#define VHD_FLAG_REQ_UPDATE_BITMAP   2
  25.153 +#define VHD_FLAG_REQ_QUEUED          4
  25.154 +#define VHD_FLAG_REQ_FINISHED        8
  25.155 +/* Bits of vhd_transaction.status. */
  25.156 +#define VHD_FLAG_TX_LIVE             1
  25.157 +#define VHD_FLAG_TX_UPDATE_BAT       2
  25.158 +/* Storage type for the flag words above; the largest flag defined here is 32. */
  25.159 +typedef uint8_t vhd_flag_t;
  25.160 +/* Forward declarations for the pointer members below. */
  25.161 +struct vhd_state;
  25.162 +struct vhd_request;
  25.163 +/* Singly linked list of requests, chained through vhd_request.next. */
  25.164 +struct vhd_req_list {
  25.165 +	struct vhd_request       *head;
  25.166 +	struct vhd_request       *tail;
  25.167 +};
  25.168 +/* Bookkeeping for a group of requests; complete once finished == started. */
  25.169 +struct vhd_transaction {
  25.170 +	int                       error;
  25.171 +	int                       closed;      /* asserted to be 0 when a request is added */
  25.172 +	int                       started;     /* requests added via add_to_transaction */
  25.173 +	int                       finished;    /* compared with started in transaction_completed */
  25.174 +	vhd_flag_t                status;      /* VHD_FLAG_TX_* bits */
  25.175 +	struct vhd_req_list       requests;    /* members of this transaction */
  25.176 +};
  25.177 +/* One request handled by the VHD driver, wrapping a tapdisk request and its tiocb. */
  25.178 +struct vhd_request {
  25.179 +	int                       error;
  25.180 +	uint8_t                   op;          /* presumably one of VHD_OP_* -- confirm */
  25.181 +	vhd_flag_t                flags;       /* presumably VHD_FLAG_REQ_* bits -- confirm */
  25.182 +	td_request_t              treq;
  25.183 +	struct tiocb              tiocb;
  25.184 +	struct vhd_state         *state;       /* set by init_vhd_request */
  25.185 +	struct vhd_request       *next;        /* link used by vhd_req_list */
  25.186 +	struct vhd_transaction   *tx;          /* set by add_to_transaction */
  25.187 +};
  25.188 +/* In-memory BAT and batmap plus the state needed to write BAT updates. */
  25.189 +struct vhd_bat_state {
  25.190 +	vhd_bat_t                 bat;         /* filled by vhd_read_bat */
  25.191 +	vhd_batmap_t              batmap;      /* filled by vhd_read_batmap; map may be NULL */
  25.192 +	vhd_flag_t                status;      /* VHD_FLAG_BAT_* bits */
  25.193 +	uint32_t                  pbw_blk;     /* blk num of pending write */
  25.194 +	uint64_t                  pbw_offset;  /* file offset of same */
  25.195 +	struct vhd_request        req;         /* for writing bat table */
  25.196 +	struct vhd_request        zero_req;    /* for initializing bitmaps */
  25.197 +	char                     *bat_buf;     /* one sector, sector-aligned (vhd_initialize_bat) */
  25.198 +};
  25.199 +/* Cached allocation bitmap of one block, with the write transaction that updates it. */
  25.200 +struct vhd_bitmap {
  25.201 +	u32                       blk;         /* block number; lookup key in get_bitmap */
  25.202 +	u64                       seqno;       /* lru sequence number */
  25.203 +	vhd_flag_t                status;      /* VHD_FLAG_BM_* bits */
  25.204 +	/* map and shadow are each bm_secs sectors long and 512-byte aligned */
  25.205 +	char                     *map;         /* map should only be modified
  25.206 +					        * in finish_bitmap_write */
  25.207 +	char                     *shadow;      /* in-memory bitmap changes are 
  25.208 +					        * made to shadow and copied to
  25.209 +					        * map only after having been
  25.210 +					        * flushed to disk */
  25.211 +	struct vhd_transaction    tx;          /* transaction data structure
  25.212 +						* encapsulating data, bitmap, 
  25.213 +						* and bat writes */
  25.214 +	struct vhd_req_list       queue;       /* data writes waiting for next
  25.215 +						* transaction */
  25.216 +	struct vhd_req_list       waiting;     /* pending requests that cannot
  25.217 +					        * be serviced until this bitmap
  25.218 +					        * is read from disk */
  25.219 +	struct vhd_request        req;         /* presumably the bitmap's own I/O request -- confirm */
  25.220 +};
  25.221 +/* Per-image driver state, stored in td_driver_t.data. */
  25.222 +struct vhd_state {
  25.223 +	vhd_flag_t                flags;       /* VHD_FLAG_OPEN_* bits */
  25.224 +
  25.225 +        /* VHD stuff */
  25.226 +	vhd_context_t             vhd;
  25.227 +	u32                       spp;         /* sectors per page */
  25.228 +        u32                       spb;         /* sectors per block */
  25.229 +        u64                       next_db;     /* pointer to the next 
  25.230 +						* (unallocated) datablock */
  25.231 +	/* in-memory BAT, batmap and BAT write state */
  25.232 +	struct vhd_bat_state      bat;
  25.233 +	/* cached block bitmaps, searched by get_bitmap */
  25.234 +	u64                       bm_lru;      /* lru sequence number */
  25.235 +	u32                       bm_secs;     /* size of bitmap, in sectors */
  25.236 +	struct vhd_bitmap        *bitmap[VHD_CACHE_SIZE];
  25.237 +	/* bitmap pool: bitmap_list owns the storage, bitmap_free holds the unused entries */
  25.238 +	int                       bm_free_count;
  25.239 +	struct vhd_bitmap        *bitmap_free[VHD_CACHE_SIZE];
  25.240 +	struct vhd_bitmap         bitmap_list[VHD_CACHE_SIZE];
  25.241 +	/* data request pool, laid out like the bitmap pool */
  25.242 +	int                       vreq_free_count;
  25.243 +	struct vhd_request       *vreq_free[VHD_REQS_DATA];
  25.244 +	struct vhd_request        vreq_list[VHD_REQS_DATA];
  25.245 +	/* tapdisk driver this state was opened for */
  25.246 +	td_driver_t              *driver;
  25.247 +	/* counters; the first three are logged by __TRACE, writes is also checked in _vhd_close */
  25.248 +	uint64_t                  queued;
  25.249 +	uint64_t                  completed;
  25.250 +	uint64_t                  returned;
  25.251 +	uint64_t                  reads;
  25.252 +	uint64_t                  read_size;
  25.253 +	uint64_t                  writes;
  25.254 +	uint64_t                  write_size;
  25.255 +};
  25.256 +/* Flag helpers: test yields the masked bits (non-zero when set); set/clear modify word in place. */
  25.257 +#define test_vhd_flag(word, flag)  ((word) & (flag))
  25.258 +#define set_vhd_flag(word, flag)   ((word) |= (flag))
  25.259 +#define clear_vhd_flag(word, flag) ((word) &= ~(flag))
  25.260 +/* BAT entry of block blk in state s. */
  25.261 +#define bat_entry(s, blk)          ((s)->bat.bat.bat[(blk)])
  25.262 +/* Forward declarations. */
  25.263 +static void vhd_complete(void *, struct tiocb *, int);
  25.264 +static void finish_data_transaction(struct vhd_state *, struct vhd_bitmap *);
  25.265 +/* Zero buffer shared by every VHD state; _vhd_master is the state that mapped it. */
  25.266 +static struct vhd_state  *_vhd_master;
  25.267 +static unsigned long      _vhd_zsize;
  25.268 +static char              *_vhd_zeros;
  25.269 +/* Map the shared read-only zero buffer on first use; returns 0 or a negative errno. */
  25.270 +static int
  25.271 +vhd_initialize(struct vhd_state *s)
  25.272 +{
  25.273 +	if (_vhd_zeros)
  25.274 +		return 0;
  25.275 +	/* two pages, plus one full block when blocks are preallocated */
  25.276 +	_vhd_zsize = 2 * getpagesize();
  25.277 +	if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE))
  25.278 +		_vhd_zsize += VHD_BLOCK_SIZE;
  25.279 +	/* anonymous mappings start out zero-filled, so nothing is written here */
  25.280 +	_vhd_zeros = mmap(0, _vhd_zsize, PROT_READ,
  25.281 +			  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
  25.282 +	if (_vhd_zeros == MAP_FAILED) {
  25.283 +		int err = -errno; /* capture now: EPRINTF may overwrite errno */
  25.284 +		EPRINTF("vhd_initialize failed: %d\n", err);
  25.285 +		_vhd_zeros = NULL;
  25.286 +		_vhd_zsize = 0;
  25.287 +		return err;
  25.288 +	}
  25.289 +	_vhd_master = s; /* only this state may unmap the buffer (see vhd_free) */
  25.290 +	return 0;
  25.291 +}
  25.292 +/* Unmap the shared zero buffer, but only when s is the state that mapped it. */
  25.293 +static void
  25.294 +vhd_free(struct vhd_state *s)
  25.295 +{
  25.296 +	if (_vhd_master != s || !_vhd_zeros)
  25.297 +		return;
  25.298 +
  25.299 +	munmap(_vhd_zeros, _vhd_zsize);
  25.300 +	_vhd_zsize  = 0;
  25.301 +	_vhd_zeros  = NULL;
  25.302 +	_vhd_master = NULL;
  25.303 +}
  25.304 +/* Return the shared zero buffer; logs and asserts when it is unmapped or shorter than size. */
  25.305 +static char *
  25.306 +_get_vhd_zeros(const char *func, unsigned long size)
  25.307 +{
  25.308 +	if (!_vhd_zeros || _vhd_zsize < size) {
  25.309 +		EPRINTF("invalid zero request from %s: %lu, %lu, %p\n",
  25.310 +			func, size, _vhd_zsize, _vhd_zeros);
  25.311 +		ASSERT(0);
  25.312 +	}
  25.313 +
  25.314 +	return _vhd_zeros;
  25.315 +}
  25.316 +/* vhd_zeros(size): the zero buffer, with the calling function recorded for the error message. */
  25.317 +#define vhd_zeros(size)	_get_vhd_zeros(__func__, size)
  25.318 +/* Set blk's bit in the batmap (logged as "completely full"); no-op when no batmap is loaded. */
  25.319 +static inline void
  25.320 +set_batmap(struct vhd_state *s, uint32_t blk)
  25.321 +{
  25.322 +	if (s->bat.batmap.map) {
  25.323 +		vhd_batmap_set(&s->vhd, &s->bat.batmap, blk);
  25.324 +		DBG(TLOG_DBG, "block 0x%x completely full\n", blk);
  25.325 +	}
  25.326 +}
  25.327 +/* Query blk's bit in the batmap; always 0 when no batmap is loaded. */
  25.328 +static inline int
  25.329 +test_batmap(struct vhd_state *s, uint32_t blk)
  25.330 +{
  25.331 +	vhd_batmap_t *batmap = &s->bat.batmap;
  25.332 +
  25.333 +	return batmap->map ? vhd_batmap_test(&s->vhd, batmap, blk) : 0;
  25.334 +}
  25.335 +/* Overwrite the last 512 bytes of a non-fixed VHD with a 0xc7 pattern; 0 or a negative errno. */
  25.336 +static int
  25.337 +vhd_kill_footer(struct vhd_state *s)
  25.338 +{
  25.339 +	int err;
  25.340 +	off64_t end;
  25.341 +	char *zeros;
  25.342 +
  25.343 +	if (s->vhd.footer.type == HD_TYPE_FIXED)
  25.344 +		return 0;
  25.345 +
  25.346 +	err = posix_memalign((void **)&zeros, 512, 512);
  25.347 +	if (err)
  25.348 +		return -err;
  25.349 +
  25.350 +	err = 1;
  25.351 +	memset(zeros, 0xc7, 512); /* memset stores only the low byte of its fill value */
  25.352 +
  25.353 +	if ((end = lseek64(s->vhd.fd, 0, SEEK_END)) == -1)
  25.354 +		goto fail;
  25.355 +
  25.356 +	if (lseek64(s->vhd.fd, (end - 512), SEEK_SET) == -1)
  25.357 +		goto fail;
  25.358 +
  25.359 +	if (write(s->vhd.fd, zeros, 512) != 512)
  25.360 +		goto fail;
  25.361 +
  25.362 +	err = 0;
  25.363 +
  25.364 + fail:
  25.365 +	if (err) /* translate before free(), which is allowed to change errno */
  25.366 +		err = (errno ? -errno : -EIO);
  25.367 +	free(zeros);
  25.368 +	return err;
  25.369 +}
  25.370 +/* Set s->next_db to the first sector past the headers and past every allocated block. */
  25.371 +static inline int
  25.372 +find_next_free_block(struct vhd_state *s)
  25.373 +{
  25.374 +	int err;
  25.375 +	off64_t eom;
  25.376 +	uint32_t i, entry;
  25.377 +
  25.378 +	err = vhd_end_of_headers(&s->vhd, &eom);
  25.379 +	if (err)
  25.380 +		return err;
  25.381 +
  25.382 +	s->next_db = secs_round_up(eom);
  25.383 +	/* a block takes bm_secs + spb sectors from its BAT entry; add in 64 bits to avoid wrap */
  25.384 +	for (i = 0; i < s->bat.bat.entries; i++) {
  25.385 +		entry = bat_entry(s, i);
  25.386 +		if (entry != DD_BLK_UNUSED && entry >= s->next_db)
  25.387 +			s->next_db = (u64)entry + s->spb + s->bm_secs;
  25.388 +	}
  25.389 +
  25.390 +	return 0;
  25.391 +}
  25.392 +/* Free the BAT, the batmap and the BAT sector buffer, then reset the whole BAT state. */
  25.393 +static void
  25.394 +vhd_free_bat(struct vhd_state *s)
  25.395 +{
  25.396 +	free(s->bat.bat.bat);
  25.397 +	free(s->bat.batmap.map);
  25.398 +	free(s->bat.bat_buf);
  25.399 +	memset(&s->bat, 0, sizeof(s->bat)); /* clear every freed pointer: error paths call this twice */
  25.400 +}
  25.401 +/* Read the BAT (and the batmap when present) and allocate the BAT sector buffer; 0 or < 0. */
  25.402 +static int
  25.403 +vhd_initialize_bat(struct vhd_state *s)
  25.404 +{
  25.405 +	int err, psize, batmap_required, i;
  25.406 +
  25.407 +	memset(&s->bat, 0, sizeof(s->bat)); /* the whole vhd_bat_state, not only its BAT member */
  25.408 +
  25.409 +	psize = getpagesize();
  25.410 +
  25.411 +	err = vhd_read_bat(&s->vhd, &s->bat.bat);
  25.412 +	if (err) {
  25.413 +		EPRINTF("%s: reading bat: %d\n", s->vhd.file, err);
  25.414 +		return err;
  25.415 +	}
  25.416 +	/* the batmap is mandatory for writable opens only */
  25.417 +	batmap_required = 1;
  25.418 +	if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY)) {
  25.419 +		batmap_required = 0;
  25.420 +	} else {
  25.421 +		err = find_next_free_block(s);
  25.422 +		if (err)
  25.423 +			goto fail;
  25.424 +	}
  25.425 +	/* reads are retried only when the batmap is optional; a required one fails at once */
  25.426 +	if (vhd_has_batmap(&s->vhd)) {
  25.427 +		for (i = 0; i < VHD_BATMAP_MAX_RETRIES; i++) {
  25.428 +			err = vhd_read_batmap(&s->vhd, &s->bat.batmap);
  25.429 +			if (err) {
  25.430 +				EPRINTF("%s: reading batmap: %d\n",
  25.431 +						s->vhd.file, err);
  25.432 +				if (batmap_required)
  25.433 +					goto fail;
  25.434 +			} else {
  25.435 +				break;
  25.436 +			}
  25.437 +		}
  25.438 +		if (err)
  25.439 +			EPRINTF("%s: ignoring non-critical batmap error\n",
  25.440 +					s->vhd.file);
  25.441 +	}
  25.442 +
  25.443 +	err = posix_memalign((void **)&s->bat.bat_buf,
  25.444 +			     VHD_SECTOR_SIZE, VHD_SECTOR_SIZE);
  25.445 +	if (err) {
  25.446 +		s->bat.bat_buf = NULL;
  25.447 +		err = -err; /* posix_memalign returns a positive error number */
  25.448 +		goto fail;
  25.449 +	}
  25.450 +	return 0;
  25.451 +
  25.452 +fail:
  25.453 +	vhd_free_bat(s);
  25.454 +	return err;
  25.455 +}
  25.456 +/* Free the map and shadow buffers of every pool entry and reset the bitmap pool. */
  25.457 +static void
  25.458 +vhd_free_bitmap_cache(struct vhd_state *s)
  25.459 +{
  25.460 +	int i;
  25.461 +	struct vhd_bitmap *bm;
  25.462 +
  25.463 +	for (i = 0; i < VHD_CACHE_SIZE; i++) {
  25.464 +		bm = s->bitmap_list + i;
  25.465 +		free(bm->map);
  25.466 +		free(bm->shadow);
  25.467 +		s->bitmap_free[i] = NULL;
  25.468 +	}
  25.469 +	/* zeroing also clears the pointers just freed, so a repeated call is harmless */
  25.470 +	memset(s->bitmap_list, 0, sizeof(struct vhd_bitmap) * VHD_CACHE_SIZE);
  25.471 +}
  25.472 +/* Allocate zeroed map/shadow buffers for every pool entry; 0 or a negative errno. */
  25.473 +static int
  25.474 +vhd_initialize_bitmap_cache(struct vhd_state *s)
  25.475 +{
  25.476 +	int i, err, map_size;
  25.477 +	struct vhd_bitmap *bm;
  25.478 +
  25.479 +	memset(s->bitmap_list, 0, sizeof(struct vhd_bitmap) * VHD_CACHE_SIZE);
  25.480 +
  25.481 +	s->bm_lru        = 0;
  25.482 +	map_size         = vhd_sectors_to_bytes(s->bm_secs);
  25.483 +	s->bm_free_count = VHD_CACHE_SIZE;
  25.484 +	/* every entry starts out on the free list */
  25.485 +	for (i = 0; i < VHD_CACHE_SIZE; i++) {
  25.486 +		bm = s->bitmap_list + i;
  25.487 +
  25.488 +		err = posix_memalign((void **)&bm->map, 512, map_size);
  25.489 +		if (err) {
  25.490 +			bm->map = NULL;
  25.491 +			goto fail;
  25.492 +		}
  25.493 +
  25.494 +		err = posix_memalign((void **)&bm->shadow, 512, map_size);
  25.495 +		if (err) {
  25.496 +			bm->shadow = NULL;
  25.497 +			goto fail;
  25.498 +		}
  25.499 +
  25.500 +		memset(bm->map, 0, map_size);
  25.501 +		memset(bm->shadow, 0, map_size);
  25.502 +		s->bitmap_free[i] = bm;
  25.503 +	}
  25.504 +
  25.505 +	return 0;
  25.506 +
  25.507 +fail:
  25.508 +	vhd_free_bitmap_cache(s);
  25.509 +	return -err; /* only posix_memalign jumps here, and it returns a positive error number */
  25.510 +}
  25.511 +/* Read the dynamic-disk header, derive the geometry and, unless NO_CACHE, load BAT and bitmaps. */
  25.512 +static int
  25.513 +vhd_initialize_dynamic_disk(struct vhd_state *s)
  25.514 +{
  25.515 +	int err;
  25.516 +
  25.517 +	err = vhd_get_header(&s->vhd);
  25.518 +	if (err) {
  25.519 +		if (!test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
  25.520 +			EPRINTF("Error reading VHD DD header.\n");
  25.521 +		return err;
  25.522 +	}
  25.523 +	/* 0x00010000 is the only header version accepted */
  25.524 +	if (s->vhd.header.hdr_ver != 0x00010000) {
  25.525 +		EPRINTF("unsupported header version! (0x%x)\n",
  25.526 +			s->vhd.header.hdr_ver);
  25.527 +		return -EINVAL;
  25.528 +	}
  25.529 +	/* sectors per page, sectors per block, and bitmap size from spb / 8 (one bit per sector) */
  25.530 +	s->spp     = getpagesize() >> VHD_SECTOR_SHIFT;
  25.531 +	s->spb     = s->vhd.header.block_size >> VHD_SECTOR_SHIFT;
  25.532 +	s->bm_secs = secs_round_up_no_zero(s->spb >> 3);
  25.533 +
  25.534 +	if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_NO_CACHE))
  25.535 +		return 0;
  25.536 +
  25.537 +	err = vhd_initialize_bat(s);
  25.538 +	if (err)
  25.539 +		return err;
  25.540 +
  25.541 +	err = vhd_initialize_bitmap_cache(s);
  25.542 +	if (err) {
  25.543 +		vhd_free_bat(s);
  25.544 +		return err;
  25.545 +	}
  25.546 +
  25.547 +	return 0;
  25.548 +}
  25.549 +/* Reject images whose creator starts with "tap" and whose version is newer than supported. */
  25.550 +static int
  25.551 +vhd_check_version(struct vhd_state *s)
  25.552 +{
  25.553 +	if (strncmp(s->vhd.footer.crtr_app, "tap", 3)) /* other creators are not version-checked */
  25.554 +		return 0;
  25.555 +
  25.556 +	if (s->vhd.footer.crtr_ver > VHD_CURRENT_VERSION) {
  25.557 +		if (!test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
  25.558 +			EPRINTF("WARNING: %s vhd creator version 0x%08x, "
  25.559 +				"but only versions up to 0x%08x are "
  25.560 +				"supported for IO\n", s->vhd.file,
  25.561 +				s->vhd.footer.crtr_ver, VHD_CURRENT_VERSION);
  25.562 +
  25.563 +		return -EINVAL;
  25.564 +	}
  25.565 +
  25.566 +	return 0;
  25.567 +}
  25.568 +/* Unless QUIET, log the creator tag and version plus, for dynamic disks, BAT usage counts. */
  25.569 +static void
  25.570 +vhd_log_open(struct vhd_state *s)
  25.571 +{
  25.572 +	char buf[5];
  25.573 +	uint32_t i, allocated, full;
  25.574 +
  25.575 +	if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
  25.576 +		return;
  25.577 +	/* crtr_app is a fixed-width tag that need not be NUL-terminated: bound the read to 4 */
  25.578 +	snprintf(buf, sizeof(buf), "%.4s", s->vhd.footer.crtr_app);
  25.579 +	if (!vhd_type_dynamic(&s->vhd)) {
  25.580 +		DPRINTF("%s version: %s 0x%08x\n",
  25.581 +			s->vhd.file, buf, s->vhd.footer.crtr_ver);
  25.582 +		return;
  25.583 +	}
  25.584 +
  25.585 +	allocated = 0;
  25.586 +	full      = 0;
  25.587 +	/* count BAT entries in use and blocks flagged in the batmap */
  25.588 +	for (i = 0; i < s->bat.bat.entries; i++) {
  25.589 +		if (bat_entry(s, i) != DD_BLK_UNUSED)
  25.590 +			allocated++;
  25.591 +		if (test_batmap(s, i))
  25.592 +			full++;
  25.593 +	}
  25.594 +
  25.595 +	DPRINTF("%s version: %s 0x%08x, b: %u, a: %u, f: %u, n: %"PRIu64"\n",
  25.596 +		s->vhd.file, buf, s->vhd.footer.crtr_ver, s->bat.bat.entries,
  25.597 +		allocated, full, s->next_db);
  25.598 +}
  25.599 +
  25.600 +static int
  25.601 +__vhd_open(td_driver_t *driver, const char *name, vhd_flag_t flags)
  25.602 +{
  25.603 +        int i, o_flags, err;
  25.604 +	struct vhd_state *s;
  25.605 +
  25.606 +        DBG(TLOG_INFO, "vhd_open: %s\n", name);
  25.607 +	if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT))
  25.608 +		libvhd_set_log_level(1);
  25.609 +
  25.610 +	s = (struct vhd_state *)driver->data;
  25.611 +	memset(s, 0, sizeof(struct vhd_state));
  25.612 +
  25.613 +	s->flags  = flags;
  25.614 +	s->driver = driver;
  25.615 +
  25.616 +	err = vhd_initialize(s);
  25.617 +	if (err)
  25.618 +		return err;
  25.619 +
  25.620 +	o_flags = ((test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) ? 
  25.621 +		   VHD_OPEN_RDONLY : VHD_OPEN_RDWR);
  25.622 +
  25.623 +	err = vhd_open(&s->vhd, name, o_flags);
  25.624 +	if (err) {
  25.625 +		libvhd_set_log_level(1);
  25.626 +		err = vhd_open(&s->vhd, name, o_flags);
  25.627 +		if (err) {
  25.628 +			EPRINTF("Unable to open [%s] (%d)!\n", name, err);
  25.629 +			return err;
  25.630 +		}
  25.631 +	}
  25.632 +
  25.633 +	err = vhd_check_version(s);
  25.634 +	if (err)
  25.635 +		goto fail;
  25.636 +
  25.637 +	s->spb = s->spp = 1;
  25.638 +
  25.639 +	if (vhd_type_dynamic(&s->vhd)) {
  25.640 +		err = vhd_initialize_dynamic_disk(s);
  25.641 +		if (err)
  25.642 +			goto fail;
  25.643 +	}
  25.644 +
  25.645 +	vhd_log_open(s);
  25.646 +
  25.647 +	SPB = s->spb;
  25.648 +
  25.649 +	s->vreq_free_count = VHD_REQS_DATA;
  25.650 +	for (i = 0; i < VHD_REQS_DATA; i++)
  25.651 +		s->vreq_free[i] = s->vreq_list + i;
  25.652 +
  25.653 +	driver->info.size        = s->vhd.footer.curr_size >> VHD_SECTOR_SHIFT;
  25.654 +	driver->info.sector_size = VHD_SECTOR_SIZE;
  25.655 +	driver->info.info        = 0;
  25.656 +
  25.657 +        DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%lu, inf:%u)\n",
  25.658 +	    driver->info.size, driver->info.sector_size, driver->info.info);
  25.659 +
  25.660 +	if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT) && 
  25.661 +	    !test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) {
  25.662 +		err = vhd_kill_footer(s);
  25.663 +		if (err) {
  25.664 +			DPRINTF("ERROR killing footer: %d\n", err);
  25.665 +			goto fail;
  25.666 +		}
  25.667 +		s->writes++;
  25.668 +	}
  25.669 +
  25.670 +        return 0;
  25.671 +
  25.672 + fail:
  25.673 +	vhd_free_bat(s);
  25.674 +	vhd_free_bitmap_cache(s);
  25.675 +	vhd_close(&s->vhd);
  25.676 +	vhd_free(s);
  25.677 +	return err;
  25.678 +}
  25.679 +/* Translate tapdisk TD_OPEN_* flags into VHD_FLAG_OPEN_* flags and call __vhd_open. */
  25.680 +static int
  25.681 +_vhd_open(td_driver_t *driver, const char *name, td_flag_t flags)
  25.682 +{
  25.683 +	vhd_flag_t vhd_flags = 0;
  25.684 +
  25.685 +	if (flags & TD_OPEN_RDONLY)
  25.686 +		vhd_flags |= VHD_FLAG_OPEN_RDONLY;
  25.687 +	if (flags & TD_OPEN_QUIET)
  25.688 +		vhd_flags |= VHD_FLAG_OPEN_QUIET;
  25.689 +	if (flags & TD_OPEN_STRICT)
  25.690 +		vhd_flags |= VHD_FLAG_OPEN_STRICT;
  25.691 +	if (flags & TD_OPEN_QUERY) /* a query open is also quiet, read-only and uncached */
  25.692 +		vhd_flags |= (VHD_FLAG_OPEN_QUERY  |
  25.693 +			      VHD_FLAG_OPEN_QUIET  |
  25.694 +			      VHD_FLAG_OPEN_RDONLY |
  25.695 +			      VHD_FLAG_OPEN_NO_CACHE);
  25.696 +
  25.697 +	/* pre-allocate for all but NFS and LVM storage */
  25.698 +	if (driver->storage != TAPDISK_STORAGE_TYPE_NFS &&
  25.699 +	    driver->storage != TAPDISK_STORAGE_TYPE_LVM)
  25.700 +		vhd_flags |= VHD_FLAG_OPEN_PREALLOCATE;
  25.701 +
  25.702 +	return __vhd_open(driver, name, vhd_flags);
  25.703 +}
  25.704 +/* Unless QUIET, log the BAT size, allocated and full block counts and next_db at close. */
  25.705 +static void
  25.706 +vhd_log_close(struct vhd_state *s)
  25.707 +{
  25.708 +	uint32_t blk;
  25.709 +	uint32_t in_use = 0;
  25.710 +	uint32_t filled = 0;
  25.711 +
  25.712 +	if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_QUIET))
  25.713 +		return;
  25.714 +
  25.715 +	/* one pass over the BAT: tally entries in use and blocks flagged in the batmap */
  25.716 +	for (blk = 0; blk < s->bat.bat.entries; blk++) {
  25.717 +		in_use += (bat_entry(s, blk) != DD_BLK_UNUSED) ? 1 : 0;
  25.718 +		filled += test_batmap(s, blk) ? 1 : 0;
  25.719 +	}
  25.720 +
  25.721 +	DPRINTF("%s: b: %u, a: %u, f: %u, n: %"PRIu64"\n",
  25.722 +		s->vhd.file, s->bat.bat.entries,
  25.723 +		in_use, filled,
  25.724 +		s->next_db);
  25.725 +}
  25.726 +/* Write back footer and batmap when needed, release all resources, zero the state; returns 0. */
  25.727 +static int
  25.728 +_vhd_close(td_driver_t *driver)
  25.729 +{
  25.730 +	int err;
  25.731 +	struct vhd_state *s;
  25.732 +	struct vhd_bitmap *bm; /* NOTE(review): not used in this function */
  25.733 +	
  25.734 +	DBG(TLOG_WARN, "vhd_close\n");
  25.735 +	s = (struct vhd_state *)driver->data;
  25.736 +
  25.737 +	/* don't write footer if tapdisk is read-only */
  25.738 +	if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY))
  25.739 +		goto free;
  25.740 +	
  25.741 +	/* 
  25.742 +	 * write footer if:
  25.743 +	 *   - we killed it on open (opened with strict) 
  25.744 +	 *   - we've written data since opening
  25.745 +	 */
  25.746 +	if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_STRICT) || s->writes) {
  25.747 +		memcpy(&s->vhd.bat, &s->bat.bat, sizeof(vhd_bat_t)); /* lend our BAT to the context */
  25.748 +		err = vhd_write_footer(&s->vhd, &s->vhd.footer);
  25.749 +		memset(&s->vhd.bat, 0, sizeof(vhd_bat_t)); /* and detach it before vhd_close */
  25.750 +
  25.751 +		if (err) /* write errors are logged only; close still returns 0 */
  25.752 +			EPRINTF("writing %s footer: %d\n", s->vhd.file, err);
  25.753 +
  25.754 +		if (!vhd_has_batmap(&s->vhd))
  25.755 +			goto free;
  25.756 +
  25.757 +		err = vhd_write_batmap(&s->vhd, &s->bat.batmap);
  25.758 +		if (err)
  25.759 +			EPRINTF("writing %s batmap: %d\n", s->vhd.file, err);
  25.760 +	}
  25.761 +
  25.762 + free:
  25.763 +	vhd_log_close(s);
  25.764 +	vhd_free_bat(s);
  25.765 +	vhd_free_bitmap_cache(s);
  25.766 +	vhd_close(&s->vhd);
  25.767 +	vhd_free(s);
  25.768 +
  25.769 +	memset(s, 0, sizeof(struct vhd_state));
  25.770 +
  25.771 +	return 0;
  25.772 +}
  25.773 +/* Check that parent_driver is an acceptable parent for child_driver; returns 0 or -EINVAL. */
  25.774 +int
  25.775 +vhd_validate_parent(td_driver_t *child_driver,
  25.776 +		    td_driver_t *parent_driver, td_flag_t flags)
  25.777 +{
  25.778 +	struct stat stats; /* only referenced by the disabled timestamp check below */
  25.779 +	struct vhd_state *child  = (struct vhd_state *)child_driver->data;
  25.780 +	struct vhd_state *parent;
  25.781 +	/* a non-VHD parent is accepted only for a differencing VHD child marked as having a raw parent */
  25.782 +	if (parent_driver->type != DISK_TYPE_VHD) {
  25.783 +		if (child_driver->type != DISK_TYPE_VHD)
  25.784 +			return -EINVAL;
  25.785 +		if (child->vhd.footer.type != HD_TYPE_DIFF)
  25.786 +			return -EINVAL;
  25.787 +		if (!vhd_parent_raw(&child->vhd))
  25.788 +			return -EINVAL;
  25.789 +		return 0;
  25.790 +	}
  25.791 +
  25.792 +	parent = (struct vhd_state *)parent_driver->data;
  25.793 +
  25.794 +	/* 
  25.795 +	 * This check removed because of cases like:
  25.796 +	 *   - parent VHD marked as 'hidden'
  25.797 +	 *   - parent VHD modified during coalesce
  25.798 +	 */
  25.799 +	/*
  25.800 +	if (stat(parent->vhd.file, &stats)) {
  25.801 +		DPRINTF("ERROR stating parent file %s\n", parent->vhd.file);
  25.802 +		return -errno;
  25.803 +	}
  25.804 +
  25.805 +	if (child->hdr.prt_ts != vhd_time(stats.st_mtime)) {
  25.806 +		DPRINTF("ERROR: parent file has been modified since "
  25.807 +			"snapshot.  Child image no longer valid.\n");
  25.808 +		return -EINVAL;
  25.809 +	}
  25.810 +	*/
  25.811 +	/* the parent uuid recorded in the child header must match the parent's footer uuid */
  25.812 +	if (uuid_compare(child->vhd.header.prt_uuid, parent->vhd.footer.uuid)) {
  25.813 +		DPRINTF("ERROR: %s: %s, %s: parent uuid has changed since "
  25.814 +			"snapshot.  Child image no longer valid.\n",
  25.815 +			__func__, child->vhd.file, parent->vhd.file);
  25.816 +		return -EINVAL;
  25.817 +	}
  25.818 +
  25.819 +	/* TODO: compare sizes */
  25.820 +	
  25.821 +	return 0;
  25.822 +}
  25.823 +/* Fill id with the parent's name and driver type; TD_NO_PARENT unless the disk is HD_TYPE_DIFF. */
  25.824 +int
  25.825 +vhd_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
  25.826 +{
  25.827 +	int err;
  25.828 +	char *parent;
  25.829 +	struct vhd_state *s;
  25.830 +
  25.831 +	DBG(TLOG_DBG, "\n");
  25.832 +	memset(id, 0, sizeof(td_disk_id_t));
  25.833 +
  25.834 +	s = (struct vhd_state *)driver->data;
  25.835 +
  25.836 +	if (s->vhd.footer.type != HD_TYPE_DIFF)
  25.837 +		return TD_NO_PARENT;
  25.838 +
  25.839 +	err = vhd_parent_locator_get(&s->vhd, &parent);
  25.840 +	if (err)
  25.841 +		return err;
  25.842 +	/* NOTE(review): parent is presumably heap-allocated and now owned by id -- confirm who frees it */
  25.843 +	id->name       = parent;
  25.844 +	id->drivertype = DISK_TYPE_VHD;
  25.845 +	if (vhd_parent_raw(&s->vhd)) {
  25.846 +		DPRINTF("VHD: parent is raw\n");
  25.847 +		id->drivertype = DISK_TYPE_AIO;
  25.848 +	}
  25.849 +	return 0;
  25.850 +}
  25.851 +/* Make list empty; the requests that were on it are not touched. */
  25.852 +static inline void
  25.853 +clear_req_list(struct vhd_req_list *list)
  25.854 +{
  25.855 +	list->head = list->tail = NULL;
  25.856 +}
  25.857 +/* Append e to list; e->next is left as is, so the caller must have it set to NULL. */
  25.858 +static inline void
  25.859 +add_to_tail(struct vhd_req_list *list, struct vhd_request *e)
  25.860 +{
  25.861 +	if (!list->head) 
  25.862 +		list->head = list->tail = e;
  25.863 +	else 
  25.864 +		list->tail = list->tail->next = e;
  25.865 +}
  25.866 +/* Unlink e from list; returns 0, or -EINVAL when e is not on the list (including an empty list). */
  25.867 +static inline int
  25.868 +remove_from_req_list(struct vhd_req_list *list, struct vhd_request *e)
  25.869 +{
  25.870 +	struct vhd_request *i = list->head;
  25.871 +
  25.872 +	if (list->head == e) {
  25.873 +		if (list->tail == e)
  25.874 +			clear_req_list(list);
  25.875 +		else
  25.876 +			list->head = list->head->next;
  25.877 +		return 0;
  25.878 +	}
  25.879 +	/* i is NULL for an empty list, so test it before looking at i->next */
  25.880 +	while (i && i->next) {
  25.881 +		if (i->next == e) {
  25.882 +			if (list->tail == e) {
  25.883 +				i->next = NULL;
  25.884 +				list->tail = i;
  25.885 +			} else
  25.886 +				i->next = i->next->next;
  25.887 +			return 0;
  25.888 +		}
  25.889 +		i = i->next;
  25.890 +	}
  25.891 +
  25.892 +	return -EINVAL;
  25.893 +}
  25.894 +/* Zero req and bind it to state s. */
  25.895 +static inline void
  25.896 +init_vhd_request(struct vhd_state *s, struct vhd_request *req)
  25.897 +{
  25.898 +	memset(req, 0, sizeof(struct vhd_request));
  25.899 +	req->state = s;
  25.900 +}
  25.901 +/* Reset tx: counters, status and request list are all zeroed. */
  25.902 +static inline void
  25.903 +init_tx(struct vhd_transaction *tx)
  25.904 +{
  25.905 +	memset(tx, 0, sizeof(struct vhd_transaction));
  25.906 +}
  25.907 +/* Attach r to tx: link it, count it as started and mark the transaction live. */
  25.908 +static inline void
  25.909 +add_to_transaction(struct vhd_transaction *tx, struct vhd_request *r)
  25.910 +{
  25.911 +	ASSERT(!tx->closed);
  25.912 +
  25.913 +	r->tx = tx;
  25.914 +	tx->started++;
  25.915 +	add_to_tail(&tx->requests, r);
  25.916 +	set_vhd_flag(tx->status, VHD_FLAG_TX_LIVE);
  25.917 +	/* the block number in the log uses the global SPB, not this state's spb */
  25.918 +	DBG(TLOG_DBG, "blk: 0x%04"PRIx64", lsec: 0x%08"PRIx64", tx: %p, "
  25.919 +	    "started: %d, finished: %d, status: %u\n",
  25.920 +	    r->treq.sec / SPB, r->treq.sec, tx,
  25.921 +	    tx->started, tx->finished, tx->status);
  25.922 +}
  25.923 +/* Non-zero when every request started in tx has finished. */
  25.924 +static inline int
  25.925 +transaction_completed(struct vhd_transaction *tx)
  25.926 +{
  25.927 +	return (tx->started == tx->finished);
  25.928 +}
  25.929 +/* Reset the pending-BAT-write state: request links and error, pending block/offset, status. */
  25.930 +static inline void
  25.931 +init_bat(struct vhd_state *s)
  25.932 +{
  25.933 +	s->bat.req.tx     = NULL;
  25.934 +	s->bat.req.next   = NULL;
  25.935 +	s->bat.req.error  = 0;
  25.936 +	s->bat.pbw_blk    = 0;
  25.937 +	s->bat.pbw_offset = 0;
  25.938 +	s->bat.status     = 0; /* also drops VHD_FLAG_BAT_LOCKED */
  25.939 +}
  25.940 +/* Set VHD_FLAG_BAT_LOCKED in the BAT status word. */
  25.941 +static inline void
  25.942 +lock_bat(struct vhd_state *s)
  25.943 +{
  25.944 +	set_vhd_flag(s->bat.status, VHD_FLAG_BAT_LOCKED);
  25.945 +}
  25.946 +/* Clear VHD_FLAG_BAT_LOCKED in the BAT status word. */
  25.947 +static inline void
  25.948 +unlock_bat(struct vhd_state *s)
  25.949 +{
  25.950 +	clear_vhd_flag(s->bat.status, VHD_FLAG_BAT_LOCKED);
  25.951 +}
  25.952 +/* Non-zero while VHD_FLAG_BAT_LOCKED is set. */
  25.953 +static inline int
  25.954 +bat_locked(struct vhd_state *s)
  25.955 +{
  25.956 +	return test_vhd_flag(s->bat.status, VHD_FLAG_BAT_LOCKED);
  25.957 +}
  25.958 +/* Reset bm for reuse: clears its metadata, transaction, lists, both buffers and its request. */
  25.959 +static inline void
  25.960 +init_vhd_bitmap(struct vhd_state *s, struct vhd_bitmap *bm)
  25.961 +{
  25.962 +	bm->blk    = 0;
  25.963 +	bm->seqno  = 0;
  25.964 +	bm->status = 0;
  25.965 +	init_tx(&bm->tx);
  25.966 +	clear_req_list(&bm->queue);
  25.967 +	clear_req_list(&bm->waiting);
  25.968 +	memset(bm->map, 0, vhd_sectors_to_bytes(s->bm_secs)); /* buffers are kept, only zeroed */
  25.969 +	memset(bm->shadow, 0, vhd_sectors_to_bytes(s->bm_secs));
  25.970 +	init_vhd_request(s, &bm->req);
  25.971 +}
  25.972 +
  25.973 +static inline struct vhd_bitmap * /* Look up the cached bitmap for block; NULL when it is not in s->bitmap[]. */
  25.974 +get_bitmap(struct vhd_state *s, uint32_t block)
  25.975 +{
  25.976 +	int i;
  25.977 +	struct vhd_bitmap *bm;
  25.978 +
  25.979 +	for (i = 0; i < VHD_CACHE_SIZE; i++) { /* linear scan over all cache slots */
  25.980 +		bm = s->bitmap[i];
  25.981 +		if (bm && bm->blk == block) /* empty slots are NULL and skipped */
  25.982 +			return bm;
  25.983 +	}
  25.984 +
  25.985 +	return NULL;
  25.986 +}
  25.987 +
  25.988 +static inline void /* Set VHD_FLAG_BM_LOCKED on bm; remove_lru_bitmap() skips locked bitmaps. */
  25.989 +lock_bitmap(struct vhd_bitmap *bm)
  25.990 +{
  25.991 +	set_vhd_flag(bm->status, VHD_FLAG_BM_LOCKED);
  25.992 +}
  25.993 +
  25.994 +static inline void /* Clear VHD_FLAG_BM_LOCKED on bm. */
  25.995 +unlock_bitmap(struct vhd_bitmap *bm)
  25.996 +{
  25.997 +	clear_vhd_flag(bm->status, VHD_FLAG_BM_LOCKED);
  25.998 +}
  25.999 +
 25.1000 +static inline int /* Result of testing VHD_FLAG_BM_LOCKED in bm->status. */
 25.1001 +bitmap_locked(struct vhd_bitmap *bm)
 25.1002 +{
 25.1003 +	return test_vhd_flag(bm->status, VHD_FLAG_BM_LOCKED);
 25.1004 +}
 25.1005 +
 25.1006 +static inline int /* A bitmap counts as valid once no read of it is pending. */
 25.1007 +bitmap_valid(struct vhd_bitmap *bm)
 25.1008 +{
 25.1009 +	return !test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING);
 25.1010 +}
 25.1011 +
 25.1012 +static inline int /* Non-zero while bm has pending I/O, a pending BAT update, or any request on its lists. */
 25.1013 +bitmap_in_use(struct vhd_bitmap *bm)
 25.1014 +{
 25.1015 +	return (test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING)  ||
 25.1016 +		test_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING) ||
 25.1017 +		test_vhd_flag(bm->tx.status, VHD_FLAG_TX_UPDATE_BAT) ||
 25.1018 +		bm->waiting.head || bm->tx.requests.head || bm->queue.head); /* a non-NULL head means that list is non-empty */
 25.1019 +}
 25.1020 +
 25.1021 +static inline int /* Return 1 when the first s->spb / 8 bytes of bm->map are all 0xFF, else 0. */
 25.1022 +bitmap_full(struct vhd_state *s, struct vhd_bitmap *bm)
 25.1023 +{
 25.1024 +	int i, n;
 25.1025 +
 25.1026 +	n = s->spb >> 3; /* byte count; presumably one bit per sector in the block - TODO confirm */
 25.1027 +	for (i = 0; i < n; i++)
 25.1028 +		if (bm->map[i] != (char)0xFF) /* any byte with a clear bit means the block is not full */
 25.1029 +			return 0;
 25.1030 +
 25.1031 +	DBG(TLOG_DBG, "bitmap 0x%04x full\n", bm->blk);
 25.1032 +	return 1;
 25.1033 +}
 25.1034 +
 25.1035 +static struct vhd_bitmap * /* Evict and return the unlocked cached bitmap with the lowest seqno; NULL if none qualifies. */
 25.1036 +remove_lru_bitmap(struct vhd_state *s)
 25.1037 +{
 25.1038 +	int i, idx = 0;
 25.1039 +	u64 seq = s->bm_lru; /* candidates must have a seqno strictly below the current LRU counter */
 25.1040 +	struct vhd_bitmap *bm, *lru = NULL;
 25.1041 +
 25.1042 +	for (i = 0; i < VHD_CACHE_SIZE; i++) {
 25.1043 +		bm = s->bitmap[i];
 25.1044 +		if (bm && bm->seqno < seq && !bitmap_locked(bm)) { /* locked bitmaps are never evicted */
 25.1045 +			idx = i;
 25.1046 +			lru = bm;
 25.1047 +			seq = lru->seqno; /* tighten the bound so only older entries replace this one */
 25.1048 +		}
 25.1049 +	}
 25.1050 +
 25.1051 +	if (lru) {
 25.1052 +		s->bitmap[idx] = NULL; /* free the cache slot; the caller now owns lru */
 25.1053 +		ASSERT(!bitmap_in_use(lru)); /* an unlocked bitmap is expected to have no outstanding work */
 25.1054 +	}
 25.1055 +
 25.1056 +	return  lru;
 25.1057 +}
 25.1058 +
 25.1059 +static int /* Obtain a reinitialised bitmap for blk in *bitmap; returns 0, or -EBUSY with *bitmap == NULL. */
 25.1060 +alloc_vhd_bitmap(struct vhd_state *s, struct vhd_bitmap **bitmap, uint32_t blk)
 25.1061 +{
 25.1062 +	struct vhd_bitmap *bm;
 25.1063 +	
 25.1064 +	*bitmap = NULL; /* defined result on the failure path */
 25.1065 +
 25.1066 +	if (s->bm_free_count > 0) {
 25.1067 +		bm = s->bitmap_free[--s->bm_free_count]; /* pop from the free stack first */
 25.1068 +	} else {
 25.1069 +		bm = remove_lru_bitmap(s); /* otherwise evict the least recently used unlocked bitmap */
 25.1070 +		if (!bm)
 25.1071 +			return -EBUSY;
 25.1072 +	}
 25.1073 +
 25.1074 +	init_vhd_bitmap(s, bm);
 25.1075 +	bm->blk = blk;
 25.1076 +	*bitmap = bm; /* not placed in s->bitmap[] here; callers use install_bitmap() for that */
 25.1077 +
 25.1078 +	return 0;
 25.1079 +}
 25.1080 +
 25.1081 +static inline uint64_t /* Return the next LRU sequence number, rescaling cached seqnos when the counter reaches 0xffffffff. */
 25.1082 +__bitmap_lru_seqno(struct vhd_state *s)
 25.1083 +{
 25.1084 +	int i;
 25.1085 +	struct vhd_bitmap *bm;
 25.1086 +
 25.1087 +	if (s->bm_lru == 0xffffffff) {
 25.1088 +		s->bm_lru = 0;
 25.1089 +		for (i = 0; i < VHD_CACHE_SIZE; i++) {
 25.1090 +			bm = s->bitmap[i];
 25.1091 +			if (bm) {
 25.1092 +				bm->seqno >>= 1; /* halve every cached seqno */
 25.1093 +				if (bm->seqno > s->bm_lru)
 25.1094 +					s->bm_lru = bm->seqno; /* counter restarts from the largest halved seqno */
 25.1095 +			}
 25.1096 +		}
 25.1097 +	}
 25.1098 +
 25.1099 +	return ++s->bm_lru; /* pre-increment: the returned value is the new counter */
 25.1100 +}
 25.1101 +
 25.1102 +static inline void /* Stamp bm with a fresh LRU sequence number, making it the most recently used. */
 25.1103 +touch_bitmap(struct vhd_state *s, struct vhd_bitmap *bm)
 25.1104 +{
 25.1105 +	bm->seqno = __bitmap_lru_seqno(s);
 25.1106 +}
 25.1107 +
 25.1108 +static inline void /* Place bm in the first empty slot of s->bitmap[] and touch it. */
 25.1109 +install_bitmap(struct vhd_state *s, struct vhd_bitmap *bm)
 25.1110 +{
 25.1111 +	int i;
 25.1112 +	for (i = 0; i < VHD_CACHE_SIZE; i++) {
 25.1113 +		if (!s->bitmap[i]) {
 25.1114 +			touch_bitmap(s, bm);
 25.1115 +			s->bitmap[i] = bm;
 25.1116 +			return;
 25.1117 +		}
 25.1118 +	}
 25.1119 +
 25.1120 +	ASSERT(0); /* reached only when every slot is occupied */
 25.1121 +}
 25.1122 +
 25.1123 +static inline void /* Remove bm from the cache array and push it onto the free stack. */
 25.1124 +free_vhd_bitmap(struct vhd_state *s, struct vhd_bitmap *bm)
 25.1125 +{
 25.1126 +	int i;
 25.1127 +
 25.1128 +	for (i = 0; i < VHD_CACHE_SIZE; i++) /* locate bm's slot */
 25.1129 +		if (s->bitmap[i] == bm)
 25.1130 +			break;
 25.1131 +
 25.1132 +	ASSERT(!bitmap_locked(bm));
 25.1133 +	ASSERT(!bitmap_in_use(bm));
 25.1134 +	ASSERT(i < VHD_CACHE_SIZE); /* NOTE(review): this ASSERT is the only guard on i; if it can be compiled out, an uncached bm would index one past the array below - confirm */
 25.1135 +
 25.1136 +	s->bitmap[i] = NULL;
 25.1137 +	s->bitmap_free[s->bm_free_count++] = bm;
 25.1138 +}
 25.1139 +
 25.1140 +static int /* Classify logical sector `sector` for operation `op`; returns a VHD_BM_* code, or -EINVAL if the block is out of range. */
 25.1141 +read_bitmap_cache(struct vhd_state *s, uint64_t sector, uint8_t op)
 25.1142 +{
 25.1143 +	u32 blk, sec;
 25.1144 +	struct vhd_bitmap *bm;
 25.1145 +
 25.1146 +	/* in fixed disks, every block is present */
 25.1147 +	if (s->vhd.footer.type == HD_TYPE_FIXED) 
 25.1148 +		return VHD_BM_BIT_SET;
 25.1149 +
 25.1150 +	blk = sector / s->spb; /* block index */
 25.1151 +	sec = sector % s->spb; /* sector offset within that block */
 25.1152 +
 25.1153 +	if (blk >= s->vhd.header.max_bat_size) { /* valid indices are 0 .. max_bat_size - 1; '>' let blk == max_bat_size reach bat_entry() */
 25.1154 +		DPRINTF("ERROR: sec %"PRIu64" out of range, op = %d\n",
 25.1155 +			sector, op);
 25.1156 +		return -EINVAL;
 25.1157 +	}
 25.1158 +
 25.1159 +	if (bat_entry(s, blk) == DD_BLK_UNUSED) { /* block has no BAT entry yet */
 25.1160 +		if (op == VHD_OP_DATA_WRITE &&
 25.1161 +		    s->bat.pbw_blk != blk && bat_locked(s)) /* a BAT update for a different block is in progress */
 25.1162 +			return VHD_BM_BAT_LOCKED;
 25.1163 +
 25.1164 +		return VHD_BM_BAT_CLEAR;
 25.1165 +	}
 25.1166 +
 25.1167 +	if (test_batmap(s, blk)) { /* batmap hit: reported as set without consulting the per-block bitmap */
 25.1168 +		DBG(TLOG_DBG, "batmap set for 0x%04x\n", blk);
 25.1169 +		return VHD_BM_BIT_SET;
 25.1170 +	}
 25.1171 +
 25.1172 +	bm = get_bitmap(s, blk);
 25.1173 +	if (!bm)
 25.1174 +		return VHD_BM_NOT_CACHED;
 25.1175 +
 25.1176 +	/* bump lru count */
 25.1177 +	touch_bitmap(s, bm);
 25.1178 +
 25.1179 +	if (test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING)) /* bitmap contents are not usable until the read completes */
 25.1180 +		return VHD_BM_READ_PENDING;
 25.1181 +
 25.1182 +	return ((vhd_bitmap_test(&s->vhd, bm->map, sec)) ? 
 25.1183 +		VHD_BM_BIT_SET : VHD_BM_BIT_CLEAR);
 25.1184 +}
 25.1185 +
 25.1186 +static int /* Count consecutive sectors from `sector`, within one block and at most nr_secs, whose bitmap bit equals `value`. */
 25.1187 +read_bitmap_cache_span(struct vhd_state *s, 
 25.1188 +		       uint64_t sector, int nr_secs, int value)
 25.1189 +{
 25.1190 +	int ret;
 25.1191 +	u32 blk, sec;
 25.1192 +	struct vhd_bitmap *bm;
 25.1193 +
 25.1194 +	/* in fixed disks, every block is present */
 25.1195 +	if (s->vhd.footer.type == HD_TYPE_FIXED) 
 25.1196 +		return nr_secs;
 25.1197 +
 25.1198 +	sec = sector % s->spb;
 25.1199 +	blk = sector / s->spb;
 25.1200 +
 25.1201 +	if (test_batmap(s, blk))
 25.1202 +		return MIN(nr_secs, s->spb - sec); /* batmap hit: span runs to the end of the block regardless of `value` */
 25.1203 +
 25.1204 +	bm  = get_bitmap(s, blk);
 25.1205 +	
 25.1206 +	ASSERT(bm && bitmap_valid(bm)); /* the bitmap must already be cached with no read pending */
 25.1207 +
 25.1208 +	for (ret = 0; sec < s->spb && ret < nr_secs; sec++, ret++) /* stop at block end or after nr_secs sectors */
 25.1209 +		if (vhd_bitmap_test(&s->vhd, bm->map, sec) != value)
 25.1210 +			break;
 25.1211 +
 25.1212 +	return ret; /* 0 when the first sector already differs from `value` */
 25.1213 +}
 25.1214 +
 25.1215 +static inline struct vhd_request * /* Pop an initialised request from the free stack; NULL when the stack is empty. */
 25.1216 +alloc_vhd_request(struct vhd_state *s)
 25.1217 +{
 25.1218 +	struct vhd_request *req = NULL;
 25.1219 +	
 25.1220 +	if (s->vreq_free_count > 0) {
 25.1221 +		req = s->vreq_free[--s->vreq_free_count];
 25.1222 +		ASSERT(req->treq.secs == 0); /* free_vhd_request() zeroes requests before stacking them */
 25.1223 +		init_vhd_request(s, req);
 25.1224 +		return req;
 25.1225 +	}
 25.1226 +
 25.1227 +	return NULL;
 25.1228 +}
 25.1229 +
 25.1230 +static inline void /* Zero req and push it back onto the free stack. */
 25.1231 +free_vhd_request(struct vhd_state *s, struct vhd_request *req)
 25.1232 +{
 25.1233 +	memset(req, 0, sizeof(struct vhd_request)); /* leaves treq.secs == 0, which alloc_vhd_request() asserts */
 25.1234 +	s->vreq_free[s->vreq_free_count++] = req;
 25.1235 +}
 25.1236 +
 25.1237 +static inline void /* Queue an asynchronous read of req->treq.secs sectors at byte `offset` of the VHD file into req->treq.buf. */
 25.1238 +aio_read(struct vhd_state *s, struct vhd_request *req, uint64_t offset)
 25.1239 +{
 25.1240 +	struct tiocb *tiocb = &req->tiocb; /* the I/O control block is embedded in the request */
 25.1241 +
 25.1242 +	td_prep_read(tiocb, s->vhd.fd, req->treq.buf,
 25.1243 +		     vhd_sectors_to_bytes(req->treq.secs),
 25.1244 +		     offset, vhd_complete, req); /* vhd_complete is registered as the callback, with req as its argument */
 25.1245 +	td_queue_tiocb(s->driver, tiocb);
 25.1246 +
 25.1247 +	s->queued++; /* counters updated per queued read */
 25.1248 +	s->reads++;
 25.1249 +	s->read_size += req->treq.secs; /* accumulated in sectors */
 25.1250 +	TRACE(s);
 25.1251 +}
 25.1252 +
 25.1253 +static inline void /* Queue an asynchronous write of req->treq.secs sectors from req->treq.buf at byte `offset` of the VHD file. */
 25.1254 +aio_write(struct vhd_state *s, struct vhd_request *req, uint64_t offset)
 25.1255 +{
 25.1256 +	struct tiocb *tiocb = &req->tiocb; /* the I/O control block is embedded in the request */
 25.1257 +
 25.1258 +	td_prep_write(tiocb, s->vhd.fd, req->treq.buf,
 25.1259 +		      vhd_sectors_to_bytes(req->treq.secs),
 25.1260 +		      offset, vhd_complete, req); /* vhd_complete is registered as the callback, with req as its argument */
 25.1261 +	td_queue_tiocb(s->driver, tiocb);
 25.1262 +
 25.1263 +	s->queued++; /* counters updated per queued write */
 25.1264 +	s->writes++;
 25.1265 +	s->write_size += req->treq.secs; /* accumulated in sectors */
 25.1266 +	TRACE(s);
 25.1267 +}
 25.1268 +
 25.1269 +static inline uint64_t /* Record blk as the pending BAT write with a padded offset; returns the unpadded s->next_db. */
 25.1270 +reserve_new_block(struct vhd_state *s, uint32_t blk)
 25.1271 +{
 25.1272 +	int gap = 0;
 25.1273 +
 25.1274 +	ASSERT(!test_vhd_flag(s->bat.status, VHD_FLAG_BAT_WRITE_STARTED)); /* no BAT write may already be started */
 25.1275 +
 25.1276 +	/* data region of segment should begin on page boundary */
 25.1277 +	if ((s->next_db + s->bm_secs) % s->spp)
 25.1278 +		gap = (s->spp - ((s->next_db + s->bm_secs) % s->spp)); /* sectors of padding so next_db + gap + bm_secs is a multiple of spp */
 25.1279 +
 25.1280 +	s->bat.pbw_blk    = blk;
 25.1281 +	s->bat.pbw_offset = s->next_db + gap;
 25.1282 +
 25.1283 +	return s->next_db; /* s->next_db itself is not modified here */
 25.1284 +}
 25.1285 +
 25.1286 +static int /* Queue a one-sector write of the BAT sector holding the pending block's entry; always returns 0. */
 25.1287 +schedule_bat_write(struct vhd_state *s)
 25.1288 +{
 25.1289 +	int i;
 25.1290 +	u32 blk;
 25.1291 +	char *buf;
 25.1292 +	u64 offset;
 25.1293 +	struct vhd_request *req;
 25.1294 +
 25.1295 +	ASSERT(bat_locked(s)); /* caller must hold the BAT lock */
 25.1296 +
 25.1297 +	req = &s->bat.req;
 25.1298 +	buf = s->bat.bat_buf;
 25.1299 +	blk = s->bat.pbw_blk;
 25.1300 +
 25.1301 +	init_vhd_request(s, req);
 25.1302 +	memcpy(buf, &bat_entry(s, blk - (blk % 128)), 512); /* copy the 128 four-byte entries (512 bytes) of the group containing blk */
 25.1303 +
 25.1304 +	((u32 *)buf)[blk % 128] = s->bat.pbw_offset; /* patch only the copy with the new offset */
 25.1305 +
 25.1306 +	for (i = 0; i < 128; i++)
 25.1307 +		BE32_OUT(&((u32 *)buf)[i]); /* presumably converts each entry to big-endian on-disk order - TODO confirm */
 25.1308 +
 25.1309 +	offset         = s->vhd.header.table_offset + (blk - (blk % 128)) * 4; /* byte position of that entry group within the table */
 25.1310 +	req->treq.secs = 1;
 25.1311 +	req->treq.buf  = buf;
 25.1312 +	req->op        = VHD_OP_BAT_WRITE;
 25.1313 +	req->next      = NULL;
 25.1314 +
 25.1315 +	aio_write(s, req, offset);
 25.1316 +	set_vhd_flag(s->bat.status, VHD_FLAG_BAT_WRITE_STARTED);
 25.1317 +
 25.1318 +	DBG(TLOG_DBG, "blk: 0x%04x, pbwo: 0x%08"PRIx64", "
 25.1319 +	    "table_offset: 0x%08"PRIx64"\n", blk, s->bat.pbw_offset, offset);
 25.1320 +
 25.1321 +	return 0;
 25.1322 +}
 25.1323 +
 25.1324 +static void /* Queue a write of zeros from sector lb_end through the pending block's bitmap, as part of bm's transaction. */
 25.1325 +schedule_zero_bm_write(struct vhd_state *s,
 25.1326 +		       struct vhd_bitmap *bm, uint64_t lb_end)
 25.1327 +{
 25.1328 +	uint64_t offset;
 25.1329 +	struct vhd_request *req = &s->bat.zero_req; /* uses the request embedded in s->bat */
 25.1330 +
 25.1331 +	init_vhd_request(s, req);
 25.1332 +
 25.1333 +	offset         = vhd_sectors_to_bytes(lb_end);
 25.1334 +	req->op        = VHD_OP_ZERO_BM_WRITE;
 25.1335 +	req->treq.sec  = s->bat.pbw_blk * s->spb; /* first logical sector of the pending block */
 25.1336 +	req->treq.secs = (s->bat.pbw_offset - lb_end) + s->bm_secs; /* gap before the new block plus its bitmap sectors */
 25.1337 +	req->treq.buf  = vhd_zeros(vhd_sectors_to_bytes(req->treq.secs)); /* NOTE(review): result is used unchecked - confirm vhd_zeros cannot fail */
 25.1338 +	req->next      = NULL;
 25.1339 +
 25.1340 +	DBG(TLOG_DBG, "blk: 0x%04x, writing zero bitmap at 0x%08"PRIx64"\n",
 25.1341 +	    s->bat.pbw_blk, offset);
 25.1342 +
 25.1343 +	lock_bitmap(bm);
 25.1344 +	add_to_transaction(&bm->tx, req);
 25.1345 +	aio_write(s, req, offset);
 25.1346 +}
 25.1347 +
 25.1348 +static int /* Begin allocating unallocated block blk: lock the BAT, reserve space, queue the zero-bitmap write. Returns 0 or the alloc_vhd_bitmap() error. */
 25.1349 +update_bat(struct vhd_state *s, uint32_t blk)
 25.1350 +{
 25.1351 +	int err;
 25.1352 +	uint64_t lb_end;
 25.1353 +	struct vhd_bitmap *bm;
 25.1354 +
 25.1355 +	ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED);
 25.1356 +	
 25.1357 +	if (bat_locked(s)) { /* an update is already under way; it must be for this same block */
 25.1358 +		ASSERT(s->bat.pbw_blk == blk);
 25.1359 +		return 0;
 25.1360 +	}
 25.1361 +
 25.1362 +	/* empty bitmap could already be in
 25.1363 +	 * cache if earlier bat update failed */
 25.1364 +	bm = get_bitmap(s, blk);
 25.1365 +	if (!bm) {
 25.1366 +		/* install empty bitmap in cache */
 25.1367 +		err = alloc_vhd_bitmap(s, &bm, blk);
 25.1368 +		if (err) 
 25.1369 +			return err; /* nothing has been locked or reserved at this point */
 25.1370 +
 25.1371 +		install_bitmap(s, bm);
 25.1372 +	}
 25.1373 +
 25.1374 +	lock_bat(s);
 25.1375 +	lb_end = reserve_new_block(s, blk); /* sets pbw_blk/pbw_offset; returns the unpadded next_db */
 25.1376 +	schedule_zero_bm_write(s, bm, lb_end);
 25.1377 +	set_vhd_flag(bm->tx.status, VHD_FLAG_TX_UPDATE_BAT); /* flags the bitmap's transaction as carrying a BAT update */
 25.1378 +
 25.1379 +	return 0;
 25.1380 +}
 25.1381 +
 25.1382 +static int /* Synchronously zero-fill a new block for blk, then queue the BAT write. Returns 0 or a negative errno value. */
 25.1383 +allocate_block(struct vhd_state *s, uint32_t blk)
 25.1384 +{
 25.1385 +	char *zeros;
 25.1386 +	int err, gap;
 25.1387 +	uint64_t offset, size;
 25.1388 +	struct vhd_bitmap *bm;
 25.1389 +
 25.1390 +	ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED);
 25.1391 +
 25.1392 +	if (bat_locked(s)) { /* an allocation is already under way; it must be for this same block */
 25.1393 +		ASSERT(s->bat.pbw_blk == blk);
 25.1394 +		if (s->bat.req.error) /* the pending BAT request has recorded an error */
 25.1395 +			return -EBUSY;
 25.1396 +		return 0;
 25.1397 +	}
 25.1398 +
 25.1399 +	gap            = 0;
 25.1400 +	s->bat.pbw_blk = blk;
 25.1401 +	offset         = vhd_sectors_to_bytes(s->next_db); /* taken before any gap is added, so the zero-fill covers the gap too */
 25.1402 +
 25.1403 +	/* data region of segment should begin on page boundary */
 25.1404 +	if ((s->next_db + s->bm_secs) % s->spp) {
 25.1405 +		gap = (s->spp - ((s->next_db + s->bm_secs) % s->spp));
 25.1406 +		s->next_db += gap; /* unlike reserve_new_block(), this advances next_db itself */
 25.1407 +	}
 25.1408 +
 25.1409 +	s->bat.pbw_offset = s->next_db;
 25.1410 +
 25.1411 +	DBG(TLOG_DBG, "blk: 0x%04x, pbwo: 0x%08"PRIx64"\n",
 25.1412 +	    blk, s->bat.pbw_offset);
 25.1413 +
 25.1414 +	if (lseek(s->vhd.fd, offset, SEEK_SET) == (off_t)-1) {
 25.1415 +		ERR(errno, "lseek failed\n");
 25.1416 +		return -errno; /* NOTE(review): errno is re-read after ERR(); confirm ERR cannot modify it */
 25.1417 +	}
 25.1418 +
 25.1419 +	size = vhd_sectors_to_bytes(s->spb + s->bm_secs + gap); /* gap + bitmap + data sectors */
 25.1420 +	err  = write(s->vhd.fd, vhd_zeros(size), size); /* blocking write; result is stored in an int */
 25.1421 +	if (err != size) { /* int compared against uint64_t; a short write is treated as failure */
 25.1422 +		err = (err == -1 ? -errno : -EIO);
 25.1423 +		ERR(err, "write failed");
 25.1424 +		return err;
 25.1425 +	}
 25.1426 +
 25.1427 +	/* empty bitmap could already be in
 25.1428 +	 * cache if earlier bat update failed */
 25.1429 +	bm = get_bitmap(s, blk);
 25.1430 +	if (!bm) {
 25.1431 +		/* install empty bitmap in cache */
 25.1432 +		err = alloc_vhd_bitmap(s, &bm, blk);
 25.1433 +		if (err) 
 25.1434 +			return err;
 25.1435 +
 25.1436 +		install_bitmap(s, bm);
 25.1437 +	}
 25.1438 +
 25.1439 +	lock_bat(s);
 25.1440 +	lock_bitmap(bm);
 25.1441 +	schedule_bat_write(s); /* queues the write using s->bat.req */
 25.1442 +	add_to_transaction(&bm->tx, &s->bat.req); /* the BAT request becomes part of the bitmap's transaction */
 25.1443 +
 25.1444 +	return 0;
 25.1445 +}
 25.1446 +
 25.1447 +static int /* Queue an asynchronous data read for treq; returns 0, or -EBUSY when no request structure is free. */
 25.1448 +schedule_data_read(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
 25.1449 +{
 25.1450 +	u64 offset;
 25.1451 +	u32 blk = 0, sec = 0;
 25.1452 +	struct vhd_bitmap  *bm;
 25.1453 +	struct vhd_request *req;
 25.1454 +
 25.1455 +	if (s->vhd.footer.type == HD_TYPE_FIXED) {
 25.1456 +		offset = vhd_sectors_to_bytes(treq.sec); /* fixed disk: logical sector maps directly to a file offset */
 25.1457 +		goto make_request;
 25.1458 +	}
 25.1459 +
 25.1460 +	blk    = treq.sec / s->spb;
 25.1461 +	sec    = treq.sec % s->spb;
 25.1462 +	bm     = get_bitmap(s, blk);
 25.1463 +	offset = bat_entry(s, blk); /* starting sector of the block as recorded in the BAT */
 25.1464 +
 25.1465 +	ASSERT(offset != DD_BLK_UNUSED);
 25.1466 +	ASSERT(test_batmap(s, blk) || (bm && bitmap_valid(bm))); /* bm is only used for this sanity check */
 25.1467 +
 25.1468 +	offset += s->bm_secs + sec; /* skip the block's bitmap sectors, then index into the data */
 25.1469 +	offset  = vhd_sectors_to_bytes(offset);
 25.1470 +
 25.1471 + make_request:
 25.1472 +	req = alloc_vhd_request(s);
 25.1473 +	if (!req) 
 25.1474 +		return -EBUSY;
 25.1475 +
 25.1476 +	req->treq  = treq;
 25.1477 +	req->flags = flags;
 25.1478 +	req->op    = VHD_OP_DATA_READ;
 25.1479 +	req->next  = NULL;
 25.1480 +
 25.1481 +	aio_read(s, req, offset);
 25.1482 +
 25.1483 +	DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x, sec: 0x%04x, "
 25.1484 +	    "nr_secs: 0x%04x, offset: 0x%08"PRIx64", flags: 0x%08x, buf: %p\n",
 25.1485 +	    s->vhd.file, treq.sec, blk, sec, treq.secs, offset, req->flags,
 25.1486 +	    treq.buf);
 25.1487 +
 25.1488 +	return 0;
 25.1489 +}
 25.1490 +
 25.1491 +static int /* Queue an asynchronous data write for treq, starting block allocation and bitmap transaction tracking as flags request. Returns 0 or a negative error. */
 25.1492 +schedule_data_write(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
 25.1493 +{
 25.1494 +	int err;
 25.1495 +	u64 offset;
 25.1496 +	u32 blk = 0, sec = 0;
 25.1497 +	struct vhd_bitmap  *bm = NULL;
 25.1498 +	struct vhd_request *req;
 25.1499 +
 25.1500 +	if (s->vhd.footer.type == HD_TYPE_FIXED) {
 25.1501 +		offset = vhd_sectors_to_bytes(treq.sec); /* fixed disk: logical sector maps directly to a file offset */
 25.1502 +		goto make_request;
 25.1503 +	}
 25.1504 +
 25.1505 +	blk    = treq.sec / s->spb;
 25.1506 +	sec    = treq.sec % s->spb;
 25.1507 +	offset = bat_entry(s, blk);
 25.1508 +
 25.1509 +	if (test_vhd_flag(flags, VHD_FLAG_REQ_UPDATE_BAT)) { /* caller asked for the block to be allocated */
 25.1510 +		if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_PREALLOCATE))
 25.1511 +			err = allocate_block(s, blk); /* preallocate mode: zero-fills the block synchronously */
 25.1512 +		else
 25.1513 +			err = update_bat(s, blk);
 25.1514 +
 25.1515 +		if (err)
 25.1516 +			return err;
 25.1517 +
 25.1518 +		offset = s->bat.pbw_offset; /* use the pending offset in place of the BAT entry read above */
 25.1519 +	}
 25.1520 +
 25.1521 +	offset += s->bm_secs + sec; /* skip the block's bitmap sectors, then index into the data */
 25.1522 +	offset  = vhd_sectors_to_bytes(offset);
 25.1523 +
 25.1524 + make_request:
 25.1525 +	req = alloc_vhd_request(s);
 25.1526 +	if (!req)
 25.1527 +		return -EBUSY; /* NOTE(review): any allocation started above is not undone here - confirm callers cope */
 25.1528 +
 25.1529 +	req->treq  = treq;
 25.1530 +	req->flags = flags;
 25.1531 +	req->op    = VHD_OP_DATA_WRITE;
 25.1532 +	req->next  = NULL;
 25.1533 +
 25.1534 +	if (test_vhd_flag(flags, VHD_FLAG_REQ_UPDATE_BITMAP)) {
 25.1535 +		bm = get_bitmap(s, blk);
 25.1536 +		ASSERT(bm && bitmap_valid(bm));
 25.1537 +		lock_bitmap(bm);
 25.1538 +
 25.1539 +		if (bm->tx.closed) { /* transaction is closed: park the request on bm->queue instead */
 25.1540 +			add_to_tail(&bm->queue, req);
 25.1541 +			set_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED);
 25.1542 +		} else
 25.1543 +			add_to_transaction(&bm->tx, req);
 25.1544 +	}
 25.1545 +
 25.1546 +	aio_write(s, req, offset); /* the data write is issued in both cases above */
 25.1547 +
 25.1548 +	DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x, sec: 0x%04x, "
 25.1549 +	    "nr_secs: 0x%04x, offset: 0x%08"PRIx64", flags: 0x%08x\n",
 25.1550 +	    s->vhd.file, treq.sec, blk, sec, treq.secs, offset, req->flags);
 25.1551 +
 25.1552 +	return 0;
 25.1553 +}
 25.1554 +
 25.1555 +static int /* Allocate a cache bitmap for blk and queue an asynchronous read of it from disk. Returns 0 or the alloc_vhd_bitmap() error. */
 25.1556 +schedule_bitmap_read(struct vhd_state *s, uint32_t blk)
 25.1557 +{
 25.1558 +	int err;
 25.1559 +	u64 offset;
 25.1560 +	struct vhd_bitmap  *bm;
 25.1561 +	struct vhd_request *req = NULL;
 25.1562 +
 25.1563 +	ASSERT(vhd_type_dynamic(&s->vhd));
 25.1564 +
 25.1565 +	offset = bat_entry(s, blk); /* the read starts at the block's BAT offset */
 25.1566 +
 25.1567 +	ASSERT(offset != DD_BLK_UNUSED);
 25.1568 +	ASSERT(!get_bitmap(s, blk)); /* must not already be cached */
 25.1569 +
 25.1570 +	offset = vhd_sectors_to_bytes(offset);
 25.1571 +
 25.1572 +	err = alloc_vhd_bitmap(s, &bm, blk);
 25.1573 +	if (err)
 25.1574 +		return err;
 25.1575 +
 25.1576 +	req = &bm->req; /* uses the request embedded in the bitmap */
 25.1577 +	init_vhd_request(s, req);
 25.1578 +
 25.1579 +	req->treq.sec  = blk * s->spb;
 25.1580 +	req->treq.secs = s->bm_secs;
 25.1581 +	req->treq.buf  = bm->map; /* data lands directly in the bitmap's map buffer */
 25.1582 +	req->treq.cb   = NULL;
 25.1583 +	req->op        = VHD_OP_BITMAP_READ;
 25.1584 +	req->next      = NULL;
 25.1585 +
 25.1586 +	aio_read(s, req, offset);
 25.1587 +	lock_bitmap(bm);
 25.1588 +	install_bitmap(s, bm);
 25.1589 +	set_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING); /* while set, bitmap_valid() reports false */
 25.1590 +
 25.1591 +	DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x, nr_secs: 0x%04x, "
 25.1592 +	    "offset: 0x%08"PRIx64"\n", s->vhd.file, req->treq.sec, blk,
 25.1593 +	    req->treq.secs, offset);
 25.1594 +
 25.1595 +	return 0;
 25.1596 +}
 25.1597 +
 25.1598 +static void /* Queue an asynchronous write of blk's shadow bitmap to the start of the block on disk. */
 25.1599 +schedule_bitmap_write(struct vhd_state *s, uint32_t blk)
 25.1600 +{
 25.1601 +	u64 offset;
 25.1602 +	struct vhd_bitmap  *bm;
 25.1603 +	struct vhd_request *req;
 25.1604 +
 25.1605 +	bm     = get_bitmap(s, blk);
 25.1606 +	offset = bat_entry(s, blk);
 25.1607 +
 25.1608 +	ASSERT(vhd_type_dynamic(&s->vhd));
 25.1609 +	ASSERT(bm && bitmap_valid(bm) &&
 25.1610 +	       !test_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING)); /* bitmap is cached, valid, and has no write already pending */
 25.1611 +
 25.1612 +	if (offset == DD_BLK_UNUSED) { /* block is still being allocated: use the pending offset */
 25.1613 +		ASSERT(bat_locked(s) && s->bat.pbw_blk == blk);
 25.1614 +		offset = s->bat.pbw_offset;
 25.1615 +	}
 25.1616 +	
 25.1617 +	offset = vhd_sectors_to_bytes(offset);
 25.1618 +
 25.1619 +	req = &bm->req; /* uses the request embedded in the bitmap */
 25.1620 +	init_vhd_request(s, req);
 25.1621 +
 25.1622 +	req->treq.sec  = blk * s->spb;
 25.1623 +	req->treq.secs = s->bm_secs;
 25.1624 +	req->treq.buf  = bm->shadow; /* the shadow buffer, not bm->map, is what gets written */
 25.1625 +	req->treq.cb   = NULL;
 25.1626 +	req->op        = VHD_OP_BITMAP_WRITE;
 25.1627 +	req->next      = NULL;
 25.1628 +
 25.1629 +	aio_write(s, req, offset);
 25.1630 +	lock_bitmap(bm);
 25.1631 +	touch_bitmap(s, bm);     /* bump lru count */
 25.1632 +	set_vhd_flag(bm->status, VHD_FLAG_BM_WRITE_PENDING);
 25.1633 +
 25.1634 +	DBG(TLOG_DBG, "%s: blk: 0x%04x, sec: 0x%08"PRIx64", nr_secs: 0x%04x, "
 25.1635 +	    "offset: 0x%"PRIx64"\n", s->vhd.file, blk, req->treq.sec,
 25.1636 +	    req->treq.secs, offset);
 25.1637 +}
 25.1638 +
 25.1639 +/* 
 25.1640 + * queued requests will be submitted once the bitmap
 25.1641 + * describing them is read and the requests are validated. 
 25.1642 + */
 25.1643 +static int /* Park treq on the waiting list of its block's bitmap while that bitmap's read is pending. Returns 0 or -EBUSY. */
 25.1644 +__vhd_queue_request(struct vhd_state *s, uint8_t op, td_request_t treq)
 25.1645 +{
 25.1646 +	u32 blk;
 25.1647 +	struct vhd_bitmap  *bm;
 25.1648 +	struct vhd_request *req;
 25.1649 +
 25.1650 +	ASSERT(vhd_type_dynamic(&s->vhd));
 25.1651 +
 25.1652 +	blk = treq.sec / s->spb;
 25.1653 +	bm  = get_bitmap(s, blk);
 25.1654 +
 25.1655 +	ASSERT(bm && test_vhd_flag(bm->status, VHD_FLAG_BM_READ_PENDING)); /* only valid while the bitmap read is outstanding */
 25.1656 +
 25.1657 +	req = alloc_vhd_request(s);
 25.1658 +	if (!req)
 25.1659 +		return -EBUSY; /* no free request structures */
 25.1660 +
 25.1661 +	req->treq = treq;
 25.1662 +	req->op   = op;
 25.1663 +	req->next = NULL;
 25.1664 +
 25.1665 +	add_to_tail(&bm->waiting, req); /* no I/O is issued here; the request is only queued */
 25.1666 +	lock_bitmap(bm);
 25.1667 +
 25.1668 +	DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", blk: 0x%04x nr_secs: 0x%04x, "
 25.1669 +	    "op: %u\n", s->vhd.file, treq.sec, blk, treq.secs, op);
 25.1670 +
 25.1671 +	TRACE(s);
 25.1672 +	return 0;
 25.1673 +}
 25.1674 +
 25.1675 +static void
 25.1676 +vhd_queue_read(td_driver_t *driver, td_request_t treq)
 25.1677 +{
 25.1678 +	struct vhd_state *s = (struct vhd_state *)driver->data;
 25.1679 +
 25.1680 +	DBG(TLOG_DBG, "%s: lsec: 0x%08"PRIx64", secs: 0x%04x (seg: %d)\n",
 25.1681 +	    s->vhd.file, treq.sec, treq.secs, treq.sidx);
 25.1682 +
 25.1683 +	while (treq.secs) {
 25.1684 +		int err;
 25.1685 +		td_request_t clone;
 25.1686 +
 25.1687 +		err   = 0;
 25.1688 +		clone = treq;
 25.1689 +
 25.1690 +		switch (read_bitmap_cache(s, clone.sec, VHD_OP_DATA_READ)) {
 25.1691 +		case -EINVAL:
 25.1692 +			err = -EINVAL;
 25.1693 +			goto fail;
 25.1694 +
 25.1695 +		case VHD_BM_BAT_CLEAR:
 25.1696 +			clone.secs = MIN(clone.secs, s->spb - (clone.sec % s->spb));
 25.1697 +			td_forward_request(clone);
 25.1698 +			break;
</