ia64/xen-unstable

changeset 10736:2937703f0ed0

Added blktap support. Includes a kernel driver (enabled as CONFIG_XEN_BLKDEV_TAP=y) and userspace tools. The userspace daemon (blktapctrl) is enabled by default when xend is activated. For further information on using and configuring blktap, see tools/blktap/README.
author jchesterfield@dhcp92.uk.xensource.com
date Thu Jul 13 10:13:26 2006 +0100 (2006-07-13)
parents af9809f51f81
children 8eb79da98499
files buildconfigs/linux-defconfig_xen0_x86_32 buildconfigs/linux-defconfig_xen0_x86_64 buildconfigs/linux-defconfig_xen_x86_32 buildconfigs/linux-defconfig_xen_x86_64 linux-2.6-xen-sparse/drivers/xen/Kconfig linux-2.6-xen-sparse/drivers/xen/Makefile linux-2.6-xen-sparse/drivers/xen/blktap/Makefile linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c linux-2.6-xen-sparse/drivers/xen/blktap/common.h linux-2.6-xen-sparse/drivers/xen/blktap/interface.c linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c patches/linux-2.6.16.13/blktap-aio-16_03_06.patch tools/Makefile tools/blktap/Makefile tools/blktap/README tools/blktap/drivers/Makefile tools/blktap/drivers/aes.c tools/blktap/drivers/aes.h tools/blktap/drivers/blktapctrl.c tools/blktap/drivers/blktapctrl.h tools/blktap/drivers/block-aio.c tools/blktap/drivers/block-qcow.c tools/blktap/drivers/block-ram.c tools/blktap/drivers/block-sync.c tools/blktap/drivers/block-vmdk.c tools/blktap/drivers/bswap.h tools/blktap/drivers/img2qcow.c tools/blktap/drivers/qcow-create.c tools/blktap/drivers/qcow2raw.c tools/blktap/drivers/tapdisk.c tools/blktap/drivers/tapdisk.h tools/blktap/lib/Makefile tools/blktap/lib/blkif.c tools/blktap/lib/blktaplib.h tools/blktap/lib/list.h tools/blktap/lib/xenbus.c tools/blktap/lib/xs_api.c tools/blktap/lib/xs_api.h tools/examples/Makefile tools/examples/blktap tools/examples/xen-backend.agent tools/examples/xen-backend.rules tools/libaio/COPYING tools/libaio/ChangeLog tools/libaio/INSTALL tools/libaio/Makefile tools/libaio/TODO tools/libaio/harness/Makefile tools/libaio/harness/README tools/libaio/harness/attic/0.t tools/libaio/harness/attic/1.t tools/libaio/harness/cases/10.t tools/libaio/harness/cases/11.t tools/libaio/harness/cases/12.t tools/libaio/harness/cases/13.t tools/libaio/harness/cases/14.t tools/libaio/harness/cases/2.t tools/libaio/harness/cases/3.t tools/libaio/harness/cases/4.t tools/libaio/harness/cases/5.t tools/libaio/harness/cases/6.t tools/libaio/harness/cases/7.t 
tools/libaio/harness/cases/8.t tools/libaio/harness/cases/aio_setup.h tools/libaio/harness/cases/common-7-8.h tools/libaio/harness/main.c tools/libaio/harness/runtests.sh tools/libaio/libaio.spec tools/libaio/man/aio.3 tools/libaio/man/aio_cancel.3 tools/libaio/man/aio_cancel64.3 tools/libaio/man/aio_error.3 tools/libaio/man/aio_error64.3 tools/libaio/man/aio_fsync.3 tools/libaio/man/aio_fsync64.3 tools/libaio/man/aio_init.3 tools/libaio/man/aio_read.3 tools/libaio/man/aio_read64.3 tools/libaio/man/aio_return.3 tools/libaio/man/aio_return64.3 tools/libaio/man/aio_suspend.3 tools/libaio/man/aio_suspend64.3 tools/libaio/man/aio_write.3 tools/libaio/man/aio_write64.3 tools/libaio/man/io.3 tools/libaio/man/io_cancel.1 tools/libaio/man/io_cancel.3 tools/libaio/man/io_destroy.1 tools/libaio/man/io_fsync.3 tools/libaio/man/io_getevents.1 tools/libaio/man/io_getevents.3 tools/libaio/man/io_prep_fsync.3 tools/libaio/man/io_prep_pread.3 tools/libaio/man/io_prep_pwrite.3 tools/libaio/man/io_queue_init.3 tools/libaio/man/io_queue_release.3 tools/libaio/man/io_queue_run.3 tools/libaio/man/io_queue_wait.3 tools/libaio/man/io_set_callback.3 tools/libaio/man/io_setup.1 tools/libaio/man/io_submit.1 tools/libaio/man/io_submit.3 tools/libaio/man/lio_listio.3 tools/libaio/man/lio_listio64.3 tools/libaio/src/Makefile tools/libaio/src/compat-0_1.c tools/libaio/src/io_cancel.c tools/libaio/src/io_destroy.c tools/libaio/src/io_getevents.c tools/libaio/src/io_queue_init.c tools/libaio/src/io_queue_release.c tools/libaio/src/io_queue_run.c tools/libaio/src/io_queue_wait.c tools/libaio/src/io_setup.c tools/libaio/src/io_submit.c tools/libaio/src/libaio.h tools/libaio/src/libaio.map tools/libaio/src/raw_syscall.c tools/libaio/src/syscall-alpha.h tools/libaio/src/syscall-i386.h tools/libaio/src/syscall-ia64.h tools/libaio/src/syscall-ppc.h tools/libaio/src/syscall-s390.h tools/libaio/src/syscall-x86_64.h tools/libaio/src/syscall.h tools/libaio/src/vsys_def.h tools/misc/xend 
tools/python/xen/xend/XendDomainInfo.py tools/python/xen/xend/server/BlktapController.py tools/python/xen/xm/create.py tools/python/xen/xm/main.py tools/xenstore/Makefile xen/common/grant_table.c
line diff
     1.1 --- a/buildconfigs/linux-defconfig_xen0_x86_32	Thu Jul 13 09:55:14 2006 +0100
     1.2 +++ b/buildconfigs/linux-defconfig_xen0_x86_32	Thu Jul 13 10:13:26 2006 +0100
     1.3 @@ -1322,6 +1322,7 @@ CONFIG_XEN_PCIDEV_BACKEND=y
     1.4  CONFIG_XEN_PCIDEV_BACKEND_PASS=y
     1.5  # CONFIG_XEN_PCIDEV_BE_DEBUG is not set
     1.6  CONFIG_XEN_BLKDEV_BACKEND=y
     1.7 +CONFIG_XEN_BLKDEV_TAP=y
     1.8  CONFIG_XEN_NETDEV_BACKEND=y
     1.9  # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
    1.10  CONFIG_XEN_NETDEV_LOOPBACK=y
     2.1 --- a/buildconfigs/linux-defconfig_xen0_x86_64	Thu Jul 13 09:55:14 2006 +0100
     2.2 +++ b/buildconfigs/linux-defconfig_xen0_x86_64	Thu Jul 13 10:13:26 2006 +0100
     2.3 @@ -1263,6 +1263,7 @@ CONFIG_XEN_PCIDEV_BACKEND=y
     2.4  CONFIG_XEN_PCIDEV_BACKEND_PASS=y
     2.5  # CONFIG_XEN_PCIDEV_BE_DEBUG is not set
     2.6  CONFIG_XEN_BLKDEV_BACKEND=y
     2.7 +CONFIG_XEN_BLKDEV_TAP=y
     2.8  CONFIG_XEN_NETDEV_BACKEND=y
     2.9  # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
    2.10  CONFIG_XEN_NETDEV_LOOPBACK=y
     3.1 --- a/buildconfigs/linux-defconfig_xen_x86_32	Thu Jul 13 09:55:14 2006 +0100
     3.2 +++ b/buildconfigs/linux-defconfig_xen_x86_32	Thu Jul 13 10:13:26 2006 +0100
     3.3 @@ -3023,6 +3023,7 @@ CONFIG_XEN_PCIDEV_BACKEND_VPCI=y
     3.4  # CONFIG_XEN_PCIDEV_BACKEND_PASS is not set
     3.5  # CONFIG_XEN_PCIDEV_BE_DEBUG is not set
     3.6  CONFIG_XEN_BLKDEV_BACKEND=y
     3.7 +CONFIG_XEN_BLKDEV_TAP=y
     3.8  CONFIG_XEN_NETDEV_BACKEND=y
     3.9  # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
    3.10  CONFIG_XEN_NETDEV_LOOPBACK=y
     4.1 --- a/buildconfigs/linux-defconfig_xen_x86_64	Thu Jul 13 09:55:14 2006 +0100
     4.2 +++ b/buildconfigs/linux-defconfig_xen_x86_64	Thu Jul 13 10:13:26 2006 +0100
     4.3 @@ -2855,6 +2855,7 @@ CONFIG_XEN_PCIDEV_BACKEND=m
     4.4  CONFIG_XEN_PCIDEV_BACKEND_PASS=y
     4.5  # CONFIG_XEN_PCIDEV_BE_DEBUG is not set
     4.6  CONFIG_XEN_BLKDEV_BACKEND=y
     4.7 +CONFIG_XEN_BLKDEV_TAP=y
     4.8  CONFIG_XEN_NETDEV_BACKEND=y
     4.9  # CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER is not set
    4.10  CONFIG_XEN_NETDEV_LOOPBACK=y
     5.1 --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig	Thu Jul 13 09:55:14 2006 +0100
     5.2 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig	Thu Jul 13 10:13:26 2006 +0100
     5.3 @@ -94,6 +94,18 @@ config XEN_XENBUS_DEV
     5.4  	depends on PROC_FS
     5.5  	default y
     5.6  
     5.7 +config XEN_BLKDEV_TAP
     5.8 +	tristate "Block device tap backend"
     5.9 +	depends on XEN_BACKEND
    5.10 +	default XEN_PRIVILEGED_GUEST
    5.11 +	help
    5.12 +	  The block tap driver is an alternative to the block back driver 
    5.13 +          and allows VM block requests to be redirected to userspace through
    5.14 +          a device interface.  The tap allows user-space development of 
    5.15 +          high-performance block backends, where disk images may be implemented
    5.16 +          as files, in memory, or on other hosts across the network.  This 
    5.17 +	  driver can safely coexist with the existing blockback driver.
    5.18 +
    5.19  config XEN_NETDEV_BACKEND
    5.20  	tristate "Network-device backend driver"
    5.21          depends on XEN_BACKEND && NET
     6.1 --- a/linux-2.6-xen-sparse/drivers/xen/Makefile	Thu Jul 13 09:55:14 2006 +0100
     6.2 +++ b/linux-2.6-xen-sparse/drivers/xen/Makefile	Thu Jul 13 10:13:26 2006 +0100
     6.3 @@ -8,6 +8,7 @@ obj-$(CONFIG_XEN_UTIL)			+= util.o
     6.4  obj-$(CONFIG_XEN_BALLOON)		+= balloon/
     6.5  obj-$(CONFIG_XEN_DEVMEM)		+= char/
     6.6  obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
     6.7 +obj-$(CONFIG_XEN_BLKDEV_TAP)		+= blktap/
     6.8  obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
     6.9  obj-$(CONFIG_XEN_TPMDEV_BACKEND)	+= tpmback/
    6.10  obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= blkfront/
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile	Thu Jul 13 10:13:26 2006 +0100
     7.3 @@ -0,0 +1,3 @@
     7.4 +LINUXINCLUDE += -I../xen/include/public/io
     7.5 +obj-y	:= xenbus.o interface.o blktap.o 
     7.6 +
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Thu Jul 13 10:13:26 2006 +0100
     8.3 @@ -0,0 +1,1439 @@
     8.4 +/******************************************************************************
     8.5 + * drivers/xen/blktap/blktap.c
     8.6 + * 
     8.7 + * Back-end driver for user level virtual block devices. This portion of the
     8.8 + * driver exports a 'unified' block-device interface that can be accessed
     8.9 + * by any operating system that implements a compatible front end. Requests
    8.10 + * are remapped to a user-space memory region.
    8.11 + *
    8.12 + * Based on the blkback driver code.
    8.13 + * 
    8.14 + * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
    8.15 + *
    8.16 + * This program is free software; you can redistribute it and/or
    8.17 + * modify it under the terms of the GNU General Public License version 2
    8.18 + * as published by the Free Software Foundation; or, when distributed
    8.19 + * separately from the Linux kernel or incorporated into other
    8.20 + * software packages, subject to the following license:
    8.21 + * 
    8.22 + * Permission is hereby granted, free of charge, to any person obtaining a copy
    8.23 + * of this source file (the "Software"), to deal in the Software without
    8.24 + * restriction, including without limitation the rights to use, copy, modify,
    8.25 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
    8.26 + * and to permit persons to whom the Software is furnished to do so, subject to
    8.27 + * the following conditions:
    8.28 + * 
    8.29 + * The above copyright notice and this permission notice shall be included in
    8.30 + * all copies or substantial portions of the Software.
    8.31 + * 
    8.32 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    8.33 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    8.34 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    8.35 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    8.36 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    8.37 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
    8.38 + * IN THE SOFTWARE.
    8.39 + */
    8.40 +
    8.41 +#include <linux/spinlock.h>
    8.42 +#include <linux/kthread.h>
    8.43 +#include <linux/list.h>
    8.44 +#include <asm/hypervisor.h>
    8.45 +#include "common.h"
    8.46 +#include <xen/balloon.h>
    8.47 +#include <linux/kernel.h>
    8.48 +#include <linux/fs.h>
    8.49 +#include <linux/mm.h>
    8.50 +#include <linux/miscdevice.h>
    8.51 +#include <linux/errno.h>
    8.52 +#include <linux/major.h>
    8.53 +#include <linux/gfp.h>
    8.54 +#include <linux/poll.h>
    8.55 +#include <asm/tlbflush.h>
    8.56 +#include <linux/devfs_fs_kernel.h>
    8.57 +
    8.58 +#define MAX_TAP_DEV 100     /*the maximum number of tapdisk ring devices    */
    8.59 +#define MAX_DEV_NAME 100    /*the max tapdisk ring device name e.g. blktap0 */
    8.60 +
    8.61 +/*
    8.62 + * The maximum number of requests that can be outstanding at any time
    8.63 + * is determined by 
    8.64 + *
    8.65 + *   [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] 
    8.66 + *
    8.67 + * where mmap_alloc < MAX_DYNAMIC_MEM.
    8.68 + *
    8.69 + * TODO:
    8.70 + * mmap_alloc is initialised to 2 and should be adjustable on the fly via
    8.71 + * sysfs.
    8.72 + */
    8.73 +#define MAX_DYNAMIC_MEM 64
    8.74 +#define MAX_PENDING_REQS 64   
    8.75 +#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
    8.76 +#define MMAP_VADDR(_start, _req,_seg)                                   \
    8.77 +        (_start +                                                       \
    8.78 +         ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +        \
    8.79 +         ((_seg) * PAGE_SIZE))
    8.80 +static int blkif_reqs = MAX_PENDING_REQS;
    8.81 +static int mmap_pages = MMAP_PAGES;
    8.82 +
    8.83 +#define RING_PAGES 1 /* BLKTAP - immediately before the mmap area, we
    8.84 +		      * have a bunch of pages reserved for shared
    8.85 +		      * memory rings.
    8.86 +		      */
    8.87 +
    8.88 +/*Data struct associated with each of the tapdisk devices*/
    8.89 +typedef struct tap_blkif {
    8.90 +	struct vm_area_struct *vma;   /*Shared memory area                   */
    8.91 +	unsigned long rings_vstart;   /*Kernel memory mapping                */
    8.92 +	unsigned long user_vstart;    /*User memory mapping                  */
    8.93 +	unsigned long dev_inuse;      /*One process opens device at a time.  */
    8.94 +	unsigned long dev_pending;    /*In process of being opened           */
    8.95 +	unsigned long ring_ok;        /*make this ring->state                */
    8.96 +	blkif_front_ring_t ufe_ring;  /*Rings up to user space.              */
    8.97 +	wait_queue_head_t wait;       /*for poll                             */
    8.98 +	unsigned long mode;           /*current switching mode               */
    8.99 +	int minor;                    /*Minor number for tapdisk device      */
   8.100 +	pid_t pid;                    /*tapdisk process id                   */
   8.101 +	enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace 
   8.102 +						  shutdown                   */
   8.103 +	unsigned long *idx_map;       /*Record the user ring id to kern 
   8.104 +					[req id, idx] tuple                  */
   8.105 +	blkif_t *blkif;               /*Associate blkif with tapdev          */
   8.106 +} tap_blkif_t;
   8.107 +
   8.108 +/*Private data struct associated with the inode*/
   8.109 +typedef struct private_info {
   8.110 +	int idx;
   8.111 +} private_info_t;
   8.112 +
   8.113 +/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
   8.114 +typedef struct domid_translate {
   8.115 +	unsigned short domid;
   8.116 +	unsigned short busid;
   8.117 +} domid_translate_t ;
   8.118 +
   8.119 +
   8.120 +domid_translate_t  translate_domid[MAX_TAP_DEV];
   8.121 +tap_blkif_t *tapfds[MAX_TAP_DEV];
   8.122 +
   8.123 +static int __init set_blkif_reqs(char *str)
   8.124 +{
   8.125 +	get_option(&str, &blkif_reqs);
   8.126 +	return 1;
   8.127 +}
   8.128 +__setup("blkif_reqs=", set_blkif_reqs);
   8.129 +
   8.130 +/* Run-time switchable: /sys/module/blktap/parameters/ */
   8.131 +static unsigned int log_stats = 0;
   8.132 +static unsigned int debug_lvl = 0;
   8.133 +module_param(log_stats, int, 0644);
   8.134 +module_param(debug_lvl, int, 0644);
   8.135 +
   8.136 +/*
   8.137 + * Each outstanding request that we've passed to the lower device layers has a 
   8.138 + * 'pending_req' allocated to it. Each buffer_head that completes decrements 
   8.139 + * the pendcnt towards zero. When it hits zero, the specified domain has a 
   8.140 + * response queued for it, with the saved 'id' passed back.
   8.141 + */
   8.142 +typedef struct {
   8.143 +	blkif_t       *blkif;
   8.144 +	unsigned long  id;
   8.145 +	unsigned short mem_idx;
   8.146 +	int            nr_pages;
   8.147 +	atomic_t       pendcnt;
   8.148 +	unsigned short operation;
   8.149 +	int            status;
   8.150 +	struct list_head free_list;
   8.151 +	int            inuse;
   8.152 +} pending_req_t;
   8.153 +
   8.154 +static pending_req_t *pending_reqs[MAX_PENDING_REQS];
   8.155 +static struct list_head pending_free;
   8.156 +static DEFINE_SPINLOCK(pending_free_lock);
   8.157 +static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq);
   8.158 +static int alloc_pending_reqs;
   8.159 +
   8.160 +typedef unsigned int PEND_RING_IDX;
   8.161 +
   8.162 +static inline int MASK_PEND_IDX(int i) { 
   8.163 +	return (i & (MAX_PENDING_REQS-1)); 
   8.164 +}
   8.165 +
   8.166 +static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) {
   8.167 +	return (req - pending_reqs[idx]);
   8.168 +}
   8.169 +
   8.170 +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
   8.171 +
   8.172 +#define BLKBACK_INVALID_HANDLE (~0)
   8.173 +
   8.174 +typedef struct mmap_page {
   8.175 +	unsigned long start;
   8.176 +	struct page *mpage;
   8.177 +} mmap_page_t;
   8.178 +
   8.179 +static mmap_page_t mmap_start[MAX_DYNAMIC_MEM];
   8.180 +static unsigned short mmap_alloc = 0;
   8.181 +static unsigned short mmap_lock = 0;
   8.182 +static unsigned short mmap_inuse = 0;
   8.183 +static unsigned long *pending_addrs[MAX_DYNAMIC_MEM];
   8.184 +
   8.185 +/******************************************************************
   8.186 + * GRANT HANDLES
   8.187 + */
   8.188 +
   8.189 +/* When using grant tables to map a frame for device access then the
   8.190 + * handle returned must be used to unmap the frame. This is needed to
   8.191 + * drop the ref count on the frame.
   8.192 + */
   8.193 +struct grant_handle_pair
   8.194 +{
   8.195 +        grant_handle_t kernel;
   8.196 +        grant_handle_t user;
   8.197 +};
   8.198 +
   8.199 +static struct grant_handle_pair 
   8.200 +    pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES];
   8.201 +#define pending_handle(_id, _idx, _i) \
   8.202 +    (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \
   8.203 +    + (_i)])
   8.204 +
   8.205 +
   8.206 +static int blktap_read_ufe_ring(int idx); /*local prototypes*/
   8.207 +
   8.208 +#define BLKTAP_MINOR 0  /*/dev/xen/blktap resides at device number
   8.209 +			  major=254, minor numbers begin at 0            */ 
   8.210 +#define BLKTAP_DEV_MAJOR 254         /* TODO: Make major number dynamic  *
   8.211 +                                      * and create devices in the kernel *
   8.212 +				      */
   8.213 +#define BLKTAP_DEV_DIR  "/dev/xen"
   8.214 +
   8.215 +/* blktap IOCTLs: */
   8.216 +#define BLKTAP_IOCTL_KICK_FE         1
   8.217 +#define BLKTAP_IOCTL_KICK_BE         2 /* currently unused */
   8.218 +#define BLKTAP_IOCTL_SETMODE         3
   8.219 +#define BLKTAP_IOCTL_SENDPID	     4
   8.220 +#define BLKTAP_IOCTL_NEWINTF	     5
   8.221 +#define BLKTAP_IOCTL_MINOR	     6
   8.222 +#define BLKTAP_IOCTL_MAJOR	     7
   8.223 +#define BLKTAP_QUERY_ALLOC_REQS      8
   8.224 +#define BLKTAP_IOCTL_FREEINTF        9
   8.225 +#define BLKTAP_IOCTL_PRINT_IDXS      100  
   8.226 +
   8.227 +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
   8.228 +#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
   8.229 +#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
   8.230 +#define BLKTAP_MODE_INTERCEPT_BE     0x00000002  /* unimp.             */
   8.231 +
   8.232 +#define BLKTAP_MODE_INTERPOSE \
   8.233 +           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
   8.234 +
   8.235 +
   8.236 +static inline int BLKTAP_MODE_VALID(unsigned long arg)
   8.237 +{
   8.238 +	return ((arg == BLKTAP_MODE_PASSTHROUGH ) ||
   8.239 +		(arg == BLKTAP_MODE_INTERCEPT_FE) ||
   8.240 +                (arg == BLKTAP_MODE_INTERPOSE   ));
   8.241 +}
   8.242 +
   8.243 +/* Requests passing through the tap to userspace are re-assigned an ID.
   8.244 + * We must record a mapping between the BE [IDX,ID] tuple and the userspace
   8.245 + * ring ID. 
   8.246 + */
   8.247 +
   8.248 +static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx)
   8.249 +{
   8.250 +        return ((fe_dom << 16) | MASK_PEND_IDX(idx));
   8.251 +}
   8.252 +
   8.253 +extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id)
   8.254 +{
   8.255 +        return (PEND_RING_IDX)(id & 0x0000ffff);
   8.256 +}
   8.257 +
   8.258 +extern inline int ID_TO_MIDX(unsigned long id)
   8.259 +{
   8.260 +        return (int)(id >> 16);
   8.261 +}
   8.262 +
   8.263 +#define INVALID_REQ 0xdead0000
   8.264 +
   8.265 +/*TODO: Convert to a free list*/
   8.266 +static inline int GET_NEXT_REQ(unsigned long *idx_map)
   8.267 +{
   8.268 +	int i;
   8.269 +	for (i = 0; i < MAX_PENDING_REQS; i++)
   8.270 +		if (idx_map[i] == INVALID_REQ) return i;
   8.271 +
   8.272 +	return INVALID_REQ;
   8.273 +}
   8.274 +
   8.275 +
   8.276 +#define BLKTAP_INVALID_HANDLE(_g) \
   8.277 +    (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF))
   8.278 +
   8.279 +#define BLKTAP_INVALIDATE_HANDLE(_g) do {       \
   8.280 +    (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \
   8.281 +    } while(0)
   8.282 +
   8.283 +
   8.284 +/******************************************************************
   8.285 + * BLKTAP VM OPS
   8.286 + */
   8.287 +
   8.288 +static struct page *blktap_nopage(struct vm_area_struct *vma,
   8.289 +				  unsigned long address,
   8.290 +				  int *type)
   8.291 +{
   8.292 +	/*
   8.293 +	 * if the page has not been mapped in by the driver then return
   8.294 +	 * NOPAGE_SIGBUS to the domain.
   8.295 +	 */
   8.296 +
   8.297 +	return NOPAGE_SIGBUS;
   8.298 +}
   8.299 +
   8.300 +struct vm_operations_struct blktap_vm_ops = {
   8.301 +	nopage:   blktap_nopage,
   8.302 +};
   8.303 +
   8.304 +/******************************************************************
   8.305 + * BLKTAP FILE OPS
   8.306 + */
   8.307 + 
   8.308 +/*Function Declarations*/
   8.309 +static int get_next_free_dev(void);
   8.310 +static int blktap_open(struct inode *inode, struct file *filp);
   8.311 +static int blktap_release(struct inode *inode, struct file *filp);
   8.312 +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
   8.313 +static int blktap_ioctl(struct inode *inode, struct file *filp,
   8.314 +                        unsigned int cmd, unsigned long arg);
   8.315 +static unsigned int blktap_poll(struct file *file, poll_table *wait);
   8.316 +
   8.317 +struct miscdevice *set_misc(int minor, char *name, int dev);
   8.318 +
   8.319 +static struct file_operations blktap_fops = {
   8.320 +	.owner   = THIS_MODULE,
   8.321 +	.poll    = blktap_poll,
   8.322 +	.ioctl   = blktap_ioctl,
   8.323 +	.open    = blktap_open,
   8.324 +	.release = blktap_release,
   8.325 +	.mmap    = blktap_mmap,
   8.326 +};
   8.327 +
   8.328 +
   8.329 +static int get_next_free_dev(void)
   8.330 +{
   8.331 +	tap_blkif_t *info;
   8.332 +	int i = 0, ret = -1;
   8.333 +	unsigned long flags;
   8.334 +
   8.335 +	spin_lock_irqsave(&pending_free_lock, flags);
   8.336 +	
   8.337 +	while (i < MAX_TAP_DEV) {
   8.338 +		info = tapfds[i];
   8.339 +		if ( (tapfds[i] != NULL) && (info->dev_inuse == 0)
   8.340 +			&& (info->dev_pending == 0) ) {
   8.341 +			info->dev_pending = 1;
   8.342 +			ret = i;
   8.343 +			goto done;
   8.344 +		}
   8.345 +		i++;
   8.346 +	}
   8.347 +	
   8.348 +done:
   8.349 +	spin_unlock_irqrestore(&pending_free_lock, flags);
   8.350 +	return ret;
   8.351 +}
   8.352 +
   8.353 +int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) 
   8.354 +{
   8.355 +	int i;
   8.356 +		
   8.357 +	for (i = 0; i < MAX_TAP_DEV; i++)
   8.358 +		if ( (translate_domid[i].domid == domid)
   8.359 +		    && (translate_domid[i].busid == xenbus_id) ) {
   8.360 +			tapfds[i]->blkif = blkif;
   8.361 +			tapfds[i]->status = RUNNING;
   8.362 +			return i;
   8.363 +		}
   8.364 +	return -1;
   8.365 +}
   8.366 +
   8.367 +void signal_tapdisk(int idx) 
   8.368 +{
   8.369 +	tap_blkif_t *info;
   8.370 +	struct task_struct *ptask;
   8.371 +
   8.372 +	info = tapfds[idx];
   8.373 +	if ( (idx > 0) && (idx < MAX_TAP_DEV) && (info->pid > 0) ) {
   8.374 +		ptask = find_task_by_pid(info->pid);
   8.375 +		if (ptask) { 
   8.376 +			info->status = CLEANSHUTDOWN;
   8.377 + 		}
   8.378 +	}
   8.379 +	info->blkif = NULL;
   8.380 +	return;
   8.381 +}
   8.382 +
   8.383 +static int blktap_open(struct inode *inode, struct file *filp)
   8.384 +{
   8.385 +	blkif_sring_t *sring;
   8.386 +	int idx = iminor(inode) - BLKTAP_MINOR;
   8.387 +	tap_blkif_t *info;
   8.388 +	private_info_t *prv;
   8.389 +	int i;
   8.390 +	
   8.391 +	if (tapfds[idx] == NULL) {
   8.392 +		WPRINTK("Unable to open device /dev/xen/blktap%d\n",
   8.393 +		       idx);
   8.394 +		return -ENOMEM;
   8.395 +	}
   8.396 +	DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
   8.397 +	
   8.398 +	info = tapfds[idx];
   8.399 +	
   8.400 +	/*Only one process can access device at a time*/
   8.401 +	if (test_and_set_bit(0, &info->dev_inuse))
   8.402 +		return -EBUSY;
   8.403 +
   8.404 +	info->dev_pending = 0;
   8.405 +	    
   8.406 +	/* Allocate the fe ring. */
   8.407 +	sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
   8.408 +	if (sring == NULL)
   8.409 +		goto fail_nomem;
   8.410 +
   8.411 +	SetPageReserved(virt_to_page(sring));
   8.412 +    
   8.413 +	SHARED_RING_INIT(sring);
   8.414 +	FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
   8.415 +	
   8.416 +	prv = kzalloc(sizeof(private_info_t),GFP_KERNEL);
   8.417 +	prv->idx = idx;
   8.418 +	filp->private_data = prv;
   8.419 +	info->vma = NULL;
   8.420 +
   8.421 +	info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS, 
   8.422 +				GFP_KERNEL);
   8.423 +	
   8.424 +	if (idx > 0) {
   8.425 +		init_waitqueue_head(&info->wait);
   8.426 +		for (i = 0; i < MAX_PENDING_REQS; i++) 
   8.427 +			info->idx_map[i] = INVALID_REQ;
   8.428 +	}
   8.429 +
   8.430 +	DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx);
   8.431 +	return 0;
   8.432 +
   8.433 + fail_nomem:
   8.434 +	return -ENOMEM;
   8.435 +}
   8.436 +
   8.437 +static int blktap_release(struct inode *inode, struct file *filp)
   8.438 +{
   8.439 +	int idx = iminor(inode) - BLKTAP_MINOR;
   8.440 +	tap_blkif_t *info;
   8.441 +	
   8.442 +	if (tapfds[idx] == NULL) {
   8.443 +		WPRINTK("Trying to free device that doesn't exist "
   8.444 +		       "[/dev/xen/blktap%d]\n",idx);
   8.445 +		return -1;
   8.446 +	}
   8.447 +	info = tapfds[idx];
   8.448 +	info->dev_inuse = 0;
   8.449 +	DPRINTK("Freeing device [/dev/xen/blktap%d]\n",idx);
   8.450 +
   8.451 +	/* Free the ring page. */
   8.452 +	ClearPageReserved(virt_to_page(info->ufe_ring.sring));
   8.453 +	free_page((unsigned long) info->ufe_ring.sring);
   8.454 +
   8.455 +	/* Clear any active mappings and free foreign map table */
   8.456 +	if (info->vma) {
   8.457 +		zap_page_range(
   8.458 +			info->vma, info->vma->vm_start, 
   8.459 +			info->vma->vm_end - info->vma->vm_start, NULL);
   8.460 +		info->vma = NULL;
   8.461 +	}
   8.462 +	
   8.463 +	if (filp->private_data) kfree(filp->private_data);
   8.464 +
   8.465 +	if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) {
   8.466 +		kthread_stop(info->blkif->xenblkd);
   8.467 +		info->blkif->xenblkd = NULL;
   8.468 +		info->status = CLEANSHUTDOWN;
   8.469 +	}	
   8.470 +	return 0;
   8.471 +}
   8.472 +
   8.473 +
   8.474 +/* Note on mmap:
    8.475 + * We need to map pages to user space in a way that will allow the block
    8.476 + * subsystem to set up direct IO to them.  This couldn't be done before, because
    8.477 + * there isn't really a sane way to translate a user virtual address down to a
    8.478 + * physical address when the page belongs to another domain.
    8.479 + *
    8.480 + * My first approach was to map the page into kernel memory, add an entry
    8.481 + * for it in the physical frame list (using alloc_lomem_region as in blkback)
    8.482 + * and then attempt to map that page up to user space.  This is disallowed
    8.483 + * by Xen though, which realizes that we don't really own the machine frame
    8.484 + * underlying the physical page.
    8.485 + *
    8.486 + * The new approach is to provide explicit support for this in Xen Linux.
    8.487 + * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
    8.488 + * mapped from other VMs.  vma->vm_private_data is set up as a mapping
    8.489 + * from pages to actual page structs.  There is a new clause in get_user_pages
    8.490 + * that does the right thing for this sort of mapping.
   8.491 + */
   8.492 +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
   8.493 +{
   8.494 +	int size;
   8.495 +	struct page **map;
   8.496 +	int i;
   8.497 +	private_info_t *prv;
   8.498 +	tap_blkif_t *info;
   8.499 +
   8.500 +	/*Retrieve the dev info*/
   8.501 +	prv = (private_info_t *)filp->private_data;
   8.502 +	if (prv == NULL) {
   8.503 +		WPRINTK("blktap: mmap, retrieving idx failed\n");
   8.504 +		return -ENOMEM;
   8.505 +	}
   8.506 +	info = tapfds[prv->idx];
   8.507 +	
   8.508 +	vma->vm_flags |= VM_RESERVED;
   8.509 +	vma->vm_ops = &blktap_vm_ops;
   8.510 +
   8.511 +	size = vma->vm_end - vma->vm_start;
   8.512 +	if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) {
   8.513 +		WPRINTK("you _must_ map exactly %d pages!\n",
   8.514 +		       mmap_pages + RING_PAGES);
   8.515 +		return -EAGAIN;
   8.516 +	}
   8.517 +
   8.518 +	size >>= PAGE_SHIFT;
   8.519 +	info->rings_vstart = vma->vm_start;
   8.520 +	info->user_vstart  = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
   8.521 +    
   8.522 +	/* Map the ring pages to the start of the region and reserve it. */
   8.523 +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
   8.524 +
   8.525 +	if (remap_pfn_range(vma, vma->vm_start, 
   8.526 +			    __pa(info->ufe_ring.sring) >> PAGE_SHIFT, 
   8.527 +			    PAGE_SIZE, vma->vm_page_prot)) {
   8.528 +		WPRINTK("Mapping user ring failed!\n");
   8.529 +		goto fail;
   8.530 +	}
   8.531 +
   8.532 +	/* Mark this VM as containing foreign pages, and set up mappings. */
   8.533 +	map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
   8.534 +		      * sizeof(struct page_struct*),
   8.535 +		      GFP_KERNEL);
   8.536 +	if (map == NULL) {
   8.537 +		WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
   8.538 +		goto fail;
   8.539 +	}
   8.540 +
   8.541 +	for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
   8.542 +		map[i] = NULL;
   8.543 +    
   8.544 +	vma->vm_private_data = map;
   8.545 +	vma->vm_flags |= VM_FOREIGN;
   8.546 +
   8.547 +	info->vma = vma;
   8.548 +	info->ring_ok = 1;
   8.549 +	return 0;
   8.550 + fail:
   8.551 +	/* Clear any active mappings. */
   8.552 +	zap_page_range(vma, vma->vm_start, 
   8.553 +		       vma->vm_end - vma->vm_start, NULL);
   8.554 +
   8.555 +	return -ENOMEM;
   8.556 +}
   8.557 +
   8.558 +
   8.559 +static int blktap_ioctl(struct inode *inode, struct file *filp,
   8.560 +                        unsigned int cmd, unsigned long arg)
   8.561 +{
   8.562 +	int idx = iminor(inode) - BLKTAP_MINOR;
   8.563 +	switch(cmd) {
   8.564 +	case BLKTAP_IOCTL_KICK_FE: 
   8.565 +	{
   8.566 +		/* There are fe messages to process. */
   8.567 +		return blktap_read_ufe_ring(idx);
   8.568 +	}
   8.569 +	case BLKTAP_IOCTL_SETMODE:
   8.570 +	{
   8.571 +		tap_blkif_t *info = tapfds[idx];
   8.572 +		
   8.573 +		if ( (idx > 0) && (idx < MAX_TAP_DEV) 
   8.574 +		     && (tapfds[idx] != NULL) ) 
   8.575 +		{
   8.576 +			if (BLKTAP_MODE_VALID(arg)) {
   8.577 +				info->mode = arg;
   8.578 +				/* XXX: may need to flush rings here. */
   8.579 +				DPRINTK("blktap: set mode to %lx\n", 
   8.580 +				       arg);
   8.581 +				return 0;
   8.582 +			}
   8.583 +		}
   8.584 +		return 0;
   8.585 +	}
   8.586 +	case BLKTAP_IOCTL_PRINT_IDXS:
   8.587 +        {
   8.588 +		tap_blkif_t *info = tapfds[idx];
   8.589 +		
   8.590 +		if ( (idx > 0) && (idx < MAX_TAP_DEV) 
   8.591 +		     && (tapfds[idx] != NULL) ) 
   8.592 +		{
   8.593 +			printk("User Rings: \n-----------\n");
   8.594 +			printk("UF: rsp_cons: %2d, req_prod_prv: %2d "
   8.595 +				"| req_prod: %2d, rsp_prod: %2d\n",
   8.596 +				info->ufe_ring.rsp_cons,
   8.597 +				info->ufe_ring.req_prod_pvt,
   8.598 +				info->ufe_ring.sring->req_prod,
   8.599 +				info->ufe_ring.sring->rsp_prod);
   8.600 +		}
   8.601 +            	return 0;
   8.602 +        }
   8.603 +	case BLKTAP_IOCTL_SENDPID:
   8.604 +	{
   8.605 +		tap_blkif_t *info = tapfds[idx];
   8.606 +		
   8.607 +		if ( (idx > 0) && (idx < MAX_TAP_DEV) 
   8.608 +		     && (tapfds[idx] != NULL) ) 
   8.609 +		{
   8.610 +			info->pid = (pid_t)arg;
   8.611 +			DPRINTK("blktap: pid received %d\n", 
   8.612 +			       info->pid);
   8.613 +		}
   8.614 +		return 0;
   8.615 +	}
   8.616 +	case BLKTAP_IOCTL_NEWINTF:
   8.617 +	{		
   8.618 +		uint64_t val = (uint64_t)arg;
   8.619 +		domid_translate_t *tr = (domid_translate_t *)&val;
   8.620 +		int newdev;
   8.621 +
   8.622 +		DPRINTK("NEWINTF Req for domid %d and bus id %d\n", 
   8.623 +		       tr->domid, tr->busid);
   8.624 +		newdev = get_next_free_dev();
   8.625 +		if (newdev < 1) {
   8.626 +			WPRINTK("Error initialising /dev/xen/blktap - "
   8.627 +				"No more devices\n");
   8.628 +			return -1;
   8.629 +		}
   8.630 +		translate_domid[newdev].domid = tr->domid;
   8.631 +		translate_domid[newdev].busid = tr->busid;
   8.632 +		return newdev;
   8.633 +	}
   8.634 +	case BLKTAP_IOCTL_FREEINTF:
   8.635 +	{
   8.636 +		unsigned long dev = arg;
   8.637 +		tap_blkif_t *info = NULL;
   8.638 +
   8.639 +		if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev];
   8.640 +
   8.641 +		if ( (info != NULL) && (info->dev_pending) )
   8.642 +			info->dev_pending = 0;
   8.643 +		return 0;
   8.644 +	}
   8.645 +	case BLKTAP_IOCTL_MINOR:
   8.646 +	{
   8.647 +		unsigned long dev = arg;
   8.648 +		tap_blkif_t *info = NULL;
   8.649 +		
   8.650 +		if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev];
   8.651 +		
   8.652 +		if (info != NULL) return info->minor;
   8.653 +		else return -1;
   8.654 +	}
   8.655 +	case BLKTAP_IOCTL_MAJOR:
   8.656 +		return BLKTAP_DEV_MAJOR;
   8.657 +
   8.658 +	case BLKTAP_QUERY_ALLOC_REQS:
   8.659 +	{
   8.660 +		WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n",
   8.661 +		       alloc_pending_reqs, blkif_reqs);
   8.662 +		return (alloc_pending_reqs/blkif_reqs) * 100;
   8.663 +	}
   8.664 +	}
   8.665 +	return -ENOIOCTLCMD;
   8.666 +}
   8.667 +
   8.668 +static unsigned int blktap_poll(struct file *file, poll_table *wait)
   8.669 +{
   8.670 +	private_info_t *prv;
   8.671 +	tap_blkif_t *info;
   8.672 +	
   8.673 +	/*Retrieve the dev info*/
   8.674 +	prv = (private_info_t *)file->private_data;
   8.675 +	if (prv == NULL) {
   8.676 +		WPRINTK(" poll, retrieving idx failed\n");
   8.677 +		return 0;
   8.678 +	}
   8.679 +	
   8.680 +	if (prv->idx == 0) return 0;
   8.681 +	
   8.682 +	info = tapfds[prv->idx];
   8.683 +	
   8.684 +	poll_wait(file, &info->wait, wait);
   8.685 +	if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) {
   8.686 +		flush_tlb_all();
   8.687 +		RING_PUSH_REQUESTS(&info->ufe_ring);
   8.688 +		return POLLIN | POLLRDNORM;
   8.689 +	}
   8.690 +	return 0;
   8.691 +}
   8.692 +
   8.693 +void blktap_kick_user(int idx)
   8.694 +{
   8.695 +	tap_blkif_t *info;
   8.696 +
   8.697 +	if (idx == 0) return;
   8.698 +	
   8.699 +	info = tapfds[idx];
   8.700 +	
   8.701 +	if (info != NULL) wake_up_interruptible(&info->wait);
   8.702 +	return;
   8.703 +}
   8.704 +
   8.705 +static int do_block_io_op(blkif_t *blkif);
   8.706 +static void dispatch_rw_block_io(blkif_t *blkif,
   8.707 +				 blkif_request_t *req,
   8.708 +				 pending_req_t *pending_req);
   8.709 +static void make_response(blkif_t *blkif, unsigned long id, 
   8.710 +                          unsigned short op, int st);
   8.711 +
   8.712 +/******************************************************************
   8.713 + * misc small helpers
   8.714 + */
   8.715 +/* FIXME: Return ENOMEM properly on failure to allocate additional reqs. */
   8.716 +static void req_increase(void)
   8.717 +{
   8.718 +	int i, j;
   8.719 +	struct page *page;
   8.720 +	unsigned long flags;
   8.721 +
   8.722 +	spin_lock_irqsave(&pending_free_lock, flags);
   8.723 +
   8.724 +	if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock) 
   8.725 +		goto done;
   8.726 +
   8.727 +	pending_reqs[mmap_alloc]  = kzalloc(sizeof(pending_req_t) *
   8.728 +					blkif_reqs, GFP_KERNEL);
   8.729 +	pending_addrs[mmap_alloc] = kzalloc(sizeof(unsigned long) *
   8.730 +					mmap_pages, GFP_KERNEL);
   8.731 +
   8.732 +	if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) {
   8.733 +		kfree(pending_reqs[mmap_alloc]);
   8.734 +		kfree(pending_addrs[mmap_alloc]);
   8.735 +		WPRINTK("%s: out of memory\n", __FUNCTION__); 
   8.736 +		goto done;
   8.737 +	}
   8.738 +
   8.739 +#ifdef __ia64__
   8.740 +	extern unsigned long alloc_empty_foreign_map_page_range(
   8.741 +		unsigned long pages);
   8.742 +	mmap_start[mmap_alloc].start = (unsigned long)
   8.743 +		alloc_empty_foreign_map_page_range(mmap_pages);
   8.744 +#else /* ! ia64 */
   8.745 +	page = balloon_alloc_empty_page_range(mmap_pages);
   8.746 +	BUG_ON(page == NULL);
   8.747 +
   8.748 +	/* Pin all of the pages. */
   8.749 +	for (i=0; i<mmap_pages; i++)
   8.750 +		get_page(&page[i]);
   8.751 +
   8.752 +	mmap_start[mmap_alloc].start = 
   8.753 +		(unsigned long)pfn_to_kaddr(page_to_pfn(page));
   8.754 +	mmap_start[mmap_alloc].mpage = page;
   8.755 +
   8.756 +#endif
   8.757 +	DPRINTK("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
   8.758 +	        __FUNCTION__, blkif_reqs, mmap_pages, 
   8.759 +	       mmap_start[mmap_alloc].start);
   8.760 +
   8.761 +	BUG_ON(mmap_start[mmap_alloc].start == 0);
   8.762 +
   8.763 +	for (i = 0; i < mmap_pages; i++) 
   8.764 +		pending_addrs[mmap_alloc][i] = 
   8.765 +			mmap_start[mmap_alloc].start + (i << PAGE_SHIFT);
   8.766 +
   8.767 +	for (i = 0; i < MAX_PENDING_REQS ; i++) {
   8.768 +		list_add_tail(&pending_reqs[mmap_alloc][i].free_list, 
   8.769 +			      &pending_free);
   8.770 +		pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc;
   8.771 +		for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
   8.772 +			BLKTAP_INVALIDATE_HANDLE(&pending_handle(mmap_alloc, 
   8.773 +								 i, j));
   8.774 +	}
   8.775 +
   8.776 +	mmap_alloc++;
   8.777 +	DPRINTK("# MMAPs increased to %d\n",mmap_alloc);
   8.778 + done:
   8.779 +	spin_unlock_irqrestore(&pending_free_lock, flags);
   8.780 +
   8.781 +}
   8.782 +
   8.783 +static void mmap_req_del(int mmap)
   8.784 +{
   8.785 +	int i;
   8.786 +	struct page *page;
   8.787 +
   8.788 +	/*Spinlock already acquired*/
   8.789 +	kfree(pending_reqs[mmap]);
   8.790 +	kfree(pending_addrs[mmap]);
   8.791 +
   8.792 +#ifdef __ia64__
   8.793 +	/*Not sure what goes here yet!*/
   8.794 +#else
   8.795 +
   8.796 +	/* Unpin all of the pages. */
   8.797 +	page = mmap_start[mmap].mpage;
   8.798 +	for (i=0; i<mmap_pages; i++)
   8.799 +		put_page(&page[i]);
   8.800 +
   8.801 +	balloon_dealloc_empty_page_range(mmap_start[mmap].mpage, mmap_pages);
   8.802 +#endif
   8.803 +
   8.804 +	mmap_lock = 0;
   8.805 +	DPRINTK("# MMAPs decreased to %d\n",mmap_alloc);
   8.806 +	mmap_alloc--;
   8.807 +}
   8.808 +
   8.809 +/*N.B. Currently unused - will be accessed via sysfs*/
         +/*
         + * req_decrease - start retiring the most recently added request pool.
         + *
         + * Marks a shrink as in progress (mmap_lock), pulls every idle request of
         + * the last pool off pending_free and counts those still in flight in
         + * mmap_inuse.  The pool is released immediately when nothing is in
         + * flight; otherwise free_req() releases it when the count reaches zero.
         + * The last remaining pool is never retired.
         + */
   8.810 +static void req_decrease(void)
   8.811 +{
   8.812 +	pending_req_t *req;
   8.813 +	int i;
   8.814 +	unsigned long flags;
   8.815 +
   8.816 +	spin_lock_irqsave(&pending_free_lock, flags);
   8.817 +
   8.818 +	DPRINTK("Req decrease called.\n");
         +	/*
         +	 * "<= 1" rather than "== 1": with no pool at all the code below
         +	 * would index pending_reqs[-1].
         +	 */
   8.819 +	if (mmap_lock || mmap_alloc <= 1) 
   8.820 +		goto done;
   8.821 +
   8.822 +	mmap_lock = 1;
   8.823 +	mmap_inuse = MAX_PENDING_REQS;
   8.824 +	
   8.825 +        /*Go through reqs and remove any that aren't in use*/
   8.826 +	for (i = 0; i < MAX_PENDING_REQS ; i++) {
   8.827 +		req = &pending_reqs[mmap_alloc-1][i];
   8.828 +		if (req->inuse == 0) {
   8.829 +			list_del(&req->free_list);
   8.830 +			mmap_inuse--;
   8.831 +		}
   8.832 +	}
   8.833 +	if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1);
   8.834 + done:
   8.835 +	spin_unlock_irqrestore(&pending_free_lock, flags);
   8.836 +	return;
   8.837 +}
   8.838 +
   8.839 +static pending_req_t* alloc_req(void)
   8.840 +{
   8.841 +	pending_req_t *req = NULL;
   8.842 +	unsigned long flags;
   8.843 +
   8.844 +	spin_lock_irqsave(&pending_free_lock, flags);
   8.845 +
   8.846 +	if (!list_empty(&pending_free)) {
   8.847 +		req = list_entry(pending_free.next, pending_req_t, free_list);
   8.848 +		list_del(&req->free_list);
   8.849 +	}
   8.850 +
   8.851 +	if (req) {
   8.852 +		req->inuse = 1;
   8.853 +		alloc_pending_reqs++;
   8.854 +	}
   8.855 +	spin_unlock_irqrestore(&pending_free_lock, flags);
   8.856 +
   8.857 +	return req;
   8.858 +}
   8.859 +
   8.860 +static void free_req(pending_req_t *req)
   8.861 +{
   8.862 +	unsigned long flags;
   8.863 +	int was_empty;
   8.864 +
   8.865 +	spin_lock_irqsave(&pending_free_lock, flags);
   8.866 +
   8.867 +	alloc_pending_reqs--;
   8.868 +	req->inuse = 0;
   8.869 +	if (mmap_lock && (req->mem_idx == mmap_alloc-1)) {
   8.870 +		mmap_inuse--;
   8.871 +		if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1);
   8.872 +		spin_unlock_irqrestore(&pending_free_lock, flags);
   8.873 +		return;
   8.874 +	}
   8.875 +	was_empty = list_empty(&pending_free);
   8.876 +	list_add(&req->free_list, &pending_free);
   8.877 +
   8.878 +	spin_unlock_irqrestore(&pending_free_lock, flags);
   8.879 +
   8.880 +	if (was_empty)
   8.881 +		wake_up(&pending_free_wq);
   8.882 +}
   8.883 +
   8.884 +static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int 
   8.885 +			    tapidx)
   8.886 +{
   8.887 +	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
   8.888 +	unsigned int i, invcount = 0;
   8.889 +	struct grant_handle_pair *khandle;
   8.890 +	uint64_t ptep;
   8.891 +	int ret, mmap_idx;
   8.892 +	unsigned long kvaddr, uvaddr;
   8.893 +
   8.894 +	tap_blkif_t *info = tapfds[tapidx];
   8.895 +	
   8.896 +	if (info == NULL) {
   8.897 +		WPRINTK("fast_flush: Couldn't get info!\n");
   8.898 +		return;
   8.899 +	}
   8.900 +	mmap_idx = req->mem_idx;
   8.901 +
   8.902 +	for (i = 0; i < req->nr_pages; i++) {
   8.903 +		kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i);
   8.904 +		uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
   8.905 +
   8.906 +		khandle = &pending_handle(mmap_idx, k_idx, i);
   8.907 +		if (BLKTAP_INVALID_HANDLE(khandle)) {
   8.908 +			WPRINTK("BLKTAP_INVALID_HANDLE\n");
   8.909 +			continue;
   8.910 +		}
   8.911 +		gnttab_set_unmap_op(&unmap[invcount], 
   8.912 +			MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i), 
   8.913 +				    GNTMAP_host_map, khandle->kernel);
   8.914 +		invcount++;
   8.915 +
   8.916 +		if (create_lookup_pte_addr(
   8.917 +		    info->vma->vm_mm,
   8.918 +		    MMAP_VADDR(info->user_vstart, u_idx, i), 
   8.919 +		    &ptep) !=0) {
   8.920 +			WPRINTK("Couldn't get a pte addr!\n");
   8.921 +			return;
   8.922 +		}
   8.923 +
   8.924 +		gnttab_set_unmap_op(&unmap[invcount], 
   8.925 +			ptep, GNTMAP_host_map,
   8.926 +			khandle->user);
   8.927 +		invcount++;
   8.928 +            
   8.929 +		BLKTAP_INVALIDATE_HANDLE(khandle);
   8.930 +	}
   8.931 +	ret = HYPERVISOR_grant_table_op(
   8.932 +		GNTTABOP_unmap_grant_ref, unmap, invcount);
   8.933 +	BUG_ON(ret);
   8.934 +	
   8.935 +	if (info->vma != NULL)
   8.936 +		zap_page_range(info->vma, 
   8.937 +			       MMAP_VADDR(info->user_vstart, u_idx, 0), 
   8.938 +			       req->nr_pages << PAGE_SHIFT, NULL);
   8.939 +}
   8.940 +
   8.941 +/******************************************************************
   8.942 + * SCHEDULER FUNCTIONS
   8.943 + */
   8.944 +
   8.945 +static void print_stats(blkif_t *blkif)
   8.946 +{
   8.947 +	printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d\n",
   8.948 +	       current->comm, blkif->st_oo_req,
   8.949 +	       blkif->st_rd_req, blkif->st_wr_req);
   8.950 +	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
   8.951 +	blkif->st_rd_req = 0;
   8.952 +	blkif->st_wr_req = 0;
   8.953 +	blkif->st_oo_req = 0;
   8.954 +}
   8.955 +
   8.956 +int tap_blkif_schedule(void *arg) /* Worker loop for one interface; @arg is its blkif_t. Returns 0 after the loop ends. */
   8.957 +{
   8.958 +	blkif_t *blkif = arg;
   8.959 +
   8.960 +	blkif_get(blkif); /* reference held until the blkif_put() at the bottom */
   8.961 +
   8.962 +	if (debug_lvl)
   8.963 +		printk(KERN_DEBUG "%s: started\n", current->comm);
   8.964 +
   8.965 +	while (!kthread_should_stop()) {
   8.966 +		wait_event_interruptible(
   8.967 +			blkif->wq,
   8.968 +			blkif->waiting_reqs || kthread_should_stop()); /* sleep until work is flagged or a stop is requested */
   8.969 +		wait_event_interruptible(
   8.970 +			pending_free_wq,
   8.971 +			!list_empty(&pending_free) || kthread_should_stop()); /* then until a free pending request exists */
   8.972 +
   8.973 +		blkif->waiting_reqs = 0;
   8.974 +		smp_mb(); /* clear flag *before* checking for work */
   8.975 +
   8.976 +		if (do_block_io_op(blkif)) /* non-zero: work is left over, so re-arm the flag */
   8.977 +			blkif->waiting_reqs = 1;
   8.978 +
   8.979 +		if (log_stats && time_after(jiffies, blkif->st_print))
   8.980 +			print_stats(blkif);
   8.981 +	}
   8.982 +
   8.983 +	if (log_stats)
   8.984 +		print_stats(blkif);
   8.985 +	if (debug_lvl)
   8.986 +		printk(KERN_DEBUG "%s: exiting\n", current->comm);
   8.987 +
   8.988 +	blkif->xenblkd = NULL; /* clear the interface's thread pointer before dropping the reference */
   8.989 +	blkif_put(blkif);
   8.990 +
   8.991 +	return 0;
   8.992 +}
   8.993 +
   8.994 +/******************************************************************
   8.995 + * COMPLETION CALLBACK -- Called by user level ioctl()
   8.996 + */
   8.997 +
   8.998 +static int blktap_read_ufe_ring(int idx)
   8.999 +{
  8.1000 +	/* This is called to read responses from the UFE ring. */
  8.1001 +	RING_IDX i, j, rp;
  8.1002 +	blkif_response_t *resp;
  8.1003 +	blkif_t *blkif=NULL;
  8.1004 +	int pending_idx, usr_idx, mmap_idx;
  8.1005 +	pending_req_t *pending_req;
  8.1006 +	tap_blkif_t *info;
  8.1007 +	
  8.1008 +	info = tapfds[idx];
  8.1009 +	if (info == NULL) {
  8.1010 +		return 0;
  8.1011 +	}
  8.1012 +
  8.1013 +	/* We currently only forward packets in INTERCEPT_FE mode. */
  8.1014 +	if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE))
  8.1015 +		return 0;
  8.1016 +
  8.1017 +	/* for each outstanding message on the UFEring  */
  8.1018 +	rp = info->ufe_ring.sring->rsp_prod;
  8.1019 +	rmb();
  8.1020 +        
  8.1021 +	for (i = info->ufe_ring.rsp_cons; i != rp; i++) {
  8.1022 +		resp = RING_GET_RESPONSE(&info->ufe_ring, i);
  8.1023 +		++info->ufe_ring.rsp_cons;
  8.1024 +
  8.1025 +		/*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/
  8.1026 +		usr_idx = (int)resp->id;
  8.1027 +		pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
  8.1028 +		mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
  8.1029 +
  8.1030 +		if ( (mmap_idx >= mmap_alloc) || 
  8.1031 +		   (ID_TO_IDX(info->idx_map[usr_idx]) >= MAX_PENDING_REQS) )
  8.1032 +			WPRINTK("Incorrect req map"
  8.1033 +			       "[%d], internal map [%d,%d (%d)]\n", 
  8.1034 +			       usr_idx, mmap_idx, 
  8.1035 +			       ID_TO_IDX(info->idx_map[usr_idx]),
  8.1036 +			       MASK_PEND_IDX(
  8.1037 +				       ID_TO_IDX(info->idx_map[usr_idx])));
  8.1038 +
  8.1039 +		pending_req = &pending_reqs[mmap_idx][pending_idx];
  8.1040 +		blkif = pending_req->blkif;
  8.1041 +
  8.1042 +		for (j = 0; j < pending_req->nr_pages; j++) {
  8.1043 +
  8.1044 +			unsigned long kvaddr, uvaddr;
  8.1045 +			struct page **map = info->vma->vm_private_data;
  8.1046 +			struct page *pg;
  8.1047 +			int offset; 
  8.1048 +
  8.1049 +			uvaddr  = MMAP_VADDR(info->user_vstart, usr_idx, j);
  8.1050 +			kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
  8.1051 +					    pending_idx, j);
  8.1052 +
  8.1053 +			pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
  8.1054 +			ClearPageReserved(pg);
  8.1055 +			offset = (uvaddr - info->vma->vm_start) 
  8.1056 +				>> PAGE_SHIFT;
  8.1057 +			map[offset] = NULL;
  8.1058 +		}
  8.1059 +		fast_flush_area(pending_req, pending_idx, usr_idx, idx); 
  8.1060 +		make_response(blkif, pending_req->id, resp->operation,
  8.1061 +			      resp->status);
  8.1062 +		info->idx_map[usr_idx] = INVALID_REQ;
  8.1063 +		blkif_put(pending_req->blkif);
  8.1064 +		free_req(pending_req);
  8.1065 +	}
  8.1066 +		
  8.1067 +	return 0;
  8.1068 +}
  8.1069 +
  8.1070 +
  8.1071 +/******************************************************************************
  8.1072 + * NOTIFICATION FROM GUEST OS.
  8.1073 + */
  8.1074 +
  8.1075 +static void blkif_notify_work(blkif_t *blkif) /* Flag @blkif as having requests waiting and wake sleepers on its wait queue. */
  8.1076 +{
  8.1077 +	blkif->waiting_reqs = 1; /* set before the wake-up so a woken waiter sees the flag */
  8.1078 +	wake_up(&blkif->wq);
  8.1079 +}
  8.1080 +
  8.1081 +irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) /* Interrupt handler: @dev_id is passed on as the blkif to notify. */
  8.1082 +{
  8.1083 +	blkif_notify_work(dev_id); /* only flags work and wakes the waiter */
  8.1084 +	return IRQ_HANDLED;
  8.1085 +}
  8.1086 +
  8.1087 +
  8.1088 +
  8.1089 +/******************************************************************
  8.1090 + * DOWNWARD CALLS -- These interface with the block-device layer proper.
  8.1091 + */
  8.1092 +static int print_dbug = 1; /* cleared after the first "no user ring" warning so it is printed only once */
  8.1093 +static int do_block_io_op(blkif_t *blkif) /* Drain queued requests of @blkif towards its tap device; returns non-zero when work remains. */
  8.1094 +{
  8.1095 +	blkif_back_ring_t *blk_ring = &blkif->blk_ring;
  8.1096 +	blkif_request_t *req;
  8.1097 +	pending_req_t *pending_req;
  8.1098 +	RING_IDX rc, rp;
  8.1099 +	int more_to_do = 0;
  8.1100 +	tap_blkif_t *info;
  8.1101 +
  8.1102 +	rc = blk_ring->req_cons;
  8.1103 +	rp = blk_ring->sring->req_prod; /* snapshot of the producer index; only requests below it are handled */
  8.1104 +	rmb(); /* Ensure we see queued requests up to 'rp'. */
  8.1105 +
  8.1106 +	/*Check blkif has corresponding UE ring*/
  8.1107 +	if (blkif->dev_num == -1) {
  8.1108 +		/*oops*/
  8.1109 +		if (print_dbug) {
  8.1110 +			WPRINTK("Corresponding UE " 
  8.1111 +			       "ring does not exist!\n");
  8.1112 +			print_dbug = 0; /*We only print this message once*/
  8.1113 +		}
  8.1114 +		return 1; /* reported as "more to do", so the caller re-flags the work */
  8.1115 +	}
  8.1116 +
  8.1117 +	info = tapfds[blkif->dev_num]; /* NOTE(review): dev_num is only compared against -1 before indexing - confirm its upper bound */
  8.1118 +	if (info == NULL || !info->dev_inuse) {
  8.1119 +		if (print_dbug) {
  8.1120 +			WPRINTK("Can't get UE info!\n");
  8.1121 +			print_dbug = 0;
  8.1122 +		}
  8.1123 +		return 1;
  8.1124 +	}
  8.1125 +
  8.1126 +	while (rc != rp) {
  8.1127 +		
  8.1128 +		if (RING_FULL(&info->ufe_ring)) { /* no room on the user ring: stop and retry later */
  8.1129 +			WPRINTK("RING_FULL! More to do\n");
  8.1130 +			more_to_do = 1;
  8.1131 +			break;
  8.1132 +		}
  8.1133 +		
  8.1134 +		if (RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) {
  8.1135 +			WPRINTK("RING_REQUEST_CONS_OVERFLOW!"
  8.1136 +			       " More to do\n");
  8.1137 +			more_to_do = 1;
  8.1138 +			break;		
  8.1139 +		}
  8.1140 +
  8.1141 +		pending_req = alloc_req();
  8.1142 +		if (NULL == pending_req) { /* free list exhausted: count it and retry later */
  8.1143 +			blkif->st_oo_req++;
  8.1144 +			more_to_do = 1;
  8.1145 +			break;
  8.1146 +		}
  8.1147 +
  8.1148 +		req = RING_GET_REQUEST(blk_ring, rc); /* NOTE(review): req points into the ring itself, no private copy is taken - confirm this is intended */
  8.1149 +		blk_ring->req_cons = ++rc; /* before make_response() */	
  8.1150 +
  8.1151 +		switch (req->operation) {
  8.1152 +		case BLKIF_OP_READ:
  8.1153 +			blkif->st_rd_req++;
  8.1154 +			dispatch_rw_block_io(blkif, req, pending_req);
  8.1155 +			break;
  8.1156 +
  8.1157 +		case BLKIF_OP_WRITE:
  8.1158 +			blkif->st_wr_req++;
  8.1159 +			dispatch_rw_block_io(blkif, req, pending_req);
  8.1160 +			break;
  8.1161 +
  8.1162 +		default: /* anything else is answered with an error and the request is released */
  8.1163 +			WPRINTK("unknown operation [%d]\n",
  8.1164 +				req->operation);
  8.1165 +			make_response(blkif, req->id, req->operation,
  8.1166 +				      BLKIF_RSP_ERROR);
  8.1167 +			free_req(pending_req);
  8.1168 +			break;
  8.1169 +		}
  8.1170 +	}
  8.1171 +		
  8.1172 +	blktap_kick_user(blkif->dev_num); /* wake the tap device's waiters, whether or not anything was queued */
  8.1173 +
  8.1174 +	return more_to_do;
  8.1175 +}
  8.1176 +
  8.1177 +static void dispatch_rw_block_io(blkif_t *blkif,
  8.1178 +				 blkif_request_t *req,
  8.1179 +				 pending_req_t *pending_req)
  8.1180 +{
  8.1181 +	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
  8.1182 +	int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
  8.1183 +	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
  8.1184 +	unsigned int nseg;
  8.1185 +	int ret, i;
  8.1186 +	tap_blkif_t *info = tapfds[blkif->dev_num];
  8.1187 +	uint64_t sector;
  8.1188 +	
  8.1189 +	blkif_request_t *target;
  8.1190 +	int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
  8.1191 +	int usr_idx = GET_NEXT_REQ(info->idx_map);
  8.1192 +	uint16_t mmap_idx = pending_req->mem_idx;
  8.1193 +
  8.1194 +	/*Check we have space on user ring - should never fail*/
  8.1195 +	if(usr_idx == INVALID_REQ) goto fail_flush;
  8.1196 +	
  8.1197 +	/* Check that number of segments is sane. */
  8.1198 +	nseg = req->nr_segments;
  8.1199 +	if ( unlikely(nseg == 0) || 
  8.1200 +	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) {
  8.1201 +		WPRINTK("Bad number of segments in request (%d)\n", nseg);
  8.1202 +		goto fail_response;
  8.1203 +	}
  8.1204 +	
  8.1205 +	/* Make sure userspace is ready. */
  8.1206 +	if (!info->ring_ok) {
  8.1207 +		WPRINTK("blktap: ring not ready for requests!\n");
  8.1208 +		goto fail_response;
  8.1209 +	}
  8.1210 +
  8.1211 +	if (RING_FULL(&info->ufe_ring)) {
  8.1212 +		WPRINTK("blktap: fe_ring is full, can't add "
  8.1213 +			"IO Request will be dropped. %d %d\n",
  8.1214 +			RING_SIZE(&info->ufe_ring),
  8.1215 +			RING_SIZE(&blkif->blk_ring));
  8.1216 +		goto fail_response;
  8.1217 +	}
  8.1218 +
  8.1219 +	pending_req->blkif     = blkif;
  8.1220 +	pending_req->id        = req->id;
  8.1221 +	pending_req->operation = operation;
  8.1222 +	pending_req->status    = BLKIF_RSP_OKAY;
  8.1223 +	pending_req->nr_pages  = nseg;
  8.1224 +	op = 0;
  8.1225 +	for (i = 0; i < nseg; i++) {
  8.1226 +		unsigned long uvaddr;
  8.1227 +		unsigned long kvaddr;
  8.1228 +		uint64_t ptep;
  8.1229 +		struct page *page;
  8.1230 +		uint32_t flags;
  8.1231 +
  8.1232 +		uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
  8.1233 +		kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
  8.1234 +				    pending_idx, i);
  8.1235 +		page = virt_to_page(kvaddr);
  8.1236 +
  8.1237 +		sector = req->sector_number + (8*i);
  8.1238 +		if( (blkif->sectors > 0) && (sector >= blkif->sectors) ) {
  8.1239 +			WPRINTK("BLKTAP: Sector request greater" 
  8.1240 +			       "than size\n");
  8.1241 +			WPRINTK("BLKTAP: %s request sector" 
  8.1242 +			       "[%llu,%llu], Total [%llu]\n",
  8.1243 +			       (req->operation == 
  8.1244 +				BLKIF_OP_WRITE ? "WRITE" : "READ"),
  8.1245 +				(long long unsigned) sector,
  8.1246 +				(long long unsigned) sector>>9,
  8.1247 +				blkif->sectors);
  8.1248 +		}
  8.1249 +
  8.1250 +		flags = GNTMAP_host_map;
  8.1251 +		if (operation == WRITE)
  8.1252 +			flags |= GNTMAP_readonly;
  8.1253 +		gnttab_set_map_op(&map[op], kvaddr, flags,
  8.1254 +				  req->seg[i].gref, blkif->domid);
  8.1255 +		op++;
  8.1256 +
  8.1257 +		/* Now map it to user. */
  8.1258 +		ret = create_lookup_pte_addr(info->vma->vm_mm, 
  8.1259 +					     uvaddr, &ptep);
  8.1260 +		if (ret) {
  8.1261 +			WPRINTK("Couldn't get a pte addr!\n");
  8.1262 +			fast_flush_area(pending_req, pending_idx, usr_idx, 
  8.1263 +					blkif->dev_num);
  8.1264 +			goto fail_flush;
  8.1265 +		}
  8.1266 +
  8.1267 +		flags = GNTMAP_host_map | GNTMAP_application_map
  8.1268 +			| GNTMAP_contains_pte;
  8.1269 +		if (operation == WRITE)
  8.1270 +			flags |= GNTMAP_readonly;
  8.1271 +		gnttab_set_map_op(&map[op], ptep, flags,
  8.1272 +				  req->seg[i].gref, blkif->domid);
  8.1273 +		op++;
  8.1274 +	}
  8.1275 +
  8.1276 +	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
  8.1277 +	BUG_ON(ret);
  8.1278 +
  8.1279 +	for (i = 0; i < (nseg*2); i+=2) {
  8.1280 +		unsigned long uvaddr;
  8.1281 +		unsigned long kvaddr;
  8.1282 +		unsigned long offset;
  8.1283 +		struct page *pg;
  8.1284 +
  8.1285 +		uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2);
  8.1286 +		kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
  8.1287 +				    pending_idx, i/2);
  8.1288 +
  8.1289 +		if (unlikely(map[i].status != 0)) {
  8.1290 +			WPRINTK("invalid kernel buffer -- "
  8.1291 +				"could not remap it\n");
  8.1292 +			goto fail_flush;
  8.1293 +		}
  8.1294 +
  8.1295 +		if (unlikely(map[i+1].status != 0)) {
  8.1296 +			WPRINTK("invalid user buffer -- "
  8.1297 +				"could not remap it\n");
  8.1298 +			goto fail_flush;
  8.1299 +		}
  8.1300 +
  8.1301 +		pending_handle(mmap_idx, pending_idx, i/2).kernel 
  8.1302 +			= map[i].handle;
  8.1303 +		pending_handle(mmap_idx, pending_idx, i/2).user   
  8.1304 +			= map[i+1].handle;
  8.1305 +#ifdef CONFIG_XEN_IA64_DOM0_NON_VP
  8.1306 +		pending_addrs[mmap_idx][vaddr_pagenr(pending_req, i)] =
  8.1307 +			(unsigned long)gnttab_map_vaddr(map[i]);
  8.1308 +#else
  8.1309 +		set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
  8.1310 +			FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
  8.1311 +#endif
  8.1312 +		offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
  8.1313 +		pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
  8.1314 +		((struct page **)info->vma->vm_private_data)[offset] =
  8.1315 +			pg;
  8.1316 +	}
  8.1317 +	/* Mark mapped pages as reserved: */
  8.1318 +	for (i = 0; i < req->nr_segments; i++) {
  8.1319 +		unsigned long kvaddr;
  8.1320 +		struct page *pg;
  8.1321 +
  8.1322 +		kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
  8.1323 +				    pending_idx, i);
  8.1324 +		pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
  8.1325 +		SetPageReserved(pg);
  8.1326 +	}
  8.1327 +	
  8.1328 +	/*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
  8.1329 +	info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx);
  8.1330 +
  8.1331 +	blkif_get(blkif);
  8.1332 +	/* Finally, write the request message to the user ring. */
  8.1333 +	target = RING_GET_REQUEST(&info->ufe_ring,
  8.1334 +				  info->ufe_ring.req_prod_pvt);
  8.1335 +	memcpy(target, req, sizeof(*req));
  8.1336 +	target->id = usr_idx;
  8.1337 +	info->ufe_ring.req_prod_pvt++;
  8.1338 +	return;
  8.1339 +
  8.1340 + fail_flush:
  8.1341 +	WPRINTK("Reached Fail_flush\n");
  8.1342 +	fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num);
  8.1343 + fail_response:
  8.1344 +	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
  8.1345 +	free_req(pending_req);
  8.1346 +} 
  8.1347 +
  8.1348 +
  8.1349 +
  8.1350 +/******************************************************************
  8.1351 + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
  8.1352 + */
  8.1353 +
  8.1354 +
  8.1355 +static void make_response(blkif_t *blkif, unsigned long id, 
  8.1356 +                          unsigned short op, int st)
  8.1357 +{ /* Queue a response (id, op, st) on @blkif's ring, then wake the worker and/or notify the remote end as needed. */
  8.1358 +	blkif_response_t *resp;
  8.1359 +	unsigned long     flags;
  8.1360 +	blkif_back_ring_t *blk_ring = &blkif->blk_ring;
  8.1361 +	int more_to_do = 0;
  8.1362 +	int notify;
  8.1363 +
  8.1364 +	spin_lock_irqsave(&blkif->blk_ring_lock, flags); /* ring updates below happen under blk_ring_lock */
  8.1365 +	/* Place on the response ring for the relevant domain. */ 
  8.1366 +	resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
  8.1367 +	resp->id        = id;
  8.1368 +	resp->operation = op;
  8.1369 +	resp->status    = st;
  8.1370 +	blk_ring->rsp_prod_pvt++;
  8.1371 +	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify); /* publishes the response; the macro sets notify */
  8.1372 +
  8.1373 +	if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) {
  8.1374 +		/*
  8.1375 +		 * Tail check for pending requests. Allows frontend to avoid
  8.1376 +		 * notifications if requests are already in flight (lower
  8.1377 +		 * overheads and promotes batching).
  8.1378 +		 */
  8.1379 +		RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do);
  8.1380 +	} else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) {
  8.1381 +		more_to_do = 1;
  8.1382 +
  8.1383 +	}	
  8.1384 +	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
  8.1385 +	if (more_to_do) /* requests are still queued: flag work and wake the worker */
  8.1386 +		blkif_notify_work(blkif);
  8.1387 +	if (notify) /* signal the other end through the interface's irq */
  8.1388 +		notify_remote_via_irq(blkif->irq);
  8.1389 +}
  8.1390 +
  8.1391 +static int __init blkif_init(void)
  8.1392 +{
  8.1393 +	int i,ret,blktap_dir;
  8.1394 +	tap_blkif_t *info;
  8.1395 +
  8.1396 +	if (!is_running_on_xen())
  8.1397 +		return -ENODEV;
  8.1398 +
  8.1399 +	INIT_LIST_HEAD(&pending_free);
  8.1400 +        for(i = 0; i < 2; i++) req_increase();
  8.1401 +
  8.1402 +	tap_blkif_interface_init();
  8.1403 +
  8.1404 +	alloc_pending_reqs = 0;
  8.1405 +
  8.1406 +	tap_blkif_xenbus_init();
  8.1407 +
  8.1408 +	/*Create the blktap devices, but do not map memory or waitqueue*/
  8.1409 +	for(i = 0; i < MAX_TAP_DEV; i++) translate_domid[i].domid = 0xFFFF;
  8.1410 +
  8.1411 +	ret = register_chrdev(BLKTAP_DEV_MAJOR,"blktap",&blktap_fops);
  8.1412 +	blktap_dir = devfs_mk_dir(NULL, "xen", 0, NULL);
  8.1413 +
  8.1414 +	if ( (ret < 0)||(blktap_dir < 0) ) {
  8.1415 +		WPRINTK("Couldn't register /dev/xen/blktap\n");
  8.1416 +		return -ENOMEM;
  8.1417 +	}	
  8.1418 +	
  8.1419 +	for(i = 0; i < MAX_TAP_DEV; i++ ) {
  8.1420 +		info = tapfds[i] = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL);
  8.1421 +		if(tapfds[i] == NULL) return -ENOMEM;
  8.1422 +		info->minor = i;
  8.1423 +		info->pid = 0;
  8.1424 +		info->blkif = NULL;
  8.1425 +
  8.1426 +		ret = devfs_mk_cdev(MKDEV(BLKTAP_DEV_MAJOR, i),
  8.1427 +			S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i);
  8.1428 +
  8.1429 +		if(ret != 0) return -ENOMEM;
  8.1430 +		info->dev_pending = info->dev_inuse = 0;
  8.1431 +
  8.1432 +		DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
  8.1433 +	}
  8.1434 +	
  8.1435 +	DPRINTK("Blktap device successfully created\n");
  8.1436 +
  8.1437 +	return 0;
  8.1438 +}
  8.1439 +
  8.1440 +module_init(blkif_init);
  8.1441 +
  8.1442 +MODULE_LICENSE("Dual BSD/GPL");
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h	Thu Jul 13 10:13:26 2006 +0100
     9.3 @@ -0,0 +1,120 @@
     9.4 +/* 
     9.5 + * This program is free software; you can redistribute it and/or
     9.6 + * modify it under the terms of the GNU General Public License version 2
     9.7 + * as published by the Free Software Foundation; or, when distributed
     9.8 + * separately from the Linux kernel or incorporated into other
     9.9 + * software packages, subject to the following license:
    9.10 + * 
    9.11 + * Permission is hereby granted, free of charge, to any person obtaining a copy
    9.12 + * of this source file (the "Software"), to deal in the Software without
    9.13 + * restriction, including without limitation the rights to use, copy, modify,
    9.14 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
    9.15 + * and to permit persons to whom the Software is furnished to do so, subject to
    9.16 + * the following conditions:
    9.17 + * 
    9.18 + * The above copyright notice and this permission notice shall be included in
    9.19 + * all copies or substantial portions of the Software.
    9.20 + * 
    9.21 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    9.22 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    9.23 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    9.24 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    9.25 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    9.26 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
    9.27 + * IN THE SOFTWARE.
    9.28 + */
    9.29 +
    9.30 +#ifndef __BLKIF__BACKEND__COMMON_H__
    9.31 +#define __BLKIF__BACKEND__COMMON_H__
    9.32 +
    9.33 +#include <linux/config.h>
    9.34 +#include <linux/version.h>
    9.35 +#include <linux/module.h>
    9.36 +#include <linux/interrupt.h>
    9.37 +#include <linux/slab.h>
    9.38 +#include <linux/blkdev.h>
    9.39 +#include <linux/vmalloc.h>
    9.40 +#include <asm/io.h>
    9.41 +#include <asm/setup.h>
    9.42 +#include <asm/pgalloc.h>
    9.43 +#include <xen/evtchn.h>
    9.44 +#include <asm/hypervisor.h>
    9.45 +#include <xen/interface/io/blkif.h>
    9.46 +#include <xen/interface/io/ring.h>
    9.47 +#include <xen/gnttab.h>
    9.48 +#include <xen/driver_util.h>
    9.49 +
    9.50 +#define DPRINTK(_f, _a...) pr_debug("(file=%s, line=%d) " _f, \
    9.51 +                                    __FILE__ , __LINE__ , ## _a )
    9.52 +
    9.53 +#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
    9.54 +
    9.55 +struct backend_info; 
    9.56 +
    9.57 +typedef struct blkif_st {
    9.58 +	/* Unique identifier for this interface. */
    9.59 +	domid_t           domid;	/* frontend domain; set by tap_alloc_blkif() */
    9.60 +	unsigned int      handle;
    9.61 +	/* Physical parameters of the comms window. */
    9.62 +	unsigned int      evtchn;	/* local port returned by the interdomain bind */
    9.63 +	unsigned int      irq;		/* nonzero is read as "already connected" */
    9.64 +	/* Comms information. */
    9.65 +	blkif_back_ring_t blk_ring;	/* initialised over the mapped page in tap_blkif_map() */
    9.66 +	struct vm_struct *blk_ring_area;	/* PAGE_SIZE VM area the ring page is mapped into */
    9.67 +	/* Back pointer to the backend_info. */
    9.68 +	struct backend_info *be; 
    9.69 +	/* Private fields. */
    9.70 +	spinlock_t       blk_ring_lock;
    9.71 +	atomic_t         refcnt;	/* starts at 1; changed via blkif_get()/blkif_put() */
    9.72 +
    9.73 +	wait_queue_head_t   wq;
    9.74 +	struct task_struct  *xenblkd;	/* kthread running tap_blkif_schedule(), or NULL */
    9.75 +	unsigned int        waiting_reqs;
    9.76 +	request_queue_t     *plug;
    9.77 +
    9.78 +	/* statistics */
    9.79 +	unsigned long       st_print;	/* initialised to jiffies at allocation */
    9.80 +	int                 st_rd_req;
    9.81 +	int                 st_wr_req;
    9.82 +	int                 st_oo_req;
    9.83 +
    9.84 +	wait_queue_head_t waiting_to_free;	/* woken by blkif_put() when refcnt hits zero */
    9.85 +
    9.86 +	grant_handle_t shmem_handle;	/* grant handle saved for the later unmap */
    9.87 +	grant_ref_t    shmem_ref;	/* grant reference of the frontend's ring page */
    9.88 +	
    9.89 +	int		dev_num;	/* value returned by dom_to_devid() */
    9.90 +	uint64_t        sectors;	/* read from xenstore "sectors"; 0 means not ready */
    9.91 +} blkif_t;
    9.92 +
    9.93 +blkif_t *tap_alloc_blkif(domid_t domid);
    9.94 +void tap_blkif_free(blkif_t *blkif);
    9.95 +int tap_blkif_map(blkif_t *blkif, unsigned long shared_page, 
    9.96 +		  unsigned int evtchn);
    9.97 +
    9.98 +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))	/* take a reference on _b */
    9.99 +#define blkif_put(_b)					\
   9.100 +	do {	/* drop a reference; the last drop wakes waiting_to_free */	\
   9.101 +		if (atomic_dec_and_test(&(_b)->refcnt))	\
   9.102 +			wake_up(&(_b)->waiting_to_free);\
   9.103 +	} while (0)
   9.104 +
   9.105 +
   9.106 +struct phys_req {
   9.107 +	unsigned short       dev;
   9.108 +	unsigned short       nr_sects;
   9.109 +	struct block_device *bdev;
   9.110 +	blkif_sector_t       sector_number;
   9.111 +};
   9.112 +
   9.113 +void tap_blkif_interface_init(void);
   9.114 +
   9.115 +void tap_blkif_xenbus_init(void);
   9.116 +
   9.117 +irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
   9.118 +int tap_blkif_schedule(void *arg);
   9.119 +
   9.120 +int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
   9.121 +void signal_tapdisk(int idx);
   9.122 +
   9.123 +#endif /* __BLKIF__BACKEND__COMMON_H__ */
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c	Thu Jul 13 10:13:26 2006 +0100
    10.3 @@ -0,0 +1,165 @@
    10.4 +/******************************************************************************
    10.5 + * drivers/xen/blktap/interface.c
    10.6 + * 
    10.7 + * Block-device interface management.
    10.8 + * 
    10.9 + * Copyright (c) 2004, Keir Fraser
   10.10 + *
   10.11 + * This program is free software; you can redistribute it and/or
   10.12 + * modify it under the terms of the GNU General Public License version 2
   10.13 + * as published by the Free Software Foundation; or, when distributed
   10.14 + * separately from the Linux kernel or incorporated into other
   10.15 + * software packages, subject to the following license:
   10.16 + *
   10.17 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   10.18 + * of this source file (the "Software"), to deal in the Software without
   10.19 + * restriction, including without limitation the rights to use, copy, modify,
   10.20 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   10.21 + * and to permit persons to whom the Software is furnished to do so, subject to
   10.22 + * the following conditions:
   10.23 + *
   10.24 + * The above copyright notice and this permission notice shall be included in
   10.25 + * all copies or substantial portions of the Software.
   10.26 + *
   10.27 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   10.28 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   10.29 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   10.30 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   10.31 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   10.32 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   10.33 + * IN THE SOFTWARE.
   10.34 +
   10.35 + */
   10.36 +
   10.37 +#include "common.h"
   10.38 +#include <xen/evtchn.h>
   10.39 +
   10.40 +static kmem_cache_t *blkif_cachep;
   10.41 +
   10.42 +blkif_t *tap_alloc_blkif(domid_t domid)	/* new zeroed blkif for domid, or ERR_PTR(-ENOMEM) */
   10.43 +{
   10.44 +	blkif_t *blkif;
   10.45 +
   10.46 +	blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
   10.47 +	if (!blkif)
   10.48 +		return ERR_PTR(-ENOMEM);	/* never NULL: blktap_probe() tests with IS_ERR() */
   10.49 +
   10.50 +	memset(blkif, 0, sizeof(*blkif));	/* leaves irq == 0 and blk_ring.sring == NULL */
   10.51 +	blkif->domid = domid;
   10.52 +	spin_lock_init(&blkif->blk_ring_lock);
   10.53 +	atomic_set(&blkif->refcnt, 1);	/* initial reference, dropped in tap_blkif_free() */
   10.54 +	init_waitqueue_head(&blkif->wq);
   10.55 +	blkif->st_print = jiffies;
   10.56 +	init_waitqueue_head(&blkif->waiting_to_free);
   10.57 +
   10.58 +	return blkif;
   10.59 +}
   10.60 +
   10.61 +static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)	/* 0, or the grant op's status */
   10.62 +{
   10.63 +	struct gnttab_map_grant_ref op;
   10.64 +	int ret;
   10.65 +
   10.66 +	gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
   10.67 +			  GNTMAP_host_map, shared_page, blkif->domid);
   10.68 +
   10.69 +	lock_vm_area(blkif->blk_ring_area);
   10.70 +	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
   10.71 +	unlock_vm_area(blkif->blk_ring_area);
   10.72 +	BUG_ON(ret);	/* a failing hypercall is fatal; per-op failure is in op.status */
   10.73 +
   10.74 +	if (op.status) {
   10.75 +		DPRINTK(" Grant table operation failure !\n");
   10.76 +		return op.status;	/* nothing was recorded in blkif on this path */
   10.77 +	}
   10.78 +
   10.79 +	blkif->shmem_ref = shared_page;
   10.80 +	blkif->shmem_handle = op.handle;	/* needed by unmap_frontend_page() */
   10.81 +
   10.82 +#ifdef CONFIG_XEN_IA64_DOM0_NON_VP
   10.83 +	/* on some arch's, map_grant_ref behaves like mmap, in that the
   10.84 +	 * passed address is a hint and a different address may be returned */
   10.85 +	blkif->blk_ring_area->addr = gnttab_map_vaddr(op);
   10.86 +#endif
   10.87 +
   10.88 +	return 0;
   10.89 +}
   10.90 +
   10.91 +static void unmap_frontend_page(blkif_t *blkif)	/* undo map_frontend_page() */
   10.92 +{
   10.93 +	struct gnttab_unmap_grant_ref op;
   10.94 +	int ret;
   10.95 +
   10.96 +	gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
   10.97 +			    GNTMAP_host_map, blkif->shmem_handle);	/* handle saved at map time */
   10.98 +
   10.99 +	lock_vm_area(blkif->blk_ring_area);
  10.100 +	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
  10.101 +	unlock_vm_area(blkif->blk_ring_area);
  10.102 +	BUG_ON(ret);	/* NOTE(review): op.status is not examined here */
  10.103 +}
  10.104 +
  10.105 +/* Map the frontend's ring page and bind its event channel to our irq. */
  10.106 +int tap_blkif_map(blkif_t *blkif, unsigned long shared_page,
  10.107 +		  unsigned int evtchn)
  10.108 +{
  10.109 +	blkif_sring_t *sring;
  10.110 +	int err;
  10.111 +	struct evtchn_bind_interdomain bind_interdomain;
  10.112 +
  10.113 +	/* Already connected through? */
  10.114 +	if (blkif->irq)
  10.115 +		return 0;
  10.116 +
  10.117 +	if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL )
  10.118 +		return -ENOMEM;
  10.119 +	err = map_frontend_page(blkif, shared_page);
  10.120 +	if (err)
  10.121 +		goto fail_free;
  10.122 +
  10.123 +	bind_interdomain.remote_dom  = blkif->domid;
  10.124 +	bind_interdomain.remote_port = evtchn;
  10.125 +	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
  10.126 +					  &bind_interdomain);
  10.127 +	if (err)
  10.128 +		goto fail_unmap;
  10.129 +	blkif->evtchn = bind_interdomain.local_port;
  10.130 +	sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
  10.131 +	BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
  10.132 +
  10.133 +	/* A negative result is an errno, not an irq: never store it in ->irq. */
  10.134 +	err = bind_evtchn_to_irqhandler(
  10.135 +		blkif->evtchn, tap_blkif_be_int, 0, "blkif-backend", blkif);
  10.136 +	if (err < 0)
  10.137 +		goto fail_unmap;
  10.138 +	blkif->irq = err;
  10.139 +	return 0;
  10.140 +fail_unmap:
  10.141 +	blkif->blk_ring.sring = NULL;	/* so tap_blkif_free() won't unmap again */
  10.142 +	unmap_frontend_page(blkif);
  10.143 +fail_free:
  10.144 +	free_vm_area(blkif->blk_ring_area);
  10.145 +	return err;
  10.146 +
  10.147 +void tap_blkif_free(blkif_t *blkif)	/* wait for all users, then tear down and free */
  10.148 +{
  10.149 +	atomic_dec(&blkif->refcnt);	/* drop the reference set in tap_alloc_blkif() */
  10.150 +	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
  10.151 +
  10.152 +	/* Still bound to an irq?  Release the handler first. */
  10.153 +	if (blkif->irq)
  10.154 +		unbind_from_irqhandler(blkif->irq, blkif);
  10.155 +
  10.156 +	if (blkif->blk_ring.sring) {	/* presumably set by BACK_RING_INIT() in tap_blkif_map() */
  10.157 +		unmap_frontend_page(blkif);
  10.158 +		free_vm_area(blkif->blk_ring_area);
  10.159 +	}
  10.160 +
  10.161 +	kmem_cache_free(blkif_cachep, blkif);
  10.162 +}
  10.163 +
  10.164 +void __init tap_blkif_interface_init(void)	/* create the slab cache used by tap_alloc_blkif() */
  10.165 +{
  10.166 +	blkif_cachep = kmem_cache_create("blktapif_cache", sizeof(blkif_t), 
  10.167 +					 0, 0, NULL, NULL);	/* NOTE(review): result is not checked */
  10.168 +}
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c	Thu Jul 13 10:13:26 2006 +0100
    11.3 @@ -0,0 +1,354 @@
    11.4 +/* drivers/xen/blktap/xenbus.c
    11.5 + *
    11.6 + * Xenbus code for blktap
    11.7 + *
    11.8 + * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
    11.9 + *
   11.10 + * Based on the blkback xenbus code:
   11.11 + *
   11.12 + * Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
   11.13 + * Copyright (C) 2005 XenSource Ltd
   11.14 + *
   11.15 + * This program is free software; you can redistribute it and/or
   11.16 + * modify it under the terms of the GNU General Public License version 2
   11.17 + * as published by the Free Software Foundation; or, when distributed
   11.18 + * separately from the Linux kernel or incorporated into other
   11.19 + * software packages, subject to the following license:
   11.20 + *
   11.21 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   11.22 + * of this source file (the "Software"), to deal in the Software without
   11.23 + * restriction, including without limitation the rights to use, copy, modify,
   11.24 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   11.25 + * and to permit persons to whom the Software is furnished to do so, subject to
   11.26 + * the following conditions:
   11.27 + *
   11.28 + * The above copyright notice and this permission notice shall be included in
   11.29 + * all copies or substantial portions of the Software.
   11.30 + *
   11.31 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   11.32 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   11.33 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   11.34 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   11.35 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   11.36 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   11.37 + * IN THE SOFTWARE.
   11.38 + */
   11.39 +
   11.40 +#include <stdarg.h>
   11.41 +#include <linux/module.h>
   11.42 +#include <linux/kthread.h>
   11.43 +#include <xen/xenbus.h>
   11.44 +#include "common.h"
   11.45 +
   11.46 +
   11.47 +struct backend_info	/* per-device backend state, stored in dev->dev.driver_data */
   11.48 +{
   11.49 +	struct xenbus_device *dev;	/* owning xenbus device, set in blktap_probe() */
   11.50 +	blkif_t *blkif;			/* NULL if allocation failed or after removal */
   11.51 +	struct xenbus_watch backend_watch;	/* watch on "info" under the backend node */
   11.52 +	int xenbus_id;			/* id parsed from the nodename by get_id() */
   11.53 +};
   11.54 +
   11.55 +
   11.56 +static void connect(struct backend_info *);
   11.57 +static int connect_ring(struct backend_info *);
   11.58 +static int blktap_remove(struct xenbus_device *dev);
   11.59 +static int blktap_probe(struct xenbus_device *dev,
   11.60 +			 const struct xenbus_device_id *id);
   11.61 +static void tap_backend_changed(struct xenbus_watch *, const char **,
   11.62 +			    unsigned int);
   11.63 +static void tap_frontend_changed(struct xenbus_device *dev,
   11.64 +			     enum xenbus_state frontend_state);
   11.65 +
   11.66 +static int strsep_len(const char *str, char c, unsigned int len)	/* index of the (len+1)th c in str */
   11.67 +{
   11.68 +        unsigned int i;
   11.69 +
   11.70 +        for (i = 0; str[i]; i++)
   11.71 +                if (str[i] == c) {
   11.72 +                        if (len == 0)
   11.73 +                                return i;	/* len earlier matches were already skipped */
   11.74 +                        len--;
   11.75 +                }
   11.76 +        return (len == 0) ? i : -ERANGE;	/* exactly len matches: strlen(str); fewer: -ERANGE */
   11.77 +}
   11.78 +
   11.79 +/* Parse the decimal id that follows the third '/' of a xenstore path. */
   11.80 +static long get_id(const char *str)
   11.81 +{
   11.82 +	int len, end;
   11.83 +	const char *ptr;
   11.84 +
   11.85 +	len = strsep_len(str, '/', 2);
   11.86 +	end = strlen(str);
   11.87 +	/* Fewer than three '/' in the path: there is no id to parse. */
   11.88 +	if ( (len < 0) || (len >= end) )
   11.89 +		return -1;
   11.90 +
   11.91 +	/* Parse in place; the old 10-byte scratch copy could overflow. */
   11.92 +	ptr = str + len + 1;
   11.93 +	DPRINTK("Get_id called for %s (%s)\n", str, ptr);
   11.94 +
   11.95 +	return simple_strtol(ptr, NULL, 10);
   11.96 +}
   11.97 +
   11.98 +static void tap_update_blkif_status(blkif_t *blkif)	/* connect and start xenblkd once ready */
   11.99 +{ 
  11.100 +	int err;
  11.101 +
  11.102 +	/* Not ready to connect?  Both the ring irq and the disk size are needed. */
  11.103 +	if(!blkif->irq || !blkif->sectors) {
  11.104 +		return;
  11.105 +	} 
  11.106 +
  11.107 +	/* Already connected? */
  11.108 +	if (blkif->be->dev->state == XenbusStateConnected)
  11.109 +		return;
  11.110 +
  11.111 +	/* Attempt to connect: exit if we fail to. */
  11.112 +	connect(blkif->be);	/* returns void; success is judged from dev->state below */
  11.113 +	if (blkif->be->dev->state != XenbusStateConnected)
  11.114 +		return;
  11.115 +
  11.116 +	blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif,
  11.117 +				     "xvd %d",
  11.118 +				     blkif->domid);	/* thread name carries the frontend domid */
  11.119 +
  11.120 +	if (IS_ERR(blkif->xenblkd)) {
  11.121 +		err = PTR_ERR(blkif->xenblkd);
  11.122 +		blkif->xenblkd = NULL;	/* so blktap_remove() does not kthread_stop() it */
  11.123 +		xenbus_dev_fatal(blkif->be->dev, err, "start xenblkd");
  11.124 +		WPRINTK("Error starting thread\n");
  11.125 +	}
  11.126 +}
  11.127 +
  11.128 +static int blktap_remove(struct xenbus_device *dev)	/* also blktap_probe()'s failure path */
  11.129 +{
  11.130 +	struct backend_info *be = dev->dev.driver_data;
  11.131 +
  11.132 +	if (be->backend_watch.node) {	/* only unregister a watch that has a node set */
  11.133 +		unregister_xenbus_watch(&be->backend_watch);
  11.134 +		kfree(be->backend_watch.node);
  11.135 +		be->backend_watch.node = NULL;
  11.136 +	}
  11.137 +	if (be->blkif) {
  11.138 +		if (be->blkif->xenblkd)	/* set only after kthread_run() succeeded */
  11.139 +			kthread_stop(be->blkif->xenblkd);
  11.140 +		signal_tapdisk(be->blkif->dev_num);
  11.141 +		tap_blkif_free(be->blkif);	/* blocks until every reference is dropped */
  11.142 +		be->blkif = NULL;
  11.143 +	}
  11.144 +	kfree(be);
  11.145 +	dev->dev.driver_data = NULL;
  11.146 +	return 0;	/* always reports success */
  11.147 +}
  11.148 +
  11.149 +/**
  11.150 + * Entry point to this code when a new device is created.  Allocate
  11.151 + * the basic structures, and watch the store waiting for the
  11.152 + * user-space program to tell us the physical device info.  Switch to
  11.153 + * InitWait.  Returns 0, or a negative errno after undoing the setup.
  11.154 + */
  11.155 +static int blktap_probe(struct xenbus_device *dev,
  11.156 +			 const struct xenbus_device_id *id)
  11.157 +{
  11.158 +	int err;
  11.159 +	struct backend_info *be = kzalloc(sizeof(struct backend_info),
  11.160 +					  GFP_KERNEL);
  11.161 +	if (!be) {
  11.162 +		xenbus_dev_fatal(dev, -ENOMEM,
  11.163 +				 "allocating backend structure");
  11.164 +		return -ENOMEM;	/* nothing to undo yet, so no goto fail */
  11.165 +	}
  11.166 +
  11.167 +	be->dev = dev;
  11.168 +	dev->dev.driver_data = be;	/* blktap_remove() finds be through this */
  11.169 +	be->xenbus_id = get_id(dev->nodename);
  11.170 +
  11.171 +	be->blkif = tap_alloc_blkif(dev->otherend_id);
  11.172 +	if (IS_ERR(be->blkif)) {
  11.173 +		err = PTR_ERR(be->blkif);
  11.174 +		be->blkif = NULL;	/* so blktap_remove() skips the blkif teardown */
  11.175 +		xenbus_dev_fatal(dev, err, "creating block interface");
  11.176 +		goto fail;
  11.177 +	}
  11.178 +
  11.179 +	/* setup back pointer */
  11.180 +	be->blkif->be = be; 
  11.181 +	be->blkif->sectors = 0;	/* 0 keeps tap_update_blkif_status() from connecting */
  11.182 +
  11.183 +	/* set a watch on disk info, waiting for userspace to update details*/
  11.184 +	err = xenbus_watch_path2(dev, dev->nodename, "info",
  11.185 +				 &be->backend_watch, tap_backend_changed);
  11.186 +	if (err)
  11.187 +		goto fail;
  11.188 +	
  11.189 +	err = xenbus_switch_state(dev, XenbusStateInitWait);
  11.190 +	if (err)
  11.191 +		goto fail;
  11.192 +	return 0;
  11.193 +
  11.194 +fail:
  11.195 +	DPRINTK("blktap probe failed");
  11.196 +	blktap_remove(dev);	/* frees be, plus the watch and blkif if they were set up */
  11.197 +	return err;
  11.198 +}
  11.199 +
  11.200 +
  11.201 +/**
  11.202 + * Callback received when the user space code has placed the device
  11.203 + * information in xenstore (fires for the watch set in blktap_probe()).
  11.204 + */
  11.205 +static void tap_backend_changed(struct xenbus_watch *watch,
  11.206 +			    const char **vec, unsigned int len)
  11.207 +{
  11.208 +	int err;
  11.209 +	unsigned long info;
  11.210 +	struct backend_info *be
  11.211 +		= container_of(watch, struct backend_info, backend_watch);
  11.212 +	struct xenbus_device *dev = be->dev;
  11.213 +	
  11.214 +	/** 
  11.215 +	 * Check whether the userspace code has opened the image and
  11.216 +	 * written the disk info to xenstore; "info" is read first and
  11.217 +	 * a failure to read it ends the callback.
  11.218 +	 */
  11.219 +	err = xenbus_gather(XBT_NIL, dev->nodename, "info", "%lu", &info, 
  11.220 +			    NULL);	
  11.221 +	if (err) {
  11.222 +		xenbus_dev_error(dev, err, "getting info");
  11.223 +		return;
  11.224 +	}
  11.225 +
  11.226 +	DPRINTK("Userspace update on disk info, %lu\n",info);
  11.227 +
  11.228 +	err = xenbus_gather(XBT_NIL, dev->nodename, "sectors", "%llu", 
  11.229 +			    &be->blkif->sectors, NULL);	/* NOTE(review): err is never checked */
  11.230 +
  11.231 +	/* Associate tap dev with domid*/
  11.232 +	be->blkif->dev_num = dom_to_devid(be->blkif->domid, be->xenbus_id, 
  11.233 +					  be->blkif);
  11.234 +	DPRINTK("Thread started for domid [%d], connecting disk\n", 
  11.235 +		be->blkif->dev_num);	/* prints dev_num, despite the "domid" label */
  11.236 +
  11.237 +	tap_update_blkif_status(be->blkif);	/* no-op until irq and sectors are both set */
  11.238 +}
  11.239 +
  11.240 +/**
  11.241 + * Callback received when the frontend's state changes (.otherend_changed).
  11.242 + */
  11.243 +static void tap_frontend_changed(struct xenbus_device *dev,
  11.244 +			     enum xenbus_state frontend_state)
  11.245 +{
  11.246 +	struct backend_info *be = dev->dev.driver_data;
  11.247 +	int err;
  11.248 +
  11.249 +	DPRINTK("");
  11.250 +
  11.251 +	switch (frontend_state) {
  11.252 +	case XenbusStateInitialising:
  11.253 +		break;	/* nothing to do yet */
  11.254 +
  11.255 +	case XenbusStateInitialised:
  11.256 +	case XenbusStateConnected:
  11.257 +		/* Ensure we connect even when two watches fire in 
  11.258 +		   close succession and we miss the intermediate value 
  11.259 +		   of frontend_state. */
  11.260 +		if (dev->state == XenbusStateConnected)
  11.261 +			break;
  11.262 +
  11.263 +		err = connect_ring(be);	/* errors are reported inside connect_ring() */
  11.264 +		if (err)
  11.265 +			break;
  11.266 +		tap_update_blkif_status(be->blkif);
  11.267 +		break;
  11.268 +
  11.269 +	case XenbusStateClosing:
  11.270 +		xenbus_switch_state(dev, XenbusStateClosing);	/* mirror the frontend */
  11.271 +		break;
  11.272 +
  11.273 +	case XenbusStateClosed:
  11.274 +		device_unregister(&dev->dev);
  11.275 +		break;
  11.276 +
  11.277 +	case XenbusStateUnknown:
  11.278 +	case XenbusStateInitWait:
  11.279 +	default:
  11.280 +		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
  11.281 +				 frontend_state);
  11.282 +		break;
  11.283 +	}
  11.284 +}
  11.285 +
  11.286 +
  11.287 +/**
  11.288 + * Switch to Connected state, reporting a fatal xenbus error on failure.
  11.289 + */
  11.290 +static void connect(struct backend_info *be)
  11.291 +{
  11.292 +	struct xenbus_device *dev = be->dev;
  11.293 +	int err;
  11.294 +
  11.295 +	err = xenbus_switch_state(dev, XenbusStateConnected);
  11.296 +	if (err) {
  11.297 +		/* The message has no conversion specifiers, so it must
  11.298 +		 * not be followed by a stray argument. */
  11.299 +		xenbus_dev_fatal(dev, err,
  11.300 +				 "switching to Connected state");
  11.301 +	}
  11.302 +}
  11.303 +
  11.304 +
  11.305 +static int connect_ring(struct backend_info *be)	/* read the frontend's ring details and map them */
  11.306 +{
  11.307 +	struct xenbus_device *dev = be->dev;
  11.308 +	unsigned long ring_ref;
  11.309 +	unsigned int evtchn;
  11.310 +	int err;
  11.311 +
  11.312 +	DPRINTK("%s", dev->otherend);
  11.313 +
  11.314 +	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", 
  11.315 +			    &ring_ref, "event-channel", "%u", &evtchn, NULL);	/* both keys in one call */
  11.316 +	if (err) {
  11.317 +		xenbus_dev_fatal(dev, err,
  11.318 +				 "reading %s/ring-ref and event-channel",
  11.319 +				 dev->otherend);
  11.320 +		return err;
  11.321 +	}
  11.322 +
  11.323 +	/* Map the shared frame, irq etc. */
  11.324 +	err = tap_blkif_map(be->blkif, ring_ref, evtchn);	/* returns 0 early if blkif->irq is set */
  11.325 +	if (err) {
  11.326 +		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
  11.327 +				 ring_ref, evtchn);
  11.328 +		return err;
  11.329 +	} 
  11.330 +
  11.331 +	return 0;
  11.332 +}
  11.333 +
  11.334 +
  11.335 +/* ** Driver Registration ** */
  11.336 +
  11.337 +
  11.338 +static struct xenbus_device_id blktap_ids[] = {	/* device types this driver handles */
  11.339 +	{ "tap" },
  11.340 +	{ "" }	/* presumably the list terminator -- confirm against xenbus */
  11.341 +};
  11.342 +
  11.343 +
  11.344 +static struct xenbus_driver blktap = {	/* registered by tap_blkif_xenbus_init() */
  11.345 +	.name = "tap",
  11.346 +	.owner = THIS_MODULE,
  11.347 +	.ids = blktap_ids,
  11.348 +	.probe = blktap_probe,
  11.349 +	.remove = blktap_remove,
  11.350 +	.otherend_changed = tap_frontend_changed	/* frontend state callback */
  11.351 +};
  11.352 +
  11.353 +
  11.354 +void tap_blkif_xenbus_init(void)	/* register the "tap" backend driver with xenbus */
  11.355 +{
  11.356 +	xenbus_register_backend(&blktap);	/* NOTE(review): any return value is ignored */
  11.357 +}
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/patches/linux-2.6.16.13/blktap-aio-16_03_06.patch	Thu Jul 13 10:13:26 2006 +0100
    12.3 @@ -0,0 +1,297 @@
    12.4 +diff -pruN ../pristine-linux-2.6.16-rc5/fs/aio.c ./fs/aio.c
    12.5 +--- ../pristine-linux-2.6.16-rc5/fs/aio.c	2006-03-14 14:10:10.827401387 +0000
    12.6 ++++ ./fs/aio.c	2006-03-16 09:57:53.898316582 +0000
    12.7 +@@ -34,6 +34,11 @@
    12.8 + #include <asm/uaccess.h>
    12.9 + #include <asm/mmu_context.h>
   12.10 + 
   12.11 ++#ifdef CONFIG_EPOLL
   12.12 ++#include <linux/poll.h>
   12.13 ++#include <linux/eventpoll.h>
   12.14 ++#endif
   12.15 ++
   12.16 + #if DEBUG > 1
   12.17 + #define dprintk		printk
   12.18 + #else
   12.19 +@@ -1016,6 +1021,10 @@ put_rq:
   12.20 + 	if (waitqueue_active(&ctx->wait))
   12.21 + 		wake_up(&ctx->wait);
   12.22 + 
   12.23 ++#ifdef CONFIG_EPOLL
   12.24 ++	if (ctx->file && waitqueue_active(&ctx->poll_wait))
   12.25 ++		wake_up(&ctx->poll_wait);
   12.26 ++#endif
   12.27 + 	if (ret)
   12.28 + 		put_ioctx(ctx);
   12.29 + 
   12.30 +@@ -1025,6 +1034,8 @@ put_rq:
   12.31 + /* aio_read_evt
   12.32 +  *	Pull an event off of the ioctx's event ring.  Returns the number of 
   12.33 +  *	events fetched (0 or 1 ;-)
   12.34 ++ *	If ent parameter is 0, just returns the number of events that would
   12.35 ++ *	be fetched.
   12.36 +  *	FIXME: make this use cmpxchg.
   12.37 +  *	TODO: make the ringbuffer user mmap()able (requires FIXME).
   12.38 +  */
   12.39 +@@ -1047,13 +1058,18 @@ static int aio_read_evt(struct kioctx *i
   12.40 + 
   12.41 + 	head = ring->head % info->nr;
   12.42 + 	if (head != ring->tail) {
   12.43 +-		struct io_event *evp = aio_ring_event(info, head, KM_USER1);
   12.44 +-		*ent = *evp;
   12.45 +-		head = (head + 1) % info->nr;
   12.46 +-		smp_mb(); /* finish reading the event before updatng the head */
   12.47 +-		ring->head = head;
   12.48 +-		ret = 1;
   12.49 +-		put_aio_ring_event(evp, KM_USER1);
   12.50 ++		if (ent) { /* event requested */
   12.51 ++			struct io_event *evp =
   12.52 ++				aio_ring_event(info, head, KM_USER1);
   12.53 ++			*ent = *evp;
   12.54 ++			head = (head + 1) % info->nr;
   12.55 ++			/* finish reading the event before updating the head */
   12.56 ++			smp_mb();
   12.57 ++			ring->head = head;
   12.58 ++			ret = 1;
   12.59 ++			put_aio_ring_event(evp, KM_USER1);
   12.60 ++		} else /* only need to know availability */
   12.61 ++			ret = 1;
   12.62 + 	}
   12.63 + 	spin_unlock(&info->ring_lock);
   12.64 + 
   12.65 +@@ -1236,9 +1252,78 @@ static void io_destroy(struct kioctx *io
   12.66 + 
   12.67 + 	aio_cancel_all(ioctx);
   12.68 + 	wait_for_all_aios(ioctx);
   12.69 ++#ifdef CONFIG_EPOLL
   12.70 ++	/* forget the poll file, but it's up to the user to close it */
   12.71 ++	if (ioctx->file) {
   12.72 ++		ioctx->file->private_data = 0;
   12.73 ++		ioctx->file = 0;
   12.74 ++	}
   12.75 ++#endif
   12.76 + 	put_ioctx(ioctx);	/* once for the lookup */
   12.77 + }
   12.78 + 
   12.79 ++#ifdef CONFIG_EPOLL
   12.80 ++
   12.81 ++static int aio_queue_fd_close(struct inode *inode, struct file *file)
   12.82 ++{
   12.83 ++	struct kioctx *ioctx = file->private_data;
   12.84 ++	if (ioctx) {
   12.85 ++		file->private_data = 0;
   12.86 ++		spin_lock_irq(&ioctx->ctx_lock);
   12.87 ++		ioctx->file = 0;
   12.88 ++		spin_unlock_irq(&ioctx->ctx_lock);
   12.89 ++	}
   12.90 ++	return 0;
   12.91 ++}
   12.92 ++
   12.93 ++static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
   12.94 ++{	unsigned int pollflags = 0;
   12.95 ++	struct kioctx *ioctx = file->private_data;
   12.96 ++
   12.97 ++	if (ioctx) {
   12.98 ++
   12.99 ++		spin_lock_irq(&ioctx->ctx_lock);
  12.100 ++		/* Insert inside our poll wait queue */
  12.101 ++		poll_wait(file, &ioctx->poll_wait, wait);
  12.102 ++
  12.103 ++		/* Check our condition */
  12.104 ++		if (aio_read_evt(ioctx, 0))
  12.105 ++			pollflags = POLLIN | POLLRDNORM;
  12.106 ++		spin_unlock_irq(&ioctx->ctx_lock);
  12.107 ++	}
  12.108 ++
  12.109 ++	return pollflags;
  12.110 ++}
  12.111 ++
  12.112 ++static struct file_operations aioq_fops = {
  12.113 ++	.release	= aio_queue_fd_close,
  12.114 ++	.poll		= aio_queue_fd_poll
  12.115 ++};
  12.116 ++
  12.117 ++/* make_aio_fd:
  12.118 ++ *  Create a file descriptor that can be used to poll the event queue.
  12.119 ++ *  Based and piggybacked on the excellent epoll code.
  12.120 ++ */
  12.121 ++
  12.122 ++static int make_aio_fd(struct kioctx *ioctx)
  12.123 ++{
  12.124 ++	int error, fd;
  12.125 ++	struct inode *inode;
  12.126 ++	struct file *file;
  12.127 ++
  12.128 ++	error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
  12.129 ++	if (error)
  12.130 ++		return error;
  12.131 ++
  12.132 ++	/* associate the file with the IO context */
  12.133 ++	file->private_data = ioctx;
  12.134 ++	ioctx->file = file;
  12.135 ++	init_waitqueue_head(&ioctx->poll_wait);
  12.136 ++	return fd;
  12.137 ++}
  12.138 ++#endif
  12.139 ++
  12.140 ++
  12.141 + /* sys_io_setup:
  12.142 +  *	Create an aio_context capable of receiving at least nr_events.
  12.143 +  *	ctxp must not point to an aio_context that already exists, and
  12.144 +@@ -1251,18 +1336,30 @@ static void io_destroy(struct kioctx *io
  12.145 +  *	resources are available.  May fail with -EFAULT if an invalid
  12.146 +  *	pointer is passed for ctxp.  Will fail with -ENOSYS if not
  12.147 +  *	implemented.
  12.148 ++ *
  12.149 ++ *	To request a selectable fd, the user context has to be initialized
  12.150 ++ *	to 1, instead of 0, and the return value is the fd.
  12.151 ++ *	This keeps the system call compatible, since a non-zero value
  12.152 ++ *	was not allowed so far.
  12.153 +  */
  12.154 + asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp)
  12.155 + {
  12.156 + 	struct kioctx *ioctx = NULL;
  12.157 + 	unsigned long ctx;
  12.158 + 	long ret;
  12.159 ++	int make_fd = 0;
  12.160 + 
  12.161 + 	ret = get_user(ctx, ctxp);
  12.162 + 	if (unlikely(ret))
  12.163 + 		goto out;
  12.164 + 
  12.165 + 	ret = -EINVAL;
  12.166 ++#ifdef CONFIG_EPOLL
  12.167 ++	if (ctx == 1) {
  12.168 ++		make_fd = 1;
  12.169 ++		ctx = 0;
  12.170 ++	}
  12.171 ++#endif
  12.172 + 	if (unlikely(ctx || nr_events == 0)) {
  12.173 + 		pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
  12.174 + 		         ctx, nr_events);
  12.175 +@@ -1273,8 +1370,12 @@ asmlinkage long sys_io_setup(unsigned nr
  12.176 + 	ret = PTR_ERR(ioctx);
  12.177 + 	if (!IS_ERR(ioctx)) {
  12.178 + 		ret = put_user(ioctx->user_id, ctxp);
  12.179 +-		if (!ret)
  12.180 +-			return 0;
  12.181 ++#ifdef CONFIG_EPOLL
  12.182 ++		if (make_fd && ret >= 0)
  12.183 ++			ret = make_aio_fd(ioctx);
  12.184 ++#endif
  12.185 ++		if (ret >= 0)
  12.186 ++			return ret;
  12.187 + 
  12.188 + 		get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
  12.189 + 		io_destroy(ioctx);
  12.190 +
  12.191 +diff -pruN ../pristine-linux-2.6.16-rc5/fs/eventpoll.c ./fs/eventpoll.c
  12.192 +--- ../pristine-linux-2.6.16-rc5/fs/eventpoll.c	2006-01-03 03:21:10.000000000 +0000
  12.193 ++++ ./fs/eventpoll.c	2006-03-16 10:04:35.469956167 +0000
  12.194 +@@ -235,8 +235,6 @@ struct ep_pqueue {
  12.195 + 
  12.196 + static void ep_poll_safewake_init(struct poll_safewake *psw);
  12.197 + static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq);
  12.198 +-static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
  12.199 +-		    struct eventpoll *ep);
  12.200 + static int ep_alloc(struct eventpoll **pep);
  12.201 + static void ep_free(struct eventpoll *ep);
  12.202 + static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
  12.203 +@@ -266,7 +264,7 @@ static int ep_events_transfer(struct eve
  12.204 + static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
  12.205 + 		   int maxevents, long timeout);
  12.206 + static int eventpollfs_delete_dentry(struct dentry *dentry);
  12.207 +-static struct inode *ep_eventpoll_inode(void);
  12.208 ++static struct inode *ep_eventpoll_inode(struct file_operations *fops);
  12.209 + static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type,
  12.210 + 					      int flags, const char *dev_name,
  12.211 + 					      void *data);
  12.212 +@@ -525,7 +523,7 @@ asmlinkage long sys_epoll_create(int siz
  12.213 + 	 * Creates all the items needed to setup an eventpoll file. That is,
  12.214 + 	 * a file structure, and inode and a free file descriptor.
  12.215 + 	 */
  12.216 +-	error = ep_getfd(&fd, &inode, &file, ep);
  12.217 ++	error = ep_getfd(&fd, &inode, &file, ep, &eventpoll_fops);
  12.218 + 	if (error)
  12.219 + 		goto eexit_2;
  12.220 + 
  12.221 +@@ -710,8 +708,8 @@ eexit_1:
  12.222 + /*
  12.223 +  * Creates the file descriptor to be used by the epoll interface.
  12.224 +  */
  12.225 +-static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
  12.226 +-		    struct eventpoll *ep)
  12.227 ++int ep_getfd(int *efd, struct inode **einode, struct file **efile,
  12.228 ++		    struct eventpoll *ep, struct file_operations *fops)
  12.229 + {
  12.230 + 	struct qstr this;
  12.231 + 	char name[32];
  12.232 +@@ -727,7 +725,7 @@ static int ep_getfd(int *efd, struct ino
  12.233 + 		goto eexit_1;
  12.234 + 
  12.235 + 	/* Allocates an inode from the eventpoll file system */
  12.236 +-	inode = ep_eventpoll_inode();
  12.237 ++	inode = ep_eventpoll_inode(fops);
  12.238 + 	error = PTR_ERR(inode);
  12.239 + 	if (IS_ERR(inode))
  12.240 + 		goto eexit_2;
  12.241 +@@ -758,7 +756,7 @@ static int ep_getfd(int *efd, struct ino
  12.242 + 
  12.243 + 	file->f_pos = 0;
  12.244 + 	file->f_flags = O_RDONLY;
  12.245 +-	file->f_op = &eventpoll_fops;
  12.246 ++	file->f_op = fops;
  12.247 + 	file->f_mode = FMODE_READ;
  12.248 + 	file->f_version = 0;
  12.249 + 	file->private_data = ep;
  12.250 +@@ -1574,7 +1572,7 @@ static int eventpollfs_delete_dentry(str
  12.251 + }
  12.252 + 
  12.253 + 
  12.254 +-static struct inode *ep_eventpoll_inode(void)
  12.255 ++static struct inode *ep_eventpoll_inode(struct file_operations *fops)
  12.256 + {
  12.257 + 	int error = -ENOMEM;
  12.258 + 	struct inode *inode = new_inode(eventpoll_mnt->mnt_sb);
  12.259 +@@ -1582,7 +1580,7 @@ static struct inode *ep_eventpoll_inode(
  12.260 + 	if (!inode)
  12.261 + 		goto eexit_1;
  12.262 + 
  12.263 +-	inode->i_fop = &eventpoll_fops;
  12.264 ++	inode->i_fop = fops;
  12.265 + 
  12.266 + 	/*
  12.267 + 	 * Mark the inode dirty from the very beginning,
  12.268 +
  12.269 +diff -pruN ../pristine-linux-2.6.16-rc5/include/linux/aio.h ./include/linux/aio.h
  12.270 +--- ../pristine-linux-2.6.16-rc5/include/linux/aio.h	2006-03-14 14:10:21.597916731 +0000
  12.271 ++++ ./include/linux/aio.h	2006-03-16 10:05:39.848833028 +0000
  12.272 +@@ -191,6 +191,11 @@ struct kioctx {
  12.273 + 	struct aio_ring_info	ring_info;
  12.274 + 
  12.275 + 	struct work_struct	wq;
  12.276 ++#ifdef CONFIG_EPOLL
  12.277 ++	// poll integration
  12.278 ++	wait_queue_head_t       poll_wait;
  12.279 ++	struct file		*file;
  12.280 ++#endif
  12.281 + };
  12.282 + 
  12.283 + /* prototypes */
  12.284 +
  12.285 +diff -pruN ../pristine-linux-2.6.16-rc5/include/linux/eventpoll.h ./include/linux/eventpoll.h
  12.286 +--- ../pristine-linux-2.6.16-rc5/include/linux/eventpoll.h	2006-01-03 03:21:10.000000000 +0000
  12.287 ++++ ./include/linux/eventpoll.h	2006-03-16 10:08:51.577809317 +0000
  12.288 +@@ -86,6 +86,12 @@ static inline void eventpoll_release(str
  12.289 + }
  12.290 + 
  12.291 + 
  12.292 ++/*
  12.293 ++ * Called by aio code to create an fd that can poll the aio event queue.
  12.294 ++ */
  12.295 ++struct eventpoll;
  12.296 ++int ep_getfd(int *efd, struct inode **einode, struct file **efile,
  12.297 ++             struct eventpoll *ep, struct file_operations *fops);
  12.298 + #else
  12.299 + 
  12.300 + static inline void eventpoll_init_file(struct file *file) {}
    13.1 --- a/tools/Makefile	Thu Jul 13 09:55:14 2006 +0100
    13.2 +++ b/tools/Makefile	Thu Jul 13 10:13:26 2006 +0100
    13.3 @@ -16,6 +16,8 @@ SUBDIRS-y += guest-headers
    13.4  SUBDIRS-$(VTPM_TOOLS) += vtpm_manager
    13.5  SUBDIRS-$(VTPM_TOOLS) += vtpm
    13.6  SUBDIRS-y += xenstat
    13.7 +SUBDIRS-y += libaio
    13.8 +SUBDIRS-y += blktap
    13.9  
   13.10  # These don't cross-compile
   13.11  ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/tools/blktap/Makefile	Thu Jul 13 10:13:26 2006 +0100
    14.3 @@ -0,0 +1,28 @@
    14.4 +XEN_ROOT = ../..
    14.5 +include $(XEN_ROOT)/tools/Rules.mk
    14.6 +
    14.7 +SUBDIRS-y :=
    14.8 +SUBDIRS-y += lib
    14.9 +SUBDIRS-y += drivers
   14.10 +
   14.11 +.PHONY: all
   14.12 +all: build
   14.13 +
   14.14 +.PHONY: build
   14.15 +build: mk-symlinks
   14.16 +	@set -e; for subdir in $(SUBDIRS-y); do \
   14.17 +	$(MAKE) -C $$subdir all;       \
   14.18 +		done
   14.19 +
   14.20 +.PHONY: install
   14.21 +install:
   14.22 +	@set -e; for subdir in $(SUBDIRS-y); do \
   14.23 +		$(MAKE) -C $$subdir install; \
   14.24 +	done
   14.25 +
   14.26 +.PHONY: clean
   14.27 +clean:
   14.28 +	rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS
   14.29 +	@set -e; for subdir in $(SUBDIRS-y); do \
   14.30 +	$(MAKE) -C $$subdir clean;       \
   14.31 +		done
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/tools/blktap/README	Thu Jul 13 10:13:26 2006 +0100
    15.3 @@ -0,0 +1,122 @@
    15.4 +Blktap Userspace Tools + Library
    15.5 +================================
    15.6 +
    15.7 +Andrew Warfield and Julian Chesterfield
    15.8 +16th June 2006
    15.9 +
   15.10 +{firstname.lastname}@cl.cam.ac.uk
   15.11 +
   15.12 +The blktap userspace toolkit provides a user-level disk I/O
   15.13 +interface. The blktap mechanism involves a kernel driver that acts
   15.14 +similarly to the existing Xen/Linux blkback driver, and a set of
   15.15 +associated user-level libraries.  Using these tools, blktap allows
   15.16 +virtual block devices presented to VMs to be implemented in userspace
   15.17 +and to be backed by raw partitions, files, network, etc.
   15.18 +
   15.19 +The key benefit of blktap is that it makes it easy and fast to write
   15.20 +arbitrary block backends, and that these user-level backends actually
   15.21 +perform very well.  Specifically:
   15.22 +
   15.23 +- Metadata disk formats such as Copy-on-Write, encrypted disks, sparse
   15.24 +  formats and other compression features can be easily implemented.
   15.25 +
   15.26 +- Accessing file-based images from userspace avoids problems related
   15.27 +  to flushing dirty pages which are present in the Linux loopback
   15.28 +  driver.  (Specifically, doing a large number of writes to an
   15.29 +  NFS-backed image doesn't result in the OOM killer going berserk.)
   15.30 +
   15.31 +- Per-disk handler processes enable easier userspace policing of block
   15.32 +  resources, and process-granularity QoS techniques (disk scheduling
   15.33 +  and related tools) may be trivially applied to block devices.
   15.34 +
   15.35 +- It's very easy to take advantage of userspace facilities such as
   15.36 +  networking libraries, compression utilities, peer-to-peer
   15.37 +  file-sharing systems and so on to build more complex block backends.
   15.38 +
   15.39 +- Crashes are contained -- incremental development/debugging is very
   15.40 +  fast.
   15.41 +
   15.42 +How it works (in one paragraph):
   15.43 +
   15.44 +Working in conjunction with the kernel blktap driver, all disk I/O
   15.45 +requests from VMs are passed to the userspace daemon (using a shared
   15.46 +memory interface) through a character device. Each active disk is
   15.47 +mapped to an individual device node, allowing per-disk processes to
   15.48 +implement individual block devices where desired.  The userspace
   15.49 +drivers are implemented using asynchronous (Linux libaio),
   15.50 +O_DIRECT-based calls to preserve the unbuffered, batched and
   15.51 +asynchronous request dispatch achieved with the existing blkback
   15.52 +code.  We provide a simple, asynchronous virtual disk interface that
   15.53 +makes it quite easy to add new disk implementations.
   15.54 +
   15.55 +As of June 2006 the current supported disk formats are:
   15.56 +
   15.57 + - Raw Images (both on partitions and in image files)
   15.58 + - File-backed Qcow disks
   15.59 + - Standalone sparse Qcow disks
   15.60 + - Fast shareable RAM disk between VMs (requires some form of cluster-based 
   15.61 +   filesystem support e.g. OCFS2 in the guest kernel)
   15.62 + - Some VMDK images - your mileage may vary
   15.63 +
   15.64 +Raw and QCow images have asynchronous backends and so should perform
   15.65 +fairly well.  VMDK is based directly on the qemu vmdk driver, which is
   15.66 +synchronous (a.k.a. slow).
   15.67 +
   15.68 +Build and Installation Instructions
   15.69 +===================================
   15.70 +
   15.71 +Make sure to configure the blktap backend driver in your dom0 kernel.  It
   15.72 +will cooperate fine with the existing backend driver, so you can
   15.73 +experiment with tap disks without breaking existing VM configs.
   15.74 +
   15.75 +To build the tools separately, "make && make install" in 
   15.76 +tools/blktap.
   15.77 +
   15.78 +
   15.79 +Using the Tools
   15.80 +===============
   15.81 +
   15.82 +Prepare the image for booting. For qcow files use the qcow utilities
   15.83 +installed earlier. e.g. qcow-create generates a blank standalone image
   15.84 +or a file-backed CoW image. img2qcow takes an existing image or
   15.85 +partition and creates a sparse, standalone qcow-based file.
   15.86 +
   15.87 +The userspace disk agent is configured to start automatically via xend
   15.88 +(alternatively you can start it manually => 'blktapctrl')
   15.89 +
   15.90 +Customise the VM config file to use the 'tap' handler, followed by the
   15.91 +driver type. e.g. for a raw image such as a file or partition:
   15.92 +
   15.93 +disk = ['tap:aio:<FILENAME>,sda1,w']
   15.94 +
   15.95 +e.g. for a qcow image:
   15.96 +
   15.97 +disk = ['tap:qcow:<FILENAME>,sda1,w']
   15.98 +
   15.99 +
  15.100 +Mounting images in Dom0 using the blktap driver
  15.101 +===============================================
  15.102 +Tap (and blkback) disks are also mountable in Dom0 without requiring an
  15.103 +active VM to attach. You will need to build a xenlinux Dom0 kernel that
  15.104 +includes the blkfront driver (e.g. the default 'make world' or 
  15.105 +'make kernels' build). Simply use the xm command-line tool to activate
  15.106 +the backend disks, and blkfront will generate a virtual block device that
  15.107 +can be accessed in the same way as a loop device or partition:
  15.108 +
  15.109 +e.g. for a raw image file <FILENAME> that would normally be mounted using
  15.110 +the loopback driver (such as 'mount -o loop <FILENAME> /mnt/disk'), do the
  15.111 +following:
  15.112 +
  15.113 +xm block-attach 0 tap:aio:<FILENAME> /dev/xvda1 w 0
  15.114 +mount /dev/xvda1 /mnt/disk        <--- don't use loop driver
  15.115 +
  15.116 +In this way, you can use any of the userspace device-type drivers built
  15.117 +with the blktap userspace toolkit to open and mount disks such as qcow
  15.118 +or vmdk images:
  15.119 +
  15.120 +xm block-attach 0 tap:qcow:<FILENAME> /dev/xvda1 w 0
  15.121 +mount /dev/xvda1 /mnt/disk
  15.122 +
  15.123 +
  15.124 +
  15.125 + 
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/tools/blktap/drivers/Makefile	Thu Jul 13 10:13:26 2006 +0100
    16.3 @@ -0,0 +1,76 @@
    16.4 +XEN_ROOT = ../../..
    16.5 +include $(XEN_ROOT)/tools/Rules.mk
    16.6 +
    16.7 +INCLUDES += -I.. -I../lib
    16.8 +
    16.9 +INSTALL      = install
   16.10 +INSTALL_PROG = $(INSTALL) -m0755
   16.11 +IBIN         = blktapctrl tapdisk
   16.12 +QCOW_UTIL    = img2qcow qcow2raw qcow-create
   16.13 +INSTALL_DIR  = /usr/sbin
   16.14 +LIBAIO_DIR   = ../../libaio/src
   16.15 +
   16.16 +CFLAGS   += -fPIC
   16.17 +CFLAGS   += -Wall
   16.18 +CFLAGS   += -Werror
   16.19 +CFLAGS   += -Wno-unused
   16.20 +CFLAGS   += -g3
   16.21 +CFLAGS   += -fno-strict-aliasing
   16.22 +CFLAGS   += -I $(XEN_LIBXC) -I $(LIBAIO_DIR)
   16.23 +CFLAGS   += $(INCLUDES) -I. -I../../xenstore 
   16.24 +CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
   16.25 +CFLAGS   += -D_GNU_SOURCE
   16.26 +
   16.27 +# Get gcc to generate the dependencies for us.
   16.28 +CFLAGS   += -Wp,-MD,.$(@F).d
   16.29 +DEPS     = .*.d
   16.30 +
   16.31 +THREADLIB := -lpthread -lz
   16.32 +LIBS      := -L. -L.. -L../lib
   16.33 +LIBS      += -L$(XEN_LIBXC)
   16.34 +LIBS      += -lblktap
   16.35 +LIBS      += -lcrypto
   16.36 +LIBS      += -lz
   16.37 +LIBS      += -L$(XEN_XENSTORE) -lxenstore
   16.38 +
   16.39 +AIOLIBS   := -L $(LIBAIO_DIR)
   16.40 +AIOLIBS   += -laio
   16.41 +AIOLIBS   += -static
   16.42 +
   16.43 +BLK-OBJS  := block-aio.o 
   16.44 +BLK-OBJS  += block-sync.o 
   16.45 +BLK-OBJS  += block-vmdk.o
   16.46 +BLK-OBJS  += block-ram.o 
   16.47 +BLK-OBJS  += block-qcow.o
   16.48 +BLK-OBJS  += aes.o
   16.49 +
   16.50 +all: $(IBIN) qcow-util
   16.51 +
   16.52 +LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
   16.53 +
   16.54 +
   16.55 +blktapctrl: blktapctrl.c  # list the source so edits trigger a rebuild
   16.56 +	$(CC) $(CFLAGS) -o blktapctrl blktapctrl.c $(LIBS)
   16.57 +
   16.58 +tapdisk: $(BLK-OBJS) tapdisk.c  # tapdisk.c is compiled by this rule too
   16.59 +	$(CC) $(CFLAGS) -o tapdisk $(BLK-OBJS) tapdisk.c \
   16.60 +		$(AIOLIBS) $(LIBS)
   16.61 +
   16.62 +
   16.63 +qcow-util: $(BLK-OBJS)
   16.64 +	$(CC) $(CFLAGS) -o img2qcow $(BLK-OBJS) img2qcow.c \
   16.65 +		$(AIOLIBS)  $(LIBS)
   16.66 +	$(CC) $(CFLAGS) -o qcow2raw $(BLK-OBJS) qcow2raw.c  \
   16.67 +		$(AIOLIBS)  $(LIBS)
   16.68 +	$(CC) $(CFLAGS) -o qcow-create $(BLK-OBJS) qcow-create.c  \
   16.69 +		$(AIOLIBS)  $(LIBS)
   16.70 +
   16.71 +install: all
   16.72 +	$(INSTALL_PROG) $(IBIN) $(QCOW_UTIL) $(DESTDIR)$(INSTALL_DIR)
   16.73 +
   16.74 +clean:
   16.75 +	rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN) $(LIB) $(QCOW_UTIL)
   16.76 +
   16.77 +.PHONY: all qcow-util clean install
   16.78 +
   16.79 +-include $(DEPS)
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/tools/blktap/drivers/aes.c	Thu Jul 13 10:13:26 2006 +0100
    17.3 @@ -0,0 +1,1319 @@
    17.4 +/**
    17.5 + * 
    17.6 + * aes.c - integrated in QEMU by Fabrice Bellard from the OpenSSL project.
    17.7 + */
    17.8 +/*
    17.9 + * rijndael-alg-fst.c
   17.10 + *
   17.11 + * @version 3.0 (December 2000)
   17.12 + *
   17.13 + * Optimised ANSI C code for the Rijndael cipher (now AES)
   17.14 + *
   17.15 + * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
   17.16 + * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
   17.17 + * @author Paulo Barreto <paulo.barreto@terra.com.br>
   17.18 + *
   17.19 + * This code is hereby placed in the public domain.
   17.20 + *
   17.21 + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
   17.22 + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   17.23 + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17.24 + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
   17.25 + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   17.26 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   17.27 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
   17.28 + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
   17.29 + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
   17.30 + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
   17.31 + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   17.32 + */
   17.33 +//#include "vl.h"
   17.34 +#include <inttypes.h>
   17.35 +#include <string.h>
   17.36 +#include "aes.h"
   17.37 +
   17.38 +//#define NDEBUG
   17.39 +#include <assert.h>
   17.40 +
   17.41 +typedef uint32_t u32;
   17.42 +typedef uint16_t u16;
   17.43 +typedef uint8_t u8;
   17.44 +
   17.45 +#define MAXKC   (256/32)
   17.46 +#define MAXKB   (256/8)
   17.47 +#define MAXNR   14
   17.48 +
   17.49 +/* This controls loop-unrolling in aes_core.c */
   17.50 +#undef FULL_UNROLL
   17.51 +# define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] <<  8) ^ ((u32)(pt)[3]))
   17.52 +# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >>  8); (ct)[3] = (u8)(st); }
   17.53 +
   17.54 +/*
   17.55 +Te0[x] = S [x].[02, 01, 01, 03];
   17.56 +Te1[x] = S [x].[03, 02, 01, 01];
   17.57 +Te2[x] = S [x].[01, 03, 02, 01];
   17.58 +Te3[x] = S [x].[01, 01, 03, 02];
   17.59 +Te4[x] = S [x].[01, 01, 01, 01];
   17.60 +
   17.61 +Td0[x] = Si[x].[0e, 09, 0d, 0b];
   17.62 +Td1[x] = Si[x].[0b, 0e, 09, 0d];
   17.63 +Td2[x] = Si[x].[0d, 0b, 0e, 09];
   17.64 +Td3[x] = Si[x].[09, 0d, 0b, 0e];
   17.65 +Td4[x] = Si[x].[01, 01, 01, 01];
   17.66 +*/
   17.67 +
   17.68 +static const u32 Te0[256] = {
   17.69 +    0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
   17.70 +    0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
   17.71 +    0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
   17.72 +    0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
   17.73 +    0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
   17.74 +    0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
   17.75 +    0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
   17.76 +    0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
   17.77 +    0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
   17.78 +    0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
   17.79 +    0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
   17.80 +    0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
   17.81 +    0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
   17.82 +    0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
   17.83 +    0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
   17.84 +    0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
   17.85 +    0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
   17.86 +    0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
   17.87 +    0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
   17.88 +    0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
   17.89 +    0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
   17.90 +    0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
   17.91 +    0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
   17.92 +    0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
   17.93 +    0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
   17.94 +    0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
   17.95 +    0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
   17.96 +    0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
   17.97 +    0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
   17.98 +    0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
   17.99 +    0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
  17.100 +    0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
  17.101 +    0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
  17.102 +    0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
  17.103 +    0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
  17.104 +    0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
  17.105 +    0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
  17.106 +    0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
  17.107 +    0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
  17.108 +    0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
  17.109 +    0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
  17.110 +    0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
  17.111 +    0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
  17.112 +    0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
  17.113 +    0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
  17.114 +    0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
  17.115 +    0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
  17.116 +    0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
  17.117 +    0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
  17.118 +    0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
  17.119 +    0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
  17.120 +    0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
  17.121 +    0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
  17.122 +    0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
  17.123 +    0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
  17.124 +    0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
  17.125 +    0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
  17.126 +    0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
  17.127 +    0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
  17.128 +    0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
  17.129 +    0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
  17.130 +    0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
  17.131 +    0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
  17.132 +    0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
  17.133 +};
  17.134 +static const u32 Te1[256] = {
  17.135 +    0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
  17.136 +    0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
  17.137 +    0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
  17.138 +    0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
  17.139 +    0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
  17.140 +    0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
  17.141 +    0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
  17.142 +    0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
  17.143 +    0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
  17.144 +    0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
  17.145 +    0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
  17.146 +    0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
  17.147 +    0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
  17.148 +    0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
  17.149 +    0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
  17.150 +    0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
  17.151 +    0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
  17.152 +    0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
  17.153 +    0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
  17.154 +    0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
  17.155 +    0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
  17.156 +    0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
  17.157 +    0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
  17.158 +    0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
  17.159 +    0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
  17.160 +    0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
  17.161 +    0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
  17.162 +    0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
  17.163 +    0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
  17.164 +    0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
  17.165 +    0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
  17.166 +    0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
  17.167 +    0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
  17.168 +    0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
  17.169 +    0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
  17.170 +    0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
  17.171 +    0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
  17.172 +    0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
  17.173 +    0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
  17.174 +    0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
  17.175 +    0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
  17.176 +    0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
  17.177 +    0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
  17.178 +    0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
  17.179 +    0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
  17.180 +    0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
  17.181 +    0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
  17.182 +    0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
  17.183 +    0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
  17.184 +    0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
  17.185 +    0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
  17.186 +    0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
  17.187 +    0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
  17.188 +    0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
  17.189 +    0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
  17.190 +    0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
  17.191 +    0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
  17.192 +    0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
  17.193 +    0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
  17.194 +    0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
  17.195 +    0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
  17.196 +    0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
  17.197 +    0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
  17.198 +    0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
  17.199 +};
  17.200 +static const u32 Te2[256] = {
  17.201 +    0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
  17.202 +    0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
  17.203 +    0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
  17.204 +    0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
  17.205 +    0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
  17.206 +    0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
  17.207 +    0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
  17.208 +    0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
  17.209 +    0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
  17.210 +    0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
  17.211 +    0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
  17.212 +    0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
  17.213 +    0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
  17.214 +    0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
  17.215 +    0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
  17.216 +    0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
  17.217 +    0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
  17.218 +    0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
  17.219 +    0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
  17.220 +    0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
  17.221 +    0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
  17.222 +    0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
  17.223 +    0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
  17.224 +    0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
  17.225 +    0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
  17.226 +    0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
  17.227 +    0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
  17.228 +    0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
  17.229 +    0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
  17.230 +    0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
  17.231 +    0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
  17.232 +    0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
  17.233 +    0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
  17.234 +    0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
  17.235 +    0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
  17.236 +    0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
  17.237 +    0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
  17.238 +    0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
  17.239 +    0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
  17.240 +    0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
  17.241 +    0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
  17.242 +    0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
  17.243 +    0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
  17.244 +    0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
  17.245 +    0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
  17.246 +    0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
  17.247 +    0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
  17.248 +    0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
  17.249 +    0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
  17.250 +    0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
  17.251 +    0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
  17.252 +    0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
  17.253 +    0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
  17.254 +    0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
  17.255 +    0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
  17.256 +    0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
  17.257 +    0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
  17.258 +    0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
  17.259 +    0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
  17.260 +    0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
  17.261 +    0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
  17.262 +    0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
  17.263 +    0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
  17.264 +    0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
  17.265 +};
  17.266 +static const u32 Te3[256] = {
  17.267 +
  17.268 +    0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
  17.269 +    0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
  17.270 +    0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
  17.271 +    0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
  17.272 +    0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
  17.273 +    0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
  17.274 +    0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
  17.275 +    0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
  17.276 +    0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
  17.277 +    0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
  17.278 +    0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
  17.279 +    0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
  17.280 +    0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
  17.281 +    0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
  17.282 +    0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
  17.283 +    0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
  17.284 +    0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
  17.285 +    0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
  17.286 +    0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
  17.287 +    0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
  17.288 +    0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
  17.289 +    0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
  17.290 +    0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
  17.291 +    0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
  17.292 +    0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
  17.293 +    0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
  17.294 +    0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
  17.295 +    0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
  17.296 +    0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
  17.297 +    0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
  17.298 +    0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
  17.299 +    0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
  17.300 +    0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
  17.301 +    0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
  17.302 +    0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
  17.303 +    0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
  17.304 +    0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
  17.305 +    0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
  17.306 +    0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
  17.307 +    0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
  17.308 +    0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
  17.309 +    0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
  17.310 +    0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
  17.311 +    0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
  17.312 +    0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
  17.313 +    0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
  17.314 +    0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
  17.315 +    0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
  17.316 +    0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
  17.317 +    0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
  17.318 +    0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
  17.319 +    0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
  17.320 +    0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
  17.321 +    0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
  17.322 +    0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
  17.323 +    0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
  17.324 +    0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
  17.325 +    0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
  17.326 +    0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
  17.327 +    0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
  17.328 +    0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
  17.329 +    0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
  17.330 +    0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
  17.331 +    0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
  17.332 +};
17.333 +static const u32 Te4[256] = { /* substitution table: each entry is one byte replicated into all four byte lanes, so callers mask out the lane they need; NOTE(review): byte values match the AES S-box (FIPS-197) */
17.334 +    0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
17.335 +    0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
17.336 +    0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
17.337 +    0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
17.338 +    0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
17.339 +    0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
17.340 +    0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
17.341 +    0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
17.342 +    0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
17.343 +    0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
17.344 +    0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
17.345 +    0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
17.346 +    0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
17.347 +    0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
17.348 +    0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
17.349 +    0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
17.350 +    0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
17.351 +    0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
17.352 +    0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
17.353 +    0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
17.354 +    0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
17.355 +    0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
17.356 +    0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
17.357 +    0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
17.358 +    0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
17.359 +    0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
17.360 +    0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
17.361 +    0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
17.362 +    0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
17.363 +    0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
17.364 +    0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
17.365 +    0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
17.366 +    0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
17.367 +    0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
17.368 +    0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
17.369 +    0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
17.370 +    0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
17.371 +    0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
17.372 +    0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
17.373 +    0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
17.374 +    0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
17.375 +    0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
17.376 +    0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
17.377 +    0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
17.378 +    0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
17.379 +    0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
17.380 +    0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
17.381 +    0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
17.382 +    0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
17.383 +    0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
17.384 +    0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
17.385 +    0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
17.386 +    0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
17.387 +    0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
17.388 +    0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
17.389 +    0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
17.390 +    0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
17.391 +    0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
17.392 +    0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
17.393 +    0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
17.394 +    0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
17.395 +    0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
17.396 +    0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
17.397 +    0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
17.398 +};
17.399 +static const u32 Td0[256] = { /* first of four decryption lookup tables; Td1, Td2 and Td3 hold these same words rotated right by 8, 16 and 24 bits; AES_set_decrypt_key indexes all four for its inverse MixColumn step */
17.400 +    0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
17.401 +    0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
17.402 +    0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
17.403 +    0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
17.404 +    0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
17.405 +    0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
17.406 +    0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
17.407 +    0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
17.408 +    0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
17.409 +    0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
17.410 +    0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
17.411 +    0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
17.412 +    0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
17.413 +    0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
17.414 +    0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
17.415 +    0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
17.416 +    0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
17.417 +    0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
17.418 +    0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
17.419 +    0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
17.420 +    0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
17.421 +    0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
17.422 +    0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
17.423 +    0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
17.424 +    0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
17.425 +    0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
17.426 +    0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
17.427 +    0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
17.428 +    0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
17.429 +    0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
17.430 +    0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
17.431 +    0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
17.432 +    0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
17.433 +    0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
17.434 +    0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
17.435 +    0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
17.436 +    0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
17.437 +    0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
17.438 +    0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
17.439 +    0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
17.440 +    0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
17.441 +    0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
17.442 +    0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
17.443 +    0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
17.444 +    0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
17.445 +    0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
17.446 +    0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
17.447 +    0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
17.448 +    0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
17.449 +    0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
17.450 +    0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
17.451 +    0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
17.452 +    0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
17.453 +    0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
17.454 +    0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
17.455 +    0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
17.456 +    0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
17.457 +    0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
17.458 +    0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
17.459 +    0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
17.460 +    0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
17.461 +    0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
17.462 +    0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
17.463 +    0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
17.464 +};
17.465 +static const u32 Td1[256] = { /* the Td0 words rotated right by 8 bits (e.g. Td0[0] 0x51f4a750 becomes 0x5051f4a7); AES_set_decrypt_key indexes it with the second-highest byte of each word */
17.466 +    0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
17.467 +    0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
17.468 +    0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
17.469 +    0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
17.470 +    0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
17.471 +    0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
17.472 +    0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
17.473 +    0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
17.474 +    0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
17.475 +    0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
17.476 +    0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
17.477 +    0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
17.478 +    0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
17.479 +    0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
17.480 +    0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
17.481 +    0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
17.482 +    0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
17.483 +    0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
17.484 +    0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
17.485 +    0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
17.486 +    0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
17.487 +    0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
17.488 +    0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
17.489 +    0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
17.490 +    0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
17.491 +    0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
17.492 +    0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
17.493 +    0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
17.494 +    0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
17.495 +    0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
17.496 +    0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
17.497 +    0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
17.498 +    0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
17.499 +    0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
17.500 +    0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
17.501 +    0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
17.502 +    0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
17.503 +    0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
17.504 +    0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
17.505 +    0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
17.506 +    0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
17.507 +    0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
17.508 +    0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
17.509 +    0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
17.510 +    0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
17.511 +    0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
17.512 +    0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
17.513 +    0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
17.514 +    0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
17.515 +    0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
17.516 +    0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
17.517 +    0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
17.518 +    0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
17.519 +    0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
17.520 +    0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
17.521 +    0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
17.522 +    0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
17.523 +    0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
17.524 +    0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
17.525 +    0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
17.526 +    0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
17.527 +    0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
17.528 +    0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
17.529 +    0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
17.530 +};
17.531 +static const u32 Td2[256] = { /* the Td0 words rotated right by 16 bits (e.g. Td0[0] 0x51f4a750 becomes 0xa75051f4); AES_set_decrypt_key indexes it with the second-lowest byte of each word */
17.532 +    0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
17.533 +    0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
17.534 +    0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
17.535 +    0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
17.536 +    0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
17.537 +    0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
17.538 +    0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
17.539 +    0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
17.540 +    0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
17.541 +    0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
17.542 +    0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
17.543 +    0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
17.544 +    0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
17.545 +    0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
17.546 +    0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
17.547 +    0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
17.548 +    0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
17.549 +    0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
17.550 +    0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
17.551 +    0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
17.552 +
17.553 +    0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
17.554 +    0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
17.555 +    0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
17.556 +    0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
17.557 +    0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
17.558 +    0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
17.559 +    0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
17.560 +    0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
17.561 +    0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
17.562 +    0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
17.563 +    0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
17.564 +    0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
17.565 +    0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
17.566 +    0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
17.567 +    0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
17.568 +    0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
17.569 +    0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
17.570 +    0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
17.571 +    0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
17.572 +    0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
17.573 +    0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
17.574 +    0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
17.575 +    0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
17.576 +    0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
17.577 +    0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
17.578 +    0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
17.579 +    0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
17.580 +    0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
17.581 +    0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
17.582 +    0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
17.583 +    0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
17.584 +    0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
17.585 +    0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
17.586 +    0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
17.587 +    0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
17.588 +    0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
17.589 +    0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
17.590 +    0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
17.591 +    0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
17.592 +    0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
17.593 +    0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
17.594 +    0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
17.595 +    0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
17.596 +    0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
17.597 +};
17.598 +static const u32 Td3[256] = { /* the Td0 words rotated right by 24 bits (e.g. Td0[0] 0x51f4a750 becomes 0xf4a75051); AES_set_decrypt_key indexes it with the lowest byte of each word */
17.599 +    0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
17.600 +    0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
17.601 +    0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
17.602 +    0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
17.603 +    0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
17.604 +    0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
17.605 +    0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
17.606 +    0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
17.607 +    0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
17.608 +    0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
17.609 +    0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
17.610 +    0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
17.611 +    0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
17.612 +    0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
17.613 +    0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
17.614 +    0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
17.615 +    0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
17.616 +    0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
17.617 +    0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
17.618 +    0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
17.619 +    0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
17.620 +    0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
17.621 +    0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
17.622 +    0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
17.623 +    0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
17.624 +    0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
17.625 +    0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
17.626 +    0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
17.627 +    0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
17.628 +    0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
17.629 +    0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
17.630 +    0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
17.631 +    0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
17.632 +    0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
17.633 +    0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
17.634 +    0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
17.635 +    0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
17.636 +    0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
17.637 +    0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
17.638 +    0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
17.639 +    0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
17.640 +    0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
17.641 +    0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
17.642 +    0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
17.643 +    0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
17.644 +    0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
17.645 +    0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
17.646 +    0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
17.647 +    0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
17.648 +    0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
17.649 +    0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
17.650 +    0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
17.651 +    0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
17.652 +    0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
17.653 +    0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
17.654 +    0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
17.655 +    0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
17.656 +    0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
17.657 +    0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
17.658 +    0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
17.659 +    0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
17.660 +    0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
17.661 +    0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
17.662 +    0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
17.663 +};
17.664 +static const u32 Td4[256] = { /* each entry is one byte replicated into all four byte lanes, the same layout as Te4; NOTE(review): byte values match the AES inverse S-box (FIPS-197); not referenced by the key-setup functions below */
17.665 +    0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
17.666 +    0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
17.667 +    0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
17.668 +    0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
17.669 +    0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
17.670 +    0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
17.671 +    0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
17.672 +    0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
17.673 +    0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
17.674 +    0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
17.675 +    0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
17.676 +    0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
17.677 +    0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
17.678 +    0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
17.679 +    0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
17.680 +    0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
17.681 +    0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
17.682 +    0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
17.683 +    0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
17.684 +    0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
17.685 +    0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
17.686 +    0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
17.687 +    0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
17.688 +    0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
17.689 +    0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
17.690 +    0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
17.691 +    0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
17.692 +    0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
17.693 +    0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
17.694 +    0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
17.695 +    0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
17.696 +    0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
17.697 +    0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
17.698 +    0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
17.699 +    0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
17.700 +    0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
17.701 +    0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
17.702 +    0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
17.703 +    0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
17.704 +    0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
17.705 +    0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
17.706 +    0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
17.707 +    0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
17.708 +    0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
17.709 +    0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
17.710 +    0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
17.711 +    0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
17.712 +    0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
17.713 +    0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
17.714 +    0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
17.715 +    0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
17.716 +    0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
17.717 +    0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
17.718 +    0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
17.719 +    0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
17.720 +    0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
17.721 +    0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
17.722 +    0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
17.723 +    0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
17.724 +    0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
17.725 +    0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
17.726 +    0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
17.727 +    0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
17.728 +    0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
17.729 +};
17.730 +static const u32 rcon[] = { /* key-schedule round constants, held in the top byte of each word; AES_set_encrypt_key XORs rcon[i] into the first word produced by expansion step i */
17.731 +	0x01000000, 0x02000000, 0x04000000, 0x08000000,
17.732 +	0x10000000, 0x20000000, 0x40000000, 0x80000000,
17.733 +	0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
17.734 +};
  17.735 +
17.736 +/**
17.737 + * Expand the cipher key into the encryption key schedule: fills key->rd_key and sets key->rounds. Reads bits/8 bytes from userKey. Returns 0 on success, -1 if userKey or key is NULL, -2 if bits is not 128, 192 or 256.
17.738 + */
17.739 +int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
17.740 +			AES_KEY *key) {
17.741 +
17.742 +	u32 *rk; /* sliding window into key->rd_key; advanced after each expansion step */
17.743 +   	int i = 0; /* expansion step counter, also the index into rcon[] */
17.744 +	u32 temp;
17.745 +
17.746 +	if (!userKey || !key)
17.747 +		return -1;
17.748 +	if (bits != 128 && bits != 192 && bits != 256)
17.749 +		return -2;
17.750 +
17.751 +	rk = key->rd_key;
17.752 +
17.753 +	if (bits==128) /* round count follows the key size: 10, 12 or 14 */
17.754 +		key->rounds = 10;
17.755 +	else if (bits==192)
17.756 +		key->rounds = 12;
17.757 +	else
17.758 +		key->rounds = 14;
17.759 +
17.760 +	rk[0] = GETU32(userKey     ); /* every key size starts with key bytes 0..15 as schedule words 0..3 */
17.761 +	rk[1] = GETU32(userKey +  4);
17.762 +	rk[2] = GETU32(userKey +  8);
17.763 +	rk[3] = GETU32(userKey + 12);
17.764 +	if (bits == 128) {
17.765 +		while (1) {
17.766 +			temp  = rk[3]; /* last word of the previous four-word group */
17.767 +			rk[4] = rk[0] ^
17.768 +				(Te4[(temp >> 16) & 0xff] & 0xff000000) ^ /* these four lookups substitute the bytes of temp through Te4 */
17.769 +				(Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^ /* while moving each byte one lane up (the top byte wraps to the bottom) */
17.770 +				(Te4[(temp      ) & 0xff] & 0x0000ff00) ^
17.771 +				(Te4[(temp >> 24)       ] & 0x000000ff) ^
17.772 +				rcon[i];
17.773 +			rk[5] = rk[1] ^ rk[4];
17.774 +			rk[6] = rk[2] ^ rk[5];
17.775 +			rk[7] = rk[3] ^ rk[6];
17.776 +			if (++i == 10) { /* the tenth step completes schedule words 0..43 */
17.777 +				return 0;
17.778 +			}
17.779 +			rk += 4;
17.780 +		}
17.781 +	}
17.782 +	rk[4] = GETU32(userKey + 16); /* 192- and 256-bit keys: key bytes 16..23 become words 4..5 */
17.783 +	rk[5] = GETU32(userKey + 20);
17.784 +	if (bits == 192) {
17.785 +		while (1) {
17.786 +			temp = rk[ 5]; /* last word of the previous six-word group */
17.787 +			rk[ 6] = rk[ 0] ^
17.788 +				(Te4[(temp >> 16) & 0xff] & 0xff000000) ^ /* same rotate-and-substitute pattern as the 128-bit case */
17.789 +				(Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
17.790 +				(Te4[(temp      ) & 0xff] & 0x0000ff00) ^
17.791 +				(Te4[(temp >> 24)       ] & 0x000000ff) ^
17.792 +				rcon[i];
17.793 +			rk[ 7] = rk[ 1] ^ rk[ 6];
17.794 +			rk[ 8] = rk[ 2] ^ rk[ 7];
17.795 +			rk[ 9] = rk[ 3] ^ rk[ 8];
17.796 +			if (++i == 8) { /* the eighth step stops after four words: the schedule ends at word 51 */
17.797 +				return 0;
17.798 +			}
17.799 +			rk[10] = rk[ 4] ^ rk[ 9];
17.800 +			rk[11] = rk[ 5] ^ rk[10];
17.801 +			rk += 6;
17.802 +		}
17.803 +	}
17.804 +	rk[6] = GETU32(userKey + 24); /* 256-bit key: key bytes 24..31 become words 6..7 */
17.805 +	rk[7] = GETU32(userKey + 28);
17.806 +	if (bits == 256) {
17.807 +		while (1) {
17.808 +			temp = rk[ 7]; /* last word of the previous eight-word group */
17.809 +			rk[ 8] = rk[ 0] ^
17.810 +				(Te4[(temp >> 16) & 0xff] & 0xff000000) ^ /* first half of the step: rotate-and-substitute plus rcon, as above */
17.811 +				(Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
17.812 +				(Te4[(temp      ) & 0xff] & 0x0000ff00) ^
17.813 +				(Te4[(temp >> 24)       ] & 0x000000ff) ^
17.814 +				rcon[i];
17.815 +			rk[ 9] = rk[ 1] ^ rk[ 8];
17.816 +			rk[10] = rk[ 2] ^ rk[ 9];
17.817 +			rk[11] = rk[ 3] ^ rk[10];
17.818 +			if (++i == 7) { /* the seventh step stops after four words: the schedule ends at word 59 */
17.819 +				return 0;
17.820 +			}
17.821 +			temp = rk[11]; /* second half of the step: Te4 substitution with bytes kept in place and no rcon */
17.822 +			rk[12] = rk[ 4] ^
17.823 +				(Te4[(temp >> 24)       ] & 0xff000000) ^
17.824 +				(Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
17.825 +				(Te4[(temp >>  8) & 0xff] & 0x0000ff00) ^
17.826 +				(Te4[(temp      ) & 0xff] & 0x000000ff);
17.827 +			rk[13] = rk[ 5] ^ rk[12];
17.828 +			rk[14] = rk[ 6] ^ rk[13];
17.829 +			rk[15] = rk[ 7] ^ rk[14];
17.830 +
17.831 +			rk += 8;
17.832 +        	}
17.833 +	}
17.834 +	return 0; /* not reached: bits was validated above and every key-size branch returns from inside its loop */
17.835 +}
  17.836 +
17.837 +/**
17.838 + * Expand the cipher key into the decryption key schedule: builds the encryption schedule, reverses the order of its round keys, then applies the inverse MixColumn transform to the inner ones. Returns 0 on success or the negative status from AES_set_encrypt_key.
17.839 + */
17.840 +int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
17.841 +			 AES_KEY *key) {
17.842 +
17.843 +        u32 *rk;
17.844 +	int i, j, status;
17.845 +	u32 temp;
17.846 +
17.847 +	/* first, start with an encryption schedule */
17.848 +	status = AES_set_encrypt_key(userKey, bits, key);
17.849 +	if (status < 0)
17.850 +		return status;
17.851 +
17.852 +	rk = key->rd_key;
17.853 +
17.854 +	/* invert the order of the round keys: */
17.855 +	for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) { /* i and j are word offsets of the two round keys being swapped, closing in from both ends */
17.856 +		temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
17.857 +		temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
17.858 +		temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
17.859 +		temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
17.860 +	}
17.861 +	/* apply the inverse MixColumn transform to all round keys but the first and the last: */
17.862 +	for (i = 1; i < (key->rounds); i++) {
17.863 +		rk += 4; /* advance before use, so round key 0 is skipped; the loop bound leaves the last round key untouched */
17.864 +		rk[0] =
17.865 +			Td0[Te4[(rk[0] >> 24)       ] & 0xff] ^ /* NOTE(review): the Te4 lookup (masked to one byte) feeds Td*, presumably cancelling an inverse S-box folded into those tables so that only inverse MixColumn remains; confirm against the Rijndael reference tables */
17.866 +			Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
17.867 +			Td2[Te4[(rk[0] >>  8) & 0xff] & 0xff] ^
17.868 +			Td3[Te4[(rk[0]      ) & 0xff] & 0xff];
17.869 +		rk[1] =
17.870 +			Td0[Te4[(rk[1] >> 24)       ] & 0xff] ^
17.871 +			Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
17.872 +			Td2[Te4[(rk[1] >>  8) & 0xff] & 0xff] ^
17.873 +			Td3[Te4[(rk[1]      ) & 0xff] & 0xff];
17.874 +		rk[2] =
17.875 +			Td0[Te4[(rk[2] >> 24)       ] & 0xff] ^
17.876 +			Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
17.877 +			Td2[Te4[(rk[2] >>  8) & 0xff] & 0xff] ^
17.878 +			Td3[Te4[(rk[2]      ) & 0xff] & 0xff];
17.879 +		rk[3] =
17.880 +			Td0[Te4[(rk[3] >> 24)       ] & 0xff] ^
17.881 +			Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
17.882 +			Td2[Te4[(rk[3] >>  8) & 0xff] & 0xff] ^
17.883 +			Td3[Te4[(rk[3]      ) & 0xff] & 0xff];
17.884 +	}
17.885 +	return 0;
17.886 +}
  17.887 +
  17.888 +#ifndef AES_ASM
  17.889 +/*
  17.890 + * Encrypt a single block
  17.891 + * in and out can overlap
  17.892 + */
  17.893 +void AES_encrypt(const unsigned char *in, unsigned char *out,
  17.894 +		 const AES_KEY *key) {
  17.895 +
  17.896 +	const u32 *rk;
  17.897 +	u32 s0, s1, s2, s3, t0, t1, t2, t3;
  17.898 +#ifndef FULL_UNROLL
  17.899 +	int r;
  17.900 +#endif /* ?FULL_UNROLL */
  17.901 +
  17.902 +	assert(in && out && key);
  17.903 +	rk = key->rd_key;
  17.904 +
  17.905 +	/*
  17.906 +	 * map byte array block to cipher state
  17.907 +	 * and add initial round key:
  17.908 +	 */
  17.909 +	s0 = GETU32(in     ) ^ rk[0];
  17.910 +	s1 = GETU32(in +  4) ^ rk[1];
  17.911 +	s2 = GETU32(in +  8) ^ rk[2];
  17.912 +	s3 = GETU32(in + 12) ^ rk[3];
  17.913 +#ifdef FULL_UNROLL
  17.914 +	/* round 1: */
  17.915 +   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
  17.916 +   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
  17.917 +   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
  17.918 +   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
  17.919 +   	/* round 2: */
  17.920 +   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
  17.921 +   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
  17.922 +   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
  17.923 +   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
  17.924 +	/* round 3: */
  17.925 +   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
  17.926 +   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
  17.927 +   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
  17.928 +   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
  17.929 +   	/* round 4: */
  17.930 +   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
  17.931 +   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
  17.932 +   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
  17.933 +   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
  17.934 +	/* round 5: */
  17.935 +   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
  17.936 +   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
  17.937 +   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
  17.938 +   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
  17.939 +   	/* round 6: */
  17.940 +   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
  17.941 +   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
  17.942 +   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
  17.943 +   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
  17.944 +	/* round 7: */
  17.945 +   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
  17.946 +   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
  17.947 +   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
  17.948 +   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
  17.949 +   	/* round 8: */
  17.950 +   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
  17.951 +   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
  17.952 +   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
  17.953 +   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
  17.954 +	/* round 9: */
  17.955 +   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
  17.956 +   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
  17.957 +   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
  17.958 +   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
  17.959 +    if (key->rounds > 10) {
  17.960 +        /* round 10: */
  17.961 +        s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
  17.962 +        s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
  17.963 +        s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
  17.964 +        s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
  17.965 +        /* round 11: */
  17.966 +        t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
  17.967 +        t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
  17.968 +        t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
  17.969 +        t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
  17.970 +        if (key->rounds > 12) {
  17.971 +            /* round 12: */
  17.972 +            s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
  17.973 +            s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
  17.974 +            s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
  17.975 +            s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
  17.976 +            /* round 13: */
  17.977 +            t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
  17.978 +            t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
  17.979 +            t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
  17.980 +            t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
  17.981 +        }
  17.982 +    }
  17.983 +    rk += key->rounds << 2;
  17.984 +#else  /* !FULL_UNROLL */
  17.985 +    /*
  17.986 +     * Nr - 1 full rounds:
  17.987 +     */
  17.988 +    r = key->rounds >> 1;
  17.989 +    for (;;) {
  17.990 +        t0 =
  17.991 +            Te0[(s0 >> 24)       ] ^
  17.992 +            Te1[(s1 >> 16) & 0xff] ^
  17.993 +            Te2[(s2 >>  8) & 0xff] ^
  17.994 +            Te3[(s3      ) & 0xff] ^
  17.995 +            rk[4];
  17.996 +        t1 =
  17.997 +            Te0[(s1 >> 24)       ] ^
  17.998 +            Te1[(s2 >> 16) & 0xff] ^
  17.999 +            Te2[(s3 >>  8) & 0xff] ^
 17.1000 +            Te3[(s0      ) & 0xff] ^
 17.1001 +            rk[5];
 17.1002 +        t2 =
 17.1003 +            Te0[(s2 >> 24)       ] ^
 17.1004 +            Te1[(s3 >> 16) & 0xff] ^
 17.1005 +            Te2[(s0 >>  8) & 0xff] ^
 17.1006 +            Te3[(s1      ) & 0xff] ^
 17.1007 +            rk[6];
 17.1008 +        t3 =
 17.1009 +            Te0[(s3 >> 24)       ] ^
 17.1010 +            Te1[(s0 >> 16) & 0xff] ^
 17.1011 +            Te2[(s1 >>  8) & 0xff] ^
 17.1012 +            Te3[(s2      ) & 0xff] ^
 17.1013 +            rk[7];
 17.1014 +
 17.1015 +        rk += 8;
 17.1016 +        if (--r == 0) {
 17.1017 +            break;
 17.1018 +        }
 17.1019 +
 17.1020 +        s0 =
 17.1021 +            Te0[(t0 >> 24)       ] ^
 17.1022 +            Te1[(t1 >> 16) & 0xff] ^
 17.1023 +            Te2[(t2 >>  8) & 0xff] ^
 17.1024 +            Te3[(t3      ) & 0xff] ^
 17.1025 +            rk[0];
 17.1026 +        s1 =
 17.1027 +            Te0[(t1 >> 24)       ] ^
 17.1028 +            Te1[(t2 >> 16) & 0xff] ^
 17.1029 +            Te2[(t3 >>  8) & 0xff] ^
 17.1030 +            Te3[(t0      ) & 0xff] ^
 17.1031 +            rk[1];
 17.1032 +        s2 =
 17.1033 +            Te0[(t2 >> 24)       ] ^
 17.1034 +            Te1[(t3 >> 16) & 0xff] ^
 17.1035 +            Te2[(t0 >>  8) & 0xff] ^
 17.1036 +            Te3[(t1      ) & 0xff] ^
 17.1037 +            rk[2];
 17.1038 +        s3 =
 17.1039 +            Te0[(t3 >> 24)       ] ^
 17.1040 +            Te1[(t0 >> 16) & 0xff] ^
 17.1041 +            Te2[(t1 >>  8) & 0xff] ^
 17.1042 +            Te3[(t2      ) & 0xff] ^
 17.1043 +            rk[3];
 17.1044 +    }
 17.1045 +#endif /* ?FULL_UNROLL */
 17.1046 +    /*
 17.1047 +	 * apply last round and
 17.1048 +	 * map cipher state to byte array block:
 17.1049 +	 */
 17.1050 +	s0 =
 17.1051 +		(Te4[(t0 >> 24)       ] & 0xff000000) ^
 17.1052 +		(Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
 17.1053 +		(Te4[(t2 >>  8) & 0xff] & 0x0000ff00) ^
 17.1054 +		(Te4[(t3      ) & 0xff] & 0x000000ff) ^
 17.1055 +		rk[0];
 17.1056 +	PUTU32(out     , s0);
 17.1057 +	s1 =
 17.1058 +		(Te4[(t1 >> 24)       ] & 0xff000000) ^
 17.1059 +		(Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
 17.1060 +		(Te4[(t3 >>  8) & 0xff] & 0x0000ff00) ^
 17.1061 +		(Te4[(t0      ) & 0xff] & 0x000000ff) ^
 17.1062 +		rk[1];
 17.1063 +	PUTU32(out +  4, s1);
 17.1064 +	s2 =
 17.1065 +		(Te4[(t2 >> 24)       ] & 0xff000000) ^
 17.1066 +		(Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
 17.1067 +		(Te4[(t0 >>  8) & 0xff] & 0x0000ff00) ^
 17.1068 +		(Te4[(t1      ) & 0xff] & 0x000000ff) ^
 17.1069 +		rk[2];
 17.1070 +	PUTU32(out +  8, s2);
 17.1071 +	s3 =
 17.1072 +		(Te4[(t3 >> 24)       ] & 0xff000000) ^
 17.1073 +		(Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
 17.1074 +		(Te4[(t1 >>  8) & 0xff] & 0x0000ff00) ^
 17.1075 +		(Te4[(t2      ) & 0xff] & 0x000000ff) ^
 17.1076 +		rk[3];
 17.1077 +	PUTU32(out + 12, s3);
 17.1078 +}
 17.1079 +
 17.1080 +/*
 17.1081 + * Decrypt a single block
 17.1082 + * in and out can overlap
 17.1083 + */
 17.1084 +void AES_decrypt(const unsigned char *in, unsigned char *out,
 17.1085 +		 const AES_KEY *key) {
 17.1086 +
 17.1087 +	const u32 *rk;
 17.1088 +	u32 s0, s1, s2, s3, t0, t1, t2, t3;
 17.1089 +#ifndef FULL_UNROLL
 17.1090 +	int r;
 17.1091 +#endif /* ?FULL_UNROLL */
 17.1092 +
 17.1093 +	assert(in && out && key);
 17.1094 +	rk = key->rd_key;
 17.1095 +
 17.1096 +	/*
 17.1097 +	 * map byte array block to cipher state
 17.1098 +	 * and add initial round key:
 17.1099 +	 */
 17.1100 +    s0 = GETU32(in     ) ^ rk[0];
 17.1101 +    s1 = GETU32(in +  4) ^ rk[1];
 17.1102 +    s2 = GETU32(in +  8) ^ rk[2];
 17.1103 +    s3 = GETU32(in + 12) ^ rk[3];
 17.1104 +#ifdef FULL_UNROLL
 17.1105 +    /* round 1: */
 17.1106 +    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
 17.1107 +    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
 17.1108 +    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
 17.1109 +    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
 17.1110 +    /* round 2: */
 17.1111 +    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
 17.1112 +    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
 17.1113 +    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
 17.1114 +    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
 17.1115 +    /* round 3: */
 17.1116 +    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
 17.1117 +    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
 17.1118 +    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
 17.1119 +    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
 17.1120 +    /* round 4: */
 17.1121 +    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
 17.1122 +    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
 17.1123 +    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
 17.1124 +    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
 17.1125 +    /* round 5: */
 17.1126 +    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
 17.1127 +    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
 17.1128 +    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
 17.1129 +    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
 17.1130 +    /* round 6: */
 17.1131 +    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
 17.1132 +    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
 17.1133 +    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
 17.1134 +    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
 17.1135 +    /* round 7: */
 17.1136 +    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
 17.1137 +    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
 17.1138 +    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
 17.1139 +    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
 17.1140 +    /* round 8: */
 17.1141 +    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
 17.1142 +    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
 17.1143 +    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
 17.1144 +    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
 17.1145 +    /* round 9: */
 17.1146 +    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
 17.1147 +    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
 17.1148 +    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
 17.1149 +    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
 17.1150 +    if (key->rounds > 10) {
 17.1151 +        /* round 10: */
 17.1152 +        s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
 17.1153 +        s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
 17.1154 +        s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
 17.1155 +        s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
 17.1156 +        /* round 11: */
 17.1157 +        t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
 17.1158 +        t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
 17.1159 +        t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
 17.1160 +        t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
 17.1161 +        if (key->rounds > 12) {
 17.1162 +            /* round 12: */
 17.1163 +            s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
 17.1164 +            s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
 17.1165 +            s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
 17.1166 +            s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
 17.1167 +            /* round 13: */
 17.1168 +            t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
 17.1169 +            t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
 17.1170 +            t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
 17.1171 +            t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
 17.1172 +        }
 17.1173 +    }
 17.1174 +	rk += key->rounds << 2;
 17.1175 +#else  /* !FULL_UNROLL */
 17.1176 +    /*
 17.1177 +     * Nr - 1 full rounds:
 17.1178 +     */
 17.1179 +    r = key->rounds >> 1;
 17.1180 +    for (;;) {
 17.1181 +        t0 =
 17.1182 +            Td0[(s0 >> 24)       ] ^
 17.1183 +            Td1[(s3 >> 16) & 0xff] ^
 17.1184 +            Td2[(s2 >>  8) & 0xff] ^
 17.1185 +            Td3[(s1      ) & 0xff] ^
 17.1186 +            rk[4];
 17.1187 +        t1 =
 17.1188 +            Td0[(s1 >> 24)       ] ^
 17.1189 +            Td1[(s0 >> 16) & 0xff] ^
 17.1190 +            Td2[(s3 >>  8) & 0xff] ^
 17.1191 +            Td3[(s2      ) & 0xff] ^
 17.1192 +            rk[5];
 17.1193 +        t2 =
 17.1194 +            Td0[(s2 >> 24)       ] ^
 17.1195 +            Td1[(s1 >> 16) & 0xff] ^
 17.1196 +            Td2[(s0 >>  8) & 0xff] ^
 17.1197 +            Td3[(s3      ) & 0xff] ^
 17.1198 +            rk[6];
 17.1199 +        t3 =
 17.1200 +            Td0[(s3 >> 24)       ] ^
 17.1201 +            Td1[(s2 >> 16) & 0xff] ^
 17.1202 +            Td2[(s1 >>  8) & 0xff] ^
 17.1203 +            Td3[(s0      ) & 0xff] ^
 17.1204 +            rk[7];
 17.1205 +
 17.1206 +        rk += 8;
 17.1207 +        if (--r == 0) {
 17.1208 +            break;
 17.1209 +        }
 17.1210 +
 17.1211 +        s0 =
 17.1212 +            Td0[(t0 >> 24)       ] ^
 17.1213 +            Td1[(t3 >> 16) & 0xff] ^
 17.1214 +            Td2[(t2 >>  8) & 0xff] ^
 17.1215 +            Td3[(t1      ) & 0xff] ^
 17.1216 +            rk[0];
 17.1217 +        s1 =
 17.1218 +            Td0[(t1 >> 24)       ] ^
 17.1219 +            Td1[(t0 >> 16) & 0xff] ^
 17.1220 +            Td2[(t3 >>  8) & 0xff] ^
 17.1221 +            Td3[(t2      ) & 0xff] ^
 17.1222 +            rk[1];
 17.1223 +        s2 =
 17.1224 +            Td0[(t2 >> 24)       ] ^
 17.1225 +            Td1[(t1 >> 16) & 0xff] ^
 17.1226 +            Td2[(t0 >>  8) & 0xff] ^
 17.1227 +            Td3[(t3      ) & 0xff] ^
 17.1228 +            rk[2];
 17.1229 +        s3 =
 17.1230 +            Td0[(t3 >> 24)       ] ^
 17.1231 +            Td1[(t2 >> 16) & 0xff] ^
 17.1232 +            Td2[(t1 >>  8) & 0xff] ^
 17.1233 +            Td3[(t0      ) & 0xff] ^
 17.1234 +            rk[3];
 17.1235 +    }
 17.1236 +#endif /* ?FULL_UNROLL */
 17.1237 +    /*
 17.1238 +	 * apply last round and
 17.1239 +	 * map cipher state to byte array block:
 17.1240 +	 */
 17.1241 +   	s0 =
 17.1242 +   		(Td4[(t0 >> 24)       ] & 0xff000000) ^
 17.1243 +   		(Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
 17.1244 +   		(Td4[(t2 >>  8) & 0xff] & 0x0000ff00) ^
 17.1245 +   		(Td4[(t1      ) & 0xff] & 0x000000ff) ^
 17.1246 +   		rk[0];
 17.1247 +	PUTU32(out     , s0);
 17.1248 +   	s1 =
 17.1249 +   		(Td4[(t1 >> 24)       ] & 0xff000000) ^
 17.1250 +   		(Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
 17.1251 +   		(Td4[(t3 >>  8) & 0xff] & 0x0000ff00) ^
 17.1252 +   		(Td4[(t2      ) & 0xff] & 0x000000ff) ^
 17.1253 +   		rk[1];
 17.1254 +	PUTU32(out +  4, s1);
 17.1255 +   	s2 =
 17.1256 +   		(Td4[(t2 >> 24)       ] & 0xff000000) ^
 17.1257 +   		(Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
 17.1258 +   		(Td4[(t0 >>  8) & 0xff] & 0x0000ff00) ^
 17.1259 +   		(Td4[(t3      ) & 0xff] & 0x000000ff) ^
 17.1260 +   		rk[2];
 17.1261 +	PUTU32(out +  8, s2);
 17.1262 +   	s3 =
 17.1263 +   		(Td4[(t3 >> 24)       ] & 0xff000000) ^
 17.1264 +   		(Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
 17.1265 +   		(Td4[(t1 >>  8) & 0xff] & 0x0000ff00) ^
 17.1266 +   		(Td4[(t0      ) & 0xff] & 0x000000ff) ^
 17.1267 +   		rk[3];
 17.1268 +	PUTU32(out + 12, s3);
 17.1269 +}
 17.1270 +
 17.1271 +#endif /* AES_ASM */
 17.1272 +
 17.1273 +void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
 17.1274 +		     const unsigned long length, const AES_KEY *key,
 17.1275 +		     unsigned char *ivec, const int enc) 
 17.1276 +{
 17.1277 +
 17.1278 +	unsigned long n;
 17.1279 +	unsigned long len = length;
 17.1280 +	unsigned char tmp[AES_BLOCK_SIZE];
 17.1281 +
 17.1282 +	assert(in && out && key && ivec);
 17.1283 +
 17.1284 +	if (enc) {
 17.1285 +		while (len >= AES_BLOCK_SIZE) {
 17.1286 +			for(n=0; n < AES_BLOCK_SIZE; ++n)
 17.1287 +				tmp[n] = in[n] ^ ivec[n];
 17.1288 +			AES_encrypt(tmp, out, key);
 17.1289 +			memcpy(ivec, out, AES_BLOCK_SIZE);
 17.1290 +			len -= AES_BLOCK_SIZE;
 17.1291 +			in += AES_BLOCK_SIZE;
 17.1292 +			out += AES_BLOCK_SIZE;
 17.1293 +		}
 17.1294 +		if (len) {
 17.1295 +			for(n=0; n < len; ++n)
 17.1296 +				tmp[n] = in[n] ^ ivec[n];
 17.1297 +			for(n=len; n < AES_BLOCK_SIZE; ++n)
 17.1298 +				tmp[n] = ivec[n];
 17.1299 +			AES_encrypt(tmp, tmp, key);
 17.1300 +			memcpy(out, tmp, AES_BLOCK_SIZE);
 17.1301 +			memcpy(ivec, tmp, AES_BLOCK_SIZE);
 17.1302 +		}			
 17.1303 +	} else {
 17.1304 +		while (len >= AES_BLOCK_SIZE) {
 17.1305 +			memcpy(tmp, in, AES_BLOCK_SIZE);
 17.1306 +			AES_decrypt(in, out, key);
 17.1307 +			for(n=0; n < AES_BLOCK_SIZE; ++n)
 17.1308 +				out[n] ^= ivec[n];
 17.1309 +			memcpy(ivec, tmp, AES_BLOCK_SIZE);
 17.1310 +			len -= AES_BLOCK_SIZE;
 17.1311 +			in += AES_BLOCK_SIZE;
 17.1312 +			out += AES_BLOCK_SIZE;
 17.1313 +		}
 17.1314 +		if (len) {
 17.1315 +			memcpy(tmp, in, AES_BLOCK_SIZE);
 17.1316 +			AES_decrypt(tmp, tmp, key);
 17.1317 +			for(n=0; n < len; ++n)
 17.1318 +				out[n] = tmp[n] ^ ivec[n];
 17.1319 +			memcpy(ivec, tmp, AES_BLOCK_SIZE);
 17.1320 +		}			
 17.1321 +	}
 17.1322 +}
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/tools/blktap/drivers/aes.h	Thu Jul 13 10:13:26 2006 +0100
    18.3 @@ -0,0 +1,26 @@
    18.4 +#ifndef QEMU_AES_H
    18.5 +#define QEMU_AES_H
    18.6 +
    18.7 +#define AES_MAXNR 14
    18.8 +#define AES_BLOCK_SIZE 16
    18.9 +
   18.10 +struct aes_key_st {
   18.11 +    uint32_t rd_key[4 *(AES_MAXNR + 1)];
   18.12 +    int rounds;
   18.13 +};
   18.14 +typedef struct aes_key_st AES_KEY;
   18.15 +
   18.16 +int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
   18.17 +	AES_KEY *key);
   18.18 +int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
   18.19 +	AES_KEY *key);
   18.20 +
   18.21 +void AES_encrypt(const unsigned char *in, unsigned char *out,
   18.22 +	const AES_KEY *key);
   18.23 +void AES_decrypt(const unsigned char *in, unsigned char *out,
   18.24 +	const AES_KEY *key);
   18.25 +void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
   18.26 +		     const unsigned long length, const AES_KEY *key,
   18.27 +		     unsigned char *ivec, const int enc);
   18.28 +
   18.29 +#endif
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/tools/blktap/drivers/blktapctrl.c	Thu Jul 13 10:13:26 2006 +0100
    19.3 @@ -0,0 +1,704 @@
    19.4 +/*
    19.5 + * blktapctrl.c
    19.6 + * 
    19.7 + * userspace controller for the blktap disks.
    19.8 + * As requests for new block devices arrive,
    19.9 + * the controller spawns off a separate process
   19.10 + * per-disk.
   19.11 + *
   19.12 + *
   19.13 + * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield.
   19.14 + *
   19.15 + * This program is free software; you can redistribute it and/or
   19.16 + * modify it under the terms of the GNU General Public License version 2
   19.17 + * as published by the Free Software Foundation; or, when distributed
   19.18 + * separately from the Linux kernel or incorporated into other
   19.19 + * software packages, subject to the following license:
   19.20 + *
   19.21 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   19.22 + * of this source file (the "Software"), to deal in the Software without
   19.23 + * restriction, including without limitation the rights to use, copy, modify,
   19.24 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   19.25 + * and to permit persons to whom the Software is furnished to do so, subject to
   19.26 + * the following conditions:
   19.27 + *
   19.28 + * The above copyright notice and this permission notice shall be included in
   19.29 + * all copies or substantial portions of the Software.
   19.30 + *
   19.31 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   19.32 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   19.33 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   19.34 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19.35 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   19.36 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   19.37 + * IN THE SOFTWARE.
   19.38 + */
   19.39 +
   19.40 +#include <stdio.h>
   19.41 +#include <stdlib.h>
   19.42 +#include <sys/mman.h>
   19.43 +#include <sys/user.h>
   19.44 +#include <err.h>
   19.45 +#include <errno.h>
   19.46 +#include <sys/types.h>
   19.47 +#include <linux/types.h>
   19.48 +#include <signal.h>
   19.49 +#include <fcntl.h>
   19.50 +#include <sys/poll.h>
   19.51 +#include <sys/ioctl.h>
   19.52 +#include <string.h>
   19.53 +#include <unistd.h>
   19.54 +#include <xs.h>
   19.55 +#include <printf.h>
   19.56 +#include <sys/time.h>
   19.57 +#include <syslog.h>
   19.58 +                                                                     
   19.59 +#include "blktaplib.h"
   19.60 +#include "blktapctrl.h"
   19.61 +#include "tapdisk.h"
   19.62 +
/* Tunables; presumably consumed by the main loop and message helpers
 * further down in this file -- TODO confirm against their users. */
#define NUM_POLL_FDS 2
#define MSG_SIZE 4096
#define MAX_TIMEOUT 10
#define MAX_RAND_VAL 0xFFFF

/* Cleared by sig_handler(). */
int run = 1;
int max_timeout = MAX_TIMEOUT;
/* Descriptor on which get_new_dev() issues the BLKTAP_IOCTL_* requests.
 * NOTE(review): initialised to 0, which is also stdin's descriptor. */
int ctlfd = 0;

static int open_ctrl_socket(char *devname);
static int write_msg(int fd, int msgtype, void *ptr, void *ptr2);
static int read_msg(int fd, int msgtype, void *ptr);
/* Per disk type, head of the list of blkifs currently using that type
 * (maintained by add_disktype()/del_disktype()). */
static driver_list_entry_t *active_disks[MAX_DISK_TYPES];
   19.76 +
   19.77 +void sig_handler(int sig)
   19.78 +{
   19.79 +	run = 0;	
   19.80 +}
   19.81 +
   19.82 +static void init_driver_list(void)
   19.83 +{
   19.84 +	int i;
   19.85 +
   19.86 +	for (i = 0; i < MAX_DISK_TYPES; i++)
   19.87 +		active_disks[i] = NULL;
   19.88 +	return;
   19.89 +}
   19.90 +
   19.91 +static void init_rng(void)
   19.92 +{
   19.93 +	static uint32_t seed;
   19.94 +	struct timeval tv;
   19.95 +
   19.96 +	gettimeofday(&tv, NULL);
   19.97 +	seed = tv.tv_usec;
   19.98 +	srand48(seed);
   19.99 +	return;
  19.100 +}
  19.101 +
  19.102 +static void make_blktap_dev(char *devname, int major, int minor)
  19.103 +{
  19.104 +	struct stat st;
  19.105 +	
  19.106 +	if (lstat(devname, &st) != 0) {
  19.107 +		/*Need to create device*/
  19.108 +		if (mkdir(BLKTAP_DEV_DIR, 0755) == 0)
  19.109 +			DPRINTF("Created %s directory\n",BLKTAP_DEV_DIR);
  19.110 +		if (mknod(devname, S_IFCHR|0600,
  19.111 +                	makedev(major, minor)) == 0)
  19.112 +			DPRINTF("Created %s device\n",devname);
  19.113 +	} else DPRINTF("%s device already exists\n",devname);
  19.114 +}
  19.115 +
  19.116 +static int get_new_dev(int *major, int *minor, blkif_t *blkif)
  19.117 +{
  19.118 +	domid_translate_t tr;
  19.119 +	int ret;
  19.120 +	char *devname;
  19.121 +	
  19.122 +	tr.domid = blkif->domid;
  19.123 +        tr.busid = (unsigned short)blkif->be_id;
  19.124 +	ret = ioctl(ctlfd, BLKTAP_IOCTL_NEWINTF, tr );
  19.125 +	
  19.126 +	if ( (ret <= 0)||(ret > MAX_TAP_DEV) ) {
  19.127 +		DPRINTF("Incorrect Dev ID [%d]\n",ret);
  19.128 +		return -1;
  19.129 +	}
  19.130 +	
  19.131 +	*minor = ret;
  19.132 +	*major = ioctl(ctlfd, BLKTAP_IOCTL_MAJOR, ret );
  19.133 +	if (*major < 0) {
  19.134 +		DPRINTF("Incorrect Major ID [%d]\n",*major);
  19.135 +		return -1;
  19.136 +	}
  19.137 +
  19.138 +	asprintf(&devname,"%s/%s%d",BLKTAP_DEV_DIR, BLKTAP_DEV_NAME, *minor);
  19.139 +	make_blktap_dev(devname,*major,*minor);	
  19.140 +	DPRINTF("Received device id %d and major %d, "
  19.141 +		"sent domid %d and be_id %d\n",
  19.142 +		*minor, *major, tr.domid, tr.busid);
  19.143 +	return 0;
  19.144 +}
  19.145 +
  19.146 +static int get_tapdisk_pid(blkif_t *blkif)
  19.147 +{
  19.148 +	int ret;
  19.149 +
  19.150 +	if ((ret = write_msg(blkif->fds[WRITE], CTLMSG_PID, blkif, NULL)) 
  19.151 +	    <= 0) {
  19.152 +		DPRINTF("Write_msg failed - CTLMSG_PID(%d)\n", ret);
  19.153 +		return -EINVAL;
  19.154 +	}
  19.155 +
  19.156 +	if ((ret = read_msg(blkif->fds[READ], CTLMSG_PID_RSP, blkif))
  19.157 +	     <= 0) {
  19.158 +		DPRINTF("Read_msg failure - CTLMSG_PID(%d)\n", ret);
  19.159 +		return -EINVAL;
  19.160 +	}	
  19.161 +	return 1;
  19.162 +}
  19.163 +
  19.164 +static blkif_t *test_path(char *path, char **dev, int *type)
  19.165 +{
  19.166 +	char *ptr, handle[10];
  19.167 +	int i, size;
  19.168 +
  19.169 +	size = sizeof(dtypes)/sizeof(disk_info_t *);
  19.170 +	*type = MAX_DISK_TYPES + 1;
  19.171 +
  19.172 +	if ( (ptr = strstr(path, ":"))!=NULL) {
  19.173 +		memcpy(handle, path, (ptr - path));
  19.174 +		*dev = ptr + 1;
  19.175 +		ptr = handle + (ptr - path);
  19.176 +		*ptr = '\0';
  19.177 +		DPRINTF("Detected handle: [%s]\n",handle);
  19.178 +
  19.179 +		for (i = 0; i < size; i++) {
  19.180 +			if (strncmp(handle, dtypes[i]->handle, (ptr - path))
  19.181 +			    ==0) {
  19.182 +				*type = dtypes[i]->idnum;
  19.183 +
  19.184 +				if (dtypes[i]->single_handler == 1) {
  19.185 +					/* Check whether tapdisk process 
  19.186 +					   already exists */
  19.187 +					if (active_disks[dtypes[i]->idnum] 
  19.188 +					    == NULL) return NULL;
  19.189 +					else 
  19.190 +						return active_disks[dtypes[i]->idnum]->blkif;
  19.191 +				}
  19.192 +			}
  19.193 +		}
  19.194 +	} else *dev = NULL;
  19.195 +
  19.196 +	return NULL;
  19.197 +}
  19.198 +/* add_disktype: append a new list entry for blkif to the tail of active_disks[type]. */
  19.199 +static void add_disktype(blkif_t *blkif, int type)
  19.200 +{ /* Does nothing when type > MAX_DISK_TYPES. */
  19.201 +	driver_list_entry_t *entry, *ptr, *last;
  19.202 +
  19.203 +	if (type > MAX_DISK_TYPES) return;
  19.204 +	/* NOTE(review): the malloc() result below is dereferenced without a NULL check. */
  19.205 +	entry = malloc(sizeof(driver_list_entry_t));
  19.206 +	entry->blkif = blkif;
  19.207 +	entry->next = NULL;
  19.208 +	ptr = active_disks[type];
  19.209 +
  19.210 +	if (ptr == NULL) {
  19.211 +		active_disks[type] = entry; /* empty list: entry becomes the head */
  19.212 +		entry->prev = NULL;
  19.213 +		return;
  19.214 +	}
  19.215 +	/* ptr is non-NULL here, so the loop runs at least once and last is always assigned. */
  19.216 +	while (ptr != NULL) {
  19.217 +		last = ptr;
  19.218 +		ptr = ptr->next;
  19.219 +	}
  19.220 +
  19.221 +	/*We've found the end of the list*/
  19.222 +        last->next = entry;
  19.223 +	entry->prev = last;
  19.224 +	
  19.225 +	return;
  19.226 +}
  19.227 +/* del_disktype: unlink and free blkif's entry in active_disks[blkif->drivertype]. */
  19.228 +static int del_disktype(blkif_t *blkif)
  19.229 +{ /* Returns nonzero when the caller should close the control fds (see unmap_blktapctrl), 0 otherwise. */
  19.230 +	driver_list_entry_t *ptr, *cur, *last;
  19.231 +	int type = blkif->drivertype, count = 0, close = 0;
  19.232 +
  19.233 +	if (type > MAX_DISK_TYPES) return 1;
  19.234 +
  19.235 +	ptr = active_disks[type];
  19.236 +	last = NULL;
  19.237 +	while (ptr != NULL) {
  19.238 +		count++; /* count is only incremented, never read */
  19.239 +		if (blkif == ptr->blkif) {
  19.240 +			cur = ptr; /* remember the node to free; ptr is advanced below */
  19.241 +			if (ptr->next != NULL) {
  19.242 +				/*There's more later in the chain*/
  19.243 +				if (!last) {
  19.244 +					/*We're first in the list*/
  19.245 +					active_disks[type] = ptr->next;
  19.246 +					ptr = ptr->next;
  19.247 +					ptr->prev = NULL;
  19.248 +				}
  19.249 +				else {
  19.250 +					/*We're sandwiched*/
  19.251 +					last->next = ptr->next;
  19.252 +					ptr = ptr->next;
  19.253 +					ptr->prev = last;
  19.254 +				}
  19.255 +				
  19.256 +			} else if (last) {
  19.257 +				/*There's more earlier in the chain*/
  19.258 +				last->next = NULL;
  19.259 +			} else {
  19.260 +				/*We're the only entry*/
  19.261 +				active_disks[type] = NULL;
  19.262 +				if(dtypes[type]->single_handler == 1) 
  19.263 +					close = 1; /* last user of a shared handler */
  19.264 +			}
  19.265 +			DPRINTF("DEL_DISKTYPE: Freeing entry\n");
  19.266 +			free(cur);
  19.267 +			if (dtypes[type]->single_handler == 0) close = 1; /* non-shared handler: always report close */
  19.268 +
  19.269 +			return close;
  19.270 +		}
  19.271 +		last = ptr;
  19.272 +		ptr = ptr->next;
  19.273 +	}
  19.274 +	DPRINTF("DEL_DISKTYPE: No match\n");
  19.275 +	return 1;
  19.276 +}
  19.277 +/* write_msg: build a control message of msgtype for the blkif passed in ptr and write it to fd. */
  19.278 +static int write_msg(int fd, int msgtype, void *ptr, void *ptr2)
  19.279 +{ /* ptr2 is the path string for CTLMSG_PARAMS. Returns write()'s result, 0 if select() did not report fd writable, -1 for an unknown msgtype. */
  19.280 +	blkif_t *blkif;
  19.281 +	blkif_info_t *blk;
  19.282 +	msg_hdr_t *msg;
  19.283 +	msg_newdev_t *msg_dev;
  19.284 +	char *p, *buf, *path;
  19.285 +	int msglen, len, ret;
  19.286 +	fd_set writefds;
  19.287 +	struct timeval timeout;
  19.288 +	image_t *image, *img;
  19.289 +	uint32_t seed;
  19.290 +	/* NOTE(review): seed and img are never referenced; image and ret are only assigned. */
  19.291 +	blkif = (blkif_t *)ptr;
  19.292 +	blk = blkif->info;
  19.293 +	image = blkif->prv;
  19.294 +	len = 0; /* returned unchanged if select() below does not succeed */
  19.295 +
  19.296 +	switch (msgtype)
  19.297 +	{
  19.298 +	case CTLMSG_PARAMS:
  19.299 +		path = (char *)ptr2;
  19.300 +		DPRINTF("Write_msg called: CTLMSG_PARAMS, sending [%s, %s]\n",
  19.301 +			blk->params, path);
  19.302 +
  19.303 +		msglen = sizeof(msg_hdr_t) + strlen(path) + 1;
  19.304 +		buf = malloc(msglen); /* NOTE(review): result not checked for NULL (same in the other cases) */
  19.305 +
  19.306 +		/*Assign header fields*/
  19.307 +		msg = (msg_hdr_t *)buf;
  19.308 +		msg->type = CTLMSG_PARAMS;
  19.309 +		msg->len = msglen;
  19.310 +		msg->drivertype = blkif->drivertype;
  19.311 +
  19.312 +		gettimeofday(&timeout, NULL); /* tv_sec/tv_usec are overwritten before select() below */
  19.313 +		msg->cookie = blkif->cookie;
  19.314 +		DPRINTF("Generated cookie, %d\n",blkif->cookie);
  19.315 +
  19.316 +		/*Copy path (ptr2), including its terminator, into the payload after the header*/
  19.317 +		p = buf + sizeof(msg_hdr_t);
  19.318 +		memcpy(p, path, strlen(path) + 1);
  19.319 +
  19.320 +		break;
  19.321 +
  19.322 +	case CTLMSG_NEWDEV:
  19.323 +		DPRINTF("Write_msg called: CTLMSG_NEWDEV\n");
  19.324 +
  19.325 +		msglen = sizeof(msg_hdr_t) + sizeof(msg_newdev_t);
  19.326 +		buf = malloc(msglen);
  19.327 +		
  19.328 +		/*Assign header fields*/
  19.329 +		msg = (msg_hdr_t *)buf;
  19.330 +		msg->type = CTLMSG_NEWDEV;
  19.331 +		msg->len = msglen;
  19.332 +		msg->drivertype = blkif->drivertype;
  19.333 +		msg->cookie = blkif->cookie;
  19.334 +		
  19.335 +		msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t)); /* payload follows the header */
  19.336 +		msg_dev->devnum = blkif->minor;
  19.337 +		msg_dev->domid = blkif->domid;
  19.338 +
  19.339 +		break;
  19.340 +
  19.341 +	case CTLMSG_CLOSE:
  19.342 +		DPRINTF("Write_msg called: CTLMSG_CLOSE\n");
  19.343 +
  19.344 +		msglen = sizeof(msg_hdr_t); /* header-only message */
  19.345 +		buf = malloc(msglen);
  19.346 +		
  19.347 +		/*Assign header fields*/
  19.348 +		msg = (msg_hdr_t *)buf;
  19.349 +		msg->type = CTLMSG_CLOSE;
  19.350 +		msg->len = msglen;
  19.351 +		msg->drivertype = blkif->drivertype;
  19.352 +		msg->cookie = blkif->cookie;
  19.353 +		
  19.354 +		break;
  19.355 +
  19.356 +	case CTLMSG_PID:
  19.357 +		DPRINTF("Write_msg called: CTLMSG_PID\n");
  19.358 +
  19.359 +		msglen = sizeof(msg_hdr_t); /* header-only message */
  19.360 +		buf = malloc(msglen);
  19.361 +		
  19.362 +		/*Assign header fields*/
  19.363 +		msg = (msg_hdr_t *)buf;
  19.364 +		msg->type = CTLMSG_PID;
  19.365 +		msg->len = msglen;
  19.366 +		msg->drivertype = blkif->drivertype;
  19.367 +		msg->cookie = blkif->cookie;
  19.368 +		
  19.369 +		break;
  19.370 +		
  19.371 +	default:
  19.372 +		return -1; /* nothing has been allocated on this path */
  19.373 +	}
  19.374 +
  19.375 +	/*Now send the message*/
  19.376 +	ret = 0;
  19.377 +	FD_ZERO(&writefds);
  19.378 +	FD_SET(fd,&writefds);
  19.379 +	timeout.tv_sec = max_timeout; /*Wait for up to max_timeout seconds*/
  19.380 +	timeout.tv_usec = 0;
  19.381 +	if (select(fd+1, (fd_set *) 0, &writefds, 
  19.382 +		  (fd_set *) 0, &timeout) > 0) {
  19.383 +		len = write(fd, buf, msglen); /* NOTE(review): single write(); a short write is not retried */
  19.384 +		if (len == -1) DPRINTF("Write failed: (%d)\n",errno);
  19.385 +	}
  19.386 +	free(buf);
  19.387 +
  19.388 +	return len;
  19.389 +}
  19.390 +/* read_msg: wait up to max_timeout seconds for one control message on fd and process it for the blkif in ptr. */
  19.391 +static int read_msg(int fd, int msgtype, void *ptr)
  19.392 +{ /* Returns read()'s result when the received type equals msgtype; 0 on timeout, *_FAIL replies, unknown or unexpected types. */
  19.393 +	blkif_t *blkif;
  19.394 +	blkif_info_t *blk;
  19.395 +	msg_hdr_t *msg;
  19.396 +	msg_pid_t *msg_pid;
  19.397 +	char *p, *buf;
  19.398 +	int msglen = MSG_SIZE, len, ret;
  19.399 +	fd_set readfds;
  19.400 +	struct timeval timeout;
  19.401 +	image_t *image, *img;
  19.402 +	/* NOTE(review): p and len are never referenced; blk is only assigned. */
  19.403 +
  19.404 +	blkif = (blkif_t *)ptr;
  19.405 +	blk = blkif->info;
  19.406 +	image = blkif->prv; /* destination for the fields of a CTLMSG_IMG reply */
  19.407 +
  19.408 +	buf = malloc(MSG_SIZE); /* NOTE(review): result not checked for NULL */
  19.409 +
  19.410 +	ret = 0;
  19.411 +	FD_ZERO(&readfds);
  19.412 +	FD_SET(fd,&readfds);
  19.413 +	timeout.tv_sec = max_timeout; /*Wait for up to max_timeout seconds*/ 
  19.414 +	timeout.tv_usec = 0;
  19.415 +	if (select(fd+1, &readfds,  (fd_set *) 0,
  19.416 +		  (fd_set *) 0, &timeout) > 0) {
  19.417 +		ret = read(fd, buf, msglen);
  19.418 +		/* NOTE(review): the header and payload are parsed below without checking that ret covers them. */
  19.419 +	}			
  19.420 +	if (ret > 0) {
  19.421 +		msg = (msg_hdr_t *)buf;
  19.422 +		switch (msg->type)
  19.423 +		{
  19.424 +		case CTLMSG_IMG:
  19.425 +			img = (image_t *)(buf + sizeof(msg_hdr_t)); /* payload follows the header */
  19.426 +			image->size = img->size;
  19.427 +			image->secsize = img->secsize;
  19.428 +			image->info = img->info;
  19.429 +
  19.430 +			DPRINTF("Received CTLMSG_IMG: %lu, %lu, %lu\n",
  19.431 +				image->size, image->secsize, image->info);
  19.432 +			if(msgtype != CTLMSG_IMG) ret = 0; /* the image fields were already copied above */
  19.433 +			break;
  19.434 +			
  19.435 +		case CTLMSG_IMG_FAIL:
  19.436 +			DPRINTF("Received CTLMSG_IMG_FAIL, "
  19.437 +				"unable to open image\n");
  19.438 +			ret = 0;
  19.439 +			break;
  19.440 +				
  19.441 +		case CTLMSG_NEWDEV_RSP:
  19.442 +			DPRINTF("Received CTLMSG_NEWDEV_RSP\n");
  19.443 +			if(msgtype != CTLMSG_NEWDEV_RSP) ret = 0;
  19.444 +			break;
  19.445 +			
  19.446 +		case CTLMSG_NEWDEV_FAIL:
  19.447 +			DPRINTF("Received CTLMSG_NEWDEV_FAIL\n");
  19.448 +			ret = 0;
  19.449 +			break;
  19.450 +			
  19.451 +		case CTLMSG_CLOSE_RSP:
  19.452 +			DPRINTF("Received CTLMSG_CLOSE_RSP\n");
  19.453 +			if (msgtype != CTLMSG_CLOSE_RSP) ret = 0;
  19.454 +			break;
  19.455 +
  19.456 +		case CTLMSG_PID_RSP:
  19.457 +			DPRINTF("Received CTLMSG_PID_RSP\n");
  19.458 +			if (msgtype != CTLMSG_PID_RSP) ret = 0;
  19.459 +			else {
  19.460 +				msg_pid = (msg_pid_t *)
  19.461 +					(buf + sizeof(msg_hdr_t));
  19.462 +				blkif->tappid = msg_pid->pid; /* record the replying process's pid on the blkif */
  19.463 +				DPRINTF("\tPID: [%d]\n",blkif->tappid);
  19.464 +			}
  19.465 +			break;
  19.466 +		default:
  19.467 +			DPRINTF("UNKNOWN MESSAGE TYPE RECEIVED\n");
  19.468 +			ret = 0;
  19.469 +			break;
  19.470 +		}
  19.471 +	} 
  19.472 +	
  19.473 +	free(buf);
  19.474 +	
  19.475 +	return ret;
  19.476 +
  19.477 +}
  19.478 +/* blktapctrl_new_blkif: new-blkif hook; gets a device, reuses or launches a tapdisk process, then exchanges PID, PARAMS and IMG messages with it. */
  19.479 +int blktapctrl_new_blkif(blkif_t *blkif)
  19.480 +{ /* Returns 0 on success, -1 or -EINVAL on failure. */
  19.481 +	blkif_info_t *blk;
  19.482 +	int major, minor, fd_read, fd_write, type, new;
  19.483 +	char *rdctldev, *wrctldev, *cmd, *ptr;
  19.484 +	image_t *image;
  19.485 +	blkif_t *exist = NULL;
  19.486 +	/* NOTE(review): fd_read, fd_write and new are never referenced. */
  19.487 +	DPRINTF("Received a poll for a new vbd\n");
  19.488 +	if ( ((blk=blkif->info) != NULL) && (blk->params != NULL) ) {
  19.489 +		if (get_new_dev(&major, &minor, blkif)<0)
  19.490 +			return -1;
  19.491 +
  19.492 +		exist = test_path(blk->params, &ptr, &type); /* ptr receives the device part, or NULL if params has no ':' */
  19.493 +		blkif->drivertype = type;
  19.494 +		blkif->cookie = lrand48() % MAX_RAND_VAL;
  19.495 +
  19.496 +		if (!exist) {
  19.497 +			DPRINTF("Process does not exist:\n");
  19.498 +			asprintf(&rdctldev, "/dev/xen/tapctrlread%d", minor);
  19.499 +			blkif->fds[READ] = open_ctrl_socket(rdctldev);
  19.500 +			/* NOTE(review): the asprintf() return values in this branch are not checked. */
  19.501 +
  19.502 +			asprintf(&wrctldev, "/dev/xen/tapctrlwrite%d", minor);
  19.503 +			blkif->fds[WRITE] = open_ctrl_socket(wrctldev);
  19.504 +			
  19.505 +			if (blkif->fds[READ] == -1 || blkif->fds[WRITE] == -1) 
  19.506 +				goto fail; /* NOTE(review): rdctldev and wrctldev are not freed on this path */
  19.507 +
  19.508 +			/*launch the new process*/
  19.509 +			asprintf(&cmd, "tapdisk %s %s", wrctldev, rdctldev);
  19.510 +			DPRINTF("Launching process, CMDLINE [%s]\n",cmd);
  19.511 +			if (system(cmd) == -1) {
  19.512 +				DPRINTF("Unable to fork, cmdline: [%s]\n",cmd);
  19.513 +				return -1; /* NOTE(review): returns without freeing the three strings or taking the fail path's ioctl */
  19.514 +			}
  19.515 +
  19.516 +			free(rdctldev);
  19.517 +			free(wrctldev);
  19.518 +			free(cmd);
  19.519 +		} else {
  19.520 +			DPRINTF("Process exists!\n");
  19.521 +			blkif->fds[READ] = exist->fds[READ]; /* share the existing handler's control fds */
  19.522 +			blkif->fds[WRITE] = exist->fds[WRITE];
  19.523 +		}
  19.524 +
  19.525 +		add_disktype(blkif, type);
  19.526 +		blkif->major = major;
  19.527 +		blkif->minor = minor;
  19.528 +
  19.529 +		image = (image_t *)malloc(sizeof(image_t)); /* NOTE(review): result not checked for NULL */
  19.530 +		blkif->prv = (void *)image;
  19.531 +		blkif->ops = &tapdisk_ops;
  19.532 +		/* NOTE(review): the "goto fail" paths below leave image allocated and blkif on the active_disks list. */
  19.533 +		/*Retrieve the PID of the new process*/
  19.534 +		if (get_tapdisk_pid(blkif) <= 0) {
  19.535 +			DPRINTF("Unable to contact disk process\n");
  19.536 +			goto fail;
  19.537 +		}
  19.538 +		/* NOTE(review): ptr is NULL when blk->params has no ':'; write_msg() calls strlen() on it for CTLMSG_PARAMS. */
  19.539 +		/* Both of the following read and write calls will block up to 
  19.540 +		 * max_timeout val*/
  19.541 +		if (write_msg(blkif->fds[WRITE], CTLMSG_PARAMS, blkif, ptr) 
  19.542 +		    <= 0) {
  19.543 +			DPRINTF("Write_msg failed - CTLMSG_PARAMS\n");
  19.544 +			goto fail;
  19.545 +		}
  19.546 +
  19.547 +		if (read_msg(blkif->fds[READ], CTLMSG_IMG, blkif) <= 0) {
  19.548 +			DPRINTF("Read_msg failure - CTLMSG_IMG\n");
  19.549 +			goto fail;
  19.550 +		}
  19.551 +
  19.552 +	} else return -1;
  19.553 +
  19.554 +	return 0;
  19.555 +fail:
  19.556 +	ioctl(ctlfd, BLKTAP_IOCTL_FREEINTF, minor); /* presumably releases the minor obtained by get_new_dev() - confirm */
  19.557 +	return -EINVAL;
  19.558 +}
  19.559 +/* map_new_blktapctrl: new-devmap hook; sends CTLMSG_NEWDEV and waits for CTLMSG_NEWDEV_RSP. */
  19.560 +int map_new_blktapctrl(blkif_t *blkif)
  19.561 +{ /* Returns blkif->minor - 1 on success, -EINVAL if either message step fails. */
  19.562 +	DPRINTF("Received a poll for a new devmap\n");
  19.563 +	if (write_msg(blkif->fds[WRITE], CTLMSG_NEWDEV, blkif, NULL) <= 0) {
  19.564 +		DPRINTF("Write_msg failed - CTLMSG_NEWDEV\n");
  19.565 +		return -EINVAL;
  19.566 +	}
  19.567 +
  19.568 +	if (read_msg(blkif->fds[READ], CTLMSG_NEWDEV_RSP, blkif) <= 0) {
  19.569 +		DPRINTF("Read_msg failed - CTLMSG_NEWDEV_RSP\n");
  19.570 +		return -EINVAL;
  19.571 +	}
  19.572 +	DPRINTF("Exiting map_new_blktapctrl\n");
  19.573 +
  19.574 +	return blkif->minor - 1;
  19.575 +}
  19.576 +/* unmap_blktapctrl: unmap hook; sends CTLMSG_CLOSE, drops blkif from its disk-type list and closes the control fds if del_disktype() asks for it. */
  19.577 +int unmap_blktapctrl(blkif_t *blkif)
  19.578 +{ /* Returns 0, or -EINVAL if the CLOSE message could not be written. */
  19.579 +	DPRINTF("Unmapping vbd\n");
  19.580 +
  19.581 +	if (write_msg(blkif->fds[WRITE], CTLMSG_CLOSE, blkif, NULL) <= 0) {
  19.582 +		DPRINTF("Write_msg failed - CTLMSG_CLOSE\n");
  19.583 +		return -EINVAL;
  19.584 +	}
  19.585 +	/* No CTLMSG_CLOSE_RSP is read in this function. */
  19.586 +	if (del_disktype(blkif)) {
  19.587 +		close(blkif->fds[WRITE]);
  19.588 +		close(blkif->fds[READ]);
  19.589 +
  19.590 +	}
  19.591 +	return 0;
  19.592 +}
  19.593 +/* open_ctrl_socket: create the FIFO devname if it does not exist yet and open it with O_RDWR|O_NONBLOCK. */
  19.594 +int open_ctrl_socket(char *devname)
  19.595 +{ /* Returns the fd, or -1 if open() fails; exits the process if mkfifo() fails for a reason other than EEXIST. */
  19.596 +	int ret;
  19.597 +	int ipc_fd;
  19.598 +	char *cmd;
  19.599 +	fd_set socks;
  19.600 +	struct timeval timeout;
  19.601 +	/* NOTE(review): cmd, socks and timeout are never referenced. */
  19.602 +	ret = mkfifo(devname,S_IRWXU|S_IRWXG|S_IRWXO);
  19.603 +	if ( (ret != 0) && (errno != EEXIST) ) {
  19.604 +		DPRINTF("ERROR: pipe failed (%d)\n", errno);
  19.605 +		exit(0); /* NOTE(review): the process exits with status 0 on this error path */
  19.606 +	}
  19.607 +
  19.608 +	ipc_fd = open(devname,O_RDWR|O_NONBLOCK);
  19.609 +
  19.610 +	if (ipc_fd < 0) {
  19.611 +		DPRINTF("FD open failed\n");
  19.612 +		return -1;
  19.613 +	}
  19.614 +
  19.615 +	return ipc_fd;
  19.616 +}
  19.617 +/* print_drivers: log the version string and the name of every entry in dtypes. */
  19.618 +static void print_drivers(void)
  19.619 +{
  19.620 +	int i, size;
  19.621 +
  19.622 +	size = sizeof(dtypes)/sizeof(disk_info_t *); /* element count of dtypes */
  19.623 +	DPRINTF("blktapctrl: v1.0.0\n");
  19.624 +	for (i = 0; i < size; i++)
  19.625 +		DPRINTF("Found driver: [%s]\n",dtypes[i]->name);
  19.626 +} 
  19.627 +/* main: blktapctrl entry point; registers the blkif hooks, opens blktap0 and xenstore, then polls while run is nonzero. */
  19.628 +int main(int argc, char *argv[])
  19.629 +{ /* Returns 0 after the poll loop ends, -1 if startup fails. */
  19.630 +	char *devname;
  19.631 +	tapdev_info_t *ctlinfo;
  19.632 +	int tap_pfd, store_pfd, xs_fd, ret, timeout, pfd_count;
  19.633 +	struct xs_handle *h;
  19.634 +	struct pollfd  pfd[NUM_POLL_FDS];
  19.635 +	pid_t process;
  19.636 +	/* NOTE(review): ctlinfo and xs_fd are never referenced. */
  19.637 +	__init_blkif();
  19.638 +	openlog("BLKTAPCTRL", LOG_CONS|LOG_ODELAY, LOG_DAEMON);
  19.639 +
  19.640 +	print_drivers();
  19.641 +	init_driver_list();
  19.642 +	init_rng();
  19.643 +	/* Hooks implemented earlier in this file. */
  19.644 +	register_new_blkif_hook(blktapctrl_new_blkif);
  19.645 +	register_new_devmap_hook(map_new_blktapctrl);
  19.646 +	register_new_unmap_hook(unmap_blktapctrl);
  19.647 +
  19.648 +	/*Attach to blktap0 */	
  19.649 +	asprintf(&devname,"%s/%s0", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME); /* NOTE(review): result unchecked; devname is never freed */
  19.650 +	make_blktap_dev(devname,254,0); /* presumably major 254, minor 0 - confirm against make_blktap_dev() */
  19.651 +	ctlfd = open(devname, O_RDWR);
  19.652 +	if (ctlfd == -1) {
  19.653 +		DPRINTF("blktap0 open failed\n");
  19.654 +		goto open_failed;
  19.655 +	}
  19.656 +
  19.657 +	/* Set up store connection and watch. */
  19.658 +	h = xs_daemon_open();
  19.659 +	if (h == NULL) {
  19.660 +		DPRINTF("xs_daemon_open failed -- "
  19.661 +			"is xenstore running?\n");
  19.662 +		goto open_failed; /* NOTE(review): ctlfd is left open on this path */
  19.663 +	}
  19.664 +	
  19.665 +	ret = add_blockdevice_probe_watch(h, "Domain-0");
  19.666 +	if (ret != 0) {
  19.667 +		DPRINTF("adding device probewatch\n");
  19.668 +		goto open_failed; /* NOTE(review): ctlfd is left open on this path */
  19.669 +	}
  19.670 +
  19.671 +	ioctl(ctlfd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE ); /* return value ignored */
  19.672 +
  19.673 +	process = getpid();
  19.674 +	ret = ioctl(ctlfd, BLKTAP_IOCTL_SENDPID, process ); /* ret is not checked */
  19.675 +
  19.676 +	/*Static pollhooks*/
  19.677 +	pfd_count = 0;
  19.678 +	tap_pfd = pfd_count++;
  19.679 +	pfd[tap_pfd].fd = ctlfd;
  19.680 +	pfd[tap_pfd].events = POLLIN;
  19.681 +	
  19.682 +	store_pfd = pfd_count++;
  19.683 +	pfd[store_pfd].fd = xs_fileno(h);
  19.684 +	pfd[store_pfd].events = POLLIN;
  19.685 +	/* Only the xenstore fd is serviced in the loop; pfd[tap_pfd].revents is not examined. */
  19.686 +	while (run) {
  19.687 +		timeout = 1000; /*Milliseconds*/
  19.688 +                ret = poll(pfd, pfd_count, timeout);
  19.689 +
  19.690 +		if (ret > 0) {
  19.691 +			if (pfd[store_pfd].revents) {
  19.692 +				ret = xs_fire_next_watch(h);
  19.693 +			}
  19.694 +		}
  19.695 +	}
  19.696 +	/* Loop ended: switch the driver back to passthrough mode before closing. */
  19.697 +	ioctl(ctlfd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
  19.698 +	close(ctlfd);
  19.699 +	closelog();
  19.700 +
  19.701 +	return 0;
  19.702 +	
  19.703 + open_failed:
  19.704 +	DPRINTF("Unable to start blktapctrl\n");
  19.705 +	closelog();
  19.706 +	return -1;
  19.707 +}
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/tools/blktap/drivers/blktapctrl.h	Thu Jul 13 10:13:26 2006 +0100
    20.3 @@ -0,0 +1,55 @@
    20.4 +/* blktapctrl.h
    20.5 + *
    20.6 + * controller image utils.
    20.7 + * 
    20.8 + * (c) 2004-6 Andrew Warfield and Julian Chesterfield
    20.9 + *
   20.10 + * This program is free software; you can redistribute it and/or
   20.11 + * modify it under the terms of the GNU General Public License version 2
   20.12 + * as published by the Free Software Foundation; or, when distributed
   20.13 + * separately from the Linux kernel or incorporated into other
   20.14 + * software packages, subject to the following license:
   20.15 + *
   20.16 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   20.17 + * of this source file (the "Software"), to deal in the Software without
   20.18 + * restriction, including without limitation the rights to use, copy, modify,
   20.19 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   20.20 + * and to permit persons to whom the Software is furnished to do so, subject to
   20.21 + * the following conditions:
   20.22 + *
   20.23 + * The above copyright notice and this permission notice shall be included in
   20.24 + * all copies or substantial portions of the Software.
   20.25 + *
   20.26 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   20.27 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   20.28 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   20.29 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   20.30 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20.31 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   20.32 + * IN THE SOFTWARE.
   20.33 + */
   20.34 +
   20.35 +/* tapdisk_get_size: return the size field of the image_t stored in blkif->prv. */
   20.36 +static inline long int tapdisk_get_size(blkif_t *blkif)
   20.37 +{
   20.38 +	image_t *img = (image_t *)blkif->prv; /* prv is set to a malloc'd image_t in blktapctrl_new_blkif() */
   20.39 +	return img->size;
   20.40 +}
   20.41 +/* tapdisk_get_secsize: return the secsize field of the image_t stored in blkif->prv. */
   20.42 +static inline long int tapdisk_get_secsize(blkif_t *blkif)
   20.43 +{
   20.44 +	image_t *img = (image_t *)blkif->prv;
   20.45 +	return img->secsize;
   20.46 +}
   20.47 +/* tapdisk_get_info: return the info field of the image_t stored in blkif->prv. */
   20.48 +static inline unsigned tapdisk_get_info(blkif_t *blkif)
   20.49 +{
   20.50 +	image_t *img = (image_t *)blkif->prv;
   20.51 +	return img->info;
   20.52 +}
   20.53 +/* Accessor table installed as blkif->ops by blktapctrl_new_blkif(). NOTE(review): non-static object defined in a header. */
   20.54 +struct blkif_ops tapdisk_ops = {
   20.55 +	.get_size = tapdisk_get_size,
   20.56 +	.get_secsize = tapdisk_get_secsize,
   20.57 +	.get_info = tapdisk_get_info,
   20.58 +};
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/tools/blktap/drivers/block-aio.c	Thu Jul 13 10:13:26 2006 +0100
    21.3 @@ -0,0 +1,327 @@
    21.4 +/* block-aio.c
    21.5 + *
    21.6 + * libaio-based raw disk implementation.
    21.7 + *
    21.8 + * (c) 2006 Andrew Warfield and Julian Chesterfield
    21.9 + *
   21.10 + * NB: This code is not thread-safe.
   21.11 + *
   21.12 + * This program is free software; you can redistribute it and/or
   21.13 + * modify it under the terms of the GNU General Public License version 2
   21.14 + * as published by the Free Software Foundation; or, when distributed
   21.15 + * separately from the Linux kernel or incorporated into other
   21.16 + * software packages, subject to the following license:
   21.17 + *
   21.18 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   21.19 + * of this source file (the "Software"), to deal in the Software without
   21.20 + * restriction, including without limitation the rights to use, copy, modify,
   21.21 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   21.22 + * and to permit persons to whom the Software is furnished to do so, subject to
   21.23 + * the following conditions:
   21.24 + *
   21.25 + * The above copyright notice and this permission notice shall be included in
   21.26 + * all copies or substantial portions of the Software.
   21.27 + *
   21.28 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   21.29 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   21.30 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   21.31 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   21.32 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   21.33 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   21.34 + * IN THE SOFTWARE.
   21.35 + */
   21.36 +
   21.37 +
   21.38 +#include <errno.h>
   21.39 +#include <libaio.h>
   21.40 +#include <fcntl.h>
   21.41 +#include <stdio.h>
   21.42 +#include <stdlib.h>
   21.43 +#include <unistd.h>
   21.44 +#include <sys/statvfs.h>
   21.45 +#include <sys/stat.h>
   21.46 +#include <sys/ioctl.h>
   21.47 +#include <linux/fs.h>
   21.48 +#include "tapdisk.h"
   21.49 +
   21.50 +
   21.51 +/**
   21.52 + * We used a kernel patch to return an fd associated with the AIO context
   21.53 + * so that we can concurrently poll on synchronous and async descriptors.
   21.54 + * This is signalled by passing 1 as the io context to io_setup.
   21.55 + */
   21.56 +#define REQUEST_ASYNC_FD 1
   21.57 +
   21.58 +#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8)
   21.59 +/* Completion data for one outstanding request; stored in tdaio_state.pending_aio at the index of its iocb. */
   21.60 +struct pending_aio {
   21.61 +	td_callback_t cb; /* invoked from tdaio_do_callbacks() */
   21.62 +	int id; /* passed back to cb */
   21.63 +	void *private; /* passed back to cb */
   21.64 +};
   21.65 +/* Private state of the aio driver; its size is published through the tapdisk_aio table. */
   21.66 +struct tdaio_state {
   21.67 +	int fd; /* image descriptor opened in tdaio_open() */
   21.68 +	
   21.69 +	/* libaio state */
   21.70 +	io_context_t       aio_ctx;
   21.71 +	struct iocb        iocb_list  [MAX_AIO_REQS]; /* storage for every iocb */
   21.72 +	struct iocb       *iocb_free  [MAX_AIO_REQS]; /* stack of currently unused iocbs */
   21.73 +	struct pending_aio pending_aio[MAX_AIO_REQS]; /* indexed like iocb_list */
   21.74 +	int                iocb_free_count; /* number of valid entries in iocb_free */
   21.75 +	struct iocb       *iocb_queue[MAX_AIO_REQS]; /* prepared but not yet submitted */
   21.76 +	int                iocb_queued; /* number of valid entries in iocb_queue */
   21.77 +	int                poll_fd; /* NB: we require aio_poll support */
   21.78 +	struct io_event    aio_events[MAX_AIO_REQS]; /* result buffer for io_getevents() */
   21.79 +};
   21.80 +/* IOCB_IDX: position of iocb pointer _io within (_s)->iocb_list. */
   21.81 +#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
   21.82 +
   21.83 +/*Get image size and sector size for fd into s; returns 0 on success, -EINVAL on failure*/
   21.84 +static int get_image_info(struct td_state *s, int fd)
   21.85 +{
   21.86 +	int ret;
   21.87 +	long size;
   21.88 +	unsigned long total_size;
   21.89 +	struct statvfs statBuf;
   21.90 +	struct stat stat;
   21.91 +	/* NOTE(review): size, total_size and statBuf are never referenced. */
   21.92 +	ret = fstat(fd, &stat);
   21.93 +	if (ret != 0) {
   21.94 +		DPRINTF("ERROR: fstat failed, Couldn't stat image");
   21.95 +		return -EINVAL;
   21.96 +	}
   21.97 +
   21.98 +	if (S_ISBLK(stat.st_mode)) {
   21.99 +		/*Accessing block device directly*/
  21.100 +		s->size = 0;
  21.101 +		if (ioctl(fd,BLKGETSIZE,&s->size)!=0) { /* NOTE(review): BLKGETSIZE stores an unsigned long - confirm s->size has that width */
  21.102 +			DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image");
  21.103 +			return -EINVAL;
  21.104 +		}
  21.105 +
  21.106 +		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
  21.107 +			"sector_shift [%llu]\n",
  21.108 +			(long long unsigned)(s->size << SECTOR_SHIFT),
  21.109 +			(long long unsigned)s->size);
  21.110 +
  21.111 +		/*Get the sector size*/
  21.112 +#if defined(BLKSSZGET)
  21.113 +		{
  21.114 +			int arg; /* unused */
  21.115 +			s->sector_size = DEFAULT_SECTOR_SIZE;
  21.116 +			ioctl(fd, BLKSSZGET, &s->sector_size); /* NOTE(review): result ignored; BLKSSZGET stores an int while sector_size is printed with %ld below */
  21.117 +			
  21.118 +			if (s->sector_size != DEFAULT_SECTOR_SIZE)
  21.119 +				DPRINTF("Note: sector size is %ld (not %d)\n",
  21.120 +					s->sector_size, DEFAULT_SECTOR_SIZE);
  21.121 +		}
  21.122 +#else
  21.123 +		s->sector_size = DEFAULT_SECTOR_SIZE;
  21.124 +#endif
  21.125 +
  21.126 +	} else {
  21.127 +		/*Local file? try fstat instead*/
  21.128 +		s->size = (stat.st_size >> SECTOR_SHIFT); /* byte length converted to SECTOR_SHIFT-sized units */
  21.129 +		s->sector_size = DEFAULT_SECTOR_SIZE;
  21.130 +		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
  21.131 +			"sector_shift [%llu]\n",
  21.132 +			(long long unsigned)(s->size << SECTOR_SHIFT),
  21.133 +			(long long unsigned)s->size);
  21.134 +	}
  21.135 +	/* A detected size of 0 is replaced by a hard-coded value. */
  21.136 +	if (s->size == 0) {		
  21.137 +		s->size =((uint64_t) 16836057);
  21.138 +		s->sector_size = DEFAULT_SECTOR_SIZE;
  21.139 +	}
  21.140 +	s->info = 0;
  21.141 +
  21.142 +	return 0;
  21.143 +}
  21.144 +
  21.145 +/* Open the disk file and initialize aio state. Returns 0 on success or a negative value on failure. */
  21.146 +int tdaio_open (struct td_state *s, const char *name)
  21.147 +{
  21.148 +	int i, fd, ret = 0;
  21.149 +	struct tdaio_state *prv = (struct tdaio_state *)s->private;
  21.150 +	s->private = prv; /* no-op: prv was just read from s->private */
  21.151 +
  21.152 +	DPRINTF("XXX: block-aio open('%s')", name);
  21.153 +	/* Initialize AIO */
  21.154 +	prv->iocb_free_count = MAX_AIO_REQS;
  21.155 +	prv->iocb_queued     = 0;
  21.156 +	
  21.157 +	prv->aio_ctx = (io_context_t) REQUEST_ASYNC_FD; /* see the REQUEST_ASYNC_FD comment above: asks the patched io_setup for a pollable fd */
  21.158 +	prv->poll_fd = io_setup(MAX_AIO_REQS, &prv->aio_ctx);
  21.159 +
  21.160 +	if (prv->poll_fd < 0) {
  21.161 +		ret = prv->poll_fd;
  21.162 +		DPRINTF("Couldn't get fd for AIO poll support.  This is "
  21.163 +			"probably because your kernel does not have the "
  21.164 +			"aio-poll patch applied.\n");
  21.165 +		goto done;
  21.166 +	}
  21.167 +	/* Initially every iocb is on the free stack. */
  21.168 +	for (i=0;i<MAX_AIO_REQS;i++)
  21.169 +		prv->iocb_free[i] = &prv->iocb_list[i];
  21.170 +
  21.171 +	/* Open the file */
  21.172 +        fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE);
  21.173 +
  21.174 +        if ( (fd == -1) && (errno == EINVAL) ) {
  21.175 +
  21.176 +                /* Maybe O_DIRECT isn't supported. */
  21.177 +                fd = open(name, O_RDWR | O_LARGEFILE);
  21.178 +                if (fd != -1) DPRINTF("WARNING: Accessing image without"
  21.179 +                                     "O_DIRECT! (%s)\n", name);
  21.180 +
  21.181 +        } else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name);
  21.182 +	
  21.183 +        if (fd == -1) {
  21.184 +		DPRINTF("Unable to open [%s] (%d)!\n", name, 0 - errno);
  21.185 +        	ret = 0 - errno; /* NOTE(review): errno is re-read after DPRINTF and may have been changed by it */
  21.186 +        	goto done; /* NOTE(review): the aio context from io_setup() is not destroyed on this path */
  21.187 +        }
  21.188 +
  21.189 +        prv->fd = fd;
  21.190 +
  21.191 +	ret = get_image_info(s, fd);
  21.192 +done:
  21.193 +	return ret;	
  21.194 +}
  21.195 +/* tdaio_queue_read: prepare a pread iocb for nb_sectors starting at sector and append it to the submit queue. */
  21.196 +int tdaio_queue_read(struct td_state *s, uint64_t sector,
  21.197 +			       int nb_sectors, char *buf, td_callback_t cb,
  21.198 +			       int id, void *private)
  21.199 +{ /* Returns 0, or -ENOMEM when no free iocb is left; cb/id/private are used later by tdaio_do_callbacks(). */
  21.200 +	struct   iocb *io;
  21.201 +	struct   pending_aio *pio;
  21.202 +	struct   tdaio_state *prv = (struct tdaio_state *)s->private;
  21.203 +	int      size    = nb_sectors * s->sector_size;
  21.204 +	uint64_t offset  = sector * (uint64_t)s->sector_size;
  21.205 +	long     ioidx;
  21.206 +	
  21.207 +	if (prv->iocb_free_count == 0)
  21.208 +		return -ENOMEM;
  21.209 +	io = prv->iocb_free[--prv->iocb_free_count]; /* pop from the free stack */
  21.210 +	/* Record the callback data in the slot that parallels this iocb. */
  21.211 +	ioidx = IOCB_IDX(prv, io);
  21.212 +	pio = &prv->pending_aio[ioidx];
  21.213 +	pio->cb = cb;
  21.214 +	pio->id = id;
  21.215 +	pio->private = private;
  21.216 +	
  21.217 +	io_prep_pread(io, prv->fd, buf, size, offset);
  21.218 +	io->data = (void *)ioidx; /* slot index, read back in tdaio_do_callbacks() */
  21.219 +	/* Only queued here; nothing is issued until tdaio_submit(). */
  21.220 +	prv->iocb_queue[prv->iocb_queued++] = io;
  21.221 +	
  21.222 +	return 0;
  21.223 +}
  21.224 +/* tdaio_queue_write: prepare a pwrite iocb for nb_sectors starting at sector and append it to the submit queue. */
  21.225 +int tdaio_queue_write(struct td_state *s, uint64_t sector,
  21.226 +			       int nb_sectors, char *buf, td_callback_t cb,
  21.227 +			       int id, void *private)
  21.228 +{ /* Returns 0, or -ENOMEM when no free iocb is left; cb/id/private are used later by tdaio_do_callbacks(). */
  21.229 +	struct   iocb *io;
  21.230 +	struct   pending_aio *pio;
  21.231 +	struct   tdaio_state *prv = (struct tdaio_state *)s->private;
  21.232 +	int      size    = nb_sectors * s->sector_size;
  21.233 +	uint64_t offset  = sector * (uint64_t)s->sector_size;
  21.234 +	long     ioidx;
  21.235 +	
  21.236 +	if (prv->iocb_free_count == 0)
  21.237 +		return -ENOMEM;
  21.238 +	io = prv->iocb_free[--prv->iocb_free_count]; /* pop from the free stack */
  21.239 +	/* Record the callback data in the slot that parallels this iocb. */
  21.240 +	ioidx = IOCB_IDX(prv, io);
  21.241 +	pio = &prv->pending_aio[ioidx];
  21.242 +	pio->cb = cb;
  21.243 +	pio->id = id;
  21.244 +	pio->private = private;
  21.245 +	
  21.246 +	io_prep_pwrite(io, prv->fd, buf, size, offset);
  21.247 +	io->data = (void *)ioidx; /* slot index, read back in tdaio_do_callbacks() */
  21.248 +	/* Only queued here; nothing is issued until tdaio_submit(). */
  21.249 +	prv->iocb_queue[prv->iocb_queued++] = io;
  21.250 +	
  21.251 +	return 0;
  21.252 +}
  21.253 +/* tdaio_submit: hand all queued iocbs to io_submit() and reset the queue; returns io_submit()'s result. */
  21.254 +int tdaio_submit(struct td_state *s)
  21.255 +{
  21.256 +	int ret;
  21.257 +	struct   tdaio_state *prv = (struct tdaio_state *)s->private;
  21.258 +
  21.259 +	ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
  21.260 +	
  21.261 +	/* XXX: TODO: Handle error conditions here. */
  21.262 +	/* NOTE(review): the queue count below is reset regardless of ret. */
  21.263 +	/* Success case: */
  21.264 +	prv->iocb_queued = 0;
  21.265 +	
  21.266 +	return ret;
  21.267 +}
  21.268 +/* tdaio_get_fd: return a malloc'd array of MAX_IOFD ints, zero-filled except fds[0] = poll_fd. */
  21.269 +int *tdaio_get_fd(struct td_state *s)
  21.270 +{ /* NOTE(review): the malloc() result is unchecked; the array is not freed here, so presumably the caller owns it - confirm. */
  21.271 +	struct tdaio_state *prv = (struct tdaio_state *)s->private;
  21.272 +	int *fds, i;
  21.273 +
  21.274 +	fds = malloc(sizeof(int) * MAX_IOFD);
  21.275 +	/*initialise the FD array*/
  21.276 +	for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
  21.277 +
  21.278 +	fds[0] = prv->poll_fd;
  21.279 +
  21.280 +	return fds;	
  21.281 +}
  21.282 +/* tdaio_close: destroy the aio context and close the image fd; always returns 0. */
  21.283 +int tdaio_close(struct td_state *s)
  21.284 +{ /* NOTE(review): prv->poll_fd is not explicitly closed here - confirm io_destroy() covers it. */
  21.285 +	struct tdaio_state *prv = (struct tdaio_state *)s->private;
  21.286 +	
  21.287 +	io_destroy(prv->aio_ctx);
  21.288 +	close(prv->fd);
  21.289 +	
  21.290 +	return 0;
  21.291 +}
  21.292 +/* tdaio_do_callbacks: reap completed aio events, run each request's callback and return its iocb to the free stack. */
  21.293 +int tdaio_do_callbacks(struct td_state *s, int sid)
  21.294 +{ /* Returns the sum of the callbacks' return values; sid is not used. */
  21.295 +	int ret, i, rsp = 0;
  21.296 +	struct io_event *ep;
  21.297 +	struct tdaio_state *prv = (struct tdaio_state *)s->private;
  21.298 +
  21.299 +	/* Non-blocking test for completed io. */
  21.300 +	ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
  21.301 +			   NULL);
  21.302 +	/* Visits ret events; the loop body does not run when ret <= 0. */
  21.303 +	for (ep=prv->aio_events,i=ret; i-->0; ep++) {
  21.304 +		struct iocb        *io  = ep->obj;
  21.305 +		struct pending_aio *pio;
  21.306 +		
  21.307 +		pio = &prv->pending_aio[(long)io->data]; /* io->data holds the slot index stored at queue time */
  21.308 +		
  21.309 +		if (ep->res != io->u.c.nbytes) {
  21.310 +			/* TODO: handle this case better. */
  21.311 +			DPRINTF("AIO did less than I asked it to. \n");
  21.312 +		}
  21.313 +		rsp += pio->cb(s, ep->res2, pio->id, pio->private);
  21.314 +
  21.315 +		prv->iocb_free[prv->iocb_free_count++] = io; /* push back onto the free stack */
  21.316 +	}
  21.317 +	return rsp;
  21.318 +}
  21.319 +/* Driver table for the aio backend; the initializers are positional, so their order must match struct tap_disk. */
  21.320 +struct tap_disk tapdisk_aio = {
  21.321 +	"tapdisk_aio",
  21.322 +	sizeof(struct tdaio_state), /* presumably the size of the per-disk private state - confirm against struct tap_disk */
  21.323 +	tdaio_open,
  21.324 +	tdaio_queue_read,
  21.325 +	tdaio_queue_write,
  21.326 +	tdaio_submit,
  21.327 +	tdaio_get_fd,
  21.328 +	tdaio_close,
  21.329 +	tdaio_do_callbacks,
  21.330 +};
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/tools/blktap/drivers/block-qcow.c	Thu Jul 13 10:13:26 2006 +0100
    22.3 @@ -0,0 +1,1369 @@
    22.4 +/* block-qcow.c
    22.5 + *
    22.6 + * Asynchronous Qemu copy-on-write disk implementation.
    22.7 + * Code based on the Qemu implementation
    22.8 + * (see copyright notice below)
    22.9 + *
   22.10 + * (c) 2006 Andrew Warfield and Julian Chesterfield
   22.11 + *
   22.12 + */
   22.13 +
   22.14 +/*
   22.15 + * Block driver for the QCOW format
   22.16 + * 
   22.17 + * Copyright (c) 2004 Fabrice Bellard
   22.18 + * 
   22.19 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   22.20 + * of this software and associated documentation files(the "Software"), to deal
   22.21 + * in the Software without restriction, including without limitation the rights
   22.22 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   22.23 + * copies of the Software, and to permit persons to whom the Software is
   22.24 + * furnished to do so, subject to the following conditions:
   22.25 + */
   22.26 +
   22.27 +#include <errno.h>
   22.28 +#include <fcntl.h>
   22.29 +#include <stdio.h>
   22.30 +#include <stdlib.h>
   22.31 +#include <unistd.h>
   22.32 +#include <sys/statvfs.h>
   22.33 +#include <sys/stat.h>
   22.34 +#include <sys/ioctl.h>
   22.35 +#include <linux/fs.h>
   22.36 +#include <string.h>
   22.37 +#include <zlib.h>
   22.38 +#include <inttypes.h>
   22.39 +#include <libaio.h>
   22.40 +#include <openssl/md5.h>
   22.41 +#include "bswap.h"
   22.42 +#include "aes.h"
   22.43 +#include "tapdisk.h"
   22.44 +
   22.45 +#if 1
   22.46 +#define ASSERT(_p) \
   22.47 +    if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
   22.48 +    __LINE__, __FILE__); *(int*)0=0; }
   22.49 +#else
   22.50 +#define ASSERT(_p) ((void)0)
   22.51 +#endif
   22.52 +
   22.53 +
   22.54 +/******AIO DEFINES******/
   22.55 +#define REQUEST_ASYNC_FD 1
   22.56 +#define MAX_QCOW_IDS  0xFFFF
   22.57 +#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ * 8)
   22.58 +
   22.59 +struct pending_aio {
   22.60 +        td_callback_t cb;
   22.61 +        int id;
   22.62 +        void *private;
   22.63 +	int nb_sectors;
   22.64 +	char *buf;
   22.65 +	uint64_t sector;
   22.66 +	int qcow_idx;
   22.67 +};
   22.68 +
   22.69 +#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
   22.70 +
   22.71 +#define ZERO_TEST(_b) (_b | 0x00)
   22.72 +
   22.73 +/**************************************************************/
   22.74 +/* QEMU COW block driver with compression and encryption support */
   22.75 +
   22.76 +#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
   22.77 +#define XEN_MAGIC  (('X' << 24) | ('E' << 16) | ('N' << 8) | 0xfb)
   22.78 +#define QCOW_VERSION 1
   22.79 +
   22.80 +#define QCOW_CRYPT_NONE 0
   22.81 +#define QCOW_CRYPT_AES  1
   22.82 +
   22.83 +#define QCOW_OFLAG_COMPRESSED (1LL << 63)
   22.84 +
   22.85 +#ifndef O_BINARY
   22.86 +#define O_BINARY 0
   22.87 +#endif
   22.88 +
   22.89 +typedef struct QCowHeader {
   22.90 +	uint32_t magic;
   22.91 +	uint32_t version;
   22.92 +	uint64_t backing_file_offset;
   22.93 +	uint32_t backing_file_size;
   22.94 +	uint32_t mtime;
   22.95 +	uint64_t size; /* in bytes */
   22.96 +	uint8_t cluster_bits;
   22.97 +	uint8_t l2_bits;
   22.98 +	uint32_t crypt_method;
   22.99 +	uint64_t l1_table_offset;
  22.100 +} QCowHeader;
  22.101 +
  22.102 +/*Extended header for Xen enhancements*/
  22.103 +typedef struct QCowHeader_ext {
  22.104 +        uint32_t xmagic;
  22.105 +        uint32_t cksum;
  22.106 +        uint32_t min_cluster_alloc;
  22.107 +} QCowHeader_ext;
  22.108 +
  22.109 +#define L2_CACHE_SIZE 16  /*Fixed allocation in Qemu*/
  22.110 +
  22.111 +struct tdqcow_state {
  22.112 +        int fd;                        /*Main Qcow file descriptor */
  22.113 +	uint64_t fd_end;               /*Store a local record of file length */
  22.114 +	int bfd;                       /*Backing file descriptor*/
  22.115 +	char *name;                    /*Record of the filename*/
  22.116 +	int poll_pipe[2];              /*dummy fd for polling on */
  22.117 +	int encrypted;                 /*File contents are encrypted or plain*/
  22.118 +	int cluster_bits;              /*Determines length of cluster as 
  22.119 +					*indicated by file hdr*/
  22.120 +	int cluster_size;              /*Length of cluster*/
  22.121 +	int cluster_sectors;           /*Number of sectors per cluster*/
  22.122 +	int cluster_alloc;             /*Blktap fix for allocating full 
  22.123 +					*extents*/
  22.124 +	int min_cluster_alloc;         /*Blktap historical extent alloc*/
  22.125 +	int l2_bits;                   /*log2 of the number of entries in an L2 table*/
  22.126 +	int l2_size;                   /*Number of entries in an L2 table*/
  22.127 +	int l1_size;                   /*L1 table size*/
  22.128 +	uint64_t cluster_offset_mask;    
  22.129 +	uint64_t l1_table_offset;      /*L1 table offset from beginning of 
  22.130 +					*file*/
  22.131 +	uint64_t *l1_table;            /*L1 table entries*/
  22.132 +	uint64_t *l2_cache;            /*We maintain a cache of size 
  22.133 +					*L2_CACHE_SIZE of most read entries*/
  22.134 +	uint64_t l2_cache_offsets[L2_CACHE_SIZE];     /*L2 cache entries*/
  22.135 +	uint32_t l2_cache_counts[L2_CACHE_SIZE];      /*Cache access record*/
  22.136 +	uint8_t *cluster_cache;          
  22.137 +	uint8_t *cluster_data;
  22.138 +	uint8_t *sector_lock;          /*Locking bitmap for AIO reads/writes*/
  22.139 +	uint64_t cluster_cache_offset; /**/
  22.140 +	uint32_t crypt_method;         /*current crypt method, 0 if no 
  22.141 +					*key yet */
  22.142 +	uint32_t crypt_method_header;  /**/
  22.143 +	AES_KEY aes_encrypt_key;       /*AES key*/
  22.144 +	AES_KEY aes_decrypt_key;       /*AES key*/
  22.145 +        /* libaio state */
  22.146 +        io_context_t       aio_ctx;
  22.147 +	int		   nr_reqs [MAX_QCOW_IDS];
  22.148 +        struct iocb        iocb_list  [MAX_AIO_REQS];
  22.149 +        struct iocb       *iocb_free  [MAX_AIO_REQS];
  22.150 +        struct pending_aio pending_aio[MAX_AIO_REQS];
  22.151 +        int                iocb_free_count;
  22.152 +        struct iocb       *iocb_queue[MAX_AIO_REQS];
  22.153 +        int                iocb_queued;
  22.154 +        int                poll_fd;      /* NB: we require aio_poll support */
  22.155 +        struct io_event    aio_events[MAX_AIO_REQS];
  22.156 +};
  22.157 +
  22.158 +static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
  22.159 +
  22.160 +static int init_aio_state(struct td_state *bs)
  22.161 +{
  22.162 +        int i;
  22.163 +	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
  22.164 +        long     ioidx;
  22.165 +
  22.166 +        /*Initialize Locking bitmap*/
  22.167 +	s->sector_lock = calloc(1, bs->size);
  22.168 +	
  22.169 +	if (!s->sector_lock) {
  22.170 +		DPRINTF("Failed to allocate sector lock\n");
  22.171 +		goto fail;
  22.172 +	}
  22.173 +
  22.174 +        /* Initialize AIO */
  22.175 +        s->iocb_free_count = MAX_AIO_REQS;
  22.176 +        s->iocb_queued     = 0;
  22.177 +
  22.178 +        /*Signal kernel to create Poll FD for Async completion events*/
  22.179 +        s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;   
  22.180 +        s->poll_fd = io_setup(MAX_AIO_REQS, &s->aio_ctx);
  22.181 +
  22.182 +        if (s->poll_fd < 0) {
  22.183 +                DPRINTF("Retrieving Async poll fd failed\n");
  22.184 +		goto fail;
  22.185 +        }
  22.186 +
  22.187 +        for (i=0;i<MAX_AIO_REQS;i++)
  22.188 +                s->iocb_free[i] = &s->iocb_list[i];
  22.189 +	for (i=0;i<MAX_QCOW_IDS;i++)
  22.190 +		s->nr_reqs[i] = 0;
  22.191 +        DPRINTF("AIO state initialised\n");
  22.192 +
  22.193 +        return 0;
  22.194 +
  22.195 + fail:
  22.196 +	return -1;
  22.197 +}
  22.198 +
  22.199 +/*
  22.200 + *Test if block is zero. 
  22.201 + * Return: 
  22.202 + *       1 for TRUE
  22.203 + *       0 for FALSE
  22.204 + */
  22.205 +static inline int IS_ZERO(char *buf, int len)
  22.206 +{
  22.207 +	int i;
  22.208 +
  22.209 +	for (i = 0; i < len; i++) {
  22.210 +		/*if not zero, return false*/
  22.211 +		if (ZERO_TEST(*(buf + i))) return 0; 
  22.212 +	}
  22.213 +	return 1;
  22.214 +}
  22.215 +
  22.216 +static uint32_t gen_cksum(char *ptr, int len)
  22.217 +{
  22.218 +	unsigned char *md;
  22.219 +	uint32_t ret;
  22.220 +
  22.221 +	md = malloc(MD5_DIGEST_LENGTH);
  22.222 +
  22.223 +	if(!md) return 0;
  22.224 +
  22.225 +	if (MD5((unsigned char *)ptr, len, md) != md) return 0;
  22.226 +
  22.227 +	memcpy(&ret, md, sizeof(uint32_t));
  22.228 +	free(md);
  22.229 +	return ret;
  22.230 +}
  22.231 +
  22.232 +static int qcow_set_key(struct td_state *bs, const char *key)
  22.233 +{
  22.234 +	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
  22.235 +	uint8_t keybuf[16];
  22.236 +	int len, i;
  22.237 +	
  22.238 +	memset(keybuf, 0, 16);
  22.239 +	len = strlen(key);
  22.240 +	if (len > 16)
  22.241 +		len = 16;
  22.242 +	/* XXX: we could compress the chars to 7 bits to increase
  22.243 +	   entropy */
  22.244 +	for (i = 0; i < len; i++) {
  22.245 +		keybuf[i] = key[i];
  22.246 +	}
  22.247 +	s->crypt_method = s->crypt_method_header;
  22.248 +	
  22.249 +	if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
  22.250 +		return -1;
  22.251 +	if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
  22.252 +		return -1;
  22.253 +#if 0
  22.254 +	/* test */
  22.255 +	{
  22.256 +		uint8_t in[16];
  22.257 +		uint8_t out[16];
  22.258 +		uint8_t tmp[16];
  22.259 +		for (i=0; i<16; i++)
  22.260 +			in[i] = i;
  22.261 +		AES_encrypt(in, tmp, &s->aes_encrypt_key);
  22.262 +		AES_decrypt(tmp, out, &s->aes_decrypt_key);
  22.263 +		for (i = 0; i < 16; i++)
  22.264 +			DPRINTF(" %02x", tmp[i]);
  22.265 +		DPRINTF("\n");
  22.266 +		for (i = 0; i < 16; i++)
  22.267 +			DPRINTF(" %02x", out[i]);
  22.268 +		DPRINTF("\n");
  22.269 +	}
  22.270 +#endif
  22.271 +	return 0;
  22.272 +}
  22.273 +
  22.274 +static int async_read(struct tdqcow_state *s, int fd, int size, 
  22.275 +		     uint64_t offset,
  22.276 +		     char *buf, td_callback_t cb,
  22.277 +		     int id, uint64_t sector, int qcow_idx, void *private)
  22.278 +{
  22.279 +        struct   iocb *io;
  22.280 +        struct   pending_aio *pio;
  22.281 +	long     ioidx;
  22.282 +
  22.283 +        io = s->iocb_free[--s->iocb_free_count];
  22.284 +
  22.285 +        ioidx = IOCB_IDX(s, io);
  22.286 +        pio = &s->pending_aio[ioidx];
  22.287 +        pio->cb = cb;
  22.288 +        pio->id = id;
  22.289 +        pio->private = private;
  22.290 +	pio->nb_sectors = size/512;
  22.291 +	pio->buf = buf;
  22.292 +	pio->sector = sector;
  22.293 +	pio->qcow_idx = qcow_idx;
  22.294 +
  22.295 +        io_prep_pread(io, fd, buf, size, offset);
  22.296 +        io->data = (void *)ioidx;
  22.297 +
  22.298 +        s->iocb_queue[s->iocb_queued++] = io;
  22.299 +
  22.300 +        return 1;
  22.301 +}
  22.302 +
  22.303 +static int async_write(struct tdqcow_state *s, int fd, int size, 
  22.304 +		     uint64_t offset,
  22.305 +		     char *buf, td_callback_t cb,
  22.306 +		      int id, uint64_t sector, int qcow_idx, void *private)
  22.307 +{
  22.308 +        struct   iocb *io;
  22.309 +        struct   pending_aio *pio;
  22.310 +	long     ioidx;
  22.311 +
  22.312 +        io = s->iocb_free[--s->iocb_free_count];
  22.313 +
  22.314 +        ioidx = IOCB_IDX(s, io);
  22.315 +        pio = &s->pending_aio[ioidx];
  22.316 +        pio->cb = cb;
  22.317 +        pio->id = id;
  22.318 +        pio->private = private;
  22.319 +	pio->nb_sectors = size/512;
  22.320 +	pio->buf = buf;
  22.321 +	pio->sector = sector;
  22.322 +	pio->qcow_idx = qcow_idx;
  22.323 +
  22.324 +        io_prep_pwrite(io, fd, buf, size, offset);
  22.325 +        io->data = (void *)ioidx;
  22.326 +
  22.327 +        s->iocb_queue[s->iocb_queued++] = io;
  22.328 +
  22.329 +        return 1;
  22.330 +}
  22.331 +
  22.332 +/*TODO: Fix sector span!*/
  22.333 +static int aio_can_lock(struct tdqcow_state *s, uint64_t sector)
  22.334 +{
  22.335 +	return (s->sector_lock[sector] ? 0 : 1);
  22.336 +}
  22.337 +
  22.338 +static int aio_lock(struct tdqcow_state *s, uint64_t sector)
  22.339 +{
  22.340 +	return ++s->sector_lock[sector];
  22.341 +}
  22.342 +
  22.343 +static void aio_unlock(struct tdqcow_state *s, uint64_t sector)
  22.344 +{
  22.345 +	if (!s->sector_lock[sector]) return;
  22.346 +
  22.347 +	--s->sector_lock[sector];
  22.348 +	return;
  22.349 +}
  22.350 +
  22.351 +/*TODO - Use a freelist*/
  22.352 +static int get_free_idx(struct tdqcow_state *s)
  22.353 +{
  22.354 +	int i;
  22.355 +	
  22.356 +	for(i = 0; i < MAX_QCOW_IDS; i++) {
  22.357 +		if(s->nr_reqs[i] == 0) return i;
  22.358 +	}
  22.359 +	return -1;
  22.360 +}
  22.361 +
  22.362 +/* 
  22.363 + * The crypt function is compatible with the linux cryptoloop
  22.364 + * algorithm for < 4 GB images. NOTE: out_buf == in_buf is
  22.365 + * supported.
  22.366 + */
  22.367 +static void encrypt_sectors(struct tdqcow_state *s, int64_t sector_num,
  22.368 +                            uint8_t *out_buf, const uint8_t *in_buf,
  22.369 +                            int nb_sectors, int enc,
  22.370 +                            const AES_KEY *key)
  22.371 +{
  22.372 +	union {
  22.373 +		uint64_t ll[2];
  22.374 +		uint8_t b[16];
  22.375 +	} ivec;
  22.376 +	int i;
  22.377 +	
  22.378 +	for (i = 0; i < nb_sectors; i++) {
  22.379 +		ivec.ll[0] = cpu_to_le64(sector_num);
  22.380 +		ivec.ll[1] = 0;
  22.381 +		AES_cbc_encrypt(in_buf, out_buf, 512, key, 
  22.382 +				ivec.b, enc);
  22.383 +		sector_num++;
  22.384 +		in_buf += 512;
  22.385 +		out_buf += 512;
  22.386 +	}
  22.387 +}
  22.388 +
  22.389 +
  22.390 +/* 'allocate' is:
  22.391 + *
  22.392 + * 0 to not allocate.
  22.393 + *
  22.394 + * 1 to allocate a normal cluster (for sector indexes 'n_start' to
  22.395 + * 'n_end')
  22.396 + *
  22.397 + * 2 to allocate a compressed cluster of size
  22.398 + * 'compressed_size'. 'compressed_size' must be > 0 and <
  22.399 + * cluster_size 
  22.400 + *
  22.401 + * return 0 if not allocated.
  22.402 + */
  22.403 +static uint64_t get_cluster_offset(struct td_state *bs,
  22.404 +                                   uint64_t offset, int allocate,
  22.405 +                                   int compressed_size,
  22.406 +                                   int n_start, int n_end)
  22.407 +{
  22.408 +	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
  22.409 +	int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
  22.410 +	char *tmp_ptr, *tmp_ptr2, *l2_ptr, *l1_ptr;
  22.411 +	uint64_t l2_offset, *l2_table, cluster_offset, tmp;
  22.412 +	uint32_t min_count;
  22.413 +	int new_l2_table;
  22.414 +
  22.415 +	/*Check L1 table for the extent offset*/
  22.416 +	l1_index = offset >> (s->l2_bits + s->cluster_bits);
  22.417 +	l2_offset = s->l1_table[l1_index];
  22.418 +	new_l2_table = 0;
  22.419 +	if (!l2_offset) {
  22.420 +		if (!allocate)
  22.421 +			return 0;
  22.422 +		/* 
  22.423 +		 * allocating a new l2 entry + extent 
  22.424 +		 * at the end of the file, we must also
  22.425 +		 * update the L1 entry safely.
  22.426 +		 */
  22.427 +		l2_offset = s->fd_end;
  22.428 +
  22.429 +		/* round to cluster size */
  22.430 +		l2_offset = (l2_offset + s->cluster_size - 1) 
  22.431 +			& ~(s->cluster_size - 1);
  22.432 +
  22.433 +		/* update the L1 entry */
  22.434 +		s->l1_table[l1_index] = l2_offset;
  22.435 +		tmp = cpu_to_be64(l2_offset);
  22.436 +		
  22.437 +		/*Truncate file for L2 table 
  22.438 +		 *(initialised to zero in case we crash)*/
  22.439 +		ftruncate(s->fd, l2_offset + (s->l2_size * sizeof(uint64_t)));
  22.440 +		s->fd_end += (s->l2_size * sizeof(uint64_t));
  22.441 +
  22.442 +		/*Update the L1 table entry on disk
  22.443 +                 * (for O_DIRECT we write 4KByte blocks)*/
  22.444 +		l1_sector = (l1_index * sizeof(uint64_t)) >> 12;
  22.445 +		l1_ptr = (char *)s->l1_table + (l1_sector << 12);
  22.446 +
  22.447 +		if (posix_memalign((void **)&tmp_ptr, 4096, 4096) != 0) {
  22.448 +			DPRINTF("ERROR allocating memory for L1 table\n");
  22.449 +		}
  22.450 +		memcpy(tmp_ptr, l1_ptr, 4096);
  22.451 +
  22.452 +		/*
  22.453 +		 * Issue non-asynchronous L1 write.
  22.454 +		 * For safety, we must ensure that
  22.455 +		 * entry is written before blocks.
  22.456 +		 */
  22.457 +		lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET);
  22.458 +		if (write(s->fd, tmp_ptr, 4096) != 4096)
  22.459 +			return 0;
  22.460 +		free(tmp_ptr);
  22.461 +
  22.462 +		new_l2_table = 1;
  22.463 +		goto cache_miss;
  22.464 +	} else if (s->min_cluster_alloc == s->l2_size) {
  22.465 +		/*Fast-track the request*/
  22.466 +		cluster_offset = l2_offset + (s->l2_size * sizeof(uint64_t));
  22.467 +		l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
  22.468 +		return cluster_offset + (l2_index * s->cluster_size);
  22.469 +	}
  22.470 +
  22.471 +	/*Check to see if L2 entry is already cached*/
  22.472 +	for (i = 0; i < L2_CACHE_SIZE; i++) {
  22.473 +		if (l2_offset == s->l2_cache_offsets[i]) {
  22.474 +			/* increment the hit count */
  22.475 +			if (++s->l2_cache_counts[i] == 0xffffffff) {
  22.476 +				for (j = 0; j < L2_CACHE_SIZE; j++) {
  22.477 +					s->l2_cache_counts[j] >>= 1;
  22.478 +				}
  22.479 +			}
  22.480 +			l2_table = s->l2_cache + (i << s->l2_bits);
  22.481 +			goto found;
  22.482 +		}
  22.483 +	}
  22.484 +
  22.485 +cache_miss:
  22.486 +	/* not found: load a new entry in the least used one */
  22.487 +	min_index = 0;
  22.488 +	min_count = 0xffffffff;
  22.489 +	for (i = 0; i < L2_CACHE_SIZE; i++) {
  22.490 +		if (s->l2_cache_counts[i] < min_count) {
  22.491 +			min_count = s->l2_cache_counts[i];
  22.492 +			min_index = i;
  22.493 +		}
  22.494 +	}
  22.495 +	l2_table = s->l2_cache + (min_index << s->l2_bits);
  22.496 +
  22.497 +	/*If extent pre-allocated, read table from disk, 
  22.498 +	 *otherwise write new table to disk*/
  22.499 +	if (new_l2_table) {
  22.500 +		/*Should we allocate the whole extent? Adjustable parameter.*/
  22.501 +		if (s->cluster_alloc == s->l2_size) {
  22.502 +			cluster_offset = l2_offset + 
  22.503 +				(s->l2_size * sizeof(uint64_t));
  22.504 +			cluster_offset = (cluster_offset + s->cluster_size - 1)
  22.505 +				& ~(s->cluster_size - 1);
  22.506 +			ftruncate(s->fd, cluster_offset + 
  22.507 +				  (s->cluster_size * s->l2_size));
  22.508 +			s->fd_end = cluster_offset + 
  22.509 +				(s->cluster_size * s->l2_size);
  22.510 +			for (i = 0; i < s->l2_size; i++) {
  22.511 +				l2_table[i] = cpu_to_be64(cluster_offset + 
  22.512 +							  (i*s->cluster_size));
  22.513 +			}  
  22.514 +		} else memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
  22.515 +
  22.516 +		lseek(s->fd, l2_offset, SEEK_SET);
  22.517 +		if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
  22.518 +		    s->l2_size * sizeof(uint64_t))
  22.519 +			return 0;
  22.520 +	} else {
  22.521 +		lseek(s->fd, l2_offset, SEEK_SET);
  22.522 +		if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) != 
  22.523 +		    s->l2_size * sizeof(uint64_t))
  22.524 +			return 0;
  22.525 +	}
  22.526 +	
  22.527 +	/*Update the cache entries*/ 
  22.528 +	s->l2_cache_offsets[min_index] = l2_offset;
  22.529 +	s->l2_cache_counts[min_index] = 1;
  22.530 +
  22.531 +found:
  22.532 +	/*The extent is split into 's->l2_size' blocks of 
  22.533 +	 *size 's->cluster_size'*/
  22.534 +	l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
  22.535 +	cluster_offset = be64_to_cpu(l2_table[l2_index]);
  22.536 +
  22.537 +	if (!cluster_offset || 
  22.538 +	    ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1) ) {
  22.539 +		if (!allocate)
  22.540 +			return 0;
  22.541 +		
  22.542 +		if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
  22.543 +		    (n_end - n_start) < s->cluster_sectors) {
  22.544 +			/* cluster is already allocated but compressed, we must
  22.545 +			   decompress it in the case it is not completely
  22.546 +			   overwritten */
  22.547 +			if (decompress_cluster(s, cluster_offset) < 0)
  22.548 +				return 0;
  22.549 +			cluster_offset = lseek(s->fd, 0, SEEK_END);
  22.550 +			cluster_offset = (cluster_offset + s->cluster_size - 1)
  22.551 +				& ~(s->cluster_size - 1);
  22.552 +			/* write the cluster content - not asynchronous */
  22.553 +			lseek(s->fd, cluster_offset, SEEK_SET);
  22.554 +			if (write(s->fd, s->cluster_cache, s->cluster_size) != 
  22.555 +			    s->cluster_size)
  22.556 +			    return -1;
  22.557 +		} else {
  22.558 +			/* allocate a new cluster */
  22.559 +			cluster_offset = lseek(s->fd, 0, SEEK_END);
  22.560 +			if (allocate == 1) {
  22.561 +				/* round to cluster size */
  22.562 +				cluster_offset = 
  22.563 +					(cluster_offset + s->cluster_size - 1) 
  22.564 +					& ~(s->cluster_size - 1);
  22.565 +				ftruncate(s->fd, cluster_offset + 
  22.566 +					  s->cluster_size);
  22.567 +				/* if encrypted, we must initialize the cluster
  22.568 +				   content which won't be written */
  22.569 +				if (s->crypt_method && 
  22.570 +				    (n_end - n_start) < s->cluster_sectors) {
  22.571 +					uint64_t start_sect;
  22.572 +					start_sect = (offset & 
  22.573 +						      ~(s->cluster_size - 1)) 
  22.574 +							      >> 9;
  22.575 +					memset(s->cluster_data + 512, 
  22.576 +					       0xaa, 512);
  22.577 +					for (i = 0; i < s->cluster_sectors;i++)
  22.578 +					{
  22.579 +						if (i < n_start || i >= n_end) 
  22.580 +						{
  22.581 +							encrypt_sectors(s, start_sect + i, 
  22.582 +									s->cluster_data, 
  22.583 +									s->cluster_data + 512, 1, 1,
  22.584 +									&s->aes_encrypt_key);
  22.585 +							lseek(s->fd, cluster_offset + i * 512, SEEK_SET);
  22.586 +							if (write(s->fd, s->cluster_data, 512) != 512)
  22.587 +								return -1;
  22.588 +						}
  22.589 +					}
  22.590 +				}
  22.591 +			} else {
  22.592 +				cluster_offset |= QCOW_OFLAG_COMPRESSED | 
  22.593 +					(uint64_t)compressed_size 
  22.594 +						<< (63 - s->cluster_bits);
  22.595 +			}
  22.596 +		}
  22.597 +		/* update L2 table */
  22.598 +		tmp = cpu_to_be64(cluster_offset);
  22.599 +		l2_table[l2_index] = tmp;
  22.600 +
  22.601 +		/*For O_DIRECT we write 4KByte blocks*/
  22.602 +		l2_sector = (l2_index * sizeof(uint64_t)) >> 12;
  22.603 +		l2_ptr = (char *)l2_table + (l2_sector << 12);
  22.604 +		
  22.605 +		if (posix_memalign((void **)&tmp_ptr2, 4096, 4096) != 0) {
  22.606 +			DPRINTF("ERROR allocating memory for L1 table\n");
  22.607 +		}
  22.608 +		memcpy(tmp_ptr2, l2_ptr, 4096);
  22.609 +		aio_lock(s, offset >> 9);
  22.610 +		async_write(s, s->fd, 4096, l2_offset + (l2_sector << 12), 
  22.611 +			    tmp_ptr2, 0, -2, offset >> 9, 0, NULL);
  22.612 +	}
  22.613 +	return cluster_offset;
  22.614 +}
  22.615 +
  22.616 +static void init_cluster_cache(struct td_state *bs)
  22.617 +{
  22.618 +	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
  22.619 +	uint32_t count = 0;
  22.620 +	int i, cluster_entries;
  22.621 +
  22.622 +	cluster_entries = s->cluster_size / 512;
  22.623 +	DPRINTF("Initialising Cluster cache, %d sectors per cluster (%d cluster size)\n",
  22.624 +		cluster_entries, s->cluster_size);
  22.625 +
  22.626 +	for (i = 0; i < bs->size; i += cluster_entries) {
  22.627 +		if (get_cluster_offset(bs, i << 9, 0, 0, 0, 1)) count++;
  22.628 +		if (count >= L2_CACHE_SIZE) return;
  22.629 +	}
  22.630 +	DPRINTF("Finished cluster initialisation, added %d entries\n", count);
  22.631 +	return;
  22.632 +}
  22.633 +
  22.634 +static int qcow_is_allocated(struct td_state *bs, int64_t sector_num, 
  22.635 +                             int nb_sectors, int *pnum)
  22.636 +{
  22.637 +	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
  22.638 +
  22.639 +	int index_in_cluster, n;
  22.640 +	uint64_t cluster_offset;
  22.641 +
  22.642 +	cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
  22.643 +	index_in_cluster = sector_num & (s->cluster_sectors - 1);
  22.644 +	n = s->cluster_sectors - index_in_cluster;
  22.645 +	if (n > nb_sectors)
  22.646 +		n = nb_sectors;
  22.647 +	*pnum = n;
  22.648 +	return (cluster_offset != 0);
  22.649 +}
  22.650 +
  22.651 +static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
  22.652 +                             const uint8_t *buf, int buf_size)
  22.653 +{
  22.654 +	z_stream strm1, *strm = &strm1;
  22.655 +	int ret, out_len;
  22.656 +	
  22.657 +	memset(strm, 0, sizeof(*strm));
  22.658 +	
  22.659 +	strm->next_in = (uint8_t *)buf;
  22.660 +	strm->avail_in = buf_size;
  22.661 +	strm->next_out = out_buf;
  22.662 +	strm->avail_out = out_buf_size;
  22.663 +	
  22.664 +	ret = inflateInit2(strm, -12);
  22.665 +	if (ret != Z_OK)
  22.666 +		return -1;
  22.667 +	ret = inflate(strm, Z_FINISH);
  22.668 +	out_len = strm->next_out - out_buf;
  22.669 +	if ( (ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
  22.670 +	    (out_len != out_buf_size) ) {
  22.671 +		inflateEnd(strm);
  22.672 +		return -1;
  22.673 +	}
  22.674 +	inflateEnd(strm);
  22.675 +	return 0;
  22.676 +}
  22.677 +                              
  22.678 +static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset)
  22.679 +{
  22.680 +	int ret, csize;
  22.681 +	uint64_t coffset;
  22.682 +
  22.683 +	coffset = cluster_offset & s->cluster_offset_mask;
  22.684 +	if (s->cluster_cache_offset != coffset) {
  22.685 +		csize = cluster_offset >> (63 - s->cluster_bits);
  22.686 +		csize &= (s->cluster_size - 1);
  22.687 +		lseek(s->fd, coffset, SEEK_SET);
  22.688 +		ret = read(s->fd, s->cluster_data, csize);
  22.689 +		if (ret != csize) 
  22.690 +			return -1;
  22.691 +		if (decompress_buffer(s->cluster_cache, s->cluster_size,
  22.692 +				      s->cluster_data, csize) < 0) {
  22.693 +			return -1;
  22.694 +		}
  22.695 +		s->cluster_cache_offset = coffset;
  22.696 +	}
  22.697 +	return 0;
  22.698 +}
  22.699 +
  22.700 +/* Open the disk file and initialize qcow state. */
  22.701 +int tdqcow_open (struct td_state *bs, const char *name)
  22.702 +{
  22.703 +	int fd, len, i, shift, ret, size, l1_table_size;
  22.704 +	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
  22.705 +	char *buf;
  22.706 +	QCowHeader *header;
  22.707 +	QCowHeader_ext *exthdr;
  22.708 +	uint32_t cksum;
  22.709 +
  22.710 + 	DPRINTF("QCOW: Opening %s\n",name);
  22.711 +	/* set up a pipe so that we can hand back a poll fd that won't fire.*/
  22.712 +	ret = pipe(s->poll_pipe);
  22.713 +	if (ret != 0)
  22.714 +		return (0 - errno);
  22.715 +
  22.716 +	fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE);
  22.717 +	if (fd < 0) {
  22.718 +		DPRINTF("Unable to open %s (%d)\n",name,0 - errno);
  22.719 +		return -1;
  22.720 +	}
  22.721 +
  22.722 +	s->fd = fd;
  22.723 +	asprintf(&s->name,"%s", name);
  22.724 +
  22.725 +	ASSERT(sizeof(header) < 512);
  22.726 +
  22.727 +	ret = posix_memalign((void **)&buf, 512, 512);
  22.728 +	if (ret != 0) goto fail;
  22.729 +
  22.730 +	if (read(fd, buf, 512) != 512)
  22.731 +		goto fail;
  22.732 +
  22.733 +	header = (QCowHeader *)buf;
  22.734 +	be32_to_cpus(&header->magic);
  22.735 +	be32_to_cpus(&header->version);
  22.736 +	be64_to_cpus(&header->backing_file_offset);
  22.737 +	be32_to_cpus(&header->backing_file_size);
  22.738 +	be32_to_cpus(&header->mtime);
  22.739 +	be64_to_cpus(&header->size);
  22.740 +	be32_to_cpus(&header->crypt_method);
  22.741 +	be64_to_cpus(&header->l1_table_offset);
  22.742 +   
  22.743 +	if (header->magic != QCOW_MAGIC || header->version > QCOW_VERSION)
  22.744 +		goto fail;
  22.745 +	if (header->size <= 1 || header->cluster_bits < 9)
  22.746 +		goto fail;
  22.747 +	if (header->crypt_method > QCOW_CRYPT_AES)
  22.748 +		goto fail;
  22.749 +	s->crypt_method_header = header->crypt_method;
  22.750 +	if (s->crypt_method_header)
  22.751 +		s->encrypted = 1;
  22.752 +	s->cluster_bits = header->cluster_bits;
  22.753 +	s->cluster_size = 1 << s->cluster_bits;
  22.754 +	s->cluster_sectors = 1 << (s->cluster_bits - 9);
  22.755 +	s->l2_bits = header->l2_bits;
  22.756 +	s->l2_size = 1 << s->l2_bits;
  22.757 +	s->cluster_alloc = s->l2_size;
  22.758 +	bs->size = header->size / 512;
  22.759 +	s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
  22.760 +	
  22.761 +	/* read the level 1 table */
  22.762 +	shift = s->cluster_bits + s->l2_bits;
  22.763 +	s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
  22.764 +	
  22.765 +	s->l1_table_offset = header->l1_table_offset;
  22.766 +
  22.767 +	/*allocate a 4Kbyte multiple of memory*/
  22.768 +	l1_table_size = s->l1_size * sizeof(uint64_t);
  22.769 +	if (l1_table_size % 4096 > 0) {
  22.770 +		l1_table_size = ((l1_table_size >> 12) + 1) << 12;
  22.771 +	}
  22.772 +	ret = posix_memalign((void **)&s->l1_table, 4096, l1_table_size);
  22.773 +	if (ret != 0) goto fail;
  22.774 +	memset(s->l1_table, 0x00, l1_table_size);
  22.775 +
  22.776 +	DPRINTF("L1 Table offset detected: %llu, size %d (%d)\n",
  22.777 +		(long long)s->l1_table_offset,
  22.778 +		(int) (s->l1_size * sizeof(uint64_t)), 
  22.779 +		l1_table_size);
  22.780 +
  22.781 +	lseek(fd, s->l1_table_offset, SEEK_SET);
  22.782 +	if (read(fd, s->l1_table, l1_table_size) != l1_table_size)
  22.783 +		goto fail;
  22.784 +/*	for(i = 0;i < s->l1_size; i++) {
  22.785 +		//be64_to_cpus(&s->l1_table[i]);
  22.786 +		DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]);
  22.787 +		}*/
  22.788 +
  22.789 +	/* alloc L2 cache */
  22.790 +	size = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t);
  22.791 +	ret = posix_memalign((void **)&s->l2_cache, 4096, size);
  22.792 +	if(ret != 0) goto fail;
  22.793 +
  22.794 +	size = s->cluster_size;
  22.795 +	ret = posix_memalign((void **)&s->cluster_cache, 4096, size);
  22.796 +	if(ret != 0) goto fail;
  22.797 +
  22.798 +	ret = posix_memalign((void **)&s->cluster_data, 4096, size);
  22.799 +	if(ret != 0) goto fail;
  22.800 +	s->cluster_cache_offset = -1;
  22.801 +
  22.802 +	/* read the backing file name */
  22.803 +	s->bfd = -1;
  22.804 +	if (header->backing_file_offset != 0) {
  22.805 +		DPRINTF("Reading backing file data\n");
  22.806 +		len = header->backing_file_size;
  22.807 +		if (len > 1023)
  22.808 +			len = 1023;
  22.809 +
  22.810 +                /*TODO - Fix read size for O_DIRECT and use original fd!*/
  22.811 +		fd = open(name, O_RDONLY | O_LARGEFILE);
  22.812 +
  22.813 +		lseek(fd, header->backing_file_offset, SEEK_SET);
  22.814 +		if (read(fd, bs->backing_file, len) != len)
  22.815 +			goto fail;
  22.816 +		bs->backing_file[len] = '\0';
  22.817 +		close(fd);
  22.818 +		/***********************************/
  22.819 +
  22.820 +		/*Open backing file*/
  22.821 +		fd = open(bs->backing_file, O_RDONLY | O_DIRECT | O_LARGEFILE);
  22.822 +		if (fd < 0) {
  22.823 +			DPRINTF("Unable to open backing file: %s\n",
  22.824 +				bs->backing_file);
  22.825 +			goto fail;
  22.826 +		}
  22.827 +		s->bfd = fd;
  22.828 +		s->cluster_alloc = 1; /*Cannot use pre-alloc*/
  22.829 +	}
  22.830 +
  22.831 +        bs->sector_size = 512;
  22.832 +        bs->info = 0;
  22.833 +	
  22.834 +	/*Detect min_cluster_alloc*/
  22.835 +	s->min_cluster_alloc = 1; /*Default*/
  22.836 +	if (s->bfd == -1 && (s->l1_table_offset % 4096 == 0) ) {
  22.837 +		/*We test to see if the xen magic # exists*/
  22.838 +		exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));
  22.839 +		be32_to_cpus(&exthdr->xmagic);
  22.840 +		if(exthdr->xmagic != XEN_MAGIC) 
  22.841 +			goto end_xenhdr;
  22.842 +
  22.843 +		/*Finally check the L1 table cksum*/
  22.844 +		be32_to_cpus(&exthdr->cksum);
  22.845 +		cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t));
  22.846 +		if(exthdr->cksum != cksum)
  22.847 +			goto end_xenhdr;
  22.848 +			
  22.849 +		be32_to_cpus(&exthdr->min_cluster_alloc);
  22.850 +		s->min_cluster_alloc = exthdr->min_cluster_alloc; 
  22.851 +	}
  22.852 +
  22.853 + end_xenhdr:
  22.854 +	if (init_aio_state(bs)!=0) {
  22.855 +		DPRINTF("Unable to initialise AIO state\n");
  22.856 +		goto fail;
  22.857 +	}
  22.858 +	s->fd_end = lseek(s->fd, 0, SEEK_END);
  22.859 +
  22.860 +	return 0;
  22.861 +	
  22.862 +fail:
  22.863 +	DPRINTF("QCOW Open failed\n");
  22.864 +	free(s->l1_table);
  22.865 +	free(s->l2_cache);
  22.866 +	free(s->cluster_cache);
  22.867 +	free(s->cluster_data);
  22.868 +	close(fd);
  22.869 +	return -1;
  22.870 +}
  22.871 +
  22.872 + int tdqcow_queue_read(struct td_state *bs, uint64_t sector,
  22.873 +			       int nb_sectors, char *buf, td_callback_t cb,
  22.874 +			       int id, void *private)
  22.875 +{
         +	/*
         +	 * Queue a read of nb_sectors sectors, starting at 'sector', into buf.
         +	 * The request is split at cluster boundaries: allocated clusters (and
         +	 * unallocated ones when s->bfd > 0) are queued as async reads, other
         +	 * unallocated clusters are zero-filled and compressed clusters are
         +	 * decompressed synchronously.
         +	 *
         +	 * Returns -EBUSY if a sector cannot be locked and -ENOMEM if a lock
         +	 * or iocb runs out while queueing.  When no async read was queued
         +	 * the callback runs here and its result is returned; otherwise 0.
         +	 */
  22.876 +	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
  22.877 +	int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0;
  22.878 +	uint64_t cluster_offset;
  22.879 +
  22.880 +	/*Check we can get a lock*/
  22.881 +	for (i = 0; i < nb_sectors; i++)
  22.882 +		if (!aio_can_lock(s, sector + i)) {
  22.883 +			DPRINTF("AIO_CAN_LOCK failed [%llu]\n", 
  22.884 +				(long long) sector + i);
  22.885 +			return -EBUSY;
  22.886 +		}
  22.887 +	
  22.888 +	/*We store a local record of the request*/
  22.889 +	qcow_idx = get_free_idx(s);
  22.890 +	while (nb_sectors > 0) {
  22.891 +		cluster_offset = 
  22.892 +			get_cluster_offset(bs, sector << 9, 0, 0, 0, 0);
  22.893 +		index_in_cluster = sector & (s->cluster_sectors - 1);
  22.894 +		n = s->cluster_sectors - index_in_cluster;
  22.895 +		if (n > nb_sectors)
  22.896 +			n = nb_sectors;
  22.897 +
  22.898 +		if (s->iocb_free_count == 0 || !aio_lock(s, sector)) {
  22.899 +			DPRINTF("AIO_LOCK or iocb_free_count (%d) failed" 
  22.900 +				"[%llu]\n", s->iocb_free_count, 
  22.901 +				(long long) sector);
  22.902 +			return -ENOMEM;
  22.903 +		}
  22.904 +		
  22.905 +		if (!cluster_offset && (s->bfd > 0)) {
  22.906 +			s->nr_reqs[qcow_idx]++;
  22.907 +			asubmit += async_read(s, s->bfd, n * 512, sector << 9, 
  22.908 +					      buf, cb, id, sector, 
  22.909 +					      qcow_idx, private);
  22.910 +		} else if(!cluster_offset) {
  22.911 +			memset(buf, 0, 512 * n);
  22.912 +			aio_unlock(s, sector);
  22.913 +		} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
         +			/* Served synchronously: no completion event will
         +			 * ever release the sector lock, so drop it here
         +			 * (this also covers the decompress failure). */
         +			aio_unlock(s, sector);
  22.914 +			if (decompress_cluster(s, cluster_offset) < 0) {
  22.915 +				ret = -1;
  22.916 +				goto done;
  22.917 +			}
  22.918 +			memcpy(buf, s->cluster_cache + index_in_cluster * 512, 
  22.919 +			       512 * n);
  22.920 +		} else {			
  22.921 +			s->nr_reqs[qcow_idx]++;
  22.922 +			asubmit += async_read(s, s->fd, n * 512, 
  22.923 +					      (cluster_offset + 
  22.924 +					       index_in_cluster * 512), 
  22.925 +					      buf, cb, id, sector, 
  22.926 +					      qcow_idx, private);
  22.927 +		}
  22.928 +		nb_sectors -= n;
  22.929 +		sector += n;
  22.930 +		buf += n * 512;
  22.931 +	}
  22.932 +done:
  22.933 +        /*Callback if no async requests outstanding*/
  22.934 +        if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private);
  22.935 +
  22.936 +	return 0;
  22.937 +}
  22.938 +
  22.939 + int tdqcow_queue_write(struct td_state *bs, uint64_t sector,
  22.940 +			       int nb_sectors, char *buf, td_callback_t cb,
  22.941 +			       int id, void *private)
  22.942 +{
         +	/*
         +	 * Queue a write of nb_sectors sectors, starting at 'sector', from
         +	 * buf.  The request is split at cluster boundaries.  Chunks that are
         +	 * all zeroes (IS_ZERO) are only written when get_cluster_offset
         +	 * already reports a cluster for them; all other chunks are written
         +	 * to the offset returned by get_cluster_offset (called with its third
         +	 * argument set to 1, presumably requesting allocation).
         +	 *
         +	 * Returns -EBUSY if a sector cannot be locked and -ENOMEM if a lock
         +	 * or iocb runs out while queueing.  When no async write was queued
         +	 * the callback runs here and its result is returned; otherwise 0.
         +	 *
         +	 * NOTE(review): with crypt_method set every chunk is encrypted into
         +	 * the same s->cluster_data buffer before submission - confirm that
         +	 * async_write does not keep a reference across chunks.
         +	 */
  22.943 +	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
  22.944 +	int ret = 0, index_in_cluster, n, i, qcow_idx, asubmit = 0;
  22.945 +	uint64_t cluster_offset;
  22.946 +
  22.947 +	/*Check we can get a lock*/
  22.948 +	for (i = 0; i < nb_sectors; i++)
  22.949 +		if (!aio_can_lock(s, sector + i))  {
  22.950 +			DPRINTF("AIO_CAN_LOCK failed [%llu]\n", 
  22.951 +				(long long) (sector + i));
  22.952 +			return -EBUSY;
  22.953 +		}
  22.954 +		   
  22.955 +	/*We store a local record of the request*/
  22.956 +	qcow_idx = get_free_idx(s);	
  22.957 +	while (nb_sectors > 0) {
  22.958 +		index_in_cluster = sector & (s->cluster_sectors - 1);
  22.959 +		n = s->cluster_sectors - index_in_cluster;
  22.960 +		if (n > nb_sectors)
  22.961 +			n = nb_sectors;
  22.962 +
  22.963 +		if (s->iocb_free_count == 0 || !aio_lock(s, sector)){
  22.964 +			DPRINTF("AIO_LOCK or iocb_free_count (%d) failed" 
  22.965 +				"[%llu]\n", s->iocb_free_count, 
  22.966 +				(long long) sector);
  22.967 +			return -ENOMEM;
  22.968 +		}
  22.969 +
  22.970 +		if (!IS_ZERO(buf,n * 512)) {
  22.971 +
  22.972 +			cluster_offset = get_cluster_offset(bs, sector << 9, 
  22.973 +							    1, 0, 
  22.974 +							    index_in_cluster, 
  22.975 +							    index_in_cluster+n
  22.976 +				);
  22.977 +			if (!cluster_offset) {
  22.978 +				DPRINTF("Ooops, no write cluster offset!\n");
         +				/* Nothing is queued for this sector, so no
         +				 * completion will unlock it: do it here. */
         +				aio_unlock(s, sector);
  22.979 +				ret = -1;
  22.980 +				goto done;
  22.981 +			}
  22.982 +
  22.983 +			if (s->crypt_method) {
  22.984 +				encrypt_sectors(s, sector, s->cluster_data, 
  22.985 +						(unsigned char *)buf, n, 1,
  22.986 +						&s->aes_encrypt_key);
  22.987 +				s->nr_reqs[qcow_idx]++;
  22.988 +				asubmit += async_write(s, s->fd, n * 512, 
  22.989 +						       (cluster_offset + 
  22.990 +							index_in_cluster*512), 
  22.991 +						       (char *)s->cluster_data,
  22.992 +						       cb, id, sector, 
  22.993 +						       qcow_idx, private);
  22.994 +			} else {
  22.995 +				s->nr_reqs[qcow_idx]++;
  22.996 +				asubmit += async_write(s, s->fd, n * 512, 
  22.997 +						       (cluster_offset + 
  22.998 +							index_in_cluster*512),
  22.999 +						       buf, cb, id, sector, 
 22.1000 +						       qcow_idx, private);
 22.1001 +			}
 22.1002 +		} else {
 22.1003 +			/*Write data contains zeros, but we must check to see 
 22.1004 +			  if cluster already allocated*/
 22.1005 +			cluster_offset = get_cluster_offset(bs, sector << 9, 
 22.1006 +							    0, 0, 
 22.1007 +							    index_in_cluster, 
 22.1008 +							    index_in_cluster+n
 22.1009 +				);	
 22.1010 +			if(cluster_offset) {
 22.1011 +				if (s->crypt_method) {
 22.1012 +					encrypt_sectors(s, sector, 
 22.1013 +							s->cluster_data, 
 22.1014 +							(unsigned char *)buf, 
 22.1015 +							n, 1,
 22.1016 +							&s->aes_encrypt_key);
 22.1017 +					s->nr_reqs[qcow_idx]++;
 22.1018 +					asubmit += async_write(s, s->fd, 
 22.1019 +							       n * 512, 
 22.1020 +							       (cluster_offset+
 22.1021 +								index_in_cluster * 512), 
 22.1022 +							       (char *)s->cluster_data, cb, id, sector, 
 22.1023 +							       qcow_idx, private);
 22.1024 +				} else {
 22.1025 +					s->nr_reqs[qcow_idx]++;
 22.1026 +					asubmit += async_write(s, s->fd, n*512,
 22.1027 +							       cluster_offset + index_in_cluster * 512, 
 22.1028 +							       buf, cb, id, sector, 
 22.1029 +							       qcow_idx, private);
 22.1030 +				}
 22.1031 +			}
 22.1032 +			else aio_unlock(s, sector);
 22.1033 +		}
 22.1034 +		nb_sectors -= n;
 22.1035 +		sector += n;
 22.1036 +		buf += n * 512;
 22.1037 +	}
 22.1038 +	s->cluster_cache_offset = -1; /* disable compressed cache */
 22.1039 +
 22.1040 +done:
 22.1041 +	/*Callback if no async requests outstanding*/
 22.1042 +        if (!asubmit) return cb(bs, ret == -1 ? -1 : 0, id, private);
 22.1043 +
 22.1044 +	return 0;
 22.1045 +}
 22.1046 + 		
 22.1047 +int tdqcow_submit(struct td_state *bs)
 22.1048 +{
 22.1049 +        struct tdqcow_state *state = (struct tdqcow_state *)bs->private;
 22.1050 +        int submitted;
 22.1051 +
 22.1052 +        /* Hand the whole batch of queued iocbs to the kernel at once. */
 22.1053 +        submitted = io_submit(state->aio_ctx, state->iocb_queued,
 22.1054 +                              state->iocb_queue);
 22.1055 +
 22.1056 +        /* XXX: TODO: errors and partial submissions are not handled; the
 22.1057 +         * queue is emptied whatever io_submit() reported. */
 22.1058 +        state->iocb_queued = 0;
 22.1059 +        return submitted;
 22.1060 +}
 22.1061 +
 22.1062 +
 22.1063 +int *tdqcow_get_fd(struct td_state *bs)
 22.1064 +{
 22.1065 +	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
 22.1066 +	int *fds;
 22.1067 +	/* Return a new MAX_IOFD-entry array with s->poll_fd in slot 0 and
 22.1068 +	 * zeroes elsewhere (calloc zero-fills), or NULL if out of memory. */
 22.1069 +	fds = calloc(MAX_IOFD, sizeof(int));
 22.1070 +	if (fds == NULL) return NULL; /* malloc result used to go unchecked */
 22.1071 +
 22.1072 +	fds[0] = s->poll_fd;
 22.1073 +	return fds;
 22.1074 +}
 22.1075 +
 22.1076 +int tdqcow_close(struct td_state *bs)
 22.1077 +{
         +	/*
         +	 * Tear down the qcow state: when min_cluster_alloc equals l2_size the
         +	 * L1 table checksum is recomputed and written back into the file at
         +	 * sizeof(QCowHeader) + sizeof(uint32_t) (presumably the cksum field
         +	 * of the extended header - confirm against QCowHeader_ext), then the
         +	 * buffers are freed and the image fd is closed.  Always returns 0.
         +	 */
 22.1078 +	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
 22.1079 +	uint32_t cksum, out;
 22.1080 +	int fd, offset;
 22.1081 +
 22.1082 +	/*Update the hdr cksum*/
 22.1083 +	if(s->min_cluster_alloc == s->l2_size) {
 22.1084 +		cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t));
 22.1085 +		printf("Writing cksum: %d",cksum);
 22.1086 +		fd = open(s->name, O_WRONLY | O_LARGEFILE); /*Open without O_DIRECT*/
 22.1087 +		offset = sizeof(QCowHeader) + sizeof(uint32_t);
 22.1088 +		out = cpu_to_be32(cksum);
         +		/* Never seek/write/close an fd that failed to open, and
         +		 * report a checksum that could not be stored. */
 22.1089 +		if (fd < 0 || lseek(fd, offset, SEEK_SET) != offset ||
 22.1090 +		    write(fd, &out, sizeof(uint32_t)) != (ssize_t)sizeof(uint32_t))
         +			DPRINTF("Unable to update L1 checksum in %s\n", s->name);
 22.1091 +		if (fd >= 0) close(fd);
 22.1092 +	}
 22.1093 +
 22.1094 +	free(s->name);
 22.1095 +	free(s->l1_table);
 22.1096 +	free(s->l2_cache);
 22.1097 +	free(s->cluster_cache);
 22.1098 +	free(s->cluster_data);
 22.1099 +	close(s->fd);	
 22.1100 +	return 0;
 22.1101 +}
 22.1102 +
 22.1103 +int tdqcow_do_callbacks(struct td_state *s, int sid)
 22.1104 +{
 22.1105 +        int ret, i, rsp = 0,*ptr;
 22.1106 +        struct io_event *ep;
 22.1107 +        struct tdqcow_state *prv = (struct tdqcow_state *)s->private;
 22.1108 +        /* Reap completed AIO events; returns the sum of the callback results. */
 22.1109 +        if (sid > MAX_IOFD) return 1; /* NOTE(review): sid == MAX_IOFD passes this check - confirm the bound */
 22.1110 +	
 22.1111 +	/* Non-blocking test for completed io (min_nr is 0, no timeout). */
 22.1112 +        ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
 22.1113 +                           NULL);
 22.1114 +        /* A zero or negative ret leaves the loop below with nothing to do. */
 22.1115 +        for (ep=prv->aio_events, i = ret; i-->0; ep++) {
 22.1116 +                struct iocb        *io  = ep->obj;
 22.1117 +                struct pending_aio *pio;
 22.1118 +                /* io->data carries the index of this request's pending_aio slot */
 22.1119 +                pio = &prv->pending_aio[(long)io->data];
 22.1120 +
 22.1121 +                if (ep->res != io->u.c.nbytes) {
 22.1122 +                        /* TODO: handle this case better: a short or failed transfer is only logged. */
 22.1123 +			ptr = (int *)&ep->res;
 22.1124 +                        DPRINTF("AIO did less than I asked it to "
 22.1125 +				"[%lu,%lu,%d]\n", 
 22.1126 +				ep->res, io->u.c.nbytes, *ptr);
 22.1127 +                }
 22.1128 +		aio_unlock(prv, pio->sector); /* the sector lock is held until completion */
 22.1129 +		if (pio->id >= 0) { /* non-negative id: request with a callback (meaning of ids assigned elsewhere) */
 22.1130 +			if (prv->crypt_method)
 22.1131 +				encrypt_sectors(prv, pio->sector, 
 22.1132 +						(unsigned char *)pio->buf, 
 22.1133 +						(unsigned char *)pio->buf, 
 22.1134 +						pio->nb_sectors, 0, 
 22.1135 +						&prv->aes_decrypt_key);
 22.1136 +			prv->nr_reqs[pio->qcow_idx]--;
 22.1137 +			if (prv->nr_reqs[pio->qcow_idx] == 0) /* last outstanding iocb of this request */
 22.1138 +				rsp += pio->cb(s, ep->res2, pio->id, 
 22.1139 +					       pio->private);
 22.1140 +		} else if (pio->id == -2) free(pio->buf); /* id -2: buffer is freed here; presumably driver-allocated - confirm */
 22.1141 +                /* Put the iocb back on the free list. */
 22.1142 +                prv->iocb_free[prv->iocb_free_count++] = io;
 22.1143 +        }
 22.1144 +        return rsp;
 22.1145 +}
 22.1146 +
 22.1147 +int qcow_create(const char *filename, uint64_t total_size,
 22.1148 +                      const char *backing_file, int flags)
 22.1149 +{
         +	/*
         +	 * Create a new, empty qcow image called filename.  With a backing
         +	 * file (other than "fat:") the image uses 512-byte clusters and
         +	 * takes its size from the backing file whenever that file could be
         +	 * stat'ed; otherwise total_size (in bytes) is used.  Without a
         +	 * backing file 4 KB clusters are used.  A non-zero flags selects AES
         +	 * encryption.  Returns 0 on success and -1 on any failure.
         +	 */
 22.1150 +	int fd, header_size, backing_filename_len, l1_size;
 22.1151 +	int shift, length, have_stat = 0;
         +	size_t l1_bytes;
 22.1152 +	QCowHeader header;
 22.1153 +	QCowHeader_ext exthdr;
 22.1154 +	char backing_filename[1024], *ptr = NULL, *resolved;
 22.1155 +	uint64_t size;
 22.1156 +	struct stat st;
 22.1157 +
 22.1158 +	DPRINTF("Qcow_create: size %llu\n",(long long unsigned)total_size);
 22.1159 +
 22.1160 +	fd = open(filename, 
 22.1161 +		  O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 
 22.1162 +		  0644);
 22.1163 +	if (fd < 0)
 22.1164 +		return -1;
 22.1165 +
         +	/* Zero everything that is written to disk or read back later, so
         +	 * no uninitialised stack contents end up in the image. */
 22.1166 +	memset(&header, 0, sizeof(header));
         +	memset(&exthdr, 0, sizeof(exthdr));
         +	memset(&st, 0, sizeof(st));
 22.1167 +	header.magic = cpu_to_be32(QCOW_MAGIC);
 22.1168 +	header.version = cpu_to_be32(QCOW_VERSION);
 22.1169 +
 22.1170 +	/*Create extended header fields*/
 22.1171 +	exthdr.xmagic = cpu_to_be32(XEN_MAGIC);
 22.1172 +
 22.1173 +	header_size = sizeof(header) + sizeof(QCowHeader_ext);
 22.1174 +	backing_filename_len = 0;
 22.1175 +	size = (total_size >> SECTOR_SHIFT);
 22.1176 +	if (backing_file) {
 22.1177 +		if (strcmp(backing_file, "fat:")) {
 22.1178 +			const char *p;
 22.1179 +			/* XXX: this is a hack: we do not attempt to 
 22.1180 +			 *check for URL like syntax */
 22.1181 +			p = strchr(backing_file, ':');
 22.1182 +			if (p && (p - backing_file) >= 2) {
 22.1183 +				/* URL like but exclude "c:" like filenames */
 22.1184 +				strncpy(backing_filename, backing_file,
 22.1185 +					sizeof(backing_filename));
         +				/* strncpy does not terminate on truncation */
         +				backing_filename[sizeof(backing_filename) - 1] = '\0';
 22.1186 +			} else {
         +				/* Let realpath() allocate the result: a
         +				 * caller-supplied buffer must hold PATH_MAX
         +				 * bytes, which backing_filename may not. */
 22.1187 +				resolved = realpath(backing_file, NULL);
 22.1188 +				if (resolved == NULL ||
         +				    strlen(resolved) >= sizeof(backing_filename) ||
         +				    stat(resolved, &st) != 0) {
         +					free(resolved);
 22.1189 +					goto fail; /* also closes fd */
 22.1190 +				}
         +				strcpy(backing_filename, resolved);
         +				free(resolved);
         +				have_stat = 1;
 22.1191 +			}
 22.1192 +			header.backing_file_offset = cpu_to_be64(header_size);
 22.1193 +			backing_filename_len = strlen(backing_filename);
 22.1194 +			header.backing_file_size = cpu_to_be32(
 22.1195 +				backing_filename_len);
 22.1196 +			header_size += backing_filename_len;
 22.1197 +			
 22.1198 +			/*Set to the backing file size (known only if stat'ed)*/
         +			if (have_stat)
 22.1199 +				size = (st.st_size >> SECTOR_SHIFT);
 22.1200 +			DPRINTF("Backing file size detected: %lld sectors" 
 22.1201 +				"(total %lld [%lld MB])\n", 
 22.1202 +				(long long)size, 
 22.1203 +				(long long)(size << SECTOR_SHIFT), 
 22.1204 +				(long long)(size >> 11));
 22.1205 +		} else {
 22.1206 +			backing_file = NULL;
 22.1207 +			DPRINTF("Setting file size: %lld (total %lld)\n", 
 22.1208 +				(long long) total_size, 
 22.1209 +				(long long) (total_size << SECTOR_SHIFT));
 22.1210 +		}
 22.1211 +		header.mtime = cpu_to_be32(st.st_mtime); /* 0 when not stat'ed */
 22.1212 +		header.cluster_bits = 9; /* 512 byte cluster to avoid copying
 22.1213 +					    unmodifyed sectors */
 22.1214 +		header.l2_bits = 12; /* 32 KB L2 tables */
 22.1215 +		exthdr.min_cluster_alloc = cpu_to_be32(1);
 22.1216 +	} else {
 22.1217 +		DPRINTF("Setting file size: %lld sectors" 
 22.1218 +			"(total %lld [%lld MB])\n", 
 22.1219 +			(long long) size, 
 22.1220 +			(long long) (size << SECTOR_SHIFT), 
 22.1221 +			(long long) (size >> 11));
 22.1222 +		header.cluster_bits = 12; /* 4 KB clusters */
 22.1223 +		header.l2_bits = 9; /* 4 KB L2 tables */
 22.1224 +		exthdr.min_cluster_alloc = cpu_to_be32(1 << 9);
 22.1225 +	}
 22.1226 +	/*Set the header size value*/
 22.1227 +	header.size = cpu_to_be64(size * 512);
 22.1228 +	
 22.1229 +	header_size = (header_size + 7) & ~7;
 22.1230 +	if (header_size % 4096 > 0) {
 22.1231 +		header_size = ((header_size >> 12) + 1) << 12;
 22.1232 +	}
 22.1233 +
 22.1234 +	shift = header.cluster_bits + header.l2_bits;
 22.1235 +	l1_size = ((size * 512) + (1LL << shift) - 1) >> shift;
         +	l1_bytes = l1_size * sizeof(uint64_t);
 22.1236 +
 22.1237 +	header.l1_table_offset = cpu_to_be64(header_size);
 22.1238 +	DPRINTF("L1 Table offset: %d, size %d\n",
 22.1239 +		header_size,
 22.1240 +		(int)(l1_size * sizeof(uint64_t)));
 22.1241 +	if (flags) {
 22.1242 +		header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
 22.1243 +	} else {
 22.1244 +		header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
 22.1245 +	}
 22.1246 +
         +	/* All-zero L1 table: checksummed here and written out below.  At
         +	 * least one entry is requested so that an empty table cannot be
         +	 * mistaken for an allocation failure. */
 22.1247 +	ptr = calloc(l1_size ? l1_size : 1, sizeof(uint64_t));
         +	if (ptr == NULL)
         +		goto fail;
 22.1248 +	exthdr.cksum = cpu_to_be32(gen_cksum(ptr, l1_bytes));
 22.1249 +	printf("Created cksum: %d\n",exthdr.cksum);
 22.1251 +	
 22.1252 +	/* write all the data */
 22.1253 +	if (write(fd, &header, sizeof(header)) != (ssize_t)sizeof(header) ||
 22.1254 +	    write(fd, &exthdr, sizeof(exthdr)) != (ssize_t)sizeof(exthdr))
         +		goto fail;
 22.1255 +	if (backing_file &&
 22.1256 +	    write(fd, backing_filename, backing_filename_len) !=
         +	    backing_filename_len)
 22.1257 +		goto fail;
 22.1258 +	if (lseek(fd, header_size, SEEK_SET) != header_size ||
 22.1261 +	    write(fd, ptr, l1_bytes) != (ssize_t)l1_bytes)
         +		goto fail;
 22.1263 +
 22.1264 +	/*adjust file length to 4 KByte boundary*/
 22.1265 +	length = header_size + l1_size * sizeof(uint64_t);
 22.1266 +	if (length % 4096 > 0) {
 22.1267 +		length = ((length >> 12) + 1) << 12;
 22.1268 +		if (ftruncate(fd, length) != 0)
         +			goto fail;
 22.1269 +		DPRINTF("Adjusted filelength to %d for 4 "
 22.1270 +			"Kbyte alignment\n",length);
 22.1271 +	}
 22.1272 +
 22.1250 +	free(ptr);
 22.1273 +	close(fd);
 22.1275 +	return 0;
 22.1274 +
         +fail:
         +	/* Single exit for every error after open(): release the table
         +	 * buffer (free(NULL) is a no-op) and do not leak the image fd. */
         +	free(ptr);
         +	close(fd);
         +	return -1;
 22.1276 +}
 22.1277 +
 22.1278 +int qcow_make_empty(struct td_state *bs)
 22.1279 +{
         +	/*
         +	 * Reset the image to empty: zero the L1 table in memory and on
         +	 * disk, cut the file right after the L1 table and clear the L2
         +	 * cache bookkeeping.  Returns 0 on success, -1 if the file could
         +	 * not be updated.
         +	 */
 22.1280 +	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
 22.1281 +	uint32_t l1_length = s->l1_size * sizeof(uint64_t);
 22.1282 +
 22.1283 +	memset(s->l1_table, 0, l1_length);
         +	/* A failed seek would let the write land at the wrong offset. */
 22.1284 +	if (lseek(s->fd, s->l1_table_offset, SEEK_SET) == (off_t)-1)
         +		return -1;
         +	/* A short write leaves part of the old table on disk. */
 22.1285 +	if (write(s->fd, s->l1_table, l1_length) != (ssize_t)l1_length)
 22.1286 +		return -1;
 22.1287 +	if (ftruncate(s->fd, s->l1_table_offset + l1_length) != 0)
         +		return -1;
 22.1288 +
 22.1289 +	memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
 22.1290 +	memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
 22.1291 +	memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
 22.1292 +
 22.1293 +	return 0;
 22.1294 +}
 22.1295 +
 22.1296 +int qcow_get_cluster_size(struct td_state *bs)
 22.1297 +{
 22.1298 +	/* The cluster size is kept in the driver-private qcow state. */
 22.1299 +	struct tdqcow_state *qcow = (struct tdqcow_state *)bs->private;
 22.1300 +	return qcow->cluster_size;
 22.1301 +}
 22.1302 +
 22.1303 +/* XXX: put compressed sectors first, then all the cluster aligned
 22.1304 +   tables to avoid losing bytes in alignment */
 22.1305 +int qcow_compress_cluster(struct td_state *bs, int64_t sector_num, 
 22.1306 +                          const uint8_t *buf)
 22.1307 +{
         +	/*
         +	 * Deflate one cluster taken from buf and, if the result is smaller
         +	 * than a cluster, write it at the offset get_cluster_offset hands
         +	 * out for sector_num.  Returns 0 on success and -1 on failure.
         +	 *
         +	 * NOTE(review): a cluster that does not compress is not written at
         +	 * all (the fallback write is commented out) yet 0 is returned.
         +	 */
 22.1308 +	struct tdqcow_state *s = (struct tdqcow_state *)bs->private;
 22.1309 +	z_stream strm;
 22.1310 +	int ret, out_len;
 22.1311 +	uint8_t *out_buf;
 22.1312 +	uint64_t cluster_offset;
 22.1313 +
 22.1314 +	out_buf = malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
 22.1315 +	if (!out_buf)
 22.1316 +		return -1;
 22.1317 +
 22.1318 +	/* default compression level, small window, no zlib header */
 22.1319 +	memset(&strm, 0, sizeof(strm));
 22.1320 +	ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
 22.1321 +			   Z_DEFLATED, -12, 
 22.1322 +			   9, Z_DEFAULT_STRATEGY);
 22.1323 +	if (ret != 0) {
 22.1324 +		free(out_buf);
 22.1325 +		return -1;
 22.1326 +	}
 22.1327 +
 22.1328 +	strm.avail_in = s->cluster_size;
 22.1329 +	strm.next_in = (uint8_t *)buf;
 22.1330 +	strm.avail_out = s->cluster_size;
 22.1331 +	strm.next_out = out_buf;
 22.1332 +
 22.1333 +	ret = deflate(&strm, Z_FINISH);
 22.1334 +	if (ret != Z_STREAM_END && ret != Z_OK) {
 22.1335 +		free(out_buf);
 22.1336 +		deflateEnd(&strm);
 22.1337 +		return -1;
 22.1338 +	}
 22.1339 +	out_len = strm.next_out - out_buf;
 22.1340 +
 22.1341 +	deflateEnd(&strm);
 22.1342 +
 22.1343 +	if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
 22.1344 +		/* could not compress: write normal cluster */
 22.1345 +		//tdqcow_queue_write(bs, sector_num, buf, s->cluster_sectors);
 22.1346 +	} else {
 22.1347 +		cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, 
 22.1348 +                                            out_len, 0, 0);
         +		/* 0 means no cluster was obtained; writing anyway would
         +		 * put the data at offset 0, on top of the image header. */
         +		if (cluster_offset == 0) {
         +			free(out_buf);
         +			return -1;
         +		}
 22.1349 +		cluster_offset &= s->cluster_offset_mask;
 22.1350 +		if (lseek(s->fd, cluster_offset, SEEK_SET) == (off_t)-1 ||
 22.1351 +		    write(s->fd, out_buf, out_len) != out_len) {
 22.1352 +			free(out_buf);
 22.1353 +			return -1;
 22.1354 +		}
 22.1355 +	}
 22.1356 +	
 22.1357 +	free(out_buf);
 22.1358 +	return 0;
 22.1359 +}
 22.1360 +
 22.1361 +struct tap_disk tapdisk_qcow = { /* qcow entry points, as positional initialisers of struct tap_disk (declaration not shown here) */
 22.1362 +	"tapdisk_qcow", /* name string */
 22.1363 +	sizeof(struct tdqcow_state), /* size of the state the functions below reach through ->private */
 22.1364 +	tdqcow_open,
 22.1365 +	tdqcow_queue_read,
 22.1366 +	tdqcow_queue_write,
 22.1367 +	tdqcow_submit,
 22.1368 +	tdqcow_get_fd,
 22.1369 +	tdqcow_close,
 22.1370 +	tdqcow_do_callbacks,
 22.1371 +};
 22.1372 +
    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/tools/blktap/drivers/block-ram.c	Thu Jul 13 10:13:26 2006 +0100
    23.3 @@ -0,0 +1,296 @@
    23.4 +/* block-ram.c
    23.5 + *
    23.6 + * Fast Ramdisk implementation.
    23.7 + *
    23.8 + * (c) 2006 Andrew Warfield and Julian Chesterfield
    23.9 + *
   23.10 + * This program is free software; you can redistribute it and/or
   23.11 + * modify it under the terms of the GNU General Public License version 2
   23.12 + * as published by the Free Software Foundation; or, when distributed
   23.13 + * separately from the Linux kernel or incorporated into other
   23.14 + * software packages, subject to the following license:
   23.15 + *
   23.16 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   23.17 + * of this source file (the "Software"), to deal in the Software without
   23.18 + * restriction, including without limitation the rights to use, copy, modify,
   23.19 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   23.20 + * and to permit persons to whom the Software is furnished to do so, subject to
   23.21 + * the following conditions:
   23.22 + *
   23.23 + * The above copyright notice and this permission notice shall be included in
   23.24 + * all copies or substantial portions of the Software.
   23.25 + *
   23.26 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   23.27 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   23.28 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   23.29 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   23.30 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   23.31 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   23.32 + * IN THE SOFTWARE.
   23.33 + */
   23.34 +
   23.35 +#include <errno.h>
   23.36 +#include <fcntl.h>
   23.37 +#include <stdio.h>
   23.38 +#include <stdlib.h>
   23.39 +#include <unistd.h>
   23.40 +#include <sys/statvfs.h>
   23.41 +#include <sys/stat.h>
   23.42 +#include <sys/ioctl.h>
   23.43 +#include <linux/fs.h>
   23.44 +#include <string.h>
   23.45 +#include "tapdisk.h"
   23.46 +
   23.47 +#define MAX_DISK_SIZE 1024000 /*500MB disk limit, counted in sectors (compared against s->size)*/
   23.48 +
   23.49 +char *img; /* in-memory copy of the image, shared by every connection */
   23.50 +long int   disksector_size; /* s->sector_size of the loaded image */
   23.51 +long int   disksize; /* s->size of the loaded image */
   23.52 +long int   diskinfo; /* s->info of the loaded image */
   23.53 +static int connections = 0; /* tdram_open calls not yet matched by tdram_close */
   23.54 +
   23.55 +struct tdram_state {
   23.56 +        int fd; /* image fd in the connection that loaded it, -1 in later ones */
   23.57 +	int poll_pipe[2]; /* dummy fd for polling on */
   23.58 +};
   23.59 +
   23.60 +/* Get image size and sector size: fills s->size (sectors), s->sector_size
         + * and s->info for the image open on fd and caches them in the file-level
         + * disk* variables.  An empty image falls back to MAX_DISK_SIZE sectors.
         + * Returns 0 on success, -EINVAL if the image cannot be queried. */
   23.61 +static int get_image_info(struct td_state *s, int fd)
   23.62 +{
   23.63 +	int ret;
   23.65 +	unsigned long total_size;
   23.67 +	struct stat stat;
   23.68 +
   23.69 +	ret = fstat(fd, &stat);
   23.70 +	if (ret != 0) {
   23.71 +		DPRINTF("ERROR: fstat failed, Couldn't stat image");
   23.72 +		return -EINVAL;
   23.73 +	}
   23.74 +
   23.75 +	if (S_ISBLK(stat.st_mode)) {
   23.76 +		/*Accessing block device directly*/
         +		/* BLKGETSIZE stores an unsigned long, so hand it a local of
         +		 * exactly that type instead of pointing it at s->size,
         +		 * which is treated as a 64-bit value in this file. */
   23.77 +		if (ioctl(fd,BLKGETSIZE,&total_size)!=0) {
   23.78 +			DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image");
   23.79 +			return -EINVAL;
   23.80 +		}
   23.81 +		s->size = total_size;
   23.82 +
   23.83 +		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
   23.84 +			"sector_shift [%llu]\n",
   23.85 +			(long long unsigned)(s->size << SECTOR_SHIFT),
   23.86 +			(long long unsigned)s->size);
   23.87 +
   23.88 +		/*Get the sector size*/
   23.89 +#if defined(BLKSSZGET)
   23.90 +		{
   23.91 +			int arg; /* BLKSSZGET stores an int */
   23.92 +			s->sector_size = DEFAULT_SECTOR_SIZE;
   23.93 +			if (ioctl(fd, BLKSSZGET, &arg) == 0)
         +				s->sector_size = arg;
   23.94 +			
   23.95 +			if (s->sector_size != DEFAULT_SECTOR_SIZE)
   23.96 +				DPRINTF("Note: sector size is %ld (not %d)\n",
   23.97 +					s->sector_size, DEFAULT_SECTOR_SIZE);
   23.98 +		}
   23.99 +#else
  23.100 +		s->sector_size = DEFAULT_SECTOR_SIZE;
  23.101 +#endif
  23.102 +
  23.103 +	} else {
  23.104 +		/*Local file? try fstat instead*/
  23.105 +		s->size = (stat.st_size >> SECTOR_SHIFT);
  23.106 +		s->sector_size = DEFAULT_SECTOR_SIZE;
  23.107 +		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
  23.108 +			"sector_shift [%llu]\n",
  23.109 +			(long long unsigned)(s->size << SECTOR_SHIFT),
  23.110 +			(long long unsigned)s->size);
  23.111 +	}
  23.112 +
  23.113 +	if (s->size == 0) {		
  23.114 +		s->size =((uint64_t) MAX_DISK_SIZE);
  23.115 +		s->sector_size = DEFAULT_SECTOR_SIZE;
  23.116 +	}
  23.117 +	s->info = 0;
  23.118 +
  23.119 +        /*Store variables locally*/
  23.120 +	disksector_size = s->sector_size;
  23.121 +	disksize        = s->size;
  23.122 +	diskinfo        = s->info;
  23.123 +	DPRINTF("Image sector_size: \n\t[%lu]\n",
  23.124 +		s->sector_size);
  23.125 +
  23.126 +	return 0;
  23.127 +}
  23.128 +
  23.129 +/* Open the disk file and initialize ram state.  The first connection
         + * reads the whole image into img; later ones only share its parameters.
         + * A connection is counted only once it has been set up completely, and
         + * every failure releases the pipe, the fd and the image buffer.
         + * Returns 0 on success and a negative value on failure. */
  23.130 +int tdram_open (struct td_state *s, const char *name)
  23.131 +{
  23.132 +	int i, fd = -1, ret = 0, count = 0;
  23.133 +	struct tdram_state *prv = (struct tdram_state *)s->private;
  23.134 +	uint64_t size;
  23.135 +	char *p;
  23.137 +
  23.140 +	/* set up a pipe so that we can hand back a poll fd that won't fire.*/
  23.141 +	ret = pipe(prv->poll_pipe);
  23.142 +	if (ret != 0)
  23.143 +		return (0 - errno);
  23.144 +	prv->fd = -1;
         +
  23.145 +	if (connections > 0) {
  23.146 +		s->sector_size = disksector_size;
  23.147 +		s->size        = disksize;
  23.148 +		s->info        = diskinfo; 
  23.149 +		DPRINTF("Image already open, returning parameters:\n");
  23.150 +		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
  23.151 +			"sector_shift [%llu]\n",
  23.152 +			(long long unsigned)(s->size << SECTOR_SHIFT),
  23.153 +			(long long unsigned)s->size);
  23.154 +		DPRINTF("Image sector_size: \n\t[%lu]\n",
  23.155 +			s->sector_size);
  23.156 +
  23.157 +		connections++;
  23.158 +		return 0;
  23.159 +	}
  23.160 +
  23.161 +	/* Open the file */
  23.162 +        fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE);
  23.163 +
  23.164 +        if ((fd == -1) && (errno == EINVAL)) {
  23.165 +
  23.166 +                /* Maybe O_DIRECT isn't supported. */
  23.167 +                fd = open(name, O_RDWR | O_LARGEFILE);
  23.168 +                if (fd != -1) DPRINTF("WARNING: Accessing image without"
  23.169 +                                     "O_DIRECT! (%s)\n", name);
  23.170 +
  23.171 +        } else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name);
  23.172 +	
  23.173 +        if (fd == -1) {
  23.175 +        	ret = 0 - errno; /* capture errno before logging can change it */
  23.174 +		DPRINTF("Unable to open [%s]!\n",name);
  23.176 +        	goto fail;
  23.177 +        }
  23.178 +
  23.179 +        prv->fd = fd;
  23.180 +
  23.181 +	ret = get_image_info(s, fd);
         +	if (ret != 0)
         +		goto fail;
  23.182 +	size = MAX_DISK_SIZE;
  23.183 +
  23.184 +	if (s->size > size) {
  23.185 +		DPRINTF("Disk exceeds limit, must be less than [%d]MB",
  23.186 +			(MAX_DISK_SIZE<<SECTOR_SHIFT)>>20);
  23.187 +		ret = -ENOMEM;
         +		goto fail;
  23.188 +	}
  23.189 +
  23.190 +	/*Read the image into memory*/
         +	/* No connection is active here, so a buffer left over from an
         +	 * earlier session can be dropped (free(NULL) is a no-op). */
         +	free(img);
  23.191 +	p = img = malloc(s->size << SECTOR_SHIFT);
  23.192 +	if (img == NULL) {
  23.193 +		DPRINTF("Mem malloc failed\n");
  23.194 +		ret = -1;
         +		goto fail;
  23.195 +	}
  23.196 +	DPRINTF("Reading %llu bytes.......",(long long unsigned)s->size << SECTOR_SHIFT);
  23.197 +
         +	/* NOTE(review): img comes from plain malloc while the fd may be
         +	 * open with O_DIRECT - confirm the buffer alignment is adequate. */
  23.198 +	for (i = 0; i < s->size; i++) {
  23.199 +		ret = read(prv->fd, p, s->sector_size);
  23.200 +		if (ret != s->sector_size) {
  23.201 +			ret = 0 - errno;
  23.202 +			break;
  23.203 +		} else {
  23.204 +			count += ret;
  23.205 +			p = img + count;
  23.206 +		}
  23.207 +	}
  23.208 +	DPRINTF("[%d]\n",count);
  23.209 +	if (count != s->size << SECTOR_SHIFT) {
  23.210 +		ret = -1;
         +		goto fail;
  23.211 +	}
  23.213 +
         +	connections++;
  23.212 +	return 0;
  23.214 +
  23.215 +fail:
         +	/* Undo everything so that a later open starts from scratch
         +	 * instead of sharing a half-loaded image. */
         +	free(img);
         +	img = NULL;
         +	if (fd != -1)
         +		close(fd);
         +	prv->fd = -1;
         +	close(prv->poll_pipe[0]);
         +	close(prv->poll_pipe[1]);
         +	prv->poll_pipe[0] = prv->poll_pipe[1] = -1;
  23.216 +	return ret;
  23.217 +}
  23.218 +
  23.219 + int tdram_queue_read(struct td_state *s, uint64_t sector,
  23.220 +			       int nb_sectors, char *buf, td_callback_t cb,
  23.221 +			       int id, void *private)
  23.222 +{
         +	/*
         +	 * Copy nb_sectors sectors starting at 'sector' out of the in-memory
         +	 * image and complete the request immediately through cb.  A request
         +	 * reaching outside the image (or arriving before an image is loaded)
         +	 * is rejected with -EINVAL instead of reading past the end of img.
         +	 * Returns the number of bytes copied, or the negative error that was
         +	 * also passed to cb.
         +	 */
  23.223 +	uint64_t limit   = (uint64_t)s->size << SECTOR_SHIFT; /* bytes held in img */
  23.224 +	int      size    = nb_sectors * s->sector_size;
  23.225 +	uint64_t offset  = sector * (uint64_t)s->sector_size;
  23.226 +	int ret = -EINVAL;
  23.227 +
         +	/* sector <= limit also keeps the offset multiplication from wrapping */
         +	if (img != NULL && nb_sectors >= 0 && sector <= limit &&
         +	    offset <= limit && (uint64_t)size <= limit - offset) {
  23.228 +		memcpy(buf, img + offset, size);
  23.229 +		ret = size;
         +	}
  23.230 +
  23.231 +	cb(s, (ret < 0) ? ret: 0, id, private);
  23.232 +
  23.233 +	return ret;
  23.234 +}
  23.235 +
  23.236 + int tdram_queue_write(struct td_state *s, uint64_t sector,
  23.237 +			       int nb_sectors, char *buf, td_callback_t cb,
  23.238 +			       int id, void *private)
  23.239 +{
         +	/*
         +	 * Copy nb_sectors sectors from buf into the in-memory image at
         +	 * 'sector' and complete the request immediately through cb.  A
         +	 * request reaching outside the image (or arriving before an image is
         +	 * loaded) is rejected with -EINVAL instead of writing past the end
         +	 * of img.  Returns the number of bytes copied, or the negative error
         +	 * that was also passed to cb.
         +	 */
  23.240 +	uint64_t limit   = (uint64_t)s->size << SECTOR_SHIFT; /* bytes held in img */
  23.241 +	int      size    = nb_sectors * s->sector_size;
  23.242 +	uint64_t offset  = sector * (uint64_t)s->sector_size;
  23.243 +	int ret = -EINVAL;
  23.244 +	
  23.245 +	/*We assume that write access is controlled at a higher level for multiple disks*/
         +	/* sector <= limit also keeps the offset multiplication from wrapping */
         +	if (img != NULL && nb_sectors >= 0 && sector <= limit &&
         +	    offset <= limit && (uint64_t)size <= limit - offset) {
  23.246 +		memcpy(img + offset, buf, size);
  23.247 +		ret = size;
         +	}
  23.248 +
  23.249 +	cb(s, (ret < 0) ? ret : 0, id, private);
  23.250 +
  23.251 +	return ret;
  23.252 +}
  23.253 + 		
  23.254 +int tdram_submit(struct td_state *s)
  23.255 +{
  23.256 +	return 0;	/* nothing to submit: tdram_queue_read/write copy and call back immediately */
  23.257 +}
  23.258 +
  23.259 +
  23.260 +int *tdram_get_fd(struct td_state *s)
  23.261 +{
  23.262 +	struct tdram_state *prv = (struct tdram_state *)s->private;
  23.263 +        int *fds;
  23.264 +        /* Return a new MAX_IOFD-entry array with the read end of the dummy
  23.265 +         * poll pipe in slot 0 and zeroes elsewhere (calloc zero-fills),
  23.266 +         * or NULL if out of memory. */
  23.267 +        fds = calloc(MAX_IOFD, sizeof(int));
  23.268 +        if (fds == NULL) return NULL; /* malloc result used to go unchecked */
  23.269 +        fds[0] = prv->poll_pipe[0];
  23.270 +        return fds;	
  23.271 +}
  23.272 +
  23.273 +int tdram_close(struct td_state *s)
  23.274 +{
  23.275 +	struct tdram_state *prv = (struct tdram_state *)s->private;
  23.276 +	
         +	/* Release what tdram_open set up for this connection: both ends
         +	 * of the poll pipe and, in the connection that loaded the image,
         +	 * the image fd (it is -1 in the others). */
         +	close(prv->poll_pipe[0]);
         +	close(prv->poll_pipe[1]);
         +	if (prv->fd >= 0)
         +		close(prv->fd);
         +
         +	/* The in-memory image is shared: free it with its last user. */
  23.277 +	if (connections > 0 && --connections == 0) {
         +		free(img);
         +		img = NULL;
         +	}
  23.278 +	
  23.279 +	return 0;
  23.280 +}
  23.281 +
  23.282 +int tdram_do_callbacks(struct td_state *s, int sid)
  23.283 +{
  23.284 +	/* always ask for a kick: returns 1 unconditionally, s and sid are not used */
  23.285 +	return 1;
  23.286 +}
  23.287 +
  23.288 +struct tap_disk tapdisk_ram = { /* ramdisk entry points, as positional initialisers of struct tap_disk (declared in tapdisk.h, presumably) */
  23.289 +	"tapdisk_ram", /* name string */
  23.290 +	sizeof(struct tdram_state), /* size of the state the functions below reach through ->private */
  23.291 +	tdram_open,
  23.292 +	tdram_queue_read,
  23.293 +	tdram_queue_write,
  23.294 +	tdram_submit,
  23.295 +	tdram_get_fd,
  23.296 +	tdram_close,
  23.297 +	tdram_do_callbacks,
  23.298 +};
  23.299 +
    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/tools/blktap/drivers/block-sync.c	Thu Jul 13 10:13:26 2006 +0100
    24.3 @@ -0,0 +1,242 @@
    24.4 +/* block-sync.c
    24.5 + *
    24.6 + * simple slow synchronous raw disk implementation.
    24.7 + *
    24.8 + * (c) 2006 Andrew Warfield and Julian Chesterfield
    24.9 + *
   24.10 + * This program is free software; you can redistribute it and/or
   24.11 + * modify it under the terms of the GNU General Public License version 2
   24.12 + * as published by the Free Software Foundation; or, when distributed
   24.13 + * separately from the Linux kernel or incorporated into other
   24.14 + * software packages, subject to the following license:
   24.15 + *
   24.16 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   24.17 + * of this source file (the "Software"), to deal in the Software without
   24.18 + * restriction, including without limitation the rights to use, copy, modify,
   24.19 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   24.20 + * and to permit persons to whom the Software is furnished to do so, subject to
   24.21 + * the following conditions:
   24.22 + *
   24.23 + * The above copyright notice and this permission notice shall be included in
   24.24 + * all copies or substantial portions of the Software.
   24.25 + *
   24.26 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   24.27 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   24.28 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   24.29 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   24.30 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   24.31 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   24.32 + * IN THE SOFTWARE.
   24.33 + */
   24.34 +
   24.35 +#include <errno.h>
   24.36 +#include <fcntl.h>
   24.37 +#include <stdio.h>
   24.38 +#include <stdlib.h>
   24.39 +#include <unistd.h>
   24.40 +#include <sys/statvfs.h>
   24.41 +#include <sys/stat.h>
   24.42 +#include <sys/ioctl.h>
   24.43 +#include <linux/fs.h>
   24.44 +#include "tapdisk.h"
   24.45 +
   24.46 +struct tdsync_state { /* per-image private state of the synchronous driver */
   24.47 +	int fd; /* presumably the image file descriptor - confirm in tdsync_open */
   24.48 +	int poll_pipe[2]; /* dummy fd for polling on */
   24.49 +};
   24.50 +	
   24.51 +/* Get image size and sector size: fills s->size (sectors), s->sector_size
         + * and s->info for the image open on fd.  Returns 0 on success and
         + * -EINVAL if the image cannot be queried or is empty. */
   24.52 +static int get_image_info(struct td_state *s, int fd)
   24.53 +{
   24.54 +	int ret;
   24.56 +	unsigned long total_size;
   24.58 +	struct stat stat;
   24.59 +
   24.60 +	ret = fstat(fd, &stat);
   24.61 +	if (ret != 0) {
   24.62 +		DPRINTF("ERROR: fstat failed, Couldn't stat image");
   24.63 +		return -EINVAL;
   24.64 +	}
   24.65 +
   24.66 +	if (S_ISBLK(stat.st_mode)) {
   24.67 +		/*Accessing block device directly*/
         +		/* BLKGETSIZE stores an unsigned long, so hand it a local of
         +		 * exactly that type instead of pointing it at s->size,
         +		 * which is printed as a 64-bit value in this function. */
   24.68 +		if (ioctl(fd,BLKGETSIZE,&total_size)!=0) {
   24.69 +			DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image");
   24.70 +			return -EINVAL;
   24.71 +		}
   24.72 +		s->size = total_size;
   24.73 +
   24.74 +		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
   24.75 +			"sector_shift [%llu]\n",
   24.76 +			(long long unsigned)(s->size << SECTOR_SHIFT),
   24.77 +			(long long unsigned)s->size);
   24.78 +
   24.79 +		/*Get the sector size*/
   24.80 +#if defined(BLKSSZGET)
   24.81 +		{
   24.82 +			int arg; /* BLKSSZGET stores an int */
   24.83 +			s->sector_size = DEFAULT_SECTOR_SIZE;
   24.84 +			if (ioctl(fd, BLKSSZGET, &arg) == 0)
         +				s->sector_size = arg;
   24.85 +			
   24.86 +			if (s->sector_size != DEFAULT_SECTOR_SIZE)
   24.87 +				DPRINTF("Note: sector size is %ld (not %d)\n",
   24.88 +					s->sector_size, DEFAULT_SECTOR_SIZE);
   24.89 +		}
   24.90 +#else
   24.91 +		s->sector_size = DEFAULT_SECTOR_SIZE;
   24.92 +#endif
   24.93 +
   24.94 +	} else {
   24.95 +		/*Local file? try fstat instead*/
   24.96 +		s->size = (stat.st_size >> SECTOR_SHIFT);
   24.97 +		s->sector_size = DEFAULT_SECTOR_SIZE;
   24.98 +		DPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
   24.99 +			"sector_shift [%llu]\n",
  24.100 +			(long long unsigned)(s->size << SECTOR_SHIFT),
  24.101 +			(long long unsigned)s->size);
  24.102 +	}
  24.103 +
  24.104 +	if (s->size == 0)
  24.105 +		return -EINVAL;
  24.106 +
  24.107 +	s->info = 0;
  24.108 +
  24.109 +	return 0;
  24.110 +}
  24.111 +
  24.112 +/*
  24.113 + * tdsync_open - open the image read/write for synchronous I/O and fill in
  24.114 + * its geometry via get_image_info().  Returns 0 or a negative error code.
  24.115 + */
  24.116 +int tdsync_open (struct td_state *s, const char *name)
  24.117 +{
  24.118 +	int fd, err;
  24.119 +	struct tdsync_state *prv = (struct tdsync_state *)s->private;
  24.120 +
  24.121 +	/* set up a pipe so that we can hand back a poll fd that won't fire.*/
  24.122 +	if (pipe(prv->poll_pipe) != 0)
  24.123 +		return (0 - errno);
  24.124 +
  24.125 +	/* Open the file */
  24.126 +	fd = open(name, O_RDWR | O_DIRECT | O_LARGEFILE);
  24.127 +
  24.128 +	if ( (fd == -1) && (errno == EINVAL) ) {
  24.129 +		/* Maybe O_DIRECT isn't supported. */
  24.130 +		fd = open(name, O_RDWR | O_LARGEFILE);
  24.131 +		if (fd != -1) DPRINTF("WARNING: Accessing image without "
  24.132 +				      "O_DIRECT! (%s)\n", name);
  24.133 +	} else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name);
  24.134 +
  24.135 +	if (fd == -1) {
  24.136 +		/* Save errno before logging: DPRINTF may overwrite it. */
  24.137 +		err = errno;
  24.138 +		DPRINTF("Unable to open [%s]!\n",name);
  24.139 +		/* NOTE(review): poll_pipe is left open on this path and
  24.140 +		 * when get_image_info() fails; confirm who cleans up. */
  24.141 +		return 0 - err;
  24.142 +	}
  24.143 +
  24.144 +	prv->fd = fd;
  24.145 +
  24.146 +	return get_image_info(s, fd);
  24.147 +}
  24.148 +
  24.149 +/* Read nb_sectors at sector into buf right away; cb gets 0 or -errno. */
  24.150 + int tdsync_queue_read(struct td_state *s, uint64_t sector,
  24.151 +			       int nb_sectors, char *buf, td_callback_t cb,
  24.152 +			       int id, void *private)
  24.153 +{
  24.154 +	struct tdsync_state *prv = (struct tdsync_state *)s->private;
  24.155 +	int      size    = nb_sectors * s->sector_size;
  24.156 +	uint64_t offset  = sector * (uint64_t)s->sector_size;
  24.157 +	int ret = 0;
  24.158 +
  24.159 +	/* Test the seek result as off_t; storing it in an int truncates it. */
  24.160 +	if (lseek(prv->fd, offset, SEEK_SET) == (off_t)-1) {
  24.161 +		ret = 0 - errno;
  24.162 +	} else {
  24.163 +		ssize_t done = read(prv->fd, buf, size);
  24.164 +		/* errno is only set when read() returns -1; short => -EIO */
  24.165 +		if (done < 0) ret = 0 - errno;
  24.166 +		else if (done != size) ret = -EIO;
  24.167 +	}
  24.168 +
  24.169 +	cb(s, ret, id, private);
  24.170 +	return 1;
  24.171 +}
  24.172 +
  24.173 +/* Write nb_sectors from buf at sector right away; cb gets 0 or -errno. */
  24.174 + int tdsync_queue_write(struct td_state *s, uint64_t sector,
  24.175 +			       int nb_sectors, char *buf, td_callback_t cb,
  24.176 +			       int id, void *private)
  24.177 +{
  24.178 +	struct tdsync_state *prv = (struct tdsync_state *)s->private;
  24.179 +	int      size    = nb_sectors * s->sector_size;
  24.180 +	uint64_t offset  = sector * (uint64_t)s->sector_size;
  24.181 +	int ret = 0;
  24.182 +
  24.183 +	/* Test the seek result as off_t; storing it in an int truncates it. */
  24.184 +	if (lseek(prv->fd, offset, SEEK_SET) == (off_t)-1) {
  24.185 +		ret = 0 - errno;
  24.186 +	} else {
  24.187 +		ssize_t done = write(prv->fd, buf, size);
  24.188 +		/* errno is only set when write() returns -1; short => -EIO */
  24.189 +		if (done < 0) ret = 0 - errno;
  24.190 +		else if (done != size) ret = -EIO;
  24.191 +	}
  24.192 +
  24.193 +	cb(s, ret, id, private);
  24.194 +	return 1;
  24.195 +}
  24.196 + 		
  24.197 +int tdsync_submit(struct td_state *s)
  24.198 +{
  24.199 +	return 0;	/* no-op: the queue calls above already did the I/O */
  24.200 +}
  24.201 +
  24.202 +
  24.203 +/* Return a heap array of MAX_IOFD ints: entry 0 is the read end of the dummy
  24.204 + * poll pipe, the rest are 0.  Returns NULL if the allocation fails. */
  24.205 +int *tdsync_get_fd(struct td_state *s)
  24.206 +{
  24.207 +	struct tdsync_state *prv = (struct tdsync_state *)s->private;
  24.208 +	int *fds;
  24.209 +
  24.210 +	/* calloc hands back the array already zero-filled */
  24.211 +	fds = calloc(MAX_IOFD, sizeof(int));
  24.212 +	if (fds != NULL)
  24.213 +		fds[0] = prv->poll_pipe[0];
  24.214 +	return fds;
  24.215 +}
  24.216 +
  24.217 +int tdsync_close(struct td_state *s)
  24.218 +{
  24.219 +	struct tdsync_state *prv = (struct tdsync_state *)s->private;
  24.220 +	/* Close the image fd and both ends of the dummy poll pipe. */
  24.221 +	close(prv->fd);
  24.222 +	close(prv->poll_pipe[0]);
  24.223 +	close(prv->poll_pipe[1]);
  24.224 +	
  24.225 +	return 0;	/* close() results are not checked */
  24.226 +}
  24.227 +
  24.228 +int tdsync_do_callbacks(struct td_state *s, int sid)
  24.229 +{
  24.230 +	/* always ask for a kick (neither s nor sid is consulted) */
  24.231 +	return 1;
  24.232 +}
  24.233 +
  24.234 +struct tap_disk tapdisk_sync = {	/* positional initialiser; field order presumably follows struct tap_disk in tapdisk.h */
  24.235 +	"tapdisk_sync",			/* driver name */
  24.236 +	sizeof(struct tdsync_state),	/* size of the per-disk private state */
  24.237 +	tdsync_open,
  24.238 +	tdsync_queue_read,
  24.239 +	tdsync_queue_write,
  24.240 +	tdsync_submit,
  24.241 +	tdsync_get_fd,
  24.242 +	tdsync_close,
  24.243 +	tdsync_do_callbacks,
  24.244 +};
  24.245 +
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/tools/blktap/drivers/block-vmdk.c	Thu Jul 13 10:13:26 2006 +0100
    25.3 @@ -0,0 +1,415 @@
    25.4 +/* block-vmdk.c
    25.5 + *
    25.6 + * VMware Disk format implementation.
    25.7 + *
    25.8 + * (c) 2006 Andrew Warfield and Julian Chesterfield
    25.9 + *
   25.10 + * This is largely the same as the vmdk driver in Qemu, I've just twisted it
   25.11 + * to match our interfaces.  The original (BSDish) Copyright message appears 
   25.12 + * below:
   25.13 + */
   25.14 + 
   25.15 +/*
   25.16 + * Block driver for the VMDK format
   25.17 + * 
   25.18 + * Copyright (c) 2004 Fabrice Bellard
   25.19 + * Copyright (c) 2005 Filip Navara
   25.20 + * 
   25.21 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   25.22 + * of this software and associated documentation files (the "Software"), to deal
   25.23 + * in the Software without restriction, including without limitation the rights
   25.24 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   25.25 + * copies of the Software, and to permit persons to whom the Software is
   25.26 + * furnished to do so, subject to the following conditions:
   25.27 + *
   25.28 + * The above copyright notice and this permission notice shall be included in
   25.29 + * all copies or substantial portions of the Software.
   25.30 + *
   25.31 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   25.32 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   25.33 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
   25.34 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   25.35 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   25.36 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
   25.37 + * THE SOFTWARE.
   25.38 + */
   25.39 +
   25.40 +#include <errno.h>
   25.41 +#include <fcntl.h>
   25.42 +#include <stdio.h>
   25.43 +#include <stdlib.h>
   25.44 +#include <unistd.h>
   25.45 +#include <sys/statvfs.h>
   25.46 +#include <sys/stat.h>
   25.47 +#include <sys/ioctl.h>
   25.48 +#include <linux/fs.h>
   25.49 +#include <string.h>
   25.50 +#include "tapdisk.h"
   25.51 +#include "bswap.h"
   25.52 +
   25.53 +/* Free a pointer lvalue and reset it to NULL so that a repeated free is
   25.54 + * harmless.  No trailing ';': the caller's own ';' ends the statement. */
   25.55 +#define safer_free(_x)       \
   25.56 +  do {                       \
   25.57 +  	free(_x);            \
   25.58 +  	(_x) = NULL;         \
   25.59 +  } while (0)
   25.60 +
   25.61 +#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')	/* bytes "COWD" when the first word is read big-endian */
   25.62 +#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')	/* bytes "KDMV" when the first word is read big-endian */
   25.63 +
   25.64 +typedef struct {	/* VMDK3 header, read from the file right after the 4-byte magic */
   25.65 +    uint32_t version;
   25.66 +    uint32_t flags;
   25.67 +    uint32_t disk_sectors;	/* becomes s->size in tdvmdk_open */
   25.68 +    uint32_t granularity;	/* becomes cluster_sectors in tdvmdk_open */
   25.69 +    uint32_t l1dir_offset;	/* L1 table position; shifted << 9 to get a byte offset */
   25.70 +    uint32_t l1dir_size;
   25.71 +    uint32_t file_sectors;
   25.72 +    uint32_t cylinders;
   25.73 +    uint32_t heads;
   25.74 +    uint32_t sectors_per_track;
   25.75 +} VMDK3Header;
   25.76 +
   25.77 +typedef struct {	/* VMDK4 header, read from the file right after the 4-byte magic */
   25.78 +    uint32_t version;
   25.79 +    uint32_t flags;
   25.80 +    int64_t capacity;	/* becomes s->size in tdvmdk_open */
   25.81 +    int64_t granularity;	/* becomes cluster_sectors in tdvmdk_open */
   25.82 +    int64_t desc_offset;
   25.83 +    int64_t desc_size;
   25.84 +    int32_t num_gtes_per_gte;	/* becomes l2_size (entries per L2 table) */
   25.85 +    int64_t rgd_offset;	/* L1 table position; shifted << 9 to get a byte offset */
   25.86 +    int64_t gd_offset;	/* backup L1 table position; shifted << 9 likewise */
   25.87 +    int64_t grain_offset;
   25.88 +    char filler[1];
   25.89 +    char check_bytes[4];
   25.90 +} __attribute__((packed)) VMDK4Header;
   25.91 +
   25.92 +#define L2_CACHE_SIZE 16	/* number of L2 tables held in tdvmdk_state.l2_cache */
   25.93 +
   25.94 +struct tdvmdk_state {	/* per-disk private state of the VMDK driver */
   25.95 +        int fd;	/* descriptor of the opened image file */
   25.96 +	int poll_pipe[2]; /* dummy fd for polling on */
   25.97 +	
   25.98 +    	unsigned int l1_size;	/* number of entries in the L1 table */
   25.99 +    	int64_t l1_table_offset;	/* byte offset of the L1 table in the file */
  25.100 +    	int64_t l1_backup_table_offset;	/* byte offset of the backup L1 table; 0 if none */
  25.101 +    	uint32_t l1_entry_sectors;	/* sectors covered by one L1 entry (l2_size * cluster_sectors) */
  25.102 +    	unsigned int l2_size;	/* number of entries in one L2 table */
  25.103 +	
  25.104 +    	uint32_t *l1_table;
  25.105 +    	uint32_t *l1_backup_table;
  25.106 +    	uint32_t *l2_cache;	/* L2_CACHE_SIZE tables of l2_size entries each */
  25.107 +    	uint32_t l2_cache_offsets[L2_CACHE_SIZE];	/* L2 table offset held in each cache slot */
  25.108 +    	uint32_t l2_cache_counts[L2_CACHE_SIZE];	/* hit count of each cache slot */
  25.109 +    	
  25.110 +    	unsigned int cluster_sectors;	/* sectors per cluster (header granularity) */
  25.111 +};
  25.112 +
  25.113 +
  25.114 +/*
  25.115 + * tdvmdk_open - open a VMDK3 or VMDK4 image read/write, parse its header and
  25.116 + * load the L1 table(s) plus an empty L2 cache.  Returns 0, or -1 on failure.
  25.117 + * NOTE(review): poll_pipe stays open on the failure returns; confirm cleanup.
  25.118 + */
  25.119 +static int tdvmdk_open (struct td_state *s, const char *name)
  25.120 +{
  25.121 +	int fd;
  25.122 +	unsigned int i;
  25.123 +	size_t l1_bytes;
  25.124 +	uint32_t magic;
  25.125 +	struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
  25.126 +
  25.127 +	/* Known starting state for the fail path and the L2 cache lookup. */
  25.128 +	prv->l1_table = NULL;
  25.129 +	prv->l1_backup_table = NULL;
  25.130 +	prv->l2_cache = NULL;
  25.131 +	memset(prv->l2_cache_offsets, 0, sizeof(prv->l2_cache_offsets));
  25.132 +	memset(prv->l2_cache_counts, 0, sizeof(prv->l2_cache_counts));
  25.133 +
  25.134 +	/* set up a pipe so that we can hand back a poll fd that won't fire.*/
  25.135 +	if (pipe(prv->poll_pipe) != 0)
  25.136 +		return -1;
  25.137 +
  25.138 +	/* Open the file; O_DIRECT is not requested for this format. */
  25.139 +	fd = open(name, O_RDWR | O_LARGEFILE);
  25.140 +	if (fd == -1) {
  25.141 +		DPRINTF("Unable to open [%s]!\n",name);
  25.142 +		return -1;
  25.143 +	}
  25.144 +	DPRINTF("open(%s) without O_DIRECT\n", name);
  25.145 +	prv->fd = fd;
  25.146 +
  25.147 +	/* Grok the vmdk header. */
  25.148 +	if (read(fd, &magic, sizeof(magic)) != sizeof(magic))
  25.149 +		goto fail;
  25.150 +	magic = be32_to_cpu(magic);
  25.151 +	if (magic == VMDK3_MAGIC) {
  25.152 +		VMDK3Header header;
  25.153 +
  25.154 +		if (read(fd, &header, sizeof(header)) != sizeof(header))
  25.155 +			goto fail;
  25.156 +		prv->cluster_sectors = le32_to_cpu(header.granularity);
  25.157 +		prv->l2_size = 1 << 9;
  25.158 +		prv->l1_size = 1 << 6;
  25.159 +		s->size = le32_to_cpu(header.disk_sectors);
  25.160 +		/* widen before shifting so the byte offset is not cut off */
  25.161 +		prv->l1_table_offset = (int64_t)le32_to_cpu(header.l1dir_offset) << 9;
  25.162 +		prv->l1_backup_table_offset = 0;
  25.163 +		prv->l1_entry_sectors = prv->l2_size * prv->cluster_sectors;
  25.164 +		/* a zero here would later be used as a divisor */
  25.165 +		if (prv->l1_entry_sectors == 0)
  25.166 +			goto fail;
  25.167 +	} else if (magic == VMDK4_MAGIC) {
  25.168 +		VMDK4Header header;
  25.169 +
  25.170 +		if (read(fd, &header, sizeof(header)) != sizeof(header))
  25.171 +			goto fail;
  25.172 +		/* capacity and granularity are 64-bit header fields */
  25.173 +		s->size = le64_to_cpu(header.capacity);
  25.174 +		prv->cluster_sectors = le64_to_cpu(header.granularity);
  25.175 +		prv->l2_size = le32_to_cpu(header.num_gtes_per_gte);
  25.176 +		prv->l1_entry_sectors = prv->l2_size * prv->cluster_sectors;
  25.177 +		if (prv->l1_entry_sectors == 0)
  25.178 +			goto fail;
  25.179 +		prv->l1_size = (s->size + prv->l1_entry_sectors - 1) / prv->l1_entry_sectors;
  25.180 +		prv->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
  25.181 +		prv->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
  25.182 +	} else {
  25.183 +		goto fail;
  25.184 +	}
  25.185 +
  25.186 +	/* read the L1 table */
  25.187 +	l1_bytes = prv->l1_size * sizeof(uint32_t);
  25.188 +	prv->l1_table = malloc(l1_bytes);
  25.189 +	if (!prv->l1_table)
  25.190 +		goto fail;
  25.191 +	if (lseek(fd, prv->l1_table_offset, SEEK_SET) == (off_t)-1)
  25.192 +		goto fail;
  25.193 +	if (read(fd, prv->l1_table, l1_bytes) != (ssize_t)l1_bytes)
  25.194 +		goto fail;
  25.195 +	for (i = 0; i < prv->l1_size; i++)
  25.196 +		le32_to_cpus(&prv->l1_table[i]);
  25.197 +	if (prv->l1_backup_table_offset) {
  25.198 +		prv->l1_backup_table = malloc(l1_bytes);
  25.199 +		if (!prv->l1_backup_table)
  25.200 +			goto fail;
  25.201 +		if (lseek(fd, prv->l1_backup_table_offset, SEEK_SET) == (off_t)-1)
  25.202 +			goto fail;
  25.203 +		if (read(fd, prv->l1_backup_table, l1_bytes) != (ssize_t)l1_bytes)
  25.204 +			goto fail;
  25.205 +		for (i = 0; i < prv->l1_size; i++)
  25.206 +			le32_to_cpus(&prv->l1_backup_table[i]);
  25.207 +	}
  25.208 +
  25.209 +	prv->l2_cache = malloc(prv->l2_size * L2_CACHE_SIZE *sizeof(uint32_t));
  25.210 +	if (!prv->l2_cache)
  25.211 +		goto fail;
  25.212 +	DPRINTF("VMDK File opened successfully\n");
  25.213 +	return 0;
  25.214 +
  25.215 +fail:
  25.216 +	DPRINTF("VMDK File open failed.\n");
  25.217 +	safer_free(prv->l1_backup_table);
  25.218 +	safer_free(prv->l1_table);
  25.219 +	safer_free(prv->l2_cache);
  25.220 +	close(fd);
  25.221 +	return -1;
  25.222 +}
  25.223 +
  25.224 +static uint64_t get_cluster_offset(struct td_state *s, 
  25.225 +                                   uint64_t offset, int allocate)
  25.226 +{
  25.227 +	struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
  25.228 +    	unsigned int l1_index, l2_offset, l2_index;
  25.229 +    	int min_index, i, j;
  25.230 +    	uint32_t min_count, *l2_table, tmp;
  25.231 +    	uint64_t cluster_offset;
  25.232 +    	/* Map a byte offset on the virtual disk to the byte offset of its cluster in the image file; 0 means none (or an I/O error). */
  25.233 +    	l1_index = (offset >> 9) / prv->l1_entry_sectors;	/* >> 9: bytes to 512-byte sectors */
  25.234 +    	if (l1_index >= prv->l1_size)
  25.235 +        	return 0;
  25.236 +    	l2_offset = prv->l1_table[l1_index];
  25.237 +    	if (!l2_offset)
  25.238 +        	return 0;	/* no L2 table recorded for this range */
  25.239 +    	for (i = 0; i < L2_CACHE_SIZE; i++) {
  25.240 +        	if (l2_offset == prv->l2_cache_offsets[i]) {
  25.241 +            		/* increment the hit count */
  25.242 +            		if (++prv->l2_cache_counts[i] == 0xffffffff) {
  25.243 +	                	for(j = 0; j < L2_CACHE_SIZE; j++) {
  25.244 +	                    		prv->l2_cache_counts[j] >>= 1;	/* halve every slot once one count reaches its maximum */
  25.245 +	                	}
  25.246 +            		}
  25.247 +            		l2_table = prv->l2_cache + (i * prv->l2_size);
  25.248 +            		goto found;
  25.249 +        	}
  25.250 +    	}
  25.251 +    	/* not found: load a new entry in the least used one */
  25.252 +    	min_index = 0;
  25.253 +    	min_count = 0xffffffff;
  25.254 +    	for (i = 0; i < L2_CACHE_SIZE; i++) {
  25.255 +        	if (prv->l2_cache_counts[i] < min_count) {
  25.256 +            		min_count = prv->l2_cache_counts[i];
  25.257 +            		min_index = i;
  25.258 +        	}
  25.259 +    	}
  25.260 +    	l2_table = prv->l2_cache + (min_index * prv->l2_size);
  25.261 +    	lseek(prv->fd, (int64_t)l2_offset * 512, SEEK_SET);	/* l2_offset counts 512-byte units; the seek result is not checked */
  25.262 +    	if (read(prv->fd, l2_table, prv->l2_size * sizeof(uint32_t)) != 
  25.263 +        	 prv->l2_size * sizeof(uint32_t))
  25.264 +        	return 0;
  25.265 +    	prv->l2_cache_offsets[min_index] = l2_offset;
  25.266 +    	prv->l2_cache_counts[min_index] = 1;
  25.267 + found:
  25.268 +    	l2_index = ((offset >> 9) / prv->cluster_sectors) % prv->l2_size;
  25.269 +    	cluster_offset = le32_to_cpu(l2_table[l2_index]);
  25.270 +    	if (!cluster_offset) {	/* empty L2 entry: no cluster assigned yet */
  25.271 +        	if (!allocate)
  25.272 +            		return 0;
  25.273 +        	cluster_offset = lseek(prv->fd, 0, SEEK_END);	/* the new cluster is placed at the current end of file */
  25.274 +        	ftruncate(prv->fd, cluster_offset + 
  25.275 +			  (prv->cluster_sectors << 9));
  25.276 +        	cluster_offset >>= 9;
  25.277 +        	/* update L2 table */
  25.278 +        	tmp = cpu_to_le32(cluster_offset);
  25.279 +        	l2_table[l2_index] = tmp;
  25.280 +        	lseek(prv->fd, ((int64_t)l2_offset * 512) + 
  25.281 +        	      (l2_index * sizeof(tmp)), SEEK_SET);
  25.282 +        	if (write(prv->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
  25.283 +            		return 0;
  25.284 +        	/* update backup L2 table */
  25.285 +        	if (prv->l1_backup_table_offset != 0) {
  25.286 +            		l2_offset = prv->l1_backup_table[l1_index];
  25.287 +            	lseek(prv->fd, ((int64_t)l2_offset * 512) + 
  25.288 +            		(l2_index * sizeof(tmp)), SEEK_SET);
  25.289 +            	if (write(prv->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
  25.290 +                	return 0;
  25.291 +        	}
  25.292 +    	}
  25.293 +    	cluster_offset <<= 9;	/* 512-byte units back to a byte offset */
  25.294 +    	return cluster_offset;
  25.295 +}
  25.296 +
  25.297 +/* Read nb_sectors starting at sector into buf, one cluster-sized piece at
  25.298 + * a time; unallocated clusters read as zeroes.  cb receives 0 or -1. */
  25.299 +static int tdvmdk_queue_read(struct td_state *s, uint64_t sector,
  25.300 +			       int nb_sectors, char *buf, td_callback_t cb,
  25.301 +			       int id, void *private)
  25.302 +{
  25.303 +	struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
  25.304 +	int status = 0;
  25.305 +
  25.306 +	while (nb_sectors > 0) {
  25.307 +		uint64_t cluster_offset = get_cluster_offset(s, sector << 9, 0);
  25.308 +		int index_in_cluster = sector % prv->cluster_sectors;
  25.309 +		int n = prv->cluster_sectors - index_in_cluster;
  25.310 +
  25.311 +		if (n > nb_sectors)
  25.312 +			n = nb_sectors;
  25.313 +		if (!cluster_offset) {
  25.314 +			memset(buf, 0, 512 * n);
  25.315 +		} else if (lseek(prv->fd, cluster_offset + index_in_cluster * 512,
  25.316 +				 SEEK_SET) == (off_t)-1 ||
  25.317 +			   read(prv->fd, buf, n * 512) != n * 512) {
  25.318 +			/* a failed seek must not fall through to the read */
  25.319 +			status = -1;
  25.320 +			break;
  25.321 +		}
  25.322 +		nb_sectors -= n;
  25.323 +		sector     += n;
  25.324 +		buf += n * 512;
  25.325 +	}
  25.326 +
  25.327 +	cb(s, status, id, private);
  25.328 +
  25.329 +	return 1;
  25.330 +}
  25.331 +
  25.332 +/* Write nb_sectors from buf starting at sector, allocating clusters on
  25.333 + * demand through get_cluster_offset().  cb receives 0 or -1. */
  25.334 +static  int tdvmdk_queue_write(struct td_state *s, uint64_t sector,
  25.335 +			       int nb_sectors, char *buf, td_callback_t cb,
  25.336 +			       int id, void *private)
  25.337 +{
  25.338 +	struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
  25.339 +	int status = 0;
  25.340 +
  25.341 +	while (nb_sectors > 0) {
  25.342 +		/* '%' as in tdvmdk_queue_read: a mask would only be right
  25.343 +		 * for power-of-two cluster sizes. */
  25.344 +		int index_in_cluster = sector % prv->cluster_sectors;
  25.345 +		int n = prv->cluster_sectors - index_in_cluster;
  25.346 +		uint64_t cluster_offset;
  25.347 +
  25.348 +		if (n > nb_sectors)
  25.349 +			n = nb_sectors;
  25.350 +		cluster_offset = get_cluster_offset(s, sector << 9, 1);
  25.351 +		/* a failed lookup, seek or write aborts the whole request */
  25.352 +		if (!cluster_offset ||
  25.353 +		    lseek(prv->fd, cluster_offset + index_in_cluster * 512,
  25.354 +			  SEEK_SET) == (off_t)-1 ||
  25.355 +		    write(prv->fd, buf, n * 512) != n * 512) {
  25.356 +			status = -1;
  25.357 +			break;
  25.358 +		}
  25.359 +		nb_sectors -= n;
  25.360 +		sector     += n;
  25.361 +		buf += n * 512;
  25.362 +	}
  25.363 +
  25.364 +	cb(s, status, id, private);
  25.365 +
  25.366 +	return 1;
  25.367 +}
  25.368 + 		
  25.369 +static int tdvmdk_submit(struct td_state *s)
  25.370 +{
  25.371 +	return 0;	/* no-op: the queue calls above already did the I/O */
  25.372 +}
  25.373 +
  25.374 +
  25.375 +/* Return a zero-filled heap array of MAX_IOFD ints whose entry 0 is the read
  25.376 + * end of the dummy poll pipe, or NULL if the allocation fails. */
  25.377 +static int *tdvmdk_get_fd(struct td_state *s)
  25.378 +{
  25.379 +	struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
  25.380 +	int *fds;
  25.381 +
  25.382 +	fds = calloc(MAX_IOFD, sizeof(int));
  25.383 +	if (fds != NULL)
  25.384 +		fds[0] = prv->poll_pipe[0];
  25.385 +	return fds;
  25.386 +}
  25.387 +
  25.388 +static int tdvmdk_close(struct td_state *s)
  25.389 +{
  25.390 +	struct tdvmdk_state *prv = (struct tdvmdk_state *)s->private;
  25.391 +	/* Free the cached tables, then close the image fd and both pipe ends. */
  25.392 +    	safer_free(prv->l1_table);
  25.393 +    	safer_free(prv->l1_backup_table);
  25.394 +    	safer_free(prv->l2_cache);
  25.395 +    	close(prv->fd);
  25.396 +	close(prv->poll_pipe[0]);
  25.397 +	close(prv->poll_pipe[1]);
  25.398 +	return 0;	/* close() results are not checked */
  25.399 +}
  25.400 +
  25.401 +static int tdvmdk_do_callbacks(struct td_state *s, int sid)
  25.402 +{
  25.403 +	/* always ask for a kick (neither s nor sid is consulted) */
  25.404 +	return 1;
  25.405 +}
  25.406 +
  25.407 +struct tap_disk tapdisk_vmdk = {	/* positional initialiser; field order presumably follows struct tap_disk in tapdisk.h */
  25.408 +	"tapdisk_vmdk",			/* driver name */
  25.409 +	sizeof(struct tdvmdk_state),	/* size of the per-disk private state */
  25.410 +	tdvmdk_open,
  25.411 +	tdvmdk_queue_read,
  25.412 +	tdvmdk_queue_write,
  25.413 +	tdvmdk_submit,
  25.414 +	tdvmdk_get_fd,
  25.415 +	tdvmdk_close,
  25.416 +	tdvmdk_do_callbacks,
  25.417 +};
  25.418 +
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/tools/blktap/drivers/bswap.h	Thu Jul 13 10:13:26 2006 +0100
    26.3 @@ -0,0 +1,202 @@
    26.4 +#ifndef BSWAP_H
    26.5 +#define BSWAP_H
    26.6 +
    26.7 +//#include "config-host.h"
    26.8 +
    26.9 +#include <inttypes.h>
   26.10 +
   26.11 +#ifdef HAVE_BYTESWAP_H
   26.12 +#include <byteswap.h>
   26.13 +#else
   26.14 +
   26.15 +#define bswap_16(x) /* fallback when <byteswap.h> is unavailable: swap the two bytes of a 16-bit value */ \
   26.16 +({ \
   26.17 +	uint16_t __x = (x); /* evaluate the argument once */ \
   26.18 +	((uint16_t)( \
   26.19 +		(((uint16_t)(__x) & (uint16_t)0x00ffU) << 8) | \
   26.20 +		(((uint16_t)(__x) & (uint16_t)0xff00U) >> 8) )); \
   26.21 +})
   26.22 +
   26.23 +#define bswap_32(x) /* fallback when <byteswap.h> is unavailable: reverse the four bytes of a 32-bit value */ \
   26.24 +({ \
   26.25 +	uint32_t __x = (x); /* evaluate the argument once */ \
   26.26 +	((uint32_t)( \
   26.27 +		(((uint32_t)(__x) & (uint32_t)0x000000ffUL) << 24) | \
   26.28 +		(((uint32_t)(__x) & (uint32_t)0x0000ff00UL) <<  8) | \
   26.29 +		(((uint32_t)(__x) & (uint32_t)0x00ff0000UL) >>  8) | \
   26.30 +		(((uint32_t)(__x) & (uint32_t)0xff000000UL) >> 24) )); \
   26.31 +})
   26.32 +
   26.33 +#define bswap_64(x) /* fallback when <byteswap.h> is unavailable: reverse the eight bytes of a 64-bit value */ \
   26.34 +({ \
   26.35 +	uint64_t __x = (x); /* evaluate the argument once */ \
   26.36 +	((uint64_t)( \
   26.37 +		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000000000ffULL) << 56) | \
   26.38 +		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000000000ff00ULL) << 40) | \
   26.39 +		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \
   26.40 +		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000ff000000ULL) <<  8) | \
   26.41 +	        (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000ff00000000ULL) >>  8) | \
   26.42 +		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \
   26.43 +		(uint64_t)(((uint64_t)(__x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \
   26.44 +		(uint64_t)(((uint64_t)(__x) & (uint64_t)0xff00000000000000ULL) >> 56) )); \
   26.45 +})
   26.46 +
   26.47 +#endif /* !HAVE_BYTESWAP_H */
   26.48 +
   26.49 +static inline uint16_t bswap16(uint16_t x)	/* typed wrapper around bswap_16 */
   26.50 +{
   26.51 +    return bswap_16(x);
   26.52 +}
   26.53 +
   26.54 +static inline uint32_t bswap32(uint32_t x) 	/* typed wrapper around bswap_32 */
   26.55 +{
   26.56 +    return bswap_32(x);
   26.57 +}
   26.58 +
   26.59 +static inline uint64_t bswap64(uint64_t x) 	/* typed wrapper around bswap_64 */
   26.60 +{
   26.61 +    return bswap_64(x);
   26.62 +}
   26.63 +
   26.64 +static inline void bswap16s(uint16_t *s)	/* byte-swap *s in place */
   26.65 +{
   26.66 +    *s = bswap16(*s);
   26.67 +}
   26.68 +
   26.69 +static inline void bswap32s(uint32_t *s)	/* byte-swap *s in place */
   26.70 +{
   26.71 +    *s = bswap32(*s);
   26.72 +}
   26.73 +
   26.74 +static inline void bswap64s(uint64_t *s)	/* byte-swap *s in place */
   26.75 +{
   26.76 +    *s = bswap64(*s);
   26.77 +}
   26.78 +
   26.79 +#if defined(WORDS_BIGENDIAN)	/* big-endian host: only the le_* helpers swap */
   26.80 +#define be_bswap(v, size) (v)
   26.81 +#define le_bswap(v, size) bswap ## size(v)
   26.82 +#define be_bswaps(v, size)
   26.83 +#define le_bswaps(p, size) *p = bswap ## size(*p);
   26.84 +#else	/* otherwise: only the be_* helpers swap */
   26.85 +#define le_bswap(v, size) (v)
   26.86 +#define be_bswap(v, size) bswap ## size(v)
   26.87 +#define le_bswaps(v, size)
   26.88 +#define be_bswaps(p, size) *p = bswap ## size(*p);
   26.89 +#endif
   26.90 +
   26.91 +#define CPU_CONVERT(endian, size, type) /* generates six helpers: <e><n>_to_cpu, cpu_to_<e><n>, the in-place <e><n>_to_cpus and cpu_to_<e><n>s, <e><n>_to_cpup (load via pointer) and cpu_to_<e><n>w (store via pointer) */\
   26.92 +static inline type endian ## size ## _to_cpu(type v)\
   26.93 +{\
   26.94 +    return endian ## _bswap(v, size);\
   26.95 +}\
   26.96 +\
   26.97 +static inline type cpu_to_ ## endian ## size(type v)\
   26.98 +{\
   26.99 +    return endian ## _bswap(v, size);\
  26.100 +}\
  26.101 +\
  26.102 +static inline void endian ## size ## _to_cpus(type *p)\
  26.103 +{\
  26.104 +    endian ## _bswaps(p, size)\
  26.105 +}\
  26.106 +\
  26.107 +static inline void cpu_to_ ## endian ## size ## s(type *p)\
  26.108 +{\
  26.109 +    endian ## _bswaps(p, size)\
  26.110 +}\
  26.111 +\
  26.112 +static inline type endian ## size ## _to_cpup(const type *p)\
  26.113 +{\
  26.114 +    return endian ## size ## _to_cpu(*p);\
  26.115 +}\
  26.116 +\
  26.117 +static inline void cpu_to_ ## endian ## size ## w(type *p, type v)\
  26.118 +{\
  26.119 +     *p = cpu_to_ ## endian ## size(v);\
  26.120 +}
  26.121 +
  26.122 +CPU_CONVERT(be, 16, uint16_t)	/* be16_to_cpu, cpu_to_be16, ... */
  26.123 +CPU_CONVERT(be, 32, uint32_t)	/* be32_to_cpu, cpu_to_be32, ... */
  26.124 +CPU_CONVERT(be, 64, uint64_t)	/* be64_to_cpu, cpu_to_be64, ... */
  26.125 +
  26.126 +CPU_CONVERT(le, 16, uint16_t)	/* le16_to_cpu, cpu_to_le16, ... */
  26.127 +CPU_CONVERT(le, 32, uint32_t)	/* le32_to_cpu, cpu_to_le32, ... */
  26.128 +CPU_CONVERT(le, 64, uint64_t)	/* le64_to_cpu, cpu_to_le64, ... */
  26.129 +
  26.130 +/* unaligned versions (optimized for frequent unaligned accesses)*/
  26.131 +
  26.132 +#if defined(__i386__) || defined(__powerpc__)
  26.133 +/* On these targets the "u" (unaligned) names simply alias the plain helpers. */
  26.134 +#define cpu_to_le16wu(p, v) cpu_to_le16w(p, v)
  26.135 +#define cpu_to_le32wu(p, v) cpu_to_le32w(p, v)
  26.136 +#define le16_to_cpupu(p) le16_to_cpup(p)
  26.137 +#define le32_to_cpupu(p) le32_to_cpup(p)
  26.138 +
  26.139 +#define cpu_to_be16wu(p, v) cpu_to_be16w(p, v)
  26.140 +#define cpu_to_be32wu(p, v) cpu_to_be32w(p, v)
  26.141 +
  26.142 +#else
  26.143 +
  26.144 +static inline void cpu_to_le16wu(uint16_t *p, uint16_t v)
  26.145 +{
  26.146 +    uint8_t *p1 = (uint8_t *)p;
  26.147 +    /* store v one byte at a time, least significant byte first */
  26.148 +    p1[0] = v;
  26.149 +    p1[1] = v >> 8;
  26.150 +}
  26.151 +
  26.152 +static inline void cpu_to_le32wu(uint32_t *p, uint32_t v)
  26.153 +{
  26.154 +    uint8_t *p1 = (uint8_t *)p;
  26.155 +    /* store v one byte at a time, least significant byte first */
  26.156 +    p1[0] = v;
  26.157 +    p1[1] = v >> 8;
  26.158 +    p1[2] = v >> 16;
  26.159 +    p1[3] = v >> 24;
  26.160 +}
  26.161 +
  26.162 +static inline uint16_t le16_to_cpupu(const uint16_t *p)
  26.163 +{
  26.164 +    const uint8_t *p1 = (const uint8_t *)p;	/* read byte-wise */
  26.165 +    return p1[0] | (p1[1] << 8);	/* byte 0 is the least significant */
  26.166 +}
  26.167 +
  26.168 +static inline uint32_t le32_to_cpupu(const uint32_t *p)	/* assemble a little-endian 32-bit value byte by byte */
  26.169 +{
  26.170 +    const uint8_t *p1 = (const uint8_t *)p;
  26.171 +    return p1[0] | (p1[1] << 8) | (p1[2] << 16) | ((uint32_t)p1[3] << 24);	/* cast: shifting a promoted int into its sign bit is undefined */
  26.172 +}
  26.173 +
  26.174 +static inline void cpu_to_be16wu(uint16_t *p, uint16_t v)
  26.175 +{
  26.176 +    uint8_t *p1 = (uint8_t *)p;
  26.177 +    /* store v one byte at a time, most significant byte first */
  26.178 +    p1[0] = v >> 8;
  26.179 +    p1[1] = v;
  26.180 +}
  26.181 +
  26.182 +static inline void cpu_to_be32wu(uint32_t *p, uint32_t v)
  26.183 +{
  26.184 +    uint8_t *p1 = (uint8_t *)p;
  26.185 +    /* store v one byte at a time, most significant byte first */
  26.186 +    p1[0] = v >> 24;
  26.187 +    p1[1] = v >> 16;
  26.188 +    p1[2] = v >> 8;
  26.189 +    p1[3] = v;
  26.190 +}
  26.191 +
  26.192 +#endif
  26.193 +
  26.194 +#ifdef WORDS_BIGENDIAN	/* cpu_to_32wu stores in the host's own byte order */
  26.195 +#define cpu_to_32wu cpu_to_be32wu
  26.196 +#else
  26.197 +#define cpu_to_32wu cpu_to_le32wu
  26.198 +#endif
  26.199 +
  26.200 +#undef le_bswap
  26.201 +#undef be_bswap
  26.202 +#undef le_bswaps
  26.203 +#undef be_bswaps
  26.204 +
  26.205 +#endif /* BSWAP_H */
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/tools/blktap/drivers/img2qcow.c	Thu Jul 13 10:13:26 2006 +0100
    27.3 @@ -0,0 +1,289 @@
    27.4 +/* img2qcow.c
    27.5 + *
    27.6 + * Generates a qcow format disk and fills it from an existing image.
    27.7 + *
    27.8 + * (c) 2006 Julian Chesterfield and Andrew Warfield
    27.9 + *
   27.10 + * This program is free software; you can redistribute it and/or
   27.11 + * modify it under the terms of the GNU General Public License version 2
   27.12 + * as published by the Free Software Foundation; or, when distributed
   27.13 + * separately from the Linux kernel or incorporated into other
   27.14 + * software packages, subject to the following license:
   27.15 + *
   27.16 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   27.17 + * of this source file (the "Software"), to deal in the Software without
   27.18 + * restriction, including without limitation the rights to use, copy, modify,
   27.19 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   27.20 + * and to permit persons to whom the Software is furnished to do so, subject to
   27.21 + * the following conditions:
   27.22 + *
   27.23 + * The above copyright notice and this permission notice shall be included in
   27.24 + * all copies or substantial portions of the Software.
   27.25 + *
   27.26 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   27.27 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   27.28 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   27.29 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   27.30 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   27.31 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   27.32 + * IN THE SOFTWARE.
   27.33 + */
   27.34 +
   27.35 +#include <errno.h>
   27.36 +#include <fcntl.h>
   27.37 +#include <stdio.h>
   27.38 +#include <stdlib.h>
   27.39 +#include <unistd.h>
   27.40 +#include <sys/statvfs.h>
   27.41 +#include <sys/stat.h>
   27.42 +#include <sys/ioctl.h>
   27.43 +#include <linux/fs.h>
   27.44 +#include <string.h>
   27.45 +#include "tapdisk.h"
   27.46 +
   27.47 +#if 1	/* debug output enabled: DFPRINTF formats to stderr */
   27.48 +#define DFPRINTF(_f, _a...) fprintf ( stderr, _f , ## _a )
   27.49 +#else	/* debug output compiled out */
   27.50 +#define DFPRINTF(_f, _a...) ((void)0)
   27.51 +#endif
   27.52 +
   27.53 +#define TAPDISK 1
   27.54 +#define BLOCK_PROCESSSZ 4096
   27.55 +
   27.56 +static int maxfds, *io_fd, running = 1, complete = 0;
   27.57 +static int returned_events = 0, submit_events = 0;
   27.58 +static uint64_t prev = 0;
   27.59 +static char output[25];
   27.60 +
   27.61 +/* Hex-dump length bytes at ptr to the debug stream: two zero-padded hex
   27.62 + * digits per byte, a space after every 2 bytes, a newline after every 16. */
   27.63 +void print_bytes(void *ptr, int length) {
   27.64 +    const unsigned char *p = ptr;
   27.65 +    int k;
   27.66 +    DFPRINTF("Buf dump, length %d:\n",length);
   27.67 +    for (k = 0; k < length; k++) {
   27.68 +        /* %02x keeps byte boundaries unambiguous for values below 0x10 */
   27.69 +        DFPRINTF("%02x", p[k]);
   27.70 +        /* test k + 1 so the first row is not broken after a single byte */
   27.71 +        if ((k + 1) % 16 == 0) DFPRINTF("\n");
   27.72 +        else if ((k + 1) % 2 == 0) DFPRINTF(" ");
   27.73 +    }
   27.74 +    DFPRINTF("\n");
   27.75 +}
   27.76 +
   27.77 +/* Advance the 20-step progress bar by one 5% step once progress (out of
   27.78 + * size) has passed the next step boundary, then redraw the bar. */
   27.79 +void debug_output(uint64_t progress, uint64_t size)
   27.80 +{
   27.81 +	uint64_t blocks = size/20;	/* amount of progress per 5% step */
   27.82 +
   27.83 +	/* blocks == 0 (size < 20) would divide by zero; prev >= 20 would let
   27.84 +	 * "=>" overwrite the closing ']' or run past the end of output[] */
   27.85 +	if (blocks == 0 || prev >= 20 || progress/blocks <= prev) return;
   27.86 +	memcpy(output+prev+1,"=>",2);
   27.87 +	prev++;
   27.88 +	DFPRINTF("\r%s     %llu%%", output, (unsigned long long)(prev-1)*5);
   27.89 +}
   27.90 +
   27.91 +/* Rebuild readfds so it holds only the AIO fd; maxfds becomes that fd + 1. */
   27.92 +static inline void LOCAL_FD_SET(fd_set *readfds) 
   27.93 +{
   27.94 +	FD_ZERO(readfds);	/* callers pass a set that was never cleared */
   27.95 +	FD_SET(io_fd[0], readfds);
   27.96 +	maxfds = io_fd[0] + 1;
   27.97 +}
   27.98 +
   27.99 +/* Fill s->size (in sectors) and s->sector_size for the image open on fd.
  27.100 + * Block devices are queried with ioctls, anything else uses the fstat()
  27.101 + * file length. Returns 0 on success, -EINVAL if the size is unavailable. */
  27.102 +static int get_image_info(struct td_state *s, int fd)
  27.103 +{
  27.104 +	struct stat stat;
  27.105 +
  27.106 +	if (fstat(fd, &stat) != 0) {
  27.107 +		DFPRINTF("ERROR: fstat failed, Couldn't stat image");
  27.108 +		return -EINVAL;
  27.109 +	}
  27.110 +
  27.111 +	if (S_ISBLK(stat.st_mode)) {
  27.112 +		/*Accessing block device directly*/
  27.113 +		/* BLKGETSIZE stores an unsigned long sector count: read it
  27.114 +		 * into a local of that type, whatever width s->size has */
  27.115 +		unsigned long total_size = 0;
  27.116 +
  27.117 +		if (ioctl(fd,BLKGETSIZE,&total_size)!=0) {
  27.118 +			DFPRINTF("ERR: BLKGETSIZE failed, couldn't stat image");
  27.119 +			return -EINVAL;
  27.120 +		}
  27.121 +		s->size = total_size;
  27.122 +
  27.123 +		DFPRINTF("Image size: \n\tpre sector_shift  [%llu]\n\tpost "
  27.124 +			"sector_shift [%llu]\n",
  27.125 +			(long long unsigned)(s->size << SECTOR_SHIFT),
  27.126 +			(long long unsigned)s->size);
  27.127 +
  27.128 +		/*Get the sector size*/
  27.129 +#if defined(BLKSSZGET)
  27.130 +		{
  27.131 +			/* BLKSSZGET stores an int; arg keeps the default
  27.132 +			 * when the ioctl fails and leaves it untouched */
  27.133 +			int arg = DEFAULT_SECTOR_SIZE;
  27.134 +
  27.135 +			ioctl(fd, BLKSSZGET, &arg);
  27.136 +			s->sector_size = arg;
  27.137 +			if (s->sector_size != DEFAULT_SECTOR_SIZE)
  27.138 +				DFPRINTF("Note: sector size is %ld (not %d)\n",
  27.139 +					s->sector_size, DEFAULT_SECTOR_SIZE);
  27.140 +		}
  27.141 +#else
  27.142 +		s->sector_size = DEFAULT_SECTOR_SIZE;
  27.143 +#endif
  27.144 +	} else {
  27.145 +		/*Local file? try fstat instead*/
  27.146 +		s->size = (stat.st_size >> SECTOR_SHIFT);
  27.147 +		s->sector_size = DEFAULT_SECTOR_SIZE;
  27.148 +		DFPRINTF("Image size: [%llu]\n", (long long unsigned)s->size);
  27.149 +	}
  27.150 +	return 0;
  27.151 +}
  27.152 +
  27.153 +/* Completion callback: log a negative res, count the event, free private. */
  27.154 +static int send_responses(struct td_state *s, int res, int idx, void *private)
  27.155 +{
  27.156 +	if (res < 0)
  27.157 +		DFPRINTF("AIO FAILURE: res [%d]!\n",res);
  27.158 +	returned_events += 1;
  27.159 +	free(private);
  27.160 +	return 0;
  27.161 +}
  27.162 +
  27.163 +/* Copy raw image argv[2] into a newly created qcow file argv[1]: read 4k
  27.164 + * chunks, queue each as an async qcow write, then poll the driver fd until
  27.165 + * every queued write has completed. */
  27.166 +int main(int argc, char *argv[])
  27.167 +{
  27.168 +	struct tap_disk *drv;
  27.169 +	struct td_state *s;
  27.170 +	int ret = -1, fd, len;
  27.171 +	fd_set readfds;
  27.172 +	struct timeval timeout;
  27.173 +	uint64_t i;
  27.174 +	char *buf;
  27.175 +
  27.176 +	if (argc != 3) {
  27.177 +		fprintf(stderr, "Qcow-utils: v1.0.0\n");
  27.178 +		fprintf(stderr, "usage: %s <QCOW FILENAME> <SRC IMAGE>\n", argv[0]);
  27.179 +		exit(-1);
  27.180 +	}
  27.181 +
  27.182 +	s = malloc(sizeof(struct td_state));
  27.183 +	if (s == NULL) {
  27.184 +		DFPRINTF("Unable to allocate disk state\n");
  27.185 +		exit(-1);
  27.186 +	}
  27.187 +
  27.188 +	/*Open image*/
  27.189 +	fd = open(argv[2], O_RDONLY | O_LARGEFILE);
  27.190 +	if (fd == -1) {
  27.191 +		DFPRINTF("Unable to open [%s], (err %d)!\n",argv[2],0 - errno);
  27.192 +		exit(-1);
  27.193 +	}
  27.194 +
  27.195 +	/* every later step depends on s->size, so a failed probe is fatal */
  27.196 +	if (get_image_info(s, fd) != 0) exit(-1);
  27.197 +
  27.198 +	/*Create qcow file*/
  27.199 +	ret = qcow_create(argv[1],s->size<<SECTOR_SHIFT,NULL,0);
  27.200 +	if (ret < 0) {
  27.201 +		DFPRINTF("Unable to create QCOW file\n");
  27.202 +		exit(-1);
  27.203 +	} else DFPRINTF("Qcow file created: size %llu sectors\n",
  27.204 +			(long long unsigned)s->size);
  27.205 +
  27.206 +	drv = &tapdisk_qcow;
  27.207 +	s->private = malloc(drv->private_data_size);
  27.208 +	if (s->private == NULL) {
  27.209 +		DFPRINTF("Unable to allocate driver state\n");
  27.210 +		exit(-1);
  27.211 +	}
  27.212 +
  27.213 +	/*Open qcow file*/
  27.214 +	if (drv->td_open(s, argv[1])!=0) {
  27.215 +		DFPRINTF("Unable to open Qcow file [%s]\n",argv[1]);
  27.216 +		exit(-1);
  27.217 +	}
  27.218 +	io_fd = drv->td_get_fd(s);
  27.219 +
  27.220 +	/*Initialise the output string*/
  27.221 +	memset(output,0x20,25);
  27.222 +	output[0] = '[';
  27.223 +	output[22] = ']';
  27.224 +	output[23] = '\0';
  27.225 +	DFPRINTF("%s",output);
  27.226 +
  27.227 +	i = 0;
  27.228 +	while (running) {
  27.229 +		timeout.tv_sec = 0;
  27.230 +
  27.231 +		if (!complete) {
  27.232 +			/*Read sector from image*/
  27.233 +			if (lseek(fd, i, SEEK_SET) == (off_t)-1) {
  27.234 +				DFPRINTF("Unable to access file offset %llu\n",
  27.235 +				       (long long)i);
  27.236 +				exit(-1);
  27.237 +			}
  27.238 +
  27.239 +			if( (ret = posix_memalign((void **)&buf, 
  27.240 +						  BLOCK_PROCESSSZ, 
  27.241 +						  BLOCK_PROCESSSZ)) != 0) {
  27.242 +				DFPRINTF("Unable to read memalign buf (%d)\n",ret);
  27.243 +				exit(-1);				
  27.244 +			}
  27.245 +
  27.246 +			/*We attempt to read 4k sized blocks*/
  27.247 +			len = read(fd, buf, BLOCK_PROCESSSZ);
  27.248 +			if (len < 512) {
  27.249 +				DFPRINTF("Unable to read sector %llu\n",
  27.250 +				       (long long unsigned) (i >> 9));
  27.251 +				/* never queued, so no callback frees it */
  27.252 +				free(buf);
  27.253 +				complete = 1;
  27.254 +				continue;
  27.255 +			}
  27.256 +			if (len % 512) {
  27.257 +				len = (len >> 9) << 9;
  27.258 +			}
  27.259 +			ret = drv->td_queue_write(s, i >> 9,
  27.260 +						  len >> 9, buf, 
  27.261 +						  send_responses, 0, buf);
  27.262 +			if (!ret) submit_events++;
  27.263 +			if (ret < 0) {
  27.264 +				DFPRINTF("UNABLE TO WRITE block [%llu]\n",
  27.265 +				       (long long unsigned) (i >> 9));
  27.266 +			} else i += len;
  27.267 +			if (i >> 9 == s->size) complete = 1;
  27.268 +			debug_output(i,s->size << 9);
  27.269 +			if ((submit_events % 10 == 0) || complete) 
  27.270 +				drv->td_submit(s);
  27.271 +			timeout.tv_usec = 0;
  27.272 +		} else {
  27.273 +			timeout.tv_usec = 1000;
  27.274 +			if (!submit_events) running = 0;
  27.275 +		}
  27.276 +
  27.277 +		/*Check AIO FD*/
  27.278 +		LOCAL_FD_SET(&readfds);
  27.279 +		ret = select(maxfds + 1, &readfds, (fd_set *) 0,
  27.280 +			     (fd_set *) 0, &timeout);
  27.281 +		if (ret > 0) drv->td_do_callbacks(s, 0);
  27.282 +		if (complete && (returned_events == submit_events)) 
  27.283 +			running = 0;
  27.284 +	}
  27.285 +	memcpy(output+prev+1,"=",1);
  27.286 +	DFPRINTF("\r%s     100%%\nTRANSFER COMPLETE\n\n", output);
  27.287 +	drv->td_close(s);
  27.288 +	close(fd);	/* the source image was never closed before */
  27.289 +	free(s->private);
  27.290 +	free(s);
  27.291 +	return 0;
  27.292 +}
    28.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.2 +++ b/tools/blktap/drivers/qcow-create.c	Thu Jul 13 10:13:26 2006 +0100
    28.3 @@ -0,0 +1,80 @@
    28.4 +/* qcow-create.c
    28.5 + *
    28.6 + * Generates a qcow format disk.
    28.7 + *
    28.8 + * (c) 2006 Andrew Warfield and Julian Chesterfield
    28.9 + *
   28.10 + * This program is free software; you can redistribute it and/or
   28.11 + * modify it under the terms of the GNU General Public License version 2
   28.12 + * as published by the Free Software Foundation; or, when distributed
   28.13 + * separately from the Linux kernel or incorporated into other
   28.14 + * software packages, subject to the following license:
   28.15 + *
   28.16 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   28.17 + * of this source file (the "Software"), to deal in the Software without
   28.18 + * restriction, including without limitation the rights to use, copy, modify,
   28.19 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   28.20 + * and to permit persons to whom the Software is furnished to do so, subject to
   28.21 + * the following conditions:
   28.22 + *
   28.23 + * The above copyright notice and this permission notice shall be included in
   28.24 + * all copies or substantial portions of the Software.
   28.25 + *
   28.26 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   28.27 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   28.28 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   28.29 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   28.30 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   28.31 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   28.32 + * IN THE SOFTWARE.
   28.33 + */
   28.34 +
   28.35 +#include <errno.h>
   28.36 +#include <fcntl.h>
   28.37 +#include <stdio.h>
   28.38 +#include <stdlib.h>
   28.39 +#include <unistd.h>
   28.40 +#include <sys/statvfs.h>
   28.41 +#include <sys/stat.h>
   28.42 +#include <sys/ioctl.h>
   28.43 +#include <linux/fs.h>
   28.44 +#include <string.h>
   28.45 +#include "tapdisk.h"
   28.46 +
   28.47 +#if 1
   28.48 +#define DFPRINTF(_f, _a...) fprintf ( stderr, _f , ## _a )
   28.49 +#else
   28.50 +#define DFPRINTF(_f, _a...) ((void)0)
   28.51 +#endif
   28.52 +
   28.53 +
   28.54 +/* Create a qcow image argv[2] of argv[1] megabytes, optionally backed by
   28.55 + * argv[3]. Exits 0 on success and non-zero on bad usage or failure. */
   28.56 +int main(int argc, char *argv[])
   28.57 +{
   28.58 +	int ret = -1;
   28.59 +	uint64_t size;
   28.60 +	char *end;
   28.61 +
   28.62 +	if ( (argc < 3) || (argc > 4) ) {
   28.63 +		fprintf(stderr, "Qcow-utils: v1.0.0\n");
   28.64 +		fprintf(stderr, "usage: %s <SIZE(MB)> <FILENAME> "
   28.65 +			"[<BACKING_FILENAME>]\n", argv[0]);
   28.66 +		exit(-1);
   28.67 +	}
   28.68 +
   28.69 +	/* atoi() accepted garbage and negative sizes; require plain digits */
   28.70 +	errno = 0;
   28.71 +	size = strtoull(argv[1], &end, 10);
   28.72 +	if (argv[1][0] < '0' || argv[1][0] > '9' || *end != '\0' || errno ||
   28.73 +	    size > (~(uint64_t)0 >> 20)) {
   28.74 +		DFPRINTF("Invalid size [%s]\n", argv[1]);
   28.75 +		exit(-1);
   28.76 +	}
   28.77 +	size = size << 20;
   28.78 +	DFPRINTF("Creating file size %llu\n",(long long unsigned)size);
   28.79 +	ret = qcow_create(argv[2],size,(argc == 4) ? argv[3] : NULL,0);
   28.80 +	if (ret < 0) DFPRINTF("Unable to create QCOW file\n");
   28.81 +	else DFPRINTF("QCOW file successfully created\n");
   28.82 +	return (ret < 0) ? 1 : 0;	/* let callers detect a failed create */
   28.83 +}
    29.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.2 +++ b/tools/blktap/drivers/qcow2raw.c	Thu Jul 13 10:13:26 2006 +0100
    29.3 @@ -0,0 +1,346 @@
    29.4 +/* qcow2raw.c
    29.5 + *
    29.6 + * Generates raw image data from an existing qcow image
    29.7 + *
    29.8 + * (c) 2006 Julian Chesterfield and Andrew Warfield
    29.9 + *
   29.10 + * This program is free software; you can redistribute it and/or
   29.11 + * modify it under the terms of the GNU General Public License version 2
   29.12 + * as published by the Free Software Foundation; or, when distributed
   29.13 + * separately from the Linux kernel or incorporated into other
   29.14 + * software packages, subject to the following license:
   29.15 + *
   29.16 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   29.17 + * of this source file (the "Software"), to deal in the Software without
   29.18 + * restriction, including without limitation the rights to use, copy, modify,
   29.19 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   29.20 + * and to permit persons to whom the Software is furnished to do so, subject to
   29.21 + * the following conditions:
   29.22 + *
   29.23 + * The above copyright notice and this permission notice shall be included in
   29.24 + * all copies or substantial portions of the Software.
   29.25 + *
   29.26 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   29.27 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   29.28 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   29.29 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   29.30 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   29.31 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   29.32 + * IN THE SOFTWARE.
   29.33 + */
   29.34 +
   29.35 +#include <errno.h>
   29.36 +#include <fcntl.h>
   29.37 +#include <stdio.h>
   29.38 +#include <stdlib.h>
   29.39 +#include <unistd.h>
   29.40 +#include <sys/statvfs.h>
   29.41 +#include <sys/stat.h>
   29.42 +#include <sys/ioctl.h>
   29.43 +#include <linux/fs.h>
   29.44 +#include <string.h>
   29.45 +#include "tapdisk.h"
   29.46 +
   29.47 +#if 1
   29.48 +#define DFPRINTF(_f, _a...) fprintf ( stderr, _f , ## _a )
   29.49 +#else
   29.50 +#define DFPRINTF(_f, _a...) ((void)0)
   29.51 +#endif
   29.52 +
   29.53 +#define TAPDISK 1
   29.54 +#define BLOCK_PROCESSSZ 4096
   29.55 +
   29.56 +static int maxfds, *qcowio_fd, *aio_fd, running = 1, complete = 0; 
   29.57 +static int read_complete = 0, write_complete = 0;
   29.58 +static int returned_read_events = 0, returned_write_events = 0;
   29.59 +static int submit_events = 0;
   29.60 +static uint32_t read_idx = 0, write_idx = 0;
   29.61 +struct tap_disk *drv1, *drv2;
   29.62 +struct td_state *sqcow, *saio;
   29.63 +static uint64_t prev = 0, written = 0;
   29.64 +static char output[25];
   29.65 +
   29.66 +/* Hex-dump length bytes at ptr to the debug stream: two zero-padded hex
   29.67 + * digits per byte, a space after every 2 bytes, a newline after every 16. */
   29.68 +void print_bytes(void *ptr, int length) {
   29.69 +    const unsigned char *p = ptr;
   29.70 +    int k;
   29.71 +    DFPRINTF("Buf dump, length %d:\n",length);
   29.72 +    for (k = 0; k < length; k++) {
   29.73 +        /* %02x keeps byte boundaries unambiguous for values below 0x10 */
   29.74 +        DFPRINTF("%02x", p[k]);
   29.75 +        /* test k + 1 so the first row is not broken after a single byte */
   29.76 +        if ((k + 1) % 16 == 0) DFPRINTF("\n");
   29.77 +        else if ((k + 1) % 2 == 0) DFPRINTF(" ");
   29.78 +    }
   29.79 +    DFPRINTF("\n");
   29.80 +}
   29.81 +
   29.82 +/* Advance the 20-step progress bar by one 5% step once progress (out of
   29.83 + * size) has passed the next step boundary, then redraw the bar. */
   29.84 +void debug_output(uint64_t progress, uint64_t size)
   29.85 +{
   29.86 +	uint64_t blocks = size/20;	/* amount of progress per 5% step */
   29.87 +
   29.88 +	/* blocks == 0 (size < 20) would divide by zero; prev >= 20 would let
   29.89 +	 * "=>" overwrite the closing ']' or run past the end of output[] */
   29.90 +	if (blocks == 0 || prev >= 20 || progress/blocks <= prev) return;
   29.91 +	memcpy(output+prev+1,"=>",2);
   29.92 +	prev++;
   29.93 +	DFPRINTF("\r%s     %llu%%", output, (unsigned long long)(prev-1)*5);
   29.94 +}
   29.95 +
   29.96 +/* Rebuild readfds with the qcow and aio driver fds and set maxfds to the
   29.97 + * larger of the two plus one. */
   29.98 +static inline void LOCAL_FD_SET(fd_set *readfds) 
   29.99 +{
  29.100 +	FD_ZERO(readfds);	/* callers pass a set that was never cleared */
  29.101 +	FD_SET(qcowio_fd[0], readfds);
  29.102 +	FD_SET(aio_fd[0], readfds);
  29.103 +	maxfds = (qcowio_fd[0] > aio_fd[0] ? qcowio_fd[0] : aio_fd[0]) + 1;
  29.104 +}
  29.105 +
  29.106 +/* Write-completion callback: counts the event and frees the data buffer
  29.107 + * (private); only a successful write adds to the bytes written. Returns 0. */
  29.108 +static int send_write_responses(struct td_state *s, int res, int idx, void *private)
  29.109 +{
  29.110 +	/* A failed write must still be counted and its buffer released:
  29.111 +	 * main() only stops once returned_write_events == submit_events. */
  29.112 +	if (res < 0) DFPRINTF("AIO FAILURE: res [%d]!\n",res);
  29.113 +	else written += BLOCK_PROCESSSZ;
  29.114 +	returned_write_events++;
  29.115 +	write_idx = idx;
  29.116 +	if (complete && (returned_write_events == submit_events)) 
  29.117 +		write_complete = 1;
  29.118 +	debug_output(written, s->size << 9);
  29.119 +	free(private);
  29.120 +	return 0;
  29.121 +}
  29.122 +
  29.123 +/* Read-completion callback: forwards the block just read (private) to the
  29.124 + * aio destination as a queued write at sector idx, submitting the queued
  29.125 + * writes after every 10th completed read and after the final one. */
  29.126 +static int send_read_responses(struct td_state *s, int res, int idx, void *private)
  29.127 +{
  29.128 +	int last_read, queue_err;
  29.129 +
  29.130 +	if (res < 0)
  29.131 +		DFPRINTF("AIO FAILURE: res [%d]!\n",res);
  29.132 +	returned_read_events += 1;
  29.133 +	read_idx = idx;
  29.134 +	last_read = complete && (returned_read_events == submit_events);
  29.135 +	if (last_read)
  29.136 +		read_complete = 1;
  29.137 +
  29.138 +	queue_err = drv2->td_queue_write(saio, idx, BLOCK_PROCESSSZ>>9, private,
  29.139 +					 send_write_responses, idx, private);
  29.140 +	if (queue_err == 0) {
  29.141 +		if (last_read || (returned_read_events % 10 == 0))
  29.142 +			drv2->td_submit(saio);
  29.143 +	} else {
  29.144 +		DFPRINTF("ERROR in submitting queue write!\n");
  29.145 +	}
  29.146 +	return 0;
  29.147 +}
  29.148 +
  29.149 +/* Convert the qcow image argv[2] into raw data at argv[1] (file or block
  29.150 + * device): queue 4k reads on the qcow driver; each completed read is
  29.151 + * re-queued as a write on the aio driver until all writes have returned. */
  29.152 +int main(int argc, char *argv[])
  29.153 +{
  29.154 +	int ret = -1, fd;
  29.155 +	unsigned long size;
  29.156 +	fd_set readfds;
  29.157 +	struct timeval timeout;
  29.158 +	uint64_t i;
  29.159 +	char *buf;
  29.160 +	struct stat finfo;
  29.161 +
  29.162 +	if (argc != 3) {
  29.163 +		fprintf(stderr, "Qcow-utils: v1.0.0\n");
  29.164 +		fprintf(stderr, "usage: %s <Dest File descriptor> "
  29.165 +			"<Qcow SRC IMAGE>\n", argv[0]);
  29.166 +		exit(-1);
  29.167 +	}
  29.168 +
  29.169 +	sqcow = malloc(sizeof(struct td_state));
  29.170 +	saio  = malloc(sizeof(struct td_state));
  29.171 +	if (sqcow == NULL || saio == NULL) {
  29.172 +		DFPRINTF("Unable to allocate disk state\n");
  29.173 +		exit(-1);
  29.174 +	}
  29.175 +
  29.176 +	/*Open qcow source file*/	
  29.177 +	drv1 = &tapdisk_qcow;
  29.178 +	sqcow->private = malloc(drv1->private_data_size);
  29.179 +	if (sqcow->private == NULL) {
  29.180 +		DFPRINTF("Unable to allocate driver state\n");
  29.181 +		exit(-1);
  29.182 +	}
  29.183 +	if (drv1->td_open(sqcow, argv[2])!=0) {
  29.184 +		DFPRINTF("Unable to open Qcow file [%s]\n",argv[2]);
  29.185 +		exit(-1);
  29.186 +	} else DFPRINTF("QCOW file opened, size %llu\n",
  29.187 +		      (long long unsigned)sqcow->size);
  29.188 +	qcowio_fd = drv1->td_get_fd(sqcow);
  29.189 +
  29.190 +	/*Setup aio destination file*/
  29.191 +	ret = stat(argv[1],&finfo);
  29.192 +	if (ret == -1) {
  29.193 +		/*Check errno*/
  29.194 +		switch(errno) {
  29.195 +		case ENOENT:
  29.196 +			/*File doesn't exist, create*/
  29.197 +			fd = open(argv[1], 
  29.198 +				  O_RDWR | O_LARGEFILE | O_CREAT, 0644);
  29.199 +			if (fd < 0) {
  29.200 +				DFPRINTF("ERROR creating file [%s] "
  29.201 +					 "(errno %d)\n",
  29.202 +				       argv[1], 0 - errno);
  29.203 +				exit(-1);
  29.204 +			}
  29.205 +			if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) {
  29.206 +				DFPRINTF("Unable to create file "
  29.207 +					"[%s] of size %llu (errno %d). "
  29.208 +					 "Exiting...\n", argv[1], 
  29.209 +					(long long unsigned)sqcow->size<<9, 
  29.210 +					0 - errno);
  29.211 +				close(fd);
  29.212 +				exit(-1);
  29.213 +			}
  29.214 +			close(fd);
  29.215 +			break;
  29.216 +		case  ENXIO:
  29.217 +			DFPRINTF("ERROR Device [%s] does not exist\n",argv[1]);
  29.218 +			exit(-1);
  29.219 +		default: 
  29.220 +			DFPRINTF("An error occurred opening Device [%s] "
  29.221 +				 "(errno %d)\n", argv[1], 0 - errno);
  29.222 +			exit(-1);
  29.223 +		}
  29.224 +	} else {		
  29.225 +		fprintf(stderr, "WARNING: All existing data in "
  29.226 +			"%s will be overwritten.\nDo you wish to continue? "
  29.227 +			"(y or n)  ",
  29.228 +			argv[1]);
  29.229 +		if (getchar() != 'y') {
  29.230 +			DFPRINTF("Exiting...\n");
  29.231 +			exit(-1);
  29.232 +		}
  29.233 +
  29.234 +		/*TODO - Test the existing file or device for adequate space*/
  29.235 +		fd = open(argv[1], O_RDWR | O_LARGEFILE);
  29.236 +		if (fd < 0) {
  29.237 +			DFPRINTF("ERROR: opening file [%s] (errno %d)\n",
  29.238 +			       argv[1], 0 - errno);
  29.239 +			exit(-1);
  29.240 +		}
  29.241 +
  29.242 +		if (S_ISBLK(finfo.st_mode)) {
  29.243 +			if(ioctl(fd,BLKGETSIZE,&size)!=0) {
  29.244 +				DFPRINTF("ERROR: BLKGETSIZE failed, "
  29.245 +					"couldn't stat image [%s]\n", 
  29.246 +					argv[1]);
  29.247 +				close(fd);
  29.248 +				exit(-1);
  29.249 +			}
  29.250 +			/* BLKGETSIZE reports 512-byte sectors, as does
  29.251 +			 * sqcow->size, so compare sectors with sectors */
  29.252 +			if (size < sqcow->size) {
  29.253 +				DFPRINTF("ERROR: Not enough space on device "
  29.254 +					"%s (%llu bytes available, %llu bytes required)\n",
  29.255 +					argv[1], (long long unsigned)size<<9, 
  29.256 +					(long long unsigned)sqcow->size<<9);
  29.257 +				close(fd);
  29.258 +				exit(-1);				
  29.259 +			}
  29.260 +		} else {
  29.261 +			if (ftruncate(fd, (off_t)sqcow->size<<9) < 0) {
  29.262 +				DFPRINTF("Unable to create file "
  29.263 +					"[%s] of size %llu (errno %d). "
  29.264 +					 "Exiting...\n", argv[1], 
  29.265 +					(long long unsigned)sqcow->size<<9, 
  29.266 +					 0 - errno);
  29.267 +				close(fd);
  29.268 +				exit(-1);
  29.269 +			} else DFPRINTF("File [%s] truncated to length %llu "
  29.270 +					"(%llu)\n", 
  29.271 +				       argv[1], 
  29.272 +				       (long long unsigned)sqcow->size<<9, 
  29.273 +				       (long long unsigned)sqcow->size);
  29.274 +		}
  29.275 +		close(fd);
  29.276 +	}
  29.277 +
  29.278 +	/*Open aio destination file*/	
  29.279 +	drv2 = &tapdisk_aio;
  29.280 +	saio->private = malloc(drv2->private_data_size);
  29.281 +	if (saio->private == NULL) {
  29.282 +		DFPRINTF("Unable to allocate driver state\n");
  29.283 +		exit(-1);
  29.284 +	}
  29.285 +	if (drv2->td_open(saio, argv[1])!=0) {
  29.286 +		DFPRINTF("Unable to open destination file [%s]\n", argv[1]);
  29.287 +		exit(-1);
  29.288 +	}
  29.289 +	aio_fd = drv2->td_get_fd(saio);
  29.290 +
  29.291 +	/*Initialise the output string*/
  29.292 +	memset(output,0x20,25);
  29.293 +	output[0] = '[';
  29.294 +	output[22] = ']';
  29.295 +	output[23] = '\0';
  29.296 +	DFPRINTF("%s",output);
  29.297 +
  29.298 +	i = 0;
  29.299 +	while (running) {
  29.300 +		timeout.tv_sec = 0;
  29.301 +
  29.302 +		if (!complete) {
  29.303 +			/*Read Pages from qcow image*/
  29.304 +			if ( (ret = posix_memalign((void **)&buf, BLOCK_PROCESSSZ,
  29.305 +						   BLOCK_PROCESSSZ)) != 0) {
  29.306 +				DFPRINTF("Unable to alloc memory (%d)\n",ret);
  29.307 +				exit(-1);				
  29.308 +			}
  29.309 +
  29.310 +			/*Attempt to read 4k sized blocks*/
  29.311 +			ret = drv1->td_queue_read(sqcow, i>>9,
  29.312 +						  BLOCK_PROCESSSZ>>9, buf, 
  29.313 +						  send_read_responses, i>>9, buf);
  29.314 +			if (ret < 0) {
  29.315 +				DFPRINTF("UNABLE TO READ block [%llu]\n",
  29.316 +				       (long long unsigned)i);
  29.317 +				exit(-1);
  29.318 +			} else {
  29.319 +				i += BLOCK_PROCESSSZ;
  29.320 +				submit_events++;
  29.321 +			}
  29.322 +
  29.323 +			if (i >= sqcow->size<<9) complete = 1;
  29.324 +			if ((submit_events % 10 == 0) || complete) 
  29.325 +				drv1->td_submit(sqcow);
  29.326 +			timeout.tv_usec = 0;
  29.327 +		} else {
  29.328 +			timeout.tv_usec = 1000;
  29.329 +			if (!submit_events) running = 0;
  29.330 +		}
  29.331 +
  29.332 +		/*Check AIO FD*/
  29.333 +		LOCAL_FD_SET(&readfds);
  29.334 +		ret = select(maxfds + 1, &readfds, (fd_set *) 0,
  29.335 +			     (fd_set *) 0, &timeout);
  29.336 +		if (ret > 0) {
  29.337 +			if (FD_ISSET(qcowio_fd[0], &readfds)) 
  29.338 +				drv1->td_do_callbacks(sqcow, 0);
  29.339 +			if (FD_ISSET(aio_fd[0], &readfds)) 
  29.340 +				drv2->td_do_callbacks(saio, 0);
  29.341 +		}
  29.342 +		if (complete && (returned_write_events == submit_events)) 
  29.343 +			running = 0;
  29.344 +	}
  29.345 +	memcpy(output+prev+1,"=",1);
  29.346 +	DFPRINTF("\r%s     100%%\nTRANSFER COMPLETE\n\n", output);
  29.347 +
  29.348 +	return 0;
  29.349 +}
    30.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.2 +++ b/tools/blktap/drivers/tapdisk.c	Thu Jul 13 10:13:26 2006 +0100
    30.3 @@ -0,0 +1,671 @@
    30.4 +/* tapdisk.c
    30.5 + *
    30.6 + * separate disk process, spawned by blktapctrl. Inherits code from driver 
    30.7 + * plugins
    30.8 + * 
    30.9 + * Copyright (c) 2005 Julian Chesterfield and Andrew Warfield.
   30.10 + *
   30.11 + */
   30.12 +
   30.13 +#define MSG_SIZE 4096
   30.14 +#define TAPDISK
   30.15 +
   30.16 +#include <stdio.h>
   30.17 +#include <stdlib.h>
   30.18 +#include <sys/mman.h>
   30.19 +#include <fcntl.h>
   30.20 +#include <string.h>
   30.21 +#include <signal.h>
   30.22 +#include <sys/stat.h>
   30.23 +#include <sys/types.h>
   30.24 +#include <sys/poll.h>
   30.25 +#include <unistd.h>
   30.26 +#include <errno.h>
   30.27 +#include <pthread.h>
   30.28 +#include <time.h>
   30.29 +#include <err.h>
   30.30 +#include <poll.h>
   30.31 +#include <sys/statvfs.h>
   30.32 +#include <sys/ioctl.h>
   30.33 +#include <linux/fs.h>
   30.34 +#include "blktaplib.h"
   30.35 +#include "tapdisk.h"
   30.36 +
   30.37 +#if 1                                                                        
   30.38 +#define ASSERT(_p) \
   30.39 +    if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
   30.40 +    __LINE__, __FILE__); *(int*)0=0; }
   30.41 +#else
   30.42 +#define ASSERT(_p) ((void)0)
   30.43 +#endif 
   30.44 +
   30.45 +#define INPUT 0
   30.46 +#define OUTPUT 1
   30.47 +
   30.48 +static int maxfds, fds[2], run = 1;
   30.49 +
   30.50 +static pid_t process;
   30.51 +int connected_disks = 0;
   30.52 +fd_list_entry_t *fd_start = NULL;
   30.53 +
   30.54 +/* Print the version banner and usage synopsis on stderr, then exit(-1). */
   30.55 +void usage(void)
   30.56 +{
   30.57 +	fputs("blktap-utils: v1.0.0\nusage: tapdisk <READ fifo> <WRITE fifo>\n", stderr);
   30.58 +	exit(-1);
   30.59 +}
   30.60 +
   30.61 +/* Fork; the parent exits so only the child runs on. exit(-1) if fork fails. */
   30.62 +void daemonize(void)
   30.63 +{
   30.64 +	int i;
   30.65 +	pid_t pid;
   30.66 +	if (getppid()==1) return; /* already a daemon */
   30.67 +	pid = fork();
   30.68 +	if (pid < 0) exit(-1);	/* used to exit(0) as if a child existed */
   30.69 +	if (pid > 0) exit(0);	/* parent: the child continues */
   30.70 +#if 0
   30.71 +	/*Set new program session ID and close all descriptors*/
   30.72 +	setsid();
   30.73 +	for (i = getdtablesize(); i >= 0; --i) close(i);
   30.74 +	/*Send all I/O to /dev/null */
   30.75 +	i = open("/dev/null",O_RDWR);
   30.76 +	dup(i); 
   30.77 +	dup(i);
   30.78 +#endif
   30.79 +}
   30.80 +
   30.81 +/* Tear down one disk: close its driver, unmap the shared ring, close the
   30.82 + * tap fd, unlink the disk's entry from the fd list and release everything
   30.83 + * that hangs off s, including s itself. */
   30.84 +static void unmap_disk(struct td_state *s)
   30.85 +{
   30.86 +	tapdev_info_t *info = s->ring_info;
   30.87 +	struct tap_disk *drv = s->drv;
   30.88 +	fd_list_entry_t *entry = s->fd_entry;
   30.89 +
   30.90 +	drv->td_close(s);
   30.91 +
   30.92 +	if (info != NULL) {
   30.93 +		if (info->mem > 0) 
   30.94 +			munmap(info->mem, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE);
   30.95 +		/* the fd is stored inside info, so it may only be closed
   30.96 +		 * when info exists (it used to be dereferenced regardless) */
   30.97 +		close(info->fd);
   30.98 +	}
   30.99 +
  30.100 +	/* Unlink entry from the doubly linked list headed by fd_start. */
  30.101 +	if (entry->prev) {
  30.102 +		/*There are entries earlier in the list*/
  30.103 +		entry->prev->next = entry->next;
  30.104 +	} else {
  30.105 +		/*We are the first entry in list*/
  30.106 +		fd_start = entry->next;
  30.107 +	}
  30.108 +	if (entry->next)
  30.109 +		entry->next->prev = entry->prev;
  30.110 +
  30.111 +	free(s->fd_entry);
  30.112 +	free(s->blkif);
  30.113 +	free(s->ring_info);
  30.114 +
  30.115 +	/* The driver state is malloc'ed by whoever set s->private and was
  30.116 +	 * never released here. NOTE(review): assumes td_close() does not
  30.117 +	 * free it itself - confirm for every driver. */
  30.118 +	free(s->private);
  30.119 +	free(s);
  30.120 +}
  30.121 +
  30.122 +void sig_handler(int sig)	/* close request: honoured only when idle */
  30.123 +{
  30.124 +	if (connected_disks >= 1)
  30.125 +		return;		/* disks are still active, keep running */
  30.126 +	run = 0;		/* no disks left: clear the run flag */
  30.127 +}
  30.128 +
  30.129 +/* Add the tap fd and every non-zero io fd of each list entry that has a
  30.130 + * tap fd to readfds, raising maxfds to the largest fd seen. Returns 0. */
  30.131 +static inline int LOCAL_FD_SET(fd_set *readfds)
  30.132 +{
  30.133 +	fd_list_entry_t *e;
  30.134 +	int n;
  30.135 +
  30.136 +	for (e = fd_start; e != NULL; e = e->next) {
  30.137 +		if (!e->tap_fd)
  30.138 +			continue;	/* entry has no tap fd recorded */
  30.139 +		FD_SET(e->tap_fd, readfds);
  30.140 +		for (n = 0; n < MAX_IOFD; n++) {
  30.141 +			if (e->io_fd[n])
  30.142 +				FD_SET(e->io_fd[n], readfds);
  30.143 +			if (e->io_fd[n] > maxfds)
  30.144 +				maxfds = e->io_fd[n];
  30.145 +		}
  30.146 +		if (e->tap_fd > maxfds)
  30.147 +			maxfds = e->tap_fd;
  30.148 +	}
  30.149 +	return 0;
  30.150 +}
  30.151 +
  30.152 +/* Allocate a list entry for disk s holding tap_fd and a copy of io_fd[],
  30.153 + * record it in s->fd_entry and append it to the list headed by fd_start.
  30.154 + * Returns the new entry; its cookie field is not set here. */
  30.155 +static inline fd_list_entry_t *add_fd_entry(int tap_fd, int io_fd[MAX_IOFD], struct td_state *s)
  30.156 +{
  30.157 +	fd_list_entry_t *tail, *entry;
  30.158 +	int n;
  30.159 +
  30.160 +	DPRINTF("Adding fd_list_entry\n");
  30.161 +	/*Add to linked list*/
  30.162 +	entry = malloc(sizeof(fd_list_entry_t));
  30.163 +	s->fd_entry = entry;
  30.164 +	entry->tap_fd = tap_fd;
  30.165 +	for (n = 0; n < MAX_IOFD; n++)
  30.166 +		entry->io_fd[n] = io_fd[n];
  30.167 +	entry->s = s;
  30.168 +	entry->next = NULL;
  30.169 +
  30.170 +	if (fd_start == NULL) {
  30.171 +		/*We are the first entry*/
  30.172 +		fd_start = entry;
  30.173 +		entry->prev = NULL;
  30.174 +		return entry;
  30.175 +	}
  30.176 +	/* otherwise walk to the current tail and hook the entry behind it */
  30.177 +	for (tail = fd_start; tail->next != NULL; tail = tail->next)
  30.178 +		;
  30.179 +	tail->next = entry;
  30.180 +	entry->prev = tail;
  30.181 +	return entry;
  30.182 +}
  30.183 +
  30.184 +/* Return the td_state of the list entry whose cookie matches, or NULL when
  30.185 + * no entry carries that cookie. */
  30.186 +static inline struct td_state *get_state(int cookie)
  30.187 +{
  30.188 +	fd_list_entry_t *e;
  30.189 +
  30.190 +	for (e = fd_start; e != NULL; e = e->next)
  30.191 +		if (e->cookie == cookie)
  30.192 +			return e->s;
  30.193 +	return NULL;
  30.194 +}
  30.195 +
  30.196 +/* Map the driver type passed by blktapctrl to its dtypes[] driver entry. */
  30.197 +static struct tap_disk *get_driver(int drivertype)
  30.198 +{
  30.199 +	struct tap_disk *drv = dtypes[drivertype]->drv;
  30.200 +	return drv;
  30.201 +}
  30.202 +
  30.203 +/* Allocate a disk state with blkif and ring_info; NULL if any malloc fails. */
  30.204 +static struct td_state *state_init(void)
  30.205 +{
  30.206 +	int i;
  30.207 +	struct td_state *s = malloc(sizeof(struct td_state));
  30.208 +	if (s == NULL) return NULL;	/* the caller tests the result for NULL */
  30.209 +	s->blkif = malloc(sizeof(blkif_t));
  30.210 +	s->ring_info = malloc(sizeof(tapdev_info_t));
  30.211 +	if (s->blkif == NULL || s->ring_info == NULL) {
  30.212 +		free(s->blkif); free(s->ring_info); free(s);
  30.213 +		return NULL;
  30.214 +	}
  30.215 +	for (i = 0; i < MAX_REQUESTS; i++) s->blkif->pending_list[i].count = 0;
  30.216 +	return s;
  30.217 +}
  30.218 +
  30.219 +/* Open blktap device node <minor>, map its shared ring into s->ring_info and
  30.220 + * record the tap fd in s's fd list entry. Returns minor, or -1 on failure. */
  30.221 +static int map_new_dev(struct td_state *s, int minor)
  30.222 +{
  30.223 +	int tap_fd;
  30.224 +	tapdev_info_t *info = s->ring_info;
  30.225 +	char *devname;
  30.226 +	fd_list_entry_t *ptr;
  30.227 +
  30.228 +	/* on failure asprintf() leaves devname undefined: do not use/free it */
  30.229 +	if (asprintf(&devname,"%s/%s%d", BLKTAP_DEV_DIR, BLKTAP_DEV_NAME,
  30.230 +		     minor) == -1)
  30.231 +		return -1;
  30.232 +	tap_fd = open(devname, O_RDWR);
  30.233 +	if (tap_fd == -1) 
  30.234 +	{
  30.235 +		DPRINTF("open failed on dev %s!",devname);
  30.236 +		goto fail;
  30.237 +	} 
  30.238 +	info->fd = tap_fd;
  30.239 +
  30.240 +	/*Map the shared memory*/
  30.241 +	info->mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, 
  30.242 +			  PROT_READ | PROT_WRITE, MAP_SHARED, info->fd, 0);
  30.243 +	if ((long int)info->mem == -1) 
  30.244 +	{
  30.245 +		DPRINTF("mmap failed on dev %s!\n",devname);
  30.246 +		goto fail;
  30.247 +	}
  30.248 +
  30.249 +	/* assign the rings to the mapped memory */ 
  30.250 +	info->sring = (blkif_sring_t *)((unsigned long)info->mem);
  30.251 +	BACK_RING_INIT(&info->fe_ring, info->sring, PAGE_SIZE);
  30.252 +	info->vstart = 
  30.253 +	        (unsigned long)info->mem + (BLKTAP_RING_PAGES << PAGE_SHIFT);
  30.254 +
  30.255 +	ioctl(info->fd, BLKTAP_IOCTL_SENDPID, process );
  30.256 +	ioctl(info->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
  30.257 +	free(devname);
  30.258 +
  30.259 +	/*Update the fd entry*/
  30.260 +	for (ptr = fd_start; ptr != NULL; ptr = ptr->next) {
  30.261 +		if (s == ptr->s) {
  30.262 +			ptr->tap_fd = tap_fd;
  30.263 +			break;
  30.264 +		}
  30.265 +	}
  30.266 +	return minor;
  30.267 +
  30.268 + fail:
  30.269 +	free(devname);
  30.270 +	return -1;
  30.271 +}
  30.271 +/* read_msg: read one control message from fds[READ] into buf (MSG_SIZE bytes), act on it and write any reply to fds[WRITE]. Returns 1 when a known message type was handled, 0 otherwise. */
  30.272 +static int read_msg(char *buf)
  30.273 +{
  30.274 +	int length, len, msglen, tap_fd, *io_fd;
  30.275 +	char *ptr, *path;
  30.276 +	image_t *img;
  30.277 +	struct timeval timeout;
  30.278 +	msg_hdr_t *msg;
  30.279 +	msg_newdev_t *msg_dev;
  30.280 +	msg_pid_t *msg_pid;
  30.281 +	struct tap_disk *drv;
  30.282 +	int ret = -1;
  30.283 +	struct td_state *s = NULL;
  30.284 +	fd_list_entry_t *entry;
  30.285 +
  30.286 +	length = read(fds[READ], buf, MSG_SIZE);
  30.287 +
  30.288 +	if (length > 0 && length >= sizeof(msg_hdr_t)) 
  30.289 +	{
  30.290 +		msg = (msg_hdr_t *)buf;
  30.291 +		DPRINTF("Tapdisk: Received msg, len %d, type %d, UID %d\n",
  30.292 +			length,msg->type,msg->cookie);
  30.293 +
  30.294 +		switch (msg->type) {
  30.295 +		case CTLMSG_PARAMS: 			
  30.296 +			ptr = buf + sizeof(msg_hdr_t);
  30.297 +			len = (length - sizeof(msg_hdr_t));
  30.298 +			path = calloc(1, len + 1); /* +1 so the copied path is always NUL-terminated */
  30.299 +			if (path == NULL) goto params_done; /* ret is still -1: reply CTLMSG_IMG_FAIL */
  30.300 +			memcpy(path, ptr, len); 
  30.301 +			DPRINTF("Received CTLMSG_PARAMS: [%s]\n", path);
  30.302 +
  30.303 +			/*Assign driver*/
  30.304 +			drv = get_driver(msg->drivertype);
  30.305 +			if (drv == NULL)
  30.306 +				goto params_done;
  30.307 +				
  30.308 +			DPRINTF("Loaded driver: name [%s], type [%d]\n",
  30.309 +				drv->disk_type, msg->drivertype);
  30.310 +
  30.311 +			/* Allocate the disk structs */
  30.312 +			s = state_init();
  30.313 +			if (s == NULL)
  30.314 +				goto params_done;
  30.315 +
  30.316 +			s->drv = drv;
  30.317 +			s->private = malloc(drv->private_data_size);
  30.318 +			if (s->private == NULL) {
  30.319 +				free(s);
  30.320 +				goto params_done;
  30.321 +			}
  30.322 +
  30.323 +			/*Open file*/
  30.324 +			ret = drv->td_open(s, path);
  30.325 +			if (ret != 0) { free(s->private); free(s->blkif); free(s->ring_info); free(s); goto params_done; } /* never register a disk that failed to open */
  30.326 +			io_fd = drv->td_get_fd(s);
  30.327 +			entry = add_fd_entry(0, io_fd, s);
  30.328 +			entry->cookie = msg->cookie;
  30.329 +			DPRINTF("Entered cookie %d\n",entry->cookie);
  30.330 +			
  30.331 +			memset(buf, 0x00, MSG_SIZE); 
  30.332 +			
  30.333 +		params_done:
  30.334 +			if (ret == 0) {
  30.335 +				msglen = sizeof(msg_hdr_t) + sizeof(image_t);
  30.336 +				msg->type = CTLMSG_IMG;
  30.337 +				img = (image_t *)(buf + sizeof(msg_hdr_t));
  30.338 +				img->size = s->size;
  30.339 +				img->secsize = s->sector_size;
  30.340 +				img->info = s->info;
  30.341 +			} else {
  30.342 +				msglen = sizeof(msg_hdr_t);
  30.343 +				msg->type = CTLMSG_IMG_FAIL;
  30.344 +				msg->len = msglen;
  30.345 +			}
  30.346 +			len = write(fds[WRITE], buf, msglen);
  30.347 +			free(path);
  30.348 +			return 1;
  30.349 +			
  30.350 +			
  30.351 +			
  30.352 +		case CTLMSG_NEWDEV:
  30.353 +			msg_dev = (msg_newdev_t *)(buf + sizeof(msg_hdr_t));
  30.354 +
  30.355 +			s = get_state(msg->cookie);
  30.356 +			DPRINTF("Retrieving state, cookie %d.....[%s]\n",msg->cookie, (s == NULL ? "FAIL":"OK"));
  30.357 +			if (s != NULL) {
  30.358 +				ret = ((map_new_dev(s, msg_dev->devnum) 
  30.359 +					== msg_dev->devnum ? 0: -1));
  30.360 +				connected_disks++;
  30.361 +			}	
  30.362 +
  30.363 +			memset(buf, 0x00, MSG_SIZE); 
  30.364 +			msglen = sizeof(msg_hdr_t);
  30.365 +			msg->type = (ret == 0 ? CTLMSG_NEWDEV_RSP 
  30.366 +				              : CTLMSG_NEWDEV_FAIL);
  30.367 +			msg->len = msglen;
  30.368 +
  30.369 +			len = write(fds[WRITE], buf, msglen);
  30.370 +			return 1;
  30.371 +
  30.372 +		case CTLMSG_CLOSE:
  30.373 +			s = get_state(msg->cookie);
  30.374 +			if (s) unmap_disk(s);
  30.375 +			
  30.376 +			connected_disks--;
  30.377 +			sig_handler(SIGINT); /* NOTE(review): presumably asks the main loop to stop -- sig_handler is defined outside this view */
  30.378 +
  30.379 +			return 1;			
  30.380 +
  30.381 +		case CTLMSG_PID:
  30.382 +			memset(buf, 0x00, MSG_SIZE);
  30.383 +			msglen = sizeof(msg_hdr_t) + sizeof(msg_pid_t);
  30.384 +			msg->type = CTLMSG_PID_RSP;
  30.385 +			msg->len = msglen;
  30.386 +
  30.387 +			msg_pid = (msg_pid_t *)(buf + sizeof(msg_hdr_t));
  30.388 +			process = getpid(); /* remembered for the BLKTAP_IOCTL_SENDPID call in map_new_dev() */
  30.389 +			msg_pid->pid = process;
  30.390 +
  30.391 +			len = write(fds[WRITE], buf, msglen);
  30.392 +			return 1;
  30.393 +
  30.394 +		default:
  30.395 +			return 0;
  30.396 +		}
  30.397 +	}
  30.398 +	return 0;
  30.399 +}
  30.400 +/* write_rsp_to_ring: copy *rsp into the ring slot at the private response producer index and advance that index; kick_responses() later pushes it. Always returns 0. */
  30.401 +static inline int write_rsp_to_ring(struct td_state *s, blkif_response_t *rsp)
  30.402 +{
  30.403 +	tapdev_info_t *info = s->ring_info;
  30.404 +	blkif_response_t *rsp_d;
  30.405 +	
  30.406 +	rsp_d = RING_GET_RESPONSE(&info->fe_ring, info->fe_ring.rsp_prod_pvt);
  30.407 +	memcpy(rsp_d, rsp, sizeof(blkif_response_t));
  30.408 +	wmb(); /* order the slot contents before the index update */
  30.409 +	info->fe_ring.rsp_prod_pvt++;
  30.410 +	
  30.411 +	return 0;
  30.412 +}
  30.413 +/* kick_responses: if the private response producer index differs from the shared one, push the queued responses and notify the tap driver with BLKTAP_IOCTL_KICK_FE. */
  30.414 +static inline void kick_responses(struct td_state *s)
  30.415 +{
  30.416 +	tapdev_info_t *info = s->ring_info;
  30.417 +
  30.418 +	if (info->fe_ring.rsp_prod_pvt != info->fe_ring.sring->rsp_prod) 
  30.419 +	{
  30.420 +		RING_PUSH_RESPONSES(&info->fe_ring);
  30.421 +		ioctl(info->fd, BLKTAP_IOCTL_KICK_FE);
  30.422 +	}
  30.423 +}
  30.424 +/* io_done: run the driver's td_do_callbacks() for fd slot sid and kick the ring when it returns a positive value; does nothing once run has been cleared. */
  30.425 +void io_done(struct td_state *s, int sid)
  30.426 +{
  30.427 +	struct tap_disk *drv = s->drv;
  30.428 +
  30.429 +	if (!run) return; /*We have received signal to close*/
  30.430 +
  30.431 +	if (drv->td_do_callbacks(s, sid) > 0) kick_responses(s);
  30.432 +
  30.433 +	return;
  30.434 +}
  30.435 +/* send_responses: completion callback (td_callback_t) run once per finished segment of pending request idx. Decrements the request's outstanding-segment count and writes the response to the ring when it reaches zero; a non-zero res marks the request BLKIF_RSP_ERROR. Returns the number of responses queued (0 or 1). */
  30.436 +int send_responses(struct td_state *s, int res, int idx, void *private)
  30.437 +{
  30.438 +	blkif_request_t *req;
  30.439 +	int responses_queued = 0;
  30.440 +	blkif_t *blkif = s->blkif;
  30.441 +
  30.442 +	/* validate idx before it is used to index pending_list */
  30.443 +			
  30.444 +	if ( (idx < 0) || (idx > MAX_REQUESTS-1) || 
  30.445 +	    (blkif->pending_list[idx].count == 0) )
  30.446 +	{
  30.447 +		DPRINTF("invalid index returned(%d)!\n", idx);
  30.448 +		return 0;
  30.449 +	}
  30.450 +	req   = &blkif->pending_list[idx].req;
  30.451 +	if (res != 0) {
  30.452 +		DPRINTF("*** request error %d! \n", res);
  30.453 +		blkif->pending_list[idx].status = BLKIF_RSP_ERROR; /* still complete the request, but report the failure */
  30.454 +	}
  30.455 +
  30.456 +	blkif->pending_list[idx].count--;
  30.457 +	
  30.458 +	if (blkif->pending_list[idx].count == 0) 
  30.459 +	{
  30.460 +		blkif_request_t tmp;
  30.461 +		blkif_response_t *rsp;
  30.462 +		
  30.463 +		tmp = blkif->pending_list[idx].req; /* copy first: the response is built in the request's own storage */
  30.464 +		rsp = (blkif_response_t *)req;
  30.465 +		
  30.466 +		rsp->id = tmp.id;
  30.467 +		rsp->operation = tmp.operation;
  30.468 +		rsp->status = blkif->pending_list[idx].status;
  30.469 +		
  30.470 +		write_rsp_to_ring(s, rsp);
  30.471 +		responses_queued++;
  30.472 +	}
  30.473 +	return responses_queued;
  30.474 +}
  30.475 +/* get_io_request: consume every new request on s's ring, record it in blkif->pending_list, queue one driver read/write per segment with send_responses as the callback, then submit the batch. Does nothing once run has been cleared. */
  30.476 +static void get_io_request(struct td_state *s)
  30.477 +{
  30.478 +	RING_IDX          rp, rc, j, i;
  30.479 +	blkif_request_t  *req;
  30.480 +	int idx, nsects, ret; /* ret must be signed: td_queue_*() report errors such as -EBUSY as negative values */
  30.481 +	uint64_t sector_nr;
  30.482 +	char *page;
  30.483 +	int early = 0; /* count early completions */
  30.484 +	struct tap_disk *drv = s->drv;
  30.485 +	blkif_t *blkif = s->blkif;
  30.486 +	tapdev_info_t *info = s->ring_info;
  30.487 +
  30.488 +	if (!run) return; /*We have received signal to close*/
  30.489 +
  30.490 +	rp = info->fe_ring.sring->req_prod; 
  30.491 +	rmb();
  30.492 +	for (j = info->fe_ring.req_cons; j != rp; j++)
  30.493 +	{
  30.494 +		int done = 0; 
  30.495 +
  30.496 +		req = NULL;
  30.497 +		req = RING_GET_REQUEST(&info->fe_ring, j);
  30.498 +		++info->fe_ring.req_cons;
  30.499 +		
  30.500 +		if (req == NULL) continue;
  30.501 +		
  30.502 +		idx = req->id;
  30.503 +		ASSERT(blkif->pending_list[idx].count == 0);
  30.504 +		memcpy(&blkif->pending_list[idx].req, req, sizeof(*req));
  30.505 +		blkif->pending_list[idx].status = BLKIF_RSP_OKAY;
  30.506 +		blkif->pending_list[idx].count = req->nr_segments; /* one send_responses() callback is expected per segment */
  30.507 +
  30.508 +		sector_nr = req->sector_number;
  30.509 +
  30.510 +		for (i = 0; i < req->nr_segments; i++) {
  30.511 +			nsects = req->seg[i].last_sect - 
  30.512 +				 req->seg[i].first_sect + 1;
  30.513 +	
  30.514 +			if ((req->seg[i].last_sect >= PAGE_SIZE >> 9) ||
  30.515 +			    (nsects <= 0))
  30.516 +				continue; /* NOTE(review): a skipped segment never reaches send_responses(), so count stays non-zero -- confirm this is intended */
  30.517 +
  30.518 +			page  = (char *)MMAP_VADDR(info->vstart, 
  30.519 +						   (unsigned long)req->id, i);
  30.520 +			page += (req->seg[i].first_sect << SECTOR_SHIFT);
  30.521 +
  30.522 +			if (sector_nr >= s->size) {
  30.523 +				DPRINTF("Sector request failed:\n");
  30.524 +				DPRINTF("%s request, idx [%d,%d] size [%llu], "
  30.525 +					"sector [%llu,%llu]\n",
  30.526 +					(req->operation == BLKIF_OP_WRITE ? 
  30.527 +					 "WRITE" : "READ"),
  30.528 +					idx,i,
  30.529 +					(long long unsigned) 
  30.530 +						nsects<<SECTOR_SHIFT,
  30.531 +					(long long unsigned) 
  30.532 +						sector_nr<<SECTOR_SHIFT,
  30.533 +					(long long unsigned) sector_nr);
  30.534 +				continue;
  30.535 +			}
  30.536 +			
  30.537 +			switch (req->operation) 
  30.538 +			{
  30.539 +			case BLKIF_OP_WRITE:
  30.540 +				ret = drv->td_queue_write(s, sector_nr,
  30.541 +						nsects, page, send_responses, 
  30.542 +						idx, NULL);
  30.543 +				if (ret > 0) early += ret;
  30.544 +				else if (ret == -EBUSY) {
  30.545 +					/*
  30.546 +					 * TODO: Sector is locked         *
  30.547 +					 * Need to put req back on queue  *
  30.548 +					 */
  30.549 +				}
  30.550 +				break;
  30.551 +			case BLKIF_OP_READ:
  30.552 +				ret = drv->td_queue_read(s, sector_nr,
  30.553 +						nsects, page, send_responses, 
  30.554 +						idx, NULL);
  30.555 +				if (ret > 0) early += ret;
  30.556 +				else if (ret == -EBUSY) {
  30.557 +					/*
  30.558 +					 * TODO: Sector is locked         *
  30.559 +					 * Need to put req back on queue  *
  30.560 +					 */
  30.561 +				}
  30.562 +				break;
  30.563 +			default:
  30.564 +				DPRINTF("Unknown block operation\n");
  30.565 +				break;
  30.566 +			}
  30.567 +			sector_nr += nsects;
  30.568 +		}
  30.569 +	}
  30.570 +
  30.571 +	/*Batch done*/
  30.572 +	drv->td_submit(s);
  30.573 +	
  30.574 +	if (early > 0) 
  30.575 +		io_done(s,10); /* NOTE(review): 10 is not a valid io_fd slot (MAX_IOFD is 2) -- presumably ignored by drivers; confirm */
  30.576 +		
  30.577 +	return;
  30.578 +}
  30.579 +/* main: tapdisk entry point. argv[1] and argv[2] name the control channels opened as fds[READ] and fds[WRITE]; the loop select()s on the control channel plus the registered tap and driver fds until run is cleared, then closes every disk. */
  30.580 +int main(int argc, char *argv[])
  30.581 +{
  30.582 +	int len, msglen, ret, i;
  30.583 +	char *p, *buf;
  30.584 +	fd_set readfds, writefds;
  30.585 +	struct timeval timeout;
  30.586 +	fd_list_entry_t *ptr;
  30.587 +	struct tap_disk *drv;
  30.588 +	struct td_state *s;
  30.589 +	
  30.590 +	if (argc != 3) usage();
  30.591 +
  30.592 +	daemonize();
  30.593 +
  30.594 +	openlog("TAPDISK", LOG_CONS|LOG_ODELAY, LOG_DAEMON);
  30.595 +	/*Setup signal handlers*/
  30.596 +	signal (SIGBUS, sig_handler);
  30.597 +	signal (SIGINT, sig_handler);
  30.598 +
  30.599 +	/*Open the control channel*/
  30.600 +	fds[READ] = open(argv[1],O_RDWR|O_NONBLOCK);
  30.601 +	fds[WRITE] = open(argv[2],O_RDWR|O_NONBLOCK);
  30.602 +
  30.603 +	if ( (fds[READ] < 0) || (fds[WRITE] < 0) ) 
  30.604 +	{
  30.605 +		DPRINTF("FD open failed [%d,%d]\n",fds[READ], fds[WRITE]);
  30.606 +		exit(-1);
  30.607 +	}
  30.608 +
  30.609 +	buf = calloc(MSG_SIZE, 1); /* single message buffer reused by every read_msg() call */
  30.610 +
  30.611 +	if (buf == NULL) 
  30.612 +        {
  30.613 +		DPRINTF("ERROR: allocating memory.\n");
  30.614 +		exit(-1);
  30.615 +	}
  30.616 +
  30.617 +	while (run) 
  30.618 +        {
  30.619 +		ret = 0;
  30.620 +		FD_ZERO(&readfds);
  30.621 +		FD_SET(fds[READ], &readfds);
  30.622 +		maxfds = fds[READ];
  30.623 +
  30.624 +		/*Set all tap fds*/
  30.625 +		LOCAL_FD_SET(&readfds); /* presumably also raises maxfds -- macro defined outside this view */
  30.626 +
  30.627 +		timeout.tv_sec = 0; 
  30.628 +		timeout.tv_usec = 1000; /* select() waits at most 1 ms per iteration */
  30.629 +
  30.630 +		/*Wait for incoming messages*/
  30.631 +		ret = select(maxfds + 1, &readfds, (fd_set *) 0, 
  30.632 +			     (fd_set *) 0, &timeout);
  30.633 +
  30.634 +		if (ret > 0) 
  30.635 +                {
  30.636 +			ptr = fd_start;
  30.637 +			while (ptr != NULL) {
  30.638 +				if (FD_ISSET(ptr->tap_fd, &readfds)) /* new requests on this disk's ring */
  30.639 +					get_io_request(ptr->s);
  30.640 +				for (i = 0; i < MAX_IOFD; i++) {
  30.641 +					if (ptr->io_fd[i] && /* a zero entry is treated as an unused slot */
  30.642 +					   FD_ISSET(ptr->io_fd[i], &readfds)) 
  30.643 +						io_done(ptr->s, i);
  30.644 +				}
  30.645 +
  30.646 +				ptr = ptr->next;
  30.647 +			}
  30.648 +
  30.649 +			if (FD_ISSET(fds[READ], &readfds)) /* control message from the controller */
  30.650 +				read_msg(buf);
  30.651 +		}
  30.652 +	}
  30.653 +	free(buf);
  30.654 +	close(fds[READ]);
  30.655 +	close(fds[WRITE]);
  30.656 +
  30.657 +	ptr = fd_start; /* shutdown: release every disk still on the fd list */
  30.658 +	while (ptr != NULL) {
  30.659 +		s = ptr->s;
  30.660 +		drv = s->drv;
  30.661 +
  30.662 +		unmap_disk(s);
  30.663 +		drv->td_close(s);
  30.664 +		free(s->private);
  30.665 +		free(s->blkif);
  30.666 +		free(s->ring_info);
  30.667 +		free(s);
  30.668 +		close(ptr->tap_fd);
  30.669 +		ptr = ptr->next; /* NOTE(review): the list entries themselves are not freed here */
  30.670 +	}
  30.671 +	closelog();
  30.672 +
  30.673 +	return 0;
  30.674 +}
    31.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.2 +++ b/tools/blktap/drivers/tapdisk.h	Thu Jul 13 10:13:26 2006 +0100
    31.3 @@ -0,0 +1,211 @@
    31.4 +/* tapdisk.h
    31.5 + *
    31.6 + * Generic disk interface for blktap-based image adapters.
    31.7 + *
    31.8 + * (c) 2006 Andrew Warfield and Julian Chesterfield
    31.9 + * 
   31.10 + * Some notes on the tap_disk interface:
   31.11 + * 
   31.12 + * tap_disk aims to provide a generic interface to easily implement new 
   31.13 + * types of image accessors.  The structure-of-function-calls is similar
   31.14 + * to disk interfaces used in qemu/denali/etc, with the significant 
   31.15 + * difference being the expectation of asynchronous rather than synchronous 
   31.16 + * I/O.  The asynchronous interface is intended to allow lots of requests to
   31.17 + * be pipelined through a disk, without the disk requiring any of its own
   31.18 + * threads of control.  As such, a batch of requests is delivered to the disk
   31.19 + * using:
   31.20 + * 
   31.21 + *    td_queue_[read,write]()
   31.22 + * 
   31.23 + * and passing in a completion callback, which the disk is responsible for 
   31.24 + * tracking.  The end of a batch is marked with a call to:
   31.25 + * 
   31.26 + *    td_submit()
   31.27 + * 
   31.28 + * The disk implementation must provide a file handle, which is used to 
   31.29 + * indicate that it needs to do work.  tapdisk will add this file handle 
   31.30 + * (returned from td_get_fd()) to its poll set, and will call into the disk
   31.31 + * using td_do_callbacks() whenever there is data pending.
   31.32 + * 
   31.33 + * Two disk implementations demonstrate how this interface may be used to 
   31.34 + * implement disks with both asynchronous and synchronous calls.  block-aio.c
   31.35 + * maps this interface down onto the linux libaio calls, while block-sync uses 
   31.36 + * normal posix read/write.
   31.37 + * 
   31.38 + * A few things to realize about the sync case, which doesn't need to defer 
   31.39 + * io completions:
   31.40 + * 
   31.41 + *   - td_queue_[read,write]() call read/write directly, and then call the 
   31.42 + *     callback immediately.  They MUST then return a value greater than 0
   31.43 + *     in order to tell tapdisk that requests have finished early, and to 
   31.44 + *     force responses to be kicked to the clients.
   31.45 + * 
   31.46 + *   - The fd used for poll is an otherwise unused pipe, which allows poll to 
   31.47 + *     be safely called without ever returning anything.
   31.48 + * 
   31.49 + */
   31.50 +
   31.51 +#ifndef TAPDISK_H_
   31.52 +#define TAPDISK_H_
   31.53 +
   31.54 +#include <stdint.h>
   31.55 +#include <syslog.h>
   31.56 +#include "blktaplib.h"
   31.57 +
   31.58 +/*If enabled, log all debug messages to syslog*/
   31.59 +#if 1
   31.60 +#define DPRINTF(_f, _a...) syslog( LOG_DEBUG, _f , ## _a )
   31.61 +#else
   31.62 +#define DPRINTF(_f, _a...) ((void)0)
   31.63 +#endif
   31.64 +
   31.65 +/* Things disks need to know about, these should probably be in a higher-level
   31.66 + * header. */
   31.67 +#define MAX_REQUESTS            64
   31.68 +#define MAX_SEGMENTS_PER_REQ    11
   31.69 +#define SECTOR_SHIFT             9
   31.70 +#define DEFAULT_SECTOR_SIZE    512
   31.71 +
   31.72 +/* This structure represents the state of an active virtual disk.           */
   31.73 +struct td_state {
   31.74 +	void *private; /* driver-private area of drv->private_data_size bytes, allocated by tapdisk */
   31.75 +	void *drv; /* the struct tap_disk implementing this disk */
   31.76 +	void *blkif; /* blkif_t holding the pending request list */
   31.77 +	void *image;
   31.78 +	void *ring_info; /* tapdev_info_t: tap fd, mapped memory and ring */
   31.79 +	void *fd_entry; /* presumably this disk's fd_list_entry_t -- TODO confirm */
   31.80 +	char backing_file[1024]; /*Used by differencing disks, e.g. qcow*/
   31.81 +	long int   sector_size; /* reported to the controller as image_t.secsize */
   31.82 +	uint64_t   size; /* compared against request sector numbers; reported as image_t.size */
   31.83 +	long int   info; /* reported to the controller as image_t.info */
   31.84 +};
   31.85 +
   31.86 +/* Prototype of the callback to activate as requests complete: res is 0 on success, id and prv are the values given to td_queue_read/write. */
   31.87 +typedef int (*td_callback_t)(struct td_state *s, int res, int id, void *prv);
   31.88 +
   31.89 +/* Structure describing the interface to a virtual disk implementation.     */
   31.90 +/* See note at the top of this file describing this interface.              */
   31.91 +struct tap_disk {
   31.92 +	const char *disk_type; /* driver name, used in log messages */
   31.93 +	int private_data_size; /* bytes tapdisk allocates for td_state.private */
   31.94 +	int (*td_open)        (struct td_state *s, const char *name); /* returns 0 on success */
   31.95 +	int (*td_queue_read)  (struct td_state *s, uint64_t sector,
   31.96 +			       int nb_sectors, char *buf, td_callback_t cb,
   31.97 +			       int id, void *prv); /* > 0 means requests finished early */
   31.98 +	int (*td_queue_write) (struct td_state *s, uint64_t sector,
   31.99 +			       int nb_sectors, char *buf, td_callback_t cb,
  31.100 +			       int id, void *prv); /* same contract as td_queue_read */
  31.101 +	int (*td_submit)      (struct td_state *s); /* marks the end of a batch of queued requests */
  31.102 +	int *(*td_get_fd)      (struct td_state *s); /* file handle(s) tapdisk adds to its poll set */
  31.103 +	int (*td_close)       (struct td_state *s);
  31.104 +	int (*td_do_callbacks)(struct td_state *s, int sid); /* called when data is pending; > 0 makes tapdisk kick responses */
  31.105 +};
  31.106 +
  31.107 +typedef struct disk_info {
  31.108 +	int  idnum;          /* one of the DISK_TYPE_* values */
  31.109 +	char name[50];       /* e.g. "RAMDISK" */
  31.110 +	char handle[10];     /* xend handle, e.g. 'ram' */
  31.111 +	int  single_handler; /* is there a single controller for all */
  31.112 +	                     /* instances of disk type? */
  31.113 +#ifdef TAPDISK
  31.114 +	struct tap_disk *drv;	/* driver entry points; only present when TAPDISK is defined */
  31.115 +#endif
  31.116 +} disk_info_t;
  31.117 +
  31.118 +void debug_fe_ring(struct td_state *s);
  31.119 +
  31.120 +extern struct tap_disk tapdisk_aio;
  31.121 +extern struct tap_disk tapdisk_sync;
  31.122 +extern struct tap_disk tapdisk_vmdk;
  31.123 +extern struct tap_disk tapdisk_ram;
  31.124 +extern struct tap_disk tapdisk_qcow;
  31.125 +
  31.126 +#define MAX_DISK_TYPES  20
  31.127 +#define MAX_IOFD        2
  31.128 +
  31.129 +#define DISK_TYPE_AIO   0
  31.130 +#define DISK_TYPE_SYNC  1
  31.131 +#define DISK_TYPE_VMDK  2
  31.132 +#define DISK_TYPE_RAM   3
  31.133 +#define DISK_TYPE_QCOW  4
  31.134 +
  31.135 +
  31.136 +/*Define Individual Disk Parameters here */
  31.137 +static disk_info_t aio_disk = {
  31.138 +	DISK_TYPE_AIO, /* idnum */
  31.139 +	"raw image (aio)", /* name */
  31.140 +	"aio", /* handle */
  31.141 +	0, /* single_handler */
  31.142 +#ifdef TAPDISK
  31.143 +	&tapdisk_aio, /* drv */
  31.144 +#endif
  31.145 +};
  31.146 +
  31.147 +static disk_info_t sync_disk = {
  31.148 +	DISK_TYPE_SYNC,
  31.149 +	"raw image (sync)",
  31.150 +	"sync",
  31.151 +	0,
  31.152 +#ifdef TAPDISK
  31.153 +	&tapdisk_sync,
  31.154 +#endif
  31.155 +};
  31.156 +
  31.157 +static disk_info_t vmdk_disk = {
  31.158 +	DISK_TYPE_VMDK,
  31.159 +	"vmware image (vmdk)",
  31.160 +	"vmdk",
  31.161 +	1,
  31.162 +#ifdef TAPDISK
  31.163 +	&tapdisk_vmdk,
  31.164 +#endif
  31.165 +};
  31.166 +
  31.167 +static disk_info_t ram_disk = {
  31.168 +	DISK_TYPE_RAM,
  31.169 +	"ramdisk image (ram)",
  31.170 +	"ram",
  31.171 +	1,
  31.172 +#ifdef TAPDISK
  31.173 +	&tapdisk_ram,
  31.174 +#endif
  31.175 +};
  31.176 +
  31.177 +static disk_info_t qcow_disk = {
  31.178 +	DISK_TYPE_QCOW,
  31.179 +	"qcow disk (qcow)",
  31.180 +	"qcow",
  31.181 +	0,
  31.182 +#ifdef TAPDISK
  31.183 +	&tapdisk_qcow,
  31.184 +#endif
  31.185 +};
  31.186 +
  31.187 +/*Main disk info array; entries are listed in DISK_TYPE_* order, so dtypes[i]->idnum == i */
  31.188 +static disk_info_t *dtypes[] = {
  31.189 +	&aio_disk,
  31.190 +	&sync_disk,
  31.191 +	&vmdk_disk,
  31.192 +	&ram_disk,
  31.193 +	&qcow_disk,
  31.194 +};
  31.195 +
  31.196 +typedef struct driver_list_entry { /* list node referring to one blkif */
  31.197 +	void *blkif;
  31.198 +	void *prev; /* presumably the neighbouring list nodes -- not used in the code shown here; confirm */
  31.199 +	void *next;
  31.200 +} driver_list_entry_t;
  31.201 +
  31.202 +typedef struct fd_list_entry { /* one polled disk in tapdisk's fd list */
  31.203 +	int cookie; /* msg->cookie of the CTLMSG_PARAMS request that created the disk */
  31.204 +	int  tap_fd; /* fd of the opened blktap device, filled in when the device is mapped */
  31.205 +	int  io_fd[MAX_IOFD]; /* driver fds polled for completions; a zero entry is skipped */
  31.206 +	struct td_state *s; /* the disk this entry belongs to */
  31.207 +	void *prev;
  31.208 +	void *next; /* next fd_list_entry_t, NULL at the end of the list */
  31.209 +} fd_list_entry_t;
  31.210 +
  31.211 +int qcow_create(const char *filename, uint64_t total_size,
  31.212 +		const char *backing_file, int flags);
  31.213 +
  31.214 +#endif /*TAPDISK_H_*/
    32.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.2 +++ b/tools/blktap/lib/Makefile	Thu Jul 13 10:13:26 2006 +0100
    32.3 @@ -0,0 +1,66 @@
    32.4 +XEN_ROOT = ../../..
    32.5 +include $(XEN_ROOT)/tools/Rules.mk
    32.6 +# Shared-library version: the soname is libblktap.so.$(MAJOR).
    32.7 +MAJOR    = 3.0
    32.8 +MINOR    = 0
    32.9 +SONAME   = libblktap.so.$(MAJOR)
   32.10 +
   32.11 +BLKTAP_INSTALL_DIR = /usr/sbin
   32.12 +
   32.13 +INSTALL            = install
   32.14 +INSTALL_PROG       = $(INSTALL) -m0755
   32.15 +INSTALL_DIR        = $(INSTALL) -d -m0755
   32.16 +
   32.17 +INCLUDES += -I. -I.. -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
   32.18 +
   32.19 +LIBS     := -lz
   32.20 +
   32.21 +SRCS     :=
   32.22 +SRCS     += xenbus.c blkif.c xs_api.c
   32.23 +
   32.24 +CFLAGS   += -Werror
   32.25 +CFLAGS   += -Wno-unused
   32.26 +CFLAGS   += -fno-strict-aliasing -fPIC
   32.27 +CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
   32.28 +# get asprintf():
   32.29 +CFLAGS   += -D _GNU_SOURCE
   32.30 +
   32.31 +# Get gcc to generate the dependencies for us.
   32.32 +CFLAGS   += -Wp,-MD,.$(@F).d
   32.33 +CFLAGS   += $(INCLUDES) 
   32.34 +DEPS     = .*.d
   32.35 +
   32.36 +OBJS     = $(patsubst %.c,%.o,$(SRCS))
   32.37 +IBINS   :=
   32.38 +# Everything the install target copies into $(DESTDIR)/usr/$(LIBDIR).
   32.39 +LIB      = libblktap.a libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
   32.40 +
   32.41 +all: build
   32.42 +
   32.43 +build:
   32.44 +	$(MAKE) libblktap
   32.45 +
   32.46 +install: all
   32.47 +	$(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
   32.48 +	$(INSTALL_DIR) -p $(DESTDIR)/usr/include
   32.49 +	$(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
   32.50 +	$(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
   32.51 +
   32.52 +clean:
   32.53 +	rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS
   32.54 +# NOTE(review): the last recipe line below archives the shared object ($@.so) into libblktap.a rather than $(OBJS) -- confirm this is intended.
   32.55 +libblktap: $(OBJS) 
   32.56 +	$(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared         \
   32.57 +	      -L$(XEN_XENSTORE) -l xenstore                       \
   32.58 +	      -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
   32.59 +	ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
   32.60 +	ln -sf libblktap.so.$(MAJOR) $@.so
   32.61 +	ar rc libblktap.a $@.so
   32.62 +
   32.63 +.PHONY: TAGS all build clean install libblktap
   32.64 +
   32.65 +TAGS:
   32.66 +	etags -t $(SRCS) *.h
   32.67 +# Pull in the per-object dependency files generated through -Wp,-MD above; the leading '-' ignores missing ones.
   32.68 +-include $(DEPS)
   32.69 +
    33.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    33.2 +++ b/tools/blktap/lib/blkif.c	Thu Jul 13 10:13:26 2006 +0100
    33.3 @@ -0,0 +1,185 @@
    33.4 +/*
    33.5 + * tools/blktap_user/blkif.c
    33.6 + * 
    33.7 + * The blkif interface for blktap.  A blkif describes an in-use virtual disk.
    33.8 + * (c) 2005 Andrew Warfield and Julian Chesterfield
    33.9 + *
   33.10 + * This program is free software; you can redistribute it and/or
   33.11 + * modify it under the terms of the GNU General Public License version 2
   33.12 + * as published by the Free Software Foundation; or, when distributed
   33.13 + * separately from the Linux kernel or incorporated into other
   33.14 + * software packages, subject to the following license:
   33.15 + *
   33.16 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   33.17 + * of this source file (the "Software"), to deal in the Software without
   33.18 + * restriction, including without limitation the rights to use, copy, modify,
   33.19 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   33.20 + * and to permit persons to whom the Software is furnished to do so, subject to
   33.21 + * the following conditions:
   33.22 + *
   33.23 + * The above copyright notice and this permission notice shall be included in
   33.24 + * all copies or substantial portions of the Software.
   33.25 + *
   33.26 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   33.27 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   33.28 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   33.29 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   33.30 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   33.31 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   33.32 + * IN THE SOFTWARE.
   33.33 + */
   33.34 +
   33.35 +#include <stdio.h>
   33.36 +#include <stdlib.h>
   33.37 +#include <errno.h>
   33.38 +#include <string.h>
   33.39 +#include <err.h>
   33.40 +#include <unistd.h>
   33.41 +
   33.42 +#include "blktaplib.h"
   33.43 +
   33.44 +#if 0
   33.45 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   33.46 +#else
   33.47 +#define DPRINTF(_f, _a...) ((void)0)
   33.48 +#endif
   33.49 +
   33.50 +#define BLKIF_HASHSZ 1024
   33.51 +#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
   33.52 +
   33.53 +static blkif_t      *blkif_hash[BLKIF_HASHSZ];
   33.54 +/* blkif_find_by_handle: look up the blkif registered for (domid, handle) in blkif_hash; returns NULL if there is none. */
   33.55 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
   33.56 +{
   33.57 +	blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
   33.58 +	while ( (blkif != NULL) && 
   33.59 +		((blkif->domid != domid) || (blkif->handle != handle)) )
   33.60 +		blkif = blkif->hash_next; /* walk the bucket's collision chain */
   33.61 +	return blkif;
   33.62 +}
   33.63 +/* alloc_blkif: allocate a zeroed blkif for domid with devnum set to -1; returns NULL if malloc fails. The blkif is not entered in blkif_hash until blkif_init(). */
   33.64 +blkif_t *alloc_blkif(domid_t domid)
   33.65 +{
   33.66 +	blkif_t *blkif;
   33.67 +	DPRINTF("Alloc_blkif called [%d]\n",domid);
   33.68 +	blkif = (blkif_t *)malloc(sizeof(blkif_t));
   33.69 +	if (!blkif)
   33.70 +		return NULL;
   33.71 +	memset(blkif, 0, sizeof(*blkif));
   33.72 +	blkif->domid = domid;
   33.73 +	blkif->devnum = -1; /* no device number assigned yet; blkif_init() stores the devmap hook's result */
   33.74 +	return blkif;
   33.75 +}
   33.76 +
   33.77 +/*Controller callbacks: new_devmap_hook is called by blkif_init() and returns the device number, or -1 on failure*/
   33.78 +static int (*new_devmap_hook)(blkif_t *blkif) = NULL;
   33.79 +void register_new_devmap_hook(int (*fn)(blkif_t *blkif))
   33.80 +{
   33.81 +	new_devmap_hook = fn;
   33.82 +}
   33.83 +/* new_unmap_hook: called by free_blkif() just before the blkif itself is freed. */
   33.84 +static int (*new_unmap_hook)(blkif_t *blkif) = NULL;
   33.85 +void register_new_unmap_hook(int (*fn)(blkif_t *blkif))
   33.86 +{
   33.87 +	new_unmap_hook = fn;
   33.88 +}
   33.89 +/* new_blkif_hook: called by blkif_init() for each new blkif; must return 0 on success. */
   33.90 +static int (*new_blkif_hook)(blkif_t *blkif) = NULL;
   33.91 +void register_new_blkif_hook(int (*fn)(blkif_t *blkif))
   33.92 +{
   33.93 +	new_blkif_hook = fn;
   33.94 +}
   33.95 +/* blkif_init: fill in handle/pdev/readonly, run new_blkif_hook, enter the blkif in blkif_hash and obtain its device number from new_devmap_hook. Returns 0 on success, -EINVAL for a NULL blkif, -1 on any other failure. */
   33.96 +int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
   33.97 +               long int readonly)
   33.98 +{
   33.99 +	domid_t domid;
  33.100 +	blkif_t **pblkif;
  33.101 +	int devnum;
  33.102 +	
  33.103 +	if (blkif == NULL)
  33.104 +		return -EINVAL;
  33.105 +	
  33.106 +	domid = blkif->domid;
  33.107 +	blkif->handle   = handle;
  33.108 +	blkif->pdev     = pdev;
  33.109 +	blkif->readonly = readonly;
  33.110 +	
  33.111 +	/*
  33.112 +	 * Call out to the new_blkif_hook. 
  33.113 +	 * The tap application should define this,
  33.114 +	 * and it should return having set blkif->ops
  33.115 +	 * 
  33.116 +	 */
  33.117 +	if (new_blkif_hook == NULL)
  33.118 +	{
  33.119 +		DPRINTF("Probe detected a new blkif, but no new_blkif_hook!");
  33.120 +		return -1;
  33.121 +	}
  33.122 +	if (new_blkif_hook(blkif)!=0) {
  33.123 +		DPRINTF("BLKIF: Image open failed\n");
  33.124 +		return -1;
  33.125 +	}
  33.126 +	
  33.127 +	/* Now wire it in. */
  33.128 +	pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
  33.129 +	DPRINTF("Created hash entry: %d [%d,%ld]\n", 
  33.130 +		BLKIF_HASH(domid, handle), domid, handle);
  33.131 +	
  33.132 +	while ( *pblkif != NULL ) /* walk to the end of the bucket, rejecting a duplicate (domid, handle) */
  33.133 +	{
  33.134 +		if ( ((*pblkif)->domid == domid) && 
  33.135 +		     ((*pblkif)->handle == handle) )
  33.136 +		{
  33.137 +			DPRINTF("Could not create blkif: already exists\n");
  33.138 +			return -1;
  33.139 +		}
  33.140 +		pblkif = &(*pblkif)->hash_next;
  33.141 +	}
  33.142 +	blkif->hash_next = NULL;
  33.143 +	*pblkif = blkif;
  33.144 +	
  33.145 +	if (new_devmap_hook == NULL)
  33.146 +	{
  33.147 +		DPRINTF("Probe setting up new blkif but no devmap hook!");
  33.148 +		return -1; /* NOTE(review): the blkif stays in blkif_hash on this and the next failure path -- caller presumably calls free_blkif(); confirm */
  33.149 +	}
  33.150 +	
  33.151 +	devnum = new_devmap_hook(blkif);
  33.152 +	if (devnum == -1)
  33.153 +		return -1;
  33.154 +	blkif->devnum = devnum;
  33.155 +	
  33.156 +	return 0;
  33.157 +}
  33.158 +/* free_blkif: unlink blkif from blkif_hash, free its prv and info data, call new_unmap_hook (if registered) and free the blkif itself. A NULL blkif is ignored. */
  33.159 +void free_blkif(blkif_t *blkif)
  33.160 +{
  33.161 +	blkif_t **pblkif, *curs;
  33.162 +	image_t *image;
  33.163 +	if (blkif == NULL) return; /* must be checked before blkif->domid is read below */
  33.164 +	pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)];
  33.165 +	while ( (curs = *pblkif) != NULL )
  33.166 +	{
  33.167 +		if ( blkif == curs )
  33.168 +		{
  33.169 +			*pblkif = curs->hash_next; /* splice the entry out of its bucket chain */
  33.170 +		}
  33.171 +		pblkif = &curs->hash_next;
  33.172 +	}
  33.173 +	if (blkif != NULL) {
  33.174 +		if ((image=(image_t *)blkif->prv)!=NULL) {
  33.175 +			free(blkif->prv);
  33.176 +		}
  33.177 +		if (blkif->info!=NULL) {
  33.178 +			free(blkif->info);
  33.179 +		}
  33.180 +		if (new_unmap_hook != NULL) new_unmap_hook(blkif); /* NOTE(review): runs after prv/info were freed -- the hook must not touch them */
  33.181 +		free(blkif);
  33.182 +	}
  33.183 +}
  33.184 +/* __init_blkif: reset every bucket of blkif_hash to NULL. */
  33.185 +void __init_blkif(void)
  33.186 +{    
  33.187 +	memset(blkif_hash, 0, sizeof(blkif_hash));
  33.188 +}
    34.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.2 +++ b/tools/blktap/lib/blktaplib.h	Thu Jul 13 10:13:26 2006 +0100
    34.3 @@ -0,0 +1,223 @@
    34.4 +/* blktaplib.h
    34.5 + *
    34.6 + * Blktap library userspace code.
    34.7 + *
    34.8 + * (c) 2005 Andrew Warfield and Julian Chesterfield
    34.9 + *
   34.10 + * This program is free software; you can redistribute it and/or
   34.11 + * modify it under the terms of the GNU General Public License version 2
   34.12 + * as published by the Free Software Foundation; or, when distributed
   34.13 + * separately from the Linux kernel or incorporated into other
   34.14 + * software packages, subject to the following license:
   34.15 + *
   34.16 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   34.17 + * of this source file (the "Software"), to deal in the Software without
   34.18 + * restriction, including without limitation the rights to use, copy, modify,
   34.19 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   34.20 + * and to permit persons to whom the Software is furnished to do so, subject to
   34.21 + * the following conditions:
   34.22 + *
   34.23 + * The above copyright notice and this permission notice shall be included in
   34.24 + * all copies or substantial portions of the Software.
   34.25 + *
   34.26 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   34.27 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   34.28 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   34.29 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   34.30 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   34.31 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   34.32 + * IN THE SOFTWARE.
   34.33 + */
   34.34 +
   34.35 +#ifndef __BLKTAPLIB_H__
   34.36 +#define __BLKTAPLIB_H__
   34.37 +
   34.38 +#include <xenctrl.h>
   34.39 +#include <sys/user.h>
   34.40 +#include <xen/xen.h>
   34.41 +#include <xen/io/blkif.h>
   34.42 +#include <xen/io/ring.h>
   34.43 +#include <xs.h>
   34.44 +#include <sys/types.h>
   34.45 +#include <unistd.h>
   34.46 +
   34.47 +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
   34.48 +
   34.49 +/* size of the extra VMA area to map in attached pages. */
   34.50 +#define BLKTAP_VMA_PAGES BLK_RING_SIZE
   34.51 +
   34.52 +/* blktap IOCTLs: These must correspond with the blktap driver ioctls*/
   34.53 +#define BLKTAP_IOCTL_KICK_FE         1
   34.54 +#define BLKTAP_IOCTL_KICK_BE         2
   34.55 +#define BLKTAP_IOCTL_SETMODE         3
   34.56 +#define BLKTAP_IOCTL_SENDPID	     4
   34.57 +#define BLKTAP_IOCTL_NEWINTF	     5
   34.58 +#define BLKTAP_IOCTL_MINOR	     6
   34.59 +#define BLKTAP_IOCTL_MAJOR	     7
   34.60 +#define BLKTAP_QUERY_ALLOC_REQS      8
   34.61 +#define BLKTAP_IOCTL_FREEINTF	     9
   34.62 +#define BLKTAP_IOCTL_PRINT_IDXS      100   
   34.63 +
   34.64 +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
   34.65 +#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
   34.66 +#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
   34.67 +#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
   34.68 +
   34.69 +#define BLKTAP_MODE_INTERPOSE \
   34.70 +           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
   34.71 +/* BLKTAP_MODE_VALID: non-zero when arg is PASSTHROUGH, INTERCEPT_FE or INTERPOSE; INTERCEPT_BE on its own is not accepted. */
   34.72 +static inline int BLKTAP_MODE_VALID(unsigned long arg)
   34.73 +{
   34.74 +	return (
   34.75 +		( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
   34.76 +		( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
   34.77 +		( arg == BLKTAP_MODE_INTERPOSE    ) );
   34.78 +}
   34.79 +
   34.80 +#define MAX_REQUESTS            64
   34.81 +
   34.82 +#define BLKTAP_IOCTL_KICK 1
   34.83 +#define MAX_PENDING_REQS 64
   34.84 +#define BLKTAP_DEV_DIR   "/dev/xen"
   34.85 +#define BLKTAP_DEV_NAME  "blktap"
   34.86 +#define BLKTAP_DEV_MAJOR 254
   34.87 +#define BLKTAP_DEV_MINOR 0
   34.88 +
   34.89 +#define BLKTAP_RING_PAGES       1 /* Front */
   34.90 +#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES)
   34.91 +
   34.92 +struct blkif;
   34.93 +
   34.94