ia64/xen-unstable

changeset 6629:f59e0163540e

Updates to blktap driver and user code.

Mostly, this makes the tap code work again with all of the changes that
have recently happened to the block drivers. We now use a shared page
per VBD (to the driver), and handle control information through the
store. The taplib interfaces have changed to be based around per-VBD
data structures to which you can attach arbitrary handlers.
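
For illustration, the per-VBD shape of the new taplib interfaces is
roughly the following (a sketch with hypothetical names, not the actual
blktaplib API):

    /* Sketch only: hypothetical names, not the real blktaplib types. */
    typedef int (*request_hook_t)(blkif_t *blkif, blkif_request_t *req);

    typedef struct blkif_vbd {
        blkif_t        *blkif;         /* shared ring to the driver  */
        request_hook_t  request_hook;  /* arbitrary per-VBD handler  */
    } blkif_vbd_t;

    /* Handlers attach to one VBD instead of a global message chain. */
    void vbd_attach_request_hook(blkif_vbd_t *vbd, request_hook_t h)
    {
        vbd->request_hook = h;
    }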

There is also initial code for a user-level blockback driver, which
aims to avoid the use of loopback devices for file-based VBDs. There is
still plenty of work to do here -- this is a working incremental checkin
and I'm away from this for the next four weeks.

Signed-off-by: Andrew Warfield <andrew.warfield@cl.cam.ac.uk>
author akw27@arcadians.cl.cam.ac.uk
date Sun Sep 04 21:19:44 2005 +0000 (2005-09-04)
parents 523078a33287
children d0a5b1857c35
files .hgignore linux-2.6-xen-sparse/drivers/xen/blktap/Makefile linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c linux-2.6-xen-sparse/drivers/xen/blktap/common.h linux-2.6-xen-sparse/drivers/xen/blktap/interface.c linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c linux-2.6-xen-sparse/drivers/xen/netback/netback.c linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c linux-2.6-xen-sparse/mm/memory.c tools/blktap/Makefile tools/blktap/README.sept05 tools/blktap/blkdump.c tools/blktap/blkif.c tools/blktap/blktaplib.c tools/blktap/blktaplib.h tools/blktap/list.h tools/blktap/ublkback/Makefile tools/blktap/ublkback/ublkback.c tools/blktap/ublkback/ublkbacklib.c tools/blktap/ublkback/ublkbacklib.h tools/blktap/xenbus.c
line diff
     1.1 --- a/.hgignore	Sun Sep 04 15:08:16 2005 +0000
     1.2 +++ b/.hgignore	Sun Sep 04 21:19:44 2005 +0000
     1.3 @@ -82,6 +82,7 @@
     1.4  ^tools/blktap/parallax/vdi_validate$
     1.5  ^tools/blktap/parallax/parallax$
     1.6  ^tools/blktap/parallax/blockstored$
     1.7 +^tools/blktap/ublkback/ublkback$
     1.8  ^tools/blktap/xen/.*$
     1.9  ^tools/check/\..*$
    1.10  ^tools/cmdline/.*$
     2.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile	Sun Sep 04 15:08:16 2005 +0000
     2.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/Makefile	Sun Sep 04 21:19:44 2005 +0000
     2.3 @@ -1,3 +1,3 @@
     2.4  
     2.5 -obj-y	:= blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o 
     2.6 +obj-y	:= xenbus.o interface.o blktap.o 
     2.7  
     3.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Sun Sep 04 15:08:16 2005 +0000
     3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Sun Sep 04 21:19:44 2005 +0000
     3.3 @@ -1,90 +1,916 @@
     3.4  /******************************************************************************
     3.5 - * blktap.c
     3.6 - * 
     3.7 - * XenLinux virtual block-device tap.
     3.8 + * arch/xen/drivers/blkif/blktap/blktap.c
     3.9   * 
    3.10 - * Copyright (c) 2004, Andrew Warfield
    3.11 - *
    3.12 - * Based on the original split block driver:
    3.13 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
    3.14 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
    3.15 - * Copyright (c) 2004, Christian Limpach
    3.16 + * This is a modified version of the block backend driver that remaps requests
    3.17 + * to a user-space memory region.  It is intended to be used to write 
    3.18 + * application-level servers that provide block interfaces to client VMs.
    3.19   * 
    3.20 - * Note that unlike the split block driver code, this driver has been developed
    3.21 - * strictly for Linux 2.6
    3.22   */
    3.23  
    3.24 -#include "blktap.h"
    3.25 +#include <linux/kernel.h>
    3.26 +#include <linux/spinlock.h>
    3.27 +#include <asm-xen/balloon.h>
    3.28 +#include <linux/kernel.h>
    3.29 +#include <linux/fs.h>
    3.30 +#include <linux/mm.h>
    3.31 +#include <linux/miscdevice.h>
    3.32 +#include <linux/errno.h>
    3.33 +#include <linux/major.h>
    3.34 +#include <linux/gfp.h>
    3.35 +#include <linux/poll.h>
    3.36 +#include <asm/tlbflush.h>
    3.37 +#include "common.h"
    3.38  
    3.39 -int __init xlblktap_init(void)
    3.40 -{
    3.41 -    ctrl_msg_t               cmsg;
    3.42 -    blkif_fe_driver_status_t fe_st;
    3.43 -    blkif_be_driver_status_t be_st;
    3.44 +/* Only one process may open /dev/xen/blktap at any time. */
    3.45 +static unsigned long blktap_dev_inuse;
    3.46 +unsigned long blktap_ring_ok; /* make this ring->state */
    3.47  
    3.48 -    printk(KERN_INFO "Initialising Xen block tap device\n");
    3.49 -#ifdef CONFIG_XEN_BLKDEV_GRANT
    3.50 -    printk(KERN_INFO "Block tap is using grant tables.\n");
    3.51 -#endif
    3.52 +/* Rings up to user space. */
    3.53 +static blkif_front_ring_t blktap_ufe_ring;
    3.54  
    3.55 -    DPRINTK("   tap - Backend connection init:\n");
    3.56 +/* for poll: */
    3.57 +static wait_queue_head_t blktap_wait;
    3.58 +
    3.59 +/* current switching mode */
    3.60 +static unsigned long blktap_mode;
    3.61 +
    3.62 +/* local prototypes */
    3.63 +static int blktap_read_ufe_ring(void);
    3.64  
    3.65  
    3.66 -    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
    3.67 -                                    CALLBACK_IN_BLOCKING_CONTEXT);
    3.68 -
    3.69 -    /* Send a driver-UP notification to the domain controller. */
    3.70 -    cmsg.type      = CMSG_BLKIF_FE;
    3.71 -    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
    3.72 -    cmsg.length    = sizeof(blkif_fe_driver_status_t);
    3.73 -    fe_st.status   = BLKIF_DRIVER_STATUS_UP;
    3.74 -    memcpy(cmsg.msg, &fe_st, sizeof(fe_st));
    3.75 -    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
     3.76 +/* /dev/xen/blktap resides at device number major=10, minor=202        */ 
    3.77 +#define BLKTAP_MINOR 202
    3.78  
    3.79 -    DPRINTK("   tap - Frontend connection init:\n");
    3.80 -    
    3.81 -    active_reqs_init();
    3.82 -    blkif_interface_init();
    3.83 -    blkdev_schedule_init();
    3.84 -    
    3.85 -    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, 
    3.86 -                                    CALLBACK_IN_BLOCKING_CONTEXT);
    3.87 +/* blktap IOCTLs:                                                      */
    3.88 +#define BLKTAP_IOCTL_KICK_FE         1
    3.89 +#define BLKTAP_IOCTL_KICK_BE         2 /* currently unused */
    3.90 +#define BLKTAP_IOCTL_SETMODE         3
    3.91 +#define BLKTAP_IOCTL_PRINT_IDXS      100  
    3.92  
    3.93 -    /* Send a driver-UP notification to the domain controller. */
    3.94 -    cmsg.type      = CMSG_BLKIF_BE;
    3.95 -    cmsg.subtype   = CMSG_BLKIF_BE_DRIVER_STATUS;
    3.96 -    cmsg.length    = sizeof(blkif_be_driver_status_t);
    3.97 -    be_st.status   = BLKIF_DRIVER_STATUS_UP;
    3.98 -    memcpy(cmsg.msg, &be_st, sizeof(be_st));
    3.99 -    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
   3.100 +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
   3.101 +#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
   3.102 +#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
   3.103 +#define BLKTAP_MODE_INTERCEPT_BE     0x00000002  /* unimp. */
   3.104 +#define BLKTAP_MODE_COPY_FE          0x00000004  /* unimp. */
   3.105 +#define BLKTAP_MODE_COPY_BE          0x00000008  /* unimp. */
   3.106 +#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010  /* unimp. */
   3.107 +#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020  /* unimp. */
   3.108  
   3.109 -    DPRINTK("   tap - Userland channel init:\n");
   3.110 +#define BLKTAP_MODE_INTERPOSE \
   3.111 +           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
   3.112  
   3.113 -    blktap_init();
   3.114 +#define BLKTAP_MODE_COPY_BOTH \
   3.115 +           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
   3.116  
   3.117 -    DPRINTK("Blkif tap device initialized.\n");
   3.118 +#define BLKTAP_MODE_COPY_BOTH_PAGES \
   3.119 +           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
   3.120 +
   3.121 +static inline int BLKTAP_MODE_VALID(unsigned long arg)
   3.122 +{
   3.123 +    return (
   3.124 +        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
   3.125 +        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
   3.126 +        ( arg == BLKTAP_MODE_INTERPOSE    ) );
   3.127 +/*
   3.128 +    return (
   3.129 +        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
   3.130 +        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
   3.131 +        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
   3.132 +        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
   3.133 +        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
   3.134 +        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
   3.135 +        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
   3.136 +        );
   3.137 +*/
   3.138 +}
   3.139 +
   3.140 +
   3.141 +/******************************************************************
   3.142 + * MMAP REGION
   3.143 + */
   3.144 +
   3.145 +/*
   3.146 + * We use a big chunk of address space to map in-flight requests into,
   3.147 + * and export this region up to user-space.  See the comments in blkback
   3.148 + * about this -- the two must be kept in sync if the tap is used as a 
   3.149 + * passthrough.
   3.150 + */
   3.151 +
   3.152 +#define MAX_PENDING_REQS 64
   3.153 +#define BATCH_PER_DOMAIN 16
   3.154 +
    3.155 +/* Immediately before the mmap area, we reserve a number of pages for
    3.156 + * the shared memory rings.
   3.157 + */
   3.158 +#define RING_PAGES 1 /* Front */ 
   3.159 +
   3.160 +/* Where things are inside the device mapping. */
   3.161 +struct vm_area_struct *blktap_vma = NULL;
   3.162 +unsigned long mmap_vstart;  /* Kernel pages for mapping in data. */
    3.163 +unsigned long rings_vstart; /* start of mmapped vma              */
   3.164 +unsigned long user_vstart;  /* start of user mappings            */
   3.165 +
   3.166 +#define MMAP_PAGES                                              \
   3.167 +    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
   3.168 +#define MMAP_VADDR(_start, _req,_seg)                           \
   3.169 +    (_start +                                                   \
   3.170 +     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
   3.171 +     ((_seg) * PAGE_SIZE))
   3.172 +
   3.173 +
   3.174 +
   3.175 +/*
   3.176 + * Each outstanding request that we've passed to the lower device layers has a 
   3.177 + * 'pending_req' allocated to it. Each buffer_head that completes decrements 
   3.178 + * the pendcnt towards zero. When it hits zero, the specified domain has a 
   3.179 + * response queued for it, with the saved 'id' passed back.
   3.180 + */
   3.181 +typedef struct {
   3.182 +    blkif_t       *blkif;
   3.183 +    unsigned long  id;
   3.184 +    int            nr_pages;
   3.185 +    atomic_t       pendcnt;
   3.186 +    unsigned short operation;
   3.187 +    int            status;
   3.188 +} pending_req_t;
   3.189 +
   3.190 +/*
   3.191 + * We can't allocate pending_req's in order, since they may complete out of 
   3.192 + * order. We therefore maintain an allocation ring. This ring also indicates 
   3.193 + * when enough work has been passed down -- at that point the allocation ring 
   3.194 + * will be empty.
   3.195 + */
   3.196 +static pending_req_t pending_reqs[MAX_PENDING_REQS];
   3.197 +static unsigned char pending_ring[MAX_PENDING_REQS];
   3.198 +static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
   3.199 +/* NB. We use a different index type to differentiate from shared blk rings. */
   3.200 +typedef unsigned int PEND_RING_IDX;
   3.201 +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
   3.202 +static PEND_RING_IDX pending_prod, pending_cons;
   3.203 +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
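/* Worked example of the accounting above: at init, pending_prod is set to
 * MAX_PENDING_REQS (64) and pending_cons to 0, so NR_PENDING_REQS
 * = 64 - 64 + 0 = 0 requests in flight.  Dispatch takes a free slot index
 * from pending_ring[MASK_PEND_IDX(pending_cons++)] (NR becomes 1);
 * completion returns it via pending_ring[MASK_PEND_IDX(pending_prod++)].
 */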
   3.204 +
   3.205 +/* Requests passing through the tap to the backend hijack the id field
   3.206 + * in the request message.  In it we put the AR index _AND_ the fe domid.
    3.207 + * The domid is used by the backend to map the pages properly.
   3.208 + */
   3.209 +
   3.210 +static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx)
   3.211 +{
   3.212 +    return ( (fe_dom << 16) | MASK_PEND_IDX(idx) );
   3.213 +}
   3.214 +
    3.215 +static inline PEND_RING_IDX ID_TO_IDX(unsigned long id) 
   3.216 +{ 
   3.217 +    return (PEND_RING_IDX)( id & 0x0000ffff );
   3.218 +}
   3.219 +
    3.220 +static inline domid_t ID_TO_DOM(unsigned long id) 
   3.221 +{ 
   3.222 +    return (domid_t)(id >> 16); 
   3.223 +}
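/* Worked example: MAKE_ID(5, 3) yields (5 << 16) | 3 == 0x00050003;
 * ID_TO_DOM() recovers domid 5 and ID_TO_IDX() recovers index 3.  This
 * packing caps frontend domids at 16 bits, and MASK_PEND_IDX keeps the
 * stored index within MAX_PENDING_REQS.
 */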
   3.224 +
   3.225 +
   3.226 +
   3.227 +/******************************************************************
   3.228 + * GRANT HANDLES
   3.229 + */
   3.230 +
    3.231 +/* When using grant tables to map a frame for device access, the
   3.232 + * handle returned must be used to unmap the frame. This is needed to
   3.233 + * drop the ref count on the frame.
   3.234 + */
   3.235 +struct grant_handle_pair
   3.236 +{
   3.237 +    u16  kernel;
   3.238 +    u16  user;
   3.239 +};
   3.240 +static struct grant_handle_pair pending_grant_handles[MMAP_PAGES];
   3.241 +#define pending_handle(_idx, _i) \
   3.242 +    (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
   3.243 +#define BLKTAP_INVALID_HANDLE(_g) \
   3.244 +    (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF))
   3.245 +#define BLKTAP_INVALIDATE_HANDLE(_g) do {       \
   3.246 +    (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \
   3.247 +    } while(0)
   3.248 +
   3.249 +
   3.250 +/******************************************************************
   3.251 + * BLKTAP VM OPS
   3.252 + */
   3.253 +
   3.254 +static struct page *blktap_nopage(struct vm_area_struct *vma,
    3.255 +                                  unsigned long address,
    3.256 +                                  int *type)
   3.257 +{
    3.258 +     * If the page has not been mapped in by the driver, generate
    3.259 +     * SIGBUS to the faulting process.
   3.260 +     * a SIGBUS to the domain.
   3.261 +     */
   3.262 +
   3.263 +    force_sig(SIGBUS, current);
   3.264  
   3.265      return 0;
   3.266  }
   3.267  
   3.268 -#if 0 /* tap doesn't handle suspend/resume */
   3.269 -void blkdev_suspend(void)
   3.270 +struct vm_operations_struct blktap_vm_ops = {
   3.271 +    nopage:   blktap_nopage,
   3.272 +};
   3.273 +
   3.274 +/******************************************************************
   3.275 + * BLKTAP FILE OPS
   3.276 + */
   3.277 +
   3.278 +static int blktap_open(struct inode *inode, struct file *filp)
   3.279  {
   3.280 +    blkif_sring_t *sring;
   3.281 +    
   3.282 +    if ( test_and_set_bit(0, &blktap_dev_inuse) )
   3.283 +        return -EBUSY;
   3.284 +    
   3.285 +    /* Allocate the fe ring. */
   3.286 +    sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
   3.287 +    if (sring == NULL)
   3.288 +        goto fail_nomem;
   3.289 +
   3.290 +    SetPageReserved(virt_to_page(sring));
   3.291 +    
   3.292 +    SHARED_RING_INIT(sring);
   3.293 +    FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE);
   3.294 +
   3.295 +    return 0;
   3.296 +
    3.297 + fail_nomem:
          +    blktap_dev_inuse = 0; /* allow a later open() to succeed */
    3.298 +    return -ENOMEM;
   3.299  }
   3.300  
   3.301 -void blkdev_resume(void)
   3.302 +static int blktap_release(struct inode *inode, struct file *filp)
   3.303  {
   3.304 -    ctrl_msg_t               cmsg;
   3.305 -    blkif_fe_driver_status_t st;    
   3.306 +    blktap_dev_inuse = 0;
   3.307 +    blktap_ring_ok = 0;
   3.308  
   3.309 -    /* Send a driver-UP notification to the domain controller. */
   3.310 -    cmsg.type      = CMSG_BLKIF_FE;
   3.311 -    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
   3.312 -    cmsg.length    = sizeof(blkif_fe_driver_status_t);
   3.313 -    st.status      = BLKIF_DRIVER_STATUS_UP;
   3.314 -    memcpy(cmsg.msg, &st, sizeof(st));
   3.315 -    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
   3.316 +    /* Free the ring page. */
   3.317 +    ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
   3.318 +    free_page((unsigned long) blktap_ufe_ring.sring);
   3.319 +
   3.320 +    /* Clear any active mappings and free foreign map table */
   3.321 +    if (blktap_vma != NULL) {
    3.322 +        zap_page_range(blktap_vma, blktap_vma->vm_start, 
    3.323 +                       blktap_vma->vm_end - blktap_vma->vm_start, NULL);
          +        kfree(blktap_vma->vm_private_data); /* the VM_FOREIGN map */
   3.324 +        blktap_vma = NULL;
   3.325 +    }
   3.326 +
   3.327 +    return 0;
   3.328  }
   3.329 -#endif
   3.330  
   3.331 -__initcall(xlblktap_init);
   3.332 +
   3.333 +/* Note on mmap:
   3.334 + * We need to map pages to user space in a way that will allow the block
    3.335 + * subsystem to set up direct IO to them.  This couldn't be done before, because
   3.336 + * there isn't really a sane way to translate a user virtual address down to a 
   3.337 + * physical address when the page belongs to another domain.
   3.338 + *
    3.339 + * My first approach was to map the page into kernel memory, add an entry
   3.340 + * for it in the physical frame list (using alloc_lomem_region as in blkback)
   3.341 + * and then attempt to map that page up to user space.  This is disallowed
   3.342 + * by xen though, which realizes that we don't really own the machine frame
   3.343 + * underlying the physical page.
   3.344 + *
   3.345 + * The new approach is to provide explicit support for this in xen linux.
   3.346 + * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
    3.347 + * mapped from other VMs.  vma->vm_private_data is set up as a mapping 
    3.348 + * from offsets in the VMA to the actual page structs.  There is a new clause in get_user_pages
   3.349 + * that does the right thing for this sort of mapping.
   3.350 + */
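/*
 * Roughly what the new get_user_pages() clause amounts to (a sketch under
 * assumptions -- see the mm/memory.c hunk in this changeset for the real
 * code):
 *
 *     if (vma->vm_flags & VM_FOREIGN) {
 *         struct page **map = vma->vm_private_data;
 *         int offset = (start - vma->vm_start) >> PAGE_SHIFT;
 *         if (map[offset] != NULL) {       // page mapped in by the driver
 *             if (pages)
 *                 pages[i] = map[offset];  // use the saved page struct
 *             if (vmas)
 *                 vmas[i] = vma;
 *             i++; start += PAGE_SIZE; len--;
 *             continue;
 *         }
 *     }
 */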
   3.351 +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
   3.352 +{
   3.353 +    int size;
   3.354 +    struct page **map;
   3.355 +    int i;
   3.356 +
   3.357 +    DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n",
   3.358 +           vma->vm_start, vma->vm_end);
   3.359 +
   3.360 +    vma->vm_flags |= VM_RESERVED;
   3.361 +    vma->vm_ops = &blktap_vm_ops;
   3.362 +
   3.363 +    size = vma->vm_end - vma->vm_start;
   3.364 +    if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
   3.365 +        printk(KERN_INFO 
   3.366 +               "blktap: you _must_ map exactly %d pages!\n",
   3.367 +               MMAP_PAGES + RING_PAGES);
   3.368 +        return -EAGAIN;
   3.369 +    }
   3.370 +
   3.371 +    size >>= PAGE_SHIFT;
   3.372 +    DPRINTK(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
   3.373 +    
   3.374 +    rings_vstart = vma->vm_start;
   3.375 +    user_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
   3.376 +    
   3.377 +    /* Map the ring pages to the start of the region and reserve it. */
   3.378 +
   3.379 +    /* not sure if I really need to do this... */
   3.380 +    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
   3.381 +
   3.382 +    if (remap_pfn_range(vma, vma->vm_start, 
   3.383 +                         __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, 
   3.384 +                         PAGE_SIZE, vma->vm_page_prot)) 
   3.385 +    {
   3.386 +        WPRINTK("Mapping user ring failed!\n");
   3.387 +        goto fail;
   3.388 +    }
   3.389 +
   3.390 +    /* Mark this VM as containing foreign pages, and set up mappings. */
   3.391 +    map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
    3.392 +                  * sizeof(struct page *),
   3.393 +                  GFP_KERNEL);
   3.394 +    if (map == NULL) 
   3.395 +    {
    3.396 +        WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
   3.397 +        goto fail;
   3.398 +    }
   3.399 +
   3.400 +    for (i=0; i<((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
   3.401 +        map[i] = NULL;
   3.402 +    
   3.403 +    vma->vm_private_data = map;
   3.404 +    vma->vm_flags |= VM_FOREIGN;
   3.405 +
   3.406 +    blktap_vma = vma;
   3.407 +    blktap_ring_ok = 1;
   3.408 +
   3.409 +    return 0;
   3.410 + fail:
   3.411 +    /* Clear any active mappings. */
   3.412 +    zap_page_range(vma, vma->vm_start, 
   3.413 +                   vma->vm_end - vma->vm_start, NULL);
   3.414 +
   3.415 +    return -ENOMEM;
   3.416 +}
   3.417 +
   3.418 +static int blktap_ioctl(struct inode *inode, struct file *filp,
   3.419 +                        unsigned int cmd, unsigned long arg)
   3.420 +{
   3.421 +    switch(cmd) {
   3.422 +    case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
   3.423 +        return blktap_read_ufe_ring();
   3.424 +
   3.425 +    case BLKTAP_IOCTL_SETMODE:
   3.426 +        if (BLKTAP_MODE_VALID(arg)) {
   3.427 +            blktap_mode = arg;
   3.428 +            /* XXX: may need to flush rings here. */
   3.429 +            printk(KERN_INFO "blktap: set mode to %lx\n", arg);
   3.430 +            return 0;
    3.431 +        }
          +        return -EINVAL; /* reject invalid modes; don't fall through */
   3.432 +    case BLKTAP_IOCTL_PRINT_IDXS:
   3.433 +        {
   3.434 +            //print_fe_ring_idxs();
   3.435 +            WPRINTK("User Rings: \n-----------\n");
    3.436 +            WPRINTK("UF: rsp_cons: %2d, req_prod_pvt: %2d "
   3.437 +                            "| req_prod: %2d, rsp_prod: %2d\n",
   3.438 +                            blktap_ufe_ring.rsp_cons,
   3.439 +                            blktap_ufe_ring.req_prod_pvt,
   3.440 +                            blktap_ufe_ring.sring->req_prod,
   3.441 +                            blktap_ufe_ring.sring->rsp_prod);
   3.442 +            
   3.443 +        }
   3.444 +    }
   3.445 +    return -ENOIOCTLCMD;
   3.446 +}
   3.447 +
   3.448 +static unsigned int blktap_poll(struct file *file, poll_table *wait)
   3.449 +{
   3.450 +        poll_wait(file, &blktap_wait, wait);
   3.451 +        if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring) ) 
   3.452 +        {
   3.453 +            flush_tlb_all();
   3.454 +
   3.455 +            RING_PUSH_REQUESTS(&blktap_ufe_ring);
   3.456 +            return POLLIN | POLLRDNORM;
   3.457 +        }
   3.458 +
   3.459 +        return 0;
   3.460 +}
   3.461 +
   3.462 +void blktap_kick_user(void)
   3.463 +{
   3.464 +    /* blktap_ring->req_prod = blktap_req_prod; */
   3.465 +    wake_up_interruptible(&blktap_wait);
   3.466 +}
   3.467 +
   3.468 +static struct file_operations blktap_fops = {
   3.469 +    owner:    THIS_MODULE,
   3.470 +    poll:     blktap_poll,
   3.471 +    ioctl:    blktap_ioctl,
   3.472 +    open:     blktap_open,
   3.473 +    release:  blktap_release,
   3.474 +    mmap:     blktap_mmap,
   3.475 +};
   3.476 +
   3.477 +
   3.478 +
   3.479 +static int do_block_io_op(blkif_t *blkif, int max_to_do);
   3.480 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
   3.481 +static void make_response(blkif_t *blkif, unsigned long id, 
   3.482 +                          unsigned short op, int st);
   3.483 +
   3.484 +
   3.485 +static void fast_flush_area(int idx, int nr_pages)
   3.486 +{
   3.487 +    struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
   3.488 +    unsigned int i, op = 0;
   3.489 +    struct grant_handle_pair *handle;
   3.490 +    unsigned long ptep;
   3.491 +
   3.492 +    for (i=0; i<nr_pages; i++)
   3.493 +    {
   3.494 +        handle = &pending_handle(idx, i);
   3.495 +        if (!BLKTAP_INVALID_HANDLE(handle))
   3.496 +        {
   3.497 +
   3.498 +            unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i);
   3.499 +            unmap[op].dev_bus_addr = 0;
   3.500 +            unmap[op].handle = handle->kernel;
   3.501 +            op++;
   3.502 +
   3.503 +            if (create_lookup_pte_addr(blktap_vma->vm_mm,
   3.504 +                                       MMAP_VADDR(user_vstart, idx, i), 
   3.505 +                                       &ptep) !=0) {
   3.506 +                DPRINTK("Couldn't get a pte addr!\n");
   3.507 +                return;
   3.508 +            }
   3.509 +            unmap[op].host_addr    = ptep;
   3.510 +            unmap[op].dev_bus_addr = 0;
   3.511 +            unmap[op].handle       = handle->user;
   3.512 +            op++;
   3.513 +            
    3.514 +            BLKTAP_INVALIDATE_HANDLE(handle);
   3.515 +        }
   3.516 +    }
   3.517 +    if ( unlikely(HYPERVISOR_grant_table_op(
   3.518 +        GNTTABOP_unmap_grant_ref, unmap, op)))
   3.519 +        BUG();
   3.520 +
   3.521 +    if (blktap_vma != NULL)
   3.522 +        zap_page_range(blktap_vma, 
   3.523 +                       MMAP_VADDR(user_vstart, idx, 0), 
   3.524 +                       nr_pages << PAGE_SHIFT, NULL);
   3.525 +}
   3.526 +
   3.527 +/******************************************************************
   3.528 + * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
   3.529 + */
   3.530 +
   3.531 +static struct list_head blkio_schedule_list;
   3.532 +static spinlock_t blkio_schedule_list_lock;
   3.533 +
   3.534 +static int __on_blkdev_list(blkif_t *blkif)
   3.535 +{
   3.536 +    return blkif->blkdev_list.next != NULL;
   3.537 +}
   3.538 +
   3.539 +static void remove_from_blkdev_list(blkif_t *blkif)
   3.540 +{
   3.541 +    unsigned long flags;
   3.542 +    if ( !__on_blkdev_list(blkif) ) return;
   3.543 +    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
   3.544 +    if ( __on_blkdev_list(blkif) )
   3.545 +    {
   3.546 +        list_del(&blkif->blkdev_list);
   3.547 +        blkif->blkdev_list.next = NULL;
   3.548 +        blkif_put(blkif);
   3.549 +    }
   3.550 +    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
   3.551 +}
   3.552 +
   3.553 +static void add_to_blkdev_list_tail(blkif_t *blkif)
   3.554 +{
   3.555 +    unsigned long flags;
   3.556 +    if ( __on_blkdev_list(blkif) ) return;
   3.557 +    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
   3.558 +    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
   3.559 +    {
   3.560 +        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
   3.561 +        blkif_get(blkif);
   3.562 +    }
   3.563 +    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
   3.564 +}
   3.565 +
   3.566 +
   3.567 +/******************************************************************
   3.568 + * SCHEDULER FUNCTIONS
   3.569 + */
   3.570 +
   3.571 +static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
   3.572 +
   3.573 +static int blkio_schedule(void *arg)
   3.574 +{
   3.575 +    DECLARE_WAITQUEUE(wq, current);
   3.576 +
   3.577 +    blkif_t          *blkif;
   3.578 +    struct list_head *ent;
   3.579 +
   3.580 +    daemonize("xenblkd");
   3.581 +
   3.582 +    for ( ; ; )
   3.583 +    {
   3.584 +        /* Wait for work to do. */
   3.585 +        add_wait_queue(&blkio_schedule_wait, &wq);
   3.586 +        set_current_state(TASK_INTERRUPTIBLE);
   3.587 +        if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 
   3.588 +             list_empty(&blkio_schedule_list) )
   3.589 +            schedule();
   3.590 +        __set_current_state(TASK_RUNNING);
   3.591 +        remove_wait_queue(&blkio_schedule_wait, &wq);
   3.592 +
   3.593 +        /* Queue up a batch of requests. */
   3.594 +        while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
   3.595 +                !list_empty(&blkio_schedule_list) )
   3.596 +        {
   3.597 +            ent = blkio_schedule_list.next;
   3.598 +            blkif = list_entry(ent, blkif_t, blkdev_list);
   3.599 +            blkif_get(blkif);
   3.600 +            remove_from_blkdev_list(blkif);
   3.601 +            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
   3.602 +                add_to_blkdev_list_tail(blkif);
   3.603 +            blkif_put(blkif);
   3.604 +        }
   3.605 +    }
   3.606 +}
   3.607 +
   3.608 +static void maybe_trigger_blkio_schedule(void)
   3.609 +{
   3.610 +    /*
    3.611 +     * Needed so that two processes, which together make the following predicate
   3.612 +     * true, don't both read stale values and evaluate the predicate
   3.613 +     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
   3.614 +     */
   3.615 +    smp_mb();
   3.616 +
   3.617 +    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
   3.618 +         !list_empty(&blkio_schedule_list) )
   3.619 +        wake_up(&blkio_schedule_wait);
   3.620 +}
   3.621 +
   3.622 +
   3.623 +
   3.624 +/******************************************************************
   3.625 + * COMPLETION CALLBACK -- Called as bh->b_end_io()
   3.626 + */
   3.627 +
   3.628 +
   3.629 +static int blktap_read_ufe_ring(void)
   3.630 +{
   3.631 +    /* This is called to read responses from the UFE ring. */
   3.632 +
   3.633 +    RING_IDX i, j, rp;
   3.634 +    blkif_response_t *resp;
   3.635 +    blkif_t *blkif;
   3.636 +    int pending_idx;
   3.637 +    pending_req_t *pending_req;
   3.638 +    unsigned long     flags;
   3.639 +
    3.640 +    /* if we are forwarding from the UFE ring to the FE ring */
   3.641 +    if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
   3.642 +
    3.643 +        /* for each outstanding message on the UFE ring  */
   3.644 +        rp = blktap_ufe_ring.sring->rsp_prod;
   3.645 +        rmb();
   3.646 +        
   3.647 +        for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
   3.648 +        {
   3.649 +            resp = RING_GET_RESPONSE(&blktap_ufe_ring, i);
   3.650 +            pending_idx = MASK_PEND_IDX(ID_TO_IDX(resp->id));
   3.651 +            pending_req = &pending_reqs[pending_idx];
   3.652 +            
   3.653 +            blkif = pending_req->blkif;
   3.654 +            for (j = 0; j < pending_req->nr_pages; j++) {
   3.655 +                unsigned long vaddr;
   3.656 +                struct page **map = blktap_vma->vm_private_data;
   3.657 +                int offset; 
   3.658 +
   3.659 +                vaddr  = MMAP_VADDR(user_vstart, pending_idx, j);
   3.660 +                offset = (vaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
   3.661 +
   3.662 +                //ClearPageReserved(virt_to_page(vaddr));
    3.663 +                ClearPageReserved(map[offset]);
   3.664 +                map[offset] = NULL;
   3.665 +            }
   3.666 +
   3.667 +            fast_flush_area(pending_idx, pending_req->nr_pages);
   3.668 +            make_response(blkif, pending_req->id, resp->operation, 
   3.669 +                          resp->status);
   3.670 +            blkif_put(pending_req->blkif);
   3.671 +            spin_lock_irqsave(&pend_prod_lock, flags);
   3.672 +            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
   3.673 +            spin_unlock_irqrestore(&pend_prod_lock, flags);
   3.674 +        }
   3.675 +        blktap_ufe_ring.rsp_cons = i;
   3.676 +        maybe_trigger_blkio_schedule();
   3.677 +    }
   3.678 +    return 0;
   3.679 +}
   3.680 +
   3.681 +
   3.682 +/******************************************************************************
   3.683 + * NOTIFICATION FROM GUEST OS.
   3.684 + */
   3.685 +
   3.686 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
   3.687 +{
   3.688 +    blkif_t *blkif = dev_id;
   3.689 +    add_to_blkdev_list_tail(blkif);
   3.690 +    maybe_trigger_blkio_schedule();
   3.691 +    return IRQ_HANDLED;
   3.692 +}
   3.693 +
   3.694 +
   3.695 +
   3.696 +/******************************************************************
   3.697 + * DOWNWARD CALLS -- These interface with the block-device layer proper.
   3.698 + */
   3.699 +
   3.700 +static int do_block_io_op(blkif_t *blkif, int max_to_do)
   3.701 +{
   3.702 +    blkif_back_ring_t *blk_ring = &blkif->blk_ring;
   3.703 +    blkif_request_t *req;
   3.704 +    RING_IDX i, rp;
   3.705 +    int more_to_do = 0;
   3.706 +    
   3.707 +    rp = blk_ring->sring->req_prod;
   3.708 +    rmb(); /* Ensure we see queued requests up to 'rp'. */
   3.709 +
   3.710 +    for ( i = blk_ring->req_cons; 
   3.711 +         (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
   3.712 +          i++ )
   3.713 +    {
   3.714 +        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
   3.715 +        {
   3.716 +            more_to_do = 1;
   3.717 +            break;
   3.718 +        }
   3.719 +        
   3.720 +        req = RING_GET_REQUEST(blk_ring, i);
   3.721 +        switch ( req->operation )
   3.722 +        {
   3.723 +        case BLKIF_OP_READ:
   3.724 +        case BLKIF_OP_WRITE:
   3.725 +            dispatch_rw_block_io(blkif, req);
   3.726 +            break;
   3.727 +
   3.728 +        default:
   3.729 +            DPRINTK("error: unknown block io operation [%d]\n",
   3.730 +                    req->operation);
   3.731 +            make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
   3.732 +            break;
   3.733 +        }
   3.734 +    }
   3.735 +
   3.736 +    blk_ring->req_cons = i;
   3.737 +    blktap_kick_user();
   3.738 +
   3.739 +    return more_to_do;
   3.740 +}
   3.741 +
   3.742 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
   3.743 +{
   3.744 +    blkif_request_t *target;
   3.745 +    int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
   3.746 +    pending_req_t *pending_req;
   3.747 +    struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
   3.748 +    int op, ret;
   3.749 +    unsigned int nseg;
   3.750 +
   3.751 +    /* Check that number of segments is sane. */
   3.752 +    nseg = req->nr_segments;
   3.753 +    if ( unlikely(nseg == 0) || 
   3.754 +         unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
   3.755 +    {
   3.756 +        DPRINTK("Bad number of segments in request (%d)\n", nseg);
   3.757 +        goto bad_descriptor;
   3.758 +    }
   3.759 +
   3.760 +    /* Make sure userspace is ready. */
   3.761 +    if (!blktap_ring_ok) {
   3.762 +        DPRINTK("blktap: ring not ready for requests!\n");
   3.763 +        goto bad_descriptor;
   3.764 +    }
   3.765 +    
   3.766 +
   3.767 +    if ( RING_FULL(&blktap_ufe_ring) ) {
   3.768 +        WPRINTK("blktap: fe_ring is full, can't add (very broken!).\n");
   3.769 +        goto bad_descriptor;
   3.770 +    }
   3.771 +
   3.772 +    flush_cache_all(); /* a noop on intel... */
   3.773 +
    3.774 +    /* Map the foreign pages directly into the application */    
   3.775 +    op = 0;
   3.776 +    for (i=0; i<req->nr_segments; i++) {
   3.777 +
   3.778 +        unsigned long uvaddr;
   3.779 +        unsigned long kvaddr;
   3.780 +        unsigned long ptep;
   3.781 +
   3.782 +        uvaddr = MMAP_VADDR(user_vstart, pending_idx, i);
   3.783 +        kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
   3.784 +
   3.785 +        /* Map the remote page to kernel. */
   3.786 +        map[op].host_addr = kvaddr;
   3.787 +        map[op].dom   = blkif->domid;
   3.788 +        map[op].ref   = blkif_gref_from_fas(req->frame_and_sects[i]);
   3.789 +        map[op].flags = GNTMAP_host_map;
   3.790 +        /* This needs a bit more thought in terms of interposition: 
   3.791 +         * If we want to be able to modify pages during write using 
   3.792 +         * grant table mappings, the guest will either need to allow 
   3.793 +         * it, or we'll need to incur a copy. Bit of an fbufs moment. ;) */
   3.794 +        if (req->operation == BLKIF_OP_WRITE)
   3.795 +            map[op].flags |= GNTMAP_readonly;
   3.796 +        op++;
   3.797 +
   3.798 +        /* Now map it to user. */
   3.799 +        ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
   3.800 +        if (ret)
   3.801 +        {
   3.802 +            DPRINTK("Couldn't get a pte addr!\n");
   3.803 +            fast_flush_area(pending_idx, req->nr_segments);
   3.804 +            goto bad_descriptor;
   3.805 +        }
   3.806 +
   3.807 +        map[op].host_addr = ptep;
   3.808 +        map[op].dom       = blkif->domid;
   3.809 +        map[op].ref       = blkif_gref_from_fas(req->frame_and_sects[i]);
   3.810 +        map[op].flags     = GNTMAP_host_map | GNTMAP_application_map
   3.811 +                            | GNTMAP_contains_pte;
   3.812 +        /* Above interposition comment applies here as well. */
   3.813 +        if (req->operation == BLKIF_OP_WRITE)
   3.814 +            map[op].flags |= GNTMAP_readonly;
   3.815 +        op++;
   3.816 +    }
   3.817 +
   3.818 +    if ( unlikely(HYPERVISOR_grant_table_op(
   3.819 +            GNTTABOP_map_grant_ref, map, op)))
   3.820 +        BUG();
   3.821 +
   3.822 +    op = 0;
   3.823 +    for (i=0; i<(req->nr_segments*2); i+=2) {
   3.824 +        unsigned long uvaddr;
   3.825 +        unsigned long kvaddr;
   3.826 +        unsigned long offset;
   3.827 +        int cancel = 0;
   3.828 +
   3.829 +        uvaddr = MMAP_VADDR(user_vstart, pending_idx, i/2);
   3.830 +        kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i/2);
   3.831 +
   3.832 +        if ( unlikely(map[i].handle < 0) ) 
   3.833 +        {
   3.834 +            DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle);
   3.835 +            ret = map[i].handle;
   3.836 +            cancel = 1;
   3.837 +        }
   3.838 +
   3.839 +        if ( unlikely(map[i+1].handle < 0) ) 
   3.840 +        {
   3.841 +            DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle);
   3.842 +            ret = map[i+1].handle;
   3.843 +            cancel = 1;
   3.844 +        }
   3.845 +
   3.846 +        if (cancel) 
   3.847 +        {
   3.848 +            fast_flush_area(pending_idx, req->nr_segments);
   3.849 +            goto bad_descriptor;
   3.850 +        }
   3.851 +
   3.852 +        /* Set the necessary mappings in p2m and in the VM_FOREIGN 
   3.853 +         * vm_area_struct to allow user vaddr -> struct page lookups
   3.854 +         * to work.  This is needed for direct IO to foreign pages. */
   3.855 +        phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] =
   3.856 +            FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
   3.857 +
   3.858 +        offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
   3.859 +        ((struct page **)blktap_vma->vm_private_data)[offset] =
   3.860 +            pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
   3.861 +
   3.862 +        /* Save handles for unmapping later. */
   3.863 +        pending_handle(pending_idx, i/2).kernel = map[i].handle;
   3.864 +        pending_handle(pending_idx, i/2).user   = map[i+1].handle;
   3.865 +    }
   3.866 +
   3.867 +    /* Mark mapped pages as reserved: */
   3.868 +    for ( i = 0; i < req->nr_segments; i++ )
   3.869 +    {
   3.870 +        unsigned long kvaddr;
   3.871 +
   3.872 +        kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
   3.873 +        SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT));
   3.874 +    }
   3.875 +
   3.876 +    pending_req = &pending_reqs[pending_idx];
   3.877 +    pending_req->blkif     = blkif;
   3.878 +    pending_req->id        = req->id;
   3.879 +    pending_req->operation = req->operation;
   3.880 +    pending_req->status    = BLKIF_RSP_OKAY;
   3.881 +    pending_req->nr_pages  = nseg;
   3.882 +    req->id = MAKE_ID(blkif->domid, pending_idx);
   3.883 +    //atomic_set(&pending_req->pendcnt, nbio);
   3.884 +    pending_cons++;
   3.885 +    blkif_get(blkif);
   3.886 +
   3.887 +    /* Finally, write the request message to the user ring. */
   3.888 +    target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt);
   3.889 +    memcpy(target, req, sizeof(*req));
   3.890 +    blktap_ufe_ring.req_prod_pvt++;
   3.891 +    return;
   3.892 +
   3.893 + bad_descriptor:
   3.894 +    make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
   3.895 +} 
   3.896 +
   3.897 +
   3.898 +
   3.899 +/******************************************************************
   3.900 + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
   3.901 + */
   3.902 +
   3.903 +
   3.904 +static void make_response(blkif_t *blkif, unsigned long id, 
   3.905 +                          unsigned short op, int st)
   3.906 +{
   3.907 +    blkif_response_t *resp;
   3.908 +    unsigned long     flags;
   3.909 +    blkif_back_ring_t *blk_ring = &blkif->blk_ring;
   3.910 +
   3.911 +    /* Place on the response ring for the relevant domain. */ 
   3.912 +    spin_lock_irqsave(&blkif->blk_ring_lock, flags);
   3.913 +    resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
   3.914 +    resp->id        = id;
   3.915 +    resp->operation = op;
   3.916 +    resp->status    = st;
   3.917 +    wmb(); /* Ensure other side can see the response fields. */
   3.918 +    blk_ring->rsp_prod_pvt++;
   3.919 +    RING_PUSH_RESPONSES(blk_ring);
   3.920 +    spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
   3.921 +
   3.922 +    /* Kick the relevant domain. */
   3.923 +    notify_via_evtchn(blkif->evtchn);
   3.924 +}
   3.925 +
   3.926 +static struct miscdevice blktap_miscdev = {
   3.927 +    .minor        = BLKTAP_MINOR,
   3.928 +    .name         = "blktap",
   3.929 +    .fops         = &blktap_fops,
   3.930 +    .devfs_name   = "misc/blktap",
   3.931 +};
   3.932 +
   3.933 +void blkif_deschedule(blkif_t *blkif)
   3.934 +{
   3.935 +    remove_from_blkdev_list(blkif);
   3.936 +}
   3.937 +
   3.938 +static int __init blkif_init(void)
   3.939 +{
   3.940 +    int i, j, err;
   3.941 +    struct page *page;
   3.942 +/*
   3.943 +    if ( !(xen_start_info.flags & SIF_INITDOMAIN) &&
   3.944 +         !(xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
   3.945 +        return 0;
   3.946 +*/
   3.947 +    blkif_interface_init();
   3.948 +
   3.949 +    page = balloon_alloc_empty_page_range(MMAP_PAGES);
   3.950 +    BUG_ON(page == NULL);
   3.951 +    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
   3.952 +
   3.953 +    pending_cons = 0;
   3.954 +    pending_prod = MAX_PENDING_REQS;
   3.955 +    memset(pending_reqs, 0, sizeof(pending_reqs));
   3.956 +    for ( i = 0; i < MAX_PENDING_REQS; i++ )
   3.957 +        pending_ring[i] = i;
   3.958 +    
   3.959 +    spin_lock_init(&blkio_schedule_list_lock);
   3.960 +    INIT_LIST_HEAD(&blkio_schedule_list);
   3.961 +
   3.962 +    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
   3.963 +        BUG();
   3.964 +
   3.965 +    blkif_xenbus_init();
   3.966 +
   3.967 +    for (i=0; i<MAX_PENDING_REQS ; i++)
   3.968 +        for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
   3.969 +            BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j));
   3.970 +
   3.971 +    err = misc_register(&blktap_miscdev);
   3.972 +    if ( err != 0 )
   3.973 +    {
   3.974 +        printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
   3.975 +        return err;
   3.976 +    }
   3.977 +
   3.978 +    init_waitqueue_head(&blktap_wait);
   3.979 +
   3.980 +    return 0;
   3.981 +}
   3.982 +
   3.983 +__initcall(blkif_init);
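
A minimal sketch of a user-space consumer of the device above (blktaplib
normally wraps this; the sketch assumes BLKIF_MAX_SEGMENTS_PER_REQUEST is
11 and a 4096-byte page, and elides the actual ring processing):

    #include <fcntl.h>
    #include <poll.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #define BLKTAP_IOCTL_KICK_FE     1
    #define BLKTAP_IOCTL_SETMODE     3
    #define BLKTAP_MODE_INTERCEPT_FE 0x00000001
    #define PAGE_SIZE_BYTES          4096
    #define MMAP_PAGES               (64 * 11) /* MAX_PENDING_REQS * segs */
    #define RING_PAGES               1

    int main(void)
    {
        int fd = open("/dev/xen/blktap", O_RDWR);
        if (fd < 0)
            return 1;

        /* blktap_mmap() insists on exactly MMAP_PAGES + RING_PAGES pages. */
        void *mem = mmap(NULL, (MMAP_PAGES + RING_PAGES) * PAGE_SIZE_BYTES,
                         PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (mem == MAP_FAILED)
            return 1;

        ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERCEPT_FE);

        struct pollfd pfd = { .fd = fd, .events = POLLIN };
        while (poll(&pfd, 1, -1) > 0) {
            /* ... consume requests / queue responses on the ring at mem ... */
            ioctl(fd, BLKTAP_IOCTL_KICK_FE, 0); /* driver reads our responses */
        }
        return 0;
    }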
     4.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c	Sun Sep 04 15:08:16 2005 +0000
     4.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.3 @@ -1,573 +0,0 @@
     4.4 -/******************************************************************************
     4.5 - * blktap_controlmsg.c
     4.6 - * 
     4.7 - * XenLinux virtual block-device tap.
     4.8 - * Control interfaces to the frontend and backend drivers.
     4.9 - * 
    4.10 - * Copyright (c) 2004, Andrew Warfield
    4.11 - *
    4.12 - */
    4.13 - 
    4.14 -#include "blktap.h"
    4.15 -#include <asm-xen/evtchn.h>
    4.16 -
    4.17 -static char *blkif_state_name[] = {
    4.18 -    [BLKIF_STATE_CLOSED]       = "closed",
    4.19 -    [BLKIF_STATE_DISCONNECTED] = "disconnected",
    4.20 -    [BLKIF_STATE_CONNECTED]    = "connected",
    4.21 -};
    4.22 -
    4.23 -static char *blkif_status_name[] = {
    4.24 -    [BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
    4.25 -    [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
    4.26 -    [BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
    4.27 -    [BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
    4.28 -};
    4.29 -
    4.30 -unsigned int blktap_be_state = BLKIF_STATE_CLOSED;
    4.31 -unsigned int blktap_be_evtchn;
    4.32 -
    4.33 -/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
    4.34 -
    4.35 -#define BLKIF_HASHSZ 1024
    4.36 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
    4.37 -
    4.38 -static kmem_cache_t *blkif_cachep;
    4.39 -static blkif_t      *blkif_hash[BLKIF_HASHSZ];
    4.40 -
    4.41 -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
    4.42 -{
    4.43 -    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
    4.44 -    while ( (blkif != NULL) && 
    4.45 -            ((blkif->domid != domid) || (blkif->handle != handle)) )
    4.46 -        blkif = blkif->hash_next;
    4.47 -    return blkif;
    4.48 -}
    4.49 -
    4.50 -static void __blkif_disconnect_complete(void *arg)
    4.51 -{
    4.52 -    blkif_t              *blkif = (blkif_t *)arg;
    4.53 -    ctrl_msg_t            cmsg;
    4.54 -    blkif_be_disconnect_t disc;
    4.55 -#ifdef CONFIG_XEN_BLKDEV_GRANT
    4.56 -    struct gnttab_unmap_grant_ref op;
    4.57 -#endif
    4.58 -
    4.59 -    /*
    4.60 -     * These can't be done in blkif_disconnect() because at that point there
    4.61 -     * may be outstanding requests at the disc whose asynchronous responses
    4.62 -     * must still be notified to the remote driver.
    4.63 -     */
    4.64 -#ifdef CONFIG_XEN_BLKDEV_GRANT
    4.65 -    op.host_addr = blkif->shmem_vaddr;
    4.66 -    op.handle         = blkif->shmem_handle;
    4.67 -    op.dev_bus_addr   = 0;
    4.68 -    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
    4.69 -#endif
    4.70 -    vfree(blkif->blk_ring.sring);
    4.71 -
    4.72 -    /* Construct the deferred response message. */
    4.73 -    cmsg.type         = CMSG_BLKIF_BE;
    4.74 -    cmsg.subtype      = CMSG_BLKIF_BE_DISCONNECT;
    4.75 -    cmsg.id           = blkif->disconnect_rspid;
    4.76 -    cmsg.length       = sizeof(blkif_be_disconnect_t);
    4.77 -    disc.domid        = blkif->domid;
    4.78 -    disc.blkif_handle = blkif->handle;
    4.79 -    disc.status       = BLKIF_BE_STATUS_OKAY;
    4.80 -    memcpy(cmsg.msg, &disc, sizeof(disc));
    4.81 -
    4.82 -    /*
    4.83 -     * Make sure message is constructed /before/ status change, because
    4.84 -     * after the status change the 'blkif' structure could be deallocated at
    4.85 -     * any time. Also make sure we send the response /after/ status change,
    4.86 -     * as otherwise a subsequent CONNECT request could spuriously fail if
    4.87 -     * another CPU doesn't see the status change yet.
    4.88 -     */
    4.89 -    mb();
    4.90 -    if ( blkif->status != DISCONNECTING )
    4.91 -        BUG();
    4.92 -    blkif->status = DISCONNECTED;
    4.93 -    mb();
    4.94 -
    4.95 -    /* Send the successful response. */
    4.96 -    ctrl_if_send_response(&cmsg);
    4.97 -}
    4.98 -
    4.99 -void blkif_disconnect_complete(blkif_t *blkif)
   4.100 -{
   4.101 -    INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif);
   4.102 -    schedule_work(&blkif->work);
   4.103 -}
   4.104 -
   4.105 -void blkif_ptfe_create(blkif_be_create_t *create)
   4.106 -{
   4.107 -    blkif_t      *blkif, **pblkif;
   4.108 -    domid_t       domid  = create->domid;
   4.109 -    unsigned int  handle = create->blkif_handle;
   4.110 -
   4.111 -
   4.112 -    /* May want to store info on the connecting domain here. */
   4.113 -
   4.114 -    DPRINTK("PT got BE_CREATE\n");
   4.115 -
   4.116 -    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
   4.117 -    {
   4.118 -        WPRINTK("Could not create blkif: out of memory\n");
   4.119 -        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
   4.120 -        return;
   4.121 -    }
   4.122 -
   4.123 -    /* blkif struct init code from blkback.c */
   4.124 -    memset(blkif, 0, sizeof(*blkif));
   4.125 -    blkif->domid  = domid;
   4.126 -    blkif->handle = handle;
   4.127 -    blkif->status = DISCONNECTED;  
   4.128 -    spin_lock_init(&blkif->blk_ring_lock);
   4.129 -    atomic_set(&blkif->refcnt, 0);
   4.130 -
   4.131 -    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
   4.132 -    while ( *pblkif != NULL )
   4.133 -    {
   4.134 -        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
   4.135 -        {
   4.136 -            WPRINTK("Could not create blkif: already exists\n");
   4.137 -            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
   4.138 -            kmem_cache_free(blkif_cachep, blkif);
   4.139 -            return;
   4.140 -        }
   4.141 -        pblkif = &(*pblkif)->hash_next;
   4.142 -    }
   4.143 -
   4.144 -    blkif->hash_next = *pblkif;
   4.145 -    *pblkif = blkif;
   4.146 -
   4.147 -    create->status = BLKIF_BE_STATUS_OKAY;
   4.148 -}
   4.149 -
   4.150 -
   4.151 -void blkif_ptfe_destroy(blkif_be_destroy_t *destroy)
   4.152 -{
   4.153 -    /* Clear anything that we initialized above. */
   4.154 -
   4.155 -    domid_t       domid  = destroy->domid;
   4.156 -    unsigned int  handle = destroy->blkif_handle;
   4.157 -    blkif_t     **pblkif, *blkif;
   4.158 -
   4.159 -    DPRINTK("PT got BE_DESTROY\n");
   4.160 -    
   4.161 -    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
   4.162 -    while ( (blkif = *pblkif) != NULL )
   4.163 -    {
   4.164 -        if ( (blkif->domid == domid) && (blkif->handle == handle) )
   4.165 -        {
   4.166 -            if ( blkif->status != DISCONNECTED )
   4.167 -                goto still_connected;
   4.168 -            goto destroy;
   4.169 -        }
   4.170 -        pblkif = &blkif->hash_next;
   4.171 -    }
   4.172 -
   4.173 -    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   4.174 -    return;
   4.175 -
   4.176 - still_connected:
   4.177 -    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
   4.178 -    return;
   4.179 -
   4.180 - destroy:
   4.181 -    *pblkif = blkif->hash_next;
   4.182 -    kmem_cache_free(blkif_cachep, blkif);
   4.183 -    destroy->status = BLKIF_BE_STATUS_OKAY;
   4.184 -}
   4.185 -
   4.186 -void blkif_ptfe_connect(blkif_be_connect_t *connect)
   4.187 -{
   4.188 -    domid_t        domid  = connect->domid;
   4.189 -    unsigned int   handle = connect->blkif_handle;
   4.190 -    unsigned int   evtchn = connect->evtchn;
   4.191 -    unsigned long  shmem_frame = connect->shmem_frame;
   4.192 -    struct vm_struct *vma;
   4.193 -#ifdef CONFIG_XEN_BLKDEV_GRANT
   4.194 -    int ref = connect->shmem_ref;
   4.195 -#else
   4.196 -    pgprot_t       prot;
   4.197 -    int            error;
   4.198 -#endif
   4.199 -    blkif_t       *blkif;
   4.200 -    blkif_sring_t *sring;
   4.201 -
   4.202 -    DPRINTK("PT got BE_CONNECT\n");
   4.203 -
   4.204 -    blkif = blkif_find_by_handle(domid, handle);
   4.205 -    if ( unlikely(blkif == NULL) )
   4.206 -    {
   4.207 -        WPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n", 
   4.208 -                connect->domid, connect->blkif_handle); 
   4.209 -        connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   4.210 -        return;
   4.211 -    }
   4.212 -
   4.213 -    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
   4.214 -    {
   4.215 -        connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
   4.216 -        return;
   4.217 -    }
   4.218 -
   4.219 -#ifndef CONFIG_XEN_BLKDEV_GRANT
   4.220 -    prot = __pgprot(_KERNPG_TABLE);
   4.221 -    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
   4.222 -                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
   4.223 -                                    prot, domid);
   4.224 -    if ( error != 0 )
   4.225 -    {
   4.226 -        if ( error == -ENOMEM ) 
   4.227 -            connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
   4.228 -        else if ( error == -EFAULT )
   4.229 -            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
   4.230 -        else
   4.231 -            connect->status = BLKIF_BE_STATUS_ERROR;
   4.232 -        vfree(vma->addr);
   4.233 -        return;
   4.234 -    }
   4.235 -#else
   4.236 -    { /* Map: Use the Grant table reference */
   4.237 -        struct gnttab_map_grant_ref op;
   4.238 -        op.host_addr = VMALLOC_VMADDR(vma->addr);
   4.239 -        op.flags            = GNTMAP_host_map;
   4.240 -        op.ref              = ref;
   4.241 -        op.dom              = domid;
   4.242 -       
   4.243 -        BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
   4.244 -       
   4.245 -        handle = op.handle;
   4.246 -       
   4.247 -        if (op.handle < 0) {
   4.248 -            DPRINTK(" Grant table operation failure !\n");
   4.249 -            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
   4.250 -            vfree(vma->addr);
   4.251 -            return;
   4.252 -        }
   4.253 -
   4.254 -        blkif->shmem_ref = ref;
   4.255 -        blkif->shmem_handle = handle;
   4.256 -        blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr);
   4.257 -    }
   4.258 -#endif
   4.259 -
   4.260 -    if ( blkif->status != DISCONNECTED )
   4.261 -    {
   4.262 -        connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
   4.263 -        vfree(vma->addr);
   4.264 -        return;
   4.265 -    }
   4.266 -
   4.267 -    sring = (blkif_sring_t *)vma->addr;
   4.268 -    SHARED_RING_INIT(sring);
   4.269 -    BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
   4.270 -    
   4.271 -    blkif->evtchn        = evtchn;
   4.272 -    blkif->shmem_frame   = shmem_frame;
   4.273 -    blkif->status        = CONNECTED;
   4.274 -    blkif_get(blkif);
   4.275 -
   4.276 -    bind_evtchn_to_irqhandler(
   4.277 -        evtchn, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
   4.278 -
   4.279 -    connect->status = BLKIF_BE_STATUS_OKAY;
   4.280 -}
   4.281 -
   4.282 -int blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
   4.283 -{
   4.284 -    domid_t       domid  = disconnect->domid;
   4.285 -    unsigned int  handle = disconnect->blkif_handle;
   4.286 -    blkif_t      *blkif;
   4.287 -
   4.288 -    DPRINTK("PT got BE_DISCONNECT\n");
   4.289 -    
   4.290 -    blkif = blkif_find_by_handle(domid, handle);
   4.291 -    if ( unlikely(blkif == NULL) )
   4.292 -    {
   4.293 -        WPRINTK("blkif_disconnect attempted for non-existent blkif"
   4.294 -                " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle); 
   4.295 -        disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   4.296 -        return 1; /* Caller will send response error message. */
   4.297 -    }
   4.298 -
   4.299 -    if ( blkif->status == CONNECTED )
   4.300 -    {
   4.301 -        blkif->status = DISCONNECTING;
   4.302 -        blkif->disconnect_rspid = rsp_id;
   4.303 -        wmb(); /* Let other CPUs see the status change. */
   4.304 -        unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
   4.305 -        blkif_deschedule(blkif);
   4.306 -        blkif_put(blkif);
   4.307 -        return 0; /* Caller should not send response message. */
   4.308 -    }
   4.309 -
   4.310 -    disconnect->status = BLKIF_BE_STATUS_OKAY;
   4.311 -    return 1;
   4.312 -}
   4.313 -
   4.314 -/*-----[ Control Messages to/from Backend VM ]----------------------------*/
   4.315 -
   4.316 -/* Tell the controller to bring up the interface. */
   4.317 -static void blkif_ptbe_send_interface_connect(void)
   4.318 -{
   4.319 -    ctrl_msg_t cmsg = {
   4.320 -        .type    = CMSG_BLKIF_FE,
   4.321 -        .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
   4.322 -        .length  = sizeof(blkif_fe_interface_connect_t),
   4.323 -    };
   4.324 -    blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
   4.325 -    msg->handle      = 0;
   4.326 -    msg->shmem_frame = virt_to_mfn(blktap_be_ring.sring);
   4.327 -    
   4.328 -    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
   4.329 -}
   4.330 -
   4.331 -static void blkif_ptbe_close(void)
   4.332 -{
   4.333 -}
   4.334 -
   4.335 -/* Move from CLOSED to DISCONNECTED state. */
   4.336 -static void blkif_ptbe_disconnect(void)
   4.337 -{
   4.338 -    blkif_sring_t *sring;
   4.339 -    
   4.340 -    sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
   4.341 -    SHARED_RING_INIT(sring);
   4.342 -    FRONT_RING_INIT(&blktap_be_ring, sring, PAGE_SIZE);
   4.343 -    blktap_be_state  = BLKIF_STATE_DISCONNECTED;
   4.344 -    DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n");
   4.345 -    blkif_ptbe_send_interface_connect();
   4.346 -}
   4.347 -
   4.348 -static void blkif_ptbe_connect(blkif_fe_interface_status_t *status)
   4.349 -{
   4.350 -    int err = 0;
   4.351 -    
   4.352 -    blktap_be_evtchn = status->evtchn;
   4.353 -
   4.354 -    err = bind_evtchn_to_irqhandler(
   4.355 -        blktap_be_evtchn, blkif_ptbe_int, SA_SAMPLE_RANDOM, "blkif", NULL);
   4.356 -    if ( err ) {
   4.357 -	WPRINTK("blkfront bind_evtchn_to_irqhandler failed (%d)\n", err);
   4.358 -        return;
   4.359 -    } else {
    4.360 -	/* transition to connected in case we need to do a
    4.361 -           partition probe on a whole disk */
   4.362 -        blktap_be_state = BLKIF_STATE_CONNECTED;
   4.363 -    }
   4.364 -}
   4.365 -
   4.366 -static void unexpected(blkif_fe_interface_status_t *status)
   4.367 -{
   4.368 -    WPRINTK(" TAP: Unexpected blkif status %s in state %s\n", 
   4.369 -           blkif_status_name[status->status],
   4.370 -           blkif_state_name[blktap_be_state]);
   4.371 -}
   4.372 -
   4.373 -static void blkif_ptbe_status(
   4.374 -    blkif_fe_interface_status_t *status)
   4.375 -{
   4.376 -    if ( status->handle != 0 )
   4.377 -    {
   4.378 -        DPRINTK("Status change on unsupported blkif %d\n",
   4.379 -               status->handle);
   4.380 -        return;
   4.381 -    }
   4.382 -
   4.383 -    DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]);
   4.384 -    
   4.385 -    switch ( status->status )
   4.386 -    {
   4.387 -    case BLKIF_INTERFACE_STATUS_CLOSED:
   4.388 -        switch ( blktap_be_state )
   4.389 -        {
   4.390 -        case BLKIF_STATE_CLOSED:
   4.391 -            unexpected(status);
   4.392 -            break;
   4.393 -        case BLKIF_STATE_DISCONNECTED:
   4.394 -        case BLKIF_STATE_CONNECTED:
   4.395 -            unexpected(status);
   4.396 -            blkif_ptbe_close();
   4.397 -            break;
   4.398 -        }
   4.399 -        break;
   4.400 -        
   4.401 -    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
   4.402 -        switch ( blktap_be_state )
   4.403 -        {
   4.404 -        case BLKIF_STATE_CLOSED:
   4.405 -            blkif_ptbe_disconnect();
   4.406 -            break;
   4.407 -        case BLKIF_STATE_DISCONNECTED:
   4.408 -        case BLKIF_STATE_CONNECTED:
   4.409 -            printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n");
   4.410 -            unexpected(status);
   4.411 -            break;
   4.412 -        }
   4.413 -        break;
   4.414 -        
   4.415 -    case BLKIF_INTERFACE_STATUS_CONNECTED:
   4.416 -        switch ( blktap_be_state )
   4.417 -        {
   4.418 -        case BLKIF_STATE_CLOSED:
   4.419 -            unexpected(status);
   4.420 -            blkif_ptbe_disconnect();
   4.421 -            blkif_ptbe_connect(status);
   4.422 -            break;
   4.423 -        case BLKIF_STATE_DISCONNECTED:
   4.424 -            blkif_ptbe_connect(status);
   4.425 -            break;
   4.426 -        case BLKIF_STATE_CONNECTED:
   4.427 -            unexpected(status);
   4.428 -            blkif_ptbe_connect(status);
   4.429 -            break;
   4.430 -        }
   4.431 -        break;
   4.432 -
   4.433 -   case BLKIF_INTERFACE_STATUS_CHANGED:
   4.434 -        switch ( blktap_be_state )
   4.435 -        {
   4.436 -        case BLKIF_STATE_CLOSED:
   4.437 -        case BLKIF_STATE_DISCONNECTED:
   4.438 -            unexpected(status);
   4.439 -            break;
   4.440 -        case BLKIF_STATE_CONNECTED:
   4.441 -            /* vbd_update(); */
   4.442 -            /* tap doesn't really get state changes... */
   4.443 -            unexpected(status);
   4.444 -            break;
   4.445 -        }
   4.446 -       break;
   4.447 -       
   4.448 -    default:
   4.449 -        DPRINTK("Status change to unknown value %d\n", status->status);
   4.450 -        break;
   4.451 -    }
   4.452 -}
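/* [Illustrative summary -- not part of the changeset.]  The status/state
 * machine implemented by blkif_ptbe_status() above, read off the nested
 * switches:
 *
 *   incoming status  | state CLOSED       | DISCONNECTED | CONNECTED
 *   -----------------+--------------------+--------------+---------------
 *   CLOSED           | unexpected         | unexp+close  | unexp+close
 *   DISCONNECTED     | disconnect         | unexpected   | unexpected
 *   CONNECTED        | unexp+disc+connect | connect      | unexp+connect
 *   CHANGED          | unexpected         | unexpected   | unexpected
 *
 * where "disconnect" (blkif_ptbe_disconnect) allocates a fresh shared ring
 * and advertises it to the backend, and "connect" (blkif_ptbe_connect)
 * binds the event channel and moves to BLKIF_STATE_CONNECTED.
 */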
   4.453 -
   4.454 -/*-----[ All control messages enter here: ]-------------------------------*/
   4.455 -
   4.456 -void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
   4.457 -{
   4.458 -    switch ( msg->type )
   4.459 -    {
   4.460 -    case CMSG_BLKIF_FE:
   4.461 -
   4.462 -        switch ( msg->subtype )
   4.463 -        {
   4.464 -        case CMSG_BLKIF_FE_INTERFACE_STATUS:
   4.465 -            blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]);
   4.466 -            break;
   4.467 -
   4.468 -        default:
   4.469 -            goto parse_error;
   4.470 -        }
   4.471 -
   4.472 -        break;
   4.473 -
   4.474 -    case CMSG_BLKIF_BE:
   4.475 -        
   4.476 -        /* send a copy of the message to user if wanted */
   4.477 -        
   4.478 -        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
   4.479 -             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
   4.480 -            
   4.481 -            blktap_write_ctrl_ring(msg);
   4.482 -            blktap_kick_user();
   4.483 -        }
   4.484 -        
   4.485 -        switch ( msg->subtype )
   4.486 -        {
   4.487 -        case CMSG_BLKIF_BE_CREATE:
   4.488 -            blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]);
   4.489 -            break; 
   4.490 -        case CMSG_BLKIF_BE_DESTROY:
   4.491 -            blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]);
   4.492 -            break;        
   4.493 -        case CMSG_BLKIF_BE_CONNECT:
   4.494 -            blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]);
   4.495 -            break;        
   4.496 -        case CMSG_BLKIF_BE_DISCONNECT:
   4.497 -            if ( !blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0],
   4.498 -                    msg->id) )
   4.499 -                return;
   4.500 -            break;        
   4.501 -
   4.502 -        /* We just ignore anything to do with vbds for now. */
   4.503 -        
   4.504 -        case CMSG_BLKIF_BE_VBD_CREATE:
   4.505 -            DPRINTK("PT got VBD_CREATE\n");
   4.506 -            ((blkif_be_vbd_create_t *)&msg->msg[0])->status 
   4.507 -                = BLKIF_BE_STATUS_OKAY;
   4.508 -            break;
   4.509 -        case CMSG_BLKIF_BE_VBD_DESTROY:
   4.510 -            DPRINTK("PT got VBD_DESTROY\n");
   4.511 -            ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status
   4.512 -                = BLKIF_BE_STATUS_OKAY;
   4.513 -            break;
   4.514 -        default:
   4.515 -            goto parse_error;
   4.516 -        }
   4.517 -
   4.518 -        break;
   4.519 -    }
   4.520 -
   4.521 -    ctrl_if_send_response(msg);
   4.522 -    return;
   4.523 -
   4.524 - parse_error:
   4.525 -    msg->length = 0;
   4.526 -    ctrl_if_send_response(msg);
   4.527 -}
   4.528 -
   4.529 -/*-----[ Initialization ]-------------------------------------------------*/
   4.530 -
   4.531 -void __init blkif_interface_init(void)
   4.532 -{
   4.533 -    blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
   4.534 -                                     0, 0, NULL, NULL);
   4.535 -    memset(blkif_hash, 0, sizeof(blkif_hash));
   4.536 -    
   4.537 -    blktap_be_ring.sring = NULL;
   4.538 -}
   4.539 -
   4.540 -
   4.541 -
    4.542 -/* Debug: print the current ring indices. */
   4.543 -
   4.544 -void print_fe_ring_idxs(void)
   4.545 -{
   4.546 -    int i;
   4.547 -    blkif_t *blkif;
   4.548 -            
   4.549 -    WPRINTK("FE Rings: \n---------\n");
   4.550 -    for ( i = 0; i < BLKIF_HASHSZ; i++) { 
   4.551 -        blkif = blkif_hash[i];
   4.552 -        while (blkif != NULL) {
   4.553 -            if (blkif->status == DISCONNECTED) {
   4.554 -                WPRINTK("(%2d,%2d) DISCONNECTED\n", 
   4.555 -                   blkif->domid, blkif->handle);
   4.556 -            } else if (blkif->status == DISCONNECTING) {
   4.557 -                WPRINTK("(%2d,%2d) DISCONNECTING\n", 
   4.558 -                   blkif->domid, blkif->handle);
   4.559 -            } else if (blkif->blk_ring.sring == NULL) {
   4.560 -                WPRINTK("(%2d,%2d) CONNECTED, but null sring!\n", 
   4.561 -                   blkif->domid, blkif->handle);
   4.562 -            } else {
   4.563 -                blkif_get(blkif);
    4.564 -                WPRINTK("(%2d,%2d): req_cons: %2d, rsp_prod_pvt: %2d "
   4.565 -                    "| req_prod: %2d, rsp_prod: %2d\n",
   4.566 -                    blkif->domid, blkif->handle,
   4.567 -                    blkif->blk_ring.req_cons,
   4.568 -                    blkif->blk_ring.rsp_prod_pvt,
   4.569 -                    blkif->blk_ring.sring->req_prod,
   4.570 -                    blkif->blk_ring.sring->rsp_prod);
   4.571 -                blkif_put(blkif);
   4.572 -            } 
   4.573 -            blkif = blkif->hash_next;
   4.574 -        }
   4.575 -    }
   4.576 -}        
     5.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c	Sun Sep 04 15:08:16 2005 +0000
     5.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.3 @@ -1,449 +0,0 @@
     5.4 -/******************************************************************************
     5.5 - * blktap_datapath.c
     5.6 - * 
     5.7 - * XenLinux virtual block-device tap.
     5.8 - * Block request routing data path.
     5.9 - * 
    5.10 - * Copyright (c) 2004, Andrew Warfield
    5.11 - * -- see full header in blktap.c
    5.12 - */
    5.13 - 
    5.14 -#include "blktap.h"
    5.15 -#include <asm-xen/evtchn.h>
    5.16 -
    5.17 -/*-----[ The data paths ]-------------------------------------------------*/
    5.18 -
    5.19 -/* Connection to a single backend domain. */
    5.20 -blkif_front_ring_t blktap_be_ring;
    5.21 -
    5.22 -/*-----[ Tracking active requests ]---------------------------------------*/
    5.23 -
    5.24 -/* this must be the same as MAX_PENDING_REQS in blkback.c */
    5.25 -#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U)
    5.26 -
    5.27 -active_req_t     active_reqs[MAX_ACTIVE_REQS];
    5.28 -ACTIVE_RING_IDX  active_req_ring[MAX_ACTIVE_REQS];
    5.29 -spinlock_t       active_req_lock = SPIN_LOCK_UNLOCKED;
    5.30 -ACTIVE_RING_IDX  active_prod, active_cons;
    5.31 -#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
     5.32 -#define ACTIVE_IDX(_ar) ((_ar) - active_reqs)
    5.33 -#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons)
    5.34 -
    5.35 -inline active_req_t *get_active_req(void) 
    5.36 -{
    5.37 -    ACTIVE_RING_IDX idx;
    5.38 -    active_req_t *ar;
    5.39 -    unsigned long flags;
    5.40 -        
    5.41 -    ASSERT(active_cons != active_prod);   
    5.42 -    
    5.43 -    spin_lock_irqsave(&active_req_lock, flags);
    5.44 -    idx =  active_req_ring[MASK_ACTIVE_IDX(active_cons++)];
    5.45 -    ar = &active_reqs[idx];
    5.46 -    spin_unlock_irqrestore(&active_req_lock, flags);
    5.47 -    
    5.48 -    return ar;
    5.49 -}
    5.50 -
    5.51 -inline void free_active_req(active_req_t *ar) 
    5.52 -{
    5.53 -    unsigned long flags;
    5.54 -        
    5.55 -    spin_lock_irqsave(&active_req_lock, flags);
    5.56 -    active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
    5.57 -    spin_unlock_irqrestore(&active_req_lock, flags);
    5.58 -}
    5.59 -
    5.60 -active_req_t *lookup_active_req(ACTIVE_RING_IDX idx)
    5.61 -{
    5.62 -    return &active_reqs[idx];   
    5.63 -}
    5.64 -
    5.65 -void active_reqs_init(void)
    5.66 -{
    5.67 -    ACTIVE_RING_IDX i;
    5.68 -    
    5.69 -    active_cons = 0;
    5.70 -    active_prod = MAX_ACTIVE_REQS;
    5.71 -    memset(active_reqs, 0, sizeof(active_reqs));
    5.72 -    for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
    5.73 -        active_req_ring[i] = i;
    5.74 -}
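/* [Illustrative sketch -- not part of the changeset.]  active_req_ring is
 * a ring of *free* indices into active_reqs[]: after active_reqs_init(),
 * active_cons == 0 and active_prod == MAX_ACTIVE_REQS, so
 * NR_ACTIVE_REQS == MAX_ACTIVE_REQS - active_prod + active_cons == 0.
 * get_active_req() consumes a free index, free_active_req() returns one.
 * A typical lifetime, mirroring do_block_io_op() and
 * write_resp_to_fe_ring() below (the helper name here is hypothetical):
 */
static inline void example_active_req_lifetime(blkif_t *blkif,
                                               blkif_request_t *req)
{
    active_req_t *ar = get_active_req(); /* claim a slot; asserts one free */
    ar->id       = req->id;              /* stash the frontend's cookie    */
    ar->nr_pages = req->nr_segments;
    blkif_get(blkif);                    /* ref held while outstanding     */
    ar->blkif    = blkif;
    /* ... the index travels to the backend inside req->id; on completion
     * write_resp_to_fe_ring() does blkif_put() and: */
    free_active_req(ar);                 /* slot rejoins the free ring     */
}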
    5.75 -
    5.76 -/* Requests passing through the tap to the backend hijack the id field
    5.77 - * in the request message.  In it we put the AR index _AND_ the fe domid.
     5.78 - * The domid is used by the backend to map the pages properly.
    5.79 - */
    5.80 -
    5.81 -static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
    5.82 -{
    5.83 -    return ( (fe_dom << 16) | MASK_ACTIVE_IDX(idx) );
    5.84 -}
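/* [Illustrative sketch -- not part of the changeset.]  MAKE_ID() packs the
 * frontend domid into the top 16 bits and the active-request index into
 * the bottom 16.  The ID_TO_DOM()/ID_TO_IDX() macros used throughout this
 * file are presumably the inverse, along these lines:
 *
 *   #define ID_TO_IDX(_id) ((ACTIVE_RING_IDX)((_id) & 0xffff))
 *   #define ID_TO_DOM(_id) ((domid_t)((_id) >> 16))
 *
 * so MAKE_ID(dom, idx) round-trips for any idx < MAX_ACTIVE_REQS.
 */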
    5.85 -
    5.86 -/*-----[ Ring helpers ]---------------------------------------------------*/
    5.87 -
    5.88 -static void maybe_trigger_blktap_schedule(void);
    5.89 -
    5.90 -inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp)
    5.91 -{
    5.92 -    blkif_response_t *resp_d;
    5.93 -    active_req_t *ar;
    5.94 -    
    5.95 -    ar = &active_reqs[ID_TO_IDX(rsp->id)];
    5.96 -    rsp->id = ar->id;
    5.97 -            
    5.98 -    resp_d = RING_GET_RESPONSE(&blkif->blk_ring,
    5.99 -            blkif->blk_ring.rsp_prod_pvt);
   5.100 -    memcpy(resp_d, rsp, sizeof(blkif_response_t));
   5.101 -    wmb();
   5.102 -    blkif->blk_ring.rsp_prod_pvt++;
   5.103 -            
   5.104 -    blkif_put(ar->blkif);
   5.105 -    free_active_req(ar);
   5.106 -    
   5.107 -    return 0;
   5.108 -}
   5.109 -
   5.110 -inline int write_req_to_be_ring(blkif_request_t *req)
   5.111 -{
   5.112 -    blkif_request_t *req_d;
   5.113 -
   5.114 -    if ( blktap_be_state != BLKIF_STATE_CONNECTED ) {
   5.115 -        WPRINTK("Tap trying to access an unconnected backend!\n");
   5.116 -        return 0;
   5.117 -    }
   5.118 -    
   5.119 -    req_d = RING_GET_REQUEST(&blktap_be_ring,
   5.120 -            blktap_be_ring.req_prod_pvt);
   5.121 -    memcpy(req_d, req, sizeof(blkif_request_t));
   5.122 -    wmb();
   5.123 -    blktap_be_ring.req_prod_pvt++;
   5.124 -            
   5.125 -    return 0;
   5.126 -}
   5.127 -
   5.128 -void kick_fe_domain(blkif_t *blkif) 
   5.129 -{
   5.130 -    RING_PUSH_RESPONSES(&blkif->blk_ring);
   5.131 -    notify_via_evtchn(blkif->evtchn);
   5.132 -    DPRINTK("notified FE(dom %u)\n", blkif->domid);
   5.133 -
    5.134 -    /* We just freed up a batch of request slots... */
   5.135 -    maybe_trigger_blktap_schedule();
   5.136 -    
   5.137 -}
   5.138 -
   5.139 -void kick_be_domain(void)
   5.140 -{
   5.141 -    if ( blktap_be_state != BLKIF_STATE_CONNECTED ) 
   5.142 -        return;
   5.143 -    
   5.144 -    wmb(); /* Ensure that the frontend can see the requests. */
   5.145 -    RING_PUSH_REQUESTS(&blktap_be_ring);
   5.146 -    notify_via_evtchn(blktap_be_evtchn);
   5.147 -    DPRINTK("notified BE\n");
   5.148 -}
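/* [Illustrative sketch -- not part of the changeset.]  The produce-then-
 * notify pattern that write_req_to_be_ring() and kick_be_domain() above
 * split between them, spelled out in one place (hypothetical helper):
 */
static inline void example_produce_one(blkif_request_t *req)
{
    blkif_request_t *slot;

    slot = RING_GET_REQUEST(&blktap_be_ring, blktap_be_ring.req_prod_pvt);
    memcpy(slot, req, sizeof(*slot));    /* fill the slot                  */
    wmb();                               /* payload before producer index  */
    blktap_be_ring.req_prod_pvt++;       /* private index only, so far     */
    RING_PUSH_REQUESTS(&blktap_be_ring); /* publish req_prod to the sring  */
    notify_via_evtchn(blktap_be_evtchn); /* wake the backend domain        */
}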
   5.149 -
   5.150 -/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
   5.151 -
   5.152 -/*-----[ Scheduler list maint -from blkback ]--- */
   5.153 -
   5.154 -static struct list_head blkio_schedule_list;
   5.155 -static spinlock_t blkio_schedule_list_lock;
   5.156 -
   5.157 -static int __on_blkdev_list(blkif_t *blkif)
   5.158 -{
   5.159 -    return blkif->blkdev_list.next != NULL;
   5.160 -}
   5.161 -
   5.162 -static void remove_from_blkdev_list(blkif_t *blkif)
   5.163 -{
   5.164 -    unsigned long flags;
   5.165 -    if ( !__on_blkdev_list(blkif) ) return;
   5.166 -    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
   5.167 -    if ( __on_blkdev_list(blkif) )
   5.168 -    {
   5.169 -        list_del(&blkif->blkdev_list);
   5.170 -        blkif->blkdev_list.next = NULL;
   5.171 -        blkif_put(blkif);
   5.172 -    }
   5.173 -    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
   5.174 -}
   5.175 -
   5.176 -static void add_to_blkdev_list_tail(blkif_t *blkif)
   5.177 -{
   5.178 -    unsigned long flags;
   5.179 -    if ( __on_blkdev_list(blkif) ) return;
   5.180 -    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
   5.181 -    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
   5.182 -    {
   5.183 -        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
   5.184 -        blkif_get(blkif);
   5.185 -    }
   5.186 -    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
   5.187 -}
   5.188 -
   5.189 -
   5.190 -/*-----[ Scheduler functions - from blkback ]--- */
   5.191 -
   5.192 -static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
   5.193 -
   5.194 -static int do_block_io_op(blkif_t *blkif, int max_to_do);
   5.195 -
   5.196 -static int blkio_schedule(void *arg)
   5.197 -{
   5.198 -    DECLARE_WAITQUEUE(wq, current);
   5.199 -
   5.200 -    blkif_t          *blkif;
   5.201 -    struct list_head *ent;
   5.202 -
   5.203 -    daemonize(
   5.204 -        "xentapd"
   5.205 -        );
   5.206 -
   5.207 -    for ( ; ; )
   5.208 -    {
   5.209 -        /* Wait for work to do. */
   5.210 -        add_wait_queue(&blkio_schedule_wait, &wq);
   5.211 -        set_current_state(TASK_INTERRUPTIBLE);
   5.212 -        if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) || 
   5.213 -             list_empty(&blkio_schedule_list) )
   5.214 -            schedule();
   5.215 -        __set_current_state(TASK_RUNNING);
   5.216 -        remove_wait_queue(&blkio_schedule_wait, &wq);
   5.217 -
   5.218 -        /* Queue up a batch of requests. */
   5.219 -        while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) &&
   5.220 -                !list_empty(&blkio_schedule_list) )
   5.221 -        {
   5.222 -            ent = blkio_schedule_list.next;
   5.223 -            blkif = list_entry(ent, blkif_t, blkdev_list);
   5.224 -            blkif_get(blkif);
   5.225 -            remove_from_blkdev_list(blkif);
   5.226 -            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
   5.227 -                add_to_blkdev_list_tail(blkif);
   5.228 -            blkif_put(blkif);
   5.229 -        }
   5.230 -    }
   5.231 -}
   5.232 -
   5.233 -static void maybe_trigger_blktap_schedule(void)
   5.234 -{
   5.235 -    /*
    5.236 -     * Needed so that two processes, which together make the following predicate
   5.237 -     * true, don't both read stale values and evaluate the predicate
   5.238 -     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
   5.239 -     */
   5.240 -    smp_mb();
   5.241 -
   5.242 -    if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS/2)) &&
   5.243 -         !list_empty(&blkio_schedule_list) ) 
   5.244 -        wake_up(&blkio_schedule_wait);
   5.245 -}
   5.246 -
   5.247 -void blkif_deschedule(blkif_t *blkif)
   5.248 -{
   5.249 -    remove_from_blkdev_list(blkif);
   5.250 -}
   5.251 -
   5.252 -void __init blkdev_schedule_init(void)
   5.253 -{
   5.254 -    spin_lock_init(&blkio_schedule_list_lock);
   5.255 -    INIT_LIST_HEAD(&blkio_schedule_list);
   5.256 -
   5.257 -    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
   5.258 -        BUG();
   5.259 -}
   5.260 -    
   5.261 -/*-----[ Interrupt entry from a frontend ]------ */
   5.262 -
   5.263 -irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
   5.264 -{
   5.265 -    blkif_t *blkif = dev_id;
   5.266 -
   5.267 -    add_to_blkdev_list_tail(blkif);
   5.268 -    maybe_trigger_blktap_schedule();
   5.269 -    return IRQ_HANDLED;
   5.270 -}
   5.271 -
   5.272 -/*-----[ Other Frontend Ring functions ]-------- */
   5.273 -
   5.274 -/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/
   5.275 -static int do_block_io_op(blkif_t *blkif, int max_to_do)
   5.276 -{
   5.277 -    /* we have pending messages from the real frontend. */
   5.278 -
   5.279 -    blkif_request_t *req_s;
   5.280 -    RING_IDX i, rp;
   5.281 -    unsigned long flags;
   5.282 -    active_req_t *ar;
   5.283 -    int more_to_do = 0;
   5.284 -    int notify_be = 0, notify_user = 0;
   5.285 -    
   5.286 -    /* lock both rings */
   5.287 -    spin_lock_irqsave(&blkif_io_lock, flags);
   5.288 -
   5.289 -    rp = blkif->blk_ring.sring->req_prod;
   5.290 -    rmb();
   5.291 -    
   5.292 -    for ( i = blkif->blk_ring.req_cons; 
   5.293 -         (i != rp) && 
   5.294 -            !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i);
   5.295 -          i++ )
   5.296 -    {
   5.297 -        
   5.298 -        if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS)) 
   5.299 -        {
   5.300 -            more_to_do = 1;
   5.301 -            break;
   5.302 -        }
   5.303 -        
   5.304 -        req_s = RING_GET_REQUEST(&blkif->blk_ring, i);
   5.305 -        /* This is a new request:  
   5.306 -         * Assign an active request record, and remap the id. 
   5.307 -         */
   5.308 -        ar = get_active_req();
   5.309 -        ar->id = req_s->id;
   5.310 -        ar->nr_pages = req_s->nr_segments; 
   5.311 -        blkif_get(blkif);
   5.312 -        ar->blkif = blkif;
   5.313 -        req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
   5.314 -        /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */
   5.315 -
   5.316 -        /* FE -> BE interposition point is here. */
   5.317 -        
   5.318 -        /* ------------------------------------------------------------- */
   5.319 -        /* BLKIF_OP_PROBE_HACK:                                          */
   5.320 -        /* Signal to the backend that we are a tap domain.               */
   5.321 -
   5.322 -        if (req_s->operation == BLKIF_OP_PROBE) {
   5.323 -            DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n");
   5.324 -            req_s->frame_and_sects[1] = BLKTAP_COOKIE;
   5.325 -        }
   5.326 -
   5.327 -        /* ------------------------------------------------------------- */
   5.328 -
   5.329 -        /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
   5.330 -        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
   5.331 -             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
   5.332 -            
    5.333 -            /* Copy the request message to UFERing */
   5.334 -            /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
   5.335 -            /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
   5.336 -
   5.337 -            DPRINTK("req->UFERing\n"); 
   5.338 -            blktap_write_fe_ring(req_s);
   5.339 -            notify_user = 1;
   5.340 -        }
   5.341 -
   5.342 -        /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
   5.343 -        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
   5.344 -               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
   5.345 -            
    5.346 -            /* be included to prevent noise from the fe when it's off */
   5.347 -            /* copy the request message to the BERing */
   5.348 -
   5.349 -            DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", 
   5.350 -                    (unsigned)i & (RING_SIZE(&blktap_be_ring)-1),
   5.351 -                    (unsigned)blktap_be_ring.req_prod_pvt & 
    5.352 -                    (RING_SIZE(&blktap_be_ring)-1));
   5.353 -            
   5.354 -            write_req_to_be_ring(req_s);
   5.355 -            notify_be = 1;
   5.356 -        }
   5.357 -    }
   5.358 -
   5.359 -    blkif->blk_ring.req_cons = i;
   5.360 -    
   5.361 -    /* unlock rings */
   5.362 -    spin_unlock_irqrestore(&blkif_io_lock, flags);
   5.363 -    
   5.364 -    if (notify_user)
   5.365 -        blktap_kick_user();
   5.366 -    if (notify_be)
   5.367 -        kick_be_domain();
   5.368 -    
   5.369 -    return more_to_do;
   5.370 -}
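/* [Illustrative summary -- not part of the changeset.]  The two routing
 * predicates in the request loop above:
 *
 *   copy to UFERing:   (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
 *                      (blktap_mode & BLKTAP_MODE_COPY_FE)
 *   forward to BERing: !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
 *                        (blktap_mode & BLKTAP_MODE_INTERCEPT_BE))
 *
 * So pure passthrough only forwards, COPY_FE both copies and forwards,
 * and INTERCEPT_FE copies to user space and leaves the tap process to
 * service or reinject the request itself.
 */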
   5.371 -
   5.372 -/*-----[ Data to/from Backend (server) VM ]------------------------------*/
   5.373 -
   5.374 -
   5.375 -irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
   5.376 -                                  struct pt_regs *ptregs)
   5.377 -{
   5.378 -    blkif_response_t  *resp_s;
   5.379 -    blkif_t *blkif;
   5.380 -    RING_IDX rp, i;
   5.381 -    unsigned long flags;
   5.382 -
   5.383 -    DPRINTK("PT got BE interrupt.\n");
   5.384 -
   5.385 -    /* lock both rings */
   5.386 -    spin_lock_irqsave(&blkif_io_lock, flags);
   5.387 -    
   5.388 -    rp = blktap_be_ring.sring->rsp_prod;
   5.389 -    rmb();
   5.390 -      
   5.391 -    for ( i = blktap_be_ring.rsp_cons; i != rp; i++)
   5.392 -    {
   5.393 -        resp_s = RING_GET_RESPONSE(&blktap_be_ring, i);
   5.394 -        
   5.395 -        /* BE -> FE interposition point is here. */
   5.396 -    
   5.397 -        blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif;
   5.398 -        
   5.399 -        /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
   5.400 -        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
   5.401 -             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
   5.402 -
   5.403 -            /* Copy the response message to UBERing */
   5.404 -            /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
   5.405 -            /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
   5.406 -
   5.407 -            DPRINTK("rsp->UBERing\n"); 
   5.408 -            blktap_write_be_ring(resp_s);
   5.409 -            blktap_kick_user();
   5.410 -
   5.411 -        }
   5.412 -       
   5.413 -        /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
   5.414 -        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
   5.415 -               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
   5.416 -            
   5.417 -            /* (fe included to prevent random interference from the BE) */
   5.418 -            /* Copy the response message to FERing */
   5.419 -         
   5.420 -            DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", 
   5.421 -                    (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1),
   5.422 -                    (unsigned)blkif->blk_ring.rsp_prod_pvt & 
    5.423 -                    (RING_SIZE(&blkif->blk_ring)-1));
   5.424 -
   5.425 -            write_resp_to_fe_ring(blkif, resp_s);
   5.426 -            kick_fe_domain(blkif);
   5.427 -
   5.428 -        }
   5.429 -    }
   5.430 -    
   5.431 -    blktap_be_ring.rsp_cons = i;
   5.432 -    
   5.433 -
   5.434 -    spin_unlock_irqrestore(&blkif_io_lock, flags);
   5.435 -    
   5.436 -    return IRQ_HANDLED;
   5.437 -}
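/* [Illustrative summary -- not part of the changeset.]  The response path
 * above mirrors do_block_io_op() with the directions reversed:
 *
 *   copy to UBERing:   (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
 *                      (blktap_mode & BLKTAP_MODE_COPY_BE)
 *   forward to FERing: !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
 *                        (blktap_mode & BLKTAP_MODE_INTERCEPT_FE))
 */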
   5.438 -
    5.439 -/* Debug: print the current ring indices. */
   5.440 -
   5.441 -void print_be_ring_idxs(void)
   5.442 -{
   5.443 -    if (blktap_be_ring.sring != NULL) {
   5.444 -        WPRINTK("BE Ring: \n--------\n");
    5.445 -        WPRINTK("BE: rsp_cons: %2d, req_prod_pvt: %2d "
   5.446 -            "| req_prod: %2d, rsp_prod: %2d\n",
   5.447 -            blktap_be_ring.rsp_cons,
   5.448 -            blktap_be_ring.req_prod_pvt,
   5.449 -            blktap_be_ring.sring->req_prod,
   5.450 -            blktap_be_ring.sring->rsp_prod);
   5.451 -    }
   5.452 -}        
     6.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c	Sun Sep 04 15:08:16 2005 +0000
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,801 +0,0 @@
     6.4 -/******************************************************************************
     6.5 - * blktap_userdev.c
     6.6 - * 
     6.7 - * XenLinux virtual block-device tap.
     6.8 - * Control interface between the driver and a character device.
     6.9 - * 
    6.10 - * Copyright (c) 2004, Andrew Warfield
    6.11 - */
    6.12 -
    6.13 -#include <linux/config.h>
    6.14 -#include <linux/module.h>
    6.15 -#include <linux/kernel.h>
    6.16 -#include <linux/fs.h>
    6.17 -#include <linux/mm.h>
    6.18 -#include <linux/miscdevice.h>
    6.19 -#include <linux/errno.h>
    6.20 -#include <linux/major.h>
    6.21 -#include <linux/gfp.h>
    6.22 -#include <linux/poll.h>
    6.23 -#include <asm/pgalloc.h>
    6.24 -#include <asm/tlbflush.h>
    6.25 -#include <asm-xen/xen-public/io/blkif.h> /* for control ring. */
    6.26 -#ifdef CONFIG_XEN_BLKDEV_GRANT
    6.27 -#include <asm-xen/xen-public/grant_table.h>
    6.28 -#endif
    6.29 -
    6.30 -#include "blktap.h"
    6.31 -
    6.32 -
    6.33 -unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH;
    6.34 -
    6.35 -/* Only one process may open /dev/xen/blktap at any time. */
    6.36 -static unsigned long blktap_dev_inuse;
    6.37 -unsigned long blktap_ring_ok; /* make this ring->state */
    6.38 -
    6.39 -/* for poll: */
    6.40 -static wait_queue_head_t blktap_wait;
    6.41 -
    6.42 -/* Rings up to user space. */
    6.43 -static blkif_front_ring_t blktap_ufe_ring;
    6.44 -static blkif_back_ring_t  blktap_ube_ring;
    6.45 -static ctrl_front_ring_t  blktap_uctrl_ring;
    6.46 -
    6.47 -/* local prototypes */
    6.48 -static int blktap_read_fe_ring(void);
    6.49 -static int blktap_read_be_ring(void);
    6.50 -
    6.51 -
    6.52 -/* -------[ mmap region ]--------------------------------------------- */
    6.53 -/*
    6.54 - * We use a big chunk of address space to map in-flight requests into,
    6.55 - * and export this region up to user-space.  See the comments in blkback
    6.56 - * about this -- the two must be kept in sync if the tap is used as a 
    6.57 - * passthrough.
    6.58 - */
    6.59 -
    6.60 -#define MAX_PENDING_REQS 64
    6.61 -
     6.62 -/* Immediately before the data-mapping area (user_vstart), we have a
     6.63 - * bunch of pages reserved for the shared memory rings.
     6.64 - */
    6.65 -#define RING_PAGES 3 /* Ctrl, Front, and Back */ 
    6.66 -
    6.67 -/* Where things are inside the device mapping. */
    6.68 -struct vm_area_struct *blktap_vma = NULL;
    6.69 -unsigned long mmap_vstart;  /* Kernel pages for mapping in data. */
    6.70 -unsigned long rings_vstart; /* start of mmaped vma               */
    6.71 -unsigned long user_vstart;  /* start of user mappings            */
    6.72 -
    6.73 -#define MMAP_PAGES_PER_REQUEST \
    6.74 -    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
    6.75 -#define MMAP_PAGES             \
    6.76 -    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
    6.77 -#define MMAP_VADDR(_start, _req,_seg)                \
    6.78 -    ( _start +                                       \
    6.79 -     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
    6.80 -     ((_seg) * PAGE_SIZE))
    6.81 -
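/* [Illustrative sketch -- not part of the changeset.]  The layout these
 * constants describe, assuming 4K pages and the historical
 * BLKIF_MAX_SEGMENTS_PER_REQUEST == 11 (so 12 pages per request):
 *
 *   rings_vstart + 0*PAGE_SIZE   ctrl sring  \
 *                + 1*PAGE_SIZE   be sring     }  RING_PAGES == 3
 *                + 2*PAGE_SIZE   fe sring    /
 *   user_vstart  + 0             request 0, segment 0
 *                ...             MMAP_PAGES_PER_REQUEST pages per request
 *
 * e.g. MMAP_VADDR(user_vstart, 2, 3) == user_vstart + (2*12 + 3)*PAGE_SIZE.
 */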
    6.82 -/* -------[ grant handles ]------------------------------------------- */
    6.83 -
    6.84 -#ifdef CONFIG_XEN_BLKDEV_GRANT
    6.85 -/* When using grant tables to map a frame for device access then the
    6.86 - * handle returned must be used to unmap the frame. This is needed to
    6.87 - * drop the ref count on the frame.
    6.88 - */
    6.89 -struct grant_handle_pair
    6.90 -{
    6.91 -    u16  kernel;
    6.92 -    u16  user;
    6.93 -};
    6.94 -static struct grant_handle_pair pending_grant_handles[MMAP_PAGES];
    6.95 -#define pending_handle(_idx, _i) \
    6.96 -    (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
    6.97 -#define BLKTAP_INVALID_HANDLE(_g) \
     6.98 -    ((((_g)->kernel) == 0xFFFF) && (((_g)->user) == 0xFFFF))
    6.99 -#define BLKTAP_INVALIDATE_HANDLE(_g) do {       \
   6.100 -    (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \
   6.101 -    } while(0)
   6.102 -    
   6.103 -#endif
   6.104 -
   6.105 -
   6.106 -/* -------[ blktap vm ops ]------------------------------------------- */
   6.107 -
   6.108 -static struct page *blktap_nopage(struct vm_area_struct *vma,
   6.109 -                                             unsigned long address,
   6.110 -                                             int *type)
   6.111 -{
   6.112 -    /*
   6.113 -     * if the page has not been mapped in by the driver then generate
   6.114 -     * a SIGBUS to the domain.
   6.115 -     */
   6.116 -
   6.117 -    force_sig(SIGBUS, current);
   6.118 -
   6.119 -    return 0;
   6.120 -}
   6.121 -
   6.122 -struct vm_operations_struct blktap_vm_ops = {
   6.123 -    nopage:   blktap_nopage,
   6.124 -};
   6.125 -
   6.126 -/* -------[ blktap file ops ]----------------------------------------- */
   6.127 -
   6.128 -static int blktap_open(struct inode *inode, struct file *filp)
   6.129 -{
   6.130 -    blkif_sring_t *sring;
   6.131 -    ctrl_sring_t *csring;
   6.132 -    
   6.133 -    if ( test_and_set_bit(0, &blktap_dev_inuse) )
   6.134 -        return -EBUSY;
   6.135 -    
   6.136 -    /* Allocate the ctrl ring. */
   6.137 -    csring = (ctrl_sring_t *)get_zeroed_page(GFP_KERNEL);
   6.138 -    if (csring == NULL)
   6.139 -        goto fail_nomem;
   6.140 -
   6.141 -    SetPageReserved(virt_to_page(csring));
   6.142 -    
   6.143 -    SHARED_RING_INIT(csring);
   6.144 -    FRONT_RING_INIT(&blktap_uctrl_ring, csring, PAGE_SIZE);
   6.145 -
   6.146 -    /* Allocate the fe ring. */
   6.147 -    sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
   6.148 -    if (sring == NULL)
   6.149 -        goto fail_free_ctrl;
   6.150 -
   6.151 -    SetPageReserved(virt_to_page(sring));
   6.152 -    
   6.153 -    SHARED_RING_INIT(sring);
   6.154 -    FRONT_RING_INIT(&blktap_ufe_ring, sring, PAGE_SIZE);
   6.155 -
   6.156 -    /* Allocate the be ring. */
   6.157 -    sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
   6.158 -    if (sring == NULL)
   6.159 -        goto fail_free_fe;
   6.160 -
   6.161 -    SetPageReserved(virt_to_page(sring));
   6.162 -    
   6.163 -    SHARED_RING_INIT(sring);
   6.164 -    BACK_RING_INIT(&blktap_ube_ring, sring, PAGE_SIZE);
   6.165 -
   6.166 -    DPRINTK(KERN_ALERT "blktap open.\n");
   6.167 -
   6.168 -    return 0;
   6.169 -    
    6.170 - fail_free_fe:
    6.171 -    free_page( (unsigned long) blktap_ufe_ring.sring);
    6.172 -
    6.173 - fail_free_ctrl:
    6.174 -    free_page( (unsigned long) blktap_uctrl_ring.sring);
   6.175 -
   6.176 - fail_nomem:
   6.177 -    return -ENOMEM;
   6.178 -}
   6.179 -
   6.180 -static int blktap_release(struct inode *inode, struct file *filp)
   6.181 -{
   6.182 -    blktap_dev_inuse = 0;
   6.183 -    blktap_ring_ok = 0;
   6.184 -
   6.185 -    DPRINTK(KERN_ALERT "blktap closed.\n");
   6.186 -
   6.187 -    /* Free the ring page. */
   6.188 -    ClearPageReserved(virt_to_page(blktap_uctrl_ring.sring));
   6.189 -    free_page((unsigned long) blktap_uctrl_ring.sring);
   6.190 -
   6.191 -    ClearPageReserved(virt_to_page(blktap_ufe_ring.sring));
   6.192 -    free_page((unsigned long) blktap_ufe_ring.sring);
   6.193 -
   6.194 -    ClearPageReserved(virt_to_page(blktap_ube_ring.sring));
   6.195 -    free_page((unsigned long) blktap_ube_ring.sring);
   6.196 -
   6.197 -    /* Clear any active mappings and free foreign map table */
   6.198 -    if (blktap_vma != NULL) {
   6.199 -        zap_page_range(blktap_vma, blktap_vma->vm_start, 
   6.200 -                       blktap_vma->vm_end - blktap_vma->vm_start, NULL);
   6.201 -        blktap_vma = NULL;
   6.202 -    }
   6.203 -
   6.204 -    return 0;
   6.205 -}
   6.206 -
   6.207 -/* Note on mmap:
    6.208 - * We need to map pages to user space in a way that will allow the block
    6.209 - * subsystem to set up direct IO to them.  This couldn't be done before,
    6.210 - * because there isn't really a sane way to map a user virtual address down
    6.211 - * to a physical address when the page belongs to another domain.
   6.212 - *
   6.213 - * My first approach was to map the page in to kernel memory, add an entry
   6.214 - * for it in the physical frame list (using alloc_lomem_region as in blkback)
   6.215 - * and then attempt to map that page up to user space.  This is disallowed
   6.216 - * by xen though, which realizes that we don't really own the machine frame
   6.217 - * underlying the physical page.
   6.218 - *
   6.219 - * The new approach is to provide explicit support for this in xen linux.
   6.220 - * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
   6.221 - * mapped from other vms.  vma->vm_private_data is set up as a mapping 
   6.222 - * from pages to actual page structs.  There is a new clause in get_user_pages
   6.223 - * that does the right thing for this sort of mapping.
   6.224 - * 
   6.225 - * blktap_mmap sets up this mapping.  Most of the real work is done in
   6.226 - * blktap_write_fe_ring below.
   6.227 - */
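/* [Illustrative user-space sketch -- not part of the changeset.]  The
 * contract blktap_mmap() below enforces is a single mapping of exactly
 * MMAP_PAGES + RING_PAGES pages; something like:
 */
#if 0  /* user-space code, shown here for illustration only */
    int    fd   = open("/dev/xen/blktap", O_RDWR);
    size_t len  = (MMAP_PAGES + RING_PAGES) * getpagesize();
    char  *area = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    /* area + 0*PAGE_SIZE: ctrl sring, +1: be sring, +2: fe sring,
     * area + 3*PAGE_SIZE: start of mapped request data (user_vstart). */
#endif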
   6.228 -static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
   6.229 -{
   6.230 -    int size;
   6.231 -    struct page **map;
   6.232 -    int i;
   6.233 -
   6.234 -    DPRINTK(KERN_ALERT "blktap mmap (%lx, %lx)\n",
   6.235 -           vma->vm_start, vma->vm_end);
   6.236 -
   6.237 -    vma->vm_flags |= VM_RESERVED;
   6.238 -    vma->vm_ops = &blktap_vm_ops;
   6.239 -
   6.240 -    size = vma->vm_end - vma->vm_start;
   6.241 -    if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
   6.242 -        printk(KERN_INFO 
   6.243 -               "blktap: you _must_ map exactly %d pages!\n",
   6.244 -               MMAP_PAGES + RING_PAGES);
   6.245 -        return -EAGAIN;
   6.246 -    }
   6.247 -
   6.248 -    size >>= PAGE_SHIFT;
    6.249 -    DPRINTK(KERN_INFO "blktap: %d rings + %d pages.\n", RING_PAGES, size - RING_PAGES);
   6.250 -    
   6.251 -    rings_vstart = vma->vm_start;
   6.252 -    user_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
   6.253 -    
   6.254 -    /* Map the ring pages to the start of the region and reserve it. */
   6.255 -
   6.256 -    /* not sure if I really need to do this... */
   6.257 -    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
   6.258 -
   6.259 -    DPRINTK("Mapping ctrl_ring page %lx.\n", __pa(blktap_uctrl_ring.sring));
   6.260 -    if (remap_pfn_range(vma, vma->vm_start, 
   6.261 -                         __pa(blktap_uctrl_ring.sring) >> PAGE_SHIFT, 
   6.262 -                         PAGE_SIZE, vma->vm_page_prot)) 
   6.263 -        goto fail;
   6.264 -
   6.265 -
   6.266 -    DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring));
   6.267 -    if (remap_pfn_range(vma, vma->vm_start + PAGE_SIZE, 
   6.268 -                         __pa(blktap_ube_ring.sring) >> PAGE_SHIFT, 
   6.269 -                         PAGE_SIZE, vma->vm_page_prot)) 
   6.270 -        goto fail;
   6.271 -
   6.272 -    DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring));
   6.273 -    if (remap_pfn_range(vma, vma->vm_start + ( 2 * PAGE_SIZE ), 
   6.274 -                         __pa(blktap_ufe_ring.sring) >> PAGE_SHIFT, 
   6.275 -                         PAGE_SIZE, vma->vm_page_prot)) 
   6.276 -        goto fail;
   6.277 -
   6.278 -    /* Mark this VM as containing foreign pages, and set up mappings. */
   6.279 -    map = kmalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
    6.280 -                  * sizeof(struct page *),
   6.281 -                  GFP_KERNEL);
   6.282 -    if (map == NULL) goto fail;
   6.283 -
   6.284 -    for (i=0; i<((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
   6.285 -        map[i] = NULL;
   6.286 -    
   6.287 -    vma->vm_private_data = map;
   6.288 -    vma->vm_flags |= VM_FOREIGN;
   6.289 -
   6.290 -    blktap_vma = vma;
   6.291 -    blktap_ring_ok = 1;
   6.292 -
   6.293 -    return 0;
   6.294 - fail:
   6.295 -    /* Clear any active mappings. */
   6.296 -    zap_page_range(vma, vma->vm_start, 
   6.297 -                   vma->vm_end - vma->vm_start, NULL);
   6.298 -
   6.299 -    return -ENOMEM;
   6.300 -}
   6.301 -
   6.302 -static int blktap_ioctl(struct inode *inode, struct file *filp,
   6.303 -                        unsigned int cmd, unsigned long arg)
   6.304 -{
   6.305 -    switch(cmd) {
   6.306 -    case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
   6.307 -        return blktap_read_fe_ring();
   6.308 -
   6.309 -    case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */
   6.310 -        return blktap_read_be_ring();
   6.311 -
   6.312 -    case BLKTAP_IOCTL_SETMODE:
   6.313 -        if (BLKTAP_MODE_VALID(arg)) {
   6.314 -            blktap_mode = arg;
   6.315 -            /* XXX: may need to flush rings here. */
   6.316 -            printk(KERN_INFO "blktap: set mode to %lx\n", arg);
   6.317 -            return 0;
    6.318 -        } break; /* invalid mode: don't fall through to PRINT_IDXS */
   6.319 -    case BLKTAP_IOCTL_PRINT_IDXS:
   6.320 -        {
   6.321 -            print_be_ring_idxs();
   6.322 -            print_fe_ring_idxs();
   6.323 -            WPRINTK("User Rings: \n-----------\n");
    6.324 -            WPRINTK("UF: rsp_cons: %2d, req_prod_pvt: %2d "
   6.325 -                            "| req_prod: %2d, rsp_prod: %2d\n",
   6.326 -                            blktap_ufe_ring.rsp_cons,
   6.327 -                            blktap_ufe_ring.req_prod_pvt,
   6.328 -                            blktap_ufe_ring.sring->req_prod,
   6.329 -                            blktap_ufe_ring.sring->rsp_prod);
    6.330 -            WPRINTK("UB: req_cons: %2d, rsp_prod_pvt: %2d "
   6.331 -                            "| req_prod: %2d, rsp_prod: %2d\n",
   6.332 -                            blktap_ube_ring.req_cons,
   6.333 -                            blktap_ube_ring.rsp_prod_pvt,
   6.334 -                            blktap_ube_ring.sring->req_prod,
   6.335 -                            blktap_ube_ring.sring->rsp_prod);
   6.336 -            
   6.337 -        }
   6.338 -    }
   6.339 -    return -ENOIOCTLCMD;
   6.340 -}
   6.341 -
   6.342 -static unsigned int blktap_poll(struct file *file, poll_table *wait)
   6.343 -{
   6.344 -        poll_wait(file, &blktap_wait, wait);
   6.345 -
   6.346 -        if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) ||
   6.347 -             RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring)   ||
   6.348 -             RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) {
   6.349 -
   6.350 -            flush_tlb_all();
   6.351 -
   6.352 -            RING_PUSH_REQUESTS(&blktap_uctrl_ring);
   6.353 -            RING_PUSH_REQUESTS(&blktap_ufe_ring);
   6.354 -            RING_PUSH_RESPONSES(&blktap_ube_ring);
   6.355 -            return POLLIN | POLLRDNORM;
   6.356 -        }
   6.357 -
   6.358 -        return 0;
   6.359 -}
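/* [Illustrative user-space sketch -- not part of the changeset.]  The
 * event loop implied by blktap_poll()/blktap_ioctl() above: poll() wakes
 * when the kernel pushes messages into the shared rings; the process
 * consumes them from the mapped region, writes replies back, then uses
 * the KICK ioctls to make the kernel drain the user rings (fd as in the
 * earlier sketch):
 */
#if 0  /* user-space code, shown here for illustration only */
    struct pollfd pfd = { .fd = fd, .events = POLLIN };
    while (poll(&pfd, 1, -1) > 0) {
        /* ... process requests/responses in the mapped rings ... */
        ioctl(fd, BLKTAP_IOCTL_KICK_FE, 0); /* drain responses on UFERing */
        ioctl(fd, BLKTAP_IOCTL_KICK_BE, 0); /* drain requests on UBERing  */
    }
#endif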
   6.360 -
   6.361 -void blktap_kick_user(void)
   6.362 -{
   6.363 -    /* blktap_ring->req_prod = blktap_req_prod; */
   6.364 -    wake_up_interruptible(&blktap_wait);
   6.365 -}
   6.366 -
   6.367 -static struct file_operations blktap_fops = {
   6.368 -    owner:    THIS_MODULE,
   6.369 -    poll:     blktap_poll,
   6.370 -    ioctl:    blktap_ioctl,
   6.371 -    open:     blktap_open,
   6.372 -    release:  blktap_release,
   6.373 -    mmap:     blktap_mmap,
   6.374 -};
   6.375 -    
   6.376 -/*-----[ Data to/from user space ]----------------------------------------*/
   6.377 -
   6.378 -static void fast_flush_area(int idx, int nr_pages)
   6.379 -{
   6.380 -#ifdef CONFIG_XEN_BLKDEV_GRANT
   6.381 -    struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
   6.382 -    unsigned int i, op = 0;
   6.383 -    struct grant_handle_pair *handle;
   6.384 -    unsigned long ptep;
   6.385 -
   6.386 -    for (i=0; i<nr_pages; i++)
   6.387 -    {
   6.388 -        handle = &pending_handle(idx, i);
   6.389 -        if (!BLKTAP_INVALID_HANDLE(handle))
   6.390 -        {
   6.391 -
   6.392 -            unmap[op].host_addr = MMAP_VADDR(mmap_vstart, idx, i);
   6.393 -            unmap[op].dev_bus_addr = 0;
   6.394 -            unmap[op].handle = handle->kernel;
   6.395 -            op++;
   6.396 -
   6.397 -            if (create_lookup_pte_addr(blktap_vma->vm_mm,
   6.398 -                                       MMAP_VADDR(user_vstart, idx, i), 
   6.399 -                                       &ptep) !=0) {
   6.400 -                DPRINTK("Couldn't get a pte addr!\n");
   6.401 -                return;
   6.402 -            }
   6.403 -            unmap[op].host_addr    = ptep;
   6.404 -            unmap[op].dev_bus_addr = 0;
   6.405 -            unmap[op].handle       = handle->user;
   6.406 -            op++;
   6.407 -            
   6.408 -            BLKTAP_INVALIDATE_HANDLE(handle);
   6.409 -        }
   6.410 -    }
   6.411 -    if ( unlikely(HYPERVISOR_grant_table_op(
   6.412 -        GNTTABOP_unmap_grant_ref, unmap, op)))
   6.413 -        BUG();
   6.414 -#else
   6.415 -    multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
   6.416 -    int               i;
   6.417 -
   6.418 -    for ( i = 0; i < nr_pages; i++ )
   6.419 -    {
   6.420 -        MULTI_update_va_mapping(mcl+i, MMAP_VADDR(mmap_vstart, idx, i),
   6.421 -                                __pte(0), 0);
   6.422 -    }
   6.423 -
   6.424 -    mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
   6.425 -    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
   6.426 -        BUG();
   6.427 -#endif
   6.428 -}
   6.429 -
   6.430 -
   6.431 -int blktap_write_fe_ring(blkif_request_t *req)
   6.432 -{
   6.433 -    blkif_request_t *target;
   6.434 -    int i, ret = 0;
   6.435 -#ifdef CONFIG_XEN_BLKDEV_GRANT
   6.436 -    struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
   6.437 -    int op;
   6.438 -#else
   6.439 -    unsigned long remap_prot;
   6.440 -    multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST+1];
   6.441 -    mmu_update_t mmu[BLKIF_MAX_SEGMENTS_PER_REQUEST];
   6.442 -#endif
   6.443 -
   6.444 -    /*
   6.445 -     * This is called to pass a request from the real frontend domain's
   6.446 -     * blkif ring to the character device.
   6.447 -     */
   6.448 -
   6.449 -    if ( ! blktap_ring_ok ) {
   6.450 -        DPRINTK("blktap: ufe_ring not ready for a request!\n");
   6.451 -        return 0;
   6.452 -    }
   6.453 -
   6.454 -    if ( RING_FULL(&blktap_ufe_ring) ) {
    6.455 -        WPRINTK("blktap: fe_ring is full, can't add.\n");
   6.456 -        return 0;
   6.457 -    }
   6.458 -
   6.459 -    flush_cache_all(); /* a noop on intel... */
   6.460 -
   6.461 -    target = RING_GET_REQUEST(&blktap_ufe_ring, blktap_ufe_ring.req_prod_pvt);
   6.462 -    memcpy(target, req, sizeof(*req));
   6.463 -
   6.464 -    /* Map the foreign pages directly in to the application */
   6.465 -#ifdef CONFIG_XEN_BLKDEV_GRANT
   6.466 -    op = 0;
   6.467 -    for (i=0; i<target->nr_segments; i++) {
   6.468 -
   6.469 -        unsigned long uvaddr;
   6.470 -        unsigned long kvaddr;
   6.471 -        unsigned long ptep;
   6.472 -
   6.473 -        uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i);
   6.474 -        kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i);
   6.475 -
   6.476 -        /* Map the remote page to kernel. */
   6.477 -        map[op].host_addr = kvaddr;
   6.478 -        map[op].dom   = ID_TO_DOM(req->id);
   6.479 -        map[op].ref   = blkif_gref_from_fas(target->frame_and_sects[i]);
   6.480 -        map[op].flags = GNTMAP_host_map;
   6.481 -        /* This needs a bit more thought in terms of interposition: 
   6.482 -         * If we want to be able to modify pages during write using 
   6.483 -         * grant table mappings, the guest will either need to allow 
   6.484 -         * it, or we'll need to incur a copy. */
   6.485 -        if (req->operation == BLKIF_OP_WRITE)
   6.486 -            map[op].flags |= GNTMAP_readonly;
   6.487 -        op++;
   6.488 -
   6.489 -        /* Now map it to user. */
   6.490 -        ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
   6.491 -        if (ret)
   6.492 -        {
   6.493 -            DPRINTK("Couldn't get a pte addr!\n");
   6.494 -            goto fail;
   6.495 -        }
   6.496 -
   6.497 -        map[op].host_addr = ptep;
   6.498 -        map[op].dom       = ID_TO_DOM(req->id);
   6.499 -        map[op].ref       = blkif_gref_from_fas(target->frame_and_sects[i]);
   6.500 -        map[op].flags     = GNTMAP_host_map | GNTMAP_application_map
   6.501 -                            | GNTMAP_contains_pte;
   6.502 -        /* Above interposition comment applies here as well. */
   6.503 -        if (req->operation == BLKIF_OP_WRITE)
   6.504 -            map[op].flags |= GNTMAP_readonly;
   6.505 -        op++;
   6.506 -    }
   6.507 -
   6.508 -    if ( unlikely(HYPERVISOR_grant_table_op(
   6.509 -            GNTTABOP_map_grant_ref, map, op)))
   6.510 -        BUG();
   6.511 -
   6.512 -    op = 0;
   6.513 -    for (i=0; i<(target->nr_segments*2); i+=2) {
   6.514 -        unsigned long uvaddr;
   6.515 -        unsigned long kvaddr;
   6.516 -        unsigned long offset;
   6.517 -        int cancel = 0;
   6.518 -
   6.519 -        uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i/2);
   6.520 -        kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i/2);
   6.521 -
   6.522 -        if ( unlikely(map[i].handle < 0) ) {
   6.523 -            DPRINTK("Error on kernel grant mapping (%d)\n", map[i].handle);
   6.524 -            ret = map[i].handle;
   6.525 -            cancel = 1;
   6.526 -        }
   6.527 -
   6.528 -        if ( unlikely(map[i+1].handle < 0) ) {
   6.529 -            DPRINTK("Error on user grant mapping (%d)\n", map[i+1].handle);
   6.530 -            ret = map[i+1].handle;
   6.531 -            cancel = 1;
   6.532 -        }
   6.533 -
   6.534 -        if (cancel) 
   6.535 -            goto fail;
   6.536 -
   6.537 -        /* Set the necessary mappings in p2m and in the VM_FOREIGN 
   6.538 -         * vm_area_struct to allow user vaddr -> struct page lookups
   6.539 -         * to work.  This is needed for direct IO to foreign pages. */
   6.540 -        phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] =
   6.541 -            FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT);
   6.542 -
   6.543 -        offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
   6.544 -        ((struct page **)blktap_vma->vm_private_data)[offset] =
   6.545 -            pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
   6.546 -
   6.547 -        /* Save handles for unmapping later. */
   6.548 -        pending_handle(ID_TO_IDX(req->id), i/2).kernel = map[i].handle;
   6.549 -        pending_handle(ID_TO_IDX(req->id), i/2).user   = map[i+1].handle;
   6.550 -    }
   6.551 -    
   6.552 -#else
   6.553 -
   6.554 -    remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
   6.555 -
   6.556 -    for (i=0; i<target->nr_segments; i++) {
   6.557 -        unsigned long buf;
   6.558 -        unsigned long uvaddr;
   6.559 -        unsigned long kvaddr;
   6.560 -        unsigned long offset;
   6.561 -        unsigned long ptep;
   6.562 -
   6.563 -        buf   = target->frame_and_sects[i] & PAGE_MASK;
   6.564 -        uvaddr = MMAP_VADDR(user_vstart, ID_TO_IDX(req->id), i);
   6.565 -        kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i);
   6.566 -
   6.567 -        MULTI_update_va_mapping_otherdomain(
   6.568 -            mcl+i, 
   6.569 -            kvaddr, 
   6.570 -            pfn_pte_ma(buf >> PAGE_SHIFT, __pgprot(remap_prot)),
   6.571 -            0,
   6.572 -            ID_TO_DOM(req->id));
   6.573 -
   6.574 -        phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] =
   6.575 -            FOREIGN_FRAME(buf >> PAGE_SHIFT);
   6.576 -
   6.577 -        ret = create_lookup_pte_addr(blktap_vma->vm_mm, uvaddr, &ptep);
   6.578 -        if (ret)
   6.579 -        { 
   6.580 -            DPRINTK("error getting pte\n");
   6.581 -            goto fail;
   6.582 -        }
   6.583 -
   6.584 -        mmu[i].ptr = ptep;
   6.585 -        mmu[i].val = (target->frame_and_sects[i] & PAGE_MASK)
   6.586 -            | pgprot_val(blktap_vma->vm_page_prot);
   6.587 -
   6.588 -        offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
   6.589 -        ((struct page **)blktap_vma->vm_private_data)[offset] =
   6.590 -            pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
   6.591 -    }
   6.592 -    
   6.593 -    /* Add the mmu_update call. */
   6.594 -    mcl[i].op = __HYPERVISOR_mmu_update;
   6.595 -    mcl[i].args[0] = (unsigned long)mmu;
   6.596 -    mcl[i].args[1] = target->nr_segments;
   6.597 -    mcl[i].args[2] = 0;
   6.598 -    mcl[i].args[3] = ID_TO_DOM(req->id);
   6.599 -
   6.600 -    BUG_ON(HYPERVISOR_multicall(mcl, target->nr_segments+1) != 0);
   6.601 -
   6.602 -    /* Make sure it all worked. */
   6.603 -    for ( i = 0; i < target->nr_segments; i++ )
   6.604 -    {
   6.605 -        if ( unlikely(mcl[i].result != 0) )
   6.606 -        {
   6.607 -            DPRINTK("invalid buffer -- could not remap it\n");
   6.608 -            ret = mcl[i].result;
   6.609 -            goto fail;
   6.610 -        }
   6.611 -    }
   6.612 -    if ( unlikely(mcl[i].result != 0) )
   6.613 -    {
   6.614 -        DPRINTK("direct remapping of pages to /dev/blktap failed.\n");
   6.615 -        ret = mcl[i].result;
   6.616 -        goto fail;
   6.617 -    }
   6.618 -#endif /* CONFIG_XEN_BLKDEV_GRANT */
   6.619 -
   6.620 -    /* Mark mapped pages as reserved: */
   6.621 -    for ( i = 0; i < target->nr_segments; i++ )
   6.622 -    {
   6.623 -        unsigned long kvaddr;
   6.624 -
   6.625 -        kvaddr = MMAP_VADDR(mmap_vstart, ID_TO_IDX(req->id), i);
   6.626 -        SetPageReserved(pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT));
   6.627 -    }
   6.628 -
   6.629 -
   6.630 -    blktap_ufe_ring.req_prod_pvt++;
   6.631 -    
   6.632 -    return 0;
   6.633 -
   6.634 - fail:
   6.635 -    fast_flush_area(ID_TO_IDX(req->id), target->nr_segments);
   6.636 -    return ret;
   6.637 -}
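/* [Illustrative note -- not part of the changeset.]  In the grant-table
 * path above, each segment costs two map operations: one GNTMAP_host_map
 * into the kernel at MMAP_VADDR(mmap_vstart, ...) and one
 * GNTMAP_application_map|GNTMAP_contains_pte into the tap process at
 * MMAP_VADDR(user_vstart, ...).  Hence map[] holds 2*nr_segments entries,
 * and the handle pairs are saved in pending_grant_handles[] so that
 * fast_flush_area() can unmap both views later.
 */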
   6.638 -
   6.639 -int blktap_write_be_ring(blkif_response_t *rsp)
   6.640 -{
   6.641 -    blkif_response_t *target;
   6.642 -
   6.643 -    /*
    6.644 -     * This is called to pass a response from the real backend domain's
   6.645 -     * blkif ring to the character device.
   6.646 -     */
   6.647 -
   6.648 -    if ( ! blktap_ring_ok ) {
   6.649 -        DPRINTK("blktap: be_ring not ready for a request!\n");
   6.650 -        return 0;
   6.651 -    }
   6.652 -
   6.653 -    /* No test for fullness in the response direction. */
   6.654 -
   6.655 -    target = RING_GET_RESPONSE(&blktap_ube_ring,
   6.656 -            blktap_ube_ring.rsp_prod_pvt);
   6.657 -    memcpy(target, rsp, sizeof(*rsp));
   6.658 -
   6.659 -    /* no mapping -- pages were mapped in blktap_write_fe_ring() */
   6.660 -
   6.661 -    blktap_ube_ring.rsp_prod_pvt++;
   6.662 -    
   6.663 -    return 0;
   6.664 -}
   6.665 -
   6.666 -static int blktap_read_fe_ring(void)
   6.667 -{
   6.668 -    /* This is called to read responses from the UFE ring. */
   6.669 -
   6.670 -    RING_IDX i, j, rp;
   6.671 -    blkif_response_t *resp_s;
   6.672 -    blkif_t *blkif;
   6.673 -    active_req_t *ar;
   6.674 -
   6.675 -    DPRINTK("blktap_read_fe_ring()\n");
   6.676 -
    6.677 -    /* if we are forwarding from the UFERing to the FERing */
   6.678 -    if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
   6.679 -
   6.680 -        /* for each outstanding message on the UFEring  */
   6.681 -        rp = blktap_ufe_ring.sring->rsp_prod;
   6.682 -        rmb();
   6.683 -        
   6.684 -        for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
   6.685 -        {
   6.686 -            resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i);
   6.687 -            
   6.688 -            DPRINTK("resp->fe_ring\n");
   6.689 -            ar = lookup_active_req(ID_TO_IDX(resp_s->id));
   6.690 -            blkif = ar->blkif;
   6.691 -            for (j = 0; j < ar->nr_pages; j++) {
   6.692 -                unsigned long vaddr;
   6.693 -                struct page **map = blktap_vma->vm_private_data;
   6.694 -                int offset; 
   6.695 -
   6.696 -                vaddr  = MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), j);
   6.697 -                offset = (vaddr - blktap_vma->vm_start) >> PAGE_SHIFT;
   6.698 -
   6.699 -                ClearPageReserved(virt_to_page(vaddr));
   6.700 -                map[offset] = NULL;
   6.701 -            }
   6.702 -
   6.703 -            fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages);
   6.704 -            zap_page_range(blktap_vma, 
   6.705 -                    MMAP_VADDR(user_vstart, ID_TO_IDX(resp_s->id), 0), 
   6.706 -                    ar->nr_pages << PAGE_SHIFT, NULL);
   6.707 -            write_resp_to_fe_ring(blkif, resp_s);
   6.708 -            blktap_ufe_ring.rsp_cons = i + 1;
   6.709 -            kick_fe_domain(blkif);
   6.710 -        }
   6.711 -    }
   6.712 -    return 0;
   6.713 -}
   6.714 -
   6.715 -static int blktap_read_be_ring(void)
   6.716 -{
   6.717 -    /* This is called to read requests from the UBE ring. */
   6.718 -
   6.719 -    RING_IDX i, rp;
   6.720 -    blkif_request_t *req_s;
   6.721 -
   6.722 -    DPRINTK("blktap_read_be_ring()\n");
   6.723 -
    6.724 -    /* if we are forwarding from the UBERing to the BERing */
    6.725 -    if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
    6.726 -
    6.727 -        /* for each outstanding message on the UBERing  */
   6.728 -        rp = blktap_ube_ring.sring->req_prod;
   6.729 -        rmb();
   6.730 -        for ( i = blktap_ube_ring.req_cons; i != rp; i++ )
   6.731 -        {
   6.732 -            req_s = RING_GET_REQUEST(&blktap_ube_ring, i);
   6.733 -
   6.734 -            DPRINTK("req->be_ring\n");
   6.735 -            write_req_to_be_ring(req_s);
   6.736 -            kick_be_domain();
   6.737 -        }
   6.738 -        
   6.739 -        blktap_ube_ring.req_cons = i;
   6.740 -    }
   6.741 -
   6.742 -    return 0;
   6.743 -}
   6.744 -
   6.745 -int blktap_write_ctrl_ring(ctrl_msg_t *msg)
   6.746 -{
   6.747 -    ctrl_msg_t *target;
   6.748 -
   6.749 -    if ( ! blktap_ring_ok ) {
   6.750 -        DPRINTK("blktap: be_ring not ready for a request!\n");
   6.751 -        return 0;
   6.752 -    }
   6.753 -
   6.754 -    /* No test for fullness in the response direction. */
   6.755 -
   6.756 -    target = RING_GET_REQUEST(&blktap_uctrl_ring,
   6.757 -            blktap_uctrl_ring.req_prod_pvt);
   6.758 -    memcpy(target, msg, sizeof(*msg));
   6.759 -
   6.760 -    blktap_uctrl_ring.req_prod_pvt++;
   6.761 -    
   6.762 -    /* currently treat the ring as unidirectional. */
   6.763 -    blktap_uctrl_ring.rsp_cons = blktap_uctrl_ring.sring->rsp_prod;
   6.764 -    
   6.765 -    return 0;
   6.766 -       
   6.767 -}
   6.768 -
   6.769 -/* -------[ blktap module setup ]------------------------------------- */
   6.770 -
   6.771 -static struct miscdevice blktap_miscdev = {
   6.772 -    .minor        = BLKTAP_MINOR,
   6.773 -    .name         = "blktap",
   6.774 -    .fops         = &blktap_fops,
   6.775 -    .devfs_name   = "misc/blktap",
   6.776 -};
   6.777 -
   6.778 -int blktap_init(void)
   6.779 -{
   6.780 -    int err, i, j;
   6.781 -    struct page *page;
   6.782 -
   6.783 -    page = balloon_alloc_empty_page_range(MMAP_PAGES);
   6.784 -    BUG_ON(page == NULL);
   6.785 -    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
   6.786 -
   6.787 -#ifdef CONFIG_XEN_BLKDEV_GRANT
   6.788 -    for (i=0; i<MAX_PENDING_REQS ; i++)
   6.789 -        for (j=0; j<BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
   6.790 -            BLKTAP_INVALIDATE_HANDLE(&pending_handle(i, j));
   6.791 -#endif
   6.792 -
   6.793 -    err = misc_register(&blktap_miscdev);
   6.794 -    if ( err != 0 )
   6.795 -    {
   6.796 -        printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
   6.797 -        return err;
   6.798 -    }
   6.799 -
   6.800 -    init_waitqueue_head(&blktap_wait);
   6.801 -
   6.802 -
   6.803 -    return 0;
   6.804 -}
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h	Sun Sep 04 21:19:44 2005 +0000
     7.3 @@ -0,0 +1,112 @@
     7.4 +
     7.5 +#ifndef __BLKIF__BACKEND__COMMON_H__
     7.6 +#define __BLKIF__BACKEND__COMMON_H__
     7.7 +
     7.8 +#include <linux/config.h>
     7.9 +#include <linux/version.h>
    7.10 +#include <linux/module.h>
    7.11 +#include <linux/interrupt.h>
    7.12 +#include <linux/slab.h>
    7.13 +#include <linux/blkdev.h>
    7.14 +#include <linux/vmalloc.h>
    7.15 +#include <asm/io.h>
    7.16 +#include <asm/setup.h>
    7.17 +#include <asm/pgalloc.h>
    7.18 +#include <asm-xen/evtchn.h>
    7.19 +#include <asm-xen/hypervisor.h>
    7.20 +#include <asm-xen/xen-public/io/blkif.h>
    7.21 +#include <asm-xen/xen-public/io/ring.h>
    7.22 +#include <asm-xen/gnttab.h>
    7.23 +
    7.24 +#if 0
    7.25 +#define ASSERT(_p) \
    7.26 +    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
    7.27 +    __LINE__, __FILE__); *(int*)0=0; }
    7.28 +#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
    7.29 +                           __FILE__ , __LINE__ , ## _a )
    7.30 +#else
    7.31 +#define ASSERT(_p) ((void)0)
    7.32 +#define DPRINTK(_f, _a...) ((void)0)
    7.33 +#endif
    7.34 +
    7.35 +#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
    7.36 +
    7.37 +struct vbd {
    7.38 +    blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
    7.39 +    unsigned char  readonly;    /* Non-zero -> read-only */
    7.40 +    unsigned char  type;        /* VDISK_xxx */
    7.41 +    blkif_pdev_t   pdevice;     /* phys device that this vbd maps to */
    7.42 +    struct block_device *bdev;
    7.43 +}; 
    7.44 +
    7.45 +typedef struct blkif_st {
    7.46 +    /* Unique identifier for this interface. */
    7.47 +    domid_t           domid;
    7.48 +    unsigned int      handle;
    7.49 +    /* Physical parameters of the comms window. */
    7.50 +    unsigned long     shmem_frame;
    7.51 +    unsigned int      evtchn;
    7.52 +    unsigned int      remote_evtchn;
    7.53 +    /* Comms information. */
    7.54 +    blkif_back_ring_t blk_ring;
    7.55 +    /* VBDs attached to this interface. */
    7.56 +    struct vbd        vbd;
    7.57 +    /* Private fields. */
    7.58 +    enum { DISCONNECTED, CONNECTED } status;
    7.59 +#ifdef CONFIG_XEN_BLKDEV_TAP_BE
    7.60 +    /* Is this a blktap frontend */
    7.61 +    unsigned int     is_blktap;
    7.62 +#endif
    7.63 +    struct list_head blkdev_list;
    7.64 +    spinlock_t       blk_ring_lock;
    7.65 +    atomic_t         refcnt;
    7.66 +
    7.67 +    struct work_struct free_work;
    7.68 +    u16 shmem_handle;
    7.69 +    unsigned long shmem_vaddr;
    7.70 +    grant_ref_t shmem_ref;
    7.71 +} blkif_t;
    7.72 +
    7.73 +void blkif_create(blkif_be_create_t *create);
    7.74 +void blkif_destroy(blkif_be_destroy_t *destroy);
    7.75 +void blkif_connect(blkif_be_connect_t *connect);
    7.76 +int  blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
    7.77 +void blkif_disconnect_complete(blkif_t *blkif);
    7.78 +blkif_t *alloc_blkif(domid_t domid);
    7.79 +void free_blkif_callback(blkif_t *blkif);
    7.80 +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
    7.81 +
    7.82 +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
    7.83 +#define blkif_put(_b)                             \
    7.84 +    do {                                          \
    7.85 +        if ( atomic_dec_and_test(&(_b)->refcnt) ) \
    7.86 +            free_blkif_callback(_b);		  \
    7.87 +    } while (0)
    7.88 +
    7.89 +/* Create a vbd. */
    7.90 +int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice,
    7.91 +	       int readonly);
    7.92 +void vbd_free(struct vbd *vbd);
    7.93 +
    7.94 +unsigned long vbd_size(struct vbd *vbd);
    7.95 +unsigned int vbd_info(struct vbd *vbd);
    7.96 +unsigned long vbd_secsize(struct vbd *vbd);
    7.97 +
    7.98 +struct phys_req {
    7.99 +    unsigned short       dev;
   7.100 +    unsigned short       nr_sects;
   7.101 +    struct block_device *bdev;
   7.102 +    blkif_sector_t       sector_number;
   7.103 +};
   7.104 +
   7.105 +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); 
   7.106 +
   7.107 +void blkif_interface_init(void);
   7.108 +
   7.109 +void blkif_deschedule(blkif_t *blkif);
   7.110 +
   7.111 +void blkif_xenbus_init(void);
   7.112 +
   7.113 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
   7.114 +
   7.115 +#endif /* __BLKIF__BACKEND__COMMON_H__ */
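
The blkif_get()/blkif_put() pair above encodes the interface's lifetime rule.  A
minimal sketch of the intended pairing (assumed caller code; the xenbus backend
added below is the real user):

    blkif_t *blkif = alloc_blkif(domid);  /* created with refcnt == 1     */
    if (IS_ERR(blkif))
            return PTR_ERR(blkif);

    blkif_get(blkif);                     /* e.g. per in-flight request   */
    /* ... issue and complete the request ... */
    blkif_put(blkif);                     /* drop the request's reference */

    blkif_put(blkif);                     /* final put: refcnt reaches 0,
                                           * free_blkif_callback() defers
                                           * teardown to a workqueue      */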
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c	Sun Sep 04 21:19:44 2005 +0000
     8.3 @@ -0,0 +1,141 @@
     8.4 +/******************************************************************************
     8.5 + * arch/xen/drivers/blkif/backend/interface.c
     8.6 + * 
     8.7 + * Block-device interface management.
     8.8 + * 
     8.9 + * Copyright (c) 2004, Keir Fraser
    8.10 + */
    8.11 +
    8.12 +#include "common.h"
    8.13 +#include <asm-xen/evtchn.h>
    8.14 +
    8.15 +static kmem_cache_t *blkif_cachep;
    8.16 +
    8.17 +blkif_t *alloc_blkif(domid_t domid)
    8.18 +{
    8.19 +    blkif_t *blkif;
    8.20 +
    8.21 +    blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
    8.22 +    if (!blkif)
    8.23 +	    return ERR_PTR(-ENOMEM);
    8.24 +
    8.25 +    memset(blkif, 0, sizeof(*blkif));
    8.26 +    blkif->domid = domid;
    8.27 +    blkif->status = DISCONNECTED;
    8.28 +    spin_lock_init(&blkif->blk_ring_lock);
    8.29 +    atomic_set(&blkif->refcnt, 1);
    8.30 +
    8.31 +    return blkif;
    8.32 +}
    8.33 +
    8.34 +static int map_frontend_page(blkif_t *blkif, unsigned long localaddr,
    8.35 +			     unsigned long shared_page)
    8.36 +{
    8.37 +    struct gnttab_map_grant_ref op;
    8.38 +    op.host_addr = localaddr;
    8.39 +    op.flags = GNTMAP_host_map;
    8.40 +    op.ref = shared_page;
    8.41 +    op.dom = blkif->domid;
    8.42 +
    8.43 +    BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
    8.44 +
    8.45 +    if (op.handle < 0) {
    8.46 +	DPRINTK(" Grant table operation failure !\n");
    8.47 +	return op.handle;
    8.48 +    }
    8.49 +
    8.50 +    blkif->shmem_ref = shared_page;
    8.51 +    blkif->shmem_handle = op.handle;
    8.52 +    blkif->shmem_vaddr = localaddr;
    8.53 +    return 0;
    8.54 +}
    8.55 +
    8.56 +static void unmap_frontend_page(blkif_t *blkif)
    8.57 +{
    8.58 +    struct gnttab_unmap_grant_ref op;
    8.59 +
    8.60 +    op.host_addr = blkif->shmem_vaddr;
    8.61 +    op.handle = blkif->shmem_handle;
    8.62 +    op.dev_bus_addr = 0;
    8.63 +    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
    8.64 +}
    8.65 +
    8.66 +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
    8.67 +{
    8.68 +    struct vm_struct *vma;
    8.69 +    blkif_sring_t *sring;
    8.70 +    evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
    8.71 +    int err;
    8.72 +
    8.73 +    BUG_ON(blkif->remote_evtchn);
    8.74 +
    8.75 +    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
    8.76 +	return -ENOMEM;
    8.77 +
    8.78 +    err = map_frontend_page(blkif, (unsigned long)vma->addr, shared_page);
    8.79 +    if (err) {
    8.80 +        vfree(vma->addr);
    8.81 +	return err;
    8.82 +    }
    8.83 +
    8.84 +    op.u.bind_interdomain.dom1 = DOMID_SELF;
    8.85 +    op.u.bind_interdomain.dom2 = blkif->domid;
    8.86 +    op.u.bind_interdomain.port1 = 0;
    8.87 +    op.u.bind_interdomain.port2 = evtchn;
    8.88 +    err = HYPERVISOR_event_channel_op(&op);
    8.89 +    if (err) {
    8.90 +	unmap_frontend_page(blkif);
    8.91 +	vfree(vma->addr);
    8.92 +	return err;
    8.93 +    }
    8.94 +
    8.95 +    blkif->evtchn = op.u.bind_interdomain.port1;
    8.96 +    blkif->remote_evtchn = evtchn;
    8.97 +
    8.98 +    sring = (blkif_sring_t *)vma->addr;
    8.99 +    SHARED_RING_INIT(sring);
   8.100 +    BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
   8.101 +
   8.102 +    bind_evtchn_to_irqhandler(blkif->evtchn, blkif_be_int, 0, "blkif-backend",
   8.103 +			      blkif);
   8.104 +    blkif->status        = CONNECTED;
   8.105 +    blkif->shmem_frame   = shared_page;
   8.106 +
   8.107 +    return 0;
   8.108 +}
   8.109 +
   8.110 +static void free_blkif(void *arg)
   8.111 +{
   8.112 +    evtchn_op_t op = { .cmd = EVTCHNOP_close };
   8.113 +    blkif_t *blkif = (blkif_t *)arg;
   8.114 +
   8.115 +    op.u.close.port = blkif->evtchn;
   8.116 +    op.u.close.dom = DOMID_SELF;
   8.117 +    HYPERVISOR_event_channel_op(&op);
   8.118 +    op.u.close.port = blkif->remote_evtchn;
   8.119 +    op.u.close.dom = blkif->domid;
   8.120 +    HYPERVISOR_event_channel_op(&op);
   8.121 +
   8.122 +    if (blkif->evtchn)
   8.123 +        unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
   8.124 +
   8.125 +    if (blkif->blk_ring.sring) {
   8.126 +	unmap_frontend_page(blkif);
   8.127 +	vfree(blkif->blk_ring.sring);
   8.128 +	blkif->blk_ring.sring = NULL;
   8.129 +    }
   8.130 +
   8.131 +    kmem_cache_free(blkif_cachep, blkif);
   8.132 +}
   8.133 +
   8.134 +void free_blkif_callback(blkif_t *blkif)
   8.135 +{
   8.136 +    INIT_WORK(&blkif->free_work, free_blkif, (void *)blkif);
   8.137 +    schedule_work(&blkif->free_work);
   8.138 +}
   8.139 +
   8.140 +void __init blkif_interface_init(void)
   8.141 +{
   8.142 +    blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
   8.143 +                                     0, 0, NULL, NULL);
   8.144 +}
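
Connect and teardown above are mirror images; condensed (a sketch, with
ring_ref and evtchn standing in for the values the xenbus code reads from the
store):

    /* Connect: blkif_map() grant-maps the frontend's shared page into a
     * fresh vm area, binds an interdomain event channel, initialises the
     * back ring, wires up blkif_be_int(), then marks the blkif CONNECTED.
     */
    err = blkif_map(blkif, ring_ref, evtchn);

    /* Teardown: the last blkif_put() schedules free_blkif() on a
     * workqueue, which closes both event-channel ends, unbinds the irq
     * handler, unmaps the frontend page, vfree()s the ring mapping, and
     * returns the blkif to its slab cache.
     */
    blkif_put(blkif);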
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c	Sun Sep 04 21:19:44 2005 +0000
     9.3 @@ -0,0 +1,225 @@
     9.4 +/*  Xenbus code for blkif tap
     9.5 +
     9.6 +    A Warfield.
     9.7 +
      9.8 +    Hastily modified from the original backend code:
     9.9 +
    9.10 +    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
    9.11 +
    9.12 +    This program is free software; you can redistribute it and/or modify
    9.13 +    it under the terms of the GNU General Public License as published by
    9.14 +    the Free Software Foundation; either version 2 of the License, or
    9.15 +    (at your option) any later version.
    9.16 +
    9.17 +    This program is distributed in the hope that it will be useful,
    9.18 +    but WITHOUT ANY WARRANTY; without even the implied warranty of
    9.19 +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    9.20 +    GNU General Public License for more details.
    9.21 +
    9.22 +    You should have received a copy of the GNU General Public License
    9.23 +    along with this program; if not, write to the Free Software
    9.24 +    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    9.25 +*/
    9.26 +
    9.27 +#include <stdarg.h>
    9.28 +#include <linux/module.h>
    9.29 +#include <asm-xen/xenbus.h>
    9.30 +#include "common.h"
    9.31 +
    9.32 +struct backend_info
    9.33 +{
    9.34 +	struct xenbus_device *dev;
    9.35 +
    9.36 +	/* our communications channel */
    9.37 +	blkif_t *blkif;
    9.38 +
    9.39 +	long int frontend_id;
    9.40 +
    9.41 +	/* watch back end for changes */
    9.42 +	struct xenbus_watch backend_watch;
    9.43 +
    9.44 +	/* watch front end for changes */
    9.45 +	struct xenbus_watch watch;
    9.46 +	char *frontpath;
    9.47 +};
    9.48 +
    9.49 +static int blkback_remove(struct xenbus_device *dev)
    9.50 +{
    9.51 +	struct backend_info *be = dev->data;
    9.52 +
    9.53 +	if (be->watch.node)
    9.54 +		unregister_xenbus_watch(&be->watch);
    9.55 +	unregister_xenbus_watch(&be->backend_watch);
    9.56 +	if (be->blkif)
    9.57 +		blkif_put(be->blkif);
    9.58 +	if (be->frontpath)
    9.59 +		kfree(be->frontpath);
    9.60 +	kfree(be);
    9.61 +	return 0;
    9.62 +}
    9.63 +
    9.64 +/* Front end tells us frame. */
    9.65 +static void frontend_changed(struct xenbus_watch *watch, const char *node)
    9.66 +{
    9.67 +	unsigned long ring_ref;
    9.68 +	unsigned int evtchn;
    9.69 +	int err;
    9.70 +	struct backend_info *be
    9.71 +		= container_of(watch, struct backend_info, watch);
    9.72 +
    9.73 +	/* If other end is gone, delete ourself. */
    9.74 +	if (node && !xenbus_exists(be->frontpath, "")) {
    9.75 +		xenbus_rm(be->dev->nodename, "");
    9.76 +		device_unregister(&be->dev->dev);
    9.77 +		return;
    9.78 +	}
    9.79 +	if (be->blkif == NULL || be->blkif->status == CONNECTED)
    9.80 +		return;
    9.81 +
    9.82 +	err = xenbus_gather(be->frontpath, "ring-ref", "%lu", &ring_ref,
    9.83 +			    "event-channel", "%u", &evtchn, NULL);
    9.84 +	if (err) {
    9.85 +		xenbus_dev_error(be->dev, err,
    9.86 +				 "reading %s/ring-ref and event-channel",
    9.87 +				 be->frontpath);
    9.88 +		return;
    9.89 +	}
    9.90 +
    9.91 +	/* Map the shared frame, irq etc. */
    9.92 +	err = blkif_map(be->blkif, ring_ref, evtchn);
    9.93 +	if (err) {
    9.94 +		xenbus_dev_error(be->dev, err, "mapping ring-ref %lu port %u",
    9.95 +				 ring_ref, evtchn);
    9.96 +		goto abort;
    9.97 +	}
    9.98 +
    9.99 +	xenbus_dev_ok(be->dev);
   9.100 +
   9.101 +	return;
   9.102 +
   9.103 +abort:
   9.104 +	xenbus_transaction_end(1);
   9.105 +}
   9.106 +
   9.107 +/* 
   9.108 +   Setup supplies physical device.  
   9.109 +   We provide event channel and device details to front end.
   9.110 +   Frontend supplies shared frame and event channel.
   9.111 + */
   9.112 +static void backend_changed(struct xenbus_watch *watch, const char *node)
   9.113 +{
   9.114 +	int err;
   9.115 +	char *p;
   9.116 +	long int handle;
   9.117 +	struct backend_info *be
   9.118 +		= container_of(watch, struct backend_info, backend_watch);
   9.119 +	struct xenbus_device *dev = be->dev;
   9.120 +
   9.121 +	if (be->blkif == NULL) {
   9.122 +		/* Front end dir is a number, which is used as the handle. */
   9.123 +		p = strrchr(be->frontpath, '/') + 1;
   9.124 +		handle = simple_strtoul(p, NULL, 0);
   9.125 +
   9.126 +		be->blkif = alloc_blkif(be->frontend_id);
   9.127 +		if (IS_ERR(be->blkif)) {
   9.128 +			err = PTR_ERR(be->blkif);
   9.129 +			be->blkif = NULL;
   9.130 +			xenbus_dev_error(dev, err, "creating block interface");
   9.131 +			return;
   9.132 +		}
   9.133 +
   9.134 +		/* Pass in NULL node to skip exist test. */
   9.135 +		frontend_changed(&be->watch, NULL);
   9.136 +	}
   9.137 +}
   9.138 +
   9.139 +static int blkback_probe(struct xenbus_device *dev,
   9.140 +			 const struct xenbus_device_id *id)
   9.141 +{
   9.142 +	struct backend_info *be;
   9.143 +	char *frontend;
   9.144 +	int err;
   9.145 +
   9.146 +	be = kmalloc(sizeof(*be), GFP_KERNEL);
   9.147 +	if (!be) {
   9.148 +		xenbus_dev_error(dev, -ENOMEM, "allocating backend structure");
   9.149 +		return -ENOMEM;
   9.150 +	}
   9.151 +	memset(be, 0, sizeof(*be));
   9.152 +
   9.153 +	frontend = NULL;
   9.154 +	err = xenbus_gather(dev->nodename,
   9.155 +			    "frontend-id", "%li", &be->frontend_id,
   9.156 +			    "frontend", NULL, &frontend,
   9.157 +			    NULL);
   9.158 +	if (XENBUS_EXIST_ERR(err))
   9.159 +		goto free_be;
   9.160 +	if (err < 0) {
   9.161 +		xenbus_dev_error(dev, err,
   9.162 +				 "reading %s/frontend or frontend-id",
   9.163 +				 dev->nodename);
   9.164 +		goto free_be;
   9.165 +	}
   9.166 +	if (strlen(frontend) == 0 || !xenbus_exists(frontend, "")) {
   9.167 +		/* If we can't get a frontend path and a frontend-id,
   9.168 +		 * then our bus-id is no longer valid and we need to
   9.169 +		 * destroy the backend device.
   9.170 +		 */
   9.171 +		err = -ENOENT;
   9.172 +		goto free_be;
   9.173 +	}
   9.174 +
   9.175 +	be->dev = dev;
   9.176 +	be->backend_watch.node = dev->nodename;
   9.177 +	be->backend_watch.callback = backend_changed;
   9.178 +	err = register_xenbus_watch(&be->backend_watch);
   9.179 +	if (err) {
   9.180 +		be->backend_watch.node = NULL;
   9.181 +		xenbus_dev_error(dev, err, "adding backend watch on %s",
   9.182 +				 dev->nodename);
   9.183 +		goto free_be;
   9.184 +	}
   9.185 +
   9.186 +	be->frontpath = frontend;
   9.187 +	be->watch.node = be->frontpath;
   9.188 +	be->watch.callback = frontend_changed;
   9.189 +	err = register_xenbus_watch(&be->watch);
   9.190 +	if (err) {
   9.191 +		be->watch.node = NULL;
   9.192 +		xenbus_dev_error(dev, err,
   9.193 +				 "adding frontend watch on %s",
   9.194 +				 be->frontpath);
   9.195 +		goto free_be;
   9.196 +	}
   9.197 +
   9.198 +	dev->data = be;
   9.199 +
   9.200 +	backend_changed(&be->backend_watch, dev->nodename);
   9.201 +	return 0;
   9.202 +
   9.203 + free_be:
   9.204 +	if (be->backend_watch.node)
   9.205 +		unregister_xenbus_watch(&be->backend_watch);
   9.206 +	if (frontend)
   9.207 +		kfree(frontend);
   9.208 +	kfree(be);
   9.209 +	return err;
   9.210 +}
   9.211 +
   9.212 +static struct xenbus_device_id blkback_ids[] = {
   9.213 +	{ "vbd" },
   9.214 +	{ "" }
   9.215 +};
   9.216 +
   9.217 +static struct xenbus_driver blkback = {
   9.218 +	.name = "vbd",
   9.219 +	.owner = THIS_MODULE,
   9.220 +	.ids = blkback_ids,
   9.221 +	.probe = blkback_probe,
   9.222 +	.remove = blkback_remove,
   9.223 +};
   9.224 +
   9.225 +void blkif_xenbus_init(void)
   9.226 +{
   9.227 +	xenbus_register_backend(&blkback);
   9.228 +}
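
Between them, blkback_probe() and frontend_changed() assume roughly the
following store layout (illustrative; the exact paths are whatever the tools
write):

    /*
     * <dev->nodename>           backend dir, watched by backend_watch
     *     frontend-id   = "<domid of the frontend>"
     *     frontend      = "<frontpath>"
     *
     * <frontpath>               frontend dir, watched by be->watch; its
     *                           last path component is used as the handle
     *     ring-ref      = "<grant reference of the shared ring page>"
     *     event-channel = "<the frontend's event-channel port>"
     */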
    10.1 --- a/linux-2.6-xen-sparse/mm/memory.c	Sun Sep 04 15:08:16 2005 +0000
    10.2 +++ b/linux-2.6-xen-sparse/mm/memory.c	Sun Sep 04 21:19:44 2005 +0000
    10.3 @@ -954,10 +954,8 @@ int get_user_pages(struct task_struct *t
    10.4                          i++;
    10.5                          start += PAGE_SIZE;
    10.6                          len--;
    10.7 -printk(KERN_ALERT "HIT  0x%lx\n", start);
    10.8                          continue;
    10.9                      } 
   10.10 -else printk(KERN_ALERT "MISS 0x%lx\n", start);
   10.11                  }
   10.12  
   10.13  		if (!vma || (vma->vm_flags & VM_IO)
    11.1 --- a/tools/blktap/Makefile	Sun Sep 04 15:08:16 2005 +0000
    11.2 +++ b/tools/blktap/Makefile	Sun Sep 04 21:19:44 2005 +0000
    11.3 @@ -6,7 +6,8 @@ XEN_ROOT = ../..
    11.4  include $(XEN_ROOT)/tools/Rules.mk
    11.5  
    11.6  SUBDIRS :=
    11.7 -SUBDIRS += parallax
    11.8 +SUBDIRS += ublkback
    11.9 +#SUBDIRS += parallax
   11.10  
   11.11  BLKTAP_INSTALL_DIR = /usr/sbin
   11.12  
   11.13 @@ -14,12 +15,12 @@ INSTALL            = install
   11.14  INSTALL_PROG       = $(INSTALL) -m0755
   11.15  INSTALL_DIR        = $(INSTALL) -d -m0755
   11.16  
   11.17 -INCLUDES += -I. -I $(XEN_LIBXC)
   11.18 +INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
   11.19  
   11.20  LIBS     := -lpthread -lz
   11.21  
   11.22  SRCS     :=
   11.23 -SRCS     += blktaplib.c
   11.24 +SRCS     += blktaplib.c xenbus.c blkif.c
   11.25  
   11.26  CFLAGS   += -Wall
   11.27  CFLAGS   += -Werror
   11.28 @@ -28,17 +29,20 @@ CFLAGS   += -Wno-unused
   11.29  CFLAGS   += -g3
   11.30  CFLAGS   += -fno-strict-aliasing
   11.31  CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
   11.32 +# get asprintf():
   11.33 +CFLAGS   += -D _GNU_SOURCE
   11.34  # Get gcc to generate the dependencies for us.
   11.35  CFLAGS   += -Wp,-MD,.$(@F).d
   11.36  CFLAGS   += $(INCLUDES) 
   11.37  DEPS     = .*.d
   11.38  
   11.39  OBJS     = $(patsubst %.c,%.o,$(SRCS))
   11.40 -IBINS    = blkdump
   11.41 +IBINS   :=
   11.42 +#IBINS   += blkdump
   11.43  
   11.44  LIB      = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
   11.45  
   11.46 -all: mk-symlinks libblktap.so blkdump
   11.47 +all: mk-symlinks libblktap.so #blkdump
   11.48  	@set -e; for subdir in $(SUBDIRS); do \
   11.49  		$(MAKE) -C $$subdir $@;       \
   11.50  	done
   11.51 @@ -59,7 +63,7 @@ install: all
   11.52  	$(INSTALL_DIR) -p $(DESTDIR)/usr/include
   11.53  	$(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
   11.54  	$(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
   11.55 -	$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR)
   11.56 +	#$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR)
   11.57  	@set -e; for subdir in $(SUBDIRS); do \
   11.58  		$(MAKE) -C $$subdir $@;       \
   11.59  	done
   11.60 @@ -79,14 +83,16 @@ rpm: all
   11.61  	mv staging/i386/*.rpm .
   11.62  	rm -rf staging
   11.63  
   11.64 -libblktap.so: $(OBJS)
   11.65 -	$(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared -o      \
   11.66 -	      libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
   11.67 +libblktap.so: $(OBJS) 
   11.68 +	$(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared         \
   11.69 +	      -L$(XEN_XENSTORE) -l xenstore                       \
   11.70 +	      -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
   11.71  	ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
   11.72  	ln -sf libblktap.so.$(MAJOR) $@
   11.73  
   11.74  blkdump: libblktap.so
   11.75 -	$(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. -l blktap blkdump.c
   11.76 +	$(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. \
   11.77 +	      -l blktap blkdump.c
   11.78  
   11.79  .PHONY: TAGS clean install mk-symlinks rpm
   11.80  
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/tools/blktap/README.sept05	Sun Sep 04 21:19:44 2005 +0000
    12.3 @@ -0,0 +1,33 @@
     12.4 +The blktap driver has been substantially rewritten, based on the
     12.5 +current blkback driver.  I've removed passthrough support, as it is
     12.6 +broken by the move to grant tables and the lack of transitive
     12.7 +grants.  A blktap VM is now only capable of terminating block
     12.8 +requests in userspace.
    12.9 +
    12.10 +ublkback/ contains a _very_ initial cut at a user-level version of the
    12.11 +block backend driver.  It gives a working example of how the current
    12.12 +tap interfaces are used, in particular w.r.t. the vbd directories in
    12.13 +xenstore (a distilled sketch follows the TODO list below).
   12.14 +
   12.15 +parallax/ contains fairly recent parallax code.  This does not run on
    12.16 +the changed blktap interface, but should only be a couple of hours'
   12.17 +work to get going again.
   12.18 +
   12.19 +All of the tricky bits are done, but there is plenty of cleaning to
   12.20 +do, and the top-level functionality is not here yet.  At the moment,
   12.21 +the daemon ignores the pdev requested by the tools and opens the file 
   12.22 +or device specified by TMP_IMAGE_FILE_NAME in ublkback.c.
   12.23 +
   12.24 +TODO:
    12.25 +1. Fix the daemon to let the pdev in the store specify the device to open.
   12.26 +2. Add support (to tools as well) to mount arbitrary files...
   12.27 +   just write the filename to mount into the store, instead of pdev.
    12.28 +3. Re-examine blkif refcounting; it is almost certainly broken at the moment.
   12.29 +   - creating a blkif should take a reference.
   12.30 +   - each inflight request should take a reference on dequeue in blktaplib
   12.31 +   - sending responses should drop refs.
   12.32 +   - blkif should be implicitly freed when refcounts fall to 0.
   12.33 +4. Modify the parallax req/rsp code as per ublkback to use the new tap 
   12.34 +   interfaces. 
    12.35 +5. Write a front end that allows parallax and normal mounts to coexist.
   12.36 +6. Allow blkback and blktap to run at the same time.
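
For orientation, the shape of a tap application against the new per-vbd
interfaces, distilled from ublkback (a sketch under made-up names, not a
drop-in program):

    #include "blktaplib.h"

    static int my_request_hook(blkif_t *blkif, blkif_request_t *req, int last)
    {
        /* Terminate the request in userspace, then overwrite *req with a
         * response before returning. */
        return BLKTAP_RESPOND;
    }

    static int my_new_blkif(blkif_t *blkif)
    {
        blkif_register_request_hook(blkif, "my hook", my_request_hook);
        return 0;
    }

    int main(int argc, char *argv[])
    {
        register_new_blkif_hook(my_new_blkif); /* called per probed vbd  */
        blktap_listen();                       /* poll loop: tap + store */
        return 0;
    }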
    13.1 --- a/tools/blktap/blkdump.c	Sun Sep 04 15:08:16 2005 +0000
    13.2 +++ b/tools/blktap/blkdump.c	Sun Sep 04 21:19:44 2005 +0000
    13.3 @@ -8,85 +8,18 @@
    13.4  #include <stdio.h>
    13.5  #include "blktaplib.h"
    13.6   
    13.7 -int control_print(control_msg_t *msg)
    13.8 -{
    13.9 -    if (msg->type != CMSG_BLKIF_BE) 
   13.10 -    {
   13.11 -        printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type);
   13.12 -        return 0;
   13.13 -    }
   13.14 -    
   13.15 -    switch(msg->subtype)
   13.16 -    {
   13.17 -    case CMSG_BLKIF_BE_CREATE:
   13.18 -        if ( msg->length != sizeof(blkif_be_create_t) )
   13.19 -            goto parse_error;
   13.20 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n",
   13.21 -                ((blkif_be_create_t *)msg->msg)->domid,
   13.22 -                ((blkif_be_create_t *)msg->msg)->blkif_handle);
   13.23 -        break; 
   13.24 -    case CMSG_BLKIF_BE_DESTROY:
   13.25 -        if ( msg->length != sizeof(blkif_be_destroy_t) )
   13.26 -            goto parse_error;
   13.27 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n",
   13.28 -                ((blkif_be_destroy_t *)msg->msg)->domid,
   13.29 -                ((blkif_be_destroy_t *)msg->msg)->blkif_handle);
   13.30 -        break;   
   13.31 -    case CMSG_BLKIF_BE_CONNECT:
   13.32 -        if ( msg->length != sizeof(blkif_be_connect_t) )
   13.33 -            goto parse_error;
   13.34 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_CONNECT(d:%d,h:%d)\n",
   13.35 -                ((blkif_be_connect_t *)msg->msg)->domid,
   13.36 -                ((blkif_be_connect_t *)msg->msg)->blkif_handle);
   13.37 -        break;        
   13.38 -    case CMSG_BLKIF_BE_DISCONNECT:
   13.39 -        if ( msg->length != sizeof(blkif_be_disconnect_t) )
   13.40 -            goto parse_error;
   13.41 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_DISCONNECT(d:%d,h:%d)\n",
   13.42 -                ((blkif_be_disconnect_t *)msg->msg)->domid,
   13.43 -                ((blkif_be_disconnect_t *)msg->msg)->blkif_handle);
   13.44 -        break;     
   13.45 -    case CMSG_BLKIF_BE_VBD_CREATE:
   13.46 -        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
   13.47 -            goto parse_error;
   13.48 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_CREATE(d:%d,h:%d,v:%d)\n",
   13.49 -                ((blkif_be_vbd_create_t *)msg->msg)->domid,
   13.50 -                ((blkif_be_vbd_create_t *)msg->msg)->blkif_handle,
   13.51 -                ((blkif_be_vbd_create_t *)msg->msg)->vdevice);
   13.52 -        break;
   13.53 -    case CMSG_BLKIF_BE_VBD_DESTROY:
   13.54 -        if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
   13.55 -            goto parse_error;
   13.56 -        printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_DESTROY(d:%d,h:%d,v:%d)\n",
   13.57 -                ((blkif_be_vbd_destroy_t *)msg->msg)->domid,
   13.58 -                ((blkif_be_vbd_destroy_t *)msg->msg)->blkif_handle,
   13.59 -                ((blkif_be_vbd_destroy_t *)msg->msg)->vdevice);
   13.60 -        break;
   13.61 -    default:
   13.62 -        goto parse_error;
   13.63 -    }
   13.64 -   
   13.65 -    return 0; 
   13.66 -      
   13.67 -parse_error:
   13.68 -    printf("[CONTROL_MSG] Bad message type or length!\n");
   13.69 -    return 0;
   13.70 -}
   13.71 - 
   13.72  int request_print(blkif_request_t *req)
   13.73  {
   13.74      int i;
   13.75      unsigned long fas;
   13.76      
   13.77 -    if ( req->operation == BLKIF_OP_PROBE ) {
   13.78 -        printf("[%2u:%2u<%s]\n", ID_TO_DOM(req->id), ID_TO_IDX(req->id),
   13.79 -                blkif_op_name[req->operation]);
   13.80 -        return BLKTAP_PASS;
   13.81 -    } else {
   13.82 +    if ( (req->operation == BLKIF_OP_READ) ||
   13.83 +         (req->operation == BLKIF_OP_WRITE) )
   13.84 +    {
   13.85          printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", 
   13.86                  ID_TO_DOM(req->id), ID_TO_IDX(req->id), 
   13.87                  blkif_op_name[req->operation], 
   13.88 -                req->nr_segments, req->device, 
   13.89 +                req->nr_segments, req->handle, 
   13.90                  req->sector_number);
   13.91          
   13.92          
   13.93 @@ -99,6 +32,8 @@ int request_print(blkif_request_t *req)
   13.94                      );
   13.95          }
   13.96              
   13.97 +    } else {
   13.98 +        printf("Unknown request message type.\n");
   13.99      }
  13.100      
  13.101      return BLKTAP_PASS;
  13.102 @@ -106,23 +41,22 @@ int request_print(blkif_request_t *req)
  13.103  
  13.104  int response_print(blkif_response_t *rsp)
  13.105  {   
  13.106 -    if ( rsp->operation == BLKIF_OP_PROBE ) {
  13.107 -        printf("[%2u:%2u>%s]\n", ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id),
  13.108 -                blkif_op_name[rsp->operation]);
  13.109 -        return BLKTAP_PASS;
  13.110 -    } else {
  13.111 +    if ( (rsp->operation == BLKIF_OP_READ) ||
  13.112 +         (rsp->operation == BLKIF_OP_WRITE) )
  13.113 +    {
  13.114          printf("[%2u:%2u>%5s] (status: %d)\n", 
  13.115                  ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), 
  13.116                  blkif_op_name[rsp->operation], 
  13.117                  rsp->status);
  13.118              
  13.119 +    } else {
   13.120 +        printf("Unknown response message type.\n");
  13.121      }
  13.122      return BLKTAP_PASS;
  13.123  }
  13.124  
  13.125  int main(int argc, char *argv[])
  13.126  {
  13.127 -    blktap_register_ctrl_hook("control_print", control_print);
  13.128      blktap_register_request_hook("request_print", request_print);
  13.129      blktap_register_response_hook("response_print", response_print);
  13.130      blktap_listen();
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/tools/blktap/blkif.c	Sun Sep 04 21:19:44 2005 +0000
    14.3 @@ -0,0 +1,213 @@
    14.4 +/*
    14.5 + * blkif.c
    14.6 + * 
    14.7 + * The blkif interface for blktap.  A blkif describes an in-use virtual disk.
    14.8 + */
    14.9 +
   14.10 +#include <stdio.h>
   14.11 +#include <stdlib.h>
   14.12 +#include <errno.h>
   14.13 +#include <string.h>
   14.14 +#include <err.h>
   14.15 +
   14.16 +#include "blktaplib.h"
   14.17 +
   14.18 +#if 1
   14.19 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   14.20 +#else
   14.21 +#define DPRINTF(_f, _a...) ((void)0)
   14.22 +#endif
   14.23 +
   14.24 +#define BLKIF_HASHSZ 1024
   14.25 +#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
   14.26 +
   14.27 +static blkif_t      *blkif_hash[BLKIF_HASHSZ];
   14.28 +
   14.29 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
   14.30 +{
   14.31 +    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
   14.32 +    while ( (blkif != NULL) && 
   14.33 +            ((blkif->domid != domid) || (blkif->handle != handle)) )
   14.34 +        blkif = blkif->hash_next;
   14.35 +    return blkif;
   14.36 +}
   14.37 +
   14.38 +blkif_t *alloc_blkif(domid_t domid)
   14.39 +{
   14.40 +    blkif_t *blkif;
   14.41 +
   14.42 +    blkif = (blkif_t *)malloc(sizeof(blkif_t));
   14.43 +    if (!blkif)
   14.44 +        return NULL;
   14.45 +
   14.46 +    memset(blkif, 0, sizeof(*blkif));
   14.47 +    blkif->domid = domid;
   14.48 +
   14.49 +    return blkif;
   14.50 +}
   14.51 +
   14.52 +static int (*new_blkif_hook)(blkif_t *blkif) = NULL;
   14.53 +void register_new_blkif_hook(int (*fn)(blkif_t *blkif))
   14.54 +{
   14.55 +    new_blkif_hook = fn;
   14.56 +}
   14.57 +
   14.58 +int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
   14.59 +               long int readonly)
   14.60 +{
   14.61 +    domid_t domid;
   14.62 +    blkif_t **pblkif;
   14.63 +    
   14.64 +    if (blkif == NULL)
   14.65 +        return -EINVAL;
   14.66 +
   14.67 +    domid = blkif->domid;
   14.68 +    blkif->handle   = handle;
   14.69 +    blkif->pdev     = pdev;
   14.70 +    blkif->readonly = readonly;
   14.71 +
    14.72 +    /*
    14.73 +     * Call out to the new_blkif_hook.  The tap application should
    14.74 +     * define this, and it should return having set blkif->ops before
    14.75 +     * the blkif is wired into the hash below.
    14.76 +     */
   14.77 +    if (new_blkif_hook == NULL)
   14.78 +    {
   14.79 +        warn("Probe detected a new blkif, but no new_blkif_hook!");
   14.80 +        return -1;
   14.81 +    }
   14.82 +    new_blkif_hook(blkif);
   14.83 +
   14.84 +    /* Now wire it in. */
   14.85 +    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
   14.86 +    while ( *pblkif != NULL )
   14.87 +    {
   14.88 +        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
   14.89 +        {
   14.90 +            DPRINTF("Could not create blkif: already exists\n");
   14.91 +            return -1;
   14.92 +        }
   14.93 +        pblkif = &(*pblkif)->hash_next;
   14.94 +    }
   14.95 +    blkif->hash_next = NULL;
   14.96 +    *pblkif = blkif;
   14.97 +
   14.98 +    return 0;
   14.99 +}
  14.100 +
  14.101 +void free_blkif(blkif_t *blkif)
  14.102 +{
  14.103 +    blkif_t **pblkif, *curs;
  14.104 +    
  14.105 +    pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)];
  14.106 +    while ( (curs = *pblkif) != NULL )
  14.107 +    {
  14.108 +        if ( blkif == curs )
  14.109 +        {
  14.110 +            *pblkif = curs->hash_next;
  14.111 +        }
  14.112 +        pblkif = &curs->hash_next;
  14.113 +    }
  14.114 +    if (blkif != NULL)
  14.115 +        free(blkif);
  14.116 +}
  14.117 +
  14.118 +void blkif_register_request_hook(blkif_t *blkif, char *name, 
  14.119 +                                 int (*rh)(blkif_t *, blkif_request_t *, int)) 
  14.120 +{
  14.121 +    request_hook_t *rh_ent, **c;
  14.122 +    
  14.123 +    rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
  14.124 +    if (!rh_ent) 
  14.125 +    {
  14.126 +        warn("couldn't allocate a new hook");
  14.127 +        return;
  14.128 +    }
  14.129 +    
  14.130 +    rh_ent->func  = rh;
  14.131 +    rh_ent->next = NULL;
  14.132 +    if (asprintf(&rh_ent->name, "%s", name) == -1)
  14.133 +    {
  14.134 +        free(rh_ent);
  14.135 +        warn("couldn't allocate a new hook name");
  14.136 +        return;
  14.137 +    }
  14.138 +    
  14.139 +    c = &blkif->request_hook_chain;
  14.140 +    while (*c != NULL) {
  14.141 +        c = &(*c)->next;
  14.142 +    }
  14.143 +    *c = rh_ent;
  14.144 +}
  14.145 +
  14.146 +void blkif_register_response_hook(blkif_t *blkif, char *name, 
  14.147 +                                  int (*rh)(blkif_t *, blkif_response_t *, int)) 
  14.148 +{
  14.149 +    response_hook_t *rh_ent, **c;
  14.150 +    
  14.151 +    rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
  14.152 +    if (!rh_ent) 
  14.153 +    { 
  14.154 +        warn("couldn't allocate a new hook");
  14.155 +        return;
  14.156 +    }
  14.157 +    
  14.158 +    rh_ent->func  = rh;
  14.159 +    rh_ent->next = NULL;
  14.160 +    if (asprintf(&rh_ent->name, "%s", name) == -1)
  14.161 +    {
  14.162 +        free(rh_ent);
  14.163 +        warn("couldn't allocate a new hook name");
  14.164 +        return;
  14.165 +    }
  14.166 +    
  14.167 +    c = &blkif->response_hook_chain;
  14.168 +    while (*c != NULL) {
  14.169 +        c = &(*c)->next;
  14.170 +    }
  14.171 +    *c = rh_ent;
  14.172 +}
  14.173 +
  14.174 +void blkif_print_hooks(blkif_t *blkif)
  14.175 +{
  14.176 +    request_hook_t  *req_hook;
  14.177 +    response_hook_t *rsp_hook;
  14.178 +    
  14.179 +    DPRINTF("Request Hooks:\n");
  14.180 +    req_hook = blkif->request_hook_chain;
  14.181 +    while (req_hook != NULL)
  14.182 +    {
  14.183 +        DPRINTF("  [0x%p] %s\n", req_hook->func, req_hook->name);
  14.184 +        req_hook = req_hook->next;
  14.185 +    }
  14.186 +    
  14.187 +    DPRINTF("Response Hooks:\n");
  14.188 +    rsp_hook = blkif->response_hook_chain;
  14.189 +    while (rsp_hook != NULL)
  14.190 +    {
  14.191 +        DPRINTF("  [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
  14.192 +        rsp_hook = rsp_hook->next;
  14.193 +    }
  14.194 +}
  14.195 +
  14.196 +
  14.197 +long int vbd_size(blkif_t *blkif)
  14.198 +{
  14.199 +    return 1000000000;
  14.200 +}
  14.201 +
  14.202 +long int vbd_secsize(blkif_t *blkif)
  14.203 +{
  14.204 +    return 512;
  14.205 +}
  14.206 +
  14.207 +unsigned vbd_info(blkif_t *blkif)
  14.208 +{
  14.209 +    return 0;
  14.210 +}
  14.211 +
  14.212 +
  14.213 +void __init_blkif(void)
  14.214 +{    
  14.215 +    memset(blkif_hash, 0, sizeof(blkif_hash));
  14.216 +}
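
The hooks attached above are invoked by the dispatch loop in blktaplib.c,
which switches on their return value.  A hypothetical request hook (the final
argument flags the last request of the current batch):

    static int print_and_pass(blkif_t *blkif, blkif_request_t *req, int last)
    {
        DPRINTF("dom %u idx %u op %d\n",
                ID_TO_DOM(req->id), ID_TO_IDX(req->id), req->operation);
        return BLKTAP_PASS;    /* keep walking this blkif's hook chain;
                                * BLKTAP_RESPOND: *req now holds a response;
                                * BLKTAP_STOLEN:  the hook owns the request */
    }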
    15.1 --- a/tools/blktap/blktaplib.c	Sun Sep 04 15:08:16 2005 +0000
    15.2 +++ b/tools/blktap/blktaplib.c	Sun Sep 04 21:19:44 2005 +0000
    15.3 @@ -24,7 +24,7 @@
    15.4  #include <string.h>
    15.5  #include <unistd.h>
    15.6  #include <pthread.h>
    15.7 -
    15.8 +#include <xs.h>
    15.9                                                                       
   15.10  #define __COMPILING_BLKTAP_LIB
   15.11  #include "blktaplib.h"
   15.12 @@ -34,29 +34,26 @@
   15.13  #else
   15.14  #define DPRINTF(_f, _a...) ((void)0)
   15.15  #endif
   15.16 -#define DEBUG_RING_IDXS 1
   15.17 +#define DEBUG_RING_IDXS 0
   15.18  
   15.19  #define POLLRDNORM     0x040 
   15.20  
   15.21  #define BLKTAP_IOCTL_KICK 1
   15.22  
   15.23 +
   15.24  void got_sig_bus();
   15.25  void got_sig_int();
   15.26  
   15.27  /* in kernel these are opposite, but we are a consumer now. */
   15.28  blkif_back_ring_t  fe_ring; /* slightly counterintuitive ;) */
   15.29  blkif_front_ring_t be_ring; 
   15.30 -ctrl_back_ring_t   ctrl_ring;
   15.31  
   15.32  unsigned long mmap_vstart = 0;
   15.33  char *blktap_mem;
   15.34  int fd = 0;
   15.35  
   15.36 -#define BLKTAP_RING_PAGES       3 /* Ctrl, Back, Front */
   15.37 -/*#define BLKTAP_MMAP_PAGES       ((11 + 1) * 64)*/
   15.38 -#define BLKTAP_MMAP_PAGES \
   15.39 -    ((BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) * BLKIF_RING_SIZE)
   15.40 -#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + BLKTAP_MMAP_PAGES)
   15.41 +#define BLKTAP_RING_PAGES       1 /* Front */
   15.42 +#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES)
   15.43      
   15.44  int bad_count = 0;
   15.45  void bad(void)
   15.46 @@ -79,126 +76,13 @@ inline unsigned int ID_TO_IDX(unsigned l
   15.47  }
   15.48  
   15.49  inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
   15.50 -/*
   15.51 +
   15.52  static int (*request_hook)(blkif_request_t *req) = NULL;
   15.53  static int (*response_hook)(blkif_response_t *req) = NULL;
   15.54 -*/
   15.55 -
   15.56 -/*-----[ Request/Response hook chains.]----------------------------------*/
   15.57 -
   15.58 -#define HOOK_NAME_MAX 50
   15.59 -        
   15.60 -typedef struct ctrl_hook_st {
   15.61 -    char name[HOOK_NAME_MAX];
   15.62 -    int (*func)(control_msg_t *);
   15.63 -    struct ctrl_hook_st *next;
   15.64 -} ctrl_hook_t;
   15.65 -        
   15.66 -typedef struct request_hook_st {
   15.67 -    char name[HOOK_NAME_MAX];
   15.68 -    int (*func)(blkif_request_t *);
   15.69 -    struct request_hook_st *next;
   15.70 -} request_hook_t;
   15.71 -
   15.72 -typedef struct response_hook_st {
   15.73 -    char name[HOOK_NAME_MAX];
   15.74 -    int (*func)(blkif_response_t *);
   15.75 -    struct response_hook_st *next;
   15.76 -} response_hook_t;
   15.77 -
   15.78 -static ctrl_hook_t *ctrl_hook_chain = NULL;
   15.79 -static request_hook_t *request_hook_chain = NULL;
   15.80 -static response_hook_t *response_hook_chain = NULL;
   15.81 -
   15.82 -void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *)) 
   15.83 -{
   15.84 -    ctrl_hook_t *ch_ent, **c;
   15.85 -    
   15.86 -    ch_ent = (ctrl_hook_t *)malloc(sizeof(ctrl_hook_t));
   15.87 -    if (!ch_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
   15.88 -    
   15.89 -    ch_ent->func  = ch;
   15.90 -    ch_ent->next = NULL;
   15.91 -    strncpy(ch_ent->name, name, HOOK_NAME_MAX);
   15.92 -    ch_ent->name[HOOK_NAME_MAX-1] = '\0';
   15.93 -    
   15.94 -    c = &ctrl_hook_chain;
   15.95 -    while (*c != NULL) {
   15.96 -        c = &(*c)->next;
   15.97 -    }
   15.98 -    *c = ch_ent;
   15.99 -}
  15.100 -
  15.101 -void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *)) 
  15.102 -{
  15.103 -    request_hook_t *rh_ent, **c;
  15.104 -    
  15.105 -    rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
  15.106 -    if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
  15.107 -    
  15.108 -    rh_ent->func  = rh;
  15.109 -    rh_ent->next = NULL;
  15.110 -    strncpy(rh_ent->name, name, HOOK_NAME_MAX);
  15.111 -    
  15.112 -    c = &request_hook_chain;
  15.113 -    while (*c != NULL) {
  15.114 -        c = &(*c)->next;
  15.115 -    }
  15.116 -    *c = rh_ent;
  15.117 -}
  15.118 -
  15.119 -void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *)) 
  15.120 -{
  15.121 -    response_hook_t *rh_ent, **c;
  15.122 -    
  15.123 -    rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
  15.124 -    if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); }
  15.125 -    
  15.126 -    rh_ent->func  = rh;
  15.127 -    rh_ent->next = NULL;
  15.128 -    strncpy(rh_ent->name, name, HOOK_NAME_MAX);
  15.129 -    
  15.130 -    c = &response_hook_chain;
  15.131 -    while (*c != NULL) {
  15.132 -        c = &(*c)->next;
  15.133 -    }
  15.134 -    *c = rh_ent;
  15.135 -}
  15.136 -
  15.137 -void print_hooks(void)
  15.138 -{
  15.139 -    request_hook_t  *req_hook;
  15.140 -    response_hook_t *rsp_hook;
  15.141 -    ctrl_hook_t     *ctrl_hook;
  15.142 -    
  15.143 -    DPRINTF("Control Hooks:\n");
  15.144 -    ctrl_hook = ctrl_hook_chain;
  15.145 -    while (ctrl_hook != NULL)
  15.146 -    {
  15.147 -        DPRINTF("  [0x%p] %s\n", ctrl_hook->func, ctrl_hook->name);
  15.148 -        ctrl_hook = ctrl_hook->next;
  15.149 -    }
  15.150 -    
  15.151 -    DPRINTF("Request Hooks:\n");
  15.152 -    req_hook = request_hook_chain;
  15.153 -    while (req_hook != NULL)
  15.154 -    {
  15.155 -        DPRINTF("  [0x%p] %s\n", req_hook->func, req_hook->name);
  15.156 -        req_hook = req_hook->next;
  15.157 -    }
  15.158 -    
  15.159 -    DPRINTF("Response Hooks:\n");
  15.160 -    rsp_hook = response_hook_chain;
  15.161 -    while (rsp_hook != NULL)
  15.162 -    {
  15.163 -        DPRINTF("  [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
  15.164 -        rsp_hook = rsp_hook->next;
  15.165 -    }
  15.166 -}
  15.167          
  15.168  /*-----[ Data to/from Backend (server) VM ]------------------------------*/
  15.169  
  15.170 -
  15.171 +/*
  15.172  
  15.173  inline int write_req_to_be_ring(blkif_request_t *req)
  15.174  {
  15.175 @@ -214,6 +98,7 @@ inline int write_req_to_be_ring(blkif_re
  15.176      
  15.177      return 0;
  15.178  }
  15.179 +*/
  15.180  
  15.181  inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
  15.182  {
  15.183 @@ -230,14 +115,14 @@ inline int write_rsp_to_fe_ring(blkif_re
  15.184      return 0;
  15.185  }
  15.186  
  15.187 -static void apply_rsp_hooks(blkif_response_t *rsp)
  15.188 +static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp)
  15.189  {
  15.190      response_hook_t  *rsp_hook;
  15.191      
  15.192 -    rsp_hook = response_hook_chain;
  15.193 +    rsp_hook = blkif->response_hook_chain;
  15.194      while (rsp_hook != NULL)
  15.195      {
  15.196 -        switch(rsp_hook->func(rsp))
  15.197 +        switch(rsp_hook->func(blkif, rsp, 1))
  15.198          {
  15.199          case BLKTAP_PASS:
  15.200              break;
  15.201 @@ -248,15 +133,19 @@ static void apply_rsp_hooks(blkif_respon
  15.202      }
  15.203  }
  15.204  
  15.205 +
  15.206  static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER;
  15.207  
  15.208 -void blktap_inject_response(blkif_response_t *rsp)
  15.209 +void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp)
  15.210  {
  15.211      
  15.212 -    apply_rsp_hooks(rsp);
  15.213 -    
  15.214 +    apply_rsp_hooks(blkif, rsp);
  15.215 +  
  15.216      write_rsp_to_fe_ring(rsp);
  15.217 -    
  15.218 +}
  15.219 +
  15.220 +void blktap_kick_responses(void)
  15.221 +{
  15.222      pthread_mutex_lock(&push_mutex);
  15.223      
  15.224      RING_PUSH_RESPONSES(&fe_ring);
  15.225 @@ -277,7 +166,7 @@ typedef struct {
  15.226      int active;
  15.227  } pollhook_t;
  15.228  
  15.229 -static struct pollfd  pfd[MAX_POLLFDS+1];
  15.230 +static struct pollfd  pfd[MAX_POLLFDS+2]; /* tap and store are extra */
  15.231  static pollhook_t     pollhooks[MAX_POLLFDS];
  15.232  static unsigned int   ph_freelist[MAX_POLLFDS];
  15.233  static unsigned int   ph_cons, ph_prod;
  15.234 @@ -344,65 +233,65 @@ void __attribute__ ((constructor)) blkta
  15.235  
  15.236  int blktap_listen(void)
  15.237  {
  15.238 -    int               notify_be, notify_fe, tap_pfd;
  15.239 -    
  15.240 +    int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret;
  15.241 +    struct xs_handle *h;
  15.242 +    blkif_t *blkif;
  15.243 +
  15.244      /* comms rings: */
  15.245      blkif_request_t  *req;
  15.246      blkif_response_t *rsp;
  15.247 -    control_msg_t    *msg;
  15.248      blkif_sring_t    *sring;
  15.249 -    ctrl_sring_t     *csring;
  15.250      RING_IDX          rp, i, pfd_count; 
  15.251      
  15.252      /* pending rings */
  15.253      blkif_request_t req_pending[BLKIF_RING_SIZE];
  15.254 -    blkif_response_t rsp_pending[BLKIF_RING_SIZE];
   15.255 +    /* blkif_response_t rsp_pending[BLKIF_RING_SIZE]; */
  15.256      
  15.257      /* handler hooks: */
  15.258      request_hook_t   *req_hook;
  15.259      response_hook_t  *rsp_hook;
  15.260 -    ctrl_hook_t      *ctrl_hook;
  15.261      
  15.262      signal (SIGBUS, got_sig_bus);
  15.263      signal (SIGINT, got_sig_int);
  15.264      
  15.265 -    print_hooks();
  15.266 -    
  15.267 +    __init_blkif();
  15.268 +
  15.269      fd = open("/dev/blktap", O_RDWR);
  15.270 -    if (fd == -1) {
  15.271 -        printf("open failed! (%d)\n", errno);
  15.272 -        goto open_failed;
  15.273 -    }
  15.274 +    if (fd == -1)
  15.275 +        err(-1, "open failed!");
  15.276  
  15.277      blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, 
  15.278               PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  15.279  
  15.280 -    if ((int)blktap_mem == -1) {
  15.281 -        printf("mmap failed! (%d)\n", errno);
  15.282 -        goto mmap_failed;
  15.283 -    }
  15.284 +    if ((int)blktap_mem == -1) 
  15.285 +        err(-1, "mmap failed!");
  15.286  
  15.287      /* assign the rings to the mapped memory */
  15.288 -    csring = (ctrl_sring_t *)blktap_mem;
  15.289 -    BACK_RING_INIT(&ctrl_ring, csring, PAGE_SIZE);
  15.290 -    
  15.291 +/*
  15.292      sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
  15.293      FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE);
  15.294 -    
  15.295 -    sring = (blkif_sring_t *)((unsigned long)blktap_mem + (2 *PAGE_SIZE));
  15.296 +*/  
  15.297 +    sring = (blkif_sring_t *)((unsigned long)blktap_mem);
  15.298      BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE);
  15.299  
  15.300      mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT);
  15.301  
  15.302 +
  15.303 +    /* Set up store connection and watch. */
  15.304 +    h = xs_daemon_open();
  15.305 +    if (h == NULL) 
  15.306 +        err(-1, "xs_daemon_open");
  15.307 +    
  15.308 +    ret = add_blockdevice_probe_watch(h, "Domain-0");
  15.309 +    if (ret != 0)
  15.310 +        err(0, "adding device probewatch");
  15.311 +    
  15.312      ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
  15.313  
  15.314      while(1) {
  15.315          int ret;
  15.316          
  15.317          /* build the poll list */
  15.318 -        
  15.319 -        DPRINTF("Building poll list.\n");
  15.320 -        
  15.321          pfd_count = 0;
  15.322          for ( i=0; i < MAX_POLLFDS; i++ ) {
  15.323              pollhook_t *ph = &pollhooks[i];
  15.324 @@ -415,49 +304,31 @@ int blktap_listen(void)
  15.325              }
  15.326          }
  15.327  
  15.328 -        tap_pfd = pfd_count;
  15.329 +        tap_pfd = pfd_count++;
  15.330          pfd[tap_pfd].fd = fd;
  15.331          pfd[tap_pfd].events = POLLIN;
  15.332  
  15.333 -        DPRINTF("poll() %d fds.\n", pfd_count);
  15.334 +        store_pfd = pfd_count++;
  15.335 +        pfd[store_pfd].fd = xs_fileno(h);
  15.336 +        pfd[store_pfd].events = POLLIN;
  15.337          
  15.338 -        if ( (ret = (poll(pfd, pfd_count+1, 10000)) == 0) ) {
  15.339 +        if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) {
  15.340              if (DEBUG_RING_IDXS)
  15.341                  ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
  15.342              continue;
  15.343          }
  15.344  
  15.345 -        DPRINTF("poll returned %d\n", ret);
  15.346 -
  15.347          for (i=0; i < MAX_POLLFDS; i++) {
  15.348              if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) )
  15.349                  pollhooks[i].func(pollhooks[i].pfd->fd);
  15.350          }
  15.351          
  15.352 -        if (pfd[tap_pfd].revents) {
  15.353 -            
  15.354 -            /* empty the control ring */
  15.355 -            rp = ctrl_ring.sring->req_prod;
  15.356 -            rmb();
  15.357 -            for (i = ctrl_ring.req_cons; i < rp; i++)
  15.358 -            {
  15.359 -                msg = RING_GET_REQUEST(&ctrl_ring, i);
  15.360 +        if (pfd[store_pfd].revents) {
  15.361 +            ret = xs_fire_next_watch(h);
  15.362 +        }
  15.363  
  15.364 -                ctrl_hook = ctrl_hook_chain;
  15.365 -                while (ctrl_hook != NULL)
  15.366 -                {
  15.367 -                    DPRINTF("CTRL_HOOK: %s\n", ctrl_hook->name);
  15.368 -                    /* We currently don't respond to ctrl messages. */
  15.369 -                    ctrl_hook->func(msg);
  15.370 -                    ctrl_hook = ctrl_hook->next;
  15.371 -                }
  15.372 -            }
  15.373 -            /* Using this as a unidirectional ring. */
  15.374 -            ctrl_ring.req_cons = ctrl_ring.rsp_prod_pvt = i;
  15.375 -pthread_mutex_lock(&push_mutex);
  15.376 -            RING_PUSH_RESPONSES(&ctrl_ring);
  15.377 -pthread_mutex_unlock(&push_mutex);
  15.378 -            
  15.379 +        if (pfd[tap_pfd].revents) 
  15.380 +        {    
  15.381              /* empty the fe_ring */
  15.382              notify_fe = 0;
  15.383              notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
  15.384 @@ -465,44 +336,62 @@ pthread_mutex_unlock(&push_mutex);
  15.385              rmb();
  15.386              for (i = fe_ring.req_cons; i != rp; i++)
  15.387              {
  15.388 -                int done = 0; /* stop forwarding this request */
  15.389 +                int done = 0; 
  15.390  
  15.391                  req = RING_GET_REQUEST(&fe_ring, i);
  15.392                  memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
  15.393                  req = &req_pending[ID_TO_IDX(req->id)];
  15.394  
  15.395 -                DPRINTF("copying an fe request\n");
  15.396 +                blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle);
  15.397  
  15.398 -                req_hook = request_hook_chain;
  15.399 -                while (req_hook != NULL)
  15.400 +                if (blkif != NULL)
  15.401                  {
  15.402 -                    DPRINTF("REQ_HOOK: %s\n", req_hook->name);
  15.403 -                    switch(req_hook->func(req))
  15.404 +                    req_hook = blkif->request_hook_chain;
  15.405 +                    while (req_hook != NULL)
  15.406                      {
  15.407 -                    case BLKTAP_RESPOND:
  15.408 -                        apply_rsp_hooks((blkif_response_t *)req);
  15.409 -                        write_rsp_to_fe_ring((blkif_response_t *)req);
  15.410 -                        notify_fe = 1;
  15.411 -                        done = 1;
  15.412 -                        break;
  15.413 -                    case BLKTAP_STOLEN:
  15.414 -                        done = 1;
  15.415 -                        break;
  15.416 -                    case BLKTAP_PASS:
  15.417 -                        break;
  15.418 -                    default:
  15.419 -                        printf("Unknown request hook return value!\n");
  15.420 +                        switch(req_hook->func(blkif, req, ((i+1) == rp)))
  15.421 +                        {
  15.422 +                        case BLKTAP_RESPOND:
  15.423 +                            apply_rsp_hooks(blkif, (blkif_response_t *)req);
  15.424 +                            write_rsp_to_fe_ring((blkif_response_t *)req);
  15.425 +                            notify_fe = 1;
  15.426 +                            done = 1;
  15.427 +                            break;
  15.428 +                        case BLKTAP_STOLEN:
  15.429 +                            done = 1;
  15.430 +                            break;
  15.431 +                        case BLKTAP_PASS:
  15.432 +                            break;
  15.433 +                        default:
  15.434 +                            printf("Unknown request hook return value!\n");
  15.435 +                        }
  15.436 +                        if (done) break;
  15.437 +                        req_hook = req_hook->next;
  15.438                      }
  15.439 -                    if (done) break;
  15.440 -                    req_hook = req_hook->next;
  15.441                  }
  15.442  
  15.443 -                if (done == 0) write_req_to_be_ring(req);
  15.444 +                if (done == 0) 
  15.445 +                {
  15.446 +                    /* this was:  */
  15.447 +                    /* write_req_to_be_ring(req); */
  15.448 +
  15.449 +                    unsigned long id = req->id;
  15.450 +                    unsigned short operation = req->operation;
  15.451 +                    printf("Unterminated request!\n");
  15.452 +                    rsp = (blkif_response_t *)req;
  15.453 +                    rsp->id = id;
  15.454 +                    rsp->operation = operation;
  15.455 +                    rsp->status = BLKIF_RSP_ERROR;
  15.456 +                    write_rsp_to_fe_ring(rsp);
  15.457 +                    notify_fe = 1;
  15.458 +                    done = 1;
  15.459 +                }
  15.460  
  15.461              }
  15.462              fe_ring.req_cons = i;
  15.463  
  15.464              /* empty the be_ring */
  15.465 +/*
  15.466              notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
  15.467              rp = be_ring.sring->rsp_prod;
  15.468              rmb();
  15.469 @@ -519,9 +408,9 @@ pthread_mutex_unlock(&push_mutex);
  15.470                  write_rsp_to_fe_ring(rsp);
  15.471              }
  15.472              be_ring.rsp_cons = i;
  15.473 -
  15.474 +*/
  15.475              /* notify the domains */
  15.476 -
  15.477 +/*
  15.478              if (notify_be) {
  15.479                  DPRINTF("notifying be\n");
  15.480  pthread_mutex_lock(&push_mutex);
  15.481 @@ -529,13 +418,13 @@ pthread_mutex_lock(&push_mutex);
  15.482                  ioctl(fd, BLKTAP_IOCTL_KICK_BE);
  15.483  pthread_mutex_unlock(&push_mutex);
  15.484              }
  15.485 -
  15.486 +*/
  15.487              if (notify_fe) {
  15.488                  DPRINTF("notifying fe\n");
  15.489 -pthread_mutex_lock(&push_mutex);
  15.490 +                pthread_mutex_lock(&push_mutex);
  15.491                  RING_PUSH_RESPONSES(&fe_ring);
  15.492                  ioctl(fd, BLKTAP_IOCTL_KICK_FE);
  15.493 -pthread_mutex_unlock(&push_mutex);
  15.494 +                pthread_mutex_unlock(&push_mutex);
  15.495              }
  15.496          }        
  15.497      }
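
The loop above walks each VBD's hook chain in registration order: a hook
returns BLKTAP_PASS to let the request continue down the chain,
BLKTAP_RESPOND after converting the request in place into a response, or
BLKTAP_STOLEN once it has taken ownership (e.g. queued it for async I/O).
A minimal sketch of a logging hook against the new per-blkif interface
(the names here are illustrative, not part of the changeset):

    static int log_req_hook(blkif_t *blkif, blkif_request_t *req,
                            int batch_done)
    {
        /* ID_TO_DOM/ID_TO_IDX unpack the tap-assigned request id. */
        printf("dom%u req %u op %d%s\n",
               (unsigned)ID_TO_DOM(req->id), ID_TO_IDX(req->id),
               req->operation, batch_done ? " (end of batch)" : "");
        return BLKTAP_PASS;  /* not handled here; keep walking the chain */
    }

    /* attached from a new-blkif hook: */
    /*   blkif_register_request_hook(blkif, "logger", log_req_hook); */
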
    16.1 --- a/tools/blktap/blktaplib.h	Sun Sep 04 15:08:16 2005 +0000
    16.2 +++ b/tools/blktap/blktaplib.h	Sun Sep 04 21:19:44 2005 +0000
    16.3 @@ -2,6 +2,9 @@
    16.4   *
    16.5   * userland accessors to the block tap.
    16.6   *
     16.7 + * Sept 2/05 -- I'm scaling this back to only support block remappings
     16.8 + * to user space in a backend domain.  Passthrough and interposition can be
     16.9 + * re-added once transitive grants are available.
   16.10   */
   16.11   
   16.12  #ifndef __BLKTAPLIB_H__
   16.13 @@ -13,6 +16,7 @@
   16.14  #include <xen/io/blkif.h>
   16.15  #include <xen/io/ring.h>
   16.16  #include <xen/io/domain_controller.h>
   16.17 +#include <xs.h>
   16.18  
   16.19  /* /dev/xen/blktap resides at device number major=10, minor=202        */ 
   16.20  #define BLKTAP_MINOR 202
   16.21 @@ -49,12 +53,18 @@ static inline int BLKTAP_MODE_VALID(unsi
   16.22      return (
   16.23          ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
   16.24          ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
   16.25 +        ( arg == BLKTAP_MODE_INTERPOSE    ) );
   16.26 +/*
   16.27 +    return (
   16.28 +        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
   16.29 +        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
   16.30          ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
   16.31          ( arg == BLKTAP_MODE_INTERPOSE    ) ||
   16.32          ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
   16.33          ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
   16.34          ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
   16.35          );
   16.36 +*/
   16.37  }
   16.38  
   16.39  /* Return values for handling messages in hooks. */
   16.40 @@ -62,30 +72,89 @@ static inline int BLKTAP_MODE_VALID(unsi
   16.41  #define BLKTAP_RESPOND  1 /* Request is now a reply.  Return it.  */
   16.42  #define BLKTAP_STOLEN   2 /* Hook has stolen request.             */
   16.43  
   16.44 -#define domid_t unsigned short
   16.45 +//#define domid_t unsigned short
   16.46  
   16.47  inline unsigned int ID_TO_IDX(unsigned long id);
   16.48  inline domid_t ID_TO_DOM(unsigned long id);
   16.49  
   16.50 -void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
   16.51 -void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
   16.52 -void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
   16.53 -void blktap_inject_response(blkif_response_t *);
   16.54  int  blktap_attach_poll(int fd, short events, int (*func)(int));
   16.55  void blktap_detach_poll(int fd);
   16.56  int  blktap_listen(void);
   16.57  
   16.58 +struct blkif;
   16.59 +
   16.60 +typedef struct request_hook_st {
   16.61 +    char *name;
   16.62 +    int (*func)(struct blkif *, blkif_request_t *, int);
   16.63 +    struct request_hook_st *next;
   16.64 +} request_hook_t;
   16.65 +
   16.66 +typedef struct response_hook_st {
   16.67 +    char *name;
   16.68 +    int (*func)(struct blkif *, blkif_response_t *, int);
   16.69 +    struct response_hook_st *next;
   16.70 +} response_hook_t;
   16.71 +
   16.72 +struct blkif_ops {
   16.73 +    long int (*get_size)(struct blkif *blkif);
   16.74 +    long int (*get_secsize)(struct blkif *blkif);
   16.75 +    unsigned (*get_info)(struct blkif *blkif);
   16.76 +};
   16.77 +
   16.78 +typedef struct blkif {
   16.79 +    domid_t domid;
   16.80 +    long int handle;
   16.81 +
   16.82 +    long int pdev;
   16.83 +    long int readonly;
   16.84 +
   16.85 +    enum { DISCONNECTED, CONNECTED } state;
   16.86 +
   16.87 +    struct blkif_ops *ops;
   16.88 +    request_hook_t *request_hook_chain;
   16.89 +    response_hook_t *response_hook_chain;
   16.90 +
   16.91 +    struct blkif *hash_next;
   16.92 +
   16.93 +    void *prv;  /* device-specific data */
   16.94 +} blkif_t;
   16.95 +
   16.96 +void register_new_blkif_hook(int (*fn)(blkif_t *blkif));
   16.97 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
   16.98 +blkif_t *alloc_blkif(domid_t domid);
   16.99 +int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
  16.100 +               long int readonly);
  16.101 +void free_blkif(blkif_t *blkif);
  16.102 +void __init_blkif(void);
  16.103 +
  16.104 +
  16.105 +/* xenstore/xenbus: */
  16.106 +extern int add_blockdevice_probe_watch(struct xs_handle *h, 
  16.107 +                                       const char *domname);
  16.108 +int xs_fire_next_watch(struct xs_handle *h);
  16.109 +
  16.110 +
  16.111 +void blkif_print_hooks(blkif_t *blkif);
  16.112 +void blkif_register_request_hook(blkif_t *blkif, char *name, 
  16.113 +                             int (*rh)(blkif_t *, blkif_request_t *, int));
  16.114 +void blkif_register_response_hook(blkif_t *blkif, char *name, 
  16.115 +                             int (*rh)(blkif_t *, blkif_response_t *, int));
  16.116 +void blkif_inject_response(blkif_t *blkif, blkif_response_t *);
  16.117 +void blktap_kick_responses(void);
  16.118 +
  16.119 +/* this must match the underlying driver... */
  16.120 +#define MAX_PENDING_REQS 64
  16.121 +
  16.122  /* Accessing attached data page mappings */
  16.123 -#define MMAP_PAGES_PER_REQUEST \
  16.124 -    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
  16.125 -#define MMAP_VADDR(_req,_seg)                        \
  16.126 -    (mmap_vstart +                                   \
  16.127 -     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
  16.128 +#define MMAP_PAGES                                              \
  16.129 +    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
  16.130 +#define MMAP_VADDR(_req,_seg)                                   \
  16.131 +    (mmap_vstart +                                              \
  16.132 +     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
  16.133       ((_seg) * PAGE_SIZE))
  16.134  
  16.135  extern unsigned long mmap_vstart;
  16.136  
  16.137 -
  16.138  /* Defines that are only used by library clients */
  16.139  
  16.140  #ifndef __COMPILING_BLKTAP_LIB
  16.141 @@ -93,7 +162,6 @@ extern unsigned long mmap_vstart;
  16.142  static char *blkif_op_name[] = {
  16.143      [BLKIF_OP_READ]       = "READ",
  16.144      [BLKIF_OP_WRITE]      = "WRITE",
  16.145 -    [BLKIF_OP_PROBE]      = "PROBE",
  16.146  };
  16.147  
  16.148  #endif /* __COMPILING_BLKTAP_LIB */
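
Note that MMAP_VADDR changed shape with this checkin: there is no longer a
+1 message page per request, just BLKIF_MAX_SEGMENTS_PER_REQUEST data pages
for each of MAX_PENDING_REQS slots.  A hook that needs to touch a request's
data, as the user-level backend below does, locates segment i roughly like
this (a sketch; mmap_vstart is established when the library maps the tap
device):

    int idx = ID_TO_IDX(req->id);
    char *page = (char *)MMAP_VADDR(idx, i);
    /* the valid data may not start at the page boundary: */
    page += blkif_first_sect(req->frame_and_sects[i]) << 9;
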
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/tools/blktap/list.h	Sun Sep 04 21:19:44 2005 +0000
    17.3 @@ -0,0 +1,55 @@
    17.4 +/*
    17.5 + * list.h
    17.6 + * 
    17.7 + * This is a subset of linux's list.h intended to be used in user-space.
    17.8 + * 
    17.9 + */
   17.10 +
   17.11 +#ifndef __LIST_H__
   17.12 +#define __LIST_H__
   17.13 +
   17.14 +#define LIST_POISON1  ((void *) 0x00100100)
   17.15 +#define LIST_POISON2  ((void *) 0x00200200)
   17.16 +
   17.17 +struct list_head {
   17.18 +        struct list_head *next, *prev;
   17.19 +};
   17.20 + 
   17.21 +#define LIST_HEAD_INIT(name) { &(name), &(name) }
   17.22 + 
   17.23 +#define LIST_HEAD(name) \
   17.24 +        struct list_head name = LIST_HEAD_INIT(name)
   17.25 +
   17.26 +static inline void __list_add(struct list_head *new,
   17.27 +                              struct list_head *prev,
   17.28 +                              struct list_head *next)
   17.29 +{
   17.30 +        next->prev = new;
   17.31 +        new->next = next;
   17.32 +        new->prev = prev;
   17.33 +        prev->next = new;
   17.34 +}
   17.35 +
   17.36 +static inline void list_add(struct list_head *new, struct list_head *head)
   17.37 +{
   17.38 +        __list_add(new, head, head->next);
   17.39 +}
   17.40 +static inline void __list_del(struct list_head * prev, struct list_head * next)
   17.41 +{
   17.42 +        next->prev = prev;
   17.43 +        prev->next = next;
   17.44 +}
   17.45 +static inline void list_del(struct list_head *entry)
   17.46 +{
   17.47 +        __list_del(entry->prev, entry->next);
   17.48 +        entry->next = LIST_POISON1;
   17.49 +        entry->prev = LIST_POISON2;
   17.50 +}
   17.51 +#define list_entry(ptr, type, member)                                   \
   17.52 +        ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
   17.53 +#define list_for_each_entry(pos, head, member)                          \
   17.54 +        for (pos = list_entry((head)->next, typeof(*pos), member);      \
   17.55 +             &pos->member != (head);                                    \
   17.56 +             pos = list_entry(pos->member.next, typeof(*pos), member))
   17.57 +
   17.58 +#endif /* __LIST_H__ */
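
Usage mirrors the kernel list API; xenbus.c below keeps its backend_info
records on exactly this kind of list.  A small self-contained sketch
(assumes the usual <stdio.h>/<stdlib.h> includes):

    struct item {
        int value;
        struct list_head list;
    };

    static LIST_HEAD(items);

    static void demo(void)
    {
        struct item *pos, *it = malloc(sizeof(*it));
        it->value = 42;
        list_add(&it->list, &items);        /* push at the head */
        list_for_each_entry(pos, &items, list)
            printf("%d\n", pos->value);     /* prints 42 */
    }
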
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/tools/blktap/ublkback/Makefile	Sun Sep 04 21:19:44 2005 +0000
    18.3 @@ -0,0 +1,42 @@
    18.4 +
    18.5 +XEN_ROOT = ../../..
    18.6 +include $(XEN_ROOT)/tools/Rules.mk
    18.7 +
    18.8 +INCLUDES += -I..
    18.9 +
   18.10 +INSTALL            = install
   18.11 +INSTALL_PROG = $(INSTALL) -m0755
   18.12 +IBIN         = ublkback
   18.13 +INSTALL_DIR  = /usr/sbin
   18.14 +
   18.15 +CFLAGS   += -Wall
   18.16 +CFLAGS   += -Werror
   18.17 +CFLAGS   += -Wno-unused
   18.18 +#CFLAGS   += -O3
   18.19 +CFLAGS   += -g3
   18.20 +CFLAGS   += -fno-strict-aliasing
   18.21 +CFLAGS   += -I $(XEN_LIBXC)
   18.22 +CFLAGS   += $(INCLUDES) -I.
   18.23 +CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
   18.24 +# Get gcc to generate the dependencies for us.
   18.25 +CFLAGS   += -Wp,-MD,.$(@F).d
   18.26 +DEPS     = .*.d
   18.27 +
   18.28 +OBJS     = $(patsubst %.c,%.o,$(SRCS))
   18.29 +
   18.30 +all: $(IBIN)
   18.31 +
   18.32 +LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
   18.33 +
   18.34 +install:
   18.35 +	$(INSTALL_PROG) $(IBIN) $(DESTDIR)$(INSTALL_DIR)
   18.36 +clean:
    18.37 +	rm -rf *.o *~ $(DEPS) xen TAGS $(IBIN)
   18.38 +
    18.39 +ublkback: ublkback.c ublkbacklib.c
   18.40 +	$(CC) $(CFLAGS) -o ublkback -L$(XEN_LIBXC) -L. -L..  \
   18.41 +	      -lblktap -laio ublkback.c ublkbacklib.c -pg
   18.42 +
   18.43 +.PHONY: clean install
   18.44 +
   18.45 +-include $(DEPS)
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/tools/blktap/ublkback/ublkback.c	Sun Sep 04 21:19:44 2005 +0000
    19.3 @@ -0,0 +1,18 @@
    19.4 +/* ublkback.c
    19.5 + *
    19.6 + * libaio-based userlevel backend.
    19.7 + */
    19.8 +
    19.9 +#include "blktaplib.h"
   19.10 +#include "ublkbacklib.h"
   19.11 +
   19.12 +
   19.13 +int main(int argc, char *argv[])
   19.14 +{
   19.15 +    ublkback_init();
   19.16 +    
   19.17 +    register_new_blkif_hook(ublkback_new_blkif);
   19.18 +    blktap_listen();
   19.19 +    
   19.20 +    return 0;
   19.21 +}
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/tools/blktap/ublkback/ublkbacklib.c	Sun Sep 04 21:19:44 2005 +0000
    20.3 @@ -0,0 +1,477 @@
    20.4 +/* ublkbacklib.c
    20.5 + *
    20.6 + * file/device image-backed block device -- using linux libaio.
    20.7 + * 
    20.8 + * (c) 2004 Andrew Warfield.
    20.9 + *
   20.10 + * Xend has been modified to use an amorfs:[fsid] disk tag.
   20.11 + * This will show up as device type (maj:240,min:0) = 61440.
   20.12 + *
   20.13 + * The fsid is placed in the sec_start field of the disk extent.
   20.14 + *
   20.15 + * NOTE: This doesn't work.  Grrr.
   20.16 + */
   20.17 +
   20.18 +#define _GNU_SOURCE
   20.19 +#define __USE_LARGEFILE64
   20.20 +
   20.21 +#include <stdio.h>
   20.22 +#include <stdlib.h>
   20.23 +#include <fcntl.h>
   20.24 +#include <string.h>
   20.25 +#include <db.h>       
   20.26 +#include <sys/stat.h>
   20.27 +#include <sys/types.h>
   20.28 +#include <sys/poll.h>
   20.29 +#include <unistd.h>
   20.30 +#include <errno.h>
   20.31 +#include <libaio.h>
   20.32 +#include <pthread.h>
   20.33 +#include <time.h>
   20.34 +#include <err.h>
   20.35 +#include "blktaplib.h"
   20.36 +
   20.37 +/* XXXX:  */
    20.38 +/* Current code just attaches this one file/device to every VBD that comes up. */
   20.39 +//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
   20.40 +#define TMP_IMAGE_FILE_NAME "fc3.image"
   20.41 +
   20.42 +#define MAX_REQUESTS            64 /* must be synced with the blkif drivers. */
   20.43 +#define MAX_SEGMENTS_PER_REQ    11
   20.44 +#define SECTOR_SHIFT             9
   20.45 +#define MAX_AIO_REQS   (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
   20.46 +
   20.47 +#if 0
   20.48 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   20.49 +#else
   20.50 +#define DPRINTF(_f, _a...) ((void)0)
   20.51 +#endif
   20.52 +           
   20.53 +#if 1                                                                        
   20.54 +#define ASSERT(_p) \
   20.55 +    if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
   20.56 +    __LINE__, __FILE__); *(int*)0=0; }
   20.57 +#else
   20.58 +#define ASSERT(_p) ((void)0)
   20.59 +#endif                                                                     
   20.60 +
    20.61 +/* Note on pending_reqs: I assume all reqs are queued before they start to 
    20.62 + * get filled, so a count of 0 marks an unused record.
    20.63 + */
   20.64 +typedef struct {
   20.65 +    blkif_request_t  req;
   20.66 +    blkif_t         *blkif;
   20.67 +    int              count;
   20.68 +} pending_req_t;
   20.69 +
   20.70 +static pending_req_t    pending_list[MAX_REQUESTS];
   20.71 +static io_context_t  ctx;
   20.72 +static struct iocb  *iocb_free[MAX_AIO_REQS];
   20.73 +static int           iocb_free_count;
   20.74 +
    20.75 +/* ---[ Notification mechanism ]--------------------------------------- */
   20.76 +
   20.77 +enum { 
   20.78 +    READ   = 0,
   20.79 +    WRITE  = 1
   20.80 +};
   20.81 +
   20.82 +static int aio_notify[2];
   20.83 +static volatile int aio_listening = 0;
   20.84 +static pthread_mutex_t notifier_sem = PTHREAD_MUTEX_INITIALIZER;
   20.85 +
   20.86 +static struct io_event aio_events[MAX_AIO_REQS];
   20.87 +static int             aio_event_count = 0;
   20.88 +
   20.89 +/* this is commented out in libaio.h for some reason. */
   20.90 +extern int io_queue_wait(io_context_t ctx, struct timespec *timeout);
   20.91 +
   20.92 +static void *notifier_thread(void *arg)
   20.93 +{
   20.94 +    int ret; 
   20.95 +    int msg = 0x00feeb00;
   20.96 +    
   20.97 +    DPRINTF("Notifier thread started.\n");
   20.98 +    for (;;) {
   20.99 +        pthread_mutex_lock(&notifier_sem);
  20.100 +        if ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0) {
  20.101 +            aio_event_count = ret;
  20.102 +            write(aio_notify[WRITE], &msg, sizeof(msg));
  20.103 +        } else {
   20.104 +            printf("[io_getevents error! %d]\n", errno);
   20.105 +            pthread_mutex_unlock(&notifier_sem);
  20.106 +        }
  20.107 +    }
  20.108 +}
  20.109 +
  20.110 +/* --- Talking to xenstore: ------------------------------------------- */
  20.111 +
  20.112 +int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done);
  20.113 +int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done);
  20.114 +
  20.115 +typedef struct image {
  20.116 +    /* These need to turn into an array/rbtree for multi-disk support. */
  20.117 +    int  fd;
  20.118 +    u64  fsid;
  20.119 +    blkif_vdev_t   vdevice;
  20.120 +    long int size;
  20.121 +    long int secsize;
  20.122 +    long int info;
  20.123 +} image_t;
  20.124 +
  20.125 +long int ublkback_get_size(blkif_t *blkif)
  20.126 +{
  20.127 +    image_t *img = (image_t *)blkif->prv;
  20.128 +    return img->size;
  20.129 +}
  20.130 +
  20.131 +long int ublkback_get_secsize(blkif_t *blkif)
  20.132 +{
  20.133 +    image_t *img = (image_t *)blkif->prv;
  20.134 +    return img->secsize;
  20.135 +}
  20.136 +
  20.137 +unsigned ublkback_get_info(blkif_t *blkif)
  20.138 +{
  20.139 +    image_t *img = (image_t *)blkif->prv;
  20.140 +    return img->info;
  20.141 +}
  20.142 +
  20.143 +static struct blkif_ops ublkback_ops = {
  20.144 +    get_size:    ublkback_get_size,
  20.145 +    get_secsize: ublkback_get_secsize,
  20.146 +    get_info:    ublkback_get_info,
  20.147 +};
  20.148 +
  20.149 +int ublkback_new_blkif(blkif_t *blkif)
  20.150 +{
  20.151 +    image_t *image;
  20.152 +    struct stat stat;
  20.153 +    int ret;
  20.154 +
  20.155 +    image = (image_t *)malloc(sizeof(image_t));
  20.156 +    if (image == NULL) {
  20.157 +        printf("error allocating image record.\n");
  20.158 +        return -ENOMEM;
  20.159 +    }
  20.160 +
  20.161 +    /* Open it. */
  20.162 +    image->fd = open(TMP_IMAGE_FILE_NAME, 
  20.163 +                     O_RDWR | O_DIRECT | O_LARGEFILE);
  20.164 +
  20.165 +    if ((image->fd < 0) && (errno == EINVAL)) {
  20.166 +        /* Maybe O_DIRECT isn't supported. */
  20.167 +        warn("open() failed on '%s', trying again without O_DIRECT",
  20.168 +               TMP_IMAGE_FILE_NAME);
  20.169 +        image->fd = open(TMP_IMAGE_FILE_NAME, O_RDWR | O_LARGEFILE);
  20.170 +    }
  20.171 +
  20.172 +    if (image->fd < 0) {
  20.173 +        warn("Couldn't open image file!");
  20.174 +        free(image);
  20.175 +        return -EINVAL;
  20.176 +    }
  20.177 +
  20.178 +    /* Size it. */
  20.179 +    ret = fstat(image->fd, &stat);
  20.180 +    if (ret != 0) {
   20.181 +        printf("Couldn't stat image file!\n");
   20.182 +        close(image->fd); free(image); return -EINVAL;
  20.183 +    }
  20.184 +    
  20.185 +    image->size = (stat.st_size >> SECTOR_SHIFT);
  20.186 +
  20.187 +    /* TODO: IOCTL to get size of raw device. */
  20.188 +/*
  20.189 +  ret = ioctl(img->fd, BLKGETSIZE, &blksize);
  20.190 +  if (ret != 0) {
  20.191 +  printf("Couldn't ioctl image in PROBE!\n");
  20.192 +  goto err;
  20.193 +  }
  20.194 +*/
  20.195 +    if (image->size == 0)
  20.196 +        image->size =((u64) 16836057);
  20.197 +    image->secsize = 512;
  20.198 +    image->info = 0;
  20.199 +
  20.200 +    /* Register the hooks */
  20.201 +    blkif_register_request_hook(blkif, "Ublkback req.", ublkback_request);
  20.202 +    blkif_register_response_hook(blkif, "Ublkback resp.", ublkback_response);
  20.203 +
  20.204 +
  20.205 +    printf(">X<Created a new blkif! pdev was %ld, but you got %s\n", 
  20.206 +           blkif->pdev, TMP_IMAGE_FILE_NAME);
  20.207 +
  20.208 +    blkif->ops = &ublkback_ops;
  20.209 +    blkif->prv = (void *)image;
  20.210 +
  20.211 +    return 0;
  20.212 +}
  20.213 +
  20.214 +
  20.215 +/* --- Moving the bits: ----------------------------------------------- */
  20.216 +
  20.217 +static int batch_count = 0;
  20.218 +int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done)
  20.219 +{
  20.220 +    int fd;
  20.221 +    u64 sector;
  20.222 +    char *spage, *dpage;
  20.223 +    int ret, i, idx;
  20.224 +    blkif_response_t *rsp;
  20.225 +    domid_t dom = ID_TO_DOM(req->id);
  20.226 +    static struct iocb *ioq[MAX_SEGMENTS_PER_REQ*MAX_REQUESTS]; 
  20.227 +    static int io_idx = 0;
  20.228 +    struct iocb *io;
  20.229 +    image_t *img;
  20.230 +
  20.231 +    img = (image_t *)blkif->prv;
  20.232 +    fd = img->fd;
  20.233 +
  20.234 +    switch (req->operation) 
  20.235 +    {
  20.236 +    case BLKIF_OP_WRITE:
  20.237 +    {
  20.238 +        unsigned long size;
  20.239 +        
  20.240 +        
  20.241 +        batch_count++;
  20.242 +
  20.243 +        idx = ID_TO_IDX(req->id);
  20.244 +        ASSERT(pending_list[idx].count == 0);
  20.245 +        memcpy(&pending_list[idx].req, req, sizeof(*req));
  20.246 +        pending_list[idx].count = req->nr_segments;
  20.247 +        pending_list[idx].blkif = blkif;
  20.248 +        
  20.249 +        for (i = 0; i < req->nr_segments; i++) {
  20.250 +            
  20.251 +            sector = req->sector_number + (8*i);
  20.252 +            
  20.253 +            size = blkif_last_sect (req->frame_and_sects[i]) -
  20.254 +                   blkif_first_sect(req->frame_and_sects[i]) + 1;
  20.255 +            
  20.256 +            if (blkif_first_sect(req->frame_and_sects[i]) != 0)
  20.257 +            DPRINTF("iWR: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n",
  20.258 +                    req->sector_number, sector, 
  20.259 +                    blkif_first_sect(req->frame_and_sects[i]),
  20.260 +                    blkif_last_sect (req->frame_and_sects[i]),
  20.261 +                    (long)(sector << SECTOR_SHIFT));
  20.262 +                        
  20.263 +            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  20.264 +            spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
  20.265 +            
  20.266 +            /*convert size and sector to byte offsets */
  20.267 +            size   <<= SECTOR_SHIFT;
  20.268 +            sector <<= SECTOR_SHIFT;
  20.269 +            
  20.270 +            io = iocb_free[--iocb_free_count];
  20.271 +            io_prep_pwrite(io, fd, spage, size, sector);
  20.272 +            io->data = (void *)idx;
  20.273 +            //ioq[i] = io;
  20.274 +            ioq[io_idx++] = io;
  20.275 +        }
  20.276 +
  20.277 +        if (batch_done) {
  20.278 +            ret = io_submit(ctx, io_idx, ioq);
  20.279 +            batch_count = 0;
  20.280 +            if (ret < 0)
  20.281 +                printf("BADNESS: io_submit error! (%d)\n", errno);
  20.282 +            io_idx = 0;
  20.283 +        }
  20.284 +        
  20.285 +        return BLKTAP_STOLEN;
  20.286 +        
  20.287 +    }
  20.288 +    case BLKIF_OP_READ:
  20.289 +    {
  20.290 +        unsigned long size;
  20.291 +        
  20.292 +        batch_count++;
  20.293 +        idx = ID_TO_IDX(req->id);
  20.294 +        ASSERT(pending_list[idx].count == 0);
  20.295 +        memcpy(&pending_list[idx].req, req, sizeof(*req));
  20.296 +        pending_list[idx].count = req->nr_segments;
  20.297 +        pending_list[idx].blkif = blkif;
  20.298 +        
  20.299 +        for (i = 0; i < req->nr_segments; i++) {
  20.300 +            
  20.301 +            sector  = req->sector_number + (8*i);
  20.302 +            
  20.303 +            size = blkif_last_sect (req->frame_and_sects[i]) -
  20.304 +                   blkif_first_sect(req->frame_and_sects[i]) + 1;
  20.305 +            
  20.306 +            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  20.307 +            dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT;
  20.308 +            
  20.309 +            if (blkif_first_sect(req->frame_and_sects[i]) != 0)
  20.310 +            DPRINTF("iRD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
  20.311 +                    "pos: %15lu dpage: %p\n", 
  20.312 +                    req->sector_number, sector, 
  20.313 +                    blkif_first_sect(req->frame_and_sects[i]),
  20.314 +                    blkif_last_sect (req->frame_and_sects[i]),
  20.315 +                    (long)(sector << SECTOR_SHIFT), dpage);
  20.316 +            
  20.317 +            /*convert size and sector to byte offsets */
  20.318 +            size   <<= SECTOR_SHIFT;
  20.319 +            sector <<= SECTOR_SHIFT;
  20.320 +            
  20.321 +
  20.322 +            /*
  20.323 +             * NB: Looks like AIO now has non-page aligned support, this path 
  20.324 +             * can probably be removed...  Only really used for hunting
  20.325 +             * superblocks anyway... ;)
  20.326 +             */
  20.327 +            if ( ((unsigned long)dpage % PAGE_SIZE) != 0 ) {
  20.328 +                /* AIO to raw devices must be page aligned, so do this read
  20.329 +                 * synchronously.  The OS is probably just looking for 
  20.330 +                 * a superblock or something, so this won't hurt performance. 
  20.331 +                 */
  20.332 +                int ret;
  20.333 +
  20.334 +                printf("Slow path block read.\n");
  20.335 +                /* Question: do in-progress aio ops modify the file cursor? */
  20.336 +                ret = lseek(fd, sector, SEEK_SET);
  20.337 +                if (ret == (off_t)-1)
  20.338 +                    printf("lseek failed!\n");
  20.339 +                ret = read(fd, dpage, size);
  20.340 +                if (ret < 0)
  20.341 +                    printf("read problem (%d)\n", ret);
  20.342 +                printf("|\n|\n| read: %lld, %lu, %d\n|\n|\n", sector, size, ret);
  20.343 +
  20.344 +                /* not an async request any more... */
  20.345 +                pending_list[idx].count--;
  20.346 +
  20.347 +                rsp = (blkif_response_t *)req;
  20.348 +                rsp->id = req->id;
  20.349 +                rsp->operation = BLKIF_OP_READ;
  20.350 +                rsp->status = BLKIF_RSP_OKAY;
  20.351 +                return BLKTAP_RESPOND;  
  20.352 +                /* Doh -- need to flush aio if this is end-of-batch */
  20.353 +            }
  20.354 +
  20.355 +            io = iocb_free[--iocb_free_count];
  20.356 +            
  20.357 +            io_prep_pread(io, fd, dpage, size, sector);
  20.358 +            io->data = (void *)idx;
  20.359 +            
  20.360 +            ioq[io_idx++] = io;
  20.361 +            //ioq[i] = io;
  20.362 +        }
  20.363 +        
  20.364 +        if (batch_done) {
  20.365 +            ret = io_submit(ctx, io_idx, ioq);
  20.366 +            batch_count = 0;
  20.367 +            if (ret < 0)
  20.368 +                printf("BADNESS: io_submit error! (%d)\n", errno);
  20.369 +            io_idx = 0;
  20.370 +        }
  20.371 +        
  20.372 +        return BLKTAP_STOLEN;
  20.373 +        
  20.374 +    }
  20.375 +    }
  20.376 +    
   20.377 +    printf("Unknown block operation!\n");
   20.378 +    /* fall through: turn the request into an error response. */
  20.379 +    rsp = (blkif_response_t *)req;
  20.380 +    rsp->id = req->id;
  20.381 +    rsp->operation = req->operation;
  20.382 +    rsp->status = BLKIF_RSP_ERROR;
  20.383 +    return BLKTAP_RESPOND;  
  20.384 +}
  20.385 +
  20.386 +
  20.387 +int ublkback_pollhook(int fd)
  20.388 +{
  20.389 +    struct io_event *ep;
  20.390 +    int n, ret, idx;
  20.391 +    blkif_request_t *req;
  20.392 +    blkif_response_t *rsp;
  20.393 +    int responses_queued = 0;
  20.394 +    int pages=0;
  20.395 +    
  20.396 +    for (ep = aio_events; aio_event_count-- > 0; ep++) {
  20.397 +        struct iocb *io = ep->obj;
  20.398 +        idx = (int) ep->data;
  20.399 +        
  20.400 +        if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){
  20.401 +            printf("invalid index returned(%u)!\n", idx);
  20.402 +            break;
  20.403 +        }
  20.404 +        
  20.405 +        if ((int)ep->res < 0) 
  20.406 +            printf("***\n***aio request error! (%d,%d)\n***\n", 
  20.407 +                   (int)ep->res, (int)ep->res2);
  20.408 +        
  20.409 +        pending_list[idx].count--;
  20.410 +        iocb_free[iocb_free_count++] = io;
  20.411 +        pages++;
  20.412 +
  20.413 +        if (pending_list[idx].count == 0) {
  20.414 +            blkif_request_t tmp = pending_list[idx].req;
  20.415 +            rsp = (blkif_response_t *)&pending_list[idx].req;
  20.416 +            rsp->id = tmp.id;
  20.417 +            rsp->operation = tmp.operation;
  20.418 +            rsp->status = BLKIF_RSP_OKAY;
  20.419 +            blkif_inject_response(pending_list[idx].blkif, rsp);
  20.420 +            responses_queued++;
  20.421 +        }
  20.422 +    }
  20.423 +
  20.424 +    if (responses_queued) {
  20.425 +        blktap_kick_responses();
  20.426 +    }
  20.427 +    
  20.428 +    read(aio_notify[READ], &idx, sizeof(idx));
  20.429 +    aio_listening = 1;
  20.430 +    pthread_mutex_unlock(&notifier_sem);
  20.431 +    
  20.432 +    return 0;
  20.433 +}
  20.434 +
  20.435 +/* the image library terminates the request stream. _resp is a noop. */
  20.436 +int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done)
  20.437 +{   
  20.438 +    return BLKTAP_PASS;
  20.439 +}
  20.440 +
  20.441 +void ublkback_init(void)
  20.442 +{
  20.443 +    int i, rc;
  20.444 +    pthread_t p;
  20.445 +    
  20.446 +    for (i = 0; i < MAX_REQUESTS; i++)
  20.447 +        pending_list[i].count = 0; 
  20.448 +    
  20.449 +    memset(&ctx, 0, sizeof(ctx));
  20.450 +    rc = io_queue_init(MAX_AIO_REQS, &ctx);
  20.451 +    if (rc != 0) {
  20.452 +        printf("queue_init failed! (%d)\n", rc);
   20.453 +        exit(1);
  20.454 +    }
  20.455 +    
  20.456 +    for (i=0; i<MAX_AIO_REQS; i++) {
  20.457 +        if (!(iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb)))) {
  20.458 +            printf("error allocating iocb array\n");
   20.459 +            exit(1);
  20.460 +        }
  20.461 +        iocb_free_count = i;
  20.462 +    }
  20.463 +    
  20.464 +    rc = pipe(aio_notify);
  20.465 +    if (rc != 0) {
  20.466 +        printf("pipe failed! (%d)\n", errno);
   20.467 +        exit(1);
  20.468 +    }
  20.469 +    
  20.470 +    rc = pthread_create(&p, NULL, notifier_thread, NULL);
  20.471 +    if (rc != 0) {
  20.472 +        printf("pthread_create failed! (%d)\n", errno);
   20.473 +        exit(1);
  20.474 +    }
  20.475 +    
  20.476 +    aio_listening = 1;
  20.477 +    
  20.478 +    blktap_attach_poll(aio_notify[READ], POLLIN, ublkback_pollhook);
  20.479 +}
  20.480 +
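
For reference, the libaio round trip used above (io_queue_init,
io_prep_pread/io_prep_pwrite, io_submit, io_getevents) reduces to the
following standalone sketch; the page-alignment requirement for O_DIRECT
transfers is why ublkback_request falls back to a synchronous read for
unaligned destinations.  Error handling is elided
(assumes <string.h>/<stdlib.h> in addition to <libaio.h>):

    static int aio_read_4k(int fd)           /* fd opened with O_DIRECT */
    {
        io_context_t ctx;
        struct iocb cb, *cbs[1] = { &cb };
        struct io_event ev;
        void *buf;

        memset(&ctx, 0, sizeof(ctx));
        io_queue_init(1, &ctx);               /* room for 1 in-flight op */
        posix_memalign(&buf, 512, 4096);      /* O_DIRECT wants alignment */
        io_prep_pread(&cb, fd, buf, 4096, 0); /* 4K read at offset 0 */
        io_submit(ctx, 1, cbs);
        io_getevents(ctx, 1, 1, &ev, NULL);   /* block for completion */
        return (int)ev.res;                   /* bytes read, or -errno */
    }
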
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/tools/blktap/ublkback/ublkbacklib.h	Sun Sep 04 21:19:44 2005 +0000
    21.3 @@ -0,0 +1,16 @@
     21.4 +/* ublkbacklib.h
    21.5 + *
    21.6 + * aio image-backed block device.
    21.7 + * 
    21.8 + * (c) 2004 Andrew Warfield.
    21.9 + *
   21.10 + * Xend has been modified to use an amorfs:[fsid] disk tag.
   21.11 + * This will show up as device type (maj:240,min:0) = 61440.
   21.12 + *
   21.13 + * The fsid is placed in the sec_start field of the disk extent.
   21.14 + */
   21.15 +
    21.16 +int  ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done);
    21.17 +int  ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done); /* noop */
   21.18 +int  ublkback_new_blkif(blkif_t *blkif);
   21.19 +void ublkback_init(void);
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/tools/blktap/xenbus.c	Sun Sep 04 21:19:44 2005 +0000
    22.3 @@ -0,0 +1,578 @@
    22.4 +/*
    22.5 + * xenbus.c
    22.6 + * 
    22.7 + * xenbus interface to the blocktap.
    22.8 + * 
    22.9 + * this handles the top-half of integration with block devices through the
   22.10 + * store -- the tap driver negotiates the device channel etc, while the
    22.11 + * userland tap client needs to sort out the disk parameters etc.
   22.12 + * 
    22.13 + * A. Warfield 2005.  Based primarily on the blkback and xenbus driver code.
   22.14 + * Comments there apply here...
   22.15 + */
   22.16 +
   22.17 +#include <stdio.h>
   22.18 +#include <stdlib.h>
   22.19 +#include <string.h>
   22.20 +#include <err.h>
   22.21 +#include <stdarg.h>
   22.22 +#include <errno.h>
   22.23 +#include <xs.h>
   22.24 +#include <sys/types.h>
   22.25 +#include <sys/stat.h>
   22.26 +#include <fcntl.h>
   22.27 +#include <poll.h>
   22.28 +#include "blktaplib.h"
   22.29 +#include "list.h"
   22.30 +
   22.31 +#if 0
   22.32 +#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   22.33 +#else
   22.34 +#define DPRINTF(_f, _a...) ((void)0)
   22.35 +#endif
   22.36 +
   22.37 +/* --- Xenstore / Xenbus helpers ---------------------------------------- */
   22.38 +/*
   22.39 + * These should all be pulled out into the xenstore API.  I'm faulting commands
    22.40 + * in from the xenbus interface as I need them.
   22.41 + */
   22.42 +
   22.43 +
   22.44 +/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
   22.45 +int xs_gather(struct xs_handle *xs, const char *dir, ...)
   22.46 +{
   22.47 +    va_list ap;
   22.48 +    const char *name;
   22.49 +    char *path;
   22.50 +    int ret = 0;
   22.51 +    
   22.52 +    va_start(ap, dir);
   22.53 +    while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
   22.54 +        const char *fmt = va_arg(ap, char *);
   22.55 +        void *result = va_arg(ap, void *);
   22.56 +        char *p;
   22.57 +        
   22.58 +        if (asprintf(&path, "%s/%s", dir, name) == -1)
   22.59 +        {
   22.60 +            warn("allocation error in xs_gather!\n");
   22.61 +            ret = ENOMEM;
   22.62 +            break;
   22.63 +        }
   22.64 +        p = xs_read(xs, path, NULL);
   22.65 +        free(path);
   22.66 +        if (p == NULL) {
   22.67 +            ret = ENOENT;
   22.68 +            break;
   22.69 +        }
   22.70 +        if (fmt) {
   22.71 +            if (sscanf(p, fmt, result) == 0)
   22.72 +                ret = EINVAL;
   22.73 +            free(p);
   22.74 +        } else
   22.75 +            *(char **)result = p;
   22.76 +    }
   22.77 +    va_end(ap);
   22.78 +    return ret;
   22.79 +}
   22.80 +
    22.81 +/* Single printf and write: returns 0 on failure, non-zero on success. */
   22.82 +int xs_printf(struct xs_handle *h, const char *dir, const char *node, 
   22.83 +                  const char *fmt, ...)
   22.84 +{
   22.85 +        char *buf, *path;
   22.86 +        va_list ap;
   22.87 +        int ret;
   22.88 + 
    22.89 +        va_start(ap, fmt);
    22.90 +        ret = vasprintf(&buf, fmt, ap);
    22.91 +        va_end(ap);
    22.92 +        if (ret == -1)
    22.93 +            return 0;
    22.94 +
    22.95 +        if (asprintf(&path, "%s/%s", dir, node) == -1)
    22.96 +            { free(buf); return 0; }
    22.97 +
   22.98 +        ret = xs_write(h, path, buf, strlen(buf)+1, O_CREAT);
   22.99 +
  22.100 +        free(buf);
  22.101 +        free(path);
  22.102 +
  22.103 +        return ret;
  22.104 +}
  22.105 +
  22.106 +
  22.107 +int xs_exists(struct xs_handle *h, const char *path)
  22.108 +{
  22.109 +    char **d;
  22.110 +    int num;
  22.111 +
  22.112 +    d = xs_directory(h, path, &num);
  22.113 +    if (d == NULL)
  22.114 +        return 0;
  22.115 +    free(d);
  22.116 +    return 1;
  22.117 +}
  22.118 +
  22.119 +
  22.120 +
  22.121 +/* This assumes that the domain name we are looking for is unique! */
  22.122 +char *get_dom_uuid(struct xs_handle *h, const char *name)
  22.123 +{
  22.124 +    char **e, *val, *uuid = NULL;
  22.125 +    int num, i, len;
  22.126 +    char *path;
  22.127 +
  22.128 +    e = xs_directory(h, "/domain", &num);
  22.129 +
  22.130 +    i=0;
  22.131 +    while (i < num) {
  22.132 +        asprintf(&path, "/domain/%s/name", e[i]);
  22.133 +        val = xs_read(h, path, &len);
  22.134 +        free(path);
   22.135 +        if (val == NULL)
   22.136 +            { i++; continue; }  /* don't spin forever on an unreadable entry */
  22.137 +        if (strcmp(val, name) == 0) {
  22.138 +            /* match! */
  22.139 +            asprintf(&path, "/domain/%s/uuid", e[i]);
  22.140 +            uuid = xs_read(h, path, &len);
  22.141 +            free(val);
  22.142 +            free(path);
  22.143 +            break;
  22.144 +        }
  22.145 +        free(val);
  22.146 +        i++;
  22.147 +    }
  22.148 +
  22.149 +    free(e);
  22.150 +    return uuid;
  22.151 +}
  22.152 +
  22.153 +static int strsep_len(const char *str, char c, unsigned int len)
  22.154 +{
  22.155 +    unsigned int i;
  22.156 +    
  22.157 +    for (i = 0; str[i]; i++)
  22.158 +        if (str[i] == c) {
  22.159 +            if (len == 0)
  22.160 +                return i;
  22.161 +            len--;
  22.162 +        }
  22.163 +    return (len == 0) ? i : -ERANGE;
  22.164 +}
  22.165 +
  22.166 +
  22.167 +/* xenbus watches: */     
  22.168 +/* Register callback to watch this node. */
  22.169 +struct xenbus_watch
  22.170 +{
  22.171 +        struct list_head list;
  22.172 +        char *node;
  22.173 +        void (*callback)(struct xs_handle *h, 
  22.174 +                         struct xenbus_watch *, 
  22.175 +                         const  char *node);
  22.176 +};
  22.177 +
  22.178 +static LIST_HEAD(watches);
  22.179 +
  22.180 +/* A little paranoia: we don't just trust token. */
  22.181 +static struct xenbus_watch *find_watch(const char *token)
  22.182 +{
  22.183 +    struct xenbus_watch *i, *cmp;
  22.184 +    
  22.185 +    cmp = (void *)strtoul(token, NULL, 16);
  22.186 +    
  22.187 +    list_for_each_entry(i, &watches, list)
  22.188 +        if (i == cmp)
  22.189 +            return i;
  22.190 +    return NULL;
  22.191 +}
  22.192 +
  22.193 +/* Register callback to watch this node. like xs_watch, return 0 on failure */
  22.194 +int register_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
  22.195 +{
  22.196 +    /* Pointer in ascii is the token. */
  22.197 +    char token[sizeof(watch) * 2 + 1];
  22.198 +    int er;
  22.199 +    
  22.200 +    sprintf(token, "%lX", (long)watch);
  22.201 +    if (find_watch(token)) 
  22.202 +    {
  22.203 +        warn("watch collision!");
  22.204 +        return -EINVAL;
  22.205 +    }
  22.206 +    
  22.207 +    er = xs_watch(h, watch->node, token);
  22.208 +    if (er != 0) {
  22.209 +        list_add(&watch->list, &watches);
  22.210 +    } 
  22.211 +        
  22.212 +    return er;
  22.213 +}
  22.214 +
  22.215 +int unregister_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
  22.216 +{
  22.217 +    char token[sizeof(watch) * 2 + 1];
  22.218 +    int er;
  22.219 +    
  22.220 +    sprintf(token, "%lX", (long)watch);
  22.221 +    if (!find_watch(token))
  22.222 +    {
  22.223 +        warn("no such watch!");
  22.224 +        return -EINVAL;
  22.225 +    }
  22.226 +    
  22.227 +    
  22.228 +    er = xs_unwatch(h, watch->node, token);
  22.229 +    list_del(&watch->list);
  22.230 +    
  22.231 +    if (er == 0)
   22.232 +        warn("XENBUS: failed to release watch %s: %i",
  22.233 +             watch->node, er);
  22.234 +    return 0;
  22.235 +}
  22.236 +
  22.237 +/* Re-register callbacks to all watches. */
  22.238 +void reregister_xenbus_watches(struct xs_handle *h)
  22.239 +{
  22.240 +    struct xenbus_watch *watch;
  22.241 +    char token[sizeof(watch) * 2 + 1];
  22.242 +    
  22.243 +    list_for_each_entry(watch, &watches, list) {
  22.244 +        sprintf(token, "%lX", (long)watch);
  22.245 +        xs_watch(h, watch->node, token);
  22.246 +    }
  22.247 +}
  22.248 +
  22.249 +/* based on watch_thread() */
  22.250 +int xs_fire_next_watch(struct xs_handle *h)
  22.251 +{
  22.252 +    char **res;
  22.253 +    char *token;
  22.254 +    char *node = NULL;
  22.255 +    struct xenbus_watch *w;
  22.256 +    int er;
  22.257 +
  22.258 +    res = xs_read_watch(h);
  22.259 +    if (res == NULL) 
  22.260 +        return -EAGAIN; /* in O_NONBLOCK, read_watch returns 0... */
  22.261 +
  22.262 +    node  = res[0];
  22.263 +    token = res[1];
  22.264 +
  22.265 +    er = xs_acknowledge_watch(h, token);
  22.266 +    if (er == 0)
  22.267 +        warn("Couldn't acknowledge watch (%s)", token);
  22.268 +
  22.269 +    w = find_watch(token);
  22.270 +    if (!w)
  22.271 +    {
  22.272 +        warn("unregistered watch fired");
  22.273 +        goto done;
  22.274 +    }
  22.275 +    w->callback(h, w, node);
  22.276 +
  22.277 + done:
  22.278 +    free(res);
  22.279 +    return 1;
  22.280 +}
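
A watch, then, is just a node path plus a callback; the watch pointer
itself, printed in hex, doubles as the xenstore token that find_watch
verifies on the way back.  Registering one looks roughly like this
(a sketch; the path is hypothetical):

    static void my_cb(struct xs_handle *h, struct xenbus_watch *w,
                      const char *node)
    {
        DPRINTF("watch fired on %s\n", node);
    }

    static struct xenbus_watch my_watch;

    static void setup(struct xs_handle *h)
    {
        my_watch.node     = "/some/store/path";   /* hypothetical */
        my_watch.callback = my_cb;
        register_xenbus_watch(h, &my_watch);
        /* then call xs_fire_next_watch(h) in a loop to dispatch events */
    }
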
  22.281 +
  22.282 +
  22.283 +
  22.284 +
  22.285 +/* ---------------------------------------------------------------------- */
  22.286 +
  22.287 +struct backend_info
  22.288 +{
  22.289 +    /* our communications channel */
  22.290 +    blkif_t *blkif;
  22.291 +    
  22.292 +    long int frontend_id;
  22.293 +    long int pdev;
  22.294 +    long int readonly;
  22.295 +    
  22.296 +    /* watch back end for changes */
  22.297 +    struct xenbus_watch backend_watch;
  22.298 +    char *backpath;
  22.299 +
  22.300 +    /* watch front end for changes */
  22.301 +    struct xenbus_watch watch;
  22.302 +    char *frontpath;
  22.303 +
  22.304 +    struct list_head list;
  22.305 +};
  22.306 +
  22.307 +static LIST_HEAD(belist);
  22.308 +
  22.309 +static struct backend_info *be_lookup_be(const char *bepath)
  22.310 +{
  22.311 +    struct backend_info *be;
  22.312 +
  22.313 +    list_for_each_entry(be, &belist, list)
  22.314 +        if (strcmp(bepath, be->backpath) == 0)
  22.315 +            return be;
  22.316 +    return (struct backend_info *)NULL;
  22.317 +}
  22.318 +
  22.319 +static int be_exists_be(const char *bepath)
  22.320 +{
  22.321 +    return ( be_lookup_be(bepath) != NULL );
  22.322 +}
  22.323 +
  22.324 +static struct backend_info *be_lookup_fe(const char *fepath)
  22.325 +{
  22.326 +    struct backend_info *be;
  22.327 +
  22.328 +    list_for_each_entry(be, &belist, list)
  22.329 +        if (strcmp(fepath, be->frontpath) == 0)
  22.330 +            return be;
  22.331 +    return (struct backend_info *)NULL;
  22.332 +}
  22.333 +
  22.334 +static int backend_remove(struct xs_handle *h, struct backend_info *be)
  22.335 +{
  22.336 +    /* Turn off watches. */
  22.337 +    if (be->watch.node)
  22.338 +        unregister_xenbus_watch(h, &be->watch);
  22.339 +    if (be->backend_watch.node)
  22.340 +        unregister_xenbus_watch(h, &be->backend_watch);
  22.341 +
  22.342 +    /* Unhook from be list. */
  22.343 +    list_del(&be->list);
  22.344 +
  22.345 +    /* Free everything else. */
  22.346 +    if (be->blkif)
  22.347 +        free_blkif(be->blkif);
  22.348 +    if (be->frontpath)
  22.349 +        free(be->frontpath);
  22.350 +    if (be->backpath)
  22.351 +        free(be->backpath);
  22.352 +    free(be);
  22.353 +    return 0;
  22.354 +}
  22.355 +
  22.356 +static void frontend_changed(struct xs_handle *h, struct xenbus_watch *w, 
  22.357 +                     const char *fepath_im)
  22.358 +{
  22.359 +    struct backend_info *be;
  22.360 +    char *fepath = NULL;
  22.361 +    int er;
  22.362 +
  22.363 +    be = be_lookup_fe(w->node);
  22.364 +    if (be == NULL)
  22.365 +    {
   22.366 +        warn("frontend changed called for nonexistent backend! (%s)", w->node);
  22.367 +        goto fail;
  22.368 +    }
  22.369 +    
  22.370 +    /* If other end is gone, delete ourself. */
  22.371 +    if (w->node && !xs_exists(h, be->frontpath)) {
  22.372 +        DPRINTF("DELETING BE: %s\n", be->backpath);
  22.373 +        backend_remove(h, be);
  22.374 +        return;
  22.375 +    }
  22.376 +
  22.377 +    if (be->blkif == NULL || (be->blkif->state == CONNECTED))
  22.378 +        return;
  22.379 +
  22.380 +    /* Supply the information about the device the frontend needs */
  22.381 +    er = xs_transaction_start(h, be->backpath);
  22.382 +    if (er == 0) {
  22.383 +        warn("starting transaction");
  22.384 +        goto fail;
  22.385 +    }
  22.386 +    
  22.387 +    er = xs_printf(h, be->backpath, "sectors", "%lu",
  22.388 +			    be->blkif->ops->get_size(be->blkif));
  22.389 +    if (er == 0) {
  22.390 +        warn("writing sectors");
  22.391 +        goto fail;
  22.392 +    }
  22.393 +    
  22.394 +    er = xs_printf(h, be->backpath, "info", "%u",
  22.395 +			    be->blkif->ops->get_info(be->blkif));
  22.396 +    if (er == 0) {
  22.397 +        warn("writing info");
  22.398 +        goto fail;
  22.399 +    }
  22.400 +    
  22.401 +    er = xs_printf(h, be->backpath, "sector-size", "%lu",
  22.402 +			    be->blkif->ops->get_secsize(be->blkif));
  22.403 +    if (er == 0) {
  22.404 +        warn("writing sector-size");
  22.405 +        goto fail;
  22.406 +    }
  22.407 +
  22.408 +    be->blkif->state = CONNECTED;
  22.409 +
  22.410 +    xs_transaction_end(h, 0);
  22.411 +
  22.412 +    return;
  22.413 +
  22.414 + fail:
  22.415 +    if (fepath)
  22.416 +        free(fepath);
  22.417 +}
  22.418 +
  22.419 +
  22.420 +static void backend_changed(struct xs_handle *h, struct xenbus_watch *w, 
  22.421 +                     const char *bepath_im)
  22.422 +{
  22.423 +    struct backend_info *be;
  22.424 +    char *path = NULL, *p;
  22.425 +    int len, er;
  22.426 +    long int pdev = 0, handle;
  22.427 +
  22.428 +    be = be_lookup_be(w->node);
  22.429 +    if (be == NULL)
  22.430 +    {
  22.431 +        warn("backend changed called for nonexistent backend! (%s)", w->node);
  22.432 +        goto fail;
  22.433 +    }
  22.434 +    
  22.435 +    er = xs_gather(h, be->backpath, "physical-device", "%li", &pdev, NULL);
  22.436 +    if (er != 0) 
  22.437 +        goto fail;
  22.438 +
  22.439 +    if (be->pdev && be->pdev != pdev) {
  22.440 +        warn("changing physical-device not supported");
  22.441 +        goto fail;
  22.442 +    }
  22.443 +    be->pdev = pdev;
  22.444 +
  22.445 +    asprintf(&path, "%s/%s", w->node, "read-only");
  22.446 +    if (xs_exists(h, path))
  22.447 +        be->readonly = 1;
  22.448 +
  22.449 +    if (be->blkif == NULL) {
  22.450 +        /* Front end dir is a number, which is used as the handle. */
  22.451 +        p = strrchr(be->frontpath, '/') + 1;
  22.452 +        handle = strtoul(p, NULL, 0);
  22.453 +
  22.454 +        be->blkif = alloc_blkif(be->frontend_id);
  22.455 +        if (be->blkif == NULL) 
  22.456 +            goto fail;
  22.457 +
  22.458 +        er = blkif_init(be->blkif, handle, be->pdev, be->readonly);
  22.459 +        if (er) 
  22.460 +            goto fail;
  22.461 +
  22.462 +        DPRINTF("[BECHG]: ADDED A NEW BLKIF (%s)\n", w->node);
  22.463 +
  22.464 +        /* Pass in NULL node to skip exist test. */
  22.465 +        frontend_changed(h, &be->watch, NULL);
  22.466 +    }
  22.467 +
  22.468 + fail:
  22.469 +    if (path)
  22.470 +        free(path);
  22.471 +
  22.472 +}
  22.473 +
  22.474 +static void blkback_probe(struct xs_handle *h, struct xenbus_watch *w, 
  22.475 +                         const char *bepath_im)
  22.476 +{
  22.477 +	struct backend_info *be = NULL;
  22.478 +	char *frontend = NULL, *bepath = NULL;
  22.479 +	int er, len;
  22.480 +
  22.481 +        bepath = strdup(bepath_im);
  22.482 +        if (!bepath)
  22.483 +            return;
  22.484 +        len = strsep_len(bepath, '/', 6);
  22.485 +        if (len < 0) 
  22.486 +            goto free_be;
  22.487 +        
   22.488 +        bepath[len] = '\0'; /* truncate the passed-in string with prejudice. */
  22.489 +
  22.490 +	be = malloc(sizeof(*be));
  22.491 +	if (!be) {
  22.492 +		warn("allocating backend structure");
  22.493 +		goto free_be;
  22.494 +	}
  22.495 +	memset(be, 0, sizeof(*be));
  22.496 +
  22.497 +	frontend = NULL;
  22.498 +	er = xs_gather(h, bepath,
  22.499 +                        "frontend-id", "%li", &be->frontend_id,
  22.500 +                        "frontend", NULL, &frontend,
  22.501 +                        NULL);
  22.502 +	if (er)
  22.503 +		goto free_be;
  22.504 +
  22.505 +	if (strlen(frontend) == 0 || !xs_exists(h, frontend)) {
  22.506 +            /* If we can't get a frontend path and a frontend-id,
  22.507 +             * then our bus-id is no longer valid and we need to
  22.508 +             * destroy the backend device.
  22.509 +             */
  22.510 +            DPRINTF("No frontend (%s)\n", frontend);
  22.511 +            goto free_be;
  22.512 +	}
  22.513 +
  22.514 +        /* Are we already tracking this device? */
  22.515 +        if (be_exists_be(bepath))
  22.516 +            goto free_be;
  22.517 +
  22.518 +        be->backpath = bepath;
  22.519 +	be->backend_watch.node = be->backpath;
  22.520 +	be->backend_watch.callback = backend_changed;
  22.521 +	er = register_xenbus_watch(h, &be->backend_watch);
  22.522 +	if (er == 0) {
  22.523 +		be->backend_watch.node = NULL;
  22.524 +		warn("error adding backend watch on %s", bepath);
  22.525 +		goto free_be;
  22.526 +	}
  22.527 +
  22.528 +	be->frontpath = frontend;
  22.529 +	be->watch.node = be->frontpath;
  22.530 +	be->watch.callback = frontend_changed;
  22.531 +	er = register_xenbus_watch(h, &be->watch);
  22.532 +	if (er == 0) {
  22.533 +		be->watch.node = NULL;
  22.534 +		warn("adding frontend watch on %s", be->frontpath);
  22.535 +		goto free_be;
  22.536 +	}
  22.537 +
  22.538 +        list_add(&be->list, &belist);
  22.539 +
  22.540 +        DPRINTF("[PROBE]: ADDED NEW DEVICE (%s)\n", bepath_im);
  22.541 +
  22.542 +	backend_changed(h, &be->backend_watch, bepath);
  22.543 +	return;
  22.544 +
  22.545 + free_be:
  22.546 +	if ((be) && (be->backend_watch.node))
  22.547 +            unregister_xenbus_watch(h, &be->backend_watch);
  22.548 +	if (frontend)
  22.549 +            free(frontend);
  22.550 +        if (bepath)
  22.551 +            free(bepath);
  22.552 +	free(be);
  22.553 +	return;
  22.554 +}
  22.555 +
  22.556 +
  22.557 +int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname)
  22.558 +{
  22.559 +    char *uuid, *path;
  22.560 +    struct xenbus_watch *vbd_watch;
  22.561 +    int er;
  22.562 +
  22.563 +    uuid = get_dom_uuid(h, domname);
  22.564 +
   22.565 +    DPRINTF("%s: %s\n", domname, (uuid != NULL) ? uuid : "[ not found! ]");
   22.566 +    if (uuid == NULL) return -ENOENT;
   22.567 +    asprintf(&path, "/domain/%s/backend/vbd", uuid);
  22.568 +    if (path == NULL) 
  22.569 +        return -ENOMEM;
  22.570 +
  22.571 +    vbd_watch = (struct xenbus_watch *)malloc(sizeof(struct xenbus_watch));
  22.572 +    vbd_watch->node     = path;
  22.573 +    vbd_watch->callback = blkback_probe;
  22.574 +    er = register_xenbus_watch(h, vbd_watch);
  22.575 +    if (er == 0) {
  22.576 +        warn("Error adding vbd probe watch %s", path);
  22.577 +        return -EINVAL;
  22.578 +    }
  22.579 +
  22.580 +    return 0;
  22.581 +}
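
Putting the pieces together, a user-level backend daemon wires this file in
roughly the following way (a sketch: the store connection via xs_daemon_open
and the domain name are assumptions, not part of this checkin):

    #include <xs.h>
    #include "blktaplib.h"

    static int watch_loop(const char *domname)   /* e.g. "Domain-0" */
    {
        struct xs_handle *h = xs_daemon_open();
        if (h == NULL)
            return -1;
        if (add_blockdevice_probe_watch(h, domname) != 0)
            return -1;
        for (;;)
            xs_fire_next_watch(h);   /* dispatches to blkback_probe etc. */
    }
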