ia64/xen-unstable

changeset 2019:ae1ea693453c

bitkeeper revision 1.1108.33.22 (410d08e1nsOUOXoTW3KvoNJBKy1OYw)

Merge freefall.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into freefall.cl.cam.ac.uk:/auto/groups/xeno/users/cl349/BK/xeno.bk-26dom0
author cl349@freefall.cl.cam.ac.uk
date Sun Aug 01 15:14:41 2004 +0000 (2004-08-01)
parents 140b3d8d1286 c48edf8434c7
children 4fc62e2234f4
files .rootkeys linux-2.6.7-xen-sparse/arch/xen/Kconfig linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig linux-2.6.7-xen-sparse/drivers/xen/Makefile linux-2.6.7-xen-sparse/drivers/xen/blkback/Makefile linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c linux-2.6.7-xen-sparse/drivers/xen/blkback/common.h linux-2.6.7-xen-sparse/drivers/xen/blkback/control.c linux-2.6.7-xen-sparse/drivers/xen/blkback/interface.c linux-2.6.7-xen-sparse/drivers/xen/blkback/vbd.c linux-2.6.7-xen-sparse/drivers/xen/blkfront/Kconfig linux-2.6.7-xen-sparse/drivers/xen/blkfront/Makefile linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c linux-2.6.7-xen-sparse/drivers/xen/blkfront/block.h linux-2.6.7-xen-sparse/drivers/xen/blkfront/vbd.c linux-2.6.7-xen-sparse/drivers/xen/block/Kconfig linux-2.6.7-xen-sparse/drivers/xen/block/Makefile linux-2.6.7-xen-sparse/drivers/xen/block/block.c linux-2.6.7-xen-sparse/drivers/xen/block/block.h linux-2.6.7-xen-sparse/drivers/xen/block/vbd.c linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/xor.h
line diff
     1.1 --- a/.rootkeys	Sat Jul 31 22:24:26 2004 +0000
     1.2 +++ b/.rootkeys	Sun Aug 01 15:14:41 2004 +0000
     1.3 @@ -190,11 +190,17 @@ 40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6
     1.4  3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.7-xen-sparse/arch/xen/kernel/xen_proc.c
     1.5  4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.7-xen-sparse/drivers/char/mem.c
     1.6  40f56239Dp_vMTgz8TEbvo1hjHGc3w linux-2.6.7-xen-sparse/drivers/xen/Makefile
     1.7 -40f56239Sfle6wGv5FS0wjS_HI150A linux-2.6.7-xen-sparse/drivers/xen/block/Kconfig
     1.8 -40f562395atl9x4suKGhPkjqLOXESg linux-2.6.7-xen-sparse/drivers/xen/block/Makefile
     1.9 -40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.7-xen-sparse/drivers/xen/block/block.c
    1.10 -40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.7-xen-sparse/drivers/xen/block/block.h
    1.11 -40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.7-xen-sparse/drivers/xen/block/vbd.c
    1.12 +410d0893otFGghmv4dUXDUBBdY5aIA linux-2.6.7-xen-sparse/drivers/xen/blkback/Makefile
    1.13 +4087cf0d1XgMkooTZAiJS6NrcpLQNQ linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c
    1.14 +4087cf0dZadZ8r6CEt4fNN350Yle3A linux-2.6.7-xen-sparse/drivers/xen/blkback/common.h
    1.15 +4087cf0dxlh29iw0w-9rxOCEGCjPcw linux-2.6.7-xen-sparse/drivers/xen/blkback/control.c
    1.16 +4087cf0dbuoH20fMjNZjcgrRK-1msQ linux-2.6.7-xen-sparse/drivers/xen/blkback/interface.c
    1.17 +4087cf0dk97tacDzxfByWV7JifUYqA linux-2.6.7-xen-sparse/drivers/xen/blkback/vbd.c
    1.18 +40f56239Sfle6wGv5FS0wjS_HI150A linux-2.6.7-xen-sparse/drivers/xen/blkfront/Kconfig
    1.19 +40f562395atl9x4suKGhPkjqLOXESg linux-2.6.7-xen-sparse/drivers/xen/blkfront/Makefile
    1.20 +40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c
    1.21 +40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.7-xen-sparse/drivers/xen/blkfront/block.h
    1.22 +40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.7-xen-sparse/drivers/xen/blkfront/vbd.c
    1.23  40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.7-xen-sparse/drivers/xen/console/Makefile
    1.24  3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.7-xen-sparse/drivers/xen/console/console.c
    1.25  40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.7-xen-sparse/drivers/xen/evtchn/Makefile
     2.1 --- a/linux-2.6.7-xen-sparse/arch/xen/Kconfig	Sat Jul 31 22:24:26 2004 +0000
     2.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/Kconfig	Sun Aug 01 15:14:41 2004 +0000
     2.3 @@ -38,6 +38,11 @@ config XEN_PHYSDEV_ACCESS
     2.4  
     2.5  endmenu
     2.6  
     2.7 +# Xen's block device backend driver needs order-11 (2^11-page) allocations
     2.8 +config FORCE_MAX_ZONEORDER
     2.9 +        int
    2.10 +        default "12" if XEN_PHYSDEV_ACCESS
    2.11 +        default "11" if !XEN_PHYSDEV_ACCESS
    2.12  
    2.13  #config VT
    2.14  #	bool
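
The FORCE_MAX_ZONEORDER bump exists because the backend reserves one large contiguous mapping region at init (see blkback.c below). A back-of-the-envelope sizing sketch in C, assuming BLKIF_MAX_SEGMENTS_PER_REQUEST is 11 as in the asm-xen/blkif.h headers of this era (that constant is not part of this changeset):

    /* Sizing sketch: why dom0 needs CONFIG_FORCE_MAX_ZONEORDER=12.
     * BLKIF_MAX_SEGMENTS_PER_REQUEST == 11 is an assumption taken from
     * contemporaneous blkif headers, not from this changeset.
     */
    #define MAX_PENDING_REQS                64
    #define BLKIF_MAX_SEGMENTS_PER_REQUEST  11
    #define MMAP_PAGES_PER_REQUEST  (2 * (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1))
    #define MMAP_PAGES              (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
    /* MMAP_PAGES = 64 * 24 = 1536 pages (~6MB). A single contiguous
     * allocation of that size rounds up to order 11 (2048 pages), and the
     * buddy allocator only serves orders strictly below MAX_ORDER, so
     * FORCE_MAX_ZONEORDER must be at least 12 when the backend is built.
     */
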
     3.1 --- a/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig	Sat Jul 31 22:24:26 2004 +0000
     3.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig	Sun Aug 01 15:14:41 2004 +0000
     3.3 @@ -10,6 +10,7 @@ CONFIG_NO_IDLE_HZ=y
     3.4  #
     3.5  CONFIG_XEN_PRIVILEGED_GUEST=y
     3.6  CONFIG_XEN_PHYSDEV_ACCESS=y
     3.7 +CONFIG_FORCE_MAX_ZONEORDER=12
     3.8  CONFIG_X86=y
     3.9  # CONFIG_X86_64 is not set
    3.10  
     4.1 --- a/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig	Sat Jul 31 22:24:26 2004 +0000
     4.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig	Sun Aug 01 15:14:41 2004 +0000
     4.3 @@ -10,6 +10,7 @@ CONFIG_NO_IDLE_HZ=y
     4.4  #
     4.5  # CONFIG_XEN_PRIVILEGED_GUEST is not set
     4.6  # CONFIG_XEN_PHYSDEV_ACCESS is not set
     4.7 +CONFIG_FORCE_MAX_ZONEORDER=11
     4.8  CONFIG_X86=y
     4.9  # CONFIG_X86_64 is not set
    4.10  
     5.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/Makefile	Sat Jul 31 22:24:26 2004 +0000
     5.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/Makefile	Sun Aug 01 15:14:41 2004 +0000
     5.3 @@ -1,9 +1,11 @@
     5.4  
     5.5  
     5.6 -obj-y	+= block/
     5.7 +obj-y	+= blkfront/
     5.8  obj-y	+= console/
     5.9  obj-y	+= evtchn/
    5.10  obj-y	+= netfront/
    5.11  obj-y	+= privcmd/
    5.12  
    5.13 +obj-$(CONFIG_XEN_PHYSDEV_ACCESS)	+= blkback/
    5.14  obj-$(CONFIG_XEN_PHYSDEV_ACCESS)	+= netback/
    5.15 +
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/Makefile	Sun Aug 01 15:14:41 2004 +0000
     6.3 @@ -0,0 +1,2 @@
     6.4 +
     6.5 +obj-y	:= blkback.o control.o interface.o vbd.o
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c	Sun Aug 01 15:14:41 2004 +0000
     7.3 @@ -0,0 +1,588 @@
     7.4 +/******************************************************************************
     7.5 + * drivers/xen/blkback/blkback.c
     7.6 + * 
     7.7 + * Back-end of the driver for virtual block devices. This portion of the
     7.8 + * driver exports a 'unified' block-device interface that can be accessed
     7.9 + * by any operating system that implements a compatible front end. A 
    7.10 + * reference front-end implementation can be found in:
    7.11 + *  drivers/xen/blkfront
    7.12 + * 
    7.13 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
    7.14 + */
    7.15 +
    7.16 +#include "common.h"
    7.17 +
    7.18 +/*
    7.19 + * These are rather arbitrary. They are fairly large because adjacent requests
    7.20 + * pulled from a communication ring are quite likely to end up being part of
    7.21 + * the same scatter/gather request at the disc.
    7.22 + * 
    7.23 + * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
    7.24 + * This will increase the chances of being able to write whole tracks.
    7.25 + * 64 should be enough to keep us competitive with Linux.
    7.26 + */
    7.27 +#define MAX_PENDING_REQS 64
    7.28 +#define BATCH_PER_DOMAIN 16
    7.29 +
    7.30 +/*
    7.31 + * NB. We place a page of padding between each buffer page to avoid incorrect
    7.32 + * merging of requests by the IDE and SCSI merging routines. Otherwise, two
    7.33 + * adjacent buffers in a scatter-gather request would have adjacent page
    7.34 + * numbers: since the merge routines don't realise that this is in *pseudophys*
    7.35 + * space, not real space, they may collapse the s-g elements!
    7.36 + */
    7.37 +static unsigned long mmap_vstart;
    7.38 +#define MMAP_PAGES_PER_REQUEST \
    7.39 +    (2 * (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1))
    7.40 +#define MMAP_PAGES             \
    7.41 +    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
    7.42 +#define MMAP_VADDR(_req,_seg)                        \
    7.43 +    (mmap_vstart +                                   \
    7.44 +     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
    7.45 +     ((_seg) * 2 * PAGE_SIZE))
    7.46 +
    7.47 +/*
    7.48 + * Each outstanding request that we've passed to the lower device layers has a 
    7.49 + * 'pending_req' allocated to it. Each buffer_head that completes decrements 
    7.50 + * the pendcnt towards zero. When it hits zero, the specified domain has a 
    7.51 + * response queued for it, with the saved 'id' passed back.
    7.52 + */
    7.53 +typedef struct {
    7.54 +    blkif_t       *blkif;
    7.55 +    unsigned long  id;
    7.56 +    int            nr_pages;
    7.57 +    atomic_t       pendcnt;
    7.58 +    unsigned short operation;
    7.59 +    int            status;
    7.60 +} pending_req_t;
    7.61 +
    7.62 +/*
    7.63 + * We can't allocate pending_req's in order, since they may complete out of 
    7.64 + * order. We therefore maintain an allocation ring. This ring also indicates 
    7.65 + * when enough work has been passed down -- at that point the allocation ring 
    7.66 + * will be empty.
    7.67 + */
    7.68 +static pending_req_t pending_reqs[MAX_PENDING_REQS];
    7.69 +static unsigned char pending_ring[MAX_PENDING_REQS];
    7.70 +static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
    7.71 +/* NB. We use a different index type to differentiate from shared blk rings. */
    7.72 +typedef unsigned int PEND_RING_IDX;
    7.73 +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
    7.74 +static PEND_RING_IDX pending_prod, pending_cons;
    7.75 +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
    7.76 +
    7.77 +#if 0
    7.78 +static kmem_cache_t *buffer_head_cachep;
    7.79 +#endif
    7.80 +
    7.81 +static int do_block_io_op(blkif_t *blkif, int max_to_do);
    7.82 +static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
    7.83 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
    7.84 +static void make_response(blkif_t *blkif, unsigned long id, 
    7.85 +                          unsigned short op, int st);
    7.86 +
    7.87 +static void fast_flush_area(int idx, int nr_pages)
    7.88 +{
    7.89 +    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
    7.90 +    int               i;
    7.91 +
    7.92 +    for ( i = 0; i < nr_pages; i++ )
    7.93 +    {
    7.94 +        mcl[i].op = __HYPERVISOR_update_va_mapping;
    7.95 +        mcl[i].args[0] = MMAP_VADDR(idx, i) >> PAGE_SHIFT;
    7.96 +        mcl[i].args[1] = 0;
    7.97 +        mcl[i].args[2] = 0;
    7.98 +    }
    7.99 +
   7.100 +    mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
   7.101 +    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
   7.102 +        BUG();
   7.103 +}
   7.104 +
   7.105 +
   7.106 +/******************************************************************
   7.107 + * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
   7.108 + */
   7.109 +
   7.110 +static struct list_head blkio_schedule_list;
   7.111 +static spinlock_t blkio_schedule_list_lock;
   7.112 +
   7.113 +static int __on_blkdev_list(blkif_t *blkif)
   7.114 +{
   7.115 +    return blkif->blkdev_list.next != NULL;
   7.116 +}
   7.117 +
   7.118 +static void remove_from_blkdev_list(blkif_t *blkif)
   7.119 +{
   7.120 +    unsigned long flags;
   7.121 +    if ( !__on_blkdev_list(blkif) ) return;
   7.122 +    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
   7.123 +    if ( __on_blkdev_list(blkif) )
   7.124 +    {
   7.125 +        list_del(&blkif->blkdev_list);
   7.126 +        blkif->blkdev_list.next = NULL;
   7.127 +        blkif_put(blkif);
   7.128 +    }
   7.129 +    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
   7.130 +}
   7.131 +
   7.132 +static void add_to_blkdev_list_tail(blkif_t *blkif)
   7.133 +{
   7.134 +    unsigned long flags;
   7.135 +    if ( __on_blkdev_list(blkif) ) return;
   7.136 +    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
   7.137 +    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
   7.138 +    {
   7.139 +        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
   7.140 +        blkif_get(blkif);
   7.141 +    }
   7.142 +    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
   7.143 +}
   7.144 +
   7.145 +
   7.146 +/******************************************************************
   7.147 + * SCHEDULER FUNCTIONS
   7.148 + */
   7.149 +
   7.150 +static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);
   7.151 +
   7.152 +static int blkio_schedule(void *arg)
   7.153 +{
   7.154 +    DECLARE_WAITQUEUE(wq, current);
   7.155 +
   7.156 +    blkif_t          *blkif;
   7.157 +    struct list_head *ent;
   7.158 +
   7.159 +    for ( ; ; )
   7.160 +    {
   7.161 +        /* Wait for work to do. */
   7.162 +        add_wait_queue(&blkio_schedule_wait, &wq);
   7.163 +        set_current_state(TASK_INTERRUPTIBLE);
   7.164 +        if ( (NR_PENDING_REQS == MAX_PENDING_REQS) || 
   7.165 +             list_empty(&blkio_schedule_list) )
   7.166 +            schedule();
   7.167 +        __set_current_state(TASK_RUNNING);
   7.168 +        remove_wait_queue(&blkio_schedule_wait, &wq);
   7.169 +
   7.170 +        /* Queue up a batch of requests. */
   7.171 +        while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
   7.172 +                !list_empty(&blkio_schedule_list) )
   7.173 +        {
   7.174 +            ent = blkio_schedule_list.next;
   7.175 +            blkif = list_entry(ent, blkif_t, blkdev_list);
   7.176 +            blkif_get(blkif);
   7.177 +            remove_from_blkdev_list(blkif);
   7.178 +            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
   7.179 +                add_to_blkdev_list_tail(blkif);
   7.180 +            blkif_put(blkif);
   7.181 +        }
   7.182 +        
   7.183 +#if 0				/* XXXcl tq */
   7.184 +        /* Push the batch through to disc. */
   7.185 +        run_task_queue(&tq_disk);
   7.186 +#endif
   7.187 +    }
   7.188 +}
   7.189 +
   7.190 +static void maybe_trigger_blkio_schedule(void)
   7.191 +{
   7.192 +    /*
    7.193 +     * Needed so that two processes that together make the following predicate
    7.194 +     * true don't both read stale values and evaluate the predicate
   7.195 +     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
   7.196 +     */
   7.197 +    smp_mb();
   7.198 +
   7.199 +    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
   7.200 +         !list_empty(&blkio_schedule_list) )
   7.201 +        wake_up(&blkio_schedule_wait);
   7.202 +}
   7.203 +
   7.204 +
   7.205 +
   7.206 +/******************************************************************
   7.207 + * COMPLETION CALLBACK -- Called as bh->b_end_io()
   7.208 + */
   7.209 +
   7.210 +static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
   7.211 +{
   7.212 +    unsigned long flags;
   7.213 +
   7.214 +    /* An error fails the entire request. */
   7.215 +    if ( !uptodate )
   7.216 +    {
   7.217 +        DPRINTK("Buffer not up-to-date at end of operation\n");
   7.218 +        pending_req->status = BLKIF_RSP_ERROR;
   7.219 +    }
   7.220 +
   7.221 +    if ( atomic_dec_and_test(&pending_req->pendcnt) )
   7.222 +    {
   7.223 +        int pending_idx = pending_req - pending_reqs;
   7.224 +        fast_flush_area(pending_idx, pending_req->nr_pages);
   7.225 +        make_response(pending_req->blkif, pending_req->id,
   7.226 +                      pending_req->operation, pending_req->status);
   7.227 +        blkif_put(pending_req->blkif);
   7.228 +        spin_lock_irqsave(&pend_prod_lock, flags);
   7.229 +        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
   7.230 +        spin_unlock_irqrestore(&pend_prod_lock, flags);
   7.231 +        maybe_trigger_blkio_schedule();
   7.232 +    }
   7.233 +}
   7.234 +
   7.235 +static int end_block_io_op(struct bio *bio, unsigned int done, int error)
   7.236 +{
   7.237 +    if (done || error)		/* XXXcl */
   7.238 +	__end_block_io_op(bio->bi_private, done);
   7.239 +#if 0
   7.240 +    kmem_cache_free(buffer_head_cachep, bh);
   7.241 +#else
   7.242 +    bio_put(bio);
   7.243 +#endif
   7.244 +    return error;
   7.245 +}
   7.246 +
   7.247 +
   7.248 +
   7.249 +/******************************************************************************
   7.250 + * NOTIFICATION FROM GUEST OS.
   7.251 + */
   7.252 +
   7.253 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
   7.254 +{
   7.255 +    blkif_t *blkif = dev_id;
   7.256 +    add_to_blkdev_list_tail(blkif);
   7.257 +    maybe_trigger_blkio_schedule();
   7.258 +    return IRQ_HANDLED;
   7.259 +}
   7.260 +
   7.261 +
   7.262 +
   7.263 +/******************************************************************
   7.264 + * DOWNWARD CALLS -- These interface with the block-device layer proper.
   7.265 + */
   7.266 +
   7.267 +static int do_block_io_op(blkif_t *blkif, int max_to_do)
   7.268 +{
   7.269 +    blkif_ring_t *blk_ring = blkif->blk_ring_base;
   7.270 +    blkif_request_t *req;
   7.271 +    BLKIF_RING_IDX i;
   7.272 +    int more_to_do = 0;
   7.273 +
   7.274 +    /* Take items off the comms ring, taking care not to overflow. */
   7.275 +    for ( i = blkif->blk_req_cons; 
   7.276 +          (i != blk_ring->req_prod) && ((i-blkif->blk_resp_prod) != 
   7.277 +                                        BLKIF_RING_SIZE);
   7.278 +          i++ )
   7.279 +    {
   7.280 +        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
   7.281 +        {
   7.282 +            more_to_do = 1;
   7.283 +            break;
   7.284 +        }
   7.285 +        
   7.286 +        req = &blk_ring->ring[MASK_BLKIF_IDX(i)].req;
   7.287 +        switch ( req->operation )
   7.288 +        {
   7.289 +        case BLKIF_OP_READ:
   7.290 +        case BLKIF_OP_WRITE:
   7.291 +            dispatch_rw_block_io(blkif, req);
   7.292 +            break;
   7.293 +
   7.294 +        case BLKIF_OP_PROBE:
   7.295 +            dispatch_probe(blkif, req);
   7.296 +            break;
   7.297 +
   7.298 +        default:
   7.299 +            DPRINTK("error: unknown block io operation [%d]\n",
   7.300 +                    blk_ring->ring[i].req.operation);
   7.301 +            make_response(blkif, blk_ring->ring[i].req.id, 
   7.302 +                          blk_ring->ring[i].req.operation, BLKIF_RSP_ERROR);
   7.303 +            break;
   7.304 +        }
   7.305 +    }
   7.306 +
   7.307 +    blkif->blk_req_cons = i;
   7.308 +    return more_to_do;
   7.309 +}
   7.310 +
   7.311 +static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
   7.312 +{
   7.313 +    int rsp = BLKIF_RSP_ERROR;
   7.314 +    int pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
   7.315 +
   7.316 +    /* We expect one buffer only. */
   7.317 +    if ( unlikely(req->nr_segments != 1) )
   7.318 +        goto out;
   7.319 +
   7.320 +    /* Make sure the buffer is page-sized. */
   7.321 +    if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
   7.322 +         (blkif_last_sect(req->frame_and_sects[0]) != 7) )
   7.323 +        goto out;
   7.324 +
   7.325 +    if ( HYPERVISOR_update_va_mapping_otherdomain(
   7.326 +        MMAP_VADDR(pending_idx, 0) >> PAGE_SHIFT,
   7.327 +        (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
   7.328 +        0, blkif->domid) )
   7.329 +        goto out;
   7.330 +
   7.331 +    rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), 
   7.332 +                    PAGE_SIZE / sizeof(vdisk_t));
   7.333 +
   7.334 + out:
   7.335 +    fast_flush_area(pending_idx, 1);
   7.336 +    make_response(blkif, req->id, req->operation, rsp);
   7.337 +}
   7.338 +
   7.339 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
   7.340 +{
   7.341 +    extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
   7.342 +#if 0
   7.343 +    struct buffer_head *bh;
   7.344 +#else
   7.345 +    struct bio *bio;
   7.346 +#endif
   7.347 +    int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
   7.348 +    short nr_sects;
   7.349 +    unsigned long buffer, fas;
   7.350 +    int i, j, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
   7.351 +    pending_req_t *pending_req;
   7.352 +    unsigned long  remap_prot;
   7.353 +    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
   7.354 +
   7.355 +    /* We map virtual scatter/gather segments to physical segments. */
   7.356 +    int new_segs, nr_psegs = 0;
   7.357 +    phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST + 1];
   7.358 +
   7.359 +    /* Check that number of segments is sane. */
   7.360 +    if ( unlikely(req->nr_segments == 0) || 
   7.361 +         unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
   7.362 +    {
   7.363 +        DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
   7.364 +        goto bad_descriptor;
   7.365 +    }
   7.366 +
   7.367 +    /*
   7.368 +     * Check each address/size pair is sane, and convert into a
   7.369 +     * physical device and block offset. Note that if the offset and size
   7.370 +     * crosses a virtual extent boundary, we may end up with more
   7.371 +     * physical scatter/gather segments than virtual segments.
   7.372 +     */
   7.373 +    for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects )
   7.374 +    {
   7.375 +        fas      = req->frame_and_sects[i];
   7.376 +        buffer   = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
   7.377 +        nr_sects = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
   7.378 +
   7.379 +        if ( nr_sects <= 0 )
   7.380 +            goto bad_descriptor;
   7.381 +
   7.382 +        phys_seg[nr_psegs].ps_device     = req->device;
   7.383 +        phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
   7.384 +        phys_seg[nr_psegs].buffer        = buffer;
   7.385 +        phys_seg[nr_psegs].nr_sects      = nr_sects;
   7.386 +
   7.387 +        /* Translate the request into the relevant 'physical device' */
   7.388 +        new_segs = vbd_translate(&phys_seg[nr_psegs], blkif, operation);
   7.389 +        if ( new_segs < 0 )
   7.390 +        { 
   7.391 +            DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
   7.392 +                    operation == READ ? "read" : "write", 
   7.393 +                    req->sector_number + tot_sects, 
   7.394 +                    req->sector_number + tot_sects + nr_sects, 
   7.395 +                    req->device); 
   7.396 +            goto bad_descriptor;
   7.397 +        }
   7.398 +  
   7.399 +        nr_psegs += new_segs;
   7.400 +        ASSERT(nr_psegs <= (BLKIF_MAX_SEGMENTS_PER_REQUEST+1));
   7.401 +    }
   7.402 +
   7.403 +    /* Nonsensical zero-sized request? */
   7.404 +    if ( unlikely(nr_psegs == 0) )
   7.405 +        goto bad_descriptor;
   7.406 +
   7.407 +    if ( operation == READ )
   7.408 +        remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
   7.409 +    else
   7.410 +        remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED;
   7.411 +
   7.412 +    for ( i = 0; i < nr_psegs; i++ )
   7.413 +    {
   7.414 +        mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
   7.415 +        mcl[i].args[0] = MMAP_VADDR(pending_idx, i) >> PAGE_SHIFT;
   7.416 +        mcl[i].args[1] = (phys_seg[i].buffer & PAGE_MASK) | remap_prot;
   7.417 +        mcl[i].args[2] = 0;
   7.418 +        mcl[i].args[3] = blkif->domid;
   7.419 +
   7.420 +        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
   7.421 +            phys_seg[i].buffer >> PAGE_SHIFT;
   7.422 +    }
   7.423 +
   7.424 +    if ( unlikely(HYPERVISOR_multicall(mcl, nr_psegs) != 0) )
   7.425 +        BUG();
   7.426 +
   7.427 +    for ( i = 0; i < nr_psegs; i++ )
   7.428 +    {
   7.429 +        if ( unlikely(mcl[i].args[5] != 0) )
   7.430 +        {
   7.431 +            DPRINTK("invalid buffer -- could not remap it\n");
   7.432 +            fast_flush_area(pending_idx, nr_psegs);
   7.433 +            goto bad_descriptor;
   7.434 +        }
   7.435 +    }
   7.436 +
   7.437 +    pending_req = &pending_reqs[pending_idx];
   7.438 +    pending_req->blkif     = blkif;
   7.439 +    pending_req->id        = req->id;
   7.440 +    pending_req->operation = operation;
   7.441 +    pending_req->status    = BLKIF_RSP_OKAY;
   7.442 +    pending_req->nr_pages  = nr_psegs;
   7.443 +    atomic_set(&pending_req->pendcnt, nr_psegs);
   7.444 +    pending_cons++;
   7.445 +
   7.446 +    blkif_get(blkif);
   7.447 +
   7.448 +    /* Now we pass each segment down to the real blkdev layer. */
   7.449 +#if 0
   7.450 +    for ( i = 0; i < nr_psegs; i++ )
   7.451 +    {
   7.452 +        bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC);
   7.453 +        if ( unlikely(bh == NULL) )
   7.454 +        {
   7.455 +            __end_block_io_op(pending_req, 0);
   7.456 +            continue;		/* XXXcl continue!? */
   7.457 +        }
   7.458 +        memset(bh, 0, sizeof (struct buffer_head));
   7.459 +
   7.460 +        init_waitqueue_head(&bh->b_wait);
   7.461 +        bh->b_size          = phys_seg[i].nr_sects << 9;
   7.462 +        bh->b_dev           = phys_seg[i].dev;
   7.463 +        bh->b_rdev          = phys_seg[i].dev;
   7.464 +        bh->b_rsector       = (unsigned long)phys_seg[i].sector_number;
   7.465 +        bh->b_data          = (char *)MMAP_VADDR(pending_idx, i) +
   7.466 +            (phys_seg[i].buffer & ~PAGE_MASK);
   7.467 +        bh->b_page          = virt_to_page(MMAP_VADDR(pending_idx, i));
   7.468 +        bh->b_end_io        = end_block_io_op;
   7.469 +        bh->b_private       = pending_req;
   7.470 +
   7.471 +        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | 
   7.472 +            (1 << BH_Req) | (1 << BH_Launder);
   7.473 +        if ( operation == WRITE )
   7.474 +            bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
   7.475 +
   7.476 +        atomic_set(&bh->b_count, 1);
   7.477 +
   7.478 +        /* Dispatch a single request. We'll flush it to disc later. */
   7.479 +        generic_make_request(operation, bh);
   7.480 +    }
   7.481 +#else
   7.482 +    for ( i = 0; i < nr_psegs; i++ )
   7.483 +    {
   7.484 +	int nr_iovecs = PFN_UP(phys_seg[i].nr_sects << 9);
   7.485 +	ASSERT(nr_iovecs == 1);
   7.486 +	bio = bio_alloc(GFP_ATOMIC, nr_iovecs);
   7.487 +	if ( unlikely(bio == NULL) )
   7.488 +	{
   7.489 +	    __end_block_io_op(pending_req, 0);
   7.490 +	    break;
   7.491 +	}
   7.492 +	bio->bi_bdev = phys_seg[i].ps_bdev;
   7.493 +	bio->bi_private = pending_req;
   7.494 +	bio->bi_end_io = end_block_io_op;
   7.495 +	bio->bi_sector = phys_seg[i].sector_number;
   7.496 +	bio->bi_rw = operation;
   7.497 +
   7.498 +	bio->bi_size = 0;
   7.499 +
   7.500 +	for ( j = 0; j < nr_iovecs; j++ )
   7.501 +	{
   7.502 +	    struct bio_vec *bv = bio_iovec_idx(bio, j);
   7.503 +
   7.504 +	    bv->bv_page = virt_to_page(MMAP_VADDR(pending_idx, i));
   7.505 +	    bv->bv_len = phys_seg[i].nr_sects << 9;
   7.506 +	    bv->bv_offset = phys_seg[i].buffer & ~PAGE_MASK;
   7.507 +
    7.508 +	    bio->bi_size += bv->bv_len;
   7.509 +	    bio->bi_vcnt++;
   7.510 +	}
   7.511 +
   7.512 +	submit_bio(operation, bio);
   7.513 +    }
   7.514 +#endif
   7.515 +
   7.516 +    return;
   7.517 +
   7.518 + bad_descriptor:
   7.519 +    make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
   7.520 +} 
   7.521 +
   7.522 +
   7.523 +
   7.524 +/******************************************************************
   7.525 + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
   7.526 + */
   7.527 +
   7.528 +
   7.529 +static void make_response(blkif_t *blkif, unsigned long id, 
   7.530 +                          unsigned short op, int st)
   7.531 +{
   7.532 +    blkif_response_t *resp;
   7.533 +    unsigned long     flags;
   7.534 +
   7.535 +    /* Place on the response ring for the relevant domain. */ 
   7.536 +    spin_lock_irqsave(&blkif->blk_ring_lock, flags);
   7.537 +    resp = &blkif->blk_ring_base->
   7.538 +        ring[MASK_BLKIF_IDX(blkif->blk_resp_prod)].resp;
   7.539 +    resp->id        = id;
   7.540 +    resp->operation = op;
   7.541 +    resp->status    = st;
   7.542 +    wmb();
   7.543 +    blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod;
   7.544 +    spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
   7.545 +
   7.546 +    /* Kick the relevant domain. */
   7.547 +    notify_via_evtchn(blkif->evtchn);
   7.548 +}
   7.549 +
   7.550 +void blkif_deschedule(blkif_t *blkif)
   7.551 +{
   7.552 +    remove_from_blkdev_list(blkif);
   7.553 +}
   7.554 +
   7.555 +static int __init blkif_init(void)
   7.556 +{
   7.557 +    int i;
   7.558 +
   7.559 +    if ( !(start_info.flags & SIF_INITDOMAIN)
   7.560 +	 && !(start_info.flags & SIF_BLK_BE_DOMAIN) )
   7.561 +        return 0;
   7.562 +
   7.563 +    blkif_interface_init();
   7.564 +
   7.565 +    if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
   7.566 +        BUG();
   7.567 +
   7.568 +    pending_cons = 0;
   7.569 +    pending_prod = MAX_PENDING_REQS;
   7.570 +    memset(pending_reqs, 0, sizeof(pending_reqs));
   7.571 +    for ( i = 0; i < MAX_PENDING_REQS; i++ )
   7.572 +        pending_ring[i] = i;
   7.573 +    
   7.574 +    spin_lock_init(&blkio_schedule_list_lock);
   7.575 +    INIT_LIST_HEAD(&blkio_schedule_list);
   7.576 +
   7.577 +    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
   7.578 +        BUG();
   7.579 +
   7.580 +#if 0
   7.581 +    buffer_head_cachep = kmem_cache_create(
   7.582 +        "buffer_head_cache", sizeof(struct buffer_head),
   7.583 +        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
   7.584 +#endif
   7.585 +
   7.586 +    blkif_ctrlif_init();
   7.587 +
   7.588 +    return 0;
   7.589 +}
   7.590 +
   7.591 +__initcall(blkif_init);
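
The pending-request bookkeeping in blkback.c is worth calling out: free slot indices live in a ring addressed by free-running producer/consumer counters, so slots can be returned in any order as requests complete. A minimal standalone sketch of the same discipline (names mirror blkback.c; the pend_prod_lock serialization around the producer is elided):

    /* Illustrative sketch of blkback.c's pending-ring allocator. */
    #define MAX_PENDING_REQS 64                      /* must be a power of two */
    #define MASK_PEND_IDX(_i) ((_i) & (MAX_PENDING_REQS - 1))

    static unsigned char pending_ring[MAX_PENDING_REQS];
    static unsigned int  pending_prod, pending_cons; /* free-running indices */

    /* Requests currently outstanding at the block layer. */
    #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

    static void init_pending(void)
    {
        unsigned int i;
        pending_cons = 0;
        pending_prod = MAX_PENDING_REQS;             /* ring starts full of free slots */
        for (i = 0; i < MAX_PENDING_REQS; i++)
            pending_ring[i] = i;
    }

    static int alloc_pending(void)                   /* dispatch path */
    {
        if (NR_PENDING_REQS == MAX_PENDING_REQS)
            return -1;                               /* every slot is in flight */
        return pending_ring[MASK_PEND_IDX(pending_cons++)];
    }

    static void free_pending(int idx)                /* completion path */
    {
        pending_ring[MASK_PEND_IDX(pending_prod++)] = idx;
    }

Because NR_PENDING_REQS is computed from the two counters rather than stored, the empty-ring test doubles as the throttle that blkio_schedule() and maybe_trigger_blkio_schedule() key off.
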
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/common.h	Sun Aug 01 15:14:41 2004 +0000
     8.3 @@ -0,0 +1,124 @@
     8.4 +/******************************************************************************
     8.5 + * drivers/xen/blkback/common.h
     8.6 + */
     8.7 +
     8.8 +#ifndef __BLKIF__BACKEND__COMMON_H__
     8.9 +#define __BLKIF__BACKEND__COMMON_H__
    8.10 +
    8.11 +#include <linux/config.h>
    8.12 +#include <linux/version.h>
    8.13 +#include <linux/module.h>
    8.14 +#include <linux/rbtree.h>
    8.15 +#include <linux/interrupt.h>
    8.16 +#include <linux/slab.h>
    8.17 +#include <linux/blkdev.h>
    8.18 +#include <asm-xen/ctrl_if.h>
    8.19 +#include <asm/io.h>
    8.20 +#include <asm/setup.h>
    8.21 +#include <asm/pgalloc.h>
    8.22 +
    8.23 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
    8.24 +#include <asm-xen/blkif.h>
    8.25 +#else
    8.26 +#include "../blkif.h"
    8.27 +#define irqreturn_t void
    8.28 +#define IRQ_HANDLED
    8.29 +#endif
    8.30 +
    8.31 +#if 0
    8.32 +#define ASSERT(_p) \
    8.33 +    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
    8.34 +    __LINE__, __FILE__); *(int*)0=0; }
    8.35 +#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
    8.36 +                           __FILE__ , __LINE__ , ## _a )
    8.37 +#else
    8.38 +#define ASSERT(_p) ((void)0)
    8.39 +#define DPRINTK(_f, _a...) ((void)0)
    8.40 +#endif
    8.41 +
    8.42 +typedef struct blkif_st {
    8.43 +    /* Unique identifier for this interface. */
    8.44 +    domid_t          domid;
    8.45 +    unsigned int     handle;
    8.46 +    /* Physical parameters of the comms window. */
    8.47 +    unsigned long    shmem_frame;
    8.48 +    unsigned int     evtchn;
    8.49 +    int              irq;
    8.50 +    /* Comms information. */
    8.51 +    blkif_ring_t    *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
    8.52 +    BLKIF_RING_IDX     blk_req_cons;  /* Request consumer. */
    8.53 +    BLKIF_RING_IDX     blk_resp_prod; /* Private version of resp. producer. */
    8.54 +    /* VBDs attached to this interface. */
    8.55 +    struct rb_root   vbd_rb;        /* Mapping from 16-bit vdevices to VBDs. */
    8.56 +    spinlock_t       vbd_lock;      /* Protects VBD mapping. */
    8.57 +    /* Private fields. */
    8.58 +    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
    8.59 +    /*
    8.60 +     * DISCONNECT response is deferred until pending requests are ack'ed.
    8.61 +     * We therefore need to store the id from the original request.
    8.62 +     */
    8.63 +    u8               disconnect_rspid;
    8.64 +    struct blkif_st *hash_next;
    8.65 +    struct list_head blkdev_list;
    8.66 +    spinlock_t       blk_ring_lock;
    8.67 +    atomic_t         refcnt;
    8.68 +} blkif_t;
    8.69 +
    8.70 +void blkif_create(blkif_be_create_t *create);
    8.71 +void blkif_destroy(blkif_be_destroy_t *destroy);
    8.72 +void blkif_connect(blkif_be_connect_t *connect);
    8.73 +int  blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
    8.74 +void __blkif_disconnect_complete(blkif_t *blkif);
    8.75 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
    8.76 +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
    8.77 +#define blkif_put(_b)                             \
    8.78 +    do {                                          \
    8.79 +        if ( atomic_dec_and_test(&(_b)->refcnt) ) \
    8.80 +            __blkif_disconnect_complete(_b);      \
    8.81 +    } while (0)
    8.82 +
    8.83 +/* An entry in a list of xen_extents. */
    8.84 +typedef struct _blkif_extent_le { 
    8.85 +    blkif_extent_t extent;               /* an individual extent */
    8.86 +    struct _blkif_extent_le *next;       /* and a pointer to the next */ 
    8.87 +    struct block_device *bdev;
    8.88 +} blkif_extent_le_t; 
    8.89 +
    8.90 +typedef struct _vbd { 
    8.91 +    blkif_vdev_t       vdevice;   /* what the domain refers to this vbd as */
    8.92 +    unsigned char      readonly;  /* Non-zero -> read-only */
    8.93 +    unsigned char      type;      /* VDISK_TYPE_xxx */
    8.94 +    blkif_extent_le_t *extents;   /* list of xen_extents making up this vbd */
    8.95 +    struct rb_node     rb;        /* for linking into R-B tree lookup struct */
    8.96 +} vbd_t; 
    8.97 +
    8.98 +void vbd_create(blkif_be_vbd_create_t *create); 
    8.99 +void vbd_grow(blkif_be_vbd_grow_t *grow); 
   8.100 +void vbd_shrink(blkif_be_vbd_shrink_t *shrink);
   8.101 +void vbd_destroy(blkif_be_vbd_destroy_t *delete); 
   8.102 +int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds);
   8.103 +void destroy_all_vbds(blkif_t *blkif);
   8.104 +
   8.105 +/* Describes a [partial] disk extent (part of a block io request) */
   8.106 +typedef struct {
   8.107 +    union {
   8.108 +	unsigned short dev;
   8.109 +	struct block_device *bdev;
   8.110 +    } _dev;
   8.111 +    unsigned short nr_sects;
   8.112 +    unsigned long  buffer;
   8.113 +    blkif_sector_t sector_number;
   8.114 +} phys_seg_t;
   8.115 +#define ps_device _dev.dev
   8.116 +#define ps_bdev _dev.bdev
   8.117 +
   8.118 +int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); 
   8.119 +
   8.120 +void blkif_interface_init(void);
   8.121 +void blkif_ctrlif_init(void);
   8.122 +
   8.123 +void blkif_deschedule(blkif_t *blkif);
   8.124 +
   8.125 +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
   8.126 +
   8.127 +#endif /* __BLKIF__BACKEND__COMMON_H__ */
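
The blkif_get()/blkif_put() macros in common.h defer interface teardown to the last reference holder: a DISCONNECT message only flips the status, and __blkif_disconnect_complete() fires when the final in-flight request drops its reference. A reduced sketch of that choreography, assuming the macros above (illustrative, not from the changeset):

    /* Illustrative: how the refcount gates disconnect completion. */
    static void dispatch_one_request(blkif_t *blkif)
    {
        blkif_get(blkif);        /* each in-flight request holds a reference */
        /* ... hand segments to the block layer ... */
    }

    static void request_completed(blkif_t *blkif)
    {
        /* The connected interface holds a base reference (taken in
         * blkif_connect), so this put can only reach zero after
         * blkif_disconnect has dropped that base reference. The last put
         * then runs __blkif_disconnect_complete(), which unmaps the ring
         * and sends the deferred control-plane response. */
        blkif_put(blkif);
    }
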
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/control.c	Sun Aug 01 15:14:41 2004 +0000
     9.3 @@ -0,0 +1,87 @@
     9.4 +/******************************************************************************
      9.5 + * drivers/xen/blkback/control.c
     9.6 + * 
     9.7 + * Routines for interfacing with the control plane.
     9.8 + * 
     9.9 + * Copyright (c) 2004, Keir Fraser
    9.10 + */
    9.11 +
    9.12 +#include "common.h"
    9.13 +
    9.14 +static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
    9.15 +{
    9.16 +    DPRINTK("Received blkif backend message, subtype=%d\n", msg->subtype);
    9.17 +    
    9.18 +    switch ( msg->subtype )
    9.19 +    {
    9.20 +    case CMSG_BLKIF_BE_CREATE:
    9.21 +        if ( msg->length != sizeof(blkif_be_create_t) )
    9.22 +            goto parse_error;
    9.23 +        blkif_create((blkif_be_create_t *)&msg->msg[0]);
    9.24 +        break;        
    9.25 +    case CMSG_BLKIF_BE_DESTROY:
    9.26 +        if ( msg->length != sizeof(blkif_be_destroy_t) )
    9.27 +            goto parse_error;
    9.28 +        blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]);
    9.29 +        break;        
    9.30 +    case CMSG_BLKIF_BE_CONNECT:
    9.31 +        if ( msg->length != sizeof(blkif_be_connect_t) )
    9.32 +            goto parse_error;
    9.33 +        blkif_connect((blkif_be_connect_t *)&msg->msg[0]);
    9.34 +        break;        
    9.35 +    case CMSG_BLKIF_BE_DISCONNECT:
    9.36 +        if ( msg->length != sizeof(blkif_be_disconnect_t) )
    9.37 +            goto parse_error;
    9.38 +        if ( !blkif_disconnect((blkif_be_disconnect_t *)&msg->msg[0],msg->id) )
    9.39 +            return; /* Sending the response is deferred until later. */
    9.40 +        break;        
    9.41 +    case CMSG_BLKIF_BE_VBD_CREATE:
    9.42 +        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
    9.43 +            goto parse_error;
    9.44 +        vbd_create((blkif_be_vbd_create_t *)&msg->msg[0]);
    9.45 +        break;
    9.46 +    case CMSG_BLKIF_BE_VBD_DESTROY:
    9.47 +        if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
    9.48 +            goto parse_error;
    9.49 +        vbd_destroy((blkif_be_vbd_destroy_t *)&msg->msg[0]);
    9.50 +        break;
    9.51 +    case CMSG_BLKIF_BE_VBD_GROW:
    9.52 +        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
    9.53 +            goto parse_error;
    9.54 +        vbd_grow((blkif_be_vbd_grow_t *)&msg->msg[0]);
    9.55 +        break;
    9.56 +    case CMSG_BLKIF_BE_VBD_SHRINK:
    9.57 +        if ( msg->length != sizeof(blkif_be_vbd_shrink_t) )
    9.58 +            goto parse_error;
    9.59 +        vbd_shrink((blkif_be_vbd_shrink_t *)&msg->msg[0]);
    9.60 +        break;
    9.61 +    default:
    9.62 +        goto parse_error;
    9.63 +    }
    9.64 +
    9.65 +    ctrl_if_send_response(msg);
    9.66 +    return;
    9.67 +
    9.68 + parse_error:
    9.69 +    DPRINTK("Parse error while reading message subtype %d, len %d\n",
    9.70 +            msg->subtype, msg->length);
    9.71 +    msg->length = 0;
    9.72 +    ctrl_if_send_response(msg);
    9.73 +}
    9.74 +
    9.75 +void blkif_ctrlif_init(void)
    9.76 +{
    9.77 +    ctrl_msg_t                       cmsg;
    9.78 +    blkif_be_driver_status_changed_t st;
    9.79 +
    9.80 +    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, 
    9.81 +                                    CALLBACK_IN_BLOCKING_CONTEXT);
    9.82 +
    9.83 +    /* Send a driver-UP notification to the domain controller. */
    9.84 +    cmsg.type      = CMSG_BLKIF_BE;
    9.85 +    cmsg.subtype   = CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED;
    9.86 +    cmsg.length    = sizeof(blkif_be_driver_status_changed_t);
    9.87 +    st.status      = BLKIF_DRIVER_STATUS_UP;
    9.88 +    memcpy(cmsg.msg, &st, sizeof(st));
    9.89 +    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
    9.90 +}
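
Every arm of the switch in blkif_ctrlif_rx() repeats the same length-check-then-cast step before handing the payload to its handler; the strict sizeof equality test matters because the payload sits in a fixed-size control-ring slot. The pattern, factored into a hypothetical helper macro for illustration (not part of the changeset):

    /* Hypothetical condensation of the validate-and-dispatch pattern. */
    #define BLKIF_DISPATCH(msg, type, fn)                         \
        do {                                                      \
            if ((msg)->length != sizeof(type))                    \
                goto parse_error;  /* reject malformed payload */ \
            fn((type *)&(msg)->msg[0]);                           \
        } while (0)

    /* Usage inside the switch:
     *   case CMSG_BLKIF_BE_CREATE:
     *       BLKIF_DISPATCH(msg, blkif_be_create_t, blkif_create); break;
     *   case CMSG_BLKIF_BE_VBD_GROW:
     *       BLKIF_DISPATCH(msg, blkif_be_vbd_grow_t, vbd_grow);   break;
     */
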
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/interface.c	Sun Aug 01 15:14:41 2004 +0000
    10.3 @@ -0,0 +1,239 @@
    10.4 +/******************************************************************************
     10.5 + * drivers/xen/blkback/interface.c
    10.6 + * 
    10.7 + * Block-device interface management.
    10.8 + * 
    10.9 + * Copyright (c) 2004, Keir Fraser
   10.10 + */
   10.11 +
   10.12 +#include "common.h"
   10.13 +
   10.14 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   10.15 +#define VMALLOC_VMADDR(x) ((unsigned long)(x))
   10.16 +#endif
   10.17 +
   10.18 +#define BLKIF_HASHSZ 1024
   10.19 +#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
   10.20 +
   10.21 +static kmem_cache_t *blkif_cachep;
   10.22 +static blkif_t      *blkif_hash[BLKIF_HASHSZ];
   10.23 +
   10.24 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
   10.25 +{
   10.26 +    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
   10.27 +    while ( (blkif != NULL) && 
   10.28 +            ((blkif->domid != domid) || (blkif->handle != handle)) )
   10.29 +        blkif = blkif->hash_next;
   10.30 +    return blkif;
   10.31 +}
   10.32 +
   10.33 +void __blkif_disconnect_complete(blkif_t *blkif)
   10.34 +{
   10.35 +    ctrl_msg_t            cmsg;
   10.36 +    blkif_be_disconnect_t disc;
   10.37 +
   10.38 +    /*
   10.39 +     * These can't be done in __blkif_disconnect() because at that point there
   10.40 +     * may be outstanding requests at the disc whose asynchronous responses
   10.41 +     * must still be notified to the remote driver.
   10.42 +     */
   10.43 +    unbind_evtchn_from_irq(blkif->evtchn);
   10.44 +    vfree(blkif->blk_ring_base);
   10.45 +
   10.46 +    /* Construct the deferred response message. */
   10.47 +    cmsg.type         = CMSG_BLKIF_BE;
   10.48 +    cmsg.subtype      = CMSG_BLKIF_BE_DISCONNECT;
   10.49 +    cmsg.id           = blkif->disconnect_rspid;
   10.50 +    cmsg.length       = sizeof(blkif_be_disconnect_t);
   10.51 +    disc.domid        = blkif->domid;
   10.52 +    disc.blkif_handle = blkif->handle;
   10.53 +    disc.status       = BLKIF_BE_STATUS_OKAY;
   10.54 +    memcpy(cmsg.msg, &disc, sizeof(disc));
   10.55 +
   10.56 +    /*
   10.57 +     * Make sure message is constructed /before/ status change, because
   10.58 +     * after the status change the 'blkif' structure could be deallocated at
   10.59 +     * any time. Also make sure we send the response /after/ status change,
   10.60 +     * as otherwise a subsequent CONNECT request could spuriously fail if
   10.61 +     * another CPU doesn't see the status change yet.
   10.62 +     */
   10.63 +    mb();
   10.64 +    if ( blkif->status != DISCONNECTING )
   10.65 +        BUG();
   10.66 +    blkif->status = DISCONNECTED;
   10.67 +    mb();
   10.68 +
   10.69 +    /* Send the successful response. */
   10.70 +    ctrl_if_send_response(&cmsg);
   10.71 +}
   10.72 +
   10.73 +void blkif_create(blkif_be_create_t *create)
   10.74 +{
   10.75 +    domid_t       domid  = create->domid;
   10.76 +    unsigned int  handle = create->blkif_handle;
   10.77 +    blkif_t     **pblkif, *blkif;
   10.78 +
   10.79 +    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
   10.80 +    {
   10.81 +        DPRINTK("Could not create blkif: out of memory\n");
   10.82 +        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
   10.83 +        return;
   10.84 +    }
   10.85 +
   10.86 +    memset(blkif, 0, sizeof(*blkif));
   10.87 +    blkif->domid  = domid;
   10.88 +    blkif->handle = handle;
   10.89 +    blkif->status = DISCONNECTED;
   10.90 +    spin_lock_init(&blkif->vbd_lock);
   10.91 +    spin_lock_init(&blkif->blk_ring_lock);
   10.92 +    atomic_set(&blkif->refcnt, 0);
   10.93 +
   10.94 +    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
   10.95 +    while ( *pblkif != NULL )
   10.96 +    {
   10.97 +        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
   10.98 +        {
   10.99 +            DPRINTK("Could not create blkif: already exists\n");
  10.100 +            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
  10.101 +            kmem_cache_free(blkif_cachep, blkif);
  10.102 +            return;
  10.103 +        }
  10.104 +        pblkif = &(*pblkif)->hash_next;
  10.105 +    }
  10.106 +
  10.107 +    blkif->hash_next = *pblkif;
  10.108 +    *pblkif = blkif;
  10.109 +
  10.110 +    DPRINTK("Successfully created blkif\n");
  10.111 +    create->status = BLKIF_BE_STATUS_OKAY;
  10.112 +}
  10.113 +
  10.114 +void blkif_destroy(blkif_be_destroy_t *destroy)
  10.115 +{
  10.116 +    domid_t       domid  = destroy->domid;
  10.117 +    unsigned int  handle = destroy->blkif_handle;
  10.118 +    blkif_t     **pblkif, *blkif;
  10.119 +
  10.120 +    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
  10.121 +    while ( (blkif = *pblkif) != NULL )
  10.122 +    {
  10.123 +        if ( (blkif->domid == domid) && (blkif->handle == handle) )
  10.124 +        {
  10.125 +            if ( blkif->status != DISCONNECTED )
  10.126 +                goto still_connected;
  10.127 +            goto destroy;
  10.128 +        }
  10.129 +        pblkif = &blkif->hash_next;
  10.130 +    }
  10.131 +
  10.132 +    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  10.133 +    return;
  10.134 +
  10.135 + still_connected:
  10.136 +    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
  10.137 +    return;
  10.138 +
  10.139 + destroy:
  10.140 +    *pblkif = blkif->hash_next;
  10.141 +    destroy_all_vbds(blkif);
  10.142 +    kmem_cache_free(blkif_cachep, blkif);
  10.143 +    destroy->status = BLKIF_BE_STATUS_OKAY;
  10.144 +}
  10.145 +
  10.146 +void blkif_connect(blkif_be_connect_t *connect)
  10.147 +{
  10.148 +    domid_t       domid  = connect->domid;
  10.149 +    unsigned int  handle = connect->blkif_handle;
  10.150 +    unsigned int  evtchn = connect->evtchn;
  10.151 +    unsigned long shmem_frame = connect->shmem_frame;
  10.152 +    struct vm_struct *vma;
  10.153 +    pgprot_t      prot;
  10.154 +    int           error;
  10.155 +    blkif_t      *blkif;
  10.156 +
  10.157 +    blkif = blkif_find_by_handle(domid, handle);
  10.158 +    if ( unlikely(blkif == NULL) )
  10.159 +    {
  10.160 +        DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n", 
  10.161 +                connect->domid, connect->blkif_handle); 
  10.162 +        connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  10.163 +        return;
  10.164 +    }
  10.165 +
  10.166 +    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
  10.167 +    {
  10.168 +        connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
  10.169 +        return;
  10.170 +    }
  10.171 +
  10.172 +    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
  10.173 +    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
  10.174 +                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
  10.175 +                                    prot, domid);
  10.176 +    if ( error != 0 )
  10.177 +    {
  10.178 +        if ( error == -ENOMEM )
  10.179 +            connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
  10.180 +        else if ( error == -EFAULT )
  10.181 +            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
  10.182 +        else
  10.183 +            connect->status = BLKIF_BE_STATUS_ERROR;
  10.184 +        vfree(vma->addr);
  10.185 +        return;
  10.186 +    }
  10.187 +
  10.188 +    if ( blkif->status != DISCONNECTED )
  10.189 +    {
  10.190 +        connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
  10.191 +        vfree(vma->addr);
  10.192 +        return;
  10.193 +    }
  10.194 +
  10.195 +    blkif->evtchn        = evtchn;
  10.196 +    blkif->irq           = bind_evtchn_to_irq(evtchn);
  10.197 +    blkif->shmem_frame   = shmem_frame;
  10.198 +    blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
  10.199 +    blkif->status        = CONNECTED;
  10.200 +    blkif_get(blkif);
  10.201 +
  10.202 +    request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif);
  10.203 +
  10.204 +    connect->status = BLKIF_BE_STATUS_OKAY;
  10.205 +}
  10.206 +
  10.207 +int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
  10.208 +{
  10.209 +    domid_t       domid  = disconnect->domid;
  10.210 +    unsigned int  handle = disconnect->blkif_handle;
  10.211 +    blkif_t      *blkif;
  10.212 +
  10.213 +    blkif = blkif_find_by_handle(domid, handle);
  10.214 +    if ( unlikely(blkif == NULL) )
  10.215 +    {
  10.216 +        DPRINTK("blkif_disconnect attempted for non-existent blkif"
  10.217 +                " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle); 
  10.218 +        disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  10.219 +        return 1; /* Caller will send response error message. */
  10.220 +    }
  10.221 +
  10.222 +    if ( blkif->status == CONNECTED )
  10.223 +    {
  10.224 +        blkif->status = DISCONNECTING;
  10.225 +        blkif->disconnect_rspid = rsp_id;
  10.226 +        wmb(); /* Let other CPUs see the status change. */
  10.227 +        free_irq(blkif->irq, blkif);
  10.228 +        blkif_deschedule(blkif);
  10.229 +        blkif_put(blkif);
  10.230 +        return 0; /* Caller should not send response message. */
  10.231 +    }
  10.232 +
  10.233 +    disconnect->status = BLKIF_BE_STATUS_OKAY;
  10.234 +    return 1;
  10.235 +}
  10.236 +
  10.237 +void __init blkif_interface_init(void)
  10.238 +{
  10.239 +    blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
  10.240 +                                     0, 0, NULL, NULL);
  10.241 +    memset(blkif_hash, 0, sizeof(blkif_hash));
  10.242 +}
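
interface.c keys its interface table on the (domid, handle) pair, XOR-folded into a power-of-two bucket array with singly linked overflow chains. A condensed sketch of the lookup and insert steps (the real blkif_create() appends at the chain tail while scanning for duplicates; a head push is shown here for brevity):

    /* Condensed from interface.c: chained hash keyed on (domid, handle). */
    #define BLKIF_HASHSZ 1024                          /* power of two */
    #define BLKIF_HASH(_d,_h) (((int)(_d) ^ (int)(_h)) & (BLKIF_HASHSZ - 1))

    static blkif_t *blkif_hash[BLKIF_HASHSZ];

    static blkif_t *hash_lookup(domid_t domid, unsigned int handle)
    {
        blkif_t *b = blkif_hash[BLKIF_HASH(domid, handle)];
        while (b != NULL && (b->domid != domid || b->handle != handle))
            b = b->hash_next;                          /* walk the chain */
        return b;
    }

    static void hash_insert(blkif_t *b)                /* caller rejects dups */
    {
        blkif_t **head = &blkif_hash[BLKIF_HASH(b->domid, b->handle)];
        b->hash_next = *head;
        *head = b;
    }
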
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/vbd.c	Sun Aug 01 15:14:41 2004 +0000
    11.3 @@ -0,0 +1,540 @@
    11.4 +/******************************************************************************
     11.5 + * drivers/xen/blkback/vbd.c
    11.6 + * 
    11.7 + * Routines for managing virtual block devices (VBDs).
    11.8 + * 
    11.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   11.10 + */
   11.11 +
   11.12 +#include "common.h"
   11.13 +
   11.14 +static dev_t vbd_map_devnum(blkif_pdev_t);
   11.15 +
   11.16 +void vbd_create(blkif_be_vbd_create_t *create) 
   11.17 +{
   11.18 +    vbd_t       *vbd; 
   11.19 +    struct rb_node **rb_p, *rb_parent = NULL;
   11.20 +    blkif_t     *blkif;
   11.21 +    blkif_vdev_t vdevice = create->vdevice;
   11.22 +
   11.23 +    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
   11.24 +    if ( unlikely(blkif == NULL) )
   11.25 +    {
   11.26 +        DPRINTK("vbd_create attempted for non-existent blkif (%u,%u)\n", 
   11.27 +                create->domid, create->blkif_handle); 
   11.28 +        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   11.29 +        return;
   11.30 +    }
   11.31 +
   11.32 +    spin_lock(&blkif->vbd_lock);
   11.33 +
   11.34 +    rb_p = &blkif->vbd_rb.rb_node;
   11.35 +    while ( *rb_p != NULL )
   11.36 +    {
   11.37 +        rb_parent = *rb_p;
   11.38 +        vbd = rb_entry(rb_parent, vbd_t, rb);
   11.39 +        if ( vdevice < vbd->vdevice )
   11.40 +        {
   11.41 +            rb_p = &rb_parent->rb_left;
   11.42 +        }
   11.43 +        else if ( vdevice > vbd->vdevice )
   11.44 +        {
   11.45 +            rb_p = &rb_parent->rb_right;
   11.46 +        }
   11.47 +        else
   11.48 +        {
   11.49 +            DPRINTK("vbd_create attempted for already existing vbd\n");
   11.50 +            create->status = BLKIF_BE_STATUS_VBD_EXISTS;
   11.51 +            goto out;
   11.52 +        }
   11.53 +    }
   11.54 +
   11.55 +    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
   11.56 +    {
   11.57 +        DPRINTK("vbd_create: out of memory\n");
   11.58 +        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
   11.59 +        goto out;
   11.60 +    }
   11.61 +
   11.62 +    vbd->vdevice  = vdevice; 
   11.63 +    vbd->readonly = create->readonly;
   11.64 +    vbd->type     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
   11.65 +    vbd->extents  = NULL; 
   11.66 +
   11.67 +    rb_link_node(&vbd->rb, rb_parent, rb_p);
   11.68 +    rb_insert_color(&vbd->rb, &blkif->vbd_rb);
   11.69 +
   11.70 +    DPRINTK("Successful creation of vdev=%04x (dom=%u)\n",
   11.71 +            vdevice, create->domid);
   11.72 +    create->status = BLKIF_BE_STATUS_OKAY;
   11.73 +
   11.74 + out:
   11.75 +    spin_unlock(&blkif->vbd_lock);
   11.76 +}
   11.77 +
   11.78 +
   11.79 +/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
   11.80 +void vbd_grow(blkif_be_vbd_grow_t *grow) 
   11.81 +{
   11.82 +    blkif_t            *blkif;
   11.83 +    blkif_extent_le_t **px, *x; 
   11.84 +    vbd_t              *vbd = NULL;
   11.85 +    struct rb_node     *rb;
   11.86 +    blkif_vdev_t        vdevice = grow->vdevice;
   11.87 +#if 0
   11.88 +    unsigned long       sz;
   11.89 +#endif
   11.90 +
   11.91 +
   11.92 +    blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
   11.93 +    if ( unlikely(blkif == NULL) )
   11.94 +    {
   11.95 +        DPRINTK("vbd_grow attempted for non-existent blkif (%u,%u)\n", 
   11.96 +                grow->domid, grow->blkif_handle); 
   11.97 +        grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   11.98 +        return;
   11.99 +    }
  11.100 +
  11.101 +    spin_lock(&blkif->vbd_lock);
  11.102 +
  11.103 +    rb = blkif->vbd_rb.rb_node;
  11.104 +    while ( rb != NULL )
  11.105 +    {
  11.106 +        vbd = rb_entry(rb, vbd_t, rb);
  11.107 +        if ( vdevice < vbd->vdevice )
  11.108 +            rb = rb->rb_left;
  11.109 +        else if ( vdevice > vbd->vdevice )
  11.110 +            rb = rb->rb_right;
  11.111 +        else
  11.112 +            break;
  11.113 +    }
  11.114 +
  11.115 +    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
  11.116 +    {
  11.117 +        DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
  11.118 +        grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
  11.119 +        goto out;
  11.120 +    } 
  11.121 +
  11.122 +    if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t), 
  11.123 +                               GFP_KERNEL)) == NULL) )
  11.124 +    {
  11.125 +        DPRINTK("vbd_grow: out of memory\n");
  11.126 +        grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
  11.127 +        goto out;
  11.128 +    }
  11.129 +
  11.130 +    x->extent.device        = grow->extent.device;
  11.131 +    /* XXXcl see comments at top of open_by_devnum */
   11.132 +#if 1
  11.133 +    x->bdev = open_by_devnum(vbd_map_devnum(x->extent.device),
  11.134 +			     vbd->readonly ? FMODE_READ : FMODE_WRITE);
  11.135 +#endif
  11.136 +    /* XXXcl maybe bd_claim? */
  11.137 +    x->extent.sector_start  = grow->extent.sector_start;
  11.138 +    x->extent.sector_length = grow->extent.sector_length;
  11.139 +    x->next                 = (blkif_extent_le_t *)NULL;
  11.140 +
  11.141 +#if 0
  11.142 +    if( !blk_size[MAJOR(x->extent.device)] )
  11.143 +    {
  11.144 +        DPRINTK("vbd_grow: device %08x doesn't exist.\n", x->extent.device);
  11.145 +	grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
  11.146 +	goto out;
  11.147 +    }
  11.148 +    
  11.149 +    /* convert blocks (1KB) to sectors */
  11.150 +    sz = blk_size[MAJOR(x->extent.device)][MINOR(x->extent.device)] * 2;    
  11.151 +#endif
  11.152 +
  11.153 +    if ( x->extent.sector_start > 0 )
  11.154 +    {
  11.155 +        DPRINTK("vbd_grow: device %08x start not zero!\n", x->extent.device);
  11.156 +	grow->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
  11.157 +	goto out;
  11.158 +    }
  11.159 +
  11.160 +#if 0
  11.161 +    /*
  11.162 +     * NB. This test assumes sector_start == 0, which is always the case
  11.163 +     * in Xen 1.3. In fact the whole grow/shrink interface could do with
  11.164 +     * some simplification.
  11.165 +     */
  11.166 +    if ( x->extent.sector_length > sz )
  11.167 +        x->extent.sector_length = sz;
  11.168 +    
  11.169 +    DPRINTK("vbd_grow: requested_len %llu actual_len %lu\n", 
  11.170 +            x->extent.sector_length, sz);
  11.171 +#endif
  11.172 +
  11.173 +    for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) 
  11.174 +        continue;
  11.175 +    
  11.176 +    *px = x;
  11.177 +
  11.178 +    DPRINTK("Successful grow of vdev=%04x (dom=%u)\n",
  11.179 +            vdevice, grow->domid);
  11.180 +    
  11.181 +    grow->status = BLKIF_BE_STATUS_OKAY;
  11.182 +
  11.183 + out:
  11.184 +    spin_unlock(&blkif->vbd_lock);
  11.185 +}
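
The loop above appends the new extent with the pointer-to-pointer idiom: px walks the next links themselves rather than the nodes, so an empty extent list needs no special case. A minimal user-space sketch of the same idiom, with a hypothetical node type (illustration only, not driver code):

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct node { int val; struct node *next; } node;

    /* Append by advancing a pointer to the link itself: the empty list and
     * the non-empty list are handled by exactly the same code path. */
    static void append(node **head, int val)
    {
        node **px = head;
        node  *x  = malloc(sizeof(*x));

        if (x == NULL)
            return;
        x->val  = val;
        x->next = NULL;
        while (*px != NULL)          /* walk to the terminating NULL link */
            px = &(*px)->next;
        *px = x;                     /* overwrite that link in place */
    }

    int main(void)
    {
        node *list = NULL, *n;
        append(&list, 1);
        append(&list, 2);
        for (n = list; n != NULL; n = n->next)
            printf("%d\n", n->val);  /* prints 1 then 2 */
        return 0;
    }
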
  11.186 +
  11.187 +
  11.188 +void vbd_shrink(blkif_be_vbd_shrink_t *shrink)
  11.189 +{
  11.190 +    blkif_t            *blkif;
  11.191 +    blkif_extent_le_t **px, *x; 
  11.192 +    vbd_t              *vbd = NULL;
  11.193 +    struct rb_node     *rb;
  11.194 +    blkif_vdev_t        vdevice = shrink->vdevice;
  11.195 +
  11.196 +    blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle);
  11.197 +    if ( unlikely(blkif == NULL) )
  11.198 +    {
  11.199 +        DPRINTK("vbd_shrink attempted for non-existent blkif (%u,%u)\n", 
  11.200 +                shrink->domid, shrink->blkif_handle); 
  11.201 +        shrink->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  11.202 +        return;
  11.203 +    }
  11.204 +
  11.205 +    spin_lock(&blkif->vbd_lock);
  11.206 +
  11.207 +    rb = blkif->vbd_rb.rb_node;
  11.208 +    while ( rb != NULL )
  11.209 +    {
  11.210 +        vbd = rb_entry(rb, vbd_t, rb);
  11.211 +        if ( vdevice < vbd->vdevice )
  11.212 +            rb = rb->rb_left;
  11.213 +        else if ( vdevice > vbd->vdevice )
  11.214 +            rb = rb->rb_right;
  11.215 +        else
  11.216 +            break;
  11.217 +    }
  11.218 +
  11.219 +    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
  11.220 +    {
  11.221 +        shrink->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
  11.222 +        goto out;
  11.223 +    }
  11.224 +
  11.225 +    if ( unlikely(vbd->extents == NULL) )
  11.226 +    {
  11.227 +        shrink->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
  11.228 +        goto out;
  11.229 +    }
  11.230 +
  11.231 +    /* Find the last extent. We now know that there is at least one. */
  11.232 +    for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next )
  11.233 +        continue;
  11.234 +
  11.235 +    x   = *px;
  11.236 +    *px = x->next;
  11.237 +    kfree(x);
  11.238 +
  11.239 +    shrink->status = BLKIF_BE_STATUS_OKAY;
  11.240 +
  11.241 + out:
  11.242 +    spin_unlock(&blkif->vbd_lock);
  11.243 +}
  11.244 +
  11.245 +
  11.246 +void vbd_destroy(blkif_be_vbd_destroy_t *destroy) 
  11.247 +{
  11.248 +    blkif_t           *blkif;
  11.249 +    vbd_t             *vbd;
  11.250 +    struct rb_node    *rb;
  11.251 +    blkif_extent_le_t *x, *t;
  11.252 +    blkif_vdev_t       vdevice = destroy->vdevice;
  11.253 +
  11.254 +    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
  11.255 +    if ( unlikely(blkif == NULL) )
  11.256 +    {
  11.257 +        DPRINTK("vbd_destroy attempted for non-existent blkif (%u,%u)\n", 
  11.258 +                destroy->domid, destroy->blkif_handle); 
  11.259 +        destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  11.260 +        return;
  11.261 +    }
  11.262 +
  11.263 +    spin_lock(&blkif->vbd_lock);
  11.264 +
  11.265 +    rb = blkif->vbd_rb.rb_node;
  11.266 +    while ( rb != NULL )
  11.267 +    {
  11.268 +        vbd = rb_entry(rb, vbd_t, rb);
  11.269 +        if ( vdevice < vbd->vdevice )
  11.270 +            rb = rb->rb_left;
  11.271 +        else if ( vdevice > vbd->vdevice )
  11.272 +            rb = rb->rb_right;
  11.273 +        else
  11.274 +            goto found;
  11.275 +    }
  11.276 +
  11.277 +    destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
  11.278 +    goto out;
  11.279 +
  11.280 + found:
  11.281 +    rb_erase(rb, &blkif->vbd_rb);
  11.282 +    x = vbd->extents;
  11.283 +    kfree(vbd);
  11.284 +
  11.285 +    while ( x != NULL )
  11.286 +    {
  11.287 +        t = x->next;
  11.288 +        kfree(x);
  11.289 +        x = t;
  11.290 +    }
  11.291 +    
  11.292 + out:
  11.293 +    spin_unlock(&blkif->vbd_lock);
  11.294 +}
  11.295 +
  11.296 +
  11.297 +void destroy_all_vbds(blkif_t *blkif)
  11.298 +{
  11.299 +    vbd_t *vbd;
  11.300 +    struct rb_node *rb;
  11.301 +    blkif_extent_le_t *x, *t;
  11.302 +
  11.303 +    spin_lock(&blkif->vbd_lock);
  11.304 +
  11.305 +    while ( (rb = blkif->vbd_rb.rb_node) != NULL )
  11.306 +    {
  11.307 +        vbd = rb_entry(rb, vbd_t, rb);
  11.308 +
  11.309 +        rb_erase(rb, &blkif->vbd_rb);
  11.310 +        x = vbd->extents;
  11.311 +        kfree(vbd);
  11.312 +        
  11.313 +        while ( x != NULL )
  11.314 +        {
  11.315 +            t = x->next;
  11.316 +            kfree(x);
  11.317 +            x = t;
  11.318 +        }          
  11.319 +    }
  11.320 +
  11.321 +    spin_unlock(&blkif->vbd_lock);
  11.322 +}
  11.323 +
  11.324 +
  11.325 +static int vbd_probe_single(blkif_t *blkif, vdisk_t *vbd_info, vbd_t *vbd)
  11.326 +{
  11.327 +    blkif_extent_le_t *x; 
  11.328 +
  11.329 +    vbd_info->device = vbd->vdevice; 
  11.330 +    vbd_info->info   = vbd->type;
  11.331 +    if ( vbd->readonly )
  11.332 +        vbd_info->info |= VDISK_FLAG_RO; 
  11.333 +    vbd_info->capacity = 0ULL;
  11.334 +    for ( x = vbd->extents; x != NULL; x = x->next )
  11.335 +        vbd_info->capacity += x->extent.sector_length; 
  11.336 +        
  11.337 +    return 0;
  11.338 +}
  11.339 +
  11.340 +
  11.341 +int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
  11.342 +{
  11.343 +    int rc = 0, nr_vbds = 0;
  11.344 +    struct rb_node *rb;
  11.345 +
  11.346 +    spin_lock(&blkif->vbd_lock);
  11.347 +
  11.348 +    if ( (rb = blkif->vbd_rb.rb_node) == NULL )
  11.349 +        goto out;
  11.350 +
  11.351 + new_subtree:
  11.352 +    /* STEP 1. Find least node (it'll be left-most). */
  11.353 +    while ( rb->rb_left != NULL )
  11.354 +        rb = rb->rb_left;
  11.355 +
  11.356 +    for ( ; ; )
  11.357 +    {
  11.358 +        /* STEP 2. Dealt with left subtree. Now process current node. */
  11.359 +        if ( (rc = vbd_probe_single(blkif, &vbd_info[nr_vbds], 
  11.360 +                                    rb_entry(rb, vbd_t, rb))) != 0 )
  11.361 +            goto out;
  11.362 +        if ( ++nr_vbds == max_vbds )
  11.363 +            goto out;
  11.364 +
  11.365 +        /* STEP 3. Process right subtree, if any. */
  11.366 +        if ( rb->rb_right != NULL )
  11.367 +        {
  11.368 +            rb = rb->rb_right;
  11.369 +            goto new_subtree;
  11.370 +        }
  11.371 +
   11.372 +        /* STEP 4. Done both subtrees. Head back through ancestors. */
  11.373 +        for ( ; ; ) 
  11.374 +        {
  11.375 +            /* We're done when we get back to the root node. */
  11.376 +            if ( rb->rb_parent == NULL )
  11.377 +                goto out;
  11.378 +            /* If we are left of parent, then parent is next to process. */
  11.379 +            if ( rb->rb_parent->rb_left == rb )
  11.380 +                break;
  11.381 +            /* If we are right of parent, then we climb to grandparent. */
  11.382 +            rb = rb->rb_parent;
  11.383 +        }
  11.384 +
  11.385 +        rb = rb->rb_parent;
  11.386 +    }
  11.387 +
  11.388 + out:
  11.389 +    spin_unlock(&blkif->vbd_lock);
  11.390 +    return (rc == 0) ? nr_vbds : rc;  
  11.391 +}
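
The STEP 1-4 comments above describe an iterative in-order walk that relies on the rb_parent back-pointers instead of recursion or an explicit stack. For illustration, the same traversal on a hypothetical plain binary tree with parent pointers (a standalone sketch, not the kernel rbtree API):

    #include <stdio.h>
    #include <stddef.h>

    struct tnode {
        int key;
        struct tnode *left, *right, *parent;
    };

    /* In-order successor via parent pointers: the leftmost node of the right
     * subtree, or else the first ancestor reached from its left child. */
    static struct tnode *successor(struct tnode *n)
    {
        if (n->right != NULL) {
            n = n->right;
            while (n->left != NULL)
                n = n->left;
            return n;
        }
        while (n->parent != NULL && n->parent->right == n)
            n = n->parent;
        return n->parent;            /* NULL once the root is passed */
    }

    int main(void)
    {
        struct tnode b = { 2, NULL, NULL, NULL };
        struct tnode a = { 1, NULL, NULL, &b };
        struct tnode c = { 3, NULL, NULL, &b };
        struct tnode *n = &b;

        b.left = &a; b.right = &c;
        while (n->left != NULL)      /* STEP 1: least key is left-most */
            n = n->left;
        for (; n != NULL; n = successor(n))
            printf("%d\n", n->key);  /* prints 1 2 3 */
        return 0;
    }
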
  11.392 +
  11.393 +
  11.394 +int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation)
  11.395 +{
  11.396 +    blkif_extent_le_t *x; 
  11.397 +    vbd_t             *vbd;
  11.398 +    struct rb_node    *rb;
  11.399 +    blkif_sector_t     sec_off;
  11.400 +    unsigned long      nr_secs;
  11.401 +
  11.402 +    spin_lock(&blkif->vbd_lock);
  11.403 +
  11.404 +    rb = blkif->vbd_rb.rb_node;
  11.405 +    while ( rb != NULL )
  11.406 +    {
  11.407 +        vbd = rb_entry(rb, vbd_t, rb);
  11.408 +        if ( pseg->ps_device < vbd->vdevice )
  11.409 +            rb = rb->rb_left;
  11.410 +        else if ( pseg->ps_device > vbd->vdevice )
  11.411 +            rb = rb->rb_right;
  11.412 +        else
  11.413 +            goto found;
  11.414 +    }
  11.415 +
   11.416 +    DPRINTK("vbd_translate: domain %u attempted to access "
  11.417 +            "non-existent VBD.\n", blkif->domid);
  11.418 +
  11.419 +    spin_unlock(&blkif->vbd_lock);
  11.420 +    return -ENODEV; 
  11.421 +
  11.422 + found:
  11.423 +
  11.424 +    if ( (operation == WRITE) && vbd->readonly )
  11.425 +    {
  11.426 +        spin_unlock(&blkif->vbd_lock);
  11.427 +        return -EACCES; 
  11.428 +    }
  11.429 +
  11.430 +    /*
  11.431 +     * Now iterate through the list of blkif_extents, working out which should 
  11.432 +     * be used to perform the translation.
  11.433 +     */
  11.434 +    sec_off = pseg->sector_number; 
  11.435 +    nr_secs = pseg->nr_sects;
  11.436 +    for ( x = vbd->extents; x != NULL; x = x->next )
  11.437 +    { 
  11.438 +        if ( sec_off < x->extent.sector_length )
  11.439 +        {
  11.440 +#if 0
  11.441 +            pseg->ps_device = x->extent.device;
  11.442 +#else
   11.443 +            pseg->ps_bdev = x->bdev;
  11.444 +#endif
  11.445 +            pseg->sector_number = x->extent.sector_start + sec_off;
  11.446 +            if ( unlikely((sec_off + nr_secs) > x->extent.sector_length) )
  11.447 +                goto overrun;
  11.448 +            spin_unlock(&blkif->vbd_lock);
  11.449 +            return 1;
  11.450 +        } 
  11.451 +        sec_off -= x->extent.sector_length; 
  11.452 +    }
  11.453 +
  11.454 +    DPRINTK("vbd_translate: end of vbd.\n");
  11.455 +    spin_unlock(&blkif->vbd_lock);
  11.456 +    return -EACCES; 
  11.457 +
  11.458 +    /*
  11.459 +     * Here we deal with overrun onto the following extent. We don't deal with 
  11.460 +     * overrun of more than one boundary since each request is restricted to 
  11.461 +     * 2^9 512-byte sectors, so it should be trivial for control software to 
  11.462 +     * ensure that extents are large enough to prevent excessive overrun.
  11.463 +     */
  11.464 + overrun:
  11.465 +
  11.466 +    /* Adjust length of first chunk to run to end of first extent. */
  11.467 +    pseg[0].nr_sects = x->extent.sector_length - sec_off;
  11.468 +
  11.469 +    /* Set second chunk buffer and length to start where first chunk ended. */
  11.470 +    pseg[1].buffer   = pseg[0].buffer + (pseg[0].nr_sects << 9);
  11.471 +    pseg[1].nr_sects = nr_secs - pseg[0].nr_sects;
  11.472 +
  11.473 +    /* Now move to the next extent. Check it exists and is long enough! */
  11.474 +    if ( unlikely((x = x->next) == NULL) || 
  11.475 +         unlikely(x->extent.sector_length < pseg[1].nr_sects) )
  11.476 +    {
  11.477 +        DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
  11.478 +        spin_unlock(&blkif->vbd_lock);
  11.479 +        return -EACCES;
  11.480 +    }
  11.481 +
  11.482 +    /* Store the real device and start sector for the second chunk. */
  11.483 +#if 0
  11.484 +    pseg[1].ps_device     = x->extent.device;
  11.485 +#else
   11.486 +    pseg[1].ps_bdev       = x->bdev;
  11.487 +#endif
  11.488 +    pseg[1].sector_number = x->extent.sector_start;
  11.489 +    
  11.490 +    spin_unlock(&blkif->vbd_lock);
  11.491 +    return 2;
  11.492 +}
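
To make the overrun arithmetic concrete, assume a first extent of 100 sectors and a request of 8 sectors starting at virtual sector 96: chunk 0 takes the last 4 sectors of the first extent, chunk 1 the first 4 of the next, and the second buffer starts 4 << 9 bytes in. A standalone sketch with those illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
        unsigned long ext_len = 100; /* sectors in the first extent (assumed) */
        unsigned long sec_off = 96;  /* request offset within that extent */
        unsigned long nr_secs = 8;   /* request length in sectors */

        unsigned long first   = ext_len - sec_off; /* tail of first extent */
        unsigned long second  = nr_secs - first;   /* head of next extent */
        unsigned long buf_adv = first << 9;        /* byte offset of chunk 2 */

        printf("chunk0=%lu sectors, chunk1=%lu sectors at buffer+%lu\n",
               first, second, buf_adv);            /* 4, 4, 2048 */
        return 0;
    }
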
  11.493 +
  11.494 +#define MAJOR_XEN(dev)	((dev)>>8)
  11.495 +#define MINOR_XEN(dev)	((dev) & 0xff)
  11.496 +
  11.497 +#define	XEN_IDE0_MAJOR IDE0_MAJOR
  11.498 +#define	XEN_IDE1_MAJOR IDE1_MAJOR
  11.499 +#define	XEN_IDE2_MAJOR IDE2_MAJOR
  11.500 +#define	XEN_IDE3_MAJOR IDE3_MAJOR
  11.501 +#define	XEN_IDE4_MAJOR IDE4_MAJOR
  11.502 +#define	XEN_IDE5_MAJOR IDE5_MAJOR
  11.503 +#define	XEN_IDE6_MAJOR IDE6_MAJOR
  11.504 +#define	XEN_IDE7_MAJOR IDE7_MAJOR
  11.505 +#define	XEN_IDE8_MAJOR IDE8_MAJOR
  11.506 +#define	XEN_IDE9_MAJOR IDE9_MAJOR
  11.507 +#define	XEN_SCSI_DISK0_MAJOR SCSI_DISK0_MAJOR
  11.508 +#define	XEN_SCSI_DISK1_MAJOR SCSI_DISK1_MAJOR
  11.509 +#define	XEN_SCSI_DISK2_MAJOR SCSI_DISK2_MAJOR
  11.510 +#define	XEN_SCSI_DISK3_MAJOR SCSI_DISK3_MAJOR
  11.511 +#define	XEN_SCSI_DISK4_MAJOR SCSI_DISK4_MAJOR
  11.512 +#define	XEN_SCSI_DISK5_MAJOR SCSI_DISK5_MAJOR
  11.513 +#define	XEN_SCSI_DISK6_MAJOR SCSI_DISK6_MAJOR
  11.514 +#define	XEN_SCSI_DISK7_MAJOR SCSI_DISK7_MAJOR
  11.515 +#define	XEN_SCSI_CDROM_MAJOR SCSI_CDROM_MAJOR
  11.516 +
  11.517 +static dev_t vbd_map_devnum(blkif_pdev_t cookie)
  11.518 +{
  11.519 +    int new_major;
  11.520 +    int major = MAJOR_XEN(cookie);
  11.521 +    int minor = MINOR_XEN(cookie);
  11.522 +
  11.523 +    switch (major) {
  11.524 +    case XEN_IDE0_MAJOR: new_major = IDE0_MAJOR; break;
  11.525 +    case XEN_IDE1_MAJOR: new_major = IDE1_MAJOR; break;
  11.526 +    case XEN_IDE2_MAJOR: new_major = IDE2_MAJOR; break;
  11.527 +    case XEN_IDE3_MAJOR: new_major = IDE3_MAJOR; break;
  11.528 +    case XEN_IDE4_MAJOR: new_major = IDE4_MAJOR; break;
  11.529 +    case XEN_IDE5_MAJOR: new_major = IDE5_MAJOR; break;
  11.530 +    case XEN_IDE6_MAJOR: new_major = IDE6_MAJOR; break;
  11.531 +    case XEN_IDE7_MAJOR: new_major = IDE7_MAJOR; break;
  11.532 +    case XEN_IDE8_MAJOR: new_major = IDE8_MAJOR; break;
  11.533 +    case XEN_IDE9_MAJOR: new_major = IDE9_MAJOR; break;
  11.534 +    case XEN_SCSI_DISK0_MAJOR: new_major = SCSI_DISK0_MAJOR; break;
  11.535 +    case XEN_SCSI_DISK1_MAJOR ... XEN_SCSI_DISK7_MAJOR:
   11.536 +        new_major = SCSI_DISK1_MAJOR + major - XEN_SCSI_DISK1_MAJOR;
   11.537 +        break;
  11.538 +    case XEN_SCSI_CDROM_MAJOR: new_major = SCSI_CDROM_MAJOR; break;
  11.539 +    default: new_major = 0; break;
  11.540 +    }
  11.541 +
  11.542 +    return MKDEV(new_major, minor);
  11.543 +}
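
MAJOR_XEN/MINOR_XEN treat the blkif device cookie as the major in the upper bits and the minor in the low byte. A quick standalone round-trip check (MKDEV_XEN is a hypothetical helper for this sketch; the values are illustrative):

    #include <stdio.h>
    #include <assert.h>

    #define MAJOR_XEN(dev)    ((dev) >> 8)
    #define MINOR_XEN(dev)    ((dev) & 0xff)
    #define MKDEV_XEN(ma, mi) (((ma) << 8) | ((mi) & 0xff))   /* hypothetical */

    int main(void)
    {
        unsigned int cookie = MKDEV_XEN(8, 1);   /* illustrative: (8,1) */

        assert(MAJOR_XEN(cookie) == 8);
        assert(MINOR_XEN(cookie) == 1);
        printf("cookie=%#06x major=%u minor=%u\n",
               cookie, MAJOR_XEN(cookie), MINOR_XEN(cookie));
        return 0;
    }
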
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/Kconfig	Sun Aug 01 15:14:41 2004 +0000
    12.3 @@ -0,0 +1,6 @@
    12.4 +
    12.5 +config XENBLOCK
    12.6 +	tristate "Block device driver"
    12.7 +	depends on ARCH_XEN
    12.8 +	help
     12.9 +	  Frontend block-device driver for Xen virtual machines.
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/Makefile	Sun Aug 01 15:14:41 2004 +0000
    13.3 @@ -0,0 +1,3 @@
    13.4 +
    13.5 +obj-y	:= blkfront.o vbd.o
    13.6 +
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c	Sun Aug 01 15:14:41 2004 +0000
    14.3 @@ -0,0 +1,653 @@
    14.4 +/******************************************************************************
    14.5 + * blkfront.c
    14.6 + * 
    14.7 + * XenLinux virtual block-device driver.
    14.8 + * 
    14.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   14.10 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   14.11 + * Copyright (c) 2004, Christian Limpach
   14.12 + */
   14.13 +
   14.14 +#include "block.h"
   14.15 +#include <linux/cdrom.h>
   14.16 +#include <linux/sched.h>
   14.17 +#include <linux/interrupt.h>
   14.18 +#include <scsi/scsi.h>
   14.19 +#include <asm-xen/ctrl_if.h>
   14.20 +
   14.21 +typedef unsigned char byte; /* from linux/ide.h */
   14.22 +
   14.23 +#define BLKIF_STATE_CLOSED       0
   14.24 +#define BLKIF_STATE_DISCONNECTED 1
   14.25 +#define BLKIF_STATE_CONNECTED    2
   14.26 +static unsigned int blkif_state = BLKIF_STATE_CLOSED;
   14.27 +static unsigned int blkif_evtchn, blkif_irq;
   14.28 +
   14.29 +static int blkif_control_rsp_valid;
   14.30 +static blkif_response_t blkif_control_rsp;
   14.31 +
   14.32 +static blkif_ring_t *blk_ring;
   14.33 +static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
   14.34 +static BLKIF_RING_IDX req_prod;  /* Private request producer.         */
   14.35 +
   14.36 +static blkif_ring_t *blk_ring_rec; /* Private copy of requests, used for
   14.37 +                                    * recovery.  Responses not stored here. */
   14.38 +static BLKIF_RING_IDX resp_cons_rec; /* Copy of response consumer, used for
   14.39 +                                      * recovery */
   14.40 +static int recovery = 0;           /* "Recovery in progress" flag.  Protected
   14.41 +                                    * by the blkif_io_lock */
   14.42 +
   14.43 +/* We plug the I/O ring if the driver is suspended or if the ring is full. */
   14.44 +#define	BLKIF_RING_FULL	(((req_prod - resp_cons) == BLKIF_RING_SIZE) || \
   14.45 +			 (blkif_state != BLKIF_STATE_CONNECTED))
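
req_prod and resp_cons are free-running counters: the in-flight count is their unsigned difference, slots are addressed modulo the ring size, and the ring is full exactly when the difference reaches BLKIF_RING_SIZE. A user-space sketch of the same arithmetic, assuming a ring size of 8 rather than the real BLKIF_RING_SIZE:

    #include <stdio.h>

    #define RING_SIZE 8                          /* assumed; not the real value */
    #define MASK_IDX(i) ((i) & (RING_SIZE - 1))  /* RING_SIZE must be a power of 2 */

    int main(void)
    {
        unsigned int req_prod = 0, resp_cons = 0, i;

        for (i = 0; i < 5; i++)      /* produce five requests */
            req_prod++;
        for (i = 0; i < 3; i++)      /* consume three responses */
            resp_cons++;

        /* The indices run free and are never wrapped; unsigned subtraction
         * gives the in-flight count correctly even across wrap-around. */
        printf("in flight: %u, full: %d, next slot: %u\n",
               req_prod - resp_cons,
               (req_prod - resp_cons) == RING_SIZE,
               MASK_IDX(req_prod));
        return 0;
    }
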
   14.46 +
   14.47 +/*
   14.48 + * Request queues with outstanding work, but ring is currently full.
   14.49 + * We need no special lock here, as we always access this with the
   14.50 + * blkif_io_lock held. We only need a small maximum list.
   14.51 + */
   14.52 +#define MAX_PENDING 8
   14.53 +static request_queue_t *pending_queues[MAX_PENDING];
   14.54 +static int nr_pending;
   14.55 +
    14.56 +static inline void flush_requests(void)
    14.57 +{
    14.58 +        wmb(); /* ensure ring entries are visible before req_prod is updated */
    14.59 +        blk_ring->req_prod = req_prod;
    14.60 +
    14.61 +        notify_via_evtchn(blkif_evtchn);
    14.62 +}
   14.63 +
   14.64 +
   14.65 +#if 0
   14.66 +/*
    14.67 + * blkif_update_int/update_vbds_task - handle VBD update events.
   14.68 + *  Schedule a task for keventd to run, which will update the VBDs and perform 
   14.69 + *  the corresponding updates to our view of VBD state.
   14.70 + */
   14.71 +static struct tq_struct update_tq;
   14.72 +static void update_vbds_task(void *unused)
   14.73 +{ 
   14.74 +    xlvbd_update_vbds();
   14.75 +}
   14.76 +#endif
   14.77 +
   14.78 +
   14.79 +int blkif_open(struct inode *inode, struct file *filep)
   14.80 +{
   14.81 +	struct gendisk *gd = inode->i_bdev->bd_disk;
   14.82 +	struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
   14.83 +
   14.84 +	/* Update of usage count is protected by per-device semaphore. */
   14.85 +	di->mi->usage++;
   14.86 +
   14.87 +	return 0;
   14.88 +}
   14.89 +
   14.90 +
   14.91 +int blkif_release(struct inode *inode, struct file *filep)
   14.92 +{
   14.93 +	struct gendisk *gd = inode->i_bdev->bd_disk;
   14.94 +	struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
   14.95 +
   14.96 +	/*
   14.97 +	 * When usage drops to zero it may allow more VBD updates to occur.
   14.98 +	 * Update of usage count is protected by a per-device semaphore.
   14.99 +	 */
  14.100 +	if (--di->mi->usage == 0) {
  14.101 +#if 0
  14.102 +		update_tq.routine = update_vbds_task;
  14.103 +		schedule_task(&update_tq);
  14.104 +#endif
  14.105 +	}
  14.106 +
  14.107 +	return 0;
  14.108 +}
  14.109 +
  14.110 +
  14.111 +int blkif_ioctl(struct inode *inode, struct file *filep,
  14.112 +                          unsigned command, unsigned long argument)
  14.113 +{
  14.114 +	/*  struct gendisk *gd = inode->i_bdev->bd_disk; */
  14.115 +
  14.116 +	DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
  14.117 +	    command, (long)argument, inode->i_rdev); 
  14.118 +  
  14.119 +	switch (command) {
  14.120 +
  14.121 +	case HDIO_GETGEO:
  14.122 +		/* return ENOSYS to use defaults */
  14.123 +		return -ENOSYS;
  14.124 +
  14.125 +	default:
  14.126 +		printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
  14.127 +		       command);
  14.128 +		return -ENOSYS;
  14.129 +	}
  14.130 +
  14.131 +	return 0;
  14.132 +}
  14.133 +
  14.134 +#if 0
  14.135 +/* check media change: should probably do something here in some cases :-) */
  14.136 +int blkif_check(kdev_t dev)
  14.137 +{
  14.138 +    DPRINTK("blkif_check\n");
  14.139 +    return 0;
  14.140 +}
  14.141 +
  14.142 +int blkif_revalidate(kdev_t dev)
  14.143 +{
  14.144 +    struct block_device *bd;
  14.145 +    struct gendisk *gd;
  14.146 +    xen_block_t *disk;
  14.147 +    unsigned long capacity;
  14.148 +    int i, rc = 0;
  14.149 +    
  14.150 +    if ( (bd = bdget(dev)) == NULL )
  14.151 +        return -EINVAL;
  14.152 +
  14.153 +    /*
  14.154 +     * Update of partition info, and check of usage count, is protected
  14.155 +     * by the per-block-device semaphore.
  14.156 +     */
  14.157 +    down(&bd->bd_sem);
  14.158 +
  14.159 +    if ( ((gd = get_gendisk(dev)) == NULL) ||
  14.160 +         ((disk = xldev_to_xldisk(dev)) == NULL) ||
  14.161 +         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
  14.162 +    {
  14.163 +        rc = -EINVAL;
  14.164 +        goto out;
  14.165 +    }
  14.166 +
  14.167 +    if ( disk->usage > 1 )
  14.168 +    {
  14.169 +        rc = -EBUSY;
  14.170 +        goto out;
  14.171 +    }
  14.172 +
  14.173 +    /* Only reread partition table if VBDs aren't mapped to partitions. */
  14.174 +    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
  14.175 +    {
  14.176 +        for ( i = gd->max_p - 1; i >= 0; i-- )
  14.177 +        {
  14.178 +            invalidate_device(dev+i, 1);
  14.179 +            gd->part[MINOR(dev+i)].start_sect = 0;
  14.180 +            gd->part[MINOR(dev+i)].nr_sects   = 0;
  14.181 +            gd->sizes[MINOR(dev+i)]           = 0;
  14.182 +        }
  14.183 +
  14.184 +        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
  14.185 +    }
  14.186 +
  14.187 + out:
  14.188 +    up(&bd->bd_sem);
  14.189 +    bdput(bd);
  14.190 +    return rc;
  14.191 +}
  14.192 +#endif
  14.193 +
  14.194 +
  14.195 +/*
  14.196 + * blkif_queue_request
  14.197 + *
   14.198 + * Request block I/O.
   14.199 + *
   14.200 + * id: for guest use only.
   14.201 + * operation: BLKIF_OP_{READ,WRITE,PROBE}
   14.202 + * buffer: buffer to read/write into; this should be a
   14.203 + *   virtual address in the guest OS.
   14.204 + */
  14.205 +static int blkif_queue_request(struct request *req)
  14.206 +{
  14.207 +	struct xlbd_disk_info *di =
  14.208 +		(struct xlbd_disk_info *)req->rq_disk->private_data;
  14.209 +	unsigned long buffer_ma;
  14.210 +	blkif_request_t *ring_req;
  14.211 +	struct bio *bio;
  14.212 +	struct bio_vec *bvec;
  14.213 +	int idx, s;
  14.214 +        unsigned int fsect, lsect;
  14.215 +
  14.216 +        if (unlikely(blkif_state != BLKIF_STATE_CONNECTED))
  14.217 +                return 1;
  14.218 +
  14.219 +	/* Fill out a communications ring structure. */
  14.220 +	ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
  14.221 +	ring_req->id = (unsigned long)req;
  14.222 +	ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
  14.223 +		BLKIF_OP_READ;
  14.224 +	ring_req->sector_number = (blkif_sector_t)req->sector;
  14.225 +	ring_req->device = di->xd_device;
  14.226 +
  14.227 +	s = 0;
  14.228 +	ring_req->nr_segments = 0;
  14.229 +	rq_for_each_bio(bio, req) {
  14.230 +		bio_for_each_segment(bvec, bio, idx) {
  14.231 +			buffer_ma =
  14.232 +                                phys_to_machine(page_to_phys(bvec->bv_page));
  14.233 +			if (unlikely((buffer_ma & ((1<<9)-1)) != 0))
  14.234 +				BUG();
  14.235 +
  14.236 +                        fsect = bvec->bv_offset >> 9;
  14.237 +                        lsect = fsect + (bvec->bv_len >> 9) - 1;
  14.238 +                        if (unlikely(lsect > 7))
  14.239 +                                BUG();
  14.240 +
  14.241 +			ring_req->frame_and_sects[ring_req->nr_segments++] =
  14.242 +				buffer_ma | (fsect << 3) | lsect;
  14.243 +			s += bvec->bv_len >> 9;
  14.244 +		}
  14.245 +	}
  14.246 +
  14.247 +	req_prod++;
  14.248 +
  14.249 +        /* Keep a private copy so we can reissue requests when recovering. */
  14.250 +        blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req =
  14.251 +                *ring_req;
  14.252 +        blk_ring_rec->req_prod++;
  14.253 +
  14.254 +        return 0;
  14.255 +}
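
Each entry of frame_and_sects packs a page-aligned machine address together with the first and last 512-byte sectors touched within that page, as buffer_ma | (fsect << 3) | lsect. A standalone encode/decode sketch (the layout follows the code above; the address is illustrative):

    #include <stdio.h>
    #include <assert.h>

    int main(void)
    {
        unsigned long buffer_ma = 0x12345000UL; /* page-aligned machine address */
        unsigned long fsect = 2, lsect = 5;     /* sectors 2..5 of that page */
        unsigned long seg;

        assert((buffer_ma & ((1UL << 9) - 1)) == 0); /* sector aligned */
        assert(lsect <= 7);                          /* 8 sectors per 4KB page */

        seg = buffer_ma | (fsect << 3) | lsect;

        /* Decode: frame in the high bits, fsect in bits 5..3, lsect in 2..0. */
        printf("ma=%#lx fsect=%lu lsect=%lu\n",
               seg & ~0xfffUL, (seg >> 3) & 7, seg & 7);
        return 0;
    }
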
  14.256 +
  14.257 +/*
  14.258 + * do_blkif_request
  14.259 + *  read a block; request is in a request queue
  14.260 + */
  14.261 +void do_blkif_request(request_queue_t *rq)
  14.262 +{
  14.263 +	struct request *req;
  14.264 +	int queued;
  14.265 +
  14.266 +	DPRINTK("Entered do_blkif_request\n"); 
  14.267 +
  14.268 +	queued = 0;
  14.269 +
  14.270 +	while ((req = elv_next_request(rq)) != NULL) {
  14.271 +		if (!blk_fs_request(req)) {
  14.272 +			end_request(req, 0);
  14.273 +			continue;
  14.274 +		}
  14.275 +
  14.276 +		if (BLKIF_RING_FULL) {
  14.277 +			blk_stop_queue(rq);
  14.278 +			break;
  14.279 +		}
  14.280 +		DPRINTK("do_blkif_request %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
  14.281 +		    req, req->cmd, req->sector, req->current_nr_sectors,
  14.282 +		    req->nr_sectors, req->buffer,
  14.283 +		    rq_data_dir(req) ? "write" : "read");
  14.284 +                blkdev_dequeue_request(req);
  14.285 +		if (blkif_queue_request(req)) {
  14.286 +                        blk_stop_queue(rq);
  14.287 +                        break;
  14.288 +                }
  14.289 +		queued++;
  14.290 +	}
  14.291 +
  14.292 +	if (queued != 0)
  14.293 +		flush_requests();
  14.294 +}
  14.295 +
  14.296 +
  14.297 +static void kick_pending_request_queues(void)
  14.298 +{
  14.299 +    /* We kick pending request queues if the ring is reasonably empty. */
  14.300 +    if ( (nr_pending != 0) && 
  14.301 +         ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) )
  14.302 +    {
  14.303 +        /* Attempt to drain the queue, but bail if the ring becomes full. */
  14.304 +        while ( (nr_pending != 0) && !BLKIF_RING_FULL )
  14.305 +            do_blkif_request(pending_queues[--nr_pending]);
  14.306 +    }
  14.307 +}
  14.308 +
  14.309 +
  14.310 +static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
  14.311 +{
  14.312 +	struct request *req;
  14.313 +	blkif_response_t *bret;
  14.314 +	BLKIF_RING_IDX i; 
  14.315 +	unsigned long flags; 
  14.316 +
  14.317 +	spin_lock_irqsave(&blkif_io_lock, flags);     
  14.318 +
  14.319 +        if (unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery)) {
   14.320 +                printk(KERN_INFO "blkif_int: ignoring response while "
   14.321 +                       "closed or recovering\n");
  14.322 +                spin_unlock_irqrestore(&blkif_io_lock, flags);
  14.323 +                return IRQ_HANDLED;
  14.324 +        }
  14.325 +
  14.326 +	for (i = resp_cons; i != blk_ring->resp_prod; i++) {
  14.327 +		bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
  14.328 +		switch (bret->operation) {
  14.329 +		case BLKIF_OP_READ:
  14.330 +		case BLKIF_OP_WRITE:
  14.331 +			if (unlikely(bret->status != BLKIF_RSP_OKAY))
  14.332 +				DPRINTK("Bad return from blkdev data request: %lx\n",
  14.333 +				    bret->status);
  14.334 +			req = (struct request *)bret->id;
  14.335 +                        /* XXXcl pass up status */
  14.336 +			if (unlikely(end_that_request_first(req, 1,
  14.337 +			    req->hard_nr_sectors)))
  14.338 +				BUG();
  14.339 +
  14.340 +			end_that_request_last(req);
  14.341 +			break;
  14.342 +                case BLKIF_OP_PROBE:
  14.343 +                        memcpy(&blkif_control_rsp, bret, sizeof(*bret));
  14.344 +                        blkif_control_rsp_valid = 1;
  14.345 +                        break;
   14.346 +                default:
  14.347 +			BUG();
  14.348 +		}
  14.349 +	}
  14.350 +    
  14.351 +	resp_cons = i;
  14.352 +        resp_cons_rec = i;
  14.353 +
  14.354 +	if (xlbd_blk_queue &&
  14.355 +            test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags)) {
  14.356 +		blk_start_queue(xlbd_blk_queue);
  14.357 +		/* XXXcl call to request_fn should not be needed but
  14.358 +                 * we get stuck without...  needs investigating
  14.359 +		 */
  14.360 +		xlbd_blk_queue->request_fn(xlbd_blk_queue);
  14.361 +	}
  14.362 +
  14.363 +	spin_unlock_irqrestore(&blkif_io_lock, flags);
  14.364 +
  14.365 +	return IRQ_HANDLED;
  14.366 +}
  14.367 +
  14.368 +
  14.369 +void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
  14.370 +{
  14.371 +    unsigned long flags;
  14.372 +
  14.373 + retry:
  14.374 +    while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
  14.375 +    {
  14.376 +        set_current_state(TASK_INTERRUPTIBLE);
  14.377 +        schedule_timeout(1);
  14.378 +    }
  14.379 +
  14.380 +    spin_lock_irqsave(&blkif_io_lock, flags);
  14.381 +    if ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
  14.382 +    {
  14.383 +        spin_unlock_irqrestore(&blkif_io_lock, flags);
  14.384 +        goto retry;
  14.385 +    }
  14.386 +
  14.387 +    memcpy(&blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req, req, sizeof(*req));
  14.388 +    memcpy(&blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod++)].req,
  14.389 +           req, sizeof(*req));
  14.390 +    req_prod++;
  14.391 +    flush_requests();
  14.392 +
  14.393 +    spin_unlock_irqrestore(&blkif_io_lock, flags);
  14.394 +
  14.395 +    while ( !blkif_control_rsp_valid )
  14.396 +    {
  14.397 +        set_current_state(TASK_INTERRUPTIBLE);
  14.398 +        schedule_timeout(1);
  14.399 +    }
  14.400 +
  14.401 +    memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
  14.402 +    blkif_control_rsp_valid = 0;
  14.403 +}
  14.404 +
  14.405 +
  14.406 +static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
  14.407 +{
  14.408 +    ctrl_msg_t                   cmsg;
  14.409 +    blkif_fe_interface_connect_t up;
  14.410 +
  14.411 +    if ( status->handle != 0 )
  14.412 +    {
  14.413 +        printk(KERN_WARNING "Status change on unsupported blkif %d\n",
  14.414 +               status->handle);
  14.415 +        return;
  14.416 +    }
  14.417 +
  14.418 +    switch ( status->status )
  14.419 +    {
  14.420 +    case BLKIF_INTERFACE_STATUS_DESTROYED:
  14.421 +        printk(KERN_WARNING "Unexpected blkif-DESTROYED message in state %d\n",
  14.422 +               blkif_state);
  14.423 +        break;
  14.424 +
  14.425 +    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
  14.426 +        if ( blkif_state != BLKIF_STATE_CLOSED )
  14.427 +        {
  14.428 +            printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message"
  14.429 +                   " in state %d\n", blkif_state);
  14.430 +
  14.431 +            printk(KERN_INFO "VBD driver recovery in progress\n");
  14.432 +            
  14.433 +            /* Prevent new requests being issued until we fix things up. */
  14.434 +            spin_lock_irq(&blkif_io_lock);
  14.435 +            recovery = 1;
  14.436 +            blkif_state = BLKIF_STATE_DISCONNECTED;
  14.437 +            spin_unlock_irq(&blkif_io_lock);
  14.438 +
  14.439 +            /* Free resources associated with old device channel. */
  14.440 +            free_page((unsigned long)blk_ring);
  14.441 +            free_irq(blkif_irq, NULL);
  14.442 +            unbind_evtchn_from_irq(blkif_evtchn);
  14.443 +        }
  14.444 +
  14.445 +        /* Move from CLOSED to DISCONNECTED state. */
  14.446 +        blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
  14.447 +        blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
  14.448 +        blkif_state  = BLKIF_STATE_DISCONNECTED;
  14.449 +
  14.450 +        /* Construct an interface-CONNECT message for the domain controller. */
  14.451 +        cmsg.type      = CMSG_BLKIF_FE;
  14.452 +        cmsg.subtype   = CMSG_BLKIF_FE_INTERFACE_CONNECT;
  14.453 +        cmsg.length    = sizeof(blkif_fe_interface_connect_t);
  14.454 +        up.handle      = 0;
  14.455 +        up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
  14.456 +        memcpy(cmsg.msg, &up, sizeof(up));
  14.457 +        
  14.458 +        /* Tell the controller to bring up the interface. */
  14.459 +        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
  14.460 +        break;
  14.461 +
  14.462 +    case BLKIF_INTERFACE_STATUS_CONNECTED:
  14.463 +        if ( blkif_state == BLKIF_STATE_CLOSED )
  14.464 +        {
  14.465 +            printk(KERN_WARNING "Unexpected blkif-CONNECTED message"
  14.466 +                   " in state %d\n", blkif_state);
  14.467 +            break;
  14.468 +        }
  14.469 +
  14.470 +        blkif_evtchn = status->evtchn;
  14.471 +        blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
  14.472 +        (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
  14.473 +
  14.474 +        if ( recovery )
  14.475 +        {
  14.476 +            int i;
  14.477 +
  14.478 +	    /* Shouldn't need the blkif_io_lock here - the device is
  14.479 +	     * plugged and the recovery flag prevents the interrupt handler
  14.480 +	     * changing anything. */
  14.481 +
  14.482 +            /* Reissue requests from the private block ring. */
  14.483 +            for ( i = 0;
  14.484 +		  resp_cons_rec < blk_ring_rec->req_prod;
  14.485 +                  resp_cons_rec++, i++ )
  14.486 +            {
  14.487 +                blk_ring->ring[i].req
  14.488 +                    = blk_ring_rec->ring[MASK_BLKIF_IDX(resp_cons_rec)].req;
  14.489 +            }
  14.490 +
  14.491 +            /* Reset the private block ring to match the new ring. */
  14.492 +            memcpy(blk_ring_rec, blk_ring, sizeof(*blk_ring));
  14.493 +            resp_cons_rec = 0;
  14.494 +
  14.495 +            /* blk_ring->req_prod will be set when we flush_requests().*/
  14.496 +            blk_ring_rec->req_prod = req_prod = i;
  14.497 +
  14.498 +            wmb();
  14.499 +
  14.500 +            /* Switch off recovery mode, using a memory barrier to ensure that
  14.501 +             * it's seen before we flush requests - we don't want to miss any
  14.502 +             * interrupts. */
  14.503 +            recovery = 0;
  14.504 +            wmb();
  14.505 +
  14.506 +            /* Kicks things back into life. */
  14.507 +            flush_requests();
  14.508 +        }
  14.509 +        else
  14.510 +        {
  14.511 +            /* Probe for discs that are attached to the interface. */
  14.512 +            xlvbd_init();
  14.513 +        }
  14.514 +
  14.515 +        blkif_state = BLKIF_STATE_CONNECTED;
  14.516 +        
  14.517 +        /* Kick pending requests. */
  14.518 +        spin_lock_irq(&blkif_io_lock);
  14.519 +        kick_pending_request_queues();
  14.520 +        spin_unlock_irq(&blkif_io_lock);
  14.521 +
  14.522 +        break;
  14.523 +
  14.524 +    default:
  14.525 +        printk(KERN_WARNING "Status change to unknown value %d\n", 
  14.526 +               status->status);
  14.527 +        break;
  14.528 +    }
  14.529 +}
  14.530 +
  14.531 +
  14.532 +static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
  14.533 +{
  14.534 +    switch ( msg->subtype )
  14.535 +    {
  14.536 +    case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
  14.537 +        if ( msg->length != sizeof(blkif_fe_interface_status_changed_t) )
  14.538 +            goto parse_error;
  14.539 +        blkif_status_change((blkif_fe_interface_status_changed_t *)
  14.540 +                            &msg->msg[0]);
  14.541 +        break;        
  14.542 +#if 0
  14.543 +    case CMSG_BLKIF_FE_VBD_STATUS_CHANGED:
  14.544 +        update_tq.routine = update_vbds_task;
  14.545 +        schedule_task(&update_tq);
  14.546 +        break;
  14.547 +#endif
  14.548 +    default:
  14.549 +        goto parse_error;
  14.550 +    }
  14.551 +
  14.552 +    ctrl_if_send_response(msg);
  14.553 +    return;
  14.554 +
  14.555 + parse_error:
  14.556 +    msg->length = 0;
  14.557 +    ctrl_if_send_response(msg);
  14.558 +}
  14.559 +
  14.560 +
  14.561 +int __init xlblk_init(void)
  14.562 +{
  14.563 +    ctrl_msg_t                       cmsg;
  14.564 +    blkif_fe_driver_status_changed_t st;
  14.565 +
   14.566 +    if ( (start_info.flags & SIF_INITDOMAIN) ||
   14.567 +         (start_info.flags & SIF_BLK_BE_DOMAIN) )
  14.568 +        return 0;
  14.569 +
  14.570 +    printk(KERN_INFO "Initialising Xen virtual block device\n");
  14.571 +
   14.572 +    blk_ring_rec = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
           +    if ( blk_ring_rec == NULL )
           +        return -ENOMEM;
   14.573 +    memset(blk_ring_rec, 0, sizeof(*blk_ring_rec));
  14.574 +
  14.575 +    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
  14.576 +                                    CALLBACK_IN_BLOCKING_CONTEXT);
  14.577 +
  14.578 +    /* Send a driver-UP notification to the domain controller. */
  14.579 +    cmsg.type      = CMSG_BLKIF_FE;
  14.580 +    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED;
  14.581 +    cmsg.length    = sizeof(blkif_fe_driver_status_changed_t);
  14.582 +    st.status      = BLKIF_DRIVER_STATUS_UP;
  14.583 +    memcpy(cmsg.msg, &st, sizeof(st));
  14.584 +    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
  14.585 +
  14.586 +    /*
   14.587 +     * We should read 'nr_interfaces' from the response message and wait
  14.588 +     * for notifications before proceeding. For now we assume that we
  14.589 +     * will be notified of exactly one interface.
  14.590 +     */
  14.591 +    while ( blkif_state != BLKIF_STATE_CONNECTED )
  14.592 +    {
  14.593 +        set_current_state(TASK_INTERRUPTIBLE);
  14.594 +        schedule_timeout(1);
  14.595 +    }
  14.596 +
  14.597 +    return 0;
  14.598 +#if 0
  14.599 +	int error; 
  14.600 +
  14.601 +	reset_xlblk_interface();
  14.602 +
  14.603 +	xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
  14.604 +	xlblk_update_irq   = bind_virq_to_irq(VIRQ_VBD_UPD);
  14.605 +
  14.606 +	error = request_irq(xlblk_response_irq, xlblk_response_int, 
  14.607 +			    SA_SAMPLE_RANDOM, "blkdev", NULL);
  14.608 +	if (error) {
  14.609 +		printk(KERN_ALERT "Could not allocate receive interrupt\n");
  14.610 +		goto fail;
  14.611 +	}
  14.612 +
  14.613 +	error = request_irq(xlblk_update_irq, xlblk_update_int,
  14.614 +			    0, "blkdev", NULL);
  14.615 +	if (error) {
  14.616 +		printk(KERN_ALERT
  14.617 +		       "Could not allocate block update interrupt\n");
  14.618 +		goto fail;
  14.619 +	}
  14.620 +
  14.621 +	(void)xlvbd_init();
  14.622 +
  14.623 +	return 0;
  14.624 +
  14.625 + fail:
  14.626 +	return error;
  14.627 +#endif
  14.628 +}
  14.629 +
  14.630 +
  14.631 +static void __exit xlblk_cleanup(void)
  14.632 +{
  14.633 +    /* XXX FIXME */
  14.634 +    BUG();
  14.635 +#if 0
  14.636 +	/*  xlvbd_cleanup(); */
  14.637 +	free_irq(xlblk_response_irq, NULL);
  14.638 +	free_irq(xlblk_update_irq, NULL);
  14.639 +	unbind_virq_from_irq(VIRQ_BLKDEV);
  14.640 +	unbind_virq_from_irq(VIRQ_VBD_UPD);
  14.641 +#endif
  14.642 +}
  14.643 +
  14.644 +
  14.645 +module_init(xlblk_init);
  14.646 +module_exit(xlblk_cleanup);
  14.647 +
  14.648 +
  14.649 +void blkdev_suspend(void)
  14.650 +{
  14.651 +}
  14.652 +
  14.653 +
  14.654 +void blkdev_resume(void)
  14.655 +{
  14.656 +}
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/block.h	Sun Aug 01 15:14:41 2004 +0000
    15.3 @@ -0,0 +1,92 @@
    15.4 +/******************************************************************************
    15.5 + * block.h
    15.6 + * 
    15.7 + * Shared definitions between all levels of XenLinux Virtual block devices.
    15.8 + */
    15.9 +
   15.10 +#ifndef __XEN_DRIVERS_BLOCK_H__
   15.11 +#define __XEN_DRIVERS_BLOCK_H__
   15.12 +
   15.13 +#include <linux/config.h>
   15.14 +#include <linux/module.h>
   15.15 +
   15.16 +#include <linux/kernel.h>
   15.17 +#include <linux/sched.h>
   15.18 +#include <linux/slab.h>
   15.19 +#include <linux/string.h>
   15.20 +#include <linux/errno.h>
   15.21 +
   15.22 +#include <linux/fs.h>
   15.23 +#include <linux/hdreg.h>
   15.24 +#include <linux/blkdev.h>
   15.25 +#include <linux/major.h>
   15.26 +
   15.27 +#include <linux/devfs_fs_kernel.h>
   15.28 +
   15.29 +#include <asm/hypervisor-ifs/hypervisor-if.h>
   15.30 +#include <asm/io.h>
   15.31 +#include <asm/atomic.h>
   15.32 +#include <asm/uaccess.h>
   15.33 +
   15.34 +#include <asm-xen/blkif.h>
   15.35 +
   15.36 +#if 0
   15.37 +#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   15.38 +#else
   15.39 +#define DPRINTK(_f, _a...) ((void)0)
   15.40 +#endif
   15.41 +
   15.42 +#if 0
   15.43 +#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   15.44 +#else
   15.45 +#define DPRINTK_IOCTL(_f, _a...) ((void)0)
   15.46 +#endif
   15.47 +
   15.48 +struct xlbd_type_info {
   15.49 +	int partn_shift;
   15.50 +	int devs_per_major;
   15.51 +	int hardsect_size;
   15.52 +	int max_sectors;
   15.53 +	char *name;
   15.54 +};
   15.55 +
   15.56 +/*
   15.57 + * We have one of these per vbd, whether ide, scsi or 'other'.  They
   15.58 + * hang in private_data off the gendisk structure. We may end up
   15.59 + * putting all kinds of interesting stuff here :-)
   15.60 + */
   15.61 +struct xlbd_major_info {
   15.62 +	int major;
   15.63 +	int usage;
   15.64 +	int xd_device;
   15.65 +	struct xlbd_type_info *type;
   15.66 +};
   15.67 +
   15.68 +struct xlbd_disk_info {
   15.69 +	int xd_device;
   15.70 +	struct xlbd_major_info *mi;
   15.71 +};
   15.72 +
   15.73 +typedef struct xen_block {
   15.74 +	int usage;
   15.75 +} xen_block_t;
   15.76 +
   15.77 +extern struct request_queue *xlbd_blk_queue;
   15.78 +extern spinlock_t blkif_io_lock;
   15.79 +
   15.80 +extern int blkif_open(struct inode *inode, struct file *filep);
   15.81 +extern int blkif_release(struct inode *inode, struct file *filep);
   15.82 +extern int blkif_ioctl(struct inode *inode, struct file *filep,
   15.83 +                           unsigned command, unsigned long argument);
   15.84 +extern int blkif_check(dev_t dev);
   15.85 +extern int blkif_revalidate(dev_t dev);
   15.86 +extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
    15.87 +extern void do_blkif_request(request_queue_t *rq);
   15.88 +
   15.89 +extern void xlvbd_update_vbds(void);
   15.90 +
   15.91 +/* Virtual block-device subsystem. */
   15.92 +extern int  xlvbd_init(void);
   15.93 +extern void xlvbd_cleanup(void); 
   15.94 +
   15.95 +#endif /* __XEN_DRIVERS_BLOCK_H__ */
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/vbd.c	Sun Aug 01 15:14:41 2004 +0000
    16.3 @@ -0,0 +1,530 @@
    16.4 +/******************************************************************************
    16.5 + * vbd.c
    16.6 + * 
    16.7 + * XenLinux virtual block-device driver (xvd).
    16.8 + * 
    16.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   16.10 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   16.11 + * Copyright (c) 2004, Christian Limpach
   16.12 + */
   16.13 +
   16.14 +#include "block.h"
   16.15 +#include <linux/blkdev.h>
   16.16 +
   16.17 +/*
   16.18 + * For convenience we distinguish between ide, scsi and 'other' (i.e.
   16.19 + * potentially combinations of the two) in the naming scheme and in a few 
   16.20 + * other places (like default readahead, etc).
   16.21 + */
   16.22 +
   16.23 +#define NUM_IDE_MAJORS 10
   16.24 +#define NUM_SCSI_MAJORS 9
   16.25 +#define NUM_VBD_MAJORS 1
   16.26 +
   16.27 +static struct xlbd_type_info xlbd_ide_type = {
   16.28 +	.partn_shift = 6,
   16.29 +	// XXXcl todo blksize_size[major]  = 1024;
   16.30 +	.hardsect_size = 512,
   16.31 +	.max_sectors = 128,  /* 'hwif->rqsize' if we knew it */
   16.32 +	// XXXcl todo read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
   16.33 +	.name = "hd",
   16.34 +};
   16.35 +
   16.36 +static struct xlbd_type_info xlbd_scsi_type = {
   16.37 +	.partn_shift = 4,
   16.38 +	// XXXcl todo blksize_size[major]  = 1024; /* XXX 512; */
   16.39 +	.hardsect_size = 512,
   16.40 +	.max_sectors = 128*8, /* XXX 128; */
   16.41 +	// XXXcl todo read_ahead[major]    = 0; /* XXX 8; -- guessing */
   16.42 +	.name = "sd",
   16.43 +};
   16.44 +
   16.45 +static struct xlbd_type_info xlbd_vbd_type = {
   16.46 +	.partn_shift = 4,
   16.47 +	// XXXcl todo blksize_size[major]  = 512;
   16.48 +	.hardsect_size = 512,
   16.49 +	.max_sectors = 128,
   16.50 +	// XXXcl todo read_ahead[major]    = 8;
   16.51 +	.name = "xvd",
   16.52 +};
   16.53 +
   16.54 +/* XXXcl handle cciss after finding out why it's "hacked" in */
   16.55 +
   16.56 +static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
   16.57 +					 NUM_VBD_MAJORS];
   16.58 +
   16.59 +/* Information about our VBDs. */
   16.60 +#define MAX_VBDS 64
   16.61 +static int nr_vbds;
   16.62 +static vdisk_t *vbd_info;
   16.63 +
   16.64 +struct request_queue *xlbd_blk_queue = NULL;
   16.65 +
   16.66 +#define MAJOR_XEN(dev)	((dev)>>8)
   16.67 +#define MINOR_XEN(dev)	((dev) & 0xff)
   16.68 +
   16.69 +static struct block_device_operations xlvbd_block_fops = 
   16.70 +{
   16.71 +	.owner		= THIS_MODULE,
   16.72 +	.open		= blkif_open,
   16.73 +	.release	= blkif_release,
   16.74 +	.ioctl		= blkif_ioctl,
   16.75 +#if 0
   16.76 +    check_media_change: blkif_check,
   16.77 +    revalidate:         blkif_revalidate,
   16.78 +#endif
   16.79 +};
   16.80 +
   16.81 +spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
   16.82 +
   16.83 +static int xlvbd_get_vbd_info(vdisk_t *disk_info)
   16.84 +{
   16.85 +    vdisk_t         *buf = (vdisk_t *)__get_free_page(GFP_KERNEL);
   16.86 +    blkif_request_t  req;
   16.87 +    blkif_response_t rsp;
   16.88 +    int              nr;
    16.89 +
          +    if ( buf == NULL )
          +        return -1;
          +
    16.90 +    memset(&req, 0, sizeof(req));
   16.91 +    req.operation   = BLKIF_OP_PROBE;
   16.92 +    req.nr_segments = 1;
   16.93 +    req.frame_and_sects[0] = virt_to_machine(buf) | 7;
   16.94 +
   16.95 +    blkif_control_send(&req, &rsp);
   16.96 +
   16.97 +    if ( rsp.status <= 0 )
   16.98 +    {
    16.99 +        printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
          +        free_page((unsigned long)buf); /* don't leak the probe page */
   16.100 +        return -1;
  16.101 +    }
  16.102 +
  16.103 +    if ( (nr = rsp.status) > MAX_VBDS )
  16.104 +         nr = MAX_VBDS;
   16.105 +    memcpy(disk_info, buf, nr * sizeof(vdisk_t));
          +    free_page((unsigned long)buf);
   16.106 +
   16.107 +    return nr;
  16.108 +}
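
The probe hands the backend one whole page: virt_to_machine(buf) is page-aligned, and the trailing | 7 is the frame_and_sects encoding with fsect = 0 and lsect = 7, i.e. all eight 512-byte sectors of the 4KB page. A standalone decode of such a descriptor (the address is illustrative):

    #include <stdio.h>

    int main(void)
    {
        unsigned long buf_ma = 0x7f000UL; /* assumed page-aligned machine addr */
        unsigned long seg    = buf_ma | 7;

        printf("frame=%#lx fsect=%lu lsect=%lu\n",
               seg & ~0xfffUL,  /* machine frame of the probe page */
               (seg >> 3) & 7,  /* 0: first sector of the page */
               seg & 7);        /* 7: last sector of the page */
        return 0;
    }
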
  16.109 +
  16.110 +static struct xlbd_major_info *xlbd_get_major_info(int xd_device, int *minor)
  16.111 +{
  16.112 +	int mi_idx, new_major;
  16.113 +	int xd_major = MAJOR_XEN(xd_device); 
  16.114 +	int xd_minor = MINOR_XEN(xd_device);
  16.115 +
  16.116 +	*minor = xd_minor;
  16.117 +
  16.118 +	switch (xd_major) {
  16.119 +	case IDE0_MAJOR: mi_idx = 0; new_major = IDE0_MAJOR; break;
  16.120 +	case IDE1_MAJOR: mi_idx = 1; new_major = IDE1_MAJOR; break;
  16.121 +	case IDE2_MAJOR: mi_idx = 2; new_major = IDE2_MAJOR; break;
  16.122 +	case IDE3_MAJOR: mi_idx = 3; new_major = IDE3_MAJOR; break;
  16.123 +	case IDE4_MAJOR: mi_idx = 4; new_major = IDE4_MAJOR; break;
  16.124 +	case IDE5_MAJOR: mi_idx = 5; new_major = IDE5_MAJOR; break;
  16.125 +	case IDE6_MAJOR: mi_idx = 6; new_major = IDE6_MAJOR; break;
  16.126 +	case IDE7_MAJOR: mi_idx = 7; new_major = IDE7_MAJOR; break;
  16.127 +	case IDE8_MAJOR: mi_idx = 8; new_major = IDE8_MAJOR; break;
  16.128 +	case IDE9_MAJOR: mi_idx = 9; new_major = IDE9_MAJOR; break;
  16.129 +	case SCSI_DISK0_MAJOR: mi_idx = 10; new_major = SCSI_DISK0_MAJOR; break;
  16.130 +	case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
  16.131 +		mi_idx = 11 + xd_major - SCSI_DISK1_MAJOR;
  16.132 +		new_major = SCSI_DISK1_MAJOR + xd_major - SCSI_DISK1_MAJOR;
  16.133 +		break;
  16.134 +	case SCSI_CDROM_MAJOR: mi_idx = 18; new_major = SCSI_CDROM_MAJOR; break;
  16.135 +	default: mi_idx = 19; new_major = 0;/* XXXcl notyet */ break;
  16.136 +	}
  16.137 +
  16.138 +	if (major_info[mi_idx])
  16.139 +		return major_info[mi_idx];
  16.140 +
  16.141 +	major_info[mi_idx] = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
  16.142 +	if (major_info[mi_idx] == NULL)
  16.143 +		return NULL;
  16.144 +
  16.145 +	memset(major_info[mi_idx], 0, sizeof(struct xlbd_major_info));
  16.146 +
  16.147 +	switch (mi_idx) {
  16.148 +	case 0 ... (NUM_IDE_MAJORS - 1):
  16.149 +		major_info[mi_idx]->type = &xlbd_ide_type;
  16.150 +		break;
  16.151 +	case NUM_IDE_MAJORS ... (NUM_IDE_MAJORS + NUM_SCSI_MAJORS - 1):
  16.152 +		major_info[mi_idx]->type = &xlbd_scsi_type;
  16.153 +		break;
  16.154 +	case (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) ...
  16.155 +		(NUM_IDE_MAJORS + NUM_SCSI_MAJORS + NUM_VBD_MAJORS - 1):
  16.156 +		major_info[mi_idx]->type = &xlbd_vbd_type;
  16.157 +		break;
  16.158 +	}
  16.159 +	major_info[mi_idx]->major = new_major;
  16.160 +
  16.161 +	if (register_blkdev(major_info[mi_idx]->major, major_info[mi_idx]->type->name)) {
  16.162 +		printk(KERN_ALERT "XL VBD: can't get major %d with name %s\n",
  16.163 +		    major_info[mi_idx]->major, major_info[mi_idx]->type->name);
  16.164 +		goto out;
  16.165 +	}
  16.166 +
  16.167 +	devfs_mk_dir(major_info[mi_idx]->type->name);
  16.168 +
  16.169 +	return major_info[mi_idx];
  16.170 +
  16.171 + out:
  16.172 +	kfree(major_info[mi_idx]);
  16.173 +	major_info[mi_idx] = NULL;
  16.174 +	return NULL;
  16.175 +}
  16.176 +
  16.177 +static struct gendisk *xlvbd_get_gendisk(struct xlbd_major_info *mi,
  16.178 +					 int xd_minor, vdisk_t *xd)
  16.179 +{
  16.180 +	struct gendisk *gd;
  16.181 +	struct xlbd_disk_info *di;
  16.182 +	int device, partno;
  16.183 +
  16.184 +	device = MKDEV(mi->major, xd_minor);
  16.185 +	gd = get_gendisk(device, &partno);
  16.186 +	if (gd)
  16.187 +		return gd;
  16.188 +
  16.189 +	di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
  16.190 +	if (di == NULL)
  16.191 +		return NULL;
  16.192 +	di->mi = mi;
  16.193 +	di->xd_device = xd->device;
  16.194 +
  16.195 +	/* Construct an appropriate gendisk structure. */
  16.196 +	gd = alloc_disk(1);
  16.197 +	if (gd == NULL)
  16.198 +		goto out;
  16.199 +
  16.200 +	gd->major = mi->major;
  16.201 +	gd->first_minor = xd_minor;
  16.202 +	gd->fops = &xlvbd_block_fops;
  16.203 +	gd->private_data = di;
  16.204 +	sprintf(gd->disk_name, "%s%c%d", mi->type->name,
  16.205 +	    'a' + (xd_minor >> mi->type->partn_shift),
  16.206 +	    xd_minor & ((1 << mi->type->partn_shift) - 1));
  16.207 +	/*  sprintf(gd->devfs_name, "%s%s/disc%d", mi->type->name, , ); XXXdevfs */
  16.208 +
  16.209 +	set_capacity(gd, xd->capacity);
  16.210 +
  16.211 +	if (xlbd_blk_queue == NULL) {
  16.212 +		xlbd_blk_queue = blk_init_queue(do_blkif_request,
  16.213 +						&blkif_io_lock);
  16.214 +		if (xlbd_blk_queue == NULL)
  16.215 +			goto out;
  16.216 +		elevator_init(xlbd_blk_queue, &elevator_noop);
  16.217 +
  16.218 +		/*
  16.219 +		 * Turn off barking 'headactive' mode. We dequeue
  16.220 +		 * buffer heads as soon as we pass them to back-end
  16.221 +		 * driver.
  16.222 +		 */
  16.223 +		blk_queue_headactive(xlbd_blk_queue, 0); /* XXXcl: noop according to blkdev.h */
  16.224 +
  16.225 +		blk_queue_hardsect_size(xlbd_blk_queue,
  16.226 +					mi->type->hardsect_size);
  16.227 +		blk_queue_max_sectors(xlbd_blk_queue, mi->type->max_sectors); /* 'hwif->rqsize' if we knew it */
  16.228 +
   16.229 +		/* XXXcl: set mask to PAGE_SIZE for now; to improve, either use
  16.230 +		   - blk_queue_merge_bvec to merge requests with adjacent ma's
  16.231 +		   - the tags infrastructure
  16.232 +		   - the dma infrastructure
  16.233 +		*/
  16.234 +		blk_queue_segment_boundary(xlbd_blk_queue, PAGE_SIZE - 1);
  16.235 +
  16.236 +		blk_queue_max_phys_segments(xlbd_blk_queue,
  16.237 +                    BLKIF_MAX_SEGMENTS_PER_REQUEST);
  16.238 +		blk_queue_max_hw_segments(xlbd_blk_queue,
  16.239 +                    BLKIF_MAX_SEGMENTS_PER_REQUEST); /* XXXcl not needed? */
  16.240 +
  16.241 +
  16.242 +	}
  16.243 +	gd->queue = xlbd_blk_queue;
  16.244 +
  16.245 +	add_disk(gd);
  16.246 +
  16.247 +	return gd;
  16.248 +
  16.249 + out:
  16.250 +	if (gd)
  16.251 +		del_gendisk(gd);
  16.252 +	kfree(di);
  16.253 +	return NULL;
  16.254 +}
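
The disk_name computed above splits the minor into a unit letter (minor >> partn_shift) and a partition number (the low partn_shift bits). A standalone sketch using the ide type's shift of 6 (the minor value is illustrative):

    #include <stdio.h>

    int main(void)
    {
        int  partn_shift = 6;   /* the ide type's value in the table above */
        int  xd_minor    = 65;  /* illustrative minor number */
        char name[32];

        snprintf(name, sizeof(name), "%s%c%d", "hd",
                 'a' + (xd_minor >> partn_shift),        /* unit: 65>>6 = 1 */
                 xd_minor & ((1 << partn_shift) - 1));   /* partition: 1 */
        printf("%s\n", name);   /* prints "hdb1" */
        return 0;
    }
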
  16.255 +
  16.256 +/*
  16.257 + * xlvbd_init_device - initialise a VBD device
   16.258 + * @xd:                a vdisk_t describing the VBD
  16.259 + *
  16.260 + * Takes a vdisk_t * that describes a VBD the domain has access to.
  16.261 + * Performs appropriate initialisation and registration of the device.
  16.262 + *
  16.263 + * Care needs to be taken when making re-entrant calls to ensure that
  16.264 + * corruption does not occur.  Also, devices that are in use should not have
  16.265 + * their details updated.  This is the caller's responsibility.
  16.266 + */
  16.267 +static int xlvbd_init_device(vdisk_t *xd)
  16.268 +{
  16.269 +	struct block_device *bd;
  16.270 +	struct gendisk *gd;
  16.271 +	struct xlbd_major_info *mi;
  16.272 +	int device;
  16.273 +	int minor;
  16.274 +
  16.275 +	int err = -ENOMEM;
  16.276 +
  16.277 +	mi = xlbd_get_major_info(xd->device, &minor);
  16.278 +	if (mi == NULL)
  16.279 +		return -EPERM;
  16.280 +
  16.281 +	device = MKDEV(mi->major, minor);
  16.282 +
  16.283 +	if ((bd = bdget(device)) == NULL)
  16.284 +		return -EPERM;
  16.285 +
  16.286 +	/*
  16.287 +	 * Update of partition info, and check of usage count, is protected
  16.288 +	 * by the per-block-device semaphore.
  16.289 +	 */
  16.290 +	down(&bd->bd_sem);
  16.291 +
  16.292 +	gd = xlvbd_get_gendisk(mi, minor, xd);
   16.293 +	if (gd == NULL) {
  16.294 +		err = -EPERM;
  16.295 +		goto out;
  16.296 +	}
  16.297 +
  16.298 +	if (VDISK_READONLY(xd->info))
  16.299 +		set_disk_ro(gd, 1); 
  16.300 +
  16.301 +	/* Some final fix-ups depending on the device type */
  16.302 +	switch (VDISK_TYPE(xd->info)) { 
  16.303 +	case VDISK_TYPE_CDROM:
  16.304 +		gd->flags |= GENHD_FL_REMOVABLE | GENHD_FL_CD; 
  16.305 +		/* FALLTHROUGH */
  16.306 +	case VDISK_TYPE_FLOPPY: 
  16.307 +	case VDISK_TYPE_TAPE:
  16.308 +		gd->flags |= GENHD_FL_REMOVABLE; 
  16.309 +		break; 
  16.310 +
  16.311 +	case VDISK_TYPE_DISK:
  16.312 +		break; 
  16.313 +
  16.314 +	default:
  16.315 +		printk(KERN_ALERT "XenLinux: unknown device type %d\n", 
  16.316 +		    VDISK_TYPE(xd->info)); 
  16.317 +		break; 
  16.318 +	}
  16.319 +
  16.320 +	err = 0;
  16.321 + out:
  16.322 +	up(&bd->bd_sem);
  16.323 +	bdput(bd);    
  16.324 +	return err;
  16.325 +}
  16.326 +
  16.327 +#if 0
  16.328 +/*
  16.329 + * xlvbd_remove_device - remove a device node if possible
  16.330 + * @device:       numeric device ID
  16.331 + *
  16.332 + * Updates the gendisk structure and invalidates devices.
  16.333 + *
  16.334 + * This is OK for now but in future, should perhaps consider where this should
  16.335 + * deallocate gendisks / unregister devices.
  16.336 + */
  16.337 +static int xlvbd_remove_device(int device)
  16.338 +{
  16.339 +    int i, rc = 0, minor = MINOR(device);
  16.340 +    struct gendisk *gd;
  16.341 +    struct block_device *bd;
  16.342 +    xen_block_t *disk = NULL;
  16.343 +
  16.344 +    if ( (bd = bdget(device)) == NULL )
  16.345 +        return -1;
  16.346 +
  16.347 +    /*
  16.348 +     * Update of partition info, and check of usage count, is protected
  16.349 +     * by the per-block-device semaphore.
  16.350 +     */
  16.351 +    down(&bd->bd_sem);
  16.352 +
  16.353 +    if ( ((gd = get_gendisk(device)) == NULL) ||
  16.354 +         ((disk = xldev_to_xldisk(device)) == NULL) )
  16.355 +        BUG();
  16.356 +
  16.357 +    if ( disk->usage != 0 )
  16.358 +    {
  16.359 +        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
  16.360 +        rc = -1;
  16.361 +        goto out;
  16.362 +    }
  16.363 + 
  16.364 +    if ( (minor & (gd->max_p-1)) != 0 )
  16.365 +    {
  16.366 +        /* 1: The VBD is mapped to a partition rather than a whole unit. */
  16.367 +        invalidate_device(device, 1);
  16.368 +	gd->part[minor].start_sect = 0;
  16.369 +        gd->part[minor].nr_sects   = 0;
  16.370 +        gd->sizes[minor]           = 0;
  16.371 +
  16.372 +        /* Clear the consists-of-virtual-partitions flag if possible. */
  16.373 +        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
  16.374 +        for ( i = 1; i < gd->max_p; i++ )
  16.375 +            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
  16.376 +                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
  16.377 +
  16.378 +        /*
  16.379 +         * If all virtual partitions are now gone, and a 'whole unit' VBD is
  16.380 +         * present, then we can try to grok the unit's real partition table.
  16.381 +         */
  16.382 +        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
  16.383 +             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
  16.384 +             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
  16.385 +        {
  16.386 +            register_disk(gd,
  16.387 +                          device&~(gd->max_p-1), 
  16.388 +                          gd->max_p, 
  16.389 +                          &xlvbd_block_fops,
  16.390 +                          gd->part[minor&~(gd->max_p-1)].nr_sects);
  16.391 +        }
  16.392 +    }
  16.393 +    else
  16.394 +    {
  16.395 +        /*
  16.396 +         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
  16.397 +         * NB. The partition entries are only cleared if there are no VBDs
  16.398 +         * mapped to individual partitions on this unit.
  16.399 +         */
  16.400 +        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
  16.401 +        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
  16.402 +            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
  16.403 +        while ( i >= 0 )
  16.404 +        {
  16.405 +            invalidate_device(device+i, 1);
  16.406 +            gd->part[minor+i].start_sect = 0;
  16.407 +            gd->part[minor+i].nr_sects   = 0;
  16.408 +            gd->sizes[minor+i]           = 0;
  16.409 +            i--;
  16.410 +        }
  16.411 +    }
  16.412 +
  16.413 + out:
  16.414 +    up(&bd->bd_sem);
  16.415 +    bdput(bd);
  16.416 +    return rc;
  16.417 +}
  16.418 +
  16.419 +/*
   16.420 + * xlvbd_update_vbds - reprobes the VBD status and performs the corresponding
   16.421 + * updates to driver state. The VBDs need to be updated in this way when the
   16.422 + * domain is initialised and also each time we receive an XLBLK_UPDATE event.
  16.423 + */
  16.424 +void xlvbd_update_vbds(void)
  16.425 +{
  16.426 +    int i, j, k, old_nr, new_nr;
  16.427 +    vdisk_t *old_info, *new_info, *merged_info;
  16.428 +
  16.429 +    old_info = vbd_info;
  16.430 +    old_nr   = nr_vbds;
  16.431 +
  16.432 +    new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
   16.433 +    if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) )
  16.434 +    {
  16.435 +        kfree(new_info);
  16.436 +        return;
  16.437 +    }
  16.438 +
  16.439 +    /*
   16.440 +     * The merged list is at most as large as the old list plus the new list.
   16.441 +     * This worst case occurs only when the lists do not overlap at all and no
   16.442 +     * old VBD can yet be destroyed because its usage count is still non-zero.
  16.443 +     */
  16.444 +    merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
  16.445 +
  16.446 +    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
  16.447 +    i = j = k = 0;
  16.448 +
  16.449 +    while ( (i < old_nr) && (j < new_nr) )
  16.450 +    {
  16.451 +        if ( old_info[i].device < new_info[j].device )
  16.452 +        {
  16.453 +            if ( xlvbd_remove_device(old_info[i].device) != 0 )
  16.454 +                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
  16.455 +            i++;
  16.456 +        }
  16.457 +        else if ( old_info[i].device > new_info[j].device )
  16.458 +        {
  16.459 +            if ( xlvbd_init_device(&new_info[j]) == 0 )
  16.460 +                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
  16.461 +            j++;
  16.462 +        }
  16.463 +        else
  16.464 +        {
  16.465 +            if ( ((old_info[i].capacity == new_info[j].capacity) &&
  16.466 +                  (old_info[i].info == new_info[j].info)) ||
  16.467 +                 (xlvbd_remove_device(old_info[i].device) != 0) )
  16.468 +                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
  16.469 +            else if ( xlvbd_init_device(&new_info[j]) == 0 )
  16.470 +                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
  16.471 +            i++; j++;
  16.472 +        }
  16.473 +    }
  16.474 +
  16.475 +    for ( ; i < old_nr; i++ )
  16.476 +    {
  16.477 +        if ( xlvbd_remove_device(old_info[i].device) != 0 )
  16.478 +            memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
  16.479 +    }
  16.480 +
  16.481 +    for ( ; j < new_nr; j++ )
  16.482 +    {
  16.483 +        if ( xlvbd_init_device(&new_info[j]) == 0 )
  16.484 +            memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
  16.485 +    }
  16.486 +
  16.487 +    vbd_info = merged_info;
  16.488 +    nr_vbds  = k;
  16.489 +
  16.490 +    kfree(old_info);
  16.491 +    kfree(new_info);
  16.492 +}
  16.493 +#endif
  16.494 +
  16.495 +/*
  16.496 + * Set up all the linux device goop for the virtual block devices
  16.497 + * (vbd's) that we know about. Note that although from the backend
   16.498 + * driver's p.o.v. VBDs are addressed simply as an opaque 16-bit device
  16.499 + * number, the domain creation tools conventionally allocate these
  16.500 + * numbers to correspond to those used by 'real' linux -- this is just
  16.501 + * for convenience as it means e.g. that the same /etc/fstab can be
  16.502 + * used when booting with or without Xen.
  16.503 + */
  16.504 +int xlvbd_init(void)
  16.505 +{
  16.506 +	int i;
  16.507 +
  16.508 +	/*
  16.509 +	 * If compiled as a module, we don't support unloading yet. We
  16.510 +	 * therefore permanently increment the reference count to
  16.511 +	 * disallow it.
  16.512 +	 */
  16.513 +	MOD_INC_USE_COUNT;
  16.514 +
  16.515 +	memset(major_info, 0, sizeof(major_info));
  16.516 +
   16.517 +	/* Entries in major_info[] are allocated lazily by
   16.518 +	 * xlbd_get_major_info(), so there is nothing more to set up here. */
  16.519 +
   16.520 +	vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
   16.521 +	nr_vbds  = (vbd_info == NULL) ? -ENOMEM : xlvbd_get_vbd_info(vbd_info);
  16.522 +
  16.523 +	if (nr_vbds < 0) {
  16.524 +		kfree(vbd_info);
  16.525 +		vbd_info = NULL;
  16.526 +		nr_vbds  = 0;
  16.527 +	} else {
  16.528 +		for (i = 0; i < nr_vbds; i++)
  16.529 +			xlvbd_init_device(&vbd_info[i]);
  16.530 +	}
  16.531 +
  16.532 +	return 0;
  16.533 +}
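
The comment above notes that the 16-bit xd_device numbers handed out by the
domain-creation tools mirror native Linux major/minor conventions. As a
concrete illustration, here is a stand-alone sketch (plain C, user space;
MAJOR_XEN/MINOR_XEN and the 6-bit IDE partition shift match the driver above,
everything else is invented for illustration) of how such a number decodes to
a familiar device name:

    #include <stdio.h>

    #define MAJOR_XEN(dev)  ((dev) >> 8)      /* as in the driver above */
    #define MINOR_XEN(dev)  ((dev) & 0xff)
    #define IDE_PARTN_SHIFT 6                 /* 64 minors per IDE unit */

    static void xd_device_to_name(int xd_device, char *buf, size_t len)
    {
        int minor = MINOR_XEN(xd_device);
        int unit  = minor >> IDE_PARTN_SHIFT;
        int part  = minor & ((1 << IDE_PARTN_SHIFT) - 1);

        if (part == 0)
            snprintf(buf, len, "hd%c", 'a' + unit);          /* whole unit */
        else
            snprintf(buf, len, "hd%c%d", 'a' + unit, part);  /* partition  */
    }

    int main(void)
    {
        char name[16];

        /* 0x0301: major 3 (IDE0), minor 1 -> first partition of hda,
         * so the same name works in /etc/fstab with or without Xen. */
        xd_device_to_name(0x0301, name, sizeof(name));
        printf("device 0x0301 (major %d) -> /dev/%s\n",
               MAJOR_XEN(0x0301), name);
        return 0;
    }
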
    17.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/block/Kconfig	Sat Jul 31 22:24:26 2004 +0000
    17.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.3 @@ -1,6 +0,0 @@
    17.4 -
    17.5 -config XENBLOCK
    17.6 -	tristate "Block device driver"
    17.7 -	depends on ARCH_XEN
    17.8 -	help
    17.9 -	  Block device driver for Xen
    18.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/block/Makefile	Sat Jul 31 22:24:26 2004 +0000
    18.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.3 @@ -1,3 +0,0 @@
    18.4 -
    18.5 -obj-y	:= vbd.o block.o
    18.6 -
    19.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/block/block.c	Sat Jul 31 22:24:26 2004 +0000
    19.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.3 @@ -1,653 +0,0 @@
    19.4 -/******************************************************************************
    19.5 - * block.c
    19.6 - * 
    19.7 - * XenLinux virtual block-device driver.
    19.8 - * 
    19.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   19.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   19.11 - * Copyright (c) 2004, Christian Limpach
   19.12 - */
   19.13 -
   19.14 -#include "block.h"
   19.15 -#include <linux/cdrom.h>
   19.16 -#include <linux/sched.h>
   19.17 -#include <linux/interrupt.h>
   19.18 -#include <scsi/scsi.h>
   19.19 -#include <asm-xen/ctrl_if.h>
   19.20 -
   19.21 -typedef unsigned char byte; /* from linux/ide.h */
   19.22 -
   19.23 -#define BLKIF_STATE_CLOSED       0
   19.24 -#define BLKIF_STATE_DISCONNECTED 1
   19.25 -#define BLKIF_STATE_CONNECTED    2
   19.26 -static unsigned int blkif_state = BLKIF_STATE_CLOSED;
   19.27 -static unsigned int blkif_evtchn, blkif_irq;
   19.28 -
   19.29 -static int blkif_control_rsp_valid;
   19.30 -static blkif_response_t blkif_control_rsp;
   19.31 -
   19.32 -static blkif_ring_t *blk_ring;
   19.33 -static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
   19.34 -static BLKIF_RING_IDX req_prod;  /* Private request producer.         */
   19.35 -
   19.36 -static blkif_ring_t *blk_ring_rec; /* Private copy of requests, used for
   19.37 -                                    * recovery.  Responses not stored here. */
   19.38 -static BLKIF_RING_IDX resp_cons_rec; /* Copy of response consumer, used for
   19.39 -                                      * recovery */
   19.40 -static int recovery = 0;           /* "Recovery in progress" flag.  Protected
   19.41 -                                    * by the blkif_io_lock */
   19.42 -
   19.43 -/* We plug the I/O ring if the driver is suspended or if the ring is full. */
   19.44 -#define	BLKIF_RING_FULL	(((req_prod - resp_cons) == BLKIF_RING_SIZE) || \
   19.45 -			 (blkif_state != BLKIF_STATE_CONNECTED))
   19.46 -
   19.47 -/*
   19.48 - * Request queues with outstanding work, but ring is currently full.
   19.49 - * We need no special lock here, as we always access this with the
   19.50 - * blkif_io_lock held. We only need a small maximum list.
   19.51 - */
   19.52 -#define MAX_PENDING 8
   19.53 -static request_queue_t *pending_queues[MAX_PENDING];
   19.54 -static int nr_pending;
   19.55 -
   19.56 -static inline void flush_requests(void)
   19.57 -{
   19.58 -
   19.59 -        blk_ring->req_prod = req_prod;
   19.60 -
   19.61 -        notify_via_evtchn(blkif_evtchn);
   19.62 -}
   19.63 -
   19.64 -
   19.65 -#if 0
   19.66 -/*
   19.67 - * blkif_update_int/update-vbds_task - handle VBD update events.
   19.68 - *  Schedule a task for keventd to run, which will update the VBDs and perform 
   19.69 - *  the corresponding updates to our view of VBD state.
   19.70 - */
   19.71 -static struct tq_struct update_tq;
   19.72 -static void update_vbds_task(void *unused)
   19.73 -{ 
   19.74 -    xlvbd_update_vbds();
   19.75 -}
   19.76 -#endif
   19.77 -
   19.78 -
   19.79 -int blkif_open(struct inode *inode, struct file *filep)
   19.80 -{
   19.81 -	struct gendisk *gd = inode->i_bdev->bd_disk;
   19.82 -	struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
   19.83 -
   19.84 -	/* Update of usage count is protected by per-device semaphore. */
   19.85 -	di->mi->usage++;
   19.86 -
   19.87 -	return 0;
   19.88 -}
   19.89 -
   19.90 -
   19.91 -int blkif_release(struct inode *inode, struct file *filep)
   19.92 -{
   19.93 -	struct gendisk *gd = inode->i_bdev->bd_disk;
   19.94 -	struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
   19.95 -
   19.96 -	/*
   19.97 -	 * When usage drops to zero it may allow more VBD updates to occur.
   19.98 -	 * Update of usage count is protected by a per-device semaphore.
   19.99 -	 */
  19.100 -	if (--di->mi->usage == 0) {
  19.101 -#if 0
  19.102 -		update_tq.routine = update_vbds_task;
  19.103 -		schedule_task(&update_tq);
  19.104 -#endif
  19.105 -	}
  19.106 -
  19.107 -	return 0;
  19.108 -}
  19.109 -
  19.110 -
  19.111 -int blkif_ioctl(struct inode *inode, struct file *filep,
  19.112 -                          unsigned command, unsigned long argument)
  19.113 -{
  19.114 -	/*  struct gendisk *gd = inode->i_bdev->bd_disk; */
  19.115 -
  19.116 -	DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
  19.117 -	    command, (long)argument, inode->i_rdev); 
  19.118 -  
  19.119 -	switch (command) {
  19.120 -
  19.121 -	case HDIO_GETGEO:
  19.122 -		/* return ENOSYS to use defaults */
  19.123 -		return -ENOSYS;
  19.124 -
  19.125 -	default:
  19.126 -		printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
  19.127 -		       command);
  19.128 -		return -ENOSYS;
  19.129 -	}
  19.130 -
  19.131 -	return 0;
  19.132 -}
  19.133 -
  19.134 -#if 0
  19.135 -/* check media change: should probably do something here in some cases :-) */
  19.136 -int blkif_check(kdev_t dev)
  19.137 -{
  19.138 -    DPRINTK("blkif_check\n");
  19.139 -    return 0;
  19.140 -}
  19.141 -
  19.142 -int blkif_revalidate(kdev_t dev)
  19.143 -{
  19.144 -    struct block_device *bd;
  19.145 -    struct gendisk *gd;
  19.146 -    xen_block_t *disk;
  19.147 -    unsigned long capacity;
  19.148 -    int i, rc = 0;
  19.149 -    
  19.150 -    if ( (bd = bdget(dev)) == NULL )
  19.151 -        return -EINVAL;
  19.152 -
  19.153 -    /*
  19.154 -     * Update of partition info, and check of usage count, is protected
  19.155 -     * by the per-block-device semaphore.
  19.156 -     */
  19.157 -    down(&bd->bd_sem);
  19.158 -
  19.159 -    if ( ((gd = get_gendisk(dev)) == NULL) ||
  19.160 -         ((disk = xldev_to_xldisk(dev)) == NULL) ||
  19.161 -         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
  19.162 -    {
  19.163 -        rc = -EINVAL;
  19.164 -        goto out;
  19.165 -    }
  19.166 -
  19.167 -    if ( disk->usage > 1 )
  19.168 -    {
  19.169 -        rc = -EBUSY;
  19.170 -        goto out;
  19.171 -    }
  19.172 -
  19.173 -    /* Only reread partition table if VBDs aren't mapped to partitions. */
  19.174 -    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
  19.175 -    {
  19.176 -        for ( i = gd->max_p - 1; i >= 0; i-- )
  19.177 -        {
  19.178 -            invalidate_device(dev+i, 1);
  19.179 -            gd->part[MINOR(dev+i)].start_sect = 0;
  19.180 -            gd->part[MINOR(dev+i)].nr_sects   = 0;
  19.181 -            gd->sizes[MINOR(dev+i)]           = 0;
  19.182 -        }
  19.183 -
  19.184 -        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
  19.185 -    }
  19.186 -
  19.187 - out:
  19.188 -    up(&bd->bd_sem);
  19.189 -    bdput(bd);
  19.190 -    return rc;
  19.191 -}
  19.192 -#endif
  19.193 -
  19.194 -
  19.195 -/*
  19.196 - * blkif_queue_request
  19.197 - *
  19.198 - * request block io 
  19.199 - * 
  19.200 - * id: for guest use only.
  19.201 - * operation: BLKIF_OP_{READ,WRITE,PROBE}
  19.202 - * buffer: buffer to read/write into. this should be a
  19.203 - *   virtual address in the guest os.
  19.204 - */
  19.205 -static int blkif_queue_request(struct request *req)
  19.206 -{
  19.207 -	struct xlbd_disk_info *di =
  19.208 -		(struct xlbd_disk_info *)req->rq_disk->private_data;
  19.209 -	unsigned long buffer_ma;
  19.210 -	blkif_request_t *ring_req;
  19.211 -	struct bio *bio;
  19.212 -	struct bio_vec *bvec;
  19.213 -	int idx, s;
  19.214 -        unsigned int fsect, lsect;
  19.215 -
  19.216 -        if (unlikely(blkif_state != BLKIF_STATE_CONNECTED))
  19.217 -                return 1;
  19.218 -
  19.219 -	/* Fill out a communications ring structure. */
  19.220 -	ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
  19.221 -	ring_req->id = (unsigned long)req;
  19.222 -	ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
  19.223 -		BLKIF_OP_READ;
  19.224 -	ring_req->sector_number = (blkif_sector_t)req->sector;
  19.225 -	ring_req->device = di->xd_device;
  19.226 -
  19.227 -	s = 0;
  19.228 -	ring_req->nr_segments = 0;
  19.229 -	rq_for_each_bio(bio, req) {
  19.230 -		bio_for_each_segment(bvec, bio, idx) {
  19.231 -			buffer_ma =
  19.232 -                                phys_to_machine(page_to_phys(bvec->bv_page));
  19.233 -			if (unlikely((buffer_ma & ((1<<9)-1)) != 0))
  19.234 -				BUG();
  19.235 -
  19.236 -                        fsect = bvec->bv_offset >> 9;
  19.237 -                        lsect = fsect + (bvec->bv_len >> 9) - 1;
  19.238 -                        if (unlikely(lsect > 7))
  19.239 -                                BUG();
  19.240 -
  19.241 -			ring_req->frame_and_sects[ring_req->nr_segments++] =
  19.242 -				buffer_ma | (fsect << 3) | lsect;
  19.243 -			s += bvec->bv_len >> 9;
  19.244 -		}
  19.245 -	}
  19.246 -
  19.247 -	req_prod++;
  19.248 -
  19.249 -        /* Keep a private copy so we can reissue requests when recovering. */
  19.250 -        blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req =
  19.251 -                *ring_req;
  19.252 -        blk_ring_rec->req_prod++;
  19.253 -
  19.254 -        return 0;
  19.255 -}
  19.256 -
  19.257 -/*
  19.258 - * do_blkif_request
  19.259 - *  read a block; request is in a request queue
  19.260 - */
  19.261 -void do_blkif_request(request_queue_t *rq)
  19.262 -{
  19.263 -	struct request *req;
  19.264 -	int queued;
  19.265 -
  19.266 -	DPRINTK("Entered do_blkif_request\n"); 
  19.267 -
  19.268 -	queued = 0;
  19.269 -
  19.270 -	while ((req = elv_next_request(rq)) != NULL) {
  19.271 -		if (!blk_fs_request(req)) {
  19.272 -			end_request(req, 0);
  19.273 -			continue;
  19.274 -		}
  19.275 -
  19.276 -		if (BLKIF_RING_FULL) {
  19.277 -			blk_stop_queue(rq);
  19.278 -			break;
  19.279 -		}
  19.280 -		DPRINTK("do_blkif_request %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
  19.281 -		    req, req->cmd, req->sector, req->current_nr_sectors,
  19.282 -		    req->nr_sectors, req->buffer,
  19.283 -		    rq_data_dir(req) ? "write" : "read");
  19.284 -                blkdev_dequeue_request(req);
  19.285 -		if (blkif_queue_request(req)) {
  19.286 -                        blk_stop_queue(rq);
  19.287 -                        break;
  19.288 -                }
  19.289 -		queued++;
  19.290 -	}
  19.291 -
  19.292 -	if (queued != 0)
  19.293 -		flush_requests();
  19.294 -}
  19.295 -
  19.296 -
  19.297 -static void kick_pending_request_queues(void)
  19.298 -{
  19.299 -    /* We kick pending request queues if the ring is reasonably empty. */
  19.300 -    if ( (nr_pending != 0) && 
  19.301 -         ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) )
  19.302 -    {
  19.303 -        /* Attempt to drain the queue, but bail if the ring becomes full. */
  19.304 -        while ( (nr_pending != 0) && !BLKIF_RING_FULL )
  19.305 -            do_blkif_request(pending_queues[--nr_pending]);
  19.306 -    }
  19.307 -}
  19.308 -
  19.309 -
  19.310 -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
  19.311 -{
  19.312 -	struct request *req;
  19.313 -	blkif_response_t *bret;
  19.314 -	BLKIF_RING_IDX i; 
  19.315 -	unsigned long flags; 
  19.316 -
  19.317 -	spin_lock_irqsave(&blkif_io_lock, flags);     
  19.318 -
  19.319 -        if (unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery)) {
  19.320 -                printk("Bailed out\n");
  19.321 -        
  19.322 -                spin_unlock_irqrestore(&blkif_io_lock, flags);
  19.323 -                return IRQ_HANDLED;
  19.324 -        }
  19.325 -
  19.326 -	for (i = resp_cons; i != blk_ring->resp_prod; i++) {
  19.327 -		bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
  19.328 -		switch (bret->operation) {
  19.329 -		case BLKIF_OP_READ:
  19.330 -		case BLKIF_OP_WRITE:
  19.331 -			if (unlikely(bret->status != BLKIF_RSP_OKAY))
  19.332 -				DPRINTK("Bad return from blkdev data request: %lx\n",
  19.333 -				    bret->status);
  19.334 -			req = (struct request *)bret->id;
  19.335 -                        /* XXXcl pass up status */
  19.336 -			if (unlikely(end_that_request_first(req, 1,
  19.337 -			    req->hard_nr_sectors)))
  19.338 -				BUG();
  19.339 -
  19.340 -			end_that_request_last(req);
  19.341 -			break;
  19.342 -                case BLKIF_OP_PROBE:
  19.343 -                        memcpy(&blkif_control_rsp, bret, sizeof(*bret));
  19.344 -                        blkif_control_rsp_valid = 1;
  19.345 -                        break;
  19.346 -     		default:
  19.347 -			BUG();
  19.348 -		}
  19.349 -	}
  19.350 -    
  19.351 -	resp_cons = i;
  19.352 -        resp_cons_rec = i;
  19.353 -
  19.354 -	if (xlbd_blk_queue &&
  19.355 -            test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags)) {
  19.356 -		blk_start_queue(xlbd_blk_queue);
  19.357 -		/* XXXcl call to request_fn should not be needed but
  19.358 -                 * we get stuck without...  needs investigating
  19.359 -		 */
  19.360 -		xlbd_blk_queue->request_fn(xlbd_blk_queue);
  19.361 -	}
  19.362 -
  19.363 -	spin_unlock_irqrestore(&blkif_io_lock, flags);
  19.364 -
  19.365 -	return IRQ_HANDLED;
  19.366 -}
  19.367 -
  19.368 -
  19.369 -void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
  19.370 -{
  19.371 -    unsigned long flags;
  19.372 -
  19.373 - retry:
  19.374 -    while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
  19.375 -    {
  19.376 -        set_current_state(TASK_INTERRUPTIBLE);
  19.377 -        schedule_timeout(1);
  19.378 -    }
  19.379 -
  19.380 -    spin_lock_irqsave(&blkif_io_lock, flags);
  19.381 -    if ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
  19.382 -    {
  19.383 -        spin_unlock_irqrestore(&blkif_io_lock, flags);
  19.384 -        goto retry;
  19.385 -    }
  19.386 -
  19.387 -    memcpy(&blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req, req, sizeof(*req));
  19.388 -    memcpy(&blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod++)].req,
  19.389 -           req, sizeof(*req));
  19.390 -    req_prod++;
  19.391 -    flush_requests();
  19.392 -
  19.393 -    spin_unlock_irqrestore(&blkif_io_lock, flags);
  19.394 -
  19.395 -    while ( !blkif_control_rsp_valid )
  19.396 -    {
  19.397 -        set_current_state(TASK_INTERRUPTIBLE);
  19.398 -        schedule_timeout(1);
  19.399 -    }
  19.400 -
  19.401 -    memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
  19.402 -    blkif_control_rsp_valid = 0;
  19.403 -}
  19.404 -
  19.405 -
  19.406 -static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
  19.407 -{
  19.408 -    ctrl_msg_t                   cmsg;
  19.409 -    blkif_fe_interface_connect_t up;
  19.410 -
  19.411 -    if ( status->handle != 0 )
  19.412 -    {
  19.413 -        printk(KERN_WARNING "Status change on unsupported blkif %d\n",
  19.414 -               status->handle);
  19.415 -        return;
  19.416 -    }
  19.417 -
  19.418 -    switch ( status->status )
  19.419 -    {
  19.420 -    case BLKIF_INTERFACE_STATUS_DESTROYED:
  19.421 -        printk(KERN_WARNING "Unexpected blkif-DESTROYED message in state %d\n",
  19.422 -               blkif_state);
  19.423 -        break;
  19.424 -
  19.425 -    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
  19.426 -        if ( blkif_state != BLKIF_STATE_CLOSED )
  19.427 -        {
  19.428 -            printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message"
  19.429 -                   " in state %d\n", blkif_state);
  19.430 -
  19.431 -            printk(KERN_INFO "VBD driver recovery in progress\n");
  19.432 -            
  19.433 -            /* Prevent new requests being issued until we fix things up. */
  19.434 -            spin_lock_irq(&blkif_io_lock);
  19.435 -            recovery = 1;
  19.436 -            blkif_state = BLKIF_STATE_DISCONNECTED;
  19.437 -            spin_unlock_irq(&blkif_io_lock);
  19.438 -
  19.439 -            /* Free resources associated with old device channel. */
  19.440 -            free_page((unsigned long)blk_ring);
  19.441 -            free_irq(blkif_irq, NULL);
  19.442 -            unbind_evtchn_from_irq(blkif_evtchn);
  19.443 -        }
  19.444 -
  19.445 -        /* Move from CLOSED to DISCONNECTED state. */
  19.446 -        blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
  19.447 -        blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
  19.448 -        blkif_state  = BLKIF_STATE_DISCONNECTED;
  19.449 -
  19.450 -        /* Construct an interface-CONNECT message for the domain controller. */
  19.451 -        cmsg.type      = CMSG_BLKIF_FE;
  19.452 -        cmsg.subtype   = CMSG_BLKIF_FE_INTERFACE_CONNECT;
  19.453 -        cmsg.length    = sizeof(blkif_fe_interface_connect_t);
  19.454 -        up.handle      = 0;
  19.455 -        up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
  19.456 -        memcpy(cmsg.msg, &up, sizeof(up));
  19.457 -        
  19.458 -        /* Tell the controller to bring up the interface. */
  19.459 -        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
  19.460 -        break;
  19.461 -
  19.462 -    case BLKIF_INTERFACE_STATUS_CONNECTED:
  19.463 -        if ( blkif_state == BLKIF_STATE_CLOSED )
  19.464 -        {
  19.465 -            printk(KERN_WARNING "Unexpected blkif-CONNECTED message"
  19.466 -                   " in state %d\n", blkif_state);
  19.467 -            break;
  19.468 -        }
  19.469 -
  19.470 -        blkif_evtchn = status->evtchn;
  19.471 -        blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
  19.472 -        (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
  19.473 -
  19.474 -        if ( recovery )
  19.475 -        {
  19.476 -            int i;
  19.477 -
  19.478 -	    /* Shouldn't need the blkif_io_lock here - the device is
  19.479 -	     * plugged and the recovery flag prevents the interrupt handler
  19.480 -	     * changing anything. */
  19.481 -
  19.482 -            /* Reissue requests from the private block ring. */
  19.483 -            for ( i = 0;
  19.484 -		  resp_cons_rec < blk_ring_rec->req_prod;
  19.485 -                  resp_cons_rec++, i++ )
  19.486 -            {
  19.487 -                blk_ring->ring[i].req
  19.488 -                    = blk_ring_rec->ring[MASK_BLKIF_IDX(resp_cons_rec)].req;
  19.489 -            }
  19.490 -
  19.491 -            /* Reset the private block ring to match the new ring. */
  19.492 -            memcpy(blk_ring_rec, blk_ring, sizeof(*blk_ring));
  19.493 -            resp_cons_rec = 0;
  19.494 -
  19.495 -            /* blk_ring->req_prod will be set when we flush_requests().*/
  19.496 -            blk_ring_rec->req_prod = req_prod = i;
  19.497 -
  19.498 -            wmb();
  19.499 -
  19.500 -            /* Switch off recovery mode, using a memory barrier to ensure that
  19.501 -             * it's seen before we flush requests - we don't want to miss any
  19.502 -             * interrupts. */
  19.503 -            recovery = 0;
  19.504 -            wmb();
  19.505 -
  19.506 -            /* Kicks things back into life. */
  19.507 -            flush_requests();
  19.508 -        }
  19.509 -        else
  19.510 -        {
  19.511 -            /* Probe for discs that are attached to the interface. */
  19.512 -            xlvbd_init();
  19.513 -        }
  19.514 -
  19.515 -        blkif_state = BLKIF_STATE_CONNECTED;
  19.516 -        
  19.517 -        /* Kick pending requests. */
  19.518 -        spin_lock_irq(&blkif_io_lock);
  19.519 -        kick_pending_request_queues();
  19.520 -        spin_unlock_irq(&blkif_io_lock);
  19.521 -
  19.522 -        break;
  19.523 -
  19.524 -    default:
  19.525 -        printk(KERN_WARNING "Status change to unknown value %d\n", 
  19.526 -               status->status);
  19.527 -        break;
  19.528 -    }
  19.529 -}
  19.530 -
  19.531 -
  19.532 -static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
  19.533 -{
  19.534 -    switch ( msg->subtype )
  19.535 -    {
  19.536 -    case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
  19.537 -        if ( msg->length != sizeof(blkif_fe_interface_status_changed_t) )
  19.538 -            goto parse_error;
  19.539 -        blkif_status_change((blkif_fe_interface_status_changed_t *)
  19.540 -                            &msg->msg[0]);
  19.541 -        break;        
  19.542 -#if 0
  19.543 -    case CMSG_BLKIF_FE_VBD_STATUS_CHANGED:
  19.544 -        update_tq.routine = update_vbds_task;
  19.545 -        schedule_task(&update_tq);
  19.546 -        break;
  19.547 -#endif
  19.548 -    default:
  19.549 -        goto parse_error;
  19.550 -    }
  19.551 -
  19.552 -    ctrl_if_send_response(msg);
  19.553 -    return;
  19.554 -
  19.555 - parse_error:
  19.556 -    msg->length = 0;
  19.557 -    ctrl_if_send_response(msg);
  19.558 -}
  19.559 -
  19.560 -
  19.561 -int __init xlblk_init(void)
  19.562 -{
  19.563 -    ctrl_msg_t                       cmsg;
  19.564 -    blkif_fe_driver_status_changed_t st;
  19.565 -
  19.566 -    if ( (start_info.flags & SIF_INITDOMAIN) 
  19.567 -        || (start_info.flags & SIF_BLK_BE_DOMAIN) )
  19.568 -        return 0;
  19.569 -
  19.570 -    printk(KERN_INFO "Initialising Xen virtual block device\n");
  19.571 -
  19.572 -    blk_ring_rec = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
  19.573 -    memset(blk_ring_rec, 0, sizeof(*blk_ring_rec));
  19.574 -
  19.575 -    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
  19.576 -                                    CALLBACK_IN_BLOCKING_CONTEXT);
  19.577 -
  19.578 -    /* Send a driver-UP notification to the domain controller. */
  19.579 -    cmsg.type      = CMSG_BLKIF_FE;
  19.580 -    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED;
  19.581 -    cmsg.length    = sizeof(blkif_fe_driver_status_changed_t);
  19.582 -    st.status      = BLKIF_DRIVER_STATUS_UP;
  19.583 -    memcpy(cmsg.msg, &st, sizeof(st));
  19.584 -    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
  19.585 -
  19.586 -    /*
  19.587 -     * We should read 'nr_interfaces' from response message and wait
  19.588 -     * for notifications before proceeding. For now we assume that we
  19.589 -     * will be notified of exactly one interface.
  19.590 -     */
  19.591 -    while ( blkif_state != BLKIF_STATE_CONNECTED )
  19.592 -    {
  19.593 -        set_current_state(TASK_INTERRUPTIBLE);
  19.594 -        schedule_timeout(1);
  19.595 -    }
  19.596 -
  19.597 -    return 0;
  19.598 -#if 0
  19.599 -	int error; 
  19.600 -
  19.601 -	reset_xlblk_interface();
  19.602 -
  19.603 -	xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
  19.604 -	xlblk_update_irq   = bind_virq_to_irq(VIRQ_VBD_UPD);
  19.605 -
  19.606 -	error = request_irq(xlblk_response_irq, xlblk_response_int, 
  19.607 -			    SA_SAMPLE_RANDOM, "blkdev", NULL);
  19.608 -	if (error) {
  19.609 -		printk(KERN_ALERT "Could not allocate receive interrupt\n");
  19.610 -		goto fail;
  19.611 -	}
  19.612 -
  19.613 -	error = request_irq(xlblk_update_irq, xlblk_update_int,
  19.614 -			    0, "blkdev", NULL);
  19.615 -	if (error) {
  19.616 -		printk(KERN_ALERT
  19.617 -		       "Could not allocate block update interrupt\n");
  19.618 -		goto fail;
  19.619 -	}
  19.620 -
  19.621 -	(void)xlvbd_init();
  19.622 -
  19.623 -	return 0;
  19.624 -
  19.625 - fail:
  19.626 -	return error;
  19.627 -#endif
  19.628 -}
  19.629 -
  19.630 -
  19.631 -static void __exit xlblk_cleanup(void)
  19.632 -{
  19.633 -    /* XXX FIXME */
  19.634 -    BUG();
  19.635 -#if 0
  19.636 -	/*  xlvbd_cleanup(); */
  19.637 -	free_irq(xlblk_response_irq, NULL);
  19.638 -	free_irq(xlblk_update_irq, NULL);
  19.639 -	unbind_virq_from_irq(VIRQ_BLKDEV);
  19.640 -	unbind_virq_from_irq(VIRQ_VBD_UPD);
  19.641 -#endif
  19.642 -}
  19.643 -
  19.644 -
  19.645 -module_init(xlblk_init);
  19.646 -module_exit(xlblk_cleanup);
  19.647 -
  19.648 -
  19.649 -void blkdev_suspend(void)
  19.650 -{
  19.651 -}
  19.652 -
  19.653 -
  19.654 -void blkdev_resume(void)
  19.655 -{
  19.656 -}
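
The block.c removed above drives a single shared ring with free-running
producer/consumer indices and keeps a private copy of every request so that
outstanding I/O can be replayed when the backend reconnects. A minimal
stand-alone sketch of that bookkeeping (plain C; the names echo the driver
but this is not the blkif API, and the real code additionally holds
blkif_io_lock and issues wmb() before publishing req_prod):

    #include <stdio.h>
    #include <string.h>

    #define RING_SIZE 8                       /* must be a power of two */
    #define MASK(i)   ((i) & (RING_SIZE - 1))

    struct req { unsigned long id; };

    static struct req ring[RING_SIZE];        /* page shared with backend */
    static struct req ring_rec[RING_SIZE];    /* private copy for recovery */
    static unsigned int req_prod, resp_cons;  /* free-running ring indices */
    static unsigned int rec_prod, resp_cons_rec;

    static int ring_full(void)
    {
        return (req_prod - resp_cons) == RING_SIZE;
    }

    static int queue_request(unsigned long id)
    {
        if (ring_full())
            return -1;                        /* caller stops the queue */
        ring[MASK(req_prod)].id = id;
        ring_rec[MASK(rec_prod)].id = id;     /* keep a replayable copy */
        rec_prod++;
        req_prod++;
        return 0;
    }

    /* On reconnect, copy every request the backend never consumed to the
     * start of the fresh ring and restart the indices, as the recovery
     * branch of blkif_status_change() does. */
    static void replay_after_reconnect(void)
    {
        unsigned int i = 0;

        for (; resp_cons_rec < rec_prod; resp_cons_rec++, i++)
            ring[i] = ring_rec[MASK(resp_cons_rec)];

        memcpy(ring_rec, ring, sizeof(ring)); /* resync the private copy */
        resp_cons_rec = 0;
        resp_cons = 0;
        rec_prod = req_prod = i;
    }

    int main(void)
    {
        queue_request(1);
        queue_request(2);
        replay_after_reconnect();             /* pretend the backend bounced */
        printf("replayed %u outstanding request(s)\n", req_prod);
        return 0;
    }
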
    20.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/block/block.h	Sat Jul 31 22:24:26 2004 +0000
    20.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.3 @@ -1,92 +0,0 @@
    20.4 -/******************************************************************************
    20.5 - * block.h
    20.6 - * 
    20.7 - * Shared definitions between all levels of XenLinux Virtual block devices.
    20.8 - */
    20.9 -
   20.10 -#ifndef __XEN_DRIVERS_BLOCK_H__
   20.11 -#define __XEN_DRIVERS_BLOCK_H__
   20.12 -
   20.13 -#include <linux/config.h>
   20.14 -#include <linux/module.h>
   20.15 -
   20.16 -#include <linux/kernel.h>
   20.17 -#include <linux/sched.h>
   20.18 -#include <linux/slab.h>
   20.19 -#include <linux/string.h>
   20.20 -#include <linux/errno.h>
   20.21 -
   20.22 -#include <linux/fs.h>
   20.23 -#include <linux/hdreg.h>
   20.24 -#include <linux/blkdev.h>
   20.25 -#include <linux/major.h>
   20.26 -
   20.27 -#include <linux/devfs_fs_kernel.h>
   20.28 -
   20.29 -#include <asm/hypervisor-ifs/hypervisor-if.h>
   20.30 -#include <asm/io.h>
   20.31 -#include <asm/atomic.h>
   20.32 -#include <asm/uaccess.h>
   20.33 -
   20.34 -#include <asm-xen/blkif.h>
   20.35 -
   20.36 -#if 0
   20.37 -#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   20.38 -#else
   20.39 -#define DPRINTK(_f, _a...) ((void)0)
   20.40 -#endif
   20.41 -
   20.42 -#if 0
   20.43 -#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   20.44 -#else
   20.45 -#define DPRINTK_IOCTL(_f, _a...) ((void)0)
   20.46 -#endif
   20.47 -
   20.48 -struct xlbd_type_info {
   20.49 -	int partn_shift;
   20.50 -	int devs_per_major;
   20.51 -	int hardsect_size;
   20.52 -	int max_sectors;
   20.53 -	char *name;
   20.54 -};
   20.55 -
   20.56 -/*
   20.57 - * We have one of these per vbd, whether ide, scsi or 'other'.  They
   20.58 - * hang in private_data off the gendisk structure. We may end up
   20.59 - * putting all kinds of interesting stuff here :-)
   20.60 - */
   20.61 -struct xlbd_major_info {
   20.62 -	int major;
   20.63 -	int usage;
   20.64 -	int xd_device;
   20.65 -	struct xlbd_type_info *type;
   20.66 -};
   20.67 -
   20.68 -struct xlbd_disk_info {
   20.69 -	int xd_device;
   20.70 -	struct xlbd_major_info *mi;
   20.71 -};
   20.72 -
   20.73 -typedef struct xen_block {
   20.74 -	int usage;
   20.75 -} xen_block_t;
   20.76 -
   20.77 -extern struct request_queue *xlbd_blk_queue;
   20.78 -extern spinlock_t blkif_io_lock;
   20.79 -
   20.80 -extern int blkif_open(struct inode *inode, struct file *filep);
   20.81 -extern int blkif_release(struct inode *inode, struct file *filep);
   20.82 -extern int blkif_ioctl(struct inode *inode, struct file *filep,
   20.83 -                           unsigned command, unsigned long argument);
   20.84 -extern int blkif_check(dev_t dev);
   20.85 -extern int blkif_revalidate(dev_t dev);
   20.86 -extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
   20.87 -extern void do_blkif_request (request_queue_t *rq); 
   20.88 -
   20.89 -extern void xlvbd_update_vbds(void);
   20.90 -
   20.91 -/* Virtual block-device subsystem. */
   20.92 -extern int  xlvbd_init(void);
   20.93 -extern void xlvbd_cleanup(void); 
   20.94 -
   20.95 -#endif /* __XEN_DRIVERS_BLOCK_H__ */
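
block.h, also removed here, hangs a struct xlbd_disk_info off each gendisk's
private_data so that open/release can reach the shared per-major usage count
that gates VBD updates. A small user-space sketch of that pattern (the struct
fields mirror the header; struct gendisk and both functions are simplified
stand-ins, not kernel code):

    #include <stdio.h>

    struct xlbd_major_info { int major; int usage; };
    struct xlbd_disk_info  { int xd_device; struct xlbd_major_info *mi; };

    /* Stand-in for struct gendisk: only the one field the pattern needs. */
    struct gendisk { void *private_data; };

    static int open_sketch(struct gendisk *gd)
    {
        struct xlbd_disk_info *di = gd->private_data;
        di->mi->usage++;          /* device pinned: VBD updates must wait */
        return 0;
    }

    static int release_sketch(struct gendisk *gd)
    {
        struct xlbd_disk_info *di = gd->private_data;
        if (--di->mi->usage == 0) {
            /* Last opener gone: deferred VBD updates could run now (the
             * real driver schedules update_vbds_task at this point). */
        }
        return 0;
    }

    int main(void)
    {
        struct xlbd_major_info mi = { 3, 0 };        /* IDE0 major   */
        struct xlbd_disk_info  di = { 0x0300, &mi }; /* whole of hda */
        struct gendisk         gd = { &di };

        open_sketch(&gd);
        release_sketch(&gd);
        printf("usage back to %d\n", mi.usage);
        return 0;
    }
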
    21.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/block/vbd.c	Sat Jul 31 22:24:26 2004 +0000
    21.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.3 @@ -1,530 +0,0 @@
    21.4 -/******************************************************************************
    21.5 - * vbd.c
    21.6 - * 
    21.7 - * XenLinux virtual block-device driver (xvd).
    21.8 - * 
    21.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   21.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   21.11 - * Copyright (c) 2004, Christian Limpach
   21.12 - */
   21.13 -
   21.14 -#include "block.h"
   21.15 -#include <linux/blkdev.h>
   21.16 -
   21.17 -/*
   21.18 - * For convenience we distinguish between ide, scsi and 'other' (i.e.
   21.19 - * potentially combinations of the two) in the naming scheme and in a few 
   21.20 - * other places (like default readahead, etc).
   21.21 - */
   21.22 -
   21.23 -#define NUM_IDE_MAJORS 10
   21.24 -#define NUM_SCSI_MAJORS 9
   21.25 -#define NUM_VBD_MAJORS 1
   21.26 -
   21.27 -static struct xlbd_type_info xlbd_ide_type = {
   21.28 -	.partn_shift = 6,
   21.29 -	// XXXcl todo blksize_size[major]  = 1024;
   21.30 -	.hardsect_size = 512,
   21.31 -	.max_sectors = 128,  /* 'hwif->rqsize' if we knew it */
   21.32 -	// XXXcl todo read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
   21.33 -	.name = "hd",
   21.34 -};
   21.35 -
   21.36 -static struct xlbd_type_info xlbd_scsi_type = {
   21.37 -	.partn_shift = 4,
   21.38 -	// XXXcl todo blksize_size[major]  = 1024; /* XXX 512; */
   21.39 -	.hardsect_size = 512,
   21.40 -	.max_sectors = 128*8, /* XXX 128; */
   21.41 -	// XXXcl todo read_ahead[major]    = 0; /* XXX 8; -- guessing */
   21.42 -	.name = "sd",
   21.43 -};
   21.44 -
   21.45 -static struct xlbd_type_info xlbd_vbd_type = {
   21.46 -	.partn_shift = 4,
   21.47 -	// XXXcl todo blksize_size[major]  = 512;
   21.48 -	.hardsect_size = 512,
   21.49 -	.max_sectors = 128,
   21.50 -	// XXXcl todo read_ahead[major]    = 8;
   21.51 -	.name = "xvd",
   21.52 -};
   21.53 -
   21.54 -/* XXXcl handle cciss after finding out why it's "hacked" in */
   21.55 -
   21.56 -static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
   21.57 -					 NUM_VBD_MAJORS];
   21.58 -
   21.59 -/* Information about our VBDs. */
   21.60 -#define MAX_VBDS 64
   21.61 -static int nr_vbds;
   21.62 -static vdisk_t *vbd_info;
   21.63 -
   21.64 -struct request_queue *xlbd_blk_queue = NULL;
   21.65 -
   21.66 -#define MAJOR_XEN(dev)	((dev)>>8)
   21.67 -#define MINOR_XEN(dev)	((dev) & 0xff)
   21.68 -
   21.69 -static struct block_device_operations xlvbd_block_fops = 
   21.70 -{
   21.71 -	.owner		= THIS_MODULE,
   21.72 -	.open		= blkif_open,
   21.73 -	.release	= blkif_release,
   21.74 -	.ioctl		= blkif_ioctl,
   21.75 -#if 0
   21.76 -    check_media_change: blkif_check,
   21.77 -    revalidate:         blkif_revalidate,
   21.78 -#endif
   21.79 -};
   21.80 -
   21.81 -spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
   21.82 -
   21.83 -static int xlvbd_get_vbd_info(vdisk_t *disk_info)
   21.84 -{
   21.85 -    vdisk_t         *buf = (vdisk_t *)__get_free_page(GFP_KERNEL);
   21.86 -    blkif_request_t  req;
   21.87 -    blkif_response_t rsp;
   21.88 -    int              nr;
   21.89 -
   21.90 -    memset(&req, 0, sizeof(req));
   21.91 -    req.operation   = BLKIF_OP_PROBE;
   21.92 -    req.nr_segments = 1;
   21.93 -    req.frame_and_sects[0] = virt_to_machine(buf) | 7;
   21.94 -
   21.95 -    blkif_control_send(&req, &rsp);
   21.96 -
   21.97 -    if ( rsp.status <= 0 )
   21.98 -    {
   21.99 -        printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
  21.100 -        return -1;
  21.101 -    }
  21.102 -
  21.103 -    if ( (nr = rsp.status) > MAX_VBDS )
  21.104 -         nr = MAX_VBDS;
  21.105 -    memcpy(disk_info, buf, nr * sizeof(vdisk_t));
  21.106 -
  21.107 -    return nr;
  21.108 -}
  21.109 -
  21.110 -static struct xlbd_major_info *xlbd_get_major_info(int xd_device, int *minor)
  21.111 -{
  21.112 -	int mi_idx, new_major;
  21.113 -	int xd_major = MAJOR_XEN(xd_device); 
  21.114 -	int xd_minor = MINOR_XEN(xd_device);
  21.115 -
  21.116 -	*minor = xd_minor;
  21.117 -
  21.118 -	switch (xd_major) {
  21.119 -	case IDE0_MAJOR: mi_idx = 0; new_major = IDE0_MAJOR; break;
  21.120 -	case IDE1_MAJOR: mi_idx = 1; new_major = IDE1_MAJOR; break;
  21.121 -	case IDE2_MAJOR: mi_idx = 2; new_major = IDE2_MAJOR; break;
  21.122 -	case IDE3_MAJOR: mi_idx = 3; new_major = IDE3_MAJOR; break;
  21.123 -	case IDE4_MAJOR: mi_idx = 4; new_major = IDE4_MAJOR; break;
  21.124 -	case IDE5_MAJOR: mi_idx = 5; new_major = IDE5_MAJOR; break;
  21.125 -	case IDE6_MAJOR: mi_idx = 6; new_major = IDE6_MAJOR; break;
  21.126 -	case IDE7_MAJOR: mi_idx = 7; new_major = IDE7_MAJOR; break;
  21.127 -	case IDE8_MAJOR: mi_idx = 8; new_major = IDE8_MAJOR; break;
  21.128 -	case IDE9_MAJOR: mi_idx = 9; new_major = IDE9_MAJOR; break;
  21.129 -	case SCSI_DISK0_MAJOR: mi_idx = 10; new_major = SCSI_DISK0_MAJOR; break;
  21.130 -	case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
  21.131 -		mi_idx = 11 + xd_major - SCSI_DISK1_MAJOR;
  21.132 -		new_major = SCSI_DISK1_MAJOR + xd_major - SCSI_DISK1_MAJOR;
  21.133 -		break;
  21.134 -	case SCSI_CDROM_MAJOR: mi_idx = 18; new_major = SCSI_CDROM_MAJOR; break;
  21.135 -	default: mi_idx = 19; new_major = 0;/* XXXcl notyet */ break;
  21.136 -	}
  21.137 -
  21.138 -	if (major_info[mi_idx])
  21.139 -		return major_info[mi_idx];
  21.140 -
  21.141 -	major_info[mi_idx] = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
  21.142 -	if (major_info[mi_idx] == NULL)
  21.143 -		return NULL;
  21.144 -
  21.145 -	memset(major_info[mi_idx], 0, sizeof(struct xlbd_major_info));
  21.146 -
  21.147 -	switch (mi_idx) {
  21.148 -	case 0 ... (NUM_IDE_MAJORS - 1):
  21.149 -		major_info[mi_idx]->type = &xlbd_ide_type;
  21.150 -		break;
  21.151 -	case NUM_IDE_MAJORS ... (NUM_IDE_MAJORS + NUM_SCSI_MAJORS - 1):
  21.152 -		major_info[mi_idx]->type = &xlbd_scsi_type;
  21.153 -		break;
  21.154 -	case (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) ...
  21.155 -		(NUM_IDE_MAJORS + NUM_SCSI_MAJORS + NUM_VBD_MAJORS - 1):
  21.156 -		major_info[mi_idx]->type = &xlbd_vbd_type;
  21.157 -		break;
  21.158 -	}
  21.159 -	major_info[mi_idx]->major = new_major;
  21.160 -
  21.161 -	if (register_blkdev(major_info[mi_idx]->major, major_info[mi_idx]->type->name)) {
  21.162 -		printk(KERN_ALERT "XL VBD: can't get major %d with name %s\n",
  21.163 -		    major_info[mi_idx]->major, major_info[mi_idx]->type->name);
  21.164 -		goto out;
  21.165 -	}
  21.166 -
  21.167 -	devfs_mk_dir(major_info[mi_idx]->type->name);
  21.168 -
  21.169 -	return major_info[mi_idx];
  21.170 -
  21.171 - out:
  21.172 -	kfree(major_info[mi_idx]);
  21.173 -	major_info[mi_idx] = NULL;
  21.174 -	return NULL;
  21.175 -}
  21.176 -
  21.177 -static struct gendisk *xlvbd_get_gendisk(struct xlbd_major_info *mi,
  21.178 -					 int xd_minor, vdisk_t *xd)
  21.179 -{
  21.180 -	struct gendisk *gd;
  21.181 -	struct xlbd_disk_info *di;
  21.182 -	int device, partno;
  21.183 -
  21.184 -	device = MKDEV(mi->major, xd_minor);
  21.185 -	gd = get_gendisk(device, &partno);
  21.186 -	if (gd)
  21.187 -		return gd;
  21.188 -
  21.189 -	di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
  21.190 -	if (di == NULL)
  21.191 -		return NULL;
  21.192 -	di->mi = mi;
  21.193 -	di->xd_device = xd->device;
  21.194 -
  21.195 -	/* Construct an appropriate gendisk structure. */
  21.196 -	gd = alloc_disk(1);
  21.197 -	if (gd == NULL)
  21.198 -		goto out;
  21.199 -
  21.200 -	gd->major = mi->major;
  21.201 -	gd->first_minor = xd_minor;
  21.202 -	gd->fops = &xlvbd_block_fops;
  21.203 -	gd->private_data = di;
  21.204 -	sprintf(gd->disk_name, "%s%c%d", mi->type->name,
  21.205 -	    'a' + (xd_minor >> mi->type->partn_shift),
  21.206 -	    xd_minor & ((1 << mi->type->partn_shift) - 1));
  21.207 -	/*  sprintf(gd->devfs_name, "%s%s/disc%d", mi->type->name, , ); XXXdevfs */
  21.208 -
  21.209 -	set_capacity(gd, xd->capacity);
  21.210 -
  21.211 -	if (xlbd_blk_queue == NULL) {
  21.212 -		xlbd_blk_queue = blk_init_queue(do_blkif_request,
  21.213 -						&blkif_io_lock);
  21.214 -		if (xlbd_blk_queue == NULL)
  21.215 -			goto out;
  21.216 -		elevator_init(xlbd_blk_queue, &elevator_noop);
  21.217 -
  21.218 -		/*
  21.219 -		 * Turn off barking 'headactive' mode. We dequeue
  21.220 -		 * buffer heads as soon as we pass them to back-end
  21.221 -		 * driver.
  21.222 -		 */
  21.223 -		blk_queue_headactive(xlbd_blk_queue, 0); /* XXXcl: noop according to blkdev.h */
  21.224 -
  21.225 -		blk_queue_hardsect_size(xlbd_blk_queue,
  21.226 -					mi->type->hardsect_size);
  21.227 -		blk_queue_max_sectors(xlbd_blk_queue, mi->type->max_sectors); /* 'hwif->rqsize' if we knew it */
  21.228 -
  21.229 -		/* XXXcl: set mask to PAGE_SIZE for now, to improve either use 
  21.230 -		   - blk_queue_merge_bvec to merge requests with adjacent ma's
  21.231 -		   - the tags infrastructure
  21.232 -		   - the dma infrastructure
  21.233 -		*/
  21.234 -		blk_queue_segment_boundary(xlbd_blk_queue, PAGE_SIZE - 1);
  21.235 -
  21.236 -		blk_queue_max_phys_segments(xlbd_blk_queue,
  21.237 -                    BLKIF_MAX_SEGMENTS_PER_REQUEST);
  21.238 -		blk_queue_max_hw_segments(xlbd_blk_queue,
  21.239 -                    BLKIF_MAX_SEGMENTS_PER_REQUEST); /* XXXcl not needed? */
  21.240 -
  21.241 -
  21.242 -	}
  21.243 -	gd->queue = xlbd_blk_queue;
  21.244 -
  21.245 -	add_disk(gd);
  21.246 -
  21.247 -	return gd;
  21.248 -
  21.249 - out:
  21.250 -	if (gd)
  21.251 -		del_gendisk(gd);
  21.252 -	kfree(di);
  21.253 -	return NULL;
  21.254 -}
  21.255 -
  21.256 -/*
  21.257 - * xlvbd_init_device - initialise a VBD device
  21.258 - * @disk:              a vdisk_t describing the VBD
  21.259 - *
  21.260 - * Takes a vdisk_t * that describes a VBD the domain has access to.
  21.261 - * Performs appropriate initialisation and registration of the device.
  21.262 - *
  21.263 - * Care needs to be taken when making re-entrant calls to ensure that
  21.264 - * corruption does not occur.  Also, devices that are in use should not have
  21.265 - * their details updated.  This is the caller's responsibility.
  21.266 - */
  21.267 -static int xlvbd_init_device(vdisk_t *xd)
  21.268 -{
  21.269 -	struct block_device *bd;
  21.270 -	struct gendisk *gd;
  21.271 -	struct xlbd_major_info *mi;
  21.272 -	int device;
  21.273 -	int minor;
  21.274 -
  21.275 -	int err = -ENOMEM;
  21.276 -
  21.277 -	mi = xlbd_get_major_info(xd->device, &minor);
  21.278 -	if (mi == NULL)
  21.279 -		return -EPERM;
  21.280 -
  21.281 -	device = MKDEV(mi->major, minor);
  21.282 -
  21.283 -	if ((bd = bdget(device)) == NULL)
  21.284 -		return -EPERM;
  21.285 -
  21.286 -	/*
  21.287 -	 * Update of partition info, and check of usage count, is protected
  21.288 -	 * by the per-block-device semaphore.
  21.289 -	 */
  21.290 -	down(&bd->bd_sem);
  21.291 -
  21.292 -	gd = xlvbd_get_gendisk(mi, minor, xd);
  21.293 -	if (mi == NULL) {
  21.294 -		err = -EPERM;
  21.295 -		goto out;
  21.296 -	}
  21.297 -
  21.298 -	if (VDISK_READONLY(xd->info))
  21.299 -		set_disk_ro(gd, 1); 
  21.300 -
  21.301 -	/* Some final fix-ups depending on the device type */
  21.302 -	switch (VDISK_TYPE(xd->info)) { 
  21.303 -	case VDISK_TYPE_CDROM:
  21.304 -		gd->flags |= GENHD_FL_REMOVABLE | GENHD_FL_CD; 
  21.305 -		/* FALLTHROUGH */
  21.306 -	case VDISK_TYPE_FLOPPY: 
  21.307 -	case VDISK_TYPE_TAPE:
  21.308 -		gd->flags |= GENHD_FL_REMOVABLE; 
  21.309 -		break; 
  21.310 -
  21.311 -	case VDISK_TYPE_DISK:
  21.312 -		break; 
  21.313 -
  21.314 -	default:
  21.315 -		printk(KERN_ALERT "XenLinux: unknown device type %d\n", 
  21.316 -		    VDISK_TYPE(xd->info)); 
  21.317 -		break; 
  21.318 -	}
  21.319 -
  21.320 -	err = 0;
  21.321 - out:
  21.322 -	up(&bd->bd_sem);
  21.323 -	bdput(bd);    
  21.324 -	return err;
  21.325 -}
  21.326 -
  21.327 -#if 0
  21.328 -/*
  21.329 - * xlvbd_remove_device - remove a device node if possible
  21.330 - * @device:       numeric device ID
  21.331 - *
  21.332 - * Updates the gendisk structure and invalidates devices.
  21.333 - *
  21.334 - * This is OK for now but in future, should perhaps consider where this should
  21.335 - * deallocate gendisks / unregister devices.
  21.336 - */
  21.337 -static int xlvbd_remove_device(int device)
  21.338 -{
  21.339 -    int i, rc = 0, minor = MINOR(device);
  21.340 -    struct gendisk *gd;
  21.341 -    struct block_device *bd;
  21.342 -    xen_block_t *disk = NULL;
  21.343 -
  21.344 -    if ( (bd = bdget(device)) == NULL )
  21.345 -        return -1;
  21.346 -
  21.347 -    /*
  21.348 -     * Update of partition info, and check of usage count, is protected
  21.349 -     * by the per-block-device semaphore.
  21.350 -     */
  21.351 -    down(&bd->bd_sem);
  21.352 -
  21.353 -    if ( ((gd = get_gendisk(device)) == NULL) ||
  21.354 -         ((disk = xldev_to_xldisk(device)) == NULL) )
  21.355 -        BUG();
  21.356 -
  21.357 -    if ( disk->usage != 0 )
  21.358 -    {
  21.359 -        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
  21.360 -        rc = -1;
  21.361 -        goto out;
  21.362 -    }
  21.363 - 
  21.364 -    if ( (minor & (gd->max_p-1)) != 0 )
  21.365 -    {
  21.366 -        /* 1: The VBD is mapped to a partition rather than a whole unit. */
  21.367 -        invalidate_device(device, 1);
  21.368 -	gd->part[minor].start_sect = 0;
  21.369 -        gd->part[minor].nr_sects   = 0;
  21.370 -        gd->sizes[minor]           = 0;
  21.371 -
  21.372 -        /* Clear the consists-of-virtual-partitions flag if possible. */
  21.373 -        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
  21.374 -        for ( i = 1; i < gd->max_p; i++ )
  21.375 -            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
  21.376 -                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
  21.377 -
  21.378 -        /*
  21.379 -         * If all virtual partitions are now gone, and a 'whole unit' VBD is
  21.380 -         * present, then we can try to grok the unit's real partition table.
  21.381 -         */
  21.382 -        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
  21.383 -             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
  21.384 -             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
  21.385 -        {
  21.386 -            register_disk(gd,
  21.387 -                          device&~(gd->max_p-1), 
  21.388 -                          gd->max_p, 
  21.389 -                          &xlvbd_block_fops,
  21.390 -                          gd->part[minor&~(gd->max_p-1)].nr_sects);
  21.391 -        }
  21.392 -    }
  21.393 -    else
  21.394 -    {
  21.395 -        /*
  21.396 -         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
  21.397 -         * NB. The partition entries are only cleared if there are no VBDs
  21.398 -         * mapped to individual partitions on this unit.
  21.399 -         */
  21.400 -        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
  21.401 -        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
  21.402 -            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
  21.403 -        while ( i >= 0 )
  21.404 -        {
  21.405 -            invalidate_device(device+i, 1);
  21.406 -            gd->part[minor+i].start_sect = 0;
  21.407 -            gd->part[minor+i].nr_sects   = 0;
  21.408 -            gd->sizes[minor+i]           = 0;
  21.409 -            i--;
  21.410 -        }
  21.411 -    }
  21.412 -
  21.413 - out:
  21.414 -    up(&bd->bd_sem);
  21.415 -    bdput(bd);
  21.416 -    return rc;
  21.417 -}
  21.418 -
  21.419 -/*
  21.420 - * xlvbd_update_vbds - reprobes the VBD status and performs updates driver
  21.421 - * state. The VBDs need to be updated in this way when the domain is
  21.422 - * initialised and also each time we receive an XLBLK_UPDATE event.
  21.423 - */
  21.424 -void xlvbd_update_vbds(void)
  21.425 -{
  21.426 -    int i, j, k, old_nr, new_nr;
  21.427 -    vdisk_t *old_info, *new_info, *merged_info;
  21.428 -
  21.429 -    old_info = vbd_info;
  21.430 -    old_nr   = nr_vbds;
  21.431 -
  21.432 -    new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
  21.433 -    if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
  21.434 -    {
  21.435 -        kfree(new_info);
  21.436 -        return;
  21.437 -    }
  21.438 -
  21.439 -    /*
  21.440 -     * Final list maximum size is old list + new list. This occurs only when
  21.441 -     * old list and new list do not overlap at all, and we cannot yet destroy
  21.442 -     * VBDs in the old list because the usage counts are busy.
  21.443 -     */
  21.444 -    merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
  21.445 -
  21.446 -    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
  21.447 -    i = j = k = 0;
  21.448 -
  21.449 -    while ( (i < old_nr) && (j < new_nr) )
  21.450 -    {
  21.451 -        if ( old_info[i].device < new_info[j].device )
  21.452 -        {
  21.453 -            if ( xlvbd_remove_device(old_info[i].device) != 0 )
  21.454 -                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
  21.455 -            i++;
  21.456 -        }
  21.457 -        else if ( old_info[i].device > new_info[j].device )
  21.458 -        {
  21.459 -            if ( xlvbd_init_device(&new_info[j]) == 0 )
  21.460 -                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
  21.461 -            j++;
  21.462 -        }
  21.463 -        else
  21.464 -        {
  21.465 -            if ( ((old_info[i].capacity == new_info[j].capacity) &&
  21.466 -                  (old_info[i].info == new_info[j].info)) ||
  21.467 -                 (xlvbd_remove_device(old_info[i].device) != 0) )
  21.468 -                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
  21.469 -            else if ( xlvbd_init_device(&new_info[j]) == 0 )
  21.470 -                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
  21.471 -            i++; j++;
  21.472 -        }
  21.473 -    }
  21.474 -
  21.475 -    for ( ; i < old_nr; i++ )
  21.476 -    {
  21.477 -        if ( xlvbd_remove_device(old_info[i].device) != 0 )
  21.478 -            memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
  21.479 -    }
  21.480 -
  21.481 -    for ( ; j < new_nr; j++ )
  21.482 -    {
  21.483 -        if ( xlvbd_init_device(&new_info[j]) == 0 )
  21.484 -            memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
  21.485 -    }
  21.486 -
  21.487 -    vbd_info = merged_info;
  21.488 -    nr_vbds  = k;
  21.489 -
  21.490 -    kfree(old_info);
  21.491 -    kfree(new_info);
  21.492 -}
  21.493 -#endif
  21.494 -
  21.495 -/*
  21.496 - * Set up all the linux device goop for the virtual block devices
  21.497 - * (vbd's) that we know about. Note that although from the backend
  21.498 - * driver's p.o.v. VBDs are addressed simply an opaque 16-bit device
  21.499 - * number, the domain creation tools conventionally allocate these
  21.500 - * numbers to correspond to those used by 'real' linux -- this is just
  21.501 - * for convenience as it means e.g. that the same /etc/fstab can be
  21.502 - * used when booting with or without Xen.
  21.503 - */
  21.504 -int xlvbd_init(void)
  21.505 -{
  21.506 -	int i;
  21.507 -
  21.508 -	/*
  21.509 -	 * If compiled as a module, we don't support unloading yet. We
  21.510 -	 * therefore permanently increment the reference count to
  21.511 -	 * disallow it.
  21.512 -	 */
  21.513 -	MOD_INC_USE_COUNT;
  21.514 -
  21.515 -	memset(major_info, 0, sizeof(major_info));
  21.516 -
  21.517 -	for (i = 0; i < sizeof(major_info) / sizeof(major_info[0]); i++) {
  21.518 -	}
  21.519 -
  21.520 -	vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
  21.521 -	nr_vbds  = xlvbd_get_vbd_info(vbd_info);
  21.522 -
  21.523 -	if (nr_vbds < 0) {
  21.524 -		kfree(vbd_info);
  21.525 -		vbd_info = NULL;
  21.526 -		nr_vbds  = 0;
  21.527 -	} else {
  21.528 -		for (i = 0; i < nr_vbds; i++)
  21.529 -			xlvbd_init_device(&vbd_info[i]);
  21.530 -	}
  21.531 -
  21.532 -	return 0;
  21.533 -}
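
The xlvbd_update_vbds() logic deleted above (and carried over, still inside
#if 0, into the new blkfront/vbd.c) reconciles the old and freshly probed VBD
lists, both sorted by device number, in a single two-pointer pass. A
compilable sketch of just that merge (plain C; remove_dev()/init_dev() are
stand-ins for xlvbd_remove_device()/xlvbd_init_device()):

    #include <stdio.h>

    struct vdisk { int device; long capacity; };

    /* Stand-ins: return 0 on success, non-zero when the device is busy
     * or cannot be set up. */
    static int remove_dev(int device)          { (void)device; return 0; }
    static int init_dev(const struct vdisk *v) { (void)v; return 0; }

    static int merge_vbd_lists(const struct vdisk *oldl, int old_nr,
                               const struct vdisk *newl, int new_nr,
                               struct vdisk *out)
    {
        int i = 0, j = 0, k = 0;

        while ((i < old_nr) && (j < new_nr)) {
            if (oldl[i].device < newl[j].device) {
                /* Gone from the probe: remove, but keep it if still busy. */
                if (remove_dev(oldl[i].device) != 0)
                    out[k++] = oldl[i];
                i++;
            } else if (oldl[i].device > newl[j].device) {
                /* Newly probed: add it if initialisation succeeds. */
                if (init_dev(&newl[j]) == 0)
                    out[k++] = newl[j];
                j++;
            } else {
                /* Same device: keep as-is when unchanged or busy,
                 * otherwise tear it down and re-initialise. */
                if ((oldl[i].capacity == newl[j].capacity) ||
                    (remove_dev(oldl[i].device) != 0))
                    out[k++] = oldl[i];
                else if (init_dev(&newl[j]) == 0)
                    out[k++] = newl[j];
                i++; j++;
            }
        }
        while (i < old_nr) {                   /* devices that disappeared */
            if (remove_dev(oldl[i].device) != 0)
                out[k++] = oldl[i];
            i++;
        }
        while (j < new_nr) {                   /* devices that are new */
            if (init_dev(&newl[j]) == 0)
                out[k++] = newl[j];
            j++;
        }
        return k;                              /* the new nr_vbds */
    }

    int main(void)
    {
        struct vdisk oldl[] = { {1, 10}, {3, 30} };
        struct vdisk newl[] = { {2, 20}, {3, 30} };
        struct vdisk out[4];

        printf("merged list has %d entries\n",
               merge_vbd_lists(oldl, 2, newl, 2, out));
        return 0;
    }
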
    22.1 --- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/xor.h	Sat Jul 31 22:24:26 2004 +0000
    22.2 +++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/xor.h	Sun Aug 01 15:14:41 2004 +0000
    22.3 @@ -535,7 +535,7 @@ static struct xor_block_template xor_blo
    22.4  
    22.5  #define XMMS_SAVE do {				\
    22.6  	preempt_disable();			\
    22.7 -	if (!(current->flags & PF_USEDFPU))	\
    22.8 +	if (!(current_thread_info()->status & TS_USEDFPU))	\
    22.9  		clts();				\
   22.10  	__asm__ __volatile__ ( 			\
   22.11  		"movups %%xmm0,(%1)	;\n\t"	\
   22.12 @@ -557,7 +557,7 @@ static struct xor_block_template xor_blo
   22.13  		:				\
   22.14  		: "r" (cr0), "r" (xmm_save)	\
   22.15  		: "memory");			\
   22.16 -	if (!(current->flags & PF_USEDFPU))	\
   22.17 +	if (!(current_thread_info()->status & TS_USEDFPU))	\
   22.18  		stts();				\
   22.19  	preempt_enable();			\
   22.20  } while(0)
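
The xor.h hunk above tracks a 2.6 interface change: the "task has live FPU
state" bit moved from current->flags (PF_USEDFPU) to
current_thread_info()->status (TS_USEDFPU), and the SSE XOR paths consult it
before borrowing the XMM registers. A stand-alone sketch of the discipline
the XMMS_SAVE/XMMS_RESTORE macros enforce (plain C; thread_info, clts()/stts()
and the CR0.TS bit are simulated here, not the kernel API):

    #include <stdio.h>

    #define TS_USEDFPU 0x0001                /* "task has live FPU state" */

    struct thread_info { unsigned long status; };

    static struct thread_info current_ti;    /* stand-in for current task  */
    static int cr0_ts = 1;                   /* stand-in for the CR0.TS bit */

    static void clts(void) { cr0_ts = 0; }   /* clear task-switched flag */
    static void stts(void) { cr0_ts = 1; }   /* set task-switched flag   */

    static void sse_xor_block(void)
    {
        /* Only toggle CR0.TS when this task was NOT already using the
         * FPU; if it was, TS is already clear and its registers live. */
        int fpu_was_live = current_ti.status & TS_USEDFPU;

        /* preempt_disable() goes here in the real macro: nothing may
         * reschedule while the XMM registers hold our data. */
        if (!fpu_was_live)
            clts();    /* avoid a device-not-available fault on SSE use */

        /* ... save %xmm0-%xmm3 to a scratch area, run the unrolled SSE
         * XOR loop, then restore the registers ... */

        if (!fpu_was_live)
            stts();    /* re-arm lazy FPU switching for the real owner */
        /* preempt_enable() */
    }

    int main(void)
    {
        sse_xor_block();
        printf("CR0.TS back to %d after the XOR block\n", cr0_ts);
        return 0;
    }
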