ia64/xen-unstable

changeset 1324:8f158bd218a6

bitkeeper revision 1.879.1.1 (408f7ae5PHe1i2motf-Iulpr3dEVhQ)

Further modifications towards new block-device drivers for new I/O
model.
author kaf24@scramble.cl.cam.ac.uk
date Wed Apr 28 09:35:33 2004 +0000 (2004-04-28)
parents 112eec3395e6
children 47b57e3d20f1
files .rootkeys
      tools/xend/lib/domain_controller.h
      tools/xend/lib/main.py
      tools/xend/lib/utils.c
      tools/xend/setup.py
      xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
      xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
      xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
      xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
      xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
      xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h
      xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile
      xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c
      xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h
      xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h
      xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
      xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
      xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c
      xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c
      xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c
      xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h
      xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h
      xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h
      xenolinux-2.4.26-sparse/mm/vmalloc.c
line diff
     1.1 --- a/.rootkeys	Tue Apr 27 13:13:06 2004 +0000
     1.2 +++ b/.rootkeys	Wed Apr 28 09:35:33 2004 +0000
     1.3 @@ -665,8 +665,8 @@ 4087cf0dkVF3I19gpT1cNubeJgQr7g xenolinux
     1.4  4087cf0dlv1Dw4MAbeRStPPG8IvPPg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
     1.5  40880cc6hHg6s2cPHbqPNQxENefjoQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h
     1.6  4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile
     1.7 -4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c
     1.8 -4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h
     1.9 +4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h
    1.10 +4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
    1.11  4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
    1.12  3e5a4e65iHEuC5sjFhj42XALYbLVRw xenolinux-2.4.26-sparse/arch/xen/drivers/block/Makefile
    1.13  3e5a4e65pP5spJErBW69pJxSSdK9RA xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c
     2.1 --- a/tools/xend/lib/domain_controller.h	Tue Apr 27 13:13:06 2004 +0000
     2.2 +++ b/tools/xend/lib/domain_controller.h	Wed Apr 28 09:35:33 2004 +0000
     2.3 @@ -56,14 +56,90 @@ typedef struct {
     2.4  #define CMSG_BLKIF_BE           1  /* Block-device backend  */
     2.5  #define CMSG_BLKIF_FE           2  /* Block-device frontend */
     2.6  
     2.7 +
     2.8 +/******************************************************************************
     2.9 + * CONSOLE DEFINITIONS
    2.10 + */
    2.11 +
    2.12  /*
    2.13   * Subtypes for console messages.
    2.14   */
    2.15  #define CMSG_CONSOLE_DATA       0
    2.16  
    2.17 +
    2.18 +/******************************************************************************
    2.19 + * BLOCK-INTERFACE FRONTEND DEFINITIONS
    2.20 + */
    2.21 +
    2.22 +/* Messages from domain controller to guest. */
    2.23 +#define CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED   0
    2.24 +
    2.25 +/* Messages from guest to domain controller. */
    2.26 +#define CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED     32
    2.27 +#define CMSG_BLKIF_FE_INTERFACE_UP              33
    2.28 +#define CMSG_BLKIF_FE_INTERFACE_DOWN            34
    2.29 +
    2.30 +/* These are used by both front-end and back-end drivers. */
    2.31 +#define blkif_vdev_t   u16
    2.32 +#define blkif_pdev_t   u16
    2.33 +#define blkif_sector_t u64
    2.34 +
    2.35  /*
    2.36 - * Subtypes for block-device messages.
    2.37 + * CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
    2.38 + *  Notify a guest about a status change on one of its block interfaces.
    2.39 + *  If the interface is DESTROYED or DOWN then the interface is disconnected:
    2.40 + *   1. The shared-memory frame is available for reuse.
     2.41 + *   2. Any unacknowledged messages pending on the interface have been dropped.
    2.42 + */
    2.43 +#define BLKIF_INTERFACE_STATUS_DESTROYED 0 /* Interface doesn't exist.      */
    2.44 +#define BLKIF_INTERFACE_STATUS_DOWN      1 /* Interface exists but is down. */
    2.45 +#define BLKIF_INTERFACE_STATUS_UP        2 /* Interface exists and is up.   */
    2.46 +typedef struct {
    2.47 +    unsigned int handle;
    2.48 +    unsigned int status;
    2.49 +    unsigned int evtchn; /* status == BLKIF_INTERFACE_STATUS_UP */
    2.50 +} blkif_fe_interface_status_changed_t;
    2.51 +
    2.52 +/*
    2.53 + * CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED:
    2.54 + *  Notify the domain controller that the front-end driver is DOWN or UP.
    2.55 + *  When the driver goes DOWN then the controller will send no more
    2.56 + *  status-change notifications. When the driver comes UP then the controller
    2.57 + *  will send a notification for each interface that currently exists.
    2.58 + *  If the driver goes DOWN while interfaces are still UP, the domain
     2.59 + *  controller will automatically take the interfaces DOWN.
    2.60   */
    2.61 +#define BLKIF_DRIVER_STATUS_DOWN         0
    2.62 +#define BLKIF_DRIVER_STATUS_UP           1
    2.63 +typedef struct {
    2.64 +    unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
    2.65 +} blkif_fe_driver_status_changed_t;
    2.66 +
    2.67 +/*
    2.68 + * CMSG_BLKIF_FE_INTERFACE_UP:
    2.69 + *  If successful, the domain controller will acknowledge with a STATUS_UP
    2.70 + *  message.
    2.71 + */
    2.72 +typedef struct {
    2.73 +    unsigned int  handle;
    2.74 +    unsigned long shmem_frame;
    2.75 +} blkif_fe_interface_up_t;
    2.76 +
    2.77 +/*
    2.78 + * CMSG_BLKIF_FE_INTERFACE_DOWN:
    2.79 + *  If successful, the domain controller will acknowledge with a STATUS_DOWN
    2.80 + *  message.
    2.81 + */
    2.82 +typedef struct {
    2.83 +    unsigned int handle;
    2.84 +} blkif_fe_interface_down_t;
    2.85 +
    2.86 +
    2.87 +/******************************************************************************
    2.88 + * BLOCK-INTERFACE BACKEND DEFINITIONS
    2.89 + */
    2.90 +
    2.91 +/* Messages from domain controller. */
    2.92  #define CMSG_BLKIF_BE_CREATE      0  /* Create a new block-device interface. */
    2.93  #define CMSG_BLKIF_BE_DESTROY     1  /* Destroy a block-device interface.    */
    2.94  #define CMSG_BLKIF_BE_VBD_CREATE  2  /* Create a new VBD for an interface.   */
    2.95 @@ -71,13 +147,12 @@ typedef struct {
    2.96  #define CMSG_BLKIF_BE_VBD_GROW    4  /* Append an extent to a given VBD.     */
    2.97  #define CMSG_BLKIF_BE_VBD_SHRINK  5  /* Remove last extent from a given VBD. */
    2.98  
    2.99 -/*
   2.100 - * Message request/response defintions for block-device messages.
   2.101 - */
   2.102 +/* Messages to domain controller. */
   2.103 +#define CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED 32
   2.104  
   2.105 -#define blkif_vdev_t   u16
   2.106 -#define blkif_pdev_t   u16
   2.107 -#define blkif_sector_t u64
   2.108 +/*
   2.109 + * Message request/response definitions for block-device messages.
   2.110 + */
   2.111  
   2.112  typedef struct {
   2.113      blkif_pdev_t   device;
   2.114 @@ -86,21 +161,36 @@ typedef struct {
   2.115  } blkif_extent_t;
   2.116  
   2.117  /* Non-specific 'okay' return. */
   2.118 -#define BLKIF_STATUS_OKAY                0
   2.119 +#define BLKIF_BE_STATUS_OKAY                0
   2.120  /* Non-specific 'error' return. */
   2.121 -#define BLKIF_STATUS_ERROR               1
   2.122 +#define BLKIF_BE_STATUS_ERROR               1
   2.123  /* The following are specific error returns. */
   2.124 -#define BLKIF_STATUS_INTERFACE_EXISTS    2
   2.125 -#define BLKIF_STATUS_INTERFACE_NOT_FOUND 3
   2.126 +#define BLKIF_BE_STATUS_INTERFACE_EXISTS    2
   2.127 +#define BLKIF_BE_STATUS_INTERFACE_NOT_FOUND 3
   2.128 +#define BLKIF_BE_STATUS_VBD_EXISTS          4
   2.129 +#define BLKIF_BE_STATUS_VBD_NOT_FOUND       5
   2.130 +#define BLKIF_BE_STATUS_OUT_OF_MEMORY       6
   2.131 +#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND    7
   2.132 +#define BLKIF_BE_STATUS_MAPPING_ERROR       8
   2.133  
   2.134  /* This macro can be used to create an array of descriptive error strings. */
   2.135 -#define BLKIF_STATUS_ERRORS {    \
   2.136 -    "Okay",                      \
   2.137 -    "Non-specific error",        \
   2.138 -    "Interface already exists",  \
   2.139 -    "Interface not found" }
   2.140 +#define BLKIF_BE_STATUS_ERRORS {   \
   2.141 +    "Okay",                        \
   2.142 +    "Non-specific error",          \
   2.143 +    "Interface already exists",    \
   2.144 +    "Interface not found",         \
   2.145 +    "VBD already exists",          \
   2.146 +    "VBD not found",               \
   2.147 +    "Out of memory",               \
   2.148 +    "Extent not found for VBD",    \
   2.149 +    "Could not map domain memory" }
   2.150  
   2.151 -/* CMSG_BLKIF_CREATE */
   2.152 +/*
   2.153 + * CMSG_BLKIF_BE_CREATE:
   2.154 + *  When the driver sends a successful response then the interface is fully
   2.155 + *  set up. The controller will send an UP notification to the front-end
   2.156 + *  driver.
   2.157 + */
   2.158  typedef struct { 
   2.159      /* IN */
   2.160      domid_t        domid;             /* Domain attached to new interface.   */
   2.161 @@ -109,18 +199,23 @@ typedef struct {
   2.162      unsigned long  shmem_frame;       /* Page cont. shared comms window.     */
   2.163      /* OUT */
   2.164      unsigned int   status;
   2.165 -} blkif_create_t; 
   2.166 +} blkif_be_create_t; 
   2.167  
   2.168 -/* CMSG_BLKIF_DESTROY */
   2.169 +/*
   2.170 + * CMSG_BLKIF_BE_DESTROY:
   2.171 + *  When the driver sends a successful response then the interface is fully
   2.172 + *  torn down. The controller will send a DOWN notification to the front-end
   2.173 + *  driver.
   2.174 + */
   2.175  typedef struct { 
   2.176      /* IN */
   2.177      domid_t        domid;             /* Identify interface to be destroyed. */
   2.178      unsigned int   blkif_handle;      /* ...ditto...                         */
   2.179      /* OUT */
   2.180      unsigned int   status;
   2.181 -} blkif_destroy_t; 
   2.182 +} blkif_be_destroy_t; 
   2.183  
   2.184 -/* CMSG_BLKIF_VBD_CREATE */
   2.185 +/* CMSG_BLKIF_BE_VBD_CREATE */
   2.186  typedef struct { 
   2.187      /* IN */
   2.188      domid_t        domid;             /* Identify blkdev interface.          */
   2.189 @@ -129,9 +224,9 @@ typedef struct {
   2.190      int            readonly;          /* Non-zero -> VBD isn't writeable.    */
   2.191      /* OUT */
   2.192      unsigned int   status;
   2.193 -} blkif_vbd_create_t; 
   2.194 +} blkif_be_vbd_create_t; 
   2.195  
   2.196 -/* CMSG_BLKIF_VBD_DESTROY */
   2.197 +/* CMSG_BLKIF_BE_VBD_DESTROY */
   2.198  typedef struct {
   2.199      /* IN */
   2.200      domid_t        domid;             /* Identify blkdev interface.          */
   2.201 @@ -139,9 +234,9 @@ typedef struct {
   2.202      blkif_vdev_t   vdevice;           /* Interface-specific id of the VBD.   */
   2.203      /* OUT */
   2.204      unsigned int   status;
   2.205 -} blkif_vbd_destroy_t; 
   2.206 +} blkif_be_vbd_destroy_t; 
   2.207  
   2.208 -/* CMSG_BLKIF_VBD_GROW */
   2.209 +/* CMSG_BLKIF_BE_VBD_GROW */
   2.210  typedef struct { 
   2.211      /* IN */
   2.212      domid_t        domid;             /* Identify blkdev interface.          */
   2.213 @@ -150,9 +245,9 @@ typedef struct {
   2.214      blkif_extent_t extent;            /* Physical extent to append to VBD.   */
   2.215      /* OUT */
   2.216      unsigned int   status;
   2.217 -} blkif_vbd_grow_t; 
   2.218 +} blkif_be_vbd_grow_t; 
   2.219  
   2.220 -/* CMSG_BLKIF_VBD_SHRINK */
   2.221 +/* CMSG_BLKIF_BE_VBD_SHRINK */
   2.222  typedef struct { 
   2.223      /* IN */
   2.224      domid_t        domid;             /* Identify blkdev interface.          */
   2.225 @@ -160,6 +255,16 @@ typedef struct {
   2.226      blkif_vdev_t   vdevice;           /* Interface-specific id of the VBD.   */
   2.227      /* OUT */
   2.228      unsigned int   status;
   2.229 -} blkif_vbd_shrink_t; 
   2.230 +} blkif_be_vbd_shrink_t; 
   2.231 +
   2.232 +/*
    2.233 + * CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED:
    2.234 + *  Notify the domain controller that the back-end driver is DOWN or UP.
    2.235 + *  If the driver goes DOWN while interfaces are still UP, the domain
    2.236 + *  controller will automatically send DOWN notifications.
   2.237 + */
   2.238 +typedef struct {
   2.239 +    unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
    2.240 +} blkif_be_driver_status_changed_t;
   2.241  
   2.242  #endif /* __DOMAIN_CONTROLLER_H__ */
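
The front-end definitions above imply a simple bring-up handshake: the guest
sends CMSG_BLKIF_FE_INTERFACE_UP naming the shared-memory frame for its ring,
and the domain controller acknowledges with a
CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED message carrying the event channel to
use. A minimal sketch of the guest side, assuming the ctrl_msg_t layout and a
ctrl_if_send_message() helper from asm-xen/ctrl_if.h (touched by this
changeset but not shown here):

    /* Hedged sketch: bring up block interface 0 from the guest side.     */
    /* ctrl_if_send_message() and the ctrl_msg_t fields are assumptions.  */
    static void blkif_send_interface_up(unsigned long shmem_frame)
    {
        ctrl_msg_t              cmsg;
        blkif_fe_interface_up_t up;

        up.handle      = 0;
        up.shmem_frame = shmem_frame;

        cmsg.type    = CMSG_BLKIF_FE;
        cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_UP;
        cmsg.length  = sizeof(up);
        memcpy(cmsg.msg, &up, sizeof(up));
        ctrl_if_send_message(&cmsg);
    }

    /* The controller's acknowledgement carries the event channel; the    */
    /* evtchn field is meaningful only when the status is UP.             */
    static void blkif_status_change(blkif_fe_interface_status_changed_t *st)
    {
        if ( st->status == BLKIF_INTERFACE_STATUS_UP )
            (void)bind_evtchn_to_irq(st->evtchn);
    }
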
     3.1 --- a/tools/xend/lib/main.py	Tue Apr 27 13:13:06 2004 +0000
     3.2 +++ b/tools/xend/lib/main.py	Wed Apr 28 09:35:33 2004 +0000
     3.3 @@ -44,6 +44,14 @@ def daemon_loop():
     3.4      # notifications.
     3.5      notifier = xend.utils.notifier()
     3.6  
     3.7 +    # The DOM0 control interface is not set up via the management interface.
     3.8 +    # Note that console messages don't come our way (actually, only driver
     3.9 +    # back-ends should use the DOM0 control interface). We therefore don't
    3.10 +    # need to set up console structures.
     3.11 +    port = xend.utils.port(0)
     3.12 +    notifier.bind(port.local_port)
     3.13 +    control_list[port.local_port] = (port, 0, 0, 0)
    3.14 +
    3.15      ##
    3.16      ## MAIN LOOP
    3.17      ## 
     4.1 --- a/tools/xend/lib/utils.c	Tue Apr 27 13:13:06 2004 +0000
     4.2 +++ b/tools/xend/lib/utils.c	Wed Apr 28 09:35:33 2004 +0000
     4.3 @@ -22,6 +22,8 @@
     4.4  #include <signal.h>
     4.5  #include <xc.h>
     4.6  
     4.7 +#include <asm-xen/proc_cmd.h>
     4.8 +
     4.9  #include <hypervisor-if.h>
    4.10  #include "domain_controller.h"
    4.11  
    4.12 @@ -684,8 +686,23 @@ static PyObject *xu_port_new(PyObject *s
    4.13          goto fail2;
    4.14      }
    4.15  
    4.16 -    if ( xc_evtchn_bind_interdomain(xup->xc_handle, 
    4.17 -                                    DOMID_SELF, dom, &port1, &port2) != 0 )
    4.18 +    if ( dom == 0ULL )
    4.19 +    {
    4.20 +        /*
    4.21 +         * The control-interface event channel for DOM0 is already set up.
    4.22 +         * We use an ioctl to discover the port at our end of the channel.
    4.23 +         */
    4.24 +        port1 = ioctl(xup->xc_handle, IOCTL_PRIVCMD_INITDOMAIN_EVTCHN, NULL);
    4.25 +        port2 = -1; /* We don't need the remote end of the DOM0 link. */
    4.26 +        if ( port1 < 0 )
    4.27 +        {
    4.28 +            PyErr_SetString(port_error, "Could not open channel to DOM0");
    4.29 +            goto fail3;
    4.30 +        }
    4.31 +    }
    4.32 +    else if ( xc_evtchn_bind_interdomain(xup->xc_handle, 
    4.33 +                                         DOMID_SELF, dom, 
    4.34 +                                         &port1, &port2) != 0 )
    4.35      {
    4.36          PyErr_SetString(port_error, "Could not open channel to domain");
    4.37          goto fail3;
    4.38 @@ -744,7 +761,8 @@ static void xu_port_dealloc(PyObject *se
    4.39  {
    4.40      xu_port_object *xup = (xu_port_object *)self;
    4.41      unmap_control_interface(xup->mem_fd, xup->interface);
    4.42 -    (void)xc_evtchn_close(xup->xc_handle, DOMID_SELF, xup->local_port);
    4.43 +    if ( xup->remote_dom != 0ULL )
    4.44 +        (void)xc_evtchn_close(xup->xc_handle, DOMID_SELF, xup->local_port);
    4.45      (void)xc_interface_close(xup->xc_handle);
    4.46      (void)close(xup->mem_fd);
    4.47      PyObject_Del(self);
     5.1 --- a/tools/xend/setup.py	Tue Apr 27 13:13:06 2004 +0000
     5.2 +++ b/tools/xend/setup.py	Wed Apr 28 09:35:33 2004 +0000
     5.3 @@ -4,7 +4,8 @@ from distutils.core import setup, Extens
     5.4  utils = Extension("utils",
     5.5                    extra_compile_args   = ["-fno-strict-aliasing"],
     5.6                    include_dirs         = ["../xc/lib",
     5.7 -                                          "../../xen/include/hypervisor-ifs"],
     5.8 +                                          "../../xen/include/hypervisor-ifs",
     5.9 +                                          "../../xenolinux-sparse/include"],
    5.10                    library_dirs         = ["../xc/lib"],
    5.11                    libraries            = ["xc"],
    5.12                    sources              = ["lib/utils.c"])
     6.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h	Tue Apr 27 13:13:06 2004 +0000
     6.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h	Wed Apr 28 09:35:33 2004 +0000
     6.3 @@ -34,7 +34,7 @@ typedef struct blkif_st {
     6.4      unsigned int     evtchn;
     6.5      int              irq;
     6.6      /* Comms information. */
     6.7 -    blk_ring_t      *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
     6.8 +    blkif_ring_t    *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
     6.9      BLK_RING_IDX     blk_req_cons;  /* Request consumer. */
    6.10      BLK_RING_IDX     blk_resp_prod; /* Private version of response producer. */
    6.11      /* VBDs attached to this interface. */
    6.12 @@ -44,13 +44,19 @@ typedef struct blkif_st {
    6.13      struct blkif_st *hash_next;
    6.14      struct list_head blkdev_list;
    6.15      spinlock_t       blk_ring_lock;
    6.16 +    atomic_t         refcnt;
    6.17  } blkif_t;
    6.18  
    6.19 -void blkif_create(blkif_create_t *create);
    6.20 -void blkif_destroy(blkif_destroy_t *destroy);
    6.21 +void blkif_create(blkif_be_create_t *create);
    6.22 +void blkif_destroy(blkif_be_destroy_t *destroy);
    6.23 +void __blkif_destroy(blkif_t *blkif);
    6.24  blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
    6.25 -void blkif_get(blkif_t *blkif);
    6.26 -void blkif_put(blkif_t *blkif);
    6.27 +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
    6.28 +#define blkif_put(_b)                             \
    6.29 +    do {                                          \
    6.30 +        if ( atomic_dec_and_test(&(_b)->refcnt) ) \
    6.31 +            __blkif_destroy(_b);                  \
    6.32 +    } while (0)
    6.33  
    6.34  /* An entry in a list of xen_extents. */
    6.35  typedef struct _blkif_extent_le { 
    6.36 @@ -60,25 +66,25 @@ typedef struct _blkif_extent_le {
    6.37  
    6.38  typedef struct _vbd { 
    6.39      blkif_vdev_t       vdevice;   /* what the domain refers to this vbd as */
    6.40 -    unsigned char      mode;      /* VBD_MODE_{R,W} */
    6.41 +    unsigned char      readonly;  /* Non-zero -> read-only */
    6.42      unsigned char      type;      /* XD_TYPE_xxx */
    6.43      blkif_extent_le_t *extents;   /* list of xen_extents making up this vbd */
    6.44      rb_node_t          rb;        /* for linking into R-B tree lookup struct */
    6.45  } vbd_t; 
    6.46  
    6.47 -long vbd_create(blkif_vbd_create_t *create_params); 
    6.48 -long vbd_grow(blkif_vbd_grow_t *grow_params); 
    6.49 -long vbd_shrink(blkif_vbd_shrink_t *shrink_params);
    6.50 -long vbd_destroy(blkif_vbd_destroy_t *delete_params); 
    6.51 -
    6.52 -void destroy_all_vbds(struct task_struct *p);
    6.53 +void vbd_create(blkif_be_vbd_create_t *create); 
    6.54 +void vbd_grow(blkif_be_vbd_grow_t *grow); 
    6.55 +void vbd_shrink(blkif_be_vbd_shrink_t *shrink);
    6.56 +void vbd_destroy(blkif_be_vbd_destroy_t *delete); 
    6.57 +int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds);
    6.58 +void destroy_all_vbds(blkif_t *blkif);
    6.59  
    6.60  typedef struct {
    6.61      blkif_t       *blkif;
    6.62      unsigned long  id;
    6.63      atomic_t       pendcnt;
    6.64      unsigned short operation;
    6.65 -    unsigned short status;
    6.66 +    int            status;
    6.67  } pending_req_t;
    6.68  
    6.69  /* Describes a [partial] disk extent (part of a block io request) */
    6.70 @@ -91,7 +97,10 @@ typedef struct {
    6.71  
    6.72  int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); 
    6.73  
    6.74 -int blkif_be_controller_init(void);
    6.75 +void blkif_interface_init(void);
    6.76 +void blkif_ctrlif_init(void);
    6.77 +
    6.78 +void blkif_deschedule(blkif_t *blkif);
    6.79  
    6.80  void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
    6.81  
     7.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c	Tue Apr 27 13:13:06 2004 +0000
     7.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c	Wed Apr 28 09:35:33 2004 +0000
     7.3 @@ -13,34 +13,34 @@ static void blkif_ctrlif_rx(ctrl_msg_t *
     7.4      switch ( msg->subtype )
     7.5      {
     7.6      case CMSG_BLKIF_BE_CREATE:
     7.7 -        if ( msg->length != sizeof(blkif_create_t) )
     7.8 +        if ( msg->length != sizeof(blkif_be_create_t) )
     7.9              goto parse_error;
    7.10 -        blkif_create((blkif_create_t *)&msg->msg[0]);
    7.11 +        blkif_create((blkif_be_create_t *)&msg->msg[0]);
    7.12          break;        
    7.13      case CMSG_BLKIF_BE_DESTROY:
    7.14 -        if ( msg->length != sizeof(blkif_destroy_t) )
    7.15 +        if ( msg->length != sizeof(blkif_be_destroy_t) )
    7.16              goto parse_error;
    7.17 -        blkif_destroy((blkif_destroy_t *)&msg->msg[0]);
    7.18 +        blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]);
    7.19          break;        
    7.20      case CMSG_BLKIF_BE_VBD_CREATE:
    7.21 -        if ( msg->length != sizeof(blkif_vbd_create_t) )
    7.22 +        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
    7.23              goto parse_error;
    7.24 -        vbd_create((blkif_vbd_create_t *)&msg->msg[0]);
    7.25 +        vbd_create((blkif_be_vbd_create_t *)&msg->msg[0]);
    7.26          break;
    7.27      case CMSG_BLKIF_BE_VBD_DESTROY:
    7.28 -        if ( msg->length != sizeof(blkif_vbd_destroy_t) )
    7.29 +        if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
    7.30              goto parse_error;
    7.31 -        vbd_destroy((blkif_vbd_destroy_t *)&msg->msg[0]);
    7.32 +        vbd_destroy((blkif_be_vbd_destroy_t *)&msg->msg[0]);
    7.33          break;
    7.34      case CMSG_BLKIF_BE_VBD_GROW:
    7.35 -        if ( msg->length != sizeof(blkif_vbd_grow_t) )
    7.36 +        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
    7.37              goto parse_error;
    7.38 -        vbd_grow((blkif_vbd_grow_t *)&msg->msg[0]);
    7.39 +        vbd_grow((blkif_be_vbd_grow_t *)&msg->msg[0]);
    7.40          break;
    7.41      case CMSG_BLKIF_BE_VBD_SHRINK:
    7.42 -        if ( msg->length != sizeof(blkif_vbd_shrink_t) )
    7.43 +        if ( msg->length != sizeof(blkif_be_vbd_shrink_t) )
    7.44              goto parse_error;
    7.45 -        vbd_shrink((blkif_vbd_shrink_t *)&msg->msg[0]);
    7.46 +        vbd_shrink((blkif_be_vbd_shrink_t *)&msg->msg[0]);
    7.47          break;
    7.48      default:
    7.49          goto parse_error;
    7.50 @@ -54,8 +54,7 @@ static void blkif_ctrlif_rx(ctrl_msg_t *
    7.51      ctrl_if_send_response(msg);
    7.52  }
    7.53  
    7.54 -int blkif_ctrlif_init(void)
    7.55 +void blkif_ctrlif_init(void)
    7.56  {
    7.57      (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx);
    7.58 -    return 0;
    7.59  }
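
Each handler fills in the status field of the request it was handed, and
ctrl_if_send_response() returns the same message to the controller. The
BLKIF_BE_STATUS_ERRORS macro added to domain_controller.h pairs with these
codes; a small sketch of its intended use (the helper name is illustrative,
not part of this changeset):

    /* Sketch: map a back-end status code to its descriptive string.      */
    static const char *blkif_be_status_errors[] = BLKIF_BE_STATUS_ERRORS;

    static const char *blkif_be_strstatus(unsigned int status)
    {
        if ( status >= (sizeof(blkif_be_status_errors) /
                        sizeof(blkif_be_status_errors[0])) )
            return "Unknown status";
        return blkif_be_status_errors[status];
    }
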
     8.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c	Tue Apr 27 13:13:06 2004 +0000
     8.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c	Wed Apr 28 09:35:33 2004 +0000
     8.3 @@ -12,85 +12,152 @@
     8.4  #define BLKIF_HASH(_d,_h) \
     8.5      (((int)(_d)^(int)((_d)>>32)^(int)(_h))&(BLKIF_HASHSZ-1))
     8.6  
     8.7 -static blkif_t *blkif_hash[BLKIF_HASHSZ];
     8.8 +static kmem_cache_t *blkif_cachep;
     8.9 +static blkif_t      *blkif_hash[BLKIF_HASHSZ];
    8.10 +static spinlock_t    blkif_hash_lock;
    8.11  
    8.12  blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
    8.13  {
    8.14 -    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
    8.15 -    while ( (blkif != NULL) && 
    8.16 -            (blkif->domid != domid) && 
    8.17 -            (blkif->handle != handle) )
    8.18 +    blkif_t      *blkif;
    8.19 +    unsigned long flags;
    8.20 +    
    8.21 +    spin_lock_irqsave(&blkif_hash_lock, flags);
    8.22 +    blkif = blkif_hash[BLKIF_HASH(domid, handle)];
    8.23 +    while ( blkif != NULL )
    8.24 +    {
    8.25 +        if ( (blkif->domid == domid) && (blkif->handle == handle) )
    8.26 +        {
    8.27 +            blkif_get(blkif);
    8.28 +            break;
    8.29 +        }
    8.30          blkif = blkif->hash_next;
    8.31 +    }
    8.32 +    spin_unlock_irqrestore(&blkif_hash_lock, flags);
    8.33 +
    8.34      return blkif;
    8.35  }
    8.36  
    8.37 -void blkif_create(blkif_create_t *create)
    8.38 +void __blkif_destroy(blkif_t *blkif)
    8.39 +{
    8.40 +    free_irq(blkif->irq, NULL);
    8.41 +    unbind_evtchn_from_irq(blkif->evtchn);
    8.42 +    vfree(blkif->blk_ring_base);
    8.43 +    destroy_all_vbds(blkif);
    8.44 +    kmem_cache_free(blkif_cachep, blkif);    
    8.45 +}
    8.46 +
    8.47 +void blkif_create(blkif_be_create_t *create)
    8.48  {
    8.49      domid_t       domid  = create->domid;
    8.50      unsigned int  handle = create->blkif_handle;
    8.51      unsigned int  evtchn = create->evtchn;
    8.52      unsigned long shmem_frame = create->shmem_frame;
    8.53 +    unsigned long flags;
    8.54      blkif_t     **pblkif, *blkif;
    8.55 +    struct vm_struct *vma;
    8.56 +    pgprot_t      prot;
    8.57 +    int           error;
    8.58 +
    8.59 +    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
    8.60 +    {
    8.61 +        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
    8.62 +        return;
    8.63 +    }
    8.64 +
    8.65 +    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
    8.66 +    {
    8.67 +        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
    8.68 +        goto fail1;
    8.69 +    }
    8.70 +
    8.71 +    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
    8.72 +    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
    8.73 +                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
    8.74 +                                    prot, domid);
    8.75 +    if ( error != 0 )
    8.76 +    {
    8.77 +        if ( error == -ENOMEM )
    8.78 +            create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
    8.79 +        else if ( error == -EFAULT )
    8.80 +            create->status = BLKIF_BE_STATUS_MAPPING_ERROR;
    8.81 +        else
    8.82 +            create->status = BLKIF_BE_STATUS_ERROR;
    8.83 +        goto fail2;
    8.84 +    }
    8.85 +
    8.86 +    memset(blkif, 0, sizeof(*blkif));
    8.87 +    blkif->domid         = domid;
    8.88 +    blkif->handle        = handle;
    8.89 +    blkif->evtchn        = evtchn;
    8.90 +    blkif->irq           = bind_evtchn_to_irq(evtchn);
    8.91 +    blkif->shmem_frame   = shmem_frame;
    8.92 +    blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
    8.93 +    spin_lock_init(&blkif->vbd_lock);
    8.94 +    spin_lock_init(&blkif->blk_ring_lock);
    8.95 +
    8.96 +    spin_lock_irqsave(&blkif_hash_lock, flags);
    8.97  
    8.98      pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
    8.99      while ( *pblkif != NULL )
   8.100      {
   8.101          if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
   8.102 -            goto found_match;
   8.103 +        {
   8.104 +            spin_unlock_irqrestore(&blkif_hash_lock, flags);
   8.105 +            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
   8.106 +            goto fail3;
   8.107 +        }
   8.108          pblkif = &(*pblkif)->hash_next;
   8.109      }
   8.110  
   8.111 -    blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
   8.112 -    memset(blkif, 0, sizeof(*blkif));
   8.113 -    blkif->domid       = domid;
   8.114 -    blkif->handle      = handle;
   8.115 -    blkif->evtchn      = evtchn;
   8.116 -    blkif->irq         = bind_evtchn_to_irq(evtchn);
   8.117 -    blkif->shmem_frame = shmem_frame;
   8.118 -    blkif->shmem_vbase = ioremap(shmem_frame<<PAGE_SHIFT, PAGE_SIZE);
   8.119 -    spin_lock_init(&blkif->vbd_lock);
   8.120 -    spin_lock_init(&blkif->blk_ring_lock);
   8.121 -
   8.122 -    request_irq(irq, blkif_be_int, 0, "blkif-backend", blkif);
   8.123 -
   8.124 +    atomic_set(&blkif->refcnt, 1);
   8.125      blkif->hash_next = *pblkif;
   8.126      *pblkif = blkif;
   8.127  
   8.128 -    create->status = BLKIF_STATUS_OKAY;
   8.129 -    return;
   8.130 +    spin_unlock_irqrestore(&blkif_hash_lock, flags);
   8.131  
   8.132 - found_match:
   8.133 -    create->status = BLKIF_STATUS_INTERFACE_EXISTS;
   8.134 +    request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif);
   8.135 +
   8.136 +    create->status = BLKIF_BE_STATUS_OKAY;
   8.137      return;
   8.138  
   8.139 - evtchn_in_use:
   8.140 -    unbind_evtchn_from_irq(evtchn); /* drop refcnt */
   8.141 -    create->status = BLKIF_STATUS_ERROR;
   8.142 -    return;
   8.143 + fail3: unbind_evtchn_from_irq(evtchn);
   8.144 + fail2: kmem_cache_free(blkif_cachep, blkif);
   8.145 + fail1: vfree(vma->addr);
   8.146  }
   8.147  
   8.148 -void blkif_destroy(blkif_destroy_t *destroy)
   8.149 +void blkif_destroy(blkif_be_destroy_t *destroy)
   8.150  {
   8.151      domid_t       domid  = destroy->domid;
   8.152      unsigned int  handle = destroy->blkif_handle;
   8.153 +    unsigned long flags;
   8.154      blkif_t     **pblkif, *blkif;
   8.155  
   8.156 +    spin_lock_irqsave(&blkif_hash_lock, flags);
   8.157 +
   8.158      pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
    8.159      while ( (blkif = *pblkif) != NULL )
   8.160      {
   8.161          if ( (blkif->domid == domid) && (blkif->handle == handle) )
   8.162 -            goto found_match;
   8.163 +        {
   8.164 +            *pblkif = blkif->hash_next;
   8.165 +            spin_unlock_irqrestore(&blkif_hash_lock, flags);
   8.166 +            blkif_deschedule(blkif);
   8.167 +            blkif_put(blkif);
   8.168 +            destroy->status = BLKIF_BE_STATUS_OKAY;
   8.169 +            return;
   8.170 +        }
   8.171          pblkif = &blkif->hash_next;
   8.172      }
   8.173  
   8.174 -    destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
   8.175 -    return;
   8.176 +    spin_unlock_irqrestore(&blkif_hash_lock, flags);
   8.177  
   8.178 - found_match:
   8.179 -    free_irq(blkif->irq, NULL);
   8.180 -    unbind_evtchn_from_irq(blkif->evtchn);
   8.181 -    *pblkif = blkif->hash_next;
   8.182 -    kmem_cache_free(blkif_cachep, blkif);
   8.183 -    destroy->status = BLKIF_STATUS_OKAY;
   8.184 +    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   8.185  }
   8.186  
   8.187 +void __init blkif_interface_init(void)
   8.188 +{
   8.189 +    blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
   8.190 +                                     0, 0, NULL, NULL);
   8.191 +    memset(blkif_hash, 0, sizeof(blkif_hash));
   8.192 +    spin_lock_init(&blkif_hash_lock);
   8.193 +}
     9.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c	Tue Apr 27 13:13:06 2004 +0000
     9.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c	Wed Apr 28 09:35:33 2004 +0000
     9.3 @@ -24,6 +24,18 @@
     9.4  #define MAX_PENDING_REQS 64
     9.5  #define BATCH_PER_DOMAIN 16
     9.6  
     9.7 +static struct vm_struct *mmap_vma;
     9.8 +#define MMAP_PAGES_PER_SEGMENT \
     9.9 +    ((BLKIF_MAX_SEGMENTS_PER_REQUEST >> (PAGE_SHIFT-9)) + 1)
    9.10 +#define MMAP_PAGES_PER_REQUEST \
    9.11 +    (2 * BLKIF_MAX_SEGMENTS_PER_REQUEST * MMAP_PAGES_PER_SEGMENT)
    9.12 +#define MMAP_PAGES             \
    9.13 +    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
    9.14 +#define MMAP_VADDR(_req,_seg)            \
    9.15 +    ((unsigned long)mmap_vma->addr +     \
    9.16 +     ((_req) * MMAP_PAGES_PER_REQUEST) + \
    9.17 +     ((_seg) * MMAP_PAGES_PER_SEGMENT))
    9.18 +
    9.19  /*
    9.20   * Each outstanding request that we've passed to the lower device layers has a 
    9.21   * 'pending_req' allocated to it. Each buffer_head that completes decrements 
    9.22 @@ -46,22 +58,11 @@ static PEND_RING_IDX pending_prod, pendi
    9.23  
    9.24  static kmem_cache_t *buffer_head_cachep;
    9.25  
    9.26 -static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned;
    9.27 -
    9.28 -static int lock_buffer(blkif_t *blkif,
    9.29 -                       unsigned long buffer,
    9.30 -                       unsigned short size,
    9.31 -                       int writeable_buffer);
    9.32 -static void unlock_buffer(unsigned long buffer,
    9.33 -                          unsigned short size,
    9.34 -                          int writeable_buffer);
    9.35 -
    9.36 -static void io_schedule(unsigned long unused);
    9.37  static int do_block_io_op(blkif_t *blkif, int max_to_do);
    9.38 -static void dispatch_rw_block_io(blkif_t *blkif,
    9.39 -                                 blk_ring_req_entry_t *req);
    9.40 +static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
    9.41 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
    9.42  static void make_response(blkif_t *blkif, unsigned long id, 
    9.43 -                          unsigned short op, unsigned long st);
    9.44 +                          unsigned short op, int st);
    9.45  
    9.46  
    9.47  /******************************************************************
    9.48 @@ -108,8 +109,6 @@ static void add_to_blkdev_list_tail(blki
    9.49   * SCHEDULER FUNCTIONS
    9.50   */
    9.51  
    9.52 -static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
    9.53 -
    9.54  static void io_schedule(unsigned long unused)
    9.55  {
    9.56      blkif_t          *blkif;
    9.57 @@ -132,6 +131,8 @@ static void io_schedule(unsigned long un
    9.58      run_task_queue(&tq_disk);
    9.59  }
    9.60  
    9.61 +static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
    9.62 +
    9.63  static void maybe_trigger_io_schedule(void)
    9.64  {
    9.65      /*
    9.66 @@ -155,28 +156,25 @@ static void maybe_trigger_io_schedule(vo
    9.67  static void end_block_io_op(struct buffer_head *bh, int uptodate)
    9.68  {
    9.69      pending_req_t *pending_req = bh->b_private;
    9.70 +    unsigned long  flags;
    9.71  
    9.72      /* An error fails the entire request. */
    9.73      if ( !uptodate )
    9.74      {
    9.75          DPRINTK("Buffer not up-to-date at end of operation\n");
    9.76 -        pending_req->status = 2;
    9.77 +        pending_req->status = BLKIF_RSP_ERROR;
    9.78      }
    9.79  
    9.80 -    unlock_buffer(virt_to_phys(bh->b_data), 
    9.81 -                  bh->b_size, 
    9.82 -                  (pending_req->operation==READ));
    9.83 -    
    9.84      if ( atomic_dec_and_test(&pending_req->pendcnt) )
    9.85      {
    9.86 +        int pending_idx = pending_req - pending_reqs;
    9.87 +        vmfree_area_pages(MMAP_VADDR(pending_idx, 0), MMAP_PAGES_PER_REQUEST);
    9.88          make_response(pending_req->blkif, pending_req->id,
    9.89                        pending_req->operation, pending_req->status);
    9.90          blkif_put(pending_req->blkif);
    9.91 -        spin_lock(&pend_prod_lock);
    9.92 -        pending_ring[MASK_PEND_IDX(pending_prod)] = 
    9.93 -            pending_req - pending_reqs;
    9.94 -        pending_prod++;
    9.95 -        spin_unlock(&pend_prod_lock);
    9.96 +        spin_lock_irqsave(&pend_prod_lock, flags);
    9.97 +        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
    9.98 +        spin_unlock_irqrestore(&pend_prod_lock, flags);
    9.99          maybe_trigger_io_schedule();
   9.100      }
   9.101  }
   9.102 @@ -200,45 +198,10 @@ void blkif_be_int(int irq, void *dev_id,
   9.103   * DOWNWARD CALLS -- These interface with the block-device layer proper.
   9.104   */
   9.105  
   9.106 -static int lock_buffer(blkif_t *blkif,
   9.107 -                       unsigned long buffer,
   9.108 -                       unsigned short size,
   9.109 -                       int writeable_buffer)
   9.110 -{
   9.111 -    unsigned long    pfn;
   9.112 -
   9.113 -    for ( pfn = buffer >> PAGE_SHIFT; 
   9.114 -          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
   9.115 -          pfn++ )
   9.116 -    {
   9.117 -    }
   9.118 -
   9.119 -    return 1;
   9.120 -
   9.121 - fail:
   9.122 -    while ( pfn-- > (buffer >> PAGE_SHIFT) )
   9.123 -    {        
   9.124 -    }
   9.125 -    return 0;
   9.126 -}
   9.127 -
   9.128 -static void unlock_buffer(unsigned long buffer,
   9.129 -                          unsigned short size,
   9.130 -                          int writeable_buffer)
   9.131 -{
   9.132 -    unsigned long pfn;
   9.133 -
   9.134 -    for ( pfn = buffer >> PAGE_SHIFT; 
   9.135 -          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
   9.136 -          pfn++ )
   9.137 -    {
   9.138 -    }
   9.139 -}
   9.140 -
   9.141  static int do_block_io_op(blkif_t *blkif, int max_to_do)
   9.142  {
   9.143 -    blk_ring_t *blk_ring = blkif->blk_ring_base;
   9.144 -    blk_ring_req_entry_t *req;
   9.145 +    blkif_ring_t *blk_ring = blkif->blk_ring_base;
   9.146 +    blkif_request_t *req;
   9.147      BLK_RING_IDX i;
   9.148      int more_to_do = 0;
   9.149  
   9.150 @@ -262,11 +225,15 @@ static int do_block_io_op(blkif_t *blkif
   9.151              dispatch_rw_block_io(blkif, req);
   9.152              break;
   9.153  
   9.154 +        case BLKIF_OP_PROBE:
   9.155 +            dispatch_probe(blkif, req);
   9.156 +            break;
   9.157 +
   9.158          default:
   9.159              DPRINTK("error: unknown block io operation [%d]\n",
   9.160                      blk_ring->ring[i].req.operation);
   9.161              make_response(blkif, blk_ring->ring[i].req.id, 
   9.162 -                          blk_ring->ring[i].req.operation, 1);
   9.163 +                          blk_ring->ring[i].req.operation, BLKIF_RSP_ERROR);
   9.164              break;
   9.165          }
   9.166      }
   9.167 @@ -275,24 +242,62 @@ static int do_block_io_op(blkif_t *blkif
   9.168      return more_to_do;
   9.169  }
   9.170  
   9.171 -static void dispatch_rw_block_io(blkif_t *blkif,
   9.172 -                                 blk_ring_req_entry_t *req)
   9.173 +static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
   9.174 +{
   9.175 +    int      i, rc, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
   9.176 +    pgprot_t prot;
   9.177 +
   9.178 +    /* Check that number of segments is sane. */
   9.179 +    if ( unlikely(req->nr_segments == 0) || 
   9.180 +         unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
   9.181 +    {
   9.182 +        DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
   9.183 +        goto bad_descriptor;
   9.184 +    }
   9.185 +
   9.186 +    prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW);
   9.187 +    for ( i = 0; i < req->nr_segments; i++ )
   9.188 +    {
   9.189 +        if ( (req->buffer_and_sects[i] & ~PAGE_MASK) != (PAGE_SIZE / 512) )
   9.190 +            goto bad_descriptor;
   9.191 +        if ( direct_remap_area_pages(&init_mm, 
   9.192 +                                     MMAP_VADDR(pending_idx, i),
   9.193 +                                     req->buffer_and_sects[i] & PAGE_MASK, 
   9.194 +                                     PAGE_SIZE, prot, blkif->domid) != 0 )
   9.195 +            goto bad_descriptor;
   9.196 +    }
   9.197 +
   9.198 +    rc = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), 
   9.199 +                   (req->nr_segments * PAGE_SIZE) / sizeof(vdisk_t));
   9.200 +
   9.201 +    vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
   9.202 +                      MMAP_PAGES_PER_REQUEST);
   9.203 +    make_response(blkif, req->id, req->operation, rc);
   9.204 +    return;
   9.205 +
   9.206 + bad_descriptor:
   9.207 +    vmfree_area_pages(MMAP_VADDR(pending_idx, 0), MMAP_PAGES_PER_REQUEST);
   9.208 +    make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
   9.209 +}
   9.210 +
   9.211 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
   9.212  {
   9.213      extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
   9.214      struct buffer_head *bh;
   9.215      int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ;
   9.216      unsigned short nr_sects;
   9.217      unsigned long buffer;
   9.218 -    int i, tot_sects;
   9.219 +    int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
   9.220      pending_req_t *pending_req;
   9.221 +    pgprot_t       prot;
   9.222  
   9.223      /* We map virtual scatter/gather segments to physical segments. */
   9.224      int new_segs, nr_psegs = 0;
   9.225 -    phys_seg_t phys_seg[MAX_BLK_SEGS * 2];
   9.226 +    phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
   9.227  
   9.228      /* Check that number of segments is sane. */
   9.229      if ( unlikely(req->nr_segments == 0) || 
   9.230 -         unlikely(req->nr_segments > MAX_BLK_SEGS) )
   9.231 +         unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
   9.232      {
   9.233          DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
   9.234          goto bad_descriptor;
   9.235 @@ -310,8 +315,11 @@ static void dispatch_rw_block_io(blkif_t
   9.236          nr_sects = req->buffer_and_sects[i] &  0x1FF;
   9.237  
   9.238          if ( unlikely(nr_sects == 0) )
   9.239 +            continue;
   9.240 +
   9.241 +        if ( unlikely(nr_sects > BLKIF_MAX_SECTORS_PER_SEGMENT) )
   9.242          {
   9.243 -            DPRINTK("zero-sized data request\n");
   9.244 +            DPRINTK("Too many sectors in segment\n");
   9.245              goto bad_descriptor;
   9.246          }
   9.247  
   9.248 @@ -333,29 +341,40 @@ static void dispatch_rw_block_io(blkif_t
   9.249          }
   9.250    
   9.251          nr_psegs += new_segs;
   9.252 -        ASSERT(nr_psegs <= MAX_BLK_SEGS*2);
   9.253 +        ASSERT(nr_psegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST*2);
   9.254      }
   9.255  
   9.256 +    /* Nonsensical zero-sized request? */
   9.257 +    if ( unlikely(nr_psegs == 0) )
   9.258 +        goto bad_descriptor;
   9.259 +
   9.260 +    if ( operation == READ )
   9.261 +        prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW);
   9.262 +    else
   9.263 +        prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED);
   9.264 +
   9.265      for ( i = 0; i < nr_psegs; i++ )
   9.266      {
   9.267 -        if ( unlikely(!lock_buffer(blkif, phys_seg[i].buffer, 
   9.268 -                                   phys_seg[i].nr_sects << 9,
   9.269 -                                   operation==READ)) )
   9.270 +        unsigned long sz = ((phys_seg[i].buffer & ~PAGE_MASK) + 
   9.271 +                            (phys_seg[i].nr_sects << 9) + 
   9.272 +                            (PAGE_SIZE - 1)) & PAGE_MASK;
   9.273 +        if ( direct_remap_area_pages(&init_mm, 
   9.274 +                                     MMAP_VADDR(pending_idx, i),
   9.275 +                                     phys_seg[i].buffer & PAGE_MASK, 
   9.276 +                                     sz, prot, blkif->domid) != 0 )
   9.277          {
   9.278              DPRINTK("invalid buffer\n");
   9.279 -            while ( i-- > 0 )
   9.280 -                unlock_buffer(phys_seg[i].buffer, 
   9.281 -                              phys_seg[i].nr_sects << 9,
   9.282 -                              operation==READ);
   9.283 +            vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
   9.284 +                              MMAP_PAGES_PER_REQUEST);
   9.285              goto bad_descriptor;
   9.286          }
   9.287      }
   9.288  
   9.289 -    pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]];
   9.290 +    pending_req = &pending_reqs[pending_idx];
   9.291      pending_req->blkif     = blkif;
   9.292      pending_req->id        = req->id;
   9.293      pending_req->operation = operation;
   9.294 -    pending_req->status    = 0;
    9.295 +    pending_req->status    = BLKIF_RSP_OKAY;
   9.296      atomic_set(&pending_req->pendcnt, nr_psegs);
   9.297  
   9.298      blkif_get(blkif);
   9.299 @@ -371,11 +390,8 @@ static void dispatch_rw_block_io(blkif_t
   9.300          bh->b_size          = phys_seg[i].nr_sects << 9;
   9.301          bh->b_dev           = phys_seg[i].dev;
   9.302          bh->b_rsector       = (unsigned long)phys_seg[i].sector_number;
   9.303 -
   9.304 -        /* SMH: we store a 'pseudo-virtual' bogus address in b_data since
   9.305 -           later code will undo this transformation (i.e. +-PAGE_OFFSET). */
   9.306 -        bh->b_data          = phys_to_virt(phys_seg[i].buffer);
   9.307 - 
   9.308 +        bh->b_data          = (char *)MMAP_VADDR(pending_idx, i) + 
   9.309 +            (phys_seg[i].buffer & ~PAGE_MASK);
   9.310          /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */
   9.311          bh->b_page          = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; 
   9.312          bh->b_end_io        = end_block_io_op;
   9.313 @@ -391,10 +407,11 @@ static void dispatch_rw_block_io(blkif_t
   9.314          submit_bh(operation, bh);
   9.315      }
   9.316  
   9.317 +    pending_cons++;
   9.318      return;
   9.319  
   9.320   bad_descriptor:
   9.321 -    make_response(blkif, req->id, req->operation, 1);
   9.322 +    make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
   9.323  } 
   9.324  
   9.325  
   9.326 @@ -405,12 +422,13 @@ static void dispatch_rw_block_io(blkif_t
   9.327  
   9.328  
   9.329  static void make_response(blkif_t *blkif, unsigned long id, 
   9.330 -                          unsigned short op, unsigned long st)
   9.331 +                          unsigned short op, int st)
   9.332  {
   9.333 -    blk_ring_resp_entry_t *resp;
   9.334 +    blkif_response_t *resp;
   9.335 +    unsigned long     flags;
   9.336  
   9.337      /* Place on the response ring for the relevant domain. */ 
   9.338 -    spin_lock(&blkif->blk_ring_lock);
   9.339 +    spin_lock_irqsave(&blkif->blk_ring_lock, flags);
   9.340      resp = &blkif->blk_ring_base->
   9.341          ring[MASK_BLK_IDX(blkif->blk_resp_prod)].resp;
   9.342      resp->id        = id;
   9.343 @@ -418,48 +436,13 @@ static void make_response(blkif_t *blkif
   9.344      resp->status    = st;
   9.345      wmb();
   9.346      blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod;
   9.347 -    spin_unlock(&blkif->blk_ring_lock);
   9.348 +    spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
   9.349  
   9.350      /* Kick the relevant domain. */
   9.351      notify_via_evtchn(blkif->evtchn);
   9.352  }
   9.353  
   9.354 -static void blkif_debug_int(int irq, void *unused, struct pt_regs *regs)
   9.355 -{
   9.356 -#if 0
   9.357 -    unsigned long flags;
   9.358 -    struct task_struct *p;
   9.359 -    blk_ring_t *blk_ring;
   9.360 -    int i;
   9.361 -
   9.362 -    printk("Dumping block queue stats: nr_pending = %d"
   9.363 -           " (prod=0x%08x,cons=0x%08x)\n",
   9.364 -           NR_PENDING_REQS, pending_prod, pending_cons);
   9.365 -
   9.366 -    read_lock_irqsave(&tasklist_lock, flags);
   9.367 -    for_each_domain ( p )
   9.368 -    {
   9.369 -        printk("Domain: %llu\n", blkif->domain);
   9.370 -        blk_ring = blkif->blk_ring_base;
   9.371 -        printk("  req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/"
   9.372 -               "0x%08x on_list=%d\n",
   9.373 -               blk_ring->req_prod, blkif->blk_req_cons,
   9.374 -               blk_ring->resp_prod, blkif->blk_resp_prod,
   9.375 -               __on_blkdev_list(p));
   9.376 -    }
   9.377 -    read_unlock_irqrestore(&tasklist_lock, flags);
   9.378 -
   9.379 -    for ( i = 0; i < MAX_PENDING_REQS; i++ )
   9.380 -    {
   9.381 -        printk("Pend%d: dom=%p, id=%08lx, cnt=%d, op=%d, status=%d\n",
   9.382 -               i, pending_reqs[i].domain, pending_reqs[i].id,
   9.383 -               atomic_read(&pending_reqs[i].pendcnt), 
   9.384 -               pending_reqs[i].operation, pending_reqs[i].status);
   9.385 -    }
   9.386 -#endif
   9.387 -}
   9.388 -
   9.389 -void unlink_blkdev_info(blkif_t *blkif)
   9.390 +void blkif_deschedule(blkif_t *blkif)
   9.391  {
   9.392      unsigned long flags;
   9.393  
   9.394 @@ -477,26 +460,29 @@ static int __init init_module(void)
   9.395  {
   9.396      int i;
   9.397  
   9.398 +    blkif_interface_init();
   9.399 +
   9.400 +    if ( (mmap_vma = get_vm_area(MMAP_PAGES * PAGE_SIZE, VM_IOREMAP)) == NULL )
   9.401 +    {
   9.402 +        printk(KERN_WARNING "Could not allocate VMA for blkif backend.\n");
   9.403 +        return -ENOMEM;
   9.404 +    }
   9.405 +
   9.406      pending_cons = 0;
   9.407      pending_prod = MAX_PENDING_REQS;
   9.408      memset(pending_reqs, 0, sizeof(pending_reqs));
   9.409      for ( i = 0; i < MAX_PENDING_REQS; i++ )
   9.410          pending_ring[i] = i;
   9.411      
   9.412 -    for ( i = 0; i < NR_CPUS; i++ )
   9.413 -        completed_bhs[i] = NULL;
   9.414 -        
   9.415      spin_lock_init(&io_schedule_list_lock);
   9.416      INIT_LIST_HEAD(&io_schedule_list);
   9.417  
   9.418 -    if ( request_irq(bind_virq_to_irq(VIRQ_DEBUG), blkif_debug_int, 
   9.419 -                     SA_SHIRQ, "blkif-backend-dbg", &blkif_debug_int) != 0 )
   9.420 -        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
   9.421 -
   9.422      buffer_head_cachep = kmem_cache_create(
   9.423          "buffer_head_cache", sizeof(struct buffer_head),
   9.424          0, SLAB_HWCACHE_ALIGN, NULL, NULL);
   9.425  
   9.426 +    blkif_ctrlif_init();
   9.427 +
   9.428      return 0;
   9.429  }
   9.430  
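
pending_ring above acts as a free-list of request slots: it starts full
(pending_prod = MAX_PENDING_REQS, pending_ring[i] = i), the dispatch_*()
functions peek at the slot indexed by pending_cons and commit it (by
incrementing pending_cons) only once the request is in flight, and
end_block_io_op() returns the slot at pending_prod under pend_prod_lock. The
same discipline, factored into helpers that do not exist in the source:

    /* Hedged sketch of the free-slot ring. Indices only ever increase;   */
    /* MASK_PEND_IDX() wraps them into the array, and the difference      */
    /* (pending_prod - pending_cons) is the number of free slots.         */
    static int alloc_pending_idx(void)
    {
        if ( pending_prod == pending_cons )
            return -1;                    /* all request slots in use     */
        return pending_ring[MASK_PEND_IDX(pending_cons++)];
    }

    static void free_pending_idx(int pending_idx)
    {
        unsigned long flags;
        spin_lock_irqsave(&pend_prod_lock, flags);
        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
        spin_unlock_irqrestore(&pend_prod_lock, flags);
    }
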
    10.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c	Tue Apr 27 13:13:06 2004 +0000
    10.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c	Wed Apr 28 09:35:33 2004 +0000
    10.3 @@ -8,7 +8,7 @@
    10.4  
    10.5  #include "common.h"
    10.6  
    10.7 -void vbd_create(blkif_vbd_create_t *create) 
    10.8 +void vbd_create(blkif_be_vbd_create_t *create) 
    10.9  {
   10.10      vbd_t       *vbd; 
   10.11      rb_node_t  **rb_p, *rb_parent = NULL;
   10.12 @@ -18,9 +18,9 @@ void vbd_create(blkif_vbd_create_t *crea
   10.13      blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
   10.14      if ( unlikely(blkif == NULL) )
   10.15      {
   10.16 -        DPRINTK("vbd_create attempted for non-existent blkif (%llu,&u)\n", 
   10.17 +        DPRINTK("vbd_create attempted for non-existent blkif (%llu,%u)\n", 
   10.18                  create->domid, create->blkif_handle); 
   10.19 -        create->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
   10.20 +        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   10.21          return;
   10.22      }
   10.23  
   10.24 @@ -42,7 +42,7 @@ void vbd_create(blkif_vbd_create_t *crea
   10.25          else
   10.26          {
   10.27              DPRINTK("vbd_create attempted for already existing vbd\n");
   10.28 -            create->status = BLKIF_STATUS_VBD_EXISTS;
   10.29 +            create->status = BLKIF_BE_STATUS_VBD_EXISTS;
   10.30              goto out;
   10.31          }
   10.32      }
   10.33 @@ -50,19 +50,19 @@ void vbd_create(blkif_vbd_create_t *crea
   10.34      if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
   10.35      {
   10.36          DPRINTK("vbd_create: out of memory\n");
   10.37 -        create->status = BLKIF_STATUS_OUT_OF_MEMORY;
   10.38 +        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
   10.39          goto out;
   10.40      }
   10.41  
   10.42 -    vbd->vdevice = vdevice; 
   10.43 -    vbd->mode    = create->mode; 
   10.44 -    vbd->type    = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
   10.45 -    vbd->extents = NULL; 
   10.46 +    vbd->vdevice  = vdevice; 
   10.47 +    vbd->readonly = create->readonly;
   10.48 +    vbd->type     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
   10.49 +    vbd->extents  = NULL; 
   10.50  
   10.51      rb_link_node(&vbd->rb, rb_parent, rb_p);
   10.52      rb_insert_color(&vbd->rb, &blkif->vbd_rb);
   10.53  
   10.54 -    create->status = BLKIF_STATUS_OKAY;
   10.55 +    create->status = BLKIF_BE_STATUS_OKAY;
   10.56  
   10.57   out:
   10.58      spin_unlock(&blkif->vbd_lock);
   10.59 @@ -71,20 +71,20 @@ void vbd_create(blkif_vbd_create_t *crea
   10.60  
   10.61  
   10.62  /* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
   10.63 -void vbd_grow(blkif_vbd_grow_t *grow) 
   10.64 +void vbd_grow(blkif_be_vbd_grow_t *grow) 
   10.65  {
   10.66 -    blkif_t          *blkif;
   10.67 -    xen_extent_le_t **px, *x; 
   10.68 -    vbd_t            *vbd = NULL;
   10.69 -    rb_node_t        *rb;
   10.70 -    blkif_vdev_t      vdevice = grow->vdevice;
   10.71 +    blkif_t            *blkif;
   10.72 +    blkif_extent_le_t **px, *x; 
   10.73 +    vbd_t              *vbd = NULL;
   10.74 +    rb_node_t          *rb;
   10.75 +    blkif_vdev_t        vdevice = grow->vdevice;
   10.76  
   10.77      blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
   10.78      if ( unlikely(blkif == NULL) )
   10.79      {
   10.80 -        DPRINTK("vbd_grow attempted for non-existent blkif (%llu,&u)\n", 
   10.81 +        DPRINTK("vbd_grow attempted for non-existent blkif (%llu,%u)\n", 
   10.82                  grow->domid, grow->blkif_handle); 
   10.83 -        grow->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
   10.84 +        grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   10.85          return;
   10.86      }
   10.87  
   10.88 @@ -105,28 +105,29 @@ void vbd_grow(blkif_vbd_grow_t *grow)
   10.89      if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
   10.90      {
   10.91          DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
   10.92 -        grow->status = BLKIF_STATUS_VBD_NOT_FOUND;
   10.93 +        grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
   10.94          goto out;
   10.95      } 
   10.96  
   10.97 -    if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) )
   10.98 +    if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t), 
   10.99 +                               GFP_KERNEL)) == NULL) )
  10.100      {
  10.101          DPRINTK("vbd_grow: out of memory\n");
  10.102 -        grow->status = BLKIF_STATUS_OUT_OF_MEMORY;
  10.103 +        grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
  10.104          goto out;
  10.105      }
  10.106   
  10.107      x->extent.device        = grow->extent.device; 
  10.108      x->extent.sector_start  = grow->extent.sector_start; 
  10.109      x->extent.sector_length = grow->extent.sector_length; 
  10.110 -    x->next                 = (xen_extent_le_t *)NULL; 
  10.111 +    x->next                 = (blkif_extent_le_t *)NULL; 
  10.112  
  10.113      for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) 
  10.114          continue;
  10.115  
  10.116      *px = x;
  10.117  
  10.118 -    grow->status = BLKIF_STATUS_OKAY;
  10.119 +    grow->status = BLKIF_BE_STATUS_OKAY;
  10.120  
  10.121   out:
  10.122      spin_unlock(&blkif->vbd_lock);
  10.123 @@ -134,20 +135,20 @@ void vbd_grow(blkif_vbd_grow_t *grow)
  10.124  }
  10.125  
  10.126  
  10.127 -void vbd_shrink(blkif_vbd_shrink_t *shrink)
  10.128 +void vbd_shrink(blkif_be_vbd_shrink_t *shrink)
  10.129  {
  10.130 -    blkif_t          *blkif;
  10.131 -    xen_extent_le_t **px, *x; 
  10.132 -    vbd_t            *vbd = NULL;
  10.133 -    rb_node_t        *rb;
  10.134 -    blkif_vdev_t      vdevice = shrink->vdevice;
  10.135 +    blkif_t            *blkif;
  10.136 +    blkif_extent_le_t **px, *x; 
  10.137 +    vbd_t              *vbd = NULL;
  10.138 +    rb_node_t          *rb;
  10.139 +    blkif_vdev_t        vdevice = shrink->vdevice;
  10.140  
  10.141      blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle);
  10.142      if ( unlikely(blkif == NULL) )
  10.143      {
  10.144 -        DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,&u)\n", 
  10.145 +        DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,%u)\n", 
  10.146                  shrink->domid, shrink->blkif_handle); 
  10.147 -        shrink->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
  10.148 +        shrink->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  10.149          return;
  10.150      }
  10.151  
  10.152 @@ -167,13 +168,13 @@ void vbd_shrink(blkif_vbd_shrink_t *shri
  10.153  
  10.154      if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
  10.155      {
  10.156 -        shrink->status = BLKIF_STATUS_VBD_NOT_FOUND;
  10.157 +        shrink->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
  10.158          goto out;
  10.159      }
  10.160  
  10.161      if ( unlikely(vbd->extents == NULL) )
  10.162      {
  10.163 -        shrink->status = BLKIF_STATUS_EXTENT_NOT_FOUND;
  10.164 +        shrink->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
  10.165          goto out;
  10.166      }
  10.167  
  10.168 @@ -185,7 +186,7 @@ void vbd_shrink(blkif_vbd_shrink_t *shri
  10.169      *px = x->next;
  10.170      kfree(x);
  10.171  
  10.172 -    shrink->status = BLKIF_STATUS_OKAY;
  10.173 +    shrink->status = BLKIF_BE_STATUS_OKAY;
  10.174  
  10.175   out:
  10.176      spin_unlock(&blkif->vbd_lock);
  10.177 @@ -193,20 +194,20 @@ void vbd_shrink(blkif_vbd_shrink_t *shri
  10.178  }
  10.179  
  10.180  
  10.181 -void vbd_destroy(blkif_vbd_destroy_t *destroy) 
  10.182 +void vbd_destroy(blkif_be_vbd_destroy_t *destroy) 
  10.183  {
  10.184 -    blkif_t         *blkif;
  10.185 -    vbd_t           *vbd;
  10.186 -    rb_node_t       *rb;
  10.187 -    xen_extent_le_t *x, *t;
  10.188 -    blkif_vdev_t     vdevice = destroy->vdevice;
  10.189 +    blkif_t           *blkif;
  10.190 +    vbd_t             *vbd;
  10.191 +    rb_node_t         *rb;
  10.192 +    blkif_extent_le_t *x, *t;
  10.193 +    blkif_vdev_t       vdevice = destroy->vdevice;
  10.194  
  10.195      blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
  10.196      if ( unlikely(blkif == NULL) )
  10.197      {
  10.198 -        DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,&u)\n", 
  10.199 +        DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,%u)\n", 
  10.200                  destroy->domid, destroy->blkif_handle); 
  10.201 -        destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
  10.202 +        destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  10.203          return;
  10.204      }
  10.205  
  10.206 @@ -224,7 +225,7 @@ void vbd_destroy(blkif_vbd_destroy_t *de
  10.207              goto found;
  10.208      }
  10.209  
  10.210 -    destroy->status = BLKIF_STATUS_VBD_NOT_FOUND;
  10.211 +    destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
  10.212      goto out;
  10.213  
  10.214   found:
  10.215 @@ -249,7 +250,7 @@ void destroy_all_vbds(blkif_t *blkif)
  10.216  {
  10.217      vbd_t *vbd;
  10.218      rb_node_t *rb;
  10.219 -    xen_extent_le_t *x, *t;
  10.220 +    blkif_extent_le_t *x, *t;
  10.221  
  10.222      spin_lock(&blkif->vbd_lock);
  10.223  
  10.224 @@ -273,51 +274,30 @@ void destroy_all_vbds(blkif_t *blkif)
  10.225  }
  10.226  
  10.227  
  10.228 -static int vbd_probe_single(xen_disk_info_t *xdi, 
  10.229 -                            vbd_t *vbd, 
  10.230 -                            struct task_struct *p)
  10.231 +static int vbd_probe_single(blkif_t *blkif, vdisk_t *vbd_info, vbd_t *vbd)
  10.232  {
  10.233 -    xen_extent_le_t *x; 
  10.234 -    xen_disk_t cur_disk; 
  10.235 -
  10.236 -    if ( xdi->count == xdi->max )
  10.237 -    {
  10.238 -        DPRINTK("vbd_probe_devices: out of space for probe.\n"); 
  10.239 -        return -ENOMEM; 
  10.240 -    }
  10.241 +    blkif_extent_le_t *x; 
  10.242  
  10.243 -    cur_disk.device = vbd->vdevice; 
  10.244 -    cur_disk.info   = vbd->type;
  10.245 -    if ( !VBD_CAN_WRITE(vbd) )
  10.246 -        cur_disk.info |= XD_FLAG_RO; 
  10.247 -    cur_disk.capacity = 0ULL;
  10.248 +    vbd_info->device = vbd->vdevice; 
  10.249 +    vbd_info->info   = vbd->type;
  10.250 +    if ( vbd->readonly )
  10.251 +        vbd_info->info |= VDISK_FLAG_RO; 
  10.252 +    vbd_info->capacity = 0ULL;
  10.253      for ( x = vbd->extents; x != NULL; x = x->next )
  10.254 -        cur_disk.capacity += x->extent.nr_sectors; 
  10.255 -    cur_disk.domain = p->domain; 
  10.256 +        vbd_info->capacity += x->extent.sector_length; 
  10.257          
  10.258 -    /* Now copy into relevant part of user-space buffer */
  10.259 -    if( copy_to_user(&xdi->disks[xdi->count], 
  10.260 -                     &cur_disk, 
  10.261 -                     sizeof(xen_disk_t)) )
  10.262 -    { 
  10.263 -        DPRINTK("vbd_probe_devices: copy_to_user failed\n");
  10.264 -        return -EFAULT;
  10.265 -    } 
  10.266 -        
  10.267 -    xdi->count++; 
  10.268 -
  10.269      return 0;
  10.270  }
  10.271  
  10.272  
  10.273 -static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
  10.274 +int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
  10.275  {
  10.276 -    int rc = 0;
  10.277 +    int rc = 0, nr_vbds = 0;
  10.278      rb_node_t *rb;
  10.279  
  10.280 -    spin_lock(&p->vbd_lock);
  10.281 +    spin_lock(&blkif->vbd_lock);
  10.282  
  10.283 -    if ( (rb = p->vbd_rb.rb_node) == NULL )
  10.284 +    if ( (rb = blkif->vbd_rb.rb_node) == NULL )
  10.285          goto out;
  10.286  
  10.287   new_subtree:
  10.288 @@ -328,7 +308,10 @@ static int vbd_probe_devices(xen_disk_in
  10.289      for ( ; ; )
  10.290      {
  10.291          /* STEP 2. Dealt with left subtree. Now process current node. */
  10.292 -        if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 )
  10.293 +        if ( (rc = vbd_probe_single(blkif, &vbd_info[nr_vbds], 
  10.294 +                                    rb_entry(rb, vbd_t, rb))) != 0 )
  10.295 +            goto out;
  10.296 +        if ( ++nr_vbds == max_vbds )
  10.297              goto out;
  10.298  
  10.299          /* STEP 3. Process right subtree, if any. */
  10.300 @@ -355,146 +338,22 @@ static int vbd_probe_devices(xen_disk_in
  10.301      }
  10.302  
  10.303   out:
  10.304 -    spin_unlock(&p->vbd_lock);
  10.305 -    return rc;  
  10.306 -}
  10.307 -
  10.308 -
  10.309 -/*
  10.310 - * Return information about the VBDs available for a given domain, or for all 
  10.311 - * domains; in the general case the 'domain' argument will be 0 which means 
  10.312 - * "information about the caller"; otherwise the 'domain' argument will 
  10.313 - * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of 
  10.314 - * these cases require the caller to be privileged.
  10.315 - */
  10.316 -long vbd_probe(vbd_probe_t *probe) 
  10.317 -{
  10.318 -    struct task_struct *p = NULL; 
  10.319 -    unsigned long flags;
  10.320 -    long ret = 0;  
  10.321 -
  10.322 -    if ( probe->domain != 0 )
  10.323 -    { 
  10.324 -        /* We can only probe for ourselves (unless we're privileged). */
  10.325 -        if( (probe->domain != current->domain) && !IS_PRIV(current) )
  10.326 -            return -EPERM; 
  10.327 -
  10.328 -        if ( (probe->domain != VBD_PROBE_ALL) &&
  10.329 -             ((p = find_domain_by_id(probe->domain)) == NULL) )
  10.330 -        {
  10.331 -            DPRINTK("vbd_probe attempted for non-existent domain %llu\n", 
  10.332 -                    probe->domain); 
  10.333 -            return -EINVAL; 
  10.334 -        }
  10.335 -    }
  10.336 -    else
  10.337 -    { 
  10.338 -        /* Default is to probe for ourselves. */
  10.339 -        p = current; 
  10.340 -        get_task_struct(p); /* to mirror final put_task_struct */
  10.341 -    }
  10.342 -
  10.343 -    if ( probe->domain == VBD_PROBE_ALL )
  10.344 -    { 
  10.345 -        read_lock_irqsave(&tasklist_lock, flags);
  10.346 -        for_each_domain ( p )
  10.347 -        {
  10.348 -            if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
  10.349 -            { 
  10.350 -                read_unlock_irqrestore(&tasklist_lock, flags);
  10.351 -                goto out; 
  10.352 -            }
  10.353 -        }
  10.354 -        read_unlock_irqrestore(&tasklist_lock, flags);
  10.355 -    } 
  10.356 -    else if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
  10.357 -        goto out; 
  10.358 -
  10.359 - out: 
  10.360 -    if ( ret != 0 )
  10.361 -        DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret); 
  10.362 -    if ( p != NULL )
  10.363 -        put_task_struct(p); 
  10.364 -    return ret; 
  10.365 +    spin_unlock(&blkif->vbd_lock);
  10.366 +    return (rc == 0) ? nr_vbds : rc;  
  10.367  }
  10.368  
  10.369  
  10.370 -long vbd_info(vbd_info_t *info) 
  10.371 +int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation)
  10.372  {
  10.373 -    struct task_struct *p; 
  10.374 -    xen_extent_le_t *x; 
  10.375 -    xen_extent_t *extents; 
  10.376 -    vbd_t *vbd = NULL;
  10.377 -    rb_node_t *rb;
  10.378 -    long ret = 0;  
  10.379 -   
  10.380 -    if ( (info->domain != current->domain) && !IS_PRIV(current) )
  10.381 -        return -EPERM; 
  10.382 -
  10.383 -    if ( (p = find_domain_by_id(info->domain)) == NULL )
  10.384 -    {
  10.385 -        DPRINTK("vbd_info attempted for non-existent domain %llu\n", 
  10.386 -                info->domain); 
  10.387 -        return -EINVAL; 
  10.388 -    }
  10.389 -
  10.390 -    spin_lock(&p->vbd_lock);
  10.391 -
  10.392 -    rb = p->vbd_rb.rb_node;
  10.393 -    while ( rb != NULL )
  10.394 -    {
  10.395 -        vbd = rb_entry(rb, vbd_t, rb);
  10.396 -        if ( info->vdevice < vbd->vdevice )
  10.397 -            rb = rb->rb_left;
  10.398 -        else if ( info->vdevice > vbd->vdevice )
  10.399 -            rb = rb->rb_right;
  10.400 -        else
  10.401 -            break;
  10.402 -    }
  10.403 +    blkif_extent_le_t *x; 
  10.404 +    vbd_t             *vbd;
  10.405 +    rb_node_t         *rb;
  10.406 +    blkif_sector_t     sec_off;
  10.407 +    unsigned long      nr_secs;
  10.408  
  10.409 -    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) )
  10.410 -    {
  10.411 -        DPRINTK("vbd_info attempted on non-existent VBD.\n"); 
  10.412 -        ret = -EINVAL; 
  10.413 -        goto out; 
  10.414 -    }
  10.415 -
  10.416 -    info->mode     = vbd->mode;
  10.417 -    info->nextents = 0; 
  10.418 +    spin_lock(&blkif->vbd_lock);
  10.419  
  10.420 -    extents = info->extents;
  10.421 -    for ( x = vbd->extents; x != NULL; x = x->next )
  10.422 -    {
  10.423 -        if ( info->nextents == info->maxextents )
  10.424 -            break;
  10.425 -        if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) )
  10.426 -        {
  10.427 -            DPRINTK("vbd_info: copy_to_user failed\n");
  10.428 -            ret = -EFAULT;
  10.429 -            goto out; 
  10.430 -        } 
  10.431 -        extents++;
  10.432 -        info->nextents++;
  10.433 -    }
  10.434 -
  10.435 - out: 
  10.436 -    spin_unlock(&p->vbd_lock);
  10.437 -    put_task_struct(p); 
  10.438 -    return ret; 
  10.439 -}
  10.440 -
  10.441 -
  10.442 -int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
  10.443 -{
  10.444 -    xen_extent_le_t *x; 
  10.445 -    vbd_t *vbd;
  10.446 -    rb_node_t *rb;
  10.447 -    xen_sector_t sec_off;
  10.448 -    unsigned long nr_secs;
  10.449 -
  10.450 -    spin_lock(&p->vbd_lock);
  10.451 -
  10.452 -    rb = p->vbd_rb.rb_node;
  10.453 +    rb = blkif->vbd_rb.rb_node;
  10.454      while ( rb != NULL )
  10.455      {
  10.456          vbd = rb_entry(rb, vbd_t, rb);
  10.457 @@ -507,42 +366,41 @@ int vbd_translate(phys_seg_t *pseg, stru
  10.458      }
  10.459  
   10.460      DPRINTK("vbd_translate: domain %llu attempted to access "
  10.461 -            "non-existent VBD.\n", p->domain); 
  10.462 +            "non-existent VBD.\n", blkif->domid);
  10.463  
  10.464 -    spin_unlock(&p->vbd_lock);
  10.465 +    spin_unlock(&blkif->vbd_lock);
  10.466      return -ENODEV; 
  10.467  
  10.468   found:
  10.469  
  10.470 -    if ( ((operation == READ) && !VBD_CAN_READ(vbd)) ||
  10.471 -         ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) )
  10.472 +    if ( (operation == WRITE) && vbd->readonly )
  10.473      {
  10.474 -        spin_unlock(&p->vbd_lock);
  10.475 +        spin_unlock(&blkif->vbd_lock);
  10.476          return -EACCES; 
  10.477      }
  10.478  
  10.479      /*
  10.480 -     * Now iterate through the list of xen_extents, working out which should 
  10.481 +     * Now iterate through the list of blkif_extents, working out which should 
  10.482       * be used to perform the translation.
  10.483       */
  10.484      sec_off = pseg->sector_number; 
  10.485      nr_secs = pseg->nr_sects;
  10.486      for ( x = vbd->extents; x != NULL; x = x->next )
  10.487      { 
  10.488 -        if ( sec_off < x->extent.nr_sectors )
  10.489 +        if ( sec_off < x->extent.sector_length )
  10.490          {
  10.491              pseg->dev = x->extent.device; 
  10.492 -            pseg->sector_number = x->extent.start_sector + sec_off;
  10.493 -            if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) )
  10.494 +            pseg->sector_number = x->extent.sector_start + sec_off;
  10.495 +            if ( unlikely((sec_off + nr_secs) > x->extent.sector_length) )
  10.496                  goto overrun;
   10.497              spin_unlock(&blkif->vbd_lock);
  10.498              return 1;
  10.499          } 
  10.500 -        sec_off -= x->extent.nr_sectors; 
  10.501 +        sec_off -= x->extent.sector_length; 
  10.502      }
  10.503  
  10.504      DPRINTK("vbd_translate: end of vbd.\n");
  10.505 -    spin_unlock(&p->vbd_lock);
  10.506 +    spin_unlock(&blkif->vbd_lock);
  10.507      return -EACCES; 
  10.508  
  10.509      /*
  10.510 @@ -554,7 +412,7 @@ int vbd_translate(phys_seg_t *pseg, stru
  10.511   overrun:
  10.512  
  10.513      /* Adjust length of first chunk to run to end of first extent. */
  10.514 -    pseg[0].nr_sects = x->extent.nr_sectors - sec_off;
  10.515 +    pseg[0].nr_sects = x->extent.sector_length - sec_off;
  10.516  
  10.517      /* Set second chunk buffer and length to start where first chunk ended. */
  10.518      pseg[1].buffer   = pseg[0].buffer + (pseg[0].nr_sects << 9);
  10.519 @@ -562,7 +420,7 @@ int vbd_translate(phys_seg_t *pseg, stru
  10.520  
  10.521      /* Now move to the next extent. Check it exists and is long enough! */
  10.522      if ( unlikely((x = x->next) == NULL) || 
  10.523 -         unlikely(x->extent.nr_sectors < pseg[1].nr_sects) )
  10.524 +         unlikely(x->extent.sector_length < pseg[1].nr_sects) )
  10.525      {
  10.526          DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
   10.527          spin_unlock(&blkif->vbd_lock);
  10.528 @@ -571,8 +429,8 @@ int vbd_translate(phys_seg_t *pseg, stru
  10.529  
  10.530      /* Store the real device and start sector for the second chunk. */
  10.531      pseg[1].dev           = x->extent.device;
  10.532 -    pseg[1].sector_number = x->extent.start_sector;
  10.533 +    pseg[1].sector_number = x->extent.sector_start;
  10.534      
  10.535 -    spin_unlock(&p->vbd_lock);
  10.536 +    spin_unlock(&blkif->vbd_lock);
  10.537      return 2;
  10.538  }
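
The vbd_translate() rework above keeps the original extent-walk logic while
moving the state from the task struct to the blkif: a virtual sector offset
is marched down the ordered extent list until it lands inside one extent,
and a request that straddles an extent boundary is split into a second
physical chunk. A minimal sketch of the walk, using illustrative names
rather than the driver's own identifiers:

    /* Sketch only: resolve a virtual sector against an ordered extent
     * list, as vbd_translate() does above. */
    struct extent {
        unsigned short device;        /* physical device            */
        unsigned long  sector_start;  /* first physical sector      */
        unsigned long  sector_length; /* extent length in sectors   */
        struct extent *next;
    };

    /* Fills (*dev, *psect) and returns 0, or -1 if off the end. */
    static int translate(struct extent *x, unsigned long vsect,
                         unsigned short *dev, unsigned long *psect)
    {
        for ( ; x != NULL; x = x->next )
        {
            if ( vsect < x->sector_length )
            {
                *dev   = x->device;
                *psect = x->sector_start + vsect;
                return 0;
            }
            vsect -= x->sector_length;  /* skip past this extent */
        }
        return -1;                      /* ran off the end of the VBD */
    }

The real routine additionally clamps the first chunk at the extent boundary
and emits a second phys_seg for the overrun, returning 2 instead of 1.
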
    11.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h	Tue Apr 27 13:13:06 2004 +0000
    11.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h	Wed Apr 28 09:35:33 2004 +0000
    11.3 @@ -16,24 +16,27 @@
    11.4  #define BLKIF_OP_WRITE     1
    11.5  #define BLKIF_OP_PROBE     2
    11.6  
    11.7 -/* NB. Ring size must be small enough for sizeof(blk_ring_t) <= PAGE_SIZE. */
    11.8 +/* NB. Ring size must be small enough for sizeof(blkif_ring_t) <= PAGE_SIZE. */
    11.9  #define BLKIF_RING_SIZE        64
   11.10  
   11.11  /*
   11.12   * Maximum scatter/gather segments per request.
   11.13 - * This is carefully chosen so that sizeof(blk_ring_t) <= PAGE_SIZE.
   11.14 + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
   11.15   * NB. This could be 12 if the ring indexes weren't stored in the same page.
   11.16   */
   11.17 -#define BLKIF_REQUEST_MAX_SEGMENTS 11
   11.18 +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
   11.19 +
   11.20 +#define BLKIF_MAX_SECTORS_PER_SEGMENT  16
   11.21  
   11.22  typedef struct {
   11.23      unsigned char  operation;        /* BLKIF_OP_???                         */
   11.24 -    unsigned char  nr_segments;      /* number of segments (<= MAX_BLK_SEGS) */
   11.25 +    unsigned char  nr_segments;      /* number of segments                   */
   11.26      blkif_vdev_t   device;           /* only for read/write requests         */
   11.27      unsigned long  id;               /* private guest value, echoed in resp  */
   11.28 -    xen_sector_t   sector_number;    /* start sector idx on disk (r/w only)  */
   11.29 -    /* Least 9 bits is 'nr_sects'. High 23 bits is the address.      */
   11.30 -    unsigned long  buffer_and_sects[MAX_BLK_SEGS];
   11.31 +    blkif_sector_t sector_number;    /* start sector idx on disk (r/w only)  */
    11.32 +    /* Low 9 bits hold 'nr_sects'; high 23 bits hold the buffer address. */
   11.33 +    /* We must have '0 <= nr_sects <= BLKIF_MAX_SECTORS_PER_SEGMENT'. */
   11.34 +    unsigned long  buffer_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST];
   11.35  } blkif_request_t;
   11.36  
   11.37  typedef struct {
   11.38 @@ -59,8 +62,8 @@ typedef unsigned int BLKIF_RING_IDX;
   11.39  #define MASK_BLKIF_IDX(_i) ((_i)&(BLKIF_RING_SIZE-1))
   11.40  
   11.41  typedef struct {
   11.42 -    BLKIF_RING_IDX req_prod;  /* Request producer. Updated by guest OS. */
   11.43 -    BLKIF_RING_IDX resp_prod; /* Response producer. Updated by Xen.     */
   11.44 +    BLKIF_RING_IDX req_prod;  /* Request producer. Updated by front-end. */
   11.45 +    BLKIF_RING_IDX resp_prod; /* Response producer. Updated by back-end. */
   11.46      union {
   11.47          blkif_request_t  req;
   11.48          blkif_response_t resp;
   11.49 @@ -103,7 +106,7 @@ typedef struct {
   11.50  typedef struct {
   11.51      blkif_vdev_t   device;       /* Device number (opaque 16 bit value). */
   11.52      unsigned short info;         /* Device type and flags (VDISK_*).     */
   11.53 -    xen_sector_t   capacity;     /* Size in terms of 512-byte sectors.   */
   11.54 +    blkif_sector_t capacity;     /* Size in terms of 512-byte sectors.   */
   11.55  } vdisk_t;
   11.56  
   11.57  #endif /* __SHARED_BLKIF_H__ */
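
The buffer_and_sects[] encoding above works because buffers are 512-byte
aligned machine addresses, leaving the low 9 bits free to carry the sector
count (bounded by BLKIF_MAX_SECTORS_PER_SEGMENT). A sketch of the pack and
unpack helpers this implies, assuming 32-bit machine addresses (these
helpers are not part of the interface header itself):

    /* Sketch: low 9 bits carry nr_sects; high 23 bits carry the
     * sector-aligned buffer machine address. */
    #define SEG_SECTS_MASK ((1UL << 9) - 1)

    static inline unsigned long seg_pack(unsigned long buffer_ma,
                                         unsigned long nr_sects)
    {
        /* buffer_ma must have its low 9 bits clear. */
        return buffer_ma | nr_sects;
    }

    static inline unsigned long seg_buffer(unsigned long seg)
    {
        return seg & ~SEG_SECTS_MASK;
    }

    static inline unsigned long seg_nr_sects(unsigned long seg)
    {
        return seg & SEG_SECTS_MASK;
    }
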
    12.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile	Tue Apr 27 13:13:06 2004 +0000
    12.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile	Wed Apr 28 09:35:33 2004 +0000
    12.3 @@ -1,3 +1,3 @@
    12.4  O_TARGET := drv.o
    12.5 -obj-y := block.o vbd.o
    12.6 +obj-y := main.o vbd.o
    12.7  include $(TOPDIR)/Rules.make
    13.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c	Tue Apr 27 13:13:06 2004 +0000
    13.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.3 @@ -1,625 +0,0 @@
    13.4 -/******************************************************************************
    13.5 - * block.c
    13.6 - * 
    13.7 - * Xenolinux virtual block-device driver.
    13.8 - * 
    13.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   13.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   13.11 - */
   13.12 -
   13.13 -#include "block.h"
   13.14 -#include <linux/blk.h>
   13.15 -#include <linux/cdrom.h>
   13.16 -#include <linux/tqueue.h>
   13.17 -#include <linux/sched.h>
   13.18 -#include <scsi/scsi.h>
   13.19 -
   13.20 -#include <linux/interrupt.h>
   13.21 -
   13.22 -typedef unsigned char byte; /* from linux/ide.h */
   13.23 -
   13.24 -#define STATE_ACTIVE    0
   13.25 -#define STATE_SUSPENDED 1
   13.26 -#define STATE_CLOSED    2
   13.27 -static unsigned int state = STATE_SUSPENDED;
   13.28 -
   13.29 -/* Dynamically-mapped IRQs. */
   13.30 -static int xlblk_response_irq, xlblk_update_irq;
   13.31 -
   13.32 -static blk_ring_t *blk_ring;
   13.33 -static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
   13.34 -static BLK_RING_IDX req_prod;  /* Private request producer.         */
   13.35 -
   13.36 -/* We plug the I/O ring if the driver is suspended or if the ring is full. */
   13.37 -#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
   13.38 -                      (state != STATE_ACTIVE))
   13.39 -
   13.40 -
   13.41 -/*
   13.42 - * Request queues with outstanding work, but ring is currently full.
   13.43 - * We need no special lock here, as we always access this with the
   13.44 - * io_request_lock held. We only need a small maximum list.
   13.45 - */
   13.46 -#define MAX_PENDING 8
   13.47 -static request_queue_t *pending_queues[MAX_PENDING];
   13.48 -static int nr_pending;
   13.49 -
   13.50 -static kdev_t        sg_dev;
   13.51 -static int           sg_operation = -1;
   13.52 -static unsigned long sg_next_sect;
   13.53 -#define DISABLE_SCATTERGATHER() (sg_operation = -1)
   13.54 -
   13.55 -static inline void signal_requests_to_xen(void)
   13.56 -{
   13.57 -    block_io_op_t op; 
   13.58 -
   13.59 -    DISABLE_SCATTERGATHER();
   13.60 -    blk_ring->req_prod = req_prod;
   13.61 -
   13.62 -    op.cmd = BLOCK_IO_OP_SIGNAL; 
   13.63 -    HYPERVISOR_block_io_op(&op);
   13.64 -    return;
   13.65 -}
   13.66 -
   13.67 -
   13.68 -/*
   13.69 - * xlblk_update_int/update-vbds_task - handle VBD update events from Xen
   13.70 - * 
   13.71 - * Schedule a task for keventd to run, which will update the VBDs and perform 
   13.72 - * the corresponding updates to our view of VBD state, so the XenoLinux will 
   13.73 - * respond to changes / additions / deletions to the set of VBDs automatically.
   13.74 - */
   13.75 -static struct tq_struct update_tq;
   13.76 -static void update_vbds_task(void *unused)
   13.77 -{ 
   13.78 -    xlvbd_update_vbds();
   13.79 -}
   13.80 -static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
   13.81 -{
   13.82 -    update_tq.routine = update_vbds_task;
   13.83 -    schedule_task(&update_tq);
   13.84 -}
   13.85 -
   13.86 -
   13.87 -int xen_block_open(struct inode *inode, struct file *filep)
   13.88 -{
   13.89 -    short xldev = inode->i_rdev; 
   13.90 -    struct gendisk *gd = get_gendisk(xldev);
   13.91 -    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
   13.92 -    short minor = MINOR(xldev); 
   13.93 -
   13.94 -    if ( gd->part[minor].nr_sects == 0 )
   13.95 -    { 
   13.96 -        /*
   13.97 -         * Device either doesn't exist, or has zero capacity; we use a few
   13.98 -         * cheesy heuristics to return the relevant error code
   13.99 -         */
  13.100 -        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
  13.101 -             ((minor & (gd->max_p - 1)) != 0) )
  13.102 -        { 
  13.103 -            /*
  13.104 -             * We have a real device, but no such partition, or we just have a
  13.105 -             * partition number so guess this is the problem.
  13.106 -             */
  13.107 -            return -ENXIO;     /* no such device or address */
  13.108 -        }
  13.109 -        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
  13.110 -        {
  13.111 -            /* This is a removable device => assume that media is missing. */ 
  13.112 -            return -ENOMEDIUM; /* media not present (this is a guess) */
  13.113 -        } 
  13.114 -        else
  13.115 -        { 
  13.116 -            /* Just go for the general 'no such device' error. */
  13.117 -            return -ENODEV;    /* no such device */
  13.118 -        }
  13.119 -    }
  13.120 -    
  13.121 -    /* Update of usage count is protected by per-device semaphore. */
  13.122 -    disk->usage++;
  13.123 -
  13.124 -    return 0;
  13.125 -}
  13.126 -
  13.127 -
  13.128 -int xen_block_release(struct inode *inode, struct file *filep)
  13.129 -{
  13.130 -    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
  13.131 -
  13.132 -    /*
  13.133 -     * When usage drops to zero it may allow more VBD updates to occur.
  13.134 -     * Update of usage count is protected by a per-device semaphore.
  13.135 -     */
  13.136 -    if ( --disk->usage == 0 )
  13.137 -    {
  13.138 -        update_tq.routine = update_vbds_task;
  13.139 -        schedule_task(&update_tq);
  13.140 -    }
  13.141 -
  13.142 -    return 0;
  13.143 -}
  13.144 -
  13.145 -
  13.146 -int xen_block_ioctl(struct inode *inode, struct file *filep,
  13.147 -                          unsigned command, unsigned long argument)
  13.148 -{
  13.149 -    kdev_t dev = inode->i_rdev;
  13.150 -    struct hd_geometry *geo = (struct hd_geometry *)argument;
  13.151 -    struct gendisk *gd;     
  13.152 -    struct hd_struct *part; 
  13.153 -    int i;
  13.154 -
  13.155 -    /* NB. No need to check permissions. That is done for us. */
  13.156 -    
  13.157 -    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
  13.158 -                  command, (long) argument, dev); 
  13.159 -  
  13.160 -    gd = get_gendisk(dev);
  13.161 -    part = &gd->part[MINOR(dev)]; 
  13.162 -
  13.163 -    switch ( command )
  13.164 -    {
  13.165 -    case BLKGETSIZE:
  13.166 -        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
  13.167 -        return put_user(part->nr_sects, (unsigned long *) argument);
  13.168 -
  13.169 -    case BLKGETSIZE64:
  13.170 -        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
  13.171 -                      (u64)part->nr_sects * 512);
  13.172 -        return put_user((u64)part->nr_sects * 512, (u64 *) argument);
  13.173 -
  13.174 -    case BLKRRPART:                               /* re-read partition table */
  13.175 -        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
  13.176 -        return xen_block_revalidate(dev);
  13.177 -
  13.178 -    case BLKSSZGET:
  13.179 -        return hardsect_size[MAJOR(dev)][MINOR(dev)]; 
  13.180 -
  13.181 -    case BLKBSZGET:                                        /* get block size */
  13.182 -        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
  13.183 -        break;
  13.184 -
  13.185 -    case BLKBSZSET:                                        /* set block size */
  13.186 -        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
  13.187 -        break;
  13.188 -
  13.189 -    case BLKRASET:                                         /* set read-ahead */
  13.190 -        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
  13.191 -        break;
  13.192 -
  13.193 -    case BLKRAGET:                                         /* get read-ahead */
  13.194 -        DPRINTK_IOCTL("   BLKRAFET: %x\n", BLKRAGET);
  13.195 -        break;
  13.196 -
  13.197 -    case HDIO_GETGEO:
  13.198 -        /* note: these values are complete garbage */
  13.199 -        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
  13.200 -        if (!argument) return -EINVAL;
  13.201 -        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
  13.202 -        if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
  13.203 -        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  13.204 -        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
  13.205 -        return 0;
  13.206 -
  13.207 -    case HDIO_GETGEO_BIG: 
  13.208 -        /* note: these values are complete garbage */
  13.209 -        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
  13.210 -        if (!argument) return -EINVAL;
  13.211 -        if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
  13.212 -        if (put_user(0xff,  (byte *)&geo->heads))   return -EFAULT;
  13.213 -        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  13.214 -        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
  13.215 -        return 0;
  13.216 -
  13.217 -    case CDROMMULTISESSION:
  13.218 -        DPRINTK("FIXME: support multisession CDs later\n");
  13.219 -        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
  13.220 -            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
  13.221 -        return 0;
  13.222 -
  13.223 -    case SCSI_IOCTL_GET_BUS_NUMBER:
  13.224 -        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev");
  13.225 -        return -ENOSYS;
  13.226 -
  13.227 -    default:
  13.228 -        printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command);
  13.229 -        return -ENOSYS;
  13.230 -    }
  13.231 -    
  13.232 -    return 0;
  13.233 -}
  13.234 -
  13.235 -/* check media change: should probably do something here in some cases :-) */
  13.236 -int xen_block_check(kdev_t dev)
  13.237 -{
  13.238 -    DPRINTK("xen_block_check\n");
  13.239 -    return 0;
  13.240 -}
  13.241 -
  13.242 -int xen_block_revalidate(kdev_t dev)
  13.243 -{
  13.244 -    struct block_device *bd;
  13.245 -    struct gendisk *gd;
  13.246 -    xl_disk_t *disk;
  13.247 -    unsigned long capacity;
  13.248 -    int i, rc = 0;
  13.249 -    
  13.250 -    if ( (bd = bdget(dev)) == NULL )
  13.251 -        return -EINVAL;
  13.252 -
  13.253 -    /*
  13.254 -     * Update of partition info, and check of usage count, is protected
  13.255 -     * by the per-block-device semaphore.
  13.256 -     */
  13.257 -    down(&bd->bd_sem);
  13.258 -
  13.259 -    if ( ((gd = get_gendisk(dev)) == NULL) ||
  13.260 -         ((disk = xldev_to_xldisk(dev)) == NULL) ||
  13.261 -         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
  13.262 -    {
  13.263 -        rc = -EINVAL;
  13.264 -        goto out;
  13.265 -    }
  13.266 -
  13.267 -    if ( disk->usage > 1 )
  13.268 -    {
  13.269 -        rc = -EBUSY;
  13.270 -        goto out;
  13.271 -    }
  13.272 -
  13.273 -    /* Only reread partition table if VBDs aren't mapped to partitions. */
  13.274 -    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
  13.275 -    {
  13.276 -        for ( i = gd->max_p - 1; i >= 0; i-- )
  13.277 -        {
  13.278 -            invalidate_device(dev+i, 1);
  13.279 -            gd->part[MINOR(dev+i)].start_sect = 0;
  13.280 -            gd->part[MINOR(dev+i)].nr_sects   = 0;
  13.281 -            gd->sizes[MINOR(dev+i)]           = 0;
  13.282 -        }
  13.283 -
  13.284 -        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
  13.285 -    }
  13.286 -
  13.287 - out:
  13.288 -    up(&bd->bd_sem);
  13.289 -    bdput(bd);
  13.290 -    return rc;
  13.291 -}
  13.292 -
  13.293 -
  13.294 -/*
  13.295 - * hypervisor_request
  13.296 - *
  13.297 - * request block io 
  13.298 - * 
  13.299 - * id: for guest use only.
  13.300 - * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
  13.301 - * buffer: buffer to read/write into. this should be a
  13.302 - *   virtual address in the guest os.
  13.303 - */
  13.304 -static int hypervisor_request(unsigned long   id,
  13.305 -                              int             operation,
  13.306 -                              char *          buffer,
  13.307 -                              unsigned long   sector_number,
  13.308 -                              unsigned short  nr_sectors,
  13.309 -                              kdev_t          device)
  13.310 -{
  13.311 -    unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 
  13.312 -    struct gendisk *gd;
  13.313 -    blk_ring_req_entry_t *req;
  13.314 -    struct buffer_head *bh;
  13.315 -
  13.316 -    if ( unlikely(nr_sectors >= (1<<9)) )
  13.317 -        BUG();
  13.318 -    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
  13.319 -        BUG();
  13.320 -
  13.321 -    if ( unlikely(state == STATE_CLOSED) )
  13.322 -        return 1;
  13.323 -
  13.324 -    switch ( operation )
  13.325 -    {
  13.326 -
  13.327 -    case XEN_BLOCK_READ:
  13.328 -    case XEN_BLOCK_WRITE:
  13.329 -        gd = get_gendisk(device); 
  13.330 -
  13.331 -        /*
  13.332 -         * Update the sector_number we'll pass down as appropriate; note that
  13.333 -         * we could sanity check that resulting sector will be in this
  13.334 -         * partition, but this will happen in xen anyhow.
  13.335 -         */
  13.336 -        sector_number += gd->part[MINOR(device)].start_sect;
  13.337 -
  13.338 -        /*
  13.339 -         * If this unit doesn't consist of virtual (i.e., Xen-specified)
  13.340 -         * partitions then we clear the partn bits from the device number.
  13.341 -         */
  13.342 -        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
  13.343 -               GENHD_FL_VIRT_PARTNS) )
  13.344 -            device &= ~(gd->max_p - 1);
  13.345 -
  13.346 -        if ( (sg_operation == operation) &&
  13.347 -             (sg_dev == device) &&
  13.348 -             (sg_next_sect == sector_number) )
  13.349 -        {
  13.350 -            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
  13.351 -            bh = (struct buffer_head *)id;
  13.352 -            bh->b_reqnext = (struct buffer_head *)req->id;
  13.353 -            req->id = id;
  13.354 -            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
  13.355 -            if ( ++req->nr_segments < MAX_BLK_SEGS )
  13.356 -                sg_next_sect += nr_sectors;
  13.357 -            else
  13.358 -                DISABLE_SCATTERGATHER();
  13.359 -            return 0;
  13.360 -        }
  13.361 -        else if ( RING_PLUGGED )
  13.362 -        {
  13.363 -            return 1;
  13.364 -        }
  13.365 -        else
  13.366 -        {
  13.367 -            sg_operation = operation;
  13.368 -            sg_dev       = device;
  13.369 -            sg_next_sect = sector_number + nr_sectors;
  13.370 -        }
  13.371 -        break;
  13.372 -
  13.373 -    default:
  13.374 -        panic("unknown op %d\n", operation);
  13.375 -    }
  13.376 -
  13.377 -    /* Fill out a communications ring structure. */
  13.378 -    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
  13.379 -    req->id            = id;
  13.380 -    req->operation     = operation;
  13.381 -    req->sector_number = (xen_sector_t)sector_number;
  13.382 -    req->device        = device; 
  13.383 -    req->nr_segments   = 1;
  13.384 -    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
  13.385 -    req_prod++;
  13.386 -
  13.387 -    return 0;
  13.388 -}
  13.389 -
  13.390 -
  13.391 -/*
  13.392 - * do_xlblk_request
  13.393 - *  read a block; request is in a request queue
  13.394 - */
  13.395 -void do_xlblk_request(request_queue_t *rq)
  13.396 -{
  13.397 -    struct request *req;
  13.398 -    struct buffer_head *bh, *next_bh;
  13.399 -    int rw, nsect, full, queued = 0;
  13.400 -
  13.401 -    DPRINTK("xlblk.c::do_xlblk_request\n"); 
  13.402 -
  13.403 -    while ( !rq->plugged && !list_empty(&rq->queue_head))
  13.404 -    {
  13.405 -        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
  13.406 -            goto out;
  13.407 -  
  13.408 -        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
  13.409 -                req, req->cmd, req->sector,
  13.410 -                req->current_nr_sectors, req->nr_sectors, req->bh);
  13.411 -
  13.412 -        rw = req->cmd;
  13.413 -        if ( rw == READA )
  13.414 -            rw = READ;
  13.415 -        if ( unlikely((rw != READ) && (rw != WRITE)) )
  13.416 -            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
  13.417 -
  13.418 -        req->errors = 0;
  13.419 -
  13.420 -        bh = req->bh;
  13.421 -        while ( bh != NULL )
  13.422 -        {
  13.423 -            next_bh = bh->b_reqnext;
  13.424 -            bh->b_reqnext = NULL;
  13.425 -
  13.426 -            full = hypervisor_request(
  13.427 -                (unsigned long)bh,
  13.428 -                (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
  13.429 -                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
  13.430 -
  13.431 -            if ( full )
  13.432 -            { 
  13.433 -                bh->b_reqnext = next_bh;
  13.434 -                pending_queues[nr_pending++] = rq;
  13.435 -                if ( unlikely(nr_pending >= MAX_PENDING) )
  13.436 -                    BUG();
  13.437 -                goto out; 
  13.438 -            }
  13.439 -
  13.440 -            queued++;
  13.441 -
  13.442 -            /* Dequeue the buffer head from the request. */
  13.443 -            nsect = bh->b_size >> 9;
  13.444 -            bh = req->bh = next_bh;
  13.445 -            
  13.446 -            if ( bh != NULL )
  13.447 -            {
  13.448 -                /* There's another buffer head to do. Update the request. */
  13.449 -                req->hard_sector += nsect;
  13.450 -                req->hard_nr_sectors -= nsect;
  13.451 -                req->sector = req->hard_sector;
  13.452 -                req->nr_sectors = req->hard_nr_sectors;
  13.453 -                req->current_nr_sectors = bh->b_size >> 9;
  13.454 -                req->buffer = bh->b_data;
  13.455 -            }
  13.456 -            else
  13.457 -            {
  13.458 -                /* That was the last buffer head. Finalise the request. */
  13.459 -                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
  13.460 -                    BUG();
  13.461 -                blkdev_dequeue_request(req);
  13.462 -                end_that_request_last(req);
  13.463 -            }
  13.464 -        }
  13.465 -    }
  13.466 -
  13.467 - out:
  13.468 -    if ( queued != 0 ) signal_requests_to_xen();
  13.469 -}
  13.470 -
  13.471 -
  13.472 -static void kick_pending_request_queues(void)
  13.473 -{
  13.474 -    /* We kick pending request queues if the ring is reasonably empty. */
  13.475 -    if ( (nr_pending != 0) && 
  13.476 -         ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
  13.477 -    {
  13.478 -        /* Attempt to drain the queue, but bail if the ring becomes full. */
  13.479 -        while ( (nr_pending != 0) && !RING_PLUGGED )
  13.480 -            do_xlblk_request(pending_queues[--nr_pending]);
  13.481 -    }
  13.482 -}
  13.483 -
  13.484 -
  13.485 -static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
  13.486 -{
  13.487 -    BLK_RING_IDX i; 
  13.488 -    unsigned long flags; 
  13.489 -    struct buffer_head *bh, *next_bh;
  13.490 -    
  13.491 -    if ( unlikely(state == STATE_CLOSED) )
  13.492 -        return;
  13.493 -    
  13.494 -    spin_lock_irqsave(&io_request_lock, flags);     
  13.495 -
  13.496 -    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
  13.497 -    {
  13.498 -        blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
  13.499 -        switch ( bret->operation )
  13.500 -        {
  13.501 -        case XEN_BLOCK_READ:
  13.502 -        case XEN_BLOCK_WRITE:
  13.503 -            if ( unlikely(bret->status != 0) )
  13.504 -                DPRINTK("Bad return from blkdev data request: %lx\n",
  13.505 -                        bret->status);
  13.506 -            for ( bh = (struct buffer_head *)bret->id; 
  13.507 -                  bh != NULL; 
  13.508 -                  bh = next_bh )
  13.509 -            {
  13.510 -                next_bh = bh->b_reqnext;
  13.511 -                bh->b_reqnext = NULL;
  13.512 -                bh->b_end_io(bh, !bret->status);
  13.513 -            }
  13.514 -            break;
  13.515 -     
  13.516 -        default:
  13.517 -            BUG();
  13.518 -        }
  13.519 -    }
  13.520 -    
  13.521 -    resp_cons = i;
  13.522 -
  13.523 -    kick_pending_request_queues();
  13.524 -
  13.525 -    spin_unlock_irqrestore(&io_request_lock, flags);
  13.526 -}
  13.527 -
  13.528 -
  13.529 -static void reset_xlblk_interface(void)
  13.530 -{
  13.531 -    block_io_op_t op; 
  13.532 -
  13.533 -    nr_pending = 0;
  13.534 -
  13.535 -    op.cmd = BLOCK_IO_OP_RESET;
  13.536 -    if ( HYPERVISOR_block_io_op(&op) != 0 )
  13.537 -        printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
  13.538 -
  13.539 -    op.cmd = BLOCK_IO_OP_RING_ADDRESS;
  13.540 -    (void)HYPERVISOR_block_io_op(&op);
  13.541 -
  13.542 -    set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
  13.543 -    blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
  13.544 -    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
  13.545 -
  13.546 -    wmb();
  13.547 -    state = STATE_ACTIVE;
  13.548 -}
  13.549 -
  13.550 -
  13.551 -int __init xlblk_init(void)
  13.552 -{
  13.553 -    int error; 
  13.554 -
  13.555 -    reset_xlblk_interface();
  13.556 -
  13.557 -    xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
  13.558 -    xlblk_update_irq   = bind_virq_to_irq(VIRQ_VBD_UPD);
  13.559 -
  13.560 -    error = request_irq(xlblk_response_irq, xlblk_response_int, 
  13.561 -                        SA_SAMPLE_RANDOM, "blkdev", NULL);
  13.562 -    if ( error )
  13.563 -    {
  13.564 -        printk(KERN_ALERT "Could not allocate receive interrupt\n");
  13.565 -        goto fail;
  13.566 -    }
  13.567 -
  13.568 -    error = request_irq(xlblk_update_irq, xlblk_update_int,
  13.569 -                        0, "blkdev", NULL);
  13.570 -
  13.571 -    if ( error )
  13.572 -    {
  13.573 -        printk(KERN_ALERT "Could not allocate block update interrupt\n");
  13.574 -        goto fail;
  13.575 -    }
  13.576 -
  13.577 -    (void)xlvbd_init();
  13.578 -
  13.579 -    return 0;
  13.580 -
  13.581 - fail:
  13.582 -    return error;
  13.583 -}
  13.584 -
  13.585 -
  13.586 -static void __exit xlblk_cleanup(void)
  13.587 -{
  13.588 -    xlvbd_cleanup();
  13.589 -    free_irq(xlblk_response_irq, NULL);
  13.590 -    free_irq(xlblk_update_irq, NULL);
  13.591 -    unbind_virq_from_irq(VIRQ_BLKDEV);
  13.592 -    unbind_virq_from_irq(VIRQ_VBD_UPD);
  13.593 -}
  13.594 -
  13.595 -
  13.596 -#ifdef MODULE
  13.597 -module_init(xlblk_init);
  13.598 -module_exit(xlblk_cleanup);
  13.599 -#endif
  13.600 -
  13.601 -
  13.602 -void blkdev_suspend(void)
  13.603 -{
  13.604 -    state = STATE_SUSPENDED;
  13.605 -    wmb();
  13.606 -
  13.607 -    while ( resp_cons != blk_ring->req_prod )
  13.608 -    {
  13.609 -        barrier();
  13.610 -        current->state = TASK_INTERRUPTIBLE;
  13.611 -        schedule_timeout(1);
  13.612 -    }
  13.613 -
  13.614 -    wmb();
  13.615 -    state = STATE_CLOSED;
  13.616 -    wmb();
  13.617 -
  13.618 -    clear_fixmap(FIX_BLKRING_BASE);
  13.619 -}
  13.620 -
  13.621 -
  13.622 -void blkdev_resume(void)
  13.623 -{
  13.624 -    reset_xlblk_interface();
  13.625 -    spin_lock_irq(&io_request_lock);
  13.626 -    kick_pending_request_queues();
  13.627 -    spin_unlock_irq(&io_request_lock);
  13.628 -}
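
Both the driver deleted above and its replacement in frontend/main.c further
below keep free-running ring indices: req_prod and resp_cons increment
without bound and are masked only when indexing the ring array, so the
fullness test in RING_PLUGGED relies on unsigned wraparound. A small sketch
of why that test stays correct, with illustrative names:

    /* Sketch: free-running producer/consumer indices. Only array
     * accesses are masked; the counters themselves are never reset. */
    #define RING_SIZE   64
    #define MASK_IDX(i) ((i) & (RING_SIZE - 1))

    static unsigned int req_prod;   /* advanced by the producer */
    static unsigned int resp_cons;  /* advanced by the consumer */

    static int ring_full(void)
    {
        /* Correct even after a counter overflows: the difference of
         * two unsigned ints is computed modulo 2^32. */
        return (req_prod - resp_cons) == RING_SIZE;
    }
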
    14.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h	Tue Apr 27 13:13:06 2004 +0000
    14.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.3 @@ -1,82 +0,0 @@
    14.4 -/******************************************************************************
    14.5 - * block.h
    14.6 - * 
    14.7 - * Shared definitions between all levels of XenoLinux Virtual block devices.
    14.8 - */
    14.9 -
   14.10 -#ifndef __XEN_DRIVERS_BLOCK_H__
   14.11 -#define __XEN_DRIVERS_BLOCK_H__
   14.12 -
   14.13 -#include <linux/config.h>
   14.14 -#include <linux/module.h>
   14.15 -
   14.16 -#include <linux/kernel.h>
   14.17 -#include <linux/sched.h>
   14.18 -#include <linux/slab.h>
   14.19 -#include <linux/string.h>
   14.20 -#include <linux/errno.h>
   14.21 -
   14.22 -#include <linux/fs.h>
   14.23 -#include <linux/hdreg.h>
   14.24 -#include <linux/blkdev.h>
   14.25 -#include <linux/major.h>
   14.26 -
   14.27 -#include <asm/hypervisor-ifs/hypervisor-if.h>
   14.28 -#include <asm/hypervisor-ifs/vbd.h>
   14.29 -#include <asm/io.h>
   14.30 -#include <asm/atomic.h>
   14.31 -#include <asm/uaccess.h>
   14.32 -
   14.33 -#if 0
   14.34 -#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   14.35 -#else
   14.36 -#define DPRINTK(_f, _a...) ((void)0)
   14.37 -#endif
   14.38 -
   14.39 -#if 0
   14.40 -#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   14.41 -#else
   14.42 -#define DPRINTK_IOCTL(_f, _a...) ((void)0)
   14.43 -#endif
   14.44 -
   14.45 -/* Private gendisk->flags[] values. */
   14.46 -#define GENHD_FL_XEN        2 /* Is unit a Xen block device?  */
   14.47 -#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
   14.48 -
   14.49 -/*
   14.50 - * We have one of these per vbd, whether ide, scsi or 'other'.
   14.51 - * They hang in an array off the gendisk structure. We may end up putting
   14.52 - * all kinds of interesting stuff here :-)
   14.53 - */
   14.54 -typedef struct xl_disk {
   14.55 -    int usage;
   14.56 -} xl_disk_t;
   14.57 -
   14.58 -extern int xen_control_msg(int operration, char *buffer, int size);
   14.59 -extern int xen_block_open(struct inode *inode, struct file *filep);
   14.60 -extern int xen_block_release(struct inode *inode, struct file *filep);
   14.61 -extern int xen_block_ioctl(struct inode *inode, struct file *filep,
   14.62 -                                 unsigned command, unsigned long argument);
   14.63 -extern int xen_block_check(kdev_t dev);
   14.64 -extern int xen_block_revalidate(kdev_t dev);
   14.65 -extern void do_xlblk_request (request_queue_t *rq); 
   14.66 -
   14.67 -extern void xlvbd_update_vbds(void);
   14.68 -
   14.69 -static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
   14.70 -{
   14.71 -    struct gendisk *gd = get_gendisk(xldev);
   14.72 -    
   14.73 -    if ( gd == NULL ) 
   14.74 -        return NULL;
   14.75 -    
   14.76 -    return (xl_disk_t *)gd->real_devices + 
   14.77 -        (MINOR(xldev) >> gd->minor_shift);
   14.78 -}
   14.79 -
   14.80 -
   14.81 -/* Virtual block-device subsystem. */
   14.82 -extern int  xlvbd_init(void);
   14.83 -extern void xlvbd_cleanup(void); 
   14.84 -
   14.85 -#endif /* __XEN_DRIVERS_BLOCK_H__ */
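
The per-unit xl_disk_t bookkeeping in block.h survives unchanged into the
new common.h below; only the entry-point names move from xen_block_* to
blkif_*. A usage sketch of the minor-shift indexing, with a hypothetical
helper name:

    /* Hypothetical helper: all partitions of a unit share one
     * xl_disk_t because the partition bits of the minor number are
     * shifted away by xldev_to_xldisk(). */
    static int unit_in_use(kdev_t dev)
    {
        xl_disk_t *disk = xldev_to_xldisk(dev);
        return (disk != NULL) && (disk->usage != 0);
    }
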
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h	Wed Apr 28 09:35:33 2004 +0000
    15.3 @@ -0,0 +1,84 @@
    15.4 +/******************************************************************************
    15.5 + * arch/xen/drivers/blkif/frontend/common.h
    15.6 + * 
    15.7 + * Shared definitions between all levels of XenoLinux Virtual block devices.
    15.8 + */
    15.9 +
   15.10 +#ifndef __XEN_DRIVERS_COMMON_H__
   15.11 +#define __XEN_DRIVERS_COMMON_H__
   15.12 +
   15.13 +#include <linux/config.h>
   15.14 +#include <linux/module.h>
   15.15 +
   15.16 +#include <linux/kernel.h>
   15.17 +#include <linux/sched.h>
   15.18 +#include <linux/slab.h>
   15.19 +#include <linux/string.h>
   15.20 +#include <linux/errno.h>
   15.21 +
   15.22 +#include <linux/fs.h>
   15.23 +#include <linux/hdreg.h>
   15.24 +#include <linux/blkdev.h>
   15.25 +#include <linux/major.h>
   15.26 +
   15.27 +#include <asm/hypervisor-ifs/hypervisor-if.h>
   15.28 +#include <asm/hypervisor-ifs/vbd.h>
   15.29 +#include <asm/io.h>
   15.30 +#include <asm/atomic.h>
   15.31 +#include <asm/uaccess.h>
   15.32 +
   15.33 +#include "../blkif.h"
   15.34 +
   15.35 +#if 0
   15.36 +#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   15.37 +#else
   15.38 +#define DPRINTK(_f, _a...) ((void)0)
   15.39 +#endif
   15.40 +
   15.41 +#if 0
   15.42 +#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   15.43 +#else
   15.44 +#define DPRINTK_IOCTL(_f, _a...) ((void)0)
   15.45 +#endif
   15.46 +
   15.47 +/* Private gendisk->flags[] values. */
   15.48 +#define GENHD_FL_XEN        2 /* Is unit a Xen block device?  */
   15.49 +#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
   15.50 +
   15.51 +/*
   15.52 + * We have one of these per vbd, whether ide, scsi or 'other'.
   15.53 + * They hang in an array off the gendisk structure. We may end up putting
   15.54 + * all kinds of interesting stuff here :-)
   15.55 + */
   15.56 +typedef struct xl_disk {
   15.57 +    int usage;
   15.58 +} xl_disk_t;
   15.59 +
   15.60 +extern int blkif_open(struct inode *inode, struct file *filep);
   15.61 +extern int blkif_release(struct inode *inode, struct file *filep);
   15.62 +extern int blkif_ioctl(struct inode *inode, struct file *filep,
   15.63 +                                 unsigned command, unsigned long argument);
   15.64 +extern int blkif_check(kdev_t dev);
   15.65 +extern int blkif_revalidate(kdev_t dev);
   15.66 +extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
    15.67 +extern void do_blkif_request(request_queue_t *rq);
   15.68 +
   15.69 +extern void xlvbd_update_vbds(void);
   15.70 +
   15.71 +static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
   15.72 +{
   15.73 +    struct gendisk *gd = get_gendisk(xldev);
   15.74 +    
   15.75 +    if ( gd == NULL ) 
   15.76 +        return NULL;
   15.77 +    
   15.78 +    return (xl_disk_t *)gd->real_devices + 
   15.79 +        (MINOR(xldev) >> gd->minor_shift);
   15.80 +}
   15.81 +
   15.82 +
   15.83 +/* Virtual block-device subsystem. */
   15.84 +extern int  xlvbd_init(void);
   15.85 +extern void xlvbd_cleanup(void); 
   15.86 +
   15.87 +#endif /* __XEN_DRIVERS_COMMON_H__ */
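
The blkif_* entry points declared in common.h above line up with the fields
of a Linux 2.4 block_device_operations table. A hypothetical registration
sketch (the driver's actual table is defined elsewhere):

    /* Hypothetical: wiring the declared entry points into a 2.4-style
     * block_device_operations table. */
    static struct block_device_operations blkif_fops = {
        open:               blkif_open,
        release:            blkif_release,
        ioctl:              blkif_ioctl,
        check_media_change: blkif_check,
        revalidate:         blkif_revalidate,
    };
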
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c	Wed Apr 28 09:35:33 2004 +0000
    16.3 @@ -0,0 +1,702 @@
    16.4 +/******************************************************************************
    16.5 + * arch/xen/drivers/blkif/frontend/main.c
    16.6 + * 
    16.7 + * Xenolinux virtual block-device driver.
    16.8 + * 
    16.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   16.10 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   16.11 + */
   16.12 +
   16.13 +#include "common.h"
   16.14 +#include <linux/blk.h>
   16.15 +#include <linux/cdrom.h>
   16.16 +#include <linux/tqueue.h>
   16.17 +#include <linux/sched.h>
   16.18 +#include <linux/interrupt.h>
   16.19 +#include <scsi/scsi.h>
   16.20 +#include <asm/ctrl_if.h>
   16.21 +
   16.22 +typedef unsigned char byte; /* from linux/ide.h */
   16.23 +
   16.24 +#define BLKIF_STATE_CLOSED    0
   16.25 +#define BLKIF_STATE_DOWN      1
   16.26 +#define BLKIF_STATE_UP        2
   16.27 +static unsigned int blkif_state = BLKIF_STATE_CLOSED;
   16.28 +static unsigned int blkif_evtchn, blkif_irq;
   16.29 +
   16.30 +static struct tq_struct blkif_statechange_tq;
   16.31 +
   16.32 +static int blkif_control_rsp_valid;
   16.33 +static blkif_response_t blkif_control_rsp;
   16.34 +
   16.35 +static blkif_ring_t *blk_ring;
    16.36 +static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
    16.37 +static BLKIF_RING_IDX req_prod;  /* Private request producer.         */
   16.38 +
   16.39 +/* We plug the I/O ring if the driver is suspended or if the ring is full. */
    16.40 +#define RING_PLUGGED (((req_prod - resp_cons) == BLKIF_RING_SIZE) || \
   16.41 +                      (blkif_state != BLKIF_STATE_UP))
   16.42 +
   16.43 +
   16.44 +/*
   16.45 + * Request queues with outstanding work, but ring is currently full.
   16.46 + * We need no special lock here, as we always access this with the
   16.47 + * io_request_lock held. We only need a small maximum list.
   16.48 + */
   16.49 +#define MAX_PENDING 8
   16.50 +static request_queue_t *pending_queues[MAX_PENDING];
   16.51 +static int nr_pending;
   16.52 +
   16.53 +static kdev_t        sg_dev;
   16.54 +static int           sg_operation = -1;
   16.55 +static unsigned long sg_next_sect;
   16.56 +#define DISABLE_SCATTERGATHER() (sg_operation = -1)
   16.57 +
   16.58 +static inline void flush_requests(void)
   16.59 +{
   16.60 +    DISABLE_SCATTERGATHER();
   16.61 +    blk_ring->req_prod = req_prod;
   16.62 +    notify_via_evtchn(blkif_evtchn);
   16.63 +}
   16.64 +
   16.65 +
   16.66 +/*
    16.67 + * blkif_update_int/update_vbds_task - handle VBD update events.
   16.68 + *  Schedule a task for keventd to run, which will update the VBDs and perform 
   16.69 + *  the corresponding updates to our view of VBD state.
   16.70 + */
   16.71 +static struct tq_struct update_tq;
   16.72 +static void update_vbds_task(void *unused)
   16.73 +{ 
   16.74 +    xlvbd_update_vbds();
   16.75 +}
   16.76 +
   16.77 +
   16.78 +int blkif_open(struct inode *inode, struct file *filep)
   16.79 +{
   16.80 +    short xldev = inode->i_rdev; 
   16.81 +    struct gendisk *gd = get_gendisk(xldev);
   16.82 +    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
   16.83 +    short minor = MINOR(xldev); 
   16.84 +
   16.85 +    if ( gd->part[minor].nr_sects == 0 )
   16.86 +    { 
   16.87 +        /*
   16.88 +         * Device either doesn't exist, or has zero capacity; we use a few
   16.89 +         * cheesy heuristics to return the relevant error code
   16.90 +         */
   16.91 +        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
   16.92 +             ((minor & (gd->max_p - 1)) != 0) )
   16.93 +        { 
   16.94 +            /*
   16.95 +             * We have a real device, but no such partition, or we just have a
   16.96 +             * partition number so guess this is the problem.
   16.97 +             */
   16.98 +            return -ENXIO;     /* no such device or address */
   16.99 +        }
  16.100 +        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
  16.101 +        {
  16.102 +            /* This is a removable device => assume that media is missing. */ 
  16.103 +            return -ENOMEDIUM; /* media not present (this is a guess) */
  16.104 +        } 
  16.105 +        else
  16.106 +        { 
  16.107 +            /* Just go for the general 'no such device' error. */
  16.108 +            return -ENODEV;    /* no such device */
  16.109 +        }
  16.110 +    }
  16.111 +    
  16.112 +    /* Update of usage count is protected by per-device semaphore. */
  16.113 +    disk->usage++;
  16.114 +
  16.115 +    return 0;
  16.116 +}
  16.117 +
  16.118 +
  16.119 +int blkif_release(struct inode *inode, struct file *filep)
  16.120 +{
  16.121 +    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
  16.122 +
  16.123 +    /*
  16.124 +     * When usage drops to zero it may allow more VBD updates to occur.
  16.125 +     * Update of usage count is protected by a per-device semaphore.
  16.126 +     */
  16.127 +    if ( --disk->usage == 0 )
  16.128 +    {
  16.129 +        update_tq.routine = update_vbds_task;
  16.130 +        schedule_task(&update_tq);
  16.131 +    }
  16.132 +
  16.133 +    return 0;
  16.134 +}
  16.135 +
  16.136 +
  16.137 +int blkif_ioctl(struct inode *inode, struct file *filep,
  16.138 +                          unsigned command, unsigned long argument)
  16.139 +{
  16.140 +    kdev_t dev = inode->i_rdev;
  16.141 +    struct hd_geometry *geo = (struct hd_geometry *)argument;
  16.142 +    struct gendisk *gd;     
  16.143 +    struct hd_struct *part; 
  16.144 +    int i;
  16.145 +
  16.146 +    /* NB. No need to check permissions. That is done for us. */
  16.147 +    
  16.148 +    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
  16.149 +                  command, (long) argument, dev); 
  16.150 +  
  16.151 +    gd = get_gendisk(dev);
  16.152 +    part = &gd->part[MINOR(dev)]; 
  16.153 +
  16.154 +    switch ( command )
  16.155 +    {
  16.156 +    case BLKGETSIZE:
  16.157 +        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
  16.158 +        return put_user(part->nr_sects, (unsigned long *) argument);
  16.159 +
  16.160 +    case BLKGETSIZE64:
  16.161 +        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
  16.162 +                      (u64)part->nr_sects * 512);
  16.163 +        return put_user((u64)part->nr_sects * 512, (u64 *) argument);
  16.164 +
  16.165 +    case BLKRRPART:                               /* re-read partition table */
  16.166 +        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
  16.167 +        return blkif_revalidate(dev);
  16.168 +
  16.169 +    case BLKSSZGET:
  16.170 +        return hardsect_size[MAJOR(dev)][MINOR(dev)]; 
  16.171 +
  16.172 +    case BLKBSZGET:                                        /* get block size */
  16.173 +        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
  16.174 +        break;
  16.175 +
  16.176 +    case BLKBSZSET:                                        /* set block size */
  16.177 +        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
  16.178 +        break;
  16.179 +
  16.180 +    case BLKRASET:                                         /* set read-ahead */
  16.181 +        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
  16.182 +        break;
  16.183 +
  16.184 +    case BLKRAGET:                                         /* get read-ahead */
   16.185 +        DPRINTK_IOCTL("   BLKRAGET: %x\n", BLKRAGET);
  16.186 +        break;
  16.187 +
  16.188 +    case HDIO_GETGEO:
  16.189 +        /* note: these values are complete garbage */
  16.190 +        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
  16.191 +        if (!argument) return -EINVAL;
  16.192 +        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
  16.193 +        if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
  16.194 +        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  16.195 +        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
  16.196 +        return 0;
  16.197 +
  16.198 +    case HDIO_GETGEO_BIG: 
  16.199 +        /* note: these values are complete garbage */
  16.200 +        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
  16.201 +        if (!argument) return -EINVAL;
  16.202 +        if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
  16.203 +        if (put_user(0xff,  (byte *)&geo->heads))   return -EFAULT;
  16.204 +        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  16.205 +        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
  16.206 +        return 0;
  16.207 +
  16.208 +    case CDROMMULTISESSION:
  16.209 +        DPRINTK("FIXME: support multisession CDs later\n");
  16.210 +        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
  16.211 +            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
  16.212 +        return 0;
  16.213 +
  16.214 +    case SCSI_IOCTL_GET_BUS_NUMBER:
  16.215 +        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif");
  16.216 +        return -ENOSYS;
  16.217 +
  16.218 +    default:
  16.219 +        printk(KERN_ALERT "ioctl %08x not supported by XL blkif\n", command);
  16.220 +        return -ENOSYS;
  16.221 +    }
  16.222 +    
  16.223 +    return 0;
  16.224 +}
  16.225 +
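
For reference, the ioctls handled above are the standard Linux block-device
controls; a minimal user-space sketch of exercising them (the /dev/xvda device
node is an assumption for illustration):

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>      /* BLKGETSIZE */
    #include <linux/hdreg.h>   /* HDIO_GETGEO, struct hd_geometry */

    int main(void)
    {
        unsigned long nr_sects;
        struct hd_geometry geo;
        int fd = open("/dev/xvda", O_RDONLY);     /* assumed device node */
        if ( fd < 0 )
            return 1;
        if ( ioctl(fd, BLKGETSIZE, &nr_sects) == 0 )
            printf("capacity: %lu sectors\n", nr_sects);
        if ( ioctl(fd, HDIO_GETGEO, &geo) == 0 )  /* CHS values are fabricated above */
            printf("geometry: %u/%u/%u\n", geo.cylinders, geo.heads, geo.sectors);
        close(fd);
        return 0;
    }
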
  16.226 +/* check media change: should probably do something here in some cases :-) */
  16.227 +int blkif_check(kdev_t dev)
  16.228 +{
  16.229 +    DPRINTK("blkif_check\n");
  16.230 +    return 0;
  16.231 +}
  16.232 +
  16.233 +int blkif_revalidate(kdev_t dev)
  16.234 +{
  16.235 +    struct block_device *bd;
  16.236 +    struct gendisk *gd;
  16.237 +    xl_disk_t *disk;
  16.238 +    unsigned long capacity;
  16.239 +    int i, rc = 0;
  16.240 +    
  16.241 +    if ( (bd = bdget(dev)) == NULL )
  16.242 +        return -EINVAL;
  16.243 +
  16.244 +    /*
  16.245 +     * Update of partition info, and check of usage count, is protected
  16.246 +     * by the per-block-device semaphore.
  16.247 +     */
  16.248 +    down(&bd->bd_sem);
  16.249 +
  16.250 +    if ( ((gd = get_gendisk(dev)) == NULL) ||
  16.251 +         ((disk = xldev_to_xldisk(dev)) == NULL) ||
  16.252 +         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
  16.253 +    {
  16.254 +        rc = -EINVAL;
  16.255 +        goto out;
  16.256 +    }
  16.257 +
  16.258 +    if ( disk->usage > 1 )
  16.259 +    {
  16.260 +        rc = -EBUSY;
  16.261 +        goto out;
  16.262 +    }
  16.263 +
  16.264 +    /* Only reread partition table if VBDs aren't mapped to partitions. */
  16.265 +    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
  16.266 +    {
  16.267 +        for ( i = gd->max_p - 1; i >= 0; i-- )
  16.268 +        {
  16.269 +            invalidate_device(dev+i, 1);
  16.270 +            gd->part[MINOR(dev+i)].start_sect = 0;
  16.271 +            gd->part[MINOR(dev+i)].nr_sects   = 0;
  16.272 +            gd->sizes[MINOR(dev+i)]           = 0;
  16.273 +        }
  16.274 +
  16.275 +        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
  16.276 +    }
  16.277 +
  16.278 + out:
  16.279 +    up(&bd->bd_sem);
  16.280 +    bdput(bd);
  16.281 +    return rc;
  16.282 +}
  16.283 +
  16.284 +
  16.285 +/*
  16.286 + * blkif_queue_request
  16.287 + *
   16.288 + * Request block I/O.
  16.289 + * 
  16.290 + * id: for guest use only.
  16.291 + * operation: BLKIF_OP_{READ,WRITE,PROBE}
   16.292 + * buffer: buffer to read/write into. This should be a
   16.293 + *   virtual address in the guest OS.
  16.294 + */
  16.295 +static int blkif_queue_request(unsigned long   id,
  16.296 +                               int             operation,
  16.297 +                               char *          buffer,
  16.298 +                               unsigned long   sector_number,
  16.299 +                               unsigned short  nr_sectors,
  16.300 +                               kdev_t          device)
  16.301 +{
  16.302 +    unsigned long       buffer_ma = phys_to_machine(virt_to_phys(buffer)); 
  16.303 +    struct gendisk     *gd;
  16.304 +    blkif_request_t    *req;
  16.305 +    struct buffer_head *bh;
  16.306 +
  16.307 +    if ( unlikely(nr_sectors >= (1<<9)) )
  16.308 +        BUG();
  16.309 +    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
  16.310 +        BUG();
  16.311 +
  16.312 +    if ( unlikely(blkif_state != BLKIF_STATE_UP) )
  16.313 +        return 1;
  16.314 +
  16.315 +    switch ( operation )
  16.316 +    {
  16.317 +
  16.318 +    case BLKIF_OP_READ:
  16.319 +    case BLKIF_OP_WRITE:
  16.320 +        gd = get_gendisk(device); 
  16.321 +
  16.322 +        /*
  16.323 +         * Update the sector_number we'll pass down as appropriate; note that
   16.324 +         * we could sanity-check that the resulting sector lies within this
   16.325 +         * partition, but that will happen in the backend driver anyhow.
  16.326 +         */
  16.327 +        sector_number += gd->part[MINOR(device)].start_sect;
  16.328 +
  16.329 +        /*
  16.330 +         * If this unit doesn't consist of virtual partitions then we clear 
  16.331 +         * the partn bits from the device number.
  16.332 +         */
  16.333 +        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
  16.334 +               GENHD_FL_VIRT_PARTNS) )
  16.335 +            device &= ~(gd->max_p - 1);
  16.336 +
  16.337 +        if ( (sg_operation == operation) &&
  16.338 +             (sg_dev == device) &&
  16.339 +             (sg_next_sect == sector_number) )
  16.340 +        {
  16.341 +            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
  16.342 +            bh = (struct buffer_head *)id;
  16.343 +            bh->b_reqnext = (struct buffer_head *)req->id;
  16.344 +            req->id = id;
  16.345 +            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
  16.346 +            if ( ++req->nr_segments < MAX_BLK_SEGS )
  16.347 +                sg_next_sect += nr_sectors;
  16.348 +            else
  16.349 +                DISABLE_SCATTERGATHER();
  16.350 +            return 0;
  16.351 +        }
  16.352 +        else if ( RING_PLUGGED )
  16.353 +        {
  16.354 +            return 1;
  16.355 +        }
  16.356 +        else
  16.357 +        {
  16.358 +            sg_operation = operation;
  16.359 +            sg_dev       = device;
  16.360 +            sg_next_sect = sector_number + nr_sectors;
  16.361 +        }
  16.362 +        break;
  16.363 +
  16.364 +    default:
  16.365 +        panic("unknown op %d\n", operation);
  16.366 +    }
  16.367 +
  16.368 +    /* Fill out a communications ring structure. */
  16.369 +    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
  16.370 +    req->id            = id;
  16.371 +    req->operation     = operation;
  16.372 +    req->sector_number = (blkif_sector_t)sector_number;
  16.373 +    req->device        = device; 
  16.374 +    req->nr_segments   = 1;
  16.375 +    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
  16.376 +    req_prod++;
  16.377 +
  16.378 +    return 0;
  16.379 +}
  16.380 +
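
The BUG() checks at the top of blkif_queue_request() encode the packing
invariant for buffer_and_sects[]: each entry is a 512-byte-aligned machine
address OR'd with a sector count in the low 9 bits. A sketch of the encoding
(helper names are illustrative, not from this source):

    /* Illustrative helpers for the buffer_and_sects[] packing. */
    #define SECT_BITS 9
    #define SECT_MASK ((1UL << SECT_BITS) - 1)

    /* Requires (buffer_ma & SECT_MASK) == 0 and nr_sectors < (1 << SECT_BITS). */
    static unsigned long pack_segment(unsigned long buffer_ma,
                                      unsigned short nr_sectors)
    {
        return buffer_ma | nr_sectors;
    }

    static unsigned long  segment_addr(unsigned long seg)  { return seg & ~SECT_MASK; }
    static unsigned short segment_sects(unsigned long seg) { return seg &  SECT_MASK; }
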
  16.381 +
  16.382 +/*
  16.383 + * do_blkif_request
   16.384 + *  read/write a block; the request is on a request queue
  16.385 + */
  16.386 +void do_blkif_request(request_queue_t *rq)
  16.387 +{
  16.388 +    struct request *req;
  16.389 +    struct buffer_head *bh, *next_bh;
  16.390 +    int rw, nsect, full, queued = 0;
  16.391 +
  16.392 +    DPRINTK("Entered do_blkif_request\n"); 
  16.393 +
   16.394 +    while ( !rq->plugged && !list_empty(&rq->queue_head) )
  16.395 +    {
  16.396 +        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
  16.397 +            goto out;
  16.398 +  
  16.399 +        DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
  16.400 +                req, req->cmd, req->sector,
  16.401 +                req->current_nr_sectors, req->nr_sectors, req->bh);
  16.402 +
  16.403 +        rw = req->cmd;
  16.404 +        if ( rw == READA )
  16.405 +            rw = READ;
  16.406 +        if ( unlikely((rw != READ) && (rw != WRITE)) )
  16.407 +            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
  16.408 +
  16.409 +        req->errors = 0;
  16.410 +
  16.411 +        bh = req->bh;
  16.412 +        while ( bh != NULL )
  16.413 +        {
  16.414 +            next_bh = bh->b_reqnext;
  16.415 +            bh->b_reqnext = NULL;
  16.416 +
  16.417 +            full = blkif_queue_request(
  16.418 +                (unsigned long)bh,
  16.419 +                (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, 
  16.420 +                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
  16.421 +
  16.422 +            if ( full )
  16.423 +            { 
  16.424 +                bh->b_reqnext = next_bh;
  16.425 +                pending_queues[nr_pending++] = rq;
  16.426 +                if ( unlikely(nr_pending >= MAX_PENDING) )
  16.427 +                    BUG();
  16.428 +                goto out; 
  16.429 +            }
  16.430 +
  16.431 +            queued++;
  16.432 +
  16.433 +            /* Dequeue the buffer head from the request. */
  16.434 +            nsect = bh->b_size >> 9;
  16.435 +            bh = req->bh = next_bh;
  16.436 +            
  16.437 +            if ( bh != NULL )
  16.438 +            {
  16.439 +                /* There's another buffer head to do. Update the request. */
  16.440 +                req->hard_sector += nsect;
  16.441 +                req->hard_nr_sectors -= nsect;
  16.442 +                req->sector = req->hard_sector;
  16.443 +                req->nr_sectors = req->hard_nr_sectors;
  16.444 +                req->current_nr_sectors = bh->b_size >> 9;
  16.445 +                req->buffer = bh->b_data;
  16.446 +            }
  16.447 +            else
  16.448 +            {
  16.449 +                /* That was the last buffer head. Finalise the request. */
  16.450 +                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
  16.451 +                    BUG();
  16.452 +                blkdev_dequeue_request(req);
  16.453 +                end_that_request_last(req);
  16.454 +            }
  16.455 +        }
  16.456 +    }
  16.457 +
  16.458 + out:
  16.459 +    if ( queued != 0 )
  16.460 +        flush_requests();
  16.461 +}
  16.462 +
  16.463 +
  16.464 +static void kick_pending_request_queues(void)
  16.465 +{
  16.466 +    /* We kick pending request queues if the ring is reasonably empty. */
  16.467 +    if ( (nr_pending != 0) && 
  16.468 +         ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
  16.469 +    {
  16.470 +        /* Attempt to drain the queue, but bail if the ring becomes full. */
  16.471 +        while ( (nr_pending != 0) && !RING_PLUGGED )
  16.472 +            do_blkif_request(pending_queues[--nr_pending]);
  16.473 +    }
  16.474 +}
  16.475 +
  16.476 +
  16.477 +static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
  16.478 +{
  16.479 +    BLK_RING_IDX i; 
  16.480 +    unsigned long flags; 
  16.481 +    struct buffer_head *bh, *next_bh;
  16.482 +    
  16.483 +    if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) )
  16.484 +        return;
  16.485 +    
  16.486 +    spin_lock_irqsave(&io_request_lock, flags);     
  16.487 +
  16.488 +    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
  16.489 +    {
  16.490 +        blkif_response_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
  16.491 +        switch ( bret->operation )
  16.492 +        {
  16.493 +        case BLKIF_OP_READ:
  16.494 +        case BLKIF_OP_WRITE:
  16.495 +            if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
  16.496 +                DPRINTK("Bad return from blkdev data request: %lx\n",
  16.497 +                        bret->status);
  16.498 +            for ( bh = (struct buffer_head *)bret->id; 
  16.499 +                  bh != NULL; 
  16.500 +                  bh = next_bh )
  16.501 +            {
  16.502 +                next_bh = bh->b_reqnext;
  16.503 +                bh->b_reqnext = NULL;
  16.504 +                bh->b_end_io(bh, !bret->status);
  16.505 +            }
  16.506 +            break;
  16.507 +        case BLKIF_OP_PROBE:
  16.508 +            memcpy(&blkif_control_rsp, bret, sizeof(*bret));
  16.509 +            blkif_control_rsp_valid = 1;
  16.510 +            break;
  16.511 +        default:
  16.512 +            BUG();
  16.513 +        }
  16.514 +    }
  16.515 +    
  16.516 +    resp_cons = i;
  16.517 +
  16.518 +    kick_pending_request_queues();
  16.519 +
  16.520 +    spin_unlock_irqrestore(&io_request_lock, flags);
  16.521 +}
  16.522 +
  16.523 +
  16.524 +void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
  16.525 +{
  16.526 +    unsigned long flags;
  16.527 +
  16.528 + retry:
  16.529 +    while ( (req_prod - resp_cons) == BLK_RING_SIZE )
  16.530 +    {
  16.531 +        set_current_state(TASK_INTERRUPTIBLE);
  16.532 +        schedule_timeout(1);
  16.533 +    }
  16.534 +
  16.535 +    spin_lock_irqsave(&io_request_lock, flags);
  16.536 +    if ( (req_prod - resp_cons) == BLK_RING_SIZE )
  16.537 +    {
  16.538 +        spin_unlock_irqrestore(&io_request_lock, flags);
  16.539 +        goto retry;
  16.540 +    }
  16.541 +
  16.542 +    DISABLE_SCATTERGATHER();
  16.543 +    memcpy(&blk_ring->ring[MASK_BLK_IDX(req_prod)].req, req, sizeof(*req));
  16.544 +    req_prod++;
  16.545 +    flush_requests();
  16.546 +
  16.547 +    spin_unlock_irqrestore(&io_request_lock, flags);
  16.548 +
  16.549 +    while ( !blkif_control_rsp_valid )
  16.550 +    {
  16.551 +        set_current_state(TASK_INTERRUPTIBLE);
  16.552 +        schedule_timeout(1);
  16.553 +    }
  16.554 +
  16.555 +    memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
  16.556 +    blkif_control_rsp_valid = 0;
  16.557 +}
  16.558 +
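
blkif_control_send() is the synchronous control path: it sleeps while the ring
is full, queues a single request with scatter-gather disabled, and then sleeps
until blkif_int() flags the response via blkif_control_rsp_valid. A caller
sketch, mirroring the BLKIF_OP_PROBE use in frontend/vbd.c later in this
changeset:

    blkif_request_t  req;
    blkif_response_t rsp;

    memset(&req, 0, sizeof(req));
    req.operation   = BLKIF_OP_PROBE;
    req.nr_segments = 1;
    /* One page-sized, sector-aligned buffer, packed as described above. */
    req.buffer_and_sects[0] = virt_to_machine(buf) | (PAGE_SIZE/512);

    blkif_control_send(&req, &rsp);   /* blocks until the response arrives */
    if ( rsp.status <= 0 )
        printk(KERN_ALERT "probe failed (%d)\n", rsp.status);
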
  16.559 +
  16.560 +static void blkif_bringup_phase1(void *unused)
  16.561 +{
  16.562 +    ctrl_msg_t              cmsg;
  16.563 +    blkif_fe_interface_up_t up;
  16.564 +
  16.565 +    /* Move from CLOSED to DOWN state. */
  16.566 +    blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
  16.567 +    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
  16.568 +    blkif_state  = BLKIF_STATE_DOWN;
  16.569 +
  16.570 +    /* Construct an interface-UP message for the domain controller. */
  16.571 +    cmsg.type      = CMSG_BLKIF_FE;
  16.572 +    cmsg.subtype   = CMSG_BLKIF_FE_INTERFACE_UP;
  16.573 +    cmsg.length    = sizeof(blkif_fe_interface_up_t);
  16.574 +    up.handle      = 0;
  16.575 +    up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
  16.576 +    memcpy(cmsg.msg, &up, sizeof(up));
  16.577 +
  16.578 +    /* Tell the controller to bring up the interface. */
  16.579 +    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
  16.580 +}
  16.581 +
  16.582 +static void blkif_bringup_phase2(void *unused)
  16.583 +{
  16.584 +    /* Move from DOWN to UP state. */
  16.585 +    blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
  16.586 +    (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
  16.587 +    blkif_state = BLKIF_STATE_UP;
  16.588 +
  16.589 +    /* Probe for discs that are attached to the interface. */
  16.590 +    xlvbd_init();
  16.591 +
  16.592 +    /* Kick pending requests. */
  16.593 +    spin_lock_irq(&io_request_lock);
  16.594 +    kick_pending_request_queues();
  16.595 +    spin_unlock_irq(&io_request_lock);
  16.596 +}
  16.597 +
  16.598 +static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
  16.599 +{
  16.600 +    if ( status->handle != 0 )
  16.601 +    {
  16.602 +        printk(KERN_WARNING "Status change on unsupported blkif %d\n",
  16.603 +               status->handle);
  16.604 +        return;
  16.605 +    }
  16.606 +
  16.607 +    switch ( status->status )
  16.608 +    {
  16.609 +    case BLKIF_INTERFACE_STATUS_DESTROYED:
  16.610 +        printk(KERN_WARNING "Unexpected blkif-DESTROYED message in state %d\n",
  16.611 +               blkif_state);
  16.612 +        break;
  16.613 +
  16.614 +    case BLKIF_INTERFACE_STATUS_DOWN:
  16.615 +        if ( blkif_state != BLKIF_STATE_CLOSED )
  16.616 +        {
  16.617 +            printk(KERN_WARNING "Unexpected blkif-DOWN message in state %d\n",
  16.618 +                   blkif_state);
  16.619 +            break;
  16.620 +        }
  16.621 +        blkif_statechange_tq.routine = blkif_bringup_phase1;
  16.622 +        schedule_task(&blkif_statechange_tq);
  16.623 +        break;
  16.624 +
  16.625 +    case BLKIF_INTERFACE_STATUS_UP:
  16.626 +        if ( blkif_state == BLKIF_STATE_CLOSED )
  16.627 +        {
  16.628 +            printk(KERN_WARNING "Unexpected blkif-UP message in state %d\n",
  16.629 +                   blkif_state);
  16.630 +            break;
  16.631 +        }
  16.632 +        blkif_evtchn = status->evtchn;
  16.633 +        blkif_statechange_tq.routine = blkif_bringup_phase2;
  16.634 +        schedule_task(&blkif_statechange_tq);
  16.635 +        break;
  16.636 +
  16.637 +    default:
  16.638 +        printk(KERN_WARNING "Status change to unknown value %d\n", 
  16.639 +               status->status);
  16.640 +        break;
  16.641 +    }
  16.642 +}
  16.643 +
  16.644 +
  16.645 +static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
  16.646 +{
  16.647 +    switch ( msg->subtype )
  16.648 +    {
  16.649 +    case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
  16.650 +        if ( msg->length != sizeof(blkif_fe_interface_status_changed_t) )
  16.651 +            goto parse_error;
  16.652 +        blkif_status_change((blkif_fe_interface_status_changed_t *)
  16.653 +                            &msg->msg[0]);
  16.654 +        break;        
  16.655 +#if 0
  16.656 +    case CMSG_BLKIF_FE_VBD_STATUS_CHANGED:
  16.657 +        update_tq.routine = update_vbds_task;
  16.658 +        schedule_task(&update_tq);
  16.659 +        break;
  16.660 +#endif
  16.661 +    default:
  16.662 +        goto parse_error;
  16.663 +    }
  16.664 +
  16.665 +    ctrl_if_send_response(msg);
  16.666 +    return;
  16.667 +
  16.668 + parse_error:
  16.669 +    msg->length = 0;
  16.670 +    ctrl_if_send_response(msg);
  16.671 +}
  16.672 +
  16.673 +
  16.674 +int __init xlblk_init(void)
  16.675 +{
  16.676 +    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx);
  16.677 +    return 0;
  16.678 +}
  16.679 +
  16.680 +
  16.681 +static void __exit xlblk_cleanup(void)
  16.682 +{
  16.683 +    /* XXX FIXME */
  16.684 +    BUG();
  16.685 +}
  16.686 +
  16.687 +
  16.688 +#ifdef MODULE
  16.689 +module_init(xlblk_init);
  16.690 +module_exit(xlblk_cleanup);
  16.691 +#endif
  16.692 +
  16.693 +
  16.694 +void blkdev_suspend(void)
  16.695 +{
  16.696 +    /* XXX FIXME */
  16.697 +    BUG();
  16.698 +}
  16.699 +
  16.700 +
  16.701 +void blkdev_resume(void)
  16.702 +{
  16.703 +    /* XXX FIXME */
  16.704 +    BUG();
  16.705 +}
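
A note on the ring arithmetic used throughout this file: req_prod and
resp_cons are free-running counters, reduced modulo the ring size only at
access time via MASK_BLK_IDX(), so occupancy is a plain subtraction even after
the counters wrap. A sketch, assuming a power-of-two ring size (the actual
BLK_RING_SIZE value is defined elsewhere in the tree):

    /* Free-running index scheme; BLK_RING_SIZE value assumed. */
    #define BLK_RING_SIZE   64
    #define MASK_BLK_IDX(i) ((i) & (BLK_RING_SIZE-1))

    /* Outstanding requests, valid across wrap-around:            */
    /*     occupancy = req_prod - resp_cons                       */
    /* The ring is full exactly when occupancy == BLK_RING_SIZE,  */
    /* which is the condition blkif_control_send() waits out.     */
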
    17.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c	Tue Apr 27 13:13:06 2004 +0000
    17.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c	Wed Apr 28 09:35:33 2004 +0000
    17.3 @@ -1,13 +1,13 @@
    17.4  /******************************************************************************
    17.5 - * vbd.c
    17.6 + * arch/xen/drivers/blkif/frontend/vbd.c
    17.7   * 
    17.8 - * Xenolinux virtual block-device driver (xvd).
    17.9 + * Xenolinux virtual block-device driver.
   17.10   * 
   17.11   * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   17.12   * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   17.13   */
   17.14  
   17.15 -#include "block.h"
   17.16 +#include "common.h"
   17.17  #include <linux/blk.h>
   17.18  
   17.19  /*
   17.20 @@ -43,54 +43,59 @@ static int xlvbd_blksize_size[256];
   17.21  static int xlvbd_hardsect_size[256];
   17.22  static int xlvbd_max_sectors[256];
   17.23  
   17.24 -/* Information from Xen about our VBDs. */
   17.25 +/* Information about our VBDs. */
   17.26  #define MAX_VBDS 64
   17.27  static int nr_vbds;
   17.28 -static xen_disk_t *vbd_info;
   17.29 +static vdisk_t *vbd_info;
   17.30  
   17.31  static struct block_device_operations xlvbd_block_fops = 
   17.32  {
   17.33 -    open:               xen_block_open,
   17.34 -    release:            xen_block_release,
   17.35 -    ioctl:              xen_block_ioctl,
   17.36 -    check_media_change: xen_block_check,
   17.37 -    revalidate:         xen_block_revalidate,
   17.38 +    open:               blkif_open,
   17.39 +    release:            blkif_release,
   17.40 +    ioctl:              blkif_ioctl,
   17.41 +    check_media_change: blkif_check,
   17.42 +    revalidate:         blkif_revalidate,
   17.43  };
   17.44  
   17.45 -static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
   17.46 +static int xlvbd_get_vbd_info(vdisk_t *disk_info)
   17.47  {
   17.48 -    int error;
   17.49 -    block_io_op_t op; 
   17.50 +    vdisk_t         *buf = (vdisk_t *)__get_free_page(GFP_KERNEL);
   17.51 +    blkif_request_t  req;
   17.52 +    blkif_response_t rsp;
   17.53 +    int              nr;
   17.54  
   17.55 -    /* Probe for disk information. */
   17.56 -    memset(&op, 0, sizeof(op)); 
   17.57 -    op.cmd = BLOCK_IO_OP_VBD_PROBE; 
   17.58 -    op.u.probe_params.domain    = 0; 
   17.59 -    op.u.probe_params.xdi.max   = MAX_VBDS;
   17.60 -    op.u.probe_params.xdi.disks = disk_info;
   17.61 -    op.u.probe_params.xdi.count = 0;
   17.62 +    memset(&req, 0, sizeof(req));
   17.63 +    req.operation   = BLKIF_OP_PROBE;
   17.64 +    req.nr_segments = 1;
   17.65 +    req.buffer_and_sects[0] = virt_to_machine(buf) | (PAGE_SIZE/512);
   17.66  
   17.67 -    if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
   17.68 +    blkif_control_send(&req, &rsp);
   17.69 +
   17.70 +    if ( rsp.status <= 0 )
   17.71      {
   17.72 -        printk(KERN_ALERT "Could not probe disks (%d)\n", error);
   17.73 +        printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
   17.74          return -1;
   17.75      }
   17.76  
   17.77 -    return op.u.probe_params.xdi.count;
   17.78 +    if ( (nr = rsp.status) > MAX_VBDS )
   17.79 +         nr = MAX_VBDS;
   17.80 +    memcpy(disk_info, buf, nr * sizeof(vdisk_t));
   17.81 +
   17.82 +    return nr;
   17.83  }
   17.84  
   17.85  /*
   17.86   * xlvbd_init_device - initialise a VBD device
   17.87 - * @disk:              a xen_disk_t describing the VBD
   17.88 + * @disk:              a vdisk_t describing the VBD
   17.89   *
   17.90 - * Takes a xen_disk_t * that describes a VBD the domain has access to.
   17.91 + * Takes a vdisk_t * that describes a VBD the domain has access to.
   17.92   * Performs appropriate initialisation and registration of the device.
   17.93   *
   17.94   * Care needs to be taken when making re-entrant calls to ensure that
   17.95   * corruption does not occur.  Also, devices that are in use should not have
   17.96   * their details updated.  This is the caller's responsibility.
   17.97   */
   17.98 -static int xlvbd_init_device(xen_disk_t *xd)
   17.99 +static int xlvbd_init_device(vdisk_t *xd)
  17.100  {
  17.101      int device = xd->device;
  17.102      int major  = MAJOR(device); 
  17.103 @@ -181,11 +186,11 @@ static int xlvbd_init_device(xen_disk_t 
  17.104              read_ahead[major]    = 8;
  17.105          }
  17.106  
  17.107 -        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
  17.108 +        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_blkif_request);
  17.109  
  17.110          /*
  17.111           * Turn off barking 'headactive' mode. We dequeue buffer heads as
  17.112 -         * soon as we pass them down to Xen.
  17.113 +         * soon as we pass them to the back-end driver.
  17.114           */
  17.115          blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
  17.116  
  17.117 @@ -431,12 +436,12 @@ static int xlvbd_remove_device(int devic
  17.118  void xlvbd_update_vbds(void)
  17.119  {
  17.120      int i, j, k, old_nr, new_nr;
  17.121 -    xen_disk_t *old_info, *new_info, *merged_info;
  17.122 +    vdisk_t *old_info, *new_info, *merged_info;
  17.123  
  17.124      old_info = vbd_info;
  17.125      old_nr   = nr_vbds;
  17.126  
  17.127 -    new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  17.128 +    new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
  17.129      if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
  17.130      {
  17.131          kfree(new_info);
  17.132 @@ -448,7 +453,7 @@ void xlvbd_update_vbds(void)
  17.133       * old list and new list do not overlap at all, and we cannot yet destroy
  17.134       * VBDs in the old list because the usage counts are busy.
  17.135       */
  17.136 -    merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
  17.137 +    merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
  17.138  
  17.139      /* @i tracks old list; @j tracks new list; @k tracks merged list. */
  17.140      i = j = k = 0;
  17.141 @@ -458,13 +463,13 @@ void xlvbd_update_vbds(void)
  17.142          if ( old_info[i].device < new_info[j].device )
  17.143          {
  17.144              if ( xlvbd_remove_device(old_info[i].device) != 0 )
  17.145 -                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  17.146 +                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
  17.147              i++;
  17.148          }
  17.149          else if ( old_info[i].device > new_info[j].device )
  17.150          {
  17.151              if ( xlvbd_init_device(&new_info[j]) == 0 )
  17.152 -                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  17.153 +                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
  17.154              j++;
  17.155          }
  17.156          else
  17.157 @@ -472,9 +477,9 @@ void xlvbd_update_vbds(void)
  17.158              if ( ((old_info[i].capacity == new_info[j].capacity) &&
  17.159                    (old_info[i].info == new_info[j].info)) ||
  17.160                   (xlvbd_remove_device(old_info[i].device) != 0) )
  17.161 -                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  17.162 +                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
  17.163              else if ( xlvbd_init_device(&new_info[j]) == 0 )
  17.164 -                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  17.165 +                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
  17.166              i++; j++;
  17.167          }
  17.168      }
  17.169 @@ -482,13 +487,13 @@ void xlvbd_update_vbds(void)
  17.170      for ( ; i < old_nr; i++ )
  17.171      {
  17.172          if ( xlvbd_remove_device(old_info[i].device) != 0 )
  17.173 -            memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  17.174 +            memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
  17.175      }
  17.176  
  17.177      for ( ; j < new_nr; j++ )
  17.178      {
  17.179          if ( xlvbd_init_device(&new_info[j]) == 0 )
  17.180 -            memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  17.181 +            memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
  17.182      }
  17.183  
  17.184      vbd_info = merged_info;
  17.185 @@ -500,12 +505,12 @@ void xlvbd_update_vbds(void)
  17.186  
  17.187  
  17.188  /*
  17.189 - * Set up all the linux device goop for the virtual block devices (vbd's) that 
  17.190 - * xen tells us about. Note that although from xen's pov VBDs are addressed 
  17.191 - * simply an opaque 16-bit device number, the domain creation tools 
  17.192 + * Set up all the linux device goop for the virtual block devices (vbd's) that
  17.193 + * we know about. Note that although from the backend driver's p.o.v. VBDs are
   17.194 + * addressed simply as an opaque 16-bit device number, the domain creation tools 
  17.195   * conventionally allocate these numbers to correspond to those used by 'real' 
  17.196   * linux -- this is just for convenience as it means e.g. that the same 
  17.197 - * /etc/fstab can be used when booting with or without xen.
  17.198 + * /etc/fstab can be used when booting with or without Xen.
  17.199   */
  17.200  int __init xlvbd_init(void)
  17.201  {
  17.202 @@ -537,7 +542,7 @@ int __init xlvbd_init(void)
  17.203          xlvbd_max_sectors[i]   = 128;
  17.204      }
  17.205  
  17.206 -    vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  17.207 +    vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
  17.208      nr_vbds  = xlvbd_get_vbd_info(vbd_info);
  17.209  
  17.210      if ( nr_vbds < 0 )
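
xlvbd_update_vbds() above is a two-pointer merge that assumes both old_info
and new_info are sorted by ->device. A condensed sketch of the per-step
decision (names as in the source; a sketch, not a drop-in):

    while ( (i < old_nr) && (j < new_nr) )
    {
        if ( old_info[i].device < new_info[j].device )
            i++;        /* VBD went away: removed unless still in use         */
        else if ( old_info[i].device > new_info[j].device )
            j++;        /* new VBD: initialised and added to the merged list  */
        else
            i++, j++;   /* same device: old kept unless capacity/info changed */
    }
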
    18.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c	Tue Apr 27 13:13:06 2004 +0000
    18.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c	Wed Apr 28 09:35:33 2004 +0000
    18.3 @@ -36,7 +36,7 @@ static struct proc_dir_entry *privcmd_in
    18.4  static int privcmd_ioctl(struct inode *inode, struct file *file,
    18.5                           unsigned int cmd, unsigned long data)
    18.6  {
    18.7 -    int ret = 0;
    18.8 +    int ret = -ENOSYS;
    18.9  
   18.10      switch ( cmd )
   18.11      {
   18.12 @@ -62,10 +62,14 @@ static int privcmd_ioctl(struct inode *i
   18.13      }
   18.14      break;
   18.15  
   18.16 -    default:
   18.17 -        ret = -EINVAL;
   18.18 -    	break;
   18.19 -	}
   18.20 +    case IOCTL_PRIVCMD_INITDOMAIN_EVTCHN:
   18.21 +    {
   18.22 +        extern int initdom_ctrlif_domcontroller_port;
   18.23 +        ret = initdom_ctrlif_domcontroller_port;
   18.24 +    }
   18.25 +    break;
   18.26 +    }
   18.27 +
   18.28      return ret;
   18.29  }
   18.30  
   18.31 @@ -85,7 +89,7 @@ static int __init init_module(void)
   18.32      {
   18.33          privcmd_intf->owner      = THIS_MODULE;
   18.34          privcmd_intf->nlink      = 1;
   18.35 -	privcmd_intf->proc_fops  = &privcmd_file_ops;
   18.36 +        privcmd_intf->proc_fops  = &privcmd_file_ops;
   18.37      }
   18.38  
   18.39      return 0;
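
The new IOCTL_PRIVCMD_INITDOMAIN_EVTCHN case returns
initdom_ctrlif_domcontroller_port, which ctrl_if.c (below) fills in when the
initial domain binds its control event channel. A hedged user-space sketch;
the proc node path is an assumption for illustration:

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <asm/proc_cmd.h>   /* IOCTL_PRIVCMD_INITDOMAIN_EVTCHN */

    int get_domcontroller_port(void)
    {
        int port, fd = open("/proc/xen/privcmd", O_RDWR);  /* assumed path */
        if ( fd < 0 )
            return -1;
        port = ioctl(fd, IOCTL_PRIVCMD_INITDOMAIN_EVTCHN, 0);
        close(fd);
        return port;    /* the domain-controller end of the event channel */
    }
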
    19.1 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c	Tue Apr 27 13:13:06 2004 +0000
    19.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c	Wed Apr 28 09:35:33 2004 +0000
    19.3 @@ -17,6 +17,13 @@
    19.4  #include <asm/ctrl_if.h>
    19.5  #include <asm/evtchn.h>
    19.6  
    19.7 +/*
    19.8 + * Only used by initial domain which must create its own control-interface
    19.9 + * event channel. This value is picked up by the user-space domain controller
   19.10 + * via an ioctl.
   19.11 + */
   19.12 +int initdom_ctrlif_domcontroller_port = -1;
   19.13 +
   19.14  static int        ctrl_if_evtchn;
   19.15  static int        ctrl_if_irq;
   19.16  static spinlock_t ctrl_if_lock;
   19.17 @@ -276,9 +283,6 @@ void ctrl_if_unregister_receiver(u8 type
   19.18  
   19.19  void ctrl_if_suspend(void)
   19.20  {
   19.21 -    if ( start_info.flags & SIF_INITDOMAIN )
   19.22 -        return;
   19.23 -
   19.24      free_irq(ctrl_if_irq, NULL);
   19.25      unbind_evtchn_from_irq(ctrl_if_evtchn);
   19.26  }
   19.27 @@ -286,7 +290,21 @@ void ctrl_if_suspend(void)
   19.28  void ctrl_if_resume(void)
   19.29  {
   19.30      if ( start_info.flags & SIF_INITDOMAIN )
   19.31 -        return;
   19.32 +    {
   19.33 +        /*
   19.34 +         * The initial domain must create its own domain-controller link.
   19.35 +         * The controller is probably not running at this point, but will
    19.36 +         * pick up its end of the event channel later via an ioctl.
   19.37 +         */
   19.38 +        evtchn_op_t op;
   19.39 +        op.cmd = EVTCHNOP_bind_interdomain;
   19.40 +        op.u.bind_interdomain.dom1 = DOMID_SELF;
   19.41 +        op.u.bind_interdomain.dom2 = DOMID_SELF;
   19.42 +        if ( HYPERVISOR_event_channel_op(&op) != 0 )
   19.43 +            BUG();
   19.44 +        start_info.domain_controller_evtchn = op.u.bind_interdomain.port1;
   19.45 +        initdom_ctrlif_domcontroller_port   = op.u.bind_interdomain.port2;
   19.46 +    }
   19.47  
   19.48      ctrl_if_tx_resp_cons = 0;
   19.49      ctrl_if_rx_req_cons  = 0;
    20.1 --- a/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c	Tue Apr 27 13:13:06 2004 +0000
    20.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c	Wed Apr 28 09:35:33 2004 +0000
    20.3 @@ -5,7 +5,7 @@
    20.4   *
    20.5   * (C) Copyright 1995 1996 Linus Torvalds
    20.6   *
    20.7 - * Modifications for Xenolinux (c) 2003 Keir Fraser
    20.8 + * Modifications for Xenolinux (c) 2003-2004 Keir Fraser
    20.9   */
   20.10  
   20.11  #include <linux/slab.h>
   20.12 @@ -27,19 +27,27 @@
   20.13  #define direct_mk_pte_phys(physpage, pgprot) \
   20.14    __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
   20.15  
   20.16 -static inline void direct_remap_area_pte(pte_t *pte, 
   20.17 -                                         unsigned long address, 
   20.18 -                                         unsigned long size,
   20.19 -                                         unsigned long machine_addr, 
   20.20 -                                         pgprot_t prot,
   20.21 -                                         domid_t  domid)
   20.22 +static inline int direct_remap_area_pte(pte_t *pte, 
   20.23 +                                        unsigned long address, 
   20.24 +                                        unsigned long size,
   20.25 +                                        unsigned long machine_addr, 
   20.26 +                                        pgprot_t prot,
   20.27 +                                        domid_t  domid)
   20.28  {
   20.29      unsigned long end;
   20.30 +#define MAX_DIRECTMAP_MMU_QUEUE 64
   20.31 +    mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v;
   20.32  
   20.33 -    mmu_update_t *u, *v;
   20.34 -    u = v = vmalloc(3*PAGE_SIZE); /* plenty */
   20.35 +    address &= ~PMD_MASK;
   20.36 +    end = address + size;
   20.37 +    if (end > PMD_SIZE)
   20.38 +        end = PMD_SIZE;
   20.39 +    if (address >= end)
   20.40 +        BUG();
   20.41  
   20.42 + reset_buffer:
   20.43      /* If not I/O mapping then specify General-Purpose Subject Domain (GPS). */
   20.44 +    v = &u[0];
   20.45      if ( domid != 0 )
   20.46      {
   20.47          v[0].val  = (unsigned long)(domid<<16) & ~0xFFFFUL;
   20.48 @@ -53,12 +61,6 @@ static inline void direct_remap_area_pte
   20.49          v += 2;
   20.50      }
   20.51  
   20.52 -    address &= ~PMD_MASK;
   20.53 -    end = address + size;
   20.54 -    if (end > PMD_SIZE)
   20.55 -        end = PMD_SIZE;
   20.56 -    if (address >= end)
   20.57 -        BUG();
   20.58      do {
   20.59          if (!pte_none(*pte)) {
   20.60              printk("direct_remap_area_pte: page already exists\n");
   20.61 @@ -66,16 +68,21 @@ static inline void direct_remap_area_pte
   20.62          }
   20.63          v->ptr = virt_to_machine(pte);
   20.64          v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot) | _PAGE_IO;
   20.65 -        v++;
   20.66 +        if ( ++v == MAX_DIRECTMAP_MMU_QUEUE )
   20.67 +        {
   20.68 +            if ( HYPERVISOR_mmu_update(u, MAX_DIRECTMAP_MMU_QUEUE) < 0 )
   20.69 +                return -EFAULT;
   20.70 +            goto reset_buffer;
   20.71 +        }
   20.72          address += PAGE_SIZE;
   20.73          machine_addr += PAGE_SIZE;
   20.74          pte++;
   20.75      } while (address && (address < end));
   20.76  
   20.77      if ( ((v-u) != 0) && (HYPERVISOR_mmu_update(u, v-u) < 0) )
   20.78 -        printk(KERN_WARNING "Failed to ioremap %08lx->%08lx (%08lx)\n",
   20.79 -               end-size, end, machine_addr-size);
   20.80 -    vfree(u);
   20.81 +        return -EFAULT;
   20.82 +
   20.83 +    return 0;
   20.84  }
   20.85  
   20.86  static inline int direct_remap_area_pmd(struct mm_struct *mm,
   20.87 @@ -86,6 +93,7 @@ static inline int direct_remap_area_pmd(
   20.88                                          pgprot_t prot,
   20.89                                          domid_t  domid)
   20.90  {
   20.91 +    int error = 0;
   20.92      unsigned long end;
   20.93  
   20.94      address &= ~PGDIR_MASK;
   20.95 @@ -99,12 +107,14 @@ static inline int direct_remap_area_pmd(
   20.96          pte_t * pte = pte_alloc(mm, pmd, address);
   20.97          if (!pte)
   20.98              return -ENOMEM;
   20.99 -        direct_remap_area_pte(pte, address, end - address, 
  20.100 -                              address + machine_addr, prot, domid);
  20.101 +        error = direct_remap_area_pte(pte, address, end - address, 
  20.102 +                                      address + machine_addr, prot, domid);
  20.103 +        if ( error )
  20.104 +            break;
  20.105          address = (address + PMD_SIZE) & PMD_MASK;
  20.106          pmd++;
  20.107      } while (address && (address < end));
  20.108 -    return 0;
  20.109 +    return error;
  20.110  }
  20.111   
  20.112  int direct_remap_area_pages(struct mm_struct *mm,
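
The rewrite above drops the vmalloc'd "plenty"-sized buffer in favour of a
fixed on-stack array of MAX_DIRECTMAP_MMU_QUEUE updates, flushed to the
hypervisor whenever it fills (the reset_buffer path, which also re-emits the
subject-domain records for foreign mappings) and once more at the end. The
pattern in isolation, as a sketch:

    /* Flush-when-full batching, as in direct_remap_area_pte(). */
    #define QLEN 64                         /* == MAX_DIRECTMAP_MMU_QUEUE */
    mmu_update_t u[QLEN], *v = &u[0];

    for ( ; address < end; address += PAGE_SIZE, machine_addr += PAGE_SIZE, pte++ )
    {
        v->ptr = virt_to_machine(pte);
        v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot) | _PAGE_IO;
        if ( ++v == &u[QLEN] )
        {
            if ( HYPERVISOR_mmu_update(u, QLEN) < 0 )
                return -EFAULT;             /* batch rejected by Xen */
            v = &u[0];                      /* real code also re-adds domid records */
        }
    }
    if ( (v != &u[0]) && (HYPERVISOR_mmu_update(u, v - u) < 0) )
        return -EFAULT;
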
    21.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h	Tue Apr 27 13:13:06 2004 +0000
    21.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h	Wed Apr 28 09:35:33 2004 +0000
    21.3 @@ -52,7 +52,7 @@ int ctrl_if_send_message_noblock(
    21.4   *     function returns.
    21.5   *  2. If @hnd is NULL then no callback is executed.
    21.6   */
    21.7 -int ctrl_if_send_message(
    21.8 +int ctrl_if_send_message_block(
    21.9      ctrl_msg_t *msg, 
   21.10      ctrl_msg_handler_t hnd, 
   21.11      unsigned long id, 
    22.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h	Tue Apr 27 13:13:06 2004 +0000
    22.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h	Wed Apr 28 09:35:33 2004 +0000
    22.3 @@ -47,6 +47,11 @@ static inline pmd_t * pmd_offset(pgd_t *
    22.4  	return (pmd_t *) dir;
    22.5  }
    22.6  
    22.7 +#define pte_same(a, b)		((a).pte_low == (b).pte_low)
    22.8 +#define pte_page(x)		(mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT))))
    22.9 +#define pte_none(x)		(!(x).pte_low)
   22.10 +#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
   22.11 +
   22.12  /*
   22.13   * A note on implementation of this atomic 'get-and-clear' operation.
   22.14   * This is actually very simple because XenoLinux can only run on a single
   22.15 @@ -59,13 +64,9 @@ static inline pmd_t * pmd_offset(pgd_t *
   22.16  static inline pte_t ptep_get_and_clear(pte_t *xp)
   22.17  {
   22.18      pte_t pte = *xp;
   22.19 -    queue_l1_entry_update(xp, 0);
   22.20 +    if ( !pte_none(pte) )
   22.21 +        queue_l1_entry_update(xp, 0);
   22.22      return pte;
   22.23  }
   22.24  
   22.25 -#define pte_same(a, b)		((a).pte_low == (b).pte_low)
   22.26 -#define pte_page(x)		(mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT))))
   22.27 -#define pte_none(x)		(!(x).pte_low)
   22.28 -#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
   22.29 -
   22.30  #endif /* _I386_PGTABLE_2LEVEL_H */
    23.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h	Tue Apr 27 13:13:06 2004 +0000
    23.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h	Wed Apr 28 09:35:33 2004 +0000
    23.3 @@ -13,16 +13,21 @@ typedef struct privcmd_hypercall
    23.4      unsigned long arg[5];
    23.5  } privcmd_hypercall_t;
    23.6  
    23.7 -typedef struct privcmd_blkmsg
    23.8 -{
    23.9 -    unsigned long op;
   23.10 -    void         *buf;
   23.11 -    int           buf_size;
   23.12 -} privcmd_blkmsg_t;
   23.13 +/*
   23.14 + * @cmd: IOCTL_PRIVCMD_HYPERCALL
   23.15 + * @arg: &privcmd_hypercall_t
   23.16 + * Return: Value returned from execution of the specified hypercall.
   23.17 + */
   23.18 +#define IOCTL_PRIVCMD_HYPERCALL         \
   23.19 +    _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t))
   23.20  
   23.21 -#define IOCTL_PRIVCMD_HYPERCALL        \
   23.22 -    _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t))
   23.23 -#define IOCTL_PRIVCMD_BLKMSG           \
   23.24 -    _IOC(_IOC_NONE, 'P', 1, sizeof(privcmd_blkmsg_t))
   23.25 +/*
   23.26 + * @cmd: IOCTL_PRIVCMD_INITDOMAIN_EVTCHN
   23.27 + * @arg: n/a
   23.28 + * Return: Port associated with domain-controller end of control event channel
   23.29 + *         for the initial domain.
   23.30 + */
   23.31 +#define IOCTL_PRIVCMD_INITDOMAIN_EVTCHN \
   23.32 +    _IOC(_IOC_NONE, 'P', 1, 0)
   23.33  
   23.34  #endif /* __PROC_CMD_H__ */
    24.1 --- a/xenolinux-2.4.26-sparse/mm/vmalloc.c	Tue Apr 27 13:13:06 2004 +0000
    24.2 +++ b/xenolinux-2.4.26-sparse/mm/vmalloc.c	Wed Apr 28 09:35:33 2004 +0000
    24.3 @@ -45,6 +45,10 @@ static inline void free_area_pte(pmd_t *
    24.4  			continue;
    24.5  		if (pte_present(page)) {
    24.6  			struct page *ptpage = pte_page(page);
    24.7 +#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
    24.8 +			if (pte_io(page))
    24.9 +				continue;
   24.10 +#endif
   24.11  			if (VALID_PAGE(ptpage) && (!PageReserved(ptpage)))
   24.12  				__free_page(ptpage);
   24.13  			continue;
   24.14 @@ -250,11 +254,6 @@ void __vfree(void * addr, int free_area_
   24.15  	for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) {
   24.16  		if (tmp->addr == addr) {
   24.17  			*p = tmp->next;
   24.18 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
   24.19 -			if (tmp->flags & VM_IOREMAP)
   24.20 -				zap_page_range(&init_mm, VMALLOC_VMADDR(tmp->addr), tmp->size);
   24.21 -			else
   24.22 -#endif
   24.23  			if (free_area_pages)
   24.24  			    vmfree_area_pages(VMALLOC_VMADDR(tmp->addr), tmp->size);
   24.25  			write_unlock(&vmlist_lock);