ia64/xen-unstable

changeset 1318:8c64f13373a3

bitkeeper revision 1.878 (408cde49uOK-SAzfb5CBmpUoC0yXOg)

Dir renames.
author kaf24@scramble.cl.cam.ac.uk
date Mon Apr 26 10:02:49 2004 +0000 (2004-04-26)
parents 1acac2de0d7c
children 112eec3395e6
files .rootkeys xenolinux-2.4.26-sparse/arch/xen/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c xenolinux-2.4.26-sparse/arch/xen/drivers/netif/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vblkif.h xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c
line diff
     1.1 --- a/.rootkeys	Sat Apr 24 08:10:58 2004 +0000
     1.2 +++ b/.rootkeys	Mon Apr 26 10:02:49 2004 +0000
     1.3 @@ -656,6 +656,18 @@ 3e5a4e65TNEycLeXqPSXQJQm_xGecA xenolinux
     1.4  40648526SxcA4lGIHB_k7ID8VlRSzw xenolinux-2.4.26-sparse/arch/xen/defconfig-physdev
     1.5  3e6377f5xwPfYZkPHPrDbEq1PRN7uQ xenolinux-2.4.26-sparse/arch/xen/drivers/balloon/Makefile
     1.6  3e6377f8Me8IqtvEhb70XFgOvqQH7A xenolinux-2.4.26-sparse/arch/xen/drivers/balloon/balloon.c
     1.7 +4083dc16z0jvZEH4PiVDbDRreaNp6w xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/Makefile
     1.8 +4083dc16KQus88a4U3uCV6qVCA6_8Q xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/Makefile
     1.9 +4087cf0dPeHOvzmZAazvwLslKEF93A xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
    1.10 +4087cf0da2cROOiybf9A-j4R_yHnjg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
    1.11 +4087cf0dvXL1PKX23t_LvO1wVPb7OA xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
    1.12 +4087cf0dkVF3I19gpT1cNubeJgQr7g xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
    1.13 +4087cf0dlv1Dw4MAbeRStPPG8IvPPg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
    1.14 +40880cc6hHg6s2cPHbqPNQxENefjoQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h
    1.15 +4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile
    1.16 +4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c
    1.17 +4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h
    1.18 +4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
    1.19  3e5a4e65iHEuC5sjFhj42XALYbLVRw xenolinux-2.4.26-sparse/arch/xen/drivers/block/Makefile
    1.20  3e5a4e65pP5spJErBW69pJxSSdK9RA xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c
    1.21  3e67f822FOPwqHiaRKbrskgWgoNL5g xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.h
    1.22 @@ -667,25 +679,13 @@ 3e5a4e65BXtftInNHUC2PjDfPhdZZA xenolinux
    1.23  3e5a4e65gfn_ltB8ujHMVFApnTTNRQ xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/vfr.c
    1.24  40420a6ebRqDjufoN1WSJvolEW2Wjw xenolinux-2.4.26-sparse/arch/xen/drivers/evtchn/Makefile
    1.25  40420a73Wou6JlsZDiu6YwjYomsm7A xenolinux-2.4.26-sparse/arch/xen/drivers/evtchn/evtchn.c
    1.26 +4083dc16-Kd5y9psK_yk161sme5j5Q xenolinux-2.4.26-sparse/arch/xen/drivers/netif/Makefile
    1.27 +4083dc16UmHXxS9g_UFVnkUpN-oP2Q xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/Makefile
    1.28 +4087cf0d5dudKw_DecIJgOhLlBF_0Q xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c
    1.29 +405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/Makefile
    1.30 +405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c
    1.31  3e5a4e65gZBRBB6RsSVg1c9iahigAw xenolinux-2.4.26-sparse/arch/xen/drivers/network/Makefile
    1.32  3e5a4e65ZxKrbFetVB84JhrTyZ1YuQ xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c
    1.33 -4083dc16z0jvZEH4PiVDbDRreaNp6w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile
    1.34 -4083dc16KQus88a4U3uCV6qVCA6_8Q xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile
    1.35 -4087cf0dPeHOvzmZAazvwLslKEF93A xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h
    1.36 -4087cf0da2cROOiybf9A-j4R_yHnjg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c
    1.37 -4087cf0dvXL1PKX23t_LvO1wVPb7OA xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c
    1.38 -4087cf0dkVF3I19gpT1cNubeJgQr7g xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c
    1.39 -4087cf0dlv1Dw4MAbeRStPPG8IvPPg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c
    1.40 -4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile
    1.41 -4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c
    1.42 -4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h
    1.43 -4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c
    1.44 -40880cc6hHg6s2cPHbqPNQxENefjoQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vblkif.h
    1.45 -4083dc16-Kd5y9psK_yk161sme5j5Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile
    1.46 -4083dc16UmHXxS9g_UFVnkUpN-oP2Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile
    1.47 -4087cf0d5dudKw_DecIJgOhLlBF_0Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c
    1.48 -405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile
    1.49 -405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c
    1.50  3e5a4e65lWzkiPXsZdzPt2RNnJGG1g xenolinux-2.4.26-sparse/arch/xen/kernel/Makefile
    1.51  4075806dE5mQwlVUf8-t3YXjiMMWDQ xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c
    1.52  3e5a4e65_hqfuxtGG8IUy6wRM86Ecg xenolinux-2.4.26-sparse/arch/xen/kernel/entry.S
     2.1 --- a/xenolinux-2.4.26-sparse/arch/xen/Makefile	Sat Apr 24 08:10:58 2004 +0000
     2.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/Makefile	Mon Apr 26 10:02:49 2004 +0000
     2.3 @@ -52,8 +52,8 @@ SUBDIRS += arch/xen/kernel arch/xen/mm a
     2.4  SUBDIRS += arch/xen/drivers/console 
     2.5  SUBDIRS += arch/xen/drivers/evtchn
     2.6  ifdef CONFIG_XEN_NEWIO
     2.7 -SUBDIRS += arch/xen/drivers/vblkif
     2.8 -SUBDIRS += arch/xen/drivers/vnetif
     2.9 +SUBDIRS += arch/xen/drivers/blkif
    2.10 +SUBDIRS += arch/xen/drivers/netif
    2.11  else
    2.12  SUBDIRS += arch/xen/drivers/block
    2.13  SUBDIRS += arch/xen/drivers/network
    2.14 @@ -67,8 +67,8 @@ CORE_FILES += arch/xen/kernel/kernel.o a
    2.15  CORE_FILES += arch/xen/drivers/evtchn/drv.o
    2.16  CORE_FILES += arch/xen/drivers/console/drv.o
    2.17  ifdef CONFIG_XEN_NEWIO
    2.18 -CORE_FILES += arch/xen/drivers/vblkif/drv.o
    2.19 -CORE_FILES += arch/xen/drivers/vnetif/drv.o
    2.20 +CORE_FILES += arch/xen/drivers/blkif/drv.o
    2.21 +CORE_FILES += arch/xen/drivers/netif/drv.o
    2.22  else
    2.23  CORE_FILES += arch/xen/drivers/block/drv.o
    2.24  CORE_FILES += arch/xen/drivers/network/drv.o
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/Makefile	Mon Apr 26 10:02:49 2004 +0000
     3.3 @@ -0,0 +1,10 @@
     3.4 +
     3.5 +O_TARGET := drv.o
     3.6 +
     3.7 +subdir-y += frontend
     3.8 +obj-y    += frontend/drv.o
     3.9 +
    3.10 +subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
    3.11 +obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
    3.12 +
    3.13 +include $(TOPDIR)/Rules.make
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/Makefile	Mon Apr 26 10:02:49 2004 +0000
     4.3 @@ -0,0 +1,3 @@
     4.4 +O_TARGET := drv.o
     4.5 +obj-y := main.o control.o interface.o vbd.o
     4.6 +include $(TOPDIR)/Rules.make
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h	Mon Apr 26 10:02:49 2004 +0000
     5.3 @@ -0,0 +1,98 @@
     5.4 +/******************************************************************************
     5.5 + * arch/xen/drivers/blkif/backend/common.h
     5.6 + */
     5.7 +
     5.8 +#ifndef __BLKIF__BACKEND__COMMON_H__
     5.9 +#define __BLKIF__BACKEND__COMMON_H__
    5.10 +
    5.11 +#include <linux/config.h>
    5.12 +#include <linux/module.h>
    5.13 +#include <linux/rbtree.h>
    5.14 +#include <linux/interrupt.h>
    5.15 +#include <linux/slab.h>
    5.16 +#include <asm/ctrl_if.h>
    5.17 +#include <asm/io.h>
    5.18 +#include "../blkif.h"
    5.19 +
    5.20 +#ifndef NDEBUG
    5.21 +#define ASSERT(_p) \
     5.22 +    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s\n", #_p , \
    5.23 +    __LINE__, __FILE__); *(int*)0=0; }
    5.24 +#define DPRINTK(_f, _a...) printk("(file=%s, line=%d) " _f, \
    5.25 +                           __FILE__ , __LINE__ , ## _a )
    5.26 +#else
    5.27 +#define ASSERT(_p) ((void)0)
    5.28 +#define DPRINTK(_f, _a...) ((void)0)
    5.29 +#endif
    5.30 +
    5.31 +typedef struct blkif_st {
    5.32 +    /* Unique identifier for this interface. */
    5.33 +    domid_t          domid;
    5.34 +    unsigned int     handle;
    5.35 +    /* Physical parameters of the comms window. */
    5.36 +    unsigned long    shmem_frame;
    5.37 +    unsigned int     evtchn;
    5.38 +    int              irq;
    5.39 +    /* Comms information. */
    5.40 +    blk_ring_t      *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
    5.41 +    BLK_RING_IDX     blk_req_cons;  /* Request consumer. */
    5.42 +    BLK_RING_IDX     blk_resp_prod; /* Private version of response producer. */
    5.43 +    /* VBDs attached to this interface. */
    5.44 +    rb_root_t        vbd_rb;        /* Mapping from 16-bit vdevices to VBDs. */
    5.45 +    spinlock_t       vbd_lock;      /* Protects VBD mapping. */
    5.46 +    /* Private fields. */
    5.47 +    struct blkif_st *hash_next;
    5.48 +    struct list_head blkdev_list;
    5.49 +    spinlock_t       blk_ring_lock;
    5.50 +} blkif_t;
    5.51 +
    5.52 +void blkif_create(blkif_create_t *create);
    5.53 +void blkif_destroy(blkif_destroy_t *destroy);
    5.54 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
    5.55 +void blkif_get(blkif_t *blkif);
    5.56 +void blkif_put(blkif_t *blkif);
    5.57 +
    5.58 +/* An entry in a list of xen_extents. */
    5.59 +typedef struct _blkif_extent_le { 
    5.60 +    blkif_extent_t extent;               /* an individual extent */
    5.61 +    struct _blkif_extent_le *next;       /* and a pointer to the next */ 
    5.62 +} blkif_extent_le_t; 
    5.63 +
    5.64 +typedef struct _vbd { 
    5.65 +    blkif_vdev_t       vdevice;   /* what the domain refers to this vbd as */
    5.66 +    unsigned char      mode;      /* VBD_MODE_{R,W} */
    5.67 +    unsigned char      type;      /* XD_TYPE_xxx */
    5.68 +    blkif_extent_le_t *extents;   /* list of xen_extents making up this vbd */
    5.69 +    rb_node_t          rb;        /* for linking into R-B tree lookup struct */
    5.70 +} vbd_t; 
    5.71 +
     5.72 +void vbd_create(blkif_vbd_create_t *create_params); 
     5.73 +void vbd_grow(blkif_vbd_grow_t *grow_params); 
     5.74 +void vbd_shrink(blkif_vbd_shrink_t *shrink_params);
     5.75 +void vbd_destroy(blkif_vbd_destroy_t *delete_params); 
    5.76 +
     5.77 +void destroy_all_vbds(blkif_t *blkif);
    5.78 +
    5.79 +typedef struct {
    5.80 +    blkif_t       *blkif;
    5.81 +    unsigned long  id;
    5.82 +    atomic_t       pendcnt;
    5.83 +    unsigned short operation;
    5.84 +    unsigned short status;
    5.85 +} pending_req_t;
    5.86 +
    5.87 +/* Describes a [partial] disk extent (part of a block io request) */
    5.88 +typedef struct {
    5.89 +    unsigned short dev;
    5.90 +    unsigned short nr_sects;
    5.91 +    unsigned long  buffer;
    5.92 +    xen_sector_t   sector_number;
    5.93 +} phys_seg_t;
    5.94 +
    5.95 +int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); 
    5.96 +
     5.97 +int blkif_ctrlif_init(void);
    5.98 +
    5.99 +void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
   5.100 +
   5.101 +#endif /* __BLKIF__BACKEND__COMMON_H__ */
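
The pendcnt field in pending_req_t above drives completion: every buffer_head
that finishes decrements it, and whichever decrement reaches zero queues the
response. A minimal user-space sketch of that atomic countdown, assuming C11
atomics; the type and function names are illustrative, not part of this
changeset:

    #include <stdatomic.h>
    #include <stdio.h>

    /* Hypothetical stand-in for a multi-segment block request. */
    typedef struct {
        atomic_int    pendcnt;  /* outstanding per-segment completions */
        unsigned long id;       /* opaque id echoed back in the response */
    } demo_req_t;

    /* Called once per completed segment; the final caller responds. */
    static void demo_end_io(demo_req_t *req)
    {
        /* atomic_fetch_sub returns the value *before* the decrement. */
        if ( atomic_fetch_sub(&req->pendcnt, 1) == 1 )
            printf("all segments done; respond with id=%lu\n", req->id);
    }

    int main(void)
    {
        demo_req_t req = { .id = 42 };
        atomic_init(&req.pendcnt, 3);  /* request split into 3 segments */
        for ( int i = 0; i < 3; i++ )
            demo_end_io(&req);         /* only the third call responds */
        return 0;
    }
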
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c	Mon Apr 26 10:02:49 2004 +0000
     6.3 @@ -0,0 +1,61 @@
     6.4 +/******************************************************************************
     6.5 + * arch/xen/drivers/blkif/backend/control.c
     6.6 + * 
     6.7 + * Routines for interfacing with the control plane.
     6.8 + * 
     6.9 + * Copyright (c) 2004, Keir Fraser
    6.10 + */
    6.11 +
    6.12 +#include "common.h"
    6.13 +
    6.14 +static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
    6.15 +{
    6.16 +    switch ( msg->subtype )
    6.17 +    {
    6.18 +    case CMSG_BLKIF_BE_CREATE:
    6.19 +        if ( msg->length != sizeof(blkif_create_t) )
    6.20 +            goto parse_error;
    6.21 +        blkif_create((blkif_create_t *)&msg->msg[0]);
    6.22 +        break;        
    6.23 +    case CMSG_BLKIF_BE_DESTROY:
    6.24 +        if ( msg->length != sizeof(blkif_destroy_t) )
    6.25 +            goto parse_error;
    6.26 +        blkif_destroy((blkif_destroy_t *)&msg->msg[0]);
    6.27 +        break;        
    6.28 +    case CMSG_BLKIF_BE_VBD_CREATE:
    6.29 +        if ( msg->length != sizeof(blkif_vbd_create_t) )
    6.30 +            goto parse_error;
    6.31 +        vbd_create((blkif_vbd_create_t *)&msg->msg[0]);
    6.32 +        break;
    6.33 +    case CMSG_BLKIF_BE_VBD_DESTROY:
    6.34 +        if ( msg->length != sizeof(blkif_vbd_destroy_t) )
    6.35 +            goto parse_error;
    6.36 +        vbd_destroy((blkif_vbd_destroy_t *)&msg->msg[0]);
    6.37 +        break;
    6.38 +    case CMSG_BLKIF_BE_VBD_GROW:
    6.39 +        if ( msg->length != sizeof(blkif_vbd_grow_t) )
    6.40 +            goto parse_error;
    6.41 +        vbd_grow((blkif_vbd_grow_t *)&msg->msg[0]);
    6.42 +        break;
    6.43 +    case CMSG_BLKIF_BE_VBD_SHRINK:
    6.44 +        if ( msg->length != sizeof(blkif_vbd_shrink_t) )
    6.45 +            goto parse_error;
    6.46 +        vbd_shrink((blkif_vbd_shrink_t *)&msg->msg[0]);
    6.47 +        break;
    6.48 +    default:
    6.49 +        goto parse_error;
    6.50 +    }
    6.51 +
    6.52 +    ctrl_if_send_response(msg);
    6.53 +    return;
    6.54 +
    6.55 + parse_error:
    6.56 +    msg->length = 0;
    6.57 +    ctrl_if_send_response(msg);
    6.58 +}
    6.59 +
    6.60 +int blkif_ctrlif_init(void)
    6.61 +{
    6.62 +    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx);
    6.63 +    return 0;
    6.64 +}
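
Each subtype here is validated by exact payload length before the message body
is reinterpreted as the corresponding typed request; anything unrecognized or
mis-sized is bounced with a zero-length response. A self-contained sketch of
the same validate-then-cast pattern; the message layout below is illustrative,
not the real ctrl_msg_t:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Illustrative header plus inline payload. */
    typedef struct {
        uint8_t subtype;
        uint8_t length;
        uint8_t msg[60];
    } demo_msg_t;

    typedef struct { uint32_t domid; uint16_t vdevice; } demo_vbd_create_t;

    enum { DEMO_VBD_CREATE = 1 };

    static void demo_rx(demo_msg_t *m)
    {
        switch ( m->subtype )
        {
        case DEMO_VBD_CREATE:
            /* Reject any payload that is not exactly the typed struct. */
            if ( m->length != sizeof(demo_vbd_create_t) )
                goto parse_error;
            {
                demo_vbd_create_t req;
                memcpy(&req, m->msg, sizeof(req)); /* avoids alignment traps */
                printf("create vdev %u for dom %u\n", req.vdevice, req.domid);
            }
            break;
        default:
            goto parse_error;
        }
        return;

     parse_error:
        m->length = 0;  /* zero-length response signals a parse error */
    }

    int main(void)
    {
        demo_msg_t m = { .subtype = DEMO_VBD_CREATE,
                         .length  = sizeof(demo_vbd_create_t) };
        demo_vbd_create_t req = { .domid = 7, .vdevice = 769 };
        memcpy(m.msg, &req, sizeof(req));
        demo_rx(&m);
        return 0;
    }
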
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c	Mon Apr 26 10:02:49 2004 +0000
     7.3 @@ -0,0 +1,96 @@
     7.4 +/******************************************************************************
     7.5 + * arch/xen/drivers/blkif/backend/interface.c
     7.6 + * 
     7.7 + * Block-device interface management.
     7.8 + * 
     7.9 + * Copyright (c) 2004, Keir Fraser
    7.10 + */
    7.11 +
    7.12 +#include "common.h"
    7.13 +
    7.14 +#define BLKIF_HASHSZ 1024
    7.15 +#define BLKIF_HASH(_d,_h) \
    7.16 +    (((int)(_d)^(int)((_d)>>32)^(int)(_h))&(BLKIF_HASHSZ-1))
    7.17 +
    7.18 +static blkif_t *blkif_hash[BLKIF_HASHSZ];
    7.19 +
    7.20 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
    7.21 +{
    7.22 +    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
     7.23 +    while ( (blkif != NULL) && 
     7.24 +            ((blkif->domid != domid) || 
     7.25 +             (blkif->handle != handle)) )
    7.26 +        blkif = blkif->hash_next;
    7.27 +    return blkif;
    7.28 +}
    7.29 +
    7.30 +void blkif_create(blkif_create_t *create)
    7.31 +{
    7.32 +    domid_t       domid  = create->domid;
    7.33 +    unsigned int  handle = create->blkif_handle;
    7.34 +    unsigned int  evtchn = create->evtchn;
    7.35 +    unsigned long shmem_frame = create->shmem_frame;
    7.36 +    blkif_t     **pblkif, *blkif;
    7.37 +
    7.38 +    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
     7.39 +    while ( *pblkif != NULL )
    7.40 +    {
    7.41 +        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
    7.42 +            goto found_match;
    7.43 +        pblkif = &(*pblkif)->hash_next;
    7.44 +    }
    7.45 +
    7.46 +    blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
    7.47 +    memset(blkif, 0, sizeof(*blkif));
    7.48 +    blkif->domid       = domid;
    7.49 +    blkif->handle      = handle;
    7.50 +    blkif->evtchn      = evtchn;
    7.51 +    blkif->irq         = bind_evtchn_to_irq(evtchn);
    7.52 +    blkif->shmem_frame = shmem_frame;
     7.53 +    blkif->blk_ring_base = (blk_ring_t *)ioremap(shmem_frame<<PAGE_SHIFT, PAGE_SIZE);
    7.54 +    spin_lock_init(&blkif->vbd_lock);
    7.55 +    spin_lock_init(&blkif->blk_ring_lock);
    7.56 +
     7.57 +    request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif);
    7.58 +
    7.59 +    blkif->hash_next = *pblkif;
    7.60 +    *pblkif = blkif;
    7.61 +
    7.62 +    create->status = BLKIF_STATUS_OKAY;
    7.63 +    return;
    7.64 +
    7.65 + found_match:
    7.66 +    create->status = BLKIF_STATUS_INTERFACE_EXISTS;
    7.67 +    return;
    7.68 +
    7.69 + evtchn_in_use:
    7.70 +    unbind_evtchn_from_irq(evtchn); /* drop refcnt */
    7.71 +    create->status = BLKIF_STATUS_ERROR;
    7.72 +    return;
    7.73 +}
    7.74 +
    7.75 +void blkif_destroy(blkif_destroy_t *destroy)
    7.76 +{
    7.77 +    domid_t       domid  = destroy->domid;
    7.78 +    unsigned int  handle = destroy->blkif_handle;
    7.79 +    blkif_t     **pblkif, *blkif;
    7.80 +
    7.81 +    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
     7.82 +    while ( (blkif = *pblkif) != NULL )
    7.83 +    {
    7.84 +        if ( (blkif->domid == domid) && (blkif->handle == handle) )
    7.85 +            goto found_match;
    7.86 +        pblkif = &blkif->hash_next;
    7.87 +    }
    7.88 +
    7.89 +    destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
    7.90 +    return;
    7.91 +
    7.92 + found_match:
     7.93 +    free_irq(blkif->irq, blkif);
    7.94 +    unbind_evtchn_from_irq(blkif->evtchn);
    7.95 +    *pblkif = blkif->hash_next;
    7.96 +    kmem_cache_free(blkif_cachep, blkif);
    7.97 +    destroy->status = BLKIF_STATUS_OKAY;
    7.98 +}
    7.99 +
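
Interfaces live in a chained hash table keyed by (domid, handle); since domid
is 64 bits wide, BLKIF_HASH folds both 32-bit halves into the bucket index and
masks by the power-of-two table size. A compilable sketch of the same lookup
scheme, with illustrative table size, types, and names:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define HASHSZ 16  /* power of two, so the mask below works */

    /* Fold both halves of the 64-bit id with the handle. */
    #define HASH(d, h) \
        (((int)(d) ^ (int)((d) >> 32) ^ (int)(h)) & (HASHSZ - 1))

    typedef struct node {
        uint64_t     domid;
        unsigned int handle;
        struct node *hash_next;
    } node_t;

    static node_t *tab[HASHSZ];

    static node_t *find(uint64_t domid, unsigned int handle)
    {
        node_t *n = tab[HASH(domid, handle)];
        /* Keep walking while either key field differs. */
        while ( (n != NULL) && ((n->domid != domid) || (n->handle != handle)) )
            n = n->hash_next;
        return n;
    }

    static void insert(uint64_t domid, unsigned int handle)
    {
        node_t *n = calloc(1, sizeof(*n));
        n->domid = domid;
        n->handle = handle;
        n->hash_next = tab[HASH(domid, handle)];
        tab[HASH(domid, handle)] = n;
    }

    int main(void)
    {
        insert(1, 0);
        insert(1, 1);
        printf("found: %d\n", find(1, 1) != NULL);  /* prints 1 */
        return 0;
    }
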
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c	Mon Apr 26 10:02:49 2004 +0000
     8.3 @@ -0,0 +1,508 @@
     8.4 +/******************************************************************************
     8.5 + * arch/xen/drivers/blkif/backend/main.c
     8.6 + * 
     8.7 + * Back-end of the driver for virtual block devices. This portion of the
     8.8 + * driver exports a 'unified' block-device interface that can be accessed
     8.9 + * by any operating system that implements a compatible front end. A 
    8.10 + * reference front-end implementation can be found in:
    8.11 + *  arch/xen/drivers/blkif/frontend
    8.12 + * 
    8.13 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
    8.14 + */
    8.15 +
    8.16 +#include "common.h"
    8.17 +
    8.18 +/*
    8.19 + * These are rather arbitrary. They are fairly large because adjacent requests
    8.20 + * pulled from a communication ring are quite likely to end up being part of
    8.21 + * the same scatter/gather request at the disc.
    8.22 + * 
    8.23 + * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
    8.24 + * This will increase the chances of being able to write whole tracks.
    8.25 + * 64 should be enough to keep us competitive with Linux.
    8.26 + */
    8.27 +#define MAX_PENDING_REQS 64
    8.28 +#define BATCH_PER_DOMAIN 16
    8.29 +
    8.30 +/*
    8.31 + * Each outstanding request that we've passed to the lower device layers has a 
    8.32 + * 'pending_req' allocated to it. Each buffer_head that completes decrements 
    8.33 + * the pendcnt towards zero. When it hits zero, the specified domain has a 
    8.34 + * response queued for it, with the saved 'id' passed back.
    8.35 + * 
    8.36 + * We can't allocate pending_req's in order, since they may complete out of 
    8.37 + * order. We therefore maintain an allocation ring. This ring also indicates 
    8.38 + * when enough work has been passed down -- at that point the allocation ring 
    8.39 + * will be empty.
    8.40 + */
    8.41 +static pending_req_t pending_reqs[MAX_PENDING_REQS];
    8.42 +static unsigned char pending_ring[MAX_PENDING_REQS];
    8.43 +static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
    8.44 +/* NB. We use a different index type to differentiate from shared blk rings. */
    8.45 +typedef unsigned int PEND_RING_IDX;
    8.46 +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
    8.47 +static PEND_RING_IDX pending_prod, pending_cons;
    8.48 +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
    8.49 +
    8.50 +static kmem_cache_t *buffer_head_cachep;
    8.51 +
    8.52 +static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned;
    8.53 +
    8.54 +static int lock_buffer(blkif_t *blkif,
    8.55 +                       unsigned long buffer,
    8.56 +                       unsigned short size,
    8.57 +                       int writeable_buffer);
    8.58 +static void unlock_buffer(unsigned long buffer,
    8.59 +                          unsigned short size,
    8.60 +                          int writeable_buffer);
    8.61 +
    8.62 +static void io_schedule(unsigned long unused);
    8.63 +static int do_block_io_op(blkif_t *blkif, int max_to_do);
    8.64 +static void dispatch_rw_block_io(blkif_t *blkif,
    8.65 +                                 blk_ring_req_entry_t *req);
    8.66 +static void make_response(blkif_t *blkif, unsigned long id, 
    8.67 +                          unsigned short op, unsigned long st);
    8.68 +
    8.69 +
    8.70 +/******************************************************************
    8.71 + * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
    8.72 + */
    8.73 +
    8.74 +static struct list_head io_schedule_list;
    8.75 +static spinlock_t io_schedule_list_lock;
    8.76 +
    8.77 +static int __on_blkdev_list(blkif_t *blkif)
    8.78 +{
    8.79 +    return blkif->blkdev_list.next != NULL;
    8.80 +}
    8.81 +
    8.82 +static void remove_from_blkdev_list(blkif_t *blkif)
    8.83 +{
    8.84 +    unsigned long flags;
    8.85 +    if ( !__on_blkdev_list(blkif) ) return;
    8.86 +    spin_lock_irqsave(&io_schedule_list_lock, flags);
    8.87 +    if ( __on_blkdev_list(blkif) )
    8.88 +    {
    8.89 +        list_del(&blkif->blkdev_list);
    8.90 +        blkif->blkdev_list.next = NULL;
    8.91 +        blkif_put(blkif);
    8.92 +    }
    8.93 +    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
    8.94 +}
    8.95 +
    8.96 +static void add_to_blkdev_list_tail(blkif_t *blkif)
    8.97 +{
    8.98 +    unsigned long flags;
    8.99 +    if ( __on_blkdev_list(blkif) ) return;
   8.100 +    spin_lock_irqsave(&io_schedule_list_lock, flags);
   8.101 +    if ( !__on_blkdev_list(blkif) )
   8.102 +    {
   8.103 +        list_add_tail(&blkif->blkdev_list, &io_schedule_list);
   8.104 +        blkif_get(blkif);
   8.105 +    }
   8.106 +    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
   8.107 +}
   8.108 +
   8.109 +
   8.110 +/******************************************************************
   8.111 + * SCHEDULER FUNCTIONS
   8.112 + */
   8.113 +
   8.114 +static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
   8.115 +
   8.116 +static void io_schedule(unsigned long unused)
   8.117 +{
   8.118 +    blkif_t          *blkif;
   8.119 +    struct list_head *ent;
   8.120 +
   8.121 +    /* Queue up a batch of requests. */
   8.122 +    while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
   8.123 +            !list_empty(&io_schedule_list) )
   8.124 +    {
   8.125 +        ent = io_schedule_list.next;
   8.126 +        blkif = list_entry(ent, blkif_t, blkdev_list);
   8.127 +        blkif_get(blkif);
   8.128 +        remove_from_blkdev_list(blkif);
   8.129 +        if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
   8.130 +            add_to_blkdev_list_tail(blkif);
   8.131 +        blkif_put(blkif);
   8.132 +    }
   8.133 +
   8.134 +    /* Push the batch through to disc. */
   8.135 +    run_task_queue(&tq_disk);
   8.136 +}
   8.137 +
   8.138 +static void maybe_trigger_io_schedule(void)
   8.139 +{
   8.140 +    /*
    8.141 +     * Needed so that two processes, which together make the following predicate
   8.142 +     * true, don't both read stale values and evaluate the predicate
   8.143 +     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
   8.144 +     */
   8.145 +    smp_mb();
   8.146 +
   8.147 +    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
   8.148 +         !list_empty(&io_schedule_list) )
   8.149 +        tasklet_schedule(&io_schedule_tasklet);
   8.150 +}
   8.151 +
   8.152 +
   8.153 +
   8.154 +/******************************************************************
   8.155 + * COMPLETION CALLBACK -- Called as bh->b_end_io()
   8.156 + */
   8.157 +
   8.158 +static void end_block_io_op(struct buffer_head *bh, int uptodate)
   8.159 +{
   8.160 +    pending_req_t *pending_req = bh->b_private;
   8.161 +
   8.162 +    /* An error fails the entire request. */
   8.163 +    if ( !uptodate )
   8.164 +    {
   8.165 +        DPRINTK("Buffer not up-to-date at end of operation\n");
   8.166 +        pending_req->status = 2;
   8.167 +    }
   8.168 +
   8.169 +    unlock_buffer(virt_to_phys(bh->b_data), 
   8.170 +                  bh->b_size, 
   8.171 +                  (pending_req->operation==READ));
   8.172 +    
   8.173 +    if ( atomic_dec_and_test(&pending_req->pendcnt) )
   8.174 +    {
   8.175 +        make_response(pending_req->blkif, pending_req->id,
   8.176 +                      pending_req->operation, pending_req->status);
   8.177 +        blkif_put(pending_req->blkif);
   8.178 +        spin_lock(&pend_prod_lock);
   8.179 +        pending_ring[MASK_PEND_IDX(pending_prod)] = 
   8.180 +            pending_req - pending_reqs;
   8.181 +        pending_prod++;
   8.182 +        spin_unlock(&pend_prod_lock);
   8.183 +        maybe_trigger_io_schedule();
   8.184 +    }
   8.185 +}
   8.186 +
   8.187 +
   8.188 +
   8.189 +/******************************************************************************
   8.190 + * NOTIFICATION FROM GUEST OS.
   8.191 + */
   8.192 +
   8.193 +void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
   8.194 +{
   8.195 +    blkif_t *blkif = dev_id;
   8.196 +    add_to_blkdev_list_tail(blkif);
   8.197 +    maybe_trigger_io_schedule();
   8.198 +}
   8.199 +
   8.200 +
   8.201 +
   8.202 +/******************************************************************
   8.203 + * DOWNWARD CALLS -- These interface with the block-device layer proper.
   8.204 + */
   8.205 +
   8.206 +static int lock_buffer(blkif_t *blkif,
   8.207 +                       unsigned long buffer,
   8.208 +                       unsigned short size,
   8.209 +                       int writeable_buffer)
   8.210 +{
   8.211 +    unsigned long    pfn;
   8.212 +
   8.213 +    for ( pfn = buffer >> PAGE_SHIFT; 
   8.214 +          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
   8.215 +          pfn++ )
   8.216 +    {
   8.217 +    }
   8.218 +
   8.219 +    return 1;
   8.220 +
   8.221 + fail:
   8.222 +    while ( pfn-- > (buffer >> PAGE_SHIFT) )
   8.223 +    {        
   8.224 +    }
   8.225 +    return 0;
   8.226 +}
   8.227 +
   8.228 +static void unlock_buffer(unsigned long buffer,
   8.229 +                          unsigned short size,
   8.230 +                          int writeable_buffer)
   8.231 +{
   8.232 +    unsigned long pfn;
   8.233 +
   8.234 +    for ( pfn = buffer >> PAGE_SHIFT; 
   8.235 +          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
   8.236 +          pfn++ )
   8.237 +    {
   8.238 +    }
   8.239 +}
   8.240 +
   8.241 +static int do_block_io_op(blkif_t *blkif, int max_to_do)
   8.242 +{
   8.243 +    blk_ring_t *blk_ring = blkif->blk_ring_base;
   8.244 +    blk_ring_req_entry_t *req;
   8.245 +    BLK_RING_IDX i;
   8.246 +    int more_to_do = 0;
   8.247 +
   8.248 +    /* Take items off the comms ring, taking care not to overflow. */
   8.249 +    for ( i = blkif->blk_req_cons; 
   8.250 +          (i != blk_ring->req_prod) && ((i-blkif->blk_resp_prod) != 
   8.251 +                                        BLK_RING_SIZE);
   8.252 +          i++ )
   8.253 +    {
   8.254 +        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
   8.255 +        {
   8.256 +            more_to_do = 1;
   8.257 +            break;
   8.258 +        }
   8.259 +        
   8.260 +        req = &blk_ring->ring[MASK_BLK_IDX(i)].req;
   8.261 +        switch ( req->operation )
   8.262 +        {
   8.263 +        case BLKIF_OP_READ:
   8.264 +        case BLKIF_OP_WRITE:
   8.265 +            dispatch_rw_block_io(blkif, req);
   8.266 +            break;
   8.267 +
   8.268 +        default:
   8.269 +            DPRINTK("error: unknown block io operation [%d]\n",
    8.270 +                    blk_ring->ring[MASK_BLK_IDX(i)].req.operation);
    8.271 +            make_response(blkif, blk_ring->ring[MASK_BLK_IDX(i)].req.id, 
    8.272 +                          blk_ring->ring[MASK_BLK_IDX(i)].req.operation, 1);
   8.273 +            break;
   8.274 +        }
   8.275 +    }
   8.276 +
   8.277 +    blkif->blk_req_cons = i;
   8.278 +    return more_to_do;
   8.279 +}
   8.280 +
   8.281 +static void dispatch_rw_block_io(blkif_t *blkif,
   8.282 +                                 blk_ring_req_entry_t *req)
   8.283 +{
   8.284 +    extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
   8.285 +    struct buffer_head *bh;
    8.286 +    int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
   8.287 +    unsigned short nr_sects;
   8.288 +    unsigned long buffer;
   8.289 +    int i, tot_sects;
   8.290 +    pending_req_t *pending_req;
   8.291 +
   8.292 +    /* We map virtual scatter/gather segments to physical segments. */
   8.293 +    int new_segs, nr_psegs = 0;
   8.294 +    phys_seg_t phys_seg[MAX_BLK_SEGS * 2];
   8.295 +
   8.296 +    /* Check that number of segments is sane. */
   8.297 +    if ( unlikely(req->nr_segments == 0) || 
   8.298 +         unlikely(req->nr_segments > MAX_BLK_SEGS) )
   8.299 +    {
   8.300 +        DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
   8.301 +        goto bad_descriptor;
   8.302 +    }
   8.303 +
   8.304 +    /*
   8.305 +     * Check each address/size pair is sane, and convert into a
   8.306 +     * physical device and block offset. Note that if the offset and size
   8.307 +     * crosses a virtual extent boundary, we may end up with more
   8.308 +     * physical scatter/gather segments than virtual segments.
   8.309 +     */
   8.310 +    for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects )
   8.311 +    {
   8.312 +        buffer   = req->buffer_and_sects[i] & ~0x1FF;
   8.313 +        nr_sects = req->buffer_and_sects[i] &  0x1FF;
   8.314 +
   8.315 +        if ( unlikely(nr_sects == 0) )
   8.316 +        {
   8.317 +            DPRINTK("zero-sized data request\n");
   8.318 +            goto bad_descriptor;
   8.319 +        }
   8.320 +
   8.321 +        phys_seg[nr_psegs].dev           = req->device;
   8.322 +        phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
   8.323 +        phys_seg[nr_psegs].buffer        = buffer;
   8.324 +        phys_seg[nr_psegs].nr_sects      = nr_sects;
   8.325 +
   8.326 +        /* Translate the request into the relevant 'physical device' */
   8.327 +        new_segs = vbd_translate(&phys_seg[nr_psegs], blkif, operation);
   8.328 +        if ( new_segs < 0 )
   8.329 +        { 
   8.330 +            DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
   8.331 +                    operation == READ ? "read" : "write", 
   8.332 +                    req->sector_number + tot_sects, 
   8.333 +                    req->sector_number + tot_sects + nr_sects, 
   8.334 +                    req->device); 
   8.335 +            goto bad_descriptor;
   8.336 +        }
   8.337 +  
   8.338 +        nr_psegs += new_segs;
   8.339 +        ASSERT(nr_psegs <= MAX_BLK_SEGS*2);
   8.340 +    }
   8.341 +
   8.342 +    for ( i = 0; i < nr_psegs; i++ )
   8.343 +    {
   8.344 +        if ( unlikely(!lock_buffer(blkif, phys_seg[i].buffer, 
   8.345 +                                   phys_seg[i].nr_sects << 9,
   8.346 +                                   operation==READ)) )
   8.347 +        {
   8.348 +            DPRINTK("invalid buffer\n");
   8.349 +            while ( i-- > 0 )
   8.350 +                unlock_buffer(phys_seg[i].buffer, 
   8.351 +                              phys_seg[i].nr_sects << 9,
   8.352 +                              operation==READ);
   8.353 +            goto bad_descriptor;
   8.354 +        }
   8.355 +    }
   8.356 +
   8.357 +    pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]];
   8.358 +    pending_req->blkif     = blkif;
   8.359 +    pending_req->id        = req->id;
   8.360 +    pending_req->operation = operation;
   8.361 +    pending_req->status    = 0;
   8.362 +    atomic_set(&pending_req->pendcnt, nr_psegs);
   8.363 +
   8.364 +    blkif_get(blkif);
   8.365 +
   8.366 +    /* Now we pass each segment down to the real blkdev layer. */
   8.367 +    for ( i = 0; i < nr_psegs; i++ )
   8.368 +    {
   8.369 +        bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
   8.370 +        if ( unlikely(bh == NULL) )
   8.371 +            panic("bh is null\n");
   8.372 +        memset(bh, 0, sizeof (struct buffer_head));
   8.373 +    
   8.374 +        bh->b_size          = phys_seg[i].nr_sects << 9;
   8.375 +        bh->b_dev           = phys_seg[i].dev;
   8.376 +        bh->b_rsector       = (unsigned long)phys_seg[i].sector_number;
   8.377 +
   8.378 +        /* SMH: we store a 'pseudo-virtual' bogus address in b_data since
   8.379 +           later code will undo this transformation (i.e. +-PAGE_OFFSET). */
   8.380 +        bh->b_data          = phys_to_virt(phys_seg[i].buffer);
   8.381 + 
   8.382 +        /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */
   8.383 +        bh->b_page          = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; 
   8.384 +        bh->b_end_io        = end_block_io_op;
   8.385 +        bh->b_private       = pending_req;
   8.386 +
   8.387 +        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock);
   8.388 +        if ( operation == WRITE )
   8.389 +            bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
   8.390 +
   8.391 +        atomic_set(&bh->b_count, 1);
   8.392 +
   8.393 +        /* Dispatch a single request. We'll flush it to disc later. */
   8.394 +        submit_bh(operation, bh);
   8.395 +    }
   8.396 +
   8.397 +    return;
   8.398 +
   8.399 + bad_descriptor:
   8.400 +    make_response(blkif, req->id, req->operation, 1);
   8.401 +} 
   8.402 +
   8.403 +
   8.404 +
   8.405 +/******************************************************************
   8.406 + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
   8.407 + */
   8.408 +
   8.409 +
   8.410 +static void make_response(blkif_t *blkif, unsigned long id, 
   8.411 +                          unsigned short op, unsigned long st)
   8.412 +{
   8.413 +    blk_ring_resp_entry_t *resp;
   8.414 +
   8.415 +    /* Place on the response ring for the relevant domain. */ 
   8.416 +    spin_lock(&blkif->blk_ring_lock);
   8.417 +    resp = &blkif->blk_ring_base->
   8.418 +        ring[MASK_BLK_IDX(blkif->blk_resp_prod)].resp;
   8.419 +    resp->id        = id;
   8.420 +    resp->operation = op;
   8.421 +    resp->status    = st;
   8.422 +    wmb();
   8.423 +    blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod;
   8.424 +    spin_unlock(&blkif->blk_ring_lock);
   8.425 +
   8.426 +    /* Kick the relevant domain. */
   8.427 +    notify_via_evtchn(blkif->evtchn);
   8.428 +}
   8.429 +
   8.430 +static void blkif_debug_int(int irq, void *unused, struct pt_regs *regs)
   8.431 +{
   8.432 +#if 0
   8.433 +    unsigned long flags;
   8.434 +    struct task_struct *p;
   8.435 +    blk_ring_t *blk_ring;
   8.436 +    int i;
   8.437 +
   8.438 +    printk("Dumping block queue stats: nr_pending = %d"
   8.439 +           " (prod=0x%08x,cons=0x%08x)\n",
   8.440 +           NR_PENDING_REQS, pending_prod, pending_cons);
   8.441 +
   8.442 +    read_lock_irqsave(&tasklist_lock, flags);
   8.443 +    for_each_domain ( p )
   8.444 +    {
   8.445 +        printk("Domain: %llu\n", blkif->domain);
   8.446 +        blk_ring = blkif->blk_ring_base;
   8.447 +        printk("  req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/"
   8.448 +               "0x%08x on_list=%d\n",
   8.449 +               blk_ring->req_prod, blkif->blk_req_cons,
   8.450 +               blk_ring->resp_prod, blkif->blk_resp_prod,
   8.451 +               __on_blkdev_list(p));
   8.452 +    }
   8.453 +    read_unlock_irqrestore(&tasklist_lock, flags);
   8.454 +
   8.455 +    for ( i = 0; i < MAX_PENDING_REQS; i++ )
   8.456 +    {
   8.457 +        printk("Pend%d: dom=%p, id=%08lx, cnt=%d, op=%d, status=%d\n",
   8.458 +               i, pending_reqs[i].domain, pending_reqs[i].id,
   8.459 +               atomic_read(&pending_reqs[i].pendcnt), 
   8.460 +               pending_reqs[i].operation, pending_reqs[i].status);
   8.461 +    }
   8.462 +#endif
   8.463 +}
   8.464 +
   8.465 +void unlink_blkdev_info(blkif_t *blkif)
   8.466 +{
   8.467 +    unsigned long flags;
   8.468 +
   8.469 +    spin_lock_irqsave(&io_schedule_list_lock, flags);
   8.470 +    if ( __on_blkdev_list(blkif) )
   8.471 +    {
   8.472 +        list_del(&blkif->blkdev_list);
   8.473 +        blkif->blkdev_list.next = (void *)0xdeadbeef;
   8.474 +        blkif_put(blkif);
   8.475 +    }
   8.476 +    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
   8.477 +}
   8.478 +
   8.479 +static int __init init_module(void)
   8.480 +{
   8.481 +    int i;
   8.482 +
   8.483 +    pending_cons = 0;
   8.484 +    pending_prod = MAX_PENDING_REQS;
   8.485 +    memset(pending_reqs, 0, sizeof(pending_reqs));
   8.486 +    for ( i = 0; i < MAX_PENDING_REQS; i++ )
   8.487 +        pending_ring[i] = i;
   8.488 +    
   8.489 +    for ( i = 0; i < NR_CPUS; i++ )
   8.490 +        completed_bhs[i] = NULL;
   8.491 +        
   8.492 +    spin_lock_init(&io_schedule_list_lock);
   8.493 +    INIT_LIST_HEAD(&io_schedule_list);
   8.494 +
   8.495 +    if ( request_irq(bind_virq_to_irq(VIRQ_DEBUG), blkif_debug_int, 
   8.496 +                     SA_SHIRQ, "blkif-backend-dbg", &blkif_debug_int) != 0 )
   8.497 +        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
   8.498 +
   8.499 +    buffer_head_cachep = kmem_cache_create(
   8.500 +        "buffer_head_cache", sizeof(struct buffer_head),
   8.501 +        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
    8.502 +    blkif_ctrlif_init();
   8.503 +    return 0;
   8.504 +}
   8.505 +
   8.506 +static void cleanup_module(void)
   8.507 +{
   8.508 +}
   8.509 +
   8.510 +module_init(init_module);
   8.511 +module_exit(cleanup_module);
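
pending_ring above is a ring of free request slots: it starts full
(pending_prod = MAX_PENDING_REQS), dispatch takes a slot index off it, and
completion puts the index back. The indices free-run and are only masked on
array access, so unsigned wrap-around is harmless and occupancy is the plain
difference of the counters, which is what NR_PENDING_REQS computes. A
user-space sketch of that free-running index arithmetic; all names are
illustrative:

    #include <stdio.h>

    #define RING_SIZE 8  /* power of two, so masking replaces modulo */
    #define MASK(i)   ((i) & (RING_SIZE - 1))

    static unsigned int ring[RING_SIZE];
    /* Free-running: never reset, never masked except on array access. */
    static unsigned int prod, cons;

    static int ring_put(unsigned int v)
    {
        if ( (prod - cons) == RING_SIZE )  /* full, even across wrap */
            return -1;
        ring[MASK(prod++)] = v;
        return 0;
    }

    static int ring_get(unsigned int *v)
    {
        if ( prod == cons )                /* empty */
            return -1;
        *v = ring[MASK(cons++)];
        return 0;
    }

    int main(void)
    {
        unsigned int v;
        for ( v = 0; v < RING_SIZE; v++ )
            ring_put(v);
        while ( ring_get(&v) == 0 )
            printf("%u ", v);
        printf("\n");
        return 0;
    }
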
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c	Mon Apr 26 10:02:49 2004 +0000
     9.3 @@ -0,0 +1,578 @@
     9.4 +/******************************************************************************
     9.5 + * arch/xen/drivers/blkif/backend/vbd.c
     9.6 + * 
     9.7 + * Routines for managing virtual block devices (VBDs).
     9.8 + * 
     9.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
    9.10 + */
    9.11 +
    9.12 +#include "common.h"
    9.13 +
    9.14 +void vbd_create(blkif_vbd_create_t *create) 
    9.15 +{
    9.16 +    vbd_t       *vbd; 
    9.17 +    rb_node_t  **rb_p, *rb_parent = NULL;
    9.18 +    blkif_t     *blkif;
    9.19 +    blkif_vdev_t vdevice = create->vdevice;
    9.20 +
    9.21 +    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
    9.22 +    if ( unlikely(blkif == NULL) )
    9.23 +    {
    9.24 +        DPRINTK("vbd_create attempted for non-existent blkif (%llu,&u)\n", 
    9.25 +                create->domid, create->blkif_handle); 
    9.26 +        create->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
    9.27 +        return;
    9.28 +    }
    9.29 +
    9.30 +    spin_lock(&blkif->vbd_lock);
    9.31 +
    9.32 +    rb_p = &blkif->vbd_rb.rb_node;
    9.33 +    while ( *rb_p != NULL )
    9.34 +    {
    9.35 +        rb_parent = *rb_p;
    9.36 +        vbd = rb_entry(rb_parent, vbd_t, rb);
    9.37 +        if ( vdevice < vbd->vdevice )
    9.38 +        {
    9.39 +            rb_p = &rb_parent->rb_left;
    9.40 +        }
    9.41 +        else if ( vdevice > vbd->vdevice )
    9.42 +        {
    9.43 +            rb_p = &rb_parent->rb_right;
    9.44 +        }
    9.45 +        else
    9.46 +        {
    9.47 +            DPRINTK("vbd_create attempted for already existing vbd\n");
    9.48 +            create->status = BLKIF_STATUS_VBD_EXISTS;
    9.49 +            goto out;
    9.50 +        }
    9.51 +    }
    9.52 +
    9.53 +    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
    9.54 +    {
    9.55 +        DPRINTK("vbd_create: out of memory\n");
    9.56 +        create->status = BLKIF_STATUS_OUT_OF_MEMORY;
    9.57 +        goto out;
    9.58 +    }
    9.59 +
    9.60 +    vbd->vdevice = vdevice; 
    9.61 +    vbd->mode    = create->mode; 
    9.62 +    vbd->type    = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
    9.63 +    vbd->extents = NULL; 
    9.64 +
    9.65 +    rb_link_node(&vbd->rb, rb_parent, rb_p);
    9.66 +    rb_insert_color(&vbd->rb, &blkif->vbd_rb);
    9.67 +
    9.68 +    create->status = BLKIF_STATUS_OKAY;
    9.69 +
    9.70 + out:
    9.71 +    spin_unlock(&blkif->vbd_lock);
    9.72 +    blkif_put(blkif);
    9.73 +}
    9.74 +
    9.75 +
    9.76 +/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
    9.77 +void vbd_grow(blkif_vbd_grow_t *grow) 
    9.78 +{
    9.79 +    blkif_t          *blkif;
     9.80 +    blkif_extent_le_t **px, *x; 
    9.81 +    vbd_t            *vbd = NULL;
    9.82 +    rb_node_t        *rb;
    9.83 +    blkif_vdev_t      vdevice = grow->vdevice;
    9.84 +
    9.85 +    blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
    9.86 +    if ( unlikely(blkif == NULL) )
    9.87 +    {
    9.88 +        DPRINTK("vbd_grow attempted for non-existent blkif (%llu,&u)\n", 
    9.89 +                grow->domid, grow->blkif_handle); 
    9.90 +        grow->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
    9.91 +        return;
    9.92 +    }
    9.93 +
    9.94 +    spin_lock(&blkif->vbd_lock);
    9.95 +
    9.96 +    rb = blkif->vbd_rb.rb_node;
    9.97 +    while ( rb != NULL )
    9.98 +    {
    9.99 +        vbd = rb_entry(rb, vbd_t, rb);
   9.100 +        if ( vdevice < vbd->vdevice )
   9.101 +            rb = rb->rb_left;
   9.102 +        else if ( vdevice > vbd->vdevice )
   9.103 +            rb = rb->rb_right;
   9.104 +        else
   9.105 +            break;
   9.106 +    }
   9.107 +
   9.108 +    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
   9.109 +    {
   9.110 +        DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
   9.111 +        grow->status = BLKIF_STATUS_VBD_NOT_FOUND;
   9.112 +        goto out;
   9.113 +    } 
   9.114 +
    9.115 +    if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t), GFP_KERNEL)) == NULL) )
   9.116 +    {
   9.117 +        DPRINTK("vbd_grow: out of memory\n");
   9.118 +        grow->status = BLKIF_STATUS_OUT_OF_MEMORY;
   9.119 +        goto out;
   9.120 +    }
   9.121 + 
   9.122 +    x->extent.device        = grow->extent.device; 
   9.123 +    x->extent.sector_start  = grow->extent.sector_start; 
   9.124 +    x->extent.sector_length = grow->extent.sector_length; 
    9.125 +    x->next                 = (blkif_extent_le_t *)NULL; 
   9.126 +
   9.127 +    for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) 
   9.128 +        continue;
   9.129 +
   9.130 +    *px = x;
   9.131 +
   9.132 +    grow->status = BLKIF_STATUS_OKAY;
   9.133 +
   9.134 + out:
   9.135 +    spin_unlock(&blkif->vbd_lock);
   9.136 +    blkif_put(blkif);
   9.137 +}
   9.138 +
   9.139 +
   9.140 +void vbd_shrink(blkif_vbd_shrink_t *shrink)
   9.141 +{
   9.142 +    blkif_t          *blkif;
    9.143 +    blkif_extent_le_t **px, *x; 
   9.144 +    vbd_t            *vbd = NULL;
   9.145 +    rb_node_t        *rb;
   9.146 +    blkif_vdev_t      vdevice = shrink->vdevice;
   9.147 +
   9.148 +    blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle);
   9.149 +    if ( unlikely(blkif == NULL) )
   9.150 +    {
   9.151 +        DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,&u)\n", 
   9.152 +                shrink->domid, shrink->blkif_handle); 
   9.153 +        shrink->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
   9.154 +        return;
   9.155 +    }
   9.156 +
   9.157 +    spin_lock(&blkif->vbd_lock);
   9.158 +
   9.159 +    rb = blkif->vbd_rb.rb_node;
   9.160 +    while ( rb != NULL )
   9.161 +    {
   9.162 +        vbd = rb_entry(rb, vbd_t, rb);
   9.163 +        if ( vdevice < vbd->vdevice )
   9.164 +            rb = rb->rb_left;
   9.165 +        else if ( vdevice > vbd->vdevice )
   9.166 +            rb = rb->rb_right;
   9.167 +        else
   9.168 +            break;
   9.169 +    }
   9.170 +
   9.171 +    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
   9.172 +    {
   9.173 +        shrink->status = BLKIF_STATUS_VBD_NOT_FOUND;
   9.174 +        goto out;
   9.175 +    }
   9.176 +
   9.177 +    if ( unlikely(vbd->extents == NULL) )
   9.178 +    {
   9.179 +        shrink->status = BLKIF_STATUS_EXTENT_NOT_FOUND;
   9.180 +        goto out;
   9.181 +    }
   9.182 +
   9.183 +    /* Find the last extent. We now know that there is at least one. */
   9.184 +    for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next )
   9.185 +        continue;
   9.186 +
   9.187 +    x   = *px;
   9.188 +    *px = x->next;
   9.189 +    kfree(x);
   9.190 +
   9.191 +    shrink->status = BLKIF_STATUS_OKAY;
   9.192 +
   9.193 + out:
   9.194 +    spin_unlock(&blkif->vbd_lock);
   9.195 +    blkif_put(blkif);
   9.196 +}
   9.197 +
   9.198 +
   9.199 +void vbd_destroy(blkif_vbd_destroy_t *destroy) 
   9.200 +{
   9.201 +    blkif_t         *blkif;
   9.202 +    vbd_t           *vbd;
   9.203 +    rb_node_t       *rb;
    9.204 +    blkif_extent_le_t *x, *t;
   9.205 +    blkif_vdev_t     vdevice = destroy->vdevice;
   9.206 +
   9.207 +    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
   9.208 +    if ( unlikely(blkif == NULL) )
   9.209 +    {
   9.210 +        DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,&u)\n", 
   9.211 +                destroy->domid, destroy->blkif_handle); 
   9.212 +        destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
   9.213 +        return;
   9.214 +    }
   9.215 +
   9.216 +    spin_lock(&blkif->vbd_lock);
   9.217 +
   9.218 +    rb = blkif->vbd_rb.rb_node;
   9.219 +    while ( rb != NULL )
   9.220 +    {
   9.221 +        vbd = rb_entry(rb, vbd_t, rb);
   9.222 +        if ( vdevice < vbd->vdevice )
   9.223 +            rb = rb->rb_left;
   9.224 +        else if ( vdevice > vbd->vdevice )
   9.225 +            rb = rb->rb_right;
   9.226 +        else
   9.227 +            goto found;
   9.228 +    }
   9.229 +
   9.230 +    destroy->status = BLKIF_STATUS_VBD_NOT_FOUND;
   9.231 +    goto out;
   9.232 +
   9.233 + found:
   9.234 +    rb_erase(rb, &blkif->vbd_rb);
   9.235 +    x = vbd->extents;
   9.236 +    kfree(vbd);
   9.237 +
   9.238 +    while ( x != NULL )
   9.239 +    {
   9.240 +        t = x->next;
   9.241 +        kfree(x);
   9.242 +        x = t;
   9.243 +    }
   9.244 +    
   9.245 + out:
   9.246 +    spin_unlock(&blkif->vbd_lock);
   9.247 +    blkif_put(blkif);
   9.248 +}
   9.249 +
   9.250 +
   9.251 +void destroy_all_vbds(blkif_t *blkif)
   9.252 +{
   9.253 +    vbd_t *vbd;
   9.254 +    rb_node_t *rb;
    9.255 +    blkif_extent_le_t *x, *t;
   9.256 +
   9.257 +    spin_lock(&blkif->vbd_lock);
   9.258 +
   9.259 +    while ( (rb = blkif->vbd_rb.rb_node) != NULL )
   9.260 +    {
   9.261 +        vbd = rb_entry(rb, vbd_t, rb);
   9.262 +
   9.263 +        rb_erase(rb, &blkif->vbd_rb);
   9.264 +        x = vbd->extents;
   9.265 +        kfree(vbd);
   9.266 +        
   9.267 +        while ( x != NULL )
   9.268 +        {
   9.269 +            t = x->next;
   9.270 +            kfree(x);
   9.271 +            x = t;
   9.272 +        }          
   9.273 +    }
   9.274 +
   9.275 +    spin_unlock(&blkif->vbd_lock);
   9.276 +}
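
All the VBD operations above descend the red-black tree the same way: compare
the requested vdevice against the current node's key, go left or right, and
stop on a match or at a NULL child. The same descent on a plain binary search
tree, rebalancing omitted and all names illustrative:

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct vbd_node {
        unsigned short   vdevice;  /* lookup key */
        struct vbd_node *left, *right;
    } vbd_node_t;

    /* Same control flow as the rb-tree walks above, minus rebalancing. */
    static vbd_node_t *vbd_find(vbd_node_t *root, unsigned short vdevice)
    {
        while ( root != NULL )
        {
            if ( vdevice < root->vdevice )
                root = root->left;
            else if ( vdevice > root->vdevice )
                root = root->right;
            else
                break;  /* exact match */
        }
        return root;
    }

    static vbd_node_t *vbd_insert(vbd_node_t *root, unsigned short vdevice)
    {
        if ( root == NULL )
        {
            vbd_node_t *n = calloc(1, sizeof(*n));
            n->vdevice = vdevice;
            return n;
        }
        if ( vdevice < root->vdevice )
            root->left = vbd_insert(root->left, vdevice);
        else if ( vdevice > root->vdevice )
            root->right = vbd_insert(root->right, vdevice);
        return root;
    }

    int main(void)
    {
        vbd_node_t *root = NULL;
        root = vbd_insert(root, 769);
        root = vbd_insert(root, 770);
        printf("769 %s\n", vbd_find(root, 769) ? "found" : "missing");
        return 0;
    }
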
   9.277 +
   9.278 +
   9.279 +static int vbd_probe_single(xen_disk_info_t *xdi, 
   9.280 +                            vbd_t *vbd, 
   9.281 +                            struct task_struct *p)
   9.282 +{
    9.283 +    blkif_extent_le_t *x; 
   9.284 +    xen_disk_t cur_disk; 
   9.285 +
   9.286 +    if ( xdi->count == xdi->max )
   9.287 +    {
   9.288 +        DPRINTK("vbd_probe_devices: out of space for probe.\n"); 
   9.289 +        return -ENOMEM; 
   9.290 +    }
   9.291 +
   9.292 +    cur_disk.device = vbd->vdevice; 
   9.293 +    cur_disk.info   = vbd->type;
   9.294 +    if ( !VBD_CAN_WRITE(vbd) )
   9.295 +        cur_disk.info |= XD_FLAG_RO; 
   9.296 +    cur_disk.capacity = 0ULL;
   9.297 +    for ( x = vbd->extents; x != NULL; x = x->next )
   9.298 +        cur_disk.capacity += x->extent.nr_sectors; 
   9.299 +    cur_disk.domain = p->domain; 
   9.300 +        
   9.301 +    /* Now copy into relevant part of user-space buffer */
   9.302 +    if( copy_to_user(&xdi->disks[xdi->count], 
   9.303 +                     &cur_disk, 
   9.304 +                     sizeof(xen_disk_t)) )
   9.305 +    { 
   9.306 +        DPRINTK("vbd_probe_devices: copy_to_user failed\n");
   9.307 +        return -EFAULT;
   9.308 +    } 
   9.309 +        
   9.310 +    xdi->count++; 
   9.311 +
   9.312 +    return 0;
   9.313 +}
   9.314 +
   9.315 +
   9.316 +static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
   9.317 +{
   9.318 +    int rc = 0;
   9.319 +    rb_node_t *rb;
   9.320 +
   9.321 +    spin_lock(&p->vbd_lock);
   9.322 +
   9.323 +    if ( (rb = p->vbd_rb.rb_node) == NULL )
   9.324 +        goto out;
   9.325 +
   9.326 + new_subtree:
   9.327 +    /* STEP 1. Find least node (it'll be left-most). */
   9.328 +    while ( rb->rb_left != NULL )
   9.329 +        rb = rb->rb_left;
   9.330 +
   9.331 +    for ( ; ; )
   9.332 +    {
   9.333 +        /* STEP 2. Dealt with left subtree. Now process current node. */
   9.334 +        if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 )
   9.335 +            goto out;
   9.336 +
   9.337 +        /* STEP 3. Process right subtree, if any. */
   9.338 +        if ( rb->rb_right != NULL )
   9.339 +        {
   9.340 +            rb = rb->rb_right;
   9.341 +            goto new_subtree;
   9.342 +        }
   9.343 +
    9.344 +        /* STEP 4. Done both subtrees. Head back through ancestors. */
   9.345 +        for ( ; ; ) 
   9.346 +        {
   9.347 +            /* We're done when we get back to the root node. */
   9.348 +            if ( rb->rb_parent == NULL )
   9.349 +                goto out;
   9.350 +            /* If we are left of parent, then parent is next to process. */
   9.351 +            if ( rb->rb_parent->rb_left == rb )
   9.352 +                break;
   9.353 +            /* If we are right of parent, then we climb to grandparent. */
   9.354 +            rb = rb->rb_parent;
   9.355 +        }
   9.356 +
   9.357 +        rb = rb->rb_parent;
   9.358 +    }
   9.359 +
   9.360 + out:
   9.361 +    spin_unlock(&p->vbd_lock);
   9.362 +    return rc;  
   9.363 +}
   9.364 +
   9.365 +
   9.366 +/*
   9.367 + * Return information about the VBDs available for a given domain, or for all 
   9.368 + * domains; in the general case the 'domain' argument will be 0 which means 
   9.369 + * "information about the caller"; otherwise the 'domain' argument will 
   9.370 + * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of 
   9.371 + * these cases require the caller to be privileged.
   9.372 + */
   9.373 +long vbd_probe(vbd_probe_t *probe) 
   9.374 +{
   9.375 +    struct task_struct *p = NULL; 
   9.376 +    unsigned long flags;
   9.377 +    long ret = 0;  
   9.378 +
   9.379 +    if ( probe->domain != 0 )
   9.380 +    { 
   9.381 +        /* We can only probe for ourselves (unless we're privileged). */
   9.382 +        if( (probe->domain != current->domain) && !IS_PRIV(current) )
   9.383 +            return -EPERM; 
   9.384 +
   9.385 +        if ( (probe->domain != VBD_PROBE_ALL) &&
   9.386 +             ((p = find_domain_by_id(probe->domain)) == NULL) )
   9.387 +        {
   9.388 +            DPRINTK("vbd_probe attempted for non-existent domain %llu\n", 
   9.389 +                    probe->domain); 
   9.390 +            return -EINVAL; 
   9.391 +        }
   9.392 +    }
   9.393 +    else
   9.394 +    { 
   9.395 +        /* Default is to probe for ourselves. */
   9.396 +        p = current; 
   9.397 +        get_task_struct(p); /* to mirror final put_task_struct */
   9.398 +    }
   9.399 +
   9.400 +    if ( probe->domain == VBD_PROBE_ALL )
   9.401 +    { 
   9.402 +        read_lock_irqsave(&tasklist_lock, flags);
   9.403 +        for_each_domain ( p )
   9.404 +        {
   9.405 +            if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
   9.406 +            { 
   9.407 +                read_unlock_irqrestore(&tasklist_lock, flags);
   9.408 +                goto out; 
   9.409 +            }
   9.410 +        }
   9.411 +        read_unlock_irqrestore(&tasklist_lock, flags);
   9.412 +    } 
   9.413 +    else if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
   9.414 +        goto out; 
   9.415 +
   9.416 + out: 
   9.417 +    if ( ret != 0 )
   9.418 +        DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret); 
   9.419 +    if ( p != NULL )
   9.420 +        put_task_struct(p); 
   9.421 +    return ret; 
   9.422 +}
   9.423 +
   9.424 +
   9.425 +long vbd_info(vbd_info_t *info) 
   9.426 +{
   9.427 +    struct task_struct *p; 
   9.428 +    xen_extent_le_t *x; 
   9.429 +    xen_extent_t *extents; 
   9.430 +    vbd_t *vbd = NULL;
   9.431 +    rb_node_t *rb;
   9.432 +    long ret = 0;  
   9.433 +   
   9.434 +    if ( (info->domain != current->domain) && !IS_PRIV(current) )
   9.435 +        return -EPERM; 
   9.436 +
   9.437 +    if ( (p = find_domain_by_id(info->domain)) == NULL )
   9.438 +    {
   9.439 +        DPRINTK("vbd_info attempted for non-existent domain %llu\n", 
   9.440 +                info->domain); 
   9.441 +        return -EINVAL; 
   9.442 +    }
   9.443 +
   9.444 +    spin_lock(&p->vbd_lock);
   9.445 +
   9.446 +    rb = p->vbd_rb.rb_node;
   9.447 +    while ( rb != NULL )
   9.448 +    {
   9.449 +        vbd = rb_entry(rb, vbd_t, rb);
   9.450 +        if ( info->vdevice < vbd->vdevice )
   9.451 +            rb = rb->rb_left;
   9.452 +        else if ( info->vdevice > vbd->vdevice )
   9.453 +            rb = rb->rb_right;
   9.454 +        else
   9.455 +            break;
   9.456 +    }
   9.457 +
   9.458 +    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) )
   9.459 +    {
   9.460 +        DPRINTK("vbd_info attempted on non-existent VBD.\n"); 
   9.461 +        ret = -EINVAL; 
   9.462 +        goto out; 
   9.463 +    }
   9.464 +
   9.465 +    info->mode     = vbd->mode;
   9.466 +    info->nextents = 0; 
   9.467 +
   9.468 +    extents = info->extents;
   9.469 +    for ( x = vbd->extents; x != NULL; x = x->next )
   9.470 +    {
   9.471 +        if ( info->nextents == info->maxextents )
   9.472 +            break;
   9.473 +        if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) )
   9.474 +        {
   9.475 +            DPRINTK("vbd_info: copy_to_user failed\n");
   9.476 +            ret = -EFAULT;
   9.477 +            goto out; 
   9.478 +        } 
   9.479 +        extents++;
   9.480 +        info->nextents++;
   9.481 +    }
   9.482 +
   9.483 + out: 
   9.484 +    spin_unlock(&p->vbd_lock);
   9.485 +    put_task_struct(p); 
   9.486 +    return ret; 
   9.487 +}
   9.488 +
   9.489 +
   9.490 +int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
   9.491 +{
   9.492 +    xen_extent_le_t *x; 
   9.493 +    vbd_t *vbd;
   9.494 +    rb_node_t *rb;
   9.495 +    xen_sector_t sec_off;
   9.496 +    unsigned long nr_secs;
   9.497 +
   9.498 +    spin_lock(&p->vbd_lock);
   9.499 +
   9.500 +    rb = p->vbd_rb.rb_node;
   9.501 +    while ( rb != NULL )
   9.502 +    {
   9.503 +        vbd = rb_entry(rb, vbd_t, rb);
   9.504 +        if ( pseg->dev < vbd->vdevice )
   9.505 +            rb = rb->rb_left;
   9.506 +        else if ( pseg->dev > vbd->vdevice )
   9.507 +            rb = rb->rb_right;
   9.508 +        else
   9.509 +            goto found;
   9.510 +    }
   9.511 +
    9.512 +    DPRINTK("vbd_translate: domain %llu attempted to access "
   9.513 +            "non-existent VBD.\n", p->domain); 
   9.514 +
   9.515 +    spin_unlock(&p->vbd_lock);
   9.516 +    return -ENODEV; 
   9.517 +
   9.518 + found:
   9.519 +
   9.520 +    if ( ((operation == READ) && !VBD_CAN_READ(vbd)) ||
   9.521 +         ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) )
   9.522 +    {
   9.523 +        spin_unlock(&p->vbd_lock);
   9.524 +        return -EACCES; 
   9.525 +    }
   9.526 +
   9.527 +    /*
   9.528 +     * Now iterate through the list of xen_extents, working out which should 
   9.529 +     * be used to perform the translation.
   9.530 +     */
   9.531 +    sec_off = pseg->sector_number; 
   9.532 +    nr_secs = pseg->nr_sects;
   9.533 +    for ( x = vbd->extents; x != NULL; x = x->next )
   9.534 +    { 
   9.535 +        if ( sec_off < x->extent.nr_sectors )
   9.536 +        {
   9.537 +            pseg->dev = x->extent.device; 
   9.538 +            pseg->sector_number = x->extent.start_sector + sec_off;
   9.539 +            if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) )
   9.540 +                goto overrun;
   9.541 +            spin_unlock(&p->vbd_lock);
   9.542 +            return 1;
   9.543 +        } 
   9.544 +        sec_off -= x->extent.nr_sectors; 
   9.545 +    }
   9.546 +
   9.547 +    DPRINTK("vbd_translate: end of vbd.\n");
   9.548 +    spin_unlock(&p->vbd_lock);
   9.549 +    return -EACCES; 
   9.550 +
   9.551 +    /*
   9.552 +     * Here we deal with overrun onto the following extent. We don't deal with 
   9.553 +     * overrun of more than one boundary since each request is restricted to 
   9.554 +     * 2^9 512-byte sectors, so it should be trivial for control software to 
   9.555 +     * ensure that extents are large enough to prevent excessive overrun.
   9.556 +     */
   9.557 + overrun:
   9.558 +
   9.559 +    /* Adjust length of first chunk to run to end of first extent. */
   9.560 +    pseg[0].nr_sects = x->extent.nr_sectors - sec_off;
   9.561 +
   9.562 +    /* Set second chunk buffer and length to start where first chunk ended. */
   9.563 +    pseg[1].buffer   = pseg[0].buffer + (pseg[0].nr_sects << 9);
   9.564 +    pseg[1].nr_sects = nr_secs - pseg[0].nr_sects;
   9.565 +
   9.566 +    /* Now move to the next extent. Check it exists and is long enough! */
   9.567 +    if ( unlikely((x = x->next) == NULL) || 
   9.568 +         unlikely(x->extent.nr_sectors < pseg[1].nr_sects) )
   9.569 +    {
   9.570 +        DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
   9.571 +        spin_unlock(&p->vbd_lock);
   9.572 +        return -EACCES;
   9.573 +    }
   9.574 +
   9.575 +    /* Store the real device and start sector for the second chunk. */
   9.576 +    pseg[1].dev           = x->extent.device;
   9.577 +    pseg[1].sector_number = x->extent.start_sector;
   9.578 +    
   9.579 +    spin_unlock(&p->vbd_lock);
   9.580 +    return 2;
   9.581 +}
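
A worked example of the overrun handling above, with illustrative numbers: a 16-sector request starting at offset 1020 into a 1024-sector extent spills 12 sectors into the following extent, so the request is split into two chunks:

    /*
     *   pseg[0].nr_sects = 1024 - 1020 = 4;              (tail of extent 1)
     *   pseg[1].buffer   = pseg[0].buffer + (4 << 9);    (4 * 512 bytes on)
     *   pseg[1].nr_sects = 16 - 4 = 12;                  (head of extent 2)
     */

vbd_translate() then returns 2, i.e. two physical segments for the caller to issue.
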
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h	Mon Apr 26 10:02:49 2004 +0000
    10.3 @@ -0,0 +1,109 @@
    10.4 +/******************************************************************************
    10.5 + * blkif.h
    10.6 + * 
    10.7 + * Unified block-device I/O interface for Xen guest OSes.
    10.8 + * 
    10.9 + * Copyright (c) 2003-2004, Keir Fraser
   10.10 + */
   10.11 +
   10.12 +#ifndef __SHARED_BLKIF_H__
   10.13 +#define __SHARED_BLKIF_H__
   10.14 +
   10.15 +#define blkif_vdev_t   u16
   10.16 +#define blkif_sector_t u64
   10.17 +
   10.18 +#define BLKIF_OP_READ      0
   10.19 +#define BLKIF_OP_WRITE     1
   10.20 +#define BLKIF_OP_PROBE     2
   10.21 +
    10.22 +/* NB. Ring size must be small enough for sizeof(blkif_ring_t) <= PAGE_SIZE. */
   10.23 +#define BLKIF_RING_SIZE        64
   10.24 +
   10.25 +/*
   10.26 + * Maximum scatter/gather segments per request.
    10.27 + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
   10.28 + * NB. This could be 12 if the ring indexes weren't stored in the same page.
   10.29 + */
   10.30 +#define BLKIF_REQUEST_MAX_SEGMENTS 11
   10.31 +
   10.32 +typedef struct {
   10.33 +    unsigned char  operation;        /* BLKIF_OP_???                         */
    10.34 +    unsigned char  nr_segments;      /* <= BLKIF_REQUEST_MAX_SEGMENTS        */
    10.35 +    blkif_vdev_t   device;           /* only for read/write requests         */
    10.36 +    unsigned long  id;               /* private guest value, echoed in resp  */
    10.37 +    blkif_sector_t sector_number;    /* start sector idx on disk (r/w only)  */
    10.38 +    /* Low 9 bits are 'nr_sects'; high 23 bits are the buffer address. */
    10.39 +    unsigned long  buffer_and_sects[BLKIF_REQUEST_MAX_SEGMENTS];
   10.40 +} blkif_request_t;
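
Data buffers are sector-aligned, so the low 9 bits of each buffer_and_sects entry are free to carry the sector count. A minimal packing sketch, with hypothetical helper names:

    #define BLKIF_SECTS_MASK ((1UL << 9) - 1)

    /* Pack a 512-byte-aligned machine address with a count < 512. */
    static inline unsigned long blkif_pack_seg(unsigned long buffer_ma,
                                               unsigned long nr_sects)
    {
        return buffer_ma | (nr_sects & BLKIF_SECTS_MASK);
    }

    static inline unsigned long blkif_seg_buffer(unsigned long seg)
    {
        return seg & ~BLKIF_SECTS_MASK;    /* high 23 bits */
    }

    static inline unsigned long blkif_seg_nr_sects(unsigned long seg)
    {
        return seg & BLKIF_SECTS_MASK;     /* low 9 bits   */
    }
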
   10.41 +
   10.42 +typedef struct {
   10.43 +    unsigned long   id;              /* copied from request */
   10.44 +    unsigned char   operation;       /* copied from request */
   10.45 +    int             status;          /* BLKIF_RSP_???       */
   10.46 +} blkif_response_t;
   10.47 +
   10.48 +#define BLKIF_RSP_ERROR  -1 /* non-specific 'error' */
   10.49 +#define BLKIF_RSP_OKAY    0 /* non-specific 'okay'  */
   10.50 +
   10.51 +/*
   10.52 + * We use a special capitalised type name because it is _essential_ that all 
   10.53 + * arithmetic on indexes is done on an integer type of the correct size.
   10.54 + */
   10.55 +typedef unsigned int BLKIF_RING_IDX;
   10.56 +
   10.57 +/*
   10.58 + * Ring indexes are 'free running'. That is, they are not stored modulo the
   10.59 + * size of the ring buffer. The following macro converts a free-running counter
   10.60 + * into a value that can directly index a ring-buffer array.
   10.61 + */
   10.62 +#define MASK_BLKIF_IDX(_i) ((_i)&(BLKIF_RING_SIZE-1))
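
For example, with BLKIF_RING_SIZE == 64, a producer index of 130 and a consumer index of 127 denote three outstanding entries; the mask is applied only at the point of array access:

    /*
     *   ring[MASK_BLKIF_IDX(127)] == ring[63]
     *   ring[MASK_BLKIF_IDX(128)] == ring[0]     (wrap-around)
     *   outstanding = 130 - 127   == 3
     *
     * Unsigned arithmetic keeps 'prod - cons' correct even when the
     * free-running counters themselves wrap at 2^32.
     */
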
   10.63 +
   10.64 +typedef struct {
   10.65 +    BLKIF_RING_IDX req_prod;  /* Request producer. Updated by guest OS. */
   10.66 +    BLKIF_RING_IDX resp_prod; /* Response producer. Updated by Xen.     */
   10.67 +    union {
   10.68 +        blkif_request_t  req;
   10.69 +        blkif_response_t resp;
   10.70 +    } ring[BLKIF_RING_SIZE];
   10.71 +} blkif_ring_t;
   10.72 +
   10.73 +
   10.74 +/*
   10.75 + * BLKIF_OP_PROBE:
   10.76 + * The request format for a probe request is constrained as follows:
   10.77 + *  @operation   == BLKIF_OP_PROBE
   10.78 + *  @nr_segments == size of probe buffer in pages
   10.79 + *  @device      == unused (zero)
   10.80 + *  @id          == any value (echoed in response message)
    10.81 + *  @sector_number == unused (zero)
   10.82 + *  @buffer_and_sects == list of page-aligned, page-sized buffers.
   10.83 + *                       (i.e., nr_sects == 8).
   10.84 + * 
   10.85 + * The response is a list of vdisk_t elements copied into the out-of-band
   10.86 + * probe buffer. On success the response status field contains the number
   10.87 + * of vdisk_t elements.
   10.88 + */
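
A minimal sketch of building such a probe request, assuming a single probe page at machine address page_ma; the helper name is hypothetical:

    static void blkif_build_probe_req(blkif_request_t *req,
                                      unsigned long page_ma)
    {
        req->operation     = BLKIF_OP_PROBE;
        req->nr_segments   = 1;    /* probe buffer is one page          */
        req->device        = 0;    /* unused for probes                 */
        req->id            = 0;    /* any value; echoed in the response */
        req->sector_number = 0;    /* unused for probes                 */
        /* Page-aligned, page-sized buffer: nr_sects == 8 (4096/512).   */
        req->buffer_and_sects[0] = page_ma | 8;
    }
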
   10.89 +
   10.90 +/* XXX SMH: Type values below are chosen to match ide_xxx in Linux ide.h. */
   10.91 +#define VDISK_TYPE_FLOPPY  0x00
   10.92 +#define VDISK_TYPE_TAPE    0x01
   10.93 +#define VDISK_TYPE_CDROM   0x05
   10.94 +#define VDISK_TYPE_OPTICAL 0x07
   10.95 +#define VDISK_TYPE_DISK    0x20 
   10.96 +
   10.97 +#define VDISK_TYPE_MASK    0x3F
   10.98 +#define VDISK_TYPE(_x)     ((_x) & VDISK_TYPE_MASK) 
   10.99 +
  10.100 +/* The top two bits of the type field encode various flags. */
  10.101 +#define VDISK_FLAG_RO      0x40
  10.102 +#define VDISK_FLAG_VIRT    0x80
  10.103 +#define VDISK_READONLY(_x) ((_x) & VDISK_FLAG_RO)
  10.104 +#define VDISK_VIRTUAL(_x)  ((_x) & VDISK_FLAG_VIRT) 
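
A worked decode of an illustrative info value, 0x45:

    /*
     *   VDISK_TYPE(0x45)     == 0x05  ->  VDISK_TYPE_CDROM
     *   VDISK_READONLY(0x45) == 0x40  ->  read-only
     *   VDISK_VIRTUAL(0x45)  == 0x00  ->  not a virtual mapping
     */
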
  10.105 +
  10.106 +typedef struct {
  10.107 +    blkif_vdev_t   device;       /* Device number (opaque 16 bit value). */
  10.108 +    unsigned short info;         /* Device type and flags (VDISK_*).     */
   10.109 +    blkif_sector_t capacity;     /* Size in terms of 512-byte sectors.   */
  10.110 +} vdisk_t;
  10.111 +
  10.112 +#endif /* __SHARED_BLKIF_H__ */
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile	Mon Apr 26 10:02:49 2004 +0000
    11.3 @@ -0,0 +1,3 @@
    11.4 +O_TARGET := drv.o
    11.5 +obj-y := block.o vbd.o
    11.6 +include $(TOPDIR)/Rules.make
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c	Mon Apr 26 10:02:49 2004 +0000
    12.3 @@ -0,0 +1,625 @@
    12.4 +/******************************************************************************
    12.5 + * block.c
    12.6 + * 
    12.7 + * Xenolinux virtual block-device driver.
    12.8 + * 
    12.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   12.10 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   12.11 + */
   12.12 +
   12.13 +#include "block.h"
   12.14 +#include <linux/blk.h>
   12.15 +#include <linux/cdrom.h>
   12.16 +#include <linux/tqueue.h>
   12.17 +#include <linux/sched.h>
   12.18 +#include <scsi/scsi.h>
   12.19 +
   12.20 +#include <linux/interrupt.h>
   12.21 +
   12.22 +typedef unsigned char byte; /* from linux/ide.h */
   12.23 +
   12.24 +#define STATE_ACTIVE    0
   12.25 +#define STATE_SUSPENDED 1
   12.26 +#define STATE_CLOSED    2
   12.27 +static unsigned int state = STATE_SUSPENDED;
   12.28 +
   12.29 +/* Dynamically-mapped IRQs. */
   12.30 +static int xlblk_response_irq, xlblk_update_irq;
   12.31 +
   12.32 +static blk_ring_t *blk_ring;
   12.33 +static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
   12.34 +static BLK_RING_IDX req_prod;  /* Private request producer.         */
   12.35 +
   12.36 +/* We plug the I/O ring if the driver is suspended or if the ring is full. */
   12.37 +#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
   12.38 +                      (state != STATE_ACTIVE))
   12.39 +
   12.40 +
   12.41 +/*
   12.42 + * Request queues with outstanding work, but ring is currently full.
   12.43 + * We need no special lock here, as we always access this with the
   12.44 + * io_request_lock held. We only need a small maximum list.
   12.45 + */
   12.46 +#define MAX_PENDING 8
   12.47 +static request_queue_t *pending_queues[MAX_PENDING];
   12.48 +static int nr_pending;
   12.49 +
   12.50 +static kdev_t        sg_dev;
   12.51 +static int           sg_operation = -1;
   12.52 +static unsigned long sg_next_sect;
   12.53 +#define DISABLE_SCATTERGATHER() (sg_operation = -1)
   12.54 +
   12.55 +static inline void signal_requests_to_xen(void)
   12.56 +{
   12.57 +    block_io_op_t op; 
   12.58 +
   12.59 +    DISABLE_SCATTERGATHER();
   12.60 +    blk_ring->req_prod = req_prod;
   12.61 +
   12.62 +    op.cmd = BLOCK_IO_OP_SIGNAL; 
   12.63 +    HYPERVISOR_block_io_op(&op);
   12.64 +    return;
   12.65 +}
   12.66 +
   12.67 +
   12.68 +/*
    12.69 + * xlblk_update_int / update_vbds_task - handle VBD update events from Xen
    12.70 + * 
    12.71 + * Schedule a task for keventd to run, which will update the VBDs and perform 
    12.72 + * the corresponding updates to our view of VBD state, so that XenoLinux 
    12.73 + * responds to changes / additions / deletions to the set of VBDs automatically.
   12.74 + */
   12.75 +static struct tq_struct update_tq;
   12.76 +static void update_vbds_task(void *unused)
   12.77 +{ 
   12.78 +    xlvbd_update_vbds();
   12.79 +}
   12.80 +static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
   12.81 +{
   12.82 +    update_tq.routine = update_vbds_task;
   12.83 +    schedule_task(&update_tq);
   12.84 +}
   12.85 +
   12.86 +
   12.87 +int xen_block_open(struct inode *inode, struct file *filep)
   12.88 +{
   12.89 +    short xldev = inode->i_rdev; 
   12.90 +    struct gendisk *gd = get_gendisk(xldev);
   12.91 +    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
   12.92 +    short minor = MINOR(xldev); 
   12.93 +
   12.94 +    if ( gd->part[minor].nr_sects == 0 )
   12.95 +    { 
   12.96 +        /*
   12.97 +         * Device either doesn't exist, or has zero capacity; we use a few
   12.98 +         * cheesy heuristics to return the relevant error code
   12.99 +         */
  12.100 +        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
  12.101 +             ((minor & (gd->max_p - 1)) != 0) )
  12.102 +        { 
  12.103 +            /*
   12.104 +             * Either the device is real but the partition doesn't exist,
   12.105 +             * or we were given a bare partition number: guess ENXIO.
  12.106 +             */
  12.107 +            return -ENXIO;     /* no such device or address */
  12.108 +        }
  12.109 +        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
  12.110 +        {
  12.111 +            /* This is a removable device => assume that media is missing. */ 
  12.112 +            return -ENOMEDIUM; /* media not present (this is a guess) */
  12.113 +        } 
  12.114 +        else
  12.115 +        { 
  12.116 +            /* Just go for the general 'no such device' error. */
  12.117 +            return -ENODEV;    /* no such device */
  12.118 +        }
  12.119 +    }
  12.120 +    
  12.121 +    /* Update of usage count is protected by per-device semaphore. */
  12.122 +    disk->usage++;
  12.123 +
  12.124 +    return 0;
  12.125 +}
  12.126 +
  12.127 +
  12.128 +int xen_block_release(struct inode *inode, struct file *filep)
  12.129 +{
  12.130 +    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
  12.131 +
  12.132 +    /*
  12.133 +     * When usage drops to zero it may allow more VBD updates to occur.
  12.134 +     * Update of usage count is protected by a per-device semaphore.
  12.135 +     */
  12.136 +    if ( --disk->usage == 0 )
  12.137 +    {
  12.138 +        update_tq.routine = update_vbds_task;
  12.139 +        schedule_task(&update_tq);
  12.140 +    }
  12.141 +
  12.142 +    return 0;
  12.143 +}
  12.144 +
  12.145 +
  12.146 +int xen_block_ioctl(struct inode *inode, struct file *filep,
  12.147 +                          unsigned command, unsigned long argument)
  12.148 +{
  12.149 +    kdev_t dev = inode->i_rdev;
  12.150 +    struct hd_geometry *geo = (struct hd_geometry *)argument;
  12.151 +    struct gendisk *gd;     
  12.152 +    struct hd_struct *part; 
  12.153 +    int i;
  12.154 +
  12.155 +    /* NB. No need to check permissions. That is done for us. */
  12.156 +    
  12.157 +    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
  12.158 +                  command, (long) argument, dev); 
  12.159 +  
  12.160 +    gd = get_gendisk(dev);
  12.161 +    part = &gd->part[MINOR(dev)]; 
  12.162 +
  12.163 +    switch ( command )
  12.164 +    {
  12.165 +    case BLKGETSIZE:
  12.166 +        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
  12.167 +        return put_user(part->nr_sects, (unsigned long *) argument);
  12.168 +
  12.169 +    case BLKGETSIZE64:
  12.170 +        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
  12.171 +                      (u64)part->nr_sects * 512);
  12.172 +        return put_user((u64)part->nr_sects * 512, (u64 *) argument);
  12.173 +
  12.174 +    case BLKRRPART:                               /* re-read partition table */
  12.175 +        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
  12.176 +        return xen_block_revalidate(dev);
  12.177 +
  12.178 +    case BLKSSZGET:
  12.179 +        return hardsect_size[MAJOR(dev)][MINOR(dev)]; 
  12.180 +
  12.181 +    case BLKBSZGET:                                        /* get block size */
  12.182 +        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
  12.183 +        break;
  12.184 +
  12.185 +    case BLKBSZSET:                                        /* set block size */
  12.186 +        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
  12.187 +        break;
  12.188 +
  12.189 +    case BLKRASET:                                         /* set read-ahead */
  12.190 +        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
  12.191 +        break;
  12.192 +
  12.193 +    case BLKRAGET:                                         /* get read-ahead */
   12.194 +        DPRINTK_IOCTL("   BLKRAGET: %x\n", BLKRAGET);
  12.195 +        break;
  12.196 +
  12.197 +    case HDIO_GETGEO:
  12.198 +        /* note: these values are complete garbage */
  12.199 +        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
  12.200 +        if (!argument) return -EINVAL;
  12.201 +        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
  12.202 +        if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
  12.203 +        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  12.204 +        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
  12.205 +        return 0;
  12.206 +
  12.207 +    case HDIO_GETGEO_BIG: 
  12.208 +        /* note: these values are complete garbage */
  12.209 +        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
  12.210 +        if (!argument) return -EINVAL;
  12.211 +        if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
  12.212 +        if (put_user(0xff,  (byte *)&geo->heads))   return -EFAULT;
  12.213 +        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  12.214 +        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
  12.215 +        return 0;
  12.216 +
  12.217 +    case CDROMMULTISESSION:
  12.218 +        DPRINTK("FIXME: support multisession CDs later\n");
  12.219 +        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
  12.220 +            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
  12.221 +        return 0;
  12.222 +
  12.223 +    case SCSI_IOCTL_GET_BUS_NUMBER:
  12.224 +        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev");
  12.225 +        return -ENOSYS;
  12.226 +
  12.227 +    default:
  12.228 +        printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command);
  12.229 +        return -ENOSYS;
  12.230 +    }
  12.231 +    
  12.232 +    return 0;
  12.233 +}
  12.234 +
  12.235 +/* check media change: should probably do something here in some cases :-) */
  12.236 +int xen_block_check(kdev_t dev)
  12.237 +{
  12.238 +    DPRINTK("xen_block_check\n");
  12.239 +    return 0;
  12.240 +}
  12.241 +
  12.242 +int xen_block_revalidate(kdev_t dev)
  12.243 +{
  12.244 +    struct block_device *bd;
  12.245 +    struct gendisk *gd;
  12.246 +    xl_disk_t *disk;
  12.247 +    unsigned long capacity;
  12.248 +    int i, rc = 0;
  12.249 +    
  12.250 +    if ( (bd = bdget(dev)) == NULL )
  12.251 +        return -EINVAL;
  12.252 +
  12.253 +    /*
  12.254 +     * Update of partition info, and check of usage count, is protected
  12.255 +     * by the per-block-device semaphore.
  12.256 +     */
  12.257 +    down(&bd->bd_sem);
  12.258 +
  12.259 +    if ( ((gd = get_gendisk(dev)) == NULL) ||
  12.260 +         ((disk = xldev_to_xldisk(dev)) == NULL) ||
  12.261 +         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
  12.262 +    {
  12.263 +        rc = -EINVAL;
  12.264 +        goto out;
  12.265 +    }
  12.266 +
  12.267 +    if ( disk->usage > 1 )
  12.268 +    {
  12.269 +        rc = -EBUSY;
  12.270 +        goto out;
  12.271 +    }
  12.272 +
  12.273 +    /* Only reread partition table if VBDs aren't mapped to partitions. */
  12.274 +    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
  12.275 +    {
  12.276 +        for ( i = gd->max_p - 1; i >= 0; i-- )
  12.277 +        {
  12.278 +            invalidate_device(dev+i, 1);
  12.279 +            gd->part[MINOR(dev+i)].start_sect = 0;
  12.280 +            gd->part[MINOR(dev+i)].nr_sects   = 0;
  12.281 +            gd->sizes[MINOR(dev+i)]           = 0;
  12.282 +        }
  12.283 +
  12.284 +        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
  12.285 +    }
  12.286 +
  12.287 + out:
  12.288 +    up(&bd->bd_sem);
  12.289 +    bdput(bd);
  12.290 +    return rc;
  12.291 +}
  12.292 +
  12.293 +
  12.294 +/*
  12.295 + * hypervisor_request
  12.296 + *
  12.297 + * request block io 
  12.298 + * 
  12.299 + * id: for guest use only.
  12.300 + * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
  12.301 + * buffer: buffer to read/write into. this should be a
  12.302 + *   virtual address in the guest os.
  12.303 + */
  12.304 +static int hypervisor_request(unsigned long   id,
  12.305 +                              int             operation,
  12.306 +                              char *          buffer,
  12.307 +                              unsigned long   sector_number,
  12.308 +                              unsigned short  nr_sectors,
  12.309 +                              kdev_t          device)
  12.310 +{
  12.311 +    unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 
  12.312 +    struct gendisk *gd;
  12.313 +    blk_ring_req_entry_t *req;
  12.314 +    struct buffer_head *bh;
  12.315 +
  12.316 +    if ( unlikely(nr_sectors >= (1<<9)) )
  12.317 +        BUG();
  12.318 +    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
  12.319 +        BUG();
  12.320 +
  12.321 +    if ( unlikely(state == STATE_CLOSED) )
  12.322 +        return 1;
  12.323 +
  12.324 +    switch ( operation )
  12.325 +    {
  12.326 +
  12.327 +    case XEN_BLOCK_READ:
  12.328 +    case XEN_BLOCK_WRITE:
  12.329 +        gd = get_gendisk(device); 
  12.330 +
  12.331 +        /*
  12.332 +         * Update the sector_number we'll pass down as appropriate; note that
  12.333 +         * we could sanity check that resulting sector will be in this
  12.334 +         * partition, but this will happen in xen anyhow.
  12.335 +         */
  12.336 +        sector_number += gd->part[MINOR(device)].start_sect;
  12.337 +
  12.338 +        /*
  12.339 +         * If this unit doesn't consist of virtual (i.e., Xen-specified)
  12.340 +         * partitions then we clear the partn bits from the device number.
  12.341 +         */
  12.342 +        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
  12.343 +               GENHD_FL_VIRT_PARTNS) )
  12.344 +            device &= ~(gd->max_p - 1);
  12.345 +
  12.346 +        if ( (sg_operation == operation) &&
  12.347 +             (sg_dev == device) &&
  12.348 +             (sg_next_sect == sector_number) )
  12.349 +        {
  12.350 +            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
  12.351 +            bh = (struct buffer_head *)id;
  12.352 +            bh->b_reqnext = (struct buffer_head *)req->id;
  12.353 +            req->id = id;
  12.354 +            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
  12.355 +            if ( ++req->nr_segments < MAX_BLK_SEGS )
  12.356 +                sg_next_sect += nr_sectors;
  12.357 +            else
  12.358 +                DISABLE_SCATTERGATHER();
  12.359 +            return 0;
  12.360 +        }
  12.361 +        else if ( RING_PLUGGED )
  12.362 +        {
  12.363 +            return 1;
  12.364 +        }
  12.365 +        else
  12.366 +        {
  12.367 +            sg_operation = operation;
  12.368 +            sg_dev       = device;
  12.369 +            sg_next_sect = sector_number + nr_sectors;
  12.370 +        }
  12.371 +        break;
  12.372 +
  12.373 +    default:
  12.374 +        panic("unknown op %d\n", operation);
  12.375 +    }
  12.376 +
  12.377 +    /* Fill out a communications ring structure. */
  12.378 +    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
  12.379 +    req->id            = id;
  12.380 +    req->operation     = operation;
  12.381 +    req->sector_number = (xen_sector_t)sector_number;
  12.382 +    req->device        = device; 
  12.383 +    req->nr_segments   = 1;
  12.384 +    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
  12.385 +    req_prod++;
  12.386 +
  12.387 +    return 0;
  12.388 +}
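
The scatter-gather path by example, with illustrative values and a partition starting at sector 0: two 2-sector WRITEs at sectors 100 and 102 of the same device arrive back-to-back:

    /*
     *   hypervisor_request(id1, XEN_BLOCK_WRITE, buf1, 100, 2, dev);
     *       -> no sg match: fills a fresh ring slot, nr_segments = 1,
     *          and primes sg_dev/sg_operation with sg_next_sect = 102
     *   hypervisor_request(id2, XEN_BLOCK_WRITE, buf2, 102, 2, dev);
     *       -> matches sg state: folded into slot MASK_BLK_IDX(req_prod-1)
     *          as a second buffer_and_sects[] segment; sg_next_sect = 104
     */
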
  12.389 +
  12.390 +
  12.391 +/*
  12.392 + * do_xlblk_request
   12.393 + *  read or write blocks; requests are taken from a request queue
  12.394 + */
  12.395 +void do_xlblk_request(request_queue_t *rq)
  12.396 +{
  12.397 +    struct request *req;
  12.398 +    struct buffer_head *bh, *next_bh;
  12.399 +    int rw, nsect, full, queued = 0;
  12.400 +
  12.401 +    DPRINTK("xlblk.c::do_xlblk_request\n"); 
  12.402 +
  12.403 +    while ( !rq->plugged && !list_empty(&rq->queue_head))
  12.404 +    {
  12.405 +        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
  12.406 +            goto out;
  12.407 +  
  12.408 +        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
  12.409 +                req, req->cmd, req->sector,
  12.410 +                req->current_nr_sectors, req->nr_sectors, req->bh);
  12.411 +
  12.412 +        rw = req->cmd;
  12.413 +        if ( rw == READA )
  12.414 +            rw = READ;
  12.415 +        if ( unlikely((rw != READ) && (rw != WRITE)) )
  12.416 +            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
  12.417 +
  12.418 +        req->errors = 0;
  12.419 +
  12.420 +        bh = req->bh;
  12.421 +        while ( bh != NULL )
  12.422 +        {
  12.423 +            next_bh = bh->b_reqnext;
  12.424 +            bh->b_reqnext = NULL;
  12.425 +
  12.426 +            full = hypervisor_request(
  12.427 +                (unsigned long)bh,
  12.428 +                (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
  12.429 +                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
  12.430 +
  12.431 +            if ( full )
  12.432 +            { 
  12.433 +                bh->b_reqnext = next_bh;
  12.434 +                pending_queues[nr_pending++] = rq;
  12.435 +                if ( unlikely(nr_pending >= MAX_PENDING) )
  12.436 +                    BUG();
  12.437 +                goto out; 
  12.438 +            }
  12.439 +
  12.440 +            queued++;
  12.441 +
  12.442 +            /* Dequeue the buffer head from the request. */
  12.443 +            nsect = bh->b_size >> 9;
  12.444 +            bh = req->bh = next_bh;
  12.445 +            
  12.446 +            if ( bh != NULL )
  12.447 +            {
  12.448 +                /* There's another buffer head to do. Update the request. */
  12.449 +                req->hard_sector += nsect;
  12.450 +                req->hard_nr_sectors -= nsect;
  12.451 +                req->sector = req->hard_sector;
  12.452 +                req->nr_sectors = req->hard_nr_sectors;
  12.453 +                req->current_nr_sectors = bh->b_size >> 9;
  12.454 +                req->buffer = bh->b_data;
  12.455 +            }
  12.456 +            else
  12.457 +            {
  12.458 +                /* That was the last buffer head. Finalise the request. */
  12.459 +                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
  12.460 +                    BUG();
  12.461 +                blkdev_dequeue_request(req);
  12.462 +                end_that_request_last(req);
  12.463 +            }
  12.464 +        }
  12.465 +    }
  12.466 +
  12.467 + out:
  12.468 +    if ( queued != 0 ) signal_requests_to_xen();
  12.469 +}
  12.470 +
  12.471 +
  12.472 +static void kick_pending_request_queues(void)
  12.473 +{
  12.474 +    /* We kick pending request queues if the ring is reasonably empty. */
  12.475 +    if ( (nr_pending != 0) && 
  12.476 +         ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
  12.477 +    {
  12.478 +        /* Attempt to drain the queue, but bail if the ring becomes full. */
  12.479 +        while ( (nr_pending != 0) && !RING_PLUGGED )
  12.480 +            do_xlblk_request(pending_queues[--nr_pending]);
  12.481 +    }
  12.482 +}
  12.483 +
  12.484 +
  12.485 +static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
  12.486 +{
  12.487 +    BLK_RING_IDX i; 
  12.488 +    unsigned long flags; 
  12.489 +    struct buffer_head *bh, *next_bh;
  12.490 +    
  12.491 +    if ( unlikely(state == STATE_CLOSED) )
  12.492 +        return;
  12.493 +    
  12.494 +    spin_lock_irqsave(&io_request_lock, flags);     
  12.495 +
  12.496 +    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
  12.497 +    {
  12.498 +        blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
  12.499 +        switch ( bret->operation )
  12.500 +        {
  12.501 +        case XEN_BLOCK_READ:
  12.502 +        case XEN_BLOCK_WRITE:
  12.503 +            if ( unlikely(bret->status != 0) )
  12.504 +                DPRINTK("Bad return from blkdev data request: %lx\n",
  12.505 +                        bret->status);
  12.506 +            for ( bh = (struct buffer_head *)bret->id; 
  12.507 +                  bh != NULL; 
  12.508 +                  bh = next_bh )
  12.509 +            {
  12.510 +                next_bh = bh->b_reqnext;
  12.511 +                bh->b_reqnext = NULL;
  12.512 +                bh->b_end_io(bh, !bret->status);
  12.513 +            }
  12.514 +            break;
  12.515 +     
  12.516 +        default:
  12.517 +            BUG();
  12.518 +        }
  12.519 +    }
  12.520 +    
  12.521 +    resp_cons = i;
  12.522 +
  12.523 +    kick_pending_request_queues();
  12.524 +
  12.525 +    spin_unlock_irqrestore(&io_request_lock, flags);
  12.526 +}
  12.527 +
  12.528 +
  12.529 +static void reset_xlblk_interface(void)
  12.530 +{
  12.531 +    block_io_op_t op; 
  12.532 +
  12.533 +    nr_pending = 0;
  12.534 +
  12.535 +    op.cmd = BLOCK_IO_OP_RESET;
  12.536 +    if ( HYPERVISOR_block_io_op(&op) != 0 )
  12.537 +        printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
  12.538 +
  12.539 +    op.cmd = BLOCK_IO_OP_RING_ADDRESS;
  12.540 +    (void)HYPERVISOR_block_io_op(&op);
  12.541 +
  12.542 +    set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
  12.543 +    blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
  12.544 +    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
  12.545 +
  12.546 +    wmb();
  12.547 +    state = STATE_ACTIVE;
  12.548 +}
  12.549 +
  12.550 +
  12.551 +int __init xlblk_init(void)
  12.552 +{
  12.553 +    int error; 
  12.554 +
  12.555 +    reset_xlblk_interface();
  12.556 +
  12.557 +    xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
  12.558 +    xlblk_update_irq   = bind_virq_to_irq(VIRQ_VBD_UPD);
  12.559 +
  12.560 +    error = request_irq(xlblk_response_irq, xlblk_response_int, 
  12.561 +                        SA_SAMPLE_RANDOM, "blkdev", NULL);
  12.562 +    if ( error )
  12.563 +    {
  12.564 +        printk(KERN_ALERT "Could not allocate receive interrupt\n");
  12.565 +        goto fail;
  12.566 +    }
  12.567 +
  12.568 +    error = request_irq(xlblk_update_irq, xlblk_update_int,
  12.569 +                        0, "blkdev", NULL);
  12.570 +
  12.571 +    if ( error )
  12.572 +    {
  12.573 +        printk(KERN_ALERT "Could not allocate block update interrupt\n");
  12.574 +        goto fail;
  12.575 +    }
  12.576 +
  12.577 +    (void)xlvbd_init();
  12.578 +
  12.579 +    return 0;
  12.580 +
  12.581 + fail:
  12.582 +    return error;
  12.583 +}
  12.584 +
  12.585 +
  12.586 +static void __exit xlblk_cleanup(void)
  12.587 +{
  12.588 +    xlvbd_cleanup();
  12.589 +    free_irq(xlblk_response_irq, NULL);
  12.590 +    free_irq(xlblk_update_irq, NULL);
  12.591 +    unbind_virq_from_irq(VIRQ_BLKDEV);
  12.592 +    unbind_virq_from_irq(VIRQ_VBD_UPD);
  12.593 +}
  12.594 +
  12.595 +
  12.596 +#ifdef MODULE
  12.597 +module_init(xlblk_init);
  12.598 +module_exit(xlblk_cleanup);
  12.599 +#endif
  12.600 +
  12.601 +
  12.602 +void blkdev_suspend(void)
  12.603 +{
  12.604 +    state = STATE_SUSPENDED;
  12.605 +    wmb();
  12.606 +
  12.607 +    while ( resp_cons != blk_ring->req_prod )
  12.608 +    {
  12.609 +        barrier();
  12.610 +        current->state = TASK_INTERRUPTIBLE;
  12.611 +        schedule_timeout(1);
  12.612 +    }
  12.613 +
  12.614 +    wmb();
  12.615 +    state = STATE_CLOSED;
  12.616 +    wmb();
  12.617 +
  12.618 +    clear_fixmap(FIX_BLKRING_BASE);
  12.619 +}
  12.620 +
  12.621 +
  12.622 +void blkdev_resume(void)
  12.623 +{
  12.624 +    reset_xlblk_interface();
  12.625 +    spin_lock_irq(&io_request_lock);
  12.626 +    kick_pending_request_queues();
  12.627 +    spin_unlock_irq(&io_request_lock);
  12.628 +}
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h	Mon Apr 26 10:02:49 2004 +0000
    13.3 @@ -0,0 +1,82 @@
    13.4 +/******************************************************************************
    13.5 + * block.h
    13.6 + * 
    13.7 + * Shared definitions between all levels of XenoLinux Virtual block devices.
    13.8 + */
    13.9 +
   13.10 +#ifndef __XEN_DRIVERS_BLOCK_H__
   13.11 +#define __XEN_DRIVERS_BLOCK_H__
   13.12 +
   13.13 +#include <linux/config.h>
   13.14 +#include <linux/module.h>
   13.15 +
   13.16 +#include <linux/kernel.h>
   13.17 +#include <linux/sched.h>
   13.18 +#include <linux/slab.h>
   13.19 +#include <linux/string.h>
   13.20 +#include <linux/errno.h>
   13.21 +
   13.22 +#include <linux/fs.h>
   13.23 +#include <linux/hdreg.h>
   13.24 +#include <linux/blkdev.h>
   13.25 +#include <linux/major.h>
   13.26 +
   13.27 +#include <asm/hypervisor-ifs/hypervisor-if.h>
   13.28 +#include <asm/hypervisor-ifs/vbd.h>
   13.29 +#include <asm/io.h>
   13.30 +#include <asm/atomic.h>
   13.31 +#include <asm/uaccess.h>
   13.32 +
   13.33 +#if 0
   13.34 +#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   13.35 +#else
   13.36 +#define DPRINTK(_f, _a...) ((void)0)
   13.37 +#endif
   13.38 +
   13.39 +#if 0
   13.40 +#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   13.41 +#else
   13.42 +#define DPRINTK_IOCTL(_f, _a...) ((void)0)
   13.43 +#endif
   13.44 +
   13.45 +/* Private gendisk->flags[] values. */
    13.46 +#define GENHD_FL_XEN         2 /* Is unit a Xen block device?  */
   13.47 +#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
   13.48 +
   13.49 +/*
   13.50 + * We have one of these per vbd, whether ide, scsi or 'other'.
   13.51 + * They hang in an array off the gendisk structure. We may end up putting
   13.52 + * all kinds of interesting stuff here :-)
   13.53 + */
   13.54 +typedef struct xl_disk {
   13.55 +    int usage;
   13.56 +} xl_disk_t;
   13.57 +
    13.58 +extern int xen_control_msg(int operation, char *buffer, int size);
   13.59 +extern int xen_block_open(struct inode *inode, struct file *filep);
   13.60 +extern int xen_block_release(struct inode *inode, struct file *filep);
   13.61 +extern int xen_block_ioctl(struct inode *inode, struct file *filep,
   13.62 +                                 unsigned command, unsigned long argument);
   13.63 +extern int xen_block_check(kdev_t dev);
   13.64 +extern int xen_block_revalidate(kdev_t dev);
   13.65 +extern void do_xlblk_request (request_queue_t *rq); 
   13.66 +
   13.67 +extern void xlvbd_update_vbds(void);
   13.68 +
   13.69 +static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
   13.70 +{
   13.71 +    struct gendisk *gd = get_gendisk(xldev);
   13.72 +    
   13.73 +    if ( gd == NULL ) 
   13.74 +        return NULL;
   13.75 +    
   13.76 +    return (xl_disk_t *)gd->real_devices + 
   13.77 +        (MINOR(xldev) >> gd->minor_shift);
   13.78 +}
   13.79 +
   13.80 +
   13.81 +/* Virtual block-device subsystem. */
   13.82 +extern int  xlvbd_init(void);
   13.83 +extern void xlvbd_cleanup(void); 
   13.84 +
   13.85 +#endif /* __XEN_DRIVERS_BLOCK_H__ */
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c	Mon Apr 26 10:02:49 2004 +0000
    14.3 @@ -0,0 +1,561 @@
    14.4 +/******************************************************************************
    14.5 + * vbd.c
    14.6 + * 
    14.7 + * Xenolinux virtual block-device driver (xvd).
    14.8 + * 
    14.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   14.10 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   14.11 + */
   14.12 +
   14.13 +#include "block.h"
   14.14 +#include <linux/blk.h>
   14.15 +
   14.16 +/*
    14.17 + * For convenience we distinguish between ide, scsi and 'other' (i.e.
    14.18 + * anything else, potentially combinations of the two) in the naming scheme 
    14.19 + * and in a few other places (like default readahead, etc).
   14.20 + */
   14.21 +#define XLIDE_MAJOR_NAME  "hd"
   14.22 +#define XLSCSI_MAJOR_NAME "sd"
    14.23 +#define XLVBD_MAJOR_NAME  "xvd"
   14.24 +
   14.25 +#define XLIDE_DEVS_PER_MAJOR   2
   14.26 +#define XLSCSI_DEVS_PER_MAJOR 16
   14.27 +#define XLVBD_DEVS_PER_MAJOR  16
   14.28 +
   14.29 +#define XLIDE_PARTN_SHIFT  6    /* amount to shift minor to get 'real' minor */
   14.30 +#define XLIDE_MAX_PART    (1 << XLIDE_PARTN_SHIFT)     /* minors per ide vbd */
   14.31 +
   14.32 +#define XLSCSI_PARTN_SHIFT 4    /* amount to shift minor to get 'real' minor */
   14.33 +#define XLSCSI_MAX_PART   (1 << XLSCSI_PARTN_SHIFT)   /* minors per scsi vbd */
   14.34 +
   14.35 +#define XLVBD_PARTN_SHIFT  4    /* amount to shift minor to get 'real' minor */
   14.36 +#define XLVBD_MAX_PART    (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */
   14.37 +
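
For example, with XLVBD_PARTN_SHIFT == 4, an 'other' minor of 0x13 decomposes as unit 1, partition 3:

    /*
     *   unit      = 0x13 >> XLVBD_PARTN_SHIFT   == 1
     *   partition = 0x13 & (XLVBD_MAX_PART - 1) == 3
     */
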
   14.38 +/* The below are for the generic drivers/block/ll_rw_block.c code. */
   14.39 +static int xlide_blksize_size[256];
   14.40 +static int xlide_hardsect_size[256];
   14.41 +static int xlide_max_sectors[256];
   14.42 +static int xlscsi_blksize_size[256];
   14.43 +static int xlscsi_hardsect_size[256];
   14.44 +static int xlscsi_max_sectors[256];
   14.45 +static int xlvbd_blksize_size[256];
   14.46 +static int xlvbd_hardsect_size[256];
   14.47 +static int xlvbd_max_sectors[256];
   14.48 +
   14.49 +/* Information from Xen about our VBDs. */
   14.50 +#define MAX_VBDS 64
   14.51 +static int nr_vbds;
   14.52 +static xen_disk_t *vbd_info;
   14.53 +
   14.54 +static struct block_device_operations xlvbd_block_fops = 
   14.55 +{
   14.56 +    open:               xen_block_open,
   14.57 +    release:            xen_block_release,
   14.58 +    ioctl:              xen_block_ioctl,
   14.59 +    check_media_change: xen_block_check,
   14.60 +    revalidate:         xen_block_revalidate,
   14.61 +};
   14.62 +
   14.63 +static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
   14.64 +{
   14.65 +    int error;
   14.66 +    block_io_op_t op; 
   14.67 +
   14.68 +    /* Probe for disk information. */
   14.69 +    memset(&op, 0, sizeof(op)); 
   14.70 +    op.cmd = BLOCK_IO_OP_VBD_PROBE; 
   14.71 +    op.u.probe_params.domain    = 0; 
   14.72 +    op.u.probe_params.xdi.max   = MAX_VBDS;
   14.73 +    op.u.probe_params.xdi.disks = disk_info;
   14.74 +    op.u.probe_params.xdi.count = 0;
   14.75 +
   14.76 +    if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
   14.77 +    {
   14.78 +        printk(KERN_ALERT "Could not probe disks (%d)\n", error);
   14.79 +        return -1;
   14.80 +    }
   14.81 +
   14.82 +    return op.u.probe_params.xdi.count;
   14.83 +}
   14.84 +
   14.85 +/*
   14.86 + * xlvbd_init_device - initialise a VBD device
   14.87 + * @disk:              a xen_disk_t describing the VBD
   14.88 + *
   14.89 + * Takes a xen_disk_t * that describes a VBD the domain has access to.
   14.90 + * Performs appropriate initialisation and registration of the device.
   14.91 + *
   14.92 + * Care needs to be taken when making re-entrant calls to ensure that
   14.93 + * corruption does not occur.  Also, devices that are in use should not have
   14.94 + * their details updated.  This is the caller's responsibility.
   14.95 + */
   14.96 +static int xlvbd_init_device(xen_disk_t *xd)
   14.97 +{
   14.98 +    int device = xd->device;
   14.99 +    int major  = MAJOR(device); 
  14.100 +    int minor  = MINOR(device);
  14.101 +    int is_ide = IDE_DISK_MAJOR(major);  /* is this an ide device? */
  14.102 +    int is_scsi= SCSI_BLK_MAJOR(major);  /* is this a scsi device? */
  14.103 +    char *major_name;
  14.104 +    struct gendisk *gd;
  14.105 +    struct block_device *bd;
  14.106 +    xl_disk_t *disk;
  14.107 +    int i, rc = 0, max_part, partno;
  14.108 +    unsigned long capacity;
  14.109 +
  14.110 +    unsigned char buf[64];
  14.111 +
  14.112 +    if ( (bd = bdget(device)) == NULL )
  14.113 +        return -1;
  14.114 +
  14.115 +    /*
  14.116 +     * Update of partition info, and check of usage count, is protected
  14.117 +     * by the per-block-device semaphore.
  14.118 +     */
  14.119 +    down(&bd->bd_sem);
  14.120 +
  14.121 +    if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
  14.122 +    {
  14.123 +        printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
  14.124 +        rc = -1;
  14.125 +        goto out;
  14.126 +    }
  14.127 +
   14.128 +    if ( is_ide )
   14.129 +    {
   14.130 +        major_name = XLIDE_MAJOR_NAME; 
   14.131 +        max_part   = XLIDE_MAX_PART;
   14.132 +    }
   14.133 +    else if ( is_scsi )
   14.134 +    {
   14.135 +        major_name = XLSCSI_MAJOR_NAME;
   14.136 +        max_part   = XLSCSI_MAX_PART;
   14.137 +    }
   14.138 +    else if ( XD_VIRTUAL(xd->info) )
   14.139 +    {
   14.140 +        major_name = XLVBD_MAJOR_NAME;
   14.141 +        max_part   = XLVBD_MAX_PART;
   14.142 +    }
   14.143 +    else
   14.144 +    { 
   14.145 +        /* SMH: hmm - probably a CCISS driver or similar; assume CCISS. */
   14.146 +        printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", 
   14.147 +               major, minor);
   14.148 +        is_scsi    = 1; 
   14.149 +        major_name = "cciss"; 
   14.150 +        max_part   = XLSCSI_MAX_PART;
   14.151 +    }
   14.152 +
  14.153 +    
  14.154 +    partno = minor & (max_part - 1); 
  14.155 +    
  14.156 +    if ( (gd = get_gendisk(device)) == NULL )
  14.157 +    {
  14.158 +        rc = register_blkdev(major, major_name, &xlvbd_block_fops);
  14.159 +        if ( rc < 0 )
  14.160 +        {
  14.161 +            printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
  14.162 +            goto out;
  14.163 +        }
  14.164 +
  14.165 +        if ( is_ide )
  14.166 +        { 
  14.167 +            blksize_size[major]  = xlide_blksize_size;
  14.168 +            hardsect_size[major] = xlide_hardsect_size;
  14.169 +            max_sectors[major]   = xlide_max_sectors;
  14.170 +            read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
  14.171 +        } 
  14.172 +        else if ( is_scsi )
  14.173 +        { 
  14.174 +            blksize_size[major]  = xlscsi_blksize_size;
  14.175 +            hardsect_size[major] = xlscsi_hardsect_size;
  14.176 +            max_sectors[major]   = xlscsi_max_sectors;
  14.177 +            read_ahead[major]    = 0; /* XXX 8; -- guessing */
  14.178 +        }
  14.179 +        else
  14.180 +        { 
  14.181 +            blksize_size[major]  = xlvbd_blksize_size;
  14.182 +            hardsect_size[major] = xlvbd_hardsect_size;
  14.183 +            max_sectors[major]   = xlvbd_max_sectors;
  14.184 +            read_ahead[major]    = 8;
  14.185 +        }
  14.186 +
  14.187 +        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
  14.188 +
  14.189 +        /*
  14.190 +         * Turn off barking 'headactive' mode. We dequeue buffer heads as
  14.191 +         * soon as we pass them down to Xen.
  14.192 +         */
  14.193 +        blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
  14.194 +
  14.195 +        /* Construct an appropriate gendisk structure. */
  14.196 +        gd             = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
  14.197 +        gd->major      = major;
  14.198 +        gd->major_name = major_name; 
  14.199 +    
  14.200 +        gd->max_p      = max_part; 
  14.201 +        if ( is_ide )
  14.202 +        { 
  14.203 +            gd->minor_shift  = XLIDE_PARTN_SHIFT; 
  14.204 +            gd->nr_real      = XLIDE_DEVS_PER_MAJOR; 
  14.205 +        } 
  14.206 +        else if ( is_scsi )
  14.207 +        { 
  14.208 +            gd->minor_shift  = XLSCSI_PARTN_SHIFT; 
  14.209 +            gd->nr_real      = XLSCSI_DEVS_PER_MAJOR; 
  14.210 +        }
  14.211 +        else
  14.212 +        { 
  14.213 +            gd->minor_shift  = XLVBD_PARTN_SHIFT; 
  14.214 +            gd->nr_real      = XLVBD_DEVS_PER_MAJOR; 
  14.215 +        }
  14.216 +
   14.217 +        /*
   14.218 +         * The sizes[] and part[] arrays hold size and other information for
   14.219 +         * every minor under this major: nr_real disks, each with max_part
   14.220 +         * partition slots.
   14.221 +         */
  14.222 +        gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL);
  14.223 +        gd->part  = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), 
  14.224 +                            GFP_KERNEL);
  14.225 +        memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int));
  14.226 +        memset(gd->part,  0, max_part * gd->nr_real 
  14.227 +               * sizeof(struct hd_struct));
  14.228 +
  14.229 +
  14.230 +        gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), 
  14.231 +                                   GFP_KERNEL);
  14.232 +        memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t));
  14.233 +
  14.234 +        gd->next   = NULL;            
  14.235 +        gd->fops   = &xlvbd_block_fops;
  14.236 +
  14.237 +        gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), 
  14.238 +                             GFP_KERNEL);
  14.239 +        gd->flags  = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL);
  14.240 +    
  14.241 +        memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr));
  14.242 +        memset(gd->flags, 0, gd->nr_real *  sizeof(*gd->flags));
  14.243 +
  14.244 +        add_gendisk(gd);
  14.245 +
  14.246 +        blk_size[major] = gd->sizes;
  14.247 +    }
  14.248 +
  14.249 +    if ( XD_READONLY(xd->info) )
  14.250 +        set_device_ro(device, 1); 
  14.251 +
  14.252 +    gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN;
  14.253 +
  14.254 +    /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
  14.255 +    capacity = (unsigned long)xd->capacity;
  14.256 +
  14.257 +    if ( partno != 0 )
  14.258 +    {
  14.259 +        /*
  14.260 +         * If this was previously set up as a real disc we will have set 
  14.261 +         * up partition-table information. Virtual partitions override 
  14.262 +         * 'real' partitions, and the two cannot coexist on a device.
  14.263 +         */
  14.264 +        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
  14.265 +             (gd->sizes[minor & ~(max_part-1)] != 0) )
  14.266 +        {
  14.267 +            /*
  14.268 +             * Any non-zero sub-partition entries must be cleaned out before
  14.269 +             * installing 'virtual' partition entries. The two types cannot
  14.270 +             * coexist, and virtual partitions are favoured.
  14.271 +             */
  14.272 +            kdev_t dev = device & ~(max_part-1);
  14.273 +            for ( i = max_part - 1; i > 0; i-- )
  14.274 +            {
  14.275 +                invalidate_device(dev+i, 1);
  14.276 +                gd->part[MINOR(dev+i)].start_sect = 0;
  14.277 +                gd->part[MINOR(dev+i)].nr_sects   = 0;
  14.278 +                gd->sizes[MINOR(dev+i)]           = 0;
  14.279 +            }
  14.280 +            printk(KERN_ALERT
  14.281 +                   "Virtual partitions found for /dev/%s - ignoring any "
  14.282 +                   "real partition information we may have found.\n",
  14.283 +                   disk_name(gd, MINOR(device), buf));
  14.284 +        }
  14.285 +
   14.286 +        /* Need to skankily set up 'partition' information */
  14.287 +        gd->part[minor].start_sect = 0; 
  14.288 +        gd->part[minor].nr_sects   = capacity; 
  14.289 +        gd->sizes[minor]           = capacity; 
  14.290 +
  14.291 +        gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
  14.292 +    }
  14.293 +    else
  14.294 +    {
  14.295 +        gd->part[minor].nr_sects = capacity;
  14.296 +        gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
  14.297 +        
  14.298 +        /* Some final fix-ups depending on the device type */
  14.299 +        switch ( XD_TYPE(xd->info) )
  14.300 +        { 
  14.301 +        case XD_TYPE_CDROM:
  14.302 +        case XD_TYPE_FLOPPY: 
  14.303 +        case XD_TYPE_TAPE:
  14.304 +            gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; 
  14.305 +            printk(KERN_ALERT 
  14.306 +                   "Skipping partition check on %s /dev/%s\n", 
  14.307 +                   XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" : 
  14.308 +                   (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" : 
  14.309 +                    "floppy"), disk_name(gd, MINOR(device), buf)); 
  14.310 +            break; 
  14.311 +
  14.312 +        case XD_TYPE_DISK:
  14.313 +            /* Only check partitions on real discs (not virtual!). */
  14.314 +            if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
  14.315 +            {
  14.316 +                printk(KERN_ALERT
  14.317 +                       "Skipping partition check on virtual /dev/%s\n",
  14.318 +                       disk_name(gd, MINOR(device), buf));
  14.319 +                break;
  14.320 +            }
  14.321 +            register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
  14.322 +            break; 
  14.323 +
  14.324 +        default:
  14.325 +            printk(KERN_ALERT "XenoLinux: unknown device type %d\n", 
  14.326 +                   XD_TYPE(xd->info)); 
  14.327 +            break; 
  14.328 +        }
  14.329 +    }
  14.330 +
  14.331 + out:
  14.332 +    up(&bd->bd_sem);
  14.333 +    bdput(bd);    
  14.334 +    return rc;
  14.335 +}
  14.336 +
  14.337 +
  14.338 +/*
  14.339 + * xlvbd_remove_device - remove a device node if possible
  14.340 + * @device:       numeric device ID
  14.341 + *
  14.342 + * Updates the gendisk structure and invalidates devices.
  14.343 + *
  14.344 + * This is OK for now but in future, should perhaps consider where this should
  14.345 + * deallocate gendisks / unregister devices.
  14.346 + */
  14.347 +static int xlvbd_remove_device(int device)
  14.348 +{
  14.349 +    int i, rc = 0, minor = MINOR(device);
  14.350 +    struct gendisk *gd;
  14.351 +    struct block_device *bd;
  14.352 +    xl_disk_t *disk = NULL;
  14.353 +
  14.354 +    if ( (bd = bdget(device)) == NULL )
  14.355 +        return -1;
  14.356 +
  14.357 +    /*
  14.358 +     * Update of partition info, and check of usage count, is protected
  14.359 +     * by the per-block-device semaphore.
  14.360 +     */
  14.361 +    down(&bd->bd_sem);
  14.362 +
  14.363 +    if ( ((gd = get_gendisk(device)) == NULL) ||
  14.364 +         ((disk = xldev_to_xldisk(device)) == NULL) )
  14.365 +        BUG();
  14.366 +
  14.367 +    if ( disk->usage != 0 )
  14.368 +    {
  14.369 +        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
  14.370 +        rc = -1;
  14.371 +        goto out;
  14.372 +    }
  14.373 + 
  14.374 +    if ( (minor & (gd->max_p-1)) != 0 )
  14.375 +    {
  14.376 +        /* 1: The VBD is mapped to a partition rather than a whole unit. */
  14.377 +        invalidate_device(device, 1);
   14.378 +        gd->part[minor].start_sect = 0;
  14.379 +        gd->part[minor].nr_sects   = 0;
  14.380 +        gd->sizes[minor]           = 0;
  14.381 +
  14.382 +        /* Clear the consists-of-virtual-partitions flag if possible. */
  14.383 +        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
  14.384 +        for ( i = 1; i < gd->max_p; i++ )
  14.385 +            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
  14.386 +                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
  14.387 +
  14.388 +        /*
  14.389 +         * If all virtual partitions are now gone, and a 'whole unit' VBD is
  14.390 +         * present, then we can try to grok the unit's real partition table.
  14.391 +         */
  14.392 +        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
  14.393 +             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
  14.394 +             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
  14.395 +        {
  14.396 +            register_disk(gd,
  14.397 +                          device&~(gd->max_p-1), 
  14.398 +                          gd->max_p, 
  14.399 +                          &xlvbd_block_fops,
  14.400 +                          gd->part[minor&~(gd->max_p-1)].nr_sects);
  14.401 +        }
  14.402 +    }
  14.403 +    else
  14.404 +    {
  14.405 +        /*
  14.406 +         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
  14.407 +         * NB. The partition entries are only cleared if there are no VBDs
  14.408 +         * mapped to individual partitions on this unit.
  14.409 +         */
  14.410 +        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
  14.411 +        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
  14.412 +            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
  14.413 +        while ( i >= 0 )
  14.414 +        {
  14.415 +            invalidate_device(device+i, 1);
  14.416 +            gd->part[minor+i].start_sect = 0;
  14.417 +            gd->part[minor+i].nr_sects   = 0;
  14.418 +            gd->sizes[minor+i]           = 0;
  14.419 +            i--;
  14.420 +        }
  14.421 +    }
  14.422 +
  14.423 + out:
  14.424 +    up(&bd->bd_sem);
  14.425 +    bdput(bd);
  14.426 +    return rc;
  14.427 +}
  14.428 +
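
The bit-twiddling in xlvbd_remove_device() relies on gd->max_p being a
power of two: (minor & (max_p-1)) extracts the partition number, while
(minor & ~(max_p-1)) yields the minor of the containing whole unit. A
standalone sketch of that convention with made-up values (not part of
the patch):

    #include <stdio.h>

    int main(void)
    {
        int max_p = 16;    /* partitions per unit; assumed power of two */
        int minor = 0x35;  /* unit 3, partition 5, when max_p == 16 */

        printf("partition       = %d\n",   minor & (max_p - 1));   /* 5 */
        printf("whole-unit base = 0x%x\n", minor & ~(max_p - 1));  /* 0x30 */
        return 0;
    }
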
  14.429 +/*
   14.430 + * xlvbd_update_vbds - reprobes the VBD status and updates driver state
   14.431 + * accordingly. The VBDs need to be updated in this way when the domain is
  14.432 + * initialised and also each time we receive an XLBLK_UPDATE event.
  14.433 + */
  14.434 +void xlvbd_update_vbds(void)
  14.435 +{
  14.436 +    int i, j, k, old_nr, new_nr;
  14.437 +    xen_disk_t *old_info, *new_info, *merged_info;
  14.438 +
  14.439 +    old_info = vbd_info;
  14.440 +    old_nr   = nr_vbds;
  14.441 +
  14.442 +    new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
   14.443 +    if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) )
  14.444 +    {
  14.445 +        kfree(new_info);
  14.446 +        return;
  14.447 +    }
  14.448 +
  14.449 +    /*
   14.450 +     * The merged list is at most old list + new list in size. This bound is
   14.451 +     * reached only when the old and new lists do not overlap at all, and we
   14.452 +     * cannot yet destroy any old-list VBD because its usage count is non-zero.
  14.453 +     */
  14.454 +    merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
  14.455 +
  14.456 +    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
  14.457 +    i = j = k = 0;
  14.458 +
  14.459 +    while ( (i < old_nr) && (j < new_nr) )
  14.460 +    {
  14.461 +        if ( old_info[i].device < new_info[j].device )
  14.462 +        {
  14.463 +            if ( xlvbd_remove_device(old_info[i].device) != 0 )
  14.464 +                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  14.465 +            i++;
  14.466 +        }
  14.467 +        else if ( old_info[i].device > new_info[j].device )
  14.468 +        {
  14.469 +            if ( xlvbd_init_device(&new_info[j]) == 0 )
  14.470 +                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  14.471 +            j++;
  14.472 +        }
  14.473 +        else
  14.474 +        {
  14.475 +            if ( ((old_info[i].capacity == new_info[j].capacity) &&
  14.476 +                  (old_info[i].info == new_info[j].info)) ||
  14.477 +                 (xlvbd_remove_device(old_info[i].device) != 0) )
  14.478 +                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  14.479 +            else if ( xlvbd_init_device(&new_info[j]) == 0 )
  14.480 +                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  14.481 +            i++; j++;
  14.482 +        }
  14.483 +    }
  14.484 +
  14.485 +    for ( ; i < old_nr; i++ )
  14.486 +    {
  14.487 +        if ( xlvbd_remove_device(old_info[i].device) != 0 )
  14.488 +            memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  14.489 +    }
  14.490 +
  14.491 +    for ( ; j < new_nr; j++ )
  14.492 +    {
  14.493 +        if ( xlvbd_init_device(&new_info[j]) == 0 )
  14.494 +            memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  14.495 +    }
  14.496 +
  14.497 +    vbd_info = merged_info;
  14.498 +    nr_vbds  = k;
  14.499 +
  14.500 +    kfree(old_info);
  14.501 +    kfree(new_info);
  14.502 +}
  14.503 +
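
The merge loop in xlvbd_update_vbds() above is a classic two-pointer
merge and assumes both lists are sorted by ascending device number (the
real values come from xlvbd_get_vbd_info()). A worked trace with
made-up device numbers:

    /* old = {0x301, 0x302}, new = {0x302, 0x303}
     *  - 0x301 only in old: xlvbd_remove_device(0x301) is attempted; the
     *    entry is copied to merged_info only if removal fails (in use).
     *  - 0x302 in both: kept as-is when capacity and info are unchanged,
     *    otherwise removed and re-initialised from the new entry.
     *  - 0x303 only in new: xlvbd_init_device(); copied on success.
     * merged_info therefore never needs more than old_nr + new_nr slots. */
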
  14.504 +
  14.505 +/*
   14.506 + * Set up all the linux device goop for the virtual block devices (VBDs) that
   14.507 + * xen tells us about. Note that although from xen's pov VBDs are addressed
   14.508 + * simply as an opaque 16-bit device number, the domain creation tools
   14.509 + * conventionally allocate these numbers to correspond to those used by 'real'
   14.510 + * linux -- this is just for convenience as it means e.g. that the same
   14.511 + * /etc/fstab can be used when booting with or without xen.
  14.512 + */
  14.513 +int __init xlvbd_init(void)
  14.514 +{
  14.515 +    int i;
  14.516 +    
  14.517 +    /*
  14.518 +     * If compiled as a module, we don't support unloading yet. We therefore 
  14.519 +     * permanently increment the reference count to disallow it.
  14.520 +     */
  14.521 +    SET_MODULE_OWNER(&xlvbd_block_fops);
  14.522 +    MOD_INC_USE_COUNT;
  14.523 +
  14.524 +    /* Initialize the global arrays. */
  14.525 +    for ( i = 0; i < 256; i++ ) 
  14.526 +    {
  14.527 +        /* from the generic ide code (drivers/ide/ide-probe.c, etc) */
  14.528 +        xlide_blksize_size[i]  = 1024;
  14.529 +        xlide_hardsect_size[i] = 512;
  14.530 +        xlide_max_sectors[i]   = 128;  /* 'hwif->rqsize' if we knew it */
  14.531 +
  14.532 +        /* from the generic scsi disk code (drivers/scsi/sd.c) */
  14.533 +        xlscsi_blksize_size[i]  = 1024; /* XXX 512; */
  14.534 +        xlscsi_hardsect_size[i] = 512;
  14.535 +        xlscsi_max_sectors[i]   = 128*8; /* XXX 128; */
  14.536 +
   14.537 +        /* we don't really know what to set these to, since it depends */
  14.538 +        xlvbd_blksize_size[i]  = 512;
  14.539 +        xlvbd_hardsect_size[i] = 512;
  14.540 +        xlvbd_max_sectors[i]   = 128;
  14.541 +    }
  14.542 +
  14.543 +    vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  14.544 +    nr_vbds  = xlvbd_get_vbd_info(vbd_info);
  14.545 +
  14.546 +    if ( nr_vbds < 0 )
  14.547 +    {
  14.548 +        kfree(vbd_info);
  14.549 +        vbd_info = NULL;
  14.550 +        nr_vbds  = 0;
  14.551 +    }
  14.552 +    else
  14.553 +    {
  14.554 +        for ( i = 0; i < nr_vbds; i++ )
  14.555 +            xlvbd_init_device(&vbd_info[i]);
  14.556 +    }
  14.557 +
  14.558 +    return 0;
  14.559 +}
  14.560 +
  14.561 +
  14.562 +#ifdef MODULE
  14.563 +module_init(xlvbd_init);
  14.564 +#endif
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/Makefile	Mon Apr 26 10:02:49 2004 +0000
    15.3 @@ -0,0 +1,10 @@
    15.4 +
    15.5 +O_TARGET := drv.o
    15.6 +
    15.7 +subdir-y += frontend
    15.8 +obj-y    += frontend/drv.o
    15.9 +
   15.10 +subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
   15.11 +obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
   15.12 +
   15.13 +include $(TOPDIR)/Rules.make
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/Makefile	Mon Apr 26 10:02:49 2004 +0000
    16.3 @@ -0,0 +1,3 @@
    16.4 +O_TARGET := drv.o
    16.5 +obj-y := main.o
    16.6 +include $(TOPDIR)/Rules.make
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c	Mon Apr 26 10:02:49 2004 +0000
    17.3 @@ -0,0 +1,26 @@
    17.4 +/******************************************************************************
    17.5 + * arch/xen/drivers/netif/backend/main.c
    17.6 + * 
     17.7 + * Back-end of the driver for virtual network devices. This portion of the
     17.8 + * driver exports a 'unified' network-device interface that can be accessed
    17.9 + * by any operating system that implements a compatible front end. A 
   17.10 + * reference front-end implementation can be found in:
   17.11 + *  arch/xen/drivers/netif/frontend
   17.12 + * 
   17.13 + * Copyright (c) 2004, K A Fraser
   17.14 + */
   17.15 +
   17.16 +#include <linux/config.h>
   17.17 +#include <linux/module.h>
   17.18 +
   17.19 +static int __init init_module(void)
   17.20 +{
   17.21 +    return 0;
   17.22 +}
   17.23 +
   17.24 +static void cleanup_module(void)
   17.25 +{
   17.26 +}
   17.27 +
   17.28 +module_init(init_module);
   17.29 +module_exit(cleanup_module);
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/Makefile	Mon Apr 26 10:02:49 2004 +0000
    18.3 @@ -0,0 +1,3 @@
    18.4 +O_TARGET := drv.o
    18.5 +obj-y := main.o
    18.6 +include $(TOPDIR)/Rules.make
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c	Mon Apr 26 10:02:49 2004 +0000
    19.3 @@ -0,0 +1,565 @@
    19.4 +/******************************************************************************
    19.5 + * arch/xen/drivers/netif/frontend/main.c
    19.6 + * 
    19.7 + * Virtual network driver for XenoLinux.
    19.8 + * 
    19.9 + * Copyright (c) 2002-2004, K A Fraser
   19.10 + */
   19.11 +
   19.12 +#include <linux/config.h>
   19.13 +#include <linux/module.h>
   19.14 +
   19.15 +#include <linux/kernel.h>
   19.16 +#include <linux/sched.h>
   19.17 +#include <linux/slab.h>
   19.18 +#include <linux/string.h>
   19.19 +#include <linux/errno.h>
   19.20 +
   19.21 +#include <linux/netdevice.h>
   19.22 +#include <linux/inetdevice.h>
   19.23 +#include <linux/etherdevice.h>
   19.24 +#include <linux/skbuff.h>
   19.25 +#include <linux/init.h>
   19.26 +
   19.27 +#include <asm/io.h>
   19.28 +#include <net/sock.h>
   19.29 +#include <net/pkt_sched.h>
   19.30 +
   19.31 +#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
   19.32 +
   19.33 +static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
   19.34 +static void network_tx_buf_gc(struct net_device *dev);
   19.35 +static void network_alloc_rx_buffers(struct net_device *dev);
   19.36 +static void cleanup_module(void);
   19.37 +
   19.38 +/* Dynamically-mapped IRQs. */
   19.39 +static int network_irq, debug_irq;
   19.40 +
    19.41 +static LIST_HEAD(dev_list); /* static init: cleanup_module() walks this */
   19.42 +
   19.43 +struct net_private
   19.44 +{
   19.45 +    struct list_head list;
   19.46 +    struct net_device *dev;
   19.47 +
   19.48 +    struct net_device_stats stats;
   19.49 +    NET_RING_IDX rx_resp_cons, tx_resp_cons;
   19.50 +    unsigned int net_ring_fixmap_idx, tx_full;
   19.51 +    net_ring_t  *net_ring;
   19.52 +    net_idx_t   *net_idx;
   19.53 +    spinlock_t   tx_lock;
   19.54 +    unsigned int idx; /* Domain-specific index of this VIF. */
   19.55 +
   19.56 +    unsigned int rx_bufs_to_notify;
   19.57 +
   19.58 +#define STATE_ACTIVE    0
   19.59 +#define STATE_SUSPENDED 1
   19.60 +#define STATE_CLOSED    2
   19.61 +    unsigned int state;
   19.62 +
   19.63 +    /*
   19.64 +     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
   19.65 +     * array is an index into a chain of free entries.
   19.66 +     */
   19.67 +    struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1];
   19.68 +    struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1];
   19.69 +};
   19.70 +
    19.71 +/* Access macros for acquiring and freeing slots in {tx,rx}_skbs[]. */
    19.72 +#define ADD_ID_TO_FREELIST(_list, _id) do {        \
    19.73 +    (_list)[(_id)] = (_list)[0];                    \
    19.74 +    (_list)[0]     = (void *)(unsigned long)(_id); } while ( 0 )
   19.75 +#define GET_ID_FROM_FREELIST(_list)                \
   19.76 + ({ unsigned long _id = (unsigned long)(_list)[0]; \
   19.77 +    (_list)[0]  = (_list)[_id];                    \
   19.78 +    (unsigned short)_id; })
   19.79 +
   19.80 +
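
The macros above overlay a free list on the {tx,rx}_skbs arrays
themselves: entry 0 is the list head, and every free slot stores the
index of the next free slot cast to a pointer. A minimal standalone
illustration of the same trick (array name and size are made up):

    #include <stdio.h>

    #define RING_SIZE 8
    static void *slots[RING_SIZE + 1];  /* slots[0] is the free-list head */

    int main(void)
    {
        unsigned long i, id;

        /* Chain every entry onto the free list, as network_open() does. */
        for ( i = 0; i <= RING_SIZE; i++ )
            slots[i] = (void *)(i + 1);

        /* Pop an id -- what GET_ID_FROM_FREELIST expands to. */
        id = (unsigned long)slots[0];
        slots[0] = slots[id];
        printf("allocated id %lu\n", id);  /* prints 1 */

        /* Push it back -- what ADD_ID_TO_FREELIST expands to. */
        slots[id] = slots[0];
        slots[0] = (void *)id;
        return 0;
    }
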
   19.81 +static void _dbg_network_int(struct net_device *dev)
   19.82 +{
   19.83 +    struct net_private *np = dev->priv;
   19.84 +
   19.85 +    if ( np->state == STATE_CLOSED )
   19.86 +        return;
   19.87 +    
   19.88 +    printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x,"
   19.89 +           " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x,"
   19.90 +           " tx_event=0x%08x, state=%d\n",
   19.91 +           np->tx_full, np->tx_resp_cons, 
   19.92 +           np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, 
   19.93 +           np->net_idx->tx_event,
   19.94 +           test_bit(__LINK_STATE_XOFF, &dev->state));
   19.95 +    printk(KERN_ALERT "net: rx_resp_cons=0x%08x,"
   19.96 +           " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n",
   19.97 +           np->rx_resp_cons, np->net_idx->rx_req_prod,
   19.98 +           np->net_idx->rx_resp_prod, np->net_idx->rx_event);
   19.99 +}
  19.100 +
  19.101 +
  19.102 +static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs)
  19.103 +{
  19.104 +    struct list_head *ent;
  19.105 +    struct net_private *np;
  19.106 +    list_for_each ( ent, &dev_list )
  19.107 +    {
  19.108 +        np = list_entry(ent, struct net_private, list);
  19.109 +        _dbg_network_int(np->dev);
  19.110 +    }
  19.111 +}
  19.112 +
  19.113 +
  19.114 +static int network_open(struct net_device *dev)
  19.115 +{
  19.116 +    struct net_private *np = dev->priv;
  19.117 +    netop_t netop;
  19.118 +    int i, ret;
  19.119 +
  19.120 +    netop.cmd = NETOP_RESET_RINGS;
  19.121 +    netop.vif = np->idx;
  19.122 +    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
  19.123 +    {
  19.124 +        printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n");
  19.125 +        return ret;
  19.126 +    }
  19.127 +
  19.128 +    netop.cmd = NETOP_GET_VIF_INFO;
  19.129 +    netop.vif = np->idx;
  19.130 +    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
  19.131 +    {
  19.132 +        printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx);
  19.133 +        return ret;
  19.134 +    }
  19.135 +
  19.136 +    memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
  19.137 +
  19.138 +    set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, 
  19.139 +               netop.u.get_vif_info.ring_mfn << PAGE_SHIFT);
  19.140 +    np->net_ring = (net_ring_t *)fix_to_virt(
  19.141 +        FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
  19.142 +    np->net_idx  = &HYPERVISOR_shared_info->net_idx[np->idx];
  19.143 +
  19.144 +    np->rx_bufs_to_notify = 0;
  19.145 +    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
  19.146 +    memset(&np->stats, 0, sizeof(np->stats));
  19.147 +    spin_lock_init(&np->tx_lock);
  19.148 +    memset(np->net_ring, 0, sizeof(*np->net_ring));
  19.149 +    memset(np->net_idx, 0, sizeof(*np->net_idx));
  19.150 +
  19.151 +    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
  19.152 +    for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ )
  19.153 +        np->tx_skbs[i] = (void *)(i+1);
  19.154 +    for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ )
  19.155 +        np->rx_skbs[i] = (void *)(i+1);
  19.156 +
  19.157 +    wmb();
  19.158 +    np->state = STATE_ACTIVE;
  19.159 +
  19.160 +    network_alloc_rx_buffers(dev);
  19.161 +
  19.162 +    netif_start_queue(dev);
  19.163 +
  19.164 +    MOD_INC_USE_COUNT;
  19.165 +
  19.166 +    return 0;
  19.167 +}
  19.168 +
  19.169 +
  19.170 +static void network_tx_buf_gc(struct net_device *dev)
  19.171 +{
  19.172 +    NET_RING_IDX i, prod;
  19.173 +    unsigned short id;
  19.174 +    struct net_private *np = dev->priv;
  19.175 +    struct sk_buff *skb;
  19.176 +    tx_entry_t *tx_ring = np->net_ring->tx_ring;
  19.177 +
  19.178 +    do {
  19.179 +        prod = np->net_idx->tx_resp_prod;
  19.180 +
  19.181 +        for ( i = np->tx_resp_cons; i != prod; i++ )
  19.182 +        {
  19.183 +            id  = tx_ring[MASK_NET_TX_IDX(i)].resp.id;
  19.184 +            skb = np->tx_skbs[id];
  19.185 +            ADD_ID_TO_FREELIST(np->tx_skbs, id);
  19.186 +            dev_kfree_skb_any(skb);
  19.187 +        }
  19.188 +        
  19.189 +        np->tx_resp_cons = prod;
  19.190 +        
  19.191 +        /*
  19.192 +         * Set a new event, then check for race with update of tx_cons. Note
  19.193 +         * that it is essential to schedule a callback, no matter how few
  19.194 +         * buffers are pending. Even if there is space in the transmit ring,
  19.195 +         * higher layers may be blocked because too much data is outstanding:
  19.196 +         * in such cases notification from Xen is likely to be the only kick
  19.197 +         * that we'll get.
  19.198 +         */
  19.199 +        np->net_idx->tx_event = 
  19.200 +            prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1;
  19.201 +        mb();
  19.202 +    }
  19.203 +    while ( prod != np->net_idx->tx_resp_prod );
  19.204 +
  19.205 +    if ( np->tx_full && 
  19.206 +         ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) )
  19.207 +    {
  19.208 +        np->tx_full = 0;
  19.209 +        if ( np->state == STATE_ACTIVE )
  19.210 +            netif_wake_queue(dev);
  19.211 +    }
  19.212 +}
  19.213 +
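
The tx_event computation above asks Xen for the next notification about
half-way through the requests still in flight, batching completion
interrupts. A worked example with made-up ring indices:

    /* After the gc loop, suppose prod (tx_resp_prod) = 10 and
     * tx_req_prod = 18, i.e. 8 requests remain outstanding:
     *     tx_event = 10 + ((18 - 10) >> 1) + 1 = 15
     * so Xen notifies once responses reach index 15 -- after roughly
     * half the outstanding work -- rather than for every response.
     * The '+ 1' keeps the event ahead of prod even when nothing is
     * outstanding, so a callback is always scheduled. */
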
  19.214 +
  19.215 +static inline pte_t *get_ppte(void *addr)
  19.216 +{
  19.217 +    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
  19.218 +    pgd = pgd_offset_k(   (unsigned long)addr);
  19.219 +    pmd = pmd_offset(pgd, (unsigned long)addr);
  19.220 +    pte = pte_offset(pmd, (unsigned long)addr);
  19.221 +    return pte;
  19.222 +}
  19.223 +
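
get_ppte() walks the page tables by hand; the sketch below shows what
each step resolves to on a two-level (non-PAE) x86 layout, which is an
assumption about the build rather than something this file states. The
RX path passes virt_to_machine() of the returned pte to Xen so that a
received machine page can be installed by rewriting that entry (a page
flip rather than a data copy):

    /* addr -> pgd -> pmd -> pte on 2-level x86:
     * pgd_offset_k(addr)   : kernel page-directory entry covering addr
     * pmd_offset(pgd, addr): degenerate without PAE -- same entry
     * pte_offset(pmd, addr): the entry that actually maps addr's page */
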
  19.224 +
  19.225 +static void network_alloc_rx_buffers(struct net_device *dev)
  19.226 +{
  19.227 +    unsigned short id;
  19.228 +    struct net_private *np = dev->priv;
  19.229 +    struct sk_buff *skb;
  19.230 +    netop_t netop;
  19.231 +    NET_RING_IDX i = np->net_idx->rx_req_prod;
  19.232 +
  19.233 +    if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || 
  19.234 +         unlikely(np->state != STATE_ACTIVE) )
  19.235 +        return;
  19.236 +
  19.237 +    do {
  19.238 +        skb = dev_alloc_skb(RX_BUF_SIZE);
  19.239 +        if ( unlikely(skb == NULL) )
  19.240 +            break;
  19.241 +
  19.242 +        skb->dev = dev;
  19.243 +
  19.244 +        if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) )
  19.245 +            panic("alloc_skb needs to provide us page-aligned buffers.");
  19.246 +
  19.247 +        id = GET_ID_FROM_FREELIST(np->rx_skbs);
  19.248 +        np->rx_skbs[id] = skb;
  19.249 +
  19.250 +        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id   = id;
  19.251 +        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = 
  19.252 +            virt_to_machine(get_ppte(skb->head));
  19.253 +
  19.254 +        np->rx_bufs_to_notify++;
  19.255 +    }
  19.256 +    while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE );
  19.257 +
  19.258 +    /*
  19.259 +     * We may have allocated buffers which have entries outstanding in the page
  19.260 +     * update queue -- make sure we flush those first!
  19.261 +     */
  19.262 +    flush_page_update_queue();
  19.263 +
  19.264 +    np->net_idx->rx_req_prod = i;
  19.265 +    np->net_idx->rx_event    = np->rx_resp_cons + 1;
  19.266 +        
  19.267 +    /* Batch Xen notifications. */
  19.268 +    if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) )
  19.269 +    {
  19.270 +        netop.cmd = NETOP_PUSH_BUFFERS;
  19.271 +        netop.vif = np->idx;
  19.272 +        (void)HYPERVISOR_net_io_op(&netop);
  19.273 +        np->rx_bufs_to_notify = 0;
  19.274 +    }
  19.275 +}
  19.276 +
  19.277 +
  19.278 +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
  19.279 +{
  19.280 +    unsigned short id;
  19.281 +    struct net_private *np = (struct net_private *)dev->priv;
  19.282 +    tx_req_entry_t *tx;
  19.283 +    netop_t netop;
  19.284 +    NET_RING_IDX i;
  19.285 +
  19.286 +    if ( unlikely(np->tx_full) )
  19.287 +    {
  19.288 +        printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
  19.289 +        netif_stop_queue(dev);
  19.290 +        return -ENOBUFS;
  19.291 +    }
  19.292 +
  19.293 +    if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
  19.294 +                  PAGE_SIZE) )
  19.295 +    {
  19.296 +        struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
  19.297 +        if ( unlikely(new_skb == NULL) )
  19.298 +            return 1;
  19.299 +        skb_put(new_skb, skb->len);
  19.300 +        memcpy(new_skb->data, skb->data, skb->len);
  19.301 +        dev_kfree_skb(skb);
  19.302 +        skb = new_skb;
  19.303 +    }   
  19.304 +    
  19.305 +    spin_lock_irq(&np->tx_lock);
  19.306 +
  19.307 +    i = np->net_idx->tx_req_prod;
  19.308 +
  19.309 +    id = GET_ID_FROM_FREELIST(np->tx_skbs);
  19.310 +    np->tx_skbs[id] = skb;
  19.311 +
  19.312 +    tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req;
  19.313 +
  19.314 +    tx->id   = id;
  19.315 +    tx->addr = phys_to_machine(virt_to_phys(skb->data));
  19.316 +    tx->size = skb->len;
  19.317 +
  19.318 +    wmb();
  19.319 +    np->net_idx->tx_req_prod = i + 1;
  19.320 +
  19.321 +    network_tx_buf_gc(dev);
  19.322 +
  19.323 +    if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) )
  19.324 +    {
  19.325 +        np->tx_full = 1;
  19.326 +        netif_stop_queue(dev);
  19.327 +    }
  19.328 +
  19.329 +    spin_unlock_irq(&np->tx_lock);
  19.330 +
  19.331 +    np->stats.tx_bytes += skb->len;
  19.332 +    np->stats.tx_packets++;
  19.333 +
  19.334 +    /* Only notify Xen if there are no outstanding responses. */
  19.335 +    mb();
  19.336 +    if ( np->net_idx->tx_resp_prod == i )
  19.337 +    {
  19.338 +        netop.cmd = NETOP_PUSH_BUFFERS;
  19.339 +        netop.vif = np->idx;
  19.340 +        (void)HYPERVISOR_net_io_op(&netop);
  19.341 +    }
  19.342 +
  19.343 +    return 0;
  19.344 +}
  19.345 +
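
The copy-out near the top of network_start_xmit() exists because each TX
request hands Xen a single page, so any payload straddling a page
boundary must first be linearised into a page-aligned buffer
(RX_BUF_SIZE is just over half a page precisely so the slab allocator
hands back page-aligned memory). A worked instance with made-up numbers:

    /* skb->data = 0xc12ffe00, skb->len = 1000, PAGE_SIZE = 4096:
     *   offset within page = 0xe00 = 3584
     *   3584 + 1000 = 4584 >= 4096  ->  payload crosses a page boundary
     * so the data is copied into a fresh dev_alloc_skb(RX_BUF_SIZE)
     * buffer before its machine address is written into the TX ring. */
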
  19.346 +
  19.347 +static inline void _network_interrupt(struct net_device *dev)
  19.348 +{
  19.349 +    struct net_private *np = dev->priv;
  19.350 +    unsigned long flags;
  19.351 +    struct sk_buff *skb;
  19.352 +    rx_resp_entry_t *rx;
  19.353 +    NET_RING_IDX i;
  19.354 +
  19.355 +    if ( unlikely(np->state == STATE_CLOSED) )
  19.356 +        return;
  19.357 +    
  19.358 +    spin_lock_irqsave(&np->tx_lock, flags);
  19.359 +    network_tx_buf_gc(dev);
  19.360 +    spin_unlock_irqrestore(&np->tx_lock, flags);
  19.361 +
  19.362 + again:
  19.363 +    for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ )
  19.364 +    {
  19.365 +        rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp;
  19.366 +
  19.367 +        skb = np->rx_skbs[rx->id];
  19.368 +        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
  19.369 +
  19.370 +        if ( unlikely(rx->status != RING_STATUS_OK) )
  19.371 +        {
  19.372 +            /* Gate this error. We get a (valid) slew of them on suspend. */
  19.373 +            if ( np->state == STATE_ACTIVE )
   19.374 +                printk(KERN_ALERT "bad buffer on RX ring! (%d)\n", rx->status);
  19.375 +            dev_kfree_skb_any(skb);
  19.376 +            continue;
  19.377 +        }
  19.378 +
  19.379 +        /*
   19.380 +         * Set up shinfo -- cribbed from alloc_skb. This was particularly
   19.381 +         * nasty: the shared info is hidden at the back of the data area
   19.382 +         * (presumably so it can be shared), but a page flip clobbers it.
  19.383 +         */
  19.384 +        atomic_set(&(skb_shinfo(skb)->dataref), 1);
  19.385 +        skb_shinfo(skb)->nr_frags = 0;
  19.386 +        skb_shinfo(skb)->frag_list = NULL;
  19.387 +                                
  19.388 +        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
  19.389 +            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
  19.390 +
  19.391 +        skb->data = skb->tail = skb->head + rx->offset;
  19.392 +        skb_put(skb, rx->size);
  19.393 +        skb->protocol = eth_type_trans(skb, dev);
  19.394 +
  19.395 +        np->stats.rx_packets++;
  19.396 +
  19.397 +        np->stats.rx_bytes += rx->size;
  19.398 +        netif_rx(skb);
  19.399 +        dev->last_rx = jiffies;
  19.400 +    }
  19.401 +
  19.402 +    np->rx_resp_cons = i;
  19.403 +
  19.404 +    network_alloc_rx_buffers(dev);
  19.405 +    
  19.406 +    /* Deal with hypervisor racing our resetting of rx_event. */
  19.407 +    mb();
  19.408 +    if ( np->net_idx->rx_resp_prod != i )
  19.409 +        goto again;
  19.410 +}
  19.411 +
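
The mb() and re-check of rx_resp_prod at the end of _network_interrupt()
close a lost-wakeup race; an illustrative interleaving:

    /* 1. frontend drains responses up to i == rx_resp_prod == 7
     * 2. backend produces response 8 before the frontend's new rx_event
     *    (rx_resp_cons + 1) becomes visible to it, so it raises no
     *    further notification
     * 3. the frontend's mb() orders its bookkeeping writes before the
     *    re-read of rx_resp_prod; seeing 8 != 7 it takes 'goto again'
     *    instead of waiting for an interrupt that will never arrive. */
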
  19.412 +
  19.413 +static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs)
  19.414 +{
  19.415 +    struct list_head *ent;
  19.416 +    struct net_private *np;
  19.417 +    list_for_each ( ent, &dev_list )
  19.418 +    {
  19.419 +        np = list_entry(ent, struct net_private, list);
  19.420 +        _network_interrupt(np->dev);
  19.421 +    }
  19.422 +}
  19.423 +
  19.424 +
  19.425 +static int network_close(struct net_device *dev)
  19.426 +{
  19.427 +    struct net_private *np = dev->priv;
  19.428 +    netop_t netop;
  19.429 +
  19.430 +    np->state = STATE_SUSPENDED;
  19.431 +    wmb();
  19.432 +
  19.433 +    netif_stop_queue(np->dev);
  19.434 +
  19.435 +    netop.cmd = NETOP_FLUSH_BUFFERS;
  19.436 +    netop.vif = np->idx;
  19.437 +    (void)HYPERVISOR_net_io_op(&netop);
  19.438 +
  19.439 +    while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
  19.440 +            (np->tx_resp_cons != np->net_idx->tx_req_prod) )
  19.441 +    {
  19.442 +        barrier();
  19.443 +        current->state = TASK_INTERRUPTIBLE;
  19.444 +        schedule_timeout(1);
  19.445 +    }
  19.446 +
  19.447 +    wmb();
  19.448 +    np->state = STATE_CLOSED;
  19.449 +    wmb();
  19.450 +
  19.451 +    /* Now no longer safe to take interrupts for this device. */
  19.452 +    clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
  19.453 +
  19.454 +    MOD_DEC_USE_COUNT;
  19.455 +
  19.456 +    return 0;
  19.457 +}
  19.458 +
  19.459 +
  19.460 +static struct net_device_stats *network_get_stats(struct net_device *dev)
  19.461 +{
  19.462 +    struct net_private *np = (struct net_private *)dev->priv;
  19.463 +    return &np->stats;
  19.464 +}
  19.465 +
  19.466 +
  19.467 +static int __init init_module(void)
  19.468 +{
  19.469 +#if 0
  19.470 +    int i, fixmap_idx=-1, err;
  19.471 +    struct net_device *dev;
  19.472 +    struct net_private *np;
  19.473 +    netop_t netop;
  19.474 +
  19.475 +    INIT_LIST_HEAD(&dev_list);
  19.476 +
  19.477 +    network_irq = bind_virq_to_irq(VIRQ_NET);
  19.478 +    debug_irq   = bind_virq_to_irq(VIRQ_DEBUG);
  19.479 +
  19.480 +    err = request_irq(network_irq, network_interrupt, 
  19.481 +                      SA_SAMPLE_RANDOM, "network", NULL);
  19.482 +    if ( err )
  19.483 +    {
  19.484 +        printk(KERN_WARNING "Could not allocate network interrupt\n");
  19.485 +        goto fail;
  19.486 +    }
  19.487 +    
  19.488 +    err = request_irq(debug_irq, dbg_network_int, 
  19.489 +                      SA_SHIRQ, "net_dbg", &dbg_network_int);
  19.490 +    if ( err )
  19.491 +        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
  19.492 +
  19.493 +    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
  19.494 +    {
  19.495 +        /* If the VIF is invalid then the query hypercall will fail. */
  19.496 +        netop.cmd = NETOP_GET_VIF_INFO;
  19.497 +        netop.vif = i;
  19.498 +        if ( HYPERVISOR_net_io_op(&netop) != 0 )
  19.499 +            continue;
  19.500 +
  19.501 +        /* We actually only support up to 4 vifs right now. */
  19.502 +        if ( ++fixmap_idx == 4 )
  19.503 +            break;
  19.504 +
  19.505 +        dev = alloc_etherdev(sizeof(struct net_private));
  19.506 +        if ( dev == NULL )
  19.507 +        {
  19.508 +            err = -ENOMEM;
  19.509 +            goto fail;
  19.510 +        }
  19.511 +
  19.512 +        np = dev->priv;
  19.513 +        np->state               = STATE_CLOSED;
  19.514 +        np->net_ring_fixmap_idx = fixmap_idx;
  19.515 +        np->idx                 = i;
  19.516 +
  19.517 +        SET_MODULE_OWNER(dev);
  19.518 +        dev->open            = network_open;
  19.519 +        dev->hard_start_xmit = network_start_xmit;
  19.520 +        dev->stop            = network_close;
  19.521 +        dev->get_stats       = network_get_stats;
  19.522 +
  19.523 +        memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
  19.524 +
  19.525 +        if ( (err = register_netdev(dev)) != 0 )
  19.526 +        {
  19.527 +            kfree(dev);
  19.528 +            goto fail;
  19.529 +        }
  19.530 +
  19.531 +        np->dev = dev;
  19.532 +        list_add(&np->list, &dev_list);
  19.533 +    }
  19.534 +
  19.535 +    return 0;
  19.536 +
  19.537 + fail:
  19.538 +    cleanup_module();
  19.539 +    return err;
  19.540 +#endif
  19.541 +    return 0;
  19.542 +}
  19.543 +
  19.544 +
  19.545 +static void cleanup_module(void)
  19.546 +{
  19.547 +    struct net_private *np;
  19.548 +    struct net_device *dev;
  19.549 +
  19.550 +    while ( !list_empty(&dev_list) )
  19.551 +    {
  19.552 +        np = list_entry(dev_list.next, struct net_private, list);
  19.553 +        list_del(&np->list);
  19.554 +        dev = np->dev;
  19.555 +        unregister_netdev(dev);
  19.556 +        kfree(dev);
  19.557 +    }
  19.558 +
  19.559 +    free_irq(network_irq, NULL);
  19.560 +    free_irq(debug_irq, NULL);
  19.561 +
  19.562 +    unbind_virq_from_irq(VIRQ_NET);
  19.563 +    unbind_virq_from_irq(VIRQ_DEBUG);
  19.564 +}
  19.565 +
  19.566 +
  19.567 +module_init(init_module);
  19.568 +module_exit(cleanup_module);
    20.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile	Sat Apr 24 08:10:58 2004 +0000
    20.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.3 @@ -1,10 +0,0 @@
    20.4 -
    20.5 -O_TARGET := drv.o
    20.6 -
    20.7 -subdir-y += frontend
    20.8 -obj-y    += frontend/drv.o
    20.9 -
   20.10 -subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
   20.11 -obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
   20.12 -
   20.13 -include $(TOPDIR)/Rules.make
    21.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile	Sat Apr 24 08:10:58 2004 +0000
    21.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.3 @@ -1,3 +0,0 @@
    21.4 -O_TARGET := drv.o
    21.5 -obj-y := main.o control.o interface.o vbd.o
    21.6 -include $(TOPDIR)/Rules.make
    22.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h	Sat Apr 24 08:10:58 2004 +0000
    22.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.3 @@ -1,98 +0,0 @@
    22.4 -/******************************************************************************
    22.5 - * arch/xen/drivers/vblkif/backend/common.h
    22.6 - */
    22.7 -
    22.8 -#ifndef __VBLKIF__BACKEND__COMMON_H__
    22.9 -#define __VBLKIF__BACKEND__COMMON_H__
   22.10 -
   22.11 -#include <linux/config.h>
   22.12 -#include <linux/module.h>
   22.13 -#include <linux/rbtree.h>
   22.14 -#include <linux/interrupt.h>
   22.15 -#include <linux/slab.h>
   22.16 -#include <asm/ctrl_if.h>
   22.17 -#include <asm/io.h>
   22.18 -#include "../vblkif.h"
   22.19 -
   22.20 -#ifndef NDEBUG
   22.21 -#define ASSERT(_p) \
   22.22 -    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
   22.23 -    __LINE__, __FILE__); *(int*)0=0; }
   22.24 -#define DPRINTK(_f, _a...) printk("(file=%s, line=%d) " _f, \
   22.25 -                           __FILE__ , __LINE__ , ## _a )
   22.26 -#else
   22.27 -#define ASSERT(_p) ((void)0)
   22.28 -#define DPRINTK(_f, _a...) ((void)0)
   22.29 -#endif
   22.30 -
   22.31 -typedef struct blkif_st {
   22.32 -    /* Unique identifier for this interface. */
   22.33 -    domid_t          domid;
   22.34 -    unsigned int     handle;
   22.35 -    /* Physical parameters of the comms window. */
   22.36 -    unsigned long    shmem_frame;
   22.37 -    unsigned int     evtchn;
   22.38 -    int              irq;
   22.39 -    /* Comms information. */
   22.40 -    blk_ring_t      *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
   22.41 -    BLK_RING_IDX     blk_req_cons;  /* Request consumer. */
   22.42 -    BLK_RING_IDX     blk_resp_prod; /* Private version of response producer. */
   22.43 -    /* VBDs attached to this interface. */
   22.44 -    rb_root_t        vbd_rb;        /* Mapping from 16-bit vdevices to VBDs. */
   22.45 -    spinlock_t       vbd_lock;      /* Protects VBD mapping. */
   22.46 -    /* Private fields. */
   22.47 -    struct blkif_st *hash_next;
   22.48 -    struct list_head blkdev_list;
   22.49 -    spinlock_t       blk_ring_lock;
   22.50 -} blkif_t;
   22.51 -
   22.52 -void blkif_create(blkif_create_t *create);
   22.53 -void blkif_destroy(blkif_destroy_t *destroy);
   22.54 -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
   22.55 -void blkif_get(blkif_t *blkif);
   22.56 -void blkif_put(blkif_t *blkif);
   22.57 -
   22.58 -/* An entry in a list of xen_extents. */
   22.59 -typedef struct _blkif_extent_le { 
   22.60 -    blkif_extent_t extent;               /* an individual extent */
   22.61 -    struct _blkif_extent_le *next;       /* and a pointer to the next */ 
   22.62 -} blkif_extent_le_t; 
   22.63 -
   22.64 -typedef struct _vbd { 
   22.65 -    blkif_vdev_t       vdevice;   /* what the domain refers to this vbd as */
   22.66 -    unsigned char      mode;      /* VBD_MODE_{R,W} */
   22.67 -    unsigned char      type;      /* XD_TYPE_xxx */
   22.68 -    blkif_extent_le_t *extents;   /* list of xen_extents making up this vbd */
   22.69 -    rb_node_t          rb;        /* for linking into R-B tree lookup struct */
   22.70 -} vbd_t; 
   22.71 -
   22.72 -long vbd_create(blkif_vbd_create_t *create_params); 
   22.73 -long vbd_grow(blkif_vbd_grow_t *grow_params); 
   22.74 -long vbd_shrink(blkif_vbd_shrink_t *shrink_params);
   22.75 -long vbd_destroy(blkif_vbd_destroy_t *delete_params); 
   22.76 -
   22.77 -void destroy_all_vbds(struct task_struct *p);
   22.78 -
   22.79 -typedef struct {
   22.80 -    blkif_t       *blkif;
   22.81 -    unsigned long  id;
   22.82 -    atomic_t       pendcnt;
   22.83 -    unsigned short operation;
   22.84 -    unsigned short status;
   22.85 -} pending_req_t;
   22.86 -
   22.87 -/* Describes a [partial] disk extent (part of a block io request) */
   22.88 -typedef struct {
   22.89 -    unsigned short dev;
   22.90 -    unsigned short nr_sects;
   22.91 -    unsigned long  buffer;
   22.92 -    xen_sector_t   sector_number;
   22.93 -} phys_seg_t;
   22.94 -
   22.95 -int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); 
   22.96 -
   22.97 -int vblkif_be_controller_init(void);
   22.98 -
   22.99 -void vblkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
  22.100 -
  22.101 -#endif /* __VBLKIF__BACKEND__COMMON_H__ */
    23.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c	Sat Apr 24 08:10:58 2004 +0000
    23.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.3 @@ -1,61 +0,0 @@
    23.4 -/******************************************************************************
    23.5 - * arch/xen/drivers/vblkif/backend/control.c
    23.6 - * 
    23.7 - * Routines for interfacing with the control plane.
    23.8 - * 
    23.9 - * Copyright (c) 2004, Keir Fraser
   23.10 - */
   23.11 -
   23.12 -#include "common.h"
   23.13 -
   23.14 -static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
   23.15 -{
   23.16 -    switch ( msg->subtype )
   23.17 -    {
   23.18 -    case CMSG_BLKIF_BE_CREATE:
   23.19 -        if ( msg->length != sizeof(blkif_create_t) )
   23.20 -            goto parse_error;
   23.21 -        blkif_create((blkif_create_t *)&msg->msg[0]);
   23.22 -        break;        
   23.23 -    case CMSG_BLKIF_BE_DESTROY:
   23.24 -        if ( msg->length != sizeof(blkif_destroy_t) )
   23.25 -            goto parse_error;
   23.26 -        blkif_destroy((blkif_destroy_t *)&msg->msg[0]);
   23.27 -        break;        
   23.28 -    case CMSG_BLKIF_BE_VBD_CREATE:
   23.29 -        if ( msg->length != sizeof(blkif_vbd_create_t) )
   23.30 -            goto parse_error;
   23.31 -        vbd_create((blkif_vbd_create_t *)&msg->msg[0]);
   23.32 -        break;
   23.33 -    case CMSG_BLKIF_BE_VBD_DESTROY:
   23.34 -        if ( msg->length != sizeof(blkif_vbd_destroy_t) )
   23.35 -            goto parse_error;
   23.36 -        vbd_destroy((blkif_vbd_destroy_t *)&msg->msg[0]);
   23.37 -        break;
   23.38 -    case CMSG_BLKIF_BE_VBD_GROW:
   23.39 -        if ( msg->length != sizeof(blkif_vbd_grow_t) )
   23.40 -            goto parse_error;
   23.41 -        vbd_grow((blkif_vbd_grow_t *)&msg->msg[0]);
   23.42 -        break;
   23.43 -    case CMSG_BLKIF_BE_VBD_SHRINK:
   23.44 -        if ( msg->length != sizeof(blkif_vbd_shrink_t) )
   23.45 -            goto parse_error;
   23.46 -        vbd_shrink((blkif_vbd_shrink_t *)&msg->msg[0]);
   23.47 -        break;
   23.48 -    default:
   23.49 -        goto parse_error;
   23.50 -    }
   23.51 -
   23.52 -    ctrl_if_send_response(msg);
   23.53 -    return;
   23.54 -
   23.55 - parse_error:
   23.56 -    msg->length = 0;
   23.57 -    ctrl_if_send_response(msg);
   23.58 -}
   23.59 -
   23.60 -int blkif_ctrlif_init(void)
   23.61 -{
   23.62 -    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx);
   23.63 -    return 0;
   23.64 -}
    24.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c	Sat Apr 24 08:10:58 2004 +0000
    24.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.3 @@ -1,96 +0,0 @@
    24.4 -/******************************************************************************
    24.5 - * arch/xen/drivers/vblkif/backend/interface.c
    24.6 - * 
    24.7 - * Block-device interface management.
    24.8 - * 
    24.9 - * Copyright (c) 2004, Keir Fraser
   24.10 - */
   24.11 -
   24.12 -#include "common.h"
   24.13 -
   24.14 -#define BLKIF_HASHSZ 1024
   24.15 -#define BLKIF_HASH(_d,_h) \
   24.16 -    (((int)(_d)^(int)((_d)>>32)^(int)(_h))&(BLKIF_HASHSZ-1))
   24.17 -
   24.18 -static blkif_t *blkif_hash[BLKIF_HASHSZ];
   24.19 -
   24.20 -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
   24.21 -{
   24.22 -    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
   24.23 -    while ( (blkif != NULL) && 
   24.24 -            (blkif->domid != domid) && 
   24.25 -            (blkif->handle != handle) )
   24.26 -        blkif = blkif->hash_next;
   24.27 -    return blkif;
   24.28 -}
   24.29 -
   24.30 -void blkif_create(blkif_create_t *create)
   24.31 -{
   24.32 -    domid_t       domid  = create->domid;
   24.33 -    unsigned int  handle = create->blkif_handle;
   24.34 -    unsigned int  evtchn = create->evtchn;
   24.35 -    unsigned long shmem_frame = create->shmem_frame;
   24.36 -    blkif_t     **pblkif, *blkif;
   24.37 -
   24.38 -    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
   24.39 -    while ( *pblkif == NULL )
   24.40 -    {
   24.41 -        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
   24.42 -            goto found_match;
   24.43 -        pblkif = &(*pblkif)->hash_next;
   24.44 -    }
   24.45 -
   24.46 -    blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
   24.47 -    memset(blkif, 0, sizeof(*blkif));
   24.48 -    blkif->domid       = domid;
   24.49 -    blkif->handle      = handle;
   24.50 -    blkif->evtchn      = evtchn;
   24.51 -    blkif->irq         = bind_evtchn_to_irq(evtchn);
   24.52 -    blkif->shmem_frame = shmem_frame;
   24.53 -    blkif->shmem_vbase = ioremap(shmem_frame<<PAGE_SHIFT, PAGE_SIZE);
   24.54 -    spin_lock_init(&blkif->vbd_lock);
   24.55 -    spin_lock_init(&blkif->blk_ring_lock);
   24.56 -
   24.57 -    request_irq(irq, vblkif_be_int, 0, "vblkif-backend", blkif);
   24.58 -
   24.59 -    blkif->hash_next = *pblkif;
   24.60 -    *pblkif = blkif;
   24.61 -
   24.62 -    create->status = BLKIF_STATUS_OKAY;
   24.63 -    return;
   24.64 -
   24.65 - found_match:
   24.66 -    create->status = BLKIF_STATUS_INTERFACE_EXISTS;
   24.67 -    return;
   24.68 -
   24.69 - evtchn_in_use:
   24.70 -    unbind_evtchn_from_irq(evtchn); /* drop refcnt */
   24.71 -    create->status = BLKIF_STATUS_ERROR;
   24.72 -    return;
   24.73 -}
   24.74 -
   24.75 -void blkif_destroy(blkif_destroy_t *destroy)
   24.76 -{
   24.77 -    domid_t       domid  = destroy->domid;
   24.78 -    unsigned int  handle = destroy->blkif_handle;
   24.79 -    blkif_t     **pblkif, *blkif;
   24.80 -
   24.81 -    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
   24.82 -    while ( (blkif = *pblkif) == NULL )
   24.83 -    {
   24.84 -        if ( (blkif->domid == domid) && (blkif->handle == handle) )
   24.85 -            goto found_match;
   24.86 -        pblkif = &blkif->hash_next;
   24.87 -    }
   24.88 -
   24.89 -    destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
   24.90 -    return;
   24.91 -
   24.92 - found_match:
   24.93 -    free_irq(blkif->irq, NULL);
   24.94 -    unbind_evtchn_from_irq(blkif->evtchn);
   24.95 -    *pblkif = blkif->hash_next;
   24.96 -    kmem_cache_free(blkif_cachep, blkif);
   24.97 -    destroy->status = BLKIF_STATUS_OKAY;
   24.98 -}
   24.99 -
    25.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c	Sat Apr 24 08:10:58 2004 +0000
    25.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.3 @@ -1,508 +0,0 @@
    25.4 -/******************************************************************************
    25.5 - * arch/xen/drivers/vblkif/backend/main.c
    25.6 - * 
    25.7 - * Back-end of the driver for virtual block devices. This portion of the
    25.8 - * driver exports a 'unified' block-device interface that can be accessed
    25.9 - * by any operating system that implements a compatible front end. A 
   25.10 - * reference front-end implementation can be found in:
   25.11 - *  arch/xen/drivers/vblkif/frontend
   25.12 - * 
   25.13 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   25.14 - */
   25.15 -
   25.16 -#include "common.h"
   25.17 -
   25.18 -/*
   25.19 - * These are rather arbitrary. They are fairly large because adjacent requests
   25.20 - * pulled from a communication ring are quite likely to end up being part of
   25.21 - * the same scatter/gather request at the disc.
   25.22 - * 
   25.23 - * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
   25.24 - * This will increase the chances of being able to write whole tracks.
   25.25 - * 64 should be enough to keep us competitive with Linux.
   25.26 - */
   25.27 -#define MAX_PENDING_REQS 64
   25.28 -#define BATCH_PER_DOMAIN 16
   25.29 -
   25.30 -/*
   25.31 - * Each outstanding request that we've passed to the lower device layers has a 
   25.32 - * 'pending_req' allocated to it. Each buffer_head that completes decrements 
   25.33 - * the pendcnt towards zero. When it hits zero, the specified domain has a 
   25.34 - * response queued for it, with the saved 'id' passed back.
   25.35 - * 
   25.36 - * We can't allocate pending_req's in order, since they may complete out of 
   25.37 - * order. We therefore maintain an allocation ring. This ring also indicates 
   25.38 - * when enough work has been passed down -- at that point the allocation ring 
   25.39 - * will be empty.
   25.40 - */
   25.41 -static pending_req_t pending_reqs[MAX_PENDING_REQS];
   25.42 -static unsigned char pending_ring[MAX_PENDING_REQS];
   25.43 -static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
   25.44 -/* NB. We use a different index type to differentiate from shared blk rings. */
   25.45 -typedef unsigned int PEND_RING_IDX;
   25.46 -#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
   25.47 -static PEND_RING_IDX pending_prod, pending_cons;
   25.48 -#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
   25.49 -
   25.50 -static kmem_cache_t *buffer_head_cachep;
   25.51 -
   25.52 -static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned;
   25.53 -
   25.54 -static int lock_buffer(blkif_t *blkif,
   25.55 -                       unsigned long buffer,
   25.56 -                       unsigned short size,
   25.57 -                       int writeable_buffer);
   25.58 -static void unlock_buffer(unsigned long buffer,
   25.59 -                          unsigned short size,
   25.60 -                          int writeable_buffer);
   25.61 -
   25.62 -static void io_schedule(unsigned long unused);
   25.63 -static int do_block_io_op(blkif_t *blkif, int max_to_do);
   25.64 -static void dispatch_rw_block_io(blkif_t *blkif,
   25.65 -                                 blk_ring_req_entry_t *req);
   25.66 -static void make_response(blkif_t *blkif, unsigned long id, 
   25.67 -                          unsigned short op, unsigned long st);
   25.68 -
   25.69 -
   25.70 -/******************************************************************
   25.71 - * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
   25.72 - */
   25.73 -
   25.74 -static struct list_head io_schedule_list;
   25.75 -static spinlock_t io_schedule_list_lock;
   25.76 -
   25.77 -static int __on_blkdev_list(blkif_t *blkif)
   25.78 -{
   25.79 -    return blkif->blkdev_list.next != NULL;
   25.80 -}
   25.81 -
   25.82 -static void remove_from_blkdev_list(blkif_t *blkif)
   25.83 -{
   25.84 -    unsigned long flags;
   25.85 -    if ( !__on_blkdev_list(blkif) ) return;
   25.86 -    spin_lock_irqsave(&io_schedule_list_lock, flags);
   25.87 -    if ( __on_blkdev_list(blkif) )
   25.88 -    {
   25.89 -        list_del(&blkif->blkdev_list);
   25.90 -        blkif->blkdev_list.next = NULL;
   25.91 -        blkif_put(blkif);
   25.92 -    }
   25.93 -    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
   25.94 -}
   25.95 -
   25.96 -static void add_to_blkdev_list_tail(blkif_t *blkif)
   25.97 -{
   25.98 -    unsigned long flags;
   25.99 -    if ( __on_blkdev_list(blkif) ) return;
  25.100 -    spin_lock_irqsave(&io_schedule_list_lock, flags);
  25.101 -    if ( !__on_blkdev_list(blkif) )
  25.102 -    {
  25.103 -        list_add_tail(&blkif->blkdev_list, &io_schedule_list);
  25.104 -        blkif_get(blkif);
  25.105 -    }
  25.106 -    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
  25.107 -}
  25.108 -
  25.109 -
  25.110 -/******************************************************************
  25.111 - * SCHEDULER FUNCTIONS
  25.112 - */
  25.113 -
  25.114 -static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
  25.115 -
  25.116 -static void io_schedule(unsigned long unused)
  25.117 -{
  25.118 -    blkif_t          *blkif;
  25.119 -    struct list_head *ent;
  25.120 -
  25.121 -    /* Queue up a batch of requests. */
  25.122 -    while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
  25.123 -            !list_empty(&io_schedule_list) )
  25.124 -    {
  25.125 -        ent = io_schedule_list.next;
  25.126 -        blkif = list_entry(ent, blkif_t, blkdev_list);
  25.127 -        blkif_get(blkif);
  25.128 -        remove_from_blkdev_list(blkif);
  25.129 -        if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
  25.130 -            add_to_blkdev_list_tail(blkif);
  25.131 -        blkif_put(blkif);
  25.132 -    }
  25.133 -
  25.134 -    /* Push the batch through to disc. */
  25.135 -    run_task_queue(&tq_disk);
  25.136 -}
  25.137 -
  25.138 -static void maybe_trigger_io_schedule(void)
  25.139 -{
  25.140 -    /*
  25.141 -     * Needed so that two processes, who together make the following predicate
  25.142 -     * true, don't both read stale values and evaluate the predicate
  25.143 -     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
  25.144 -     */
  25.145 -    smp_mb();
  25.146 -
  25.147 -    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
  25.148 -         !list_empty(&io_schedule_list) )
  25.149 -        tasklet_schedule(&io_schedule_tasklet);
  25.150 -}
  25.151 -
  25.152 -
  25.153 -
  25.154 -/******************************************************************
  25.155 - * COMPLETION CALLBACK -- Called as bh->b_end_io()
  25.156 - */
  25.157 -
  25.158 -static void end_block_io_op(struct buffer_head *bh, int uptodate)
  25.159 -{
  25.160 -    pending_req_t *pending_req = bh->b_private;
  25.161 -
  25.162 -    /* An error fails the entire request. */
  25.163 -    if ( !uptodate )
  25.164 -    {
  25.165 -        DPRINTK("Buffer not up-to-date at end of operation\n");
  25.166 -        pending_req->status = 2;
  25.167 -    }
  25.168 -
  25.169 -    unlock_buffer(virt_to_phys(bh->b_data), 
  25.170 -                  bh->b_size, 
  25.171 -                  (pending_req->operation==READ));
  25.172 -    
  25.173 -    if ( atomic_dec_and_test(&pending_req->pendcnt) )
  25.174 -    {
  25.175 -        make_response(pending_req->blkif, pending_req->id,
  25.176 -                      pending_req->operation, pending_req->status);
  25.177 -        blkif_put(pending_req->blkif);
  25.178 -        spin_lock(&pend_prod_lock);
  25.179 -        pending_ring[MASK_PEND_IDX(pending_prod)] = 
  25.180 -            pending_req - pending_reqs;
  25.181 -        pending_prod++;
  25.182 -        spin_unlock(&pend_prod_lock);
  25.183 -        maybe_trigger_io_schedule();
  25.184 -    }
  25.185 -}
  25.186 -
  25.187 -
  25.188 -
  25.189 -/******************************************************************************
  25.190 - * NOTIFICATION FROM GUEST OS.
  25.191 - */
  25.192 -
  25.193 -void vblkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
  25.194 -{
  25.195 -    blkif_t *blkif = dev_id;
  25.196 -    add_to_blkdev_list_tail(blkif);
  25.197 -    maybe_trigger_io_schedule();
  25.198 -}
  25.199 -
  25.200 -
  25.201 -
  25.202 -/******************************************************************
  25.203 - * DOWNWARD CALLS -- These interface with the block-device layer proper.
  25.204 - */
  25.205 -
  25.206 -static int lock_buffer(blkif_t *blkif,
  25.207 -                       unsigned long buffer,
  25.208 -                       unsigned short size,
  25.209 -                       int writeable_buffer)
  25.210 -{
  25.211 -    unsigned long    pfn;
  25.212 -
  25.213 -    for ( pfn = buffer >> PAGE_SHIFT; 
  25.214 -          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
  25.215 -          pfn++ )
  25.216 -    {
  25.217 -    }
  25.218 -
  25.219 -    return 1;
  25.220 -
  25.221 - fail:
  25.222 -    while ( pfn-- > (buffer >> PAGE_SHIFT) )
  25.223 -    {        
  25.224 -    }
  25.225 -    return 0;
  25.226 -}
  25.227 -
  25.228 -static void unlock_buffer(unsigned long buffer,
  25.229 -                          unsigned short size,
  25.230 -                          int writeable_buffer)
  25.231 -{
  25.232 -    unsigned long pfn;
  25.233 -
  25.234 -    for ( pfn = buffer >> PAGE_SHIFT; 
  25.235 -          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
  25.236 -          pfn++ )
  25.237 -    {
  25.238 -    }
  25.239 -}
  25.240 -
  25.241 -static int do_block_io_op(blkif_t *blkif, int max_to_do)
  25.242 -{
  25.243 -    blk_ring_t *blk_ring = blkif->blk_ring_base;
  25.244 -    blk_ring_req_entry_t *req;
  25.245 -    BLK_RING_IDX i;
  25.246 -    int more_to_do = 0;
  25.247 -
  25.248 -    /* Take items off the comms ring, taking care not to overflow. */
  25.249 -    for ( i = blkif->blk_req_cons; 
  25.250 -          (i != blk_ring->req_prod) && ((i-blkif->blk_resp_prod) != 
  25.251 -                                        BLK_RING_SIZE);
  25.252 -          i++ )
  25.253 -    {
  25.254 -        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
  25.255 -        {
  25.256 -            more_to_do = 1;
  25.257 -            break;
  25.258 -        }
  25.259 -        
  25.260 -        req = &blk_ring->ring[MASK_BLK_IDX(i)].req;
  25.261 -        switch ( req->operation )
  25.262 -        {
  25.263 -        case BLKIF_OP_READ:
  25.264 -        case BLKIF_OP_WRITE:
  25.265 -            dispatch_rw_block_io(blkif, req);
  25.266 -            break;
  25.267 -
  25.268 -        default:
  25.269 -            DPRINTK("error: unknown block io operation [%d]\n",
  25.270 -                    blk_ring->ring[i].req.operation);
  25.271 -            make_response(blkif, blk_ring->ring[i].req.id, 
  25.272 -                          blk_ring->ring[i].req.operation, 1);
  25.273 -            break;
  25.274 -        }
  25.275 -    }
  25.276 -
  25.277 -    blkif->blk_req_cons = i;
  25.278 -    return more_to_do;
  25.279 -}
  25.280 -
  25.281 -static void dispatch_rw_block_io(blkif_t *blkif,
  25.282 -                                 blk_ring_req_entry_t *req)
  25.283 -{
  25.284 -    extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
  25.285 -    struct buffer_head *bh;
  25.286 -    int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ;
  25.287 -    unsigned short nr_sects;
  25.288 -    unsigned long buffer;
  25.289 -    int i, tot_sects;
  25.290 -    pending_req_t *pending_req;
  25.291 -
  25.292 -    /* We map virtual scatter/gather segments to physical segments. */
  25.293 -    int new_segs, nr_psegs = 0;
  25.294 -    phys_seg_t phys_seg[MAX_BLK_SEGS * 2];
  25.295 -
  25.296 -    /* Check that number of segments is sane. */
  25.297 -    if ( unlikely(req->nr_segments == 0) || 
  25.298 -         unlikely(req->nr_segments > MAX_BLK_SEGS) )
  25.299 -    {
  25.300 -        DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
  25.301 -        goto bad_descriptor;
  25.302 -    }
  25.303 -
  25.304 -    /*
  25.305 -     * Check each address/size pair is sane, and convert into a
  25.306 -     * physical device and block offset. Note that if the offset and size
  25.307 -     * crosses a virtual extent boundary, we may end up with more
  25.308 -     * physical scatter/gather segments than virtual segments.
  25.309 -     */
  25.310 -    for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects )
  25.311 -    {
  25.312 -        buffer   = req->buffer_and_sects[i] & ~0x1FF;
  25.313 -        nr_sects = req->buffer_and_sects[i] &  0x1FF;
  25.314 -
  25.315 -        if ( unlikely(nr_sects == 0) )
  25.316 -        {
  25.317 -            DPRINTK("zero-sized data request\n");
  25.318 -            goto bad_descriptor;
  25.319 -        }
  25.320 -
  25.321 -        phys_seg[nr_psegs].dev           = req->device;
  25.322 -        phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
  25.323 -        phys_seg[nr_psegs].buffer        = buffer;
  25.324 -        phys_seg[nr_psegs].nr_sects      = nr_sects;
  25.325 -
  25.326 -        /* Translate the request into the relevant 'physical device' */
  25.327 -        new_segs = vbd_translate(&phys_seg[nr_psegs], blkif, operation);
  25.328 -        if ( new_segs < 0 )
  25.329 -        { 
  25.330 -            DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
  25.331 -                    operation == READ ? "read" : "write", 
  25.332 -                    req->sector_number + tot_sects, 
  25.333 -                    req->sector_number + tot_sects + nr_sects, 
  25.334 -                    req->device); 
  25.335 -            goto bad_descriptor;
  25.336 -        }
  25.337 -  
  25.338 -        nr_psegs += new_segs;
  25.339 -        ASSERT(nr_psegs <= MAX_BLK_SEGS*2);
  25.340 -    }
  25.341 -
  25.342 -    for ( i = 0; i < nr_psegs; i++ )
  25.343 -    {
  25.344 -        if ( unlikely(!lock_buffer(blkif, phys_seg[i].buffer, 
  25.345 -                                   phys_seg[i].nr_sects << 9,
  25.346 -                                   operation==READ)) )
  25.347 -        {
  25.348 -            DPRINTK("invalid buffer\n");
  25.349 -            while ( i-- > 0 )
  25.350 -                unlock_buffer(phys_seg[i].buffer, 
  25.351 -                              phys_seg[i].nr_sects << 9,
  25.352 -                              operation==READ);
  25.353 -            goto bad_descriptor;
  25.354 -        }
  25.355 -    }
  25.356 -
  25.357 -    pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]];
  25.358 -    pending_req->blkif     = blkif;
  25.359 -    pending_req->id        = req->id;
  25.360 -    pending_req->operation = operation;
  25.361 -    pending_req->status    = 0;
  25.362 -    atomic_set(&pending_req->pendcnt, nr_psegs);
  25.363 -
  25.364 -    blkif_get(blkif);
  25.365 -
  25.366 -    /* Now we pass each segment down to the real blkdev layer. */
  25.367 -    for ( i = 0; i < nr_psegs; i++ )
  25.368 -    {
  25.369 -        bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
  25.370 -        if ( unlikely(bh == NULL) )
  25.371 -            panic("bh is null\n");
  25.372 -        memset(bh, 0, sizeof (struct buffer_head));
  25.373 -    
  25.374 -        bh->b_size          = phys_seg[i].nr_sects << 9;
  25.375 -        bh->b_dev           = phys_seg[i].dev;
  25.376 -        bh->b_rsector       = (unsigned long)phys_seg[i].sector_number;
  25.377 -
  25.378 -        /* SMH: we store a 'pseudo-virtual' bogus address in b_data since
  25.379 -           later code will undo this transformation (i.e. +-PAGE_OFFSET). */
  25.380 -        bh->b_data          = phys_to_virt(phys_seg[i].buffer);
  25.381 - 
  25.382 -        /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */
  25.383 -        bh->b_page          = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; 
  25.384 -        bh->b_end_io        = end_block_io_op;
  25.385 -        bh->b_private       = pending_req;
  25.386 -
  25.387 -        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock);
  25.388 -        if ( operation == WRITE )
  25.389 -            bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
  25.390 -
  25.391 -        atomic_set(&bh->b_count, 1);
  25.392 -
  25.393 -        /* Dispatch a single request. We'll flush it to disc later. */
  25.394 -        submit_bh(operation, bh);
  25.395 -    }
  25.396 -
  25.397 -    return;
  25.398 -
  25.399 - bad_descriptor:
  25.400 -    make_response(blkif, req->id, req->operation, 1);
  25.401 -} 
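Each buffer_and_sects[] entry above packs a 512-byte-aligned machine buffer address and a sector count into one word: the count (at most 511) lives in the low nine bits, hence the 0x1FF masks. A minimal userspace sketch of that packing, for illustration only (names hypothetical):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Pack a sector-aligned buffer address and a sector count (< 512)
     * into one word, as the buffer_and_sects[] entries do. */
    static uint32_t pack_seg(uint32_t buf, uint32_t nr_sects)
    {
        assert((buf & 0x1FF) == 0); /* buffer must be 512-byte aligned */
        assert(nr_sects < 512);     /* count must fit in nine bits     */
        return buf | nr_sects;
    }

    int main(void)
    {
        uint32_t seg = pack_seg(0x12345A00, 8);
        printf("buffer=%#x nr_sects=%u\n",
               (unsigned)(seg & ~0x1FFu), (unsigned)(seg & 0x1FFu));
        return 0;
    }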
  25.402 -
  25.403 -
  25.404 -
  25.405 -/******************************************************************
  25.406 - * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
  25.407 - */
  25.408 -
  25.409 -
  25.410 -static void make_response(blkif_t *blkif, unsigned long id, 
  25.411 -                          unsigned short op, unsigned long st)
  25.412 -{
  25.413 -    blk_ring_resp_entry_t *resp;
  25.414 -
  25.415 -    /* Place on the response ring for the relevant domain. */ 
  25.416 -    spin_lock(&blkif->blk_ring_lock);
  25.417 -    resp = &blkif->blk_ring_base->
  25.418 -        ring[MASK_BLK_IDX(blkif->blk_resp_prod)].resp;
  25.419 -    resp->id        = id;
  25.420 -    resp->operation = op;
  25.421 -    resp->status    = st;
  25.422 -    wmb();
  25.423 -    blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod;
  25.424 -    spin_unlock(&blkif->blk_ring_lock);
  25.425 -
  25.426 -    /* Kick the relevant domain. */
  25.427 -    notify_via_evtchn(blkif->evtchn);
  25.428 -}
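make_response() follows the usual shared-ring discipline: write the slot, then a write barrier, then publish the new producer index; the consumer mirrors this with a read barrier before draining. A self-contained userspace analogue using C11 atomics, where release/acquire stand in for the kernel's wmb()/rmb() (illustrative sketch, not the driver's actual ring):

    #include <stdatomic.h>
    #include <stdio.h>

    #define RING_SIZE 8u
    #define MASK(i)   ((i) & (RING_SIZE - 1))

    static int ring[RING_SIZE];
    static _Atomic unsigned prod;  /* shared producer index  */
    static unsigned cons;          /* consumer-private index */

    static void produce(int v)     /* cf. make_response() */
    {
        ring[MASK(atomic_load_explicit(&prod, memory_order_relaxed))] = v;
        /* Payload must be visible before the index moves (the wmb()). */
        atomic_fetch_add_explicit(&prod, 1, memory_order_release);
    }

    static void consume(void)      /* cf. the frontend's response loop */
    {
        unsigned p = atomic_load_explicit(&prod, memory_order_acquire);
        while ( cons != p )
            printf("resp %d\n", ring[MASK(cons++)]);
    }

    int main(void) { produce(7); produce(9); consume(); return 0; }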
  25.429 -
  25.430 -static void blkif_debug_int(int irq, void *unused, struct pt_regs *regs)
  25.431 -{
  25.432 -#if 0
  25.433 -    unsigned long flags;
  25.434 -    struct task_struct *p;
  25.435 -    blk_ring_t *blk_ring;
  25.436 -    int i;
  25.437 -
  25.438 -    printk("Dumping block queue stats: nr_pending = %d"
  25.439 -           " (prod=0x%08x,cons=0x%08x)\n",
  25.440 -           NR_PENDING_REQS, pending_prod, pending_cons);
  25.441 -
  25.442 -    read_lock_irqsave(&tasklist_lock, flags);
  25.443 -    for_each_domain ( p )
  25.444 -    {
  25.445 -        printk("Domain: %llu\n", blkif->domain);
  25.446 -        blk_ring = blkif->blk_ring_base;
  25.447 -        printk("  req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/"
  25.448 -               "0x%08x on_list=%d\n",
  25.449 -               blk_ring->req_prod, blkif->blk_req_cons,
  25.450 -               blk_ring->resp_prod, blkif->blk_resp_prod,
  25.451 -               __on_blkdev_list(p));
  25.452 -    }
  25.453 -    read_unlock_irqrestore(&tasklist_lock, flags);
  25.454 -
  25.455 -    for ( i = 0; i < MAX_PENDING_REQS; i++ )
  25.456 -    {
  25.457 -        printk("Pend%d: dom=%p, id=%08lx, cnt=%d, op=%d, status=%d\n",
  25.458 -               i, pending_reqs[i].domain, pending_reqs[i].id,
  25.459 -               atomic_read(&pending_reqs[i].pendcnt), 
  25.460 -               pending_reqs[i].operation, pending_reqs[i].status);
  25.461 -    }
  25.462 -#endif
  25.463 -}
  25.464 -
  25.465 -void unlink_blkdev_info(blkif_t *blkif)
  25.466 -{
  25.467 -    unsigned long flags;
  25.468 -
  25.469 -    spin_lock_irqsave(&io_schedule_list_lock, flags);
  25.470 -    if ( __on_blkdev_list(blkif) )
  25.471 -    {
  25.472 -        list_del(&blkif->blkdev_list);
  25.473 -        blkif->blkdev_list.next = (void *)0xdeadbeef;
  25.474 -        blkif_put(blkif);
  25.475 -    }
  25.476 -    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
  25.477 -}
  25.478 -
  25.479 -static int __init init_module(void)
  25.480 -{
  25.481 -    int i;
  25.482 -
  25.483 -    pending_cons = 0;
  25.484 -    pending_prod = MAX_PENDING_REQS;
  25.485 -    memset(pending_reqs, 0, sizeof(pending_reqs));
  25.486 -    for ( i = 0; i < MAX_PENDING_REQS; i++ )
  25.487 -        pending_ring[i] = i;
  25.488 -    
  25.489 -    for ( i = 0; i < NR_CPUS; i++ )
  25.490 -        completed_bhs[i] = NULL;
  25.491 -        
  25.492 -    spin_lock_init(&io_schedule_list_lock);
  25.493 -    INIT_LIST_HEAD(&io_schedule_list);
  25.494 -
  25.495 -    if ( request_irq(bind_virq_to_irq(VIRQ_DEBUG), blkif_debug_int, 
  25.496 -                     SA_SHIRQ, "vblkif-backend-dbg", &blkif_debug_int) != 0 )
  25.497 -        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
  25.498 -
  25.499 -    buffer_head_cachep = kmem_cache_create(
  25.500 -        "buffer_head_cache", sizeof(struct buffer_head),
  25.501 -        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
  25.502 -
  25.503 -    return 0;
  25.504 -}
  25.505 -
  25.506 -static void cleanup_module(void)
  25.507 -{
  25.508 -}
  25.509 -
  25.510 -module_init(init_module);
  25.511 -module_exit(cleanup_module);
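init_module() above seeds pending_ring[] with every slot index and sets pending_prod to MAX_PENDING_REQS, so the ring doubles as a free list of request slots: dispatch pops an index at pending_cons (as the dispatch path does) and completion pushes it back at pending_prod. A hedged userspace sketch of that allocator:

    #include <stdio.h>

    #define MAX_PENDING_REQS 64u
    #define MASK_PEND_IDX(i) ((i) & (MAX_PENDING_REQS - 1))

    static unsigned pending_ring[MAX_PENDING_REQS];
    static unsigned pending_prod = MAX_PENDING_REQS, pending_cons;

    static int alloc_slot(void)          /* pop a free slot index */
    {
        if ( pending_prod == pending_cons )
            return -1;                   /* ring empty: nothing free */
        return (int)pending_ring[MASK_PEND_IDX(pending_cons++)];
    }

    static void free_slot(int idx)       /* push a slot index back */
    {
        pending_ring[MASK_PEND_IDX(pending_prod++)] = (unsigned)idx;
    }

    int main(void)
    {
        for ( unsigned i = 0; i < MAX_PENDING_REQS; i++ )
            pending_ring[i] = i;         /* initially all slots free */
        int s = alloc_slot();
        printf("got slot %d\n", s);
        free_slot(s);
        return 0;
    }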
    26.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c	Sat Apr 24 08:10:58 2004 +0000
    26.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.3 @@ -1,578 +0,0 @@
    26.4 -/******************************************************************************
    26.5 - * arch/xen/drivers/vblkif/backend/vbd.c
    26.6 - * 
    26.7 - * Routines for managing virtual block devices (VBDs).
    26.8 - * 
    26.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   26.10 - */
   26.11 -
   26.12 -#include "common.h"
   26.13 -
   26.14 -void vbd_create(blkif_vbd_create_t *create) 
   26.15 -{
   26.16 -    vbd_t       *vbd; 
   26.17 -    rb_node_t  **rb_p, *rb_parent = NULL;
   26.18 -    blkif_t     *blkif;
   26.19 -    blkif_vdev_t vdevice = create->vdevice;
   26.20 -
   26.21 -    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
   26.22 -    if ( unlikely(blkif == NULL) )
   26.23 -    {
   26.24 -        DPRINTK("vbd_create attempted for non-existent blkif (%llu,&u)\n", 
   26.25 -                create->domid, create->blkif_handle); 
   26.26 -        create->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
   26.27 -        return;
   26.28 -    }
   26.29 -
   26.30 -    spin_lock(&blkif->vbd_lock);
   26.31 -
   26.32 -    rb_p = &blkif->vbd_rb.rb_node;
   26.33 -    while ( *rb_p != NULL )
   26.34 -    {
   26.35 -        rb_parent = *rb_p;
   26.36 -        vbd = rb_entry(rb_parent, vbd_t, rb);
   26.37 -        if ( vdevice < vbd->vdevice )
   26.38 -        {
   26.39 -            rb_p = &rb_parent->rb_left;
   26.40 -        }
   26.41 -        else if ( vdevice > vbd->vdevice )
   26.42 -        {
   26.43 -            rb_p = &rb_parent->rb_right;
   26.44 -        }
   26.45 -        else
   26.46 -        {
   26.47 -            DPRINTK("vbd_create attempted for already existing vbd\n");
   26.48 -            create->status = BLKIF_STATUS_VBD_EXISTS;
   26.49 -            goto out;
   26.50 -        }
   26.51 -    }
   26.52 -
   26.53 -    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
   26.54 -    {
   26.55 -        DPRINTK("vbd_create: out of memory\n");
   26.56 -        create->status = BLKIF_STATUS_OUT_OF_MEMORY;
   26.57 -        goto out;
   26.58 -    }
   26.59 -
   26.60 -    vbd->vdevice = vdevice; 
   26.61 -    vbd->mode    = create->mode; 
   26.62 -    vbd->type    = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
   26.63 -    vbd->extents = NULL; 
   26.64 -
   26.65 -    rb_link_node(&vbd->rb, rb_parent, rb_p);
   26.66 -    rb_insert_color(&vbd->rb, &blkif->vbd_rb);
   26.67 -
   26.68 -    create->status = BLKIF_STATUS_OKAY;
   26.69 -
   26.70 - out:
   26.71 -    spin_unlock(&blkif->vbd_lock);
   26.72 -    blkif_put(blkif);
   26.73 -}
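The slot-finding walk in vbd_create() is the standard Linux rbtree insertion idiom: descend through rb_node** links, remembering the parent, then splice with rb_link_node() and rebalance with rb_insert_color(). Stripped of the rebalancing, the walk is plain pointer-to-pointer BST insertion; a runnable (unbalanced) rendering for illustration:

    #include <stdio.h>

    struct bst { int key; struct bst *left, *right; };

    /* Walk the link-to-patch rather than the node, as vbd_create()'s
     * rb_p walk does; the empty tree needs no special case. */
    static int bst_insert(struct bst **root, struct bst *n)
    {
        struct bst **p = root;
        while ( *p != NULL )
        {
            if ( n->key < (*p)->key )      p = &(*p)->left;
            else if ( n->key > (*p)->key ) p = &(*p)->right;
            else                           return -1; /* already exists */
        }
        *p = n;
        return 0;
    }

    int main(void)
    {
        struct bst *root = NULL;
        struct bst a = { 5, NULL, NULL }, b = { 3, NULL, NULL };
        printf("%d %d %d\n", bst_insert(&root, &a),
               bst_insert(&root, &b), bst_insert(&root, &b));
        return 0;                          /* prints: 0 0 -1 */
    }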
   26.74 -
   26.75 -
   26.76 -/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
   26.77 -void vbd_grow(blkif_vbd_grow_t *grow) 
   26.78 -{
   26.79 -    blkif_t          *blkif;
   26.80 -    xen_extent_le_t **px, *x; 
   26.81 -    vbd_t            *vbd = NULL;
   26.82 -    rb_node_t        *rb;
   26.83 -    blkif_vdev_t      vdevice = grow->vdevice;
   26.84 -
   26.85 -    blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
   26.86 -    if ( unlikely(blkif == NULL) )
   26.87 -    {
   26.88 -        DPRINTK("vbd_grow attempted for non-existent blkif (%llu,&u)\n", 
   26.89 -                grow->domid, grow->blkif_handle); 
   26.90 -        grow->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
   26.91 -        return;
   26.92 -    }
   26.93 -
   26.94 -    spin_lock(&blkif->vbd_lock);
   26.95 -
   26.96 -    rb = blkif->vbd_rb.rb_node;
   26.97 -    while ( rb != NULL )
   26.98 -    {
   26.99 -        vbd = rb_entry(rb, vbd_t, rb);
  26.100 -        if ( vdevice < vbd->vdevice )
  26.101 -            rb = rb->rb_left;
  26.102 -        else if ( vdevice > vbd->vdevice )
  26.103 -            rb = rb->rb_right;
  26.104 -        else
  26.105 -            break;
  26.106 -    }
  26.107 -
  26.108 -    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
  26.109 -    {
  26.110 -        DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
  26.111 -        grow->status = BLKIF_STATUS_VBD_NOT_FOUND;
  26.112 -        goto out;
  26.113 -    } 
  26.114 -
  26.115 -    if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) )
  26.116 -    {
  26.117 -        DPRINTK("vbd_grow: out of memory\n");
  26.118 -        grow->status = BLKIF_STATUS_OUT_OF_MEMORY;
  26.119 -        goto out;
  26.120 -    }
  26.121 - 
  26.122 -    x->extent.device        = grow->extent.device; 
  26.123 -    x->extent.sector_start  = grow->extent.sector_start; 
  26.124 -    x->extent.sector_length = grow->extent.sector_length; 
  26.125 -    x->next                 = (xen_extent_le_t *)NULL; 
  26.126 -
  26.127 -    for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) 
  26.128 -        continue;
  26.129 -
  26.130 -    *px = x;
  26.131 -
  26.132 -    grow->status = BLKIF_STATUS_OKAY;
  26.133 -
  26.134 - out:
  26.135 -    spin_unlock(&blkif->vbd_lock);
  26.136 -    blkif_put(blkif);
  26.137 -}
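The extent append in vbd_grow() uses the same pointer-to-pointer idiom for lists: px starts at &vbd->extents and advances through &(*px)->next, so storing through *px appends correctly even to an empty list. A self-contained sketch:

    #include <stdio.h>

    struct node { int val; struct node *next; };

    /* Tail-append via a pointer to the link, as vbd_grow() does. */
    static void append(struct node **head, struct node *n)
    {
        struct node **px;
        for ( px = head; *px != NULL; px = &(*px)->next )
            continue;
        *px = n;
    }

    int main(void)
    {
        struct node *head = NULL;
        struct node a = { 1, NULL }, b = { 2, NULL };
        append(&head, &a);
        append(&head, &b);
        for ( struct node *p = head; p != NULL; p = p->next )
            printf("%d\n", p->val);
        return 0;
    }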
  26.138 -
  26.139 -
  26.140 -void vbd_shrink(blkif_vbd_shrink_t *shrink)
  26.141 -{
  26.142 -    blkif_t          *blkif;
  26.143 -    xen_extent_le_t **px, *x; 
  26.144 -    vbd_t            *vbd = NULL;
  26.145 -    rb_node_t        *rb;
  26.146 -    blkif_vdev_t      vdevice = shrink->vdevice;
  26.147 -
  26.148 -    blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle);
  26.149 -    if ( unlikely(blkif == NULL) )
  26.150 -    {
  26.151 -        DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,&u)\n", 
  26.152 -                shrink->domid, shrink->blkif_handle); 
  26.153 -        shrink->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
  26.154 -        return;
  26.155 -    }
  26.156 -
  26.157 -    spin_lock(&blkif->vbd_lock);
  26.158 -
  26.159 -    rb = blkif->vbd_rb.rb_node;
  26.160 -    while ( rb != NULL )
  26.161 -    {
  26.162 -        vbd = rb_entry(rb, vbd_t, rb);
  26.163 -        if ( vdevice < vbd->vdevice )
  26.164 -            rb = rb->rb_left;
  26.165 -        else if ( vdevice > vbd->vdevice )
  26.166 -            rb = rb->rb_right;
  26.167 -        else
  26.168 -            break;
  26.169 -    }
  26.170 -
  26.171 -    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
  26.172 -    {
  26.173 -        shrink->status = BLKIF_STATUS_VBD_NOT_FOUND;
  26.174 -        goto out;
  26.175 -    }
  26.176 -
  26.177 -    if ( unlikely(vbd->extents == NULL) )
  26.178 -    {
  26.179 -        shrink->status = BLKIF_STATUS_EXTENT_NOT_FOUND;
  26.180 -        goto out;
  26.181 -    }
  26.182 -
  26.183 -    /* Find the last extent. We now know that there is at least one. */
  26.184 -    for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next )
  26.185 -        continue;
  26.186 -
  26.187 -    x   = *px;
  26.188 -    *px = x->next;
  26.189 -    kfree(x);
  26.190 -
  26.191 -    shrink->status = BLKIF_STATUS_OKAY;
  26.192 -
  26.193 - out:
  26.194 -    spin_unlock(&blkif->vbd_lock);
  26.195 -    blkif_put(blkif);
  26.196 -}
  26.197 -
  26.198 -
  26.199 -void vbd_destroy(blkif_vbd_destroy_t *destroy) 
  26.200 -{
  26.201 -    blkif_t         *blkif;
  26.202 -    vbd_t           *vbd;
  26.203 -    rb_node_t       *rb;
  26.204 -    xen_extent_le_t *x, *t;
  26.205 -    blkif_vdev_t     vdevice = destroy->vdevice;
  26.206 -
  26.207 -    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
  26.208 -    if ( unlikely(blkif == NULL) )
  26.209 -    {
  26.210 -        DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,&u)\n", 
  26.211 -                destroy->domid, destroy->blkif_handle); 
  26.212 -        destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
  26.213 -        return;
  26.214 -    }
  26.215 -
  26.216 -    spin_lock(&blkif->vbd_lock);
  26.217 -
  26.218 -    rb = blkif->vbd_rb.rb_node;
  26.219 -    while ( rb != NULL )
  26.220 -    {
  26.221 -        vbd = rb_entry(rb, vbd_t, rb);
  26.222 -        if ( vdevice < vbd->vdevice )
  26.223 -            rb = rb->rb_left;
  26.224 -        else if ( vdevice > vbd->vdevice )
  26.225 -            rb = rb->rb_right;
  26.226 -        else
  26.227 -            goto found;
  26.228 -    }
  26.229 -
  26.230 -    destroy->status = BLKIF_STATUS_VBD_NOT_FOUND;
  26.231 -    goto out;
  26.232 -
  26.233 - found:
  26.234 -    rb_erase(rb, &blkif->vbd_rb);
  26.235 -    x = vbd->extents;
  26.236 -    kfree(vbd);
  26.237 -
  26.238 -    while ( x != NULL )
  26.239 -    {
  26.240 -        t = x->next;
  26.241 -        kfree(x);
  26.242 -        x = t;
  26.243 -    }
  26.244 -    
  26.245 - out:
  26.246 -    spin_unlock(&blkif->vbd_lock);
  26.247 -    blkif_put(blkif);
  26.248 -}
  26.249 -
  26.250 -
  26.251 -void destroy_all_vbds(blkif_t *blkif)
  26.252 -{
  26.253 -    vbd_t *vbd;
  26.254 -    rb_node_t *rb;
  26.255 -    xen_extent_le_t *x, *t;
  26.256 -
  26.257 -    spin_lock(&blkif->vbd_lock);
  26.258 -
  26.259 -    while ( (rb = blkif->vbd_rb.rb_node) != NULL )
  26.260 -    {
  26.261 -        vbd = rb_entry(rb, vbd_t, rb);
  26.262 -
  26.263 -        rb_erase(rb, &blkif->vbd_rb);
  26.264 -        x = vbd->extents;
  26.265 -        kfree(vbd);
  26.266 -        
  26.267 -        while ( x != NULL )
  26.268 -        {
  26.269 -            t = x->next;
  26.270 -            kfree(x);
  26.271 -            x = t;
  26.272 -        }          
  26.273 -    }
  26.274 -
  26.275 -    spin_unlock(&blkif->vbd_lock);
  26.276 -}
  26.277 -
  26.278 -
  26.279 -static int vbd_probe_single(xen_disk_info_t *xdi, 
  26.280 -                            vbd_t *vbd, 
  26.281 -                            struct task_struct *p)
  26.282 -{
  26.283 -    xen_extent_le_t *x; 
  26.284 -    xen_disk_t cur_disk; 
  26.285 -
  26.286 -    if ( xdi->count == xdi->max )
  26.287 -    {
  26.288 -        DPRINTK("vbd_probe_devices: out of space for probe.\n"); 
  26.289 -        return -ENOMEM; 
  26.290 -    }
  26.291 -
  26.292 -    cur_disk.device = vbd->vdevice; 
  26.293 -    cur_disk.info   = vbd->type;
  26.294 -    if ( !VBD_CAN_WRITE(vbd) )
  26.295 -        cur_disk.info |= XD_FLAG_RO; 
  26.296 -    cur_disk.capacity = 0ULL;
  26.297 -    for ( x = vbd->extents; x != NULL; x = x->next )
  26.298 -        cur_disk.capacity += x->extent.nr_sectors; 
  26.299 -    cur_disk.domain = p->domain; 
  26.300 -        
  26.301 -    /* Now copy into relevant part of user-space buffer */
  26.302 -    if( copy_to_user(&xdi->disks[xdi->count], 
  26.303 -                     &cur_disk, 
  26.304 -                     sizeof(xen_disk_t)) )
  26.305 -    { 
  26.306 -        DPRINTK("vbd_probe_devices: copy_to_user failed\n");
  26.307 -        return -EFAULT;
  26.308 -    } 
  26.309 -        
  26.310 -    xdi->count++; 
  26.311 -
  26.312 -    return 0;
  26.313 -}
  26.314 -
  26.315 -
  26.316 -static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
  26.317 -{
  26.318 -    int rc = 0;
  26.319 -    rb_node_t *rb;
  26.320 -
  26.321 -    spin_lock(&p->vbd_lock);
  26.322 -
  26.323 -    if ( (rb = p->vbd_rb.rb_node) == NULL )
  26.324 -        goto out;
  26.325 -
  26.326 - new_subtree:
  26.327 -    /* STEP 1. Find least node (it'll be left-most). */
  26.328 -    while ( rb->rb_left != NULL )
  26.329 -        rb = rb->rb_left;
  26.330 -
  26.331 -    for ( ; ; )
  26.332 -    {
  26.333 -        /* STEP 2. Left subtree dealt with; now process the current node. */
  26.334 -        if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 )
  26.335 -            goto out;
  26.336 -
  26.337 -        /* STEP 3. Process right subtree, if any. */
  26.338 -        if ( rb->rb_right != NULL )
  26.339 -        {
  26.340 -            rb = rb->rb_right;
  26.341 -            goto new_subtree;
  26.342 -        }
  26.343 -
  26.344 -        /* STEP 4. Done both subtrees. Head back through ancestors. */
  26.345 -        for ( ; ; ) 
  26.346 -        {
  26.347 -            /* We're done when we get back to the root node. */
  26.348 -            if ( rb->rb_parent == NULL )
  26.349 -                goto out;
  26.350 -            /* If we are left of parent, then parent is next to process. */
  26.351 -            if ( rb->rb_parent->rb_left == rb )
  26.352 -                break;
  26.353 -            /* If we are right of parent, then we climb to grandparent. */
  26.354 -            rb = rb->rb_parent;
  26.355 -        }
  26.356 -
  26.357 -        rb = rb->rb_parent;
  26.358 -    }
  26.359 -
  26.360 - out:
  26.361 -    spin_unlock(&p->vbd_lock);
  26.362 -    return rc;  
  26.363 -}
  26.364 -
  26.365 -
  26.366 -/*
  26.367 - * Return information about the VBDs available for a given domain, or for all 
  26.368 - * domains; in the general case the 'domain' argument will be 0 which means 
  26.369 - * "information about the caller"; otherwise the 'domain' argument will 
  26.370 - * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of 
  26.371 - * these cases require the caller to be privileged.
  26.372 - */
  26.373 -long vbd_probe(vbd_probe_t *probe) 
  26.374 -{
  26.375 -    struct task_struct *p = NULL; 
  26.376 -    unsigned long flags;
  26.377 -    long ret = 0;  
  26.378 -
  26.379 -    if ( probe->domain != 0 )
  26.380 -    { 
  26.381 -        /* We can only probe for ourselves (unless we're privileged). */
  26.382 -        if( (probe->domain != current->domain) && !IS_PRIV(current) )
  26.383 -            return -EPERM; 
  26.384 -
  26.385 -        if ( (probe->domain != VBD_PROBE_ALL) &&
  26.386 -             ((p = find_domain_by_id(probe->domain)) == NULL) )
  26.387 -        {
  26.388 -            DPRINTK("vbd_probe attempted for non-existent domain %llu\n", 
  26.389 -                    probe->domain); 
  26.390 -            return -EINVAL; 
  26.391 -        }
  26.392 -    }
  26.393 -    else
  26.394 -    { 
  26.395 -        /* Default is to probe for ourselves. */
  26.396 -        p = current; 
  26.397 -        get_task_struct(p); /* to mirror final put_task_struct */
  26.398 -    }
  26.399 -
  26.400 -    if ( probe->domain == VBD_PROBE_ALL )
  26.401 -    { 
  26.402 -        read_lock_irqsave(&tasklist_lock, flags);
  26.403 -        for_each_domain ( p )
  26.404 -        {
  26.405 -            if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
  26.406 -            { 
  26.407 -                read_unlock_irqrestore(&tasklist_lock, flags);
  26.408 -                goto out; 
  26.409 -            }
  26.410 -        }
  26.411 -        read_unlock_irqrestore(&tasklist_lock, flags);
  26.412 -    } 
  26.413 -    else if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
  26.414 -        goto out; 
  26.415 -
  26.416 - out: 
  26.417 -    if ( ret != 0 )
  26.418 -        DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret); 
  26.419 -    if ( p != NULL )
  26.420 -        put_task_struct(p); 
  26.421 -    return ret; 
  26.422 -}
  26.423 -
  26.424 -
  26.425 -long vbd_info(vbd_info_t *info) 
  26.426 -{
  26.427 -    struct task_struct *p; 
  26.428 -    xen_extent_le_t *x; 
  26.429 -    xen_extent_t *extents; 
  26.430 -    vbd_t *vbd = NULL;
  26.431 -    rb_node_t *rb;
  26.432 -    long ret = 0;  
  26.433 -   
  26.434 -    if ( (info->domain != current->domain) && !IS_PRIV(current) )
  26.435 -        return -EPERM; 
  26.436 -
  26.437 -    if ( (p = find_domain_by_id(info->domain)) == NULL )
  26.438 -    {
  26.439 -        DPRINTK("vbd_info attempted for non-existent domain %llu\n", 
  26.440 -                info->domain); 
  26.441 -        return -EINVAL; 
  26.442 -    }
  26.443 -
  26.444 -    spin_lock(&p->vbd_lock);
  26.445 -
  26.446 -    rb = p->vbd_rb.rb_node;
  26.447 -    while ( rb != NULL )
  26.448 -    {
  26.449 -        vbd = rb_entry(rb, vbd_t, rb);
  26.450 -        if ( info->vdevice < vbd->vdevice )
  26.451 -            rb = rb->rb_left;
  26.452 -        else if ( info->vdevice > vbd->vdevice )
  26.453 -            rb = rb->rb_right;
  26.454 -        else
  26.455 -            break;
  26.456 -    }
  26.457 -
  26.458 -    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) )
  26.459 -    {
  26.460 -        DPRINTK("vbd_info attempted on non-existent VBD.\n"); 
  26.461 -        ret = -EINVAL; 
  26.462 -        goto out; 
  26.463 -    }
  26.464 -
  26.465 -    info->mode     = vbd->mode;
  26.466 -    info->nextents = 0; 
  26.467 -
  26.468 -    extents = info->extents;
  26.469 -    for ( x = vbd->extents; x != NULL; x = x->next )
  26.470 -    {
  26.471 -        if ( info->nextents == info->maxextents )
  26.472 -            break;
  26.473 -        if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) )
  26.474 -        {
  26.475 -            DPRINTK("vbd_info: copy_to_user failed\n");
  26.476 -            ret = -EFAULT;
  26.477 -            goto out; 
  26.478 -        } 
  26.479 -        extents++;
  26.480 -        info->nextents++;
  26.481 -    }
  26.482 -
  26.483 - out: 
  26.484 -    spin_unlock(&p->vbd_lock);
  26.485 -    put_task_struct(p); 
  26.486 -    return ret; 
  26.487 -}
  26.488 -
  26.489 -
  26.490 -int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
  26.491 -{
  26.492 -    xen_extent_le_t *x; 
  26.493 -    vbd_t *vbd;
  26.494 -    rb_node_t *rb;
  26.495 -    xen_sector_t sec_off;
  26.496 -    unsigned long nr_secs;
  26.497 -
  26.498 -    spin_lock(&p->vbd_lock);
  26.499 -
  26.500 -    rb = p->vbd_rb.rb_node;
  26.501 -    while ( rb != NULL )
  26.502 -    {
  26.503 -        vbd = rb_entry(rb, vbd_t, rb);
  26.504 -        if ( pseg->dev < vbd->vdevice )
  26.505 -            rb = rb->rb_left;
  26.506 -        else if ( pseg->dev > vbd->vdevice )
  26.507 -            rb = rb->rb_right;
  26.508 -        else
  26.509 -            goto found;
  26.510 -    }
  26.511 -
  26.512 -    DPRINTK("vbd_translate; domain %llu attempted to access "
  26.513 -            "non-existent VBD.\n", p->domain); 
  26.514 -
  26.515 -    spin_unlock(&p->vbd_lock);
  26.516 -    return -ENODEV; 
  26.517 -
  26.518 - found:
  26.519 -
  26.520 -    if ( ((operation == READ) && !VBD_CAN_READ(vbd)) ||
  26.521 -         ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) )
  26.522 -    {
  26.523 -        spin_unlock(&p->vbd_lock);
  26.524 -        return -EACCES; 
  26.525 -    }
  26.526 -
  26.527 -    /*
  26.528 -     * Now iterate through the list of xen_extents, working out which should 
  26.529 -     * be used to perform the translation.
  26.530 -     */
  26.531 -    sec_off = pseg->sector_number; 
  26.532 -    nr_secs = pseg->nr_sects;
  26.533 -    for ( x = vbd->extents; x != NULL; x = x->next )
  26.534 -    { 
  26.535 -        if ( sec_off < x->extent.nr_sectors )
  26.536 -        {
  26.537 -            pseg->dev = x->extent.device; 
  26.538 -            pseg->sector_number = x->extent.start_sector + sec_off;
  26.539 -            if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) )
  26.540 -                goto overrun;
  26.541 -            spin_unlock(&p->vbd_lock);
  26.542 -            return 1;
  26.543 -        } 
  26.544 -        sec_off -= x->extent.nr_sectors; 
  26.545 -    }
  26.546 -
  26.547 -    DPRINTK("vbd_translate: end of vbd.\n");
  26.548 -    spin_unlock(&p->vbd_lock);
  26.549 -    return -EACCES; 
  26.550 -
  26.551 -    /*
  26.552 -     * Here we deal with overrun onto the following extent. We don't deal with 
  26.553 -     * overrun of more than one boundary since each request is restricted to 
  26.554 -     * 2^9 512-byte sectors, so it should be trivial for control software to 
  26.555 -     * ensure that extents are large enough to prevent excessive overrun.
  26.556 -     */
  26.557 - overrun:
  26.558 -
  26.559 -    /* Adjust length of first chunk to run to end of first extent. */
  26.560 -    pseg[0].nr_sects = x->extent.nr_sectors - sec_off;
  26.561 -
  26.562 -    /* Set second chunk buffer and length to start where first chunk ended. */
  26.563 -    pseg[1].buffer   = pseg[0].buffer + (pseg[0].nr_sects << 9);
  26.564 -    pseg[1].nr_sects = nr_secs - pseg[0].nr_sects;
  26.565 -
  26.566 -    /* Now move to the next extent. Check it exists and is long enough! */
  26.567 -    if ( unlikely((x = x->next) == NULL) || 
  26.568 -         unlikely(x->extent.nr_sectors < pseg[1].nr_sects) )
  26.569 -    {
  26.570 -        DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
  26.571 -        spin_unlock(&p->vbd_lock);
  26.572 -        return -EACCES;
  26.573 -    }
  26.574 -
  26.575 -    /* Store the real device and start sector for the second chunk. */
  26.576 -    pseg[1].dev           = x->extent.device;
  26.577 -    pseg[1].sector_number = x->extent.start_sector;
  26.578 -    
  26.579 -    spin_unlock(&p->vbd_lock);
  26.580 -    return 2;
  26.581 -}
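A worked example of the overrun path, with hypothetical numbers: if the first matching extent holds 1000 sectors and the request maps to sec_off 996 with nr_secs 8, the request straddles the boundary, so chunk one keeps the last 4 sectors of the first extent and chunk two takes 4 sectors at the start of the next, its buffer advanced by 4 << 9 bytes:

    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical inputs to vbd_translate()'s overrun path. */
        unsigned long ext0 = 1000, sec_off = 996, nr_secs = 8;
        unsigned long c0 = ext0 - sec_off;  /* first chunk:  4 sectors */
        unsigned long c1 = nr_secs - c0;    /* second chunk: 4 sectors */
        printf("chunk0=%lu sectors, chunk1=%lu sectors, "
               "chunk1 buffer offset=%lu bytes\n", c0, c1, c0 << 9);
        return 0;
    }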
    27.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile	Sat Apr 24 08:10:58 2004 +0000
    27.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.3 @@ -1,3 +0,0 @@
    27.4 -O_TARGET := drv.o
    27.5 -obj-y := block.o vbd.o
    27.6 -include $(TOPDIR)/Rules.make
    28.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c	Sat Apr 24 08:10:58 2004 +0000
    28.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.3 @@ -1,625 +0,0 @@
    28.4 -/******************************************************************************
    28.5 - * block.c
    28.6 - * 
    28.7 - * Xenolinux virtual block-device driver.
    28.8 - * 
    28.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   28.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   28.11 - */
   28.12 -
   28.13 -#include "block.h"
   28.14 -#include <linux/blk.h>
   28.15 -#include <linux/cdrom.h>
   28.16 -#include <linux/tqueue.h>
   28.17 -#include <linux/sched.h>
   28.18 -#include <scsi/scsi.h>
   28.19 -
   28.20 -#include <linux/interrupt.h>
   28.21 -
   28.22 -typedef unsigned char byte; /* from linux/ide.h */
   28.23 -
   28.24 -#define STATE_ACTIVE    0
   28.25 -#define STATE_SUSPENDED 1
   28.26 -#define STATE_CLOSED    2
   28.27 -static unsigned int state = STATE_SUSPENDED;
   28.28 -
   28.29 -/* Dynamically-mapped IRQs. */
   28.30 -static int xlblk_response_irq, xlblk_update_irq;
   28.31 -
   28.32 -static blk_ring_t *blk_ring;
   28.33 -static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
   28.34 -static BLK_RING_IDX req_prod;  /* Private request producer.         */
   28.35 -
   28.36 -/* We plug the I/O ring if the driver is suspended or if the ring is full. */
   28.37 -#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
   28.38 -                      (state != STATE_ACTIVE))
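req_prod and resp_cons are free-running counters that are masked only when a ring slot is actually indexed, so RING_PLUGGED's unsigned subtraction counts outstanding entries correctly even after the indices wrap, a standard trick with power-of-two rings. Illustrative arithmetic (assumes 32-bit unsigned):

    #include <stdio.h>

    #define BLK_RING_SIZE 64u

    int main(void)
    {
        unsigned req_prod  = 4294967290u + 10u; /* wrapped past 2^32 */
        unsigned resp_cons = 4294967290u;
        printf("outstanding=%u full=%d\n",
               req_prod - resp_cons,                      /* 10 */
               (req_prod - resp_cons) == BLK_RING_SIZE);  /* 0  */
        return 0;
    }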
   28.39 -
   28.40 -
   28.41 -/*
   28.42 - * Request queues with outstanding work, but ring is currently full.
   28.43 - * We need no special lock here, as we always access this with the
   28.44 - * io_request_lock held. A small fixed-size list suffices.
   28.45 - */
   28.46 -#define MAX_PENDING 8
   28.47 -static request_queue_t *pending_queues[MAX_PENDING];
   28.48 -static int nr_pending;
   28.49 -
   28.50 -static kdev_t        sg_dev;
   28.51 -static int           sg_operation = -1;
   28.52 -static unsigned long sg_next_sect;
   28.53 -#define DISABLE_SCATTERGATHER() (sg_operation = -1)
   28.54 -
   28.55 -static inline void signal_requests_to_xen(void)
   28.56 -{
   28.57 -    block_io_op_t op; 
   28.58 -
   28.59 -    DISABLE_SCATTERGATHER();
   28.60 -    blk_ring->req_prod = req_prod;
   28.61 -
   28.62 -    op.cmd = BLOCK_IO_OP_SIGNAL; 
   28.63 -    HYPERVISOR_block_io_op(&op);
   28.64 -    return;
   28.65 -}
   28.66 -
   28.67 -
   28.68 -/*
   28.69 - * xlblk_update_int/update_vbds_task - handle VBD update events from Xen
   28.70 - * 
   28.71 - * Schedule a task for keventd to run, which will update the VBDs and perform
   28.72 - * the corresponding updates to our view of VBD state, so that XenoLinux
   28.73 - * responds to changes / additions / deletions to the set of VBDs automatically.
   28.74 - */
   28.75 -static struct tq_struct update_tq;
   28.76 -static void update_vbds_task(void *unused)
   28.77 -{ 
   28.78 -    xlvbd_update_vbds();
   28.79 -}
   28.80 -static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
   28.81 -{
   28.82 -    update_tq.routine = update_vbds_task;
   28.83 -    schedule_task(&update_tq);
   28.84 -}
   28.85 -
   28.86 -
   28.87 -int xen_block_open(struct inode *inode, struct file *filep)
   28.88 -{
   28.89 -    short xldev = inode->i_rdev; 
   28.90 -    struct gendisk *gd = get_gendisk(xldev);
   28.91 -    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
   28.92 -    short minor = MINOR(xldev); 
   28.93 -
   28.94 -    if ( gd->part[minor].nr_sects == 0 )
   28.95 -    { 
   28.96 -        /*
   28.97 -         * Device either doesn't exist, or has zero capacity; we use a few
   28.98 -         * cheesy heuristics to return the relevant error code
   28.99 -         */
  28.100 -        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
  28.101 -             ((minor & (gd->max_p - 1)) != 0) )
  28.102 -        { 
  28.103 -            /*
  28.104 -             * Either we have a real device but no such partition, or we only
  28.105 -             * have a partition number, so we guess that is the problem.
  28.106 -             */
  28.107 -            return -ENXIO;     /* no such device or address */
  28.108 -        }
  28.109 -        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
  28.110 -        {
  28.111 -            /* This is a removable device => assume that media is missing. */ 
  28.112 -            return -ENOMEDIUM; /* media not present (this is a guess) */
  28.113 -        } 
  28.114 -        else
  28.115 -        { 
  28.116 -            /* Just go for the general 'no such device' error. */
  28.117 -            return -ENODEV;    /* no such device */
  28.118 -        }
  28.119 -    }
  28.120 -    
  28.121 -    /* Update of usage count is protected by per-device semaphore. */
  28.122 -    disk->usage++;
  28.123 -
  28.124 -    return 0;
  28.125 -}
  28.126 -
  28.127 -
  28.128 -int xen_block_release(struct inode *inode, struct file *filep)
  28.129 -{
  28.130 -    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
  28.131 -
  28.132 -    /*
  28.133 -     * When usage drops to zero it may allow more VBD updates to occur.
  28.134 -     * Update of usage count is protected by a per-device semaphore.
  28.135 -     */
  28.136 -    if ( --disk->usage == 0 )
  28.137 -    {
  28.138 -        update_tq.routine = update_vbds_task;
  28.139 -        schedule_task(&update_tq);
  28.140 -    }
  28.141 -
  28.142 -    return 0;
  28.143 -}
  28.144 -
  28.145 -
  28.146 -int xen_block_ioctl(struct inode *inode, struct file *filep,
  28.147 -                          unsigned command, unsigned long argument)
  28.148 -{
  28.149 -    kdev_t dev = inode->i_rdev;
  28.150 -    struct hd_geometry *geo = (struct hd_geometry *)argument;
  28.151 -    struct gendisk *gd;     
  28.152 -    struct hd_struct *part; 
  28.153 -    int i;
  28.154 -
  28.155 -    /* NB. No need to check permissions. That is done for us. */
  28.156 -    
  28.157 -    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
  28.158 -                  command, (long) argument, dev); 
  28.159 -  
  28.160 -    gd = get_gendisk(dev);
  28.161 -    part = &gd->part[MINOR(dev)]; 
  28.162 -
  28.163 -    switch ( command )
  28.164 -    {
  28.165 -    case BLKGETSIZE:
  28.166 -        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
  28.167 -        return put_user(part->nr_sects, (unsigned long *) argument);
  28.168 -
  28.169 -    case BLKGETSIZE64:
  28.170 -        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
  28.171 -                      (u64)part->nr_sects * 512);
  28.172 -        return put_user((u64)part->nr_sects * 512, (u64 *) argument);
  28.173 -
  28.174 -    case BLKRRPART:                               /* re-read partition table */
  28.175 -        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
  28.176 -        return xen_block_revalidate(dev);
  28.177 -
  28.178 -    case BLKSSZGET:
  28.179 -        return hardsect_size[MAJOR(dev)][MINOR(dev)]; 
  28.180 -
  28.181 -    case BLKBSZGET:                                        /* get block size */
  28.182 -        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
  28.183 -        break;
  28.184 -
  28.185 -    case BLKBSZSET:                                        /* set block size */
  28.186 -        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
  28.187 -        break;
  28.188 -
  28.189 -    case BLKRASET:                                         /* set read-ahead */
  28.190 -        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
  28.191 -        break;
  28.192 -
  28.193 -    case BLKRAGET:                                         /* get read-ahead */
  28.194 -        DPRINTK_IOCTL("   BLKRAFET: %x\n", BLKRAGET);
  28.195 -        break;
  28.196 -
  28.197 -    case HDIO_GETGEO:
  28.198 -        /* note: these values are complete garbage */
  28.199 -        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
  28.200 -        if (!argument) return -EINVAL;
  28.201 -        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
  28.202 -        if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
  28.203 -        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  28.204 -        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
  28.205 -        return 0;
  28.206 -
  28.207 -    case HDIO_GETGEO_BIG: 
  28.208 -        /* note: these values are complete garbage */
  28.209 -        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
  28.210 -        if (!argument) return -EINVAL;
  28.211 -        if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
  28.212 -        if (put_user(0xff,  (byte *)&geo->heads))   return -EFAULT;
  28.213 -        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  28.214 -        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
  28.215 -        return 0;
  28.216 -
  28.217 -    case CDROMMULTISESSION:
  28.218 -        DPRINTK("FIXME: support multisession CDs later\n");
  28.219 -        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
  28.220 -            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
  28.221 -        return 0;
  28.222 -
  28.223 -    case SCSI_IOCTL_GET_BUS_NUMBER:
  28.224 -        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev");
  28.225 -        return -ENOSYS;
  28.226 -
  28.227 -    default:
  28.228 -        printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command);
  28.229 -        return -ENOSYS;
  28.230 -    }
  28.231 -    
  28.232 -    return 0;
  28.233 -}
  28.234 -
  28.235 -/* check media change: should probably do something here in some cases :-) */
  28.236 -int xen_block_check(kdev_t dev)
  28.237 -{
  28.238 -    DPRINTK("xen_block_check\n");
  28.239 -    return 0;
  28.240 -}
  28.241 -
  28.242 -int xen_block_revalidate(kdev_t dev)
  28.243 -{
  28.244 -    struct block_device *bd;
  28.245 -    struct gendisk *gd;
  28.246 -    xl_disk_t *disk;
  28.247 -    unsigned long capacity;
  28.248 -    int i, rc = 0;
  28.249 -    
  28.250 -    if ( (bd = bdget(dev)) == NULL )
  28.251 -        return -EINVAL;
  28.252 -
  28.253 -    /*
  28.254 -     * Update of partition info, and check of usage count, is protected
  28.255 -     * by the per-block-device semaphore.
  28.256 -     */
  28.257 -    down(&bd->bd_sem);
  28.258 -
  28.259 -    if ( ((gd = get_gendisk(dev)) == NULL) ||
  28.260 -         ((disk = xldev_to_xldisk(dev)) == NULL) ||
  28.261 -         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
  28.262 -    {
  28.263 -        rc = -EINVAL;
  28.264 -        goto out;
  28.265 -    }
  28.266 -
  28.267 -    if ( disk->usage > 1 )
  28.268 -    {
  28.269 -        rc = -EBUSY;
  28.270 -        goto out;
  28.271 -    }
  28.272 -
  28.273 -    /* Only reread partition table if VBDs aren't mapped to partitions. */
  28.274 -    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
  28.275 -    {
  28.276 -        for ( i = gd->max_p - 1; i >= 0; i-- )
  28.277 -        {
  28.278 -            invalidate_device(dev+i, 1);
  28.279 -            gd->part[MINOR(dev+i)].start_sect = 0;
  28.280 -            gd->part[MINOR(dev+i)].nr_sects   = 0;
  28.281 -            gd->sizes[MINOR(dev+i)]           = 0;
  28.282 -        }
  28.283 -
  28.284 -        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
  28.285 -    }
  28.286 -
  28.287 - out:
  28.288 -    up(&bd->bd_sem);
  28.289 -    bdput(bd);
  28.290 -    return rc;
  28.291 -}
  28.292 -
  28.293 -
  28.294 -/*
  28.295 - * hypervisor_request
  28.296 - *
  28.297 - * request block io 
  28.298 - * 
  28.299 - * id: for guest use only.
  28.300 - * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
  28.301 - * buffer: buffer to read/write into; this should be a
  28.302 - *   virtual address in the guest OS.
  28.303 - */
  28.304 -static int hypervisor_request(unsigned long   id,
  28.305 -                              int             operation,
  28.306 -                              char *          buffer,
  28.307 -                              unsigned long   sector_number,
  28.308 -                              unsigned short  nr_sectors,
  28.309 -                              kdev_t          device)
  28.310 -{
  28.311 -    unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 
  28.312 -    struct gendisk *gd;
  28.313 -    blk_ring_req_entry_t *req;
  28.314 -    struct buffer_head *bh;
  28.315 -
  28.316 -    if ( unlikely(nr_sectors >= (1<<9)) )
  28.317 -        BUG();
  28.318 -    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
  28.319 -        BUG();
  28.320 -
  28.321 -    if ( unlikely(state == STATE_CLOSED) )
  28.322 -        return 1;
  28.323 -
  28.324 -    switch ( operation )
  28.325 -    {
  28.326 -
  28.327 -    case XEN_BLOCK_READ:
  28.328 -    case XEN_BLOCK_WRITE:
  28.329 -        gd = get_gendisk(device); 
  28.330 -
  28.331 -        /*
  28.332 -         * Update the sector_number we'll pass down as appropriate; note that
  28.333 -         * we could sanity-check that the resulting sector lies within this
  28.334 -         * partition, but Xen will do that anyhow.
  28.335 -         */
  28.336 -        sector_number += gd->part[MINOR(device)].start_sect;
  28.337 -
  28.338 -        /*
  28.339 -         * If this unit doesn't consist of virtual (i.e., Xen-specified)
  28.340 -         * partitions then we clear the partn bits from the device number.
  28.341 -         */
  28.342 -        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
  28.343 -               GENHD_FL_VIRT_PARTNS) )
  28.344 -            device &= ~(gd->max_p - 1);
  28.345 -
  28.346 -        if ( (sg_operation == operation) &&
  28.347 -             (sg_dev == device) &&
  28.348 -             (sg_next_sect == sector_number) )
  28.349 -        {
  28.350 -            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
  28.351 -            bh = (struct buffer_head *)id;
  28.352 -            bh->b_reqnext = (struct buffer_head *)req->id;
  28.353 -            req->id = id;
  28.354 -            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
  28.355 -            if ( ++req->nr_segments < MAX_BLK_SEGS )
  28.356 -                sg_next_sect += nr_sectors;
  28.357 -            else
  28.358 -                DISABLE_SCATTERGATHER();
  28.359 -            return 0;
  28.360 -        }
  28.361 -        else if ( RING_PLUGGED )
  28.362 -        {
  28.363 -            return 1;
  28.364 -        }
  28.365 -        else
  28.366 -        {
  28.367 -            sg_operation = operation;
  28.368 -            sg_dev       = device;
  28.369 -            sg_next_sect = sector_number + nr_sectors;
  28.370 -        }
  28.371 -        break;
  28.372 -
  28.373 -    default:
  28.374 -        panic("unknown op %d\n", operation);
  28.375 -    }
  28.376 -
  28.377 -    /* Fill out a communications ring structure. */
  28.378 -    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
  28.379 -    req->id            = id;
  28.380 -    req->operation     = operation;
  28.381 -    req->sector_number = (xen_sector_t)sector_number;
  28.382 -    req->device        = device; 
  28.383 -    req->nr_segments   = 1;
  28.384 -    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
  28.385 -    req_prod++;
  28.386 -
  28.387 -    return 0;
  28.388 -}
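The fast path above merges a new buffer head into the previous ring slot only when it continues the same operation, on the same device, at exactly the next sector; any mismatch ends the run and claims a fresh slot. A self-contained sketch of the merge predicate (state names mirror the driver's, values illustrative; the real code also advances sg_next_sect after each merge):

    #include <stdio.h>

    static int sg_operation = -1, sg_dev = -1;
    static unsigned long sg_next_sect;

    /* A request may join the previous slot only if it is contiguous. */
    static int can_merge(int op, int dev, unsigned long sector)
    {
        return sg_operation == op && sg_dev == dev &&
               sg_next_sect == sector;
    }

    int main(void)
    {
        sg_operation = 0; sg_dev = 3; sg_next_sect = 128;
        printf("%d %d\n",
               can_merge(0, 3, 128),   /* 1: continues the run       */
               can_merge(0, 3, 130));  /* 0: not the expected sector */
        return 0;
    }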
  28.389 -
  28.390 -
  28.391 -/*
  28.392 - * do_xlblk_request
  28.393 - *  read a block; request is in a request queue
  28.394 - */
  28.395 -void do_xlblk_request(request_queue_t *rq)
  28.396 -{
  28.397 -    struct request *req;
  28.398 -    struct buffer_head *bh, *next_bh;
  28.399 -    int rw, nsect, full, queued = 0;
  28.400 -
  28.401 -    DPRINTK("xlblk.c::do_xlblk_request\n"); 
  28.402 -
  28.403 -    while ( !rq->plugged && !list_empty(&rq->queue_head))
  28.404 -    {
  28.405 -        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
  28.406 -            goto out;
  28.407 -  
  28.408 -        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
  28.409 -                req, req->cmd, req->sector,
  28.410 -                req->current_nr_sectors, req->nr_sectors, req->bh);
  28.411 -
  28.412 -        rw = req->cmd;
  28.413 -        if ( rw == READA )
  28.414 -            rw = READ;
  28.415 -        if ( unlikely((rw != READ) && (rw != WRITE)) )
  28.416 -            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
  28.417 -
  28.418 -        req->errors = 0;
  28.419 -
  28.420 -        bh = req->bh;
  28.421 -        while ( bh != NULL )
  28.422 -        {
  28.423 -            next_bh = bh->b_reqnext;
  28.424 -            bh->b_reqnext = NULL;
  28.425 -
  28.426 -            full = hypervisor_request(
  28.427 -                (unsigned long)bh,
  28.428 -                (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
  28.429 -                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
  28.430 -
  28.431 -            if ( full )
  28.432 -            { 
  28.433 -                bh->b_reqnext = next_bh;
  28.434 -                pending_queues[nr_pending++] = rq;
  28.435 -                if ( unlikely(nr_pending >= MAX_PENDING) )
  28.436 -                    BUG();
  28.437 -                goto out; 
  28.438 -            }
  28.439 -
  28.440 -            queued++;
  28.441 -
  28.442 -            /* Dequeue the buffer head from the request. */
  28.443 -            nsect = bh->b_size >> 9;
  28.444 -            bh = req->bh = next_bh;
  28.445 -            
  28.446 -            if ( bh != NULL )
  28.447 -            {
  28.448 -                /* There's another buffer head to do. Update the request. */
  28.449 -                req->hard_sector += nsect;
  28.450 -                req->hard_nr_sectors -= nsect;
  28.451 -                req->sector = req->hard_sector;
  28.452 -                req->nr_sectors = req->hard_nr_sectors;
  28.453 -                req->current_nr_sectors = bh->b_size >> 9;
  28.454 -                req->buffer = bh->b_data;
  28.455 -            }
  28.456 -            else
  28.457 -            {
  28.458 -                /* That was the last buffer head. Finalise the request. */
  28.459 -                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
  28.460 -                    BUG();
  28.461 -                blkdev_dequeue_request(req);
  28.462 -                end_that_request_last(req);
  28.463 -            }
  28.464 -        }
  28.465 -    }
  28.466 -
  28.467 - out:
  28.468 -    if ( queued != 0 ) signal_requests_to_xen();
  28.469 -}
  28.470 -
  28.471 -
  28.472 -static void kick_pending_request_queues(void)
  28.473 -{
  28.474 -    /* We kick pending request queues if the ring is reasonably empty. */
  28.475 -    if ( (nr_pending != 0) && 
  28.476 -         ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
  28.477 -    {
  28.478 -        /* Attempt to drain the queue, but bail if the ring becomes full. */
  28.479 -        while ( (nr_pending != 0) && !RING_PLUGGED )
  28.480 -            do_xlblk_request(pending_queues[--nr_pending]);
  28.481 -    }
  28.482 -}
  28.483 -
  28.484 -
  28.485 -static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
  28.486 -{
  28.487 -    BLK_RING_IDX i; 
  28.488 -    unsigned long flags; 
  28.489 -    struct buffer_head *bh, *next_bh;
  28.490 -    
  28.491 -    if ( unlikely(state == STATE_CLOSED) )
  28.492 -        return;
  28.493 -    
  28.494 -    spin_lock_irqsave(&io_request_lock, flags);     
  28.495 -
  28.496 -    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
  28.497 -    {
  28.498 -        blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
  28.499 -        switch ( bret->operation )
  28.500 -        {
  28.501 -        case XEN_BLOCK_READ:
  28.502 -        case XEN_BLOCK_WRITE:
  28.503 -            if ( unlikely(bret->status != 0) )
  28.504 -                DPRINTK("Bad return from blkdev data request: %lx\n",
  28.505 -                        bret->status);
  28.506 -            for ( bh = (struct buffer_head *)bret->id; 
  28.507 -                  bh != NULL; 
  28.508 -                  bh = next_bh )
  28.509 -            {
  28.510 -                next_bh = bh->b_reqnext;
  28.511 -                bh->b_reqnext = NULL;
  28.512 -                bh->b_end_io(bh, !bret->status);
  28.513 -            }
  28.514 -            break;
  28.515 -     
  28.516 -        default:
  28.517 -            BUG();
  28.518 -        }
  28.519 -    }
  28.520 -    
  28.521 -    resp_cons = i;
  28.522 -
  28.523 -    kick_pending_request_queues();
  28.524 -
  28.525 -    spin_unlock_irqrestore(&io_request_lock, flags);
  28.526 -}
  28.527 -
  28.528 -
  28.529 -static void reset_xlblk_interface(void)
  28.530 -{
  28.531 -    block_io_op_t op; 
  28.532 -
  28.533 -    nr_pending = 0;
  28.534 -
  28.535 -    op.cmd = BLOCK_IO_OP_RESET;
  28.536 -    if ( HYPERVISOR_block_io_op(&op) != 0 )
  28.537 -        printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
  28.538 -
  28.539 -    op.cmd = BLOCK_IO_OP_RING_ADDRESS;
  28.540 -    (void)HYPERVISOR_block_io_op(&op);
  28.541 -
  28.542 -    set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
  28.543 -    blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
  28.544 -    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
  28.545 -
  28.546 -    wmb();
  28.547 -    state = STATE_ACTIVE;
  28.548 -}
  28.549 -
  28.550 -
  28.551 -int __init xlblk_init(void)
  28.552 -{
  28.553 -    int error; 
  28.554 -
  28.555 -    reset_xlblk_interface();
  28.556 -
  28.557 -    xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
  28.558 -    xlblk_update_irq   = bind_virq_to_irq(VIRQ_VBD_UPD);
  28.559 -
  28.560 -    error = request_irq(xlblk_response_irq, xlblk_response_int, 
  28.561 -                        SA_SAMPLE_RANDOM, "blkdev", NULL);
  28.562 -    if ( error )
  28.563 -    {
  28.564 -        printk(KERN_ALERT "Could not allocate receive interrupt\n");
  28.565 -        goto fail;
  28.566 -    }
  28.567 -
  28.568 -    error = request_irq(xlblk_update_irq, xlblk_update_int,
  28.569 -                        0, "blkdev", NULL);
  28.570 -
  28.571 -    if ( error )
  28.572 -    {
  28.573 -        printk(KERN_ALERT "Could not allocate block update interrupt\n");
  28.574 -        goto fail;
  28.575 -    }
  28.576 -
  28.577 -    (void)xlvbd_init();
  28.578 -
  28.579 -    return 0;
  28.580 -
  28.581 - fail:
  28.582 -    return error;
  28.583 -}
  28.584 -
  28.585 -
  28.586 -static void __exit xlblk_cleanup(void)
  28.587 -{
  28.588 -    xlvbd_cleanup();
  28.589 -    free_irq(xlblk_response_irq, NULL);
  28.590 -    free_irq(xlblk_update_irq, NULL);
  28.591 -    unbind_virq_from_irq(VIRQ_BLKDEV);
  28.592 -    unbind_virq_from_irq(VIRQ_VBD_UPD);
  28.593 -}
  28.594 -
  28.595 -
  28.596 -#ifdef MODULE
  28.597 -module_init(xlblk_init);
  28.598 -module_exit(xlblk_cleanup);
  28.599 -#endif
  28.600 -
  28.601 -
  28.602 -void blkdev_suspend(void)
  28.603 -{
  28.604 -    state = STATE_SUSPENDED;
  28.605 -    wmb();
  28.606 -
  28.607 -    while ( resp_cons != blk_ring->req_prod )
  28.608 -    {
  28.609 -        barrier();
  28.610 -        current->state = TASK_INTERRUPTIBLE;
  28.611 -        schedule_timeout(1);
  28.612 -    }
  28.613 -
  28.614 -    wmb();
  28.615 -    state = STATE_CLOSED;
  28.616 -    wmb();
  28.617 -
  28.618 -    clear_fixmap(FIX_BLKRING_BASE);
  28.619 -}
  28.620 -
  28.621 -
  28.622 -void blkdev_resume(void)
  28.623 -{
  28.624 -    reset_xlblk_interface();
  28.625 -    spin_lock_irq(&io_request_lock);
  28.626 -    kick_pending_request_queues();
  28.627 -    spin_unlock_irq(&io_request_lock);
  28.628 -}
    29.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h	Sat Apr 24 08:10:58 2004 +0000
    29.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.3 @@ -1,82 +0,0 @@
    29.4 -/******************************************************************************
    29.5 - * block.h
    29.6 - * 
    29.7 - * Shared definitions between all levels of XenoLinux Virtual block devices.
    29.8 - */
    29.9 -
   29.10 -#ifndef __XEN_DRIVERS_BLOCK_H__
   29.11 -#define __XEN_DRIVERS_BLOCK_H__
   29.12 -
   29.13 -#include <linux/config.h>
   29.14 -#include <linux/module.h>
   29.15 -
   29.16 -#include <linux/kernel.h>
   29.17 -#include <linux/sched.h>
   29.18 -#include <linux/slab.h>
   29.19 -#include <linux/string.h>
   29.20 -#include <linux/errno.h>
   29.21 -
   29.22 -#include <linux/fs.h>
   29.23 -#include <linux/hdreg.h>
   29.24 -#include <linux/blkdev.h>
   29.25 -#include <linux/major.h>
   29.26 -
   29.27 -#include <asm/hypervisor-ifs/hypervisor-if.h>
   29.28 -#include <asm/hypervisor-ifs/vbd.h>
   29.29 -#include <asm/io.h>
   29.30 -#include <asm/atomic.h>
   29.31 -#include <asm/uaccess.h>
   29.32 -
   29.33 -#if 0
   29.34 -#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   29.35 -#else
   29.36 -#define DPRINTK(_f, _a...) ((void)0)
   29.37 -#endif
   29.38 -
   29.39 -#if 0
   29.40 -#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   29.41 -#else
   29.42 -#define DPRINTK_IOCTL(_f, _a...) ((void)0)
   29.43 -#endif
   29.44 -
   29.45 -/* Private gendisk->flags[] values. */
   29.46 -#define GENHD_FL_XEN        2 /* Is unit a Xen block device?  */
   29.47 -#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
   29.48 -
   29.49 -/*
   29.50 - * We have one of these per vbd, whether ide, scsi or 'other'.
   29.51 - * They hang in an array off the gendisk structure. We may end up putting
   29.52 - * all kinds of interesting stuff here :-)
   29.53 - */
   29.54 -typedef struct xl_disk {
   29.55 -    int usage;
   29.56 -} xl_disk_t;
   29.57 -
   29.58 -extern int xen_control_msg(int operation, char *buffer, int size);
   29.59 -extern int xen_block_open(struct inode *inode, struct file *filep);
   29.60 -extern int xen_block_release(struct inode *inode, struct file *filep);
   29.61 -extern int xen_block_ioctl(struct inode *inode, struct file *filep,
   29.62 -                                 unsigned command, unsigned long argument);
   29.63 -extern int xen_block_check(kdev_t dev);
   29.64 -extern int xen_block_revalidate(kdev_t dev);
   29.65 -extern void do_xlblk_request (request_queue_t *rq); 
   29.66 -
   29.67 -extern void xlvbd_update_vbds(void);
   29.68 -
   29.69 -static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
   29.70 -{
   29.71 -    struct gendisk *gd = get_gendisk(xldev);
   29.72 -    
   29.73 -    if ( gd == NULL ) 
   29.74 -        return NULL;
   29.75 -    
   29.76 -    return (xl_disk_t *)gd->real_devices + 
   29.77 -        (MINOR(xldev) >> gd->minor_shift);
   29.78 -}
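xldev_to_xldisk() depends on the per-major layout: gd->real_devices points at an array of xl_disk_t, one per disk, and minor >> minor_shift picks the disk, so all partitions of one disk share a single usage count. The index arithmetic, with a hypothetical minor_shift of 4:

    #include <stdio.h>

    int main(void)
    {
        int minor_shift = 4;   /* sixteen minors (partitions) per disk */
        for ( int minor = 15; minor <= 17; minor++ )
            printf("minor %2d -> disk index %d\n",
                   minor, minor >> minor_shift);
        return 0;              /* minors 15 -> 0, 16 -> 1, 17 -> 1 */
    }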
   29.79 -
   29.80 -
   29.81 -/* Virtual block-device subsystem. */
   29.82 -extern int  xlvbd_init(void);
   29.83 -extern void xlvbd_cleanup(void); 
   29.84 -
   29.85 -#endif /* __XEN_DRIVERS_BLOCK_H__ */
    30.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c	Sat Apr 24 08:10:58 2004 +0000
    30.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.3 @@ -1,561 +0,0 @@
    30.4 -/******************************************************************************
    30.5 - * vbd.c
    30.6 - * 
    30.7 - * Xenolinux virtual block-device driver (xvd).
    30.8 - * 
    30.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   30.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   30.11 - */
   30.12 -
   30.13 -#include "block.h"
   30.14 -#include <linux/blk.h>
   30.15 -
   30.16 -/*
   30.17 - * For convenience we distinguish between ide, scsi and 'other' (i.e.
   30.18 - * potentially combinations of the two) in the naming scheme and in a few 
   30.19 - * other places (like default readahead, etc).
   30.20 - */
   30.21 -#define XLIDE_MAJOR_NAME  "hd"
   30.22 -#define XLSCSI_MAJOR_NAME "sd"
   30.23 -#define XLVBD_MAJOR_NAME "xvd"
   30.24 -
   30.25 -#define XLIDE_DEVS_PER_MAJOR   2
   30.26 -#define XLSCSI_DEVS_PER_MAJOR 16
   30.27 -#define XLVBD_DEVS_PER_MAJOR  16
   30.28 -
   30.29 -#define XLIDE_PARTN_SHIFT  6    /* amount to shift minor to get 'real' minor */
   30.30 -#define XLIDE_MAX_PART    (1 << XLIDE_PARTN_SHIFT)     /* minors per ide vbd */
   30.31 -
   30.32 -#define XLSCSI_PARTN_SHIFT 4    /* amount to shift minor to get 'real' minor */
   30.33 -#define XLSCSI_MAX_PART   (1 << XLSCSI_PARTN_SHIFT)   /* minors per scsi vbd */
   30.34 -
   30.35 -#define XLVBD_PARTN_SHIFT  4    /* amount to shift minor to get 'real' minor */
   30.36 -#define XLVBD_MAX_PART    (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */
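The PARTN_SHIFT constants split a minor into a disk index (high bits) and a partition number (low bits); for example, under XLIDE_PARTN_SHIFT, minor 65 decodes to disk 1, partition 1. A short decoding sketch:

    #include <stdio.h>

    #define XLIDE_PARTN_SHIFT 6
    #define XLIDE_MAX_PART   (1 << XLIDE_PARTN_SHIFT)

    int main(void)
    {
        int minor = 65;        /* illustrative ide-style minor */
        printf("disk=%d part=%d\n",
               minor >> XLIDE_PARTN_SHIFT,        /* 1 */
               minor &  (XLIDE_MAX_PART - 1));    /* 1 */
        return 0;
    }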
   30.37 -
   30.38 -/* The below are for the generic drivers/block/ll_rw_block.c code. */
   30.39 -static int xlide_blksize_size[256];
   30.40 -static int xlide_hardsect_size[256];
   30.41 -static int xlide_max_sectors[256];
   30.42 -static int xlscsi_blksize_size[256];
   30.43 -static int xlscsi_hardsect_size[256];
   30.44 -static int xlscsi_max_sectors[256];
   30.45 -static int xlvbd_blksize_size[256];
   30.46 -static int xlvbd_hardsect_size[256];
   30.47 -static int xlvbd_max_sectors[256];
   30.48 -
   30.49 -/* Information from Xen about our VBDs. */
   30.50 -#define MAX_VBDS 64
   30.51 -static int nr_vbds;
   30.52 -static xen_disk_t *vbd_info;
   30.53 -
   30.54 -static struct block_device_operations xlvbd_block_fops = 
   30.55 -{
   30.56 -    open:               xen_block_open,
   30.57 -    release:            xen_block_release,
   30.58 -    ioctl:              xen_block_ioctl,
   30.59 -    check_media_change: xen_block_check,
   30.60 -    revalidate:         xen_block_revalidate,
   30.61 -};
   30.62 -
   30.63 -static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
   30.64 -{
   30.65 -    int error;
   30.66 -    block_io_op_t op; 
   30.67 -
   30.68 -    /* Probe for disk information. */
   30.69 -    memset(&op, 0, sizeof(op)); 
   30.70 -    op.cmd = BLOCK_IO_OP_VBD_PROBE; 
   30.71 -    op.u.probe_params.domain    = 0; 
   30.72 -    op.u.probe_params.xdi.max   = MAX_VBDS;
   30.73 -    op.u.probe_params.xdi.disks = disk_info;
   30.74 -    op.u.probe_params.xdi.count = 0;
   30.75 -
   30.76 -    if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
   30.77 -    {
   30.78 -        printk(KERN_ALERT "Could not probe disks (%d)\n", error);
   30.79 -        return -1;
   30.80 -    }
   30.81 -
   30.82 -    return op.u.probe_params.xdi.count;
   30.83 -}
   30.84 -
   30.85 -/*
   30.86 - * xlvbd_init_device - initialise a VBD device
   30.87 - * @disk:              a xen_disk_t describing the VBD
   30.88 - *
   30.89 - * Takes a xen_disk_t * that describes a VBD the domain has access to.
   30.90 - * Performs appropriate initialisation and registration of the device.
   30.91 - *
   30.92 - * Care needs to be taken when making re-entrant calls to ensure that
   30.93 - * corruption does not occur.  Also, devices that are in use should not have
   30.94 - * their details updated.  This is the caller's responsibility.
   30.95 - */
   30.96 -static int xlvbd_init_device(xen_disk_t *xd)
   30.97 -{
   30.98 -    int device = xd->device;
   30.99 -    int major  = MAJOR(device); 
  30.100 -    int minor  = MINOR(device);
  30.101 -    int is_ide = IDE_DISK_MAJOR(major);  /* is this an ide device? */
  30.102 -    int is_scsi= SCSI_BLK_MAJOR(major);  /* is this a scsi device? */
  30.103 -    char *major_name;
  30.104 -    struct gendisk *gd;
  30.105 -    struct block_device *bd;
  30.106 -    xl_disk_t *disk;
  30.107 -    int i, rc = 0, max_part, partno;
  30.108 -    unsigned long capacity;
  30.109 -
  30.110 -    unsigned char buf[64];
  30.111 -
  30.112 -    if ( (bd = bdget(device)) == NULL )
  30.113 -        return -1;
  30.114 -
  30.115 -    /*
  30.116 -     * Update of partition info, and check of usage count, is protected
  30.117 -     * by the per-block-device semaphore.
  30.118 -     */
  30.119 -    down(&bd->bd_sem);
  30.120 -
  30.121 -    if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
  30.122 -    {
  30.123 -        printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
  30.124 -        rc = -1;
  30.125 -        goto out;
  30.126 -    }
  30.127 -
  30.128 -    if ( is_ide ) {
  30.129 -
  30.130 -        major_name = XLIDE_MAJOR_NAME;
  30.131 -        max_part   = XLIDE_MAX_PART;
  30.132 -
  30.133 -    } else if ( is_scsi ) {
  30.134 -
  30.135 -        major_name = XLSCSI_MAJOR_NAME;
  30.136 -        max_part   = XLSCSI_MAX_PART;
  30.137 -
  30.138 -    } else if (XD_VIRTUAL(xd->info)) {
  30.139 -
  30.140 -        major_name = XLVBD_MAJOR_NAME;
  30.141 -        max_part   = XLVBD_MAX_PART;
  30.142 -
  30.143 -    } else { 
  30.144 -
  30.145 -        /* SMH: hmm - probably a CCISS driver or similar; assume CCISS for now */
  30.146 -        printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n",
  30.147 -               major, minor);
  30.148 -        is_scsi    = 1;
  30.149 -        major_name = "cciss";
  30.150 -        max_part   = XLSCSI_MAX_PART;
  30.151 -
  30.152 -    }
  30.153 -    
  30.154 -    partno = minor & (max_part - 1); 
  30.155 -    
  30.156 -    if ( (gd = get_gendisk(device)) == NULL )
  30.157 -    {
  30.158 -        rc = register_blkdev(major, major_name, &xlvbd_block_fops);
  30.159 -        if ( rc < 0 )
  30.160 -        {
  30.161 -            printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
  30.162 -            goto out;
  30.163 -        }
  30.164 -
  30.165 -        if ( is_ide )
  30.166 -        { 
  30.167 -            blksize_size[major]  = xlide_blksize_size;
  30.168 -            hardsect_size[major] = xlide_hardsect_size;
  30.169 -            max_sectors[major]   = xlide_max_sectors;
  30.170 -            read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
  30.171 -        } 
  30.172 -        else if ( is_scsi )
  30.173 -        { 
  30.174 -            blksize_size[major]  = xlscsi_blksize_size;
  30.175 -            hardsect_size[major] = xlscsi_hardsect_size;
  30.176 -            max_sectors[major]   = xlscsi_max_sectors;
  30.177 -            read_ahead[major]    = 0; /* XXX 8; -- guessing */
  30.178 -        }
  30.179 -        else
  30.180 -        { 
  30.181 -            blksize_size[major]  = xlvbd_blksize_size;
  30.182 -            hardsect_size[major] = xlvbd_hardsect_size;
  30.183 -            max_sectors[major]   = xlvbd_max_sectors;
  30.184 -            read_ahead[major]    = 8;
  30.185 -        }
  30.186 -
  30.187 -        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
  30.188 -
  30.189 -        /*
  30.190 -         * Turn off barking 'headactive' mode. We dequeue buffer heads as
  30.191 -         * soon as we pass them down to Xen.
  30.192 -         */
  30.193 -        blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
  30.194 -
  30.195 -        /* Construct an appropriate gendisk structure. */
  30.196 -        gd             = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
  30.197 -        gd->major      = major;
  30.198 -        gd->major_name = major_name; 
  30.199 -    
  30.200 -        gd->max_p      = max_part; 
  30.201 -        if ( is_ide )
  30.202 -        { 
  30.203 -            gd->minor_shift  = XLIDE_PARTN_SHIFT; 
  30.204 -            gd->nr_real      = XLIDE_DEVS_PER_MAJOR; 
  30.205 -        } 
  30.206 -        else if ( is_scsi )
  30.207 -        { 
  30.208 -            gd->minor_shift  = XLSCSI_PARTN_SHIFT; 
  30.209 -            gd->nr_real      = XLSCSI_DEVS_PER_MAJOR; 
  30.210 -        }
  30.211 -        else
  30.212 -        { 
  30.213 -            gd->minor_shift  = XLVBD_PARTN_SHIFT; 
  30.214 -            gd->nr_real      = XLVBD_DEVS_PER_MAJOR; 
  30.215 -        }
  30.216 -
  30.217 -        /* 
  30.218 -        ** The sizes[] and part[] arrays hold the sizes and other 
  30.219 -        ** information about every partition with this 'major' (i.e. 
  30.220 -        ** every disk sharing the 8 bit prefix * max partns per disk) 
  30.221 -        */
  30.222 -        gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL);
  30.223 -        gd->part  = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), 
  30.224 -                            GFP_KERNEL);
  30.225 -        memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int));
  30.226 -        memset(gd->part,  0, max_part * gd->nr_real 
  30.227 -               * sizeof(struct hd_struct));
  30.228 -
  30.229 -
  30.230 -        gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), 
  30.231 -                                   GFP_KERNEL);
  30.232 -        memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t));
  30.233 -
  30.234 -        gd->next   = NULL;            
  30.235 -        gd->fops   = &xlvbd_block_fops;
  30.236 -
  30.237 -        gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), 
  30.238 -                             GFP_KERNEL);
  30.239 -        gd->flags  = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL);
  30.240 -    
  30.241 -        memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr));
  30.242 -        memset(gd->flags, 0, gd->nr_real *  sizeof(*gd->flags));
  30.243 -
  30.244 -        add_gendisk(gd);
  30.245 -
  30.246 -        blk_size[major] = gd->sizes;
  30.247 -    }
  30.248 -
  30.249 -    if ( XD_READONLY(xd->info) )
  30.250 -        set_device_ro(device, 1); 
  30.251 -
  30.252 -    gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN;
  30.253 -
  30.254 -    /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
  30.255 -    capacity = (unsigned long)xd->capacity;
  30.256 -
  30.257 -    if ( partno != 0 )
  30.258 -    {
  30.259 -        /*
  30.260 -         * If this was previously set up as a real disc we will have set 
  30.261 -         * up partition-table information. Virtual partitions override 
  30.262 -         * 'real' partitions, and the two cannot coexist on a device.
  30.263 -         */
  30.264 -        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
  30.265 -             (gd->sizes[minor & ~(max_part-1)] != 0) )
  30.266 -        {
  30.267 -            /*
  30.268 -             * Any non-zero sub-partition entries must be cleaned out before
  30.269 -             * installing 'virtual' partition entries. The two types cannot
  30.270 -             * coexist, and virtual partitions are favoured.
  30.271 -             */
  30.272 -            kdev_t dev = device & ~(max_part-1);
  30.273 -            for ( i = max_part - 1; i > 0; i-- )
  30.274 -            {
  30.275 -                invalidate_device(dev+i, 1);
  30.276 -                gd->part[MINOR(dev+i)].start_sect = 0;
  30.277 -                gd->part[MINOR(dev+i)].nr_sects   = 0;
  30.278 -                gd->sizes[MINOR(dev+i)]           = 0;
  30.279 -            }
  30.280 -            printk(KERN_ALERT
  30.281 -                   "Virtual partitions found for /dev/%s - ignoring any "
  30.282 -                   "real partition information we may have found.\n",
  30.283 -                   disk_name(gd, MINOR(device), buf));
  30.284 -        }
  30.285 -
  30.286 -        /* Need to skankily set up 'partition' information */
  30.287 -        gd->part[minor].start_sect = 0; 
  30.288 -        gd->part[minor].nr_sects   = capacity; 
  30.289 -        gd->sizes[minor]           = capacity; 
  30.290 -
  30.291 -        gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
  30.292 -    }
  30.293 -    else
  30.294 -    {
  30.295 -        gd->part[minor].nr_sects = capacity;
  30.296 -        gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
  30.297 -        
  30.298 -        /* Some final fix-ups depending on the device type */
  30.299 -        switch ( XD_TYPE(xd->info) )
  30.300 -        { 
  30.301 -        case XD_TYPE_CDROM:
  30.302 -        case XD_TYPE_FLOPPY: 
  30.303 -        case XD_TYPE_TAPE:
  30.304 -            gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; 
  30.305 -            printk(KERN_ALERT 
  30.306 -                   "Skipping partition check on %s /dev/%s\n", 
  30.307 -                   XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" : 
  30.308 -                   (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" : 
  30.309 -                    "floppy"), disk_name(gd, MINOR(device), buf)); 
  30.310 -            break; 
  30.311 -
  30.312 -        case XD_TYPE_DISK:
  30.313 -            /* Only check partitions on real discs (not virtual!). */
  30.314 -            if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
  30.315 -            {
  30.316 -                printk(KERN_ALERT
  30.317 -                       "Skipping partition check on virtual /dev/%s\n",
  30.318 -                       disk_name(gd, MINOR(device), buf));
  30.319 -                break;
  30.320 -            }
  30.321 -            register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
  30.322 -            break; 
  30.323 -
  30.324 -        default:
  30.325 -            printk(KERN_ALERT "XenoLinux: unknown device type %d\n", 
  30.326 -                   XD_TYPE(xd->info)); 
  30.327 -            break; 
  30.328 -        }
  30.329 -    }
  30.330 -
  30.331 - out:
  30.332 -    up(&bd->bd_sem);
  30.333 -    bdput(bd);    
  30.334 -    return rc;
  30.335 -}
  30.336 -
  30.337 -
  30.338 -/*
  30.339 - * xlvbd_remove_device - remove a device node if possible
  30.340 - * @device:       numeric device ID
  30.341 - *
  30.342 - * Updates the gendisk structure and invalidates devices.
  30.343 - *
  30.344 - * This is OK for now but, in future, we should perhaps consider whether this
  30.345 - * should also deallocate gendisks / unregister devices.
  30.346 - */
  30.347 -static int xlvbd_remove_device(int device)
  30.348 -{
  30.349 -    int i, rc = 0, minor = MINOR(device);
  30.350 -    struct gendisk *gd;
  30.351 -    struct block_device *bd;
  30.352 -    xl_disk_t *disk = NULL;
  30.353 -
  30.354 -    if ( (bd = bdget(device)) == NULL )
  30.355 -        return -1;
  30.356 -
  30.357 -    /*
  30.358 -     * Update of partition info, and check of usage count, is protected
  30.359 -     * by the per-block-device semaphore.
  30.360 -     */
  30.361 -    down(&bd->bd_sem);
  30.362 -
  30.363 -    if ( ((gd = get_gendisk(device)) == NULL) ||
  30.364 -         ((disk = xldev_to_xldisk(device)) == NULL) )
  30.365 -        BUG();
  30.366 -
  30.367 -    if ( disk->usage != 0 )
  30.368 -    {
  30.369 -        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
  30.370 -        rc = -1;
  30.371 -        goto out;
  30.372 -    }
  30.373 - 
  30.374 -    if ( (minor & (gd->max_p-1)) != 0 )
  30.375 -    {
  30.376 -        /* 1: The VBD is mapped to a partition rather than a whole unit. */
  30.377 -        invalidate_device(device, 1);
  30.378 -        gd->part[minor].start_sect = 0;
  30.379 -        gd->part[minor].nr_sects   = 0;
  30.380 -        gd->sizes[minor]           = 0;
  30.381 -
  30.382 -        /* Clear the consists-of-virtual-partitions flag if possible. */
  30.383 -        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
  30.384 -        for ( i = 1; i < gd->max_p; i++ )
  30.385 -            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
  30.386 -                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
  30.387 -
  30.388 -        /*
  30.389 -         * If all virtual partitions are now gone, and a 'whole unit' VBD is
  30.390 -         * present, then we can try to grok the unit's real partition table.
  30.391 -         */
  30.392 -        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
  30.393 -             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
  30.394 -             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
  30.395 -        {
  30.396 -            register_disk(gd,
  30.397 -                          device&~(gd->max_p-1), 
  30.398 -                          gd->max_p, 
  30.399 -                          &xlvbd_block_fops,
  30.400 -                          gd->part[minor&~(gd->max_p-1)].nr_sects);
  30.401 -        }
  30.402 -    }
  30.403 -    else
  30.404 -    {
  30.405 -        /*
  30.406 -         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
  30.407 -         * NB. The partition entries are only cleared if there are no VBDs
  30.408 -         * mapped to individual partitions on this unit.
  30.409 -         */
  30.410 -        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
  30.411 -        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
  30.412 -            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
  30.413 -        while ( i >= 0 )
  30.414 -        {
  30.415 -            invalidate_device(device+i, 1);
  30.416 -            gd->part[minor+i].start_sect = 0;
  30.417 -            gd->part[minor+i].nr_sects   = 0;
  30.418 -            gd->sizes[minor+i]           = 0;
  30.419 -            i--;
  30.420 -        }
  30.421 -    }
  30.422 -
  30.423 - out:
  30.424 -    up(&bd->bd_sem);
  30.425 -    bdput(bd);
  30.426 -    return rc;
  30.427 -}
  30.428 -
  30.429 -/*
  30.430 - * xlvbd_update_vbds - reprobes VBD status and updates driver state
  30.431 - * accordingly. The VBDs need to be updated in this way when the domain is
  30.432 - * initialised and also each time we receive an XLBLK_UPDATE event.
  30.433 - */
  30.434 -void xlvbd_update_vbds(void)
  30.435 -{
  30.436 -    int i, j, k, old_nr, new_nr;
  30.437 -    xen_disk_t *old_info, *new_info, *merged_info;
  30.438 -
  30.439 -    old_info = vbd_info;
  30.440 -    old_nr   = nr_vbds;
  30.441 -
  30.442 -    new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  30.443 -    if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) )
  30.444 -    {
  30.445 -        kfree(new_info);
  30.446 -        return;
  30.447 -    }
  30.448 -
  30.449 -    /*
  30.450 -     * Final list maximum size is old list + new list. This occurs only when
  30.451 -     * old list and new list do not overlap at all, and we cannot yet destroy
  30.452 - * VBDs in the old list because their usage counts are non-zero.
  30.453 -     */
  30.454 -    merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
  30.455 -
  30.456 -    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
  30.457 -    i = j = k = 0;
  30.458 -
  30.459 -    while ( (i < old_nr) && (j < new_nr) )
  30.460 -    {
  30.461 -        if ( old_info[i].device < new_info[j].device )
  30.462 -        {
  30.463 -            if ( xlvbd_remove_device(old_info[i].device) != 0 )
  30.464 -                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  30.465 -            i++;
  30.466 -        }
  30.467 -        else if ( old_info[i].device > new_info[j].device )
  30.468 -        {
  30.469 -            if ( xlvbd_init_device(&new_info[j]) == 0 )
  30.470 -                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  30.471 -            j++;
  30.472 -        }
  30.473 -        else
  30.474 -        {
  30.475 -            if ( ((old_info[i].capacity == new_info[j].capacity) &&
  30.476 -                  (old_info[i].info == new_info[j].info)) ||
  30.477 -                 (xlvbd_remove_device(old_info[i].device) != 0) )
  30.478 -                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  30.479 -            else if ( xlvbd_init_device(&new_info[j]) == 0 )
  30.480 -                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  30.481 -            i++; j++;
  30.482 -        }
  30.483 -    }
  30.484 -
  30.485 -    for ( ; i < old_nr; i++ )
  30.486 -    {
  30.487 -        if ( xlvbd_remove_device(old_info[i].device) != 0 )
  30.488 -            memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  30.489 -    }
  30.490 -
  30.491 -    for ( ; j < new_nr; j++ )
  30.492 -    {
  30.493 -        if ( xlvbd_init_device(&new_info[j]) == 0 )
  30.494 -            memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  30.495 -    }
  30.496 -
  30.497 -    vbd_info = merged_info;
  30.498 -    nr_vbds  = k;
  30.499 -
  30.500 -    kfree(old_info);
  30.501 -    kfree(new_info);
  30.502 -}
  30.503 -
  30.504 -
  30.505 -/*
  30.506 - * Set up all the linux device goop for the virtual block devices (vbd's) that 
  30.507 - * xen tells us about. Note that although from xen's pov VBDs are addressed 
  30.508 - * simply as an opaque 16-bit device number, the domain creation tools 
  30.509 - * conventionally allocate these numbers to correspond to those used by 'real' 
  30.510 - * linux -- this is just for convenience as it means e.g. that the same 
  30.511 - * /etc/fstab can be used when booting with or without xen.
  30.512 - */
  30.513 -int __init xlvbd_init(void)
  30.514 -{
  30.515 -    int i;
  30.516 -    
  30.517 -    /*
  30.518 -     * If compiled as a module, we don't support unloading yet. We therefore 
  30.519 -     * permanently increment the reference count to disallow it.
  30.520 -     */
  30.521 -    SET_MODULE_OWNER(&xlvbd_block_fops);
  30.522 -    MOD_INC_USE_COUNT;
  30.523 -
  30.524 -    /* Initialize the global arrays. */
  30.525 -    for ( i = 0; i < 256; i++ ) 
  30.526 -    {
  30.527 -        /* from the generic ide code (drivers/ide/ide-probe.c, etc) */
  30.528 -        xlide_blksize_size[i]  = 1024;
  30.529 -        xlide_hardsect_size[i] = 512;
  30.530 -        xlide_max_sectors[i]   = 128;  /* 'hwif->rqsize' if we knew it */
  30.531 -
  30.532 -        /* from the generic scsi disk code (drivers/scsi/sd.c) */
  30.533 -        xlscsi_blksize_size[i]  = 1024; /* XXX 512; */
  30.534 -        xlscsi_hardsect_size[i] = 512;
  30.535 -        xlscsi_max_sectors[i]   = 128*8; /* XXX 128; */
  30.536 -
  30.537 -        /* we don't really know what to set these to, since it depends */
  30.538 -        xlvbd_blksize_size[i]  = 512;
  30.539 -        xlvbd_hardsect_size[i] = 512;
  30.540 -        xlvbd_max_sectors[i]   = 128;
  30.541 -    }
  30.542 -
  30.543 -    vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  30.544 -    nr_vbds  = xlvbd_get_vbd_info(vbd_info);
  30.545 -
  30.546 -    if ( nr_vbds < 0 )
  30.547 -    {
  30.548 -        kfree(vbd_info);
  30.549 -        vbd_info = NULL;
  30.550 -        nr_vbds  = 0;
  30.551 -    }
  30.552 -    else
  30.553 -    {
  30.554 -        for ( i = 0; i < nr_vbds; i++ )
  30.555 -            xlvbd_init_device(&vbd_info[i]);
  30.556 -    }
  30.557 -
  30.558 -    return 0;
  30.559 -}
  30.560 -
  30.561 -
  30.562 -#ifdef MODULE
  30.563 -module_init(xlvbd_init);
  30.564 -#endif
    31.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vblkif.h	Sat Apr 24 08:10:58 2004 +0000
    31.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.3 @@ -1,109 +0,0 @@
    31.4 -/******************************************************************************
    31.5 - * vblkif.h
    31.6 - * 
    31.7 - * Unified block-device I/O interface for Xen guest OSes.
    31.8 - * 
    31.9 - * Copyright (c) 2003-2004, Keir Fraser
   31.10 - */
   31.11 -
   31.12 -#ifndef __SHARED_VBLKIF_H__
   31.13 -#define __SHARED_VBLKIF_H__
   31.14 -
   31.15 -#define blkif_vdev_t   u16
   31.16 -#define blkif_sector_t u64
   31.17 -
   31.18 -#define BLKIF_OP_READ      0
   31.19 -#define BLKIF_OP_WRITE     1
   31.20 -#define BLKIF_OP_PROBE     2
   31.21 -
   31.22 -/* NB. Ring size must be small enough for sizeof(blkif_ring_t) <= PAGE_SIZE. */
   31.23 -#define BLKIF_RING_SIZE        64
   31.24 -
   31.25 -/*
   31.26 - * Maximum scatter/gather segments per request.
   31.27 - * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
   31.28 - * NB. This could be 12 if the ring indexes weren't stored in the same page.
   31.29 - */
   31.30 -#define BLKIF_REQUEST_MAX_SEGMENTS 11
   31.31 -
   31.32 -typedef struct {
   31.33 -    unsigned char  operation;        /* BLKIF_OP_???                         */
   31.34 -    unsigned char  nr_segments;      /* number of segments (<= BLKIF_REQUEST_MAX_SEGMENTS) */
   31.35 -    blkif_vdev_t   device;           /* only for read/write requests         */
   31.36 -    unsigned long  id;               /* private guest value, echoed in resp  */
   31.37 -    blkif_sector_t sector_number;    /* start sector idx on disk (r/w only)  */
   31.38 -    /* Least-significant 9 bits are 'nr_sects'; high 23 bits are the address. */
   31.39 -    unsigned long  buffer_and_sects[BLKIF_REQUEST_MAX_SEGMENTS];
   31.40 -} blkif_request_t;
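/*
 * Illustrative helpers (not part of the original interface) making the
 * packing above explicit: the low 9 bits of each entry carry 'nr_sects'
 * and the remaining high bits carry the 512-byte-aligned buffer address.
 */
#define BLKIF_PACK_SEG(_addr, _nsects) (((_addr) & ~0x1ffUL) | ((_nsects) & 0x1ffUL))
#define BLKIF_SEG_ADDR(_seg)           ((_seg) & ~0x1ffUL)
#define BLKIF_SEG_NSECTS(_seg)         ((_seg) &  0x1ffUL)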
   31.41 -
   31.42 -typedef struct {
   31.43 -    unsigned long   id;              /* copied from request */
   31.44 -    unsigned char   operation;       /* copied from request */
   31.45 -    int             status;          /* BLKIF_RSP_???       */
   31.46 -} blkif_response_t;
   31.47 -
   31.48 -#define BLKIF_RSP_ERROR  -1 /* non-specific 'error' */
   31.49 -#define BLKIF_RSP_OKAY    0 /* non-specific 'okay'  */
   31.50 -
   31.51 -/*
   31.52 - * We use a special capitalised type name because it is _essential_ that all 
   31.53 - * arithmetic on indexes is done on an integer type of the correct size.
   31.54 - */
   31.55 -typedef unsigned int BLKIF_RING_IDX;
   31.56 -
   31.57 -/*
   31.58 - * Ring indexes are 'free running'. That is, they are not stored modulo the
   31.59 - * size of the ring buffer. The following macro converts a free-running counter
   31.60 - * into a value that can directly index a ring-buffer array.
   31.61 - */
   31.62 -#define MASK_BLKIF_IDX(_i) ((_i)&(BLKIF_RING_SIZE-1))
   31.63 -
   31.64 -typedef struct {
   31.65 -    BLKIF_RING_IDX req_prod;  /* Request producer. Updated by guest OS. */
   31.66 -    BLKIF_RING_IDX resp_prod; /* Response producer. Updated by Xen.     */
   31.67 -    union {
   31.68 -        blkif_request_t  req;
   31.69 -        blkif_response_t resp;
   31.70 -    } ring[BLKIF_RING_SIZE];
   31.71 -} blkif_ring_t;
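/*
 * Minimal producer sketch (illustrative only; assumes the guest kernel's
 * wmb() write barrier): the indexes run freely and are masked only at the
 * point where the ring array is dereferenced.
 */
static inline void blkif_example_queue_req(blkif_ring_t *r,
                                           blkif_request_t *req)
{
    BLKIF_RING_IDX p = r->req_prod;           /* free-running counter    */
    r->ring[MASK_BLKIF_IDX(p)].req = *req;    /* mask only when indexing */
    wmb();                                    /* publish the entry...    */
    r->req_prod = p + 1;                      /* ...before advancing     */
}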
   31.72 -
   31.73 -
   31.74 -/*
   31.75 - * BLKIF_OP_PROBE:
   31.76 - * The request format for a probe request is constrained as follows:
   31.77 - *  @operation   == BLKIF_OP_PROBE
   31.78 - *  @nr_segments == size of probe buffer in pages
   31.79 - *  @device      == unused (zero)
   31.80 - *  @id          == any value (echoed in response message)
   31.81 - *  @sector_number == unused (zero)
   31.82 - *  @buffer_and_sects == list of page-aligned, page-sized buffers.
   31.83 - *                       (i.e., nr_sects == 8).
   31.84 - * 
   31.85 - * The response is a list of vdisk_t elements copied into the out-of-band
   31.86 - * probe buffer. On success the response status field contains the number
   31.87 - * of vdisk_t elements.
   31.88 - */
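/*
 * Sketch (not in the original header) of filling in a probe request for a
 * single page-sized buffer; 'buf_ma' is a hypothetical page-aligned
 * machine address supplied by the caller.
 */
static inline void blkif_example_build_probe(blkif_request_t *req,
                                             unsigned long buf_ma)
{
    req->operation     = BLKIF_OP_PROBE;
    req->nr_segments   = 1;          /* probe buffer is one page        */
    req->device        = 0;          /* unused for probes               */
    req->id            = 0;          /* echoed back in the response     */
    req->sector_number = 0;          /* unused for probes               */
    req->buffer_and_sects[0] = buf_ma | 8;  /* nr_sects == 8 (4kB page) */
}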
   31.89 -
   31.90 -/* XXX SMH: Type values below are chosen to match ide_xxx in Linux ide.h. */
   31.91 -#define VDISK_TYPE_FLOPPY  0x00
   31.92 -#define VDISK_TYPE_TAPE    0x01
   31.93 -#define VDISK_TYPE_CDROM   0x05
   31.94 -#define VDISK_TYPE_OPTICAL 0x07
   31.95 -#define VDISK_TYPE_DISK    0x20 
   31.96 -
   31.97 -#define VDISK_TYPE_MASK    0x3F
   31.98 -#define VDISK_TYPE(_x)     ((_x) & VDISK_TYPE_MASK) 
   31.99 -
  31.100 -/* The top two bits of the type field encode various flags. */
  31.101 -#define VDISK_FLAG_RO      0x40
  31.102 -#define VDISK_FLAG_VIRT    0x80
  31.103 -#define VDISK_READONLY(_x) ((_x) & VDISK_FLAG_RO)
  31.104 -#define VDISK_VIRTUAL(_x)  ((_x) & VDISK_FLAG_VIRT) 
  31.105 -
  31.106 -typedef struct {
  31.107 -    blkif_vdev_t   device;       /* Device number (opaque 16 bit value). */
  31.108 -    unsigned short info;         /* Device type and flags (VDISK_*).     */
  31.109 -    blkif_sector_t capacity;     /* Size in terms of 512-byte sectors.   */
  31.110 -} vdisk_t;
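/*
 * Illustrative check (not part of the interface): classify a probed
 * vdisk_t entry using the macros above.
 */
static inline int vdisk_example_is_writable_disk(vdisk_t *v)
{
    return (VDISK_TYPE(v->info) == VDISK_TYPE_DISK) &&
           !VDISK_READONLY(v->info);
}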
  31.111 -
  31.112 -#endif /* __SHARED_VBLKIF_H__ */
    32.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile	Sat Apr 24 08:10:58 2004 +0000
    32.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.3 @@ -1,10 +0,0 @@
    32.4 -
    32.5 -O_TARGET := drv.o
    32.6 -
    32.7 -subdir-y += frontend
    32.8 -obj-y    += frontend/drv.o
    32.9 -
   32.10 -subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
   32.11 -obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
   32.12 -
   32.13 -include $(TOPDIR)/Rules.make
    33.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile	Sat Apr 24 08:10:58 2004 +0000
    33.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    33.3 @@ -1,3 +0,0 @@
    33.4 -O_TARGET := drv.o
    33.5 -obj-y := main.o
    33.6 -include $(TOPDIR)/Rules.make
    34.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c	Sat Apr 24 08:10:58 2004 +0000
    34.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.3 @@ -1,26 +0,0 @@
    34.4 -/******************************************************************************
    34.5 - * arch/xen/drivers/vnetif/backend/main.c
    34.6 - * 
    34.7 - * Back-end of the driver for virtual network devices. This portion of the
    34.8 - * driver exports a 'unified' network-device interface that can be accessed
    34.9 - * by any operating system that implements a compatible front end. A 
   34.10 - * reference front-end implementation can be found in:
   34.11 - *  arch/xen/drivers/vnetif/frontend
   34.12 - * 
   34.13 - * Copyright (c) 2004, K A Fraser
   34.14 - */
   34.15 -
   34.16 -#include <linux/config.h>
   34.17 -#include <linux/module.h>
   34.18 -
   34.19 -static int __init init_module(void)
   34.20 -{
   34.21 -    return 0;
   34.22 -}
   34.23 -
   34.24 -static void cleanup_module(void)
   34.25 -{
   34.26 -}
   34.27 -
   34.28 -module_init(init_module);
   34.29 -module_exit(cleanup_module);
    35.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile	Sat Apr 24 08:10:58 2004 +0000
    35.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    35.3 @@ -1,3 +0,0 @@
    35.4 -O_TARGET := drv.o
    35.5 -obj-y := vnetif.o
    35.6 -include $(TOPDIR)/Rules.make
    36.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c	Sat Apr 24 08:10:58 2004 +0000
    36.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.3 @@ -1,565 +0,0 @@
    36.4 -/******************************************************************************
    36.5 - * vnetif.c
    36.6 - * 
    36.7 - * Virtual network driver for XenoLinux.
    36.8 - * 
    36.9 - * Copyright (c) 2002-2004, K A Fraser
   36.10 - */
   36.11 -
   36.12 -#include <linux/config.h>
   36.13 -#include <linux/module.h>
   36.14 -
   36.15 -#include <linux/kernel.h>
   36.16 -#include <linux/sched.h>
   36.17 -#include <linux/slab.h>
   36.18 -#include <linux/string.h>
   36.19 -#include <linux/errno.h>
   36.20 -
   36.21 -#include <linux/netdevice.h>
   36.22 -#include <linux/inetdevice.h>
   36.23 -#include <linux/etherdevice.h>
   36.24 -#include <linux/skbuff.h>
   36.25 -#include <linux/init.h>
   36.26 -
   36.27 -#include <asm/io.h>
   36.28 -#include <net/sock.h>
   36.29 -#include <net/pkt_sched.h>
   36.30 -
   36.31 -#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
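/*
 * (Presumably the point of the odd size: asking for just over half a page
 *  forces the underlying slab allocation up to a whole page, so skb->head
 *  comes back page-aligned -- a property network_alloc_rx_buffers() checks
 *  and the page-flipping receive path relies on.)
 */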
   36.32 -
   36.33 -static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
   36.34 -static void network_tx_buf_gc(struct net_device *dev);
   36.35 -static void network_alloc_rx_buffers(struct net_device *dev);
   36.36 -static void cleanup_module(void);
   36.37 -
   36.38 -/* Dynamically-mapped IRQs. */
   36.39 -static int network_irq, debug_irq;
   36.40 -
   36.41 -static struct list_head dev_list;
   36.42 -
   36.43 -struct net_private
   36.44 -{
   36.45 -    struct list_head list;
   36.46 -    struct net_device *dev;
   36.47 -
   36.48 -    struct net_device_stats stats;
   36.49 -    NET_RING_IDX rx_resp_cons, tx_resp_cons;
   36.50 -    unsigned int net_ring_fixmap_idx, tx_full;
   36.51 -    net_ring_t  *net_ring;
   36.52 -    net_idx_t   *net_idx;
   36.53 -    spinlock_t   tx_lock;
   36.54 -    unsigned int idx; /* Domain-specific index of this VIF. */
   36.55 -
   36.56 -    unsigned int rx_bufs_to_notify;
   36.57 -
   36.58 -#define STATE_ACTIVE    0
   36.59 -#define STATE_SUSPENDED 1
   36.60 -#define STATE_CLOSED    2
   36.61 -    unsigned int state;
   36.62 -
   36.63 -    /*
   36.64 -     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
   36.65 -     * array is an index into a chain of free entries.
   36.66 -     */
   36.67 -    struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1];
   36.68 -    struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1];
   36.69 -};
   36.70 -
   36.71 -/* Access macros for acquiring and freeing slots in {tx,rx}_skbs[]. */
   36.72 -#define ADD_ID_TO_FREELIST(_list, _id)             \
   36.73 -    (_list)[(_id)] = (_list)[0];                   \
   36.74 -    (_list)[0]     = (void *)(unsigned long)(_id);
   36.75 -#define GET_ID_FROM_FREELIST(_list)                \
   36.76 - ({ unsigned long _id = (unsigned long)(_list)[0]; \
   36.77 -    (_list)[0]  = (_list)[_id];                    \
   36.78 -    (unsigned short)_id; })
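/*
 * Illustrative walk-through (hypothetical 4-entry list, not from the
 * original source): after initialising list[i] = i+1 for i = 0..4,
 * entry 0 heads a free chain 1 -> 2 -> 3 -> 4.
 *
 *     id = GET_ID_FROM_FREELIST(list);   // id == 1; head becomes 2
 *     list[id] = skb;                    // slot 1 now holds a real skb
 *     ADD_ID_TO_FREELIST(list, id);      // slot 1 pushed back as head
 */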
   36.79 -
   36.80 -
   36.81 -static void _dbg_network_int(struct net_device *dev)
   36.82 -{
   36.83 -    struct net_private *np = dev->priv;
   36.84 -
   36.85 -    if ( np->state == STATE_CLOSED )
   36.86 -        return;
   36.87 -    
   36.88 -    printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x,"
   36.89 -           " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x,"
   36.90 -           " tx_event=0x%08x, state=%d\n",
   36.91 -           np->tx_full, np->tx_resp_cons, 
   36.92 -           np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, 
   36.93 -           np->net_idx->tx_event,
   36.94 -           test_bit(__LINK_STATE_XOFF, &dev->state));
   36.95 -    printk(KERN_ALERT "net: rx_resp_cons=0x%08x,"
   36.96 -           " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n",
   36.97 -           np->rx_resp_cons, np->net_idx->rx_req_prod,
   36.98 -           np->net_idx->rx_resp_prod, np->net_idx->rx_event);
   36.99 -}
  36.100 -
  36.101 -
  36.102 -static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs)
  36.103 -{
  36.104 -    struct list_head *ent;
  36.105 -    struct net_private *np;
  36.106 -    list_for_each ( ent, &dev_list )
  36.107 -    {
  36.108 -        np = list_entry(ent, struct net_private, list);
  36.109 -        _dbg_network_int(np->dev);
  36.110 -    }
  36.111 -}
  36.112 -
  36.113 -
  36.114 -static int network_open(struct net_device *dev)
  36.115 -{
  36.116 -    struct net_private *np = dev->priv;
  36.117 -    netop_t netop;
  36.118 -    int i, ret;
  36.119 -
  36.120 -    netop.cmd = NETOP_RESET_RINGS;
  36.121 -    netop.vif = np->idx;
  36.122 -    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
  36.123 -    {
  36.124 -        printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n");
  36.125 -        return ret;
  36.126 -    }
  36.127 -
  36.128 -    netop.cmd = NETOP_GET_VIF_INFO;
  36.129 -    netop.vif = np->idx;
  36.130 -    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
  36.131 -    {
  36.132 -        printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx);
  36.133 -        return ret;
  36.134 -    }
  36.135 -
  36.136 -    memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
  36.137 -
  36.138 -    set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, 
  36.139 -               netop.u.get_vif_info.ring_mfn << PAGE_SHIFT);
  36.140 -    np->net_ring = (net_ring_t *)fix_to_virt(
  36.141 -        FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
  36.142 -    np->net_idx  = &HYPERVISOR_shared_info->net_idx[np->idx];
  36.143 -
  36.144 -    np->rx_bufs_to_notify = 0;
  36.145 -    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
  36.146 -    memset(&np->stats, 0, sizeof(np->stats));
  36.147 -    spin_lock_init(&np->tx_lock);
  36.148 -    memset(np->net_ring, 0, sizeof(*np->net_ring));
  36.149 -    memset(np->net_idx, 0, sizeof(*np->net_idx));
  36.150 -
  36.151 -    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
  36.152 -    for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ )
  36.153 -        np->tx_skbs[i] = (void *)(i+1);
  36.154 -    for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ )
  36.155 -        np->rx_skbs[i] = (void *)(i+1);
  36.156 -
  36.157 -    wmb();
  36.158 -    np->state = STATE_ACTIVE;
  36.159 -
  36.160 -    network_alloc_rx_buffers(dev);
  36.161 -
  36.162 -    netif_start_queue(dev);
  36.163 -
  36.164 -    MOD_INC_USE_COUNT;
  36.165 -
  36.166 -    return 0;
  36.167 -}
  36.168 -
  36.169 -
  36.170 -static void network_tx_buf_gc(struct net_device *dev)
  36.171 -{
  36.172 -    NET_RING_IDX i, prod;
  36.173 -    unsigned short id;
  36.174 -    struct net_private *np = dev->priv;
  36.175 -    struct sk_buff *skb;
  36.176 -    tx_entry_t *tx_ring = np->net_ring->tx_ring;
  36.177 -
  36.178 -    do {
  36.179 -        prod = np->net_idx->tx_resp_prod;
  36.180 -
  36.181 -        for ( i = np->tx_resp_cons; i != prod; i++ )
  36.182 -        {
  36.183 -            id  = tx_ring[MASK_NET_TX_IDX(i)].resp.id;
  36.184 -            skb = np->tx_skbs[id];
  36.185 -            ADD_ID_TO_FREELIST(np->tx_skbs, id);
  36.186 -            dev_kfree_skb_any(skb);
  36.187 -        }
  36.188 -        
  36.189 -        np->tx_resp_cons = prod;
  36.190 -        
  36.191 -        /*
  36.192 -         * Set a new event, then check for a race with Xen updating tx_resp_prod.
  36.193 -         * Note that it is essential to schedule a callback, no matter how few
  36.194 -         * buffers are pending. Even if there is space in the transmit ring,
  36.195 -         * higher layers may be blocked because too much data is outstanding:
  36.196 -         * in such cases notification from Xen is likely to be the only kick
  36.197 -         * that we'll get.
  36.198 -         */
  36.199 -        np->net_idx->tx_event = 
  36.200 -            prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1;
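        /*
         * Worked example (illustrative): with prod == 10 and
         * tx_req_prod == 14, tx_event becomes 10 + (4 >> 1) + 1 == 13,
         * i.e. Xen is asked to notify us roughly half-way through the
         * currently outstanding requests.
         */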
  36.201 -        mb();
  36.202 -    }
  36.203 -    while ( prod != np->net_idx->tx_resp_prod );
  36.204 -
  36.205 -    if ( np->tx_full && 
  36.206 -         ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) )
  36.207 -    {
  36.208 -        np->tx_full = 0;
  36.209 -        if ( np->state == STATE_ACTIVE )
  36.210 -            netif_wake_queue(dev);
  36.211 -    }
  36.212 -}
  36.213 -
  36.214 -
  36.215 -static inline pte_t *get_ppte(void *addr)
  36.216 -{
  36.217 -    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
  36.218 -    pgd = pgd_offset_k(   (unsigned long)addr);
  36.219 -    pmd = pmd_offset(pgd, (unsigned long)addr);
  36.220 -    pte = pte_offset(pmd, (unsigned long)addr);
  36.221 -    return pte;
  36.222 -}
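/*
 * Usage note: the receive path below passes virt_to_machine(get_ppte(...))
 * to Xen -- the machine address of the PTE mapping the buffer -- so the
 * hypervisor can deliver a packet by rewriting that PTE to map the page
 * containing the received data (page flipping).
 */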
  36.223 -
  36.224 -
  36.225 -static void network_alloc_rx_buffers(struct net_device *dev)
  36.226 -{
  36.227 -    unsigned short id;
  36.228 -    struct net_private *np = dev->priv;
  36.229 -    struct sk_buff *skb;
  36.230 -    netop_t netop;
  36.231 -    NET_RING_IDX i = np->net_idx->rx_req_prod;
  36.232 -
  36.233 -    if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || 
  36.234 -         unlikely(np->state != STATE_ACTIVE) )
  36.235 -        return;
  36.236 -
  36.237 -    do {
  36.238 -        skb = dev_alloc_skb(RX_BUF_SIZE);
  36.239 -        if ( unlikely(skb == NULL) )
  36.240 -            break;
  36.241 -
  36.242 -        skb->dev = dev;
  36.243 -
  36.244 -        if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) )
  36.245 -            panic("alloc_skb needs to provide us page-aligned buffers.");
  36.246 -
  36.247 -        id = GET_ID_FROM_FREELIST(np->rx_skbs);
  36.248 -        np->rx_skbs[id] = skb;
  36.249 -
  36.250 -        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id   = id;
  36.251 -        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = 
  36.252 -            virt_to_machine(get_ppte(skb->head));
  36.253 -
  36.254 -        np->rx_bufs_to_notify++;
  36.255 -    }
  36.256 -    while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE );
  36.257 -
  36.258 -    /*
  36.259 -     * We may have allocated buffers which have entries outstanding in the page
  36.260 -     * update queue -- make sure we flush those first!
  36.261 -     */
  36.262 -    flush_page_update_queue();
  36.263 -
  36.264 -    np->net_idx->rx_req_prod = i;
  36.265 -    np->net_idx->rx_event    = np->rx_resp_cons + 1;
  36.266 -        
  36.267 -    /* Batch Xen notifications. */
  36.268 -    if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) )
  36.269 -    {
  36.270 -        netop.cmd = NETOP_PUSH_BUFFERS;
  36.271 -        netop.vif = np->idx;
  36.272 -        (void)HYPERVISOR_net_io_op(&netop);
  36.273 -        np->rx_bufs_to_notify = 0;
  36.274 -    }
  36.275 -}
  36.276 -
  36.277 -
  36.278 -static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
  36.279 -{
  36.280 -    unsigned short id;
  36.281 -    struct net_private *np = (struct net_private *)dev->priv;
  36.282 -    tx_req_entry_t *tx;
  36.283 -    netop_t netop;
  36.284 -    NET_RING_IDX i;
  36.285 -
  36.286 -    if ( unlikely(np->tx_full) )
  36.287 -    {
  36.288 -        printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
  36.289 -        netif_stop_queue(dev);
  36.290 -        return -ENOBUFS;
  36.291 -    }
  36.292 -
  36.293 -    if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
  36.294 -                  PAGE_SIZE) )
  36.295 -    {
  36.296 -        struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
  36.297 -        if ( unlikely(new_skb == NULL) )
  36.298 -            return 1;
  36.299 -        skb_put(new_skb, skb->len);
  36.300 -        memcpy(new_skb->data, skb->data, skb->len);
  36.301 -        dev_kfree_skb(skb);
  36.302 -        skb = new_skb;
  36.303 -    }   
  36.304 -    
  36.305 -    spin_lock_irq(&np->tx_lock);
  36.306 -
  36.307 -    i = np->net_idx->tx_req_prod;
  36.308 -
  36.309 -    id = GET_ID_FROM_FREELIST(np->tx_skbs);
  36.310 -    np->tx_skbs[id] = skb;
  36.311 -
  36.312 -    tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req;
  36.313 -
  36.314 -    tx->id   = id;
  36.315 -    tx->addr = phys_to_machine(virt_to_phys(skb->data));
  36.316 -    tx->size = skb->len;
  36.317 -
  36.318 -    wmb();
  36.319 -    np->net_idx->tx_req_prod = i + 1;
  36.320 -
  36.321 -    network_tx_buf_gc(dev);
  36.322 -
  36.323 -    if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) )
  36.324 -    {
  36.325 -        np->tx_full = 1;
  36.326 -        netif_stop_queue(dev);
  36.327 -    }
  36.328 -
  36.329 -    spin_unlock_irq(&np->tx_lock);
  36.330 -
  36.331 -    np->stats.tx_bytes += skb->len;
  36.332 -    np->stats.tx_packets++;
  36.333 -
  36.334 -    /* Only notify Xen if there are no outstanding responses. */
  36.335 -    mb();
  36.336 -    if ( np->net_idx->tx_resp_prod == i )
  36.337 -    {
  36.338 -        netop.cmd = NETOP_PUSH_BUFFERS;
  36.339 -        netop.vif = np->idx;
  36.340 -        (void)HYPERVISOR_net_io_op(&netop);
  36.341 -    }
  36.342 -
  36.343 -    return 0;
  36.344 -}
  36.345 -
  36.346 -
  36.347 -static inline void _network_interrupt(struct net_device *dev)
  36.348 -{
  36.349 -    struct net_private *np = dev->priv;
  36.350 -    unsigned long flags;
  36.351 -    struct sk_buff *skb;
  36.352 -    rx_resp_entry_t *rx;
  36.353 -    NET_RING_IDX i;
  36.354 -
  36.355 -    if ( unlikely(np->state == STATE_CLOSED) )
  36.356 -        return;
  36.357 -    
  36.358 -    spin_lock_irqsave(&np->tx_lock, flags);
  36.359 -    network_tx_buf_gc(dev);
  36.360 -    spin_unlock_irqrestore(&np->tx_lock, flags);
  36.361 -
  36.362 - again:
  36.363 -    for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ )
  36.364 -    {
  36.365 -        rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp;
  36.366 -
  36.367 -        skb = np->rx_skbs[rx->id];
  36.368 -        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
  36.369 -
  36.370 -        if ( unlikely(rx->status != RING_STATUS_OK) )
  36.371 -        {
  36.372 -            /* Gate this error. We get a (valid) slew of them on suspend. */
  36.373 -            if ( np->state == STATE_ACTIVE )
  36.374 -                printk(KERN_ALERT "bad buffer on RX ring! (%d)\n", rx->status);
  36.375 -            dev_kfree_skb_any(skb);
  36.376 -            continue;
  36.377 -        }
  36.378 -
  36.379 -        /*
  36.380 -         * Set up shinfo -- taken from alloc_skb(). This was particularly
  36.381 -         * nasty: the shared info is hidden at the back of the data area
  36.382 -         * (presumably so it can be shared), but on a page flip it gets clobbered.
  36.383 -         */
  36.384 -        atomic_set(&(skb_shinfo(skb)->dataref), 1);
  36.385 -        skb_shinfo(skb)->nr_frags = 0;
  36.386 -        skb_shinfo(skb)->frag_list = NULL;
  36.387 -                                
  36.388 -        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
  36.389 -            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
  36.390 -
  36.391 -        skb->data = skb->tail = skb->head + rx->offset;
  36.392 -        skb_put(skb, rx->size);
  36.393 -        skb->protocol = eth_type_trans(skb, dev);
  36.394 -
  36.395 -        np->stats.rx_packets++;
  36.396 -
  36.397 -        np->stats.rx_bytes += rx->size;
  36.398 -        netif_rx(skb);
  36.399 -        dev->last_rx = jiffies;
  36.400 -    }
  36.401 -
  36.402 -    np->rx_resp_cons = i;
  36.403 -
  36.404 -    network_alloc_rx_buffers(dev);
  36.405 -    
  36.406 -    /* Deal with hypervisor racing our resetting of rx_event. */
  36.407 -    mb();
  36.408 -    if ( np->net_idx->rx_resp_prod != i )
  36.409 -        goto again;
  36.410 -}
  36.411 -
  36.412 -
  36.413 -static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs)
  36.414 -{
  36.415 -    struct list_head *ent;
  36.416 -    struct net_private *np;
  36.417 -    list_for_each ( ent, &dev_list )
  36.418 -    {
  36.419 -        np = list_entry(ent, struct net_private, list);
  36.420 -        _network_interrupt(np->dev);
  36.421 -    }
  36.422 -}
  36.423 -
  36.424 -
  36.425 -static int network_close(struct net_device *dev)
  36.426 -{
  36.427 -    struct net_private *np = dev->priv;
  36.428 -    netop_t netop;
  36.429 -
  36.430 -    np->state = STATE_SUSPENDED;
  36.431 -    wmb();
  36.432 -
  36.433 -    netif_stop_queue(np->dev);
  36.434 -
  36.435 -    netop.cmd = NETOP_FLUSH_BUFFERS;
  36.436 -    netop.vif = np->idx;
  36.437 -    (void)HYPERVISOR_net_io_op(&netop);
  36.438 -
  36.439 -    while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
  36.440 -            (np->tx_resp_cons != np->net_idx->tx_req_prod) )
  36.441 -    {
  36.442 -        barrier();
  36.443 -        current->state = TASK_INTERRUPTIBLE;
  36.444 -        schedule_timeout(1);
  36.445 -    }
  36.446 -
  36.447 -    wmb();
  36.448 -    np->state = STATE_CLOSED;
  36.449 -    wmb();
  36.450 -
  36.451 -    /* Now no longer safe to take interrupts for this device. */
  36.452 -    clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
  36.453 -
  36.454 -    MOD_DEC_USE_COUNT;
  36.455 -
  36.456 -    return 0;
  36.457 -}
  36.458 -
  36.459 -
  36.460 -static struct net_device_stats *network_get_stats(struct net_device *dev)
  36.461 -{
  36.462 -    struct net_private *np = (struct net_private *)dev->priv;
  36.463 -    return &np->stats;
  36.464 -}
  36.465 -
  36.466 -
  36.467 -static int __init init_module(void)
  36.468 -{
  36.469 -#if 0
  36.470 -    int i, fixmap_idx=-1, err;
  36.471 -    struct net_device *dev;
  36.472 -    struct net_private *np;
  36.473 -    netop_t netop;
  36.474 -
  36.475 -    INIT_LIST_HEAD(&dev_list);
  36.476 -
  36.477 -    network_irq = bind_virq_to_irq(VIRQ_NET);
  36.478 -    debug_irq   = bind_virq_to_irq(VIRQ_DEBUG);
  36.479 -
  36.480 -    err = request_irq(network_irq, network_interrupt, 
  36.481 -                      SA_SAMPLE_RANDOM, "network", NULL);
  36.482 -    if ( err )
  36.483 -    {
  36.484 -        printk(KERN_WARNING "Could not allocate network interrupt\n");
  36.485 -        goto fail;
  36.486 -    }
  36.487 -    
  36.488 -    err = request_irq(debug_irq, dbg_network_int, 
  36.489 -                      SA_SHIRQ, "net_dbg", &dbg_network_int);
  36.490 -    if ( err )
  36.491 -        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
  36.492 -
  36.493 -    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
  36.494 -    {
  36.495 -        /* If the VIF is invalid then the query hypercall will fail. */
  36.496 -        netop.cmd = NETOP_GET_VIF_INFO;
  36.497 -        netop.vif = i;
  36.498 -        if ( HYPERVISOR_net_io_op(&netop) != 0 )
  36.499 -            continue;
  36.500 -
  36.501 -        /* We actually only support up to 4 vifs right now. */
  36.502 -        if ( ++fixmap_idx == 4 )
  36.503 -            break;
  36.504 -
  36.505 -        dev = alloc_etherdev(sizeof(struct net_private));
  36.506 -        if ( dev == NULL )
  36.507 -        {
  36.508 -            err = -ENOMEM;
  36.509 -            goto fail;
  36.510 -        }
  36.511 -
  36.512 -        np = dev->priv;
  36.513 -        np->state               = STATE_CLOSED;
  36.514 -        np->net_ring_fixmap_idx = fixmap_idx;
  36.515 -        np->idx                 = i;
  36.516 -
  36.517 -        SET_MODULE_OWNER(dev);
  36.518 -        dev->open            = network_open;
  36.519 -        dev->hard_start_xmit = network_start_xmit;
  36.520 -        dev->stop            = network_close;
  36.521 -        dev->get_stats       = network_get_stats;
  36.522 -
  36.523 -        memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
  36.524 -
  36.525 -        if ( (err = register_netdev(dev)) != 0 )
  36.526 -        {
  36.527 -            kfree(dev);
  36.528 -            goto fail;
  36.529 -        }
  36.530 -
  36.531 -        np->dev = dev;
  36.532 -        list_add(&np->list, &dev_list);
  36.533 -    }
  36.534 -
  36.535 -    return 0;
  36.536 -
  36.537 - fail:
  36.538 -    cleanup_module();
  36.539 -    return err;
  36.540 -#endif
  36.541 -    return 0;
  36.542 -}
  36.543 -
  36.544 -
  36.545 -static void cleanup_module(void)
  36.546 -{
  36.547 -    struct net_private *np;
  36.548 -    struct net_device *dev;
  36.549 -
  36.550 -    while ( !list_empty(&dev_list) )
  36.551 -    {
  36.552 -        np = list_entry(dev_list.next, struct net_private, list);
  36.553 -        list_del(&np->list);
  36.554 -        dev = np->dev;
  36.555 -        unregister_netdev(dev);
  36.556 -        kfree(dev);
  36.557 -    }
  36.558 -
  36.559 -    free_irq(network_irq, NULL);
  36.560 -    free_irq(debug_irq, NULL);
  36.561 -
  36.562 -    unbind_virq_from_irq(VIRQ_NET);
  36.563 -    unbind_virq_from_irq(VIRQ_DEBUG);
  36.564 -}
  36.565 -
  36.566 -
  36.567 -module_init(init_module);
  36.568 -module_exit(cleanup_module);