ia64/xen-unstable

changeset 1306:ee3ca1b3f62a

bitkeeper revision 1.867 (4083dc17x4KfiVBhD7lCtJYk8h8OjA)

Arrange directory structure for new I/O drivers.
author kaf24@scramble.cl.cam.ac.uk
date Mon Apr 19 14:03:03 2004 +0000 (2004-04-19)
parents 6532f3f768df
children 0eac2a407546
files .rootkeys xenolinux-2.4.26-sparse/arch/xen/Makefile xenolinux-2.4.26-sparse/arch/xen/config.in xenolinux-2.4.26-sparse/arch/xen/defconfig xenolinux-2.4.26-sparse/arch/xen/defconfig-physdev xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/block.c xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/block.h xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vbd.c xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/vnetif.c
line diff
     1.1 --- a/.rootkeys	Thu Apr 15 16:28:53 2004 +0000
     1.2 +++ b/.rootkeys	Mon Apr 19 14:03:03 2004 +0000
     1.3 @@ -668,12 +668,16 @@ 40420a6ebRqDjufoN1WSJvolEW2Wjw xenolinux
     1.4  40420a73Wou6JlsZDiu6YwjYomsm7A xenolinux-2.4.26-sparse/arch/xen/drivers/evtchn/evtchn.c
     1.5  3e5a4e65gZBRBB6RsSVg1c9iahigAw xenolinux-2.4.26-sparse/arch/xen/drivers/network/Makefile
     1.6  3e5a4e65ZxKrbFetVB84JhrTyZ1YuQ xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c
     1.7 -4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile
     1.8 -4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/block.c
     1.9 -4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/block.h
    1.10 -4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vbd.c
    1.11 -405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile
    1.12 -405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/vnetif.c
    1.13 +4083dc16z0jvZEH4PiVDbDRreaNp6w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile
    1.14 +4083dc16KQus88a4U3uCV6qVCA6_8Q xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile
    1.15 +4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile
    1.16 +4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c
    1.17 +4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h
    1.18 +4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c
    1.19 +4083dc16-Kd5y9psK_yk161sme5j5Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile
    1.20 +4083dc16UmHXxS9g_UFVnkUpN-oP2Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile
    1.21 +405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile
    1.22 +405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c
    1.23  3e5a4e65lWzkiPXsZdzPt2RNnJGG1g xenolinux-2.4.26-sparse/arch/xen/kernel/Makefile
    1.24  4075806dE5mQwlVUf8-t3YXjiMMWDQ xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c
    1.25  3e5a4e65_hqfuxtGG8IUy6wRM86Ecg xenolinux-2.4.26-sparse/arch/xen/kernel/entry.S
     2.1 --- a/xenolinux-2.4.26-sparse/arch/xen/Makefile	Thu Apr 15 16:28:53 2004 +0000
     2.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/Makefile	Mon Apr 19 14:03:03 2004 +0000
     2.3 @@ -51,7 +51,7 @@ HEAD := arch/xen/kernel/head.o arch/xen/
     2.4  SUBDIRS += arch/xen/kernel arch/xen/mm arch/xen/lib
     2.5  SUBDIRS += arch/xen/drivers/console 
     2.6  SUBDIRS += arch/xen/drivers/evtchn
     2.7 -ifdef CONFIG_XEN_PHYSDEV_ACCESS
     2.8 +ifdef CONFIG_XEN_NEWIO
     2.9  SUBDIRS += arch/xen/drivers/vblkif
    2.10  SUBDIRS += arch/xen/drivers/vnetif
    2.11  else
    2.12 @@ -66,7 +66,7 @@ endif
    2.13  CORE_FILES += arch/xen/kernel/kernel.o arch/xen/mm/mm.o
    2.14  CORE_FILES += arch/xen/drivers/evtchn/drv.o
    2.15  CORE_FILES += arch/xen/drivers/console/drv.o
    2.16 -ifdef CONFIG_XEN_PHYSDEV_ACCESS
    2.17 +ifdef CONFIG_XEN_NEWIO
    2.18  CORE_FILES += arch/xen/drivers/vblkif/drv.o
    2.19  CORE_FILES += arch/xen/drivers/vnetif/drv.o
    2.20  else
     3.1 --- a/xenolinux-2.4.26-sparse/arch/xen/config.in	Thu Apr 15 16:28:53 2004 +0000
     3.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/config.in	Mon Apr 19 14:03:03 2004 +0000
     3.3 @@ -15,7 +15,10 @@ define_bool CONFIG_UID16 y
     3.4  mainmenu_option next_comment
     3.5  comment 'Xen'
     3.6  bool 'Support for privileged operations (domain 0)' CONFIG_XEN_PRIVILEGED_GUEST
     3.7 -bool 'Support for direct physical device access' CONFIG_XEN_PHYSDEV_ACCESS
     3.8 +bool 'New I/O model (no drivers in Xen) [EXPERIMENTAL]' CONFIG_XEN_NEWIO
     3.9 +if [ "$CONFIG_XEN_NEWIO" = "y" ]; then
    3.10 +  bool 'Device-driver domain (physical device access)' CONFIG_XEN_PHYSDEV_ACCESS
    3.11 +fi
    3.12  endmenu
    3.13  # The IBM S/390 patch needs this.
    3.14  define_bool CONFIG_NO_IDLE_HZ y
    3.15 @@ -155,24 +158,7 @@ if [ "$CONFIG_NET" = "y" ]; then
    3.16     source net/Config.in
    3.17  fi
    3.18  
    3.19 -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "n" ]; then
    3.20 -   #
    3.21 -   # Block device driver configuration
    3.22 -   #
    3.23 -   mainmenu_option next_comment
    3.24 -   comment 'Block devices'
    3.25 -   tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
    3.26 -   dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
    3.27 -   tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
    3.28 -   if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then
    3.29 -      int '  Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096
    3.30 -   fi
    3.31 -   dep_bool '  Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM
    3.32 -   bool 'Per partition statistics in /proc/partitions' CONFIG_BLK_STATS
    3.33 -   bool 'XenoLinux virtual block device support' CONFIG_XEN_VBD
    3.34 -   define_bool CONFIG_BLK_DEV_HD n
    3.35 -   endmenu
    3.36 -else
    3.37 +if [ "$CONFIG_XEN_PHYSDEV_ACCESS" = "y" ]; then
    3.38     mainmenu_option next_comment
    3.39     comment 'ATA/IDE/MFM/RLL support'
    3.40  
    3.41 @@ -244,6 +230,23 @@ else
    3.42     # input before char - char/joystick depends on it. As does USB.
    3.43     #
    3.44     source drivers/input/Config.in
    3.45 +else
    3.46 +   #
    3.47 +   # Block device driver configuration
    3.48 +   #
    3.49 +   mainmenu_option next_comment
    3.50 +   comment 'Block devices'
    3.51 +   tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
    3.52 +   dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
    3.53 +   tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
    3.54 +   if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then
    3.55 +      int '  Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096
    3.56 +   fi
    3.57 +   dep_bool '  Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM
    3.58 +   bool 'Per partition statistics in /proc/partitions' CONFIG_BLK_STATS
    3.59 +   bool 'XenoLinux virtual block device support' CONFIG_XEN_VBD
    3.60 +   define_bool CONFIG_BLK_DEV_HD n
    3.61 +   endmenu
    3.62  fi
    3.63  
    3.64  source drivers/char/Config.in
     4.1 --- a/xenolinux-2.4.26-sparse/arch/xen/defconfig	Thu Apr 15 16:28:53 2004 +0000
     4.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/defconfig	Mon Apr 19 14:03:03 2004 +0000
     4.3 @@ -11,7 +11,7 @@ CONFIG_UID16=y
     4.4  # Xen
     4.5  #
     4.6  CONFIG_XEN_PRIVILEGED_GUEST=y
     4.7 -# CONFIG_XEN_PHYSDEV_ACCESS is not set
     4.8 +# CONFIG_XEN_NEWIO is not set
     4.9  CONFIG_NO_IDLE_HZ=y
    4.10  
    4.11  #
     5.1 --- a/xenolinux-2.4.26-sparse/arch/xen/defconfig-physdev	Thu Apr 15 16:28:53 2004 +0000
     5.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/defconfig-physdev	Mon Apr 19 14:03:03 2004 +0000
     5.3 @@ -11,6 +11,7 @@ CONFIG_UID16=y
     5.4  # Xen
     5.5  #
     5.6  CONFIG_XEN_PRIVILEGED_GUEST=y
     5.7 +CONFIG_XEN_NEWIO=y
     5.8  CONFIG_XEN_PHYSDEV_ACCESS=y
     5.9  CONFIG_NO_IDLE_HZ=y
    5.10  
    5.11 @@ -48,6 +49,8 @@ CONFIG_X86_PGE=y
    5.12  CONFIG_X86_USE_PPRO_CHECKSUM=y
    5.13  CONFIG_X86_TSC=y
    5.14  CONFIG_X86_L1_CACHE_SHIFT=5
    5.15 +CONFIG_NOHIGHMEM=y
    5.16 +# CONFIG_HIGHMEM4G is not set
    5.17  
    5.18  #
    5.19  # General setup
    5.20 @@ -216,7 +219,6 @@ CONFIG_IP_NF_TARGET_ULOG=y
    5.21  #
    5.22  #    SCTP Configuration (EXPERIMENTAL)
    5.23  #
    5.24 -CONFIG_IPV6_SCTP__=y
    5.25  # CONFIG_IP_SCTP is not set
    5.26  # CONFIG_ATM is not set
    5.27  # CONFIG_VLAN_8021Q is not set
    5.28 @@ -299,6 +301,7 @@ CONFIG_BLK_DEV_ALI15X3=y
    5.29  CONFIG_WDC_ALI15X3=y
    5.30  CONFIG_BLK_DEV_AMD74XX=y
    5.31  CONFIG_AMD74XX_OVERRIDE=y
    5.32 +# CONFIG_BLK_DEV_ATIIXP is not set
    5.33  CONFIG_BLK_DEV_CMD64X=y
    5.34  CONFIG_BLK_DEV_TRIFLEX=y
    5.35  CONFIG_BLK_DEV_CY82C693=y
    5.36 @@ -340,6 +343,7 @@ CONFIG_BLK_DEV_PDC202XX=y
    5.37  # CONFIG_BLK_DEV_ATARAID is not set
    5.38  # CONFIG_BLK_DEV_ATARAID_PDC is not set
    5.39  # CONFIG_BLK_DEV_ATARAID_HPT is not set
    5.40 +# CONFIG_BLK_DEV_ATARAID_MEDLEY is not set
    5.41  # CONFIG_BLK_DEV_ATARAID_SII is not set
    5.42  
    5.43  #
    5.44 @@ -519,6 +523,7 @@ CONFIG_TULIP=y
    5.45  # CONFIG_FEALNX is not set
    5.46  # CONFIG_NATSEMI is not set
    5.47  # CONFIG_NE2K_PCI is not set
    5.48 +# CONFIG_FORCEDETH is not set
    5.49  # CONFIG_NE3210 is not set
    5.50  # CONFIG_ES3210 is not set
    5.51  # CONFIG_8139CP is not set
     6.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile	Thu Apr 15 16:28:53 2004 +0000
     6.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile	Mon Apr 19 14:03:03 2004 +0000
     6.3 @@ -1,3 +1,10 @@
     6.4 +
     6.5  O_TARGET := drv.o
     6.6 -obj-y := block.o vbd.o
     6.7 +
     6.8 +subdir-y += frontend
     6.9 +obj-y    += frontend/drv.o
    6.10 +
    6.11 +subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
    6.12 +obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
    6.13 +
    6.14  include $(TOPDIR)/Rules.make
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile	Mon Apr 19 14:03:03 2004 +0000
     7.3 @@ -0,0 +1,3 @@
     7.4 +O_TARGET := drv.o
     7.5 +obj-y := main.o
     7.6 +include $(TOPDIR)/Rules.make
     8.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/block.c	Thu Apr 15 16:28:53 2004 +0000
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,625 +0,0 @@
     8.4 -/******************************************************************************
     8.5 - * block.c
     8.6 - * 
     8.7 - * Xenolinux virtual block-device driver.
     8.8 - * 
     8.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
    8.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
    8.11 - */
    8.12 -
    8.13 -#include "block.h"
    8.14 -#include <linux/blk.h>
    8.15 -#include <linux/cdrom.h>
    8.16 -#include <linux/tqueue.h>
    8.17 -#include <linux/sched.h>
    8.18 -#include <scsi/scsi.h>
    8.19 -
    8.20 -#include <linux/interrupt.h>
    8.21 -
    8.22 -typedef unsigned char byte; /* from linux/ide.h */
    8.23 -
    8.24 -#define STATE_ACTIVE    0
    8.25 -#define STATE_SUSPENDED 1
    8.26 -#define STATE_CLOSED    2
    8.27 -static unsigned int state = STATE_SUSPENDED;
    8.28 -
    8.29 -/* Dynamically-mapped IRQs. */
    8.30 -static int xlblk_response_irq, xlblk_update_irq;
    8.31 -
    8.32 -static blk_ring_t *blk_ring;
    8.33 -static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
    8.34 -static BLK_RING_IDX req_prod;  /* Private request producer.         */
    8.35 -
    8.36 -/* We plug the I/O ring if the driver is suspended or if the ring is full. */
    8.37 -#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
    8.38 -                      (state != STATE_ACTIVE))
    8.39 -
    8.40 -
    8.41 -/*
    8.42 - * Request queues with outstanding work, but ring is currently full.
    8.43 - * We need no special lock here, as we always access this with the
    8.44 - * io_request_lock held. We only need a small maximum list.
    8.45 - */
    8.46 -#define MAX_PENDING 8
    8.47 -static request_queue_t *pending_queues[MAX_PENDING];
    8.48 -static int nr_pending;
    8.49 -
    8.50 -static kdev_t        sg_dev;
    8.51 -static int           sg_operation = -1;
    8.52 -static unsigned long sg_next_sect;
    8.53 -#define DISABLE_SCATTERGATHER() (sg_operation = -1)
    8.54 -
    8.55 -static inline void signal_requests_to_xen(void)
    8.56 -{
    8.57 -    block_io_op_t op; 
    8.58 -
    8.59 -    DISABLE_SCATTERGATHER();
    8.60 -    blk_ring->req_prod = req_prod;
    8.61 -
    8.62 -    op.cmd = BLOCK_IO_OP_SIGNAL; 
    8.63 -    HYPERVISOR_block_io_op(&op);
    8.64 -    return;
    8.65 -}
    8.66 -
    8.67 -
    8.68 -/*
    8.69 - * xlblk_update_int/update-vbds_task - handle VBD update events from Xen
    8.70 - * 
    8.71 - * Schedule a task for keventd to run, which will update the VBDs and perform 
    8.72 - * the corresponding updates to our view of VBD state, so the XenoLinux will 
    8.73 - * respond to changes / additions / deletions to the set of VBDs automatically.
    8.74 - */
    8.75 -static struct tq_struct update_tq;
    8.76 -static void update_vbds_task(void *unused)
    8.77 -{ 
    8.78 -    xlvbd_update_vbds();
    8.79 -}
    8.80 -static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
    8.81 -{
    8.82 -    update_tq.routine = update_vbds_task;
    8.83 -    schedule_task(&update_tq);
    8.84 -}
    8.85 -
    8.86 -
    8.87 -int xen_block_open(struct inode *inode, struct file *filep)
    8.88 -{
    8.89 -    short xldev = inode->i_rdev; 
    8.90 -    struct gendisk *gd = get_gendisk(xldev);
    8.91 -    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
    8.92 -    short minor = MINOR(xldev); 
    8.93 -
    8.94 -    if ( gd->part[minor].nr_sects == 0 )
    8.95 -    { 
    8.96 -        /*
    8.97 -         * Device either doesn't exist, or has zero capacity; we use a few
    8.98 -         * cheesy heuristics to return the relevant error code
    8.99 -         */
   8.100 -        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
   8.101 -             ((minor & (gd->max_p - 1)) != 0) )
   8.102 -        { 
   8.103 -            /*
   8.104 -             * We have a real device, but no such partition, or we just have a
   8.105 -             * partition number so guess this is the problem.
   8.106 -             */
   8.107 -            return -ENXIO;     /* no such device or address */
   8.108 -        }
   8.109 -        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
   8.110 -        {
   8.111 -            /* This is a removable device => assume that media is missing. */ 
   8.112 -            return -ENOMEDIUM; /* media not present (this is a guess) */
   8.113 -        } 
   8.114 -        else
   8.115 -        { 
   8.116 -            /* Just go for the general 'no such device' error. */
   8.117 -            return -ENODEV;    /* no such device */
   8.118 -        }
   8.119 -    }
   8.120 -    
   8.121 -    /* Update of usage count is protected by per-device semaphore. */
   8.122 -    disk->usage++;
   8.123 -
   8.124 -    return 0;
   8.125 -}
   8.126 -
   8.127 -
   8.128 -int xen_block_release(struct inode *inode, struct file *filep)
   8.129 -{
   8.130 -    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
   8.131 -
   8.132 -    /*
   8.133 -     * When usage drops to zero it may allow more VBD updates to occur.
   8.134 -     * Update of usage count is protected by a per-device semaphore.
   8.135 -     */
   8.136 -    if ( --disk->usage == 0 )
   8.137 -    {
   8.138 -        update_tq.routine = update_vbds_task;
   8.139 -        schedule_task(&update_tq);
   8.140 -    }
   8.141 -
   8.142 -    return 0;
   8.143 -}
   8.144 -
   8.145 -
   8.146 -int xen_block_ioctl(struct inode *inode, struct file *filep,
   8.147 -                          unsigned command, unsigned long argument)
   8.148 -{
   8.149 -    kdev_t dev = inode->i_rdev;
   8.150 -    struct hd_geometry *geo = (struct hd_geometry *)argument;
   8.151 -    struct gendisk *gd;     
   8.152 -    struct hd_struct *part; 
   8.153 -    int i;
   8.154 -
   8.155 -    /* NB. No need to check permissions. That is done for us. */
   8.156 -    
   8.157 -    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
   8.158 -                  command, (long) argument, dev); 
   8.159 -  
   8.160 -    gd = get_gendisk(dev);
   8.161 -    part = &gd->part[MINOR(dev)]; 
   8.162 -
   8.163 -    switch ( command )
   8.164 -    {
   8.165 -    case BLKGETSIZE:
   8.166 -        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
   8.167 -        return put_user(part->nr_sects, (unsigned long *) argument);
   8.168 -
   8.169 -    case BLKGETSIZE64:
   8.170 -        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
   8.171 -                      (u64)part->nr_sects * 512);
   8.172 -        return put_user((u64)part->nr_sects * 512, (u64 *) argument);
   8.173 -
   8.174 -    case BLKRRPART:                               /* re-read partition table */
   8.175 -        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
   8.176 -        return xen_block_revalidate(dev);
   8.177 -
   8.178 -    case BLKSSZGET:
   8.179 -        return hardsect_size[MAJOR(dev)][MINOR(dev)]; 
   8.180 -
   8.181 -    case BLKBSZGET:                                        /* get block size */
   8.182 -        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
   8.183 -        break;
   8.184 -
   8.185 -    case BLKBSZSET:                                        /* set block size */
   8.186 -        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
   8.187 -        break;
   8.188 -
   8.189 -    case BLKRASET:                                         /* set read-ahead */
   8.190 -        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
   8.191 -        break;
   8.192 -
   8.193 -    case BLKRAGET:                                         /* get read-ahead */
   8.194 -        DPRINTK_IOCTL("   BLKRAFET: %x\n", BLKRAGET);
   8.195 -        break;
   8.196 -
   8.197 -    case HDIO_GETGEO:
   8.198 -        /* note: these values are complete garbage */
   8.199 -        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
   8.200 -        if (!argument) return -EINVAL;
   8.201 -        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
   8.202 -        if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
   8.203 -        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
   8.204 -        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
   8.205 -        return 0;
   8.206 -
   8.207 -    case HDIO_GETGEO_BIG: 
   8.208 -        /* note: these values are complete garbage */
   8.209 -        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
   8.210 -        if (!argument) return -EINVAL;
   8.211 -        if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
   8.212 -        if (put_user(0xff,  (byte *)&geo->heads))   return -EFAULT;
   8.213 -        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
   8.214 -        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
   8.215 -        return 0;
   8.216 -
   8.217 -    case CDROMMULTISESSION:
   8.218 -        DPRINTK("FIXME: support multisession CDs later\n");
   8.219 -        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
   8.220 -            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
   8.221 -        return 0;
   8.222 -
   8.223 -    case SCSI_IOCTL_GET_BUS_NUMBER:
   8.224 -        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev");
   8.225 -        return -ENOSYS;
   8.226 -
   8.227 -    default:
   8.228 -        printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command);
   8.229 -        return -ENOSYS;
   8.230 -    }
   8.231 -    
   8.232 -    return 0;
   8.233 -}
   8.234 -
   8.235 -/* check media change: should probably do something here in some cases :-) */
   8.236 -int xen_block_check(kdev_t dev)
   8.237 -{
   8.238 -    DPRINTK("xen_block_check\n");
   8.239 -    return 0;
   8.240 -}
   8.241 -
   8.242 -int xen_block_revalidate(kdev_t dev)
   8.243 -{
   8.244 -    struct block_device *bd;
   8.245 -    struct gendisk *gd;
   8.246 -    xl_disk_t *disk;
   8.247 -    unsigned long capacity;
   8.248 -    int i, rc = 0;
   8.249 -    
   8.250 -    if ( (bd = bdget(dev)) == NULL )
   8.251 -        return -EINVAL;
   8.252 -
   8.253 -    /*
   8.254 -     * Update of partition info, and check of usage count, is protected
   8.255 -     * by the per-block-device semaphore.
   8.256 -     */
   8.257 -    down(&bd->bd_sem);
   8.258 -
   8.259 -    if ( ((gd = get_gendisk(dev)) == NULL) ||
   8.260 -         ((disk = xldev_to_xldisk(dev)) == NULL) ||
   8.261 -         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
   8.262 -    {
   8.263 -        rc = -EINVAL;
   8.264 -        goto out;
   8.265 -    }
   8.266 -
   8.267 -    if ( disk->usage > 1 )
   8.268 -    {
   8.269 -        rc = -EBUSY;
   8.270 -        goto out;
   8.271 -    }
   8.272 -
   8.273 -    /* Only reread partition table if VBDs aren't mapped to partitions. */
   8.274 -    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
   8.275 -    {
   8.276 -        for ( i = gd->max_p - 1; i >= 0; i-- )
   8.277 -        {
   8.278 -            invalidate_device(dev+i, 1);
   8.279 -            gd->part[MINOR(dev+i)].start_sect = 0;
   8.280 -            gd->part[MINOR(dev+i)].nr_sects   = 0;
   8.281 -            gd->sizes[MINOR(dev+i)]           = 0;
   8.282 -        }
   8.283 -
   8.284 -        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
   8.285 -    }
   8.286 -
   8.287 - out:
   8.288 -    up(&bd->bd_sem);
   8.289 -    bdput(bd);
   8.290 -    return rc;
   8.291 -}
   8.292 -
   8.293 -
   8.294 -/*
   8.295 - * hypervisor_request
   8.296 - *
   8.297 - * request block io 
   8.298 - * 
   8.299 - * id: for guest use only.
   8.300 - * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
   8.301 - * buffer: buffer to read/write into. this should be a
   8.302 - *   virtual address in the guest os.
   8.303 - */
   8.304 -static int hypervisor_request(unsigned long   id,
   8.305 -                              int             operation,
   8.306 -                              char *          buffer,
   8.307 -                              unsigned long   sector_number,
   8.308 -                              unsigned short  nr_sectors,
   8.309 -                              kdev_t          device)
   8.310 -{
   8.311 -    unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 
   8.312 -    struct gendisk *gd;
   8.313 -    blk_ring_req_entry_t *req;
   8.314 -    struct buffer_head *bh;
   8.315 -
   8.316 -    if ( unlikely(nr_sectors >= (1<<9)) )
   8.317 -        BUG();
   8.318 -    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
   8.319 -        BUG();
   8.320 -
   8.321 -    if ( unlikely(state == STATE_CLOSED) )
   8.322 -        return 1;
   8.323 -
   8.324 -    switch ( operation )
   8.325 -    {
   8.326 -
   8.327 -    case XEN_BLOCK_READ:
   8.328 -    case XEN_BLOCK_WRITE:
   8.329 -        gd = get_gendisk(device); 
   8.330 -
   8.331 -        /*
   8.332 -         * Update the sector_number we'll pass down as appropriate; note that
   8.333 -         * we could sanity check that resulting sector will be in this
   8.334 -         * partition, but this will happen in xen anyhow.
   8.335 -         */
   8.336 -        sector_number += gd->part[MINOR(device)].start_sect;
   8.337 -
   8.338 -        /*
   8.339 -         * If this unit doesn't consist of virtual (i.e., Xen-specified)
   8.340 -         * partitions then we clear the partn bits from the device number.
   8.341 -         */
   8.342 -        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
   8.343 -               GENHD_FL_VIRT_PARTNS) )
   8.344 -            device &= ~(gd->max_p - 1);
   8.345 -
   8.346 -        if ( (sg_operation == operation) &&
   8.347 -             (sg_dev == device) &&
   8.348 -             (sg_next_sect == sector_number) )
   8.349 -        {
   8.350 -            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
   8.351 -            bh = (struct buffer_head *)id;
   8.352 -            bh->b_reqnext = (struct buffer_head *)req->id;
   8.353 -            req->id = id;
   8.354 -            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
   8.355 -            if ( ++req->nr_segments < MAX_BLK_SEGS )
   8.356 -                sg_next_sect += nr_sectors;
   8.357 -            else
   8.358 -                DISABLE_SCATTERGATHER();
   8.359 -            return 0;
   8.360 -        }
   8.361 -        else if ( RING_PLUGGED )
   8.362 -        {
   8.363 -            return 1;
   8.364 -        }
   8.365 -        else
   8.366 -        {
   8.367 -            sg_operation = operation;
   8.368 -            sg_dev       = device;
   8.369 -            sg_next_sect = sector_number + nr_sectors;
   8.370 -        }
   8.371 -        break;
   8.372 -
   8.373 -    default:
   8.374 -        panic("unknown op %d\n", operation);
   8.375 -    }
   8.376 -
   8.377 -    /* Fill out a communications ring structure. */
   8.378 -    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
   8.379 -    req->id            = id;
   8.380 -    req->operation     = operation;
   8.381 -    req->sector_number = (xen_sector_t)sector_number;
   8.382 -    req->device        = device; 
   8.383 -    req->nr_segments   = 1;
   8.384 -    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
   8.385 -    req_prod++;
   8.386 -
   8.387 -    return 0;
   8.388 -}
   8.389 -
   8.390 -
   8.391 -/*
   8.392 - * do_xlblk_request
   8.393 - *  read a block; request is in a request queue
   8.394 - */
   8.395 -void do_xlblk_request(request_queue_t *rq)
   8.396 -{
   8.397 -    struct request *req;
   8.398 -    struct buffer_head *bh, *next_bh;
   8.399 -    int rw, nsect, full, queued = 0;
   8.400 -
   8.401 -    DPRINTK("xlblk.c::do_xlblk_request\n"); 
   8.402 -
   8.403 -    while ( !rq->plugged && !list_empty(&rq->queue_head))
   8.404 -    {
   8.405 -        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
   8.406 -            goto out;
   8.407 -  
   8.408 -        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
   8.409 -                req, req->cmd, req->sector,
   8.410 -                req->current_nr_sectors, req->nr_sectors, req->bh);
   8.411 -
   8.412 -        rw = req->cmd;
   8.413 -        if ( rw == READA )
   8.414 -            rw = READ;
   8.415 -        if ( unlikely((rw != READ) && (rw != WRITE)) )
   8.416 -            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
   8.417 -
   8.418 -        req->errors = 0;
   8.419 -
   8.420 -        bh = req->bh;
   8.421 -        while ( bh != NULL )
   8.422 -        {
   8.423 -            next_bh = bh->b_reqnext;
   8.424 -            bh->b_reqnext = NULL;
   8.425 -
   8.426 -            full = hypervisor_request(
   8.427 -                (unsigned long)bh,
   8.428 -                (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
   8.429 -                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
   8.430 -
   8.431 -            if ( full )
   8.432 -            { 
   8.433 -                bh->b_reqnext = next_bh;
   8.434 -                pending_queues[nr_pending++] = rq;
   8.435 -                if ( unlikely(nr_pending >= MAX_PENDING) )
   8.436 -                    BUG();
   8.437 -                goto out; 
   8.438 -            }
   8.439 -
   8.440 -            queued++;
   8.441 -
   8.442 -            /* Dequeue the buffer head from the request. */
   8.443 -            nsect = bh->b_size >> 9;
   8.444 -            bh = req->bh = next_bh;
   8.445 -            
   8.446 -            if ( bh != NULL )
   8.447 -            {
   8.448 -                /* There's another buffer head to do. Update the request. */
   8.449 -                req->hard_sector += nsect;
   8.450 -                req->hard_nr_sectors -= nsect;
   8.451 -                req->sector = req->hard_sector;
   8.452 -                req->nr_sectors = req->hard_nr_sectors;
   8.453 -                req->current_nr_sectors = bh->b_size >> 9;
   8.454 -                req->buffer = bh->b_data;
   8.455 -            }
   8.456 -            else
   8.457 -            {
   8.458 -                /* That was the last buffer head. Finalise the request. */
   8.459 -                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
   8.460 -                    BUG();
   8.461 -                blkdev_dequeue_request(req);
   8.462 -                end_that_request_last(req);
   8.463 -            }
   8.464 -        }
   8.465 -    }
   8.466 -
   8.467 - out:
   8.468 -    if ( queued != 0 ) signal_requests_to_xen();
   8.469 -}
   8.470 -
   8.471 -
   8.472 -static void kick_pending_request_queues(void)
   8.473 -{
   8.474 -    /* We kick pending request queues if the ring is reasonably empty. */
   8.475 -    if ( (nr_pending != 0) && 
   8.476 -         ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
   8.477 -    {
   8.478 -        /* Attempt to drain the queue, but bail if the ring becomes full. */
   8.479 -        while ( (nr_pending != 0) && !RING_PLUGGED )
   8.480 -            do_xlblk_request(pending_queues[--nr_pending]);
   8.481 -    }
   8.482 -}
   8.483 -
   8.484 -
   8.485 -static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
   8.486 -{
   8.487 -    BLK_RING_IDX i; 
   8.488 -    unsigned long flags; 
   8.489 -    struct buffer_head *bh, *next_bh;
   8.490 -    
   8.491 -    if ( unlikely(state == STATE_CLOSED) )
   8.492 -        return;
   8.493 -    
   8.494 -    spin_lock_irqsave(&io_request_lock, flags);     
   8.495 -
   8.496 -    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
   8.497 -    {
   8.498 -        blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
   8.499 -        switch ( bret->operation )
   8.500 -        {
   8.501 -        case XEN_BLOCK_READ:
   8.502 -        case XEN_BLOCK_WRITE:
   8.503 -            if ( unlikely(bret->status != 0) )
   8.504 -                DPRINTK("Bad return from blkdev data request: %lx\n",
   8.505 -                        bret->status);
   8.506 -            for ( bh = (struct buffer_head *)bret->id; 
   8.507 -                  bh != NULL; 
   8.508 -                  bh = next_bh )
   8.509 -            {
   8.510 -                next_bh = bh->b_reqnext;
   8.511 -                bh->b_reqnext = NULL;
   8.512 -                bh->b_end_io(bh, !bret->status);
   8.513 -            }
   8.514 -            break;
   8.515 -     
   8.516 -        default:
   8.517 -            BUG();
   8.518 -        }
   8.519 -    }
   8.520 -    
   8.521 -    resp_cons = i;
   8.522 -
   8.523 -    kick_pending_request_queues();
   8.524 -
   8.525 -    spin_unlock_irqrestore(&io_request_lock, flags);
   8.526 -}
   8.527 -
   8.528 -
   8.529 -static void reset_xlblk_interface(void)
   8.530 -{
   8.531 -    block_io_op_t op; 
   8.532 -
   8.533 -    nr_pending = 0;
   8.534 -
   8.535 -    op.cmd = BLOCK_IO_OP_RESET;
   8.536 -    if ( HYPERVISOR_block_io_op(&op) != 0 )
   8.537 -        printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
   8.538 -
   8.539 -    op.cmd = BLOCK_IO_OP_RING_ADDRESS;
   8.540 -    (void)HYPERVISOR_block_io_op(&op);
   8.541 -
   8.542 -    set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
   8.543 -    blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
   8.544 -    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
   8.545 -
   8.546 -    wmb();
   8.547 -    state = STATE_ACTIVE;
   8.548 -}
   8.549 -
   8.550 -
   8.551 -int __init xlblk_init(void)
   8.552 -{
   8.553 -    int error; 
   8.554 -
   8.555 -    reset_xlblk_interface();
   8.556 -
   8.557 -    xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
   8.558 -    xlblk_update_irq   = bind_virq_to_irq(VIRQ_VBD_UPD);
   8.559 -
   8.560 -    error = request_irq(xlblk_response_irq, xlblk_response_int, 
   8.561 -                        SA_SAMPLE_RANDOM, "blkdev", NULL);
   8.562 -    if ( error )
   8.563 -    {
   8.564 -        printk(KERN_ALERT "Could not allocate receive interrupt\n");
   8.565 -        goto fail;
   8.566 -    }
   8.567 -
   8.568 -    error = request_irq(xlblk_update_irq, xlblk_update_int,
   8.569 -                        0, "blkdev", NULL);
   8.570 -
   8.571 -    if ( error )
   8.572 -    {
   8.573 -        printk(KERN_ALERT "Could not allocate block update interrupt\n");
   8.574 -        goto fail;
   8.575 -    }
   8.576 -
   8.577 -    (void)xlvbd_init();
   8.578 -
   8.579 -    return 0;
   8.580 -
   8.581 - fail:
   8.582 -    return error;
   8.583 -}
   8.584 -
   8.585 -
   8.586 -static void __exit xlblk_cleanup(void)
   8.587 -{
   8.588 -    xlvbd_cleanup();
   8.589 -    free_irq(xlblk_response_irq, NULL);
   8.590 -    free_irq(xlblk_update_irq, NULL);
   8.591 -    unbind_virq_from_irq(VIRQ_BLKDEV);
   8.592 -    unbind_virq_from_irq(VIRQ_VBD_UPD);
   8.593 -}
   8.594 -
   8.595 -
   8.596 -#ifdef MODULE
   8.597 -module_init(xlblk_init);
   8.598 -module_exit(xlblk_cleanup);
   8.599 -#endif
   8.600 -
   8.601 -
   8.602 -void blkdev_suspend(void)
   8.603 -{
   8.604 -    state = STATE_SUSPENDED;
   8.605 -    wmb();
   8.606 -
   8.607 -    while ( resp_cons != blk_ring->req_prod )
   8.608 -    {
   8.609 -        barrier();
   8.610 -        current->state = TASK_INTERRUPTIBLE;
   8.611 -        schedule_timeout(1);
   8.612 -    }
   8.613 -
   8.614 -    wmb();
   8.615 -    state = STATE_CLOSED;
   8.616 -    wmb();
   8.617 -
   8.618 -    clear_fixmap(FIX_BLKRING_BASE);
   8.619 -}
   8.620 -
   8.621 -
   8.622 -void blkdev_resume(void)
   8.623 -{
   8.624 -    reset_xlblk_interface();
   8.625 -    spin_lock_irq(&io_request_lock);
   8.626 -    kick_pending_request_queues();
   8.627 -    spin_unlock_irq(&io_request_lock);
   8.628 -}
     9.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/block.h	Thu Apr 15 16:28:53 2004 +0000
     9.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.3 @@ -1,82 +0,0 @@
     9.4 -/******************************************************************************
     9.5 - * block.h
     9.6 - * 
     9.7 - * Shared definitions between all levels of XenoLinux Virtual block devices.
     9.8 - */
     9.9 -
    9.10 -#ifndef __XEN_DRIVERS_BLOCK_H__
    9.11 -#define __XEN_DRIVERS_BLOCK_H__
    9.12 -
    9.13 -#include <linux/config.h>
    9.14 -#include <linux/module.h>
    9.15 -
    9.16 -#include <linux/kernel.h>
    9.17 -#include <linux/sched.h>
    9.18 -#include <linux/slab.h>
    9.19 -#include <linux/string.h>
    9.20 -#include <linux/errno.h>
    9.21 -
    9.22 -#include <linux/fs.h>
    9.23 -#include <linux/hdreg.h>
    9.24 -#include <linux/blkdev.h>
    9.25 -#include <linux/major.h>
    9.26 -
    9.27 -#include <asm/hypervisor-ifs/hypervisor-if.h>
    9.28 -#include <asm/hypervisor-ifs/vbd.h>
    9.29 -#include <asm/io.h>
    9.30 -#include <asm/atomic.h>
    9.31 -#include <asm/uaccess.h>
    9.32 -
    9.33 -#if 0
    9.34 -#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
    9.35 -#else
    9.36 -#define DPRINTK(_f, _a...) ((void)0)
    9.37 -#endif
    9.38 -
    9.39 -#if 0
    9.40 -#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
    9.41 -#else
    9.42 -#define DPRINTK_IOCTL(_f, _a...) ((void)0)
    9.43 -#endif
    9.44 -
    9.45 -/* Private gendisk->flags[] values. */
    9.46 -#define GENHD_FL_XEN        2 /* Is unit a Xen block device?  */
    9.47 -#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
    9.48 -
    9.49 -/*
    9.50 - * We have one of these per vbd, whether ide, scsi or 'other'.
    9.51 - * They hang in an array off the gendisk structure. We may end up putting
    9.52 - * all kinds of interesting stuff here :-)
    9.53 - */
    9.54 -typedef struct xl_disk {
    9.55 -    int usage;
    9.56 -} xl_disk_t;
    9.57 -
    9.58 -extern int xen_control_msg(int operration, char *buffer, int size);
    9.59 -extern int xen_block_open(struct inode *inode, struct file *filep);
    9.60 -extern int xen_block_release(struct inode *inode, struct file *filep);
    9.61 -extern int xen_block_ioctl(struct inode *inode, struct file *filep,
    9.62 -                                 unsigned command, unsigned long argument);
    9.63 -extern int xen_block_check(kdev_t dev);
    9.64 -extern int xen_block_revalidate(kdev_t dev);
    9.65 -extern void do_xlblk_request (request_queue_t *rq); 
    9.66 -
    9.67 -extern void xlvbd_update_vbds(void);
    9.68 -
    9.69 -static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
    9.70 -{
    9.71 -    struct gendisk *gd = get_gendisk(xldev);
    9.72 -    
    9.73 -    if ( gd == NULL ) 
    9.74 -        return NULL;
    9.75 -    
    9.76 -    return (xl_disk_t *)gd->real_devices + 
    9.77 -        (MINOR(xldev) >> gd->minor_shift);
    9.78 -}
    9.79 -
    9.80 -
    9.81 -/* Virtual block-device subsystem. */
    9.82 -extern int  xlvbd_init(void);
    9.83 -extern void xlvbd_cleanup(void); 
    9.84 -
    9.85 -#endif /* __XEN_DRIVERS_BLOCK_H__ */
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile	Mon Apr 19 14:03:03 2004 +0000
    10.3 @@ -0,0 +1,3 @@
    10.4 +O_TARGET := drv.o
    10.5 +obj-y := block.o vbd.o
    10.6 +include $(TOPDIR)/Rules.make
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c	Mon Apr 19 14:03:03 2004 +0000
    11.3 @@ -0,0 +1,625 @@
    11.4 +/******************************************************************************
    11.5 + * block.c
    11.6 + * 
    11.7 + * Xenolinux virtual block-device driver.
    11.8 + * 
    11.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   11.10 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   11.11 + */
   11.12 +
   11.13 +#include "block.h"
   11.14 +#include <linux/blk.h>
   11.15 +#include <linux/cdrom.h>
   11.16 +#include <linux/tqueue.h>
   11.17 +#include <linux/sched.h>
   11.18 +#include <scsi/scsi.h>
   11.19 +
   11.20 +#include <linux/interrupt.h>
   11.21 +
   11.22 +typedef unsigned char byte; /* from linux/ide.h */
   11.23 +
   11.24 +#define STATE_ACTIVE    0
   11.25 +#define STATE_SUSPENDED 1
   11.26 +#define STATE_CLOSED    2
   11.27 +static unsigned int state = STATE_SUSPENDED;
   11.28 +
   11.29 +/* Dynamically-mapped IRQs. */
   11.30 +static int xlblk_response_irq, xlblk_update_irq;
   11.31 +
   11.32 +static blk_ring_t *blk_ring;
   11.33 +static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
   11.34 +static BLK_RING_IDX req_prod;  /* Private request producer.         */
   11.35 +
   11.36 +/* We plug the I/O ring if the driver is suspended or if the ring is full. */
   11.37 +#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
   11.38 +                      (state != STATE_ACTIVE))
   11.39 +
   11.40 +
   11.41 +/*
   11.42 + * Request queues with outstanding work, but ring is currently full.
   11.43 + * We need no special lock here, as we always access this with the
   11.44 + * io_request_lock held. We only need a small maximum list.
   11.45 + */
   11.46 +#define MAX_PENDING 8
   11.47 +static request_queue_t *pending_queues[MAX_PENDING];
   11.48 +static int nr_pending;
   11.49 +
   11.50 +static kdev_t        sg_dev;
   11.51 +static int           sg_operation = -1;
   11.52 +static unsigned long sg_next_sect;
   11.53 +#define DISABLE_SCATTERGATHER() (sg_operation = -1)
   11.54 +
   11.55 +static inline void signal_requests_to_xen(void)
   11.56 +{
   11.57 +    block_io_op_t op; 
   11.58 +
   11.59 +    DISABLE_SCATTERGATHER();
   11.60 +    blk_ring->req_prod = req_prod;
   11.61 +
   11.62 +    op.cmd = BLOCK_IO_OP_SIGNAL; 
   11.63 +    HYPERVISOR_block_io_op(&op);
   11.64 +    return;
   11.65 +}
   11.66 +
   11.67 +
   11.68 +/*
   11.69 + * xlblk_update_int/update-vbds_task - handle VBD update events from Xen
   11.70 + * 
   11.71 + * Schedule a task for keventd to run, which will update the VBDs and perform 
   11.72 + * the corresponding updates to our view of VBD state, so the XenoLinux will 
   11.73 + * respond to changes / additions / deletions to the set of VBDs automatically.
   11.74 + */
   11.75 +static struct tq_struct update_tq;
   11.76 +static void update_vbds_task(void *unused)
   11.77 +{ 
   11.78 +    xlvbd_update_vbds();
   11.79 +}
   11.80 +static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
   11.81 +{
   11.82 +    update_tq.routine = update_vbds_task;
   11.83 +    schedule_task(&update_tq);
   11.84 +}
   11.85 +
   11.86 +
   11.87 +int xen_block_open(struct inode *inode, struct file *filep)
   11.88 +{
   11.89 +    short xldev = inode->i_rdev; 
   11.90 +    struct gendisk *gd = get_gendisk(xldev);
   11.91 +    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
   11.92 +    short minor = MINOR(xldev); 
   11.93 +
   11.94 +    if ( gd->part[minor].nr_sects == 0 )
   11.95 +    { 
   11.96 +        /*
   11.97 +         * Device either doesn't exist, or has zero capacity; we use a few
   11.98 +         * cheesy heuristics to return the relevant error code
   11.99 +         */
  11.100 +        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
  11.101 +             ((minor & (gd->max_p - 1)) != 0) )
  11.102 +        { 
  11.103 +            /*
  11.104 +             * We have a real device, but no such partition, or we just have a
  11.105 +             * partition number so guess this is the problem.
  11.106 +             */
  11.107 +            return -ENXIO;     /* no such device or address */
  11.108 +        }
  11.109 +        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
  11.110 +        {
  11.111 +            /* This is a removable device => assume that media is missing. */ 
  11.112 +            return -ENOMEDIUM; /* media not present (this is a guess) */
  11.113 +        } 
  11.114 +        else
  11.115 +        { 
  11.116 +            /* Just go for the general 'no such device' error. */
  11.117 +            return -ENODEV;    /* no such device */
  11.118 +        }
  11.119 +    }
  11.120 +    
  11.121 +    /* Update of usage count is protected by per-device semaphore. */
  11.122 +    disk->usage++;
  11.123 +
  11.124 +    return 0;
  11.125 +}
  11.126 +
  11.127 +
  11.128 +int xen_block_release(struct inode *inode, struct file *filep)
  11.129 +{
  11.130 +    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
  11.131 +
  11.132 +    /*
  11.133 +     * When usage drops to zero it may allow more VBD updates to occur.
  11.134 +     * Update of usage count is protected by a per-device semaphore.
  11.135 +     */
  11.136 +    if ( --disk->usage == 0 )
  11.137 +    {
  11.138 +        update_tq.routine = update_vbds_task;
  11.139 +        schedule_task(&update_tq);
  11.140 +    }
  11.141 +
  11.142 +    return 0;
  11.143 +}
  11.144 +
  11.145 +
  11.146 +int xen_block_ioctl(struct inode *inode, struct file *filep,
  11.147 +                          unsigned command, unsigned long argument)
  11.148 +{
  11.149 +    kdev_t dev = inode->i_rdev;
  11.150 +    struct hd_geometry *geo = (struct hd_geometry *)argument;
  11.151 +    struct gendisk *gd;     
  11.152 +    struct hd_struct *part; 
  11.153 +    int i;
  11.154 +
  11.155 +    /* NB. No need to check permissions. That is done for us. */
  11.156 +    
  11.157 +    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
  11.158 +                  command, (long) argument, dev); 
  11.159 +  
  11.160 +    gd = get_gendisk(dev);
  11.161 +    part = &gd->part[MINOR(dev)]; 
  11.162 +
  11.163 +    switch ( command )
  11.164 +    {
  11.165 +    case BLKGETSIZE:
  11.166 +        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
  11.167 +        return put_user(part->nr_sects, (unsigned long *) argument);
  11.168 +
  11.169 +    case BLKGETSIZE64:
  11.170 +        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
  11.171 +                      (u64)part->nr_sects * 512);
  11.172 +        return put_user((u64)part->nr_sects * 512, (u64 *) argument);
  11.173 +
  11.174 +    case BLKRRPART:                               /* re-read partition table */
  11.175 +        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
  11.176 +        return xen_block_revalidate(dev);
  11.177 +
  11.178 +    case BLKSSZGET:
  11.179 +        return hardsect_size[MAJOR(dev)][MINOR(dev)]; 
  11.180 +
  11.181 +    case BLKBSZGET:                                        /* get block size */
  11.182 +        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
  11.183 +        break;
  11.184 +
  11.185 +    case BLKBSZSET:                                        /* set block size */
  11.186 +        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
  11.187 +        break;
  11.188 +
  11.189 +    case BLKRASET:                                         /* set read-ahead */
  11.190 +        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
  11.191 +        break;
  11.192 +
  11.193 +    case BLKRAGET:                                         /* get read-ahead */
  11.194 +        DPRINTK_IOCTL("   BLKRAFET: %x\n", BLKRAGET);
  11.195 +        break;
  11.196 +
  11.197 +    case HDIO_GETGEO:
  11.198 +        /* note: these values are complete garbage */
  11.199 +        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
  11.200 +        if (!argument) return -EINVAL;
  11.201 +        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
  11.202 +        if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
  11.203 +        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  11.204 +        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
  11.205 +        return 0;
  11.206 +
  11.207 +    case HDIO_GETGEO_BIG: 
  11.208 +        /* note: these values are complete garbage */
  11.209 +        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
  11.210 +        if (!argument) return -EINVAL;
  11.211 +        if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
  11.212 +        if (put_user(0xff,  (byte *)&geo->heads))   return -EFAULT;
  11.213 +        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  11.214 +        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
  11.215 +        return 0;
  11.216 +
  11.217 +    case CDROMMULTISESSION:
  11.218 +        DPRINTK("FIXME: support multisession CDs later\n");
  11.219 +        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
  11.220 +            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
  11.221 +        return 0;
  11.222 +
  11.223 +    case SCSI_IOCTL_GET_BUS_NUMBER:
  11.224 +        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev");
  11.225 +        return -ENOSYS;
  11.226 +
  11.227 +    default:
  11.228 +        printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command);
  11.229 +        return -ENOSYS;
  11.230 +    }
  11.231 +    
  11.232 +    return 0;
  11.233 +}
  11.234 +
  11.235 +/* check media change: should probably do something here in some cases :-) */
  11.236 +int xen_block_check(kdev_t dev)
  11.237 +{
  11.238 +    DPRINTK("xen_block_check\n");
  11.239 +    return 0;
  11.240 +}
  11.241 +
  11.242 +int xen_block_revalidate(kdev_t dev)
  11.243 +{
  11.244 +    struct block_device *bd;
  11.245 +    struct gendisk *gd;
  11.246 +    xl_disk_t *disk;
  11.247 +    unsigned long capacity;
  11.248 +    int i, rc = 0;
  11.249 +    
  11.250 +    if ( (bd = bdget(dev)) == NULL )
  11.251 +        return -EINVAL;
  11.252 +
  11.253 +    /*
  11.254 +     * Update of partition info, and check of usage count, is protected
  11.255 +     * by the per-block-device semaphore.
  11.256 +     */
  11.257 +    down(&bd->bd_sem);
  11.258 +
  11.259 +    if ( ((gd = get_gendisk(dev)) == NULL) ||
  11.260 +         ((disk = xldev_to_xldisk(dev)) == NULL) ||
  11.261 +         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
  11.262 +    {
  11.263 +        rc = -EINVAL;
  11.264 +        goto out;
  11.265 +    }
  11.266 +
  11.267 +    if ( disk->usage > 1 )
  11.268 +    {
  11.269 +        rc = -EBUSY;
  11.270 +        goto out;
  11.271 +    }
  11.272 +
  11.273 +    /* Only reread partition table if VBDs aren't mapped to partitions. */
  11.274 +    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
  11.275 +    {
  11.276 +        for ( i = gd->max_p - 1; i >= 0; i-- )
  11.277 +        {
  11.278 +            invalidate_device(dev+i, 1);
  11.279 +            gd->part[MINOR(dev+i)].start_sect = 0;
  11.280 +            gd->part[MINOR(dev+i)].nr_sects   = 0;
  11.281 +            gd->sizes[MINOR(dev+i)]           = 0;
  11.282 +        }
  11.283 +
  11.284 +        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
  11.285 +    }
  11.286 +
  11.287 + out:
  11.288 +    up(&bd->bd_sem);
  11.289 +    bdput(bd);
  11.290 +    return rc;
  11.291 +}
  11.292 +
  11.293 +
  11.294 +/*
  11.295 + * hypervisor_request
  11.296 + *
  11.297 + * request block io 
  11.298 + * 
  11.299 + * id: for guest use only.
  11.300 + * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
  11.301 + * buffer: buffer to read/write into. this should be a
  11.302 + *   virtual address in the guest os.
  11.303 + */
  11.304 +static int hypervisor_request(unsigned long   id,
  11.305 +                              int             operation,
  11.306 +                              char *          buffer,
  11.307 +                              unsigned long   sector_number,
  11.308 +                              unsigned short  nr_sectors,
  11.309 +                              kdev_t          device)
  11.310 +{
  11.311 +    unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 
  11.312 +    struct gendisk *gd;
  11.313 +    blk_ring_req_entry_t *req;
  11.314 +    struct buffer_head *bh;
  11.315 +
  11.316 +    if ( unlikely(nr_sectors >= (1<<9)) )
  11.317 +        BUG();
  11.318 +    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
  11.319 +        BUG();
  11.320 +
  11.321 +    if ( unlikely(state == STATE_CLOSED) )
  11.322 +        return 1;
  11.323 +
  11.324 +    switch ( operation )
  11.325 +    {
  11.326 +
  11.327 +    case XEN_BLOCK_READ:
  11.328 +    case XEN_BLOCK_WRITE:
  11.329 +        gd = get_gendisk(device); 
  11.330 +
  11.331 +        /*
  11.332 +         * Update the sector_number we'll pass down as appropriate; note that
  11.333 +         * we could sanity check that resulting sector will be in this
  11.334 +         * partition, but this will happen in xen anyhow.
  11.335 +         */
  11.336 +        sector_number += gd->part[MINOR(device)].start_sect;
  11.337 +
  11.338 +        /*
  11.339 +         * If this unit doesn't consist of virtual (i.e., Xen-specified)
  11.340 +         * partitions then we clear the partn bits from the device number.
  11.341 +         */
  11.342 +        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
  11.343 +               GENHD_FL_VIRT_PARTNS) )
  11.344 +            device &= ~(gd->max_p - 1);
  11.345 +
  11.346 +        if ( (sg_operation == operation) &&
  11.347 +             (sg_dev == device) &&
  11.348 +             (sg_next_sect == sector_number) )
  11.349 +        {
  11.350 +            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
  11.351 +            bh = (struct buffer_head *)id;
  11.352 +            bh->b_reqnext = (struct buffer_head *)req->id;
  11.353 +            req->id = id;
  11.354 +            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
  11.355 +            if ( ++req->nr_segments < MAX_BLK_SEGS )
  11.356 +                sg_next_sect += nr_sectors;
  11.357 +            else
  11.358 +                DISABLE_SCATTERGATHER();
  11.359 +            return 0;
  11.360 +        }
  11.361 +        else if ( RING_PLUGGED )
  11.362 +        {
  11.363 +            return 1;
  11.364 +        }
  11.365 +        else
  11.366 +        {
  11.367 +            sg_operation = operation;
  11.368 +            sg_dev       = device;
  11.369 +            sg_next_sect = sector_number + nr_sectors;
  11.370 +        }
  11.371 +        break;
  11.372 +
  11.373 +    default:
  11.374 +        panic("unknown op %d\n", operation);
  11.375 +    }
  11.376 +
  11.377 +    /* Fill out a communications ring structure. */
  11.378 +    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
  11.379 +    req->id            = id;
  11.380 +    req->operation     = operation;
  11.381 +    req->sector_number = (xen_sector_t)sector_number;
  11.382 +    req->device        = device; 
  11.383 +    req->nr_segments   = 1;
  11.384 +    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
  11.385 +    req_prod++;
  11.386 +
  11.387 +    return 0;
  11.388 +}
  11.389 +
  11.390 +
  11.391 +/*
  11.392 + * do_xlblk_request
  11.393 + *  read a block; request is in a request queue
  11.394 + */
  11.395 +void do_xlblk_request(request_queue_t *rq)
  11.396 +{
  11.397 +    struct request *req;
  11.398 +    struct buffer_head *bh, *next_bh;
  11.399 +    int rw, nsect, full, queued = 0;
  11.400 +
  11.401 +    DPRINTK("xlblk.c::do_xlblk_request\n"); 
  11.402 +
  11.403 +    while ( !rq->plugged && !list_empty(&rq->queue_head))
  11.404 +    {
  11.405 +        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
  11.406 +            goto out;
  11.407 +  
  11.408 +        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
  11.409 +                req, req->cmd, req->sector,
  11.410 +                req->current_nr_sectors, req->nr_sectors, req->bh);
  11.411 +
  11.412 +        rw = req->cmd;
  11.413 +        if ( rw == READA )
  11.414 +            rw = READ;
  11.415 +        if ( unlikely((rw != READ) && (rw != WRITE)) )
  11.416 +            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
  11.417 +
  11.418 +        req->errors = 0;
  11.419 +
  11.420 +        bh = req->bh;
  11.421 +        while ( bh != NULL )
  11.422 +        {
  11.423 +            next_bh = bh->b_reqnext;
  11.424 +            bh->b_reqnext = NULL;
  11.425 +
  11.426 +            full = hypervisor_request(
  11.427 +                (unsigned long)bh,
  11.428 +                (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
  11.429 +                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
  11.430 +
  11.431 +            if ( full )
  11.432 +            { 
  11.433 +                bh->b_reqnext = next_bh;
  11.434 +                pending_queues[nr_pending++] = rq;
  11.435 +                if ( unlikely(nr_pending >= MAX_PENDING) )
  11.436 +                    BUG();
  11.437 +                goto out; 
  11.438 +            }
  11.439 +
  11.440 +            queued++;
  11.441 +
  11.442 +            /* Dequeue the buffer head from the request. */
  11.443 +            nsect = bh->b_size >> 9;
  11.444 +            bh = req->bh = next_bh;
  11.445 +            
  11.446 +            if ( bh != NULL )
  11.447 +            {
  11.448 +                /* There's another buffer head to do. Update the request. */
  11.449 +                req->hard_sector += nsect;
  11.450 +                req->hard_nr_sectors -= nsect;
  11.451 +                req->sector = req->hard_sector;
  11.452 +                req->nr_sectors = req->hard_nr_sectors;
  11.453 +                req->current_nr_sectors = bh->b_size >> 9;
  11.454 +                req->buffer = bh->b_data;
  11.455 +            }
  11.456 +            else
  11.457 +            {
  11.458 +                /* That was the last buffer head. Finalise the request. */
  11.459 +                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
  11.460 +                    BUG();
  11.461 +                blkdev_dequeue_request(req);
  11.462 +                end_that_request_last(req);
  11.463 +            }
  11.464 +        }
  11.465 +    }
  11.466 +
  11.467 + out:
  11.468 +    if ( queued != 0 ) signal_requests_to_xen();
  11.469 +}
  11.470 +
  11.471 +
  11.472 +static void kick_pending_request_queues(void)
  11.473 +{
  11.474 +    /* We kick pending request queues if the ring is reasonably empty. */
  11.475 +    if ( (nr_pending != 0) && 
  11.476 +         ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
  11.477 +    {
  11.478 +        /* Attempt to drain the queue, but bail if the ring becomes full. */
  11.479 +        while ( (nr_pending != 0) && !RING_PLUGGED )
  11.480 +            do_xlblk_request(pending_queues[--nr_pending]);
  11.481 +    }
  11.482 +}
  11.483 +
  11.484 +
  11.485 +static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
  11.486 +{
  11.487 +    BLK_RING_IDX i; 
  11.488 +    unsigned long flags; 
  11.489 +    struct buffer_head *bh, *next_bh;
  11.490 +    
  11.491 +    if ( unlikely(state == STATE_CLOSED) )
  11.492 +        return;
  11.493 +    
  11.494 +    spin_lock_irqsave(&io_request_lock, flags);     
  11.495 +
  11.496 +    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
  11.497 +    {
  11.498 +        blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
  11.499 +        switch ( bret->operation )
  11.500 +        {
  11.501 +        case XEN_BLOCK_READ:
  11.502 +        case XEN_BLOCK_WRITE:
  11.503 +            if ( unlikely(bret->status != 0) )
  11.504 +                DPRINTK("Bad return from blkdev data request: %lx\n",
  11.505 +                        bret->status);
  11.506 +            for ( bh = (struct buffer_head *)bret->id; 
  11.507 +                  bh != NULL; 
  11.508 +                  bh = next_bh )
  11.509 +            {
  11.510 +                next_bh = bh->b_reqnext;
  11.511 +                bh->b_reqnext = NULL;
  11.512 +                bh->b_end_io(bh, !bret->status);
  11.513 +            }
  11.514 +            break;
  11.515 +     
  11.516 +        default:
  11.517 +            BUG();
  11.518 +        }
  11.519 +    }
  11.520 +    
  11.521 +    resp_cons = i;
  11.522 +
  11.523 +    kick_pending_request_queues();
  11.524 +
  11.525 +    spin_unlock_irqrestore(&io_request_lock, flags);
  11.526 +}
  11.527 +
  11.528 +
  11.529 +static void reset_xlblk_interface(void)
  11.530 +{
  11.531 +    block_io_op_t op; 
  11.532 +
  11.533 +    nr_pending = 0;
  11.534 +
  11.535 +    op.cmd = BLOCK_IO_OP_RESET;
  11.536 +    if ( HYPERVISOR_block_io_op(&op) != 0 )
  11.537 +        printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
  11.538 +
  11.539 +    op.cmd = BLOCK_IO_OP_RING_ADDRESS;
  11.540 +    (void)HYPERVISOR_block_io_op(&op);
  11.541 +
  11.542 +    set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
  11.543 +    blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
  11.544 +    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
  11.545 +
  11.546 +    wmb();
  11.547 +    state = STATE_ACTIVE;
  11.548 +}
  11.549 +
  11.550 +
  11.551 +int __init xlblk_init(void)
  11.552 +{
  11.553 +    int error; 
  11.554 +
  11.555 +    reset_xlblk_interface();
  11.556 +
  11.557 +    xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
  11.558 +    xlblk_update_irq   = bind_virq_to_irq(VIRQ_VBD_UPD);
  11.559 +
  11.560 +    error = request_irq(xlblk_response_irq, xlblk_response_int, 
  11.561 +                        SA_SAMPLE_RANDOM, "blkdev", NULL);
  11.562 +    if ( error )
  11.563 +    {
  11.564 +        printk(KERN_ALERT "Could not allocate receive interrupt\n");
  11.565 +        goto fail;
  11.566 +    }
  11.567 +
  11.568 +    error = request_irq(xlblk_update_irq, xlblk_update_int,
  11.569 +                        0, "blkdev", NULL);
  11.570 +
  11.571 +    if ( error )
  11.572 +    {
  11.573 +        printk(KERN_ALERT "Could not allocate block update interrupt\n");
  11.574 +        goto fail;
  11.575 +    }
  11.576 +
  11.577 +    (void)xlvbd_init();
  11.578 +
  11.579 +    return 0;
  11.580 +
  11.581 + fail:
  11.582 +    return error;
  11.583 +}
  11.584 +
  11.585 +
  11.586 +static void __exit xlblk_cleanup(void)
  11.587 +{
  11.588 +    xlvbd_cleanup();
  11.589 +    free_irq(xlblk_response_irq, NULL);
  11.590 +    free_irq(xlblk_update_irq, NULL);
  11.591 +    unbind_virq_from_irq(VIRQ_BLKDEV);
  11.592 +    unbind_virq_from_irq(VIRQ_VBD_UPD);
  11.593 +}
  11.594 +
  11.595 +
  11.596 +#ifdef MODULE
  11.597 +module_init(xlblk_init);
  11.598 +module_exit(xlblk_cleanup);
  11.599 +#endif
  11.600 +
  11.601 +
  11.602 +void blkdev_suspend(void)
  11.603 +{
  11.604 +    state = STATE_SUSPENDED;
  11.605 +    wmb();
  11.606 +
  11.607 +    while ( resp_cons != blk_ring->req_prod )
  11.608 +    {
  11.609 +        barrier();
  11.610 +        current->state = TASK_INTERRUPTIBLE;
  11.611 +        schedule_timeout(1);
  11.612 +    }
  11.613 +
  11.614 +    wmb();
  11.615 +    state = STATE_CLOSED;
  11.616 +    wmb();
  11.617 +
  11.618 +    clear_fixmap(FIX_BLKRING_BASE);
  11.619 +}
  11.620 +
  11.621 +
  11.622 +void blkdev_resume(void)
  11.623 +{
  11.624 +    reset_xlblk_interface();
  11.625 +    spin_lock_irq(&io_request_lock);
  11.626 +    kick_pending_request_queues();
  11.627 +    spin_unlock_irq(&io_request_lock);
  11.628 +}
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h	Mon Apr 19 14:03:03 2004 +0000
    12.3 @@ -0,0 +1,82 @@
    12.4 +/******************************************************************************
    12.5 + * block.h
    12.6 + * 
    12.7 + * Shared definitions between all levels of XenoLinux Virtual block devices.
    12.8 + */
    12.9 +
   12.10 +#ifndef __XEN_DRIVERS_BLOCK_H__
   12.11 +#define __XEN_DRIVERS_BLOCK_H__
   12.12 +
   12.13 +#include <linux/config.h>
   12.14 +#include <linux/module.h>
   12.15 +
   12.16 +#include <linux/kernel.h>
   12.17 +#include <linux/sched.h>
   12.18 +#include <linux/slab.h>
   12.19 +#include <linux/string.h>
   12.20 +#include <linux/errno.h>
   12.21 +
   12.22 +#include <linux/fs.h>
   12.23 +#include <linux/hdreg.h>
   12.24 +#include <linux/blkdev.h>
   12.25 +#include <linux/major.h>
   12.26 +
   12.27 +#include <asm/hypervisor-ifs/hypervisor-if.h>
   12.28 +#include <asm/hypervisor-ifs/vbd.h>
   12.29 +#include <asm/io.h>
   12.30 +#include <asm/atomic.h>
   12.31 +#include <asm/uaccess.h>
   12.32 +
   12.33 +#if 0
   12.34 +#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   12.35 +#else
   12.36 +#define DPRINTK(_f, _a...) ((void)0)
   12.37 +#endif
   12.38 +
   12.39 +#if 0
   12.40 +#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   12.41 +#else
   12.42 +#define DPRINTK_IOCTL(_f, _a...) ((void)0)
   12.43 +#endif
   12.44 +
   12.45 +/* Private gendisk->flags[] values. */
   12.46 +#define GENHD_FL_XEN        2 /* Is unit a Xen block device?  */
   12.47 +#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
   12.48 +
   12.49 +/*
   12.50 + * We have one of these per vbd, whether ide, scsi or 'other'.
   12.51 + * They hang in an array off the gendisk structure. We may end up putting
   12.52 + * all kinds of interesting stuff here :-)
   12.53 + */
   12.54 +typedef struct xl_disk {
   12.55 +    int usage;
   12.56 +} xl_disk_t;
   12.57 +
    12.58 +extern int xen_control_msg(int operation, char *buffer, int size);
   12.59 +extern int xen_block_open(struct inode *inode, struct file *filep);
   12.60 +extern int xen_block_release(struct inode *inode, struct file *filep);
   12.61 +extern int xen_block_ioctl(struct inode *inode, struct file *filep,
   12.62 +                                 unsigned command, unsigned long argument);
   12.63 +extern int xen_block_check(kdev_t dev);
   12.64 +extern int xen_block_revalidate(kdev_t dev);
   12.65 +extern void do_xlblk_request (request_queue_t *rq); 
   12.66 +
   12.67 +extern void xlvbd_update_vbds(void);
   12.68 +
   12.69 +static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
   12.70 +{
   12.71 +    struct gendisk *gd = get_gendisk(xldev);
   12.72 +    
   12.73 +    if ( gd == NULL ) 
   12.74 +        return NULL;
   12.75 +    
   12.76 +    return (xl_disk_t *)gd->real_devices + 
   12.77 +        (MINOR(xldev) >> gd->minor_shift);
   12.78 +}
   12.79 +
   12.80 +
   12.81 +/* Virtual block-device subsystem. */
   12.82 +extern int  xlvbd_init(void);
   12.83 +extern void xlvbd_cleanup(void); 
   12.84 +
   12.85 +#endif /* __XEN_DRIVERS_BLOCK_H__ */
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c	Mon Apr 19 14:03:03 2004 +0000
    13.3 @@ -0,0 +1,561 @@
    13.4 +/******************************************************************************
    13.5 + * vbd.c
    13.6 + * 
    13.7 + * Xenolinux virtual block-device driver (xvd).
    13.8 + * 
    13.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   13.10 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   13.11 + */
   13.12 +
   13.13 +#include "block.h"
   13.14 +#include <linux/blk.h>
   13.15 +
   13.16 +/*
   13.17 + * For convenience we distinguish between ide, scsi and 'other' (i.e.
   13.18 + * potentially combinations of the two) in the naming scheme and in a few 
   13.19 + * other places (like default readahead, etc).
   13.20 + */
   13.21 +#define XLIDE_MAJOR_NAME  "hd"
   13.22 +#define XLSCSI_MAJOR_NAME "sd"
   13.23 +#define XLVBD_MAJOR_NAME "xvd"
   13.24 +
   13.25 +#define XLIDE_DEVS_PER_MAJOR   2
   13.26 +#define XLSCSI_DEVS_PER_MAJOR 16
   13.27 +#define XLVBD_DEVS_PER_MAJOR  16
   13.28 +
   13.29 +#define XLIDE_PARTN_SHIFT  6    /* amount to shift minor to get 'real' minor */
   13.30 +#define XLIDE_MAX_PART    (1 << XLIDE_PARTN_SHIFT)     /* minors per ide vbd */
   13.31 +
   13.32 +#define XLSCSI_PARTN_SHIFT 4    /* amount to shift minor to get 'real' minor */
   13.33 +#define XLSCSI_MAX_PART   (1 << XLSCSI_PARTN_SHIFT)   /* minors per scsi vbd */
   13.34 +
   13.35 +#define XLVBD_PARTN_SHIFT  4    /* amount to shift minor to get 'real' minor */
   13.36 +#define XLVBD_MAX_PART    (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */
   13.37 +
   13.38 +/* The below are for the generic drivers/block/ll_rw_block.c code. */
   13.39 +static int xlide_blksize_size[256];
   13.40 +static int xlide_hardsect_size[256];
   13.41 +static int xlide_max_sectors[256];
   13.42 +static int xlscsi_blksize_size[256];
   13.43 +static int xlscsi_hardsect_size[256];
   13.44 +static int xlscsi_max_sectors[256];
   13.45 +static int xlvbd_blksize_size[256];
   13.46 +static int xlvbd_hardsect_size[256];
   13.47 +static int xlvbd_max_sectors[256];
   13.48 +
   13.49 +/* Information from Xen about our VBDs. */
   13.50 +#define MAX_VBDS 64
   13.51 +static int nr_vbds;
   13.52 +static xen_disk_t *vbd_info;
   13.53 +
   13.54 +static struct block_device_operations xlvbd_block_fops = 
   13.55 +{
   13.56 +    open:               xen_block_open,
   13.57 +    release:            xen_block_release,
   13.58 +    ioctl:              xen_block_ioctl,
   13.59 +    check_media_change: xen_block_check,
   13.60 +    revalidate:         xen_block_revalidate,
   13.61 +};
   13.62 +
   13.63 +static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
   13.64 +{
   13.65 +    int error;
   13.66 +    block_io_op_t op; 
   13.67 +
   13.68 +    /* Probe for disk information. */
   13.69 +    memset(&op, 0, sizeof(op)); 
   13.70 +    op.cmd = BLOCK_IO_OP_VBD_PROBE; 
   13.71 +    op.u.probe_params.domain    = 0; 
   13.72 +    op.u.probe_params.xdi.max   = MAX_VBDS;
   13.73 +    op.u.probe_params.xdi.disks = disk_info;
   13.74 +    op.u.probe_params.xdi.count = 0;
   13.75 +
   13.76 +    if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
   13.77 +    {
   13.78 +        printk(KERN_ALERT "Could not probe disks (%d)\n", error);
   13.79 +        return -1;
   13.80 +    }
   13.81 +
   13.82 +    return op.u.probe_params.xdi.count;
   13.83 +}
   13.84 +
   13.85 +/*
   13.86 + * xlvbd_init_device - initialise a VBD device
   13.87 + * @disk:              a xen_disk_t describing the VBD
   13.88 + *
   13.89 + * Takes a xen_disk_t * that describes a VBD the domain has access to.
   13.90 + * Performs appropriate initialisation and registration of the device.
   13.91 + *
   13.92 + * Care needs to be taken when making re-entrant calls to ensure that
   13.93 + * corruption does not occur.  Also, devices that are in use should not have
   13.94 + * their details updated.  This is the caller's responsibility.
   13.95 + */
   13.96 +static int xlvbd_init_device(xen_disk_t *xd)
   13.97 +{
   13.98 +    int device = xd->device;
   13.99 +    int major  = MAJOR(device); 
  13.100 +    int minor  = MINOR(device);
  13.101 +    int is_ide = IDE_DISK_MAJOR(major);  /* is this an ide device? */
  13.102 +    int is_scsi= SCSI_BLK_MAJOR(major);  /* is this a scsi device? */
  13.103 +    char *major_name;
  13.104 +    struct gendisk *gd;
  13.105 +    struct block_device *bd;
  13.106 +    xl_disk_t *disk;
  13.107 +    int i, rc = 0, max_part, partno;
  13.108 +    unsigned long capacity;
  13.109 +
  13.110 +    unsigned char buf[64];
  13.111 +
  13.112 +    if ( (bd = bdget(device)) == NULL )
  13.113 +        return -1;
  13.114 +
  13.115 +    /*
  13.116 +     * Update of partition info, and check of usage count, is protected
  13.117 +     * by the per-block-device semaphore.
  13.118 +     */
  13.119 +    down(&bd->bd_sem);
  13.120 +
  13.121 +    if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
  13.122 +    {
  13.123 +        printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
  13.124 +        rc = -1;
  13.125 +        goto out;
  13.126 +    }
  13.127 +
  13.128 +    if ( is_ide ) {
  13.129 +
  13.130 +	major_name = XLIDE_MAJOR_NAME; 
  13.131 +	max_part   = XLIDE_MAX_PART;
  13.132 +
  13.133 +    } else if ( is_scsi ) {
  13.134 +
  13.135 +	major_name = XLSCSI_MAJOR_NAME;
  13.136 +	max_part   = XLSCSI_MAX_PART;
  13.137 +
  13.138 +    } else if (XD_VIRTUAL(xd->info)) {
  13.139 +
  13.140 +	major_name = XLVBD_MAJOR_NAME;
  13.141 +	max_part   = XLVBD_MAX_PART;
  13.142 +
  13.143 +    } else { 
  13.144 +
  13.145 +        /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */
  13.146 +	printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", 
  13.147 +	       major, minor);
  13.148 +	is_scsi    = 1; 
  13.149 +	major_name = "cciss"; 
  13.150 +	max_part   = XLSCSI_MAX_PART;
  13.151 +
  13.152 +    }
  13.153 +    
  13.154 +    partno = minor & (max_part - 1); 
  13.155 +    
  13.156 +    if ( (gd = get_gendisk(device)) == NULL )
  13.157 +    {
  13.158 +        rc = register_blkdev(major, major_name, &xlvbd_block_fops);
  13.159 +        if ( rc < 0 )
  13.160 +        {
  13.161 +            printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
  13.162 +            goto out;
  13.163 +        }
  13.164 +
  13.165 +        if ( is_ide )
  13.166 +        { 
  13.167 +            blksize_size[major]  = xlide_blksize_size;
  13.168 +            hardsect_size[major] = xlide_hardsect_size;
  13.169 +            max_sectors[major]   = xlide_max_sectors;
  13.170 +            read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
  13.171 +        } 
  13.172 +        else if ( is_scsi )
  13.173 +        { 
  13.174 +            blksize_size[major]  = xlscsi_blksize_size;
  13.175 +            hardsect_size[major] = xlscsi_hardsect_size;
  13.176 +            max_sectors[major]   = xlscsi_max_sectors;
  13.177 +            read_ahead[major]    = 0; /* XXX 8; -- guessing */
  13.178 +        }
  13.179 +        else
  13.180 +        { 
  13.181 +            blksize_size[major]  = xlvbd_blksize_size;
  13.182 +            hardsect_size[major] = xlvbd_hardsect_size;
  13.183 +            max_sectors[major]   = xlvbd_max_sectors;
  13.184 +            read_ahead[major]    = 8;
  13.185 +        }
  13.186 +
  13.187 +        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
  13.188 +
  13.189 +        /*
  13.190 +         * Turn off barking 'headactive' mode. We dequeue buffer heads as
  13.191 +         * soon as we pass them down to Xen.
  13.192 +         */
  13.193 +        blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
  13.194 +
  13.195 +        /* Construct an appropriate gendisk structure. */
  13.196 +        gd             = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
  13.197 +        gd->major      = major;
  13.198 +        gd->major_name = major_name; 
  13.199 +    
  13.200 +        gd->max_p      = max_part; 
  13.201 +        if ( is_ide )
  13.202 +        { 
  13.203 +            gd->minor_shift  = XLIDE_PARTN_SHIFT; 
  13.204 +            gd->nr_real      = XLIDE_DEVS_PER_MAJOR; 
  13.205 +        } 
  13.206 +        else if ( is_scsi )
  13.207 +        { 
  13.208 +            gd->minor_shift  = XLSCSI_PARTN_SHIFT; 
  13.209 +            gd->nr_real      = XLSCSI_DEVS_PER_MAJOR; 
  13.210 +        }
  13.211 +        else
  13.212 +        { 
  13.213 +            gd->minor_shift  = XLVBD_PARTN_SHIFT; 
  13.214 +            gd->nr_real      = XLVBD_DEVS_PER_MAJOR; 
  13.215 +        }
  13.216 +
  13.217 +        /* 
  13.218 +        ** The sizes[] and part[] arrays hold the sizes and other 
  13.219 +        ** information about every partition with this 'major' (i.e. 
  13.220 +        ** every disk sharing the 8 bit prefix * max partns per disk) 
  13.221 +        */
  13.222 +        gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL);
  13.223 +        gd->part  = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), 
  13.224 +                            GFP_KERNEL);
  13.225 +        memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int));
  13.226 +        memset(gd->part,  0, max_part * gd->nr_real 
  13.227 +               * sizeof(struct hd_struct));
  13.228 +
  13.229 +
  13.230 +        gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), 
  13.231 +                                   GFP_KERNEL);
  13.232 +        memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t));
  13.233 +
  13.234 +        gd->next   = NULL;            
  13.235 +        gd->fops   = &xlvbd_block_fops;
  13.236 +
  13.237 +        gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), 
  13.238 +                             GFP_KERNEL);
  13.239 +        gd->flags  = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL);
  13.240 +    
  13.241 +        memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr));
  13.242 +        memset(gd->flags, 0, gd->nr_real *  sizeof(*gd->flags));
  13.243 +
  13.244 +        add_gendisk(gd);
  13.245 +
  13.246 +        blk_size[major] = gd->sizes;
  13.247 +    }
  13.248 +
  13.249 +    if ( XD_READONLY(xd->info) )
  13.250 +        set_device_ro(device, 1); 
  13.251 +
  13.252 +    gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN;
  13.253 +
  13.254 +    /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
  13.255 +    capacity = (unsigned long)xd->capacity;
  13.256 +
  13.257 +    if ( partno != 0 )
  13.258 +    {
  13.259 +        /*
  13.260 +         * If this was previously set up as a real disc we will have set 
  13.261 +         * up partition-table information. Virtual partitions override 
  13.262 +         * 'real' partitions, and the two cannot coexist on a device.
  13.263 +         */
  13.264 +        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
  13.265 +             (gd->sizes[minor & ~(max_part-1)] != 0) )
  13.266 +        {
  13.267 +            /*
  13.268 +             * Any non-zero sub-partition entries must be cleaned out before
  13.269 +             * installing 'virtual' partition entries. The two types cannot
  13.270 +             * coexist, and virtual partitions are favoured.
  13.271 +             */
  13.272 +            kdev_t dev = device & ~(max_part-1);
  13.273 +            for ( i = max_part - 1; i > 0; i-- )
  13.274 +            {
  13.275 +                invalidate_device(dev+i, 1);
  13.276 +                gd->part[MINOR(dev+i)].start_sect = 0;
  13.277 +                gd->part[MINOR(dev+i)].nr_sects   = 0;
  13.278 +                gd->sizes[MINOR(dev+i)]           = 0;
  13.279 +            }
  13.280 +            printk(KERN_ALERT
  13.281 +                   "Virtual partitions found for /dev/%s - ignoring any "
  13.282 +                   "real partition information we may have found.\n",
  13.283 +                   disk_name(gd, MINOR(device), buf));
  13.284 +        }
  13.285 +
  13.286 +        /* Need to skankily setup 'partition' information */
  13.287 +        gd->part[minor].start_sect = 0; 
  13.288 +        gd->part[minor].nr_sects   = capacity; 
  13.289 +        gd->sizes[minor]           = capacity; 
  13.290 +
  13.291 +        gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
  13.292 +    }
  13.293 +    else
  13.294 +    {
  13.295 +        gd->part[minor].nr_sects = capacity;
  13.296 +        gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
  13.297 +        
  13.298 +        /* Some final fix-ups depending on the device type */
  13.299 +        switch ( XD_TYPE(xd->info) )
  13.300 +        { 
  13.301 +        case XD_TYPE_CDROM:
  13.302 +        case XD_TYPE_FLOPPY: 
  13.303 +        case XD_TYPE_TAPE:
  13.304 +            gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; 
  13.305 +            printk(KERN_ALERT 
  13.306 +                   "Skipping partition check on %s /dev/%s\n", 
  13.307 +                   XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" : 
  13.308 +                   (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" : 
  13.309 +                    "floppy"), disk_name(gd, MINOR(device), buf)); 
  13.310 +            break; 
  13.311 +
  13.312 +        case XD_TYPE_DISK:
  13.313 +            /* Only check partitions on real discs (not virtual!). */
  13.314 +            if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
  13.315 +            {
  13.316 +                printk(KERN_ALERT
  13.317 +                       "Skipping partition check on virtual /dev/%s\n",
  13.318 +                       disk_name(gd, MINOR(device), buf));
  13.319 +                break;
  13.320 +            }
  13.321 +            register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
  13.322 +            break; 
  13.323 +
  13.324 +        default:
  13.325 +            printk(KERN_ALERT "XenoLinux: unknown device type %d\n", 
  13.326 +                   XD_TYPE(xd->info)); 
  13.327 +            break; 
  13.328 +        }
  13.329 +    }
  13.330 +
  13.331 + out:
  13.332 +    up(&bd->bd_sem);
  13.333 +    bdput(bd);    
  13.334 +    return rc;
  13.335 +}
  13.336 +
  13.337 +
  13.338 +/*
  13.339 + * xlvbd_remove_device - remove a device node if possible
  13.340 + * @device:       numeric device ID
  13.341 + *
  13.342 + * Updates the gendisk structure and invalidates devices.
  13.343 + *
  13.344 + * This is OK for now but in future, should perhaps consider where this should
  13.345 + * deallocate gendisks / unregister devices.
  13.346 + */
  13.347 +static int xlvbd_remove_device(int device)
  13.348 +{
  13.349 +    int i, rc = 0, minor = MINOR(device);
  13.350 +    struct gendisk *gd;
  13.351 +    struct block_device *bd;
  13.352 +    xl_disk_t *disk = NULL;
  13.353 +
  13.354 +    if ( (bd = bdget(device)) == NULL )
  13.355 +        return -1;
  13.356 +
  13.357 +    /*
  13.358 +     * Update of partition info, and check of usage count, is protected
  13.359 +     * by the per-block-device semaphore.
  13.360 +     */
  13.361 +    down(&bd->bd_sem);
  13.362 +
  13.363 +    if ( ((gd = get_gendisk(device)) == NULL) ||
  13.364 +         ((disk = xldev_to_xldisk(device)) == NULL) )
  13.365 +        BUG();
  13.366 +
  13.367 +    if ( disk->usage != 0 )
  13.368 +    {
  13.369 +        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
  13.370 +        rc = -1;
  13.371 +        goto out;
  13.372 +    }
  13.373 + 
  13.374 +    if ( (minor & (gd->max_p-1)) != 0 )
  13.375 +    {
  13.376 +        /* 1: The VBD is mapped to a partition rather than a whole unit. */
  13.377 +        invalidate_device(device, 1);
  13.378 +	gd->part[minor].start_sect = 0;
  13.379 +        gd->part[minor].nr_sects   = 0;
  13.380 +        gd->sizes[minor]           = 0;
  13.381 +
  13.382 +        /* Clear the consists-of-virtual-partitions flag if possible. */
  13.383 +        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
  13.384 +        for ( i = 1; i < gd->max_p; i++ )
  13.385 +            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
  13.386 +                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
  13.387 +
  13.388 +        /*
  13.389 +         * If all virtual partitions are now gone, and a 'whole unit' VBD is
  13.390 +         * present, then we can try to grok the unit's real partition table.
  13.391 +         */
  13.392 +        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
  13.393 +             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
  13.394 +             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
  13.395 +        {
  13.396 +            register_disk(gd,
  13.397 +                          device&~(gd->max_p-1), 
  13.398 +                          gd->max_p, 
  13.399 +                          &xlvbd_block_fops,
  13.400 +                          gd->part[minor&~(gd->max_p-1)].nr_sects);
  13.401 +        }
  13.402 +    }
  13.403 +    else
  13.404 +    {
  13.405 +        /*
  13.406 +         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
  13.407 +         * NB. The partition entries are only cleared if there are no VBDs
  13.408 +         * mapped to individual partitions on this unit.
  13.409 +         */
  13.410 +        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
  13.411 +        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
  13.412 +            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
  13.413 +        while ( i >= 0 )
  13.414 +        {
  13.415 +            invalidate_device(device+i, 1);
  13.416 +            gd->part[minor+i].start_sect = 0;
  13.417 +            gd->part[minor+i].nr_sects   = 0;
  13.418 +            gd->sizes[minor+i]           = 0;
  13.419 +            i--;
  13.420 +        }
  13.421 +    }
  13.422 +
  13.423 + out:
  13.424 +    up(&bd->bd_sem);
  13.425 +    bdput(bd);
  13.426 +    return rc;
  13.427 +}
  13.428 +
  13.429 +/*
   13.430 + * xlvbd_update_vbds - reprobes the VBD status and performs updates to driver
  13.431 + * state. The VBDs need to be updated in this way when the domain is
  13.432 + * initialised and also each time we receive an XLBLK_UPDATE event.
  13.433 + */
  13.434 +void xlvbd_update_vbds(void)
  13.435 +{
  13.436 +    int i, j, k, old_nr, new_nr;
  13.437 +    xen_disk_t *old_info, *new_info, *merged_info;
  13.438 +
  13.439 +    old_info = vbd_info;
  13.440 +    old_nr   = nr_vbds;
  13.441 +
  13.442 +    new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
   13.443 +    if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) )
  13.444 +    {
  13.445 +        kfree(new_info);
  13.446 +        return;
  13.447 +    }
  13.448 +
  13.449 +    /*
  13.450 +     * Final list maximum size is old list + new list. This occurs only when
  13.451 +     * old list and new list do not overlap at all, and we cannot yet destroy
  13.452 +     * VBDs in the old list because the usage counts are busy.
  13.453 +     */
  13.454 +    merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
  13.455 +
  13.456 +    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
  13.457 +    i = j = k = 0;
  13.458 +
  13.459 +    while ( (i < old_nr) && (j < new_nr) )
  13.460 +    {
  13.461 +        if ( old_info[i].device < new_info[j].device )
  13.462 +        {
  13.463 +            if ( xlvbd_remove_device(old_info[i].device) != 0 )
  13.464 +                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  13.465 +            i++;
  13.466 +        }
  13.467 +        else if ( old_info[i].device > new_info[j].device )
  13.468 +        {
  13.469 +            if ( xlvbd_init_device(&new_info[j]) == 0 )
  13.470 +                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  13.471 +            j++;
  13.472 +        }
  13.473 +        else
  13.474 +        {
  13.475 +            if ( ((old_info[i].capacity == new_info[j].capacity) &&
  13.476 +                  (old_info[i].info == new_info[j].info)) ||
  13.477 +                 (xlvbd_remove_device(old_info[i].device) != 0) )
  13.478 +                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  13.479 +            else if ( xlvbd_init_device(&new_info[j]) == 0 )
  13.480 +                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  13.481 +            i++; j++;
  13.482 +        }
  13.483 +    }
  13.484 +
  13.485 +    for ( ; i < old_nr; i++ )
  13.486 +    {
  13.487 +        if ( xlvbd_remove_device(old_info[i].device) != 0 )
  13.488 +            memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  13.489 +    }
  13.490 +
  13.491 +    for ( ; j < new_nr; j++ )
  13.492 +    {
  13.493 +        if ( xlvbd_init_device(&new_info[j]) == 0 )
  13.494 +            memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  13.495 +    }
  13.496 +
  13.497 +    vbd_info = merged_info;
  13.498 +    nr_vbds  = k;
  13.499 +
  13.500 +    kfree(old_info);
  13.501 +    kfree(new_info);
  13.502 +}
  13.503 +
  13.504 +
  13.505 +/*
  13.506 + * Set up all the linux device goop for the virtual block devices (vbd's) that 
  13.507 + * xen tells us about. Note that although from xen's pov VBDs are addressed 
   13.508 + * simply as an opaque 16-bit device number, the domain creation tools 
  13.509 + * conventionally allocate these numbers to correspond to those used by 'real' 
  13.510 + * linux -- this is just for convenience as it means e.g. that the same 
  13.511 + * /etc/fstab can be used when booting with or without xen.
  13.512 + */
  13.513 +int __init xlvbd_init(void)
  13.514 +{
  13.515 +    int i;
  13.516 +    
  13.517 +    /*
  13.518 +     * If compiled as a module, we don't support unloading yet. We therefore 
  13.519 +     * permanently increment the reference count to disallow it.
  13.520 +     */
  13.521 +    SET_MODULE_OWNER(&xlvbd_block_fops);
  13.522 +    MOD_INC_USE_COUNT;
  13.523 +
  13.524 +    /* Initialize the global arrays. */
  13.525 +    for ( i = 0; i < 256; i++ ) 
  13.526 +    {
  13.527 +        /* from the generic ide code (drivers/ide/ide-probe.c, etc) */
  13.528 +        xlide_blksize_size[i]  = 1024;
  13.529 +        xlide_hardsect_size[i] = 512;
  13.530 +        xlide_max_sectors[i]   = 128;  /* 'hwif->rqsize' if we knew it */
  13.531 +
  13.532 +        /* from the generic scsi disk code (drivers/scsi/sd.c) */
  13.533 +        xlscsi_blksize_size[i]  = 1024; /* XXX 512; */
  13.534 +        xlscsi_hardsect_size[i] = 512;
  13.535 +        xlscsi_max_sectors[i]   = 128*8; /* XXX 128; */
  13.536 +
   13.537 +        /* we don't really know what to set these to since it depends */
  13.538 +        xlvbd_blksize_size[i]  = 512;
  13.539 +        xlvbd_hardsect_size[i] = 512;
  13.540 +        xlvbd_max_sectors[i]   = 128;
  13.541 +    }
  13.542 +
  13.543 +    vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  13.544 +    nr_vbds  = xlvbd_get_vbd_info(vbd_info);
  13.545 +
  13.546 +    if ( nr_vbds < 0 )
  13.547 +    {
  13.548 +        kfree(vbd_info);
  13.549 +        vbd_info = NULL;
  13.550 +        nr_vbds  = 0;
  13.551 +    }
  13.552 +    else
  13.553 +    {
  13.554 +        for ( i = 0; i < nr_vbds; i++ )
  13.555 +            xlvbd_init_device(&vbd_info[i]);
  13.556 +    }
  13.557 +
  13.558 +    return 0;
  13.559 +}
  13.560 +
  13.561 +
  13.562 +#ifdef MODULE
  13.563 +module_init(xlvbd_init);
  13.564 +#endif
    14.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/vbd.c	Thu Apr 15 16:28:53 2004 +0000
    14.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.3 @@ -1,561 +0,0 @@
    14.4 -/******************************************************************************
    14.5 - * vbd.c
    14.6 - * 
    14.7 - * Xenolinux virtual block-device driver (xvd).
    14.8 - * 
    14.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   14.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   14.11 - */
   14.12 -
   14.13 -#include "block.h"
   14.14 -#include <linux/blk.h>
   14.15 -
   14.16 -/*
   14.17 - * For convenience we distinguish between ide, scsi and 'other' (i.e.
   14.18 - * potentially combinations of the two) in the naming scheme and in a few 
   14.19 - * other places (like default readahead, etc).
   14.20 - */
   14.21 -#define XLIDE_MAJOR_NAME  "hd"
   14.22 -#define XLSCSI_MAJOR_NAME "sd"
   14.23 -#define XLVBD_MAJOR_NAME "xvd"
   14.24 -
   14.25 -#define XLIDE_DEVS_PER_MAJOR   2
   14.26 -#define XLSCSI_DEVS_PER_MAJOR 16
   14.27 -#define XLVBD_DEVS_PER_MAJOR  16
   14.28 -
   14.29 -#define XLIDE_PARTN_SHIFT  6    /* amount to shift minor to get 'real' minor */
   14.30 -#define XLIDE_MAX_PART    (1 << XLIDE_PARTN_SHIFT)     /* minors per ide vbd */
   14.31 -
   14.32 -#define XLSCSI_PARTN_SHIFT 4    /* amount to shift minor to get 'real' minor */
   14.33 -#define XLSCSI_MAX_PART   (1 << XLSCSI_PARTN_SHIFT)   /* minors per scsi vbd */
   14.34 -
   14.35 -#define XLVBD_PARTN_SHIFT  4    /* amount to shift minor to get 'real' minor */
   14.36 -#define XLVBD_MAX_PART    (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */
   14.37 -
   14.38 -/* The below are for the generic drivers/block/ll_rw_block.c code. */
   14.39 -static int xlide_blksize_size[256];
   14.40 -static int xlide_hardsect_size[256];
   14.41 -static int xlide_max_sectors[256];
   14.42 -static int xlscsi_blksize_size[256];
   14.43 -static int xlscsi_hardsect_size[256];
   14.44 -static int xlscsi_max_sectors[256];
   14.45 -static int xlvbd_blksize_size[256];
   14.46 -static int xlvbd_hardsect_size[256];
   14.47 -static int xlvbd_max_sectors[256];
   14.48 -
   14.49 -/* Information from Xen about our VBDs. */
   14.50 -#define MAX_VBDS 64
   14.51 -static int nr_vbds;
   14.52 -static xen_disk_t *vbd_info;
   14.53 -
   14.54 -static struct block_device_operations xlvbd_block_fops = 
   14.55 -{
   14.56 -    open:               xen_block_open,
   14.57 -    release:            xen_block_release,
   14.58 -    ioctl:              xen_block_ioctl,
   14.59 -    check_media_change: xen_block_check,
   14.60 -    revalidate:         xen_block_revalidate,
   14.61 -};
   14.62 -
   14.63 -static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
   14.64 -{
   14.65 -    int error;
   14.66 -    block_io_op_t op; 
   14.67 -
   14.68 -    /* Probe for disk information. */
   14.69 -    memset(&op, 0, sizeof(op)); 
   14.70 -    op.cmd = BLOCK_IO_OP_VBD_PROBE; 
   14.71 -    op.u.probe_params.domain    = 0; 
   14.72 -    op.u.probe_params.xdi.max   = MAX_VBDS;
   14.73 -    op.u.probe_params.xdi.disks = disk_info;
   14.74 -    op.u.probe_params.xdi.count = 0;
   14.75 -
   14.76 -    if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
   14.77 -    {
   14.78 -        printk(KERN_ALERT "Could not probe disks (%d)\n", error);
   14.79 -        return -1;
   14.80 -    }
   14.81 -
   14.82 -    return op.u.probe_params.xdi.count;
   14.83 -}
   14.84 -
   14.85 -/*
   14.86 - * xlvbd_init_device - initialise a VBD device
   14.87 - * @disk:              a xen_disk_t describing the VBD
   14.88 - *
   14.89 - * Takes a xen_disk_t * that describes a VBD the domain has access to.
   14.90 - * Performs appropriate initialisation and registration of the device.
   14.91 - *
   14.92 - * Care needs to be taken when making re-entrant calls to ensure that
   14.93 - * corruption does not occur.  Also, devices that are in use should not have
   14.94 - * their details updated.  This is the caller's responsibility.
   14.95 - */
   14.96 -static int xlvbd_init_device(xen_disk_t *xd)
   14.97 -{
   14.98 -    int device = xd->device;
   14.99 -    int major  = MAJOR(device); 
  14.100 -    int minor  = MINOR(device);
  14.101 -    int is_ide = IDE_DISK_MAJOR(major);  /* is this an ide device? */
  14.102 -    int is_scsi= SCSI_BLK_MAJOR(major);  /* is this a scsi device? */
  14.103 -    char *major_name;
  14.104 -    struct gendisk *gd;
  14.105 -    struct block_device *bd;
  14.106 -    xl_disk_t *disk;
  14.107 -    int i, rc = 0, max_part, partno;
  14.108 -    unsigned long capacity;
  14.109 -
  14.110 -    unsigned char buf[64];
  14.111 -
  14.112 -    if ( (bd = bdget(device)) == NULL )
  14.113 -        return -1;
  14.114 -
  14.115 -    /*
  14.116 -     * Update of partition info, and check of usage count, is protected
  14.117 -     * by the per-block-device semaphore.
  14.118 -     */
  14.119 -    down(&bd->bd_sem);
  14.120 -
  14.121 -    if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
  14.122 -    {
  14.123 -        printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
  14.124 -        rc = -1;
  14.125 -        goto out;
  14.126 -    }
  14.127 -
  14.128 -    if ( is_ide ) {
  14.129 -
  14.130 -	major_name = XLIDE_MAJOR_NAME; 
  14.131 -	max_part   = XLIDE_MAX_PART;
  14.132 -
  14.133 -    } else if ( is_scsi ) {
  14.134 -
  14.135 -	major_name = XLSCSI_MAJOR_NAME;
  14.136 -	max_part   = XLSCSI_MAX_PART;
  14.137 -
  14.138 -    } else if (XD_VIRTUAL(xd->info)) {
  14.139 -
  14.140 -	major_name = XLVBD_MAJOR_NAME;
  14.141 -	max_part   = XLVBD_MAX_PART;
  14.142 -
  14.143 -    } else { 
  14.144 -
  14.145 -        /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */
  14.146 -	printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", 
  14.147 -	       major, minor);
  14.148 -	is_scsi    = 1; 
  14.149 -	major_name = "cciss"; 
  14.150 -	max_part   = XLSCSI_MAX_PART;
  14.151 -
  14.152 -    }
  14.153 -    
  14.154 -    partno = minor & (max_part - 1); 
  14.155 -    
  14.156 -    if ( (gd = get_gendisk(device)) == NULL )
  14.157 -    {
  14.158 -        rc = register_blkdev(major, major_name, &xlvbd_block_fops);
  14.159 -        if ( rc < 0 )
  14.160 -        {
  14.161 -            printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
  14.162 -            goto out;
  14.163 -        }
  14.164 -
  14.165 -        if ( is_ide )
  14.166 -        { 
  14.167 -            blksize_size[major]  = xlide_blksize_size;
  14.168 -            hardsect_size[major] = xlide_hardsect_size;
  14.169 -            max_sectors[major]   = xlide_max_sectors;
  14.170 -            read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
  14.171 -        } 
  14.172 -        else if ( is_scsi )
  14.173 -        { 
  14.174 -            blksize_size[major]  = xlscsi_blksize_size;
  14.175 -            hardsect_size[major] = xlscsi_hardsect_size;
  14.176 -            max_sectors[major]   = xlscsi_max_sectors;
  14.177 -            read_ahead[major]    = 0; /* XXX 8; -- guessing */
  14.178 -        }
  14.179 -        else
  14.180 -        { 
  14.181 -            blksize_size[major]  = xlvbd_blksize_size;
  14.182 -            hardsect_size[major] = xlvbd_hardsect_size;
  14.183 -            max_sectors[major]   = xlvbd_max_sectors;
  14.184 -            read_ahead[major]    = 8;
  14.185 -        }
  14.186 -
  14.187 -        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
  14.188 -
  14.189 -        /*
  14.190 -         * Turn off barking 'headactive' mode. We dequeue buffer heads as
  14.191 -         * soon as we pass them down to Xen.
  14.192 -         */
  14.193 -        blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
  14.194 -
  14.195 -        /* Construct an appropriate gendisk structure. */
  14.196 -        gd             = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
  14.197 -        gd->major      = major;
  14.198 -        gd->major_name = major_name; 
  14.199 -    
  14.200 -        gd->max_p      = max_part; 
  14.201 -        if ( is_ide )
  14.202 -        { 
  14.203 -            gd->minor_shift  = XLIDE_PARTN_SHIFT; 
  14.204 -            gd->nr_real      = XLIDE_DEVS_PER_MAJOR; 
  14.205 -        } 
  14.206 -        else if ( is_scsi )
  14.207 -        { 
  14.208 -            gd->minor_shift  = XLSCSI_PARTN_SHIFT; 
  14.209 -            gd->nr_real      = XLSCSI_DEVS_PER_MAJOR; 
  14.210 -        }
  14.211 -        else
  14.212 -        { 
  14.213 -            gd->minor_shift  = XLVBD_PARTN_SHIFT; 
  14.214 -            gd->nr_real      = XLVBD_DEVS_PER_MAJOR; 
  14.215 -        }
  14.216 -
  14.217 -        /* 
  14.218 -        ** The sizes[] and part[] arrays hold the sizes and other 
  14.219 -        ** information about every partition with this 'major' (i.e. 
  14.220 -        ** every disk sharing the 8 bit prefix * max partns per disk) 
  14.221 -        */
  14.222 -        gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL);
  14.223 -        gd->part  = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), 
  14.224 -                            GFP_KERNEL);
  14.225 -        memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int));
  14.226 -        memset(gd->part,  0, max_part * gd->nr_real 
  14.227 -               * sizeof(struct hd_struct));
  14.228 -
  14.229 -
  14.230 -        gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), 
  14.231 -                                   GFP_KERNEL);
  14.232 -        memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t));
  14.233 -
  14.234 -        gd->next   = NULL;            
  14.235 -        gd->fops   = &xlvbd_block_fops;
  14.236 -
  14.237 -        gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), 
  14.238 -                             GFP_KERNEL);
  14.239 -        gd->flags  = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL);
  14.240 -    
  14.241 -        memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr));
  14.242 -        memset(gd->flags, 0, gd->nr_real *  sizeof(*gd->flags));
  14.243 -
  14.244 -        add_gendisk(gd);
  14.245 -
  14.246 -        blk_size[major] = gd->sizes;
  14.247 -    }
  14.248 -
  14.249 -    if ( XD_READONLY(xd->info) )
  14.250 -        set_device_ro(device, 1); 
  14.251 -
  14.252 -    gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XEN;
  14.253 -
  14.254 -    /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
  14.255 -    capacity = (unsigned long)xd->capacity;
  14.256 -
  14.257 -    if ( partno != 0 )
  14.258 -    {
  14.259 -        /*
  14.260 -         * If this was previously set up as a real disc we will have set 
  14.261 -         * up partition-table information. Virtual partitions override 
  14.262 -         * 'real' partitions, and the two cannot coexist on a device.
  14.263 -         */
  14.264 -        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
  14.265 -             (gd->sizes[minor & ~(max_part-1)] != 0) )
  14.266 -        {
  14.267 -            /*
  14.268 -             * Any non-zero sub-partition entries must be cleaned out before
  14.269 -             * installing 'virtual' partition entries. The two types cannot
  14.270 -             * coexist, and virtual partitions are favoured.
  14.271 -             */
  14.272 -            kdev_t dev = device & ~(max_part-1);
  14.273 -            for ( i = max_part - 1; i > 0; i-- )
  14.274 -            {
  14.275 -                invalidate_device(dev+i, 1);
  14.276 -                gd->part[MINOR(dev+i)].start_sect = 0;
  14.277 -                gd->part[MINOR(dev+i)].nr_sects   = 0;
  14.278 -                gd->sizes[MINOR(dev+i)]           = 0;
  14.279 -            }
  14.280 -            printk(KERN_ALERT
  14.281 -                   "Virtual partitions found for /dev/%s - ignoring any "
  14.282 -                   "real partition information we may have found.\n",
  14.283 -                   disk_name(gd, MINOR(device), buf));
  14.284 -        }
  14.285 -
  14.286 -        /* Need to skankily setup 'partition' information */
  14.287 -        gd->part[minor].start_sect = 0; 
  14.288 -        gd->part[minor].nr_sects   = capacity; 
  14.289 -        gd->sizes[minor]           = capacity; 
  14.290 -
  14.291 -        gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
  14.292 -    }
  14.293 -    else
  14.294 -    {
  14.295 -        gd->part[minor].nr_sects = capacity;
  14.296 -        gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
  14.297 -        
  14.298 -        /* Some final fix-ups depending on the device type */
  14.299 -        switch ( XD_TYPE(xd->info) )
  14.300 -        { 
  14.301 -        case XD_TYPE_CDROM:
  14.302 -        case XD_TYPE_FLOPPY: 
  14.303 -        case XD_TYPE_TAPE:
  14.304 -            gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; 
  14.305 -            printk(KERN_ALERT 
  14.306 -                   "Skipping partition check on %s /dev/%s\n", 
  14.307 -                   XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" : 
  14.308 -                   (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" : 
  14.309 -                    "floppy"), disk_name(gd, MINOR(device), buf)); 
  14.310 -            break; 
  14.311 -
  14.312 -        case XD_TYPE_DISK:
  14.313 -            /* Only check partitions on real discs (not virtual!). */
  14.314 -            if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
  14.315 -            {
  14.316 -                printk(KERN_ALERT
  14.317 -                       "Skipping partition check on virtual /dev/%s\n",
  14.318 -                       disk_name(gd, MINOR(device), buf));
  14.319 -                break;
  14.320 -            }
  14.321 -            register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
  14.322 -            break; 
  14.323 -
  14.324 -        default:
  14.325 -            printk(KERN_ALERT "XenoLinux: unknown device type %d\n", 
  14.326 -                   XD_TYPE(xd->info)); 
  14.327 -            break; 
  14.328 -        }
  14.329 -    }
  14.330 -
  14.331 - out:
  14.332 -    up(&bd->bd_sem);
  14.333 -    bdput(bd);    
  14.334 -    return rc;
  14.335 -}
  14.336 -
  14.337 -
  14.338 -/*
  14.339 - * xlvbd_remove_device - remove a device node if possible
  14.340 - * @device:       numeric device ID
  14.341 - *
  14.342 - * Updates the gendisk structure and invalidates devices.
  14.343 - *
  14.344 - * This is OK for now but in future, should perhaps consider where this should
  14.345 - * deallocate gendisks / unregister devices.
  14.346 - */
  14.347 -static int xlvbd_remove_device(int device)
  14.348 -{
  14.349 -    int i, rc = 0, minor = MINOR(device);
  14.350 -    struct gendisk *gd;
  14.351 -    struct block_device *bd;
  14.352 -    xl_disk_t *disk = NULL;
  14.353 -
  14.354 -    if ( (bd = bdget(device)) == NULL )
  14.355 -        return -1;
  14.356 -
  14.357 -    /*
  14.358 -     * Update of partition info, and check of usage count, is protected
  14.359 -     * by the per-block-device semaphore.
  14.360 -     */
  14.361 -    down(&bd->bd_sem);
  14.362 -
  14.363 -    if ( ((gd = get_gendisk(device)) == NULL) ||
  14.364 -         ((disk = xldev_to_xldisk(device)) == NULL) )
  14.365 -        BUG();
  14.366 -
  14.367 -    if ( disk->usage != 0 )
  14.368 -    {
  14.369 -        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
  14.370 -        rc = -1;
  14.371 -        goto out;
  14.372 -    }
  14.373 - 
  14.374 -    if ( (minor & (gd->max_p-1)) != 0 )
  14.375 -    {
  14.376 -        /* 1: The VBD is mapped to a partition rather than a whole unit. */
  14.377 -        invalidate_device(device, 1);
  14.378 -	gd->part[minor].start_sect = 0;
  14.379 -        gd->part[minor].nr_sects   = 0;
  14.380 -        gd->sizes[minor]           = 0;
  14.381 -
  14.382 -        /* Clear the consists-of-virtual-partitions flag if possible. */
  14.383 -        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
  14.384 -        for ( i = 1; i < gd->max_p; i++ )
  14.385 -            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
  14.386 -                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
  14.387 -
  14.388 -        /*
  14.389 -         * If all virtual partitions are now gone, and a 'whole unit' VBD is
  14.390 -         * present, then we can try to grok the unit's real partition table.
  14.391 -         */
  14.392 -        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
  14.393 -             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
  14.394 -             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
  14.395 -        {
  14.396 -            register_disk(gd,
  14.397 -                          device&~(gd->max_p-1), 
  14.398 -                          gd->max_p, 
  14.399 -                          &xlvbd_block_fops,
  14.400 -                          gd->part[minor&~(gd->max_p-1)].nr_sects);
  14.401 -        }
  14.402 -    }
  14.403 -    else
  14.404 -    {
  14.405 -        /*
  14.406 -         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
  14.407 -         * NB. The partition entries are only cleared if there are no VBDs
  14.408 -         * mapped to individual partitions on this unit.
  14.409 -         */
  14.410 -        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
  14.411 -        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
  14.412 -            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
  14.413 -        while ( i >= 0 )
  14.414 -        {
  14.415 -            invalidate_device(device+i, 1);
  14.416 -            gd->part[minor+i].start_sect = 0;
  14.417 -            gd->part[minor+i].nr_sects   = 0;
  14.418 -            gd->sizes[minor+i]           = 0;
  14.419 -            i--;
  14.420 -        }
  14.421 -    }
  14.422 -
  14.423 - out:
  14.424 -    up(&bd->bd_sem);
  14.425 -    bdput(bd);
  14.426 -    return rc;
  14.427 -}
  14.428 -
  14.429 -/*
  14.430 - * xlvbd_update_vbds - reprobes the VBD status and performs updates driver
  14.431 - * state. The VBDs need to be updated in this way when the domain is
  14.432 - * initialised and also each time we receive an XLBLK_UPDATE event.
  14.433 - */
  14.434 -void xlvbd_update_vbds(void)
  14.435 -{
  14.436 -    int i, j, k, old_nr, new_nr;
  14.437 -    xen_disk_t *old_info, *new_info, *merged_info;
  14.438 -
  14.439 -    old_info = vbd_info;
  14.440 -    old_nr   = nr_vbds;
  14.441 -
  14.442 -    new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  14.443 -    if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
  14.444 -    {
  14.445 -        kfree(new_info);
  14.446 -        return;
  14.447 -    }
  14.448 -
  14.449 -    /*
  14.450 -     * Final list maximum size is old list + new list. This occurs only when
  14.451 -     * old list and new list do not overlap at all, and we cannot yet destroy
  14.452 -     * VBDs in the old list because the usage counts are busy.
  14.453 -     */
  14.454 -    merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
  14.455 -
  14.456 -    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
  14.457 -    i = j = k = 0;
  14.458 -
  14.459 -    while ( (i < old_nr) && (j < new_nr) )
  14.460 -    {
  14.461 -        if ( old_info[i].device < new_info[j].device )
  14.462 -        {
  14.463 -            if ( xlvbd_remove_device(old_info[i].device) != 0 )
  14.464 -                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  14.465 -            i++;
  14.466 -        }
  14.467 -        else if ( old_info[i].device > new_info[j].device )
  14.468 -        {
  14.469 -            if ( xlvbd_init_device(&new_info[j]) == 0 )
  14.470 -                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  14.471 -            j++;
  14.472 -        }
  14.473 -        else
  14.474 -        {
  14.475 -            if ( ((old_info[i].capacity == new_info[j].capacity) &&
  14.476 -                  (old_info[i].info == new_info[j].info)) ||
  14.477 -                 (xlvbd_remove_device(old_info[i].device) != 0) )
  14.478 -                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  14.479 -            else if ( xlvbd_init_device(&new_info[j]) == 0 )
  14.480 -                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  14.481 -            i++; j++;
  14.482 -        }
  14.483 -    }
  14.484 -
  14.485 -    for ( ; i < old_nr; i++ )
  14.486 -    {
  14.487 -        if ( xlvbd_remove_device(old_info[i].device) != 0 )
  14.488 -            memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  14.489 -    }
  14.490 -
  14.491 -    for ( ; j < new_nr; j++ )
  14.492 -    {
  14.493 -        if ( xlvbd_init_device(&new_info[j]) == 0 )
  14.494 -            memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  14.495 -    }
  14.496 -
  14.497 -    vbd_info = merged_info;
  14.498 -    nr_vbds  = k;
  14.499 -
  14.500 -    kfree(old_info);
  14.501 -    kfree(new_info);
  14.502 -}
  14.503 -
  14.504 -
  14.505 -/*
  14.506 - * Set up all the linux device goop for the virtual block devices (vbd's) that 
  14.507 - * xen tells us about. Note that although from xen's pov VBDs are addressed 
  14.508 - * simply an opaque 16-bit device number, the domain creation tools 
  14.509 - * conventionally allocate these numbers to correspond to those used by 'real' 
  14.510 - * linux -- this is just for convenience as it means e.g. that the same 
  14.511 - * /etc/fstab can be used when booting with or without xen.
  14.512 - */
  14.513 -int __init xlvbd_init(void)
  14.514 -{
  14.515 -    int i;
  14.516 -    
  14.517 -    /*
  14.518 -     * If compiled as a module, we don't support unloading yet. We therefore 
  14.519 -     * permanently increment the reference count to disallow it.
  14.520 -     */
  14.521 -    SET_MODULE_OWNER(&xlvbd_block_fops);
  14.522 -    MOD_INC_USE_COUNT;
  14.523 -
  14.524 -    /* Initialize the global arrays. */
  14.525 -    for ( i = 0; i < 256; i++ ) 
  14.526 -    {
  14.527 -        /* from the generic ide code (drivers/ide/ide-probe.c, etc) */
  14.528 -        xlide_blksize_size[i]  = 1024;
  14.529 -        xlide_hardsect_size[i] = 512;
  14.530 -        xlide_max_sectors[i]   = 128;  /* 'hwif->rqsize' if we knew it */
  14.531 -
  14.532 -        /* from the generic scsi disk code (drivers/scsi/sd.c) */
  14.533 -        xlscsi_blksize_size[i]  = 1024; /* XXX 512; */
  14.534 -        xlscsi_hardsect_size[i] = 512;
  14.535 -        xlscsi_max_sectors[i]   = 128*8; /* XXX 128; */
  14.536 -
  14.537 -        /* we don't really know what to set these too since it depends */
  14.538 -        xlvbd_blksize_size[i]  = 512;
  14.539 -        xlvbd_hardsect_size[i] = 512;
  14.540 -        xlvbd_max_sectors[i]   = 128;
  14.541 -    }
  14.542 -
  14.543 -    vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  14.544 -    nr_vbds  = xlvbd_get_vbd_info(vbd_info);
  14.545 -
  14.546 -    if ( nr_vbds < 0 )
  14.547 -    {
  14.548 -        kfree(vbd_info);
  14.549 -        vbd_info = NULL;
  14.550 -        nr_vbds  = 0;
  14.551 -    }
  14.552 -    else
  14.553 -    {
  14.554 -        for ( i = 0; i < nr_vbds; i++ )
  14.555 -            xlvbd_init_device(&vbd_info[i]);
  14.556 -    }
  14.557 -
  14.558 -    return 0;
  14.559 -}
  14.560 -
  14.561 -
  14.562 -#ifdef MODULE
  14.563 -module_init(xlvbd_init);
  14.564 -#endif
    15.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile	Thu Apr 15 16:28:53 2004 +0000
    15.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile	Mon Apr 19 14:03:03 2004 +0000
    15.3 @@ -1,3 +1,10 @@
    15.4 +
    15.5  O_TARGET := drv.o
    15.6 -obj-y := vnetif.o
    15.7 +
    15.8 +subdir-y += frontend
    15.9 +obj-y    += frontend/drv.o
   15.10 +
   15.11 +subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
   15.12 +obj-$(CONFIG_XEN_PHYSDEV_ACCESS)    += backend/drv.o
   15.13 +
   15.14  include $(TOPDIR)/Rules.make
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile	Mon Apr 19 14:03:03 2004 +0000
    16.3 @@ -0,0 +1,3 @@
    16.4 +O_TARGET := drv.o
    16.5 +obj-y := main.o
    16.6 +include $(TOPDIR)/Rules.make
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile	Mon Apr 19 14:03:03 2004 +0000
    17.3 @@ -0,0 +1,3 @@
    17.4 +O_TARGET := drv.o
    17.5 +obj-y := vnetif.o
    17.6 +include $(TOPDIR)/Rules.make
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c	Mon Apr 19 14:03:03 2004 +0000
    18.3 @@ -0,0 +1,565 @@
    18.4 +/******************************************************************************
    18.5 + * vnetif.c
    18.6 + * 
    18.7 + * Virtual network driver for XenoLinux.
    18.8 + * 
    18.9 + * Copyright (c) 2002-2004, K A Fraser
   18.10 + */
   18.11 +
   18.12 +#include <linux/config.h>
   18.13 +#include <linux/module.h>
   18.14 +
   18.15 +#include <linux/kernel.h>
   18.16 +#include <linux/sched.h>
   18.17 +#include <linux/slab.h>
   18.18 +#include <linux/string.h>
   18.19 +#include <linux/errno.h>
   18.20 +
   18.21 +#include <linux/netdevice.h>
   18.22 +#include <linux/inetdevice.h>
   18.23 +#include <linux/etherdevice.h>
   18.24 +#include <linux/skbuff.h>
   18.25 +#include <linux/init.h>
   18.26 +
   18.27 +#include <asm/io.h>
   18.28 +#include <net/sock.h>
   18.29 +#include <net/pkt_sched.h>
   18.30 +
   18.31 +#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
   18.32 +
   18.33 +static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
   18.34 +static void network_tx_buf_gc(struct net_device *dev);
   18.35 +static void network_alloc_rx_buffers(struct net_device *dev);
   18.36 +static void cleanup_module(void);
   18.37 +
   18.38 +/* Dynamically-mapped IRQs. */
   18.39 +static int network_irq, debug_irq;
   18.40 +
   18.41 +static struct list_head dev_list;
   18.42 +
   18.43 +struct net_private
   18.44 +{
   18.45 +    struct list_head list;
   18.46 +    struct net_device *dev;
   18.47 +
   18.48 +    struct net_device_stats stats;
   18.49 +    NET_RING_IDX rx_resp_cons, tx_resp_cons;
   18.50 +    unsigned int net_ring_fixmap_idx, tx_full;
   18.51 +    net_ring_t  *net_ring;
   18.52 +    net_idx_t   *net_idx;
   18.53 +    spinlock_t   tx_lock;
   18.54 +    unsigned int idx; /* Domain-specific index of this VIF. */
   18.55 +
   18.56 +    unsigned int rx_bufs_to_notify;
   18.57 +
   18.58 +#define STATE_ACTIVE    0
   18.59 +#define STATE_SUSPENDED 1
   18.60 +#define STATE_CLOSED    2
   18.61 +    unsigned int state;
   18.62 +
   18.63 +    /*
   18.64 +     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
   18.65 +     * array is an index into a chain of free entries.
   18.66 +     */
   18.67 +    struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1];
   18.68 +    struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1];
   18.69 +};
   18.70 +
   18.71 +/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
   18.72 +#define ADD_ID_TO_FREELIST(_list, _id)             \
   18.73 +    (_list)[(_id)] = (_list)[0];                   \
   18.74 +    (_list)[0]     = (void *)(unsigned long)(_id);
   18.75 +#define GET_ID_FROM_FREELIST(_list)                \
   18.76 + ({ unsigned long _id = (unsigned long)(_list)[0]; \
   18.77 +    (_list)[0]  = (_list)[_id];                    \
   18.78 +    (unsigned short)_id; })
   18.79 +
   18.80 +
   18.81 +static void _dbg_network_int(struct net_device *dev)
   18.82 +{
   18.83 +    struct net_private *np = dev->priv;
   18.84 +
   18.85 +    if ( np->state == STATE_CLOSED )
   18.86 +        return;
   18.87 +    
   18.88 +    printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x,"
   18.89 +           " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x,"
   18.90 +           " tx_event=0x%08x, state=%d\n",
   18.91 +           np->tx_full, np->tx_resp_cons, 
   18.92 +           np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, 
   18.93 +           np->net_idx->tx_event,
   18.94 +           test_bit(__LINK_STATE_XOFF, &dev->state));
   18.95 +    printk(KERN_ALERT "net: rx_resp_cons=0x%08x,"
   18.96 +           " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n",
   18.97 +           np->rx_resp_cons, np->net_idx->rx_req_prod,
   18.98 +           np->net_idx->rx_resp_prod, np->net_idx->rx_event);
   18.99 +}
  18.100 +
  18.101 +
  18.102 +static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs)
  18.103 +{
  18.104 +    struct list_head *ent;
  18.105 +    struct net_private *np;
  18.106 +    list_for_each ( ent, &dev_list )
  18.107 +    {
  18.108 +        np = list_entry(ent, struct net_private, list);
  18.109 +        _dbg_network_int(np->dev);
  18.110 +    }
  18.111 +}
  18.112 +
  18.113 +
  18.114 +static int network_open(struct net_device *dev)
  18.115 +{
  18.116 +    struct net_private *np = dev->priv;
  18.117 +    netop_t netop;
  18.118 +    int i, ret;
  18.119 +
  18.120 +    netop.cmd = NETOP_RESET_RINGS;
  18.121 +    netop.vif = np->idx;
  18.122 +    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
  18.123 +    {
  18.124 +        printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n");
  18.125 +        return ret;
  18.126 +    }
  18.127 +
  18.128 +    netop.cmd = NETOP_GET_VIF_INFO;
  18.129 +    netop.vif = np->idx;
  18.130 +    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
  18.131 +    {
  18.132 +        printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx);
  18.133 +        return ret;
  18.134 +    }
  18.135 +
  18.136 +    memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
  18.137 +
  18.138 +    set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, 
  18.139 +               netop.u.get_vif_info.ring_mfn << PAGE_SHIFT);
  18.140 +    np->net_ring = (net_ring_t *)fix_to_virt(
  18.141 +        FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
  18.142 +    np->net_idx  = &HYPERVISOR_shared_info->net_idx[np->idx];
  18.143 +
  18.144 +    np->rx_bufs_to_notify = 0;
  18.145 +    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
  18.146 +    memset(&np->stats, 0, sizeof(np->stats));
  18.147 +    spin_lock_init(&np->tx_lock);
  18.148 +    memset(np->net_ring, 0, sizeof(*np->net_ring));
  18.149 +    memset(np->net_idx, 0, sizeof(*np->net_idx));
  18.150 +
  18.151 +    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
  18.152 +    for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ )
  18.153 +        np->tx_skbs[i] = (void *)(i+1);
  18.154 +    for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ )
  18.155 +        np->rx_skbs[i] = (void *)(i+1);
  18.156 +
  18.157 +    wmb();
  18.158 +    np->state = STATE_ACTIVE;
  18.159 +
  18.160 +    network_alloc_rx_buffers(dev);
  18.161 +
  18.162 +    netif_start_queue(dev);
  18.163 +
  18.164 +    MOD_INC_USE_COUNT;
  18.165 +
  18.166 +    return 0;
  18.167 +}
  18.168 +
  18.169 +
  18.170 +static void network_tx_buf_gc(struct net_device *dev)
  18.171 +{
  18.172 +    NET_RING_IDX i, prod;
  18.173 +    unsigned short id;
  18.174 +    struct net_private *np = dev->priv;
  18.175 +    struct sk_buff *skb;
  18.176 +    tx_entry_t *tx_ring = np->net_ring->tx_ring;
  18.177 +
  18.178 +    do {
  18.179 +        prod = np->net_idx->tx_resp_prod;
  18.180 +
  18.181 +        for ( i = np->tx_resp_cons; i != prod; i++ )
  18.182 +        {
  18.183 +            id  = tx_ring[MASK_NET_TX_IDX(i)].resp.id;
  18.184 +            skb = np->tx_skbs[id];
  18.185 +            ADD_ID_TO_FREELIST(np->tx_skbs, id);
  18.186 +            dev_kfree_skb_any(skb);
  18.187 +        }
  18.188 +        
  18.189 +        np->tx_resp_cons = prod;
  18.190 +        
  18.191 +        /*
  18.192 +         * Set a new event, then check for race with update of tx_cons. Note
  18.193 +         * that it is essential to schedule a callback, no matter how few
  18.194 +         * buffers are pending. Even if there is space in the transmit ring,
  18.195 +         * higher layers may be blocked because too much data is outstanding:
  18.196 +         * in such cases notification from Xen is likely to be the only kick
  18.197 +         * that we'll get.
  18.198 +         */
  18.199 +        np->net_idx->tx_event = 
  18.200 +            prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1;
  18.201 +        mb();
  18.202 +    }
  18.203 +    while ( prod != np->net_idx->tx_resp_prod );
  18.204 +
  18.205 +    if ( np->tx_full && 
  18.206 +         ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) )
  18.207 +    {
  18.208 +        np->tx_full = 0;
  18.209 +        if ( np->state == STATE_ACTIVE )
  18.210 +            netif_wake_queue(dev);
  18.211 +    }
  18.212 +}
  18.213 +
  18.214 +
  18.215 +static inline pte_t *get_ppte(void *addr)
  18.216 +{
  18.217 +    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
  18.218 +    pgd = pgd_offset_k(   (unsigned long)addr);
  18.219 +    pmd = pmd_offset(pgd, (unsigned long)addr);
  18.220 +    pte = pte_offset(pmd, (unsigned long)addr);
  18.221 +    return pte;
  18.222 +}
  18.223 +
  18.224 +
  18.225 +static void network_alloc_rx_buffers(struct net_device *dev)
  18.226 +{
  18.227 +    unsigned short id;
  18.228 +    struct net_private *np = dev->priv;
  18.229 +    struct sk_buff *skb;
  18.230 +    netop_t netop;
  18.231 +    NET_RING_IDX i = np->net_idx->rx_req_prod;
  18.232 +
  18.233 +    if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || 
  18.234 +         unlikely(np->state != STATE_ACTIVE) )
  18.235 +        return;
  18.236 +
  18.237 +    do {
  18.238 +        skb = dev_alloc_skb(RX_BUF_SIZE);
  18.239 +        if ( unlikely(skb == NULL) )
  18.240 +            break;
  18.241 +
  18.242 +        skb->dev = dev;
  18.243 +
  18.244 +        if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) )
  18.245 +            panic("alloc_skb needs to provide us page-aligned buffers.");
  18.246 +
  18.247 +        id = GET_ID_FROM_FREELIST(np->rx_skbs);
  18.248 +        np->rx_skbs[id] = skb;
  18.249 +
  18.250 +        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id   = id;
  18.251 +        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = 
  18.252 +            virt_to_machine(get_ppte(skb->head));
  18.253 +
  18.254 +        np->rx_bufs_to_notify++;
  18.255 +    }
  18.256 +    while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE );
  18.257 +
  18.258 +    /*
  18.259 +     * We may have allocated buffers which have entries outstanding in the page
  18.260 +     * update queue -- make sure we flush those first!
  18.261 +     */
  18.262 +    flush_page_update_queue();
  18.263 +
  18.264 +    np->net_idx->rx_req_prod = i;
  18.265 +    np->net_idx->rx_event    = np->rx_resp_cons + 1;
  18.266 +        
  18.267 +    /* Batch Xen notifications. */
  18.268 +    if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) )
  18.269 +    {
  18.270 +        netop.cmd = NETOP_PUSH_BUFFERS;
  18.271 +        netop.vif = np->idx;
  18.272 +        (void)HYPERVISOR_net_io_op(&netop);
  18.273 +        np->rx_bufs_to_notify = 0;
  18.274 +    }
  18.275 +}
  18.276 +
  18.277 +
  18.278 +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
  18.279 +{
  18.280 +    unsigned short id;
  18.281 +    struct net_private *np = (struct net_private *)dev->priv;
  18.282 +    tx_req_entry_t *tx;
  18.283 +    netop_t netop;
  18.284 +    NET_RING_IDX i;
  18.285 +
  18.286 +    if ( unlikely(np->tx_full) )
  18.287 +    {
  18.288 +        printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
  18.289 +        netif_stop_queue(dev);
  18.290 +        return -ENOBUFS;
  18.291 +    }
  18.292 +
  18.293 +    if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
  18.294 +                  PAGE_SIZE) )
  18.295 +    {
  18.296 +        struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
  18.297 +        if ( unlikely(new_skb == NULL) )
  18.298 +            return 1;
  18.299 +        skb_put(new_skb, skb->len);
  18.300 +        memcpy(new_skb->data, skb->data, skb->len);
  18.301 +        dev_kfree_skb(skb);
  18.302 +        skb = new_skb;
  18.303 +    }   
  18.304 +    
  18.305 +    spin_lock_irq(&np->tx_lock);
  18.306 +
  18.307 +    i = np->net_idx->tx_req_prod;
  18.308 +
  18.309 +    id = GET_ID_FROM_FREELIST(np->tx_skbs);
  18.310 +    np->tx_skbs[id] = skb;
  18.311 +
  18.312 +    tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req;
  18.313 +
  18.314 +    tx->id   = id;
  18.315 +    tx->addr = phys_to_machine(virt_to_phys(skb->data));
  18.316 +    tx->size = skb->len;
  18.317 +
  18.318 +    wmb();
  18.319 +    np->net_idx->tx_req_prod = i + 1;
  18.320 +
  18.321 +    network_tx_buf_gc(dev);
  18.322 +
  18.323 +    if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) )
  18.324 +    {
  18.325 +        np->tx_full = 1;
  18.326 +        netif_stop_queue(dev);
  18.327 +    }
  18.328 +
  18.329 +    spin_unlock_irq(&np->tx_lock);
  18.330 +
  18.331 +    np->stats.tx_bytes += skb->len;
  18.332 +    np->stats.tx_packets++;
  18.333 +
  18.334 +    /* Only notify Xen if there are no outstanding responses. */
  18.335 +    mb();
  18.336 +    if ( np->net_idx->tx_resp_prod == i )
  18.337 +    {
  18.338 +        netop.cmd = NETOP_PUSH_BUFFERS;
  18.339 +        netop.vif = np->idx;
  18.340 +        (void)HYPERVISOR_net_io_op(&netop);
  18.341 +    }
  18.342 +
  18.343 +    return 0;
  18.344 +}
  18.345 +
  18.346 +
  18.347 +static inline void _network_interrupt(struct net_device *dev)
  18.348 +{
  18.349 +    struct net_private *np = dev->priv;
  18.350 +    unsigned long flags;
  18.351 +    struct sk_buff *skb;
  18.352 +    rx_resp_entry_t *rx;
  18.353 +    NET_RING_IDX i;
  18.354 +
  18.355 +    if ( unlikely(np->state == STATE_CLOSED) )
  18.356 +        return;
  18.357 +    
  18.358 +    spin_lock_irqsave(&np->tx_lock, flags);
  18.359 +    network_tx_buf_gc(dev);
  18.360 +    spin_unlock_irqrestore(&np->tx_lock, flags);
  18.361 +
  18.362 + again:
  18.363 +    for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ )
  18.364 +    {
  18.365 +        rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp;
  18.366 +
  18.367 +        skb = np->rx_skbs[rx->id];
  18.368 +        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
  18.369 +
  18.370 +        if ( unlikely(rx->status != RING_STATUS_OK) )
  18.371 +        {
  18.372 +            /* Gate this error. We get a (valid) slew of them on suspend. */
  18.373 +            if ( np->state == STATE_ACTIVE )
  18.374 +                printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
  18.375 +            dev_kfree_skb_any(skb);
  18.376 +            continue;
  18.377 +        }
  18.378 +
  18.379 +        /*
  18.380 +         * Set up shinfo -- from alloc_skb This was particularily nasty:  the
  18.381 +         * shared info is hidden at the back of the data area (presumably so it
  18.382 +         * can be shared), but on page flip it gets very spunked.
  18.383 +         */
  18.384 +        atomic_set(&(skb_shinfo(skb)->dataref), 1);
  18.385 +        skb_shinfo(skb)->nr_frags = 0;
  18.386 +        skb_shinfo(skb)->frag_list = NULL;
  18.387 +                                
  18.388 +        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
  18.389 +            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
  18.390 +
  18.391 +        skb->data = skb->tail = skb->head + rx->offset;
  18.392 +        skb_put(skb, rx->size);
  18.393 +        skb->protocol = eth_type_trans(skb, dev);
  18.394 +
  18.395 +        np->stats.rx_packets++;
  18.396 +
  18.397 +        np->stats.rx_bytes += rx->size;
  18.398 +        netif_rx(skb);
  18.399 +        dev->last_rx = jiffies;
  18.400 +    }
  18.401 +
  18.402 +    np->rx_resp_cons = i;
  18.403 +
  18.404 +    network_alloc_rx_buffers(dev);
  18.405 +    
  18.406 +    /* Deal with hypervisor racing our resetting of rx_event. */
  18.407 +    mb();
  18.408 +    if ( np->net_idx->rx_resp_prod != i )
  18.409 +        goto again;
  18.410 +}
  18.411 +
  18.412 +
  18.413 +static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs)
  18.414 +{
  18.415 +    struct list_head *ent;
  18.416 +    struct net_private *np;
  18.417 +    list_for_each ( ent, &dev_list )
  18.418 +    {
  18.419 +        np = list_entry(ent, struct net_private, list);
  18.420 +        _network_interrupt(np->dev);
  18.421 +    }
  18.422 +}
  18.423 +
  18.424 +
  18.425 +static int network_close(struct net_device *dev)
  18.426 +{
  18.427 +    struct net_private *np = dev->priv;
  18.428 +    netop_t netop;
  18.429 +
  18.430 +    np->state = STATE_SUSPENDED;
  18.431 +    wmb();
  18.432 +
  18.433 +    netif_stop_queue(np->dev);
  18.434 +
  18.435 +    netop.cmd = NETOP_FLUSH_BUFFERS;
  18.436 +    netop.vif = np->idx;
  18.437 +    (void)HYPERVISOR_net_io_op(&netop);
  18.438 +
  18.439 +    while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
  18.440 +            (np->tx_resp_cons != np->net_idx->tx_req_prod) )
  18.441 +    {
  18.442 +        barrier();
  18.443 +        current->state = TASK_INTERRUPTIBLE;
  18.444 +        schedule_timeout(1);
  18.445 +    }
  18.446 +
  18.447 +    wmb();
  18.448 +    np->state = STATE_CLOSED;
  18.449 +    wmb();
  18.450 +
  18.451 +    /* Now no longer safe to take interrupts for this device. */
  18.452 +    clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
  18.453 +
  18.454 +    MOD_DEC_USE_COUNT;
  18.455 +
  18.456 +    return 0;
  18.457 +}
  18.458 +
  18.459 +
  18.460 +static struct net_device_stats *network_get_stats(struct net_device *dev)
  18.461 +{
  18.462 +    struct net_private *np = (struct net_private *)dev->priv;
  18.463 +    return &np->stats;
  18.464 +}
  18.465 +
  18.466 +
  18.467 +static int __init init_module(void)
  18.468 +{
  18.469 +#if 0
  18.470 +    int i, fixmap_idx=-1, err;
  18.471 +    struct net_device *dev;
  18.472 +    struct net_private *np;
  18.473 +    netop_t netop;
  18.474 +
  18.475 +    INIT_LIST_HEAD(&dev_list);
  18.476 +
  18.477 +    network_irq = bind_virq_to_irq(VIRQ_NET);
  18.478 +    debug_irq   = bind_virq_to_irq(VIRQ_DEBUG);
  18.479 +
  18.480 +    err = request_irq(network_irq, network_interrupt, 
  18.481 +                      SA_SAMPLE_RANDOM, "network", NULL);
  18.482 +    if ( err )
  18.483 +    {
  18.484 +        printk(KERN_WARNING "Could not allocate network interrupt\n");
  18.485 +        goto fail;
  18.486 +    }
  18.487 +    
  18.488 +    err = request_irq(debug_irq, dbg_network_int, 
  18.489 +                      SA_SHIRQ, "net_dbg", &dbg_network_int);
  18.490 +    if ( err )
  18.491 +        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
  18.492 +
  18.493 +    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
  18.494 +    {
  18.495 +        /* If the VIF is invalid then the query hypercall will fail. */
  18.496 +        netop.cmd = NETOP_GET_VIF_INFO;
  18.497 +        netop.vif = i;
  18.498 +        if ( HYPERVISOR_net_io_op(&netop) != 0 )
  18.499 +            continue;
  18.500 +
  18.501 +        /* We actually only support up to 4 vifs right now. */
  18.502 +        if ( ++fixmap_idx == 4 )
  18.503 +            break;
  18.504 +
  18.505 +        dev = alloc_etherdev(sizeof(struct net_private));
  18.506 +        if ( dev == NULL )
  18.507 +        {
  18.508 +            err = -ENOMEM;
  18.509 +            goto fail;
  18.510 +        }
  18.511 +
  18.512 +        np = dev->priv;
  18.513 +        np->state               = STATE_CLOSED;
  18.514 +        np->net_ring_fixmap_idx = fixmap_idx;
  18.515 +        np->idx                 = i;
  18.516 +
  18.517 +        SET_MODULE_OWNER(dev);
  18.518 +        dev->open            = network_open;
  18.519 +        dev->hard_start_xmit = network_start_xmit;
  18.520 +        dev->stop            = network_close;
  18.521 +        dev->get_stats       = network_get_stats;
  18.522 +
  18.523 +        memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
  18.524 +
  18.525 +        if ( (err = register_netdev(dev)) != 0 )
  18.526 +        {
  18.527 +            kfree(dev);
  18.528 +            goto fail;
  18.529 +        }
  18.530 +
  18.531 +        np->dev = dev;
  18.532 +        list_add(&np->list, &dev_list);
  18.533 +    }
  18.534 +
  18.535 +    return 0;
  18.536 +
  18.537 + fail:
  18.538 +    cleanup_module();
  18.539 +    return err;
  18.540 +#endif
  18.541 +    return 0;
  18.542 +}
  18.543 +
  18.544 +
  18.545 +static void cleanup_module(void)
  18.546 +{
  18.547 +    struct net_private *np;
  18.548 +    struct net_device *dev;
  18.549 +
  18.550 +    while ( !list_empty(&dev_list) )
  18.551 +    {
  18.552 +        np = list_entry(dev_list.next, struct net_private, list);
  18.553 +        list_del(&np->list);
  18.554 +        dev = np->dev;
  18.555 +        unregister_netdev(dev);
  18.556 +        kfree(dev);
  18.557 +    }
  18.558 +
  18.559 +    free_irq(network_irq, NULL);
  18.560 +    free_irq(debug_irq, NULL);
  18.561 +
  18.562 +    unbind_virq_from_irq(VIRQ_NET);
  18.563 +    unbind_virq_from_irq(VIRQ_DEBUG);
  18.564 +}
  18.565 +
  18.566 +
  18.567 +module_init(init_module);
  18.568 +module_exit(cleanup_module);
    19.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/vnetif.c	Thu Apr 15 16:28:53 2004 +0000
    19.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.3 @@ -1,565 +0,0 @@
    19.4 -/******************************************************************************
    19.5 - * vnetif.c
    19.6 - * 
    19.7 - * Virtual network driver for XenoLinux.
    19.8 - * 
    19.9 - * Copyright (c) 2002-2004, K A Fraser
   19.10 - */
   19.11 -
   19.12 -#include <linux/config.h>
   19.13 -#include <linux/module.h>
   19.14 -
   19.15 -#include <linux/kernel.h>
   19.16 -#include <linux/sched.h>
   19.17 -#include <linux/slab.h>
   19.18 -#include <linux/string.h>
   19.19 -#include <linux/errno.h>
   19.20 -
   19.21 -#include <linux/netdevice.h>
   19.22 -#include <linux/inetdevice.h>
   19.23 -#include <linux/etherdevice.h>
   19.24 -#include <linux/skbuff.h>
   19.25 -#include <linux/init.h>
   19.26 -
   19.27 -#include <asm/io.h>
   19.28 -#include <net/sock.h>
   19.29 -#include <net/pkt_sched.h>
   19.30 -
   19.31 -#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
   19.32 -
   19.33 -static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
   19.34 -static void network_tx_buf_gc(struct net_device *dev);
   19.35 -static void network_alloc_rx_buffers(struct net_device *dev);
   19.36 -static void cleanup_module(void);
   19.37 -
   19.38 -/* Dynamically-mapped IRQs. */
   19.39 -static int network_irq, debug_irq;
   19.40 -
   19.41 -static struct list_head dev_list;
   19.42 -
   19.43 -struct net_private
   19.44 -{
   19.45 -    struct list_head list;
   19.46 -    struct net_device *dev;
   19.47 -
   19.48 -    struct net_device_stats stats;
   19.49 -    NET_RING_IDX rx_resp_cons, tx_resp_cons;
   19.50 -    unsigned int net_ring_fixmap_idx, tx_full;
   19.51 -    net_ring_t  *net_ring;
   19.52 -    net_idx_t   *net_idx;
   19.53 -    spinlock_t   tx_lock;
   19.54 -    unsigned int idx; /* Domain-specific index of this VIF. */
   19.55 -
   19.56 -    unsigned int rx_bufs_to_notify;
   19.57 -
   19.58 -#define STATE_ACTIVE    0
   19.59 -#define STATE_SUSPENDED 1
   19.60 -#define STATE_CLOSED    2
   19.61 -    unsigned int state;
   19.62 -
   19.63 -    /*
   19.64 -     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
   19.65 -     * array is an index into a chain of free entries.
   19.66 -     */
   19.67 -    struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1];
   19.68 -    struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1];
   19.69 -};
   19.70 -
   19.71 -/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
   19.72 -#define ADD_ID_TO_FREELIST(_list, _id)             \
   19.73 -    (_list)[(_id)] = (_list)[0];                   \
   19.74 -    (_list)[0]     = (void *)(unsigned long)(_id);
   19.75 -#define GET_ID_FROM_FREELIST(_list)                \
   19.76 - ({ unsigned long _id = (unsigned long)(_list)[0]; \
   19.77 -    (_list)[0]  = (_list)[_id];                    \
   19.78 -    (unsigned short)_id; })
   19.79 -
   19.80 -
   19.81 -static void _dbg_network_int(struct net_device *dev)
   19.82 -{
   19.83 -    struct net_private *np = dev->priv;
   19.84 -
   19.85 -    if ( np->state == STATE_CLOSED )
   19.86 -        return;
   19.87 -    
   19.88 -    printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x,"
   19.89 -           " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x,"
   19.90 -           " tx_event=0x%08x, state=%d\n",
   19.91 -           np->tx_full, np->tx_resp_cons, 
   19.92 -           np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, 
   19.93 -           np->net_idx->tx_event,
   19.94 -           test_bit(__LINK_STATE_XOFF, &dev->state));
   19.95 -    printk(KERN_ALERT "net: rx_resp_cons=0x%08x,"
   19.96 -           " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n",
   19.97 -           np->rx_resp_cons, np->net_idx->rx_req_prod,
   19.98 -           np->net_idx->rx_resp_prod, np->net_idx->rx_event);
   19.99 -}
  19.100 -
  19.101 -
  19.102 -static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs)
  19.103 -{
  19.104 -    struct list_head *ent;
  19.105 -    struct net_private *np;
  19.106 -    list_for_each ( ent, &dev_list )
  19.107 -    {
  19.108 -        np = list_entry(ent, struct net_private, list);
  19.109 -        _dbg_network_int(np->dev);
  19.110 -    }
  19.111 -}
  19.112 -
  19.113 -
  19.114 -static int network_open(struct net_device *dev)
  19.115 -{
  19.116 -    struct net_private *np = dev->priv;
  19.117 -    netop_t netop;
  19.118 -    int i, ret;
  19.119 -
  19.120 -    netop.cmd = NETOP_RESET_RINGS;
  19.121 -    netop.vif = np->idx;
  19.122 -    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
  19.123 -    {
  19.124 -        printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n");
  19.125 -        return ret;
  19.126 -    }
  19.127 -
  19.128 -    netop.cmd = NETOP_GET_VIF_INFO;
  19.129 -    netop.vif = np->idx;
  19.130 -    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
  19.131 -    {
  19.132 -        printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx);
  19.133 -        return ret;
  19.134 -    }
  19.135 -
  19.136 -    memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
  19.137 -
  19.138 -    set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, 
  19.139 -               netop.u.get_vif_info.ring_mfn << PAGE_SHIFT);
  19.140 -    np->net_ring = (net_ring_t *)fix_to_virt(
  19.141 -        FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
  19.142 -    np->net_idx  = &HYPERVISOR_shared_info->net_idx[np->idx];
  19.143 -
  19.144 -    np->rx_bufs_to_notify = 0;
  19.145 -    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
  19.146 -    memset(&np->stats, 0, sizeof(np->stats));
  19.147 -    spin_lock_init(&np->tx_lock);
  19.148 -    memset(np->net_ring, 0, sizeof(*np->net_ring));
  19.149 -    memset(np->net_idx, 0, sizeof(*np->net_idx));
  19.150 -
  19.151 -    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
  19.152 -    for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ )
  19.153 -        np->tx_skbs[i] = (void *)(i+1);
  19.154 -    for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ )
  19.155 -        np->rx_skbs[i] = (void *)(i+1);
  19.156 -
  19.157 -    wmb();
  19.158 -    np->state = STATE_ACTIVE;
  19.159 -
  19.160 -    network_alloc_rx_buffers(dev);
  19.161 -
  19.162 -    netif_start_queue(dev);
  19.163 -
  19.164 -    MOD_INC_USE_COUNT;
  19.165 -
  19.166 -    return 0;
  19.167 -}
  19.168 -
  19.169 -
  19.170 -static void network_tx_buf_gc(struct net_device *dev)
  19.171 -{
  19.172 -    NET_RING_IDX i, prod;
  19.173 -    unsigned short id;
  19.174 -    struct net_private *np = dev->priv;
  19.175 -    struct sk_buff *skb;
  19.176 -    tx_entry_t *tx_ring = np->net_ring->tx_ring;
  19.177 -
  19.178 -    do {
  19.179 -        prod = np->net_idx->tx_resp_prod;
  19.180 -
  19.181 -        for ( i = np->tx_resp_cons; i != prod; i++ )
  19.182 -        {
  19.183 -            id  = tx_ring[MASK_NET_TX_IDX(i)].resp.id;
  19.184 -            skb = np->tx_skbs[id];
  19.185 -            ADD_ID_TO_FREELIST(np->tx_skbs, id);
  19.186 -            dev_kfree_skb_any(skb);
  19.187 -        }
  19.188 -        
  19.189 -        np->tx_resp_cons = prod;
  19.190 -        
  19.191 -        /*
  19.192 -         * Set a new event, then check for race with update of tx_cons. Note
  19.193 -         * that it is essential to schedule a callback, no matter how few
  19.194 -         * buffers are pending. Even if there is space in the transmit ring,
  19.195 -         * higher layers may be blocked because too much data is outstanding:
  19.196 -         * in such cases notification from Xen is likely to be the only kick
  19.197 -         * that we'll get.
  19.198 -         */
  19.199 -        np->net_idx->tx_event = 
  19.200 -            prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1;
  19.201 -        mb();
  19.202 -    }
  19.203 -    while ( prod != np->net_idx->tx_resp_prod );
  19.204 -
  19.205 -    if ( np->tx_full && 
  19.206 -         ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) )
  19.207 -    {
  19.208 -        np->tx_full = 0;
  19.209 -        if ( np->state == STATE_ACTIVE )
  19.210 -            netif_wake_queue(dev);
  19.211 -    }
  19.212 -}
  19.213 -
  19.214 -
  19.215 -static inline pte_t *get_ppte(void *addr)
  19.216 -{
  19.217 -    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
  19.218 -    pgd = pgd_offset_k(   (unsigned long)addr);
  19.219 -    pmd = pmd_offset(pgd, (unsigned long)addr);
  19.220 -    pte = pte_offset(pmd, (unsigned long)addr);
  19.221 -    return pte;
  19.222 -}
  19.223 -
  19.224 -
  19.225 -static void network_alloc_rx_buffers(struct net_device *dev)
  19.226 -{
  19.227 -    unsigned short id;
  19.228 -    struct net_private *np = dev->priv;
  19.229 -    struct sk_buff *skb;
  19.230 -    netop_t netop;
  19.231 -    NET_RING_IDX i = np->net_idx->rx_req_prod;
  19.232 -
  19.233 -    if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) || 
  19.234 -         unlikely(np->state != STATE_ACTIVE) )
  19.235 -        return;
  19.236 -
  19.237 -    do {
  19.238 -        skb = dev_alloc_skb(RX_BUF_SIZE);
  19.239 -        if ( unlikely(skb == NULL) )
  19.240 -            break;
  19.241 -
  19.242 -        skb->dev = dev;
  19.243 -
  19.244 -        if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) )
  19.245 -            panic("alloc_skb needs to provide us page-aligned buffers.");
  19.246 -
  19.247 -        id = GET_ID_FROM_FREELIST(np->rx_skbs);
  19.248 -        np->rx_skbs[id] = skb;
  19.249 -
  19.250 -        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id   = id;
  19.251 -        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = 
  19.252 -            virt_to_machine(get_ppte(skb->head));
  19.253 -
  19.254 -        np->rx_bufs_to_notify++;
  19.255 -    }
  19.256 -    while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE );
  19.257 -
  19.258 -    /*
  19.259 -     * We may have allocated buffers which have entries outstanding in the page
  19.260 -     * update queue -- make sure we flush those first!
  19.261 -     */
  19.262 -    flush_page_update_queue();
  19.263 -
  19.264 -    np->net_idx->rx_req_prod = i;
  19.265 -    np->net_idx->rx_event    = np->rx_resp_cons + 1;
  19.266 -        
  19.267 -    /* Batch Xen notifications. */
  19.268 -    if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) )
  19.269 -    {
  19.270 -        netop.cmd = NETOP_PUSH_BUFFERS;
  19.271 -        netop.vif = np->idx;
  19.272 -        (void)HYPERVISOR_net_io_op(&netop);
  19.273 -        np->rx_bufs_to_notify = 0;
  19.274 -    }
  19.275 -}
  19.276 -
  19.277 -
  19.278 -static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
  19.279 -{
  19.280 -    unsigned short id;
  19.281 -    struct net_private *np = (struct net_private *)dev->priv;
  19.282 -    tx_req_entry_t *tx;
  19.283 -    netop_t netop;
  19.284 -    NET_RING_IDX i;
  19.285 -
  19.286 -    if ( unlikely(np->tx_full) )
  19.287 -    {
  19.288 -        printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
  19.289 -        netif_stop_queue(dev);
  19.290 -        return -ENOBUFS;
  19.291 -    }
  19.292 -
  19.293 -    if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
  19.294 -                  PAGE_SIZE) )
  19.295 -    {
  19.296 -        struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
  19.297 -        if ( unlikely(new_skb == NULL) )
  19.298 -            return 1;
  19.299 -        skb_put(new_skb, skb->len);
  19.300 -        memcpy(new_skb->data, skb->data, skb->len);
  19.301 -        dev_kfree_skb(skb);
  19.302 -        skb = new_skb;
  19.303 -    }   
  19.304 -    
  19.305 -    spin_lock_irq(&np->tx_lock);
  19.306 -
  19.307 -    i = np->net_idx->tx_req_prod;
  19.308 -
  19.309 -    id = GET_ID_FROM_FREELIST(np->tx_skbs);
  19.310 -    np->tx_skbs[id] = skb;
  19.311 -
  19.312 -    tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req;
  19.313 -
  19.314 -    tx->id   = id;
  19.315 -    tx->addr = phys_to_machine(virt_to_phys(skb->data));
  19.316 -    tx->size = skb->len;
  19.317 -
  19.318 -    wmb();
  19.319 -    np->net_idx->tx_req_prod = i + 1;
  19.320 -
  19.321 -    network_tx_buf_gc(dev);
  19.322 -
  19.323 -    if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) )
  19.324 -    {
  19.325 -        np->tx_full = 1;
  19.326 -        netif_stop_queue(dev);
  19.327 -    }
  19.328 -
  19.329 -    spin_unlock_irq(&np->tx_lock);
  19.330 -
  19.331 -    np->stats.tx_bytes += skb->len;
  19.332 -    np->stats.tx_packets++;
  19.333 -
  19.334 -    /* Only notify Xen if there are no outstanding responses. */
  19.335 -    mb();
  19.336 -    if ( np->net_idx->tx_resp_prod == i )
  19.337 -    {
  19.338 -        netop.cmd = NETOP_PUSH_BUFFERS;
  19.339 -        netop.vif = np->idx;
  19.340 -        (void)HYPERVISOR_net_io_op(&netop);
  19.341 -    }
  19.342 -
  19.343 -    return 0;
  19.344 -}
  19.345 -
  19.346 -
  19.347 -static inline void _network_interrupt(struct net_device *dev)
  19.348 -{
  19.349 -    struct net_private *np = dev->priv;
  19.350 -    unsigned long flags;
  19.351 -    struct sk_buff *skb;
  19.352 -    rx_resp_entry_t *rx;
  19.353 -    NET_RING_IDX i;
  19.354 -
  19.355 -    if ( unlikely(np->state == STATE_CLOSED) )
  19.356 -        return;
  19.357 -    
  19.358 -    spin_lock_irqsave(&np->tx_lock, flags);
  19.359 -    network_tx_buf_gc(dev);
  19.360 -    spin_unlock_irqrestore(&np->tx_lock, flags);
  19.361 -
  19.362 - again:
  19.363 -    for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ )
  19.364 -    {
  19.365 -        rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp;
  19.366 -
  19.367 -        skb = np->rx_skbs[rx->id];
  19.368 -        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
  19.369 -
  19.370 -        if ( unlikely(rx->status != RING_STATUS_OK) )
  19.371 -        {
  19.372 -            /* Gate this error. We get a (valid) slew of them on suspend. */
  19.373 -            if ( np->state == STATE_ACTIVE )
  19.374 -                printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
  19.375 -            dev_kfree_skb_any(skb);
  19.376 -            continue;
  19.377 -        }
  19.378 -
  19.379 -        /*
  19.380 -         * Set up shinfo -- from alloc_skb This was particularily nasty:  the
  19.381 -         * shared info is hidden at the back of the data area (presumably so it
  19.382 -         * can be shared), but on page flip it gets very spunked.
  19.383 -         */
  19.384 -        atomic_set(&(skb_shinfo(skb)->dataref), 1);
  19.385 -        skb_shinfo(skb)->nr_frags = 0;
  19.386 -        skb_shinfo(skb)->frag_list = NULL;
  19.387 -                                
  19.388 -        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
  19.389 -            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
  19.390 -
  19.391 -        skb->data = skb->tail = skb->head + rx->offset;
  19.392 -        skb_put(skb, rx->size);
  19.393 -        skb->protocol = eth_type_trans(skb, dev);
  19.394 -
  19.395 -        np->stats.rx_packets++;
  19.396 -
  19.397 -        np->stats.rx_bytes += rx->size;
  19.398 -        netif_rx(skb);
  19.399 -        dev->last_rx = jiffies;
  19.400 -    }
  19.401 -
  19.402 -    np->rx_resp_cons = i;
  19.403 -
  19.404 -    network_alloc_rx_buffers(dev);
  19.405 -    
  19.406 -    /* Deal with hypervisor racing our resetting of rx_event. */
  19.407 -    mb();
  19.408 -    if ( np->net_idx->rx_resp_prod != i )
  19.409 -        goto again;
  19.410 -}
  19.411 -
  19.412 -
  19.413 -static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs)
  19.414 -{
  19.415 -    struct list_head *ent;
  19.416 -    struct net_private *np;
  19.417 -    list_for_each ( ent, &dev_list )
  19.418 -    {
  19.419 -        np = list_entry(ent, struct net_private, list);
  19.420 -        _network_interrupt(np->dev);
  19.421 -    }
  19.422 -}
  19.423 -
  19.424 -
  19.425 -static int network_close(struct net_device *dev)
  19.426 -{
  19.427 -    struct net_private *np = dev->priv;
  19.428 -    netop_t netop;
  19.429 -
  19.430 -    np->state = STATE_SUSPENDED;
  19.431 -    wmb();
  19.432 -
  19.433 -    netif_stop_queue(np->dev);
  19.434 -
  19.435 -    netop.cmd = NETOP_FLUSH_BUFFERS;
  19.436 -    netop.vif = np->idx;
  19.437 -    (void)HYPERVISOR_net_io_op(&netop);
  19.438 -
  19.439 -    while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
  19.440 -            (np->tx_resp_cons != np->net_idx->tx_req_prod) )
  19.441 -    {
  19.442 -        barrier();
  19.443 -        current->state = TASK_INTERRUPTIBLE;
  19.444 -        schedule_timeout(1);
  19.445 -    }
  19.446 -
  19.447 -    wmb();
  19.448 -    np->state = STATE_CLOSED;
  19.449 -    wmb();
  19.450 -
  19.451 -    /* Now no longer safe to take interrupts for this device. */
  19.452 -    clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
  19.453 -
  19.454 -    MOD_DEC_USE_COUNT;
  19.455 -
  19.456 -    return 0;
  19.457 -}
  19.458 -
  19.459 -
  19.460 -static struct net_device_stats *network_get_stats(struct net_device *dev)
  19.461 -{
  19.462 -    struct net_private *np = (struct net_private *)dev->priv;
  19.463 -    return &np->stats;
  19.464 -}
  19.465 -
  19.466 -
  19.467 -static int __init init_module(void)
  19.468 -{
  19.469 -#if 0
  19.470 -    int i, fixmap_idx=-1, err;
  19.471 -    struct net_device *dev;
  19.472 -    struct net_private *np;
  19.473 -    netop_t netop;
  19.474 -
  19.475 -    INIT_LIST_HEAD(&dev_list);
  19.476 -
  19.477 -    network_irq = bind_virq_to_irq(VIRQ_NET);
  19.478 -    debug_irq   = bind_virq_to_irq(VIRQ_DEBUG);
  19.479 -
  19.480 -    err = request_irq(network_irq, network_interrupt, 
  19.481 -                      SA_SAMPLE_RANDOM, "network", NULL);
  19.482 -    if ( err )
  19.483 -    {
  19.484 -        printk(KERN_WARNING "Could not allocate network interrupt\n");
  19.485 -        goto fail;
  19.486 -    }
  19.487 -    
  19.488 -    err = request_irq(debug_irq, dbg_network_int, 
  19.489 -                      SA_SHIRQ, "net_dbg", &dbg_network_int);
  19.490 -    if ( err )
  19.491 -        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
  19.492 -
  19.493 -    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
  19.494 -    {
  19.495 -        /* If the VIF is invalid then the query hypercall will fail. */
  19.496 -        netop.cmd = NETOP_GET_VIF_INFO;
  19.497 -        netop.vif = i;
  19.498 -        if ( HYPERVISOR_net_io_op(&netop) != 0 )
  19.499 -            continue;
  19.500 -
  19.501 -        /* We actually only support up to 4 vifs right now. */
  19.502 -        if ( ++fixmap_idx == 4 )
  19.503 -            break;
  19.504 -
  19.505 -        dev = alloc_etherdev(sizeof(struct net_private));
  19.506 -        if ( dev == NULL )
  19.507 -        {
  19.508 -            err = -ENOMEM;
  19.509 -            goto fail;
  19.510 -        }
  19.511 -
  19.512 -        np = dev->priv;
  19.513 -        np->state               = STATE_CLOSED;
  19.514 -        np->net_ring_fixmap_idx = fixmap_idx;
  19.515 -        np->idx                 = i;
  19.516 -
  19.517 -        SET_MODULE_OWNER(dev);
  19.518 -        dev->open            = network_open;
  19.519 -        dev->hard_start_xmit = network_start_xmit;
  19.520 -        dev->stop            = network_close;
  19.521 -        dev->get_stats       = network_get_stats;
  19.522 -
  19.523 -        memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
  19.524 -
  19.525 -        if ( (err = register_netdev(dev)) != 0 )
  19.526 -        {
  19.527 -            kfree(dev);
  19.528 -            goto fail;
  19.529 -        }
  19.530 -
  19.531 -        np->dev = dev;
  19.532 -        list_add(&np->list, &dev_list);
  19.533 -    }
  19.534 -
  19.535 -    return 0;
  19.536 -
  19.537 - fail:
  19.538 -    cleanup_module();
  19.539 -    return err;
  19.540 -#endif
  19.541 -    return 0;
  19.542 -}
  19.543 -
  19.544 -
  19.545 -static void cleanup_module(void)
  19.546 -{
  19.547 -    struct net_private *np;
  19.548 -    struct net_device *dev;
  19.549 -
  19.550 -    while ( !list_empty(&dev_list) )
  19.551 -    {
  19.552 -        np = list_entry(dev_list.next, struct net_private, list);
  19.553 -        list_del(&np->list);
  19.554 -        dev = np->dev;
  19.555 -        unregister_netdev(dev);
  19.556 -        kfree(dev);
  19.557 -    }
  19.558 -
  19.559 -    free_irq(network_irq, NULL);
  19.560 -    free_irq(debug_irq, NULL);
  19.561 -
  19.562 -    unbind_virq_from_irq(VIRQ_NET);
  19.563 -    unbind_virq_from_irq(VIRQ_DEBUG);
  19.564 -}
  19.565 -
  19.566 -
  19.567 -module_init(init_module);
  19.568 -module_exit(cleanup_module);