ia64/xen-unstable

changeset 1312:de457488a115

bitkeeper revision 1.872 (4087cf0eay7XY7T1xObNygn1qSwJ0g)

Partial checkin of new blkdev backend in Xenolinux. Also updates
to the mmu_update interface.
author kaf24@scramble.cl.cam.ac.uk
date Thu Apr 22 13:56:30 2004 +0000 (2004-04-22)
parents 822d2f4f8e1d
children bbc618396572
files .rootkeys tools/xc/lib/xc_linux_build.c tools/xc/lib/xc_linux_restore.c tools/xc/lib/xc_linux_save.c tools/xc/lib/xc_netbsd_build.c tools/xc/lib/xc_private.c tools/xc/lib/xc_private.h tools/xend/lib/domain_controller.h tools/xend/lib/utils.c xen/common/memory.c xen/include/hypervisor-ifs/hypervisor-if.h xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c xenolinux-2.4.26-sparse/drivers/char/mem.c xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h xenolinux-2.4.26-sparse/include/asm-xen/evtchn.h xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h
line diff
     1.1 --- a/.rootkeys	Wed Apr 21 10:43:06 2004 +0000
     1.2 +++ b/.rootkeys	Thu Apr 22 13:56:30 2004 +0000
     1.3 @@ -671,12 +671,18 @@ 3e5a4e65gZBRBB6RsSVg1c9iahigAw xenolinux
     1.4  3e5a4e65ZxKrbFetVB84JhrTyZ1YuQ xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c
     1.5  4083dc16z0jvZEH4PiVDbDRreaNp6w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/Makefile
     1.6  4083dc16KQus88a4U3uCV6qVCA6_8Q xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile
     1.7 +4087cf0dPeHOvzmZAazvwLslKEF93A xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h
     1.8 +4087cf0da2cROOiybf9A-j4R_yHnjg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c
     1.9 +4087cf0dvXL1PKX23t_LvO1wVPb7OA xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c
    1.10 +4087cf0dkVF3I19gpT1cNubeJgQr7g xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c
    1.11 +4087cf0dlv1Dw4MAbeRStPPG8IvPPg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c
    1.12  4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/Makefile
    1.13  4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.c
    1.14  4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/block.h
    1.15  4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/frontend/vbd.c
    1.16  4083dc16-Kd5y9psK_yk161sme5j5Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/Makefile
    1.17  4083dc16UmHXxS9g_UFVnkUpN-oP2Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/Makefile
    1.18 +4087cf0d5dudKw_DecIJgOhLlBF_0Q xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c
    1.19  405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/Makefile
    1.20  405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/frontend/vnetif.c
    1.21  3e5a4e65lWzkiPXsZdzPt2RNnJGG1g xenolinux-2.4.26-sparse/arch/xen/kernel/Makefile
     2.1 --- a/tools/xc/lib/xc_linux_build.c	Wed Apr 21 10:43:06 2004 +0000
     2.2 +++ b/tools/xc/lib/xc_linux_build.c	Thu Apr 22 13:56:30 2004 +0000
     2.3 @@ -165,7 +165,7 @@ static int setup_guestos(int xc_handle,
     2.4  
     2.5      memset(builddomain, 0, sizeof(*builddomain));
     2.6  
     2.7 -    if ( (pm_handle = init_pfn_mapper()) < 0 )
     2.8 +    if ( (pm_handle = init_pfn_mapper((domid_t)dom)) < 0 )
     2.9          goto error_out;
    2.10  
    2.11      if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
     3.1 --- a/tools/xc/lib/xc_linux_restore.c	Wed Apr 21 10:43:06 2004 +0000
     3.2 +++ b/tools/xc/lib/xc_linux_restore.c	Thu Apr 22 13:56:30 2004 +0000
     3.3 @@ -186,7 +186,7 @@ int xc_linux_restore(int xc_handle,
     3.4      }
     3.5      shared_info_frame = op.u.getdomaininfo.shared_info_frame;
     3.6  
     3.7 -    if ( (pm_handle = init_pfn_mapper()) < 0 )
     3.8 +    if ( (pm_handle = init_pfn_mapper((domid_t)dom)) < 0 )
     3.9          goto out;
    3.10  
    3.11      /* Copy saved contents of shared-info page. No checking needed. */
     4.1 --- a/tools/xc/lib/xc_linux_save.c	Wed Apr 21 10:43:06 2004 +0000
     4.2 +++ b/tools/xc/lib/xc_linux_save.c	Thu Apr 22 13:56:30 2004 +0000
     4.3 @@ -178,7 +178,7 @@ int xc_linux_save(int xc_handle,
     4.4          goto out;
     4.5      }
     4.6  
     4.7 -    if ( (pm_handle = init_pfn_mapper()) < 0 )
     4.8 +    if ( (pm_handle = init_pfn_mapper((domid_t)domid)) < 0 )
     4.9          goto out;
    4.10  
    4.11      /* Is the suspend-record MFN actually valid for this domain? */
     5.1 --- a/tools/xc/lib/xc_netbsd_build.c	Wed Apr 21 10:43:06 2004 +0000
     5.2 +++ b/tools/xc/lib/xc_netbsd_build.c	Thu Apr 22 13:56:30 2004 +0000
     5.3 @@ -80,7 +80,7 @@ static int setup_guestos(int xc_handle,
     5.4  
     5.5      memset(builddomain, 0, sizeof(*builddomain));
     5.6  
     5.7 -    if ( (pm_handle = init_pfn_mapper()) < 0 )
     5.8 +    if ( (pm_handle = init_pfn_mapper((domid_t)dom)) < 0 )
     5.9          goto error_out;
    5.10  
    5.11      if ( (page_array = malloc(tot_pages * sizeof(unsigned long))) == NULL )
     6.1 --- a/tools/xc/lib/xc_private.c	Wed Apr 21 10:43:06 2004 +0000
     6.2 +++ b/tools/xc/lib/xc_private.c	Thu Apr 22 13:56:30 2004 +0000
     6.3 @@ -6,9 +6,15 @@
     6.4  
     6.5  #include "xc_private.h"
     6.6  
     6.7 -int init_pfn_mapper(void)
     6.8 +int init_pfn_mapper(domid_t domid)
     6.9  {
    6.10 -    return open("/dev/mem", O_RDWR);
    6.11 +    int fd = open("/dev/mem", O_RDWR);
    6.12 +    if ( fd >= 0 )
    6.13 +    {
    6.14 +        (void)ioctl(fd, _IO('M', 1), (unsigned long)(domid>> 0)); /* low  */
    6.15 +        (void)ioctl(fd, _IO('M', 2), (unsigned long)(domid>>32)); /* high */
    6.16 +    }
    6.17 +    return fd;
    6.18  }
    6.19  
    6.20  int close_pfn_mapper(int pm_handle)
    6.21 @@ -49,7 +55,7 @@ static int flush_mmu_updates(int xc_hand
    6.22      if ( mmu->idx == FIRST_MMU_UPDATE )
    6.23          return 0;
    6.24  
    6.25 -    /* The first two requests set the correct subject domain. */
    6.26 +    /* The first two requests set the correct subject domain (PTS and GPS). */
    6.27      mmu->updates[0].val  = (unsigned long)(mmu->subject<<16) & ~0xFFFFUL;
    6.28      mmu->updates[0].ptr  = (unsigned long)(mmu->subject<< 0) & ~0xFFFFUL;
    6.29      mmu->updates[1].val  = (unsigned long)(mmu->subject>>16) & ~0xFFFFUL;
    6.30 @@ -57,7 +63,7 @@ static int flush_mmu_updates(int xc_hand
    6.31      mmu->updates[0].ptr |= MMU_EXTENDED_COMMAND;
    6.32      mmu->updates[0].val |= MMUEXT_SET_SUBJECTDOM_L;
    6.33      mmu->updates[1].ptr |= MMU_EXTENDED_COMMAND;
    6.34 -    mmu->updates[1].val |= MMUEXT_SET_SUBJECTDOM_H;
    6.35 +    mmu->updates[1].val |= MMUEXT_SET_SUBJECTDOM_H | SET_PAGETABLE_SUBJECTDOM;
    6.36  
    6.37      hypercall.op     = __HYPERVISOR_mmu_update;
    6.38      hypercall.arg[0] = (unsigned long)mmu->updates;
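
The hunk above binds the /dev/mem pfn mapper to a subject domain at open time: the two ioctls pass the low and high 32 bits of the 64-bit domid to the kernel driver, so that subsequent pfn mappings refer to that domain's frames. A minimal sketch of a caller follows; it uses the map_pfn_writeable()/close_pfn_mapper() helpers declared in xc_private.h below, while unmap_pfn(), PAGE_SIZE and the NULL-on-failure convention of the mapper are assumptions rather than things shown in this hunk.

    /* Minimal sketch (hypothetical caller): map one frame of domain 'dom'
     * and zero it. unmap_pfn(), PAGE_SIZE and the NULL return on mapping
     * failure are assumed, not shown in this changeset. */
    #include <string.h>
    #include "xc_private.h"

    static int zero_domain_frame(domid_t dom, unsigned long pfn)
    {
        int   pm_handle;
        void *va;

        /* The mapper is now bound to its subject domain at open time. */
        if ( (pm_handle = init_pfn_mapper(dom)) < 0 )
            return -1;

        /* Subsequent mappings refer to 'dom's frames. */
        if ( (va = map_pfn_writeable(pm_handle, pfn)) == NULL )
        {
            (void)close_pfn_mapper(pm_handle);
            return -1;
        }

        memset(va, 0, PAGE_SIZE);
        unmap_pfn(pm_handle, va);               /* assumed helper */
        (void)close_pfn_mapper(pm_handle);
        return 0;
    }

xend's utils.c further down applies the same pattern, issuing the two ioctls on its already-open mem_fd so that later mappings target the given domain.
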
     7.1 --- a/tools/xc/lib/xc_private.h	Wed Apr 21 10:43:06 2004 +0000
     7.2 +++ b/tools/xc/lib/xc_private.h	Thu Apr 22 13:56:30 2004 +0000
     7.3 @@ -154,7 +154,7 @@ static inline int do_block_io_op(int xc_
     7.4  /*
     7.5   * PFN mapping.
     7.6   */
     7.7 -int init_pfn_mapper(void);
     7.8 +int init_pfn_mapper(domid_t domid);
     7.9  int close_pfn_mapper(int pm_handle);
    7.10  void *map_pfn_writeable(int pm_handle, unsigned long pfn);
    7.11  void *map_pfn_readonly(int pm_handle, unsigned long pfn);
     8.1 --- a/tools/xend/lib/domain_controller.h	Wed Apr 21 10:43:06 2004 +0000
     8.2 +++ b/tools/xend/lib/domain_controller.h	Thu Apr 22 13:56:30 2004 +0000
     8.3 @@ -49,8 +49,116 @@ typedef struct {
     8.4      CONTROL_RING_IDX rx_req_prod, rx_resp_prod;
     8.5  } control_if_t;
     8.6  
     8.7 -#define CMSG_CONSOLE      0
     8.8 -#define CMSG_CONSOLE_DATA 0
     8.9 +/*
    8.10 + * Top-level command types.
    8.11 + */
    8.12 +#define CMSG_CONSOLE            0  /* Console               */
    8.13 +#define CMSG_BLKIF_BE           1  /* Block-device backend  */
    8.14 +#define CMSG_BLKIF_FE           2  /* Block-device frontend */
    8.15 +
    8.16 +/*
    8.17 + * Subtypes for console messages.
    8.18 + */
    8.19 +#define CMSG_CONSOLE_DATA       0
    8.20 +
    8.21 +/*
    8.22 + * Subtypes for block-device messages.
    8.23 + */
    8.24 +#define CMSG_BLKIF_BE_CREATE      0  /* Create a new block-device interface. */
    8.25 +#define CMSG_BLKIF_BE_DESTROY     1  /* Destroy a block-device interface.    */
    8.26 +#define CMSG_BLKIF_BE_VBD_CREATE  2  /* Create a new VBD for an interface.   */
    8.27 +#define CMSG_BLKIF_BE_VBD_DESTROY 3  /* Delete a VBD from an interface.      */
    8.28 +#define CMSG_BLKIF_BE_VBD_GROW    4  /* Append an extent to a given VBD.     */
    8.29 +#define CMSG_BLKIF_BE_VBD_SHRINK  5  /* Remove last extent from a given VBD. */
    8.30 +
    8.31 +/*
     8.32 + * Message request/response definitions for block-device messages.
    8.33 + */
    8.34 +
    8.35 +typedef u16 blkif_vdev_t;
    8.36 +typedef u16 blkif_pdev_t;
    8.37 +typedef u64 blkif_sector_t;
    8.38 +typedef struct {
    8.39 +    blkif_pdev_t   device;
    8.40 +    blkif_sector_t sector_start;
    8.41 +    blkif_sector_t sector_length;
    8.42 +} blkif_extent_t;
    8.43 +
    8.44 +/* Non-specific 'okay' return. */
    8.45 +#define BLKIF_STATUS_OKAY                0
    8.46 +/* Non-specific 'error' return. */
    8.47 +#define BLKIF_STATUS_ERROR               1
    8.48 +/* The following are specific error returns. */
    8.49 +#define BLKIF_STATUS_INTERFACE_EXISTS    2
    8.50 +#define BLKIF_STATUS_INTERFACE_NOT_FOUND 3
    8.51 +
    8.52 +/* This macro can be used to create an array of descriptive error strings. */
    8.53 +#define BLKIF_STATUS_ERRORS {    \
    8.54 +    "Okay",                      \
    8.55 +    "Non-specific error",        \
    8.56 +    "Interface already exists",  \
    8.57 +    "Interface not found" }
    8.58  
    8.59 +/* CMSG_BLKIF_CREATE */
    8.60 +typedef struct { 
    8.61 +    /* IN */
    8.62 +    domid_t        domid;             /* Domain attached to new interface.   */
    8.63 +    unsigned int   blkif_handle;      /* Domain-specific interface handle.   */
    8.64 +    unsigned int   evtchn_port;       /* Event channel for notifications.    */
     8.65 +    unsigned long  shmem_frame;       /* Page containing shared comms window.*/
    8.66 +    /* OUT */
    8.67 +    unsigned int   status;
    8.68 +} blkif_create_t; 
    8.69 +
    8.70 +/* CMSG_BLKIF_DESTROY */
    8.71 +typedef struct { 
    8.72 +    /* IN */
    8.73 +    domid_t        domid;             /* Identify interface to be destroyed. */
    8.74 +    unsigned int   blkif_handle;      /* ...ditto...                         */
    8.75 +    /* OUT */
    8.76 +    unsigned int   status;
    8.77 +} blkif_destroy_t; 
    8.78 +
    8.79 +/* CMSG_BLKIF_VBD_CREATE */
    8.80 +typedef struct { 
    8.81 +    /* IN */
    8.82 +    domid_t        domid;             /* Identify blkdev interface.          */
    8.83 +    unsigned int   blkif_handle;      /* ...ditto...                         */
    8.84 +    blkif_vdev_t   vdevice;           /* Interface-specific id for this VBD. */
    8.85 +    int            readonly;          /* Non-zero -> VBD isn't writeable.    */
    8.86 +    /* OUT */
    8.87 +    unsigned int   status;
    8.88 +} blkif_vbd_create_t; 
    8.89 +
    8.90 +/* CMSG_BLKIF_VBD_DESTROY */
    8.91 +typedef struct {
    8.92 +    /* IN */
    8.93 +    domid_t        domid;             /* Identify blkdev interface.          */
    8.94 +    unsigned int   blkif_handle;      /* ...ditto...                         */
    8.95 +    blkif_vdev_t   vdevice;           /* Interface-specific id of the VBD.   */
    8.96 +    /* OUT */
    8.97 +    unsigned int   status;
    8.98 +} blkif_vbd_destroy_t; 
    8.99 +
   8.100 +/* CMSG_BLKIF_VBD_GROW */
   8.101 +typedef struct { 
   8.102 +    /* IN */
   8.103 +    domid_t        domid;             /* Identify blkdev interface.          */
   8.104 +    unsigned int   blkif_handle;      /* ...ditto...                         */
   8.105 +    blkif_vdev_t   vdevice;           /* Interface-specific id of the VBD.   */
   8.106 +    blkif_extent_t extent;            /* Physical extent to append to VBD.   */
   8.107 +    /* OUT */
   8.108 +    unsigned int   status;
   8.109 +} blkif_vbd_grow_t; 
   8.110 +
   8.111 +/* CMSG_BLKIF_VBD_SHRINK */
   8.112 +typedef struct { 
   8.113 +    /* IN */
   8.114 +    domid_t        domid;             /* Identify blkdev interface.          */
   8.115 +    unsigned int   blkif_handle;      /* ...ditto...                         */
   8.116 +    blkif_vdev_t   vdevice;           /* Interface-specific id of the VBD.   */
   8.117 +    /* OUT */
   8.118 +    unsigned int   status;
   8.119 +} blkif_vbd_shrink_t; 
   8.120  
   8.121  #endif /* __DOMAIN_CONTROLLER_H__ */
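
The definitions above describe the bodies of the control messages that the management tools exchange with the block-device backend; the receive-side dispatcher appears in backend/control.c later in this changeset. As a hedged sketch of the sending side, the helper below fills in a blkif_create_t and wraps it in a CMSG_BLKIF_BE / CMSG_BLKIF_BE_CREATE message; the ctrl_msg_t field names (type, subtype, length, msg[]) are inferred from their use in control.c rather than shown here, and the transmit routine is left abstract.

    /* Sketch only: the blkif_create_t fields come from the definitions
     * above; ctrl_msg_t and its field names are assumed from their use in
     * backend/control.c (the real type lives in the ctrl_if headers). */
    #include <string.h>                 /* memcpy                        */
    #include "domain_controller.h"      /* message definitions above     */

    static void build_blkif_create(ctrl_msg_t *cmsg, domid_t dom,
                                   unsigned int handle, unsigned int port,
                                   unsigned long frame)
    {
        blkif_create_t create;

        create.domid        = dom;    /* domain that will own the interface */
        create.blkif_handle = handle; /* per-domain interface index         */
        create.evtchn_port  = port;   /* event channel for notifications    */
        create.shmem_frame  = frame;  /* page holding the shared ring       */
        create.status       = 0;      /* filled in by the backend           */

        cmsg->type    = CMSG_BLKIF_BE;
        cmsg->subtype = CMSG_BLKIF_BE_CREATE;
        cmsg->length  = sizeof(create);
        memcpy(cmsg->msg, &create, sizeof(create));
    }

On completion the backend writes one of the BLKIF_STATUS_* codes into the embedded status field before the message is bounced back via ctrl_if_send_response().
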
     9.1 --- a/tools/xend/lib/utils.c	Wed Apr 21 10:43:06 2004 +0000
     9.2 +++ b/tools/xend/lib/utils.c	Thu Apr 22 13:56:30 2004 +0000
     9.3 @@ -674,6 +674,10 @@ static PyObject *xu_port_new(PyObject *s
     9.4          goto fail1;
     9.5      }
     9.6  
     9.7 +    /* Set the General-Purpose Subject whose page frame will be mapped. */
     9.8 +    (void)ioctl(xup->mem_fd, _IO('M', 1), (unsigned long)(dom>> 0)); /* low  */
     9.9 +    (void)ioctl(xup->mem_fd, _IO('M', 2), (unsigned long)(dom>>32)); /* high */
    9.10 +
    9.11      if ( (xup->xc_handle = xc_interface_open()) == -1 )
    9.12      {
    9.13          PyErr_SetString(port_error, "Could not open Xen control interface");
    10.1 --- a/xen/common/memory.c	Wed Apr 21 10:43:06 2004 +0000
    10.2 +++ b/xen/common/memory.c	Thu Apr 22 13:56:30 2004 +0000
    10.3 @@ -151,12 +151,10 @@
    10.4  
    10.5  static int alloc_l2_table(struct pfn_info *page);
    10.6  static int alloc_l1_table(struct pfn_info *page);
    10.7 -static int get_page_from_pagenr(unsigned long page_nr, int check_level);
    10.8 +static int get_page_from_pagenr(unsigned long page_nr, struct task_struct *p);
    10.9  static int get_page_and_type_from_pagenr(unsigned long page_nr, 
   10.10                                           u32 type,
   10.11 -                                         int check_level);
   10.12 -#define CHECK_STRICT 0 /* Subject domain must own the page                  */
   10.13 -#define CHECK_ANYDOM 1 /* Any domain may own the page (if subject is priv.) */
   10.14 +                                         struct task_struct *p);
   10.15  
   10.16  static void free_l2_table(struct pfn_info *page);
   10.17  static void free_l1_table(struct pfn_info *page);
   10.18 @@ -180,9 +178,14 @@ static struct {
   10.19      unsigned long       deferred_ops;
   10.20      unsigned long       cr0;
   10.21      domid_t             subject_id;
   10.22 -    struct task_struct *subject_p;
   10.23 +    /* General-Purpose Subject, Page-Table Subject */
   10.24 +    struct task_struct *gps, *pts;
   10.25  } percpu_info[NR_CPUS] __cacheline_aligned;
   10.26  
   10.27 +/* Determine the current General-Purpose Subject or Page-Table Subject. */
   10.28 +#define PTS (percpu_info[smp_processor_id()].pts ? : current)
   10.29 +#define GPS (percpu_info[smp_processor_id()].gps ? : current)
   10.30 +
   10.31  
   10.32  /*
   10.33   * init_frametable:
   10.34 @@ -295,11 +298,9 @@ int map_ldt_shadow_page(unsigned int off
   10.35  }
   10.36  
   10.37  
   10.38 -static int get_page_from_pagenr(unsigned long page_nr, int check_level)
   10.39 +static int get_page_from_pagenr(unsigned long page_nr, struct task_struct *p)
   10.40  {
   10.41 -    struct task_struct *p = current;
   10.42      struct pfn_info *page = &frame_table[page_nr];
   10.43 -    u32 y, x, nx;
   10.44  
   10.45      if ( unlikely(!pfn_is_ram(page_nr)) )
   10.46      {
   10.47 @@ -307,37 +308,10 @@ static int get_page_from_pagenr(unsigned
   10.48          return 0;
   10.49      }
   10.50  
   10.51 -    /* Find the correct subject domain. */
   10.52 -    if ( unlikely(percpu_info[p->processor].subject_p != NULL) )
   10.53 -        p = percpu_info[p->processor].subject_p;
   10.54 -
   10.55 -    /* Demote ANYDOM to STRICT if subject domain is not privileged. */
   10.56 -    if ( check_level == CHECK_ANYDOM && !IS_PRIV(p) )
   10.57 -        check_level = CHECK_STRICT;
   10.58 -
   10.59 -    switch ( check_level )
   10.60 +    if ( unlikely(!get_page(page, p)) )
   10.61      {
   10.62 -    case CHECK_STRICT:
   10.63 -        if ( unlikely(!get_page(page, p)) )
   10.64 -        {
   10.65 -            MEM_LOG("Could not get page ref for pfn %08lx\n", page_nr);
   10.66 -            return 0;
   10.67 -        }
   10.68 -        break;
   10.69 -    case CHECK_ANYDOM:
   10.70 -        y = page->count_and_flags;
   10.71 -        do {
   10.72 -            x  = y;
   10.73 -            nx = x + 1;
   10.74 -            if ( unlikely((x & PGC_count_mask) == 0) ||
   10.75 -                 unlikely((nx & PGC_count_mask) == 0) )
   10.76 -            {
   10.77 -                MEM_LOG("Could not get page ref for pfn %08lx\n", page_nr);
   10.78 -                return 0;
   10.79 -            }
   10.80 -        }
   10.81 -        while ( unlikely((y = cmpxchg(&page->count_and_flags, x, nx)) != x) );
   10.82 -        break;
   10.83 +        MEM_LOG("Could not get page ref for pfn %08lx\n", page_nr);
   10.84 +        return 0;
   10.85      }
   10.86  
   10.87      return 1;
   10.88 @@ -346,11 +320,11 @@ static int get_page_from_pagenr(unsigned
   10.89  
   10.90  static int get_page_and_type_from_pagenr(unsigned long page_nr, 
   10.91                                           u32 type,
   10.92 -                                         int check_level)
   10.93 +                                         struct task_struct *p)
   10.94  {
   10.95      struct pfn_info *page = &frame_table[page_nr];
   10.96  
   10.97 -    if ( unlikely(!get_page_from_pagenr(page_nr, check_level)) )
   10.98 +    if ( unlikely(!get_page_from_pagenr(page_nr, p)) )
   10.99          return 0;
  10.100  
  10.101      if ( unlikely(!get_page_type(page, type)) )
  10.102 @@ -391,8 +365,7 @@ static int get_linear_pagetable(l2_pgent
  10.103      if ( (l2_pgentry_val(l2e) >> PAGE_SHIFT) != pfn )
  10.104      {
  10.105          /* Make sure the mapped frame belongs to the correct domain. */
  10.106 -        if ( unlikely(!get_page_from_pagenr(l2_pgentry_to_pagenr(l2e), 
  10.107 -                                            CHECK_STRICT)) )
  10.108 +        if ( unlikely(!get_page_from_pagenr(l2_pgentry_to_pagenr(l2e), PTS)) )
  10.109              return 0;
  10.110  
  10.111          /*
  10.112 @@ -443,14 +416,14 @@ static int get_page_from_l1e(l1_pgentry_
  10.113      if ( l1v & _PAGE_RW )
  10.114      {
  10.115          if ( unlikely(!get_page_and_type_from_pagenr(
  10.116 -            pfn, PGT_writeable_page, CHECK_ANYDOM)) )
  10.117 +            pfn, PGT_writeable_page, GPS)) )
  10.118              return 0;
  10.119          set_bit(_PGC_tlb_flush_on_type_change, 
  10.120                  &frame_table[pfn].count_and_flags);
  10.121          return 1;
  10.122      }
  10.123  
  10.124 -    return get_page_from_pagenr(pfn, CHECK_ANYDOM);
  10.125 +    return get_page_from_pagenr(pfn, GPS);
  10.126  }
  10.127  
  10.128  
  10.129 @@ -468,7 +441,7 @@ static int get_page_from_l2e(l2_pgentry_
  10.130      }
  10.131  
  10.132      if ( unlikely(!get_page_and_type_from_pagenr(
  10.133 -        l2_pgentry_to_pagenr(l2e), PGT_l1_page_table, CHECK_STRICT)) )
  10.134 +        l2_pgentry_to_pagenr(l2e), PGT_l1_page_table, PTS)) )
  10.135          return get_linear_pagetable(l2e, pfn);
  10.136  
  10.137      return 1;
  10.138 @@ -771,12 +744,12 @@ void free_page_type(struct pfn_info *pag
  10.139                                  page-frame_table) & PSH_shadowed) )
  10.140          {
  10.141              /*
  10.142 -             * Using 'current->mm' is safe and correct because page-table pages 
  10.143 -             * are not shared across domains. Updates to such pages' types are 
  10.144 -             * thus only done within the context of the owning domain. The one 
  10.145 -             * exception is when destroying a domain; however, this is not a 
  10.146 -             * problem as the currently-executing domain will not have this 
  10.147 -             * MFN shadowed, and at domain end-of-day we explicitly unshadow 
  10.148 +             * Using 'current->mm' is safe and correct because page-table pages
  10.149 +             * are not shared across domains. Updates to such pages' types are
  10.150 +             * thus only done within the context of the owning domain. The one
  10.151 +             * exception is when destroying a domain; however, this is not a
  10.152 +             * problem as the currently-executing domain will not have this MFN
  10.153 +             * shadowed, and at domain end-of-day we explicitly unshadow
  10.154               * everything so that nothing will get left lying around.
  10.155               */
  10.156              unshadow_table( page-frame_table, type );
  10.157 @@ -814,9 +787,9 @@ static int do_extended_command(unsigned 
  10.158      case MMUEXT_PIN_L1_TABLE:
  10.159      case MMUEXT_PIN_L2_TABLE:
  10.160          okay = get_page_and_type_from_pagenr(
  10.161 -            pfn, (cmd == MMUEXT_PIN_L2_TABLE) ? PGT_l2_page_table : 
  10.162 -            PGT_l1_page_table,
  10.163 -            CHECK_STRICT);
  10.164 +            pfn, 
  10.165 +            (cmd==MMUEXT_PIN_L2_TABLE) ? PGT_l2_page_table : PGT_l1_page_table,
  10.166 +            PTS);
  10.167          if ( unlikely(!okay) )
  10.168          {
  10.169              MEM_LOG("Error while pinning pfn %08lx", pfn);
  10.170 @@ -836,7 +809,7 @@ static int do_extended_command(unsigned 
  10.171          break;
  10.172  
  10.173      case MMUEXT_UNPIN_TABLE:
  10.174 -        if ( unlikely(!(okay = get_page_from_pagenr(pfn, CHECK_STRICT))) )
  10.175 +        if ( unlikely(!(okay = get_page_from_pagenr(pfn, PTS))) )
  10.176          {
  10.177              MEM_LOG("Page %08lx bad domain (dom=%p)",
  10.178                      ptr, page->u.domain);
  10.179 @@ -856,8 +829,7 @@ static int do_extended_command(unsigned 
  10.180          break;
  10.181  
  10.182      case MMUEXT_NEW_BASEPTR:
  10.183 -        okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, 
  10.184 -                                             CHECK_STRICT);
  10.185 +        okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, current);
  10.186          if ( likely(okay) )
  10.187          {
  10.188              invalidate_shadow_ldt();
  10.189 @@ -890,7 +862,7 @@ static int do_extended_command(unsigned 
  10.190          break;
  10.191      
  10.192      case MMUEXT_INVLPG:
  10.193 -        __flush_tlb_one(val & ~MMUEXT_CMD_MASK);
  10.194 +        __flush_tlb_one(ptr);
  10.195          break;
  10.196  
  10.197      case MMUEXT_SET_LDT:
  10.198 @@ -932,11 +904,13 @@ static int do_extended_command(unsigned 
  10.199          }
  10.200          else
  10.201          {
  10.202 -            if ( percpu_info[cpu].subject_p != NULL )
  10.203 -                put_task_struct(percpu_info[cpu].subject_p);
  10.204 -            percpu_info[cpu].subject_p = find_domain_by_id(
  10.205 +            if ( percpu_info[cpu].gps != NULL )
  10.206 +                put_task_struct(percpu_info[cpu].gps);
  10.207 +            percpu_info[cpu].gps = find_domain_by_id(
  10.208                  percpu_info[cpu].subject_id);
  10.209 -            if ( percpu_info[cpu].subject_p == NULL )
  10.210 +            percpu_info[cpu].pts = (val & SET_PAGETABLE_SUBJECTDOM) ? 
  10.211 +                percpu_info[cpu].gps : NULL;
  10.212 +            if ( percpu_info[cpu].gps == NULL )
  10.213              {
  10.214                  MEM_LOG("Unknown domain '%llu'", percpu_info[cpu].subject_id);
  10.215                  okay = 0;
  10.216 @@ -987,7 +961,7 @@ int do_mmu_update(mmu_update_t *ureqs, i
  10.217               * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
  10.218               */
  10.219          case MMU_NORMAL_PT_UPDATE:
  10.220 -            if ( unlikely(!get_page_from_pagenr(pfn, CHECK_STRICT)) )
  10.221 +            if ( unlikely(!get_page_from_pagenr(pfn, PTS)) )
  10.222              {
  10.223                  MEM_LOG("Could not get page for normal update");
  10.224                  break;
  10.225 @@ -1059,7 +1033,7 @@ int do_mmu_update(mmu_update_t *ureqs, i
  10.226              break;
  10.227  
  10.228          case MMU_MACHPHYS_UPDATE:
  10.229 -            if ( unlikely(!get_page_from_pagenr(pfn, CHECK_STRICT)) )
  10.230 +            if ( unlikely(!get_page_from_pagenr(pfn, GPS)) )
  10.231              {
  10.232                  MEM_LOG("Could not get page for mach->phys update");
  10.233                  break;
  10.234 @@ -1108,10 +1082,10 @@ int do_mmu_update(mmu_update_t *ureqs, i
  10.235      if ( deferred_ops & DOP_RELOAD_LDT )
  10.236          (void)map_ldt_shadow_page(0);
  10.237  
  10.238 -    if ( unlikely(percpu_info[cpu].subject_p != NULL) )
  10.239 +    if ( unlikely(percpu_info[cpu].gps != NULL) )
  10.240      {
  10.241 -        put_task_struct(percpu_info[cpu].subject_p);
  10.242 -        percpu_info[cpu].subject_p = NULL;
  10.243 +        put_task_struct(percpu_info[cpu].gps);
  10.244 +        percpu_info[cpu].gps = percpu_info[cpu].pts = NULL;
  10.245      }
  10.246  
  10.247      return rc;
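
Taken together, the memory.c changes make every ownership check name its subject explicitly: frames touched as page tables (normal PT updates, pinning/unpinning, linear pagetables) are checked against PTS, frames touched as data (L1 mappings, mach->phys updates) against GPS, and MMUEXT_NEW_BASEPTR is always checked against the caller. Both subjects default to the calling domain; the macros above use the GNU 'a ?: b' form, which the open-coded equivalent below (illustration only) makes explicit for GPS:

    /* Illustration only -- what the GPS macro evaluates to on a given CPU;
     * PTS is identical but reads the 'pts' pointer instead. */
    static inline struct task_struct *gps_of(unsigned int cpu)
    {
        struct task_struct *gps = percpu_info[cpu].gps;
        return (gps != NULL) ? gps : current;   /* fall back to the caller */
    }
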
    11.1 --- a/xen/include/hypervisor-ifs/hypervisor-if.h	Wed Apr 21 10:43:06 2004 +0000
    11.2 +++ b/xen/include/hypervisor-ifs/hypervisor-if.h	Thu Apr 22 13:56:30 2004 +0000
    11.3 @@ -71,22 +71,73 @@
    11.4  #define NR_VIRQS       12
    11.5  
    11.6  /*
    11.7 - * MMU_XXX: specified in least 2 bits of 'ptr' field. These bits are masked
    11.8 - *  off to get the real 'ptr' value.
    11.9 - * All requests specify relevent address in 'ptr'. This is either a
   11.10 - * machine/physical address (MA), or linear/virtual address (VA).
   11.11 - * Normal requests specify update value in 'value'.
   11.12 - * Extended requests specify command in least 8 bits of 'value'. These bits
   11.13 - *  are masked off to get the real 'val' value. Except for MMUEXT_SET_LDT 
   11.14 - *  which shifts the least bits out.
   11.15 + * MMU-UPDATE REQUESTS
   11.16 + * 
   11.17 + * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs.
   11.18 + * ptr[1:0] specifies the appropriate MMU_* command.
   11.19 + * 
   11.20 + * GPS (General-Purpose Subject)
   11.21 + * -----------------------------
    11.22 + *  This is the domain that must own all non-page-table pages involved in
   11.23 + *  MMU updates. By default it is the domain that executes mmu_update(). If the
   11.24 + *  caller has sufficient privilege then it can be changed by executing
   11.25 + *  MMUEXT_SET_SUBJECTDOM_{L,H}.
   11.26 + * 
   11.27 + * PTS (Page-Table Subject)
   11.28 + * ------------------------
   11.29 + *  This domain must own all the page-table pages that are subject to MMU
   11.30 + *  updates. By default it is the domain that executes mmu_update(). If the
   11.31 + *  caller has sufficient privilege then it can be changed by executing
   11.32 + *  MMUEXT_SET_SUBJECTDOM_H with val[14] (SET_PAGETABLE_SUBJECTDOM) set.
   11.33 + * 
   11.34 + * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
   11.35 + * Updates an entry in a page table.
   11.36 + * ptr[:2]  -- machine address of the page-table entry to modify [1]
   11.37 + * val      -- value to write [2]
   11.38 + * 
   11.39 + * ptr[1:0] == MMU_MACHPHYS_UPDATE:
   11.40 + * Updates an entry in the machine->pseudo-physical mapping table.
   11.41 + * ptr[:2]  -- machine address within the frame whose mapping to modify [3]
   11.42 + * val      -- value to write into the mapping entry
   11.43 + *  
   11.44 + * ptr[1:0] == MMU_EXTENDED_COMMAND:
   11.45 + * val[7:0] -- MMUEXT_* command
   11.46 + * 
   11.47 + *   val[7:0] == MMUEXT_(UN)PIN_*_TABLE:
   11.48 + *   ptr[:2]  -- machine address of frame to be (un)pinned as a p.t. page [1]
   11.49 + * 
   11.50 + *   val[7:0] == MMUEXT_NEW_BASEPTR:
   11.51 + *   ptr[:2]  -- machine address of new page-table base to install in MMU [1]
   11.52 + * 
   11.53 + *   val[7:0] == MMUEXT_TLB_FLUSH:
   11.54 + *   no additional arguments
   11.55 + * 
   11.56 + *   val[7:0] == MMUEXT_INVLPG:
   11.57 + *   ptr[:2]  -- linear address to be flushed from the TLB
   11.58 + * 
   11.59 + *   val[7:0] == MMUEXT_SET_LDT:
   11.60 + *   ptr[:2]  -- linear address of LDT base (NB. must be page-aligned)
   11.61 + *   val[:8]  -- number of entries in LDT
   11.62 + * 
   11.63 + *   val[7:0] == MMUEXT_SET_SUBJECTDOM_L:
   11.64 + *   (ptr[31:15],val[31:15]) -- dom[31:0]
   11.65 + * 
   11.66 + *   val[7:0] == MMUEXT_SET_SUBJECTDOM_H:
   11.67 + *   val[14]  -- if TRUE then sets the PTS in addition to the GPS.
   11.68 + *   (ptr[31:15],val[31:15]) -- dom[63:32]
   11.69 + *   NB. This command must be immediately preceded by SET_SUBJECTDOM_L.
   11.70 + * 
   11.71 + * Notes on constraints on the above arguments:
   11.72 + *  [1] The page frame containing the machine address must belong to the PTS.
   11.73 + *  [2] If the PTE is valid (i.e., bit 0 is set) then the specified page frame
   11.74 + *      must belong to: 
   11.75 + *       (a) the PTS (if the PTE is part of a non-L1 table); or
   11.76 + *       (b) the GPS (if the PTE is part of an L1 table).
   11.77 + *  [3] The page frame containing the machine address must belong to the GPS.
   11.78   */
   11.79 -/* A normal page-table update request. */
   11.80  #define MMU_NORMAL_PT_UPDATE     0 /* checked '*ptr = val'. ptr is MA.       */
   11.81 -/* Update an entry in the machine->physical mapping table. */
   11.82  #define MMU_MACHPHYS_UPDATE      2 /* ptr = MA of frame to modify entry for  */
   11.83 -/* An extended command. */
   11.84  #define MMU_EXTENDED_COMMAND     3 /* least 8 bits of val demux further      */
   11.85 -/* Extended commands: */
   11.86  #define MMUEXT_PIN_L1_TABLE      0 /* ptr = MA of frame to pin               */
   11.87  #define MMUEXT_PIN_L2_TABLE      1 /* ptr = MA of frame to pin               */
   11.88  #define MMUEXT_PIN_L3_TABLE      2 /* ptr = MA of frame to pin               */
   11.89 @@ -94,11 +145,12 @@
   11.90  #define MMUEXT_UNPIN_TABLE       4 /* ptr = MA of frame to unpin             */
   11.91  #define MMUEXT_NEW_BASEPTR       5 /* ptr = MA of new pagetable base         */
   11.92  #define MMUEXT_TLB_FLUSH         6 /* ptr = NULL                             */
   11.93 -#define MMUEXT_INVLPG            7 /* ptr = NULL ; val = VA to invalidate    */
   11.94 +#define MMUEXT_INVLPG            7 /* ptr = VA to invalidate                 */
   11.95  #define MMUEXT_SET_LDT           8 /* ptr = VA of table; val = # entries     */
   11.96  /* NB. MMUEXT_SET_SUBJECTDOM must consist of *_L followed immediately by *_H */
   11.97  #define MMUEXT_SET_SUBJECTDOM_L  9 /* (ptr[31:15],val[31:15]) = dom[31:0]    */
   11.98  #define MMUEXT_SET_SUBJECTDOM_H 10 /* (ptr[31:15],val[31:15]) = dom[63:32]   */
   11.99 +#define SET_PAGETABLE_SUBJECTDOM (1<<14) /* OR into 'val' arg of SUBJECTDOM_H*/
  11.100  #define MMUEXT_CMD_MASK        255
  11.101  #define MMUEXT_CMD_SHIFT         8
  11.102  
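
The subject-domain encoding is easiest to see as code. The hypothetical helper below mirrors the flush_mmu_updates() change in tools/xc/lib/xc_private.c earlier in this changeset: the 64-bit domid is split across the upper bits of ptr and val in two consecutive extended commands, and setting SET_PAGETABLE_SUBJECTDOM in the _H request nominates the same domain as the Page-Table Subject as well. The prologue only takes effect for sufficiently privileged callers.

    /* Sketch: build the two-request prologue placed at the head of a
     * privileged mmu_update batch (mmu_update_t is the (ptr,val) pair
     * defined in this header). */
    static void encode_subjectdom(mmu_update_t u[2], domid_t dom, int also_pts)
    {
        u[0].ptr  = (unsigned long)(dom <<  0) & ~0xFFFFUL;   /* dom[31:16] */
        u[0].val  = (unsigned long)(dom << 16) & ~0xFFFFUL;   /* dom[15:0]  */
        u[1].ptr  = (unsigned long)(dom >> 32) & ~0xFFFFUL;   /* dom[63:48] */
        u[1].val  = (unsigned long)(dom >> 16) & ~0xFFFFUL;   /* dom[47:32] */

        u[0].ptr |= MMU_EXTENDED_COMMAND;
        u[0].val |= MMUEXT_SET_SUBJECTDOM_L;
        u[1].ptr |= MMU_EXTENDED_COMMAND;
        u[1].val |= MMUEXT_SET_SUBJECTDOM_H |
                    (also_pts ? SET_PAGETABLE_SUBJECTDOM : 0);
    }
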
    12.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile	Wed Apr 21 10:43:06 2004 +0000
    12.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/Makefile	Thu Apr 22 13:56:30 2004 +0000
    12.3 @@ -1,3 +1,3 @@
    12.4  O_TARGET := drv.o
    12.5 -obj-y := main.o
    12.6 +obj-y := main.o control.o interface.o vbd.o
    12.7  include $(TOPDIR)/Rules.make
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/common.h	Thu Apr 22 13:56:30 2004 +0000
    13.3 @@ -0,0 +1,94 @@
    13.4 +/******************************************************************************
    13.5 + * arch/xen/drivers/vblkif/backend/common.h
    13.6 + */
    13.7 +
    13.8 +#ifndef __VBLKIF__BACKEND__COMMON_H__
    13.9 +#define __VBLKIF__BACKEND__COMMON_H__
   13.10 +
   13.11 +#include <linux/config.h>
   13.12 +#include <linux/module.h>
   13.13 +#include <linux/rbtree.h>
   13.14 +#include <linux/interrupt.h>
   13.15 +#include <linux/slab.h>
   13.16 +#include <asm/ctrl_if.h>
   13.17 +#include <asm/io.h>
   13.18 +
   13.19 +#ifndef NDEBUG
   13.20 +#define ASSERT(_p) \
   13.21 +    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
   13.22 +    __LINE__, __FILE__); *(int*)0=0; }
   13.23 +#define DPRINTK(_f, _a...) printk("(file=%s, line=%d) " _f, \
   13.24 +                           __FILE__ , __LINE__ , ## _a )
   13.25 +#else
   13.26 +#define ASSERT(_p) ((void)0)
   13.27 +#define DPRINTK(_f, _a...) ((void)0)
   13.28 +#endif
   13.29 +
   13.30 +typedef struct {
   13.31 +    /* Unique identifier for this interface. */
   13.32 +    domid_t          domid;
   13.33 +    unsigned int     handle;
   13.34 +    /* Physical parameters of the comms window. */
   13.35 +    unsigned long    shmem_frame;
   13.36 +    unsigned int     evtchn;
   13.37 +    int              irq;
   13.38 +    /* Comms information. */
   13.39 +    blk_ring_t      *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
   13.40 +    BLK_RING_IDX     blk_req_cons;  /* Request consumer. */
   13.41 +    BLK_RING_IDX     blk_resp_prod; /* Private version of response producer. */
   13.42 +    /* VBDs attached to this interface. */
   13.43 +    rb_root_t        vbd_rb;        /* Mapping from 16-bit vdevices to VBDs. */
   13.44 +    spinlock_t       vbd_lock;      /* Protects VBD mapping. */
   13.45 +    /* Private fields. */
   13.46 +    struct list_head blkdev_list;
   13.47 +    spinlock_t       blk_ring_lock;
   13.48 +} blkif_t;
   13.49 +
   13.50 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
   13.51 +void blkif_get(blkif_t *blkif);
   13.52 +void blkif_put(blkif_t *blkif);
   13.53 +
   13.54 +/* An entry in a list of xen_extents. */
   13.55 +typedef struct _blkif_extent_le { 
   13.56 +    blkif_extent_t extent;               /* an individual extent */
   13.57 +    struct _blkif_extent_le *next;       /* and a pointer to the next */ 
   13.58 +} blkif_extent_le_t; 
   13.59 +
   13.60 +typedef struct _vbd { 
   13.61 +    blkif_vdev_t       vdevice;   /* what the domain refers to this vbd as */
   13.62 +    unsigned char      mode;      /* VBD_MODE_{R,W} */
   13.63 +    unsigned char      type;      /* XD_TYPE_xxx */
   13.64 +    blkif_extent_le_t *extents;   /* list of xen_extents making up this vbd */
   13.65 +    rb_node_t          rb;        /* for linking into R-B tree lookup struct */
   13.66 +} vbd_t; 
   13.67 +
   13.68 +long vbd_create(blkif_vbd_create_t *create_params); 
   13.69 +long vbd_grow(blkif_vbd_grow_t *grow_params); 
   13.70 +long vbd_shrink(blkif_vbd_shrink_t *shrink_params);
   13.71 +long vbd_destroy(blkif_vbd_destroy_t *delete_params); 
   13.72 +
   13.73 +void destroy_all_vbds(struct task_struct *p);
   13.74 +
   13.75 +typedef struct {
   13.76 +    blkif_t       *blkif;
   13.77 +    unsigned long  id;
   13.78 +    atomic_t       pendcnt;
   13.79 +    unsigned short operation;
   13.80 +    unsigned short status;
   13.81 +} pending_req_t;
   13.82 +
   13.83 +/* Describes a [partial] disk extent (part of a block io request) */
   13.84 +typedef struct {
   13.85 +    unsigned short dev;
   13.86 +    unsigned short nr_sects;
   13.87 +    unsigned long  buffer;
   13.88 +    xen_sector_t   sector_number;
   13.89 +} phys_seg_t;
   13.90 +
   13.91 +int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); 
   13.92 +
   13.93 +int vblkif_be_controller_init(void);
   13.94 +
   13.95 +void vblkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
   13.96 +
   13.97 +#endif /* __VBLKIF__BACKEND__COMMON_H__ */
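
A VBD is an ordered list of extents (blkif_extent_le_t) concatenated into a single virtual device, and vbd_translate() resolves a VBD-relative sector run onto the underlying physical device(s). The real lookup lives in vbd.c (listed in this changeset but not shown above); the sketch below, whose function name is hypothetical, only illustrates the extent-walking idea:

    /* Illustrative only: resolve a VBD-relative sector against the extent
     * list.  Each extent contributes sector_length sectors of the virtual
     * device, mapped onto [sector_start, ...) of its physical device. */
    static int resolve_vbd_sector(vbd_t *vbd, blkif_sector_t sec,
                                  blkif_pdev_t *dev, blkif_sector_t *psec)
    {
        blkif_extent_le_t *le;

        for ( le = vbd->extents; le != NULL; le = le->next )
        {
            if ( sec < le->extent.sector_length )
            {
                *dev  = le->extent.device;
                *psec = le->extent.sector_start + sec;
                return 0;
            }
            sec -= le->extent.sector_length;   /* skip past this extent */
        }

        return -1;   /* sector lies beyond the end of the VBD */
    }

dispatch_rw_block_io() in main.c below shows the caller's side: each (buffer, nr_sects) pair of a guest request becomes one or more phys_seg_t entries via vbd_translate().
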
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/control.c	Thu Apr 22 13:56:30 2004 +0000
    14.3 @@ -0,0 +1,60 @@
    14.4 +/******************************************************************************
    14.5 + * arch/xen/drivers/vblkif/backend/control.c
    14.6 + * 
    14.7 + * Routines for interfacing with the control plane.
    14.8 + * 
    14.9 + * Copyright (c) 2004, Keir Fraser
   14.10 + */
   14.11 +
   14.12 +#include "common.h"
   14.13 +
   14.14 +static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
   14.15 +{
   14.16 +    switch ( msg->subtype )
   14.17 +    {
   14.18 +    case CMSG_BLKIF_BE_CREATE:
   14.19 +        if ( msg->length != sizeof(blkif_create_t) )
   14.20 +            goto parse_error;
   14.21 +        blkif_create((blkif_create_t *)&msg->msg[0]);
   14.22 +        break;        
   14.23 +    case CMSG_BLKIF_BE_DESTROY:
   14.24 +        if ( msg->length != sizeof(blkif_destroy_t) )
   14.25 +            goto parse_error;
   14.26 +        blkif_destroy((blkif_destroy_t *)&msg->msg[0]);
   14.27 +        break;        
   14.28 +    case CMSG_BLKIF_BE_VBD_CREATE:
   14.29 +        if ( msg->length != sizeof(blkif_vbd_create_t) )
   14.30 +            goto parse_error;
   14.31 +        vbd_create((blkif_vbd_create_t *)&msg->msg[0]);
   14.32 +        break;
   14.33 +    case CMSG_BLKIF_BE_VBD_DESTROY:
   14.34 +        if ( msg->length != sizeof(blkif_vbd_destroy_t) )
   14.35 +            goto parse_error;
   14.36 +        vbd_destroy((blkif_vbd_destroy_t *)&msg->msg[0]);
   14.37 +        break;
   14.38 +    case CMSG_BLKIF_BE_VBD_GROW:
   14.39 +        if ( msg->length != sizeof(blkif_vbd_grow_t) )
   14.40 +            goto parse_error;
   14.41 +        vbd_grow((blkif_vbd_grow_t *)&msg->msg[0]);
   14.42 +        break;
   14.43 +    case CMSG_BLKIF_BE_VBD_SHRINK:
   14.44 +        if ( msg->length != sizeof(blkif_vbd_shrink_t) )
   14.45 +            goto parse_error;
   14.46 +        vbd_shrink((blkif_vbd_shrink_t *)&msg->msg[0]);
   14.47 +        break;
   14.48 +    default:
   14.49 +        goto parse_error;
   14.50 +    }
   14.51 +
   14.52 +    ctrl_if_send_response(msg);
   14.53 +    return;
   14.54 +
   14.55 + parse_error:
   14.56 +    msg->length = 0;
   14.57 +    ctrl_if_send_response(msg);
   14.58 +}
   14.59 +
   14.60 +int blkif_ctrlif_init(void)
   14.61 +{
   14.62 +    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx);
   14.63 +}
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/interface.c	Thu Apr 22 13:56:30 2004 +0000
    15.3 @@ -0,0 +1,96 @@
    15.4 +/******************************************************************************
    15.5 + * arch/xen/drivers/vblkif/backend/interface.c
    15.6 + * 
    15.7 + * Block-device interface management.
    15.8 + * 
    15.9 + * Copyright (c) 2004, Keir Fraser
   15.10 + */
   15.11 +
   15.12 +#include "common.h"
   15.13 +
   15.14 +#define BLKIF_HASHSZ 1024
   15.15 +#define BLKIF_HASH(_d,_h) \
   15.16 +    (((int)(_d)^(int)((_d)>>32)^(int)(_h))&(BLKIF_HASHSZ-1))
   15.17 +
   15.18 +static blkif_t *blkif_hash[BLKIF_HASHSZ];
   15.19 +
   15.20 +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
   15.21 +{
   15.22 +    blkif_t *blkif = &blkif_hash[BLKIF_HASH(domid, handle)];
   15.23 +    while ( (blkif != NULL) && 
   15.24 +            (blkif->domid != domid) && 
   15.25 +            (blkif->handle != handle) )
   15.26 +        blkif = blkif->hash_next;
   15.27 +    return blkif;
   15.28 +}
   15.29 +
   15.30 +static void blkif_create(blkif_create_t *create)
   15.31 +{
   15.32 +    domid_t       domid  = create->domid;
   15.33 +    unsigned int  handle = create->blkif_handle;
    15.34 +    unsigned int  evtchn = create->evtchn_port;
   15.35 +    unsigned long shmem_frame = create->shmem_frame;
   15.36 +    blkif_t     **pblkif, *blkif;
   15.37 +
   15.38 +    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
    15.39 +    while ( *pblkif != NULL )
   15.40 +    {
   15.41 +        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
   15.42 +            goto found_match;
   15.43 +        pblkif = &(*pblkif)->hash_next;
   15.44 +    }
   15.45 +
   15.46 +    blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
   15.47 +    memset(blkif, 0, sizeof(*blkif));
   15.48 +    blkif->domid       = domid;
   15.49 +    blkif->handle      = handle;
   15.50 +    blkif->evtchn      = evtchn;
   15.51 +    blkif->irq         = bind_evtchn_to_irq(evtchn);
   15.52 +    blkif->shmem_frame = shmem_frame;
    15.53 +    blkif->blk_ring_base = (blk_ring_t *)ioremap(shmem_frame<<PAGE_SHIFT, PAGE_SIZE);
   15.54 +    spin_lock_init(&blkif->vbd_lock);
   15.55 +    spin_lock_init(&blkif->blk_ring_lock);
   15.56 +
    15.57 +    request_irq(blkif->irq, vblkif_be_int, 0, "vblkif-backend", blkif);
   15.58 +
   15.59 +    blkif->hash_next = *pblkif;
   15.60 +    *pblkif = blkif;
   15.61 +
   15.62 +    create->status = BLKIF_STATUS_OKAY;
   15.63 +    return;
   15.64 +
   15.65 + found_match:
   15.66 +    create->status = BLKIF_STATUS_INTERFACE_EXISTS;
   15.67 +    return;
   15.68 +
   15.69 + evtchn_in_use:
   15.70 +    unbind_evtchn_from_irq(evtchn); /* drop refcnt */
   15.71 +    create->status = BLKIF_STATUS_ERROR;
   15.72 +    return;
   15.73 +}
   15.74 +
   15.75 +static void blkif_destroy(blkif_destroy_t *destroy)
   15.76 +{
   15.77 +    domid_t       domid  = destroy->domid;
   15.78 +    unsigned int  handle = destroy->blkif_handle;
   15.79 +    blkif_t     **pblkif, *blkif;
   15.80 +
   15.81 +    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
    15.82 +    while ( (blkif = *pblkif) != NULL )
   15.83 +    {
   15.84 +        if ( (blkif->domid == domid) && (blkif->handle == handle) )
   15.85 +            goto found_match;
   15.86 +        pblkif = &blkif->hash_next;
   15.87 +    }
   15.88 +
    15.89 +    destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
   15.90 +    return;
   15.91 +
   15.92 + found_match:
   15.93 +    free_irq(blkif->irq, NULL);
   15.94 +    unbind_evtchn_from_irq(blkif->evtchn);
   15.95 +    *pblkif = blkif->hash_next;
   15.96 +    kmem_cache_free(blkif_cachep, blkif);
   15.97 +    destroy->status = BLKIF_STATUS_OKAY;
   15.98 +}
   15.99 +
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/main.c	Thu Apr 22 13:56:30 2004 +0000
    16.3 @@ -0,0 +1,508 @@
    16.4 +/******************************************************************************
    16.5 + * arch/xen/drivers/vblkif/backend/main.c
    16.6 + * 
    16.7 + * Back-end of the driver for virtual block devices. This portion of the
    16.8 + * driver exports a 'unified' block-device interface that can be accessed
    16.9 + * by any operating system that implements a compatible front end. A 
   16.10 + * reference front-end implementation can be found in:
   16.11 + *  arch/xen/drivers/vblkif/frontend
   16.12 + * 
   16.13 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   16.14 + */
   16.15 +
   16.16 +#include "common.h"
   16.17 +
   16.18 +/*
   16.19 + * These are rather arbitrary. They are fairly large because adjacent requests
   16.20 + * pulled from a communication ring are quite likely to end up being part of
   16.21 + * the same scatter/gather request at the disc.
   16.22 + * 
   16.23 + * ** TRY INCREASING 'MAX_PENDING_REQS' IF WRITE SPEEDS SEEM TOO LOW **
   16.24 + * This will increase the chances of being able to write whole tracks.
   16.25 + * 64 should be enough to keep us competitive with Linux.
   16.26 + */
   16.27 +#define MAX_PENDING_REQS 64
   16.28 +#define BATCH_PER_DOMAIN 16
   16.29 +
   16.30 +/*
   16.31 + * Each outstanding request that we've passed to the lower device layers has a 
   16.32 + * 'pending_req' allocated to it. Each buffer_head that completes decrements 
   16.33 + * the pendcnt towards zero. When it hits zero, the specified domain has a 
   16.34 + * response queued for it, with the saved 'id' passed back.
   16.35 + * 
   16.36 + * We can't allocate pending_req's in order, since they may complete out of 
   16.37 + * order. We therefore maintain an allocation ring. This ring also indicates 
   16.38 + * when enough work has been passed down -- at that point the allocation ring 
   16.39 + * will be empty.
   16.40 + */
   16.41 +static pending_req_t pending_reqs[MAX_PENDING_REQS];
   16.42 +static unsigned char pending_ring[MAX_PENDING_REQS];
   16.43 +static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
   16.44 +/* NB. We use a different index type to differentiate from shared blk rings. */
   16.45 +typedef unsigned int PEND_RING_IDX;
   16.46 +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
   16.47 +static PEND_RING_IDX pending_prod, pending_cons;
   16.48 +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
   16.49 +
   16.50 +static kmem_cache_t *buffer_head_cachep;
   16.51 +
   16.52 +static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned;
   16.53 +
   16.54 +static int lock_buffer(blkif_t *blkif,
   16.55 +                       unsigned long buffer,
   16.56 +                       unsigned short size,
   16.57 +                       int writeable_buffer);
   16.58 +static void unlock_buffer(unsigned long buffer,
   16.59 +                          unsigned short size,
   16.60 +                          int writeable_buffer);
   16.61 +
   16.62 +static void io_schedule(unsigned long unused);
   16.63 +static int do_block_io_op(blkif_t *blkif, int max_to_do);
   16.64 +static void dispatch_rw_block_io(blkif_t *blkif,
   16.65 +                                 blk_ring_req_entry_t *req);
   16.66 +static void make_response(blkif_t *blkif, unsigned long id, 
   16.67 +                          unsigned short op, unsigned long st);
   16.68 +
   16.69 +
   16.70 +/******************************************************************
   16.71 + * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
   16.72 + */
   16.73 +
   16.74 +static struct list_head io_schedule_list;
   16.75 +static spinlock_t io_schedule_list_lock;
   16.76 +
   16.77 +static int __on_blkdev_list(blkif_t *blkif)
   16.78 +{
   16.79 +    return blkif->blkdev_list.next != NULL;
   16.80 +}
   16.81 +
   16.82 +static void remove_from_blkdev_list(blkif_t *blkif)
   16.83 +{
   16.84 +    unsigned long flags;
   16.85 +    if ( !__on_blkdev_list(blkif) ) return;
   16.86 +    spin_lock_irqsave(&io_schedule_list_lock, flags);
   16.87 +    if ( __on_blkdev_list(blkif) )
   16.88 +    {
   16.89 +        list_del(&blkif->blkdev_list);
   16.90 +        blkif->blkdev_list.next = NULL;
   16.91 +        blkif_put(blkif);
   16.92 +    }
   16.93 +    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
   16.94 +}
   16.95 +
   16.96 +static void add_to_blkdev_list_tail(blkif_t *blkif)
   16.97 +{
   16.98 +    unsigned long flags;
   16.99 +    if ( __on_blkdev_list(blkif) ) return;
  16.100 +    spin_lock_irqsave(&io_schedule_list_lock, flags);
  16.101 +    if ( !__on_blkdev_list(blkif) )
  16.102 +    {
  16.103 +        list_add_tail(&blkif->blkdev_list, &io_schedule_list);
  16.104 +        blkif_get(blkif);
  16.105 +    }
  16.106 +    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
  16.107 +}
  16.108 +
  16.109 +
  16.110 +/******************************************************************
  16.111 + * SCHEDULER FUNCTIONS
  16.112 + */
  16.113 +
  16.114 +static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
  16.115 +
  16.116 +static void io_schedule(unsigned long unused)
  16.117 +{
  16.118 +    blkif_t          *blkif;
  16.119 +    struct list_head *ent;
  16.120 +
  16.121 +    /* Queue up a batch of requests. */
  16.122 +    while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
  16.123 +            !list_empty(&io_schedule_list) )
  16.124 +    {
  16.125 +        ent = io_schedule_list.next;
  16.126 +        blkif = list_entry(ent, blkif_t, blkdev_list);
  16.127 +        blkif_get(blkif);
  16.128 +        remove_from_blkdev_list(blkif);
  16.129 +        if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
  16.130 +            add_to_blkdev_list_tail(blkif);
  16.131 +        blkif_put(blkif);
  16.132 +    }
  16.133 +
  16.134 +    /* Push the batch through to disc. */
  16.135 +    run_task_queue(&tq_disk);
  16.136 +}
  16.137 +
  16.138 +static void maybe_trigger_io_schedule(void)
  16.139 +{
  16.140 +    /*
  16.141 +     * Needed so that two processes, who together make the following predicate
  16.142 +     * true, don't both read stale values and evaluate the predicate
  16.143 +     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
  16.144 +     */
  16.145 +    smp_mb();
  16.146 +
  16.147 +    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
  16.148 +         !list_empty(&io_schedule_list) )
  16.149 +        tasklet_schedule(&io_schedule_tasklet);
  16.150 +}
  16.151 +
  16.152 +
  16.153 +
  16.154 +/******************************************************************
  16.155 + * COMPLETION CALLBACK -- Called as bh->b_end_io()
  16.156 + */
  16.157 +
  16.158 +static void end_block_io_op(struct buffer_head *bh, int uptodate)
  16.159 +{
  16.160 +    pending_req_t *pending_req = bh->b_private;
  16.161 +
  16.162 +    /* An error fails the entire request. */
  16.163 +    if ( !uptodate )
  16.164 +    {
  16.165 +        DPRINTK("Buffer not up-to-date at end of operation\n");
  16.166 +        pending_req->status = 2;
  16.167 +    }
  16.168 +
  16.169 +    unlock_buffer(virt_to_phys(bh->b_data), 
  16.170 +                  bh->b_size, 
  16.171 +                  (pending_req->operation==READ));
  16.172 +    
  16.173 +    if ( atomic_dec_and_test(&pending_req->pendcnt) )
  16.174 +    {
  16.175 +        make_response(pending_req->blkif, pending_req->id,
  16.176 +                      pending_req->operation, pending_req->status);
  16.177 +        blkif_put(pending_req->blkif);
  16.178 +        spin_lock(&pend_prod_lock);
  16.179 +        pending_ring[MASK_PEND_IDX(pending_prod)] = 
  16.180 +            pending_req - pending_reqs;
  16.181 +        pending_prod++;
  16.182 +        spin_unlock(&pend_prod_lock);
  16.183 +        maybe_trigger_io_schedule();
  16.184 +    }
  16.185 +}
  16.186 +
  16.187 +
  16.188 +
  16.189 +/******************************************************************************
  16.190 + * NOTIFICATION FROM GUEST OS.
  16.191 + */
  16.192 +
  16.193 +void vblkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
  16.194 +{
  16.195 +    blkif_t *blkif = dev_id;
  16.196 +    add_to_blkdev_list_tail(blkif);
  16.197 +    maybe_trigger_io_schedule();
  16.198 +}
  16.199 +
  16.200 +
  16.201 +
  16.202 +/******************************************************************
  16.203 + * DOWNWARD CALLS -- These interface with the block-device layer proper.
  16.204 + */
  16.205 +
  16.206 +static int lock_buffer(blkif_t *blkif,
  16.207 +                       unsigned long buffer,
  16.208 +                       unsigned short size,
  16.209 +                       int writeable_buffer)
  16.210 +{
  16.211 +    unsigned long    pfn;
  16.212 +
  16.213 +    for ( pfn = buffer >> PAGE_SHIFT; 
  16.214 +          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
  16.215 +          pfn++ )
  16.216 +    {
  16.217 +    }
  16.218 +
  16.219 +    return 1;
  16.220 +
  16.221 + fail:
  16.222 +    while ( pfn-- > (buffer >> PAGE_SHIFT) )
  16.223 +    {        
  16.224 +    }
  16.225 +    return 0;
  16.226 +}
  16.227 +
  16.228 +static void unlock_buffer(unsigned long buffer,
  16.229 +                          unsigned short size,
  16.230 +                          int writeable_buffer)
  16.231 +{
  16.232 +    unsigned long pfn;
  16.233 +
  16.234 +    for ( pfn = buffer >> PAGE_SHIFT; 
  16.235 +          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
  16.236 +          pfn++ )
  16.237 +    {
  16.238 +    }
  16.239 +}
  16.240 +
  16.241 +static int do_block_io_op(blkif_t *blkif, int max_to_do)
  16.242 +{
  16.243 +    blk_ring_t *blk_ring = blkif->blk_ring_base;
  16.244 +    blk_ring_req_entry_t *req;
  16.245 +    BLK_RING_IDX i;
  16.246 +    int more_to_do = 0;
  16.247 +
  16.248 +    /* Take items off the comms ring, taking care not to overflow. */
  16.249 +    for ( i = blkif->blk_req_cons; 
  16.250 +          (i != blk_ring->req_prod) && ((i-blkif->blk_resp_prod) != 
  16.251 +                                        BLK_RING_SIZE);
  16.252 +          i++ )
  16.253 +    {
  16.254 +        if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
  16.255 +        {
  16.256 +            more_to_do = 1;
  16.257 +            break;
  16.258 +        }
  16.259 +        
  16.260 +        req = &blk_ring->ring[MASK_BLK_IDX(i)].req;
  16.261 +        switch ( req->operation )
  16.262 +        {
  16.263 +        case BLKIF_OP_READ:
  16.264 +        case BLKIF_OP_WRITE:
  16.265 +            dispatch_rw_block_io(blkif, req);
  16.266 +            break;
  16.267 +
  16.268 +        default:
  16.269 +            DPRINTK("error: unknown block io operation [%d]\n",
   16.270 +                    req->operation);
   16.271 +            make_response(blkif, req->id, 
   16.272 +                          req->operation, 1);
  16.273 +            break;
  16.274 +        }
  16.275 +    }
  16.276 +
  16.277 +    blkif->blk_req_cons = i;
  16.278 +    return more_to_do;
  16.279 +}
  16.280 +
  16.281 +static void dispatch_rw_block_io(blkif_t *blkif,
  16.282 +                                 blk_ring_req_entry_t *req)
  16.283 +{
  16.284 +    extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
  16.285 +    struct buffer_head *bh;
   16.286 +    int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
  16.287 +    unsigned short nr_sects;
  16.288 +    unsigned long buffer;
  16.289 +    int i, tot_sects;
  16.290 +    pending_req_t *pending_req;
  16.291 +
  16.292 +    /* We map virtual scatter/gather segments to physical segments. */
  16.293 +    int new_segs, nr_psegs = 0;
  16.294 +    phys_seg_t phys_seg[MAX_BLK_SEGS * 2];
  16.295 +
  16.296 +    /* Check that number of segments is sane. */
  16.297 +    if ( unlikely(req->nr_segments == 0) || 
  16.298 +         unlikely(req->nr_segments > MAX_BLK_SEGS) )
  16.299 +    {
  16.300 +        DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
  16.301 +        goto bad_descriptor;
  16.302 +    }
  16.303 +
  16.304 +    /*
  16.305 +     * Check each address/size pair is sane, and convert into a
  16.306 +     * physical device and block offset. Note that if the offset and size
  16.307 +     * crosses a virtual extent boundary, we may end up with more
  16.308 +     * physical scatter/gather segments than virtual segments.
  16.309 +     */
  16.310 +    for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects )
  16.311 +    {
  16.312 +        buffer   = req->buffer_and_sects[i] & ~0x1FF;
  16.313 +        nr_sects = req->buffer_and_sects[i] &  0x1FF;
  16.314 +
  16.315 +        if ( unlikely(nr_sects == 0) )
  16.316 +        {
  16.317 +            DPRINTK("zero-sized data request\n");
  16.318 +            goto bad_descriptor;
  16.319 +        }
  16.320 +
  16.321 +        phys_seg[nr_psegs].dev           = req->device;
  16.322 +        phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
  16.323 +        phys_seg[nr_psegs].buffer        = buffer;
  16.324 +        phys_seg[nr_psegs].nr_sects      = nr_sects;
  16.325 +
  16.326 +        /* Translate the request into the relevant 'physical device' */
  16.327 +        new_segs = vbd_translate(&phys_seg[nr_psegs], blkif, operation);
  16.328 +        if ( new_segs < 0 )
  16.329 +        { 
  16.330 +            DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
  16.331 +                    operation == READ ? "read" : "write", 
  16.332 +                    req->sector_number + tot_sects, 
  16.333 +                    req->sector_number + tot_sects + nr_sects, 
  16.334 +                    req->device); 
  16.335 +            goto bad_descriptor;
  16.336 +        }
  16.337 +  
  16.338 +        nr_psegs += new_segs;
  16.339 +        ASSERT(nr_psegs <= MAX_BLK_SEGS*2);
  16.340 +    }
  16.341 +
  16.342 +    for ( i = 0; i < nr_psegs; i++ )
  16.343 +    {
  16.344 +        if ( unlikely(!lock_buffer(blkif, phys_seg[i].buffer, 
  16.345 +                                   phys_seg[i].nr_sects << 9,
  16.346 +                                   operation==READ)) )
  16.347 +        {
  16.348 +            DPRINTK("invalid buffer\n");
  16.349 +            while ( i-- > 0 )
  16.350 +                unlock_buffer(phys_seg[i].buffer, 
  16.351 +                              phys_seg[i].nr_sects << 9,
  16.352 +                              operation==READ);
  16.353 +            goto bad_descriptor;
  16.354 +        }
  16.355 +    }
  16.356 +
  16.357 +    pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]];
  16.358 +    pending_req->blkif     = blkif;
  16.359 +    pending_req->id        = req->id;
  16.360 +    pending_req->operation = operation;
  16.361 +    pending_req->status    = 0;
  16.362 +    atomic_set(&pending_req->pendcnt, nr_psegs);
  16.363 +
  16.364 +    blkif_get(blkif);
  16.365 +
  16.366 +    /* Now we pass each segment down to the real blkdev layer. */
  16.367 +    for ( i = 0; i < nr_psegs; i++ )
  16.368 +    {
  16.369 +        bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
  16.370 +        if ( unlikely(bh == NULL) )
  16.371 +            panic("bh is null\n");
  16.372 +        memset(bh, 0, sizeof (struct buffer_head));
  16.373 +    
  16.374 +        bh->b_size          = phys_seg[i].nr_sects << 9;
  16.375 +        bh->b_dev           = phys_seg[i].dev;
  16.376 +        bh->b_rsector       = (unsigned long)phys_seg[i].sector_number;
  16.377 +
  16.378 +        /* SMH: we store a 'pseudo-virtual' bogus address in b_data since
  16.379 +           later code will undo this transformation (i.e. +-PAGE_OFFSET). */
  16.380 +        bh->b_data          = phys_to_virt(phys_seg[i].buffer);
  16.381 + 
  16.382 +        /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */
  16.383 +        bh->b_page          = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; 
  16.384 +        bh->b_end_io        = end_block_io_op;
  16.385 +        bh->b_private       = pending_req;
  16.386 +
  16.387 +        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock);
  16.388 +        if ( operation == WRITE )
  16.389 +            bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
  16.390 +
  16.391 +        atomic_set(&bh->b_count, 1);
  16.392 +
  16.393 +        /* Dispatch a single request. We'll flush it to disc later. */
  16.394 +        submit_bh(operation, bh);
  16.395 +    }
  16.396 +
  16.397 +    return;
  16.398 +
  16.399 + bad_descriptor:
  16.400 +    make_response(blkif, req->id, req->operation, 1);
  16.401 +} 
  16.402 +
  16.403 +
  16.404 +
  16.405 +/******************************************************************
  16.406 + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
  16.407 + */
  16.408 +
  16.409 +
  16.410 +static void make_response(blkif_t *blkif, unsigned long id, 
  16.411 +                          unsigned short op, unsigned long st)
  16.412 +{
  16.413 +    blk_ring_resp_entry_t *resp;
  16.414 +
  16.415 +    /* Place on the response ring for the relevant domain. */ 
  16.416 +    spin_lock(&blkif->blk_ring_lock);
  16.417 +    resp = &blkif->blk_ring_base->
  16.418 +        ring[MASK_BLK_IDX(blkif->blk_resp_prod)].resp;
  16.419 +    resp->id        = id;
  16.420 +    resp->operation = op;
  16.421 +    resp->status    = st;
  16.422 +    wmb();
  16.423 +    blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod;
  16.424 +    spin_unlock(&blkif->blk_ring_lock);
  16.425 +
  16.426 +    /* Kick the relevant domain. */
  16.427 +    notify_via_evtchn(blkif->evtchn);
  16.428 +}
  16.429 +
  16.430 +static void blkif_debug_int(int irq, void *unused, struct pt_regs *regs)
  16.431 +{
  16.432 +#if 0
  16.433 +    unsigned long flags;
  16.434 +    struct task_struct *p;
  16.435 +    blk_ring_t *blk_ring;
  16.436 +    int i;
  16.437 +
  16.438 +    printk("Dumping block queue stats: nr_pending = %d"
  16.439 +           " (prod=0x%08x,cons=0x%08x)\n",
  16.440 +           NR_PENDING_REQS, pending_prod, pending_cons);
  16.441 +
  16.442 +    read_lock_irqsave(&tasklist_lock, flags);
  16.443 +    for_each_domain ( p )
  16.444 +    {
  16.445 +        printk("Domain: %llu\n", blkif->domain);
  16.446 +        blk_ring = blkif->blk_ring_base;
  16.447 +        printk("  req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/"
  16.448 +               "0x%08x on_list=%d\n",
  16.449 +               blk_ring->req_prod, blkif->blk_req_cons,
  16.450 +               blk_ring->resp_prod, blkif->blk_resp_prod,
  16.451 +               __on_blkdev_list(p));
  16.452 +    }
  16.453 +    read_unlock_irqrestore(&tasklist_lock, flags);
  16.454 +
  16.455 +    for ( i = 0; i < MAX_PENDING_REQS; i++ )
  16.456 +    {
  16.457 +        printk("Pend%d: dom=%p, id=%08lx, cnt=%d, op=%d, status=%d\n",
  16.458 +               i, pending_reqs[i].domain, pending_reqs[i].id,
  16.459 +               atomic_read(&pending_reqs[i].pendcnt), 
  16.460 +               pending_reqs[i].operation, pending_reqs[i].status);
  16.461 +    }
  16.462 +#endif
  16.463 +}
  16.464 +
  16.465 +void unlink_blkdev_info(blkif_t *blkif)
  16.466 +{
  16.467 +    unsigned long flags;
  16.468 +
  16.469 +    spin_lock_irqsave(&io_schedule_list_lock, flags);
  16.470 +    if ( __on_blkdev_list(blkif) )
  16.471 +    {
  16.472 +        list_del(&blkif->blkdev_list);
  16.473 +        blkif->blkdev_list.next = (void *)0xdeadbeef;
  16.474 +        blkif_put(blkif);
  16.475 +    }
  16.476 +    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
  16.477 +}
  16.478 +
  16.479 +static int __init init_module(void)
  16.480 +{
  16.481 +    int i;
  16.482 +
  16.483 +    pending_cons = 0;
  16.484 +    pending_prod = MAX_PENDING_REQS;
  16.485 +    memset(pending_reqs, 0, sizeof(pending_reqs));
  16.486 +    for ( i = 0; i < MAX_PENDING_REQS; i++ )
  16.487 +        pending_ring[i] = i;
  16.488 +    
  16.489 +    for ( i = 0; i < NR_CPUS; i++ )
  16.490 +        completed_bhs[i] = NULL;
  16.491 +        
  16.492 +    spin_lock_init(&io_schedule_list_lock);
  16.493 +    INIT_LIST_HEAD(&io_schedule_list);
  16.494 +
  16.495 +    if ( request_irq(bind_virq_to_irq(VIRQ_DEBUG), blkif_debug_int, 
  16.496 +                     SA_SHIRQ, "vblkif-backend-dbg", &blkif_debug_int) != 0 )
  16.497 +        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
  16.498 +
  16.499 +    buffer_head_cachep = kmem_cache_create(
  16.500 +        "buffer_head_cache", sizeof(struct buffer_head),
  16.501 +        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
  16.502 +
  16.503 +    return 0;
  16.504 +}
  16.505 +
  16.506 +static void cleanup_module(void)
  16.507 +{
  16.508 +}
  16.509 +
  16.510 +module_init(init_module);
  16.511 +module_exit(cleanup_module);
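
A brief illustrative aside on the ring discipline in the new main.c above (not part of the changeset; the helper below is invented for illustration). do_block_io_op() stops consuming when max_to_do is used up, when the pending-request pool is full, or when (i - blkif->blk_resp_prod) reaches BLK_RING_SIZE, i.e. when every ring slot holds a request that has not yet been answered. With free-running indices that are masked only on array access (MASK_BLK_IDX), the occupancy test can be spelled as:

    /* Illustrative sketch only: number of ring slots holding requests that
     * have not yet been answered. Assumes BLK_RING_SIZE is a power of two
     * and the indices are free-running counters, as in do_block_io_op(). */
    static inline unsigned int blk_ring_in_use(BLK_RING_IDX req_cons,
                                               BLK_RING_IDX resp_prod)
    {
        return (unsigned int)(req_cons - resp_prod);  /* 0 .. BLK_RING_SIZE */
    }

make_response() is the matching producer: it fills the next response slot, issues wmb() so the payload is visible before the index update, publishes ++blk_resp_prod, and kicks the frontend via notify_via_evtchn().
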
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vblkif/backend/vbd.c	Thu Apr 22 13:56:30 2004 +0000
    17.3 @@ -0,0 +1,701 @@
    17.4 +/******************************************************************************
    17.5 + * arch/xen/drivers/vblkif/backend/vbd.c
    17.6 + * 
    17.7 + * Routines for managing virtual block devices (VBDs).
    17.8 + * 
    17.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   17.10 + */
   17.11 +
   17.12 +#include "common.h"
   17.13 +
   17.14 +long __vbd_create(struct task_struct *p,
   17.15 +                  unsigned short vdevice,
   17.16 +                  unsigned char mode,
   17.17 +                  unsigned char type)
   17.18 +{
   17.19 +    vbd_t *vbd; 
   17.20 +    rb_node_t **rb_p, *rb_parent = NULL;
   17.21 +    long ret = 0;
   17.22 +
   17.23 +    spin_lock(&p->vbd_lock);
   17.24 +
   17.25 +    rb_p = &p->vbd_rb.rb_node;
   17.26 +    while ( *rb_p != NULL )
   17.27 +    {
   17.28 +        rb_parent = *rb_p;
   17.29 +        vbd = rb_entry(rb_parent, vbd_t, rb);
   17.30 +        if ( vdevice < vbd->vdevice )
   17.31 +        {
   17.32 +            rb_p = &rb_parent->rb_left;
   17.33 +        }
   17.34 +        else if ( vdevice > vbd->vdevice )
   17.35 +        {
   17.36 +            rb_p = &rb_parent->rb_right;
   17.37 +        }
   17.38 +        else
   17.39 +        {
   17.40 +            DPRINTK("vbd_create attempted for already existing vbd\n");
   17.41 +            ret = -EINVAL;
   17.42 +            goto out;
   17.43 +        }
   17.44 +    }
   17.45 +
   17.46 +    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
   17.47 +    {
   17.48 +        DPRINTK("vbd_create: out of memory\n");
   17.49 +        ret = -ENOMEM;
   17.50 +        goto out;
   17.51 +    }
   17.52 +
   17.53 +    vbd->vdevice = vdevice; 
   17.54 +    vbd->mode    = mode; 
   17.55 +    vbd->type    = type;
   17.56 +    vbd->extents = NULL; 
   17.57 +
   17.58 +    rb_link_node(&vbd->rb, rb_parent, rb_p);
   17.59 +    rb_insert_color(&vbd->rb, &p->vbd_rb);
   17.60 +
   17.61 + out:
   17.62 +    spin_unlock(&p->vbd_lock);
   17.63 +    return ret; 
   17.64 +}
   17.65 +
   17.66 +
   17.67 +long vbd_create(vbd_create_t *create) 
   17.68 +{
   17.69 +    struct task_struct *p;
   17.70 +    long rc;
   17.71 +
   17.72 +    if ( unlikely(!IS_PRIV(current)) )
   17.73 +        return -EPERM;
   17.74 +
   17.75 +    if ( unlikely((p = find_domain_by_id(create->domain)) == NULL) )
   17.76 +    {
   17.77 +        DPRINTK("vbd_create attempted for non-existent domain %llu\n", 
   17.78 +                create->domain); 
   17.79 +        return -EINVAL; 
   17.80 +    }
   17.81 +
   17.82 +    rc = __vbd_create(p, create->vdevice, create->mode,
   17.83 +                      XD_TYPE_DISK | XD_FLAG_VIRT);
   17.84 +
   17.85 +    put_task_struct(p);
   17.86 +
   17.87 +    return rc;
   17.88 +}
   17.89 +
   17.90 +
   17.91 +long __vbd_grow(struct task_struct *p,
   17.92 +                unsigned short vdevice,
   17.93 +                xen_extent_t *extent)
   17.94 +{
   17.95 +    xen_extent_le_t **px, *x; 
   17.96 +    vbd_t *vbd = NULL;
   17.97 +    rb_node_t *rb;
   17.98 +    long ret = 0;
   17.99 +
  17.100 +    spin_lock(&p->vbd_lock);
  17.101 +
  17.102 +    rb = p->vbd_rb.rb_node;
  17.103 +    while ( rb != NULL )
  17.104 +    {
  17.105 +        vbd = rb_entry(rb, vbd_t, rb);
  17.106 +        if ( vdevice < vbd->vdevice )
  17.107 +            rb = rb->rb_left;
  17.108 +        else if ( vdevice > vbd->vdevice )
  17.109 +            rb = rb->rb_right;
  17.110 +        else
  17.111 +            break;
  17.112 +    }
  17.113 +
  17.114 +    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
  17.115 +    {
  17.116 +        DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
  17.117 +        ret = -EINVAL;
  17.118 +        goto out;
  17.119 +    } 
  17.120 +
  17.121 +    if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) )
  17.122 +    {
  17.123 +        DPRINTK("vbd_grow: out of memory\n");
  17.124 +        ret = -ENOMEM;
  17.125 +        goto out;
  17.126 +    }
  17.127 + 
  17.128 +    x->extent.device       = extent->device; 
  17.129 +    x->extent.start_sector = extent->start_sector; 
  17.130 +    x->extent.nr_sectors   = extent->nr_sectors; 
  17.131 +    x->next                = (xen_extent_le_t *)NULL; 
  17.132 +
  17.133 +    for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) 
  17.134 +        continue;
  17.135 +
  17.136 +    *px = x;
  17.137 +
  17.138 + out:
  17.139 +    spin_unlock(&p->vbd_lock);
  17.140 +    return ret;
  17.141 +}
  17.142 +
  17.143 +
  17.144 +/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
  17.145 +long vbd_grow(vbd_grow_t *grow) 
  17.146 +{
  17.147 +    struct task_struct *p;
  17.148 +    long rc;
  17.149 +
  17.150 +    if ( unlikely(!IS_PRIV(current)) )
  17.151 +        return -EPERM; 
  17.152 +
  17.153 +    if ( unlikely((p = find_domain_by_id(grow->domain)) == NULL) )
  17.154 +    {
  17.155 +        DPRINTK("vbd_grow: attempted for non-existent domain %llu\n", 
  17.156 +                grow->domain); 
  17.157 +        return -EINVAL; 
  17.158 +    }
  17.159 +
  17.160 +    rc = __vbd_grow(p, grow->vdevice, &grow->extent);
  17.161 +
  17.162 +    put_task_struct(p);
  17.163 +
  17.164 +    return rc;
  17.165 +}
  17.166 +
  17.167 +
  17.168 +long vbd_shrink(vbd_shrink_t *shrink)
  17.169 +{
  17.170 +    struct task_struct *p; 
  17.171 +    xen_extent_le_t **px, *x; 
  17.172 +    vbd_t *vbd = NULL;
  17.173 +    rb_node_t *rb;
  17.174 +    long ret = 0;
  17.175 +
  17.176 +    if ( !IS_PRIV(current) )
  17.177 +        return -EPERM; 
  17.178 +
  17.179 +    if ( (p = find_domain_by_id(shrink->domain)) == NULL )
  17.180 +    {
  17.181 +        DPRINTK("vbd_shrink attempted for non-existent domain %llu\n", 
  17.182 +                shrink->domain); 
  17.183 +        return -EINVAL; 
  17.184 +    }
  17.185 +
  17.186 +    spin_lock(&p->vbd_lock);
  17.187 +
  17.188 +    rb = p->vbd_rb.rb_node;
  17.189 +    while ( rb != NULL )
  17.190 +    {
  17.191 +        vbd = rb_entry(rb, vbd_t, rb);
  17.192 +        if ( shrink->vdevice < vbd->vdevice )
  17.193 +            rb = rb->rb_left;
  17.194 +        else if ( shrink->vdevice > vbd->vdevice )
  17.195 +            rb = rb->rb_right;
  17.196 +        else
  17.197 +            break;
  17.198 +    }
  17.199 +
  17.200 +    if ( unlikely(vbd == NULL) || 
  17.201 +         unlikely(vbd->vdevice != shrink->vdevice) ||
  17.202 +         unlikely(vbd->extents == NULL) )
  17.203 +    {
  17.204 +        DPRINTK("vbd_shrink: attempt to remove non-existent extent.\n"); 
  17.205 +        ret = -EINVAL;
  17.206 +        goto out;
  17.207 +    }
  17.208 +
  17.209 +    /* Find the last extent. We now know that there is at least one. */
  17.210 +    for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next )
  17.211 +        continue;
  17.212 +
  17.213 +    x   = *px;
  17.214 +    *px = x->next;
  17.215 +    kfree(x);
  17.216 +
  17.217 + out:
  17.218 +    spin_unlock(&p->vbd_lock);
  17.219 +    put_task_struct(p);
  17.220 +    return ret; 
  17.221 +}
  17.222 +
  17.223 +
  17.224 +long vbd_setextents(vbd_setextents_t *setextents)
  17.225 +{
  17.226 +    struct task_struct *p; 
  17.227 +    xen_extent_t e;
  17.228 +    xen_extent_le_t *new_extents, *x, *t; 
  17.229 +    vbd_t *vbd = NULL;
  17.230 +    rb_node_t *rb;
  17.231 +    int i;
  17.232 +    long ret = 0;
  17.233 +
  17.234 +    if ( !IS_PRIV(current) )
  17.235 +        return -EPERM; 
  17.236 +
  17.237 +    if ( (p = find_domain_by_id(setextents->domain)) == NULL )
  17.238 +    {
  17.239 +        DPRINTK("vbd_setextents attempted for non-existent domain %llu\n", 
  17.240 +                setextents->domain); 
  17.241 +        return -EINVAL; 
  17.242 +    }
  17.243 +
  17.244 +    spin_lock(&p->vbd_lock);
  17.245 +
  17.246 +    rb = p->vbd_rb.rb_node;
  17.247 +    while ( rb != NULL )
  17.248 +    {
  17.249 +        vbd = rb_entry(rb, vbd_t, rb);
  17.250 +        if ( setextents->vdevice < vbd->vdevice )
  17.251 +            rb = rb->rb_left;
  17.252 +        else if ( setextents->vdevice > vbd->vdevice )
  17.253 +            rb = rb->rb_right;
  17.254 +        else
  17.255 +            break;
  17.256 +    }
  17.257 +
  17.258 +    if ( unlikely(vbd == NULL) || 
  17.259 +         unlikely(vbd->vdevice != setextents->vdevice) )
  17.260 +    {
  17.261 +        DPRINTK("vbd_setextents: attempt to modify non-existent VBD.\n"); 
  17.262 +        ret = -EINVAL;
  17.263 +        goto out;
  17.264 +    }
  17.265 +
  17.266 +    /* Construct the new extent list. */
  17.267 +    new_extents = NULL;
  17.268 +    for ( i = setextents->nr_extents - 1; i >= 0; i-- )
  17.269 +    {
  17.270 +        if ( unlikely(copy_from_user(&e, 
  17.271 +                                     &setextents->extents[i], 
  17.272 +                                     sizeof(e)) != 0) )
  17.273 +        {
  17.274 +            DPRINTK("vbd_setextents: copy_from_user failed\n");
  17.275 +            ret = -EFAULT;
  17.276 +            goto free_and_out;
  17.277 +        }
  17.278 +        
  17.279 +        if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL))
  17.280 +                      == NULL) )
  17.281 +        {
  17.282 +            DPRINTK("vbd_setextents: out of memory\n");
  17.283 +            ret = -ENOMEM;
  17.284 +            goto free_and_out;
  17.285 +        }
  17.286 +        
  17.287 +        x->extent = e;
  17.288 +        x->next   = new_extents;
  17.289 +
  17.290 +        new_extents = x;
  17.291 +    }
  17.292 +
  17.293 +    /* Delete the old extent list _after_ successfully creating the new. */
  17.294 +    for ( x = vbd->extents; x != NULL; x = t )
  17.295 +    {
  17.296 +        t = x->next;
  17.297 +        kfree(x);
  17.298 +    }
  17.299 +
  17.300 +    /* Make the new list visible. */
  17.301 +    vbd->extents = new_extents;
  17.302 +
  17.303 + out:
  17.304 +    spin_unlock(&p->vbd_lock);
  17.305 +    put_task_struct(p);
  17.306 +    return ret;
  17.307 +
  17.308 + free_and_out:
  17.309 +    /* Failed part-way through the new list. Delete all that we managed. */
  17.310 +    for ( x = new_extents; x != NULL; x = t )
  17.311 +    {
  17.312 +        t = x->next;
  17.313 +        kfree(x);
  17.314 +    }
  17.315 +    goto out;
  17.316 +}
  17.317 +
  17.318 +
  17.319 +long vbd_delete(vbd_delete_t *delete) 
  17.320 +{
  17.321 +    struct task_struct *p; 
  17.322 +    vbd_t *vbd;
  17.323 +    rb_node_t *rb;
  17.324 +    xen_extent_le_t *x, *t;
  17.325 +
  17.326 +    if( !IS_PRIV(current) )
  17.327 +        return -EPERM; 
  17.328 +
  17.329 +    if ( (p = find_domain_by_id(delete->domain)) == NULL )
  17.330 +    {
  17.331 +        DPRINTK("vbd_delete attempted for non-existent domain %llu\n", 
  17.332 +                delete->domain); 
  17.333 +        return -EINVAL; 
  17.334 +    }
  17.335 +
  17.336 +    spin_lock(&p->vbd_lock);
  17.337 +
  17.338 +    rb = p->vbd_rb.rb_node;
  17.339 +    while ( rb != NULL )
  17.340 +    {
  17.341 +        vbd = rb_entry(rb, vbd_t, rb);
  17.342 +        if ( delete->vdevice < vbd->vdevice )
  17.343 +            rb = rb->rb_left;
  17.344 +        else if ( delete->vdevice > vbd->vdevice )
  17.345 +            rb = rb->rb_right;
  17.346 +        else
  17.347 +            goto found;
  17.348 +    }
  17.349 +
   17.350 +    DPRINTK("vbd_delete attempted for non-existent VBD.\n");
  17.351 +
  17.352 +    spin_unlock(&p->vbd_lock);
  17.353 +    put_task_struct(p);
  17.354 +    return -EINVAL;
  17.355 +
  17.356 + found:
  17.357 +    rb_erase(rb, &p->vbd_rb);
  17.358 +    x = vbd->extents;
  17.359 +    kfree(vbd);
  17.360 +
  17.361 +    while ( x != NULL )
  17.362 +    {
  17.363 +        t = x->next;
  17.364 +        kfree(x);
  17.365 +        x = t;
  17.366 +    }
  17.367 +    
  17.368 +    spin_unlock(&p->vbd_lock);
  17.369 +    put_task_struct(p);
  17.370 +    return 0;
  17.371 +}
  17.372 +
  17.373 +
  17.374 +void destroy_all_vbds(struct task_struct *p)
  17.375 +{
  17.376 +    vbd_t *vbd;
  17.377 +    rb_node_t *rb;
  17.378 +    xen_extent_le_t *x, *t;
  17.379 +
  17.380 +    spin_lock(&p->vbd_lock);
  17.381 +
  17.382 +    while ( (rb = p->vbd_rb.rb_node) != NULL )
  17.383 +    {
  17.384 +        vbd = rb_entry(rb, vbd_t, rb);
  17.385 +
  17.386 +        rb_erase(rb, &p->vbd_rb);
  17.387 +        x = vbd->extents;
  17.388 +        kfree(vbd);
  17.389 +        
  17.390 +        while ( x != NULL )
  17.391 +        {
  17.392 +            t = x->next;
  17.393 +            kfree(x);
  17.394 +            x = t;
  17.395 +        }          
  17.396 +    }
  17.397 +
  17.398 +    spin_unlock(&p->vbd_lock);
  17.399 +}
  17.400 +
  17.401 +
  17.402 +static int vbd_probe_single(xen_disk_info_t *xdi, 
  17.403 +                            vbd_t *vbd, 
  17.404 +                            struct task_struct *p)
  17.405 +{
  17.406 +    xen_extent_le_t *x; 
  17.407 +    xen_disk_t cur_disk; 
  17.408 +
  17.409 +    if ( xdi->count == xdi->max )
  17.410 +    {
  17.411 +        DPRINTK("vbd_probe_devices: out of space for probe.\n"); 
  17.412 +        return -ENOMEM; 
  17.413 +    }
  17.414 +
  17.415 +    cur_disk.device = vbd->vdevice; 
  17.416 +    cur_disk.info   = vbd->type;
  17.417 +    if ( !VBD_CAN_WRITE(vbd) )
  17.418 +        cur_disk.info |= XD_FLAG_RO; 
  17.419 +    cur_disk.capacity = 0ULL;
  17.420 +    for ( x = vbd->extents; x != NULL; x = x->next )
  17.421 +        cur_disk.capacity += x->extent.nr_sectors; 
  17.422 +    cur_disk.domain = p->domain; 
  17.423 +        
  17.424 +    /* Now copy into relevant part of user-space buffer */
  17.425 +    if( copy_to_user(&xdi->disks[xdi->count], 
  17.426 +                     &cur_disk, 
  17.427 +                     sizeof(xen_disk_t)) )
  17.428 +    { 
  17.429 +        DPRINTK("vbd_probe_devices: copy_to_user failed\n");
  17.430 +        return -EFAULT;
  17.431 +    } 
  17.432 +        
  17.433 +    xdi->count++; 
  17.434 +
  17.435 +    return 0;
  17.436 +}
  17.437 +
  17.438 +
  17.439 +static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
  17.440 +{
  17.441 +    int rc = 0;
  17.442 +    rb_node_t *rb;
  17.443 +
  17.444 +    spin_lock(&p->vbd_lock);
  17.445 +
  17.446 +    if ( (rb = p->vbd_rb.rb_node) == NULL )
  17.447 +        goto out;
  17.448 +
  17.449 + new_subtree:
  17.450 +    /* STEP 1. Find least node (it'll be left-most). */
  17.451 +    while ( rb->rb_left != NULL )
  17.452 +        rb = rb->rb_left;
  17.453 +
  17.454 +    for ( ; ; )
  17.455 +    {
  17.456 +        /* STEP 2. Dealt with left subtree. Now process current node. */
  17.457 +        if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 )
  17.458 +            goto out;
  17.459 +
  17.460 +        /* STEP 3. Process right subtree, if any. */
  17.461 +        if ( rb->rb_right != NULL )
  17.462 +        {
  17.463 +            rb = rb->rb_right;
  17.464 +            goto new_subtree;
  17.465 +        }
  17.466 +
   17.467 +        /* STEP 4. Done both subtrees. Head back through ancestors. */
  17.468 +        for ( ; ; ) 
  17.469 +        {
  17.470 +            /* We're done when we get back to the root node. */
  17.471 +            if ( rb->rb_parent == NULL )
  17.472 +                goto out;
  17.473 +            /* If we are left of parent, then parent is next to process. */
  17.474 +            if ( rb->rb_parent->rb_left == rb )
  17.475 +                break;
  17.476 +            /* If we are right of parent, then we climb to grandparent. */
  17.477 +            rb = rb->rb_parent;
  17.478 +        }
  17.479 +
  17.480 +        rb = rb->rb_parent;
  17.481 +    }
  17.482 +
  17.483 + out:
  17.484 +    spin_unlock(&p->vbd_lock);
  17.485 +    return rc;  
  17.486 +}
  17.487 +
  17.488 +
  17.489 +/*
  17.490 + * Return information about the VBDs available for a given domain, or for all 
  17.491 + * domains; in the general case the 'domain' argument will be 0 which means 
  17.492 + * "information about the caller"; otherwise the 'domain' argument will 
  17.493 + * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of 
  17.494 + * these cases require the caller to be privileged.
  17.495 + */
  17.496 +long vbd_probe(vbd_probe_t *probe) 
  17.497 +{
  17.498 +    struct task_struct *p = NULL; 
  17.499 +    unsigned long flags;
  17.500 +    long ret = 0;  
  17.501 +
  17.502 +    if ( probe->domain != 0 )
  17.503 +    { 
  17.504 +        /* We can only probe for ourselves (unless we're privileged). */
  17.505 +        if( (probe->domain != current->domain) && !IS_PRIV(current) )
  17.506 +            return -EPERM; 
  17.507 +
  17.508 +        if ( (probe->domain != VBD_PROBE_ALL) &&
  17.509 +             ((p = find_domain_by_id(probe->domain)) == NULL) )
  17.510 +        {
  17.511 +            DPRINTK("vbd_probe attempted for non-existent domain %llu\n", 
  17.512 +                    probe->domain); 
  17.513 +            return -EINVAL; 
  17.514 +        }
  17.515 +    }
  17.516 +    else
  17.517 +    { 
  17.518 +        /* Default is to probe for ourselves. */
  17.519 +        p = current; 
  17.520 +        get_task_struct(p); /* to mirror final put_task_struct */
  17.521 +    }
  17.522 +
  17.523 +    if ( probe->domain == VBD_PROBE_ALL )
  17.524 +    { 
  17.525 +        read_lock_irqsave(&tasklist_lock, flags);
  17.526 +        for_each_domain ( p )
  17.527 +        {
  17.528 +            if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
  17.529 +            { 
  17.530 +                read_unlock_irqrestore(&tasklist_lock, flags);
  17.531 +                goto out; 
  17.532 +            }
  17.533 +        }
  17.534 +        read_unlock_irqrestore(&tasklist_lock, flags);
  17.535 +    } 
  17.536 +    else if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
  17.537 +        goto out; 
  17.538 +
  17.539 + out: 
  17.540 +    if ( ret != 0 )
  17.541 +        DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret); 
  17.542 +    if ( p != NULL )
  17.543 +        put_task_struct(p); 
  17.544 +    return ret; 
  17.545 +}
  17.546 +
  17.547 +
  17.548 +long vbd_info(vbd_info_t *info) 
  17.549 +{
  17.550 +    struct task_struct *p; 
  17.551 +    xen_extent_le_t *x; 
  17.552 +    xen_extent_t *extents; 
  17.553 +    vbd_t *vbd = NULL;
  17.554 +    rb_node_t *rb;
  17.555 +    long ret = 0;  
  17.556 +   
  17.557 +    if ( (info->domain != current->domain) && !IS_PRIV(current) )
  17.558 +        return -EPERM; 
  17.559 +
  17.560 +    if ( (p = find_domain_by_id(info->domain)) == NULL )
  17.561 +    {
  17.562 +        DPRINTK("vbd_info attempted for non-existent domain %llu\n", 
  17.563 +                info->domain); 
  17.564 +        return -EINVAL; 
  17.565 +    }
  17.566 +
  17.567 +    spin_lock(&p->vbd_lock);
  17.568 +
  17.569 +    rb = p->vbd_rb.rb_node;
  17.570 +    while ( rb != NULL )
  17.571 +    {
  17.572 +        vbd = rb_entry(rb, vbd_t, rb);
  17.573 +        if ( info->vdevice < vbd->vdevice )
  17.574 +            rb = rb->rb_left;
  17.575 +        else if ( info->vdevice > vbd->vdevice )
  17.576 +            rb = rb->rb_right;
  17.577 +        else
  17.578 +            break;
  17.579 +    }
  17.580 +
  17.581 +    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) )
  17.582 +    {
  17.583 +        DPRINTK("vbd_info attempted on non-existent VBD.\n"); 
  17.584 +        ret = -EINVAL; 
  17.585 +        goto out; 
  17.586 +    }
  17.587 +
  17.588 +    info->mode     = vbd->mode;
  17.589 +    info->nextents = 0; 
  17.590 +
  17.591 +    extents = info->extents;
  17.592 +    for ( x = vbd->extents; x != NULL; x = x->next )
  17.593 +    {
  17.594 +        if ( info->nextents == info->maxextents )
  17.595 +            break;
  17.596 +        if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) )
  17.597 +        {
  17.598 +            DPRINTK("vbd_info: copy_to_user failed\n");
  17.599 +            ret = -EFAULT;
  17.600 +            goto out; 
  17.601 +        } 
  17.602 +        extents++;
  17.603 +        info->nextents++;
  17.604 +    }
  17.605 +
  17.606 + out: 
  17.607 +    spin_unlock(&p->vbd_lock);
  17.608 +    put_task_struct(p); 
  17.609 +    return ret; 
  17.610 +}
  17.611 +
  17.612 +
  17.613 +int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
  17.614 +{
  17.615 +    xen_extent_le_t *x; 
  17.616 +    vbd_t *vbd;
  17.617 +    rb_node_t *rb;
  17.618 +    xen_sector_t sec_off;
  17.619 +    unsigned long nr_secs;
  17.620 +
  17.621 +    spin_lock(&p->vbd_lock);
  17.622 +
  17.623 +    rb = p->vbd_rb.rb_node;
  17.624 +    while ( rb != NULL )
  17.625 +    {
  17.626 +        vbd = rb_entry(rb, vbd_t, rb);
  17.627 +        if ( pseg->dev < vbd->vdevice )
  17.628 +            rb = rb->rb_left;
  17.629 +        else if ( pseg->dev > vbd->vdevice )
  17.630 +            rb = rb->rb_right;
  17.631 +        else
  17.632 +            goto found;
  17.633 +    }
  17.634 +
   17.635 +    DPRINTK("vbd_translate: domain %llu attempted to access "
  17.636 +            "non-existent VBD.\n", p->domain); 
  17.637 +
  17.638 +    spin_unlock(&p->vbd_lock);
  17.639 +    return -ENODEV; 
  17.640 +
  17.641 + found:
  17.642 +
  17.643 +    if ( ((operation == READ) && !VBD_CAN_READ(vbd)) ||
  17.644 +         ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) )
  17.645 +    {
  17.646 +        spin_unlock(&p->vbd_lock);
  17.647 +        return -EACCES; 
  17.648 +    }
  17.649 +
  17.650 +    /*
  17.651 +     * Now iterate through the list of xen_extents, working out which should 
  17.652 +     * be used to perform the translation.
  17.653 +     */
  17.654 +    sec_off = pseg->sector_number; 
  17.655 +    nr_secs = pseg->nr_sects;
  17.656 +    for ( x = vbd->extents; x != NULL; x = x->next )
  17.657 +    { 
  17.658 +        if ( sec_off < x->extent.nr_sectors )
  17.659 +        {
  17.660 +            pseg->dev = x->extent.device; 
  17.661 +            pseg->sector_number = x->extent.start_sector + sec_off;
  17.662 +            if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) )
  17.663 +                goto overrun;
  17.664 +            spin_unlock(&p->vbd_lock);
  17.665 +            return 1;
  17.666 +        } 
  17.667 +        sec_off -= x->extent.nr_sectors; 
  17.668 +    }
  17.669 +
  17.670 +    DPRINTK("vbd_translate: end of vbd.\n");
  17.671 +    spin_unlock(&p->vbd_lock);
  17.672 +    return -EACCES; 
  17.673 +
  17.674 +    /*
  17.675 +     * Here we deal with overrun onto the following extent. We don't deal with 
  17.676 +     * overrun of more than one boundary since each request is restricted to 
  17.677 +     * 2^9 512-byte sectors, so it should be trivial for control software to 
  17.678 +     * ensure that extents are large enough to prevent excessive overrun.
  17.679 +     */
  17.680 + overrun:
  17.681 +
  17.682 +    /* Adjust length of first chunk to run to end of first extent. */
  17.683 +    pseg[0].nr_sects = x->extent.nr_sectors - sec_off;
  17.684 +
  17.685 +    /* Set second chunk buffer and length to start where first chunk ended. */
  17.686 +    pseg[1].buffer   = pseg[0].buffer + (pseg[0].nr_sects << 9);
  17.687 +    pseg[1].nr_sects = nr_secs - pseg[0].nr_sects;
  17.688 +
  17.689 +    /* Now move to the next extent. Check it exists and is long enough! */
  17.690 +    if ( unlikely((x = x->next) == NULL) || 
  17.691 +         unlikely(x->extent.nr_sectors < pseg[1].nr_sects) )
  17.692 +    {
  17.693 +        DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
  17.694 +        spin_unlock(&p->vbd_lock);
  17.695 +        return -EACCES;
  17.696 +    }
  17.697 +
  17.698 +    /* Store the real device and start sector for the second chunk. */
  17.699 +    pseg[1].dev           = x->extent.device;
  17.700 +    pseg[1].sector_number = x->extent.start_sector;
  17.701 +    
  17.702 +    spin_unlock(&p->vbd_lock);
  17.703 +    return 2;
  17.704 +}
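
As a worked illustration of the overrun path in vbd_translate() above (numbers invented): take a VBD built from two extents of 1000 sectors each, and a request for 8 sectors starting at virtual sector 996. Sector 996 falls in the first extent, so pseg[0] is aimed at that extent's device at start_sector + 996; only 1000 - 996 = 4 sectors remain there, so the overrun code trims pseg[0].nr_sects to 4, points pseg[1].buffer at pseg[0].buffer + (4 << 9), maps the remaining 4 sectors onto the start of the second extent, and returns 2. This single-boundary split is why dispatch_rw_block_io() in main.c sizes its phys_seg[] array at MAX_BLK_SEGS * 2.
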
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/vnetif/backend/main.c	Thu Apr 22 13:56:30 2004 +0000
    18.3 @@ -0,0 +1,26 @@
    18.4 +/******************************************************************************
    18.5 + * arch/xen/drivers/vnetif/backend/main.c
    18.6 + * 
     18.7 + * Back-end of the driver for virtual network devices. This portion of the
     18.8 + * driver exports a 'unified' network-device interface that can be accessed
    18.9 + * by any operating system that implements a compatible front end. A 
   18.10 + * reference front-end implementation can be found in:
   18.11 + *  arch/xen/drivers/vnetif/frontend
   18.12 + * 
   18.13 + * Copyright (c) 2004, K A Fraser
   18.14 + */
   18.15 +
   18.16 +#include <linux/config.h>
   18.17 +#include <linux/module.h>
   18.18 +
   18.19 +static int __init init_module(void)
   18.20 +{
   18.21 +    return 0;
   18.22 +}
   18.23 +
   18.24 +static void cleanup_module(void)
   18.25 +{
   18.26 +}
   18.27 +
   18.28 +module_init(init_module);
   18.29 +module_exit(cleanup_module);
    19.1 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c	Wed Apr 21 10:43:06 2004 +0000
    19.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c	Thu Apr 22 13:56:30 2004 +0000
    19.3 @@ -15,8 +15,7 @@
    19.4  #include <linux/irq.h>
    19.5  #include <linux/interrupt.h>
    19.6  #include <asm/ctrl_if.h>
    19.7 -#include <asm/hypervisor.h>
    19.8 -#include <asm/hypervisor-ifs/event_channel.h>
    19.9 +#include <asm/evtchn.h>
   19.10  
   19.11  static int        ctrl_if_evtchn;
   19.12  static int        ctrl_if_irq;
   19.13 @@ -50,10 +49,7 @@ static DECLARE_TASKLET(ctrl_if_rx_taskle
   19.14  
   19.15  static void ctrl_if_notify_controller(void)
   19.16  {
   19.17 -    evtchn_op_t evtchn_op;
   19.18 -    evtchn_op.cmd = EVTCHNOP_send;
   19.19 -    evtchn_op.u.send.local_port = ctrl_if_evtchn;
   19.20 -    (void)HYPERVISOR_event_channel_op(&evtchn_op);
   19.21 +    notify_via_evtchn(ctrl_if_evtchn);
   19.22  }
   19.23  
   19.24  static void ctrl_if_rxmsg_default_handler(ctrl_msg_t *msg, unsigned long id)
    20.1 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c	Wed Apr 21 10:43:06 2004 +0000
    20.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c	Thu Apr 22 13:56:30 2004 +0000
    20.3 @@ -321,7 +321,11 @@ asmlinkage void do_general_protection(st
    20.4  			u.ptr  = MMU_EXTENDED_COMMAND;
    20.5  			u.ptr |= (unsigned long)&default_ldt[0];
    20.6  			u.val  = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
    20.7 -			HYPERVISOR_mmu_update(&u, 1);
    20.8 +			if ( unlikely(HYPERVISOR_mmu_update(&u, 1) < 0) )
    20.9 +			{
   20.10 +				show_trace(NULL);
   20.11 +				panic("Failed to install default LDT");
   20.12 +			}
   20.13  			return;
   20.14  		}
   20.15  	}
    21.1 --- a/xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c	Wed Apr 21 10:43:06 2004 +0000
    21.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c	Thu Apr 22 13:56:30 2004 +0000
    21.3 @@ -116,7 +116,8 @@ static inline void __flush_page_update_q
    21.4  #endif
    21.5      idx = 0;
    21.6      wmb(); /* Make sure index is cleared first to avoid double updates. */
    21.7 -    HYPERVISOR_mmu_update(update_queue, _idx);
    21.8 +    if ( unlikely(HYPERVISOR_mmu_update(update_queue, _idx) < 0) )
    21.9 +        panic("Failed to execute MMU updates");
   21.10  }
   21.11  
   21.12  void _flush_page_update_queue(void)
   21.13 @@ -182,8 +183,8 @@ void queue_invlpg(unsigned long ptr)
   21.14      unsigned long flags;
   21.15      spin_lock_irqsave(&update_lock, flags);
   21.16      update_queue[idx].ptr  = MMU_EXTENDED_COMMAND;
   21.17 -    update_queue[idx].val  = ptr & PAGE_MASK;
   21.18 -    update_queue[idx].val |= MMUEXT_INVLPG;
   21.19 +    update_queue[idx].ptr |= ptr & PAGE_MASK;
   21.20 +    update_queue[idx].val  = MMUEXT_INVLPG;
   21.21      increment_index();
   21.22      spin_unlock_irqrestore(&update_lock, flags);
   21.23  }
    22.1 --- a/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c	Wed Apr 21 10:43:06 2004 +0000
    22.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c	Thu Apr 22 13:56:30 2004 +0000
    22.3 @@ -31,10 +31,28 @@ static inline void direct_remap_area_pte
    22.4                                           unsigned long address, 
    22.5                                           unsigned long size,
    22.6                                           unsigned long machine_addr, 
    22.7 -                                         pgprot_t prot)
    22.8 +                                         pgprot_t prot,
    22.9 +                                         domid_t  domid)
   22.10  {
   22.11      unsigned long end;
   22.12  
   22.13 +    mmu_update_t *u, *v;
   22.14 +    u = v = vmalloc(3*PAGE_SIZE); /* plenty */
   22.15 +
   22.16 +    /* If not I/O mapping then specify General-Purpose Subject Domain (GPS). */
   22.17 +    if ( domid != 0 )
   22.18 +    {
   22.19 +        v[0].val  = (unsigned long)(domid<<16) & ~0xFFFFUL;
   22.20 +        v[0].ptr  = (unsigned long)(domid<< 0) & ~0xFFFFUL;
   22.21 +        v[1].val  = (unsigned long)(domid>>16) & ~0xFFFFUL;
   22.22 +        v[1].ptr  = (unsigned long)(domid>>32) & ~0xFFFFUL;
   22.23 +        v[0].ptr |= MMU_EXTENDED_COMMAND;
   22.24 +        v[0].val |= MMUEXT_SET_SUBJECTDOM_L;
   22.25 +        v[1].ptr |= MMU_EXTENDED_COMMAND;
   22.26 +        v[1].val |= MMUEXT_SET_SUBJECTDOM_H;
   22.27 +        v += 2;
   22.28 +    }
   22.29 +
   22.30      address &= ~PMD_MASK;
   22.31      end = address + size;
   22.32      if (end > PMD_SIZE)
   22.33 @@ -46,11 +64,18 @@ static inline void direct_remap_area_pte
   22.34              printk("direct_remap_area_pte: page already exists\n");
   22.35              BUG();
   22.36          }
   22.37 -        set_pte(pte, pte_mkio(direct_mk_pte_phys(machine_addr, prot))); 
   22.38 +        v->ptr = virt_to_machine(pte);
   22.39 +        v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot) | _PAGE_IO;
   22.40 +        v++;
   22.41          address += PAGE_SIZE;
   22.42          machine_addr += PAGE_SIZE;
   22.43          pte++;
   22.44      } while (address && (address < end));
   22.45 +
   22.46 +    if ( ((v-u) != 0) && (HYPERVISOR_mmu_update(u, v-u) < 0) )
   22.47 +        printk(KERN_WARNING "Failed to ioremap %08lx->%08lx (%08lx)\n",
   22.48 +               end-size, end, machine_addr-size);
   22.49 +    vfree(u);
   22.50  }
   22.51  
   22.52  static inline int direct_remap_area_pmd(struct mm_struct *mm,
   22.53 @@ -58,7 +83,8 @@ static inline int direct_remap_area_pmd(
   22.54                                          unsigned long address, 
   22.55                                          unsigned long size,
   22.56                                          unsigned long machine_addr,
   22.57 -                                        pgprot_t prot)
   22.58 +                                        pgprot_t prot,
   22.59 +                                        domid_t  domid)
   22.60  {
   22.61      unsigned long end;
   22.62  
   22.63 @@ -74,7 +100,7 @@ static inline int direct_remap_area_pmd(
   22.64          if (!pte)
   22.65              return -ENOMEM;
   22.66          direct_remap_area_pte(pte, address, end - address, 
   22.67 -                              address + machine_addr, prot);
   22.68 +                              address + machine_addr, prot, domid);
   22.69          address = (address + PMD_SIZE) & PMD_MASK;
   22.70          pmd++;
   22.71      } while (address && (address < end));
   22.72 @@ -85,7 +111,8 @@ int direct_remap_area_pages(struct mm_st
   22.73                              unsigned long address, 
   22.74                              unsigned long machine_addr,
   22.75                              unsigned long size, 
   22.76 -                            pgprot_t prot)
   22.77 +                            pgprot_t prot,
   22.78 +                            domid_t  domid)
   22.79  {
   22.80      int error = 0;
   22.81      pgd_t * dir;
   22.82 @@ -103,7 +130,7 @@ int direct_remap_area_pages(struct mm_st
   22.83          if (!pmd)
   22.84              break;
   22.85          error = direct_remap_area_pmd(mm, pmd, address, end - address,
   22.86 -                                      machine_addr + address, prot);
   22.87 +                                      machine_addr + address, prot, domid);
   22.88          if (error)
   22.89              break;
   22.90          address = (address + PGDIR_SIZE) & PGDIR_MASK;
   22.91 @@ -158,7 +185,7 @@ void * __ioremap(unsigned long machine_a
   22.92      prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | 
   22.93                      _PAGE_ACCESSED | flags);
   22.94      if (direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(addr), 
   22.95 -                                machine_addr, size, prot)) {
   22.96 +                                machine_addr, size, prot, 0)) {
   22.97          vfree(addr);
   22.98          return NULL;
   22.99      }
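
A note on the SET_SUBJECTDOM encoding used by direct_remap_area_pte() above: the 64-bit target domid is carried 16 bits at a time in the upper halves of the four words, leaving the low bits free for the MMU_EXTENDED_COMMAND tag (in ptr) and the MMUEXT_SET_SUBJECTDOM_L/H codes (in val). An equivalent spelling of the packing, illustrative only and assuming 32-bit unsigned long:

    v[0].val = ((domid >>  0) & 0xFFFF) << 16;  /* domid bits  0-15 */
    v[0].ptr = ((domid >> 16) & 0xFFFF) << 16;  /* domid bits 16-31 */
    v[1].val = ((domid >> 32) & 0xFFFF) << 16;  /* domid bits 32-47 */
    v[1].ptr = ((domid >> 48) & 0xFFFF) << 16;  /* domid bits 48-63 */

The batched HYPERVISOR_mmu_update() at the end of the function then applies the two subject-domain commands and all queued pte writes in a single hypercall.
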
    23.1 --- a/xenolinux-2.4.26-sparse/drivers/char/mem.c	Wed Apr 21 10:43:06 2004 +0000
    23.2 +++ b/xenolinux-2.4.26-sparse/drivers/char/mem.c	Thu Apr 22 13:56:30 2004 +0000
    23.3 @@ -197,24 +197,11 @@ static inline int noncached_address(unsi
    23.4  #endif
    23.5  }
    23.6  
    23.7 +#if !defined(CONFIG_XEN)
    23.8  static int mmap_mem(struct file * file, struct vm_area_struct * vma)
    23.9  {
   23.10  	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
   23.11  
   23.12 -#if defined(CONFIG_XEN) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
   23.13 -	if (!(start_info.flags & SIF_PRIVILEGED))
   23.14 -		return -ENXIO;
   23.15 -
   23.16 -	/* DONTCOPY is essential for Xen as copy_page_range is broken. */
   23.17 -	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
   23.18 -	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
   23.19 -	if (direct_remap_area_pages(vma->vm_mm, vma->vm_start, offset, 
   23.20 -			     vma->vm_end-vma->vm_start, vma->vm_page_prot))
   23.21 -		return -EAGAIN;
   23.22 -	return 0;
   23.23 -#elif defined(CONFIG_XEN)
   23.24 -	return -ENXIO;
   23.25 -#else
   23.26  	/*
   23.27  	 * Accessing memory above the top the kernel knows about or
   23.28  	 * through a file pointer that was marked O_SYNC will be
   23.29 @@ -236,8 +223,50 @@ static int mmap_mem(struct file * file, 
   23.30  			     vma->vm_page_prot))
   23.31  		return -EAGAIN;
   23.32  	return 0;
   23.33 -#endif
   23.34 +}
   23.35 +#elif !defined(CONFIG_XEN_PRIVILEGED_GUEST)
   23.36 +static int mmap_mem(struct file * file, struct vm_area_struct * vma)
   23.37 +{
   23.38 +	return -ENXIO;
   23.39  }
   23.40 +#else
   23.41 +static int mmap_mem(struct file * file, struct vm_area_struct * vma)
   23.42 +{
   23.43 +	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
   23.44 +	domid_t domid;
   23.45 +
   23.46 +	if (!(start_info.flags & SIF_PRIVILEGED))
   23.47 +		return -ENXIO;
   23.48 +
   23.49 +	domid = file->private_data ? *(domid_t *)file->private_data : 0;
   23.50 +
   23.51 +	/* DONTCOPY is essential for Xen as copy_page_range is broken. */
   23.52 +	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
   23.53 +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
   23.54 +	if (direct_remap_area_pages(vma->vm_mm, vma->vm_start, offset, 
   23.55 +				vma->vm_end-vma->vm_start, vma->vm_page_prot,
   23.56 +				domid))
   23.57 +		return -EAGAIN;
   23.58 +	return 0;
   23.59 +}
   23.60 +static int ioctl_mem(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
   23.61 +{
   23.62 +	if (file->private_data == NULL)
   23.63 +		file->private_data = kmalloc(sizeof(domid_t), GFP_KERNEL);
   23.64 +	switch (cmd) {
   23.65 +	case _IO('M', 1): ((unsigned long *)file->private_data)[0]=arg; break;
   23.66 +	case _IO('M', 2): ((unsigned long *)file->private_data)[1]=arg; break;
   23.67 +	default: return -ENOSYS;
   23.68 +	}
   23.69 +	return 0;
   23.70 +}
   23.71 +static int release_mem(struct inode * inode, struct file * file)
   23.72 +{
   23.73 +	if (file->private_data != NULL)
   23.74 +		kfree(file->private_data);
   23.75 +	return 0;
   23.76 +}
   23.77 +#endif /* CONFIG_XEN */
   23.78  
   23.79  /*
   23.80   * This function reads the *virtual* memory as seen by the kernel.
   23.81 @@ -426,10 +455,6 @@ static inline size_t read_zero_pagealign
   23.82  			goto out_up;
   23.83  		if (vma->vm_flags & VM_SHARED)
   23.84  			break;
   23.85 -#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
   23.86 -		if (vma->vm_flags & VM_IO)
   23.87 -			break;
   23.88 -#endif
   23.89  		count = vma->vm_end - addr;
   23.90  		if (count > size)
   23.91  			count = size;
   23.92 @@ -615,10 +640,6 @@ static int mmap_kmem(struct file * file,
   23.93  	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
   23.94  	unsigned long size = vma->vm_end - vma->vm_start;
   23.95  
   23.96 -#if defined(CONFIG_XEN)
   23.97 -	return -ENXIO;
   23.98 -#endif
   23.99 -
  23.100  	/*
  23.101  	 * If the user is not attempting to mmap a high memory address then
  23.102  	 * the standard mmap_mem mechanism will work.  High memory addresses
  23.103 @@ -663,13 +684,19 @@ static struct file_operations mem_fops =
  23.104  	write:		write_mem,
  23.105  	mmap:		mmap_mem,
  23.106  	open:		open_mem,
  23.107 +#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
  23.108 +	release:	release_mem,
  23.109 +	ioctl:		ioctl_mem,
  23.110 +#endif
  23.111  };
  23.112  
  23.113  static struct file_operations kmem_fops = {
  23.114  	llseek:		memory_lseek,
  23.115  	read:		read_kmem,
  23.116  	write:		write_kmem,
  23.117 +#if !defined(CONFIG_XEN)
  23.118  	mmap:		mmap_kmem,
  23.119 +#endif
  23.120  	open:		open_kmem,
  23.121  };
  23.122  
  23.123 @@ -715,12 +742,6 @@ static int memory_open(struct inode * in
  23.124  			break;
  23.125  #if defined(CONFIG_ISA) || !defined(__mc68000__)
  23.126  		case 4:
  23.127 -#if defined(CONFIG_XEN)
  23.128 -#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
  23.129 -			if (!(start_info.flags & SIF_PRIVILEGED))
  23.130 -#endif
  23.131 -				return -ENXIO;
  23.132 -#endif
  23.133  			filp->f_op = &port_fops;
  23.134  			break;
  23.135  #endif
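
The ioctl_mem()/mmap_mem() pair above lets a privileged guest map another domain's memory through /dev/mem: the two ioctls stash the target domid in file->private_data (apparently as the low and high 32-bit halves of the 64-bit id, given how ioctl_mem() stores its argument), and mmap_mem() passes it to direct_remap_area_pages(). A rough user-space sketch of that flow (illustrative only; the helper name and error handling are invented):

    #include <fcntl.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    /* Map one page (machine frame 'mfn') of domain 'domid' read/write. */
    static void *map_domain_frame(uint64_t domid, unsigned long mfn)
    {
        void *addr;
        int fd = open("/dev/mem", O_RDWR);
        if ( fd < 0 )
            return NULL;
        /* Select the subject domain: cmd 1 = low word, cmd 2 = high word. */
        ioctl(fd, _IO('M', 1), (unsigned long)(domid & 0xFFFFFFFFUL));
        ioctl(fd, _IO('M', 2), (unsigned long)(domid >> 32));
        addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
                    fd, mfn << 12);
        close(fd);
        return (addr == MAP_FAILED) ? NULL : addr;
    }
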
    24.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h	Wed Apr 21 10:43:06 2004 +0000
    24.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h	Thu Apr 22 13:56:30 2004 +0000
    24.3 @@ -9,6 +9,7 @@
    24.4  #ifndef __ASM_XEN__CTRL_IF_H__
    24.5  #define __ASM_XEN__CTRL_IF_H__
    24.6  
    24.7 +#include <linux/tqueue.h>
    24.8  #include <asm/hypervisor.h>
    24.9  
   24.10  typedef control_msg_t ctrl_msg_t;
    25.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/evtchn.h	Wed Apr 21 10:43:06 2004 +0000
    25.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/evtchn.h	Thu Apr 22 13:56:30 2004 +0000
    25.3 @@ -14,6 +14,7 @@
    25.4  #include <asm/hypervisor.h>
    25.5  #include <asm/ptrace.h>
    25.6  #include <asm/synch_bitops.h>
    25.7 +#include <asm/hypervisor-ifs/event_channel.h>
    25.8  
    25.9  /*
   25.10   * LOW-LEVEL DEFINITIONS
   25.11 @@ -62,6 +63,14 @@ static inline void clear_evtchn_exceptio
   25.12      synch_clear_bit(port, &s->evtchn_exception[0]);
   25.13  }
   25.14  
   25.15 +static inline void notify_via_evtchn(int port)
   25.16 +{
   25.17 +    evtchn_op_t op;
   25.18 +    op.cmd = EVTCHNOP_send;
   25.19 +    op.u.send.local_port = port;
   25.20 +    (void)HYPERVISOR_event_channel_op(&op);
   25.21 +}
   25.22 +
   25.23  /*
   25.24   * CHARACTER-DEVICE DEFINITIONS
   25.25   */
    26.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h	Wed Apr 21 10:43:06 2004 +0000
    26.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h	Thu Apr 22 13:56:30 2004 +0000
    26.3 @@ -161,13 +161,6 @@ static inline int HYPERVISOR_mmu_update(
    26.4          : "=a" (ret) : "0" (__HYPERVISOR_mmu_update), 
    26.5          "b" (req), "c" (count) : "memory" );
    26.6  
    26.7 -    if ( unlikely(ret < 0) )
    26.8 -    {
    26.9 -        extern void show_trace(unsigned long *);
   26.10 -        show_trace(NULL);
   26.11 -        panic("Failed mmu update: %p, %d", req, count);
   26.12 -    }
   26.13 -
   26.14      return ret;
   26.15  }
   26.16  
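
With the panic removed from HYPERVISOR_mmu_update() itself, checking the hypercall's return value becomes each caller's responsibility, which is what the traps.c and hypervisor.c hunks earlier in this changeset add. A minimal caller-side sketch under the new convention (illustrative only, not part of the patch; it mirrors the fixed queue_invlpg() encoding):

    mmu_update_t req;
    req.ptr = MMU_EXTENDED_COMMAND | (addr & PAGE_MASK);
    req.val = MMUEXT_INVLPG;
    if ( unlikely(HYPERVISOR_mmu_update(&req, 1) < 0) )
        panic("MMU update failed");  /* or a softer recovery where the call site allows */
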
    27.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h	Wed Apr 21 10:43:06 2004 +0000
    27.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h	Thu Apr 22 13:56:30 2004 +0000
    27.3 @@ -265,10 +265,15 @@ static inline void flush_tlb_pgtables(st
    27.4      XEN_flush_page_update_queue();
    27.5  }
    27.6  
    27.7 +/*
    27.8 + * NB. The 'domid' field should be zero if mapping I/O space (non RAM).
    27.9 + * Otherwise it identifies the owner of the memory that is being mapped.
   27.10 + */
   27.11  extern int direct_remap_area_pages(struct mm_struct *mm,
   27.12                                     unsigned long address, 
   27.13                                     unsigned long machine_addr,
   27.14                                     unsigned long size, 
   27.15 -                                   pgprot_t prot);
   27.16 +                                   pgprot_t prot,
   27.17 +                                   domid_t  domid);
   27.18  
   27.19  #endif /* _I386_PGALLOC_H */