direct-io.hg

changeset 14680:5e65a86c8982

linux: User-space grant table device.

A character device for accessing (in user-space) pages that have been
granted by other domains.

Signed-off-by: Derek Murray <Derek.Murray@cl.cam.ac.uk>

Fix ioctl interface to be 32/64-bit invariant. Move xen_class
mechanism to common util.c.

Signed-off-by: Keir Fraser <keir@xensource.com>
author Keir Fraser <keir@xensource.com>
date Sat Mar 31 13:53:24 2007 +0100 (2007-03-31)
parents 7180d2e61f92
children 7b77b47a49d1
files linux-2.6-xen-sparse/drivers/xen/Makefile linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c linux-2.6-xen-sparse/drivers/xen/util.c linux-2.6-xen-sparse/include/xen/driver_util.h linux-2.6-xen-sparse/include/xen/public/gntdev.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/drivers/xen/Makefile	Sat Mar 31 12:42:02 2007 +0100
     1.2 +++ b/linux-2.6-xen-sparse/drivers/xen/Makefile	Sat Mar 31 13:53:24 2007 +0100
     1.3 @@ -3,6 +3,7 @@ obj-y	+= console/
     1.4  obj-y	+= evtchn/
     1.5  obj-y	+= privcmd/
     1.6  obj-y	+= xenbus/
     1.7 +obj-y	+= gntdev/
     1.8  
     1.9  obj-$(CONFIG_XEN_UTIL)			+= util.o
    1.10  obj-$(CONFIG_XEN_BALLOON)		+= balloon/
     2.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Sat Mar 31 12:42:02 2007 +0100
     2.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Sat Mar 31 13:53:24 2007 +0100
     2.3 @@ -44,6 +44,7 @@
     2.4  #include <asm/hypervisor.h>
     2.5  #include "common.h"
     2.6  #include <xen/balloon.h>
     2.7 +#include <xen/driver_util.h>
     2.8  #include <linux/kernel.h>
     2.9  #include <linux/fs.h>
    2.10  #include <linux/mm.h>
    2.11 @@ -56,30 +57,6 @@
    2.12  #define MAX_TAP_DEV 256     /*the maximum number of tapdisk ring devices    */
    2.13  #define MAX_DEV_NAME 100    /*the max tapdisk ring device name e.g. blktap0 */
    2.14  
    2.15 -
    2.16 -struct class *xen_class;
    2.17 -EXPORT_SYMBOL_GPL(xen_class);
    2.18 -
    2.19 -/*
    2.20 - * Setup the xen class.  This should probably go in another file, but
    2.21 - * since blktap is the only user of it so far, it gets to keep it.
    2.22 - */
    2.23 -int setup_xen_class(void)
    2.24 -{
    2.25 -	int ret;
    2.26 -
    2.27 -	if (xen_class)
    2.28 -		return 0;
    2.29 -
    2.30 -	xen_class = class_create(THIS_MODULE, "xen");
    2.31 -	if ((ret = IS_ERR(xen_class))) {
    2.32 -		xen_class = NULL;
    2.33 -		return ret;
    2.34 -	}
    2.35 -
    2.36 -	return 0;
    2.37 -}
    2.38 -
    2.39  /*
    2.40   * The maximum number of requests that can be outstanding at any time
    2.41   * is determined by 
    2.42 @@ -347,6 +324,7 @@ static const struct file_operations blkt
    2.43  
    2.44  static tap_blkif_t *get_next_free_dev(void)
    2.45  {
    2.46 +	struct class *class;
    2.47  	tap_blkif_t *info;
    2.48  	int minor;
    2.49  
    2.50 @@ -409,9 +387,10 @@ found:
    2.51  		wmb();
    2.52  		tapfds[minor] = info;
    2.53  
    2.54 -		class_device_create(xen_class, NULL,
    2.55 -				    MKDEV(blktap_major, minor), NULL,
    2.56 -				    "blktap%d", minor);
    2.57 +		if ((class = get_xen_class()) != NULL)
    2.58 +			class_device_create(class, NULL,
    2.59 +					    MKDEV(blktap_major, minor), NULL,
    2.60 +					    "blktap%d", minor);
    2.61  	}
    2.62  
    2.63  out:
    2.64 @@ -1487,6 +1466,7 @@ static void make_response(blkif_t *blkif
    2.65  static int __init blkif_init(void)
    2.66  {
    2.67  	int i, ret;
    2.68 +	struct class *class;
    2.69  
    2.70  	if (!is_running_on_xen())
    2.71  		return -ENODEV;
    2.72 @@ -1522,7 +1502,7 @@ static int __init blkif_init(void)
    2.73  	DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
    2.74  
    2.75  	/* Make sure the xen class exists */
    2.76 -	if (!setup_xen_class()) {
    2.77 +	if ((class = get_xen_class()) != NULL) {
    2.78  		/*
    2.79  		 * This will allow udev to create the blktap ctrl device.
    2.80  		 * We only want to create blktap0 first.  We don't want
    2.81 @@ -1530,7 +1510,7 @@ static int __init blkif_init(void)
    2.82  		 * We only create the device when a request of a new device is
    2.83  		 * made.
    2.84  		 */
    2.85 -		class_device_create(xen_class, NULL,
    2.86 +		class_device_create(class, NULL,
    2.87  				    MKDEV(blktap_major, 0), NULL,
    2.88  				    "blktap0");
    2.89  	} else {
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/gntdev/Makefile	Sat Mar 31 13:53:24 2007 +0100
     3.3 @@ -0,0 +1,1 @@
     3.4 +obj-y	:= gntdev.o
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/linux-2.6-xen-sparse/drivers/xen/gntdev/gntdev.c	Sat Mar 31 13:53:24 2007 +0100
     4.3 @@ -0,0 +1,971 @@
     4.4 +/******************************************************************************
     4.5 + * gntdev.c
     4.6 + * 
     4.7 + * Device for accessing (in user-space) pages that have been granted by other
     4.8 + * domains.
     4.9 + *
    4.10 + * Copyright (c) 2006-2007, D G Murray.
    4.11 + * 
    4.12 + * This program is distributed in the hope that it will be useful,
    4.13 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
    4.14 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    4.15 + * GNU General Public License for more details.
    4.16 + * 
    4.17 + * You should have received a copy of the GNU General Public License
    4.18 + * along with this program; if not, write to the Free Software
    4.19 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    4.20 + */
    4.21 +
    4.22 +#include <asm/atomic.h>
    4.23 +#include <linux/module.h>
    4.24 +#include <linux/kernel.h>
    4.25 +#include <linux/init.h>
    4.26 +#include <linux/fs.h>
    4.27 +#include <linux/device.h>
    4.28 +#include <linux/mm.h>
    4.29 +#include <linux/mman.h>
    4.30 +#include <asm/uaccess.h>
    4.31 +#include <asm/io.h>
    4.32 +#include <xen/gnttab.h>
    4.33 +#include <asm/hypervisor.h>
    4.34 +#include <xen/balloon.h>
    4.35 +#include <xen/evtchn.h>
    4.36 +#include <xen/driver_util.h>
    4.37 +
    4.38 +#include <linux/types.h>
    4.39 +#include <xen/public/gntdev.h>
    4.40 +
    4.41 +
    4.42 +#define DRIVER_AUTHOR "Derek G. Murray <Derek.Murray@cl.cam.ac.uk>"
    4.43 +#define DRIVER_DESC   "User-space granted page access driver"
    4.44 +
    4.45 +MODULE_LICENSE("GPL");
    4.46 +MODULE_AUTHOR(DRIVER_AUTHOR);
    4.47 +MODULE_DESCRIPTION(DRIVER_DESC);
    4.48 +
    4.49 +#define MAX_GRANTS 128
    4.50 +
    4.51 +/* A slot can be in one of three states:
    4.52 + *
    4.53 + * 0. GNTDEV_SLOT_INVALID:
    4.54 + *    This slot is not associated with a grant reference, and is therefore free
    4.55 + *    to be overwritten by a new grant reference.
    4.56 + *
    4.57 + * 1. GNTDEV_SLOT_NOT_YET_MAPPED:
    4.58 + *    This slot is associated with a grant reference (via the 
    4.59 + *    IOCTL_GNTDEV_MAP_GRANT_REF ioctl), but it has not yet been mmap()-ed.
    4.60 + *
    4.61 + * 2. GNTDEV_SLOT_MAPPED:
    4.62 + *    This slot is associated with a grant reference, and has been mmap()-ed.
    4.63 + */
    4.64 +typedef enum gntdev_slot_state {
    4.65 +	GNTDEV_SLOT_INVALID = 0,
    4.66 +	GNTDEV_SLOT_NOT_YET_MAPPED,
    4.67 +	GNTDEV_SLOT_MAPPED
    4.68 +} gntdev_slot_state_t;
    4.69 +
    4.70 +#define GNTDEV_INVALID_HANDLE    -1
    4.71 +#define GNTDEV_FREE_LIST_INVALID -1
    4.72 +/* Each opened instance of gntdev is associated with a list of grants,
    4.73 + * represented by an array of elements of the following type,
    4.74 + * gntdev_grant_info_t.
    4.75 + */
    4.76 +typedef struct gntdev_grant_info {
    4.77 +	gntdev_slot_state_t state;
    4.78 +	union {
    4.79 +		uint32_t free_list_index;
    4.80 +		struct {
    4.81 +			domid_t domid;
    4.82 +			grant_ref_t ref;
    4.83 +			grant_handle_t kernel_handle;
    4.84 +			grant_handle_t user_handle;
    4.85 +			uint64_t dev_bus_addr;
    4.86 +		} valid;
    4.87 +	} u;
    4.88 +} gntdev_grant_info_t;
    4.89 +
    4.90 +/* Private data structure, which is stored in the file pointer for files
    4.91 + * associated with this device.
    4.92 + */
    4.93 +typedef struct gntdev_file_private_data {
    4.94 +  
    4.95 +	/* Array of grant information. */
    4.96 +	gntdev_grant_info_t grants[MAX_GRANTS];
    4.97 +
    4.98 +	/* Read/write semaphore used to protect the grants array. */
    4.99 +	struct rw_semaphore grants_sem;
   4.100 +
   4.101 +	/* An array of indices of free slots in the grants array.
   4.102 +	 * N.B. An entry in this list may temporarily have the value
   4.103 +	 * GNTDEV_FREE_LIST_INVALID if the corresponding slot has been removed
   4.104 +	 * from the list by the contiguous allocator, but the list has not yet
   4.105 +	 * been compressed. However, this is not visible across invocations of
   4.106 +	 * the device.
   4.107 +	 */
   4.108 +	int32_t free_list[MAX_GRANTS];
   4.109 +	
   4.110 +	/* The number of free slots in the grants array. */
   4.111 +	uint32_t free_list_size;
   4.112 +
   4.113 +	/* Read/write semaphore used to protect the free list. */
   4.114 +	struct rw_semaphore free_list_sem;
   4.115 +	
   4.116 +	/* Index of the next slot after the most recent contiguous allocation, 
   4.117 +	 * for use in a next-fit allocator.
   4.118 +	 */
   4.119 +	uint32_t next_fit_index;
   4.120 +
   4.121 +	/* Used to map grants into the kernel, before mapping them into user
   4.122 +	 * space.
   4.123 +	 */
   4.124 +	struct page **foreign_pages;
   4.125 +
   4.126 +} gntdev_file_private_data_t;
   4.127 +
   4.128 +/* Module lifecycle operations. */
   4.129 +static int __init gntdev_init(void);
   4.130 +static void __exit gntdev_exit(void);
   4.131 +
   4.132 +module_init(gntdev_init);
   4.133 +module_exit(gntdev_exit);
   4.134 +
   4.135 +/* File operations. */
   4.136 +static int gntdev_open(struct inode *inode, struct file *flip);
   4.137 +static int gntdev_release(struct inode *inode, struct file *flip);
   4.138 +static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma);
   4.139 +static int gntdev_ioctl (struct inode *inode, struct file *flip,
   4.140 +			 unsigned int cmd, unsigned long arg);
   4.141 +
   4.142 +static struct file_operations gntdev_fops = {
   4.143 +	.owner = THIS_MODULE,
   4.144 +	.open = gntdev_open,
   4.145 +	.release = gntdev_release,
   4.146 +	.mmap = gntdev_mmap,
   4.147 +	.ioctl = gntdev_ioctl
   4.148 +};
   4.149 +
   4.150 +/* VM operations. */
   4.151 +static void gntdev_vma_close(struct vm_area_struct *vma);
   4.152 +static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
   4.153 +			      pte_t *ptep, int is_fullmm);
   4.154 +
   4.155 +static struct vm_operations_struct gntdev_vmops = {
   4.156 +	.close = gntdev_vma_close,
   4.157 +	.ptep_get_and_clear_full = gntdev_clear_pte
   4.158 +};
   4.159 +
   4.160 +/* Global variables. */
   4.161 +
   4.162 +/* The driver major number, for use when unregistering the driver. */
   4.163 +static int gntdev_major;
   4.164 +
   4.165 +#define GNTDEV_NAME "gntdev"
   4.166 +
   4.167 +/* Memory mapping functions
   4.168 + * ------------------------
   4.169 + *
   4.170 + * Every granted page is mapped into both kernel and user space, and the two
   4.171 + * following functions return the respective virtual addresses of these pages.
   4.172 + *
   4.173 + * The kernel mapping is always made first. Without shadow paging, the grant
   4.174 + * is then mapped a second time, directly into user space; with shadow paging,
   4.175 + * the kernel page is inserted using vm_insert_page() (see gntdev_mmap()).
   4.176 + */
   4.177 +
   4.178 +/* Returns the virtual address (in user space) of the @page_index'th page
   4.179 + * in the given VM area.
   4.180 + */
   4.181 +static inline unsigned long get_user_vaddr (struct vm_area_struct *vma,
   4.182 +					    int page_index)
   4.183 +{
   4.184 +	return (unsigned long) vma->vm_start + (page_index << PAGE_SHIFT);
   4.185 +}
   4.186 +
   4.187 +/* Returns the virtual address (in kernel space) of the @slot_index'th page
   4.188 + * mapped by the gntdev instance that owns the given private data struct.
   4.189 + */
   4.190 +static inline unsigned long get_kernel_vaddr (gntdev_file_private_data_t *priv,
   4.191 +					      int slot_index)
   4.192 +{
   4.193 +	unsigned long pfn;
   4.194 +	void *kaddr;
   4.195 +	pfn = page_to_pfn(priv->foreign_pages[slot_index]);
   4.196 +	kaddr = pfn_to_kaddr(pfn);
   4.197 +	return (unsigned long) kaddr;
   4.198 +}
   4.199 +
   4.200 +/* Helper functions. */
   4.201 +
   4.202 +/* Adds information about a grant reference to the list of grants in the file's
   4.203 + * private data structure. Returns non-zero on failure. On success, sets the
   4.204 + * value of *offset to the offset that should be mmap()-ed in order to map the
   4.205 + * grant reference.
   4.206 + */
   4.207 +static int add_grant_reference(struct file *flip,
   4.208 +			       struct ioctl_gntdev_grant_ref *op,
   4.209 +			       uint64_t *offset)
   4.210 +{
   4.211 +	gntdev_file_private_data_t *private_data 
   4.212 +		= (gntdev_file_private_data_t *) flip->private_data;
   4.213 +
   4.214 +	uint32_t slot_index;
   4.215 +
   4.216 +	if (unlikely(private_data->free_list_size == 0)) {
   4.217 +		return -ENOMEM;
   4.218 +	}
   4.219 +
   4.220 +	slot_index = private_data->free_list[--private_data->free_list_size];
   4.221 +
   4.222 +	/* Copy the grant information into file's private data. */
   4.223 +	private_data->grants[slot_index].state = GNTDEV_SLOT_NOT_YET_MAPPED;
   4.224 +	private_data->grants[slot_index].u.valid.domid = op->domid;
   4.225 +	private_data->grants[slot_index].u.valid.ref = op->ref;
   4.226 +
   4.227 +	/* The offset is calculated as the index of the chosen entry in the
   4.228 +	 * file's private data's array of grant information. This is then
   4.229 +	 * shifted to give an offset into the virtual "file address space".
   4.230 +	 */
   4.231 +	*offset = slot_index << PAGE_SHIFT;
   4.232 +
   4.233 +	return 0;
   4.234 +}
   4.235 +
   4.236 +/* Adds the @count grant references to the contiguous range in the slot array
   4.237 + * beginning at @first_slot. It is assumed that @first_slot was returned by a
   4.238 + * previous invocation of find_contiguous_free_range(), during the same
   4.239 + * invocation of the driver.
   4.240 + */
   4.241 +static int add_grant_references(struct file *flip,
   4.242 +				int count,
   4.243 +				struct ioctl_gntdev_grant_ref *ops,
   4.244 +				uint32_t first_slot)
   4.245 +{
   4.246 +	gntdev_file_private_data_t *private_data 
   4.247 +		= (gntdev_file_private_data_t *) flip->private_data;
   4.248 +	int i;
   4.249 +	
   4.250 +	for (i = 0; i < count; ++i) {
   4.251 +
   4.252 +		/* First, mark the slot's entry in the free list as invalid. */
   4.253 +		int free_list_index = 
   4.254 +			private_data->grants[first_slot+i].u.free_list_index;
   4.255 +		private_data->free_list[free_list_index] = 
   4.256 +			GNTDEV_FREE_LIST_INVALID;
   4.257 +
   4.258 +		/* Now, update the slot. */
   4.259 +		private_data->grants[first_slot+i].state = 
   4.260 +			GNTDEV_SLOT_NOT_YET_MAPPED;
   4.261 +		private_data->grants[first_slot+i].u.valid.domid =
   4.262 +			ops[i].domid;
   4.263 +		private_data->grants[first_slot+i].u.valid.ref = ops[i].ref;
   4.264 +	}
   4.265 +
   4.266 +	return 0;	
   4.267 +}
   4.268 +
   4.269 +/* Scans through the free list for @flip, removing entries that are marked as
   4.270 + * GNTDEV_FREE_LIST_INVALID. This will reduce the recorded size of the free
   4.271 + * list to the number of valid entries.
   4.272 + */
   4.273 +static void compress_free_list(struct file *flip) 
   4.274 +{
   4.275 +	gntdev_file_private_data_t *private_data 
   4.276 +		= (gntdev_file_private_data_t *) flip->private_data;
   4.277 +	int i, j = 0, old_size;
   4.278 +	
   4.279 +	old_size = private_data->free_list_size;
   4.280 +	for (i = 0; i < old_size; ++i) {
   4.281 +		if (private_data->free_list[i] != GNTDEV_FREE_LIST_INVALID) {
   4.282 +			private_data->free_list[j] = 
   4.283 +				private_data->free_list[i];
   4.284 +			++j;
   4.285 +		} else {
   4.286 +			--private_data->free_list_size;
   4.287 +		}
   4.288 +	}
   4.289 +}
   4.290 +
   4.291 +/* Searches the grant array in the private data of @flip for a range of
   4.292 + * @num_slots contiguous slots in the GNTDEV_SLOT_INVALID state.
   4.293 + *
   4.294 + * Returns the index of the first slot if a range is found, otherwise -ENOMEM.
   4.295 + */
   4.296 +static int find_contiguous_free_range(struct file *flip,
   4.297 +				      uint32_t num_slots) 
   4.298 +{
   4.299 +	gntdev_file_private_data_t *private_data 
   4.300 +		= (gntdev_file_private_data_t *) flip->private_data;
   4.301 +	
   4.302 +	int i;
   4.303 +	int start_index = private_data->next_fit_index;
   4.304 +	int range_start = 0, range_length;
   4.305 +
   4.306 +	if (private_data->free_list_size < num_slots) {
   4.307 +		return -ENOMEM;
   4.308 +	}
   4.309 +
   4.310 +	/* First search from the start_index to the end of the array. */
   4.311 +	range_length = 0;
   4.312 +	for (i = start_index; i < MAX_GRANTS; ++i) {
   4.313 +		if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
   4.314 +			if (range_length == 0) {
   4.315 +				range_start = i;
   4.316 +			}
   4.317 +			++range_length;
   4.318 +			if (range_length == num_slots) {
   4.319 +				return range_start;
   4.320 +			}
   4.321 +		}
   4.322 +	}
   4.323 +	
   4.324 +	/* Now search from the start of the array to the start_index. */
   4.325 +	range_length = 0;
   4.326 +	for (i = 0; i < start_index; ++i) {
   4.327 +		if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
   4.328 +			if (range_length == 0) {
   4.329 +				range_start = i;
   4.330 +			}
   4.331 +			++range_length;
   4.332 +			if (range_length == num_slots) {
   4.333 +				return range_start;
   4.334 +			}
   4.335 +		}
   4.336 +	}
   4.337 +	
   4.338 +	return -ENOMEM;
   4.339 +}
   4.340 +
   4.341 +/* Interface functions. */
   4.342 +
   4.343 +/* Initialises the driver. Called when the module is loaded. */
   4.344 +static int __init gntdev_init(void)
   4.345 +{
   4.346 +	struct class *class;
   4.347 +	struct class_device *device;
   4.348 +
   4.349 +	if (!is_running_on_xen()) {
   4.350 +		printk(KERN_ERR "You must be running Xen to use gntdev\n");
   4.351 +		return -ENODEV;
   4.352 +	}
   4.353 +
   4.354 +	gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops);
   4.355 +	if (gntdev_major < 0)
   4.356 +	{
   4.357 +		printk(KERN_ERR "Could not register gntdev device\n");
   4.358 +		return -ENOMEM;
   4.359 +	}
   4.360 +
   4.361 +	/* Note that if the sysfs code fails, we will still initialise the
   4.362 +	 * device, and output the major number so that the device can be
   4.363 +	 * created manually using mknod.
   4.364 +	 */
   4.365 +	if ((class = get_xen_class()) == NULL) {
   4.366 +		printk(KERN_ERR "Error setting up xen_class\n");
   4.367 +		printk(KERN_ERR "gntdev created with major number = %d\n", 
   4.368 +		       gntdev_major);
   4.369 +		return 0;
   4.370 +	}
   4.371 +
   4.372 +	device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
   4.373 +				     NULL, GNTDEV_NAME);
   4.374 +	if (IS_ERR(device)) {
   4.375 +		printk(KERN_ERR "Error creating gntdev device in xen_class\n");
   4.376 +		printk(KERN_ERR "gntdev created with major number = %d\n",
   4.377 +		       gntdev_major);
   4.378 +		return 0;
   4.379 +	}
   4.380 +
   4.381 +	return 0;
   4.382 +}
   4.383 +
   4.384 +/* Cleans up and unregisters the driver. Called when the driver is unloaded.
   4.385 + */
   4.386 +static void __exit gntdev_exit(void)
   4.387 +{
   4.388 +	struct class *class;
   4.389 +	if ((class = get_xen_class()) != NULL)
   4.390 +		class_device_destroy(class, MKDEV(gntdev_major, 0));
   4.391 +	unregister_chrdev(gntdev_major, GNTDEV_NAME);
   4.392 +}
   4.393 +
   4.394 +/* Called when the device is opened. */
   4.395 +static int gntdev_open(struct inode *inode, struct file *flip)
   4.396 +{
   4.397 +	gntdev_file_private_data_t *private_data;
   4.398 +	int i;
   4.399 +
   4.400 +	try_module_get(THIS_MODULE);
   4.401 +
   4.402 +	/* Allocate space for the per-instance private data. */
   4.403 +	private_data = kmalloc(sizeof(*private_data), GFP_KERNEL);
   4.404 +	if (!private_data)
   4.405 +		goto nomem_out;
   4.406 +
   4.407 +	/* Allocate space for the kernel-mapping of granted pages. */
   4.408 +	private_data->foreign_pages = 
   4.409 +		alloc_empty_pages_and_pagevec(MAX_GRANTS);
   4.410 +	if (!private_data->foreign_pages)
   4.411 +		goto nomem_out2;
   4.412 +
   4.413 +	/* Initialise the free-list, which contains all slots at first.
   4.414 +	 */
   4.415 +	for (i = 0; i < MAX_GRANTS; ++i) {
   4.416 +		private_data->free_list[MAX_GRANTS - i - 1] = i;
   4.417 +		private_data->grants[i].state = GNTDEV_SLOT_INVALID;
   4.418 +		private_data->grants[i].u.free_list_index = MAX_GRANTS - i - 1;
   4.419 +	}
   4.420 +	private_data->free_list_size = MAX_GRANTS;
   4.421 +	private_data->next_fit_index = 0;
   4.422 +
   4.423 +	init_rwsem(&private_data->grants_sem);
   4.424 +	init_rwsem(&private_data->free_list_sem);
   4.425 +
   4.426 +	flip->private_data = private_data;
   4.427 +
   4.428 +	return 0;
   4.429 +
   4.430 +nomem_out2:
   4.431 +	kfree(private_data);
   4.432 +nomem_out:
   4.433 +	return -ENOMEM;
   4.434 +}
   4.435 +
   4.436 +/* Called when the device is closed.
   4.437 + */
   4.438 +static int gntdev_release(struct inode *inode, struct file *flip)
   4.439 +{
   4.440 +	if (flip->private_data) {
   4.441 +		gntdev_file_private_data_t *private_data = 
   4.442 +			(gntdev_file_private_data_t *) flip->private_data;
   4.443 +		if (private_data->foreign_pages) {
   4.444 +			free_empty_pages_and_pagevec
   4.445 +				(private_data->foreign_pages, MAX_GRANTS);
   4.446 +		}
   4.447 +		kfree(private_data);
   4.448 +	}
   4.449 +	module_put(THIS_MODULE);
   4.450 +	return 0;
   4.451 +}
   4.452 +
   4.453 +/* Called when an attempt is made to mmap() the device. The private data from
   4.454 + * @flip contains the list of grant references that can be mapped. The vm_pgoff
   4.455 + * field of @vma contains the index into that list of the first grant reference
   4.456 + * that will be mapped; one slot is consumed per page of the mapping. Only
   4.457 + * mappings that are a multiple of PAGE_SIZE are handled.
   4.458 + */
   4.459 +static int gntdev_mmap (struct file *flip, struct vm_area_struct *vma) 
   4.460 +{
   4.461 +	struct gnttab_map_grant_ref op;
   4.462 +	unsigned long slot_index = vma->vm_pgoff;
   4.463 +	unsigned long kernel_vaddr, user_vaddr;
   4.464 +	uint32_t size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
   4.465 +	uint64_t ptep;
   4.466 +	int ret;
   4.467 +	int flags;
   4.468 +	int i;
   4.469 +	struct page *page;
   4.470 +	gntdev_file_private_data_t *private_data = flip->private_data;
   4.471 +
   4.472 +	if (unlikely(!private_data)) {
   4.473 +		printk(KERN_ERR "File's private data is NULL.\n");
   4.474 +		return -EINVAL;
   4.475 +	}
   4.476 +
   4.477 +	if (unlikely((size <= 0) || (size + slot_index) > MAX_GRANTS)) {
   4.478 +		printk(KERN_ERR "Invalid number of pages or offset"
   4.479 +		       "(num_pages = %d, first_slot = %ld).\n",
   4.480 +		       size, slot_index);
   4.481 +		return -ENXIO;
   4.482 +	}
   4.483 +
   4.484 +	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) {
   4.485 +		printk(KERN_ERR "Writable mappings must be shared.\n");
   4.486 +		return -EINVAL;
   4.487 +	}
   4.488 +
   4.489 +	/* Slots must be in the NOT_YET_MAPPED state. */
   4.490 +	down_write(&private_data->grants_sem);
   4.491 +	for (i = 0; i < size; ++i) {
   4.492 +		if (private_data->grants[slot_index + i].state != 
   4.493 +		    GNTDEV_SLOT_NOT_YET_MAPPED) {
   4.494 +			printk(KERN_ERR "Slot (index = %ld) is in the wrong "
   4.495 +			       "state (%d).\n", slot_index + i, 
   4.496 +			       private_data->grants[slot_index + i].state);
   4.497 +			up_write(&private_data->grants_sem);
   4.498 +			return -EINVAL;
   4.499 +		}
   4.500 +	}
   4.501 +
   4.502 +	/* Install the hook for unmapping. */
   4.503 +	vma->vm_ops = &gntdev_vmops;
   4.504 +    
   4.505 +	/* The VM area contains pages from another VM. */
   4.506 +	vma->vm_flags |= VM_FOREIGN;
   4.507 +	vma->vm_private_data = kzalloc(size * sizeof(struct page_struct *), 
   4.508 +				       GFP_KERNEL);
   4.509 +	if (vma->vm_private_data == NULL) {
   4.510 +		printk(KERN_ERR "Couldn't allocate mapping structure for VM "
   4.511 +		       "area.\n");
   4.512 +		return -ENOMEM;
   4.513 +	}
   4.514 +
   4.515 +	/* This flag prevents Bad PTE errors when the memory is unmapped. */
   4.516 +	vma->vm_flags |= VM_RESERVED;
   4.517 +
   4.518 +	/* This flag prevents this VM area being copied on a fork(). A better
   4.519 +	 * behaviour might be to explicitly carry out the appropriate mappings
   4.520 +	 * on fork(), but I don't know if there's a hook for this.
   4.521 +	 */
   4.522 +	vma->vm_flags |= VM_DONTCOPY;
   4.523 +
   4.524 +	/* This flag ensures that the page tables are not unpinned before the
   4.525 +	 * VM area is unmapped. Therefore Xen still recognises the PTE as
   4.526 +	 * belonging to an L1 pagetable, and the grant unmap operation will
   4.527 +	 * succeed, even if the process does not exit cleanly.
   4.528 +	 */
   4.529 +	vma->vm_mm->context.has_foreign_mappings = 1;
   4.530 +
   4.531 +	for (i = 0; i < size; ++i) {
   4.532 +
   4.533 +		flags = GNTMAP_host_map;
   4.534 +		if (!(vma->vm_flags & VM_WRITE))
   4.535 +			flags |= GNTMAP_readonly;
   4.536 +
   4.537 +		kernel_vaddr = get_kernel_vaddr(private_data, slot_index + i);
   4.538 +		user_vaddr = get_user_vaddr(vma, i);
   4.539 +		page = pfn_to_page(__pa(kernel_vaddr) >> PAGE_SHIFT);
   4.540 +
   4.541 +		gnttab_set_map_op(&op, kernel_vaddr, flags,   
   4.542 +				  private_data->grants[slot_index+i]
   4.543 +				  .u.valid.ref, 
   4.544 +				  private_data->grants[slot_index+i]
   4.545 +				  .u.valid.domid);
   4.546 +
   4.547 +		/* Carry out the mapping of the grant reference. */
   4.548 +		ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 
   4.549 +						&op, 1);
   4.550 +		BUG_ON(ret);
   4.551 +		if (op.status) {
   4.552 +			printk(KERN_ERR "Error mapping the grant reference "
   4.553 +			       "into the kernel (%d). domid = %d; ref = %d\n",
   4.554 +			       op.status,
   4.555 +			       private_data->grants[slot_index+i]
   4.556 +			       .u.valid.domid,
   4.557 +			       private_data->grants[slot_index+i]
   4.558 +			       .u.valid.ref);
   4.559 +			goto undo_map_out;
   4.560 +		}
   4.561 +
   4.562 +		/* Store a reference to the page that will be mapped into user
   4.563 +		 * space.
   4.564 +		 */
   4.565 +		((struct page **) vma->vm_private_data)[i] = page;
   4.566 +
   4.567 +		/* Mark mapped page as reserved. */
   4.568 +		SetPageReserved(page);
   4.569 +
   4.570 +		/* Record the grant handle, for use in the unmap operation. */
   4.571 +		private_data->grants[slot_index+i].u.valid.kernel_handle = 
   4.572 +			op.handle;
   4.573 +		private_data->grants[slot_index+i].u.valid.dev_bus_addr = 
   4.574 +			op.dev_bus_addr;
   4.575 +		
   4.576 +		private_data->grants[slot_index+i].state = GNTDEV_SLOT_MAPPED;
   4.577 +		private_data->grants[slot_index+i].u.valid.user_handle =
   4.578 +			GNTDEV_INVALID_HANDLE;
   4.579 +
   4.580 +		/* Now perform the mapping to user space. */
   4.581 +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
   4.582 +
   4.583 +			/* NOT USING SHADOW PAGE TABLES. */
   4.584 +			/* In this case, we map the grant(s) straight into user
   4.585 +			 * space.
   4.586 +			 */
   4.587 +
   4.588 +			/* Get the machine address of the PTE for the user 
   4.589 +			 *  page.
   4.590 +			 */
   4.591 +			if ((ret = create_lookup_pte_addr(vma->vm_mm, 
   4.592 +							  vma->vm_start 
   4.593 +							  + (i << PAGE_SHIFT), 
   4.594 +							  &ptep)))
   4.595 +			{
   4.596 +				printk(KERN_ERR "Error obtaining PTE pointer "
   4.597 +				       "(%d).\n", ret);
   4.598 +				goto undo_map_out;
   4.599 +			}
   4.600 +			
   4.601 +			/* Configure the map operation. */
   4.602 +		
   4.603 +			/* The reference is to be used by host CPUs. */
   4.604 +			flags = GNTMAP_host_map;
   4.605 +			
   4.606 +			/* Specifies a user space mapping. */
   4.607 +			flags |= GNTMAP_application_map;
   4.608 +			
   4.609 +			/* The map request contains the machine address of the
   4.610 +			 * PTE to update.
   4.611 +			 */
   4.612 +			flags |= GNTMAP_contains_pte;
   4.613 +			
   4.614 +			if (!(vma->vm_flags & VM_WRITE))
   4.615 +				flags |= GNTMAP_readonly;
   4.616 +
   4.617 +			gnttab_set_map_op(&op, ptep, flags, 
   4.618 +					  private_data->grants[slot_index+i]
   4.619 +					  .u.valid.ref, 
   4.620 +					  private_data->grants[slot_index+i]
   4.621 +					  .u.valid.domid);
   4.622 +
   4.623 +			/* Carry out the mapping of the grant reference. */
   4.624 +			ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
   4.625 +							&op, 1);
   4.626 +			BUG_ON(ret);
   4.627 +			if (op.status) {
   4.628 +				printk(KERN_ERR "Error mapping the grant "
   4.629 +				       "reference into user space (%d). domid "
   4.630 +				       "= %d; ref = %d\n", op.status,
   4.631 +				       private_data->grants[slot_index+i].u
   4.632 +				       .valid.domid,
   4.633 +				       private_data->grants[slot_index+i].u
   4.634 +				       .valid.ref);
   4.635 +				goto undo_map_out;
   4.636 +			}
   4.637 +			
   4.638 +			/* Record the grant handle, for use in the unmap 
   4.639 +			 * operation. 
   4.640 +			 */
   4.641 +			private_data->grants[slot_index+i].u.
   4.642 +				valid.user_handle = op.handle;
   4.643 +
   4.644 +			/* Update p2m structure with the new mapping. */
   4.645 +			set_phys_to_machine(__pa(kernel_vaddr) >> PAGE_SHIFT,
   4.646 +					    FOREIGN_FRAME(private_data->
   4.647 +							  grants[slot_index+i]
   4.648 +							  .u.valid.dev_bus_addr
   4.649 +							  >> PAGE_SHIFT));
   4.650 +		} else {
   4.651 +			/* USING SHADOW PAGE TABLES. */
   4.652 +			/* In this case, we simply insert the page into the VM
   4.653 +			 * area. */
   4.654 +			ret = vm_insert_page(vma, user_vaddr, page);
   4.655 +		}
   4.656 +
   4.657 +	}
   4.658 +
   4.659 +	up_write(&private_data->grants_sem);
   4.660 +	return 0;
   4.661 +
   4.662 +undo_map_out:
   4.663 +	/* If we have a mapping failure, the unmapping will be taken care of
   4.664 +	 * by do_mmap_pgoff(), which will eventually call gntdev_clear_pte().
   4.665 +	 * All we need to do here is free the vma->vm_private_data.
   4.666 +	 */
   4.667 +	kfree(vma->vm_private_data);
   4.668 +
   4.669 +	/* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
   4.670 +	 * to NULL on failure. However, we need this in gntdev_clear_pte() to
   4.671 +	 * unmap the grants. Therefore, we smuggle a reference to the file's
   4.672 +	 * private data in the VM area's private data pointer.
   4.673 +	 */
   4.674 +	vma->vm_private_data = private_data;
   4.675 +	
   4.676 +	up_write(&private_data->grants_sem);
   4.677 +
   4.678 +	return -ENOMEM;
   4.679 +}
   4.680 +/* Clear one gntdev PTE, unmapping its grant if mapped; returns the old PTE. */
   4.681 +static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
   4.682 +			      pte_t *ptep, int is_fullmm)
   4.683 +{
   4.684 +	int slot_index, ret;
   4.685 +	pte_t copy;
   4.686 +	struct gnttab_unmap_grant_ref op;
   4.687 +	gntdev_file_private_data_t *private_data;
   4.688 +
   4.689 +	/* Locate the per-file state. It normally hangs off vma->vm_file, but
   4.690 +	 * the failure path of the mmap handler notes that do_mmap_pgoff()
   4.691 +	 * resets vm_file to NULL, and therefore stores the file's private
   4.692 +	 * data in vma->vm_private_data; fall back to that pointer here.
   4.693 +	 */
   4.694 +	if (vma->vm_file) {
   4.695 +		private_data = (gntdev_file_private_data_t *)
   4.696 +			vma->vm_file->private_data;
   4.697 +	} else if (vma->vm_private_data) {
   4.698 +		private_data = (gntdev_file_private_data_t *)
   4.699 +			vma->vm_private_data;
   4.700 +	} else {
   4.701 +		private_data = NULL; /* assigned only to avoid a gcc warning */
   4.702 +		BUG();
   4.703 +	}
   4.704 +
   4.705 +	/* Snapshot the PTE before it is modified; this is the return value. */
   4.706 +	copy = *ptep;
   4.707 +
   4.708 +	/* Slot = VMA page offset + page index of addr within the VMA. */
   4.709 +	slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
   4.710 +
   4.711 +	/* Only unmap grants if the slot has been mapped. This could be being
   4.712 +	 * called from a failing mmap().
   4.713 +	 */
   4.714 +	if (private_data->grants[slot_index].state == GNTDEV_SLOT_MAPPED) {
   4.715 +
   4.716 +		/* First, we clear the user space mapping, if it has been made.
   4.717 +		 */
   4.718 +		if (private_data->grants[slot_index].u.valid.user_handle !=
   4.719 +		    GNTDEV_INVALID_HANDLE && 
   4.720 +		    !xen_feature(XENFEAT_auto_translated_physmap)) {
   4.721 +			/* NOT USING SHADOW PAGE TABLES. */
   4.722 +			gnttab_set_unmap_op(&op, virt_to_machine(ptep), 
   4.723 +					    GNTMAP_contains_pte,
   4.724 +					    private_data->grants[slot_index]
   4.725 +					    .u.valid.user_handle);
   4.726 +			ret = HYPERVISOR_grant_table_op(
   4.727 +				GNTTABOP_unmap_grant_ref, &op, 1);
   4.728 +			BUG_ON(ret);
   4.729 +			if (op.status)
   4.730 +				printk("User unmap grant status = %d\n", 
   4.731 +				       op.status);
   4.732 +		} else {
   4.733 +			/* USING SHADOW PAGE TABLES, or no user grant handle. */
   4.734 +			pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
   4.735 +		}
   4.736 +
   4.737 +		/* Finally, we unmap the grant from kernel space. */
   4.738 +		gnttab_set_unmap_op(&op, 
   4.739 +				    get_kernel_vaddr(private_data, slot_index),
   4.740 +				    GNTMAP_host_map, 
   4.741 +				    private_data->grants[slot_index].u.valid
   4.742 +				    .kernel_handle);
   4.743 +		ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 
   4.744 +						&op, 1);
   4.745 +		BUG_ON(ret);
   4.746 +		if (op.status)
   4.747 +			printk("Kernel unmap grant status = %d\n", op.status);
   4.748 +
   4.749 +
   4.750 +		/* Return slot to the not-yet-mapped state, so that it may be
   4.751 +		 * mapped again, or removed by a subsequent ioctl.
   4.752 +		 */
   4.753 +		private_data->grants[slot_index].state = 
   4.754 +			GNTDEV_SLOT_NOT_YET_MAPPED;
   4.755 +
   4.756 +		/* Invalidate the physical to machine mapping for this page. */
   4.757 +		set_phys_to_machine(__pa(get_kernel_vaddr(private_data, 
   4.758 +							  slot_index)) 
   4.759 +				    >> PAGE_SHIFT, INVALID_P2M_ENTRY);
   4.760 +
   4.761 +	} else {
   4.762 +		pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
   4.763 +	}
   4.764 +
   4.765 +	return copy;
   4.766 +}
   4.767 +
   4.768 +/* "Destructor" for a VM area: frees the data hung off vm_private_data.
   4.769 + */
   4.770 +static void gntdev_vma_close(struct vm_area_struct *vma)
   4.771 +{
   4.772 +	/* kfree() ignores NULL, so no explicit guard is required. */
   4.773 +	kfree(vma->vm_private_data);
   4.774 +}
   4.775 +
   4.776 +/* Called when an ioctl is made on the device.
   4.777 + *
   4.778 + * @arg is a user pointer to the ioctl_gntdev_* structure for @cmd: MAP adds
   4.779 + * grant reference(s) and reports the mmap() offset in op.index, UNMAP puts
   4.780 + * slots that are not mmap()-ed back on the free list, and
   4.781 + * GET_OFFSET_FOR_VADDR reports the offset and page count of the gntdev VM
   4.782 + * area starting at op.vaddr. Returns 0 on success, an error code on
   4.783 + * failure, or -ENOIOCTLCMD for an unrecognised command.
   4.784 + */
   4.785 +static int gntdev_ioctl(struct inode *inode, struct file *flip,
   4.786 +			unsigned int cmd, unsigned long arg)
   4.787 +{
   4.788 +	int rc = 0;
   4.789 +	gntdev_file_private_data_t *private_data =
   4.790 +		(gntdev_file_private_data_t *) flip->private_data;
   4.791 +
   4.792 +	switch (cmd) {
   4.793 +	case IOCTL_GNTDEV_MAP_GRANT_REF:
   4.794 +	{
   4.795 +		struct ioctl_gntdev_map_grant_ref op;
   4.796 +		down_write(&private_data->grants_sem);
   4.797 +		down_write(&private_data->free_list_sem);
   4.798 +
   4.799 +		if ((rc = copy_from_user(&op, (void __user *) arg,
   4.800 +					 sizeof(op)))) {
   4.801 +			rc = -EFAULT;
   4.802 +			goto map_out;
   4.803 +		}
   4.804 +		if (unlikely(op.count <= 0)) {
   4.805 +			rc = -EINVAL;
   4.806 +			goto map_out;
   4.807 +		}
   4.808 +
   4.809 +		if (op.count == 1) {
   4.810 +			if ((rc = add_grant_reference(flip, &op.refs[0],
   4.811 +						      &op.index)) < 0) {
   4.812 +				printk(KERN_ERR "Adding grant reference "
   4.813 +				       "failed (%d).\n", rc);
   4.814 +				goto map_out;
   4.815 +			}
   4.816 +		} else {
   4.817 +			struct ioctl_gntdev_grant_ref *refs, *u;
   4.818 +			/* op.count is user-controlled: reject values whose
   4.819 +			 * byte size would wrap around in the kmalloc() and
   4.820 +			 * copy_from_user() size computations below.
   4.821 +			 */
   4.822 +			if (op.count > ((size_t)-1) / sizeof(*refs)) {
   4.823 +				rc = -EINVAL;
   4.824 +				goto map_out;
   4.825 +			}
   4.826 +			refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL);
   4.827 +			if (!refs) {
   4.828 +				rc = -ENOMEM;
   4.829 +				goto map_out;
   4.830 +			}
   4.831 +			u = ((struct ioctl_gntdev_map_grant_ref *)arg)->refs;
   4.832 +			if ((rc = copy_from_user(refs,
   4.833 +						 (void __user *)u,
   4.834 +						 sizeof(*refs) * op.count))) {
   4.835 +				printk(KERN_ERR "Copying refs from user failed"
   4.836 +				       " (%d).\n", rc);
   4.837 +				rc = -EINVAL;
   4.838 +				/* Do not leak the temporary copy on failure. */
   4.839 +				kfree(refs);
   4.840 +				goto map_out;
   4.841 +			}
   4.842 +			if ((rc = find_contiguous_free_range(flip, op.count))
   4.843 +			    < 0) {
   4.844 +				printk(KERN_ERR "Finding contiguous range "
   4.845 +				       "failed (%d).\n", rc);
   4.846 +				kfree(refs);
   4.847 +				goto map_out;
   4.848 +			}
   4.849 +			op.index = rc << PAGE_SHIFT;
   4.850 +			if ((rc = add_grant_references(flip, op.count,
   4.851 +						       refs, rc))) {
   4.852 +				printk(KERN_ERR "Adding grant references "
   4.853 +				       "failed (%d).\n", rc);
   4.854 +				kfree(refs);
   4.855 +				goto map_out;
   4.856 +			}
   4.857 +			compress_free_list(flip);
   4.858 +			kfree(refs);
   4.859 +		}
   4.860 +		if ((rc = copy_to_user((void __user *) arg,
   4.861 +				       &op,
   4.862 +				       sizeof(op)))) {
   4.863 +			printk(KERN_ERR "Copying result back to user failed "
   4.864 +			       "(%d)\n", rc);
   4.865 +			rc = -EFAULT;
   4.866 +			goto map_out;
   4.867 +		}
   4.868 +	map_out:
   4.869 +		up_write(&private_data->grants_sem);
   4.870 +		up_write(&private_data->free_list_sem);
   4.871 +		return rc;
   4.872 +	}
   4.873 +	case IOCTL_GNTDEV_UNMAP_GRANT_REF:
   4.874 +	{
   4.875 +		struct ioctl_gntdev_unmap_grant_ref op;
   4.876 +		int i, start_index;
   4.877 +
   4.878 +		down_write(&private_data->grants_sem);
   4.879 +		down_write(&private_data->free_list_sem);
   4.880 +
   4.881 +		if ((rc = copy_from_user(&op,
   4.882 +					 (void __user *) arg,
   4.883 +					 sizeof(op)))) {
   4.884 +			rc = -EFAULT;
   4.885 +			goto unmap_out;
   4.886 +		}
   4.887 +
   4.888 +		start_index = op.index >> PAGE_SHIFT;
   4.889 +
   4.890 +		/* NOTE(review): op.index and op.count come straight from user
   4.891 +		 * space and are not range-checked here before being used to
   4.892 +		 * index grants[] and free_list[] - confirm the array bounds
   4.893 +		 * and validate them.
   4.894 +		 */
   4.895 +
   4.896 +		/* First, check that all pages are in the NOT_YET_MAPPED
   4.897 +		 * state.
   4.898 +		 */
   4.899 +		for (i = 0; i < op.count; ++i) {
   4.900 +			if (unlikely
   4.901 +			    (private_data->grants[start_index + i].state
   4.902 +			     != GNTDEV_SLOT_NOT_YET_MAPPED)) {
   4.903 +				if (private_data->grants[start_index + i].state
   4.904 +				    == GNTDEV_SLOT_INVALID) {
   4.905 +					printk(KERN_ERR
   4.906 +					       "Tried to remove an invalid "
   4.907 +					       "grant at offset 0x%x.\n",
   4.908 +					       (start_index + i)
   4.909 +					       << PAGE_SHIFT);
   4.910 +					rc = -EINVAL;
   4.911 +				} else {
   4.912 +					printk(KERN_ERR
   4.913 +					       "Tried to remove a grant which "
   4.914 +					       "is currently mmap()-ed at "
   4.915 +					       "offset 0x%x.\n",
   4.916 +					       (start_index + i)
   4.917 +					       << PAGE_SHIFT);
   4.918 +					rc = -EBUSY;
   4.919 +				}
   4.920 +				goto unmap_out;
   4.921 +			}
   4.922 +		}
   4.923 +
   4.924 +		/* Unmap pages and add them to the free list.
   4.925 +		 */
   4.926 +		for (i = 0; i < op.count; ++i) {
   4.927 +			private_data->grants[start_index+i].state =
   4.928 +				GNTDEV_SLOT_INVALID;
   4.929 +			private_data->grants[start_index+i].u.free_list_index =
   4.930 +				private_data->free_list_size;
   4.931 +			private_data->free_list[private_data->free_list_size] =
   4.932 +				start_index + i;
   4.933 +			++private_data->free_list_size;
   4.934 +		}
   4.935 +		compress_free_list(flip);
   4.936 +
   4.937 +	unmap_out:
   4.938 +		up_write(&private_data->grants_sem);
   4.939 +		up_write(&private_data->free_list_sem);
   4.940 +		return rc;
   4.941 +	}
   4.942 +	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
   4.943 +	{
   4.944 +		struct ioctl_gntdev_get_offset_for_vaddr op;
   4.945 +		struct vm_area_struct *vma;
   4.946 +		unsigned long vaddr;
   4.947 +
   4.948 +		if ((rc = copy_from_user(&op,
   4.949 +					 (void __user *) arg,
   4.950 +					 sizeof(op)))) {
   4.951 +			rc = -EFAULT;
   4.952 +			goto get_offset_out;
   4.953 +		}
   4.954 +		vaddr = (unsigned long)op.vaddr;
   4.955 +
   4.956 +		down_read(&current->mm->mmap_sem);
   4.957 +		vma = find_vma(current->mm, vaddr);
   4.958 +		if (vma == NULL) {
   4.959 +			rc = -EFAULT;
   4.960 +			goto get_offset_unlock_out;
   4.961 +		}
   4.962 +		if ((!vma->vm_ops) || (vma->vm_ops != &gntdev_vmops)) {
   4.963 +			printk(KERN_ERR "The vaddr specified does not belong "
   4.964 +			       "to a gntdev instance: %#lx\n", vaddr);
   4.965 +			rc = -EFAULT;
   4.966 +			goto get_offset_unlock_out;
   4.967 +		}
   4.968 +		if (vma->vm_start != vaddr) {
   4.969 +			printk(KERN_ERR "The vaddr specified in an "
   4.970 +			       "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at "
   4.971 +			       "the start of the VM area. vma->vm_start = "
   4.972 +			       "%#lx; vaddr = %#lx\n",
   4.973 +			       vma->vm_start, vaddr);
   4.974 +			rc = -EFAULT;
   4.975 +			goto get_offset_unlock_out;
   4.976 +		}
   4.977 +		op.offset = vma->vm_pgoff << PAGE_SHIFT;
   4.978 +		op.count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
   4.979 +		up_read(&current->mm->mmap_sem);
   4.980 +		if ((rc = copy_to_user((void __user *) arg,
   4.981 +				       &op,
   4.982 +				       sizeof(op)))) {
   4.983 +			rc = -EFAULT;
   4.984 +			goto get_offset_out;
   4.985 +		}
   4.986 +		goto get_offset_out;
   4.987 +	get_offset_unlock_out:
   4.988 +		up_read(&current->mm->mmap_sem);
   4.989 +	get_offset_out:
   4.990 +		return rc;
   4.991 +	}
   4.992 +	default:
   4.993 +		return -ENOIOCTLCMD;
   4.994 +	}
   4.995 +
   4.996 +	return 0;
   4.997 +}
     5.1 --- a/linux-2.6-xen-sparse/drivers/xen/util.c	Sat Mar 31 12:42:02 2007 +0100
     5.2 +++ b/linux-2.6-xen-sparse/drivers/xen/util.c	Sat Mar 31 13:53:24 2007 +0100
     5.3 @@ -5,6 +5,23 @@
     5.4  #include <asm/uaccess.h>
     5.5  #include <xen/driver_util.h>
     5.6  
     5.7 +/* Return the "xen" class, creating it on first use; NULL if that fails. */
     5.8 +struct class *get_xen_class(void)
     5.9 +{
    5.10 +	static struct class *xen_class;
    5.11 +
    5.12 +	if (!xen_class) {
    5.13 +		xen_class = class_create(THIS_MODULE, "xen");
    5.14 +		if (IS_ERR(xen_class)) {
    5.15 +			printk("Failed to create xen sysfs class.\n");
    5.16 +			xen_class = NULL;
    5.17 +		}
    5.18 +	}
    5.19 +
    5.20 +	return xen_class;
    5.21 +}
    5.22 +EXPORT_SYMBOL_GPL(get_xen_class);
    5.23 +
    5.24  static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
    5.25  {
    5.26  	/* apply_to_page_range() does all the hard work. */
     6.1 --- a/linux-2.6-xen-sparse/include/xen/driver_util.h	Sat Mar 31 12:42:02 2007 +0100
     6.2 +++ b/linux-2.6-xen-sparse/include/xen/driver_util.h	Sat Mar 31 13:53:24 2007 +0100
     6.3 @@ -3,9 +3,12 @@
     6.4  #define __ASM_XEN_DRIVER_UTIL_H__
     6.5  
     6.6  #include <linux/vmalloc.h>
     6.7 +#include <linux/device.h>
     6.8  
     6.9  /* Allocate/destroy a 'vmalloc' VM area. */
    6.10  extern struct vm_struct *alloc_vm_area(unsigned long size);
    6.11  extern void free_vm_area(struct vm_struct *area);
    6.12  
    6.13 +extern struct class *get_xen_class(void);
    6.14 +
    6.15  #endif /* __ASM_XEN_DRIVER_UTIL_H__ */
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/linux-2.6-xen-sparse/include/xen/public/gntdev.h	Sat Mar 31 13:53:24 2007 +0100
     7.3 @@ -0,0 +1,105 @@
     7.4 +/******************************************************************************
     7.5 + * gntdev.h
     7.6 + * 
     7.7 + * Interface to /dev/xen/gntdev.
     7.8 + * 
     7.9 + * Copyright (c) 2007, D G Murray
    7.10 + * 
    7.11 + * This program is free software; you can redistribute it and/or
    7.12 + * modify it under the terms of the GNU General Public License version 2
    7.13 + * as published by the Free Software Foundation; or, when distributed
    7.14 + * separately from the Linux kernel or incorporated into other
    7.15 + * software packages, subject to the following license:
    7.16 + * 
    7.17 + * Permission is hereby granted, free of charge, to any person obtaining a copy
    7.18 + * of this source file (the "Software"), to deal in the Software without
    7.19 + * restriction, including without limitation the rights to use, copy, modify,
    7.20 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
    7.21 + * and to permit persons to whom the Software is furnished to do so, subject to
    7.22 + * the following conditions:
    7.23 + * 
    7.24 + * The above copyright notice and this permission notice shall be included in
    7.25 + * all copies or substantial portions of the Software.
    7.26 + * 
    7.27 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    7.28 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    7.29 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    7.30 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    7.31 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    7.32 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
    7.33 + * IN THE SOFTWARE.
    7.34 + */
    7.35 +
    7.36 +#ifndef __LINUX_PUBLIC_GNTDEV_H__
    7.37 +#define __LINUX_PUBLIC_GNTDEV_H__
    7.38 +
    7.39 +struct ioctl_gntdev_grant_ref { /* One (domain ID, grant reference) pair. */
    7.40 +	/* The domain ID of the grant to be mapped. */
    7.41 +	uint32_t domid;
    7.42 +	/* The grant reference of the grant to be mapped. */
    7.43 +	uint32_t ref;
    7.44 +};
    7.45 +
    7.46 +/*
    7.47 + * Inserts the grant references into the mapping table of an instance
    7.48 + * of gntdev. N.B. This does not perform the mapping, which is deferred
    7.49 + * until mmap() is called with @index as the offset.
    7.50 + */
    7.51 +#define IOCTL_GNTDEV_MAP_GRANT_REF \
    7.52 +_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref))
    7.53 +struct ioctl_gntdev_map_grant_ref {
    7.54 +	/* IN parameters */
    7.55 +	/* The number of grants to be mapped; the driver rejects 0. */
    7.56 +	uint32_t count;
    7.57 +	uint32_t pad; /* Explicit padding: puts @index at offset 8. */
    7.58 +	/* OUT parameters */
    7.59 +	/* The offset to be used on a subsequent call to mmap(). */
    7.60 +	uint64_t index;
    7.61 +	/* Variable IN parameter: array of @count grant references. It is */
    7.62 +	/* declared with one element; further entries follow it in memory. */
    7.63 +	struct ioctl_gntdev_grant_ref refs[1];
    7.64 +};
    7.65 +
    7.66 +/*
    7.67 + * Removes the grant references from the mapping table of an instance of
    7.68 + * gntdev. N.B. munmap() must be called on the relevant virtual address(es)
    7.69 + * before this ioctl is called, or an error will result.
    7.70 + */
    7.71 +#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
    7.72 +_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref))       
    7.73 +struct ioctl_gntdev_unmap_grant_ref {
    7.74 +	/* IN parameters */
    7.75 +	/* The offset returned by the corresponding map operation. */
    7.76 +	uint64_t index;
    7.77 +	/* The number of pages to be unmapped. */
    7.78 +	uint32_t count;
    7.79 +	uint32_t pad; /* Explicit padding to a multiple of 8 bytes. */
    7.80 +};
    7.81 +
    7.82 +/*
    7.83 + * Returns the offset in the driver's address space that corresponds
    7.84 + * to @vaddr. This can be used to perform a munmap(), followed by an
    7.85 + * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by
    7.86 + * the caller. The number of pages that were allocated at the same time as
    7.87 + * @vaddr is returned in @count.
    7.88 + *
    7.89 + * N.B. Where more than one page has been mapped into a contiguous range, the
    7.90 + *      supplied @vaddr must correspond to the start of the range; otherwise
    7.91 + *      an error will result. It is only possible to munmap() the entire
    7.92 + *      contiguously-allocated range at once, and not any subrange thereof.
    7.93 + */
    7.94 +#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \
    7.95 +_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntdev_get_offset_for_vaddr))
    7.96 +struct ioctl_gntdev_get_offset_for_vaddr {
    7.97 +	/* IN parameters */
    7.98 +	/* The virtual address of the first mapped page in a range. */
    7.99 +	uint64_t vaddr;
   7.100 +	/* OUT parameters */
   7.101 +	/* The offset that was used in the initial mmap() operation. */
   7.102 +	uint64_t offset;
   7.103 +	/* The number of pages mapped in the VM area that begins at @vaddr. */
   7.104 +	uint32_t count;
   7.105 +	uint32_t pad; /* Explicit padding to a multiple of 8 bytes. */
   7.106 +};
   7.107 +
   7.108 +#endif /* __LINUX_PUBLIC_GNTDEV_H__ */