ia64/linux-2.6.18-xen.hg

view drivers/xen/gntdev/gntdev.c @ 761:5e1269aa5c29

blktap, gntdev: fix highpte handling

In case of highpte, virt_to_machine() can't be used. Introduce
ptep_to_machine() and use it, also to simplify xen_l1_entry_update().

Original patch from: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Dec 10 13:32:32 2008 +0000 (2008-12-10)
parents 1bd3dbfdaf0f
children
line source
1 /******************************************************************************
2 * gntdev.c
3 *
4 * Device for accessing (in user-space) pages that have been granted by other
5 * domains.
6 *
7 * Copyright (c) 2006-2007, D G Murray.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
19 #include <asm/atomic.h>
20 #include <linux/module.h>
21 #include <linux/kernel.h>
22 #include <linux/init.h>
23 #include <linux/fs.h>
24 #include <linux/device.h>
25 #include <linux/mm.h>
26 #include <linux/mman.h>
27 #include <asm/uaccess.h>
28 #include <asm/io.h>
29 #include <xen/gnttab.h>
30 #include <asm/hypervisor.h>
31 #include <xen/balloon.h>
32 #include <xen/evtchn.h>
33 #include <xen/driver_util.h>
35 #include <linux/types.h>
36 #include <xen/public/gntdev.h>
39 #define DRIVER_AUTHOR "Derek G. Murray <Derek.Murray@cl.cam.ac.uk>"
40 #define DRIVER_DESC "User-space granted page access driver"
42 MODULE_LICENSE("GPL");
43 MODULE_AUTHOR(DRIVER_AUTHOR);
44 MODULE_DESCRIPTION(DRIVER_DESC);
46 #define MAX_GRANTS_LIMIT 1024
47 #define DEFAULT_MAX_GRANTS 128
/* A slot can be in one of three states:
 *
 * 0. GNTDEV_SLOT_INVALID:
 *    This slot is not associated with a grant reference, and is therefore free
 *    to be overwritten by a new grant reference.
 *
 * 1. GNTDEV_SLOT_NOT_YET_MAPPED:
 *    This slot is associated with a grant reference (via the
 *    IOCTL_GNTDEV_MAP_GRANT_REF ioctl), but it has not yet been mmap()-ed.
 *
 * 2. GNTDEV_SLOT_MAPPED:
 *    This slot is associated with a grant reference, and has been mmap()-ed.
 */
typedef enum gntdev_slot_state {
	GNTDEV_SLOT_INVALID = 0,
	GNTDEV_SLOT_NOT_YET_MAPPED,
	GNTDEV_SLOT_MAPPED
} gntdev_slot_state_t;
/* Sentinel for a grant handle that Xen has not (yet) returned. */
#define GNTDEV_INVALID_HANDLE    -1
/* Sentinel for a free-list entry removed by the contiguous allocator but not
 * yet compacted away by compress_free_list(). */
#define GNTDEV_FREE_LIST_INVALID -1

/* Each opened instance of gntdev is associated with a list of grants,
 * represented by an array of elements of the following type,
 * gntdev_grant_info_t.
 */
typedef struct gntdev_grant_info {
	gntdev_slot_state_t state;
	union {
		/* Used while state == GNTDEV_SLOT_INVALID: this slot's
		 * position in the free list. */
		uint32_t free_list_index;
		/* Used in the other two states. */
		struct {
			domid_t domid;			/* granting domain */
			grant_ref_t ref;		/* grant reference */
			grant_handle_t kernel_handle;	/* handle of the kernel-space mapping */
			grant_handle_t user_handle;	/* handle of the user-space mapping */
			uint64_t dev_bus_addr;		/* machine address reported by the map op */
		} valid;
	} u;
} gntdev_grant_info_t;
/* Private data structure, which is stored in the file pointer for files
 * associated with this device.
 */
typedef struct gntdev_file_private_data {

	/* Array of grant information. */
	gntdev_grant_info_t *grants;
	uint32_t grants_size;		/* number of elements in @grants */

	/* Read/write semaphore used to protect the grants array. */
	struct rw_semaphore grants_sem;

	/* An array of indices of free slots in the grants array.
	 * N.B. An entry in this list may temporarily have the value
	 * GNTDEV_FREE_LIST_INVALID if the corresponding slot has been removed
	 * from the list by the contiguous allocator, but the list has not yet
	 * been compressed. However, this is not visible across invocations of
	 * the device.
	 */
	int32_t *free_list;

	/* The number of free slots in the grants array. */
	uint32_t free_list_size;

	/* Read/write semaphore used to protect the free list. */
	struct rw_semaphore free_list_sem;

	/* Index of the next slot after the most recent contiguous allocation,
	 * for use in a next-fit allocator.
	 */
	uint32_t next_fit_index;

	/* Used to map grants into the kernel, before mapping them into user
	 * space.
	 */
	struct page **foreign_pages;

} gntdev_file_private_data_t;
/* Module lifecycle operations. */
static int __init gntdev_init(void);
static void __exit gntdev_exit(void);

module_init(gntdev_init);
module_exit(gntdev_exit);

/* File operations. */
static int gntdev_open(struct inode *inode, struct file *flip);
static int gntdev_release(struct inode *inode, struct file *flip);
static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma);
static long gntdev_ioctl(struct file *flip,
			 unsigned int cmd, unsigned long arg);

static const struct file_operations gntdev_fops = {
	.owner = THIS_MODULE,
	.open = gntdev_open,
	.release = gntdev_release,
	.mmap = gntdev_mmap,
	.unlocked_ioctl = gntdev_ioctl
};

/* VM operations. */
static void gntdev_vma_close(struct vm_area_struct *vma);
static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
			      pte_t *ptep, int is_fullmm);

static struct vm_operations_struct gntdev_vmops = {
	.close = gntdev_vma_close,
	/* .zap_pte is a Xen-tree extension of vm_operations_struct; it is
	 * invoked per PTE when the area is unmapped (see gntdev_clear_pte). */
	.zap_pte = gntdev_clear_pte
};

/* Global variables. */

/* The driver major number, for use when unregistering the driver. */
static int gntdev_major;

#define GNTDEV_NAME "gntdev"
166 /* Memory mapping functions
167 * ------------------------
168 *
169 * Every granted page is mapped into both kernel and user space, and the two
170 * following functions return the respective virtual addresses of these pages.
171 *
172 * When shadow paging is disabled, the granted page is mapped directly into
173 * user space; when it is enabled, it is mapped into the kernel and remapped
174 * into user space using vm_insert_page() (see gntdev_mmap(), below).
175 */
177 /* Returns the virtual address (in user space) of the @page_index'th page
178 * in the given VM area.
179 */
180 static inline unsigned long get_user_vaddr (struct vm_area_struct *vma,
181 int page_index)
182 {
183 return (unsigned long) vma->vm_start + (page_index << PAGE_SHIFT);
184 }
186 /* Returns the virtual address (in kernel space) of the @slot_index'th page
187 * mapped by the gntdev instance that owns the given private data struct.
188 */
189 static inline unsigned long get_kernel_vaddr (gntdev_file_private_data_t *priv,
190 int slot_index)
191 {
192 unsigned long pfn;
193 void *kaddr;
194 pfn = page_to_pfn(priv->foreign_pages[slot_index]);
195 kaddr = pfn_to_kaddr(pfn);
196 return (unsigned long) kaddr;
197 }
199 /* Helper functions. */
201 /* Adds information about a grant reference to the list of grants in the file's
202 * private data structure. Returns non-zero on failure. On success, sets the
203 * value of *offset to the offset that should be mmap()-ed in order to map the
204 * grant reference.
205 */
206 static int add_grant_reference(struct file *flip,
207 struct ioctl_gntdev_grant_ref *op,
208 uint64_t *offset)
209 {
210 gntdev_file_private_data_t *private_data
211 = (gntdev_file_private_data_t *) flip->private_data;
213 uint32_t slot_index;
215 if (unlikely(private_data->free_list_size == 0)) {
216 return -ENOMEM;
217 }
219 slot_index = private_data->free_list[--private_data->free_list_size];
220 private_data->free_list[private_data->free_list_size]
221 = GNTDEV_FREE_LIST_INVALID;
223 /* Copy the grant information into file's private data. */
224 private_data->grants[slot_index].state = GNTDEV_SLOT_NOT_YET_MAPPED;
225 private_data->grants[slot_index].u.valid.domid = op->domid;
226 private_data->grants[slot_index].u.valid.ref = op->ref;
228 /* The offset is calculated as the index of the chosen entry in the
229 * file's private data's array of grant information. This is then
230 * shifted to give an offset into the virtual "file address space".
231 */
232 *offset = slot_index << PAGE_SHIFT;
234 return 0;
235 }
237 /* Adds the @count grant references to the contiguous range in the slot array
238 * beginning at @first_slot. It is assumed that @first_slot was returned by a
239 * previous invocation of find_contiguous_free_range(), during the same
240 * invocation of the driver.
241 */
242 static int add_grant_references(struct file *flip,
243 int count,
244 struct ioctl_gntdev_grant_ref *ops,
245 uint32_t first_slot)
246 {
247 gntdev_file_private_data_t *private_data
248 = (gntdev_file_private_data_t *) flip->private_data;
249 int i;
251 for (i = 0; i < count; ++i) {
253 /* First, mark the slot's entry in the free list as invalid. */
254 int free_list_index =
255 private_data->grants[first_slot+i].u.free_list_index;
256 private_data->free_list[free_list_index] =
257 GNTDEV_FREE_LIST_INVALID;
259 /* Now, update the slot. */
260 private_data->grants[first_slot+i].state =
261 GNTDEV_SLOT_NOT_YET_MAPPED;
262 private_data->grants[first_slot+i].u.valid.domid =
263 ops[i].domid;
264 private_data->grants[first_slot+i].u.valid.ref = ops[i].ref;
265 }
267 return 0;
268 }
270 /* Scans through the free list for @flip, removing entries that are marked as
271 * GNTDEV_SLOT_INVALID. This will reduce the recorded size of the free list to
272 * the number of valid entries.
273 */
274 static void compress_free_list(struct file *flip)
275 {
276 gntdev_file_private_data_t *private_data
277 = (gntdev_file_private_data_t *) flip->private_data;
278 int i, j = 0, old_size, slot_index;
280 old_size = private_data->free_list_size;
281 for (i = 0; i < old_size; ++i) {
282 if (private_data->free_list[i] != GNTDEV_FREE_LIST_INVALID) {
283 if (i > j) {
284 slot_index = private_data->free_list[i];
285 private_data->free_list[j] = slot_index;
286 private_data->grants[slot_index].u
287 .free_list_index = j;
288 private_data->free_list[i]
289 = GNTDEV_FREE_LIST_INVALID;
290 }
291 ++j;
292 } else {
293 --private_data->free_list_size;
294 }
295 }
296 }
298 /* Searches the grant array in the private data of @flip for a range of
299 * @num_slots contiguous slots in the GNTDEV_SLOT_INVALID state.
300 *
301 * Returns the index of the first slot if a range is found, otherwise -ENOMEM.
302 */
303 static int find_contiguous_free_range(struct file *flip,
304 uint32_t num_slots)
305 {
306 gntdev_file_private_data_t *private_data
307 = (gntdev_file_private_data_t *) flip->private_data;
309 int i;
310 int start_index = private_data->next_fit_index;
311 int range_start = 0, range_length;
313 if (private_data->free_list_size < num_slots) {
314 return -ENOMEM;
315 }
317 /* First search from the start_index to the end of the array. */
318 range_length = 0;
319 for (i = start_index; i < private_data->grants_size; ++i) {
320 if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
321 if (range_length == 0) {
322 range_start = i;
323 }
324 ++range_length;
325 if (range_length == num_slots) {
326 return range_start;
327 }
328 }
329 }
331 /* Now search from the start of the array to the start_index. */
332 range_length = 0;
333 for (i = 0; i < start_index; ++i) {
334 if (private_data->grants[i].state == GNTDEV_SLOT_INVALID) {
335 if (range_length == 0) {
336 range_start = i;
337 }
338 ++range_length;
339 if (range_length == num_slots) {
340 return range_start;
341 }
342 }
343 }
345 return -ENOMEM;
346 }
/* Allocates and initialises the per-file-descriptor state: the pool of
 * kernel-side pages used for grant mappings, the grant array, and the free
 * list.  Called lazily on the first ioctl, or with a caller-chosen size via
 * IOCTL_GNTDEV_SET_MAX_GRANTS.
 *
 * Returns 0 on success, or -ENOMEM after releasing any partial allocations.
 */
static int init_private_data(gntdev_file_private_data_t *priv,
			     uint32_t max_grants)
{
	int i;

	/* Allocate space for the kernel-mapping of granted pages. */
	priv->foreign_pages =
		alloc_empty_pages_and_pagevec(max_grants);
	if (!priv->foreign_pages)
		goto nomem_out;

	/* Allocate the grant list and free-list. */
	priv->grants = kmalloc(max_grants * sizeof(gntdev_grant_info_t),
			       GFP_KERNEL);
	if (!priv->grants)
		goto nomem_out2;
	priv->free_list = kmalloc(max_grants * sizeof(int32_t), GFP_KERNEL);
	if (!priv->free_list)
		goto nomem_out3;

	/* Initialise the free-list, which contains all slots at first.
	 * Slot i is stored at free-list position (max_grants - i - 1), so
	 * popping from the tail hands out slots in ascending index order. */
	for (i = 0; i < max_grants; ++i) {
		priv->free_list[max_grants - i - 1] = i;
		priv->grants[i].state = GNTDEV_SLOT_INVALID;
		priv->grants[i].u.free_list_index = max_grants - i - 1;
	}
	priv->grants_size = max_grants;
	priv->free_list_size = max_grants;
	priv->next_fit_index = 0;

	return 0;

nomem_out3:
	kfree(priv->grants);
nomem_out2:
	free_empty_pages_and_pagevec(priv->foreign_pages, max_grants);
nomem_out:
	return -ENOMEM;
}
389 /* Interface functions. */
/* Initialises the driver. Called when the module is loaded.
 *
 * Registers a character device with a dynamically allocated major number
 * (register_chrdev with major 0) and, best-effort, creates the sysfs node.
 */
static int __init gntdev_init(void)
{
	struct class *class;
	struct class_device *device;

	if (!is_running_on_xen()) {
		printk(KERN_ERR "You must be running Xen to use gntdev\n");
		return -ENODEV;
	}

	gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops);
	if (gntdev_major < 0)
	{
		printk(KERN_ERR "Could not register gntdev device\n");
		return -ENOMEM;
	}

	/* Note that if the sysfs code fails, we will still initialise the
	 * device, and output the major number so that the device can be
	 * created manually using mknod.
	 */
	if ((class = get_xen_class()) == NULL) {
		printk(KERN_ERR "Error setting up xen_class\n");
		printk(KERN_ERR "gntdev created with major number = %d\n",
		       gntdev_major);
		return 0;
	}

	device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
				     NULL, GNTDEV_NAME);
	if (IS_ERR(device)) {
		printk(KERN_ERR "Error creating gntdev device in xen_class\n");
		printk(KERN_ERR "gntdev created with major number = %d\n",
		       gntdev_major);
		return 0;
	}

	return 0;
}
432 /* Cleans up and unregisters the driver. Called when the driver is unloaded.
433 */
434 static void __exit gntdev_exit(void)
435 {
436 struct class *class;
437 if ((class = get_xen_class()) != NULL)
438 class_device_destroy(class, MKDEV(gntdev_major, 0));
439 unregister_chrdev(gntdev_major, GNTDEV_NAME);
440 }
442 /* Called when the device is opened. */
443 static int gntdev_open(struct inode *inode, struct file *flip)
444 {
445 gntdev_file_private_data_t *private_data;
447 try_module_get(THIS_MODULE);
449 /* Allocate space for the per-instance private data. */
450 private_data = kmalloc(sizeof(*private_data), GFP_KERNEL);
451 if (!private_data)
452 goto nomem_out;
454 /* These will be lazily initialised by init_private_data. */
455 private_data->grants = NULL;
456 private_data->free_list = NULL;
457 private_data->foreign_pages = NULL;
459 init_rwsem(&private_data->grants_sem);
460 init_rwsem(&private_data->free_list_sem);
462 flip->private_data = private_data;
464 return 0;
466 nomem_out:
467 return -ENOMEM;
468 }
470 /* Called when the device is closed.
471 */
472 static int gntdev_release(struct inode *inode, struct file *flip)
473 {
474 if (flip->private_data) {
475 gntdev_file_private_data_t *private_data =
476 (gntdev_file_private_data_t *) flip->private_data;
477 if (private_data->foreign_pages)
478 free_empty_pages_and_pagevec
479 (private_data->foreign_pages,
480 private_data->grants_size);
481 if (private_data->grants)
482 kfree(private_data->grants);
483 if (private_data->free_list)
484 kfree(private_data->free_list);
485 kfree(private_data);
486 }
487 module_put(THIS_MODULE);
488 return 0;
489 }
491 /* Called when an attempt is made to mmap() the device. The private data from
492 * @flip contains the list of grant references that can be mapped. The vm_pgoff
493 * field of @vma contains the index into that list that refers to the grant
494 * reference that will be mapped. Only mappings that are a multiple of
495 * PAGE_SIZE are handled.
496 */
497 static int gntdev_mmap (struct file *flip, struct vm_area_struct *vma)
498 {
499 struct gnttab_map_grant_ref op;
500 unsigned long slot_index = vma->vm_pgoff;
501 unsigned long kernel_vaddr, user_vaddr;
502 uint32_t size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
503 uint64_t ptep;
504 int ret;
505 int flags;
506 int i;
507 struct page *page;
508 gntdev_file_private_data_t *private_data = flip->private_data;
510 if (unlikely(!private_data)) {
511 printk(KERN_ERR "File's private data is NULL.\n");
512 return -EINVAL;
513 }
515 /* Test to make sure that the grants array has been initialised. */
516 down_read(&private_data->grants_sem);
517 if (unlikely(!private_data->grants)) {
518 up_read(&private_data->grants_sem);
519 printk(KERN_ERR "Attempted to mmap before ioctl.\n");
520 return -EINVAL;
521 }
522 up_read(&private_data->grants_sem);
524 if (unlikely((size <= 0) ||
525 (size + slot_index) > private_data->grants_size)) {
526 printk(KERN_ERR "Invalid number of pages or offset"
527 "(num_pages = %d, first_slot = %ld).\n",
528 size, slot_index);
529 return -ENXIO;
530 }
532 if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) {
533 printk(KERN_ERR "Writable mappings must be shared.\n");
534 return -EINVAL;
535 }
537 /* Slots must be in the NOT_YET_MAPPED state. */
538 down_write(&private_data->grants_sem);
539 for (i = 0; i < size; ++i) {
540 if (private_data->grants[slot_index + i].state !=
541 GNTDEV_SLOT_NOT_YET_MAPPED) {
542 printk(KERN_ERR "Slot (index = %ld) is in the wrong "
543 "state (%d).\n", slot_index + i,
544 private_data->grants[slot_index + i].state);
545 up_write(&private_data->grants_sem);
546 return -EINVAL;
547 }
548 }
550 /* Install the hook for unmapping. */
551 vma->vm_ops = &gntdev_vmops;
553 /* The VM area contains pages from another VM. */
554 vma->vm_flags |= VM_FOREIGN;
555 vma->vm_private_data = kzalloc(size * sizeof(struct page *),
556 GFP_KERNEL);
557 if (vma->vm_private_data == NULL) {
558 printk(KERN_ERR "Couldn't allocate mapping structure for VM "
559 "area.\n");
560 return -ENOMEM;
561 }
563 /* This flag prevents Bad PTE errors when the memory is unmapped. */
564 vma->vm_flags |= VM_RESERVED;
566 /* This flag prevents this VM area being copied on a fork(). A better
567 * behaviour might be to explicitly carry out the appropriate mappings
568 * on fork(), but I don't know if there's a hook for this.
569 */
570 vma->vm_flags |= VM_DONTCOPY;
572 #ifdef CONFIG_X86
573 /* This flag ensures that the page tables are not unpinned before the
574 * VM area is unmapped. Therefore Xen still recognises the PTE as
575 * belonging to an L1 pagetable, and the grant unmap operation will
576 * succeed, even if the process does not exit cleanly.
577 */
578 vma->vm_mm->context.has_foreign_mappings = 1;
579 #endif
581 for (i = 0; i < size; ++i) {
583 flags = GNTMAP_host_map;
584 if (!(vma->vm_flags & VM_WRITE))
585 flags |= GNTMAP_readonly;
587 kernel_vaddr = get_kernel_vaddr(private_data, slot_index + i);
588 user_vaddr = get_user_vaddr(vma, i);
589 page = pfn_to_page(__pa(kernel_vaddr) >> PAGE_SHIFT);
591 gnttab_set_map_op(&op, kernel_vaddr, flags,
592 private_data->grants[slot_index+i]
593 .u.valid.ref,
594 private_data->grants[slot_index+i]
595 .u.valid.domid);
597 /* Carry out the mapping of the grant reference. */
598 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
599 &op, 1);
600 BUG_ON(ret);
601 if (op.status) {
602 printk(KERN_ERR "Error mapping the grant reference "
603 "into the kernel (%d). domid = %d; ref = %d\n",
604 op.status,
605 private_data->grants[slot_index+i]
606 .u.valid.domid,
607 private_data->grants[slot_index+i]
608 .u.valid.ref);
609 goto undo_map_out;
610 }
612 /* Store a reference to the page that will be mapped into user
613 * space.
614 */
615 ((struct page **) vma->vm_private_data)[i] = page;
617 /* Mark mapped page as reserved. */
618 SetPageReserved(page);
620 /* Record the grant handle, for use in the unmap operation. */
621 private_data->grants[slot_index+i].u.valid.kernel_handle =
622 op.handle;
623 private_data->grants[slot_index+i].u.valid.dev_bus_addr =
624 op.dev_bus_addr;
626 private_data->grants[slot_index+i].state = GNTDEV_SLOT_MAPPED;
627 private_data->grants[slot_index+i].u.valid.user_handle =
628 GNTDEV_INVALID_HANDLE;
630 /* Now perform the mapping to user space. */
631 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
633 /* NOT USING SHADOW PAGE TABLES. */
634 /* In this case, we map the grant(s) straight into user
635 * space.
636 */
638 /* Get the machine address of the PTE for the user
639 * page.
640 */
641 if ((ret = create_lookup_pte_addr(vma->vm_mm,
642 vma->vm_start
643 + (i << PAGE_SHIFT),
644 &ptep)))
645 {
646 printk(KERN_ERR "Error obtaining PTE pointer "
647 "(%d).\n", ret);
648 goto undo_map_out;
649 }
651 /* Configure the map operation. */
653 /* The reference is to be used by host CPUs. */
654 flags = GNTMAP_host_map;
656 /* Specifies a user space mapping. */
657 flags |= GNTMAP_application_map;
659 /* The map request contains the machine address of the
660 * PTE to update.
661 */
662 flags |= GNTMAP_contains_pte;
664 if (!(vma->vm_flags & VM_WRITE))
665 flags |= GNTMAP_readonly;
667 gnttab_set_map_op(&op, ptep, flags,
668 private_data->grants[slot_index+i]
669 .u.valid.ref,
670 private_data->grants[slot_index+i]
671 .u.valid.domid);
673 /* Carry out the mapping of the grant reference. */
674 ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
675 &op, 1);
676 BUG_ON(ret);
677 if (op.status) {
678 printk(KERN_ERR "Error mapping the grant "
679 "reference into user space (%d). domid "
680 "= %d; ref = %d\n", op.status,
681 private_data->grants[slot_index+i].u
682 .valid.domid,
683 private_data->grants[slot_index+i].u
684 .valid.ref);
685 goto undo_map_out;
686 }
688 /* Record the grant handle, for use in the unmap
689 * operation.
690 */
691 private_data->grants[slot_index+i].u.
692 valid.user_handle = op.handle;
694 /* Update p2m structure with the new mapping. */
695 set_phys_to_machine(__pa(kernel_vaddr) >> PAGE_SHIFT,
696 FOREIGN_FRAME(private_data->
697 grants[slot_index+i]
698 .u.valid.dev_bus_addr
699 >> PAGE_SHIFT));
700 } else {
701 /* USING SHADOW PAGE TABLES. */
702 /* In this case, we simply insert the page into the VM
703 * area. */
704 ret = vm_insert_page(vma, user_vaddr, page);
705 }
707 }
709 up_write(&private_data->grants_sem);
710 return 0;
712 undo_map_out:
713 /* If we have a mapping failure, the unmapping will be taken care of
714 * by do_mmap_pgoff(), which will eventually call gntdev_clear_pte().
715 * All we need to do here is free the vma_private_data.
716 */
717 kfree(vma->vm_private_data);
719 /* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
720 * to NULL on failure. However, we need this in gntdev_clear_pte() to
721 * unmap the grants. Therefore, we smuggle a reference to the file's
722 * private data in the VM area's private data pointer.
723 */
724 vma->vm_private_data = private_data;
726 up_write(&private_data->grants_sem);
728 return -ENOMEM;
729 }
/* Per-PTE unmap hook for gntdev VM areas (installed as gntdev_vmops.zap_pte).
 * Unmaps the grant backing @addr — first the user-space mapping, then the
 * kernel-space one — and returns the previous PTE value.
 *
 * @is_fullmm is forwarded to pte_clear_full(); presumably it indicates the
 * whole mm is being torn down — TODO confirm against the zap_pte caller.
 */
static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
			      pte_t *ptep, int is_fullmm)
{
	int slot_index, ret;
	pte_t copy;
	struct gnttab_unmap_grant_ref op;
	gntdev_file_private_data_t *private_data;

	/* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
	 * to NULL on failure. However, we need this in gntdev_clear_pte() to
	 * unmap the grants. Therefore, we smuggle a reference to the file's
	 * private data in the VM area's private data pointer.
	 */
	if (vma->vm_file) {
		private_data = (gntdev_file_private_data_t *)
			vma->vm_file->private_data;
	} else if (vma->vm_private_data) {
		private_data = (gntdev_file_private_data_t *)
			vma->vm_private_data;
	} else {
		private_data = NULL; /* gcc warning */
		BUG();
	}

	/* Copy the existing value of the PTE for returning. */
	copy = *ptep;

	/* Calculate the grant relating to this PTE. */
	slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);

	/* Only unmap grants if the slot has been mapped. This could be being
	 * called from a failing mmap().
	 */
	if (private_data->grants[slot_index].state == GNTDEV_SLOT_MAPPED) {

		/* First, we clear the user space mapping, if it has been made.
		 */
		if (private_data->grants[slot_index].u.valid.user_handle !=
		    GNTDEV_INVALID_HANDLE &&
		    !xen_feature(XENFEAT_auto_translated_physmap)) {
			/* NOT USING SHADOW PAGE TABLES.
			 * ptep_to_machine() (rather than virt_to_machine())
			 * is required here so that highmem PTEs work. */
			gnttab_set_unmap_op(&op, ptep_to_machine(ptep),
					    GNTMAP_contains_pte,
					    private_data->grants[slot_index]
					    .u.valid.user_handle);
			ret = HYPERVISOR_grant_table_op(
				GNTTABOP_unmap_grant_ref, &op, 1);
			BUG_ON(ret);
			if (op.status)
				printk("User unmap grant status = %d\n",
				       op.status);
		} else {
			/* USING SHADOW PAGE TABLES. */
			pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
		}

		/* Finally, we unmap the grant from kernel space. */
		gnttab_set_unmap_op(&op,
				    get_kernel_vaddr(private_data, slot_index),
				    GNTMAP_host_map,
				    private_data->grants[slot_index].u.valid
				    .kernel_handle);
		ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
						&op, 1);
		BUG_ON(ret);
		if (op.status)
			printk("Kernel unmap grant status = %d\n", op.status);

		/* Return slot to the not-yet-mapped state, so that it may be
		 * mapped again, or removed by a subsequent ioctl.
		 */
		private_data->grants[slot_index].state =
			GNTDEV_SLOT_NOT_YET_MAPPED;

		/* Invalidate the physical to machine mapping for this page. */
		set_phys_to_machine(__pa(get_kernel_vaddr(private_data,
							  slot_index))
				    >> PAGE_SHIFT, INVALID_P2M_ENTRY);

	} else {
		pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
	}

	return copy;
}
818 /* "Destructor" for a VM area.
819 */
820 static void gntdev_vma_close(struct vm_area_struct *vma) {
821 if (vma->vm_private_data) {
822 kfree(vma->vm_private_data);
823 }
824 }
826 /* Called when an ioctl is made on the device.
827 */
828 static long gntdev_ioctl(struct file *flip,
829 unsigned int cmd, unsigned long arg)
830 {
831 int rc = 0;
832 gntdev_file_private_data_t *private_data =
833 (gntdev_file_private_data_t *) flip->private_data;
835 /* On the first invocation, we will lazily initialise the grant array
836 * and free-list.
837 */
838 if (unlikely(!private_data->grants)
839 && likely(cmd != IOCTL_GNTDEV_SET_MAX_GRANTS)) {
840 down_write(&private_data->grants_sem);
842 if (unlikely(private_data->grants)) {
843 up_write(&private_data->grants_sem);
844 goto private_data_initialised;
845 }
847 /* Just use the default. Setting to a non-default is handled
848 * in the ioctl switch.
849 */
850 rc = init_private_data(private_data, DEFAULT_MAX_GRANTS);
852 up_write(&private_data->grants_sem);
854 if (rc) {
855 printk (KERN_ERR "Initialising gntdev private data "
856 "failed.\n");
857 return rc;
858 }
859 }
861 private_data_initialised:
862 switch (cmd) {
863 case IOCTL_GNTDEV_MAP_GRANT_REF:
864 {
865 struct ioctl_gntdev_map_grant_ref op;
866 down_write(&private_data->grants_sem);
867 down_write(&private_data->free_list_sem);
869 if ((rc = copy_from_user(&op, (void __user *) arg,
870 sizeof(op)))) {
871 rc = -EFAULT;
872 goto map_out;
873 }
874 if (unlikely(op.count <= 0)) {
875 rc = -EINVAL;
876 goto map_out;
877 }
879 if (op.count == 1) {
880 if ((rc = add_grant_reference(flip, &op.refs[0],
881 &op.index)) < 0) {
882 printk(KERN_ERR "Adding grant reference "
883 "failed (%d).\n", rc);
884 goto map_out;
885 }
886 } else {
887 struct ioctl_gntdev_grant_ref *refs, *u;
888 refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL);
889 if (!refs) {
890 rc = -ENOMEM;
891 goto map_out;
892 }
893 u = ((struct ioctl_gntdev_map_grant_ref *)arg)->refs;
894 if ((rc = copy_from_user(refs,
895 (void __user *)u,
896 sizeof(*refs) * op.count))) {
897 printk(KERN_ERR "Copying refs from user failed"
898 " (%d).\n", rc);
899 rc = -EINVAL;
900 goto map_out;
901 }
902 if ((rc = find_contiguous_free_range(flip, op.count))
903 < 0) {
904 printk(KERN_ERR "Finding contiguous range "
905 "failed (%d).\n", rc);
906 kfree(refs);
907 goto map_out;
908 }
909 op.index = rc << PAGE_SHIFT;
910 if ((rc = add_grant_references(flip, op.count,
911 refs, rc))) {
912 printk(KERN_ERR "Adding grant references "
913 "failed (%d).\n", rc);
914 kfree(refs);
915 goto map_out;
916 }
917 compress_free_list(flip);
918 kfree(refs);
919 }
920 if ((rc = copy_to_user((void __user *) arg,
921 &op,
922 sizeof(op)))) {
923 printk(KERN_ERR "Copying result back to user failed "
924 "(%d)\n", rc);
925 rc = -EFAULT;
926 goto map_out;
927 }
928 map_out:
929 up_write(&private_data->grants_sem);
930 up_write(&private_data->free_list_sem);
931 return rc;
932 }
933 case IOCTL_GNTDEV_UNMAP_GRANT_REF:
934 {
935 struct ioctl_gntdev_unmap_grant_ref op;
936 int i, start_index;
938 down_write(&private_data->grants_sem);
939 down_write(&private_data->free_list_sem);
941 if ((rc = copy_from_user(&op,
942 (void __user *) arg,
943 sizeof(op)))) {
944 rc = -EFAULT;
945 goto unmap_out;
946 }
948 start_index = op.index >> PAGE_SHIFT;
950 /* First, check that all pages are in the NOT_YET_MAPPED
951 * state.
952 */
953 for (i = 0; i < op.count; ++i) {
954 if (unlikely
955 (private_data->grants[start_index + i].state
956 != GNTDEV_SLOT_NOT_YET_MAPPED)) {
957 if (private_data->grants[start_index + i].state
958 == GNTDEV_SLOT_INVALID) {
959 printk(KERN_ERR
960 "Tried to remove an invalid "
961 "grant at offset 0x%x.",
962 (start_index + i)
963 << PAGE_SHIFT);
964 rc = -EINVAL;
965 } else {
966 printk(KERN_ERR
967 "Tried to remove a grant which "
968 "is currently mmap()-ed at "
969 "offset 0x%x.",
970 (start_index + i)
971 << PAGE_SHIFT);
972 rc = -EBUSY;
973 }
974 goto unmap_out;
975 }
976 }
978 /* Unmap pages and add them to the free list.
979 */
980 for (i = 0; i < op.count; ++i) {
981 private_data->grants[start_index+i].state =
982 GNTDEV_SLOT_INVALID;
983 private_data->grants[start_index+i].u.free_list_index =
984 private_data->free_list_size;
985 private_data->free_list[private_data->free_list_size] =
986 start_index + i;
987 ++private_data->free_list_size;
988 }
990 unmap_out:
991 up_write(&private_data->grants_sem);
992 up_write(&private_data->free_list_sem);
993 return rc;
994 }
995 case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
996 {
997 struct ioctl_gntdev_get_offset_for_vaddr op;
998 struct vm_area_struct *vma;
999 unsigned long vaddr;
1001 if ((rc = copy_from_user(&op,
1002 (void __user *) arg,
1003 sizeof(op)))) {
1004 rc = -EFAULT;
1005 goto get_offset_out;
1007 vaddr = (unsigned long)op.vaddr;
1009 down_read(&current->mm->mmap_sem);
1010 vma = find_vma(current->mm, vaddr);
1011 if (vma == NULL) {
1012 rc = -EFAULT;
1013 goto get_offset_unlock_out;
1015 if ((!vma->vm_ops) || (vma->vm_ops != &gntdev_vmops)) {
1016 printk(KERN_ERR "The vaddr specified does not belong "
1017 "to a gntdev instance: %#lx\n", vaddr);
1018 rc = -EFAULT;
1019 goto get_offset_unlock_out;
1021 if (vma->vm_start != vaddr) {
1022 printk(KERN_ERR "The vaddr specified in an "
1023 "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at "
1024 "the start of the VM area. vma->vm_start = "
1025 "%#lx; vaddr = %#lx\n",
1026 vma->vm_start, vaddr);
1027 rc = -EFAULT;
1028 goto get_offset_unlock_out;
1030 op.offset = vma->vm_pgoff << PAGE_SHIFT;
1031 op.count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
1032 up_read(&current->mm->mmap_sem);
1033 if ((rc = copy_to_user((void __user *) arg,
1034 &op,
1035 sizeof(op)))) {
1036 rc = -EFAULT;
1037 goto get_offset_out;
1039 goto get_offset_out;
1040 get_offset_unlock_out:
1041 up_read(&current->mm->mmap_sem);
1042 get_offset_out:
1043 return rc;
1045 case IOCTL_GNTDEV_SET_MAX_GRANTS:
1047 struct ioctl_gntdev_set_max_grants op;
1048 if ((rc = copy_from_user(&op,
1049 (void __user *) arg,
1050 sizeof(op)))) {
1051 rc = -EFAULT;
1052 goto set_max_out;
1054 down_write(&private_data->grants_sem);
1055 if (private_data->grants) {
1056 rc = -EBUSY;
1057 goto set_max_unlock_out;
1059 if (op.count > MAX_GRANTS_LIMIT) {
1060 rc = -EINVAL;
1061 goto set_max_unlock_out;
1063 rc = init_private_data(private_data, op.count);
1064 set_max_unlock_out:
1065 up_write(&private_data->grants_sem);
1066 set_max_out:
1067 return rc;
1069 default:
1070 return -ENOIOCTLCMD;
1073 return 0;