ia64/linux-2.6.18-xen.hg: drivers/xen/blktap2/ring.c @ 912:dd42cdb0ab89

[IA64] Build blktap2 driver by default, as x86 builds already do.

Add CONFIG_XEN_BLKDEV_TAP2=y to buildconfigs/linux-defconfig_xen_ia64.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Mon Jun 29 12:09:16 2009 +0900

#include <linux/module.h>
#include <linux/signal.h>

#include "blktap.h"

static int blktap_ring_major;

static inline struct blktap *
vma_to_blktap(struct vm_area_struct *vma)
{
	struct vm_foreign_map *m = vma->vm_private_data;
	struct blktap_ring *r = container_of(m, struct blktap_ring, foreign_map);
	return container_of(r, struct blktap, ring);
}

/*
 * BLKTAP - immediately before the mmap'ed data area,
 * we have a bunch of pages reserved for the shared memory ring.
 */
#define RING_PAGES 1

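/*
 * Layout sketch (informational, derived from blktap_ring_mmap() below):
 * with RING_PAGES == 1, the VMA handed to mmap is carved up as
 *
 *   vma->vm_start (== ring->ring_vstart):
 *           1 page holding the shared blkif ring
 *   vma->vm_start + (RING_PAGES << PAGE_SHIFT) (== ring->user_vstart):
 *           MMAP_PAGES pages of grant-mapped request data
 */
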
static int
blktap_read_ring(struct blktap *tap)
{
	/* This is called to read responses from the ring. */
	int usr_idx;
	RING_IDX rc, rp;
	blkif_response_t res;
	struct blktap_ring *ring;
	struct blktap_request *request;

	down_read(&tap->tap_sem);

	ring = &tap->ring;
	if (!ring->vma) {
		up_read(&tap->tap_sem);
		return 0;
	}

	/* for each outstanding message on the ring */
	rp = ring->ring.sring->rsp_prod;
	rmb();

	for (rc = ring->ring.rsp_cons; rc != rp; rc++) {
		memcpy(&res, RING_GET_RESPONSE(&ring->ring, rc), sizeof(res));
		mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
		++ring->ring.rsp_cons;

		usr_idx = (int)res.id;
		if (usr_idx < 0 || usr_idx >= MAX_PENDING_REQS ||
		    !tap->pending_requests[usr_idx]) {
			BTWARN("Request %d/%d invalid [%x], tapdisk %d %p\n",
			       rc, rp, usr_idx, tap->pid, ring->vma);
			continue;
		}

		request = tap->pending_requests[usr_idx];
		BTDBG("request %p response #%d id %x\n", request, rc, usr_idx);
		blktap_device_finish_request(tap, &res, request);
	}

	up_read(&tap->tap_sem);

	blktap_run_deferred();

	return 0;
}

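/*
 * For orientation, a minimal sketch of the tapdisk side that produces the
 * responses consumed above. Not compiled here; it assumes userspace copies
 * of the blkif ring macros, an already-mmap'ed shared ring, and that fd is
 * the open blktap2 ring device.
 */
#if 0
static void tapdisk_respond(int fd, blkif_back_ring_t *ring,
			    uint64_t id, int16_t status)
{
	blkif_response_t *rsp;

	rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt++);
	rsp->id     = id;	/* echoed back as usr_idx in blktap_read_ring() */
	rsp->status = status;

	RING_PUSH_RESPONSES(ring);
	ioctl(fd, BLKTAP2_IOCTL_KICK_FE, 0);	/* drives blktap_read_ring() */
}
#endif
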
static struct page *
blktap_ring_nopage(struct vm_area_struct *vma,
		   unsigned long address, int *type)
{
	/*
	 * if the page has not been mapped in by the driver then return
	 * NOPAGE_SIGBUS to the domain.
	 */
	return NOPAGE_SIGBUS;
}

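/*
 * blktap_ring_clear_pte() below tears down what is, in effect, a dual
 * grant mapping of each data page: one handle for the kernel mapping
 * (khandle->kernel, unmapped by kernel virtual address) and one for the
 * tapdisk user mapping (khandle->user, unmapped via GNTMAP_contains_pte
 * against the user PTE).
 */
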
static pte_t
blktap_ring_clear_pte(struct vm_area_struct *vma,
		      unsigned long uvaddr,
		      pte_t *ptep, int is_fullmm)
{
	pte_t copy;
	struct blktap *tap;
	unsigned long kvaddr;
	struct page **map, *page;
	struct blktap_ring *ring;
	struct blktap_request *request;
	struct grant_handle_pair *khandle;
	struct gnttab_unmap_grant_ref unmap[2];
	int offset, seg, usr_idx, count = 0;

	tap  = vma_to_blktap(vma);
	ring = &tap->ring;
	map  = ring->foreign_map.map;
	BUG_ON(!map);	/* TODO Should this be changed to if statement? */

	/*
	 * Zap entry if the address is before the start of the grant
	 * mapped region.
	 */
	if (uvaddr < ring->user_vstart)
		return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
					       ptep, is_fullmm);

	offset  = (int)((uvaddr - ring->user_vstart) >> PAGE_SHIFT);
	usr_idx = offset / BLKIF_MAX_SEGMENTS_PER_REQUEST;
	seg     = offset % BLKIF_MAX_SEGMENTS_PER_REQUEST;

	offset = (int)((uvaddr - vma->vm_start) >> PAGE_SHIFT);
	page   = map[offset];
	if (page) {
		ClearPageReserved(page);
		if (PageBlkback(page)) {
			ClearPageBlkback(page);
			set_page_private(page, 0);
		}
	}
	map[offset] = NULL;

	request = tap->pending_requests[usr_idx];
	kvaddr  = request_to_kaddr(request, seg);
	khandle = request->handles + seg;

	if (khandle->kernel != INVALID_GRANT_HANDLE) {
		gnttab_set_unmap_op(&unmap[count], kvaddr,
				    GNTMAP_host_map, khandle->kernel);
		count++;

		set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
				    INVALID_P2M_ENTRY);
	}

	if (khandle->user != INVALID_GRANT_HANDLE) {
		BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));

		copy = *ptep;
		gnttab_set_unmap_op(&unmap[count], virt_to_machine(ptep),
				    GNTMAP_host_map
				    | GNTMAP_application_map
				    | GNTMAP_contains_pte,
				    khandle->user);
		count++;
	} else
		copy = ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
					       is_fullmm);

	if (count)
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
					      unmap, count))
			BUG();

	khandle->kernel = INVALID_GRANT_HANDLE;
	khandle->user   = INVALID_GRANT_HANDLE;

	return copy;
}

static void
blktap_ring_vm_unmap(struct vm_area_struct *vma)
{
	struct blktap *tap = vma_to_blktap(vma);

	down_write(&tap->tap_sem);
	clear_bit(BLKTAP_RING_VMA, &tap->dev_inuse);
	clear_bit(BLKTAP_PAUSED, &tap->dev_inuse);
	clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
	up_write(&tap->tap_sem);
}

static void
blktap_ring_vm_close(struct vm_area_struct *vma)
{
	struct blktap *tap = vma_to_blktap(vma);
	struct blktap_ring *ring = &tap->ring;

	blktap_ring_vm_unmap(vma);                 /* fail future requests */
	blktap_device_fail_pending_requests(tap);  /* fail pending requests */
	blktap_device_restart(tap);                /* fail deferred requests */

	down_write(&tap->tap_sem);

	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);

	kfree(ring->foreign_map.map);
	ring->foreign_map.map = NULL;

	/* Free the ring page. */
	ClearPageReserved(virt_to_page(ring->ring.sring));
	free_page((unsigned long)ring->ring.sring);

	BTINFO("unmapping ring %d\n", tap->minor);
	ring->ring.sring = NULL;
	ring->vma = NULL;

	up_write(&tap->tap_sem);

	wake_up(&tap->wq);
}

static struct vm_operations_struct blktap_ring_vm_operations = {
	.close   = blktap_ring_vm_close,
	.unmap   = blktap_ring_vm_unmap,
	.nopage  = blktap_ring_nopage,
	.zap_pte = blktap_ring_clear_pte,
};

static int
blktap_ring_open(struct inode *inode, struct file *filp)
{
	int idx;
	struct blktap *tap;

	idx = iminor(inode);
	if (idx < 0 || idx >= MAX_BLKTAP_DEVICE || blktaps[idx] == NULL) {
		BTERR("unable to open device blktap%d\n", idx);
		return -ENODEV;
	}

	tap = blktaps[idx];

	BTINFO("opening device blktap%d\n", idx);

	if (!test_bit(BLKTAP_CONTROL, &tap->dev_inuse))
		return -ENODEV;

	/* Only one process can access the ring at a time. */
	if (test_and_set_bit(BLKTAP_RING_FD, &tap->dev_inuse))
		return -EBUSY;

	filp->private_data = tap;
	BTINFO("opened device %d\n", tap->minor);

	return 0;
}

static int
blktap_ring_release(struct inode *inode, struct file *filp)
{
	struct blktap *tap = filp->private_data;

	BTINFO("freeing device %d\n", tap->minor);
	clear_bit(BLKTAP_RING_FD, &tap->dev_inuse);
	filp->private_data = NULL;
	wake_up(&tap->wq);
	return 0;
}

/*
 * Note on mmap:
 * We need to map pages to user space in a way that will allow the block
 * subsystem to set up direct IO to them. This couldn't be done before,
 * because there isn't really a sane way to translate a user virtual address
 * down to a physical address when the page belongs to another domain.
 *
 * My first approach was to map the page into kernel memory, add an entry
 * for it in the physical frame list (using alloc_lomem_region as in blkback)
 * and then attempt to map that page up to user space. This is disallowed
 * by xen though, which realizes that we don't really own the machine frame
 * underlying the physical page.
 *
 * The new approach is to provide explicit support for this in xen linux.
 * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
 * mapped in from other VMs. vma->vm_private_data is set up as a mapping
 * from pages to actual page structs. There is a new clause in get_user_pages
 * that does the right thing for this sort of mapping.
 */

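/*
 * Illustrative tapdisk-side mmap (not compiled here). The device path is
 * an assumption about the udev setup; the size check is real: per
 * blktap_ring_mmap() below, the mapping must cover exactly
 * MMAP_PAGES + RING_PAGES pages, with the shared ring in the first page.
 */
#if 0
	int fd = open("/dev/xen/blktap-2/blktap0", O_RDWR);
	size_t len = (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT;
	void *area = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_SHARED, fd, 0);

	blkif_sring_t *sring = area;	/* page 0: the shared ring */
	char *data = (char *)area + (RING_PAGES << PAGE_SHIFT);
					/* grant-mapped request data */
#endif
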
static int
blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma)
{
	int size, err;
	struct page **map;
	struct blktap *tap;
	blkif_sring_t *sring;
	struct blktap_ring *ring;

	tap = filp->private_data;
	if (!tap || test_and_set_bit(BLKTAP_RING_VMA, &tap->dev_inuse))
		return -ENOMEM;

	ring  = &tap->ring;
	map   = NULL;
	sring = NULL;

	size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
	if (size != (MMAP_PAGES + RING_PAGES)) {
		BTERR("you _must_ map exactly %lu pages!\n",
		      MMAP_PAGES + RING_PAGES);
		return -EAGAIN;
	}

	/* Allocate the fe ring. */
	sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
	if (!sring) {
		BTERR("Couldn't alloc sring.\n");
		goto fail_mem;
	}

	map = kzalloc(size * sizeof(struct page *), GFP_KERNEL);
	if (!map) {
		BTERR("Couldn't alloc VM_FOREIGN map.\n");
		goto fail_mem;
	}

	SetPageReserved(virt_to_page(sring));

	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE);

	ring->ring_vstart = vma->vm_start;
	ring->user_vstart = ring->ring_vstart + (RING_PAGES << PAGE_SHIFT);

	/* Map the ring pages to the start of the region and reserve it. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		err = vm_insert_page(vma, vma->vm_start,
				     virt_to_page(ring->ring.sring));
	else
		err = remap_pfn_range(vma, vma->vm_start,
				      __pa(ring->ring.sring) >> PAGE_SHIFT,
				      PAGE_SIZE, vma->vm_page_prot);
	if (err) {
		BTERR("Mapping user ring failed: %d\n", err);
		goto fail;
	}

	/* Mark this VM as containing foreign pages, and set up mappings. */
	ring->foreign_map.map = map;
	vma->vm_private_data = &ring->foreign_map;
	vma->vm_flags |= VM_FOREIGN;
	vma->vm_flags |= VM_DONTCOPY;
	vma->vm_flags |= VM_RESERVED;
	vma->vm_ops = &blktap_ring_vm_operations;

#ifdef CONFIG_X86
	vma->vm_mm->context.has_foreign_mappings = 1;
#endif

	tap->pid = current->pid;
	BTINFO("blktap: mapping pid is %d\n", tap->pid);

	ring->vma = vma;
	return 0;

 fail:
	/* Clear any active mappings. */
	zap_page_range(vma, vma->vm_start,
		       vma->vm_end - vma->vm_start, NULL);
	ClearPageReserved(virt_to_page(sring));
 fail_mem:
	free_page((unsigned long)sring);
	kfree(map);

	return -ENOMEM;
}

static inline void
blktap_ring_set_message(struct blktap *tap, int msg)
{
	struct blktap_ring *ring = &tap->ring;

	down_read(&tap->tap_sem);
	if (ring->ring.sring)
		ring->ring.sring->pad[0] = msg;
	up_read(&tap->tap_sem);
}

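/*
 * The ring's pad[0] byte doubles as a one-slot kernel-to-tapdisk mailbox:
 * the kernel writes a BLKTAP2_RING_MESSAGE_* value there and wakes
 * poll_wait; tapdisk sees poll() return readable, reads pad[0], acts on
 * it, and acknowledges via the ioctls below, which reset the slot to 0.
 */
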
static int
blktap_ring_ioctl(struct inode *inode, struct file *filp,
		  unsigned int cmd, unsigned long arg)
{
	struct blktap_params params;
	struct blktap *tap = filp->private_data;

	BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg);

	switch (cmd) {
	case BLKTAP2_IOCTL_KICK_FE:
		/* There are fe messages to process. */
		return blktap_read_ring(tap);

	case BLKTAP2_IOCTL_CREATE_DEVICE:
		if (!arg)
			return -EINVAL;

		if (copy_from_user(&params, (struct blktap_params __user *)arg,
				   sizeof(params))) {
			BTERR("failed to get params\n");
			return -EFAULT;
		}

		if (blktap_validate_params(tap, &params)) {
			BTERR("invalid params\n");
			return -EINVAL;
		}

		tap->params = params;
		return blktap_device_create(tap);

	case BLKTAP2_IOCTL_SET_PARAMS:
		if (!arg)
			return -EINVAL;

		if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
			return -EINVAL;

		if (copy_from_user(&params, (struct blktap_params __user *)arg,
				   sizeof(params))) {
			BTERR("failed to get params\n");
			return -EFAULT;
		}

		if (blktap_validate_params(tap, &params)) {
			BTERR("invalid params\n");
			return -EINVAL;
		}

		tap->params = params;
		return 0;

	case BLKTAP2_IOCTL_PAUSE:
		if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse))
			return -EINVAL;

		set_bit(BLKTAP_PAUSED, &tap->dev_inuse);
		clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);

		blktap_ring_set_message(tap, 0);
		wake_up_interruptible(&tap->wq);

		return 0;

	case BLKTAP2_IOCTL_REOPEN:
		if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
			return -EINVAL;

		if (!arg)
			return -EINVAL;

		if (copy_to_user((char __user *)arg,
				 tap->params.name,
				 strlen(tap->params.name) + 1))
			return -EFAULT;

		blktap_ring_set_message(tap, 0);
		wake_up_interruptible(&tap->wq);

		return 0;

	case BLKTAP2_IOCTL_RESUME:
		if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
			return -EINVAL;

		tap->ring.response = (int)arg;
		if (!tap->ring.response)
			clear_bit(BLKTAP_PAUSED, &tap->dev_inuse);

		blktap_ring_set_message(tap, 0);
		wake_up_interruptible(&tap->wq);

		return 0;
	}

	return -ENOIOCTLCMD;
}

static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait)
{
	struct blktap *tap = filp->private_data;
	struct blktap_ring *ring = &tap->ring;

	poll_wait(filp, &ring->poll_wait, wait);
	if (ring->ring.sring->pad[0] != 0 ||
	    ring->ring.req_prod_pvt != ring->ring.sring->req_prod) {
		RING_PUSH_REQUESTS(&ring->ring);
		return POLLIN | POLLRDNORM;
	}

	return 0;
}

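/*
 * Illustrative tapdisk event loop (not compiled here; fd and sring as in
 * the mmap sketch above, process_requests() is a hypothetical helper).
 * poll() returns readable when new requests were pushed or a pad[0]
 * message is pending; KICK_FE hands completed responses back to us.
 */
#if 0
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	while (poll(&pfd, 1, -1) > 0) {
		if (sring->pad[0] == BLKTAP2_RING_MESSAGE_PAUSE)
			ioctl(fd, BLKTAP2_IOCTL_PAUSE, 0);	/* ack pause */

		process_requests(sring);	/* hypothetical helper */
		ioctl(fd, BLKTAP2_IOCTL_KICK_FE, 0);
	}
#endif
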
static struct file_operations blktap_ring_file_operations = {
	.owner   = THIS_MODULE,
	.open    = blktap_ring_open,
	.release = blktap_ring_release,
	.ioctl   = blktap_ring_ioctl,
	.mmap    = blktap_ring_mmap,
	.poll    = blktap_ring_poll,
};

void
blktap_ring_kick_user(struct blktap *tap)
{
	wake_up_interruptible(&tap->ring.poll_wait);
}

int
blktap_ring_resume(struct blktap *tap)
{
	int err;
	struct blktap_ring *ring = &tap->ring;

	if (!blktap_active(tap))
		return -ENODEV;

	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
		return -EINVAL;

	/* set shared flag for resume */
	ring->response = 0;

	blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_RESUME);
	blktap_ring_kick_user(tap);

	wait_event_interruptible(tap->wq, ring->response ||
				 !test_bit(BLKTAP_PAUSED, &tap->dev_inuse));

	err = ring->response;
	ring->response = 0;

	BTDBG("err: %d\n", err);

	if (err)
		return err;

	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
		return -EAGAIN;

	return 0;
}

int
blktap_ring_pause(struct blktap *tap)
{
	if (!blktap_active(tap))
		return -ENODEV;

	if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse))
		return -EINVAL;

	BTDBG("draining queue\n");
	wait_event_interruptible(tap->wq, !tap->pending_cnt);
	if (tap->pending_cnt)
		return -EAGAIN;

	blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_PAUSE);
	blktap_ring_kick_user(tap);

	BTDBG("waiting for tapdisk response\n");
	wait_event_interruptible(tap->wq, test_bit(BLKTAP_PAUSED, &tap->dev_inuse));
	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
		return -EAGAIN;

	return 0;
}

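/*
 * The pause/resume handshake, as implemented above: the kernel drains
 * pending_cnt, posts BLKTAP2_RING_MESSAGE_PAUSE through pad[0] and wakes
 * tapdisk via poll_wait; tapdisk acknowledges with BLKTAP2_IOCTL_PAUSE,
 * which sets BLKTAP_PAUSED and wakes tap->wq. Resume runs the same dance
 * in reverse, with ring->response carrying the status code tapdisk passes
 * as the BLKTAP2_IOCTL_RESUME argument.
 */
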
int
blktap_ring_destroy(struct blktap *tap)
{
	if (!test_bit(BLKTAP_RING_FD, &tap->dev_inuse) &&
	    !test_bit(BLKTAP_RING_VMA, &tap->dev_inuse))
		return 0;

	BTDBG("sending tapdisk close message\n");
	blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_CLOSE);
	blktap_ring_kick_user(tap);

	return -EAGAIN;
}

static void
blktap_ring_initialize(struct blktap_ring *ring, int minor)
{
	memset(ring, 0, sizeof(*ring));
	init_waitqueue_head(&ring->poll_wait);
	ring->devno = MKDEV(blktap_ring_major, minor);
}

int
blktap_ring_create(struct blktap *tap)
{
	struct blktap_ring *ring = &tap->ring;
	blktap_ring_initialize(ring, tap->minor);
	return blktap_sysfs_create(tap);
}

int
blktap_ring_init(int *major)
{
	int err;

	err = register_chrdev(0, "blktap2", &blktap_ring_file_operations);
	if (err < 0) {
		BTERR("error registering blktap ring device: %d\n", err);
		return err;
	}

	blktap_ring_major = *major = err;
	BTINFO("blktap ring major: %d\n", blktap_ring_major);
	return 0;
}

int
blktap_ring_free(void)
{
	if (blktap_ring_major)
		unregister_chrdev(blktap_ring_major, "blktap2");

	return 0;
}