ia64/linux-2.6.18-xen.hg

drivers/xen/blktap2/device.c @ 896:f59c5daed527

blktap2: use blk_rq_map_sg() here too

Just like in blkfront, not doing so can cause the maximum number of
segments check to trigger.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 04 10:46:54 2009 +0100 (2009-06-04)
parents 4e5cd2fc45a7
children 046a6eabd4dc
line source
1 #include <linux/fs.h>
2 #include <linux/blkdev.h>
3 #include <linux/cdrom.h>
4 #include <linux/hdreg.h>
5 #include <linux/module.h>
6 #include <asm/tlbflush.h>
8 #include <scsi/scsi.h>
9 #include <scsi/scsi_ioctl.h>
11 #include <xen/xenbus.h>
12 #include <xen/interface/io/blkif.h>
14 #include "blktap.h"
16 #include "../blkback/blkback-pagemap.h"
18 #if 0
19 #define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a)
20 #else
21 #define DPRINTK_IOCTL(_f, _a...) ((void)0)
22 #endif
24 struct blktap_grant_table {
25 int cnt;
26 struct gnttab_map_grant_ref grants[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
27 };
29 static int blktap_device_major;
31 static inline struct blktap *
32 dev_to_blktap(struct blktap_device *dev)
33 {
34 return container_of(dev, struct blktap, device);
35 }
37 static int
38 blktap_device_open(struct inode *inode, struct file *filep)
39 {
40 struct blktap *tap;
41 struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;
43 if (!dev)
44 return -ENOENT;
46 tap = dev_to_blktap(dev);
47 if (!blktap_active(tap) ||
48 test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
49 return -ENOENT;
51 dev->users++;
53 return 0;
54 }
56 static int
57 blktap_device_release(struct inode *inode, struct file *filep)
58 {
59 struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;
60 struct blktap *tap = dev_to_blktap(dev);
62 dev->users--;
63 if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
64 blktap_device_destroy(tap);
66 return 0;
67 }
69 static int
70 blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg)
71 {
72 /* We don't have real geometry info, but let's at least return
73 values consistent with the size of the device */
74 sector_t nsect = get_capacity(bd->bd_disk);
75 sector_t cylinders = nsect;
77 hg->heads = 0xff;
78 hg->sectors = 0x3f;
79 sector_div(cylinders, hg->heads * hg->sectors);
80 hg->cylinders = cylinders;
81 if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
82 hg->cylinders = 0xffff;
83 return 0;
84 }
86 static int
87 blktap_device_ioctl(struct inode *inode, struct file *filep,
88 unsigned command, unsigned long argument)
89 {
90 int i;
92 DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
93 command, (long)argument, inode->i_rdev);
95 switch (command) {
96 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
97 case HDIO_GETGEO: {
98 struct block_device *bd = inode->i_bdev;
99 struct hd_geometry geo;
100 int ret;
102 if (!argument)
103 return -EINVAL;
105 geo.start = get_start_sect(bd);
106 ret = blktap_device_getgeo(bd, &geo);
107 if (ret)
108 return ret;
110 if (copy_to_user((struct hd_geometry __user *)argument, &geo,
111 sizeof(geo)))
112 return -EFAULT;
114 return 0;
115 }
116 #endif
117 case CDROMMULTISESSION:
118 BTDBG("FIXME: support multisession CDs later\n");
119 for (i = 0; i < sizeof(struct cdrom_multisession); i++)
120 if (put_user(0, (char __user *)(argument + i)))
121 return -EFAULT;
122 return 0;
124 case SCSI_IOCTL_GET_IDLUN:
125 if (!access_ok(VERIFY_WRITE, argument,
126 sizeof(struct scsi_idlun)))
127 return -EFAULT;
129 /* return 0 for now. */
130 __put_user(0, &((struct scsi_idlun __user *)argument)->dev_id);
131 __put_user(0,
132 &((struct scsi_idlun __user *)argument)->host_unique_id);
133 return 0;
135 default:
136 /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
137 command);*/
138 return -EINVAL; /* same return as native Linux */
139 }
141 return 0;
142 }
144 static struct block_device_operations blktap_device_file_operations = {
145 .owner = THIS_MODULE,
146 .open = blktap_device_open,
147 .release = blktap_device_release,
148 .ioctl = blktap_device_ioctl,
149 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
150 .getgeo = blktap_device_getgeo
151 #endif
152 };
154 static int
155 blktap_map_uaddr_fn(pte_t *ptep, struct page *pmd_page,
156 unsigned long addr, void *data)
157 {
158 pte_t *pte = (pte_t *)data;
160 BTDBG("ptep %p -> %012llx\n", ptep, (unsigned long long)pte_val(*pte));
161 set_pte(ptep, *pte);
162 return 0;
163 }
165 static int
166 blktap_map_uaddr(struct mm_struct *mm, unsigned long address, pte_t pte)
167 {
168 return apply_to_page_range(mm, address,
169 PAGE_SIZE, blktap_map_uaddr_fn, &pte);
170 }
172 static int
173 blktap_umap_uaddr_fn(pte_t *ptep, struct page *pmd_page,
174 unsigned long addr, void *data)
175 {
176 struct mm_struct *mm = (struct mm_struct *)data;
178 BTDBG("ptep %p\n", ptep);
179 pte_clear(mm, addr, ptep);
180 return 0;
181 }
183 static int
184 blktap_umap_uaddr(struct mm_struct *mm, unsigned long address)
185 {
186 return apply_to_page_range(mm, address,
187 PAGE_SIZE, blktap_umap_uaddr_fn, mm);
188 }
190 static inline void
191 flush_tlb_kernel_page(unsigned long kvaddr)
192 {
193 #ifdef CONFIG_X86
194 xen_invlpg_all(kvaddr);
195 #else
196 flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE);
197 #endif
198 }
200 static void
201 blktap_device_end_dequeued_request(struct blktap_device *dev,
202 struct request *req, int uptodate)
203 {
204 int ret;
206 ret = end_that_request_first(req, uptodate, req->hard_nr_sectors);
207 BUG_ON(ret);
209 spin_lock_irq(&dev->lock);
210 end_that_request_last(req, uptodate);
211 spin_unlock_irq(&dev->lock);
212 }
214 /*
215 * tap->tap_sem held on entry
216 */
217 static void
218 blktap_device_fast_flush(struct blktap *tap, struct blktap_request *request)
219 {
220 uint64_t ptep;
221 int ret, usr_idx;
222 unsigned int i, cnt;
223 struct page **map, *page;
224 struct blktap_ring *ring;
225 struct grant_handle_pair *khandle;
226 unsigned long kvaddr, uvaddr, offset;
227 struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
229 cnt = 0;
230 ring = &tap->ring;
231 usr_idx = request->usr_idx;
232 map = ring->foreign_map.map;
234 if (!ring->vma)
235 return;
237 if (xen_feature(XENFEAT_auto_translated_physmap))
238 zap_page_range(ring->vma,
239 MMAP_VADDR(ring->user_vstart, usr_idx, 0),
240 request->nr_pages << PAGE_SHIFT, NULL);
242 for (i = 0; i < request->nr_pages; i++) {
243 kvaddr = request_to_kaddr(request, i);
244 uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
246 khandle = request->handles + i;
248 if (khandle->kernel != INVALID_GRANT_HANDLE) {
249 gnttab_set_unmap_op(&unmap[cnt], kvaddr,
250 GNTMAP_host_map, khandle->kernel);
251 cnt++;
252 set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
253 INVALID_P2M_ENTRY);
254 }
256 if (khandle->user != INVALID_GRANT_HANDLE) {
257 BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
258 if (create_lookup_pte_addr(ring->vma->vm_mm,
259 uvaddr, &ptep) != 0) {
260 BTERR("Couldn't get a pte addr!\n");
261 return;
262 }
264 gnttab_set_unmap_op(&unmap[cnt], ptep,
265 GNTMAP_host_map
266 | GNTMAP_application_map
267 | GNTMAP_contains_pte,
268 khandle->user);
269 cnt++;
270 }
272 offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
274 BTDBG("offset: 0x%08lx, page: %p, request: %p, usr_idx: %d, "
275 "seg: %d, kvaddr: 0x%08lx, khandle: %u, uvaddr: "
276 "0x%08lx, handle: %u\n", offset, map[offset], request,
277 usr_idx, i, kvaddr, khandle->kernel, uvaddr,
278 khandle->user);
280 page = map[offset];
281 if (page) {
282 ClearPageReserved(map[offset]);
283 if (PageBlkback(page)) {
284 ClearPageBlkback(page);
285 set_page_private(page, 0);
286 }
287 }
288 map[offset] = NULL;
290 khandle->kernel = INVALID_GRANT_HANDLE;
291 khandle->user = INVALID_GRANT_HANDLE;
292 }
294 if (cnt) {
295 ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
296 unmap, cnt);
297 BUG_ON(ret);
298 }
300 if (!xen_feature(XENFEAT_auto_translated_physmap))
301 zap_page_range(ring->vma,
302 MMAP_VADDR(ring->user_vstart, usr_idx, 0),
303 request->nr_pages << PAGE_SHIFT, NULL);
304 }
306 /*
307 * tap->tap_sem held on entry
308 */
309 static void
310 blktap_unmap(struct blktap *tap, struct blktap_request *request)
311 {
312 int i, usr_idx;
313 unsigned long kvaddr;
315 usr_idx = request->usr_idx;
316 down_write(&tap->ring.vma->vm_mm->mmap_sem);
318 for (i = 0; i < request->nr_pages; i++) {
319 BTDBG("request: %p, seg: %d, kvaddr: 0x%08lx, khandle: %u, "
320 "uvaddr: 0x%08lx, uhandle: %u\n", request, i,
321 request_to_kaddr(request, i),
322 request->handles[i].kernel,
323 MMAP_VADDR(tap->ring.user_vstart, usr_idx, i),
324 request->handles[i].user);
326 if (request->handles[i].kernel == INVALID_GRANT_HANDLE) {
327 kvaddr = request_to_kaddr(request, i);
328 blktap_umap_uaddr(&init_mm, kvaddr);
329 flush_tlb_kernel_page(kvaddr);
330 set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
331 INVALID_P2M_ENTRY);
332 }
333 }
335 blktap_device_fast_flush(tap, request);
336 up_write(&tap->ring.vma->vm_mm->mmap_sem);
337 }
339 /*
340 * called if the tapdisk process dies unexpectedly.
341 * fail and release any pending requests and disable queue.
342 */
343 void
344 blktap_device_fail_pending_requests(struct blktap *tap)
345 {
346 int usr_idx;
347 struct request *req;
348 struct blktap_device *dev;
349 struct blktap_request *request;
351 if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
352 return;
354 down_write(&tap->tap_sem);
356 dev = &tap->device;
357 for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
358 request = tap->pending_requests[usr_idx];
359 if (!request || request->status != BLKTAP_REQUEST_PENDING)
360 continue;
362 BTERR("%u:%u: failing pending %s of %d pages\n",
363 blktap_device_major, tap->minor,
364 (request->operation == BLKIF_OP_READ ?
365 "read" : "write"), request->nr_pages);
367 blktap_unmap(tap, request);
368 req = (struct request *)(unsigned long)request->id;
369 blktap_device_end_dequeued_request(dev, req, 0);
370 blktap_request_free(tap, request);
371 }
373 up_write(&tap->tap_sem);
375 spin_lock_irq(&dev->lock);
377 /* fail any future requests */
378 dev->gd->queue->queuedata = NULL;
379 blk_start_queue(dev->gd->queue);
381 spin_unlock_irq(&dev->lock);
382 }
384 /*
385 * tap->tap_sem held on entry
386 */
387 void
388 blktap_device_finish_request(struct blktap *tap,
389 blkif_response_t *res,
390 struct blktap_request *request)
391 {
392 int uptodate;
393 struct request *req;
394 struct blktap_device *dev;
396 dev = &tap->device;
398 blktap_unmap(tap, request);
400 req = (struct request *)(unsigned long)request->id;
401 uptodate = (res->status == BLKIF_RSP_OKAY);
403 BTDBG("req %p res status %d operation %d/%d id %lld\n", req,
404 res->status, res->operation, request->operation, res->id);
406 switch (request->operation) {
407 case BLKIF_OP_READ:
408 case BLKIF_OP_WRITE:
409 if (unlikely(res->status != BLKIF_RSP_OKAY))
410 BTERR("Bad return from device data "
411 "request: %x\n", res->status);
412 blktap_device_end_dequeued_request(dev, req, uptodate);
413 break;
414 default:
415 BUG();
416 }
418 blktap_request_free(tap, request);
419 }
421 static int
422 blktap_prep_foreign(struct blktap *tap,
423 struct blktap_request *request,
424 blkif_request_t *blkif_req,
425 unsigned int seg, struct page *page,
426 struct blktap_grant_table *table)
427 {
428 uint64_t ptep;
429 uint32_t flags;
430 struct page *tap_page;
431 struct blktap_ring *ring;
432 struct blkback_pagemap map;
433 unsigned long uvaddr, kvaddr;
435 ring = &tap->ring;
436 map = blkback_pagemap_read(page);
437 blkif_req->seg[seg].gref = map.gref;
439 uvaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
440 kvaddr = request_to_kaddr(request, seg);
441 flags = GNTMAP_host_map |
442 (request->operation == BLKIF_OP_WRITE ? GNTMAP_readonly : 0);
444 gnttab_set_map_op(&table->grants[table->cnt],
445 kvaddr, flags, map.gref, map.domid);
446 table->cnt++;
448 /* enable chained tap devices */
449 tap_page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
450 set_page_private(tap_page, page_private(page));
451 SetPageBlkback(tap_page);
453 if (xen_feature(XENFEAT_auto_translated_physmap))
454 return 0;
456 if (create_lookup_pte_addr(ring->vma->vm_mm, uvaddr, &ptep)) {
457 BTERR("couldn't get a pte addr!\n");
458 return -1;
459 }
461 flags |= GNTMAP_application_map | GNTMAP_contains_pte;
462 gnttab_set_map_op(&table->grants[table->cnt],
463 ptep, flags, map.gref, map.domid);
464 table->cnt++;
466 return 0;
467 }
469 static int
470 blktap_map_foreign(struct blktap *tap,
471 struct blktap_request *request,
472 blkif_request_t *blkif_req,
473 struct blktap_grant_table *table)
474 {
475 struct page *page;
476 int i, grant, err, usr_idx;
477 struct blktap_ring *ring;
478 unsigned long uvaddr, kvaddr, foreign_mfn;
480 if (!table->cnt)
481 return 0;
483 err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
484 table->grants, table->cnt);
485 BUG_ON(err);
487 grant = 0;
488 usr_idx = request->usr_idx;
489 ring = &tap->ring;
491 for (i = 0; i < request->nr_pages; i++) {
492 if (!blkif_req->seg[i].gref)
493 continue;
495 uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
496 kvaddr = request_to_kaddr(request, i);
498 if (unlikely(table->grants[grant].status)) {
499 BTERR("invalid kernel buffer: could not remap it\n");
500 err |= 1;
501 table->grants[grant].handle = INVALID_GRANT_HANDLE;
502 }
504 request->handles[i].kernel = table->grants[grant].handle;
505 foreign_mfn = table->grants[grant].dev_bus_addr >> PAGE_SHIFT;
506 grant++;
508 if (xen_feature(XENFEAT_auto_translated_physmap))
509 goto done;
511 if (unlikely(table->grants[grant].status)) {
512 BTERR("invalid user buffer: could not remap it\n");
513 err |= 1;
514 table->grants[grant].handle = INVALID_GRANT_HANDLE;
515 }
517 request->handles[i].user = table->grants[grant].handle;
518 grant++;
520 done:
521 if (err)
522 continue;
524 page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
526 if (!xen_feature(XENFEAT_auto_translated_physmap))
527 set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
528 FOREIGN_FRAME(foreign_mfn));
529 else if (vm_insert_page(ring->vma, uvaddr, page))
530 err |= 1;
532 BTDBG("pending_req: %p, seg: %d, page: %p, "
533 "kvaddr: 0x%08lx, khandle: %u, uvaddr: 0x%08lx, "
534 "uhandle: %u\n", request, i, page,
535 kvaddr, request->handles[i].kernel,
536 uvaddr, request->handles[i].user);
537 }
539 return err;
540 }
542 static void
543 blktap_map(struct blktap *tap,
544 struct blktap_request *request,
545 unsigned int seg, struct page *page)
546 {
547 pte_t pte;
548 int usr_idx;
549 struct blktap_ring *ring;
550 unsigned long uvaddr, kvaddr;
552 ring = &tap->ring;
553 usr_idx = request->usr_idx;
554 uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, seg);
555 kvaddr = request_to_kaddr(request, seg);
557 pte = mk_pte(page, ring->vma->vm_page_prot);
558 blktap_map_uaddr(ring->vma->vm_mm, uvaddr, pte_mkwrite(pte));
559 flush_tlb_page(ring->vma, uvaddr);
560 blktap_map_uaddr(&init_mm, kvaddr, mk_pte(page, PAGE_KERNEL));
561 flush_tlb_kernel_page(kvaddr);
563 set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, pte_mfn(pte));
564 request->handles[seg].kernel = INVALID_GRANT_HANDLE;
565 request->handles[seg].user = INVALID_GRANT_HANDLE;
567 BTDBG("pending_req: %p, seg: %d, page: %p, kvaddr: 0x%08lx, "
568 "uvaddr: 0x%08lx\n", request, seg, page, kvaddr,
569 uvaddr);
570 }
572 static int
573 blktap_device_process_request(struct blktap *tap,
574 struct blktap_request *request,
575 struct request *req)
576 {
577 struct page *page;
578 int i, usr_idx, err;
579 struct blktap_ring *ring;
580 struct scatterlist *sg;
581 struct blktap_grant_table table;
582 unsigned int fsect, lsect, nr_sects;
583 unsigned long offset, uvaddr, kvaddr;
584 struct blkif_request blkif_req, *target;
586 err = -1;
587 memset(&table, 0, sizeof(table));
589 if (!blktap_active(tap))
590 goto out;
592 ring = &tap->ring;
593 usr_idx = request->usr_idx;
594 blkif_req.id = usr_idx;
595 blkif_req.sector_number = (blkif_sector_t)req->sector;
596 blkif_req.handle = 0;
597 blkif_req.operation = rq_data_dir(req) ?
598 BLKIF_OP_WRITE : BLKIF_OP_READ;
600 request->id = (unsigned long)req;
601 request->operation = blkif_req.operation;
602 request->status = BLKTAP_REQUEST_PENDING;
603 do_gettimeofday(&request->time);
605 nr_sects = 0;
606 request->nr_pages = 0;
607 blkif_req.nr_segments = blk_rq_map_sg(req->q, req, tap->sg);
608 BUG_ON(blkif_req.nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
609 for (i = 0; i < blkif_req.nr_segments; ++i) {
610 sg = tap->sg + i;
611 fsect = sg->offset >> 9;
612 lsect = fsect + (sg->length >> 9) - 1;
613 nr_sects += sg->length >> 9;
615 blkif_req.seg[i] =
616 (struct blkif_request_segment) {
617 .gref = 0,
618 .first_sect = fsect,
619 .last_sect = lsect };
621 if (PageBlkback(sg->page)) {
622 /* foreign page -- use xen */
623 if (blktap_prep_foreign(tap,
624 request,
625 &blkif_req,
626 i,
627 sg->page,
628 &table))
629 goto out;
630 } else {
631 /* do it the old fashioned way */
632 blktap_map(tap,
633 request,
634 i,
635 sg->page);
636 }
638 uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
639 kvaddr = request_to_kaddr(request, i);
640 offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
641 page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
642 ring->foreign_map.map[offset] = page;
643 SetPageReserved(page);
645 BTDBG("mapped uaddr %08lx to page %p pfn 0x%lx\n",
646 uvaddr, page, __pa(kvaddr) >> PAGE_SHIFT);
647 BTDBG("offset: 0x%08lx, pending_req: %p, seg: %d, "
648 "page: %p, kvaddr: 0x%08lx, uvaddr: 0x%08lx\n",
649 offset, request, i,
650 page, kvaddr, uvaddr);
652 request->nr_pages++;
653 }
655 if (blktap_map_foreign(tap, request, &blkif_req, &table))
656 goto out;
658 /* Finally, write the request message to the user ring. */
659 target = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
660 memcpy(target, &blkif_req, sizeof(blkif_req));
661 target->id = request->usr_idx;
662 wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
663 ring->ring.req_prod_pvt++;
665 if (rq_data_dir(req)) {
666 tap->stats.st_wr_sect += nr_sects;
667 tap->stats.st_wr_req++;
668 } else {
669 tap->stats.st_rd_sect += nr_sects;
670 tap->stats.st_rd_req++;
671 }
673 err = 0;
675 out:
676 if (err)
677 blktap_device_fast_flush(tap, request);
678 return err;
679 }
681 #ifdef ENABLE_PASSTHROUGH
682 #define rq_for_each_bio_safe(_bio, _tmp, _req) \
683 if ((_req)->bio) \
684 for (_bio = (_req)->bio; \
685 _bio && ((_tmp = _bio->bi_next) || 1); \
686 _bio = _tmp)
688 static void
689 blktap_device_forward_request(struct blktap *tap, struct request *req)
690 {
691 struct bio *bio, *tmp;
692 struct blktap_device *dev;
694 dev = &tap->device;
696 rq_for_each_bio_safe(bio, tmp, req) {
697 bio->bi_bdev = dev->bdev;
698 submit_bio(bio->bi_rw, bio);
699 }
700 }
702 static void
703 blktap_device_close_bdev(struct blktap *tap)
704 {
705 struct blktap_device *dev;
707 dev = &tap->device;
709 if (dev->bdev)
710 blkdev_put(dev->bdev);
712 dev->bdev = NULL;
713 clear_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
714 }
716 static int
717 blktap_device_open_bdev(struct blktap *tap, u32 pdev)
718 {
719 struct block_device *bdev;
720 struct blktap_device *dev;
722 dev = &tap->device;
724 bdev = open_by_devnum(pdev, FMODE_WRITE);
725 if (IS_ERR(bdev)) {
726 BTERR("opening device %x:%x failed: %ld\n",
727 MAJOR(pdev), MINOR(pdev), PTR_ERR(bdev));
728 return PTR_ERR(bdev);
729 }
731 if (!bdev->bd_disk) {
732 BTERR("device %x:%x doesn't exist\n",
733 MAJOR(pdev), MINOR(pdev));
734 blkdev_put(dev->bdev);
735 return -ENOENT;
736 }
738 dev->bdev = bdev;
739 set_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
741 /* TODO: readjust queue parameters */
743 BTINFO("set device %d to passthrough on %x:%x\n",
744 tap->minor, MAJOR(pdev), MINOR(pdev));
746 return 0;
747 }
749 int
750 blktap_device_enable_passthrough(struct blktap *tap,
751 unsigned major, unsigned minor)
752 {
753 u32 pdev;
754 struct blktap_device *dev;
756 dev = &tap->device;
757 pdev = MKDEV(major, minor);
759 if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
760 return -EINVAL;
762 if (dev->bdev) {
763 if (pdev)
764 return -EINVAL;
765 blktap_device_close_bdev(tap);
766 return 0;
767 }
769 return blktap_device_open_bdev(tap, pdev);
770 }
771 #endif
773 /*
774 * dev->lock held on entry
775 */
776 static void
777 blktap_device_run_queue(struct blktap *tap)
778 {
779 int queued, err;
780 request_queue_t *rq;
781 struct request *req;
782 struct blktap_ring *ring;
783 struct blktap_device *dev;
784 struct blktap_request *request;
786 queued = 0;
787 ring = &tap->ring;
788 dev = &tap->device;
789 rq = dev->gd->queue;
791 BTDBG("running queue for %d\n", tap->minor);
793 while ((req = elv_next_request(rq)) != NULL) {
794 if (!blk_fs_request(req)) {
795 end_request(req, 0);
796 continue;
797 }
799 if (blk_barrier_rq(req)) {
800 end_request(req, 0);
801 continue;
802 }
804 #ifdef ENABLE_PASSTHROUGH
805 if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
806 blkdev_dequeue_request(req);
807 blktap_device_forward_request(tap, req);
808 continue;
809 }
810 #endif
812 if (RING_FULL(&ring->ring)) {
813 wait:
814 /* Avoid pointless unplugs. */
815 blk_stop_queue(rq);
816 blktap_defer(tap);
817 break;
818 }
820 request = blktap_request_allocate(tap);
821 if (!request) {
822 tap->stats.st_oo_req++;
823 goto wait;
824 }
826 BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%lx) "
827 "buffer:%p [%s], pending: %p\n", req, tap->minor,
828 req->cmd, req->sector, req->current_nr_sectors,
829 req->nr_sectors, req->buffer,
830 rq_data_dir(req) ? "write" : "read", request);
832 blkdev_dequeue_request(req);
834 spin_unlock_irq(&dev->lock);
835 down_read(&tap->tap_sem);
837 err = blktap_device_process_request(tap, request, req);
838 if (!err)
839 queued++;
840 else {
841 blktap_device_end_dequeued_request(dev, req, 0);
842 blktap_request_free(tap, request);
843 }
845 up_read(&tap->tap_sem);
846 spin_lock_irq(&dev->lock);
847 }
849 if (queued)
850 blktap_ring_kick_user(tap);
851 }
853 /*
854 * dev->lock held on entry
855 */
856 static void
857 blktap_device_do_request(request_queue_t *rq)
858 {
859 struct request *req;
860 struct blktap *tap;
861 struct blktap_device *dev;
863 dev = rq->queuedata;
864 if (!dev)
865 goto fail;
867 tap = dev_to_blktap(dev);
868 if (!blktap_active(tap))
869 goto fail;
871 if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
872 test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
873 blktap_defer(tap);
874 return;
875 }
877 blktap_device_run_queue(tap);
878 return;
880 fail:
881 while ((req = elv_next_request(rq))) {
882 BTERR("device closed: failing secs %llu - %llu\n",
883 req->sector, req->sector + req->nr_sectors);
884 end_request(req, 0);
885 }
886 }
888 void
889 blktap_device_restart(struct blktap *tap)
890 {
891 struct blktap_device *dev;
893 dev = &tap->device;
894 if (!dev->gd || !dev->gd->queue)
895 return;
897 if (blktap_active(tap) && RING_FULL(&tap->ring.ring)) {
898 blktap_defer(tap);
899 return;
900 }
902 if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
903 test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
904 blktap_defer(tap);
905 return;
906 }
908 spin_lock_irq(&dev->lock);
910 /* Re-enable calldowns. */
911 if (blk_queue_stopped(dev->gd->queue))
912 blk_start_queue(dev->gd->queue);
914 /* Kick things off immediately. */
915 blktap_device_do_request(dev->gd->queue);
917 spin_unlock_irq(&dev->lock);
918 }
920 static void
921 blktap_device_configure(struct blktap *tap)
922 {
923 struct request_queue *rq;
924 struct blktap_device *dev = &tap->device;
926 if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !dev->gd)
927 return;
929 dev = &tap->device;
930 rq = dev->gd->queue;
932 spin_lock_irq(&dev->lock);
934 set_capacity(dev->gd, tap->params.capacity);
936 /* Hard sector size and max sectors impersonate the equiv. hardware. */
937 blk_queue_hardsect_size(rq, tap->params.sector_size);
938 blk_queue_max_sectors(rq, 512);
940 /* Each segment in a request is up to an aligned page in size. */
941 blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
942 blk_queue_max_segment_size(rq, PAGE_SIZE);
944 /* Ensure a merged request will fit in a single I/O ring slot. */
945 blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
946 blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
948 /* Make sure buffer addresses are sector-aligned. */
949 blk_queue_dma_alignment(rq, 511);
951 spin_unlock_irq(&dev->lock);
952 }
954 int
955 blktap_device_resume(struct blktap *tap)
956 {
957 int err;
959 if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
960 return -ENODEV;
962 if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
963 return 0;
965 err = blktap_ring_resume(tap);
966 if (err)
967 return err;
969 /* device size may have changed */
970 blktap_device_configure(tap);
972 BTDBG("restarting device\n");
973 blktap_device_restart(tap);
975 return 0;
976 }
978 int
979 blktap_device_pause(struct blktap *tap)
980 {
981 unsigned long flags;
982 struct blktap_device *dev = &tap->device;
984 if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
985 return -ENODEV;
987 if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
988 return 0;
990 spin_lock_irqsave(&dev->lock, flags);
992 blk_stop_queue(dev->gd->queue);
993 set_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
995 spin_unlock_irqrestore(&dev->lock, flags);
997 return blktap_ring_pause(tap);
998 }
1000 int
1001 blktap_device_destroy(struct blktap *tap)
1002 {
1003 struct blktap_device *dev = &tap->device;
1005 if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
1006 return 0;
1008 BTINFO("destroy device %d users %d\n", tap->minor, dev->users);
1010 if (dev->users)
1011 return -EBUSY;
1013 spin_lock_irq(&dev->lock);
1014 /* No more blktap_device_do_request(). */
1015 blk_stop_queue(dev->gd->queue);
1016 clear_bit(BLKTAP_DEVICE, &tap->dev_inuse);
1017 spin_unlock_irq(&dev->lock);
1019 #ifdef ENABLE_PASSTHROUGH
1020 if (dev->bdev)
1021 blktap_device_close_bdev(tap);
1022 #endif
1024 del_gendisk(dev->gd);
1025 put_disk(dev->gd);
1026 blk_cleanup_queue(dev->gd->queue);
1028 dev->gd = NULL;
1030 wake_up(&tap->wq);
1032 return 0;
1033 }
1035 int
1036 blktap_device_create(struct blktap *tap)
1037 {
1038 int minor, err;
1039 struct gendisk *gd;
1040 struct request_queue *rq;
1041 struct blktap_device *dev;
1043 gd = NULL;
1044 rq = NULL;
1045 dev = &tap->device;
1046 minor = tap->minor;
1048 if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
1049 return -EEXIST;
1051 if (blktap_validate_params(tap, &tap->params))
1052 return -EINVAL;
1054 BTINFO("minor %d sectors %Lu sector-size %lu\n",
1055 minor, tap->params.capacity, tap->params.sector_size);
1057 err = -ENODEV;
1059 gd = alloc_disk(1);
1060 if (!gd)
1061 goto error;
1063 if (minor < 26)
1064 sprintf(gd->disk_name, "tapdev%c", 'a' + minor);
1065 else
1066 sprintf(gd->disk_name, "tapdev%c%c",
1067 'a' + ((minor / 26) - 1), 'a' + (minor % 26));
1069 gd->major = blktap_device_major;
1070 gd->first_minor = minor;
1071 gd->fops = &blktap_device_file_operations;
1072 gd->private_data = dev;
1074 spin_lock_init(&dev->lock);
1075 rq = blk_init_queue(blktap_device_do_request, &dev->lock);
1076 if (!rq)
1077 goto error;
1079 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
1080 elevator_init(rq, "noop");
1081 #else
1082 elevator_init(rq, &elevator_noop);
1083 #endif
1085 gd->queue = rq;
1086 rq->queuedata = dev;
1087 dev->gd = gd;
1089 set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
1090 blktap_device_configure(tap);
1092 add_disk(gd);
1094 err = 0;
1095 goto out;
1097 error:
1098 if (gd)
1099 del_gendisk(gd);
1100 if (rq)
1101 blk_cleanup_queue(rq);
1103 out:
1104 BTINFO("creation of %u:%u: %d\n", blktap_device_major, tap->minor, err);
1105 return err;
1106 }
1108 int
1109 blktap_device_init(int *maj)
1110 {
1111 int major;
1113 /* Dynamically allocate a major for this device */
1114 major = register_blkdev(0, "tapdev");
1115 if (major < 0) {
1116 BTERR("Couldn't register blktap device\n");
1117 return -ENOMEM;
1118 }
1120 blktap_device_major = *maj = major;
1121 BTINFO("blktap device major %d\n", major);
1123 return 0;
1124 }
1126 void
1127 blktap_device_free(void)
1128 {
1129 if (blktap_device_major)
1130 if (unregister_blkdev(blktap_device_major, "tapdev"))
1131 BTERR("blktap device unregister failed\n");
1132 }