ia64/linux-2.6.18-xen.hg

drivers/xen/blktap2/device.c @ 892:485fe5efa4ff

linux/blktap2: allow building as a module

... and also allow it to interact with blkback when blkback is likewise
built as a module.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 04 10:32:34 2009 +0100 (2009-06-04)
parents 85a4c18ad9aa
children f994bfe9b93b
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/cdrom.h>
#include <linux/hdreg.h>
#include <linux/module.h>
#include <asm/tlbflush.h>

#include <scsi/scsi.h>
#include <scsi/scsi_ioctl.h>

#include <xen/xenbus.h>
#include <xen/interface/io/blkif.h>

#include "blktap.h"

#if defined(CONFIG_XEN_BLKDEV_BACKEND) || \
    (defined(CONFIG_XEN_BLKDEV_BACKEND_MODULE) && defined(MODULE))
#include "../blkback/blkback-pagemap.h"
#else
struct blkback_pagemap { };
#define blkback_pagemap_read(page) BUG();
#endif

#if 0
#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a)
#else
#define DPRINTK_IOCTL(_f, _a...) ((void)0)
#endif

struct blktap_grant_table {
        int cnt;
        struct gnttab_map_grant_ref grants[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
};

static int blktap_device_major;

static inline struct blktap *
dev_to_blktap(struct blktap_device *dev)
{
        return container_of(dev, struct blktap, device);
}

static int
blktap_device_open(struct inode *inode, struct file *filep)
{
        struct blktap *tap;
        struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;

        if (!dev)
                return -ENOENT;

        tap = dev_to_blktap(dev);
        if (!blktap_active(tap) ||
            test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
                return -ENOENT;

        dev->users++;

        return 0;
}

static int
blktap_device_release(struct inode *inode, struct file *filep)
{
        struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;
        struct blktap *tap = dev_to_blktap(dev);

        dev->users--;
        if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
                blktap_device_destroy(tap);

        return 0;
}

static int
blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg)
{
        /* We don't have real geometry info, but let's at least return
           values consistent with the size of the device */
        sector_t nsect = get_capacity(bd->bd_disk);
        sector_t cylinders = nsect;

        hg->heads = 0xff;
        hg->sectors = 0x3f;
        sector_div(cylinders, hg->heads * hg->sectors);
        hg->cylinders = cylinders;
        if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
                hg->cylinders = 0xffff;
        return 0;
}

static int
blktap_device_ioctl(struct inode *inode, struct file *filep,
                    unsigned command, unsigned long argument)
{
        int i;

        DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
                      command, (long)argument, inode->i_rdev);

        switch (command) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
        case HDIO_GETGEO: {
                struct block_device *bd = inode->i_bdev;
                struct hd_geometry geo;
                int ret;

                if (!argument)
                        return -EINVAL;

                geo.start = get_start_sect(bd);
                ret = blktap_device_getgeo(bd, &geo);
                if (ret)
                        return ret;

                if (copy_to_user((struct hd_geometry __user *)argument, &geo,
                                 sizeof(geo)))
                        return -EFAULT;

                return 0;
        }
#endif
        case CDROMMULTISESSION:
                BTDBG("FIXME: support multisession CDs later\n");
                for (i = 0; i < sizeof(struct cdrom_multisession); i++)
                        if (put_user(0, (char __user *)(argument + i)))
                                return -EFAULT;
                return 0;

        case SCSI_IOCTL_GET_IDLUN:
                if (!access_ok(VERIFY_WRITE, argument,
                               sizeof(struct scsi_idlun)))
                        return -EFAULT;

                /* return 0 for now. */
                __put_user(0, &((struct scsi_idlun __user *)argument)->dev_id);
                __put_user(0,
                           &((struct scsi_idlun __user *)argument)->host_unique_id);
                return 0;

        default:
                /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
                  command);*/
                return -EINVAL; /* same return as native Linux */
        }

        return 0;
}

static struct block_device_operations blktap_device_file_operations = {
        .owner   = THIS_MODULE,
        .open    = blktap_device_open,
        .release = blktap_device_release,
        .ioctl   = blktap_device_ioctl,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
        .getgeo  = blktap_device_getgeo
#endif
};
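/*
 * Single-PTE helpers: install or clear one page table entry in the given
 * address space via apply_to_page_range(). Used below to map request
 * pages into the tapdisk VMA and the kernel mapping area.
 */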
static int
blktap_map_uaddr_fn(pte_t *ptep, struct page *pmd_page,
                    unsigned long addr, void *data)
{
        pte_t *pte = (pte_t *)data;

        BTDBG("ptep %p -> %012llx\n", ptep, (unsigned long long)pte_val(*pte));
        set_pte(ptep, *pte);
        return 0;
}

static int
blktap_map_uaddr(struct mm_struct *mm, unsigned long address, pte_t pte)
{
        return apply_to_page_range(mm, address,
                                   PAGE_SIZE, blktap_map_uaddr_fn, &pte);
}

static int
blktap_umap_uaddr_fn(pte_t *ptep, struct page *pmd_page,
                     unsigned long addr, void *data)
{
        struct mm_struct *mm = (struct mm_struct *)data;

        BTDBG("ptep %p\n", ptep);
        pte_clear(mm, addr, ptep);
        return 0;
}

static int
blktap_umap_uaddr(struct mm_struct *mm, unsigned long address)
{
        return apply_to_page_range(mm, address,
                                   PAGE_SIZE, blktap_umap_uaddr_fn, mm);
}
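/*
 * Complete a request that has already been dequeued from the block layer,
 * taking dev->lock only around the final completion.
 */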
static void
blktap_device_end_dequeued_request(struct blktap_device *dev,
                                   struct request *req, int uptodate)
{
        int ret;

        ret = end_that_request_first(req, uptodate, req->hard_nr_sectors);
        BUG_ON(ret);

        spin_lock_irq(&dev->lock);
        end_that_request_last(req, uptodate);
        spin_unlock_irq(&dev->lock);
}

/*
 * tap->tap_sem held on entry
 */
static void
blktap_device_fast_flush(struct blktap *tap, struct blktap_request *request)
{
        uint64_t ptep;
        int ret, usr_idx;
        unsigned int i, cnt;
        struct page **map, *page;
        struct blktap_ring *ring;
        struct grant_handle_pair *khandle;
        unsigned long kvaddr, uvaddr, offset;
        struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];

        cnt     = 0;
        ring    = &tap->ring;
        usr_idx = request->usr_idx;
        map     = ring->foreign_map.map;

        if (!ring->vma)
                return;

        if (xen_feature(XENFEAT_auto_translated_physmap))
                zap_page_range(ring->vma,
                               MMAP_VADDR(ring->user_vstart, usr_idx, 0),
                               request->nr_pages << PAGE_SHIFT, NULL);

        for (i = 0; i < request->nr_pages; i++) {
                kvaddr = request_to_kaddr(request, i);
                uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);

                khandle = request->handles + i;

                if (khandle->kernel != INVALID_GRANT_HANDLE) {
                        gnttab_set_unmap_op(&unmap[cnt], kvaddr,
                                            GNTMAP_host_map, khandle->kernel);
                        cnt++;
                        set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
                                            INVALID_P2M_ENTRY);
                }

                if (khandle->user != INVALID_GRANT_HANDLE) {
                        BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
                        if (create_lookup_pte_addr(ring->vma->vm_mm,
                                                   uvaddr, &ptep) != 0) {
                                BTERR("Couldn't get a pte addr!\n");
                                return;
                        }

                        gnttab_set_unmap_op(&unmap[cnt], ptep,
                                            GNTMAP_host_map
                                            | GNTMAP_application_map
                                            | GNTMAP_contains_pte,
                                            khandle->user);
                        cnt++;
                }

                offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;

                BTDBG("offset: 0x%08lx, page: %p, request: %p, usr_idx: %d, "
                      "seg: %d, kvaddr: 0x%08lx, khandle: %u, uvaddr: "
                      "0x%08lx, handle: %u\n", offset, map[offset], request,
                      usr_idx, i, kvaddr, khandle->kernel, uvaddr,
                      khandle->user);

                page = map[offset];
                if (page) {
                        ClearPageReserved(map[offset]);
                        if (PageBlkback(page)) {
                                ClearPageBlkback(page);
                                set_page_private(page, 0);
                        }
                }
                map[offset] = NULL;

                khandle->kernel = INVALID_GRANT_HANDLE;
                khandle->user   = INVALID_GRANT_HANDLE;
        }

        if (cnt) {
                ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
                                                unmap, cnt);
                BUG_ON(ret);
        }

        if (!xen_feature(XENFEAT_auto_translated_physmap))
                zap_page_range(ring->vma,
                               MMAP_VADDR(ring->user_vstart, usr_idx, 0),
                               request->nr_pages << PAGE_SHIFT, NULL);
}
/*
 * tap->tap_sem held on entry
 */
static void
blktap_unmap(struct blktap *tap, struct blktap_request *request)
{
        int i, usr_idx;
        unsigned long kvaddr;

        usr_idx = request->usr_idx;
        down_write(&tap->ring.vma->vm_mm->mmap_sem);

        for (i = 0; i < request->nr_pages; i++) {
                BTDBG("request: %p, seg: %d, kvaddr: 0x%08lx, khandle: %u, "
                      "uvaddr: 0x%08lx, uhandle: %u\n", request, i,
                      request_to_kaddr(request, i),
                      request->handles[i].kernel,
                      MMAP_VADDR(tap->ring.user_vstart, usr_idx, i),
                      request->handles[i].user);

                if (request->handles[i].kernel == INVALID_GRANT_HANDLE) {
                        kvaddr = request_to_kaddr(request, i);
                        blktap_umap_uaddr(&init_mm, kvaddr);
                        flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE);
                        set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
                                            INVALID_P2M_ENTRY);
                }
        }

        blktap_device_fast_flush(tap, request);
        up_write(&tap->ring.vma->vm_mm->mmap_sem);
}

/*
 * called if the tapdisk process dies unexpectedly.
 * fail and release any pending requests and disable queue.
 */
void
blktap_device_fail_pending_requests(struct blktap *tap)
{
        int usr_idx;
        struct request *req;
        struct blktap_device *dev;
        struct blktap_request *request;

        if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
                return;

        down_write(&tap->tap_sem);

        dev = &tap->device;
        for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
                request = tap->pending_requests[usr_idx];
                if (!request || request->status != BLKTAP_REQUEST_PENDING)
                        continue;

                BTERR("%u:%u: failing pending %s of %d pages\n",
                      blktap_device_major, tap->minor,
                      (request->operation == BLKIF_OP_READ ?
                       "read" : "write"), request->nr_pages);

                blktap_unmap(tap, request);
                req = (struct request *)(unsigned long)request->id;
                blktap_device_end_dequeued_request(dev, req, 0);
                blktap_request_free(tap, request);
        }

        up_write(&tap->tap_sem);

        spin_lock_irq(&dev->lock);

        /* fail any future requests */
        dev->gd->queue->queuedata = NULL;
        blk_start_queue(dev->gd->queue);

        spin_unlock_irq(&dev->lock);
}

/*
 * tap->tap_sem held on entry
 */
void
blktap_device_finish_request(struct blktap *tap,
                             blkif_response_t *res,
                             struct blktap_request *request)
{
        int uptodate;
        struct request *req;
        struct blktap_device *dev;

        dev = &tap->device;

        blktap_unmap(tap, request);

        req = (struct request *)(unsigned long)request->id;
        uptodate = (res->status == BLKIF_RSP_OKAY);

        BTDBG("req %p res status %d operation %d/%d id %lld\n", req,
              res->status, res->operation, request->operation, res->id);

        switch (request->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
                if (unlikely(res->status != BLKIF_RSP_OKAY))
                        BTERR("Bad return from device data "
                              "request: %x\n", res->status);
                blktap_device_end_dequeued_request(dev, req, uptodate);
                break;
        default:
                BUG();
        }

        blktap_request_free(tap, request);
}
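/*
 * A segment backed by a page already granted to blkback (a "foreign"
 * page): look up the original grant in the blkback pagemap and queue map
 * operations so the same grant is mapped at this request's kernel and
 * user addresses (see blktap_map_foreign() below).
 */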
static int
blktap_prep_foreign(struct blktap *tap,
                    struct blktap_request *request,
                    blkif_request_t *blkif_req,
                    unsigned int seg, struct page *page,
                    struct blktap_grant_table *table)
{
        uint64_t ptep;
        uint32_t flags;
        struct page *tap_page;
        struct blktap_ring *ring;
        struct blkback_pagemap map;
        unsigned long uvaddr, kvaddr;

        ring = &tap->ring;
        map  = blkback_pagemap_read(page);
        blkif_req->seg[seg].gref = map.gref;

        uvaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
        kvaddr = request_to_kaddr(request, seg);
        flags  = GNTMAP_host_map |
                (request->operation == BLKIF_OP_WRITE ? GNTMAP_readonly : 0);

        gnttab_set_map_op(&table->grants[table->cnt],
                          kvaddr, flags, map.gref, map.domid);
        table->cnt++;

        /* enable chained tap devices */
        tap_page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
        set_page_private(tap_page, page_private(page));
        SetPageBlkback(tap_page);

        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;

        if (create_lookup_pte_addr(ring->vma->vm_mm, uvaddr, &ptep)) {
                BTERR("couldn't get a pte addr!\n");
                return -1;
        }

        flags |= GNTMAP_application_map | GNTMAP_contains_pte;
        gnttab_set_map_op(&table->grants[table->cnt],
                          ptep, flags, map.gref, map.domid);
        table->cnt++;

        return 0;
}
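/*
 * Issue the batched grant map hypercall prepared by blktap_prep_foreign()
 * and record the resulting handles, wiring each mapped frame into the
 * p2m table (or into the tapdisk VMA on auto-translated guests).
 */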
static int
blktap_map_foreign(struct blktap *tap,
                   struct blktap_request *request,
                   blkif_request_t *blkif_req,
                   struct blktap_grant_table *table)
{
        struct page *page;
        int i, grant, err, usr_idx;
        struct blktap_ring *ring;
        unsigned long uvaddr, kvaddr, foreign_mfn;

        if (!table->cnt)
                return 0;

        err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
                                        table->grants, table->cnt);
        BUG_ON(err);

        grant   = 0;
        usr_idx = request->usr_idx;
        ring    = &tap->ring;

        for (i = 0; i < request->nr_pages; i++) {
                if (!blkif_req->seg[i].gref)
                        continue;

                uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
                kvaddr = request_to_kaddr(request, i);

                if (unlikely(table->grants[grant].status)) {
                        BTERR("invalid kernel buffer: could not remap it\n");
                        err |= 1;
                        table->grants[grant].handle = INVALID_GRANT_HANDLE;
                }

                request->handles[i].kernel = table->grants[grant].handle;
                foreign_mfn = table->grants[grant].dev_bus_addr >> PAGE_SHIFT;
                grant++;

                if (xen_feature(XENFEAT_auto_translated_physmap))
                        goto done;

                if (unlikely(table->grants[grant].status)) {
                        BTERR("invalid user buffer: could not remap it\n");
                        err |= 1;
                        table->grants[grant].handle = INVALID_GRANT_HANDLE;
                }

                request->handles[i].user = table->grants[grant].handle;
                grant++;

        done:
                if (err)
                        continue;

                page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);

                if (!xen_feature(XENFEAT_auto_translated_physmap))
                        set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
                                            FOREIGN_FRAME(foreign_mfn));
                else if (vm_insert_page(ring->vma, uvaddr, page))
                        err |= 1;

                BTDBG("pending_req: %p, seg: %d, page: %p, "
                      "kvaddr: 0x%08lx, khandle: %u, uvaddr: 0x%08lx, "
                      "uhandle: %u\n", request, i, page,
                      kvaddr, request->handles[i].kernel,
                      uvaddr, request->handles[i].user);
        }

        return err;
}
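/*
 * Map an ordinary (non-foreign) request page into both the tapdisk VMA
 * and the kernel mapping area by installing PTEs directly; no grant
 * handles are involved, so both handles are left invalid.
 */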
static void
blktap_map(struct blktap *tap,
           struct blktap_request *request,
           unsigned int seg, struct page *page)
{
        pte_t pte;
        int usr_idx;
        struct blktap_ring *ring;
        unsigned long uvaddr, kvaddr;

        ring    = &tap->ring;
        usr_idx = request->usr_idx;
        uvaddr  = MMAP_VADDR(ring->user_vstart, usr_idx, seg);
        kvaddr  = request_to_kaddr(request, seg);

        pte = mk_pte(page, ring->vma->vm_page_prot);
        blktap_map_uaddr(ring->vma->vm_mm, uvaddr, pte_mkwrite(pte));
        flush_tlb_mm(ring->vma->vm_mm);
        blktap_map_uaddr(&init_mm, kvaddr, mk_pte(page, PAGE_KERNEL));
        flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE);

        set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, pte_mfn(pte));
        request->handles[seg].kernel = INVALID_GRANT_HANDLE;
        request->handles[seg].user   = INVALID_GRANT_HANDLE;

        BTDBG("pending_req: %p, seg: %d, page: %p, kvaddr: 0x%08lx, "
              "uvaddr: 0x%08lx\n", request, seg, page, kvaddr,
              uvaddr);
}
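/*
 * Translate a block layer request into a blkif request: map every bio
 * segment (foreign or local), fill in the segment descriptors, and post
 * the message on the user ring. Caller holds tap->tap_sem for reading.
 */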
static int
blktap_device_process_request(struct blktap *tap,
                              struct blktap_request *request,
                              struct request *req)
{
        struct bio *bio;
        struct page *page;
        struct bio_vec *bvec;
        int idx, usr_idx, err;
        struct blktap_ring *ring;
        struct blktap_grant_table table;
        unsigned int fsect, lsect, nr_sects;
        unsigned long offset, uvaddr, kvaddr;
        struct blkif_request blkif_req, *target;

        err = -1;
        memset(&table, 0, sizeof(table));

        if (!blktap_active(tap))
                goto out;

        ring    = &tap->ring;
        usr_idx = request->usr_idx;
        blkif_req.id = usr_idx;
        blkif_req.sector_number = (blkif_sector_t)req->sector;
        blkif_req.handle = 0;
        blkif_req.operation = rq_data_dir(req) ?
                BLKIF_OP_WRITE : BLKIF_OP_READ;

        request->id        = (unsigned long)req;
        request->operation = blkif_req.operation;
        request->status    = BLKTAP_REQUEST_PENDING;
        do_gettimeofday(&request->time);

        nr_sects = 0;
        request->nr_pages = 0;
        blkif_req.nr_segments = 0;
        rq_for_each_bio(bio, req) {
                bio_for_each_segment(bvec, bio, idx) {
                        BUG_ON(blkif_req.nr_segments ==
                               BLKIF_MAX_SEGMENTS_PER_REQUEST);

                        fsect = bvec->bv_offset >> 9;
                        lsect = fsect + (bvec->bv_len >> 9) - 1;
                        nr_sects += bvec->bv_len >> 9;

                        blkif_req.seg[blkif_req.nr_segments] =
                                (struct blkif_request_segment) {
                                .gref       = 0,
                                .first_sect = fsect,
                                .last_sect  = lsect };

                        if (PageBlkback(bvec->bv_page)) {
                                /* foreign page -- use xen */
                                if (blktap_prep_foreign(tap,
                                                        request,
                                                        &blkif_req,
                                                        blkif_req.nr_segments,
                                                        bvec->bv_page,
                                                        &table))
                                        goto out;
                        } else {
                                /* do it the old fashioned way */
                                blktap_map(tap,
                                           request,
                                           blkif_req.nr_segments,
                                           bvec->bv_page);
                        }

                        uvaddr = MMAP_VADDR(ring->user_vstart,
                                            usr_idx, blkif_req.nr_segments);
                        kvaddr = request_to_kaddr(request,
                                                  blkif_req.nr_segments);
                        offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
                        page   = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
                        ring->foreign_map.map[offset] = page;
                        SetPageReserved(page);

                        BTDBG("mapped uaddr %08lx to page %p pfn 0x%lx\n",
                              uvaddr, page, __pa(kvaddr) >> PAGE_SHIFT);
                        BTDBG("offset: 0x%08lx, pending_req: %p, seg: %d, "
                              "page: %p, kvaddr: 0x%08lx, uvaddr: 0x%08lx\n",
                              offset, request, blkif_req.nr_segments,
                              page, kvaddr, uvaddr);

                        blkif_req.nr_segments++;
                        request->nr_pages++;
                }
        }

        if (blktap_map_foreign(tap, request, &blkif_req, &table))
                goto out;

        /* Finally, write the request message to the user ring. */
        target = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
        memcpy(target, &blkif_req, sizeof(blkif_req));
        target->id = request->usr_idx;
        wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
        ring->ring.req_prod_pvt++;

        if (rq_data_dir(req)) {
                tap->stats.st_wr_sect += nr_sects;
                tap->stats.st_wr_req++;
        } else {
                tap->stats.st_rd_sect += nr_sects;
                tap->stats.st_rd_req++;
        }

        err = 0;

out:
        if (err)
                blktap_device_fast_flush(tap, request);
        return err;
}
#ifdef ENABLE_PASSTHROUGH
#define rq_for_each_bio_safe(_bio, _tmp, _req)                          \
        if ((_req)->bio)                                                \
                for (_bio = (_req)->bio;                                \
                     _bio && ((_tmp = _bio->bi_next) || 1);             \
                     _bio = _tmp)

static void
blktap_device_forward_request(struct blktap *tap, struct request *req)
{
        struct bio *bio, *tmp;
        struct blktap_device *dev;

        dev = &tap->device;

        rq_for_each_bio_safe(bio, tmp, req) {
                bio->bi_bdev = dev->bdev;
                submit_bio(bio->bi_rw, bio);
        }
}

static void
blktap_device_close_bdev(struct blktap *tap)
{
        struct blktap_device *dev;

        dev = &tap->device;

        if (dev->bdev)
                blkdev_put(dev->bdev);

        dev->bdev = NULL;
        clear_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
}

static int
blktap_device_open_bdev(struct blktap *tap, u32 pdev)
{
        struct block_device *bdev;
        struct blktap_device *dev;

        dev = &tap->device;

        bdev = open_by_devnum(pdev, FMODE_WRITE);
        if (IS_ERR(bdev)) {
                BTERR("opening device %x:%x failed: %ld\n",
                      MAJOR(pdev), MINOR(pdev), PTR_ERR(bdev));
                return PTR_ERR(bdev);
        }

        if (!bdev->bd_disk) {
                BTERR("device %x:%x doesn't exist\n",
                      MAJOR(pdev), MINOR(pdev));
                blkdev_put(dev->bdev);
                return -ENOENT;
        }

        dev->bdev = bdev;
        set_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);

        /* TODO: readjust queue parameters */

        BTINFO("set device %d to passthrough on %x:%x\n",
               tap->minor, MAJOR(pdev), MINOR(pdev));

        return 0;
}

int
blktap_device_enable_passthrough(struct blktap *tap,
                                 unsigned major, unsigned minor)
{
        u32 pdev;
        struct blktap_device *dev;

        dev  = &tap->device;
        pdev = MKDEV(major, minor);

        if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
                return -EINVAL;

        if (dev->bdev) {
                if (pdev)
                        return -EINVAL;
                blktap_device_close_bdev(tap);
                return 0;
        }

        return blktap_device_open_bdev(tap, pdev);
}
#endif
/*
 * dev->lock held on entry
 */
static void
blktap_device_run_queue(struct blktap *tap)
{
        int queued, err;
        request_queue_t *rq;
        struct request *req;
        struct blktap_ring *ring;
        struct blktap_device *dev;
        struct blktap_request *request;

        queued = 0;
        ring   = &tap->ring;
        dev    = &tap->device;
        rq     = dev->gd->queue;

        BTDBG("running queue for %d\n", tap->minor);

        while ((req = elv_next_request(rq)) != NULL) {
                if (!blk_fs_request(req)) {
                        end_request(req, 0);
                        continue;
                }

                if (blk_barrier_rq(req)) {
                        end_request(req, 0);
                        continue;
                }

#ifdef ENABLE_PASSTHROUGH
                if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
                        blkdev_dequeue_request(req);
                        blktap_device_forward_request(tap, req);
                        continue;
                }
#endif

                if (RING_FULL(&ring->ring)) {
                wait:
                        /* Avoid pointless unplugs. */
                        blk_stop_queue(rq);
                        blktap_defer(tap);
                        break;
                }

                request = blktap_request_allocate(tap);
                if (!request) {
                        tap->stats.st_oo_req++;
                        goto wait;
                }

                BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%lx) "
                      "buffer:%p [%s], pending: %p\n", req, tap->minor,
                      req->cmd, req->sector, req->current_nr_sectors,
                      req->nr_sectors, req->buffer,
                      rq_data_dir(req) ? "write" : "read", request);

                blkdev_dequeue_request(req);

                spin_unlock_irq(&dev->lock);
                down_read(&tap->tap_sem);

                err = blktap_device_process_request(tap, request, req);
                if (!err)
                        queued++;
                else {
                        blktap_device_end_dequeued_request(dev, req, 0);
                        blktap_request_free(tap, request);
                }

                up_read(&tap->tap_sem);
                spin_lock_irq(&dev->lock);
        }

        if (queued)
                blktap_ring_kick_user(tap);
}
/*
 * dev->lock held on entry
 */
static void
blktap_device_do_request(request_queue_t *rq)
{
        struct request *req;
        struct blktap *tap;
        struct blktap_device *dev;

        dev = rq->queuedata;
        if (!dev)
                goto fail;

        tap = dev_to_blktap(dev);
        if (!blktap_active(tap))
                goto fail;

        if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
            test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
                blktap_defer(tap);
                return;
        }

        blktap_device_run_queue(tap);
        return;

fail:
        while ((req = elv_next_request(rq))) {
                BTERR("device closed: failing secs %llu - %llu\n",
                      req->sector, req->sector + req->nr_sectors);
                end_request(req, 0);
        }
}
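/* Restart a stopped queue unless the ring is full or a pause is pending. */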
void
blktap_device_restart(struct blktap *tap)
{
        struct blktap_device *dev;

        dev = &tap->device;
        if (!dev->gd || !dev->gd->queue)
                return;

        if (blktap_active(tap) && RING_FULL(&tap->ring.ring)) {
                blktap_defer(tap);
                return;
        }

        if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
            test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
                blktap_defer(tap);
                return;
        }

        spin_lock_irq(&dev->lock);

        /* Re-enable calldowns. */
        if (blk_queue_stopped(dev->gd->queue))
                blk_start_queue(dev->gd->queue);

        /* Kick things off immediately. */
        blktap_device_do_request(dev->gd->queue);

        spin_unlock_irq(&dev->lock);
}
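/*
 * Apply the current tap parameters (capacity, sector size, segment limits)
 * to the gendisk and its request queue.
 */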
static void
blktap_device_configure(struct blktap *tap)
{
        struct request_queue *rq;
        struct blktap_device *dev = &tap->device;

        if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !dev->gd)
                return;

        dev = &tap->device;
        rq  = dev->gd->queue;

        spin_lock_irq(&dev->lock);

        set_capacity(dev->gd, tap->params.capacity);

        /* Hard sector size and max sectors impersonate the equiv. hardware. */
        blk_queue_hardsect_size(rq, tap->params.sector_size);
        blk_queue_max_sectors(rq, 512);

        /* Each segment in a request is up to an aligned page in size. */
        blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
        blk_queue_max_segment_size(rq, PAGE_SIZE);

        /* Ensure a merged request will fit in a single I/O ring slot. */
        blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
        blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);

        /* Make sure buffer addresses are sector-aligned. */
        blk_queue_dma_alignment(rq, 511);

        spin_unlock_irq(&dev->lock);
}
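/*
 * Resume I/O after the ring has been reattached; the image size may have
 * changed, so reapply the device parameters before restarting the queue.
 */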
int
blktap_device_resume(struct blktap *tap)
{
        int err;

        if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
                return -ENODEV;

        if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
                return 0;

        err = blktap_ring_resume(tap);
        if (err)
                return err;

        /* device size may have changed */
        blktap_device_configure(tap);

        BTDBG("restarting device\n");
        blktap_device_restart(tap);

        return 0;
}
int
blktap_device_pause(struct blktap *tap)
{
        unsigned long flags;
        struct blktap_device *dev = &tap->device;

        if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
                return -ENODEV;

        if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
                return 0;

        spin_lock_irqsave(&dev->lock, flags);

        blk_stop_queue(dev->gd->queue);
        set_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);

        spin_unlock_irqrestore(&dev->lock, flags);

        return blktap_ring_pause(tap);
}
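/* Tear down the gendisk and request queue once there are no more users. */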
int
blktap_device_destroy(struct blktap *tap)
{
        struct blktap_device *dev = &tap->device;

        if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
                return 0;

        BTINFO("destroy device %d users %d\n", tap->minor, dev->users);

        if (dev->users)
                return -EBUSY;

        spin_lock_irq(&dev->lock);
        /* No more blktap_device_do_request(). */
        blk_stop_queue(dev->gd->queue);
        clear_bit(BLKTAP_DEVICE, &tap->dev_inuse);
        spin_unlock_irq(&dev->lock);

#ifdef ENABLE_PASSTHROUGH
        if (dev->bdev)
                blktap_device_close_bdev(tap);
#endif

        del_gendisk(dev->gd);
        put_disk(dev->gd);
        blk_cleanup_queue(dev->gd->queue);

        dev->gd = NULL;

        wake_up(&tap->wq);

        return 0;
}
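/*
 * Allocate and register the tapdev gendisk and its request queue for this
 * tap instance; the disk name is derived from the tap minor.
 */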
int
blktap_device_create(struct blktap *tap)
{
        int minor, err;
        struct gendisk *gd;
        struct request_queue *rq;
        struct blktap_device *dev;

        gd    = NULL;
        rq    = NULL;
        dev   = &tap->device;
        minor = tap->minor;

        if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
                return -EEXIST;

        if (blktap_validate_params(tap, &tap->params))
                return -EINVAL;

        BTINFO("minor %d sectors %Lu sector-size %lu\n",
               minor, tap->params.capacity, tap->params.sector_size);

        err = -ENODEV;

        gd = alloc_disk(1);
        if (!gd)
                goto error;

        if (minor < 26)
                sprintf(gd->disk_name, "tapdev%c", 'a' + minor);
        else
                sprintf(gd->disk_name, "tapdev%c%c",
                        'a' + ((minor / 26) - 1), 'a' + (minor % 26));

        gd->major = blktap_device_major;
        gd->first_minor = minor;
        gd->fops = &blktap_device_file_operations;
        gd->private_data = dev;

        spin_lock_init(&dev->lock);
        rq = blk_init_queue(blktap_device_do_request, &dev->lock);
        if (!rq)
                goto error;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
        elevator_init(rq, "noop");
#else
        elevator_init(rq, &elevator_noop);
#endif

        gd->queue     = rq;
        rq->queuedata = dev;
        dev->gd       = gd;

        set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
        blktap_device_configure(tap);

        add_disk(gd);

        err = 0;
        goto out;

error:
        if (gd)
                del_gendisk(gd);
        if (rq)
                blk_cleanup_queue(rq);

out:
        BTINFO("creation of %u:%u: %d\n", blktap_device_major, tap->minor, err);
        return err;
}
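/* Register a dynamically allocated block major for the tapdev devices. */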
int
blktap_device_init(int *maj)
{
        int major;

        /* Dynamically allocate a major for this device */
        major = register_blkdev(0, "tapdev");
        if (major < 0) {
                BTERR("Couldn't register blktap device\n");
                return -ENOMEM;
        }

        blktap_device_major = *maj = major;
        BTINFO("blktap device major %d\n", major);

        return 0;
}

void
blktap_device_free(void)
{
        if (blktap_device_major)
                if (unregister_blkdev(blktap_device_major, "tapdev"))
                        BTERR("blktap device unregister failed\n");
}