ia64/linux-2.6.18-xen.hg

view drivers/xen/blktap2/device.c @ 894:4e5cd2fc45a7

linux: fix blkback/blktap2 interaction

blkback's page map code needs to be accessible to both blkback and
blktap2, irrespective of whether either or both are modules. The
most immediate solution is to break it out into a separate, library-
like component that doesn't need building if either of the two
consumers is configured off, and that gets built as a module if both
consumers are modules.
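(For illustration only, a minimal sketch of the shared interface such a split implies, assuming a standalone blkback-pagemap object that both consumers can link against; the struct layout below is inferred from the map.domid/map.gref uses in blktap_prep_foreign() further down in this file, and is not taken from the actual patch -- the real header may carry more fields.)

/*
 * Hedged sketch of a library-like blkback-pagemap interface.
 * The lookup is implemented once, in its own object file, so blkback
 * and blktap2 can share it whether they are built in or modular.
 */
struct blkback_pagemap {
	domid_t     domid;	/* domain that granted the page */
	grant_ref_t gref;	/* grant reference backing the page */
};

struct blkback_pagemap blkback_pagemap_read(struct page *page);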

Also fix the dummy implementation of blkback_pagemap_read(), since
using BUG() there doesn't compile.
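(Again purely as a sketch, not the actual patch: when the pagemap support is configured out, the dummy still has to be something the compiler accepts at its call sites. One compilable form keeps the BUG() but returns a zeroed structure, e.g.)

/* Hedged sketch of a compilable dummy for configurations without the pagemap. */
static inline struct blkback_pagemap
blkback_pagemap_read(struct page *page)
{
	struct blkback_pagemap dummy = { 0 };

	BUG();		/* must never be reached in such configurations */
	return dummy;
}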

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 04 10:33:52 2009 +0100 (2009-06-04)
parents f994bfe9b93b
children f59c5daed527
line source
1 #include <linux/fs.h>
2 #include <linux/blkdev.h>
3 #include <linux/cdrom.h>
4 #include <linux/hdreg.h>
5 #include <linux/module.h>
6 #include <asm/tlbflush.h>
8 #include <scsi/scsi.h>
9 #include <scsi/scsi_ioctl.h>
11 #include <xen/xenbus.h>
12 #include <xen/interface/io/blkif.h>
14 #include "blktap.h"
16 #include "../blkback/blkback-pagemap.h"
18 #if 0
19 #define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a)
20 #else
21 #define DPRINTK_IOCTL(_f, _a...) ((void)0)
22 #endif
24 struct blktap_grant_table {
25 int cnt;
26 struct gnttab_map_grant_ref grants[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
27 };
29 static int blktap_device_major;
31 static inline struct blktap *
32 dev_to_blktap(struct blktap_device *dev)
33 {
34 return container_of(dev, struct blktap, device);
35 }
37 static int
38 blktap_device_open(struct inode *inode, struct file *filep)
39 {
40 struct blktap *tap;
41 struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;
43 if (!dev)
44 return -ENOENT;
46 tap = dev_to_blktap(dev);
47 if (!blktap_active(tap) ||
48 test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
49 return -ENOENT;
51 dev->users++;
53 return 0;
54 }
56 static int
57 blktap_device_release(struct inode *inode, struct file *filep)
58 {
59 struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;
60 struct blktap *tap = dev_to_blktap(dev);
62 dev->users--;
63 if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
64 blktap_device_destroy(tap);
66 return 0;
67 }
69 static int
70 blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg)
71 {
72 /* We don't have real geometry info, but let's at least return
73 values consistent with the size of the device */
74 sector_t nsect = get_capacity(bd->bd_disk);
75 sector_t cylinders = nsect;
77 hg->heads = 0xff;
78 hg->sectors = 0x3f;
79 sector_div(cylinders, hg->heads * hg->sectors);
80 hg->cylinders = cylinders;
81 if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
82 hg->cylinders = 0xffff;
83 return 0;
84 }
86 static int
87 blktap_device_ioctl(struct inode *inode, struct file *filep,
88 unsigned command, unsigned long argument)
89 {
90 int i;
92 DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
93 command, (long)argument, inode->i_rdev);
95 switch (command) {
96 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
97 case HDIO_GETGEO: {
98 struct block_device *bd = inode->i_bdev;
99 struct hd_geometry geo;
100 int ret;
102 if (!argument)
103 return -EINVAL;
105 geo.start = get_start_sect(bd);
106 ret = blktap_device_getgeo(bd, &geo);
107 if (ret)
108 return ret;
110 if (copy_to_user((struct hd_geometry __user *)argument, &geo,
111 sizeof(geo)))
112 return -EFAULT;
114 return 0;
115 }
116 #endif
117 case CDROMMULTISESSION:
118 BTDBG("FIXME: support multisession CDs later\n");
119 for (i = 0; i < sizeof(struct cdrom_multisession); i++)
120 if (put_user(0, (char __user *)(argument + i)))
121 return -EFAULT;
122 return 0;
124 case SCSI_IOCTL_GET_IDLUN:
125 if (!access_ok(VERIFY_WRITE, argument,
126 sizeof(struct scsi_idlun)))
127 return -EFAULT;
129 /* return 0 for now. */
130 __put_user(0, &((struct scsi_idlun __user *)argument)->dev_id);
131 __put_user(0,
132 &((struct scsi_idlun __user *)argument)->host_unique_id);
133 return 0;
135 default:
136 /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
137 command);*/
138 return -EINVAL; /* same return as native Linux */
139 }
141 return 0;
142 }
144 static struct block_device_operations blktap_device_file_operations = {
145 .owner = THIS_MODULE,
146 .open = blktap_device_open,
147 .release = blktap_device_release,
148 .ioctl = blktap_device_ioctl,
149 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
150 .getgeo = blktap_device_getgeo
151 #endif
152 };
154 static int
155 blktap_map_uaddr_fn(pte_t *ptep, struct page *pmd_page,
156 unsigned long addr, void *data)
157 {
158 pte_t *pte = (pte_t *)data;
160 BTDBG("ptep %p -> %012llx\n", ptep, (unsigned long long)pte_val(*pte));
161 set_pte(ptep, *pte);
162 return 0;
163 }
165 static int
166 blktap_map_uaddr(struct mm_struct *mm, unsigned long address, pte_t pte)
167 {
168 return apply_to_page_range(mm, address,
169 PAGE_SIZE, blktap_map_uaddr_fn, &pte);
170 }
172 static int
173 blktap_umap_uaddr_fn(pte_t *ptep, struct page *pmd_page,
174 unsigned long addr, void *data)
175 {
176 struct mm_struct *mm = (struct mm_struct *)data;
178 BTDBG("ptep %p\n", ptep);
179 pte_clear(mm, addr, ptep);
180 return 0;
181 }
183 static int
184 blktap_umap_uaddr(struct mm_struct *mm, unsigned long address)
185 {
186 return apply_to_page_range(mm, address,
187 PAGE_SIZE, blktap_umap_uaddr_fn, mm);
188 }
190 static inline void
191 flush_tlb_kernel_page(unsigned long kvaddr)
192 {
193 #ifdef CONFIG_X86
194 xen_invlpg_all(kvaddr);
195 #else
196 flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE);
197 #endif
198 }
200 static void
201 blktap_device_end_dequeued_request(struct blktap_device *dev,
202 struct request *req, int uptodate)
203 {
204 int ret;
206 ret = end_that_request_first(req, uptodate, req->hard_nr_sectors);
207 BUG_ON(ret);
209 spin_lock_irq(&dev->lock);
210 end_that_request_last(req, uptodate);
211 spin_unlock_irq(&dev->lock);
212 }
214 /*
215 * tap->tap_sem held on entry
216 */
217 static void
218 blktap_device_fast_flush(struct blktap *tap, struct blktap_request *request)
219 {
220 uint64_t ptep;
221 int ret, usr_idx;
222 unsigned int i, cnt;
223 struct page **map, *page;
224 struct blktap_ring *ring;
225 struct grant_handle_pair *khandle;
226 unsigned long kvaddr, uvaddr, offset;
227 struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
229 cnt = 0;
230 ring = &tap->ring;
231 usr_idx = request->usr_idx;
232 map = ring->foreign_map.map;
234 if (!ring->vma)
235 return;
237 if (xen_feature(XENFEAT_auto_translated_physmap))
238 zap_page_range(ring->vma,
239 MMAP_VADDR(ring->user_vstart, usr_idx, 0),
240 request->nr_pages << PAGE_SHIFT, NULL);
242 for (i = 0; i < request->nr_pages; i++) {
243 kvaddr = request_to_kaddr(request, i);
244 uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
246 khandle = request->handles + i;
248 if (khandle->kernel != INVALID_GRANT_HANDLE) {
249 gnttab_set_unmap_op(&unmap[cnt], kvaddr,
250 GNTMAP_host_map, khandle->kernel);
251 cnt++;
252 set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
253 INVALID_P2M_ENTRY);
254 }
256 if (khandle->user != INVALID_GRANT_HANDLE) {
257 BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
258 if (create_lookup_pte_addr(ring->vma->vm_mm,
259 uvaddr, &ptep) != 0) {
260 BTERR("Couldn't get a pte addr!\n");
261 return;
262 }
264 gnttab_set_unmap_op(&unmap[cnt], ptep,
265 GNTMAP_host_map
266 | GNTMAP_application_map
267 | GNTMAP_contains_pte,
268 khandle->user);
269 cnt++;
270 }
272 offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
274 BTDBG("offset: 0x%08lx, page: %p, request: %p, usr_idx: %d, "
275 "seg: %d, kvaddr: 0x%08lx, khandle: %u, uvaddr: "
276 "0x%08lx, handle: %u\n", offset, map[offset], request,
277 usr_idx, i, kvaddr, khandle->kernel, uvaddr,
278 khandle->user);
280 page = map[offset];
281 if (page) {
282 ClearPageReserved(map[offset]);
283 if (PageBlkback(page)) {
284 ClearPageBlkback(page);
285 set_page_private(page, 0);
286 }
287 }
288 map[offset] = NULL;
290 khandle->kernel = INVALID_GRANT_HANDLE;
291 khandle->user = INVALID_GRANT_HANDLE;
292 }
294 if (cnt) {
295 ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
296 unmap, cnt);
297 BUG_ON(ret);
298 }
300 if (!xen_feature(XENFEAT_auto_translated_physmap))
301 zap_page_range(ring->vma,
302 MMAP_VADDR(ring->user_vstart, usr_idx, 0),
303 request->nr_pages << PAGE_SHIFT, NULL);
304 }
306 /*
307 * tap->tap_sem held on entry
308 */
309 static void
310 blktap_unmap(struct blktap *tap, struct blktap_request *request)
311 {
312 int i, usr_idx;
313 unsigned long kvaddr;
315 usr_idx = request->usr_idx;
316 down_write(&tap->ring.vma->vm_mm->mmap_sem);
318 for (i = 0; i < request->nr_pages; i++) {
319 BTDBG("request: %p, seg: %d, kvaddr: 0x%08lx, khandle: %u, "
320 "uvaddr: 0x%08lx, uhandle: %u\n", request, i,
321 request_to_kaddr(request, i),
322 request->handles[i].kernel,
323 MMAP_VADDR(tap->ring.user_vstart, usr_idx, i),
324 request->handles[i].user);
326 if (request->handles[i].kernel == INVALID_GRANT_HANDLE) {
327 kvaddr = request_to_kaddr(request, i);
328 blktap_umap_uaddr(&init_mm, kvaddr);
329 flush_tlb_kernel_page(kvaddr);
330 set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
331 INVALID_P2M_ENTRY);
332 }
333 }
335 blktap_device_fast_flush(tap, request);
336 up_write(&tap->ring.vma->vm_mm->mmap_sem);
337 }
339 /*
340 * called if the tapdisk process dies unexpectedly.
341 * fail and release any pending requests and disable queue.
342 */
343 void
344 blktap_device_fail_pending_requests(struct blktap *tap)
345 {
346 int usr_idx;
347 struct request *req;
348 struct blktap_device *dev;
349 struct blktap_request *request;
351 if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
352 return;
354 down_write(&tap->tap_sem);
356 dev = &tap->device;
357 for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
358 request = tap->pending_requests[usr_idx];
359 if (!request || request->status != BLKTAP_REQUEST_PENDING)
360 continue;
362 BTERR("%u:%u: failing pending %s of %d pages\n",
363 blktap_device_major, tap->minor,
364 (request->operation == BLKIF_OP_READ ?
365 "read" : "write"), request->nr_pages);
367 blktap_unmap(tap, request);
368 req = (struct request *)(unsigned long)request->id;
369 blktap_device_end_dequeued_request(dev, req, 0);
370 blktap_request_free(tap, request);
371 }
373 up_write(&tap->tap_sem);
375 spin_lock_irq(&dev->lock);
377 /* fail any future requests */
378 dev->gd->queue->queuedata = NULL;
379 blk_start_queue(dev->gd->queue);
381 spin_unlock_irq(&dev->lock);
382 }
384 /*
385 * tap->tap_sem held on entry
386 */
387 void
388 blktap_device_finish_request(struct blktap *tap,
389 blkif_response_t *res,
390 struct blktap_request *request)
391 {
392 int uptodate;
393 struct request *req;
394 struct blktap_device *dev;
396 dev = &tap->device;
398 blktap_unmap(tap, request);
400 req = (struct request *)(unsigned long)request->id;
401 uptodate = (res->status == BLKIF_RSP_OKAY);
403 BTDBG("req %p res status %d operation %d/%d id %lld\n", req,
404 res->status, res->operation, request->operation, res->id);
406 switch (request->operation) {
407 case BLKIF_OP_READ:
408 case BLKIF_OP_WRITE:
409 if (unlikely(res->status != BLKIF_RSP_OKAY))
410 BTERR("Bad return from device data "
411 "request: %x\n", res->status);
412 blktap_device_end_dequeued_request(dev, req, uptodate);
413 break;
414 default:
415 BUG();
416 }
418 blktap_request_free(tap, request);
419 }
421 static int
422 blktap_prep_foreign(struct blktap *tap,
423 struct blktap_request *request,
424 blkif_request_t *blkif_req,
425 unsigned int seg, struct page *page,
426 struct blktap_grant_table *table)
427 {
428 uint64_t ptep;
429 uint32_t flags;
430 struct page *tap_page;
431 struct blktap_ring *ring;
432 struct blkback_pagemap map;
433 unsigned long uvaddr, kvaddr;
435 ring = &tap->ring;
436 map = blkback_pagemap_read(page);
437 blkif_req->seg[seg].gref = map.gref;
439 uvaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
440 kvaddr = request_to_kaddr(request, seg);
441 flags = GNTMAP_host_map |
442 (request->operation == BLKIF_OP_WRITE ? GNTMAP_readonly : 0);
444 gnttab_set_map_op(&table->grants[table->cnt],
445 kvaddr, flags, map.gref, map.domid);
446 table->cnt++;
448 /* enable chained tap devices */
449 tap_page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
450 set_page_private(tap_page, page_private(page));
451 SetPageBlkback(tap_page);
453 if (xen_feature(XENFEAT_auto_translated_physmap))
454 return 0;
456 if (create_lookup_pte_addr(ring->vma->vm_mm, uvaddr, &ptep)) {
457 BTERR("couldn't get a pte addr!\n");
458 return -1;
459 }
461 flags |= GNTMAP_application_map | GNTMAP_contains_pte;
462 gnttab_set_map_op(&table->grants[table->cnt],
463 ptep, flags, map.gref, map.domid);
464 table->cnt++;
466 return 0;
467 }
469 static int
470 blktap_map_foreign(struct blktap *tap,
471 struct blktap_request *request,
472 blkif_request_t *blkif_req,
473 struct blktap_grant_table *table)
474 {
475 struct page *page;
476 int i, grant, err, usr_idx;
477 struct blktap_ring *ring;
478 unsigned long uvaddr, kvaddr, foreign_mfn;
480 if (!table->cnt)
481 return 0;
483 err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
484 table->grants, table->cnt);
485 BUG_ON(err);
487 grant = 0;
488 usr_idx = request->usr_idx;
489 ring = &tap->ring;
491 for (i = 0; i < request->nr_pages; i++) {
492 if (!blkif_req->seg[i].gref)
493 continue;
495 uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
496 kvaddr = request_to_kaddr(request, i);
498 if (unlikely(table->grants[grant].status)) {
499 BTERR("invalid kernel buffer: could not remap it\n");
500 err |= 1;
501 table->grants[grant].handle = INVALID_GRANT_HANDLE;
502 }
504 request->handles[i].kernel = table->grants[grant].handle;
505 foreign_mfn = table->grants[grant].dev_bus_addr >> PAGE_SHIFT;
506 grant++;
508 if (xen_feature(XENFEAT_auto_translated_physmap))
509 goto done;
511 if (unlikely(table->grants[grant].status)) {
512 BTERR("invalid user buffer: could not remap it\n");
513 err |= 1;
514 table->grants[grant].handle = INVALID_GRANT_HANDLE;
515 }
517 request->handles[i].user = table->grants[grant].handle;
518 grant++;
520 done:
521 if (err)
522 continue;
524 page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
526 if (!xen_feature(XENFEAT_auto_translated_physmap))
527 set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
528 FOREIGN_FRAME(foreign_mfn));
529 else if (vm_insert_page(ring->vma, uvaddr, page))
530 err |= 1;
532 BTDBG("pending_req: %p, seg: %d, page: %p, "
533 "kvaddr: 0x%08lx, khandle: %u, uvaddr: 0x%08lx, "
534 "uhandle: %u\n", request, i, page,
535 kvaddr, request->handles[i].kernel,
536 uvaddr, request->handles[i].user);
537 }
539 return err;
540 }
542 static void
543 blktap_map(struct blktap *tap,
544 struct blktap_request *request,
545 unsigned int seg, struct page *page)
546 {
547 pte_t pte;
548 int usr_idx;
549 struct blktap_ring *ring;
550 unsigned long uvaddr, kvaddr;
552 ring = &tap->ring;
553 usr_idx = request->usr_idx;
554 uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, seg);
555 kvaddr = request_to_kaddr(request, seg);
557 pte = mk_pte(page, ring->vma->vm_page_prot);
558 blktap_map_uaddr(ring->vma->vm_mm, uvaddr, pte_mkwrite(pte));
559 flush_tlb_page(ring->vma, uvaddr);
560 blktap_map_uaddr(&init_mm, kvaddr, mk_pte(page, PAGE_KERNEL));
561 flush_tlb_kernel_page(kvaddr);
563 set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, pte_mfn(pte));
564 request->handles[seg].kernel = INVALID_GRANT_HANDLE;
565 request->handles[seg].user = INVALID_GRANT_HANDLE;
567 BTDBG("pending_req: %p, seg: %d, page: %p, kvaddr: 0x%08lx, "
568 "uvaddr: 0x%08lx\n", request, seg, page, kvaddr,
569 uvaddr);
570 }
572 static int
573 blktap_device_process_request(struct blktap *tap,
574 struct blktap_request *request,
575 struct request *req)
576 {
577 struct bio *bio;
578 struct page *page;
579 struct bio_vec *bvec;
580 int idx, usr_idx, err;
581 struct blktap_ring *ring;
582 struct blktap_grant_table table;
583 unsigned int fsect, lsect, nr_sects;
584 unsigned long offset, uvaddr, kvaddr;
585 struct blkif_request blkif_req, *target;
587 err = -1;
588 memset(&table, 0, sizeof(table));
590 if (!blktap_active(tap))
591 goto out;
593 ring = &tap->ring;
594 usr_idx = request->usr_idx;
595 blkif_req.id = usr_idx;
596 blkif_req.sector_number = (blkif_sector_t)req->sector;
597 blkif_req.handle = 0;
598 blkif_req.operation = rq_data_dir(req) ?
599 BLKIF_OP_WRITE : BLKIF_OP_READ;
601 request->id = (unsigned long)req;
602 request->operation = blkif_req.operation;
603 request->status = BLKTAP_REQUEST_PENDING;
604 do_gettimeofday(&request->time);
606 nr_sects = 0;
607 request->nr_pages = 0;
608 blkif_req.nr_segments = 0;
609 rq_for_each_bio(bio, req) {
610 bio_for_each_segment(bvec, bio, idx) {
611 BUG_ON(blkif_req.nr_segments ==
612 BLKIF_MAX_SEGMENTS_PER_REQUEST);
614 fsect = bvec->bv_offset >> 9;
615 lsect = fsect + (bvec->bv_len >> 9) - 1;
616 nr_sects += bvec->bv_len >> 9;
618 blkif_req.seg[blkif_req.nr_segments] =
619 (struct blkif_request_segment) {
620 .gref = 0,
621 .first_sect = fsect,
622 .last_sect = lsect };
624 if (PageBlkback(bvec->bv_page)) {
625 /* foreign page -- use xen */
626 if (blktap_prep_foreign(tap,
627 request,
628 &blkif_req,
629 blkif_req.nr_segments,
630 bvec->bv_page,
631 &table))
632 goto out;
633 } else {
634 /* do it the old fashioned way */
635 blktap_map(tap,
636 request,
637 blkif_req.nr_segments,
638 bvec->bv_page);
639 }
641 uvaddr = MMAP_VADDR(ring->user_vstart,
642 usr_idx, blkif_req.nr_segments);
643 kvaddr = request_to_kaddr(request,
644 blkif_req.nr_segments);
645 offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
646 page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
647 ring->foreign_map.map[offset] = page;
648 SetPageReserved(page);
650 BTDBG("mapped uaddr %08lx to page %p pfn 0x%lx\n",
651 uvaddr, page, __pa(kvaddr) >> PAGE_SHIFT);
652 BTDBG("offset: 0x%08lx, pending_req: %p, seg: %d, "
653 "page: %p, kvaddr: 0x%08lx, uvaddr: 0x%08lx\n",
654 offset, request, blkif_req.nr_segments,
655 page, kvaddr, uvaddr);
657 blkif_req.nr_segments++;
658 request->nr_pages++;
659 }
660 }
662 if (blktap_map_foreign(tap, request, &blkif_req, &table))
663 goto out;
665 /* Finally, write the request message to the user ring. */
666 target = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
667 memcpy(target, &blkif_req, sizeof(blkif_req));
668 target->id = request->usr_idx;
669 wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
670 ring->ring.req_prod_pvt++;
672 if (rq_data_dir(req)) {
673 tap->stats.st_wr_sect += nr_sects;
674 tap->stats.st_wr_req++;
675 } else {
676 tap->stats.st_rd_sect += nr_sects;
677 tap->stats.st_rd_req++;
678 }
680 err = 0;
682 out:
683 if (err)
684 blktap_device_fast_flush(tap, request);
685 return err;
686 }
688 #ifdef ENABLE_PASSTHROUGH
689 #define rq_for_each_bio_safe(_bio, _tmp, _req) \
690 if ((_req)->bio) \
691 for (_bio = (_req)->bio; \
692 _bio && ((_tmp = _bio->bi_next) || 1); \
693 _bio = _tmp)
695 static void
696 blktap_device_forward_request(struct blktap *tap, struct request *req)
697 {
698 struct bio *bio, *tmp;
699 struct blktap_device *dev;
701 dev = &tap->device;
703 rq_for_each_bio_safe(bio, tmp, req) {
704 bio->bi_bdev = dev->bdev;
705 submit_bio(bio->bi_rw, bio);
706 }
707 }
709 static void
710 blktap_device_close_bdev(struct blktap *tap)
711 {
712 struct blktap_device *dev;
714 dev = &tap->device;
716 if (dev->bdev)
717 blkdev_put(dev->bdev);
719 dev->bdev = NULL;
720 clear_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
721 }
723 static int
724 blktap_device_open_bdev(struct blktap *tap, u32 pdev)
725 {
726 struct block_device *bdev;
727 struct blktap_device *dev;
729 dev = &tap->device;
731 bdev = open_by_devnum(pdev, FMODE_WRITE);
732 if (IS_ERR(bdev)) {
733 BTERR("opening device %x:%x failed: %ld\n",
734 MAJOR(pdev), MINOR(pdev), PTR_ERR(bdev));
735 return PTR_ERR(bdev);
736 }
738 if (!bdev->bd_disk) {
739 BTERR("device %x:%x doesn't exist\n",
740 MAJOR(pdev), MINOR(pdev));
741 blkdev_put(bdev);
742 return -ENOENT;
743 }
745 dev->bdev = bdev;
746 set_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
748 /* TODO: readjust queue parameters */
750 BTINFO("set device %d to passthrough on %x:%x\n",
751 tap->minor, MAJOR(pdev), MINOR(pdev));
753 return 0;
754 }
756 int
757 blktap_device_enable_passthrough(struct blktap *tap,
758 unsigned major, unsigned minor)
759 {
760 u32 pdev;
761 struct blktap_device *dev;
763 dev = &tap->device;
764 pdev = MKDEV(major, minor);
766 if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
767 return -EINVAL;
769 if (dev->bdev) {
770 if (pdev)
771 return -EINVAL;
772 blktap_device_close_bdev(tap);
773 return 0;
774 }
776 return blktap_device_open_bdev(tap, pdev);
777 }
778 #endif
780 /*
781 * dev->lock held on entry
782 */
783 static void
784 blktap_device_run_queue(struct blktap *tap)
785 {
786 int queued, err;
787 request_queue_t *rq;
788 struct request *req;
789 struct blktap_ring *ring;
790 struct blktap_device *dev;
791 struct blktap_request *request;
793 queued = 0;
794 ring = &tap->ring;
795 dev = &tap->device;
796 rq = dev->gd->queue;
798 BTDBG("running queue for %d\n", tap->minor);
800 while ((req = elv_next_request(rq)) != NULL) {
801 if (!blk_fs_request(req)) {
802 end_request(req, 0);
803 continue;
804 }
806 if (blk_barrier_rq(req)) {
807 end_request(req, 0);
808 continue;
809 }
811 #ifdef ENABLE_PASSTHROUGH
812 if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
813 blkdev_dequeue_request(req);
814 blktap_device_forward_request(tap, req);
815 continue;
816 }
817 #endif
819 if (RING_FULL(&ring->ring)) {
820 wait:
821 /* Avoid pointless unplugs. */
822 blk_stop_queue(rq);
823 blktap_defer(tap);
824 break;
825 }
827 request = blktap_request_allocate(tap);
828 if (!request) {
829 tap->stats.st_oo_req++;
830 goto wait;
831 }
833 BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%lx) "
834 "buffer:%p [%s], pending: %p\n", req, tap->minor,
835 req->cmd, req->sector, req->current_nr_sectors,
836 req->nr_sectors, req->buffer,
837 rq_data_dir(req) ? "write" : "read", request);
839 blkdev_dequeue_request(req);
841 spin_unlock_irq(&dev->lock);
842 down_read(&tap->tap_sem);
844 err = blktap_device_process_request(tap, request, req);
845 if (!err)
846 queued++;
847 else {
848 blktap_device_end_dequeued_request(dev, req, 0);
849 blktap_request_free(tap, request);
850 }
852 up_read(&tap->tap_sem);
853 spin_lock_irq(&dev->lock);
854 }
856 if (queued)
857 blktap_ring_kick_user(tap);
858 }
860 /*
861 * dev->lock held on entry
862 */
863 static void
864 blktap_device_do_request(request_queue_t *rq)
865 {
866 struct request *req;
867 struct blktap *tap;
868 struct blktap_device *dev;
870 dev = rq->queuedata;
871 if (!dev)
872 goto fail;
874 tap = dev_to_blktap(dev);
875 if (!blktap_active(tap))
876 goto fail;
878 if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
879 test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
880 blktap_defer(tap);
881 return;
882 }
884 blktap_device_run_queue(tap);
885 return;
887 fail:
888 while ((req = elv_next_request(rq))) {
889 BTERR("device closed: failing secs %llu - %llu\n",
890 req->sector, req->sector + req->nr_sectors);
891 end_request(req, 0);
892 }
893 }
895 void
896 blktap_device_restart(struct blktap *tap)
897 {
898 struct blktap_device *dev;
900 dev = &tap->device;
901 if (!dev->gd || !dev->gd->queue)
902 return;
904 if (blktap_active(tap) && RING_FULL(&tap->ring.ring)) {
905 blktap_defer(tap);
906 return;
907 }
909 if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
910 test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
911 blktap_defer(tap);
912 return;
913 }
915 spin_lock_irq(&dev->lock);
917 /* Re-enable calldowns. */
918 if (blk_queue_stopped(dev->gd->queue))
919 blk_start_queue(dev->gd->queue);
921 /* Kick things off immediately. */
922 blktap_device_do_request(dev->gd->queue);
924 spin_unlock_irq(&dev->lock);
925 }
927 static void
928 blktap_device_configure(struct blktap *tap)
929 {
930 struct request_queue *rq;
931 struct blktap_device *dev = &tap->device;
933 if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !dev->gd)
934 return;
936 dev = &tap->device;
937 rq = dev->gd->queue;
939 spin_lock_irq(&dev->lock);
941 set_capacity(dev->gd, tap->params.capacity);
943 /* Hard sector size and max sectors impersonate the equiv. hardware. */
944 blk_queue_hardsect_size(rq, tap->params.sector_size);
945 blk_queue_max_sectors(rq, 512);
947 /* Each segment in a request is up to an aligned page in size. */
948 blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
949 blk_queue_max_segment_size(rq, PAGE_SIZE);
951 /* Ensure a merged request will fit in a single I/O ring slot. */
952 blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
953 blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
955 /* Make sure buffer addresses are sector-aligned. */
956 blk_queue_dma_alignment(rq, 511);
958 spin_unlock_irq(&dev->lock);
959 }
961 int
962 blktap_device_resume(struct blktap *tap)
963 {
964 int err;
966 if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
967 return -ENODEV;
969 if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
970 return 0;
972 err = blktap_ring_resume(tap);
973 if (err)
974 return err;
976 /* device size may have changed */
977 blktap_device_configure(tap);
979 BTDBG("restarting device\n");
980 blktap_device_restart(tap);
982 return 0;
983 }
985 int
986 blktap_device_pause(struct blktap *tap)
987 {
988 unsigned long flags;
989 struct blktap_device *dev = &tap->device;
991 if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
992 return -ENODEV;
994 if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
995 return 0;
997 spin_lock_irqsave(&dev->lock, flags);
999 blk_stop_queue(dev->gd->queue);
1000 set_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
1002 spin_unlock_irqrestore(&dev->lock, flags);
1004 return blktap_ring_pause(tap);
1005 }
1007 int
1008 blktap_device_destroy(struct blktap *tap)
1009 {
1010 struct blktap_device *dev = &tap->device;
1012 if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
1013 return 0;
1015 BTINFO("destroy device %d users %d\n", tap->minor, dev->users);
1017 if (dev->users)
1018 return -EBUSY;
1020 spin_lock_irq(&dev->lock);
1021 /* No more blktap_device_do_request(). */
1022 blk_stop_queue(dev->gd->queue);
1023 clear_bit(BLKTAP_DEVICE, &tap->dev_inuse);
1024 spin_unlock_irq(&dev->lock);
1026 #ifdef ENABLE_PASSTHROUGH
1027 if (dev->bdev)
1028 blktap_device_close_bdev(tap);
1029 #endif
1031 del_gendisk(dev->gd);
1032 put_disk(dev->gd);
1033 blk_cleanup_queue(dev->gd->queue);
1035 dev->gd = NULL;
1037 wake_up(&tap->wq);
1039 return 0;
1040 }
1042 int
1043 blktap_device_create(struct blktap *tap)
1044 {
1045 int minor, err;
1046 struct gendisk *gd;
1047 struct request_queue *rq;
1048 struct blktap_device *dev;
1050 gd = NULL;
1051 rq = NULL;
1052 dev = &tap->device;
1053 minor = tap->minor;
1055 if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
1056 return -EEXIST;
1058 if (blktap_validate_params(tap, &tap->params))
1059 return -EINVAL;
1061 BTINFO("minor %d sectors %Lu sector-size %lu\n",
1062 minor, tap->params.capacity, tap->params.sector_size);
1064 err = -ENODEV;
1066 gd = alloc_disk(1);
1067 if (!gd)
1068 goto error;
1070 if (minor < 26)
1071 sprintf(gd->disk_name, "tapdev%c", 'a' + minor);
1072 else
1073 sprintf(gd->disk_name, "tapdev%c%c",
1074 'a' + ((minor / 26) - 1), 'a' + (minor % 26));
1076 gd->major = blktap_device_major;
1077 gd->first_minor = minor;
1078 gd->fops = &blktap_device_file_operations;
1079 gd->private_data = dev;
1081 spin_lock_init(&dev->lock);
1082 rq = blk_init_queue(blktap_device_do_request, &dev->lock);
1083 if (!rq)
1084 goto error;
1086 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
1087 elevator_init(rq, "noop");
1088 #else
1089 elevator_init(rq, &elevator_noop);
1090 #endif
1092 gd->queue = rq;
1093 rq->queuedata = dev;
1094 dev->gd = gd;
1096 set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
1097 blktap_device_configure(tap);
1099 add_disk(gd);
1101 err = 0;
1102 goto out;
1104 error:
1105 if (gd)
1106 del_gendisk(gd);
1107 if (rq)
1108 blk_cleanup_queue(rq);
1110 out:
1111 BTINFO("creation of %u:%u: %d\n", blktap_device_major, tap->minor, err);
1112 return err;
1113 }
1115 int
1116 blktap_device_init(int *maj)
1117 {
1118 int major;
1120 /* Dynamically allocate a major for this device */
1121 major = register_blkdev(0, "tapdev");
1122 if (major < 0) {
1123 BTERR("Couldn't register blktap device\n");
1124 return -ENOMEM;
1125 }
1127 blktap_device_major = *maj = major;
1128 BTINFO("blktap device major %d\n", major);
1130 return 0;
1131 }
1133 void
1134 blktap_device_free(void)
1135 {
1136 if (blktap_device_major)
1137 if (unregister_blkdev(blktap_device_major, "tapdev"))
1138 BTERR("blktap device unregister failed\n");
1139 }