ia64/linux-2.6.18-xen.hg

view drivers/xen/blktap2/device.c @ 885:a4b49dff3387

blktap2: add TLB flushes properly.

xen_invlpg() only flushes the TLB on the local CPU, but here the flush is
needed on all CPUs, so replace xen_invlpg() with the appropriate generic
flush helpers. It may be possible to reduce the number of TLB flushes later.
This patch also fixes the blktap2 build on ia64, since xen_invlpg() is
x86-specific.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Keir Fraser <keir.fraser@citrix.com>
date Thu May 28 10:04:26 2009 +0100 (2009-05-28)
parents eba6fe6d8d53
children 85a4c18ad9aa
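
The shape of the change, sketched as a minimal diff against the kernel-side
unmap path (the removed line is assumed from the description above, not quoted
from the parent revision; the added flush appears verbatim in blktap_unmap()
below):

 	blktap_umap_uaddr(&init_mm, kvaddr);
-	xen_invlpg(kvaddr);	/* x86-only, flushes the local CPU's TLB */
+	flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE);	/* arch-generic, all CPUs */

The user-space mapping set up in blktap_map() is likewise followed by
flush_tlb_mm(ring->vma->vm_mm) rather than a per-page, local-CPU invalidation.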
line source

#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/cdrom.h>
#include <linux/hdreg.h>
#include <linux/module.h>
#include <asm/tlbflush.h>

#include <scsi/scsi.h>
#include <scsi/scsi_ioctl.h>

#include <xen/xenbus.h>
#include <xen/interface/io/blkif.h>

#include "blktap.h"

#ifdef CONFIG_XEN_BLKDEV_BACKEND
#include "../blkback/blkback-pagemap.h"
#else
struct blkback_pagemap { };
#define blkback_pagemap_read(page) BUG();
#endif

#if 0
#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a)
#else
#define DPRINTK_IOCTL(_f, _a...) ((void)0)
#endif

struct blktap_grant_table {
	int cnt;
	struct gnttab_map_grant_ref grants[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
};

static int blktap_device_major;

static inline struct blktap *
dev_to_blktap(struct blktap_device *dev)
{
	return container_of(dev, struct blktap, device);
}

static int
blktap_device_open(struct inode *inode, struct file *filep)
{
	struct blktap *tap;
	struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;

	if (!dev)
		return -ENOENT;

	tap = dev_to_blktap(dev);
	if (!blktap_active(tap) ||
	    test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
		return -ENOENT;

	dev->users++;

	return 0;
}

static int
blktap_device_release(struct inode *inode, struct file *filep)
{
	struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;
	struct blktap *tap = dev_to_blktap(dev);

	dev->users--;
	if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
		blktap_device_destroy(tap);

	return 0;
}

static int
blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg)
{
	/* We don't have real geometry info, but let's at least return
	   values consistent with the size of the device */
	sector_t nsect = get_capacity(bd->bd_disk);
	sector_t cylinders = nsect;

	hg->heads = 0xff;
	hg->sectors = 0x3f;
	sector_div(cylinders, hg->heads * hg->sectors);
	hg->cylinders = cylinders;
	if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
		hg->cylinders = 0xffff;
	return 0;
}

static int
blktap_device_ioctl(struct inode *inode, struct file *filep,
		    unsigned command, unsigned long argument)
{
	int i;

	DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
		      command, (long)argument, inode->i_rdev);

	switch (command) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
	case HDIO_GETGEO: {
		struct block_device *bd = inode->i_bdev;
		struct hd_geometry geo;
		int ret;

		if (!argument)
			return -EINVAL;

		geo.start = get_start_sect(bd);
		ret = blktap_device_getgeo(bd, &geo);
		if (ret)
			return ret;

		if (copy_to_user((struct hd_geometry __user *)argument, &geo,
				 sizeof(geo)))
			return -EFAULT;

		return 0;
	}
#endif
	case CDROMMULTISESSION:
		BTDBG("FIXME: support multisession CDs later\n");
		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
			if (put_user(0, (char __user *)(argument + i)))
				return -EFAULT;
		return 0;

	case SCSI_IOCTL_GET_IDLUN:
		if (!access_ok(VERIFY_WRITE, argument,
			       sizeof(struct scsi_idlun)))
			return -EFAULT;

		/* return 0 for now. */
		__put_user(0, &((struct scsi_idlun __user *)argument)->dev_id);
		__put_user(0,
			   &((struct scsi_idlun __user *)argument)->host_unique_id);
		return 0;

	default:
		/*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
		  command);*/
		return -EINVAL; /* same return as native Linux */
	}

	return 0;
}

static struct block_device_operations blktap_device_file_operations = {
	.owner = THIS_MODULE,
	.open = blktap_device_open,
	.release = blktap_device_release,
	.ioctl = blktap_device_ioctl,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
	.getgeo = blktap_device_getgeo
#endif
};

static int
blktap_map_uaddr_fn(pte_t *ptep, struct page *pmd_page,
		    unsigned long addr, void *data)
{
	pte_t *pte = (pte_t *)data;

	BTDBG("ptep %p -> %012llx\n", ptep, pte_val(*pte));
	set_pte(ptep, *pte);
	return 0;
}

static int
blktap_map_uaddr(struct mm_struct *mm, unsigned long address, pte_t pte)
{
	return apply_to_page_range(mm, address,
				   PAGE_SIZE, blktap_map_uaddr_fn, &pte);
}

static int
blktap_umap_uaddr_fn(pte_t *ptep, struct page *pmd_page,
		     unsigned long addr, void *data)
{
	struct mm_struct *mm = (struct mm_struct *)data;

	BTDBG("ptep %p\n", ptep);
	pte_clear(mm, addr, ptep);
	return 0;
}

static int
blktap_umap_uaddr(struct mm_struct *mm, unsigned long address)
{
	return apply_to_page_range(mm, address,
				   PAGE_SIZE, blktap_umap_uaddr_fn, mm);
}

static void
blktap_device_end_dequeued_request(struct blktap_device *dev,
				   struct request *req, int uptodate)
{
	int ret;

	ret = end_that_request_first(req, uptodate, req->hard_nr_sectors);
	BUG_ON(ret);

	spin_lock_irq(&dev->lock);
	end_that_request_last(req, uptodate);
	spin_unlock_irq(&dev->lock);
}

/*
 * tap->tap_sem held on entry
 */
static void
blktap_device_fast_flush(struct blktap *tap, struct blktap_request *request)
{
	uint64_t ptep;
	int ret, usr_idx;
	unsigned int i, cnt;
	struct page **map, *page;
	struct blktap_ring *ring;
	struct grant_handle_pair *khandle;
	unsigned long kvaddr, uvaddr, offset;
	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];

	cnt = 0;
	ring = &tap->ring;
	usr_idx = request->usr_idx;
	map = ring->foreign_map.map;

	if (!ring->vma)
		return;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		zap_page_range(ring->vma,
			       MMAP_VADDR(ring->user_vstart, usr_idx, 0),
			       request->nr_pages << PAGE_SHIFT, NULL);

	for (i = 0; i < request->nr_pages; i++) {
		kvaddr = request_to_kaddr(request, i);
		uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);

		khandle = request->handles + i;

		if (khandle->kernel != INVALID_GRANT_HANDLE) {
			gnttab_set_unmap_op(&unmap[cnt], kvaddr,
					    GNTMAP_host_map, khandle->kernel);
			cnt++;
			set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
					    INVALID_P2M_ENTRY);
		}

		if (khandle->user != INVALID_GRANT_HANDLE) {
			BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
			if (create_lookup_pte_addr(ring->vma->vm_mm,
						   uvaddr, &ptep) != 0) {
				BTERR("Couldn't get a pte addr!\n");
				return;
			}

			gnttab_set_unmap_op(&unmap[cnt], ptep,
					    GNTMAP_host_map
					    | GNTMAP_application_map
					    | GNTMAP_contains_pte,
					    khandle->user);
			cnt++;
		}

		offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;

		BTDBG("offset: 0x%08lx, page: %p, request: %p, usr_idx: %d, "
		      "seg: %d, kvaddr: 0x%08lx, khandle: %u, uvaddr: "
		      "0x%08lx, handle: %u\n", offset, map[offset], request,
		      usr_idx, i, kvaddr, khandle->kernel, uvaddr,
		      khandle->user);

		page = map[offset];
		if (page) {
			ClearPageReserved(map[offset]);
			if (PageBlkback(page)) {
				ClearPageBlkback(page);
				set_page_private(page, 0);
			}
		}
		map[offset] = NULL;

		khandle->kernel = INVALID_GRANT_HANDLE;
		khandle->user = INVALID_GRANT_HANDLE;
	}

	if (cnt) {
		ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
						unmap, cnt);
		BUG_ON(ret);
	}

	if (!xen_feature(XENFEAT_auto_translated_physmap))
		zap_page_range(ring->vma,
			       MMAP_VADDR(ring->user_vstart, usr_idx, 0),
			       request->nr_pages << PAGE_SHIFT, NULL);
}

/*
 * tap->tap_sem held on entry
 */
static void
blktap_unmap(struct blktap *tap, struct blktap_request *request)
{
	int i, usr_idx;
	unsigned long kvaddr;

	usr_idx = request->usr_idx;
	down_write(&tap->ring.vma->vm_mm->mmap_sem);

	for (i = 0; i < request->nr_pages; i++) {
		BTDBG("request: %p, seg: %d, kvaddr: 0x%08lx, khandle: %u, "
		      "uvaddr: 0x%08lx, uhandle: %u\n", request, i,
		      request_to_kaddr(request, i),
		      request->handles[i].kernel,
		      MMAP_VADDR(tap->ring.user_vstart, usr_idx, i),
		      request->handles[i].user);

		if (request->handles[i].kernel == INVALID_GRANT_HANDLE) {
			kvaddr = request_to_kaddr(request, i);
			blktap_umap_uaddr(&init_mm, kvaddr);
			flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE);
			set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
					    INVALID_P2M_ENTRY);
		}
	}

	blktap_device_fast_flush(tap, request);
	up_write(&tap->ring.vma->vm_mm->mmap_sem);
}

/*
 * called if the tapdisk process dies unexpectedly.
 * fail and release any pending requests and disable queue.
 */
void
blktap_device_fail_pending_requests(struct blktap *tap)
{
	int usr_idx;
	struct request *req;
	struct blktap_device *dev;
	struct blktap_request *request;

	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
		return;

	down_write(&tap->tap_sem);

	dev = &tap->device;
	for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
		request = tap->pending_requests[usr_idx];
		if (!request || request->status != BLKTAP_REQUEST_PENDING)
			continue;

		BTERR("%u:%u: failing pending %s of %d pages\n",
		      blktap_device_major, tap->minor,
		      (request->operation == BLKIF_OP_READ ?
		       "read" : "write"), request->nr_pages);

		blktap_unmap(tap, request);
		req = (struct request *)(unsigned long)request->id;
		blktap_device_end_dequeued_request(dev, req, 0);
		blktap_request_free(tap, request);
	}

	up_write(&tap->tap_sem);

	spin_lock_irq(&dev->lock);

	/* fail any future requests */
	dev->gd->queue->queuedata = NULL;
	blk_start_queue(dev->gd->queue);

	spin_unlock_irq(&dev->lock);
}

/*
 * tap->tap_sem held on entry
 */
void
blktap_device_finish_request(struct blktap *tap,
			     blkif_response_t *res,
			     struct blktap_request *request)
{
	int uptodate;
	struct request *req;
	struct blktap_device *dev;

	dev = &tap->device;

	blktap_unmap(tap, request);

	req = (struct request *)(unsigned long)request->id;
	uptodate = (res->status == BLKIF_RSP_OKAY);

	BTDBG("req %p res status %d operation %d/%d id %lld\n", req,
	      res->status, res->operation, request->operation, res->id);

	switch (request->operation) {
	case BLKIF_OP_READ:
	case BLKIF_OP_WRITE:
		if (unlikely(res->status != BLKIF_RSP_OKAY))
			BTERR("Bad return from device data "
			      "request: %x\n", res->status);
		blktap_device_end_dequeued_request(dev, req, uptodate);
		break;
	default:
		BUG();
	}

	blktap_request_free(tap, request);
}

static int
blktap_prep_foreign(struct blktap *tap,
		    struct blktap_request *request,
		    blkif_request_t *blkif_req,
		    unsigned int seg, struct page *page,
		    struct blktap_grant_table *table)
{
	uint64_t ptep;
	uint32_t flags;
	struct page *tap_page;
	struct blktap_ring *ring;
	struct blkback_pagemap map;
	unsigned long uvaddr, kvaddr;

	ring = &tap->ring;
	map = blkback_pagemap_read(page);
	blkif_req->seg[seg].gref = map.gref;

	uvaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
	kvaddr = request_to_kaddr(request, seg);
	flags = GNTMAP_host_map |
		(request->operation == BLKIF_OP_WRITE ? GNTMAP_readonly : 0);

	gnttab_set_map_op(&table->grants[table->cnt],
			  kvaddr, flags, map.gref, map.domid);
	table->cnt++;

	/* enable chained tap devices */
	tap_page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
	set_page_private(tap_page, page_private(page));
	SetPageBlkback(tap_page);

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (create_lookup_pte_addr(ring->vma->vm_mm, uvaddr, &ptep)) {
		BTERR("couldn't get a pte addr!\n");
		return -1;
	}

	flags |= GNTMAP_application_map | GNTMAP_contains_pte;
	gnttab_set_map_op(&table->grants[table->cnt],
			  ptep, flags, map.gref, map.domid);
	table->cnt++;

	return 0;
}

static int
blktap_map_foreign(struct blktap *tap,
		   struct blktap_request *request,
		   blkif_request_t *blkif_req,
		   struct blktap_grant_table *table)
{
	struct page *page;
	int i, grant, err, usr_idx;
	struct blktap_ring *ring;
	unsigned long uvaddr, kvaddr, foreign_mfn;

	if (!table->cnt)
		return 0;

	err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
					table->grants, table->cnt);
	BUG_ON(err);

	grant = 0;
	usr_idx = request->usr_idx;
	ring = &tap->ring;

	for (i = 0; i < request->nr_pages; i++) {
		if (!blkif_req->seg[i].gref)
			continue;

		uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
		kvaddr = request_to_kaddr(request, i);

		if (unlikely(table->grants[grant].status)) {
			BTERR("invalid kernel buffer: could not remap it\n");
			err |= 1;
			table->grants[grant].handle = INVALID_GRANT_HANDLE;
		}

		request->handles[i].kernel = table->grants[grant].handle;
		foreign_mfn = table->grants[grant].dev_bus_addr >> PAGE_SHIFT;
		grant++;

		if (xen_feature(XENFEAT_auto_translated_physmap))
			goto done;

		if (unlikely(table->grants[grant].status)) {
			BTERR("invalid user buffer: could not remap it\n");
			err |= 1;
			table->grants[grant].handle = INVALID_GRANT_HANDLE;
		}

		request->handles[i].user = table->grants[grant].handle;
		grant++;

	done:
		if (err)
			continue;

		page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);

		if (!xen_feature(XENFEAT_auto_translated_physmap))
			set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
					    FOREIGN_FRAME(foreign_mfn));
		else if (vm_insert_page(ring->vma, uvaddr, page))
			err |= 1;

		BTDBG("pending_req: %p, seg: %d, page: %p, "
		      "kvaddr: 0x%08lx, khandle: %u, uvaddr: 0x%08lx, "
		      "uhandle: %u\n", request, i, page,
		      kvaddr, request->handles[i].kernel,
		      uvaddr, request->handles[i].user);
	}

	return err;
}

static void
blktap_map(struct blktap *tap,
	   struct blktap_request *request,
	   unsigned int seg, struct page *page)
{
	pte_t pte;
	int usr_idx;
	struct blktap_ring *ring;
	unsigned long uvaddr, kvaddr;

	ring = &tap->ring;
	usr_idx = request->usr_idx;
	uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, seg);
	kvaddr = request_to_kaddr(request, seg);

	pte = mk_pte(page, ring->vma->vm_page_prot);
	blktap_map_uaddr(ring->vma->vm_mm, uvaddr, pte_mkwrite(pte));
	flush_tlb_mm(ring->vma->vm_mm);
	blktap_map_uaddr(&init_mm, kvaddr, mk_pte(page, PAGE_KERNEL));
	flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE);

	set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, pte_mfn(pte));
	request->handles[seg].kernel = INVALID_GRANT_HANDLE;
	request->handles[seg].user = INVALID_GRANT_HANDLE;

	BTDBG("pending_req: %p, seg: %d, page: %p, kvaddr: 0x%08lx, "
	      "uvaddr: 0x%08lx\n", request, seg, page, kvaddr,
	      uvaddr);
}

static int
blktap_device_process_request(struct blktap *tap,
			      struct blktap_request *request,
			      struct request *req)
{
	struct bio *bio;
	struct page *page;
	struct bio_vec *bvec;
	int idx, usr_idx, err;
	struct blktap_ring *ring;
	struct blktap_grant_table table;
	unsigned int fsect, lsect, nr_sects;
	unsigned long offset, uvaddr, kvaddr;
	struct blkif_request blkif_req, *target;

	err = -1;
	memset(&table, 0, sizeof(table));

	if (!blktap_active(tap))
		goto out;

	ring = &tap->ring;
	usr_idx = request->usr_idx;
	blkif_req.id = usr_idx;
	blkif_req.sector_number = (blkif_sector_t)req->sector;
	blkif_req.handle = 0;
	blkif_req.operation = rq_data_dir(req) ?
		BLKIF_OP_WRITE : BLKIF_OP_READ;

	request->id = (unsigned long)req;
	request->operation = blkif_req.operation;
	request->status = BLKTAP_REQUEST_PENDING;
	do_gettimeofday(&request->time);

	nr_sects = 0;
	request->nr_pages = 0;
	blkif_req.nr_segments = 0;
	rq_for_each_bio(bio, req) {
		bio_for_each_segment(bvec, bio, idx) {
			BUG_ON(blkif_req.nr_segments ==
			       BLKIF_MAX_SEGMENTS_PER_REQUEST);

			fsect = bvec->bv_offset >> 9;
			lsect = fsect + (bvec->bv_len >> 9) - 1;
			nr_sects += bvec->bv_len >> 9;

			blkif_req.seg[blkif_req.nr_segments] =
				(struct blkif_request_segment) {
				.gref = 0,
				.first_sect = fsect,
				.last_sect = lsect };

			if (PageBlkback(bvec->bv_page)) {
				/* foreign page -- use xen */
				if (blktap_prep_foreign(tap,
							request,
							&blkif_req,
							blkif_req.nr_segments,
							bvec->bv_page,
							&table))
					goto out;
			} else {
				/* do it the old fashioned way */
				blktap_map(tap,
					   request,
					   blkif_req.nr_segments,
					   bvec->bv_page);
			}

			uvaddr = MMAP_VADDR(ring->user_vstart,
					    usr_idx, blkif_req.nr_segments);
			kvaddr = request_to_kaddr(request,
						  blkif_req.nr_segments);
			offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
			page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
			ring->foreign_map.map[offset] = page;
			SetPageReserved(page);

			BTDBG("mapped uaddr %08lx to page %p pfn 0x%lx\n",
			      uvaddr, page, __pa(kvaddr) >> PAGE_SHIFT);
			BTDBG("offset: 0x%08lx, pending_req: %p, seg: %d, "
			      "page: %p, kvaddr: 0x%08lx, uvaddr: 0x%08lx\n",
			      offset, request, blkif_req.nr_segments,
			      page, kvaddr, uvaddr);

			blkif_req.nr_segments++;
			request->nr_pages++;
		}
	}

	if (blktap_map_foreign(tap, request, &blkif_req, &table))
		goto out;

	/* Finally, write the request message to the user ring. */
	target = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
	memcpy(target, &blkif_req, sizeof(blkif_req));
	target->id = request->usr_idx;
	wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
	ring->ring.req_prod_pvt++;

	if (rq_data_dir(req)) {
		tap->stats.st_wr_sect += nr_sects;
		tap->stats.st_wr_req++;
	} else {
		tap->stats.st_rd_sect += nr_sects;
		tap->stats.st_rd_req++;
	}

	err = 0;

out:
	if (err)
		blktap_device_fast_flush(tap, request);
	return err;
}

#ifdef ENABLE_PASSTHROUGH
#define rq_for_each_bio_safe(_bio, _tmp, _req)				\
	if ((_req)->bio)						\
		for (_bio = (_req)->bio;				\
		     _bio && ((_tmp = _bio->bi_next) || 1);		\
		     _bio = _tmp)

static void
blktap_device_forward_request(struct blktap *tap, struct request *req)
{
	struct bio *bio, *tmp;
	struct blktap_device *dev;

	dev = &tap->device;

	rq_for_each_bio_safe(bio, tmp, req) {
		bio->bi_bdev = dev->bdev;
		submit_bio(bio->bi_rw, bio);
	}
}

static void
blktap_device_close_bdev(struct blktap *tap)
{
	struct blktap_device *dev;

	dev = &tap->device;

	if (dev->bdev)
		blkdev_put(dev->bdev);

	dev->bdev = NULL;
	clear_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
}

static int
blktap_device_open_bdev(struct blktap *tap, u32 pdev)
{
	struct block_device *bdev;
	struct blktap_device *dev;

	dev = &tap->device;

	bdev = open_by_devnum(pdev, FMODE_WRITE);
	if (IS_ERR(bdev)) {
		BTERR("opening device %x:%x failed: %ld\n",
		      MAJOR(pdev), MINOR(pdev), PTR_ERR(bdev));
		return PTR_ERR(bdev);
	}

	if (!bdev->bd_disk) {
		BTERR("device %x:%x doesn't exist\n",
		      MAJOR(pdev), MINOR(pdev));
		blkdev_put(dev->bdev);
		return -ENOENT;
	}

	dev->bdev = bdev;
	set_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);

	/* TODO: readjust queue parameters */

	BTINFO("set device %d to passthrough on %x:%x\n",
	       tap->minor, MAJOR(pdev), MINOR(pdev));

	return 0;
}

int
blktap_device_enable_passthrough(struct blktap *tap,
				 unsigned major, unsigned minor)
{
	u32 pdev;
	struct blktap_device *dev;

	dev = &tap->device;
	pdev = MKDEV(major, minor);

	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
		return -EINVAL;

	if (dev->bdev) {
		if (pdev)
			return -EINVAL;
		blktap_device_close_bdev(tap);
		return 0;
	}

	return blktap_device_open_bdev(tap, pdev);
}
#endif

/*
 * dev->lock held on entry
 */
static void
blktap_device_run_queue(struct blktap *tap)
{
	int queued, err;
	request_queue_t *rq;
	struct request *req;
	struct blktap_ring *ring;
	struct blktap_device *dev;
	struct blktap_request *request;

	queued = 0;
	ring = &tap->ring;
	dev = &tap->device;
	rq = dev->gd->queue;

	BTDBG("running queue for %d\n", tap->minor);

	while ((req = elv_next_request(rq)) != NULL) {
		if (!blk_fs_request(req)) {
			end_request(req, 0);
			continue;
		}

		if (blk_barrier_rq(req)) {
			end_request(req, 0);
			continue;
		}

#ifdef ENABLE_PASSTHROUGH
		if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
			blkdev_dequeue_request(req);
			blktap_device_forward_request(tap, req);
			continue;
		}
#endif

		if (RING_FULL(&ring->ring)) {
		wait:
			/* Avoid pointless unplugs. */
			blk_stop_queue(rq);
			blktap_defer(tap);
			break;
		}

		request = blktap_request_allocate(tap);
		if (!request) {
			tap->stats.st_oo_req++;
			goto wait;
		}

		BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%lx) "
		      "buffer:%p [%s], pending: %p\n", req, tap->minor,
		      req->cmd, req->sector, req->current_nr_sectors,
		      req->nr_sectors, req->buffer,
		      rq_data_dir(req) ? "write" : "read", request);

		blkdev_dequeue_request(req);

		spin_unlock_irq(&dev->lock);
		down_read(&tap->tap_sem);

		err = blktap_device_process_request(tap, request, req);
		if (!err)
			queued++;
		else {
			blktap_device_end_dequeued_request(dev, req, 0);
			blktap_request_free(tap, request);
		}

		up_read(&tap->tap_sem);
		spin_lock_irq(&dev->lock);
	}

	if (queued)
		blktap_ring_kick_user(tap);
}

/*
 * dev->lock held on entry
 */
static void
blktap_device_do_request(request_queue_t *rq)
{
	struct request *req;
	struct blktap *tap;
	struct blktap_device *dev;

	dev = rq->queuedata;
	if (!dev)
		goto fail;

	tap = dev_to_blktap(dev);
	if (!blktap_active(tap))
		goto fail;

	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
	    test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
		blktap_defer(tap);
		return;
	}

	blktap_device_run_queue(tap);
	return;

fail:
	while ((req = elv_next_request(rq))) {
		BTERR("device closed: failing secs %llu - %llu\n",
		      req->sector, req->sector + req->nr_sectors);
		end_request(req, 0);
	}
}

void
blktap_device_restart(struct blktap *tap)
{
	struct blktap_device *dev;

	dev = &tap->device;
	if (!dev->gd || !dev->gd->queue)
		return;

	if (blktap_active(tap) && RING_FULL(&tap->ring.ring)) {
		blktap_defer(tap);
		return;
	}

	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
	    test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
		blktap_defer(tap);
		return;
	}

	spin_lock_irq(&dev->lock);

	/* Re-enable calldowns. */
	if (blk_queue_stopped(dev->gd->queue))
		blk_start_queue(dev->gd->queue);

	/* Kick things off immediately. */
	blktap_device_do_request(dev->gd->queue);

	spin_unlock_irq(&dev->lock);
}

static void
blktap_device_configure(struct blktap *tap)
{
	struct request_queue *rq;
	struct blktap_device *dev = &tap->device;

	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !dev->gd)
		return;

	dev = &tap->device;
	rq = dev->gd->queue;

	spin_lock_irq(&dev->lock);

	set_capacity(dev->gd, tap->params.capacity);

	/* Hard sector size and max sectors impersonate the equiv. hardware. */
	blk_queue_hardsect_size(rq, tap->params.sector_size);
	blk_queue_max_sectors(rq, 512);

	/* Each segment in a request is up to an aligned page in size. */
	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
	blk_queue_max_segment_size(rq, PAGE_SIZE);

	/* Ensure a merged request will fit in a single I/O ring slot. */
	blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
	blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);

	/* Make sure buffer addresses are sector-aligned. */
	blk_queue_dma_alignment(rq, 511);

	spin_unlock_irq(&dev->lock);
}

int
blktap_device_resume(struct blktap *tap)
{
	int err;

	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
		return -ENODEV;

	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
		return 0;

	err = blktap_ring_resume(tap);
	if (err)
		return err;

	/* device size may have changed */
	blktap_device_configure(tap);

	BTDBG("restarting device\n");
	blktap_device_restart(tap);

	return 0;
}

int
blktap_device_pause(struct blktap *tap)
{
	unsigned long flags;
	struct blktap_device *dev = &tap->device;

	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
		return -ENODEV;

	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
		return 0;

	spin_lock_irqsave(&dev->lock, flags);

	blk_stop_queue(dev->gd->queue);
	set_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);

	spin_unlock_irqrestore(&dev->lock, flags);

	return blktap_ring_pause(tap);
}

int
blktap_device_destroy(struct blktap *tap)
{
	struct blktap_device *dev = &tap->device;

	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
		return 0;

	BTINFO("destroy device %d users %d\n", tap->minor, dev->users);

	if (dev->users)
		return -EBUSY;

	spin_lock_irq(&dev->lock);
	/* No more blktap_device_do_request(). */
	blk_stop_queue(dev->gd->queue);
	clear_bit(BLKTAP_DEVICE, &tap->dev_inuse);
	spin_unlock_irq(&dev->lock);

#ifdef ENABLE_PASSTHROUGH
	if (dev->bdev)
		blktap_device_close_bdev(tap);
#endif

	del_gendisk(dev->gd);
	put_disk(dev->gd);
	blk_cleanup_queue(dev->gd->queue);

	dev->gd = NULL;

	wake_up(&tap->wq);

	return 0;
}

int
blktap_device_create(struct blktap *tap)
{
	int minor, err;
	struct gendisk *gd;
	struct request_queue *rq;
	struct blktap_device *dev;

	gd = NULL;
	rq = NULL;
	dev = &tap->device;
	minor = tap->minor;

	if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
		return -EEXIST;

	if (blktap_validate_params(tap, &tap->params))
		return -EINVAL;

	BTINFO("minor %d sectors %Lu sector-size %lu\n",
	       minor, tap->params.capacity, tap->params.sector_size);

	err = -ENODEV;

	gd = alloc_disk(1);
	if (!gd)
		goto error;

	if (minor < 26)
		sprintf(gd->disk_name, "tapdev%c", 'a' + minor);
	else
		sprintf(gd->disk_name, "tapdev%c%c",
			'a' + ((minor / 26) - 1), 'a' + (minor % 26));

	gd->major = blktap_device_major;
	gd->first_minor = minor;
	gd->fops = &blktap_device_file_operations;
	gd->private_data = dev;

	spin_lock_init(&dev->lock);
	rq = blk_init_queue(blktap_device_do_request, &dev->lock);
	if (!rq)
		goto error;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
	elevator_init(rq, "noop");
#else
	elevator_init(rq, &elevator_noop);
#endif

	gd->queue = rq;
	rq->queuedata = dev;
	dev->gd = gd;

	set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
	blktap_device_configure(tap);

	add_disk(gd);

	err = 0;
	goto out;

error:
	if (gd)
		del_gendisk(gd);
	if (rq)
		blk_cleanup_queue(rq);

out:
	BTINFO("creation of %u:%u: %d\n", blktap_device_major, tap->minor, err);
	return err;
}

int
blktap_device_init(int *maj)
{
	int major;

	/* Dynamically allocate a major for this device */
	major = register_blkdev(0, "tapdev");
	if (major < 0) {
		BTERR("Couldn't register blktap device\n");
		return -ENOMEM;
	}

	blktap_device_major = *maj = major;
	BTINFO("blktap device major %d\n", major);

	return 0;
}

void
blktap_device_free(void)
{
	if (blktap_device_major)
		if (unregister_blkdev(blktap_device_major, "tapdev"))
			BTERR("blktap device unregister failed\n");
}