ia64/linux-2.6.18-xen.hg

diff drivers/xen/blktap2/device.c @ 878:eba6fe6d8d53

blktap2: a completely rewritten blktap implementation

Benefits of blktap2 over the old version of blktap:

* Isolation from xenstore - Blktap devices are now created directly on
the Linux dom0 command line, rather than being spawned in response
to XenStore events. This is handy for debugging, makes blktap
generally easier to work with, and is a step toward a generic
user-level block device implementation that is not Xen-specific.

* Improved tapdisk infrastructure: simpler request forwarding, new
request scheduler, request merging, more efficient use of AIO.
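
For readers unfamiliar with the AIO pattern tapdisk builds on, the
libaio submit/reap cycle looks like the following minimal userspace
sketch (the file name, sizes and error handling here are simplified
illustrations, not code taken from tapdisk):

#define _GNU_SOURCE
#include <libaio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

/* Submit one asynchronous read and reap its completion. */
int aio_read_once(const char *path)
{
	io_context_t ctx = 0;
	struct iocb iocb, *iocbs[1] = { &iocb };
	struct io_event event;
	void *buf;
	int fd;

	fd = open(path, O_RDONLY | O_DIRECT);
	if (fd < 0 || posix_memalign(&buf, 512, 4096))
		return -1;

	io_setup(32, &ctx);                     /* one context, many in-flight ops */
	io_prep_pread(&iocb, fd, buf, 4096, 0); /* read 4k from offset 0 */
	io_submit(ctx, 1, iocbs);               /* queues without blocking */
	io_getevents(ctx, 1, 1, &event, NULL);  /* wait for the completion */

	io_destroy(ctx);
	free(buf);
	close(fd);
	return event.res == 4096 ? 0 : -1;
}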

* Improved tapdisk error handling and memory management. No
allocations on the block data path, and IO retry logic to protect
guests from transient block device failures. This has been tested
and is known to work in weird environments such as NFS soft mounts.
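
The retry behaviour amounts to bounded requeueing of failed requests,
along the lines of this sketch (illustrative only; the td_* names,
the structure and TD_RETRIES are made up and do not appear in this
series):

struct td_request {
	int tries;
	/* ... */
};

/* Illustrative hooks: a real tapdisk would resubmit the I/O and
 * complete the blkif request, respectively. */
static void td_requeue(struct td_request *req);
static void td_finish(struct td_request *req, int err);

#define TD_RETRIES 10

static void td_complete(struct td_request *req, int err)
{
	if (err && req->tries++ < TD_RETRIES) {
		td_requeue(req);  /* try the I/O again, e.g. after a delay */
		return;
	}
	td_finish(req, err);      /* report the final status to the guest */
}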

* Pause and snapshot of live virtual disks (see xmsnap script).

* VHD support. The VHD code in this release has been rigorously
tested, and represents a very mature implementation of the VHD
image format.
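
For orientation, the footer that every VHD image carries in its last
512 bytes is laid out roughly as follows (a sketch following the
published VHD specification; this is not code from this series):

#include <stdint.h>

/* VHD hard disk footer: the last 512 bytes of an image.
 * All integer fields are stored big-endian. */
struct vhd_footer {
	char     cookie[8];       /* "conectix" */
	uint32_t features;
	uint32_t version;         /* file format version */
	uint64_t data_offset;     /* dynamic/differencing: header offset;
				     fixed: ~0ULL */
	uint32_t timestamp;       /* seconds since Jan 1, 2000 */
	char     creator_app[4];
	uint32_t creator_version;
	uint32_t creator_os;
	uint64_t orig_size;       /* virtual size at creation, bytes */
	uint64_t curr_size;       /* current virtual size, bytes */
	uint32_t geometry;        /* packed C/H/S hint */
	uint32_t disk_type;       /* 2 fixed, 3 dynamic, 4 differencing */
	uint32_t checksum;        /* one's complement of the byte sum */
	uint8_t  uuid[16];
	uint8_t  saved_state;
	uint8_t  reserved[427];
} __attribute__((__packed__));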

* No more duplication of mechanism with blkback. The blktap kernel
module has changed dramatically from the original blktap. Blkback
is now always used to talk to Xen guests, blktap just presents a
Linux gendisk that blkback can export. This is done while
preserving the zero-copy data path from domU to physical device.
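
The kernel half of that split is ordinary 2.6.18-era gendisk
plumbing; condensed, blktap_device_create() in the patch below
amounts to the following (my_request_fn, my_fops, the device name
and the capacity are placeholders):

#include <linux/blkdev.h>
#include <linux/genhd.h>

static void my_request_fn(request_queue_t *rq);  /* queue callback */
static struct block_device_operations my_fops;   /* open/release/ioctl */
static spinlock_t my_lock = SPIN_LOCK_UNLOCKED;

static int my_blkdev_create(int major, int minor)
{
	struct gendisk *gd;
	struct request_queue *rq;

	gd = alloc_disk(1);
	if (!gd)
		return -ENOMEM;

	rq = blk_init_queue(my_request_fn, &my_lock);
	if (!rq) {
		put_disk(gd);
		return -ENOMEM;
	}

	gd->major       = major;
	gd->first_minor = minor;
	gd->fops        = &my_fops;
	gd->queue       = rq;
	sprintf(gd->disk_name, "mydev%d", minor);
	set_capacity(gd, 2048);   /* 2048 512-byte sectors = 1MB */

	add_disk(gd);             /* the device goes live here */
	return 0;
}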

These patches deprecate the old blktap code, which can hopefully be
removed from the tree completely at some point in the future.

Signed-off-by: Jake Wires <jake.wires@citrix.com>
Signed-off-by: Dutch Meyer <dmeyer@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:23:16 2009 +0100 (2009-05-26)
parents
children a4b49dff3387
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/drivers/xen/blktap2/device.c	Tue May 26 11:23:16 2009 +0100
     1.3 @@ -0,0 +1,1132 @@
     1.4 +#include <linux/fs.h>
     1.5 +#include <linux/blkdev.h>
     1.6 +#include <linux/cdrom.h>
     1.7 +#include <linux/hdreg.h>
     1.8 +#include <linux/module.h>
     1.9 +
    1.10 +#include <scsi/scsi.h>
    1.11 +#include <scsi/scsi_ioctl.h>
    1.12 +
    1.13 +#include <xen/xenbus.h>
    1.14 +#include <xen/interface/io/blkif.h>
    1.15 +
    1.16 +#include "blktap.h"
    1.17 +
    1.18 +#ifdef CONFIG_XEN_BLKDEV_BACKEND
    1.19 +#include "../blkback/blkback-pagemap.h"
    1.20 +#else
    1.21 +struct blkback_pagemap { };
     1.22 +#define blkback_pagemap_read(page) ({ BUG(); (struct blkback_pagemap){}; })
    1.23 +#endif
    1.24 +
    1.25 +#if 0
    1.26 +#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a)
    1.27 +#else
    1.28 +#define DPRINTK_IOCTL(_f, _a...) ((void)0)
    1.29 +#endif
    1.30 +
    1.31 +struct blktap_grant_table {
    1.32 +	int cnt;
    1.33 +	struct gnttab_map_grant_ref grants[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
    1.34 +};
    1.35 +
    1.36 +static int blktap_device_major;
    1.37 +
    1.38 +static inline struct blktap *
    1.39 +dev_to_blktap(struct blktap_device *dev)
    1.40 +{
    1.41 +	return container_of(dev, struct blktap, device);
    1.42 +}
    1.43 +
    1.44 +static int
    1.45 +blktap_device_open(struct inode *inode, struct file *filep)
    1.46 +{
    1.47 +	struct blktap *tap;
    1.48 +	struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;
    1.49 +
    1.50 +	if (!dev)
    1.51 +		return -ENOENT;
    1.52 +
    1.53 +	tap = dev_to_blktap(dev);
    1.54 +	if (!blktap_active(tap) ||
    1.55 +	    test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
    1.56 +		return -ENOENT;
    1.57 +
    1.58 +	dev->users++;
    1.59 +
    1.60 +	return 0;
    1.61 +}
    1.62 +
    1.63 +static int
    1.64 +blktap_device_release(struct inode *inode, struct file *filep)
    1.65 +{
    1.66 +	struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;
    1.67 +	struct blktap *tap = dev_to_blktap(dev);
    1.68 +
    1.69 +	dev->users--;
    1.70 +	if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
    1.71 +		blktap_device_destroy(tap);
    1.72 +
    1.73 +	return 0;
    1.74 +}
    1.75 +
    1.76 +static int
    1.77 +blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg)
    1.78 +{
    1.79 +	/* We don't have real geometry info, but let's at least return
    1.80 +	   values consistent with the size of the device */
    1.81 +	sector_t nsect = get_capacity(bd->bd_disk);
    1.82 +	sector_t cylinders = nsect;
    1.83 +
    1.84 +	hg->heads = 0xff;
    1.85 +	hg->sectors = 0x3f;
    1.86 +	sector_div(cylinders, hg->heads * hg->sectors);
    1.87 +	hg->cylinders = cylinders;
    1.88 +	if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
    1.89 +		hg->cylinders = 0xffff;
    1.90 +	return 0;
    1.91 +}
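/* Worked example for blktap_device_getgeo() above, assuming a
 * hypothetical 2 GiB disk: nsect = 4194304; with heads = 255 and
 * sectors = 63, cylinders = 4194304 / (255 * 63) = 261.  The 0xffff
 * clamp does not fire, since (261 + 1) * 255 * 63 = 4209030 >= 4194304. */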
    1.92 +
    1.93 +static int
    1.94 +blktap_device_ioctl(struct inode *inode, struct file *filep,
    1.95 +		    unsigned command, unsigned long argument)
    1.96 +{
    1.97 +	int i;
    1.98 +
    1.99 +	DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
   1.100 +		      command, (long)argument, inode->i_rdev);
   1.101 +
   1.102 +	switch (command) {
   1.103 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
   1.104 +	case HDIO_GETGEO: {
   1.105 +		struct block_device *bd = inode->i_bdev;
   1.106 +		struct hd_geometry geo;
   1.107 +		int ret;
   1.108 +
    1.109 +		if (!argument)
    1.110 +			return -EINVAL;
   1.111 +
   1.112 +		geo.start = get_start_sect(bd);
   1.113 +		ret = blktap_device_getgeo(bd, &geo);
   1.114 +		if (ret)
   1.115 +			return ret;
   1.116 +
   1.117 +		if (copy_to_user((struct hd_geometry __user *)argument, &geo,
   1.118 +				 sizeof(geo)))
    1.119 +			return -EFAULT;
   1.120 +
    1.121 +		return 0;
   1.122 +	}
   1.123 +#endif
   1.124 +	case CDROMMULTISESSION:
   1.125 +		BTDBG("FIXME: support multisession CDs later\n");
   1.126 +		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
   1.127 +			if (put_user(0, (char __user *)(argument + i)))
   1.128 +				return -EFAULT;
   1.129 +		return 0;
   1.130 +
   1.131 +	case SCSI_IOCTL_GET_IDLUN:
   1.132 +		if (!access_ok(VERIFY_WRITE, argument, 
   1.133 +			sizeof(struct scsi_idlun)))
   1.134 +			return -EFAULT;
   1.135 +
   1.136 +		/* return 0 for now. */
   1.137 +		__put_user(0, &((struct scsi_idlun __user *)argument)->dev_id);
   1.138 +		__put_user(0, 
   1.139 +			&((struct scsi_idlun __user *)argument)->host_unique_id);
   1.140 +		return 0;
   1.141 +
   1.142 +	default:
   1.143 +		/*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
   1.144 +		  command);*/
   1.145 +		return -EINVAL; /* same return as native Linux */
   1.146 +	}
   1.147 +
   1.148 +	return 0;
   1.149 +}
   1.150 +
   1.151 +static struct block_device_operations blktap_device_file_operations = {
   1.152 +	.owner     = THIS_MODULE,
   1.153 +	.open      = blktap_device_open,
   1.154 +	.release   = blktap_device_release,
   1.155 +	.ioctl     = blktap_device_ioctl,
   1.156 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
   1.157 +	.getgeo    = blktap_device_getgeo
   1.158 +#endif
   1.159 +};
   1.160 +
   1.161 +static int
   1.162 +blktap_map_uaddr_fn(pte_t *ptep, struct page *pmd_page,
   1.163 +		    unsigned long addr, void *data)
   1.164 +{
   1.165 +	pte_t *pte = (pte_t *)data;
   1.166 +
   1.167 +	BTDBG("ptep %p -> %012llx\n", ptep, pte_val(*pte));
   1.168 +	set_pte(ptep, *pte);
   1.169 +	xen_invlpg(addr);
   1.170 +	return 0;
   1.171 +}
   1.172 +
   1.173 +static int
   1.174 +blktap_map_uaddr(struct mm_struct *mm, unsigned long address, pte_t pte)
   1.175 +{
   1.176 +	return apply_to_page_range(mm, address,
   1.177 +				   PAGE_SIZE, blktap_map_uaddr_fn, &pte);
   1.178 +}
   1.179 +
   1.180 +static int
   1.181 +blktap_umap_uaddr_fn(pte_t *ptep, struct page *pmd_page,
   1.182 +		     unsigned long addr, void *data)
   1.183 +{
   1.184 +	struct mm_struct *mm = (struct mm_struct *)data;
   1.185 +
   1.186 +	BTDBG("ptep %p\n", ptep);
   1.187 +	pte_clear(mm, addr, ptep);
   1.188 +	xen_invlpg(addr);
   1.189 +	return 0;
   1.190 +}
   1.191 +
   1.192 +static int
   1.193 +blktap_umap_uaddr(struct mm_struct *mm, unsigned long address)
   1.194 +{
   1.195 +	return apply_to_page_range(mm, address,
   1.196 +				   PAGE_SIZE, blktap_umap_uaddr_fn, mm);
   1.197 +}
   1.198 +
   1.199 +static void
   1.200 +blktap_device_end_dequeued_request(struct blktap_device *dev,
   1.201 +				   struct request *req, int uptodate)
   1.202 +{
   1.203 +	int ret;
   1.204 +
   1.205 +	ret = end_that_request_first(req, uptodate, req->hard_nr_sectors);
   1.206 +	BUG_ON(ret);
   1.207 +
   1.208 +	spin_lock_irq(&dev->lock);
   1.209 +	end_that_request_last(req, uptodate);
   1.210 +	spin_unlock_irq(&dev->lock);
   1.211 +}
   1.212 +
   1.213 +/*
   1.214 + * tap->tap_sem held on entry
   1.215 + */
   1.216 +static void
   1.217 +blktap_device_fast_flush(struct blktap *tap, struct blktap_request *request)
   1.218 +{
   1.219 +	uint64_t ptep;
   1.220 +	int ret, usr_idx;
   1.221 +	unsigned int i, cnt;
   1.222 +	struct page **map, *page;
   1.223 +	struct blktap_ring *ring;
   1.224 +	struct grant_handle_pair *khandle;
   1.225 +	unsigned long kvaddr, uvaddr, offset;
   1.226 +	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
   1.227 +
   1.228 +	cnt     = 0;
   1.229 +	ring    = &tap->ring;
   1.230 +	usr_idx = request->usr_idx;
   1.231 +	map     = ring->foreign_map.map;
   1.232 +
   1.233 +	if (!ring->vma)
   1.234 +		return;
   1.235 +
   1.236 +	if (xen_feature(XENFEAT_auto_translated_physmap))
   1.237 +		zap_page_range(ring->vma, 
   1.238 +			       MMAP_VADDR(ring->user_vstart, usr_idx, 0),
   1.239 +			       request->nr_pages << PAGE_SHIFT, NULL);
   1.240 +
   1.241 +	for (i = 0; i < request->nr_pages; i++) {
   1.242 +		kvaddr = request_to_kaddr(request, i);
   1.243 +		uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
   1.244 +
   1.245 +		khandle = request->handles + i;
   1.246 +
   1.247 +		if (khandle->kernel != INVALID_GRANT_HANDLE) {
   1.248 +			gnttab_set_unmap_op(&unmap[cnt], kvaddr,
   1.249 +					    GNTMAP_host_map, khandle->kernel);
   1.250 +			cnt++;
   1.251 +			set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
   1.252 +					    INVALID_P2M_ENTRY);
   1.253 +		}
   1.254 +
   1.255 +		if (khandle->user != INVALID_GRANT_HANDLE) {
   1.256 +			BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
   1.257 +			if (create_lookup_pte_addr(ring->vma->vm_mm,
   1.258 +						   uvaddr, &ptep) != 0) {
   1.259 +				BTERR("Couldn't get a pte addr!\n");
   1.260 +				return;
   1.261 +			}
   1.262 +
   1.263 +			gnttab_set_unmap_op(&unmap[cnt], ptep,
   1.264 +					    GNTMAP_host_map
   1.265 +					    | GNTMAP_application_map
   1.266 +					    | GNTMAP_contains_pte,
   1.267 +					    khandle->user);
   1.268 +			cnt++;
   1.269 +		}
   1.270 +
   1.271 +		offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
   1.272 +
   1.273 +		BTDBG("offset: 0x%08lx, page: %p, request: %p, usr_idx: %d, "
   1.274 +		      "seg: %d, kvaddr: 0x%08lx, khandle: %u, uvaddr: "
   1.275 +		      "0x%08lx, handle: %u\n", offset, map[offset], request,
   1.276 +		      usr_idx, i, kvaddr, khandle->kernel, uvaddr,
   1.277 +		      khandle->user);
   1.278 +
   1.279 +		page = map[offset];
   1.280 +		if (page) {
   1.281 +			ClearPageReserved(map[offset]);
   1.282 +			if (PageBlkback(page)) {
   1.283 +				ClearPageBlkback(page);
   1.284 +				set_page_private(page, 0);
   1.285 +			}
   1.286 +		}
   1.287 +		map[offset] = NULL;
   1.288 +
   1.289 +		khandle->kernel = INVALID_GRANT_HANDLE;
   1.290 +		khandle->user   = INVALID_GRANT_HANDLE;
   1.291 +	}
   1.292 +
   1.293 +	if (cnt) {
   1.294 +		ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
   1.295 +						unmap, cnt);
   1.296 +		BUG_ON(ret);
   1.297 +	}
   1.298 +
   1.299 +	if (!xen_feature(XENFEAT_auto_translated_physmap))
   1.300 +		zap_page_range(ring->vma, 
   1.301 +			       MMAP_VADDR(ring->user_vstart, usr_idx, 0), 
   1.302 +			       request->nr_pages << PAGE_SHIFT, NULL);
   1.303 +}
   1.304 +
   1.305 +/*
   1.306 + * tap->tap_sem held on entry
   1.307 + */
   1.308 +static void
   1.309 +blktap_unmap(struct blktap *tap, struct blktap_request *request)
   1.310 +{
   1.311 +	int i, usr_idx;
   1.312 +	unsigned long kvaddr;
   1.313 +
   1.314 +	usr_idx = request->usr_idx;
   1.315 +	down_write(&tap->ring.vma->vm_mm->mmap_sem);
   1.316 +
   1.317 +	for (i = 0; i < request->nr_pages; i++) {
   1.318 +		BTDBG("request: %p, seg: %d, kvaddr: 0x%08lx, khandle: %u, "
   1.319 +		      "uvaddr: 0x%08lx, uhandle: %u\n", request, i,
   1.320 +		      request_to_kaddr(request, i),
   1.321 +		      request->handles[i].kernel,
   1.322 +		      MMAP_VADDR(tap->ring.user_vstart, usr_idx, i),
   1.323 +		      request->handles[i].user);
   1.324 +
   1.325 +		if (request->handles[i].kernel == INVALID_GRANT_HANDLE) {
   1.326 +			kvaddr = request_to_kaddr(request, i);
   1.327 +			blktap_umap_uaddr(&init_mm, kvaddr);
   1.328 +			set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
   1.329 +					    INVALID_P2M_ENTRY);
   1.330 +		}
   1.331 +	}
   1.332 +
   1.333 +	blktap_device_fast_flush(tap, request);
   1.334 +	up_write(&tap->ring.vma->vm_mm->mmap_sem);
   1.335 +}
   1.336 +
   1.337 +/*
   1.338 + * called if the tapdisk process dies unexpectedly.
   1.339 + * fail and release any pending requests and disable queue.
   1.340 + */
   1.341 +void
   1.342 +blktap_device_fail_pending_requests(struct blktap *tap)
   1.343 +{
   1.344 +	int usr_idx;
   1.345 +	struct request *req;
   1.346 +	struct blktap_device *dev;
   1.347 +	struct blktap_request *request;
   1.348 +
   1.349 +	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
   1.350 +		return;
   1.351 +
   1.352 +	down_write(&tap->tap_sem);
   1.353 +
   1.354 +	dev = &tap->device;
   1.355 +	for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
   1.356 +		request = tap->pending_requests[usr_idx];
   1.357 +		if (!request || request->status != BLKTAP_REQUEST_PENDING)
   1.358 +			continue;
   1.359 +
   1.360 +		BTERR("%u:%u: failing pending %s of %d pages\n",
   1.361 +		      blktap_device_major, tap->minor,
   1.362 +		      (request->operation == BLKIF_OP_READ ?
   1.363 +		       "read" : "write"), request->nr_pages);
   1.364 +
   1.365 +		blktap_unmap(tap, request);
   1.366 +		req = (struct request *)(unsigned long)request->id;
   1.367 +		blktap_device_end_dequeued_request(dev, req, 0);
   1.368 +		blktap_request_free(tap, request);
   1.369 +	}
   1.370 +
   1.371 +	up_write(&tap->tap_sem);
   1.372 +
   1.373 +	spin_lock_irq(&dev->lock);
   1.374 +
   1.375 +	/* fail any future requests */
   1.376 +	dev->gd->queue->queuedata = NULL;
   1.377 +	blk_start_queue(dev->gd->queue);
   1.378 +
   1.379 +	spin_unlock_irq(&dev->lock);
   1.380 +}
   1.381 +
   1.382 +/*
   1.383 + * tap->tap_sem held on entry
   1.384 + */
   1.385 +void
   1.386 +blktap_device_finish_request(struct blktap *tap,
   1.387 +			     blkif_response_t *res,
   1.388 +			     struct blktap_request *request)
   1.389 +{
   1.390 +	int uptodate;
   1.391 +	struct request *req;
   1.392 +	struct blktap_device *dev;
   1.393 +
   1.394 +	dev = &tap->device;
   1.395 +
   1.396 +	blktap_unmap(tap, request);
   1.397 +
   1.398 +	req = (struct request *)(unsigned long)request->id;
   1.399 +	uptodate = (res->status == BLKIF_RSP_OKAY);
   1.400 +
   1.401 +	BTDBG("req %p res status %d operation %d/%d id %lld\n", req,
   1.402 +		res->status, res->operation, request->operation, res->id);
   1.403 +
   1.404 +	switch (request->operation) {
   1.405 +	case BLKIF_OP_READ:
   1.406 +	case BLKIF_OP_WRITE:
   1.407 +		if (unlikely(res->status != BLKIF_RSP_OKAY))
   1.408 +			BTERR("Bad return from device data "
   1.409 +				"request: %x\n", res->status);
   1.410 +		blktap_device_end_dequeued_request(dev, req, uptodate);
   1.411 +		break;
   1.412 +	default:
   1.413 +		BUG();
   1.414 +	}
   1.415 +
   1.416 +	blktap_request_free(tap, request);
   1.417 +}
   1.418 +
   1.419 +static int
   1.420 +blktap_prep_foreign(struct blktap *tap,
   1.421 +		    struct blktap_request *request,
   1.422 +		    blkif_request_t *blkif_req,
   1.423 +		    unsigned int seg, struct page *page,
   1.424 +		    struct blktap_grant_table *table)
   1.425 +{
   1.426 +	uint64_t ptep;
   1.427 +	uint32_t flags;
   1.428 +	struct page *tap_page;
   1.429 +	struct blktap_ring *ring;
   1.430 +	struct blkback_pagemap map;
   1.431 +	unsigned long uvaddr, kvaddr;
   1.432 +
   1.433 +	ring = &tap->ring;
   1.434 +	map  = blkback_pagemap_read(page);
   1.435 +	blkif_req->seg[seg].gref = map.gref;
   1.436 +
   1.437 +	uvaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
   1.438 +	kvaddr = request_to_kaddr(request, seg);
   1.439 +	flags  = GNTMAP_host_map |
   1.440 +		(request->operation == BLKIF_OP_WRITE ? GNTMAP_readonly : 0);
   1.441 +
   1.442 +	gnttab_set_map_op(&table->grants[table->cnt],
   1.443 +			  kvaddr, flags, map.gref, map.domid);
   1.444 +	table->cnt++;
   1.445 +
   1.446 +	/* enable chained tap devices */
   1.447 +	tap_page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
   1.448 +	set_page_private(tap_page, page_private(page));
   1.449 +	SetPageBlkback(tap_page);
   1.450 +
   1.451 +	if (xen_feature(XENFEAT_auto_translated_physmap))
   1.452 +		return 0;
   1.453 +
   1.454 +	if (create_lookup_pte_addr(ring->vma->vm_mm, uvaddr, &ptep)) {
   1.455 +		BTERR("couldn't get a pte addr!\n");
   1.456 +		return -1;
   1.457 +	}
   1.458 +
   1.459 +	flags |= GNTMAP_application_map | GNTMAP_contains_pte;
   1.460 +	gnttab_set_map_op(&table->grants[table->cnt],
   1.461 +			  ptep, flags, map.gref, map.domid);
   1.462 +	table->cnt++;
   1.463 +
   1.464 +	return 0;
   1.465 +}
   1.466 +
   1.467 +static int
   1.468 +blktap_map_foreign(struct blktap *tap,
   1.469 +		   struct blktap_request *request,
   1.470 +		   blkif_request_t *blkif_req,
   1.471 +		   struct blktap_grant_table *table)
   1.472 +{
   1.473 +	struct page *page;
   1.474 +	int i, grant, err, usr_idx;
   1.475 +	struct blktap_ring *ring;
   1.476 +	unsigned long uvaddr, kvaddr, foreign_mfn;
   1.477 +
   1.478 +	if (!table->cnt)
   1.479 +		return 0;
   1.480 +
   1.481 +	err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
   1.482 +					table->grants, table->cnt);
   1.483 +	BUG_ON(err);
   1.484 +
   1.485 +	grant   = 0;
   1.486 +	usr_idx = request->usr_idx;
   1.487 +	ring    = &tap->ring;
   1.488 +
   1.489 +	for (i = 0; i < request->nr_pages; i++) {
   1.490 +		if (!blkif_req->seg[i].gref)
   1.491 +			continue;
   1.492 +
   1.493 +		uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
   1.494 +		kvaddr = request_to_kaddr(request, i);
   1.495 +
   1.496 +		if (unlikely(table->grants[grant].status)) {
   1.497 +			BTERR("invalid kernel buffer: could not remap it\n");
   1.498 +			err |= 1;
   1.499 +			table->grants[grant].handle = INVALID_GRANT_HANDLE;
   1.500 +		}
   1.501 +
   1.502 +		request->handles[i].kernel = table->grants[grant].handle;
   1.503 +		foreign_mfn = table->grants[grant].dev_bus_addr >> PAGE_SHIFT;
   1.504 +		grant++;
   1.505 +
   1.506 +		if (xen_feature(XENFEAT_auto_translated_physmap))
   1.507 +			goto done;
   1.508 +
   1.509 +		if (unlikely(table->grants[grant].status)) {
   1.510 +			BTERR("invalid user buffer: could not remap it\n");
   1.511 +			err |= 1;
   1.512 +			table->grants[grant].handle = INVALID_GRANT_HANDLE;
   1.513 +		}
   1.514 +
   1.515 +		request->handles[i].user = table->grants[grant].handle;
   1.516 +		grant++;
   1.517 +
   1.518 +	done:
   1.519 +		if (err)
   1.520 +			continue;
   1.521 +
   1.522 +		page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
   1.523 +
   1.524 +		if (!xen_feature(XENFEAT_auto_translated_physmap))
   1.525 +			set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
   1.526 +					    FOREIGN_FRAME(foreign_mfn));
   1.527 +		else if (vm_insert_page(ring->vma, uvaddr, page))
   1.528 +			err |= 1;
   1.529 +
   1.530 +		BTDBG("pending_req: %p, seg: %d, page: %p, "
   1.531 +		      "kvaddr: 0x%08lx, khandle: %u, uvaddr: 0x%08lx, "
   1.532 +		      "uhandle: %u\n", request, i, page,
   1.533 +		      kvaddr, request->handles[i].kernel,		       
   1.534 +		      uvaddr, request->handles[i].user);
   1.535 +	}
   1.536 +
   1.537 +	return err;
   1.538 +}
   1.539 +
   1.540 +static void
   1.541 +blktap_map(struct blktap *tap,
   1.542 +	   struct blktap_request *request,
   1.543 +	   unsigned int seg, struct page *page)
   1.544 +{
   1.545 +	pte_t pte;
   1.546 +	int usr_idx;
   1.547 +	struct blktap_ring *ring;
   1.548 +	unsigned long uvaddr, kvaddr;
   1.549 +
   1.550 +	ring    = &tap->ring;
   1.551 +	usr_idx = request->usr_idx;
   1.552 +	uvaddr  = MMAP_VADDR(ring->user_vstart, usr_idx, seg);
   1.553 +	kvaddr  = request_to_kaddr(request, seg);
   1.554 +
   1.555 +	pte = mk_pte(page, ring->vma->vm_page_prot);
   1.556 +	blktap_map_uaddr(ring->vma->vm_mm, uvaddr, pte_mkwrite(pte));
   1.557 +	blktap_map_uaddr(&init_mm, kvaddr, mk_pte(page, PAGE_KERNEL));
   1.558 +
   1.559 +	set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, pte_mfn(pte));
   1.560 +	request->handles[seg].kernel = INVALID_GRANT_HANDLE;
   1.561 +	request->handles[seg].user   = INVALID_GRANT_HANDLE;
   1.562 +
   1.563 +	BTDBG("pending_req: %p, seg: %d, page: %p, kvaddr: 0x%08lx, "
   1.564 +	      "uvaddr: 0x%08lx\n", request, seg, page, kvaddr,
   1.565 +	      uvaddr);
   1.566 +}
   1.567 +
   1.568 +static int
   1.569 +blktap_device_process_request(struct blktap *tap,
   1.570 +			      struct blktap_request *request,
   1.571 +			      struct request *req)
   1.572 +{
   1.573 +	struct bio *bio;
   1.574 +	struct page *page;
   1.575 +	struct bio_vec *bvec;
   1.576 +	int idx, usr_idx, err;
   1.577 +	struct blktap_ring *ring;
   1.578 +	struct blktap_grant_table table;
   1.579 +	unsigned int fsect, lsect, nr_sects;
   1.580 +	unsigned long offset, uvaddr, kvaddr;
   1.581 +	struct blkif_request blkif_req, *target;
   1.582 +
   1.583 +	err = -1;
   1.584 +	memset(&table, 0, sizeof(table));
   1.585 +
   1.586 +	if (!blktap_active(tap))
   1.587 +		goto out;
   1.588 +
   1.589 +	ring    = &tap->ring;
   1.590 +	usr_idx = request->usr_idx;
   1.591 +	blkif_req.id = usr_idx;
   1.592 +	blkif_req.sector_number = (blkif_sector_t)req->sector;
   1.593 +	blkif_req.handle = 0;
   1.594 +	blkif_req.operation = rq_data_dir(req) ?
   1.595 +		BLKIF_OP_WRITE : BLKIF_OP_READ;
   1.596 +
   1.597 +	request->id        = (unsigned long)req;
   1.598 +	request->operation = blkif_req.operation;
   1.599 +	request->status    = BLKTAP_REQUEST_PENDING;
   1.600 +	do_gettimeofday(&request->time);
   1.601 +
   1.602 +	nr_sects = 0;
   1.603 +	request->nr_pages = 0;
   1.604 +	blkif_req.nr_segments = 0;
   1.605 +	rq_for_each_bio(bio, req) {
   1.606 +		bio_for_each_segment(bvec, bio, idx) {
   1.607 +			BUG_ON(blkif_req.nr_segments ==
   1.608 +			       BLKIF_MAX_SEGMENTS_PER_REQUEST);
   1.609 +
   1.610 +			fsect     = bvec->bv_offset >> 9;
   1.611 +			lsect     = fsect + (bvec->bv_len >> 9) - 1;
   1.612 +			nr_sects += bvec->bv_len >> 9;
   1.613 +
   1.614 +			blkif_req.seg[blkif_req.nr_segments] =
   1.615 +				(struct blkif_request_segment) {
   1.616 +				.gref       = 0,
   1.617 +				.first_sect = fsect,
   1.618 +				.last_sect  = lsect };
   1.619 +
   1.620 +			if (PageBlkback(bvec->bv_page)) {
   1.621 +				/* foreign page -- use xen */
   1.622 +				if (blktap_prep_foreign(tap,
   1.623 +							request,
   1.624 +							&blkif_req,
   1.625 +							blkif_req.nr_segments,
   1.626 +							bvec->bv_page,
   1.627 +							&table))
   1.628 +					goto out;
   1.629 +			} else {
   1.630 +				/* do it the old fashioned way */
   1.631 +				blktap_map(tap,
   1.632 +					   request,
   1.633 +					   blkif_req.nr_segments,
   1.634 +					   bvec->bv_page);
   1.635 +			}
   1.636 +
   1.637 +			uvaddr = MMAP_VADDR(ring->user_vstart,
   1.638 +					    usr_idx, blkif_req.nr_segments);
   1.639 +			kvaddr = request_to_kaddr(request,
   1.640 +						  blkif_req.nr_segments);
   1.641 +			offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
   1.642 +			page   = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
   1.643 +			ring->foreign_map.map[offset] = page;
   1.644 +			SetPageReserved(page);
   1.645 +
   1.646 +			BTDBG("mapped uaddr %08lx to page %p pfn 0x%lx\n",
   1.647 +			      uvaddr, page, __pa(kvaddr) >> PAGE_SHIFT);
   1.648 +			BTDBG("offset: 0x%08lx, pending_req: %p, seg: %d, "
   1.649 +			      "page: %p, kvaddr: 0x%08lx, uvaddr: 0x%08lx\n",
   1.650 +			      offset, request, blkif_req.nr_segments,
   1.651 +			      page, kvaddr, uvaddr);
   1.652 +
   1.653 +			blkif_req.nr_segments++;
   1.654 +			request->nr_pages++;
   1.655 +		}
   1.656 +	}
   1.657 +
   1.658 +	if (blktap_map_foreign(tap, request, &blkif_req, &table))
   1.659 +		goto out;
   1.660 +
   1.661 +	/* Finally, write the request message to the user ring. */
   1.662 +	target = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
   1.663 +	memcpy(target, &blkif_req, sizeof(blkif_req));
   1.664 +	target->id = request->usr_idx;
   1.665 +	wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
   1.666 +	ring->ring.req_prod_pvt++;
   1.667 +
   1.668 +	if (rq_data_dir(req)) {
   1.669 +		tap->stats.st_wr_sect += nr_sects;
   1.670 +		tap->stats.st_wr_req++;
   1.671 +	} else {
   1.672 +		tap->stats.st_rd_sect += nr_sects;
   1.673 +		tap->stats.st_rd_req++;
   1.674 +	}
   1.675 +
   1.676 +	err = 0;
   1.677 +
   1.678 +out:
   1.679 +	if (err)
   1.680 +		blktap_device_fast_flush(tap, request);
   1.681 +	return err;
   1.682 +}
   1.683 +
   1.684 +#ifdef ENABLE_PASSTHROUGH
   1.685 +#define rq_for_each_bio_safe(_bio, _tmp, _req)				\
   1.686 +	if ((_req)->bio)						\
   1.687 +		for (_bio = (_req)->bio;				\
   1.688 +		     _bio && ((_tmp = _bio->bi_next) || 1);		\
   1.689 +		     _bio = _tmp)
   1.690 +
   1.691 +static void
   1.692 +blktap_device_forward_request(struct blktap *tap, struct request *req)
   1.693 +{
   1.694 +	struct bio *bio, *tmp;
   1.695 +	struct blktap_device *dev;
   1.696 +
   1.697 +	dev = &tap->device;
   1.698 +
   1.699 +	rq_for_each_bio_safe(bio, tmp, req) {
   1.700 +		bio->bi_bdev = dev->bdev;
   1.701 +		submit_bio(bio->bi_rw, bio);
   1.702 +	}
   1.703 +}
   1.704 +
   1.705 +static void
   1.706 +blktap_device_close_bdev(struct blktap *tap)
   1.707 +{
   1.708 +	struct blktap_device *dev;
   1.709 +
   1.710 +	dev = &tap->device;
   1.711 +
   1.712 +	if (dev->bdev)
   1.713 +		blkdev_put(dev->bdev);
   1.714 +
   1.715 +	dev->bdev = NULL;
   1.716 +	clear_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
   1.717 +}
   1.718 +
   1.719 +static int
   1.720 +blktap_device_open_bdev(struct blktap *tap, u32 pdev)
   1.721 +{
   1.722 +	struct block_device *bdev;
   1.723 +	struct blktap_device *dev;
   1.724 +
   1.725 +	dev = &tap->device;
   1.726 +
   1.727 +	bdev = open_by_devnum(pdev, FMODE_WRITE);
   1.728 +	if (IS_ERR(bdev)) {
   1.729 +		BTERR("opening device %x:%x failed: %ld\n",
   1.730 +		      MAJOR(pdev), MINOR(pdev), PTR_ERR(bdev));
   1.731 +		return PTR_ERR(bdev);
   1.732 +	}
   1.733 +
   1.734 +	if (!bdev->bd_disk) {
   1.735 +		BTERR("device %x:%x doesn't exist\n",
   1.736 +		      MAJOR(pdev), MINOR(pdev));
    1.737 +		blkdev_put(bdev);
   1.738 +		return -ENOENT;
   1.739 +	}
   1.740 +
   1.741 +	dev->bdev = bdev;
   1.742 +	set_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
   1.743 +
   1.744 +	/* TODO: readjust queue parameters */
   1.745 +
   1.746 +	BTINFO("set device %d to passthrough on %x:%x\n",
   1.747 +	       tap->minor, MAJOR(pdev), MINOR(pdev));
   1.748 +
   1.749 +	return 0;
   1.750 +}
   1.751 +
   1.752 +int
   1.753 +blktap_device_enable_passthrough(struct blktap *tap,
   1.754 +				 unsigned major, unsigned minor)
   1.755 +{
   1.756 +	u32 pdev;
   1.757 +	struct blktap_device *dev;
   1.758 +
   1.759 +	dev  = &tap->device;
   1.760 +	pdev = MKDEV(major, minor);
   1.761 +
   1.762 +	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
   1.763 +		return -EINVAL;
   1.764 +
   1.765 +	if (dev->bdev) {
   1.766 +		if (pdev)
   1.767 +			return -EINVAL;
   1.768 +		blktap_device_close_bdev(tap);
   1.769 +		return 0;
   1.770 +	}
   1.771 +
   1.772 +	return blktap_device_open_bdev(tap, pdev);
   1.773 +}
   1.774 +#endif
   1.775 +
   1.776 +/*
   1.777 + * dev->lock held on entry
   1.778 + */
   1.779 +static void
   1.780 +blktap_device_run_queue(struct blktap *tap)
   1.781 +{
   1.782 +	int queued, err;
   1.783 +	request_queue_t *rq;
   1.784 +	struct request *req;
   1.785 +	struct blktap_ring *ring;
   1.786 +	struct blktap_device *dev;
   1.787 +	struct blktap_request *request;
   1.788 +
   1.789 +	queued = 0;
   1.790 +	ring   = &tap->ring;
   1.791 +	dev    = &tap->device;
   1.792 +	rq     = dev->gd->queue;
   1.793 +
   1.794 +	BTDBG("running queue for %d\n", tap->minor);
   1.795 +
   1.796 +	while ((req = elv_next_request(rq)) != NULL) {
   1.797 +		if (!blk_fs_request(req)) {
   1.798 +			end_request(req, 0);
   1.799 +			continue;
   1.800 +		}
   1.801 +
   1.802 +		if (blk_barrier_rq(req)) {
   1.803 +			end_request(req, 0);
   1.804 +			continue;
   1.805 +		}
   1.806 +
   1.807 +#ifdef ENABLE_PASSTHROUGH
   1.808 +		if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
   1.809 +			blkdev_dequeue_request(req);
   1.810 +			blktap_device_forward_request(tap, req);
   1.811 +			continue;
   1.812 +		}
   1.813 +#endif
   1.814 +
   1.815 +		if (RING_FULL(&ring->ring)) {
   1.816 +		wait:
   1.817 +			/* Avoid pointless unplugs. */
   1.818 +			blk_stop_queue(rq);
   1.819 +			blktap_defer(tap);
   1.820 +			break;
   1.821 +		}
   1.822 +
   1.823 +		request = blktap_request_allocate(tap);
   1.824 +		if (!request) {
   1.825 +			tap->stats.st_oo_req++;
   1.826 +			goto wait;
   1.827 +		}
   1.828 +
   1.829 +		BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%lx) "
   1.830 +		      "buffer:%p [%s], pending: %p\n", req, tap->minor,
   1.831 +		      req->cmd, req->sector, req->current_nr_sectors,
   1.832 +		      req->nr_sectors, req->buffer,
   1.833 +		      rq_data_dir(req) ? "write" : "read", request);
   1.834 +
   1.835 +		blkdev_dequeue_request(req);
   1.836 +
   1.837 +		spin_unlock_irq(&dev->lock);
   1.838 +		down_read(&tap->tap_sem);
   1.839 +
   1.840 +		err = blktap_device_process_request(tap, request, req);
   1.841 +		if (!err)
   1.842 +			queued++;
   1.843 +		else {
   1.844 +			blktap_device_end_dequeued_request(dev, req, 0);
   1.845 +			blktap_request_free(tap, request);
   1.846 +		}
   1.847 +
   1.848 +		up_read(&tap->tap_sem);
   1.849 +		spin_lock_irq(&dev->lock);
   1.850 +	}
   1.851 +
   1.852 +	if (queued)
   1.853 +		blktap_ring_kick_user(tap);
   1.854 +}
   1.855 +
   1.856 +/*
   1.857 + * dev->lock held on entry
   1.858 + */
   1.859 +static void
   1.860 +blktap_device_do_request(request_queue_t *rq)
   1.861 +{
   1.862 +	struct request *req;
   1.863 +	struct blktap *tap;
   1.864 +	struct blktap_device *dev;
   1.865 +
   1.866 +	dev = rq->queuedata;
   1.867 +	if (!dev)
   1.868 +		goto fail;
   1.869 +
   1.870 +	tap = dev_to_blktap(dev);
   1.871 +	if (!blktap_active(tap))
   1.872 +		goto fail;
   1.873 +
   1.874 +	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
   1.875 +	    test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
   1.876 +		blktap_defer(tap);
   1.877 +		return;
   1.878 +	}
   1.879 +
   1.880 +	blktap_device_run_queue(tap);
   1.881 +	return;
   1.882 +
   1.883 +fail:
   1.884 +	while ((req = elv_next_request(rq))) {
   1.885 +		BTERR("device closed: failing secs %llu - %llu\n",
   1.886 +		      req->sector, req->sector + req->nr_sectors);
   1.887 +		end_request(req, 0);
   1.888 +	}
   1.889 +}
   1.890 +
   1.891 +void
   1.892 +blktap_device_restart(struct blktap *tap)
   1.893 +{
   1.894 +	struct blktap_device *dev;
   1.895 +
   1.896 +	dev = &tap->device;
   1.897 +	if (!dev->gd || !dev->gd->queue)
   1.898 +		return;
   1.899 +
   1.900 +	if (blktap_active(tap) && RING_FULL(&tap->ring.ring)) {
   1.901 +		blktap_defer(tap);
   1.902 +		return;
   1.903 +	}
   1.904 +
   1.905 +	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
   1.906 +	    test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
   1.907 +		blktap_defer(tap);
   1.908 +		return;
   1.909 +	}
   1.910 +
   1.911 +	spin_lock_irq(&dev->lock);
   1.912 +
   1.913 +	/* Re-enable calldowns. */
   1.914 +	if (blk_queue_stopped(dev->gd->queue))
   1.915 +		blk_start_queue(dev->gd->queue);
   1.916 +
   1.917 +	/* Kick things off immediately. */
   1.918 +	blktap_device_do_request(dev->gd->queue);
   1.919 +
   1.920 +	spin_unlock_irq(&dev->lock);
   1.921 +}
   1.922 +
   1.923 +static void
   1.924 +blktap_device_configure(struct blktap *tap)
   1.925 +{
   1.926 +	struct request_queue *rq;
   1.927 +	struct blktap_device *dev = &tap->device;
   1.928 +
   1.929 +	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !dev->gd)
   1.930 +		return;
   1.931 +
   1.932 +	dev = &tap->device;
   1.933 +	rq  = dev->gd->queue;
   1.934 +
   1.935 +	spin_lock_irq(&dev->lock);
   1.936 +
   1.937 +	set_capacity(dev->gd, tap->params.capacity);
   1.938 +
   1.939 +	/* Hard sector size and max sectors impersonate the equiv. hardware. */
   1.940 +	blk_queue_hardsect_size(rq, tap->params.sector_size);
   1.941 +	blk_queue_max_sectors(rq, 512);
   1.942 +
   1.943 +	/* Each segment in a request is up to an aligned page in size. */
   1.944 +	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
   1.945 +	blk_queue_max_segment_size(rq, PAGE_SIZE);
   1.946 +
   1.947 +	/* Ensure a merged request will fit in a single I/O ring slot. */
   1.948 +	blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
   1.949 +	blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
   1.950 +
   1.951 +	/* Make sure buffer addresses are sector-aligned. */
   1.952 +	blk_queue_dma_alignment(rq, 511);
   1.953 +
   1.954 +	spin_unlock_irq(&dev->lock);
   1.955 +}
   1.956 +
   1.957 +int
   1.958 +blktap_device_resume(struct blktap *tap)
   1.959 +{
   1.960 +	int err;
   1.961 +
   1.962 +	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
   1.963 +		return -ENODEV;
   1.964 +
   1.965 +	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
   1.966 +		return 0;
   1.967 +
   1.968 +	err = blktap_ring_resume(tap);
   1.969 +	if (err)
   1.970 +		return err;
   1.971 +
   1.972 +	/* device size may have changed */
   1.973 +	blktap_device_configure(tap);
   1.974 +
   1.975 +	BTDBG("restarting device\n");
   1.976 +	blktap_device_restart(tap);
   1.977 +
   1.978 +	return 0;
   1.979 +}
   1.980 +
   1.981 +int
   1.982 +blktap_device_pause(struct blktap *tap)
   1.983 +{
   1.984 +	unsigned long flags;
   1.985 +	struct blktap_device *dev = &tap->device;
   1.986 +
   1.987 +	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
   1.988 +		return -ENODEV;
   1.989 +
   1.990 +	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
   1.991 +		return 0;
   1.992 +
   1.993 +	spin_lock_irqsave(&dev->lock, flags);
   1.994 +
   1.995 +	blk_stop_queue(dev->gd->queue);
   1.996 +	set_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
   1.997 +
   1.998 +	spin_unlock_irqrestore(&dev->lock, flags);
   1.999 +
  1.1000 +	return blktap_ring_pause(tap);
  1.1001 +}
  1.1002 +
  1.1003 +int
  1.1004 +blktap_device_destroy(struct blktap *tap)
  1.1005 +{
  1.1006 +	struct blktap_device *dev = &tap->device;
  1.1007 +
  1.1008 +	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
  1.1009 +		return 0;
  1.1010 +
  1.1011 +	BTINFO("destroy device %d users %d\n", tap->minor, dev->users);
  1.1012 +
  1.1013 +	if (dev->users)
  1.1014 +		return -EBUSY;
  1.1015 +
  1.1016 +	spin_lock_irq(&dev->lock);
  1.1017 +	/* No more blktap_device_do_request(). */
  1.1018 +	blk_stop_queue(dev->gd->queue);
  1.1019 +	clear_bit(BLKTAP_DEVICE, &tap->dev_inuse);
  1.1020 +	spin_unlock_irq(&dev->lock);
  1.1021 +
  1.1022 +#ifdef ENABLE_PASSTHROUGH
  1.1023 +	if (dev->bdev)
  1.1024 +		blktap_device_close_bdev(tap);
  1.1025 +#endif
  1.1026 +
   1.1027 +	del_gendisk(dev->gd);
   1.1028 +	blk_cleanup_queue(dev->gd->queue);
   1.1029 +	put_disk(dev->gd);
  1.1030 +
  1.1031 +	dev->gd = NULL;
  1.1032 +
  1.1033 +	wake_up(&tap->wq);
  1.1034 +
  1.1035 +	return 0;
  1.1036 +}
  1.1037 +
  1.1038 +int
  1.1039 +blktap_device_create(struct blktap *tap)
  1.1040 +{
  1.1041 +	int minor, err;
  1.1042 +	struct gendisk *gd;
  1.1043 +	struct request_queue *rq;
  1.1044 +	struct blktap_device *dev;
  1.1045 +
  1.1046 +	gd    = NULL;
  1.1047 +	rq    = NULL;
  1.1048 +	dev   = &tap->device;
  1.1049 +	minor = tap->minor;
  1.1050 +
  1.1051 +	if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
  1.1052 +		return -EEXIST;
  1.1053 +
  1.1054 +	if (blktap_validate_params(tap, &tap->params))
  1.1055 +		return -EINVAL;
  1.1056 +
  1.1057 +	BTINFO("minor %d sectors %Lu sector-size %lu\n",
  1.1058 +	       minor, tap->params.capacity, tap->params.sector_size);
  1.1059 +
  1.1060 +	err = -ENODEV;
  1.1061 +
  1.1062 +	gd = alloc_disk(1);
  1.1063 +	if (!gd)
  1.1064 +		goto error;
  1.1065 +
  1.1066 +	if (minor < 26)
  1.1067 +		sprintf(gd->disk_name, "tapdev%c", 'a' + minor);
  1.1068 +	else
  1.1069 +		sprintf(gd->disk_name, "tapdev%c%c",
  1.1070 +			'a' + ((minor / 26) - 1), 'a' + (minor % 26));
  1.1071 +
  1.1072 +	gd->major = blktap_device_major;
  1.1073 +	gd->first_minor = minor;
  1.1074 +	gd->fops = &blktap_device_file_operations;
  1.1075 +	gd->private_data = dev;
  1.1076 +
  1.1077 +	spin_lock_init(&dev->lock);
  1.1078 +	rq = blk_init_queue(blktap_device_do_request, &dev->lock);
  1.1079 +	if (!rq)
  1.1080 +		goto error;
  1.1081 +
  1.1082 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
  1.1083 +	elevator_init(rq, "noop");
  1.1084 +#else
  1.1085 +	elevator_init(rq, &elevator_noop);
  1.1086 +#endif
  1.1087 +
  1.1088 +	gd->queue     = rq;
  1.1089 +	rq->queuedata = dev;
  1.1090 +	dev->gd       = gd;
  1.1091 +
  1.1092 +	set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
  1.1093 +	blktap_device_configure(tap);
  1.1094 +
  1.1095 +	add_disk(gd);
  1.1096 +
  1.1097 +	err = 0;
  1.1098 +	goto out;
  1.1099 +
  1.1100 + error:
  1.1101 +	if (gd)
   1.1102 +		put_disk(gd);
  1.1103 +	if (rq)
  1.1104 +		blk_cleanup_queue(rq);
  1.1105 +
  1.1106 + out:
  1.1107 +	BTINFO("creation of %u:%u: %d\n", blktap_device_major, tap->minor, err);
  1.1108 +	return err;
  1.1109 +}
  1.1110 +
  1.1111 +int
  1.1112 +blktap_device_init(int *maj)
  1.1113 +{
  1.1114 +	int major;
  1.1115 +
  1.1116 +	/* Dynamically allocate a major for this device */
  1.1117 +	major = register_blkdev(0, "tapdev");
  1.1118 +	if (major < 0) {
  1.1119 +		BTERR("Couldn't register blktap device\n");
  1.1120 +		return -ENOMEM;
  1.1121 +	}	
  1.1122 +
  1.1123 +	blktap_device_major = *maj = major;
  1.1124 +	BTINFO("blktap device major %d\n", major);
  1.1125 +
  1.1126 +	return 0;
  1.1127 +}
  1.1128 +
  1.1129 +void
  1.1130 +blktap_device_free(void)
  1.1131 +{
  1.1132 +	if (blktap_device_major)
  1.1133 +		if (unregister_blkdev(blktap_device_major, "tapdev"))
  1.1134 +			BTERR("blktap device unregister failed\n");
  1.1135 +}