ia64/linux-2.6.18-xen.hg

annotate drivers/xen/blktap2/device.c @ 878:eba6fe6d8d53

blktap2: a completely rewritten blktap implementation

Benefits of blktap2 over the old version of blktap:

* Isolation from XenStore - Blktap devices are now created directly on
the Linux dom0 command line, rather than being spawned in response
to XenStore events. This is handy for debugging, makes blktap
generally easier to work with, and is a step toward a generic
user-level block device implementation that is not Xen-specific.

* Improved tapdisk infrastructure: simpler request forwarding, new
request scheduler, request merging, more efficient use of AIO.

* Improved tapdisk error handling and memory management. No
allocations on the block data path, and I/O retry logic to protect
guests from transient block device failures. This has been tested
and is known to work in weird environments such as NFS soft mounts.

* Pause and snapshot of live virtual disks (see xmsnap script).

* VHD support. The VHD code in this release has been rigorously
tested, and represents a very mature implementation of the VHD
image format.

* No more duplication of mechanism with blkback. The blktap kernel
module has changed dramatically from the original blktap. Blkback
is now always used to talk to Xen guests, blktap just presents a
Linux gendisk that blkback can export. This is done while
preserving the zero-copy data path from domU to physical device.

These patches deprecate the old blktap code, which can hopefully be
removed from the tree completely at some point in the future.
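
For orientation, the kernel-side entry points in this file compose
roughly as follows. This is an illustrative sketch only, not part of
the patch; it assumes a struct blktap already created by the blktap
ring/control code, and error paths are simplified:

	static int example_tapdev_lifecycle(struct blktap *tap)
	{
		int major, err;

		err = blktap_device_init(&major);  /* register dynamic "tapdev" major */
		if (err)
			return err;

		err = blktap_device_create(tap);   /* alloc gendisk, export tapdevX */
		if (err)
			goto out;

		blktap_device_pause(tap);          /* quiesce queue, pause tapdisk */
		blktap_device_resume(tap);         /* re-read params, restart queue */

		err = blktap_device_destroy(tap);  /* tear down gendisk once unused */
	out:
		blktap_device_free();              /* unregister the major */
		return err;
	}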

Signed-off-by: Jake Wires <jake.wires@citrix.com>
Signed-off-by: Dutch Meyer <dmeyer@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:23:16 2009 +0100
parents
children a4b49dff3387
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/cdrom.h>
#include <linux/hdreg.h>
#include <linux/module.h>

#include <scsi/scsi.h>
#include <scsi/scsi_ioctl.h>

#include <xen/xenbus.h>
#include <xen/interface/io/blkif.h>

#include "blktap.h"

#ifdef CONFIG_XEN_BLKDEV_BACKEND
#include "../blkback/blkback-pagemap.h"
#else
struct blkback_pagemap { };
#define blkback_pagemap_read(page) BUG();
#endif

#if 0
#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a)
#else
#define DPRINTK_IOCTL(_f, _a...) ((void)0)
#endif
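
/*
 * Table of grant-map operations covering one block request. Each data
 * segment may need two mappings -- one into the kernel address space
 * and one into the tapdisk process -- hence twice the segment count.
 */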
struct blktap_grant_table {
	int cnt;
	struct gnttab_map_grant_ref grants[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
};

static int blktap_device_major;

static inline struct blktap *
dev_to_blktap(struct blktap_device *dev)
{
	return container_of(dev, struct blktap, device);
}

static int
blktap_device_open(struct inode *inode, struct file *filep)
{
	struct blktap *tap;
	struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;

	if (!dev)
		return -ENOENT;

	tap = dev_to_blktap(dev);
	if (!blktap_active(tap) ||
	    test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
		return -ENOENT;

	dev->users++;

	return 0;
}

static int
blktap_device_release(struct inode *inode, struct file *filep)
{
	struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;
	struct blktap *tap = dev_to_blktap(dev);

	dev->users--;
	if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
		blktap_device_destroy(tap);

	return 0;
}
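
/*
 * Fabricate drive geometry for HDIO_GETGEO and friends: 255 heads, 63
 * sectors per track, cylinders derived from (and capped to fit) the
 * device capacity.
 */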
static int
blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg)
{
	/* We don't have real geometry info, but let's at least return
	   values consistent with the size of the device */
	sector_t nsect = get_capacity(bd->bd_disk);
	sector_t cylinders = nsect;

	hg->heads = 0xff;
	hg->sectors = 0x3f;
	sector_div(cylinders, hg->heads * hg->sectors);
	hg->cylinders = cylinders;
	if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
		hg->cylinders = 0xffff;
	return 0;
}

static int
blktap_device_ioctl(struct inode *inode, struct file *filep,
		    unsigned command, unsigned long argument)
{
	int i;

	DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
		      command, (long)argument, inode->i_rdev);

	switch (command) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
	case HDIO_GETGEO: {
		struct block_device *bd = inode->i_bdev;
		struct hd_geometry geo;
		int ret;

		if (!argument)
			return -EINVAL;

		geo.start = get_start_sect(bd);
		ret = blktap_device_getgeo(bd, &geo);
		if (ret)
			return ret;

		if (copy_to_user((struct hd_geometry __user *)argument, &geo,
				 sizeof(geo)))
			return -EFAULT;

		return 0;
	}
#endif
	case CDROMMULTISESSION:
		BTDBG("FIXME: support multisession CDs later\n");
		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
			if (put_user(0, (char __user *)(argument + i)))
				return -EFAULT;
		return 0;

	case SCSI_IOCTL_GET_IDLUN:
		if (!access_ok(VERIFY_WRITE, argument,
			       sizeof(struct scsi_idlun)))
			return -EFAULT;

		/* return 0 for now. */
		__put_user(0, &((struct scsi_idlun __user *)argument)->dev_id);
		__put_user(0,
			   &((struct scsi_idlun __user *)argument)->host_unique_id);
		return 0;

	default:
		/*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
		  command);*/
		return -EINVAL; /* same return as native Linux */
	}

	return 0;
}

static struct block_device_operations blktap_device_file_operations = {
	.owner = THIS_MODULE,
	.open = blktap_device_open,
	.release = blktap_device_release,
	.ioctl = blktap_device_ioctl,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
	.getgeo = blktap_device_getgeo
#endif
};
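
/*
 * Helpers to install and clear a single PTE in a given address space,
 * built on apply_to_page_range(); used below to (un)map request pages
 * into the kernel and into the tapdisk process.
 */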
static int
blktap_map_uaddr_fn(pte_t *ptep, struct page *pmd_page,
		    unsigned long addr, void *data)
{
	pte_t *pte = (pte_t *)data;

	BTDBG("ptep %p -> %012llx\n", ptep, pte_val(*pte));
	set_pte(ptep, *pte);
	xen_invlpg(addr);
	return 0;
}

static int
blktap_map_uaddr(struct mm_struct *mm, unsigned long address, pte_t pte)
{
	return apply_to_page_range(mm, address,
				   PAGE_SIZE, blktap_map_uaddr_fn, &pte);
}

static int
blktap_umap_uaddr_fn(pte_t *ptep, struct page *pmd_page,
		     unsigned long addr, void *data)
{
	struct mm_struct *mm = (struct mm_struct *)data;

	BTDBG("ptep %p\n", ptep);
	pte_clear(mm, addr, ptep);
	xen_invlpg(addr);
	return 0;
}

static int
blktap_umap_uaddr(struct mm_struct *mm, unsigned long address)
{
	return apply_to_page_range(mm, address,
				   PAGE_SIZE, blktap_umap_uaddr_fn, mm);
}

static void
blktap_device_end_dequeued_request(struct blktap_device *dev,
				   struct request *req, int uptodate)
{
	int ret;

	ret = end_that_request_first(req, uptodate, req->hard_nr_sectors);
	BUG_ON(ret);

	spin_lock_irq(&dev->lock);
	end_that_request_last(req, uptodate);
	spin_unlock_irq(&dev->lock);
}
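
/*
 * Undo the grant mappings (kernel and user) for every page of a
 * request: batch the unmap operations, invalidate the p2m entries,
 * drop the pages from the ring's foreign map and zap the
 * corresponding tapdisk address range.
 */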
/*
 * tap->tap_sem held on entry
 */
static void
blktap_device_fast_flush(struct blktap *tap, struct blktap_request *request)
{
	uint64_t ptep;
	int ret, usr_idx;
	unsigned int i, cnt;
	struct page **map, *page;
	struct blktap_ring *ring;
	struct grant_handle_pair *khandle;
	unsigned long kvaddr, uvaddr, offset;
	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];

	cnt = 0;
	ring = &tap->ring;
	usr_idx = request->usr_idx;
	map = ring->foreign_map.map;

	if (!ring->vma)
		return;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		zap_page_range(ring->vma,
			       MMAP_VADDR(ring->user_vstart, usr_idx, 0),
			       request->nr_pages << PAGE_SHIFT, NULL);

	for (i = 0; i < request->nr_pages; i++) {
		kvaddr = request_to_kaddr(request, i);
		uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);

		khandle = request->handles + i;

		if (khandle->kernel != INVALID_GRANT_HANDLE) {
			gnttab_set_unmap_op(&unmap[cnt], kvaddr,
					    GNTMAP_host_map, khandle->kernel);
			cnt++;
			set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
					    INVALID_P2M_ENTRY);
		}

		if (khandle->user != INVALID_GRANT_HANDLE) {
			BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
			if (create_lookup_pte_addr(ring->vma->vm_mm,
						   uvaddr, &ptep) != 0) {
				BTERR("Couldn't get a pte addr!\n");
				return;
			}

			gnttab_set_unmap_op(&unmap[cnt], ptep,
					    GNTMAP_host_map
					    | GNTMAP_application_map
					    | GNTMAP_contains_pte,
					    khandle->user);
			cnt++;
		}

		offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;

		BTDBG("offset: 0x%08lx, page: %p, request: %p, usr_idx: %d, "
		      "seg: %d, kvaddr: 0x%08lx, khandle: %u, uvaddr: "
		      "0x%08lx, handle: %u\n", offset, map[offset], request,
		      usr_idx, i, kvaddr, khandle->kernel, uvaddr,
		      khandle->user);

		page = map[offset];
		if (page) {
			ClearPageReserved(map[offset]);
			if (PageBlkback(page)) {
				ClearPageBlkback(page);
				set_page_private(page, 0);
			}
		}
		map[offset] = NULL;

		khandle->kernel = INVALID_GRANT_HANDLE;
		khandle->user = INVALID_GRANT_HANDLE;
	}

	if (cnt) {
		ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
						unmap, cnt);
		BUG_ON(ret);
	}

	if (!xen_feature(XENFEAT_auto_translated_physmap))
		zap_page_range(ring->vma,
			       MMAP_VADDR(ring->user_vstart, usr_idx, 0),
			       request->nr_pages << PAGE_SHIFT, NULL);
}
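
/*
 * Clear the kernel-side PTEs of locally allocated (non-granted) pages,
 * i.e. those whose kernel handle is still invalid, then flush all
 * remaining mappings of the request.
 */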
/*
 * tap->tap_sem held on entry
 */
static void
blktap_unmap(struct blktap *tap, struct blktap_request *request)
{
	int i, usr_idx;
	unsigned long kvaddr;

	usr_idx = request->usr_idx;
	down_write(&tap->ring.vma->vm_mm->mmap_sem);

	for (i = 0; i < request->nr_pages; i++) {
		BTDBG("request: %p, seg: %d, kvaddr: 0x%08lx, khandle: %u, "
		      "uvaddr: 0x%08lx, uhandle: %u\n", request, i,
		      request_to_kaddr(request, i),
		      request->handles[i].kernel,
		      MMAP_VADDR(tap->ring.user_vstart, usr_idx, i),
		      request->handles[i].user);

		if (request->handles[i].kernel == INVALID_GRANT_HANDLE) {
			kvaddr = request_to_kaddr(request, i);
			blktap_umap_uaddr(&init_mm, kvaddr);
			set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
					    INVALID_P2M_ENTRY);
		}
	}

	blktap_device_fast_flush(tap, request);
	up_write(&tap->ring.vma->vm_mm->mmap_sem);
}

/*
 * called if the tapdisk process dies unexpectedly.
 * fail and release any pending requests and disable queue.
 */
void
blktap_device_fail_pending_requests(struct blktap *tap)
{
	int usr_idx;
	struct request *req;
	struct blktap_device *dev;
	struct blktap_request *request;

	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
		return;

	down_write(&tap->tap_sem);

	dev = &tap->device;
	for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
		request = tap->pending_requests[usr_idx];
		if (!request || request->status != BLKTAP_REQUEST_PENDING)
			continue;

		BTERR("%u:%u: failing pending %s of %d pages\n",
		      blktap_device_major, tap->minor,
		      (request->operation == BLKIF_OP_READ ?
		       "read" : "write"), request->nr_pages);

		blktap_unmap(tap, request);
		req = (struct request *)(unsigned long)request->id;
		blktap_device_end_dequeued_request(dev, req, 0);
		blktap_request_free(tap, request);
	}

	up_write(&tap->tap_sem);

	spin_lock_irq(&dev->lock);

	/* fail any future requests */
	dev->gd->queue->queuedata = NULL;
	blk_start_queue(dev->gd->queue);

	spin_unlock_irq(&dev->lock);
}
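
/*
 * Complete a request using the status tapdisk returned in the ring
 * response, then release the request descriptor.
 */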
/*
 * tap->tap_sem held on entry
 */
void
blktap_device_finish_request(struct blktap *tap,
			     blkif_response_t *res,
			     struct blktap_request *request)
{
	int uptodate;
	struct request *req;
	struct blktap_device *dev;

	dev = &tap->device;

	blktap_unmap(tap, request);

	req = (struct request *)(unsigned long)request->id;
	uptodate = (res->status == BLKIF_RSP_OKAY);

	BTDBG("req %p res status %d operation %d/%d id %lld\n", req,
	      res->status, res->operation, request->operation, res->id);

	switch (request->operation) {
	case BLKIF_OP_READ:
	case BLKIF_OP_WRITE:
		if (unlikely(res->status != BLKIF_RSP_OKAY))
			BTERR("Bad return from device data "
			      "request: %x\n", res->status);
		blktap_device_end_dequeued_request(dev, req, uptodate);
		break;
	default:
		BUG();
	}

	blktap_request_free(tap, request);
}
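
/*
 * Foreign pages already grant-mapped by blkback carry their original
 * (domid, gref) in the blkback pagemap; queue grant-map operations so
 * the same grant also becomes visible to the tap kernel pages and, on
 * non-auto-translated guests, to the tapdisk address space.
 */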
static int
blktap_prep_foreign(struct blktap *tap,
		    struct blktap_request *request,
		    blkif_request_t *blkif_req,
		    unsigned int seg, struct page *page,
		    struct blktap_grant_table *table)
{
	uint64_t ptep;
	uint32_t flags;
	struct page *tap_page;
	struct blktap_ring *ring;
	struct blkback_pagemap map;
	unsigned long uvaddr, kvaddr;

	ring = &tap->ring;
	map = blkback_pagemap_read(page);
	blkif_req->seg[seg].gref = map.gref;

	uvaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
	kvaddr = request_to_kaddr(request, seg);
	flags = GNTMAP_host_map |
		(request->operation == BLKIF_OP_WRITE ? GNTMAP_readonly : 0);

	gnttab_set_map_op(&table->grants[table->cnt],
			  kvaddr, flags, map.gref, map.domid);
	table->cnt++;

	/* enable chained tap devices */
	tap_page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
	set_page_private(tap_page, page_private(page));
	SetPageBlkback(tap_page);

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (create_lookup_pte_addr(ring->vma->vm_mm, uvaddr, &ptep)) {
		BTERR("couldn't get a pte addr!\n");
		return -1;
	}

	flags |= GNTMAP_application_map | GNTMAP_contains_pte;
	gnttab_set_map_op(&table->grants[table->cnt],
			  ptep, flags, map.gref, map.domid);
	table->cnt++;

	return 0;
}
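
/*
 * Issue the batched grant-map hypercall prepared by
 * blktap_prep_foreign() and record the resulting handles. On the
 * auto-translated path pages are inserted into the tapdisk VMA via
 * vm_insert_page(); otherwise the p2m table is patched with the
 * foreign frame.
 */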
static int
blktap_map_foreign(struct blktap *tap,
		   struct blktap_request *request,
		   blkif_request_t *blkif_req,
		   struct blktap_grant_table *table)
{
	struct page *page;
	int i, grant, err, usr_idx;
	struct blktap_ring *ring;
	unsigned long uvaddr, kvaddr, foreign_mfn;

	if (!table->cnt)
		return 0;

	err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
					table->grants, table->cnt);
	BUG_ON(err);

	grant = 0;
	usr_idx = request->usr_idx;
	ring = &tap->ring;

	for (i = 0; i < request->nr_pages; i++) {
		if (!blkif_req->seg[i].gref)
			continue;

		uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
		kvaddr = request_to_kaddr(request, i);

		if (unlikely(table->grants[grant].status)) {
			BTERR("invalid kernel buffer: could not remap it\n");
			err |= 1;
			table->grants[grant].handle = INVALID_GRANT_HANDLE;
		}

		request->handles[i].kernel = table->grants[grant].handle;
		foreign_mfn = table->grants[grant].dev_bus_addr >> PAGE_SHIFT;
		grant++;

		if (xen_feature(XENFEAT_auto_translated_physmap))
			goto done;

		if (unlikely(table->grants[grant].status)) {
			BTERR("invalid user buffer: could not remap it\n");
			err |= 1;
			table->grants[grant].handle = INVALID_GRANT_HANDLE;
		}

		request->handles[i].user = table->grants[grant].handle;
		grant++;

	done:
		if (err)
			continue;

		page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);

		if (!xen_feature(XENFEAT_auto_translated_physmap))
			set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
					    FOREIGN_FRAME(foreign_mfn));
		else if (vm_insert_page(ring->vma, uvaddr, page))
			err |= 1;

		BTDBG("pending_req: %p, seg: %d, page: %p, "
		      "kvaddr: 0x%08lx, khandle: %u, uvaddr: 0x%08lx, "
		      "uhandle: %u\n", request, i, page,
		      kvaddr, request->handles[i].kernel,
		      uvaddr, request->handles[i].user);
	}

	return err;
}
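
/*
 * Map a locally allocated page into both the tap kernel address space
 * and the tapdisk process; no grant operations are needed, so both
 * handles stay invalid.
 */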
static void
blktap_map(struct blktap *tap,
	   struct blktap_request *request,
	   unsigned int seg, struct page *page)
{
	pte_t pte;
	int usr_idx;
	struct blktap_ring *ring;
	unsigned long uvaddr, kvaddr;

	ring = &tap->ring;
	usr_idx = request->usr_idx;
	uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, seg);
	kvaddr = request_to_kaddr(request, seg);

	pte = mk_pte(page, ring->vma->vm_page_prot);
	blktap_map_uaddr(ring->vma->vm_mm, uvaddr, pte_mkwrite(pte));
	blktap_map_uaddr(&init_mm, kvaddr, mk_pte(page, PAGE_KERNEL));

	set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, pte_mfn(pte));
	request->handles[seg].kernel = INVALID_GRANT_HANDLE;
	request->handles[seg].user = INVALID_GRANT_HANDLE;

	BTDBG("pending_req: %p, seg: %d, page: %p, kvaddr: 0x%08lx, "
	      "uvaddr: 0x%08lx\n", request, seg, page, kvaddr,
	      uvaddr);
}
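
/*
 * Translate one block-layer request into a blkif request on the user
 * ring: map every bio segment (through the grant table for foreign
 * pages, through direct PTEs otherwise), write the message to the
 * ring and update the I/O statistics.
 */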
static int
blktap_device_process_request(struct blktap *tap,
			      struct blktap_request *request,
			      struct request *req)
{
	struct bio *bio;
	struct page *page;
	struct bio_vec *bvec;
	int idx, usr_idx, err;
	struct blktap_ring *ring;
	struct blktap_grant_table table;
	unsigned int fsect, lsect, nr_sects;
	unsigned long offset, uvaddr, kvaddr;
	struct blkif_request blkif_req, *target;

	err = -1;
	memset(&table, 0, sizeof(table));

	if (!blktap_active(tap))
		goto out;

	ring = &tap->ring;
	usr_idx = request->usr_idx;
	blkif_req.id = usr_idx;
	blkif_req.sector_number = (blkif_sector_t)req->sector;
	blkif_req.handle = 0;
	blkif_req.operation = rq_data_dir(req) ?
		BLKIF_OP_WRITE : BLKIF_OP_READ;

	request->id = (unsigned long)req;
	request->operation = blkif_req.operation;
	request->status = BLKTAP_REQUEST_PENDING;
	do_gettimeofday(&request->time);

	nr_sects = 0;
	request->nr_pages = 0;
	blkif_req.nr_segments = 0;
	rq_for_each_bio(bio, req) {
		bio_for_each_segment(bvec, bio, idx) {
			BUG_ON(blkif_req.nr_segments ==
			       BLKIF_MAX_SEGMENTS_PER_REQUEST);

			fsect = bvec->bv_offset >> 9;
			lsect = fsect + (bvec->bv_len >> 9) - 1;
			nr_sects += bvec->bv_len >> 9;

			blkif_req.seg[blkif_req.nr_segments] =
				(struct blkif_request_segment) {
					.gref = 0,
					.first_sect = fsect,
					.last_sect = lsect };

			if (PageBlkback(bvec->bv_page)) {
				/* foreign page -- use xen */
				if (blktap_prep_foreign(tap,
							request,
							&blkif_req,
							blkif_req.nr_segments,
							bvec->bv_page,
							&table))
					goto out;
			} else {
				/* do it the old fashioned way */
				blktap_map(tap,
					   request,
					   blkif_req.nr_segments,
					   bvec->bv_page);
			}

			uvaddr = MMAP_VADDR(ring->user_vstart,
					    usr_idx, blkif_req.nr_segments);
			kvaddr = request_to_kaddr(request,
						  blkif_req.nr_segments);
			offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
			page = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
			ring->foreign_map.map[offset] = page;
			SetPageReserved(page);

			BTDBG("mapped uaddr %08lx to page %p pfn 0x%lx\n",
			      uvaddr, page, __pa(kvaddr) >> PAGE_SHIFT);
			BTDBG("offset: 0x%08lx, pending_req: %p, seg: %d, "
			      "page: %p, kvaddr: 0x%08lx, uvaddr: 0x%08lx\n",
			      offset, request, blkif_req.nr_segments,
			      page, kvaddr, uvaddr);

			blkif_req.nr_segments++;
			request->nr_pages++;
		}
	}

	if (blktap_map_foreign(tap, request, &blkif_req, &table))
		goto out;

	/* Finally, write the request message to the user ring. */
	target = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
	memcpy(target, &blkif_req, sizeof(blkif_req));
	target->id = request->usr_idx;
	wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
	ring->ring.req_prod_pvt++;

	if (rq_data_dir(req)) {
		tap->stats.st_wr_sect += nr_sects;
		tap->stats.st_wr_req++;
	} else {
		tap->stats.st_rd_sect += nr_sects;
		tap->stats.st_rd_req++;
	}

	err = 0;

out:
	if (err)
		blktap_device_fast_flush(tap, request);
	return err;
}

#ifdef ENABLE_PASSTHROUGH
#define rq_for_each_bio_safe(_bio, _tmp, _req)				\
	if ((_req)->bio)						\
		for (_bio = (_req)->bio;				\
		     _bio && ((_tmp = _bio->bi_next) || 1);		\
		     _bio = _tmp)

static void
blktap_device_forward_request(struct blktap *tap, struct request *req)
{
	struct bio *bio, *tmp;
	struct blktap_device *dev;

	dev = &tap->device;

	rq_for_each_bio_safe(bio, tmp, req) {
		bio->bi_bdev = dev->bdev;
		submit_bio(bio->bi_rw, bio);
	}
}

static void
blktap_device_close_bdev(struct blktap *tap)
{
	struct blktap_device *dev;

	dev = &tap->device;

	if (dev->bdev)
		blkdev_put(dev->bdev);

	dev->bdev = NULL;
	clear_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
}

static int
blktap_device_open_bdev(struct blktap *tap, u32 pdev)
{
	struct block_device *bdev;
	struct blktap_device *dev;

	dev = &tap->device;

	bdev = open_by_devnum(pdev, FMODE_WRITE);
	if (IS_ERR(bdev)) {
		BTERR("opening device %x:%x failed: %ld\n",
		      MAJOR(pdev), MINOR(pdev), PTR_ERR(bdev));
		return PTR_ERR(bdev);
	}

	if (!bdev->bd_disk) {
		BTERR("device %x:%x doesn't exist\n",
		      MAJOR(pdev), MINOR(pdev));
		blkdev_put(bdev);
		return -ENOENT;
	}

	dev->bdev = bdev;
	set_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);

	/* TODO: readjust queue parameters */

	BTINFO("set device %d to passthrough on %x:%x\n",
	       tap->minor, MAJOR(pdev), MINOR(pdev));

	return 0;
}

int
blktap_device_enable_passthrough(struct blktap *tap,
				 unsigned major, unsigned minor)
{
	u32 pdev;
	struct blktap_device *dev;

	dev = &tap->device;
	pdev = MKDEV(major, minor);

	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
		return -EINVAL;

	if (dev->bdev) {
		if (pdev)
			return -EINVAL;
		blktap_device_close_bdev(tap);
		return 0;
	}

	return blktap_device_open_bdev(tap, pdev);
}
#endif
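
/*
 * Pull requests off the block-layer queue, stopping when the user
 * ring fills or request descriptors run out; if anything was queued,
 * kick the tapdisk ring once at the end.
 */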
/*
 * dev->lock held on entry
 */
static void
blktap_device_run_queue(struct blktap *tap)
{
	int queued, err;
	request_queue_t *rq;
	struct request *req;
	struct blktap_ring *ring;
	struct blktap_device *dev;
	struct blktap_request *request;

	queued = 0;
	ring = &tap->ring;
	dev = &tap->device;
	rq = dev->gd->queue;

	BTDBG("running queue for %d\n", tap->minor);

	while ((req = elv_next_request(rq)) != NULL) {
		if (!blk_fs_request(req)) {
			end_request(req, 0);
			continue;
		}

		if (blk_barrier_rq(req)) {
			end_request(req, 0);
			continue;
		}

#ifdef ENABLE_PASSTHROUGH
		if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
			blkdev_dequeue_request(req);
			blktap_device_forward_request(tap, req);
			continue;
		}
#endif

		if (RING_FULL(&ring->ring)) {
	wait:
			/* Avoid pointless unplugs. */
			blk_stop_queue(rq);
			blktap_defer(tap);
			break;
		}

		request = blktap_request_allocate(tap);
		if (!request) {
			tap->stats.st_oo_req++;
			goto wait;
		}

		BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%lx) "
		      "buffer:%p [%s], pending: %p\n", req, tap->minor,
		      req->cmd, req->sector, req->current_nr_sectors,
		      req->nr_sectors, req->buffer,
		      rq_data_dir(req) ? "write" : "read", request);

		blkdev_dequeue_request(req);

		spin_unlock_irq(&dev->lock);
		down_read(&tap->tap_sem);

		err = blktap_device_process_request(tap, request, req);
		if (!err)
			queued++;
		else {
			blktap_device_end_dequeued_request(dev, req, 0);
			blktap_request_free(tap, request);
		}

		up_read(&tap->tap_sem);
		spin_lock_irq(&dev->lock);
	}

	if (queued)
		blktap_ring_kick_user(tap);
}

/*
 * dev->lock held on entry
 */
static void
blktap_device_do_request(request_queue_t *rq)
{
	struct request *req;
	struct blktap *tap;
	struct blktap_device *dev;

	dev = rq->queuedata;
	if (!dev)
		goto fail;

	tap = dev_to_blktap(dev);
	if (!blktap_active(tap))
		goto fail;

	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
	    test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
		blktap_defer(tap);
		return;
	}

	blktap_device_run_queue(tap);
	return;

fail:
	while ((req = elv_next_request(rq))) {
		BTERR("device closed: failing secs %llu - %llu\n",
		      req->sector, req->sector + req->nr_sectors);
		end_request(req, 0);
	}
}

void
blktap_device_restart(struct blktap *tap)
{
	struct blktap_device *dev;

	dev = &tap->device;
	if (!dev->gd || !dev->gd->queue)
		return;

	if (blktap_active(tap) && RING_FULL(&tap->ring.ring)) {
		blktap_defer(tap);
		return;
	}

	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
	    test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
		blktap_defer(tap);
		return;
	}

	spin_lock_irq(&dev->lock);

	/* Re-enable calldowns. */
	if (blk_queue_stopped(dev->gd->queue))
		blk_start_queue(dev->gd->queue);

	/* Kick things off immediately. */
	blktap_device_do_request(dev->gd->queue);

	spin_unlock_irq(&dev->lock);
}
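
/*
 * Apply the disk parameters negotiated with tapdisk (capacity, sector
 * size) to the gendisk, and bound the queue so that any merged request
 * still fits in a single blkif ring slot.
 */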
static void
blktap_device_configure(struct blktap *tap)
{
	struct request_queue *rq;
	struct blktap_device *dev = &tap->device;

	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !dev->gd)
		return;

	rq = dev->gd->queue;

	spin_lock_irq(&dev->lock);

	set_capacity(dev->gd, tap->params.capacity);

	/* Hard sector size and max sectors impersonate the equiv. hardware. */
	blk_queue_hardsect_size(rq, tap->params.sector_size);
	blk_queue_max_sectors(rq, 512);

	/* Each segment in a request is up to an aligned page in size. */
	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
	blk_queue_max_segment_size(rq, PAGE_SIZE);

	/* Ensure a merged request will fit in a single I/O ring slot. */
	blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
	blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);

	/* Make sure buffer addresses are sector-aligned. */
	blk_queue_dma_alignment(rq, 511);

	spin_unlock_irq(&dev->lock);
}

int
blktap_device_resume(struct blktap *tap)
{
	int err;

	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
		return -ENODEV;

	if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
		return 0;

	err = blktap_ring_resume(tap);
	if (err)
		return err;

	/* device size may have changed */
	blktap_device_configure(tap);

	BTDBG("restarting device\n");
	blktap_device_restart(tap);

	return 0;
}

int
blktap_device_pause(struct blktap *tap)
{
	unsigned long flags;
	struct blktap_device *dev = &tap->device;

	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
		return -ENODEV;

	if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
		return 0;

	spin_lock_irqsave(&dev->lock, flags);

	blk_stop_queue(dev->gd->queue);
	set_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);

	spin_unlock_irqrestore(&dev->lock, flags);

	return blktap_ring_pause(tap);
}

int
blktap_device_destroy(struct blktap *tap)
{
	struct blktap_device *dev = &tap->device;

	if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
		return 0;

	BTINFO("destroy device %d users %d\n", tap->minor, dev->users);

	if (dev->users)
		return -EBUSY;

	spin_lock_irq(&dev->lock);
	/* No more blktap_device_do_request(). */
	blk_stop_queue(dev->gd->queue);
	clear_bit(BLKTAP_DEVICE, &tap->dev_inuse);
	spin_unlock_irq(&dev->lock);

#ifdef ENABLE_PASSTHROUGH
	if (dev->bdev)
		blktap_device_close_bdev(tap);
#endif

	del_gendisk(dev->gd);
	blk_cleanup_queue(dev->gd->queue);
	put_disk(dev->gd);

	dev->gd = NULL;

	wake_up(&tap->wq);

	return 0;
}
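
/*
 * Allocate the gendisk and request queue for this tap and publish it
 * as a tapdev disk, using the usual block-driver naming scheme
 * (tapdeva..tapdevz, then tapdevaa, tapdevab, ...).
 */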
int
blktap_device_create(struct blktap *tap)
{
	int minor, err;
	struct gendisk *gd;
	struct request_queue *rq;
	struct blktap_device *dev;

	gd = NULL;
	rq = NULL;
	dev = &tap->device;
	minor = tap->minor;

	if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
		return -EEXIST;

	if (blktap_validate_params(tap, &tap->params))
		return -EINVAL;

	BTINFO("minor %d sectors %Lu sector-size %lu\n",
	       minor, tap->params.capacity, tap->params.sector_size);

	err = -ENODEV;

	gd = alloc_disk(1);
	if (!gd)
		goto error;

	if (minor < 26)
		sprintf(gd->disk_name, "tapdev%c", 'a' + minor);
	else
		sprintf(gd->disk_name, "tapdev%c%c",
			'a' + ((minor / 26) - 1), 'a' + (minor % 26));

	gd->major = blktap_device_major;
	gd->first_minor = minor;
	gd->fops = &blktap_device_file_operations;
	gd->private_data = dev;

	spin_lock_init(&dev->lock);
	rq = blk_init_queue(blktap_device_do_request, &dev->lock);
	if (!rq)
		goto error;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
	elevator_init(rq, "noop");
#else
	elevator_init(rq, &elevator_noop);
#endif

	gd->queue = rq;
	rq->queuedata = dev;
	dev->gd = gd;

	set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
	blktap_device_configure(tap);

	add_disk(gd);

	err = 0;
	goto out;

error:
	if (gd)
		put_disk(gd);
	if (rq)
		blk_cleanup_queue(rq);

out:
	BTINFO("creation of %u:%u: %d\n", blktap_device_major, tap->minor, err);
	return err;
}

int
blktap_device_init(int *maj)
{
	int major;

	/* Dynamically allocate a major for this device */
	major = register_blkdev(0, "tapdev");
	if (major < 0) {
		BTERR("Couldn't register blktap device\n");
		return -ENOMEM;
	}

	blktap_device_major = *maj = major;
	BTINFO("blktap device major %d\n", major);

	return 0;
}

void
blktap_device_free(void)
{
	if (blktap_device_major)
		if (unregister_blkdev(blktap_device_major, "tapdev"))
			BTERR("blktap device unregister failed\n");
}