ia64/linux-2.6.18-xen.hg

drivers/xen/blktap2/ring.c @ 878:eba6fe6d8d53

blktap2: a completely rewritten blktap implementation

Benefits to blktap2 over the old version of blktap:

* Isolation from xenstore - Blktap devices are now created directly on
the linux dom0 command line, rather than being spawned in response
to XenStore events. This is handy for debugging, makes blktap
generally easier to work with, and is a step toward a generic
user-level block device implementation that is not Xen-specific.

* Improved tapdisk infrastructure: simpler request forwarding, new
request scheduler, request merging, more efficient use of AIO.

* Improved tapdisk error handling and memory management. No
allocations on the block data path, and IO retry logic to protect
guests from transient block device failures. This has been tested and
is known to work in unreliable environments such as NFS soft mounts.

* Pause and snapshot of live virtual disks (see xmsnap script).

* VHD support. The VHD code in this release has been rigorously
tested, and represents a very mature implementation of the VHD image
format.

* No more duplication of mechanism with blkback. The blktap kernel
module has changed dramatically from the original blktap. Blkback
is now always used to talk to Xen guests, blktap just presents a
Linux gendisk that blkback can export. This is done while
preserving the zero-copy data path from domU to physical device.

These patches deprecate the old blktap code, which can hopefully be
removed from the tree completely at some point in the future.

Signed-off-by: Jake Wires <jake.wires@citrix.com>
Signed-off-by: Dutch Meyer <dmeyer@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:23:16 2009 +0100 (2009-05-26)
#include <linux/module.h>
#include <linux/signal.h>

#include "blktap.h"

static int blktap_ring_major;

static inline struct blktap *
vma_to_blktap(struct vm_area_struct *vma)
{
        struct vm_foreign_map *m = vma->vm_private_data;
        struct blktap_ring *r = container_of(m, struct blktap_ring, foreign_map);
        return container_of(r, struct blktap, ring);
}

/*
 * BLKTAP - the first pages of the mmap area are reserved for the
 * shared memory ring; the grant-mapped data pages follow immediately
 * after.
 */
#define RING_PAGES 1
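
/*
 * Illustrative sketch (hypothetical helper, not in blktap.h): given
 * RING_PAGES of shared ring at the start of the region and one page
 * per request segment after it, the user-space address of a segment
 * follows from the arithmetic used in blktap_ring_clear_pte() below.
 */
static inline unsigned long
blktap_ring_user_vaddr(struct blktap_ring *ring, int usr_idx, int seg)
{
        /* Each request slot owns BLKIF_MAX_SEGMENTS_PER_REQUEST pages,
         * laid out consecutively starting at user_vstart. */
        return ring->user_vstart +
                ((unsigned long)(usr_idx * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg)
                 << PAGE_SHIFT);
}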

static int
blktap_read_ring(struct blktap *tap)
{
        /* This is called to read responses from the ring. */
        int usr_idx;
        RING_IDX rc, rp;
        blkif_response_t res;
        struct blktap_ring *ring;
        struct blktap_request *request;

        down_read(&tap->tap_sem);

        ring = &tap->ring;
        if (!ring->vma) {
                up_read(&tap->tap_sem);
                return 0;
        }

        /* for each outstanding message on the ring */
        rp = ring->ring.sring->rsp_prod;
        rmb();

        for (rc = ring->ring.rsp_cons; rc != rp; rc++) {
                memcpy(&res, RING_GET_RESPONSE(&ring->ring, rc), sizeof(res));
                mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
                ++ring->ring.rsp_cons;

                usr_idx = (int)res.id;
                if (usr_idx >= MAX_PENDING_REQS ||
                    !tap->pending_requests[usr_idx]) {
                        BTWARN("Request %d/%d invalid [%x], tapdisk %d%p\n",
                               rc, rp, usr_idx, tap->pid, ring->vma);
                        continue;
                }

                request = tap->pending_requests[usr_idx];
                BTDBG("request %p response #%d id %x\n", request, rc, usr_idx);
                blktap_device_finish_request(tap, &res, request);
        }

        up_read(&tap->tap_sem);

        blktap_run_deferred();

        return 0;
}
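
/*
 * For illustration, the userspace half of this exchange: a minimal
 * tapdisk-side sketch (not part of this file), assuming tapdisk holds
 * the backend ring (blkif_back_ring_t) over the mmapped sring. The
 * header paths and the helper name are assumptions.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <xen/io/blkif.h>
#include <xen/io/ring.h>

static int
respond_and_kick(int ring_fd, blkif_back_ring_t *ring,
                 uint64_t id, int operation, int status)
{
        blkif_response_t *rsp;

        rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt);
        rsp->id        = id;            /* echoed back as usr_idx above */
        rsp->operation = operation;
        rsp->status    = status;

        ring->rsp_prod_pvt++;
        RING_PUSH_RESPONSES(ring);      /* publishes rsp_prod with a barrier */

        /* Have the kernel consume responses via blktap_read_ring(). */
        return ioctl(ring_fd, BLKTAP2_IOCTL_KICK_FE, 0);
}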

static struct page *
blktap_ring_nopage(struct vm_area_struct *vma,
                   unsigned long address, int *type)
{
        /*
         * If the page has not been mapped in by the driver, return
         * NOPAGE_SIGBUS to the faulting process.
         */
        return NOPAGE_SIGBUS;
}

static pte_t
blktap_ring_clear_pte(struct vm_area_struct *vma,
                      unsigned long uvaddr,
                      pte_t *ptep, int is_fullmm)
{
        pte_t copy;
        struct blktap *tap;
        unsigned long kvaddr;
        struct page **map, *page;
        struct blktap_ring *ring;
        struct blktap_request *request;
        struct grant_handle_pair *khandle;
        struct gnttab_unmap_grant_ref unmap[2];
        int offset, seg, usr_idx, count = 0;

        tap = vma_to_blktap(vma);
        ring = &tap->ring;
        map = ring->foreign_map.map;
        BUG_ON(!map); /* TODO: handle this gracefully instead of BUG_ON? */

        /*
         * Zap entry if the address is before the start of the grant
         * mapped region.
         */
        if (uvaddr < ring->user_vstart)
                return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
                                               ptep, is_fullmm);

        /* One page per segment, BLKIF_MAX_SEGMENTS_PER_REQUEST pages
         * per request slot: recover the request and segment indices. */
        offset = (int)((uvaddr - ring->user_vstart) >> PAGE_SHIFT);
        usr_idx = offset / BLKIF_MAX_SEGMENTS_PER_REQUEST;
        seg = offset % BLKIF_MAX_SEGMENTS_PER_REQUEST;

        offset = (int)((uvaddr - vma->vm_start) >> PAGE_SHIFT);
        page = map[offset];
        if (page) {
                ClearPageReserved(page);
                if (PageBlkback(page)) {
                        ClearPageBlkback(page);
                        set_page_private(page, 0);
                }
        }
        map[offset] = NULL;

        request = tap->pending_requests[usr_idx];
        kvaddr = request_to_kaddr(request, seg);
        khandle = request->handles + seg;

        if (khandle->kernel != INVALID_GRANT_HANDLE) {
                gnttab_set_unmap_op(&unmap[count], kvaddr,
                                    GNTMAP_host_map, khandle->kernel);
                count++;

                set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
                                    INVALID_P2M_ENTRY);
        }

        if (khandle->user != INVALID_GRANT_HANDLE) {
                BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));

                copy = *ptep;
                gnttab_set_unmap_op(&unmap[count], virt_to_machine(ptep),
                                    GNTMAP_host_map
                                    | GNTMAP_application_map
                                    | GNTMAP_contains_pte,
                                    khandle->user);
                count++;
        } else
                copy = ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
                                               is_fullmm);

        if (count)
                if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
                                              unmap, count))
                        BUG();

        khandle->kernel = INVALID_GRANT_HANDLE;
        khandle->user   = INVALID_GRANT_HANDLE;

        return copy;
}

static void
blktap_ring_vm_unmap(struct vm_area_struct *vma)
{
        struct blktap *tap = vma_to_blktap(vma);

        down_write(&tap->tap_sem);
        clear_bit(BLKTAP_RING_VMA, &tap->dev_inuse);
        clear_bit(BLKTAP_PAUSED, &tap->dev_inuse);
        clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
        up_write(&tap->tap_sem);
}

static void
blktap_ring_vm_close(struct vm_area_struct *vma)
{
        struct blktap *tap = vma_to_blktap(vma);
        struct blktap_ring *ring = &tap->ring;

        blktap_ring_vm_unmap(vma);                /* fail future requests */
        blktap_device_fail_pending_requests(tap); /* fail pending requests */
        blktap_device_restart(tap);               /* fail deferred requests */

        down_write(&tap->tap_sem);

        zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);

        kfree(ring->foreign_map.map);
        ring->foreign_map.map = NULL;

        /* Free the ring page. */
        ClearPageReserved(virt_to_page(ring->ring.sring));
        free_page((unsigned long)ring->ring.sring);

        BTINFO("unmapping ring %d\n", tap->minor);
        ring->ring.sring = NULL;
        ring->vma = NULL;

        up_write(&tap->tap_sem);

        wake_up(&tap->wq);
}

static struct vm_operations_struct blktap_ring_vm_operations = {
        .close   = blktap_ring_vm_close,
        .unmap   = blktap_ring_vm_unmap,
        .nopage  = blktap_ring_nopage,
        .zap_pte = blktap_ring_clear_pte,
};

static int
blktap_ring_open(struct inode *inode, struct file *filp)
{
        int idx;
        struct blktap *tap;

        idx = iminor(inode);
        if (idx < 0 || idx >= MAX_BLKTAP_DEVICE || blktaps[idx] == NULL) {
                BTERR("unable to open device blktap%d\n", idx);
                return -ENODEV;
        }

        tap = blktaps[idx];

        BTINFO("opening device blktap%d\n", idx);

        if (!test_bit(BLKTAP_CONTROL, &tap->dev_inuse))
                return -ENODEV;

        /* Only one process can access the ring at a time. */
        if (test_and_set_bit(BLKTAP_RING_FD, &tap->dev_inuse))
                return -EBUSY;

        filp->private_data = tap;
        BTINFO("opened device %d\n", tap->minor);

        return 0;
}

static int
blktap_ring_release(struct inode *inode, struct file *filp)
{
        struct blktap *tap = filp->private_data;

        BTINFO("freeing device %d\n", tap->minor);
        clear_bit(BLKTAP_RING_FD, &tap->dev_inuse);
        filp->private_data = NULL;
        wake_up(&tap->wq);
        return 0;
}

/*
 * Note on mmap:
 * We need to map pages to user space in a way that will allow the block
 * subsystem to set up direct IO to them. This couldn't be done before,
 * because there isn't really a sane way to translate a user virtual
 * address down to a physical address when the page belongs to another
 * domain.
 *
 * My first approach was to map the page into kernel memory, add an entry
 * for it in the physical frame list (using alloc_lomem_region as in
 * blkback) and then attempt to map that page up to user space. This is
 * disallowed by xen though, which realizes that we don't really own the
 * machine frame underlying the physical page.
 *
 * The new approach is to provide explicit support for this in xen linux.
 * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
 * mapped from other VMs. vma->vm_private_data is set up as a mapping
 * from pages to actual page structs. There is a new clause in
 * get_user_pages that does the right thing for this sort of mapping.
 */
static int
blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma)
{
        int size, err;
        struct page **map;
        struct blktap *tap;
        blkif_sring_t *sring;
        struct blktap_ring *ring;

        tap = filp->private_data;
        if (!tap || test_and_set_bit(BLKTAP_RING_VMA, &tap->dev_inuse))
                return -ENOMEM;

        ring = &tap->ring;
        map = NULL;
        sring = NULL;

        size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
        if (size != (MMAP_PAGES + RING_PAGES)) {
                BTERR("you _must_ map exactly %lu pages!\n",
                      MMAP_PAGES + RING_PAGES);
                return -EAGAIN;
        }

        /* Allocate the fe ring. */
        sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
        if (!sring) {
                BTERR("Couldn't alloc sring.\n");
                goto fail_mem;
        }

        map = kzalloc(size * sizeof(struct page *), GFP_KERNEL);
        if (!map) {
                BTERR("Couldn't alloc VM_FOREIGN map.\n");
                goto fail_mem;
        }

        SetPageReserved(virt_to_page(sring));

        SHARED_RING_INIT(sring);
        FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE);

        ring->ring_vstart = vma->vm_start;
        ring->user_vstart = ring->ring_vstart + (RING_PAGES << PAGE_SHIFT);

        /* Map the ring pages to the start of the region and reserve it. */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                err = vm_insert_page(vma, vma->vm_start,
                                     virt_to_page(ring->ring.sring));
        else
                err = remap_pfn_range(vma, vma->vm_start,
                                      __pa(ring->ring.sring) >> PAGE_SHIFT,
                                      PAGE_SIZE, vma->vm_page_prot);
        if (err) {
                BTERR("Mapping user ring failed: %d\n", err);
                goto fail;
        }

        /* Mark this VM as containing foreign pages, and set up mappings. */
        ring->foreign_map.map = map;
        vma->vm_private_data = &ring->foreign_map;
        vma->vm_flags |= VM_FOREIGN;
        vma->vm_flags |= VM_DONTCOPY;
        vma->vm_flags |= VM_RESERVED;
        vma->vm_ops = &blktap_ring_vm_operations;

#ifdef CONFIG_X86
        vma->vm_mm->context.has_foreign_mappings = 1;
#endif

        tap->pid = current->pid;
        BTINFO("blktap: mapping pid is %d\n", tap->pid);

        ring->vma = vma;
        return 0;

fail:
        /* Clear any active mappings. */
        zap_page_range(vma, vma->vm_start,
                       vma->vm_end - vma->vm_start, NULL);
        ClearPageReserved(virt_to_page(sring));
fail_mem:
        free_page((unsigned long)sring);
        kfree(map);

        return -ENOMEM;
}
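
/*
 * For illustration, the matching userspace setup: a sketch of how
 * tapdisk might map the region this function serves (not part of this
 * file). The device path is an assumption, the MMAP_PAGES value must
 * match the kernel's definition in blktap.h, and tapdisk_map_ring()
 * is a hypothetical helper.
 */
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <xen/io/blkif.h>
#include <xen/io/ring.h>

static int
tapdisk_map_ring(const char *devname, blkif_back_ring_t *ring)
{
        void *area;
        int fd;
        size_t psize = getpagesize();

        fd = open(devname, O_RDWR);     /* e.g. "/dev/xen/blktap-2/blktap0" */
        if (fd < 0)
                return -1;

        /* blktap_ring_mmap() insists on exactly MMAP_PAGES + RING_PAGES
         * pages, all mapped in one go at offset 0. */
        area = mmap(NULL, (MMAP_PAGES + RING_PAGES) * psize,
                    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (area == MAP_FAILED) {
                close(fd);
                return -1;
        }

        /* Page 0 holds the shared ring; the data pages follow it. */
        BACK_RING_INIT(ring, (blkif_sring_t *)area, psize);
        return fd;
}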

static inline void
blktap_ring_set_message(struct blktap *tap, int msg)
{
        struct blktap_ring *ring = &tap->ring;

        down_read(&tap->tap_sem);
        if (ring->ring.sring)
                ring->ring.sring->pad[0] = msg;
        up_read(&tap->tap_sem);
}

static int
blktap_ring_ioctl(struct inode *inode, struct file *filp,
                  unsigned int cmd, unsigned long arg)
{
        struct blktap_params params;
        struct blktap *tap = filp->private_data;

        BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg);

        switch (cmd) {
        case BLKTAP2_IOCTL_KICK_FE:
                /* There are fe messages to process. */
                return blktap_read_ring(tap);

        case BLKTAP2_IOCTL_CREATE_DEVICE:
                if (!arg)
                        return -EINVAL;

                if (copy_from_user(&params, (struct blktap_params __user *)arg,
                                   sizeof(params))) {
                        BTERR("failed to get params\n");
                        return -EFAULT;
                }

                if (blktap_validate_params(tap, &params)) {
                        BTERR("invalid params\n");
                        return -EINVAL;
                }

                tap->params = params;
                return blktap_device_create(tap);

        case BLKTAP2_IOCTL_SET_PARAMS:
                if (!arg)
                        return -EINVAL;

                if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
                        return -EINVAL;

                if (copy_from_user(&params, (struct blktap_params __user *)arg,
                                   sizeof(params))) {
                        BTERR("failed to get params\n");
                        return -EFAULT;
                }

                if (blktap_validate_params(tap, &params)) {
                        BTERR("invalid params\n");
                        return -EINVAL;
                }

                tap->params = params;
                return 0;

        case BLKTAP2_IOCTL_PAUSE:
                if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse))
                        return -EINVAL;

                set_bit(BLKTAP_PAUSED, &tap->dev_inuse);
                clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);

                blktap_ring_set_message(tap, 0);
                wake_up_interruptible(&tap->wq);

                return 0;

        case BLKTAP2_IOCTL_REOPEN:
                if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
                        return -EINVAL;

                if (!arg)
                        return -EINVAL;

                if (copy_to_user((char __user *)arg,
                                 tap->params.name,
                                 strlen(tap->params.name) + 1))
                        return -EFAULT;

                blktap_ring_set_message(tap, 0);
                wake_up_interruptible(&tap->wq);

                return 0;

        case BLKTAP2_IOCTL_RESUME:
                if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
                        return -EINVAL;

                tap->ring.response = (int)arg;
                if (!tap->ring.response)
                        clear_bit(BLKTAP_PAUSED, &tap->dev_inuse);

                blktap_ring_set_message(tap, 0);
                wake_up_interruptible(&tap->wq);

                return 0;
        }

        return -ENOIOCTLCMD;
}

static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait)
{
        struct blktap *tap = filp->private_data;
        struct blktap_ring *ring = &tap->ring;

        poll_wait(filp, &ring->poll_wait, wait);
        if (ring->ring.sring->pad[0] != 0 ||
            ring->ring.req_prod_pvt != ring->ring.sring->req_prod) {
                RING_PUSH_REQUESTS(&ring->ring);
                return POLLIN | POLLRDNORM;
        }

        return 0;
}
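
/*
 * For illustration, the consumer of this poll hook: a sketch of a
 * tapdisk event loop (not part of this file). handle_message() and
 * handle_request() are hypothetical; a sketch of the former follows
 * the pause/resume code further below.
 */
#include <poll.h>
#include <stdint.h>

static void handle_message(int fd, uint8_t msg);        /* hypothetical */
static void handle_request(blkif_request_t *req);       /* hypothetical */

static void
tapdisk_run(int fd, blkif_back_ring_t *ring)
{
        struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLRDNORM };

        for (;;) {
                if (poll(&pfd, 1, -1) <= 0)
                        continue;

                /* pad[0] doubles as a message channel; see
                 * blktap_ring_set_message() above. */
                if (ring->sring->pad[0])
                        handle_message(fd, ring->sring->pad[0]);

                while (RING_HAS_UNCONSUMED_REQUESTS(ring)) {
                        blkif_request_t *req =
                                RING_GET_REQUEST(ring, ring->req_cons);
                        ring->req_cons++;
                        handle_request(req);
                }
        }
}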

static struct file_operations blktap_ring_file_operations = {
        .owner   = THIS_MODULE,
        .open    = blktap_ring_open,
        .release = blktap_ring_release,
        .ioctl   = blktap_ring_ioctl,
        .mmap    = blktap_ring_mmap,
        .poll    = blktap_ring_poll,
};

void
blktap_ring_kick_user(struct blktap *tap)
{
        wake_up_interruptible(&tap->ring.poll_wait);
}

int
blktap_ring_resume(struct blktap *tap)
{
        int err;
        struct blktap_ring *ring = &tap->ring;

        if (!blktap_active(tap))
                return -ENODEV;

        if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
                return -EINVAL;

        /* set shared flag for resume */
        ring->response = 0;

        blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_RESUME);
        blktap_ring_kick_user(tap);

        wait_event_interruptible(tap->wq, ring->response ||
                                 !test_bit(BLKTAP_PAUSED, &tap->dev_inuse));

        err = ring->response;
        ring->response = 0;

        BTDBG("err: %d\n", err);

        if (err)
                return err;

        if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
                return -EAGAIN;

        return 0;
}

int
blktap_ring_pause(struct blktap *tap)
{
        if (!blktap_active(tap))
                return -ENODEV;

        if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse))
                return -EINVAL;

        BTDBG("draining queue\n");
        wait_event_interruptible(tap->wq, !tap->pending_cnt);
        if (tap->pending_cnt)
                return -EAGAIN;

        blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_PAUSE);
        blktap_ring_kick_user(tap);

        BTDBG("waiting for tapdisk response\n");
        wait_event_interruptible(tap->wq,
                                 test_bit(BLKTAP_PAUSED, &tap->dev_inuse));
        if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
                return -EAGAIN;

        return 0;
}
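
/*
 * For illustration, the userspace half of this handshake: a sketch of
 * the message handler assumed in the event-loop sketch above (not part
 * of this file). The kernel posts BLKTAP2_RING_MESSAGE_* values in
 * pad[0] and sleeps on tap->wq until the matching ioctl arrives.
 */
static void
handle_message(int fd, uint8_t msg)
{
        switch (msg) {
        case BLKTAP2_RING_MESSAGE_PAUSE:
                /* Quiesce our own in-flight IO, then acknowledge;
                 * blktap_ring_pause() has already drained pending
                 * kernel requests. */
                ioctl(fd, BLKTAP2_IOCTL_PAUSE, 0);
                break;
        case BLKTAP2_RING_MESSAGE_RESUME:
                /* Reopen the image, then report the outcome: the arg
                 * is read back as ring->response, so pass 0 on success
                 * or an error code on failure. */
                ioctl(fd, BLKTAP2_IOCTL_RESUME, 0);
                break;
        case BLKTAP2_RING_MESSAGE_CLOSE:
                /* Tear down: munmap the ring (which runs
                 * blktap_ring_vm_close() above) and close the fd. */
                break;
        }
}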

int
blktap_ring_destroy(struct blktap *tap)
{
        if (!test_bit(BLKTAP_RING_FD, &tap->dev_inuse) &&
            !test_bit(BLKTAP_RING_VMA, &tap->dev_inuse))
                return 0;

        BTDBG("sending tapdisk close message\n");
        blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_CLOSE);
        blktap_ring_kick_user(tap);

        return -EAGAIN;
}

static void
blktap_ring_initialize(struct blktap_ring *ring, int minor)
{
        memset(ring, 0, sizeof(*ring));
        init_waitqueue_head(&ring->poll_wait);
        ring->devno = MKDEV(blktap_ring_major, minor);
}

int
blktap_ring_create(struct blktap *tap)
{
        struct blktap_ring *ring = &tap->ring;
        blktap_ring_initialize(ring, tap->minor);
        return blktap_sysfs_create(tap);
}

int
blktap_ring_init(int *major)
{
        int err;

        err = register_chrdev(0, "blktap2", &blktap_ring_file_operations);
        if (err < 0) {
                BTERR("error registering blktap ring device: %d\n", err);
                return err;
        }

        blktap_ring_major = *major = err;
        BTINFO("blktap ring major: %d\n", blktap_ring_major);
        return 0;
}

int
blktap_ring_free(void)
{
        if (blktap_ring_major)
                unregister_chrdev(blktap_ring_major, "blktap2");

        return 0;
}