ia64/xen-unstable

view tools/blktap2/drivers/tapdisk-vbd.c @ 19817:b7f73a7f3078

blktap2: portability fixes for NetBSD

- Use standard off_t and lseek() instead of non-portable off64_t and
lseek64()
- Use uuid API as documented in DCE 1.1 RPC specification
- Add NetBSD implementation for blk_getimagesize() and
blk_getsectorsize()
- Use blk_getimagesize() and blk_getsectorsize()
- Fix uuid header check

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Jun 23 17:24:14 2009 +0100 (2009-06-23)
parents 1c627434605e
children
line source
1 /*
2 * Copyright (c) 2008, XenSource Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of XenSource Inc. nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28 #include <stdio.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <regex.h>
32 #include <unistd.h>
33 #include <stdlib.h>
34 #include <libgen.h>
35 #include <sys/mman.h>
36 #include <sys/ioctl.h>
38 #include "libvhd.h"
39 #include "tapdisk-image.h"
40 #include "tapdisk-driver.h"
41 #include "tapdisk-server.h"
42 #include "tapdisk-interface.h"
43 #include "tapdisk-vbd.h"
44 #include "blktap2.h"
46 #define DBG(_level, _f, _a...) tlog_write(_level, _f, ##_a)
47 #define ERR(_err, _f, _a...) tlog_error(_err, _f, ##_a)
49 #if 1
50 #define ASSERT(p) \
51 do { \
52 if (!(p)) { \
53 DPRINTF("Assertion '%s' failed, line %d, " \
54 "file %s", #p, __LINE__, __FILE__); \
55 *(int*)0 = 0; \
56 } \
57 } while (0)
58 #else
59 #define ASSERT(p) ((void)0)
60 #endif
63 #define TD_VBD_EIO_RETRIES 10
64 #define TD_VBD_EIO_SLEEP 1
65 #define TD_VBD_WATCHDOG_TIMEOUT 10
67 static void tapdisk_vbd_ring_event(event_id_t, char, void *);
68 static void tapdisk_vbd_callback(void *, blkif_response_t *);
70 /*
71 * initialization
72 */
74 static inline void
75 tapdisk_vbd_initialize_vreq(td_vbd_request_t *vreq)
76 {
77 memset(vreq, 0, sizeof(td_vbd_request_t));
78 INIT_LIST_HEAD(&vreq->next);
79 }
81 int
82 tapdisk_vbd_initialize(int rfd, int wfd, uint16_t uuid)
83 {
84 int i;
85 td_vbd_t *vbd;
87 vbd = tapdisk_server_get_vbd(uuid);
88 if (vbd) {
89 EPRINTF("duplicate vbds! %u\n", uuid);
90 return -EEXIST;
91 }
93 vbd = calloc(1, sizeof(td_vbd_t));
94 if (!vbd) {
95 EPRINTF("failed to allocate tapdisk state\n");
96 return -ENOMEM;
97 }
99 vbd->uuid = uuid;
100 vbd->ipc.rfd = rfd;
101 vbd->ipc.wfd = wfd;
102 vbd->ipc.uuid = uuid;
103 vbd->ring.fd = -1;
105 /* default blktap ring completion */
106 vbd->callback = tapdisk_vbd_callback;
107 vbd->argument = vbd;
109 INIT_LIST_HEAD(&vbd->images);
110 INIT_LIST_HEAD(&vbd->new_requests);
111 INIT_LIST_HEAD(&vbd->pending_requests);
112 INIT_LIST_HEAD(&vbd->failed_requests);
113 INIT_LIST_HEAD(&vbd->completed_requests);
114 INIT_LIST_HEAD(&vbd->next);
115 gettimeofday(&vbd->ts, NULL);
117 for (i = 0; i < MAX_REQUESTS; i++)
118 tapdisk_vbd_initialize_vreq(vbd->request_list + i);
120 tapdisk_server_add_vbd(vbd);
122 return 0;
123 }
125 void
126 tapdisk_vbd_set_callback(td_vbd_t *vbd, td_vbd_cb_t callback, void *argument)
127 {
128 vbd->callback = callback;
129 vbd->argument = argument;
130 }
132 static int
133 tapdisk_vbd_validate_chain(td_vbd_t *vbd)
134 {
135 int err;
136 td_image_t *image, *parent, *tmp;
138 DPRINTF("VBD CHAIN:\n");
140 tapdisk_vbd_for_each_image(vbd, image, tmp) {
141 DPRINTF("%s: %d\n", image->name, image->type);
143 if (tapdisk_vbd_is_last_image(vbd, image))
144 break;
146 parent = tapdisk_vbd_next_image(image);
147 err = td_validate_parent(image, parent);
148 if (err)
149 return err;
150 }
152 return 0;
153 }
155 void
156 tapdisk_vbd_close_vdi(td_vbd_t *vbd)
157 {
158 td_image_t *image, *tmp;
160 tapdisk_vbd_for_each_image(vbd, image, tmp) {
161 td_close(image);
162 tapdisk_image_free(image);
163 }
165 INIT_LIST_HEAD(&vbd->images);
166 td_flag_set(vbd->state, TD_VBD_CLOSED);
167 }
169 static int
170 tapdisk_vbd_add_block_cache(td_vbd_t *vbd)
171 {
172 int err;
173 td_driver_t *driver;
174 td_image_t *cache, *image, *target, *tmp;
176 target = NULL;
178 tapdisk_vbd_for_each_image(vbd, image, tmp)
179 if (td_flag_test(image->flags, TD_OPEN_RDONLY) &&
180 td_flag_test(image->flags, TD_OPEN_SHAREABLE)) {
181 target = image;
182 break;
183 }
185 if (!target)
186 return 0;
188 cache = tapdisk_image_allocate(target->name,
189 DISK_TYPE_BLOCK_CACHE,
190 target->storage,
191 target->flags,
192 target->private);
193 if (!cache)
194 return -ENOMEM;
196 /* try to load existing cache */
197 err = td_load(cache);
198 if (!err)
199 goto done;
201 /* hack driver to send open() correct image size */
202 if (!target->driver) {
203 err = -ENODEV;
204 goto fail;
205 }
207 cache->driver = tapdisk_driver_allocate(cache->type,
208 cache->name,
209 cache->flags,
210 cache->storage);
211 if (!cache->driver) {
212 err = -ENOMEM;
213 goto fail;
214 }
216 cache->driver->info = target->driver->info;
218 /* try to open new cache */
219 err = td_open(cache);
220 if (!err)
221 goto done;
223 fail:
224 /* give up */
225 tapdisk_image_free(target);
226 return err;
228 done:
229 /* insert cache before image */
230 list_add(&cache->next, target->next.prev);
231 return 0;
232 }
234 static int
235 tapdisk_vbd_add_dirty_log(td_vbd_t *vbd)
236 {
237 int err;
238 td_driver_t *driver;
239 td_image_t *log, *parent;
241 driver = NULL;
242 log = NULL;
244 parent = tapdisk_vbd_first_image(vbd);
246 log = tapdisk_image_allocate(parent->name,
247 DISK_TYPE_LOG,
248 parent->storage,
249 parent->flags,
250 vbd);
251 if (!log)
252 return -ENOMEM;
254 driver = tapdisk_driver_allocate(log->type,
255 log->name,
256 log->flags,
257 log->storage);
258 if (!driver) {
259 err = -ENOMEM;
260 goto fail;
261 }
263 driver->info = parent->driver->info;
264 log->driver = driver;
266 err = td_open(log);
267 if (err)
268 goto fail;
270 list_add(&log->next, &vbd->images);
271 return 0;
273 fail:
274 tapdisk_image_free(log);
275 return err;
276 }
/*
 * LVHD hack: have to rescan LVM metadata on pool
 * slaves to register lvchanges made on master. FIXME.
 *
 * Runs "lvchange -an <name>" followed by "lvchange -ay --refresh <name>"
 * through system().  Returns -errno if a command string cannot be
 * built, otherwise the result of the second system() call.
 *
 * NOTE(review): name is pasted into a shell command line unescaped;
 * confirm that callers never pass untrusted volume names.
 */
static int
tapdisk_vbd_reactivate_volume(const char *name)
{
	int rc;
	char *cmd;

	DPRINTF("reactivating %s\n", name);

	if (asprintf(&cmd, "lvchange -an %s", name) == -1) {
		EPRINTF("failed to deactivate %s\n", name);
		return -errno;
	}

	/*
	 * The result of the deactivation is deliberately ignored.
	 * Assume that LV deactivation failed because the LV is open,
	 * in which case the LVM information should be up-to-date and
	 * we don't need this step anyways.  If the failure is due to a
	 * non-existent LV, the next command (lvchange -ay) will catch it.
	 * If we want to be more prudent/paranoid, we can instead check
	 * whether the LV is currently open (a bit more work).
	 */
	(void)system(cmd);
	free(cmd);

	if (asprintf(&cmd, "lvchange -ay --refresh %s", name) == -1) {
		EPRINTF("failed to activate %s\n", name);
		return -errno;
	}

	rc = system(cmd);
	if (rc)
		EPRINTF("%s failed: %d\n", cmd, rc);

	free(cmd);
	return rc;
}
323 static int
324 tapdisk_vbd_reactivate_volumes(td_vbd_t *vbd, int resume)
325 {
326 int i, cnt, err;
327 char *name, *new;
328 vhd_context_t vhd;
329 vhd_parent_locator_t *loc;
331 new = NULL;
332 name = NULL;
334 if (vbd->storage != TAPDISK_STORAGE_TYPE_LVM)
335 return 0;
337 if (!resume && vbd->reactivated)
338 return 0;
340 name = strdup(vbd->name);
341 if (!name) {
342 EPRINTF("%s: nomem\n", vbd->name);
343 return -ENOMEM;
344 }
346 for (cnt = 0; 1; cnt++) {
348 /* only need to reactivate child and parent during resume */
349 if (resume && cnt == 2)
350 break;
352 err = tapdisk_vbd_reactivate_volume(name);
353 if (err)
354 goto fail;
356 if (!strstr(name, "VHD"))
357 break;
359 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
360 err = vhd_open(&vhd, name, VHD_OPEN_RDONLY);
361 if (!err)
362 break;
364 libvhd_set_log_level(1);
365 sleep(TD_VBD_EIO_SLEEP);
366 }
367 libvhd_set_log_level(0);
368 if (err)
369 goto fail;
371 if (vhd.footer.type != HD_TYPE_DIFF) {
372 vhd_close(&vhd);
373 break;
374 }
376 loc = NULL;
377 for (i = 0; i < 8; i++)
378 if (vhd.header.loc[i].code == PLAT_CODE_MACX) {
379 loc = vhd.header.loc + i;
380 break;
381 }
383 if (!loc) {
384 vhd_close(&vhd);
385 err = -EINVAL;
386 goto fail;
387 }
389 free(name);
390 err = vhd_parent_locator_read(&vhd, loc, &name);
391 vhd_close(&vhd);
393 if (err) {
394 name = NULL;
395 goto fail;
396 }
398 /*
399 * vhd_parent_locator_read returns path relative to child:
400 * ./VG_XenStorage--<sr-uuid>-VHD--<vdi-uuid>
401 * we have to convert this to absolute path for lvm
402 */
403 err = asprintf(&new, "/dev/mapper/%s", name + 2);
404 if (err == -1) {
405 err = -errno;
406 goto fail;
407 }
409 free(name);
410 name = new;
411 }
413 err = 0;
414 vbd->reactivated = 1;
416 out:
417 free(name);
418 return err;
420 fail:
421 EPRINTF("failed to reactivate %s: %d\n", vbd->name, err);
422 goto out;
423 }
425 /*
426 * LVHD hack:
427 * raw volumes are named /dev/<sr-vg-name>-<sr-uuid>/LV-<sr-uuid>
428 * vhd volumes are named /dev/<sr-vg-name>-<sr-uuid>/VHD-<sr-uuid>
429 *
430 * a live snapshot of a raw volume will result in the writeable volume's
431 * name changing from the raw to vhd format, but this change will not be
432 * reflected by xenstore. hence this mess.
433 */
434 static int
435 tapdisk_vbd_check_file(td_vbd_t *vbd)
436 {
437 int i, err;
438 regex_t re;
439 size_t len, max;
440 regmatch_t matches[4];
441 char *new, *src, *dst, error[256];
443 if (vbd->storage != TAPDISK_STORAGE_TYPE_LVM)
444 return 0;
446 err = tapdisk_vbd_reactivate_volume(vbd->name);
447 if (!err)
448 return 0;
449 else
450 DPRINTF("reactivating %s failed\n", vbd->name);
452 #define HEX "[A-Za-z0-9]"
453 #define UUID HEX"\\{8\\}-"HEX"\\{4\\}-"HEX"\\{4\\}-"HEX"\\{4\\}-"HEX"\\{12\\}"
454 #define VG "VG_"HEX"\\+"
455 #define TYPE "\\(LV\\|VHD\\)"
456 #define RE "\\(/dev/"VG"-"UUID"/\\)"TYPE"\\(-"UUID"\\)"
458 err = regcomp(&re, RE, 0);
459 if (err)
460 goto regerr;
462 #undef HEX
463 #undef UUID
464 #undef VG
465 #undef TYPE
466 #undef RE
468 err = regexec(&re, vbd->name, 4, matches, 0);
469 if (err)
470 goto regerr;
472 max = strlen("VHD") + 1;
473 for (i = 1; i < 4; i++) {
474 if (matches[i].rm_so == -1 || matches[i].rm_eo == -1) {
475 EPRINTF("%s: failed to tokenize name\n", vbd->name);
476 err = -EINVAL;
477 goto out;
478 }
480 max += matches[i].rm_eo - matches[i].rm_so;
481 }
483 new = malloc(max);
484 if (!new) {
485 EPRINTF("%s: failed to allocate new name\n", vbd->name);
486 err = -ENOMEM;
487 goto out;
488 }
490 src = new;
491 for (i = 1; i < 4; i++) {
492 dst = vbd->name + matches[i].rm_so;
493 len = matches[i].rm_eo - matches[i].rm_so;
495 if (i == 2) {
496 if (memcmp(dst, "LV", len)) {
497 EPRINTF("%s: bad name format\n", vbd->name);
498 free(new);
499 err = -EINVAL;
500 goto out;
501 }
503 src += sprintf(src, "VHD");
504 continue;
505 }
507 memcpy(src, dst, len + 1);
508 src += len;
509 }
511 *src = '\0';
513 err = tapdisk_vbd_reactivate_volume(new);
514 if (err)
515 DPRINTF("reactivating %s failed\n", new);
517 err = access(new, F_OK);
518 if (err == -1) {
519 EPRINTF("neither %s nor %s accessible\n",
520 vbd->name, new);
521 err = -errno;
522 free(new);
523 goto out;
524 }
526 DPRINTF("couldn't find %s, trying %s\n", vbd->name, new);
528 err = 0;
529 free(vbd->name);
530 vbd->name = new;
531 vbd->type = DISK_TYPE_VHD;
533 out:
534 regfree(&re);
535 return err;
537 regerr:
538 regerror(err, &re, error, sizeof(error));
539 EPRINTF("%s: regex failed: %s\n", vbd->name, error);
540 err = -EINVAL;
541 goto out;
542 }
544 static int
545 __tapdisk_vbd_open_vdi(td_vbd_t *vbd, td_flag_t extra_flags)
546 {
547 char *file;
548 int err, type;
549 td_flag_t flags;
550 td_disk_id_t id;
551 td_image_t *image, *tmp;
552 struct tfilter *filter = NULL;
554 err = tapdisk_vbd_reactivate_volumes(vbd, 0);
555 if (err)
556 return err;
558 flags = (vbd->flags & ~TD_OPEN_SHAREABLE) | extra_flags;
559 file = vbd->name;
560 type = vbd->type;
562 for (;;) {
563 err = -ENOMEM;
564 image = tapdisk_image_allocate(file, type,
565 vbd->storage, flags, vbd);
567 if (file != vbd->name) {
568 free(file);
569 file = NULL;
570 }
572 if (!image)
573 goto fail;
575 err = td_load(image);
576 if (err) {
577 if (err != -ENODEV)
578 goto fail;
580 err = td_open(image);
581 if (err)
582 goto fail;
583 }
585 err = td_get_parent_id(image, &id);
586 if (err && err != TD_NO_PARENT) {
587 td_close(image);
588 goto fail;
589 }
591 if (!image->storage)
592 image->storage = vbd->storage;
594 tapdisk_vbd_add_image(vbd, image);
595 image = NULL;
597 if (err == TD_NO_PARENT)
598 break;
600 file = id.name;
601 type = id.drivertype;
602 flags |= (TD_OPEN_RDONLY | TD_OPEN_SHAREABLE);
603 }
605 if (td_flag_test(vbd->flags, TD_OPEN_LOG_DIRTY)) {
606 err = tapdisk_vbd_add_dirty_log(vbd);
607 if (err)
608 goto fail;
609 }
611 if (td_flag_test(vbd->flags, TD_OPEN_ADD_CACHE)) {
612 err = tapdisk_vbd_add_block_cache(vbd);
613 if (err)
614 goto fail;
615 }
617 err = tapdisk_vbd_validate_chain(vbd);
618 if (err)
619 goto fail;
621 td_flag_clear(vbd->state, TD_VBD_CLOSED);
623 return 0;
625 fail:
626 if (image)
627 tapdisk_image_free(image);
629 tapdisk_vbd_close_vdi(vbd);
631 return err;
632 }
634 int
635 tapdisk_vbd_open_vdi(td_vbd_t *vbd, const char *path,
636 uint16_t drivertype, uint16_t storage, td_flag_t flags)
637 {
638 int i, err;
639 struct tap_disk *ops;
641 ops = tapdisk_server_find_driver_interface(drivertype);
642 if (!ops)
643 return -EINVAL;
644 DPRINTF("Loaded %s driver for vbd %u %s 0x%08x\n",
645 ops->disk_type, vbd->uuid, path, flags);
647 err = tapdisk_namedup(&vbd->name, path);
648 if (err)
649 return err;
651 vbd->flags = flags;
652 vbd->storage = storage;
653 vbd->type = drivertype;
655 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
656 err = __tapdisk_vbd_open_vdi(vbd, 0);
657 if (err != -EIO)
658 break;
660 sleep(TD_VBD_EIO_SLEEP);
661 }
662 if (err)
663 goto fail;
665 return 0;
667 fail:
668 free(vbd->name);
669 vbd->name = NULL;
670 return err;
671 }
673 static int
674 tapdisk_vbd_register_event_watches(td_vbd_t *vbd)
675 {
676 event_id_t id;
678 id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
679 vbd->ring.fd, 0,
680 tapdisk_vbd_ring_event, vbd);
681 if (id < 0)
682 return id;
684 vbd->ring_event_id = id;
686 return 0;
687 }
689 static void
690 tapdisk_vbd_unregister_events(td_vbd_t *vbd)
691 {
692 if (vbd->ring_event_id)
693 tapdisk_server_unregister_event(vbd->ring_event_id);
694 }
696 static int
697 tapdisk_vbd_map_device(td_vbd_t *vbd, const char *devname)
698 {
700 int err, psize;
701 td_ring_t *ring;
703 ring = &vbd->ring;
704 psize = getpagesize();
706 ring->fd = open(devname, O_RDWR);
707 if (ring->fd == -1) {
708 err = -errno;
709 EPRINTF("failed to open %s: %d\n", devname, err);
710 goto fail;
711 }
713 ring->mem = mmap(0, psize * BLKTAP_MMAP_REGION_SIZE,
714 PROT_READ | PROT_WRITE, MAP_SHARED, ring->fd, 0);
715 if (ring->mem == MAP_FAILED) {
716 err = -errno;
717 EPRINTF("failed to mmap %s: %d\n", devname, err);
718 goto fail;
719 }
721 ring->sring = (blkif_sring_t *)((unsigned long)ring->mem);
722 BACK_RING_INIT(&ring->fe_ring, ring->sring, psize);
724 ring->vstart =
725 (unsigned long)ring->mem + (BLKTAP_RING_PAGES * psize);
727 ioctl(ring->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE);
729 return 0;
731 fail:
732 if (ring->mem && ring->mem != MAP_FAILED)
733 munmap(ring->mem, psize * BLKTAP_MMAP_REGION_SIZE);
734 if (ring->fd != -1)
735 close(ring->fd);
736 ring->fd = -1;
737 ring->mem = NULL;
738 return err;
739 }
741 static int
742 tapdisk_vbd_unmap_device(td_vbd_t *vbd)
743 {
744 int psize;
746 psize = getpagesize();
748 if (vbd->ring.fd != -1)
749 close(vbd->ring.fd);
750 if (vbd->ring.mem > 0)
751 munmap(vbd->ring.mem, psize * BLKTAP_MMAP_REGION_SIZE);
753 return 0;
754 }
756 int
757 tapdisk_vbd_open(td_vbd_t *vbd, const char *name, uint16_t type,
758 uint16_t storage, const char *ring, td_flag_t flags)
759 {
760 int err;
762 err = tapdisk_vbd_open_vdi(vbd, name, type, storage, flags);
763 if (err)
764 goto out;
766 err = tapdisk_vbd_map_device(vbd, ring);
767 if (err)
768 goto out;
770 err = tapdisk_vbd_register_event_watches(vbd);
771 if (err)
772 goto out;
774 return 0;
776 out:
777 tapdisk_vbd_close_vdi(vbd);
778 tapdisk_vbd_unmap_device(vbd);
779 tapdisk_vbd_unregister_events(vbd);
780 free(vbd->name);
781 vbd->name = NULL;
782 return err;
783 }
785 static void
786 tapdisk_vbd_queue_count(td_vbd_t *vbd, int *new,
787 int *pending, int *failed, int *completed)
788 {
789 int n, p, f, c;
790 td_vbd_request_t *vreq, *tvreq;
792 n = 0;
793 p = 0;
794 f = 0;
795 c = 0;
797 tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->new_requests)
798 n++;
800 tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->pending_requests)
801 p++;
803 tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->failed_requests)
804 f++;
806 tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->completed_requests)
807 c++;
809 *new = n;
810 *pending = p;
811 *failed = f;
812 *completed = c;
813 }
815 static int
816 tapdisk_vbd_shutdown(td_vbd_t *vbd)
817 {
818 int new, pending, failed, completed;
820 if (!list_empty(&vbd->pending_requests))
821 return -EAGAIN;
823 tapdisk_vbd_kick(vbd);
824 tapdisk_vbd_queue_count(vbd, &new, &pending, &failed, &completed);
826 DPRINTF("%s: state: 0x%08x, new: 0x%02x, pending: 0x%02x, "
827 "failed: 0x%02x, completed: 0x%02x\n",
828 vbd->name, vbd->state, new, pending, failed, completed);
829 DPRINTF("last activity: %010ld.%06lld, errors: 0x%04"PRIx64", "
830 "retries: 0x%04"PRIx64", received: 0x%08"PRIx64", "
831 "returned: 0x%08"PRIx64", kicked: 0x%08"PRIx64"\n",
832 vbd->ts.tv_sec, (unsigned long long)vbd->ts.tv_usec,
833 vbd->errors, vbd->retries, vbd->received, vbd->returned,
834 vbd->kicked);
836 tapdisk_vbd_close_vdi(vbd);
837 tapdisk_ipc_write(&vbd->ipc, TAPDISK_MESSAGE_CLOSE_RSP);
838 tapdisk_vbd_unregister_events(vbd);
839 tapdisk_vbd_unmap_device(vbd);
840 tapdisk_server_remove_vbd(vbd);
841 free(vbd->name);
842 free(vbd);
844 tlog_print_errors();
846 return 0;
847 }
849 int
850 tapdisk_vbd_close(td_vbd_t *vbd)
851 {
852 /*
853 * don't close if any requests are pending in the aio layer
854 */
855 if (!list_empty(&vbd->pending_requests))
856 goto fail;
858 /*
859 * if the queue is still active and we have more
860 * requests, try to complete them before closing.
861 */
862 if (tapdisk_vbd_queue_ready(vbd) &&
863 (!list_empty(&vbd->new_requests) ||
864 !list_empty(&vbd->failed_requests) ||
865 !list_empty(&vbd->completed_requests)))
866 goto fail;
868 return tapdisk_vbd_shutdown(vbd);
870 fail:
871 td_flag_set(vbd->state, TD_VBD_SHUTDOWN_REQUESTED);
872 DBG(TLOG_WARN, "%s: requests pending\n", vbd->name);
873 return -EAGAIN;
874 }
876 /*
877 * control operations
878 */
880 void
881 tapdisk_vbd_debug(td_vbd_t *vbd)
882 {
883 td_image_t *image, *tmp;
884 int new, pending, failed, completed;
886 tapdisk_vbd_queue_count(vbd, &new, &pending, &failed, &completed);
888 DBG(TLOG_WARN, "%s: state: 0x%08x, new: 0x%02x, pending: 0x%02x, "
889 "failed: 0x%02x, completed: 0x%02x, last activity: %010ld.%06lld, "
890 "errors: 0x%04"PRIx64", retries: 0x%04"PRIx64", received: 0x%08"PRIx64", "
891 "returned: 0x%08"PRIx64", kicked: 0x%08"PRIx64"\n",
892 vbd->name, vbd->state, new, pending, failed, completed,
893 vbd->ts.tv_sec, (unsigned long long)vbd->ts.tv_usec,
894 vbd->errors, vbd->retries,
895 vbd->received, vbd->returned, vbd->kicked);
897 tapdisk_vbd_for_each_image(vbd, image, tmp)
898 td_debug(image);
899 }
901 static void
902 tapdisk_vbd_drop_log(td_vbd_t *vbd)
903 {
904 if (td_flag_test(vbd->state, TD_VBD_LOG_DROPPED))
905 return;
907 tapdisk_vbd_debug(vbd);
908 tlog_flush();
909 td_flag_set(vbd->state, TD_VBD_LOG_DROPPED);
910 }
912 int
913 tapdisk_vbd_get_image_info(td_vbd_t *vbd, image_t *img)
914 {
915 td_image_t *image;
917 memset(img, 0, sizeof(image_t));
919 if (list_empty(&vbd->images))
920 return -EINVAL;
922 image = tapdisk_vbd_first_image(vbd);
923 img->size = image->info.size;
924 img->secsize = image->info.sector_size;
925 img->info = image->info.info;
927 return 0;
928 }
930 int
931 tapdisk_vbd_queue_ready(td_vbd_t *vbd)
932 {
933 return (!td_flag_test(vbd->state, TD_VBD_DEAD) &&
934 !td_flag_test(vbd->state, TD_VBD_CLOSED) &&
935 !td_flag_test(vbd->state, TD_VBD_QUIESCED) &&
936 !td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED));
937 }
939 int
940 tapdisk_vbd_retry_needed(td_vbd_t *vbd)
941 {
942 return td_flag_test(vbd->state, TD_VBD_RETRY_NEEDED);
943 }
945 int
946 tapdisk_vbd_lock(td_vbd_t *vbd)
947 {
948 return 0;
949 }
951 int
952 tapdisk_vbd_quiesce_queue(td_vbd_t *vbd)
953 {
954 if (!list_empty(&vbd->pending_requests)) {
955 td_flag_set(vbd->state, TD_VBD_QUIESCE_REQUESTED);
956 return -EAGAIN;
957 }
959 td_flag_clear(vbd->state, TD_VBD_QUIESCE_REQUESTED);
960 td_flag_set(vbd->state, TD_VBD_QUIESCED);
961 return 0;
962 }
964 int
965 tapdisk_vbd_start_queue(td_vbd_t *vbd)
966 {
967 td_flag_clear(vbd->state, TD_VBD_QUIESCED);
968 td_flag_clear(vbd->state, TD_VBD_QUIESCE_REQUESTED);
969 return 0;
970 }
972 int
973 tapdisk_vbd_kill_queue(td_vbd_t *vbd)
974 {
975 tapdisk_vbd_quiesce_queue(vbd);
976 td_flag_set(vbd->state, TD_VBD_DEAD);
977 return 0;
978 }
980 static int
981 tapdisk_vbd_open_image(td_vbd_t *vbd, td_image_t *image)
982 {
983 int err;
984 td_image_t *parent;
986 err = td_open(image);
987 if (err)
988 return err;
990 if (!tapdisk_vbd_is_last_image(vbd, image)) {
991 parent = tapdisk_vbd_next_image(image);
992 err = td_validate_parent(image, parent);
993 if (err) {
994 td_close(image);
995 return err;
996 }
997 }
999 return 0;
1002 static int
1003 tapdisk_vbd_close_and_reopen_image(td_vbd_t *vbd, td_image_t *image)
1005 int i, err;
1007 td_close(image);
1009 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
1010 err = tapdisk_vbd_open_image(vbd, image);
1011 if (err != -EIO)
1012 break;
1014 sleep(TD_VBD_EIO_SLEEP);
1017 if (err)
1018 td_flag_set(vbd->state, TD_VBD_CLOSED);
1020 return err;
1023 int
1024 tapdisk_vbd_pause(td_vbd_t *vbd)
1026 int err;
1028 td_flag_set(vbd->state, TD_VBD_PAUSE_REQUESTED);
1030 err = tapdisk_vbd_quiesce_queue(vbd);
1031 if (err)
1032 return err;
1034 tapdisk_vbd_close_vdi(vbd);
1036 td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED);
1037 td_flag_set(vbd->state, TD_VBD_PAUSED);
1038 tapdisk_ipc_write(&vbd->ipc, TAPDISK_MESSAGE_PAUSE_RSP);
1040 return 0;
1043 int
1044 tapdisk_vbd_resume(td_vbd_t *vbd, const char *path, uint16_t drivertype)
1046 int i, err;
1048 if (!td_flag_test(vbd->state, TD_VBD_PAUSED)) {
1049 EPRINTF("resume request for unpaused vbd %s\n", vbd->name);
1050 tapdisk_ipc_write(&vbd->ipc, TAPDISK_MESSAGE_ERROR);
1051 return -EINVAL;
1054 free(vbd->name);
1055 vbd->name = strdup(path);
1056 if (!vbd->name) {
1057 EPRINTF("copying new vbd %s name failed\n", path);
1058 tapdisk_ipc_write(&vbd->ipc, TAPDISK_MESSAGE_ERROR);
1059 return -EINVAL;
1061 vbd->type = drivertype;
1063 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
1064 err = tapdisk_vbd_check_file(vbd);
1065 if (err)
1066 goto sleep;
1068 err = tapdisk_vbd_reactivate_volumes(vbd, 1);
1069 if (err) {
1070 EPRINTF("failed to reactivate %s: %d\n",
1071 vbd->name, err);
1072 goto sleep;
1075 err = __tapdisk_vbd_open_vdi(vbd, TD_OPEN_STRICT);
1076 if (!err)
1077 break;
1079 sleep:
1080 sleep(TD_VBD_EIO_SLEEP);
1083 if (err) {
1084 tapdisk_ipc_write(&vbd->ipc, TAPDISK_MESSAGE_ERROR);
1085 return err;
1088 tapdisk_vbd_start_queue(vbd);
1089 td_flag_clear(vbd->state, TD_VBD_PAUSED);
1090 td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED);
1091 tapdisk_ipc_write(&vbd->ipc, TAPDISK_MESSAGE_RESUME_RSP);
1093 return 0;
1096 int
1097 tapdisk_vbd_kick(td_vbd_t *vbd)
1099 int n;
1100 td_ring_t *ring;
1102 ring = &vbd->ring;
1103 if (!ring->sring)
1104 return 0;
1106 n = (ring->fe_ring.rsp_prod_pvt - ring->fe_ring.sring->rsp_prod);
1107 if (!n)
1108 return 0;
1110 vbd->kicked += n;
1111 RING_PUSH_RESPONSES(&ring->fe_ring);
1112 ioctl(ring->fd, BLKTAP_IOCTL_KICK_FE, 0);
1114 DBG(TLOG_INFO, "kicking %d: rec: 0x%08"PRIx64", ret: 0x%08"PRIx64", kicked: "
1115 "0x%08"PRIx64"\n", n, vbd->received, vbd->returned, vbd->kicked);
1117 return n;
1120 static inline void
1121 tapdisk_vbd_write_response_to_ring(td_vbd_t *vbd, blkif_response_t *rsp)
1123 td_ring_t *ring;
1124 blkif_response_t *rspp;
1126 ring = &vbd->ring;
1127 rspp = RING_GET_RESPONSE(&ring->fe_ring, ring->fe_ring.rsp_prod_pvt);
1128 memcpy(rspp, rsp, sizeof(blkif_response_t));
1129 ring->fe_ring.rsp_prod_pvt++;
1132 static void
1133 tapdisk_vbd_callback(void *arg, blkif_response_t *rsp)
1135 td_vbd_t *vbd = (td_vbd_t *)arg;
1136 tapdisk_vbd_write_response_to_ring(vbd, rsp);
1139 static void
1140 tapdisk_vbd_make_response(td_vbd_t *vbd, td_vbd_request_t *vreq)
1142 blkif_request_t tmp;
1143 blkif_response_t *rsp;
1145 tmp = vreq->req;
1146 rsp = (blkif_response_t *)&vreq->req;
1148 rsp->id = tmp.id;
1149 rsp->operation = tmp.operation;
1150 rsp->status = vreq->status;
1152 DBG(TLOG_DBG, "writing req %d, sec 0x%08"PRIx64", res %d to ring\n",
1153 (int)tmp.id, tmp.sector_number, vreq->status);
1155 if (rsp->status != BLKIF_RSP_OKAY)
1156 ERR(EIO, "returning BLKIF_RSP %d", rsp->status);
1158 vbd->returned++;
1159 vbd->callback(vbd->argument, rsp);
1162 void
1163 tapdisk_vbd_check_state(td_vbd_t *vbd)
1165 td_vbd_request_t *vreq, *tmp;
1167 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests)
1168 if (vreq->num_retries >= TD_VBD_MAX_RETRIES)
1169 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1171 if (!list_empty(&vbd->new_requests) ||
1172 !list_empty(&vbd->failed_requests))
1173 tapdisk_vbd_issue_requests(vbd);
1175 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->completed_requests) {
1176 tapdisk_vbd_make_response(vbd, vreq);
1177 list_del(&vreq->next);
1178 tapdisk_vbd_initialize_vreq(vreq);
1181 if (td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED))
1182 tapdisk_vbd_quiesce_queue(vbd);
1184 if (td_flag_test(vbd->state, TD_VBD_PAUSE_REQUESTED))
1185 tapdisk_vbd_pause(vbd);
1187 if (td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED))
1188 tapdisk_vbd_close(vbd);
1191 void
1192 tapdisk_vbd_check_progress(td_vbd_t *vbd)
1194 int diff;
1195 struct timeval now;
1197 if (list_empty(&vbd->pending_requests))
1198 return;
1200 gettimeofday(&now, NULL);
1201 diff = now.tv_sec - vbd->ts.tv_sec;
1203 if (diff >= TD_VBD_WATCHDOG_TIMEOUT) {
1204 DBG(TLOG_WARN, "%s: watchdog timeout: pending requests "
1205 "idle for %d seconds\n", vbd->name, diff);
1206 tapdisk_vbd_drop_log(vbd);
1207 return;
1210 tapdisk_server_set_max_timeout(TD_VBD_WATCHDOG_TIMEOUT - diff);
1213 /*
1214 * request submission
1215 */
1217 static int
1218 tapdisk_vbd_check_queue(td_vbd_t *vbd)
1220 int err;
1221 td_image_t *image;
1223 if (list_empty(&vbd->images))
1224 return -ENOSYS;
1226 if (!tapdisk_vbd_queue_ready(vbd))
1227 return -EAGAIN;
1229 if (!vbd->reopened) {
1230 if (td_flag_test(vbd->state, TD_VBD_LOCKING)) {
1231 err = tapdisk_vbd_lock(vbd);
1232 if (err)
1233 return err;
1236 image = tapdisk_vbd_first_image(vbd);
1237 td_flag_set(image->flags, TD_OPEN_STRICT);
1239 if (tapdisk_vbd_close_and_reopen_image(vbd, image))
1240 EPRINTF("reopening disks failed\n");
1241 else {
1242 DPRINTF("reopening disks succeeded\n");
1243 vbd->reopened = 1;
1247 return 0;
1250 void
1251 tapdisk_vbd_complete_vbd_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
1253 if (!vreq->submitting && !vreq->secs_pending) {
1254 if (vreq->status == BLKIF_RSP_ERROR &&
1255 vreq->num_retries < TD_VBD_MAX_RETRIES &&
1256 !td_flag_test(vbd->state, TD_VBD_DEAD) &&
1257 !td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED))
1258 tapdisk_vbd_move_request(vreq, &vbd->failed_requests);
1259 else
1260 tapdisk_vbd_move_request(vreq, &vbd->completed_requests);
1264 static void
1265 __tapdisk_vbd_complete_td_request(td_vbd_t *vbd, td_vbd_request_t *vreq,
1266 td_request_t treq, int res)
1268 int err;
1270 err = (res <= 0 ? res : -res);
1271 vbd->secs_pending -= treq.secs;
1272 vreq->secs_pending -= treq.secs;
1274 vreq->blocked = treq.blocked;
1276 if (err) {
1277 vreq->status = BLKIF_RSP_ERROR;
1278 vreq->error = (vreq->error ? : err);
1279 if (err != -EBUSY) {
1280 vbd->errors++;
1281 ERR(err, "req %"PRIu64": %s 0x%04x secs to "
1282 "0x%08"PRIx64, vreq->req.id,
1283 (treq.op == TD_OP_WRITE ? "write" : "read"),
1284 treq.secs, treq.sec);
1288 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1291 static void
1292 __tapdisk_vbd_reissue_td_request(td_vbd_t *vbd,
1293 td_image_t *image, td_request_t treq)
1295 td_image_t *parent;
1296 td_vbd_request_t *vreq;
1298 vreq = (td_vbd_request_t *)treq.private;
1299 gettimeofday(&vreq->last_try, NULL);
1301 vreq->submitting++;
1303 if (tapdisk_vbd_is_last_image(vbd, image)) {
1304 memset(treq.buf, 0, treq.secs << SECTOR_SHIFT);
1305 td_complete_request(treq, 0);
1306 goto done;
1309 parent = tapdisk_vbd_next_image(image);
1310 treq.image = parent;
1312 /* return zeros for requests that extend beyond end of parent image */
1313 if (treq.sec + treq.secs > parent->info.size) {
1314 td_request_t clone = treq;
1316 if (parent->info.size > treq.sec) {
1317 int secs = parent->info.size - treq.sec;
1318 clone.sec += secs;
1319 clone.secs -= secs;
1320 clone.buf += (secs << SECTOR_SHIFT);
1321 treq.secs = secs;
1322 } else
1323 treq.secs = 0;
1325 memset(clone.buf, 0, clone.secs << SECTOR_SHIFT);
1326 td_complete_request(clone, 0);
1328 if (!treq.secs)
1329 goto done;
1332 switch (treq.op) {
1333 case TD_OP_WRITE:
1334 td_queue_write(parent, treq);
1335 break;
1337 case TD_OP_READ:
1338 td_queue_read(parent, treq);
1339 break;
1342 done:
1343 vreq->submitting--;
1344 if (!vreq->secs_pending)
1345 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1348 void
1349 tapdisk_vbd_forward_request(td_request_t treq)
1351 td_vbd_t *vbd;
1352 td_image_t *image;
1353 td_vbd_request_t *vreq;
1355 image = treq.image;
1356 vbd = (td_vbd_t *)image->private;
1357 vreq = (td_vbd_request_t *)treq.private;
1359 gettimeofday(&vbd->ts, NULL);
1361 if (tapdisk_vbd_queue_ready(vbd))
1362 __tapdisk_vbd_reissue_td_request(vbd, image, treq);
1363 else
1364 __tapdisk_vbd_complete_td_request(vbd, vreq, treq, -EIO);
1367 static void
1368 tapdisk_vbd_complete_td_request(td_request_t treq, int res)
1370 td_vbd_t *vbd;
1371 td_image_t *image;
1372 td_vbd_request_t *vreq;
1374 image = treq.image;
1375 vbd = (td_vbd_t *)image->private;
1376 vreq = (td_vbd_request_t *)treq.private;
1378 gettimeofday(&vbd->ts, NULL);
1379 DBG(TLOG_DBG, "%s: req %d seg %d sec 0x%08"PRIx64" "
1380 "secs 0x%04x buf %p op %d res %d\n", image->name,
1381 (int)treq.id, treq.sidx, treq.sec, treq.secs,
1382 treq.buf, (int)vreq->req.operation, res);
1384 __tapdisk_vbd_complete_td_request(vbd, vreq, treq, res);
1387 static int
1388 tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
1390 char *page;
1391 td_ring_t *ring;
1392 td_image_t *image;
1393 td_request_t treq;
1394 uint64_t sector_nr;
1395 blkif_request_t *req;
1396 int i, err, id, nsects;
1398 req = &vreq->req;
1399 id = req->id;
1400 ring = &vbd->ring;
1401 sector_nr = req->sector_number;
1402 image = tapdisk_vbd_first_image(vbd);
1404 vreq->submitting = 1;
1405 gettimeofday(&vbd->ts, NULL);
1406 gettimeofday(&vreq->last_try, NULL);
1407 tapdisk_vbd_move_request(vreq, &vbd->pending_requests);
1409 err = tapdisk_vbd_check_queue(vbd);
1410 if (err)
1411 goto fail;
1413 err = tapdisk_image_check_ring_request(image, req);
1414 if (err)
1415 goto fail;
1417 for (i = 0; i < req->nr_segments; i++) {
1418 nsects = req->seg[i].last_sect - req->seg[i].first_sect + 1;
1419 page = (char *)MMAP_VADDR(ring->vstart,
1420 (unsigned long)req->id, i);
1421 page += (req->seg[i].first_sect << SECTOR_SHIFT);
1423 treq.id = id;
1424 treq.sidx = i;
1425 treq.blocked = 0;
1426 treq.buf = page;
1427 treq.sec = sector_nr;
1428 treq.secs = nsects;
1429 treq.image = image;
1430 treq.cb = tapdisk_vbd_complete_td_request;
1431 treq.cb_data = NULL;
1432 treq.private = vreq;
1434 DBG(TLOG_DBG, "%s: req %d seg %d sec 0x%08"PRIx64" secs 0x%04x "
1435 "buf %p op %d\n", image->name, id, i, treq.sec, treq.secs,
1436 treq.buf, (int)req->operation);
1438 vreq->secs_pending += nsects;
1439 vbd->secs_pending += nsects;
1441 switch (req->operation) {
1442 case BLKIF_OP_WRITE:
1443 treq.op = TD_OP_WRITE;
1444 td_queue_write(image, treq);
1445 break;
1447 case BLKIF_OP_READ:
1448 treq.op = TD_OP_READ;
1449 td_queue_read(image, treq);
1450 break;
1453 sector_nr += nsects;
1456 err = 0;
1458 out:
1459 vreq->submitting--;
1460 if (!vreq->secs_pending) {
1461 err = (err ? : vreq->error);
1462 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1465 return err;
1467 fail:
1468 vreq->status = BLKIF_RSP_ERROR;
1469 goto out;
1472 static int
1473 tapdisk_vbd_reissue_failed_requests(td_vbd_t *vbd)
1475 int err;
1476 struct timeval now;
1477 td_vbd_request_t *vreq, *tmp;
1479 err = 0;
1480 gettimeofday(&now, NULL);
1482 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests) {
1483 if (vreq->secs_pending)
1484 continue;
1486 if (td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED))
1487 goto fail;
1489 if (vreq->error != -EBUSY &&
1490 now.tv_sec - vreq->last_try.tv_sec < TD_VBD_RETRY_INTERVAL)
1491 continue;
1493 if (vreq->num_retries >= TD_VBD_MAX_RETRIES) {
1494 fail:
1495 DBG(TLOG_INFO, "req %"PRIu64"retried %d times\n",
1496 vreq->req.id, vreq->num_retries);
1497 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1498 continue;
1501 /*
1502 * never fail due to too many retries if we are blocked on a
1503 * dependency
1504 */
1505 if (vreq->blocked) {
1506 vreq->blocked = 0;
1507 } else {
1508 vbd->retries++;
1509 vreq->num_retries++;
1511 vreq->error = 0;
1512 vreq->status = BLKIF_RSP_OKAY;
1513 DBG(TLOG_DBG, "retry #%d of req %"PRIu64", "
1514 "sec 0x%08"PRIx64", nr_segs: %d\n", vreq->num_retries,
1515 vreq->req.id, vreq->req.sector_number,
1516 vreq->req.nr_segments);
1518 err = tapdisk_vbd_issue_request(vbd, vreq);
1519 if (err)
1520 break;
1523 if (list_empty(&vbd->failed_requests))
1524 td_flag_clear(vbd->state, TD_VBD_RETRY_NEEDED);
1525 else
1526 td_flag_set(vbd->state, TD_VBD_RETRY_NEEDED);
1528 return err;
1531 static int
1532 tapdisk_vbd_issue_new_requests(td_vbd_t *vbd)
1534 int err;
1535 td_vbd_request_t *vreq, *tmp;
1537 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->new_requests) {
1538 err = tapdisk_vbd_issue_request(vbd, vreq);
1539 if (err)
1540 return err;
1543 return 0;
1546 static int
1547 tapdisk_vbd_kill_requests(td_vbd_t *vbd)
1549 td_vbd_request_t *vreq, *tmp;
1551 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->new_requests) {
1552 vreq->status = BLKIF_RSP_ERROR;
1553 tapdisk_vbd_move_request(vreq, &vbd->completed_requests);
1556 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests) {
1557 vreq->status = BLKIF_RSP_ERROR;
1558 tapdisk_vbd_move_request(vreq, &vbd->completed_requests);
1561 return 0;
1564 int
1565 tapdisk_vbd_issue_requests(td_vbd_t *vbd)
1567 int err;
1569 if (td_flag_test(vbd->state, TD_VBD_DEAD))
1570 return tapdisk_vbd_kill_requests(vbd);
1572 if (!tapdisk_vbd_queue_ready(vbd))
1573 return -EAGAIN;
1575 err = tapdisk_vbd_reissue_failed_requests(vbd);
1576 if (err)
1577 return err;
1579 return tapdisk_vbd_issue_new_requests(vbd);
1582 static void
1583 tapdisk_vbd_pull_ring_requests(td_vbd_t *vbd)
1585 int idx;
1586 RING_IDX rp, rc;
1587 td_ring_t *ring;
1588 blkif_request_t *req;
1589 td_vbd_request_t *vreq;
1591 ring = &vbd->ring;
1592 if (!ring->sring)
1593 return;
1595 rp = ring->fe_ring.sring->req_prod;
1596 xen_rmb();
1598 for (rc = ring->fe_ring.req_cons; rc != rp; rc++) {
1599 req = RING_GET_REQUEST(&ring->fe_ring, rc);
1600 ++ring->fe_ring.req_cons;
1602 idx = req->id;
1603 vreq = &vbd->request_list[idx];
1605 ASSERT(list_empty(&vreq->next));
1606 ASSERT(vreq->secs_pending == 0);
1608 memcpy(&vreq->req, req, sizeof(blkif_request_t));
1609 vbd->received++;
1610 vreq->vbd = vbd;
1612 tapdisk_vbd_move_request(vreq, &vbd->new_requests);
1614 DBG(TLOG_DBG, "%s: request %d \n", vbd->name, idx);
1618 static int
1619 tapdisk_vbd_pause_ring(td_vbd_t *vbd)
1621 int err;
1623 if (td_flag_test(vbd->state, TD_VBD_PAUSED))
1624 return 0;
1626 td_flag_set(vbd->state, TD_VBD_PAUSE_REQUESTED);
1628 err = tapdisk_vbd_quiesce_queue(vbd);
1629 if (err) {
1630 EPRINTF("%s: ring pause request on active queue\n", vbd->name);
1631 return err;
1634 tapdisk_vbd_close_vdi(vbd);
1636 err = ioctl(vbd->ring.fd, BLKTAP2_IOCTL_PAUSE, 0);
1637 if (err)
1638 EPRINTF("%s: pause ioctl failed: %d\n", vbd->name, errno);
1639 else {
1640 td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED);
1641 td_flag_set(vbd->state, TD_VBD_PAUSED);
1644 return err;
1647 static int
1648 tapdisk_vbd_resume_ring(td_vbd_t *vbd)
1650 int i, err, type;
1651 char *path, message[BLKTAP2_MAX_MESSAGE_LEN];
1653 memset(message, 0, sizeof(message));
1655 if (!td_flag_test(vbd->state, TD_VBD_PAUSED)) {
1656 EPRINTF("%s: resume message for unpaused vbd\n", vbd->name);
1657 return -EINVAL;
1660 err = ioctl(vbd->ring.fd, BLKTAP2_IOCTL_REOPEN, &message);
1661 if (err) {
1662 EPRINTF("%s: resume ioctl failed: %d\n", vbd->name, errno);
1663 return err;
1666 err = tapdisk_parse_disk_type(message, &path, &type);
1667 if (err) {
1668 EPRINTF("%s: invalid resume string %s\n", vbd->name, message);
1669 goto out;
1672 free(vbd->name);
1673 vbd->name = strdup(path);
1674 if (!vbd->name) {
1675 EPRINTF("resume malloc failed\n");
1676 err = -ENOMEM;
1677 goto out;
1679 vbd->type = type;
1681 tapdisk_vbd_start_queue(vbd);
1683 err = tapdisk_vbd_reactivate_volumes(vbd, 1);
1684 if (err) {
1685 EPRINTF("failed to reactivate %s, %d\n", vbd->name, err);
1686 goto out;
1689 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
1690 err = __tapdisk_vbd_open_vdi(vbd, TD_OPEN_STRICT);
1691 if (err != -EIO)
1692 break;
1694 sleep(TD_VBD_EIO_SLEEP);
1697 out:
1698 if (!err) {
1699 image_t image;
1700 struct blktap2_params params;
1702 memset(&params, 0, sizeof(params));
1703 tapdisk_vbd_get_image_info(vbd, &image);
1705 params.sector_size = image.secsize;
1706 params.capacity = image.size;
1707 snprintf(params.name, sizeof(params.name) - 1, "%s", message);
1709 ioctl(vbd->ring.fd, BLKTAP2_IOCTL_SET_PARAMS, &params);
1710 td_flag_clear(vbd->state, TD_VBD_PAUSED);
1713 ioctl(vbd->ring.fd, BLKTAP2_IOCTL_RESUME, err);
1714 return err;
1717 static int
1718 tapdisk_vbd_check_ring_message(td_vbd_t *vbd)
1720 if (!vbd->ring.sring)
1721 return -EINVAL;
1723 switch (vbd->ring.sring->pad[0]) {
1724 case 0:
1725 return 0;
1727 case BLKTAP2_RING_MESSAGE_PAUSE:
1728 return tapdisk_vbd_pause_ring(vbd);
1730 case BLKTAP2_RING_MESSAGE_RESUME:
1731 return tapdisk_vbd_resume_ring(vbd);
1733 case BLKTAP2_RING_MESSAGE_CLOSE:
1734 return tapdisk_vbd_close(vbd);
1736 default:
1737 return -EINVAL;
1741 static void
1742 tapdisk_vbd_ring_event(event_id_t id, char mode, void *private)
1744 td_vbd_t *vbd;
1746 vbd = (td_vbd_t *)private;
1748 tapdisk_vbd_pull_ring_requests(vbd);
1749 tapdisk_vbd_issue_requests(vbd);
1751 /* vbd may be destroyed after this call */
1752 tapdisk_vbd_check_ring_message(vbd);
1755 td_image_t *
1756 tapdisk_vbd_first_image(td_vbd_t *vbd)
1758 return list_entry(vbd->images.next, td_image_t, next);