ia64/xen-unstable

view tools/blktap2/daemon/tapdisk-channel.c @ 19647:1c627434605e

blktap2: a completely rewritten blktap implementation

Benefits to blktap2 over the old version of blktap:

* Isolation from xenstore - Blktap devices are now created directly on
the linux dom0 command line, rather than being spawned in response
to XenStore events. This is handy for debugging, makes blktap
generally easier to work with, and is a step toward a generic
user-level block device implementation that is not Xen-specific.

* Improved tapdisk infrastructure: simpler request forwarding, new
request scheduler, request merging, more efficient use of AIO.

* Improved tapdisk error handling and memory management. No
allocations on the block data path, IO retry logic to protect
guests from transient block device failures. This has been tested
and is known to work on weird environments such as NFS soft mounts.

* Pause and snapshot of live virtual disks (see xmsnap script).

* VHD support. The VHD code in this release has been rigorously
tested, and represents a very mature implementation of the VHD
image
format.

* No more duplication of mechanism with blkback. The blktap kernel
module has changed dramatically from the original blktap. Blkback
is now always used to talk to Xen guests, blktap just presents a
Linux gendisk that blkback can export. This is done while
preserving the zero-copy data path from domU to physical device.

These patches deprecate the old blktap code, which can hopefully be
removed from the tree completely at some point in the future.

Signed-off-by: Jake Wires <jake.wires@citrix.com>
Signed-off-by: Dutch Meyer <dmeyer@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:52:31 2009 +0100 (2009-05-26)
parents
children
line source
1 /* Copyright (c) 2008, XenSource Inc.
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 * * Neither the name of XenSource Inc. nor the names of its contributors
12 * may be used to endorse or promote products derived from this software
13 * without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 #include <stdio.h>
28 #include <errno.h>
29 #include <fcntl.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <string.h>
33 #include <stdarg.h>
34 #include <sys/wait.h>
35 #include <sys/ioctl.h>
36 #include <sys/resource.h>
38 #include <xs.h>
39 #include "disktypes.h"
40 #include "tapdisk-dispatch.h"
/*
 * Values of channel->state.
 *
 * A channel starts out IDLE.  Sending a PID/OPEN/PAUSE/RESUME/CLOSE
 * message moves it to the matching WAIT_* state, and validating the
 * corresponding *_RSP message moves it back to IDLE.  CLOSED is set once
 * the shutdown response has been received.
 */
42 #define TAPDISK_CHANNEL_IDLE 1
43 #define TAPDISK_CHANNEL_WAIT_PID 2
44 #define TAPDISK_CHANNEL_WAIT_OPEN 3
45 #define TAPDISK_CHANNEL_WAIT_PAUSE 4
46 #define TAPDISK_CHANNEL_WAIT_RESUME 5
47 #define TAPDISK_CHANNEL_WAIT_CLOSE 6
48 #define TAPDISK_CHANNEL_CLOSED 7
/*
 * Forward declarations for helpers that are used before their definitions
 * further down in this file.  The two reporting helpers take a
 * printf-style format (argument 2) followed by its arguments.
 */
50 static void tapdisk_channel_error(tapdisk_channel_t *,
51 const char *fmt, ...)
52 __attribute__((format(printf, 2, 3)));
53 static void tapdisk_channel_fatal(tapdisk_channel_t *,
54 const char *fmt, ...)
55 __attribute__((format(printf, 2, 3)));
56 static int tapdisk_channel_parse_params(tapdisk_channel_t *);
57 static void tapdisk_channel_pause_event(struct xs_handle *,
58 struct xenbus_watch *,
59 const char *);
61 static int
62 tapdisk_channel_check_uuid(tapdisk_channel_t *channel)
63 {
64 uint32_t uuid;
65 char *uuid_str;
67 uuid_str = xs_read(channel->xsh, XBT_NULL, channel->uuid_str, NULL);
68 if (!uuid_str)
69 return -errno;
71 uuid = strtoul(uuid_str, NULL, 10);
72 free(uuid_str);
74 if (uuid != channel->cookie)
75 return -EINVAL;
77 return 0;
78 }
80 static inline int
81 tapdisk_channel_validate_watch(tapdisk_channel_t *channel, const char *path)
82 {
83 int err, len;
85 len = strsep_len(path, '/', 7);
86 if (len < 0)
87 return -EINVAL;
89 err = tapdisk_channel_check_uuid(channel);
90 if (err)
91 return err;
93 if (!xs_exists(channel->xsh, path))
94 return -ENOENT;
96 return 0;
97 }
99 static inline int
100 tapdisk_channel_validate_message(tapdisk_channel_t *channel,
101 tapdisk_message_t *message)
102 {
103 switch (message->type) {
104 case TAPDISK_MESSAGE_PID_RSP:
105 if (channel->state != TAPDISK_CHANNEL_WAIT_PID)
106 return -EINVAL;
107 break;
109 case TAPDISK_MESSAGE_OPEN_RSP:
110 if (channel->state != TAPDISK_CHANNEL_WAIT_OPEN)
111 return -EINVAL;
112 break;
114 case TAPDISK_MESSAGE_PAUSE_RSP:
115 if (channel->state != TAPDISK_CHANNEL_WAIT_PAUSE)
116 return -EINVAL;
117 break;
119 case TAPDISK_MESSAGE_RESUME_RSP:
120 if (channel->state != TAPDISK_CHANNEL_WAIT_RESUME)
121 return -EINVAL;
122 break;
124 case TAPDISK_MESSAGE_CLOSE_RSP:
125 if (channel->state != TAPDISK_CHANNEL_WAIT_CLOSE)
126 return -EINVAL;
127 break;
129 case TAPDISK_MESSAGE_RUNTIME_ERROR:
130 /*
131 * runtime errors can be received at any time
132 * and should not affect the state machine
133 */
134 return 0;
135 }
137 channel->state = TAPDISK_CHANNEL_IDLE;
138 return 0;
139 }
141 static int
142 tapdisk_channel_send_message(tapdisk_channel_t *channel,
143 tapdisk_message_t *message, int timeout)
144 {
145 fd_set writefds;
146 struct timeval tv;
147 int ret, len, offset;
149 tv.tv_sec = timeout;
150 tv.tv_usec = 0;
151 offset = 0;
152 len = sizeof(tapdisk_message_t);
154 DPRINTF("%s: sending '%s' message to %d:%d\n",
155 channel->path, tapdisk_message_name(message->type),
156 channel->channel_id, channel->cookie);
158 if (channel->state != TAPDISK_CHANNEL_IDLE &&
159 message->type != TAPDISK_MESSAGE_CLOSE)
160 EPRINTF("%s: writing message to non-idle channel (%d)\n",
161 channel->path, channel->state);
163 while (offset < len) {
164 FD_ZERO(&writefds);
165 FD_SET(channel->write_fd, &writefds);
167 /* we don't bother reinitializing tv. at worst, it will wait a
168 * bit more time than expected. */
170 ret = select(channel->write_fd + 1,
171 NULL, &writefds, NULL, &tv);
172 if (ret == -1)
173 break;
174 else if (FD_ISSET(channel->write_fd, &writefds)) {
175 ret = write(channel->write_fd,
176 message + offset, len - offset);
177 if (ret <= 0)
178 break;
179 offset += ret;
180 } else
181 break;
182 }
184 if (offset != len) {
185 EPRINTF("%s: error writing '%s' message to %d:%d\n",
186 channel->path, tapdisk_message_name(message->type),
187 channel->channel_id, channel->cookie);
188 return -EIO;
189 }
191 switch (message->type) {
192 case TAPDISK_MESSAGE_PID:
193 channel->state = TAPDISK_CHANNEL_WAIT_PID;
194 break;
196 case TAPDISK_MESSAGE_OPEN:
197 channel->state = TAPDISK_CHANNEL_WAIT_OPEN;
198 break;
200 case TAPDISK_MESSAGE_PAUSE:
201 channel->state = TAPDISK_CHANNEL_WAIT_PAUSE;
202 break;
204 case TAPDISK_MESSAGE_RESUME:
205 channel->state = TAPDISK_CHANNEL_WAIT_RESUME;
206 break;
208 case TAPDISK_MESSAGE_CLOSE:
209 channel->state = TAPDISK_CHANNEL_WAIT_CLOSE;
210 break;
212 default:
213 EPRINTF("%s: unrecognized message type %d\n",
214 channel->path, message->type);
215 }
217 return 0;
218 }
220 static void
221 __tapdisk_channel_error(tapdisk_channel_t *channel,
222 const char *fmt, va_list ap)
223 {
224 int err;
225 char *dir, *buf, *message;
227 err = vasprintf(&buf, fmt, ap);
228 if (err == -1) {
229 EPRINTF("failed to allocate error message\n");
230 buf = NULL;
231 }
233 if (buf)
234 message = buf;
235 else
236 message = "tapdisk error";
238 EPRINTF("%s: %s\n", channel->path, message);
240 err = asprintf(&dir, "%s/tapdisk-error", channel->path);
241 if (err == -1) {
242 EPRINTF("%s: failed to write %s\n", __func__, message);
243 dir = NULL;
244 goto out;
245 }
247 xs_write(channel->xsh, XBT_NULL, dir, message, strlen(message));
249 out:
250 free(dir);
251 free(buf);
252 }
254 static void
255 tapdisk_channel_error(tapdisk_channel_t *channel, const char *fmt, ...)
256 {
257 va_list ap;
259 va_start(ap, fmt);
260 __tapdisk_channel_error(channel, fmt, ap);
261 va_end(ap);
262 }
264 static void
265 tapdisk_channel_fatal(tapdisk_channel_t *channel, const char *fmt, ...)
266 {
267 va_list ap;
269 va_start(ap, fmt);
270 __tapdisk_channel_error(channel, fmt, ap);
271 va_end(ap);
273 tapdisk_channel_close(channel);
274 }
276 static int
277 tapdisk_channel_connect_backdev(tapdisk_channel_t *channel)
278 {
279 int err, major, minor;
280 char *s, *path, *devname;
282 s = NULL;
283 path = NULL;
284 devname = NULL;
286 err = ioctl(channel->blktap_fd,
287 BLKTAP_IOCTL_BACKDEV_SETUP, channel->minor);
288 if (err) {
289 err = -errno;
290 goto fail;
291 }
293 err = asprintf(&path, "%s/backdev-node", channel->path);
294 if (err == -1) {
295 path = NULL;
296 err = -ENOMEM;
297 goto fail;
298 }
300 s = xs_read(channel->xsh, XBT_NULL, path, NULL);
301 if (!s) {
302 err = -errno;
303 goto fail;
304 }
306 err = sscanf(s, "%d:%d", &major, &minor);
307 if (err != 2) {
308 err = -EINVAL;
309 goto fail;
310 }
312 err = asprintf(&devname,"%s/%s%d",
313 BLKTAP_DEV_DIR, BACKDEV_NAME, minor);
314 if (err == -1) {
315 devname = NULL;
316 err = -ENOMEM;
317 goto fail;
318 }
320 err = make_blktap_device(devname, major, minor, S_IFBLK | 0600);
321 if (err)
322 goto fail;
324 free(path);
325 err = asprintf(&path, "%s/backdev-path", channel->path);
326 if (err == -1) {
327 path = NULL;
328 err = -ENOMEM;
329 goto fail;
330 }
332 err = xs_write(channel->xsh, XBT_NULL, path, devname, strlen(devname));
333 if (err == 0) {
334 err = -errno;
335 goto fail;
336 }
338 err = 0;
339 out:
340 free(devname);
341 free(path);
342 free(s);
343 return err;
345 fail:
346 EPRINTF("backdev setup failed [%d]\n", err);
347 goto out;
348 }
350 static int
351 tapdisk_channel_complete_connection(tapdisk_channel_t *channel)
352 {
353 int err;
354 char *path;
356 if (!xs_printf(channel->xsh, channel->path,
357 "sectors", "%llu", channel->image.size)) {
358 EPRINTF("ERROR: Failed writing sectors");
359 return -errno;
360 }
362 if (!xs_printf(channel->xsh, channel->path,
363 "sector-size", "%lu", channel->image.secsize)) {
364 EPRINTF("ERROR: Failed writing sector-size");
365 return -errno;
366 }
368 if (!xs_printf(channel->xsh, channel->path,
369 "info", "%u", channel->image.info)) {
370 EPRINTF("ERROR: Failed writing info");
371 return -errno;
372 }
374 err = tapdisk_channel_connect_backdev(channel);
375 if (err)
376 goto clean;
378 channel->connected = 1;
379 return 0;
381 clean:
382 if (asprintf(&path, "%s/info", channel->path) == -1)
383 return err;
385 if (!xs_rm(channel->xsh, XBT_NULL, path))
386 goto clean_out;
388 free(path);
389 if (asprintf(&path, "%s/sector-size", channel->path) == -1)
390 return err;
392 if (!xs_rm(channel->xsh, XBT_NULL, path))
393 goto clean_out;
395 free(path);
396 if (asprintf(&path, "%s/sectors", channel->path) == -1)
397 return err;
399 xs_rm(channel->xsh, XBT_NULL, path);
401 clean_out:
402 free(path);
403 return err;
404 }
406 static int
407 tapdisk_channel_send_open_request(tapdisk_channel_t *channel)
408 {
409 int len;
410 tapdisk_message_t message;
412 memset(&message, 0, sizeof(tapdisk_message_t));
414 len = strlen(channel->vdi_path);
416 message.type = TAPDISK_MESSAGE_OPEN;
417 message.cookie = channel->cookie;
418 message.drivertype = channel->drivertype;
419 message.u.params.storage = channel->storage;
420 message.u.params.devnum = channel->minor;
421 message.u.params.domid = channel->domid;
422 message.u.params.path_len = len;
423 strncpy(message.u.params.path, channel->vdi_path, len);
425 if (channel->mode == 'r')
426 message.u.params.flags |= TAPDISK_MESSAGE_FLAG_RDONLY;
427 if (channel->shared)
428 message.u.params.flags |= TAPDISK_MESSAGE_FLAG_SHARED;
430 /* TODO: clean this up */
431 if (xs_exists(channel->xsh, "/local/domain/0/tapdisk/add-cache"))
432 message.u.params.flags |= TAPDISK_MESSAGE_FLAG_ADD_CACHE;
433 if (xs_exists(channel->xsh, "/local/domain/0/tapdisk/log-dirty"))
434 message.u.params.flags |= TAPDISK_MESSAGE_FLAG_LOG_DIRTY;
436 return tapdisk_channel_send_message(channel, &message, 2);
437 }
439 static int
440 tapdisk_channel_receive_open_response(tapdisk_channel_t *channel,
441 tapdisk_message_t *message)
442 {
443 int err;
445 channel->image.size = message->u.image.sectors;
446 channel->image.secsize = message->u.image.sector_size;
447 channel->image.info = message->u.image.info;
449 err = tapdisk_channel_complete_connection(channel);
450 if (err)
451 goto fail;
453 /* did we receive a pause request before the connection completed? */
454 if (channel->pause_needed) {
455 DPRINTF("%s: deferred pause request\n", channel->path);
456 tapdisk_channel_pause_event(channel->xsh,
457 &channel->pause_watch,
458 channel->pause_str);
459 channel->pause_needed = 0;
460 }
462 return 0;
464 fail:
465 tapdisk_channel_fatal(channel,
466 "failure completing connection: %d", err);
467 return err;
468 }
470 static int
471 tapdisk_channel_send_shutdown_request(tapdisk_channel_t *channel)
472 {
473 tapdisk_message_t message;
475 memset(&message, 0, sizeof(tapdisk_message_t));
477 message.type = TAPDISK_MESSAGE_CLOSE;
478 message.drivertype = channel->drivertype;
479 message.cookie = channel->cookie;
481 return tapdisk_channel_send_message(channel, &message, 2);
482 }
484 static int
485 tapdisk_channel_receive_shutdown_response(tapdisk_channel_t *channel,
486 tapdisk_message_t *message)
487 {
488 channel->open = 0;
489 channel->state = TAPDISK_CHANNEL_CLOSED;
490 tapdisk_channel_close(channel);
491 return 0;
492 }
494 static int
495 tapdisk_channel_receive_runtime_error(tapdisk_channel_t *channel,
496 tapdisk_message_t *message)
497 {
498 tapdisk_channel_error(channel,
499 "runtime error: %s", message->u.string.text);
500 return 0;
501 }
503 static int
504 tapdisk_channel_send_pid_request(tapdisk_channel_t *channel)
505 {
506 int err;
507 tapdisk_message_t message;
509 memset(&message, 0, sizeof(tapdisk_message_t));
511 message.type = TAPDISK_MESSAGE_PID;
512 message.drivertype = channel->drivertype;
513 message.cookie = channel->cookie;
515 err = tapdisk_channel_send_message(channel, &message, 2);
517 if (!err)
518 channel->open = 1;
520 return err;
521 }
523 static int
524 tapdisk_channel_receive_pid_response(tapdisk_channel_t *channel,
525 tapdisk_message_t *message)
526 {
527 int err;
529 channel->tapdisk_pid = message->u.tapdisk_pid;
531 DPRINTF("%s: tapdisk pid: %d\n", channel->path, channel->tapdisk_pid);
533 err = setpriority(PRIO_PROCESS, channel->tapdisk_pid, PRIO_SPECIAL_IO);
534 if (err) {
535 tapdisk_channel_fatal(channel,
536 "setting tapdisk priority: %d", err);
537 return err;
538 }
540 err = tapdisk_channel_send_open_request(channel);
541 if (err) {
542 tapdisk_channel_fatal(channel,
543 "sending open request: %d", err);
544 return err;
545 }
547 return 0;
548 }
550 static int
551 tapdisk_channel_send_pause_request(tapdisk_channel_t *channel)
552 {
553 tapdisk_message_t message;
555 memset(&message, 0, sizeof(tapdisk_message_t));
557 DPRINTF("pausing %s\n", channel->path);
559 message.type = TAPDISK_MESSAGE_PAUSE;
560 message.drivertype = channel->drivertype;
561 message.cookie = channel->cookie;
563 return tapdisk_channel_send_message(channel, &message, 2);
564 }
566 static int
567 tapdisk_channel_receive_pause_response(tapdisk_channel_t *channel,
568 tapdisk_message_t *message)
569 {
570 int err;
572 if (!xs_write(channel->xsh, XBT_NULL,
573 channel->pause_done_str, "", strlen(""))) {
574 err = -errno;
575 goto fail;
576 }
578 return 0;
580 fail:
581 tapdisk_channel_fatal(channel,
582 "failure receiving pause response: %d\n", err);
583 return err;
584 }
586 static int
587 tapdisk_channel_send_resume_request(tapdisk_channel_t *channel)
588 {
589 int len;
590 tapdisk_message_t message;
592 memset(&message, 0, sizeof(tapdisk_message_t));
594 len = strlen(channel->vdi_path);
596 DPRINTF("resuming %s\n", channel->path);
598 message.type = TAPDISK_MESSAGE_RESUME;
599 message.drivertype = channel->drivertype;
600 message.cookie = channel->cookie;
601 message.u.params.path_len = len;
602 strncpy(message.u.params.path, channel->vdi_path, len);
604 return tapdisk_channel_send_message(channel, &message, 2);
605 }
607 static int
608 tapdisk_channel_receive_resume_response(tapdisk_channel_t *channel,
609 tapdisk_message_t *message)
610 {
611 int err;
613 if (!xs_rm(channel->xsh, XBT_NULL, channel->pause_done_str)) {
614 err = -errno;
615 goto fail;
616 }
618 return 0;
620 fail:
621 tapdisk_channel_fatal(channel,
622 "failure receiving pause response: %d", err);
623 return err;
624 }
626 static void
627 tapdisk_channel_shutdown_event(struct xs_handle *xsh,
628 struct xenbus_watch *watch, const char *path)
629 {
630 int err;
631 tapdisk_channel_t *channel;
633 channel = watch->data;
635 DPRINTF("%s: got watch on %s\n", channel->path, path);
637 if (!xs_exists(channel->xsh, channel->path)) {
638 tapdisk_channel_close(channel);
639 return;
640 }
642 err = tapdisk_channel_validate_watch(channel, path);
643 if (err) {
644 if (err == -EINVAL)
645 tapdisk_channel_fatal(channel, "bad shutdown watch");
646 return;
647 }
649 tapdisk_channel_send_shutdown_request(channel);
650 }
652 static void
653 tapdisk_channel_pause_event(struct xs_handle *xsh,
654 struct xenbus_watch *watch, const char *path)
655 {
656 int err, paused;
657 tapdisk_channel_t *channel;
659 channel = watch->data;
661 DPRINTF("%s: got watch on %s\n", channel->path, path);
663 if (!xs_exists(channel->xsh, channel->path)) {
664 tapdisk_channel_close(channel);
665 return;
666 }
668 /* NB: The VBD is essentially considered ready since the
669 * backend hotplug event ocurred, which is just after
670 * start-tapdisk, not after watch registration. We start
671 * testing xenstore keys with the very first shot, but defer
672 * until after connection completion. */
674 err = tapdisk_channel_validate_watch(channel, path);
675 if (err) {
676 if (err == -EINVAL)
677 tapdisk_channel_fatal(channel, "bad pause watch");
679 if (err != -ENOENT)
680 return;
682 err = 0;
683 }
685 paused = xs_exists(xsh, channel->pause_done_str);
687 if (xs_exists(xsh, channel->pause_str)) {
688 /*
689 * Duplicate requests are a protocol validation, but
690 * impossible to identify if watch registration and an
691 * actual pause request may fire separately in close
692 * succession. Warn, but do not signal an error.
693 */
694 int pausing = channel->state == TAPDISK_CHANNEL_WAIT_PAUSE;
695 if (pausing || paused) {
696 DPRINTF("Ignoring pause event for %s vbd %s\n",
697 pausing ? "pausing" : "paused", channel->path);
698 goto out;
699 }
701 /* defer if tapdisk is not ready yet */
702 if (!channel->connected) {
703 DPRINTF("%s: deferring pause request\n", path);
704 channel->pause_needed = 1;
705 goto out;
706 }
708 err = tapdisk_channel_send_pause_request(channel);
710 } else if (xs_exists(xsh, channel->pause_done_str)) {
711 free(channel->params);
712 channel->params = NULL;
713 channel->vdi_path = NULL;
715 err = xs_gather(channel->xsh, channel->path,
716 "params", NULL, &channel->params, NULL);
717 if (err) {
718 EPRINTF("failure re-reading params: %d\n", err);
719 channel->params = NULL;
720 goto out;
721 }
723 err = tapdisk_channel_parse_params(channel);
724 if (err)
725 goto out;
727 err = tapdisk_channel_send_resume_request(channel);
728 if (err)
729 goto out;
730 }
732 err = 0;
734 out:
735 if (err)
736 tapdisk_channel_error(channel, "pause event failed: %d", err);
737 }
739 static int
740 tapdisk_channel_open_control_socket(char *devname)
741 {
742 int err, fd;
743 fd_set socks;
744 struct timeval timeout;
746 err = mkdir(BLKTAP_CTRL_DIR, 0755);
747 if (err == -1 && errno != EEXIST) {
748 EPRINTF("Failure creating %s directory: %d\n",
749 BLKTAP_CTRL_DIR, errno);
750 return -errno;
751 }
753 err = mkfifo(devname, S_IRWXU | S_IRWXG | S_IRWXO);
754 if (err) {
755 if (errno == EEXIST) {
756 /*
757 * Remove fifo since it may have data from
758 * it's previous use --- earlier invocation
759 * of tapdisk may not have read all messages.
760 */
761 err = unlink(devname);
762 if (err) {
763 EPRINTF("ERROR: unlink(%s) failed (%d)\n",
764 devname, errno);
765 return -errno;
766 }
768 err = mkfifo(devname, S_IRWXU | S_IRWXG | S_IRWXO);
769 }
771 if (err) {
772 EPRINTF("ERROR: pipe failed (%d)\n", errno);
773 return -errno;
774 }
775 }
777 fd = open(devname, O_RDWR | O_NONBLOCK);
778 if (fd == -1) {
779 EPRINTF("Failed to open %s\n", devname);
780 return -errno;
781 }
783 return fd;
784 }
786 static int
787 tapdisk_channel_get_device_number(tapdisk_channel_t *channel)
788 {
789 char *devname;
790 domid_translate_t tr;
791 int major, minor, err;
793 tr.domid = channel->domid;
794 tr.busid = channel->busid;
796 minor = ioctl(channel->blktap_fd, BLKTAP_IOCTL_NEWINTF, tr);
797 if (minor <= 0 || minor > MAX_TAP_DEV) {
798 EPRINTF("invalid dev id: %d\n", minor);
799 return -EINVAL;
800 }
802 major = ioctl(channel->blktap_fd, BLKTAP_IOCTL_MAJOR, minor);
803 if (major < 0) {
804 EPRINTF("invalid major id: %d\n", major);
805 return -EINVAL;
806 }
808 err = asprintf(&devname, "%s/%s%d",
809 BLKTAP_DEV_DIR, BLKTAP_DEV_NAME, minor);
810 if (err == -1) {
811 EPRINTF("get_new_dev: malloc failed\n");
812 return -ENOMEM;
813 }
815 err = make_blktap_device(devname, major, minor, S_IFCHR | 0600);
816 free(devname);
818 if (err)
819 return err;
821 DPRINTF("Received device id %d and major %d, "
822 "sent domid %d and be_id %d\n",
823 minor, major, tr.domid, tr.busid);
825 channel->major = major;
826 channel->minor = minor;
828 return 0;
829 }
831 static int
832 tapdisk_channel_start_process(tapdisk_channel_t *channel,
833 char *write_dev, char *read_dev)
834 {
835 pid_t child;
836 char *argv[] = { "tapdisk", write_dev, read_dev, NULL };
838 if ((child = fork()) == -1)
839 return -errno;
841 if (!child) {
842 int i;
843 for (i = 0 ; i < sysconf(_SC_OPEN_MAX) ; i++)
844 if (i != STDIN_FILENO &&
845 i != STDOUT_FILENO &&
846 i != STDERR_FILENO)
847 close(i);
849 execvp("tapdisk", argv);
850 _exit(1);
851 } else {
852 pid_t got;
853 do {
854 got = waitpid(child, NULL, 0);
855 } while (got != child);
856 }
857 return 0;
858 }
860 static int
861 tapdisk_channel_launch_tapdisk(tapdisk_channel_t *channel)
862 {
863 int err;
864 char *read_dev, *write_dev;
866 read_dev = NULL;
867 write_dev = NULL;
868 channel->read_fd = -1;
869 channel->write_fd = -1;
871 err = tapdisk_channel_get_device_number(channel);
872 if (err)
873 return err;
875 err = asprintf(&write_dev,
876 "%s/tapctrlwrite%d", BLKTAP_CTRL_DIR, channel->minor);
877 if (err == -1) {
878 err = -ENOMEM;
879 write_dev = NULL;
880 goto fail;
881 }
883 err = asprintf(&read_dev,
884 "%s/tapctrlread%d", BLKTAP_CTRL_DIR, channel->minor);
885 if (err == -1) {
886 err = -ENOMEM;
887 read_dev = NULL;
888 goto fail;
889 }
891 channel->write_fd = tapdisk_channel_open_control_socket(write_dev);
892 if (channel->write_fd < 0) {
893 err = channel->write_fd;
894 channel->write_fd = -1;
895 goto fail;
896 }
898 channel->read_fd = tapdisk_channel_open_control_socket(read_dev);
899 if (channel->read_fd < 0) {
900 err = channel->read_fd;
901 channel->read_fd = -1;
902 goto fail;
903 }
905 err = tapdisk_channel_start_process(channel, write_dev, read_dev);
906 if (err)
907 goto fail;
909 channel->open = 1;
910 channel->channel_id = channel->write_fd;
912 free(read_dev);
913 free(write_dev);
915 DPRINTF("process launched, channel = %d:%d\n",
916 channel->channel_id, channel->cookie);
918 return tapdisk_channel_send_pid_request(channel);
920 fail:
921 free(read_dev);
922 free(write_dev);
923 if (channel->read_fd != -1)
924 close(channel->read_fd);
925 if (channel->write_fd != -1)
926 close(channel->write_fd);
927 return err;
928 }
930 static int
931 tapdisk_channel_connect(tapdisk_channel_t *channel)
932 {
933 int err;
935 tapdisk_daemon_find_channel(channel);
937 if (!channel->tapdisk_pid)
938 return tapdisk_channel_launch_tapdisk(channel);
940 DPRINTF("%s: process exists: %d, channel = %d:%d\n",
941 channel->path, channel->tapdisk_pid,
942 channel->channel_id, channel->cookie);
944 err = tapdisk_channel_get_device_number(channel);
945 if (err)
946 return err;
948 return tapdisk_channel_send_pid_request(channel);
949 }
951 static int
952 tapdisk_channel_init(tapdisk_channel_t *channel)
953 {
954 int err;
956 channel->uuid_str = NULL;
957 channel->pause_str = NULL;
958 channel->pause_done_str = NULL;
959 channel->shutdown_str = NULL;
960 channel->share_tapdisk_str = NULL;
962 err = asprintf(&channel->uuid_str,
963 "%s/tapdisk-uuid", channel->path);
964 if (err == -1) {
965 channel->uuid_str = NULL;
966 goto fail;
967 }
969 err = asprintf(&channel->pause_str, "%s/pause", channel->path);
970 if (err == -1) {
971 channel->pause_str = NULL;
972 goto fail;
973 }
975 err = asprintf(&channel->pause_done_str,
976 "%s/pause-done", channel->path);
977 if (err == -1) {
978 channel->pause_done_str = NULL;
979 goto fail;
980 }
982 err = asprintf(&channel->shutdown_str,
983 "%s/shutdown-tapdisk", channel->path);
984 if (err == -1) {
985 channel->shutdown_str = NULL;
986 goto fail;
987 }
989 channel->share_tapdisk_str = "/local/domain/0/tapdisk/share-tapdisks";
991 return 0;
993 fail:
994 free(channel->uuid_str);
995 free(channel->pause_str);
996 free(channel->pause_done_str);
997 free(channel->shutdown_str);
998 channel->uuid_str = NULL;
999 channel->pause_str = NULL;
1000 channel->pause_done_str = NULL;
1001 channel->shutdown_str = NULL;
1002 channel->share_tapdisk_str = NULL;
1003 return -ENOMEM;
1006 static int
1007 tapdisk_channel_set_watches(tapdisk_channel_t *channel)
1009 int err;
1011 /* watch for pause events */
1012 channel->pause_watch.node = channel->pause_str;
1013 channel->pause_watch.callback = tapdisk_channel_pause_event;
1014 channel->pause_watch.data = channel;
1015 err = register_xenbus_watch(channel->xsh, &channel->pause_watch);
1016 if (err) {
1017 channel->pause_watch.node = NULL;
1018 goto fail;
1021 /* watch for shutdown events */
1022 channel->shutdown_watch.node = channel->shutdown_str;
1023 channel->shutdown_watch.callback = tapdisk_channel_shutdown_event;
1024 channel->shutdown_watch.data = channel;
1025 err = register_xenbus_watch(channel->xsh, &channel->shutdown_watch);
1026 if (err) {
1027 channel->shutdown_watch.node = NULL;
1028 goto fail;
1031 return 0;
1033 fail:
1034 if (channel->pause_watch.node) {
1035 unregister_xenbus_watch(channel->xsh, &channel->pause_watch);
1036 channel->pause_watch.node = NULL;
1038 if (channel->shutdown_watch.node) {
1039 unregister_xenbus_watch(channel->xsh, &channel->shutdown_watch);
1040 channel->shutdown_watch.node = NULL;
1042 return err;
1045 static void
1046 tapdisk_channel_get_storage_type(tapdisk_channel_t *channel)
1048 int err, type;
1049 unsigned int len;
1050 char *path, *stype;
1052 channel->storage = TAPDISK_STORAGE_TYPE_DEFAULT;
1054 err = asprintf(&path, "%s/sm-data/storage-type", channel->path);
1055 if (err == -1)
1056 return;
1058 stype = xs_read(channel->xsh, XBT_NULL, path, &len);
1059 if (!stype)
1060 goto out;
1061 else if (!strcmp(stype, "nfs"))
1062 channel->storage = TAPDISK_STORAGE_TYPE_NFS;
1063 else if (!strcmp(stype, "ext"))
1064 channel->storage = TAPDISK_STORAGE_TYPE_EXT;
1065 else if (!strcmp(stype, "lvm"))
1066 channel->storage = TAPDISK_STORAGE_TYPE_LVM;
1068 out:
1069 free(path);
1070 free(stype);
1073 static int
1074 tapdisk_channel_get_busid(tapdisk_channel_t *channel)
1076 int len, end;
1077 const char *ptr;
1078 char *tptr, num[10];
1080 len = strsep_len(channel->path, '/', 6);
1081 end = strlen(channel->path);
1082 if(len < 0 || end < 0) {
1083 EPRINTF("invalid path: %s\n", channel->path);
1084 return -EINVAL;
1087 ptr = channel->path + len + 1;
1088 strncpy(num, ptr, end - len);
1089 tptr = num + (end - (len + 1));
1090 *tptr = '\0';
1092 channel->busid = atoi(num);
1093 return 0;
1096 static int
1097 tapdisk_channel_parse_params(tapdisk_channel_t *channel)
1099 int i, size, err;
1100 unsigned int len;
1101 char *ptr, *path, handle[10];
1102 char *vdi_type;
1103 char *vtype;
1105 path = channel->params;
1106 size = sizeof(dtypes) / sizeof(disk_info_t *);
1108 if (strlen(path) + 1 >= TAPDISK_MESSAGE_MAX_PATH_LENGTH)
1109 goto fail;
1111 ptr = strchr(path, ':');
1112 if (!ptr)
1113 goto fail;
1115 channel->vdi_path = ptr + 1;
1116 memcpy(handle, path, (ptr - path));
1117 ptr = handle + (ptr - path);
1118 *ptr = '\0';
1120 err = asprintf(&vdi_type, "%s/sm-data/vdi-type", channel->path);
1121 if (err == -1)
1122 goto fail;
1124 if (xs_exists(channel->xsh, vdi_type)) {
1125 vtype = xs_read(channel->xsh, XBT_NULL, vdi_type, &len);
1126 free(vdi_type);
1127 if (!vtype)
1128 goto fail;
1129 if (len >= sizeof(handle) - 1) {
1130 free(vtype);
1131 goto fail;
1133 sprintf(handle, "%s", vtype);
1134 free(vtype);
1137 for (i = 0; i < size; i++) {
1138 if (strncmp(handle, dtypes[i]->handle, (ptr - path)))
1139 continue;
1141 if (dtypes[i]->idnum == -1)
1142 goto fail;
1144 channel->drivertype = dtypes[i]->idnum;
1145 return 0;
1148 fail:
1149 EPRINTF("%s: invalid blktap params: %s\n",
1150 channel->path, channel->params);
1151 channel->vdi_path = NULL;
1152 return -EINVAL;
1155 static int
1156 tapdisk_channel_gather_info(tapdisk_channel_t *channel)
1158 int err;
1160 err = xs_gather(channel->xsh, channel->path,
1161 "frontend", NULL, &channel->frontpath,
1162 "frontend-id", "%li", &channel->domid,
1163 "params", NULL, &channel->params,
1164 "mode", "%c", &channel->mode, NULL);
1165 if (err) {
1166 EPRINTF("could not find device info: %d\n", err);
1167 return err;
1170 err = tapdisk_channel_parse_params(channel);
1171 if (err)
1172 return err;
1174 err = tapdisk_channel_get_busid(channel);
1175 if (err)
1176 return err;
1178 tapdisk_channel_get_storage_type(channel);
1180 return 0;
1183 static int
1184 tapdisk_channel_verify_start_request(tapdisk_channel_t *channel)
1186 char *path;
1187 unsigned int err;
1189 err = asprintf(&path, "%s/start-tapdisk", channel->path);
1190 if (err == -1)
1191 goto mem_fail;
1193 if (!xs_exists(channel->xsh, path))
1194 goto fail;
1196 free(path);
1197 err = asprintf(&path, "%s/shutdown-request", channel->path);
1198 if (err == -1)
1199 goto mem_fail;
1201 if (xs_exists(channel->xsh, path))
1202 goto fail;
1204 if (xs_exists(channel->xsh, channel->shutdown_str))
1205 goto fail;
1207 free(path);
1208 err = asprintf(&path, "%s/shutdown-done", channel->path);
1209 if (err == -1)
1210 goto mem_fail;
1212 if (xs_exists(channel->xsh, path))
1213 goto fail;
1215 free(path);
1217 return 0;
1219 fail:
1220 free(path);
1221 EPRINTF("%s:%s: invalid start request\n", __func__, channel->path);
1222 return -EINVAL;
1224 mem_fail:
1225 EPRINTF("%s:%s: out of memory\n", __func__, channel->path);
1226 return -ENOMEM;
1229 void
1230 tapdisk_channel_close(tapdisk_channel_t *channel)
1232 if (channel->channel_id)
1233 DPRINTF("%s: closing channel %d:%d\n",
1234 channel->path, channel->channel_id, channel->cookie);
1236 if (channel->open)
1237 tapdisk_channel_send_shutdown_request(channel);
1239 if (channel->pause_watch.node) {
1240 unregister_xenbus_watch(channel->xsh, &channel->pause_watch);
1241 channel->pause_watch.node = NULL;
1244 if (channel->shutdown_watch.node) {
1245 unregister_xenbus_watch(channel->xsh, &channel->shutdown_watch);
1246 channel->shutdown_watch.node = NULL;
1249 tapdisk_daemon_close_channel(channel);
1251 free(channel->params);
1252 free(channel->frontpath);
1253 free(channel->shutdown_str);
1254 free(channel->pause_done_str);
1255 free(channel->pause_str);
1256 free(channel->uuid_str);
1257 free(channel->path);
1258 free(channel);
1261 int
1262 tapdisk_channel_open(tapdisk_channel_t **_channel,
1263 char *path, struct xs_handle *xsh,
1264 int blktap_fd, uint16_t cookie)
1266 int err;
1267 char *msg;
1268 tapdisk_channel_t *channel;
1270 msg = NULL;
1271 *_channel = NULL;
1273 channel = calloc(1, sizeof(tapdisk_channel_t));
1274 if (!channel)
1275 return -ENOMEM;
1277 channel->xsh = xsh;
1278 channel->blktap_fd = blktap_fd;
1279 channel->cookie = cookie;
1280 channel->state = TAPDISK_CHANNEL_IDLE;
1282 INIT_LIST_HEAD(&channel->list);
1284 channel->path = strdup(path);
1285 if (!channel->path) {
1286 err = -ENOMEM;
1287 goto fail;
1290 err = tapdisk_channel_init(channel);
1291 if (err) {
1292 msg = "allocating device";
1293 goto fail;
1296 err = tapdisk_channel_check_uuid(channel);
1297 if (err) {
1298 msg = "checking uuid";
1299 goto fail;
1302 err = tapdisk_channel_gather_info(channel);
1303 if (err) {
1304 msg = "gathering parameters";
1305 goto fail;
1308 err = tapdisk_channel_verify_start_request(channel);
1309 if (err) {
1310 msg = "invalid start request";
1311 goto fail;
1314 err = tapdisk_channel_set_watches(channel);
1315 if (err) {
1316 msg = "registering xenstore watches";
1317 goto fail;
1320 err = tapdisk_channel_connect(channel);
1321 if (err) {
1322 msg = "connecting to tapdisk";
1323 goto fail;
1326 *_channel = channel;
1327 return 0;
1329 fail:
1330 tapdisk_channel_fatal(channel, "%s: %d", (msg ? : "failure"), err);
1331 return err;
1334 int
1335 tapdisk_channel_receive_message(tapdisk_channel_t *c, tapdisk_message_t *m)
1337 int err;
1339 err = tapdisk_channel_validate_message(c, m);
1340 if (err)
1341 goto fail;
1343 switch (m->type) {
1344 case TAPDISK_MESSAGE_PID_RSP:
1345 return tapdisk_channel_receive_pid_response(c, m);
1347 case TAPDISK_MESSAGE_OPEN_RSP:
1348 return tapdisk_channel_receive_open_response(c, m);
1350 case TAPDISK_MESSAGE_PAUSE_RSP:
1351 return tapdisk_channel_receive_pause_response(c, m);
1353 case TAPDISK_MESSAGE_RESUME_RSP:
1354 return tapdisk_channel_receive_resume_response(c, m);
1356 case TAPDISK_MESSAGE_CLOSE_RSP:
1357 return tapdisk_channel_receive_shutdown_response(c, m);
1359 case TAPDISK_MESSAGE_RUNTIME_ERROR:
1360 return tapdisk_channel_receive_runtime_error(c, m);
1363 fail:
1364 tapdisk_channel_fatal(c, "received unexpected message %s in state %d",
1365 tapdisk_message_name(m->type), c->state);
1366 return -EINVAL;