LIBVHDDIR = $(BLKTAP_ROOT)/vhd/lib
-IBIN = tapdisk2 td-util td-rated tapdisk-stream tapdisk-diff
+IBIN = tapdisk2 td-util td-rated
LOCK_UTIL = lock-util
INST_DIR = /usr/sbin
TAP-OBJS := scheduler.o
TAP-OBJS += tapdisk-control.o
TAP-OBJS += tapdisk-vbd.o
+TAP-OBJS += tapdisk-blktap.o
TAP-OBJS += tapdisk-image.o
TAP-OBJS += tapdisk-driver.o
TAP-OBJS += tapdisk-disktype.o
if (err)
goto fail;
- lreq_bufsz = BLKIF_MAX_SEGMENTS_PER_REQUEST * sysconf(_SC_PAGE_SIZE);
+ lreq_bufsz = MAX_SEGMENTS_PER_REQ * sysconf(_SC_PAGE_SIZE);
cache->bufsz = LOCAL_CACHE_REQUESTS * lreq_bufsz;
prot = PROT_READ|PROT_WRITE;
struct vhd_request *r = &s->vreq_list[i];
td_request_t *t = &r->treq;
if (t->secs)
- DBG(TLOG_WARN, "%d: id: 0x%04"PRIx64", err: %d, op: %d,"
+ DBG(TLOG_WARN, "%d: vreq: %s.%d, err: %d, op: %d,"
" lsec: 0x%08"PRIx64", flags: %d, this: %p, "
- "next: %p, tx: %p\n", i, t->id, r->error, r->op,
+ "next: %p, tx: %p\n", i, t->vreq->name, t->sidx, r->error, r->op,
t->sec, r->flags, r, r->next, r->tx);
}
--- /dev/null
+/*
+ * Copyright (c) 2010, Citrix Systems, Inc.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of XenSource Inc. nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+
+#include "blktap.h"
+#include "tapdisk-vbd.h"
+#include "tapdisk-blktap.h"
+#include "tapdisk-server.h"
+
+#define wmb() /* blktap is synchronous */
+#define BLKTAP_PAGE_SIZE sysconf(_SC_PAGE_SIZE)
+#define BUG(_cond) td_panic()
+#define BUG_ON(_cond) if (unlikely(_cond)) { td_panic(); }
+
+#define DBG(_f, _a...) tlog_syslog(TLOG_DBG, _f, ##_a)
+#define INFO(_f, _a...) tlog_syslog(TLOG_INFO, _f, ##_a)
+#define ERR(_err, _f, _a...) tlog_error(_err, _f, ##_a)
+#define WARN(_f, _a...) tlog_syslog(TLOG_WARN, "WARNING: "_f "in %s:%d", \
+ ##_a, __func__, __LINE__)
+
+static void __tapdisk_blktap_close(td_blktap_t *);
+
+struct td_blktap_req { /* per-request state: one entry of the tap->reqs pool */
+ td_vbd_request_t vreq; /* embedded vbd request; containerof() recovers the td_blktap_req from it */
+ unsigned int id; /* ring message id, echoed back in the response */
+ char name[16]; /* "tap-<minor>.<id>" label, used as vreq.name */
+ struct td_iovec iov[BLKTAP_SEGMENT_MAX]; /* I/O vector built from the message segments */
+};
+
+/*
+ * Pop a request descriptor off the free stack.
+ *
+ * Returns the descriptor, or NULL when none is currently free.
+ */
+td_blktap_req_t *
+tapdisk_blktap_alloc_request(td_blktap_t *tap)
+{
+	if (unlikely(!tap->n_reqs_free))
+		return NULL;
+
+	tap->n_reqs_free--;
+	return tap->reqs_free[tap->n_reqs_free];
+}
+
+/*
+ * Push @req back onto the free stack.  Panics if the stack already holds
+ * as many entries as the pool has descriptors.
+ */
+void
+tapdisk_blktap_free_request(td_blktap_t *tap, td_blktap_req_t *req)
+{
+	const int slot = tap->n_reqs_free;
+
+	BUG_ON(slot >= tap->n_reqs);
+
+	tap->reqs_free[slot] = req;
+	tap->n_reqs_free = slot + 1;
+}
+
+/*
+ * Release the descriptor pool and the free stack, leaving both pointers
+ * NULL.  free(NULL) is a no-op, so no guards are required.
+ */
+static void
+tapdisk_blktap_reqs_free(td_blktap_t *tap)
+{
+	free(tap->reqs);
+	tap->reqs = NULL;
+
+	free(tap->reqs_free);
+	tap->reqs_free = NULL;
+}
+
+/*
+ * Allocate a pool of @n_reqs request descriptors plus the free stack that
+ * tracks them, and mark every descriptor as free.
+ *
+ * Returns 0 on success or a negative errno if an allocation fails, in
+ * which case anything already allocated is released again.
+ */
+static int
+tapdisk_blktap_reqs_init(td_blktap_t *tap, int n_reqs)
+{
+	int idx, err;
+
+	tap->reqs = malloc(n_reqs * sizeof(td_blktap_req_t));
+	if (!tap->reqs) {
+		err = -errno;
+		tapdisk_blktap_reqs_free(tap);
+		return err;
+	}
+
+	tap->reqs_free = malloc(n_reqs * sizeof(td_blktap_req_t*));
+	if (!tap->reqs_free) {
+		err = -errno;
+		tapdisk_blktap_reqs_free(tap);
+		return err;
+	}
+
+	tap->n_reqs = n_reqs;
+	tap->n_reqs_free = 0;
+
+	/* Seed the free stack with every descriptor in the pool. */
+	idx = 0;
+	while (idx < n_reqs) {
+		tapdisk_blktap_free_request(tap, &tap->reqs[idx]);
+		idx++;
+	}
+
+	return 0;
+}
+
+/*
+ * Issue BLKTAP_IOCTL_RESPOND on the device fd and count the kick.
+ * Does nothing once the fd has been closed.
+ */
+static void
+tapdisk_blktap_kick(td_blktap_t *tap)
+{
+	if (unlikely(tap->fd < 0))
+		return;
+
+	ioctl(tap->fd, BLKTAP_IOCTL_RESPOND, 0);
+	tap->stats.kicks.out++;
+}
+
+/*
+ * Map an errno-style result onto a ring response status.
+ *
+ * 0 yields BLKTAP_RSP_OKAY, EOPNOTSUPP (either sign) yields
+ * BLKTAP_RSP_EOPNOTSUPP, anything else BLKTAP_RSP_ERROR.  @tap is unused.
+ */
+static int
+tapdisk_blktap_error_status(td_blktap_t *tap, int error)
+{
+	if (error == 0)
+		return BLKTAP_RSP_OKAY;
+
+	if (error == -EOPNOTSUPP || error == EOPNOTSUPP)
+		return BLKTAP_RSP_EOPNOTSUPP;
+
+	return BLKTAP_RSP_ERROR;
+}
+
+/*
+ * Account for one response just written at the private producer index.
+ * When @final is set, the batch is published on the ring and the device
+ * is kicked.
+ */
+static void
+__tapdisk_blktap_push_responses(td_blktap_t *tap, int final)
+{
+	tap->ring.rsp_prod_pvt++;
+	tap->stats.reqs.out++;
+
+	if (!final)
+		return;
+
+	RING_PUSH_RESPONSES(&tap->ring);
+	tapdisk_blktap_kick(tap);
+}
+
+/*
+ * Answer ring message @msg directly with an error status, without a
+ * request descriptor.  The response is pushed immediately.
+ * Panics if the ring is not mapped.
+ */
+static void
+tapdisk_blktap_fail_request(td_blktap_t *tap,
+			    blktap_ring_req_t *msg, int error)
+{
+	blktap_back_ring_t *back = &tap->ring;
+	blktap_ring_rsp_t *reply;
+
+	BUG_ON(!tap->vma);
+
+	reply = RING_GET_RESPONSE(back, back->rsp_prod_pvt);
+
+	reply->status = tapdisk_blktap_error_status(tap, error);
+	reply->operation = msg->operation;
+	reply->id = msg->id;
+
+	__tapdisk_blktap_push_responses(tap, 1);
+}
+
+/*
+ * Write the response for a completed request @req at the private
+ * producer index and hand it to __tapdisk_blktap_push_responses().
+ * Panics if the ring is not mapped or the request op is neither read
+ * nor write.
+ */
+static void
+tapdisk_blktap_put_response(td_blktap_t *tap,
+			    td_blktap_req_t *req, int error, int final)
+{
+	blktap_back_ring_t *back = &tap->ring;
+	blktap_ring_rsp_t *reply;
+	int ring_op = 0;
+
+	BUG_ON(!tap->vma);
+
+	reply = RING_GET_RESPONSE(back, back->rsp_prod_pvt);
+
+	/* Translate the tapdisk op back into its ring encoding. */
+	if (req->vreq.op == TD_OP_READ)
+		ring_op = BLKTAP_OP_READ;
+	else if (req->vreq.op == TD_OP_WRITE)
+		ring_op = BLKTAP_OP_WRITE;
+	else
+		BUG();
+
+	reply->id = req->id;
+	reply->operation = ring_op;
+	reply->status = tapdisk_blktap_error_status(tap, error);
+
+	__tapdisk_blktap_push_responses(tap, final);
+}
+
+/*
+ * Finish @req: write a ring response while the ring is still mapped,
+ * then return the descriptor to the free stack in either case.
+ */
+static void
+tapdisk_blktap_complete_request(td_blktap_t *tap,
+				td_blktap_req_t *req, int error,
+				int final)
+{
+	const int mapped = tap->vma != NULL;
+
+	if (likely(mapped))
+		tapdisk_blktap_put_response(tap, req, error, final);
+
+	tapdisk_blktap_free_request(tap, req);
+}
+
+/*
+ * vbd request completion callback.  @token is the td_blktap_t installed
+ * by the parser; the enclosing descriptor is recovered from @vreq.
+ */
+static void
+__tapdisk_blktap_request_cb(td_vbd_request_t *vreq, int error,
+			    void *token, int final)
+{
+	td_blktap_t *owner = token;
+
+	tapdisk_blktap_complete_request(owner,
+					containerof(vreq, td_blktap_req_t, vreq),
+					error, final);
+}
+
+/*
+ * Build the I/O vector of @req from the segments of ring message @msg.
+ *
+ * Each segment lives in its own page of the request's data window, which
+ * starts at tap->vstart + id * BLKTAP_SEGMENT_MAX pages.  A segment that
+ * begins exactly where the previous vector entry ends is merged into that
+ * entry; otherwise a new entry is started.
+ *
+ * On return vreq->iov, vreq->iovcnt and vreq->sec are set.  The caller is
+ * expected to have validated msg->id and msg->nr_segments.
+ *
+ * Entries are tracked by count rather than by a cursor initialised to
+ * `req->iov - 1`, so no pointer before the start of the array is formed.
+ */
+static void
+tapdisk_blktap_vector_request(td_blktap_t *tap,
+			      const blktap_ring_req_t *msg,
+			      td_blktap_req_t *req)
+{
+	td_vbd_request_t *vreq = &req->vreq;
+	struct td_iovec *iov = NULL;	/* entry currently being extended */
+	void *page, *last = NULL;	/* last: end of the current entry */
+	int i, count = 0;
+
+	page = tap->vstart;
+	page += msg->id * BLKTAP_SEGMENT_MAX * BLKTAP_PAGE_SIZE;
+
+	for (i = 0; i < msg->nr_segments; i++) {
+		const struct blktap_segment *seg = &msg->seg[i];
+		void *next = page + (seg->first_sect << SECTOR_SHIFT);
+		size_t size = seg->last_sect - seg->first_sect + 1;
+
+		if (iov && next == last) {
+			/* contiguous with the previous entry: extend it */
+			iov->secs += size;
+		} else {
+			iov = &req->iov[count++];
+			iov->base = next;
+			iov->secs = size;
+		}
+
+		last = iov->base + (iov->secs << SECTOR_SHIFT);
+		page += BLKTAP_PAGE_SIZE;
+	}
+
+	vreq->iov = req->iov;
+	vreq->iovcnt = count;
+	vreq->sec = msg->sector_number;
+}
+
+/*
+ * Translate ring message @msg into request descriptor @req.
+ *
+ * Zeroes @req, validates the operation, request id and segment count,
+ * then fills in the embedded vbd request: op, name, completion callback
+ * and token, and the I/O vector.
+ *
+ * Returns 0 on success or -EINVAL if @msg is malformed.
+ */
+static int
+tapdisk_blktap_parse_request(td_blktap_t *tap,
+			     const blktap_ring_req_t *msg, td_blktap_req_t *req)
+{
+	td_vbd_request_t *vreq = &req->vreq;
+	int op, err = -EINVAL;
+
+	memset(req, 0, sizeof(*req));
+
+	switch (msg->operation) {
+	case BLKTAP_OP_READ:
+		op = TD_OP_READ;
+		break;
+	case BLKTAP_OP_WRITE:
+		op = TD_OP_WRITE;
+		break;
+	default:
+		goto fail;
+	}
+
+	/*
+	 * The id selects one of BLKTAP_RING_SIZE per-request data windows
+	 * in the mapping, so valid ids are 0 .. BLKTAP_RING_SIZE - 1.  An
+	 * id equal to BLKTAP_RING_SIZE would address memory past the map.
+	 */
+	if (msg->id >= BLKTAP_RING_SIZE)
+		goto fail;
+
+	if (msg->nr_segments < 1 ||
+	    msg->nr_segments > BLKTAP_SEGMENT_MAX)
+		goto fail;
+
+	req->id = msg->id;
+	snprintf(req->name, sizeof(req->name),
+		 "tap-%d.%d", tap->minor, req->id);
+
+	vreq->op = op;
+	vreq->name = req->name;
+	vreq->token = tap;
+	vreq->cb = __tapdisk_blktap_request_cb;
+
+	tapdisk_blktap_vector_request(tap, msg, req);
+
+	err = 0;
+fail:
+	return err;
+}
+
+/*
+ * Drain the ring: turn every message between the consumer index and the
+ * producer index sampled on entry into a vbd request and queue it.
+ *
+ * Running out of descriptors or meeting a malformed message is treated
+ * as a ring error and disconnects the tap.  A request that the vbd
+ * refuses to queue is completed immediately with that error.
+ */
+static void
+tapdisk_blktap_get_requests(td_blktap_t *tap)
+{
+	blktap_back_ring_t *ring = &tap->ring;
+	RING_IDX prod, cons;
+	int err;
+
+	prod = ring->sring->req_prod;
+	cons = ring->req_cons;
+
+	while (cons != prod) {
+		blktap_ring_req_t *msg = RING_GET_REQUEST(ring, cons);
+		td_blktap_req_t *req;
+
+		tap->stats.reqs.in++;
+
+		req = tapdisk_blktap_alloc_request(tap);
+		if (!req) {
+			err = -EFAULT;
+			goto disconnect;
+		}
+
+		err = tapdisk_blktap_parse_request(tap, msg, req);
+		if (err) {
+			tapdisk_blktap_fail_request(tap, msg, err);
+			tapdisk_blktap_free_request(tap, req);
+			goto disconnect;
+		}
+
+		err = tapdisk_vbd_queue_request(tap->vbd, &req->vreq);
+		if (err)
+			tapdisk_blktap_complete_request(tap, req, err, 1);
+
+		cons++;
+	}
+
+	ring->req_cons = cons;
+	return;
+
+disconnect:
+	ERR(err, "ring error, disconnecting.");
+	__tapdisk_blktap_close(tap);
+}
+
+/*
+ * Scheduler callback for activity on the device fd: count the incoming
+ * kick and process whatever is on the ring.  @data is the td_blktap_t.
+ */
+static void
+tapdisk_blktap_fd_event(event_id_t id, char mode, void *data)
+{
+	td_blktap_t *owner = data;
+
+	owner->stats.kicks.in++;
+
+	tapdisk_blktap_get_requests(owner);
+}
+
+/*
+ * Issue BLKTAP_IOCTL_REMOVE_DEVICE on the device fd.
+ *
+ * Returns 0 on success (or when the fd is already closed), otherwise the
+ * negated errno of the ioctl.
+ */
+int
+tapdisk_blktap_remove_device(td_blktap_t *tap)
+{
+	if (unlikely(tap->fd < 0))
+		return 0;
+
+	if (ioctl(tap->fd, BLKTAP_IOCTL_REMOVE_DEVICE))
+		return -errno;
+
+	return 0;
+}
+
+/*
+ * Create the block device through the compat ioctl, which only carries
+ * capacity and sector size.
+ *
+ * A warning is logged when @bdi requested anything the compat interface
+ * cannot express (flags, or a physical sector size that differs from the
+ * logical one).
+ *
+ * Returns 0 on success or the negated errno of the ioctl.
+ */
+int
+tapdisk_blktap_compat_create_device(td_blktap_t *tap,
+				    const struct blktap_device_info *bdi)
+{
+	struct blktap2_params params;
+	int err;
+
+	memset(&params, 0, sizeof(params));
+	params.capacity = bdi->capacity;
+	params.sector_size = bdi->sector_size;
+
+	err = ioctl(tap->fd, BLKTAP_IOCTL_CREATE_DEVICE_COMPAT, &params);
+	if (err) {
+		err = -errno;
+		return err;
+	}
+
+	if (bdi->flags || bdi->physical_sector_size != bdi->sector_size)
+		WARN("fell back to compat ioctl(%d)",
+		     BLKTAP_IOCTL_CREATE_DEVICE_COMPAT);
+
+	return 0;
+}
+
+#ifndef ENOIOCTLCMD
+#define ENOIOCTLCMD 515
+#endif
+
+/*
+ * Create the block device for @tap from image geometry @info.
+ *
+ * @rdonly is a boolean: non-zero requests a read-only device.  The caller
+ * in tapdisk-control passes !!(flags & TD_OPEN_RDONLY), i.e. 0 or 1, so
+ * the value is tested for truth rather than masked with TD_OPEN_RDONLY
+ * (masking would drop the request unless that flag happens to be bit 0).
+ *
+ * If the kernel does not know BLKTAP_IOCTL_CREATE_DEVICE (ENOTTY or
+ * ENOIOCTLCMD), the compat ioctl is tried instead.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int
+tapdisk_blktap_create_device(td_blktap_t *tap,
+			     const td_disk_info_t *info, int rdonly)
+{
+	struct blktap_device_info bdi;
+	unsigned long flags;
+	int err;
+
+	memset(&bdi, 0, sizeof(bdi));
+
+	flags = 0;
+	flags |= rdonly ? BLKTAP_DEVICE_RO : 0;
+
+	bdi.capacity = info->size;
+	bdi.sector_size = info->sector_size;
+	bdi.physical_sector_size = info->sector_size;
+	bdi.flags = flags;
+
+	INFO("bdev: capacity=%llu sector_size=%u/%u flags=%#lx",
+	     bdi.capacity, bdi.sector_size, bdi.physical_sector_size,
+	     bdi.flags);
+
+	err = ioctl(tap->fd, BLKTAP_IOCTL_CREATE_DEVICE, &bdi);
+	if (!err)
+		return 0;
+
+	err = -errno;
+	if (err == -ENOTTY || err == -ENOIOCTLCMD)
+		err = tapdisk_blktap_compat_create_device(tap, &bdi);
+
+	return err;
+}
+
+/*
+ * Drop the shared mapping, if any, and clear tap->vma.
+ */
+static void
+tapdisk_blktap_unmap(td_blktap_t *tap)
+{
+	if (!tap->vma)
+		return;
+
+	munmap(tap->vma, tap->vma_size);
+	tap->vma = NULL;
+}
+
+/*
+ * Map the device's shared memory and initialise the back ring.
+ *
+ * Layout: one page holding the shared ring, followed by
+ * BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX request data pages, which start
+ * at tap->vstart.
+ *
+ * The length is computed in whole pages.  The earlier form added a single
+ * byte for the ring page and depended on mmap()/munmap() rounding the
+ * length up to the next page boundary.
+ *
+ * Returns 0 on success or the negated errno of mmap().
+ */
+static int
+tapdisk_blktap_map(td_blktap_t *tap)
+{
+	int prot, flags, err;
+	void *vma;
+
+	tap->vma_size =
+		(1 + BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX) *
+		BLKTAP_PAGE_SIZE;
+
+	prot = PROT_READ | PROT_WRITE;
+	flags = MAP_SHARED;
+
+	vma = mmap(NULL, tap->vma_size, prot, flags, tap->fd, 0);
+	if (vma == MAP_FAILED) {
+		err = -errno;
+		goto fail;
+	}
+
+	tap->vma = vma;
+	tap->vstart = vma + BLKTAP_PAGE_SIZE;
+	BACK_RING_INIT(&tap->ring, (blktap_sring_t*)vma, BLKTAP_PAGE_SIZE);
+
+	return 0;
+
+fail:
+	tapdisk_blktap_unmap(tap);
+	return err;
+}
+
+/*
+ * Disconnect from the device: unregister the fd event, unmap the shared
+ * memory and close the fd.  The handle and its descriptor pool stay
+ * allocated.
+ *
+ * NB. this may be called at runtime with requests still in flight.
+ * After munmap, blktap has already failed all pending block reqs, and
+ * AIO on the buffers will -EFAULT.  vreq completion simply backs off
+ * once fd/vma are gone, so we drain, then idle until close().
+ */
+static void
+__tapdisk_blktap_close(td_blktap_t *tap)
+{
+	const int watching = tap->event_id >= 0;
+	const int is_open = tap->fd >= 0;
+
+	if (watching) {
+		tapdisk_server_unregister_event(tap->event_id);
+		tap->event_id = -1;
+	}
+
+	tapdisk_blktap_unmap(tap);
+
+	if (is_open) {
+		close(tap->fd);
+		tap->fd = -1;
+	}
+}
+
+/*
+ * Disconnect from the device and release everything owned by @tap,
+ * including the handle itself.  @tap must not be used afterwards.
+ */
+void
+tapdisk_blktap_close(td_blktap_t *tap)
+{
+	__tapdisk_blktap_close(tap);
+
+	/* descriptor pool and free stack, then the handle */
+	free(tap->reqs);
+	free(tap->reqs_free);
+	free(tap);
+}
+
+/*
+ * Open device node @devname on behalf of @vbd.
+ *
+ * Allocates a handle, opens the node, records its minor number, maps the
+ * shared ring, registers a read event on the fd and sets up a pool of
+ * BLKTAP_RING_SIZE request descriptors.  On success the handle is stored
+ * in *_tap when @_tap is non-NULL.
+ *
+ * Returns 0 on success or a negative errno; on failure everything set up
+ * so far is torn down again and *_tap is left untouched.
+ */
+int
+tapdisk_blktap_open(const char *devname, td_vbd_t *vbd, td_blktap_t **_tap)
+{
+	td_blktap_t *tap;
+	struct stat st;
+	int err;
+
+	/* zero-filled handle; fd and event id start out as "none" */
+	tap = calloc(1, sizeof(*tap));
+	if (!tap) {
+		err = -errno;
+		goto fail;
+	}
+
+	tap->event_id = -1;
+
+	tap->fd = open(devname, O_RDWR);
+	if (tap->fd < 0) {
+		err = -errno;
+		goto fail;
+	}
+
+	if (fstat(tap->fd, &st)) {
+		err = -errno;
+		goto fail;
+	}
+
+	tap->minor = minor(st.st_rdev);
+	tap->vbd = vbd;
+
+	err = tapdisk_blktap_map(tap);
+	if (err)
+		goto fail;
+
+	tap->event_id =
+		tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
+					      tap->fd, 0,
+					      tapdisk_blktap_fd_event,
+					      tap);
+	if (tap->event_id < 0) {
+		err = tap->event_id;
+		goto fail;
+	}
+
+	err = tapdisk_blktap_reqs_init(tap, BLKTAP_RING_SIZE);
+	if (err)
+		goto fail;
+
+	if (_tap)
+		*_tap = tap;
+
+	return 0;
+
+fail:
+	if (tap)
+		tapdisk_blktap_close(tap);
+
+	return err;
+}
+
+/*
+ * Emit the tap's statistics into @st: the minor number, then the
+ * request and kick counters, each as an [in, out] pair.
+ */
+void
+tapdisk_blktap_stats(td_blktap_t *tap, td_stats_t *st)
+{
+	const struct td_blktap_stats *counters = &tap->stats;
+
+	tapdisk_stats_field(st, "minor", "d", tap->minor);
+
+	tapdisk_stats_field(st, "reqs", "[");
+	tapdisk_stats_val(st, "llu", counters->reqs.in);
+	tapdisk_stats_val(st, "llu", counters->reqs.out);
+	tapdisk_stats_leave(st, ']');
+
+	tapdisk_stats_field(st, "kicks", "[");
+	tapdisk_stats_val(st, "llu", counters->kicks.in);
+	tapdisk_stats_val(st, "llu", counters->kicks.out);
+	tapdisk_stats_leave(st, ']');
+}
--- /dev/null
+/*
+ * Copyright (c) 2010, Citrix Systems, Inc.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of XenSource Inc. nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TAPDISK_BLKTAP_H_
+#define _TAPDISK_BLKTAP_H_
+
+typedef struct td_blktap td_blktap_t;
+typedef struct td_blktap_req td_blktap_req_t;
+
+#include "blktap.h"
+#include "tapdisk-vbd.h"
+#include "list.h"
+
+struct td_blktap_stats { /* per-tap event counters, reported by tapdisk_blktap_stats() */
+ struct {
+ unsigned long long in; /* ring messages consumed */
+ unsigned long long out; /* ring responses produced */
+ } reqs;
+ struct {
+ unsigned long long in; /* fd events received */
+ unsigned long long out; /* BLKTAP_IOCTL_RESPOND kicks issued */
+ } kicks;
+};
+
+struct td_blktap { /* state of one attached blktap device */
+ int minor; /* device minor, taken from fstat() on the opened node */
+ td_vbd_t *vbd; /* VBD that incoming requests are queued to */
+
+ int fd; /* device node descriptor, -1 when closed */
+
+ void *vma; /* base of the shared mapping, NULL when unmapped */
+ size_t vma_size; /* length passed to mmap()/munmap() */
+
+ blktap_back_ring_t ring; /* back ring over the first page of vma */
+ int event_id; /* fd read-event registration, -1 when none */
+ void *vstart; /* start of the request data pages (vma + one page) */
+
+ int n_reqs; /* number of descriptors in reqs */
+ td_blktap_req_t *reqs; /* request descriptor pool */
+ int n_reqs_free; /* number of entries on the reqs_free stack */
+ td_blktap_req_t **reqs_free; /* stack of currently unused descriptors */
+
+ struct list_head entry; /* NOTE(review): not referenced by the visible code - confirm use */
+
+ struct td_blktap_stats stats; /* request and kick counters */
+};
+
+int tapdisk_blktap_open(const char *, td_vbd_t *, td_blktap_t **); /* open and map a device node for a VBD; 0 or negative errno */
+void tapdisk_blktap_close(td_blktap_t *); /* disconnect and free the handle */
+
+int tapdisk_blktap_create_device(td_blktap_t *, const td_disk_info_t *, int ro); /* create the block device; 0 or negative errno */
+int tapdisk_blktap_remove_device(td_blktap_t *); /* remove the block device; 0 or negative errno */
+
+void tapdisk_blktap_stats(td_blktap_t *, td_stats_t *); /* emit minor, reqs and kicks fields */
+
+#endif /* _TAPDISK_BLKTAP_H_ */
#include <sys/mman.h>
#include "list.h"
-#include "blktap.h"
-#include "tapdisk-vbd.h"
#include "tapdisk.h"
+#include "tapdisk-vbd.h"
+#include "tapdisk-blktap.h"
#include "tapdisk-utils.h"
#include "tapdisk-server.h"
#include "tapdisk-message.h"
i = 0;
memset(&response, 0, sizeof(response));
-
response.type = TAPDISK_MESSAGE_LIST_MINORS_RSP;
response.cookie = request->cookie;
head = tapdisk_server_get_all_vbds();
list_for_each_entry(vbd, head, next) {
- response.u.minors.list[i++] = vbd->minor;
+ td_blktap_t *tap = vbd->tap;
+ if (!tap)
+ continue;
+
+ response.u.minors.list[i++] = tap->minor;
if (i >= TAPDISK_MESSAGE_MAX_MINORS) {
response.type = TAPDISK_MESSAGE_ERROR;
response.u.response.error = ERANGE;
list_for_each_entry(vbd, head, next) {
response.u.list.count = count--;
- response.u.list.minor = vbd->minor;
+ response.u.list.minor = vbd->tap ? vbd->tap->minor : -1;
response.u.list.state = vbd->state;
response.u.list.path[0] = 0;
err = tapdisk_vbd_attach(vbd, devname, minor);
free(devname);
- if (err)
+ if (err) {
+ ERR(err, "failure attaching to %s", devname);
goto fail_vbd;
+ }
tapdisk_server_add_vbd(vbd);
tapdisk_message_t *request)
{
int err;
- td_disk_info_t image;
td_vbd_t *vbd;
td_flag_t flags;
tapdisk_message_t response;
- struct blktap_device_info info;
+ td_disk_info_t info;
vbd = tapdisk_server_get_vbd(request->cookie);
if (!vbd) {
goto out;
}
- if (vbd->minor == -1) {
+ if (!vbd->tap) {
err = -EINVAL;
goto out;
}
if (err)
goto fail_close;
- err = tapdisk_vbd_get_disk_info(vbd, &image);
+ err = tapdisk_vbd_get_disk_info(vbd, &info);
if (err)
goto fail_close;
- memset(&info, 0, sizeof(info));
- info.capacity = image.size;
- info.sector_size = image.sector_size;
- info.physical_sector_size = image.sector_size;
- info.flags = 0;
- info.flags |= flags & TD_OPEN_RDONLY ? BLKTAP_DEVICE_RO : 0;
-
- err = ioctl(vbd->ring.fd, BLKTAP2_IOCTL_CREATE_DEVICE, &info);
-#ifdef BLKTAP_IOCTL_CREATE_DEVICE_COMPAT
-#ifndef ENOIOCTLCMD
-#define ENOIOCTLCMD 515
-#endif
- if (err && (errno == ENOTTY || errno == ENOIOCTLCMD)) {
- struct blktap2_params params;
- memset(¶ms, 0, sizeof(params));
- params.capacity = info.capacity;
- params.sector_size = info.sector_size;
- err = ioctl(vbd->ring.fd, BLKTAP_IOCTL_CREATE_DEVICE_COMPAT, ¶ms);
- if (!err && info.flags)
- EPRINTF("create device: using compat ioctl(%d),"
- " flags (%#x) dropped.",
- BLKTAP_IOCTL_CREATE_DEVICE_COMPAT, flags);
- }
-#endif
- if (err && errno != EEXIST) {
+ err = tapdisk_blktap_create_device(vbd->tap, &info,
+ !!(flags & TD_OPEN_RDONLY));
+ if (err && err != -EEXIST) {
err = -errno;
EPRINTF("create device failed: %d\n", err);
goto fail_close;
response.type = TAPDISK_MESSAGE_ERROR;
response.u.response.error = -err;
} else {
- response.u.image.sectors = image.size;
- response.u.image.sector_size = image.sector_size;
- response.u.image.info = image.info;
+ response.u.image.sectors = info.size;
+ response.u.image.sector_size = info.sector_size;
+ response.u.image.info = info.info;
response.type = TAPDISK_MESSAGE_OPEN_RSP;
}
}
do {
- err = ioctl(vbd->ring.fd, BLKTAP2_IOCTL_REMOVE_DEVICE);
+ err = tapdisk_blktap_remove_device(vbd->tap);
- if (!err || errno != EBUSY)
+ if (!err || err != -EBUSY)
break;
tapdisk_server_iterate();
} while (conn->fd >= 0);
- if (err) {
- err = -errno;
+ if (err)
ERR(err, "failure closing image\n");
- }
if (err == -ENOTTY) {
free(vbd->name);
vbd->name = NULL;
- if (vbd->minor == -1) {
+ if (!vbd->tap) {
tapdisk_server_remove_vbd(vbd);
free(vbd);
}
}
int
-tapdisk_image_check_ring_request(td_image_t *image, blkif_request_t *req)
+tapdisk_image_check_request(td_image_t *image, td_vbd_request_t *vreq)
{
td_driver_t *driver;
td_disk_info_t *info;
- uint64_t nsects, total;
- int i, err, psize, rdonly;
+ int i, rdonly, secs, err;
driver = image->driver;
if (!driver)
return -ENODEV;
- err = -EINVAL;
- nsects = 0;
- total = 0;
info = &driver->info;
-
rdonly = td_flag_test(image->flags, TD_OPEN_RDONLY);
- if (req->operation != BLKIF_OP_READ &&
- req->operation != BLKIF_OP_WRITE)
- goto fail;
+ secs = 0;
- if (req->operation == BLKIF_OP_WRITE && rdonly) {
- err = -EPERM;
+ if (vreq->iovcnt < 0) {
+ err = -EINVAL;
goto fail;
}
- if (!req->nr_segments || req->nr_segments > MAX_SEGMENTS_PER_REQ)
- goto fail;
-
- total = 0;
- psize = getpagesize();
+ for (i = 0; i < vreq->iovcnt; i++)
+ secs += vreq->iov[i].secs;
- for (i = 0; i < req->nr_segments; i++) {
- nsects = req->seg[i].last_sect - req->seg[i].first_sect + 1;
-
- if (req->seg[i].last_sect >= psize >> 9 || nsects <= 0)
+ switch (vreq->op) {
+ case TD_OP_WRITE:
+ if (rdonly) {
+ err = -EPERM;
goto fail;
-
- total += nsects;
- }
-
- if (req->sector_number + nsects > info->size)
+ }
+ /* continue */
+ case TD_OP_READ:
+ if (vreq->sec + secs > info->size) {
+ err = -EINVAL;
+ goto fail;
+ }
+ break;
+ default:
+ err = -EOPNOTSUPP;
goto fail;
+ }
return 0;
fail:
- ERR(err, "bad request on %s (%s, %llu): id: %llu: %d at %llu",
- image->name, (rdonly ? "ro" : "rw"), info->size, req->id,
- req->operation, req->sector_number + total);
+ ERR(err, "bad request on %s (%s, %llu): req %s op %d at %llu",
+ image->name, (rdonly ? "ro" : "rw"), info->size, vreq->name,
+ vreq->op, vreq->sec + secs);
+
return err;
}
void tapdisk_image_free(td_image_t *);
int tapdisk_image_check_td_request(td_image_t *, td_request_t);
-int tapdisk_image_check_ring_request(td_image_t *, blkif_request_t *);
+int tapdisk_image_check_request(td_image_t *, struct td_vbd_request *);
void tapdisk_image_stats(td_image_t *, td_stats_t *);
#endif
#include "tapdisk-vbd.h"
#include "tapdisk-server.h"
#include "tapdisk-disktype.h"
-#include "blktap.h"
#define POLL_READ 0
#define POLL_WRITE 1
#include <sys/mman.h>
#include <sys/ioctl.h>
-#include "blktap.h"
#include "libvhd.h"
+#include "tapdisk-blktap.h"
#include "tapdisk-image.h"
#include "tapdisk-driver.h"
#include "tapdisk-server.h"
#define TD_VBD_EIO_SLEEP 1
#define TD_VBD_WATCHDOG_TIMEOUT 10
-static void tapdisk_vbd_ring_event(event_id_t, char, void *);
static void tapdisk_vbd_complete_vbd_request(td_vbd_t *, td_vbd_request_t *);
-static void tapdisk_vbd_callback(void *, blkif_response_t *);
static int tapdisk_vbd_queue_ready(td_vbd_t *);
static void tapdisk_vbd_check_queue_state(td_vbd_t *);
* initialization
*/
-static inline void
-tapdisk_vbd_initialize_vreq(td_vbd_request_t *vreq)
-{
- memset(vreq, 0, sizeof(td_vbd_request_t));
- INIT_LIST_HEAD(&vreq->next);
-}
-
static void
tapdisk_vbd_mark_progress(td_vbd_t *vbd)
{
tapdisk_vbd_create(uint16_t uuid)
{
td_vbd_t *vbd;
- int i;
vbd = calloc(1, sizeof(td_vbd_t));
if (!vbd) {
}
vbd->uuid = uuid;
- vbd->minor = -1;
- vbd->ring.fd = -1;
-
- /* default blktap ring completion */
- vbd->callback = tapdisk_vbd_callback;
- vbd->argument = vbd;
INIT_LIST_HEAD(&vbd->images);
INIT_LIST_HEAD(&vbd->new_requests);
INIT_LIST_HEAD(&vbd->next);
tapdisk_vbd_mark_progress(vbd);
- for (i = 0; i < MAX_REQUESTS; i++)
- tapdisk_vbd_initialize_vreq(vbd->request_list + i);
-
return vbd;
}
return 0;
}
-void
-tapdisk_vbd_set_callback(td_vbd_t *vbd, td_vbd_cb_t callback, void *argument)
-{
- vbd->callback = callback;
- vbd->argument = argument;
-}
-
int
tapdisk_vbd_validate_chain(td_vbd_t *vbd)
{
int fd, err;
char *fn;
- err = asprintf(&fn, BLKTAP2_ENOSPC_SIGNAL_FILE"%d", vbd->minor);
+ err = asprintf(&fn, BLKTAP2_ENOSPC_SIGNAL_FILE"%d", vbd->tap->minor);
if (err == -1) {
EPRINTF("Failed to signal ENOSPC condition\n");
return;
return err;
}
-static int
-tapdisk_vbd_register_event_watches(td_vbd_t *vbd)
-{
- event_id_t id;
-
- id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
- vbd->ring.fd, 0,
- tapdisk_vbd_ring_event, vbd);
- if (id < 0)
- return id;
-
- vbd->ring_event_id = id;
-
- return 0;
-}
-
-static void
-tapdisk_vbd_unregister_events(td_vbd_t *vbd)
-{
- if (vbd->ring_event_id)
- tapdisk_server_unregister_event(vbd->ring_event_id);
-}
-
-static int
-tapdisk_vbd_map_device(td_vbd_t *vbd, const char *devname)
-{
-
- int err, psize;
- td_ring_t *ring;
-
- ring = &vbd->ring;
- psize = getpagesize();
-
- ring->fd = open(devname, O_RDWR);
- if (ring->fd == -1) {
- err = -errno;
- EPRINTF("failed to open %s: %d\n", devname, err);
- goto fail;
- }
-
- ring->mem = mmap(0, psize * BLKTAP_MMAP_REGION_SIZE,
- PROT_READ | PROT_WRITE, MAP_SHARED, ring->fd, 0);
- if (ring->mem == MAP_FAILED) {
- err = -errno;
- EPRINTF("failed to mmap %s: %d\n", devname, err);
- goto fail;
- }
-
- ring->sring = (blkif_sring_t *)((unsigned long)ring->mem);
- BACK_RING_INIT(&ring->fe_ring, ring->sring, psize);
-
- ring->vstart =
- (unsigned long)ring->mem + (BLKTAP_RING_PAGES * psize);
-
- return 0;
-
-fail:
- if (ring->mem && ring->mem != MAP_FAILED)
- munmap(ring->mem, psize * BLKTAP_MMAP_REGION_SIZE);
- if (ring->fd != -1)
- close(ring->fd);
- ring->fd = -1;
- ring->mem = NULL;
- return err;
-}
-
-static int
-tapdisk_vbd_unmap_device(td_vbd_t *vbd)
-{
- int psize;
-
- psize = getpagesize();
-
- if (vbd->ring.fd != -1)
- close(vbd->ring.fd);
- if (vbd->ring.mem > 0)
- munmap(vbd->ring.mem, psize * BLKTAP_MMAP_REGION_SIZE);
-
- return 0;
-}
-
void
tapdisk_vbd_detach(td_vbd_t *vbd)
{
- tapdisk_vbd_unregister_events(vbd);
+ td_blktap_t *tap = vbd->tap;
- tapdisk_vbd_unmap_device(vbd);
- vbd->minor = -1;
+ if (tap) {
+ tapdisk_blktap_close(tap);
+ vbd->tap = NULL;
+ }
}
-
int
tapdisk_vbd_attach(td_vbd_t *vbd, const char *devname, int minor)
{
- int err;
-
- err = tapdisk_vbd_map_device(vbd, devname);
- if (err)
- goto fail;
-
- err = tapdisk_vbd_register_event_watches(vbd);
- if (err)
- goto fail;
-
- vbd->minor = minor;
- return 0;
-
-fail:
- tapdisk_vbd_detach(vbd);
+ if (vbd->tap)
+ return -EALREADY;
- return err;
+ return tapdisk_blktap_open(devname, vbd, &vbd->tap);
}
int
if (!list_empty(&vbd->pending_requests))
return -EAGAIN;
- tapdisk_vbd_kick(vbd);
tapdisk_vbd_queue_count(vbd, &new, &pending, &failed, &completed);
DPRINTF("%s: state: 0x%08x, new: 0x%02x, pending: 0x%02x, "
vbd->name, vbd->state, new, pending, failed, completed);
DPRINTF("last activity: %010ld.%06ld, errors: 0x%04"PRIx64", "
"retries: 0x%04"PRIx64", received: 0x%08"PRIx64", "
- "returned: 0x%08"PRIx64", kicked: 0x%08"PRIx64", "
- "kicks in: 0x%08"PRIx64", out: 0x%08"PRIu64"\n",
+ "returned: 0x%08"PRIx64", kicked: 0x%08"PRIx64"\n",
vbd->ts.tv_sec, vbd->ts.tv_usec,
vbd->errors, vbd->retries, vbd->received, vbd->returned,
- vbd->kicked, vbd->kicks_in, vbd->kicks_out);
+ vbd->kicked);
tapdisk_vbd_close_vdi(vbd);
tapdisk_vbd_detach(vbd);
DBG(TLOG_WARN, "%s: state: 0x%08x, new: 0x%02x, pending: 0x%02x, "
"failed: 0x%02x, completed: 0x%02x, last activity: %010ld.%06ld, "
"errors: 0x%04llx, retries: 0x%04llx, received: 0x%08llx, "
- "returned: 0x%08llx, kicked: 0x%08llx, "
- "kicks in: 0x%08"PRIx64", out: 0x%08"PRIx64"\n",
+ "returned: 0x%08llx, kicked: 0x%08llx\n",
vbd->name, vbd->state, new, pending, failed, completed,
vbd->ts.tv_sec, vbd->ts.tv_usec, vbd->errors, vbd->retries,
- vbd->received, vbd->returned, vbd->kicked,
- vbd->kicks_in, vbd->kicks_out);
+ vbd->received, vbd->returned, vbd->kicked);
tapdisk_vbd_for_each_image(vbd, image, tmp)
td_debug(image);
return 0;
}
-int
-tapdisk_vbd_kick(td_vbd_t *vbd)
-{
- int n;
- td_ring_t *ring;
-
- tapdisk_vbd_check_queue_state(vbd);
-
- ring = &vbd->ring;
- if (!ring->sring)
- return 0;
-
- n = (ring->fe_ring.rsp_prod_pvt - ring->fe_ring.sring->rsp_prod);
- if (!n)
- return 0;
-
- vbd->kicks_out++;
- vbd->kicked += n;
- RING_PUSH_RESPONSES(&ring->fe_ring);
- ioctl(ring->fd, BLKTAP_IOCTL_KICK_FE, 0);
-
- DBG(TLOG_INFO, "kicking %d: rec: 0x%08llx, ret: 0x%08llx, kicked: "
- "0x%08llx\n", n, vbd->received, vbd->returned, vbd->kicked);
-
- return n;
-}
-
-static inline void
-tapdisk_vbd_write_response_to_ring(td_vbd_t *vbd, blkif_response_t *rsp)
-{
- td_ring_t *ring;
- blkif_response_t *rspp;
-
- ring = &vbd->ring;
- rspp = RING_GET_RESPONSE(&ring->fe_ring, ring->fe_ring.rsp_prod_pvt);
- memcpy(rspp, rsp, sizeof(blkif_response_t));
- ring->fe_ring.rsp_prod_pvt++;
-}
-
-static void
-tapdisk_vbd_callback(void *arg, blkif_response_t *rsp)
-{
- td_vbd_t *vbd = (td_vbd_t *)arg;
- tapdisk_vbd_write_response_to_ring(vbd, rsp);
-}
-
-static void
-tapdisk_vbd_make_response(td_vbd_t *vbd, td_vbd_request_t *vreq)
-{
- blkif_request_t tmp;
- blkif_response_t *rsp;
-
- tmp = vreq->req;
- rsp = (blkif_response_t *)&vreq->req;
-
- rsp->id = tmp.id;
- rsp->operation = tmp.operation;
- rsp->status = vreq->status;
-
- DBG(TLOG_DBG, "writing req %d, sec 0x%08"PRIx64", res %d to ring\n",
- (int)tmp.id, tmp.sector_number, vreq->status);
-
- if (rsp->status != BLKIF_RSP_OKAY)
- ERR(-vreq->error, "returning BLKIF_RSP %d", rsp->status);
-
- vbd->returned++;
- vbd->callback(vbd->argument, rsp);
-}
-
static int
tapdisk_vbd_request_ttl(td_vbd_request_t *vreq,
const struct timeval *now)
timeout = tapdisk_vbd_request_ttl(vreq, now) < 0;
if (timeout)
- DBG(TLOG_INFO, "req %"PRIu64" timed out, retried %d times\n",
- vreq->req.id, vreq->num_retries);
+ DBG(TLOG_INFO, "req %s timed out, retried %d times\n",
+ vreq->name, vreq->num_retries);
return timeout;
}
!list_empty(&vbd->failed_requests))
tapdisk_vbd_issue_requests(vbd);
- tapdisk_vbd_for_each_request(vreq, tmp, &vbd->completed_requests) {
- tapdisk_vbd_make_response(vbd, vreq);
- list_del(&vreq->next);
- tapdisk_vbd_initialize_vreq(vreq);
- }
}
void
tapdisk_vbd_complete_vbd_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
{
if (!vreq->submitting && !vreq->secs_pending) {
- if (vreq->status == BLKIF_RSP_ERROR &&
+ if (vreq->status == BLKTAP_RSP_ERROR &&
tapdisk_vbd_request_should_retry(vbd, vreq))
tapdisk_vbd_move_request(vreq, &vbd->failed_requests);
else
}
if (err) {
- vreq->status = BLKIF_RSP_ERROR;
+ vreq->status = BLKTAP_RSP_ERROR;
vreq->error = (vreq->error ? : err);
if (err != -EBUSY) {
vbd->errors++;
- ERR(err, "req %"PRIu64": %s 0x%04x secs to "
- "0x%08"PRIx64, vreq->req.id,
+ ERR(err, "req %s: %s 0x%04x secs @ 0x%08"PRIx64,
+ vreq->name,
(treq.op == TD_OP_WRITE ? "write" : "read"),
treq.secs, treq.sec);
}
__tapdisk_vbd_complete_td_request(vbd, vreq, treq, -EBUSY);
}
-static void
+void
tapdisk_vbd_complete_td_request(td_request_t treq, int res)
{
td_vbd_t *vbd;
}
}
- DBG(TLOG_DBG, "%s: req %d seg %d sec 0x%08llx "
+ DBG(TLOG_DBG, "%s: req %s seg %d sec 0x%08llx "
"secs 0x%04x buf %p op %d res %d\n", image->name,
- (int)treq.id, treq.sidx, treq.sec, treq.secs,
- treq.buf, (int)vreq->req.operation, res);
+ vreq->name, treq.sidx, treq.sec, treq.secs,
+ treq.buf, vreq->op, res);
__tapdisk_vbd_complete_td_request(vbd, vreq, treq, res);
}
td_queue_write(vbd->secondary, clone);
}
-static inline void
-tapdisk_vbd_submit_request(td_vbd_t *vbd, blkif_request_t *req,
- td_request_t treq)
-{
- switch (req->operation) {
- case BLKIF_OP_WRITE:
- treq.op = TD_OP_WRITE;
- /* it's important to queue the mirror request before queuing
- * the main one. If the main image runs into ENOSPC, the
- * mirroring could be disabled before td_queue_write returns,
- * so if the mirror request was queued after (which would then
- * not happen), we'd lose that write and cause the process to
- * hang with unacknowledged writes */
- if (vbd->secondary_mode == TD_VBD_SECONDARY_MIRROR)
- queue_mirror_req(vbd, treq);
- td_queue_write(treq.image, treq);
- break;
-
- case BLKIF_OP_READ:
- treq.op = TD_OP_READ;
- td_queue_read(treq.image, treq);
- break;
- }
-}
-
static int
tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
{
- char *page;
- td_ring_t *ring;
td_image_t *image;
td_request_t treq;
- uint64_t sector_nr;
- blkif_request_t *req;
- int i, err, id, nsects;
- struct timeval now;
- int treq_started = 0;
+ td_sector_t sec;
+ int i, err;
- req = &vreq->req;
- id = req->id;
- ring = &vbd->ring;
- sector_nr = req->sector_number;
- image = tapdisk_vbd_first_image(vbd);
+ sec = vreq->sec;
+ image = tapdisk_vbd_first_image(vbd);
vreq->submitting = 1;
goto fail;
}
- err = tapdisk_image_check_ring_request(image, req);
+ err = tapdisk_image_check_request(image, vreq);
if (err) {
vreq->error = err;
goto fail;
}
- memset(&treq, 0, sizeof(td_request_t));
- for (i = 0; i < req->nr_segments; i++) {
- nsects = req->seg[i].last_sect - req->seg[i].first_sect + 1;
- page = (char *)MMAP_VADDR(ring->vstart,
- (unsigned long)req->id, i);
- page += (req->seg[i].first_sect << SECTOR_SHIFT);
-
- if (treq_started) {
- if (page == treq.buf + (treq.secs << SECTOR_SHIFT)) {
- treq.secs += nsects;
- } else {
- tapdisk_vbd_submit_request(vbd, req, treq);
- treq_started = 0;
- }
- }
+ for (i = 0; i < vreq->iovcnt; i++) {
+ struct td_iovec *iov = &vreq->iov[i];
- if (!treq_started) {
- treq.id = id;
- treq.sidx = i;
- treq.buf = page;
- treq.sec = sector_nr;
- treq.secs = nsects;
- treq.image = image;
- treq.cb = tapdisk_vbd_complete_td_request;
- treq.cb_data = NULL;
- treq.private = vreq;
- treq_started = 1;
- }
+ treq.sidx = i;
+ treq.buf = iov->base;
+ treq.sec = sec;
+ treq.secs = iov->secs;
+ treq.image = image;
+ treq.cb = tapdisk_vbd_complete_td_request;
+ treq.cb_data = NULL;
+ treq.private = vreq;
- DBG(TLOG_DBG, "%s: req %d seg %d sec 0x%08llx secs 0x%04x "
- "buf %p op %d\n", image->name, id, i, treq.sec, treq.secs,
- treq.buf, (int)req->operation);
- vreq->secs_pending += nsects;
- vbd->secs_pending += nsects;
+ vreq->secs_pending += iov->secs;
+ vbd->secs_pending += iov->secs;
if (vbd->secondary_mode == TD_VBD_SECONDARY_MIRROR &&
- req->operation == BLKIF_OP_WRITE) {
- vreq->secs_pending += nsects;
- vbd->secs_pending += nsects;
+ vreq->op == TD_OP_WRITE) {
+ vreq->secs_pending += iov->secs;
+ vbd->secs_pending += iov->secs;
}
- if (i == req->nr_segments - 1) {
- tapdisk_vbd_submit_request(vbd, req, treq);
- treq_started = 0;
+ switch (vreq->op) {
+ case TD_OP_WRITE:
+ treq.op = TD_OP_WRITE;
+ /* it's important to queue the mirror request before queuing
+ * the main one. If the main image runs into ENOSPC, the
+ * mirroring could be disabled before td_queue_write returns,
+ * so if the mirror request was queued after (which would then
+ * not happen), we'd lose that write and cause the process to
+ * hang with unacknowledged writes */
+ if (vbd->secondary_mode == TD_VBD_SECONDARY_MIRROR)
+ queue_mirror_req(vbd, treq);
+ td_queue_write(treq.image, treq);
+ break;
+
+ case TD_OP_READ:
+ treq.op = TD_OP_READ;
+ td_queue_read(treq.image, treq);
+ break;
}
- sector_nr += nsects;
+ DBG(TLOG_DBG, "%s: req %s seg %d sec 0x%08llx secs 0x%04x "
+ "buf %p op %d\n", image->name, vreq->name, i, treq.sec, treq.secs,
+ treq.buf, vreq->op);
+ sec += iov->secs;
}
err = 0;
return err;
fail:
- vreq->status = BLKIF_RSP_ERROR;
+ vreq->status = BLKTAP_RSP_ERROR;
goto out;
}
vbd->retries++;
vreq->num_retries++;
vreq->error = 0;
- vreq->status = BLKIF_RSP_OKAY;
- DBG(TLOG_DBG, "retry #%d of req %"PRIu64", "
- "sec 0x%08"PRIx64", nr_segs: %d\n", vreq->num_retries,
- vreq->req.id, vreq->req.sector_number,
- vreq->req.nr_segments);
+ vreq->status = BLKTAP_RSP_OKAY;
+ DBG(TLOG_DBG, "retry #%d of req %s, "
+ "sec 0x%08"PRIx64", iovcnt: %d\n", vreq->num_retries,
+ vreq->name, vreq->sec, vreq->iovcnt);
err = tapdisk_vbd_issue_request(vbd, vreq);
/*
static void
tapdisk_vbd_count_new_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
{
- blkif_request_t *req = &vreq->req;
- struct blkif_request_segment *seg;
+ struct td_iovec *iov; /* cursor over vreq->iov[]; this function adds every iovec's sector count to vbd->secs */
int write;
- write = req->operation == BLKIF_OP_WRITE;
+ write = vreq->op == TD_OP_WRITE; /* nonzero when the request is a write, zero otherwise */
- for (seg = &req->seg[0]; seg < &req->seg[req->nr_segments]; seg++) {
- int secs = seg->last_sect - seg->first_sect + 1;
- td_sector_count_add(&vbd->secs, secs, write);
- }
+ for (iov = &vreq->iov[0]; iov < &vreq->iov[vreq->iovcnt]; iov++) /* visit all iovcnt entries */
+ td_sector_count_add(&vbd->secs, iov->secs, write); /* iov->secs is already a sector count, no first/last arithmetic needed */
}
static int
td_vbd_request_t *vreq, *tmp;
tapdisk_vbd_for_each_request(vreq, tmp, &vbd->new_requests) {
- vreq->status = BLKIF_RSP_ERROR;
+ vreq->status = BLKTAP_RSP_ERROR;
tapdisk_vbd_move_request(vreq, &vbd->completed_requests);
}
tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests) {
- vreq->status = BLKIF_RSP_ERROR;
+ vreq->status = BLKTAP_RSP_ERROR;
tapdisk_vbd_move_request(vreq, &vbd->completed_requests);
}
return tapdisk_vbd_issue_new_requests(vbd);
}
-static void
-tapdisk_vbd_pull_ring_requests(td_vbd_t *vbd)
-{
- int idx;
- RING_IDX rp, rc;
- td_ring_t *ring;
- blkif_request_t *req;
- td_vbd_request_t *vreq;
- struct timeval now;
-
- ring = &vbd->ring;
- if (!ring->sring)
- return;
-
- gettimeofday(&now, NULL);
-
- rp = ring->fe_ring.sring->req_prod;
- xen_rmb();
-
- for (rc = ring->fe_ring.req_cons; rc != rp; rc++) {
- req = RING_GET_REQUEST(&ring->fe_ring, rc);
- ++ring->fe_ring.req_cons;
-
- idx = req->id;
- vreq = &vbd->request_list[idx];
-
- ASSERT(list_empty(&vreq->next));
- ASSERT(vreq->secs_pending == 0);
-
- memcpy(&vreq->req, req, sizeof(blkif_request_t));
- vbd->received++;
- vreq->vbd = vbd;
- vreq->ts = now;
-
- tapdisk_vbd_move_request(vreq, &vbd->new_requests);
-
- DBG(TLOG_DBG, "%s: request %d \n", vbd->name, idx);
- }
-}
-
-static int
-tapdisk_vbd_pause_ring(td_vbd_t *vbd)
+int
+tapdisk_vbd_queue_request(td_vbd_t *vbd, td_vbd_request_t *vreq) /* hand a caller-prepared request to the vbd; always returns 0 */
{
- int err;
-
- if (td_flag_test(vbd->state, TD_VBD_PAUSED))
- return 0;
-
- td_flag_set(vbd->state, TD_VBD_PAUSE_REQUESTED);
+ gettimeofday(&vreq->ts, NULL); /* stamp the time the request was received */
+ vreq->vbd = vbd; /* remember which vbd owns the request */
- err = tapdisk_vbd_quiesce_queue(vbd);
- if (err) {
- EPRINTF("%s: ring pause request on active queue\n", vbd->name);
- return err;
- }
-
- tapdisk_vbd_close_vdi(vbd);
+ list_add_tail(&vreq->next, &vbd->new_requests); /* append to the new-request queue; NOTE(review): vreq->list_head is not updated here, confirm nothing relies on it for new requests */
+ vbd->received++; /* one more request received by this vbd */
- err = ioctl(vbd->ring.fd, BLKTAP2_IOCTL_PAUSE, 0);
- if (err)
- EPRINTF("%s: pause ioctl failed: %d\n", vbd->name, errno);
- else {
- td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED);
- td_flag_set(vbd->state, TD_VBD_PAUSED);
- }
-
- return err;
+ return 0;
}
-static int
-tapdisk_vbd_resume_ring(td_vbd_t *vbd)
+/*
+ * Deliver all completed requests to their callbacks.
+ *
+ * Requests are taken off vbd->completed_requests one at a time.  For
+ * each one, the rest of the list is scanned for requests carrying the
+ * same token; those are unlinked as well and completed as a group.
+ * Every member of a group except the last is reported with a final
+ * callback argument of 0, the last one with 1 (presumably "this is the
+ * last completion for this token in this kick" -- confirm against the
+ * td_vreq_callback_t implementations).
+ *
+ * vbd->kicked counts calls to this function, vbd->returned counts
+ * callbacks invoked.
+ */
+void
+tapdisk_vbd_kick(td_vbd_t *vbd)
{
- int i, err, type;
- char message[BLKTAP2_MAX_MESSAGE_LEN];
- const char *path;
-
- memset(message, 0, sizeof(message));
-
- if (!td_flag_test(vbd->state, TD_VBD_PAUSED)) {
- EPRINTF("%s: resume message for unpaused vbd\n", vbd->name);
- return -EINVAL;
- }
-
- err = ioctl(vbd->ring.fd, BLKTAP2_IOCTL_REOPEN, &message);
- if (err) {
- EPRINTF("%s: resume ioctl failed: %d\n", vbd->name, errno);
- return err;
- }
-
- type = tapdisk_disktype_parse_params(message, &path);
- if (type < 0) {
- err = type;
- EPRINTF("%s: invalid resume string %s\n", vbd->name, message);
- goto out;
- }
-
- tapdisk_vbd_start_queue(vbd);
-
- err = tapdisk_vbd_open_vdi(vbd, path, vbd->flags | TD_OPEN_STRICT, -1);
-out:
- if (!err) {
- struct blktap2_params params;
- td_disk_info_t info;
-
- memset(&params, 0, sizeof(params));
- tapdisk_vbd_get_disk_info(vbd, &info);
-
- params.sector_size = info.sector_size;
- params.capacity = info.size;
- snprintf(params.name, sizeof(params.name) - 1, "%s", message);
-
- ioctl(vbd->ring.fd, BLKTAP2_IOCTL_SET_PARAMS, &params);
- td_flag_clear(vbd->state, TD_VBD_PAUSED);
- }
- ioctl(vbd->ring.fd, BLKTAP2_IOCTL_RESUME, err);
- return err;
-}
+ const struct list_head *list = &vbd->completed_requests;
+ td_vbd_request_t *vreq, *prev, *next;
-static int
-tapdisk_vbd_check_ring_message(td_vbd_t *vbd)
-{
- if (!vbd->ring.sring)
- return -EINVAL;
+ vbd->kicked++;
- switch (vbd->ring.sring->private.tapif_user.msg) {
- case 0:
- return 0;
+ while (!list_empty(list)) {
+ /* detach the head; it is the first member of its token group */
+ prev = list_entry(list->next, td_vbd_request_t, next);
+ list_del(&prev->next);
- case BLKTAP2_RING_MESSAGE_PAUSE:
- return tapdisk_vbd_pause_ring(vbd);
+ tapdisk_vbd_for_each_request(vreq, next, list) {
+ if (vreq->token == prev->token) {
- case BLKTAP2_RING_MESSAGE_RESUME:
- return tapdisk_vbd_resume_ring(vbd);
+ /* another member follows, so prev is not the last one */
+ prev->cb(prev, prev->error, prev->token, 0);
+ vbd->returned++;
- case BLKTAP2_RING_MESSAGE_CLOSE:
- return tapdisk_vbd_close(vbd);
+ list_del(&vreq->next);
+ prev = vreq;
+ }
+ }
- default:
- return -EINVAL;
+ /* prev is now the last member of the group */
+ prev->cb(prev, prev->error, prev->token, 1);
+ vbd->returned++;
}
}
-static void
-tapdisk_vbd_ring_event(event_id_t id, char mode, void *private)
-{
- td_vbd_t *vbd;
-
- vbd = (td_vbd_t *)private;
-
- vbd->kicks_in++;
- tapdisk_vbd_pull_ring_requests(vbd);
- tapdisk_vbd_issue_requests(vbd);
-
- /* vbd may be destroyed after this call */
- tapdisk_vbd_check_ring_message(vbd);
-}
-
void
tapdisk_vbd_stats(td_vbd_t *vbd, td_stats_t *st)
{
tapdisk_stats_enter(st, '{');
tapdisk_stats_field(st, "name", "s", vbd->name);
- tapdisk_stats_field(st, "minor", "d", vbd->minor);
tapdisk_stats_field(st, "secs", "[");
tapdisk_stats_val(st, "llu", vbd->secs.rd);
tapdisk_image_stats(image, st);
tapdisk_stats_leave(st, ']');
+ if (vbd->tap) {
+ tapdisk_stats_field(st, "tap", "{");
+ tapdisk_blktap_stats(vbd->tap, st);
+ tapdisk_stats_leave(st, '}');
+ }
+
tapdisk_stats_field(st,
"FIXME_enospc_redirect_count",
"llu", vbd->FIXME_enospc_redirect_count);
#include "blktap.h"
#include "scheduler.h"
#include "tapdisk-image.h"
+#include "tapdisk-blktap.h"
#define TD_VBD_REQUEST_TIMEOUT 120
#define TD_VBD_MAX_RETRIES 100
#define TD_VBD_SECONDARY_MIRROR 1
#define TD_VBD_SECONDARY_STANDBY 2
-typedef struct td_ring td_ring_t;
-typedef struct td_vbd_request td_vbd_request_t;
-typedef struct td_vbd_handle td_vbd_t;
-typedef void (*td_vbd_cb_t) (void *, blkif_response_t *);
-
-struct td_ring {
- int fd;
- char *mem;
- blkif_sring_t *sring;
- blkif_back_ring_t fe_ring;
- unsigned long vstart;
-};
-
-struct td_vbd_request {
- blkif_request_t req;
- int16_t status;
-
- int error;
- int submitting;
- int secs_pending;
- int num_retries;
- struct timeval ts;
- struct timeval last_try;
-
- td_vbd_t *vbd;
- struct list_head next;
- struct list_head *list_head;
-};
-
struct td_vbd_handle {
char *name;
+ td_blktap_t *tap;
+
td_uuid_t uuid;
- int minor;
td_flag_t flags;
td_flag_t state;
struct list_head failed_requests;
struct list_head completed_requests;
- td_vbd_request_t request_list[MAX_REQUESTS];
-
- td_ring_t ring;
- event_id_t ring_event_id;
-
- td_vbd_cb_t callback;
- void *argument;
+ td_vbd_request_t request_list[MAX_REQUESTS]; /* XXX */
struct list_head next;
uint64_t retries;
uint64_t errors;
td_sector_count_t secs;
-
- uint64_t kicks_in;
- uint64_t kicks_out;
};
#define tapdisk_vbd_for_each_request(vreq, tmp, list) \
td_vbd_t *tapdisk_vbd_create(td_uuid_t);
int tapdisk_vbd_initialize(int, int, td_uuid_t);
-void tapdisk_vbd_set_callback(td_vbd_t *, td_vbd_cb_t, void *);
int tapdisk_vbd_open(td_vbd_t *, const char *, int, const char *, td_flag_t);
int tapdisk_vbd_close(td_vbd_t *);
int tapdisk_vbd_attach(td_vbd_t *, const char *, int);
void tapdisk_vbd_detach(td_vbd_t *);
+int tapdisk_vbd_queue_request(td_vbd_t *, td_vbd_request_t *);
void tapdisk_vbd_forward_request(td_request_t);
int tapdisk_vbd_get_disk_info(td_vbd_t *, td_disk_info_t *);
int tapdisk_vbd_kill_queue(td_vbd_t *);
int tapdisk_vbd_pause(td_vbd_t *);
int tapdisk_vbd_resume(td_vbd_t *, const char *);
-int tapdisk_vbd_kick(td_vbd_t *);
+void tapdisk_vbd_kick(td_vbd_t *);
void tapdisk_vbd_check_state(td_vbd_t *);
void tapdisk_vbd_check_progress(td_vbd_t *);
void tapdisk_vbd_debug(td_vbd_t *);
typedef struct td_driver_handle td_driver_t;
typedef struct td_image_handle td_image_t;
typedef struct td_sector_count td_sector_count_t;
+typedef struct td_vbd_request td_vbd_request_t;
+typedef struct td_vbd_handle td_vbd_t;
/*
* Prototype of the callback to activate as requests complete.
*/
typedef void (*td_callback_t)(td_request_t, int);
+typedef void (*td_vreq_callback_t)(td_vbd_request_t*, int, void*, int);
struct td_disk_id {
char *name;
uint32_t info;
};
+struct td_iovec { /* one data buffer of a td_vbd_request; a request carries an array of these */
+ void *base; /* buffer address; becomes td_request.buf when the request is issued */
+ unsigned int secs; /* buffer length, counted in sectors */
+};
+
+struct td_vbd_request { /* one I/O request queued on a VBD via tapdisk_vbd_queue_request() */
+ int op; /* TD_OP_READ or TD_OP_WRITE */
+ td_sector_t sec; /* first sector of the request; iovecs follow one another from here */
+ struct td_iovec *iov; /* array of data buffers */
+ int iovcnt; /* number of entries in iov[] */
+
+ td_vreq_callback_t cb; /* completion callback, invoked from tapdisk_vbd_kick() */
+ void *token; /* opaque value passed back to cb; tapdisk_vbd_kick() completes requests with equal tokens as one group */
+ const char *name; /* label used to identify the request in log messages */
+
+ int status; /* BLKTAP_RSP_OKAY or BLKTAP_RSP_ERROR */
+ int error; /* error code recorded for the request; 0 when none */
+ int submitting; /* set to 1 while tapdisk_vbd_issue_request() is queueing the request */
+ int secs_pending; /* sectors queued to the images and not yet completed */
+ int num_retries; /* incremented each time the request is retried */
+ struct timeval ts; /* time the request was queued on the vbd */
+ struct timeval last_try; /* presumably the time of the most recent issue attempt -- TODO confirm */
+
+ td_vbd_t *vbd; /* vbd the request was queued on */
+ struct list_head next; /* linkage on one of the vbd's request lists */
+ struct list_head *list_head; /* presumably the list the request currently sits on -- TODO confirm */
+};
+
struct td_request {
int op;
- char *buf;
+ void *buf;
+
td_sector_t sec;
int secs;
td_callback_t cb;
void *cb_data;
- uint64_t id;
int sidx;
+ const td_vbd_request_t *vreq;
void *private;
};
#include <stddef.h>
#include <linux/blktap.h>
-#define BLKTAP_PAGE_SIZE sysconf(_SC_PAGE_SIZE)
-#define wmb()
-
#define BLKTAP2_SYSFS_DIR "/sys/class/blktap2"
#define BLKTAP2_CONTROL_NAME "blktap-control"
#define BLKTAP2_CONTROL_DIR "/var/run/"BLKTAP2_CONTROL_NAME
#define BLKTAP2_IO_DEVICE BLKTAP2_DIRECTORY"/tapdev"
#define BLKTAP2_ENOSPC_SIGNAL_FILE "/var/run/tapdisk-enospc"
-/*
- * compat crap to reduce patch size for now.
- */
-
-typedef blktap_sring_t blkif_sring_t;
-typedef blktap_back_ring_t blkif_back_ring_t;
-typedef blktap_ring_req_t blkif_request_t;
-typedef blktap_ring_rsp_t blkif_response_t;
-
-#define blkif_request_segment blktap_segment
-
-#define BLKIF_OP_WRITE BLKTAP_OP_WRITE
-#define BLKIF_OP_READ BLKTAP_OP_READ
-
-#define BLKIF_RSP_ERROR BLKTAP_RSP_ERROR
-#define BLKIF_RSP_OKAY BLKTAP_RSP_OKAY
-
-#define BLKIF_MAX_SEGMENTS_PER_REQUEST BLKTAP_SEGMENT_MAX
-#define rmb()
-
-#define BLKTAP_MMAP_PAGES (BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX)
-#define BLKTAP_RING_PAGES 1
-#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + BLKTAP_MMAP_PAGES)
-#define MMAP_VADDR(_vstart,_req,_seg) \
- ((_vstart) + \
- ((_req) * BLKTAP_SEGMENT_MAX * BLKTAP_PAGE_SIZE) + \
- ((_seg) * BLKTAP_PAGE_SIZE))
-
-#define BLKTAP_IOCTL_KICK_FE BLKTAP_IOCTL_RESPOND
-#define BLKTAP2_IOCTL_ALLOC_TAP BLKTAP_IOCTL_ALLOC_TAP
-#define BLKTAP2_IOCTL_FREE_TAP BLKTAP_IOCTL_FREE_TAP
-#define BLKTAP2_IOCTL_CREATE_DEVICE BLKTAP_IOCTL_CREATE_DEVICE
-#define BLKTAP2_IOCTL_REMOVE_DEVICE BLKTAP_IOCTL_REMOVE_DEVICE
-
-#define BLKTAP2_IOCTL_SET_PARAMS 203
-#define BLKTAP2_IOCTL_PAUSE 204
-#define BLKTAP2_IOCTL_REOPEN 205
-#define BLKTAP2_IOCTL_RESUME 206
-
-#define BLKTAP2_MAX_MESSAGE_LEN BLKTAP_NAME_MAX
-
-#define BLKTAP2_RING_MESSAGE_PAUSE 1
-#define BLKTAP2_RING_MESSAGE_RESUME 2
-#define BLKTAP2_RING_MESSAGE_CLOSE BLKTAP_RING_MESSAGE_CLOSE
-
#endif /* _TD_BLKTAP_H_ */
%{_sbindir}/vhd-update
%{_sbindir}/vhd-util
%{_sbindir}/vhd-index
-%{_sbindir}/tapdisk-stream
-%{_sbindir}/tapdisk-diff
%{_sbindir}/part-util
%{_sbindir}/vhdpartx
%{_sysconfdir}/udev/rules.d/blktap.rules