--- /dev/null
+/*
+ * evtdev.c
+ *
+ * Driver giving user-space access to the kernel's event channel.
+ *
+ * Copyright (c) 2015 Wei Liu <wei.liu2@citrix.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: $");
+
+#include "rumpxen_xendev.h"
+
+#include <bmk-rumpuser/rumpuser.h>
+#include <bmk-core/memalloc.h>
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+/* mini-os/os.h has some bad casts */
+#include <mini-os/events.h>
+#include <mini-os/wait.h>
+#pragma GCC diagnostic error "-Wcast-qual"
+
+/* For ioctl interface. */
+#include "xenio3.h"
+
+/*----- data structures -----*/
+#define EVTDEV_RING_SIZE 2048	/* must be a power of two */
+#define EVTDEV_RING_MASK (EVTDEV_RING_SIZE - 1)
+#define BYTES_PER_PORT (sizeof(evtchn_port_t) / sizeof(uint8_t))
+
+/* The interrupt handler queues events here.  The lock protects
+ * against multiple writers (several events triggering at the same
+ * time); there is only one reader, the worker thread. */
+static DECLARE_WAIT_QUEUE_HEAD(xenevt_waitq);
+static spinlock_t xenevt_ring_lock = SPIN_LOCK_UNLOCKED;
+static evtchn_port_t xenevt_ring[EVTDEV_RING_SIZE];
+static u_int xenevt_ring_prod, xenevt_ring_cons;
+
+struct xenevt_dev_data {
+ evtchn_port_t ring[EVTDEV_RING_SIZE];
+ u_int ring_cons;
+ u_int ring_prod;
+#define EVTDEV_F_OVERFLOW 0x1 /* ring overflow */
+ u_int flags;
+
+ kmutex_t lock;
+ struct selinfo selinfo; /* used by poll, see select(9) */
+ struct wait_queue_head waitq;
+};
+
+/* Kernel event -> device instance mapping */
+static kmutex_t devevent_lock;
+static struct xenevt_dev_data *devevents[NR_EVENT_CHANNELS];
+
+/*----- helpers -----*/
+#define WBITS (POLLOUT | POLLWRNORM)
+#define RBITS (POLLIN | POLLRDNORM)
+
+/* call with d->lock held */
+static void queue(struct xenevt_dev_data *d, evtchn_port_t port)
+{
+ if (d->ring_cons == ((d->ring_prod + 1) & EVTDEV_RING_MASK)) {
+		d->flags |= EVTDEV_F_OVERFLOW;
+		printf("evtdev: ring overflow port %u\n", port);
+ } else {
+ d->ring[d->ring_prod] = port;
+ wmb();
+ d->ring_prod = (d->ring_prod + 1) & EVTDEV_RING_MASK;
+ }
+ /* notify */
+ minios_wake_up(&d->waitq);
+ selnotify(&d->selinfo, RBITS, NOTE_SUBMIT);
+}
+
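+/*
+ * Worker thread: drains the global ring filled by evtdev_handler()
+ * and hands each port to the per-instance ring of the device it is
+ * bound to.  It runs in thread context and may therefore take the
+ * mutexes that the interrupt handler must not.
+ */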
+static void xenevt_thread_func(void *ign)
+{
+ u_int prod = xenevt_ring_prod;
+ u_int cons;
+
+ for (;;) {
+ minios_wait_event(xenevt_waitq, xenevt_ring_prod != prod);
+ prod = xenevt_ring_prod;
+ cons = xenevt_ring_cons;
+
+ mb();
+
+ while (cons != prod) {
+ evtchn_port_t port = xenevt_ring[cons];
+ struct xenevt_dev_data *d;
+
+ KASSERT(port < NR_EVENT_CHANNELS);
+
+ mutex_enter(&devevent_lock);
+
+ d = devevents[port];
+
+ KASSERT(d);
+
+ mutex_enter(&d->lock);
+
+ queue(d, port);
+
+ mutex_exit(&d->lock);
+ mutex_exit(&devevent_lock);
+
+ cons++;
+ }
+
+ mb();
+
+ xenevt_ring_cons = cons;
+ }
+}
+
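+/*
+ * Interrupt handler, called by mini-os when a bound event channel
+ * fires.  It only records the port in the global ring and wakes the
+ * worker thread; all further processing happens in thread context.
+ */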
+static void evtdev_handler(evtchn_port_t port, struct pt_regs *regs,
+			   void *data)
+{
+ spin_lock(&xenevt_ring_lock);
+
+ if (xenevt_ring_cons == ((xenevt_ring_prod + 1) & EVTDEV_RING_MASK))
+ minios_printk("xenevt driver ring overflowed!\n");
+ else {
+ xenevt_ring[xenevt_ring_prod] = port;
+ wmb();
+ xenevt_ring_prod = (xenevt_ring_prod + 1) & EVTDEV_RING_MASK;
+ }
+ spin_unlock(&xenevt_ring_lock);
+
+ minios_wake_up(&xenevt_waitq);
+}
+
+/*----- request handling (writes to the device) -----*/
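+/*
+ * write(2) takes an array of uint16_t port numbers.  Each written
+ * port that is bound to this device instance has its event channel
+ * unmasked, re-arming delivery once the application has serviced
+ * the event.
+ */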
+static int
+xenevt_dev_write(struct file *fp, off_t *offset, struct uio *uio,
+ kauth_cred_t cred, int flags)
+{
+ struct xenevt_dev_data *d = fp->f_data;
+ uint16_t *chans = NULL;
+ int i, nentries, err;
+
+ DPRINTF(("/dev/xenevt: write...\n"));
+
+ if (uio->uio_resid == 0) {
+ err = 0;
+ goto out;
+ }
+
+ nentries = uio->uio_resid / sizeof(uint16_t);
+ if (nentries > NR_EVENT_CHANNELS) {
+ err = EMSGSIZE;
+ goto out;
+ }
+
+ chans = bmk_memcalloc(nentries, sizeof(uint16_t));
+ if (chans == NULL) {
+ err = ENOMEM;
+ goto out;
+ }
+
+	/* Copy whole entries only; a trailing odd byte must not overrun chans. */
+	err = uiomove(chans, nentries * sizeof(uint16_t), uio);
+ if (err) goto out;
+
+ mutex_enter(&devevent_lock);
+ for (i = 0; i < nentries; i++) {
+ if (chans[i] < NR_EVENT_CHANNELS &&
+ devevents[chans[i]] == d)
+ minios_unmask_evtchn(chans[i]);
+ }
+ mutex_exit(&devevent_lock);
+
+ err = 0;
+out:
+ DPRINTF(("/dev/xenevt: write done, err=%d\n", err));
+ bmk_memfree(chans);
+ return err;
+}
+
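+/*
+ * read(2) returns an array of evtchn_port_t values, one per pending
+ * event.  It blocks unless the file was opened non-blocking; the two
+ * uiomove() calls below handle the case where the ring wraps around.
+ */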
+static int
+xenevt_dev_read(struct file *fp, off_t *offset, struct uio *uio,
+ kauth_cred_t cred, int read_flags)
+{
+ struct xenevt_dev_data *d = fp->f_data;
+ u_int cons, prod, len, uio_len;
+ int err;
+
+ DPRINTF(("/dev/xenevt: read...\n"));
+
+ mutex_enter(&d->lock);
+
+ err = 0;
+ while (err == 0) {
+ cons = d->ring_cons;
+ prod = d->ring_prod;
+
+ if (cons != prod) break; /* data available */
+
+ if (d->flags & EVTDEV_F_OVERFLOW) break;
+
+ /* nothing to read */
+ if ((fp->f_flag & FNONBLOCK) == 0) {
+ int nlocks;
+ DEFINE_WAIT(w);
+
+ minios_add_waiter(w, d->waitq);
+ mutex_exit(&d->lock);
+
+ rumpkern_unsched(&nlocks, 0);
+ bmk_sched();
+ rumpkern_sched(nlocks, 0);
+
+ mutex_enter(&d->lock);
+ minios_remove_waiter(w, d->waitq);
+ } else
+ err = EAGAIN;
+ }
+
+ if (err == 0 && (d->flags & EVTDEV_F_OVERFLOW))
+ err = EFBIG;
+
+ mutex_exit(&d->lock);
+
+ if (err) goto out;
+
+ uio_len = uio->uio_resid / BYTES_PER_PORT;
+ if (cons <= prod)
+ len = prod - cons;
+ else
+ len = EVTDEV_RING_SIZE - cons;
+ if (len > uio_len)
+ len = uio_len;
+ err = uiomove(&d->ring[cons], len * BYTES_PER_PORT, uio);
+ if (err) goto out;
+
+ cons = (cons + len) & EVTDEV_RING_MASK;
+ uio_len = uio->uio_resid / BYTES_PER_PORT;
+ if (uio_len == 0) goto done;
+
+ /* ring wrapped */
+ len = prod - cons;
+ if (len > uio_len)
+ len = uio_len;
+ err = uiomove(&d->ring[cons], len * BYTES_PER_PORT, uio);
+ if (err) goto out;
+ cons = (cons + len) & EVTDEV_RING_MASK;
+
+done:
+ mutex_enter(&d->lock);
+ d->ring_cons = cons;
+ mutex_exit(&d->lock);
+out:
+ DPRINTF(("/dev/xenevt: read done, err=%d\n", err));
+ return err;
+}
+
+/*----- more exciting reading -----*/
+static int
+xenevt_dev_poll(struct file *fp, int events)
+{
+ struct xenevt_dev_data *d = fp->f_data;
+ int revents = 0;
+
+ DPRINTF(("/dev/xenevt: poll events=0x%x...\n", events));
+
+ mutex_enter(&d->lock);
+
+	/* Always writable: write(2) only unmasks event channels and
+	 * never blocks. */
+ revents |= events & WBITS;
+
+ if ((events & RBITS) && (d->ring_prod != d->ring_cons))
+ revents |= events & RBITS;
+
+	/* The caller is only interested in reading and no data is
+	 * available: record us for later notification. */
+ if (!revents && (events & RBITS))
+ selrecord(curlwp, &d->selinfo);
+
+ mutex_exit(&d->lock);
+ DPRINTF(("/dev/xenevt: poll events=0x%x done, revents=0x%x\n",
+ events, revents));
+ return revents;
+}
+
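+/*
+ * ioctl(2) handles binding and unbinding; see xenio3.h for the
+ * request definitions.  The bind requests perform the hypercall
+ * first and install the port -> instance mapping before the handler
+ * is bound, so evtdev_handler() never sees an unknown port.
+ */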
+static int
+xenevt_dev_ioctl(struct file *fp, ulong cmd, void *data)
+{
+ struct xenevt_dev_data *d = fp->f_data;
+ int err;
+
+ switch (cmd) {
+ case IOCTL_EVTCHN_RESET:
+ {
+ mutex_enter(&d->lock);
+ d->ring_cons = d->ring_prod = 0;
+ d->flags = 0;
+ mutex_exit(&d->lock);
+ break;
+ }
+ case IOCTL_EVTCHN_BIND_VIRQ:
+ {
+ struct ioctl_evtchn_bind_virq *bind_virq = data;
+ evtchn_bind_virq_t op;
+
+ op.virq = bind_virq->virq;
+ op.vcpu = 0;
+ if ((err = minios_event_channel_op(EVTCHNOP_bind_virq, &op))) {
+ printf("IOCTL_EVTCHN_BIND_VIRQ failed: virq %d error %d\n",
+ bind_virq->virq, err);
+ return -err;
+ }
+ bind_virq->port = op.port;
+ mutex_enter(&devevent_lock);
+ KASSERT(devevents[bind_virq->port] == NULL);
+ devevents[bind_virq->port] = d;
+ mutex_exit(&devevent_lock);
+ minios_bind_evtchn(bind_virq->port, evtdev_handler, d);
+ minios_unmask_evtchn(bind_virq->port);
+
+ break;
+ }
+ case IOCTL_EVTCHN_BIND_INTERDOMAIN:
+ {
+ struct ioctl_evtchn_bind_interdomain *bind_intd = data;
+ evtchn_bind_interdomain_t op;
+
+ op.remote_dom = bind_intd->remote_domain;
+ op.remote_port = bind_intd->remote_port;
+ if ((err = minios_event_channel_op(EVTCHNOP_bind_interdomain, &op))) {
+ printf("IOCTL_EVTCHN_BIND_INTERDOMAIN failed: "
+ "remote domain %d port %d error %d\n",
+ bind_intd->remote_domain, bind_intd->remote_port, err);
+ return -err;
+ }
+ bind_intd->port = op.local_port;
+ mutex_enter(&devevent_lock);
+ KASSERT(devevents[bind_intd->port] == NULL);
+ devevents[bind_intd->port] = d;
+ mutex_exit(&devevent_lock);
+ minios_bind_evtchn(bind_intd->port, evtdev_handler, d);
+ minios_unmask_evtchn(bind_intd->port);
+
+ break;
+ }
+ case IOCTL_EVTCHN_BIND_UNBOUND_PORT:
+ {
+ struct ioctl_evtchn_bind_unbound_port *bind_unbound = data;
+ evtchn_alloc_unbound_t op;
+
+ op.dom = DOMID_SELF;
+ op.remote_dom = bind_unbound->remote_domain;
+ if ((err = minios_event_channel_op(EVTCHNOP_alloc_unbound, &op))) {
+ printf("IOCTL_EVTCHN_BIND_UNBOUND_PORT failed: "
+ "remote domain %d error %d\n",
+ bind_unbound->remote_domain, err);
+ return -err;
+ }
+ bind_unbound->port = op.port;
+ mutex_enter(&devevent_lock);
+ KASSERT(devevents[bind_unbound->port] == NULL);
+ devevents[bind_unbound->port] = d;
+ mutex_exit(&devevent_lock);
+ minios_bind_evtchn(bind_unbound->port, evtdev_handler, d);
+ minios_unmask_evtchn(bind_unbound->port);
+
+ break;
+ }
+ case IOCTL_EVTCHN_UNBIND:
+ {
+ struct ioctl_evtchn_unbind *unbind = data;
+
+		if (unbind->port >= NR_EVENT_CHANNELS)
+ return EINVAL;
+ mutex_enter(&devevent_lock);
+ if (devevents[unbind->port] != d) {
+ mutex_exit(&devevent_lock);
+ return ENOTCONN;
+ }
+ devevents[unbind->port] = NULL;
+ mutex_exit(&devevent_lock);
+ minios_mask_evtchn(unbind->port);
+ minios_unbind_evtchn(unbind->port);
+
+ break;
+ }
+ case IOCTL_EVTCHN_NOTIFY:
+ {
+ struct ioctl_evtchn_notify *notify = data;
+
+		if (notify->port >= NR_EVENT_CHANNELS)
+ return EINVAL;
+ mutex_enter(&devevent_lock);
+ if (devevents[notify->port] != d) {
+ mutex_exit(&devevent_lock);
+ return ENOTCONN;
+ }
+ minios_notify_remote_via_evtchn(notify->port);
+ mutex_exit(&devevent_lock);
+
+ break;
+ }
+ default:
+ return EINVAL;
+ }
+
+ return 0;
+}
+
+/*----- setup etc. -----*/
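+/*
+ * close(2) unbinds every event channel still associated with the
+ * instance before freeing it, so a stale devevents[] entry can never
+ * outlive the device data it points to.
+ */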
+
+static int
+xenevt_dev_close(struct file *fp)
+{
+ struct xenevt_dev_data *d = fp->f_data;
+ int i;
+
+ DPRINTF(("/dev/xenevt: close...\n"));
+
+ mutex_enter(&devevent_lock);
+ mutex_enter(&d->lock);
+ for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+ if (devevents[i] == d) {
+ minios_unbind_evtchn(i);
+ devevents[i] = NULL;
+ }
+ }
+ mutex_exit(&d->lock);
+ mutex_exit(&devevent_lock);
+
+ seldestroy(&d->selinfo);
+ mutex_destroy(&d->lock);
+ bmk_memfree(d);
+
+ DPRINTF(("/dev/xenevt: close done.\n"));
+
+ fp->f_data = NULL;
+
+ return 0;
+}
+
+const struct fileops xenevt_dev_fileops = {
+ .fo_read = xenevt_dev_read,
+ .fo_write = xenevt_dev_write,
+ .fo_ioctl = xenevt_dev_ioctl,
+ .fo_fcntl = fnullop_fcntl,
+ .fo_poll = xenevt_dev_poll,
+ .fo_stat = fbadop_stat,
+ .fo_close = xenevt_dev_close,
+ .fo_kqfilter = fnullop_kqfilter,
+ .fo_restart = fnullop_restart,
+};
+
+int
+xenevt_dev_open(struct file *fp, void **fdata_r)
+{
+ struct xenevt_dev_data *d;
+
+	/* bmk_memcalloc() returns zeroed memory. */
+	d = bmk_memcalloc(1, sizeof(*d));
+	if (!d)
+		return ENOMEM;
+
+ mutex_init(&d->lock, MUTEX_DEFAULT, IPL_HIGH);
+ selinit(&d->selinfo);
+ minios_init_waitqueue_head(&d->waitq);
+
+ *fdata_r = d;
+ return 0;
+}
+
+void xenevt_dev_init(void)
+{
+ mutex_init(&devevent_lock, MUTEX_DEFAULT, IPL_HIGH);
+ bmk_sched_create("xenevt", NULL, 0, xenevt_thread_func, NULL,
+ NULL, 0);
+}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
--- /dev/null
+/* $NetBSD: xenio3.h,v 1.3 2010/09/03 06:07:24 cegger Exp $ */
+/******************************************************************************
+ * evtchn.h
+ *
+ * Interface to /dev/xen/evtchn.
+ *
+ * Copyright (c) 2003-2005, K A Fraser
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_XENIO3_H__
+#define __XEN_XENIO3_H__
+
+/*
+ * Bind a fresh port to VIRQ @virq.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_VIRQ \
+ _IOWR('E', 4, struct ioctl_evtchn_bind_virq)
+struct ioctl_evtchn_bind_virq {
+ unsigned int virq;
+ unsigned int port;
+};
+
+/*
+ * Bind a fresh port to remote <@remote_domain, @remote_port>.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_INTERDOMAIN \
+ _IOWR('E', 5, struct ioctl_evtchn_bind_interdomain)
+struct ioctl_evtchn_bind_interdomain {
+ unsigned int remote_domain, remote_port;
+ unsigned int port;
+};
+
+/*
+ * Allocate a fresh port for binding to @remote_domain.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \
+ _IOWR('E', 6, struct ioctl_evtchn_bind_unbound_port)
+struct ioctl_evtchn_bind_unbound_port {
+ unsigned int remote_domain;
+ unsigned int port;
+};
+
+/*
+ * Unbind previously allocated @port.
+ */
+#define IOCTL_EVTCHN_UNBIND \
+ _IOW('E', 7, struct ioctl_evtchn_unbind)
+struct ioctl_evtchn_unbind {
+ unsigned int port;
+};
+
+/*
+ * Send event to previously allocated @port.
+ */
+#define IOCTL_EVTCHN_NOTIFY \
+ _IOW('E', 8, struct ioctl_evtchn_notify)
+struct ioctl_evtchn_notify {
+ unsigned int port;
+};
+
+/* Clear and reinitialise the event buffer. Clear error condition. */
+#define IOCTL_EVTCHN_RESET \
+ _IO('E', 9)
+
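+/*
+ * Illustrative usage sketch.  The device path and the choice of VIRQ
+ * are assumptions for the example, not part of this interface; note
+ * that reads return 32-bit ports while writes take 16-bit ones.
+ *
+ *	int fd = open("/dev/xenevt", O_RDWR);
+ *	struct ioctl_evtchn_bind_virq bind = { .virq = VIRQ_DEBUG };
+ *	ioctl(fd, IOCTL_EVTCHN_BIND_VIRQ, &bind);   // bind.port now set
+ *
+ *	uint32_t port;
+ *	read(fd, &port, sizeof(port));              // blocks for an event
+ *
+ *	uint16_t re_enable = bind.port;
+ *	write(fd, &re_enable, sizeof(re_enable));   // unmask: re-arm delivery
+ */
+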
+#endif /* __XEN_XENIO3_H__ */