From: Wei Liu
Date: Mon, 20 Apr 2015 16:35:54 +0000 (+0100)
Subject: xen: introduce xenevt device
X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=refs%2Fheads%2Fwip.xenevt;p=people%2Fliuw%2Frumprun.git

xen: introduce xenevt device

Expose a userspace event channel device so that applications can use
Xen's event channel machinery.

Signed-off-by: Wei Liu
---

diff --git a/platform/xen/rumpxendev/Makefile b/platform/xen/rumpxendev/Makefile
index 177b6e4..d68fe3b 100644
--- a/platform/xen/rumpxendev/Makefile
+++ b/platform/xen/rumpxendev/Makefile
@@ -2,6 +2,7 @@ LIB=	rumpxen_xendev
 
 SRCS=	xendev_component.c
 SRCS+=	busdev.c
+SRCS+=	evtdev.c
 
 RUMPTOP= ${TOPRUMP}
diff --git a/platform/xen/rumpxendev/evtdev.c b/platform/xen/rumpxendev/evtdev.c
new file mode 100644
index 0000000..0a82283
--- /dev/null
+++ b/platform/xen/rumpxendev/evtdev.c
@@ -0,0 +1,507 @@
+/*
+ * evtdev.c
+ *
+ * Driver giving user-space access to the kernel's event channel.
+ *
+ * Copyright (c) 2015 Wei Liu
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* NOTE(review): the targets of the angle-bracket includes were lost in
+ * extraction; <sys/cdefs.h> and the two mini-os headers are implied by
+ * __KERNEL_RCSID, the cast-qual pragma comment and the minios_* event
+ * API.  The bmk-core headers are inferred from bmk_memcalloc/bmk_sched
+ * usage -- confirm against the original patch. */
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: $");
+
+#include "rumpxen_xendev.h"
+
+#include <bmk-core/memalloc.h>
+#include <bmk-core/sched.h>
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+/* mini-os/os.h has some bad casts */
+#include <mini-os/os.h>
+#include <mini-os/events.h>
+#pragma GCC diagnostic error "-Wcast-qual"
+
+/* For ioctl interface. */
+#include "xenio3.h"
+
+/*----- data structures -----*/
+
+/* Ring size must be a power of two so that (x & MASK) == (x % SIZE). */
+#define EVTDEV_RING_SIZE 2048
+#define EVTDEV_RING_MASK 2047
+#define BYTES_PER_PORT (sizeof(evtchn_port_t) / sizeof(uint8_t))
+
+/* interrupt handler queues events here */
+/* protect against multiple writer (multiple events triggers at the
+ * same time) there is only one reader, the worker thread.*/
+static DECLARE_WAIT_QUEUE_HEAD(xenevt_waitq);
+static spinlock_t xenevt_ring_lock = SPIN_LOCK_UNLOCKED;
+static evtchn_port_t xenevt_ring[EVTDEV_RING_SIZE];
+static u_int xenevt_ring_prod, xenevt_ring_cons;
+
+/* Per-open-file state: a private ring of pending ports plus the
+ * machinery needed to block readers and wake up poll(2) users. */
+struct xenevt_dev_data {
+	evtchn_port_t ring[EVTDEV_RING_SIZE];
+	u_int ring_cons;
+	u_int ring_prod;
+#define EVTDEV_F_OVERFLOW 0x1 /* ring overflow */
+	u_int flags;
+
+	kmutex_t lock;
+	struct selinfo selinfo; /* used by poll, see select(9) */
+	struct wait_queue_head waitq;
+};
+
+/* Kernel event -> device instance mapping */
+static kmutex_t devevent_lock;
+static struct xenevt_dev_data *devevents[NR_EVENT_CHANNELS];
+
+/*----- helpers -----*/
+#define WBITS (POLLOUT | POLLWRNORM)
+#define RBITS (POLLIN | POLLRDNORM)
+
+/* Deliver one port to an instance's private ring and wake readers.
+ * Call with d->lock held. */
+static void queue(struct xenevt_dev_data *d,
+		  evtchn_port_t port)
+{
+	if (d->ring_cons == ((d->ring_prod + 1) & EVTDEV_RING_MASK)) {
+		/* Fixed: original or'ed in EVTDEV_RING_MASK here, which
+		 * sets every low flag bit instead of the overflow bit. */
+		d->flags |= EVTDEV_F_OVERFLOW;
+		printf("evtdev: ring overflow port %d\n", port);
+	} else {
+		d->ring[d->ring_prod] = port;
+		wmb(); /* publish the entry before moving prod */
+		d->ring_prod = (d->ring_prod + 1) & EVTDEV_RING_MASK;
+	}
+	/* notify */
+	minios_wake_up(&d->waitq);
+	selnotify(&d->selinfo, RBITS, NOTE_SUBMIT);
+}
+
+/* Worker thread: drains the global interrupt-side ring and fans each
+ * port out to the device instance bound to it. Sole consumer of
+ * xenevt_ring, hence no lock -- only memory barriers. */
+static void xenevt_thread_func(void *ign)
+{
+	u_int prod = xenevt_ring_prod;
+	u_int cons;
+
+	for (;;) {
+		minios_wait_event(xenevt_waitq, xenevt_ring_prod != prod);
+		prod = xenevt_ring_prod;
+		cons = xenevt_ring_cons;
+
+		mb(); /* read entries only after reading prod */
+
+		while (cons != prod) {
+			evtchn_port_t port = xenevt_ring[cons];
+			struct xenevt_dev_data *d;
+
+			KASSERT(port < NR_EVENT_CHANNELS);
+
+			mutex_enter(&devevent_lock);
+
+			d = devevents[port];
+
+			/* NOTE(review): assumes the port cannot be
+			 * unbound while an event for it is still in
+			 * flight -- confirm. */
+			KASSERT(d);
+
+			mutex_enter(&d->lock);
+
+			queue(d, port);
+
+			mutex_exit(&d->lock);
+			mutex_exit(&devevent_lock);
+
+			cons++;
+		}
+
+		mb(); /* consume entries before publishing cons */
+
+		xenevt_ring_cons = cons;
+	}
+}
+
+/* Event channel upcall: runs in interrupt context, so only enqueue the
+ * port on the global ring and kick the worker thread. */
+static void evtdev_handler(evtchn_port_t port, struct pt_regs * regs,
+			   void *data)
+{
+	spin_lock(&xenevt_ring_lock);
+
+	if (xenevt_ring_cons == ((xenevt_ring_prod + 1) & EVTDEV_RING_MASK))
+		minios_printk("xenevt driver ring overflowed!\n");
+	else {
+		xenevt_ring[xenevt_ring_prod] = port;
+		wmb();
+		xenevt_ring_prod = (xenevt_ring_prod + 1) & EVTDEV_RING_MASK;
+	}
+	spin_unlock(&xenevt_ring_lock);
+
+	minios_wake_up(&xenevt_waitq);
+}
+
+/*----- request handling (writes to the device) -----*/
+
+/* write(2): the payload is an array of uint16_t port numbers; each
+ * port owned by this instance is unmasked (re-armed). */
+static int
+xenevt_dev_write(struct file *fp, off_t *offset, struct uio *uio,
+		 kauth_cred_t cred, int flags)
+{
+	struct xenevt_dev_data *d = fp->f_data;
+	uint16_t *chans = NULL;
+	int i, nentries, err;
+
+	DPRINTF(("/dev/xenevt: write...\n"));
+
+	if (uio->uio_resid == 0) {
+		err = 0;
+		goto out;
+	}
+
+	nentries = uio->uio_resid / sizeof(uint16_t);
+	if (nentries > NR_EVENT_CHANNELS) {
+		err = EMSGSIZE;
+		goto out;
+	}
+
+	chans = bmk_memcalloc(nentries, sizeof(uint16_t));
+	if (chans == NULL) {
+		err = ENOMEM;
+		goto out;
+	}
+
+	/* Fixed: copy only the whole entries we allocated room for;
+	 * uio_resid may be odd, and copying uio_resid bytes would
+	 * overrun chans by one byte. */
+	err = uiomove(chans, nentries * sizeof(uint16_t), uio);
+	if (err) goto out;
+
+	mutex_enter(&devevent_lock);
+	for (i = 0; i < nentries; i++) {
+		if (chans[i] < NR_EVENT_CHANNELS &&
+		    devevents[chans[i]] == d)
+			minios_unmask_evtchn(chans[i]);
+	}
+	mutex_exit(&devevent_lock);
+
+	err = 0;
+out:
+	DPRINTF(("/dev/xenevt: write done, err=%d\n", err));
+	bmk_memfree(chans);
+	return err;
+}
+
+/* read(2): returns pending evtchn_port_t values. Blocks (unless
+ * FNONBLOCK) until at least one port is available; EFBIG reports a
+ * prior private-ring overflow until IOCTL_EVTCHN_RESET clears it. */
+static int
+xenevt_dev_read(struct file *fp, off_t *offset, struct uio *uio,
+		kauth_cred_t cred, int read_flags)
+{
+	struct xenevt_dev_data *d = fp->f_data;
+	u_int cons, prod, len, uio_len;
+	int err;
+
+	DPRINTF(("/dev/xenevt: read...\n"));
+
+	mutex_enter(&d->lock);
+
+	err = 0;
+	while (err == 0) {
+		cons = d->ring_cons;
+		prod = d->ring_prod;
+
+		if (cons != prod) break; /* data available */
+
+		if (d->flags & EVTDEV_F_OVERFLOW) break;
+
+		/* nothing to read */
+		if ((fp->f_flag & FNONBLOCK) == 0) {
+			int nlocks;
+			DEFINE_WAIT(w);
+
+			minios_add_waiter(w, d->waitq);
+			mutex_exit(&d->lock);
+
+			/* drop rump kernel context while sleeping */
+			rumpkern_unsched(&nlocks, 0);
+			bmk_sched();
+			rumpkern_sched(nlocks, 0);
+
+			mutex_enter(&d->lock);
+			minios_remove_waiter(w, d->waitq);
+		} else
+			err = EAGAIN;
+	}
+
+	if (err == 0 && (d->flags & EVTDEV_F_OVERFLOW))
+		err = EFBIG;
+
+	mutex_exit(&d->lock);
+
+	if (err) goto out;
+
+	/* First segment: up to the end of the ring buffer. */
+	uio_len = uio->uio_resid / BYTES_PER_PORT;
+	if (cons <= prod)
+		len = prod - cons;
+	else
+		len = EVTDEV_RING_SIZE - cons;
+	if (len > uio_len)
+		len = uio_len;
+	err = uiomove(&d->ring[cons], len * BYTES_PER_PORT, uio);
+	if (err) goto out;
+
+	cons = (cons + len) & EVTDEV_RING_MASK;
+	uio_len = uio->uio_resid / BYTES_PER_PORT;
+	if (uio_len == 0) goto done;
+
+	/* ring wrapped */
+	len = prod - cons;
+	if (len > uio_len)
+		len = uio_len;
+	err = uiomove(&d->ring[cons], len * BYTES_PER_PORT, uio);
+	if (err) goto out;
+	cons = (cons + len) & EVTDEV_RING_MASK;
+
+done:
+	mutex_enter(&d->lock);
+	d->ring_cons = cons;
+	mutex_exit(&d->lock);
+out:
+	DPRINTF(("/dev/xenevt: read done, err=%d\n", err));
+	return err;
+}
+
+/*----- more exciting reading -----*/
+
+/* poll(2): readable when the private ring is non-empty. */
+static int
+xenevt_dev_poll(struct file *fp, int events)
+{
+	struct xenevt_dev_data *d = fp->f_data;
+	int revents = 0;
+
+	DPRINTF(("/dev/xenevt: poll events=0x%x...\n", events));
+
+	mutex_enter(&d->lock);
+
+	/* always writable because write is used to unmask event
+	 * channel */
+	revents |= events & WBITS;
+
+	if ((events & RBITS) && (d->ring_prod != d->ring_cons))
+		revents |= events & RBITS;
+
+	/* in the case caller only interests in read but no data
+	 * available to read */
+	if (!revents && (events & RBITS))
+		selrecord(curlwp, &d->selinfo);
+
+	mutex_exit(&d->lock);
+	DPRINTF(("/dev/xenevt: poll events=0x%x done, revents=0x%x\n",
+		 events, revents));
+	return revents;
+}
+
+/* ioctl(2): bind/unbind/notify operations, see xenio3.h.  Hypercall
+ * failures are returned negated, matching the Linux evtchn device. */
+static int
+xenevt_dev_ioctl(struct file *fp, ulong cmd, void *data)
+{
+	struct xenevt_dev_data *d = fp->f_data;
+	int err;
+
+	switch (cmd) {
+	case IOCTL_EVTCHN_RESET:
+	{
+		mutex_enter(&d->lock);
+		d->ring_cons = d->ring_prod = 0;
+		d->flags = 0;
+		mutex_exit(&d->lock);
+		break;
+	}
+	case IOCTL_EVTCHN_BIND_VIRQ:
+	{
+		struct ioctl_evtchn_bind_virq *bind_virq = data;
+		evtchn_bind_virq_t op;
+
+		op.virq = bind_virq->virq;
+		op.vcpu = 0;
+		if ((err = minios_event_channel_op(EVTCHNOP_bind_virq, &op))) {
+			printf("IOCTL_EVTCHN_BIND_VIRQ failed: virq %d error %d\n",
+			       bind_virq->virq, err);
+			return -err;
+		}
+		bind_virq->port = op.port;
+		mutex_enter(&devevent_lock);
+		KASSERT(devevents[bind_virq->port] == NULL);
+		devevents[bind_virq->port] = d;
+		mutex_exit(&devevent_lock);
+		minios_bind_evtchn(bind_virq->port, evtdev_handler, d);
+		minios_unmask_evtchn(bind_virq->port);
+
+		break;
+	}
+	case IOCTL_EVTCHN_BIND_INTERDOMAIN:
+	{
+		struct ioctl_evtchn_bind_interdomain *bind_intd = data;
+		evtchn_bind_interdomain_t op;
+
+		op.remote_dom = bind_intd->remote_domain;
+		op.remote_port = bind_intd->remote_port;
+		if ((err = minios_event_channel_op(EVTCHNOP_bind_interdomain, &op))) {
+			printf("IOCTL_EVTCHN_BIND_INTERDOMAIN failed: "
+			       "remote domain %d port %d error %d\n",
+			       bind_intd->remote_domain, bind_intd->remote_port, err);
+			return -err;
+		}
+		bind_intd->port = op.local_port;
+		mutex_enter(&devevent_lock);
+		KASSERT(devevents[bind_intd->port] == NULL);
+		devevents[bind_intd->port] = d;
+		mutex_exit(&devevent_lock);
+		minios_bind_evtchn(bind_intd->port, evtdev_handler, d);
+		minios_unmask_evtchn(bind_intd->port);
+
+		break;
+	}
+	case IOCTL_EVTCHN_BIND_UNBOUND_PORT:
+	{
+		struct ioctl_evtchn_bind_unbound_port *bind_unbound = data;
+		evtchn_alloc_unbound_t op;
+
+		op.dom = DOMID_SELF;
+		op.remote_dom = bind_unbound->remote_domain;
+		if ((err = minios_event_channel_op(EVTCHNOP_alloc_unbound, &op))) {
+			printf("IOCTL_EVTCHN_BIND_UNBOUND_PORT failed: "
+			       "remote domain %d error %d\n",
+			       bind_unbound->remote_domain, err);
+			return -err;
+		}
+		bind_unbound->port = op.port;
+		mutex_enter(&devevent_lock);
+		KASSERT(devevents[bind_unbound->port] == NULL);
+		devevents[bind_unbound->port] = d;
+		mutex_exit(&devevent_lock);
+		minios_bind_evtchn(bind_unbound->port, evtdev_handler, d);
+		minios_unmask_evtchn(bind_unbound->port);
+
+		break;
+	}
+	case IOCTL_EVTCHN_UNBIND:
+	{
+		struct ioctl_evtchn_unbind *unbind = data;
+
+		/* Fixed: was '>', which let port == NR_EVENT_CHANNELS
+		 * index one past the end of devevents[]. */
+		if (unbind->port >= NR_EVENT_CHANNELS)
+			return EINVAL;
+		mutex_enter(&devevent_lock);
+		if (devevents[unbind->port] != d) {
+			mutex_exit(&devevent_lock);
+			return ENOTCONN;
+		}
+		devevents[unbind->port] = NULL;
+		mutex_exit(&devevent_lock);
+		minios_mask_evtchn(unbind->port);
+		minios_unbind_evtchn(unbind->port);
+
+		break;
+	}
+	case IOCTL_EVTCHN_NOTIFY:
+	{
+		struct ioctl_evtchn_notify *notify = data;
+
+		/* Fixed: same off-by-one as IOCTL_EVTCHN_UNBIND. */
+		if (notify->port >= NR_EVENT_CHANNELS)
+			return EINVAL;
+		mutex_enter(&devevent_lock);
+		if (devevents[notify->port] != d) {
+			mutex_exit(&devevent_lock);
+			return ENOTCONN;
+		}
+		minios_notify_remote_via_evtchn(notify->port);
+		mutex_exit(&devevent_lock);
+
+		break;
+	}
+	default:
+		return EINVAL;
+	}
+
+	return 0;
+}
+
+/*----- setup etc. -----*/
+
+/* close(2): unbind every channel still owned by this instance, then
+ * tear down the instance state. */
+static int
+xenevt_dev_close(struct file *fp)
+{
+	struct xenevt_dev_data *d = fp->f_data;
+	int i;
+
+	DPRINTF(("/dev/xenevt: close...\n"));
+
+	mutex_enter(&devevent_lock);
+	mutex_enter(&d->lock);
+	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+		if (devevents[i] == d) {
+			minios_unbind_evtchn(i);
+			devevents[i] = NULL;
+		}
+	}
+	mutex_exit(&d->lock);
+	mutex_exit(&devevent_lock);
+
+	seldestroy(&d->selinfo);
+	mutex_destroy(&d->lock);
+	bmk_memfree(d);
+
+	DPRINTF(("/dev/xenevt: close done.\n"));
+
+	fp->f_data = NULL;
+
+	return 0;
+}
+
+const struct fileops xenevt_dev_fileops = {
+	.fo_read = xenevt_dev_read,
+	.fo_write = xenevt_dev_write,
+	.fo_ioctl = xenevt_dev_ioctl,
+	.fo_fcntl = fnullop_fcntl,
+	.fo_poll = xenevt_dev_poll,
+	.fo_stat = fbadop_stat,
+	.fo_close = xenevt_dev_close,
+	.fo_kqfilter = fnullop_kqfilter,
+	.fo_restart = fnullop_restart,
+};
+
+/* open(2): allocate and initialise per-open state. */
+int
+xenevt_dev_open(struct file *fp, void **fdata_r)
+{
+	struct xenevt_dev_data *d;
+
+	d = bmk_memcalloc(1, sizeof(*d));
+	if (!d)
+		return ENOMEM;
+
+	/* bmk_memcalloc already zeroes; kept for belt and braces. */
+	memset(d, 0, sizeof(*d));
+
+	mutex_init(&d->lock, MUTEX_DEFAULT, IPL_HIGH);
+	selinit(&d->selinfo);
+	minios_init_waitqueue_head(&d->waitq);
+
+	*fdata_r = d;
+	return 0;
+}
+
+/* Component init: set up the port->instance table lock and start the
+ * worker thread that fans events out to instances. */
+void xenevt_dev_init(void)
+{
+	mutex_init(&devevent_lock, MUTEX_DEFAULT, IPL_HIGH);
+	bmk_sched_create("xenevt", NULL, 0, xenevt_thread_func, NULL,
+			 NULL, 0);
+}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
diff --git a/platform/xen/rumpxendev/rumpxen_xendev.h b/platform/xen/rumpxendev/rumpxen_xendev.h
index d995738..15f755c 100644
--- a/platform/xen/rumpxendev/rumpxen_xendev.h
+++ b/platform/xen/rumpxendev/rumpxen_xendev.h
@@ -58,7 +58,9 @@ extern int xenbus_dev_open(struct file *fp, void **fdata);
 extern void xenbus_dev_init(void);
 extern const struct fileops xenbus_dev_fileops;
 
-
+extern void xenevt_dev_init(void);
+extern int xenevt_dev_open(struct file *fp, void **fdata);
+extern const struct fileops xenevt_dev_fileops;
 
 static inline void*
 xbd_malloc(size_t sz)
diff --git a/platform/xen/rumpxendev/xendev_component.c b/platform/xen/rumpxendev/xendev_component.c
index 8ce2c24..62f8746 100644
--- a/platform/xen/rumpxendev/xendev_component.c
+++ b/platform/xen/rumpxendev/xendev_component.c
@@ -57,6 +57,7 @@ static const struct xen_dev_info {
 	[cmin] = { path, component##_dev_init, component##_dev_open, \
 		   &component##_dev_fileops }
 	XDEV(0, DEV_XEN "/xenbus", xenbus),
+	XDEV(1, "/dev/xenevt", xenevt),
 #undef XDEV
 };
 
diff --git a/platform/xen/rumpxendev/xenio3.h b/platform/xen/rumpxendev/xenio3.h
new file mode 100644
index 0000000..319b47b
--- /dev/null
+++ b/platform/xen/rumpxendev/xenio3.h
@@ -0,0 +1,89 @@
+/* $NetBSD: xenio3.h,v 1.3 2010/09/03 06:07:24 cegger Exp $ */
+/******************************************************************************
+ * evtchn.h
+ *
+ * Interface to /dev/xen/evtchn.
+ *
+ * Copyright (c) 2003-2005, K A Fraser
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_XENIO3_H__
+#define __XEN_XENIO3_H__
+
+/*
+ * Bind a fresh port to VIRQ @virq.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_VIRQ \
+	_IOWR('E', 4, struct ioctl_evtchn_bind_virq)
+struct ioctl_evtchn_bind_virq {
+	unsigned int virq;
+	unsigned int port;
+};
+
+/*
+ * Bind a fresh port to remote <@remote_domain, @remote_port>.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_INTERDOMAIN \
+	_IOWR('E', 5, struct ioctl_evtchn_bind_interdomain)
+struct ioctl_evtchn_bind_interdomain {
+	unsigned int remote_domain, remote_port;
+	unsigned int port;
+};
+
+/*
+ * Allocate a fresh port for binding to @remote_domain.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \
+	_IOWR('E', 6, struct ioctl_evtchn_bind_unbound_port)
+struct ioctl_evtchn_bind_unbound_port {
+	unsigned int remote_domain;
+	unsigned int port;
+};
+
+/*
+ * Unbind previously allocated @port.
+ */
+#define IOCTL_EVTCHN_UNBIND \
+	_IOW('E', 7, struct ioctl_evtchn_unbind)
+struct ioctl_evtchn_unbind {
+	unsigned int port;
+};
+
+/*
+ * Send event to previously allocated @port.
+ */
+#define IOCTL_EVTCHN_NOTIFY \
+	_IOW('E', 8, struct ioctl_evtchn_notify)
+struct ioctl_evtchn_notify {
+	unsigned int port;
+};
+
+/* Clear and reinitialise the event buffer. Clear error condition. */
+#define IOCTL_EVTCHN_RESET \
+	_IO('E', 9)
+
+#endif /* __XEN_XENIO3_H__ */