]> xenbits.xensource.com Git - people/julieng/freebsd.git/commitdiff
xen/console: Introduce a new console driver for Xen guest
authorJulien Grall <julien.grall@citrix.com>
Sun, 20 Sep 2015 10:49:15 +0000 (11:49 +0100)
committerJulien Grall <julien.grall@citrix.com>
Sat, 3 Oct 2015 18:37:13 +0000 (19:37 +0100)
The current Xen console driver crashes very quickly when used on an ARM
guest. This is because the console lock is recursive, which may lead to
recursing on the tty lock and/or corrupting the ring pointer.

Furthermore, the console lock is not always taken where it should be and has
to be released too early because of the way the console has been designed.

Over the years, code has been added to support various new features but the
driver has not been reworked. As a result, code related to the hypervisor
console has ended up in ring-specific functions.

This new driver has been rewritten with the idea of having only a small set
of specific functions to write either via the ring or via the hypercall.

Note that HVM support has been left aside for now because it requires external
features to be used on ARM which are not yet upstreamed. A follow-up patch
with the ARM guest support will be sent.

This new console driver will be added to the build in the following patch. The
work has been split to ease review.

List of items that would be good to have but are not mandatory:
- Avoid flushing for each character written when using the tty.
- Use an ops structure to distinguish hypervisor vs ring helpers.
- Support multiple consoles.

sys/dev/xen/console/xen_console.c [new file with mode: 0644]

diff --git a/sys/dev/xen/console/xen_console.c b/sys/dev/xen/console/xen_console.c
new file mode 100644 (file)
index 0000000..3c1ed8c
--- /dev/null
@@ -0,0 +1,727 @@
+/*
+ * Copyright (c) 2015 Julien Grall <julien.grall@citrix.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/module.h>
+#include <sys/systm.h>
+#include <sys/consio.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/tty.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <machine/stdarg.h>
+#include <xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <xen/xen_intr.h>
+#include <sys/cons.h>
+#include <sys/kdb.h>
+#include <sys/proc.h>
+
+#include <xen/interface/io/console.h>
+
+#include "opt_ddb.h"
+#include "opt_printf.h"
+
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
+/* Driver name; also used as the prefix of the console and tty device names */
+static char driver_name[] = "xc";
+
+/*
+ * State of one Xen console: the lock, the interrupt binding, the internal
+ * read/write buffers and the fields specific to the shared ring.
+ */
+struct xencons_priv {
+       /* Mutex to protect the shared ring and the internal buffers */
+       struct mtx                      mtx;
+       /* Interrupt handle used to notify the backend */
+       xen_intr_handle_t               intr_handle;
+       /* KDB internal state, handed to kdb_alt_break() */
+#ifdef KDB
+       int                             altbrk;
+#endif
+       /* Status of the tty: set on open, cleared on close */
+       bool                            opened;
+       /* Callout used to retry the transmission when the write buffer is full */
+       struct callout                  callout;
+
+       /*
+        * Internal buffers must be used with mtx locked.
+        * The *_MASK() macros rely on the buffer sizes being powers of two.
+        */
+#define WBUF_SIZE     4096
+#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1))
+       char                            wbuf[WBUF_SIZE];
+       unsigned int                    wc, wp; /* Consumer/producer wbuf */
+
+#define RBUF_SIZE     1024
+#define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1))
+       char                            rbuf[RBUF_SIZE];
+       unsigned int                    rc, rp; /* Consumer/producer rbuf */
+
+       /*
+        * Ring specific fields
+        * XXX: make a union?
+        */
+       /* Event channel number for early notification (PV only) */
+       uint32_t                        evtchn;
+       /* Console shared page */
+       struct xencons_interface        *intf;
+};
+
+/*
+ * Macros to use the console mutex
+ *
+ * The lock is not taken when the kernel is panicking, as it will never
+ * recover and we want to output no matter the cost.
+ *
+ * CN_LOCK/CN_UNLOCK take/release the spin mutex unless panicstr is set.
+ * CN_LOCK_ASSERT checks that the mutex is owned by the caller.
+ * CN_LOCK_DESTROY destroys the mutex.
+ */
+#define CN_LOCK(cons)                                                  \
+               do {                                                    \
+                       if (panicstr == NULL)                           \
+                               mtx_lock_spin(&(cons)->mtx);            \
+               } while (0)
+#define CN_UNLOCK(cons)                                                        \
+               do {                                                    \
+                       if (panicstr == NULL)                           \
+                               mtx_unlock_spin(&(cons)->mtx);          \
+               } while (0)
+#define CN_LOCK_ASSERT(cons)   mtx_assert(&(cons)->mtx, MA_OWNED)
+#define CN_LOCK_DESTROY(cons)  mtx_destroy(&(cons)->mtx)
+
+/*
+ * Data for the main console
+ * It is allocated statically because the low-level console driver uses it
+ * (see xc_cninit()) before the device is attached.
+ */
+static struct xencons_priv main_cons;
+
+/* Interrupt handler shared by the hypervisor and the ring consoles */
+static void xencons_intr(void *arg);
+
+/*
+ * Delay, in ticks, before retrying a transmission that could not complete
+ * because the write buffer was full (presumably 100ms if hz is the number
+ * of ticks per second -- TODO confirm).
+ */
+#define XC_POLLTIME    (hz/10)
+
+/*
+ * Virtual address of the shared console page (only for PV guest)
+ * TODO: Introduce a function to set it
+ */
+char *console_page;
+
+/*----------------------------- Debug function ------------------------------*/
+/* Staging buffer state shared between xc_printf() and putchar() */
+struct putchar_arg {
+       char    *buf;           /* Staging buffer, NULL when unbuffered */
+       size_t  size;           /* Capacity of buf in bytes */
+       size_t  n_next;         /* Index of the next free byte in buf */
+};
+
+/*
+ * Output callback handed to kvprintf() by xc_printf().
+ *
+ * c:   character to emit
+ * arg: pointer to the struct putchar_arg describing the staging buffer
+ *
+ * Without a staging buffer each character is written to the hypervisor
+ * console immediately. Otherwise the character is stored and the buffer is
+ * flushed once it is full or when a NUL character is seen.
+ */
+static void
+putchar(int c, void *arg)
+{
+       struct putchar_arg *pca;
+       char ch;
+
+       pca = (struct putchar_arg *)arg;
+
+       if (pca->buf == NULL) {
+               /*
+                * We have no buffer, output directly to the
+                * console char by char. Go through a real char so the
+                * right byte is sent whatever the endianness is.
+                */
+               ch = (char)c;
+               HYPERVISOR_console_write(&ch, 1);
+       } else {
+               pca->buf[pca->n_next++] = c;
+               /* This must be a comparison: "c = '\0'" is always false */
+               if ((pca->size == pca->n_next) || (c == '\0')) {
+                       /* Flush the buffer */
+                       HYPERVISOR_console_write(pca->buf, pca->n_next);
+                       pca->n_next = 0;
+               }
+       }
+}
+
+/*
+ * printf-like helper that formats with kvprintf() and sends the result to
+ * the hypervisor console through putchar(). When PRINTF_BUFR_SIZE is
+ * defined the output is staged in an on-stack buffer of that size.
+ */
+void
+xc_printf(const char *fmt, ...)
+{
+#ifdef PRINTF_BUFR_SIZE
+       char staging[PRINTF_BUFR_SIZE];
+       struct putchar_arg pca = {
+               .buf = staging,
+               .size = sizeof(staging),
+               .n_next = 0
+       };
+#else
+       struct putchar_arg pca = {
+               .buf = NULL,
+               .size = 0
+       };
+#endif
+       va_list args;
+
+       KASSERT((xen_domain()), ("call to xc_printf from non Xen guest"));
+
+       va_start(args, fmt);
+       kvprintf(fmt, putchar, &pca, 10, args);
+       va_end(args);
+
+#ifdef PRINTF_BUFR_SIZE
+       /* Push out whatever is still sitting in the staging buffer */
+       if (pca.n_next != 0)
+               HYPERVISOR_console_write(staging, pca.n_next);
+#endif
+}
+
+/*------------------ Helpers for the hypervisor console ---------------------*/
+/*
+ * Early (low-level console) setup for the hypervisor console.
+ * Intentionally empty.
+ */
+static void
+xencons_early_init_hypervisor(struct xencons_priv *cons)
+{
+       /*
+        * Nothing to set up for the low-level console when using
+        * the hypervisor console.
+        */
+}
+
+/*
+ * Bind VIRQ_CONSOLE to xencons_intr() for the hypervisor console.
+ * Returns 0 on success or the error reported by xen_intr_bind_virq().
+ */
+static int
+xencons_init_hypervisor(device_t dev, struct tty *tp)
+{
+       struct xencons_priv *cons;
+       int error;
+
+       cons = tty_softc(tp);
+       error = xen_intr_bind_virq(dev, VIRQ_CONSOLE, 0, NULL, xencons_intr,
+           tp, INTR_TYPE_TTY, &cons->intr_handle);
+       if (error != 0)
+               printf("Can't register console interrupt\n");
+
+       return (error);
+}
+
+/*
+ * Write size bytes from buffer through the console hypercall.
+ * The hypercall result is not inspected: size is always reported as
+ * written.
+ */
+static int
+xencons_write_hypervisor(struct xencons_priv *cons, const char *buffer,
+    unsigned int size)
+{
+
+       HYPERVISOR_console_io(CONSOLEIO_write, size, buffer);
+       return (size);
+}
+
+/*
+ * Read up to size bytes from the hypervisor console into buffer.
+ * Must be called with the console lock held.
+ *
+ * buffer is an output parameter filled by the hypervisor, hence it must
+ * not be const-qualified.
+ *
+ * Returns the value of the CONSOLEIO_read hypercall.
+ */
+static int
+xencons_read_hypervisor(struct xencons_priv *cons, char *buffer,
+                       unsigned int size)
+{
+       CN_LOCK_ASSERT(cons);
+
+       return (HYPERVISOR_console_io(CONSOLEIO_read, size, buffer));
+}
+
+/*------------------ Helpers for the ring console ---------------------------*/
+/*
+ * Early setup of the ring console: record the event channel advertised in
+ * the start info page and the address of the shared console page.
+ */
+static void
+xencons_early_init_ring(struct xencons_priv *cons)
+{
+
+       cons->evtchn = HYPERVISOR_start_info->console.domU.evtchn;
+       /* The shared page for PV is already mapped by the boot code */
+       cons->intf = (struct xencons_interface *)console_page;
+}
+
+/*
+ * Bind the console event channel to xencons_intr().
+ * Returns ENODEV when no event channel is known, otherwise the result of
+ * xen_intr_bind_local_port().
+ */
+static int
+xencons_init_ring(device_t dev, struct tty *tp)
+{
+       struct xencons_priv *cons;
+
+       cons = tty_softc(tp);
+       if (cons->evtchn == 0)
+               return (ENODEV);
+
+       return (xen_intr_bind_local_port(dev, cons->evtchn, NULL,
+           xencons_intr, tp, INTR_TYPE_MISC | INTR_MPSAFE,
+           &cons->intr_handle));
+}
+
+/* Tell the backend that the shared ring has been updated */
+static void
+xencons_notify_ring(struct xencons_priv *cons)
+{
+
+       /*
+        * The console may be used before the ring interrupt is bound.
+        * In that case send the event with the raw event channel
+        * hypercall instead of going through the interrupt handle.
+        */
+       if (__predict_false(cons->intr_handle == NULL)) {
+               struct evtchn_send send = {
+                       .port = cons->evtchn
+               };
+
+               HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
+               return;
+       }
+
+       xen_intr_signal(cons->intr_handle);
+}
+
+/*
+ * Copy up to size bytes from buffer into the output ring and notify the
+ * backend. Must be called with the console lock held.
+ *
+ * Returns the number of bytes actually queued, which is lower than size
+ * when the ring is full.
+ */
+static int
+xencons_write_ring(struct xencons_priv *cons, const char *buffer,
+                  unsigned int size)
+{
+       struct xencons_interface *intf = cons->intf;
+       XENCONS_RING_IDX wcons, wprod;
+       unsigned int sent;
+
+       CN_LOCK_ASSERT(cons);
+
+       wcons = intf->out_cons;
+       wprod = intf->out_prod;
+
+       mb();
+       KASSERT((wprod - wcons) <= sizeof(intf->out),
+               ("console send ring inconsistent"));
+
+       for (sent = 0; sent < size; sent++, wprod++) {
+               /* Stop as soon as the ring is full */
+               if ((wprod - wcons) >= sizeof(intf->out))
+                       break;
+               intf->out[MASK_XENCONS_IDX(wprod, intf->out)] = buffer[sent];
+       }
+
+       /* The data must be visible before the producer index is updated */
+       wmb();
+       intf->out_prod = wprod;
+
+       xencons_notify_ring(cons);
+
+       return (sent);
+}
+
+/*
+ * Copy up to size bytes from the input ring into buffer.
+ * Must be called with the console lock held.
+ *
+ * Returns the number of bytes read; the backend is only notified when at
+ * least one byte has been consumed.
+ */
+static int
+xencons_read_ring(struct xencons_priv *cons, char *buffer, unsigned int size)
+{
+       struct xencons_interface *intf = cons->intf;
+       XENCONS_RING_IDX rcons, rprod;
+       unsigned int rsz;
+
+       CN_LOCK_ASSERT(cons);
+
+       rcons = intf->in_cons;
+       rprod = intf->in_prod;
+       rmb();
+
+       for (rsz = 0; rsz < size; rsz++, rcons++) {
+               if (rprod == rcons)
+                       break;
+               buffer[rsz] = intf->in[MASK_XENCONS_IDX(rcons, intf->in)];
+       }
+
+       /*
+        * The loads from the ring must complete before the consumer index
+        * is published, otherwise the backend may overwrite slots that are
+        * still being read. A write barrier only orders stores, so a full
+        * barrier is required here.
+        */
+       mb();
+       intf->in_cons = rcons;
+
+       /* No need to notify the backend if nothing has been read */
+       if (rsz != 0)
+               xencons_notify_ring(cons);
+
+       return (rsz);
+}
+
+/*------------------ Wrappers to choose the way to output -------------------*/
+
+/*
+ * Early initialization of the main console, called by the low-level
+ * console driver during boot.
+ * Keep this to the strict minimum required to get a working console.
+ */
+static void
+xencons_early_init(void)
+{
+
+       mtx_init(&main_cons.mtx, "XCONS LOCK", NULL, MTX_SPIN);
+
+       if (!xen_initial_domain())
+               xencons_early_init_ring(&main_cons);
+       else
+               xencons_early_init_hypervisor(&main_cons);
+}
+
+/* Set up the console interrupt with the hypervisor or the ring helper */
+static int
+xencons_init(device_t dev, struct tty *tp)
+{
+
+       if (!xen_initial_domain())
+               return (xencons_init_ring(dev, tp));
+
+       return (xencons_init_hypervisor(dev, tp));
+}
+
+/* Write size bytes with the hypervisor or the ring helper */
+static int
+xencons_write(struct xencons_priv *cons, const char *buffer, unsigned int size)
+{
+
+       if (!xen_initial_domain())
+               return (xencons_write_ring(cons, buffer, size));
+
+       return (xencons_write_hypervisor(cons, buffer, size));
+}
+
+/* Read up to size bytes with the hypervisor or the ring helper */
+static int
+xencons_read(struct xencons_priv *cons, char *buffer, unsigned int size)
+{
+
+       if (!xen_initial_domain())
+               return (xencons_read_ring(cons, buffer, size));
+
+       return (xencons_read_hypervisor(cons, buffer, size));
+}
+
+/*
+ * Drain the console input into the internal read buffer.
+ * XXX: an overflow of the internal buffer is not handled
+ */
+static void
+xencons_rx(struct xencons_priv *cons)
+{
+       static char chunk[16];
+       int count, idx;
+
+       CN_LOCK(cons);
+       for (;;) {
+               count = xencons_read(cons, chunk, sizeof(chunk));
+               if (count <= 0)
+                       break;
+
+               for (idx = 0; idx < count; idx++) {
+                       cons->rbuf[RBUF_MASK(cons->rp)] = chunk[idx];
+                       cons->rp++;
+               }
+       }
+       CN_UNLOCK(cons);
+}
+
+/* Tell whether the internal write buffer has no room left */
+static bool
+xencons_tx_full(struct xencons_priv *cons)
+{
+       bool full;
+
+       CN_LOCK(cons);
+       full = ((cons->wp - cons->wc) >= WBUF_SIZE);
+       CN_UNLOCK(cons);
+
+       return (full);
+}
+
+/*
+ * Push the content of the internal write buffer to the console.
+ * When force is non-zero, keep retrying until everything has been sent.
+ */
+static void
+xencons_tx_flush(struct xencons_priv *cons, int force)
+{
+       unsigned int room;
+       int sent, sz;
+
+       CN_LOCK(cons);
+       while (cons->wc != cons->wp) {
+               /* Only the contiguous part up to the end of wbuf is sent */
+               sz = cons->wp - cons->wc;
+               room = WBUF_SIZE - WBUF_MASK(cons->wc);
+               if (sz > room)
+                       sz = room;
+
+               sent = xencons_write(cons, &cons->wbuf[WBUF_MASK(cons->wc)],
+                   sz);
+
+               /*
+                * The other end may not have been initialized: give up
+                * whatever the value of force is.
+                */
+               if (__predict_false(sent < 0))
+                       break;
+
+               /*
+                * Nothing went out: stop here, unless force is set, in
+                * which case spin until the data has been flushed.
+                */
+               if (sent == 0 && __predict_true(!force))
+                       break;
+
+               cons->wc += sent;
+       }
+       CN_UNLOCK(cons);
+}
+
+/*
+ * Queue one character in the write buffer (it is dropped when the buffer
+ * has no room) and flush the buffer.
+ * Returns true if the write buffer is full after the flush.
+ */
+static bool
+xencons_putc(struct xencons_priv *cons, int c, bool force_flush)
+{
+
+       CN_LOCK(cons);
+       if ((cons->wp - cons->wc) < WBUF_SIZE) {
+               cons->wbuf[WBUF_MASK(cons->wp)] = c;
+               cons->wp++;
+       }
+       CN_UNLOCK(cons);
+
+       xencons_tx_flush(cons, force_flush);
+
+       return (xencons_tx_full(cons));
+}
+
+/*
+ * Take one character out of the internal read buffer.
+ *
+ * Returns the character as a value in the range 0-255, or -1 when the
+ * buffer is empty.
+ */
+static int
+xencons_getc(struct xencons_priv *cons)
+{
+       int ret = -1;
+
+       CN_LOCK(cons);
+       if (cons->rp != cons->rc) {
+               /*
+                * We need to return only one char. Go through unsigned
+                * char: where char is signed, byte 0xFF would otherwise be
+                * returned as -1 and be mistaken for "no character".
+                */
+               ret = (unsigned char)cons->rbuf[RBUF_MASK(cons->rc)];
+               cons->rc++;
+       }
+       CN_UNLOCK(cons);
+
+       return (ret);
+}
+
+/*
+ * Move the pending output of the tty into the console write buffer.
+ * Must be called with the tty lock held.
+ *
+ * Returns false when the write buffer is full, i.e. when the caller has to
+ * retry later, and true otherwise.
+ */
+static bool
+xencons_tx(struct tty *tp)
+{
+       bool cons_full = false;
+       char c;
+       struct xencons_priv *cons = tty_softc(tp);
+
+       tty_lock_assert(tp, MA_OWNED);
+
+       /*
+        * Don't transmit any character if the buffer is full. Otherwise,
+        * characters may be lost
+        */
+       if (xencons_tx_full(cons))
+               return (false);
+
+       /*
+        * cons_full must be tested before ttydisc_getc(): a character
+        * dequeued from the tty once the buffer is full would be dropped.
+        */
+       while (!cons_full && ttydisc_getc(tp, &c, 1) == 1)
+               cons_full = xencons_putc(cons, c, false);
+
+       return (!cons_full);
+}
+
+/*
+ * Shutdown event handler: force all the pending output out.
+ * arg is the tty of the console; howto is unused.
+ */
+static void
+xencons_shutdown(void *arg, int howto)
+{
+       struct tty *tp;
+
+       tp = arg;
+       xencons_tx_flush(tty_softc(tp), 1);
+}
+
+/*---------------------- Low-level console driver ---------------------------*/
+/*
+ * Low-level console probe: on a PV domain, set the console priority to
+ * CN_REMOTE and name the console "<driver_name>0". cp is left untouched
+ * otherwise.
+ */
+static void
+xc_cnprobe(struct consdev *cp)
+{
+
+       if (xen_pv_domain()) {
+               cp->cn_pri = CN_REMOTE;
+               sprintf(cp->cn_name, "%s0", driver_name);
+       }
+}
+
+/* Low-level console init: perform the early setup of the main console */
+static void
+xc_cninit(struct consdev *cp)
+{
+       xencons_early_init();
+}
+
+/* Low-level console termination hook: nothing to do */
+static void
+xc_cnterm(struct consdev *cp)
+{
+}
+
+/* Low-level console grab hook: nothing to do */
+static void
+xc_cngrab(struct consdev *cp)
+{
+}
+
+/* Low-level console ungrab hook: nothing to do */
+static void
+xc_cnungrab(struct consdev *cp)
+{
+}
+
+/*
+ * Low-level console input: pull the pending input of the main console and
+ * return one character, or -1 if there is none.
+ */
+static int
+xc_cngetc(struct consdev *dev)
+{
+       struct xencons_priv *cons;
+
+       cons = &main_cons;
+       xencons_rx(cons);
+
+       return (xencons_getc(cons));
+}
+
+/* Low-level console output: emit one character on the main console */
+static void
+xc_cnputc(struct consdev *dev, int c)
+{
+
+       /*
+        * KDB and panic go through the low-level console, so the flush is
+        * forced to make sure the backend sees every character.
+        */
+       (void)xencons_putc(&main_cons, c, true);
+}
+
+/* Register the xc_cn*() functions above as a low-level console driver */
+CONSOLE_DRIVER(xc);
+
+/*----------------------------- TTY driver ---------------------------------*/
+
+/* tty open hook: flag the console as opened; always succeeds */
+static int
+xctty_open(struct tty *tp)
+{
+       struct xencons_priv *cons;
+
+       cons = tty_softc(tp);
+       cons->opened = true;
+
+       return (0);
+}
+
+/* tty close hook: flag the console as closed */
+static void
+xctty_close(struct tty *tp)
+{
+       struct xencons_priv *cons;
+
+       cons = tty_softc(tp);
+       cons->opened = false;
+}
+
+/*
+ * Callout handler: retry the transmission and re-arm the callout while the
+ * write buffer is still full. v is the tty of the console.
+ */
+static void
+xencons_timeout(void *v)
+{
+       struct tty *tp;
+       struct xencons_priv *cons;
+
+       tp = v;
+       cons = tty_softc(tp);
+
+       if (xencons_tx(tp))
+               return;
+
+       callout_reset(&cons->callout, XC_POLLTIME, xencons_timeout, tp);
+}
+
+/*
+ * tty output wakeup hook: cancel any pending retry, transmit the pending
+ * output and schedule a retry if the write buffer got full.
+ */
+static void
+xctty_outwakeup(struct tty *tp)
+{
+       struct xencons_priv *cons;
+
+       cons = tty_softc(tp);
+       callout_stop(&cons->callout);
+
+       if (xencons_tx(tp))
+               return;
+
+       callout_reset(&cons->callout, XC_POLLTIME, xencons_timeout, tp);
+}
+
+/* tty hooks of the Xen console: open, close and output wakeup */
+static struct ttydevsw xc_ttydevsw = {
+        .tsw_flags     = TF_NOPREFIX,
+        .tsw_open      = xctty_open,
+        .tsw_close     = xctty_close,
+        .tsw_outwakeup = xctty_outwakeup,
+};
+
+/*------------------------ Main console driver ------------------------------*/
+/*
+ * Identify method: add the console device as a child of parent.
+ * On ARM the device is added for any Xen guest; on other architectures it
+ * is only added for PV guests.
+ */
+static void
+xc_identify(driver_t *driver, device_t parent)
+{
+#if defined(__arm__) || defined(__aarch64__)
+       if (!xen_domain())
+               return;
+#else
+       if (!xen_pv_domain())
+               return;
+#endif
+
+       /* Report the failure rather than silently ending up without tty */
+       if (BUS_ADD_CHILD(parent, 0, driver_name, 0) == NULL)
+               printf("xencons: unable to add the console device\n");
+}
+
+/*
+ * Probe method: set the device description and return
+ * BUS_PROBE_NOWILDCARD.
+ */
+static int
+xc_probe(device_t dev)
+{
+       device_set_desc(dev, "Xen Console");
+       return (BUS_PROBE_NOWILDCARD);
+}
+
+/*
+ * Attach method: create the tty of the main console, bind the console
+ * interrupt and register the shutdown handler flushing the output.
+ *
+ * Returns 0 on success or the error reported by xencons_init().
+ */
+static int
+xc_attach(device_t dev)
+{
+       int err;
+       struct tty *tp;
+       /*
+        * The main console is already allocated statically in order to
+        * support low-level console
+        */
+       struct xencons_priv *cons = &main_cons;
+
+       tp = tty_alloc(&xc_ttydevsw, cons);
+       tty_makedev(tp, NULL, "%s%r", driver_name, 0);
+
+       /*
+        * The callout is tied to the tty lock; presumably so that
+        * xencons_timeout() runs with the lock xencons_tx() asserts --
+        * TODO confirm.
+        */
+       callout_init_mtx(&cons->callout, tty_getlock(tp), 0);
+
+       err = xencons_init(dev, tp);
+       if (err) {
+               /*
+                * NOTE(review): the tty created above is not released on
+                * this error path -- confirm whether it should be.
+                */
+               printf("xencons: Unable to initialize the console (%d)\n",
+                      err);
+               return err;
+       }
+
+       /* register handler to flush console on shutdown */
+       if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xencons_shutdown,
+                                  tp, SHUTDOWN_PRI_DEFAULT)) == NULL)
+               printf("xencons: shutdown event registration failed!\n");
+
+       return (0);
+}
+
+static void
+xencons_intr(void *arg)
+{
+       struct tty *tp = arg;
+       struct xencons_priv *cons = tty_softc(tp);
+       int ret;
+
+       /*
+        * The input will be used by the low-level console when KDB is active
+        */
+       if (kdb_active)
+               return;
+
+       /*
+        * It's not necessary to retrieve input when the tty is not opened
+        */
+       if (!cons->opened)
+               return;
+
+       xencons_rx(cons);
+
+       tty_lock(tp);
+       while ((ret = xencons_getc(cons)) != -1) {
+#ifdef KDB
+               kdb_alt_break(ret, &cons->altbrk);
+#endif
+               ttydisc_rint(tp, ret, 0);
+       }
+       ttydisc_rint_done(tp);
+       tty_unlock(tp);
+
+       /* Try to flush remaining characters if necessary */
+       xencons_tx_flush(cons, 0);
+}
+
+/* Device class of the Xen console driver */
+static devclass_t xc_devclass;
+
+/* Newbus methods implemented by the driver */
+static device_method_t xc_methods[] = {
+       DEVMETHOD(device_identify, xc_identify),
+       DEVMETHOD(device_probe, xc_probe),
+       DEVMETHOD(device_attach, xc_attach),
+
+       DEVMETHOD_END
+};
+
+/*
+ * Driver description. The softc size is 0 as the console state lives in
+ * the statically allocated main_cons.
+ */
+static driver_t xc_driver = {
+       driver_name,
+       xc_methods,
+       0,
+};
+
+/* Register the driver on the xenpv bus */
+DRIVER_MODULE(xc, xenpv, xc_driver, xc_devclass, 0, 0);