]> xenbits.xensource.com Git - people/pauldu/xenpt.git/commitdiff
Initial commit
author Paul Durrant <paul.durrant@citrix.com>
Fri, 1 Mar 2019 16:09:49 +0000 (16:09 +0000)
committer Paul Durrant <paul.durrant@citrix.com>
Fri, 1 Mar 2019 16:09:49 +0000 (16:09 +0000)
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
.gitignore [new file with mode: 0644]
Makefile [new file with mode: 0644]
debug.h [new file with mode: 0644]
guest_pci.c [new file with mode: 0644]
guest_pci.h [new file with mode: 0644]
host_pci.c [new file with mode: 0644]
host_pci.h [new file with mode: 0644]
xenpt.c [new file with mode: 0644]
xenpt.h [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..c97d991
--- /dev/null
@@ -0,0 +1,10 @@
+.hg
+*.orig
+*.rej
+*~
+*.o
+*.d
+TAGS
+tags
+cscope.files
+cscope.out
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..1095219
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,52 @@
+# Build the xenpt binary from the objects listed in OBJS.
+TARGET = xenpt
+
+OBJS := host_pci.o \
+        guest_pci.o \
+        xenpt.o
+
+# Extra objects/archives to link; nothing in this Makefile sets it.
+LIBS ?=
+
+# The original build always used gcc; keep that as the default while still
+# letting the user override it (make CC=clang).
+ifeq ($(origin CC),default)
+CC := gcc
+endif
+
+# $(CURDIR) is make's own working directory, so no `pwd` fork is needed.
+CFLAGS  = -I$(CURDIR)/include
+
+# _GNU_SOURCE for asprintf.
+CFLAGS += -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_GNU_SOURCE
+
+CFLAGS += -Wall -Werror -g -O1
+
+ifeq ($(shell uname),Linux)
+LDLIBS := -lutil -lrt
+endif
+
+LDLIBS += -lxenctrl -lxenevtchn -lxenforeignmemory -lxendevicemodel
+
+# Get the compiler to generate the dependencies for us.  CFLAGS must stay
+# recursively expanded ('=') so that $(@D) and $(@F) are evaluated per target.
+CFLAGS   += -Wp,-MD,$(@D)/.$(@F).d
+
+# Sub-directories that contribute objects (sorted to drop duplicates).
+SUBDIRS  = $(sort $(filter-out ./,$(dir $(OBJS) $(LIBS))))
+DEPS     = .*.d
+
+LDFLAGS := -g
+
+.PHONY: all
+all: $(TARGET)
+
+$(TARGET): $(LIBS) $(OBJS)
+	$(CC) -o $@ $(LDFLAGS) $(OBJS) $(LIBS) $(LDLIBS)
+
+%.o: %.c
+	$(CC) -o $@ $(CFLAGS) -c $<
+
+.PHONY: ALWAYS
+
+.PHONY: clean
+clean:
+	$(foreach dir,$(SUBDIRS),$(MAKE) -C $(dir) clean &&) true
+	$(RM) $(OBJS)
+	$(RM) $(DEPS)
+	$(RM) $(TARGET)
+	$(RM) TAGS
+
+.PHONY: TAGS
+TAGS:
+	find . -name \*.[ch] | etags -
+
+-include $(DEPS)
+
+# Debug aid: `make print-FOO` echoes the value of variable FOO.
+print-%:
+	echo $($*)
diff --git a/debug.h b/debug.h
new file mode 100644 (file)
index 0000000..2f80b9a
--- /dev/null
+++ b/debug.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2012, Citrix Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef  _DEBUG_H
+#define  _DEBUG_H
+
+#include <stdbool.h>
+
+#define DBG(...)                            \
+    do {                                    \
+        fprintf(stderr, "%s: ", __func__);  \
+        fprintf(stderr, __VA_ARGS__);       \
+        fflush(stderr);                     \
+    } while (false)
+
+#endif  /* _DEBUG_H */
diff --git a/guest_pci.c b/guest_pci.c
new file mode 100644 (file)
index 0000000..1957df7
--- /dev/null
@@ -0,0 +1,1450 @@
+/*
+ * Copyright (c) 2019, Citrix Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <err.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include <sys/types.h>
+
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+
+#include <xenctrl.h>
+#include <xendevicemodel.h>
+
+#include "debug.h"
+#include "xenpt.h"
+#include "guest_pci.h"
+#include "host_pci.h"
+
+/* Number of elements in a true array (must not be used on a pointer). */
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
+
+typedef struct guest_pci_grp guest_pci_grp_t;
+typedef struct guest_pci_reg guest_pci_reg_t;
+
+/*
+ * Description of one config space register inside a register group.
+ * A NULL read/write handler means the access is passed straight through
+ * to the host device (see read_reg() and write_reg()).
+ */
+struct guest_pci_reg
+{
+    const char *name;       /* register name (stringified by DEFINE_REG) */
+    uint8_t offset;         /* offset of the register from the group base */
+    unsigned int size;      /* register width in bytes */
+    void (*read)(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+                 uint8_t *buffer);
+    void (*write)(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+                  const uint8_t *buffer);
+};
+
+/*
+ * A contiguous group of registers in config space.  Groups are kept on a
+ * singly linked list; the handlers for the standard header assert that
+ * its id is zero, capability handlers assert a PCI_CAP_ID_* value.
+ */
+struct guest_pci_grp {
+    guest_pci_grp_t *next;          /* next group on the list, or NULL */
+    uint8_t id;                     /* value returned by cap_id_read() */
+    uint8_t base;                   /* config space offset of the group */
+    unsigned int size;              /* length of the group in bytes */
+    const guest_pci_reg_t *regs;    /* register table for this group */
+    unsigned int nr_regs;           /* number of entries in regs */
+};
+
+/*
+ * Kind of a BAR slot.  A 64-bit memory BAR occupies two consecutive slots:
+ * MEM64_LO followed by MEM64_HI (bar_update() pairs slot i with i + 1).
+ */
+typedef enum guest_pci_bar_type {
+    GUEST_PCI_BAR_TYPE_NONE,
+    GUEST_PCI_BAR_TYPE_IO,
+    GUEST_PCI_BAR_TYPE_MEM32,
+    GUEST_PCI_BAR_TYPE_MEM64_LO,
+    GUEST_PCI_BAR_TYPE_MEM64_HI
+} guest_pci_bar_type_t;
+
+/* Emulation state of a single 32-bit BAR slot. */
+typedef struct guest_pci_bar {
+    guest_pci_bar_type_t type;
+    bool prefetch;          /* reported as PCI_BASE_ADDRESS_MEM_PREFETCH */
+    uint32_t size;          /* used for address masking and as map length */
+    uint32_t host_addr;     /* this slot's 32 bits of the host address */
+    uint32_t guest_addr;    /* this slot's 32 bits of the guest address */
+    bool mapped;            /* set/cleared by bar_update() */
+} guest_pci_bar_t;
+
+/* Emulation state of the expansion ROM BAR. */
+typedef struct guest_pci_rom {
+    uint32_t size;          /* zero means no ROM: reads return 0 */
+    uint32_t host_addr;
+    uint32_t guest_addr;    /* address programmed by the guest */
+    bool mapped;            /* NOTE(review): not referenced in the code visible here */
+    bool enabled;           /* guest has set PCI_ROM_ADDRESS_ENABLE */
+} guest_pci_rom_t;
+
+/* Emulation state of the MSI capability. */
+typedef struct guest_pci_msi {
+    bool enabled;           /* true once msi_enable() has succeeded */
+    int pirq;               /* filled in by host_pci_map_msi(); -1 after disable */
+    uint16_t flags;         /* flags value reported to the guest (ENABLE is OR-ed in) */
+    uint32_t addr_hi;       /* upper 32 bits of the message address */
+    uint32_t addr_lo;       /* lower 32 bits of the message address */
+    uint16_t data;          /* message data */
+    bool masked;            /* bit 0 of the last mask register write */
+} guest_pci_msi_t;
+
+/*
+ * Complete emulation state for the guest-visible device.
+ * NOTE(review): xdh, domid, ioservid and the segment/bus/device/function
+ * fields are not used in the code visible here; presumably they identify
+ * the Xen domain/ioreq server and the guest PCI address -- confirm.
+ */
+typedef struct guest_pci {
+    xendevicemodel_handle *xdh;
+    domid_t domid;
+    ioservid_t ioservid;
+    uint16_t segment;
+    uint8_t bus;
+    uint8_t device;
+    uint8_t function;
+    uint16_t cmd;                   /* last command register value written */
+    guest_pci_bar_t bar[6];
+    guest_pci_rom_t rom;
+    guest_pci_msi_t msi;
+    guest_pci_grp_t *grps;          /* head of the register group list */
+    guest_pci_grp_t **grps_tail;    /* presumably the list append point -- confirm */
+} guest_pci_t;
+
+/* Single, file-wide instance of the emulation state. */
+static guest_pci_t state;
+
+/*
+ * Bring the host mapping of BAR slot 'i' in line with 'enabled'.
+ *
+ * For a 64-bit memory BAR either half may be passed in; the full address
+ * is assembled from both halves and the size is taken from the low half.
+ * The range is unmapped when 'enabled' is false and it is currently mapped,
+ * and mapped when 'enabled' is true, the guest address is non-zero and it
+ * is not currently mapped.
+ *
+ * The mapped flag is tracked on the low half (and mirrored to the high
+ * half) so that the state is the same regardless of which half of a 64-bit
+ * BAR the caller names.  Tracking it on the slot passed in let
+ * memory_update() (high half) and bar_write() (low half) disagree, leaking
+ * the old mapping and mapping the range twice.
+ */
+static void
+bar_update(unsigned int i, bool enabled)
+{
+    guest_pci_bar_t *bar;
+    guest_pci_bar_t *hi;
+    guest_pci_bar_t *lo;
+    uint64_t guest_addr;
+    uint64_t host_addr;
+    uint32_t size;
+    bool is_memory;
+
+    assert(i < ARRAY_SIZE(state.bar));
+    bar = &state.bar[i];
+
+    if (bar->type == GUEST_PCI_BAR_TYPE_MEM32 ||
+        bar->type == GUEST_PCI_BAR_TYPE_IO) {
+        hi = NULL;
+        lo = bar;
+    } else if (bar->type == GUEST_PCI_BAR_TYPE_MEM64_HI) {
+        /* The low half lives in the preceding slot */
+        assert(i > 0);
+        hi = bar;
+        lo = &state.bar[i - 1];
+    } else {
+        assert(bar->type == GUEST_PCI_BAR_TYPE_MEM64_LO);
+        /* The high half lives in the following slot */
+        assert(i + 1 < ARRAY_SIZE(state.bar));
+        hi = &state.bar[i + 1];
+        lo = bar;
+    }
+
+    guest_addr = hi ? hi->guest_addr : 0;
+    guest_addr = guest_addr << 32 | lo->guest_addr;
+
+    host_addr = hi ? hi->host_addr : 0;
+    host_addr = host_addr << 32 | lo->host_addr;
+
+    size = lo->size;
+
+    is_memory = (bar->type == GUEST_PCI_BAR_TYPE_MEM32 ||
+                 bar->type == GUEST_PCI_BAR_TYPE_MEM64_HI ||
+                 bar->type == GUEST_PCI_BAR_TYPE_MEM64_LO);
+
+    if (!enabled && lo->mapped) {
+        if (is_memory) {
+            host_pci_unmap_memory(guest_addr, host_addr, size);
+        } else {
+            assert(bar->type == GUEST_PCI_BAR_TYPE_IO);
+            host_pci_unmap_ioport(guest_addr, host_addr, size);
+        }
+        lo->mapped = false;
+        if (hi)
+            hi->mapped = false;
+    } else if (enabled && guest_addr && !lo->mapped) {
+        if (is_memory) {
+            host_pci_map_memory(guest_addr, host_addr, size);
+        } else {
+            assert(bar->type == GUEST_PCI_BAR_TYPE_IO);
+            host_pci_map_ioport(guest_addr, host_addr, size);
+        }
+        lo->mapped = true;
+        if (hi)
+            hi->mapped = true;
+    }
+}
+
+/*
+ * Apply 'enabled' to every I/O port BAR.  Called from cmd_write() when
+ * the PCI_COMMAND_IO bit changes.
+ */
+static void
+io_update(bool enabled)
+{
+    unsigned int idx = 0;
+
+    while (idx < ARRAY_SIZE(state.bar)) {
+        if (state.bar[idx].type == GUEST_PCI_BAR_TYPE_IO)
+            bar_update(idx, enabled);
+
+        idx++;
+    }
+}
+
+/*
+ * Apply 'enabled' to every memory BAR.  Called from cmd_write() when the
+ * PCI_COMMAND_MEMORY bit changes.  A 64-bit BAR is visited once, through
+ * its high half; the low half is skipped.
+ */
+static void
+memory_update(bool enabled)
+{
+    unsigned int idx;
+
+    for (idx = 0; idx < ARRAY_SIZE(state.bar); idx++) {
+        guest_pci_bar_type_t type = state.bar[idx].type;
+
+        if (type == GUEST_PCI_BAR_TYPE_MEM32 ||
+            type == GUEST_PCI_BAR_TYPE_MEM64_HI)
+            bar_update(idx, enabled);
+    }
+}
+
+/*
+ * Write handler for the command register of the standard header.
+ *
+ * Logs which command bits are being cleared ("-NAME") and set ("+NAME"),
+ * maps/unmaps I/O and memory BARs when the respective decode bit changes,
+ * records the new value in state.cmd and forwards it to the host device.
+ */
+static void
+cmd_write(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+          const uint8_t *buffer)
+{
+    uint16_t data = *(uint16_t *)buffer;
+    uint16_t enable;
+    uint16_t disable;
+
+    assert(!grp->id);
+    assert(!grp->base);
+    assert(reg->size == sizeof(data));
+
+    /* Bits going 0 -> 1 and 1 -> 0 respectively */
+    enable = ~state.cmd & data;
+    disable = state.cmd & ~data;
+
+    if (disable)
+        DBG("%s%s%s%s%s%s%s%s%s%s%s\n",
+            (disable & PCI_COMMAND_IO) ? "-IO" : "",
+            (disable & PCI_COMMAND_MEMORY) ? "-MEMORY" : "",
+            (disable & PCI_COMMAND_MASTER) ? "-MASTER" : "",
+            (disable & PCI_COMMAND_SPECIAL) ? "-SPECIAL" : "",
+            (disable & PCI_COMMAND_INVALIDATE) ? "-INVALIDATE" : "",
+            (disable & PCI_COMMAND_VGA_PALETTE) ? "-VGA_PALETTE" : "",
+            (disable & PCI_COMMAND_PARITY) ? "-PARITY" : "",
+            (disable & PCI_COMMAND_WAIT) ? "-WAIT" : "",
+            (disable & PCI_COMMAND_SERR) ? "-SERR" : "",
+            (disable & PCI_COMMAND_FAST_BACK) ? "-FAST_BACK" : "",
+            (disable & PCI_COMMAND_INTX_DISABLE) ? "-INTX_DISABLE" : "");
+
+    if (enable)
+        DBG("%s%s%s%s%s%s%s%s%s%s%s\n",
+            (enable & PCI_COMMAND_IO) ? "+IO" : "",
+            (enable & PCI_COMMAND_MEMORY) ? "+MEMORY" : "",
+            (enable & PCI_COMMAND_MASTER) ? "+MASTER" : "",
+            (enable & PCI_COMMAND_SPECIAL) ? "+SPECIAL" : "",
+            (enable & PCI_COMMAND_INVALIDATE) ? "+INVALIDATE" : "",
+            (enable & PCI_COMMAND_VGA_PALETTE) ? "+VGA_PALETTE" : "",
+            (enable & PCI_COMMAND_PARITY) ? "+PARITY" : "",
+            (enable & PCI_COMMAND_WAIT) ? "+WAIT" : "",
+            (enable & PCI_COMMAND_SERR) ? "+SERR" : "",
+            (enable & PCI_COMMAND_FAST_BACK) ? "+FAST_BACK" : "",
+            (enable & PCI_COMMAND_INTX_DISABLE) ? "+INTX_DISABLE" : "");
+
+    /* Only touch the BAR mappings when a decode bit actually changes */
+    if ((state.cmd & PCI_COMMAND_IO) != (data & PCI_COMMAND_IO))
+        io_update(data & PCI_COMMAND_IO);
+
+    if ((state.cmd & PCI_COMMAND_MEMORY) != (data & PCI_COMMAND_MEMORY))
+        memory_update(data & PCI_COMMAND_MEMORY);
+
+    state.cmd = data;
+
+    host_pci_config_write(grp->base + reg->offset, sizeof(data), &data);
+}
+
+/*
+ * Read handler for the status register: returns the host value, with the
+ * capability list bit hidden when no group follows the standard header.
+ */
+static void
+sts_read(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+         uint8_t *buffer)
+{
+    uint16_t status;
+
+    assert(!grp->id);
+    assert(!grp->base);
+    assert(reg->size == sizeof(status));
+
+    host_pci_config_read(grp->base + reg->offset, sizeof(status), &status);
+
+    /* No emulated capabilities: do not advertise a capability list */
+    if (grp->next == NULL)
+        status &= ~PCI_STATUS_CAP_LIST;
+
+    *(uint16_t *)buffer = status;
+}
+
+/* Convert a BAR register offset into an index into state.bar[]. */
+#define BAR_ADDR_TO_INDEX(_offset) \
+    (((_offset) - PCI_BASE_ADDRESS_0) / sizeof(uint32_t))
+
+/* Convert an index into state.bar[] into the BAR register offset. */
+#define BAR_INDEX_TO_ADDR(_index) \
+    (((_index) * sizeof(uint32_t)) + PCI_BASE_ADDRESS_0)
+
+/* Human readable name of a BAR's type, for debug output. */
+#define BAR_TYPE(_bar) \
+    (((_bar)->type == GUEST_PCI_BAR_TYPE_MEM64_HI) ? \
+     "MEM64_HI" : \
+     ((_bar)->type == GUEST_PCI_BAR_TYPE_MEM64_LO) ? \
+     "MEM64_LO" : \
+     ((_bar)->type == GUEST_PCI_BAR_TYPE_MEM32) ? \
+     "MEM32" : \
+     ((_bar)->type == GUEST_PCI_BAR_TYPE_IO) ? \
+     "IO" : \
+     "NONE")
+
+/*
+ * Read handler for a BAR: returns the guest-programmed address combined
+ * with the read-only type bits that match the BAR's kind.  Unimplemented
+ * BARs read as zero; the high half of a 64-bit BAR is returned as-is.
+ */
+static void
+bar_read(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+         uint8_t *buffer)
+{
+    unsigned int idx = BAR_ADDR_TO_INDEX(reg->offset);
+    guest_pci_bar_t *bar = &state.bar[idx];
+    uint32_t value;
+
+    assert(!grp->id);
+    assert(idx < ARRAY_SIZE(state.bar));
+    assert(reg->size == sizeof(value));
+
+    value = bar->guest_addr;
+
+    if (bar->type == GUEST_PCI_BAR_TYPE_NONE) {
+        value = 0;
+    } else if (bar->type == GUEST_PCI_BAR_TYPE_IO) {
+        assert(!(value & ~PCI_BASE_ADDRESS_IO_MASK));
+
+        value |= PCI_BASE_ADDRESS_SPACE_IO;
+    } else if (bar->type == GUEST_PCI_BAR_TYPE_MEM32 ||
+               bar->type == GUEST_PCI_BAR_TYPE_MEM64_LO) {
+        assert(!(value & ~PCI_BASE_ADDRESS_MEM_MASK));
+
+        value |= (bar->type == GUEST_PCI_BAR_TYPE_MEM32) ?
+                 PCI_BASE_ADDRESS_MEM_TYPE_32 :
+                 PCI_BASE_ADDRESS_MEM_TYPE_64;
+
+        if (bar->prefetch)
+            value |= PCI_BASE_ADDRESS_MEM_PREFETCH;
+    }
+
+    *(uint32_t *)buffer = value;
+}
+
+
+
+/*
+ * Write handler for a BAR: masks the written value to the BAR's size and,
+ * if the resulting guest address differs from the current one, tears down
+ * the old mapping and (when the matching decode bit in state.cmd is set)
+ * establishes the new one.  Writes to unimplemented BARs are ignored.
+ */
+static void
+bar_write(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+          const uint8_t *buffer)
+{
+    unsigned int idx = BAR_ADDR_TO_INDEX(reg->offset);
+    guest_pci_bar_t *bar = &state.bar[idx];
+    bool decode = false;
+    uint32_t addr;
+
+    assert(!grp->id);
+    assert(idx < ARRAY_SIZE(state.bar));
+    assert(reg->size == sizeof(addr));
+
+    addr = *(uint32_t *)buffer;
+
+    if (bar->type == GUEST_PCI_BAR_TYPE_NONE)
+        return;
+
+    if (bar->type == GUEST_PCI_BAR_TYPE_MEM64_HI) {
+        /* The high half carries no size of its own: all bits writable */
+        assert(!bar->size);
+        decode = state.cmd & PCI_COMMAND_MEMORY;
+    } else if (bar->type == GUEST_PCI_BAR_TYPE_MEM64_LO ||
+               bar->type == GUEST_PCI_BAR_TYPE_MEM32) {
+        assert(bar->size & PCI_BASE_ADDRESS_MEM_MASK);
+
+        decode = state.cmd & PCI_COMMAND_MEMORY;
+        addr &= ~(bar->size - 1);
+    } else if (bar->type == GUEST_PCI_BAR_TYPE_IO) {
+        assert(bar->size & PCI_BASE_ADDRESS_IO_MASK);
+
+        decode = state.cmd & PCI_COMMAND_IO;
+        addr &= ~(bar->size - 1);
+    }
+
+    if (bar->guest_addr == addr)
+        return;
+
+    /* Drop any mapping at the old address before moving the BAR */
+    bar_update(idx, false);
+    bar->guest_addr = addr;
+    bar_update(idx, decode);
+}
+
+/*
+ * Read handler for the expansion ROM BAR: returns the guest address plus
+ * the enable bit, or zero when the device has no ROM (size of zero).
+ */
+static void
+rom_read(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+         uint8_t *buffer)
+{
+    guest_pci_rom_t *rom = &state.rom;
+    uint32_t value = 0;
+
+    assert(!grp->id);
+    assert(reg->size == sizeof(value));
+
+    if (rom->size) {
+        value = rom->guest_addr;
+
+        assert(!(value & ~PCI_ROM_ADDRESS_MASK));
+
+        if (rom->enabled)
+            value |= PCI_ROM_ADDRESS_ENABLE;
+    }
+
+    *(uint32_t *)buffer = value;
+}
+
+/*
+ * Write handler for the expansion ROM BAR.  Extracts the enable bit and the
+ * size-aligned address from the written value; when either differs from the
+ * current state the old mapping (if any) is removed and a new one is
+ * created if the ROM is enabled at a non-zero address.  With no ROM present
+ * the register is held at zero/disabled.
+ */
+static void
+rom_write(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+          const uint8_t *buffer)
+{
+    guest_pci_rom_t *rom = &state.rom;
+    uint32_t addr;
+    bool enable;
+
+    assert(!grp->id);
+    assert(reg->size == sizeof(addr));
+
+    addr = *(uint32_t *)buffer;
+
+    if (!rom->size) {
+        enable = false;
+        addr = 0;
+    } else {
+        assert(rom->size & PCI_ROM_ADDRESS_MASK);
+
+        enable = addr & PCI_ROM_ADDRESS_ENABLE;
+        addr &= ~(rom->size - 1);
+    }
+
+    /* Nothing to do if neither the address nor the enable bit changed */
+    if (rom->guest_addr == addr && rom->enabled == enable)
+        return;
+
+    if (rom->enabled && rom->guest_addr)
+        host_pci_unmap_memory(rom->guest_addr, rom->host_addr,
+                              rom->size);
+
+    rom->guest_addr = addr;
+    rom->enabled = enable;
+
+    if (rom->enabled && rom->guest_addr)
+        host_pci_map_memory(rom->guest_addr, rom->host_addr,
+                            rom->size);
+}
+
+/* Read handler for a capability's ID byte: reports the group's id. */
+static void
+cap_id_read(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+            uint8_t *buffer)
+{
+    buffer[0] = grp->id;
+}
+
+/*
+ * Read handler for a "next capability" pointer: the base offset of the
+ * following group, or zero when this is the last group on the list.
+ */
+static void
+cap_next_read(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+              uint8_t *buffer)
+{
+    uint8_t next_base = 0;
+
+    if (grp->next)
+        next_base = grp->next->base;
+
+    *buffer = next_base;
+}
+
+/*
+ * Read handler for the length byte of a vendor-specific capability:
+ * reports the size of the group.
+ */
+static void
+vndr_read(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+          uint8_t *buffer)
+{
+    assert(grp->id == PCI_CAP_ID_VNDR);
+
+    buffer[0] = grp->size;
+}
+
+/* Assemble the 64-bit MSI address from its two 32-bit halves. */
+static uint64_t
+msi_addr(guest_pci_msi_t *msi)
+{
+    uint64_t addr = msi->addr_hi;
+
+    addr <<= 32;
+    addr |= msi->addr_lo;
+
+    return addr;
+}
+
+/* Push the current MSI address, data, pirq and mask state to the host. */
+static void
+msi_update(guest_pci_msi_t *msi)
+{
+    uint64_t addr = msi_addr(msi);
+
+    host_pci_update_msi(addr, msi->data, msi->pirq, msi->masked);
+}
+
+/*
+ * Map the MSI on the host (filling in msi->pirq) and, on success, mark it
+ * enabled and push the current settings.  On failure only a debug message
+ * is emitted and the MSI stays disabled.
+ */
+static void
+msi_enable(guest_pci_msi_t *msi)
+{
+    if (host_pci_map_msi(msi_addr(msi), msi->data, &msi->pirq)) {
+        DBG("fail1\n");
+        return;
+    }
+
+    msi->enabled = true;
+
+    msi_update(msi);
+}
+
+/* Unmap the MSI on the host, mark it disabled and forget its pirq. */
+static void
+msi_disable(guest_pci_msi_t *msi)
+{
+    uint64_t addr = msi_addr(msi);
+
+    host_pci_unmap_msi(addr, msi->data, msi->pirq);
+
+    msi->pirq = -1;
+    msi->enabled = false;
+}
+
+/*
+ * Read handler for the MSI flags register: the stored flags with the
+ * enable bit reflecting the emulated enable state.
+ */
+static void
+msi_flags_read(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+               uint8_t *buffer)
+{
+    guest_pci_msi_t *msi = &state.msi;
+    uint16_t flags;
+
+    assert(grp->id == PCI_CAP_ID_MSI);
+    assert(reg->size == sizeof(flags));
+
+    flags = msi->enabled ? (msi->flags | PCI_MSI_FLAGS_ENABLE) : msi->flags;
+
+    *(uint16_t *)buffer = flags;
+}
+
+/*
+ * Write handler for the MSI flags register.  Only the enable bit is acted
+ * upon: the MSI is enabled or disabled when the requested state differs
+ * from the current one.  The value forwarded to the host is the stored
+ * flags plus the enable bit of the resulting state, not the guest's value.
+ * A debug message is logged if any multi-message enable bits are set.
+ */
+static void
+msi_flags_write(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+                const uint8_t *buffer)
+{
+    guest_pci_msi_t *msi = &state.msi;
+    uint16_t flags = *(uint16_t *)buffer;
+    bool want_enabled;
+
+    assert(grp->id == PCI_CAP_ID_MSI);
+    assert(reg->size == sizeof(flags));
+
+    if (flags & PCI_MSI_FLAGS_QSIZE)
+        DBG("multi-message\n");
+
+    want_enabled = flags & PCI_MSI_FLAGS_ENABLE;
+
+    if (want_enabled != msi->enabled) {
+        if (want_enabled)
+            msi_enable(msi);
+        else
+            msi_disable(msi);
+    }
+
+    /* msi_enable() may have failed, so re-derive the bit from the state */
+    flags = msi->flags;
+    if (msi->enabled)
+        flags |= PCI_MSI_FLAGS_ENABLE;
+
+    host_pci_config_write(grp->base + reg->offset, sizeof(flags), &flags);
+}
+
+/* Read handler for the low 32 bits of the MSI address. */
+static void
+msi_addr_lo_read(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+                 uint8_t *buffer)
+{
+    const guest_pci_msi_t *msi = &state.msi;
+
+    assert(grp->id == PCI_CAP_ID_MSI);
+    assert(reg->size == sizeof(msi->addr_lo));
+
+    *(uint32_t *)buffer = msi->addr_lo;
+}
+
+/*
+ * Write handler for the low 32 bits of the MSI address.  The two reserved
+ * low bits are cleared; a changed value is stored and, if the MSI is
+ * enabled, pushed to the host binding.  The masked value is always
+ * forwarded to the host device's config space.
+ */
+static void
+msi_addr_lo_write(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+                  const uint8_t *buffer)
+{
+    guest_pci_msi_t *msi = &state.msi;
+    uint32_t addr = *(uint32_t *)buffer;
+    bool changed;
+
+    assert(grp->id == PCI_CAP_ID_MSI);
+    assert(reg->size == sizeof(addr));
+
+    /* Bits 0:1 are reserved */
+    addr &= ~0x00000003;
+
+    changed = (msi->addr_lo != addr);
+    if (changed)
+        msi->addr_lo = addr;
+
+    if (changed && msi->enabled)
+        msi_update(msi);
+
+    host_pci_config_write(grp->base + reg->offset, sizeof(addr), &addr);
+}
+
+/* Read handler for the high 32 bits of the MSI address. */
+static void
+msi_addr_hi_read(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+                 uint8_t *buffer)
+{
+    const guest_pci_msi_t *msi = &state.msi;
+
+    assert(grp->id == PCI_CAP_ID_MSI);
+    assert(reg->size == sizeof(msi->addr_hi));
+
+    *(uint32_t *)buffer = msi->addr_hi;
+}
+
+/*
+ * Write handler for the high 32 bits of the MSI address.  A changed value
+ * is stored and, if the MSI is enabled, pushed to the host binding.  The
+ * value is always forwarded to the host device's config space.
+ */
+static void
+msi_addr_hi_write(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+                  const uint8_t *buffer)
+{
+    guest_pci_msi_t *msi = &state.msi;
+    uint32_t addr = *(uint32_t *)buffer;
+    bool changed;
+
+    assert(grp->id == PCI_CAP_ID_MSI);
+    assert(reg->size == sizeof(addr));
+
+    changed = (msi->addr_hi != addr);
+    if (changed)
+        msi->addr_hi = addr;
+
+    if (changed && msi->enabled)
+        msi_update(msi);
+
+    host_pci_config_write(grp->base + reg->offset, sizeof(addr), &addr);
+}
+
+/* Read handler for the MSI message data register. */
+static void
+msi_data_read(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+              uint8_t *buffer)
+{
+    const guest_pci_msi_t *msi = &state.msi;
+
+    assert(grp->id == PCI_CAP_ID_MSI);
+    assert(reg->size == sizeof(msi->data));
+
+    *(uint16_t *)buffer = msi->data;
+}
+
+/*
+ * Write handler for the MSI message data register.  A changed value is
+ * stored and, if the MSI is enabled, pushed to the host binding.  The
+ * value is always forwarded to the host device's config space.
+ */
+static void
+msi_data_write(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+               const uint8_t *buffer)
+{
+    guest_pci_msi_t *msi = &state.msi;
+    uint16_t value = *(uint16_t *)buffer;
+    bool changed;
+
+    assert(grp->id == PCI_CAP_ID_MSI);
+    assert(reg->size == sizeof(value));
+
+    changed = (msi->data != value);
+    if (changed)
+        msi->data = value;
+
+    if (changed && msi->enabled)
+        msi_update(msi);
+
+    host_pci_config_write(grp->base + reg->offset, sizeof(value), &value);
+}
+
+/*
+ * Write handler for the MSI mask register.  Only bit 0 is kept: it is
+ * recorded in msi->masked and forwarded to the host device.
+ * NOTE(review): unlike the address/data handlers this does not call
+ * msi_update() when the MSI is enabled -- confirm that is intended.
+ */
+static void
+msi_mask_write(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+               const uint8_t *buffer)
+{
+    guest_pci_msi_t *msi = &state.msi;
+    uint32_t mask_bits = *(uint32_t *)buffer;
+
+    assert(grp->id == PCI_CAP_ID_MSI);
+    assert(reg->size == sizeof(mask_bits));
+
+    mask_bits &= 1;
+    msi->masked = (mask_bits != 0);
+
+    host_pci_config_write(grp->base + reg->offset, sizeof(mask_bits),
+                          &mask_bits);
+}
+
+/*
+ * Read handler for the MSI pending register: bit 0 of the host's value,
+ * all other bits read as zero.
+ */
+static void
+msi_pending_read(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+                 uint8_t *buffer)
+{
+    uint32_t pending;
+
+    assert(grp->id == PCI_CAP_ID_MSI);
+    assert(reg->size == sizeof(pending));
+
+    host_pci_config_read(grp->base + reg->offset, sizeof(pending), &pending);
+    pending &= 1;
+
+    *(uint32_t *)buffer = pending;
+}
+
+/*
+ * Read handler for the PCI Express device capabilities register: the
+ * host's value with the function level reset capability bit hidden.
+ */
+static void
+exp_devcap_read(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+                uint8_t *buffer)
+{
+    uint32_t devcap;
+
+    assert(grp->id == PCI_CAP_ID_EXP);
+    assert(reg->size == sizeof(devcap));
+
+    host_pci_config_read(grp->base + reg->offset, sizeof(devcap), &devcap);
+
+    /* Don't expose FLR to a guest */
+    *(uint32_t *)buffer = devcap & ~PCI_EXP_DEVCAP_FLR;
+}
+
+/*
+ * Write handler for the PCI Express device control register: forwards
+ * the guest's value to the host with the "initiate FLR" bit stripped.
+ */
+static void
+exp_devctl_write(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+                 const uint8_t *buffer)
+{
+    uint16_t devctl;
+
+    assert(grp->id == PCI_CAP_ID_EXP);
+    assert(reg->size == sizeof(devctl));
+
+    /* Prevent a guest from initiating FLR */
+    devctl = *(uint16_t *)buffer & ~PCI_EXP_DEVCTL_BCR_FLR;
+
+    host_pci_config_write(grp->base + reg->offset, sizeof(devctl), &devctl);
+}
+
+/*
+ * Build a guest_pci_reg_t initializer.  _name is the register's PCI_*
+ * constant without the "PCI_" prefix: it supplies both the name string and
+ * the offset.  _type fixes the register width; _read/_write are the
+ * handlers, or NULL.
+ */
+#define DEFINE_REG(_name, _type, _read, _write) \
+    {                                           \
+        .name = #_name,                         \
+        .offset = PCI_ ## _name,                \
+        .size = sizeof(_type),                  \
+        .read = _read,                          \
+        .write = _write,                        \
+    }
+
+/*
+ * Registers of the standard config space header.  Entries with NULL
+ * handlers are passed through to the host by read_reg()/write_reg();
+ * command, status, the BARs, the ROM BAR and the capability pointer are
+ * emulated.
+ */
+static const guest_pci_reg_t std_header_regs[] = {
+    DEFINE_REG(VENDOR_ID, uint16_t, NULL, NULL),
+    DEFINE_REG(DEVICE_ID, uint16_t, NULL, NULL),
+    DEFINE_REG(COMMAND, uint16_t, NULL, cmd_write),
+    DEFINE_REG(STATUS, uint16_t, sts_read, NULL),
+    DEFINE_REG(REVISION_ID, uint8_t, NULL, NULL),
+    DEFINE_REG(CLASS_PROG, uint8_t, NULL, NULL),
+    DEFINE_REG(CLASS_DEVICE, uint16_t, NULL, NULL),
+    DEFINE_REG(CACHE_LINE_SIZE, uint8_t, NULL, NULL),
+    DEFINE_REG(LATENCY_TIMER, uint8_t, NULL, NULL),
+    DEFINE_REG(HEADER_TYPE, uint8_t, NULL, NULL),
+    DEFINE_REG(BIST, uint8_t, NULL, NULL),
+    DEFINE_REG(BASE_ADDRESS_0, uint32_t, bar_read, bar_write),
+    DEFINE_REG(BASE_ADDRESS_1, uint32_t, bar_read, bar_write),
+    DEFINE_REG(BASE_ADDRESS_2, uint32_t, bar_read, bar_write),
+    DEFINE_REG(BASE_ADDRESS_3, uint32_t, bar_read, bar_write),
+    DEFINE_REG(BASE_ADDRESS_4, uint32_t, bar_read, bar_write),
+    DEFINE_REG(BASE_ADDRESS_5, uint32_t, bar_read, bar_write),
+    DEFINE_REG(CARDBUS_CIS, uint32_t, NULL, NULL),
+    DEFINE_REG(SUBSYSTEM_VENDOR_ID, uint16_t, NULL, NULL),
+    DEFINE_REG(SUBSYSTEM_ID, uint16_t, NULL, NULL),
+    DEFINE_REG(ROM_ADDRESS, uint32_t, rom_read, rom_write),
+    DEFINE_REG(CAPABILITY_LIST, uint8_t, cap_next_read, NULL),
+    /* 0x35 - 0x3B: reserved */
+    DEFINE_REG(INTERRUPT_LINE, uint8_t, NULL, NULL),
+    DEFINE_REG(INTERRUPT_PIN, uint8_t, NULL, NULL),
+    DEFINE_REG(MIN_GNT, uint8_t, NULL, NULL),
+    DEFINE_REG(MAX_LAT, uint8_t, NULL, NULL),
+};
+
+/* Offset of the length byte within a vendor-specific capability. */
+#define PCI_VNDR_LEN 2
+
+/* Vendor-specific capability: only the ID, next and length bytes. */
+static const guest_pci_reg_t vndr_regs[] = {
+    DEFINE_REG(CAP_LIST_ID, uint8_t, cap_id_read, NULL),
+    DEFINE_REG(CAP_LIST_NEXT, uint8_t, cap_next_read, NULL),
+    DEFINE_REG(VNDR_LEN, uint8_t, vndr_read, NULL),
+};
+
+/* MSI capability layout with a 32-bit address and no mask/pending registers. */
+static const guest_pci_reg_t msi32_regs[] = {
+    DEFINE_REG(CAP_LIST_ID, uint8_t, cap_id_read, NULL),
+    DEFINE_REG(CAP_LIST_NEXT, uint8_t, cap_next_read, NULL),
+    DEFINE_REG(MSI_FLAGS, uint16_t, msi_flags_read, msi_flags_write),
+    DEFINE_REG(MSI_ADDRESS_LO, uint32_t, msi_addr_lo_read, msi_addr_lo_write),
+    DEFINE_REG(MSI_DATA_32, uint16_t, msi_data_read, msi_data_write),
+};
+
+/* MSI capability layout with a 32-bit address plus mask/pending registers. */
+static const guest_pci_reg_t msi32_mask_regs[] = {
+    DEFINE_REG(CAP_LIST_ID, uint8_t, cap_id_read, NULL),
+    DEFINE_REG(CAP_LIST_NEXT, uint8_t, cap_next_read, NULL),
+    DEFINE_REG(MSI_FLAGS, uint16_t, msi_flags_read, msi_flags_write),
+    DEFINE_REG(MSI_ADDRESS_LO, uint32_t, msi_addr_lo_read, msi_addr_lo_write),
+    DEFINE_REG(MSI_DATA_32, uint16_t, msi_data_read, msi_data_write),
+    DEFINE_REG(MSI_MASK_32, uint32_t, NULL, msi_mask_write),
+    DEFINE_REG(MSI_PENDING_32, uint32_t, msi_pending_read, NULL),
+};
+
+/* MSI capability layout with a 64-bit address and no mask/pending registers. */
+static const guest_pci_reg_t msi64_regs[] = {
+    DEFINE_REG(CAP_LIST_ID, uint8_t, cap_id_read, NULL),
+    DEFINE_REG(CAP_LIST_NEXT, uint8_t, cap_next_read, NULL),
+    DEFINE_REG(MSI_FLAGS, uint16_t, msi_flags_read, msi_flags_write),
+    DEFINE_REG(MSI_ADDRESS_LO, uint32_t, msi_addr_lo_read, msi_addr_lo_write),
+    DEFINE_REG(MSI_ADDRESS_HI, uint32_t, msi_addr_hi_read, msi_addr_hi_write),
+    DEFINE_REG(MSI_DATA_64, uint16_t, msi_data_read, msi_data_write),
+};
+
+/* MSI capability layout with a 64-bit address plus mask/pending registers. */
+static const guest_pci_reg_t msi64_mask_regs[] = {
+    DEFINE_REG(CAP_LIST_ID, uint8_t, cap_id_read, NULL),
+    DEFINE_REG(CAP_LIST_NEXT, uint8_t, cap_next_read, NULL),
+    DEFINE_REG(MSI_FLAGS, uint16_t, msi_flags_read, msi_flags_write),
+    DEFINE_REG(MSI_ADDRESS_LO, uint32_t, msi_addr_lo_read, msi_addr_lo_write),
+    DEFINE_REG(MSI_ADDRESS_HI, uint32_t, msi_addr_hi_read, msi_addr_hi_write),
+    DEFINE_REG(MSI_DATA_64, uint16_t, msi_data_read, msi_data_write),
+    DEFINE_REG(MSI_MASK_64, uint32_t, NULL, msi_mask_write),
+    DEFINE_REG(MSI_PENDING_64, uint32_t, msi_pending_read, NULL),
+};
+
+/*
+ * Power management capability: only the ID and next pointer are emulated,
+ * every other register has NULL handlers (host pass-through).
+ */
+static const guest_pci_reg_t pm_regs[] = {
+    DEFINE_REG(CAP_LIST_ID, uint8_t, cap_id_read, NULL),
+    DEFINE_REG(CAP_LIST_NEXT, uint8_t, cap_next_read, NULL),
+    DEFINE_REG(PM_PMC, uint16_t, NULL, NULL),
+    DEFINE_REG(PM_CTRL, uint16_t, NULL, NULL),
+    DEFINE_REG(PM_PPB_EXTENSIONS, uint8_t, NULL, NULL),
+    DEFINE_REG(PM_DATA_REGISTER, uint8_t, NULL, NULL),
+};
+
+/*
+ * PCI Express capability, shorter layout (ends at EXP_RTSTA).  DEVCAP and
+ * DEVCTL are filtered to hide/block FLR; registers with NULL handlers are
+ * passed through to the host.
+ */
+static const guest_pci_reg_t exp_v1_regs[] = {
+    DEFINE_REG(CAP_LIST_ID, uint8_t, cap_id_read, NULL),
+    DEFINE_REG(CAP_LIST_NEXT, uint8_t, cap_next_read, NULL),
+    DEFINE_REG(EXP_FLAGS, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_DEVCAP, uint32_t, exp_devcap_read, NULL),
+    DEFINE_REG(EXP_DEVCTL, uint16_t, NULL, exp_devctl_write),
+    DEFINE_REG(EXP_DEVSTA, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_LNKCAP, uint32_t, NULL, NULL),
+    DEFINE_REG(EXP_LNKCTL, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_LNKSTA, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_SLTCAP, uint32_t, NULL, NULL),
+    DEFINE_REG(EXP_SLTCTL, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_SLTSTA, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_RTCTL, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_RTCAP, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_RTSTA, uint32_t, NULL, NULL),
+};
+
+/*
+ * PCI Express capability, longer layout: the same registers as
+ * exp_v1_regs followed by the DEVCAP2..SLTSTA2 registers, all of which
+ * are passed through to the host.
+ */
+static const guest_pci_reg_t exp_v2_regs[] = {
+    DEFINE_REG(CAP_LIST_ID, uint8_t, cap_id_read, NULL),
+    DEFINE_REG(CAP_LIST_NEXT, uint8_t, cap_next_read, NULL),
+    DEFINE_REG(EXP_FLAGS, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_DEVCAP, uint32_t, exp_devcap_read, NULL),
+    DEFINE_REG(EXP_DEVCTL, uint16_t, NULL, exp_devctl_write),
+    DEFINE_REG(EXP_DEVSTA, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_LNKCAP, uint32_t, NULL, NULL),
+    DEFINE_REG(EXP_LNKCTL, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_LNKSTA, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_SLTCAP, uint32_t, NULL, NULL),
+    DEFINE_REG(EXP_SLTCTL, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_SLTSTA, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_RTCTL, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_RTCAP, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_RTSTA, uint32_t, NULL, NULL),
+    DEFINE_REG(EXP_DEVCAP2, uint32_t, NULL, NULL),
+    DEFINE_REG(EXP_DEVCTL2, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_DEVSTA2, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_LNKCAP2, uint32_t, NULL, NULL),
+    DEFINE_REG(EXP_LNKCTL2, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_LNKSTA2, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_SLTCAP2, uint32_t, NULL, NULL),
+    DEFINE_REG(EXP_SLTCTL2, uint16_t, NULL, NULL),
+    DEFINE_REG(EXP_SLTSTA2, uint16_t, NULL, NULL),
+};
+
+/*
+ * Check a config space access: it must be 1, 2 or 4 bytes wide, lie
+ * entirely below PCI_CONFIG_SIZEOF and be naturally aligned to its size.
+ */
+static bool
+access_valid(uint64_t addr, unsigned int size)
+{
+    switch (size) {
+    case sizeof(uint8_t):
+    case sizeof(uint16_t):
+    case sizeof(uint32_t):
+        break;
+
+    default:
+        return false;
+    }
+
+    /* Check the start first so the end computation cannot wrap */
+    if (addr > PCI_CONFIG_SIZEOF)
+        return false;
+
+    if (addr + size > PCI_CONFIG_SIZEOF)
+        return false;
+
+    return (addr & (size - 1)) == 0;
+}
+
+/*
+ * Return the register group whose [base, base + size) range contains
+ * 'addr', or NULL if the address is not covered by any group.
+ */
+static guest_pci_grp_t *
+find_grp(uint64_t addr)
+{
+    guest_pci_grp_t *cur = state.grps;
+
+    while (cur) {
+        uint64_t first = cur->base;
+        uint64_t last = first + cur->size - 1;
+
+        if (first <= addr && addr <= last)
+            return cur;
+
+        cur = cur->next;
+    }
+
+    return NULL;
+}
+
+/*
+ * Return the register of 'grp' that contains config space address 'addr',
+ * or NULL if the address falls into a gap between registers.
+ */
+static const guest_pci_reg_t *
+find_reg(guest_pci_grp_t *grp, uint64_t addr)
+{
+    const guest_pci_reg_t *reg = grp->regs;
+    unsigned int remaining;
+
+    for (remaining = grp->nr_regs; remaining != 0; remaining--, reg++) {
+        uint64_t first = grp->base + reg->offset;
+        uint64_t last = first + reg->size - 1;
+
+        if (first <= addr && addr <= last)
+            return reg;
+    }
+
+    return NULL;
+}
+
+/*
+ * Read a whole register into 'buffer': through the register's read handler
+ * if it has one, otherwise straight from the host device.
+ */
+static void
+read_reg(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+         uint8_t *buffer)
+{
+    if (!reg->read) {
+        host_pci_config_read(grp->base + reg->offset, reg->size, buffer);
+        return;
+    }
+
+    reg->read(grp, reg, buffer);
+}
+
+/*
+ * Write a register, possibly partially.  Bit n of 'byte_mask' selects
+ * byte n of 'buffer'; bytes that are not selected keep the value obtained
+ * by reading the register first.  The merged value goes to the register's
+ * write handler if it has one, otherwise straight to the host device.
+ */
+static void
+write_reg(guest_pci_grp_t *grp, const guest_pci_reg_t *reg,
+          uint8_t *buffer, unsigned int byte_mask)
+{
+    uint8_t merged[sizeof(uint32_t)];
+    unsigned int full_mask = (1u << reg->size) - 1;
+    unsigned int n;
+
+    /* A partial update needs the current contents as a starting point */
+    if (byte_mask != full_mask)
+        read_reg(grp, reg, merged);
+
+    for (n = 0; n < reg->size; n++) {
+        if (!(byte_mask & (1u << n)))
+            continue;
+
+        merged[n] = buffer[n];
+    }
+
+    if (!reg->write)
+        host_pci_config_write(grp->base + reg->offset, reg->size, merged);
+    else
+        reg->write(grp, reg, merged);
+}
+
+/*
+ * Emulate a guest read of config space.
+ *
+ * addr: config space address of the access
+ * size: access width in bytes (1, 2 or 4; addr must be aligned to it)
+ *
+ * Returns the value read, in the low 'size' bytes of the result. An
+ * invalid access returns 0 and bytes not covered by any emulated register
+ * read as 0.
+ */
+uint32_t
+guest_pci_config_read(uint64_t addr, unsigned int size)
+{
+    /* A union lets the bytes be reassembled without a pointer cast */
+    union {
+        uint32_t word;
+        uint8_t bytes[sizeof(uint32_t)];
+    } buffer = { 0 };
+    uint32_t data;
+    guest_pci_grp_t *grp;
+    const guest_pci_reg_t *reg;
+    unsigned int offset, i;
+
+    if (!access_valid(addr, size))
+        return 0;
+
+    /* Find the first register targeted by this access */
+    grp = find_grp(addr);
+    reg = grp ? find_reg(grp, addr) : NULL;
+
+    /*
+     * If there is no register (i.e. it's reserved space) then there is no
+     * offset. Otherwise calculate the offset into the register, which is
+     * zero when the access is aligned to the start of the register.
+     */
+    offset = reg ? addr - (grp->base + reg->offset) : 0;
+
+    assert(offset + size <= sizeof(buffer.bytes));
+
+    /*
+     * Registers are always read whole, so walk from the start of the first
+     * register. This keeps addr in step with the buffer index when looking
+     * up the register that follows.
+     */
+    addr -= offset;
+
+    for (i = 0; i < offset + size; ) {
+        unsigned int read_size = reg ? reg->size : 1;
+
+        assert(i + read_size <= sizeof(buffer.bytes));
+
+        if (reg)
+            read_reg(grp, reg, &buffer.bytes[i]);
+        else
+            buffer.bytes[i] = 0;
+
+        /* Move onto the next register if the access is not complete */
+        addr += read_size;
+        i += read_size;
+
+        /* Find the next register */
+        grp = find_grp(addr);
+        reg = grp ? find_reg(grp, addr) : NULL;
+    }
+
+    /* Discard the bytes before the access and anything beyond its width */
+    data = buffer.word;
+    data >>= (offset * 8);
+    data &= (1ull << (size * 8)) - 1;
+
+    return data;
+}
+
+/*
+ * Emulate a guest write to config space.
+ *
+ * addr: config space address of the access
+ * size: access width in bytes (1, 2 or 4; addr must be aligned to it)
+ * data: value to write, in the low 'size' bytes
+ *
+ * Invalid accesses and writes to bytes not covered by any emulated
+ * register are silently dropped.
+ */
+void
+guest_pci_config_write(uint64_t addr, unsigned int size, uint32_t data)
+{
+    /* A union lets the value be split into bytes without a pointer cast */
+    union {
+        uint32_t word;
+        uint8_t bytes[sizeof(uint32_t)];
+    } buffer;
+    unsigned int byte_mask;
+    guest_pci_grp_t *grp;
+    const guest_pci_reg_t *reg;
+    unsigned int offset, i;
+
+    if (!access_valid(addr, size))
+        return;
+
+    /* Find the first register targeted by this access */
+    grp = find_grp(addr);
+    reg = grp ? find_reg(grp, addr) : NULL;
+
+    /*
+     * If there is no register (i.e. it's reserved space) then there is no
+     * offset. Otherwise calculate the offset into the register, which is
+     * zero when the access is aligned to the start of the register.
+     */
+    offset = reg ? addr - (grp->base + reg->offset) : 0;
+
+    assert(offset + size <= sizeof(buffer.bytes));
+
+    /* Position the data where it lands relative to the first register */
+    data &= (1ull << (size * 8)) - 1;
+    data <<= (offset * 8);
+    buffer.word = data;
+
+    /* One bit per buffer byte that the guest is actually writing */
+    byte_mask = (1u << size) - 1;
+    byte_mask <<= offset;
+
+    /*
+     * The buffer is laid out from the start of the first register, so walk
+     * from there. This keeps addr in step with the buffer index when
+     * looking up the register that follows.
+     */
+    addr -= offset;
+
+    for (i = 0; i < offset + size; ) {
+        unsigned int write_size = reg ? reg->size : 1;
+
+        assert(i + write_size <= sizeof(buffer.bytes));
+
+        if (reg)
+            write_reg(grp, reg, &buffer.bytes[i], byte_mask >> i);
+
+        /* Move onto the next register if the access is not complete */
+        addr += write_size;
+        i += write_size;
+
+        /* Find the next register */
+        grp = find_grp(addr);
+        reg = grp ? find_reg(grp, addr) : NULL;
+    }
+}
+
+/*
+ * Add an emulated register group for a capability found in host config
+ * space.
+ *
+ * id:   capability ID read from the host device
+ * base: config space offset of the capability
+ *
+ * Vendor specific, MSI, power management and PCI Express (end-point
+ * types, capability versions 1 and 2) capabilities are handled. Anything
+ * else is logged and ignored, so the guest gets no emulated registers for
+ * it. On success the new group is appended to the group list.
+ */
+void
+guest_pci_capability_add(uint8_t id, uint64_t base)
+{
+    guest_pci_grp_t *grp = NULL;
+
+    DBG("%s:\n", xenpt_grp_name(id));
+
+    switch (id) {
+    case PCI_CAP_ID_VNDR:
+    {
+        uint8_t len;
+
+        /* The capability carries its own length */
+        host_pci_config_read(base + PCI_VNDR_LEN, sizeof(len), &len);
+
+        grp = calloc(1, sizeof(*grp));
+        assert(grp);
+
+        grp->base = base;
+        grp->regs = vndr_regs;
+        grp->nr_regs = ARRAY_SIZE(vndr_regs);
+        grp->size = len;
+
+        break;
+    }
+    case PCI_CAP_ID_MSI:
+    {
+        uint16_t flags;
+        unsigned int count;
+        const guest_pci_reg_t *reg;
+
+        host_pci_config_read(base + PCI_MSI_FLAGS, sizeof(flags), &flags);
+
+        count = 1u << (flags & PCI_MSI_FLAGS_QMASK);
+        if (count > 1)
+            DBG("host supports %u messages\n", count);
+
+        /* Only the layout-defining flags are carried into guest state */
+        state.msi.flags =
+            flags & (PCI_MSI_FLAGS_64BIT | PCI_MSI_FLAGS_MASKBIT);
+        state.msi.pirq = -1;
+
+        grp = calloc(1, sizeof(*grp));
+        assert(grp);
+
+        grp->base = base;
+
+        /* Pick the register table matching the host's MSI layout */
+        if (flags & PCI_MSI_FLAGS_64BIT) {
+            if (flags & PCI_MSI_FLAGS_MASKBIT) {
+                uint32_t mask;
+
+                grp->regs = msi64_mask_regs;
+                grp->nr_regs = ARRAY_SIZE(msi64_mask_regs);
+
+                host_pci_config_read(base + PCI_MSI_MASK_64,
+                                     sizeof(mask), &mask);
+                state.msi.masked = mask & 1;
+            } else {
+                grp->regs = msi64_regs;
+                grp->nr_regs = ARRAY_SIZE(msi64_regs);
+            }
+        } else {
+            if (flags & PCI_MSI_FLAGS_MASKBIT) {
+                uint32_t mask;
+
+                grp->regs = msi32_mask_regs;
+                grp->nr_regs = ARRAY_SIZE(msi32_mask_regs);
+
+                host_pci_config_read(base + PCI_MSI_MASK_32,
+                                     sizeof(mask), &mask);
+                state.msi.masked = mask & 1;
+            } else {
+                grp->regs = msi32_regs;
+                grp->nr_regs = ARRAY_SIZE(msi32_regs);
+            }
+        }
+
+        /* Determine the capability size from the emulated registers */
+        reg = &grp->regs[grp->nr_regs - 1];
+        grp->size = reg->offset + reg->size;
+
+        break;
+    }
+    case PCI_CAP_ID_PM:
+        grp = calloc(1, sizeof(*grp));
+        assert(grp);
+
+        grp->base = base;
+        grp->regs = pm_regs;
+        grp->nr_regs = ARRAY_SIZE(pm_regs);
+        grp->size = PCI_PM_SIZEOF;
+
+        break;
+
+    case PCI_CAP_ID_EXP: {
+        uint16_t flags, vers, type;
+        const guest_pci_reg_t *regs = NULL;
+        const guest_pci_reg_t *reg;
+        unsigned int nr_regs = 0;
+
+        host_pci_config_read(base + PCI_EXP_FLAGS, sizeof(flags), &flags);
+
+        type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
+        vers = flags & PCI_EXP_FLAGS_VERS;
+
+        DBG("type = %u vers = %u\n", type, vers);
+
+        /* We only emulate end-points */
+        if (type != PCI_EXP_TYPE_ENDPOINT &&
+            type != PCI_EXP_TYPE_LEG_END &&
+            type != PCI_EXP_TYPE_RC_END)
+            break;
+
+        switch (vers) {
+        case 1:
+            regs = exp_v1_regs;
+            nr_regs = ARRAY_SIZE(exp_v1_regs);
+            break;
+
+        case 2:
+            regs = exp_v2_regs;
+            nr_regs = ARRAY_SIZE(exp_v2_regs);
+            break;
+
+        default:
+            break;
+        }
+
+        /*
+         * There is no register table for other capability versions. Skip
+         * the capability rather than index an empty table below.
+         */
+        if (!regs) {
+            DBG("NOT SUPPORTED\n");
+            break;
+        }
+
+        grp = calloc(1, sizeof(*grp));
+        assert(grp);
+
+        grp->base = base;
+        grp->regs = regs;
+        grp->nr_regs = nr_regs;
+
+        /* Determine the capability size from the emulated registers */
+        reg = &grp->regs[grp->nr_regs - 1];
+        grp->size = reg->offset + reg->size;
+
+        break;
+    }
+    default:
+        DBG("NOT SUPPORTED\n");
+        break;
+    }
+
+    if (!grp)
+        return;
+
+    grp->id = id;
+    assert(grp->size);
+
+    /* Append to the tail of the group list */
+    assert(!grp->next);
+    *state.grps_tail = grp;
+    state.grps_tail = &grp->next;
+}
+
+/*
+ * Log the current value of every emulated register in every group, one
+ * line per register, formatted according to the register width.
+ */
+void
+guest_pci_config_dump(void)
+{
+    guest_pci_grp_t *grp = state.grps;
+
+    while (grp) {
+        unsigned int idx;
+
+        for (idx = 0; idx < grp->nr_regs; idx++) {
+            const guest_pci_reg_t *reg = &grp->regs[idx];
+            uint8_t raw[sizeof(uint32_t)];
+
+            read_reg(grp, reg, raw);
+
+            if (reg->size == 1) {
+                DBG("%s:%s: %02x\n", xenpt_grp_name(grp->id), reg->name,
+                    raw[0]);
+            } else if (reg->size == 2) {
+                DBG("%s:%s: %04x\n", xenpt_grp_name(grp->id), reg->name,
+                    *(uint16_t *)raw);
+            } else if (reg->size == 4) {
+                DBG("%s:%s: %08x\n", xenpt_grp_name(grp->id), reg->name,
+                    *(uint32_t *)raw);
+            } else {
+                /* Registers are only ever 1, 2 or 4 bytes wide */
+                assert(false);
+            }
+        }
+
+        grp = grp->next;
+    }
+}
+
+/*
+ * Set up emulation of the guest-visible PCI device.
+ *
+ * xdh:        device model handle used to register with the ioreq server
+ * domid:      guest domain
+ * ioservid:   ioreq server that will receive the config space accesses
+ * guest_sbdf: segment/bus/device/function the guest will see
+ *
+ * Registers the device with the ioreq server, creates the standard header
+ * register group, sizes the host BARs and expansion ROM, maps the
+ * interrupt line and assigns the host device to the guest.
+ *
+ * Returns 0 on success. On failure returns -1 after undoing whatever had
+ * already been set up, so guest_pci_teardown() must not be called.
+ */
+int
+guest_pci_initialize(xendevicemodel_handle *xdh,
+                     domid_t domid, ioservid_t ioservid,
+                     const char *guest_sbdf)
+{
+    guest_pci_grp_t *grp = NULL;
+    guest_pci_rom_t *rom = &state.rom;
+    unsigned int i;
+    uint32_t data;
+    unsigned int lsb;
+    const guest_pci_reg_t *reg;
+    int rc;
+
+    state.xdh = xdh;
+    state.domid = domid;
+    state.ioservid = ioservid;
+
+    rc = xenpt_parse_sbdf(guest_sbdf, &state.segment, &state.bus,
+                          &state.device, &state.function);
+    if (rc)
+        goto fail1;
+
+    DBG("%04x:%02x:%02x.%02x\n", state.segment, state.bus, state.device,
+        state.function);
+
+    rc = xendevicemodel_map_pcidev_to_ioreq_server(
+        state.xdh, state.domid, state.ioservid,
+        state.segment, state.bus, state.device, state.function);
+    if (rc)
+        goto fail2;
+
+    /* The standard header is always the first group in the list */
+    grp = calloc(1, sizeof(*grp));
+    assert(grp);
+
+    assert(grp->id == PCI_CAP_ID_STD_HEADER); /* pseudo id */
+    grp->regs = std_header_regs;
+    grp->nr_regs = ARRAY_SIZE(std_header_regs);
+    assert(!grp->base);
+    grp->size = PCI_STD_HEADER_SIZEOF;
+
+    assert(!grp->next);
+    state.grps = grp;
+    state.grps_tail = &grp->next;
+
+    for (i = 0; i < ARRAY_SIZE(state.bar); i++) {
+        guest_pci_bar_t *bar = &state.bar[i];
+        uint64_t addr = BAR_INDEX_TO_ADDR(i);
+
+        host_pci_config_read(addr, sizeof(bar->host_addr), &bar->host_addr);
+
+        if (i && state.bar[i - 1].type == GUEST_PCI_BAR_TYPE_MEM64_LO) {
+            /* Upper half of the preceding 64-bit BAR: nothing to size */
+            bar->type = GUEST_PCI_BAR_TYPE_MEM64_HI;
+
+            DBG("BAR[%u]: type: %s\n", i, BAR_TYPE(bar));
+        } else {
+            /*
+             * Size the BAR by writing all ones and reading back which
+             * address bits stick. The original value is restored below.
+             */
+            data = ~0u;
+
+            host_pci_config_write(addr, sizeof(data), &data);
+            host_pci_config_read(addr, sizeof(data), &data);
+
+            if ((data & PCI_BASE_ADDRESS_SPACE) ==
+                PCI_BASE_ADDRESS_SPACE_IO) {
+                bar->type = GUEST_PCI_BAR_TYPE_IO;
+
+                data &= PCI_BASE_ADDRESS_IO_MASK;
+                bar->host_addr &= PCI_BASE_ADDRESS_IO_MASK;
+            } else {
+                if (data & PCI_BASE_ADDRESS_MEM_TYPE_64)
+                    bar->type = GUEST_PCI_BAR_TYPE_MEM64_LO;
+                else if (data != 0)
+                    bar->type = GUEST_PCI_BAR_TYPE_MEM32;
+                else
+                    bar->type = GUEST_PCI_BAR_TYPE_NONE;
+
+                bar->prefetch = data & PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+                data &= PCI_BASE_ADDRESS_MEM_MASK;
+                bar->host_addr &= PCI_BASE_ADDRESS_MEM_MASK;
+            }
+
+            /* The lowest writable address bit gives the BAR size */
+            lsb = ffs(data);
+            if (lsb)
+                bar->size = 1u << (lsb - 1);
+
+            DBG("BAR[%u]: type: %s size: %#x prefetchable: %s\n",
+                i, BAR_TYPE(bar), bar->size,
+                bar->prefetch ? "yes" : "no");
+
+            host_pci_config_write(addr, sizeof(bar->host_addr),
+                                  &bar->host_addr);
+        }
+    }
+
+    /* Size the expansion ROM the same way, keeping it disabled */
+    host_pci_config_read(PCI_ROM_ADDRESS, sizeof(rom->host_addr),
+                         &rom->host_addr);
+
+    data = ~0u;
+    data &= ~PCI_ROM_ADDRESS_ENABLE;
+
+    host_pci_config_write(PCI_ROM_ADDRESS, sizeof(data), &data);
+    host_pci_config_read(PCI_ROM_ADDRESS, sizeof(data), &data);
+
+    lsb = ffs(data);
+    if (lsb)
+        rom->size = 1u << (lsb - 1);
+
+    if (rom->size)
+        DBG("ROM: size: %#x\n", rom->size);
+
+    host_pci_config_write(PCI_ROM_ADDRESS, sizeof(rom->host_addr),
+                          &rom->host_addr);
+
+    rc = host_pci_map_interrupt_line();
+    if (rc < 0)
+        goto fail3;
+
+    /* Initialize mappings by setting the command register */
+    reg = find_reg(grp, PCI_COMMAND);
+    assert(reg);
+
+    data = 0;
+
+    reg->write(grp, reg, (uint8_t *)&data);
+
+    rc = host_pci_assign_device();
+    if (rc < 0)
+        goto fail4;
+
+    return 0;
+
+fail4:
+    DBG("fail4\n");
+
+    host_pci_unmap_interrupt_line();
+
+fail3:
+    DBG("fail3\n");
+
+    /* Drop the header group so no stale list is left behind */
+    state.grps_tail = NULL;
+    state.grps = NULL;
+    free(grp);
+
+    (void) xendevicemodel_unmap_pcidev_from_ioreq_server(
+        state.xdh, state.domid, state.ioservid,
+        state.segment, state.bus, state.device, state.function);
+
+fail2:
+    DBG("fail2\n");
+
+fail1:
+    DBG("fail1\n");
+
+    return -1;
+}
+
+/*
+ * Undo guest_pci_initialize(): disable MSI if the guest left it enabled,
+ * clear the command register, deassign the device, unmap the interrupt
+ * line, free every register group and unregister from the ioreq server.
+ */
+void
+guest_pci_teardown(void)
+{
+    guest_pci_grp_t *head = state.grps;
+    guest_pci_grp_t *cur, *following;
+    const guest_pci_reg_t *cmd;
+    uint16_t zero = 0;
+
+    DBG("%04x:%02x:%02x.%02x\n", state.segment, state.bus, state.device,
+        state.function);
+
+    if (state.msi.enabled)
+        msi_disable(&state.msi);
+
+    /* Tear down mappings by setting the command register */
+    cmd = find_reg(head, PCI_COMMAND);
+    assert(cmd);
+
+    cmd->write(head, cmd, (uint8_t *)&zero);
+
+    host_pci_deassign_device();
+
+    host_pci_unmap_interrupt_line();
+
+    /* Release the register groups, remembering each successor first */
+    for (cur = head; cur; cur = following) {
+        following = cur->next;
+
+        DBG("%s (@%02x)\n", xenpt_grp_name(cur->id), cur->base);
+
+        free(cur);
+    }
+    state.grps_tail = NULL;
+    state.grps = NULL;
+
+    (void) xendevicemodel_unmap_pcidev_from_ioreq_server(
+        state.xdh, state.domid, state.ioservid,
+        state.segment, state.bus, state.device, state.function);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * c-tab-always-indent: nil
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/guest_pci.h b/guest_pci.h
new file mode 100644 (file)
index 0000000..7ee488f
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019, Citrix Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef  _GUEST_PCI_H
+#define  _GUEST_PCI_H
+
+#include <xenctrl.h>
+#include <xendevicemodel.h>
+
+/*
+ * Emulated config space accessors. size must be 1, 2 or 4 and addr must be
+ * aligned to size and lie within config space; an invalid read returns 0
+ * and an invalid write is dropped.
+ */
+uint32_t guest_pci_config_read(uint64_t addr, unsigned int size);
+void guest_pci_config_write(uint64_t addr, unsigned int size, uint32_t data);
+
+/*
+ * Add an emulated register group for the capability with the given ID
+ * located at config space offset host_base (the definition in guest_pci.c
+ * names this parameter 'base'). Unsupported capabilities are ignored.
+ */
+void guest_pci_capability_add(uint8_t id, uint64_t host_base);
+
+/* Log the current value of every emulated register */
+void guest_pci_config_dump(void);
+
+/*
+ * guest_pci_initialize() returns 0 on success and -1 on failure.
+ * guest_pci_teardown() releases what a successful initialize set up.
+ */
+int guest_pci_initialize(xendevicemodel_handle *xdh,
+                         domid_t domid, ioservid_t ioservid,
+                         const char *guest_sbdf);
+void guest_pci_teardown(void);
+
+#endif  /* _GUEST_PCI_H */
diff --git a/host_pci.c b/host_pci.c
new file mode 100644 (file)
index 0000000..ef47164
--- /dev/null
@@ -0,0 +1,792 @@
+/*
+ * Copyright (c) 2019, Citrix Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <err.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+
+#include <xenctrl.h>
+
+#include "debug.h"
+#include "xenpt.h"
+#include "host_pci.h"
+#include "guest_pci.h"
+
+/* State describing the single host PCI device being passed through */
+typedef struct host_pci {
+    /* libxenctrl handle and target domain, as given to host_pci_initialize */
+    xc_interface *xch;
+    domid_t domid;
+    /* Host address of the device, parsed from the host SBDF string */
+    uint16_t segment;
+    uint8_t bus;
+    uint8_t device;
+    uint8_t function;
+    /* Heap-allocated sysfs directory of the device */
+    char *path;
+    /* Descriptor for the device's sysfs 'config' file */
+    int config_fd;
+    /* pirq obtained from xc_physdev_map_pirq for the interrupt line */
+    int pirq;
+    /* Value read from the PCI_INTERRUPT_PIN register */
+    uint8_t pin;
+} host_pci_t;
+
+/* File-local singleton instance */
+static host_pci_t state;
+
+/*
+ * Read 'size' bytes of host config space at 'offset' into 'data'. Anything
+ * other than a complete transfer trips the assertion.
+ */
+void
+host_pci_config_read(off_t offset, size_t size, void *data)
+{
+    ssize_t nread = pread(state.config_fd, data, size, offset);
+
+    assert((size_t)nread == size);
+}
+
+/*
+ * Write 'size' bytes from 'data' to host config space at 'offset'. Anything
+ * other than a complete transfer trips the assertion.
+ */
+void
+host_pci_config_write(off_t offset, size_t size, const void *data)
+{
+    ssize_t nwritten = pwrite(state.config_fd, data, size, offset);
+
+    assert((size_t)nwritten == size);
+}
+
+/*
+ * Write the host device's address, formatted as SSSS:BB:DD.F, to the file
+ * at 'path'. Returns 0 on success and -1 on failure.
+ */
+static int
+write_sbdf(const char *path)
+{
+    char *text;
+    int out;
+
+    if (asprintf(&text, "%04x:%02x:%02x.%x", state.segment, state.bus,
+                 state.device, state.function) < 0)
+        goto fail1;
+
+    out = open(path, O_WRONLY);
+    if (out < 0)
+        goto fail2;
+
+    if (write(out, text, strlen(text)) < 0)
+        goto fail3;
+
+    close(out);
+    free(text);
+
+    return 0;
+
+fail3:
+    DBG("fail3\n");
+
+    close(out);
+
+fail2:
+    DBG("fail2\n");
+
+    free(text);
+
+fail1:
+    DBG("fail1\n");
+
+    return -1;
+}
+
+/*
+ * Detach the host device from the driver whose sysfs directory is
+ * 'driver_path' by writing the device address to that driver's 'unbind'
+ * file. Returns 0 on success and -1 on failure.
+ */
+static int
+unbind_device(const char *driver_path)
+{
+    char *target;
+    int status;
+
+    DBG("%s\n", driver_path);
+
+    if (asprintf(&target, "%s/unbind", driver_path) < 0)
+        goto fail1;
+
+    status = write_sbdf(target);
+    if (status < 0)
+        goto fail2;
+
+    free(target);
+
+    return 0;
+
+fail2:
+    DBG("fail2\n");
+
+    free(target);
+
+fail1:
+    DBG("fail1\n");
+
+    return -1;
+}
+
+/* sysfs directory of the pciback driver */
+#define PCIBACK_PATH "/sys/bus/pci/drivers/pciback"
+
+/*
+ * Hand the host device over to pciback.
+ *
+ * Any driver currently bound to the device is unbound first. A pciback
+ * slot is then created for the device unless one is already listed, and
+ * finally the device is bound to pciback.
+ *
+ * Returns 0 on success and -1 on failure. All temporary path strings are
+ * released on every exit path.
+ */
+static int
+bind_device(void)
+{
+    char *driver_link;
+    struct stat st;
+    char *path;
+    FILE *f;
+    unsigned int segment, bus, device, function;
+    int rc;
+
+    rc = asprintf(&driver_link, "%s/driver", state.path);
+    if (rc < 0)
+        goto fail1;
+
+    /* Make sure the device is not currently bound to a driver */
+    if (!lstat(driver_link, &st)) {
+        char *driver_path = realpath(driver_link, NULL);
+
+        if (!driver_path)
+            goto fail2;
+
+        rc = unbind_device(driver_path);
+
+        free(driver_path);
+
+        if (rc < 0)
+            goto fail3;
+    }
+
+    /*
+     * The link name is not needed again. Clear the pointer so the shared
+     * failure path below can free it unconditionally.
+     */
+    free(driver_link);
+    driver_link = NULL;
+
+    /* Check whether pciback already has a slot set up for the device */
+    rc = asprintf(&path, "%s/slots", PCIBACK_PATH);
+    if (rc < 0)
+        goto fail4;
+
+    f = fopen(path, "r");
+
+    /* The name is only needed for the open itself */
+    free(path);
+
+    if (!f)
+        goto fail5;
+
+    while (fscanf(f, "%x:%x:%x.%x", &segment, &bus, &device,
+                  &function) == 4) {
+        if (segment == state.segment &&
+            bus == state.bus &&
+            device == state.device &&
+            function == state.function) {
+            fclose(f);
+            goto bind;
+        }
+    }
+
+    fclose(f);
+
+    rc = asprintf(&path, "%s/new_slot", PCIBACK_PATH);
+    if (rc < 0)
+        goto fail6;
+
+    rc = write_sbdf(path);
+
+    free(path);
+
+    if (rc < 0)
+        goto fail7;
+
+bind:
+    rc = asprintf(&path, "%s/bind", PCIBACK_PATH);
+    if (rc < 0)
+        goto fail8;
+
+    rc = write_sbdf(path);
+
+    free(path);
+
+    if (rc < 0)
+        goto fail9;
+
+    return 0;
+
+fail9:
+    DBG("fail9\n");
+
+fail8:
+    DBG("fail8\n");
+
+fail7:
+    DBG("fail7\n");
+
+fail6:
+    DBG("fail6\n");
+
+fail5:
+    DBG("fail5\n");
+
+fail4:
+    DBG("fail4\n");
+
+fail3:
+    DBG("fail3\n");
+
+fail2:
+    DBG("fail2\n");
+
+    /* NULL (a no-op) when arriving from fail4 or later */
+    free(driver_link);
+
+fail1:
+    DBG("fail1\n");
+
+    return -1;
+}
+
+/*
+ * Reset the host device by writing "1" to its sysfs 'reset' file.
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+reset_device(void)
+{
+    char *reset_path;
+    int out;
+
+    if (asprintf(&reset_path, "%s/reset", state.path) < 0)
+        goto fail1;
+
+    out = open(reset_path, O_WRONLY);
+    if (out < 0)
+        goto fail2;
+
+    if (write(out, "1", 1) < 0)
+        goto fail3;
+
+    close(out);
+    free(reset_path);
+
+    return 0;
+
+fail3:
+    DBG("fail3\n");
+
+    close(out);
+
+fail2:
+    DBG("fail2\n");
+
+    free(reset_path);
+
+fail1:
+    DBG("fail1\n");
+
+    return -1;
+}
+
+/*
+ * Read the host device's IRQ number from its sysfs 'irq' file.
+ *
+ * irq: receives the parsed number on success
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+get_irq(unsigned int *irq)
+{
+    char *path;
+    int fd;
+    /* Room for any 32-bit value so long IRQ numbers are not cut short */
+    char buf[sizeof("4294967295")];
+    int rc;
+
+    rc = asprintf(&path, "%s/irq", state.path);
+    if (rc < 0)
+        goto fail1;
+
+    fd = open(path, O_RDONLY);
+
+    /* The name is only needed for the open itself */
+    free(path);
+
+    if (fd < 0)
+        goto fail2;
+
+    rc = read(fd, buf, sizeof(buf) - 1);
+    if (rc < 0)
+        goto fail3;
+
+    buf[rc] = '\0';
+    *irq = strtoul(buf, NULL, 0);
+
+    close(fd);
+
+    return 0;
+
+fail3:
+    DBG("fail3\n");
+
+    close(fd);
+
+fail2:
+    DBG("fail2\n");
+
+fail1:
+    DBG("fail1\n");
+
+    return -1;
+}
+
+/*
+ * Prepare the host device for pass-through.
+ *
+ * xch:       libxenctrl handle used for later hypercalls
+ * domid:     guest domain the device will be given to
+ * host_sbdf: segment/bus/device/function of the host device
+ *
+ * Opens the device's sysfs config space, checks it is a normal (non-bridge)
+ * function, binds it to pciback and resets it.
+ *
+ * Returns 0 on success. On failure returns -1 with the config descriptor
+ * closed and the sysfs path released; errno is set to EOPNOTSUPP when the
+ * device is not a normal function.
+ */
+int
+host_pci_initialize(xc_interface *xch, domid_t domid, const char *host_sbdf)
+{
+    char *config_path;
+    uint8_t header_type;
+    int rc;
+
+    state.xch = xch;
+    state.domid = domid;
+
+    rc = xenpt_parse_sbdf(host_sbdf, &state.segment, &state.bus,
+                          &state.device, &state.function);
+    if (rc)
+        goto fail1;
+
+    DBG("%04x:%02x:%02x.%02x\n", state.segment, state.bus, state.device,
+        state.function);
+
+    rc = asprintf(&state.path, "/sys/bus/pci/devices/%04x:%02x:%02x.%d",
+                  state.segment, state.bus, state.device, state.function);
+    if (rc < 0)
+        goto fail2;
+
+    rc = asprintf(&config_path, "%s/config", state.path);
+    if (rc < 0)
+        goto fail3;
+
+    state.config_fd = open(config_path, O_RDWR);
+
+    /* The name is only needed for the open itself */
+    free(config_path);
+
+    if (state.config_fd < 0)
+        goto fail4;
+
+    host_pci_config_read(PCI_HEADER_TYPE, sizeof(header_type),
+                         &header_type);
+    header_type &= 0x7F; /* The top bit is the multi-function flag */
+
+    /* Only support pass-through of normal devices, not bridges */
+    if (header_type != PCI_HEADER_TYPE_NORMAL) {
+        errno = EOPNOTSUPP;
+        goto fail5;
+    }
+
+    rc = bind_device();
+    if (rc < 0)
+        goto fail6;
+
+    rc = reset_device();
+    if (rc < 0)
+        goto fail7;
+
+    return 0;
+
+fail7:
+    DBG("fail7\n");
+
+fail6:
+    DBG("fail6\n");
+
+fail5:
+    DBG("fail5\n");
+
+    close(state.config_fd);
+    state.config_fd = -1;
+
+fail4:
+    DBG("fail4\n");
+
+fail3:
+    DBG("fail3\n");
+
+    /*
+     * state.path is only valid once its asprintf() has succeeded, so it is
+     * released here and not on the fail2 path.
+     */
+    free(state.path);
+    state.path = NULL;
+
+fail2:
+    DBG("fail2\n");
+
+fail1:
+    DBG("fail1\n");
+
+    return -1;
+}
+
+/*
+ * Release what host_pci_initialize() acquired: the config space descriptor
+ * and the sysfs path string.
+ */
+void
+host_pci_teardown(void)
+{
+    DBG("%04x:%02x:%02x.%02x\n",
+        state.segment, state.bus, state.device, state.function);
+
+    close(state.config_fd);
+
+    free(state.path);
+}
+
+/*
+ * Walk the host device's capability list and offer each capability to
+ * guest_pci_capability_add().
+ *
+ * The walk is capped so that a malformed list which loops back on itself
+ * cannot spin forever, and the reserved low two bits of each pointer are
+ * masked off before use.
+ */
+void
+host_pci_probe_capabilities(void)
+{
+    uint16_t sts;
+    uint8_t cap;
+    unsigned int ttl;
+
+    host_pci_config_read(PCI_STATUS, sizeof(sts), &sts);
+
+    /* Check whether there are actually any capabilities */
+    if (!(sts & PCI_STATUS_CAP_LIST))
+        return;
+
+    host_pci_config_read(PCI_CAPABILITY_LIST, sizeof(cap), &cap);
+    cap &= 0xfc;
+
+    /*
+     * Capabilities are dword aligned and live above the standard header,
+     * so 48 is the most that fit in 256 bytes of config space.
+     */
+    for (ttl = 48; cap && ttl; ttl--) {
+        uint8_t id, next;
+
+        assert(cap >= PCI_STD_HEADER_SIZEOF);
+
+        host_pci_config_read(cap + PCI_CAP_LIST_ID, sizeof(id), &id);
+        host_pci_config_read(cap + PCI_CAP_LIST_NEXT, sizeof(next), &next);
+
+        DBG("%s (@%02x)\n", xenpt_grp_name(id), cap);
+
+        guest_pci_capability_add(id, cap);
+
+        cap = next & 0xfc;
+    }
+
+    /* A non-zero pointer here means the cap above stopped the walk */
+    if (cap)
+        DBG("capability list too long\n");
+}
+
+/*
+ * Assign the host device to the guest domain. Returns the result of
+ * xc_assign_device().
+ */
+int
+host_pci_assign_device(void)
+{
+    /*
+     * Widen the segment before shifting: as a promoted int, a segment of
+     * 0x8000 or above would shift into the sign bit.
+     */
+    uint32_t sbdf = (uint32_t)state.segment << 16 |
+        (uint32_t)state.bus << 8 |
+        PCI_DEVFN(state.device, state.function);
+    uint32_t flags = XEN_DOMCTL_DEV_RDM_RELAXED;
+
+    return xc_assign_device(state.xch, state.domid, sbdf, flags);
+}
+
+/*
+ * Remove the host device from the guest domain. Returns the result of
+ * xc_deassign_device().
+ */
+int
+host_pci_deassign_device(void)
+{
+    /*
+     * Widen the segment before shifting: as a promoted int, a segment of
+     * 0x8000 or above would shift into the sign bit.
+     */
+    uint32_t sbdf = (uint32_t)state.segment << 16 |
+        (uint32_t)state.bus << 8 |
+        PCI_DEVFN(state.device, state.function);
+
+    return xc_deassign_device(state.xch, state.domid, sbdf);
+}
+
+/*
+ * Route the host device's interrupt line to the guest.
+ *
+ * Looks up the host IRQ, stores it in the device's interrupt line
+ * register, maps it to a pirq, grants the guest access to that pirq and
+ * binds it to the device's interrupt pin.
+ *
+ * Returns 0 on success. On failure returns -1 after revoking the IRQ
+ * permission and pirq mapping that had already been set up.
+ */
+int host_pci_map_interrupt_line(void)
+{
+    unsigned int irq;
+    int rc;
+
+    rc = get_irq(&irq);
+    if (rc < 0)
+        goto fail1;
+
+    /*
+     * Store it in host config space so the register can be passed
+     * through.
+     */
+    host_pci_config_write(PCI_INTERRUPT_LINE, sizeof(uint8_t), &irq);
+
+    state.pirq = -1;
+    rc = xc_physdev_map_pirq(state.xch, state.domid, irq, &state.pirq);
+    if (rc < 0)
+        goto fail2;
+
+    rc = xc_domain_irq_permission(state.xch, state.domid, state.pirq, 1);
+    if (rc < 0)
+        goto fail3;
+
+    host_pci_config_read(PCI_INTERRUPT_PIN, sizeof(state.pin), &state.pin);
+
+    DBG("IRQ: line: %#x pin: %c\n", state.pirq, 'A' + state.pin);
+
+    /*
+     * NOTE(review): the raw pin register value is passed as the intx
+     * argument and the host devfn as the device argument. Confirm both
+     * against what xc_domain_bind_pt_pci_irq() expects.
+     */
+    rc = xc_domain_bind_pt_pci_irq(state.xch, state.domid,
+                                   state.pirq, state.bus,
+                                   PCI_DEVFN(state.device, state.function),
+                                   state.pin);
+    if (rc < 0)
+        goto fail4;
+
+    return 0;
+
+fail4:
+    DBG("fail4\n");
+
+    (void) xc_domain_irq_permission(state.xch, state.domid, state.pirq, 0);
+
+fail3:
+    DBG("fail3\n");
+
+    (void) xc_physdev_unmap_pirq(state.xch, state.domid, state.pirq);
+    state.pirq = -1;
+
+fail2:
+    DBG("fail2\n");
+
+fail1:
+    DBG("fail1\n");
+
+    return -1;
+}
+
+/*
+ * Undo host_pci_map_interrupt_line(): unbind the pass-through IRQ from the
+ * guest and release the pirq. Failures are ignored.
+ */
+void
+host_pci_unmap_interrupt_line(void)
+{
+    int devfn = PCI_DEVFN(state.device, state.function);
+
+    (void) xc_domain_unbind_pt_irq(state.xch, state.domid, state.pirq,
+                                   PT_IRQ_TYPE_PCI, state.bus, devfn,
+                                   state.pin, 0);
+
+    (void) xc_physdev_unmap_pirq(state.xch, state.domid, state.pirq);
+}
+
+/*
+ * Give the guest access to 'size' host I/O ports starting at host_port and
+ * map them at guest_port. Returns 0 on success; on failure the permission
+ * is revoked again and -1 is returned.
+ */
+int
+host_pci_map_ioport(uint32_t guest_port, uint32_t host_port, uint32_t size)
+{
+    DBG("%#x -> %#x (+%#x)\n", guest_port, host_port, size);
+
+    assert(guest_port);
+
+    if (xc_domain_ioport_permission(state.xch, state.domid, host_port,
+                                    size, 1))
+        goto fail1;
+
+    if (xc_domain_ioport_mapping(state.xch, state.domid, guest_port,
+                                 host_port, size, DPCI_ADD_MAPPING))
+        goto fail2;
+
+    return 0;
+
+fail2:
+    DBG("fail2\n");
+
+    (void) xc_domain_ioport_permission(state.xch, state.domid, host_port,
+                                       size, 0);
+
+fail1:
+    DBG("fail1\n");
+
+    return -1;
+}
+
+/*
+ * Undo host_pci_map_ioport(): remove the guest mapping of the port range,
+ * then revoke the guest's permission to it. Failures are ignored.
+ */
+void
+host_pci_unmap_ioport(uint32_t guest_port, uint32_t host_port,
+                      uint32_t size)
+{
+    DBG("%#x -> %#x (+%#x)\n", guest_port, host_port, size);
+
+    (void) xc_domain_ioport_mapping(state.xch, state.domid,
+                                    guest_port, host_port, size,
+                                    DPCI_REMOVE_MAPPING);
+
+    (void) xc_domain_ioport_permission(state.xch, state.domid,
+                                       host_port, size, 0);
+}
+
+/* Frame number containing addr, and number of frames needed for addr bytes */
+#define XEN_PFN_DOWN(addr) ((addr) >> XC_PAGE_SHIFT)
+#define XEN_PFN_UP(addr) (((addr) + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT)
+
+/*
+ * Give the guest access to 'size' bytes of host MMIO at host_addr and map
+ * them at guest_addr.
+ *
+ * Returns 0 on success. On failure the permission is revoked again, for
+ * the same frame range it was granted on, and -1 is returned.
+ */
+int
+host_pci_map_memory(uint64_t guest_addr, uint64_t host_addr, uint32_t size)
+{
+    int rc;
+
+    DBG("%#"PRIx64" -> %#"PRIx64" (+%#x)\n", guest_addr, host_addr, size);
+
+    assert(guest_addr);
+
+    rc = xc_domain_iomem_permission(state.xch, state.domid,
+                                    XEN_PFN_DOWN(host_addr),
+                                    XEN_PFN_UP(size), 1);
+    if (rc)
+        goto fail1;
+
+    rc = xc_domain_memory_mapping(state.xch, state.domid,
+                                  XEN_PFN_DOWN(guest_addr),
+                                  XEN_PFN_DOWN(host_addr),
+                                  XEN_PFN_UP(size), DPCI_ADD_MAPPING);
+    if (rc)
+        goto fail2;
+
+    return 0;
+
+fail2:
+    DBG("fail2\n");
+
+    /* Must name the same first frame as the grant above */
+    (void) xc_domain_iomem_permission(state.xch, state.domid,
+                                      XEN_PFN_DOWN(host_addr),
+                                      XEN_PFN_UP(size), 0);
+fail1:
+    DBG("fail1\n");
+
+    return -1;
+}
+
+/*
+ * Undo host_pci_map_memory(): remove the guest mapping of the MMIO range,
+ * then revoke the guest's permission to the host frames. Failures are
+ * ignored.
+ */
+void
+host_pci_unmap_memory(uint64_t guest_addr, uint64_t host_addr,
+                      uint32_t size)
+{
+    uint64_t guest_pfn = XEN_PFN_DOWN(guest_addr);
+    uint64_t host_pfn = XEN_PFN_DOWN(host_addr);
+    uint64_t nr_pfns = XEN_PFN_UP(size);
+
+    (void) xc_domain_memory_mapping(state.xch, state.domid,
+                                    guest_pfn, host_pfn, nr_pfns,
+                                    DPCI_REMOVE_MAPPING);
+
+    (void) xc_domain_iomem_permission(state.xch, state.domid,
+                                      host_pfn, nr_pfns, 0);
+}
+
+/* Positions of the fields picked out of the MSI data word */
+#define MSI_DATA_VECTOR_SHIFT 0
+#define MSI_DATA_VECTOR_MASK 0x000000ff
+
+#define MSI_DATA_DELIVERY_MODE_SHIFT 8
+#define MSI_DATA_LEVEL_SHIFT 14
+#define MSI_DATA_TRIGGER_SHIFT 15
+
+/* Positions of the fields picked out of the MSI address */
+#define MSI_ADDR_DEST_MODE_SHIFT 2
+#define MSI_ADDR_REDIRECTION_SHIFT 3
+
+#define MSI_ADDR_DEST_ID_SHIFT 12
+#define MSI_ADDR_DEST_ID_MASK 0x000ff000
+
+#define MSI_ADDR_DEST_IDX_SHIFT 4
+
+/* Extract the vector field from an MSI data word */
+static uint8_t
+msi_vector(uint32_t data)
+{
+    uint32_t field = data & MSI_DATA_VECTOR_MASK;
+
+    return field >> MSI_DATA_VECTOR_SHIFT;
+}
+
+/* Extract the destination ID field from an MSI address */
+static uint8_t
+msi_dest_id(uint64_t addr)
+{
+    uint64_t field = addr & MSI_ADDR_DEST_ID_MASK;
+
+    return field >> MSI_ADDR_DEST_ID_SHIFT;
+}
+
+/*
+ * Extract the extended destination ID: the upper 32 bits of the MSI
+ * address with the low byte cleared.
+ */
+static uint32_t
+msi_ext_dest_id(uint64_t addr)
+{
+    uint64_t upper = addr >> 32;
+
+    return upper & 0xffffff00;
+}
+
+/* Bit positions of the fields within the gflags word built below */
+#define XEN_PT_GFLAGS_SHIFT_DEST_ID 0
+#define XEN_PT_GFLAGS_SHIFT_RH 8
+#define XEN_PT_GFLAGS_SHIFT_DM 9
+#define XEN_PT_GFLAGS_SHIFT_DELIV_MODE 12
+#define XEN_PT_GFLAGS_SHIFT_TRG_MODE 15
+#define XEN_PT_GFLAGS_SHIFT_UNMASKED 16
+
+/*
+ * Pack the destination ID, redirection hint, destination mode, delivery
+ * mode and trigger mode taken from an MSI address/data pair, plus the
+ * inverse of 'masked', into a single gflags word.
+ */
+static uint32_t
+msi_gflags(uint64_t addr, uint32_t data, bool masked)
+{
+    uint32_t gflags;
+    uint32_t field;
+
+    field = msi_dest_id(addr);
+    gflags = field << XEN_PT_GFLAGS_SHIFT_DEST_ID;
+
+    field = (addr >> MSI_ADDR_REDIRECTION_SHIFT) & 0x1;
+    gflags |= field << XEN_PT_GFLAGS_SHIFT_RH;
+
+    field = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+    gflags |= field << XEN_PT_GFLAGS_SHIFT_DM;
+
+    field = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
+    gflags |= field << XEN_PT_GFLAGS_SHIFT_DELIV_MODE;
+
+    field = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+    gflags |= field << XEN_PT_GFLAGS_SHIFT_TRG_MODE;
+
+    if (!masked)
+        gflags |= 1u << XEN_PT_GFLAGS_SHIFT_UNMASKED;
+
+    return gflags;
+}
+
+/*
+ * Map an MSI for the host device to a pirq.
+ *
+ * addr, data: MSI address/data pair programmed by the guest
+ * pirq:       in: pirq to request; out: pirq actually mapped
+ *
+ * When the vector in 'data' is zero the requested pirq is taken from the
+ * destination ID fields of 'addr' instead, or set to -1 if those are zero
+ * too. Returns 0 on success and -1 on failure.
+ */
+int
+host_pci_map_msi(uint64_t addr, uint32_t data, int *pirq)
+{
+    DBG("%"PRIx64":%08x\n", addr, data);
+
+    if (msi_vector(data) == 0) {
+        int requested = msi_ext_dest_id(addr) | msi_dest_id(addr);
+
+        if (requested) {
+            *pirq = requested;
+            DBG("requested pirq %u\n", *pirq);
+        } else {
+            *pirq = -1;
+        }
+    }
+
+    if (xc_physdev_map_pirq_msi(state.xch, state.domid, *pirq, pirq,
+                                PCI_DEVFN(state.device, state.function),
+                                state.bus, 0, 0))
+        goto fail1;
+
+    return 0;
+
+fail1:
+    DBG("fail1\n");
+
+    return -1;
+}
+
+/*
+ * Push the guest's current MSI address/data and mask state for 'pirq' to
+ * Xen. The result of the call is not checked.
+ */
+void
+host_pci_update_msi(uint64_t addr, uint32_t data, int pirq, bool masked)
+{
+    uint32_t gflags = msi_gflags(addr, data, masked);
+    uint8_t gvec = msi_vector(data);
+
+    DBG("%"PRIx64":%08x -> %#x (%s)\n", addr, data, pirq,
+        masked ? "masked" : "unmasked");
+
+    (void) xc_domain_update_msi_irq(state.xch, state.domid, gvec, pirq,
+                                    gflags, 0);
+}
+
+/*
+ * Undo host_pci_map_msi(): unbind the MSI from the guest and release the
+ * pirq. Does nothing when pirq is negative. Failures are ignored.
+ */
+void
+host_pci_unmap_msi(uint64_t addr, uint32_t data, int pirq)
+{
+    uint8_t gvec;
+    uint32_t gflags;
+
+    if (pirq < 0)
+        return;
+
+    gvec = msi_vector(data);
+    gflags = msi_gflags(addr, data, false);
+
+    DBG("%"PRIx64":%08x\n", addr, data);
+
+    (void) xc_domain_unbind_msi_irq(state.xch, state.domid, gvec,
+                                    pirq, gflags);
+
+    (void) xc_physdev_unmap_pirq(state.xch, state.domid, pirq);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * c-tab-always-indent: nil
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/host_pci.h b/host_pci.h
new file mode 100644 (file)
index 0000000..057e4bc
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2019, Citrix Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef  _HOST_PCI_H
+#define  _HOST_PCI_H
+
+/*
+ * Host-side interface used by the passthrough emulator.
+ *
+ * The standard headers below are included explicitly so that this file
+ * does not depend on <xenctrl.h> happening to provide bool, size_t,
+ * off_t and the fixed-width integer types used in the prototypes.
+ */
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <xenctrl.h>
+
+/* Host device configuration space access. */
+void host_pci_config_read(off_t offset, size_t size, void *data);
+void host_pci_config_write(off_t offset, size_t size, const void *data);
+
+/* Set up / tear down the host device named by host_sbdf for 'domid'. */
+int host_pci_initialize(xc_interface *xch, domid_t domid,
+                        const char *host_sbdf);
+void host_pci_teardown(void);
+
+void host_pci_probe_capabilities(void);
+
+int host_pci_assign_device(void);
+int host_pci_deassign_device(void);
+
+int host_pci_map_interrupt_line(void);
+void host_pci_unmap_interrupt_line(void);
+
+int host_pci_map_ioport(uint32_t guest_port, uint32_t host_port,
+                        uint32_t size);
+void host_pci_unmap_ioport(uint32_t guest_port, uint32_t host_port,
+                           uint32_t size);
+
+int host_pci_map_memory(uint64_t guest_addr, uint64_t host_addr,
+                        uint32_t size);
+void host_pci_unmap_memory(uint64_t guest_addr, uint64_t host_addr,
+                           uint32_t size);
+
+/*
+ * MSI handling.  addr/data are the MSI address and data values.
+ *
+ * host_pci_map_msi() returns 0 on success or -1 on failure; *pirq is
+ * passed to Xen as an in/out value.  host_pci_unmap_msi() does nothing
+ * when pirq is negative.
+ */
+int host_pci_map_msi(uint64_t addr, uint32_t data, int *pirq);
+void host_pci_update_msi(uint64_t addr, uint32_t data, int pirq, bool masked);
+void host_pci_unmap_msi(uint64_t addr, uint32_t data, int pirq);
+
+#endif  /* _HOST_PCI_H */
diff --git a/xenpt.c b/xenpt.c
new file mode 100644 (file)
index 0000000..c72000c
--- /dev/null
+++ b/xenpt.c
@@ -0,0 +1,868 @@
+/*
+ * Copyright (c) 2012, Citrix Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <netinet/in.h>
+#include <pwd.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sched.h>
+#include <assert.h>
+
+#include <sys/mman.h>
+#include <sys/poll.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+
+#include <locale.h>
+
+#include <linux/pci_regs.h>
+
+#include <xenctrl.h>
+#include <xenevtchn.h>
+#include <xenforeignmemory.h>
+#include <xendevicemodel.h>
+#include <xen/hvm/ioreq.h>
+
+#include "debug.h"
+#include "xenpt.h"
+#include "host_pci.h"
+#include "guest_pci.h"
+
+/*
+ * Compiler barrier: the empty asm with a "memory" clobber prevents the
+ * compiler from reordering or caching memory accesses across it.  No
+ * instruction is emitted, so this does not order accesses at the CPU
+ * level.
+ */
+#define mb() asm volatile ("" : : : "memory")
+
+/*
+ * Command line option indices.  These index both xenpt_option[] and
+ * xenpt_option_text[]; XENPT_NR_OPTS is the number of real options.
+ */
+enum {
+    XENPT_OPT_DOMAIN = 0,
+    XENPT_OPT_HOST_SBDF,
+    XENPT_OPT_GUEST_SBDF,
+    XENPT_NR_OPTS
+};
+
+/*
+ * Long options understood by getopt_long().  Every option takes a
+ * mandatory argument and is reported through the long-option index
+ * (flag is NULL and val is 0).  The array ends with an all-zero entry.
+ */
+static struct option xenpt_option[] = {
+    [XENPT_OPT_DOMAIN]     = { "domain",     required_argument, NULL, 0 },
+    [XENPT_OPT_HOST_SBDF]  = { "host-sbdf",  required_argument, NULL, 0 },
+    [XENPT_OPT_GUEST_SBDF] = { "guest-sbdf", required_argument, NULL, 0 },
+    [XENPT_NR_OPTS]        = { NULL,         0,                 NULL, 0 }
+};
+
+/* Argument placeholder printed by usage() next to each option name. */
+static const char *xenpt_option_text[] = {
+    [XENPT_OPT_DOMAIN]     = "<domid>",
+    [XENPT_OPT_HOST_SBDF]  = "<host S:B:D.F>",
+    [XENPT_OPT_GUEST_SBDF] = "<guest S:B:D.F>",
+    [XENPT_NR_OPTS]        = NULL
+};
+
+/* Program name for messages; set from argv[0] in main(). */
+static const char *prog;
+
+/*
+ * Print the option summary to stderr and terminate with exit status 2.
+ * Does not return.
+ */
+static void
+usage(void)
+{
+    int opt = 0;
+
+    fprintf(stderr, "Usage: %s <options>\n\n", prog);
+
+    while (opt < XENPT_NR_OPTS) {
+        const char *name = xenpt_option[opt].name;
+        const char *text = xenpt_option_text[opt];
+
+        fprintf(stderr, "\t--%s %s\n", name, text);
+        opt++;
+    }
+
+    fprintf(stderr, "\n");
+
+    exit(2);
+}
+
+/*
+ * Initialization progress.  xenpt_initialize() advances state.seq one
+ * step at a time through xenpt_seq_next(), and xenpt_teardown() compares
+ * state.seq against these values to decide which steps need undoing, so
+ * the order of the enumerators is significant.
+ */
+typedef enum {
+    XENPT_SEQ_UNINITIALIZED = 0,
+    XENPT_SEQ_XENCTRL_OPEN,
+    XENPT_SEQ_XENEVTCHN_OPEN,
+    XENPT_SEQ_XENFOREIGNMEMORY_OPEN,
+    XENPT_SEQ_XENDEVICEMODEL_OPEN,
+    XENPT_SEQ_SERVER_REGISTERED,
+    XENPT_SEQ_RESOURCE_MAPPED,
+    XENPT_SEQ_SERVER_ENABLED,
+    XENPT_SEQ_PORT_ARRAY_ALLOCATED,
+    XENPT_SEQ_PORTS_BOUND,
+    XENPT_SEQ_BUF_PORT_BOUND,
+    XENPT_SEQ_HOST_PCI_INITIALIZED,
+    XENPT_SEQ_GUEST_PCI_INITIALIZED,
+    XENPT_SEQ_HOST_PCI_PROBED_CAPABILITIES,
+    XENPT_SEQ_INITIALIZED,
+    XENPT_NR_SEQS
+} xenpt_seq_t;
+
+/*
+ * Global emulator state: library handles, the ioreq server identity and
+ * its mapped pages, and the locally bound event channel ports.
+ */
+typedef struct xenpt_state {
+    xenpt_seq_t seq;                    /* how far initialization got */
+    xc_interface *xch;                  /* from xc_interface_open() */
+    xenevtchn_handle *xeh;              /* from xenevtchn_open() */
+    xenforeignmemory_handle *xfh;       /* from xenforeignmemory_open() */
+    xendevicemodel_handle *xdh;         /* from xendevicemodel_open() */
+    domid_t domid;                      /* domain being served */
+    unsigned int vcpus;                 /* max_vcpu_id + 1 of that domain */
+    ioservid_t ioservid;                /* id of the created ioreq server */
+    xenforeignmemory_resource_handle *resource; /* ioreq server mapping */
+    shared_iopage_t *shared_iopage;     /* second page of the mapping */
+    evtchn_port_t *ioreq_local_port;    /* per-vCPU local ports, 'vcpus' long */
+    buffered_iopage_t *buffered_iopage; /* first page of the mapping */
+    evtchn_port_t buf_ioreq_port;       /* printed by xenpt_seq_next() */
+    evtchn_port_t buf_ioreq_local_port; /* local port for buffered ioreqs */
+} xenpt_state_t;
+
+/* The single instance; zero-initialized, so seq starts UNINITIALIZED. */
+static xenpt_state_t state;
+
+/*
+ * Service a PCI configuration space request against the emulated guest
+ * device.  Only single (count == 1), non-pointer requests are
+ * supported; anything else trips an assertion.  For reads the result is
+ * stored back into ioreq->data.
+ */
+static void
+handle_pci_config(ioreq_t *ioreq)
+{
+    uint32_t offset = ioreq->addr;
+
+    assert(ioreq->count == 1);
+
+    /* Data passed by guest pointer is not implemented. */
+    if (ioreq->data_is_ptr) {
+        assert(false);
+        return;
+    }
+
+    if (ioreq->dir == IOREQ_READ)
+        ioreq->data = guest_pci_config_read(offset, ioreq->size);
+    else
+        guest_pci_config_write(offset, ioreq->size, ioreq->data);
+}
+
+/*
+ * Dispatch one ioreq by type.  Only PCI configuration accesses are
+ * emulated; port I/O, copy, time-offset and invalidate requests are
+ * accepted and ignored, and any other type is logged.
+ */
+static void
+handle_ioreq(ioreq_t *ioreq)
+{
+    switch (ioreq->type) {
+    case IOREQ_TYPE_PCI_CONFIG:
+        handle_pci_config(ioreq);
+        break;
+
+    case IOREQ_TYPE_PIO:
+    case IOREQ_TYPE_COPY:
+    case IOREQ_TYPE_TIMEOFFSET:
+    case IOREQ_TYPE_INVALIDATE:
+        break;
+
+    default:
+        /* Newline-terminated like every other DBG message in the file. */
+        DBG("UNKNOWN (%02x)\n", ioreq->type);
+        break;
+    }
+}
+
+/*
+ * Advance state.seq to the next initialization step and log the step
+ * just reached, together with the state fields that step established.
+ * Must not be called once XENPT_SEQ_INITIALIZED has been reached.
+ */
+static void
+xenpt_seq_next(void)
+{
+    assert(state.seq < XENPT_SEQ_INITIALIZED);
+
+    switch (++state.seq) {
+    case XENPT_SEQ_XENCTRL_OPEN:
+        DBG(">XENCTRL_OPEN\n");
+        break;
+
+    case XENPT_SEQ_XENEVTCHN_OPEN:
+        DBG(">XENEVTCHN_OPEN\n");
+        break;
+
+    case XENPT_SEQ_XENFOREIGNMEMORY_OPEN:
+        DBG(">XENFOREIGNMEMORY_OPEN\n");
+        break;
+
+    case XENPT_SEQ_XENDEVICEMODEL_OPEN:
+        DBG(">XENDEVICEMODEL_OPEN\n");
+        break;
+
+    case XENPT_SEQ_SERVER_REGISTERED:
+        DBG(">SERVER_REGISTERED\n");
+        DBG("ioservid = %u\n", state.ioservid);
+        break;
+
+    case XENPT_SEQ_RESOURCE_MAPPED:
+        DBG(">RESOURCE_MAPPED\n");
+        DBG("shared iopage = %p\n", state.shared_iopage);
+        DBG("buffered_iopage = %p\n", state.buffered_iopage);
+        break;
+
+    case XENPT_SEQ_SERVER_ENABLED:
+        DBG(">SERVER_ENABLED\n");
+        break;
+
+    case XENPT_SEQ_PORT_ARRAY_ALLOCATED:
+        DBG(">PORT_ARRAY_ALLOCATED\n");
+        break;
+
+    case XENPT_SEQ_PORTS_BOUND: {
+        int i;
+
+        DBG(">EVTCHN_PORTS_BOUND\n");
+
+        /* One line per vCPU: port from the shared page -> local port. */
+        for (i = 0; i < state.vcpus; i++)
+            DBG("VCPU%d: %u -> %u\n", i,
+                state.shared_iopage->vcpu_ioreq[i].vp_eport,
+                state.ioreq_local_port[i]);
+
+        break;
+    }
+
+    case XENPT_SEQ_BUF_PORT_BOUND:
+        DBG(">EVTCHN_BUF_PORT_BOUND\n");
+
+        DBG("%u -> %u\n",
+            state.buf_ioreq_port,
+            state.buf_ioreq_local_port);
+        break;
+
+    case XENPT_SEQ_HOST_PCI_INITIALIZED:
+        DBG(">HOST_PCI_INITIALIZED\n");
+        break;
+
+    case XENPT_SEQ_GUEST_PCI_INITIALIZED:
+        DBG(">GUEST_PCI_INITIALIZED\n");
+        break;
+
+    case XENPT_SEQ_HOST_PCI_PROBED_CAPABILITIES:
+        DBG(">HOST_PCI_PROBED_CAPABILITIES\n");
+        break;
+
+    case XENPT_SEQ_INITIALIZED:
+        DBG(">INITIALIZED\n");
+        break;
+
+    default:
+        assert(false);
+        break;
+    }
+}
+
+/*
+ * Undo, in reverse order, every initialization step that state.seq says
+ * was completed.  Safe to call after a partial xenpt_initialize(): each
+ * step is guarded by a comparison against state.seq.
+ */
+static void
+xenpt_teardown(void)
+{
+    if (state.seq >= XENPT_SEQ_INITIALIZED)
+        DBG("<INITIALIZED\n");
+
+    if (state.seq >= XENPT_SEQ_HOST_PCI_PROBED_CAPABILITIES)
+        DBG("<HOST_PCI_PROBED_CAPABILITIES\n");
+
+    if (state.seq >= XENPT_SEQ_GUEST_PCI_INITIALIZED) {
+        DBG("<GUEST_PCI_INITIALIZED\n");
+        guest_pci_teardown();
+    }
+
+    if (state.seq >= XENPT_SEQ_HOST_PCI_INITIALIZED) {
+        DBG("<HOST_PCI_INITIALIZED\n");
+        host_pci_teardown();
+    }
+
+    if (state.seq >= XENPT_SEQ_BUF_PORT_BOUND) {
+        evtchn_port_t port = state.buf_ioreq_local_port;
+
+        DBG("<EVTCHN_BUF_PORT_BOUND\n");
+
+        DBG("%u\n", port);
+        (void) xenevtchn_unbind(state.xeh, port);
+    }
+
+    if (state.seq >= XENPT_SEQ_PORTS_BOUND)
+        DBG("<EVTCHN_PORTS_BOUND\n");
+
+    if (state.seq >= XENPT_SEQ_PORT_ARRAY_ALLOCATED) {
+        unsigned int i;
+
+        DBG("<PORT_ARRAY_ALLOCATED\n");
+
+        for (i = 0; i < state.vcpus; i++) {
+            evtchn_port_t port;
+
+            port = state.ioreq_local_port[i];
+
+            /*
+             * Slots are pre-filled with -1 and only overwritten once a
+             * port is bound.  evtchn_port_t is unsigned, so a ">= 0"
+             * test can never skip an unbound slot; compare against the
+             * converted sentinel instead.
+             */
+            if (port != (evtchn_port_t)-1) {
+                DBG("VCPU%u: %u\n", i, port);
+                (void) xenevtchn_unbind(state.xeh, port);
+            }
+        }
+
+        free(state.ioreq_local_port);
+    }
+
+    if (state.seq >= XENPT_SEQ_SERVER_ENABLED) {
+        DBG("<SERVER_ENABLED\n");
+        (void) xendevicemodel_set_ioreq_server_state(state.xdh,
+                                                     state.domid,
+                                                     state.ioservid,
+                                                     0);
+    }
+
+    if (state.seq >= XENPT_SEQ_RESOURCE_MAPPED) {
+        DBG("<RESOURCE_MAPPED\n");
+        xenforeignmemory_unmap_resource(state.xfh,
+                                        state.resource);
+    }
+
+    if (state.seq >= XENPT_SEQ_SERVER_REGISTERED) {
+        DBG("<SERVER_REGISTERED\n");
+        (void) xendevicemodel_destroy_ioreq_server(state.xdh,
+                                                   state.domid,
+                                                   state.ioservid);
+    }
+
+    if (state.seq >= XENPT_SEQ_XENDEVICEMODEL_OPEN) {
+        DBG("<XENDEVICEMODEL_OPEN\n");
+        xendevicemodel_close(state.xdh);
+    }
+
+    if (state.seq >= XENPT_SEQ_XENFOREIGNMEMORY_OPEN) {
+        DBG("<XENFOREIGNMEMORY_OPEN\n");
+        xenforeignmemory_close(state.xfh);
+    }
+
+    if (state.seq >= XENPT_SEQ_XENEVTCHN_OPEN) {
+        DBG("<XENEVTCHN_OPEN\n");
+        xenevtchn_close(state.xeh);
+    }
+
+    if (state.seq >= XENPT_SEQ_XENCTRL_OPEN) {
+        DBG("<XENCTRL_OPEN\n");
+        xc_interface_close(state.xch);
+    }
+}
+
+/* Action installed by main() for SIGTERM, SIGINT, SIGHUP and SIGABRT. */
+static struct sigaction sigterm_handler;
+
+/*
+ * Termination signal handler: log the signal name, release everything
+ * via xenpt_teardown() and exit with status 0.
+ */
+static void
+xenpt_sigterm(int num)
+{
+    const char *signame = strsignal(num);
+
+    DBG("%s\n", signame);
+
+    xenpt_teardown();
+
+    exit(0);
+}
+
+/* Action installed by main() for SIGUSR1. */
+static struct sigaction sigusr1_handler;
+
+/*
+ * SIGUSR1 handler: log the signal name and dump the guest-visible PCI
+ * configuration space.
+ *
+ * The handler re-arms itself for SIGUSR1.  It used to re-install itself
+ * on SIGHUP, which replaced the terminate handler main() sets up for
+ * SIGHUP after the first SIGUSR1 was received.
+ */
+static void
+xenpt_sigusr1(int num)
+{
+    DBG("%s\n", strsignal(num));
+
+    sigaction(SIGUSR1, &sigusr1_handler, NULL);
+
+    guest_pci_config_dump();
+}
+
+/*
+ * Bring the emulator up for domain 'domid': open the Xen library
+ * handles, create and map an ioreq server, enable it, bind one event
+ * channel per vCPU plus the buffered-ioreq channel, then initialize the
+ * host device (host_sbdf) and the emulated guest device (guest_sbdf).
+ *
+ * Each completed step is recorded with xenpt_seq_next().  On failure -1
+ * is returned with state.seq left at the last completed step; nothing
+ * is released here, the caller is expected to run xenpt_teardown().
+ * Returns 0 once state.seq reaches XENPT_SEQ_INITIALIZED.
+ */
+static int
+xenpt_initialize(domid_t domid, const char *host_sbdf,
+                const char *guest_sbdf)
+{
+    int rc;
+    xc_dominfo_t dominfo;
+    void *addr;
+    evtchn_port_t port;
+    evtchn_port_t buf_port;
+    unsigned int i;
+
+    state.domid = domid;
+
+    state.xch = xc_interface_open(NULL, NULL, 0);
+    if (state.xch == NULL)
+        goto fail1;
+
+    xenpt_seq_next();
+
+    state.xeh = xenevtchn_open(NULL, 0);
+    if (state.xeh == NULL)
+        goto fail2;
+
+    xenpt_seq_next();
+
+    state.xfh = xenforeignmemory_open(NULL, 0);
+    if (state.xfh == NULL)
+        goto fail3;
+
+    xenpt_seq_next();
+
+    state.xdh = xendevicemodel_open(NULL, 0);
+    if (state.xdh == NULL)
+        goto fail4;
+
+    xenpt_seq_next();
+
+    /*
+     * Exactly one entry was requested, so anything other than 1 means
+     * dominfo was not filled in and must not be inspected.  The domid
+     * comparison catches the lookup landing on a different domain.
+     */
+    rc = xc_domain_getinfo(state.xch, state.domid, 1, &dominfo);
+    if (rc != 1 || dominfo.domid != state.domid)
+        goto fail5;
+
+    state.vcpus = dominfo.max_vcpu_id + 1;
+
+    DBG("%d vCPU(s)\n", state.vcpus);
+
+    rc = xendevicemodel_create_ioreq_server(state.xdh,
+                                            state.domid, 1,
+                                            &state.ioservid);
+    if (rc < 0)
+        goto fail6;
+
+    xenpt_seq_next();
+
+    addr = NULL;
+    state.resource =
+        xenforeignmemory_map_resource(state.xfh, state.domid,
+                                      XENMEM_resource_ioreq_server,
+                                      state.ioservid, 0, 2,
+                                      &addr,
+                                      PROT_READ | PROT_WRITE, 0);
+    if (state.resource == NULL)
+        goto fail7;
+
+    /* Two pages are mapped: buffered page first, shared page second. */
+    state.buffered_iopage = addr;
+    state.shared_iopage = (void *)((uint8_t *)addr + XC_PAGE_SIZE);
+
+    /*
+     * Record the mapping before the next call that can fail, so that
+     * xenpt_teardown() unmaps the resource on that error path too.
+     */
+    xenpt_seq_next();
+
+    rc = xendevicemodel_get_ioreq_server_info(state.xdh,
+                                              state.domid,
+                                              state.ioservid, NULL,
+                                              NULL, &buf_port);
+    if (rc < 0)
+        goto fail8;
+
+    /* Keep the remote port: xenpt_seq_next() logs it once bound. */
+    state.buf_ioreq_port = buf_port;
+
+    rc = xendevicemodel_set_ioreq_server_state(state.xdh,
+                                               state.domid,
+                                               state.ioservid,
+                                               1);
+    if (rc != 0)
+        goto fail9;
+
+    xenpt_seq_next();
+
+    state.ioreq_local_port = malloc(sizeof (evtchn_port_t) *
+                                    state.vcpus);
+    if (state.ioreq_local_port == NULL)
+        goto fail10;
+
+    /* -1 marks a slot whose port has not been bound (yet). */
+    for (i = 0; i < state.vcpus; i++)
+        state.ioreq_local_port[i] = -1;
+
+    xenpt_seq_next();
+
+    for (i = 0; i < state.vcpus; i++) {
+        port = state.shared_iopage->vcpu_ioreq[i].vp_eport;
+
+        rc = xenevtchn_bind_interdomain(state.xeh, state.domid,
+                                        port);
+        if (rc < 0)
+            goto fail11;
+
+        state.ioreq_local_port[i] = rc;
+    }
+
+    xenpt_seq_next();
+
+    rc = xenevtchn_bind_interdomain(state.xeh, state.domid,
+                                    buf_port);
+    if (rc < 0)
+        goto fail12;
+
+    state.buf_ioreq_local_port = rc;
+
+    xenpt_seq_next();
+
+    rc = host_pci_initialize(state.xch, state.domid, host_sbdf);
+    if (rc < 0)
+        goto fail13;
+
+    xenpt_seq_next();
+
+    rc = guest_pci_initialize(state.xdh, state.domid,
+                              state.ioservid, guest_sbdf);
+    if (rc < 0)
+        goto fail14;
+
+    xenpt_seq_next();
+
+    host_pci_probe_capabilities();
+
+    xenpt_seq_next();
+
+    xenpt_seq_next();
+
+    assert(state.seq == XENPT_SEQ_INITIALIZED);
+    return 0;
+
+    /* The labels only log which step failed; they fall through. */
+fail14:
+    DBG("fail14\n");
+
+fail13:
+    DBG("fail13\n");
+
+fail12:
+    DBG("fail12\n");
+
+fail11:
+    DBG("fail11\n");
+
+fail10:
+    DBG("fail10\n");
+
+fail9:
+    DBG("fail9\n");
+
+fail8:
+    DBG("fail8\n");
+
+fail7:
+    DBG("fail7\n");
+
+fail6:
+    DBG("fail6\n");
+
+fail5:
+    DBG("fail5\n");
+
+fail4:
+    DBG("fail4\n");
+
+fail3:
+    DBG("fail3\n");
+
+fail2:
+    DBG("fail2\n");
+
+fail1:
+    DBG("fail1\n");
+
+    warn("fail");
+    return -1;
+}
+
+/*
+ * Drain the buffered ioreq ring: consume slots between read_pointer and
+ * write_pointer, turn each into a full ioreq_t and hand it to
+ * handle_ioreq(), then publish the new read_pointer.  Repeats until the
+ * two pointers are found equal.  Does nothing before initialization has
+ * completed.
+ *
+ * NOTE(review): the only barriers used are mb(), a compiler barrier,
+ * and read_pointer is updated with a plain store.  Confirm that this is
+ * sufficient for the protocol Xen uses on this shared page.
+ */
+static void
+xenpt_poll_buffered_iopage(void)
+{
+    if (state.seq != XENPT_SEQ_INITIALIZED)
+        return;
+
+    for (;;) {
+        unsigned int read_pointer;
+        unsigned int write_pointer;
+
+        /* Snapshot both pointers; the inner loop works on the copies. */
+        read_pointer = state.buffered_iopage->read_pointer;
+        write_pointer = state.buffered_iopage->write_pointer;
+
+        if (read_pointer == write_pointer)
+            break;
+
+        while (read_pointer != write_pointer) {
+            unsigned int slot;
+            buf_ioreq_t *buf_ioreq;
+            ioreq_t ioreq;
+
+            slot = read_pointer % IOREQ_BUFFER_SLOT_NUM;
+
+            buf_ioreq = &state.buffered_iopage->buf_ioreq[slot];
+
+            /* The slot stores log2 of the access size. */
+            ioreq.size = 1UL << buf_ioreq->size;
+            ioreq.count = 1;
+            ioreq.addr = buf_ioreq->addr;
+            ioreq.data = buf_ioreq->data;
+            ioreq.state = STATE_IOREQ_READY;
+            ioreq.dir = buf_ioreq->dir;
+            ioreq.df = 1;
+            ioreq.type = buf_ioreq->type;
+            ioreq.data_is_ptr = 0;
+
+            read_pointer++;
+
+            /*
+             * An 8-byte request occupies two slots; the following slot
+             * supplies the upper 32 bits of the data.
+             * NOTE(review): the second slot is consumed without checking
+             * it against write_pointer.
+             */
+            if (ioreq.size == 8) {
+                slot = read_pointer % IOREQ_BUFFER_SLOT_NUM;
+                buf_ioreq = &state.buffered_iopage->buf_ioreq[slot];
+
+                ioreq.data |= ((uint64_t)buf_ioreq->data) << 32;
+
+                read_pointer++;
+            }
+
+            handle_ioreq(&ioreq);
+            mb();
+        }
+
+        /* Publish how far the ring has been consumed. */
+        state.buffered_iopage->read_pointer = read_pointer;
+        mb();
+    }
+}
+
+/*
+ * Service the synchronous ioreq slot of vCPU 'i' in the shared page.
+ * If the slot is in STATE_IOREQ_READY it is marked in-process, handled,
+ * marked STATE_IORESP_READY and the vCPU's local event channel port is
+ * notified.  Any other slot state is left untouched.  Does nothing
+ * before initialization has completed.
+ *
+ * The order of the state updates relative to the mb() calls is part of
+ * the handshake on the shared page; keep it when editing.
+ */
+static void
+xenpt_poll_shared_iopage(unsigned int i)
+{
+    ioreq_t *ioreq;
+
+    if (state.seq != XENPT_SEQ_INITIALIZED)
+        return;
+
+    ioreq = &state.shared_iopage->vcpu_ioreq[i];
+    if (ioreq->state != STATE_IOREQ_READY)
+        return;
+
+    /* Do not let the compiler read request fields before the state. */
+    mb();
+
+    ioreq->state = STATE_IOREQ_INPROCESS;
+
+    handle_ioreq(ioreq);
+    mb();
+
+    /* Response data is in place; now flip the state and signal. */
+    ioreq->state = STATE_IORESP_READY;
+    mb();
+
+    xenevtchn_notify(state.xeh, state.ioreq_local_port[i]);
+}
+
+/*
+ * Fetch one pending event channel port and service whichever ioreq page
+ * it belongs to: the buffered page for the buffered-ioreq port, or the
+ * shared-page slot of the vCPU whose local port matches.  The port is
+ * unmasked before the page is polled.  Does nothing before
+ * initialization has completed or when no port can be fetched.
+ */
+static void
+xenpt_poll_iopages(void)
+{
+    evtchn_port_t port;
+    unsigned int i;
+    int rc;
+
+    if (state.seq != XENPT_SEQ_INITIALIZED)
+        return;
+
+    /*
+     * xenevtchn_pending() reports errors as a negative value, so keep
+     * the result in a signed variable for the check; storing it straight
+     * into the unsigned evtchn_port_t made the test always false.
+     */
+    rc = xenevtchn_pending(state.xeh);
+    if (rc < 0)
+        return;
+
+    port = rc;
+
+    if (port == state.buf_ioreq_local_port) {
+        xenevtchn_unmask(state.xeh, port);
+        xenpt_poll_buffered_iopage();
+    } else {
+        for (i = 0; i < state.vcpus; i++) {
+            if (port == state.ioreq_local_port[i]) {
+                xenevtchn_unmask(state.xeh, port);
+                xenpt_poll_shared_iopage(i);
+                break;  /* each vCPU has its own local port */
+            }
+        }
+    }
+}
+
+/*
+ * Entry point: parse --domain, --host-sbdf and --guest-sbdf (all
+ * required), install the signal handlers, initialize the emulator and
+ * then service ioreqs until a fatal poll error or a terminating signal.
+ *
+ * Exit status: 2 for usage errors (via usage()), 1 for an invalid
+ * domain, failed initialization or a failed event loop; the
+ * terminating signal handler exits with 0.
+ */
+int
+main(int argc, char **argv, char **envp)
+{
+    char *domain_str;
+    char *host_sbdf_str;
+    char *guest_sbdf_str;
+    int index;
+    char *end;
+    long val;
+    domid_t domid;
+    sigset_t block;
+    struct pollfd pfd;
+    int rc;
+
+    prog = basename(argv[0]);
+
+    domain_str = NULL;
+    host_sbdf_str = NULL;
+    guest_sbdf_str = NULL;
+
+    for (;;) {
+        /*
+         * getopt_long() returns int; a plain char may be unsigned, in
+         * which case the comparison with -1 below would never be true.
+         */
+        int c;
+
+        c = getopt_long(argc, argv, "", xenpt_option, &index);
+        if (c == -1)
+            break;
+
+        if (c != 0) {
+            usage();
+            /*NOTREACHED*/
+        }
+
+        DBG("--%s = '%s'\n", xenpt_option[index].name, optarg);
+
+        switch (index) {
+        case XENPT_OPT_DOMAIN:
+            domain_str = optarg;
+            break;
+
+        case XENPT_OPT_HOST_SBDF:
+            host_sbdf_str = optarg;
+            break;
+
+        case XENPT_OPT_GUEST_SBDF:
+            guest_sbdf_str = optarg;
+            break;
+
+        default:
+            assert(false);
+            break;
+        }
+    }
+
+    if (domain_str == NULL ||
+        host_sbdf_str == NULL ||
+        guest_sbdf_str == NULL) {
+        usage();
+        /*NOTREACHED*/
+    }
+
+    /*
+     * Reject empty strings, trailing junk, negative numbers and values
+     * that do not survive the conversion to domid_t.
+     */
+    errno = 0;
+    val = strtol(domain_str, &end, 0);
+    if (errno != 0 || end == domain_str || *end != '\0' ||
+        val < 0 || (long)(domid_t)val != val) {
+        fprintf(stderr, "invalid domain '%s'\n", domain_str);
+        exit(1);
+    }
+    domid = (domid_t)val;
+
+    /* Block every signal except the ones handled below. */
+    sigfillset(&block);
+
+    memset(&sigterm_handler, 0, sizeof (struct sigaction));
+    sigterm_handler.sa_handler = xenpt_sigterm;
+
+    sigaction(SIGTERM, &sigterm_handler, NULL);
+    sigdelset(&block, SIGTERM);
+
+    sigaction(SIGINT, &sigterm_handler, NULL);
+    sigdelset(&block, SIGINT);
+
+    sigaction(SIGHUP, &sigterm_handler, NULL);
+    sigdelset(&block, SIGHUP);
+
+    sigaction(SIGABRT, &sigterm_handler, NULL);
+    sigdelset(&block, SIGABRT);
+
+    memset(&sigusr1_handler, 0, sizeof (struct sigaction));
+    sigusr1_handler.sa_handler = xenpt_sigusr1;
+
+    sigaction(SIGUSR1, &sigusr1_handler, NULL);
+    sigdelset(&block, SIGUSR1);
+
+    sigprocmask(SIG_BLOCK, &block, NULL);
+
+    rc = xenpt_initialize(domid, host_sbdf_str, guest_sbdf_str);
+    if (rc < 0) {
+        xenpt_teardown();
+        exit(1);
+    }
+
+    guest_pci_config_dump();
+
+    pfd.fd = xenevtchn_fd(state.xeh);
+    pfd.events = POLLIN | POLLERR | POLLHUP;
+    pfd.revents = 0;
+
+    for (;;) {
+        rc = poll(&pfd, 1, 5000);
+
+        if (rc < 0) {
+            if (errno == EINTR)
+                continue;
+            break;
+        }
+
+        if (rc == 0)
+            continue;   /* timeout, nothing pending */
+
+        if (pfd.revents & POLLIN)
+            xenpt_poll_iopages();
+
+        /*
+         * An error or hang-up on the event channel fd will not clear by
+         * itself; leave the loop instead of spinning on it.
+         */
+        if (pfd.revents & (POLLERR | POLLHUP | POLLNVAL))
+            break;
+    }
+
+    /* The event loop only ends on failure: clean up and report it. */
+    xenpt_teardown();
+
+    return 1;
+}
+
+/*
+ * Parse a PCI address given as hexadecimal "S:B:D.F" or, with the
+ * segment defaulting to 0, "B:D.F".
+ *
+ * On success the four components are stored through the output
+ * pointers and 0 is returned.  If the string matches neither form, or a
+ * component is out of range (segment > 0xffff, bus > 0xff,
+ * device > 0x1f, function > 7), errno is set to EINVAL, -1 is returned
+ * and the outputs are left untouched.
+ */
+int xenpt_parse_sbdf(const char *sbdf, uint16_t *segment, uint8_t *bus,
+                    uint8_t *device, uint8_t *function)
+{
+    unsigned int val[4];
+    int rc;
+
+    rc = sscanf(sbdf, "%x:%x:%x.%x", &val[0], &val[1], &val[2], &val[3]);
+    if (rc != 4) {
+        /* Not the four-part form: retry as B:D.F in segment 0. */
+        val[0] = 0;
+
+        rc = sscanf(sbdf, "%x:%x.%x", &val[1], &val[2], &val[3]);
+        if (rc != 3) {
+            errno = EINVAL;
+            return -1;
+        }
+    }
+
+    /* Refuse values that would be silently truncated on assignment. */
+    if (val[0] > 0xffff || val[1] > 0xff ||
+        val[2] > 0x1f || val[3] > 0x7) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    *segment = val[0];
+    *bus = val[1];
+    *device = val[2];
+    *function = val[3];
+
+    return 0;
+}
+
+/*
+ * Return the symbolic name of a PCI capability ID (the part of the
+ * PCI_CAP_ID_* constant after the prefix), or "UNKNOWN" for an ID that
+ * is not in the table.
+ */
+const char *xenpt_grp_name(uint8_t id)
+{
+#define ID_NAME(_id) \
+    [PCI_CAP_ID_ ## _id] = #_id
+
+    /* Indexed by capability ID; gaps are left as NULL. */
+    static const char *const name[] = {
+        ID_NAME(STD_HEADER),
+        ID_NAME(PM),
+        ID_NAME(AGP),
+        ID_NAME(VPD),
+        ID_NAME(SLOTID),
+        ID_NAME(MSI),
+        ID_NAME(CHSWP),
+        ID_NAME(PCIX),
+        ID_NAME(HT),
+        ID_NAME(VNDR),
+        ID_NAME(DBG),
+        ID_NAME(CCRC),
+        ID_NAME(SHPC),
+        ID_NAME(SSVID),
+        ID_NAME(AGP3),
+        ID_NAME(SECDEV),
+        ID_NAME(EXP),
+        ID_NAME(MSIX),
+        ID_NAME(SATA),
+        ID_NAME(AF),
+        ID_NAME(EA),
+    };
+
+    if (id < sizeof (name) / sizeof (name[0]) && name[id] != NULL)
+        return name[id];
+
+    return "UNKNOWN";
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * c-tab-always-indent: nil
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xenpt.h b/xenpt.h
new file mode 100644 (file)
index 0000000..aa19b2f
--- /dev/null
+++ b/xenpt.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2012, Citrix Systems Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef  _XENPT_H
+#define  _XENPT_H
+
+/* uint8_t / uint16_t are used in the prototypes below. */
+#include <stdint.h>
+
+/*
+ * Parse a hexadecimal "S:B:D.F" or "B:D.F" PCI address.  Returns 0 on
+ * success, or -1 with errno set to EINVAL if the string does not match.
+ */
+int xenpt_parse_sbdf(const char *sbdf, uint16_t *segment, uint8_t *bus,
+                     uint8_t *device, uint8_t *function);
+
+/* Pseudo capability ID used for the standard configuration header. */
+#define PCI_CAP_ID_STD_HEADER 0x00
+#define PCI_CONFIG_SIZEOF 0x100
+
+/* Name for a PCI capability ID, "UNKNOWN" if not recognized. */
+const char *xenpt_grp_name(uint8_t id);
+
+#endif  /* _XENPT_H */