vpci/msix: add MSI-X handlers

author Roger Pau Monne <roger.pau@citrix.com>

Thu, 22 Mar 2018 14:00:00 +0000 (15:00 +0100)

committer Jan Beulich <jbeulich@suse.com>

Fri, 23 Mar 2018 09:26:06 +0000 (10:26 +0100)
author Roger Pau Monne <roger.pau@citrix.com>
Thu, 22 Mar 2018 14:00:00 +0000 (15:00 +0100)
committer Jan Beulich <jbeulich@suse.com>
Fri, 23 Mar 2018 09:26:06 +0000 (10:26 +0100)
diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c

index 259814d95df884bf85abe05fcc1baf10af6ee0a0..d3f65eadbe45733045e2890dd2ca2402bdb67514 100644 (file)
--- a/xen/arch/x86/hvm/dom0_build.c
+++ b/xen/arch/x86/hvm/dom0_build.c
@@ -1117,7 +1117,7 @@ int __init dom0_construct_pvh(struct domain *d, const module_t *image,
  
      pvh_setup_mmcfg(d);
  
-    panic("Building a PVHv2 Dom0 is not yet supported.");
+    printk("WARNING: PVH is an experimental mode with limited functionality\n");
      return 0;
  }
  
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c

index 346e11f2d692e381d3f865058dfbe0617021c9e8..569b124603acd5c519813fad8f6e7ff59a8aa3fa 100644 (file)
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -588,6 +588,7 @@ int hvm_domain_initialise(struct domain *d)
      INIT_LIST_HEAD(&d->arch.hvm_domain.write_map.list);
      INIT_LIST_HEAD(&d->arch.hvm_domain.g2m_ioport_list);
      INIT_LIST_HEAD(&d->arch.hvm_domain.mmcfg_regions);
+    INIT_LIST_HEAD(&d->arch.hvm_domain.msix_tables);
  
      rc = create_perdomain_mapping(d, PERDOMAIN_VIRT_START, 0, NULL, NULL);
      if ( rc )
diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c

index be59c56d436f315e1801c13c4ab6ddd9b5a58b13..c31d27c389833bb3880fcb027166045a8d53101b 100644 (file)
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -30,6 +30,7 @@
  #include <xen/lib.h>
  #include <xen/errno.h>
  #include <xen/sched.h>
+#include <xen/softirq.h>
  #include <xen/irq.h>
  #include <xen/vpci.h>
  #include <public/hvm/ioreq.h>
@@ -644,13 +645,10 @@ static unsigned int msi_gflags(uint16_t data, uint64_t addr, bool masked)
             (masked ? 0 : XEN_DOMCTL_VMSI_X86_UNMASKED);
  }
  
-void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev,
-                        unsigned int entry, bool mask)
+static void vpci_mask_pirq(struct domain *d, int pirq, bool mask)
  {
      unsigned long flags;
-    struct irq_desc *desc = domain_spin_lock_irq_desc(pdev->domain,
-                                                      msi->arch.pirq + entry,
-                                                      &flags);
+    struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);
  
      if ( !desc )
          return;
@@ -658,23 +656,31 @@ void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev,
      spin_unlock_irqrestore(&desc->lock, flags);
  }
  
-int vpci_msi_arch_enable(struct vpci_msi *msi, const struct pci_dev *pdev,
-                         unsigned int vectors)
+void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev,
+                        unsigned int entry, bool mask)
+{
+    vpci_mask_pirq(pdev->domain, msi->arch.pirq + entry, mask);
+}
+
+static int vpci_msi_enable(const struct pci_dev *pdev, uint32_t data,
+                           uint64_t address, unsigned int nr,
+                           paddr_t table_base, uint32_t mask)
  {
      struct msi_info msi_info = {
          .seg = pdev->seg,
          .bus = pdev->bus,
          .devfn = pdev->devfn,
-        .entry_nr = vectors,
+        .table_base = table_base,
+        .entry_nr = nr,
      };
-    unsigned int i;
-    int rc;
-
-    ASSERT(msi->arch.pirq == INVALID_PIRQ);
+    unsigned int i, vectors = table_base ? 1 : nr;
+    int rc, pirq = INVALID_PIRQ;
  
      /* Get a PIRQ. */
-    rc = allocate_and_map_msi_pirq(pdev->domain, -1, &msi->arch.pirq,
-                                   MAP_PIRQ_TYPE_MULTI_MSI, &msi_info);
+    rc = allocate_and_map_msi_pirq(pdev->domain, -1, &pirq,
+                                   table_base ? MAP_PIRQ_TYPE_MSI
+                                              : MAP_PIRQ_TYPE_MULTI_MSI,
+                                   &msi_info);
      if ( rc )
      {
          gdprintk(XENLOG_ERR, "%04x:%02x:%02x.%u: failed to map PIRQ: %d\n",
@@ -685,15 +691,14 @@ int vpci_msi_arch_enable(struct vpci_msi *msi, const struct pci_dev *pdev,
  
      for ( i = 0; i < vectors; i++ )
      {
-        uint8_t vector = MASK_EXTR(msi->data, MSI_DATA_VECTOR_MASK);
-        uint8_t vector_mask = 0xff >> (8 - fls(msi->vectors) + 1);
+        uint8_t vector = MASK_EXTR(data, MSI_DATA_VECTOR_MASK);
+        uint8_t vector_mask = 0xff >> (8 - fls(vectors) + 1);
          struct xen_domctl_bind_pt_irq bind = {
-            .machine_irq = msi->arch.pirq + i,
+            .machine_irq = pirq + i,
              .irq_type = PT_IRQ_TYPE_MSI,
              .u.msi.gvec = (vector & ~vector_mask) |
                            ((vector + i) & vector_mask),
-            .u.msi.gflags = msi_gflags(msi->data, msi->address,
-                                       (msi->mask >> i) & 1),
+            .u.msi.gflags = msi_gflags(data, address, (mask >> i) & 1),
          };
  
          pcidevs_lock();
@@ -703,33 +708,49 @@ int vpci_msi_arch_enable(struct vpci_msi *msi, const struct pci_dev *pdev,
              gdprintk(XENLOG_ERR,
                       "%04x:%02x:%02x.%u: failed to bind PIRQ %u: %d\n",
                       pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
-                     PCI_FUNC(pdev->devfn), msi->arch.pirq + i, rc);
+                     PCI_FUNC(pdev->devfn), pirq + i, rc);
              while ( bind.machine_irq-- )
                  pt_irq_destroy_bind(pdev->domain, &bind);
              spin_lock(&pdev->domain->event_lock);
-            unmap_domain_pirq(pdev->domain, msi->arch.pirq);
+            unmap_domain_pirq(pdev->domain, pirq);
              spin_unlock(&pdev->domain->event_lock);
              pcidevs_unlock();
-            msi->arch.pirq = INVALID_PIRQ;
              return rc;
          }
          pcidevs_unlock();
      }
  
-    return 0;
+    return pirq;
  }
  
-void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev)
+int vpci_msi_arch_enable(struct vpci_msi *msi, const struct pci_dev *pdev,
+                         unsigned int vectors)
+{
+    int rc;
+
+    ASSERT(msi->arch.pirq == INVALID_PIRQ);
+    rc = vpci_msi_enable(pdev, msi->data, msi->address, vectors, 0, msi->mask);
+    if ( rc >= 0 )
+    {
+        msi->arch.pirq = rc;
+        rc = 0;
+    }
+
+    return rc;
+}
+
+static void vpci_msi_disable(const struct pci_dev *pdev, int pirq,
+                             unsigned int nr)
  {
      unsigned int i;
  
-    ASSERT(msi->arch.pirq != INVALID_PIRQ);
+    ASSERT(pirq != INVALID_PIRQ);
  
      pcidevs_lock();
-    for ( i = 0; i < msi->vectors; i++ )
+    for ( i = 0; i < nr; i++ )
      {
          struct xen_domctl_bind_pt_irq bind = {
-            .machine_irq = msi->arch.pirq + i,
+            .machine_irq = pirq + i,
              .irq_type = PT_IRQ_TYPE_MSI,
          };
          int rc;
@@ -739,10 +760,14 @@ void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev)
      }
  
      spin_lock(&pdev->domain->event_lock);
-    unmap_domain_pirq(pdev->domain, msi->arch.pirq);
+    unmap_domain_pirq(pdev->domain, pirq);
      spin_unlock(&pdev->domain->event_lock);
      pcidevs_unlock();
+}
  
+void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev)
+{
+    vpci_msi_disable(pdev, msi->arch.pirq, msi->vectors);
      msi->arch.pirq = INVALID_PIRQ;
  }
  
@@ -763,3 +788,82 @@ void vpci_msi_arch_print(const struct vpci_msi *msi)
             MASK_EXTR(msi->address, MSI_ADDR_DEST_ID_MASK),
             msi->arch.pirq);
  }
+
+void vpci_msix_arch_mask_entry(struct vpci_msix_entry *entry,
+                               const struct pci_dev *pdev, bool mask)
+{
+    ASSERT(entry->arch.pirq != INVALID_PIRQ);
+    vpci_mask_pirq(pdev->domain, entry->arch.pirq, mask);
+}
+
+int vpci_msix_arch_enable_entry(struct vpci_msix_entry *entry,
+                                const struct pci_dev *pdev, paddr_t table_base)
+{
+    int rc;
+
+    ASSERT(entry->arch.pirq == INVALID_PIRQ);
+    rc = vpci_msi_enable(pdev, entry->data, entry->addr,
+                         vmsix_entry_nr(pdev->vpci->msix, entry),
+                         table_base, entry->masked);
+    if ( rc >= 0 )
+    {
+        entry->arch.pirq = rc;
+        rc = 0;
+    }
+
+    return rc;
+}
+
+int vpci_msix_arch_disable_entry(struct vpci_msix_entry *entry,
+                                 const struct pci_dev *pdev)
+{
+    if ( entry->arch.pirq == INVALID_PIRQ )
+        return -ENOENT;
+
+    vpci_msi_disable(pdev, entry->arch.pirq, 1);
+    entry->arch.pirq = INVALID_PIRQ;
+
+    return 0;
+}
+
+void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry)
+{
+    entry->arch.pirq = INVALID_PIRQ;
+}
+
+int vpci_msix_arch_print(const struct vpci_msix *msix)
+{
+    unsigned int i;
+
+    for ( i = 0; i < msix->max_entries; i++ )
+    {
+        const struct vpci_msix_entry *entry = &msix->entries[i];
+
+        printk("%6u vec=%02x%7s%6s%3sassert%5s%7s dest_id=%lu mask=%u pirq: %d\n",
+               i, MASK_EXTR(entry->data, MSI_DATA_VECTOR_MASK),
+               entry->data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
+               entry->data & MSI_DATA_TRIGGER_LEVEL ? "level" : "edge",
+               entry->data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
+               entry->addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
+               entry->addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "fixed",
+               MASK_EXTR(entry->addr, MSI_ADDR_DEST_ID_MASK),
+               entry->masked, entry->arch.pirq);
+        if ( i && !(i % 64) )
+        {
+            struct pci_dev *pdev = msix->pdev;
+
+            spin_unlock(&msix->pdev->vpci->lock);
+            process_pending_softirqs();
+            /* NB: we assume that pdev cannot go away for an alive domain. */
+            if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
+                return -EBUSY;
+            if ( pdev->vpci->msix != msix )
+            {
+                spin_unlock(&pdev->vpci->lock);
+                return -EAGAIN;
+            }
+        }
+    }
+
+    return 0;
+}
diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile

index 62cec9e82b2b4d18604db6cd5b52e6e0a27547b0..55d1bdfda02c9fc63b93ab5bd6407e336bf5b6d1 100644 (file)
--- a/xen/drivers/vpci/Makefile
+++ b/xen/drivers/vpci/Makefile
@@ -1 +1 @@
-obj-y += vpci.o header.o msi.o
+obj-y += vpci.o header.o msi.o msix.o
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c

index 9fa07992cc9915aa2ed059032d3cf766d7510d08..91a71ca66e123acde991e27640772d5450ecff05 100644 (file)
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -190,6 +190,7 @@ static int modify_bars(const struct pci_dev *pdev, bool map, bool rom_only)
      struct vpci_header *header = &pdev->vpci->header;
      struct rangeset *mem = rangeset_new(NULL, NULL, 0);
      struct pci_dev *tmp, *dev = NULL;
+    const struct vpci_msix *msix = pdev->vpci->msix;
      unsigned int i;
      int rc;
  
@@ -226,6 +227,24 @@ static int modify_bars(const struct pci_dev *pdev, bool map, bool rom_only)
          }
      }
  
+    /* Remove any MSIX regions if present. */
+    for ( i = 0; msix && i < ARRAY_SIZE(msix->tables); i++ )
+    {
+        unsigned long start = PFN_DOWN(vmsix_table_addr(pdev->vpci, i));
+        unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
+                                     vmsix_table_size(pdev->vpci, i) - 1);
+
+        rc = rangeset_remove_range(mem, start, end);
+        if ( rc )
+        {
+            printk(XENLOG_G_WARNING
+                   "Failed to remove MSIX table [%lx, %lx]: %d\n",
+                   start, end, rc);
+            rangeset_destroy(mem);
+            return rc;
+        }
+    }
+
      /*
       * Check for overlaps with other BARs. Note that only BARs that are
       * currently mapped (enabled) are checked for overlaps.
diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c

index de4ddf562e93c035101d0c4ed581fc8c91356b12..ad26c38a92be2310a9f9ce7aaa2a1e0e4dd541a7 100644 (file)
--- a/xen/drivers/vpci/msi.c
+++ b/xen/drivers/vpci/msi.c
@@ -281,11 +281,12 @@ void vpci_dump_msi(void)
          if ( !has_vpci(d) )
              continue;
  
-        printk("vPCI MSI d%d\n", d->domain_id);
+        printk("vPCI MSI/MSI-X d%d\n", d->domain_id);
  
          list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list )
          {
              const struct vpci_msi *msi;
+            const struct vpci_msix *msix;
  
              if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
                  continue;
@@ -306,6 +307,30 @@ void vpci_dump_msi(void)
                  vpci_msi_arch_print(msi);
              }
  
+            msix = pdev->vpci->msix;
+            if ( msix && msix->enabled )
+            {
+                int rc;
+
+                printk("%04x:%02x:%02x.%u MSI-X\n", pdev->seg, pdev->bus,
+                       PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+
+                printk("  entries: %u maskall: %d enabled: %d\n",
+                       msix->max_entries, msix->masked, msix->enabled);
+
+                rc = vpci_msix_arch_print(msix);
+                if ( rc )
+                {
+                    /*
+                     * On error vpci_msix_arch_print will always return without
+                     * holding the lock.
+                     */
+                    printk("unable to print all MSI-X entries: %d\n", rc);
+                    process_pending_softirqs();
+                    continue;
+                }
+            }
+
              spin_unlock(&pdev->vpci->lock);
              process_pending_softirqs();
          }
diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c

new file mode 100644 (file)

index 0000000..3b378c2
--- /dev/null
+++ b/xen/drivers/vpci/msix.c
@@ -0,0 +1,458 @@
+/*
+ * Handlers for accesses to the MSI-X capability structure and the memory
+ * region.
+ *
+ * Copyright (C) 2017 Citrix Systems R&D
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/sched.h>
+#include <xen/vpci.h>
+
+#include <asm/msi.h>
+
+#define VMSIX_SIZE(num) offsetof(struct vpci_msix, entries[num])
+
+#define VMSIX_ADDR_IN_RANGE(addr, vpci, nr)                               \
+    ((addr) >= vmsix_table_addr(vpci, nr) &&                              \
+     (addr) < vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr))
+
+static uint32_t control_read(const struct pci_dev *pdev, unsigned int reg,
+                             void *data)
+{
+    const struct vpci_msix *msix = data;
+
+    return (msix->max_entries - 1) |
+           (msix->enabled ? PCI_MSIX_FLAGS_ENABLE : 0) |
+           (msix->masked ? PCI_MSIX_FLAGS_MASKALL : 0);
+}
+
+static int update_entry(struct vpci_msix_entry *entry,
+                        const struct pci_dev *pdev, unsigned int nr)
+{
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    int rc = vpci_msix_arch_disable_entry(entry, pdev);
+
+    /* Ignore ENOENT, it means the entry wasn't set up. */
+    if ( rc && rc != -ENOENT )
+    {
+        gprintk(XENLOG_WARNING,
+                "%04x:%02x:%02x.%u: unable to disable entry %u for update: %d\n",
+                pdev->seg, pdev->bus, slot, func, nr, rc);
+        return rc;
+    }
+
+    rc = vpci_msix_arch_enable_entry(entry, pdev,
+                                     vmsix_table_base(pdev->vpci,
+                                                      VPCI_MSIX_TABLE));
+    if ( rc )
+    {
+        gprintk(XENLOG_WARNING,
+                "%04x:%02x:%02x.%u: unable to enable entry %u: %d\n",
+                pdev->seg, pdev->bus, slot, func, nr, rc);
+        /* Entry is likely not properly configured. */
+        return rc;
+    }
+
+    return 0;
+}
+
+static void control_write(const struct pci_dev *pdev, unsigned int reg,
+                          uint32_t val, void *data)
+{
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    struct vpci_msix *msix = data;
+    bool new_masked = val & PCI_MSIX_FLAGS_MASKALL;
+    bool new_enabled = val & PCI_MSIX_FLAGS_ENABLE;
+    unsigned int i;
+    int rc;
+
+    if ( new_masked == msix->masked && new_enabled == msix->enabled )
+        return;
+
+    /*
+     * According to the PCI 3.0 specification, switching the enable bit to 1
+     * or the function mask bit to 0 should cause all the cached addresses
+     * and data fields to be recalculated.
+     *
+     * In order to avoid the overhead of disabling and enabling all the
+     * entries every time the guest sets the maskall bit, Xen will only
+     * perform the disable and enable sequence when the guest has written to
+     * the entry.
+     */
+    if ( new_enabled && !new_masked && (!msix->enabled || msix->masked) )
+    {
+        for ( i = 0; i < msix->max_entries; i++ )
+        {
+            if ( msix->entries[i].masked || !msix->entries[i].updated ||
+                 update_entry(&msix->entries[i], pdev, i) )
+                continue;
+
+            msix->entries[i].updated = false;
+        }
+    }
+    else if ( !new_enabled && msix->enabled )
+    {
+        /* Guest has disabled MSIX, disable all entries. */
+        for ( i = 0; i < msix->max_entries; i++ )
+        {
+            /*
+             * NB: vpci_msix_arch_disable_entry can be called for entries
+             * that are not set up, it will return -ENOENT in that case.
+             */
+            rc = vpci_msix_arch_disable_entry(&msix->entries[i], pdev);
+            switch ( rc )
+            {
+            case 0:
+                /*
+                 * Mark the entry successfully disabled as updated, so that on
+                 * the next enable the entry is properly setup. This is done
+                 * so that the following flow works correctly:
+                 *
+                 * mask entry -> disable MSIX -> enable MSIX -> unmask entry
+                 *
+                 * Without setting 'updated', the 'unmask entry' step will fail
+                 * because the entry has not been updated, so it would not be
+                 * mapped/bound at all.
+                 */
+                msix->entries[i].updated = true;
+                break;
+            case -ENOENT:
+                /* Ignore non-present entry. */
+                break;
+            default:
+                gprintk(XENLOG_WARNING,
+                        "%04x:%02x:%02x.%u: unable to disable entry %u: %d\n",
+                        pdev->seg, pdev->bus, slot, func, i, rc);
+                return;
+            }
+        }
+    }
+
+    msix->masked = new_masked;
+    msix->enabled = new_enabled;
+
+    val = control_read(pdev, reg, data);
+    if ( pci_msi_conf_write_intercept(msix->pdev, reg, 2, &val) >= 0 )
+        pci_conf_write16(pdev->seg, pdev->bus, slot, func, reg, val);
+}
+
+static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
+{
+    struct vpci_msix *msix;
+
+    list_for_each_entry ( msix, &d->arch.hvm_domain.msix_tables, next )
+    {
+        const struct vpci_bar *bars = msix->pdev->vpci->header.bars;
+        unsigned int i;
+
+        for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
+            if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
+                 VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
+                return msix;
+    }
+
+    return NULL;
+}
+
+static int msix_accept(struct vcpu *v, unsigned long addr)
+{
+    return !!msix_find(v->domain, addr);
+}
+
+static bool access_allowed(const struct pci_dev *pdev, unsigned long addr,
+                           unsigned int len)
+{
+    /* Only allow aligned 32/64b accesses. */
+    if ( (len == 4 || len == 8) && !(addr & (len - 1)) )
+        return true;
+
+    gprintk(XENLOG_WARNING,
+            "%04x:%02x:%02x.%u: unaligned or invalid size MSI-X table access\n",
+            pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+
+    return false;
+}
+
+static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
+                                         paddr_t addr)
+{
+    paddr_t start = vmsix_table_addr(msix->pdev->vpci, VPCI_MSIX_TABLE);
+
+    return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
+}
+
+static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
+                     unsigned long *data)
+{
+    const struct domain *d = v->domain;
+    struct vpci_msix *msix = msix_find(d, addr);
+    const struct vpci_msix_entry *entry;
+    unsigned int offset;
+
+    *data = ~0ul;
+
+    if ( !msix )
+        return X86EMUL_RETRY;
+
+    if ( !access_allowed(msix->pdev, addr, len) )
+        return X86EMUL_OKAY;
+
+    if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
+    {
+        /*
+         * Access to PBA.
+         *
+         * TODO: note that this relies on having the PBA identity mapped to the
+         * guest address space. If this changes the address will need to be
+         * translated.
+         */
+        switch ( len )
+        {
+        case 4:
+            *data = readl(addr);
+            break;
+
+        case 8:
+            *data = readq(addr);
+            break;
+
+        default:
+            ASSERT_UNREACHABLE();
+            break;
+        }
+
+        return X86EMUL_OKAY;
+    }
+
+    spin_lock(&msix->pdev->vpci->lock);
+    entry = get_entry(msix, addr);
+    offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
+
+    switch ( offset )
+    {
+    case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
+        *data = entry->addr;
+        break;
+
+    case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
+        *data = entry->addr >> 32;
+        break;
+
+    case PCI_MSIX_ENTRY_DATA_OFFSET:
+        *data = entry->data;
+        if ( len == 8 )
+            *data |=
+                (uint64_t)(entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0) << 32;
+        break;
+
+    case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
+        *data = entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0;
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        break;
+    }
+    spin_unlock(&msix->pdev->vpci->lock);
+
+    return X86EMUL_OKAY;
+}
+
+static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
+                      unsigned long data)
+{
+    const struct domain *d = v->domain;
+    struct vpci_msix *msix = msix_find(d, addr);
+    struct vpci_msix_entry *entry;
+    unsigned int offset;
+
+    if ( !msix )
+        return X86EMUL_RETRY;
+
+    if ( !access_allowed(msix->pdev, addr, len) )
+        return X86EMUL_OKAY;
+
+    if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
+    {
+        /* Ignore writes to PBA for DomUs, its behavior is undefined. */
+        if ( is_hardware_domain(d) )
+        {
+            switch ( len )
+            {
+            case 4:
+                writel(data, addr);
+                break;
+
+            case 8:
+                writeq(data, addr);
+                break;
+
+            default:
+                ASSERT_UNREACHABLE();
+                break;
+            }
+        }
+
+        return X86EMUL_OKAY;
+    }
+
+    spin_lock(&msix->pdev->vpci->lock);
+    entry = get_entry(msix, addr);
+    offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
+
+    /*
+     * NB: Xen allows writes to the data/address registers with the entry
+     * unmasked. The specification says this is undefined behavior, and Xen
+     * implements it as storing the written value, which will be made effective
+     * in the next mask/unmask cycle. This also mimics the implementation in
+     * QEMU.
+     */
+    switch ( offset )
+    {
+    case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
+        entry->updated = true;
+        if ( len == 8 )
+        {
+            entry->addr = data;
+            break;
+        }
+        /* Use a 64-bit mask so the upper address half is preserved. */
+        entry->addr = (entry->addr & ~0xffffffffull) | data;
+        break;
+
+    case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
+        entry->updated = true;
+        entry->addr &= 0xffffffff;
+        entry->addr |= (uint64_t)data << 32;
+        break;
+
+    case PCI_MSIX_ENTRY_DATA_OFFSET:
+        entry->updated = true;
+        entry->data = data;
+
+        if ( len == 4 )
+            break;
+
+        data >>= 32;
+        /* fallthrough */
+    case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
+    {
+        bool new_masked = data & PCI_MSIX_VECTOR_BITMASK;
+        const struct pci_dev *pdev = msix->pdev;
+
+        if ( entry->masked == new_masked )
+            /* No change in the mask bit, nothing to do. */
+            break;
+
+        /*
+         * Update the masked state before calling vpci_msix_arch_enable_entry,
+         * so that it picks the new state.
+         */
+        entry->masked = new_masked;
+        if ( !new_masked && msix->enabled && !msix->masked && entry->updated )
+        {
+            /*
+             * If MSI-X is enabled, the function mask is not active, the entry
+             * is being unmasked and there have been changes to the address or
+             * data fields Xen needs to disable and enable the entry in order
+             * to pick up the changes.
+             */
+            if ( update_entry(entry, pdev, vmsix_entry_nr(msix, entry)) )
+                break;
+
+            entry->updated = false;
+        }
+        else
+            vpci_msix_arch_mask_entry(entry, pdev, entry->masked);
+
+        break;
+    }
+
+    default:
+        ASSERT_UNREACHABLE();
+        break;
+    }
+    spin_unlock(&msix->pdev->vpci->lock);
+
+    return X86EMUL_OKAY;
+}
+
+static const struct hvm_mmio_ops vpci_msix_table_ops = {
+    .check = msix_accept,
+    .read = msix_read,
+    .write = msix_write,
+};
+
+static int init_msix(struct pci_dev *pdev)
+{
+    struct domain *d = pdev->domain;
+    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    unsigned int msix_offset, i, max_entries;
+    uint16_t control;
+    int rc;
+
+    msix_offset = pci_find_cap_offset(pdev->seg, pdev->bus, slot, func,
+                                      PCI_CAP_ID_MSIX);
+    if ( !msix_offset )
+        return 0;
+
+    control = pci_conf_read16(pdev->seg, pdev->bus, slot, func,
+                              msix_control_reg(msix_offset));
+
+    max_entries = msix_table_size(control);
+
+    pdev->vpci->msix = xzalloc_bytes(VMSIX_SIZE(max_entries));
+    if ( !pdev->vpci->msix )
+        return -ENOMEM;
+
+    pdev->vpci->msix->max_entries = max_entries;
+    pdev->vpci->msix->pdev = pdev;
+
+    pdev->vpci->msix->tables[VPCI_MSIX_TABLE] =
+        pci_conf_read32(pdev->seg, pdev->bus, slot, func,
+                        msix_table_offset_reg(msix_offset));
+    pdev->vpci->msix->tables[VPCI_MSIX_PBA] =
+        pci_conf_read32(pdev->seg, pdev->bus, slot, func,
+                        msix_pba_offset_reg(msix_offset));
+
+    for ( i = 0; i < pdev->vpci->msix->max_entries; i++)
+    {
+        pdev->vpci->msix->entries[i].masked = true;
+        vpci_msix_arch_init_entry(&pdev->vpci->msix->entries[i]);
+    }
+
+    rc = vpci_add_register(pdev->vpci, control_read, control_write,
+                           msix_control_reg(msix_offset), 2, pdev->vpci->msix);
+    if ( rc )
+        return rc;
+
+    if ( list_empty(&d->arch.hvm_domain.msix_tables) )
+        register_mmio_handler(d, &vpci_msix_table_ops);
+
+    list_add(&pdev->vpci->msix->next, &d->arch.hvm_domain.msix_tables);
+
+    return 0;
+}
+REGISTER_VPCI_INIT(init_msix, VPCI_PRIORITY_HIGH);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c

index 3012b300136e10000e754b2c79a131e52d22314a..8ec9c916eaf42692c839aeaa725d8ada752c0eae 100644 (file)
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -47,6 +47,7 @@ void vpci_remove_device(struct pci_dev *pdev)
          xfree(r);
      }
      spin_unlock(&pdev->vpci->lock);
+    xfree(pdev->vpci->msix);
      xfree(pdev->vpci->msi);
      xfree(pdev->vpci);
      pdev->vpci = NULL;
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h

index 4c43502e00441413896d148284b58adf385cc874..e2530c6b8163c8d0939c586e0a281844df0e4836 100644 (file)
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -186,6 +186,9 @@ struct hvm_domain {
      struct list_head mmcfg_regions;
      rwlock_t mmcfg_lock;
  
+    /* List of MSI-X tables. */
+    struct list_head msix_tables;
+
      /* List of permanently write-mapped pages. */
      struct {
          spinlock_t lock;
diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h

index 0fedb3473c4348ccf607b2064c77662611411f91..e6b6ed0b92b291f137257e7a6af7f30552f7c375 100644 (file)
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -132,6 +132,11 @@ struct vpci_arch_msi {
      int pirq;
  };
  
+/* Arch-specific MSI-X entry data for vPCI. */
+struct vpci_arch_msix_entry {
+    int pirq;
+};
+
  enum stdvga_cache_state {
      STDVGA_CACHE_UNINITIALIZED,
      STDVGA_CACHE_ENABLED,
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h

index 7266c17679377726af1b3751f18a469390b3a773..fc47163ba66a18e0783d857c1776286f668bac13 100644 (file)
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -115,6 +115,34 @@ struct vpci {
          struct vpci_arch_msi arch;
  #endif
      } *msi;
+
+    /* MSI-X data. */
+    struct vpci_msix {
+#ifdef __XEN__
+        struct pci_dev *pdev;
+        /* List link. */
+        struct list_head next;
+        /* Table information. */
+#define VPCI_MSIX_TABLE     0
+#define VPCI_MSIX_PBA       1
+#define VPCI_MSIX_MEM_NUM   2
+        uint32_t tables[VPCI_MSIX_MEM_NUM];
+        /* Maximum number of vectors supported by the device. */
+        uint16_t max_entries : 12;
+        /* MSI-X enabled? */
+        bool enabled         : 1;
+        /* Masked? */
+        bool masked          : 1;
+        /* Entries. */
+        struct vpci_msix_entry {
+            uint64_t addr;
+            uint32_t data;
+            bool masked  : 1;
+            bool updated : 1;
+            struct vpci_arch_msix_entry arch;
+        } entries[];
+#endif
+    } *msix;
  };
  
  struct vpci_vcpu {
@@ -137,6 +165,51 @@ int __must_check vpci_msi_arch_enable(struct vpci_msi *msi,
  void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev);
  void vpci_msi_arch_init(struct vpci_msi *msi);
  void vpci_msi_arch_print(const struct vpci_msi *msi);
+
+/* Arch-specific vPCI MSI-X helpers. */
+void vpci_msix_arch_mask_entry(struct vpci_msix_entry *entry,
+                               const struct pci_dev *pdev, bool mask);
+int __must_check vpci_msix_arch_enable_entry(struct vpci_msix_entry *entry,
+                                             const struct pci_dev *pdev,
+                                             paddr_t table_base);
+int __must_check vpci_msix_arch_disable_entry(struct vpci_msix_entry *entry,
+                                              const struct pci_dev *pdev);
+void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry);
+int vpci_msix_arch_print(const struct vpci_msix *msix);
+
+/*
+ * Helper functions to fetch MSIX related data. They are used by both the
+ * emulated MSIX code and the BAR handlers.
+ */
+static inline paddr_t vmsix_table_base(const struct vpci *vpci, unsigned int nr)
+{
+    return vpci->header.bars[vpci->msix->tables[nr] & PCI_MSIX_BIRMASK].addr;
+}
+
+static inline paddr_t vmsix_table_addr(const struct vpci *vpci, unsigned int nr)
+{
+    return vmsix_table_base(vpci, nr) +
+           (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK);
+}
+
+/*
+ * Note regarding the size calculation of the PBA: the spec mentions "The last
+ * QWORD will not necessarily be fully populated", so it implies that the PBA
+ * size is 64-bit aligned.
+ */
+static inline size_t vmsix_table_size(const struct vpci *vpci, unsigned int nr)
+{
+    return
+        (nr == VPCI_MSIX_TABLE) ? vpci->msix->max_entries * PCI_MSIX_ENTRY_SIZE
+                                : ROUNDUP(DIV_ROUND_UP(vpci->msix->max_entries,
+                                                       8), 8);
+}
+
+static inline unsigned int vmsix_entry_nr(const struct vpci_msix *msix,
+                                          const struct vpci_msix_entry *entry)
+{
+    return entry - msix->entries;
+}
  #endif /* __XEN__ */
  
  #else /* !CONFIG_HAS_VPCI */
author	Roger Pau Monne <roger.pau@citrix.com>
	Thu, 22 Mar 2018 14:00:00 +0000 (15:00 +0100)
committer	Jan Beulich <jbeulich@suse.com>
	Fri, 23 Mar 2018 09:26:06 +0000 (10:26 +0100)
xen/arch/x86/hvm/dom0_build.c		patch \| blob \| blame \| history
xen/arch/x86/hvm/hvm.c		patch \| blob \| blame \| history
xen/arch/x86/hvm/vmsi.c		patch \| blob \| blame \| history
xen/drivers/vpci/Makefile		patch \| blob \| blame \| history
xen/drivers/vpci/header.c		patch \| blob \| blame \| history
xen/drivers/vpci/msi.c		patch \| blob \| blame \| history
xen/drivers/vpci/msix.c	[new file with mode: 0644]	patch \| blob
xen/drivers/vpci/vpci.c		patch \| blob \| blame \| history
xen/include/asm-x86/hvm/domain.h		patch \| blob \| blame \| history
xen/include/asm-x86/hvm/io.h		patch \| blob \| blame \| history
xen/include/xen/vpci.h		patch \| blob \| blame \| history