pvh_setup_mmcfg(d);
- panic("Building a PVHv2 Dom0 is not yet supported.");
+ printk("WARNING: PVH is an experimental mode with limited functionality\n");
return 0;
}
INIT_LIST_HEAD(&d->arch.hvm_domain.write_map.list);
INIT_LIST_HEAD(&d->arch.hvm_domain.g2m_ioport_list);
INIT_LIST_HEAD(&d->arch.hvm_domain.mmcfg_regions);
+ INIT_LIST_HEAD(&d->arch.hvm_domain.msix_tables);
rc = create_perdomain_mapping(d, PERDOMAIN_VIRT_START, 0, NULL, NULL);
if ( rc )
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
+#include <xen/softirq.h>
#include <xen/irq.h>
#include <xen/vpci.h>
#include <public/hvm/ioreq.h>
(masked ? 0 : XEN_DOMCTL_VMSI_X86_UNMASKED);
}
-void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev,
- unsigned int entry, bool mask)
+static void vpci_mask_pirq(struct domain *d, int pirq, bool mask)
{
unsigned long flags;
- struct irq_desc *desc = domain_spin_lock_irq_desc(pdev->domain,
- msi->arch.pirq + entry,
- &flags);
+ struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);
if ( !desc )
return;
spin_unlock_irqrestore(&desc->lock, flags);
}
-int vpci_msi_arch_enable(struct vpci_msi *msi, const struct pci_dev *pdev,
- unsigned int vectors)
+void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev,
+ unsigned int entry, bool mask)
+{
+ vpci_mask_pirq(pdev->domain, msi->arch.pirq + entry, mask);
+}
+
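+/*
+ * Allocate and bind the PIRQ(s) backing an MSI or MSI-X entry. A non-zero
+ * table_base signals MSI-X (a single vector), otherwise nr multi-MSI vectors
+ * are requested. Returns the first PIRQ on success or a negative error code.
+ */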
+static int vpci_msi_enable(const struct pci_dev *pdev, uint32_t data,
+ uint64_t address, unsigned int nr,
+ paddr_t table_base, uint32_t mask)
{
struct msi_info msi_info = {
.seg = pdev->seg,
.bus = pdev->bus,
.devfn = pdev->devfn,
- .entry_nr = vectors,
+ .table_base = table_base,
+ .entry_nr = nr,
};
- unsigned int i;
- int rc;
-
- ASSERT(msi->arch.pirq == INVALID_PIRQ);
+ unsigned int i, vectors = table_base ? 1 : nr;
+ int rc, pirq = INVALID_PIRQ;
/* Get a PIRQ. */
- rc = allocate_and_map_msi_pirq(pdev->domain, -1, &msi->arch.pirq,
- MAP_PIRQ_TYPE_MULTI_MSI, &msi_info);
+ rc = allocate_and_map_msi_pirq(pdev->domain, -1, &pirq,
+ table_base ? MAP_PIRQ_TYPE_MSI
+ : MAP_PIRQ_TYPE_MULTI_MSI,
+ &msi_info);
if ( rc )
{
gdprintk(XENLOG_ERR, "%04x:%02x:%02x.%u: failed to map PIRQ: %d\n",
for ( i = 0; i < vectors; i++ )
{
- uint8_t vector = MASK_EXTR(msi->data, MSI_DATA_VECTOR_MASK);
- uint8_t vector_mask = 0xff >> (8 - fls(msi->vectors) + 1);
+ uint8_t vector = MASK_EXTR(data, MSI_DATA_VECTOR_MASK);
+ uint8_t vector_mask = 0xff >> (8 - fls(vectors) + 1);
struct xen_domctl_bind_pt_irq bind = {
- .machine_irq = msi->arch.pirq + i,
+ .machine_irq = pirq + i,
.irq_type = PT_IRQ_TYPE_MSI,
.u.msi.gvec = (vector & ~vector_mask) |
((vector + i) & vector_mask),
- .u.msi.gflags = msi_gflags(msi->data, msi->address,
- (msi->mask >> i) & 1),
+ .u.msi.gflags = msi_gflags(data, address, (mask >> i) & 1),
};
pcidevs_lock();
gdprintk(XENLOG_ERR,
"%04x:%02x:%02x.%u: failed to bind PIRQ %u: %d\n",
pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn), msi->arch.pirq + i, rc);
+ PCI_FUNC(pdev->devfn), pirq + i, rc);
while ( bind.machine_irq-- )
pt_irq_destroy_bind(pdev->domain, &bind);
spin_lock(&pdev->domain->event_lock);
- unmap_domain_pirq(pdev->domain, msi->arch.pirq);
+ unmap_domain_pirq(pdev->domain, pirq);
spin_unlock(&pdev->domain->event_lock);
pcidevs_unlock();
- msi->arch.pirq = INVALID_PIRQ;
return rc;
}
pcidevs_unlock();
}
- return 0;
+ return pirq;
}
-void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev)
+int vpci_msi_arch_enable(struct vpci_msi *msi, const struct pci_dev *pdev,
+ unsigned int vectors)
+{
+ int rc;
+
+ ASSERT(msi->arch.pirq == INVALID_PIRQ);
+ rc = vpci_msi_enable(pdev, msi->data, msi->address, vectors, 0, msi->mask);
+ if ( rc >= 0 )
+ {
+ msi->arch.pirq = rc;
+ rc = 0;
+ }
+
+ return rc;
+}
+
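+/* Unbind and unmap the nr PIRQs starting at pirq. */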
+static void vpci_msi_disable(const struct pci_dev *pdev, int pirq,
+ unsigned int nr)
{
unsigned int i;
- ASSERT(msi->arch.pirq != INVALID_PIRQ);
+ ASSERT(pirq != INVALID_PIRQ);
pcidevs_lock();
- for ( i = 0; i < msi->vectors; i++ )
+ for ( i = 0; i < nr; i++ )
{
struct xen_domctl_bind_pt_irq bind = {
- .machine_irq = msi->arch.pirq + i,
+ .machine_irq = pirq + i,
.irq_type = PT_IRQ_TYPE_MSI,
};
int rc;
}
spin_lock(&pdev->domain->event_lock);
- unmap_domain_pirq(pdev->domain, msi->arch.pirq);
+ unmap_domain_pirq(pdev->domain, pirq);
spin_unlock(&pdev->domain->event_lock);
pcidevs_unlock();
+}
+
+void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev)
+{
+ vpci_msi_disable(pdev, msi->arch.pirq, msi->vectors);
msi->arch.pirq = INVALID_PIRQ;
}
MASK_EXTR(msi->address, MSI_ADDR_DEST_ID_MASK),
msi->arch.pirq);
}
+
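+/* Mask or unmask the PIRQ bound to an MSI-X entry. */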
+void vpci_msix_arch_mask_entry(struct vpci_msix_entry *entry,
+ const struct pci_dev *pdev, bool mask)
+{
+ ASSERT(entry->arch.pirq != INVALID_PIRQ);
+ vpci_mask_pirq(pdev->domain, entry->arch.pirq, mask);
+}
+
+int vpci_msix_arch_enable_entry(struct vpci_msix_entry *entry,
+ const struct pci_dev *pdev, paddr_t table_base)
+{
+ int rc;
+
+ ASSERT(entry->arch.pirq == INVALID_PIRQ);
+ rc = vpci_msi_enable(pdev, entry->data, entry->addr,
+ vmsix_entry_nr(pdev->vpci->msix, entry),
+ table_base, entry->masked);
+ if ( rc >= 0 )
+ {
+ entry->arch.pirq = rc;
+ rc = 0;
+ }
+
+ return rc;
+}
+
+int vpci_msix_arch_disable_entry(struct vpci_msix_entry *entry,
+ const struct pci_dev *pdev)
+{
+ if ( entry->arch.pirq == INVALID_PIRQ )
+ return -ENOENT;
+
+ vpci_msi_disable(pdev, entry->arch.pirq, 1);
+ entry->arch.pirq = INVALID_PIRQ;
+
+ return 0;
+}
+
+void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry)
+{
+ entry->arch.pirq = INVALID_PIRQ;
+}
+
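+/*
+ * Dump the MSI-X entries of a device (used by the vPCI debug dump). The vpci
+ * lock is dropped every 64 entries to process softirqs; on failure to
+ * re-acquire it the function returns an error without holding the lock.
+ */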
+int vpci_msix_arch_print(const struct vpci_msix *msix)
+{
+ unsigned int i;
+
+ for ( i = 0; i < msix->max_entries; i++ )
+ {
+ const struct vpci_msix_entry *entry = &msix->entries[i];
+
+ printk("%6u vec=%02x%7s%6s%3sassert%5s%7s dest_id=%lu mask=%u pirq: %d\n",
+ i, MASK_EXTR(entry->data, MSI_DATA_VECTOR_MASK),
+ entry->data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
+ entry->data & MSI_DATA_TRIGGER_LEVEL ? "level" : "edge",
+ entry->data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
+ entry->addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
+ entry->addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "fixed",
+ MASK_EXTR(entry->addr, MSI_ADDR_DEST_ID_MASK),
+ entry->masked, entry->arch.pirq);
+ if ( i && !(i % 64) )
+ {
+ struct pci_dev *pdev = msix->pdev;
+
+ spin_unlock(&msix->pdev->vpci->lock);
+ process_pending_softirqs();
+ /* NB: we assume that pdev cannot go away for an alive domain. */
+ if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
+ return -EBUSY;
+ if ( pdev->vpci->msix != msix )
+ {
+ spin_unlock(&pdev->vpci->lock);
+ return -EAGAIN;
+ }
+ }
+ }
+
+ return 0;
+}
-obj-y += vpci.o header.o msi.o
+obj-y += vpci.o header.o msi.o msix.o
struct vpci_header *header = &pdev->vpci->header;
struct rangeset *mem = rangeset_new(NULL, NULL, 0);
struct pci_dev *tmp, *dev = NULL;
+ const struct vpci_msix *msix = pdev->vpci->msix;
unsigned int i;
int rc;
}
}
+ /* Remove any MSI-X regions if present. */
+ for ( i = 0; msix && i < ARRAY_SIZE(msix->tables); i++ )
+ {
+ unsigned long start = PFN_DOWN(vmsix_table_addr(pdev->vpci, i));
+ unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
+ vmsix_table_size(pdev->vpci, i) - 1);
+
+ rc = rangeset_remove_range(mem, start, end);
+ if ( rc )
+ {
+ printk(XENLOG_G_WARNING
+ "Failed to remove MSIX table [%lx, %lx]: %d\n",
+ start, end, rc);
+ rangeset_destroy(mem);
+ return rc;
+ }
+ }
+
/*
* Check for overlaps with other BARs. Note that only BARs that are
* currently mapped (enabled) are checked for overlaps.
if ( !has_vpci(d) )
continue;
- printk("vPCI MSI d%d\n", d->domain_id);
+ printk("vPCI MSI/MSI-X d%d\n", d->domain_id);
list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list )
{
const struct vpci_msi *msi;
+ const struct vpci_msix *msix;
if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
continue;
vpci_msi_arch_print(msi);
}
+ msix = pdev->vpci->msix;
+ if ( msix && msix->enabled )
+ {
+ int rc;
+
+ printk("%04x:%02x:%02x.%u MSI-X\n", pdev->seg, pdev->bus,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+
+ printk(" entries: %u maskall: %d enabled: %d\n",
+ msix->max_entries, msix->masked, msix->enabled);
+
+ rc = vpci_msix_arch_print(msix);
+ if ( rc )
+ {
+ /*
+ * On error, vpci_msix_arch_print always returns without
+ * holding the lock.
+ */
+ printk("unable to print all MSI-X entries: %d\n", rc);
+ process_pending_softirqs();
+ continue;
+ }
+ }
+
spin_unlock(&pdev->vpci->lock);
process_pending_softirqs();
}
--- /dev/null
+/*
+ * Handlers for accesses to the MSI-X capability structure and the memory
+ * region.
+ *
+ * Copyright (C) 2017 Citrix Systems R&D
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/sched.h>
+#include <xen/vpci.h>
+
+#include <asm/msi.h>
+
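+/* Size of the vpci_msix struct, including the trailing array of entries. */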
+#define VMSIX_SIZE(num) offsetof(struct vpci_msix, entries[num])
+
+#define VMSIX_ADDR_IN_RANGE(addr, vpci, nr) \
+ ((addr) >= vmsix_table_addr(vpci, nr) && \
+ (addr) < vmsix_table_addr(vpci, nr) + vmsix_table_size(vpci, nr))
+
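+/* Return the guest view of the MSI-X control register. */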
+static uint32_t control_read(const struct pci_dev *pdev, unsigned int reg,
+ void *data)
+{
+ const struct vpci_msix *msix = data;
+
+ return (msix->max_entries - 1) |
+ (msix->enabled ? PCI_MSIX_FLAGS_ENABLE : 0) |
+ (msix->masked ? PCI_MSIX_FLAGS_MASKALL : 0);
+}
+
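+/*
+ * Re-setup an entry whose address or data fields have changed: disable the
+ * current binding (if any) and enable it again with the new values.
+ */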
+static int update_entry(struct vpci_msix_entry *entry,
+ const struct pci_dev *pdev, unsigned int nr)
+{
+ uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+ int rc = vpci_msix_arch_disable_entry(entry, pdev);
+
+ /* Ignore ENOENT, it means the entry wasn't set up. */
+ if ( rc && rc != -ENOENT )
+ {
+ gprintk(XENLOG_WARNING,
+ "%04x:%02x:%02x.%u: unable to disable entry %u for update: %d\n",
+ pdev->seg, pdev->bus, slot, func, nr, rc);
+ return rc;
+ }
+
+ rc = vpci_msix_arch_enable_entry(entry, pdev,
+ vmsix_table_base(pdev->vpci,
+ VPCI_MSIX_TABLE));
+ if ( rc )
+ {
+ gprintk(XENLOG_WARNING,
+ "%04x:%02x:%02x.%u: unable to enable entry %u: %d\n",
+ pdev->seg, pdev->bus, slot, func, nr, rc);
+ /* Entry is likely not properly configured. */
+ return rc;
+ }
+
+ return 0;
+}
+
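+/* Handle writes to the MSI-X control register (enable and function mask). */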
+static void control_write(const struct pci_dev *pdev, unsigned int reg,
+ uint32_t val, void *data)
+{
+ uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+ struct vpci_msix *msix = data;
+ bool new_masked = val & PCI_MSIX_FLAGS_MASKALL;
+ bool new_enabled = val & PCI_MSIX_FLAGS_ENABLE;
+ unsigned int i;
+ int rc;
+
+ if ( new_masked == msix->masked && new_enabled == msix->enabled )
+ return;
+
+ /*
+ * According to the PCI 3.0 specification, switching the enable bit to 1
+ * or the function mask bit to 0 should cause all the cached addresses
+ * and data fields to be recalculated.
+ *
+ * In order to avoid the overhead of disabling and enabling all the
+ * entries every time the guest sets the maskall bit, Xen will only
+ * perform the disable and enable sequence when the guest has written to
+ * the entry.
+ */
+ if ( new_enabled && !new_masked && (!msix->enabled || msix->masked) )
+ {
+ for ( i = 0; i < msix->max_entries; i++ )
+ {
+ if ( msix->entries[i].masked || !msix->entries[i].updated ||
+ update_entry(&msix->entries[i], pdev, i) )
+ continue;
+
+ msix->entries[i].updated = false;
+ }
+ }
+ else if ( !new_enabled && msix->enabled )
+ {
+ /* The guest has disabled MSI-X; disable all entries. */
+ for ( i = 0; i < msix->max_entries; i++ )
+ {
+ /*
+ * NB: vpci_msix_arch_disable_entry can be called for entries
+ * that are not set up; it will return -ENOENT in that case.
+ */
+ rc = vpci_msix_arch_disable_entry(&msix->entries[i], pdev);
+ switch ( rc )
+ {
+ case 0:
+ /*
+ * Mark the successfully disabled entry as updated, so that on
+ * the next enable it is properly set up again. This is done
+ * so that the following flow works correctly:
+ *
+ * mask entry -> disable MSIX -> enable MSIX -> unmask entry
+ *
+ * Without setting 'updated', the 'unmask entry' step will fail
+ * because the entry has not been updated, so it would not be
+ * mapped/bound at all.
+ */
+ msix->entries[i].updated = true;
+ break;
+ case -ENOENT:
+ /* Ignore non-present entry. */
+ break;
+ default:
+ gprintk(XENLOG_WARNING,
+ "%04x:%02x:%02x.%u: unable to disable entry %u: %d\n",
+ pdev->seg, pdev->bus, slot, func, i, rc);
+ return;
+ }
+ }
+ }
+
+ msix->masked = new_masked;
+ msix->enabled = new_enabled;
+
+ val = control_read(pdev, reg, data);
+ if ( pci_msi_conf_write_intercept(msix->pdev, reg, 2, &val) >= 0 )
+ pci_conf_write16(pdev->seg, pdev->bus, slot, func, reg, val);
+}
+
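+/*
+ * Find the MSI-X instance whose table or PBA covers the given address, as
+ * long as the BAR containing the region is currently enabled.
+ */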
+static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
+{
+ struct vpci_msix *msix;
+
+ list_for_each_entry ( msix, &d->arch.hvm_domain.msix_tables, next )
+ {
+ const struct vpci_bar *bars = msix->pdev->vpci->header.bars;
+ unsigned int i;
+
+ for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
+ if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
+ VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
+ return msix;
+ }
+
+ return NULL;
+}
+
+static int msix_accept(struct vcpu *v, unsigned long addr)
+{
+ return !!msix_find(v->domain, addr);
+}
+
+static bool access_allowed(const struct pci_dev *pdev, unsigned long addr,
+ unsigned int len)
+{
+ /* Only allow aligned 32/64b accesses. */
+ if ( (len == 4 || len == 8) && !(addr & (len - 1)) )
+ return true;
+
+ gprintk(XENLOG_WARNING,
+ "%04x:%02x:%02x.%u: unaligned or invalid size MSI-X table access\n",
+ pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+
+ return false;
+}
+
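+/* Return the MSI-X entry that the given table address maps to. */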
+static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
+ paddr_t addr)
+{
+ paddr_t start = vmsix_table_addr(msix->pdev->vpci, VPCI_MSIX_TABLE);
+
+ return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
+}
+
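+/* MMIO read handler for the MSI-X table and PBA regions. */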
+static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
+ unsigned long *data)
+{
+ const struct domain *d = v->domain;
+ struct vpci_msix *msix = msix_find(d, addr);
+ const struct vpci_msix_entry *entry;
+ unsigned int offset;
+
+ *data = ~0ul;
+
+ if ( !msix )
+ return X86EMUL_RETRY;
+
+ if ( !access_allowed(msix->pdev, addr, len) )
+ return X86EMUL_OKAY;
+
+ if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
+ {
+ /*
+ * Access to PBA.
+ *
+ * TODO: note that this relies on having the PBA identity mapped to the
+ * guest address space. If this changes, the address will need to be
+ * translated.
+ */
+ switch ( len )
+ {
+ case 4:
+ *data = readl(addr);
+ break;
+
+ case 8:
+ *data = readq(addr);
+ break;
+
+ default:
+ ASSERT_UNREACHABLE();
+ break;
+ }
+
+ return X86EMUL_OKAY;
+ }
+
+ spin_lock(&msix->pdev->vpci->lock);
+ entry = get_entry(msix, addr);
+ offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
+
+ switch ( offset )
+ {
+ case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
+ *data = entry->addr;
+ break;
+
+ case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
+ *data = entry->addr >> 32;
+ break;
+
+ case PCI_MSIX_ENTRY_DATA_OFFSET:
+ *data = entry->data;
+ if ( len == 8 )
+ *data |=
+ (uint64_t)(entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0) << 32;
+ break;
+
+ case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
+ *data = entry->masked ? PCI_MSIX_VECTOR_BITMASK : 0;
+ break;
+
+ default:
+ ASSERT_UNREACHABLE();
+ break;
+ }
+ spin_unlock(&msix->pdev->vpci->lock);
+
+ return X86EMUL_OKAY;
+}
+
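+/* MMIO write handler for the MSI-X table and PBA regions. */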
+static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
+ unsigned long data)
+{
+ const struct domain *d = v->domain;
+ struct vpci_msix *msix = msix_find(d, addr);
+ struct vpci_msix_entry *entry;
+ unsigned int offset;
+
+ if ( !msix )
+ return X86EMUL_RETRY;
+
+ if ( !access_allowed(msix->pdev, addr, len) )
+ return X86EMUL_OKAY;
+
+ if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
+ {
+ /* Ignore writes to the PBA for DomUs, its behavior is undefined. */
+ if ( is_hardware_domain(d) )
+ {
+ switch ( len )
+ {
+ case 4:
+ writel(data, addr);
+ break;
+
+ case 8:
+ writeq(data, addr);
+ break;
+
+ default:
+ ASSERT_UNREACHABLE();
+ break;
+ }
+ }
+
+ return X86EMUL_OKAY;
+ }
+
+ spin_lock(&msix->pdev->vpci->lock);
+ entry = get_entry(msix, addr);
+ offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
+
+ /*
+ * NB: Xen allows writes to the data/address registers with the entry
+ * unmasked. The specification says this is undefined behavior, and Xen
+ * implements it as storing the written value, which will be made effective
+ * in the next mask/unmask cycle. This also mimics the implementation in
+ * QEMU.
+ */
+ switch ( offset )
+ {
+ case PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET:
+ entry->updated = true;
+ if ( len == 8 )
+ {
+ entry->addr = data;
+ break;
+ }
+ entry->addr &= ~0xffffffff;
+ entry->addr |= data;
+ break;
+
+ case PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET:
+ entry->updated = true;
+ entry->addr &= 0xffffffff;
+ entry->addr |= (uint64_t)data << 32;
+ break;
+
+ case PCI_MSIX_ENTRY_DATA_OFFSET:
+ entry->updated = true;
+ entry->data = data;
+
+ if ( len == 4 )
+ break;
+
+ data >>= 32;
+ /* fallthrough */
+ case PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET:
+ {
+ bool new_masked = data & PCI_MSIX_VECTOR_BITMASK;
+ const struct pci_dev *pdev = msix->pdev;
+
+ if ( entry->masked == new_masked )
+ /* No change in the mask bit, nothing to do. */
+ break;
+
+ /*
+ * Update the masked state before calling vpci_msix_arch_enable_entry,
+ * so that it picks up the new state.
+ */
+ entry->masked = new_masked;
+ if ( !new_masked && msix->enabled && !msix->masked && entry->updated )
+ {
+ /*
+ * If MSI-X is enabled, the function mask is not active, the entry
+ * is being unmasked and the address or data fields have changed,
+ * Xen needs to disable and re-enable the entry in order to pick
+ * up the changes.
+ */
+ if ( update_entry(entry, pdev, vmsix_entry_nr(msix, entry)) )
+ break;
+
+ entry->updated = false;
+ }
+ else
+ vpci_msix_arch_mask_entry(entry, pdev, entry->masked);
+
+ break;
+ }
+
+ default:
+ ASSERT_UNREACHABLE();
+ break;
+ }
+ spin_unlock(&msix->pdev->vpci->lock);
+
+ return X86EMUL_OKAY;
+}
+
+static const struct hvm_mmio_ops vpci_msix_table_ops = {
+ .check = msix_accept,
+ .read = msix_read,
+ .write = msix_write,
+};
+
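+/*
+ * Initialize vPCI MSI-X handling for a device: read the capability, allocate
+ * the MSI-X state and register the control register and MMIO table handlers.
+ */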
+static int init_msix(struct pci_dev *pdev)
+{
+ struct domain *d = pdev->domain;
+ uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+ unsigned int msix_offset, i, max_entries;
+ uint16_t control;
+ int rc;
+
+ msix_offset = pci_find_cap_offset(pdev->seg, pdev->bus, slot, func,
+ PCI_CAP_ID_MSIX);
+ if ( !msix_offset )
+ return 0;
+
+ control = pci_conf_read16(pdev->seg, pdev->bus, slot, func,
+ msix_control_reg(msix_offset));
+
+ max_entries = msix_table_size(control);
+
+ pdev->vpci->msix = xzalloc_bytes(VMSIX_SIZE(max_entries));
+ if ( !pdev->vpci->msix )
+ return -ENOMEM;
+
+ pdev->vpci->msix->max_entries = max_entries;
+ pdev->vpci->msix->pdev = pdev;
+
+ pdev->vpci->msix->tables[VPCI_MSIX_TABLE] =
+ pci_conf_read32(pdev->seg, pdev->bus, slot, func,
+ msix_table_offset_reg(msix_offset));
+ pdev->vpci->msix->tables[VPCI_MSIX_PBA] =
+ pci_conf_read32(pdev->seg, pdev->bus, slot, func,
+ msix_pba_offset_reg(msix_offset));
+
+ for ( i = 0; i < pdev->vpci->msix->max_entries; i++ )
+ {
+ pdev->vpci->msix->entries[i].masked = true;
+ vpci_msix_arch_init_entry(&pdev->vpci->msix->entries[i]);
+ }
+
+ rc = vpci_add_register(pdev->vpci, control_read, control_write,
+ msix_control_reg(msix_offset), 2, pdev->vpci->msix);
+ if ( rc )
+ return rc;
+
+ if ( list_empty(&d->arch.hvm_domain.msix_tables) )
+ register_mmio_handler(d, &vpci_msix_table_ops);
+
+ list_add(&pdev->vpci->msix->next, &d->arch.hvm_domain.msix_tables);
+
+ return 0;
+}
+REGISTER_VPCI_INIT(init_msix, VPCI_PRIORITY_HIGH);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
xfree(r);
}
spin_unlock(&pdev->vpci->lock);
+ xfree(pdev->vpci->msix);
xfree(pdev->vpci->msi);
xfree(pdev->vpci);
pdev->vpci = NULL;
struct list_head mmcfg_regions;
rwlock_t mmcfg_lock;
+ /* List of MSI-X tables. */
+ struct list_head msix_tables;
+
/* List of permanently write-mapped pages. */
struct {
spinlock_t lock;
int pirq;
};
+/* Arch-specific MSI-X entry data for vPCI. */
+struct vpci_arch_msix_entry {
+ int pirq;
+};
+
enum stdvga_cache_state {
STDVGA_CACHE_UNINITIALIZED,
STDVGA_CACHE_ENABLED,
struct vpci_arch_msi arch;
#endif
} *msi;
+
+ /* MSI-X data. */
+ struct vpci_msix {
+#ifdef __XEN__
+ struct pci_dev *pdev;
+ /* List link. */
+ struct list_head next;
+ /* Table information. */
+#define VPCI_MSIX_TABLE 0
+#define VPCI_MSIX_PBA 1
+#define VPCI_MSIX_MEM_NUM 2
+ uint32_t tables[VPCI_MSIX_MEM_NUM];
+ /* Maximum number of vectors supported by the device. */
+ uint16_t max_entries : 12;
+ /* MSI-X enabled? */
+ bool enabled : 1;
+ /* Masked? */
+ bool masked : 1;
+ /* Entries. */
+ struct vpci_msix_entry {
+ uint64_t addr;
+ uint32_t data;
+ bool masked : 1;
+ bool updated : 1;
+ struct vpci_arch_msix_entry arch;
+ } entries[];
+#endif
+ } *msix;
};
struct vpci_vcpu {
void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev);
void vpci_msi_arch_init(struct vpci_msi *msi);
void vpci_msi_arch_print(const struct vpci_msi *msi);
+
+/* Arch-specific vPCI MSI-X helpers. */
+void vpci_msix_arch_mask_entry(struct vpci_msix_entry *entry,
+ const struct pci_dev *pdev, bool mask);
+int __must_check vpci_msix_arch_enable_entry(struct vpci_msix_entry *entry,
+ const struct pci_dev *pdev,
+ paddr_t table_base);
+int __must_check vpci_msix_arch_disable_entry(struct vpci_msix_entry *entry,
+ const struct pci_dev *pdev);
+void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry);
+int vpci_msix_arch_print(const struct vpci_msix *msix);
+
+/*
+ * Helper functions to fetch MSI-X related data. They are used by both the
+ * emulated MSI-X code and the BAR handlers.
+ */
+static inline paddr_t vmsix_table_base(const struct vpci *vpci, unsigned int nr)
+{
+ return vpci->header.bars[vpci->msix->tables[nr] & PCI_MSIX_BIRMASK].addr;
+}
+
+static inline paddr_t vmsix_table_addr(const struct vpci *vpci, unsigned int nr)
+{
+ return vmsix_table_base(vpci, nr) +
+ (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK);
+}
+
+/*
+ * Note regarding the size calculation of the PBA: the spec mentions "The last
+ * QWORD will not necessarily be fully populated", which implies that the PBA
+ * size is 64-bit aligned.
+ */
+static inline size_t vmsix_table_size(const struct vpci *vpci, unsigned int nr)
+{
+ return
+ (nr == VPCI_MSIX_TABLE) ? vpci->msix->max_entries * PCI_MSIX_ENTRY_SIZE
+ : ROUNDUP(DIV_ROUND_UP(vpci->msix->max_entries,
+ 8), 8);
+}
+
+static inline unsigned int vmsix_entry_nr(const struct vpci_msix *msix,
+ const struct vpci_msix_entry *entry)
+{
+ return entry - msix->entries;
+}
#endif /* __XEN__ */
#else /* !CONFIG_HAS_VPCI */