}
set_bit(i, iommu->domid_bitmap);
+ context->hi &= ~(((1 << DID_FIELD_WIDTH) - 1) << DID_HIGH_OFFSET);
context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET;
return 0;
}
}
}
+/*
+ * This function returns
+ * - a negative errno value upon error,
+ * - zero upon success when previously the entry was non-present, or this isn't
+ * the "main" request for a device (pdev == NULL), or for no-op quarantining
+ * assignments,
+ * - positive (one) upon success when previously the entry was present and this
+ * is the "main" request for a device (pdev != NULL).
+ */
int domain_context_mapping_one(
struct domain *domain,
struct iommu *iommu,
- u8 bus, u8 devfn, const struct pci_dev *pdev)
+ uint8_t bus, uint8_t devfn, const struct pci_dev *pdev,
+ unsigned int mode)
{
struct domain_iommu *hd = dom_iommu(domain);
- struct context_entry *context, *context_entries;
+ struct context_entry *context, *context_entries, lctxt;
+ __uint128_t old;
u64 maddr, pgd_maddr;
- u16 seg = iommu->intel->drhd->segment;
+ uint16_t seg = iommu->intel->drhd->segment, prev_did = 0;
+ struct domain *prev_dom = NULL;
int agaw, rc, ret;
bool_t flush_dev_iotlb;
maddr = bus_to_context_maddr(iommu, bus);
context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
context = &context_entries[devfn];
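+ /*
+ * Work on a local copy of the entry; remember its raw original value
+ * for the compare-and-exchange update further down.
+ */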
+ old = (lctxt = *context).full;
- if ( context_present(*context) )
+ if ( context_present(lctxt) )
{
- spin_unlock(&iommu->lock);
- unmap_vtd_domain_page(context_entries);
- return 0;
+ domid_t domid;
+
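+ /* Look up the domain currently owning this entry via its DID. */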
+ prev_did = context_domain_id(lctxt);
+ domid = iommu->domid_map[prev_did];
+ if ( domid < DOMID_FIRST_RESERVED )
+ prev_dom = rcu_lock_domain_by_id(domid);
+ else if ( domid == DOMID_IO )
+ prev_dom = rcu_lock_domain(dom_io);
+ if ( !prev_dom )
+ {
+ spin_unlock(&iommu->lock);
+ unmap_vtd_domain_page(context_entries);
+ dprintk(XENLOG_DEBUG VTDPREFIX,
+ "no domain for did %u (nr_dom %u)\n",
+ prev_did, cap_ndoms(iommu->cap));
+ return -ESRCH;
+ }
}
if ( iommu_hwdom_passthrough && is_hardware_domain(domain) )
{
- context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
+ context_set_translation_type(lctxt, CONTEXT_TT_PASS_THRU);
agaw = level_to_agaw(iommu->nr_pt_levels);
}
else
spin_unlock(&hd->arch.mapping_lock);
spin_unlock(&iommu->lock);
unmap_vtd_domain_page(context_entries);
+ if ( prev_dom )
+ rcu_unlock_domain(prev_dom);
return -ENOMEM;
}
}
goto nomem;
}
- context_set_address_root(*context, pgd_maddr);
+ context_set_address_root(lctxt, pgd_maddr);
if ( ats_enabled && ecap_dev_iotlb(iommu->ecap) )
- context_set_translation_type(*context, CONTEXT_TT_DEV_IOTLB);
+ context_set_translation_type(lctxt, CONTEXT_TT_DEV_IOTLB);
else
- context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
+ context_set_translation_type(lctxt, CONTEXT_TT_MULTI_LEVEL);
spin_unlock(&hd->arch.mapping_lock);
}
- if ( context_set_domain_id(context, domain, iommu) )
+ rc = context_set_domain_id(&lctxt, domain, iommu);
+ if ( rc )
{
+ unlock:
spin_unlock(&iommu->lock);
unmap_vtd_domain_page(context_entries);
- return -EFAULT;
+ if ( prev_dom )
+ rcu_unlock_domain(prev_dom);
+ return rc;
+ }
+
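+ /*
+ * A previously non-present entry needs its remaining fields populated;
+ * a re-used entry must already hold the values we would set.
+ */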
+ if ( !prev_dom )
+ {
+ context_set_address_width(lctxt, agaw);
+ context_set_fault_enable(lctxt);
+ context_set_present(lctxt);
+ }
+ else if ( prev_dom == domain )
+ {
+ ASSERT(lctxt.full == context->full);
+ rc = !!pdev;
+ goto unlock;
+ }
+ else
+ {
+ ASSERT(context_address_width(lctxt) == agaw);
+ ASSERT(!context_fault_disable(lctxt));
+ }
+
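+ /* Install the new entry: atomically when the CPU supports cmpxchg16b. */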
+ if ( cpu_has_cx16 )
+ {
+ __uint128_t res = cmpxchg16b(context, &old, &lctxt.full);
+
+ /*
+ * Hardware does not update the context entry behind our backs,
+ * so the return value should match "old".
+ */
+ if ( res != old )
+ {
+ if ( pdev )
+ check_cleanup_domid_map(domain, pdev, iommu);
+ printk(XENLOG_ERR
+ "%04x:%02x:%02x.%u: unexpected context entry %016lx_%016lx (expected %016lx_%016lx)\n",
+ seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ (uint64_t)(res >> 64), (uint64_t)res,
+ (uint64_t)(old >> 64), (uint64_t)old);
+ rc = -EILSEQ;
+ goto unlock;
+ }
+ }
+ else if ( !prev_dom || !(mode & MAP_WITH_RMRR) )
+ {
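+ /*
+ * Without cmpxchg16b, clear the present bit first so the entry is
+ * never consumed half-updated; it becomes present again only with
+ * the final write of the low half.
+ */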
+ context_clear_present(*context);
+ iommu_sync_cache(context, sizeof(*context));
+
+ write_atomic(&context->hi, lctxt.hi);
+ /* No barrier should be needed between these two. */
+ write_atomic(&context->lo, lctxt.lo);
+ }
+ else /* Best effort, updating DID last. */
+ {
+ /*
+ * The DID field is updated non-atomically and last, so during a short
+ * window TLB entries with the old domain ID but the new page tables
+ * may be inserted. This could affect I/O
+ * of other devices using this same (old) domain ID. Such updating
+ * therefore is not a problem if this was the only device associated
+ * with the old domain ID. Diverting I/O of any of a dying domain's
+ * devices to the quarantine page tables is intended anyway.
+ */
+ if ( !(mode & (MAP_OWNER_DYING | MAP_SINGLE_DEVICE)) )
+ printk(XENLOG_WARNING VTDPREFIX
+ " %04x:%02x:%02x.%u: reassignment may cause %pd data corruption\n",
+ seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), prev_dom);
+
+ write_atomic(&context->lo, lctxt.lo);
+ /* No barrier should be needed between these two. */
+ write_atomic(&context->hi, lctxt.hi);
}
- context_set_address_width(*context, agaw);
- context_set_fault_enable(*context);
- context_set_present(*context);
iommu_sync_cache(context, sizeof(struct context_entry));
spin_unlock(&iommu->lock);
- /* Context entry was previously non-present (with domid 0). */
- rc = iommu_flush_context_device(iommu, 0, PCI_BDF2(bus, devfn),
- DMA_CCMD_MASK_NOBIT, 1);
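+ /*
+ * Flush against the previous DID; signal a previously non-present
+ * entry only when there really was none.
+ */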
+ rc = iommu_flush_context_device(iommu, prev_did, PCI_BDF2(bus, devfn),
+ DMA_CCMD_MASK_NOBIT, !prev_dom);
flush_dev_iotlb = !!find_ats_dev_drhd(iommu);
- ret = iommu_flush_iotlb_dsi(iommu, 0, 1, flush_dev_iotlb);
+ ret = iommu_flush_iotlb_dsi(iommu, prev_did, !prev_dom, flush_dev_iotlb);
/*
* The current logic for returns:
unmap_vtd_domain_page(context_entries);
if ( !seg && !rc )
- rc = me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC);
+ rc = me_wifi_quirk(domain, bus, devfn, mode);
- return rc;
+ if ( rc )
+ {
+ if ( !prev_dom )
+ domain_context_unmap_one(domain, iommu, bus, devfn);
+ else if ( prev_dom != domain ) /* Avoid infinite recursion. */
+ domain_context_mapping_one(prev_dom, iommu, bus, devfn, pdev,
+ mode & MAP_WITH_RMRR);
+ }
+
+ if ( prev_dom )
+ rcu_unlock_domain(prev_dom);
+
+ return rc ?: pdev && prev_dom;
}
+static int domain_context_unmap(struct domain *d, uint8_t devfn,
+ struct pci_dev *pdev);
+
static int domain_context_mapping(struct domain *domain, u8 devfn,
struct pci_dev *pdev)
{
struct acpi_drhd_unit *drhd;
+ const struct acpi_rmrr_unit *rmrr;
int ret = 0;
- u8 seg = pdev->seg, bus = pdev->bus, secbus;
+ unsigned int i, mode = 0;
+ uint16_t seg = pdev->seg, bdf;
+ uint8_t bus = pdev->bus, secbus;
drhd = acpi_find_matched_drhd_unit(pdev);
if ( !drhd )
ASSERT(pcidevs_locked());
+ for_each_rmrr_device( rmrr, bdf, i )
+ {
+ if ( rmrr->segment != pdev->seg ||
+ bdf != PCI_BDF2(pdev->bus, pdev->devfn) )
+ continue;
+
+ mode |= MAP_WITH_RMRR;
+ break;
+ }
+
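+ /*
+ * When re-assigning from another domain, record whether that owner is
+ * dying or has no other device behind this IOMMU; this is used to
+ * suppress the data corruption warning for the non-atomic entry update.
+ */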
+ if ( domain != pdev->domain )
+ {
+ if ( pdev->domain->is_dying )
+ mode |= MAP_OWNER_DYING;
+ else if ( drhd &&
+ !any_pdev_behind_iommu(pdev->domain, pdev, drhd->iommu) &&
+ !pdev->phantom_stride )
+ mode |= MAP_SINGLE_DEVICE;
+ }
+
switch ( pdev->type )
{
+ bool prev_present;
+
case DEV_TYPE_PCI_HOST_BRIDGE:
if ( iommu_debug )
printk(VTDPREFIX "d%d:Hostbridge: skip %04x:%02x:%02x.%u map\n",
domain->domain_id, seg, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
- pdev);
+ pdev, mode);
+ if ( ret > 0 )
+ ret = 0;
if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
enable_ats_device(pdev, &drhd->iommu->ats_devices);
PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
- pdev);
- if ( ret )
+ pdev, mode);
+ if ( ret < 0 )
break;
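+ /* A positive return value means the entry was previously present. */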
+ prev_present = ret;
+ ret = 0;
if ( find_upstream_bridge(seg, &bus, &devfn, &secbus) < 1 )
break;
+ /*
+ * Strictly speaking if the device is the only one behind this bridge
+ * and the only one with this (secbus,0,0) tuple, it could be allowed
+ * to be re-assigned regardless of RMRR presence. But let's deal with
+ * that case only if it is actually found in the wild.
+ */
+ if ( prev_present && (mode & MAP_WITH_RMRR) &&
+ domain != pdev->domain )
+ ret = -EOPNOTSUPP;
+
/*
* Mapping a bridge should, if anything, pass the struct pci_dev of
* that bridge. Since bridges don't normally get assigned to guests,
* their owner would be the wrong one. Pass NULL instead.
*/
- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
- NULL);
+ if ( ret >= 0 )
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
+ NULL, mode);
/*
* Devices behind PCIe-to-PCI/PCIx bridge may generate different
if ( !ret && pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE &&
(secbus != pdev->bus || pdev->devfn != 0) )
ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0,
- NULL);
+ NULL, mode);
+
+ if ( ret )
+ {
+ if ( !prev_present )
+ domain_context_unmap(domain, devfn, pdev);
+ else if ( pdev->domain != domain ) /* Avoid infinite recursion. */
+ domain_context_mapping(pdev->domain, devfn, pdev);
+ }
break;
{
int ret;
- ret = domain_context_unmap(source, devfn, pdev);
- if ( ret )
- return ret;
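+ /*
+ * Make sure posted-interrupt hooks are in place before the device gets
+ * mapped into the target's context.
+ */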
+ if ( !has_arch_pdevs(target) )
+ vmx_pi_hooks_assign(target);
/*
* Devices assigned to untrusted domains (here assumed to be any domU)
if ( (target != hardware_domain) && !iommu_intremap )
untrusted_msi = true;
+ ret = domain_context_mapping(target, devfn, pdev);
+ if ( ret )
+ {
+ if ( !has_arch_pdevs(target) )
+ vmx_pi_hooks_deassign(target);
+ return ret;
+ }
+
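+ /*
+ * For the device's main function, check whether the source domain's
+ * DID mapping on this IOMMU can be cleaned up now.
+ */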
+ if ( pdev->devfn == devfn )
+ {
+ const struct acpi_drhd_unit *drhd = acpi_find_matched_drhd_unit(pdev);
+
+ if ( drhd )
+ check_cleanup_domid_map(source, pdev, drhd->iommu);
+ }
+
+ if ( devfn == pdev->devfn && pdev->domain != target )
+ {
+ list_move(&pdev->domain_list, &target->arch.pdev_list);
+ pdev->domain = target;
+ }
+
+ if ( !has_arch_pdevs(source) )
+ vmx_pi_hooks_deassign(source);
+
/*
* If the device belongs to the hardware domain, and it has RMRR, don't
* remove it from the hardware domain, because BIOS may use RMRR at
}
}
- if ( devfn == pdev->devfn && pdev->domain != dom_io )
- {
- list_move(&pdev->domain_list, &dom_io->arch.pdev_list);
- pdev->domain = dom_io;
- }
-
- if ( !has_arch_pdevs(source) )
- vmx_pi_hooks_deassign(source);
-
- if ( !has_arch_pdevs(target) )
- vmx_pi_hooks_assign(target);
-
- ret = domain_context_mapping(target, devfn, pdev);
- if ( ret )
- {
- if ( !has_arch_pdevs(target) )
- vmx_pi_hooks_deassign(target);
-
- return ret;
- }
-
- if ( devfn == pdev->devfn && pdev->domain != target )
- {
- list_move(&pdev->domain_list, &target->arch.pdev_list);
- pdev->domain = target;
- }
-
- return ret;
+ return 0;
}
static int intel_iommu_assign_device(