return flush_flags;
}
-void amd_iommu_set_root_page_table(uint32_t *dte, uint64_t root_ptr,
- uint16_t domain_id, uint8_t paging_mode,
- uint8_t valid)
+/*
+ * This function returns
+ * - -errno for errors,
+ * - 0 for a successful update, atomic when necessary
+ * - 1 for a successful but non-atomic update, which may need to be warned
+ * about by the caller.
+ */
+int amd_iommu_set_root_page_table(uint32_t *dte, uint64_t root_ptr,
+ uint16_t domain_id, uint8_t paging_mode,
+ unsigned int flags)
{
+ bool valid = flags & SET_ROOT_VALID;
uint32_t addr_hi, addr_lo, entry, dte0 = dte[0];
+ addr_lo = root_ptr & DMA_32BIT_MASK;
+ addr_hi = root_ptr >> 32;
+
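+    /*
+     * In-place update path: if the DTE is already in active use (both V and
+     * TV set), rewrite it as a whole, either atomically via cmpxchg16b when
+     * available, or as a best-effort non-atomic update when the device has
+     * unity maps (whose ranges presumably need to stay usable throughout).
+     * Otherwise fall through to the original field-by-field setup below.
+     */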
+ if ( get_field_from_reg_u32(dte0, IOMMU_DEV_TABLE_VALID_MASK,
+ IOMMU_DEV_TABLE_VALID_SHIFT) &&
+ get_field_from_reg_u32(dte0, IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
+ IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT) &&
+ (cpu_has_cx16 || (flags & SET_ROOT_WITH_UNITY_MAP)) )
+ {
+ union {
+ uint32_t dte[4];
+ uint64_t raw64[2];
+ __uint128_t raw128;
+ } ldte;
+ __uint128_t old;
+ int ret = 0;
+
+ memcpy(ldte.dte, dte, sizeof(ldte));
+ old = ldte.raw128;
+
+ set_field_in_reg_u32(domain_id, ldte.dte[2],
+ IOMMU_DEV_TABLE_DOMAIN_ID_MASK,
+ IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT, &ldte.dte[2]);
+
+ set_field_in_reg_u32(addr_hi, ldte.dte[1],
+ IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK,
+ IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT,
+ &ldte.dte[1]);
+ set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, ldte.dte[1],
+ IOMMU_DEV_TABLE_IO_WRITE_PERMISSION_MASK,
+ IOMMU_DEV_TABLE_IO_WRITE_PERMISSION_SHIFT,
+ &ldte.dte[1]);
+ set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, ldte.dte[1],
+ IOMMU_DEV_TABLE_IO_READ_PERMISSION_MASK,
+ IOMMU_DEV_TABLE_IO_READ_PERMISSION_SHIFT,
+ &ldte.dte[1]);
+
+ set_field_in_reg_u32(addr_lo >> PAGE_SHIFT, ldte.dte[0],
+ IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_MASK,
+ IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT,
+ &ldte.dte[0]);
+ set_field_in_reg_u32(paging_mode, ldte.dte[0],
+ IOMMU_DEV_TABLE_PAGING_MODE_MASK,
+ IOMMU_DEV_TABLE_PAGING_MODE_SHIFT, &ldte.dte[0]);
+ set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, ldte.dte[0],
+ IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
+ IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT,
+ &ldte.dte[0]);
+ set_field_in_reg_u32(valid ? IOMMU_CONTROL_ENABLED
+ : IOMMU_CONTROL_DISABLED,
+ ldte.dte[0], IOMMU_DEV_TABLE_VALID_MASK,
+ IOMMU_DEV_TABLE_VALID_SHIFT, &ldte.dte[0]);
+
+ if ( cpu_has_cx16 )
+ {
+ __uint128_t res = cmpxchg16b(dte, &old, &ldte.raw128);
+
+ /*
+ * Hardware does not update the DTE behind our backs, so the
+ * return value should match "old".
+ */
+ if ( res != old )
+ {
+ printk(XENLOG_ERR
+ "Dom%d: unexpected DTE %016lx_%016lx (expected %016lx_%016lx)\n",
+ domain_id,
+ (uint64_t)(res >> 64), (uint64_t)res,
+ (uint64_t)(old >> 64), (uint64_t)old);
+ ret = -EILSEQ;
+ }
+ }
+ else /* Best effort, updating domain_id last. */
+ {
+ uint64_t *ptr = (void *)dte;
+
+ write_atomic(ptr + 0, ldte.raw64[0]);
+ /* No barrier should be needed between these two. */
+ write_atomic(ptr + 1, ldte.raw64[1]);
+
+ ret = 1;
+ }
+
+ return ret;
+ }
+
if ( valid ||
get_field_from_reg_u32(dte0, IOMMU_DEV_TABLE_VALID_MASK,
IOMMU_DEV_TABLE_VALID_SHIFT) )
IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT, &entry);
dte[2] = entry;
- addr_lo = root_ptr & DMA_32BIT_MASK;
- addr_hi = root_ptr >> 32;
-
set_field_in_reg_u32(addr_hi, 0,
IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK,
IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT, &entry);
IOMMU_DEV_TABLE_VALID_MASK,
IOMMU_DEV_TABLE_VALID_SHIFT, &entry);
write_atomic(&dte[0], entry);
+
+ return 0;
+}
+
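+/* Read back the root page table address currently encoded in a DTE. */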
+paddr_t amd_iommu_get_root_page_table(const uint32_t *dte)
+{
+ uint32_t lo = get_field_from_reg_u32(
+ dte[0], IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_MASK,
+ IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT);
+ uint32_t hi = get_field_from_reg_u32(
+ dte[1], IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK,
+ IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT);
+
+ return ((paddr_t)hi << 32) | (lo << PAGE_SHIFT);
}
void iommu_dte_set_iotlb(uint32_t *dte, uint8_t i)
dte[0] = entry;
}
-static void amd_iommu_setup_domain_device(
+static int __must_check allocate_domain_resources(struct domain_iommu *hd)
+{
+ int rc;
+
+ spin_lock(&hd->arch.mapping_lock);
+ rc = amd_iommu_alloc_root(hd);
+ spin_unlock(&hd->arch.mapping_lock);
+
+ return rc;
+}
+
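+/*
+ * Check whether a domain has any device other than @exclude handled by the
+ * given IOMMU.  Used below when judging whether a non-atomic DTE update
+ * warrants the data corruption warning.
+ */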
+static bool any_pdev_behind_iommu(const struct domain *d,
+ const struct pci_dev *exclude,
+ const struct amd_iommu *iommu)
+{
+ const struct pci_dev *pdev;
+
+ for_each_pdev ( d, pdev )
+ {
+ if ( pdev == exclude )
+ continue;
+
+ if ( find_iommu_for_device(pdev->seg,
+ PCI_BDF2(pdev->bus, pdev->devfn)) == iommu )
+ return true;
+ }
+
+ return false;
+}
+
+static int __must_check amd_iommu_setup_domain_device(
struct domain *domain, struct amd_iommu *iommu,
u8 devfn, struct pci_dev *pdev)
{
- void *dte;
+ uint32_t *dte;
unsigned long flags;
- int req_id, valid = 1;
- int dte_i = 0;
+ unsigned int req_id, sr_flags;
+ int dte_i = 0, rc;
u8 bus = pdev->bus;
- const struct domain_iommu *hd = dom_iommu(domain);
+ struct domain_iommu *hd = dom_iommu(domain);
+ const struct ivrs_mappings *ivrs_dev;
+
+ BUG_ON(!hd->arch.paging_mode || !iommu->dev_table.buffer);
- BUG_ON( !hd->arch.root_table || !hd->arch.paging_mode ||
- !iommu->dev_table.buffer );
+ rc = allocate_domain_resources(hd);
+ if ( rc )
+ return rc;
- if ( iommu_hwdom_passthrough && is_hardware_domain(domain) )
- valid = 0;
+ req_id = get_dma_requestor_id(iommu->seg,
+ PCI_BDF2(pdev->bus, pdev->devfn));
+ ivrs_dev = &get_ivrs_mappings(iommu->seg)[req_id];
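+    /*
+     * Flags for amd_iommu_set_root_page_table(): don't set the DTE's V bit
+     * for the hardware domain in passthrough mode, and request the in-place
+     * update path for devices with unity maps (presumably so that unity
+     * mapped ranges stay usable across the update).
+     */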
+ sr_flags = (iommu_hwdom_passthrough && is_hardware_domain(domain)
+ ? 0 : SET_ROOT_VALID)
+ | (ivrs_dev->unity_map ? SET_ROOT_WITH_UNITY_MAP : 0);
if ( ats_enabled )
dte_i = 1;
/* get device-table entry */
req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+ ivrs_dev = &get_ivrs_mappings(iommu->seg)[req_id];
spin_lock_irqsave(&iommu->lock, flags);
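+    /*
+     * Three cases below: translation not yet enabled in the DTE (populate it
+     * afresh), enabled but pointing at a different root table (update in
+     * place, possibly with a warning when the update cannot be atomic), or
+     * already matching (nothing to do).
+     */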
if ( !is_translation_valid((u32 *)dte) )
{
/* bind DTE to domain page-tables */
- amd_iommu_set_root_page_table(
- (u32 *)dte, page_to_maddr(hd->arch.root_table), domain->domain_id,
- hd->arch.paging_mode, valid);
+ rc = amd_iommu_set_root_page_table(
+ dte, page_to_maddr(hd->arch.root_table),
+ domain->domain_id, hd->arch.paging_mode, sr_flags);
+ if ( rc )
+ {
+ ASSERT(rc < 0);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return rc;
+ }
if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
iommu_dte_set_iotlb((u32 *)dte, dte_i);
amd_iommu_flush_device(iommu, req_id);
+ }
+ else if ( amd_iommu_get_root_page_table(dte) !=
+ page_to_maddr(hd->arch.root_table) )
+ {
+        /*
+         * Strictly speaking, if the device is the only one with this
+         * requestor ID, it could be allowed to be re-assigned regardless of
+         * unity map presence.  But let's deal with that case only if it is
+         * actually found in the wild.
+         */
+ if ( req_id != PCI_BDF2(bus, devfn) &&
+ (sr_flags & SET_ROOT_WITH_UNITY_MAP) )
+ rc = -EOPNOTSUPP;
+ else
+ rc = amd_iommu_set_root_page_table(
+ dte, page_to_maddr(hd->arch.root_table),
+ domain->domain_id, hd->arch.paging_mode, sr_flags);
+ if ( rc < 0 )
+ {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return rc;
+ }
+ if ( rc &&
+ domain != pdev->domain &&
+             /*
+              * Because the DTE's domain ID field is updated non-atomically
+              * and last, there is a short window during which TLB entries
+              * with the old domain ID but the new page tables may have been
+              * inserted.  This could affect I/O of other devices using this
+              * same (old) domain ID.  Such updating therefore is not a
+              * problem if this was the only device associated with the old
+              * domain ID.  Diverting I/O of any of a dying domain's devices
+              * to the quarantine page tables is intended anyway.
+              */
+ !pdev->domain->is_dying &&
+ (any_pdev_behind_iommu(pdev->domain, pdev, iommu) ||
+ pdev->phantom_stride) )
+ printk(" %04x:%02x:%02x.%u: reassignment may cause %pd data corruption\n",
+ pdev->seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pdev->domain);
- AMD_IOMMU_DEBUG("Setup I/O page table: device id = %#x, type = %#x, "
- "root table = %#"PRIx64", "
- "domain = %d, paging mode = %d\n",
- req_id, pdev->type,
- page_to_maddr(hd->arch.root_table),
- domain->domain_id, hd->arch.paging_mode);
+ if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
+ iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
+ ASSERT(get_field_from_reg_u32(
+ dte[3], IOMMU_DEV_TABLE_IOTLB_SUPPORT_MASK,
+ IOMMU_DEV_TABLE_IOTLB_SUPPORT_SHIFT) == dte_i);
+
+ amd_iommu_flush_device(iommu, req_id);
}
spin_unlock_irqrestore(&iommu->lock, flags);
+ AMD_IOMMU_DEBUG("Setup I/O page table: device id = %#x, type = %#x, "
+ "root table = %#"PRIx64", "
+ "domain = %d, paging mode = %d\n",
+ req_id, pdev->type,
+ page_to_maddr(hd->arch.root_table),
+ domain->domain_id, hd->arch.paging_mode);
+
ASSERT(pcidevs_locked());
if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
amd_iommu_flush_iotlb(devfn, pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
}
+
+ return 0;
}
int __init amd_iov_detect(void)
return 0;
}
-static int __must_check allocate_domain_resources(struct domain_iommu *hd)
-{
- int rc;
-
- spin_lock(&hd->arch.mapping_lock);
- rc = amd_iommu_alloc_root(hd);
- spin_unlock(&hd->arch.mapping_lock);
-
- return rc;
-}
-
int __read_mostly amd_iommu_min_paging_mode = 1;
static int amd_iommu_domain_init(struct domain *d)
{
struct amd_iommu *iommu;
int bdf, rc;
- struct domain_iommu *t = dom_iommu(target);
const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg);
bdf = PCI_BDF2(pdev->bus, pdev->devfn);
return -ENODEV;
}
- amd_iommu_disable_domain_device(source, iommu, devfn, pdev);
+ rc = amd_iommu_setup_domain_device(target, iommu, devfn, pdev);
+ if ( rc )
+ return rc;
+
+ if ( devfn == pdev->devfn && pdev->domain != target )
+ {
+ list_move(&pdev->domain_list, &target->arch.pdev_list);
+ pdev->domain = target;
+ }
/*
* If the device belongs to the hardware domain, and it has a unity mapping,
return rc;
}
- if ( devfn == pdev->devfn && pdev->domain != dom_io )
- {
- list_move(&pdev->domain_list, &dom_io->arch.pdev_list);
- pdev->domain = dom_io;
- }
-
- rc = allocate_domain_resources(t);
- if ( rc )
- return rc;
-
- amd_iommu_setup_domain_device(target, iommu, devfn, pdev);
AMD_IOMMU_DEBUG("Re-assign %04x:%02x:%02x.%u from dom%d to dom%d\n",
pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
source->domain_id, target->domain_id);
- if ( devfn == pdev->devfn && pdev->domain != target )
- {
- list_move(&pdev->domain_list, &target->arch.pdev_list);
- pdev->domain = target;
- }
-
return 0;
}
return -ENODEV;
}
- amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
- return 0;
+ return amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
}
static int amd_iommu_remove_device(u8 devfn, struct pci_dev *pdev)