}
}
-int __init amd_iommu_quarantine_init(struct domain *d)
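+/*
+ * Fill all not-present slots at this and lower levels with references to
+ * shared per-level tables, such that every leaf entry ends up pointing at
+ * the single scratch page in pgs[0].  Pages allocated here, as well as
+ * pre-existing sub-tables encountered, are queued on the device's
+ * pgtables_list for later teardown.
+ */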
+static int fill_qpt(uint64_t *this, unsigned int level,
+ struct page_info *pgs[IOMMU_MAX_PT_LEVELS],
+ struct pci_dev *pdev)
{
- struct domain_iommu *hd = dom_iommu(d);
+ unsigned int i;
+ int rc = 0;
+
+ for ( i = 0; !rc && i < PTE_PER_TABLE_SIZE; ++i )
+ {
+ uint32_t *pte = (uint32_t *)&this[i];
+ uint64_t *next;
+
+ if ( !get_field_from_reg_u32(pte[0], IOMMU_PTE_PRESENT_MASK,
+ IOMMU_PTE_PRESENT_SHIFT) )
+ {
+ if ( !pgs[level] )
+ {
+ /*
+ * The pgtable allocator is fine for the leaf page, as well as
+ * page table pages, and the resulting allocations are always
+ * zeroed.
+ */
+ pgs[level] = alloc_amd_iommu_pgtable();
+ if ( !pgs[level] )
+ {
+ rc = -ENOMEM;
+ break;
+ }
+
+ page_list_add(pgs[level], &pdev->arch.pgtables_list);
+
+ if ( level )
+ {
+ next = __map_domain_page(pgs[level]);
+ rc = fill_qpt(next, level - 1, pgs, pdev);
+ unmap_domain_page(next);
+ }
+ }
+
+ /*
+ * PDEs are essentially a subset of PTEs, so this function
+ * is fine to use even at the leaf.
+ */
+ set_iommu_pde_present(pte, mfn_x(page_to_mfn(pgs[level])), level,
+ true, true);
+ }
+ else if ( level &&
+ get_field_from_reg_u32(pte[0],
+ IOMMU_PDE_NEXT_LEVEL_MASK,
+ IOMMU_PDE_NEXT_LEVEL_SHIFT) )
+ {
+ paddr_t addr_hi = get_field_from_reg_u32(pte[1],
+ IOMMU_PTE_ADDR_HIGH_MASK,
+ IOMMU_PTE_ADDR_HIGH_SHIFT);
+ paddr_t addr_lo = get_field_from_reg_u32(pte[0],
+ IOMMU_PTE_ADDR_LOW_MASK,
+ IOMMU_PTE_ADDR_LOW_SHIFT);
+ unsigned long mfn = (addr_hi << (32 - PAGE_SHIFT)) | addr_lo;
+
+ page_list_add(mfn_to_page(_mfn(mfn)), &pdev->arch.pgtables_list);
+ next = map_domain_page(_mfn(mfn));
+ rc = fill_qpt(next, level - 1, pgs, pdev);
+ unmap_domain_page(next);
+ }
+ }
+
+ return rc;
+}
+
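+/*
+ * Establish the per-device quarantine page tables: reserve the device's
+ * unity-map regions (transiently through dom_io), then point every other
+ * leaf slot at a single writable scratch page, so stray DMA is sunk
+ * harmlessly.
+ */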
+int amd_iommu_quarantine_init(struct pci_dev *pdev)
+{
+ struct domain_iommu *hd = dom_iommu(dom_io);
unsigned long end_gfn =
1ul << (DEFAULT_DOMAIN_ADDRESS_WIDTH - PAGE_SHIFT);
unsigned int level = amd_iommu_get_paging_mode(end_gfn);
- uint64_t *table;
+ unsigned int req_id = get_dma_requestor_id(pdev->seg,
+ PCI_BDF2(pdev->bus, pdev->devfn));
+ const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg);
+ int rc;
- if ( hd->arch.root_table )
+ ASSERT(pcidevs_locked());
+ ASSERT(!hd->arch.root_table);
+
+ ASSERT(pdev->arch.pseudo_domid != DOMID_INVALID);
+
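+    /*
+     * If the device was quarantined before, re-use the existing page tables;
+     * just scrub the scratch (leaf) page of any prior DMA writes.
+     */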
+ if ( pdev->arch.amd.root_table )
{
- ASSERT_UNREACHABLE();
+ clear_domain_page(pdev->arch.leaf_mfn);
return 0;
}
- spin_lock(&hd->arch.mapping_lock);
+ pdev->arch.amd.root_table = alloc_amd_iommu_pgtable();
+ if ( !pdev->arch.amd.root_table )
+ return -ENOMEM;
- hd->arch.root_table = alloc_amd_iommu_pgtable();
- if ( !hd->arch.root_table )
- goto out;
+ /* Transiently install the root into DomIO, for iommu_identity_mapping(). */
+ hd->arch.root_table = pdev->arch.amd.root_table;
- table = __map_domain_page(hd->arch.root_table);
- while ( level )
+ rc = amd_iommu_reserve_domain_unity_map(dom_io,
+ ivrs_mappings[req_id].unity_map,
+ 0);
+
+ iommu_identity_map_teardown(dom_io);
+ hd->arch.root_table = NULL;
+
+ if ( rc )
+ printk("%04x:%02x:%02x.%u: quarantine unity mapping failed\n",
+ pdev->seg, pdev->bus,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ else
{
- struct page_info *pg;
- unsigned int i;
-
- /*
- * The pgtable allocator is fine for the leaf page, as well as
- * page table pages, and the resulting allocations are always
- * zeroed.
- */
- pg = alloc_amd_iommu_pgtable();
- if ( !pg )
- break;
-
- for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ )
- {
- uint32_t *pde = (uint32_t *)&table[i];
+ uint64_t *root;
+ struct page_info *pgs[IOMMU_MAX_PT_LEVELS] = {};
- /*
- * PDEs are essentially a subset of PTEs, so this function
- * is fine to use even at the leaf.
- */
- set_iommu_pde_present(pde, mfn_x(page_to_mfn(pg)), level - 1,
- false, true);
- }
+ spin_lock(&hd->arch.mapping_lock);
- unmap_domain_page(table);
- table = __map_domain_page(pg);
- level--;
+ root = __map_domain_page(pdev->arch.amd.root_table);
+ rc = fill_qpt(root, level - 1, pgs, pdev);
+ unmap_domain_page(root);
+
+ pdev->arch.leaf_mfn = page_to_mfn(pgs[0]);
+
+ spin_unlock(&hd->arch.mapping_lock);
}
- unmap_domain_page(table);
- out:
- spin_unlock(&hd->arch.mapping_lock);
+ if ( rc )
+ amd_iommu_quarantine_teardown(pdev);
+
+ return rc;
+}
+
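+/* Free all quarantine page tables accumulated for this device. */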
+void amd_iommu_quarantine_teardown(struct pci_dev *pdev)
+{
+ struct page_info *pg;
+
+ ASSERT(pcidevs_locked());
+
+ if ( !pdev->arch.amd.root_table )
+ return;
+
+ while ( (pg = page_list_remove_head(&pdev->arch.pgtables_list)) )
+ free_amd_iommu_pgtable(pg);
- /* Pages leaked in failure case */
- return level ? -ENOMEM : 0;
+ pdev->arch.amd.root_table = NULL;
}
/*
u8 bus = pdev->bus;
struct domain_iommu *hd = dom_iommu(domain);
const struct ivrs_mappings *ivrs_dev;
+ const struct page_info *root_pg;
+ domid_t domid;
BUG_ON(!hd->arch.paging_mode || !iommu->dev_table.buffer);
dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
ivrs_dev = &get_ivrs_mappings(iommu->seg)[req_id];
+ if ( domain != dom_io )
+ {
+ root_pg = hd->arch.root_table;
+ domid = domain->domain_id;
+ }
+ else
+ {
+ root_pg = pdev->arch.amd.root_table;
+ domid = pdev->arch.pseudo_domid;
+ }
+
spin_lock_irqsave(&iommu->lock, flags);
if ( !is_translation_valid((u32 *)dte) )
{
/* bind DTE to domain page-tables */
rc = amd_iommu_set_root_page_table(
- dte, page_to_maddr(hd->arch.root_table),
- domain->domain_id, hd->arch.paging_mode, sr_flags);
+ dte, page_to_maddr(root_pg), domid,
+ hd->arch.paging_mode, sr_flags);
if ( rc )
{
ASSERT(rc < 0);
amd_iommu_flush_device(iommu, req_id);
}
- else if ( amd_iommu_get_root_page_table(dte) !=
- page_to_maddr(hd->arch.root_table) )
+ else if ( amd_iommu_get_root_page_table(dte) != page_to_maddr(root_pg) )
{
/*
* Strictly speaking if the device is the only one with this requestor
rc = -EOPNOTSUPP;
else
rc = amd_iommu_set_root_page_table(
- dte, page_to_maddr(hd->arch.root_table),
- domain->domain_id, hd->arch.paging_mode, sr_flags);
+ dte, page_to_maddr(root_pg), domid,
+ hd->arch.paging_mode, sr_flags);
if ( rc < 0 )
{
spin_unlock_irqrestore(&iommu->lock, flags);
* intended anyway.
*/
!pdev->domain->is_dying &&
+ pdev->domain != dom_io &&
(any_pdev_behind_iommu(pdev->domain, pdev, iommu) ||
pdev->phantom_stride) )
printk(" %04x:%02x:%02x.%u: reassignment may cause %pd data corruption\n",
AMD_IOMMU_DEBUG("Setup I/O page table: device id = %#x, type = %#x, "
"root table = %#"PRIx64", "
"domain = %d, paging mode = %d\n",
- req_id, pdev->type,
- page_to_maddr(hd->arch.root_table),
- domain->domain_id, hd->arch.paging_mode);
+ req_id, pdev->type, page_to_maddr(root_pg),
+ domid, hd->arch.paging_mode);
ASSERT(pcidevs_locked());
int amd_iommu_alloc_root(struct domain_iommu *hd)
{
- if ( unlikely(!hd->arch.root_table) )
+ if ( unlikely(!hd->arch.root_table) && hd != dom_iommu(dom_io) )
{
hd->arch.root_table = alloc_amd_iommu_pgtable();
if ( !hd->arch.root_table )
AMD_IOMMU_DEBUG("Disable: device id = %#x, "
"domain = %d, paging mode = %d\n",
- req_id, domain->domain_id,
+ req_id,
+ get_field_from_reg_u32(((uint32_t *)dte)[2],
+ IOMMU_DEV_TABLE_DOMAIN_ID_MASK,
+ IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT),
dom_iommu(domain)->arch.paging_mode);
}
spin_unlock_irqrestore(&iommu->lock, flags);
amd_iommu_disable_domain_device(pdev->domain, iommu, devfn, pdev);
+ amd_iommu_quarantine_teardown(pdev);
+
iommu_free_domid(pdev->arch.pseudo_domid, iommu->domid_map);
pdev->arch.pseudo_domid = DOMID_INVALID;
#include "vtd.h"
#include "../ats.h"
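+/*
+ * For devices assigned to dom_io (i.e. quarantined ones), use the per-device
+ * pseudo domain ID and the per-device page tables instead of dom_io's own.
+ */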
+#define DEVICE_DOMID(d, pdev) ((d) != dom_io ? (d)->domain_id \
+ : (pdev)->arch.pseudo_domid)
+#define DEVICE_PGTABLE(d, pdev) ((d) != dom_io \
+ ? dom_iommu(d)->arch.pgd_maddr \
+ : (pdev)->arch.vtd.pgd_maddr)
+
/* Possible unfiltered LAPIC/MSI messages from untrusted sources? */
bool __read_mostly untrusted_msi;
#define DID_FIELD_WIDTH 16
#define DID_HIGH_OFFSET 8
+
+/*
+ * This function may have "context" passed as NULL, to merely obtain a DID
+ * for "domid".
+ */
static int context_set_domain_id(struct context_entry *context,
domid_t domid, struct iommu *iommu)
{
unsigned long nr_dom, i;
int found = 0;
- ASSERT(spin_is_locked(&iommu->lock));
+ ASSERT(pcidevs_locked());
nr_dom = cap_ndoms(iommu->cap);
i = find_first_bit(iommu->domid_bitmap, nr_dom);
}
set_bit(i, iommu->domid_bitmap);
- context->hi &= ~(((1 << DID_FIELD_WIDTH) - 1) << DID_HIGH_OFFSET);
- context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET;
+
+ if ( context )
+ {
+ context->hi &= ~(((1 << DID_FIELD_WIDTH) - 1) << DID_HIGH_OFFSET);
+ context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET;
+ }
+
return 0;
}
const struct pci_dev *exclude,
struct iommu *iommu)
{
- bool found = any_pdev_behind_iommu(d, exclude, iommu);
+    bool found;
+
+ if ( d == dom_io )
+ return;
+
+ found = any_pdev_behind_iommu(d, exclude, iommu);
/*
* Hidden devices are associated with DomXEN but usable by the hardware
* domain. Hence they need considering here as well.
domid = iommu->domid_map[prev_did];
if ( domid < DOMID_FIRST_RESERVED )
prev_dom = rcu_lock_domain_by_id(domid);
- else if ( domid == DOMID_IO )
+ else if ( pdev ? domid == pdev->arch.pseudo_domid : domid > DOMID_MASK )
prev_dom = rcu_lock_domain(dom_io);
if ( !prev_dom )
{
{
if ( !prev_dom )
domain_context_unmap_one(domain, iommu, bus, devfn,
- domain->domain_id);
+ DEVICE_DOMID(domain, pdev));
else if ( prev_dom != domain ) /* Avoid infinite recursion. */
- {
- hd = dom_iommu(prev_dom);
domain_context_mapping_one(prev_dom, iommu, bus, devfn, pdev,
- domain->domain_id,
- hd->arch.pgd_maddr,
+ DEVICE_DOMID(prev_dom, pdev),
+ DEVICE_PGTABLE(prev_dom, pdev),
mode & MAP_WITH_RMRR);
- }
}
if ( prev_dom )
{
struct acpi_drhd_unit *drhd;
const struct acpi_rmrr_unit *rmrr;
- paddr_t pgd_maddr = dom_iommu(domain)->arch.pgd_maddr;
+ paddr_t pgd_maddr = DEVICE_PGTABLE(domain, pdev);
domid_t orig_domid = pdev->arch.pseudo_domid;
int ret = 0;
unsigned int i, mode = 0;
break;
}
- if ( domain != pdev->domain )
+ if ( domain != pdev->domain && pdev->domain != dom_io )
{
if ( pdev->domain->is_dying )
mode |= MAP_OWNER_DYING;
printk(VTDPREFIX "d%d:PCIe: map %04x:%02x:%02x.%u\n",
domain->domain_id, seg, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
- pdev, domain->domain_id, pgd_maddr,
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn, pdev,
+ DEVICE_DOMID(domain, pdev), pgd_maddr,
mode);
if ( ret > 0 )
ret = 0;
PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
- pdev, domain->domain_id, pgd_maddr,
- mode);
+ pdev, DEVICE_DOMID(domain, pdev),
+ pgd_maddr, mode);
if ( ret < 0 )
break;
prev_present = ret;
*/
if ( ret >= 0 )
ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn,
- NULL, domain->domain_id, pgd_maddr,
- mode);
+ NULL, DEVICE_DOMID(domain, pdev),
+ pgd_maddr, mode);
/*
* Devices behind PCIe-to-PCI/PCIx bridge may generate different
if ( !ret && pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE &&
(secbus != pdev->bus || pdev->devfn != 0) )
ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0,
- NULL, domain->domain_id, pgd_maddr,
- mode);
+ NULL, DEVICE_DOMID(domain, pdev),
+ pgd_maddr, mode);
if ( ret )
{
domain->domain_id, seg, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = domain_context_unmap_one(domain, iommu, bus, devfn,
- domain->domain_id);
+ DEVICE_DOMID(domain, pdev));
if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
disable_ats_device(pdev);
printk(VTDPREFIX "d%d:PCI: unmap %04x:%02x:%02x.%u\n",
domain->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = domain_context_unmap_one(domain, iommu, bus, devfn,
- domain->domain_id);
+ DEVICE_DOMID(domain, pdev));
if ( ret )
break;
if ( find_upstream_bridge(seg, &tmp_bus, &tmp_devfn, &secbus) < 1 )
break;
+ ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn,
+ DEVICE_DOMID(domain, pdev));
/* PCIe to PCI/PCIx bridge */
- if ( pdev_type(seg, tmp_bus, tmp_devfn) == DEV_TYPE_PCIe2PCI_BRIDGE )
- {
- ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn,
- domain->domain_id);
- if ( !ret )
- ret = domain_context_unmap_one(domain, iommu, secbus, 0,
- domain->domain_id);
- }
- else /* Legacy PCI bridge */
- ret = domain_context_unmap_one(domain, iommu, tmp_bus, tmp_devfn,
- domain->domain_id);
+ if ( !ret && pdev_type(seg, tmp_bus, tmp_devfn) == DEV_TYPE_PCIe2PCI_BRIDGE )
+ ret = domain_context_unmap_one(domain, iommu, secbus, 0,
+ DEVICE_DOMID(domain, pdev));
break;
spin_unlock(&hd->arch.mapping_lock);
}
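+/*
+ * Free the quarantine page tables accumulated for this device and, if the
+ * IOMMU is known, drop its pseudo domain ID from that IOMMU's DID mappings.
+ */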
+static void quarantine_teardown(struct pci_dev *pdev,
+ const struct acpi_drhd_unit *drhd)
+{
+ struct page_info *pg;
+
+ ASSERT(pcidevs_locked());
+
+ if ( !pdev->arch.vtd.pgd_maddr )
+ return;
+
+ while ( (pg = page_list_remove_head(&pdev->arch.pgtables_list)) )
+ free_domheap_page(pg);
+
+ pdev->arch.vtd.pgd_maddr = 0;
+
+ if ( drhd )
+ cleanup_domid_map(pdev->arch.pseudo_domid, drhd->iommu);
+}
+
static int __must_check intel_iommu_map_page(struct domain *d, dfn_t dfn,
mfn_t mfn, unsigned int flags,
unsigned int *flush_flags)
rmrr->end_address, 0);
}
+ quarantine_teardown(pdev, drhd);
+
if ( drhd )
{
iommu_free_domid(pdev->arch.pseudo_domid,
vtd_dump_p2m_table_level(hd->arch.pgd_maddr, agaw_to_level(hd->arch.agaw), 0, 0);
}
-static int __init intel_iommu_quarantine_init(struct domain *d)
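+/*
+ * VT-d counterpart of the AMD logic: fill all not-present PTEs at this and
+ * lower levels with references to shared per-level tables, so that every
+ * leaf entry points at the single scratch page behind maddrs[0].  Pages
+ * allocated here, as well as pre-existing sub-tables encountered, are queued
+ * on the device's pgtables_list for later teardown.
+ */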
+static int fill_qpt(struct dma_pte *this, unsigned int level,
+ paddr_t maddrs[6], struct pci_dev *pdev)
{
- struct domain_iommu *hd = dom_iommu(d);
- struct dma_pte *parent;
+ unsigned int i;
+ int rc = 0;
+
+ for ( i = 0; !rc && i < PTE_NUM; ++i )
+ {
+ struct dma_pte *pte = &this[i], *next;
+
+ if ( !dma_pte_present(*pte) )
+ {
+ if ( !maddrs[level] )
+ {
+ /*
+ * The pgtable allocator is fine for the leaf page, as well as
+ * page table pages, and the resulting allocations are always
+ * zeroed.
+ */
+ maddrs[level] = alloc_pgtable_maddr(NULL, 1);
+ if ( !maddrs[level] )
+ {
+ rc = -ENOMEM;
+ break;
+ }
+
+ page_list_add(maddr_to_page(maddrs[level]),
+ &pdev->arch.pgtables_list);
+
+ if ( level )
+ {
+ next = map_vtd_domain_page(maddrs[level]);
+ rc = fill_qpt(next, level - 1, maddrs, pdev);
+ unmap_vtd_domain_page(next);
+ }
+ }
+
+ dma_set_pte_addr(*pte, maddrs[level]);
+ dma_set_pte_readable(*pte);
+ dma_set_pte_writable(*pte);
+ }
+ else if ( level && !dma_pte_superpage(*pte) )
+ {
+ page_list_add(maddr_to_page(dma_pte_addr(*pte)),
+ &pdev->arch.pgtables_list);
+ next = map_vtd_domain_page(dma_pte_addr(*pte));
+ rc = fill_qpt(next, level - 1, maddrs, pdev);
+ unmap_vtd_domain_page(next);
+ }
+ }
+
+ return rc;
+}
+
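+/*
+ * Establish the per-device quarantine page tables: map the device's RMRRs
+ * (transiently through dom_io), then point every other leaf PTE at a single
+ * read/write scratch page.
+ */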
+static int intel_iommu_quarantine_init(struct pci_dev *pdev)
+{
+ struct domain_iommu *hd = dom_iommu(dom_io);
+ paddr_t maddr;
unsigned int agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
unsigned int level = agaw_to_level(agaw);
+ const struct acpi_drhd_unit *drhd;
+ const struct acpi_rmrr_unit *rmrr;
+ unsigned int i, bdf;
+ bool rmrr_found = false;
+ int rc;
- if ( hd->arch.pgd_maddr )
+ ASSERT(pcidevs_locked());
+ ASSERT(!hd->arch.pgd_maddr);
+
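+    /*
+     * If the device was quarantined before, re-use the existing page tables;
+     * just scrub the scratch (leaf) page of any prior DMA writes.
+     */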
+ if ( pdev->arch.vtd.pgd_maddr )
{
- ASSERT_UNREACHABLE();
+ clear_domain_page(pdev->arch.leaf_mfn);
return 0;
}
- spin_lock(&hd->arch.mapping_lock);
+ drhd = acpi_find_matched_drhd_unit(pdev);
+ if ( !drhd )
+ return -ENODEV;
- hd->arch.pgd_maddr = alloc_pgtable_maddr(NULL, 1);
- if ( !hd->arch.pgd_maddr )
- goto out;
+ maddr = alloc_pgtable_maddr(NULL, 1);
+ if ( !maddr )
+ return -ENOMEM;
- parent = map_vtd_domain_page(hd->arch.pgd_maddr);
- while ( level )
- {
- uint64_t maddr;
- unsigned int offset;
+ rc = context_set_domain_id(NULL, pdev->arch.pseudo_domid, drhd->iommu);
- /*
- * The pgtable allocator is fine for the leaf page, as well as
- * page table pages, and the resulting allocations are always
- * zeroed.
- */
- maddr = alloc_pgtable_maddr(NULL, 1);
- if ( !maddr )
+ /* Transiently install the root into DomIO, for iommu_identity_mapping(). */
+ hd->arch.pgd_maddr = maddr;
+
+ for_each_rmrr_device ( rmrr, bdf, i )
+ {
+ if ( rc )
break;
- for ( offset = 0; offset < PTE_NUM; offset++ )
+ if ( rmrr->segment == pdev->seg &&
+ bdf == PCI_BDF2(pdev->bus, pdev->devfn) )
{
- struct dma_pte *pte = &parent[offset];
+ rmrr_found = true;
- dma_set_pte_addr(*pte, maddr);
- dma_set_pte_readable(*pte);
+ rc = iommu_identity_mapping(dom_io, p2m_access_rw,
+ rmrr->base_address, rmrr->end_address,
+ 0);
+ if ( rc )
+ printk(XENLOG_ERR VTDPREFIX
+ "%04x:%02x:%02x.%u: RMRR quarantine mapping failed\n",
+ pdev->seg, pdev->bus,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
}
- iommu_sync_cache(parent, PAGE_SIZE);
+ }
- unmap_vtd_domain_page(parent);
- parent = map_vtd_domain_page(maddr);
- level--;
+ iommu_identity_map_teardown(dom_io);
+ hd->arch.pgd_maddr = 0;
+ pdev->arch.vtd.pgd_maddr = maddr;
+
+ if ( !rc )
+ {
+ struct dma_pte *root;
+ paddr_t maddrs[6] = {};
+
+ spin_lock(&hd->arch.mapping_lock);
+
+ root = map_vtd_domain_page(maddr);
+ rc = fill_qpt(root, level - 1, maddrs, pdev);
+ unmap_vtd_domain_page(root);
+
+ pdev->arch.leaf_mfn = maddr_to_mfn(maddrs[0]);
+
+ spin_unlock(&hd->arch.mapping_lock);
}
- unmap_vtd_domain_page(parent);
- out:
- spin_unlock(&hd->arch.mapping_lock);
+ if ( rc )
+ quarantine_teardown(pdev, drhd);
- /* Pages leaked in failure case */
- return level ? -ENOMEM : 0;
+ return rc;
}
const struct iommu_ops __initconstrel intel_iommu_ops = {