]> xenbits.xensource.com Git - people/iwj/xen.git/commitdiff
AMD/IOMMU: don't blindly allocate interrupt remapping tables
authorJan Beulich <jbeulich@suse.com>
Wed, 25 Sep 2019 14:00:05 +0000 (16:00 +0200)
committerJan Beulich <jbeulich@suse.com>
Wed, 25 Sep 2019 14:00:05 +0000 (16:00 +0200)
ACPI tables are free to list far more device coordinates than there are
actual devices. By delaying the table allocations for most cases, and
doing them only when an actual device is known to be present at a given
position, overall memory used for the tables goes down from over 500k
pages to just over 1k (on my system having such ACPI table contents).

Tables continue to get allocated right away for special entries
(IO-APIC, HPET) as well as for alias IDs. While in the former case
that's simply because there may not be any device at a given position,
in the latter case this is to avoid having to introduce ref-counting of
table usage.

The change involves invoking
iterate_ivrs_mappings(amd_iommu_setup_device_table) a second time,
because the function now wants to be able to find PCI devices, which
isn't possible yet when IOMMU setup happens very early during x2APIC
mode setup. In this context amd_iommu_init_interrupt() gets renamed as
well.

The logic adjusting a DTE's interrupt remapping attributes also gets
changed, such that the lack of an IRT would result in target aborted
rather than non-remapped interrupts (should any occur).

Note that for now phantom functions get separate IRTs allocated, as was
the case before.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
xen/drivers/passthrough/amd/iommu_acpi.c
xen/drivers/passthrough/amd/iommu_init.c
xen/drivers/passthrough/amd/iommu_intr.c
xen/drivers/passthrough/amd/iommu_map.c
xen/drivers/passthrough/amd/pci_amd_iommu.c
xen/include/asm-x86/hvm/svm/amd-iommu-proto.h

index 8df034730f54e401929d8480fe2798498b4180ab..7fa732c55c3ffcdcec02e129bbcc5e56242191ce 100644 (file)
@@ -53,7 +53,8 @@ union acpi_ivhd_device {
 };
 
 static void __init add_ivrs_mapping_entry(
-    u16 bdf, u16 alias_id, u8 flags, struct amd_iommu *iommu)
+    uint16_t bdf, uint16_t alias_id, uint8_t flags, bool alloc_irt,
+    struct amd_iommu *iommu)
 {
     struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(iommu->seg);
 
@@ -69,27 +70,32 @@ static void __init add_ivrs_mapping_entry(
     if ( iommu->bdf == bdf )
         return;
 
-    if ( !ivrs_mappings[alias_id].intremap_table )
+    /* Allocate interrupt remapping table if needed. */
+    if ( iommu_intremap && !ivrs_mappings[alias_id].intremap_table )
     {
-         /* allocate per-device interrupt remapping table */
-         if ( amd_iommu_perdev_intremap )
-             ivrs_mappings[alias_id].intremap_table =
-                 amd_iommu_alloc_intremap_table(
-                     iommu,
-                     &ivrs_mappings[alias_id].intremap_inuse);
-         else
-         {
-             if ( shared_intremap_table == NULL  )
-                 shared_intremap_table = amd_iommu_alloc_intremap_table(
-                     iommu,
-                     &shared_intremap_inuse);
-             ivrs_mappings[alias_id].intremap_table = shared_intremap_table;
-             ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse;
-         }
-
-         if ( !ivrs_mappings[alias_id].intremap_table )
-             panic("No memory for %04x:%02x:%02x.%u's IRT\n", iommu->seg,
-                   PCI_BUS(alias_id), PCI_SLOT(alias_id), PCI_FUNC(alias_id));
+        if ( !amd_iommu_perdev_intremap )
+        {
+            if ( !shared_intremap_table )
+                shared_intremap_table = amd_iommu_alloc_intremap_table(
+                    iommu, &shared_intremap_inuse);
+
+            if ( !shared_intremap_table )
+                panic("No memory for shared IRT\n");
+
+            ivrs_mappings[alias_id].intremap_table = shared_intremap_table;
+            ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse;
+        }
+        else if ( alloc_irt )
+        {
+            ivrs_mappings[alias_id].intremap_table =
+                amd_iommu_alloc_intremap_table(
+                    iommu, &ivrs_mappings[alias_id].intremap_inuse);
+
+            if ( !ivrs_mappings[alias_id].intremap_table )
+                panic("No memory for %04x:%02x:%02x.%u's IRT\n",
+                      iommu->seg, PCI_BUS(alias_id), PCI_SLOT(alias_id),
+                      PCI_FUNC(alias_id));
+        }
     }
 
     ivrs_mappings[alias_id].valid = true;
@@ -433,7 +439,8 @@ static u16 __init parse_ivhd_device_select(
         return 0;
     }
 
-    add_ivrs_mapping_entry(bdf, bdf, select->header.data_setting, iommu);
+    add_ivrs_mapping_entry(bdf, bdf, select->header.data_setting, false,
+                           iommu);
 
     return sizeof(*select);
 }
@@ -479,7 +486,7 @@ static u16 __init parse_ivhd_device_range(
 
     for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
         add_ivrs_mapping_entry(bdf, bdf, range->start.header.data_setting,
-                               iommu);
+                               false, iommu);
 
     return dev_length;
 }
@@ -513,7 +520,8 @@ static u16 __init parse_ivhd_device_alias(
 
     AMD_IOMMU_DEBUG(" Dev_Id Alias: %#x\n", alias_id);
 
-    add_ivrs_mapping_entry(bdf, alias_id, alias->header.data_setting, iommu);
+    add_ivrs_mapping_entry(bdf, alias_id, alias->header.data_setting, true,
+                           iommu);
 
     return dev_length;
 }
@@ -568,7 +576,7 @@ static u16 __init parse_ivhd_device_alias_range(
 
     for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
         add_ivrs_mapping_entry(bdf, alias_id, range->alias.header.data_setting,
-                               iommu);
+                               true, iommu);
 
     return dev_length;
 }
@@ -593,7 +601,7 @@ static u16 __init parse_ivhd_device_extended(
         return 0;
     }
 
-    add_ivrs_mapping_entry(bdf, bdf, ext->header.data_setting, iommu);
+    add_ivrs_mapping_entry(bdf, bdf, ext->header.data_setting, false, iommu);
 
     return dev_length;
 }
@@ -640,7 +648,7 @@ static u16 __init parse_ivhd_device_extended_range(
 
     for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
         add_ivrs_mapping_entry(bdf, bdf, range->extended.header.data_setting,
-                               iommu);
+                               false, iommu);
 
     return dev_length;
 }
@@ -733,7 +741,8 @@ static u16 __init parse_ivhd_device_special(
     AMD_IOMMU_DEBUG("IVHD Special: %04x:%02x:%02x.%u variety %#x handle %#x\n",
                     seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf),
                     special->variety, special->handle);
-    add_ivrs_mapping_entry(bdf, bdf, special->header.data_setting, iommu);
+    add_ivrs_mapping_entry(bdf, bdf, special->header.data_setting, true,
+                           iommu);
 
     switch ( special->variety )
     {
index 100d0bba3baf106b8dbf2e7008f6c3dccb4de1e4..df83cfd5400fec93e84fcc053ab5d53bf0917bef 100644 (file)
@@ -30,6 +30,7 @@
 #include <xen/delay.h>
 
 static int __initdata nr_amd_iommus;
+static bool __initdata pci_init;
 
 static void do_amd_iommu_irq(unsigned long data);
 static DECLARE_SOFTIRQ_TASKLET(amd_iommu_irq_tasklet, do_amd_iommu_irq, 0);
@@ -1244,17 +1245,20 @@ static int __init amd_iommu_setup_device_table(
 
     BUG_ON( (ivrs_bdf_entries == 0) );
 
-    /* allocate 'device table' on a 4K boundary */
-    device_table.alloc_size = PAGE_SIZE <<
-                              get_order_from_bytes(
-                              PAGE_ALIGN(ivrs_bdf_entries *
-                              IOMMU_DEV_TABLE_ENTRY_SIZE));
-    device_table.entries = device_table.alloc_size /
-                           IOMMU_DEV_TABLE_ENTRY_SIZE;
-
-    device_table.buffer = allocate_buffer(device_table.alloc_size,
-                                          "Device Table");
-    if  ( device_table.buffer == NULL )
+    if ( !device_table.buffer )
+    {
+        /* allocate 'device table' on a 4K boundary */
+        device_table.alloc_size = PAGE_SIZE <<
+                                  get_order_from_bytes(
+                                  PAGE_ALIGN(ivrs_bdf_entries *
+                                  IOMMU_DEV_TABLE_ENTRY_SIZE));
+        device_table.entries = device_table.alloc_size /
+                               IOMMU_DEV_TABLE_ENTRY_SIZE;
+
+        device_table.buffer = allocate_buffer(device_table.alloc_size,
+                                              "Device Table");
+    }
+    if ( !device_table.buffer )
         return -ENOMEM;
 
     /* Add device table entries */
@@ -1263,13 +1267,46 @@ static int __init amd_iommu_setup_device_table(
         if ( ivrs_mappings[bdf].valid )
         {
             void *dte;
+            const struct pci_dev *pdev = NULL;
 
             /* add device table entry */
             dte = device_table.buffer + (bdf * IOMMU_DEV_TABLE_ENTRY_SIZE);
             iommu_dte_add_device_entry(dte, &ivrs_mappings[bdf]);
 
+            if ( iommu_intremap &&
+                 ivrs_mappings[bdf].dte_requestor_id == bdf &&
+                 !ivrs_mappings[bdf].intremap_table )
+            {
+                if ( !pci_init )
+                    continue;
+                pcidevs_lock();
+                pdev = pci_get_pdev(seg, PCI_BUS(bdf), PCI_DEVFN2(bdf));
+                pcidevs_unlock();
+            }
+
+            if ( pdev )
+            {
+                unsigned int req_id = bdf;
+
+                do {
+                    ivrs_mappings[req_id].intremap_table =
+                        amd_iommu_alloc_intremap_table(
+                            ivrs_mappings[bdf].iommu,
+                            &ivrs_mappings[req_id].intremap_inuse);
+                    if ( !ivrs_mappings[req_id].intremap_table )
+                        return -ENOMEM;
+
+                    if ( !pdev->phantom_stride )
+                        break;
+                    req_id += pdev->phantom_stride;
+                } while ( PCI_SLOT(req_id) == pdev->sbdf.dev );
+            }
+
             amd_iommu_set_intremap_table(
-                dte, virt_to_maddr(ivrs_mappings[bdf].intremap_table),
+                dte,
+                ivrs_mappings[bdf].intremap_table
+                ? virt_to_maddr(ivrs_mappings[bdf].intremap_table)
+                : 0,
                 iommu_intremap);
         }
     }
@@ -1402,7 +1439,8 @@ int __init amd_iommu_init(bool xt)
     if ( rc )
         goto error_out;
 
-    /* allocate and initialize a global device table shared by all iommus */
+    /* Allocate and initialize device table(s). */
+    pci_init = !xt;
     rc = iterate_ivrs_mappings(amd_iommu_setup_device_table);
     if ( rc )
         goto error_out;
@@ -1422,7 +1460,7 @@ int __init amd_iommu_init(bool xt)
         /*
          * Setting up of the IOMMU interrupts cannot occur yet at the (very
          * early) time we get here when enabling x2APIC mode. Suppress it
-         * here, and do it explicitly in amd_iommu_init_interrupt().
+         * here, and do it explicitly in amd_iommu_init_late().
          */
         rc = amd_iommu_init_one(iommu, !xt);
         if ( rc )
@@ -1436,11 +1474,16 @@ error_out:
     return rc;
 }
 
-int __init amd_iommu_init_interrupt(void)
+int __init amd_iommu_init_late(void)
 {
     struct amd_iommu *iommu;
     int rc = 0;
 
+    /* Further initialize the device table(s). */
+    pci_init = true;
+    if ( iommu_intremap )
+        rc = iterate_ivrs_mappings(amd_iommu_setup_device_table);
+
     for_each_amd_iommu ( iommu )
     {
         struct irq_desc *desc;
index e3018e16f480404b2a9462677a0f9b509ff369f6..dfcf66a0a5c657694f3e87ff8e5eec70d275e694 100644 (file)
@@ -789,7 +789,7 @@ void amd_iommu_read_msi_from_ire(
     }
 }
 
-int __init amd_iommu_free_intremap_table(
+int amd_iommu_free_intremap_table(
     const struct amd_iommu *iommu, struct ivrs_mappings *ivrs_mapping)
 {
     void **tblp;
@@ -814,7 +814,7 @@ int __init amd_iommu_free_intremap_table(
     return 0;
 }
 
-void *__init amd_iommu_alloc_intremap_table(
+void *amd_iommu_alloc_intremap_table(
     const struct amd_iommu *iommu, unsigned long **inuse_map)
 {
     unsigned int order = intremap_table_order(iommu);
index b5e565880ee3ae4bcc6a4ea73d6622caf3fb247a..507a705c09fc941a650376f91dc4a234057eae52 100644 (file)
@@ -116,8 +116,9 @@ void __init amd_iommu_set_intremap_table(
     struct amd_iommu_dte *dte, uint64_t intremap_ptr, bool valid)
 {
     dte->it_root = intremap_ptr >> 6;
-    dte->int_tab_len = IOMMU_INTREMAP_ORDER;
-    dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED;
+    dte->int_tab_len = intremap_ptr ? IOMMU_INTREMAP_ORDER : 0;
+    dte->int_ctl = intremap_ptr ? IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED
+                                : IOMMU_DEV_TABLE_INT_CONTROL_ABORTED;
     dte->ig = false; /* unmapped interrupts result in i/o page faults */
     dte->iv = valid;
 }
index 465a49798d631cde6f2971f3482cdb8e1e50d72e..1fafc8cdefb643bf26b5f0a091cb373a13804258 100644 (file)
@@ -164,7 +164,7 @@ static int __init iov_detect(void)
     if ( !iommu_enable && !iommu_intremap )
         return 0;
 
-    if ( (init_done ? amd_iommu_init_interrupt()
+    if ( (init_done ? amd_iommu_init_late()
                     : amd_iommu_init(false)) != 0 )
     {
         printk("AMD-Vi: Error initialization\n");
@@ -428,6 +428,7 @@ static int amd_iommu_add_device(u8 devfn, struct pci_dev *pdev)
 {
     struct amd_iommu *iommu;
     u16 bdf;
+    struct ivrs_mappings *ivrs_mappings;
 
     if ( !pdev->domain )
         return -EINVAL;
@@ -457,6 +458,36 @@ static int amd_iommu_add_device(u8 devfn, struct pci_dev *pdev)
         return -ENODEV;
     }
 
+    ivrs_mappings = get_ivrs_mappings(pdev->seg);
+    bdf = PCI_BDF2(pdev->bus, devfn);
+    if ( !ivrs_mappings ||
+         !ivrs_mappings[ivrs_mappings[bdf].dte_requestor_id].valid )
+        return -EPERM;
+
+    if ( iommu_intremap &&
+         ivrs_mappings[bdf].dte_requestor_id == bdf &&
+         !ivrs_mappings[bdf].intremap_table )
+    {
+        unsigned long flags;
+
+        ivrs_mappings[bdf].intremap_table =
+            amd_iommu_alloc_intremap_table(
+                iommu, &ivrs_mappings[bdf].intremap_inuse);
+        if ( !ivrs_mappings[bdf].intremap_table )
+            return -ENOMEM;
+
+        spin_lock_irqsave(&iommu->lock, flags);
+
+        amd_iommu_set_intremap_table(
+            iommu->dev_table.buffer + (bdf * IOMMU_DEV_TABLE_ENTRY_SIZE),
+            virt_to_maddr(ivrs_mappings[bdf].intremap_table),
+            iommu_intremap);
+
+        amd_iommu_flush_device(iommu, bdf);
+
+        spin_unlock_irqrestore(&iommu->lock, flags);
+    }
+
     amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
     return 0;
 }
@@ -465,6 +496,8 @@ static int amd_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
 {
     struct amd_iommu *iommu;
     u16 bdf;
+    struct ivrs_mappings *ivrs_mappings;
+
     if ( !pdev->domain )
         return -EINVAL;
 
@@ -480,6 +513,14 @@ static int amd_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
     }
 
     amd_iommu_disable_domain_device(pdev->domain, iommu, devfn, pdev);
+
+    ivrs_mappings = get_ivrs_mappings(pdev->seg);
+    bdf = PCI_BDF2(pdev->bus, devfn);
+    if ( amd_iommu_perdev_intremap &&
+         ivrs_mappings[bdf].dte_requestor_id == bdf &&
+         ivrs_mappings[bdf].intremap_table )
+        amd_iommu_free_intremap_table(iommu, &ivrs_mappings[bdf]);
+
     return 0;
 }
 
index 369177d1df8821972716e6d306502d1ec9546710..0fa5d53c915a3e57f38f088defb23c5b74b36bed 100644 (file)
@@ -50,7 +50,7 @@ void get_iommu_features(struct amd_iommu *iommu);
 /* amd-iommu-init functions */
 int amd_iommu_prepare(bool xt);
 int amd_iommu_init(bool xt);
-int amd_iommu_init_interrupt(void);
+int amd_iommu_init_late(void);
 int amd_iommu_update_ivrs_mapping_acpi(void);
 int iov_adjust_irq_affinities(void);