ia64/xen-unstable

changeset 19420:115c164721dc

vtd: fix Dom0 S3 when VT-d is enabled.

On some platforms that support Queued Invalidation and Interrupt
Remapping, Dom0 S3 doesn't work. The patch fixes the issue.

1) In device_power_down(), we should invoke iommu_suspend() after
ioapic_suspend(); in device_power_up(), we should invoke
iommu_resume() before ioapic_resume().

2) Add 2 functions: disable_qinval() and disable_intremap(); in
iommu_suspend(), we invoke them and iommu_disable_translation().
Rename qinval_setup() to enable_qinval() and rename
intremap_setup() to enable_intremap().

3) In iommu_resume(), remove the unnecessary
iommu_flush_{context, iotlb}_global() -- actually we mustn't do that
if Queued Invalidation was enabled before S3 because at this point of
S3 resume, Queued Invalidation hasn't been re-enabled.

4) Add a static global array ioapic_pin_to_intremap_index[] to
remember which intremap_index an ioapic pin uses -- during S3 resume,
ioapic_resume() re-writes all the ioapic RTEs, so we can use the array
to re-use the previously-allocated IRTE.

5) Some cleanups:
a) Change some failure handling in enable_intremap() to panic().
b) Remove the unnecessary local variable iec_cap in
__iommu_flush_iec().
c) Add a dmar_writeq(iommu->reg, DMAR_IQT_REG, 0) in
enable_qinval().

Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Mar 20 09:36:57 2009 +0000 (2009-03-20)
parents bbfcea821a0d
children ee93f866c74c
files xen/arch/x86/acpi/power.c xen/drivers/passthrough/vtd/extern.h xen/drivers/passthrough/vtd/intremap.c xen/drivers/passthrough/vtd/iommu.c xen/drivers/passthrough/vtd/qinval.c
line diff
     1.1 --- a/xen/arch/x86/acpi/power.c	Fri Mar 20 09:34:09 2009 +0000
     1.2 +++ b/xen/arch/x86/acpi/power.c	Fri Mar 20 09:36:57 2009 +0000
     1.3 @@ -44,16 +44,16 @@ void do_suspend_lowlevel(void);
     1.4  
     1.5  static int device_power_down(void)
     1.6  {
     1.7 -    iommu_suspend();
     1.8 -
     1.9      console_suspend();
    1.10  
    1.11      time_suspend();
    1.12  
    1.13      i8259A_suspend();
    1.14 -    
    1.15 +
    1.16      ioapic_suspend();
    1.17 -    
    1.18 +
    1.19 +    iommu_suspend();
    1.20 +
    1.21      lapic_suspend();
    1.22  
    1.23      return 0;
    1.24 @@ -62,16 +62,16 @@ static int device_power_down(void)
    1.25  static void device_power_up(void)
    1.26  {
    1.27      lapic_resume();
    1.28 -    
    1.29 +
    1.30 +    iommu_resume();
    1.31 +
    1.32      ioapic_resume();
    1.33  
    1.34      i8259A_resume();
    1.35 -    
    1.36 +
    1.37      time_resume();
    1.38  
    1.39      console_resume();
    1.40 -
    1.41 -    iommu_resume();
    1.42  }
    1.43  
    1.44  static void freeze_domains(void)
     2.1 --- a/xen/drivers/passthrough/vtd/extern.h	Fri Mar 20 09:34:09 2009 +0000
     2.2 +++ b/xen/drivers/passthrough/vtd/extern.h	Fri Mar 20 09:36:57 2009 +0000
     2.3 @@ -30,8 +30,10 @@ void print_iommu_regs(struct acpi_drhd_u
     2.4  void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn);
     2.5  void dump_iommu_info(unsigned char key);
     2.6  
     2.7 -int qinval_setup(struct iommu *iommu);
     2.8 -int intremap_setup(struct iommu *iommu);
     2.9 +int enable_qinval(struct iommu *iommu);
    2.10 +void disable_qinval(struct iommu *iommu);
    2.11 +int enable_intremap(struct iommu *iommu);
    2.12 +void disable_intremap(struct iommu *iommu);
    2.13  int queue_invalidate_context(struct iommu *iommu,
    2.14      u16 did, u16 source_id, u8 function_mask, u8 granu);
    2.15  int queue_invalidate_iotlb(struct iommu *iommu,
     3.1 --- a/xen/drivers/passthrough/vtd/intremap.c	Fri Mar 20 09:34:09 2009 +0000
     3.2 +++ b/xen/drivers/passthrough/vtd/intremap.c	Fri Mar 20 09:36:57 2009 +0000
     3.3 @@ -34,6 +34,15 @@
     3.4  #define dest_SMI -1
     3.5  #endif
     3.6  
     3.7 +/* The max number of IOAPIC (or IOSAPIC) pin. The typical values can be 24 or
     3.8 + * 48 on x86 and Itanium platforms. Here we use a bigger number 256. This
     3.9 + * should be big enough. Actually now IREMAP_ENTRY_NR is also 256.
    3.10 + */
    3.11 +#define MAX_IOAPIC_PIN_NUM  256
    3.12 +
    3.13 +static int ioapic_pin_to_intremap_index[MAX_IOAPIC_PIN_NUM] =
    3.14 +    { [0 ... MAX_IOAPIC_PIN_NUM-1] = -1 };
    3.15 +
    3.16  u16 apicid_to_bdf(int apic_id)
    3.17  {
    3.18      struct acpi_drhd_unit *drhd = ioapic_to_drhd(apic_id);
    3.19 @@ -94,7 +103,7 @@ static int remap_entry_to_ioapic_rte(
    3.20  }
    3.21  
    3.22  static int ioapic_rte_to_remap_entry(struct iommu *iommu,
    3.23 -    int apic_id, struct IO_xAPIC_route_entry *old_rte,
    3.24 +    int apic_id, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte,
    3.25      unsigned int rte_upper, unsigned int value)
    3.26  {
    3.27      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    3.28 @@ -108,13 +117,14 @@ static int ioapic_rte_to_remap_entry(str
    3.29      remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
    3.30      spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
    3.31  
    3.32 -    if ( remap_rte->format == 0 )
    3.33 +    if ( ioapic_pin_to_intremap_index[ioapic_pin] < 0 )
    3.34      {
    3.35          ir_ctrl->iremap_index++;
    3.36          index = ir_ctrl->iremap_index;
    3.37 +        ioapic_pin_to_intremap_index[ioapic_pin] = index;
    3.38      }
    3.39      else
    3.40 -        index = (remap_rte->index_15 << 15) | remap_rte->index_0_14;
    3.41 +        index = ioapic_pin_to_intremap_index[ioapic_pin];
    3.42  
    3.43      if ( index > IREMAP_ENTRY_NR - 1 )
    3.44      {
    3.45 @@ -232,6 +242,7 @@ unsigned int io_apic_read_remap_rte(
    3.46  void io_apic_write_remap_rte(
    3.47      unsigned int apic, unsigned int reg, unsigned int value)
    3.48  {
    3.49 +    unsigned int ioapic_pin = (reg - 0x10) / 2;
    3.50      struct IO_xAPIC_route_entry old_rte = { 0 };
    3.51      struct IO_APIC_route_remap_entry *remap_rte;
    3.52      unsigned int rte_upper = (reg & 1) ? 1 : 0;
    3.53 @@ -289,7 +300,8 @@ void io_apic_write_remap_rte(
    3.54      *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
    3.55      remap_rte->mask = saved_mask;
    3.56  
    3.57 -    if ( ioapic_rte_to_remap_entry(iommu, IO_APIC_ID(apic),
    3.58 +    ASSERT(ioapic_pin < MAX_IOAPIC_PIN_NUM);
    3.59 +    if ( ioapic_rte_to_remap_entry(iommu, IO_APIC_ID(apic), ioapic_pin,
    3.60                                     &old_rte, rte_upper, value) )
    3.61      {
    3.62          *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
    3.63 @@ -491,13 +503,12 @@ void msi_msg_write_remap_rte(
    3.64  }
    3.65  #endif
    3.66  
    3.67 -int intremap_setup(struct iommu *iommu)
    3.68 +int enable_intremap(struct iommu *iommu)
    3.69  {
    3.70      struct ir_ctrl *ir_ctrl;
    3.71      s_time_t start_time;
    3.72  
    3.73 -    if ( !ecap_intr_remap(iommu->ecap) )
    3.74 -        return -ENODEV;
    3.75 +    ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
    3.76  
    3.77      ir_ctrl = iommu_ir_ctrl(iommu);
    3.78      if ( ir_ctrl->iremap_maddr == 0 )
    3.79 @@ -517,7 +528,7 @@ int intremap_setup(struct iommu *iommu)
    3.80      ir_ctrl->iremap_maddr |=
    3.81              ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIME_SHIFT) : 0;
    3.82  #endif
    3.83 -    /* set size of the interrupt remapping table */ 
    3.84 +    /* set size of the interrupt remapping table */
    3.85      ir_ctrl->iremap_maddr |= IRTA_REG_TABLE_SIZE;
    3.86      dmar_writeq(iommu->reg, DMAR_IRTA_REG, ir_ctrl->iremap_maddr);
    3.87  
    3.88 @@ -530,11 +541,7 @@ int intremap_setup(struct iommu *iommu)
    3.89      while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_SIRTPS) )
    3.90      {
    3.91          if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
    3.92 -        {
    3.93 -            dprintk(XENLOG_ERR VTDPREFIX,
    3.94 -                    "Cannot set SIRTP field for interrupt remapping\n");
    3.95 -            return -ENODEV;
    3.96 -        }
    3.97 +            panic("Cannot set SIRTP field for interrupt remapping\n");
    3.98          cpu_relax();
    3.99      }
   3.100  
   3.101 @@ -546,11 +553,7 @@ int intremap_setup(struct iommu *iommu)
   3.102      while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_CFIS) )
   3.103      {
   3.104          if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
   3.105 -        {
   3.106 -            dprintk(XENLOG_ERR VTDPREFIX,
   3.107 -                    "Cannot set CFI field for interrupt remapping\n");
   3.108 -            return -ENODEV;
   3.109 -        }
   3.110 +            panic("Cannot set CFI field for interrupt remapping\n");
   3.111          cpu_relax();
   3.112      }
   3.113  
   3.114 @@ -561,12 +564,8 @@ int intremap_setup(struct iommu *iommu)
   3.115      start_time = NOW();
   3.116      while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_IRES) )
   3.117      {
   3.118 -        if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) ) 
   3.119 -        {
   3.120 -            dprintk(XENLOG_ERR VTDPREFIX,
   3.121 -                    "Cannot set IRE field for interrupt remapping\n");
   3.122 -            return -ENODEV;
   3.123 -        }
   3.124 +        if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
   3.125 +            panic("Cannot set IRE field for interrupt remapping\n");
   3.126          cpu_relax();
   3.127      }
   3.128  
   3.129 @@ -575,3 +574,21 @@ int intremap_setup(struct iommu *iommu)
   3.130  
   3.131      return 0;
   3.132  }
   3.133 +
   3.134 +void disable_intremap(struct iommu *iommu)
   3.135 +{
   3.136 +    s_time_t start_time;
   3.137 +
   3.138 +    ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
   3.139 +
   3.140 +    iommu->gcmd &= ~(DMA_GCMD_SIRTP | DMA_GCMD_CFI | DMA_GCMD_IRE);
   3.141 +    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
   3.142 +
   3.143 +    start_time = NOW();
   3.144 +    while ( dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_IRES )
   3.145 +    {
   3.146 +        if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
   3.147 +            panic("Cannot clear IRE field for interrupt remapping\n");
   3.148 +        cpu_relax();
   3.149 +    }
   3.150 +}
     4.1 --- a/xen/drivers/passthrough/vtd/iommu.c	Fri Mar 20 09:34:09 2009 +0000
     4.2 +++ b/xen/drivers/passthrough/vtd/iommu.c	Fri Mar 20 09:36:57 2009 +0000
     4.3 @@ -639,7 +639,7 @@ static void iommu_enable_translation(str
     4.4      spin_unlock_irqrestore(&iommu->register_lock, flags);
     4.5  }
     4.6  
     4.7 -int iommu_disable_translation(struct iommu *iommu)
     4.8 +static void iommu_disable_translation(struct iommu *iommu)
     4.9  {
    4.10      u32 sts;
    4.11      unsigned long flags;
    4.12 @@ -662,7 +662,6 @@ int iommu_disable_translation(struct iom
    4.13          cpu_relax();
    4.14      }
    4.15      spin_unlock_irqrestore(&iommu->register_lock, flags);
    4.16 -    return 0;
    4.17  }
    4.18  
    4.19  static struct iommu *vector_to_iommu[NR_VECTORS];
    4.20 @@ -1690,7 +1689,7 @@ static int init_vtd_hw(void)
    4.21          for_each_drhd_unit ( drhd )
    4.22          {
    4.23              iommu = drhd->iommu;
    4.24 -            if ( qinval_setup(iommu) != 0 )
    4.25 +            if ( enable_qinval(iommu) != 0 )
    4.26              {
    4.27                  dprintk(XENLOG_INFO VTDPREFIX,
    4.28                          "Failed to enable Queued Invalidation!\n");
    4.29 @@ -1704,7 +1703,7 @@ static int init_vtd_hw(void)
    4.30          for_each_drhd_unit ( drhd )
    4.31          {
    4.32              iommu = drhd->iommu;
    4.33 -            if ( intremap_setup(iommu) != 0 )
    4.34 +            if ( enable_intremap(iommu) != 0 )
    4.35              {
    4.36                  dprintk(XENLOG_INFO VTDPREFIX,
    4.37                          "Failed to enable Interrupt Remapping!\n");
    4.38 @@ -1934,6 +1933,14 @@ void iommu_suspend(void)
    4.39              (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
    4.40          iommu_state[i][DMAR_FEUADDR_REG] =
    4.41              (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
    4.42 +
    4.43 +        iommu_disable_translation(iommu);
    4.44 +
    4.45 +        if ( iommu_intremap )
    4.46 +            disable_intremap(iommu);
    4.47 +
    4.48 +        if ( iommu_qinval )
    4.49 +            disable_qinval(iommu);
    4.50      }
    4.51  }
    4.52  
    4.53 @@ -1946,7 +1953,11 @@ void iommu_resume(void)
    4.54      if ( !vtd_enabled )
    4.55          return;
    4.56  
    4.57 -    iommu_flush_all();
    4.58 +    /* Not sure whether the flush operation is required to meet iommu
    4.59 +     * specification. Note that BIOS also executes in S3 resume and iommu may
    4.60 +     * be touched again, so let us do the flush operation for safety.
    4.61 +     */
    4.62 +    flush_all_cache();
    4.63  
    4.64      if ( init_vtd_hw() != 0  && force_iommu )
    4.65           panic("IOMMU setup failed, crash Xen for security purpose!\n");
    4.66 @@ -1964,6 +1975,7 @@ void iommu_resume(void)
    4.67                      (u32) iommu_state[i][DMAR_FEADDR_REG]);
    4.68          dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
    4.69                      (u32) iommu_state[i][DMAR_FEUADDR_REG]);
    4.70 +
    4.71          iommu_enable_translation(iommu);
    4.72      }
    4.73  }
     5.1 --- a/xen/drivers/passthrough/vtd/qinval.c	Fri Mar 20 09:34:09 2009 +0000
     5.2 +++ b/xen/drivers/passthrough/vtd/qinval.c	Fri Mar 20 09:36:57 2009 +0000
     5.3 @@ -319,7 +319,6 @@ int queue_invalidate_iec(struct iommu *i
     5.4  
     5.5  int __iommu_flush_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
     5.6  {
     5.7 -    u64 iec_cap;
     5.8      int ret;
     5.9      ret = queue_invalidate_iec(iommu, granu, im, iidx);
    5.10      ret |= invalidate_sync(iommu);
    5.11 @@ -328,7 +327,7 @@ int __iommu_flush_iec(struct iommu *iomm
    5.12       * reading vt-d architecture register will ensure
    5.13       * draining happens in implementation independent way.
    5.14       */
    5.15 -    iec_cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
    5.16 +    (void)dmar_readq(iommu->reg, DMAR_CAP_REG);
    5.17      return ret;
    5.18  }
    5.19  
    5.20 @@ -413,7 +412,7 @@ static int flush_iotlb_qi(
    5.21      return ret;
    5.22  }
    5.23  
    5.24 -int qinval_setup(struct iommu *iommu)
    5.25 +int enable_qinval(struct iommu *iommu)
    5.26  {
    5.27      s_time_t start_time;
    5.28      struct qi_ctrl *qi_ctrl;
    5.29 @@ -422,8 +421,7 @@ int qinval_setup(struct iommu *iommu)
    5.30      qi_ctrl = iommu_qi_ctrl(iommu);
    5.31      flush = iommu_get_flush(iommu);
    5.32  
    5.33 -    if ( !ecap_queued_inval(iommu->ecap) )
    5.34 -        return -ENODEV;
    5.35 +    ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
    5.36  
    5.37      if ( qi_ctrl->qinval_maddr == 0 )
    5.38      {
    5.39 @@ -448,6 +446,8 @@ int qinval_setup(struct iommu *iommu)
    5.40      qi_ctrl->qinval_maddr |= IQA_REG_QS;
    5.41      dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
    5.42  
    5.43 +    dmar_writeq(iommu->reg, DMAR_IQT_REG, 0);
    5.44 +
    5.45      /* enable queued invalidation hardware */
    5.46      iommu->gcmd |= DMA_GCMD_QIE;
    5.47      dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
    5.48 @@ -463,3 +463,22 @@ int qinval_setup(struct iommu *iommu)
    5.49  
    5.50      return 0;
    5.51  }
    5.52 +
    5.53 +void disable_qinval(struct iommu *iommu)
    5.54 +{
    5.55 +    s_time_t start_time;
    5.56 +
    5.57 +    ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
    5.58 +
    5.59 +    iommu->gcmd &= ~DMA_GCMD_QIE;
    5.60 +    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
    5.61 +
    5.62 +    /* Make sure hardware complete it */
    5.63 +    start_time = NOW();
    5.64 +    while ( dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_QIES )
    5.65 +    {
    5.66 +        if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
    5.67 +            panic("Cannot clear QIE field for queue invalidation\n");
    5.68 +        cpu_relax();
    5.69 +    }
    5.70 +}