ia64/xen-unstable

changeset 17759:ecd266cebcab

vtd: Various cleanups and fixes:
* Handle DRHDs with different supported AGAWs. To support this, we
  create page tables which always have 4 levels, and skip top levels
  for units which support only 2 or 3 levels (sketched below).
* Handle systems with mixed DRHD support for cache snooping. We must
  pessimistically CLFLUSH if any DRHD does not support snooping (see
  the second sketch below).
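
The level-skipping trick works because a domain's I/O page tables are now
always built 4 levels deep, while the context entry of a less capable unit
is pointed at an interior table instead of the top one. A minimal C sketch
of that walk follows; dma_pte, DMA_PTE_ADDR_MASK and the *_sketch names are
simplified stand-ins for the real map_vtd_domain_page()/dma_pte_addr()
machinery, and tables are treated as directly addressable purely for
illustration:

    #include <stdint.h>

    struct dma_pte { uint64_t val; };            /* simplified VT-d PTE    */
    #define DMA_PTE_ADDR_MASK  (~0xfffULL)       /* low 12 bits hold flags */

    /* Machine address of the next-level table referenced by a PTE. */
    static uint64_t dma_pte_addr_sketch(struct dma_pte pte)
    {
        return pte.val & DMA_PTE_ADDR_MASK;
    }

    /*
     * Starting from the domain's always-4-level root, descend until only
     * nr_pt_levels levels remain; the table reached is what gets programmed
     * into the context entry as the address root.  Tables are accessed
     * directly here; Xen maps them with map_vtd_domain_page().
     * Returns 0 if an intermediate table has not been allocated yet.
     */
    static uint64_t skip_top_levels_sketch(uint64_t pgd_maddr, int nr_pt_levels)
    {
        int level;

        for ( level = 4; level > nr_pt_levels; level-- )
        {
            struct dma_pte *table = (struct dma_pte *)(uintptr_t)pgd_maddr;
            pgd_maddr = dma_pte_addr_sketch(table[0]); /* only slot 0 used */
            if ( pgd_maddr == 0 )
                return 0;
        }
        return pgd_maddr;
    }

A unit with fewer levels can only translate the low part of the address
space, which lives entirely under entry 0 of each skipped level, so the walk
only ever follows slot 0; the context entry's address width is then set to
the unit's own agaw rather than the domain's.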
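
The snooping fix replaces the per-IOMMU coherency check with a single
global decision: if any unit reports a non-coherent walk (ecap coherency
bit clear), every context/page-table entry update is flushed with CLFLUSH.
A minimal sketch of that policy, with *_sketch names standing in for the
real clflush_size/iommus_incoherent globals:

    /* CLFLUSH line size; the real code reads it from the CPU at boot.     */
    static unsigned int clflush_size_sketch = 64;
    /* Set once, while enumerating DRHDs, if any unit lacks ecap coherency. */
    static int iommus_incoherent_sketch;

    static inline void clflush_sketch(void *p)
    {
        asm volatile ( "clflush %0" : "+m" (*(char *)p) );
    }

    /* Write back a just-updated descriptor only if some unit needs it. */
    static void flush_cache_range_sketch(void *addr, int size)
    {
        int i;

        if ( !iommus_incoherent_sketch )
            return;              /* every unit snoops the CPU caches */

        for ( i = 0; i < size; i += clflush_size_sketch )
            clflush_sketch((char *)addr + i);
    }

A descriptor update would then call
flush_cache_range_sketch(&entry, sizeof(entry)): on an all-coherent system
this is a cheap early return, while a single incoherent unit forces the
flush for everybody, which is the pessimistic behaviour the message
describes.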

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri May 30 15:06:08 2008 +0100 (2008-05-30)
parents 121d196b4cc8
children c76e7f396c03
files xen/drivers/passthrough/vtd/iommu.c xen/drivers/passthrough/vtd/iommu.h xen/drivers/passthrough/vtd/vtd.h xen/drivers/passthrough/vtd/x86/vtd.c xen/include/xen/iommu.h
line diff
     1.1 --- a/xen/drivers/passthrough/vtd/iommu.c	Thu May 29 14:30:48 2008 +0100
     1.2 +++ b/xen/drivers/passthrough/vtd/iommu.c	Fri May 30 15:06:08 2008 +0100
     1.3 @@ -112,28 +112,27 @@ struct iommu_flush *iommu_get_flush(stru
     1.4      return iommu ? &iommu->intel->flush : NULL;
     1.5  }
     1.6  
     1.7 -unsigned int clflush_size;
     1.8 -void clflush_cache_range(void *adr, int size)
     1.9 +static unsigned int clflush_size;
    1.10 +static int iommus_incoherent;
    1.11 +static void __iommu_flush_cache(void *addr, int size)
    1.12  {
    1.13      int i;
    1.14 +
    1.15 +    if ( !iommus_incoherent )
    1.16 +        return;
    1.17 +
    1.18      for ( i = 0; i < size; i += clflush_size )
    1.19 -        clflush(adr + i);
    1.20 +        clflush((char *)addr + i);
    1.21  }
    1.22  
    1.23 -static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
    1.24 +void iommu_flush_cache_entry(void *addr)
    1.25  {
    1.26 -    if ( !ecap_coherent(iommu->ecap) )
    1.27 -        clflush_cache_range(addr, size);
    1.28 +    __iommu_flush_cache(addr, 8);
    1.29  }
    1.30  
    1.31 -void iommu_flush_cache_entry(struct iommu *iommu, void *addr)
    1.32 +void iommu_flush_cache_page(void *addr)
    1.33  {
    1.34 -    __iommu_flush_cache(iommu, addr, 8);
    1.35 -}
    1.36 -
    1.37 -void iommu_flush_cache_page(struct iommu *iommu, void *addr)
    1.38 -{
    1.39 -    __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K);
    1.40 +    __iommu_flush_cache(addr, PAGE_SIZE_4K);
    1.41  }
    1.42  
    1.43  int nr_iommus;
    1.44 @@ -157,7 +156,7 @@ static u64 bus_to_context_maddr(struct i
    1.45          }
    1.46          set_root_value(*root, maddr);
    1.47          set_root_present(*root);
    1.48 -        iommu_flush_cache_entry(iommu, root);
    1.49 +        iommu_flush_cache_entry(root);
    1.50      }
    1.51      maddr = (u64) get_context_addr(*root);
    1.52      unmap_vtd_domain_page(root_entries);
    1.53 @@ -194,8 +193,6 @@ static int device_context_mapped(struct 
    1.54  static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
    1.55  {
    1.56      struct hvm_iommu *hd = domain_hvm_iommu(domain);
    1.57 -    struct acpi_drhd_unit *drhd;
    1.58 -    struct iommu *iommu;
    1.59      int addr_width = agaw_to_width(hd->agaw);
    1.60      struct dma_pte *parent, *pte = NULL;
    1.61      int level = agaw_to_level(hd->agaw);
    1.62 @@ -204,19 +201,11 @@ static u64 addr_to_dma_page_maddr(struct
    1.63      u64 pte_maddr = 0, maddr;
    1.64      u64 *vaddr = NULL;
    1.65  
    1.66 -    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
    1.67 -    iommu = drhd->iommu;
    1.68 -
    1.69      addr &= (((u64)1) << addr_width) - 1;
    1.70      spin_lock_irqsave(&hd->mapping_lock, flags);
    1.71      if ( hd->pgd_maddr == 0 )
    1.72 -    {
    1.73 -        if ( !alloc )
    1.74 +        if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr()) == 0) )
    1.75              goto out;
    1.76 -        hd->pgd_maddr = alloc_pgtable_maddr();
    1.77 -        if ( hd->pgd_maddr == 0 )
    1.78 -            goto out;
    1.79 -    }
    1.80  
    1.81      parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
    1.82      while ( level > 1 )
    1.83 @@ -240,7 +229,7 @@ static u64 addr_to_dma_page_maddr(struct
    1.84               */
    1.85              dma_set_pte_readable(*pte);
    1.86              dma_set_pte_writable(*pte);
    1.87 -            iommu_flush_cache_entry(iommu, pte);
    1.88 +            iommu_flush_cache_entry(pte);
    1.89          }
    1.90          else
    1.91          {
    1.92 @@ -551,8 +540,6 @@ static void dma_pte_clear_one(struct dom
    1.93      struct dma_pte *page = NULL, *pte = NULL;
    1.94      u64 pg_maddr;
    1.95  
    1.96 -    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
    1.97 -
    1.98      /* get last level pte */
    1.99      pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
   1.100      if ( pg_maddr == 0 )
   1.101 @@ -567,14 +554,14 @@ static void dma_pte_clear_one(struct dom
   1.102      }
   1.103  
   1.104      dma_clear_pte(*pte); 
   1.105 -    iommu_flush_cache_entry(drhd->iommu, pte);
   1.106 +    iommu_flush_cache_entry(pte);
   1.107  
   1.108      for_each_drhd_unit ( drhd )
   1.109      {
   1.110          iommu = drhd->iommu;
   1.111 -
   1.112          if ( test_bit(iommu->index, &hd->iommu_bitmap) )
   1.113 -            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain), addr, 1, 0);
   1.114 +            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
   1.115 +                                  addr, 1, 0);
   1.116      }
   1.117  
   1.118      unmap_vtd_domain_page(page);
   1.119 @@ -603,7 +590,6 @@ static void dma_pte_clear_range(struct d
   1.120  static void iommu_free_next_pagetable(u64 pt_maddr, unsigned long index,
   1.121                                        int level)
   1.122  {
   1.123 -    struct acpi_drhd_unit *drhd;
   1.124      unsigned long next_index;
   1.125      struct dma_pte *pt_vaddr, *pde;
   1.126      int next_level;
   1.127 @@ -613,50 +599,38 @@ static void iommu_free_next_pagetable(u6
   1.128  
   1.129      pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
   1.130      pde = &pt_vaddr[index];
   1.131 -    if ( dma_pte_addr(*pde) != 0 )
   1.132 +    if ( dma_pte_addr(*pde) == 0 )
   1.133 +        goto out;
   1.134 +
   1.135 +    next_level = level - 1;
   1.136 +    if ( next_level > 1 )
   1.137      {
   1.138 -        next_level = level - 1;
   1.139 -        if ( next_level > 1 )
   1.140 -        {
   1.141 -            next_index = 0;
   1.142 -            do
   1.143 -            {
   1.144 -                iommu_free_next_pagetable(pde->val,
   1.145 -                                          next_index, next_level);
   1.146 -                next_index++;
   1.147 -            } while ( next_index < PTE_NUM );
   1.148 -        }
   1.149 +        for ( next_index = 0; next_index < PTE_NUM; next_index++ )
   1.150 +            iommu_free_next_pagetable(pde->val, next_index, next_level);
   1.151 +    }
   1.152  
   1.153 -        dma_clear_pte(*pde);
   1.154 -        drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   1.155 -        iommu_flush_cache_entry(drhd->iommu, pde);
   1.156 -        free_pgtable_maddr(pde->val);
   1.157 -        unmap_vtd_domain_page(pt_vaddr);
   1.158 -    }
   1.159 -    else
   1.160 -        unmap_vtd_domain_page(pt_vaddr);
   1.161 +    dma_clear_pte(*pde);
   1.162 +    iommu_flush_cache_entry(pde);
   1.163 +    free_pgtable_maddr(pde->val);
   1.164 +
   1.165 + out:
   1.166 +    unmap_vtd_domain_page(pt_vaddr);
   1.167  }
   1.168  
   1.169  /* free all VT-d page tables when shut down or destroy domain. */
   1.170  static void iommu_free_pagetable(struct domain *domain)
   1.171  {
   1.172 -    unsigned long index;
   1.173      struct hvm_iommu *hd = domain_hvm_iommu(domain);
   1.174 -    int total_level = agaw_to_level(hd->agaw);
   1.175 +    int i, total_level = agaw_to_level(hd->agaw);
   1.176 +
   1.177 +    if ( hd->pgd_maddr == 0 )
   1.178 +        return;
   1.179  
   1.180 -    if ( hd->pgd_maddr != 0 )
   1.181 -    {
   1.182 -        index = 0;
   1.183 -        do
   1.184 -        {
   1.185 -            iommu_free_next_pagetable(hd->pgd_maddr,
   1.186 -                                      index, total_level + 1);
   1.187 -            index++;
   1.188 -        } while ( index < PTE_NUM );
   1.189 +    for ( i = 0; i < PTE_NUM; i++ )
   1.190 +        iommu_free_next_pagetable(hd->pgd_maddr, i, total_level + 1);
   1.191  
   1.192 -        free_pgtable_maddr(hd->pgd_maddr);
   1.193 -        hd->pgd_maddr = 0;
   1.194 -    }
   1.195 +    free_pgtable_maddr(hd->pgd_maddr);
   1.196 +    hd->pgd_maddr = 0;
   1.197  }
   1.198  
   1.199  static int iommu_set_root_entry(struct iommu *iommu)
   1.200 @@ -977,6 +951,8 @@ int iommu_set_interrupt(struct iommu *io
   1.201  static int iommu_alloc(struct acpi_drhd_unit *drhd)
   1.202  {
   1.203      struct iommu *iommu;
   1.204 +    unsigned long sagaw;
   1.205 +    int agaw;
   1.206  
   1.207      if ( nr_iommus > MAX_IOMMUS )
   1.208      {
   1.209 @@ -1004,6 +980,23 @@ static int iommu_alloc(struct acpi_drhd_
   1.210      iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
   1.211      iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
   1.212  
   1.213 +    /* Calculate number of pagetable levels: between 2 and 4. */
   1.214 +    sagaw = cap_sagaw(iommu->cap);
   1.215 +    for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
   1.216 +        if ( test_bit(agaw, &sagaw) )
   1.217 +            break;
   1.218 +    if ( agaw < 0 )
   1.219 +    {
   1.220 +        gdprintk(XENLOG_ERR VTDPREFIX,
   1.221 +                 "IOMMU: unsupported sagaw %lx\n", sagaw);
   1.222 +        xfree(iommu);
   1.223 +        return -ENODEV;
   1.224 +    }
   1.225 +    iommu->nr_pt_levels = agaw_to_level(agaw);
   1.226 +
   1.227 +    if ( !ecap_coherent(iommu->ecap) )
   1.228 +        iommus_incoherent = 1;
   1.229 +
   1.230      spin_lock_init(&iommu->lock);
   1.231      spin_lock_init(&iommu->register_lock);
   1.232  
   1.233 @@ -1045,10 +1038,7 @@ static int intel_iommu_domain_init(struc
   1.234  {
   1.235      struct hvm_iommu *hd = domain_hvm_iommu(d);
   1.236      struct iommu *iommu = NULL;
   1.237 -    int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
   1.238 -    int adjust_width, agaw;
   1.239      u64 i;
   1.240 -    unsigned long sagaw;
   1.241      struct acpi_drhd_unit *drhd;
   1.242  
   1.243      INIT_LIST_HEAD(&hd->pdev_list);
   1.244 @@ -1056,22 +1046,7 @@ static int intel_iommu_domain_init(struc
   1.245      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   1.246      iommu = drhd->iommu;
   1.247  
   1.248 -    /* Calculate AGAW. */
   1.249 -    if ( guest_width > cap_mgaw(iommu->cap) )
   1.250 -        guest_width = cap_mgaw(iommu->cap);
   1.251 -    adjust_width = guestwidth_to_adjustwidth(guest_width);
   1.252 -    agaw = width_to_agaw(adjust_width);
   1.253 -    /* FIXME: hardware doesn't support it, choose a bigger one? */
   1.254 -    sagaw = cap_sagaw(iommu->cap);
   1.255 -    if ( !test_bit(agaw, &sagaw) )
   1.256 -    {
   1.257 -        gdprintk(XENLOG_ERR VTDPREFIX,
   1.258 -                 "IOMMU: hardware doesn't support the agaw\n");
   1.259 -        agaw = find_next_bit(&sagaw, 5, agaw);
   1.260 -        if ( agaw >= 5 )
   1.261 -            return -ENODEV;
   1.262 -    }
   1.263 -    hd->agaw = agaw;
   1.264 +    hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
   1.265  
   1.266      if ( d->domain_id == 0 )
   1.267      {
   1.268 @@ -1115,7 +1090,8 @@ static int domain_context_mapping_one(
   1.269      struct hvm_iommu *hd = domain_hvm_iommu(domain);
   1.270      struct context_entry *context, *context_entries;
   1.271      unsigned long flags;
   1.272 -    u64 maddr;
   1.273 +    u64 maddr, pgd_maddr;
   1.274 +    int agaw;
   1.275  
   1.276      maddr = bus_to_context_maddr(iommu, bus);
   1.277      context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
   1.278 @@ -1135,17 +1111,33 @@ static int domain_context_mapping_one(
   1.279      else
   1.280      {
   1.281  #endif
   1.282 +        /* Ensure we have pagetables allocated down to leaf PTE. */
   1.283          if ( hd->pgd_maddr == 0 )
   1.284          {
   1.285 -            hd->pgd_maddr = alloc_pgtable_maddr();
   1.286 +            addr_to_dma_page_maddr(domain, 0, 1);
   1.287              if ( hd->pgd_maddr == 0 )
   1.288              {
   1.289 +            nomem:
   1.290                  unmap_vtd_domain_page(context_entries);
   1.291                  spin_unlock_irqrestore(&iommu->lock, flags);
   1.292                  return -ENOMEM;
   1.293              }
   1.294          }
   1.295 -        context_set_address_root(*context, hd->pgd_maddr);
   1.296 +
   1.297 +        /* Skip top levels of page tables for 2- and 3-level DRHDs. */
   1.298 +        pgd_maddr = hd->pgd_maddr;
   1.299 +        for ( agaw = level_to_agaw(4);
   1.300 +              agaw != level_to_agaw(iommu->nr_pt_levels);
   1.301 +              agaw-- )
   1.302 +        {
   1.303 +            struct dma_pte *p = map_vtd_domain_page(pgd_maddr);
   1.304 +            pgd_maddr = dma_pte_addr(*p);
   1.305 +            unmap_vtd_domain_page(p);
   1.306 +            if ( pgd_maddr == 0 )
   1.307 +                goto nomem;
   1.308 +        }
   1.309 +
   1.310 +        context_set_address_root(*context, pgd_maddr);
   1.311          context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
   1.312  #ifdef CONTEXT_PASSTHRU
   1.313      }
   1.314 @@ -1156,10 +1148,10 @@ static int domain_context_mapping_one(
   1.315       * be 1 based as required by intel's iommu hw.
   1.316       */
   1.317      context_set_domain_id(context, domain);
   1.318 -    context_set_address_width(*context, hd->agaw);
   1.319 +    context_set_address_width(*context, agaw);
   1.320      context_set_fault_enable(*context);
   1.321      context_set_present(*context);
   1.322 -    iommu_flush_cache_entry(iommu, context);
   1.323 +    iommu_flush_cache_entry(context);
   1.324  
   1.325      unmap_vtd_domain_page(context_entries);
   1.326  
   1.327 @@ -1316,7 +1308,7 @@ static int domain_context_unmap_one(
   1.328      spin_lock_irqsave(&iommu->lock, flags);
   1.329      context_clear_present(*context);
   1.330      context_clear_entry(*context);
   1.331 -    iommu_flush_cache_entry(iommu, context);
   1.332 +    iommu_flush_cache_entry(context);
   1.333      iommu_flush_context_global(iommu, 0);
   1.334      iommu_flush_iotlb_global(iommu, 0);
   1.335      unmap_vtd_domain_page(context_entries);
   1.336 @@ -1499,9 +1491,6 @@ int intel_iommu_map_page(
   1.337      u64 pg_maddr;
   1.338      int pte_present;
   1.339  
   1.340 -    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   1.341 -    iommu = drhd->iommu;
   1.342 -
   1.343  #ifdef CONTEXT_PASSTHRU
   1.344      /* do nothing if dom0 and iommu supports pass thru */
   1.345      if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
   1.346 @@ -1516,7 +1505,7 @@ int intel_iommu_map_page(
   1.347      pte_present = dma_pte_present(*pte);
   1.348      dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
   1.349      dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
   1.350 -    iommu_flush_cache_entry(iommu, pte);
   1.351 +    iommu_flush_cache_entry(pte);
   1.352      unmap_vtd_domain_page(page);
   1.353  
   1.354      for_each_drhd_unit ( drhd )
   1.355 @@ -1565,10 +1554,9 @@ int iommu_page_mapping(struct domain *do
   1.356      int index;
   1.357      u64 pg_maddr;
   1.358  
   1.359 -    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   1.360 -    iommu = drhd->iommu;
   1.361      if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
   1.362          return -EINVAL;
   1.363 +
   1.364      iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
   1.365      start_pfn = hpa >> PAGE_SHIFT_4K;
   1.366      end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K;
   1.367 @@ -1582,7 +1570,7 @@ int iommu_page_mapping(struct domain *do
   1.368          pte = page + (start_pfn & LEVEL_MASK);
   1.369          dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K);
   1.370          dma_set_pte_prot(*pte, prot);
   1.371 -        iommu_flush_cache_entry(iommu, pte);
   1.372 +        iommu_flush_cache_entry(pte);
   1.373          unmap_vtd_domain_page(page);
   1.374          start_pfn++;
   1.375          index++;
     2.1 --- a/xen/drivers/passthrough/vtd/iommu.h	Thu May 29 14:30:48 2008 +0100
     2.2 +++ b/xen/drivers/passthrough/vtd/iommu.h	Fri May 30 15:06:08 2008 +0100
     2.3 @@ -236,6 +236,7 @@ struct context_entry {
     2.4  #define LEVEL_STRIDE       (9)
     2.5  #define LEVEL_MASK         ((1 << LEVEL_STRIDE) - 1)
     2.6  #define PTE_NUM            (1 << LEVEL_STRIDE)
     2.7 +#define level_to_agaw(val) ((val) - 2)
     2.8  #define agaw_to_level(val) ((val) + 2)
     2.9  #define agaw_to_width(val) (30 + val * LEVEL_STRIDE)
    2.10  #define width_to_agaw(w)   ((w - 30)/LEVEL_STRIDE)
     3.1 --- a/xen/drivers/passthrough/vtd/vtd.h	Thu May 29 14:30:48 2008 +0100
     3.2 +++ b/xen/drivers/passthrough/vtd/vtd.h	Fri May 30 15:06:08 2008 +0100
     3.3 @@ -66,7 +66,7 @@ void free_pgtable_maddr(u64 maddr);
     3.4  void *map_vtd_domain_page(u64 maddr);
     3.5  void unmap_vtd_domain_page(void *va);
     3.6  
     3.7 -void iommu_flush_cache_entry(struct iommu *iommu, void *addr);
     3.8 -void iommu_flush_cache_page(struct iommu *iommu, void *addr);
     3.9 +void iommu_flush_cache_entry(void *addr);
    3.10 +void iommu_flush_cache_page(void *addr);
    3.11  
    3.12  #endif // _VTD_H_
     4.1 --- a/xen/drivers/passthrough/vtd/x86/vtd.c	Thu May 29 14:30:48 2008 +0100
     4.2 +++ b/xen/drivers/passthrough/vtd/x86/vtd.c	Fri May 30 15:06:08 2008 +0100
     4.3 @@ -41,8 +41,6 @@ u64 alloc_pgtable_maddr(void)
     4.4  {
     4.5      struct page_info *pg;
     4.6      u64 *vaddr;
     4.7 -    struct acpi_drhd_unit *drhd;
     4.8 -    struct iommu *iommu;
     4.9  
    4.10      pg = alloc_domheap_page(NULL, 0);
    4.11      vaddr = map_domain_page(page_to_mfn(pg));
    4.12 @@ -50,9 +48,7 @@ u64 alloc_pgtable_maddr(void)
    4.13          return 0;
    4.14      memset(vaddr, 0, PAGE_SIZE);
    4.15  
    4.16 -    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
    4.17 -    iommu = drhd->iommu;
    4.18 -    iommu_flush_cache_page(iommu, vaddr);
    4.19 +    iommu_flush_cache_page(vaddr);
    4.20      unmap_domain_page(vaddr);
    4.21  
    4.22      return page_to_maddr(pg);
     5.1 --- a/xen/include/xen/iommu.h	Thu May 29 14:30:48 2008 +0100
     5.2 +++ b/xen/include/xen/iommu.h	Fri May 30 15:06:08 2008 +0100
     5.3 @@ -47,6 +47,7 @@ struct iommu {
     5.4      void __iomem *reg; /* Pointer to hardware regs, virtual addr */
     5.5      u32	index;         /* Sequence number of iommu */
     5.6      u32	gcmd;          /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
     5.7 +    u32 nr_pt_levels;
     5.8      u64	cap;
     5.9      u64	ecap;
    5.10      spinlock_t lock; /* protect context, domain ids */