ia64/xen-unstable

changeset 16013:f34cc7d1f2f6

vt-d: Allocate iommu pages from domheap rather than xenheap.

The xenheap is only 9MB on x86/32 Xen, which is not enough to set up the 1:1
mapping page table for dom0, so dom0 fails to boot. Instead of the xenheap,
this patch sets up the 1:1 mapping page table in the domheap and uses
map_domain_page() to get temporary mappings when they are needed.

Signed-off-by: Weidong Han <weidong.han@intel.com>
author Keir Fraser <keir@xensource.com>
date Mon Oct 01 06:34:40 2007 +0100 (2007-10-01)
parents ff4ff3e3ebbe
children db075ecf29b2
files xen/arch/x86/hvm/vmx/vtd/intel-iommu.c
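The pattern the patch relies on is: allocate each page-table page from the
domain heap, map it temporarily with map_domain_page() to initialise it,
record its machine address in the parent PTE, and then drop the temporary
mapping. Below is a minimal sketch of that allocation step, using only the
helpers that appear in the diff (alloc_domheap_page(), map_domain_page(),
unmap_domain_page(), page_to_mfn(), page_to_maddr()); locking and IOMMU cache
flushing are omitted, and the helper name is hypothetical -- the changeset
open-codes this logic inside addr_to_dma_page().

    /* Sketch only: allocate and zero one IOMMU page-table page from the
     * domheap, as addr_to_dma_page() does in the diff below. */
    static struct page_info *alloc_iommu_pgtable_page(void)
    {
        struct page_info *pg = alloc_domheap_page(NULL);
        void *vaddr;

        if ( pg == NULL )
            return NULL;

        /* A domheap page has no permanent virtual address on x86/32,
         * so take a short-lived mapping to initialise it. */
        vaddr = map_domain_page(mfn_x(page_to_mfn(pg)));
        memset(vaddr, 0, PAGE_SIZE);
        unmap_domain_page(vaddr);

        /* The caller stores page_to_maddr(pg) into the parent PTE. */
        return pg;
    }

The same map/initialise/unmap sequence recurs throughout the diff: every
place that previously dereferenced a xenheap virtual address now maps the
page, touches the entry, flushes it, and unmaps again.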
line diff
     1.1 --- a/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c	Fri Sep 28 16:00:44 2007 +0100
     1.2 +++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c	Mon Oct 01 06:34:40 2007 +0100
     1.3 @@ -134,7 +134,7 @@ static int device_context_mapped(struct 
     1.4  #define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
     1.5  #define level_size(l) (1 << level_to_offset_bits(l))
     1.6  #define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l))
     1.7 -static struct dma_pte *addr_to_dma_pte(struct domain *domain, u64 addr)
     1.8 +static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
     1.9  {
    1.10      struct hvm_iommu *hd = domain_hvm_iommu(domain);
    1.11      struct acpi_drhd_unit *drhd;
    1.12 @@ -144,6 +144,8 @@ static struct dma_pte *addr_to_dma_pte(s
    1.13      int level = agaw_to_level(hd->agaw);
    1.14      int offset;
    1.15      unsigned long flags;
    1.16 +    struct page_info *pg = NULL;
    1.17 +    u64 *vaddr = NULL;
    1.18  
    1.19      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
    1.20      iommu = drhd->iommu;
    1.21 @@ -153,79 +155,105 @@ static struct dma_pte *addr_to_dma_pte(s
    1.22      if ( !hd->pgd )
    1.23      {
    1.24          pgd = (struct dma_pte *)alloc_xenheap_page();
    1.25 -        if ( !pgd && !hd->pgd )
    1.26 +        if ( !pgd )
    1.27          {
    1.28              spin_unlock_irqrestore(&hd->mapping_lock, flags);
    1.29              return NULL;
    1.30          }
    1.31 -        memset((u8*)pgd, 0, PAGE_SIZE);
    1.32 -        if ( !hd->pgd )
    1.33 -            hd->pgd = pgd;
    1.34 -        else /* somebody is fast */
    1.35 -            free_xenheap_page((void *) pgd);
    1.36 +        memset(pgd, 0, PAGE_SIZE);
    1.37 +        hd->pgd = pgd;
    1.38      }
    1.39 +
    1.40      parent = hd->pgd;
    1.41 -    while ( level > 0 )
    1.42 +    while ( level > 1 )
    1.43      {
    1.44 -        u8 *tmp;
    1.45          offset = address_level_offset(addr, level);
    1.46          pte = &parent[offset];
    1.47 -        if ( level == 1 )
    1.48 -            break;
    1.49 +
    1.50          if ( dma_pte_addr(*pte) == 0 )
    1.51          {
    1.52 -            tmp = alloc_xenheap_page();
    1.53 -            memset(tmp, 0, PAGE_SIZE);
    1.54 -            iommu_flush_cache_page(iommu, tmp);
    1.55 -
    1.56 -            if ( !tmp && dma_pte_addr(*pte) == 0 )
    1.57 +            pg = alloc_domheap_page(NULL);
    1.58 +            vaddr = map_domain_page(mfn_x(page_to_mfn(pg)));
    1.59 +            if ( !vaddr )
    1.60              {
    1.61                  spin_unlock_irqrestore(&hd->mapping_lock, flags);
    1.62                  return NULL;
    1.63              }
    1.64 -            if ( dma_pte_addr(*pte) == 0 )
    1.65 +            memset(vaddr, 0, PAGE_SIZE);
    1.66 +            iommu_flush_cache_page(iommu, vaddr);
    1.67 +
    1.68 +            dma_set_pte_addr(*pte, page_to_maddr(pg));
    1.69 +
    1.70 +            /*
    1.71 +             * high level table always sets r/w, last level
    1.72 +             * page table control read/write
    1.73 +             */
    1.74 +            dma_set_pte_readable(*pte);
    1.75 +            dma_set_pte_writable(*pte);
    1.76 +            iommu_flush_cache_entry(iommu, pte);
    1.77 +        }
    1.78 +        else
    1.79 +        {
    1.80 +            pg = maddr_to_page(pte->val);
    1.81 +            vaddr = map_domain_page(mfn_x(page_to_mfn(pg)));
    1.82 +            if ( !vaddr )
    1.83              {
    1.84 -                dma_set_pte_addr(*pte,
    1.85 -                                 virt_to_maddr(tmp));
    1.86 -                /*
    1.87 -                 * high level table always sets r/w, last level
    1.88 -                 * page table control read/write
    1.89 -                 */
    1.90 -                dma_set_pte_readable(*pte);
    1.91 -                dma_set_pte_writable(*pte);
    1.92 -                iommu_flush_cache_entry(iommu, pte);
    1.93 -            } else /* somebody is fast */
    1.94 -                free_xenheap_page(tmp);
    1.95 +                spin_unlock_irqrestore(&hd->mapping_lock, flags);
    1.96 +                return NULL;
    1.97 +            }
    1.98          }
    1.99 -        parent = maddr_to_virt(dma_pte_addr(*pte));
   1.100 +
   1.101 +        if ( parent != hd->pgd )
   1.102 +            unmap_domain_page(parent);
   1.103 +
   1.104 +        if ( level == 2 && vaddr )
   1.105 +        {
   1.106 +            unmap_domain_page(vaddr);
   1.107 +            break;
   1.108 +        }
   1.109 +
   1.110 +        parent = (struct dma_pte *)vaddr;
   1.111 +        vaddr = NULL;
   1.112          level--;
   1.113      }
   1.114 +
   1.115      spin_unlock_irqrestore(&hd->mapping_lock, flags);
   1.116 -    return pte;
   1.117 +    return pg;
   1.118  }
   1.119  
   1.120 -/* return address's pte at specific level */
   1.121 -static struct dma_pte *dma_addr_level_pte(struct domain *domain, u64 addr,
   1.122 -                                          int level)
   1.123 +/* return address's page at specific level */
   1.124 +static struct page_info *dma_addr_level_page(struct domain *domain,
   1.125 +                                             u64 addr, int level)
   1.126  {
   1.127      struct hvm_iommu *hd = domain_hvm_iommu(domain);
   1.128      struct dma_pte *parent, *pte = NULL;
   1.129      int total = agaw_to_level(hd->agaw);
   1.130      int offset;
   1.131 +    struct page_info *pg = NULL;
   1.132  
   1.133      parent = hd->pgd;
   1.134      while ( level <= total )
   1.135      {
   1.136          offset = address_level_offset(addr, total);
   1.137          pte = &parent[offset];
   1.138 -        if ( level == total )
   1.139 -            return pte;
   1.140 +        if ( dma_pte_addr(*pte) == 0 )
   1.141 +        {
   1.142 +            if ( parent != hd->pgd )
   1.143 +                unmap_domain_page(parent);
   1.144 +            break;
   1.145 +        }
   1.146  
   1.147 -        if ( dma_pte_addr(*pte) == 0 )
   1.148 -            break;
   1.149 -        parent = maddr_to_virt(dma_pte_addr(*pte));
   1.150 +        pg = maddr_to_page(pte->val);
   1.151 +        if ( parent != hd->pgd )
   1.152 +            unmap_domain_page(parent);
   1.153 +
   1.154 +        if ( level == total )
   1.155 +            return pg;
   1.156 +
   1.157 +        parent = map_domain_page(mfn_x(page_to_mfn(pg)));
   1.158          total--;
   1.159      }
   1.160 +
   1.161      return NULL;
   1.162  }
   1.163  
   1.164 @@ -506,12 +534,16 @@ static void dma_pte_clear_one(struct dom
   1.165      struct acpi_drhd_unit *drhd;
   1.166      struct iommu *iommu;
   1.167      struct dma_pte *pte = NULL;
   1.168 +    struct page_info *pg = NULL;
   1.169  
   1.170      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   1.171  
   1.172      /* get last level pte */
   1.173 -    pte = dma_addr_level_pte(domain, addr, 1);
   1.174 -
   1.175 +    pg = dma_addr_level_page(domain, addr, 1);
   1.176 +    if ( !pg )
   1.177 +        return;
   1.178 +    pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
   1.179 +    pte += address_level_offset(addr, 1);
   1.180      if ( pte )
   1.181      {
   1.182          dma_clear_pte(*pte);
   1.183 @@ -559,6 +591,7 @@ void dma_pte_free_pagetable(struct domai
   1.184      int total = agaw_to_level(hd->agaw);
   1.185      int level;
   1.186      u32 tmp;
   1.187 +    struct page_info *pg = NULL;
   1.188  
   1.189      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   1.190      iommu = drhd->iommu;
   1.191 @@ -576,13 +609,16 @@ void dma_pte_free_pagetable(struct domai
   1.192  
   1.193          while ( tmp < end )
   1.194          {
   1.195 -            pte = dma_addr_level_pte(domain, tmp, level);
   1.196 -            if ( pte )
   1.197 -            {
   1.198 -                free_xenheap_page((void *) maddr_to_virt(dma_pte_addr(*pte)));
   1.199 -                dma_clear_pte(*pte);
   1.200 -                iommu_flush_cache_entry(iommu, pte);
   1.201 -            }
   1.202 +            pg = dma_addr_level_page(domain, tmp, level);
   1.203 +            if ( !pg )
   1.204 +                return;
   1.205 +            pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
   1.206 +            pte += address_level_offset(tmp, level);
   1.207 +            dma_clear_pte(*pte);
   1.208 +            iommu_flush_cache_entry(iommu, pte);
   1.209 +            unmap_domain_page(pte);
   1.210 +            free_domheap_page(pg);
   1.211 +
   1.212              tmp += level_size(level);
   1.213          }
   1.214          level++;
   1.215 @@ -1445,6 +1481,7 @@ int iommu_map_page(struct domain *d, pad
   1.216      struct acpi_drhd_unit *drhd;
   1.217      struct iommu *iommu;
   1.218      struct dma_pte *pte = NULL;
   1.219 +    struct page_info *pg = NULL;
   1.220  
   1.221      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   1.222      iommu = drhd->iommu;
   1.223 @@ -1453,12 +1490,15 @@ int iommu_map_page(struct domain *d, pad
   1.224      if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
   1.225          return 0;
   1.226  
   1.227 -    pte = addr_to_dma_pte(d, gfn << PAGE_SHIFT_4K);
   1.228 -    if ( !pte )
   1.229 +    pg = addr_to_dma_page(d, gfn << PAGE_SHIFT_4K);
   1.230 +    if ( !pg )
   1.231          return -ENOMEM;
   1.232 +    pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
   1.233 +    pte += mfn & LEVEL_MASK;
   1.234      dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K);
   1.235      dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
   1.236      iommu_flush_cache_entry(iommu, pte);
   1.237 +    unmap_domain_page(pte);
   1.238  
   1.239      for_each_drhd_unit ( drhd )
   1.240      {
   1.241 @@ -1477,7 +1517,6 @@ int iommu_unmap_page(struct domain *d, d
   1.242  {
   1.243      struct acpi_drhd_unit *drhd;
   1.244      struct iommu *iommu;
   1.245 -    struct dma_pte *pte = NULL;
   1.246  
   1.247      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   1.248      iommu = drhd->iommu;
   1.249 @@ -1486,10 +1525,8 @@ int iommu_unmap_page(struct domain *d, d
   1.250      if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
   1.251          return 0;
   1.252  
   1.253 -    /* get last level pte */
   1.254 -    pte = dma_addr_level_pte(d, gfn << PAGE_SHIFT_4K, 1);
   1.255      dma_pte_clear_one(d, gfn << PAGE_SHIFT_4K);
   1.256 -    
   1.257 +
   1.258      return 0;
   1.259  }
   1.260  
   1.261 @@ -1501,6 +1538,7 @@ int iommu_page_mapping(struct domain *do
   1.262      unsigned long start_pfn, end_pfn;
   1.263      struct dma_pte *pte = NULL;
   1.264      int index;
   1.265 +    struct page_info *pg = NULL;
   1.266  
   1.267      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   1.268      iommu = drhd->iommu;
   1.269 @@ -1513,12 +1551,15 @@ int iommu_page_mapping(struct domain *do
   1.270      index = 0;
   1.271      while ( start_pfn < end_pfn )
   1.272      {
   1.273 -        pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
   1.274 -        if ( !pte )
   1.275 +        pg = addr_to_dma_page(domain, iova + PAGE_SIZE_4K * index);
   1.276 +        if ( !pg )
   1.277              return -ENOMEM;
   1.278 +        pte = (struct dma_pte *)map_domain_page(mfn_x(page_to_mfn(pg)));
   1.279 +        pte += start_pfn & LEVEL_MASK;
   1.280          dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
   1.281          dma_set_pte_prot(*pte, prot);
   1.282          iommu_flush_cache_entry(iommu, pte);
   1.283 +        unmap_domain_page(pte);
   1.284          start_pfn++;
   1.285          index++;
   1.286      }
   1.287 @@ -1537,12 +1578,8 @@ int iommu_page_mapping(struct domain *do
   1.288  
   1.289  int iommu_page_unmapping(struct domain *domain, dma_addr_t addr, size_t size)
   1.290  {
   1.291 -    struct dma_pte *pte = NULL;
   1.292 +    dma_pte_clear_range(domain, addr, addr + size);
   1.293  
   1.294 -    /* get last level pte */
   1.295 -    pte = dma_addr_level_pte(domain, addr, 1);
   1.296 -    dma_pte_clear_range(domain, addr, addr + size);
   1.297 -    
   1.298      return 0;
   1.299  }
   1.300