ia64/xen-unstable

changeset 17432:1d3aaa6a8b87

VT-d: Allocate the page table pgd, root_entry, iremap and qinval pages
from the domheap rather than the xenheap, and get rid of struct
page_info usage in iommu.c.

Signed-off-by: Weidong Han <weidong.han@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Apr 10 09:22:38 2008 +0100 (2008-04-10)
parents 85848be18ba2
children 9153b99a7066
files xen/drivers/passthrough/vtd/intremap.c xen/drivers/passthrough/vtd/iommu.c xen/drivers/passthrough/vtd/iommu.h xen/drivers/passthrough/vtd/qinval.c xen/drivers/passthrough/vtd/utils.c xen/drivers/passthrough/vtd/x86/vtd.c xen/include/xen/hvm/iommu.h xen/include/xen/iommu.h
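The common pattern behind every hunk below: VT-d table pages are no longer kept as xenheap
virtual pointers but as machine addresses of domheap pages, which are mapped only for the
duration of each access. A minimal sketch of the helper shape this relies on is shown here;
the real alloc_pgtable_maddr()/map_vtd_domain_page() implementations live in
xen/drivers/passthrough/vtd/x86/vtd.c and may differ in detail (NUMA-aware placement,
cache flushing of the freshly allocated page), so treat this as an illustration only.

    /*
     * Sketch only: illustrates the domheap-backed allocation pattern this
     * patch switches to; the in-tree helpers may differ in detail.
     */
    u64 alloc_pgtable_maddr(void)
    {
        struct page_info *pg = alloc_domheap_page(NULL, 0);
        void *vaddr;

        if ( pg == NULL )
            return 0;
        vaddr = map_domain_page(page_to_mfn(pg));   /* transient mapping */
        memset(vaddr, 0, PAGE_SIZE);
        unmap_domain_page(vaddr);
        return page_to_maddr(pg);                   /* callers keep only the maddr */
    }

    void free_pgtable_maddr(u64 maddr)
    {
        if ( maddr != 0 )
            free_domheap_page(maddr_to_page(maddr));
    }

    void *map_vtd_domain_page(u64 maddr)            /* map a table page on demand */
    {
        return map_domain_page(maddr >> PAGE_SHIFT_4K);
    }

    void unmap_vtd_domain_page(void *va)
    {
        unmap_domain_page(va);
    }

The diff then applies the same discipline everywhere: map_vtd_domain_page() around each
table access, unmap_vtd_domain_page() before releasing the lock, and machine addresses
(root_maddr, pgd_maddr, iremap_maddr, qinval_maddr) stored in the control structures
instead of virtual pointers.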
line diff
     1.1 --- a/xen/drivers/passthrough/vtd/intremap.c	Thu Apr 10 09:20:07 2008 +0100
     1.2 +++ b/xen/drivers/passthrough/vtd/intremap.c	Thu Apr 10 09:22:38 2008 +0100
     1.3 @@ -45,7 +45,7 @@ u16 apicid_to_bdf(int apic_id)
     1.4  static void remap_entry_to_ioapic_rte(
     1.5      struct iommu *iommu, struct IO_APIC_route_entry *old_rte)
     1.6  {
     1.7 -    struct iremap_entry *iremap_entry = NULL;
     1.8 +    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
     1.9      struct IO_APIC_route_remap_entry *remap_rte;
    1.10      unsigned int index;
    1.11      unsigned long flags;
    1.12 @@ -70,7 +70,9 @@ static void remap_entry_to_ioapic_rte(
    1.13  
    1.14      spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
    1.15  
    1.16 -    iremap_entry = &ir_ctrl->iremap[index];
    1.17 +    iremap_entries =
    1.18 +        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
    1.19 +    iremap_entry = &iremap_entries[index];
    1.20  
    1.21      old_rte->vector = iremap_entry->lo.vector;
    1.22      old_rte->delivery_mode = iremap_entry->lo.dlm;
    1.23 @@ -80,13 +82,14 @@ static void remap_entry_to_ioapic_rte(
    1.24      old_rte->dest.logical.__reserved_1 = 0;
    1.25      old_rte->dest.logical.logical_dest = iremap_entry->lo.dst;
    1.26  
    1.27 +    unmap_vtd_domain_page(iremap_entries);
    1.28      spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
    1.29  }
    1.30  
    1.31  static void ioapic_rte_to_remap_entry(struct iommu *iommu,
    1.32      int apic_id, struct IO_APIC_route_entry *old_rte)
    1.33  {
    1.34 -    struct iremap_entry *iremap_entry = NULL;
    1.35 +    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    1.36      struct IO_APIC_route_remap_entry *remap_rte;
    1.37      unsigned int index;
    1.38      unsigned long flags;
    1.39 @@ -103,7 +106,10 @@ static void ioapic_rte_to_remap_entry(st
    1.40          goto out;
    1.41      }
    1.42  
    1.43 -    iremap_entry = &(ir_ctrl->iremap[index]);
    1.44 +    iremap_entries =
    1.45 +        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
    1.46 +    iremap_entry = &iremap_entries[index];
    1.47 +
    1.48      if ( *(u64 *)iremap_entry != 0 )
    1.49          dprintk(XENLOG_WARNING VTDPREFIX,
    1.50                 "Interrupt remapping entry is in use already!\n");
    1.51 @@ -124,12 +130,13 @@ static void ioapic_rte_to_remap_entry(st
    1.52      iremap_entry->lo.p = 1;    /* finally, set present bit */
    1.53      ir_ctrl->iremap_index++;
    1.54  
    1.55 +    unmap_vtd_domain_page(iremap_entries);
    1.56      iommu_flush_iec_index(iommu, 0, index);
    1.57      ret = invalidate_sync(iommu);
    1.58  
    1.59 -    /* now construct new ioapic rte entry */ 
    1.60 +    /* now construct new ioapic rte entry */
    1.61      remap_rte->vector = old_rte->vector;
    1.62 -    remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */ 
    1.63 +    remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
    1.64      remap_rte->index_15 = index & 0x8000;
    1.65      remap_rte->index_0_14 = index & 0x7fff;
    1.66      remap_rte->delivery_status = old_rte->delivery_status;
    1.67 @@ -154,7 +161,7 @@ io_apic_read_remap_rte(
    1.68      struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
    1.69      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
    1.70  
    1.71 -    if ( !iommu || !ir_ctrl || !(ir_ctrl->iremap) )
    1.72 +    if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
    1.73      {
    1.74          *IO_APIC_BASE(apic) = reg;
    1.75          return *(IO_APIC_BASE(apic)+4);
    1.76 @@ -200,7 +207,7 @@ io_apic_write_remap_rte(
    1.77      struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
    1.78      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
    1.79  
    1.80 -    if ( !iommu || !ir_ctrl || !(ir_ctrl->iremap) )
    1.81 +    if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
    1.82      {
    1.83          *IO_APIC_BASE(apic) = reg;
    1.84          *(IO_APIC_BASE(apic)+4) = value;
    1.85 @@ -238,32 +245,30 @@ int intremap_setup(struct iommu *iommu)
    1.86  {
    1.87      struct ir_ctrl *ir_ctrl;
    1.88      unsigned long start_time;
    1.89 -    u64 paddr;
    1.90  
    1.91      if ( !ecap_intr_remap(iommu->ecap) )
    1.92          return -ENODEV;
    1.93  
    1.94      ir_ctrl = iommu_ir_ctrl(iommu);
    1.95 -    if ( ir_ctrl->iremap == NULL )
    1.96 +    if ( ir_ctrl->iremap_maddr == 0 )
    1.97      {
    1.98 -        ir_ctrl->iremap = alloc_xenheap_page();
    1.99 -        if ( ir_ctrl->iremap == NULL )
   1.100 +        ir_ctrl->iremap_maddr = alloc_pgtable_maddr();
   1.101 +        if ( ir_ctrl->iremap_maddr == 0 )
   1.102          {
   1.103              dprintk(XENLOG_WARNING VTDPREFIX,
   1.104 -                    "Cannot allocate memory for ir_ctrl->iremap\n");
   1.105 +                    "Cannot allocate memory for ir_ctrl->iremap_maddr\n");
   1.106              return -ENODEV;
   1.107          }
   1.108 -        memset(ir_ctrl->iremap, 0, PAGE_SIZE);
   1.109      }
   1.110  
   1.111 -    paddr = virt_to_maddr(ir_ctrl->iremap);
   1.112  #if defined(ENABLED_EXTENDED_INTERRUPT_SUPPORT)
   1.113      /* set extended interrupt mode bit */
   1.114 -    paddr |= ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIMI_SHIFT) : 0;
   1.115 +    ir_ctrl->iremap_maddr |=
   1.116 +            ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIMI_SHIFT) : 0;
   1.117  #endif
   1.118      /* size field = 256 entries per 4K page = 8 - 1 */
   1.119 -    paddr |= 7;
   1.120 -    dmar_writeq(iommu->reg, DMAR_IRTA_REG, paddr);
   1.121 +    ir_ctrl->iremap_maddr |= 7;
   1.122 +    dmar_writeq(iommu->reg, DMAR_IRTA_REG, ir_ctrl->iremap_maddr);
   1.123  
   1.124      /* set SIRTP */
   1.125      iommu->gcmd |= DMA_GCMD_SIRTP;
     2.1 --- a/xen/drivers/passthrough/vtd/iommu.c	Thu Apr 10 09:20:07 2008 +0100
     2.2 +++ b/xen/drivers/passthrough/vtd/iommu.c	Thu Apr 10 09:22:38 2008 +0100
     2.3 @@ -185,71 +185,70 @@ void iommu_flush_cache_page(struct iommu
     2.4  
     2.5  int nr_iommus;
     2.6  /* context entry handling */
     2.7 -static struct context_entry * device_to_context_entry(struct iommu *iommu,
     2.8 -                                                      u8 bus, u8 devfn)
     2.9 +static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
    2.10  {
    2.11 -    struct root_entry *root;
    2.12 -    struct context_entry *context;
    2.13 -    unsigned long phy_addr;
    2.14 +    struct root_entry *root, *root_entries;
    2.15      unsigned long flags;
    2.16 +    u64 maddr;
    2.17  
    2.18      spin_lock_irqsave(&iommu->lock, flags);
    2.19 -    root = &iommu->root_entry[bus];
    2.20 +    root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
    2.21 +    root = &root_entries[bus];
    2.22      if ( !root_present(*root) )
    2.23      {
    2.24 -        phy_addr = (unsigned long) alloc_xenheap_page();
    2.25 -        if ( !phy_addr )
    2.26 +        maddr = alloc_pgtable_maddr();
    2.27 +        if ( maddr == 0 )
    2.28          {
    2.29              spin_unlock_irqrestore(&iommu->lock, flags);
    2.30 -            return NULL;
    2.31 +            return 0;
    2.32          }
    2.33 -        memset((void *) phy_addr, 0, PAGE_SIZE);
    2.34 -        iommu_flush_cache_page(iommu, (void *)phy_addr);
    2.35 -        phy_addr = virt_to_maddr((void *)phy_addr);
    2.36 -        set_root_value(*root, phy_addr);
    2.37 +        set_root_value(*root, maddr);
    2.38          set_root_present(*root);
    2.39          iommu_flush_cache_entry(iommu, root);
    2.40      }
    2.41 -    phy_addr = (unsigned long) get_context_addr(*root);
    2.42 -    context = (struct context_entry *)maddr_to_virt(phy_addr);
    2.43 +    maddr = (u64) get_context_addr(*root);
    2.44 +    unmap_vtd_domain_page(root_entries);
    2.45      spin_unlock_irqrestore(&iommu->lock, flags);
    2.46 -    return &context[devfn];
    2.47 +    return maddr;
    2.48  }
    2.49  
    2.50  static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
    2.51  {
    2.52 -    struct root_entry *root;
    2.53 +    struct root_entry *root, *root_entries;
    2.54      struct context_entry *context;
    2.55 -    unsigned long phy_addr;
    2.56 +    u64 context_maddr;
    2.57      int ret;
    2.58      unsigned long flags;
    2.59  
    2.60      spin_lock_irqsave(&iommu->lock, flags);
    2.61 -    root = &iommu->root_entry[bus];
    2.62 +    root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
    2.63 +    root = &root_entries[bus];
    2.64      if ( !root_present(*root) )
    2.65      {
    2.66          ret = 0;
    2.67          goto out;
    2.68      }
    2.69 -    phy_addr = get_context_addr(*root);
    2.70 -    context = (struct context_entry *)maddr_to_virt(phy_addr);
    2.71 +    context_maddr = get_context_addr(*root);
    2.72 +    context = (struct context_entry *)map_vtd_domain_page(context_maddr);
    2.73      ret = context_present(context[devfn]);
    2.74 +    unmap_vtd_domain_page(context);
    2.75   out:
    2.76 +    unmap_vtd_domain_page(root_entries);
    2.77      spin_unlock_irqrestore(&iommu->lock, flags);
    2.78      return ret;
    2.79  }
    2.80  
    2.81 -static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
    2.82 +static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr)
    2.83  {
    2.84      struct hvm_iommu *hd = domain_hvm_iommu(domain);
    2.85      struct acpi_drhd_unit *drhd;
    2.86      struct iommu *iommu;
    2.87      int addr_width = agaw_to_width(hd->agaw);
    2.88 -    struct dma_pte *parent, *pte = NULL, *pgd;
    2.89 +    struct dma_pte *parent, *pte = NULL;
    2.90      int level = agaw_to_level(hd->agaw);
    2.91      int offset;
    2.92      unsigned long flags;
    2.93 -    struct page_info *pg = NULL;
    2.94 +    u64 pte_maddr = 0;
    2.95      u64 *vaddr = NULL;
    2.96  
    2.97      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
    2.98 @@ -257,19 +256,14 @@ static struct page_info *addr_to_dma_pag
    2.99  
   2.100      addr &= (((u64)1) << addr_width) - 1;
   2.101      spin_lock_irqsave(&hd->mapping_lock, flags);
   2.102 -    if ( !hd->pgd )
   2.103 +    if ( hd->pgd_maddr == 0 )
   2.104      {
   2.105 -        pgd = (struct dma_pte *)alloc_xenheap_page();
   2.106 -        if ( !pgd )
   2.107 -        {
   2.108 -            spin_unlock_irqrestore(&hd->mapping_lock, flags);
   2.109 -            return NULL;
   2.110 -        }
   2.111 -        memset(pgd, 0, PAGE_SIZE);
   2.112 -        hd->pgd = pgd;
   2.113 +        hd->pgd_maddr = alloc_pgtable_maddr();
   2.114 +        if ( hd->pgd_maddr == 0 )
   2.115 +            return 0;
   2.116      }
   2.117  
   2.118 -    parent = hd->pgd;
   2.119 +    parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
   2.120      while ( level > 1 )
   2.121      {
   2.122          offset = address_level_offset(addr, level);
   2.123 @@ -277,18 +271,15 @@ static struct page_info *addr_to_dma_pag
   2.124  
   2.125          if ( dma_pte_addr(*pte) == 0 )
   2.126          {
   2.127 -            pg = alloc_domheap_page(
   2.128 -                NULL, MEMF_node(domain_to_node(domain)));
   2.129 -            vaddr = map_domain_page(page_to_mfn(pg));
   2.130 +            u64 maddr = alloc_pgtable_maddr();
   2.131 +            dma_set_pte_addr(*pte, maddr);
   2.132 +            vaddr = map_vtd_domain_page(maddr);
   2.133              if ( !vaddr )
   2.134              {
   2.135 +                unmap_vtd_domain_page(parent);
   2.136                  spin_unlock_irqrestore(&hd->mapping_lock, flags);
   2.137 -                return NULL;
   2.138 +                return 0;
   2.139              }
   2.140 -            memset(vaddr, 0, PAGE_SIZE);
   2.141 -            iommu_flush_cache_page(iommu, vaddr);
   2.142 -
   2.143 -            dma_set_pte_addr(*pte, page_to_maddr(pg));
   2.144  
   2.145              /*
   2.146               * high level table always sets r/w, last level
   2.147 @@ -300,21 +291,20 @@ static struct page_info *addr_to_dma_pag
   2.148          }
   2.149          else
   2.150          {
   2.151 -            pg = maddr_to_page(pte->val);
   2.152 -            vaddr = map_domain_page(page_to_mfn(pg));
   2.153 +            vaddr = map_vtd_domain_page(pte->val);
   2.154              if ( !vaddr )
   2.155              {
   2.156 +                unmap_vtd_domain_page(parent);
   2.157                  spin_unlock_irqrestore(&hd->mapping_lock, flags);
   2.158 -                return NULL;
   2.159 +                return 0;
   2.160              }
   2.161          }
   2.162  
   2.163 -        if ( parent != hd->pgd )
   2.164 -            unmap_domain_page(parent);
   2.165 -
   2.166 -        if ( level == 2 && vaddr )
   2.167 +        unmap_vtd_domain_page(parent);
   2.168 +        if ( level == 2 )
   2.169          {
   2.170 -            unmap_domain_page(vaddr);
   2.171 +            pte_maddr = pte->val & PAGE_MASK_4K;
   2.172 +            unmap_vtd_domain_page(vaddr);
   2.173              break;
   2.174          }
   2.175  
   2.176 @@ -324,43 +314,42 @@ static struct page_info *addr_to_dma_pag
   2.177      }
   2.178  
   2.179      spin_unlock_irqrestore(&hd->mapping_lock, flags);
   2.180 -    return pg;
   2.181 +    return pte_maddr;
   2.182  }
   2.183  
   2.184  /* return address's page at specific level */
   2.185 -static struct page_info *dma_addr_level_page(struct domain *domain,
   2.186 -                                             u64 addr, int level)
   2.187 +static u64 dma_addr_level_page_maddr(
   2.188 +    struct domain *domain, u64 addr, int level)
   2.189  {
   2.190      struct hvm_iommu *hd = domain_hvm_iommu(domain);
   2.191      struct dma_pte *parent, *pte = NULL;
   2.192      int total = agaw_to_level(hd->agaw);
   2.193      int offset;
   2.194 -    struct page_info *pg = NULL;
   2.195 +    u64 pg_maddr = hd->pgd_maddr;
   2.196  
   2.197 -    parent = hd->pgd;
   2.198 +    if ( pg_maddr == 0 )
   2.199 +        return 0;
   2.200 +
   2.201 +    parent = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
   2.202      while ( level <= total )
   2.203      {
   2.204          offset = address_level_offset(addr, total);
   2.205          pte = &parent[offset];
   2.206          if ( dma_pte_addr(*pte) == 0 )
   2.207 -        {
   2.208 -            if ( parent != hd->pgd )
   2.209 -                unmap_domain_page(parent);
   2.210              break;
   2.211 -        }
   2.212  
   2.213 -        pg = maddr_to_page(pte->val);
   2.214 -        if ( parent != hd->pgd )
   2.215 -            unmap_domain_page(parent);
   2.216 +        pg_maddr = pte->val & PAGE_MASK_4K;
   2.217 +        unmap_vtd_domain_page(parent);
   2.218  
   2.219          if ( level == total )
   2.220 -            return pg;
   2.221 +            return pg_maddr;
   2.222  
   2.223 -        parent = map_domain_page(page_to_mfn(pg));
   2.224 +        parent = map_vtd_domain_page(pte->val);
   2.225          total--;
   2.226      }
   2.227  
   2.228 -    return NULL;
   2.229 +    unmap_vtd_domain_page(parent);
   2.230 +    return 0;
   2.231  }
   2.232  
   2.233  static void iommu_flush_write_buffer(struct iommu *iommu)
   2.234 @@ -639,17 +628,17 @@ static void dma_pte_clear_one(struct dom
   2.235  {
   2.236      struct acpi_drhd_unit *drhd;
   2.237      struct iommu *iommu;
   2.238 -    struct dma_pte *pte = NULL;
   2.239 -    struct page_info *pg = NULL;
   2.240 +    struct dma_pte *page = NULL, *pte = NULL;
   2.241 +    u64 pg_maddr;
   2.242  
   2.243      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   2.244  
   2.245      /* get last level pte */
   2.246 -    pg = dma_addr_level_page(domain, addr, 1);
   2.247 -    if ( !pg )
   2.248 +    pg_maddr = dma_addr_level_page_maddr(domain, addr, 1);
   2.249 +    if ( pg_maddr == 0 )
   2.250          return;
   2.251 -    pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
   2.252 -    pte += address_level_offset(addr, 1);
   2.253 +    page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
   2.254 +    pte = page + address_level_offset(addr, 1);
   2.255      if ( pte )
   2.256      {
   2.257          dma_clear_pte(*pte);
   2.258 @@ -665,7 +654,7 @@ static void dma_pte_clear_one(struct dom
   2.259                  iommu_flush_write_buffer(iommu);
   2.260          }
   2.261      }
   2.262 -    unmap_domain_page(pte);
   2.263 +    unmap_vtd_domain_page(page);
   2.264  }
   2.265  
   2.266  /* clear last level pte, a tlb flush should be followed */
   2.267 @@ -695,11 +684,11 @@ void dma_pte_free_pagetable(struct domai
   2.268      struct hvm_iommu *hd = domain_hvm_iommu(domain);
   2.269      struct iommu *iommu;
   2.270      int addr_width = agaw_to_width(hd->agaw);
   2.271 -    struct dma_pte *pte;
   2.272 +    struct dma_pte *page, *pte;
   2.273      int total = agaw_to_level(hd->agaw);
   2.274      int level;
   2.275      u32 tmp;
   2.276 -    struct page_info *pg = NULL;
   2.277 +    u64 pg_maddr;
   2.278  
   2.279      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   2.280      iommu = drhd->iommu;
   2.281 @@ -717,15 +706,15 @@ void dma_pte_free_pagetable(struct domai
   2.282  
   2.283          while ( tmp < end )
   2.284          {
   2.285 -            pg = dma_addr_level_page(domain, tmp, level);
   2.286 -            if ( !pg )
   2.287 +            pg_maddr = dma_addr_level_page_maddr(domain, tmp, level);
   2.288 +            if ( pg_maddr == 0 )
   2.289                  return;
   2.290 -            pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
   2.291 -            pte += address_level_offset(tmp, level);
   2.292 +            page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
   2.293 +            pte = page + address_level_offset(tmp, level);
   2.294              dma_clear_pte(*pte);
   2.295              iommu_flush_cache_entry(iommu, pte);
   2.296 -            unmap_domain_page(pte);
   2.297 -            free_domheap_page(pg);
   2.298 +            unmap_vtd_domain_page(page);
   2.299 +            free_pgtable_maddr(pg_maddr);
   2.300  
   2.301              tmp += level_size(level);
   2.302          }
   2.303 @@ -735,17 +724,15 @@ void dma_pte_free_pagetable(struct domai
   2.304      /* free pgd */
   2.305      if ( start == 0 && end == ((((u64)1) << addr_width) - 1) )
   2.306      {
   2.307 -        free_xenheap_page((void *)hd->pgd);
   2.308 -        hd->pgd = NULL;
   2.309 +        free_pgtable_maddr(hd->pgd_maddr);
   2.310 +        hd->pgd_maddr = 0;
   2.311      }
   2.312  }
   2.313  
   2.314  /* iommu handling */
   2.315  static int iommu_set_root_entry(struct iommu *iommu)
   2.316  {
   2.317 -    void *addr;
   2.318      u32 cmd, sts;
   2.319 -    struct root_entry *root;
   2.320      unsigned long flags;
   2.321  
   2.322      if ( iommu == NULL )
   2.323 @@ -755,25 +742,19 @@ static int iommu_set_root_entry(struct i
   2.324          return -EINVAL;
   2.325      }
   2.326  
   2.327 -    if ( unlikely(!iommu->root_entry) )
   2.328 +    if ( iommu->root_maddr != 0 )
   2.329      {
   2.330 -        root = (struct root_entry *)alloc_xenheap_page();
   2.331 -        if ( root == NULL )
   2.332 -            return -ENOMEM;
   2.333 -
   2.334 -        memset((u8*)root, 0, PAGE_SIZE);
   2.335 -        iommu_flush_cache_page(iommu, root);
   2.336 -
   2.337 -        if ( cmpxchg((unsigned long *)&iommu->root_entry,
   2.338 -                     0, (unsigned long)root) != 0 )
   2.339 -            free_xenheap_page((void *)root);
   2.340 +        free_pgtable_maddr(iommu->root_maddr);
   2.341 +        iommu->root_maddr = 0;
   2.342      }
   2.343  
   2.344 -    addr = iommu->root_entry;
   2.345 -
   2.346      spin_lock_irqsave(&iommu->register_lock, flags);
   2.347  
   2.348 -    dmar_writeq(iommu->reg, DMAR_RTADDR_REG, virt_to_maddr(addr));
   2.349 +    iommu->root_maddr = alloc_pgtable_maddr();
   2.350 +    if ( iommu->root_maddr == 0 )
   2.351 +        return -ENOMEM;
   2.352 +
   2.353 +    dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
   2.354      cmd = iommu->gcmd | DMA_GCMD_SRTP;
   2.355      dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
   2.356  
   2.357 @@ -1110,8 +1091,11 @@ static void free_iommu(struct iommu *iom
   2.358  {
   2.359      if ( !iommu )
   2.360          return;
   2.361 -    if ( iommu->root_entry )
   2.362 -        free_xenheap_page((void *)iommu->root_entry);
   2.363 +    if ( iommu->root_maddr != 0 )
   2.364 +    {
   2.365 +        free_pgtable_maddr(iommu->root_maddr);
   2.366 +        iommu->root_maddr = 0;
   2.367 +    }
   2.368      if ( iommu->reg )
   2.369          iounmap(iommu->reg);
   2.370      free_intel_iommu(iommu->intel);
   2.371 @@ -1166,13 +1150,17 @@ static int domain_context_mapping_one(
   2.372      u8 bus, u8 devfn)
   2.373  {
   2.374      struct hvm_iommu *hd = domain_hvm_iommu(domain);
   2.375 -    struct context_entry *context;
   2.376 +    struct context_entry *context, *context_entries;
   2.377      unsigned long flags;
   2.378      int ret = 0;
   2.379 +    u64 maddr;
   2.380  
   2.381 -    context = device_to_context_entry(iommu, bus, devfn);
   2.382 +    maddr = bus_to_context_maddr(iommu, bus);
   2.383 +    context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
   2.384 +    context = &context_entries[devfn];
   2.385      if ( !context )
   2.386      {
   2.387 +        unmap_vtd_domain_page(context_entries);
   2.388          gdprintk(XENLOG_ERR VTDPREFIX,
   2.389                   "domain_context_mapping_one:context == NULL:"
   2.390                   "bdf = %x:%x:%x\n",
   2.391 @@ -1182,6 +1170,7 @@ static int domain_context_mapping_one(
   2.392  
   2.393      if ( context_present(*context) )
   2.394      {
   2.395 +        unmap_vtd_domain_page(context_entries);
   2.396          gdprintk(XENLOG_WARNING VTDPREFIX,
   2.397                   "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
   2.398                   bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   2.399 @@ -1202,19 +1191,8 @@ static int domain_context_mapping_one(
   2.400      else
   2.401      {
   2.402  #endif
   2.403 -        if ( !hd->pgd )
   2.404 -        {
   2.405 -            struct dma_pte *pgd = (struct dma_pte *)alloc_xenheap_page();
   2.406 -            if ( !pgd )
   2.407 -            {
   2.408 -                spin_unlock_irqrestore(&hd->mapping_lock, flags);
   2.409 -                return -ENOMEM;
   2.410 -            }
   2.411 -            memset(pgd, 0, PAGE_SIZE);
   2.412 -            hd->pgd = pgd;
   2.413 -        }
   2.414 - 
   2.415 -        context_set_address_root(*context, virt_to_maddr(hd->pgd));
   2.416 +        ASSERT(hd->pgd_maddr != 0);
   2.417 +        context_set_address_root(*context, hd->pgd_maddr);
   2.418          context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
   2.419  #ifdef CONTEXT_PASSTHRU
   2.420      }
   2.421 @@ -1226,9 +1204,11 @@ static int domain_context_mapping_one(
   2.422  
   2.423      gdprintk(XENLOG_INFO VTDPREFIX,
   2.424               "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
   2.425 -             " hd->pgd=%p\n",
   2.426 +             " hd->pgd_maddr=%"PRIx64"\n",
   2.427               bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
   2.428 -             context->hi, context->lo, hd->pgd);
   2.429 +             context->hi, context->lo, hd->pgd_maddr);
   2.430 +
   2.431 +    unmap_vtd_domain_page(context_entries);
   2.432  
   2.433      if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
   2.434                                      (((u16)bus) << 8) | devfn,
   2.435 @@ -1389,12 +1369,16 @@ static int domain_context_unmap_one(
   2.436      struct iommu *iommu,
   2.437      u8 bus, u8 devfn)
   2.438  {
   2.439 -    struct context_entry *context;
   2.440 +    struct context_entry *context, *context_entries;
   2.441      unsigned long flags;
   2.442 +    u64 maddr;
   2.443  
   2.444 -    context = device_to_context_entry(iommu, bus, devfn);
   2.445 +    maddr = bus_to_context_maddr(iommu, bus);
   2.446 +    context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
   2.447 +    context = &context_entries[devfn];
   2.448      if ( !context )
   2.449      {
   2.450 +        unmap_vtd_domain_page(context_entries);
   2.451          gdprintk(XENLOG_ERR VTDPREFIX,
   2.452                   "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
   2.453                   bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   2.454 @@ -1403,6 +1387,7 @@ static int domain_context_unmap_one(
   2.455  
   2.456      if ( !context_present(*context) )
   2.457      {
   2.458 +        unmap_vtd_domain_page(context_entries);
   2.459          gdprintk(XENLOG_WARNING VTDPREFIX,
   2.460                   "domain_context_unmap_one-%x:%x:%x- "
   2.461                   "context NOT present:return\n",
   2.462 @@ -1420,6 +1405,7 @@ static int domain_context_unmap_one(
   2.463      iommu_flush_cache_entry(iommu, context);
   2.464      iommu_flush_context_global(iommu, 0);
   2.465      iommu_flush_iotlb_global(iommu, 0);
   2.466 +    unmap_vtd_domain_page(context_entries);
   2.467      spin_unlock_irqrestore(&iommu->lock, flags);
   2.468  
   2.469      return 0;
   2.470 @@ -1575,36 +1561,7 @@ void iommu_domain_teardown(struct domain
   2.471          return;
   2.472  
   2.473      iommu_domid_release(d);
   2.474 -
   2.475 -#if CONFIG_PAGING_LEVELS == 3
   2.476 -    {
   2.477 -        struct hvm_iommu *hd  = domain_hvm_iommu(d);
   2.478 -        int level = agaw_to_level(hd->agaw);
   2.479 -        struct dma_pte *pgd = NULL;
   2.480 -
   2.481 -        switch ( level )
   2.482 -        {
   2.483 -        case VTD_PAGE_TABLE_LEVEL_3:
   2.484 -            if ( hd->pgd )
   2.485 -                free_xenheap_page((void *)hd->pgd);
   2.486 -            break;
   2.487 -        case VTD_PAGE_TABLE_LEVEL_4:
   2.488 -            if ( hd->pgd )
   2.489 -            {
   2.490 -                pgd = hd->pgd;
   2.491 -                if ( pgd[0].val != 0 )
   2.492 -                    free_xenheap_page((void*)maddr_to_virt(
   2.493 -                        dma_pte_addr(pgd[0])));
   2.494 -                free_xenheap_page((void *)hd->pgd);
   2.495 -            }
   2.496 -            break;
   2.497 -        default:
   2.498 -            gdprintk(XENLOG_ERR VTDPREFIX,
   2.499 -                     "Unsupported p2m table sharing level!\n");
   2.500 -            break;
   2.501 -        }
   2.502 -    }
   2.503 -#endif
   2.504 +    iommu_free_pgd(d);
   2.505      return_devices_to_dom0(d);
   2.506  }
   2.507  
   2.508 @@ -1630,8 +1587,8 @@ int intel_iommu_map_page(
   2.509  {
   2.510      struct acpi_drhd_unit *drhd;
   2.511      struct iommu *iommu;
   2.512 -    struct dma_pte *pte = NULL;
   2.513 -    struct page_info *pg = NULL;
   2.514 +    struct dma_pte *page = NULL, *pte = NULL;
   2.515 +    u64 pg_maddr;
   2.516  
   2.517      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   2.518      iommu = drhd->iommu;
   2.519 @@ -1642,15 +1599,15 @@ int intel_iommu_map_page(
   2.520          return 0;
   2.521  #endif
   2.522  
   2.523 -    pg = addr_to_dma_page(d, (paddr_t)gfn << PAGE_SHIFT_4K);
   2.524 -    if ( !pg )
   2.525 +    pg_maddr = addr_to_dma_page_maddr(d, gfn << PAGE_SHIFT_4K);
   2.526 +    if ( pg_maddr == 0 )
   2.527          return -ENOMEM;
   2.528 -    pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
   2.529 -    pte += gfn & LEVEL_MASK;
   2.530 +    page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
   2.531 +    pte = page + (gfn & LEVEL_MASK);
   2.532      dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
   2.533      dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
   2.534      iommu_flush_cache_entry(iommu, pte);
   2.535 -    unmap_domain_page(pte);
   2.536 +    unmap_vtd_domain_page(page);
   2.537  
   2.538      for_each_drhd_unit ( drhd )
   2.539      {
   2.540 @@ -1690,9 +1647,9 @@ int iommu_page_mapping(struct domain *do
   2.541      struct acpi_drhd_unit *drhd;
   2.542      struct iommu *iommu;
   2.543      unsigned long start_pfn, end_pfn;
   2.544 -    struct dma_pte *pte = NULL;
   2.545 +    struct dma_pte *page = NULL, *pte = NULL;
   2.546      int index;
   2.547 -    struct page_info *pg = NULL;
   2.548 +    u64 pg_maddr;
   2.549  
   2.550      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   2.551      iommu = drhd->iommu;
   2.552 @@ -1705,15 +1662,15 @@ int iommu_page_mapping(struct domain *do
   2.553      index = 0;
   2.554      while ( start_pfn < end_pfn )
   2.555      {
   2.556 -        pg = addr_to_dma_page(domain, iova + PAGE_SIZE_4K * index);
   2.557 -        if ( !pg )
   2.558 +        pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K * index);
   2.559 +        if ( pg_maddr == 0 )
   2.560              return -ENOMEM;
   2.561 -        pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
   2.562 -        pte += start_pfn & LEVEL_MASK;
   2.563 +        page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
   2.564 +        pte = page + (start_pfn & LEVEL_MASK);
   2.565          dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
   2.566          dma_set_pte_prot(*pte, prot);
   2.567          iommu_flush_cache_entry(iommu, pte);
   2.568 -        unmap_domain_page(pte);
   2.569 +        unmap_vtd_domain_page(page);
   2.570          start_pfn++;
   2.571          index++;
   2.572      }
   2.573 @@ -2051,159 +2008,6 @@ int intel_iommu_assign_device(struct dom
   2.574      return ret;
   2.575  }
   2.576  
   2.577 -void iommu_set_pgd(struct domain *d)
   2.578 -{
   2.579 -    struct hvm_iommu *hd  = domain_hvm_iommu(d);
   2.580 -    unsigned long p2m_table;
   2.581 -
   2.582 -    if ( hd->pgd )
   2.583 -    {
   2.584 -        gdprintk(XENLOG_INFO VTDPREFIX,
   2.585 -                 "iommu_set_pgd_1: hd->pgd = %p\n", hd->pgd);
   2.586 -        hd->pgd = NULL;
   2.587 -    }
   2.588 -    p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
   2.589 -
   2.590 -    if ( paging_mode_hap(d) )
   2.591 -    {
   2.592 -        int level = agaw_to_level(hd->agaw);
   2.593 -        struct dma_pte *dpte = NULL;
   2.594 -        mfn_t pgd_mfn;
   2.595 -
   2.596 -        switch ( level )
   2.597 -        {
   2.598 -        case VTD_PAGE_TABLE_LEVEL_3:
   2.599 -            dpte = map_domain_page(p2m_table);
   2.600 -            if ( !dma_pte_present(*dpte) )
   2.601 -            {
   2.602 -                gdprintk(XENLOG_ERR VTDPREFIX,
   2.603 -                         "iommu_set_pgd: second level wasn't there\n");
   2.604 -                unmap_domain_page(dpte);
   2.605 -                return;
   2.606 -            }
   2.607 -            pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
   2.608 -            unmap_domain_page(dpte);
   2.609 -            hd->pgd = maddr_to_virt(pagetable_get_paddr(
   2.610 -                pagetable_from_mfn(pgd_mfn)));
   2.611 -            break;
   2.612 -        case VTD_PAGE_TABLE_LEVEL_4:
   2.613 -            pgd_mfn = _mfn(p2m_table);
   2.614 -            hd->pgd = maddr_to_virt(pagetable_get_paddr(
   2.615 -                pagetable_from_mfn(pgd_mfn)));
   2.616 -            break;
   2.617 -        default:
   2.618 -            gdprintk(XENLOG_ERR VTDPREFIX,
   2.619 -                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
   2.620 -            break;
   2.621 -        }
   2.622 -    }
   2.623 -    else
   2.624 -    {
   2.625 -#if CONFIG_PAGING_LEVELS == 3
   2.626 -        int level = agaw_to_level(hd->agaw);
   2.627 -        struct dma_pte *pmd = NULL;
   2.628 -        struct dma_pte *pgd = NULL;
   2.629 -        struct dma_pte *pte = NULL;
   2.630 -        l3_pgentry_t *l3e;
   2.631 -        unsigned long flags;
   2.632 -        int i;
   2.633 -
   2.634 -        spin_lock_irqsave(&hd->mapping_lock, flags);
   2.635 -        if ( !hd->pgd )
   2.636 -        {
   2.637 -            pgd = (struct dma_pte *)alloc_xenheap_page();
   2.638 -            if ( !pgd )
   2.639 -            {
   2.640 -                spin_unlock_irqrestore(&hd->mapping_lock, flags);
   2.641 -                gdprintk(XENLOG_ERR VTDPREFIX,
   2.642 -                         "Allocate pgd memory failed!\n");
   2.643 -                return;
   2.644 -            }
   2.645 -            memset(pgd, 0, PAGE_SIZE);
   2.646 -            hd->pgd = pgd;
   2.647 -       }
   2.648 -
   2.649 -        l3e = map_domain_page(p2m_table);
   2.650 -        switch ( level )
   2.651 -        {
   2.652 -        case VTD_PAGE_TABLE_LEVEL_3:        /* Weybridge */
   2.653 -            /* We only support 8 entries for the PAE L3 p2m table */
   2.654 -            for ( i = 0; i < 8 ; i++ )
   2.655 -            {
   2.656 -                /* Don't create new L2 entry, use ones from p2m table */
   2.657 -                pgd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
   2.658 -            }
   2.659 -            break;
   2.660 -
   2.661 -        case VTD_PAGE_TABLE_LEVEL_4:        /* Stoakley */
   2.662 -            /* We allocate one more page for the top vtd page table. */
   2.663 -            pmd = (struct dma_pte *)alloc_xenheap_page();
   2.664 -            if ( !pmd )
   2.665 -            {
   2.666 -                unmap_domain_page(l3e);
   2.667 -                spin_unlock_irqrestore(&hd->mapping_lock, flags);
   2.668 -                gdprintk(XENLOG_ERR VTDPREFIX,
   2.669 -                         "Allocate pmd memory failed!\n");
   2.670 -                return;
   2.671 -            }
   2.672 -            memset((u8*)pmd, 0, PAGE_SIZE);
   2.673 -            pte = &pgd[0];
   2.674 -            dma_set_pte_addr(*pte, virt_to_maddr(pmd));
   2.675 -            dma_set_pte_readable(*pte);
   2.676 -            dma_set_pte_writable(*pte);
   2.677 -
   2.678 -            for ( i = 0; i < 8; i++ )
   2.679 -            {
   2.680 -                /* Don't create new L2 entry, use ones from p2m table */
   2.681 -                pmd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
   2.682 -            }
   2.683 -            break;
   2.684 -        default:
   2.685 -            gdprintk(XENLOG_ERR VTDPREFIX,
   2.686 -                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
   2.687 -            break;
   2.688 -        }
   2.689 -        unmap_domain_page(l3e);
   2.690 -        spin_unlock_irqrestore(&hd->mapping_lock, flags);
   2.691 -#elif CONFIG_PAGING_LEVELS == 4
   2.692 -        int level = agaw_to_level(hd->agaw);
   2.693 -        l3_pgentry_t *l3e;
   2.694 -        mfn_t pgd_mfn;
   2.695 -
   2.696 -        switch ( level )
   2.697 -        {
   2.698 -        case VTD_PAGE_TABLE_LEVEL_3:
   2.699 -            l3e = map_domain_page(p2m_table);
   2.700 -            if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
   2.701 -            {
   2.702 -                gdprintk(XENLOG_ERR VTDPREFIX,
   2.703 -                         "iommu_set_pgd: second level wasn't there\n");
   2.704 -                unmap_domain_page(l3e);
   2.705 -                return;
   2.706 -            }
   2.707 -            pgd_mfn = _mfn(l3e_get_pfn(*l3e));
   2.708 -            unmap_domain_page(l3e);
   2.709 -            hd->pgd = maddr_to_virt(pagetable_get_paddr(
   2.710 -                pagetable_from_mfn(pgd_mfn)));
   2.711 -            break;
   2.712 -
   2.713 -        case VTD_PAGE_TABLE_LEVEL_4:
   2.714 -            pgd_mfn = _mfn(p2m_table);
   2.715 -            hd->pgd = maddr_to_virt(pagetable_get_paddr(
   2.716 -                pagetable_from_mfn(pgd_mfn)));
   2.717 -            break;
   2.718 -        default:
   2.719 -            gdprintk(XENLOG_ERR VTDPREFIX,
   2.720 -                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
   2.721 -            break;
   2.722 -        }
   2.723 -#endif
   2.724 -    }
   2.725 -    gdprintk(XENLOG_INFO VTDPREFIX,
   2.726 -             "iommu_set_pgd: hd->pgd = %p\n", hd->pgd);
   2.727 -}
   2.728 -
   2.729 -
   2.730  u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
   2.731  int iommu_suspend(void)
   2.732  {
     3.1 --- a/xen/drivers/passthrough/vtd/iommu.h	Thu Apr 10 09:20:07 2008 +0100
     3.2 +++ b/xen/drivers/passthrough/vtd/iommu.h	Thu Apr 10 09:22:38 2008 +0100
     3.3 @@ -425,7 +425,7 @@ extern struct list_head acpi_rmrr_units;
     3.4  extern struct list_head acpi_ioapic_units;
     3.5  
     3.6  struct qi_ctrl {
     3.7 -    struct qinval_entry *qinval;         /* queue invalidation page */
     3.8 +    u64 qinval_maddr;  /* queue invalidation page machine address */
     3.9      int qinval_index;                    /* queue invalidation index */
    3.10      spinlock_t qinval_lock;      /* lock for queue invalidation page */
    3.11      spinlock_t qinval_poll_lock; /* lock for queue invalidation poll addr */
    3.12 @@ -433,7 +433,7 @@ struct qi_ctrl {
    3.13  };
    3.14  
    3.15  struct ir_ctrl {
    3.16 -    struct iremap_entry *iremap; /* interrupt remap table */
    3.17 +    u64 iremap_maddr;            /* interrupt remap table machine address */
    3.18      int iremap_index;            /* interrupt remap index */
    3.19      spinlock_t iremap_lock;      /* lock for irq remappping table */
    3.20  };
     4.1 --- a/xen/drivers/passthrough/vtd/qinval.c	Thu Apr 10 09:20:07 2008 +0100
     4.2 +++ b/xen/drivers/passthrough/vtd/qinval.c	Thu Apr 10 09:22:38 2008 +0100
     4.3 @@ -63,13 +63,14 @@ static int qinval_update_qtail(struct io
     4.4  static int gen_cc_inv_dsc(struct iommu *iommu, int index,
     4.5      u16 did, u16 source_id, u8 function_mask, u8 granu)
     4.6  {
     4.7 -    u64 *ptr64;
     4.8      unsigned long flags;
     4.9 -    struct qinval_entry * qinval_entry = NULL;
    4.10 +    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
    4.11      struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
    4.12  
    4.13      spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
    4.14 -    qinval_entry = &qi_ctrl->qinval[index];
    4.15 +    qinval_entries =
    4.16 +        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
    4.17 +    qinval_entry = &qinval_entries[index];
    4.18      qinval_entry->q.cc_inv_dsc.lo.type = TYPE_INVAL_CONTEXT;
    4.19      qinval_entry->q.cc_inv_dsc.lo.granu = granu;
    4.20      qinval_entry->q.cc_inv_dsc.lo.res_1 = 0;
    4.21 @@ -78,9 +79,10 @@ static int gen_cc_inv_dsc(struct iommu *
    4.22      qinval_entry->q.cc_inv_dsc.lo.fm = function_mask;
    4.23      qinval_entry->q.cc_inv_dsc.lo.res_2 = 0;
    4.24      qinval_entry->q.cc_inv_dsc.hi.res = 0;
    4.25 +
    4.26 +    unmap_vtd_domain_page(qinval_entries);
    4.27      spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
    4.28  
    4.29 -    ptr64 = (u64 *)qinval_entry;
    4.30      return 0;
    4.31  }
    4.32  
    4.33 @@ -93,7 +95,7 @@ int queue_invalidate_context(struct iomm
    4.34  
    4.35      spin_lock_irqsave(&iommu->register_lock, flags);
    4.36      index = qinval_next_index(iommu);
    4.37 -    if (index == -1)
    4.38 +    if ( index == -1 )
    4.39          return -EBUSY;
    4.40      ret = gen_cc_inv_dsc(iommu, index, did, source_id,
    4.41                           function_mask, granu);
    4.42 @@ -106,14 +108,16 @@ static int gen_iotlb_inv_dsc(struct iomm
    4.43      u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
    4.44  {
    4.45      unsigned long flags;
    4.46 -    struct qinval_entry * qinval_entry = NULL;
    4.47 +    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
    4.48      struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
    4.49  
    4.50      if ( index == -1 )
    4.51          return -1;
    4.52      spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
    4.53  
    4.54 -    qinval_entry = &qi_ctrl->qinval[index];
    4.55 +    qinval_entries =
    4.56 +        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
    4.57 +    qinval_entry = &qinval_entries[index];
    4.58      qinval_entry->q.iotlb_inv_dsc.lo.type = TYPE_INVAL_IOTLB;
    4.59      qinval_entry->q.iotlb_inv_dsc.lo.granu = granu;
    4.60      qinval_entry->q.iotlb_inv_dsc.lo.dr = 0;
    4.61 @@ -127,6 +131,7 @@ static int gen_iotlb_inv_dsc(struct iomm
    4.62      qinval_entry->q.iotlb_inv_dsc.hi.res_1 = 0;
    4.63      qinval_entry->q.iotlb_inv_dsc.hi.addr = addr;
    4.64  
    4.65 +    unmap_vtd_domain_page(qinval_entries);
    4.66      spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
    4.67      return 0;
    4.68  }
    4.69 @@ -151,15 +156,16 @@ int queue_invalidate_iotlb(struct iommu 
    4.70  static int gen_wait_dsc(struct iommu *iommu, int index,
    4.71      u8 iflag, u8 sw, u8 fn, u32 sdata, volatile u32 *saddr)
    4.72  {
    4.73 -    u64 *ptr64;
    4.74      unsigned long flags;
    4.75 -    struct qinval_entry * qinval_entry = NULL;
    4.76 +    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
    4.77      struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
    4.78  
    4.79      if ( index == -1 )
    4.80          return -1;
    4.81      spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
    4.82 -    qinval_entry = &qi_ctrl->qinval[index];
    4.83 +    qinval_entries =
    4.84 +        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
    4.85 +    qinval_entry = &qinval_entries[index];
    4.86      qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
    4.87      qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
    4.88      qinval_entry->q.inv_wait_dsc.lo.sw = sw;
    4.89 @@ -168,8 +174,8 @@ static int gen_wait_dsc(struct iommu *io
    4.90      qinval_entry->q.inv_wait_dsc.lo.sdata = sdata;
    4.91      qinval_entry->q.inv_wait_dsc.hi.res_1 = 0;
    4.92      qinval_entry->q.inv_wait_dsc.hi.saddr = virt_to_maddr(saddr) >> 2;
    4.93 +    unmap_vtd_domain_page(qinval_entries);
    4.94      spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
    4.95 -    ptr64 = (u64 *)qinval_entry;
    4.96      return 0;
    4.97  }
    4.98  
    4.99 @@ -185,7 +191,7 @@ static int queue_invalidate_wait(struct 
   4.100      spin_lock_irqsave(&qi_ctrl->qinval_poll_lock, flags);
   4.101      spin_lock_irqsave(&iommu->register_lock, flags);
   4.102      index = qinval_next_index(iommu);
   4.103 -    if (*saddr == 1)
   4.104 +    if ( *saddr == 1 )
   4.105          *saddr = 0;
   4.106      ret = gen_wait_dsc(iommu, index, iflag, sw, fn, sdata, saddr);
   4.107      ret |= qinval_update_qtail(iommu, index);
   4.108 @@ -196,8 +202,10 @@ static int queue_invalidate_wait(struct 
   4.109      {
   4.110          /* In case all wait descriptor writes to same addr with same data */
   4.111          start_time = jiffies;
   4.112 -        while ( *saddr != 1 ) {
   4.113 -            if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT)) {
   4.114 +        while ( *saddr != 1 )
   4.115 +        {
   4.116 +            if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
   4.117 +            {
   4.118                  print_qi_regs(iommu);
   4.119                  panic("queue invalidate wait descriptor was not executed\n");
   4.120              }
   4.121 @@ -213,7 +221,7 @@ int invalidate_sync(struct iommu *iommu)
   4.122      int ret = -1;
   4.123      struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
   4.124  
   4.125 -    if (qi_ctrl->qinval)
   4.126 +    if ( qi_ctrl->qinval_maddr != 0 )
   4.127      {
   4.128          ret = queue_invalidate_wait(iommu,
   4.129              0, 1, 1, 1, &qi_ctrl->qinval_poll_status);
   4.130 @@ -226,14 +234,16 @@ static int gen_dev_iotlb_inv_dsc(struct 
   4.131      u32 max_invs_pend, u16 sid, u16 size, u64 addr)
   4.132  {
   4.133      unsigned long flags;
   4.134 -    struct qinval_entry * qinval_entry = NULL;
   4.135 +    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
   4.136      struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
   4.137  
   4.138      if ( index == -1 )
   4.139          return -1;
   4.140      spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
   4.141  
   4.142 -    qinval_entry = &qi_ctrl->qinval[index];
   4.143 +    qinval_entries =
   4.144 +        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
   4.145 +    qinval_entry = &qinval_entries[index];
   4.146      qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
   4.147      qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
   4.148      qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
   4.149 @@ -244,6 +254,7 @@ static int gen_dev_iotlb_inv_dsc(struct 
   4.150      qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
   4.151      qinval_entry->q.dev_iotlb_inv_dsc.hi.addr = addr;
   4.152  
   4.153 +    unmap_vtd_domain_page(qinval_entries);
   4.154      spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
   4.155      return 0;
   4.156  }
   4.157 @@ -268,14 +279,16 @@ static int gen_iec_inv_dsc(struct iommu 
   4.158      u8 granu, u8 im, u16 iidx)
   4.159  {
   4.160      unsigned long flags;
   4.161 -    struct qinval_entry * qinval_entry = NULL;
   4.162 +    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
   4.163      struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
   4.164  
   4.165      if ( index == -1 )
   4.166          return -1;
   4.167      spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
   4.168  
   4.169 -    qinval_entry = &qi_ctrl->qinval[index];
   4.170 +    qinval_entries =
   4.171 +        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
   4.172 +    qinval_entry = &qinval_entries[index];
   4.173      qinval_entry->q.iec_inv_dsc.lo.type = TYPE_INVAL_IEC;
   4.174      qinval_entry->q.iec_inv_dsc.lo.granu = granu;
   4.175      qinval_entry->q.iec_inv_dsc.lo.res_1 = 0;
   4.176 @@ -284,6 +297,7 @@ static int gen_iec_inv_dsc(struct iommu 
   4.177      qinval_entry->q.iec_inv_dsc.lo.res_2 = 0;
   4.178      qinval_entry->q.iec_inv_dsc.hi.res = 0;
   4.179  
   4.180 +    unmap_vtd_domain_page(qinval_entries);
   4.181      spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
   4.182      return 0;
   4.183  }
   4.184 @@ -349,7 +363,7 @@ static int flush_context_qi(
   4.185              did = 0;
   4.186      }
   4.187  
   4.188 -    if (qi_ctrl->qinval)
   4.189 +    if ( qi_ctrl->qinval_maddr != 0 )
   4.190      {
   4.191          ret = queue_invalidate_context(iommu, did, sid, fm,
   4.192                                         type >> DMA_CCMD_INVL_GRANU_OFFSET);
   4.193 @@ -382,7 +396,8 @@ static int flush_iotlb_qi(
   4.194              did = 0;
   4.195      }
   4.196  
   4.197 -    if (qi_ctrl->qinval) {
   4.198 +    if ( qi_ctrl->qinval_maddr != 0 )
   4.199 +    {
   4.200          /* use queued invalidation */
   4.201          if (cap_write_drain(iommu->cap))
   4.202              dw = 1;
   4.203 @@ -400,7 +415,6 @@ static int flush_iotlb_qi(
   4.204  int qinval_setup(struct iommu *iommu)
   4.205  {
   4.206      unsigned long start_time;
   4.207 -    u64 paddr;
   4.208      u32 status = 0;
   4.209      struct qi_ctrl *qi_ctrl;
   4.210      struct iommu_flush *flush;
   4.211 @@ -411,15 +425,14 @@ int qinval_setup(struct iommu *iommu)
   4.212      if ( !ecap_queued_inval(iommu->ecap) )
   4.213          return -ENODEV;
   4.214  
   4.215 -    if (qi_ctrl->qinval == NULL) {
   4.216 -        qi_ctrl->qinval = alloc_xenheap_page();
   4.217 -        if (qi_ctrl->qinval == NULL)
   4.218 -            panic("Cannot allocate memory for qi_ctrl->qinval\n");
   4.219 -        memset((u8*)qi_ctrl->qinval, 0, PAGE_SIZE_4K);
   4.220 +    if ( qi_ctrl->qinval_maddr == 0 )
   4.221 +    {
   4.222 +        qi_ctrl->qinval_maddr = alloc_pgtable_maddr();
   4.223 +        if ( qi_ctrl->qinval_maddr == 0 )
   4.224 +            panic("Cannot allocate memory for qi_ctrl->qinval_maddr\n");
   4.225          flush->context = flush_context_qi;
   4.226          flush->iotlb = flush_iotlb_qi;
   4.227      }
   4.228 -    paddr = virt_to_maddr(qi_ctrl->qinval);
   4.229  
   4.230      /* Setup Invalidation Queue Address(IQA) register with the
   4.231       * address of the page we just allocated.  QS field at
   4.232 @@ -428,7 +441,7 @@ int qinval_setup(struct iommu *iommu)
   4.233       * registers are automatically reset to 0 with write
   4.234       * to IQA register.
   4.235       */
   4.236 -    dmar_writeq(iommu->reg, DMAR_IQA_REG, paddr);
   4.237 +    dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
   4.238  
   4.239      /* enable queued invalidation hardware */
   4.240      iommu->gcmd |= DMA_GCMD_QIE;
   4.241 @@ -436,11 +449,12 @@ int qinval_setup(struct iommu *iommu)
   4.242  
   4.243      /* Make sure hardware complete it */
   4.244      start_time = jiffies;
   4.245 -    while (1) {
   4.246 +    while ( 1 )
   4.247 +    {
   4.248          status = dmar_readl(iommu->reg, DMAR_GSTS_REG);
   4.249 -        if (status & DMA_GSTS_QIES)
   4.250 +        if ( status & DMA_GSTS_QIES )
   4.251              break;
   4.252 -        if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
   4.253 +        if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
   4.254              panic("Cannot set QIE field for queue invalidation\n");
   4.255          cpu_relax();
   4.256      }
     5.1 --- a/xen/drivers/passthrough/vtd/utils.c	Thu Apr 10 09:20:07 2008 +0100
     5.2 +++ b/xen/drivers/passthrough/vtd/utils.c	Thu Apr 10 09:22:38 2008 +0100
     5.3 @@ -25,6 +25,7 @@
     5.4  #include "../pci-direct.h"
     5.5  #include "../pci_regs.h"
     5.6  #include "msi.h"
     5.7 +#include "vtd.h"
     5.8  
     5.9  #define INTEL   0x8086
    5.10  #define SEABURG 0x4000
    5.11 @@ -243,7 +244,7 @@ u32 get_level_index(unsigned long gmfn, 
    5.12  }
    5.13  
    5.14  void print_vtd_entries(
    5.15 -    struct domain *d, 
    5.16 +    struct domain *d,
    5.17      struct iommu *iommu,
    5.18      int bus, int devfn,
    5.19      unsigned long gmfn)
    5.20 @@ -261,37 +262,40 @@ void print_vtd_entries(
    5.21      printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x gmfn = %lx\n",
    5.22             d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), gmfn);
    5.23  
    5.24 -    if ( hd->pgd == NULL )
    5.25 +    if ( hd->pgd_maddr == 0 )
    5.26      {
    5.27 -        printk("    hg->pgd == NULL\n");
    5.28 +        printk("    hd->pgd_maddr == 0\n");
    5.29          return;
    5.30      }
    5.31 -    printk("    d->pgd = %p virt_to_maddr(hd->pgd) = %lx\n",
    5.32 -           hd->pgd, virt_to_maddr(hd->pgd));
    5.33 +    printk("    hd->pgd_maddr = %"PRIx64"\n", hd->pgd_maddr);
    5.34  
    5.35      for_each_drhd_unit ( drhd )
    5.36      {
    5.37          printk("---- print_vtd_entries %d ----\n", i++);
    5.38  
    5.39 -        root_entry = iommu->root_entry;
    5.40 -        if ( root_entry == NULL )
    5.41 +        if ( iommu->root_maddr == 0 )
    5.42          {
    5.43 -            printk("    root_entry == NULL\n");
    5.44 +            printk("    iommu->root_maddr = 0\n");
    5.45              continue;
    5.46          }
    5.47  
    5.48 +        root_entry =
    5.49 +            (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
    5.50 + 
    5.51          printk("    root_entry = %p\n", root_entry);
    5.52          printk("    root_entry[%x] = %"PRIx64"\n", bus, root_entry[bus].val);
    5.53          if ( !root_present(root_entry[bus]) )
    5.54          {
    5.55 +            unmap_vtd_domain_page(root_entry);
    5.56              printk("    root_entry[%x] not present\n", bus);
    5.57              continue;
    5.58          }
    5.59  
    5.60          ctxt_entry =
    5.61 -            maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT);
    5.62 +            (struct context_entry *)map_vtd_domain_page(root_entry[bus].val);
    5.63          if ( ctxt_entry == NULL )
    5.64          {
    5.65 +            unmap_vtd_domain_page(root_entry);
    5.66              printk("    ctxt_entry == NULL\n");
    5.67              continue;
    5.68          }
    5.69 @@ -301,6 +305,8 @@ void print_vtd_entries(
    5.70                 devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
    5.71          if ( !context_present(ctxt_entry[devfn]) )
    5.72          {
    5.73 +            unmap_vtd_domain_page(ctxt_entry);
    5.74 +            unmap_vtd_domain_page(root_entry);
    5.75              printk("    ctxt_entry[%x] not present\n", devfn);
    5.76              continue;
    5.77          }
    5.78 @@ -308,6 +314,8 @@ void print_vtd_entries(
    5.79          if ( level != VTD_PAGE_TABLE_LEVEL_3 &&
    5.80               level != VTD_PAGE_TABLE_LEVEL_4)
    5.81          {
    5.82 +            unmap_vtd_domain_page(ctxt_entry);
    5.83 +            unmap_vtd_domain_page(root_entry);
    5.84              printk("Unsupported VTD page table level (%d)!\n", level);
    5.85              continue;
    5.86          }
    5.87 @@ -319,6 +327,8 @@ void print_vtd_entries(
    5.88              printk("    l%d = %p\n", level, l);
    5.89              if ( l == NULL )
    5.90              {
    5.91 +                unmap_vtd_domain_page(ctxt_entry);
    5.92 +                unmap_vtd_domain_page(root_entry);
    5.93                  printk("    l%d == NULL\n", level);
    5.94                  break;
    5.95              }
    5.96 @@ -329,6 +339,8 @@ void print_vtd_entries(
    5.97              pte.val = l[l_index];
    5.98              if ( !dma_pte_present(pte) )
    5.99              {
   5.100 +                unmap_vtd_domain_page(ctxt_entry);
   5.101 +                unmap_vtd_domain_page(root_entry);
   5.102                  printk("    l%d[%x] not present\n", level, l_index);
   5.103                  break;
   5.104              }
     6.1 --- a/xen/drivers/passthrough/vtd/x86/vtd.c	Thu Apr 10 09:20:07 2008 +0100
     6.2 +++ b/xen/drivers/passthrough/vtd/x86/vtd.c	Thu Apr 10 09:22:38 2008 +0100
     6.3 @@ -20,6 +20,7 @@
     6.4  
     6.5  #include <xen/sched.h>
     6.6  #include <xen/domain_page.h>
     6.7 +#include <asm/paging.h>
     6.8  #include <xen/iommu.h>
     6.9  #include "../iommu.h"
    6.10  #include "../dmar.h"
    6.11 @@ -124,3 +125,179 @@ void hvm_dpci_isairq_eoi(struct domain *
    6.12          }
    6.13      }
    6.14  }
    6.15 +
    6.16 +void iommu_set_pgd(struct domain *d)
    6.17 +{
    6.18 +    struct hvm_iommu *hd  = domain_hvm_iommu(d);
    6.19 +    unsigned long p2m_table;
    6.20 +    int level = agaw_to_level(hd->agaw);
    6.21 +    l3_pgentry_t *l3e;
    6.22 +
    6.23 +    p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
    6.24 +
    6.25 +    if ( paging_mode_hap(d) )
    6.26 +    {
    6.27 +        int level = agaw_to_level(hd->agaw);
    6.28 +        struct dma_pte *dpte = NULL;
    6.29 +        mfn_t pgd_mfn;
    6.30 +
    6.31 +        switch ( level )
    6.32 +        {
    6.33 +        case VTD_PAGE_TABLE_LEVEL_3:
    6.34 +            dpte = map_domain_page(p2m_table);
    6.35 +            if ( !dma_pte_present(*dpte) )
    6.36 +            {
    6.37 +                gdprintk(XENLOG_ERR VTDPREFIX,
    6.38 +                         "iommu_set_pgd: second level wasn't there\n");
    6.39 +                unmap_domain_page(dpte);
    6.40 +                return;
    6.41 +            }
    6.42 +            pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
    6.43 +            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
    6.44 +            unmap_domain_page(dpte);
    6.45 +            break;
    6.46 +        case VTD_PAGE_TABLE_LEVEL_4:
    6.47 +            pgd_mfn = _mfn(p2m_table);
    6.48 +            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
    6.49 +            break;
    6.50 +        default:
    6.51 +            gdprintk(XENLOG_ERR VTDPREFIX,
    6.52 +                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
    6.53 +            break;
    6.54 +        }
    6.55 +    }
    6.56 +    else
    6.57 +    {
    6.58 +#if CONFIG_PAGING_LEVELS == 3
    6.59 +        struct dma_pte *pte = NULL, *pgd_vaddr = NULL, *pmd_vaddr = NULL;
    6.60 +        int i;
    6.61 +        u64 pmd_maddr;
    6.62 +        unsigned long flags;
    6.63 +
    6.64 +        spin_lock_irqsave(&hd->mapping_lock, flags);
    6.65 +        hd->pgd_maddr = alloc_pgtable_maddr();
    6.66 +        if ( hd->pgd_maddr == 0 )
    6.67 +        {
    6.68 +            spin_unlock_irqrestore(&hd->mapping_lock, flags);
    6.69 +            gdprintk(XENLOG_ERR VTDPREFIX,
    6.70 +                     "Allocate pgd memory failed!\n");
    6.71 +            return;
    6.72 +        }
    6.73 +
    6.74 +        pgd_vaddr = map_vtd_domain_page(hd->pgd_maddr);
    6.75 +        l3e = map_domain_page(p2m_table);
    6.76 +        switch ( level )
    6.77 +        {
    6.78 +        case VTD_PAGE_TABLE_LEVEL_3:        /* Weybridge */
    6.79 +            /* We only support 8 entries for the PAE L3 p2m table */
    6.80 +            for ( i = 0; i < 8 ; i++ )
    6.81 +            {
    6.82 +                /* Don't create new L2 entry, use ones from p2m table */
    6.83 +                pgd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
    6.84 +            }
    6.85 +            break;
    6.86 +
    6.87 +        case VTD_PAGE_TABLE_LEVEL_4:        /* Stoakley */
    6.88 +            /* We allocate one more page for the top vtd page table. */
    6.89 +            pmd_maddr = alloc_pgtable_maddr();
    6.90 +            if ( pmd_maddr == 0 )
    6.91 +            {
    6.92 +                unmap_vtd_domain_page(pgd_vaddr);
    6.93 +                unmap_domain_page(l3e);
    6.94 +                spin_unlock_irqrestore(&hd->mapping_lock, flags);
    6.95 +                gdprintk(XENLOG_ERR VTDPREFIX,
    6.96 +                         "Allocate pmd memory failed!\n");
    6.97 +                return;
    6.98 +            }
    6.99 +
   6.100 +            pte = &pgd_vaddr[0];
   6.101 +            dma_set_pte_addr(*pte, pmd_maddr);
   6.102 +            dma_set_pte_readable(*pte);
   6.103 +            dma_set_pte_writable(*pte);
   6.104 +
   6.105 +            pmd_vaddr = map_vtd_domain_page(pmd_maddr);
   6.106 +            for ( i = 0; i < 8; i++ )
   6.107 +            {
   6.108 +                /* Don't create new L2 entry, use ones from p2m table */
   6.109 +                pmd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
   6.110 +            }
   6.111 +
   6.112 +            unmap_vtd_domain_page(pmd_vaddr);
   6.113 +            break;
   6.114 +        default:
   6.115 +            gdprintk(XENLOG_ERR VTDPREFIX,
   6.116 +                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
   6.117 +            break;
   6.118 +        }
   6.119 +
   6.120 +        unmap_vtd_domain_page(pgd_vaddr);
   6.121 +        unmap_domain_page(l3e);
   6.122 +        spin_unlock_irqrestore(&hd->mapping_lock, flags);
   6.123 +
   6.124 +#elif CONFIG_PAGING_LEVELS == 4
   6.125 +        mfn_t pgd_mfn;
   6.126 +
   6.127 +        switch ( level )
   6.128 +        {
   6.129 +        case VTD_PAGE_TABLE_LEVEL_3:
   6.130 +            l3e = map_domain_page(p2m_table);
   6.131 +            if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
   6.132 +            {
   6.133 +                gdprintk(XENLOG_ERR VTDPREFIX,
   6.134 +                         "iommu_set_pgd: second level wasn't there\n");
   6.135 +                unmap_domain_page(l3e);
   6.136 +                return;
   6.137 +            }
   6.138 +
   6.139 +            pgd_mfn = _mfn(l3e_get_pfn(*l3e));
   6.140 +            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
   6.141 +            unmap_domain_page(l3e);
   6.142 +            break;
   6.143 +        case VTD_PAGE_TABLE_LEVEL_4:
   6.144 +            pgd_mfn = _mfn(p2m_table);
   6.145 +            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
   6.146 +            break;
   6.147 +        default:
   6.148 +            gdprintk(XENLOG_ERR VTDPREFIX,
   6.149 +                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
   6.150 +            break;
   6.151 +        }
   6.152 +#endif
   6.153 +    }
   6.154 +}
   6.155 +
   6.156 +void iommu_free_pgd(struct domain *d)
   6.157 +{
   6.158 +#if CONFIG_PAGING_LEVELS == 3
   6.159 +    struct hvm_iommu *hd  = domain_hvm_iommu(d);
   6.160 +    int level = agaw_to_level(hd->agaw);
   6.161 +    struct dma_pte *pgd_vaddr = NULL;
   6.162 +
   6.163 +    switch ( level )
   6.164 +    {
   6.165 +    case VTD_PAGE_TABLE_LEVEL_3:
   6.166 +        if ( hd->pgd_maddr != 0 )
   6.167 +        {
   6.168 +            free_pgtable_maddr(hd->pgd_maddr);
   6.169 +            hd->pgd_maddr = 0;
   6.170 +        }
   6.171 +        break;
   6.172 +    case VTD_PAGE_TABLE_LEVEL_4:
   6.173 +        if ( hd->pgd_maddr != 0 )
   6.174 +        {
   6.175 +            pgd_vaddr = (struct dma_pte*)map_vtd_domain_page(hd->pgd_maddr);
   6.176 +            if ( pgd_vaddr[0].val != 0 )
   6.177 +                free_pgtable_maddr(pgd_vaddr[0].val);
   6.178 +            unmap_vtd_domain_page(pgd_vaddr);
   6.179 +            free_pgtable_maddr(hd->pgd_maddr);
   6.180 +            hd->pgd_maddr = 0;
   6.181 +        }
   6.182 +        break;
   6.183 +    default:
   6.184 +        gdprintk(XENLOG_ERR VTDPREFIX,
   6.185 +                 "Unsupported p2m table sharing level!\n");
   6.186 +        break;
   6.187 +    }
   6.188 +#endif
   6.189 +}
   6.190 +
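
Both the Weybridge (level-3) and Stoakley (level-4) branches of iommu_set_pgd() end with the same step: the eight PAE p2m L3 entries are reused directly as VT-d entries, with the present and writable bits ORed in so DMA translations can walk them. A compact sketch of just that shared copy step, assuming the same includes as vtd.c above; copy_pae_l3_to_vtd() is a hypothetical name used for illustration only:

    /* Sketch of the shared PAE copy step: dst is a freshly allocated and
     * mapped VT-d table page, l3e is the mapped p2m L3 table. */
    static void copy_pae_l3_to_vtd(struct dma_pte *dst, l3_pgentry_t *l3e)
    {
        int i;

        /* The PAE p2m L3 table has exactly 8 entries; no new lower-level
         * tables are created, the existing p2m pages are reused. */
        for ( i = 0; i < 8; i++ )
            dst[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
    }

The level-4 case differs only in that the copy lands in an extra page (pmd_maddr) hooked under entry 0 of the new top level, which iommu_free_pgd() later releases together with the top level itself.
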
     7.1 --- a/xen/include/xen/hvm/iommu.h	Thu Apr 10 09:20:07 2008 +0100
     7.2 +++ b/xen/include/xen/hvm/iommu.h	Thu Apr 10 09:22:38 2008 +0100
     7.3 @@ -38,7 +38,7 @@ struct g2m_ioport {
     7.4  struct hvm_iommu {
     7.5      spinlock_t iommu_list_lock;    /* protect iommu specific lists */
     7.6      struct list_head pdev_list;    /* direct accessed pci devices */
     7.7 -    struct dma_pte *pgd;           /* io page directory root */
     7.8 +    u64 pgd_maddr;                 /* io page directory machine address */
     7.9      spinlock_t mapping_lock;       /* io page table lock */
    7.10      int agaw;     /* adjusted guest address width, 0 is level 2 30-bit */
    7.11      struct list_head g2m_ioport_list;  /* guest to machine ioport mapping */
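
The hvm_iommu change is the heart of the patch: the IO page directory root is no longer a xenheap pointer that can be dereferenced directly, but the machine address of a domheap page. A hedged lifecycle sketch follows; the call sites are assumed rather than taken from this changeset (only iommu_set_pgd() and iommu_free_pgd() themselves are introduced here), and example_pgd_lifecycle() is hypothetical:

    /* Hypothetical lifecycle illustration; assumes <xen/sched.h>,
     * <xen/iommu.h> and the VT-d "iommu.h" definitions are available. */
    static void example_pgd_lifecycle(struct domain *d)
    {
        struct hvm_iommu *hd = domain_hvm_iommu(d);

        /* hd->pgd_maddr starts out zero; iommu_set_pgd() either reuses
         * the p2m table (HAP) or builds a private PAE top level. */
        iommu_set_pgd(d);

        /* ... guest lifetime: VT-d context entries reference
         *     hd->pgd_maddr directly as a machine address ... */

        /* On the PAE (CONFIG_PAGING_LEVELS == 3) paths this frees the
         * privately allocated top-level page(s) and clears the field. */
        iommu_free_pgd(d);
    }
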
     8.1 --- a/xen/include/xen/iommu.h	Thu Apr 10 09:20:07 2008 +0100
     8.2 +++ b/xen/include/xen/iommu.h	Thu Apr 10 09:22:38 2008 +0100
     8.3 @@ -67,7 +67,7 @@ struct iommu {
     8.4      u64	ecap;
     8.5      spinlock_t lock; /* protect context, domain ids */
     8.6      spinlock_t register_lock; /* protect iommu register handling */
     8.7 -    struct root_entry *root_entry; /* virtual address */
     8.8 +    u64 root_maddr; /* root entry machine address */
     8.9      unsigned int vector;
    8.10      struct intel_iommu *intel;
    8.11  };
    8.12 @@ -85,6 +85,7 @@ int iommu_map_page(struct domain *d, uns
    8.13  int iommu_unmap_page(struct domain *d, unsigned long gfn);
    8.14  void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry);
    8.15  void iommu_set_pgd(struct domain *d);
    8.16 +void iommu_free_pgd(struct domain *d);
    8.17  void iommu_domain_teardown(struct domain *d);
    8.18  int hvm_do_IRQ_dpci(struct domain *d, unsigned int irq);
    8.19  int dpci_ioport_intercept(ioreq_t *p);
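
The same pointer-to-maddr conversion is applied to struct iommu: root_entry becomes root_maddr, so any per-bus lookup must map the root table on demand rather than indexing a xenheap array. A minimal sketch of that access pattern, assuming the VT-d headers above; get_root_entry_val() is a hypothetical helper for illustration only, and locking (iommu->lock) is left to the caller:

    /* Sketch only: read one bus's root entry through a transient mapping. */
    static u64 get_root_entry_val(struct iommu *iommu, int bus)
    {
        struct root_entry *root_entries;
        u64 val;

        if ( iommu->root_maddr == 0 )
            return 0;

        root_entries =
            (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
        val = root_entries[bus].val;
        unmap_vtd_domain_page(root_entries);

        return val;
    }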