ia64/xen-unstable

view xen/arch/x86/hvm/vmx/vtd/intel-iommu.c @ 16830:cc5bb500df5f

vtd: Enable the queued invalidation method if hardware support for it is
detected; otherwise, fall back to the register-based invalidation method
(a short sketch of the selection logic follows below).

Signed-off-by: Allen Kay <allen.m.kay@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Jan 22 09:48:51 2008 +0000 (2008-01-22)
parents f983aa8e4b26
children 74a9bfccddba
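The register-based path remains the fallback: init_vtd_hw() (near the end of this
file) first points the per-IOMMU flush hooks at flush_context_reg()/flush_iotlb_reg()
and then calls qinval_setup(), which switches those hooks over to queued invalidation
when the hardware supports it. The sketch below is illustrative only;
ecap_queued_inval() and the flush_*_qi() handlers are assumed to be provided by the
queued-invalidation code (qinval.c / intel-iommu.h) and are not taken verbatim from
this file.

    /* Illustrative sketch only -- names outside this file are assumptions. */
    static void init_invalidation_method(struct iommu *iommu)
    {
        struct iommu_flush *flush = iommu_get_flush(iommu);

        /* Register-based invalidation always works and is the default. */
        flush->context = flush_context_reg;
        flush->iotlb   = flush_iotlb_reg;

        /* If the extended capability register advertises queued
         * invalidation, set up the invalidation queue and repoint the
         * hooks at the queued handlers. */
        if ( ecap_queued_inval(iommu->ecap) && qinval_setup(iommu) == 0 )
        {
            flush->context = flush_context_qi;  /* hypothetical QI handler */
            flush->iotlb   = flush_iotlb_qi;    /* hypothetical QI handler */
        }
    }

In the patch itself the switch-over is done inside qinval_setup(), so init_vtd_hw()
only has to install the register-based defaults and call it; callers of
flush->context()/flush->iotlb() never need to know which method is active.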
line source
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
20 */
22 #include <xen/init.h>
23 #include <xen/irq.h>
24 #include <xen/spinlock.h>
25 #include <xen/sched.h>
26 #include <xen/xmalloc.h>
27 #include <xen/domain_page.h>
28 #include <asm/delay.h>
29 #include <asm/string.h>
30 #include <asm/mm.h>
31 #include <asm/iommu.h>
32 #include <asm/hvm/vmx/intel-iommu.h>
33 #include "dmar.h"
34 #include "pci-direct.h"
35 #include "pci_regs.h"
36 #include "msi.h"
37 #include "extern.h"
39 #define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
41 static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */
42 static int domid_bitmap_size; /* domain id bitmap size in bit */
43 static void *domid_bitmap; /* iommu domain id bitmap */
45 #define DID_FIELD_WIDTH 16
46 #define DID_HIGH_OFFSET 8
47 static void context_set_domain_id(struct context_entry *context,
48 struct domain *d)
49 {
50 unsigned long flags;
51 domid_t iommu_domid = domain_iommu_domid(d);
53 if ( iommu_domid == 0 )
54 {
55 spin_lock_irqsave(&domid_bitmap_lock, flags);
56 iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
57 set_bit(iommu_domid, domid_bitmap);
58 spin_unlock_irqrestore(&domid_bitmap_lock, flags);
59 d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
60 }
62 context->hi &= (1 << DID_HIGH_OFFSET) - 1;
63 context->hi |= iommu_domid << DID_HIGH_OFFSET;
64 }
66 static void iommu_domid_release(struct domain *d)
67 {
68 domid_t iommu_domid = domain_iommu_domid(d);
70 if ( iommu_domid != 0 )
71 {
72 d->arch.hvm_domain.hvm_iommu.iommu_domid = 0;
73 clear_bit(iommu_domid, domid_bitmap);
74 }
75 }
77 unsigned int x86_clflush_size;
78 void clflush_cache_range(void *adr, int size)
79 {
80 int i;
81 for ( i = 0; i < size; i += x86_clflush_size )
82 clflush(adr + i);
83 }
85 static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
86 {
87 if ( !ecap_coherent(iommu->ecap) )
88 clflush_cache_range(addr, size);
89 }
91 #define iommu_flush_cache_entry(iommu, addr) \
92 __iommu_flush_cache(iommu, addr, 8)
93 #define iommu_flush_cache_page(iommu, addr) \
94 __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K)
96 int nr_iommus;
97 /* context entry handling */
98 static struct context_entry * device_to_context_entry(struct iommu *iommu,
99 u8 bus, u8 devfn)
100 {
101 struct root_entry *root;
102 struct context_entry *context;
103 unsigned long phy_addr;
104 unsigned long flags;
106 spin_lock_irqsave(&iommu->lock, flags);
107 root = &iommu->root_entry[bus];
108 if ( !root_present(*root) )
109 {
110 phy_addr = (unsigned long) alloc_xenheap_page();
111 if ( !phy_addr )
112 {
113 spin_unlock_irqrestore(&iommu->lock, flags);
114 return NULL;
115 }
116 memset((void *) phy_addr, 0, PAGE_SIZE);
117 iommu_flush_cache_page(iommu, (void *)phy_addr);
118 phy_addr = virt_to_maddr((void *)phy_addr);
119 set_root_value(*root, phy_addr);
120 set_root_present(*root);
121 iommu_flush_cache_entry(iommu, root);
122 }
123 phy_addr = (unsigned long) get_context_addr(*root);
124 context = (struct context_entry *)maddr_to_virt(phy_addr);
125 spin_unlock_irqrestore(&iommu->lock, flags);
126 return &context[devfn];
127 }
129 static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
130 {
131 struct root_entry *root;
132 struct context_entry *context;
133 unsigned long phy_addr;
134 int ret;
135 unsigned long flags;
137 spin_lock_irqsave(&iommu->lock, flags);
138 root = &iommu->root_entry[bus];
139 if ( !root_present(*root) )
140 {
141 ret = 0;
142 goto out;
143 }
144 phy_addr = get_context_addr(*root);
145 context = (struct context_entry *)maddr_to_virt(phy_addr);
146 ret = context_present(context[devfn]);
147 out:
148 spin_unlock_irqrestore(&iommu->lock, flags);
149 return ret;
150 }
152 static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
153 {
154 struct hvm_iommu *hd = domain_hvm_iommu(domain);
155 struct acpi_drhd_unit *drhd;
156 struct iommu *iommu;
157 int addr_width = agaw_to_width(hd->agaw);
158 struct dma_pte *parent, *pte = NULL, *pgd;
159 int level = agaw_to_level(hd->agaw);
160 int offset;
161 unsigned long flags;
162 struct page_info *pg = NULL;
163 u64 *vaddr = NULL;
165 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
166 iommu = drhd->iommu;
168 addr &= (((u64)1) << addr_width) - 1;
169 spin_lock_irqsave(&hd->mapping_lock, flags);
170 if ( !hd->pgd )
171 {
172 pgd = (struct dma_pte *)alloc_xenheap_page();
173 if ( !pgd )
174 {
175 spin_unlock_irqrestore(&hd->mapping_lock, flags);
176 return NULL;
177 }
178 memset(pgd, 0, PAGE_SIZE);
179 hd->pgd = pgd;
180 }
182 parent = hd->pgd;
183 while ( level > 1 )
184 {
185 offset = address_level_offset(addr, level);
186 pte = &parent[offset];
188 if ( dma_pte_addr(*pte) == 0 )
189 {
190 pg = alloc_domheap_page(NULL);
191 vaddr = map_domain_page(page_to_mfn(pg));
192 if ( !vaddr )
193 {
194 spin_unlock_irqrestore(&hd->mapping_lock, flags);
195 return NULL;
196 }
197 memset(vaddr, 0, PAGE_SIZE);
198 iommu_flush_cache_page(iommu, vaddr);
200 dma_set_pte_addr(*pte, page_to_maddr(pg));
202 /*
203 * higher-level tables always set r/w; the last-level
204 * page table controls the read/write permissions
205 */
206 dma_set_pte_readable(*pte);
207 dma_set_pte_writable(*pte);
208 iommu_flush_cache_entry(iommu, pte);
209 }
210 else
211 {
212 pg = maddr_to_page(pte->val);
213 vaddr = map_domain_page(page_to_mfn(pg));
214 if ( !vaddr )
215 {
216 spin_unlock_irqrestore(&hd->mapping_lock, flags);
217 return NULL;
218 }
219 }
221 if ( parent != hd->pgd )
222 unmap_domain_page(parent);
224 if ( level == 2 && vaddr )
225 {
226 unmap_domain_page(vaddr);
227 break;
228 }
230 parent = (struct dma_pte *)vaddr;
231 vaddr = NULL;
232 level--;
233 }
235 spin_unlock_irqrestore(&hd->mapping_lock, flags);
236 return pg;
237 }
239 /* return address's page at specific level */
240 static struct page_info *dma_addr_level_page(struct domain *domain,
241 u64 addr, int level)
242 {
243 struct hvm_iommu *hd = domain_hvm_iommu(domain);
244 struct dma_pte *parent, *pte = NULL;
245 int total = agaw_to_level(hd->agaw);
246 int offset;
247 struct page_info *pg = NULL;
249 parent = hd->pgd;
250 while ( level <= total )
251 {
252 offset = address_level_offset(addr, total);
253 pte = &parent[offset];
254 if ( dma_pte_addr(*pte) == 0 )
255 {
256 if ( parent != hd->pgd )
257 unmap_domain_page(parent);
258 break;
259 }
261 pg = maddr_to_page(pte->val);
262 if ( parent != hd->pgd )
263 unmap_domain_page(parent);
265 if ( level == total )
266 return pg;
268 parent = map_domain_page(page_to_mfn(pg));
269 total--;
270 }
272 return NULL;
273 }
275 static void iommu_flush_write_buffer(struct iommu *iommu)
276 {
277 u32 val;
278 unsigned long flag;
279 unsigned long start_time;
281 if ( !cap_rwbf(iommu->cap) )
282 return;
283 val = iommu->gcmd | DMA_GCMD_WBF;
285 spin_lock_irqsave(&iommu->register_lock, flag);
286 dmar_writel(iommu->reg, DMAR_GCMD_REG, val);
288 /* Make sure the hardware completes it */
289 start_time = jiffies;
290 for ( ; ; )
291 {
292 val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
293 if ( !(val & DMA_GSTS_WBFS) )
294 break;
295 if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
296 panic("DMAR hardware is malfunctional,"
297 " please disable IOMMU\n");
298 cpu_relax();
299 }
300 spin_unlock_irqrestore(&iommu->register_lock, flag);
301 }
303 /* return value determines whether we need a write-buffer flush */
304 static int flush_context_reg(
305 void *_iommu,
306 u16 did, u16 source_id, u8 function_mask, u64 type,
307 int non_present_entry_flush)
308 {
309 struct iommu *iommu = (struct iommu *) _iommu;
310 u64 val = 0;
311 unsigned long flag;
312 unsigned long start_time;
314 /*
315 * In the non-present entry flush case: if the hardware does not cache
316 * non-present entries there is nothing to do; if it does, flush the
317 * entries of domain 0 (that domain id is used to tag cached
318 * non-present entries).
319 */
320 if ( non_present_entry_flush )
321 {
322 if ( !cap_caching_mode(iommu->cap) )
323 return 1;
324 else
325 did = 0;
326 }
328 /* use register invalidation */
329 switch ( type )
330 {
331 case DMA_CCMD_GLOBAL_INVL:
332 val = DMA_CCMD_GLOBAL_INVL;
333 break;
334 case DMA_CCMD_DOMAIN_INVL:
335 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
336 break;
337 case DMA_CCMD_DEVICE_INVL:
338 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
339 |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
340 break;
341 default:
342 BUG();
343 }
344 val |= DMA_CCMD_ICC;
346 spin_lock_irqsave(&iommu->register_lock, flag);
347 dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
349 /* Make sure the hardware completes it */
350 start_time = jiffies;
351 for ( ; ; )
352 {
353 val = dmar_readq(iommu->reg, DMAR_CCMD_REG);
354 if ( !(val & DMA_CCMD_ICC) )
355 break;
356 if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
357 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
358 cpu_relax();
359 }
360 spin_unlock_irqrestore(&iommu->register_lock, flag);
361 /* flushing a context entry implicitly flushes the write buffer */
362 return 0;
363 }
365 static int inline iommu_flush_context_global(
366 struct iommu *iommu, int non_present_entry_flush)
367 {
368 struct iommu_flush *flush = iommu_get_flush(iommu);
369 return flush->context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
370 non_present_entry_flush);
371 }
373 static int inline iommu_flush_context_domain(
374 struct iommu *iommu, u16 did, int non_present_entry_flush)
375 {
376 struct iommu_flush *flush = iommu_get_flush(iommu);
377 return flush->context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
378 non_present_entry_flush);
379 }
381 static int inline iommu_flush_context_device(
382 struct iommu *iommu, u16 did, u16 source_id,
383 u8 function_mask, int non_present_entry_flush)
384 {
385 struct iommu_flush *flush = iommu_get_flush(iommu);
386 return flush->context(iommu, did, source_id, function_mask,
387 DMA_CCMD_DEVICE_INVL,
388 non_present_entry_flush);
389 }
391 /* return value determines whether we need a write-buffer flush */
392 static int flush_iotlb_reg(void *_iommu, u16 did,
393 u64 addr, unsigned int size_order, u64 type,
394 int non_present_entry_flush)
395 {
396 struct iommu *iommu = (struct iommu *) _iommu;
397 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
398 u64 val = 0, val_iva = 0;
399 unsigned long flag;
400 unsigned long start_time;
402 /*
403 * In the non-present entry flush case: if the hardware does not cache
404 * non-present entries there is nothing to do; if it does, flush the
405 * entries of domain 0 (that domain id is used to tag cached
406 * non-present entries).
407 */
408 if ( non_present_entry_flush )
409 {
410 if ( !cap_caching_mode(iommu->cap) )
411 return 1;
412 else
413 did = 0;
414 }
416 /* use register invalidation */
417 switch ( type )
418 {
419 case DMA_TLB_GLOBAL_FLUSH:
420 /* a global flush doesn't need to set IVA_REG */
421 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
422 break;
423 case DMA_TLB_DSI_FLUSH:
424 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
425 break;
426 case DMA_TLB_PSI_FLUSH:
427 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
428 /* Note: always flush non-leaf currently */
429 val_iva = size_order | addr;
430 break;
431 default:
432 BUG();
433 }
434 /* Note: set drain read/write */
435 if ( cap_read_drain(iommu->cap) )
436 val |= DMA_TLB_READ_DRAIN;
437 if ( cap_write_drain(iommu->cap) )
438 val |= DMA_TLB_WRITE_DRAIN;
440 spin_lock_irqsave(&iommu->register_lock, flag);
441 /* Note: Only uses first TLB reg currently */
442 if ( val_iva )
443 dmar_writeq(iommu->reg, tlb_offset, val_iva);
444 dmar_writeq(iommu->reg, tlb_offset + 8, val);
446 /* Make sure the hardware completes it */
447 start_time = jiffies;
448 for ( ; ; )
449 {
450 val = dmar_readq(iommu->reg, tlb_offset + 8);
451 if ( !(val & DMA_TLB_IVT) )
452 break;
453 if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
454 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
455 cpu_relax();
456 }
457 spin_unlock_irqrestore(&iommu->register_lock, flag);
459 /* check IOTLB invalidation granularity */
460 if ( DMA_TLB_IAIG(val) == 0 )
461 printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
462 if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
463 printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
464 (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
465 /* an IOTLB flush will implicitly flush the write buffer */
466 return 0;
467 }
469 static int inline iommu_flush_iotlb_global(struct iommu *iommu,
470 int non_present_entry_flush)
471 {
472 struct iommu_flush *flush = iommu_get_flush(iommu);
473 return flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
474 non_present_entry_flush);
475 }
477 static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
478 int non_present_entry_flush)
479 {
480 struct iommu_flush *flush = iommu_get_flush(iommu);
481 return flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
482 non_present_entry_flush);
483 }
485 static int inline get_alignment(u64 base, unsigned int size)
486 {
487 int t = 0;
488 u64 end;
490 end = base + size - 1;
491 while ( base != end )
492 {
493 t++;
494 base >>= 1;
495 end >>= 1;
496 }
497 return t;
498 }
500 static int inline iommu_flush_iotlb_psi(
501 struct iommu *iommu, u16 did,
502 u64 addr, unsigned int pages, int non_present_entry_flush)
503 {
504 unsigned int align;
505 struct iommu_flush *flush = iommu_get_flush(iommu);
507 BUG_ON(addr & (~PAGE_MASK_4K));
508 BUG_ON(pages == 0);
510 /* Fall back to domain-selective flush if PSI is not supported */
511 if ( !cap_pgsel_inv(iommu->cap) )
512 return iommu_flush_iotlb_dsi(iommu, did,
513 non_present_entry_flush);
515 /*
516 * PSI requires the region size to be a power of two (2^x pages) and
517 * the base address to be naturally aligned to that size
518 */
519 align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
520 /* Fall back to domain-selective flush if the size is too big */
521 if ( align > cap_max_amask_val(iommu->cap) )
522 return iommu_flush_iotlb_dsi(iommu, did,
523 non_present_entry_flush);
525 addr >>= PAGE_SHIFT_4K + align;
526 addr <<= PAGE_SHIFT_4K + align;
528 return flush->iotlb(iommu, did, addr, align,
529 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
530 }
532 void iommu_flush_all(void)
533 {
534 struct acpi_drhd_unit *drhd;
535 struct iommu *iommu;
537 wbinvd();
538 for_each_drhd_unit ( drhd )
539 {
540 iommu = drhd->iommu;
541 iommu_flush_context_global(iommu, 0);
542 iommu_flush_iotlb_global(iommu, 0);
543 }
544 }
546 /* clear one page's page table */
547 static void dma_pte_clear_one(struct domain *domain, u64 addr)
548 {
549 struct acpi_drhd_unit *drhd;
550 struct iommu *iommu;
551 struct dma_pte *pte = NULL;
552 struct page_info *pg = NULL;
554 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
556 /* get last level pte */
557 pg = dma_addr_level_page(domain, addr, 1);
558 if ( !pg )
559 return;
560 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
561 pte += address_level_offset(addr, 1);
562 if ( pte )
563 {
564 dma_clear_pte(*pte);
565 iommu_flush_cache_entry(drhd->iommu, pte);
567 for_each_drhd_unit ( drhd )
568 {
569 iommu = drhd->iommu;
570 if ( cap_caching_mode(iommu->cap) )
571 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
572 addr, 1, 0);
573 else if (cap_rwbf(iommu->cap))
574 iommu_flush_write_buffer(iommu);
575 }
576 }
577 unmap_domain_page(pte);
578 }
580 /* clear last-level PTEs; a TLB flush should follow */
581 static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end)
582 {
583 struct hvm_iommu *hd = domain_hvm_iommu(domain);
584 int addr_width = agaw_to_width(hd->agaw);
586 start &= (((u64)1) << addr_width) - 1;
587 end &= (((u64)1) << addr_width) - 1;
588 /* in case the range is not page-aligned */
589 start = PAGE_ALIGN_4K(start);
590 end &= PAGE_MASK_4K;
592 /* we don't need lock here, nobody else touches the iova range */
593 while ( start < end )
594 {
595 dma_pte_clear_one(domain, start);
596 start += PAGE_SIZE_4K;
597 }
598 }
600 /* free page table pages. last level pte should already be cleared */
601 void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
602 {
603 struct acpi_drhd_unit *drhd;
604 struct hvm_iommu *hd = domain_hvm_iommu(domain);
605 struct iommu *iommu;
606 int addr_width = agaw_to_width(hd->agaw);
607 struct dma_pte *pte;
608 int total = agaw_to_level(hd->agaw);
609 int level;
610 u32 tmp;
611 struct page_info *pg = NULL;
613 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
614 iommu = drhd->iommu;
616 start &= (((u64)1) << addr_width) - 1;
617 end &= (((u64)1) << addr_width) - 1;
619 /* we don't need lock here, nobody else touches the iova range */
620 level = 2;
621 while ( level <= total )
622 {
623 tmp = align_to_level(start, level);
624 if ( (tmp >= end) || ((tmp + level_size(level)) > end) )
625 return;
627 while ( tmp < end )
628 {
629 pg = dma_addr_level_page(domain, tmp, level);
630 if ( !pg )
631 return;
632 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
633 pte += address_level_offset(tmp, level);
634 dma_clear_pte(*pte);
635 iommu_flush_cache_entry(iommu, pte);
636 unmap_domain_page(pte);
637 free_domheap_page(pg);
639 tmp += level_size(level);
640 }
641 level++;
642 }
644 /* free pgd */
645 if ( start == 0 && end == ((((u64)1) << addr_width) - 1) )
646 {
647 free_xenheap_page((void *)hd->pgd);
648 hd->pgd = NULL;
649 }
650 }
652 /* iommu handling */
653 static int iommu_set_root_entry(struct iommu *iommu)
654 {
655 void *addr;
656 u32 cmd, sts;
657 struct root_entry *root;
658 unsigned long flags;
660 if ( iommu == NULL )
661 {
662 gdprintk(XENLOG_ERR VTDPREFIX,
663 "iommu_set_root_entry: iommu == NULL\n");
664 return -EINVAL;
665 }
667 if ( unlikely(!iommu->root_entry) )
668 {
669 root = (struct root_entry *)alloc_xenheap_page();
670 if ( root == NULL )
671 return -ENOMEM;
673 memset((u8*)root, 0, PAGE_SIZE);
674 iommu_flush_cache_page(iommu, root);
676 if ( cmpxchg((unsigned long *)&iommu->root_entry,
677 0, (unsigned long)root) != 0 )
678 free_xenheap_page((void *)root);
679 }
681 addr = iommu->root_entry;
683 spin_lock_irqsave(&iommu->register_lock, flags);
685 dmar_writeq(iommu->reg, DMAR_RTADDR_REG, virt_to_maddr(addr));
686 cmd = iommu->gcmd | DMA_GCMD_SRTP;
687 dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
689 /* Make sure the hardware completes it */
690 for ( ; ; )
691 {
692 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
693 if ( sts & DMA_GSTS_RTPS )
694 break;
695 cpu_relax();
696 }
698 spin_unlock_irqrestore(&iommu->register_lock, flags);
700 return 0;
701 }
703 static int iommu_enable_translation(struct iommu *iommu)
704 {
705 u32 sts;
706 unsigned long flags;
708 dprintk(XENLOG_INFO VTDPREFIX,
709 "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
710 spin_lock_irqsave(&iommu->register_lock, flags);
711 iommu->gcmd |= DMA_GCMD_TE;
712 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
713 /* Make sure the hardware completes it */
714 for ( ; ; )
715 {
716 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
717 if ( sts & DMA_GSTS_TES )
718 break;
719 cpu_relax();
720 }
722 /* Disable PMRs when VT-d engine takes effect per spec definition */
723 disable_pmr(iommu);
724 spin_unlock_irqrestore(&iommu->register_lock, flags);
725 return 0;
726 }
728 int iommu_disable_translation(struct iommu *iommu)
729 {
730 u32 sts;
731 unsigned long flags;
733 spin_lock_irqsave(&iommu->register_lock, flags);
734 iommu->gcmd &= ~ DMA_GCMD_TE;
735 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
737 /* Make sure the hardware completes it */
738 for ( ; ; )
739 {
740 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
741 if ( !(sts & DMA_GSTS_TES) )
742 break;
743 cpu_relax();
744 }
745 spin_unlock_irqrestore(&iommu->register_lock, flags);
746 return 0;
747 }
749 static struct iommu *vector_to_iommu[NR_VECTORS];
750 static int iommu_page_fault_do_one(struct iommu *iommu, int type,
751 u8 fault_reason, u16 source_id, u32 addr)
752 {
753 dprintk(XENLOG_WARNING VTDPREFIX,
754 "iommu_fault:%s: %x:%x.%x addr %x REASON %x iommu->reg = %p\n",
755 (type ? "DMA Read" : "DMA Write"), (source_id >> 8),
756 PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr,
757 fault_reason, iommu->reg);
759 if (fault_reason < 0x20)
760 print_vtd_entries(current->domain, iommu, (source_id >> 8),
761 (source_id & 0xff), (addr >> PAGE_SHIFT));
763 return 0;
764 }
766 static void iommu_fault_status(u32 fault_status)
767 {
768 if (fault_status & DMA_FSTS_PFO)
769 dprintk(XENLOG_ERR VTDPREFIX,
770 "iommu_fault_status: Fault Overflow\n");
771 else
772 if (fault_status & DMA_FSTS_PPF)
773 dprintk(XENLOG_ERR VTDPREFIX,
774 "iommu_fault_status: Primary Pending Fault\n");
775 else
776 if (fault_status & DMA_FSTS_AFO)
777 dprintk(XENLOG_ERR VTDPREFIX,
778 "iommu_fault_status: Advanced Fault Overflow\n");
779 else
780 if (fault_status & DMA_FSTS_APF)
781 dprintk(XENLOG_ERR VTDPREFIX,
782 "iommu_fault_status: Advanced Pending Fault\n");
783 else
784 if (fault_status & DMA_FSTS_IQE)
785 dprintk(XENLOG_ERR VTDPREFIX,
786 "iommu_fault_status: Invalidation Queue Error\n");
787 else
788 if (fault_status & DMA_FSTS_ICE)
789 dprintk(XENLOG_ERR VTDPREFIX,
790 "iommu_fault_status: Invalidation Completion Error\n");
791 else
792 if (fault_status & DMA_FSTS_ITE)
793 dprintk(XENLOG_ERR VTDPREFIX,
794 "iommu_fault_status: Invalidation Time-out Error\n");
795 }
797 #define PRIMARY_FAULT_REG_LEN (16)
798 static void iommu_page_fault(int vector, void *dev_id,
799 struct cpu_user_regs *regs)
800 {
801 struct iommu *iommu = dev_id;
802 int reg, fault_index;
803 u32 fault_status;
804 unsigned long flags;
806 dprintk(XENLOG_WARNING VTDPREFIX,
807 "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
809 spin_lock_irqsave(&iommu->register_lock, flags);
810 fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
811 spin_unlock_irqrestore(&iommu->register_lock, flags);
813 iommu_fault_status(fault_status);
815 /* FIXME: ignore advanced fault log */
816 if ( !(fault_status & DMA_FSTS_PPF) )
817 return;
818 fault_index = dma_fsts_fault_record_index(fault_status);
819 reg = cap_fault_reg_offset(iommu->cap);
820 for ( ; ; )
821 {
822 u8 fault_reason;
823 u16 source_id;
824 u32 guest_addr, data;
825 int type;
827 /* highest 32 bits */
828 spin_lock_irqsave(&iommu->register_lock, flags);
829 data = dmar_readl(iommu->reg, reg +
830 fault_index * PRIMARY_FAULT_REG_LEN + 12);
831 if ( !(data & DMA_FRCD_F) )
832 {
833 spin_unlock_irqrestore(&iommu->register_lock, flags);
834 break;
835 }
837 fault_reason = dma_frcd_fault_reason(data);
838 type = dma_frcd_type(data);
840 data = dmar_readl(iommu->reg, reg +
841 fault_index * PRIMARY_FAULT_REG_LEN + 8);
842 source_id = dma_frcd_source_id(data);
844 guest_addr = dmar_readq(iommu->reg, reg +
845 fault_index * PRIMARY_FAULT_REG_LEN);
846 guest_addr = dma_frcd_page_addr(guest_addr);
847 /* clear the fault */
848 dmar_writel(iommu->reg, reg +
849 fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
850 spin_unlock_irqrestore(&iommu->register_lock, flags);
852 iommu_page_fault_do_one(iommu, type, fault_reason,
853 source_id, guest_addr);
855 fault_index++;
856 if ( fault_index >= cap_num_fault_regs(iommu->cap) )
857 fault_index = 0;
858 }
860 /* clear primary fault overflow */
861 if ( fault_status & DMA_FSTS_PFO )
862 {
863 spin_lock_irqsave(&iommu->register_lock, flags);
864 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
865 spin_unlock_irqrestore(&iommu->register_lock, flags);
866 }
867 }
869 static void dma_msi_unmask(unsigned int vector)
870 {
871 struct iommu *iommu = vector_to_iommu[vector];
872 unsigned long flags;
874 /* unmask it */
875 spin_lock_irqsave(&iommu->register_lock, flags);
876 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
877 spin_unlock_irqrestore(&iommu->register_lock, flags);
878 }
880 static void dma_msi_mask(unsigned int vector)
881 {
882 unsigned long flags;
883 struct iommu *iommu = vector_to_iommu[vector];
885 /* mask it */
886 spin_lock_irqsave(&iommu->register_lock, flags);
887 dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
888 spin_unlock_irqrestore(&iommu->register_lock, flags);
889 }
891 static unsigned int dma_msi_startup(unsigned int vector)
892 {
893 dma_msi_unmask(vector);
894 return 0;
895 }
897 static void dma_msi_end(unsigned int vector)
898 {
899 dma_msi_unmask(vector);
900 ack_APIC_irq();
901 }
903 static void dma_msi_data_init(struct iommu *iommu, int vector)
904 {
905 u32 msi_data = 0;
906 unsigned long flags;
908 /* Fixed, edge, assert mode. Follow MSI setting */
909 msi_data |= vector & 0xff;
910 msi_data |= 1 << 14;
912 spin_lock_irqsave(&iommu->register_lock, flags);
913 dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
914 spin_unlock_irqrestore(&iommu->register_lock, flags);
915 }
917 static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
918 {
919 u64 msi_address;
920 unsigned long flags;
922 /* Physical, dedicated cpu. Follow MSI setting */
923 msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
924 msi_address |= MSI_PHYSICAL_MODE << 2;
925 msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
926 msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;
928 spin_lock_irqsave(&iommu->register_lock, flags);
929 dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
930 dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
931 spin_unlock_irqrestore(&iommu->register_lock, flags);
932 }
934 static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
935 {
936 struct iommu *iommu = vector_to_iommu[vector];
937 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
938 }
940 static struct hw_interrupt_type dma_msi_type = {
941 .typename = "DMA_MSI",
942 .startup = dma_msi_startup,
943 .shutdown = dma_msi_mask,
944 .enable = dma_msi_unmask,
945 .disable = dma_msi_mask,
946 .ack = dma_msi_mask,
947 .end = dma_msi_end,
948 .set_affinity = dma_msi_set_affinity,
949 };
951 int iommu_set_interrupt(struct iommu *iommu)
952 {
953 int vector, ret;
955 vector = assign_irq_vector(AUTO_ASSIGN);
956 vector_to_iommu[vector] = iommu;
958 /* The VT-d fault is an MSI; make irq == vector */
959 irq_vector[vector] = vector;
960 vector_irq[vector] = vector;
962 if ( !vector )
963 {
964 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
965 return -EINVAL;
966 }
968 irq_desc[vector].handler = &dma_msi_type;
969 ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
970 if ( ret )
971 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
972 return vector;
973 }
975 struct iommu *iommu_alloc(void *hw_data)
976 {
977 struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
978 struct iommu *iommu;
979 struct qi_ctrl *qi_ctrl;
980 struct ir_ctrl *ir_ctrl;
982 if ( nr_iommus > MAX_IOMMUS )
983 {
984 gdprintk(XENLOG_ERR VTDPREFIX,
985 "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
986 return NULL;
987 }
989 iommu = xmalloc(struct iommu);
990 if ( !iommu )
991 return NULL;
992 memset(iommu, 0, sizeof(struct iommu));
994 set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
995 iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
997 printk("iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
998 iommu->reg, drhd->address);
1000 nr_iommus++;
1002 if ( !iommu->reg )
1003 {
1004 printk(KERN_ERR VTDPREFIX "IOMMU: can't map the region\n");
1005 goto error;
1006 }
1008 iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
1009 iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
1011 printk("iommu_alloc: cap = %"PRIx64"\n",iommu->cap);
1012 printk("iommu_alloc: ecap = %"PRIx64"\n", iommu->ecap);
1014 spin_lock_init(&iommu->lock);
1015 spin_lock_init(&iommu->register_lock);
1017 qi_ctrl = iommu_qi_ctrl(iommu);
1018 spin_lock_init(&qi_ctrl->qinval_lock);
1019 spin_lock_init(&qi_ctrl->qinval_poll_lock);
1021 ir_ctrl = iommu_ir_ctrl(iommu);
1022 spin_lock_init(&ir_ctrl->iremap_lock);
1024 drhd->iommu = iommu;
1025 return iommu;
1026 error:
1027 xfree(iommu);
1028 return NULL;
1031 static void free_iommu(struct iommu *iommu)
1033 if ( !iommu )
1034 return;
1035 if ( iommu->root_entry )
1036 free_xenheap_page((void *)iommu->root_entry);
1037 if ( iommu->reg )
1038 iounmap(iommu->reg);
1039 free_irq(iommu->vector);
1040 xfree(iommu);
1043 #define guestwidth_to_adjustwidth(gaw) ({ \
1044 int agaw, r = (gaw - 12) % 9; \
1045 agaw = (r == 0) ? gaw : (gaw + 9 - r); \
1046 if ( agaw > 64 ) \
1047 agaw = 64; \
1048 agaw; })
1050 int iommu_domain_init(struct domain *domain)
1052 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1053 struct iommu *iommu = NULL;
1054 int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
1055 int adjust_width, agaw;
1056 unsigned long sagaw;
1057 struct acpi_drhd_unit *drhd;
1059 spin_lock_init(&hd->mapping_lock);
1060 spin_lock_init(&hd->iommu_list_lock);
1061 INIT_LIST_HEAD(&hd->pdev_list);
1062 INIT_LIST_HEAD(&hd->g2m_ioport_list);
1064 if ( !vtd_enabled || list_empty(&acpi_drhd_units) )
1065 return 0;
1067 for_each_drhd_unit ( drhd )
1068 iommu = drhd->iommu ? : iommu_alloc(drhd);
1070 /* calculate AGAW */
1071 if (guest_width > cap_mgaw(iommu->cap))
1072 guest_width = cap_mgaw(iommu->cap);
1073 adjust_width = guestwidth_to_adjustwidth(guest_width);
1074 agaw = width_to_agaw(adjust_width);
1075 /* FIXME: hardware doesn't support it, choose a bigger one? */
1076 sagaw = cap_sagaw(iommu->cap);
1077 if ( !test_bit(agaw, &sagaw) )
1078 {
1079 gdprintk(XENLOG_ERR VTDPREFIX,
1080 "IOMMU: hardware doesn't support the agaw\n");
1081 agaw = find_next_bit(&sagaw, 5, agaw);
1082 if ( agaw >= 5 )
1083 return -ENODEV;
1084 }
1085 hd->agaw = agaw;
1086 return 0;
1089 static int domain_context_mapping_one(
1090 struct domain *domain,
1091 struct iommu *iommu,
1092 u8 bus, u8 devfn)
1094 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1095 struct context_entry *context;
1096 unsigned long flags;
1097 int ret = 0;
1099 context = device_to_context_entry(iommu, bus, devfn);
1100 if ( !context )
1101 {
1102 gdprintk(XENLOG_ERR VTDPREFIX,
1103 "domain_context_mapping_one:context == NULL:"
1104 "bdf = %x:%x:%x\n",
1105 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1106 return -ENOMEM;
1107 }
1109 if ( context_present(*context) )
1110 {
1111 gdprintk(XENLOG_WARNING VTDPREFIX,
1112 "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
1113 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1114 return 0;
1115 }
1117 spin_lock_irqsave(&iommu->lock, flags);
1118 /*
1119 * Domain id 0 is not valid on Intel's IOMMU: domain ids are
1120 * 1-based here, as required by the hardware.
1121 */
1122 context_set_domain_id(context, domain);
1123 context_set_address_width(*context, hd->agaw);
1125 if ( ecap_pass_thru(iommu->ecap) )
1126 context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
1127 #ifdef CONTEXT_PASSTHRU
1128 else
1129 {
1130 #endif
1131 if ( !hd->pgd )
1132 {
1133 struct dma_pte *pgd = (struct dma_pte *)alloc_xenheap_page();
1134 if ( !pgd )
1135 {
1136 spin_unlock_irqrestore(&iommu->lock, flags);
1137 return -ENOMEM;
1138 }
1139 memset(pgd, 0, PAGE_SIZE);
1140 hd->pgd = pgd;
1141 }
1143 context_set_address_root(*context, virt_to_maddr(hd->pgd));
1144 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1145 #ifdef CONTEXT_PASSTHRU
1146 }
1147 #endif
1149 context_set_fault_enable(*context);
1150 context_set_present(*context);
1151 iommu_flush_cache_entry(iommu, context);
1153 gdprintk(XENLOG_INFO VTDPREFIX,
1154 "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
1155 " hd->pgd=%p\n",
1156 bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1157 context->hi, context->lo, hd->pgd);
1159 if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
1160 (((u16)bus) << 8) | devfn,
1161 DMA_CCMD_MASK_NOBIT, 1) )
1162 iommu_flush_write_buffer(iommu);
1163 else
1164 iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
1165 spin_unlock_irqrestore(&iommu->lock, flags);
1166 return ret;
1169 static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap)
1171 u8 id;
1172 int ttl = 48;
1174 while ( ttl-- )
1175 {
1176 pos = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos);
1177 if ( pos < 0x40 )
1178 break;
1180 pos &= ~3;
1181 id = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1182 pos + PCI_CAP_LIST_ID);
1184 if ( id == 0xff )
1185 break;
1186 if ( id == cap )
1187 return pos;
1189 pos += PCI_CAP_LIST_NEXT;
1190 }
1191 return 0;
1194 #define PCI_BASE_CLASS_BRIDGE 0x06
1195 #define PCI_CLASS_BRIDGE_PCI 0x0604
1197 #define DEV_TYPE_PCIe_ENDPOINT 1
1198 #define DEV_TYPE_PCI_BRIDGE 2
1199 #define DEV_TYPE_PCI 3
1201 int pdev_type(struct pci_dev *dev)
1203 u16 class_device;
1204 u16 status;
1206 class_device = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
1207 PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE);
1208 if ( class_device == PCI_CLASS_BRIDGE_PCI )
1209 return DEV_TYPE_PCI_BRIDGE;
1211 status = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
1212 PCI_FUNC(dev->devfn), PCI_STATUS);
1214 if ( !(status & PCI_STATUS_CAP_LIST) )
1215 return DEV_TYPE_PCI;
1217 if ( __pci_find_next_cap(dev->bus, dev->devfn,
1218 PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP) )
1219 return DEV_TYPE_PCIe_ENDPOINT;
1221 return DEV_TYPE_PCI;
1224 #define MAX_BUSES 256
1225 struct pci_dev bus2bridge[MAX_BUSES];
1227 static int domain_context_mapping(
1228 struct domain *domain,
1229 struct iommu *iommu,
1230 struct pci_dev *pdev)
1232 int ret = 0;
1233 int dev, func, sec_bus, sub_bus;
1234 u32 type;
1236 type = pdev_type(pdev);
1237 switch ( type )
1239 case DEV_TYPE_PCI_BRIDGE:
1240 sec_bus = read_pci_config_byte(
1241 pdev->bus, PCI_SLOT(pdev->devfn),
1242 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1244 if ( bus2bridge[sec_bus].bus == 0 )
1245 {
1246 bus2bridge[sec_bus].bus = pdev->bus;
1247 bus2bridge[sec_bus].devfn = pdev->devfn;
1248 }
1250 sub_bus = read_pci_config_byte(
1251 pdev->bus, PCI_SLOT(pdev->devfn),
1252 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1254 if ( sec_bus != sub_bus )
1255 gdprintk(XENLOG_WARNING VTDPREFIX,
1256 "context_context_mapping: nested PCI bridge not "
1257 "supported: bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n",
1258 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1259 sec_bus, sub_bus);
1260 break;
1261 case DEV_TYPE_PCIe_ENDPOINT:
1262 gdprintk(XENLOG_INFO VTDPREFIX,
1263 "domain_context_mapping:PCIe : bdf = %x:%x:%x\n",
1264 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1265 ret = domain_context_mapping_one(domain, iommu,
1266 (u8)(pdev->bus), (u8)(pdev->devfn));
1267 break;
1268 case DEV_TYPE_PCI:
1269 gdprintk(XENLOG_INFO VTDPREFIX,
1270 "domain_context_mapping:PCI: bdf = %x:%x:%x\n",
1271 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1273 if ( pdev->bus == 0 )
1274 ret = domain_context_mapping_one(
1275 domain, iommu, (u8)(pdev->bus), (u8)(pdev->devfn));
1276 else
1277 {
1278 if ( bus2bridge[pdev->bus].bus != 0 )
1279 gdprintk(XENLOG_WARNING VTDPREFIX,
1280 "domain_context_mapping:bus2bridge"
1281 "[%d].bus != 0\n", pdev->bus);
1283 ret = domain_context_mapping_one(
1284 domain, iommu,
1285 (u8)(bus2bridge[pdev->bus].bus),
1286 (u8)(bus2bridge[pdev->bus].devfn));
1288 /* now map everything behind the PCI bridge */
1289 for ( dev = 0; dev < 32; dev++ )
1290 {
1291 for ( func = 0; func < 8; func++ )
1292 {
1293 ret = domain_context_mapping_one(
1294 domain, iommu,
1295 pdev->bus, (u8)PCI_DEVFN(dev, func));
1296 if ( ret )
1297 return ret;
1298 }
1299 }
1300 }
1301 break;
1302 default:
1303 gdprintk(XENLOG_ERR VTDPREFIX,
1304 "domain_context_mapping:unknown type : bdf = %x:%x:%x\n",
1305 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1306 ret = -EINVAL;
1307 break;
1310 return ret;
1313 static int domain_context_unmap_one(
1314 struct domain *domain,
1315 struct iommu *iommu,
1316 u8 bus, u8 devfn)
1318 struct context_entry *context;
1319 unsigned long flags;
1321 context = device_to_context_entry(iommu, bus, devfn);
1322 if ( !context )
1323 {
1324 gdprintk(XENLOG_ERR VTDPREFIX,
1325 "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
1326 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1327 return -ENOMEM;
1328 }
1330 if ( !context_present(*context) )
1331 {
1332 gdprintk(XENLOG_WARNING VTDPREFIX,
1333 "domain_context_unmap_one-%x:%x:%x- "
1334 "context NOT present:return\n",
1335 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1336 return 0;
1337 }
1339 gdprintk(XENLOG_INFO VTDPREFIX,
1340 "domain_context_unmap_one: bdf = %x:%x:%x\n",
1341 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1343 spin_lock_irqsave(&iommu->lock, flags);
1344 context_clear_present(*context);
1345 context_clear_entry(*context);
1346 iommu_flush_cache_entry(iommu, context);
1347 iommu_flush_context_global(iommu, 0);
1348 iommu_flush_iotlb_global(iommu, 0);
1349 spin_unlock_irqrestore(&iommu->lock, flags);
1351 return 0;
1354 static int domain_context_unmap(
1355 struct domain *domain,
1356 struct iommu *iommu,
1357 struct pci_dev *pdev)
1359 int ret = 0;
1360 int dev, func, sec_bus, sub_bus;
1361 u32 type;
1363 type = pdev_type(pdev);
1364 switch ( type )
1366 case DEV_TYPE_PCI_BRIDGE:
1367 sec_bus = read_pci_config_byte(
1368 pdev->bus, PCI_SLOT(pdev->devfn),
1369 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1370 sub_bus = read_pci_config_byte(
1371 pdev->bus, PCI_SLOT(pdev->devfn),
1372 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1374 gdprintk(XENLOG_INFO VTDPREFIX,
1375 "domain_context_unmap:BRIDGE:%x:%x:%x "
1376 "sec_bus=%x sub_bus=%x\n",
1377 pdev->bus, PCI_SLOT(pdev->devfn),
1378 PCI_FUNC(pdev->devfn), sec_bus, sub_bus);
1379 break;
1380 case DEV_TYPE_PCIe_ENDPOINT:
1381 gdprintk(XENLOG_INFO VTDPREFIX,
1382 "domain_context_unmap:PCIe : bdf = %x:%x:%x\n",
1383 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1384 ret = domain_context_unmap_one(domain, iommu,
1385 (u8)(pdev->bus), (u8)(pdev->devfn));
1386 break;
1387 case DEV_TYPE_PCI:
1388 gdprintk(XENLOG_INFO VTDPREFIX,
1389 "domain_context_unmap:PCI: bdf = %x:%x:%x\n",
1390 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1391 if ( pdev->bus == 0 )
1392 ret = domain_context_unmap_one(
1393 domain, iommu,
1394 (u8)(pdev->bus), (u8)(pdev->devfn));
1395 else
1396 {
1397 if ( bus2bridge[pdev->bus].bus != 0 )
1398 gdprintk(XENLOG_WARNING VTDPREFIX,
1399 "domain_context_unmap:"
1400 "bus2bridge[%d].bus != 0\n", pdev->bus);
1402 ret = domain_context_unmap_one(domain, iommu,
1403 (u8)(bus2bridge[pdev->bus].bus),
1404 (u8)(bus2bridge[pdev->bus].devfn));
1406 /* Unmap everything behind the PCI bridge */
1407 for ( dev = 0; dev < 32; dev++ )
1408 {
1409 for ( func = 0; func < 8; func++ )
1410 {
1411 ret = domain_context_unmap_one(
1412 domain, iommu,
1413 pdev->bus, (u8)PCI_DEVFN(dev, func));
1414 if ( ret )
1415 return ret;
1416 }
1417 }
1418 }
1419 break;
1420 default:
1421 gdprintk(XENLOG_ERR VTDPREFIX,
1422 "domain_context_unmap:unknown type: bdf = %x:%x:%x\n",
1423 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1424 ret = -EINVAL;
1425 break;
1428 return ret;
1431 void reassign_device_ownership(
1432 struct domain *source,
1433 struct domain *target,
1434 u8 bus, u8 devfn)
1436 struct hvm_iommu *source_hd = domain_hvm_iommu(source);
1437 struct hvm_iommu *target_hd = domain_hvm_iommu(target);
1438 struct pci_dev *pdev;
1439 struct acpi_drhd_unit *drhd;
1440 struct iommu *iommu;
1441 int status;
1442 unsigned long flags;
1444 gdprintk(XENLOG_INFO VTDPREFIX,
1445 "reassign_device-%x:%x:%x- source = %d target = %d\n",
1446 bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1447 source->domain_id, target->domain_id);
1449 for_each_pdev( source, pdev )
1450 {
1451 if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
1452 continue;
1454 drhd = acpi_find_matched_drhd_unit(pdev);
1455 iommu = drhd->iommu;
1456 domain_context_unmap(source, iommu, pdev);
1458 /* Move pci device from the source domain to target domain. */
1459 spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
1460 spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
1461 list_move(&pdev->list, &target_hd->pdev_list);
1462 spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
1463 spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
1465 status = domain_context_mapping(target, iommu, pdev);
1466 if ( status != 0 )
1467 gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
1469 break;
1470 }
1473 void return_devices_to_dom0(struct domain *d)
1475 struct hvm_iommu *hd = domain_hvm_iommu(d);
1476 struct pci_dev *pdev;
1478 while ( !list_empty(&hd->pdev_list) )
1479 {
1480 pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
1481 dprintk(XENLOG_INFO VTDPREFIX,
1482 "return_devices_to_dom0: bdf = %x:%x:%x\n",
1483 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1484 reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
1485 }
1487 #ifdef VTD_DEBUG
1488 for_each_pdev ( dom0, pdev )
1489 dprintk(XENLOG_INFO VTDPREFIX,
1490 "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
1491 dom0->domain_id, pdev->bus,
1492 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1493 #endif
1496 void iommu_domain_teardown(struct domain *d)
1498 if ( list_empty(&acpi_drhd_units) )
1499 return;
1501 iommu_domid_release(d);
1503 #if CONFIG_PAGING_LEVELS == 3
1504 {
1505 struct hvm_iommu *hd = domain_hvm_iommu(d);
1506 int level = agaw_to_level(hd->agaw);
1507 struct dma_pte *pgd = NULL;
1509 switch ( level )
1510 {
1511 case VTD_PAGE_TABLE_LEVEL_3:
1512 if ( hd->pgd )
1513 free_xenheap_page((void *)hd->pgd);
1514 break;
1515 case VTD_PAGE_TABLE_LEVEL_4:
1516 if ( hd->pgd )
1517 {
1518 pgd = hd->pgd;
1519 if ( pgd[0].val != 0 )
1520 free_xenheap_page((void*)maddr_to_virt(
1521 dma_pte_addr(pgd[0])));
1522 free_xenheap_page((void *)hd->pgd);
1523 }
1524 break;
1525 default:
1526 gdprintk(XENLOG_ERR VTDPREFIX,
1527 "Unsupported p2m table sharing level!\n");
1528 break;
1529 }
1530 }
1531 #endif
1532 return_devices_to_dom0(d);
1535 static int domain_context_mapped(struct pci_dev *pdev)
1537 struct acpi_drhd_unit *drhd;
1538 struct iommu *iommu;
1539 int ret;
1541 for_each_drhd_unit ( drhd )
1542 {
1543 iommu = drhd->iommu;
1544 ret = device_context_mapped(iommu, pdev->bus, pdev->devfn);
1545 if ( ret )
1546 return ret;
1547 }
1549 return 0;
1552 int iommu_map_page(struct domain *d, paddr_t gfn, paddr_t mfn)
1554 struct acpi_drhd_unit *drhd;
1555 struct iommu *iommu;
1556 struct dma_pte *pte = NULL;
1557 struct page_info *pg = NULL;
1559 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1560 iommu = drhd->iommu;
1562 #ifdef CONTEXT_PASSTHRU
1563 /* do nothing for dom0 if the IOMMU supports pass-through */
1564 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1565 return 0;
1566 #endif
1568 pg = addr_to_dma_page(d, gfn << PAGE_SHIFT_4K);
1569 if ( !pg )
1570 return -ENOMEM;
1571 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
1572 pte += gfn & LEVEL_MASK;
1573 dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K);
1574 dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
1575 iommu_flush_cache_entry(iommu, pte);
1576 unmap_domain_page(pte);
1578 for_each_drhd_unit ( drhd )
1579 {
1580 iommu = drhd->iommu;
1581 if ( cap_caching_mode(iommu->cap) )
1582 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1583 gfn << PAGE_SHIFT_4K, 1, 0);
1584 else if ( cap_rwbf(iommu->cap) )
1585 iommu_flush_write_buffer(iommu);
1586 }
1588 return 0;
1591 int iommu_unmap_page(struct domain *d, dma_addr_t gfn)
1593 struct acpi_drhd_unit *drhd;
1594 struct iommu *iommu;
1596 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1597 iommu = drhd->iommu;
1599 #ifdef CONTEXT_PASSTHRU
1600 /* do nothing for dom0 if the IOMMU supports pass-through */
1601 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1602 return 0;
1603 #endif
1605 dma_pte_clear_one(d, gfn << PAGE_SHIFT_4K);
1607 return 0;
1610 int iommu_page_mapping(struct domain *domain, dma_addr_t iova,
1611 void *hpa, size_t size, int prot)
1613 struct acpi_drhd_unit *drhd;
1614 struct iommu *iommu;
1615 unsigned long start_pfn, end_pfn;
1616 struct dma_pte *pte = NULL;
1617 int index;
1618 struct page_info *pg = NULL;
1620 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1621 iommu = drhd->iommu;
1622 if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
1623 return -EINVAL;
1624 iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
1625 start_pfn = (unsigned long)(((unsigned long) hpa) >> PAGE_SHIFT_4K);
1626 end_pfn = (unsigned long)
1627 ((PAGE_ALIGN_4K(((unsigned long)hpa) + size)) >> PAGE_SHIFT_4K);
1628 index = 0;
1629 while ( start_pfn < end_pfn )
1630 {
1631 pg = addr_to_dma_page(domain, iova + PAGE_SIZE_4K * index);
1632 if ( !pg )
1633 return -ENOMEM;
1634 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
1635 pte += start_pfn & LEVEL_MASK;
1636 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1637 dma_set_pte_prot(*pte, prot);
1638 iommu_flush_cache_entry(iommu, pte);
1639 unmap_domain_page(pte);
1640 start_pfn++;
1641 index++;
1642 }
1644 for_each_drhd_unit ( drhd )
1645 {
1646 iommu = drhd->iommu;
1647 if ( cap_caching_mode(iommu->cap) )
1648 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
1649 iova, index, 0);
1650 else if ( cap_rwbf(iommu->cap) )
1651 iommu_flush_write_buffer(iommu);
1652 }
1654 return 0;
1657 int iommu_page_unmapping(struct domain *domain, dma_addr_t addr, size_t size)
1659 dma_pte_clear_range(domain, addr, addr + size);
1661 return 0;
1664 void iommu_flush(struct domain *d, dma_addr_t gfn, u64 *p2m_entry)
1666 struct acpi_drhd_unit *drhd;
1667 struct iommu *iommu = NULL;
1668 struct dma_pte *pte = (struct dma_pte *) p2m_entry;
1670 for_each_drhd_unit ( drhd )
1671 {
1672 iommu = drhd->iommu;
1673 if ( cap_caching_mode(iommu->cap) )
1674 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1675 gfn << PAGE_SHIFT_4K, 1, 0);
1676 else if ( cap_rwbf(iommu->cap) )
1677 iommu_flush_write_buffer(iommu);
1678 }
1680 iommu_flush_cache_entry(iommu, pte);
1683 static int iommu_prepare_rmrr_dev(
1684 struct domain *d,
1685 struct acpi_rmrr_unit *rmrr,
1686 struct pci_dev *pdev)
1688 struct acpi_drhd_unit *drhd;
1689 unsigned long size;
1690 int ret;
1692 /* page table init */
1693 size = rmrr->end_address - rmrr->base_address + 1;
1694 ret = iommu_page_mapping(d, rmrr->base_address,
1695 (void *)rmrr->base_address, size,
1696 DMA_PTE_READ|DMA_PTE_WRITE);
1697 if ( ret )
1698 return ret;
1700 if ( domain_context_mapped(pdev) == 0 )
1701 {
1702 drhd = acpi_find_matched_drhd_unit(pdev);
1703 ret = domain_context_mapping(d, drhd->iommu, pdev);
1704 if ( !ret )
1705 return 0;
1706 }
1708 return ret;
1711 void __init setup_dom0_devices(void)
1713 struct hvm_iommu *hd = domain_hvm_iommu(dom0);
1714 struct acpi_drhd_unit *drhd;
1715 struct pci_dev *pdev;
1716 int bus, dev, func, ret;
1717 u32 l;
1719 #ifdef DEBUG_VTD_CONTEXT_ENTRY
1720 for ( bus = 0; bus < 256; bus++ )
1721 {
1722 for ( dev = 0; dev < 32; dev++ )
1723 {
1724 for ( func = 0; func < 8; func++ )
1725 {
1726 struct context_entry *context;
1727 struct pci_dev device;
1729 device.bus = bus;
1730 device.devfn = PCI_DEVFN(dev, func);
1731 drhd = acpi_find_matched_drhd_unit(&device);
1732 context = device_to_context_entry(drhd->iommu,
1733 bus, PCI_DEVFN(dev, func));
1734 if ( (context->lo != 0) || (context->hi != 0) )
1735 dprintk(XENLOG_INFO VTDPREFIX,
1736 "setup_dom0_devices-%x:%x:%x- context not 0\n",
1737 bus, dev, func);
1738 }
1739 }
1740 }
1741 #endif
1743 for ( bus = 0; bus < 256; bus++ )
1744 {
1745 for ( dev = 0; dev < 32; dev++ )
1746 {
1747 for ( func = 0; func < 8; func++ )
1748 {
1749 l = read_pci_config(bus, dev, func, PCI_VENDOR_ID);
1750 /* some broken boards return 0 or ~0 if a slot is empty: */
1751 if ( (l == 0xffffffff) || (l == 0x00000000) ||
1752 (l == 0x0000ffff) || (l == 0xffff0000) )
1753 continue;
1754 pdev = xmalloc(struct pci_dev);
1755 pdev->bus = bus;
1756 pdev->devfn = PCI_DEVFN(dev, func);
1757 list_add_tail(&pdev->list, &hd->pdev_list);
1759 drhd = acpi_find_matched_drhd_unit(pdev);
1760 ret = domain_context_mapping(dom0, drhd->iommu, pdev);
1761 if ( ret != 0 )
1762 gdprintk(XENLOG_ERR VTDPREFIX,
1763 "domain_context_mapping failed\n");
1768 for_each_pdev ( dom0, pdev )
1769 dprintk(XENLOG_INFO VTDPREFIX,
1770 "setup_dom0_devices: bdf = %x:%x:%x\n",
1771 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1774 void clear_fault_bits(struct iommu *iommu)
1776 u64 val;
1778 val = dmar_readq(
1779 iommu->reg,
1780 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+0x8);
1781 dmar_writeq(
1782 iommu->reg,
1783 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8,
1784 val);
1785 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_FAULTS);
1788 static int init_vtd_hw(void)
1790 struct acpi_drhd_unit *drhd;
1791 struct iommu *iommu;
1792 struct iommu_flush *flush = NULL;
1793 int vector;
1794 int ret;
1796 for_each_drhd_unit ( drhd )
1797 {
1798 iommu = drhd->iommu;
1799 ret = iommu_set_root_entry(iommu);
1800 if ( ret )
1801 {
1802 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
1803 return -EIO;
1804 }
1806 vector = iommu_set_interrupt(iommu);
1807 dma_msi_data_init(iommu, vector);
1808 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
1809 iommu->vector = vector;
1810 clear_fault_bits(iommu);
1811 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
1813 /* initialize flush functions */
1814 flush = iommu_get_flush(iommu);
1815 flush->context = flush_context_reg;
1816 flush->iotlb = flush_iotlb_reg;
1818 if ( qinval_setup(iommu) != 0 )
1819 dprintk(XENLOG_ERR VTDPREFIX,
1820 "Queued Invalidation hardware not found\n");
1822 return 0;
1825 static int enable_vtd_translation(void)
1827 struct acpi_drhd_unit *drhd;
1828 struct iommu *iommu;
1830 for_each_drhd_unit ( drhd )
1831 {
1832 iommu = drhd->iommu;
1833 if ( iommu_enable_translation(iommu) )
1834 return -EIO;
1835 }
1836 return 0;
1839 static void setup_dom0_rmrr(void)
1841 struct acpi_rmrr_unit *rmrr;
1842 struct pci_dev *pdev;
1843 int ret;
1845 for_each_rmrr_device ( rmrr, pdev )
1846 ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev);
1847 if ( ret )
1848 gdprintk(XENLOG_ERR VTDPREFIX,
1849 "IOMMU: mapping reserved region failed\n");
1850 end_for_each_rmrr_device ( rmrr, pdev )
1853 int iommu_setup(void)
1855 struct hvm_iommu *hd = domain_hvm_iommu(dom0);
1856 struct acpi_drhd_unit *drhd;
1857 struct iommu *iommu;
1858 unsigned long i;
1860 if ( !vtd_enabled )
1861 return 0;
1863 spin_lock_init(&domid_bitmap_lock);
1864 INIT_LIST_HEAD(&hd->pdev_list);
1866 /* setup clflush size */
1867 x86_clflush_size = ((cpuid_ebx(1) >> 8) & 0xff) * 8;
1869 /* Allocate IO page directory page for the domain. */
1870 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1871 iommu = drhd->iommu;
1873 /* Allocate domain id bitmap, and set bit 0 as reserved */
1874 domid_bitmap_size = cap_ndoms(iommu->cap);
1875 domid_bitmap = xmalloc_bytes(domid_bitmap_size / 8);
1876 if ( domid_bitmap == NULL )
1877 goto error;
1878 memset(domid_bitmap, 0, domid_bitmap_size / 8);
1879 set_bit(0, domid_bitmap);
1881 /* setup 1:1 page table for dom0 */
1882 for ( i = 0; i < max_page; i++ )
1883 iommu_map_page(dom0, i, i);
1885 enable_vtd_translation();
1886 if ( init_vtd_hw() )
1887 goto error;
1888 setup_dom0_devices();
1889 setup_dom0_rmrr();
1890 iommu_flush_all();
1892 return 0;
1894 error:
1895 printk("iommu_setup() failed\n");
1896 for_each_drhd_unit ( drhd )
1897 {
1898 iommu = drhd->iommu;
1899 free_iommu(iommu);
1900 }
1901 return -EIO;
1904 /*
1905 * If the device isn't owned by dom0, it has already been
1906 * assigned to another domain, or it does not exist.
1907 */
1908 int device_assigned(u8 bus, u8 devfn)
1910 struct pci_dev *pdev;
1912 for_each_pdev( dom0, pdev )
1913 if ( (pdev->bus == bus ) && (pdev->devfn == devfn) )
1914 return 0;
1916 return 1;
1919 int assign_device(struct domain *d, u8 bus, u8 devfn)
1921 struct acpi_rmrr_unit *rmrr;
1922 struct pci_dev *pdev;
1923 int ret = 0;
1925 if ( list_empty(&acpi_drhd_units) )
1926 return ret;
1928 gdprintk(XENLOG_INFO VTDPREFIX,
1929 "assign_device: bus = %x dev = %x func = %x\n",
1930 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1932 reassign_device_ownership(dom0, d, bus, devfn);
1934 /* Set up the RMRR identity mapping */
1935 for_each_rmrr_device( rmrr, pdev )
1936 if ( pdev->bus == bus && pdev->devfn == devfn )
1937 {
1938 ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
1939 if ( ret )
1940 {
1941 gdprintk(XENLOG_ERR VTDPREFIX,
1942 "IOMMU: mapping reserved region failed\n");
1943 return ret;
1944 }
1945 }
1946 end_for_each_rmrr_device(rmrr, pdev)
1948 return ret;
1951 void iommu_set_pgd(struct domain *d)
1953 struct hvm_iommu *hd = domain_hvm_iommu(d);
1954 unsigned long p2m_table;
1956 if ( hd->pgd )
1957 {
1958 gdprintk(XENLOG_INFO VTDPREFIX,
1959 "iommu_set_pgd_1: hd->pgd = %p\n", hd->pgd);
1960 hd->pgd = NULL;
1961 }
1962 p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
1964 #if CONFIG_PAGING_LEVELS == 3
1965 if ( !hd->pgd )
1967 int level = agaw_to_level(hd->agaw);
1968 struct dma_pte *pmd = NULL;
1969 struct dma_pte *pgd = NULL;
1970 struct dma_pte *pte = NULL;
1971 l3_pgentry_t *l3e;
1972 unsigned long flags;
1973 int i;
1975 spin_lock_irqsave(&hd->mapping_lock, flags);
1976 if ( !hd->pgd )
1977 {
1978 pgd = (struct dma_pte *)alloc_xenheap_page();
1979 if ( !pgd )
1980 {
1981 spin_unlock_irqrestore(&hd->mapping_lock, flags);
1982 gdprintk(XENLOG_ERR VTDPREFIX,
1983 "Allocate pgd memory failed!\n");
1984 return;
1985 }
1986 memset(pgd, 0, PAGE_SIZE);
1987 hd->pgd = pgd;
1988 }
1990 l3e = map_domain_page(p2m_table);
1991 switch ( level )
1993 case VTD_PAGE_TABLE_LEVEL_3: /* Weybridge */
1994 /* We only support 8 entries for the PAE L3 p2m table */
1995 for ( i = 0; i < 8 ; i++ )
1997 /* Don't create new L2 entry, use ones from p2m table */
1998 pgd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
2000 break;
2002 case VTD_PAGE_TABLE_LEVEL_4: /* Stoakley */
2003 /* We allocate one more page for the top vtd page table. */
2004 pmd = (struct dma_pte *)alloc_xenheap_page();
2005 if ( !pmd )
2006 {
2007 unmap_domain_page(l3e);
2008 spin_unlock_irqrestore(&hd->mapping_lock, flags);
2009 gdprintk(XENLOG_ERR VTDPREFIX,
2010 "Allocate pmd memory failed!\n");
2011 return;
2012 }
2013 memset((u8*)pmd, 0, PAGE_SIZE);
2014 pte = &pgd[0];
2015 dma_set_pte_addr(*pte, virt_to_maddr(pmd));
2016 dma_set_pte_readable(*pte);
2017 dma_set_pte_writable(*pte);
2019 for ( i = 0; i < 8; i++ )
2021 /* Don't create new L2 entry, use ones from p2m table */
2022 pmd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
2024 break;
2025 default:
2026 gdprintk(XENLOG_ERR VTDPREFIX,
2027 "iommu_set_pgd:Unsupported p2m table sharing level!\n");
2028 break;
2030 unmap_domain_page(l3e);
2031 spin_unlock_irqrestore(&hd->mapping_lock, flags);
2033 #elif CONFIG_PAGING_LEVELS == 4
2034 if ( !hd->pgd )
2036 int level = agaw_to_level(hd->agaw);
2037 l3_pgentry_t *l3e;
2038 mfn_t pgd_mfn;
2040 switch ( level )
2042 case VTD_PAGE_TABLE_LEVEL_3:
2043 l3e = map_domain_page(p2m_table);
2044 if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
2045 {
2046 gdprintk(XENLOG_ERR VTDPREFIX,
2047 "iommu_set_pgd: second level wasn't there\n");
2048 unmap_domain_page(l3e);
2049 return;
2050 }
2051 pgd_mfn = _mfn(l3e_get_pfn(*l3e));
2052 unmap_domain_page(l3e);
2053 hd->pgd = maddr_to_virt(pagetable_get_paddr(
2054 pagetable_from_mfn(pgd_mfn)));
2055 break;
2057 case VTD_PAGE_TABLE_LEVEL_4:
2058 pgd_mfn = _mfn(p2m_table);
2059 hd->pgd = maddr_to_virt(pagetable_get_paddr(
2060 pagetable_from_mfn(pgd_mfn)));
2061 break;
2062 default:
2063 gdprintk(XENLOG_ERR VTDPREFIX,
2064 "iommu_set_pgd:Unsupported p2m table sharing level!\n");
2065 break;
2068 #endif
2069 gdprintk(XENLOG_INFO VTDPREFIX,
2070 "iommu_set_pgd: hd->pgd = %p\n", hd->pgd);
2074 u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
2075 int iommu_suspend(void)
2077 struct acpi_drhd_unit *drhd;
2078 struct iommu *iommu;
2079 int i = 0;
2081 iommu_flush_all();
2083 for_each_drhd_unit ( drhd )
2084 {
2085 iommu = drhd->iommu;
2086 iommu_state[DMAR_RTADDR_REG * i] =
2087 (u64) dmar_readq(iommu->reg, DMAR_RTADDR_REG);
2088 iommu_state[DMAR_FECTL_REG * i] =
2089 (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
2090 iommu_state[DMAR_FEDATA_REG * i] =
2091 (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
2092 iommu_state[DMAR_FEADDR_REG * i] =
2093 (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
2094 iommu_state[DMAR_FEUADDR_REG * i] =
2095 (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
2096 iommu_state[DMAR_PLMBASE_REG * i] =
2097 (u32) dmar_readl(iommu->reg, DMAR_PLMBASE_REG);
2098 iommu_state[DMAR_PLMLIMIT_REG * i] =
2099 (u32) dmar_readl(iommu->reg, DMAR_PLMLIMIT_REG);
2100 iommu_state[DMAR_PHMBASE_REG * i] =
2101 (u64) dmar_readq(iommu->reg, DMAR_PHMBASE_REG);
2102 iommu_state[DMAR_PHMLIMIT_REG * i] =
2103 (u64) dmar_readq(iommu->reg, DMAR_PHMLIMIT_REG);
2104 i++;
2105 }
2107 return 0;
2110 int iommu_resume(void)
2112 struct acpi_drhd_unit *drhd;
2113 struct iommu *iommu;
2114 int i = 0;
2116 iommu_flush_all();
2118 init_vtd_hw();
2119 for_each_drhd_unit ( drhd )
2120 {
2121 iommu = drhd->iommu;
2122 dmar_writeq( iommu->reg, DMAR_RTADDR_REG,
2123 (u64) iommu_state[DMAR_RTADDR_REG * i]);
2124 dmar_writel(iommu->reg, DMAR_FECTL_REG,
2125 (u32) iommu_state[DMAR_FECTL_REG * i]);
2126 dmar_writel(iommu->reg, DMAR_FEDATA_REG,
2127 (u32) iommu_state[DMAR_FEDATA_REG * i]);
2128 dmar_writel(iommu->reg, DMAR_FEADDR_REG,
2129 (u32) iommu_state[DMAR_FEADDR_REG * i]);
2130 dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
2131 (u32) iommu_state[DMAR_FEUADDR_REG * i]);
2132 dmar_writel(iommu->reg, DMAR_PLMBASE_REG,
2133 (u32) iommu_state[DMAR_PLMBASE_REG * i]);
2134 dmar_writel(iommu->reg, DMAR_PLMLIMIT_REG,
2135 (u32) iommu_state[DMAR_PLMLIMIT_REG * i]);
2136 dmar_writeq(iommu->reg, DMAR_PHMBASE_REG,
2137 (u64) iommu_state[DMAR_PHMBASE_REG * i]);
2138 dmar_writeq(iommu->reg, DMAR_PHMLIMIT_REG,
2139 (u64) iommu_state[DMAR_PHMLIMIT_REG * i]);
2141 if ( iommu_enable_translation(iommu) )
2142 return -EIO;
2143 i++;
2144 }
2145 return 0;
2148 /*
2149 * Local variables:
2150 * mode: C
2151 * c-set-style: "BSD"
2152 * c-basic-offset: 4
2153 * tab-width: 4
2154 * indent-tabs-mode: nil
2155 * End:
2156 */