ia64/xen-unstable

xen/drivers/passthrough/vtd/iommu.c @ 17265:70f9a2110421

Intel VTD: Ignore USB RMRR for HVM guest

The USB controller RMRR (0xed000 - 0xeffff) conflicts with the HVM guest
BIOS region. Setting up an identity mapping for it would cover the guest
BIOS region in the p2m table, which crashes the system.

As the VT-d spec says, the USB controller RMRR is used for DMA performed
by a USB controller under BIOS SMM control for legacy keyboard emulation.
However, the current guest BIOS neither emulates a standard keyboard/mouse
nor supports SMM mode, so there is currently no way for the USB controller
RMRR to be used.

This patch therefore ignores the USB controller RMRR for HVM guests.
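
In this revision the check lives in intel_iommu_assign_device(); condensed
from the hunk later in this file (error handling and the surrounding
function elided), the RMRR setup loop now skips USB devices:

    for_each_rmrr_device( rmrr, pdev )
        if ( pdev->bus == bus && pdev->devfn == devfn )
        {
            /* USB RMRR conflicts with the guest BIOS region; skip it. */
            if ( is_usb_device(pdev) )
                return 0;
            ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
        }
    end_for_each_rmrr_device(rmrr, pdev)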

Signed-off-by: Weidong Han <weidong.han@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Mar 19 10:22:49 2008 +0000 (2008-03-19)
parents bf8a3fc79093
children fedb66235136
line source
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
20 */
22 #include <xen/irq.h>
23 #include <xen/sched.h>
24 #include <xen/xmalloc.h>
25 #include <xen/domain_page.h>
26 #include <xen/iommu.h>
27 #include "iommu.h"
28 #include "dmar.h"
29 #include "../pci-direct.h"
30 #include "../pci_regs.h"
31 #include "msi.h"
32 #include "extern.h"
34 #define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
36 static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */
37 static int domid_bitmap_size; /* domain id bitmap size in bits */
38 static unsigned long *domid_bitmap; /* iommu domain id bitmap */
40 #define DID_FIELD_WIDTH 16
41 #define DID_HIGH_OFFSET 8
42 static void context_set_domain_id(struct context_entry *context,
43 struct domain *d)
44 {
45 unsigned long flags;
46 domid_t iommu_domid = domain_iommu_domid(d);
48 if ( iommu_domid == 0 )
49 {
50 spin_lock_irqsave(&domid_bitmap_lock, flags);
51 iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
52 set_bit(iommu_domid, domid_bitmap);
53 spin_unlock_irqrestore(&domid_bitmap_lock, flags);
54 d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
55 }
57 context->hi &= (1 << DID_HIGH_OFFSET) - 1;
58 context->hi |= iommu_domid << DID_HIGH_OFFSET;
59 }
61 static void iommu_domid_release(struct domain *d)
62 {
63 domid_t iommu_domid = domain_iommu_domid(d);
65 if ( iommu_domid != 0 )
66 {
67 d->arch.hvm_domain.hvm_iommu.iommu_domid = 0;
68 clear_bit(iommu_domid, domid_bitmap);
69 }
70 }
72 static struct intel_iommu *alloc_intel_iommu(void)
73 {
74 struct intel_iommu *intel;
76 intel = xmalloc(struct intel_iommu);
77 if ( !intel )
78 {
79 gdprintk(XENLOG_ERR VTDPREFIX,
80 "Allocate intel_iommu failed.\n");
81 return NULL;
82 }
83 memset(intel, 0, sizeof(struct intel_iommu));
85 spin_lock_init(&intel->qi_ctrl.qinval_lock);
86 spin_lock_init(&intel->qi_ctrl.qinval_poll_lock);
88 spin_lock_init(&intel->ir_ctrl.iremap_lock);
90 return intel;
91 }
93 static void free_intel_iommu(struct intel_iommu *intel)
94 {
95 if ( intel )
96 {
97 xfree(intel);
98 intel = NULL;
99 }
100 }
102 struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu)
103 {
104 if ( !iommu )
105 return NULL;
107 if ( !iommu->intel )
108 {
109 iommu->intel = alloc_intel_iommu();
110 if ( !iommu->intel )
111 {
112 dprintk(XENLOG_ERR VTDPREFIX,
113 "iommu_qi_ctrl: Allocate iommu->intel failed.\n");
114 return NULL;
115 }
116 }
118 return &(iommu->intel->qi_ctrl);
119 }
121 struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu)
122 {
123 if ( !iommu )
124 return NULL;
126 if ( !iommu->intel )
127 {
128 iommu->intel = alloc_intel_iommu();
129 if ( !iommu->intel )
130 {
131 dprintk(XENLOG_ERR VTDPREFIX,
132 "iommu_ir_ctrl: Allocate iommu->intel failed.\n");
133 return NULL;
134 }
135 }
137 return &(iommu->intel->ir_ctrl);
138 }
140 struct iommu_flush *iommu_get_flush(struct iommu *iommu)
141 {
142 if ( !iommu )
143 return NULL;
145 if ( !iommu->intel )
146 {
147 iommu->intel = alloc_intel_iommu();
148 if ( !iommu->intel )
149 {
150 dprintk(XENLOG_ERR VTDPREFIX,
151 "iommu_get_flush: Allocate iommu->intel failed.\n");
152 return NULL;
153 }
154 }
156 return &(iommu->intel->flush);
157 }
159 unsigned int x86_clflush_size;
160 void clflush_cache_range(void *adr, int size)
161 {
162 int i;
163 for ( i = 0; i < size; i += x86_clflush_size )
164 clflush(adr + i);
165 }
167 static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
168 {
169 if ( !ecap_coherent(iommu->ecap) )
170 clflush_cache_range(addr, size);
171 }
173 #define iommu_flush_cache_entry(iommu, addr) \
174 __iommu_flush_cache(iommu, addr, 8)
175 #define iommu_flush_cache_page(iommu, addr) \
176 __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K)
178 int nr_iommus;
179 /* context entry handling */
180 static struct context_entry * device_to_context_entry(struct iommu *iommu,
181 u8 bus, u8 devfn)
182 {
183 struct root_entry *root;
184 struct context_entry *context;
185 unsigned long phy_addr;
186 unsigned long flags;
188 spin_lock_irqsave(&iommu->lock, flags);
189 root = &iommu->root_entry[bus];
190 if ( !root_present(*root) )
191 {
192 phy_addr = (unsigned long) alloc_xenheap_page();
193 if ( !phy_addr )
194 {
195 spin_unlock_irqrestore(&iommu->lock, flags);
196 return NULL;
197 }
198 memset((void *) phy_addr, 0, PAGE_SIZE);
199 iommu_flush_cache_page(iommu, (void *)phy_addr);
200 phy_addr = virt_to_maddr((void *)phy_addr);
201 set_root_value(*root, phy_addr);
202 set_root_present(*root);
203 iommu_flush_cache_entry(iommu, root);
204 }
205 phy_addr = (unsigned long) get_context_addr(*root);
206 context = (struct context_entry *)maddr_to_virt(phy_addr);
207 spin_unlock_irqrestore(&iommu->lock, flags);
208 return &context[devfn];
209 }
211 static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
212 {
213 struct root_entry *root;
214 struct context_entry *context;
215 unsigned long phy_addr;
216 int ret;
217 unsigned long flags;
219 spin_lock_irqsave(&iommu->lock, flags);
220 root = &iommu->root_entry[bus];
221 if ( !root_present(*root) )
222 {
223 ret = 0;
224 goto out;
225 }
226 phy_addr = get_context_addr(*root);
227 context = (struct context_entry *)maddr_to_virt(phy_addr);
228 ret = context_present(context[devfn]);
229 out:
230 spin_unlock_irqrestore(&iommu->lock, flags);
231 return ret;
232 }
234 static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
235 {
236 struct hvm_iommu *hd = domain_hvm_iommu(domain);
237 struct acpi_drhd_unit *drhd;
238 struct iommu *iommu;
239 int addr_width = agaw_to_width(hd->agaw);
240 struct dma_pte *parent, *pte = NULL, *pgd;
241 int level = agaw_to_level(hd->agaw);
242 int offset;
243 unsigned long flags;
244 struct page_info *pg = NULL;
245 u64 *vaddr = NULL;
247 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
248 iommu = drhd->iommu;
250 addr &= (((u64)1) << addr_width) - 1;
251 spin_lock_irqsave(&hd->mapping_lock, flags);
252 if ( !hd->pgd )
253 {
254 pgd = (struct dma_pte *)alloc_xenheap_page();
255 if ( !pgd )
256 {
257 spin_unlock_irqrestore(&hd->mapping_lock, flags);
258 return NULL;
259 }
260 memset(pgd, 0, PAGE_SIZE);
261 hd->pgd = pgd;
262 }
264 parent = hd->pgd;
265 while ( level > 1 )
266 {
267 offset = address_level_offset(addr, level);
268 pte = &parent[offset];
270 if ( dma_pte_addr(*pte) == 0 )
271 {
272 pg = alloc_domheap_page(NULL);
273 vaddr = map_domain_page(page_to_mfn(pg));
274 if ( !vaddr )
275 {
276 spin_unlock_irqrestore(&hd->mapping_lock, flags);
277 return NULL;
278 }
279 memset(vaddr, 0, PAGE_SIZE);
280 iommu_flush_cache_page(iommu, vaddr);
282 dma_set_pte_addr(*pte, page_to_maddr(pg));
284 /*
285 * Higher-level tables always set r/w; the last-level
286 * page table controls read/write permissions.
287 */
288 dma_set_pte_readable(*pte);
289 dma_set_pte_writable(*pte);
290 iommu_flush_cache_entry(iommu, pte);
291 }
292 else
293 {
294 pg = maddr_to_page(pte->val);
295 vaddr = map_domain_page(page_to_mfn(pg));
296 if ( !vaddr )
297 {
298 spin_unlock_irqrestore(&hd->mapping_lock, flags);
299 return NULL;
300 }
301 }
303 if ( parent != hd->pgd )
304 unmap_domain_page(parent);
306 if ( level == 2 && vaddr )
307 {
308 unmap_domain_page(vaddr);
309 break;
310 }
312 parent = (struct dma_pte *)vaddr;
313 vaddr = NULL;
314 level--;
315 }
317 spin_unlock_irqrestore(&hd->mapping_lock, flags);
318 return pg;
319 }
321 /* return address's page at specific level */
322 static struct page_info *dma_addr_level_page(struct domain *domain,
323 u64 addr, int level)
324 {
325 struct hvm_iommu *hd = domain_hvm_iommu(domain);
326 struct dma_pte *parent, *pte = NULL;
327 int total = agaw_to_level(hd->agaw);
328 int offset;
329 struct page_info *pg = NULL;
331 parent = hd->pgd;
332 while ( level <= total )
333 {
334 offset = address_level_offset(addr, total);
335 pte = &parent[offset];
336 if ( dma_pte_addr(*pte) == 0 )
337 {
338 if ( parent != hd->pgd )
339 unmap_domain_page(parent);
340 break;
341 }
343 pg = maddr_to_page(pte->val);
344 if ( parent != hd->pgd )
345 unmap_domain_page(parent);
347 if ( level == total )
348 return pg;
350 parent = map_domain_page(page_to_mfn(pg));
351 total--;
352 }
354 return NULL;
355 }
357 static void iommu_flush_write_buffer(struct iommu *iommu)
358 {
359 u32 val;
360 unsigned long flag;
361 unsigned long start_time;
363 if ( !cap_rwbf(iommu->cap) )
364 return;
365 val = iommu->gcmd | DMA_GCMD_WBF;
367 spin_lock_irqsave(&iommu->register_lock, flag);
368 dmar_writel(iommu->reg, DMAR_GCMD_REG, val);
370 /* Make sure the hardware has completed it */
371 start_time = jiffies;
372 for ( ; ; )
373 {
374 val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
375 if ( !(val & DMA_GSTS_WBFS) )
376 break;
377 if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
378 panic("DMAR hardware is malfunctioning,"
379 " please disable IOMMU\n");
380 cpu_relax();
381 }
382 spin_unlock_irqrestore(&iommu->register_lock, flag);
383 }
385 /* return value determines whether we need a write buffer flush */
386 static int flush_context_reg(
387 void *_iommu,
388 u16 did, u16 source_id, u8 function_mask, u64 type,
389 int non_present_entry_flush)
390 {
391 struct iommu *iommu = (struct iommu *) _iommu;
392 u64 val = 0;
393 unsigned long flag;
394 unsigned long start_time;
396 /*
397 * In the non-present entry flush case, if the hardware doesn't cache
398 * non-present entries we do nothing; if it does cache non-present
399 * entries, we flush the entries of domain 0 (that domain id is used
400 * to cache any non-present entries).
401 */
402 if ( non_present_entry_flush )
403 {
404 if ( !cap_caching_mode(iommu->cap) )
405 return 1;
406 else
407 did = 0;
408 }
410 /* use register invalidation */
411 switch ( type )
412 {
413 case DMA_CCMD_GLOBAL_INVL:
414 val = DMA_CCMD_GLOBAL_INVL;
415 break;
416 case DMA_CCMD_DOMAIN_INVL:
417 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
418 break;
419 case DMA_CCMD_DEVICE_INVL:
420 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
421 |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
422 break;
423 default:
424 BUG();
425 }
426 val |= DMA_CCMD_ICC;
428 spin_lock_irqsave(&iommu->register_lock, flag);
429 dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
431 /* Make sure the hardware has completed it */
432 start_time = jiffies;
433 for ( ; ; )
434 {
435 val = dmar_readq(iommu->reg, DMAR_CCMD_REG);
436 if ( !(val & DMA_CCMD_ICC) )
437 break;
438 if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
439 panic("DMAR hardware is malfunctioning, please disable IOMMU\n");
440 cpu_relax();
441 }
442 spin_unlock_irqrestore(&iommu->register_lock, flag);
443 /* flushing the context entry will implicitly flush the write buffer */
444 return 0;
445 }
447 static int inline iommu_flush_context_global(
448 struct iommu *iommu, int non_present_entry_flush)
449 {
450 struct iommu_flush *flush = iommu_get_flush(iommu);
451 return flush->context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
452 non_present_entry_flush);
453 }
455 static int inline iommu_flush_context_domain(
456 struct iommu *iommu, u16 did, int non_present_entry_flush)
457 {
458 struct iommu_flush *flush = iommu_get_flush(iommu);
459 return flush->context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
460 non_present_entry_flush);
461 }
463 static int inline iommu_flush_context_device(
464 struct iommu *iommu, u16 did, u16 source_id,
465 u8 function_mask, int non_present_entry_flush)
466 {
467 struct iommu_flush *flush = iommu_get_flush(iommu);
468 return flush->context(iommu, did, source_id, function_mask,
469 DMA_CCMD_DEVICE_INVL,
470 non_present_entry_flush);
471 }
473 /* return value determines whether we need a write buffer flush */
474 static int flush_iotlb_reg(void *_iommu, u16 did,
475 u64 addr, unsigned int size_order, u64 type,
476 int non_present_entry_flush)
477 {
478 struct iommu *iommu = (struct iommu *) _iommu;
479 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
480 u64 val = 0, val_iva = 0;
481 unsigned long flag;
482 unsigned long start_time;
484 /*
485 * In the non-present entry flush case, if the hardware doesn't cache
486 * non-present entries we do nothing; if it does cache non-present
487 * entries, we flush the entries of domain 0 (that domain id is used
488 * to cache any non-present entries).
489 */
490 if ( non_present_entry_flush )
491 {
492 if ( !cap_caching_mode(iommu->cap) )
493 return 1;
494 else
495 did = 0;
496 }
498 /* use register invalidation */
499 switch ( type )
500 {
501 case DMA_TLB_GLOBAL_FLUSH:
502 /* a global flush doesn't need to set IVA_REG */
503 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
504 break;
505 case DMA_TLB_DSI_FLUSH:
506 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
507 break;
508 case DMA_TLB_PSI_FLUSH:
509 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
510 /* Note: always flush non-leaf currently */
511 val_iva = size_order | addr;
512 break;
513 default:
514 BUG();
515 }
516 /* Note: set drain read/write */
517 if ( cap_read_drain(iommu->cap) )
518 val |= DMA_TLB_READ_DRAIN;
519 if ( cap_write_drain(iommu->cap) )
520 val |= DMA_TLB_WRITE_DRAIN;
522 spin_lock_irqsave(&iommu->register_lock, flag);
523 /* Note: Only uses first TLB reg currently */
524 if ( val_iva )
525 dmar_writeq(iommu->reg, tlb_offset, val_iva);
526 dmar_writeq(iommu->reg, tlb_offset + 8, val);
528 /* Make sure the hardware has completed it */
529 start_time = jiffies;
530 for ( ; ; )
531 {
532 val = dmar_readq(iommu->reg, tlb_offset + 8);
533 if ( !(val & DMA_TLB_IVT) )
534 break;
535 if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
536 panic("DMAR hardware is malfunctioning, please disable IOMMU\n");
537 cpu_relax();
538 }
539 spin_unlock_irqrestore(&iommu->register_lock, flag);
541 /* check IOTLB invalidation granularity */
542 if ( DMA_TLB_IAIG(val) == 0 )
543 printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
544 if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
545 printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
546 (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
547 /* the IOTLB flush will implicitly flush the write buffer */
548 return 0;
549 }
551 static int inline iommu_flush_iotlb_global(struct iommu *iommu,
552 int non_present_entry_flush)
553 {
554 struct iommu_flush *flush = iommu_get_flush(iommu);
555 return flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
556 non_present_entry_flush);
557 }
559 static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
560 int non_present_entry_flush)
561 {
562 struct iommu_flush *flush = iommu_get_flush(iommu);
563 return flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
564 non_present_entry_flush);
565 }
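/*
 * Returns the order of the smallest naturally aligned power-of-two
 * region covering [base, base + size); used below to pick the
 * address-mask value for a page-selective IOTLB invalidation.
 */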
567 static int inline get_alignment(u64 base, unsigned int size)
568 {
569 int t = 0;
570 u64 end;
572 end = base + size - 1;
573 while ( base != end )
574 {
575 t++;
576 base >>= 1;
577 end >>= 1;
578 }
579 return t;
580 }
582 static int inline iommu_flush_iotlb_psi(
583 struct iommu *iommu, u16 did,
584 u64 addr, unsigned int pages, int non_present_entry_flush)
585 {
586 unsigned int align;
587 struct iommu_flush *flush = iommu_get_flush(iommu);
589 BUG_ON(addr & (~PAGE_MASK_4K));
590 BUG_ON(pages == 0);
592 /* Fallback to domain selective flush if no PSI support */
593 if ( !cap_pgsel_inv(iommu->cap) )
594 return iommu_flush_iotlb_dsi(iommu, did,
595 non_present_entry_flush);
597 /*
598 * PSI requires the region size to be a power of two and the base
599 * address to be naturally aligned to that size.
600 */
601 align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
602 /* Fallback to domain selective flush if size is too big */
603 if ( align > cap_max_amask_val(iommu->cap) )
604 return iommu_flush_iotlb_dsi(iommu, did,
605 non_present_entry_flush);
607 addr >>= PAGE_SHIFT_4K + align;
608 addr <<= PAGE_SHIFT_4K + align;
610 return flush->iotlb(iommu, did, addr, align,
611 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
612 }
614 void iommu_flush_all(void)
615 {
616 struct acpi_drhd_unit *drhd;
617 struct iommu *iommu;
619 wbinvd();
620 for_each_drhd_unit ( drhd )
621 {
622 iommu = drhd->iommu;
623 iommu_flush_context_global(iommu, 0);
624 iommu_flush_iotlb_global(iommu, 0);
625 }
626 }
628 /* clear one page's page table */
629 static void dma_pte_clear_one(struct domain *domain, u64 addr)
630 {
631 struct acpi_drhd_unit *drhd;
632 struct iommu *iommu;
633 struct dma_pte *pte = NULL;
634 struct page_info *pg = NULL;
636 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
638 /* get last level pte */
639 pg = dma_addr_level_page(domain, addr, 1);
640 if ( !pg )
641 return;
642 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
643 pte += address_level_offset(addr, 1);
644 if ( pte )
645 {
646 dma_clear_pte(*pte);
647 iommu_flush_cache_entry(drhd->iommu, pte);
649 for_each_drhd_unit ( drhd )
650 {
651 iommu = drhd->iommu;
652 if ( cap_caching_mode(iommu->cap) )
653 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
654 addr, 1, 0);
655 else if (cap_rwbf(iommu->cap))
656 iommu_flush_write_buffer(iommu);
657 }
658 }
659 unmap_domain_page(pte);
660 }
662 /* clear the last level pte; a tlb flush should follow */
663 static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end)
664 {
665 struct hvm_iommu *hd = domain_hvm_iommu(domain);
666 int addr_width = agaw_to_width(hd->agaw);
668 start &= (((u64)1) << addr_width) - 1;
669 end &= (((u64)1) << addr_width) - 1;
670 /* in case it's partial page */
671 start = PAGE_ALIGN_4K(start);
672 end &= PAGE_MASK_4K;
674 /* we don't need lock here, nobody else touches the iova range */
675 while ( start < end )
676 {
677 dma_pte_clear_one(domain, start);
678 start += PAGE_SIZE_4K;
679 }
680 }
682 /* free page table pages. last level pte should already be cleared */
683 void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
684 {
685 struct acpi_drhd_unit *drhd;
686 struct hvm_iommu *hd = domain_hvm_iommu(domain);
687 struct iommu *iommu;
688 int addr_width = agaw_to_width(hd->agaw);
689 struct dma_pte *pte;
690 int total = agaw_to_level(hd->agaw);
691 int level;
692 u32 tmp;
693 struct page_info *pg = NULL;
695 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
696 iommu = drhd->iommu;
698 start &= (((u64)1) << addr_width) - 1;
699 end &= (((u64)1) << addr_width) - 1;
701 /* we don't need lock here, nobody else touches the iova range */
702 level = 2;
703 while ( level <= total )
704 {
705 tmp = align_to_level(start, level);
706 if ( (tmp >= end) || ((tmp + level_size(level)) > end) )
707 return;
709 while ( tmp < end )
710 {
711 pg = dma_addr_level_page(domain, tmp, level);
712 if ( !pg )
713 return;
714 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
715 pte += address_level_offset(tmp, level);
716 dma_clear_pte(*pte);
717 iommu_flush_cache_entry(iommu, pte);
718 unmap_domain_page(pte);
719 free_domheap_page(pg);
721 tmp += level_size(level);
722 }
723 level++;
724 }
726 /* free pgd */
727 if ( start == 0 && end == ((((u64)1) << addr_width) - 1) )
728 {
729 free_xenheap_page((void *)hd->pgd);
730 hd->pgd = NULL;
731 }
732 }
734 /* iommu handling */
735 static int iommu_set_root_entry(struct iommu *iommu)
736 {
737 void *addr;
738 u32 cmd, sts;
739 struct root_entry *root;
740 unsigned long flags;
742 if ( iommu == NULL )
743 {
744 gdprintk(XENLOG_ERR VTDPREFIX,
745 "iommu_set_root_entry: iommu == NULL\n");
746 return -EINVAL;
747 }
749 if ( unlikely(!iommu->root_entry) )
750 {
751 root = (struct root_entry *)alloc_xenheap_page();
752 if ( root == NULL )
753 return -ENOMEM;
755 memset((u8*)root, 0, PAGE_SIZE);
756 iommu_flush_cache_page(iommu, root);
758 if ( cmpxchg((unsigned long *)&iommu->root_entry,
759 0, (unsigned long)root) != 0 )
760 free_xenheap_page((void *)root);
761 }
763 addr = iommu->root_entry;
765 spin_lock_irqsave(&iommu->register_lock, flags);
767 dmar_writeq(iommu->reg, DMAR_RTADDR_REG, virt_to_maddr(addr));
768 cmd = iommu->gcmd | DMA_GCMD_SRTP;
769 dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
771 /* Make sure the hardware has completed it */
772 for ( ; ; )
773 {
774 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
775 if ( sts & DMA_GSTS_RTPS )
776 break;
777 cpu_relax();
778 }
780 spin_unlock_irqrestore(&iommu->register_lock, flags);
782 return 0;
783 }
785 static int iommu_enable_translation(struct iommu *iommu)
786 {
787 u32 sts;
788 unsigned long flags;
790 dprintk(XENLOG_INFO VTDPREFIX,
791 "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
792 spin_lock_irqsave(&iommu->register_lock, flags);
793 iommu->gcmd |= DMA_GCMD_TE;
794 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
795 /* Make sure the hardware has completed it */
796 for ( ; ; )
797 {
798 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
799 if ( sts & DMA_GSTS_TES )
800 break;
801 cpu_relax();
802 }
804 /* Disable PMRs when VT-d engine takes effect per spec definition */
805 disable_pmr(iommu);
806 spin_unlock_irqrestore(&iommu->register_lock, flags);
807 return 0;
808 }
810 int iommu_disable_translation(struct iommu *iommu)
811 {
812 u32 sts;
813 unsigned long flags;
815 spin_lock_irqsave(&iommu->register_lock, flags);
816 iommu->gcmd &= ~ DMA_GCMD_TE;
817 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
819 /* Make sure the hardware has completed it */
820 for ( ; ; )
821 {
822 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
823 if ( !(sts & DMA_GSTS_TES) )
824 break;
825 cpu_relax();
826 }
827 spin_unlock_irqrestore(&iommu->register_lock, flags);
828 return 0;
829 }
831 static struct iommu *vector_to_iommu[NR_VECTORS];
832 static int iommu_page_fault_do_one(struct iommu *iommu, int type,
833 u8 fault_reason, u16 source_id, u32 addr)
834 {
835 dprintk(XENLOG_WARNING VTDPREFIX,
836 "iommu_fault:%s: %x:%x.%x addr %x REASON %x iommu->reg = %p\n",
837 (type ? "DMA Read" : "DMA Write"), (source_id >> 8),
838 PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr,
839 fault_reason, iommu->reg);
841 if ( fault_reason < 0x20 )
842 print_vtd_entries(current->domain, iommu, (source_id >> 8),
843 (source_id & 0xff), (addr >> PAGE_SHIFT));
845 return 0;
846 }
848 static void iommu_fault_status(u32 fault_status)
849 {
850 if ( fault_status & DMA_FSTS_PFO )
851 dprintk(XENLOG_ERR VTDPREFIX,
852 "iommu_fault_status: Fault Overflow\n");
853 else if ( fault_status & DMA_FSTS_PPF )
854 dprintk(XENLOG_ERR VTDPREFIX,
855 "iommu_fault_status: Primary Pending Fault\n");
856 else if ( fault_status & DMA_FSTS_AFO )
857 dprintk(XENLOG_ERR VTDPREFIX,
858 "iommu_fault_status: Advanced Fault Overflow\n");
859 else if ( fault_status & DMA_FSTS_APF )
860 dprintk(XENLOG_ERR VTDPREFIX,
861 "iommu_fault_status: Advanced Pending Fault\n");
862 else if ( fault_status & DMA_FSTS_IQE )
863 dprintk(XENLOG_ERR VTDPREFIX,
864 "iommu_fault_status: Invalidation Queue Error\n");
865 else if ( fault_status & DMA_FSTS_ICE )
866 dprintk(XENLOG_ERR VTDPREFIX,
867 "iommu_fault_status: Invalidation Completion Error\n");
868 else if ( fault_status & DMA_FSTS_ITE )
869 dprintk(XENLOG_ERR VTDPREFIX,
870 "iommu_fault_status: Invalidation Time-out Error\n");
871 }
873 #define PRIMARY_FAULT_REG_LEN (16)
874 static void iommu_page_fault(int vector, void *dev_id,
875 struct cpu_user_regs *regs)
876 {
877 struct iommu *iommu = dev_id;
878 int reg, fault_index;
879 u32 fault_status;
880 unsigned long flags;
882 dprintk(XENLOG_WARNING VTDPREFIX,
883 "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
885 spin_lock_irqsave(&iommu->register_lock, flags);
886 fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
887 spin_unlock_irqrestore(&iommu->register_lock, flags);
889 iommu_fault_status(fault_status);
891 /* FIXME: ignore advanced fault log */
892 if ( !(fault_status & DMA_FSTS_PPF) )
893 return;
894 fault_index = dma_fsts_fault_record_index(fault_status);
895 reg = cap_fault_reg_offset(iommu->cap);
896 for ( ; ; )
897 {
898 u8 fault_reason;
899 u16 source_id;
900 u32 guest_addr, data;
901 int type;
903 /* highest 32 bits */
904 spin_lock_irqsave(&iommu->register_lock, flags);
905 data = dmar_readl(iommu->reg, reg +
906 fault_index * PRIMARY_FAULT_REG_LEN + 12);
907 if ( !(data & DMA_FRCD_F) )
908 {
909 spin_unlock_irqrestore(&iommu->register_lock, flags);
910 break;
911 }
913 fault_reason = dma_frcd_fault_reason(data);
914 type = dma_frcd_type(data);
916 data = dmar_readl(iommu->reg, reg +
917 fault_index * PRIMARY_FAULT_REG_LEN + 8);
918 source_id = dma_frcd_source_id(data);
920 guest_addr = dmar_readq(iommu->reg, reg +
921 fault_index * PRIMARY_FAULT_REG_LEN);
922 guest_addr = dma_frcd_page_addr(guest_addr);
923 /* clear the fault */
924 dmar_writel(iommu->reg, reg +
925 fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
926 spin_unlock_irqrestore(&iommu->register_lock, flags);
928 iommu_page_fault_do_one(iommu, type, fault_reason,
929 source_id, guest_addr);
931 fault_index++;
932 if ( fault_index > cap_num_fault_regs(iommu->cap) )
933 fault_index = 0;
934 }
936 /* clear primary fault overflow */
937 if ( fault_status & DMA_FSTS_PFO )
938 {
939 spin_lock_irqsave(&iommu->register_lock, flags);
940 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
941 spin_unlock_irqrestore(&iommu->register_lock, flags);
942 }
943 }
945 static void dma_msi_unmask(unsigned int vector)
946 {
947 struct iommu *iommu = vector_to_iommu[vector];
948 unsigned long flags;
950 /* unmask it */
951 spin_lock_irqsave(&iommu->register_lock, flags);
952 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
953 spin_unlock_irqrestore(&iommu->register_lock, flags);
954 }
956 static void dma_msi_mask(unsigned int vector)
957 {
958 unsigned long flags;
959 struct iommu *iommu = vector_to_iommu[vector];
961 /* mask it */
962 spin_lock_irqsave(&iommu->register_lock, flags);
963 dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
964 spin_unlock_irqrestore(&iommu->register_lock, flags);
965 }
967 static unsigned int dma_msi_startup(unsigned int vector)
968 {
969 dma_msi_unmask(vector);
970 return 0;
971 }
973 static void dma_msi_end(unsigned int vector)
974 {
975 dma_msi_unmask(vector);
976 ack_APIC_irq();
977 }
979 static void dma_msi_data_init(struct iommu *iommu, int vector)
980 {
981 u32 msi_data = 0;
982 unsigned long flags;
984 /* Fixed, edge, assert mode. Follow MSI setting */
985 msi_data |= vector & 0xff;
986 msi_data |= 1 << 14;
988 spin_lock_irqsave(&iommu->register_lock, flags);
989 dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
990 spin_unlock_irqrestore(&iommu->register_lock, flags);
991 }
993 static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
994 {
995 u64 msi_address;
996 unsigned long flags;
998 /* Physical, dedicated cpu. Follow MSI setting */
999 msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
1000 msi_address |= MSI_PHYSICAL_MODE << 2;
1001 msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
1002 msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;
1004 spin_lock_irqsave(&iommu->register_lock, flags);
1005 dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
1006 dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
1007 spin_unlock_irqrestore(&iommu->register_lock, flags);
1010 static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
1012 struct iommu *iommu = vector_to_iommu[vector];
1013 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
1016 static struct hw_interrupt_type dma_msi_type = {
1017 .typename = "DMA_MSI",
1018 .startup = dma_msi_startup,
1019 .shutdown = dma_msi_mask,
1020 .enable = dma_msi_unmask,
1021 .disable = dma_msi_mask,
1022 .ack = dma_msi_mask,
1023 .end = dma_msi_end,
1024 .set_affinity = dma_msi_set_affinity,
1025 };
1027 int iommu_set_interrupt(struct iommu *iommu)
1029 int vector, ret;
1031 vector = assign_irq_vector(AUTO_ASSIGN);
1032 vector_to_iommu[vector] = iommu;
1034 /* VT-d fault is a MSI, make irq == vector */
1035 irq_vector[vector] = vector;
1036 vector_irq[vector] = vector;
1038 if ( !vector )
1040 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
1041 return -EINVAL;
1044 irq_desc[vector].handler = &dma_msi_type;
1045 ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
1046 if ( ret )
1047 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
1048 return vector;
1051 struct iommu *iommu_alloc(void *hw_data)
1053 struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
1054 struct iommu *iommu;
1056 if ( nr_iommus > MAX_IOMMUS )
1058 gdprintk(XENLOG_ERR VTDPREFIX,
1059 "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
1060 return NULL;
1063 iommu = xmalloc(struct iommu);
1064 if ( !iommu )
1065 return NULL;
1066 memset(iommu, 0, sizeof(struct iommu));
1068 set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
1069 iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
1071 printk("iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
1072 iommu->reg, drhd->address);
1074 nr_iommus++;
1076 if ( !iommu->reg )
1078 printk(KERN_ERR VTDPREFIX "IOMMU: can't map the region\n");
1079 goto error;
1082 iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
1083 iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
1085 printk("iommu_alloc: cap = %"PRIx64"\n",iommu->cap);
1086 printk("iommu_alloc: ecap = %"PRIx64"\n", iommu->ecap);
1088 spin_lock_init(&iommu->lock);
1089 spin_lock_init(&iommu->register_lock);
1091 iommu->intel = alloc_intel_iommu();
1093 drhd->iommu = iommu;
1094 return iommu;
1095 error:
1096 xfree(iommu);
1097 return NULL;
1100 static void free_iommu(struct iommu *iommu)
1102 if ( !iommu )
1103 return;
1104 if ( iommu->root_entry )
1105 free_xenheap_page((void *)iommu->root_entry);
1106 if ( iommu->reg )
1107 iounmap(iommu->reg);
1108 free_intel_iommu(iommu->intel);
1109 free_irq(iommu->vector);
1110 xfree(iommu);
1113 #define guestwidth_to_adjustwidth(gaw) ({ \
1114 int agaw, r = (gaw - 12) % 9; \
1115 agaw = (r == 0) ? gaw : (gaw + 9 - r); \
1116 if ( agaw > 64 ) \
1117 agaw = 64; \
1118 agaw; })
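/*
 * The macro above rounds gaw up so that (gaw - 12) is a whole number of
 * 9-bit page-table levels, capped at 64.  E.g. gaw = 48 gives r = 0 and
 * agaw = 48, while gaw = 50 gives r = 2 and agaw = 50 + 9 - 2 = 57.
 */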
1120 int intel_iommu_domain_init(struct domain *domain)
1122 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1123 struct iommu *iommu = NULL;
1124 int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
1125 int adjust_width, agaw;
1126 unsigned long sagaw;
1127 struct acpi_drhd_unit *drhd;
1129 if ( !vtd_enabled || list_empty(&acpi_drhd_units) )
1130 return 0;
1132 for_each_drhd_unit ( drhd )
1133 iommu = drhd->iommu ? : iommu_alloc(drhd);
1135 /* calculate AGAW */
1136 if ( guest_width > cap_mgaw(iommu->cap) )
1137 guest_width = cap_mgaw(iommu->cap);
1138 adjust_width = guestwidth_to_adjustwidth(guest_width);
1139 agaw = width_to_agaw(adjust_width);
1140 /* FIXME: hardware doesn't support it, choose a bigger one? */
1141 sagaw = cap_sagaw(iommu->cap);
1142 if ( !test_bit(agaw, &sagaw) )
1144 gdprintk(XENLOG_ERR VTDPREFIX,
1145 "IOMMU: hardware doesn't support the agaw\n");
1146 agaw = find_next_bit(&sagaw, 5, agaw);
1147 if ( agaw >= 5 )
1148 return -ENODEV;
1150 hd->agaw = agaw;
1151 return 0;
1154 static int domain_context_mapping_one(
1155 struct domain *domain,
1156 struct iommu *iommu,
1157 u8 bus, u8 devfn)
1159 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1160 struct context_entry *context;
1161 unsigned long flags;
1162 int ret = 0;
1164 context = device_to_context_entry(iommu, bus, devfn);
1165 if ( !context )
1167 gdprintk(XENLOG_ERR VTDPREFIX,
1168 "domain_context_mapping_one:context == NULL:"
1169 "bdf = %x:%x:%x\n",
1170 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1171 return -ENOMEM;
1174 if ( context_present(*context) )
1176 gdprintk(XENLOG_WARNING VTDPREFIX,
1177 "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
1178 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1179 return 0;
1182 spin_lock_irqsave(&iommu->lock, flags);
1183 /*
1184 * domain_id 0 is not valid on Intel's IOMMU, force domain_id to
1185 * be 1 based as required by intel's iommu hw.
1186 */
1187 context_set_domain_id(context, domain);
1188 context_set_address_width(*context, hd->agaw);
1190 if ( ecap_pass_thru(iommu->ecap) )
1191 context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
1192 #ifdef CONTEXT_PASSTHRU
1193 else
1195 #endif
1196 if ( !hd->pgd )
1198 struct dma_pte *pgd = (struct dma_pte *)alloc_xenheap_page();
1199 if ( !pgd )
1201 spin_unlock_irqrestore(&hd->mapping_lock, flags);
1202 return -ENOMEM;
1204 memset(pgd, 0, PAGE_SIZE);
1205 hd->pgd = pgd;
1208 context_set_address_root(*context, virt_to_maddr(hd->pgd));
1209 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1210 #ifdef CONTEXT_PASSTHRU
1212 #endif
1214 context_set_fault_enable(*context);
1215 context_set_present(*context);
1216 iommu_flush_cache_entry(iommu, context);
1218 gdprintk(XENLOG_INFO VTDPREFIX,
1219 "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
1220 " hd->pgd=%p\n",
1221 bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1222 context->hi, context->lo, hd->pgd);
1224 if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
1225 (((u16)bus) << 8) | devfn,
1226 DMA_CCMD_MASK_NOBIT, 1) )
1227 iommu_flush_write_buffer(iommu);
1228 else
1229 iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
1230 spin_unlock_irqrestore(&iommu->lock, flags);
1231 return ret;
1234 static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap)
1236 u8 id;
1237 int ttl = 48;
1239 while ( ttl-- )
1241 pos = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos);
1242 if ( pos < 0x40 )
1243 break;
1245 pos &= ~3;
1246 id = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1247 pos + PCI_CAP_LIST_ID);
1249 if ( id == 0xff )
1250 break;
1251 if ( id == cap )
1252 return pos;
1254 pos += PCI_CAP_LIST_NEXT;
1256 return 0;
1259 #define PCI_BASE_CLASS_BRIDGE 0x06
1260 #define PCI_CLASS_BRIDGE_PCI 0x0604
1262 #define DEV_TYPE_PCIe_ENDPOINT 1
1263 #define DEV_TYPE_PCI_BRIDGE 2
1264 #define DEV_TYPE_PCI 3
1266 int pdev_type(struct pci_dev *dev)
1268 u16 class_device;
1269 u16 status;
1271 class_device = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
1272 PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE);
1273 if ( class_device == PCI_CLASS_BRIDGE_PCI )
1274 return DEV_TYPE_PCI_BRIDGE;
1276 status = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
1277 PCI_FUNC(dev->devfn), PCI_STATUS);
1279 if ( !(status & PCI_STATUS_CAP_LIST) )
1280 return DEV_TYPE_PCI;
1282 if ( __pci_find_next_cap(dev->bus, dev->devfn,
1283 PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP) )
1284 return DEV_TYPE_PCIe_ENDPOINT;
1286 return DEV_TYPE_PCI;
1289 #define MAX_BUSES 256
1290 struct pci_dev bus2bridge[MAX_BUSES];
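/*
 * bus2bridge[] above maps a secondary bus number to the PCI bridge that
 * generates it; entries are filled in by domain_context_mapping() when a
 * bridge is mapped, and used when mapping or unmapping legacy PCI devices
 * behind that bridge.
 */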
1292 static int domain_context_mapping(
1293 struct domain *domain,
1294 struct iommu *iommu,
1295 struct pci_dev *pdev)
1297 int ret = 0;
1298 int dev, func, sec_bus, sub_bus;
1299 u32 type;
1301 type = pdev_type(pdev);
1302 switch ( type )
1304 case DEV_TYPE_PCI_BRIDGE:
1305 sec_bus = read_pci_config_byte(
1306 pdev->bus, PCI_SLOT(pdev->devfn),
1307 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1309 if ( bus2bridge[sec_bus].bus == 0 )
1311 bus2bridge[sec_bus].bus = pdev->bus;
1312 bus2bridge[sec_bus].devfn = pdev->devfn;
1315 sub_bus = read_pci_config_byte(
1316 pdev->bus, PCI_SLOT(pdev->devfn),
1317 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1319 if ( sec_bus != sub_bus )
1320 gdprintk(XENLOG_WARNING VTDPREFIX,
1321 "context_context_mapping: nested PCI bridge not "
1322 "supported: bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n",
1323 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1324 sec_bus, sub_bus);
1325 break;
1326 case DEV_TYPE_PCIe_ENDPOINT:
1327 gdprintk(XENLOG_INFO VTDPREFIX,
1328 "domain_context_mapping:PCIe : bdf = %x:%x:%x\n",
1329 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1330 ret = domain_context_mapping_one(domain, iommu,
1331 (u8)(pdev->bus), (u8)(pdev->devfn));
1332 break;
1333 case DEV_TYPE_PCI:
1334 gdprintk(XENLOG_INFO VTDPREFIX,
1335 "domain_context_mapping:PCI: bdf = %x:%x:%x\n",
1336 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1338 if ( pdev->bus == 0 )
1339 ret = domain_context_mapping_one(
1340 domain, iommu, (u8)(pdev->bus), (u8)(pdev->devfn));
1341 else
1343 if ( bus2bridge[pdev->bus].bus != 0 )
1344 gdprintk(XENLOG_WARNING VTDPREFIX,
1345 "domain_context_mapping:bus2bridge"
1346 "[%d].bus != 0\n", pdev->bus);
1348 ret = domain_context_mapping_one(
1349 domain, iommu,
1350 (u8)(bus2bridge[pdev->bus].bus),
1351 (u8)(bus2bridge[pdev->bus].devfn));
1353 /* now map everything behind the PCI bridge */
1354 for ( dev = 0; dev < 32; dev++ )
1356 for ( func = 0; func < 8; func++ )
1358 ret = domain_context_mapping_one(
1359 domain, iommu,
1360 pdev->bus, (u8)PCI_DEVFN(dev, func));
1361 if ( ret )
1362 return ret;
1366 break;
1367 default:
1368 gdprintk(XENLOG_ERR VTDPREFIX,
1369 "domain_context_mapping:unknown type : bdf = %x:%x:%x\n",
1370 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1371 ret = -EINVAL;
1372 break;
1375 return ret;
1378 static int domain_context_unmap_one(
1379 struct domain *domain,
1380 struct iommu *iommu,
1381 u8 bus, u8 devfn)
1383 struct context_entry *context;
1384 unsigned long flags;
1386 context = device_to_context_entry(iommu, bus, devfn);
1387 if ( !context )
1389 gdprintk(XENLOG_ERR VTDPREFIX,
1390 "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
1391 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1392 return -ENOMEM;
1395 if ( !context_present(*context) )
1397 gdprintk(XENLOG_WARNING VTDPREFIX,
1398 "domain_context_unmap_one-%x:%x:%x- "
1399 "context NOT present:return\n",
1400 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1401 return 0;
1404 gdprintk(XENLOG_INFO VTDPREFIX,
1405 "domain_context_unmap_one: bdf = %x:%x:%x\n",
1406 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1408 spin_lock_irqsave(&iommu->lock, flags);
1409 context_clear_present(*context);
1410 context_clear_entry(*context);
1411 iommu_flush_cache_entry(iommu, context);
1412 iommu_flush_context_global(iommu, 0);
1413 iommu_flush_iotlb_global(iommu, 0);
1414 spin_unlock_irqrestore(&iommu->lock, flags);
1416 return 0;
1419 static int domain_context_unmap(
1420 struct domain *domain,
1421 struct iommu *iommu,
1422 struct pci_dev *pdev)
1424 int ret = 0;
1425 int dev, func, sec_bus, sub_bus;
1426 u32 type;
1428 type = pdev_type(pdev);
1429 switch ( type )
1431 case DEV_TYPE_PCI_BRIDGE:
1432 sec_bus = read_pci_config_byte(
1433 pdev->bus, PCI_SLOT(pdev->devfn),
1434 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1435 sub_bus = read_pci_config_byte(
1436 pdev->bus, PCI_SLOT(pdev->devfn),
1437 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1439 gdprintk(XENLOG_INFO VTDPREFIX,
1440 "domain_context_unmap:BRIDGE:%x:%x:%x "
1441 "sec_bus=%x sub_bus=%x\n",
1442 pdev->bus, PCI_SLOT(pdev->devfn),
1443 PCI_FUNC(pdev->devfn), sec_bus, sub_bus);
1444 break;
1445 case DEV_TYPE_PCIe_ENDPOINT:
1446 gdprintk(XENLOG_INFO VTDPREFIX,
1447 "domain_context_unmap:PCIe : bdf = %x:%x:%x\n",
1448 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1449 ret = domain_context_unmap_one(domain, iommu,
1450 (u8)(pdev->bus), (u8)(pdev->devfn));
1451 break;
1452 case DEV_TYPE_PCI:
1453 gdprintk(XENLOG_INFO VTDPREFIX,
1454 "domain_context_unmap:PCI: bdf = %x:%x:%x\n",
1455 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1456 if ( pdev->bus == 0 )
1457 ret = domain_context_unmap_one(
1458 domain, iommu,
1459 (u8)(pdev->bus), (u8)(pdev->devfn));
1460 else
1462 if ( bus2bridge[pdev->bus].bus != 0 )
1463 gdprintk(XENLOG_WARNING VTDPREFIX,
1464 "domain_context_unmap:"
1465 "bus2bridge[%d].bus != 0\n", pdev->bus);
1467 ret = domain_context_unmap_one(domain, iommu,
1468 (u8)(bus2bridge[pdev->bus].bus),
1469 (u8)(bus2bridge[pdev->bus].devfn));
1471 /* Unmap everything behind the PCI bridge */
1472 for ( dev = 0; dev < 32; dev++ )
1474 for ( func = 0; func < 8; func++ )
1476 ret = domain_context_unmap_one(
1477 domain, iommu,
1478 pdev->bus, (u8)PCI_DEVFN(dev, func));
1479 if ( ret )
1480 return ret;
1484 break;
1485 default:
1486 gdprintk(XENLOG_ERR VTDPREFIX,
1487 "domain_context_unmap:unknown type: bdf = %x:%x:%x\n",
1488 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1489 ret = -EINVAL;
1490 break;
1493 return ret;
1496 void reassign_device_ownership(
1497 struct domain *source,
1498 struct domain *target,
1499 u8 bus, u8 devfn)
1501 struct hvm_iommu *source_hd = domain_hvm_iommu(source);
1502 struct hvm_iommu *target_hd = domain_hvm_iommu(target);
1503 struct pci_dev *pdev;
1504 struct acpi_drhd_unit *drhd;
1505 struct iommu *iommu;
1506 int status;
1507 unsigned long flags;
1509 gdprintk(XENLOG_INFO VTDPREFIX,
1510 "reassign_device-%x:%x:%x- source = %d target = %d\n",
1511 bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1512 source->domain_id, target->domain_id);
1514 pdev_flr(bus, devfn);
1516 for_each_pdev( source, pdev )
1518 if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
1519 continue;
1521 drhd = acpi_find_matched_drhd_unit(pdev);
1522 iommu = drhd->iommu;
1523 domain_context_unmap(source, iommu, pdev);
1525 /* Move pci device from the source domain to target domain. */
1526 spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
1527 spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
1528 list_move(&pdev->list, &target_hd->pdev_list);
1529 spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
1530 spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
1532 status = domain_context_mapping(target, iommu, pdev);
1533 if ( status != 0 )
1534 gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
1536 break;
1540 void return_devices_to_dom0(struct domain *d)
1542 struct hvm_iommu *hd = domain_hvm_iommu(d);
1543 struct pci_dev *pdev;
1545 while ( !list_empty(&hd->pdev_list) )
1547 pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
1548 dprintk(XENLOG_INFO VTDPREFIX,
1549 "return_devices_to_dom0: bdf = %x:%x:%x\n",
1550 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1551 reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
1554 #ifdef VTD_DEBUG
1555 for_each_pdev ( dom0, pdev )
1556 dprintk(XENLOG_INFO VTDPREFIX,
1557 "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
1558 dom0->domain_id, pdev->bus,
1559 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1560 #endif
1563 void iommu_domain_teardown(struct domain *d)
1565 if ( list_empty(&acpi_drhd_units) )
1566 return;
1568 iommu_domid_release(d);
1570 #if CONFIG_PAGING_LEVELS == 3
1572 struct hvm_iommu *hd = domain_hvm_iommu(d);
1573 int level = agaw_to_level(hd->agaw);
1574 struct dma_pte *pgd = NULL;
1576 switch ( level )
1578 case VTD_PAGE_TABLE_LEVEL_3:
1579 if ( hd->pgd )
1580 free_xenheap_page((void *)hd->pgd);
1581 break;
1582 case VTD_PAGE_TABLE_LEVEL_4:
1583 if ( hd->pgd )
1585 pgd = hd->pgd;
1586 if ( pgd[0].val != 0 )
1587 free_xenheap_page((void*)maddr_to_virt(
1588 dma_pte_addr(pgd[0])));
1589 free_xenheap_page((void *)hd->pgd);
1591 break;
1592 default:
1593 gdprintk(XENLOG_ERR VTDPREFIX,
1594 "Unsupported p2m table sharing level!\n");
1595 break;
1598 #endif
1599 return_devices_to_dom0(d);
1602 static int domain_context_mapped(struct pci_dev *pdev)
1604 struct acpi_drhd_unit *drhd;
1605 struct iommu *iommu;
1606 int ret;
1608 for_each_drhd_unit ( drhd )
1610 iommu = drhd->iommu;
1611 ret = device_context_mapped(iommu, pdev->bus, pdev->devfn);
1612 if ( ret )
1613 return ret;
1616 return 0;
1619 int intel_iommu_map_page(
1620 struct domain *d, unsigned long gfn, unsigned long mfn)
1622 struct acpi_drhd_unit *drhd;
1623 struct iommu *iommu;
1624 struct dma_pte *pte = NULL;
1625 struct page_info *pg = NULL;
1627 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1628 iommu = drhd->iommu;
1630 #ifdef CONTEXT_PASSTHRU
1631 /* do nothing if dom0 and iommu supports pass thru */
1632 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1633 return 0;
1634 #endif
1636 pg = addr_to_dma_page(d, (paddr_t)gfn << PAGE_SHIFT_4K);
1637 if ( !pg )
1638 return -ENOMEM;
1639 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
1640 pte += gfn & LEVEL_MASK;
1641 dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
1642 dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
1643 iommu_flush_cache_entry(iommu, pte);
1644 unmap_domain_page(pte);
1646 for_each_drhd_unit ( drhd )
1648 iommu = drhd->iommu;
1649 if ( cap_caching_mode(iommu->cap) )
1650 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1651 (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0);
1652 else if ( cap_rwbf(iommu->cap) )
1653 iommu_flush_write_buffer(iommu);
1656 return 0;
1659 int intel_iommu_unmap_page(struct domain *d, unsigned long gfn)
1661 struct acpi_drhd_unit *drhd;
1662 struct iommu *iommu;
1664 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1665 iommu = drhd->iommu;
1667 #ifdef CONTEXT_PASSTHRU
1668 /* do nothing if dom0 and iommu supports pass thru */
1669 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1670 return 0;
1671 #endif
1673 dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
1675 return 0;
1678 int iommu_page_mapping(struct domain *domain, paddr_t iova,
1679 void *hpa, size_t size, int prot)
1681 struct acpi_drhd_unit *drhd;
1682 struct iommu *iommu;
1683 unsigned long start_pfn, end_pfn;
1684 struct dma_pte *pte = NULL;
1685 int index;
1686 struct page_info *pg = NULL;
1688 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1689 iommu = drhd->iommu;
1690 if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
1691 return -EINVAL;
1692 iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
1693 start_pfn = (unsigned long)(((unsigned long) hpa) >> PAGE_SHIFT_4K);
1694 end_pfn = (unsigned long)
1695 ((PAGE_ALIGN_4K(((unsigned long)hpa) + size)) >> PAGE_SHIFT_4K);
1696 index = 0;
1697 while ( start_pfn < end_pfn )
1699 pg = addr_to_dma_page(domain, iova + PAGE_SIZE_4K * index);
1700 if ( !pg )
1701 return -ENOMEM;
1702 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
1703 pte += start_pfn & LEVEL_MASK;
1704 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1705 dma_set_pte_prot(*pte, prot);
1706 iommu_flush_cache_entry(iommu, pte);
1707 unmap_domain_page(pte);
1708 start_pfn++;
1709 index++;
1712 for_each_drhd_unit ( drhd )
1714 iommu = drhd->iommu;
1715 if ( cap_caching_mode(iommu->cap) )
1716 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
1717 iova, index, 0);
1718 else if ( cap_rwbf(iommu->cap) )
1719 iommu_flush_write_buffer(iommu);
1722 return 0;
1725 int iommu_page_unmapping(struct domain *domain, paddr_t addr, size_t size)
1727 dma_pte_clear_range(domain, addr, addr + size);
1729 return 0;
1732 void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry)
1734 struct acpi_drhd_unit *drhd;
1735 struct iommu *iommu = NULL;
1736 struct dma_pte *pte = (struct dma_pte *) p2m_entry;
1738 for_each_drhd_unit ( drhd )
1740 iommu = drhd->iommu;
1741 if ( cap_caching_mode(iommu->cap) )
1742 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1743 (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0);
1744 else if ( cap_rwbf(iommu->cap) )
1745 iommu_flush_write_buffer(iommu);
1748 iommu_flush_cache_entry(iommu, pte);
1751 static int iommu_prepare_rmrr_dev(
1752 struct domain *d,
1753 struct acpi_rmrr_unit *rmrr,
1754 struct pci_dev *pdev)
1756 struct acpi_drhd_unit *drhd;
1757 unsigned long size;
1758 int ret;
1760 /* page table init */
1761 size = rmrr->end_address - rmrr->base_address + 1;
1762 ret = iommu_page_mapping(d, rmrr->base_address,
1763 (void *)rmrr->base_address, size,
1764 DMA_PTE_READ|DMA_PTE_WRITE);
1765 if ( ret )
1766 return ret;
1768 if ( domain_context_mapped(pdev) == 0 )
1770 drhd = acpi_find_matched_drhd_unit(pdev);
1771 ret = domain_context_mapping(d, drhd->iommu, pdev);
1772 if ( !ret )
1773 return 0;
1776 return ret;
1779 void __init setup_dom0_devices(void)
1781 struct hvm_iommu *hd = domain_hvm_iommu(dom0);
1782 struct acpi_drhd_unit *drhd;
1783 struct pci_dev *pdev;
1784 int bus, dev, func, ret;
1785 u32 l;
1787 #ifdef DEBUG_VTD_CONTEXT_ENTRY
1788 for ( bus = 0; bus < 256; bus++ )
1790 for ( dev = 0; dev < 32; dev++ )
1792 for ( func = 0; func < 8; func++ )
1794 struct context_entry *context;
1795 struct pci_dev device;
1797 device.bus = bus;
1798 device.devfn = PCI_DEVFN(dev, func);
1799 drhd = acpi_find_matched_drhd_unit(&device);
1800 context = device_to_context_entry(drhd->iommu,
1801 bus, PCI_DEVFN(dev, func));
1802 if ( (context->lo != 0) || (context->hi != 0) )
1803 dprintk(XENLOG_INFO VTDPREFIX,
1804 "setup_dom0_devices-%x:%x:%x- context not 0\n",
1805 bus, dev, func);
1809 #endif
1811 for ( bus = 0; bus < 256; bus++ )
1813 for ( dev = 0; dev < 32; dev++ )
1815 for ( func = 0; func < 8; func++ )
1817 l = read_pci_config(bus, dev, func, PCI_VENDOR_ID);
1818 /* some broken boards return 0 or ~0 if a slot is empty: */
1819 if ( (l == 0xffffffff) || (l == 0x00000000) ||
1820 (l == 0x0000ffff) || (l == 0xffff0000) )
1821 continue;
1822 pdev = xmalloc(struct pci_dev);
1823 pdev->bus = bus;
1824 pdev->devfn = PCI_DEVFN(dev, func);
1825 list_add_tail(&pdev->list, &hd->pdev_list);
1827 drhd = acpi_find_matched_drhd_unit(pdev);
1828 ret = domain_context_mapping(dom0, drhd->iommu, pdev);
1829 if ( ret != 0 )
1830 gdprintk(XENLOG_ERR VTDPREFIX,
1831 "domain_context_mapping failed\n");
1836 for_each_pdev ( dom0, pdev )
1837 dprintk(XENLOG_INFO VTDPREFIX,
1838 "setup_dom0_devices: bdf = %x:%x:%x\n",
1839 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1842 void clear_fault_bits(struct iommu *iommu)
1844 u64 val;
1846 val = dmar_readq(
1847 iommu->reg,
1848 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+0x8);
1849 dmar_writeq(
1850 iommu->reg,
1851 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8,
1852 val);
1853 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_FAULTS);
1856 static int init_vtd_hw(void)
1858 struct acpi_drhd_unit *drhd;
1859 struct iommu *iommu;
1860 struct iommu_flush *flush = NULL;
1861 int vector;
1862 int ret;
1864 for_each_drhd_unit ( drhd )
1866 iommu = drhd->iommu;
1867 ret = iommu_set_root_entry(iommu);
1868 if ( ret )
1870 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
1871 return -EIO;
1874 vector = iommu_set_interrupt(iommu);
1875 dma_msi_data_init(iommu, vector);
1876 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
1877 iommu->vector = vector;
1878 clear_fault_bits(iommu);
1879 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
1881 /* initialize flush functions */
1882 flush = iommu_get_flush(iommu);
1883 flush->context = flush_context_reg;
1884 flush->iotlb = flush_iotlb_reg;
1886 return 0;
1889 static int init_vtd2_hw(void)
1891 struct acpi_drhd_unit *drhd;
1892 struct iommu *iommu;
1894 for_each_drhd_unit ( drhd )
1896 iommu = drhd->iommu;
1897 if ( qinval_setup(iommu) != 0 )
1898 dprintk(XENLOG_ERR VTDPREFIX,
1899 "Queued Invalidation hardware not found\n");
1901 if ( intremap_setup(iommu) != 0 )
1902 dprintk(XENLOG_ERR VTDPREFIX,
1903 "Interrupt Remapping hardware not found\n");
1905 return 0;
1908 static int enable_vtd_translation(void)
1910 struct acpi_drhd_unit *drhd;
1911 struct iommu *iommu;
1913 for_each_drhd_unit ( drhd )
1915 iommu = drhd->iommu;
1916 if ( iommu_enable_translation(iommu) )
1917 return -EIO;
1919 return 0;
1922 static void setup_dom0_rmrr(void)
1924 struct acpi_rmrr_unit *rmrr;
1925 struct pci_dev *pdev;
1926 int ret;
1928 for_each_rmrr_device ( rmrr, pdev )
1929 ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev);
1930 if ( ret )
1931 gdprintk(XENLOG_ERR VTDPREFIX,
1932 "IOMMU: mapping reserved region failed\n");
1933 end_for_each_rmrr_device ( rmrr, pdev )
1936 int iommu_setup(void)
1938 struct hvm_iommu *hd = domain_hvm_iommu(dom0);
1939 struct acpi_drhd_unit *drhd;
1940 struct iommu *iommu;
1941 unsigned long i;
1943 if ( !vtd_enabled )
1944 return 0;
1946 spin_lock_init(&domid_bitmap_lock);
1947 INIT_LIST_HEAD(&hd->pdev_list);
1949 /* setup clflush size */
1950 x86_clflush_size = ((cpuid_ebx(1) >> 8) & 0xff) * 8;
1952 /* Allocate IO page directory page for the domain. */
1953 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1954 iommu = drhd->iommu;
1956 /* Allocate domain id bitmap, and set bit 0 as reserved */
1957 domid_bitmap_size = cap_ndoms(iommu->cap);
1958 domid_bitmap = xmalloc_array(unsigned long,
1959 BITS_TO_LONGS(domid_bitmap_size));
1960 if ( domid_bitmap == NULL )
1961 goto error;
1962 memset(domid_bitmap, 0, domid_bitmap_size / 8);
1963 set_bit(0, domid_bitmap);
1965 /* setup 1:1 page table for dom0 */
1966 for ( i = 0; i < max_page; i++ )
1967 iommu_map_page(dom0, i, i);
1969 init_vtd_hw();
1970 setup_dom0_devices();
1971 setup_dom0_rmrr();
1972 iommu_flush_all();
1973 enable_vtd_translation();
1974 init_vtd2_hw();
1976 return 0;
1978 error:
1979 printk("iommu_setup() failed\n");
1980 for_each_drhd_unit ( drhd )
1982 iommu = drhd->iommu;
1983 free_iommu(iommu);
1985 return -EIO;
1988 /*
1989 * If the device isn't owned by dom0, it has already been
1990 * assigned to another domain, or it doesn't exist.
1991 */
1992 int device_assigned(u8 bus, u8 devfn)
1994 struct pci_dev *pdev;
1996 for_each_pdev( dom0, pdev )
1997 if ( (pdev->bus == bus ) && (pdev->devfn == devfn) )
1998 return 0;
2000 return 1;
2003 int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
2005 struct acpi_rmrr_unit *rmrr;
2006 struct pci_dev *pdev;
2007 int ret = 0;
2009 if ( list_empty(&acpi_drhd_units) )
2010 return ret;
2012 gdprintk(XENLOG_INFO VTDPREFIX,
2013 "assign_device: bus = %x dev = %x func = %x\n",
2014 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
2016 reassign_device_ownership(dom0, d, bus, devfn);
2018 /* Set up the RMRR identity mapping */
2019 for_each_rmrr_device( rmrr, pdev )
2020 if ( pdev->bus == bus && pdev->devfn == devfn )
2022 /* FIXME: Because USB RMRR conflicts with guest bios region,
2023 * ignore USB RMRR temporarily.
2024 */
2025 if ( is_usb_device(pdev) )
2026 return 0;
2028 ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
2029 if ( ret )
2031 gdprintk(XENLOG_ERR VTDPREFIX,
2032 "IOMMU: mapping reserved region failed\n");
2033 return ret;
2036 end_for_each_rmrr_device(rmrr, pdev)
2038 return ret;
2041 void iommu_set_pgd(struct domain *d)
2043 struct hvm_iommu *hd = domain_hvm_iommu(d);
2044 unsigned long p2m_table;
2046 if ( hd->pgd )
2048 gdprintk(XENLOG_INFO VTDPREFIX,
2049 "iommu_set_pgd_1: hd->pgd = %p\n", hd->pgd);
2050 hd->pgd = NULL;
2052 p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
2054 #if CONFIG_PAGING_LEVELS == 3
2055 if ( !hd->pgd )
2057 int level = agaw_to_level(hd->agaw);
2058 struct dma_pte *pmd = NULL;
2059 struct dma_pte *pgd = NULL;
2060 struct dma_pte *pte = NULL;
2061 l3_pgentry_t *l3e;
2062 unsigned long flags;
2063 int i;
2065 spin_lock_irqsave(&hd->mapping_lock, flags);
2066 if ( !hd->pgd )
2068 pgd = (struct dma_pte *)alloc_xenheap_page();
2069 if ( !pgd )
2071 spin_unlock_irqrestore(&hd->mapping_lock, flags);
2072 gdprintk(XENLOG_ERR VTDPREFIX,
2073 "Allocate pgd memory failed!\n");
2074 return;
2076 memset(pgd, 0, PAGE_SIZE);
2077 hd->pgd = pgd;
2080 l3e = map_domain_page(p2m_table);
2081 switch ( level )
2083 case VTD_PAGE_TABLE_LEVEL_3: /* Weybridge */
2084 /* We only support 8 entries for the PAE L3 p2m table */
2085 for ( i = 0; i < 8 ; i++ )
2087 /* Don't create new L2 entry, use ones from p2m table */
2088 pgd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
2090 break;
2092 case VTD_PAGE_TABLE_LEVEL_4: /* Stoakley */
2093 /* We allocate one more page for the top vtd page table. */
2094 pmd = (struct dma_pte *)alloc_xenheap_page();
2095 if ( !pmd )
2097 unmap_domain_page(l3e);
2098 spin_unlock_irqrestore(&hd->mapping_lock, flags);
2099 gdprintk(XENLOG_ERR VTDPREFIX,
2100 "Allocate pmd memory failed!\n");
2101 return;
2103 memset((u8*)pmd, 0, PAGE_SIZE);
2104 pte = &pgd[0];
2105 dma_set_pte_addr(*pte, virt_to_maddr(pmd));
2106 dma_set_pte_readable(*pte);
2107 dma_set_pte_writable(*pte);
2109 for ( i = 0; i < 8; i++ )
2111 /* Don't create new L2 entry, use ones from p2m table */
2112 pmd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
2114 break;
2115 default:
2116 gdprintk(XENLOG_ERR VTDPREFIX,
2117 "iommu_set_pgd:Unsupported p2m table sharing level!\n");
2118 break;
2120 unmap_domain_page(l3e);
2121 spin_unlock_irqrestore(&hd->mapping_lock, flags);
2123 #elif CONFIG_PAGING_LEVELS == 4
2124 if ( !hd->pgd )
2126 int level = agaw_to_level(hd->agaw);
2127 l3_pgentry_t *l3e;
2128 mfn_t pgd_mfn;
2130 switch ( level )
2132 case VTD_PAGE_TABLE_LEVEL_3:
2133 l3e = map_domain_page(p2m_table);
2134 if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
2136 gdprintk(XENLOG_ERR VTDPREFIX,
2137 "iommu_set_pgd: second level wasn't there\n");
2138 unmap_domain_page(l3e);
2139 return;
2141 pgd_mfn = _mfn(l3e_get_pfn(*l3e));
2142 unmap_domain_page(l3e);
2143 hd->pgd = maddr_to_virt(pagetable_get_paddr(
2144 pagetable_from_mfn(pgd_mfn)));
2145 break;
2147 case VTD_PAGE_TABLE_LEVEL_4:
2148 pgd_mfn = _mfn(p2m_table);
2149 hd->pgd = maddr_to_virt(pagetable_get_paddr(
2150 pagetable_from_mfn(pgd_mfn)));
2151 break;
2152 default:
2153 gdprintk(XENLOG_ERR VTDPREFIX,
2154 "iommu_set_pgd:Unsupported p2m table sharing level!\n");
2155 break;
2158 #endif
2159 gdprintk(XENLOG_INFO VTDPREFIX,
2160 "iommu_set_pgd: hd->pgd = %p\n", hd->pgd);
2164 u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
2165 int iommu_suspend(void)
2167 struct acpi_drhd_unit *drhd;
2168 struct iommu *iommu;
2169 int i = 0;
2171 iommu_flush_all();
2173 for_each_drhd_unit ( drhd )
2175 iommu = drhd->iommu;
2176 iommu_state[DMAR_RTADDR_REG * i] =
2177 (u64) dmar_readq(iommu->reg, DMAR_RTADDR_REG);
2178 iommu_state[DMAR_FECTL_REG * i] =
2179 (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
2180 iommu_state[DMAR_FEDATA_REG * i] =
2181 (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
2182 iommu_state[DMAR_FEADDR_REG * i] =
2183 (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
2184 iommu_state[DMAR_FEUADDR_REG * i] =
2185 (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
2186 iommu_state[DMAR_PLMBASE_REG * i] =
2187 (u32) dmar_readl(iommu->reg, DMAR_PLMBASE_REG);
2188 iommu_state[DMAR_PLMLIMIT_REG * i] =
2189 (u32) dmar_readl(iommu->reg, DMAR_PLMLIMIT_REG);
2190 iommu_state[DMAR_PHMBASE_REG * i] =
2191 (u64) dmar_readq(iommu->reg, DMAR_PHMBASE_REG);
2192 iommu_state[DMAR_PHMLIMIT_REG * i] =
2193 (u64) dmar_readq(iommu->reg, DMAR_PHMLIMIT_REG);
2194 i++;
2197 return 0;
2200 int iommu_resume(void)
2202 struct acpi_drhd_unit *drhd;
2203 struct iommu *iommu;
2204 int i = 0;
2206 iommu_flush_all();
2208 init_vtd_hw();
2209 for_each_drhd_unit ( drhd )
2211 iommu = drhd->iommu;
2212 dmar_writeq( iommu->reg, DMAR_RTADDR_REG,
2213 (u64) iommu_state[DMAR_RTADDR_REG * i]);
2214 dmar_writel(iommu->reg, DMAR_FECTL_REG,
2215 (u32) iommu_state[DMAR_FECTL_REG * i]);
2216 dmar_writel(iommu->reg, DMAR_FEDATA_REG,
2217 (u32) iommu_state[DMAR_FEDATA_REG * i]);
2218 dmar_writel(iommu->reg, DMAR_FEADDR_REG,
2219 (u32) iommu_state[DMAR_FEADDR_REG * i]);
2220 dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
2221 (u32) iommu_state[DMAR_FEUADDR_REG * i]);
2222 dmar_writel(iommu->reg, DMAR_PLMBASE_REG,
2223 (u32) iommu_state[DMAR_PLMBASE_REG * i]);
2224 dmar_writel(iommu->reg, DMAR_PLMLIMIT_REG,
2225 (u32) iommu_state[DMAR_PLMLIMIT_REG * i]);
2226 dmar_writeq(iommu->reg, DMAR_PHMBASE_REG,
2227 (u64) iommu_state[DMAR_PHMBASE_REG * i]);
2228 dmar_writeq(iommu->reg, DMAR_PHMLIMIT_REG,
2229 (u64) iommu_state[DMAR_PHMLIMIT_REG * i]);
2231 if ( iommu_enable_translation(iommu) )
2232 return -EIO;
2233 i++;
2235 return 0;
2238 struct iommu_ops intel_iommu_ops = {
2239 .init = intel_iommu_domain_init,
2240 .assign_device = intel_iommu_assign_device,
2241 .teardown = iommu_domain_teardown,
2242 .map_page = intel_iommu_map_page,
2243 .unmap_page = intel_iommu_unmap_page,
2244 .reassign_device = reassign_device_ownership,
2245 };
2247 /*
2248 * Local variables:
2249 * mode: C
2250 * c-set-style: "BSD"
2251 * c-basic-offset: 4
2252 * tab-width: 4
2253 * indent-tabs-mode: nil
2254 * End:
2255 */