ia64/xen-unstable

view xen/drivers/passthrough/vtd/iommu.c @ 18659:d752eaa7c1db

vtd: make the xen_in_range/tboot_in_range checks also work for IA64.

Signed-off-by: Anthony Xu <anthony.xu@intel.com>
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Oct 20 15:14:55 2008 +0100 (2008-10-20)
parents e57ca7937ae8
children 2a25fd94c6f2
line source
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
20 */
22 #include <xen/irq.h>
23 #include <xen/sched.h>
24 #include <xen/xmalloc.h>
25 #include <xen/domain_page.h>
26 #include <xen/iommu.h>
27 #include <asm/hvm/iommu.h>
28 #include <xen/numa.h>
29 #include <xen/time.h>
30 #include <xen/pci.h>
31 #include <xen/pci_regs.h>
32 #include <xen/keyhandler.h>
33 #include "iommu.h"
34 #include "dmar.h"
35 #include "extern.h"
36 #include "vtd.h"
38 #define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
40 static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */
41 static int domid_bitmap_size; /* domain id bitmap size in bits */
42 static unsigned long *domid_bitmap; /* iommu domain id bitmap */
44 static void setup_dom0_devices(struct domain *d);
45 static void setup_dom0_rmrr(struct domain *d);
47 #define DID_FIELD_WIDTH 16
48 #define DID_HIGH_OFFSET 8
49 static void context_set_domain_id(struct context_entry *context,
50 struct domain *d)
51 {
52 unsigned long flags;
53 domid_t iommu_domid = domain_iommu_domid(d);
55 if ( iommu_domid == 0 )
56 {
57 spin_lock_irqsave(&domid_bitmap_lock, flags);
58 iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
59 set_bit(iommu_domid, domid_bitmap);
60 spin_unlock_irqrestore(&domid_bitmap_lock, flags);
61 d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
62 }
64 context->hi &= (1 << DID_HIGH_OFFSET) - 1;
65 context->hi |= iommu_domid << DID_HIGH_OFFSET;
66 }
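/*
 * Worked example (a sketch of the bit packing above, assuming the VT-d
 * context-entry layout in which bits 8..23 of the high qword carry the
 * 16-bit domain id): for iommu_domid == 5,
 *
 *     context->hi &= (1 << 8) - 1;    <- keep only bits 0..7
 *     context->hi |= 5 << 8;          <- DID field now reads 5
 *
 * DID_FIELD_WIDTH (16) documents the field size; the values handed out
 * here are bounded by cap_ndoms() via domid_bitmap_size below.
 */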
68 static void iommu_domid_release(struct domain *d)
69 {
70 domid_t iommu_domid = domain_iommu_domid(d);
72 if ( iommu_domid != 0 )
73 {
74 d->arch.hvm_domain.hvm_iommu.iommu_domid = 0;
75 clear_bit(iommu_domid, domid_bitmap);
76 }
77 }
79 static struct intel_iommu *alloc_intel_iommu(void)
80 {
81 struct intel_iommu *intel;
83 intel = xmalloc(struct intel_iommu);
84 if ( intel == NULL )
85 return NULL;
86 memset(intel, 0, sizeof(struct intel_iommu));
88 spin_lock_init(&intel->qi_ctrl.qinval_lock);
89 spin_lock_init(&intel->qi_ctrl.qinval_poll_lock);
90 spin_lock_init(&intel->ir_ctrl.iremap_lock);
92 return intel;
93 }
95 static void free_intel_iommu(struct intel_iommu *intel)
96 {
97 xfree(intel);
98 }
100 struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu)
101 {
102 return iommu ? &iommu->intel->qi_ctrl : NULL;
103 }
105 struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu)
106 {
107 return iommu ? &iommu->intel->ir_ctrl : NULL;
108 }
110 struct iommu_flush *iommu_get_flush(struct iommu *iommu)
111 {
112 return iommu ? &iommu->intel->flush : NULL;
113 }
115 static unsigned int clflush_size;
116 static int iommus_incoherent;
117 static void __iommu_flush_cache(void *addr, int size)
118 {
119 int i;
121 if ( !iommus_incoherent )
122 return;
124 for ( i = 0; i < size; i += clflush_size )
125 cacheline_flush((char *)addr + i);
126 }
128 void iommu_flush_cache_entry(void *addr)
129 {
130 __iommu_flush_cache(addr, 8);
131 }
133 void iommu_flush_cache_page(void *addr)
134 {
135 __iommu_flush_cache(addr, PAGE_SIZE_4K);
136 }
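/*
 * Usage note (a sketch of how the helpers above are used throughout this
 * file): whenever software writes a structure the hardware walks, such as
 * a root/context entry or a dma_pte, it must be flushed out of the CPU
 * cache if any IOMMU reports itself non-coherent (ecap_coherent() clear,
 * which sets iommus_incoherent in iommu_alloc() below):
 *
 *     dma_set_pte_addr(*pte, maddr);
 *     iommu_flush_cache_entry(pte);
 *
 * On fully coherent systems iommus_incoherent stays 0 and the flush is a
 * no-op.
 */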
138 int nr_iommus;
139 /* context entry handling */
140 static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
141 {
142 struct root_entry *root, *root_entries;
143 unsigned long flags;
144 u64 maddr;
146 spin_lock_irqsave(&iommu->lock, flags);
147 root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
148 root = &root_entries[bus];
149 if ( !root_present(*root) )
150 {
151 maddr = alloc_pgtable_maddr();
152 if ( maddr == 0 )
153 {
154 unmap_vtd_domain_page(root_entries);
155 spin_unlock_irqrestore(&iommu->lock, flags);
156 return 0;
157 }
158 set_root_value(*root, maddr);
159 set_root_present(*root);
160 iommu_flush_cache_entry(root);
161 }
162 maddr = (u64) get_context_addr(*root);
163 unmap_vtd_domain_page(root_entries);
164 spin_unlock_irqrestore(&iommu->lock, flags);
165 return maddr;
166 }
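/*
 * Layout sketch (the standard VT-d root/context organisation this helper
 * relies on): the root table holds 256 root entries, one per PCI bus, and
 * each present root entry points at a 4K context table of 256 context
 * entries, one per devfn on that bus.  Callers therefore reach a device's
 * context entry as:
 *
 *     context_entries = map_vtd_domain_page(bus_to_context_maddr(iommu, bus));
 *     context = &context_entries[devfn];
 */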
168 static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
169 {
170 struct root_entry *root, *root_entries;
171 struct context_entry *context;
172 u64 context_maddr;
173 int ret;
174 unsigned long flags;
176 spin_lock_irqsave(&iommu->lock, flags);
177 root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
178 root = &root_entries[bus];
179 if ( !root_present(*root) )
180 {
181 ret = 0;
182 goto out;
183 }
184 context_maddr = get_context_addr(*root);
185 context = (struct context_entry *)map_vtd_domain_page(context_maddr);
186 ret = context_present(context[devfn]);
187 unmap_vtd_domain_page(context);
188 out:
189 unmap_vtd_domain_page(root_entries);
190 spin_unlock_irqrestore(&iommu->lock, flags);
191 return ret;
192 }
194 static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
195 {
196 struct hvm_iommu *hd = domain_hvm_iommu(domain);
197 int addr_width = agaw_to_width(hd->agaw);
198 struct dma_pte *parent, *pte = NULL;
199 int level = agaw_to_level(hd->agaw);
200 int offset;
201 unsigned long flags;
202 u64 pte_maddr = 0, maddr;
203 u64 *vaddr = NULL;
205 addr &= (((u64)1) << addr_width) - 1;
206 spin_lock_irqsave(&hd->mapping_lock, flags);
207 if ( hd->pgd_maddr == 0 )
208 if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr()) == 0) )
209 goto out;
211 parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
212 while ( level > 1 )
213 {
214 offset = address_level_offset(addr, level);
215 pte = &parent[offset];
217 if ( dma_pte_addr(*pte) == 0 )
218 {
219 if ( !alloc )
220 break;
221 maddr = alloc_pgtable_maddr();
222 dma_set_pte_addr(*pte, maddr);
223 vaddr = map_vtd_domain_page(maddr);
224 if ( !vaddr )
225 break;
227 /*
228 * higher-level tables always set r/w; the last-level
229 * page table controls the actual read/write permissions
230 */
231 dma_set_pte_readable(*pte);
232 dma_set_pte_writable(*pte);
233 iommu_flush_cache_entry(pte);
234 }
235 else
236 {
237 vaddr = map_vtd_domain_page(pte->val);
238 if ( !vaddr )
239 break;
240 }
242 if ( level == 2 )
243 {
244 pte_maddr = pte->val & PAGE_MASK_4K;
245 unmap_vtd_domain_page(vaddr);
246 break;
247 }
249 unmap_vtd_domain_page(parent);
250 parent = (struct dma_pte *)vaddr;
251 vaddr = NULL;
252 level--;
253 }
255 unmap_vtd_domain_page(parent);
256 out:
257 spin_unlock_irqrestore(&hd->mapping_lock, flags);
258 return pte_maddr;
259 }
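/*
 * Walk sketch (assuming the usual 9-bits-per-level VT-d page-table layout
 * that address_level_offset() encodes): for example, with a 3-level table
 * (39-bit address width), a DMA address is decomposed as
 *
 *     level 3 index = addr bits 38..30
 *     level 2 index = addr bits 29..21
 *     level 1 index = addr bits 20..12   (the leaf, handled by callers)
 *
 * and this function returns the machine address of the level-1 table page
 * holding the leaf PTE, allocating intermediate tables on the way down
 * when 'alloc' is set.
 */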
261 static void iommu_flush_write_buffer(struct iommu *iommu)
262 {
263 u32 val;
264 unsigned long flag;
265 s_time_t start_time;
267 if ( !cap_rwbf(iommu->cap) )
268 return;
269 val = iommu->gcmd | DMA_GCMD_WBF;
271 spin_lock_irqsave(&iommu->register_lock, flag);
272 dmar_writel(iommu->reg, DMAR_GCMD_REG, val);
274 /* Make sure hardware completes it */
275 start_time = NOW();
276 for ( ; ; )
277 {
278 val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
279 if ( !(val & DMA_GSTS_WBFS) )
280 break;
281 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
282 panic("%s: DMAR hardware is malfunctioning,"
283 " please disable IOMMU\n", __func__);
284 cpu_relax();
285 }
286 spin_unlock_irqrestore(&iommu->register_lock, flag);
287 }
289 /* return value determines whether we need a write buffer flush */
290 static int flush_context_reg(
291 void *_iommu,
292 u16 did, u16 source_id, u8 function_mask, u64 type,
293 int non_present_entry_flush)
294 {
295 struct iommu *iommu = (struct iommu *) _iommu;
296 u64 val = 0;
297 unsigned long flag;
298 s_time_t start_time;
300 /*
301 * In the non-present entry flush case: if the hardware doesn't cache
302 * non-present entries we do nothing; if it does, we flush the entries
303 * of domain 0 (that domain id is used to tag any cached non-present
304 * entries)
305 */
306 if ( non_present_entry_flush )
307 {
308 if ( !cap_caching_mode(iommu->cap) )
309 return 1;
310 else
311 did = 0;
312 }
314 /* use register invalidation */
315 switch ( type )
316 {
317 case DMA_CCMD_GLOBAL_INVL:
318 val = DMA_CCMD_GLOBAL_INVL;
319 break;
320 case DMA_CCMD_DOMAIN_INVL:
321 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
322 break;
323 case DMA_CCMD_DEVICE_INVL:
324 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
325 |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
326 break;
327 default:
328 BUG();
329 }
330 val |= DMA_CCMD_ICC;
332 spin_lock_irqsave(&iommu->register_lock, flag);
333 dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
335 /* Make sure hardware completes it */
336 start_time = NOW();
337 for ( ; ; )
338 {
339 val = dmar_readq(iommu->reg, DMAR_CCMD_REG);
340 if ( !(val & DMA_CCMD_ICC) )
341 break;
342 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
343 panic("%s: DMAR hardware is malfunctioning,"
344 " please disable IOMMU\n", __func__);
345 cpu_relax();
346 }
347 spin_unlock_irqrestore(&iommu->register_lock, flag);
348 /* flushing the context entry implicitly flushes the write buffer */
349 return 0;
350 }
352 static int inline iommu_flush_context_global(
353 struct iommu *iommu, int non_present_entry_flush)
354 {
355 struct iommu_flush *flush = iommu_get_flush(iommu);
356 return flush->context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
357 non_present_entry_flush);
358 }
360 static int inline iommu_flush_context_domain(
361 struct iommu *iommu, u16 did, int non_present_entry_flush)
362 {
363 struct iommu_flush *flush = iommu_get_flush(iommu);
364 return flush->context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
365 non_present_entry_flush);
366 }
368 static int inline iommu_flush_context_device(
369 struct iommu *iommu, u16 did, u16 source_id,
370 u8 function_mask, int non_present_entry_flush)
371 {
372 struct iommu_flush *flush = iommu_get_flush(iommu);
373 return flush->context(iommu, did, source_id, function_mask,
374 DMA_CCMD_DEVICE_INVL,
375 non_present_entry_flush);
376 }
378 /* return value determines whether we need a write buffer flush */
379 static int flush_iotlb_reg(void *_iommu, u16 did,
380 u64 addr, unsigned int size_order, u64 type,
381 int non_present_entry_flush)
382 {
383 struct iommu *iommu = (struct iommu *) _iommu;
384 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
385 u64 val = 0, val_iva = 0;
386 unsigned long flag;
387 s_time_t start_time;
389 /*
390 * In the non-present entry flush case: if the hardware doesn't cache
391 * non-present entries we do nothing; if it does, we flush the entries
392 * of domain 0 (that domain id is used to tag any cached non-present
393 * entries)
394 */
395 if ( non_present_entry_flush )
396 {
397 if ( !cap_caching_mode(iommu->cap) )
398 return 1;
399 else
400 did = 0;
401 }
403 /* use register invalidation */
404 switch ( type )
405 {
406 case DMA_TLB_GLOBAL_FLUSH:
407 /* global flush doesn't need to set IVA_REG */
408 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
409 break;
410 case DMA_TLB_DSI_FLUSH:
411 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
412 break;
413 case DMA_TLB_PSI_FLUSH:
414 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
415 /* Note: always flush non-leaf currently */
416 val_iva = size_order | addr;
417 break;
418 default:
419 BUG();
420 }
421 /* Note: set drain read/write */
422 if ( cap_read_drain(iommu->cap) )
423 val |= DMA_TLB_READ_DRAIN;
424 if ( cap_write_drain(iommu->cap) )
425 val |= DMA_TLB_WRITE_DRAIN;
427 spin_lock_irqsave(&iommu->register_lock, flag);
428 /* Note: Only uses first TLB reg currently */
429 if ( val_iva )
430 dmar_writeq(iommu->reg, tlb_offset, val_iva);
431 dmar_writeq(iommu->reg, tlb_offset + 8, val);
433 /* Make sure hardware completes it */
434 start_time = NOW();
435 for ( ; ; )
436 {
437 val = dmar_readq(iommu->reg, tlb_offset + 8);
438 if ( !(val & DMA_TLB_IVT) )
439 break;
440 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
441 panic("%s: DMAR hardware is malfunctioning,"
442 " please disable IOMMU\n", __func__);
443 cpu_relax();
444 }
445 spin_unlock_irqrestore(&iommu->register_lock, flag);
447 /* check IOTLB invalidation granularity */
448 if ( DMA_TLB_IAIG(val) == 0 )
449 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: flush IOTLB failed\n");
451 if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
452 dprintk(XENLOG_INFO VTDPREFIX,
453 "IOMMU: tlb flush request %x, actual %x\n",
454 (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
455 /* flushing an iotlb entry implicitly flushes the write buffer */
456 return 0;
457 }
459 static int inline iommu_flush_iotlb_global(struct iommu *iommu,
460 int non_present_entry_flush)
461 {
462 struct iommu_flush *flush = iommu_get_flush(iommu);
463 return flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
464 non_present_entry_flush);
465 }
467 static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
468 int non_present_entry_flush)
469 {
470 struct iommu_flush *flush = iommu_get_flush(iommu);
471 return flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
472 non_present_entry_flush);
473 }
475 static int inline get_alignment(u64 base, unsigned int size)
476 {
477 int t = 0;
478 u64 end;
480 end = base + size - 1;
481 while ( base != end )
482 {
483 t++;
484 base >>= 1;
485 end >>= 1;
486 }
487 return t;
488 }
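/*
 * Worked example (illustrative values): get_alignment() returns how many
 * low-order bits must be dropped before base and base+size-1 become equal,
 * i.e. the invalidation mask order.  get_alignment(0x12340, 8) returns 3
 * (a naturally aligned 8-page range), while get_alignment(0x12341, 8)
 * returns 4, because the mask must be widened until it covers the whole
 * misaligned span.  iommu_flush_iotlb_psi() below compares this against
 * cap_max_amask_val() before falling back to a domain-selective flush.
 */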
490 static int inline iommu_flush_iotlb_psi(
491 struct iommu *iommu, u16 did,
492 u64 addr, unsigned int pages, int non_present_entry_flush)
493 {
494 unsigned int align;
495 struct iommu_flush *flush = iommu_get_flush(iommu);
497 ASSERT(!(addr & (~PAGE_MASK_4K)));
498 ASSERT(pages > 0);
500 /* Fall back to domain-selective flush if no PSI support */
501 if ( !cap_pgsel_inv(iommu->cap) )
502 return iommu_flush_iotlb_dsi(iommu, did,
503 non_present_entry_flush);
505 /*
506 * PSI requires the page count to be a power of two, with the base
507 * address naturally aligned to that size
508 */
509 align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
510 /* Fall back to domain-selective flush if the size is too big */
511 if ( align > cap_max_amask_val(iommu->cap) )
512 return iommu_flush_iotlb_dsi(iommu, did,
513 non_present_entry_flush);
515 addr >>= PAGE_SHIFT_4K + align;
516 addr <<= PAGE_SHIFT_4K + align;
518 return flush->iotlb(iommu, did, addr, align,
519 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
520 }
522 void iommu_flush_all(void)
523 {
524 struct acpi_drhd_unit *drhd;
525 struct iommu *iommu;
527 flush_all_cache();
528 for_each_drhd_unit ( drhd )
529 {
530 iommu = drhd->iommu;
531 iommu_flush_context_global(iommu, 0);
532 iommu_flush_iotlb_global(iommu, 0);
533 }
534 }
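/*
 * Ordering note (as implemented above): flush_all_cache() first pushes any
 * dirty descriptors out of the CPU caches (only needed when an IOMMU is
 * non-coherent), then, for each DRHD, a global context-cache flush is
 * issued before a global IOTLB flush, which is the order the VT-d
 * programming model expects.
 */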
536 /* clear the last-level PTE mapping one page */
537 static void dma_pte_clear_one(struct domain *domain, u64 addr)
538 {
539 struct hvm_iommu *hd = domain_hvm_iommu(domain);
540 struct acpi_drhd_unit *drhd;
541 struct iommu *iommu;
542 struct dma_pte *page = NULL, *pte = NULL;
543 u64 pg_maddr;
545 /* get last level pte */
546 pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
547 if ( pg_maddr == 0 )
548 return;
549 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
550 pte = page + address_level_offset(addr, 1);
552 if ( !dma_pte_present(*pte) )
553 {
554 unmap_vtd_domain_page(page);
555 return;
556 }
558 dma_clear_pte(*pte);
559 iommu_flush_cache_entry(pte);
561 for_each_drhd_unit ( drhd )
562 {
563 iommu = drhd->iommu;
564 if ( test_bit(iommu->index, &hd->iommu_bitmap) )
565 if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
566 addr, 1, 0))
567 iommu_flush_write_buffer(iommu);
568 }
570 unmap_vtd_domain_page(page);
571 }
573 static void iommu_free_pagetable(u64 pt_maddr, int level)
574 {
575 int i;
576 struct dma_pte *pt_vaddr, *pte;
577 int next_level = level - 1;
579 if ( pt_maddr == 0 )
580 return;
582 pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
584 for ( i = 0; i < PTE_NUM; i++ )
585 {
586 pte = &pt_vaddr[i];
587 if ( !dma_pte_present(*pte) )
588 continue;
590 if ( next_level >= 1 )
591 iommu_free_pagetable(dma_pte_addr(*pte), next_level);
593 dma_clear_pte(*pte);
594 iommu_flush_cache_entry(pte);
595 }
597 unmap_vtd_domain_page(pt_vaddr);
598 free_pgtable_maddr(pt_maddr);
599 }
601 static int iommu_set_root_entry(struct iommu *iommu)
602 {
603 u32 cmd, sts;
604 unsigned long flags;
605 s_time_t start_time;
607 spin_lock_irqsave(&iommu->register_lock, flags);
609 if ( iommu->root_maddr == 0 )
610 iommu->root_maddr = alloc_pgtable_maddr();
611 if ( iommu->root_maddr == 0 )
612 {
613 spin_unlock_irqrestore(&iommu->register_lock, flags);
614 return -ENOMEM;
615 }
617 dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
618 cmd = iommu->gcmd | DMA_GCMD_SRTP;
619 dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
621 /* Make sure hardware completes it */
622 start_time = NOW();
623 for ( ; ; )
624 {
625 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
626 if ( sts & DMA_GSTS_RTPS )
627 break;
628 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
629 panic("%s: DMAR hardware is malfunctioning,"
630 " please disable IOMMU\n", __func__);
631 cpu_relax();
632 }
634 spin_unlock_irqrestore(&iommu->register_lock, flags);
636 return 0;
637 }
639 static int iommu_enable_translation(struct iommu *iommu)
640 {
641 u32 sts;
642 unsigned long flags;
643 s_time_t start_time;
645 dprintk(XENLOG_INFO VTDPREFIX,
646 "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
647 spin_lock_irqsave(&iommu->register_lock, flags);
648 iommu->gcmd |= DMA_GCMD_TE;
649 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
650 /* Make sure hardware completes it */
651 start_time = NOW();
652 for ( ; ; )
653 {
654 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
655 if ( sts & DMA_GSTS_TES )
656 break;
657 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
658 panic("%s: DMAR hardware is malfunctioning,"
659 " please disable IOMMU\n", __func__);
660 cpu_relax();
661 }
663 /* Disable PMRs when VT-d engine takes effect per spec definition */
664 disable_pmr(iommu);
665 spin_unlock_irqrestore(&iommu->register_lock, flags);
666 return 0;
667 }
669 int iommu_disable_translation(struct iommu *iommu)
670 {
671 u32 sts;
672 unsigned long flags;
673 s_time_t start_time;
675 spin_lock_irqsave(&iommu->register_lock, flags);
676 iommu->gcmd &= ~ DMA_GCMD_TE;
677 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
679 /* Make sure hardware completes it */
680 start_time = NOW();
681 for ( ; ; )
682 {
683 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
684 if ( !(sts & DMA_GSTS_TES) )
685 break;
686 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
687 panic("%s: DMAR hardware is malfunctioning,"
688 " please disable IOMMU\n", __func__);
689 cpu_relax();
690 }
691 spin_unlock_irqrestore(&iommu->register_lock, flags);
692 return 0;
693 }
695 static struct iommu *vector_to_iommu[NR_VECTORS];
696 static int iommu_page_fault_do_one(struct iommu *iommu, int type,
697 u8 fault_reason, u16 source_id, u64 addr)
698 {
699 dprintk(XENLOG_WARNING VTDPREFIX,
700 "iommu_fault:%s: %x:%x.%x addr %"PRIx64" REASON %x "
701 "iommu->reg = %p\n",
702 (type ? "DMA Read" : "DMA Write"), (source_id >> 8),
703 PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr,
704 fault_reason, iommu->reg);
706 #ifndef __i386__ /* map_domain_page() cannot be used in this context */
707 if ( fault_reason < 0x20 )
708 print_vtd_entries(iommu, (source_id >> 8),
709 (source_id & 0xff), (addr >> PAGE_SHIFT));
710 #endif
712 return 0;
713 }
715 static void iommu_fault_status(u32 fault_status)
716 {
717 if ( fault_status & DMA_FSTS_PFO )
718 dprintk(XENLOG_ERR VTDPREFIX,
719 "iommu_fault_status: Fault Overflow\n");
720 else if ( fault_status & DMA_FSTS_PPF )
721 dprintk(XENLOG_ERR VTDPREFIX,
722 "iommu_fault_status: Primary Pending Fault\n");
723 else if ( fault_status & DMA_FSTS_AFO )
724 dprintk(XENLOG_ERR VTDPREFIX,
725 "iommu_fault_status: Advanced Fault Overflow\n");
726 else if ( fault_status & DMA_FSTS_APF )
727 dprintk(XENLOG_ERR VTDPREFIX,
728 "iommu_fault_status: Advanced Pending Fault\n");
729 else if ( fault_status & DMA_FSTS_IQE )
730 dprintk(XENLOG_ERR VTDPREFIX,
731 "iommu_fault_status: Invalidation Queue Error\n");
732 else if ( fault_status & DMA_FSTS_ICE )
733 dprintk(XENLOG_ERR VTDPREFIX,
734 "iommu_fault_status: Invalidation Completion Error\n");
735 else if ( fault_status & DMA_FSTS_ITE )
736 dprintk(XENLOG_ERR VTDPREFIX,
737 "iommu_fault_status: Invalidation Time-out Error\n");
738 }
740 #define PRIMARY_FAULT_REG_LEN (16)
741 static void iommu_page_fault(int vector, void *dev_id,
742 struct cpu_user_regs *regs)
743 {
744 struct iommu *iommu = dev_id;
745 int reg, fault_index;
746 u32 fault_status;
747 unsigned long flags;
749 dprintk(XENLOG_WARNING VTDPREFIX,
750 "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
752 spin_lock_irqsave(&iommu->register_lock, flags);
753 fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
754 spin_unlock_irqrestore(&iommu->register_lock, flags);
756 iommu_fault_status(fault_status);
758 /* FIXME: ignore advanced fault log */
759 if ( !(fault_status & DMA_FSTS_PPF) )
760 return;
761 fault_index = dma_fsts_fault_record_index(fault_status);
762 reg = cap_fault_reg_offset(iommu->cap);
763 for ( ; ; )
764 {
765 u8 fault_reason;
766 u16 source_id;
767 u32 data;
768 u64 guest_addr;
769 int type;
771 /* highest 32 bits */
772 spin_lock_irqsave(&iommu->register_lock, flags);
773 data = dmar_readl(iommu->reg, reg +
774 fault_index * PRIMARY_FAULT_REG_LEN + 12);
775 if ( !(data & DMA_FRCD_F) )
776 {
777 spin_unlock_irqrestore(&iommu->register_lock, flags);
778 break;
779 }
781 fault_reason = dma_frcd_fault_reason(data);
782 type = dma_frcd_type(data);
784 data = dmar_readl(iommu->reg, reg +
785 fault_index * PRIMARY_FAULT_REG_LEN + 8);
786 source_id = dma_frcd_source_id(data);
788 guest_addr = dmar_readq(iommu->reg, reg +
789 fault_index * PRIMARY_FAULT_REG_LEN);
790 guest_addr = dma_frcd_page_addr(guest_addr);
791 /* clear the fault */
792 dmar_writel(iommu->reg, reg +
793 fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
794 spin_unlock_irqrestore(&iommu->register_lock, flags);
796 iommu_page_fault_do_one(iommu, type, fault_reason,
797 source_id, guest_addr);
799 fault_index++;
800 if ( fault_index >= cap_num_fault_regs(iommu->cap) )
801 fault_index = 0;
802 }
804 /* clear primary fault overflow */
805 if ( fault_status & DMA_FSTS_PFO )
806 {
807 spin_lock_irqsave(&iommu->register_lock, flags);
808 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
809 spin_unlock_irqrestore(&iommu->register_lock, flags);
810 }
811 }
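/*
 * Record layout sketch (as consumed above): each primary fault record is
 * PRIMARY_FAULT_REG_LEN (16) bytes, starting at cap_fault_reg_offset():
 *
 *     offset  0: faulting page address (64-bit, low 12 bits unused)
 *     offset  8: source id (bus/devfn) in the low 16 bits
 *     offset 12: fault reason (bits 0..7), type (bit 30), F/valid (bit 31)
 *
 * Writing DMA_FRCD_F back to offset 12 clears the record so hardware can
 * reuse the slot.
 */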
813 static void dma_msi_unmask(unsigned int vector)
814 {
815 struct iommu *iommu = vector_to_iommu[vector];
816 unsigned long flags;
818 /* unmask it */
819 spin_lock_irqsave(&iommu->register_lock, flags);
820 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
821 spin_unlock_irqrestore(&iommu->register_lock, flags);
822 }
824 static void dma_msi_mask(unsigned int vector)
825 {
826 unsigned long flags;
827 struct iommu *iommu = vector_to_iommu[vector];
829 /* mask it */
830 spin_lock_irqsave(&iommu->register_lock, flags);
831 dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
832 spin_unlock_irqrestore(&iommu->register_lock, flags);
833 }
835 static unsigned int dma_msi_startup(unsigned int vector)
836 {
837 dma_msi_unmask(vector);
838 return 0;
839 }
841 static void dma_msi_end(unsigned int vector)
842 {
843 dma_msi_unmask(vector);
844 ack_APIC_irq();
845 }
847 static void dma_msi_data_init(struct iommu *iommu, int vector)
848 {
849 u32 msi_data = 0;
850 unsigned long flags;
852 /* Fixed, edge, assert mode. Follow MSI setting */
853 msi_data |= vector & 0xff;
854 msi_data |= 1 << 14;
856 spin_lock_irqsave(&iommu->register_lock, flags);
857 dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
858 spin_unlock_irqrestore(&iommu->register_lock, flags);
859 }
861 #ifdef SUPPORT_MSI_REMAPPING
862 static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
863 {
864 u64 msi_address;
865 unsigned long flags;
867 /* Physical, dedicated cpu. Follow MSI setting */
868 msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
869 msi_address |= MSI_PHYSICAL_MODE << 2;
870 msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
871 msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;
873 spin_lock_irqsave(&iommu->register_lock, flags);
874 dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
875 dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
876 spin_unlock_irqrestore(&iommu->register_lock, flags);
877 }
878 #else
879 static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
880 {
881 /* ia64: TODO */
882 }
883 #endif
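/*
 * Programming-model note (a sketch of what the helpers above set up): the
 * VT-d fault event is reported through an MSI generated by the IOMMU
 * itself.  dma_msi_data_init() writes the vector and delivery bits into
 * DMAR_FEDATA_REG, dma_msi_addr_init() writes the MSI address (physical
 * destination mode, target CPU's APIC id) into DMAR_FEADDR_REG and
 * DMAR_FEUADDR_REG, and clearing the mask bit in DMAR_FECTL_REG (see
 * dma_msi_unmask() above) lets faults invoke iommu_page_fault().
 */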
885 static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
886 {
887 struct iommu *iommu = vector_to_iommu[vector];
888 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
889 }
891 static struct hw_interrupt_type dma_msi_type = {
892 .typename = "DMA_MSI",
893 .startup = dma_msi_startup,
894 .shutdown = dma_msi_mask,
895 .enable = dma_msi_unmask,
896 .disable = dma_msi_mask,
897 .ack = dma_msi_mask,
898 .end = dma_msi_end,
899 .set_affinity = dma_msi_set_affinity,
900 };
902 int iommu_set_interrupt(struct iommu *iommu)
903 {
904 int vector, ret;
906 vector = assign_irq_vector(AUTO_ASSIGN);
907 vector_to_iommu[vector] = iommu;
909 /* VT-d fault is an MSI, make irq == vector */
910 irq_vector[vector] = vector;
911 vector_irq[vector] = vector;
913 if ( !vector )
914 {
915 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
916 return -EINVAL;
917 }
919 irq_desc[vector].handler = &dma_msi_type;
920 ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
921 if ( ret )
922 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
923 return vector;
924 }
926 static int iommu_alloc(struct acpi_drhd_unit *drhd)
927 {
928 struct iommu *iommu;
929 unsigned long sagaw;
930 int agaw;
932 if ( nr_iommus >= MAX_IOMMUS )
933 {
934 gdprintk(XENLOG_ERR VTDPREFIX,
935 "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
936 return -ENOMEM;
937 }
939 iommu = xmalloc(struct iommu);
940 if ( iommu == NULL )
941 return -ENOMEM;
942 memset(iommu, 0, sizeof(struct iommu));
944 iommu->intel = alloc_intel_iommu();
945 if ( iommu->intel == NULL )
946 {
947 xfree(iommu);
948 return -ENOMEM;
949 }
951 iommu->reg = map_to_nocache_virt(nr_iommus, drhd->address);
952 iommu->index = nr_iommus++;
954 iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
955 iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
957 /* Calculate number of pagetable levels: between 2 and 4. */
958 sagaw = cap_sagaw(iommu->cap);
959 for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
960 if ( test_bit(agaw, &sagaw) )
961 break;
962 if ( agaw < 0 )
963 {
964 gdprintk(XENLOG_ERR VTDPREFIX,
965 "IOMMU: unsupported sagaw %lx\n", sagaw);
966 xfree(iommu);
967 return -ENODEV;
968 }
969 iommu->nr_pt_levels = agaw_to_level(agaw);
971 if ( !ecap_coherent(iommu->ecap) )
972 iommus_incoherent = 1;
974 spin_lock_init(&iommu->lock);
975 spin_lock_init(&iommu->register_lock);
977 drhd->iommu = iommu;
978 return 0;
979 }
981 static void iommu_free(struct acpi_drhd_unit *drhd)
982 {
983 struct iommu *iommu = drhd->iommu;
985 if ( iommu == NULL )
986 return;
988 if ( iommu->root_maddr != 0 )
989 {
990 free_pgtable_maddr(iommu->root_maddr);
991 iommu->root_maddr = 0;
992 }
994 if ( iommu->reg )
995 iounmap(iommu->reg);
997 free_intel_iommu(iommu->intel);
998 free_irq(iommu->vector);
999 xfree(iommu);
1001 drhd->iommu = NULL;
1004 #define guestwidth_to_adjustwidth(gaw) ({ \
1005 int agaw, r = (gaw - 12) % 9; \
1006 agaw = (r == 0) ? gaw : (gaw + 9 - r); \
1007 if ( agaw > 64 ) \
1008 agaw = 64; \
1009 agaw; })
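/*
 * Worked example: guestwidth_to_adjustwidth() rounds a guest address width
 * up to the next VT-d AGAW step (12 plus a multiple of 9 bits).  For
 * gaw = 32, r = (32 - 12) % 9 = 2 and the result is 32 + 9 - 2 = 39;
 * gaw = 39 or 48 is already on a step and is returned unchanged, and
 * results above 64 are capped at 64.
 */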
1011 static int intel_iommu_domain_init(struct domain *d)
1013 struct hvm_iommu *hd = domain_hvm_iommu(d);
1014 struct iommu *iommu = NULL;
1015 u64 i, j, tmp;
1016 struct acpi_drhd_unit *drhd;
1018 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1019 iommu = drhd->iommu;
1021 hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
1023 if ( d->domain_id == 0 )
1025 extern int xen_in_range(paddr_t start, paddr_t end);
1026 extern int tboot_in_range(paddr_t start, paddr_t end);
1028 /*
1029 * Set up 1:1 page table for dom0 except the critical segments
1030 * like Xen and tboot.
1031 */
1032 for ( i = 0; i < max_page; i++ )
1034 if ( xen_in_range(i << PAGE_SHIFT, (i + 1) << PAGE_SHIFT) ||
1035 tboot_in_range(i << PAGE_SHIFT, (i + 1) << PAGE_SHIFT) )
1036 continue;
1038 tmp = 1 << (PAGE_SHIFT - PAGE_SHIFT_4K);
1039 for ( j = 0; j < tmp; j++ )
1040 iommu_map_page(d, (i*tmp+j), (i*tmp+j));
1043 setup_dom0_devices(d);
1044 setup_dom0_rmrr(d);
1046 iommu_flush_all();
1048 for_each_drhd_unit ( drhd )
1050 iommu = drhd->iommu;
1051 if ( iommu_enable_translation(iommu) )
1052 return -EIO;
1056 return 0;
1059 static int domain_context_mapping_one(
1060 struct domain *domain,
1061 struct iommu *iommu,
1062 u8 bus, u8 devfn)
1064 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1065 struct context_entry *context, *context_entries;
1066 unsigned long flags;
1067 u64 maddr, pgd_maddr;
1068 int agaw;
1070 maddr = bus_to_context_maddr(iommu, bus);
1071 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1072 context = &context_entries[devfn];
1074 if ( context_present(*context) )
1076 unmap_vtd_domain_page(context_entries);
1077 return 0;
1080 spin_lock_irqsave(&iommu->lock, flags);
1081 if ( iommu_passthrough &&
1082 ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
1084 context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
1085 agaw = level_to_agaw(iommu->nr_pt_levels);
1087 else
1089 /* Ensure we have pagetables allocated down to leaf PTE. */
1090 if ( hd->pgd_maddr == 0 )
1092 addr_to_dma_page_maddr(domain, 0, 1);
1093 if ( hd->pgd_maddr == 0 )
1095 nomem:
1096 unmap_vtd_domain_page(context_entries);
1097 spin_unlock_irqrestore(&iommu->lock, flags);
1098 return -ENOMEM;
1102 /* Skip top levels of page tables for 2- and 3-level DRHDs. */
1103 pgd_maddr = hd->pgd_maddr;
1104 for ( agaw = level_to_agaw(4);
1105 agaw != level_to_agaw(iommu->nr_pt_levels);
1106 agaw-- )
1108 struct dma_pte *p = map_vtd_domain_page(pgd_maddr);
1109 pgd_maddr = dma_pte_addr(*p);
1110 unmap_vtd_domain_page(p);
1111 if ( pgd_maddr == 0 )
1112 goto nomem;
1115 context_set_address_root(*context, pgd_maddr);
1116 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1119 /*
1120 * domain_id 0 is not valid on Intel's IOMMU; force the hardware
1121 * domain_id to be 1-based, as the hardware requires.
1122 */
1123 context_set_domain_id(context, domain);
1124 context_set_address_width(*context, agaw);
1125 context_set_fault_enable(*context);
1126 context_set_present(*context);
1127 iommu_flush_cache_entry(context);
1129 unmap_vtd_domain_page(context_entries);
1131 /* Context entry was previously non-present (with domid 0). */
1132 iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
1133 DMA_CCMD_MASK_NOBIT, 1);
1134 if ( iommu_flush_iotlb_dsi(iommu, 0, 1) )
1135 iommu_flush_write_buffer(iommu);
1137 set_bit(iommu->index, &hd->iommu_bitmap);
1138 spin_unlock_irqrestore(&iommu->lock, flags);
1140 return 0;
1143 #define PCI_BASE_CLASS_BRIDGE 0x06
1144 #define PCI_CLASS_BRIDGE_PCI 0x0604
1146 enum {
1147 DEV_TYPE_PCIe_ENDPOINT,
1148 DEV_TYPE_PCIe_BRIDGE,
1149 DEV_TYPE_PCI_BRIDGE,
1150 DEV_TYPE_PCI,
1151 };
1153 int pdev_type(u8 bus, u8 devfn)
1155 u16 class_device;
1156 u16 status, creg;
1157 int pos;
1158 u8 d = PCI_SLOT(devfn), f = PCI_FUNC(devfn);
1160 class_device = pci_conf_read16(bus, d, f, PCI_CLASS_DEVICE);
1161 if ( class_device == PCI_CLASS_BRIDGE_PCI )
1163 pos = pci_find_next_cap(bus, devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP);
1164 if ( !pos )
1165 return DEV_TYPE_PCI_BRIDGE;
1166 creg = pci_conf_read16(bus, d, f, pos + PCI_EXP_FLAGS);
1167 return ((creg & PCI_EXP_FLAGS_TYPE) >> 4) == PCI_EXP_TYPE_PCI_BRIDGE ?
1168 DEV_TYPE_PCI_BRIDGE : DEV_TYPE_PCIe_BRIDGE;
1171 status = pci_conf_read16(bus, d, f, PCI_STATUS);
1172 if ( !(status & PCI_STATUS_CAP_LIST) )
1173 return DEV_TYPE_PCI;
1175 if ( pci_find_next_cap(bus, devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP) )
1176 return DEV_TYPE_PCIe_ENDPOINT;
1178 return DEV_TYPE_PCI;
1181 #define MAX_BUSES 256
1182 static struct { u8 map, bus, devfn; } bus2bridge[MAX_BUSES];
1184 static int find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
1186 int cnt = 0;
1187 *secbus = *bus;
1189 if ( *bus == 0 )
1190 /* assume integrated PCI devices in RC have valid requester-id */
1191 return 1;
1193 if ( !bus2bridge[*bus].map )
1194 return 0;
1196 while ( bus2bridge[*bus].map )
1198 *secbus = *bus;
1199 *devfn = bus2bridge[*bus].devfn;
1200 *bus = bus2bridge[*bus].bus;
1201 if ( cnt++ >= MAX_BUSES )
1202 return 0;
1205 return 1;
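/*
 * Walk sketch (illustrative bus/devfn values; bus2bridge[] is filled in by
 * domain_context_mapping() below for every PCI bridge it sees): a
 * conventional PCI device behind a PCIe-to-PCI bridge issues DMA with the
 * bridge's requester id, so a device at 05:00.0 behind a bridge at 00:1e.0
 * resolves as
 *
 *     *bus = 5, bus2bridge[5] = { .map = 1, .bus = 0, .devfn = 0xf0 }
 *     result: *bus = 0, *devfn = 0xf0, *secbus = 5
 *
 * after which the caller maps the bridge's context and, since secbus
 * differs from bus, also (secbus, devfn 0).
 */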
1208 static int domain_context_mapping(struct domain *domain, u8 bus, u8 devfn)
1210 struct acpi_drhd_unit *drhd;
1211 int ret = 0;
1212 u16 sec_bus, sub_bus, ob, odf;
1213 u32 type;
1214 u8 secbus;
1216 drhd = acpi_find_matched_drhd_unit(bus, devfn);
1217 if ( !drhd )
1218 return -ENODEV;
1220 type = pdev_type(bus, devfn);
1221 switch ( type )
1223 case DEV_TYPE_PCIe_BRIDGE:
1224 case DEV_TYPE_PCI_BRIDGE:
1225 sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1226 PCI_SECONDARY_BUS);
1227 sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1228 PCI_SUBORDINATE_BUS);
1229 /*dmar_scope_add_buses(&drhd->scope, sec_bus, sub_bus);*/
1231 if ( type == DEV_TYPE_PCIe_BRIDGE )
1232 break;
1234 for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ )
1236 bus2bridge[sec_bus].map = 1;
1237 bus2bridge[sec_bus].bus = bus;
1238 bus2bridge[sec_bus].devfn = devfn;
1240 break;
1242 case DEV_TYPE_PCIe_ENDPOINT:
1243 gdprintk(XENLOG_INFO VTDPREFIX,
1244 "domain_context_mapping:PCIe: bdf = %x:%x.%x\n",
1245 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1246 ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
1247 break;
1249 case DEV_TYPE_PCI:
1250 gdprintk(XENLOG_INFO VTDPREFIX,
1251 "domain_context_mapping:PCI: bdf = %x:%x.%x\n",
1252 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1254 ob = bus; odf = devfn;
1255 if ( !find_pcie_endpoint(&bus, &devfn, &secbus) )
1257 gdprintk(XENLOG_WARNING VTDPREFIX,
1258 "domain_context_mapping:invalid\n");
1259 break;
1262 if ( ob != bus || odf != devfn )
1263 gdprintk(XENLOG_INFO VTDPREFIX,
1264 "domain_context_mapping:map: "
1265 "bdf = %x:%x.%x -> %x:%x.%x\n",
1266 ob, PCI_SLOT(odf), PCI_FUNC(odf),
1267 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1269 ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
1270 if ( secbus != bus )
1271 /*
1272 * The source-id for transactions on non-PCIe buses seems
1273 * to originate from devfn=0 on the secondary bus behind
1274 * the bridge. Map that id as well. The id to use in
1275 * these scenarios is not particularly well documented
1276 * anywhere.
1277 */
1278 domain_context_mapping_one(domain, drhd->iommu, secbus, 0);
1279 break;
1281 default:
1282 gdprintk(XENLOG_ERR VTDPREFIX,
1283 "domain_context_mapping:unknown type : bdf = %x:%x.%x\n",
1284 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1285 ret = -EINVAL;
1286 break;
1289 return ret;
1292 static int domain_context_unmap_one(
1293 struct domain *domain,
1294 struct iommu *iommu,
1295 u8 bus, u8 devfn)
1297 struct context_entry *context, *context_entries;
1298 unsigned long flags;
1299 u64 maddr;
1301 maddr = bus_to_context_maddr(iommu, bus);
1302 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1303 context = &context_entries[devfn];
1305 if ( !context_present(*context) )
1307 unmap_vtd_domain_page(context_entries);
1308 return 0;
1311 spin_lock_irqsave(&iommu->lock, flags);
1312 context_clear_present(*context);
1313 context_clear_entry(*context);
1314 iommu_flush_cache_entry(context);
1315 iommu_flush_context_domain(iommu, domain_iommu_domid(domain), 0);
1316 iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
1317 unmap_vtd_domain_page(context_entries);
1318 spin_unlock_irqrestore(&iommu->lock, flags);
1320 return 0;
1323 static int domain_context_unmap(struct domain *domain, u8 bus, u8 devfn)
1325 struct acpi_drhd_unit *drhd;
1326 u16 sec_bus, sub_bus;
1327 int ret = 0;
1328 u32 type;
1329 u8 secbus;
1331 drhd = acpi_find_matched_drhd_unit(bus, devfn);
1332 if ( !drhd )
1333 return -ENODEV;
1335 type = pdev_type(bus, devfn);
1336 switch ( type )
1338 case DEV_TYPE_PCIe_BRIDGE:
1339 case DEV_TYPE_PCI_BRIDGE:
1340 sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1341 PCI_SECONDARY_BUS);
1342 sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1343 PCI_SUBORDINATE_BUS);
1344 /*dmar_scope_remove_buses(&drhd->scope, sec_bus, sub_bus);*/
1345 if ( type == DEV_TYPE_PCI_BRIDGE )
1346 ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
1347 break;
1349 case DEV_TYPE_PCIe_ENDPOINT:
1350 ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
1351 break;
1353 case DEV_TYPE_PCI:
1354 if ( find_pcie_endpoint(&bus, &devfn, &secbus) )
1355 ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
1356 if ( bus != secbus )
1357 domain_context_unmap_one(domain, drhd->iommu, secbus, 0);
1358 break;
1360 default:
1361 gdprintk(XENLOG_ERR VTDPREFIX,
1362 "domain_context_unmap:unknown type: bdf = %x:%x:%x\n",
1363 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1364 ret = -EINVAL;
1365 break;
1368 return ret;
1371 static int reassign_device_ownership(
1372 struct domain *source,
1373 struct domain *target,
1374 u8 bus, u8 devfn)
1376 struct hvm_iommu *source_hd = domain_hvm_iommu(source);
1377 struct pci_dev *pdev;
1378 struct acpi_drhd_unit *drhd;
1379 struct iommu *pdev_iommu;
1380 int ret, found = 0;
1382 if ( !(pdev = pci_lock_domain_pdev(source, bus, devfn)) )
1383 return -ENODEV;
1385 drhd = acpi_find_matched_drhd_unit(bus, devfn);
1386 pdev_iommu = drhd->iommu;
1387 domain_context_unmap(source, bus, devfn);
1389 ret = domain_context_mapping(target, bus, devfn);
1390 if ( ret )
1391 return ret;
1393 write_lock(&pcidevs_lock);
1394 list_move(&pdev->domain_list, &target->arch.pdev_list);
1395 write_unlock(&pcidevs_lock);
1396 pdev->domain = target;
1398 spin_unlock(&pdev->lock);
1400 read_lock(&pcidevs_lock);
1401 for_each_pdev ( source, pdev )
1403 drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn);
1404 if ( drhd->iommu == pdev_iommu )
1406 found = 1;
1407 break;
1410 read_unlock(&pcidevs_lock);
1412 if ( !found )
1413 clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);
1415 return ret;
1418 void iommu_domain_teardown(struct domain *d)
1420 struct hvm_iommu *hd = domain_hvm_iommu(d);
1422 if ( list_empty(&acpi_drhd_units) )
1423 return;
1425 iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
1426 hd->pgd_maddr = 0;
1427 iommu_domid_release(d);
1430 static int domain_context_mapped(u8 bus, u8 devfn)
1432 struct acpi_drhd_unit *drhd;
1434 for_each_drhd_unit ( drhd )
1435 if ( device_context_mapped(drhd->iommu, bus, devfn) )
1436 return 1;
1438 return 0;
1441 int intel_iommu_map_page(
1442 struct domain *d, unsigned long gfn, unsigned long mfn)
1444 struct hvm_iommu *hd = domain_hvm_iommu(d);
1445 struct acpi_drhd_unit *drhd;
1446 struct iommu *iommu;
1447 struct dma_pte *page = NULL, *pte = NULL;
1448 u64 pg_maddr;
1449 int pte_present;
1451 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1452 iommu = drhd->iommu;
1454 /* do nothing for dom0 if the iommu supports pass-through */
1455 if ( iommu_passthrough &&
1456 ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1457 return 0;
1459 pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
1460 if ( pg_maddr == 0 )
1461 return -ENOMEM;
1462 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
1463 pte = page + (gfn & LEVEL_MASK);
1464 pte_present = dma_pte_present(*pte);
1465 dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
1466 dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
1467 iommu_flush_cache_entry(pte);
1468 unmap_vtd_domain_page(page);
1470 for_each_drhd_unit ( drhd )
1472 iommu = drhd->iommu;
1474 if ( !test_bit(iommu->index, &hd->iommu_bitmap) )
1475 continue;
1477 if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1478 (paddr_t)gfn << PAGE_SHIFT_4K, 1,
1479 !pte_present) )
1480 iommu_flush_write_buffer(iommu);
1483 return 0;
1486 int intel_iommu_unmap_page(struct domain *d, unsigned long gfn)
1488 struct acpi_drhd_unit *drhd;
1489 struct iommu *iommu;
1491 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1492 iommu = drhd->iommu;
1494 /* do nothing for dom0 if the iommu supports pass-through */
1495 if ( iommu_passthrough &&
1496 ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1497 return 0;
1499 dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
1501 return 0;
1504 static int iommu_prepare_rmrr_dev(struct domain *d,
1505 struct acpi_rmrr_unit *rmrr,
1506 u8 bus, u8 devfn)
1508 int ret = 0;
1509 u64 base, end;
1510 unsigned long base_pfn, end_pfn;
1512 ASSERT(rmrr->base_address < rmrr->end_address);
1514 base = rmrr->base_address & PAGE_MASK_4K;
1515 base_pfn = base >> PAGE_SHIFT_4K;
1516 end = PAGE_ALIGN_4K(rmrr->end_address);
1517 end_pfn = end >> PAGE_SHIFT_4K;
1519 while ( base_pfn < end_pfn )
1521 intel_iommu_map_page(d, base_pfn, base_pfn);
1522 base_pfn++;
1525 if ( domain_context_mapped(bus, devfn) == 0 )
1526 ret = domain_context_mapping(d, bus, devfn);
1528 return ret;
1531 static int intel_iommu_add_device(struct pci_dev *pdev)
1533 struct acpi_rmrr_unit *rmrr;
1534 u16 bdf;
1535 int ret, i;
1537 if ( !pdev->domain )
1538 return -EINVAL;
1540 ret = domain_context_mapping(pdev->domain, pdev->bus, pdev->devfn);
1541 if ( ret )
1543 gdprintk(XENLOG_ERR VTDPREFIX,
1544 "intel_iommu_add_device: context mapping failed\n");
1545 return ret;
1548 for_each_rmrr_device ( rmrr, bdf, i )
1550 if ( PCI_BUS(bdf) == pdev->bus && PCI_DEVFN2(bdf) == pdev->devfn )
1552 ret = iommu_prepare_rmrr_dev(pdev->domain, rmrr,
1553 pdev->bus, pdev->devfn);
1554 if ( ret )
1555 gdprintk(XENLOG_ERR VTDPREFIX,
1556 "intel_iommu_add_device: RMRR mapping failed\n");
1557 break;
1561 return ret;
1564 static int intel_iommu_remove_device(struct pci_dev *pdev)
1566 struct acpi_rmrr_unit *rmrr;
1567 u16 bdf;
1568 int i;
1570 if ( !pdev->domain )
1571 return -EINVAL;
1573 /* If the device belongs to dom0, and it has RMRR, don't remove it
1574 * from dom0, because the BIOS may use the RMRR at boot time.
1575 */
1576 if ( pdev->domain->domain_id == 0 )
1578 for_each_rmrr_device ( rmrr, bdf, i )
1580 if ( PCI_BUS(bdf) == pdev->bus &&
1581 PCI_DEVFN2(bdf) == pdev->devfn )
1582 return 0;
1586 return domain_context_unmap(pdev->domain, pdev->bus, pdev->devfn);
1589 static void setup_dom0_devices(struct domain *d)
1591 struct hvm_iommu *hd;
1592 struct pci_dev *pdev;
1593 int bus, dev, func;
1594 u32 l;
1596 hd = domain_hvm_iommu(d);
1598 write_lock(&pcidevs_lock);
1599 for ( bus = 0; bus < 256; bus++ )
1601 for ( dev = 0; dev < 32; dev++ )
1603 for ( func = 0; func < 8; func++ )
1605 l = pci_conf_read32(bus, dev, func, PCI_VENDOR_ID);
1606 /* some broken boards return 0 or ~0 if a slot is empty: */
1607 if ( (l == 0xffffffff) || (l == 0x00000000) ||
1608 (l == 0x0000ffff) || (l == 0xffff0000) )
1609 continue;
1611 pdev = alloc_pdev(bus, PCI_DEVFN(dev, func));
1612 pdev->domain = d;
1613 list_add(&pdev->domain_list, &d->arch.pdev_list);
1614 domain_context_mapping(d, pdev->bus, pdev->devfn);
1618 write_unlock(&pcidevs_lock);
1621 void clear_fault_bits(struct iommu *iommu)
1623 u64 val;
1625 val = dmar_readq(
1626 iommu->reg,
1627 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+0x8);
1628 dmar_writeq(
1629 iommu->reg,
1630 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8,
1631 val);
1632 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_FAULTS);
1635 static int init_vtd_hw(void)
1637 struct acpi_drhd_unit *drhd;
1638 struct iommu *iommu;
1639 struct iommu_flush *flush = NULL;
1640 int vector;
1641 int ret;
1643 for_each_drhd_unit ( drhd )
1645 iommu = drhd->iommu;
1646 ret = iommu_set_root_entry(iommu);
1647 if ( ret )
1649 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
1650 return -EIO;
1653 vector = iommu_set_interrupt(iommu);
1654 dma_msi_data_init(iommu, vector);
1655 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
1656 iommu->vector = vector;
1657 clear_fault_bits(iommu);
1658 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
1660 /* initialize flush functions */
1661 flush = iommu_get_flush(iommu);
1662 flush->context = flush_context_reg;
1663 flush->iotlb = flush_iotlb_reg;
1666 for_each_drhd_unit ( drhd )
1668 iommu = drhd->iommu;
1669 if ( qinval_setup(iommu) != 0 )
1670 dprintk(XENLOG_INFO VTDPREFIX,
1671 "Queued Invalidation hardware not found\n");
1674 for_each_drhd_unit ( drhd )
1676 iommu = drhd->iommu;
1677 if ( intremap_setup(iommu) != 0 )
1678 dprintk(XENLOG_INFO VTDPREFIX,
1679 "Interrupt Remapping hardware not found\n");
1682 return 0;
1685 static void setup_dom0_rmrr(struct domain *d)
1687 struct acpi_rmrr_unit *rmrr;
1688 u16 bdf;
1689 int ret, i;
1691 for_each_rmrr_device ( rmrr, bdf, i )
1693 ret = iommu_prepare_rmrr_dev(d, rmrr, PCI_BUS(bdf), PCI_DEVFN2(bdf));
1694 if ( ret )
1695 gdprintk(XENLOG_ERR VTDPREFIX,
1696 "IOMMU: mapping reserved region failed\n");
1700 int intel_vtd_setup(void)
1702 struct acpi_drhd_unit *drhd;
1703 struct iommu *iommu;
1705 if ( !vtd_enabled )
1706 return -ENODEV;
1708 spin_lock_init(&domid_bitmap_lock);
1709 clflush_size = get_cache_line_size();
1711 for_each_drhd_unit ( drhd )
1712 if ( iommu_alloc(drhd) != 0 )
1713 goto error;
1715 /* Use the first IOMMU's capabilities to size the domain-id bitmap. */
1716 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1717 iommu = drhd->iommu;
1719 /* Allocate domain id bitmap, and set bit 0 as reserved */
1720 domid_bitmap_size = cap_ndoms(iommu->cap);
1721 domid_bitmap = xmalloc_array(unsigned long,
1722 BITS_TO_LONGS(domid_bitmap_size));
1723 if ( domid_bitmap == NULL )
1724 goto error;
1725 memset(domid_bitmap, 0, domid_bitmap_size / 8);
1726 set_bit(0, domid_bitmap);
1728 if ( init_vtd_hw() )
1729 goto error;
1731 register_keyhandler('V', dump_iommu_info, "dump iommu info");
1733 return 0;
1735 error:
1736 for_each_drhd_unit ( drhd )
1737 iommu_free(drhd);
1738 vtd_enabled = 0;
1739 return -ENOMEM;
1742 /*
1743 * If the device isn't owned by dom0, it has already been
1744 * assigned to another domain, or it does not exist.
1745 */
1746 int device_assigned(u8 bus, u8 devfn)
1748 struct pci_dev *pdev;
1750 if ( (pdev = pci_lock_domain_pdev(dom0, bus, devfn)) )
1752 spin_unlock(&pdev->lock);
1753 return 0;
1756 return 1;
1759 int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
1761 struct acpi_rmrr_unit *rmrr;
1762 int ret = 0, i;
1763 u16 bdf;
1765 if ( list_empty(&acpi_drhd_units) )
1766 return -ENODEV;
1768 ret = reassign_device_ownership(dom0, d, bus, devfn);
1769 if ( ret )
1770 return ret;
1772 /* Setup rmrr identity mapping */
1773 for_each_rmrr_device( rmrr, bdf, i )
1775 if ( PCI_BUS(bdf) == bus && PCI_DEVFN2(bdf) == devfn )
1777 /* FIXME: Because the USB RMRR conflicts with the guest BIOS region,
1778 * ignore USB RMRR temporarily.
1779 */
1780 if ( is_usb_device(bus, devfn) )
1781 return 0;
1783 ret = iommu_prepare_rmrr_dev(d, rmrr, bus, devfn);
1784 if ( ret )
1785 gdprintk(XENLOG_ERR VTDPREFIX,
1786 "IOMMU: mapping reserved region failed\n");
1787 return ret;
1791 return ret;
1794 static int intel_iommu_group_id(u8 bus, u8 devfn)
1796 u8 secbus;
1797 if ( !bus2bridge[bus].map || find_pcie_endpoint(&bus, &devfn, &secbus) )
1798 return PCI_BDF2(bus, devfn);
1799 else
1800 return -1;
1803 static u32 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS];
1804 int iommu_suspend(void)
1806 struct acpi_drhd_unit *drhd;
1807 struct iommu *iommu;
1808 u32 i;
1810 if ( !vtd_enabled )
1811 return 0;
1813 iommu_flush_all();
1815 for_each_drhd_unit ( drhd )
1817 iommu = drhd->iommu;
1818 i = iommu->index;
1820 iommu_state[i][DMAR_FECTL_REG] =
1821 (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
1822 iommu_state[i][DMAR_FEDATA_REG] =
1823 (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
1824 iommu_state[i][DMAR_FEADDR_REG] =
1825 (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
1826 iommu_state[i][DMAR_FEUADDR_REG] =
1827 (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
1830 return 0;
1833 int iommu_resume(void)
1835 struct acpi_drhd_unit *drhd;
1836 struct iommu *iommu;
1837 u32 i;
1839 if ( !vtd_enabled )
1840 return 0;
1842 iommu_flush_all();
1844 if ( init_vtd_hw() != 0 && force_iommu )
1845 panic("IOMMU setup failed, crash Xen for security purposes!\n");
1847 for_each_drhd_unit ( drhd )
1849 iommu = drhd->iommu;
1850 i = iommu->index;
1852 dmar_writel(iommu->reg, DMAR_FECTL_REG,
1853 (u32) iommu_state[i][DMAR_FECTL_REG]);
1854 dmar_writel(iommu->reg, DMAR_FEDATA_REG,
1855 (u32) iommu_state[i][DMAR_FEDATA_REG]);
1856 dmar_writel(iommu->reg, DMAR_FEADDR_REG,
1857 (u32) iommu_state[i][DMAR_FEADDR_REG]);
1858 dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
1859 (u32) iommu_state[i][DMAR_FEUADDR_REG]);
1861 if ( iommu_enable_translation(iommu) )
1862 return -EIO;
1865 return 0;
1868 struct iommu_ops intel_iommu_ops = {
1869 .init = intel_iommu_domain_init,
1870 .add_device = intel_iommu_add_device,
1871 .remove_device = intel_iommu_remove_device,
1872 .assign_device = intel_iommu_assign_device,
1873 .teardown = iommu_domain_teardown,
1874 .map_page = intel_iommu_map_page,
1875 .unmap_page = intel_iommu_unmap_page,
1876 .reassign_device = reassign_device_ownership,
1877 .get_device_group_id = intel_iommu_group_id,
1878 .update_ire_from_apic = io_apic_write_remap_rte,
1879 .update_ire_from_msi = msi_msg_write_remap_rte,
1880 };
1882 /*
1883 * Local variables:
1884 * mode: C
1885 * c-set-style: "BSD"
1886 * c-basic-offset: 4
1887 * tab-width: 4
1888 * indent-tabs-mode: nil
1889 * End:
1890 */