ia64/xen-unstable: xen/drivers/passthrough/vtd/iommu.c @ 18650:609d0d34450f

vtd: code cleanup

Remove iommu_page_mapping/unmapping, which are redundant because
intel_iommu_map_page/unmap_page already cover their functionality.

Correct IRTA_REG_EIMI_SHIFT to IRTA_REG_EIME_SHIFT.

Also remove unneeded declarations from iommu.c.

Signed-off-by: Weidong Han <weidong.han@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Oct 17 12:04:11 2008 +0100 (2008-10-17)
parents 0feedf7dcc73
children e57ca7937ae8
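Note on the cleanup (illustration, not part of the changeset): the removed range helpers can be expressed as a loop over the per-page hook that remains, which is the pattern iommu_prepare_rmrr_dev() in this file already uses. A minimal sketch, assuming the PAGE_*_4K helpers used elsewhere in this file; the helper name map_identity_range is hypothetical:

/* Hypothetical sketch only, not part of this changeset. */
static int map_identity_range(struct domain *d, u64 base, u64 end)
{
    unsigned long pfn = (base & PAGE_MASK_4K) >> PAGE_SHIFT_4K;
    unsigned long end_pfn = PAGE_ALIGN_4K(end) >> PAGE_SHIFT_4K;
    int rc = 0;

    /* Identity-map the range one 4K page at a time. */
    for ( ; pfn < end_pfn && rc == 0; pfn++ )
        rc = intel_iommu_map_page(d, pfn, pfn);

    return rc;
}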
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
20 */
22 #include <xen/irq.h>
23 #include <xen/sched.h>
24 #include <xen/xmalloc.h>
25 #include <xen/domain_page.h>
26 #include <xen/iommu.h>
27 #include <xen/numa.h>
28 #include <xen/time.h>
29 #include <xen/pci.h>
30 #include <xen/pci_regs.h>
31 #include <xen/keyhandler.h>
32 #include "iommu.h"
33 #include "dmar.h"
34 #include "extern.h"
35 #include "vtd.h"
37 #define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
39 static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */
40 static int domid_bitmap_size; /* domain id bitmap size in bits */
41 static unsigned long *domid_bitmap; /* iommu domain id bitmap */
43 static void setup_dom0_devices(struct domain *d);
44 static void setup_dom0_rmrr(struct domain *d);
46 #define DID_FIELD_WIDTH 16
47 #define DID_HIGH_OFFSET 8
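/*
 * The context entry's domain-id field occupies DID_FIELD_WIDTH bits of
 * context->hi starting at bit DID_HIGH_OFFSET: the helper below keeps
 * the low DID_HIGH_OFFSET bits of ->hi intact and ORs the iommu domain
 * id in above them.
 */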
48 static void context_set_domain_id(struct context_entry *context,
49 struct domain *d)
50 {
51 unsigned long flags;
52 domid_t iommu_domid = domain_iommu_domid(d);
54 if ( iommu_domid == 0 )
55 {
56 spin_lock_irqsave(&domid_bitmap_lock, flags);
57 iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
58 set_bit(iommu_domid, domid_bitmap);
59 spin_unlock_irqrestore(&domid_bitmap_lock, flags);
60 d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
61 }
63 context->hi &= (1 << DID_HIGH_OFFSET) - 1;
64 context->hi |= iommu_domid << DID_HIGH_OFFSET;
65 }
67 static void iommu_domid_release(struct domain *d)
68 {
69 domid_t iommu_domid = domain_iommu_domid(d);
71 if ( iommu_domid != 0 )
72 {
73 d->arch.hvm_domain.hvm_iommu.iommu_domid = 0;
74 clear_bit(iommu_domid, domid_bitmap);
75 }
76 }
78 static struct intel_iommu *alloc_intel_iommu(void)
79 {
80 struct intel_iommu *intel;
82 intel = xmalloc(struct intel_iommu);
83 if ( intel == NULL )
84 return NULL;
85 memset(intel, 0, sizeof(struct intel_iommu));
87 spin_lock_init(&intel->qi_ctrl.qinval_lock);
88 spin_lock_init(&intel->qi_ctrl.qinval_poll_lock);
89 spin_lock_init(&intel->ir_ctrl.iremap_lock);
91 return intel;
92 }
94 static void free_intel_iommu(struct intel_iommu *intel)
95 {
96 xfree(intel);
97 }
99 struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu)
100 {
101 return iommu ? &iommu->intel->qi_ctrl : NULL;
102 }
104 struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu)
105 {
106 return iommu ? &iommu->intel->ir_ctrl : NULL;
107 }
109 struct iommu_flush *iommu_get_flush(struct iommu *iommu)
110 {
111 return iommu ? &iommu->intel->flush : NULL;
112 }
114 static unsigned int clflush_size;
115 static int iommus_incoherent;
116 static void __iommu_flush_cache(void *addr, int size)
117 {
118 int i;
120 if ( !iommus_incoherent )
121 return;
123 for ( i = 0; i < size; i += clflush_size )
124 cacheline_flush((char *)addr + i);
125 }
127 void iommu_flush_cache_entry(void *addr)
128 {
129 __iommu_flush_cache(addr, 8);
130 }
132 void iommu_flush_cache_page(void *addr)
133 {
134 __iommu_flush_cache(addr, PAGE_SIZE_4K);
135 }
137 int nr_iommus;
138 /* context entry handling */
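/*
 * Return the machine address of the context-entry table for 'bus',
 * allocating a fresh table and installing it in the root entry if the
 * bus has none yet.  Returns 0 on allocation failure.
 */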
139 static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
140 {
141 struct root_entry *root, *root_entries;
142 unsigned long flags;
143 u64 maddr;
145 spin_lock_irqsave(&iommu->lock, flags);
146 root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
147 root = &root_entries[bus];
148 if ( !root_present(*root) )
149 {
150 maddr = alloc_pgtable_maddr();
151 if ( maddr == 0 )
152 {
153 unmap_vtd_domain_page(root_entries);
154 spin_unlock_irqrestore(&iommu->lock, flags);
155 return 0;
156 }
157 set_root_value(*root, maddr);
158 set_root_present(*root);
159 iommu_flush_cache_entry(root);
160 }
161 maddr = (u64) get_context_addr(*root);
162 unmap_vtd_domain_page(root_entries);
163 spin_unlock_irqrestore(&iommu->lock, flags);
164 return maddr;
165 }
167 static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
168 {
169 struct root_entry *root, *root_entries;
170 struct context_entry *context;
171 u64 context_maddr;
172 int ret;
173 unsigned long flags;
175 spin_lock_irqsave(&iommu->lock, flags);
176 root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
177 root = &root_entries[bus];
178 if ( !root_present(*root) )
179 {
180 ret = 0;
181 goto out;
182 }
183 context_maddr = get_context_addr(*root);
184 context = (struct context_entry *)map_vtd_domain_page(context_maddr);
185 ret = context_present(context[devfn]);
186 unmap_vtd_domain_page(context);
187 out:
188 unmap_vtd_domain_page(root_entries);
189 spin_unlock_irqrestore(&iommu->lock, flags);
190 return ret;
191 }
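/*
 * Walk (and, if 'alloc' is set, build) the DMA page tables for 'addr'
 * and return the machine address of the level-1 (leaf) page table that
 * covers it, or 0 if the table is absent and allocation is not allowed
 * or fails.
 */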
193 static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
194 {
195 struct hvm_iommu *hd = domain_hvm_iommu(domain);
196 int addr_width = agaw_to_width(hd->agaw);
197 struct dma_pte *parent, *pte = NULL;
198 int level = agaw_to_level(hd->agaw);
199 int offset;
200 unsigned long flags;
201 u64 pte_maddr = 0, maddr;
202 u64 *vaddr = NULL;
204 addr &= (((u64)1) << addr_width) - 1;
205 spin_lock_irqsave(&hd->mapping_lock, flags);
206 if ( hd->pgd_maddr == 0 )
207 if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr()) == 0) )
208 goto out;
210 parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
211 while ( level > 1 )
212 {
213 offset = address_level_offset(addr, level);
214 pte = &parent[offset];
216 if ( dma_pte_addr(*pte) == 0 )
217 {
218 if ( !alloc )
219 break;
220 maddr = alloc_pgtable_maddr();
221 dma_set_pte_addr(*pte, maddr);
222 vaddr = map_vtd_domain_page(maddr);
223 if ( !vaddr )
224 break;
226 /*
227 * Higher-level entries always set read/write; only the last-level
228 * page table controls the actual read/write permissions.
229 */
230 dma_set_pte_readable(*pte);
231 dma_set_pte_writable(*pte);
232 iommu_flush_cache_entry(pte);
233 }
234 else
235 {
236 vaddr = map_vtd_domain_page(pte->val);
237 if ( !vaddr )
238 break;
239 }
241 if ( level == 2 )
242 {
243 pte_maddr = pte->val & PAGE_MASK_4K;
244 unmap_vtd_domain_page(vaddr);
245 break;
246 }
248 unmap_vtd_domain_page(parent);
249 parent = (struct dma_pte *)vaddr;
250 vaddr = NULL;
251 level--;
252 }
254 unmap_vtd_domain_page(parent);
255 out:
256 spin_unlock_irqrestore(&hd->mapping_lock, flags);
257 return pte_maddr;
258 }
260 static void iommu_flush_write_buffer(struct iommu *iommu)
261 {
262 u32 val;
263 unsigned long flag;
264 s_time_t start_time;
266 if ( !cap_rwbf(iommu->cap) )
267 return;
268 val = iommu->gcmd | DMA_GCMD_WBF;
270 spin_lock_irqsave(&iommu->register_lock, flag);
271 dmar_writel(iommu->reg, DMAR_GCMD_REG, val);
273 /* Make sure the hardware completes it */
274 start_time = NOW();
275 for ( ; ; )
276 {
277 val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
278 if ( !(val & DMA_GSTS_WBFS) )
279 break;
280 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
281 panic("%s: DMAR hardware is malfunctional,"
282 " please disable IOMMU\n", __func__);
283 cpu_relax();
284 }
285 spin_unlock_irqrestore(&iommu->register_lock, flag);
286 }
288 /* return value determines whether we need a write buffer flush */
289 static int flush_context_reg(
290 void *_iommu,
291 u16 did, u16 source_id, u8 function_mask, u64 type,
292 int non_present_entry_flush)
293 {
294 struct iommu *iommu = (struct iommu *) _iommu;
295 u64 val = 0;
296 unsigned long flag;
297 s_time_t start_time;
299 /*
300 * In the non-present entry flush case: if the hardware does not
301 * cache non-present entries we do nothing; if it does, we flush
302 * the entries of domain 0 (whose domain id is used to tag any
303 * cached non-present entries).
304 */
305 if ( non_present_entry_flush )
306 {
307 if ( !cap_caching_mode(iommu->cap) )
308 return 1;
309 else
310 did = 0;
311 }
313 /* use register invalidation */
314 switch ( type )
315 {
316 case DMA_CCMD_GLOBAL_INVL:
317 val = DMA_CCMD_GLOBAL_INVL;
318 break;
319 case DMA_CCMD_DOMAIN_INVL:
320 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
321 break;
322 case DMA_CCMD_DEVICE_INVL:
323 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
324 |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
325 break;
326 default:
327 BUG();
328 }
329 val |= DMA_CCMD_ICC;
331 spin_lock_irqsave(&iommu->register_lock, flag);
332 dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
334 /* Make sure the hardware completes it */
335 start_time = NOW();
336 for ( ; ; )
337 {
338 val = dmar_readq(iommu->reg, DMAR_CCMD_REG);
339 if ( !(val & DMA_CCMD_ICC) )
340 break;
341 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
342 panic("%s: DMAR hardware is malfunctional,"
343 " please disable IOMMU\n", __func__);
344 cpu_relax();
345 }
346 spin_unlock_irqrestore(&iommu->register_lock, flag);
347 /* flushing a context entry implicitly flushes the write buffer */
348 return 0;
349 }
351 static int inline iommu_flush_context_global(
352 struct iommu *iommu, int non_present_entry_flush)
353 {
354 struct iommu_flush *flush = iommu_get_flush(iommu);
355 return flush->context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
356 non_present_entry_flush);
357 }
359 static int inline iommu_flush_context_domain(
360 struct iommu *iommu, u16 did, int non_present_entry_flush)
361 {
362 struct iommu_flush *flush = iommu_get_flush(iommu);
363 return flush->context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
364 non_present_entry_flush);
365 }
367 static int inline iommu_flush_context_device(
368 struct iommu *iommu, u16 did, u16 source_id,
369 u8 function_mask, int non_present_entry_flush)
370 {
371 struct iommu_flush *flush = iommu_get_flush(iommu);
372 return flush->context(iommu, did, source_id, function_mask,
373 DMA_CCMD_DEVICE_INVL,
374 non_present_entry_flush);
375 }
377 /* return value determines whether we need a write buffer flush */
378 static int flush_iotlb_reg(void *_iommu, u16 did,
379 u64 addr, unsigned int size_order, u64 type,
380 int non_present_entry_flush)
381 {
382 struct iommu *iommu = (struct iommu *) _iommu;
383 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
384 u64 val = 0, val_iva = 0;
385 unsigned long flag;
386 s_time_t start_time;
388 /*
389 * In the non-present entry flush case: if the hardware does not
390 * cache non-present entries we do nothing; if it does, we flush
391 * the entries of domain 0 (whose domain id is used to tag any
392 * cached non-present entries).
393 */
394 if ( non_present_entry_flush )
395 {
396 if ( !cap_caching_mode(iommu->cap) )
397 return 1;
398 else
399 did = 0;
400 }
402 /* use register invalidation */
403 switch ( type )
404 {
405 case DMA_TLB_GLOBAL_FLUSH:
406 /* a global flush doesn't need to set IVA_REG */
407 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
408 break;
409 case DMA_TLB_DSI_FLUSH:
410 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
411 break;
412 case DMA_TLB_PSI_FLUSH:
413 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
414 /* Note: always flush non-leaf currently */
415 val_iva = size_order | addr;
416 break;
417 default:
418 BUG();
419 }
420 /* Note: set drain read/write */
421 if ( cap_read_drain(iommu->cap) )
422 val |= DMA_TLB_READ_DRAIN;
423 if ( cap_write_drain(iommu->cap) )
424 val |= DMA_TLB_WRITE_DRAIN;
426 spin_lock_irqsave(&iommu->register_lock, flag);
427 /* Note: Only uses first TLB reg currently */
428 if ( val_iva )
429 dmar_writeq(iommu->reg, tlb_offset, val_iva);
430 dmar_writeq(iommu->reg, tlb_offset + 8, val);
432 /* Make sure the hardware completes it */
433 start_time = NOW();
434 for ( ; ; )
435 {
436 val = dmar_readq(iommu->reg, tlb_offset + 8);
437 if ( !(val & DMA_TLB_IVT) )
438 break;
439 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
440 panic("%s: DMAR hardware is malfunctional,"
441 " please disable IOMMU\n", __func__);
442 cpu_relax();
443 }
444 spin_unlock_irqrestore(&iommu->register_lock, flag);
446 /* check IOTLB invalidation granularity */
447 if ( DMA_TLB_IAIG(val) == 0 )
448 dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: flush IOTLB failed\n");
450 if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
451 dprintk(XENLOG_INFO VTDPREFIX,
452 "IOMMU: tlb flush request %x, actual %x\n",
453 (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
454 /* flushing an iotlb entry implicitly flushes the write buffer */
455 return 0;
456 }
458 static int inline iommu_flush_iotlb_global(struct iommu *iommu,
459 int non_present_entry_flush)
460 {
461 struct iommu_flush *flush = iommu_get_flush(iommu);
462 return flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
463 non_present_entry_flush);
464 }
466 static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
467 int non_present_entry_flush)
468 {
469 struct iommu_flush *flush = iommu_get_flush(iommu);
470 return flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
471 non_present_entry_flush);
472 }
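/*
 * Return the number of low-order address bits over which [base, base +
 * size) varies, i.e. the order of the smallest naturally aligned
 * power-of-two region containing the range.  Used below as the
 * address-mask value for page-selective IOTLB flushes.
 */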
474 static int inline get_alignment(u64 base, unsigned int size)
475 {
476 int t = 0;
477 u64 end;
479 end = base + size - 1;
480 while ( base != end )
481 {
482 t++;
483 base >>= 1;
484 end >>= 1;
485 }
486 return t;
487 }
489 static int inline iommu_flush_iotlb_psi(
490 struct iommu *iommu, u16 did,
491 u64 addr, unsigned int pages, int non_present_entry_flush)
492 {
493 unsigned int align;
494 struct iommu_flush *flush = iommu_get_flush(iommu);
496 ASSERT(!(addr & (~PAGE_MASK_4K)));
497 ASSERT(pages > 0);
499 /* Fallback to domain selective flush if no PSI support */
500 if ( !cap_pgsel_inv(iommu->cap) )
501 return iommu_flush_iotlb_dsi(iommu, did,
502 non_present_entry_flush);
504 /*
505 * PSI requires the region size to be a power of two and the base
506 * address to be naturally aligned to that size.
507 */
508 align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
509 /* Fallback to domain selective flush if size is too big */
510 if ( align > cap_max_amask_val(iommu->cap) )
511 return iommu_flush_iotlb_dsi(iommu, did,
512 non_present_entry_flush);
514 addr >>= PAGE_SHIFT_4K + align;
515 addr <<= PAGE_SHIFT_4K + align;
517 return flush->iotlb(iommu, did, addr, align,
518 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
519 }
521 void iommu_flush_all(void)
522 {
523 struct acpi_drhd_unit *drhd;
524 struct iommu *iommu;
526 flush_all_cache();
527 for_each_drhd_unit ( drhd )
528 {
529 iommu = drhd->iommu;
530 iommu_flush_context_global(iommu, 0);
531 iommu_flush_iotlb_global(iommu, 0);
532 }
533 }
535 /* clear one page's mapping (its leaf PTE) */
536 static void dma_pte_clear_one(struct domain *domain, u64 addr)
537 {
538 struct hvm_iommu *hd = domain_hvm_iommu(domain);
539 struct acpi_drhd_unit *drhd;
540 struct iommu *iommu;
541 struct dma_pte *page = NULL, *pte = NULL;
542 u64 pg_maddr;
544 /* get last level pte */
545 pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
546 if ( pg_maddr == 0 )
547 return;
548 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
549 pte = page + address_level_offset(addr, 1);
551 if ( !dma_pte_present(*pte) )
552 {
553 unmap_vtd_domain_page(page);
554 return;
555 }
557 dma_clear_pte(*pte);
558 iommu_flush_cache_entry(pte);
560 for_each_drhd_unit ( drhd )
561 {
562 iommu = drhd->iommu;
563 if ( test_bit(iommu->index, &hd->iommu_bitmap) )
564 if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
565 addr, 1, 0))
566 iommu_flush_write_buffer(iommu);
567 }
569 unmap_vtd_domain_page(page);
570 }
572 static void iommu_free_pagetable(u64 pt_maddr, int level)
573 {
574 int i;
575 struct dma_pte *pt_vaddr, *pte;
576 int next_level = level - 1;
578 if ( pt_maddr == 0 )
579 return;
581 pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
583 for ( i = 0; i < PTE_NUM; i++ )
584 {
585 pte = &pt_vaddr[i];
586 if ( !dma_pte_present(*pte) )
587 continue;
589 if ( next_level >= 1 )
590 iommu_free_pagetable(dma_pte_addr(*pte), next_level);
592 dma_clear_pte(*pte);
593 iommu_flush_cache_entry(pte);
594 }
596 unmap_vtd_domain_page(pt_vaddr);
597 free_pgtable_maddr(pt_maddr);
598 }
600 static int iommu_set_root_entry(struct iommu *iommu)
601 {
602 u32 cmd, sts;
603 unsigned long flags;
604 s_time_t start_time;
606 spin_lock_irqsave(&iommu->register_lock, flags);
608 if ( iommu->root_maddr == 0 )
609 iommu->root_maddr = alloc_pgtable_maddr();
610 if ( iommu->root_maddr == 0 )
611 {
612 spin_unlock_irqrestore(&iommu->register_lock, flags);
613 return -ENOMEM;
614 }
616 dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
617 cmd = iommu->gcmd | DMA_GCMD_SRTP;
618 dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
620 /* Make sure the hardware completes it */
621 start_time = NOW();
622 for ( ; ; )
623 {
624 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
625 if ( sts & DMA_GSTS_RTPS )
626 break;
627 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
628 panic("%s: DMAR hardware is malfunctional,"
629 " please disable IOMMU\n", __func__);
630 cpu_relax();
631 }
633 spin_unlock_irqrestore(&iommu->register_lock, flags);
635 return 0;
636 }
638 static int iommu_enable_translation(struct iommu *iommu)
639 {
640 u32 sts;
641 unsigned long flags;
642 s_time_t start_time;
644 dprintk(XENLOG_INFO VTDPREFIX,
645 "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
646 spin_lock_irqsave(&iommu->register_lock, flags);
647 iommu->gcmd |= DMA_GCMD_TE;
648 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
649 /* Make sure the hardware completes it */
650 start_time = NOW();
651 for ( ; ; )
652 {
653 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
654 if ( sts & DMA_GSTS_TES )
655 break;
656 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
657 panic("%s: DMAR hardware is malfunctional,"
658 " please disable IOMMU\n", __func__);
659 cpu_relax();
660 }
662 /* Disable PMRs when VT-d engine takes effect per spec definition */
663 disable_pmr(iommu);
664 spin_unlock_irqrestore(&iommu->register_lock, flags);
665 return 0;
666 }
668 int iommu_disable_translation(struct iommu *iommu)
669 {
670 u32 sts;
671 unsigned long flags;
672 s_time_t start_time;
674 spin_lock_irqsave(&iommu->register_lock, flags);
675 iommu->gcmd &= ~ DMA_GCMD_TE;
676 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
678 /* Make sure the hardware completes it */
679 start_time = NOW();
680 for ( ; ; )
681 {
682 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
683 if ( !(sts & DMA_GSTS_TES) )
684 break;
685 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
686 panic("%s: DMAR hardware is malfunctional,"
687 " please disable IOMMU\n", __func__);
688 cpu_relax();
689 }
690 spin_unlock_irqrestore(&iommu->register_lock, flags);
691 return 0;
692 }
694 static struct iommu *vector_to_iommu[NR_VECTORS];
695 static int iommu_page_fault_do_one(struct iommu *iommu, int type,
696 u8 fault_reason, u16 source_id, u64 addr)
697 {
698 dprintk(XENLOG_WARNING VTDPREFIX,
699 "iommu_fault:%s: %x:%x.%x addr %"PRIx64" REASON %x "
700 "iommu->reg = %p\n",
701 (type ? "DMA Read" : "DMA Write"), (source_id >> 8),
702 PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr,
703 fault_reason, iommu->reg);
705 #ifndef __i386__ /* map_domain_page() cannot be used in this context */
706 if ( fault_reason < 0x20 )
707 print_vtd_entries(iommu, (source_id >> 8),
708 (source_id & 0xff), (addr >> PAGE_SHIFT));
709 #endif
711 return 0;
712 }
714 static void iommu_fault_status(u32 fault_status)
715 {
716 if ( fault_status & DMA_FSTS_PFO )
717 dprintk(XENLOG_ERR VTDPREFIX,
718 "iommu_fault_status: Fault Overflow\n");
719 else if ( fault_status & DMA_FSTS_PPF )
720 dprintk(XENLOG_ERR VTDPREFIX,
721 "iommu_fault_status: Primary Pending Fault\n");
722 else if ( fault_status & DMA_FSTS_AFO )
723 dprintk(XENLOG_ERR VTDPREFIX,
724 "iommu_fault_status: Advanced Fault Overflow\n");
725 else if ( fault_status & DMA_FSTS_APF )
726 dprintk(XENLOG_ERR VTDPREFIX,
727 "iommu_fault_status: Advanced Pending Fault\n");
728 else if ( fault_status & DMA_FSTS_IQE )
729 dprintk(XENLOG_ERR VTDPREFIX,
730 "iommu_fault_status: Invalidation Queue Error\n");
731 else if ( fault_status & DMA_FSTS_ICE )
732 dprintk(XENLOG_ERR VTDPREFIX,
733 "iommu_fault_status: Invalidation Completion Error\n");
734 else if ( fault_status & DMA_FSTS_ITE )
735 dprintk(XENLOG_ERR VTDPREFIX,
736 "iommu_fault_status: Invalidation Time-out Error\n");
737 }
739 #define PRIMARY_FAULT_REG_LEN (16)
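/*
 * Each fault recording register is PRIMARY_FAULT_REG_LEN (16) bytes:
 * the faulting address lives in the low quadword, the source-id in the
 * dword at offset 8, and the fault reason/type plus the F (valid) bit
 * in the dword at offset 12.  The handler below scans the records,
 * logs each valid one and clears its F bit.
 */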
740 static void iommu_page_fault(int vector, void *dev_id,
741 struct cpu_user_regs *regs)
742 {
743 struct iommu *iommu = dev_id;
744 int reg, fault_index;
745 u32 fault_status;
746 unsigned long flags;
748 dprintk(XENLOG_WARNING VTDPREFIX,
749 "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
751 spin_lock_irqsave(&iommu->register_lock, flags);
752 fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
753 spin_unlock_irqrestore(&iommu->register_lock, flags);
755 iommu_fault_status(fault_status);
757 /* FIXME: ignore advanced fault log */
758 if ( !(fault_status & DMA_FSTS_PPF) )
759 return;
760 fault_index = dma_fsts_fault_record_index(fault_status);
761 reg = cap_fault_reg_offset(iommu->cap);
762 for ( ; ; )
763 {
764 u8 fault_reason;
765 u16 source_id;
766 u32 data;
767 u64 guest_addr;
768 int type;
770 /* highest 32 bits */
771 spin_lock_irqsave(&iommu->register_lock, flags);
772 data = dmar_readl(iommu->reg, reg +
773 fault_index * PRIMARY_FAULT_REG_LEN + 12);
774 if ( !(data & DMA_FRCD_F) )
775 {
776 spin_unlock_irqrestore(&iommu->register_lock, flags);
777 break;
778 }
780 fault_reason = dma_frcd_fault_reason(data);
781 type = dma_frcd_type(data);
783 data = dmar_readl(iommu->reg, reg +
784 fault_index * PRIMARY_FAULT_REG_LEN + 8);
785 source_id = dma_frcd_source_id(data);
787 guest_addr = dmar_readq(iommu->reg, reg +
788 fault_index * PRIMARY_FAULT_REG_LEN);
789 guest_addr = dma_frcd_page_addr(guest_addr);
790 /* clear the fault */
791 dmar_writel(iommu->reg, reg +
792 fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
793 spin_unlock_irqrestore(&iommu->register_lock, flags);
795 iommu_page_fault_do_one(iommu, type, fault_reason,
796 source_id, guest_addr);
798 fault_index++;
799 if ( fault_index >= cap_num_fault_regs(iommu->cap) )
800 fault_index = 0;
801 }
803 /* clear primary fault overflow */
804 if ( fault_status & DMA_FSTS_PFO )
805 {
806 spin_lock_irqsave(&iommu->register_lock, flags);
807 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
808 spin_unlock_irqrestore(&iommu->register_lock, flags);
809 }
810 }
812 static void dma_msi_unmask(unsigned int vector)
813 {
814 struct iommu *iommu = vector_to_iommu[vector];
815 unsigned long flags;
817 /* unmask it */
818 spin_lock_irqsave(&iommu->register_lock, flags);
819 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
820 spin_unlock_irqrestore(&iommu->register_lock, flags);
821 }
823 static void dma_msi_mask(unsigned int vector)
824 {
825 unsigned long flags;
826 struct iommu *iommu = vector_to_iommu[vector];
828 /* mask it */
829 spin_lock_irqsave(&iommu->register_lock, flags);
830 dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
831 spin_unlock_irqrestore(&iommu->register_lock, flags);
832 }
834 static unsigned int dma_msi_startup(unsigned int vector)
835 {
836 dma_msi_unmask(vector);
837 return 0;
838 }
840 static void dma_msi_end(unsigned int vector)
841 {
842 dma_msi_unmask(vector);
843 ack_APIC_irq();
844 }
846 static void dma_msi_data_init(struct iommu *iommu, int vector)
847 {
848 u32 msi_data = 0;
849 unsigned long flags;
851 /* Fixed, edge, assert mode. Follow MSI setting */
852 msi_data |= vector & 0xff;
853 msi_data |= 1 << 14;
855 spin_lock_irqsave(&iommu->register_lock, flags);
856 dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
857 spin_unlock_irqrestore(&iommu->register_lock, flags);
858 }
860 static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
861 {
862 u64 msi_address;
863 unsigned long flags;
865 /* Physical, dedicated cpu. Follow MSI setting */
866 msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
867 msi_address |= MSI_PHYSICAL_MODE << 2;
868 msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
869 msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;
871 spin_lock_irqsave(&iommu->register_lock, flags);
872 dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
873 dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
874 spin_unlock_irqrestore(&iommu->register_lock, flags);
875 }
877 static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
878 {
879 struct iommu *iommu = vector_to_iommu[vector];
880 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
881 }
883 static struct hw_interrupt_type dma_msi_type = {
884 .typename = "DMA_MSI",
885 .startup = dma_msi_startup,
886 .shutdown = dma_msi_mask,
887 .enable = dma_msi_unmask,
888 .disable = dma_msi_mask,
889 .ack = dma_msi_mask,
890 .end = dma_msi_end,
891 .set_affinity = dma_msi_set_affinity,
892 };
894 int iommu_set_interrupt(struct iommu *iommu)
895 {
896 int vector, ret;
898 vector = assign_irq_vector(AUTO_ASSIGN);
899 vector_to_iommu[vector] = iommu;
901 /* The VT-d fault interrupt is an MSI, so make irq == vector */
902 irq_vector[vector] = vector;
903 vector_irq[vector] = vector;
905 if ( !vector )
906 {
907 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
908 return -EINVAL;
909 }
911 irq_desc[vector].handler = &dma_msi_type;
912 ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
913 if ( ret )
914 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
915 return vector;
916 }
918 static int iommu_alloc(struct acpi_drhd_unit *drhd)
919 {
920 struct iommu *iommu;
921 unsigned long sagaw;
922 int agaw;
924 if ( nr_iommus >= MAX_IOMMUS )
925 {
926 gdprintk(XENLOG_ERR VTDPREFIX,
927 "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
928 return -ENOMEM;
929 }
931 iommu = xmalloc(struct iommu);
932 if ( iommu == NULL )
933 return -ENOMEM;
934 memset(iommu, 0, sizeof(struct iommu));
936 iommu->intel = alloc_intel_iommu();
937 if ( iommu->intel == NULL )
938 {
939 xfree(iommu);
940 return -ENOMEM;
941 }
943 iommu->reg = map_to_nocache_virt(nr_iommus, drhd->address);
944 iommu->index = nr_iommus++;
946 iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
947 iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
949 /* Calculate number of pagetable levels: between 2 and 4. */
950 sagaw = cap_sagaw(iommu->cap);
951 for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
952 if ( test_bit(agaw, &sagaw) )
953 break;
954 if ( agaw < 0 )
955 {
956 gdprintk(XENLOG_ERR VTDPREFIX,
957 "IOMMU: unsupported sagaw %lx\n", sagaw);
958 xfree(iommu);
959 return -ENODEV;
960 }
961 iommu->nr_pt_levels = agaw_to_level(agaw);
963 if ( !ecap_coherent(iommu->ecap) )
964 iommus_incoherent = 1;
966 spin_lock_init(&iommu->lock);
967 spin_lock_init(&iommu->register_lock);
969 drhd->iommu = iommu;
970 return 0;
971 }
973 static void iommu_free(struct acpi_drhd_unit *drhd)
974 {
975 struct iommu *iommu = drhd->iommu;
977 if ( iommu == NULL )
978 return;
980 if ( iommu->root_maddr != 0 )
981 {
982 free_pgtable_maddr(iommu->root_maddr);
983 iommu->root_maddr = 0;
984 }
986 if ( iommu->reg )
987 iounmap(iommu->reg);
989 free_intel_iommu(iommu->intel);
990 free_irq(iommu->vector);
991 xfree(iommu);
993 drhd->iommu = NULL;
994 }
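/*
 * Round a guest address width up to an adjusted guest address width:
 * VT-d page tables resolve 9 address bits per level on top of the 12
 * bits of 4K page offset, so (agaw - 12) must be a multiple of 9.  The
 * result is capped at 64 bits.
 */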
996 #define guestwidth_to_adjustwidth(gaw) ({ \
997 int agaw, r = (gaw - 12) % 9; \
998 agaw = (r == 0) ? gaw : (gaw + 9 - r); \
999 if ( agaw > 64 ) \
1000 agaw = 64; \
1001 agaw; })
1003 static int intel_iommu_domain_init(struct domain *d)
1004 {
1005 struct hvm_iommu *hd = domain_hvm_iommu(d);
1006 struct iommu *iommu = NULL;
1007 u64 i;
1008 struct acpi_drhd_unit *drhd;
1010 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1011 iommu = drhd->iommu;
1013 hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
1015 if ( d->domain_id == 0 )
1016 {
1017 extern int xen_in_range(paddr_t start, paddr_t end);
1018 extern int tboot_in_range(paddr_t start, paddr_t end);
1020 /*
1021 * Set up 1:1 page table for dom0 except the critical segments
1022 * like Xen and tboot.
1023 */
1024 for ( i = 0; i < max_page; i++ )
1025 {
1026 if ( xen_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) ||
1027 tboot_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) )
1028 continue;
1030 iommu_map_page(d, i, i);
1031 }
1033 setup_dom0_devices(d);
1034 setup_dom0_rmrr(d);
1036 iommu_flush_all();
1038 for_each_drhd_unit ( drhd )
1039 {
1040 iommu = drhd->iommu;
1041 if ( iommu_enable_translation(iommu) )
1042 return -EIO;
1043 }
1044 }
1046 return 0;
1047 }
1049 static int domain_context_mapping_one(
1050 struct domain *domain,
1051 struct iommu *iommu,
1052 u8 bus, u8 devfn)
1054 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1055 struct context_entry *context, *context_entries;
1056 unsigned long flags;
1057 u64 maddr, pgd_maddr;
1058 int agaw;
1060 maddr = bus_to_context_maddr(iommu, bus);
1061 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1062 context = &context_entries[devfn];
1064 if ( context_present(*context) )
1066 unmap_vtd_domain_page(context_entries);
1067 return 0;
1070 spin_lock_irqsave(&iommu->lock, flags);
1071 if ( iommu_passthrough &&
1072 ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
1074 context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
1075 agaw = level_to_agaw(iommu->nr_pt_levels);
1077 else
1079 /* Ensure we have pagetables allocated down to leaf PTE. */
1080 if ( hd->pgd_maddr == 0 )
1082 addr_to_dma_page_maddr(domain, 0, 1);
1083 if ( hd->pgd_maddr == 0 )
1085 nomem:
1086 unmap_vtd_domain_page(context_entries);
1087 spin_unlock_irqrestore(&iommu->lock, flags);
1088 return -ENOMEM;
1092 /* Skip top levels of page tables for 2- and 3-level DRHDs. */
1093 pgd_maddr = hd->pgd_maddr;
1094 for ( agaw = level_to_agaw(4);
1095 agaw != level_to_agaw(iommu->nr_pt_levels);
1096 agaw-- )
1098 struct dma_pte *p = map_vtd_domain_page(pgd_maddr);
1099 pgd_maddr = dma_pte_addr(*p);
1100 unmap_vtd_domain_page(p);
1101 if ( pgd_maddr == 0 )
1102 goto nomem;
1105 context_set_address_root(*context, pgd_maddr);
1106 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1109 /*
1110 * Domain id 0 is not valid on Intel IOMMUs, so iommu domain ids
1111 * are allocated 1-based, as the hardware requires.
1112 */
1113 context_set_domain_id(context, domain);
1114 context_set_address_width(*context, agaw);
1115 context_set_fault_enable(*context);
1116 context_set_present(*context);
1117 iommu_flush_cache_entry(context);
1119 unmap_vtd_domain_page(context_entries);
1121 /* Context entry was previously non-present (with domid 0). */
1122 iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
1123 DMA_CCMD_MASK_NOBIT, 1);
1124 if ( iommu_flush_iotlb_dsi(iommu, 0, 1) )
1125 iommu_flush_write_buffer(iommu);
1127 set_bit(iommu->index, &hd->iommu_bitmap);
1128 spin_unlock_irqrestore(&iommu->lock, flags);
1130 return 0;
1133 #define PCI_BASE_CLASS_BRIDGE 0x06
1134 #define PCI_CLASS_BRIDGE_PCI 0x0604
1136 enum {
1137 DEV_TYPE_PCIe_ENDPOINT,
1138 DEV_TYPE_PCIe_BRIDGE,
1139 DEV_TYPE_PCI_BRIDGE,
1140 DEV_TYPE_PCI,
1141 };
1143 int pdev_type(u8 bus, u8 devfn)
1145 u16 class_device;
1146 u16 status, creg;
1147 int pos;
1148 u8 d = PCI_SLOT(devfn), f = PCI_FUNC(devfn);
1150 class_device = pci_conf_read16(bus, d, f, PCI_CLASS_DEVICE);
1151 if ( class_device == PCI_CLASS_BRIDGE_PCI )
1153 pos = pci_find_next_cap(bus, devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP);
1154 if ( !pos )
1155 return DEV_TYPE_PCI_BRIDGE;
1156 creg = pci_conf_read16(bus, d, f, pos + PCI_EXP_FLAGS);
1157 return ((creg & PCI_EXP_FLAGS_TYPE) >> 4) == PCI_EXP_TYPE_PCI_BRIDGE ?
1158 DEV_TYPE_PCI_BRIDGE : DEV_TYPE_PCIe_BRIDGE;
1161 status = pci_conf_read16(bus, d, f, PCI_STATUS);
1162 if ( !(status & PCI_STATUS_CAP_LIST) )
1163 return DEV_TYPE_PCI;
1165 if ( pci_find_next_cap(bus, devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP) )
1166 return DEV_TYPE_PCIe_ENDPOINT;
1168 return DEV_TYPE_PCI;
1171 #define MAX_BUSES 256
1172 static struct { u8 map, bus, devfn; } bus2bridge[MAX_BUSES];
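/*
 * For a legacy PCI device, walk bus2bridge[] upstream until the topmost
 * bridge recorded for its bus is reached, and return (in *bus, *devfn)
 * the identifier that the device's DMA requests carry at the IOMMU;
 * *secbus is left as the secondary bus directly behind that bridge.
 * Returns 0 if no mapping is known.
 */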
1174 static int find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
1175 {
1176 int cnt = 0;
1177 *secbus = *bus;
1179 if ( *bus == 0 )
1180 /* assume integrated PCI devices in RC have valid requester-id */
1181 return 1;
1183 if ( !bus2bridge[*bus].map )
1184 return 0;
1186 while ( bus2bridge[*bus].map )
1187 {
1188 *secbus = *bus;
1189 *devfn = bus2bridge[*bus].devfn;
1190 *bus = bus2bridge[*bus].bus;
1191 if ( cnt++ >= MAX_BUSES )
1192 return 0;
1193 }
1195 return 1;
1196 }
1198 static int domain_context_mapping(struct domain *domain, u8 bus, u8 devfn)
1200 struct acpi_drhd_unit *drhd;
1201 int ret = 0;
1202 u16 sec_bus, sub_bus, ob, odf;
1203 u32 type;
1204 u8 secbus;
1206 drhd = acpi_find_matched_drhd_unit(bus, devfn);
1207 if ( !drhd )
1208 return -ENODEV;
1210 type = pdev_type(bus, devfn);
1211 switch ( type )
1213 case DEV_TYPE_PCIe_BRIDGE:
1214 case DEV_TYPE_PCI_BRIDGE:
1215 sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1216 PCI_SECONDARY_BUS);
1217 sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1218 PCI_SUBORDINATE_BUS);
1219 /*dmar_scope_add_buses(&drhd->scope, sec_bus, sub_bus);*/
1221 if ( type == DEV_TYPE_PCIe_BRIDGE )
1222 break;
1224 for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ )
1226 bus2bridge[sec_bus].map = 1;
1227 bus2bridge[sec_bus].bus = bus;
1228 bus2bridge[sec_bus].devfn = devfn;
1230 break;
1232 case DEV_TYPE_PCIe_ENDPOINT:
1233 gdprintk(XENLOG_INFO VTDPREFIX,
1234 "domain_context_mapping:PCIe: bdf = %x:%x.%x\n",
1235 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1236 ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
1237 break;
1239 case DEV_TYPE_PCI:
1240 gdprintk(XENLOG_INFO VTDPREFIX,
1241 "domain_context_mapping:PCI: bdf = %x:%x.%x\n",
1242 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1244 ob = bus; odf = devfn;
1245 if ( !find_pcie_endpoint(&bus, &devfn, &secbus) )
1247 gdprintk(XENLOG_WARNING VTDPREFIX,
1248 "domain_context_mapping:invalid\n");
1249 break;
1252 if ( ob != bus || odf != devfn )
1253 gdprintk(XENLOG_INFO VTDPREFIX,
1254 "domain_context_mapping:map: "
1255 "bdf = %x:%x.%x -> %x:%x.%x\n",
1256 ob, PCI_SLOT(odf), PCI_FUNC(odf),
1257 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1259 ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
1260 if ( secbus != bus )
1261 /*
1262 * The source-id for transactions on non-PCIe buses seems
1263 * to originate from devfn=0 on the secondary bus behind
1264 * the bridge. Map that id as well. The id to use in
1265 * these scenarios is not particularly well documented
1266 * anywhere.
1267 */
1268 domain_context_mapping_one(domain, drhd->iommu, secbus, 0);
1269 break;
1271 default:
1272 gdprintk(XENLOG_ERR VTDPREFIX,
1273 "domain_context_mapping:unknown type : bdf = %x:%x.%x\n",
1274 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1275 ret = -EINVAL;
1276 break;
1279 return ret;
1282 static int domain_context_unmap_one(
1283 struct domain *domain,
1284 struct iommu *iommu,
1285 u8 bus, u8 devfn)
1287 struct context_entry *context, *context_entries;
1288 unsigned long flags;
1289 u64 maddr;
1291 maddr = bus_to_context_maddr(iommu, bus);
1292 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1293 context = &context_entries[devfn];
1295 if ( !context_present(*context) )
1297 unmap_vtd_domain_page(context_entries);
1298 return 0;
1301 spin_lock_irqsave(&iommu->lock, flags);
1302 context_clear_present(*context);
1303 context_clear_entry(*context);
1304 iommu_flush_cache_entry(context);
1305 iommu_flush_context_domain(iommu, domain_iommu_domid(domain), 0);
1306 iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
1307 unmap_vtd_domain_page(context_entries);
1308 spin_unlock_irqrestore(&iommu->lock, flags);
1310 return 0;
1313 static int domain_context_unmap(struct domain *domain, u8 bus, u8 devfn)
1314 {
1315 struct acpi_drhd_unit *drhd;
1316 u16 sec_bus, sub_bus;
1317 int ret = 0;
1318 u32 type;
1319 u8 secbus;
1321 drhd = acpi_find_matched_drhd_unit(bus, devfn);
1322 if ( !drhd )
1323 return -ENODEV;
1325 type = pdev_type(bus, devfn);
1326 switch ( type )
1327 {
1328 case DEV_TYPE_PCIe_BRIDGE:
1329 case DEV_TYPE_PCI_BRIDGE:
1330 sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1331 PCI_SECONDARY_BUS);
1332 sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1333 PCI_SUBORDINATE_BUS);
1334 /*dmar_scope_remove_buses(&drhd->scope, sec_bus, sub_bus);*/
1335 if ( type == DEV_TYPE_PCI_BRIDGE )
1336 ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
1337 break;
1339 case DEV_TYPE_PCIe_ENDPOINT:
1340 ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
1341 break;
1343 case DEV_TYPE_PCI:
1344 if ( find_pcie_endpoint(&bus, &devfn, &secbus) )
1345 ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
1346 if ( bus != secbus )
1347 domain_context_unmap_one(domain, drhd->iommu, secbus, 0);
1348 break;
1350 default:
1351 gdprintk(XENLOG_ERR VTDPREFIX,
1352 "domain_context_unmap:unknown type: bdf = %x:%x:%x\n",
1353 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1354 ret = -EINVAL;
1355 break;
1356 }
1358 return ret;
1359 }
1361 static int reassign_device_ownership(
1362 struct domain *source,
1363 struct domain *target,
1364 u8 bus, u8 devfn)
1366 struct hvm_iommu *source_hd = domain_hvm_iommu(source);
1367 struct pci_dev *pdev;
1368 struct acpi_drhd_unit *drhd;
1369 struct iommu *pdev_iommu;
1370 int ret, found = 0;
1372 if ( !(pdev = pci_lock_domain_pdev(source, bus, devfn)) )
1373 return -ENODEV;
1375 drhd = acpi_find_matched_drhd_unit(bus, devfn);
1376 pdev_iommu = drhd->iommu;
1377 domain_context_unmap(source, bus, devfn);
1379 ret = domain_context_mapping(target, bus, devfn);
1380 if ( ret )
1381 return ret;
1383 write_lock(&pcidevs_lock);
1384 list_move(&pdev->domain_list, &target->arch.pdev_list);
1385 write_unlock(&pcidevs_lock);
1386 pdev->domain = target;
1388 spin_unlock(&pdev->lock);
1390 read_lock(&pcidevs_lock);
1391 for_each_pdev ( source, pdev )
1393 drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn);
1394 if ( drhd->iommu == pdev_iommu )
1396 found = 1;
1397 break;
1400 read_unlock(&pcidevs_lock);
1402 if ( !found )
1403 clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);
1405 return ret;
1408 void iommu_domain_teardown(struct domain *d)
1409 {
1410 struct hvm_iommu *hd = domain_hvm_iommu(d);
1412 if ( list_empty(&acpi_drhd_units) )
1413 return;
1415 iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
1416 hd->pgd_maddr = 0;
1417 iommu_domid_release(d);
1418 }
1420 static int domain_context_mapped(u8 bus, u8 devfn)
1421 {
1422 struct acpi_drhd_unit *drhd;
1424 for_each_drhd_unit ( drhd )
1425 if ( device_context_mapped(drhd->iommu, bus, devfn) )
1426 return 1;
1428 return 0;
1429 }
1431 int intel_iommu_map_page(
1432 struct domain *d, unsigned long gfn, unsigned long mfn)
1434 struct hvm_iommu *hd = domain_hvm_iommu(d);
1435 struct acpi_drhd_unit *drhd;
1436 struct iommu *iommu;
1437 struct dma_pte *page = NULL, *pte = NULL;
1438 u64 pg_maddr;
1439 int pte_present;
1441 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1442 iommu = drhd->iommu;
1444 /* do nothing for dom0 if the IOMMU supports pass-through */
1445 if ( iommu_passthrough &&
1446 ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1447 return 0;
1449 pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
1450 if ( pg_maddr == 0 )
1451 return -ENOMEM;
1452 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
1453 pte = page + (gfn & LEVEL_MASK);
1454 pte_present = dma_pte_present(*pte);
1455 dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
1456 dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
1457 iommu_flush_cache_entry(pte);
1458 unmap_vtd_domain_page(page);
1460 for_each_drhd_unit ( drhd )
1462 iommu = drhd->iommu;
1464 if ( !test_bit(iommu->index, &hd->iommu_bitmap) )
1465 continue;
1467 if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1468 (paddr_t)gfn << PAGE_SHIFT_4K, 1,
1469 !pte_present) )
1470 iommu_flush_write_buffer(iommu);
1473 return 0;
1476 int intel_iommu_unmap_page(struct domain *d, unsigned long gfn)
1478 struct acpi_drhd_unit *drhd;
1479 struct iommu *iommu;
1481 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1482 iommu = drhd->iommu;
1484 /* do nothing for dom0 if the IOMMU supports pass-through */
1485 if ( iommu_passthrough &&
1486 ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1487 return 0;
1489 dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
1491 return 0;
1494 static int iommu_prepare_rmrr_dev(struct domain *d,
1495 struct acpi_rmrr_unit *rmrr,
1496 u8 bus, u8 devfn)
1498 int ret = 0;
1499 u64 base, end;
1500 unsigned long base_pfn, end_pfn;
1502 ASSERT(rmrr->base_address < rmrr->end_address);
1504 base = rmrr->base_address & PAGE_MASK_4K;
1505 base_pfn = base >> PAGE_SHIFT_4K;
1506 end = PAGE_ALIGN_4K(rmrr->end_address);
1507 end_pfn = end >> PAGE_SHIFT_4K;
1509 while ( base_pfn < end_pfn )
1511 intel_iommu_map_page(d, base_pfn, base_pfn);
1512 base_pfn++;
1515 if ( domain_context_mapped(bus, devfn) == 0 )
1516 ret = domain_context_mapping(d, bus, devfn);
1518 return ret;
1521 static int intel_iommu_add_device(struct pci_dev *pdev)
1523 struct acpi_rmrr_unit *rmrr;
1524 u16 bdf;
1525 int ret, i;
1527 if ( !pdev->domain )
1528 return -EINVAL;
1530 ret = domain_context_mapping(pdev->domain, pdev->bus, pdev->devfn);
1531 if ( ret )
1533 gdprintk(XENLOG_ERR VTDPREFIX,
1534 "intel_iommu_add_device: context mapping failed\n");
1535 return ret;
1538 for_each_rmrr_device ( rmrr, bdf, i )
1540 if ( PCI_BUS(bdf) == pdev->bus && PCI_DEVFN2(bdf) == pdev->devfn )
1542 ret = iommu_prepare_rmrr_dev(pdev->domain, rmrr,
1543 pdev->bus, pdev->devfn);
1544 if ( ret )
1545 gdprintk(XENLOG_ERR VTDPREFIX,
1546 "intel_iommu_add_device: RMRR mapping failed\n");
1547 break;
1551 return ret;
1554 static int intel_iommu_remove_device(struct pci_dev *pdev)
1556 struct acpi_rmrr_unit *rmrr;
1557 u16 bdf;
1558 int i;
1560 if ( !pdev->domain )
1561 return -EINVAL;
1563 /* If the device belongs to dom0 and has an RMRR, don't remove it
1564 * from dom0, because the BIOS may use the RMRR at boot time.
1565 */
1566 if ( pdev->domain->domain_id == 0 )
1568 for_each_rmrr_device ( rmrr, bdf, i )
1570 if ( PCI_BUS(bdf) == pdev->bus &&
1571 PCI_DEVFN2(bdf) == pdev->devfn )
1572 return 0;
1576 return domain_context_unmap(pdev->domain, pdev->bus, pdev->devfn);
1579 static void setup_dom0_devices(struct domain *d)
1581 struct hvm_iommu *hd;
1582 struct pci_dev *pdev;
1583 int bus, dev, func;
1584 u32 l;
1586 hd = domain_hvm_iommu(d);
1588 write_lock(&pcidevs_lock);
1589 for ( bus = 0; bus < 256; bus++ )
1591 for ( dev = 0; dev < 32; dev++ )
1593 for ( func = 0; func < 8; func++ )
1595 l = pci_conf_read32(bus, dev, func, PCI_VENDOR_ID);
1596 /* some broken boards return 0 or ~0 if a slot is empty: */
1597 if ( (l == 0xffffffff) || (l == 0x00000000) ||
1598 (l == 0x0000ffff) || (l == 0xffff0000) )
1599 continue;
1601 pdev = alloc_pdev(bus, PCI_DEVFN(dev, func));
1602 pdev->domain = d;
1603 list_add(&pdev->domain_list, &d->arch.pdev_list);
1604 domain_context_mapping(d, pdev->bus, pdev->devfn);
1608 write_unlock(&pcidevs_lock);
1611 void clear_fault_bits(struct iommu *iommu)
1613 u64 val;
1615 val = dmar_readq(
1616 iommu->reg,
1617 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+0x8);
1618 dmar_writeq(
1619 iommu->reg,
1620 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8,
1621 val);
1622 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_FAULTS);
1625 static int init_vtd_hw(void)
1627 struct acpi_drhd_unit *drhd;
1628 struct iommu *iommu;
1629 struct iommu_flush *flush = NULL;
1630 int vector;
1631 int ret;
1633 for_each_drhd_unit ( drhd )
1635 iommu = drhd->iommu;
1636 ret = iommu_set_root_entry(iommu);
1637 if ( ret )
1639 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
1640 return -EIO;
1643 vector = iommu_set_interrupt(iommu);
1644 dma_msi_data_init(iommu, vector);
1645 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
1646 iommu->vector = vector;
1647 clear_fault_bits(iommu);
1648 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
1650 /* initialize flush functions */
1651 flush = iommu_get_flush(iommu);
1652 flush->context = flush_context_reg;
1653 flush->iotlb = flush_iotlb_reg;
1656 for_each_drhd_unit ( drhd )
1658 iommu = drhd->iommu;
1659 if ( qinval_setup(iommu) != 0 )
1660 dprintk(XENLOG_INFO VTDPREFIX,
1661 "Queued Invalidation hardware not found\n");
1664 for_each_drhd_unit ( drhd )
1666 iommu = drhd->iommu;
1667 if ( intremap_setup(iommu) != 0 )
1668 dprintk(XENLOG_INFO VTDPREFIX,
1669 "Interrupt Remapping hardware not found\n");
1672 return 0;
1675 static void setup_dom0_rmrr(struct domain *d)
1677 struct acpi_rmrr_unit *rmrr;
1678 u16 bdf;
1679 int ret, i;
1681 for_each_rmrr_device ( rmrr, bdf, i )
1683 ret = iommu_prepare_rmrr_dev(d, rmrr, PCI_BUS(bdf), PCI_DEVFN2(bdf));
1684 if ( ret )
1685 gdprintk(XENLOG_ERR VTDPREFIX,
1686 "IOMMU: mapping reserved region failed\n");
1690 int intel_vtd_setup(void)
1692 struct acpi_drhd_unit *drhd;
1693 struct iommu *iommu;
1695 if ( !vtd_enabled )
1696 return -ENODEV;
1698 spin_lock_init(&domid_bitmap_lock);
1699 clflush_size = get_cache_line_size();
1701 for_each_drhd_unit ( drhd )
1702 if ( iommu_alloc(drhd) != 0 )
1703 goto error;
1705 /* Allocate IO page directory page for the domain. */
1706 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1707 iommu = drhd->iommu;
1709 /* Allocate domain id bitmap, and set bit 0 as reserved */
1710 domid_bitmap_size = cap_ndoms(iommu->cap);
1711 domid_bitmap = xmalloc_array(unsigned long,
1712 BITS_TO_LONGS(domid_bitmap_size));
1713 if ( domid_bitmap == NULL )
1714 goto error;
1715 memset(domid_bitmap, 0, domid_bitmap_size / 8);
1716 set_bit(0, domid_bitmap);
1718 if ( init_vtd_hw() )
1719 goto error;
1721 register_keyhandler('V', dump_iommu_info, "dump iommu info");
1723 return 0;
1725 error:
1726 for_each_drhd_unit ( drhd )
1727 iommu_free(drhd);
1728 vtd_enabled = 0;
1729 return -ENOMEM;
1732 /*
1733 * If the device isn't owned by dom0, it has either already been
1734 * assigned to another domain or it does not exist.
1735 */
1736 int device_assigned(u8 bus, u8 devfn)
1737 {
1738 struct pci_dev *pdev;
1740 if ( (pdev = pci_lock_domain_pdev(dom0, bus, devfn)) )
1741 {
1742 spin_unlock(&pdev->lock);
1743 return 0;
1744 }
1746 return 1;
1747 }
1749 int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
1751 struct acpi_rmrr_unit *rmrr;
1752 int ret = 0, i;
1753 u16 bdf;
1755 if ( list_empty(&acpi_drhd_units) )
1756 return -ENODEV;
1758 ret = reassign_device_ownership(dom0, d, bus, devfn);
1759 if ( ret )
1760 return ret;
1762 /* Setup rmrr identity mapping */
1763 for_each_rmrr_device( rmrr, bdf, i )
1765 if ( PCI_BUS(bdf) == bus && PCI_DEVFN2(bdf) == devfn )
1767 /* FIXME: Because the USB RMRR conflicts with the guest BIOS region,
1768 * ignore USB RMRRs for now.
1769 */
1770 if ( is_usb_device(bus, devfn) )
1771 return 0;
1773 ret = iommu_prepare_rmrr_dev(d, rmrr, bus, devfn);
1774 if ( ret )
1775 gdprintk(XENLOG_ERR VTDPREFIX,
1776 "IOMMU: mapping reserved region failed\n");
1777 return ret;
1781 return ret;
1784 static int intel_iommu_group_id(u8 bus, u8 devfn)
1785 {
1786 u8 secbus;
1787 if ( !bus2bridge[bus].map || find_pcie_endpoint(&bus, &devfn, &secbus) )
1788 return PCI_BDF2(bus, devfn);
1789 else
1790 return -1;
1791 }
1793 static u32 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS];
1794 int iommu_suspend(void)
1796 struct acpi_drhd_unit *drhd;
1797 struct iommu *iommu;
1798 u32 i;
1800 if ( !vtd_enabled )
1801 return 0;
1803 iommu_flush_all();
1805 for_each_drhd_unit ( drhd )
1807 iommu = drhd->iommu;
1808 i = iommu->index;
1810 iommu_state[i][DMAR_FECTL_REG] =
1811 (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
1812 iommu_state[i][DMAR_FEDATA_REG] =
1813 (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
1814 iommu_state[i][DMAR_FEADDR_REG] =
1815 (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
1816 iommu_state[i][DMAR_FEUADDR_REG] =
1817 (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
1820 return 0;
1823 int iommu_resume(void)
1825 struct acpi_drhd_unit *drhd;
1826 struct iommu *iommu;
1827 u32 i;
1829 if ( !vtd_enabled )
1830 return 0;
1832 iommu_flush_all();
1834 if ( init_vtd_hw() != 0 && force_iommu )
1835 panic("IOMMU setup failed, crashing Xen for security purposes!\n");
1837 for_each_drhd_unit ( drhd )
1839 iommu = drhd->iommu;
1840 i = iommu->index;
1842 dmar_writel(iommu->reg, DMAR_FECTL_REG,
1843 (u32) iommu_state[i][DMAR_FECTL_REG]);
1844 dmar_writel(iommu->reg, DMAR_FEDATA_REG,
1845 (u32) iommu_state[i][DMAR_FEDATA_REG]);
1846 dmar_writel(iommu->reg, DMAR_FEADDR_REG,
1847 (u32) iommu_state[i][DMAR_FEADDR_REG]);
1848 dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
1849 (u32) iommu_state[i][DMAR_FEUADDR_REG]);
1851 if ( iommu_enable_translation(iommu) )
1852 return -EIO;
1855 return 0;
1858 struct iommu_ops intel_iommu_ops = {
1859 .init = intel_iommu_domain_init,
1860 .add_device = intel_iommu_add_device,
1861 .remove_device = intel_iommu_remove_device,
1862 .assign_device = intel_iommu_assign_device,
1863 .teardown = iommu_domain_teardown,
1864 .map_page = intel_iommu_map_page,
1865 .unmap_page = intel_iommu_unmap_page,
1866 .reassign_device = reassign_device_ownership,
1867 .get_device_group_id = intel_iommu_group_id,
1868 .update_ire_from_apic = io_apic_write_remap_rte,
1869 .update_ire_from_msi = msi_msg_write_remap_rte,
1870 };
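/*
 * These hooks are reached through Xen's generic passthrough layer; for
 * instance, the iommu_map_page() call in intel_iommu_domain_init()
 * above ends up in intel_iommu_map_page() via this table on VT-d
 * hardware.
 */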
1872 /*
1873 * Local variables:
1874 * mode: C
1875 * c-set-style: "BSD"
1876 * c-basic-offset: 4
1877 * tab-width: 4
1878 * indent-tabs-mode: nil
1879 * End:
1880 */