ia64/xen-unstable: xen/drivers/passthrough/vtd/iommu.c @ 17868:42323a447cbe

vt-d: Quieten down overzealous logging.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>

author    Keir Fraser <keir.fraser@citrix.com>
date      Mon Jun 16 11:49:15 2008 +0100 (2008-06-16)
parents   0216f0d07efe
children  73b798732e77

line source
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
20 */
22 #include <xen/irq.h>
23 #include <xen/sched.h>
24 #include <xen/xmalloc.h>
25 #include <xen/domain_page.h>
26 #include <xen/iommu.h>
27 #include <xen/numa.h>
28 #include <xen/time.h>
29 #include <xen/pci.h>
30 #include <xen/pci_regs.h>
31 #include <asm/paging.h>
32 #include <asm/msi.h>
33 #include "iommu.h"
34 #include "dmar.h"
35 #include "extern.h"
36 #include "vtd.h"
38 #define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
40 static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */
41 static int domid_bitmap_size; /* domain id bitmap size in bits */
42 static unsigned long *domid_bitmap; /* iommu domain id bitmap */
44 static void setup_dom0_devices(struct domain *d);
45 static void setup_dom0_rmrr(struct domain *d);
47 #define DID_FIELD_WIDTH 16
48 #define DID_HIGH_OFFSET 8
49 static void context_set_domain_id(struct context_entry *context,
50 struct domain *d)
51 {
52 unsigned long flags;
53 domid_t iommu_domid = domain_iommu_domid(d);
55 if ( iommu_domid == 0 )
56 {
57 spin_lock_irqsave(&domid_bitmap_lock, flags);
58 iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
59 set_bit(iommu_domid, domid_bitmap);
60 spin_unlock_irqrestore(&domid_bitmap_lock, flags);
61 d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
62 }
64 context->hi &= (1 << DID_HIGH_OFFSET) - 1;
65 context->hi |= iommu_domid << DID_HIGH_OFFSET;
66 }
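/*
 * Worked example (illustrative): with DID_HIGH_OFFSET == 8 the 16-bit IOMMU
 * domain id occupies bits 23:8 of the context entry's high word, so for
 * iommu_domid == 5 the update above is effectively
 *     context->hi = (context->hi & 0xff) | (5 << 8);
 * Domain id 0 is reserved (intel_vtd_setup() sets bit 0 of domid_bitmap), so
 * a freshly created domain always receives a non-zero id here.
 */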
68 static void iommu_domid_release(struct domain *d)
69 {
70 domid_t iommu_domid = domain_iommu_domid(d);
72 if ( iommu_domid != 0 )
73 {
74 d->arch.hvm_domain.hvm_iommu.iommu_domid = 0;
75 clear_bit(iommu_domid, domid_bitmap);
76 }
77 }
79 static struct intel_iommu *alloc_intel_iommu(void)
80 {
81 struct intel_iommu *intel;
83 intel = xmalloc(struct intel_iommu);
84 if ( intel == NULL )
85 return NULL;
86 memset(intel, 0, sizeof(struct intel_iommu));
88 spin_lock_init(&intel->qi_ctrl.qinval_lock);
89 spin_lock_init(&intel->qi_ctrl.qinval_poll_lock);
90 spin_lock_init(&intel->ir_ctrl.iremap_lock);
92 return intel;
93 }
95 static void free_intel_iommu(struct intel_iommu *intel)
96 {
97 xfree(intel);
98 }
100 struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu)
101 {
102 return iommu ? &iommu->intel->qi_ctrl : NULL;
103 }
105 struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu)
106 {
107 return iommu ? &iommu->intel->ir_ctrl : NULL;
108 }
110 struct iommu_flush *iommu_get_flush(struct iommu *iommu)
111 {
112 return iommu ? &iommu->intel->flush : NULL;
113 }
115 static unsigned int clflush_size;
116 static int iommus_incoherent;
117 static void __iommu_flush_cache(void *addr, int size)
118 {
119 int i;
121 if ( !iommus_incoherent )
122 return;
124 for ( i = 0; i < size; i += clflush_size )
125 clflush((char *)addr + i);
126 }
128 void iommu_flush_cache_entry(void *addr)
129 {
130 __iommu_flush_cache(addr, 8);
131 }
133 void iommu_flush_cache_page(void *addr)
134 {
135 __iommu_flush_cache(addr, PAGE_SIZE_4K);
136 }
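/*
 * Illustrative cost, assuming a typical clflush_size of 64 bytes: when an
 * IOMMU is not snoop-coherent (iommus_incoherent), iommu_flush_cache_entry()
 * issues a single clflush for an 8-byte entry, while iommu_flush_cache_page()
 * issues 4096/64 = 64 clflushes to push a whole page-table page out of the
 * CPU caches before the IOMMU reads it.
 */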
138 int nr_iommus;
139 /* context entry handling */
140 static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
141 {
142 struct root_entry *root, *root_entries;
143 unsigned long flags;
144 u64 maddr;
146 spin_lock_irqsave(&iommu->lock, flags);
147 root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
148 root = &root_entries[bus];
149 if ( !root_present(*root) )
150 {
151 maddr = alloc_pgtable_maddr();
152 if ( maddr == 0 )
153 {
154 spin_unlock_irqrestore(&iommu->lock, flags);
155 return 0;
156 }
157 set_root_value(*root, maddr);
158 set_root_present(*root);
159 iommu_flush_cache_entry(root);
160 }
161 maddr = (u64) get_context_addr(*root);
162 unmap_vtd_domain_page(root_entries);
163 spin_unlock_irqrestore(&iommu->lock, flags);
164 return maddr;
165 }
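/*
 * Illustrative walk: the root table has 256 entries indexed by PCI bus
 * number, and each present root entry points to a 256-entry context table
 * indexed by devfn.  For device 02:1d.7, bus_to_context_maddr(iommu, 0x02)
 * returns (allocating on first use) the context table for bus 2, and the
 * caller then selects entry devfn = (0x1d << 3) | 7 = 0xef within it.
 */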
167 static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
168 {
169 struct root_entry *root, *root_entries;
170 struct context_entry *context;
171 u64 context_maddr;
172 int ret;
173 unsigned long flags;
175 spin_lock_irqsave(&iommu->lock, flags);
176 root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
177 root = &root_entries[bus];
178 if ( !root_present(*root) )
179 {
180 ret = 0;
181 goto out;
182 }
183 context_maddr = get_context_addr(*root);
184 context = (struct context_entry *)map_vtd_domain_page(context_maddr);
185 ret = context_present(context[devfn]);
186 unmap_vtd_domain_page(context);
187 out:
188 unmap_vtd_domain_page(root_entries);
189 spin_unlock_irqrestore(&iommu->lock, flags);
190 return ret;
191 }
193 static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
194 {
195 struct hvm_iommu *hd = domain_hvm_iommu(domain);
196 int addr_width = agaw_to_width(hd->agaw);
197 struct dma_pte *parent, *pte = NULL;
198 int level = agaw_to_level(hd->agaw);
199 int offset;
200 unsigned long flags;
201 u64 pte_maddr = 0, maddr;
202 u64 *vaddr = NULL;
204 addr &= (((u64)1) << addr_width) - 1;
205 spin_lock_irqsave(&hd->mapping_lock, flags);
206 if ( hd->pgd_maddr == 0 )
207 if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr()) == 0) )
208 goto out;
210 parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
211 while ( level > 1 )
212 {
213 offset = address_level_offset(addr, level);
214 pte = &parent[offset];
216 if ( dma_pte_addr(*pte) == 0 )
217 {
218 if ( !alloc )
219 break;
220 maddr = alloc_pgtable_maddr();
221 dma_set_pte_addr(*pte, maddr);
222 vaddr = map_vtd_domain_page(maddr);
223 if ( !vaddr )
224 break;
226 /*
227 * Higher-level tables always set r/w; only the last-level
228 * page table entries control the actual read/write access.
229 */
230 dma_set_pte_readable(*pte);
231 dma_set_pte_writable(*pte);
232 iommu_flush_cache_entry(pte);
233 }
234 else
235 {
236 vaddr = map_vtd_domain_page(pte->val);
237 if ( !vaddr )
238 break;
239 }
241 if ( level == 2 )
242 {
243 pte_maddr = pte->val & PAGE_MASK_4K;
244 unmap_vtd_domain_page(vaddr);
245 break;
246 }
248 unmap_vtd_domain_page(parent);
249 parent = (struct dma_pte *)vaddr;
250 vaddr = NULL;
251 level--;
252 }
254 unmap_vtd_domain_page(parent);
255 out:
256 spin_unlock_irqrestore(&hd->mapping_lock, flags);
257 return pte_maddr;
258 }
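/*
 * Worked example, assuming the usual VT-d layout of 9 address bits per
 * level above the 4K page offset: with a 48-bit address width the walk
 * starts at level 4, and address_level_offset(addr, n) selects bits
 * [12 + 9n - 1 : 12 + 9(n - 1)] of the DMA address.  For addr = 0x80000000
 * (2GiB) the offsets are 0, 2 and 0 at levels 4, 3 and 2, and the function
 * returns the machine address of the level-1 table covering
 * 0x80000000 - 0x801fffff.
 */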
260 static void iommu_flush_write_buffer(struct iommu *iommu)
261 {
262 u32 val;
263 unsigned long flag;
264 s_time_t start_time;
266 if ( !cap_rwbf(iommu->cap) )
267 return;
268 val = iommu->gcmd | DMA_GCMD_WBF;
270 spin_lock_irqsave(&iommu->register_lock, flag);
271 dmar_writel(iommu->reg, DMAR_GCMD_REG, val);
273 /* Make sure the hardware completes it */
274 start_time = NOW();
275 for ( ; ; )
276 {
277 val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
278 if ( !(val & DMA_GSTS_WBFS) )
279 break;
280 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
281 panic("DMAR hardware is malfunctional,"
282 " please disable IOMMU\n");
283 cpu_relax();
284 }
285 spin_unlock_irqrestore(&iommu->register_lock, flag);
286 }
288 /* return value determines whether a write buffer flush is needed */
289 static int flush_context_reg(
290 void *_iommu,
291 u16 did, u16 source_id, u8 function_mask, u64 type,
292 int non_present_entry_flush)
293 {
294 struct iommu *iommu = (struct iommu *) _iommu;
295 u64 val = 0;
296 unsigned long flag;
297 s_time_t start_time;
299 /*
300 * In the non-present entry flush case: if the hardware does not cache
301 * non-present entries there is nothing to do; if it does cache them,
302 * flush the entries of domain 0 (that domain id is used to cache
303 * any non-present entries).
304 */
305 if ( non_present_entry_flush )
306 {
307 if ( !cap_caching_mode(iommu->cap) )
308 return 1;
309 else
310 did = 0;
311 }
313 /* use register invalidation */
314 switch ( type )
315 {
316 case DMA_CCMD_GLOBAL_INVL:
317 val = DMA_CCMD_GLOBAL_INVL;
318 break;
319 case DMA_CCMD_DOMAIN_INVL:
320 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
321 break;
322 case DMA_CCMD_DEVICE_INVL:
323 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
324 |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
325 break;
326 default:
327 BUG();
328 }
329 val |= DMA_CCMD_ICC;
331 spin_lock_irqsave(&iommu->register_lock, flag);
332 dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
334 /* Make sure the hardware completes it */
335 start_time = NOW();
336 for ( ; ; )
337 {
338 val = dmar_readq(iommu->reg, DMAR_CCMD_REG);
339 if ( !(val & DMA_CCMD_ICC) )
340 break;
341 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
342 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
343 cpu_relax();
344 }
345 spin_unlock_irqrestore(&iommu->register_lock, flag);
346 /* flushing a context entry implicitly flushes the write buffer */
347 return 0;
348 }
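/*
 * Example of the register-based protocol above: a device-selective flush of
 * bdf 02:1d.7 in IOMMU domain 5 writes
 *     DMA_CCMD_DEVICE_INVL | DMA_CCMD_DID(5) | DMA_CCMD_SID(0x02ef) |
 *     DMA_CCMD_FM(0) | DMA_CCMD_ICC
 * to DMAR_CCMD_REG and then spins until the hardware clears ICC, with
 * DMAR_OPERATION_TIMEOUT as the safety net.
 */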
350 static int inline iommu_flush_context_global(
351 struct iommu *iommu, int non_present_entry_flush)
352 {
353 struct iommu_flush *flush = iommu_get_flush(iommu);
354 return flush->context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
355 non_present_entry_flush);
356 }
358 static int inline iommu_flush_context_domain(
359 struct iommu *iommu, u16 did, int non_present_entry_flush)
360 {
361 struct iommu_flush *flush = iommu_get_flush(iommu);
362 return flush->context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
363 non_present_entry_flush);
364 }
366 static int inline iommu_flush_context_device(
367 struct iommu *iommu, u16 did, u16 source_id,
368 u8 function_mask, int non_present_entry_flush)
369 {
370 struct iommu_flush *flush = iommu_get_flush(iommu);
371 return flush->context(iommu, did, source_id, function_mask,
372 DMA_CCMD_DEVICE_INVL,
373 non_present_entry_flush);
374 }
376 /* return value determines whether a write buffer flush is needed */
377 static int flush_iotlb_reg(void *_iommu, u16 did,
378 u64 addr, unsigned int size_order, u64 type,
379 int non_present_entry_flush)
380 {
381 struct iommu *iommu = (struct iommu *) _iommu;
382 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
383 u64 val = 0, val_iva = 0;
384 unsigned long flag;
385 s_time_t start_time;
387 /*
388 * In the non-present entry flush case: if the hardware does not cache
389 * non-present entries there is nothing to do; if it does cache them,
390 * flush the entries of domain 0 (that domain id is used to cache
391 * any non-present entries).
392 */
393 if ( non_present_entry_flush )
394 {
395 if ( !cap_caching_mode(iommu->cap) )
396 return 1;
397 else
398 did = 0;
399 }
401 /* use register invalidation */
402 switch ( type )
403 {
404 case DMA_TLB_GLOBAL_FLUSH:
405 /* a global flush does not need IVA_REG to be set */
406 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
407 break;
408 case DMA_TLB_DSI_FLUSH:
409 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
410 break;
411 case DMA_TLB_PSI_FLUSH:
412 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
413 /* Note: always flush non-leaf currently */
414 val_iva = size_order | addr;
415 break;
416 default:
417 BUG();
418 }
419 /* Note: set drain read/write */
420 if ( cap_read_drain(iommu->cap) )
421 val |= DMA_TLB_READ_DRAIN;
422 if ( cap_write_drain(iommu->cap) )
423 val |= DMA_TLB_WRITE_DRAIN;
425 spin_lock_irqsave(&iommu->register_lock, flag);
426 /* Note: Only uses first TLB reg currently */
427 if ( val_iva )
428 dmar_writeq(iommu->reg, tlb_offset, val_iva);
429 dmar_writeq(iommu->reg, tlb_offset + 8, val);
431 /* Make sure the hardware completes it */
432 start_time = NOW();
433 for ( ; ; )
434 {
435 val = dmar_readq(iommu->reg, tlb_offset + 8);
436 if ( !(val & DMA_TLB_IVT) )
437 break;
438 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
439 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
440 cpu_relax();
441 }
442 spin_unlock_irqrestore(&iommu->register_lock, flag);
444 /* check IOTLB invalidation granularity */
445 if ( DMA_TLB_IAIG(val) == 0 )
446 printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
448 #ifdef VTD_DEBUG
449 if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
450 printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
451 (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
452 #endif
453 /* flushing IOTLB entries implicitly flushes the write buffer */
454 return 0;
455 }
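/*
 * The IOTLB interface is a pair of 64-bit registers at ecap_iotlb_offset():
 * the IVA register at that offset (written only for page-selective
 * invalidations) and the IOTLB command register 8 bytes above it.  Once IVT
 * clears, DMA_TLB_IAIG() reports the granularity the hardware actually used:
 * 0 means the flush was not performed, and a coarser granularity than
 * requested (e.g. DSI instead of PSI) is legal and merely over-invalidates.
 */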
457 static int inline iommu_flush_iotlb_global(struct iommu *iommu,
458 int non_present_entry_flush)
459 {
460 struct iommu_flush *flush = iommu_get_flush(iommu);
461 return flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
462 non_present_entry_flush);
463 }
465 static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
466 int non_present_entry_flush)
467 {
468 struct iommu_flush *flush = iommu_get_flush(iommu);
469 return flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
470 non_present_entry_flush);
471 }
473 static int inline get_alignment(u64 base, unsigned int size)
474 {
475 int t = 0;
476 u64 end;
478 end = base + size - 1;
479 while ( base != end )
480 {
481 t++;
482 base >>= 1;
483 end >>= 1;
484 }
485 return t;
486 }
488 static int inline iommu_flush_iotlb_psi(
489 struct iommu *iommu, u16 did,
490 u64 addr, unsigned int pages, int non_present_entry_flush)
491 {
492 unsigned int align;
493 struct iommu_flush *flush = iommu_get_flush(iommu);
495 BUG_ON(addr & (~PAGE_MASK_4K));
496 BUG_ON(pages == 0);
498 /* Fallback to domain selective flush if no PSI support */
499 if ( !cap_pgsel_inv(iommu->cap) )
500 return iommu_flush_iotlb_dsi(iommu, did,
501 non_present_entry_flush);
503 /*
504 * PSI requires the region size to be a power of two (2^x pages) and
505 * the base address to be naturally aligned to that size.
506 */
507 align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
508 /* Fallback to domain selective flush if size is too big */
509 if ( align > cap_max_amask_val(iommu->cap) )
510 return iommu_flush_iotlb_dsi(iommu, did,
511 non_present_entry_flush);
513 addr >>= PAGE_SHIFT_4K + align;
514 addr <<= PAGE_SHIFT_4K + align;
516 return flush->iotlb(iommu, did, addr, align,
517 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
518 }
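/*
 * Worked example: get_alignment(0x1000, 4) shifts base/end (0x1000/0x1003)
 * twice before they match and returns 2, i.e. four naturally aligned pages
 * need an address-mask order of 2.  For an unaligned request such as pfn 5,
 * 2 pages, it also returns 2, and the shift pair above then rounds addr down
 * to the enclosing 2^(12+2) = 16KiB boundary, so pfns 4-7 are invalidated:
 * PSI may cover more than was asked for, never less.
 */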
520 void iommu_flush_all(void)
521 {
522 struct acpi_drhd_unit *drhd;
523 struct iommu *iommu;
525 wbinvd();
526 for_each_drhd_unit ( drhd )
527 {
528 iommu = drhd->iommu;
529 iommu_flush_context_global(iommu, 0);
530 iommu_flush_iotlb_global(iommu, 0);
531 }
532 }
534 /* clear one page's page table */
535 static void dma_pte_clear_one(struct domain *domain, u64 addr)
536 {
537 struct hvm_iommu *hd = domain_hvm_iommu(domain);
538 struct acpi_drhd_unit *drhd;
539 struct iommu *iommu;
540 struct dma_pte *page = NULL, *pte = NULL;
541 u64 pg_maddr;
543 /* get last level pte */
544 pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
545 if ( pg_maddr == 0 )
546 return;
547 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
548 pte = page + address_level_offset(addr, 1);
550 if ( !dma_pte_present(*pte) )
551 {
552 unmap_vtd_domain_page(page);
553 return;
554 }
556 dma_clear_pte(*pte);
557 iommu_flush_cache_entry(pte);
559 for_each_drhd_unit ( drhd )
560 {
561 iommu = drhd->iommu;
562 if ( test_bit(iommu->index, &hd->iommu_bitmap) )
563 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
564 addr, 1, 0);
565 }
567 unmap_vtd_domain_page(page);
568 }
570 /* clear last level pte; a tlb flush should follow */
571 static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end)
572 {
573 struct hvm_iommu *hd = domain_hvm_iommu(domain);
574 int addr_width = agaw_to_width(hd->agaw);
576 start &= (((u64)1) << addr_width) - 1;
577 end &= (((u64)1) << addr_width) - 1;
578 /* in case start or end is in the middle of a page */
579 start = PAGE_ALIGN_4K(start);
580 end &= PAGE_MASK_4K;
582 /* no lock is needed here; nobody else touches this iova range */
583 while ( start < end )
584 {
585 dma_pte_clear_one(domain, start);
586 start += PAGE_SIZE_4K;
587 }
588 }
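/*
 * Note the rounding direction above: start is rounded up and end rounded
 * down, so only pages lying wholly inside [start, end) are cleared.  For
 * start = 0x1800 and end = 0x3800 this clears the mapping for
 * 0x2000 - 0x2fff only.
 */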
590 static void iommu_free_pagetable(u64 pt_maddr, int level)
591 {
592 int i;
593 struct dma_pte *pt_vaddr, *pte;
594 int next_level = level - 1;
596 if ( pt_maddr == 0 )
597 return;
599 pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
601 for ( i = 0; i < PTE_NUM; i++ )
602 {
603 pte = &pt_vaddr[i];
604 if ( !dma_pte_present(*pte) )
605 continue;
607 if ( next_level >= 1 )
608 iommu_free_pagetable(dma_pte_addr(*pte), next_level);
610 dma_clear_pte(*pte);
611 iommu_flush_cache_entry(pte);
612 }
614 unmap_vtd_domain_page(pt_vaddr);
615 free_pgtable_maddr(pt_maddr);
616 }
618 static int iommu_set_root_entry(struct iommu *iommu)
619 {
620 u32 cmd, sts;
621 unsigned long flags;
622 s_time_t start_time;
624 if ( iommu->root_maddr != 0 )
625 {
626 free_pgtable_maddr(iommu->root_maddr);
627 iommu->root_maddr = 0;
628 }
630 spin_lock_irqsave(&iommu->register_lock, flags);
632 iommu->root_maddr = alloc_pgtable_maddr();
633 if ( iommu->root_maddr == 0 )
634 return -ENOMEM;
636 dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
637 cmd = iommu->gcmd | DMA_GCMD_SRTP;
638 dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
640 /* Make sure the hardware completes it */
641 start_time = NOW();
642 for ( ; ; )
643 {
644 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
645 if ( sts & DMA_GSTS_RTPS )
646 break;
647 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
648 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
649 cpu_relax();
650 }
652 spin_unlock_irqrestore(&iommu->register_lock, flags);
654 return 0;
655 }
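/*
 * This is the GCMD/GSTS handshake used throughout this file: set a command
 * bit in DMAR_GCMD_REG (here SRTP, "set root table pointer") and poll
 * DMAR_GSTS_REG until the matching status bit (RTPS) is observed, giving up
 * after DMAR_OPERATION_TIMEOUT.  iommu_enable_translation() and
 * iommu_disable_translation() below follow the same pattern with TE/TES.
 */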
657 static int iommu_enable_translation(struct iommu *iommu)
658 {
659 u32 sts;
660 unsigned long flags;
661 s_time_t start_time;
663 dprintk(XENLOG_INFO VTDPREFIX,
664 "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
665 spin_lock_irqsave(&iommu->register_lock, flags);
666 iommu->gcmd |= DMA_GCMD_TE;
667 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
668 /* Make sure the hardware completes it */
669 start_time = NOW();
670 for ( ; ; )
671 {
672 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
673 if ( sts & DMA_GSTS_TES )
674 break;
675 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
676 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
677 cpu_relax();
678 }
680 /* Disable PMRs when VT-d engine takes effect per spec definition */
681 disable_pmr(iommu);
682 spin_unlock_irqrestore(&iommu->register_lock, flags);
683 return 0;
684 }
686 int iommu_disable_translation(struct iommu *iommu)
687 {
688 u32 sts;
689 unsigned long flags;
690 s_time_t start_time;
692 spin_lock_irqsave(&iommu->register_lock, flags);
693 iommu->gcmd &= ~ DMA_GCMD_TE;
694 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
696 /* Make sure the hardware completes it */
697 start_time = NOW();
698 for ( ; ; )
699 {
700 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
701 if ( !(sts & DMA_GSTS_TES) )
702 break;
703 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
704 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
705 cpu_relax();
706 }
707 spin_unlock_irqrestore(&iommu->register_lock, flags);
708 return 0;
709 }
711 static struct iommu *vector_to_iommu[NR_VECTORS];
712 static int iommu_page_fault_do_one(struct iommu *iommu, int type,
713 u8 fault_reason, u16 source_id, u64 addr)
714 {
715 dprintk(XENLOG_WARNING VTDPREFIX,
716 "iommu_fault:%s: %x:%x.%x addr %"PRIx64" REASON %x "
717 "iommu->reg = %p\n",
718 (type ? "DMA Read" : "DMA Write"), (source_id >> 8),
719 PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr,
720 fault_reason, iommu->reg);
722 if ( fault_reason < 0x20 )
723 print_vtd_entries(iommu, (source_id >> 8),
724 (source_id & 0xff), (addr >> PAGE_SHIFT));
726 return 0;
727 }
729 static void iommu_fault_status(u32 fault_status)
730 {
731 if ( fault_status & DMA_FSTS_PFO )
732 dprintk(XENLOG_ERR VTDPREFIX,
733 "iommu_fault_status: Fault Overflow\n");
734 else if ( fault_status & DMA_FSTS_PPF )
735 dprintk(XENLOG_ERR VTDPREFIX,
736 "iommu_fault_status: Primary Pending Fault\n");
737 else if ( fault_status & DMA_FSTS_AFO )
738 dprintk(XENLOG_ERR VTDPREFIX,
739 "iommu_fault_status: Advanced Fault Overflow\n");
740 else if ( fault_status & DMA_FSTS_APF )
741 dprintk(XENLOG_ERR VTDPREFIX,
742 "iommu_fault_status: Advanced Pending Fault\n");
743 else if ( fault_status & DMA_FSTS_IQE )
744 dprintk(XENLOG_ERR VTDPREFIX,
745 "iommu_fault_status: Invalidation Queue Error\n");
746 else if ( fault_status & DMA_FSTS_ICE )
747 dprintk(XENLOG_ERR VTDPREFIX,
748 "iommu_fault_status: Invalidation Completion Error\n");
749 else if ( fault_status & DMA_FSTS_ITE )
750 dprintk(XENLOG_ERR VTDPREFIX,
751 "iommu_fault_status: Invalidation Time-out Error\n");
752 }
754 #define PRIMARY_FAULT_REG_LEN (16)
755 static void iommu_page_fault(int vector, void *dev_id,
756 struct cpu_user_regs *regs)
757 {
758 struct iommu *iommu = dev_id;
759 int reg, fault_index;
760 u32 fault_status;
761 unsigned long flags;
763 dprintk(XENLOG_WARNING VTDPREFIX,
764 "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
766 spin_lock_irqsave(&iommu->register_lock, flags);
767 fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
768 spin_unlock_irqrestore(&iommu->register_lock, flags);
770 iommu_fault_status(fault_status);
772 /* FIXME: ignore advanced fault log */
773 if ( !(fault_status & DMA_FSTS_PPF) )
774 return;
775 fault_index = dma_fsts_fault_record_index(fault_status);
776 reg = cap_fault_reg_offset(iommu->cap);
777 for ( ; ; )
778 {
779 u8 fault_reason;
780 u16 source_id;
781 u32 data;
782 u64 guest_addr;
783 int type;
785 /* highest 32 bits */
786 spin_lock_irqsave(&iommu->register_lock, flags);
787 data = dmar_readl(iommu->reg, reg +
788 fault_index * PRIMARY_FAULT_REG_LEN + 12);
789 if ( !(data & DMA_FRCD_F) )
790 {
791 spin_unlock_irqrestore(&iommu->register_lock, flags);
792 break;
793 }
795 fault_reason = dma_frcd_fault_reason(data);
796 type = dma_frcd_type(data);
798 data = dmar_readl(iommu->reg, reg +
799 fault_index * PRIMARY_FAULT_REG_LEN + 8);
800 source_id = dma_frcd_source_id(data);
802 guest_addr = dmar_readq(iommu->reg, reg +
803 fault_index * PRIMARY_FAULT_REG_LEN);
804 guest_addr = dma_frcd_page_addr(guest_addr);
805 /* clear the fault */
806 dmar_writel(iommu->reg, reg +
807 fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
808 spin_unlock_irqrestore(&iommu->register_lock, flags);
810 iommu_page_fault_do_one(iommu, type, fault_reason,
811 source_id, guest_addr);
813 fault_index++;
814 if ( fault_index > cap_num_fault_regs(iommu->cap) )
815 fault_index = 0;
816 }
818 /* clear primary fault overflow */
819 if ( fault_status & DMA_FSTS_PFO )
820 {
821 spin_lock_irqsave(&iommu->register_lock, flags);
822 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
823 spin_unlock_irqrestore(&iommu->register_lock, flags);
824 }
825 }
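/*
 * Fault record layout as consumed above: each record is 16 bytes
 * (PRIMARY_FAULT_REG_LEN) starting at cap_fault_reg_offset().  The qword at
 * +0 holds the faulting page address, the dword at +8 the source id and the
 * dword at +12 the fault/type/reason bits; writing DMA_FRCD_F back clears
 * the record.  A source id decodes as bus = sid >> 8, dev = (sid >> 3) & 0x1f,
 * func = sid & 0x7, so sid 0x02ef is device 02:1d.7.
 */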
827 static void dma_msi_unmask(unsigned int vector)
828 {
829 struct iommu *iommu = vector_to_iommu[vector];
830 unsigned long flags;
832 /* unmask it */
833 spin_lock_irqsave(&iommu->register_lock, flags);
834 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
835 spin_unlock_irqrestore(&iommu->register_lock, flags);
836 }
838 static void dma_msi_mask(unsigned int vector)
839 {
840 unsigned long flags;
841 struct iommu *iommu = vector_to_iommu[vector];
843 /* mask it */
844 spin_lock_irqsave(&iommu->register_lock, flags);
845 dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
846 spin_unlock_irqrestore(&iommu->register_lock, flags);
847 }
849 static unsigned int dma_msi_startup(unsigned int vector)
850 {
851 dma_msi_unmask(vector);
852 return 0;
853 }
855 static void dma_msi_end(unsigned int vector)
856 {
857 dma_msi_unmask(vector);
858 ack_APIC_irq();
859 }
861 static void dma_msi_data_init(struct iommu *iommu, int vector)
862 {
863 u32 msi_data = 0;
864 unsigned long flags;
866 /* Fixed, edge, assert mode. Follow MSI setting */
867 msi_data |= vector & 0xff;
868 msi_data |= 1 << 14;
870 spin_lock_irqsave(&iommu->register_lock, flags);
871 dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
872 spin_unlock_irqrestore(&iommu->register_lock, flags);
873 }
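/*
 * Illustrative encoding: the fault-event data register takes the vector in
 * its low byte plus bit 14 for the "assert" level noted in the comment
 * above, so vector 0xa0 is programmed as 0x40a0.  dma_msi_addr_init() below
 * fills in the matching address registers for the chosen physical CPU.
 */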
875 static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
876 {
877 u64 msi_address;
878 unsigned long flags;
880 /* Physical, dedicated cpu. Follow MSI setting */
881 msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
882 msi_address |= MSI_PHYSICAL_MODE << 2;
883 msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
884 msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;
886 spin_lock_irqsave(&iommu->register_lock, flags);
887 dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
888 dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
889 spin_unlock_irqrestore(&iommu->register_lock, flags);
890 }
892 static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
893 {
894 struct iommu *iommu = vector_to_iommu[vector];
895 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
896 }
898 static struct hw_interrupt_type dma_msi_type = {
899 .typename = "DMA_MSI",
900 .startup = dma_msi_startup,
901 .shutdown = dma_msi_mask,
902 .enable = dma_msi_unmask,
903 .disable = dma_msi_mask,
904 .ack = dma_msi_mask,
905 .end = dma_msi_end,
906 .set_affinity = dma_msi_set_affinity,
907 };
909 int iommu_set_interrupt(struct iommu *iommu)
910 {
911 int vector, ret;
913 vector = assign_irq_vector(AUTO_ASSIGN);
914 vector_to_iommu[vector] = iommu;
916 /* VT-d fault is an MSI, so make irq == vector */
917 irq_vector[vector] = vector;
918 vector_irq[vector] = vector;
920 if ( !vector )
921 {
922 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
923 return -EINVAL;
924 }
926 irq_desc[vector].handler = &dma_msi_type;
927 ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
928 if ( ret )
929 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
930 return vector;
931 }
933 static int iommu_alloc(struct acpi_drhd_unit *drhd)
934 {
935 struct iommu *iommu;
936 unsigned long sagaw;
937 int agaw;
939 if ( nr_iommus > MAX_IOMMUS )
940 {
941 gdprintk(XENLOG_ERR VTDPREFIX,
942 "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
943 return -ENOMEM;
944 }
946 iommu = xmalloc(struct iommu);
947 if ( iommu == NULL )
948 return -ENOMEM;
949 memset(iommu, 0, sizeof(struct iommu));
951 iommu->intel = alloc_intel_iommu();
952 if ( iommu->intel == NULL )
953 {
954 xfree(iommu);
955 return -ENOMEM;
956 }
958 set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
959 iommu->reg = (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
960 iommu->index = nr_iommus++;
962 iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
963 iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
965 /* Calculate number of pagetable levels: between 2 and 4. */
966 sagaw = cap_sagaw(iommu->cap);
967 for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
968 if ( test_bit(agaw, &sagaw) )
969 break;
970 if ( agaw < 0 )
971 {
972 gdprintk(XENLOG_ERR VTDPREFIX,
973 "IOMMU: unsupported sagaw %lx\n", sagaw);
974 xfree(iommu);
975 return -ENODEV;
976 }
977 iommu->nr_pt_levels = agaw_to_level(agaw);
979 if ( !ecap_coherent(iommu->ecap) )
980 iommus_incoherent = 1;
982 spin_lock_init(&iommu->lock);
983 spin_lock_init(&iommu->register_lock);
985 drhd->iommu = iommu;
986 return 0;
987 }
989 static void iommu_free(struct acpi_drhd_unit *drhd)
990 {
991 struct iommu *iommu = drhd->iommu;
993 if ( iommu == NULL )
994 return;
996 if ( iommu->root_maddr != 0 )
997 {
998 free_pgtable_maddr(iommu->root_maddr);
999 iommu->root_maddr = 0;
1000 }
1002 if ( iommu->reg )
1003 iounmap(iommu->reg);
1005 free_intel_iommu(iommu->intel);
1006 free_irq(iommu->vector);
1007 xfree(iommu);
1009 drhd->iommu = NULL;
1010 }
1012 #define guestwidth_to_adjustwidth(gaw) ({ \
1013 int agaw, r = (gaw - 12) % 9; \
1014 agaw = (r == 0) ? gaw : (gaw + 9 - r); \
1015 if ( agaw > 64 ) \
1016 agaw = 64; \
1017 agaw; })
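/*
 * Worked examples: the macro rounds the guest address width up so that the
 * bits above the 4K page offset form whole 9-bit levels.  gaw = 32 gives
 * r = (32 - 12) % 9 = 2 and hence agaw = 32 + 9 - 2 = 39, while gaw = 39
 * and gaw = 48 are already aligned and are returned unchanged.
 */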
1019 static int intel_iommu_domain_init(struct domain *d)
1020 {
1021 struct hvm_iommu *hd = domain_hvm_iommu(d);
1022 struct iommu *iommu = NULL;
1023 u64 i;
1024 struct acpi_drhd_unit *drhd;
1026 INIT_LIST_HEAD(&hd->pdev_list);
1028 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1029 iommu = drhd->iommu;
1031 hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
1033 if ( d->domain_id == 0 )
1034 {
1035 extern int xen_in_range(paddr_t start, paddr_t end);
1036 extern int tboot_in_range(paddr_t start, paddr_t end);
1038 /*
1039 * Set up 1:1 page table for dom0 except the critical segments
1040 * like Xen and tboot.
1041 */
1042 for ( i = 0; i < max_page; i++ )
1043 {
1044 if ( xen_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) ||
1045 tboot_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) )
1046 continue;
1048 iommu_map_page(d, i, i);
1049 }
1051 setup_dom0_devices(d);
1052 setup_dom0_rmrr(d);
1054 iommu_flush_all();
1056 for_each_drhd_unit ( drhd )
1057 {
1058 iommu = drhd->iommu;
1059 if ( iommu_enable_translation(iommu) )
1060 return -EIO;
1061 }
1062 }
1064 return 0;
1065 }
1067 static int domain_context_mapping_one(
1068 struct domain *domain,
1069 struct iommu *iommu,
1070 u8 bus, u8 devfn)
1071 {
1072 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1073 struct context_entry *context, *context_entries;
1074 unsigned long flags;
1075 u64 maddr, pgd_maddr;
1076 int agaw;
1078 maddr = bus_to_context_maddr(iommu, bus);
1079 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1080 context = &context_entries[devfn];
1082 if ( context_present(*context) )
1083 {
1084 unmap_vtd_domain_page(context_entries);
1085 return 0;
1086 }
1088 spin_lock_irqsave(&iommu->lock, flags);
1090 #ifdef CONTEXT_PASSTHRU
1091 if ( ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
1092 context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
1093 else
1094 {
1095 #endif
1096 /* Ensure we have pagetables allocated down to leaf PTE. */
1097 if ( hd->pgd_maddr == 0 )
1098 {
1099 addr_to_dma_page_maddr(domain, 0, 1);
1100 if ( hd->pgd_maddr == 0 )
1101 {
1102 nomem:
1103 unmap_vtd_domain_page(context_entries);
1104 spin_unlock_irqrestore(&iommu->lock, flags);
1105 return -ENOMEM;
1106 }
1107 }
1109 /* Skip top levels of page tables for 2- and 3-level DRHDs. */
1110 pgd_maddr = hd->pgd_maddr;
1111 for ( agaw = level_to_agaw(4);
1112 agaw != level_to_agaw(iommu->nr_pt_levels);
1113 agaw-- )
1114 {
1115 struct dma_pte *p = map_vtd_domain_page(pgd_maddr);
1116 pgd_maddr = dma_pte_addr(*p);
1117 unmap_vtd_domain_page(p);
1118 if ( pgd_maddr == 0 )
1119 goto nomem;
1120 }
1122 context_set_address_root(*context, pgd_maddr);
1123 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1124 #ifdef CONTEXT_PASSTHRU
1125 }
1126 #endif
1128 /*
1129 * domain_id 0 is not valid on Intel's IOMMU, force domain_id to
1130 * be 1 based as required by intel's iommu hw.
1131 */
1132 context_set_domain_id(context, domain);
1133 context_set_address_width(*context, agaw);
1134 context_set_fault_enable(*context);
1135 context_set_present(*context);
1136 iommu_flush_cache_entry(context);
1138 unmap_vtd_domain_page(context_entries);
1140 /* Context entry was previously non-present (with domid 0). */
1141 iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
1142 DMA_CCMD_MASK_NOBIT, 1);
1143 if ( iommu_flush_iotlb_dsi(iommu, 0, 1) )
1144 iommu_flush_write_buffer(iommu);
1146 set_bit(iommu->index, &hd->iommu_bitmap);
1147 spin_unlock_irqrestore(&iommu->lock, flags);
1149 return 0;
1150 }
1152 #define PCI_BASE_CLASS_BRIDGE 0x06
1153 #define PCI_CLASS_BRIDGE_PCI 0x0604
1155 #define DEV_TYPE_PCIe_ENDPOINT 1
1156 #define DEV_TYPE_PCI_BRIDGE 2
1157 #define DEV_TYPE_PCI 3
1159 int pdev_type(struct pci_dev *dev)
1160 {
1161 u16 class_device;
1162 u16 status;
1164 class_device = pci_conf_read16(dev->bus, PCI_SLOT(dev->devfn),
1165 PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE);
1166 if ( class_device == PCI_CLASS_BRIDGE_PCI )
1167 return DEV_TYPE_PCI_BRIDGE;
1169 status = pci_conf_read16(dev->bus, PCI_SLOT(dev->devfn),
1170 PCI_FUNC(dev->devfn), PCI_STATUS);
1172 if ( !(status & PCI_STATUS_CAP_LIST) )
1173 return DEV_TYPE_PCI;
1175 if ( pci_find_next_cap(dev->bus, dev->devfn,
1176 PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP) )
1177 return DEV_TYPE_PCIe_ENDPOINT;
1179 return DEV_TYPE_PCI;
1180 }
1182 #define MAX_BUSES 256
1183 struct pci_dev bus2bridge[MAX_BUSES];
1185 static int domain_context_mapping(
1186 struct domain *domain,
1187 struct iommu *iommu,
1188 struct pci_dev *pdev)
1189 {
1190 int ret = 0;
1191 int dev, func, sec_bus, sub_bus;
1192 u32 type;
1194 type = pdev_type(pdev);
1195 switch ( type )
1196 {
1197 case DEV_TYPE_PCI_BRIDGE:
1198 sec_bus = pci_conf_read8(
1199 pdev->bus, PCI_SLOT(pdev->devfn),
1200 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1202 if ( bus2bridge[sec_bus].bus == 0 )
1203 {
1204 bus2bridge[sec_bus].bus = pdev->bus;
1205 bus2bridge[sec_bus].devfn = pdev->devfn;
1206 }
1208 sub_bus = pci_conf_read8(
1209 pdev->bus, PCI_SLOT(pdev->devfn),
1210 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1212 if ( sec_bus != sub_bus )
1213 gdprintk(XENLOG_WARNING VTDPREFIX,
1214 "context_context_mapping: nested PCI bridge not "
1215 "supported: bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n",
1216 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1217 sec_bus, sub_bus);
1218 break;
1219 case DEV_TYPE_PCIe_ENDPOINT:
1220 gdprintk(XENLOG_INFO VTDPREFIX,
1221 "domain_context_mapping:PCIe : bdf = %x:%x:%x\n",
1222 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1223 ret = domain_context_mapping_one(domain, iommu,
1224 (u8)(pdev->bus), (u8)(pdev->devfn));
1225 break;
1226 case DEV_TYPE_PCI:
1227 gdprintk(XENLOG_INFO VTDPREFIX,
1228 "domain_context_mapping:PCI: bdf = %x:%x:%x\n",
1229 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1231 if ( pdev->bus == 0 )
1232 ret = domain_context_mapping_one(
1233 domain, iommu, (u8)(pdev->bus), (u8)(pdev->devfn));
1234 else
1235 {
1236 if ( bus2bridge[pdev->bus].bus != 0 )
1237 gdprintk(XENLOG_WARNING VTDPREFIX,
1238 "domain_context_mapping:bus2bridge"
1239 "[%d].bus != 0\n", pdev->bus);
1241 ret = domain_context_mapping_one(
1242 domain, iommu,
1243 (u8)(bus2bridge[pdev->bus].bus),
1244 (u8)(bus2bridge[pdev->bus].devfn));
1246 /* now map everything behind the PCI bridge */
1247 for ( dev = 0; dev < 32; dev++ )
1248 {
1249 for ( func = 0; func < 8; func++ )
1250 {
1251 ret = domain_context_mapping_one(
1252 domain, iommu,
1253 pdev->bus, (u8)PCI_DEVFN(dev, func));
1254 if ( ret )
1255 return ret;
1256 }
1257 }
1258 }
1259 break;
1260 default:
1261 gdprintk(XENLOG_ERR VTDPREFIX,
1262 "domain_context_mapping:unknown type : bdf = %x:%x:%x\n",
1263 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1264 ret = -EINVAL;
1265 break;
1266 }
1268 return ret;
1269 }
1271 static int domain_context_unmap_one(
1272 struct domain *domain,
1273 struct iommu *iommu,
1274 u8 bus, u8 devfn)
1275 {
1276 struct context_entry *context, *context_entries;
1277 unsigned long flags;
1278 u64 maddr;
1280 maddr = bus_to_context_maddr(iommu, bus);
1281 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1282 context = &context_entries[devfn];
1284 if ( !context_present(*context) )
1285 {
1286 unmap_vtd_domain_page(context_entries);
1287 return 0;
1288 }
1290 spin_lock_irqsave(&iommu->lock, flags);
1291 context_clear_present(*context);
1292 context_clear_entry(*context);
1293 iommu_flush_cache_entry(context);
1294 iommu_flush_context_global(iommu, 0);
1295 iommu_flush_iotlb_global(iommu, 0);
1296 unmap_vtd_domain_page(context_entries);
1297 spin_unlock_irqrestore(&iommu->lock, flags);
1299 return 0;
1300 }
1302 static int domain_context_unmap(
1303 struct domain *domain,
1304 struct iommu *iommu,
1305 struct pci_dev *pdev)
1306 {
1307 int ret = 0;
1308 int dev, func, sec_bus, sub_bus;
1309 u32 type;
1311 type = pdev_type(pdev);
1312 switch ( type )
1313 {
1314 case DEV_TYPE_PCI_BRIDGE:
1315 sec_bus = pci_conf_read8(
1316 pdev->bus, PCI_SLOT(pdev->devfn),
1317 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1318 sub_bus = pci_conf_read8(
1319 pdev->bus, PCI_SLOT(pdev->devfn),
1320 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1321 break;
1322 case DEV_TYPE_PCIe_ENDPOINT:
1323 ret = domain_context_unmap_one(domain, iommu,
1324 (u8)(pdev->bus), (u8)(pdev->devfn));
1325 break;
1326 case DEV_TYPE_PCI:
1327 if ( pdev->bus == 0 )
1328 ret = domain_context_unmap_one(
1329 domain, iommu,
1330 (u8)(pdev->bus), (u8)(pdev->devfn));
1331 else
1332 {
1333 if ( bus2bridge[pdev->bus].bus != 0 )
1334 gdprintk(XENLOG_WARNING VTDPREFIX,
1335 "domain_context_unmap:"
1336 "bus2bridge[%d].bus != 0\n", pdev->bus);
1338 ret = domain_context_unmap_one(domain, iommu,
1339 (u8)(bus2bridge[pdev->bus].bus),
1340 (u8)(bus2bridge[pdev->bus].devfn));
1342 /* Unmap everything behind the PCI bridge */
1343 for ( dev = 0; dev < 32; dev++ )
1344 {
1345 for ( func = 0; func < 8; func++ )
1346 {
1347 ret = domain_context_unmap_one(
1348 domain, iommu,
1349 pdev->bus, (u8)PCI_DEVFN(dev, func));
1350 if ( ret )
1351 return ret;
1352 }
1353 }
1354 }
1355 break;
1356 default:
1357 gdprintk(XENLOG_ERR VTDPREFIX,
1358 "domain_context_unmap:unknown type: bdf = %x:%x:%x\n",
1359 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1360 ret = -EINVAL;
1361 break;
1362 }
1364 return ret;
1365 }
1367 void reassign_device_ownership(
1368 struct domain *source,
1369 struct domain *target,
1370 u8 bus, u8 devfn)
1371 {
1372 struct hvm_iommu *source_hd = domain_hvm_iommu(source);
1373 struct hvm_iommu *target_hd = domain_hvm_iommu(target);
1374 struct pci_dev *pdev, *pdev2;
1375 struct acpi_drhd_unit *drhd;
1376 struct iommu *iommu;
1377 int status;
1378 unsigned long flags;
1379 int found = 0;
1381 pdev_flr(bus, devfn);
1383 for_each_pdev( source, pdev )
1384 if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
1385 goto found;
1387 return;
1389 found:
1390 drhd = acpi_find_matched_drhd_unit(pdev);
1391 iommu = drhd->iommu;
1392 domain_context_unmap(source, iommu, pdev);
1394 /* Move pci device from the source domain to target domain. */
1395 spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
1396 spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
1397 list_move(&pdev->list, &target_hd->pdev_list);
1398 spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
1399 spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
1401 for_each_pdev ( source, pdev2 )
1402 {
1403 drhd = acpi_find_matched_drhd_unit(pdev2);
1404 if ( drhd->iommu == iommu )
1405 {
1406 found = 1;
1407 break;
1408 }
1409 }
1410 if ( !found )
1411 clear_bit(iommu->index, &source_hd->iommu_bitmap);
1413 status = domain_context_mapping(target, iommu, pdev);
1414 if ( status != 0 )
1415 gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
1416 }
1418 void return_devices_to_dom0(struct domain *d)
1419 {
1420 struct hvm_iommu *hd = domain_hvm_iommu(d);
1421 struct pci_dev *pdev;
1423 while ( !list_empty(&hd->pdev_list) )
1424 {
1425 pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
1426 pci_cleanup_msi(pdev->bus, pdev->devfn);
1427 reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
1428 }
1430 #ifdef VTD_DEBUG
1431 for_each_pdev ( dom0, pdev )
1432 dprintk(XENLOG_INFO VTDPREFIX,
1433 "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
1434 dom0->domain_id, pdev->bus,
1435 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1436 #endif
1437 }
1439 void iommu_domain_teardown(struct domain *d)
1440 {
1441 struct hvm_iommu *hd = domain_hvm_iommu(d);
1443 if ( list_empty(&acpi_drhd_units) )
1444 return;
1446 return_devices_to_dom0(d);
1447 iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
1448 hd->pgd_maddr = 0;
1449 iommu_domid_release(d);
1450 }
1452 static int domain_context_mapped(struct pci_dev *pdev)
1453 {
1454 struct acpi_drhd_unit *drhd;
1455 struct iommu *iommu;
1456 int ret;
1458 for_each_drhd_unit ( drhd )
1459 {
1460 iommu = drhd->iommu;
1461 ret = device_context_mapped(iommu, pdev->bus, pdev->devfn);
1462 if ( ret )
1463 return ret;
1464 }
1466 return 0;
1467 }
1469 int intel_iommu_map_page(
1470 struct domain *d, unsigned long gfn, unsigned long mfn)
1471 {
1472 struct hvm_iommu *hd = domain_hvm_iommu(d);
1473 struct acpi_drhd_unit *drhd;
1474 struct iommu *iommu;
1475 struct dma_pte *page = NULL, *pte = NULL;
1476 u64 pg_maddr;
1477 int pte_present;
1479 #ifdef CONTEXT_PASSTHRU
1480 /* do nothing if dom0 and iommu supports pass thru */
1481 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1482 return 0;
1483 #endif
1485 pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
1486 if ( pg_maddr == 0 )
1487 return -ENOMEM;
1488 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
1489 pte = page + (gfn & LEVEL_MASK);
1490 pte_present = dma_pte_present(*pte);
1491 dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
1492 dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
1493 iommu_flush_cache_entry(pte);
1494 unmap_vtd_domain_page(page);
1496 for_each_drhd_unit ( drhd )
1497 {
1498 iommu = drhd->iommu;
1500 if ( !test_bit(iommu->index, &hd->iommu_bitmap) )
1501 continue;
1503 if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1504 (paddr_t)gfn << PAGE_SHIFT_4K, 1,
1505 !pte_present) )
1506 iommu_flush_write_buffer(iommu);
1507 }
1509 return 0;
1510 }
1512 int intel_iommu_unmap_page(struct domain *d, unsigned long gfn)
1513 {
1514 struct acpi_drhd_unit *drhd;
1515 struct iommu *iommu;
1517 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1518 iommu = drhd->iommu;
1520 #ifdef CONTEXT_PASSTHRU
1521 /* do nothing if dom0 and iommu supports pass thru */
1522 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1523 return 0;
1524 #endif
1526 dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
1528 return 0;
1529 }
1531 int iommu_page_mapping(struct domain *domain, paddr_t iova,
1532 paddr_t hpa, size_t size, int prot)
1533 {
1534 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1535 struct acpi_drhd_unit *drhd;
1536 struct iommu *iommu;
1537 u64 start_pfn, end_pfn;
1538 struct dma_pte *page = NULL, *pte = NULL;
1539 int index;
1540 u64 pg_maddr;
1542 if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
1543 return -EINVAL;
1545 iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
1546 start_pfn = hpa >> PAGE_SHIFT_4K;
1547 end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K;
1548 index = 0;
1549 while ( start_pfn < end_pfn )
1550 {
1551 pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K*index, 1);
1552 if ( pg_maddr == 0 )
1553 return -ENOMEM;
1554 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
1555 pte = page + (start_pfn & LEVEL_MASK);
1556 dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K);
1557 dma_set_pte_prot(*pte, prot);
1558 iommu_flush_cache_entry(pte);
1559 unmap_vtd_domain_page(page);
1560 start_pfn++;
1561 index++;
1562 }
1564 for_each_drhd_unit ( drhd )
1565 {
1566 iommu = drhd->iommu;
1568 if ( !test_bit(iommu->index, &hd->iommu_bitmap) )
1569 continue;
1571 if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
1572 iova, index, 1) )
1573 iommu_flush_write_buffer(iommu);
1574 }
1576 return 0;
1577 }
1579 int iommu_page_unmapping(struct domain *domain, paddr_t addr, size_t size)
1580 {
1581 dma_pte_clear_range(domain, addr, addr + size);
1583 return 0;
1584 }
1586 static int iommu_prepare_rmrr_dev(
1587 struct domain *d,
1588 struct acpi_rmrr_unit *rmrr,
1589 struct pci_dev *pdev)
1590 {
1591 struct acpi_drhd_unit *drhd;
1592 unsigned long size;
1593 int ret;
1595 /* page table init */
1596 size = rmrr->end_address - rmrr->base_address + 1;
1597 ret = iommu_page_mapping(d, rmrr->base_address,
1598 rmrr->base_address, size,
1599 DMA_PTE_READ|DMA_PTE_WRITE);
1600 if ( ret )
1601 return ret;
1603 if ( domain_context_mapped(pdev) == 0 )
1604 {
1605 drhd = acpi_find_matched_drhd_unit(pdev);
1606 ret = domain_context_mapping(d, drhd->iommu, pdev);
1607 if ( !ret )
1608 return 0;
1609 }
1611 return ret;
1612 }
1614 static void setup_dom0_devices(struct domain *d)
1615 {
1616 struct hvm_iommu *hd;
1617 struct acpi_drhd_unit *drhd;
1618 struct pci_dev *pdev;
1619 int bus, dev, func, ret;
1620 u32 l;
1622 hd = domain_hvm_iommu(d);
1624 for ( bus = 0; bus < 256; bus++ )
1625 {
1626 for ( dev = 0; dev < 32; dev++ )
1627 {
1628 for ( func = 0; func < 8; func++ )
1629 {
1630 l = pci_conf_read32(bus, dev, func, PCI_VENDOR_ID);
1631 /* some broken boards return 0 or ~0 if a slot is empty: */
1632 if ( (l == 0xffffffff) || (l == 0x00000000) ||
1633 (l == 0x0000ffff) || (l == 0xffff0000) )
1634 continue;
1635 pdev = xmalloc(struct pci_dev);
1636 pdev->bus = bus;
1637 pdev->devfn = PCI_DEVFN(dev, func);
1638 list_add_tail(&pdev->list, &hd->pdev_list);
1640 drhd = acpi_find_matched_drhd_unit(pdev);
1641 ret = domain_context_mapping(d, drhd->iommu, pdev);
1642 if ( ret != 0 )
1643 gdprintk(XENLOG_ERR VTDPREFIX,
1644 "domain_context_mapping failed\n");
1650 void clear_fault_bits(struct iommu *iommu)
1652 u64 val;
1654 val = dmar_readq(
1655 iommu->reg,
1656 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+0x8);
1657 dmar_writeq(
1658 iommu->reg,
1659 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8,
1660 val);
1661 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_FAULTS);
1662 }
1664 static int init_vtd_hw(void)
1665 {
1666 struct acpi_drhd_unit *drhd;
1667 struct iommu *iommu;
1668 struct iommu_flush *flush = NULL;
1669 int vector;
1670 int ret;
1672 for_each_drhd_unit ( drhd )
1673 {
1674 iommu = drhd->iommu;
1675 ret = iommu_set_root_entry(iommu);
1676 if ( ret )
1677 {
1678 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
1679 return -EIO;
1680 }
1682 vector = iommu_set_interrupt(iommu);
1683 dma_msi_data_init(iommu, vector);
1684 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
1685 iommu->vector = vector;
1686 clear_fault_bits(iommu);
1687 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
1689 /* initialize flush functions */
1690 flush = iommu_get_flush(iommu);
1691 flush->context = flush_context_reg;
1692 flush->iotlb = flush_iotlb_reg;
1693 }
1695 for_each_drhd_unit ( drhd )
1696 {
1697 iommu = drhd->iommu;
1698 if ( qinval_setup(iommu) != 0 )
1699 dprintk(XENLOG_INFO VTDPREFIX,
1700 "Queued Invalidation hardware not found\n");
1703 for_each_drhd_unit ( drhd )
1705 iommu = drhd->iommu;
1706 if ( intremap_setup(iommu) != 0 )
1707 dprintk(XENLOG_INFO VTDPREFIX,
1708 "Interrupt Remapping hardware not found\n");
1711 return 0;
1714 static void setup_dom0_rmrr(struct domain *d)
1716 struct acpi_rmrr_unit *rmrr;
1717 struct pci_dev *pdev;
1718 int ret;
1720 for_each_rmrr_device ( rmrr, pdev )
1721 ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
1722 if ( ret )
1723 gdprintk(XENLOG_ERR VTDPREFIX,
1724 "IOMMU: mapping reserved region failed\n");
1725 end_for_each_rmrr_device ( rmrr, pdev )
1726 }
1728 int intel_vtd_setup(void)
1729 {
1730 struct acpi_drhd_unit *drhd;
1731 struct iommu *iommu;
1733 if ( !vtd_enabled )
1734 return -ENODEV;
1736 spin_lock_init(&domid_bitmap_lock);
1737 clflush_size = get_clflush_size();
1739 for_each_drhd_unit ( drhd )
1740 if ( iommu_alloc(drhd) != 0 )
1741 goto error;
1743 /* Allocate IO page directory page for the domain. */
1744 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1745 iommu = drhd->iommu;
1747 /* Allocate domain id bitmap, and set bit 0 as reserved */
1748 domid_bitmap_size = cap_ndoms(iommu->cap);
1749 domid_bitmap = xmalloc_array(unsigned long,
1750 BITS_TO_LONGS(domid_bitmap_size));
1751 if ( domid_bitmap == NULL )
1752 goto error;
1753 memset(domid_bitmap, 0, domid_bitmap_size / 8);
1754 set_bit(0, domid_bitmap);
1756 init_vtd_hw();
1758 return 0;
1760 error:
1761 for_each_drhd_unit ( drhd )
1762 iommu_free(drhd);
1763 vtd_enabled = 0;
1764 return -ENOMEM;
1765 }
1767 /*
1768 * If the device isn't owned by dom0, it has either already been
1769 * assigned to another domain or it does not exist.
1770 */
1771 int device_assigned(u8 bus, u8 devfn)
1772 {
1773 struct pci_dev *pdev;
1775 for_each_pdev( dom0, pdev )
1776 if ( (pdev->bus == bus ) && (pdev->devfn == devfn) )
1777 return 0;
1779 return 1;
1780 }
1782 int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
1783 {
1784 struct acpi_rmrr_unit *rmrr;
1785 struct pci_dev *pdev;
1786 int ret = 0;
1788 if ( list_empty(&acpi_drhd_units) )
1789 return ret;
1791 reassign_device_ownership(dom0, d, bus, devfn);
1793 /* Set up RMRR identity mapping */
1794 for_each_rmrr_device( rmrr, pdev )
1795 if ( pdev->bus == bus && pdev->devfn == devfn )
1796 {
1797 /* FIXME: Because USB RMRR conflicts with guest bios region,
1798 * ignore USB RMRR temporarily.
1799 */
1800 if ( is_usb_device(pdev) )
1801 return 0;
1803 ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
1804 if ( ret )
1805 {
1806 gdprintk(XENLOG_ERR VTDPREFIX,
1807 "IOMMU: mapping reserved region failed\n");
1808 return ret;
1809 }
1810 }
1811 end_for_each_rmrr_device(rmrr, pdev)
1813 return ret;
1814 }
1816 u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
1817 int iommu_suspend(void)
1818 {
1819 struct acpi_drhd_unit *drhd;
1820 struct iommu *iommu;
1821 int i = 0;
1823 iommu_flush_all();
1825 for_each_drhd_unit ( drhd )
1826 {
1827 iommu = drhd->iommu;
1828 iommu_state[DMAR_RTADDR_REG * i] =
1829 (u64) dmar_readq(iommu->reg, DMAR_RTADDR_REG);
1830 iommu_state[DMAR_FECTL_REG * i] =
1831 (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
1832 iommu_state[DMAR_FEDATA_REG * i] =
1833 (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
1834 iommu_state[DMAR_FEADDR_REG * i] =
1835 (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
1836 iommu_state[DMAR_FEUADDR_REG * i] =
1837 (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
1838 iommu_state[DMAR_PLMBASE_REG * i] =
1839 (u32) dmar_readl(iommu->reg, DMAR_PLMBASE_REG);
1840 iommu_state[DMAR_PLMLIMIT_REG * i] =
1841 (u32) dmar_readl(iommu->reg, DMAR_PLMLIMIT_REG);
1842 iommu_state[DMAR_PHMBASE_REG * i] =
1843 (u64) dmar_readq(iommu->reg, DMAR_PHMBASE_REG);
1844 iommu_state[DMAR_PHMLIMIT_REG * i] =
1845 (u64) dmar_readq(iommu->reg, DMAR_PHMLIMIT_REG);
1846 i++;
1847 }
1849 return 0;
1850 }
1852 int iommu_resume(void)
1853 {
1854 struct acpi_drhd_unit *drhd;
1855 struct iommu *iommu;
1856 int i = 0;
1858 iommu_flush_all();
1860 init_vtd_hw();
1861 for_each_drhd_unit ( drhd )
1862 {
1863 iommu = drhd->iommu;
1864 dmar_writeq( iommu->reg, DMAR_RTADDR_REG,
1865 (u64) iommu_state[DMAR_RTADDR_REG * i]);
1866 dmar_writel(iommu->reg, DMAR_FECTL_REG,
1867 (u32) iommu_state[DMAR_FECTL_REG * i]);
1868 dmar_writel(iommu->reg, DMAR_FEDATA_REG,
1869 (u32) iommu_state[DMAR_FEDATA_REG * i]);
1870 dmar_writel(iommu->reg, DMAR_FEADDR_REG,
1871 (u32) iommu_state[DMAR_FEADDR_REG * i]);
1872 dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
1873 (u32) iommu_state[DMAR_FEUADDR_REG * i]);
1874 dmar_writel(iommu->reg, DMAR_PLMBASE_REG,
1875 (u32) iommu_state[DMAR_PLMBASE_REG * i]);
1876 dmar_writel(iommu->reg, DMAR_PLMLIMIT_REG,
1877 (u32) iommu_state[DMAR_PLMLIMIT_REG * i]);
1878 dmar_writeq(iommu->reg, DMAR_PHMBASE_REG,
1879 (u64) iommu_state[DMAR_PHMBASE_REG * i]);
1880 dmar_writeq(iommu->reg, DMAR_PHMLIMIT_REG,
1881 (u64) iommu_state[DMAR_PHMLIMIT_REG * i]);
1883 if ( iommu_enable_translation(iommu) )
1884 return -EIO;
1885 i++;
1886 }
1887 return 0;
1888 }
1890 struct iommu_ops intel_iommu_ops = {
1891 .init = intel_iommu_domain_init,
1892 .assign_device = intel_iommu_assign_device,
1893 .teardown = iommu_domain_teardown,
1894 .map_page = intel_iommu_map_page,
1895 .unmap_page = intel_iommu_unmap_page,
1896 .reassign_device = reassign_device_ownership,
1897 .get_device_group_id = NULL,
1898 };
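/*
 * These hooks are the entry points used by the generic passthrough layer;
 * for instance the iommu_map_page() calls made in intel_iommu_domain_init()
 * above are expected to reach intel_iommu_map_page() through this table when
 * VT-d is the active IOMMU implementation.
 */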
1900 /*
1901 * Local variables:
1902 * mode: C
1903 * c-set-style: "BSD"
1904 * c-basic-offset: 4
1905 * tab-width: 4
1906 * indent-tabs-mode: nil
1907 * End:
1908 */