ia64/xen-unstable
xen/drivers/passthrough/vtd/iommu.c @ 17540:8bced3d8a907

MSI 1/6: Move PCI functions and headers to a common location.

Signed-off-by: Shan Haitao <haitao.shan@intel.com>
Signed-off-by: Jiang Yunhong <yunhong.jiang@intel.com>

Author:   Keir Fraser <keir.fraser@citrix.com>
Date:     Thu May 01 10:26:58 2008 +0100 (2008-05-01)
Parent:   2ab9f85f221f
Children: 86c0353f19d0

1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
20 */
22 #include <xen/irq.h>
23 #include <xen/sched.h>
24 #include <xen/xmalloc.h>
25 #include <xen/domain_page.h>
26 #include <xen/iommu.h>
27 #include <xen/numa.h>
28 #include <xen/time.h>
29 #include <xen/pci.h>
30 #include <xen/pci_regs.h>
31 #include <asm/paging.h>
32 #include "iommu.h"
33 #include "dmar.h"
34 #include "msi.h"
35 #include "extern.h"
36 #include "vtd.h"
38 #define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
40 static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */
41 static int domid_bitmap_size; /* domain id bitmap size in bits */
42 static unsigned long *domid_bitmap; /* iommu domain id bitmap */
44 static void setup_dom0_devices(struct domain *d);
45 static void setup_dom0_rmrr(struct domain *d);
47 #define DID_FIELD_WIDTH 16
48 #define DID_HIGH_OFFSET 8
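/* Assign an IOMMU domain id to the domain on first use (allocated from
 * domid_bitmap) and store it in the high bits of the context entry. */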
49 static void context_set_domain_id(struct context_entry *context,
50 struct domain *d)
51 {
52 unsigned long flags;
53 domid_t iommu_domid = domain_iommu_domid(d);
55 if ( iommu_domid == 0 )
56 {
57 spin_lock_irqsave(&domid_bitmap_lock, flags);
58 iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
59 set_bit(iommu_domid, domid_bitmap);
60 spin_unlock_irqrestore(&domid_bitmap_lock, flags);
61 d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
62 }
64 context->hi &= (1 << DID_HIGH_OFFSET) - 1;
65 context->hi |= iommu_domid << DID_HIGH_OFFSET;
66 }
68 static void iommu_domid_release(struct domain *d)
69 {
70 domid_t iommu_domid = domain_iommu_domid(d);
72 if ( iommu_domid != 0 )
73 {
74 d->arch.hvm_domain.hvm_iommu.iommu_domid = 0;
75 clear_bit(iommu_domid, domid_bitmap);
76 }
77 }
79 static struct intel_iommu *alloc_intel_iommu(void)
80 {
81 struct intel_iommu *intel;
83 intel = xmalloc(struct intel_iommu);
84 if ( intel == NULL )
85 return NULL;
86 memset(intel, 0, sizeof(struct intel_iommu));
88 spin_lock_init(&intel->qi_ctrl.qinval_lock);
89 spin_lock_init(&intel->qi_ctrl.qinval_poll_lock);
90 spin_lock_init(&intel->ir_ctrl.iremap_lock);
92 return intel;
93 }
95 static void free_intel_iommu(struct intel_iommu *intel)
96 {
97 xfree(intel);
98 }
100 struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu)
101 {
102 return iommu ? &iommu->intel->qi_ctrl : NULL;
103 }
105 struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu)
106 {
107 return iommu ? &iommu->intel->ir_ctrl : NULL;
108 }
110 struct iommu_flush *iommu_get_flush(struct iommu *iommu)
111 {
112 return iommu ? &iommu->intel->flush : NULL;
113 }
115 unsigned int clflush_size;
116 void clflush_cache_range(void *adr, int size)
117 {
118 int i;
119 for ( i = 0; i < size; i += clflush_size )
120 clflush(adr + i);
121 }
123 static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
124 {
125 if ( !ecap_coherent(iommu->ecap) )
126 clflush_cache_range(addr, size);
127 }
129 void iommu_flush_cache_entry(struct iommu *iommu, void *addr)
130 {
131 __iommu_flush_cache(iommu, addr, 8);
132 }
134 void iommu_flush_cache_page(struct iommu *iommu, void *addr)
135 {
136 __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K);
137 }
139 int nr_iommus;
140 /* context entry handling */
141 static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
142 {
143 struct root_entry *root, *root_entries;
144 unsigned long flags;
145 u64 maddr;
147 spin_lock_irqsave(&iommu->lock, flags);
148 root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
149 root = &root_entries[bus];
150 if ( !root_present(*root) )
151 {
152 maddr = alloc_pgtable_maddr();
153 if ( maddr == 0 )
154 {
155 spin_unlock_irqrestore(&iommu->lock, flags);
156 return 0;
157 }
158 set_root_value(*root, maddr);
159 set_root_present(*root);
160 iommu_flush_cache_entry(iommu, root);
161 }
162 maddr = (u64) get_context_addr(*root);
163 unmap_vtd_domain_page(root_entries);
164 spin_unlock_irqrestore(&iommu->lock, flags);
165 return maddr;
166 }
168 static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
169 {
170 struct root_entry *root, *root_entries;
171 struct context_entry *context;
172 u64 context_maddr;
173 int ret;
174 unsigned long flags;
176 spin_lock_irqsave(&iommu->lock, flags);
177 root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
178 root = &root_entries[bus];
179 if ( !root_present(*root) )
180 {
181 ret = 0;
182 goto out;
183 }
184 context_maddr = get_context_addr(*root);
185 context = (struct context_entry *)map_vtd_domain_page(context_maddr);
186 ret = context_present(context[devfn]);
187 unmap_vtd_domain_page(context);
188 out:
189 unmap_vtd_domain_page(root_entries);
190 spin_unlock_irqrestore(&iommu->lock, flags);
191 return ret;
192 }
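/* Walk the VT-d page tables for addr, allocating any missing levels, and
 * return the machine address of the last-level page-table page. */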
194 static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr)
195 {
196 struct hvm_iommu *hd = domain_hvm_iommu(domain);
197 struct acpi_drhd_unit *drhd;
198 struct iommu *iommu;
199 int addr_width = agaw_to_width(hd->agaw);
200 struct dma_pte *parent, *pte = NULL;
201 int level = agaw_to_level(hd->agaw);
202 int offset;
203 unsigned long flags;
204 u64 pte_maddr = 0;
205 u64 *vaddr = NULL;
207 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
208 iommu = drhd->iommu;
210 addr &= (((u64)1) << addr_width) - 1;
211 spin_lock_irqsave(&hd->mapping_lock, flags);
212 if ( hd->pgd_maddr == 0 )
213 {
214 hd->pgd_maddr = alloc_pgtable_maddr();
215 if ( hd->pgd_maddr == 0 )
216 return 0;
217 }
219 parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
220 while ( level > 1 )
221 {
222 offset = address_level_offset(addr, level);
223 pte = &parent[offset];
225 if ( dma_pte_addr(*pte) == 0 )
226 {
227 u64 maddr = alloc_pgtable_maddr();
228 dma_set_pte_addr(*pte, maddr);
229 vaddr = map_vtd_domain_page(maddr);
230 if ( !vaddr )
231 break;
233 /*
234 * higher-level table entries always set r/w; the last-level
235 * page table entries control the actual read/write permission
236 */
237 dma_set_pte_readable(*pte);
238 dma_set_pte_writable(*pte);
239 iommu_flush_cache_entry(iommu, pte);
240 }
241 else
242 {
243 vaddr = map_vtd_domain_page(pte->val);
244 if ( !vaddr )
245 break;
246 }
248 if ( level == 2 )
249 {
250 pte_maddr = pte->val & PAGE_MASK_4K;
251 unmap_vtd_domain_page(vaddr);
252 break;
253 }
255 unmap_vtd_domain_page(parent);
256 parent = (struct dma_pte *)vaddr;
257 vaddr = NULL;
258 level--;
259 }
261 unmap_vtd_domain_page(parent);
262 spin_unlock_irqrestore(&hd->mapping_lock, flags);
263 return pte_maddr;
264 }
266 /* return the machine address of addr's page-table page at the given level */
267 static u64 dma_addr_level_page_maddr(
268 struct domain *domain, u64 addr, int level)
269 {
270 struct hvm_iommu *hd = domain_hvm_iommu(domain);
271 struct dma_pte *parent, *pte = NULL;
272 int total = agaw_to_level(hd->agaw);
273 int offset;
274 u64 pg_maddr = hd->pgd_maddr;
276 if ( pg_maddr == 0 )
277 return 0;
279 parent = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
280 while ( level <= total )
281 {
282 offset = address_level_offset(addr, total);
283 pte = &parent[offset];
284 if ( dma_pte_addr(*pte) == 0 )
285 break;
287 pg_maddr = pte->val & PAGE_MASK_4K;
288 unmap_vtd_domain_page(parent);
290 if ( level == total )
291 return pg_maddr;
293 parent = map_vtd_domain_page(pte->val);
294 total--;
295 }
297 unmap_vtd_domain_page(parent);
298 return 0;
299 }
301 static void iommu_flush_write_buffer(struct iommu *iommu)
302 {
303 u32 val;
304 unsigned long flag;
305 s_time_t start_time;
307 if ( !cap_rwbf(iommu->cap) )
308 return;
309 val = iommu->gcmd | DMA_GCMD_WBF;
311 spin_lock_irqsave(&iommu->register_lock, flag);
312 dmar_writel(iommu->reg, DMAR_GCMD_REG, val);
314 /* Make sure the hardware completes it */
315 start_time = NOW();
316 for ( ; ; )
317 {
318 val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
319 if ( !(val & DMA_GSTS_WBFS) )
320 break;
321 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
322 panic("DMAR hardware is malfunctional,"
323 " please disable IOMMU\n");
324 cpu_relax();
325 }
326 spin_unlock_irqrestore(&iommu->register_lock, flag);
327 }
329 /* the return value determines whether a write buffer flush is needed */
330 static int flush_context_reg(
331 void *_iommu,
332 u16 did, u16 source_id, u8 function_mask, u64 type,
333 int non_present_entry_flush)
334 {
335 struct iommu *iommu = (struct iommu *) _iommu;
336 u64 val = 0;
337 unsigned long flag;
338 s_time_t start_time;
340 /*
341 * In the non-present entry flush case: if the hardware doesn't cache
342 * non-present entries we do nothing; if it does cache them, we flush
343 * the entries of domain 0 (the domain id used to cache any non-present
344 * entries)
345 */
346 if ( non_present_entry_flush )
347 {
348 if ( !cap_caching_mode(iommu->cap) )
349 return 1;
350 else
351 did = 0;
352 }
354 /* use register invalidation */
355 switch ( type )
356 {
357 case DMA_CCMD_GLOBAL_INVL:
358 val = DMA_CCMD_GLOBAL_INVL;
359 break;
360 case DMA_CCMD_DOMAIN_INVL:
361 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
362 break;
363 case DMA_CCMD_DEVICE_INVL:
364 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
365 |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
366 break;
367 default:
368 BUG();
369 }
370 val |= DMA_CCMD_ICC;
372 spin_lock_irqsave(&iommu->register_lock, flag);
373 dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
375 /* Make sure the hardware completes it */
376 start_time = NOW();
377 for ( ; ; )
378 {
379 val = dmar_readq(iommu->reg, DMAR_CCMD_REG);
380 if ( !(val & DMA_CCMD_ICC) )
381 break;
382 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
383 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
384 cpu_relax();
385 }
386 spin_unlock_irqrestore(&iommu->register_lock, flag);
387 /* flushing the context entry implicitly flushes the write buffer */
388 return 0;
389 }
391 static int inline iommu_flush_context_global(
392 struct iommu *iommu, int non_present_entry_flush)
393 {
394 struct iommu_flush *flush = iommu_get_flush(iommu);
395 return flush->context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
396 non_present_entry_flush);
397 }
399 static int inline iommu_flush_context_domain(
400 struct iommu *iommu, u16 did, int non_present_entry_flush)
401 {
402 struct iommu_flush *flush = iommu_get_flush(iommu);
403 return flush->context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
404 non_present_entry_flush);
405 }
407 static int inline iommu_flush_context_device(
408 struct iommu *iommu, u16 did, u16 source_id,
409 u8 function_mask, int non_present_entry_flush)
410 {
411 struct iommu_flush *flush = iommu_get_flush(iommu);
412 return flush->context(iommu, did, source_id, function_mask,
413 DMA_CCMD_DEVICE_INVL,
414 non_present_entry_flush);
415 }
417 /* the return value determines whether a write buffer flush is needed */
418 static int flush_iotlb_reg(void *_iommu, u16 did,
419 u64 addr, unsigned int size_order, u64 type,
420 int non_present_entry_flush)
421 {
422 struct iommu *iommu = (struct iommu *) _iommu;
423 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
424 u64 val = 0, val_iva = 0;
425 unsigned long flag;
426 s_time_t start_time;
428 /*
429 * In the non-present entry flush case: if the hardware doesn't cache
430 * non-present entries we do nothing; if it does cache them, we flush
431 * the entries of domain 0 (the domain id used to cache any non-present
432 * entries)
433 */
434 if ( non_present_entry_flush )
435 {
436 if ( !cap_caching_mode(iommu->cap) )
437 return 1;
438 else
439 did = 0;
440 }
442 /* use register invalidation */
443 switch ( type )
444 {
445 case DMA_TLB_GLOBAL_FLUSH:
446 /* a global flush doesn't need to set IVA_REG */
447 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
448 break;
449 case DMA_TLB_DSI_FLUSH:
450 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
451 break;
452 case DMA_TLB_PSI_FLUSH:
453 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
454 /* Note: always flush non-leaf currently */
455 val_iva = size_order | addr;
456 break;
457 default:
458 BUG();
459 }
460 /* Note: set drain read/write */
461 if ( cap_read_drain(iommu->cap) )
462 val |= DMA_TLB_READ_DRAIN;
463 if ( cap_write_drain(iommu->cap) )
464 val |= DMA_TLB_WRITE_DRAIN;
466 spin_lock_irqsave(&iommu->register_lock, flag);
467 /* Note: Only uses first TLB reg currently */
468 if ( val_iva )
469 dmar_writeq(iommu->reg, tlb_offset, val_iva);
470 dmar_writeq(iommu->reg, tlb_offset + 8, val);
472 /* Make sure the hardware completes it */
473 start_time = NOW();
474 for ( ; ; )
475 {
476 val = dmar_readq(iommu->reg, tlb_offset + 8);
477 if ( !(val & DMA_TLB_IVT) )
478 break;
479 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
480 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
481 cpu_relax();
482 }
483 spin_unlock_irqrestore(&iommu->register_lock, flag);
485 /* check IOTLB invalidation granularity */
486 if ( DMA_TLB_IAIG(val) == 0 )
487 printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
488 if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
489 printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
490 (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
491 /* flushing the context entry implicitly flushes the write buffer */
492 return 0;
493 }
495 static int inline iommu_flush_iotlb_global(struct iommu *iommu,
496 int non_present_entry_flush)
497 {
498 struct iommu_flush *flush = iommu_get_flush(iommu);
499 return flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
500 non_present_entry_flush);
501 }
503 static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
504 int non_present_entry_flush)
505 {
506 struct iommu_flush *flush = iommu_get_flush(iommu);
507 return flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
508 non_present_entry_flush);
509 }
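/* Return the smallest order t such that base and base + size - 1 fall in the
 * same (1 << t)-aligned block; used to pick the PSI address mask below. */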
511 static int inline get_alignment(u64 base, unsigned int size)
512 {
513 int t = 0;
514 u64 end;
516 end = base + size - 1;
517 while ( base != end )
518 {
519 t++;
520 base >>= 1;
521 end >>= 1;
522 }
523 return t;
524 }
526 static int inline iommu_flush_iotlb_psi(
527 struct iommu *iommu, u16 did,
528 u64 addr, unsigned int pages, int non_present_entry_flush)
529 {
530 unsigned int align;
531 struct iommu_flush *flush = iommu_get_flush(iommu);
533 BUG_ON(addr & (~PAGE_MASK_4K));
534 BUG_ON(pages == 0);
536 /* Fallback to domain selective flush if no PSI support */
537 if ( !cap_pgsel_inv(iommu->cap) )
538 return iommu_flush_iotlb_dsi(iommu, did,
539 non_present_entry_flush);
541 /*
542 * PSI requires the number of pages to be a power of two (2^x) and the
543 * base address to be naturally aligned to that size
544 */
545 align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
546 /* Fallback to domain selective flush if size is too big */
547 if ( align > cap_max_amask_val(iommu->cap) )
548 return iommu_flush_iotlb_dsi(iommu, did,
549 non_present_entry_flush);
551 addr >>= PAGE_SHIFT_4K + align;
552 addr <<= PAGE_SHIFT_4K + align;
554 return flush->iotlb(iommu, did, addr, align,
555 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
556 }
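/* Globally flush the context-cache and IOTLB of every IOMMU (CPU caches are
 * flushed first via wbinvd). */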
558 void iommu_flush_all(void)
559 {
560 struct acpi_drhd_unit *drhd;
561 struct iommu *iommu;
563 wbinvd();
564 for_each_drhd_unit ( drhd )
565 {
566 iommu = drhd->iommu;
567 iommu_flush_context_global(iommu, 0);
568 iommu_flush_iotlb_global(iommu, 0);
569 }
570 }
572 /* clear one page's page table */
573 static void dma_pte_clear_one(struct domain *domain, u64 addr)
574 {
575 struct acpi_drhd_unit *drhd;
576 struct iommu *iommu;
577 struct dma_pte *page = NULL, *pte = NULL;
578 u64 pg_maddr;
580 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
582 /* get last level pte */
583 pg_maddr = dma_addr_level_page_maddr(domain, addr, 1);
584 if ( pg_maddr == 0 )
585 return;
586 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
587 pte = page + address_level_offset(addr, 1);
588 if ( pte )
589 {
590 dma_clear_pte(*pte);
591 iommu_flush_cache_entry(drhd->iommu, pte);
593 for_each_drhd_unit ( drhd )
594 {
595 iommu = drhd->iommu;
596 if ( cap_caching_mode(iommu->cap) )
597 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
598 addr, 1, 0);
599 else if (cap_rwbf(iommu->cap))
600 iommu_flush_write_buffer(iommu);
601 }
602 }
603 unmap_vtd_domain_page(page);
604 }
606 /* clear the last-level PTEs; a TLB flush should follow */
607 static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end)
608 {
609 struct hvm_iommu *hd = domain_hvm_iommu(domain);
610 int addr_width = agaw_to_width(hd->agaw);
612 start &= (((u64)1) << addr_width) - 1;
613 end &= (((u64)1) << addr_width) - 1;
614 /* in case of a partial page */
615 start = PAGE_ALIGN_4K(start);
616 end &= PAGE_MASK_4K;
618 /* we don't need a lock here; nobody else touches this iova range */
619 while ( start < end )
620 {
621 dma_pte_clear_one(domain, start);
622 start += PAGE_SIZE_4K;
623 }
624 }
626 /* free page-table pages; the last-level PTEs should already be cleared */
627 void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
628 {
629 struct acpi_drhd_unit *drhd;
630 struct hvm_iommu *hd = domain_hvm_iommu(domain);
631 struct iommu *iommu;
632 int addr_width = agaw_to_width(hd->agaw);
633 struct dma_pte *page, *pte;
634 int total = agaw_to_level(hd->agaw);
635 int level;
636 u64 tmp;
637 u64 pg_maddr;
639 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
640 iommu = drhd->iommu;
642 start &= (((u64)1) << addr_width) - 1;
643 end &= (((u64)1) << addr_width) - 1;
645 /* we don't need a lock here; nobody else touches this iova range */
646 level = 2;
647 while ( level <= total )
648 {
649 tmp = align_to_level(start, level);
650 if ( (tmp >= end) || ((tmp + level_size(level)) > end) )
651 return;
653 while ( tmp < end )
654 {
655 pg_maddr = dma_addr_level_page_maddr(domain, tmp, level);
656 if ( pg_maddr == 0 )
657 {
658 tmp += level_size(level);
659 continue;
660 }
661 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
662 pte = page + address_level_offset(tmp, level);
663 dma_clear_pte(*pte);
664 iommu_flush_cache_entry(iommu, pte);
665 unmap_vtd_domain_page(page);
666 free_pgtable_maddr(pg_maddr);
668 tmp += level_size(level);
669 }
670 level++;
671 }
673 /* free pgd */
674 if ( start == 0 && end >= ((((u64)1) << addr_width) - 1) )
675 {
676 free_pgtable_maddr(hd->pgd_maddr);
677 hd->pgd_maddr = 0;
678 }
679 }
681 /* free all VT-d page tables when shutting down or destroying a domain. */
682 static void iommu_free_pagetable(struct domain *domain)
683 {
684 struct hvm_iommu *hd = domain_hvm_iommu(domain);
685 int addr_width = agaw_to_width(hd->agaw);
686 u64 start, end;
688 start = 0;
689 end = (((u64)1) << addr_width) - 1;
691 dma_pte_free_pagetable(domain, start, end);
692 }
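/* Allocate a fresh root-entry table, program it into DMAR_RTADDR_REG, issue a
 * Set Root Table Pointer command and wait for the hardware to acknowledge. */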
694 static int iommu_set_root_entry(struct iommu *iommu)
695 {
696 u32 cmd, sts;
697 unsigned long flags;
698 s_time_t start_time;
700 if ( iommu->root_maddr != 0 )
701 {
702 free_pgtable_maddr(iommu->root_maddr);
703 iommu->root_maddr = 0;
704 }
706 spin_lock_irqsave(&iommu->register_lock, flags);
708 iommu->root_maddr = alloc_pgtable_maddr();
709 if ( iommu->root_maddr == 0 )
710 return -ENOMEM;
712 dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
713 cmd = iommu->gcmd | DMA_GCMD_SRTP;
714 dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
716 /* Make sure the hardware completes it */
717 start_time = NOW();
718 for ( ; ; )
719 {
720 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
721 if ( sts & DMA_GSTS_RTPS )
722 break;
723 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
724 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
725 cpu_relax();
726 }
728 spin_unlock_irqrestore(&iommu->register_lock, flags);
730 return 0;
731 }
733 static int iommu_enable_translation(struct iommu *iommu)
734 {
735 u32 sts;
736 unsigned long flags;
737 s_time_t start_time;
739 dprintk(XENLOG_INFO VTDPREFIX,
740 "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
741 spin_lock_irqsave(&iommu->register_lock, flags);
742 iommu->gcmd |= DMA_GCMD_TE;
743 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
744 /* Make sure the hardware completes it */
745 start_time = NOW();
746 for ( ; ; )
747 {
748 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
749 if ( sts & DMA_GSTS_TES )
750 break;
751 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
752 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
753 cpu_relax();
754 }
756 /* Disable PMRs when VT-d engine takes effect per spec definition */
757 disable_pmr(iommu);
758 spin_unlock_irqrestore(&iommu->register_lock, flags);
759 return 0;
760 }
762 int iommu_disable_translation(struct iommu *iommu)
763 {
764 u32 sts;
765 unsigned long flags;
766 s_time_t start_time;
768 spin_lock_irqsave(&iommu->register_lock, flags);
769 iommu->gcmd &= ~ DMA_GCMD_TE;
770 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
772 /* Make sure the hardware completes it */
773 start_time = NOW();
774 for ( ; ; )
775 {
776 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
777 if ( !(sts & DMA_GSTS_TES) )
778 break;
779 if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
780 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
781 cpu_relax();
782 }
783 spin_unlock_irqrestore(&iommu->register_lock, flags);
784 return 0;
785 }
787 static struct iommu *vector_to_iommu[NR_VECTORS];
788 static int iommu_page_fault_do_one(struct iommu *iommu, int type,
789 u8 fault_reason, u16 source_id, u32 addr)
790 {
791 dprintk(XENLOG_WARNING VTDPREFIX,
792 "iommu_fault:%s: %x:%x.%x addr %x REASON %x iommu->reg = %p\n",
793 (type ? "DMA Read" : "DMA Write"), (source_id >> 8),
794 PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr,
795 fault_reason, iommu->reg);
797 if ( fault_reason < 0x20 )
798 print_vtd_entries(current->domain, iommu, (source_id >> 8),
799 (source_id & 0xff), (addr >> PAGE_SHIFT));
801 return 0;
802 }
804 static void iommu_fault_status(u32 fault_status)
805 {
806 if ( fault_status & DMA_FSTS_PFO )
807 dprintk(XENLOG_ERR VTDPREFIX,
808 "iommu_fault_status: Fault Overflow\n");
809 else if ( fault_status & DMA_FSTS_PPF )
810 dprintk(XENLOG_ERR VTDPREFIX,
811 "iommu_fault_status: Primary Pending Fault\n");
812 else if ( fault_status & DMA_FSTS_AFO )
813 dprintk(XENLOG_ERR VTDPREFIX,
814 "iommu_fault_status: Advanced Fault Overflow\n");
815 else if ( fault_status & DMA_FSTS_APF )
816 dprintk(XENLOG_ERR VTDPREFIX,
817 "iommu_fault_status: Advanced Pending Fault\n");
818 else if ( fault_status & DMA_FSTS_IQE )
819 dprintk(XENLOG_ERR VTDPREFIX,
820 "iommu_fault_status: Invalidation Queue Error\n");
821 else if ( fault_status & DMA_FSTS_ICE )
822 dprintk(XENLOG_ERR VTDPREFIX,
823 "iommu_fault_status: Invalidation Completion Error\n");
824 else if ( fault_status & DMA_FSTS_ITE )
825 dprintk(XENLOG_ERR VTDPREFIX,
826 "iommu_fault_status: Invalidation Time-out Error\n");
827 }
829 #define PRIMARY_FAULT_REG_LEN (16)
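/* Fault interrupt handler: report the fault status, then walk the primary
 * fault recording registers, logging and clearing each pending fault. */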
830 static void iommu_page_fault(int vector, void *dev_id,
831 struct cpu_user_regs *regs)
832 {
833 struct iommu *iommu = dev_id;
834 int reg, fault_index;
835 u32 fault_status;
836 unsigned long flags;
838 dprintk(XENLOG_WARNING VTDPREFIX,
839 "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
841 spin_lock_irqsave(&iommu->register_lock, flags);
842 fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
843 spin_unlock_irqrestore(&iommu->register_lock, flags);
845 iommu_fault_status(fault_status);
847 /* FIXME: ignore advanced fault log */
848 if ( !(fault_status & DMA_FSTS_PPF) )
849 return;
850 fault_index = dma_fsts_fault_record_index(fault_status);
851 reg = cap_fault_reg_offset(iommu->cap);
852 for ( ; ; )
853 {
854 u8 fault_reason;
855 u16 source_id;
856 u32 guest_addr, data;
857 int type;
859 /* highest 32 bits */
860 spin_lock_irqsave(&iommu->register_lock, flags);
861 data = dmar_readl(iommu->reg, reg +
862 fault_index * PRIMARY_FAULT_REG_LEN + 12);
863 if ( !(data & DMA_FRCD_F) )
864 {
865 spin_unlock_irqrestore(&iommu->register_lock, flags);
866 break;
867 }
869 fault_reason = dma_frcd_fault_reason(data);
870 type = dma_frcd_type(data);
872 data = dmar_readl(iommu->reg, reg +
873 fault_index * PRIMARY_FAULT_REG_LEN + 8);
874 source_id = dma_frcd_source_id(data);
876 guest_addr = dmar_readq(iommu->reg, reg +
877 fault_index * PRIMARY_FAULT_REG_LEN);
878 guest_addr = dma_frcd_page_addr(guest_addr);
879 /* clear the fault */
880 dmar_writel(iommu->reg, reg +
881 fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
882 spin_unlock_irqrestore(&iommu->register_lock, flags);
884 iommu_page_fault_do_one(iommu, type, fault_reason,
885 source_id, guest_addr);
887 fault_index++;
888 if ( fault_index > cap_num_fault_regs(iommu->cap) )
889 fault_index = 0;
890 }
892 /* clear primary fault overflow */
893 if ( fault_status & DMA_FSTS_PFO )
894 {
895 spin_lock_irqsave(&iommu->register_lock, flags);
896 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
897 spin_unlock_irqrestore(&iommu->register_lock, flags);
898 }
899 }
901 static void dma_msi_unmask(unsigned int vector)
902 {
903 struct iommu *iommu = vector_to_iommu[vector];
904 unsigned long flags;
906 /* unmask it */
907 spin_lock_irqsave(&iommu->register_lock, flags);
908 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
909 spin_unlock_irqrestore(&iommu->register_lock, flags);
910 }
912 static void dma_msi_mask(unsigned int vector)
913 {
914 unsigned long flags;
915 struct iommu *iommu = vector_to_iommu[vector];
917 /* mask it */
918 spin_lock_irqsave(&iommu->register_lock, flags);
919 dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
920 spin_unlock_irqrestore(&iommu->register_lock, flags);
921 }
923 static unsigned int dma_msi_startup(unsigned int vector)
924 {
925 dma_msi_unmask(vector);
926 return 0;
927 }
929 static void dma_msi_end(unsigned int vector)
930 {
931 dma_msi_unmask(vector);
932 ack_APIC_irq();
933 }
935 static void dma_msi_data_init(struct iommu *iommu, int vector)
936 {
937 u32 msi_data = 0;
938 unsigned long flags;
940 /* Fixed, edge, assert mode. Follow MSI setting */
941 msi_data |= vector & 0xff;
942 msi_data |= 1 << 14;
944 spin_lock_irqsave(&iommu->register_lock, flags);
945 dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
946 spin_unlock_irqrestore(&iommu->register_lock, flags);
947 }
949 static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
950 {
951 u64 msi_address;
952 unsigned long flags;
954 /* Physical, dedicated cpu. Follow MSI setting */
955 msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
956 msi_address |= MSI_PHYSICAL_MODE << 2;
957 msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
958 msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;
960 spin_lock_irqsave(&iommu->register_lock, flags);
961 dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
962 dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
963 spin_unlock_irqrestore(&iommu->register_lock, flags);
964 }
966 static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
967 {
968 struct iommu *iommu = vector_to_iommu[vector];
969 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
970 }
972 static struct hw_interrupt_type dma_msi_type = {
973 .typename = "DMA_MSI",
974 .startup = dma_msi_startup,
975 .shutdown = dma_msi_mask,
976 .enable = dma_msi_unmask,
977 .disable = dma_msi_mask,
978 .ack = dma_msi_mask,
979 .end = dma_msi_end,
980 .set_affinity = dma_msi_set_affinity,
981 };
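/* Allocate a vector for the IOMMU's fault-reporting MSI and install
 * iommu_page_fault() as its handler. */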
983 int iommu_set_interrupt(struct iommu *iommu)
984 {
985 int vector, ret;
987 vector = assign_irq_vector(AUTO_ASSIGN);
988 vector_to_iommu[vector] = iommu;
990 /* The VT-d fault is an MSI; make irq == vector */
991 irq_vector[vector] = vector;
992 vector_irq[vector] = vector;
994 if ( !vector )
995 {
996 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
997 return -EINVAL;
998 }
1000 irq_desc[vector].handler = &dma_msi_type;
1001 ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
1002 if ( ret )
1003 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
1004 return vector;
1007 static int iommu_alloc(struct acpi_drhd_unit *drhd)
1009 struct iommu *iommu;
1011 if ( nr_iommus > MAX_IOMMUS )
1013 gdprintk(XENLOG_ERR VTDPREFIX,
1014 "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
1015 return -ENOMEM;
1018 iommu = xmalloc(struct iommu);
1019 if ( iommu == NULL )
1020 return -ENOMEM;
1021 memset(iommu, 0, sizeof(struct iommu));
1023 iommu->intel = alloc_intel_iommu();
1024 if ( iommu->intel == NULL )
1026 xfree(iommu);
1027 return -ENOMEM;
1030 set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
1031 iommu->reg = (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
1032 nr_iommus++;
1034 iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
1035 iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
1037 spin_lock_init(&iommu->lock);
1038 spin_lock_init(&iommu->register_lock);
1040 drhd->iommu = iommu;
1041 return 0;
1044 static void iommu_free(struct acpi_drhd_unit *drhd)
1046 struct iommu *iommu = drhd->iommu;
1048 if ( iommu == NULL )
1049 return;
1051 if ( iommu->root_maddr != 0 )
1053 free_pgtable_maddr(iommu->root_maddr);
1054 iommu->root_maddr = 0;
1057 if ( iommu->reg )
1058 iounmap(iommu->reg);
1060 free_intel_iommu(iommu->intel);
1061 free_irq(iommu->vector);
1062 xfree(iommu);
1064 drhd->iommu = NULL;
1067 #define guestwidth_to_adjustwidth(gaw) ({ \
1068 int agaw, r = (gaw - 12) % 9; \
1069 agaw = (r == 0) ? gaw : (gaw + 9 - r); \
1070 if ( agaw > 64 ) \
1071 agaw = 64; \
1072 agaw; })
1074 static int intel_iommu_domain_init(struct domain *d)
1076 struct hvm_iommu *hd = domain_hvm_iommu(d);
1077 struct iommu *iommu = NULL;
1078 int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
1079 int i, adjust_width, agaw;
1080 unsigned long sagaw;
1081 struct acpi_drhd_unit *drhd;
1083 INIT_LIST_HEAD(&hd->pdev_list);
1085 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1086 iommu = drhd->iommu;
1088 /* Calculate AGAW. */
1089 if ( guest_width > cap_mgaw(iommu->cap) )
1090 guest_width = cap_mgaw(iommu->cap);
1091 adjust_width = guestwidth_to_adjustwidth(guest_width);
1092 agaw = width_to_agaw(adjust_width);
1093 /* FIXME: hardware doesn't support it, choose a bigger one? */
1094 sagaw = cap_sagaw(iommu->cap);
1095 if ( !test_bit(agaw, &sagaw) )
1097 gdprintk(XENLOG_ERR VTDPREFIX,
1098 "IOMMU: hardware doesn't support the agaw\n");
1099 agaw = find_next_bit(&sagaw, 5, agaw);
1100 if ( agaw >= 5 )
1101 return -ENODEV;
1103 hd->agaw = agaw;
1105 if ( d->domain_id == 0 )
1107 /* Set up 1:1 page table for dom0. */
1108 for ( i = 0; i < max_page; i++ )
1109 iommu_map_page(d, i, i);
1111 setup_dom0_devices(d);
1112 setup_dom0_rmrr(d);
1114 iommu_flush_all();
1116 for_each_drhd_unit ( drhd )
1118 iommu = drhd->iommu;
1119 if ( iommu_enable_translation(iommu) )
1120 return -EIO;
1124 return 0;
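/* Fill in and flush the context entry for (bus, devfn), pointing it at the
 * domain's page tables (or pass-through, where the hardware supports it). */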
1127 static int domain_context_mapping_one(
1128 struct domain *domain,
1129 struct iommu *iommu,
1130 u8 bus, u8 devfn)
1132 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1133 struct context_entry *context, *context_entries;
1134 unsigned long flags;
1135 u64 maddr;
1137 maddr = bus_to_context_maddr(iommu, bus);
1138 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1139 context = &context_entries[devfn];
1141 if ( context_present(*context) )
1143 unmap_vtd_domain_page(context_entries);
1144 return 0;
1147 spin_lock_irqsave(&iommu->lock, flags);
1148 /*
1149 * domain_id 0 is not valid on Intel's IOMMU; domain ids are therefore
1150 * 1-based, as required by Intel's IOMMU hardware.
1151 */
1152 context_set_domain_id(context, domain);
1153 context_set_address_width(*context, hd->agaw);
1155 if ( ecap_pass_thru(iommu->ecap) )
1156 context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
1157 #ifdef CONTEXT_PASSTHRU
1158 else
1160 #endif
1161 ASSERT(hd->pgd_maddr != 0);
1162 context_set_address_root(*context, hd->pgd_maddr);
1163 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1164 #ifdef CONTEXT_PASSTHRU
1166 #endif
1168 context_set_fault_enable(*context);
1169 context_set_present(*context);
1170 iommu_flush_cache_entry(iommu, context);
1172 unmap_vtd_domain_page(context_entries);
1174 if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
1175 (((u16)bus) << 8) | devfn,
1176 DMA_CCMD_MASK_NOBIT, 1) )
1177 iommu_flush_write_buffer(iommu);
1178 else
1179 iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
1180 spin_unlock_irqrestore(&iommu->lock, flags);
1182 return 0;
1185 #define PCI_BASE_CLASS_BRIDGE 0x06
1186 #define PCI_CLASS_BRIDGE_PCI 0x0604
1188 #define DEV_TYPE_PCIe_ENDPOINT 1
1189 #define DEV_TYPE_PCI_BRIDGE 2
1190 #define DEV_TYPE_PCI 3
1192 int pdev_type(struct pci_dev *dev)
1194 u16 class_device;
1195 u16 status;
1197 class_device = pci_conf_read16(dev->bus, PCI_SLOT(dev->devfn),
1198 PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE);
1199 if ( class_device == PCI_CLASS_BRIDGE_PCI )
1200 return DEV_TYPE_PCI_BRIDGE;
1202 status = pci_conf_read16(dev->bus, PCI_SLOT(dev->devfn),
1203 PCI_FUNC(dev->devfn), PCI_STATUS);
1205 if ( !(status & PCI_STATUS_CAP_LIST) )
1206 return DEV_TYPE_PCI;
1208 if ( pci_find_next_cap(dev->bus, dev->devfn,
1209 PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP) )
1210 return DEV_TYPE_PCIe_ENDPOINT;
1212 return DEV_TYPE_PCI;
1215 #define MAX_BUSES 256
1216 struct pci_dev bus2bridge[MAX_BUSES];
1218 static int domain_context_mapping(
1219 struct domain *domain,
1220 struct iommu *iommu,
1221 struct pci_dev *pdev)
1223 int ret = 0;
1224 int dev, func, sec_bus, sub_bus;
1225 u32 type;
1227 type = pdev_type(pdev);
1228 switch ( type )
1230 case DEV_TYPE_PCI_BRIDGE:
1231 sec_bus = pci_conf_read8(
1232 pdev->bus, PCI_SLOT(pdev->devfn),
1233 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1235 if ( bus2bridge[sec_bus].bus == 0 )
1237 bus2bridge[sec_bus].bus = pdev->bus;
1238 bus2bridge[sec_bus].devfn = pdev->devfn;
1241 sub_bus = pci_conf_read8(
1242 pdev->bus, PCI_SLOT(pdev->devfn),
1243 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1245 if ( sec_bus != sub_bus )
1246 gdprintk(XENLOG_WARNING VTDPREFIX,
1247 "context_context_mapping: nested PCI bridge not "
1248 "supported: bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n",
1249 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1250 sec_bus, sub_bus);
1251 break;
1252 case DEV_TYPE_PCIe_ENDPOINT:
1253 gdprintk(XENLOG_INFO VTDPREFIX,
1254 "domain_context_mapping:PCIe : bdf = %x:%x:%x\n",
1255 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1256 ret = domain_context_mapping_one(domain, iommu,
1257 (u8)(pdev->bus), (u8)(pdev->devfn));
1258 break;
1259 case DEV_TYPE_PCI:
1260 gdprintk(XENLOG_INFO VTDPREFIX,
1261 "domain_context_mapping:PCI: bdf = %x:%x:%x\n",
1262 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1264 if ( pdev->bus == 0 )
1265 ret = domain_context_mapping_one(
1266 domain, iommu, (u8)(pdev->bus), (u8)(pdev->devfn));
1267 else
1269 if ( bus2bridge[pdev->bus].bus != 0 )
1270 gdprintk(XENLOG_WARNING VTDPREFIX,
1271 "domain_context_mapping:bus2bridge"
1272 "[%d].bus != 0\n", pdev->bus);
1274 ret = domain_context_mapping_one(
1275 domain, iommu,
1276 (u8)(bus2bridge[pdev->bus].bus),
1277 (u8)(bus2bridge[pdev->bus].devfn));
1279 /* now map everything behind the PCI bridge */
1280 for ( dev = 0; dev < 32; dev++ )
1282 for ( func = 0; func < 8; func++ )
1284 ret = domain_context_mapping_one(
1285 domain, iommu,
1286 pdev->bus, (u8)PCI_DEVFN(dev, func));
1287 if ( ret )
1288 return ret;
1292 break;
1293 default:
1294 gdprintk(XENLOG_ERR VTDPREFIX,
1295 "domain_context_mapping:unknown type : bdf = %x:%x:%x\n",
1296 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1297 ret = -EINVAL;
1298 break;
1301 return ret;
1304 static int domain_context_unmap_one(
1305 struct domain *domain,
1306 struct iommu *iommu,
1307 u8 bus, u8 devfn)
1309 struct context_entry *context, *context_entries;
1310 unsigned long flags;
1311 u64 maddr;
1313 maddr = bus_to_context_maddr(iommu, bus);
1314 context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
1315 context = &context_entries[devfn];
1317 if ( !context_present(*context) )
1319 unmap_vtd_domain_page(context_entries);
1320 return 0;
1323 spin_lock_irqsave(&iommu->lock, flags);
1324 context_clear_present(*context);
1325 context_clear_entry(*context);
1326 iommu_flush_cache_entry(iommu, context);
1327 iommu_flush_context_global(iommu, 0);
1328 iommu_flush_iotlb_global(iommu, 0);
1329 unmap_vtd_domain_page(context_entries);
1330 spin_unlock_irqrestore(&iommu->lock, flags);
1332 return 0;
1335 static int domain_context_unmap(
1336 struct domain *domain,
1337 struct iommu *iommu,
1338 struct pci_dev *pdev)
1340 int ret = 0;
1341 int dev, func, sec_bus, sub_bus;
1342 u32 type;
1344 type = pdev_type(pdev);
1345 switch ( type )
1347 case DEV_TYPE_PCI_BRIDGE:
1348 sec_bus = pci_conf_read8(
1349 pdev->bus, PCI_SLOT(pdev->devfn),
1350 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1351 sub_bus = pci_conf_read8(
1352 pdev->bus, PCI_SLOT(pdev->devfn),
1353 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1354 break;
1355 case DEV_TYPE_PCIe_ENDPOINT:
1356 ret = domain_context_unmap_one(domain, iommu,
1357 (u8)(pdev->bus), (u8)(pdev->devfn));
1358 break;
1359 case DEV_TYPE_PCI:
1360 if ( pdev->bus == 0 )
1361 ret = domain_context_unmap_one(
1362 domain, iommu,
1363 (u8)(pdev->bus), (u8)(pdev->devfn));
1364 else
1366 if ( bus2bridge[pdev->bus].bus != 0 )
1367 gdprintk(XENLOG_WARNING VTDPREFIX,
1368 "domain_context_unmap:"
1369 "bus2bridge[%d].bus != 0\n", pdev->bus);
1371 ret = domain_context_unmap_one(domain, iommu,
1372 (u8)(bus2bridge[pdev->bus].bus),
1373 (u8)(bus2bridge[pdev->bus].devfn));
1375 /* Unmap everything behind the PCI bridge */
1376 for ( dev = 0; dev < 32; dev++ )
1378 for ( func = 0; func < 8; func++ )
1380 ret = domain_context_unmap_one(
1381 domain, iommu,
1382 pdev->bus, (u8)PCI_DEVFN(dev, func));
1383 if ( ret )
1384 return ret;
1388 break;
1389 default:
1390 gdprintk(XENLOG_ERR VTDPREFIX,
1391 "domain_context_unmap:unknown type: bdf = %x:%x:%x\n",
1392 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1393 ret = -EINVAL;
1394 break;
1397 return ret;
1400 void reassign_device_ownership(
1401 struct domain *source,
1402 struct domain *target,
1403 u8 bus, u8 devfn)
1405 struct hvm_iommu *source_hd = domain_hvm_iommu(source);
1406 struct hvm_iommu *target_hd = domain_hvm_iommu(target);
1407 struct pci_dev *pdev;
1408 struct acpi_drhd_unit *drhd;
1409 struct iommu *iommu;
1410 int status;
1411 unsigned long flags;
1413 pdev_flr(bus, devfn);
1415 for_each_pdev( source, pdev )
1416 if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
1417 goto found;
1419 return;
1421 found:
1422 drhd = acpi_find_matched_drhd_unit(pdev);
1423 iommu = drhd->iommu;
1424 domain_context_unmap(source, iommu, pdev);
1426 /* Move pci device from the source domain to target domain. */
1427 spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
1428 spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
1429 list_move(&pdev->list, &target_hd->pdev_list);
1430 spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
1431 spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
1433 status = domain_context_mapping(target, iommu, pdev);
1434 if ( status != 0 )
1435 gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
1438 void return_devices_to_dom0(struct domain *d)
1440 struct hvm_iommu *hd = domain_hvm_iommu(d);
1441 struct pci_dev *pdev;
1443 while ( !list_empty(&hd->pdev_list) )
1445 pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
1446 reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
1449 #ifdef VTD_DEBUG
1450 for_each_pdev ( dom0, pdev )
1451 dprintk(XENLOG_INFO VTDPREFIX,
1452 "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
1453 dom0->domain_id, pdev->bus,
1454 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1455 #endif
1458 void iommu_domain_teardown(struct domain *d)
1460 if ( list_empty(&acpi_drhd_units) )
1461 return;
1463 iommu_free_pagetable(d);
1464 return_devices_to_dom0(d);
1465 iommu_domid_release(d);
1468 static int domain_context_mapped(struct pci_dev *pdev)
1470 struct acpi_drhd_unit *drhd;
1471 struct iommu *iommu;
1472 int ret;
1474 for_each_drhd_unit ( drhd )
1476 iommu = drhd->iommu;
1477 ret = device_context_mapped(iommu, pdev->bus, pdev->devfn);
1478 if ( ret )
1479 return ret;
1482 return 0;
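/* Map a single guest frame (gfn -> mfn) in the domain's VT-d page tables and
 * flush the affected IOTLB entries or write buffers on each IOMMU. */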
1485 int intel_iommu_map_page(
1486 struct domain *d, unsigned long gfn, unsigned long mfn)
1488 struct acpi_drhd_unit *drhd;
1489 struct iommu *iommu;
1490 struct dma_pte *page = NULL, *pte = NULL;
1491 u64 pg_maddr;
1493 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1494 iommu = drhd->iommu;
1496 #ifdef CONTEXT_PASSTHRU
1497 /* do nothing for dom0 if the IOMMU supports pass-through */
1498 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1499 return 0;
1500 #endif
1502 pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K);
1503 if ( pg_maddr == 0 )
1504 return -ENOMEM;
1505 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
1506 pte = page + (gfn & LEVEL_MASK);
1507 dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
1508 dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
1509 iommu_flush_cache_entry(iommu, pte);
1510 unmap_vtd_domain_page(page);
1512 for_each_drhd_unit ( drhd )
1514 iommu = drhd->iommu;
1515 if ( cap_caching_mode(iommu->cap) )
1516 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1517 (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0);
1518 else if ( cap_rwbf(iommu->cap) )
1519 iommu_flush_write_buffer(iommu);
1522 return 0;
1525 int intel_iommu_unmap_page(struct domain *d, unsigned long gfn)
1527 struct acpi_drhd_unit *drhd;
1528 struct iommu *iommu;
1530 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1531 iommu = drhd->iommu;
1533 #ifdef CONTEXT_PASSTHRU
1534 /* do nothing for dom0 if the IOMMU supports pass-through */
1535 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1536 return 0;
1537 #endif
1539 dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
1541 return 0;
1544 int iommu_page_mapping(struct domain *domain, paddr_t iova,
1545 paddr_t hpa, size_t size, int prot)
1547 struct acpi_drhd_unit *drhd;
1548 struct iommu *iommu;
1549 u64 start_pfn, end_pfn;
1550 struct dma_pte *page = NULL, *pte = NULL;
1551 int index;
1552 u64 pg_maddr;
1554 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1555 iommu = drhd->iommu;
1556 if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
1557 return -EINVAL;
1558 iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
1559 start_pfn = hpa >> PAGE_SHIFT_4K;
1560 end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K;
1561 index = 0;
1562 while ( start_pfn < end_pfn )
1564 pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K * index);
1565 if ( pg_maddr == 0 )
1566 return -ENOMEM;
1567 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
1568 pte = page + (start_pfn & LEVEL_MASK);
1569 dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K);
1570 dma_set_pte_prot(*pte, prot);
1571 iommu_flush_cache_entry(iommu, pte);
1572 unmap_vtd_domain_page(page);
1573 start_pfn++;
1574 index++;
1577 for_each_drhd_unit ( drhd )
1579 iommu = drhd->iommu;
1580 if ( cap_caching_mode(iommu->cap) )
1581 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
1582 iova, index, 0);
1583 else if ( cap_rwbf(iommu->cap) )
1584 iommu_flush_write_buffer(iommu);
1587 return 0;
1590 int iommu_page_unmapping(struct domain *domain, paddr_t addr, size_t size)
1592 dma_pte_clear_range(domain, addr, addr + size);
1594 return 0;
1597 void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry)
1599 struct acpi_drhd_unit *drhd;
1600 struct iommu *iommu = NULL;
1601 struct dma_pte *pte = (struct dma_pte *) p2m_entry;
1603 for_each_drhd_unit ( drhd )
1605 iommu = drhd->iommu;
1606 if ( cap_caching_mode(iommu->cap) )
1607 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1608 (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0);
1609 else if ( cap_rwbf(iommu->cap) )
1610 iommu_flush_write_buffer(iommu);
1613 iommu_flush_cache_entry(iommu, pte);
1616 static int iommu_prepare_rmrr_dev(
1617 struct domain *d,
1618 struct acpi_rmrr_unit *rmrr,
1619 struct pci_dev *pdev)
1621 struct acpi_drhd_unit *drhd;
1622 unsigned long size;
1623 int ret;
1625 /* page table init */
1626 size = rmrr->end_address - rmrr->base_address + 1;
1627 ret = iommu_page_mapping(d, rmrr->base_address,
1628 rmrr->base_address, size,
1629 DMA_PTE_READ|DMA_PTE_WRITE);
1630 if ( ret )
1631 return ret;
1633 if ( domain_context_mapped(pdev) == 0 )
1635 drhd = acpi_find_matched_drhd_unit(pdev);
1636 ret = domain_context_mapping(d, drhd->iommu, pdev);
1637 if ( !ret )
1638 return 0;
1641 return ret;
1644 static void setup_dom0_devices(struct domain *d)
1646 struct hvm_iommu *hd;
1647 struct acpi_drhd_unit *drhd;
1648 struct pci_dev *pdev;
1649 int bus, dev, func, ret;
1650 u32 l;
1652 hd = domain_hvm_iommu(d);
1654 for ( bus = 0; bus < 256; bus++ )
1656 for ( dev = 0; dev < 32; dev++ )
1658 for ( func = 0; func < 8; func++ )
1660 l = pci_conf_read32(bus, dev, func, PCI_VENDOR_ID);
1661 /* some broken boards return 0 or ~0 if a slot is empty: */
1662 if ( (l == 0xffffffff) || (l == 0x00000000) ||
1663 (l == 0x0000ffff) || (l == 0xffff0000) )
1664 continue;
1665 pdev = xmalloc(struct pci_dev);
1666 pdev->bus = bus;
1667 pdev->devfn = PCI_DEVFN(dev, func);
1668 list_add_tail(&pdev->list, &hd->pdev_list);
1670 drhd = acpi_find_matched_drhd_unit(pdev);
1671 ret = domain_context_mapping(d, drhd->iommu, pdev);
1672 if ( ret != 0 )
1673 gdprintk(XENLOG_ERR VTDPREFIX,
1674 "domain_context_mapping failed\n");
1680 void clear_fault_bits(struct iommu *iommu)
1682 u64 val;
1684 val = dmar_readq(
1685 iommu->reg,
1686 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+0x8);
1687 dmar_writeq(
1688 iommu->reg,
1689 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8,
1690 val);
1691 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_FAULTS);
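/* Program the root table, fault interrupt and register-based flush hooks on
 * every IOMMU, then probe for queued invalidation and interrupt remapping. */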
1694 static int init_vtd_hw(void)
1696 struct acpi_drhd_unit *drhd;
1697 struct iommu *iommu;
1698 struct iommu_flush *flush = NULL;
1699 int vector;
1700 int ret;
1702 for_each_drhd_unit ( drhd )
1704 iommu = drhd->iommu;
1705 ret = iommu_set_root_entry(iommu);
1706 if ( ret )
1708 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
1709 return -EIO;
1712 vector = iommu_set_interrupt(iommu);
1713 dma_msi_data_init(iommu, vector);
1714 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
1715 iommu->vector = vector;
1716 clear_fault_bits(iommu);
1717 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
1719 /* initialize flush functions */
1720 flush = iommu_get_flush(iommu);
1721 flush->context = flush_context_reg;
1722 flush->iotlb = flush_iotlb_reg;
1725 for_each_drhd_unit ( drhd )
1727 iommu = drhd->iommu;
1728 if ( qinval_setup(iommu) != 0 )
1729 dprintk(XENLOG_ERR VTDPREFIX,
1730 "Queued Invalidation hardware not found\n");
1733 for_each_drhd_unit ( drhd )
1735 iommu = drhd->iommu;
1736 if ( intremap_setup(iommu) != 0 )
1737 dprintk(XENLOG_ERR VTDPREFIX,
1738 "Interrupt Remapping hardware not found\n");
1741 return 0;
1744 static void setup_dom0_rmrr(struct domain *d)
1746 struct acpi_rmrr_unit *rmrr;
1747 struct pci_dev *pdev;
1748 int ret;
1750 for_each_rmrr_device ( rmrr, pdev )
1751 ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
1752 if ( ret )
1753 gdprintk(XENLOG_ERR VTDPREFIX,
1754 "IOMMU: mapping reserved region failed\n");
1755 end_for_each_rmrr_device ( rmrr, pdev )
1758 int intel_vtd_setup(void)
1760 struct acpi_drhd_unit *drhd;
1761 struct iommu *iommu;
1763 if ( !vtd_enabled )
1764 return -ENODEV;
1766 spin_lock_init(&domid_bitmap_lock);
1767 clflush_size = get_clflush_size();
1769 for_each_drhd_unit ( drhd )
1770 if ( iommu_alloc(drhd) != 0 )
1771 goto error;
1773 /* Allocate IO page directory page for the domain. */
1774 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1775 iommu = drhd->iommu;
1777 /* Allocate domain id bitmap, and set bit 0 as reserved */
1778 domid_bitmap_size = cap_ndoms(iommu->cap);
1779 domid_bitmap = xmalloc_array(unsigned long,
1780 BITS_TO_LONGS(domid_bitmap_size));
1781 if ( domid_bitmap == NULL )
1782 goto error;
1783 memset(domid_bitmap, 0, domid_bitmap_size / 8);
1784 set_bit(0, domid_bitmap);
1786 init_vtd_hw();
1788 return 0;
1790 error:
1791 for_each_drhd_unit ( drhd )
1792 iommu_free(drhd);
1793 vtd_enabled = 0;
1794 return -ENOMEM;
1797 /*
1798 * If the device isn't owned by dom0, it has already been assigned
1799 * to another domain, or it doesn't exist.
1800 */
1801 int device_assigned(u8 bus, u8 devfn)
1803 struct pci_dev *pdev;
1805 for_each_pdev( dom0, pdev )
1806 if ( (pdev->bus == bus ) && (pdev->devfn == devfn) )
1807 return 0;
1809 return 1;
1812 int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
1814 struct acpi_rmrr_unit *rmrr;
1815 struct pci_dev *pdev;
1816 int ret = 0;
1818 if ( list_empty(&acpi_drhd_units) )
1819 return ret;
1821 reassign_device_ownership(dom0, d, bus, devfn);
1823 /* Set up the RMRR identity mapping */
1824 for_each_rmrr_device( rmrr, pdev )
1825 if ( pdev->bus == bus && pdev->devfn == devfn )
1827 /* FIXME: Because USB RMRR conflicts with guest bios region,
1828 * ignore USB RMRR temporarily.
1829 */
1830 if ( is_usb_device(pdev) )
1831 return 0;
1833 ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
1834 if ( ret )
1836 gdprintk(XENLOG_ERR VTDPREFIX,
1837 "IOMMU: mapping reserved region failed\n");
1838 return ret;
1841 end_for_each_rmrr_device(rmrr, pdev)
1843 return ret;
1846 u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
1847 int iommu_suspend(void)
1849 struct acpi_drhd_unit *drhd;
1850 struct iommu *iommu;
1851 int i = 0;
1853 iommu_flush_all();
1855 for_each_drhd_unit ( drhd )
1857 iommu = drhd->iommu;
1858 iommu_state[DMAR_RTADDR_REG * i] =
1859 (u64) dmar_readq(iommu->reg, DMAR_RTADDR_REG);
1860 iommu_state[DMAR_FECTL_REG * i] =
1861 (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
1862 iommu_state[DMAR_FEDATA_REG * i] =
1863 (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
1864 iommu_state[DMAR_FEADDR_REG * i] =
1865 (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
1866 iommu_state[DMAR_FEUADDR_REG * i] =
1867 (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
1868 iommu_state[DMAR_PLMBASE_REG * i] =
1869 (u32) dmar_readl(iommu->reg, DMAR_PLMBASE_REG);
1870 iommu_state[DMAR_PLMLIMIT_REG * i] =
1871 (u32) dmar_readl(iommu->reg, DMAR_PLMLIMIT_REG);
1872 iommu_state[DMAR_PHMBASE_REG * i] =
1873 (u64) dmar_readq(iommu->reg, DMAR_PHMBASE_REG);
1874 iommu_state[DMAR_PHMLIMIT_REG * i] =
1875 (u64) dmar_readq(iommu->reg, DMAR_PHMLIMIT_REG);
1876 i++;
1879 return 0;
1882 int iommu_resume(void)
1884 struct acpi_drhd_unit *drhd;
1885 struct iommu *iommu;
1886 int i = 0;
1888 iommu_flush_all();
1890 init_vtd_hw();
1891 for_each_drhd_unit ( drhd )
1893 iommu = drhd->iommu;
1894 dmar_writeq( iommu->reg, DMAR_RTADDR_REG,
1895 (u64) iommu_state[DMAR_RTADDR_REG * i]);
1896 dmar_writel(iommu->reg, DMAR_FECTL_REG,
1897 (u32) iommu_state[DMAR_FECTL_REG * i]);
1898 dmar_writel(iommu->reg, DMAR_FEDATA_REG,
1899 (u32) iommu_state[DMAR_FEDATA_REG * i]);
1900 dmar_writel(iommu->reg, DMAR_FEADDR_REG,
1901 (u32) iommu_state[DMAR_FEADDR_REG * i]);
1902 dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
1903 (u32) iommu_state[DMAR_FEUADDR_REG * i]);
1904 dmar_writel(iommu->reg, DMAR_PLMBASE_REG,
1905 (u32) iommu_state[DMAR_PLMBASE_REG * i]);
1906 dmar_writel(iommu->reg, DMAR_PLMLIMIT_REG,
1907 (u32) iommu_state[DMAR_PLMLIMIT_REG * i]);
1908 dmar_writeq(iommu->reg, DMAR_PHMBASE_REG,
1909 (u64) iommu_state[DMAR_PHMBASE_REG * i]);
1910 dmar_writeq(iommu->reg, DMAR_PHMLIMIT_REG,
1911 (u64) iommu_state[DMAR_PHMLIMIT_REG * i]);
1913 if ( iommu_enable_translation(iommu) )
1914 return -EIO;
1915 i++;
1917 return 0;
1920 struct iommu_ops intel_iommu_ops = {
1921 .init = intel_iommu_domain_init,
1922 .assign_device = intel_iommu_assign_device,
1923 .teardown = iommu_domain_teardown,
1924 .map_page = intel_iommu_map_page,
1925 .unmap_page = intel_iommu_unmap_page,
1926 .reassign_device = reassign_device_ownership,
1927 };
1929 /*
1930 * Local variables:
1931 * mode: C
1932 * c-set-style: "BSD"
1933 * c-basic-offset: 4
1934 * tab-width: 4
1935 * indent-tabs-mode: nil
1936 * End:
1937 */