ia64/xen-unstable

view xen/arch/x86/hvm/vmx/vtd/intel-iommu.c @ 16875:74a9bfccddba

vt-d: Do FLR of assigned devices with VT-d

Currently there is a pdev_flr() function that performs an FLR before
device assignment in qemu, but most devices don't have FLR capability.
Moreover, an FLR should be done before both assignment and deassignment
to keep the device in a consistent state. If the device lacks FLR
capability, this patch makes it enter D3hot and return to D0 to achieve
the same effect. It also exposes pdev_flr() in the VT-d utils so that it
can be invoked by the assignment and deassignment functions (a rough
sketch of the D3hot cycle follows the changeset metadata below).

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Anthony Xu <anthony.xu@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jan 24 14:39:38 2008 +0000 (2008-01-24)
parents cc5bb500df5f
children faefbf2c4840
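
For illustration, the D3hot-based FLR described in the changeset comment
can be sketched roughly as below. This is a hedged sketch only, not the
code this changeset adds (the real pdev_flr() lives in the VT-d utility
code, outside this file): the PM capability ID and PMCSR offsets come
from the PCI Power Management spec, pdev_flr_via_d3hot() is a hypothetical
name, and write_pci_config_16() and mdelay() are assumed to be available
alongside the read_pci_config_*() accessors used later in this file.

/* Sketch only: emulate FLR by cycling the device through D3hot. */
#define PCI_CAP_ID_PM           0x01   /* PCI Power Management capability ID */
#define PCI_PM_CTRL             4      /* PMCSR offset within the capability */
#define PCI_PM_CTRL_STATE_MASK  0x0003
#define PCI_D3HOT               3
#define PCI_D0                  0

static void pdev_flr_via_d3hot(u8 bus, u8 devfn)
{
    int pos;
    u16 pmcsr;

    /* Locate the PM capability; without it there is nothing we can do. */
    pos = __pci_find_next_cap(bus, devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_PM);
    if ( !pos )
        return;

    pmcsr = read_pci_config_16(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                               pos + PCI_PM_CTRL);

    /* Enter D3hot and allow the mandated settle time. */
    write_pci_config_16(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                        pos + PCI_PM_CTRL,
                        (pmcsr & ~PCI_PM_CTRL_STATE_MASK) | PCI_D3HOT);
    mdelay(10);

    /* Return to D0; the transition resets the device's internal state. */
    write_pci_config_16(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                        pos + PCI_PM_CTRL,
                        (pmcsr & ~PCI_PM_CTRL_STATE_MASK) | PCI_D0);
    mdelay(10);
}
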
line source
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Allen Kay <allen.m.kay@intel.com> - adapted to xen
20 */
22 #include <xen/init.h>
23 #include <xen/irq.h>
24 #include <xen/spinlock.h>
25 #include <xen/sched.h>
26 #include <xen/xmalloc.h>
27 #include <xen/domain_page.h>
28 #include <asm/delay.h>
29 #include <asm/string.h>
30 #include <asm/mm.h>
31 #include <asm/iommu.h>
32 #include <asm/hvm/vmx/intel-iommu.h>
33 #include "dmar.h"
34 #include "pci-direct.h"
35 #include "pci_regs.h"
36 #include "msi.h"
37 #include "extern.h"
39 #define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
41 static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */
42 static int domid_bitmap_size; /* domain id bitmap size in bit */
43 static void *domid_bitmap; /* iommu domain id bitmap */
45 #define DID_FIELD_WIDTH 16
46 #define DID_HIGH_OFFSET 8
47 static void context_set_domain_id(struct context_entry *context,
48 struct domain *d)
49 {
50 unsigned long flags;
51 domid_t iommu_domid = domain_iommu_domid(d);
53 if ( iommu_domid == 0 )
54 {
55 spin_lock_irqsave(&domid_bitmap_lock, flags);
56 iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
57 set_bit(iommu_domid, domid_bitmap);
58 spin_unlock_irqrestore(&domid_bitmap_lock, flags);
59 d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
60 }
62 context->hi &= (1 << DID_HIGH_OFFSET) - 1;
63 context->hi |= iommu_domid << DID_HIGH_OFFSET;
64 }
66 static void iommu_domid_release(struct domain *d)
67 {
68 domid_t iommu_domid = domain_iommu_domid(d);
70 if ( iommu_domid != 0 )
71 {
72 d->arch.hvm_domain.hvm_iommu.iommu_domid = 0;
73 clear_bit(iommu_domid, domid_bitmap);
74 }
75 }
77 unsigned int x86_clflush_size;
78 void clflush_cache_range(void *adr, int size)
79 {
80 int i;
81 for ( i = 0; i < size; i += x86_clflush_size )
82 clflush(adr + i);
83 }
85 static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
86 {
87 if ( !ecap_coherent(iommu->ecap) )
88 clflush_cache_range(addr, size);
89 }
91 #define iommu_flush_cache_entry(iommu, addr) \
92 __iommu_flush_cache(iommu, addr, 8)
93 #define iommu_flush_cache_page(iommu, addr) \
94 __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K)
96 int nr_iommus;
97 /* context entry handling */
98 static struct context_entry * device_to_context_entry(struct iommu *iommu,
99 u8 bus, u8 devfn)
100 {
101 struct root_entry *root;
102 struct context_entry *context;
103 unsigned long phy_addr;
104 unsigned long flags;
106 spin_lock_irqsave(&iommu->lock, flags);
107 root = &iommu->root_entry[bus];
108 if ( !root_present(*root) )
109 {
110 phy_addr = (unsigned long) alloc_xenheap_page();
111 if ( !phy_addr )
112 {
113 spin_unlock_irqrestore(&iommu->lock, flags);
114 return NULL;
115 }
116 memset((void *) phy_addr, 0, PAGE_SIZE);
117 iommu_flush_cache_page(iommu, (void *)phy_addr);
118 phy_addr = virt_to_maddr((void *)phy_addr);
119 set_root_value(*root, phy_addr);
120 set_root_present(*root);
121 iommu_flush_cache_entry(iommu, root);
122 }
123 phy_addr = (unsigned long) get_context_addr(*root);
124 context = (struct context_entry *)maddr_to_virt(phy_addr);
125 spin_unlock_irqrestore(&iommu->lock, flags);
126 return &context[devfn];
127 }
129 static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
130 {
131 struct root_entry *root;
132 struct context_entry *context;
133 unsigned long phy_addr;
134 int ret;
135 unsigned long flags;
137 spin_lock_irqsave(&iommu->lock, flags);
138 root = &iommu->root_entry[bus];
139 if ( !root_present(*root) )
140 {
141 ret = 0;
142 goto out;
143 }
144 phy_addr = get_context_addr(*root);
145 context = (struct context_entry *)maddr_to_virt(phy_addr);
146 ret = context_present(context[devfn]);
147 out:
148 spin_unlock_irqrestore(&iommu->lock, flags);
149 return ret;
150 }
152 static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
153 {
154 struct hvm_iommu *hd = domain_hvm_iommu(domain);
155 struct acpi_drhd_unit *drhd;
156 struct iommu *iommu;
157 int addr_width = agaw_to_width(hd->agaw);
158 struct dma_pte *parent, *pte = NULL, *pgd;
159 int level = agaw_to_level(hd->agaw);
160 int offset;
161 unsigned long flags;
162 struct page_info *pg = NULL;
163 u64 *vaddr = NULL;
165 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
166 iommu = drhd->iommu;
168 addr &= (((u64)1) << addr_width) - 1;
169 spin_lock_irqsave(&hd->mapping_lock, flags);
170 if ( !hd->pgd )
171 {
172 pgd = (struct dma_pte *)alloc_xenheap_page();
173 if ( !pgd )
174 {
175 spin_unlock_irqrestore(&hd->mapping_lock, flags);
176 return NULL;
177 }
178 memset(pgd, 0, PAGE_SIZE);
179 hd->pgd = pgd;
180 }
182 parent = hd->pgd;
183 while ( level > 1 )
184 {
185 offset = address_level_offset(addr, level);
186 pte = &parent[offset];
188 if ( dma_pte_addr(*pte) == 0 )
189 {
190 pg = alloc_domheap_page(NULL);
191 vaddr = map_domain_page(page_to_mfn(pg));
192 if ( !vaddr )
193 {
194 spin_unlock_irqrestore(&hd->mapping_lock, flags);
195 return NULL;
196 }
197 memset(vaddr, 0, PAGE_SIZE);
198 iommu_flush_cache_page(iommu, vaddr);
200 dma_set_pte_addr(*pte, page_to_maddr(pg));
202 /*
203 * high level table always sets r/w, last level
204 * page table control read/write
205 */
206 dma_set_pte_readable(*pte);
207 dma_set_pte_writable(*pte);
208 iommu_flush_cache_entry(iommu, pte);
209 }
210 else
211 {
212 pg = maddr_to_page(pte->val);
213 vaddr = map_domain_page(page_to_mfn(pg));
214 if ( !vaddr )
215 {
216 spin_unlock_irqrestore(&hd->mapping_lock, flags);
217 return NULL;
218 }
219 }
221 if ( parent != hd->pgd )
222 unmap_domain_page(parent);
224 if ( level == 2 && vaddr )
225 {
226 unmap_domain_page(vaddr);
227 break;
228 }
230 parent = (struct dma_pte *)vaddr;
231 vaddr = NULL;
232 level--;
233 }
235 spin_unlock_irqrestore(&hd->mapping_lock, flags);
236 return pg;
237 }
239 /* return address's page at specific level */
240 static struct page_info *dma_addr_level_page(struct domain *domain,
241 u64 addr, int level)
242 {
243 struct hvm_iommu *hd = domain_hvm_iommu(domain);
244 struct dma_pte *parent, *pte = NULL;
245 int total = agaw_to_level(hd->agaw);
246 int offset;
247 struct page_info *pg = NULL;
249 parent = hd->pgd;
250 while ( level <= total )
251 {
252 offset = address_level_offset(addr, total);
253 pte = &parent[offset];
254 if ( dma_pte_addr(*pte) == 0 )
255 {
256 if ( parent != hd->pgd )
257 unmap_domain_page(parent);
258 break;
259 }
261 pg = maddr_to_page(pte->val);
262 if ( parent != hd->pgd )
263 unmap_domain_page(parent);
265 if ( level == total )
266 return pg;
268 parent = map_domain_page(page_to_mfn(pg));
269 total--;
270 }
272 return NULL;
273 }
275 static void iommu_flush_write_buffer(struct iommu *iommu)
276 {
277 u32 val;
278 unsigned long flag;
279 unsigned long start_time;
281 if ( !cap_rwbf(iommu->cap) )
282 return;
283 val = iommu->gcmd | DMA_GCMD_WBF;
285 spin_lock_irqsave(&iommu->register_lock, flag);
286 dmar_writel(iommu->reg, DMAR_GCMD_REG, val);
288 /* Make sure hardware complete it */
289 start_time = jiffies;
290 for ( ; ; )
291 {
292 val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
293 if ( !(val & DMA_GSTS_WBFS) )
294 break;
295 if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
296 panic("DMAR hardware is malfunctional,"
297 " please disable IOMMU\n");
298 cpu_relax();
299 }
300 spin_unlock_irqrestore(&iommu->register_lock, flag);
301 }
303 /* return value determines if we need a write buffer flush */
304 static int flush_context_reg(
305 void *_iommu,
306 u16 did, u16 source_id, u8 function_mask, u64 type,
307 int non_present_entry_flush)
308 {
309 struct iommu *iommu = (struct iommu *) _iommu;
310 u64 val = 0;
311 unsigned long flag;
312 unsigned long start_time;
314 /*
315 * In the non-present entry flush case, if hardware doesn't cache
316 * non-present entry we do nothing and if hardware cache non-present
317 * entry, we flush entries of domain 0 (the domain id is used to cache
318 * any non-present entries)
319 */
320 if ( non_present_entry_flush )
321 {
322 if ( !cap_caching_mode(iommu->cap) )
323 return 1;
324 else
325 did = 0;
326 }
328 /* use register invalidation */
329 switch ( type )
330 {
331 case DMA_CCMD_GLOBAL_INVL:
332 val = DMA_CCMD_GLOBAL_INVL;
333 break;
334 case DMA_CCMD_DOMAIN_INVL:
335 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
336 break;
337 case DMA_CCMD_DEVICE_INVL:
338 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
339 |DMA_CCMD_SID(source_id)|DMA_CCMD_FM(function_mask);
340 break;
341 default:
342 BUG();
343 }
344 val |= DMA_CCMD_ICC;
346 spin_lock_irqsave(&iommu->register_lock, flag);
347 dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
349 /* Make sure hardware complete it */
350 start_time = jiffies;
351 for ( ; ; )
352 {
353 val = dmar_readq(iommu->reg, DMAR_CCMD_REG);
354 if ( !(val & DMA_CCMD_ICC) )
355 break;
356 if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
357 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
358 cpu_relax();
359 }
360 spin_unlock_irqrestore(&iommu->register_lock, flag);
361 /* flush context entry will implicitly flush write buffer */
362 return 0;
363 }
365 static int inline iommu_flush_context_global(
366 struct iommu *iommu, int non_present_entry_flush)
367 {
368 struct iommu_flush *flush = iommu_get_flush(iommu);
369 return flush->context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
370 non_present_entry_flush);
371 }
373 static int inline iommu_flush_context_domain(
374 struct iommu *iommu, u16 did, int non_present_entry_flush)
375 {
376 struct iommu_flush *flush = iommu_get_flush(iommu);
377 return flush->context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
378 non_present_entry_flush);
379 }
381 static int inline iommu_flush_context_device(
382 struct iommu *iommu, u16 did, u16 source_id,
383 u8 function_mask, int non_present_entry_flush)
384 {
385 struct iommu_flush *flush = iommu_get_flush(iommu);
386 return flush->context(iommu, did, source_id, function_mask,
387 DMA_CCMD_DEVICE_INVL,
388 non_present_entry_flush);
389 }
391 /* return value determines if we need a write buffer flush */
392 static int flush_iotlb_reg(void *_iommu, u16 did,
393 u64 addr, unsigned int size_order, u64 type,
394 int non_present_entry_flush)
395 {
396 struct iommu *iommu = (struct iommu *) _iommu;
397 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
398 u64 val = 0, val_iva = 0;
399 unsigned long flag;
400 unsigned long start_time;
402 /*
403 * In the non-present entry flush case, if hardware doesn't cache
404 * non-present entry we do nothing and if hardware cache non-present
405 * entry, we flush entries of domain 0 (the domain id is used to cache
406 * any non-present entries)
407 */
408 if ( non_present_entry_flush )
409 {
410 if ( !cap_caching_mode(iommu->cap) )
411 return 1;
412 else
413 did = 0;
414 }
416 /* use register invalidation */
417 switch ( type )
418 {
419 case DMA_TLB_GLOBAL_FLUSH:
420 /* global flush doesn't need set IVA_REG */
421 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
422 break;
423 case DMA_TLB_DSI_FLUSH:
424 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
425 break;
426 case DMA_TLB_PSI_FLUSH:
427 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
428 /* Note: always flush non-leaf currently */
429 val_iva = size_order | addr;
430 break;
431 default:
432 BUG();
433 }
434 /* Note: set drain read/write */
435 if ( cap_read_drain(iommu->cap) )
436 val |= DMA_TLB_READ_DRAIN;
437 if ( cap_write_drain(iommu->cap) )
438 val |= DMA_TLB_WRITE_DRAIN;
440 spin_lock_irqsave(&iommu->register_lock, flag);
441 /* Note: Only uses first TLB reg currently */
442 if ( val_iva )
443 dmar_writeq(iommu->reg, tlb_offset, val_iva);
444 dmar_writeq(iommu->reg, tlb_offset + 8, val);
446 /* Make sure hardware complete it */
447 start_time = jiffies;
448 for ( ; ; )
449 {
450 val = dmar_readq(iommu->reg, tlb_offset + 8);
451 if ( !(val & DMA_TLB_IVT) )
452 break;
453 if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
454 panic("DMAR hardware is malfunctional, please disable IOMMU\n");
455 cpu_relax();
456 }
457 spin_unlock_irqrestore(&iommu->register_lock, flag);
459 /* check IOTLB invalidation granularity */
460 if ( DMA_TLB_IAIG(val) == 0 )
461 printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
462 if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
463 printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
464 (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
465 /* flush context entry will implicitly flush write buffer */
466 return 0;
467 }
469 static int inline iommu_flush_iotlb_global(struct iommu *iommu,
470 int non_present_entry_flush)
471 {
472 struct iommu_flush *flush = iommu_get_flush(iommu);
473 return flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
474 non_present_entry_flush);
475 }
477 static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
478 int non_present_entry_flush)
479 {
480 struct iommu_flush *flush = iommu_get_flush(iommu);
481 return flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
482 non_present_entry_flush);
483 }
485 static int inline get_alignment(u64 base, unsigned int size)
486 {
487 int t = 0;
488 u64 end;
490 end = base + size - 1;
491 while ( base != end )
492 {
493 t++;
494 base >>= 1;
495 end >>= 1;
496 }
497 return t;
498 }
500 static int inline iommu_flush_iotlb_psi(
501 struct iommu *iommu, u16 did,
502 u64 addr, unsigned int pages, int non_present_entry_flush)
503 {
504 unsigned int align;
505 struct iommu_flush *flush = iommu_get_flush(iommu);
507 BUG_ON(addr & (~PAGE_MASK_4K));
508 BUG_ON(pages == 0);
510 /* Fallback to domain selective flush if no PSI support */
511 if ( !cap_pgsel_inv(iommu->cap) )
512 return iommu_flush_iotlb_dsi(iommu, did,
513 non_present_entry_flush);
515 /*
516 * PSI requires page size is 2 ^ x, and the base address is naturally
517 * aligned to the size
518 */
519 align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
520 /* Fallback to domain selective flush if size is too big */
521 if ( align > cap_max_amask_val(iommu->cap) )
522 return iommu_flush_iotlb_dsi(iommu, did,
523 non_present_entry_flush);
525 addr >>= PAGE_SHIFT_4K + align;
526 addr <<= PAGE_SHIFT_4K + align;
528 return flush->iotlb(iommu, did, addr, align,
529 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
530 }
532 void iommu_flush_all(void)
533 {
534 struct acpi_drhd_unit *drhd;
535 struct iommu *iommu;
537 wbinvd();
538 for_each_drhd_unit ( drhd )
539 {
540 iommu = drhd->iommu;
541 iommu_flush_context_global(iommu, 0);
542 iommu_flush_iotlb_global(iommu, 0);
543 }
544 }
546 /* clear one page's page table */
547 static void dma_pte_clear_one(struct domain *domain, u64 addr)
548 {
549 struct acpi_drhd_unit *drhd;
550 struct iommu *iommu;
551 struct dma_pte *pte = NULL;
552 struct page_info *pg = NULL;
554 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
556 /* get last level pte */
557 pg = dma_addr_level_page(domain, addr, 1);
558 if ( !pg )
559 return;
560 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
561 pte += address_level_offset(addr, 1);
562 if ( pte )
563 {
564 dma_clear_pte(*pte);
565 iommu_flush_cache_entry(drhd->iommu, pte);
567 for_each_drhd_unit ( drhd )
568 {
569 iommu = drhd->iommu;
570 if ( cap_caching_mode(iommu->cap) )
571 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
572 addr, 1, 0);
573 else if (cap_rwbf(iommu->cap))
574 iommu_flush_write_buffer(iommu);
575 }
576 }
577 unmap_domain_page(pte);
578 }
580 /* clear last level pte, a tlb flush should be followed */
581 static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end)
582 {
583 struct hvm_iommu *hd = domain_hvm_iommu(domain);
584 int addr_width = agaw_to_width(hd->agaw);
586 start &= (((u64)1) << addr_width) - 1;
587 end &= (((u64)1) << addr_width) - 1;
588 /* in case it's partial page */
589 start = PAGE_ALIGN_4K(start);
590 end &= PAGE_MASK_4K;
592 /* we don't need lock here, nobody else touches the iova range */
593 while ( start < end )
594 {
595 dma_pte_clear_one(domain, start);
596 start += PAGE_SIZE_4K;
597 }
598 }
600 /* free page table pages. last level pte should already be cleared */
601 void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
602 {
603 struct acpi_drhd_unit *drhd;
604 struct hvm_iommu *hd = domain_hvm_iommu(domain);
605 struct iommu *iommu;
606 int addr_width = agaw_to_width(hd->agaw);
607 struct dma_pte *pte;
608 int total = agaw_to_level(hd->agaw);
609 int level;
610 u32 tmp;
611 struct page_info *pg = NULL;
613 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
614 iommu = drhd->iommu;
616 start &= (((u64)1) << addr_width) - 1;
617 end &= (((u64)1) << addr_width) - 1;
619 /* we don't need lock here, nobody else touches the iova range */
620 level = 2;
621 while ( level <= total )
622 {
623 tmp = align_to_level(start, level);
624 if ( (tmp >= end) || ((tmp + level_size(level)) > end) )
625 return;
627 while ( tmp < end )
628 {
629 pg = dma_addr_level_page(domain, tmp, level);
630 if ( !pg )
631 return;
632 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
633 pte += address_level_offset(tmp, level);
634 dma_clear_pte(*pte);
635 iommu_flush_cache_entry(iommu, pte);
636 unmap_domain_page(pte);
637 free_domheap_page(pg);
639 tmp += level_size(level);
640 }
641 level++;
642 }
644 /* free pgd */
645 if ( start == 0 && end == ((((u64)1) << addr_width) - 1) )
646 {
647 free_xenheap_page((void *)hd->pgd);
648 hd->pgd = NULL;
649 }
650 }
652 /* iommu handling */
653 static int iommu_set_root_entry(struct iommu *iommu)
654 {
655 void *addr;
656 u32 cmd, sts;
657 struct root_entry *root;
658 unsigned long flags;
660 if ( iommu == NULL )
661 {
662 gdprintk(XENLOG_ERR VTDPREFIX,
663 "iommu_set_root_entry: iommu == NULL\n");
664 return -EINVAL;
665 }
667 if ( unlikely(!iommu->root_entry) )
668 {
669 root = (struct root_entry *)alloc_xenheap_page();
670 if ( root == NULL )
671 return -ENOMEM;
673 memset((u8*)root, 0, PAGE_SIZE);
674 iommu_flush_cache_page(iommu, root);
676 if ( cmpxchg((unsigned long *)&iommu->root_entry,
677 0, (unsigned long)root) != 0 )
678 free_xenheap_page((void *)root);
679 }
681 addr = iommu->root_entry;
683 spin_lock_irqsave(&iommu->register_lock, flags);
685 dmar_writeq(iommu->reg, DMAR_RTADDR_REG, virt_to_maddr(addr));
686 cmd = iommu->gcmd | DMA_GCMD_SRTP;
687 dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
689 /* Make sure hardware complete it */
690 for ( ; ; )
691 {
692 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
693 if ( sts & DMA_GSTS_RTPS )
694 break;
695 cpu_relax();
696 }
698 spin_unlock_irqrestore(&iommu->register_lock, flags);
700 return 0;
701 }
703 static int iommu_enable_translation(struct iommu *iommu)
704 {
705 u32 sts;
706 unsigned long flags;
708 dprintk(XENLOG_INFO VTDPREFIX,
709 "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
710 spin_lock_irqsave(&iommu->register_lock, flags);
711 iommu->gcmd |= DMA_GCMD_TE;
712 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
713 /* Make sure hardware complete it */
714 for ( ; ; )
715 {
716 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
717 if ( sts & DMA_GSTS_TES )
718 break;
719 cpu_relax();
720 }
722 /* Disable PMRs when VT-d engine takes effect per spec definition */
723 disable_pmr(iommu);
724 spin_unlock_irqrestore(&iommu->register_lock, flags);
725 return 0;
726 }
728 int iommu_disable_translation(struct iommu *iommu)
729 {
730 u32 sts;
731 unsigned long flags;
733 spin_lock_irqsave(&iommu->register_lock, flags);
734 iommu->gcmd &= ~ DMA_GCMD_TE;
735 dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
737 /* Make sure hardware complete it */
738 for ( ; ; )
739 {
740 sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
741 if ( !(sts & DMA_GSTS_TES) )
742 break;
743 cpu_relax();
744 }
745 spin_unlock_irqrestore(&iommu->register_lock, flags);
746 return 0;
747 }
749 static struct iommu *vector_to_iommu[NR_VECTORS];
750 static int iommu_page_fault_do_one(struct iommu *iommu, int type,
751 u8 fault_reason, u16 source_id, u32 addr)
752 {
753 dprintk(XENLOG_WARNING VTDPREFIX,
754 "iommu_fault:%s: %x:%x.%x addr %x REASON %x iommu->reg = %p\n",
755 (type ? "DMA Read" : "DMA Write"), (source_id >> 8),
756 PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr,
757 fault_reason, iommu->reg);
759 if (fault_reason < 0x20)
760 print_vtd_entries(current->domain, iommu, (source_id >> 8),
761 (source_id & 0xff), (addr >> PAGE_SHIFT));
763 return 0;
764 }
766 static void iommu_fault_status(u32 fault_status)
767 {
768 if (fault_status & DMA_FSTS_PFO)
769 dprintk(XENLOG_ERR VTDPREFIX,
770 "iommu_fault_status: Fault Overflow\n");
771 else
772 if (fault_status & DMA_FSTS_PPF)
773 dprintk(XENLOG_ERR VTDPREFIX,
774 "iommu_fault_status: Primary Pending Fault\n");
775 else
776 if (fault_status & DMA_FSTS_AFO)
777 dprintk(XENLOG_ERR VTDPREFIX,
778 "iommu_fault_status: Advanced Fault Overflow\n");
779 else
780 if (fault_status & DMA_FSTS_APF)
781 dprintk(XENLOG_ERR VTDPREFIX,
782 "iommu_fault_status: Advanced Pending Fault\n");
783 else
784 if (fault_status & DMA_FSTS_IQE)
785 dprintk(XENLOG_ERR VTDPREFIX,
786 "iommu_fault_status: Invalidation Queue Error\n");
787 else
788 if (fault_status & DMA_FSTS_ICE)
789 dprintk(XENLOG_ERR VTDPREFIX,
790 "iommu_fault_status: Invalidation Completion Error\n");
791 else
792 if (fault_status & DMA_FSTS_ITE)
793 dprintk(XENLOG_ERR VTDPREFIX,
794 "iommu_fault_status: Invalidation Time-out Error\n");
795 }
797 #define PRIMARY_FAULT_REG_LEN (16)
798 static void iommu_page_fault(int vector, void *dev_id,
799 struct cpu_user_regs *regs)
800 {
801 struct iommu *iommu = dev_id;
802 int reg, fault_index;
803 u32 fault_status;
804 unsigned long flags;
806 dprintk(XENLOG_WARNING VTDPREFIX,
807 "iommu_page_fault: iommu->reg = %p\n", iommu->reg);
809 spin_lock_irqsave(&iommu->register_lock, flags);
810 fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
811 spin_unlock_irqrestore(&iommu->register_lock, flags);
813 iommu_fault_status(fault_status);
815 /* FIXME: ignore advanced fault log */
816 if ( !(fault_status & DMA_FSTS_PPF) )
817 return;
818 fault_index = dma_fsts_fault_record_index(fault_status);
819 reg = cap_fault_reg_offset(iommu->cap);
820 for ( ; ; )
821 {
822 u8 fault_reason;
823 u16 source_id;
824 u32 guest_addr, data;
825 int type;
827 /* highest 32 bits */
828 spin_lock_irqsave(&iommu->register_lock, flags);
829 data = dmar_readl(iommu->reg, reg +
830 fault_index * PRIMARY_FAULT_REG_LEN + 12);
831 if ( !(data & DMA_FRCD_F) )
832 {
833 spin_unlock_irqrestore(&iommu->register_lock, flags);
834 break;
835 }
837 fault_reason = dma_frcd_fault_reason(data);
838 type = dma_frcd_type(data);
840 data = dmar_readl(iommu->reg, reg +
841 fault_index * PRIMARY_FAULT_REG_LEN + 8);
842 source_id = dma_frcd_source_id(data);
844 guest_addr = dmar_readq(iommu->reg, reg +
845 fault_index * PRIMARY_FAULT_REG_LEN);
846 guest_addr = dma_frcd_page_addr(guest_addr);
847 /* clear the fault */
848 dmar_writel(iommu->reg, reg +
849 fault_index * PRIMARY_FAULT_REG_LEN + 12, DMA_FRCD_F);
850 spin_unlock_irqrestore(&iommu->register_lock, flags);
852 iommu_page_fault_do_one(iommu, type, fault_reason,
853 source_id, guest_addr);
855 fault_index++;
856 if ( fault_index > cap_num_fault_regs(iommu->cap) )
857 fault_index = 0;
858 }
860 /* clear primary fault overflow */
861 if ( fault_status & DMA_FSTS_PFO )
862 {
863 spin_lock_irqsave(&iommu->register_lock, flags);
864 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_PFO);
865 spin_unlock_irqrestore(&iommu->register_lock, flags);
866 }
867 }
869 static void dma_msi_unmask(unsigned int vector)
870 {
871 struct iommu *iommu = vector_to_iommu[vector];
872 unsigned long flags;
874 /* unmask it */
875 spin_lock_irqsave(&iommu->register_lock, flags);
876 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
877 spin_unlock_irqrestore(&iommu->register_lock, flags);
878 }
880 static void dma_msi_mask(unsigned int vector)
881 {
882 unsigned long flags;
883 struct iommu *iommu = vector_to_iommu[vector];
885 /* mask it */
886 spin_lock_irqsave(&iommu->register_lock, flags);
887 dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
888 spin_unlock_irqrestore(&iommu->register_lock, flags);
889 }
891 static unsigned int dma_msi_startup(unsigned int vector)
892 {
893 dma_msi_unmask(vector);
894 return 0;
895 }
897 static void dma_msi_end(unsigned int vector)
898 {
899 dma_msi_unmask(vector);
900 ack_APIC_irq();
901 }
903 static void dma_msi_data_init(struct iommu *iommu, int vector)
904 {
905 u32 msi_data = 0;
906 unsigned long flags;
908 /* Fixed, edge, assert mode. Follow MSI setting */
909 msi_data |= vector & 0xff;
910 msi_data |= 1 << 14;
912 spin_lock_irqsave(&iommu->register_lock, flags);
913 dmar_writel(iommu->reg, DMAR_FEDATA_REG, msi_data);
914 spin_unlock_irqrestore(&iommu->register_lock, flags);
915 }
917 static void dma_msi_addr_init(struct iommu *iommu, int phy_cpu)
918 {
919 u64 msi_address;
920 unsigned long flags;
922 /* Physical, dedicated cpu. Follow MSI setting */
923 msi_address = (MSI_ADDRESS_HEADER << (MSI_ADDRESS_HEADER_SHIFT + 8));
924 msi_address |= MSI_PHYSICAL_MODE << 2;
925 msi_address |= MSI_REDIRECTION_HINT_MODE << 3;
926 msi_address |= phy_cpu << MSI_TARGET_CPU_SHIFT;
928 spin_lock_irqsave(&iommu->register_lock, flags);
929 dmar_writel(iommu->reg, DMAR_FEADDR_REG, (u32)msi_address);
930 dmar_writel(iommu->reg, DMAR_FEUADDR_REG, (u32)(msi_address >> 32));
931 spin_unlock_irqrestore(&iommu->register_lock, flags);
932 }
934 static void dma_msi_set_affinity(unsigned int vector, cpumask_t dest)
935 {
936 struct iommu *iommu = vector_to_iommu[vector];
937 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
938 }
940 static struct hw_interrupt_type dma_msi_type = {
941 .typename = "DMA_MSI",
942 .startup = dma_msi_startup,
943 .shutdown = dma_msi_mask,
944 .enable = dma_msi_unmask,
945 .disable = dma_msi_mask,
946 .ack = dma_msi_mask,
947 .end = dma_msi_end,
948 .set_affinity = dma_msi_set_affinity,
949 };
951 int iommu_set_interrupt(struct iommu *iommu)
952 {
953 int vector, ret;
955 vector = assign_irq_vector(AUTO_ASSIGN);
956 vector_to_iommu[vector] = iommu;
958 /* VT-d fault is a MSI, make irq == vector */
959 irq_vector[vector] = vector;
960 vector_irq[vector] = vector;
962 if ( !vector )
963 {
964 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
965 return -EINVAL;
966 }
968 irq_desc[vector].handler = &dma_msi_type;
969 ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
970 if ( ret )
971 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
972 return vector;
973 }
975 struct iommu *iommu_alloc(void *hw_data)
976 {
977 struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
978 struct iommu *iommu;
979 struct qi_ctrl *qi_ctrl;
980 struct ir_ctrl *ir_ctrl;
982 if ( nr_iommus > MAX_IOMMUS )
983 {
984 gdprintk(XENLOG_ERR VTDPREFIX,
985 "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
986 return NULL;
987 }
989 iommu = xmalloc(struct iommu);
990 if ( !iommu )
991 return NULL;
992 memset(iommu, 0, sizeof(struct iommu));
994 set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
995 iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
997 printk("iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
998 iommu->reg, drhd->address);
1000 nr_iommus++;
1002 if ( !iommu->reg )
1003 {
1004 printk(KERN_ERR VTDPREFIX "IOMMU: can't map the region\n");
1005 goto error;
1006 }
1008 iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
1009 iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
1011 printk("iommu_alloc: cap = %"PRIx64"\n", iommu->cap);
1012 printk("iommu_alloc: ecap = %"PRIx64"\n", iommu->ecap);
1014 spin_lock_init(&iommu->lock);
1015 spin_lock_init(&iommu->register_lock);
1017 qi_ctrl = iommu_qi_ctrl(iommu);
1018 spin_lock_init(&qi_ctrl->qinval_lock);
1019 spin_lock_init(&qi_ctrl->qinval_poll_lock);
1021 ir_ctrl = iommu_ir_ctrl(iommu);
1022 spin_lock_init(&ir_ctrl->iremap_lock);
1024 drhd->iommu = iommu;
1025 return iommu;
1026 error:
1027 xfree(iommu);
1028 return NULL;
1029 }
1031 static void free_iommu(struct iommu *iommu)
1032 {
1033 if ( !iommu )
1034 return;
1035 if ( iommu->root_entry )
1036 free_xenheap_page((void *)iommu->root_entry);
1037 if ( iommu->reg )
1038 iounmap(iommu->reg);
1039 free_irq(iommu->vector);
1040 xfree(iommu);
1041 }
1043 #define guestwidth_to_adjustwidth(gaw) ({ \
1044 int agaw, r = (gaw - 12) % 9; \
1045 agaw = (r == 0) ? gaw : (gaw + 9 - r); \
1046 if ( agaw > 64 ) \
1047 agaw = 64; \
1048 agaw; })
1050 int iommu_domain_init(struct domain *domain)
1052 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1053 struct iommu *iommu = NULL;
1054 int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
1055 int adjust_width, agaw;
1056 unsigned long sagaw;
1057 struct acpi_drhd_unit *drhd;
1059 spin_lock_init(&hd->mapping_lock);
1060 spin_lock_init(&hd->iommu_list_lock);
1061 INIT_LIST_HEAD(&hd->pdev_list);
1062 INIT_LIST_HEAD(&hd->g2m_ioport_list);
1064 if ( !vtd_enabled || list_empty(&acpi_drhd_units) )
1065 return 0;
1067 for_each_drhd_unit ( drhd )
1068 iommu = drhd->iommu ? : iommu_alloc(drhd);
1070 /* calculate AGAW */
1071 if (guest_width > cap_mgaw(iommu->cap))
1072 guest_width = cap_mgaw(iommu->cap);
1073 adjust_width = guestwidth_to_adjustwidth(guest_width);
1074 agaw = width_to_agaw(adjust_width);
1075 /* FIXME: hardware doesn't support it, choose a bigger one? */
1076 sagaw = cap_sagaw(iommu->cap);
1077 if ( !test_bit(agaw, &sagaw) )
1079 gdprintk(XENLOG_ERR VTDPREFIX,
1080 "IOMMU: hardware doesn't support the agaw\n");
1081 agaw = find_next_bit(&sagaw, 5, agaw);
1082 if ( agaw >= 5 )
1083 return -ENODEV;
1085 hd->agaw = agaw;
1086 return 0;
1089 static int domain_context_mapping_one(
1090 struct domain *domain,
1091 struct iommu *iommu,
1092 u8 bus, u8 devfn)
1094 struct hvm_iommu *hd = domain_hvm_iommu(domain);
1095 struct context_entry *context;
1096 unsigned long flags;
1097 int ret = 0;
1099 context = device_to_context_entry(iommu, bus, devfn);
1100 if ( !context )
1102 gdprintk(XENLOG_ERR VTDPREFIX,
1103 "domain_context_mapping_one:context == NULL:"
1104 "bdf = %x:%x:%x\n",
1105 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1106 return -ENOMEM;
1109 if ( context_present(*context) )
1111 gdprintk(XENLOG_WARNING VTDPREFIX,
1112 "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
1113 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1114 return 0;
1117 spin_lock_irqsave(&iommu->lock, flags);
1118 /*
1119 * domain_id 0 is not valid on Intel's IOMMU, force domain_id to
1120 * be 1 based as required by intel's iommu hw.
1121 */
1122 context_set_domain_id(context, domain);
1123 context_set_address_width(*context, hd->agaw);
1125 if ( ecap_pass_thru(iommu->ecap) )
1126 context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
1127 #ifdef CONTEXT_PASSTHRU
1128 else
1130 #endif
1131 if ( !hd->pgd )
1133 struct dma_pte *pgd = (struct dma_pte *)alloc_xenheap_page();
1134 if ( !pgd )
1136 spin_unlock_irqrestore(&hd->mapping_lock, flags);
1137 return -ENOMEM;
1139 memset(pgd, 0, PAGE_SIZE);
1140 hd->pgd = pgd;
1143 context_set_address_root(*context, virt_to_maddr(hd->pgd));
1144 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1145 #ifdef CONTEXT_PASSTHRU
1147 #endif
1149 context_set_fault_enable(*context);
1150 context_set_present(*context);
1151 iommu_flush_cache_entry(iommu, context);
1153 gdprintk(XENLOG_INFO VTDPREFIX,
1154 "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
1155 " hd->pgd=%p\n",
1156 bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1157 context->hi, context->lo, hd->pgd);
1159 if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
1160 (((u16)bus) << 8) | devfn,
1161 DMA_CCMD_MASK_NOBIT, 1) )
1162 iommu_flush_write_buffer(iommu);
1163 else
1164 iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
1165 spin_unlock_irqrestore(&iommu->lock, flags);
1166 return ret;
1169 static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap)
1171 u8 id;
1172 int ttl = 48;
1174 while ( ttl-- )
1176 pos = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos);
1177 if ( pos < 0x40 )
1178 break;
1180 pos &= ~3;
1181 id = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1182 pos + PCI_CAP_LIST_ID);
1184 if ( id == 0xff )
1185 break;
1186 if ( id == cap )
1187 return pos;
1189 pos += PCI_CAP_LIST_NEXT;
1191 return 0;
1194 #define PCI_BASE_CLASS_BRIDGE 0x06
1195 #define PCI_CLASS_BRIDGE_PCI 0x0604
1197 #define DEV_TYPE_PCIe_ENDPOINT 1
1198 #define DEV_TYPE_PCI_BRIDGE 2
1199 #define DEV_TYPE_PCI 3
1201 int pdev_type(struct pci_dev *dev)
1203 u16 class_device;
1204 u16 status;
1206 class_device = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
1207 PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE);
1208 if ( class_device == PCI_CLASS_BRIDGE_PCI )
1209 return DEV_TYPE_PCI_BRIDGE;
1211 status = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
1212 PCI_FUNC(dev->devfn), PCI_STATUS);
1214 if ( !(status & PCI_STATUS_CAP_LIST) )
1215 return DEV_TYPE_PCI;
1217 if ( __pci_find_next_cap(dev->bus, dev->devfn,
1218 PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP) )
1219 return DEV_TYPE_PCIe_ENDPOINT;
1221 return DEV_TYPE_PCI;
1224 #define MAX_BUSES 256
1225 struct pci_dev bus2bridge[MAX_BUSES];
1227 static int domain_context_mapping(
1228 struct domain *domain,
1229 struct iommu *iommu,
1230 struct pci_dev *pdev)
1232 int ret = 0;
1233 int dev, func, sec_bus, sub_bus;
1234 u32 type;
1236 type = pdev_type(pdev);
1237 switch ( type )
1239 case DEV_TYPE_PCI_BRIDGE:
1240 sec_bus = read_pci_config_byte(
1241 pdev->bus, PCI_SLOT(pdev->devfn),
1242 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1244 if ( bus2bridge[sec_bus].bus == 0 )
1246 bus2bridge[sec_bus].bus = pdev->bus;
1247 bus2bridge[sec_bus].devfn = pdev->devfn;
1250 sub_bus = read_pci_config_byte(
1251 pdev->bus, PCI_SLOT(pdev->devfn),
1252 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1254 if ( sec_bus != sub_bus )
1255 gdprintk(XENLOG_WARNING VTDPREFIX,
1256 "context_context_mapping: nested PCI bridge not "
1257 "supported: bdf = %x:%x:%x sec_bus = %x sub_bus = %x\n",
1258 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1259 sec_bus, sub_bus);
1260 break;
1261 case DEV_TYPE_PCIe_ENDPOINT:
1262 gdprintk(XENLOG_INFO VTDPREFIX,
1263 "domain_context_mapping:PCIe : bdf = %x:%x:%x\n",
1264 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1265 ret = domain_context_mapping_one(domain, iommu,
1266 (u8)(pdev->bus), (u8)(pdev->devfn));
1267 break;
1268 case DEV_TYPE_PCI:
1269 gdprintk(XENLOG_INFO VTDPREFIX,
1270 "domain_context_mapping:PCI: bdf = %x:%x:%x\n",
1271 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1273 if ( pdev->bus == 0 )
1274 ret = domain_context_mapping_one(
1275 domain, iommu, (u8)(pdev->bus), (u8)(pdev->devfn));
1276 else
1278 if ( bus2bridge[pdev->bus].bus != 0 )
1279 gdprintk(XENLOG_WARNING VTDPREFIX,
1280 "domain_context_mapping:bus2bridge"
1281 "[%d].bus != 0\n", pdev->bus);
1283 ret = domain_context_mapping_one(
1284 domain, iommu,
1285 (u8)(bus2bridge[pdev->bus].bus),
1286 (u8)(bus2bridge[pdev->bus].devfn));
1288 /* now map everything behind the PCI bridge */
1289 for ( dev = 0; dev < 32; dev++ )
1291 for ( func = 0; func < 8; func++ )
1293 ret = domain_context_mapping_one(
1294 domain, iommu,
1295 pdev->bus, (u8)PCI_DEVFN(dev, func));
1296 if ( ret )
1297 return ret;
1301 break;
1302 default:
1303 gdprintk(XENLOG_ERR VTDPREFIX,
1304 "domain_context_mapping:unknown type : bdf = %x:%x:%x\n",
1305 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1306 ret = -EINVAL;
1307 break;
1310 return ret;
1313 static int domain_context_unmap_one(
1314 struct domain *domain,
1315 struct iommu *iommu,
1316 u8 bus, u8 devfn)
1318 struct context_entry *context;
1319 unsigned long flags;
1321 context = device_to_context_entry(iommu, bus, devfn);
1322 if ( !context )
1324 gdprintk(XENLOG_ERR VTDPREFIX,
1325 "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
1326 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1327 return -ENOMEM;
1330 if ( !context_present(*context) )
1332 gdprintk(XENLOG_WARNING VTDPREFIX,
1333 "domain_context_unmap_one-%x:%x:%x- "
1334 "context NOT present:return\n",
1335 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1336 return 0;
1339 gdprintk(XENLOG_INFO VTDPREFIX,
1340 "domain_context_unmap_one: bdf = %x:%x:%x\n",
1341 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1343 spin_lock_irqsave(&iommu->lock, flags);
1344 context_clear_present(*context);
1345 context_clear_entry(*context);
1346 iommu_flush_cache_entry(iommu, context);
1347 iommu_flush_context_global(iommu, 0);
1348 iommu_flush_iotlb_global(iommu, 0);
1349 spin_unlock_irqrestore(&iommu->lock, flags);
1351 return 0;
1354 static int domain_context_unmap(
1355 struct domain *domain,
1356 struct iommu *iommu,
1357 struct pci_dev *pdev)
1359 int ret = 0;
1360 int dev, func, sec_bus, sub_bus;
1361 u32 type;
1363 type = pdev_type(pdev);
1364 switch ( type )
1366 case DEV_TYPE_PCI_BRIDGE:
1367 sec_bus = read_pci_config_byte(
1368 pdev->bus, PCI_SLOT(pdev->devfn),
1369 PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
1370 sub_bus = read_pci_config_byte(
1371 pdev->bus, PCI_SLOT(pdev->devfn),
1372 PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
1374 gdprintk(XENLOG_INFO VTDPREFIX,
1375 "domain_context_unmap:BRIDGE:%x:%x:%x "
1376 "sec_bus=%x sub_bus=%x\n",
1377 pdev->bus, PCI_SLOT(pdev->devfn),
1378 PCI_FUNC(pdev->devfn), sec_bus, sub_bus);
1379 break;
1380 case DEV_TYPE_PCIe_ENDPOINT:
1381 gdprintk(XENLOG_INFO VTDPREFIX,
1382 "domain_context_unmap:PCIe : bdf = %x:%x:%x\n",
1383 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1384 ret = domain_context_unmap_one(domain, iommu,
1385 (u8)(pdev->bus), (u8)(pdev->devfn));
1386 break;
1387 case DEV_TYPE_PCI:
1388 gdprintk(XENLOG_INFO VTDPREFIX,
1389 "domain_context_unmap:PCI: bdf = %x:%x:%x\n",
1390 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1391 if ( pdev->bus == 0 )
1392 ret = domain_context_unmap_one(
1393 domain, iommu,
1394 (u8)(pdev->bus), (u8)(pdev->devfn));
1395 else
1397 if ( bus2bridge[pdev->bus].bus != 0 )
1398 gdprintk(XENLOG_WARNING VTDPREFIX,
1399 "domain_context_unmap:"
1400 "bus2bridge[%d].bus != 0\n", pdev->bus);
1402 ret = domain_context_unmap_one(domain, iommu,
1403 (u8)(bus2bridge[pdev->bus].bus),
1404 (u8)(bus2bridge[pdev->bus].devfn));
1406 /* Unmap everything behind the PCI bridge */
1407 for ( dev = 0; dev < 32; dev++ )
1409 for ( func = 0; func < 8; func++ )
1411 ret = domain_context_unmap_one(
1412 domain, iommu,
1413 pdev->bus, (u8)PCI_DEVFN(dev, func));
1414 if ( ret )
1415 return ret;
1419 break;
1420 default:
1421 gdprintk(XENLOG_ERR VTDPREFIX,
1422 "domain_context_unmap:unknown type: bdf = %x:%x:%x\n",
1423 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1424 ret = -EINVAL;
1425 break;
1428 return ret;
1431 void reassign_device_ownership(
1432 struct domain *source,
1433 struct domain *target,
1434 u8 bus, u8 devfn)
1436 struct hvm_iommu *source_hd = domain_hvm_iommu(source);
1437 struct hvm_iommu *target_hd = domain_hvm_iommu(target);
1438 struct pci_dev *pdev;
1439 struct acpi_drhd_unit *drhd;
1440 struct iommu *iommu;
1441 int status;
1442 unsigned long flags;
1444 gdprintk(XENLOG_INFO VTDPREFIX,
1445 "reassign_device-%x:%x:%x- source = %d target = %d\n",
1446 bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
1447 source->domain_id, target->domain_id);
1449 for_each_pdev( source, pdev )
1451 if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
1452 continue;
1454 drhd = acpi_find_matched_drhd_unit(pdev);
1455 iommu = drhd->iommu;
1456 domain_context_unmap(source, iommu, pdev);
1458 /* Move pci device from the source domain to target domain. */
1459 spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
1460 spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
1461 list_move(&pdev->list, &target_hd->pdev_list);
1462 spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
1463 spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
1465 status = domain_context_mapping(target, iommu, pdev);
1466 if ( status != 0 )
1467 gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
1469 break;
1473 void return_devices_to_dom0(struct domain *d)
1475 struct hvm_iommu *hd = domain_hvm_iommu(d);
1476 struct pci_dev *pdev;
1478 while ( !list_empty(&hd->pdev_list) )
1480 pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
1481 dprintk(XENLOG_INFO VTDPREFIX,
1482 "return_devices_to_dom0: bdf = %x:%x:%x\n",
1483 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1484 pdev_flr(pdev->bus, pdev->devfn);
1485 reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
1488 #ifdef VTD_DEBUG
1489 for_each_pdev ( dom0, pdev )
1490 dprintk(XENLOG_INFO VTDPREFIX,
1491 "return_devices_to_dom0:%x: bdf = %x:%x:%x\n",
1492 dom0->domain_id, pdev->bus,
1493 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1494 #endif
1497 void iommu_domain_teardown(struct domain *d)
1499 if ( list_empty(&acpi_drhd_units) )
1500 return;
1502 iommu_domid_release(d);
1504 #if CONFIG_PAGING_LEVELS == 3
1506 struct hvm_iommu *hd = domain_hvm_iommu(d);
1507 int level = agaw_to_level(hd->agaw);
1508 struct dma_pte *pgd = NULL;
1510 switch ( level )
1512 case VTD_PAGE_TABLE_LEVEL_3:
1513 if ( hd->pgd )
1514 free_xenheap_page((void *)hd->pgd);
1515 break;
1516 case VTD_PAGE_TABLE_LEVEL_4:
1517 if ( hd->pgd )
1519 pgd = hd->pgd;
1520 if ( pgd[0].val != 0 )
1521 free_xenheap_page((void*)maddr_to_virt(
1522 dma_pte_addr(pgd[0])));
1523 free_xenheap_page((void *)hd->pgd);
1525 break;
1526 default:
1527 gdprintk(XENLOG_ERR VTDPREFIX,
1528 "Unsupported p2m table sharing level!\n");
1529 break;
1532 #endif
1533 return_devices_to_dom0(d);
1536 static int domain_context_mapped(struct pci_dev *pdev)
1538 struct acpi_drhd_unit *drhd;
1539 struct iommu *iommu;
1540 int ret;
1542 for_each_drhd_unit ( drhd )
1544 iommu = drhd->iommu;
1545 ret = device_context_mapped(iommu, pdev->bus, pdev->devfn);
1546 if ( ret )
1547 return ret;
1550 return 0;
1553 int iommu_map_page(struct domain *d, paddr_t gfn, paddr_t mfn)
1555 struct acpi_drhd_unit *drhd;
1556 struct iommu *iommu;
1557 struct dma_pte *pte = NULL;
1558 struct page_info *pg = NULL;
1560 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1561 iommu = drhd->iommu;
1563 #ifdef CONTEXT_PASSTHRU
1564 /* do nothing if dom0 and iommu supports pass thru */
1565 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1566 return 0;
1567 #endif
1569 pg = addr_to_dma_page(d, gfn << PAGE_SHIFT_4K);
1570 if ( !pg )
1571 return -ENOMEM;
1572 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
1573 pte += gfn & LEVEL_MASK;
1574 dma_set_pte_addr(*pte, mfn << PAGE_SHIFT_4K);
1575 dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
1576 iommu_flush_cache_entry(iommu, pte);
1577 unmap_domain_page(pte);
1579 for_each_drhd_unit ( drhd )
1581 iommu = drhd->iommu;
1582 if ( cap_caching_mode(iommu->cap) )
1583 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1584 gfn << PAGE_SHIFT_4K, 1, 0);
1585 else if ( cap_rwbf(iommu->cap) )
1586 iommu_flush_write_buffer(iommu);
1589 return 0;
1592 int iommu_unmap_page(struct domain *d, dma_addr_t gfn)
1594 struct acpi_drhd_unit *drhd;
1595 struct iommu *iommu;
1597 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1598 iommu = drhd->iommu;
1600 #ifdef CONTEXT_PASSTHRU
1601 /* do nothing if dom0 and iommu supports pass thru */
1602 if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
1603 return 0;
1604 #endif
1606 dma_pte_clear_one(d, gfn << PAGE_SHIFT_4K);
1608 return 0;
1611 int iommu_page_mapping(struct domain *domain, dma_addr_t iova,
1612 void *hpa, size_t size, int prot)
1614 struct acpi_drhd_unit *drhd;
1615 struct iommu *iommu;
1616 unsigned long start_pfn, end_pfn;
1617 struct dma_pte *pte = NULL;
1618 int index;
1619 struct page_info *pg = NULL;
1621 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1622 iommu = drhd->iommu;
1623 if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
1624 return -EINVAL;
1625 iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
1626 start_pfn = (unsigned long)(((unsigned long) hpa) >> PAGE_SHIFT_4K);
1627 end_pfn = (unsigned long)
1628 ((PAGE_ALIGN_4K(((unsigned long)hpa) + size)) >> PAGE_SHIFT_4K);
1629 index = 0;
1630 while ( start_pfn < end_pfn )
1632 pg = addr_to_dma_page(domain, iova + PAGE_SIZE_4K * index);
1633 if ( !pg )
1634 return -ENOMEM;
1635 pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
1636 pte += start_pfn & LEVEL_MASK;
1637 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1638 dma_set_pte_prot(*pte, prot);
1639 iommu_flush_cache_entry(iommu, pte);
1640 unmap_domain_page(pte);
1641 start_pfn++;
1642 index++;
1645 for_each_drhd_unit ( drhd )
1647 iommu = drhd->iommu;
1648 if ( cap_caching_mode(iommu->cap) )
1649 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
1650 iova, index, 0);
1651 else if ( cap_rwbf(iommu->cap) )
1652 iommu_flush_write_buffer(iommu);
1655 return 0;
1658 int iommu_page_unmapping(struct domain *domain, dma_addr_t addr, size_t size)
1660 dma_pte_clear_range(domain, addr, addr + size);
1662 return 0;
1665 void iommu_flush(struct domain *d, dma_addr_t gfn, u64 *p2m_entry)
1667 struct acpi_drhd_unit *drhd;
1668 struct iommu *iommu = NULL;
1669 struct dma_pte *pte = (struct dma_pte *) p2m_entry;
1671 for_each_drhd_unit ( drhd )
1673 iommu = drhd->iommu;
1674 if ( cap_caching_mode(iommu->cap) )
1675 iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
1676 gfn << PAGE_SHIFT_4K, 1, 0);
1677 else if ( cap_rwbf(iommu->cap) )
1678 iommu_flush_write_buffer(iommu);
1681 iommu_flush_cache_entry(iommu, pte);
1684 static int iommu_prepare_rmrr_dev(
1685 struct domain *d,
1686 struct acpi_rmrr_unit *rmrr,
1687 struct pci_dev *pdev)
1689 struct acpi_drhd_unit *drhd;
1690 unsigned long size;
1691 int ret;
1693 /* page table init */
1694 size = rmrr->end_address - rmrr->base_address + 1;
1695 ret = iommu_page_mapping(d, rmrr->base_address,
1696 (void *)rmrr->base_address, size,
1697 DMA_PTE_READ|DMA_PTE_WRITE);
1698 if ( ret )
1699 return ret;
1701 if ( domain_context_mapped(pdev) == 0 )
1703 drhd = acpi_find_matched_drhd_unit(pdev);
1704 ret = domain_context_mapping(d, drhd->iommu, pdev);
1705 if ( !ret )
1706 return 0;
1709 return ret;
1712 void __init setup_dom0_devices(void)
1714 struct hvm_iommu *hd = domain_hvm_iommu(dom0);
1715 struct acpi_drhd_unit *drhd;
1716 struct pci_dev *pdev;
1717 int bus, dev, func, ret;
1718 u32 l;
1720 #ifdef DEBUG_VTD_CONTEXT_ENTRY
1721 for ( bus = 0; bus < 256; bus++ )
1723 for ( dev = 0; dev < 32; dev++ )
1725 for ( func = 0; func < 8; func++ )
1727 struct context_entry *context;
1728 struct pci_dev device;
1730 device.bus = bus;
1731 device.devfn = PCI_DEVFN(dev, func);
1732 drhd = acpi_find_matched_drhd_unit(&device);
1733 context = device_to_context_entry(drhd->iommu,
1734 bus, PCI_DEVFN(dev, func));
1735 if ( (context->lo != 0) || (context->hi != 0) )
1736 dprintk(XENLOG_INFO VTDPREFIX,
1737 "setup_dom0_devices-%x:%x:%x- context not 0\n",
1738 bus, dev, func);
1742 #endif
1744 for ( bus = 0; bus < 256; bus++ )
1746 for ( dev = 0; dev < 32; dev++ )
1748 for ( func = 0; func < 8; func++ )
1750 l = read_pci_config(bus, dev, func, PCI_VENDOR_ID);
1751 /* some broken boards return 0 or ~0 if a slot is empty: */
1752 if ( (l == 0xffffffff) || (l == 0x00000000) ||
1753 (l == 0x0000ffff) || (l == 0xffff0000) )
1754 continue;
1755 pdev = xmalloc(struct pci_dev);
1756 pdev->bus = bus;
1757 pdev->devfn = PCI_DEVFN(dev, func);
1758 list_add_tail(&pdev->list, &hd->pdev_list);
1760 drhd = acpi_find_matched_drhd_unit(pdev);
1761 ret = domain_context_mapping(dom0, drhd->iommu, pdev);
1762 if ( ret != 0 )
1763 gdprintk(XENLOG_ERR VTDPREFIX,
1764 "domain_context_mapping failed\n");
1769 for_each_pdev ( dom0, pdev )
1770 dprintk(XENLOG_INFO VTDPREFIX,
1771 "setup_dom0_devices: bdf = %x:%x:%x\n",
1772 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1775 void clear_fault_bits(struct iommu *iommu)
1777 u64 val;
1779 val = dmar_readq(
1780 iommu->reg,
1781 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+0x8);
1782 dmar_writeq(
1783 iommu->reg,
1784 cap_fault_reg_offset(dmar_readq(iommu->reg,DMAR_CAP_REG))+8,
1785 val);
1786 dmar_writel(iommu->reg, DMAR_FSTS_REG, DMA_FSTS_FAULTS);
1789 static int init_vtd_hw(void)
1791 struct acpi_drhd_unit *drhd;
1792 struct iommu *iommu;
1793 struct iommu_flush *flush = NULL;
1794 int vector;
1795 int ret;
1797 for_each_drhd_unit ( drhd )
1799 iommu = drhd->iommu;
1800 ret = iommu_set_root_entry(iommu);
1801 if ( ret )
1803 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: set root entry failed\n");
1804 return -EIO;
1807 vector = iommu_set_interrupt(iommu);
1808 dma_msi_data_init(iommu, vector);
1809 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
1810 iommu->vector = vector;
1811 clear_fault_bits(iommu);
1812 dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
1814 /* initialize flush functions */
1815 flush = iommu_get_flush(iommu);
1816 flush->context = flush_context_reg;
1817 flush->iotlb = flush_iotlb_reg;
1819 if ( qinval_setup(iommu) != 0 )
1820 dprintk(XENLOG_ERR VTDPREFIX,
1821 "Queued Invalidation hardware not found\n");
1823 return 0;
1826 static int enable_vtd_translation(void)
1828 struct acpi_drhd_unit *drhd;
1829 struct iommu *iommu;
1831 for_each_drhd_unit ( drhd )
1833 iommu = drhd->iommu;
1834 if ( iommu_enable_translation(iommu) )
1835 return -EIO;
1837 return 0;
1840 static void setup_dom0_rmrr(void)
1842 struct acpi_rmrr_unit *rmrr;
1843 struct pci_dev *pdev;
1844 int ret;
1846 for_each_rmrr_device ( rmrr, pdev )
1847 ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev);
1848 if ( ret )
1849 gdprintk(XENLOG_ERR VTDPREFIX,
1850 "IOMMU: mapping reserved region failed\n");
1851 end_for_each_rmrr_device ( rmrr, pdev )
1854 int iommu_setup(void)
1856 struct hvm_iommu *hd = domain_hvm_iommu(dom0);
1857 struct acpi_drhd_unit *drhd;
1858 struct iommu *iommu;
1859 unsigned long i;
1861 if ( !vtd_enabled )
1862 return 0;
1864 spin_lock_init(&domid_bitmap_lock);
1865 INIT_LIST_HEAD(&hd->pdev_list);
1867 /* setup clflush size */
1868 x86_clflush_size = ((cpuid_ebx(1) >> 8) & 0xff) * 8;
1870 /* Allocate IO page directory page for the domain. */
1871 drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
1872 iommu = drhd->iommu;
1874 /* Allocate domain id bitmap, and set bit 0 as reserved */
1875 domid_bitmap_size = cap_ndoms(iommu->cap);
1876 domid_bitmap = xmalloc_bytes(domid_bitmap_size / 8);
1877 if ( domid_bitmap == NULL )
1878 goto error;
1879 memset(domid_bitmap, 0, domid_bitmap_size / 8);
1880 set_bit(0, domid_bitmap);
1882 /* setup 1:1 page table for dom0 */
1883 for ( i = 0; i < max_page; i++ )
1884 iommu_map_page(dom0, i, i);
1886 enable_vtd_translation();
1887 if ( init_vtd_hw() )
1888 goto error;
1889 setup_dom0_devices();
1890 setup_dom0_rmrr();
1891 iommu_flush_all();
1893 return 0;
1895 error:
1896 printk("iommu_setup() failed\n");
1897 for_each_drhd_unit ( drhd )
1899 iommu = drhd->iommu;
1900 free_iommu(iommu);
1902 return -EIO;
1905 /*
1906 * If the device isn't owned by dom0, it means it already
1907 * has been assigned to other domain, or it's not exist.
1908 */
1909 int device_assigned(u8 bus, u8 devfn)
1911 struct pci_dev *pdev;
1913 for_each_pdev( dom0, pdev )
1914 if ( (pdev->bus == bus ) && (pdev->devfn == devfn) )
1915 return 0;
1917 return 1;
1920 int assign_device(struct domain *d, u8 bus, u8 devfn)
1922 struct acpi_rmrr_unit *rmrr;
1923 struct pci_dev *pdev;
1924 int ret = 0;
1926 if ( list_empty(&acpi_drhd_units) )
1927 return ret;
1929 gdprintk(XENLOG_INFO VTDPREFIX,
1930 "assign_device: bus = %x dev = %x func = %x\n",
1931 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1933 pdev_flr(bus, devfn);
1934 reassign_device_ownership(dom0, d, bus, devfn);
1936 /* Setup rmrr identify mapping */
1937 for_each_rmrr_device( rmrr, pdev )
1938 if ( pdev->bus == bus && pdev->devfn == devfn )
1940 ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
1941 if ( ret )
1943 gdprintk(XENLOG_ERR VTDPREFIX,
1944 "IOMMU: mapping reserved region failed\n");
1945 return ret;
1948 end_for_each_rmrr_device(rmrr, pdev)
1950 return ret;
1953 void iommu_set_pgd(struct domain *d)
1955 struct hvm_iommu *hd = domain_hvm_iommu(d);
1956 unsigned long p2m_table;
1958 if ( hd->pgd )
1960 gdprintk(XENLOG_INFO VTDPREFIX,
1961 "iommu_set_pgd_1: hd->pgd = %p\n", hd->pgd);
1962 hd->pgd = NULL;
1964 p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
1966 #if CONFIG_PAGING_LEVELS == 3
1967 if ( !hd->pgd )
1969 int level = agaw_to_level(hd->agaw);
1970 struct dma_pte *pmd = NULL;
1971 struct dma_pte *pgd = NULL;
1972 struct dma_pte *pte = NULL;
1973 l3_pgentry_t *l3e;
1974 unsigned long flags;
1975 int i;
1977 spin_lock_irqsave(&hd->mapping_lock, flags);
1978 if ( !hd->pgd )
1980 pgd = (struct dma_pte *)alloc_xenheap_page();
1981 if ( !pgd )
1983 spin_unlock_irqrestore(&hd->mapping_lock, flags);
1984 gdprintk(XENLOG_ERR VTDPREFIX,
1985 "Allocate pgd memory failed!\n");
1986 return;
1988 memset(pgd, 0, PAGE_SIZE);
1989 hd->pgd = pgd;
1992 l3e = map_domain_page(p2m_table);
1993 switch ( level )
1995 case VTD_PAGE_TABLE_LEVEL_3: /* Weybridge */
1996 /* We only support 8 entries for the PAE L3 p2m table */
1997 for ( i = 0; i < 8 ; i++ )
1999 /* Don't create new L2 entry, use ones from p2m table */
2000 pgd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
2002 break;
2004 case VTD_PAGE_TABLE_LEVEL_4: /* Stoakley */
2005 /* We allocate one more page for the top vtd page table. */
2006 pmd = (struct dma_pte *)alloc_xenheap_page();
2007 if ( !pmd )
2009 unmap_domain_page(l3e);
2010 spin_unlock_irqrestore(&hd->mapping_lock, flags);
2011 gdprintk(XENLOG_ERR VTDPREFIX,
2012 "Allocate pmd memory failed!\n");
2013 return;
2015 memset((u8*)pmd, 0, PAGE_SIZE);
2016 pte = &pgd[0];
2017 dma_set_pte_addr(*pte, virt_to_maddr(pmd));
2018 dma_set_pte_readable(*pte);
2019 dma_set_pte_writable(*pte);
2021 for ( i = 0; i < 8; i++ )
2023 /* Don't create new L2 entry, use ones from p2m table */
2024 pmd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
2026 break;
2027 default:
2028 gdprintk(XENLOG_ERR VTDPREFIX,
2029 "iommu_set_pgd:Unsupported p2m table sharing level!\n");
2030 break;
2032 unmap_domain_page(l3e);
2033 spin_unlock_irqrestore(&hd->mapping_lock, flags);
2035 #elif CONFIG_PAGING_LEVELS == 4
2036 if ( !hd->pgd )
2038 int level = agaw_to_level(hd->agaw);
2039 l3_pgentry_t *l3e;
2040 mfn_t pgd_mfn;
2042 switch ( level )
2044 case VTD_PAGE_TABLE_LEVEL_3:
2045 l3e = map_domain_page(p2m_table);
2046 if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
2048 gdprintk(XENLOG_ERR VTDPREFIX,
2049 "iommu_set_pgd: second level wasn't there\n");
2050 unmap_domain_page(l3e);
2051 return;
2053 pgd_mfn = _mfn(l3e_get_pfn(*l3e));
2054 unmap_domain_page(l3e);
2055 hd->pgd = maddr_to_virt(pagetable_get_paddr(
2056 pagetable_from_mfn(pgd_mfn)));
2057 break;
2059 case VTD_PAGE_TABLE_LEVEL_4:
2060 pgd_mfn = _mfn(p2m_table);
2061 hd->pgd = maddr_to_virt(pagetable_get_paddr(
2062 pagetable_from_mfn(pgd_mfn)));
2063 break;
2064 default:
2065 gdprintk(XENLOG_ERR VTDPREFIX,
2066 "iommu_set_pgd:Unsupported p2m table sharing level!\n");
2067 break;
2070 #endif
2071 gdprintk(XENLOG_INFO VTDPREFIX,
2072 "iommu_set_pgd: hd->pgd = %p\n", hd->pgd);
2076 u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
2077 int iommu_suspend(void)
2079 struct acpi_drhd_unit *drhd;
2080 struct iommu *iommu;
2081 int i = 0;
2083 iommu_flush_all();
2085 for_each_drhd_unit ( drhd )
2087 iommu = drhd->iommu;
2088 iommu_state[DMAR_RTADDR_REG * i] =
2089 (u64) dmar_readq(iommu->reg, DMAR_RTADDR_REG);
2090 iommu_state[DMAR_FECTL_REG * i] =
2091 (u32) dmar_readl(iommu->reg, DMAR_FECTL_REG);
2092 iommu_state[DMAR_FEDATA_REG * i] =
2093 (u32) dmar_readl(iommu->reg, DMAR_FEDATA_REG);
2094 iommu_state[DMAR_FEADDR_REG * i] =
2095 (u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
2096 iommu_state[DMAR_FEUADDR_REG * i] =
2097 (u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
2098 iommu_state[DMAR_PLMBASE_REG * i] =
2099 (u32) dmar_readl(iommu->reg, DMAR_PLMBASE_REG);
2100 iommu_state[DMAR_PLMLIMIT_REG * i] =
2101 (u32) dmar_readl(iommu->reg, DMAR_PLMLIMIT_REG);
2102 iommu_state[DMAR_PHMBASE_REG * i] =
2103 (u64) dmar_readq(iommu->reg, DMAR_PHMBASE_REG);
2104 iommu_state[DMAR_PHMLIMIT_REG * i] =
2105 (u64) dmar_readq(iommu->reg, DMAR_PHMLIMIT_REG);
2106 i++;
2109 return 0;
2112 int iommu_resume(void)
2114 struct acpi_drhd_unit *drhd;
2115 struct iommu *iommu;
2116 int i = 0;
2118 iommu_flush_all();
2120 init_vtd_hw();
2121 for_each_drhd_unit ( drhd )
2123 iommu = drhd->iommu;
2124 dmar_writeq( iommu->reg, DMAR_RTADDR_REG,
2125 (u64) iommu_state[DMAR_RTADDR_REG * i]);
2126 dmar_writel(iommu->reg, DMAR_FECTL_REG,
2127 (u32) iommu_state[DMAR_FECTL_REG * i]);
2128 dmar_writel(iommu->reg, DMAR_FEDATA_REG,
2129 (u32) iommu_state[DMAR_FEDATA_REG * i]);
2130 dmar_writel(iommu->reg, DMAR_FEADDR_REG,
2131 (u32) iommu_state[DMAR_FEADDR_REG * i]);
2132 dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
2133 (u32) iommu_state[DMAR_FEUADDR_REG * i]);
2134 dmar_writel(iommu->reg, DMAR_PLMBASE_REG,
2135 (u32) iommu_state[DMAR_PLMBASE_REG * i]);
2136 dmar_writel(iommu->reg, DMAR_PLMLIMIT_REG,
2137 (u32) iommu_state[DMAR_PLMLIMIT_REG * i]);
2138 dmar_writeq(iommu->reg, DMAR_PHMBASE_REG,
2139 (u64) iommu_state[DMAR_PHMBASE_REG * i]);
2140 dmar_writeq(iommu->reg, DMAR_PHMLIMIT_REG,
2141 (u64) iommu_state[DMAR_PHMLIMIT_REG * i]);
2143 if ( iommu_enable_translation(iommu) )
2144 return -EIO;
2145 i++;
2147 return 0;
2150 /*
2151 * Local variables:
2152 * mode: C
2153 * c-set-style: "BSD"
2154 * c-basic-offset: 4
2155 * tab-width: 4
2156 * indent-tabs-mode: nil
2157 * End:
2158 */