ia64/xen-unstable

view xen/arch/x86/hvm/vmx/vtd/utils.c @ 16875:74a9bfccddba

vt-d: Do FLR of assigned devices with VT-d

Currently there is a pdev_flr() function to do FLR before device
assignment in qemu, but most of devices don't have FLR capability.
What's more, should do FLR before assignment and deassignment for
keeping correct device status. If the device doesn't have FLR
capablility, this patch implemented to enter D3hot and return to D0 to
do FLR. And exposed pdev_flr() in VT-d utils, then it can be invoked
by assignment and deassignment functions.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Anthony Xu <anthony.xu@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jan 24 14:39:38 2008 +0000 (2008-01-24)
parents 2633dc4f55d4
children
line source
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Allen Kay <allen.m.kay@intel.com>
18 */
20 #include <xen/init.h>
21 #include <xen/bitmap.h>
22 #include <xen/irq.h>
23 #include <xen/spinlock.h>
24 #include <xen/sched.h>
25 #include <xen/delay.h>
26 #include <asm/iommu.h>
27 #include <asm/hvm/vmx/intel-iommu.h>
28 #include "dmar.h"
29 #include "pci-direct.h"
30 #include "pci_regs.h"
31 #include "msi.h"
33 #include <xen/mm.h>
34 #include <xen/xmalloc.h>
35 #include <xen/inttypes.h>
37 #define INTEL 0x8086
38 #define SEABURG 0x4000
39 #define C_STEP 2
41 int vtd_hw_check(void)
42 {
43 u16 vendor, device;
44 u8 revision, stepping;
46 vendor = read_pci_config_16(0, 0, 0, PCI_VENDOR_ID);
47 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
48 revision = read_pci_config_byte(0, 0, 0, PCI_REVISION_ID);
49 stepping = revision & 0xf;
51 if ( (vendor == INTEL) && (device == SEABURG) )
52 {
53 if ( stepping < C_STEP )
54 {
55 dprintk(XENLOG_WARNING VTDPREFIX,
56 "*** VT-d disabled - pre C0-step Seaburg found\n");
57 dprintk(XENLOG_WARNING VTDPREFIX,
58 "*** vendor = %x device = %x revision = %x\n",
59 vendor, device, revision);
60 vtd_enabled = 0;
61 return -ENODEV;
62 }
63 }
64 return 0;
65 }
67 /* Disable vt-d protected memory registers. */
68 void disable_pmr(struct iommu *iommu)
69 {
70 unsigned long start_time;
71 unsigned int val;
73 val = dmar_readl(iommu->reg, DMAR_PMEN_REG);
74 if ( !(val & DMA_PMEN_PRS) )
75 return;
77 dmar_writel(iommu->reg, DMAR_PMEN_REG, val & ~DMA_PMEN_EPM);
78 start_time = jiffies;
80 for ( ; ; )
81 {
82 val = dmar_readl(iommu->reg, DMAR_PMEN_REG);
83 if ( (val & DMA_PMEN_PRS) == 0 )
84 break;
86 if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
87 panic("Disable PMRs timeout\n");
89 cpu_relax();
90 }
92 dprintk(XENLOG_INFO VTDPREFIX,
93 "Disabled protected memory registers\n");
94 }
96 static u8 find_cap_offset(u8 bus, u8 dev, u8 func, u8 cap)
97 {
98 u8 id;
99 int max_cap = 48;
100 u8 pos = PCI_CAPABILITY_LIST;
101 u16 status;
103 status = read_pci_config_16(bus, dev, func, PCI_STATUS);
104 if ( (status & PCI_STATUS_CAP_LIST) == 0 )
105 return 0;
107 while ( max_cap-- )
108 {
109 pos = read_pci_config_byte(bus, dev, func, pos);
110 if ( pos < 0x40 )
111 break;
113 pos &= ~3;
114 id = read_pci_config_byte(bus, dev, func, pos + PCI_CAP_LIST_ID);
116 if ( id == 0xff )
117 break;
118 else if ( id == cap )
119 return pos;
121 pos += PCI_CAP_LIST_NEXT;
122 }
124 return 0;
125 }
127 #define PCI_D3hot (3)
128 #define PCI_CONFIG_DWORD_SIZE (64)
129 #define PCI_EXP_DEVCAP_FLR (1 << 28)
130 #define PCI_EXP_DEVCTL_FLR (1 << 15)
132 void pdev_flr(u8 bus, u8 devfn)
133 {
134 u8 pos;
135 u32 dev_cap, dev_status, pm_ctl;
136 int flr = 0;
137 u8 dev = PCI_SLOT(devfn);
138 u8 func = PCI_FUNC(devfn);
140 pos = find_cap_offset(bus, dev, func, PCI_CAP_ID_EXP);
141 if ( pos != 0 )
142 {
143 dev_cap = read_pci_config(bus, dev, func, pos + PCI_EXP_DEVCAP);
144 if ( dev_cap & PCI_EXP_DEVCAP_FLR )
145 {
146 write_pci_config(bus, dev, func,
147 pos + PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_FLR);
148 do {
149 dev_status = read_pci_config(bus, dev, func,
150 pos + PCI_EXP_DEVSTA);
151 } while ( dev_status & PCI_EXP_DEVSTA_TRPND );
153 flr = 1;
154 }
155 }
157 /* If this device doesn't support function level reset,
158 * program device from D0 t0 D3hot, and then return to D0
159 * to implement function level reset
160 */
161 if ( flr == 0 )
162 {
163 pos = find_cap_offset(bus, dev, func, PCI_CAP_ID_PM);
164 if ( pos != 0 )
165 {
166 int i;
167 u32 config[PCI_CONFIG_DWORD_SIZE];
168 for ( i = 0; i < PCI_CONFIG_DWORD_SIZE; i++ )
169 config[i] = read_pci_config(bus, dev, func, i*4);
171 /* Enter D3hot without soft reset */
172 pm_ctl = read_pci_config(bus, dev, func, pos + PCI_PM_CTRL);
173 pm_ctl |= PCI_PM_CTRL_NO_SOFT_RESET;
174 pm_ctl &= ~PCI_PM_CTRL_STATE_MASK;
175 pm_ctl |= PCI_D3hot;
176 write_pci_config(bus, dev, func, pos + PCI_PM_CTRL, pm_ctl);
177 mdelay(10);
179 /* From D3hot to D0 */
180 write_pci_config(bus, dev, func, pos + PCI_PM_CTRL, 0);
181 mdelay(10);
183 /* Write saved configurations to device */
184 for ( i = 0; i < PCI_CONFIG_DWORD_SIZE; i++ )
185 write_pci_config(bus, dev, func, i*4, config[i]);
187 flr = 1;
188 }
189 }
190 }
192 void print_iommu_regs(struct acpi_drhd_unit *drhd)
193 {
194 struct iommu *iommu = drhd->iommu;
196 printk("---- print_iommu_regs ----\n");
197 printk("print_iommu_regs: drhd->address = %lx\n", drhd->address);
198 printk("print_iommu_regs: DMAR_VER_REG = %x\n",
199 dmar_readl(iommu->reg,DMAR_VER_REG));
200 printk("print_iommu_regs: DMAR_CAP_REG = %"PRIx64"\n",
201 dmar_readq(iommu->reg,DMAR_CAP_REG));
202 printk("print_iommu_regs: n_fault_reg = %"PRIx64"\n",
203 cap_num_fault_regs(dmar_readq(iommu->reg, DMAR_CAP_REG)));
204 printk("print_iommu_regs: fault_recording_offset_l = %"PRIx64"\n",
205 cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)));
206 printk("print_iommu_regs: fault_recording_offset_h = %"PRIx64"\n",
207 cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8);
208 printk("print_iommu_regs: fault_recording_reg_l = %"PRIx64"\n",
209 dmar_readq(iommu->reg,
210 cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG))));
211 printk("print_iommu_regs: fault_recording_reg_h = %"PRIx64"\n",
212 dmar_readq(iommu->reg,
213 cap_fault_reg_offset(dmar_readq(iommu->reg, DMAR_CAP_REG)) + 8));
214 printk("print_iommu_regs: DMAR_ECAP_REG = %"PRIx64"\n",
215 dmar_readq(iommu->reg,DMAR_ECAP_REG));
216 printk("print_iommu_regs: DMAR_GCMD_REG = %x\n",
217 dmar_readl(iommu->reg,DMAR_GCMD_REG));
218 printk("print_iommu_regs: DMAR_GSTS_REG = %x\n",
219 dmar_readl(iommu->reg,DMAR_GSTS_REG));
220 printk("print_iommu_regs: DMAR_RTADDR_REG = %"PRIx64"\n",
221 dmar_readq(iommu->reg,DMAR_RTADDR_REG));
222 printk("print_iommu_regs: DMAR_CCMD_REG = %"PRIx64"\n",
223 dmar_readq(iommu->reg,DMAR_CCMD_REG));
224 printk("print_iommu_regs: DMAR_FSTS_REG = %x\n",
225 dmar_readl(iommu->reg,DMAR_FSTS_REG));
226 printk("print_iommu_regs: DMAR_FECTL_REG = %x\n",
227 dmar_readl(iommu->reg,DMAR_FECTL_REG));
228 printk("print_iommu_regs: DMAR_FEDATA_REG = %x\n",
229 dmar_readl(iommu->reg,DMAR_FEDATA_REG));
230 printk("print_iommu_regs: DMAR_FEADDR_REG = %x\n",
231 dmar_readl(iommu->reg,DMAR_FEADDR_REG));
232 printk("print_iommu_regs: DMAR_FEUADDR_REG = %x\n",
233 dmar_readl(iommu->reg,DMAR_FEUADDR_REG));
234 }
236 u32 get_level_index(unsigned long gmfn, int level)
237 {
238 while ( --level )
239 gmfn = gmfn >> LEVEL_STRIDE;
241 return gmfn & LEVEL_MASK;
242 }
/*
 * Debug helper: dump the VT-d translation structures (root entry,
 * context entry, then each page-table level) that would translate
 * guest frame @gmfn for device @bus:@devfn of domain @d on @iommu.
 * Output goes to the console via printk(); no state is modified.
 *
 * NOTE(review): the loop iterates over every DRHD unit, but the body
 * always dereferences the @iommu argument rather than drhd->iommu, so
 * each iteration dumps the same tables — confirm whether drhd->iommu
 * was intended.
 */
void print_vtd_entries(
    struct domain *d,
    struct iommu *iommu,
    int bus, int devfn,
    unsigned long gmfn)
{
    struct hvm_iommu *hd = domain_hvm_iommu(d);
    struct acpi_drhd_unit *drhd;
    struct context_entry *ctxt_entry;
    struct root_entry *root_entry;
    struct dma_pte pte;
    u64 *l;
    u32 l_index;
    u32 i = 0;                           /* per-DRHD banner counter */
    int level = agaw_to_level(hd->agaw); /* depth of the page-table walk */

    printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x gmfn = %lx\n",
           d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), gmfn);

    if ( hd->pgd == NULL )
    {
        /* NB: "hg" in this message is a typo for "hd"; it is a runtime
         * string and deliberately left unchanged here. */
        printk(" hg->pgd == NULL\n");
        return;
    }
    printk(" d->pgd = %p virt_to_maddr(hd->pgd) = %lx\n",
           hd->pgd, virt_to_maddr(hd->pgd));

    for_each_drhd_unit ( drhd )
    {
        printk("---- print_vtd_entries %d ----\n", i++);

        root_entry = iommu->root_entry;
        if ( root_entry == NULL )
        {
            printk(" root_entry == NULL\n");
            continue;
        }

        printk(" root_entry = %p\n", root_entry);
        printk(" root_entry[%x] = %"PRIx64"\n", bus, root_entry[bus].val);
        if ( !root_present(root_entry[bus]) )
        {
            printk(" root_entry[%x] not present\n", bus);
            continue;
        }

        /* The root entry holds the context-table address in its upper
         * bits; mask off the low (attribute) bits by shifting. */
        ctxt_entry =
            maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT);
        if ( ctxt_entry == NULL )
        {
            printk(" ctxt_entry == NULL\n");
            continue;
        }

        printk(" context = %p\n", ctxt_entry);
        printk(" context[%x] = %"PRIx64" %"PRIx64"\n",
               devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
        if ( !context_present(ctxt_entry[devfn]) )
        {
            printk(" ctxt_entry[%x] not present\n", devfn);
            continue;
        }

        if ( level != VTD_PAGE_TABLE_LEVEL_3 &&
             level != VTD_PAGE_TABLE_LEVEL_4)
        {
            printk("Unsupported VTD page table level (%d)!\n", level);
            continue;
        }

        /* Walk the page tables from the top level down to the leaf.
         * The raw entry value (including attribute bits in the low
         * bits) is fed to maddr_to_virt(); the shift pair at the top
         * of the loop then page-aligns the resulting pointer, which
         * also strips those attribute bits. */
        l = maddr_to_virt(ctxt_entry[devfn].lo);
        do
        {
            l = (u64*)(((unsigned long)l >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
            printk(" l%d = %p\n", level, l);
            if ( l == NULL )
            {
                printk(" l%d == NULL\n", level);
                break;
            }
            l_index = get_level_index(gmfn, level);
            printk(" l%d_index = %x\n", level, l_index);
            printk(" l%d[%x] = %"PRIx64"\n", level, l_index, l[l_index]);

            pte.val = l[l_index];
            if ( !dma_pte_present(pte) )
            {
                printk(" l%d[%x] not present\n", level, l_index);
                break;
            }

            /* Descend to the next-level table. */
            l = maddr_to_virt(l[l_index]);
        } while ( --level );
    }
}