
view xen/drivers/passthrough/vtd/qinval.c @ 19810:aa472909b39c

vtd: IO NUMA support

This patch adds VT-d RHSA processing for IO NUMA support. The basic
idea is to parse the ACPI RHSA structure to obtain the mapping from
each VT-d hardware unit to its proximity domain. This mapping is then
used when allocating pages for VT-d hardware data structures.

Signed-off-by: Allen Kay <allen.m.kay@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Jun 23 11:14:24 2009 +0100 (2009-06-23)
parents 133c889c21a7
children eb82fc994ab2
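
The RHSA parsing itself lands in the DMAR/ACPI code rather than in qinval.c
below; as a rough illustration of the idea in the changeset description, a
lookup from a DRHD unit to its NUMA node might look like the sketch here.
The names acpi_rhsa_unit, rhsa_list and drhd_to_node() are assumptions for
illustration only; pxm_to_node() and the acpi_drhd_unit fields are the usual
Xen ones.

struct acpi_rhsa_unit {
    struct list_head list;
    u64 address;              /* register base address of the DRHD unit */
    u32 proximity_domain;     /* ACPI proximity domain reported by RHSA */
};

/* Map a DRHD unit to the NUMA node named by its ACPI RHSA entry, if any. */
static int drhd_to_node(struct acpi_drhd_unit *drhd,
                        struct list_head *rhsa_list)
{
    struct acpi_rhsa_unit *rhsa;

    list_for_each_entry ( rhsa, rhsa_list, list )
        if ( rhsa->address == drhd->address )
            return pxm_to_node(rhsa->proximity_domain);

    return -1;    /* no RHSA entry: fall back to node-agnostic allocation */
}

Allocation helpers such as alloc_pgtable_maddr() (used by enable_qinval()
below) can then take the drhd and allocate from the matching node.
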
line source
/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) Allen Kay <allen.m.kay@intel.com>
 * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
 */

#include <xen/sched.h>
#include <xen/iommu.h>
#include <xen/time.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include "iommu.h"
#include "dmar.h"
#include "vtd.h"
#include "extern.h"

static void print_qi_regs(struct iommu *iommu)
{
    u64 val;

    val = dmar_readq(iommu->reg, DMAR_IQA_REG);
    printk("DMAR_IQA_REG = %"PRIx64"\n", val);

    val = dmar_readq(iommu->reg, DMAR_IQH_REG);
    printk("DMAR_IQH_REG = %"PRIx64"\n", val);

    val = dmar_readq(iommu->reg, DMAR_IQT_REG);
    printk("DMAR_IQT_REG = %"PRIx64"\n", val);
}

static int qinval_next_index(struct iommu *iommu)
{
    u64 val;
    val = dmar_readq(iommu->reg, DMAR_IQT_REG);
    return (val >> 4);
}

static int qinval_update_qtail(struct iommu *iommu, int index)
{
    u64 val;

    /* Need an ASSERT to ensure that we have got the register lock */
    val = (index < (QINVAL_ENTRY_NR-1)) ? (index + 1) : 0;
    dmar_writeq(iommu->reg, DMAR_IQT_REG, (val << 4));
    return 0;
}

static int gen_cc_inv_dsc(struct iommu *iommu, int index,
    u16 did, u16 source_id, u8 function_mask, u8 granu)
{
    unsigned long flags;
    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
    qinval_entries =
        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
    qinval_entry = &qinval_entries[index];
    qinval_entry->q.cc_inv_dsc.lo.type = TYPE_INVAL_CONTEXT;
    qinval_entry->q.cc_inv_dsc.lo.granu = granu;
    qinval_entry->q.cc_inv_dsc.lo.res_1 = 0;
    qinval_entry->q.cc_inv_dsc.lo.did = did;
    qinval_entry->q.cc_inv_dsc.lo.sid = source_id;
    qinval_entry->q.cc_inv_dsc.lo.fm = function_mask;
    qinval_entry->q.cc_inv_dsc.lo.res_2 = 0;
    qinval_entry->q.cc_inv_dsc.hi.res = 0;

    unmap_vtd_domain_page(qinval_entries);
    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);

    return 0;
}

int queue_invalidate_context(struct iommu *iommu,
    u16 did, u16 source_id, u8 function_mask, u8 granu)
{
    int ret = -1;
    unsigned long flags;
    int index = -1;

    spin_lock_irqsave(&iommu->register_lock, flags);
    index = qinval_next_index(iommu);
    if ( index == -1 )
    {
        /* Don't leak the register lock on the error path. */
        spin_unlock_irqrestore(&iommu->register_lock, flags);
        return -EBUSY;
    }
    ret = gen_cc_inv_dsc(iommu, index, did, source_id,
                         function_mask, granu);
    ret |= qinval_update_qtail(iommu, index);
    spin_unlock_irqrestore(&iommu->register_lock, flags);
    return ret;
}

static int gen_iotlb_inv_dsc(struct iommu *iommu, int index,
    u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
{
    unsigned long flags;
    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

    if ( index == -1 )
        return -1;
    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);

    qinval_entries =
        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
    qinval_entry = &qinval_entries[index];
    qinval_entry->q.iotlb_inv_dsc.lo.type = TYPE_INVAL_IOTLB;
    qinval_entry->q.iotlb_inv_dsc.lo.granu = granu;
    qinval_entry->q.iotlb_inv_dsc.lo.dr = 0;
    qinval_entry->q.iotlb_inv_dsc.lo.dw = 0;
    qinval_entry->q.iotlb_inv_dsc.lo.res_1 = 0;
    qinval_entry->q.iotlb_inv_dsc.lo.did = did;
    qinval_entry->q.iotlb_inv_dsc.lo.res_2 = 0;

    qinval_entry->q.iotlb_inv_dsc.hi.am = am;
    qinval_entry->q.iotlb_inv_dsc.hi.ih = ih;
    qinval_entry->q.iotlb_inv_dsc.hi.res_1 = 0;
    qinval_entry->q.iotlb_inv_dsc.hi.addr = addr;

    unmap_vtd_domain_page(qinval_entries);
    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
    return 0;
}

int queue_invalidate_iotlb(struct iommu *iommu,
    u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
{
    int ret = -1;
    unsigned long flags;
    int index = -1;

    spin_lock_irqsave(&iommu->register_lock, flags);

    index = qinval_next_index(iommu);
    ret = gen_iotlb_inv_dsc(iommu, index, granu, dr, dw, did,
                            am, ih, addr);
    ret |= qinval_update_qtail(iommu, index);
    spin_unlock_irqrestore(&iommu->register_lock, flags);
    return ret;
}

static int gen_wait_dsc(struct iommu *iommu, int index,
    u8 iflag, u8 sw, u8 fn, u32 sdata, volatile u32 *saddr)
{
    unsigned long flags;
    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

    if ( index == -1 )
        return -1;
    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
    qinval_entries =
        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
    qinval_entry = &qinval_entries[index];
    qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
    qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
    qinval_entry->q.inv_wait_dsc.lo.sw = sw;
    qinval_entry->q.inv_wait_dsc.lo.fn = fn;
    qinval_entry->q.inv_wait_dsc.lo.res_1 = 0;
    qinval_entry->q.inv_wait_dsc.lo.sdata = sdata;
    qinval_entry->q.inv_wait_dsc.hi.res_1 = 0;
    qinval_entry->q.inv_wait_dsc.hi.saddr = virt_to_maddr(saddr) >> 2;
    unmap_vtd_domain_page(qinval_entries);
    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
    return 0;
}

static int queue_invalidate_wait(struct iommu *iommu,
    u8 iflag, u8 sw, u8 fn, u32 sdata, volatile u32 *saddr)
{
    unsigned long flags;
    s_time_t start_time;
    int index = -1;
    int ret = -1;
    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

    spin_lock_irqsave(&qi_ctrl->qinval_poll_lock, flags);
    spin_lock(&iommu->register_lock);
    index = qinval_next_index(iommu);
    if ( *saddr == 1 )
        *saddr = 0;
    ret = gen_wait_dsc(iommu, index, iflag, sw, fn, sdata, saddr);
    ret |= qinval_update_qtail(iommu, index);
    spin_unlock(&iommu->register_lock);

    /* The interrupt-on-completion method is not supported yet, so poll. */
    if ( sw )
    {
        /* All wait descriptors write the same data to the same address. */
        start_time = NOW();
        while ( *saddr != 1 )
        {
            if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
            {
                print_qi_regs(iommu);
                panic("queue invalidate wait descriptor was not executed\n");
            }
            cpu_relax();
        }
    }
    spin_unlock_irqrestore(&qi_ctrl->qinval_poll_lock, flags);
    return ret;
}

int invalidate_sync(struct iommu *iommu)
{
    int ret = -1;
    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

    if ( qi_ctrl->qinval_maddr != 0 )
    {
        ret = queue_invalidate_wait(iommu,
            0, 1, 1, 1, &qi_ctrl->qinval_poll_status);
        return ret;
    }
    return 0;
}

static int gen_dev_iotlb_inv_dsc(struct iommu *iommu, int index,
    u32 max_invs_pend, u16 sid, u16 size, u64 addr)
{
    unsigned long flags;
    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

    if ( index == -1 )
        return -1;
    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);

    qinval_entries =
        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
    qinval_entry = &qinval_entries[index];
    qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
    qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_2 = 0;
    qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = sid;
    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_3 = 0;

    qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
    qinval_entry->q.dev_iotlb_inv_dsc.hi.res_1 = 0;
    qinval_entry->q.dev_iotlb_inv_dsc.hi.addr = addr >> PAGE_SHIFT_4K;

    unmap_vtd_domain_page(qinval_entries);
    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
    return 0;
}

int qinval_device_iotlb(struct iommu *iommu,
    u32 max_invs_pend, u16 sid, u16 size, u64 addr)
{
    int ret = -1;
    unsigned long flags;
    int index = -1;

    spin_lock_irqsave(&iommu->register_lock, flags);
    index = qinval_next_index(iommu);
    ret = gen_dev_iotlb_inv_dsc(iommu, index, max_invs_pend,
                                sid, size, addr);
    ret |= qinval_update_qtail(iommu, index);
    spin_unlock_irqrestore(&iommu->register_lock, flags);
    return ret;
}

static int gen_iec_inv_dsc(struct iommu *iommu, int index,
    u8 granu, u8 im, u16 iidx)
{
    unsigned long flags;
    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

    if ( index == -1 )
        return -1;
    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);

    qinval_entries =
        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
    qinval_entry = &qinval_entries[index];
    qinval_entry->q.iec_inv_dsc.lo.type = TYPE_INVAL_IEC;
    qinval_entry->q.iec_inv_dsc.lo.granu = granu;
    qinval_entry->q.iec_inv_dsc.lo.res_1 = 0;
    qinval_entry->q.iec_inv_dsc.lo.im = im;
    qinval_entry->q.iec_inv_dsc.lo.iidx = iidx;
    qinval_entry->q.iec_inv_dsc.lo.res_2 = 0;
    qinval_entry->q.iec_inv_dsc.hi.res = 0;

    unmap_vtd_domain_page(qinval_entries);
    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
    return 0;
}

int queue_invalidate_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
{
    int ret;
    unsigned long flags;
    int index = -1;

    spin_lock_irqsave(&iommu->register_lock, flags);
    index = qinval_next_index(iommu);
    ret = gen_iec_inv_dsc(iommu, index, granu, im, iidx);
    ret |= qinval_update_qtail(iommu, index);
    spin_unlock_irqrestore(&iommu->register_lock, flags);
    return ret;
}

int __iommu_flush_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
{
    int ret;
    ret = queue_invalidate_iec(iommu, granu, im, iidx);
    ret |= invalidate_sync(iommu);

    /*
     * Reading a VT-d architectural register ensures that draining
     * happens in an implementation-independent way.
     */
    (void)dmar_readq(iommu->reg, DMAR_CAP_REG);
    return ret;
}

int iommu_flush_iec_global(struct iommu *iommu)
{
    return __iommu_flush_iec(iommu, IEC_GLOBAL_INVL, 0, 0);
}

int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx)
{
    return __iommu_flush_iec(iommu, IEC_INDEX_INVL, im, iidx);
}

static int flush_context_qi(
    void *_iommu, u16 did, u16 sid, u8 fm, u64 type,
    int flush_non_present_entry)
{
    int ret = 0;
    struct iommu *iommu = (struct iommu *)_iommu;
    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

    /*
     * In the non-present entry flush case: if the hardware doesn't cache
     * non-present entries, do nothing; if it does, flush the entries of
     * domain 0 (the domain id used to cache any non-present entries).
     */
    if ( flush_non_present_entry )
    {
        if ( !cap_caching_mode(iommu->cap) )
            return 1;
        else
            did = 0;
    }

    if ( qi_ctrl->qinval_maddr != 0 )
    {
        ret = queue_invalidate_context(iommu, did, sid, fm,
                                       type >> DMA_CCMD_INVL_GRANU_OFFSET);
        ret |= invalidate_sync(iommu);
    }
    return ret;
}

static int flush_iotlb_qi(
    void *_iommu, u16 did,
    u64 addr, unsigned int size_order, u64 type,
    int flush_non_present_entry, int flush_dev_iotlb)
{
    u8 dr = 0, dw = 0;
    int ret = 0;
    struct iommu *iommu = (struct iommu *)_iommu;
    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);

    /*
     * In the non-present entry flush case: if the hardware doesn't cache
     * non-present entries, do nothing; if it does, flush the entries of
     * domain 0 (the domain id used to cache any non-present entries).
     */
    if ( flush_non_present_entry )
    {
        if ( !cap_caching_mode(iommu->cap) )
            return 1;
        else
            did = 0;
    }

    if ( qi_ctrl->qinval_maddr != 0 )
    {
        /* use queued invalidation */
        if ( cap_write_drain(iommu->cap) )
            dw = 1;
        if ( cap_read_drain(iommu->cap) )
            dr = 1;
        /* Need to consider the ih bit later */
        ret = queue_invalidate_iotlb(iommu,
                  (type >> DMA_TLB_FLUSH_GRANU_OFFSET), dr,
                  dw, did, (u8)size_order, 0, addr);
        if ( flush_dev_iotlb )
            ret |= dev_invalidate_iotlb(iommu, did, addr, size_order, type);
        ret |= invalidate_sync(iommu);
    }
    return ret;
}

int enable_qinval(struct iommu *iommu)
{
    struct acpi_drhd_unit *drhd;
    struct qi_ctrl *qi_ctrl;
    struct iommu_flush *flush;
    u32 sts;
    unsigned long flags;

    qi_ctrl = iommu_qi_ctrl(iommu);
    flush = iommu_get_flush(iommu);

    ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);

    if ( qi_ctrl->qinval_maddr == 0 )
    {
        drhd = iommu_to_drhd(iommu);
        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, NUM_QINVAL_PAGES);
        if ( qi_ctrl->qinval_maddr == 0 )
        {
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "Cannot allocate memory for qi_ctrl->qinval_maddr\n");
            return -ENOMEM;
        }
    }

    flush->context = flush_context_qi;
    flush->iotlb = flush_iotlb_qi;

    /* Set up the Invalidation Queue Address (IQA) register with the
     * address of the page we just allocated.  The QS field at
     * bits[2:0] indicates that the queue size is one 4KB page,
     * i.e. 256 entries.  The Queue Head (IQH) and Queue Tail (IQT)
     * registers are automatically reset to 0 by a write to the
     * IQA register.
     */
    qi_ctrl->qinval_maddr |= IQA_REG_QS;

    spin_lock_irqsave(&iommu->register_lock, flags);
    dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);

    dmar_writeq(iommu->reg, DMAR_IQT_REG, 0);

    /* Enable queued invalidation in hardware. */
    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
    dmar_writel(iommu->reg, DMAR_GCMD_REG, sts | DMA_GCMD_QIE);

    /* Make sure the hardware has completed it. */
    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  (sts & DMA_GSTS_QIES), sts);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    return 0;
}

void disable_qinval(struct iommu *iommu)
{
    u32 sts;
    unsigned long flags;

    ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);

    spin_lock_irqsave(&iommu->register_lock, flags);
    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
    dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_QIE));

    /* Make sure the hardware has completed it. */
    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  !(sts & DMA_GSTS_QIES), sts);
    spin_unlock_irqrestore(&iommu->register_lock, flags);
}
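
For reference, every helper above follows the same queued-invalidation
pattern: take the IOMMU's register lock, write a descriptor at the slot
returned by qinval_next_index(), bump the tail with qinval_update_qtail(),
then have invalidate_sync() post an invalidation-wait descriptor and poll
its status write. The sketch below is a hedged usage example using only the
functions defined in this file; for_each_drhd_unit() and drhd->iommu are the
iterator and field provided elsewhere in the VT-d code, and error handling
is elided.

/* Sketch only: flush the interrupt entry cache of every IOMMU after the
 * interrupt remapping table has been rewritten. */
static void flush_all_iec(void)
{
    struct acpi_drhd_unit *drhd;

    for_each_drhd_unit ( drhd )
    {
        struct iommu *iommu = drhd->iommu;

        /* Only meaningful where queued invalidation is available. */
        if ( !ecap_queued_inval(iommu->ecap) )
            continue;

        /* Queues an IEC descriptor, then waits for completion via an
         * invalidation-wait descriptor (see __iommu_flush_iec() above). */
        iommu_flush_iec_global(iommu);
    }
}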