ia64/xen-unstable

view xen/arch/x86/physdev.c @ 19402:f02a528d2e56

Xen: use proper device ID to search VT-d unit for ARI and SR-IOV device

PCIe Alternative Routing-ID Interpretation (ARI) ECN defines the Extended
Function -- a function whose function number is greater than 7 within an
ARI Device. Intel VT-d spec 1.2 section 8.3.2 specifies that the Extended
Function is under the scope of the same remapping unit as the traditional
function. The hypervisor needs to know whether a function is an Extended
Function so that it can find the proper DMAR unit for it.
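
For illustration only (not part of this changeset): under the ARI reinterpretation
the 8-bit devfn no longer splits into a 5-bit device number and a 3-bit function
number but is read as a single 8-bit function number, so an "Extended Function"
test reduces to a sketch like the following.

    /* Illustrative sketch only: with ARI the device number is implicitly 0
     * and devfn is read as an 8-bit function number, so functions 8-255 are
     * the Extended Functions introduced by the ECN. */
    static inline int is_ari_extended_function(uint8_t devfn)
    {
        return devfn > 7;
    }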

And section 8.3.3 specifies that the SR-IOV Virtual Function is under the
scope of the same remapping unit as the Physical Function. The hypervisor
also needs to know whether a function is a Virtual Function and which
Physical Function it is associated with, for the same reason.
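
To make the flow concrete, here is a minimal sketch of the dom0 side, assuming
the usual HYPERVISOR_physdev_op() wrapper and a hypothetical helper name; the
fields mirror struct physdev_manage_pci_ext as consumed by the
PHYSDEVOP_manage_pci_add_ext handler in the source below.

    /* Hypothetical dom0-side helper: report an SR-IOV Virtual Function to
     * Xen together with the Physical Function it belongs to, so the
     * hypervisor can place it under the correct VT-d remapping unit. */
    static int report_virtfn_to_xen(uint8_t bus, uint8_t devfn,
                                    uint8_t pf_bus, uint8_t pf_devfn)
    {
        struct physdev_manage_pci_ext op = {
            .bus = bus,
            .devfn = devfn,
            .is_virtfn = 1,          /* this is a Virtual Function */
            .physfn.bus = pf_bus,    /* owning Physical Function */
            .physfn.devfn = pf_devfn,
        };

        /* An ARI Extended Function would instead set .is_extfn = 1 and
         * leave physfn at zero. */
        return HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, &op);
    }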

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Mar 19 10:20:11 2009 +0000 (2009-03-19)
parents 9e3be0660c1e
children 6705898f768d
line source
#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/types.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/iocap.h>
#include <asm/current.h>
#include <asm/msi.h>
#include <asm/hypercall.h>
#include <public/xen.h>
#include <public/physdev.h>
#include <xsm/xsm.h>
#include <asm/p2m.h>

#ifndef COMPAT
typedef long ret_t;
#endif

int
ioapic_guest_read(
    unsigned long physbase, unsigned int reg, u32 *pval);
int
ioapic_guest_write(
    unsigned long physbase, unsigned int reg, u32 pval);

/* Map a GSI or MSI interrupt source to a pirq of the target domain; only a
 * privileged domain may invoke this. */
static int physdev_map_pirq(struct physdev_map_pirq *map)
{
    struct domain *d;
    int vector, pirq, ret = 0;
    struct msi_info _msi;
    void *map_data = NULL;

    if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( !map )
        return -EINVAL;

    if ( map->domid == DOMID_SELF )
        d = rcu_lock_domain(current->domain);
    else
        d = rcu_lock_domain_by_id(map->domid);

    if ( d == NULL )
    {
        ret = -ESRCH;
        goto free_domain;
    }

    /* Verify or get vector. */
    switch ( map->type )
    {
        case MAP_PIRQ_TYPE_GSI:
            if ( map->index < 0 || map->index >= NR_IRQS )
            {
                dprintk(XENLOG_G_ERR, "dom%d: map invalid irq %d\n",
                        d->domain_id, map->index);
                ret = -EINVAL;
                goto free_domain;
            }
            vector = domain_irq_to_vector(current->domain, map->index);
            if ( !vector )
            {
                dprintk(XENLOG_G_ERR, "dom%d: map irq with no vector %d\n",
                        d->domain_id, vector);
                ret = -EINVAL;
                goto free_domain;
            }
            break;

        case MAP_PIRQ_TYPE_MSI:
            vector = map->index;
            if ( vector == -1 )
                vector = assign_irq_vector(AUTO_ASSIGN_IRQ);

            if ( vector < 0 || vector >= NR_VECTORS )
            {
                dprintk(XENLOG_G_ERR, "dom%d: map irq with wrong vector %d\n",
                        d->domain_id, vector);
                ret = -EINVAL;
                goto free_domain;
            }

            _msi.bus = map->bus;
            _msi.devfn = map->devfn;
            _msi.entry_nr = map->entry_nr;
            _msi.table_base = map->table_base;
            _msi.vector = vector;
            map_data = &_msi;
            break;

        default:
            dprintk(XENLOG_G_ERR, "dom%d: wrong map_pirq type %x\n",
                    d->domain_id, map->type);
            ret = -EINVAL;
            goto free_domain;
    }

    spin_lock(&pcidevs_lock);
    /* Verify or get pirq. */
    spin_lock(&d->event_lock);
    pirq = domain_vector_to_irq(d, vector);
    if ( map->pirq < 0 )
    {
        if ( pirq )
        {
            dprintk(XENLOG_G_ERR, "dom%d: %d:%d already mapped to %d\n",
                    d->domain_id, map->index, map->pirq,
                    pirq);
            if ( pirq < 0 )
            {
                ret = -EBUSY;
                goto done;
            }
        }
        else
        {
            pirq = get_free_pirq(d, map->type, map->index);
            if ( pirq < 0 )
            {
                dprintk(XENLOG_G_ERR, "dom%d: no free pirq\n", d->domain_id);
                ret = pirq;
                goto done;
            }
        }
    }
    else
    {
        if ( pirq && pirq != map->pirq )
        {
            dprintk(XENLOG_G_ERR, "dom%d: vector %d conflicts with irq %d\n",
                    d->domain_id, map->index, map->pirq);
            ret = -EEXIST;
            goto done;
        }
        else
            pirq = map->pirq;
    }

    ret = map_domain_pirq(d, pirq, vector, map->type, map_data);
    if ( ret == 0 )
        map->pirq = pirq;

done:
    spin_unlock(&d->event_lock);
    spin_unlock(&pcidevs_lock);
    if ( (ret != 0) && (map->type == MAP_PIRQ_TYPE_MSI) && (map->index == -1) )
        free_irq_vector(vector);
free_domain:
    rcu_unlock_domain(d);
    return ret;
}

/* Tear down a pirq mapping previously established via physdev_map_pirq(). */
static int physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
{
    struct domain *d;
    int ret;

    if ( !IS_PRIV(current->domain) )
        return -EPERM;

    if ( unmap->domid == DOMID_SELF )
        d = rcu_lock_domain(current->domain);
    else
        d = rcu_lock_domain_by_id(unmap->domid);

    if ( d == NULL )
        return -ESRCH;

    spin_lock(&pcidevs_lock);
    spin_lock(&d->event_lock);
    ret = unmap_domain_pirq(d, unmap->pirq);
    spin_unlock(&d->event_lock);
    spin_unlock(&pcidevs_lock);

    rcu_unlock_domain(d);

    return ret;
}

/* Top-level dispatcher for the PHYSDEVOP_* hypercalls. */
ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
{
    int irq;
    ret_t ret;
    struct vcpu *v = current;

    switch ( cmd )
    {
    case PHYSDEVOP_eoi: {
        struct physdev_eoi eoi;
        ret = -EFAULT;
        if ( copy_from_guest(&eoi, arg, 1) != 0 )
            break;
        ret = -EINVAL;
        if ( eoi.irq < 0 || eoi.irq >= NR_IRQS )
            break;
        if ( v->domain->arch.pirq_eoi_map )
            evtchn_unmask(v->domain->pirq_to_evtchn[eoi.irq]);
        ret = pirq_guest_eoi(v->domain, eoi.irq);
        break;
    }

    case PHYSDEVOP_pirq_eoi_gmfn: {
        struct physdev_pirq_eoi_gmfn info;
        unsigned long mfn;

        BUILD_BUG_ON(NR_IRQS > (PAGE_SIZE * 8));

        ret = -EFAULT;
        if ( copy_from_guest(&info, arg, 1) != 0 )
            break;

        ret = -EINVAL;
        mfn = gmfn_to_mfn(current->domain, info.gmfn);
        if ( !mfn_valid(mfn) ||
             !get_page_and_type(mfn_to_page(mfn), v->domain,
                                PGT_writable_page) )
            break;

        if ( cmpxchg(&v->domain->arch.pirq_eoi_map_mfn, 0, mfn) != 0 )
        {
            put_page_and_type(mfn_to_page(mfn));
            ret = -EBUSY;
            break;
        }

        v->domain->arch.pirq_eoi_map = map_domain_page_global(mfn);
        if ( v->domain->arch.pirq_eoi_map == NULL )
        {
            v->domain->arch.pirq_eoi_map_mfn = 0;
            put_page_and_type(mfn_to_page(mfn));
            ret = -ENOSPC;
            break;
        }

        ret = 0;
        break;
    }

    /* Legacy since 0x00030202. */
    case PHYSDEVOP_IRQ_UNMASK_NOTIFY: {
        ret = pirq_guest_unmask(v->domain);
        break;
    }

    case PHYSDEVOP_irq_status_query: {
        struct physdev_irq_status_query irq_status_query;
        ret = -EFAULT;
        if ( copy_from_guest(&irq_status_query, arg, 1) != 0 )
            break;
        irq = irq_status_query.irq;
        ret = -EINVAL;
        if ( (irq < 0) || (irq >= NR_IRQS) )
            break;
        irq_status_query.flags = 0;
        /*
         * Even edge-triggered or message-based IRQs can need masking from
         * time to time. If the guest is not dynamically checking for this
         * via the new pirq_eoi_map mechanism, it must conservatively always
         * execute the EOI hypercall. In practice, this only really makes a
         * difference for maskable MSI sources, and if those are supported
         * then dom0 is probably modern anyway.
         */
        irq_status_query.flags |= XENIRQSTAT_needs_eoi;
        if ( pirq_shared(v->domain, irq) )
            irq_status_query.flags |= XENIRQSTAT_shared;
        ret = copy_to_guest(arg, &irq_status_query, 1) ? -EFAULT : 0;
        break;
    }

    case PHYSDEVOP_map_pirq: {
        struct physdev_map_pirq map;

        ret = -EFAULT;
        if ( copy_from_guest(&map, arg, 1) != 0 )
            break;

        ret = physdev_map_pirq(&map);

        if ( copy_to_guest(arg, &map, 1) != 0 )
            ret = -EFAULT;
        break;
    }

    case PHYSDEVOP_unmap_pirq: {
        struct physdev_unmap_pirq unmap;

        ret = -EFAULT;
        if ( copy_from_guest(&unmap, arg, 1) != 0 )
            break;

        ret = physdev_unmap_pirq(&unmap);
        break;
    }

    case PHYSDEVOP_apic_read: {
        struct physdev_apic apic;
        ret = -EFAULT;
        if ( copy_from_guest(&apic, arg, 1) != 0 )
            break;
        ret = -EPERM;
        if ( !IS_PRIV(v->domain) )
            break;
        ret = xsm_apic(v->domain, cmd);
        if ( ret )
            break;
        ret = ioapic_guest_read(apic.apic_physbase, apic.reg, &apic.value);
        if ( copy_to_guest(arg, &apic, 1) != 0 )
            ret = -EFAULT;
        break;
    }

    case PHYSDEVOP_apic_write: {
        struct physdev_apic apic;
        ret = -EFAULT;
        if ( copy_from_guest(&apic, arg, 1) != 0 )
            break;
        ret = -EPERM;
        if ( !IS_PRIV(v->domain) )
            break;
        ret = xsm_apic(v->domain, cmd);
        if ( ret )
            break;
        ret = ioapic_guest_write(apic.apic_physbase, apic.reg, apic.value);
        break;
    }

    case PHYSDEVOP_alloc_irq_vector: {
        struct physdev_irq irq_op;

        ret = -EFAULT;
        if ( copy_from_guest(&irq_op, arg, 1) != 0 )
            break;

        ret = -EPERM;
        if ( !IS_PRIV(v->domain) )
            break;

        ret = xsm_assign_vector(v->domain, irq_op.irq);
        if ( ret )
            break;

        irq = irq_op.irq;
        ret = -EINVAL;
        if ( (irq < 0) || (irq >= NR_IRQS) )
            break;

        irq_op.vector = assign_irq_vector(irq);

        spin_lock(&pcidevs_lock);
        spin_lock(&dom0->event_lock);
        ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector,
                              MAP_PIRQ_TYPE_GSI, NULL);
        spin_unlock(&dom0->event_lock);
        spin_unlock(&pcidevs_lock);

        if ( copy_to_guest(arg, &irq_op, 1) != 0 )
            ret = -EFAULT;
        break;
    }

    case PHYSDEVOP_set_iopl: {
        struct physdev_set_iopl set_iopl;
        ret = -EFAULT;
        if ( copy_from_guest(&set_iopl, arg, 1) != 0 )
            break;
        ret = -EINVAL;
        if ( set_iopl.iopl > 3 )
            break;
        ret = 0;
        v->arch.iopl = set_iopl.iopl;
        break;
    }

    case PHYSDEVOP_set_iobitmap: {
        struct physdev_set_iobitmap set_iobitmap;
        ret = -EFAULT;
        if ( copy_from_guest(&set_iobitmap, arg, 1) != 0 )
            break;
        ret = -EINVAL;
        if ( !guest_handle_okay(set_iobitmap.bitmap, IOBMP_BYTES) ||
             (set_iobitmap.nr_ports > 65536) )
            break;
        ret = 0;
#ifndef COMPAT
        v->arch.iobmp = set_iobitmap.bitmap;
#else
        guest_from_compat_handle(v->arch.iobmp, set_iobitmap.bitmap);
#endif
        v->arch.iobmp_limit = set_iobitmap.nr_ports;
        break;
    }

    case PHYSDEVOP_manage_pci_add: {
        struct physdev_manage_pci manage_pci;
        ret = -EPERM;
        if ( !IS_PRIV(v->domain) )
            break;
        ret = -EFAULT;
        if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
            break;

        ret = pci_add_device(manage_pci.bus, manage_pci.devfn);
        break;
    }

    case PHYSDEVOP_manage_pci_remove: {
        struct physdev_manage_pci manage_pci;
        ret = -EPERM;
        if ( !IS_PRIV(v->domain) )
            break;
        ret = -EFAULT;
        if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
            break;

        ret = pci_remove_device(manage_pci.bus, manage_pci.devfn);
        break;
    }

    case PHYSDEVOP_manage_pci_add_ext: {
        struct physdev_manage_pci_ext manage_pci_ext;
        struct pci_dev_info pdev_info;

        ret = -EPERM;
        if ( !IS_PRIV(current->domain) )
            break;

        ret = -EFAULT;
        if ( copy_from_guest(&manage_pci_ext, arg, 1) != 0 )
            break;

        ret = -EINVAL;
        if ( (manage_pci_ext.is_extfn > 1) || (manage_pci_ext.is_virtfn > 1) )
            break;

        pdev_info.is_extfn = manage_pci_ext.is_extfn;
        pdev_info.is_virtfn = manage_pci_ext.is_virtfn;
        pdev_info.physfn.bus = manage_pci_ext.physfn.bus;
        pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn;
        ret = pci_add_device_ext(manage_pci_ext.bus,
                                 manage_pci_ext.devfn,
                                 &pdev_info);
        break;
    }

    case PHYSDEVOP_restore_msi: {
        struct physdev_restore_msi restore_msi;
        struct pci_dev *pdev;

        ret = -EPERM;
        if ( !IS_PRIV(v->domain) )
            break;

        ret = -EFAULT;
        if ( copy_from_guest(&restore_msi, arg, 1) != 0 )
            break;

        spin_lock(&pcidevs_lock);
        pdev = pci_get_pdev(restore_msi.bus, restore_msi.devfn);
        ret = pdev ? pci_restore_msi_state(pdev) : -ENODEV;
        spin_unlock(&pcidevs_lock);
        break;
    }

    default:
        ret = -ENOSYS;
        break;
    }

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */