ia64/linux-2.6.18-xen.hg

view drivers/pci/iomulti.c @ 882:8dec4aa9b8b9

PCI pass through: PCIe IO space multiplexing

This is required for more than 16 HVM domains to boot from
PCIe pass-through devices.

Linux as dom0 exclusively assigns IO space to downstream PCI bridges,
and the assignment unit of PCI bridge IO space is 4K, so only up to
16 PCIe devices can be accessed via IO space within the 64K IO ports.
PCI expansion ROM BIOS often uses IO port access to boot from the
device, so in a virtualized environment this means that only up to 16
guest domains can boot from pass-through devices.

This patch allows PCIe IO space sharing of pass-through device.
- reassign IO space of PCIe devices specified by
"guestiomuldev=[<segment>:]<bus>:<dev>[,[<segment:><bus>:dev]][,...]"
to be shared.
This is implemented as Linux PCI quirk fixup.

The sharing unit is a PCIe switch, i.e. the IO space of the endpoint
devices under the same switch will be shared. If there is more than
one switch, one such area of IO space is used per switch.

- And the driver which arbitrates the accesses to the multiplexed PCIe
IO space. Later qemu-dm will use this.

Limitation:
IO ports of IO-shared devices can't be accessed from dom0 Linux
device drivers. This shouldn't be a big issue because the PCIe
specification discourages the use of IO space and recommends that IO
space be used only for bootable devices with ROM code. OS device
drivers should work without IO space access.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Keir Fraser <keir.fraser@citrix.com>
date Thu May 28 09:57:49 2009 +0100 (2009-05-28)
parents
children b998614e2e2a
line source
1 /*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15 *
16 * Copyright (c) 2009 Isaku Yamahata
17 * VA Linux Systems Japan K.K.
18 *
19 */
21 #include <linux/kernel.h>
22 #include <linux/list.h>
23 #include <linux/miscdevice.h>
24 #include <linux/pci.h>
25 #include <linux/sort.h>
27 #include <asm/setup.h>
28 #include <asm/uaccess.h>
30 #include "iomulti.h"
#define PCI_NUM_BARS 6		/* number of BARs in a type 0 (device) header */
#define PCI_BUS_MAX 255		/* highest valid PCI bus number */
#define PCI_DEV_MAX 31		/* highest valid device number on a bus */
#define PCI_FUNC_MAX 7		/* highest valid function number of a device */
#define PCI_NUM_FUNC 8		/* number of functions per device */
38 /* see pci_resource_len */
39 static inline resource_size_t pci_iomul_len(const struct resource* r)
40 {
41 if (r->start == 0 && r->start == r->end)
42 return 0;
43 return r->end - r->start + 1;
44 }
46 #define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))
47 /* stolen from pbus_size_io() */
48 static unsigned long pdev_size_io(struct pci_dev *pdev)
49 {
50 unsigned long size = 0, size1 = 0;
51 int i;
53 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
54 struct resource *r = &pdev->resource[i];
55 unsigned long r_size;
57 if (!(r->flags & IORESOURCE_IO))
58 continue;
60 r_size = r->end - r->start + 1;
62 if (r_size < 0x400)
63 /* Might be re-aligned for ISA */
64 size += r_size;
65 else
66 size1 += r_size;
67 }
69 /* To be fixed in 2.5: we should have sort of HAVE_ISA
70 flag in the struct pci_bus. */
71 #if defined(CONFIG_ISA) || defined(CONFIG_EISA)
72 size = (size & 0xff) + ((size & ~0xffUL) << 2);
73 #endif
74 size = ROUND_UP(size + size1, 4096);
75 return size;
76 }
78 /*
79 * primary bus number of PCI-PCI bridge in switch on which
80 * this slots sits.
81 * i.e. the primary bus number of PCI-PCI bridge of downstream port
82 * or root port in switch.
83 * the secondary bus number of PCI-PCI bridge of upstream port
84 * in switch.
85 */
86 static inline unsigned char pci_dev_switch_busnr(struct pci_dev *pdev)
87 {
88 if (pci_find_capability(pdev, PCI_CAP_ID_EXP))
89 return pdev->bus->primary;
90 return pdev->bus->number;
91 }
/* Per-function record of the IO BARs of a multiplexed device. */
struct pci_iomul_func {
	int segment;		/* PCI domain of the function */
	uint8_t bus;		/* bus number */
	uint8_t devfn;		/* encoded device/function number */

	/* only start and end are used */
	unsigned long io_size;	/* total IO need, from pdev_size_io() */
	uint8_t io_bar;		/* bitmap of BAR indexes that are IO BARs */
	struct resource resource[PCI_NUM_BARS];	/* saved copies of the BARs */
	struct resource dummy_parent;	/* see pci_iomul_reenable_resource() */
};
/* One instance per PCIe switch whose endpoints share an IO window. */
struct pci_iomul_switch {
	struct list_head list;	/* bus_list_lock protects */

	/*
	 * This lock protects the following entries and the
	 * pci_iomul_slot/pci_iomul_func objects hanging off this switch.
	 */
	struct mutex lock;
	struct kref kref;

	struct resource io_resource;	/* claims the shared window */
	struct resource *io_region;	/* request_region() cookie */
	unsigned int count;		/* number of open users */
	struct pci_dev *current_pdev;	/* device currently decoding IO */

	int segment;	/* PCI domain */
	uint8_t bus;	/* bus this switch sits on */

	uint32_t io_base;	/* shared window is [io_base, io_limit] */
	uint32_t io_limit;

	/* func which has the largest io size */
	struct pci_iomul_func *func;

	struct list_head slots;	/* list of pci_iomul_slot::sibling */
};
/* One instance per (bus, device) slot under a multiplexed switch. */
struct pci_iomul_slot {
	struct list_head sibling;	/* linked into pci_iomul_switch::slots */
	struct kref kref;
	/*
	 * busnr
	 * when pcie, the primary busnr of the PCI-PCI bridge on which
	 * this device sits.
	 */
	uint8_t switch_busnr;
	/* dummy parents for the bridge windows, one per bridge resource;
	 * see pci_iomul_reenable_resource() */
	struct resource dummy_parent[PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES];

	/* device */
	int segment;
	uint8_t bus;
	uint8_t dev;

	struct pci_iomul_func *func[PCI_NUM_FUNC];	/* per-function state */
};
/* all known switches; switch_list_lock protects the list */
static LIST_HEAD(switch_list);
static DEFINE_MUTEX(switch_list_lock);
154 /*****************************************************************************/
155 static int inline pci_iomul_switch_io_allocated(
156 const struct pci_iomul_switch *sw)
157 {
158 return !(sw->io_base == 0 || sw->io_base > sw->io_limit);
159 }
161 static struct pci_iomul_switch *pci_iomul_find_switch_locked(int segment,
162 uint8_t bus)
163 {
164 struct pci_iomul_switch *sw;
166 BUG_ON(!mutex_is_locked(&switch_list_lock));
167 list_for_each_entry(sw, &switch_list, list) {
168 if (sw->segment == segment && sw->bus == bus)
169 return sw;
170 }
171 return NULL;
172 }
174 static struct pci_iomul_slot *pci_iomul_find_slot_locked(
175 struct pci_iomul_switch *sw, uint8_t busnr, uint8_t dev)
176 {
177 struct pci_iomul_slot *slot;
179 BUG_ON(!mutex_is_locked(&sw->lock));
180 list_for_each_entry(slot, &sw->slots, sibling) {
181 if (slot->bus == busnr && slot->dev == dev)
182 return slot;
183 }
184 return NULL;
185 }
static void pci_iomul_switch_get(struct pci_iomul_switch *sw);
/* On successful exit, sw->lock is held for using *slot and the
 * reference count of *swp has been incremented.
 * On failure, *swp and *slot are both NULL and no lock is held.
 */
static void pci_iomul_get_lock_switch(struct pci_dev *pdev,
				      struct pci_iomul_switch **swp,
				      struct pci_iomul_slot **slot)
{
	mutex_lock(&switch_list_lock);

	*swp = pci_iomul_find_switch_locked(pci_domain_nr(pdev->bus),
					    pci_dev_switch_busnr(pdev));
	if (*swp == NULL) {
		*slot = NULL;
		goto out;
	}

	mutex_lock(&(*swp)->lock);
	*slot = pci_iomul_find_slot_locked(*swp, pdev->bus->number,
					   PCI_SLOT(pdev->devfn));
	if (*slot == NULL) {
		/* device has no slot here: drop the switch lock again */
		mutex_unlock(&(*swp)->lock);
		*swp = NULL;
	} else {
		pci_iomul_switch_get(*swp);
	}
out:
	mutex_unlock(&switch_list_lock);
}
217 static struct pci_iomul_switch *pci_iomul_switch_alloc(int segment,
218 uint8_t bus)
219 {
220 struct pci_iomul_switch *sw;
222 BUG_ON(!mutex_is_locked(&switch_list_lock));
224 sw = kmalloc(sizeof(*sw), GFP_KERNEL);
226 mutex_init(&sw->lock);
227 kref_init(&sw->kref);
228 sw->io_region = NULL;
229 sw->count = 0;
230 sw->current_pdev = NULL;
231 sw->segment = segment;
232 sw->bus = bus;
233 sw->io_base = 0;
234 sw->io_limit = 0;
235 sw->func = NULL;
236 INIT_LIST_HEAD(&sw->slots);
238 return sw;
239 }
/* Link @sw into the global switch list; caller holds switch_list_lock. */
static void pci_iomul_switch_add_locked(struct pci_iomul_switch *sw)
{
	BUG_ON(!mutex_is_locked(&switch_list_lock));
	list_add(&sw->list, &switch_list);
}

#ifdef CONFIG_HOTPLUG_PCI
/* Unlink @sw from the global switch list; caller holds switch_list_lock. */
static void pci_iomul_switch_del_locked(struct pci_iomul_switch *sw)
{
	BUG_ON(!mutex_is_locked(&switch_list_lock));
	list_del(&sw->list);
}
#endif
/* Take a reference on @sw. */
static void pci_iomul_switch_get(struct pci_iomul_switch *sw)
{
	kref_get(&sw->kref);
}

/* kref release callback: frees the switch entry. */
static void pci_iomul_switch_release(struct kref *kref)
{
	struct pci_iomul_switch *sw = container_of(kref,
						   struct pci_iomul_switch,
						   kref);
	kfree(sw);
}

/* Drop a reference on @sw, freeing it when the last one goes away. */
static void pci_iomul_switch_put(struct pci_iomul_switch *sw)
{
	kref_put(&sw->kref, &pci_iomul_switch_release);
}
/*
 * Validate @pdev for IO multiplexing and initialize @slot from it.
 * Only PCIe endpoints and legacy endpoints are supported.
 * Returns 0 on success, -ENOSYS/-EINVAL when the device is unsuitable.
 */
static int __devinit pci_iomul_slot_init(struct pci_dev *pdev,
					 struct pci_iomul_slot *slot)
{
	u16 rpcap;
	u16 cap;

	rpcap = pci_find_capability(pdev, PCI_CAP_ID_EXP);
	if (!rpcap) {
		/* pci device isn't supported */
		printk(KERN_INFO
		       "PCI: sharing io port of non PCIe device %s "
		       "isn't supported. ignoring.\n",
		       pci_name(pdev));
		return -ENOSYS;
	}

	pci_read_config_word(pdev, rpcap + PCI_CAP_FLAGS, &cap);
	/* device/port type field of the PCIe capabilities register */
	switch ((cap & PCI_EXP_FLAGS_TYPE) >> 4) {
	case PCI_EXP_TYPE_RC_END:
		printk(KERN_INFO
		       "PCI: io port sharing of root complex integrated "
		       "endpoint %s isn't supported. ignoring.\n",
		       pci_name(pdev));
		return -ENOSYS;
	case PCI_EXP_TYPE_ENDPOINT:
	case PCI_EXP_TYPE_LEG_END:
		break;
	default:
		printk(KERN_INFO
		       "PCI: io port sharing of non endpoint %s "
		       "doesn't make sense. ignoring.\n",
		       pci_name(pdev));
		return -EINVAL;
	}

	kref_init(&slot->kref);
	slot->switch_busnr = pci_dev_switch_busnr(pdev);
	slot->segment = pci_domain_nr(pdev->bus);
	slot->bus = pdev->bus->number;
	slot->dev = PCI_SLOT(pdev->devfn);

	return 0;
}
317 static struct pci_iomul_slot *pci_iomul_slot_alloc(struct pci_dev *pdev)
318 {
319 struct pci_iomul_slot *slot;
321 slot = kzalloc(sizeof(*slot), GFP_KERNEL);
322 if (slot == NULL)
323 return NULL;
325 if (pci_iomul_slot_init(pdev, slot) != 0) {
326 kfree(slot);
327 return NULL;
328 }
329 return slot;
330 }
/* Link @slot into @sw's slot list; caller holds sw->lock. */
static void pci_iomul_slot_add_locked(struct pci_iomul_switch *sw,
				      struct pci_iomul_slot *slot)
{
	BUG_ON(!mutex_is_locked(&sw->lock));
	list_add(&slot->sibling, &sw->slots);
}

#ifdef CONFIG_HOTPLUG_PCI
/* Unlink @slot from @sw's slot list; caller holds sw->lock. */
static void pci_iomul_slot_del_locked(struct pci_iomul_switch *sw,
				      struct pci_iomul_slot *slot)
{
	BUG_ON(!mutex_is_locked(&sw->lock));
	list_del(&slot->sibling);
}
#endif
/* Take a reference on @slot. */
static void pci_iomul_slot_get(struct pci_iomul_slot *slot)
{
	kref_get(&slot->kref);
}

/* kref release callback: frees the slot entry. */
static void pci_iomul_slot_release(struct kref *kref)
{
	struct pci_iomul_slot *slot = container_of(kref, struct pci_iomul_slot,
						   kref);
	kfree(slot);
}

/* Drop a reference on @slot, freeing it when the last one goes away. */
static void pci_iomul_slot_put(struct pci_iomul_slot *slot)
{
	kref_put(&slot->kref, &pci_iomul_slot_release);
}
365 /*****************************************************************************/
366 static int pci_get_sbd(const char *str,
367 int *segment__, uint8_t *bus__, uint8_t *dev__)
368 {
369 int segment;
370 int bus;
371 int dev;
373 if (sscanf(str, "%x:%x:%x", &segment, &bus, &dev) != 3) {
374 if (sscanf(str, "%x:%x", &bus, &dev) == 2)
375 segment = 0;
376 else
377 return -EINVAL;
378 }
380 if (segment < 0 || INT_MAX <= segment)
381 return -EINVAL;
382 if (bus < 0 || PCI_BUS_MAX < bus)
383 return -EINVAL;
384 if (dev < 0 || PCI_DEV_MAX < dev)
385 return -EINVAL;
387 *segment__ = segment;
388 *bus__ = bus;
389 *dev__ = dev;
390 return 0;
391 }
/* raw "guestiomuldev=" argument; parsed lazily since the PCI bus has
 * not been scanned yet when the option is recorded */
static char iomul_param[COMMAND_LINE_SIZE];
#define TOKEN_MAX 10 /* SSSS:BB:DD length is 10 */
/* Return 1 when @pdev matches one of the comma-separated tokens of
 * iomul_param, 0 otherwise. */
static int pci_is_iomul_dev_param(struct pci_dev *pdev)
{
	int len;
	char *p;
	char *next_str;

	for (p = &iomul_param[0]; *p != '\0'; p = next_str + 1) {
		next_str = strchr(p, ',');
		if (next_str != NULL)
			len = next_str - p;
		else
			len = strlen(p);

		if (len > 0 && len <= TOKEN_MAX) {
			char tmp[TOKEN_MAX+1];
			int seg;
			uint8_t bus;
			uint8_t dev;

			/* copy the token; NUL-terminate by hand because
			 * strncpy() doesn't guarantee termination */
			strncpy(tmp, p, len);
			*(tmp + len) = '\0';
			if (pci_get_sbd(tmp, &seg, &bus, &dev) == 0 &&
			    pci_domain_nr(pdev->bus) == seg &&
			    pdev->bus->number == bus &&
			    PCI_SLOT(pdev->devfn) == dev)
				return 1;
		}
		if (next_str == NULL)
			break;
	}

	/* no token matched */
	return 0;
}
/*
 * Format: [<segment>:]<bus>:<dev>[,[<segment>:]<bus>:<dev>[,...]
 * Returns 1 when the option is consumed, 0 when the value is too long.
 */
static int __init pci_iomul_param_setup(char *str)
{
	if (strlen(str) >= COMMAND_LINE_SIZE)
		return 0;

	/* parse it after pci bus scanning */
	strncpy(iomul_param, str, sizeof(iomul_param));
	return 1;
}
__setup("guestiomuldev=", pci_iomul_param_setup);
/*****************************************************************************/
/*
 * Program @bridge's IO window to decode [io_base, io_limit].
 * Per the PCI-PCI bridge register layout, bits 15:12 of base/limit
 * live in bits 7:4 of the byte-wide PCI_IO_BASE/PCI_IO_LIMIT pair and
 * bits 31:16 live in the UPPER16 registers; the low 12 bits of both
 * arguments are dropped (4K granularity).
 */
static void __devinit pci_iomul_set_bridge_io_window(struct pci_dev *bridge,
						     uint32_t io_base,
						     uint32_t io_limit)
{
	uint16_t l;
	uint32_t upper16;

	/* keep bits 15:12, shifted down into bits 7:4 */
	io_base >>= 12;
	io_base <<= 4;
	io_limit >>= 12;
	io_limit <<= 4;
	l = (io_base & 0xff) | ((io_limit & 0xff) << 8);
	upper16 = ((io_base & 0xffff00) >> 8) |
		(((io_limit & 0xffff00) >> 8) << 16);

	/* Temporarily disable the I/O range before updating PCI_IO_BASE. */
	pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, 0x0000ffff);
	/* Update lower 16 bits of I/O base/limit. */
	pci_write_config_word(bridge, PCI_IO_BASE, l);
	/* Update upper 16 bits of I/O base/limit. */
	pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, upper16);
}
467 static void __devinit pci_disable_bridge_io_window(struct pci_dev *bridge)
468 {
469 /* set base = 0xffffff limit = 0x0 */
470 pci_iomul_set_bridge_io_window(bridge, 0xffffff, 0);
471 }
473 static int __devinit pci_iomul_func_scan(struct pci_dev *pdev,
474 struct pci_iomul_slot *slot,
475 uint8_t func)
476 {
477 struct pci_iomul_func *f;
478 unsigned int i;
480 f = kzalloc(sizeof(*f), GFP_KERNEL);
481 if (f == NULL)
482 return -ENOMEM;
484 f->segment = slot->segment;
485 f->bus = slot->bus;
486 f->devfn = PCI_DEVFN(slot->dev, func);
487 f->io_size = pdev_size_io(pdev);
489 for (i = 0; i < PCI_NUM_BARS; i++) {
490 if (!(pci_resource_flags(pdev, i) & IORESOURCE_IO))
491 continue;
492 if (pci_resource_len(pdev, i) == 0)
493 continue;
495 f->io_bar |= 1 << i;
496 f->resource[i] = pdev->resource[i];
497 }
499 if (f->io_bar)
500 slot->func[func] = f;
501 else
502 kfree(f);
503 return 0;
504 }
/*
 * This is the tricky part.
 * Fake out the PCI resource assignment routines by setting flags to 0.
 * The PCI resource allocation routines decide whether a resource needs
 * to be allocated by checking its flags; 0 means this resource isn't
 * used.  See pbus_size_io() and pdev_sort_resources().
 *
 * After the resources are allocated, flags (IORESOURCE_IO) is exported
 * to other parts including user processes.
 * So we have to set flags back to IORESOURCE_IO, but at the same time
 * we must prevent those resources from being reassigned on PCI hot plug.
 * To achieve that, set r->parent to a dummy resource.
 */
static void __devinit pci_iomul_disable_resource(struct resource *r)
{
	/* don't allocate this resource */
	r->flags = 0;
}
525 static void __devinit pci_iomul_reenable_resource(
526 struct resource *dummy_parent, struct resource *r)
527 {
528 int ret;
530 dummy_parent->start = r->start;
531 dummy_parent->end = r->end;
532 dummy_parent->flags = r->flags;
533 dummy_parent->name = "PCI IOMUL dummy resource";
535 ret = request_resource(dummy_parent, r);
536 BUG_ON(ret);
537 }
/*
 * Strip the IO BARs recorded in @func from @pdev.
 * @reassign: rebase each IO BAR to offset 0 and disable the parent
 *            bridge's IO window, so the BARs can later be packed into
 *            the shared window (pci_iomul_setup_dev()).
 * @dealloc:  zero the resource flags so the PCI core won't allocate IO
 *            space for them (pci_iomul_disable_resource()).
 */
static void __devinit pci_iomul_fixup_ioresource(struct pci_dev *pdev,
						 struct pci_iomul_func *func,
						 int reassign, int dealloc)
{
	uint8_t i;
	struct resource *r;

	printk(KERN_INFO "PCI: deallocating io resource[%s]. io size 0x%lx\n",
	       pci_name(pdev), func->io_size);
	for (i = 0; i < PCI_NUM_BARS; i++) {
		r = &pdev->resource[i];
		if (!(func->io_bar & (1 << i)))
			continue;

		if (reassign) {
			/* rebase the BAR to 0 and write it back to the
			 * device's config space */
			r->end -= r->start;
			r->start = 0;
			pci_update_resource(pdev, i);
			func->resource[i] = *r;
		}

		if (dealloc)
			/* don't allocate this resource */
			pci_iomul_disable_resource(r);
	}

	/* parent PCI-PCI bridge */
	if (!reassign)
		return;
	pdev = pdev->bus->self;
	if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
		return;
	pci_disable_bridge_io_window(pdev);
	/* rebase the bridge's IO resources as well */
	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
		r = &pdev->resource[i];
		if (!(r->flags & IORESOURCE_IO))
			continue;

		r->end -= r->start;
		r->start = 0;
		if (i < PCI_BRIDGE_RESOURCES)
			pci_update_resource(pdev, i);
	}
}
/*
 * Scan @pdev's IO BARs and release its IO resources.  The function
 * with the largest IO requirement under the switch is remembered in
 * sw->func; it keeps its (rebased) resources, which later size the
 * shared window.  Every other function is fully deallocated.
 */
static void __devinit __quirk_iomul_dealloc_ioresource(
	struct pci_iomul_switch *sw,
	struct pci_dev *pdev, struct pci_iomul_slot *slot)
{
	struct pci_iomul_func *f;
	struct pci_iomul_func *__f;

	if (pci_iomul_func_scan(pdev, slot, PCI_FUNC(pdev->devfn)) != 0)
		return;

	f = slot->func[PCI_FUNC(pdev->devfn)];
	if (f == NULL)
		return;		/* the function has no IO BARs */

	__f = sw->func;
	/* sw->io_base == 0 means that we are called at boot time.
	 * != 0 means that we are called by php after boot. */
	if (sw->io_base == 0 &&
	    (__f == NULL || __f->io_size < f->io_size)) {
		if (__f != NULL) {
			/* @f takes over as largest: demote the previous
			 * record holder to fully-deallocated state */
			struct pci_bus *__pbus;
			struct pci_dev *__pdev;

			__pbus = pci_find_bus(__f->segment, __f->bus);
			BUG_ON(__pbus == NULL);
			__pdev = pci_get_slot(__pbus, __f->devfn);
			BUG_ON(__pdev == NULL);
			pci_iomul_fixup_ioresource(__pdev, __f, 0, 1);
			pci_dev_put(__pdev);
		}

		pci_iomul_fixup_ioresource(pdev, f, 1, 0);
		sw->func = f;
	} else {
		pci_iomul_fixup_ioresource(pdev, f, 1, 1);
	}
}
/*
 * Header fixup: for every device named in "guestiomuldev=", disable
 * the device and release its IO resources, creating the per-switch and
 * per-slot bookkeeping on first sight.
 */
static void __devinit quirk_iomul_dealloc_ioresource(struct pci_dev *pdev)
{
	struct pci_iomul_switch *sw;
	struct pci_iomul_slot *slot;

	if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return;
	if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
		return; /* PCI Host Bridge isn't a target device */
	if (!pci_is_iomul_dev_param(pdev))
		return;

	mutex_lock(&switch_list_lock);
	sw = pci_iomul_find_switch_locked(pci_domain_nr(pdev->bus),
					  pci_dev_switch_busnr(pdev));
	if (sw == NULL) {
		sw = pci_iomul_switch_alloc(pci_domain_nr(pdev->bus),
					    pci_dev_switch_busnr(pdev));
		if (sw == NULL) {
			mutex_unlock(&switch_list_lock);
			printk(KERN_WARNING
			       "PCI: can't allocate memory "
			       "for sw of IO mulplexing %s", pci_name(pdev));
			return;
		}
		pci_iomul_switch_add_locked(sw);
	}
	/* hold a reference while working outside switch_list_lock */
	pci_iomul_switch_get(sw);
	mutex_unlock(&switch_list_lock);

	mutex_lock(&sw->lock);
	slot = pci_iomul_find_slot_locked(sw, pdev->bus->number,
					  PCI_SLOT(pdev->devfn));
	if (slot == NULL) {
		slot = pci_iomul_slot_alloc(pdev);
		if (slot == NULL) {
			mutex_unlock(&sw->lock);
			pci_iomul_switch_put(sw);
			printk(KERN_WARNING "PCI: can't allocate memory "
			       "for IO mulplexing %s", pci_name(pdev));
			return;
		}
		pci_iomul_slot_add_locked(sw, slot);
	}

	printk(KERN_INFO "PCI: disable device and release io resource[%s].\n",
	       pci_name(pdev));
	pci_disable_device(pdev);

	__quirk_iomul_dealloc_ioresource(sw, pdev, slot);

	mutex_unlock(&sw->lock);
	pci_iomul_switch_put(sw);
}
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID,
			 quirk_iomul_dealloc_ioresource);
/*
 * Read back the IO window of the bridge above sw->func and cache it in
 * sw->io_base / sw->io_limit.  Bits 15:12 come from the byte-wide
 * PCI_IO_BASE/PCI_IO_LIMIT pair, bits 31:16 from the UPPER16
 * registers; the limit spans the whole 4K granule (| 0xfff).
 */
static void __devinit pci_iomul_read_bridge_io(struct pci_iomul_switch *sw)
{
	struct pci_iomul_func *f = sw->func;

	struct pci_bus *pbus;
	struct pci_dev *pdev;
	struct pci_dev *bridge;

	uint16_t l;
	uint16_t base_upper16;
	uint16_t limit_upper16;
	uint32_t io_base;
	uint32_t io_limit;

	pbus = pci_find_bus(f->segment, f->bus);
	BUG_ON(pbus == NULL);

	pdev = pci_get_slot(pbus, f->devfn);
	BUG_ON(pdev == NULL);

	bridge = pdev->bus->self;
	pci_read_config_word(bridge, PCI_IO_BASE, &l);
	pci_read_config_word(bridge, PCI_IO_BASE_UPPER16, &base_upper16);
	pci_read_config_word(bridge, PCI_IO_LIMIT_UPPER16, &limit_upper16);

	/* reassemble 32 bit base/limit from the register fields */
	io_base = (l & 0xf0) | ((uint32_t)base_upper16 << 8);
	io_base <<= 8;
	io_limit = (l >> 8) | ((uint32_t)limit_upper16 << 8);
	io_limit <<= 8;
	io_limit |= 0xfff;

	sw->io_base = io_base;
	sw->io_limit = io_limit;

	pci_dev_put(pdev);
	printk(KERN_INFO "PCI: bridge %s base 0x%x limit 0x%x\n",
	       pci_name(bridge), sw->io_base, sw->io_limit);
}
718 static void __devinit pci_iomul_setup_brige(struct pci_dev *bridge,
719 uint32_t io_base,
720 uint32_t io_limit)
721 {
722 uint16_t cmd;
724 if ((bridge->class >> 8) == PCI_CLASS_BRIDGE_HOST)
725 return;
727 pci_iomul_set_bridge_io_window(bridge, io_base, io_limit);
729 /* and forcibly enables IO */
730 pci_read_config_word(bridge, PCI_COMMAND, &cmd);
731 if (!(cmd & PCI_COMMAND_IO)) {
732 cmd |= PCI_COMMAND_IO;
733 printk(KERN_INFO "PCI: Forcibly Enabling IO %s\n",
734 pci_name(bridge));
735 pci_write_config_word(bridge, PCI_COMMAND, cmd);
736 }
737 }
739 struct __bar {
740 unsigned long size;
741 uint8_t bar;
742 };
744 /* decending order */
745 static int __devinit pci_iomul_bar_cmp(const void *lhs__, const void *rhs__)
746 {
747 const struct __bar *lhs = (struct __bar*)lhs__;
748 const struct __bar *rhs = (struct __bar*)rhs__;
749 return - (lhs->size - rhs->size);
750 }
/*
 * Pack @f's IO BARs into the shared window starting at @io_base and
 * write them to the device.  BARs are placed largest-first —
 * presumably so the power-of-two sized BARs stay naturally aligned
 * without padding (TODO confirm against BAR alignment rules).
 */
static void __devinit pci_iomul_setup_dev(struct pci_dev *pdev,
					  struct pci_iomul_func *f,
					  uint32_t io_base)
{
	struct __bar bars[PCI_NUM_BARS];
	int i;
	uint8_t num_bars = 0;
	struct resource *r;

	printk(KERN_INFO "PCI: Forcibly assign IO %s from 0x%x\n",
	       pci_name(pdev), io_base);

	/* collect the IO BARs of this function */
	for (i = 0; i < PCI_NUM_BARS; i++) {
		if (!(f->io_bar & (1 << i)))
			continue;

		r = &f->resource[i];
		bars[num_bars].size = pci_iomul_len(r);
		bars[num_bars].bar = i;

		num_bars++;
	}

	/* descending order of size */
	sort(bars, num_bars, sizeof(bars[0]), &pci_iomul_bar_cmp, NULL);

	for (i = 0; i < num_bars; i++) {
		struct resource *fr = &f->resource[bars[i].bar];
		r = &pdev->resource[bars[i].bar];

		/* BAR was rebased to 0 by pci_iomul_fixup_ioresource() */
		BUG_ON(r->start != 0);
		r->start += io_base;
		r->end += io_base;

		fr->start = r->start;
		fr->end = r->end;

		/* pci_update_resource() check flags. */
		r->flags = fr->flags;
		pci_update_resource(pdev, bars[i].bar);
		pci_iomul_reenable_resource(&f->dummy_parent, r);

		io_base += bars[i].size;
	}
}
/*
 * Used when @pdev is the sw->func owner whose BARs already occupy the
 * window: detach its IO resources (and the parent bridge's IO window
 * resource) from the resource tree, park them under dummy parents, and
 * claim the whole shared window as sw->io_resource instead.
 */
static void __devinit pci_iomul_release_io_resource(
	struct pci_dev *pdev, struct pci_iomul_switch *sw,
	struct pci_iomul_slot *slot, struct pci_iomul_func *f)
{
	int i;
	struct resource *r;

	for (i = 0; i < PCI_NUM_BARS; i++) {
		if (pci_resource_flags(pdev, i) & IORESOURCE_IO &&
		    pdev->resource[i].parent != NULL) {
			r = &pdev->resource[i];
			f->resource[i] = *r;
			release_resource(r);
			pci_iomul_reenable_resource(&f->dummy_parent, r);
		}
	}

	/* parent PCI-PCI bridge */
	pdev = pdev->bus->self;
	if ((pdev->class >> 8) != PCI_CLASS_BRIDGE_HOST) {
		for (i = PCI_BRIDGE_RESOURCES; i < PCI_NUM_RESOURCES; i++) {
			struct resource *parent = pdev->resource[i].parent;

			if (pci_resource_flags(pdev, i) & IORESOURCE_IO &&
			    parent != NULL) {
				r = &pdev->resource[i];

				/* claim the window under the bridge's
				 * old parent in the resource tree */
				sw->io_resource.flags = r->flags;
				sw->io_resource.start = sw->io_base;
				sw->io_resource.end = sw->io_limit;
				sw->io_resource.name = "PCI IO Multiplexer";

				release_resource(r);
				pci_iomul_reenable_resource(
					&slot->dummy_parent[i - PCI_BRIDGE_RESOURCES], r);

				if (request_resource(parent,
						     &sw->io_resource))
					printk(KERN_ERR
					       "PCI IOMul: can't allocate "
					       "resource. [0x%x, 0x%x]",
					       sw->io_base, sw->io_limit);
			}
		}
	}
}
/*
 * Final fixup: once the PCI core has allocated resources, place each
 * multiplexed function's BARs into the switch's shared window.  The
 * sw->func owner keeps its allocation (converted to sw->io_resource);
 * everyone else is overlaid onto the same window.
 */
static void __devinit quirk_iomul_reassign_ioresource(struct pci_dev *pdev)
{
	struct pci_iomul_switch *sw;
	struct pci_iomul_slot *slot;
	struct pci_iomul_func *sf;
	struct pci_iomul_func *f;

	pci_iomul_get_lock_switch(pdev, &sw, &slot);
	if (sw == NULL || slot == NULL)
		return;

	/* io_base == 0: window not read back from the bridge yet */
	if (sw->io_base == 0)
		pci_iomul_read_bridge_io(sw);
	if (!pci_iomul_switch_io_allocated(sw))
		goto out;

	sf = sw->func;
	f = slot->func[PCI_FUNC(pdev->devfn)];
	if (f == NULL)
		/* (sf == NULL || f == NULL) case
		 * can happen when all the specified devices
		 * don't have io space
		 */
		goto out;

	/* devices other than the window owner: program their bridge's
	 * window once, on function 0 */
	if (sf != NULL &&
	    (pci_domain_nr(pdev->bus) != sf->segment ||
	     pdev->bus->number != sf->bus ||
	     PCI_SLOT(pdev->devfn) != PCI_SLOT(sf->devfn)) &&
	    PCI_FUNC(pdev->devfn) == 0) {
		pci_iomul_setup_brige(pdev->bus->self,
				      sw->io_base, sw->io_limit);
	}

	BUG_ON(f->io_size > sw->io_limit - sw->io_base + 1);
	if (/* f == sf */
	    sf != NULL &&
	    pci_domain_nr(pdev->bus) == sf->segment &&
	    pdev->bus->number == sf->bus &&
	    pdev->devfn == sf->devfn)
		pci_iomul_release_io_resource(pdev, sw, slot, f);
	else
		pci_iomul_setup_dev(pdev, f, sw->io_base);

out:
	mutex_unlock(&sw->lock);
	pci_iomul_switch_put(sw);
}

DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID,
			quirk_iomul_reassign_ioresource);
896 /*****************************************************************************/
897 #ifdef CONFIG_HOTPLUG_PCI
898 static int __devinit __pci_iomul_notifier_del_device(struct pci_dev *pdev)
899 {
900 struct pci_iomul_switch *sw;
901 struct pci_iomul_slot *slot;
902 int i;
904 pci_iomul_get_lock_switch(pdev, &sw, &slot);
905 if (sw == NULL || slot == NULL)
906 return 0;
908 if (sw->func == slot->func[PCI_FUNC(pdev->devfn)])
909 sw->func = NULL;
910 kfree(slot->func[PCI_FUNC(pdev->devfn)]);
911 slot->func[PCI_FUNC(pdev->devfn)] = NULL;
912 for (i = 0; i < PCI_NUM_FUNC; i++) {
913 if (slot->func[i] != NULL)
914 goto out;
915 }
917 pci_iomul_slot_del_locked(sw, slot);
918 pci_iomul_slot_put(slot);
920 out:
921 mutex_unlock(&sw->lock);
922 pci_iomul_switch_put(sw);
923 return 0;
924 }
926 static int __devinit __pci_iomul_notifier_del_switch(struct pci_dev *pdev)
927 {
928 struct pci_iomul_switch *sw;
930 mutex_lock(&switch_list_lock);
931 sw = pci_iomul_find_switch_locked(pci_domain_nr(pdev->bus),
932 pdev->bus->number);
933 if (sw == NULL)
934 goto out;
936 pci_iomul_switch_del_locked(sw);
938 mutex_lock(&sw->lock);
939 if (sw->io_resource.parent)
940 release_resource(&sw->io_resource);
941 sw->io_base = 0; /* to tell this switch is removed */
942 sw->io_limit = 0;
943 BUG_ON(!list_empty(&sw->slots));
944 mutex_unlock(&sw->lock);
946 out:
947 mutex_unlock(&switch_list_lock);
948 pci_iomul_switch_put(sw);
949 return 0;
950 }
952 static int __devinit pci_iomul_notifier_del_device(struct pci_dev *pdev)
953 {
954 int ret;
955 switch (pdev->hdr_type) {
956 case PCI_HEADER_TYPE_NORMAL:
957 ret = __pci_iomul_notifier_del_device(pdev);
958 break;
959 case PCI_HEADER_TYPE_BRIDGE:
960 ret = __pci_iomul_notifier_del_switch(pdev);
961 break;
962 default:
963 printk(KERN_WARNING "PCI IOMUL: "
964 "device %s has unknown header type %02x, ignoring.\n",
965 pci_name(pdev), pdev->hdr_type);
966 ret = -EIO;
967 break;
968 }
969 return ret;
970 }
972 static int __devinit pci_iomul_notifier(struct notifier_block *nb,
973 unsigned long action, void *data)
974 {
975 struct device *dev = data;
976 struct pci_dev *pdev = to_pci_dev(dev);
978 switch (action) {
979 case BUS_NOTIFY_ADD_DEVICE:
980 quirk_iomul_reassign_ioresource(pdev);
981 break;
982 case BUS_NOTIFY_DEL_DEVICE:
983 return pci_iomul_notifier_del_device(pdev);
984 default:
985 /* nothing */
986 break;
987 }
989 return 0;
990 }
992 static struct notifier_block pci_iomul_nb = {
993 .notifier_call = pci_iomul_notifier,
994 };
996 static int __init pci_iomul_hotplug_init(void)
997 {
998 bus_register_notifier(&pci_bus_type, &pci_iomul_nb);
999 return 0;
1002 late_initcall(pci_iomul_hotplug_init);
1003 #endif
/*****************************************************************************/
/* Per-open-file state of the IO multiplexer character device. */
struct pci_iomul_data {
	struct mutex lock;	/* protects the fields below */

	struct pci_dev *pdev;	/* device bound by the setup ioctl */
	struct pci_iomul_switch *sw;	/* switch the device hangs off */
	struct pci_iomul_slot *slot; /* slot::kref */
	struct pci_iomul_func **func; /* when dereferencing,
					 sw->lock is necessary */
};
1016 static int pci_iomul_func_ioport(struct pci_iomul_func *func,
1017 uint8_t bar, uint64_t offset, int *port)
1019 if (!(func->io_bar & (1 << bar)))
1020 return -EINVAL;
1022 *port = func->resource[bar].start + offset;
1023 if (*port < func->resource[bar].start ||
1024 *port > func->resource[bar].end)
1025 return -EINVAL;
1027 return 0;
1030 static inline int pci_iomul_valid(struct pci_iomul_data *iomul)
1032 BUG_ON(!mutex_is_locked(&iomul->lock));
1033 BUG_ON(!mutex_is_locked(&iomul->sw->lock));
1034 return pci_iomul_switch_io_allocated(iomul->sw) &&
1035 *iomul->func != NULL;
1038 static void __pci_iomul_enable_io(struct pci_dev *pdev)
1040 uint16_t cmd;
1042 pci_dev_get(pdev);
1043 pci_read_config_word(pdev, PCI_COMMAND, &cmd);
1044 cmd |= PCI_COMMAND_IO;
1045 pci_write_config_word(pdev, PCI_COMMAND, cmd);
1048 static void __pci_iomul_disable_io(struct pci_iomul_data *iomul,
1049 struct pci_dev *pdev)
1051 uint16_t cmd;
1053 if (!pci_iomul_valid(iomul))
1054 return;
1056 pci_read_config_word(pdev, PCI_COMMAND, &cmd);
1057 cmd &= ~PCI_COMMAND_IO;
1058 pci_write_config_word(pdev, PCI_COMMAND, cmd);
1059 pci_dev_put(pdev);
1062 static int pci_iomul_open(struct inode *inode, struct file *filp)
1064 struct pci_iomul_data *iomul;
1065 iomul = kmalloc(sizeof(*iomul), GFP_KERNEL);
1066 if (iomul == NULL)
1067 return -ENOMEM;
1069 mutex_init(&iomul->lock);
1070 iomul->pdev = NULL;
1071 iomul->sw = NULL;
1072 iomul->slot = NULL;
1073 iomul->func = NULL;
1074 filp->private_data = (void*)iomul;
1076 return 0;
1079 static int pci_iomul_release(struct inode *inode, struct file *filp)
1081 struct pci_iomul_data *iomul =
1082 (struct pci_iomul_data*)filp->private_data;
1083 struct pci_iomul_switch *sw;
1084 struct pci_iomul_slot *slot = NULL;
1086 mutex_lock(&iomul->lock);
1087 sw = iomul->sw;
1088 slot = iomul->slot;
1089 if (iomul->pdev != NULL) {
1090 if (sw != NULL) {
1091 mutex_lock(&sw->lock);
1092 if (sw->current_pdev == iomul->pdev) {
1093 __pci_iomul_disable_io(iomul,
1094 sw->current_pdev);
1095 sw->current_pdev = NULL;
1097 sw->count--;
1098 if (sw->count == 0) {
1099 release_region(sw->io_region->start, sw->io_region->end - sw->io_region->start + 1);
1100 sw->io_region = NULL;
1102 mutex_unlock(&sw->lock);
1104 pci_dev_put(iomul->pdev);
1106 mutex_unlock(&iomul->lock);
1108 if (slot != NULL)
1109 pci_iomul_slot_put(slot);
1110 if (sw != NULL)
1111 pci_iomul_switch_put(sw);
1112 kfree(iomul);
1113 return 0;
1116 static long pci_iomul_setup(struct pci_iomul_data *iomul,
1117 struct pci_iomul_setup __user *arg)
1119 long error = 0;
1120 struct pci_iomul_setup setup;
1121 struct pci_iomul_switch *sw = NULL;
1122 struct pci_iomul_slot *slot;
1123 struct pci_bus *pbus;
1124 struct pci_dev *pdev;
1126 if (copy_from_user(&setup, arg, sizeof(setup)))
1127 return -EFAULT;
1129 pbus = pci_find_bus(setup.segment, setup.bus);
1130 if (pbus == NULL)
1131 return -ENODEV;
1132 pdev = pci_get_slot(pbus, setup.dev);
1133 if (pdev == NULL)
1134 return -ENODEV;
1136 mutex_lock(&iomul->lock);
1137 if (iomul->sw != NULL) {
1138 error = -EBUSY;
1139 goto out0;
1142 pci_iomul_get_lock_switch(pdev, &sw, &slot);
1143 if (sw == NULL || slot == NULL) {
1144 error = -ENODEV;
1145 goto out0;
1147 if (!pci_iomul_switch_io_allocated(sw)) {
1148 error = -ENODEV;
1149 goto out;
1152 if (slot->func[setup.func] == NULL) {
1153 error = -ENODEV;
1154 goto out;
1157 if (sw->count == 0) {
1158 BUG_ON(sw->io_region != NULL);
1159 sw->io_region =
1160 request_region(sw->io_base,
1161 sw->io_limit - sw->io_base + 1,
1162 "PCI IO Multiplexer driver");
1163 if (sw->io_region == NULL) {
1164 mutex_unlock(&sw->lock);
1165 error = -EBUSY;
1166 goto out;
1169 sw->count++;
1170 pci_iomul_slot_get(slot);
1172 iomul->pdev = pdev;
1173 iomul->sw = sw;
1174 iomul->slot = slot;
1175 iomul->func = &slot->func[setup.func];
1177 out:
1178 mutex_unlock(&sw->lock);
1179 out0:
1180 mutex_unlock(&iomul->lock);
1181 if (error != 0) {
1182 if (sw != NULL)
1183 pci_iomul_switch_put(sw);
1184 pci_dev_put(pdev);
1186 return error;
1189 static int pci_iomul_lock(struct pci_iomul_data *iomul,
1190 struct pci_iomul_switch **sw,
1191 struct pci_iomul_func **func)
1193 mutex_lock(&iomul->lock);
1194 *sw = iomul->sw;
1195 if (*sw == NULL) {
1196 mutex_unlock(&iomul->lock);
1197 return -ENODEV;
1199 mutex_lock(&(*sw)->lock);
1200 if (!pci_iomul_valid(iomul)) {
1201 mutex_unlock(&(*sw)->lock);
1202 mutex_unlock(&iomul->lock);
1203 return -ENODEV;
1205 *func = *iomul->func;
1207 return 0;
1210 static long pci_iomul_disable_io(struct pci_iomul_data *iomul)
1212 long error = 0;
1213 struct pci_iomul_switch *sw;
1214 struct pci_iomul_func *dummy_func;
1215 struct pci_dev *pdev;
1217 if (pci_iomul_lock(iomul, &sw, &dummy_func) < 0)
1218 return -ENODEV;
1220 pdev = iomul->pdev;
1221 if (pdev == NULL)
1222 error = -ENODEV;
1224 if (pdev != NULL && sw->current_pdev == pdev) {
1225 __pci_iomul_disable_io(iomul, pdev);
1226 sw->current_pdev = NULL;
1229 mutex_unlock(&sw->lock);
1230 mutex_unlock(&iomul->lock);
1231 return error;
1234 static void pci_iomul_switch_to(
1235 struct pci_iomul_data *iomul, struct pci_iomul_switch *sw,
1236 struct pci_dev *next_pdev)
1238 if (sw->current_pdev == next_pdev)
1239 /* nothing to do */
1240 return;
1242 if (sw->current_pdev != NULL)
1243 __pci_iomul_disable_io(iomul, sw->current_pdev);
1245 __pci_iomul_enable_io(next_pdev);
1246 sw->current_pdev = next_pdev;
1249 static long pci_iomul_in(struct pci_iomul_data *iomul,
1250 struct pci_iomul_in __user *arg)
1252 struct pci_iomul_in in;
1253 struct pci_iomul_switch *sw;
1254 struct pci_iomul_func *func;
1256 long error = 0;
1257 int port;
1258 uint32_t value = 0;
1260 if (copy_from_user(&in, arg, sizeof(in)))
1261 return -EFAULT;
1263 if (pci_iomul_lock(iomul, &sw, &func) < 0)
1264 return -ENODEV;
1266 error = pci_iomul_func_ioport(func, in.bar, in.offset, &port);
1267 if (error)
1268 goto out;
1270 pci_iomul_switch_to(iomul, sw, iomul->pdev);
1271 switch (in.size) {
1272 case 4:
1273 value = inl(port);
1274 break;
1275 case 2:
1276 value = inw(port);
1277 break;
1278 case 1:
1279 value = inb(port);
1280 break;
1281 default:
1282 error = -EINVAL;
1283 break;
1286 out:
1287 mutex_unlock(&sw->lock);
1288 mutex_unlock(&iomul->lock);
1290 if (error == 0 && put_user(value, &arg->value))
1291 return -EFAULT;
1292 return error;
1295 static long pci_iomul_out(struct pci_iomul_data *iomul,
1296 struct pci_iomul_out __user *arg)
1298 struct pci_iomul_in out;
1299 struct pci_iomul_switch *sw;
1300 struct pci_iomul_func *func;
1302 long error = 0;
1303 int port;
1305 if (copy_from_user(&out, arg, sizeof(out)))
1306 return -EFAULT;
1308 if (pci_iomul_lock(iomul, &sw, &func) < 0)
1309 return -ENODEV;
1311 error = pci_iomul_func_ioport(func, out.bar, out.offset, &port);
1312 if (error)
1313 goto out;
1315 pci_iomul_switch_to(iomul, sw, iomul->pdev);
1316 switch (out.size) {
1317 case 4:
1318 outl(out.value, port);
1319 break;
1320 case 2:
1321 outw(out.value, port);
1322 break;
1323 case 1:
1324 outb(out.value, port);
1325 break;
1326 default:
1327 error = -EINVAL;
1328 break;
1331 out:
1332 mutex_unlock(&sw->lock);
1333 mutex_unlock(&iomul->lock);
1334 return error;
1337 static long pci_iomul_ioctl(struct file *filp,
1338 unsigned int cmd, unsigned long arg)
1340 long error;
1341 struct pci_iomul_data *iomul =
1342 (struct pci_iomul_data*)filp->private_data;
1344 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
1345 return -EPERM;
1347 switch (cmd) {
1348 case PCI_IOMUL_SETUP:
1349 error = pci_iomul_setup(iomul,
1350 (struct pci_iomul_setup __user *)arg);
1351 break;
1352 case PCI_IOMUL_DISABLE_IO:
1353 error = pci_iomul_disable_io(iomul);
1354 break;
1355 case PCI_IOMUL_IN:
1356 error = pci_iomul_in(iomul, (struct pci_iomul_in __user *)arg);
1357 break;
1358 case PCI_IOMUL_OUT:
1359 error = pci_iomul_out(iomul,
1360 (struct pci_iomul_out __user *)arg);
1361 break;
1362 default:
1363 error = -ENOSYS;
1364 break;
1367 return error;
/*
 * File operations for the /dev/pci_iomul misc device.  All real work is
 * done through unlocked_ioctl (PCI_IOMUL_SETUP/DISABLE_IO/IN/OUT);
 * read/write/mmap are intentionally absent.
 */
static const struct file_operations pci_iomul_fops = {
	.owner = THIS_MODULE,

	.open = pci_iomul_open, /* nonseekable_open */
	.release = pci_iomul_release,

	.unlocked_ioctl = pci_iomul_ioctl,
};
/*
 * Misc character device descriptor: registers /dev/pci_iomul with a
 * dynamically assigned minor number.
 */
static struct miscdevice pci_iomul_miscdev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "pci_iomul",
	.fops = &pci_iomul_fops,
};
1385 static int pci_iomul_init(void)
1387 int error;
1388 error = misc_register(&pci_iomul_miscdev);
1389 if (error != 0) {
1390 printk(KERN_ALERT "Couldn't register /dev/misc/pci_iomul");
1391 return error;
1393 printk("PCI IO multiplexer device installed.\n");
1394 return 0;
#if 0
/*
 * Unregistration hook, currently compiled out: no module_exit() is
 * wired up in this file, so the misc device is never deregistered.
 * Kept for reference should the driver become unloadable.
 */
static void pci_iomul_cleanup(void)
{
	misc_deregister(&pci_iomul_miscdev);
}
#endif
/*
 * This must be called after pci fixup final which is called by
 * device_initcall(pci_init).
 */
late_initcall(pci_iomul_init);

/* Module metadata (driver may also be built in; see late_initcall above). */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Isaku Yamahata <yamahata@valinux.co.jp>");
MODULE_DESCRIPTION("PCI IO space multiplexing driver");