ia64/linux-2.6.18-xen.hg

view drivers/pci/probe.c @ 882:8dec4aa9b8b9

PCI pass through: PCIe IO space multiplexing

This is required for more than 16 HVM domains to boot from
PCIe pass-through devices.

Linux as dom0 exclusively assigns IO space to downstream PCI bridges,
and the assignment unit of PCI bridge IO space is 4K. So only up to
16 PCIe devices can be accessed via IO space within the 64K IO ports.
PCI expansion ROM BIOSes often use IO port access to boot from the
device, so in a virtualized environment this means that only up to 16
guest domains can boot from a pass-through device.

This patch allows PCIe IO space sharing for pass-through devices.
- Reassign the IO space of the PCIe devices specified by
"guestiomuldev=[<segment>:]<bus>:<dev>[,[<segment>:]<bus>:<dev>][,...]"
so that it is shared.
This is implemented as a Linux PCI quirk fixup.

The sharing unit is the PCIe switch, i.e. the IO space of the end point
devices under the same switch will be shared. If there is more than
one switch, two areas of IO space will be used.

- Add the driver which arbitrates accesses to the multiplexed PCIe
IO space. qemu-dm will use this later.

Limitation:
The IO ports of IO-shared devices can't be accessed from dom0 Linux device
drivers. But this shouldn't be a big issue because the PCIe specification
discourages the use of IO space and recommends that IO space be
used only for bootable devices with ROM code. OS device drivers should
work without IO space access.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Keir Fraser <keir.fraser@citrix.com>
date Thu May 28 09:57:49 2009 +0100 (2009-05-28)
parents c1f0373ff44e
children 2e94884f0e8d
line source
1 /*
2 * probe.c - PCI detection and setup code
3 */
5 #include <linux/kernel.h>
6 #include <linux/delay.h>
7 #include <linux/init.h>
8 #include <linux/pci.h>
9 #include <linux/slab.h>
10 #include <linux/module.h>
11 #include <linux/cpumask.h>
12 #include "pci.h"
/* Constants and global list heads used by the bus-scanning code in this file. */
14 #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */
/* Loop bound used by pci_scan_bridge() when setting aside bus numbers behind a CardBus bridge. */
15 #define CARDBUS_RESERVE_BUSNR 3
/* The two values pci_cfg_space_size() can return. */
16 #define PCI_CFG_SPACE_SIZE 256
17 #define PCI_CFG_SPACE_EXP_SIZE 4096
19 /* Ugh. Need to stop exporting this to modules. */
/* pci_create_bus() appends every root bus to this list while holding pci_bus_sem. */
20 LIST_HEAD(pci_root_buses);
21 EXPORT_SYMBOL(pci_root_buses);
/* Global device list head; nothing in the visible part of this file adds to it directly. */
23 LIST_HEAD(pci_devices);
25 #ifdef HAVE_PCI_LEGACY
26 /**
27 * pci_create_legacy_files - create legacy I/O port and memory files
28 * @b: bus to create files under
29 *
30 * Some platforms allow access to legacy I/O port and ISA memory space on
31 * a per-bus basis. This routine creates the files and ties them into
32 * their associated read, write and mmap files from pci-sysfs.c
33 */
34 static void pci_create_legacy_files(struct pci_bus *b)
35 {
36 b->legacy_io = kzalloc(sizeof(struct bin_attribute) * 2,
37 GFP_ATOMIC);
38 if (b->legacy_io) {
39 b->legacy_io->attr.name = "legacy_io";
40 b->legacy_io->size = 0xffff;
41 b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
42 b->legacy_io->attr.owner = THIS_MODULE;
43 b->legacy_io->read = pci_read_legacy_io;
44 b->legacy_io->write = pci_write_legacy_io;
45 class_device_create_bin_file(&b->class_dev, b->legacy_io);
47 /* Allocated above after the legacy_io struct */
48 b->legacy_mem = b->legacy_io + 1;
49 b->legacy_mem->attr.name = "legacy_mem";
50 b->legacy_mem->size = 1024*1024;
51 b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
52 b->legacy_mem->attr.owner = THIS_MODULE;
53 b->legacy_mem->mmap = pci_mmap_legacy_mem;
54 class_device_create_bin_file(&b->class_dev, b->legacy_mem);
55 }
56 }
58 void pci_remove_legacy_files(struct pci_bus *b)
59 {
60 if (b->legacy_io) {
61 class_device_remove_bin_file(&b->class_dev, b->legacy_io);
62 class_device_remove_bin_file(&b->class_dev, b->legacy_mem);
63 kfree(b->legacy_io); /* both are allocated here */
64 }
65 }
66 #else /* !HAVE_PCI_LEGACY */
/* Without HAVE_PCI_LEGACY there are no legacy files: both hooks are no-ops. */
static inline void pci_create_legacy_files(struct pci_bus *bus)
{
}

void pci_remove_legacy_files(struct pci_bus *bus)
{
}
69 #endif /* HAVE_PCI_LEGACY */
71 /*
72 * PCI Bus Class Devices
73 */
74 static ssize_t pci_bus_show_cpuaffinity(struct class_device *class_dev,
75 char *buf)
76 {
77 int ret;
78 cpumask_t cpumask;
80 cpumask = pcibus_to_cpumask(to_pci_bus(class_dev));
81 ret = cpumask_scnprintf(buf, PAGE_SIZE, cpumask);
82 if (ret < PAGE_SIZE)
83 buf[ret++] = '\n';
84 return ret;
85 }
86 CLASS_DEVICE_ATTR(cpuaffinity, S_IRUGO, pci_bus_show_cpuaffinity, NULL);
88 /*
89 * PCI Bus Class
90 */
91 static void release_pcibus_dev(struct class_device *class_dev)
92 {
93 struct pci_bus *pci_bus = to_pci_bus(class_dev);
95 if (pci_bus->bridge)
96 put_device(pci_bus->bridge);
97 kfree(pci_bus);
98 }
100 static struct class pcibus_class = {
101 .name = "pci_bus",
102 .release = &release_pcibus_dev,
103 };
105 static int __init pcibus_class_init(void)
106 {
107 return class_register(&pcibus_class);
108 }
109 postcore_initcall(pcibus_class_init);
111 /*
112 * Translate the low bits of the PCI base
113 * to the resource type
114 */
115 static inline unsigned int pci_calc_resource_flags(unsigned int flags)
116 {
117 if (flags & PCI_BASE_ADDRESS_SPACE_IO)
118 return IORESOURCE_IO;
120 if (flags & PCI_BASE_ADDRESS_MEM_PREFETCH)
121 return IORESOURCE_MEM | IORESOURCE_PREFETCH;
123 return IORESOURCE_MEM;
124 }
126 static u64 pci_size(u64 base, u64 maxbase, u64 mask)
127 {
128 u64 size = mask & maxbase; /* Find the significant bits */
129 if (!size)
130 return 0;
132 /* Get the lowest of them to find the decode size, and
133 from that the extent. */
134 size = (size & ~(size-1)) - 1;
136 /* base == maxbase can be valid only if the BAR has
137 already been programmed with all 1s. */
138 if (base == maxbase && ((base | size) & mask) != mask)
139 return 0;
141 return size;
142 }
144 static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar)
145 {
146 if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
147 res->flags = bar & ~PCI_BASE_ADDRESS_IO_MASK;
148 return pci_bar_io;
149 }
151 res->flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK;
153 if (res->flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
154 return pci_bar_mem64;
155 return pci_bar_mem32;
156 }
158 /**
159 * pci_read_base - read a PCI BAR
160 * @dev: the PCI device
161 * @type: type of the BAR
162 * @res: resource buffer to be filled in
163 * @pos: BAR position in the config space
164 *
165 * Returns 1 if the BAR is 64-bit, or 0 if 32-bit.
166 */
167 int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
168 struct resource *res, unsigned int pos)
169 {
170 u32 l, sz, mask;
172 mask = type ? ~PCI_ROM_ADDRESS_ENABLE : ~0;
174 res->name = pci_name(dev);
176 pci_read_config_dword(dev, pos, &l);
177 pci_write_config_dword(dev, pos, mask);
178 pci_read_config_dword(dev, pos, &sz);
179 pci_write_config_dword(dev, pos, l);
181 /*
182 * All bits set in sz means the device isn't working properly.
183 * If the BAR isn't implemented, all bits must be 0. If it's a
184 * memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit
185 * 1 must be clear.
186 */
187 if (!sz || sz == 0xffffffff)
188 goto fail;
190 /*
191 * I don't know how l can have all bits set. Copied from old code.
192 * Maybe it fixes a bug on some ancient platform.
193 */
194 if (l == 0xffffffff)
195 l = 0;
197 if (type == pci_bar_unknown) {
198 type = decode_bar(res, l);
199 res->flags |= pci_calc_resource_flags(l);
200 if (type == pci_bar_io) {
201 l &= PCI_BASE_ADDRESS_IO_MASK;
202 mask = PCI_BASE_ADDRESS_IO_MASK & 0xffff;
203 } else {
204 l &= PCI_BASE_ADDRESS_MEM_MASK;
205 mask = (u32)PCI_BASE_ADDRESS_MEM_MASK;
206 }
207 } else {
208 res->flags |= (l & IORESOURCE_ROM_ENABLE);
209 l &= PCI_ROM_ADDRESS_MASK;
210 mask = (u32)PCI_ROM_ADDRESS_MASK;
211 }
213 if (type == pci_bar_mem64) {
214 u64 l64 = l;
215 u64 sz64 = sz;
216 u64 mask64 = mask | (u64)~0 << 32;
218 pci_read_config_dword(dev, pos + 4, &l);
219 pci_write_config_dword(dev, pos + 4, ~0);
220 pci_read_config_dword(dev, pos + 4, &sz);
221 pci_write_config_dword(dev, pos + 4, l);
223 l64 |= ((u64)l << 32);
224 sz64 |= ((u64)sz << 32);
226 sz64 = pci_size(l64, sz64, mask64);
228 if (!sz64)
229 goto fail;
231 if ((sizeof(resource_size_t) < 8) && (sz64 > 0x100000000ULL)) {
232 dev_err(&dev->dev, "can't handle 64-bit BAR\n");
233 goto fail;
234 } else if ((sizeof(resource_size_t) < 8) && l) {
235 /* Address above 32-bit boundary; disable the BAR */
236 pci_write_config_dword(dev, pos, 0);
237 pci_write_config_dword(dev, pos + 4, 0);
238 res->start = 0;
239 res->end = sz64;
240 } else {
241 res->start = l64;
242 res->end = l64 + sz64;
243 }
244 } else {
245 sz = pci_size(l, sz, mask);
247 if (!sz)
248 goto fail;
250 res->start = l;
251 res->end = l + sz;
252 }
254 out:
255 return (type == pci_bar_mem64) ? 1 : 0;
256 fail:
257 res->flags = 0;
258 goto out;
259 }
261 static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
262 {
263 unsigned int pos, reg;
265 for (pos = 0; pos < howmany; pos++) {
266 struct resource *res = &dev->resource[pos];
267 reg = PCI_BASE_ADDRESS_0 + (pos << 2);
268 pos += __pci_read_base(dev, pci_bar_unknown, res, reg);
269 }
271 if (rom) {
272 struct resource *res = &dev->resource[PCI_ROM_RESOURCE];
273 dev->rom_base_reg = rom;
274 res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH |
275 IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
276 __pci_read_base(dev, pci_bar_mem32, res, rom);
277 }
278 }
280 void __devinit pci_read_bridge_bases(struct pci_bus *child)
281 {
282 struct pci_dev *dev = child->self;
283 u8 io_base_lo, io_limit_lo;
284 u16 mem_base_lo, mem_limit_lo;
285 unsigned long base, limit;
286 struct resource *res;
287 int i;
289 if (!dev) /* It's a host bus, nothing to read */
290 return;
292 if (dev->transparent) {
293 printk(KERN_INFO "PCI: Transparent bridge - %s\n", pci_name(dev));
294 for(i = 3; i < PCI_BUS_NUM_RESOURCES; i++)
295 child->resource[i] = child->parent->resource[i - 3];
296 }
298 for(i=0; i<3; i++)
299 child->resource[i] = &dev->resource[PCI_BRIDGE_RESOURCES+i];
301 res = child->resource[0];
302 pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo);
303 pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo);
304 base = (io_base_lo & PCI_IO_RANGE_MASK) << 8;
305 limit = (io_limit_lo & PCI_IO_RANGE_MASK) << 8;
307 if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
308 u16 io_base_hi, io_limit_hi;
309 pci_read_config_word(dev, PCI_IO_BASE_UPPER16, &io_base_hi);
310 pci_read_config_word(dev, PCI_IO_LIMIT_UPPER16, &io_limit_hi);
311 base |= (io_base_hi << 16);
312 limit |= (io_limit_hi << 16);
313 }
315 if (base <= limit) {
316 res->flags = (io_base_lo & PCI_IO_RANGE_TYPE_MASK) | IORESOURCE_IO;
317 if (!res->start)
318 res->start = base;
319 if (!res->end)
320 res->end = limit + 0xfff;
321 }
323 res = child->resource[1];
324 pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo);
325 pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo);
326 base = (mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16;
327 limit = (mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16;
328 if (base <= limit) {
329 res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM;
330 res->start = base;
331 res->end = limit + 0xfffff;
332 }
334 res = child->resource[2];
335 pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo);
336 pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo);
337 base = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16;
338 limit = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16;
340 if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
341 u32 mem_base_hi, mem_limit_hi;
342 pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi);
343 pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi);
345 /*
346 * Some bridges set the base > limit by default, and some
347 * (broken) BIOSes do not initialize them. If we find
348 * this, just assume they are not being used.
349 */
350 if (mem_base_hi <= mem_limit_hi) {
351 #if BITS_PER_LONG == 64
352 base |= ((long) mem_base_hi) << 32;
353 limit |= ((long) mem_limit_hi) << 32;
354 #else
355 if (mem_base_hi || mem_limit_hi) {
356 printk(KERN_ERR "PCI: Unable to handle 64-bit address space for bridge %s\n", pci_name(dev));
357 return;
358 }
359 #endif
360 }
361 }
362 if (base <= limit) {
363 res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM | IORESOURCE_PREFETCH;
364 res->start = base;
365 res->end = limit + 0xfffff;
366 }
367 }
369 static struct pci_bus * __devinit pci_alloc_bus(void)
370 {
371 struct pci_bus *b;
373 b = kzalloc(sizeof(*b), GFP_KERNEL);
374 if (b) {
375 INIT_LIST_HEAD(&b->node);
376 INIT_LIST_HEAD(&b->children);
377 INIT_LIST_HEAD(&b->devices);
378 }
379 return b;
380 }
382 static struct pci_bus * __devinit
383 pci_alloc_child_bus(struct pci_bus *parent, struct pci_dev *bridge, int busnr)
384 {
385 struct pci_bus *child;
386 int i;
388 /*
389 * Allocate a new bus, and inherit stuff from the parent..
390 */
391 child = pci_alloc_bus();
392 if (!child)
393 return NULL;
395 child->parent = parent;
396 child->ops = parent->ops;
397 child->sysdata = parent->sysdata;
398 child->bus_flags = parent->bus_flags;
400 child->class_dev.class = &pcibus_class;
401 sprintf(child->class_dev.class_id, "%04x:%02x", pci_domain_nr(child), busnr);
402 class_device_register(&child->class_dev);
403 class_device_create_file(&child->class_dev, &class_device_attr_cpuaffinity);
405 /*
406 * Set up the primary, secondary and subordinate
407 * bus numbers.
408 */
409 child->number = child->secondary = busnr;
410 child->primary = parent->secondary;
411 child->subordinate = 0xff;
413 if (!bridge)
414 return child;
416 child->self = bridge;
417 child->bridge = get_device(&bridge->dev);
419 /* Set up default resource pointers and names.. */
420 for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
421 child->resource[i] = &bridge->resource[PCI_BRIDGE_RESOURCES+i];
422 child->resource[i]->name = child->name;
423 }
424 bridge->subordinate = child;
426 return child;
427 }
429 struct pci_bus * __devinit pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr)
430 {
431 struct pci_bus *child;
433 child = pci_alloc_child_bus(parent, dev, busnr);
434 if (child) {
435 down_write(&pci_bus_sem);
436 list_add_tail(&child->node, &parent->children);
437 up_write(&pci_bus_sem);
438 }
439 return child;
440 }
442 static void pci_enable_crs(struct pci_dev *dev)
443 {
444 u16 cap, rpctl;
445 int rpcap = pci_find_capability(dev, PCI_CAP_ID_EXP);
446 if (!rpcap)
447 return;
449 pci_read_config_word(dev, rpcap + PCI_CAP_FLAGS, &cap);
450 if (((cap & PCI_EXP_FLAGS_TYPE) >> 4) != PCI_EXP_TYPE_ROOT_PORT)
451 return;
453 pci_read_config_word(dev, rpcap + PCI_EXP_RTCTL, &rpctl);
454 rpctl |= PCI_EXP_RTCTL_CRSSVE;
455 pci_write_config_word(dev, rpcap + PCI_EXP_RTCTL, rpctl);
456 }
458 static void __devinit pci_fixup_parent_subordinate_busnr(struct pci_bus *child, int max)
459 {
460 struct pci_bus *parent = child->parent;
462 /* Attempts to fix that up are really dangerous unless
463 we're going to re-assign all bus numbers. */
464 if (!pcibios_assign_all_busses())
465 return;
467 while (parent->parent && parent->subordinate < max) {
468 parent->subordinate = max;
469 pci_write_config_byte(parent->self, PCI_SUBORDINATE_BUS, max);
470 parent = parent->parent;
471 }
472 }
474 unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus);
476 /*
477 * If it's a bridge, configure it and scan the bus behind it.
478 * For CardBus bridges, we don't scan behind as the devices will
479 * be handled by the bridge driver itself.
480 *
481 * We need to process bridges in two passes -- first we scan those
482 * already configured by the BIOS and after we are done with all of
483 * them, we proceed to assigning numbers to the remaining buses in
484 * order to avoid overlaps between old and new bus numbers.
485 */
486 int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass)
487 {
488 struct pci_bus *child;
489 int is_cardbus = (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS);
490 u32 buses, i, j = 0;
491 u16 bctl;
493 pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses);
495 pr_debug("PCI: Scanning behind PCI bridge %s, config %06x, pass %d\n",
496 pci_name(dev), buses & 0xffffff, pass);
498 /* Disable MasterAbortMode during probing to avoid reporting
499 of bus errors (in some architectures) */
500 pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bctl);
501 pci_write_config_word(dev, PCI_BRIDGE_CONTROL,
502 bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT);
504 pci_enable_crs(dev);
506 if ((buses & 0xffff00) && !pcibios_assign_all_busses() && !is_cardbus) {
507 unsigned int cmax, busnr;
508 /*
509 * Bus already configured by firmware, process it in the first
510 * pass and just note the configuration.
511 */
512 if (pass)
513 goto out;
514 busnr = (buses >> 8) & 0xFF;
516 /*
517 * If we already got to this bus through a different bridge,
518 * ignore it. This can happen with the i450NX chipset.
519 */
520 if (pci_find_bus(pci_domain_nr(bus), busnr)) {
521 printk(KERN_INFO "PCI: Bus %04x:%02x already known\n",
522 pci_domain_nr(bus), busnr);
523 goto out;
524 }
526 child = pci_add_new_bus(bus, dev, busnr);
527 if (!child)
528 goto out;
529 child->primary = buses & 0xFF;
530 child->subordinate = (buses >> 16) & 0xFF;
531 child->bridge_ctl = bctl;
533 cmax = pci_scan_child_bus(child);
534 if (cmax > max)
535 max = cmax;
536 if (child->subordinate > max)
537 max = child->subordinate;
538 } else {
539 /*
540 * We need to assign a number to this bus which we always
541 * do in the second pass.
542 */
543 if (!pass) {
544 if (pcibios_assign_all_busses())
545 /* Temporarily disable forwarding of the
546 configuration cycles on all bridges in
547 this bus segment to avoid possible
548 conflicts in the second pass between two
549 bridges programmed with overlapping
550 bus ranges. */
551 pci_write_config_dword(dev, PCI_PRIMARY_BUS,
552 buses & ~0xffffff);
553 goto out;
554 }
556 /* Clear errors */
557 pci_write_config_word(dev, PCI_STATUS, 0xffff);
559 /* Prevent assigning a bus number that already exists.
560 * This can happen when a bridge is hot-plugged */
561 if (pci_find_bus(pci_domain_nr(bus), max+1))
562 goto out;
563 child = pci_add_new_bus(bus, dev, ++max);
564 buses = (buses & 0xff000000)
565 | ((unsigned int)(child->primary) << 0)
566 | ((unsigned int)(child->secondary) << 8)
567 | ((unsigned int)(child->subordinate) << 16);
569 /*
570 * yenta.c forces a secondary latency timer of 176.
571 * Copy that behaviour here.
572 */
573 if (is_cardbus) {
574 buses &= ~0xff000000;
575 buses |= CARDBUS_LATENCY_TIMER << 24;
576 }
578 /*
579 * We need to blast all three values with a single write.
580 */
581 pci_write_config_dword(dev, PCI_PRIMARY_BUS, buses);
583 if (!is_cardbus) {
584 child->bridge_ctl = bctl | PCI_BRIDGE_CTL_NO_ISA;
585 /*
586 * Adjust subordinate busnr in parent buses.
587 * We do this before scanning for children because
588 * some devices may not be detected if the bios
589 * was lazy.
590 */
591 pci_fixup_parent_subordinate_busnr(child, max);
592 /* Now we can scan all subordinate buses... */
593 max = pci_scan_child_bus(child);
594 /*
595 * now fix it up again since we have found
596 * the real value of max.
597 */
598 pci_fixup_parent_subordinate_busnr(child, max);
599 } else {
600 /*
601 * For CardBus bridges, we leave 4 bus numbers
602 * as cards with a PCI-to-PCI bridge can be
603 * inserted later.
604 */
605 for (i=0; i<CARDBUS_RESERVE_BUSNR; i++) {
606 struct pci_bus *parent = bus;
607 if (pci_find_bus(pci_domain_nr(bus),
608 max+i+1))
609 break;
610 while (parent->parent) {
611 if ((!pcibios_assign_all_busses()) &&
612 (parent->subordinate > max) &&
613 (parent->subordinate <= max+i)) {
614 j = 1;
615 }
616 parent = parent->parent;
617 }
618 if (j) {
619 /*
620 * Often, there are two cardbus bridges
621 * -- try to leave one valid bus number
622 * for each one.
623 */
624 i /= 2;
625 break;
626 }
627 }
628 max += i;
629 pci_fixup_parent_subordinate_busnr(child, max);
630 }
631 /*
632 * Set the subordinate bus number to its real value.
633 */
634 child->subordinate = max;
635 pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max);
636 }
638 sprintf(child->name, (is_cardbus ? "PCI CardBus #%02x" : "PCI Bus #%02x"), child->number);
640 while (bus->parent) {
641 if ((child->subordinate > bus->subordinate) ||
642 (child->number > bus->subordinate) ||
643 (child->number < bus->number) ||
644 (child->subordinate < bus->number)) {
645 printk(KERN_WARNING "PCI: Bus #%02x (-#%02x) is "
646 "hidden behind%s bridge #%02x (-#%02x)%s\n",
647 child->number, child->subordinate,
648 bus->self->transparent ? " transparent" : " ",
649 bus->number, bus->subordinate,
650 pcibios_assign_all_busses() ? " " :
651 " (try 'pci=assign-busses')");
652 printk(KERN_WARNING "Please report the result to "
653 "linux-kernel to fix this permanently\n");
654 }
655 bus = bus->parent;
656 }
658 out:
659 pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bctl);
661 return max;
662 }
664 /*
665 * Read interrupt line and base address registers.
666 * The architecture-dependent code can tweak these, of course.
667 */
668 static void pci_read_irq(struct pci_dev *dev)
669 {
670 unsigned char irq;
672 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq);
673 dev->pin = irq;
674 if (irq)
675 pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
676 dev->irq = irq;
677 }
679 /**
680 * pci_setup_device - fill in class and map information of a device
681 * @dev: the device structure to fill
682 *
683 * Initialize the device structure with information about the device's
684 * vendor,class,memory and IO-space addresses,IRQ lines etc.
685 * Called at initialisation of the PCI subsystem and by CardBus services.
686 * Returns 0 on success and negative if unknown type of device (not normal,
687 * bridge or CardBus).
688 */
689 int pci_setup_device(struct pci_dev *dev)
690 {
691 u32 class;
692 u8 hdr_type;
694 if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type))
695 return -EIO;
697 dev->sysdata = dev->bus->sysdata;
698 dev->dev.parent = dev->bus->bridge;
699 dev->dev.bus = &pci_bus_type;
700 dev->hdr_type = hdr_type & 0x7f;
701 dev->multifunction = !!(hdr_type & 0x80);
702 dev->cfg_size = pci_cfg_space_size(dev);
703 dev->error_state = pci_channel_io_normal;
705 /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
706 set this higher, assuming the system even supports it. */
707 dev->dma_mask = 0xffffffff;
708 sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(dev->bus),
709 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
711 pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
712 class >>= 8; /* upper 3 bytes */
713 dev->class = class;
714 class >>= 8;
716 pr_debug("PCI: Found %s [%04x/%04x] %06x %02x\n", pci_name(dev),
717 dev->vendor, dev->device, class, dev->hdr_type);
719 /* "Unknown power state" */
720 dev->current_state = PCI_UNKNOWN;
722 /* Early fixups, before probing the BARs */
723 pci_fixup_device(pci_fixup_early, dev);
725 switch (dev->hdr_type) { /* header type */
726 case PCI_HEADER_TYPE_NORMAL: /* standard header */
727 if (class == PCI_CLASS_BRIDGE_PCI)
728 goto bad;
729 pci_read_irq(dev);
730 pci_read_bases(dev, 6, PCI_ROM_ADDRESS);
731 pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
732 pci_read_config_word(dev, PCI_SUBSYSTEM_ID, &dev->subsystem_device);
733 break;
735 case PCI_HEADER_TYPE_BRIDGE: /* bridge header */
736 if (class != PCI_CLASS_BRIDGE_PCI)
737 goto bad;
738 /* The PCI-to-PCI bridge spec requires that subtractive
739 decoding (i.e. transparent) bridge must have programming
740 interface code of 0x01. */
741 pci_read_irq(dev);
742 dev->transparent = ((dev->class & 0xff) == 1);
743 pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
744 break;
746 case PCI_HEADER_TYPE_CARDBUS: /* CardBus bridge header */
747 if (class != PCI_CLASS_BRIDGE_CARDBUS)
748 goto bad;
749 pci_read_irq(dev);
750 pci_read_bases(dev, 1, 0);
751 pci_read_config_word(dev, PCI_CB_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
752 pci_read_config_word(dev, PCI_CB_SUBSYSTEM_ID, &dev->subsystem_device);
753 break;
755 default: /* unknown header */
756 printk(KERN_ERR "PCI: device %s has unknown header type %02x, ignoring.\n",
757 pci_name(dev), dev->hdr_type);
758 return -EIO;
760 bad:
761 printk(KERN_ERR "PCI: %s: class %x doesn't match header type %02x. Ignoring class.\n",
762 pci_name(dev), class, dev->hdr_type);
763 dev->class = PCI_CLASS_NOT_DEFINED;
764 }
766 /* We found a fine healthy device, go go go... */
767 return 0;
768 }
/**
 * pci_release_dev - free a pci device structure when all users of it are finished.
 * @dev: device that's been disconnected
 *
 * Installed as the release callback of the embedded struct device by
 * pci_device_add().  Calls pci_iov_release() on the PCI device and then
 * frees it.
 */
static void pci_release_dev(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	pci_iov_release(pdev);
	kfree(pdev);
}
788 /**
789 * pci_cfg_space_size - get the configuration space size of the PCI device.
790 * @dev: PCI device
791 *
792 * Regular PCI devices have 256 bytes, but PCI-X 2 and PCI Express devices
793 * have 4096 bytes. Even if the device is capable, that doesn't mean we can
794 * access it. Maybe we don't have a way to generate extended config space
795 * accesses, or the device is behind a reverse Express bridge. So we try
796 * reading the dword at 0x100 which must either be 0 or a valid extended
797 * capability header.
798 */
799 int pci_cfg_space_size(struct pci_dev *dev)
800 {
801 int pos;
802 u32 status;
804 pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
805 if (!pos) {
806 pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
807 if (!pos)
808 goto fail;
810 pci_read_config_dword(dev, pos + PCI_X_STATUS, &status);
811 if (!(status & (PCI_X_STATUS_266MHZ | PCI_X_STATUS_533MHZ)))
812 goto fail;
813 }
815 if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL)
816 goto fail;
817 if (status == 0xffffffff)
818 goto fail;
820 return PCI_CFG_SPACE_EXP_SIZE;
822 fail:
823 return PCI_CFG_SPACE_SIZE;
824 }
/*
 * Release callback that pci_create_bus() installs on the struct device
 * it allocates for a root bus: simply frees that device.
 */
static void pci_release_bus_bridge_dev(struct device *dev)
{
	kfree(dev);
}
831 /*
832 * Read the config data for a PCI device, sanity-check it
833 * and fill in the dev structure...
834 */
835 static struct pci_dev * __devinit
836 pci_scan_device(struct pci_bus *bus, int devfn)
837 {
838 struct pci_dev *dev;
839 u32 l;
840 int delay = 1;
842 if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l))
843 return NULL;
845 /* some broken boards return 0 or ~0 if a slot is empty: */
846 if (l == 0xffffffff || l == 0x00000000 ||
847 l == 0x0000ffff || l == 0xffff0000)
848 return NULL;
850 /* Configuration request Retry Status */
851 while (l == 0xffff0001) {
852 msleep(delay);
853 delay *= 2;
854 if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l))
855 return NULL;
856 /* Card hasn't responded in 60 seconds? Must be stuck. */
857 if (delay > 60 * 1000) {
858 printk(KERN_WARNING "Device %04x:%02x:%02x.%d not "
859 "responding\n", pci_domain_nr(bus),
860 bus->number, PCI_SLOT(devfn),
861 PCI_FUNC(devfn));
862 return NULL;
863 }
864 }
866 dev = kzalloc(sizeof(struct pci_dev), GFP_KERNEL);
867 if (!dev)
868 return NULL;
870 dev->bus = bus;
871 dev->devfn = devfn;
872 dev->vendor = l & 0xffff;
873 dev->device = (l >> 16) & 0xffff;
875 if (pci_setup_device(dev)) {
876 kfree(dev);
877 return NULL;
878 }
880 return dev;
881 }
883 void __devinit pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
884 {
885 device_initialize(&dev->dev);
886 dev->dev.release = pci_release_dev;
887 pci_dev_get(dev);
889 dev->dev.dma_mask = &dev->dma_mask;
890 dev->dev.coherent_dma_mask = 0xffffffffull;
892 /* Fix up broken headers */
893 pci_fixup_device(pci_fixup_header, dev);
895 /* Buffers for saving PCIe and PCI-X capabilities */
896 pci_allocate_cap_save_buffers(dev);
898 /* Alternative Routing-ID Forwarding */
899 pci_enable_ari(dev);
901 /* Single Root I/O Virtualization */
902 pci_iov_init(dev);
904 /*
905 * Add the device to our list of discovered devices
906 * and the bus list for fixup functions, etc.
907 */
908 INIT_LIST_HEAD(&dev->global_list);
909 down_write(&pci_bus_sem);
910 list_add_tail(&dev->bus_list, &bus->devices);
911 up_write(&pci_bus_sem);
912 }
914 struct pci_dev * __devinit
915 pci_scan_single_device(struct pci_bus *bus, int devfn)
916 {
917 struct pci_dev *dev;
919 dev = pci_scan_device(bus, devfn);
920 if (!dev)
921 return NULL;
923 pci_device_add(dev, bus);
924 pci_scan_msi_device(dev);
926 return dev;
927 }
929 /**
930 * pci_scan_slot - scan a PCI slot on a bus for devices.
931 * @bus: PCI bus to scan
932 * @devfn: slot number to scan (must have zero function.)
933 *
934 * Scan a PCI slot on the specified PCI bus for devices, adding
935 * discovered devices to the @bus->devices list. New devices
936 * will have an empty dev->global_list head.
937 */
938 int __devinit pci_scan_slot(struct pci_bus *bus, int devfn)
939 {
940 int func, nr = 0;
941 int scan_all_fns;
943 scan_all_fns = pcibios_scan_all_fns(bus, devfn);
945 for (func = 0; func < 8; func++, devfn++) {
946 struct pci_dev *dev;
948 dev = pci_scan_single_device(bus, devfn);
949 if (dev) {
950 nr++;
952 /*
953 * If this is a single function device,
954 * don't scan past the first function.
955 */
956 if (!dev->multifunction) {
957 if (func > 0) {
958 dev->multifunction = 1;
959 } else {
960 break;
961 }
962 }
963 } else {
964 if (func == 0 && !scan_all_fns)
965 break;
966 }
967 }
968 return nr;
969 }
971 unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus)
972 {
973 unsigned int devfn, pass, max = bus->secondary;
974 struct pci_dev *dev;
976 pr_debug("PCI: Scanning bus %04x:%02x\n", pci_domain_nr(bus), bus->number);
978 /* Go find them, Rover! */
979 for (devfn = 0; devfn < 0x100; devfn += 8)
980 pci_scan_slot(bus, devfn);
982 /* Reserve buses for SR-IOV capability. */
983 max += pci_iov_bus_range(bus);
985 /*
986 * After performing arch-dependent fixup of the bus, look behind
987 * all PCI-to-PCI bridges on this bus.
988 */
989 pr_debug("PCI: Fixups for bus %04x:%02x\n", pci_domain_nr(bus), bus->number);
990 pcibios_fixup_bus(bus);
991 for (pass=0; pass < 2; pass++)
992 list_for_each_entry(dev, &bus->devices, bus_list) {
993 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
994 dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
995 max = pci_scan_bridge(bus, dev, max, pass);
996 }
998 /*
999 * We've scanned the bus and so we know all about what's on
1000 * the other side of any bridges that may be on this bus plus
1001 * any devices.
1003 * Return how far we've got finding sub-buses.
1004 */
1005 pr_debug("PCI: Bus scan for %04x:%02x returning with max=%02x\n",
1006 pci_domain_nr(bus), bus->number, max);
1007 return max;
1010 unsigned int __devinit pci_do_scan_bus(struct pci_bus *bus)
1012 unsigned int max;
1014 max = pci_scan_child_bus(bus);
1016 /*
1017 * Make the discovered devices available.
1018 */
1019 pci_bus_add_devices(bus);
1021 return max;
1024 struct pci_bus * __devinit pci_create_bus(struct device *parent,
1025 int bus, struct pci_ops *ops, void *sysdata)
1027 int error;
1028 struct pci_bus *b;
1029 struct device *dev;
1031 b = pci_alloc_bus();
1032 if (!b)
1033 return NULL;
1035 dev = kmalloc(sizeof(*dev), GFP_KERNEL);
1036 if (!dev){
1037 kfree(b);
1038 return NULL;
1041 b->sysdata = sysdata;
1042 b->ops = ops;
1044 if (pci_find_bus(pci_domain_nr(b), bus)) {
1045 /* If we already got to this bus through a different bridge, ignore it */
1046 pr_debug("PCI: Bus %04x:%02x already known\n", pci_domain_nr(b), bus);
1047 goto err_out;
1050 down_write(&pci_bus_sem);
1051 list_add_tail(&b->node, &pci_root_buses);
1052 up_write(&pci_bus_sem);
1054 memset(dev, 0, sizeof(*dev));
1055 dev->parent = parent;
1056 dev->release = pci_release_bus_bridge_dev;
1057 sprintf(dev->bus_id, "pci%04x:%02x", pci_domain_nr(b), bus);
1058 error = device_register(dev);
1059 if (error)
1060 goto dev_reg_err;
1061 b->bridge = get_device(dev);
1063 b->class_dev.class = &pcibus_class;
1064 sprintf(b->class_dev.class_id, "%04x:%02x", pci_domain_nr(b), bus);
1065 error = class_device_register(&b->class_dev);
1066 if (error)
1067 goto class_dev_reg_err;
1068 error = class_device_create_file(&b->class_dev, &class_device_attr_cpuaffinity);
1069 if (error)
1070 goto class_dev_create_file_err;
1072 /* Create legacy_io and legacy_mem files for this bus */
1073 pci_create_legacy_files(b);
1075 error = sysfs_create_link(&b->class_dev.kobj, &b->bridge->kobj, "bridge");
1076 if (error)
1077 goto sys_create_link_err;
1079 b->number = b->secondary = bus;
1080 b->resource[0] = &ioport_resource;
1081 b->resource[1] = &iomem_resource;
1083 return b;
1085 sys_create_link_err:
1086 class_device_remove_file(&b->class_dev, &class_device_attr_cpuaffinity);
1087 class_dev_create_file_err:
1088 class_device_unregister(&b->class_dev);
1089 class_dev_reg_err:
1090 device_unregister(dev);
1091 dev_reg_err:
1092 down_write(&pci_bus_sem);
1093 list_del(&b->node);
1094 up_write(&pci_bus_sem);
1095 err_out:
1096 kfree(dev);
1097 kfree(b);
1098 return NULL;
1100 EXPORT_SYMBOL_GPL(pci_create_bus);
1102 struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent,
1103 int bus, struct pci_ops *ops, void *sysdata)
1105 struct pci_bus *b;
1107 b = pci_create_bus(parent, bus, ops, sysdata);
1108 if (b)
1109 b->subordinate = pci_scan_child_bus(b);
1110 return b;
1112 EXPORT_SYMBOL(pci_scan_bus_parented);
/* The scanning entry points below are exported only when CONFIG_HOTPLUG is defined. */
1114 #ifdef CONFIG_HOTPLUG
1115 EXPORT_SYMBOL(pci_add_new_bus);
1116 EXPORT_SYMBOL(pci_do_scan_bus);
1117 EXPORT_SYMBOL(pci_scan_slot);
1118 EXPORT_SYMBOL(pci_scan_bridge);
1119 EXPORT_SYMBOL(pci_scan_single_device);
1120 EXPORT_SYMBOL_GPL(pci_scan_child_bus);
1121 #endif